From a47a8c46020b31ae707175ba621aaaa6f181bde7 Mon Sep 17 00:00:00 2001 From: fredwang Date: Thu, 1 Aug 2024 03:03:53 +0000 Subject: [PATCH 001/292] Merge branch 'cherry-pick-9c8c652a' into 'cnch-2.2' fix(clickhousech@m-4505982308): [cp] optimize dedup worker lock See merge request dp/ClickHouse!22412 --- src/CloudServices/CnchServerServiceImpl.cpp | 47 +++++++++++---------- src/CloudServices/DedupWorkerManager.cpp | 18 ++++---- src/CloudServices/DedupWorkerManager.h | 18 ++++---- 3 files changed, 42 insertions(+), 41 deletions(-) diff --git a/src/CloudServices/CnchServerServiceImpl.cpp b/src/CloudServices/CnchServerServiceImpl.cpp index bf5aaef4b40..b8abe86fcae 100644 --- a/src/CloudServices/CnchServerServiceImpl.cpp +++ b/src/CloudServices/CnchServerServiceImpl.cpp @@ -567,39 +567,40 @@ void CnchServerServiceImpl::reportTaskHeartbeat( } void CnchServerServiceImpl::reportDeduperHeartbeat( - google::protobuf::RpcController * cntl, + google::protobuf::RpcController *, const Protos::ReportDeduperHeartbeatReq * request, Protos::ReportDeduperHeartbeatResp * response, google::protobuf::Closure * done) { - brpc::ClosureGuard done_guard(done); - - try - { - auto cnch_storage_id = RPCHelpers::createStorageID(request->cnch_storage_id()); - - if (auto bg_thread = getContext()->tryGetDedupWorkerManager(cnch_storage_id)) + RPCHelpers::serviceHandler(done, response, [request = request, response = response, done = done, gc = getContext(), log = log] { + brpc::ClosureGuard done_guard(done); + try { - auto worker_table_name = request->worker_table_name(); - auto & manager = static_cast(*bg_thread); + auto cnch_storage_id = RPCHelpers::createStorageID(request->cnch_storage_id()); - auto ret = manager.reportHeartbeat(worker_table_name); + if (auto bg_thread = gc->tryGetDedupWorkerManager(cnch_storage_id)) + { + const auto & worker_table_name = request->worker_table_name(); + auto & manager = static_cast(*bg_thread); - // NOTE: here we send a response back to let the worker know the 
result. - response->set_code(static_cast(ret)); - return; + auto ret = manager.reportHeartbeat(worker_table_name); + + // NOTE: here we send a response back to let the worker know the result. + response->set_code(static_cast(ret)); + return; + } + else + { + LOG_WARNING(log, "Failed to get background thread"); + } } - else + catch (...) { - LOG_WARNING(log, "Failed to get background thread"); + tryLogCurrentException(log, __PRETTY_FUNCTION__); + RPCHelpers::handleException(response->mutable_exception()); } - } - catch (...) - { - tryLogCurrentException(log, __PRETTY_FUNCTION__); - RPCHelpers::handleException(response->mutable_exception()); - } - response->set_code(static_cast(DedupWorkerHeartbeatResult::Kill)); + response->set_code(static_cast(DedupWorkerHeartbeatResult::Kill)); + }); } void CnchServerServiceImpl::fetchDataParts( diff --git a/src/CloudServices/DedupWorkerManager.cpp b/src/CloudServices/DedupWorkerManager.cpp index 3a1d06c9667..30ab8fe94b2 100644 --- a/src/CloudServices/DedupWorkerManager.cpp +++ b/src/CloudServices/DedupWorkerManager.cpp @@ -163,7 +163,7 @@ void DedupWorkerManager::initialize(StoragePtr & storage, StorageCnchMergeTree & } } -void DedupWorkerManager::createDeduperOnWorker(StoragePtr & storage, StorageCnchMergeTree & cnch_table, DeduperInfoPtr & info, std::unique_lock & info_lock) +void DedupWorkerManager::createDeduperOnWorker(StoragePtr & storage, StorageCnchMergeTree & cnch_table, DeduperInfoPtr & info, std::unique_lock & info_lock) { if (info->worker_client) return; @@ -200,7 +200,7 @@ void DedupWorkerManager::createDeduperOnWorker(StoragePtr & storage, StorageCnch } } -void DedupWorkerManager::selectDedupWorker(StorageCnchMergeTree & cnch_table, DeduperInfoPtr & info, std::unique_lock & /*info_lock*/) +void DedupWorkerManager::selectDedupWorker(StorageCnchMergeTree & cnch_table, DeduperInfoPtr & info, std::unique_lock & /*info_lock*/) { auto vw_handle = 
getContext()->getVirtualWarehousePool().get(cnch_table.getSettings()->cnch_vw_write); HostWithPorts history_dedup_worker = dedup_scheduler->tryPickWorker(info->index); @@ -216,13 +216,13 @@ void DedupWorkerManager::selectDedupWorker(StorageCnchMergeTree & cnch_table, De } } -void DedupWorkerManager::markDedupWorker(DeduperInfoPtr & info, std::unique_lock & /*info_lock*/) +void DedupWorkerManager::markDedupWorker(DeduperInfoPtr & info, std::unique_lock & /*info_lock*/) { dedup_scheduler->markIndexDedupWorker(info->index, info->worker_client->getHostWithPorts()); info->is_running = true; } -void DedupWorkerManager::assignHighPriorityDedupPartition(DeduperInfoPtr & info, const Names & high_priority_partition, std::unique_lock & /*info_lock*/) +void DedupWorkerManager::assignHighPriorityDedupPartition(DeduperInfoPtr & info, const Names & high_priority_partition, std::unique_lock & /*info_lock*/) { if (!info->worker_client) return; @@ -230,13 +230,13 @@ void DedupWorkerManager::assignHighPriorityDedupPartition(DeduperInfoPtr & info, info->worker_client->assignHighPriorityDedupPartition(info->worker_storage_id, high_priority_partition); } -void DedupWorkerManager::unsetWorkerClient(DeduperInfoPtr & info, std::unique_lock & /*info_lock*/) +void DedupWorkerManager::unsetWorkerClient(DeduperInfoPtr & info, std::unique_lock & /*info_lock*/) { info->worker_client = nullptr; info->is_running = false; } -void DedupWorkerManager::assignRepairGran(DeduperInfoPtr & info, const DedupGran & dedup_gran, const UInt64 & max_event_time, std::unique_lock & /*info_lock*/) +void DedupWorkerManager::assignRepairGran(DeduperInfoPtr & info, const DedupGran & dedup_gran, const UInt64 & max_event_time, std::unique_lock & /*info_lock*/) { if (!info->worker_client) return; @@ -265,7 +265,7 @@ void DedupWorkerManager::stopDeduperWorker(DeduperInfoPtr & info) unsetWorkerClient(info, info_lock); } -String DedupWorkerManager::getDedupWorkerDebugInfo(DeduperInfoPtr & info, std::unique_lock & 
/*info_lock*/) +String DedupWorkerManager::getDedupWorkerDebugInfo(DeduperInfoPtr & info, std::unique_lock & /*info_lock*/) { if (!info->worker_client) return "dedup worker is not assigned."; @@ -273,7 +273,7 @@ String DedupWorkerManager::getDedupWorkerDebugInfo(DeduperInfoPtr & info, std::u + info->worker_client->getHostWithPorts().toDebugString(); } -bool DedupWorkerManager::checkDedupWorkerStatus(DeduperInfoPtr & info, std::unique_lock & info_lock) +bool DedupWorkerManager::checkDedupWorkerStatus(DeduperInfoPtr & info, std::unique_lock & info_lock) { if (!info->worker_client) return false; @@ -340,8 +340,8 @@ void DedupWorkerManager::dedupWithHighPriority(const ASTPtr & partition, const C DedupWorkerHeartbeatResult DedupWorkerManager::reportHeartbeat(const String & worker_table_name) { - std::lock_guard lock(deduper_infos_mutex); LOG_TRACE(log, "Report heartbeat of dedup worker: worker table name is {}", worker_table_name); + std::lock_guard lock(deduper_infos_mutex); for (const auto & info : deduper_infos) { std::lock_guard info_lock(info->mutex); diff --git a/src/CloudServices/DedupWorkerManager.h b/src/CloudServices/DedupWorkerManager.h index a98e1eee240..f9cd8acdc45 100644 --- a/src/CloudServices/DedupWorkerManager.h +++ b/src/CloudServices/DedupWorkerManager.h @@ -64,7 +64,7 @@ class DedupWorkerManager: public ICnchBGThread worker_client(other.worker_client), worker_storage_id(other.worker_storage_id) {} - mutable std::mutex mutex; + mutable bthread::Mutex mutex; bool is_running{false}; size_t index{0}; CnchWorkerClientPtr worker_client; @@ -80,20 +80,20 @@ class DedupWorkerManager: public ICnchBGThread void initialize(StoragePtr & storage, StorageCnchMergeTree & cnch_table); - void createDeduperOnWorker(StoragePtr & storage, StorageCnchMergeTree & cnch_table, DeduperInfoPtr & info, std::unique_lock & info_lock); + void createDeduperOnWorker(StoragePtr & storage, StorageCnchMergeTree & cnch_table, DeduperInfoPtr & info, std::unique_lock & info_lock); - void 
selectDedupWorker(StorageCnchMergeTree & cnch_table, DeduperInfoPtr & info, std::unique_lock & info_lock); + void selectDedupWorker(StorageCnchMergeTree & cnch_table, DeduperInfoPtr & info, std::unique_lock & info_lock); - void markDedupWorker(DeduperInfoPtr & info, std::unique_lock & info_lock); + void markDedupWorker(DeduperInfoPtr & info, std::unique_lock & info_lock); void stopDeduperWorker(DeduperInfoPtr & info); - bool checkDedupWorkerStatus(DeduperInfoPtr & info, std::unique_lock & info_lock); + bool checkDedupWorkerStatus(DeduperInfoPtr & info, std::unique_lock & info_lock); - static void assignHighPriorityDedupPartition(DeduperInfoPtr & info, const Names & high_priority_partition, std::unique_lock & info_lock); - static void unsetWorkerClient(DeduperInfoPtr & info, std::unique_lock & info_lock); - static void assignRepairGran(DeduperInfoPtr & info, const DedupGran & dedup_gran, const UInt64 & max_event_time, std::unique_lock & info_lock); - static String getDedupWorkerDebugInfo(DeduperInfoPtr & info, std::unique_lock & info_lock); + static void assignHighPriorityDedupPartition(DeduperInfoPtr & info, const Names & high_priority_partition, std::unique_lock & info_lock); + static void unsetWorkerClient(DeduperInfoPtr & info, std::unique_lock & info_lock); + static void assignRepairGran(DeduperInfoPtr & info, const DedupGran & dedup_gran, const UInt64 & max_event_time, std::unique_lock & info_lock); + static String getDedupWorkerDebugInfo(DeduperInfoPtr & info, std::unique_lock & info_lock); mutable bthread::Mutex deduper_infos_mutex; std::atomic initialized{false}; From 093a693ba66d953b8baee8f167f836e4fa84c9b4 Mon Sep 17 00:00:00 2001 From: fredwang Date: Thu, 1 Aug 2024 03:05:24 +0000 Subject: [PATCH 002/292] Merge 'feat/fix_cte_buffer_cnch_2.2' into 'cnch-2.2' fix(optimizer@m-4171913691): add buffer for all cte exists in join build cnch-2.2 See merge request: !22413 # Conflicts: # src/Optimizer/PlanOptimizer.cpp # src/QueryPlan/AggregatingStep.cpp --- 
src/Core/Settings.h | 2 + src/Core/tests/gtest_protobuf.cpp | 2 +- src/Core/tests/gtest_protobuf_common.h | 8 +- src/Optimizer/PlanOptimizer.cpp | 7 +- src/Optimizer/Property/Equivalences.h | 54 --- .../Property/SymbolEquivalencesDeriver.cpp | 25 +- .../Rewriter/AddBufferForDeadlockCTE.cpp | 222 +++++++++-- .../Rewriter/AddBufferForDeadlockCTE.h | 7 +- src/Optimizer/Rewriter/AddRuntimeFilters.cpp | 31 +- src/Optimizer/Rewriter/AddRuntimeFilters.h | 2 + src/Optimizer/Rewriter/ColumnPruning.cpp | 13 +- ...leDistinctAggregationToExpandAggregate.cpp | 4 +- .../SingleDistinctAggregationToGroupBy.cpp | 15 +- src/QueryPlan/AggregatingStep.cpp | 29 +- src/QueryPlan/ExpandStep.cpp | 6 +- src/QueryPlan/MergingAggregatedStep.cpp | 9 + src/QueryPlan/tests/gtest_protobuf.cpp | 12 +- .../tpcds/explains/tpcds100/q1.explain | 9 +- .../tpcds/explains/tpcds100/q14.explain | 18 +- .../tpcds/explains/tpcds100/q23.explain | 32 +- .../tpcds/explains/tpcds100/q24.explain | 12 +- .../tpcds/explains/tpcds100/q31.explain | 4 +- .../tpcds/explains/tpcds100/q33.explain | 9 +- .../tpcds/explains/tpcds100/q54.explain | 10 +- .../tpcds/explains/tpcds100/q56.explain | 15 +- .../tpcds/explains/tpcds100/q58.explain | 15 +- .../tpcds/explains/tpcds100/q59.explain | 12 +- .../tpcds/explains/tpcds100/q60.explain | 15 +- .../tpcds/explains/tpcds100/q64.explain | 8 +- .../tpcds/explains/tpcds100/q74.explain | 4 +- .../tpcds/explains/tpcds100/q83.explain | 19 +- .../tpcds/explains/tpcds1000/q1.explain | 9 +- .../tpcds/explains/tpcds1000/q14.explain | 30 +- .../tpcds/explains/tpcds1000/q23.explain | 32 +- .../tpcds/explains/tpcds1000/q24.explain | 354 +++++++++++------- .../tpcds/explains/tpcds1000/q31.explain | 4 +- .../tpcds/explains/tpcds1000/q33.explain | 9 +- .../tpcds/explains/tpcds1000/q54.explain | 10 +- .../tpcds/explains/tpcds1000/q56.explain | 15 +- .../tpcds/explains/tpcds1000/q58.explain | 15 +- .../tpcds/explains/tpcds1000/q59.explain | 20 +- .../tpcds/explains/tpcds1000/q60.explain | 15 +- 
.../tpcds/explains/tpcds1000/q64.explain | 6 +- .../tpcds/explains/tpcds1000/q74.explain | 4 +- .../tpcds/explains/tpcds1000/q83.explain | 15 +- .../tpcds1000_not_show_stats/q1.explain | 9 +- .../tpcds1000_not_show_stats/q14.explain | 30 +- .../tpcds1000_not_show_stats/q23.explain | 24 +- .../tpcds1000_not_show_stats/q24.explain | 346 ++++++++++------- .../tpcds1000_not_show_stats/q33.explain | 9 +- .../tpcds1000_not_show_stats/q54.explain | 6 +- .../tpcds1000_not_show_stats/q56.explain | 9 +- .../tpcds1000_not_show_stats/q58.explain | 9 +- .../tpcds1000_not_show_stats/q59.explain | 12 +- .../tpcds1000_not_show_stats/q60.explain | 9 +- .../tpcds1000_not_show_stats/q83.explain | 9 +- .../explains/tpcds1000_sample/q1.explain | 9 +- .../explains/tpcds1000_sample/q14.explain | 32 +- .../explains/tpcds1000_sample/q23.explain | 32 +- .../explains/tpcds1000_sample/q24.explain | 354 +++++++++++------- .../explains/tpcds1000_sample/q31.explain | 4 +- .../explains/tpcds1000_sample/q33.explain | 9 +- .../explains/tpcds1000_sample/q54.explain | 8 +- .../explains/tpcds1000_sample/q56.explain | 15 +- .../explains/tpcds1000_sample/q58.explain | 15 +- .../explains/tpcds1000_sample/q59.explain | 20 +- .../explains/tpcds1000_sample/q60.explain | 15 +- .../explains/tpcds1000_sample/q64.explain | 8 +- .../explains/tpcds1000_sample/q74.explain | 4 +- .../explains/tpcds1000_sample/q83.explain | 15 +- ...091_distinct_aggregate_to_expand.reference | 2 + .../40091_distinct_aggregate_to_expand.sql | 6 + ...ol_reallocate_duplicate_group_by.reference | 0 ...6_symbol_reallocate_duplicate_group_by.sql | 4 + ...ate_subquery_expression_planning.reference | 38 +- .../40052_deadlock_cte.reference | 77 +++- .../40052_deadlock_cte.sql | 1 + .../40075_subcolumn_with_cte.sql | 2 +- .../46001_remove_redundant_distinct.reference | 32 +- .../48016_cte_projection.reference | 51 +-- 80 files changed, 1515 insertions(+), 878 deletions(-) create mode 100644 
tests/queries/4_cnch_stateless/40091_distinct_aggregate_to_expand.reference create mode 100644 tests/queries/4_cnch_stateless/40091_distinct_aggregate_to_expand.sql create mode 100644 tests/queries/4_cnch_stateless/60006_symbol_reallocate_duplicate_group_by.reference create mode 100644 tests/queries/4_cnch_stateless/60006_symbol_reallocate_duplicate_group_by.sql diff --git a/src/Core/Settings.h b/src/Core/Settings.h index cb7f6d343d5..3c2e45da361 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -1503,6 +1503,8 @@ enum PreloadLevelSettings : UInt64 M(Bool, enable_subcolumn_optimization_through_union, true, "Whether enable sub column optimization through set operation.", 0) \ M(Bool, enable_buffer_for_deadlock_cte, true, "Whether to buffer data for deadlock cte", 0) \ M(UInt64, statistics_collect_debug_level, 0, "Debug level for statistics collector", 0) \ + M(Bool, enable_remove_remove_unnecessary_buffer, false, "Whether to only add buffer for cte consumer that may cause deadlock", 0) \ + M(Int64, max_buffer_size_for_deadlock_cte, 8000000000, "Inline CTE if buffer is oversized, set 0 to inline all cte, set -1 to buffer data for all cte even no stats", 0) \ M(Bool, enable_add_exchange, true, "Whether to enable AddExchange rule", 0) \ M(Bool, enable_bitmap_index_splitter, true, "Whether to enable BitMapIndexSplitter", 0) \ M(Bool, enable_column_pruning, true, "Whether to enable ColumnPruning", 0) \ diff --git a/src/Core/tests/gtest_protobuf.cpp b/src/Core/tests/gtest_protobuf.cpp index 63e2e0ff53e..efbf9ab22a4 100644 --- a/src/Core/tests/gtest_protobuf.cpp +++ b/src/Core/tests/gtest_protobuf.cpp @@ -192,7 +192,7 @@ TEST_F(ProtobufTest, AggregateDescription) { std::default_random_engine eng(42); // construct valid object - auto obj = generateAggregateDescription(eng); + auto obj = generateAggregateDescription(eng, 6); // serialize to protobuf Protos::AggregateDescription pb; obj.toProto(pb); diff --git a/src/Core/tests/gtest_protobuf_common.h 
b/src/Core/tests/gtest_protobuf_common.h index 8510d13872b..8cff2512bed 100644 --- a/src/Core/tests/gtest_protobuf_common.h +++ b/src/Core/tests/gtest_protobuf_common.h @@ -261,7 +261,7 @@ class ProtobufTest : public testing::Test static Block generateBlock(std::default_random_engine & eng, bool arr = false) { - std::vector columns = {"a", "b", "c"}; + std::vector columns = {"a", "b", "c", "col_0", "col_1", "col_3"}; size_t rows = 10; size_t stride = 1; size_t start = 0; @@ -400,7 +400,7 @@ class ProtobufTest : public testing::Test return res; } - static AggregateDescription generateAggregateDescription(std::default_random_engine & eng) + static AggregateDescription generateAggregateDescription(std::default_random_engine & /*eng*/, int i) { AggregateDescription res; AggregateFunctionProperties properties; @@ -412,7 +412,7 @@ class ProtobufTest : public testing::Test // generate Names // for (int i = 0; i < 10; ++i) // res.argument_names.emplace_back(fmt::format("text{}", eng() % 100)); - res.column_name = std::vector{"a", "b"}[eng() % 2]; + res.column_name = "col_" + std::to_string(i); res.mask_column = res.column_name; return res; } @@ -426,7 +426,7 @@ class ProtobufTest : public testing::Test keys.emplace_back(eng() % 3); AggregateDescriptions aggregates; for (int i = 0; i < 2; ++i) - aggregates.emplace_back(generateAggregateDescription(eng)); + aggregates.emplace_back(generateAggregateDescription(eng, i)); auto overflow_row = eng() % 2 == 1; auto max_rows_to_group_by = eng() % 1000; auto group_by_overflow_mode = static_cast(eng() % 3); diff --git a/src/Optimizer/PlanOptimizer.cpp b/src/Optimizer/PlanOptimizer.cpp index d655bdc7c11..a81df3535d2 100644 --- a/src/Optimizer/PlanOptimizer.cpp +++ b/src/Optimizer/PlanOptimizer.cpp @@ -121,6 +121,7 @@ const Rewriters & PlanOptimizer::getSimpleRewriters() // add exchange std::make_shared(false), + std::make_shared(), std::make_shared(Rules::pushPartialStepRules(), "PushPartialStep"), 
std::make_shared(Rules::optimizeAggregateRules(), "OptimizeAggregate"), std::make_shared(), @@ -138,7 +139,6 @@ const Rewriters & PlanOptimizer::getSimpleRewriters() std::make_shared(), std::make_shared(), /* some rules generates incorrect column ptr for DataStream, e.g. use a non-nullable column ptr for a nullable column */ - std::make_shared(), std::make_shared(Rules::pushTableScanEmbeddedStepRules(), "PushTableScanEmbeddedStepRules"), std::make_shared(), @@ -280,6 +280,10 @@ const Rewriters & PlanOptimizer::getFullRewriters() // Cost-based optimizer std::make_shared(), + // remove not inlined CTEs + std::make_shared(), + std::make_shared(), + // add runtime filters std::make_shared(), @@ -313,7 +317,6 @@ const Rewriters & PlanOptimizer::getFullRewriters() std::make_shared(), std::make_shared(), /* some rules generates incorrect column ptr for DataStream, e.g. use a non-nullable column ptr for a nullable column */ - std::make_shared(), std::make_shared(Rules::pushTableScanEmbeddedStepRules(), "PushTableScanEmbeddedStepRules"), std::make_shared(), std::make_shared(), diff --git a/src/Optimizer/Property/Equivalences.h b/src/Optimizer/Property/Equivalences.h index b676f3c4f26..f681bd5d1b9 100644 --- a/src/Optimizer/Property/Equivalences.h +++ b/src/Optimizer/Property/Equivalences.h @@ -106,60 +106,6 @@ class Equivalences bool isEqual(T first, T second) const { return union_find.isConnected(first, second); } - Ptr translate(std::unordered_map & identities) const - { - auto result = std::make_shared(); - TMap> str_to_set; - for (auto & item : union_find.parent) - { - if (identities.contains(item.first)) - { - str_to_set[item.second].insert(identities[item.first]); - } - } - - for (auto & item : str_to_set) - { - auto & set = item.second; - if (set.size() > 1) - { - auto first = *set.begin(); - for (auto iter = set.begin()++; iter != set.end(); iter++) - { - result->add(first, *iter); - } - } - } - return result; - } - - Ptr translate(std::unordered_set & identities) 
const - { - auto result = std::make_shared(); - std::unordered_map> str_to_set; - for (auto & item : union_find.parent) - { - if (identities.contains(item.first)) - { - str_to_set[item.second].insert(item.first); - } - } - - for (auto & item : str_to_set) - { - auto & set = item.second; - if (set.size() > 1) - { - auto first = *set.begin(); - for (auto iter = set.begin()++; iter != set.end(); iter++) - { - result->add(first, *iter); - } - } - } - return result; - } - Map representMap() const { if (map) diff --git a/src/Optimizer/Property/SymbolEquivalencesDeriver.cpp b/src/Optimizer/Property/SymbolEquivalencesDeriver.cpp index 0870d09d4ab..d12c557c56c 100644 --- a/src/Optimizer/Property/SymbolEquivalencesDeriver.cpp +++ b/src/Optimizer/Property/SymbolEquivalencesDeriver.cpp @@ -69,26 +69,15 @@ SymbolEquivalencesDeriverVisitor::visitProjectionStep(const ProjectionStep & ste { const auto & assignments = step.getAssignments(); std::unordered_map identities = Utils::computeIdentityTranslations(assignments); - std::unordered_map revert_identifies; - - for (auto & item : identities) - { - revert_identifies[item.second] = item.first; - } - - auto equivalences = context[0]->translate(revert_identifies); for (auto & item : identities) - { - equivalences->add(item.second, item.first); - } - return equivalences; + context[0]->add(item.second, item.first); + return context[0]; } SymbolEquivalencesPtr -SymbolEquivalencesDeriverVisitor::visitAggregatingStep(const AggregatingStep & step, std::vector & context) +SymbolEquivalencesDeriverVisitor::visitAggregatingStep(const AggregatingStep &, std::vector & context) { - NameSet set{step.getKeys().begin(), step.getKeys().end()}; - return context[0]->translate(set); + return context[0]; } SymbolEquivalencesPtr SymbolEquivalencesDeriverVisitor::visitExchangeStep(const ExchangeStep &, std::vector & context) @@ -99,10 +88,12 @@ SymbolEquivalencesDeriverVisitor::visitExchangeStep(const ExchangeStep &, std::v SymbolEquivalencesPtr 
SymbolEquivalencesDeriverVisitor::visitCTERefStep(const CTERefStep & step, std::vector & context) { - auto mapping = step.getReverseOutputColumns(); if (!context.empty() && context[0]) { - context[0]->translate(mapping); + auto mappings = step.getOutputColumns(); + for (const auto & mapping : mappings) + context[0]->add(mapping.first, mapping.second); + return context[0]; } return std::make_shared(); } diff --git a/src/Optimizer/Rewriter/AddBufferForDeadlockCTE.cpp b/src/Optimizer/Rewriter/AddBufferForDeadlockCTE.cpp index 360efef1030..ad80a8adeb2 100644 --- a/src/Optimizer/Rewriter/AddBufferForDeadlockCTE.cpp +++ b/src/Optimizer/Rewriter/AddBufferForDeadlockCTE.cpp @@ -1,11 +1,17 @@ +#include #include #include +#include +#include +#include #include +#include #include #include #include #include #include +#include #include #include #include @@ -24,6 +30,10 @@ namespace class UpdateParentExecuteOrderVisitor; // visitor to update execute_order from a join node to left descendant node class UpdateLeftExecuteOrderVisitor; + + // visitor to find cte ref exists both sides of join build and probe to add buffer + class FindAllCTEIfExistsOnJoinBuildVisitor; + // visitor to add BufferStep for deadlock CTEs class AddBufferVisitor; @@ -35,7 +45,14 @@ namespace using VisitEntry = std::pair; using VisitPath = std::vector; - using ExecuteOrderMap = std::unordered_map>; + struct CTEExecuteOrder + { + PlanNodeId node_id; + CTEId cte_id; + int execute_order; + }; + + using ExecuteOrders = std::vector; class FindDirectRightVisitor : public PlanNodeVisitor { @@ -50,7 +67,7 @@ namespace CTEInfo & cte_info; Poco::Logger * logger; - std::unordered_set deadlock_ctes; + std::unordered_set deadlock_ctes; VisitPath visit_path; }; @@ -67,14 +84,14 @@ namespace CTEInfo & cte_info; VisitPath visit_path; - ExecuteOrderMap execute_orders; + ExecuteOrders execute_orders; int cur_execute_order = 0; }; class UpdateLeftExecuteOrderVisitor : public PlanNodeVisitor { public: - 
UpdateLeftExecuteOrderVisitor(CTEInfo & cte_info_, ExecuteOrderMap & execute_orders_, int execute_order_) + UpdateLeftExecuteOrderVisitor(CTEInfo & cte_info_, ExecuteOrders & execute_orders_, int execute_order_) : cte_info(cte_info_), execute_orders(execute_orders_), execute_order(execute_order_) { } @@ -84,21 +101,23 @@ namespace void visitJoinNode(JoinNode & node, const Void &) override; CTEInfo & cte_info; - ExecuteOrderMap & execute_orders; + ExecuteOrders & execute_orders; const int execute_order = 0; }; class AddBufferVisitor : public SimplePlanRewriter { public: - AddBufferVisitor(const std::unordered_set & deadlock_ctes_, ContextMutablePtr context_, CTEInfo & cte_info_) - : SimplePlanRewriter(std::move(context_), cte_info_), deadlock_ctes(deadlock_ctes_) + AddBufferVisitor( + const std::unordered_set & deadlock_ctes_, ContextMutablePtr context_, CTEInfo & cte_info_, Poco::Logger * logger_) + : SimplePlanRewriter(std::move(context_), cte_info_), deadlock_ctes(deadlock_ctes_), logger(logger_) { } PlanNodePtr visitCTERefNode(CTERefNode & node, const Void & c) override; - const std::unordered_set & deadlock_ctes; + const std::unordered_set & deadlock_ctes; + Poco::Logger * logger; }; void FindDirectRightVisitor::visitPlanNode(PlanNodeBase & node, const JoinPath & join_path) @@ -111,19 +130,31 @@ namespace VisitorUtil::accept(node, update_parent_order_visitor, {}); const auto & execute_orders = update_parent_order_visitor.execute_orders; - if (logger && logger->is(Poco::Message::PRIO_TRACE)) + LOG_TRACE(logger, "FindDirectRightVisitor visit on node {}", node.getId()); + + std::unordered_map cte_min_execute_orders; + for (const auto & execute_order : execute_orders) { - std::ostringstream os; - for (const auto & [node_id, node_orders] : execute_orders) - os << node_id << "->" << fmt::format("({})", fmt::join(node_orders, ",")) << " "; - LOG_TRACE(logger, "Direct right node id: {}, calculated execute order: {}", node.getId(), os.str()); + auto it = 
cte_min_execute_orders.find(execute_order.cte_id); + if (it == cte_min_execute_orders.end()) + cte_min_execute_orders.emplace(execute_order.cte_id, execute_order.execute_order); + else + it->second = std::min(it->second, execute_order.execute_order); } - for (const auto & [cte_id, cte_def_node] : cte_info.getCTEs()) + // find deadlock ctes + for (const auto & execute_order : execute_orders) { - auto cte_def_node_id = cte_def_node->getId(); - if (execute_orders.count(cte_def_node_id) && execute_orders.at(cte_def_node_id).size() > 1) - deadlock_ctes.emplace(cte_id); + LOG_TRACE( + logger, + "Direct right node id: {}, cte_id: {}, execute order: {}, cte min execute order: {}", + execute_order.node_id, + execute_order.cte_id, + execute_order.execute_order, + cte_min_execute_orders[execute_order.cte_id]); + + if (execute_order.execute_order > cte_min_execute_orders[execute_order.cte_id]) + deadlock_ctes.emplace(execute_order.node_id); } } @@ -158,7 +189,6 @@ namespace { assert(visit_path.back().second == JoinPath::RIGHT); visit_path.pop_back(); - execute_orders[node.getId()].emplace(cur_execute_order); if (!visit_path.empty()) { @@ -179,7 +209,8 @@ namespace { auto join_path = visit_path.back().second; visit_path.pop_back(); - execute_orders[node.getId()].emplace(++cur_execute_order); + + ++cur_execute_order; // update execute_order for left tree of join node UpdateLeftExecuteOrderVisitor update_left_order_visitor{cte_info, execute_orders, cur_execute_order}; @@ -194,7 +225,6 @@ namespace void UpdateLeftExecuteOrderVisitor::visitPlanNode(PlanNodeBase & node, const Void & ctx) { - execute_orders[node.getId()].emplace(execute_order); for (auto & child : node.getChildren()) VisitorUtil::accept(*child, *this, ctx); @@ -202,16 +232,14 @@ namespace void UpdateLeftExecuteOrderVisitor::visitCTERefNode(CTERefNode & node, const Void & ctx) { - execute_orders[node.getId()].emplace(execute_order); auto cte_id = node.getStep()->getId(); + 
execute_orders.emplace_back(CTEExecuteOrder{node.getId(), cte_id, execute_order}); VisitorUtil::accept(*cte_info.getCTEDef(cte_id), *this, ctx); } void UpdateLeftExecuteOrderVisitor::visitJoinNode(JoinNode & node, const Void & ctx) { - execute_orders[node.getId()].emplace(execute_order); - VisitorUtil::accept(*node.getChildren().at(0), *this, ctx); } @@ -220,41 +248,159 @@ namespace SimplePlanRewriter::visitCTERefNode(node, c); auto cte_id = node.getStep()->getId(); - if (!deadlock_ctes.count(cte_id)) - { + if (!deadlock_ctes.count(node.getId())) return node.shared_from_this(); + + /** + * if buffer size exceed max_buffer_size_for_deadlock_cte, we inline cte instead of add buffer. + * + * note: max buffer size for tpcds 1t is 7994883314, so we set max_buffer_size_for_deadlock_cte + * 8'000'000'000 bytes (8Gb) by default for tpcds 1T + */ + Int64 max_buffer_size = context->getSettingsRef().max_buffer_size_for_deadlock_cte; + if (max_buffer_size == 0) + { + LOG_TRACE(logger, "Inline CTE {} because max_buffer_size_for_deadlock_cte=0", cte_id); + return node.getStep()->toInlinedPlanNode(cte_helper.getCTEInfo(), context); } - else + + if (max_buffer_size > 0) { - QueryPlanStepPtr buffer_step = std::make_shared(node.getCurrentDataStream()); - PlanNodePtr buffer_node = PlanNodeBase::createPlanNode( - context->nextNodeId(), std::move(buffer_step), {node.shared_from_this()}, node.getStatistics()); - return buffer_node; + auto stats = CardinalityEstimator::estimate(node, cte_helper.getCTEInfo(), context); + if (!stats) + { + LOG_TRACE(logger, "Inline CTE {} because estimates stats failed", cte_id); + return node.getStep()->toInlinedPlanNode(cte_helper.getCTEInfo(), context); + } + + Int64 buffer_size = (*stats)->getOutputSizeInBytes(); + LOG_TRACE(logger, "CTE {} estimated buffer size {}", cte_id, (*stats)->getOutputSizeInBytes()); + if (buffer_size > max_buffer_size) + { + LOG_TRACE( + logger, + "Inline CTE {} because estimates buffer size {} is bigger than 
max_buffer_size_for_deadlock_cte({})", + cte_id, + buffer_size, + max_buffer_size); + return node.getStep()->toInlinedPlanNode(cte_helper.getCTEInfo(), context); + } } + + QueryPlanStepPtr buffer_step = std::make_shared(node.getCurrentDataStream()); + PlanNodePtr buffer_node + = PlanNodeBase::createPlanNode(context->nextNodeId(), std::move(buffer_step), {node.shared_from_this()}, node.getStatistics()); + return buffer_node; } + + class FindAllCTEIfExistsOnJoinBuildVisitor : public PlanNodeVisitor, Void> + { + public: + explicit FindAllCTEIfExistsOnJoinBuildVisitor(CTEInfo & cte_info) : cte_helper(cte_info) + { + } + + std::unordered_set visitPlanNode(PlanNodeBase & node, Void & c) override + { + std::unordered_set ctes; + for (const auto & child : node.getChildren()) + { + auto child_ctes = VisitorUtil::accept(*child, *this, c); + ctes.insert(child_ctes.begin(), child_ctes.end()); + } + return ctes; + } + + std::unordered_set visitCTERefNode(CTERefNode & node, Void & c) override + { + const auto * cte_step = dynamic_cast(node.getStep().get()); + auto cte_id = cte_step->getId(); + cte_refs[cte_id].emplace_back(node.getId()); + + auto ctes = cte_helper.accept(cte_id, *this, c); + ctes.emplace(cte_id); + + return ctes; + } + + std::unordered_set visitJoinNode(JoinNode & node, Void & c) override + { + auto left_ctes = VisitorUtil::accept(*node.getChildren()[0], *this, c); + auto right_ctes = VisitorUtil::accept(*node.getChildren()[1], *this, c); + for (const auto & cte_id : right_ctes) + { + for (const auto & node_id : cte_refs[cte_id]) + deadlock_ctes.emplace(node_id); + } + + left_ctes.insert(right_ctes.begin(), left_ctes.end()); + return left_ctes; + } + + SimpleCTEVisitHelper> cte_helper; + std::unordered_map> cte_refs; + + std::unordered_set deadlock_ctes; + }; } void AddBufferForDeadlockCTE::rewrite(QueryPlan & plan, ContextMutablePtr context) const { static auto * logger = &Poco::Logger::get("AddBufferForDeadlockCTE"); - FindDirectRightVisitor 
find_deadlock_cte_visitor{plan.getCTEInfo(), logger}; - VisitorUtil::accept(plan.getPlanNode(), find_deadlock_cte_visitor, JoinPath::RIGHT); + if (plan.getCTEInfo().empty()) + return; + + std::unordered_set deadlock_ctes; - if (logger && logger->is(Poco::Message::PRIO_DEBUG) && !find_deadlock_cte_visitor.deadlock_ctes.empty()) + // fixme: fix deadlock algorithm to enable this settings + if (context->getSettings().enable_remove_remove_unnecessary_buffer) + { + FindDirectRightVisitor find_deadlock_cte_visitor{plan.getCTEInfo(), logger}; + VisitorUtil::accept(plan.getPlanNode(), find_deadlock_cte_visitor, JoinPath::RIGHT); + deadlock_ctes = std::move(find_deadlock_cte_visitor.deadlock_ctes); + } + else + { + FindAllCTEIfExistsOnJoinBuildVisitor find_all_cte_ref_visitor{plan.getCTEInfo()}; + Void c; + VisitorUtil::accept(plan.getPlanNode(), find_all_cte_ref_visitor, c); + deadlock_ctes = std::move(find_all_cte_ref_visitor.deadlock_ctes); + } + + if (deadlock_ctes.empty()) + return; + + if (logger->debug()) { std::ostringstream os; - for (const auto & cte_id : find_deadlock_cte_visitor.deadlock_ctes) - os << cte_id << '#' << plan.getCTEInfo().getCTEDef(cte_id)->getId() << ", "; - LOG_DEBUG(logger, "Detected deadlock ctes(cte_id#plan_node_id): {}", os.str()); + for (const auto & cte_ref_id : deadlock_ctes) + os << cte_ref_id << ", "; + LOG_DEBUG(logger, "Detected deadlock ctes(cte_ref_id): {}", os.str()); } - AddBufferVisitor add_buffer_visitor{find_deadlock_cte_visitor.deadlock_ctes, context, plan.getCTEInfo()}; - VisitorUtil::accept(plan.getPlanNode(), add_buffer_visitor, {}); + AddBufferVisitor add_buffer_visitor{deadlock_ctes, context, plan.getCTEInfo(), logger}; + plan.update(VisitorUtil::accept(plan.getPlanNode(), add_buffer_visitor, {})); RewriterPtr push_limit_through_buffer = std::make_shared( std::vector{std::make_shared()}, "PushDownLimitThroughBuffer"); push_limit_through_buffer->rewritePlan(plan, context); + + if 
(context->getSettingsRef().max_buffer_size_for_deadlock_cte >= 0) + { + static Rewriters rewriters + = {std::make_shared(), + std::make_shared(), + std::make_shared(false, true), + std::make_shared(Rules::inlineProjectionRules(), "InlineProjection"), + std::make_shared(Rules::normalizeExpressionRules(), "NormalizeExpression"), + std::make_shared(Rules::swapPredicateRules(), "SwapPredicate"), + std::make_shared(Rules::simplifyExpressionRules(), "SimplifyExpression"), + std::make_shared(Rules::removeRedundantRules(), "RemoveRedundant")}; + + for (auto & rewriter : rewriters) + rewriter->rewritePlan(plan, context); + } } } diff --git a/src/Optimizer/Rewriter/AddBufferForDeadlockCTE.h b/src/Optimizer/Rewriter/AddBufferForDeadlockCTE.h index d4adb0cbd54..5638ae3ccba 100644 --- a/src/Optimizer/Rewriter/AddBufferForDeadlockCTE.h +++ b/src/Optimizer/Rewriter/AddBufferForDeadlockCTE.h @@ -43,10 +43,9 @@ namespace DB /// the rewriter will output: /// Join /// / \ -/// Buffer Buffer -/// | | -/// CTERef[0] CTERef[0] -/// TODO: add buffer step only on left table side +/// Buffer CTERef[0] +/// | +/// CTERef[0] /// /// /// Currently the algorithm will add buffer step aggresively to solve cyclic deadlock ctes. 
diff --git a/src/Optimizer/Rewriter/AddRuntimeFilters.cpp b/src/Optimizer/Rewriter/AddRuntimeFilters.cpp index e787a88277d..63bc178e91a 100644 --- a/src/Optimizer/Rewriter/AddRuntimeFilters.cpp +++ b/src/Optimizer/Rewriter/AddRuntimeFilters.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -161,6 +162,19 @@ PlanPropEquivalences AddRuntimeFilters::AddRuntimeFilterRewriter::visitJoinNode( if (!is_broadcast && isFixedHashShuffleOrBucketTableShuffle(left.property)) { partition_columns = left.property.getNodePartitioning().getColumns(); + bool all_contains = std::all_of( + partition_columns.begin(), partition_columns.end(), [&](const auto & column) { + return left.plan->getStep()->getOutputStream().header.has(column); + }); + if (!all_contains) + { + LOG_WARNING( + logger, + "partition columns not found in AddRuntimeFilteres, required: {}, left output: {}", + fmt::join(partition_columns, ", "), + fmt::join(left.plan->getOutputNames(), ", ")); + break; + } } double selectivity; @@ -669,20 +683,27 @@ PlanNodePtr AddRuntimeFilters::AddExchange::visitJoinNode(JoinNode & node, std:: } // fixme: fix buffer step to remove this method -PlanNodePtr AddRuntimeFilters::AddExchange::visitCTERefNode(CTERefNode & node, std::unordered_set & need_exchange) +PlanNodePtr AddRuntimeFilters::AddExchange::visitBufferNode(BufferNode & node, std::unordered_set & need_exchange) { + auto res = SimplePlanRewriter::visitBufferNode(node, need_exchange); if (need_exchange.empty()) - return SimplePlanRewriter::visitPlanNode(node, need_exchange); + return res; return PlanNodeBase::createPlanNode( context->nextNodeId(), std::make_unique( - DataStreams{node.getCurrentDataStream()}, + DataStreams{res->getCurrentDataStream()}, ExchangeMode::LOCAL_NO_NEED_REPARTITION, Partitioning{Partitioning::Handle::FIXED_ARBITRARY}, context->getSettingsRef().enable_shuffle_with_order), - PlanNodes{node.shared_from_this()}, - node.getStatistics()); + PlanNodes{res}, + 
res->getStatistics()); +} + +PlanNodePtr AddRuntimeFilters::AddExchange::visitCTERefNode(CTERefNode & node, std::unordered_set &) +{ + std::unordered_set need_exchange; + return SimplePlanRewriter::visitCTERefNode(node, need_exchange); } PlanNodePtr AddRuntimeFilters::AddExchange::visitFilterNode(FilterNode & node, std::unordered_set & need_exchange) diff --git a/src/Optimizer/Rewriter/AddRuntimeFilters.h b/src/Optimizer/Rewriter/AddRuntimeFilters.h index d405cca689b..671fb02dfa5 100644 --- a/src/Optimizer/Rewriter/AddRuntimeFilters.h +++ b/src/Optimizer/Rewriter/AddRuntimeFilters.h @@ -70,6 +70,7 @@ class AddRuntimeFilters::AddRuntimeFilterRewriter : public PlanNodeVisitor cte_helper; + Poco::Logger * logger = &Poco::Logger::get("AddRuntimeFilters"); }; struct RuntimeFilterContext @@ -164,6 +165,7 @@ class AddRuntimeFilters::AddExchange : public SimplePlanRewriter &) override; PlanNodePtr visitJoinNode(JoinNode & node, std::unordered_set &) override; PlanNodePtr visitCTERefNode(CTERefNode & node, std::unordered_set &) override; + PlanNodePtr visitBufferNode(BufferNode & node, std::unordered_set &) override; }; } diff --git a/src/Optimizer/Rewriter/ColumnPruning.cpp b/src/Optimizer/Rewriter/ColumnPruning.cpp index 67f3d4ab52c..56bba602d5d 100644 --- a/src/Optimizer/Rewriter/ColumnPruning.cpp +++ b/src/Optimizer/Rewriter/ColumnPruning.cpp @@ -462,10 +462,19 @@ PlanNodePtr ColumnPruningVisitor::visitExpandNode(ExpandNode & node, ColumnPruni ColumnPruningContext child_column_pruning_context{.name_set = child_require}; auto child = VisitorUtil::accept(node.getChildren()[0], *this, child_column_pruning_context); + Assignments assignments; + for (const auto & assignment : step->getAssignments()) + if (child_require.contains(assignment.first)) + assignments.emplace_back(assignment.first, assignment.second); + NameToType name_to_type; + for (const auto & item : step->getNameToType()) + if (child_require.contains(item.first)) + name_to_type.emplace(item.first, item.second); 
+ auto expr_step = std::make_shared( child->getStep()->getOutputStream(), - step->getAssignments(), - step->getNameToType(), + assignments, + name_to_type, step->getGroupIdSymbol(), step->getGroupIdValue(), step->getGroupIdNonNullSymbol()); diff --git a/src/Optimizer/Rule/Rewrite/MultipleDistinctAggregationToExpandAggregate.cpp b/src/Optimizer/Rule/Rewrite/MultipleDistinctAggregationToExpandAggregate.cpp index 2711b30f800..317e640e266 100644 --- a/src/Optimizer/Rule/Rewrite/MultipleDistinctAggregationToExpandAggregate.cpp +++ b/src/Optimizer/Rule/Rewrite/MultipleDistinctAggregationToExpandAggregate.cpp @@ -149,11 +149,11 @@ TransformResult MultipleDistinctAggregationToExpandAggregate::transformImpl(Plan for (const auto & input_column : input) { DataTypePtr type = input_column.type; - type = JoinCommon::tryConvertTypeToNullable(type); + // type = JoinCommon::tryConvertTypeToNullable(type); name_type[input_column.name] = type; assignments.emplace( input_column.name, - makeASTFunction("cast", std::make_shared(Field()), std::make_shared(type->getName()))); + makeASTFunction("cast", std::make_shared(type->getDefault()), std::make_shared(type->getName()))); } /// append a extra mark field : group_id. 
diff --git a/src/Optimizer/Rule/Rewrite/SingleDistinctAggregationToGroupBy.cpp b/src/Optimizer/Rule/Rewrite/SingleDistinctAggregationToGroupBy.cpp index 657ea828568..deb86faa6d5 100644 --- a/src/Optimizer/Rule/Rewrite/SingleDistinctAggregationToGroupBy.cpp +++ b/src/Optimizer/Rule/Rewrite/SingleDistinctAggregationToGroupBy.cpp @@ -82,14 +82,19 @@ TransformResult SingleDistinctAggregationToGroupBy::transformImpl(PlanNodePtr no const auto & step = dynamic_cast(*step_ptr); // insert a extra Group-by Aggregate, perform distinct operation - auto symbols = step.getAggregates()[0].argument_names; - auto group_by = step.getKeys(); - symbols.insert(symbols.begin(), group_by.begin(), group_by.end()); + NameSet distinct_keys; + Names keys; + for (const auto & symbol : step.getKeys()) + if (distinct_keys.emplace(symbol).second) + keys.emplace_back(symbol); + for (const auto & symbol : step.getAggregates()[0].argument_names) + if (distinct_keys.emplace(symbol).second) + keys.emplace_back(symbol); AggregateDescriptions aggregate_descriptions; auto group_by_step = std::make_shared( node->getChildren()[0]->getStep()->getOutputStream(), - symbols, + keys, step.getKeysNotHashed(), aggregate_descriptions, GroupingSetsParamsList{}, @@ -110,7 +115,7 @@ TransformResult SingleDistinctAggregationToGroupBy::transformImpl(PlanNodePtr no auto remove_distinct_agg_step = std::make_shared( group_by_node->getStep()->getOutputStream(), - group_by, + step.getKeys(), step.getKeysNotHashed(), remove_distinct_agg_descs, GroupingSetsParamsList{}, diff --git a/src/QueryPlan/AggregatingStep.cpp b/src/QueryPlan/AggregatingStep.cpp index d58c3760444..4ba584d78e2 100644 --- a/src/QueryPlan/AggregatingStep.cpp +++ b/src/QueryPlan/AggregatingStep.cpp @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -41,11 +42,12 @@ #include #include #include -#include -#include "Core/SettingsEnums.h" -#include +#include #include -#include +#include + +#include +#include namespace DB { @@ 
-232,11 +234,11 @@ AggregatingStep::createParams(Block header_before_aggregation, AggregateDescript return Aggregator::Params( - header_before_aggregation, keys, aggregates, overflow_row, 0, OverflowMode::THROW, - 0, - 0, - 0, - false, + header_before_aggregation, keys, aggregates, overflow_row, 0, OverflowMode::THROW, + 0, + 0, + 0, + false, 10485760, false, nullptr, 0, 0, false, 0); } @@ -322,6 +324,15 @@ AggregatingStep::AggregatingStep( , no_shuffle(no_shuffle_) { + NameSet output_names; + for (const auto & key : keys) + if (!output_names.emplace(key).second) + throw Exception(ErrorCodes::LOGICAL_ERROR, "duplicate group by key: {}", key); + + for (const auto & aggregate : params.aggregates) + if (!output_names.emplace(aggregate.column_name).second) + throw Exception(ErrorCodes::LOGICAL_ERROR, "duplicate aggreagte function output name: {}", aggregate.column_name); + // final = final && !totals && !cube & !rollup; setInputStreams(input_streams); } diff --git a/src/QueryPlan/ExpandStep.cpp b/src/QueryPlan/ExpandStep.cpp index 5e43cc6f009..d3969849507 100644 --- a/src/QueryPlan/ExpandStep.cpp +++ b/src/QueryPlan/ExpandStep.cpp @@ -42,6 +42,8 @@ ExpandStep::ExpandStep( { if (unlikely(!name_to_type[item.first])) throw Exception(ErrorCodes::LOGICAL_ERROR, "ExpandStep miss type info for column " + item.first); + if (unlikely(!input_stream_.header.has(item.first))) + throw Exception(ErrorCodes::LOGICAL_ERROR, "ExpandStep miss input column " + item.first); output_stream->header.insert(ColumnWithTypeAndName{name_to_type[item.first], item.first}); } auto group_id_symbol_type = std::make_shared(); @@ -54,9 +56,7 @@ void ExpandStep::setInputStreams(const DataStreams & input_streams_) Block block; for (auto & input : input_streams[0].header) - { - block.insert(ColumnWithTypeAndName{JoinCommon::tryConvertTypeToNullable(input.type), input.name}); - } + block.insert(ColumnWithTypeAndName{input.type, input.name}); output_stream->header = block; auto group_id_symbol_type = 
std::make_shared(); output_stream->header.insert(ColumnWithTypeAndName{group_id_symbol_type, group_id_symbol}); diff --git a/src/QueryPlan/MergingAggregatedStep.cpp b/src/QueryPlan/MergingAggregatedStep.cpp index 7154e3b2550..f1c9fee281d 100644 --- a/src/QueryPlan/MergingAggregatedStep.cpp +++ b/src/QueryPlan/MergingAggregatedStep.cpp @@ -86,6 +86,15 @@ MergingAggregatedStep::MergingAggregatedStep( , memory_efficient_merge_threads(memory_efficient_merge_threads_) , should_produce_results_in_order_of_bucket_number(!(params->final) && memory_efficient_aggregation) { + NameSet output_names; + for (const auto & key : keys) + if (!output_names.emplace(key).second) + throw Exception(ErrorCodes::LOGICAL_ERROR, "duplicate group by key: {}", key); + + for (const auto & aggregate : params->params.aggregates) + if (!output_names.emplace(aggregate.column_name).second) + throw Exception(ErrorCodes::LOGICAL_ERROR, "duplicate aggreagte function output name: {}", aggregate.column_name); + /// Aggregation keys are distinct for (auto key : params->params.keys) output_stream->distinct_columns.insert(params->params.intermediate_header.getByPosition(key).name); diff --git a/src/QueryPlan/tests/gtest_protobuf.cpp b/src/QueryPlan/tests/gtest_protobuf.cpp index d9f64922928..a3bdab48472 100644 --- a/src/QueryPlan/tests/gtest_protobuf.cpp +++ b/src/QueryPlan/tests/gtest_protobuf.cpp @@ -896,9 +896,10 @@ TEST_F(ProtobufTest, MergingAggregatedStep) auto step = [&eng] { std::string step_description = fmt::format("description {}", eng() % 100); auto base_input_stream = generateDataStream(eng); - Names keys; + NameSet distinct_keys; for (int i = 0; i < 10; ++i) - keys.emplace_back(fmt::format("text{}", eng() % 100)); + distinct_keys.emplace(fmt::format("text{}", eng() % 100)); + Names keys{distinct_keys.begin(), distinct_keys.end()}; GroupingSetsParamsList grouping_sets_params; for (int i = 0; i < 2; ++i) grouping_sets_params.emplace_back(generateGroupingSetsParams(eng)); @@ -941,9 +942,10 @@ 
TEST_F(ProtobufTest, AggregatingStep) auto step = [&eng] { std::string step_description = fmt::format("description {}", eng() % 100); auto base_input_stream = generateDataStream(eng); - Names keys; + NameSet distinct_keys; for (int i = 0; i < 10; ++i) - keys.emplace_back(fmt::format("text{}", eng() % 100)); + distinct_keys.emplace(fmt::format("text{}", eng() % 100)); + Names keys{distinct_keys.begin(), distinct_keys.end()}; NameSet keys_not_hashed; for (int i = 0; i < 10; ++i) keys_not_hashed.emplace(fmt::format("text{}", eng() % 100)); @@ -1122,7 +1124,7 @@ TEST_F(ProtobufTest, ReadStorageRowCountStep) auto base_output_header = generateBlock(eng); auto storage_id = test_storage_ids[eng() % 3]; auto query = generateAST(eng); - auto agg_desc = generateAggregateDescription(eng); + auto agg_desc = generateAggregateDescription(eng, 0); auto num_rows = eng() % 1000; auto is_final_agg = false; auto s = std::make_shared(base_output_header, query, agg_desc, num_rows, is_final_agg); diff --git a/tests/optimizers/tpcds/explains/tpcds100/q1.explain b/tests/optimizers/tpcds/explains/tpcds100/q1.explain index 2ac33ce653b..fbcc41bcf04 100644 --- a/tests/optimizers/tpcds/explains/tpcds100/q1.explain +++ b/tests/optimizers/tpcds/explains/tpcds100/q1.explain @@ -50,7 +50,8 @@ Projection Est. 100 rows │ │ Where: d_year = 2000 │ │ Outputs: d_date_sk_1:=d_date_sk, d_year_1:=d_year │ └─ Broadcast Exchange Est. 45 rows - │ └─ CTERef[1] Est. 45 rows + │ └─ Buffer Est. 45 rows + │ └─ CTERef[1] Est. 45 rows └─ Broadcast Exchange Est. 45 rows └─ Inner Join Est. 45 rows │ Condition: sr_store_sk_4 == s_store_sk @@ -89,10 +90,12 @@ Projection Est. 100 rows │ │ Where: d_year = 2000 │ │ Outputs: d_date_sk_2:=d_date_sk, d_year_2:=d_year │ └─ Broadcast Exchange Est. 45 rows - │ └─ CTERef[1] Est. 45 rows + │ └─ Buffer Est. 45 rows + │ └─ CTERef[1] Est. 45 rows └─ Repartition Exchange Est. 45 rows │ Partition by: {s_store_sk} - └─ CTERef[1] Est. 45 rows + └─ Buffer Est. 45 rows + └─ CTERef[1] Est. 
45 rows CTEDef [1] Projection Est. 45 rows │ Expressions: [s_store_sk] diff --git a/tests/optimizers/tpcds/explains/tpcds100/q14.explain b/tests/optimizers/tpcds/explains/tpcds100/q14.explain index c0886cf066d..662ab70aa09 100644 --- a/tests/optimizers/tpcds/explains/tpcds100/q14.explain +++ b/tests/optimizers/tpcds/explains/tpcds100/q14.explain @@ -56,9 +56,11 @@ Projection Est. 100 rows │ │ │ │ Outputs: [d_date_sk, d_year, d_moy] │ │ │ └─ TableScan tpcds100.item Est. 204000 rows │ │ │ Outputs: [i_item_sk, i_brand_id, i_class_id, i_category_id] - │ │ └─ CTERef[0] Est. 4230034 rows + │ │ └─ Buffer Est. 4230034 rows + │ │ └─ CTERef[0] Est. 4230034 rows │ └─ Broadcast Exchange Est. 1 rows - │ └─ CTERef[2] Est. 1 rows + │ └─ Buffer Est. 1 rows + │ └─ CTERef[2] Est. 1 rows ├─ Aggregating Est. 13880 rows │ │ Group by: {expr#'catalog', i_brand_id_7, i_class_id_7, i_category_id_7} │ │ Aggregates: expr#sum(sales):=AggNull(sum)(expr#sum(multiply(cs_quantity, cs_list_price))), expr#sum(number_sales):=AggNull(sum)(expr#count()_1) @@ -101,7 +103,8 @@ Projection Est. 100 rows │ │ └─ TableScan tpcds100.item Est. 204000 rows │ │ Outputs: i_item_sk_6:=i_item_sk, i_brand_id_7:=i_brand_id, i_class_id_7:=i_class_id, i_category_id_7:=i_category_id │ └─ Broadcast Exchange Est. 1 rows - │ └─ CTERef[2] Est. 1 rows + │ └─ Buffer Est. 1 rows + │ └─ CTERef[2] Est. 1 rows └─ Aggregating Est. 13880 rows │ Group by: {expr#'web', i_brand_id_8, i_class_id_8, i_category_id_8} │ Aggregates: expr#sum(sales):=AggNull(sum)(expr#sum(multiply(ws_quantity, ws_list_price))), expr#sum(number_sales):=AggNull(sum)(expr#count()_2) @@ -144,7 +147,8 @@ Projection Est. 100 rows │ └─ TableScan tpcds100.item Est. 204000 rows │ Outputs: i_item_sk_8:=i_item_sk, i_brand_id_8:=i_brand_id, i_class_id_8:=i_class_id, i_category_id_8:=i_category_id └─ Broadcast Exchange Est. 1 rows - └─ CTERef[2] Est. 1 rows + └─ Buffer Est. 1 rows + └─ CTERef[2] Est. 1 rows CTEDef [0] Repartition Exchange Est. 
4230034 rows │ Partition by: {i_item_sk_1} @@ -361,7 +365,8 @@ Projection Est. 100 rows │ │ Where: (d_year = cast(2000, 'UInt32')) AND (d_moy = 12) AND (d_dom = 11) │ │ Outputs: [d_dom], d_week_seq_1:=d_week_seq, d_year_30:=d_year, d_moy_3:=d_moy │ └─ Broadcast Exchange Est. 1 rows - │ └─ CTERef[3] Est. 1 rows + │ └─ Buffer Est. 1 rows + │ └─ CTERef[3] Est. 1 rows └─ Projection Est. 771 rows │ Expressions: [expr#count()_5, expr#sum(multiply(ss_quantity, ss_list_price))_3, i_brand_id_29, i_category_id_29, i_class_id_29], expr#'store'_3:='store' └─ Inner Join Est. 771 rows @@ -413,7 +418,8 @@ Projection Est. 100 rows │ Where: (d_year = 1999) AND (d_moy = 12) AND (d_dom = 11) │ Outputs: d_week_seq_4:=d_week_seq, d_year_37:=d_year, d_moy_4:=d_moy, d_dom_1:=d_dom └─ Broadcast Exchange Est. 1 rows - └─ CTERef[3] Est. 1 rows + └─ Buffer Est. 1 rows + └─ CTERef[3] Est. 1 rows CTEDef [0] Repartition Exchange Est. 4230034 rows │ Partition by: {i_item_sk_18} diff --git a/tests/optimizers/tpcds/explains/tpcds100/q23.explain b/tests/optimizers/tpcds/explains/tpcds100/q23.explain index 003daf47764..90a7db1d2a2 100644 --- a/tests/optimizers/tpcds/explains/tpcds100/q23.explain +++ b/tests/optimizers/tpcds/explains/tpcds100/q23.explain @@ -21,8 +21,7 @@ Projection Est. 1 rows │ │ │ Runtime Filters Builder: {cs_item_sk} │ │ ├─ Filter Est. 14386027 rows │ │ │ │ Condition: Runtime Filters: {i_item_sk_1} - │ │ │ └─ Local Exchange Est. 57544111 rows - │ │ │ └─ CTERef[0] Est. 57544111 rows + │ │ │ └─ CTERef[0] Est. 57544111 rows │ │ └─ Inner Join Est. 2196033 rows │ │ │ Condition: cs_sold_date_sk == d_date_sk │ │ │ Runtime Filters Builder: {d_date_sk} @@ -39,7 +38,8 @@ Projection Est. 1 rows │ │ └─ TableScan tpcds100.date_dim Est. 73049 rows │ │ Where: (d_year = 2000) AND (d_moy = 2) │ │ Outputs: [d_date_sk, d_year, d_moy] - │ └─ CTERef[1] Est. 996434 rows + │ └─ Buffer Est. 996434 rows + │ └─ CTERef[1] Est. 996434 rows └─ Aggregating Est. 
1 rows │ Group by: {} │ Aggregates: expr#sum(sales):=AggNull(sum)(expr#multiply(ws_quantity, ws_list_price)) @@ -55,8 +55,7 @@ Projection Est. 1 rows │ │ Runtime Filters Builder: {ws_item_sk} │ ├─ Filter Est. 14386027 rows │ │ │ Condition: Runtime Filters: {i_item_sk_2} - │ │ └─ Local Exchange Est. 57544111 rows - │ │ └─ CTERef[0] Est. 57544111 rows + │ │ └─ CTERef[0] Est. 57544111 rows │ └─ Inner Join Est. 1105888 rows │ │ Condition: ws_sold_date_sk == d_date_sk_3 │ │ Runtime Filters Builder: {d_date_sk_3} @@ -74,7 +73,8 @@ Projection Est. 1 rows │ Where: (d_moy = 2) AND (d_year = 2000) │ Outputs: d_date_sk_3:=d_date_sk, d_year_3:=d_year, d_moy_1:=d_moy └─ Local Exchange Est. 996434 rows - └─ CTERef[1] Est. 996434 rows + └─ Buffer Est. 996434 rows + └─ CTERef[1] Est. 996434 rows CTEDef [0] Projection Est. 57544111 rows │ Expressions: i_item_sk:=ss_item_sk @@ -193,7 +193,8 @@ Projection Est. 100 rows │ │ ├─ Filter Est. 14386027 rows │ │ │ │ Condition: Runtime Filters: {i_item_sk_6} │ │ │ └─ Local Exchange Est. 57544111 rows - │ │ │ └─ CTERef[0] Est. 57544111 rows + │ │ │ └─ Buffer Est. 57544111 rows + │ │ │ └─ CTERef[0] Est. 57544111 rows │ │ └─ Inner Join Est. 2196033 rows │ │ │ Condition: cs_sold_date_sk_1 == d_date_sk_10 │ │ │ Runtime Filters Builder: {d_date_sk_10} @@ -210,16 +211,17 @@ Projection Est. 100 rows │ │ └─ TableScan tpcds100.date_dim Est. 73049 rows │ │ Where: (d_moy = 2) AND (d_year = 2000) │ │ Outputs: d_date_sk_10:=d_date_sk, d_year_10:=d_year, d_moy_2:=d_moy - │ └─ CTERef[1] Est. 996434 rows + │ └─ Buffer Est. 996434 rows + │ └─ CTERef[1] Est. 996434 rows └─ MergingAggregated Est. 996434 rows - └─ Repartition Exchange Est. 996434 rows + └─ Repartition Exchange Est. 999986 rows │ Partition by: {c_last_name_1, c_first_name_1} - └─ Aggregating Est. 996434 rows + └─ Aggregating Est. 
999986 rows │ Group by: {c_last_name_1, c_first_name_1} │ Aggregates: expr#sum(multiply(ws_quantity, ws_list_price)):=AggNull(sum)(expr#multiply(ws_quantity, ws_list_price)_1) - └─ Projection Est. 996434 rows + └─ Projection Est. 999986 rows │ Expressions: [c_first_name_1, c_last_name_1], expr#multiply(ws_quantity, ws_list_price)_1:=ws_quantity_1 * ws_list_price_1 - └─ Inner Join Est. 996434 rows + └─ Inner Join Est. 999986 rows │ Condition: c_customer_sk_8 == ws_bill_customer_sk_1 │ Runtime Filters Builder: {ws_bill_customer_sk_1} ├─ Repartition Exchange Est. 2000000 rows @@ -240,7 +242,8 @@ Projection Est. 100 rows │ ├─ Filter Est. 14386027 rows │ │ │ Condition: Runtime Filters: {i_item_sk_7} │ │ └─ Local Exchange Est. 57544111 rows - │ │ └─ CTERef[0] Est. 57544111 rows + │ │ └─ Buffer Est. 57544111 rows + │ │ └─ CTERef[0] Est. 57544111 rows │ └─ Inner Join Est. 1105888 rows │ │ Condition: ws_sold_date_sk_1 == d_date_sk_13 │ │ Runtime Filters Builder: {d_date_sk_13} @@ -257,7 +260,8 @@ Projection Est. 100 rows │ └─ TableScan tpcds100.date_dim Est. 73049 rows │ Where: (d_moy = 2) AND (d_year = 2000) │ Outputs: d_date_sk_13:=d_date_sk, d_year_13:=d_year, d_moy_3:=d_moy - └─ CTERef[1] Est. 996434 rows + └─ Buffer Est. 996434 rows + └─ CTERef[1] Est. 996434 rows CTEDef [0] Projection Est. 57544111 rows │ Expressions: i_item_sk_5:=ss_item_sk_3 diff --git a/tests/optimizers/tpcds/explains/tpcds100/q24.explain b/tests/optimizers/tpcds/explains/tpcds100/q24.explain index fabc6be1504..db13252a4cb 100644 --- a/tests/optimizers/tpcds/explains/tpcds100/q24.explain +++ b/tests/optimizers/tpcds/explains/tpcds100/q24.explain @@ -32,12 +32,12 @@ Projection Est. 1360 rows └─ CTERef[0] Est. 19789831 rows CTEDef [0] MergingAggregated Est. 19789831 rows - └─ Repartition Exchange Est. 19789831 rows + └─ Repartition Exchange Est. 
19790089 rows │ Partition by: {c_last_name, c_first_name, s_store_name, ca_state, s_state, i_color, i_current_price, i_manager_id, i_units, i_size} - └─ Aggregating Est. 19789831 rows + └─ Aggregating Est. 19790089 rows │ Group by: {c_last_name, c_first_name, s_store_name, ca_state, s_state, i_color, i_current_price, i_manager_id, i_units, i_size} │ Aggregates: expr#sum(ss_net_paid):=AggNull(sum)(ss_net_paid) - └─ Inner Join Est. 19789831 rows + └─ Inner Join Est. 19790089 rows │ Condition: s_zip == ca_zip, ss_customer_sk == c_customer_sk ├─ Repartition Exchange Est. 49474579 rows │ │ Partition by: {s_zip, ss_customer_sk} @@ -114,12 +114,12 @@ Projection Est. 1360 rows └─ CTERef[0] Est. 19789831 rows CTEDef [0] MergingAggregated Est. 19789831 rows - └─ Repartition Exchange Est. 19789831 rows + └─ Repartition Exchange Est. 19790089 rows │ Partition by: {c_last_name_5, c_first_name_5, s_store_name_5, ca_state_5, s_state_5, i_color_5, i_current_price_5, i_manager_id_5, i_units_5, i_size_5} - └─ Aggregating Est. 19789831 rows + └─ Aggregating Est. 19790089 rows │ Group by: {c_last_name_5, c_first_name_5, s_store_name_5, ca_state_5, s_state_5, i_color_5, i_current_price_5, i_manager_id_5, i_units_5, i_size_5} │ Aggregates: expr#sum(ss_net_paid)_5:=AggNull(sum)(ss_net_paid_3) - └─ Inner Join Est. 19789831 rows + └─ Inner Join Est. 19790089 rows │ Condition: s_zip_3 == ca_zip_3, ss_customer_sk_5 == c_customer_sk_5 ├─ Repartition Exchange Est. 49474579 rows │ │ Partition by: {s_zip_3, ss_customer_sk_5} diff --git a/tests/optimizers/tpcds/explains/tpcds100/q31.explain b/tests/optimizers/tpcds/explains/tpcds100/q31.explain index 16b683bb01b..aa82bcd3a8d 100644 --- a/tests/optimizers/tpcds/explains/tpcds100/q31.explain +++ b/tests/optimizers/tpcds/explains/tpcds100/q31.explain @@ -5,9 +5,9 @@ Projection Est. 59469 rows └─ Gather Exchange Est. 59469 rows └─ Sorting Est. 59469 rows │ Order by: {ca_county_3 ASC NULLS LAST} - └─ Projection Est. 59469 rows + └─ Projection Est. 
59472 rows │ Expressions: [ca_county_3, d_year_1], expr#divide(ss2.store_sales, ss1.store_sales):=`expr#sum(ss_ext_sales_price)_2` / `expr#sum(ss_ext_sales_price)_1`, expr#divide(ss3.store_sales, ss2.store_sales):=`expr#sum(ss_ext_sales_price)_3` / `expr#sum(ss_ext_sales_price)_2`, expr#divide(ws2.web_sales, ws1.web_sales):=`expr#sum(ws_ext_sales_price)_2` / `expr#sum(ws_ext_sales_price)_1`, expr#divide(ws3.web_sales, ws2.web_sales):=`expr#sum(ws_ext_sales_price)_3` / `expr#sum(ws_ext_sales_price)_2` - └─ Inner Join Est. 59469 rows + └─ Inner Join Est. 59472 rows │ Condition: ca_county_3 == ca_county_1 │ Filter: multiIf(`expr#sum(ws_ext_sales_price)_1` > 0, `expr#sum(ws_ext_sales_price)_2` / `expr#sum(ws_ext_sales_price)_1`, NULL) > multiIf(`expr#sum(ss_ext_sales_price)_1` > 0, `expr#sum(ss_ext_sales_price)_2` / `expr#sum(ss_ext_sales_price)_1`, NULL) ├─ Inner Join Est. 16319 rows diff --git a/tests/optimizers/tpcds/explains/tpcds100/q33.explain b/tests/optimizers/tpcds/explains/tpcds100/q33.explain index 535da469833..a2df9a18564 100644 --- a/tests/optimizers/tpcds/explains/tpcds100/q33.explain +++ b/tests/optimizers/tpcds/explains/tpcds100/q33.explain @@ -66,7 +66,8 @@ Projection Est. 100 rows │ │ └─ TableScan tpcds100.item Est. 204000 rows │ │ Where: Runtime Filters: {i_manufact_id} │ │ Outputs: [i_item_sk, i_manufact_id] - │ └─ CTERef[1] Est. 20434 rows + │ └─ Buffer Est. 20434 rows + │ └─ CTERef[1] Est. 20434 rows ├─ MergingAggregated Est. 100 rows │ └─ Repartition Exchange Est. 100 rows │ │ Partition by: {i_manufact_id_2} @@ -119,7 +120,8 @@ Projection Est. 100 rows │ │ └─ TableScan tpcds100.item Est. 204000 rows │ │ Where: Runtime Filters: {i_manufact_id} │ │ Outputs: i_item_sk_1:=i_item_sk, i_manufact_id_2:=i_manufact_id - │ └─ CTERef[1] Est. 20434 rows + │ └─ Buffer Est. 20434 rows + │ └─ CTERef[1] Est. 20434 rows └─ MergingAggregated Est. 100 rows └─ Repartition Exchange Est. 100 rows │ Partition by: {i_manufact_id_4} @@ -172,7 +174,8 @@ Projection Est. 
100 rows │ └─ TableScan tpcds100.item Est. 204000 rows │ Where: Runtime Filters: {i_manufact_id} │ Outputs: i_item_sk_2:=i_item_sk, i_manufact_id_4:=i_manufact_id - └─ CTERef[1] Est. 20434 rows + └─ Buffer Est. 20434 rows + └─ CTERef[1] Est. 20434 rows CTEDef [1] Repartition Exchange Est. 20434 rows │ Partition by: {i_manufact_id_1} diff --git a/tests/optimizers/tpcds/explains/tpcds100/q54.explain b/tests/optimizers/tpcds/explains/tpcds100/q54.explain index 8c667b21658..01802a77893 100644 --- a/tests/optimizers/tpcds/explains/tpcds100/q54.explain +++ b/tests/optimizers/tpcds/explains/tpcds100/q54.explain @@ -29,7 +29,7 @@ Projection Est. 100 rows │ Condition: ss_sold_date_sk == d_date_sk_1 ├─ Repartition Exchange Est. 71576 rows │ │ Partition by: {ss_sold_date_sk} - │ └─ Inner Join Est. 71576 rows + │ └─ Inner Join Est. 71678 rows │ │ Condition: ss_customer_sk == c_customer_sk │ │ Runtime Filters Builder: {c_customer_sk} │ ├─ Filter Est. 287997024 rows @@ -43,7 +43,7 @@ Projection Est. 100 rows │ │ Runtime Filters Builder: {s_county,s_state} │ ├─ Repartition Exchange Est. 4437 rows │ │ │ Partition by: {ca_county, ca_state} - │ │ └─ Inner Join Est. 4437 rows + │ │ └─ Inner Join Est. 4462 rows │ │ │ Condition: ca_address_sk == c_current_addr_sk │ │ │ Runtime Filters Builder: {c_current_addr_sk} │ │ ├─ Filter Est. 1000000 rows @@ -123,7 +123,8 @@ Projection Est. 100 rows │ │ Group by: {expr#plus(d_month_seq, 1)} │ └─ Projection Est. 30 rows │ │ Expressions: expr#plus(d_month_seq, 1):=d_month_seq_1 + 1 - │ └─ CTERef[1] Est. 30 rows + │ └─ Buffer Est. 30 rows + │ └─ CTERef[1] Est. 30 rows └─ Broadcast Exchange Est. 1 rows └─ EnforceSingleRow Est. 1 rows └─ Gather Exchange Est. 1 rows @@ -136,7 +137,8 @@ Projection Est. 100 rows │ Group by: {expr#plus(d_month_seq, 3)} └─ Projection Est. 30 rows │ Expressions: expr#plus(d_month_seq, 3):=d_month_seq_2 + 3 - └─ CTERef[1] Est. 30 rows + └─ Buffer Est. 30 rows + └─ CTERef[1] Est. 30 rows CTEDef [1] Projection Est. 
30 rows │ Expressions: [d_month_seq_1] diff --git a/tests/optimizers/tpcds/explains/tpcds100/q56.explain b/tests/optimizers/tpcds/explains/tpcds100/q56.explain index f55a4d9a73d..217276597e0 100644 --- a/tests/optimizers/tpcds/explains/tpcds100/q56.explain +++ b/tests/optimizers/tpcds/explains/tpcds100/q56.explain @@ -20,7 +20,7 @@ Projection Est. 100 rows │ └─ Aggregating Est. 2748 rows │ │ Group by: {i_item_id} │ │ Aggregates: expr#sum(ss_ext_sales_price):=AggNull(sum)(ss_ext_sales_price) - │ └─ Inner Join Est. 86776 rows + │ └─ Inner Join Est. 237959 rows │ │ Condition: ca_address_sk == ss_addr_sk │ │ Runtime Filters Builder: {ss_addr_sk} │ ├─ Repartition Exchange Est. 364761 rows @@ -64,14 +64,15 @@ Projection Est. 100 rows │ │ └─ TableScan tpcds100.item Est. 204000 rows │ │ Where: Runtime Filters: {i_item_id} │ │ Outputs: [i_item_sk, i_item_id] - │ └─ CTERef[1] Est. 5491 rows + │ └─ Buffer Est. 5491 rows + │ └─ CTERef[1] Est. 5491 rows ├─ MergingAggregated Est. 2748 rows │ └─ Repartition Exchange Est. 2748 rows │ │ Partition by: {i_item_id_2} │ └─ Aggregating Est. 2748 rows │ │ Group by: {i_item_id_2} │ │ Aggregates: expr#sum(cs_ext_sales_price):=AggNull(sum)(cs_ext_sales_price) - │ └─ Inner Join Est. 43080 rows + │ └─ Inner Join Est. 118136 rows │ │ Condition: ca_address_sk_1 == cs_bill_addr_sk │ │ Runtime Filters Builder: {cs_bill_addr_sk} │ ├─ Repartition Exchange Est. 364761 rows @@ -115,14 +116,15 @@ Projection Est. 100 rows │ │ └─ TableScan tpcds100.item Est. 204000 rows │ │ Where: Runtime Filters: {i_item_id} │ │ Outputs: i_item_sk_1:=i_item_sk, i_item_id_2:=i_item_id - │ └─ CTERef[1] Est. 5491 rows + │ └─ Buffer Est. 5491 rows + │ └─ CTERef[1] Est. 5491 rows └─ MergingAggregated Est. 2748 rows └─ Repartition Exchange Est. 2748 rows │ Partition by: {i_item_id_4} └─ Aggregating Est. 2748 rows │ Group by: {i_item_id_4} │ Aggregates: expr#sum(ws_ext_sales_price):=AggNull(sum)(ws_ext_sales_price) - └─ Inner Join Est. 21707 rows + └─ Inner Join Est. 
59491 rows │ Condition: ca_address_sk_2 == ws_bill_addr_sk │ Runtime Filters Builder: {ws_bill_addr_sk} ├─ Repartition Exchange Est. 364761 rows @@ -166,7 +168,8 @@ Projection Est. 100 rows │ └─ TableScan tpcds100.item Est. 204000 rows │ Where: Runtime Filters: {i_item_id} │ Outputs: i_item_sk_2:=i_item_sk, i_item_id_4:=i_item_id - └─ CTERef[1] Est. 5491 rows + └─ Buffer Est. 5491 rows + └─ CTERef[1] Est. 5491 rows CTEDef [1] Repartition Exchange Est. 5491 rows │ Partition by: {i_item_id_1} diff --git a/tests/optimizers/tpcds/explains/tpcds100/q58.explain b/tests/optimizers/tpcds/explains/tpcds100/q58.explain index f41ef4e2079..4bf1d13b647 100644 --- a/tests/optimizers/tpcds/explains/tpcds100/q58.explain +++ b/tests/optimizers/tpcds/explains/tpcds100/q58.explain @@ -21,7 +21,7 @@ Projection Est. 100 rows │ └─ Aggregating Est. 102098 rows │ │ Group by: {i_item_id_1} │ │ Aggregates: expr#sum(cs_ext_sales_price):=AggNull(sum)(cs_ext_sales_price) - │ └─ Inner Join Est. 469555 rows + │ └─ Inner Join Est. 470578 rows │ │ Condition: cs_item_sk == i_item_sk_1 │ ├─ Inner Join Est. 470578 rows │ │ │ Condition: cs_sold_date_sk == d_date_sk_1 @@ -41,7 +41,8 @@ Projection Est. 100 rows │ │ │ Where: Runtime Filters: {d_date} │ │ │ Outputs: d_date_sk_1:=d_date_sk, d_date_3:=d_date │ │ └─ Broadcast Exchange Est. 6 rows - │ │ └─ CTERef[1] Est. 6 rows + │ │ └─ Buffer Est. 6 rows + │ │ └─ CTERef[1] Est. 6 rows │ └─ Filter Est. 204000 rows │ │ Condition: Runtime Filters: {i_item_id_1} │ └─ TableScan tpcds100.item Est. 204000 rows @@ -57,7 +58,7 @@ Projection Est. 100 rows │ └─ Aggregating Est. 102098 rows │ │ Group by: {i_item_id} │ │ Aggregates: expr#sum(ss_ext_sales_price):=AggNull(sum)(ss_ext_sales_price) - │ └─ Inner Join Est. 945803 rows + │ └─ Inner Join Est. 947878 rows │ │ Condition: ss_item_sk == i_item_sk │ ├─ Inner Join Est. 947878 rows │ │ │ Condition: ss_sold_date_sk == d_date_sk @@ -77,7 +78,8 @@ Projection Est. 
100 rows │ │ │ Where: Runtime Filters: {d_date} │ │ │ Outputs: [d_date_sk, d_date] │ │ └─ Broadcast Exchange Est. 6 rows - │ │ └─ CTERef[1] Est. 6 rows + │ │ └─ Buffer Est. 6 rows + │ │ └─ CTERef[1] Est. 6 rows │ └─ Filter Est. 204000 rows │ │ Condition: Runtime Filters: {i_item_id} │ └─ TableScan tpcds100.item Est. 204000 rows @@ -89,7 +91,7 @@ Projection Est. 100 rows └─ Aggregating Est. 102098 rows │ Group by: {i_item_id_2} │ Aggregates: expr#sum(ws_ext_sales_price):=AggNull(sum)(ws_ext_sales_price) - └─ Inner Join Est. 236457 rows + └─ Inner Join Est. 236976 rows │ Condition: ws_item_sk == i_item_sk_2 ├─ Inner Join Est. 236976 rows │ │ Condition: ws_sold_date_sk == d_date_sk_2 @@ -109,7 +111,8 @@ Projection Est. 100 rows │ │ Where: Runtime Filters: {d_date} │ │ Outputs: d_date_sk_2:=d_date_sk, d_date_6:=d_date │ └─ Broadcast Exchange Est. 6 rows - │ └─ CTERef[1] Est. 6 rows + │ └─ Buffer Est. 6 rows + │ └─ CTERef[1] Est. 6 rows └─ TableScan tpcds100.item Est. 204000 rows Outputs: i_item_sk_2:=i_item_sk, i_item_id_2:=i_item_id CTEDef [1] diff --git a/tests/optimizers/tpcds/explains/tpcds100/q59.explain b/tests/optimizers/tpcds/explains/tpcds100/q59.explain index e709918f547..3f7a66eaa29 100644 --- a/tests/optimizers/tpcds/explains/tpcds100/q59.explain +++ b/tests/optimizers/tpcds/explains/tpcds100/q59.explain @@ -52,14 +52,16 @@ Projection Est. 100 rows │ │ │ │ Where: Runtime Filters: {d_week_seq} │ │ │ │ Outputs: d_date_sk_1:=d_date_sk, d_week_seq_5:=d_week_seq, d_day_name_1:=d_day_name │ │ │ └─ Broadcast Exchange Est. 333 rows - │ │ │ └─ CTERef[1] Est. 333 rows + │ │ │ └─ Buffer Est. 333 rows + │ │ │ └─ CTERef[1] Est. 333 rows │ │ └─ Repartition Exchange Est. 402 rows │ │ │ Partition by: {s_store_sk} │ │ └─ TableScan tpcds100.store Est. 402 rows │ │ Outputs: [s_store_sk, s_store_id, s_store_name] │ └─ Repartition Exchange Est. 333 rows │ │ Partition by: {d_week_seq_2} - │ └─ CTERef[1] Est. 333 rows + │ └─ Buffer Est. 333 rows + │ └─ CTERef[1] Est. 
333 rows └─ Repartition Exchange Est. 60539 rows │ Partition by: {expr#cast(minus(d_week_seq_3, 52), 'Int32'), s_store_id_1} └─ Projection Est. 60539 rows @@ -99,14 +101,16 @@ Projection Est. 100 rows │ │ │ Where: Runtime Filters: {d_week_seq} │ │ │ Outputs: d_date_sk_2:=d_date_sk, d_week_seq_6:=d_week_seq, d_day_name_2:=d_day_name │ │ └─ Broadcast Exchange Est. 333 rows - │ │ └─ CTERef[2] Est. 333 rows + │ │ └─ Buffer Est. 333 rows + │ │ └─ CTERef[2] Est. 333 rows │ └─ Repartition Exchange Est. 402 rows │ │ Partition by: {s_store_sk_1} │ └─ TableScan tpcds100.store Est. 402 rows │ Outputs: s_store_sk_1:=s_store_sk, s_store_id_1:=s_store_id └─ Repartition Exchange Est. 333 rows │ Partition by: {d_week_seq_4} - └─ CTERef[2] Est. 333 rows + └─ Buffer Est. 333 rows + └─ CTERef[2] Est. 333 rows CTEDef [1] Projection Est. 333 rows │ Expressions: [d_week_seq_2] diff --git a/tests/optimizers/tpcds/explains/tpcds100/q60.explain b/tests/optimizers/tpcds/explains/tpcds100/q60.explain index 8b4eb6d6cc6..a391342c87e 100644 --- a/tests/optimizers/tpcds/explains/tpcds100/q60.explain +++ b/tests/optimizers/tpcds/explains/tpcds100/q60.explain @@ -20,7 +20,7 @@ Projection Est. 100 rows │ └─ Aggregating Est. 10187 rows │ │ Group by: {i_item_id} │ │ Aggregates: expr#sum(ss_ext_sales_price):=AggNull(sum)(ss_ext_sales_price) - │ └─ Inner Join Est. 333183 rows + │ └─ Inner Join Est. 364761 rows │ │ Condition: ss_addr_sk == ca_address_sk │ │ Runtime Filters Builder: {ca_address_sk} │ ├─ Repartition Exchange Est. 913676 rows @@ -55,7 +55,8 @@ Projection Est. 100 rows │ │ │ └─ TableScan tpcds100.item Est. 204000 rows │ │ │ Where: Runtime Filters: {i_item_id} │ │ │ Outputs: [i_item_sk, i_item_id] - │ │ └─ CTERef[1] Est. 20356 rows + │ │ └─ Buffer Est. 20356 rows + │ │ └─ CTERef[1] Est. 20356 rows │ └─ Repartition Exchange Est. 364761 rows │ │ Partition by: {ca_address_sk} │ └─ Projection Est. 364761 rows @@ -71,7 +72,7 @@ Projection Est. 100 rows │ └─ Aggregating Est. 
10187 rows │ │ Group by: {i_item_id_2} │ │ Aggregates: expr#sum(cs_ext_sales_price):=AggNull(sum)(cs_ext_sales_price) - │ └─ Inner Join Est. 165410 rows + │ └─ Inner Join Est. 364761 rows │ │ Condition: cs_bill_addr_sk == ca_address_sk_1 │ │ Runtime Filters Builder: {ca_address_sk_1} │ ├─ Repartition Exchange Est. 453598 rows @@ -109,7 +110,8 @@ Projection Est. 100 rows │ │ │ └─ TableScan tpcds100.item Est. 204000 rows │ │ │ Where: Runtime Filters: {i_item_id} │ │ │ Outputs: i_item_sk_1:=i_item_sk, i_item_id_2:=i_item_id - │ │ └─ CTERef[1] Est. 20356 rows + │ │ └─ Buffer Est. 20356 rows + │ │ └─ CTERef[1] Est. 20356 rows │ └─ Repartition Exchange Est. 364761 rows │ │ Partition by: {ca_address_sk_1} │ └─ Projection Est. 364761 rows @@ -125,7 +127,7 @@ Projection Est. 100 rows └─ Aggregating Est. 10187 rows │ Group by: {i_item_id_4} │ Aggregates: expr#sum(ws_ext_sales_price):=AggNull(sum)(ws_ext_sales_price) - └─ Inner Join Est. 83347 rows + └─ Inner Join Est. 228425 rows │ Condition: ca_address_sk_2 == ws_bill_addr_sk │ Runtime Filters Builder: {ws_bill_addr_sk} ├─ Repartition Exchange Est. 364761 rows @@ -172,7 +174,8 @@ Projection Est. 100 rows │ └─ TableScan tpcds100.item Est. 204000 rows │ Where: Runtime Filters: {i_item_id} │ Outputs: i_item_sk_2:=i_item_sk, i_item_id_4:=i_item_id - └─ CTERef[1] Est. 20356 rows + └─ Buffer Est. 20356 rows + └─ CTERef[1] Est. 20356 rows CTEDef [1] Repartition Exchange Est. 20356 rows │ Partition by: {i_item_id_1} diff --git a/tests/optimizers/tpcds/explains/tpcds100/q64.explain b/tests/optimizers/tpcds/explains/tpcds100/q64.explain index 11e8f084c53..8e4f5ba24a4 100644 --- a/tests/optimizers/tpcds/explains/tpcds100/q64.explain +++ b/tests/optimizers/tpcds/explains/tpcds100/q64.explain @@ -24,13 +24,13 @@ CTEDef [0] Projection Est. 
74517 rows │ Expressions: [ca_city, ca_city_1, ca_street_name, ca_street_name_1, ca_street_number, ca_street_number_1, ca_zip, ca_zip_1, d_year, expr#count(), expr#sum(ss_coupon_amt), expr#sum(ss_list_price), expr#sum(ss_wholesale_cost), i_product_name, s_store_name, s_zip], i_item_sk:=ss_item_sk └─ MergingAggregated Est. 74517 rows - └─ Repartition Exchange Est. 74517 rows + └─ Repartition Exchange Est. 37488 rows │ Partition by: {i_product_name, ss_item_sk, s_store_name, s_zip, ca_street_number, ca_street_name, ca_city, ca_zip, ca_street_number_1, ca_street_name_1, ca_city_1, ca_zip_1, d_year, d_year_1, d_year_2} - └─ Aggregating Est. 74517 rows + └─ Aggregating Est. 37488 rows │ Group by: {i_product_name, ss_item_sk, s_store_name, s_zip, ca_street_number, ca_street_name, ca_city, ca_zip, ca_street_number_1, ca_street_name_1, ca_city_1, ca_zip_1, d_year, d_year_1, d_year_2} │ Group by keys not hashed: {i_product_name} │ Aggregates: expr#count():=AggNull(count)(), expr#sum(ss_wholesale_cost):=AggNull(sum)(ss_wholesale_cost), expr#sum(ss_list_price):=AggNull(sum)(ss_list_price), expr#sum(ss_coupon_amt):=AggNull(sum)(ss_coupon_amt) - └─ Inner Join Est. 74517 rows + └─ Inner Join Est. 37488 rows │ Condition: ca_address_sk_1 == c_current_addr_sk │ Runtime Filters Builder: {c_current_addr_sk} ├─ Repartition Exchange Est. 1000000 rows @@ -69,7 +69,7 @@ CTEDef [0] │ │ │ │ Condition: c_current_hdemo_sk == hd_demo_sk_1 │ │ │ ├─ Repartition Exchange Est. 74778 rows │ │ │ │ │ Partition by: {c_current_hdemo_sk} - │ │ │ │ └─ Inner Join Est. 74778 rows + │ │ │ │ └─ Inner Join Est. 74712 rows │ │ │ │ │ Condition: c_customer_sk == ss_customer_sk │ │ │ │ │ Runtime Filters Builder: {ss_customer_sk} │ │ │ │ ├─ Repartition Exchange Est. 
2000000 rows diff --git a/tests/optimizers/tpcds/explains/tpcds100/q74.explain b/tests/optimizers/tpcds/explains/tpcds100/q74.explain index 86107b00e90..cd5b7e783d0 100644 --- a/tests/optimizers/tpcds/explains/tpcds100/q74.explain +++ b/tests/optimizers/tpcds/explains/tpcds100/q74.explain @@ -9,7 +9,7 @@ Projection Est. 100 rows └─ Sorting Est. 100 rows │ Order by: {c_customer_id_7 ASC NULLS LAST, c_customer_id_7 ASC NULLS LAST, c_customer_id_7 ASC NULLS LAST} │ Limit: 100 - └─ Inner Join Est. 1242792 rows + └─ Inner Join Est. 1240490 rows │ Condition: ss_customer_sk_5 == c_customer_sk_2 │ Filter: multiIf(`expr#sum(ss_net_paid)_4` > 0, `expr#sum(ss_net_paid)_5` / `expr#sum(ss_net_paid)_4`, NULL) > multiIf(`expr#sum(ss_net_paid)_2` > 0, `expr#sum(ss_net_paid)_3` / `expr#sum(ss_net_paid)_2`, NULL) ├─ Inner Join Est. 807110 rows @@ -28,7 +28,7 @@ Projection Est. 100 rows │ └─ Local Exchange Est. 1749187 rows │ └─ Buffer Est. 1749187 rows │ └─ CTERef[0] Est. 1749187 rows - └─ Inner Join Est. 767156 rows + └─ Inner Join Est. 765735 rows │ Condition: ss_customer_sk_3 == c_customer_sk_2 ├─ Projection Est. 437296 rows │ │ Expressions: [expr#sum(ss_net_paid)_3, ss_customer_sk_3] diff --git a/tests/optimizers/tpcds/explains/tpcds100/q83.explain b/tests/optimizers/tpcds/explains/tpcds100/q83.explain index 25cb9196259..ccc9972d354 100644 --- a/tests/optimizers/tpcds/explains/tpcds100/q83.explain +++ b/tests/optimizers/tpcds/explains/tpcds100/q83.explain @@ -20,7 +20,7 @@ Projection Est. 100 rows │ └─ Aggregating Est. 102098 rows │ │ Group by: {i_item_id_1} │ │ Aggregates: expr#sum(cr_return_quantity):=AggNull(sum)(cr_return_quantity) - │ └─ Inner Join Est. 136858 rows + │ └─ Inner Join Est. 137224 rows │ │ Condition: i_item_sk_1 == cr_item_sk │ ├─ Filter Est. 204000 rows │ │ │ Condition: Runtime Filters: {i_item_id_1} @@ -54,7 +54,8 @@ Projection Est. 
100 rows │ │ Where: Runtime Filters: {d_week_seq} │ │ Outputs: d_date_4:=d_date, d_week_seq_2:=d_week_seq │ └─ Broadcast Exchange Est. 3 rows - │ └─ CTERef[1] Est. 3 rows + │ └─ Buffer Est. 3 rows + │ └─ CTERef[1] Est. 3 rows └─ Inner Join Est. 65882 rows │ Condition: i_item_id == i_item_id_2 │ Runtime Filters Builder: {i_item_id_2} @@ -64,7 +65,7 @@ Projection Est. 100 rows │ └─ Aggregating Est. 102098 rows │ │ Group by: {i_item_id} │ │ Aggregates: expr#sum(sr_return_quantity):=AggNull(sum)(sr_return_quantity) - │ └─ Inner Join Est. 286946 rows + │ └─ Inner Join Est. 287519 rows │ │ Condition: sr_item_sk == i_item_sk │ ├─ Inner Join Est. 287519 rows │ │ │ Condition: sr_returned_date_sk == d_date_sk @@ -93,19 +94,20 @@ Projection Est. 100 rows │ │ │ Where: Runtime Filters: {d_week_seq} │ │ │ Outputs: [d_week_seq], d_date_1:=d_date │ │ └─ Broadcast Exchange Est. 3 rows - │ │ └─ CTERef[1] Est. 3 rows + │ │ └─ Buffer Est. 3 rows + │ │ └─ CTERef[1] Est. 3 rows │ └─ Filter Est. 204000 rows │ │ Condition: Runtime Filters: {i_item_id} │ └─ TableScan tpcds100.item Est. 204000 rows │ Where: Runtime Filters: {i_item_id} │ Outputs: [i_item_sk, i_item_id] └─ MergingAggregated Est. 65882 rows - └─ Repartition Exchange Est. 65882 rows + └─ Repartition Exchange Est. 66052 rows │ Partition by: {i_item_id_2} - └─ Aggregating Est. 65882 rows + └─ Aggregating Est. 66052 rows │ Group by: {i_item_id_2} │ Aggregates: expr#sum(wr_return_quantity):=AggNull(sum)(wr_return_quantity) - └─ Inner Join Est. 65882 rows + └─ Inner Join Est. 66052 rows │ Condition: i_item_sk_2 == wr_item_sk │ Runtime Filters Builder: {wr_item_sk} ├─ Filter Est. 204000 rows @@ -140,7 +142,8 @@ Projection Est. 100 rows │ Where: Runtime Filters: {d_week_seq} │ Outputs: d_date_7:=d_date, d_week_seq_4:=d_week_seq └─ Broadcast Exchange Est. 3 rows - └─ CTERef[1] Est. 3 rows + └─ Buffer Est. 3 rows + └─ CTERef[1] Est. 3 rows CTEDef [1] Repartition Exchange Est. 
3 rows │ Partition by: {d_week_seq_1} diff --git a/tests/optimizers/tpcds/explains/tpcds1000/q1.explain b/tests/optimizers/tpcds/explains/tpcds1000/q1.explain index 11e1bd22dd0..81824c93adc 100644 --- a/tests/optimizers/tpcds/explains/tpcds1000/q1.explain +++ b/tests/optimizers/tpcds/explains/tpcds1000/q1.explain @@ -42,7 +42,8 @@ Projection Est. 100 rows │ │ │ Where: Runtime Filters: {sr_returned_date_sk, sr_store_sk} │ │ │ Outputs: sr_returned_date_sk_1:=sr_returned_date_sk, sr_customer_sk_3:=sr_customer_sk, sr_store_sk_3:=sr_store_sk, sr_return_amt_1:=sr_return_amt │ │ └─ Broadcast Exchange Est. 41 rows - │ │ └─ CTERef[1] Est. 41 rows + │ │ └─ Buffer Est. 41 rows + │ │ └─ CTERef[1] Est. 41 rows │ └─ Broadcast Exchange Est. 366 rows │ └─ Projection Est. 366 rows │ │ Expressions: [d_date_sk_1] @@ -81,7 +82,8 @@ Projection Est. 100 rows │ │ │ Where: Runtime Filters: {sr_returned_date_sk, sr_store_sk} │ │ │ Outputs: sr_returned_date_sk_2:=sr_returned_date_sk, sr_customer_sk_4:=sr_customer_sk, sr_store_sk_4:=sr_store_sk, sr_return_amt_2:=sr_return_amt │ │ └─ Broadcast Exchange Est. 41 rows - │ │ └─ CTERef[1] Est. 41 rows + │ │ └─ Buffer Est. 41 rows + │ │ └─ CTERef[1] Est. 41 rows │ └─ Broadcast Exchange Est. 366 rows │ └─ Projection Est. 366 rows │ │ Expressions: [d_date_sk_2] @@ -92,7 +94,8 @@ Projection Est. 100 rows │ Outputs: d_date_sk_2:=d_date_sk, d_year_2:=d_year └─ Repartition Exchange Est. 41 rows │ Partition by: {s_store_sk} - └─ CTERef[1] Est. 41 rows + └─ Buffer Est. 41 rows + └─ CTERef[1] Est. 41 rows CTEDef [1] Projection Est. 41 rows │ Expressions: [s_store_sk] diff --git a/tests/optimizers/tpcds/explains/tpcds1000/q14.explain b/tests/optimizers/tpcds/explains/tpcds1000/q14.explain index 334623a2611..69d69cdec61 100644 --- a/tests/optimizers/tpcds/explains/tpcds1000/q14.explain +++ b/tests/optimizers/tpcds/explains/tpcds1000/q14.explain @@ -56,9 +56,11 @@ Projection Est. 
100 rows │ │ │ │ Outputs: [d_date_sk, d_year, d_moy] │ │ │ └─ TableScan tpcds1000.item Est. 300000 rows │ │ │ Outputs: [i_item_sk, i_brand_id, i_class_id, i_category_id] - │ │ └─ CTERef[0] Est. 6220638 rows + │ │ └─ Buffer Est. 6220638 rows + │ │ └─ CTERef[0] Est. 6220638 rows │ └─ Broadcast Exchange Est. 1 rows - │ └─ CTERef[2] Est. 1 rows + │ └─ Buffer Est. 1 rows + │ └─ CTERef[2] Est. 1 rows ├─ Aggregating Est. 13880 rows │ │ Group by: {expr#'catalog', i_brand_id_7, i_class_id_7, i_category_id_7} │ │ Aggregates: expr#sum(sales):=AggNull(sum)(expr#sum(multiply(cs_quantity, cs_list_price))), expr#sum(number_sales):=AggNull(sum)(expr#count()_1) @@ -99,9 +101,11 @@ Projection Est. 100 rows │ │ │ │ Outputs: d_date_sk_7:=d_date_sk, d_year_7:=d_year, d_moy_1:=d_moy │ │ │ └─ TableScan tpcds1000.item Est. 300000 rows │ │ │ Outputs: i_item_sk_6:=i_item_sk, i_brand_id_7:=i_brand_id, i_class_id_7:=i_class_id, i_category_id_7:=i_category_id - │ │ └─ CTERef[0] Est. 6220638 rows + │ │ └─ Buffer Est. 6220638 rows + │ │ └─ CTERef[0] Est. 6220638 rows │ └─ Broadcast Exchange Est. 1 rows - │ └─ CTERef[2] Est. 1 rows + │ └─ Buffer Est. 1 rows + │ └─ CTERef[2] Est. 1 rows └─ Aggregating Est. 13880 rows │ Group by: {expr#'web', i_brand_id_8, i_class_id_8, i_category_id_8} │ Aggregates: expr#sum(sales):=AggNull(sum)(expr#sum(multiply(ws_quantity, ws_list_price))), expr#sum(number_sales):=AggNull(sum)(expr#count()_2) @@ -142,9 +146,11 @@ Projection Est. 100 rows │ │ │ Outputs: d_date_sk_8:=d_date_sk, d_year_8:=d_year, d_moy_2:=d_moy │ │ └─ TableScan tpcds1000.item Est. 300000 rows │ │ Outputs: i_item_sk_8:=i_item_sk, i_brand_id_8:=i_brand_id, i_class_id_8:=i_class_id, i_category_id_8:=i_category_id - │ └─ CTERef[0] Est. 6220638 rows + │ └─ Buffer Est. 6220638 rows + │ └─ CTERef[0] Est. 6220638 rows └─ Broadcast Exchange Est. 1 rows - └─ CTERef[2] Est. 1 rows + └─ Buffer Est. 1 rows + └─ CTERef[2] Est. 1 rows CTEDef [0] Repartition Exchange Est. 
6220638 rows │ Partition by: {i_item_sk_1} @@ -358,9 +364,11 @@ Projection Est. 100 rows │ │ │ └─ TableScan tpcds1000.date_dim Est. 73049 rows │ │ │ Where: (d_year = cast(2000, 'UInt32')) AND (d_moy = 12) AND (d_dom = 11) │ │ │ Outputs: [d_dom], d_week_seq_1:=d_week_seq, d_year_30:=d_year, d_moy_3:=d_moy - │ │ └─ CTERef[0] Est. 6220638 rows + │ │ └─ Buffer Est. 6220638 rows + │ │ └─ CTERef[0] Est. 6220638 rows │ └─ Broadcast Exchange Est. 1 rows - │ └─ CTERef[3] Est. 1 rows + │ └─ Buffer Est. 1 rows + │ └─ CTERef[3] Est. 1 rows └─ Projection Est. 771 rows │ Expressions: [expr#count()_5, expr#sum(multiply(ss_quantity, ss_list_price))_3, i_brand_id_29, i_category_id_29, i_class_id_29], expr#'store'_3:='store' └─ Inner Join Est. 771 rows @@ -409,9 +417,11 @@ Projection Est. 100 rows │ │ └─ TableScan tpcds1000.date_dim Est. 73049 rows │ │ Where: (d_year = 1999) AND (d_moy = 12) AND (d_dom = 11) │ │ Outputs: d_week_seq_4:=d_week_seq, d_year_37:=d_year, d_moy_4:=d_moy, d_dom_1:=d_dom - │ └─ CTERef[0] Est. 6220638 rows + │ └─ Buffer Est. 6220638 rows + │ └─ CTERef[0] Est. 6220638 rows └─ Broadcast Exchange Est. 1 rows - └─ CTERef[3] Est. 1 rows + └─ Buffer Est. 1 rows + └─ CTERef[3] Est. 1 rows CTEDef [0] Repartition Exchange Est. 6220638 rows │ Partition by: {i_item_sk_18} diff --git a/tests/optimizers/tpcds/explains/tpcds1000/q23.explain b/tests/optimizers/tpcds/explains/tpcds1000/q23.explain index f76351520da..a2a5ddb4ef6 100644 --- a/tests/optimizers/tpcds/explains/tpcds1000/q23.explain +++ b/tests/optimizers/tpcds/explains/tpcds1000/q23.explain @@ -21,8 +21,7 @@ Projection Est. 1 rows │ │ │ Runtime Filters Builder: {cs_item_sk} │ │ ├─ Filter Est. 143861166 rows │ │ │ │ Condition: Runtime Filters: {i_item_sk_1} - │ │ │ └─ Local Exchange Est. 575444667 rows - │ │ │ └─ CTERef[0] Est. 575444667 rows + │ │ │ └─ CTERef[0] Est. 575444667 rows │ │ └─ Inner Join Est. 
21960485 rows │ │ │ Condition: cs_sold_date_sk == d_date_sk │ │ │ Runtime Filters Builder: {d_date_sk} @@ -39,7 +38,8 @@ Projection Est. 1 rows │ │ └─ TableScan tpcds1000.date_dim Est. 73049 rows │ │ Where: (d_year = 2000) AND (d_moy = 2) │ │ Outputs: [d_date_sk, d_year, d_moy] - │ └─ CTERef[1] Est. 5975354 rows + │ └─ Buffer Est. 5975354 rows + │ └─ CTERef[1] Est. 5975354 rows └─ Aggregating Est. 1 rows │ Group by: {} │ Aggregates: expr#sum(sales):=AggNull(sum)(expr#multiply(ws_quantity, ws_list_price)) @@ -55,8 +55,7 @@ Projection Est. 1 rows │ │ Runtime Filters Builder: {ws_item_sk} │ ├─ Filter Est. 143861166 rows │ │ │ Condition: Runtime Filters: {i_item_sk_2} - │ │ └─ Local Exchange Est. 575444667 rows - │ │ └─ CTERef[0] Est. 575444667 rows + │ │ └─ CTERef[0] Est. 575444667 rows │ └─ Inner Join Est. 11058700 rows │ │ Condition: ws_sold_date_sk == d_date_sk_3 │ │ Runtime Filters Builder: {d_date_sk_3} @@ -74,7 +73,8 @@ Projection Est. 1 rows │ Where: (d_moy = 2) AND (d_year = 2000) │ Outputs: d_date_sk_3:=d_date_sk, d_year_3:=d_year, d_moy_1:=d_moy └─ Local Exchange Est. 5975354 rows - └─ CTERef[1] Est. 5975354 rows + └─ Buffer Est. 5975354 rows + └─ CTERef[1] Est. 5975354 rows CTEDef [0] Projection Est. 575444667 rows │ Expressions: i_item_sk:=ss_item_sk @@ -193,7 +193,8 @@ Projection Est. 100 rows │ │ ├─ Filter Est. 143861166 rows │ │ │ │ Condition: Runtime Filters: {i_item_sk_6} │ │ │ └─ Local Exchange Est. 575444667 rows - │ │ │ └─ CTERef[0] Est. 575444667 rows + │ │ │ └─ Buffer Est. 575444667 rows + │ │ │ └─ CTERef[0] Est. 575444667 rows │ │ └─ Inner Join Est. 21960485 rows │ │ │ Condition: cs_sold_date_sk_1 == d_date_sk_10 │ │ │ Runtime Filters Builder: {d_date_sk_10} @@ -210,16 +211,17 @@ Projection Est. 100 rows │ │ └─ TableScan tpcds1000.date_dim Est. 73049 rows │ │ Where: (d_moy = 2) AND (d_year = 2000) │ │ Outputs: d_date_sk_10:=d_date_sk, d_year_10:=d_year, d_moy_2:=d_moy - │ └─ CTERef[1] Est. 5975354 rows + │ └─ Buffer Est. 
5975354 rows + │ └─ CTERef[1] Est. 5975354 rows └─ MergingAggregated Est. 5975354 rows - └─ Repartition Exchange Est. 5975354 rows + └─ Repartition Exchange Est. 5999999 rows │ Partition by: {c_last_name_1, c_first_name_1} - └─ Aggregating Est. 5975354 rows + └─ Aggregating Est. 5999999 rows │ Group by: {c_last_name_1, c_first_name_1} │ Aggregates: expr#sum(multiply(ws_quantity, ws_list_price)):=AggNull(sum)(expr#multiply(ws_quantity, ws_list_price)_1) - └─ Projection Est. 5975354 rows + └─ Projection Est. 5999999 rows │ Expressions: [c_first_name_1, c_last_name_1], expr#multiply(ws_quantity, ws_list_price)_1:=ws_quantity_1 * ws_list_price_1 - └─ Inner (PARALLEL_HASH) Join Est. 5975354 rows + └─ Inner (PARALLEL_HASH) Join Est. 5999999 rows │ Condition: c_customer_sk_8 == ws_bill_customer_sk_1 │ Runtime Filters Builder: {ws_bill_customer_sk_1} ├─ Repartition Exchange Est. 12000000 rows @@ -240,7 +242,8 @@ Projection Est. 100 rows │ ├─ Filter Est. 143861166 rows │ │ │ Condition: Runtime Filters: {i_item_sk_7} │ │ └─ Local Exchange Est. 575444667 rows - │ │ └─ CTERef[0] Est. 575444667 rows + │ │ └─ Buffer Est. 575444667 rows + │ │ └─ CTERef[0] Est. 575444667 rows │ └─ Inner Join Est. 11058700 rows │ │ Condition: ws_sold_date_sk_1 == d_date_sk_13 │ │ Runtime Filters Builder: {d_date_sk_13} @@ -257,7 +260,8 @@ Projection Est. 100 rows │ └─ TableScan tpcds1000.date_dim Est. 73049 rows │ Where: (d_moy = 2) AND (d_year = 2000) │ Outputs: d_date_sk_13:=d_date_sk, d_year_13:=d_year, d_moy_3:=d_moy - └─ CTERef[1] Est. 5975354 rows + └─ Buffer Est. 5975354 rows + └─ CTERef[1] Est. 5975354 rows CTEDef [0] Projection Est. 
575444667 rows │ Expressions: i_item_sk_5:=ss_item_sk_3 diff --git a/tests/optimizers/tpcds/explains/tpcds1000/q24.explain b/tests/optimizers/tpcds/explains/tpcds1000/q24.explain index 61f158c4203..a8ec5d403d8 100644 --- a/tests/optimizers/tpcds/explains/tpcds1000/q24.explain +++ b/tests/optimizers/tpcds/explains/tpcds1000/q24.explain @@ -1,25 +1,73 @@ Projection Est. 1360 rows -│ Expressions: c_first_name:=c_first_name_1, c_last_name:=c_last_name_1, paid:=`expr#sum(netpaid)`, s_store_name:=s_store_name_1 +│ Expressions: c_first_name:=c_first_name_5, c_last_name:=c_last_name_5, paid:=`expr#sum(netpaid)`, s_store_name:=s_store_name_5 └─ Sorting Est. 1360 rows - │ Order by: {c_last_name_1 ASC NULLS LAST, c_first_name_1 ASC NULLS LAST, s_store_name_1 ASC NULLS LAST} + │ Order by: {c_last_name_5 ASC NULLS LAST, c_first_name_5 ASC NULLS LAST, s_store_name_5 ASC NULLS LAST} └─ Gather Exchange Est. 1360 rows └─ Sorting Est. 1360 rows - │ Order by: {c_last_name_1 ASC NULLS LAST, c_first_name_1 ASC NULLS LAST, s_store_name_1 ASC NULLS LAST} + │ Order by: {c_last_name_5 ASC NULLS LAST, c_first_name_5 ASC NULLS LAST, s_store_name_5 ASC NULLS LAST} └─ Inner Join Est. 1360 rows │ Condition: │ Filter: `expr#sum(netpaid)` > `expr#multiply('0.05', avg(netpaid))` ├─ MergingAggregated Est. 2721 rows - │ └─ Repartition Exchange Est. 362812 rows - │ │ Partition by: {c_last_name_1, c_first_name_1, s_store_name_1} - │ └─ Aggregating Est. 362812 rows - │ │ Group by: {c_last_name_1, c_first_name_1, s_store_name_1} - │ │ Aggregates: expr#sum(netpaid):=AggNull(sum)(expr#sum(ss_net_paid)_1) - │ └─ Projection Est. 46326129 rows - │ │ Expressions: [c_first_name_1, c_last_name_1, expr#sum(ss_net_paid)_1, s_store_name_1] - │ └─ Filter Est. 46326129 rows - │ │ Condition: i_color_1 = 'peach' - │ └─ Buffer Est. 185304517 rows - │ └─ CTERef[0] Est. 185304517 rows + │ └─ Repartition Exchange Est. 5807249 rows + │ │ Partition by: {c_last_name_5, c_first_name_5, s_store_name_5} + │ └─ Aggregating Est. 
5807249 rows + │ │ Group by: {c_last_name_5, c_first_name_5, s_store_name_5} + │ │ Aggregates: expr#sum(netpaid):=AggNull(sum)(expr#sum(ss_net_paid)_5) + │ └─ MergingAggregated Est. 185304517 rows + │ └─ Repartition Exchange Est. 185304501 rows + │ │ Partition by: {c_last_name_5, c_first_name_5, s_store_name_5, ca_state_5, s_state_5, i_color_5, i_current_price_5, i_manager_id_5, i_units_5, i_size_5} + │ └─ Aggregating Est. 185304501 rows + │ │ Group by: {c_last_name_5, c_first_name_5, s_store_name_5, ca_state_5, s_state_5, i_color_5, i_current_price_5, i_manager_id_5, i_units_5, i_size_5} + │ │ Aggregates: expr#sum(ss_net_paid)_5:=AggNull(sum)(ss_net_paid_3) + │ └─ Inner (PARALLEL_HASH) Join Est. 185304501 rows + │ │ Condition: s_zip_3 == ca_zip_3, ss_customer_sk_5 == c_customer_sk_5 + │ ├─ Repartition Exchange Est. 463261286 rows + │ │ │ Partition by: {s_zip_3, ss_customer_sk_5} + │ │ └─ Inner (PARALLEL_HASH) Join Est. 463261286 rows + │ │ │ Condition: ss_item_sk_11 == sr_item_sk_7, ss_ticket_number_11 == sr_ticket_number_11 + │ │ ├─ Inner Join Est. 482872239 rows + │ │ │ │ Condition: ss_store_sk_9 == s_store_sk_9 + │ │ │ │ Runtime Filters Builder: {s_store_sk_9} + │ │ │ ├─ Filter Est. 2879987999 rows + │ │ │ │ │ Condition: Runtime Filters: {ss_store_sk_9} + │ │ │ │ └─ TableScan tpcds1000.store_sales Est. 2879987999 rows + │ │ │ │ Where: Runtime Filters: {ss_store_sk} + │ │ │ │ Outputs: ss_item_sk_11:=ss_item_sk, ss_customer_sk_5:=ss_customer_sk, ss_store_sk_9:=ss_store_sk, ss_ticket_number_11:=ss_ticket_number, ss_net_paid_3:=ss_net_paid + │ │ │ └─ Broadcast Exchange Est. 84 rows + │ │ │ └─ Projection Est. 84 rows + │ │ │ │ Expressions: [s_state_5, s_store_name_5, s_store_sk_9, s_zip_3] + │ │ │ └─ Filter Est. 84 rows + │ │ │ │ Condition: s_market_id_9 = 8 + │ │ │ └─ TableScan tpcds1000.store Est. 
1002 rows + │ │ │ Where: s_market_id = 8 + │ │ │ Outputs: s_store_sk_9:=s_store_sk, s_store_name_5:=s_store_name, s_market_id_9:=s_market_id, s_state_5:=s_state, s_zip_3:=s_zip + │ │ └─ Inner Join Est. 287999764 rows + │ │ │ Condition: sr_item_sk_7 == i_item_sk_7 + │ │ │ Runtime Filters Builder: {i_item_sk_7} + │ │ ├─ Filter Est. 287999764 rows + │ │ │ │ Condition: Runtime Filters: {sr_item_sk_7} + │ │ │ └─ TableScan tpcds1000.store_returns Est. 287999764 rows + │ │ │ Where: Runtime Filters: {sr_item_sk} + │ │ │ Outputs: sr_item_sk_7:=sr_item_sk, sr_ticket_number_11:=sr_ticket_number + │ │ └─ Filter Est. 6604 rows + │ │ │ Condition: i_color_5 = 'peach' + │ │ └─ TableScan tpcds1000.item Est. 300000 rows + │ │ Where: i_color = 'peach' + │ │ Outputs: i_item_sk_7:=i_item_sk, i_current_price_5:=i_current_price, i_size_5:=i_size, i_color_5:=i_color, i_units_5:=i_units, i_manager_id_5:=i_manager_id + │ └─ Repartition Exchange Est. 6000000 rows + │ │ Partition by: {ca_zip_3, c_customer_sk_5} + │ └─ Inner (PARALLEL_HASH) Join Est. 6000000 rows + │ │ Condition: c_current_addr_sk_3 == ca_address_sk_3 + │ │ Filter: c_birth_country_3 != upper(ca_country_3) + │ ├─ Repartition Exchange Est. 12000000 rows + │ │ │ Partition by: {c_current_addr_sk_3} + │ │ └─ TableScan tpcds1000.customer Est. 12000000 rows + │ │ Outputs: c_customer_sk_5:=c_customer_sk, c_current_addr_sk_3:=c_current_addr_sk, c_first_name_5:=c_first_name, c_last_name_5:=c_last_name, c_birth_country_3:=c_birth_country + │ └─ Repartition Exchange Est. 6000000 rows + │ │ Partition by: {ca_address_sk_3} + │ └─ TableScan tpcds1000.customer_address Est. 6000000 rows + │ Outputs: ca_address_sk_3:=ca_address_sk, ca_state_5:=ca_state, ca_zip_3:=ca_zip, ca_country_3:=ca_country └─ Broadcast Exchange Est. 1 rows └─ Projection Est. 1 rows │ Expressions: expr#multiply('0.05', avg(netpaid)):='0.05' * `expr#avg(netpaid)` @@ -27,81 +75,126 @@ Projection Est. 1360 rows └─ Gather Exchange Est. 1 rows └─ Aggregating Est. 
1 rows │ Group by: {} - │ Aggregates: expr#avg(netpaid):=AggNull(avg)(expr#sum(ss_net_paid)_2) + │ Aggregates: expr#avg(netpaid):=AggNull(avg)(expr#sum(ss_net_paid)_6) └─ Buffer Est. 185304517 rows - └─ CTERef[0] Est. 185304517 rows -CTEDef [0] - MergingAggregated Est. 185304517 rows - └─ Repartition Exchange Est. 185304517 rows - │ Partition by: {c_last_name, c_first_name, s_store_name, ca_state, s_state, i_color, i_current_price, i_manager_id, i_units, i_size} - └─ Aggregating Est. 185304517 rows - │ Group by: {c_last_name, c_first_name, s_store_name, ca_state, s_state, i_color, i_current_price, i_manager_id, i_units, i_size} - │ Aggregates: expr#sum(ss_net_paid):=AggNull(sum)(ss_net_paid) - └─ Inner (PARALLEL_HASH) Join Est. 185304517 rows - │ Condition: s_zip == ca_zip, ss_customer_sk == c_customer_sk - ├─ Repartition Exchange Est. 463261286 rows - │ │ Partition by: {s_zip, ss_customer_sk} - │ └─ Inner (PARALLEL_HASH) Join Est. 463261286 rows - │ │ Condition: ss_item_sk == sr_item_sk, ss_ticket_number == sr_ticket_number - │ ├─ Inner Join Est. 482872239 rows - │ │ │ Condition: ss_store_sk == s_store_sk - │ │ │ Runtime Filters Builder: {s_store_sk} - │ │ ├─ Filter Est. 2879987999 rows - │ │ │ │ Condition: Runtime Filters: {ss_store_sk} - │ │ │ └─ TableScan tpcds1000.store_sales Est. 2879987999 rows - │ │ │ Where: Runtime Filters: {ss_store_sk} - │ │ │ Outputs: [ss_item_sk, ss_customer_sk, ss_store_sk, ss_ticket_number, ss_net_paid] - │ │ └─ Broadcast Exchange Est. 84 rows - │ │ └─ Projection Est. 84 rows - │ │ │ Expressions: [s_state, s_store_name, s_store_sk, s_zip] - │ │ └─ Filter Est. 84 rows - │ │ │ Condition: s_market_id = 8 - │ │ └─ TableScan tpcds1000.store Est. 1002 rows - │ │ Where: s_market_id = 8 - │ │ Outputs: [s_store_sk, s_store_name, s_market_id, s_state, s_zip] - │ └─ Inner Join Est. 287999764 rows - │ │ Condition: sr_item_sk == i_item_sk - │ ├─ TableScan tpcds1000.store_returns Est. 
287999764 rows - │ │ Outputs: [sr_item_sk, sr_ticket_number] - │ └─ TableScan tpcds1000.item Est. 300000 rows - │ Outputs: [i_item_sk, i_current_price, i_size, i_color, i_units, i_manager_id] - └─ Repartition Exchange Est. 6000000 rows - │ Partition by: {ca_zip, c_customer_sk} - └─ Inner (PARALLEL_HASH) Join Est. 6000000 rows - │ Condition: c_current_addr_sk == ca_address_sk - │ Filter: c_birth_country != upper(ca_country) - ├─ Repartition Exchange Est. 12000000 rows - │ │ Partition by: {c_current_addr_sk} - │ └─ TableScan tpcds1000.customer Est. 12000000 rows - │ Outputs: [c_customer_sk, c_current_addr_sk, c_first_name, c_last_name, c_birth_country] - └─ Repartition Exchange Est. 6000000 rows - │ Partition by: {ca_address_sk} - └─ TableScan tpcds1000.customer_address Est. 6000000 rows - Outputs: [ca_address_sk, ca_state, ca_zip, ca_country] -note: Runtime Filter is applied for 1 times. -note: CTE(Common Table Expression) is applied for 2 times. + └─ MergingAggregated Est. 185304517 rows + └─ Repartition Exchange Est. 185304501 rows + │ Partition by: {c_last_name_6, c_first_name_6, s_store_name_6, ca_state_6, s_state_6, i_color_6, i_current_price_6, i_manager_id_6, i_units_6, i_size_6} + └─ Aggregating Est. 185304501 rows + │ Group by: {c_last_name_6, c_first_name_6, s_store_name_6, ca_state_6, s_state_6, i_color_6, i_current_price_6, i_manager_id_6, i_units_6, i_size_6} + │ Aggregates: expr#sum(ss_net_paid)_6:=AggNull(sum)(ss_net_paid_4) + └─ Inner (PARALLEL_HASH) Join Est. 185304501 rows + │ Condition: s_zip_5 == ca_zip_5, ss_customer_sk_7 == c_customer_sk_7 + ├─ Repartition Exchange Est. 463261286 rows + │ │ Partition by: {s_zip_5, ss_customer_sk_7} + │ └─ Inner (PARALLEL_HASH) Join Est. 463261286 rows + │ │ Condition: ss_item_sk_14 == sr_item_sk_11, ss_ticket_number_14 == sr_ticket_number_14 + │ ├─ Inner Join Est. 482872239 rows + │ │ │ Condition: ss_store_sk_13 == s_store_sk_13 + │ │ │ Runtime Filters Builder: {s_store_sk_13} + │ │ ├─ Filter Est. 
2879987999 rows + │ │ │ │ Condition: Runtime Filters: {ss_store_sk_13} + │ │ │ └─ TableScan tpcds1000.store_sales Est. 2879987999 rows + │ │ │ Where: Runtime Filters: {ss_store_sk} + │ │ │ Outputs: ss_item_sk_14:=ss_item_sk, ss_customer_sk_7:=ss_customer_sk, ss_store_sk_13:=ss_store_sk, ss_ticket_number_14:=ss_ticket_number, ss_net_paid_4:=ss_net_paid + │ │ └─ Broadcast Exchange Est. 84 rows + │ │ └─ Projection Est. 84 rows + │ │ │ Expressions: [s_state_6, s_store_name_6, s_store_sk_13, s_zip_5] + │ │ └─ Filter Est. 84 rows + │ │ │ Condition: s_market_id_13 = 8 + │ │ └─ TableScan tpcds1000.store Est. 1002 rows + │ │ Where: s_market_id = 8 + │ │ Outputs: s_store_sk_13:=s_store_sk, s_store_name_6:=s_store_name, s_market_id_13:=s_market_id, s_state_6:=s_state, s_zip_5:=s_zip + │ └─ Inner Join Est. 287999764 rows + │ │ Condition: sr_item_sk_11 == i_item_sk_10 + │ ├─ TableScan tpcds1000.store_returns Est. 287999764 rows + │ │ Outputs: sr_item_sk_11:=sr_item_sk, sr_ticket_number_14:=sr_ticket_number + │ └─ TableScan tpcds1000.item Est. 300000 rows + │ Outputs: i_item_sk_10:=i_item_sk, i_current_price_6:=i_current_price, i_size_6:=i_size, i_color_6:=i_color, i_units_6:=i_units, i_manager_id_6:=i_manager_id + └─ Repartition Exchange Est. 6000000 rows + │ Partition by: {ca_zip_5, c_customer_sk_7} + └─ Inner (PARALLEL_HASH) Join Est. 6000000 rows + │ Condition: c_current_addr_sk_6 == ca_address_sk_6 + │ Filter: c_birth_country_4 != upper(ca_country_4) + ├─ Repartition Exchange Est. 12000000 rows + │ │ Partition by: {c_current_addr_sk_6} + │ └─ TableScan tpcds1000.customer Est. 12000000 rows + │ Outputs: c_customer_sk_7:=c_customer_sk, c_current_addr_sk_6:=c_current_addr_sk, c_first_name_6:=c_first_name, c_last_name_6:=c_last_name, c_birth_country_4:=c_birth_country + └─ Repartition Exchange Est. 6000000 rows + │ Partition by: {ca_address_sk_6} + └─ TableScan tpcds1000.customer_address Est. 
6000000 rows + Outputs: ca_address_sk_6:=ca_address_sk, ca_state_6:=ca_state, ca_zip_5:=ca_zip, ca_country_4:=ca_country +note: Runtime Filter is applied for 3 times. Projection Est. 1360 rows -│ Expressions: c_first_name:=c_first_name_6, c_last_name:=c_last_name_6, paid:=`expr#sum(netpaid)_1`, s_store_name:=s_store_name_6 +│ Expressions: c_first_name:=c_first_name_12, c_last_name:=c_last_name_12, paid:=`expr#sum(netpaid)_1`, s_store_name:=s_store_name_12 └─ Sorting Est. 1360 rows - │ Order by: {c_last_name_6 ASC NULLS LAST, c_first_name_6 ASC NULLS LAST, s_store_name_6 ASC NULLS LAST} + │ Order by: {c_last_name_12 ASC NULLS LAST, c_first_name_12 ASC NULLS LAST, s_store_name_12 ASC NULLS LAST} └─ Gather Exchange Est. 1360 rows └─ Sorting Est. 1360 rows - │ Order by: {c_last_name_6 ASC NULLS LAST, c_first_name_6 ASC NULLS LAST, s_store_name_6 ASC NULLS LAST} + │ Order by: {c_last_name_12 ASC NULLS LAST, c_first_name_12 ASC NULLS LAST, s_store_name_12 ASC NULLS LAST} └─ Inner Join Est. 1360 rows │ Condition: │ Filter: `expr#sum(netpaid)_1` > `expr#multiply('0.05', avg(netpaid))_1` ├─ MergingAggregated Est. 2721 rows - │ └─ Repartition Exchange Est. 362812 rows - │ │ Partition by: {c_last_name_6, c_first_name_6, s_store_name_6} - │ └─ Aggregating Est. 362812 rows - │ │ Group by: {c_last_name_6, c_first_name_6, s_store_name_6} - │ │ Aggregates: expr#sum(netpaid)_1:=AggNull(sum)(expr#sum(ss_net_paid)_6) - │ └─ Projection Est. 46326129 rows - │ │ Expressions: [c_first_name_6, c_last_name_6, expr#sum(ss_net_paid)_6, s_store_name_6] - │ └─ Filter Est. 46326129 rows - │ │ Condition: i_color_6 = 'saddle' - │ └─ Buffer Est. 185304517 rows - │ └─ CTERef[0] Est. 185304517 rows + │ └─ Repartition Exchange Est. 5807249 rows + │ │ Partition by: {c_last_name_12, c_first_name_12, s_store_name_12} + │ └─ Aggregating Est. 
5807249 rows + │ │ Group by: {c_last_name_12, c_first_name_12, s_store_name_12} + │ │ Aggregates: expr#sum(netpaid)_1:=AggNull(sum)(expr#sum(ss_net_paid)_12) + │ └─ MergingAggregated Est. 185304517 rows + │ └─ Repartition Exchange Est. 185304501 rows + │ │ Partition by: {c_last_name_12, c_first_name_12, s_store_name_12, ca_state_12, s_state_12, i_color_12, i_current_price_12, i_manager_id_12, i_units_12, i_size_12} + │ └─ Aggregating Est. 185304501 rows + │ │ Group by: {c_last_name_12, c_first_name_12, s_store_name_12, ca_state_12, s_state_12, i_color_12, i_current_price_12, i_manager_id_12, i_units_12, i_size_12} + │ │ Aggregates: expr#sum(ss_net_paid)_12:=AggNull(sum)(ss_net_paid_8) + │ └─ Inner (PARALLEL_HASH) Join Est. 185304501 rows + │ │ Condition: s_zip_10 == ca_zip_10, ss_customer_sk_14 == c_customer_sk_14 + │ ├─ Repartition Exchange Est. 463261286 rows + │ │ │ Partition by: {s_zip_10, ss_customer_sk_14} + │ │ └─ Inner (PARALLEL_HASH) Join Est. 463261286 rows + │ │ │ Condition: ss_item_sk_28 == sr_item_sk_22, ss_ticket_number_28 == sr_ticket_number_28 + │ │ ├─ Inner Join Est. 482872239 rows + │ │ │ │ Condition: ss_store_sk_26 == s_store_sk_26 + │ │ │ │ Runtime Filters Builder: {s_store_sk_26} + │ │ │ ├─ Filter Est. 2879987999 rows + │ │ │ │ │ Condition: Runtime Filters: {ss_store_sk_26} + │ │ │ │ └─ TableScan tpcds1000.store_sales Est. 2879987999 rows + │ │ │ │ Where: Runtime Filters: {ss_store_sk} + │ │ │ │ Outputs: ss_item_sk_28:=ss_item_sk, ss_customer_sk_14:=ss_customer_sk, ss_store_sk_26:=ss_store_sk, ss_ticket_number_28:=ss_ticket_number, ss_net_paid_8:=ss_net_paid + │ │ │ └─ Broadcast Exchange Est. 84 rows + │ │ │ └─ Projection Est. 84 rows + │ │ │ │ Expressions: [s_state_12, s_store_name_12, s_store_sk_26, s_zip_10] + │ │ │ └─ Filter Est. 84 rows + │ │ │ │ Condition: s_market_id_26 = 8 + │ │ │ └─ TableScan tpcds1000.store Est. 
1002 rows + │ │ │ Where: s_market_id = 8 + │ │ │ Outputs: s_store_sk_26:=s_store_sk, s_store_name_12:=s_store_name, s_market_id_26:=s_market_id, s_state_12:=s_state, s_zip_10:=s_zip + │ │ └─ Inner Join Est. 287999764 rows + │ │ │ Condition: sr_item_sk_22 == i_item_sk_20 + │ │ │ Runtime Filters Builder: {i_item_sk_20} + │ │ ├─ Filter Est. 287999764 rows + │ │ │ │ Condition: Runtime Filters: {sr_item_sk_22} + │ │ │ └─ TableScan tpcds1000.store_returns Est. 287999764 rows + │ │ │ Where: Runtime Filters: {sr_item_sk} + │ │ │ Outputs: sr_item_sk_22:=sr_item_sk, sr_ticket_number_28:=sr_ticket_number + │ │ └─ Filter Est. 6628 rows + │ │ │ Condition: i_color_12 = 'saddle' + │ │ └─ TableScan tpcds1000.item Est. 300000 rows + │ │ Where: i_color = 'saddle' + │ │ Outputs: i_item_sk_20:=i_item_sk, i_current_price_12:=i_current_price, i_size_12:=i_size, i_color_12:=i_color, i_units_12:=i_units, i_manager_id_12:=i_manager_id + │ └─ Repartition Exchange Est. 6000000 rows + │ │ Partition by: {ca_zip_10, c_customer_sk_14} + │ └─ Inner (PARALLEL_HASH) Join Est. 6000000 rows + │ │ Condition: c_current_addr_sk_12 == ca_address_sk_12 + │ │ Filter: c_birth_country_8 != upper(ca_country_8) + │ ├─ Repartition Exchange Est. 12000000 rows + │ │ │ Partition by: {c_current_addr_sk_12} + │ │ └─ TableScan tpcds1000.customer Est. 12000000 rows + │ │ Outputs: c_customer_sk_14:=c_customer_sk, c_current_addr_sk_12:=c_current_addr_sk, c_first_name_12:=c_first_name, c_last_name_12:=c_last_name, c_birth_country_8:=c_birth_country + │ └─ Repartition Exchange Est. 6000000 rows + │ │ Partition by: {ca_address_sk_12} + │ └─ TableScan tpcds1000.customer_address Est. 6000000 rows + │ Outputs: ca_address_sk_12:=ca_address_sk, ca_state_12:=ca_state, ca_zip_10:=ca_zip, ca_country_8:=ca_country └─ Broadcast Exchange Est. 1 rows └─ Projection Est. 1 rows │ Expressions: expr#multiply('0.05', avg(netpaid))_1:='0.05' * `expr#avg(netpaid)_1` @@ -109,56 +202,53 @@ Projection Est. 1360 rows └─ Gather Exchange Est. 
1 rows └─ Aggregating Est. 1 rows │ Group by: {} - │ Aggregates: expr#avg(netpaid)_1:=AggNull(avg)(expr#sum(ss_net_paid)_7) + │ Aggregates: expr#avg(netpaid)_1:=AggNull(avg)(expr#sum(ss_net_paid)_13) └─ Buffer Est. 185304517 rows - └─ CTERef[0] Est. 185304517 rows -CTEDef [0] - MergingAggregated Est. 185304517 rows - └─ Repartition Exchange Est. 185304517 rows - │ Partition by: {c_last_name_5, c_first_name_5, s_store_name_5, ca_state_5, s_state_5, i_color_5, i_current_price_5, i_manager_id_5, i_units_5, i_size_5} - └─ Aggregating Est. 185304517 rows - │ Group by: {c_last_name_5, c_first_name_5, s_store_name_5, ca_state_5, s_state_5, i_color_5, i_current_price_5, i_manager_id_5, i_units_5, i_size_5} - │ Aggregates: expr#sum(ss_net_paid)_5:=AggNull(sum)(ss_net_paid_3) - └─ Inner (PARALLEL_HASH) Join Est. 185304517 rows - │ Condition: s_zip_3 == ca_zip_3, ss_customer_sk_5 == c_customer_sk_5 - ├─ Repartition Exchange Est. 463261286 rows - │ │ Partition by: {s_zip_3, ss_customer_sk_5} - │ └─ Inner (PARALLEL_HASH) Join Est. 463261286 rows - │ │ Condition: ss_item_sk_11 == sr_item_sk_7, ss_ticket_number_11 == sr_ticket_number_11 - │ ├─ Inner Join Est. 482872239 rows - │ │ │ Condition: ss_store_sk_9 == s_store_sk_9 - │ │ │ Runtime Filters Builder: {s_store_sk_9} - │ │ ├─ Filter Est. 2879987999 rows - │ │ │ │ Condition: Runtime Filters: {ss_store_sk_9} - │ │ │ └─ TableScan tpcds1000.store_sales Est. 2879987999 rows - │ │ │ Where: Runtime Filters: {ss_store_sk} - │ │ │ Outputs: ss_item_sk_11:=ss_item_sk, ss_customer_sk_5:=ss_customer_sk, ss_store_sk_9:=ss_store_sk, ss_ticket_number_11:=ss_ticket_number, ss_net_paid_3:=ss_net_paid - │ │ └─ Broadcast Exchange Est. 84 rows - │ │ └─ Projection Est. 84 rows - │ │ │ Expressions: [s_state_5, s_store_name_5, s_store_sk_9, s_zip_3] - │ │ └─ Filter Est. 84 rows - │ │ │ Condition: s_market_id_9 = 8 - │ │ └─ TableScan tpcds1000.store Est. 
1002 rows - │ │ Where: s_market_id = 8 - │ │ Outputs: s_store_sk_9:=s_store_sk, s_store_name_5:=s_store_name, s_market_id_9:=s_market_id, s_state_5:=s_state, s_zip_3:=s_zip - │ └─ Inner Join Est. 287999764 rows - │ │ Condition: sr_item_sk_7 == i_item_sk_7 - │ ├─ TableScan tpcds1000.store_returns Est. 287999764 rows - │ │ Outputs: sr_item_sk_7:=sr_item_sk, sr_ticket_number_11:=sr_ticket_number - │ └─ TableScan tpcds1000.item Est. 300000 rows - │ Outputs: i_item_sk_7:=i_item_sk, i_current_price_5:=i_current_price, i_size_5:=i_size, i_color_5:=i_color, i_units_5:=i_units, i_manager_id_5:=i_manager_id - └─ Repartition Exchange Est. 6000000 rows - │ Partition by: {ca_zip_3, c_customer_sk_5} - └─ Inner (PARALLEL_HASH) Join Est. 6000000 rows - │ Condition: c_current_addr_sk_3 == ca_address_sk_3 - │ Filter: c_birth_country_3 != upper(ca_country_3) - ├─ Repartition Exchange Est. 12000000 rows - │ │ Partition by: {c_current_addr_sk_3} - │ └─ TableScan tpcds1000.customer Est. 12000000 rows - │ Outputs: c_customer_sk_5:=c_customer_sk, c_current_addr_sk_3:=c_current_addr_sk, c_first_name_5:=c_first_name, c_last_name_5:=c_last_name, c_birth_country_3:=c_birth_country - └─ Repartition Exchange Est. 6000000 rows - │ Partition by: {ca_address_sk_3} - └─ TableScan tpcds1000.customer_address Est. 6000000 rows - Outputs: ca_address_sk_3:=ca_address_sk, ca_state_5:=ca_state, ca_zip_3:=ca_zip, ca_country_3:=ca_country -note: Runtime Filter is applied for 1 times. -note: CTE(Common Table Expression) is applied for 2 times. + └─ MergingAggregated Est. 185304517 rows + └─ Repartition Exchange Est. 185304501 rows + │ Partition by: {c_last_name_13, c_first_name_13, s_store_name_13, ca_state_13, s_state_13, i_color_13, i_current_price_13, i_manager_id_13, i_units_13, i_size_13} + └─ Aggregating Est. 
185304501 rows + │ Group by: {c_last_name_13, c_first_name_13, s_store_name_13, ca_state_13, s_state_13, i_color_13, i_current_price_13, i_manager_id_13, i_units_13, i_size_13} + │ Aggregates: expr#sum(ss_net_paid)_13:=AggNull(sum)(ss_net_paid_9) + └─ Inner (PARALLEL_HASH) Join Est. 185304501 rows + │ Condition: s_zip_12 == ca_zip_12, ss_customer_sk_16 == c_customer_sk_16 + ├─ Repartition Exchange Est. 463261286 rows + │ │ Partition by: {s_zip_12, ss_customer_sk_16} + │ └─ Inner (PARALLEL_HASH) Join Est. 463261286 rows + │ │ Condition: ss_item_sk_31 == sr_item_sk_26, ss_ticket_number_31 == sr_ticket_number_31 + │ ├─ Inner Join Est. 482872239 rows + │ │ │ Condition: ss_store_sk_30 == s_store_sk_30 + │ │ │ Runtime Filters Builder: {s_store_sk_30} + │ │ ├─ Filter Est. 2879987999 rows + │ │ │ │ Condition: Runtime Filters: {ss_store_sk_30} + │ │ │ └─ TableScan tpcds1000.store_sales Est. 2879987999 rows + │ │ │ Where: Runtime Filters: {ss_store_sk} + │ │ │ Outputs: ss_item_sk_31:=ss_item_sk, ss_customer_sk_16:=ss_customer_sk, ss_store_sk_30:=ss_store_sk, ss_ticket_number_31:=ss_ticket_number, ss_net_paid_9:=ss_net_paid + │ │ └─ Broadcast Exchange Est. 84 rows + │ │ └─ Projection Est. 84 rows + │ │ │ Expressions: [s_state_13, s_store_name_13, s_store_sk_30, s_zip_12] + │ │ └─ Filter Est. 84 rows + │ │ │ Condition: s_market_id_30 = 8 + │ │ └─ TableScan tpcds1000.store Est. 1002 rows + │ │ Where: s_market_id = 8 + │ │ Outputs: s_store_sk_30:=s_store_sk, s_store_name_13:=s_store_name, s_market_id_30:=s_market_id, s_state_13:=s_state, s_zip_12:=s_zip + │ └─ Inner Join Est. 287999764 rows + │ │ Condition: sr_item_sk_26 == i_item_sk_23 + │ ├─ TableScan tpcds1000.store_returns Est. 287999764 rows + │ │ Outputs: sr_item_sk_26:=sr_item_sk, sr_ticket_number_31:=sr_ticket_number + │ └─ TableScan tpcds1000.item Est. 
300000 rows + │ Outputs: i_item_sk_23:=i_item_sk, i_current_price_13:=i_current_price, i_size_13:=i_size, i_color_13:=i_color, i_units_13:=i_units, i_manager_id_13:=i_manager_id + └─ Repartition Exchange Est. 6000000 rows + │ Partition by: {ca_zip_12, c_customer_sk_16} + └─ Inner (PARALLEL_HASH) Join Est. 6000000 rows + │ Condition: c_current_addr_sk_15 == ca_address_sk_15 + │ Filter: c_birth_country_9 != upper(ca_country_9) + ├─ Repartition Exchange Est. 12000000 rows + │ │ Partition by: {c_current_addr_sk_15} + │ └─ TableScan tpcds1000.customer Est. 12000000 rows + │ Outputs: c_customer_sk_16:=c_customer_sk, c_current_addr_sk_15:=c_current_addr_sk, c_first_name_13:=c_first_name, c_last_name_13:=c_last_name, c_birth_country_9:=c_birth_country + └─ Repartition Exchange Est. 6000000 rows + │ Partition by: {ca_address_sk_15} + └─ TableScan tpcds1000.customer_address Est. 6000000 rows + Outputs: ca_address_sk_15:=ca_address_sk, ca_state_13:=ca_state, ca_zip_12:=ca_zip, ca_country_9:=ca_country +note: Runtime Filter is applied for 3 times. diff --git a/tests/optimizers/tpcds/explains/tpcds1000/q31.explain b/tests/optimizers/tpcds/explains/tpcds1000/q31.explain index 889c28b05df..4b24214124d 100644 --- a/tests/optimizers/tpcds/explains/tpcds1000/q31.explain +++ b/tests/optimizers/tpcds/explains/tpcds1000/q31.explain @@ -5,9 +5,9 @@ Projection Est. 59469 rows └─ Gather Exchange Est. 59469 rows └─ Sorting Est. 59469 rows │ Order by: {ca_county_3 ASC NULLS LAST} - └─ Projection Est. 59469 rows + └─ Projection Est. 
59472 rows │ Expressions: [ca_county_3, d_year_1], expr#divide(ss2.store_sales, ss1.store_sales):=`expr#sum(ss_ext_sales_price)_2` / `expr#sum(ss_ext_sales_price)_1`, expr#divide(ss3.store_sales, ss2.store_sales):=`expr#sum(ss_ext_sales_price)_3` / `expr#sum(ss_ext_sales_price)_2`, expr#divide(ws2.web_sales, ws1.web_sales):=`expr#sum(ws_ext_sales_price)_2` / `expr#sum(ws_ext_sales_price)_1`, expr#divide(ws3.web_sales, ws2.web_sales):=`expr#sum(ws_ext_sales_price)_3` / `expr#sum(ws_ext_sales_price)_2` - └─ Inner Join Est. 59469 rows + └─ Inner Join Est. 59472 rows │ Condition: ca_county_3 == ca_county_1 │ Filter: multiIf(`expr#sum(ws_ext_sales_price)_1` > 0, `expr#sum(ws_ext_sales_price)_2` / `expr#sum(ws_ext_sales_price)_1`, NULL) > multiIf(`expr#sum(ss_ext_sales_price)_1` > 0, `expr#sum(ss_ext_sales_price)_2` / `expr#sum(ss_ext_sales_price)_1`, NULL) ├─ Inner Join Est. 16319 rows diff --git a/tests/optimizers/tpcds/explains/tpcds1000/q33.explain b/tests/optimizers/tpcds/explains/tpcds1000/q33.explain index c8954baeaea..a822d5bbd61 100644 --- a/tests/optimizers/tpcds/explains/tpcds1000/q33.explain +++ b/tests/optimizers/tpcds/explains/tpcds1000/q33.explain @@ -66,7 +66,8 @@ Projection Est. 100 rows │ │ └─ TableScan tpcds1000.item Est. 300000 rows │ │ Where: Runtime Filters: {i_manufact_id} │ │ Outputs: [i_item_sk, i_manufact_id] - │ └─ CTERef[1] Est. 29887 rows + │ └─ Buffer Est. 29887 rows + │ └─ CTERef[1] Est. 29887 rows ├─ MergingAggregated Est. 99 rows │ └─ Repartition Exchange Est. 99 rows │ │ Partition by: {i_manufact_id_2} @@ -119,7 +120,8 @@ Projection Est. 100 rows │ │ └─ TableScan tpcds1000.item Est. 300000 rows │ │ Where: Runtime Filters: {i_manufact_id} │ │ Outputs: i_item_sk_1:=i_item_sk, i_manufact_id_2:=i_manufact_id - │ └─ CTERef[1] Est. 29887 rows + │ └─ Buffer Est. 29887 rows + │ └─ CTERef[1] Est. 29887 rows └─ MergingAggregated Est. 99 rows └─ Repartition Exchange Est. 99 rows │ Partition by: {i_manufact_id_4} @@ -172,7 +174,8 @@ Projection Est. 
100 rows │ └─ TableScan tpcds1000.item Est. 300000 rows │ Where: Runtime Filters: {i_manufact_id} │ Outputs: i_item_sk_2:=i_item_sk, i_manufact_id_4:=i_manufact_id - └─ CTERef[1] Est. 29887 rows + └─ Buffer Est. 29887 rows + └─ CTERef[1] Est. 29887 rows CTEDef [1] Repartition Exchange Est. 29887 rows │ Partition by: {i_manufact_id_1} diff --git a/tests/optimizers/tpcds/explains/tpcds1000/q54.explain b/tests/optimizers/tpcds/explains/tpcds1000/q54.explain index 64f217a6084..aece0a08435 100644 --- a/tests/optimizers/tpcds/explains/tpcds1000/q54.explain +++ b/tests/optimizers/tpcds/explains/tpcds1000/q54.explain @@ -29,7 +29,7 @@ Projection Est. 100 rows │ Condition: ss_sold_date_sk == d_date_sk_1 ├─ Repartition Exchange Est. 627291 rows │ │ Partition by: {ss_sold_date_sk} - │ └─ Inner Join Est. 627291 rows + │ └─ Inner Join Est. 627294 rows │ │ Condition: ss_customer_sk == c_customer_sk │ │ Runtime Filters Builder: {c_customer_sk} │ ├─ Filter Est. 2879987999 rows @@ -38,7 +38,7 @@ Projection Est. 100 rows │ │ Where: Runtime Filters: {ss_customer_sk} │ │ Outputs: [ss_sold_date_sk, ss_customer_sk, ss_ext_sales_price] │ └─ Broadcast Exchange Est. 2603 rows - │ └─ Inner Join Est. 2603 rows + │ └─ Inner Join Est. 44255 rows │ │ Condition: ca_address_sk == c_current_addr_sk │ │ Runtime Filters Builder: {c_current_addr_sk} │ ├─ Repartition Exchange Est. 352900 rows @@ -123,7 +123,8 @@ Projection Est. 100 rows │ │ Group by: {expr#plus(d_month_seq, 1)} │ └─ Projection Est. 30 rows │ │ Expressions: expr#plus(d_month_seq, 1):=d_month_seq_1 + 1 - │ └─ CTERef[1] Est. 30 rows + │ └─ Buffer Est. 30 rows + │ └─ CTERef[1] Est. 30 rows └─ Broadcast Exchange Est. 1 rows └─ EnforceSingleRow Est. 1 rows └─ Gather Exchange Est. 1 rows @@ -136,7 +137,8 @@ Projection Est. 100 rows │ Group by: {expr#plus(d_month_seq, 3)} └─ Projection Est. 30 rows │ Expressions: expr#plus(d_month_seq, 3):=d_month_seq_2 + 3 - └─ CTERef[1] Est. 30 rows + └─ Buffer Est. 30 rows + └─ CTERef[1] Est. 
30 rows CTEDef [1] Projection Est. 30 rows │ Expressions: [d_month_seq_1] diff --git a/tests/optimizers/tpcds/explains/tpcds1000/q56.explain b/tests/optimizers/tpcds/explains/tpcds1000/q56.explain index 8ae41bd80f2..fedeb029e07 100644 --- a/tests/optimizers/tpcds/explains/tpcds1000/q56.explain +++ b/tests/optimizers/tpcds/explains/tpcds1000/q56.explain @@ -20,7 +20,7 @@ Projection Est. 100 rows │ └─ Aggregating Est. 4015 rows │ │ Group by: {i_item_id} │ │ Aggregates: expr#sum(ss_ext_sales_price):=AggNull(sum)(ss_ext_sales_price) - │ └─ Inner (PARALLEL_HASH) Join Est. 868236 rows + │ └─ Inner (PARALLEL_HASH) Join Est. 2189168 rows │ │ Condition: ss_addr_sk == ca_address_sk │ │ Runtime Filters Builder: {ca_address_sk} │ ├─ Repartition Exchange Est. 2379526 rows @@ -55,7 +55,8 @@ Projection Est. 100 rows │ │ │ └─ TableScan tpcds1000.item Est. 300000 rows │ │ │ Where: Runtime Filters: {i_item_id} │ │ │ Outputs: [i_item_sk, i_item_id] - │ │ └─ CTERef[1] Est. 8050 rows + │ │ └─ Buffer Est. 8050 rows + │ │ └─ CTERef[1] Est. 8050 rows │ └─ Repartition Exchange Est. 2189168 rows │ │ Partition by: {ca_address_sk} │ └─ Projection Est. 2189168 rows @@ -71,7 +72,7 @@ Projection Est. 100 rows │ └─ Aggregating Est. 4015 rows │ │ Group by: {i_item_id_2} │ │ Aggregates: expr#sum(cs_ext_sales_price):=AggNull(sum)(cs_ext_sales_price) - │ └─ Inner Join Est. 431040 rows + │ └─ Inner Join Est. 1181327 rows │ │ Condition: ca_address_sk_1 == cs_bill_addr_sk │ │ Runtime Filters Builder: {cs_bill_addr_sk} │ ├─ Repartition Exchange Est. 2189168 rows @@ -115,14 +116,15 @@ Projection Est. 100 rows │ │ └─ TableScan tpcds1000.item Est. 300000 rows │ │ Where: Runtime Filters: {i_item_id} │ │ Outputs: i_item_sk_1:=i_item_sk, i_item_id_2:=i_item_id - │ └─ CTERef[1] Est. 8050 rows + │ └─ Buffer Est. 8050 rows + │ └─ CTERef[1] Est. 8050 rows └─ MergingAggregated Est. 4015 rows └─ Repartition Exchange Est. 4015 rows │ Partition by: {i_item_id_4} └─ Aggregating Est. 
4015 rows │ Group by: {i_item_id_4} │ Aggregates: expr#sum(ws_ext_sales_price):=AggNull(sum)(ws_ext_sales_price) - └─ Inner Join Est. 217060 rows + └─ Inner Join Est. 594884 rows │ Condition: ca_address_sk_2 == ws_bill_addr_sk │ Runtime Filters Builder: {ws_bill_addr_sk} ├─ Repartition Exchange Est. 2189168 rows @@ -166,7 +168,8 @@ Projection Est. 100 rows │ └─ TableScan tpcds1000.item Est. 300000 rows │ Where: Runtime Filters: {i_item_id} │ Outputs: i_item_sk_2:=i_item_sk, i_item_id_4:=i_item_id - └─ CTERef[1] Est. 8050 rows + └─ Buffer Est. 8050 rows + └─ CTERef[1] Est. 8050 rows CTEDef [1] Repartition Exchange Est. 8050 rows │ Partition by: {i_item_id_1} diff --git a/tests/optimizers/tpcds/explains/tpcds1000/q58.explain b/tests/optimizers/tpcds/explains/tpcds1000/q58.explain index f989ca20014..4547756f92d 100644 --- a/tests/optimizers/tpcds/explains/tpcds1000/q58.explain +++ b/tests/optimizers/tpcds/explains/tpcds1000/q58.explain @@ -21,7 +21,7 @@ Projection Est. 100 rows │ └─ Aggregating Est. 149640 rows │ │ Group by: {i_item_id_1} │ │ Aggregates: expr#sum(cs_ext_sales_price):=AggNull(sum)(cs_ext_sales_price) - │ └─ Inner Join Est. 4695588 rows + │ └─ Inner Join Est. 4705818 rows │ │ Condition: cs_item_sk == i_item_sk_1 │ ├─ Inner Join Est. 4705818 rows │ │ │ Condition: cs_sold_date_sk == d_date_sk_1 @@ -41,7 +41,8 @@ Projection Est. 100 rows │ │ │ Where: Runtime Filters: {d_date} │ │ │ Outputs: d_date_sk_1:=d_date_sk, d_date_3:=d_date │ │ └─ Broadcast Exchange Est. 6 rows - │ │ └─ CTERef[1] Est. 6 rows + │ │ └─ Buffer Est. 6 rows + │ │ └─ CTERef[1] Est. 6 rows │ └─ Filter Est. 300000 rows │ │ Condition: Runtime Filters: {i_item_id_1} │ └─ TableScan tpcds1000.item Est. 300000 rows @@ -57,7 +58,7 @@ Projection Est. 100 rows │ └─ Aggregating Est. 149640 rows │ │ Group by: {i_item_id} │ │ Aggregates: expr#sum(ss_ext_sales_price):=AggNull(sum)(ss_ext_sales_price) - │ └─ Inner Join Est. 9458088 rows + │ └─ Inner Join Est. 
9478841 rows │ │ Condition: ss_item_sk == i_item_sk │ ├─ Inner Join Est. 9478841 rows │ │ │ Condition: ss_sold_date_sk == d_date_sk @@ -77,7 +78,8 @@ Projection Est. 100 rows │ │ │ Where: Runtime Filters: {d_date} │ │ │ Outputs: [d_date_sk, d_date] │ │ └─ Broadcast Exchange Est. 6 rows - │ │ └─ CTERef[1] Est. 6 rows + │ │ └─ Buffer Est. 6 rows + │ │ └─ CTERef[1] Est. 6 rows │ └─ Filter Est. 300000 rows │ │ Condition: Runtime Filters: {i_item_id} │ └─ TableScan tpcds1000.item Est. 300000 rows @@ -89,7 +91,7 @@ Projection Est. 100 rows └─ Aggregating Est. 149640 rows │ Group by: {i_item_id_2} │ Aggregates: expr#sum(ws_ext_sales_price):=AggNull(sum)(ws_ext_sales_price) - └─ Inner Join Est. 2364533 rows + └─ Inner Join Est. 2369721 rows │ Condition: ws_item_sk == i_item_sk_2 ├─ Inner Join Est. 2369721 rows │ │ Condition: ws_sold_date_sk == d_date_sk_2 @@ -109,7 +111,8 @@ Projection Est. 100 rows │ │ Where: Runtime Filters: {d_date} │ │ Outputs: d_date_sk_2:=d_date_sk, d_date_6:=d_date │ └─ Broadcast Exchange Est. 6 rows - │ └─ CTERef[1] Est. 6 rows + │ └─ Buffer Est. 6 rows + │ └─ CTERef[1] Est. 6 rows └─ TableScan tpcds1000.item Est. 300000 rows Outputs: i_item_sk_2:=i_item_sk, i_item_id_2:=i_item_id CTEDef [1] diff --git a/tests/optimizers/tpcds/explains/tpcds1000/q59.explain b/tests/optimizers/tpcds/explains/tpcds1000/q59.explain index 4ddbc709440..100cd3f7027 100644 --- a/tests/optimizers/tpcds/explains/tpcds1000/q59.explain +++ b/tests/optimizers/tpcds/explains/tpcds1000/q59.explain @@ -15,9 +15,9 @@ Projection Est. 100 rows │ Condition: expr#cast(d_week_seq_1, 'Int32') == expr#cast(minus(d_week_seq_3, 52), 'Int32'), s_store_id == s_store_id_1 ├─ Repartition Exchange Est. 150449 rows │ │ Partition by: {expr#cast(d_week_seq_1, 'Int32'), s_store_id} - │ └─ Projection Est. 150449 rows + │ └─ Projection Est. 
150147 rows │ │ Expressions: [d_week_seq_5, expr#sum(multiIf(equals(d_day_name, 'Friday'), ss_sales_price, NULL))_3, expr#sum(multiIf(equals(d_day_name, 'Monday'), ss_sales_price, NULL))_3, expr#sum(multiIf(equals(d_day_name, 'Saturday'), ss_sales_price, NULL))_3, expr#sum(multiIf(equals(d_day_name, 'Sunday'), ss_sales_price, NULL))_3, expr#sum(multiIf(equals(d_day_name, 'Thursday'), ss_sales_price, NULL))_3, expr#sum(multiIf(equals(d_day_name, 'Tuesday'), ss_sales_price, NULL))_3, expr#sum(multiIf(equals(d_day_name, 'Wednesday'), ss_sales_price, NULL))_3, s_store_id, s_store_name], expr#cast(d_week_seq_1, 'Int32'):=cast(d_week_seq_5, 'Int32') - │ └─ Inner Join Est. 150449 rows + │ └─ Inner Join Est. 150147 rows │ │ Condition: d_week_seq_5 == d_week_seq_2 │ │ Runtime Filters Builder: {d_week_seq_2} │ ├─ Inner Join Est. 21192 rows @@ -50,18 +50,20 @@ Projection Est. 100 rows │ │ │ │ Where: Runtime Filters: {d_week_seq} │ │ │ │ Outputs: d_date_sk_1:=d_date_sk, d_week_seq_5:=d_week_seq, d_day_name_1:=d_day_name │ │ │ └─ Broadcast Exchange Est. 333 rows - │ │ │ └─ CTERef[1] Est. 333 rows + │ │ │ └─ Buffer Est. 333 rows + │ │ │ └─ CTERef[1] Est. 333 rows │ │ └─ Repartition Exchange Est. 1002 rows │ │ │ Partition by: {s_store_sk} │ │ └─ TableScan tpcds1000.store Est. 1002 rows │ │ Outputs: [s_store_sk, s_store_id, s_store_name] │ └─ Broadcast Exchange Est. 333 rows - │ └─ CTERef[1] Est. 333 rows + │ └─ Buffer Est. 333 rows + │ └─ CTERef[1] Est. 333 rows └─ Repartition Exchange Est. 150449 rows │ Partition by: {expr#cast(minus(d_week_seq_3, 52), 'Int32'), s_store_id_1} - └─ Projection Est. 150449 rows + └─ Projection Est. 
150147 rows │ Expressions: [expr#sum(multiIf(equals(d_day_name, 'Friday'), ss_sales_price, NULL))_4, expr#sum(multiIf(equals(d_day_name, 'Monday'), ss_sales_price, NULL))_4, expr#sum(multiIf(equals(d_day_name, 'Saturday'), ss_sales_price, NULL))_4, expr#sum(multiIf(equals(d_day_name, 'Sunday'), ss_sales_price, NULL))_4, expr#sum(multiIf(equals(d_day_name, 'Thursday'), ss_sales_price, NULL))_4, expr#sum(multiIf(equals(d_day_name, 'Tuesday'), ss_sales_price, NULL))_4, expr#sum(multiIf(equals(d_day_name, 'Wednesday'), ss_sales_price, NULL))_4, s_store_id_1], expr#cast(minus(d_week_seq_3, 52), 'Int32'):=cast(d_week_seq_6 - 52, 'Int32') - └─ Inner Join Est. 150449 rows + └─ Inner Join Est. 150147 rows │ Condition: d_week_seq_6 == d_week_seq_4 │ Runtime Filters Builder: {d_week_seq_4} ├─ Inner Join Est. 21192 rows @@ -94,13 +96,15 @@ Projection Est. 100 rows │ │ │ Where: Runtime Filters: {d_week_seq} │ │ │ Outputs: d_date_sk_2:=d_date_sk, d_week_seq_6:=d_week_seq, d_day_name_2:=d_day_name │ │ └─ Broadcast Exchange Est. 333 rows - │ │ └─ CTERef[2] Est. 333 rows + │ │ └─ Buffer Est. 333 rows + │ │ └─ CTERef[2] Est. 333 rows │ └─ Repartition Exchange Est. 1002 rows │ │ Partition by: {s_store_sk_1} │ └─ TableScan tpcds1000.store Est. 1002 rows │ Outputs: s_store_sk_1:=s_store_sk, s_store_id_1:=s_store_id └─ Broadcast Exchange Est. 333 rows - └─ CTERef[2] Est. 333 rows + └─ Buffer Est. 333 rows + └─ CTERef[2] Est. 333 rows CTEDef [1] Projection Est. 333 rows │ Expressions: [d_week_seq_2] diff --git a/tests/optimizers/tpcds/explains/tpcds1000/q60.explain b/tests/optimizers/tpcds/explains/tpcds1000/q60.explain index 9523d9c7ba4..f9b5ca83e0a 100644 --- a/tests/optimizers/tpcds/explains/tpcds1000/q60.explain +++ b/tests/optimizers/tpcds/explains/tpcds1000/q60.explain @@ -20,7 +20,7 @@ Projection Est. 100 rows │ └─ Aggregating Est. 
14965 rows │ │ Group by: {i_item_id} │ │ Aggregates: expr#sum(ss_ext_sales_price):=AggNull(sum)(ss_ext_sales_price) - │ └─ Inner (PARALLEL_HASH) Join Est. 3351670 rows + │ └─ Inner (PARALLEL_HASH) Join Est. 3351649 rows │ │ Condition: ss_addr_sk == ca_address_sk │ │ Runtime Filters Builder: {ca_address_sk} │ ├─ Repartition Exchange Est. 9186092 rows @@ -55,7 +55,8 @@ Projection Est. 100 rows │ │ │ └─ TableScan tpcds1000.item Est. 300000 rows │ │ │ Where: Runtime Filters: {i_item_id} │ │ │ Outputs: [i_item_sk, i_item_id] - │ │ └─ CTERef[1] Est. 30004 rows + │ │ └─ Buffer Est. 30004 rows + │ │ └─ CTERef[1] Est. 30004 rows │ └─ Repartition Exchange Est. 2189168 rows │ │ Partition by: {ca_address_sk} │ └─ Projection Est. 2189168 rows @@ -71,7 +72,7 @@ Projection Est. 100 rows │ └─ Aggregating Est. 14965 rows │ │ Group by: {i_item_id_2} │ │ Aggregates: expr#sum(cs_ext_sales_price):=AggNull(sum)(cs_ext_sales_price) - │ └─ Inner (PARALLEL_HASH) Join Est. 1663953 rows + │ └─ Inner (PARALLEL_HASH) Join Est. 2189168 rows │ │ Condition: cs_bill_addr_sk == ca_address_sk_1 │ │ Runtime Filters Builder: {ca_address_sk_1} │ ├─ Repartition Exchange Est. 4560481 rows @@ -106,7 +107,8 @@ Projection Est. 100 rows │ │ │ └─ TableScan tpcds1000.item Est. 300000 rows │ │ │ Where: Runtime Filters: {i_item_id} │ │ │ Outputs: i_item_sk_1:=i_item_sk, i_item_id_2:=i_item_id - │ │ └─ CTERef[1] Est. 30004 rows + │ │ └─ Buffer Est. 30004 rows + │ │ └─ CTERef[1] Est. 30004 rows │ └─ Repartition Exchange Est. 2189168 rows │ │ Partition by: {ca_address_sk_1} │ └─ Projection Est. 2189168 rows @@ -122,7 +124,7 @@ Projection Est. 100 rows └─ Aggregating Est. 14965 rows │ Group by: {i_item_id_4} │ Aggregates: expr#sum(ws_ext_sales_price):=AggNull(sum)(ws_ext_sales_price) - └─ Inner (PARALLEL_HASH) Join Est. 837921 rows + └─ Inner (PARALLEL_HASH) Join Est. 2189168 rows │ Condition: ws_bill_addr_sk == ca_address_sk_2 │ Runtime Filters Builder: {ca_address_sk_2} ├─ Repartition Exchange Est. 
2296533 rows @@ -157,7 +159,8 @@ Projection Est. 100 rows │ │ └─ TableScan tpcds1000.item Est. 300000 rows │ │ Where: Runtime Filters: {i_item_id} │ │ Outputs: i_item_sk_2:=i_item_sk, i_item_id_4:=i_item_id - │ └─ CTERef[1] Est. 30004 rows + │ └─ Buffer Est. 30004 rows + │ └─ CTERef[1] Est. 30004 rows └─ Repartition Exchange Est. 2189168 rows │ Partition by: {ca_address_sk_2} └─ Projection Est. 2189168 rows diff --git a/tests/optimizers/tpcds/explains/tpcds1000/q64.explain b/tests/optimizers/tpcds/explains/tpcds1000/q64.explain index a62bf9dcf89..b384c58c43a 100644 --- a/tests/optimizers/tpcds/explains/tpcds1000/q64.explain +++ b/tests/optimizers/tpcds/explains/tpcds1000/q64.explain @@ -24,13 +24,13 @@ CTEDef [0] Projection Est. 747011 rows │ Expressions: [ca_city, ca_city_1, ca_street_name, ca_street_name_1, ca_street_number, ca_street_number_1, ca_zip, ca_zip_1, d_year, expr#count(), expr#sum(ss_coupon_amt), expr#sum(ss_list_price), expr#sum(ss_wholesale_cost), i_product_name, s_store_name, s_zip], i_item_sk:=ss_item_sk └─ MergingAggregated Est. 747011 rows - └─ Repartition Exchange Est. 747011 rows + └─ Repartition Exchange Est. 376045 rows │ Partition by: {i_product_name, ss_item_sk, s_store_name, s_zip, ca_street_number, ca_street_name, ca_city, ca_zip, ca_street_number_1, ca_street_name_1, ca_city_1, ca_zip_1, d_year, d_year_1, d_year_2} - └─ Aggregating Est. 747011 rows + └─ Aggregating Est. 376045 rows │ Group by: {i_product_name, ss_item_sk, s_store_name, s_zip, ca_street_number, ca_street_name, ca_city, ca_zip, ca_street_number_1, ca_street_name_1, ca_city_1, ca_zip_1, d_year, d_year_1, d_year_2} │ Group by keys not hashed: {i_product_name} │ Aggregates: expr#count():=AggNull(count)(), expr#sum(ss_wholesale_cost):=AggNull(sum)(ss_wholesale_cost), expr#sum(ss_list_price):=AggNull(sum)(ss_list_price), expr#sum(ss_coupon_amt):=AggNull(sum)(ss_coupon_amt) - └─ Inner Join Est. 747011 rows + └─ Inner Join Est. 
376045 rows │ Condition: ca_address_sk_1 == c_current_addr_sk │ Runtime Filters Builder: {c_current_addr_sk} ├─ Repartition Exchange Est. 6000000 rows diff --git a/tests/optimizers/tpcds/explains/tpcds1000/q74.explain b/tests/optimizers/tpcds/explains/tpcds1000/q74.explain index 490bd8505ba..0ced0ef1579 100644 --- a/tests/optimizers/tpcds/explains/tpcds1000/q74.explain +++ b/tests/optimizers/tpcds/explains/tpcds1000/q74.explain @@ -9,7 +9,7 @@ Projection Est. 100 rows └─ Sorting Est. 100 rows │ Order by: {c_customer_id_7 ASC NULLS LAST, c_customer_id_7 ASC NULLS LAST, c_customer_id_7 ASC NULLS LAST} │ Limit: 100 - └─ Inner (PARALLEL_HASH) Join Est. 7798070 rows + └─ Inner (PARALLEL_HASH) Join Est. 7769526 rows │ Condition: ss_customer_sk_5 == c_customer_sk_2 │ Filter: multiIf(`expr#sum(ss_net_paid)_4` > 0, `expr#sum(ss_net_paid)_5` / `expr#sum(ss_net_paid)_4`, NULL) > multiIf(`expr#sum(ss_net_paid)_2` > 0, `expr#sum(ss_net_paid)_3` / `expr#sum(ss_net_paid)_2`, NULL) ├─ Inner (PARALLEL_HASH) Join Est. 4840036 rows @@ -28,7 +28,7 @@ Projection Est. 100 rows │ └─ Local Exchange Est. 10726288 rows │ └─ Buffer Est. 10726288 rows │ └─ CTERef[0] Est. 10726288 rows - └─ Inner (PARALLEL_HASH) Join Est. 4813622 rows + └─ Inner (PARALLEL_HASH) Join Est. 4796003 rows │ Condition: ss_customer_sk_3 == c_customer_sk_2 ├─ Projection Est. 2681572 rows │ │ Expressions: [expr#sum(ss_net_paid)_3, ss_customer_sk_3] diff --git a/tests/optimizers/tpcds/explains/tpcds1000/q83.explain b/tests/optimizers/tpcds/explains/tpcds1000/q83.explain index 603fa040ebd..4620b7aa3f3 100644 --- a/tests/optimizers/tpcds/explains/tpcds1000/q83.explain +++ b/tests/optimizers/tpcds/explains/tpcds1000/q83.explain @@ -23,7 +23,7 @@ Projection Est. 100 rows │ │ └─ Aggregating Est. 149640 rows │ │ │ Group by: {i_item_id_1} │ │ │ Aggregates: expr#sum(cr_return_quantity):=AggNull(sum)(cr_return_quantity) - │ │ └─ Inner Join Est. 1365545 rows + │ │ └─ Inner Join Est. 
1368790 rows │ │ │ Condition: cr_item_sk == i_item_sk_1 │ │ ├─ Inner Join Est. 1368790 rows │ │ │ │ Condition: cr_returned_date_sk == d_date_sk_1 @@ -52,7 +52,8 @@ Projection Est. 100 rows │ │ │ │ Where: Runtime Filters: {d_week_seq} │ │ │ │ Outputs: d_date_4:=d_date, d_week_seq_2:=d_week_seq │ │ │ └─ Broadcast Exchange Est. 3 rows - │ │ │ └─ CTERef[1] Est. 3 rows + │ │ │ └─ Buffer Est. 3 rows + │ │ │ └─ CTERef[1] Est. 3 rows │ │ └─ Filter Est. 300000 rows │ │ │ Condition: Runtime Filters: {i_item_id_1} │ │ └─ TableScan tpcds1000.item Est. 300000 rows @@ -64,7 +65,7 @@ Projection Est. 100 rows │ └─ Aggregating Est. 149640 rows │ │ Group by: {i_item_id} │ │ Aggregates: expr#sum(sr_return_quantity):=AggNull(sum)(sr_return_quantity) - │ └─ Inner Join Est. 2869952 rows + │ └─ Inner Join Est. 2875684 rows │ │ Condition: sr_item_sk == i_item_sk │ ├─ Inner Join Est. 2875684 rows │ │ │ Condition: sr_returned_date_sk == d_date_sk @@ -93,7 +94,8 @@ Projection Est. 100 rows │ │ │ Where: Runtime Filters: {d_week_seq} │ │ │ Outputs: [d_week_seq], d_date_1:=d_date │ │ └─ Broadcast Exchange Est. 3 rows - │ │ └─ CTERef[1] Est. 3 rows + │ │ └─ Buffer Est. 3 rows + │ │ └─ CTERef[1] Est. 3 rows │ └─ Filter Est. 300000 rows │ │ Condition: Runtime Filters: {i_item_id} │ └─ TableScan tpcds1000.item Est. 300000 rows @@ -105,7 +107,7 @@ Projection Est. 100 rows └─ Aggregating Est. 149640 rows │ Group by: {i_item_id_2} │ Aggregates: expr#sum(wr_return_quantity):=AggNull(sum)(wr_return_quantity) - └─ Inner Join Est. 657811 rows + └─ Inner Join Est. 659317 rows │ Condition: wr_item_sk == i_item_sk_2 ├─ Inner Join Est. 659317 rows │ │ Condition: wr_returned_date_sk == d_date_sk_2 @@ -134,7 +136,8 @@ Projection Est. 100 rows │ │ Where: Runtime Filters: {d_week_seq} │ │ Outputs: d_date_7:=d_date, d_week_seq_4:=d_week_seq │ └─ Broadcast Exchange Est. 3 rows - │ └─ CTERef[1] Est. 3 rows + │ └─ Buffer Est. 3 rows + │ └─ CTERef[1] Est. 3 rows └─ TableScan tpcds1000.item Est. 
300000 rows Outputs: i_item_sk_2:=i_item_sk, i_item_id_2:=i_item_id CTEDef [1] diff --git a/tests/optimizers/tpcds/explains/tpcds1000_not_show_stats/q1.explain b/tests/optimizers/tpcds/explains/tpcds1000_not_show_stats/q1.explain index 03c4ff7b7c8..387712b9273 100644 --- a/tests/optimizers/tpcds/explains/tpcds1000_not_show_stats/q1.explain +++ b/tests/optimizers/tpcds/explains/tpcds1000_not_show_stats/q1.explain @@ -42,7 +42,8 @@ Projection │ │ │ Where: Runtime Filters: {sr_returned_date_sk, sr_store_sk} │ │ │ Outputs: sr_returned_date_sk_1:=sr_returned_date_sk, sr_customer_sk_3:=sr_customer_sk, sr_store_sk_3:=sr_store_sk, sr_return_amt_1:=sr_return_amt │ │ └─ Broadcast Exchange - │ │ └─ CTERef[1] + │ │ └─ Buffer + │ │ └─ CTERef[1] │ └─ Broadcast Exchange │ └─ Projection │ │ Expressions: [d_date_sk_1] @@ -81,7 +82,8 @@ Projection │ │ │ Where: Runtime Filters: {sr_returned_date_sk, sr_store_sk} │ │ │ Outputs: sr_returned_date_sk_2:=sr_returned_date_sk, sr_customer_sk_4:=sr_customer_sk, sr_store_sk_4:=sr_store_sk, sr_return_amt_2:=sr_return_amt │ │ └─ Broadcast Exchange - │ │ └─ CTERef[1] + │ │ └─ Buffer + │ │ └─ CTERef[1] │ └─ Broadcast Exchange │ └─ Projection │ │ Expressions: [d_date_sk_2] @@ -92,7 +94,8 @@ Projection │ Outputs: d_date_sk_2:=d_date_sk, d_year_2:=d_year └─ Repartition Exchange │ Partition by: {s_store_sk} - └─ CTERef[1] + └─ Buffer + └─ CTERef[1] CTEDef [1] Projection │ Expressions: [s_store_sk] diff --git a/tests/optimizers/tpcds/explains/tpcds1000_not_show_stats/q14.explain b/tests/optimizers/tpcds/explains/tpcds1000_not_show_stats/q14.explain index 4b55845ae52..bfc0fdd9a46 100644 --- a/tests/optimizers/tpcds/explains/tpcds1000_not_show_stats/q14.explain +++ b/tests/optimizers/tpcds/explains/tpcds1000_not_show_stats/q14.explain @@ -56,9 +56,11 @@ Projection │ │ │ │ Outputs: [d_date_sk, d_year, d_moy] │ │ │ └─ TableScan tpcds1000.item │ │ │ Outputs: [i_item_sk, i_brand_id, i_class_id, i_category_id] - │ │ └─ CTERef[0] + │ │ └─ Buffer + │ │ └─ 
CTERef[0] │ └─ Broadcast Exchange - │ └─ CTERef[2] + │ └─ Buffer + │ └─ CTERef[2] ├─ Aggregating │ │ Group by: {expr#'catalog', i_brand_id_7, i_class_id_7, i_category_id_7} │ │ Aggregates: expr#sum(sales):=AggNull(sum)(expr#sum(multiply(cs_quantity, cs_list_price))), expr#sum(number_sales):=AggNull(sum)(expr#count()_1) @@ -99,9 +101,11 @@ Projection │ │ │ │ Outputs: d_date_sk_7:=d_date_sk, d_year_7:=d_year, d_moy_1:=d_moy │ │ │ └─ TableScan tpcds1000.item │ │ │ Outputs: i_item_sk_6:=i_item_sk, i_brand_id_7:=i_brand_id, i_class_id_7:=i_class_id, i_category_id_7:=i_category_id - │ │ └─ CTERef[0] + │ │ └─ Buffer + │ │ └─ CTERef[0] │ └─ Broadcast Exchange - │ └─ CTERef[2] + │ └─ Buffer + │ └─ CTERef[2] └─ Aggregating │ Group by: {expr#'web', i_brand_id_8, i_class_id_8, i_category_id_8} │ Aggregates: expr#sum(sales):=AggNull(sum)(expr#sum(multiply(ws_quantity, ws_list_price))), expr#sum(number_sales):=AggNull(sum)(expr#count()_2) @@ -142,9 +146,11 @@ Projection │ │ │ Outputs: d_date_sk_8:=d_date_sk, d_year_8:=d_year, d_moy_2:=d_moy │ │ └─ TableScan tpcds1000.item │ │ Outputs: i_item_sk_8:=i_item_sk, i_brand_id_8:=i_brand_id, i_class_id_8:=i_class_id, i_category_id_8:=i_category_id - │ └─ CTERef[0] + │ └─ Buffer + │ └─ CTERef[0] └─ Broadcast Exchange - └─ CTERef[2] + └─ Buffer + └─ CTERef[2] CTEDef [0] Repartition Exchange │ Partition by: {i_item_sk_1} @@ -358,9 +364,11 @@ Projection │ │ │ └─ TableScan tpcds1000.date_dim │ │ │ Where: (d_year = cast(2000, 'UInt32')) AND (d_moy = 12) AND (d_dom = 11) │ │ │ Outputs: [d_dom], d_week_seq_1:=d_week_seq, d_year_30:=d_year, d_moy_3:=d_moy - │ │ └─ CTERef[0] + │ │ └─ Buffer + │ │ └─ CTERef[0] │ └─ Broadcast Exchange - │ └─ CTERef[3] + │ └─ Buffer + │ └─ CTERef[3] └─ Projection │ Expressions: [expr#count()_5, expr#sum(multiply(ss_quantity, ss_list_price))_3, i_brand_id_29, i_category_id_29, i_class_id_29], expr#'store'_3:='store' └─ Inner Join @@ -409,9 +417,11 @@ Projection │ │ └─ TableScan tpcds1000.date_dim │ │ Where: (d_year = 
1999) AND (d_moy = 12) AND (d_dom = 11) │ │ Outputs: d_week_seq_4:=d_week_seq, d_year_37:=d_year, d_moy_4:=d_moy, d_dom_1:=d_dom - │ └─ CTERef[0] + │ └─ Buffer + │ └─ CTERef[0] └─ Broadcast Exchange - └─ CTERef[3] + └─ Buffer + └─ CTERef[3] CTEDef [0] Repartition Exchange │ Partition by: {i_item_sk_18} diff --git a/tests/optimizers/tpcds/explains/tpcds1000_not_show_stats/q23.explain b/tests/optimizers/tpcds/explains/tpcds1000_not_show_stats/q23.explain index 37381855edf..81a23f0e26d 100644 --- a/tests/optimizers/tpcds/explains/tpcds1000_not_show_stats/q23.explain +++ b/tests/optimizers/tpcds/explains/tpcds1000_not_show_stats/q23.explain @@ -21,8 +21,7 @@ Projection │ │ │ Runtime Filters Builder: {cs_item_sk} │ │ ├─ Filter │ │ │ │ Condition: Runtime Filters: {i_item_sk_1} - │ │ │ └─ Local Exchange - │ │ │ └─ CTERef[0] + │ │ │ └─ CTERef[0] │ │ └─ Inner Join │ │ │ Condition: cs_sold_date_sk == d_date_sk │ │ │ Runtime Filters Builder: {d_date_sk} @@ -39,7 +38,8 @@ Projection │ │ └─ TableScan tpcds1000.date_dim │ │ Where: (d_year = 2000) AND (d_moy = 2) │ │ Outputs: [d_date_sk, d_year, d_moy] - │ └─ CTERef[1] + │ └─ Buffer + │ └─ CTERef[1] └─ Aggregating │ Group by: {} │ Aggregates: expr#sum(sales):=AggNull(sum)(expr#multiply(ws_quantity, ws_list_price)) @@ -55,8 +55,7 @@ Projection │ │ Runtime Filters Builder: {ws_item_sk} │ ├─ Filter │ │ │ Condition: Runtime Filters: {i_item_sk_2} - │ │ └─ Local Exchange - │ │ └─ CTERef[0] + │ │ └─ CTERef[0] │ └─ Inner Join │ │ Condition: ws_sold_date_sk == d_date_sk_3 │ │ Runtime Filters Builder: {d_date_sk_3} @@ -74,7 +73,8 @@ Projection │ Where: (d_moy = 2) AND (d_year = 2000) │ Outputs: d_date_sk_3:=d_date_sk, d_year_3:=d_year, d_moy_1:=d_moy └─ Local Exchange - └─ CTERef[1] + └─ Buffer + └─ CTERef[1] CTEDef [0] Projection │ Expressions: i_item_sk:=ss_item_sk @@ -193,7 +193,8 @@ Projection │ │ ├─ Filter │ │ │ │ Condition: Runtime Filters: {i_item_sk_6} │ │ │ └─ Local Exchange - │ │ │ └─ CTERef[0] + │ │ │ └─ Buffer + │ │ │ └─ 
CTERef[0] │ │ └─ Inner Join │ │ │ Condition: cs_sold_date_sk_1 == d_date_sk_10 │ │ │ Runtime Filters Builder: {d_date_sk_10} @@ -210,7 +211,8 @@ Projection │ │ └─ TableScan tpcds1000.date_dim │ │ Where: (d_moy = 2) AND (d_year = 2000) │ │ Outputs: d_date_sk_10:=d_date_sk, d_year_10:=d_year, d_moy_2:=d_moy - │ └─ CTERef[1] + │ └─ Buffer + │ └─ CTERef[1] └─ MergingAggregated └─ Repartition Exchange │ Partition by: {c_last_name_1, c_first_name_1} @@ -240,7 +242,8 @@ Projection │ ├─ Filter │ │ │ Condition: Runtime Filters: {i_item_sk_7} │ │ └─ Local Exchange - │ │ └─ CTERef[0] + │ │ └─ Buffer + │ │ └─ CTERef[0] │ └─ Inner Join │ │ Condition: ws_sold_date_sk_1 == d_date_sk_13 │ │ Runtime Filters Builder: {d_date_sk_13} @@ -257,7 +260,8 @@ Projection │ └─ TableScan tpcds1000.date_dim │ Where: (d_moy = 2) AND (d_year = 2000) │ Outputs: d_date_sk_13:=d_date_sk, d_year_13:=d_year, d_moy_3:=d_moy - └─ CTERef[1] + └─ Buffer + └─ CTERef[1] CTEDef [0] Projection │ Expressions: i_item_sk_5:=ss_item_sk_3 diff --git a/tests/optimizers/tpcds/explains/tpcds1000_not_show_stats/q24.explain b/tests/optimizers/tpcds/explains/tpcds1000_not_show_stats/q24.explain index fe801a8c422..dfd5eba84fa 100644 --- a/tests/optimizers/tpcds/explains/tpcds1000_not_show_stats/q24.explain +++ b/tests/optimizers/tpcds/explains/tpcds1000_not_show_stats/q24.explain @@ -1,25 +1,73 @@ Projection -│ Expressions: c_first_name:=c_first_name_1, c_last_name:=c_last_name_1, paid:=`expr#sum(netpaid)`, s_store_name:=s_store_name_1 +│ Expressions: c_first_name:=c_first_name_5, c_last_name:=c_last_name_5, paid:=`expr#sum(netpaid)`, s_store_name:=s_store_name_5 └─ Sorting - │ Order by: {c_last_name_1 ASC NULLS LAST, c_first_name_1 ASC NULLS LAST, s_store_name_1 ASC NULLS LAST} + │ Order by: {c_last_name_5 ASC NULLS LAST, c_first_name_5 ASC NULLS LAST, s_store_name_5 ASC NULLS LAST} └─ Gather Exchange └─ Sorting - │ Order by: {c_last_name_1 ASC NULLS LAST, c_first_name_1 ASC NULLS LAST, s_store_name_1 ASC NULLS LAST} + 
│ Order by: {c_last_name_5 ASC NULLS LAST, c_first_name_5 ASC NULLS LAST, s_store_name_5 ASC NULLS LAST} └─ Inner Join │ Condition: │ Filter: `expr#sum(netpaid)` > `expr#multiply('0.05', avg(netpaid))` ├─ MergingAggregated │ └─ Repartition Exchange - │ │ Partition by: {c_last_name_1, c_first_name_1, s_store_name_1} + │ │ Partition by: {c_last_name_5, c_first_name_5, s_store_name_5} │ └─ Aggregating - │ │ Group by: {c_last_name_1, c_first_name_1, s_store_name_1} - │ │ Aggregates: expr#sum(netpaid):=AggNull(sum)(expr#sum(ss_net_paid)_1) - │ └─ Projection - │ │ Expressions: [c_first_name_1, c_last_name_1, expr#sum(ss_net_paid)_1, s_store_name_1] - │ └─ Filter - │ │ Condition: i_color_1 = 'peach' - │ └─ Buffer - │ └─ CTERef[0] + │ │ Group by: {c_last_name_5, c_first_name_5, s_store_name_5} + │ │ Aggregates: expr#sum(netpaid):=AggNull(sum)(expr#sum(ss_net_paid)_5) + │ └─ MergingAggregated + │ └─ Repartition Exchange + │ │ Partition by: {c_last_name_5, c_first_name_5, s_store_name_5, ca_state_5, s_state_5, i_color_5, i_current_price_5, i_manager_id_5, i_units_5, i_size_5} + │ └─ Aggregating + │ │ Group by: {c_last_name_5, c_first_name_5, s_store_name_5, ca_state_5, s_state_5, i_color_5, i_current_price_5, i_manager_id_5, i_units_5, i_size_5} + │ │ Aggregates: expr#sum(ss_net_paid)_5:=AggNull(sum)(ss_net_paid_3) + │ └─ Inner (PARALLEL_HASH) Join + │ │ Condition: s_zip_3 == ca_zip_3, ss_customer_sk_5 == c_customer_sk_5 + │ ├─ Repartition Exchange + │ │ │ Partition by: {s_zip_3, ss_customer_sk_5} + │ │ └─ Inner (PARALLEL_HASH) Join + │ │ │ Condition: ss_item_sk_11 == sr_item_sk_7, ss_ticket_number_11 == sr_ticket_number_11 + │ │ ├─ Inner Join + │ │ │ │ Condition: ss_store_sk_9 == s_store_sk_9 + │ │ │ │ Runtime Filters Builder: {s_store_sk_9} + │ │ │ ├─ Filter + │ │ │ │ │ Condition: Runtime Filters: {ss_store_sk_9} + │ │ │ │ └─ TableScan tpcds1000.store_sales + │ │ │ │ Where: Runtime Filters: {ss_store_sk} + │ │ │ │ Outputs: ss_item_sk_11:=ss_item_sk, 
ss_customer_sk_5:=ss_customer_sk, ss_store_sk_9:=ss_store_sk, ss_ticket_number_11:=ss_ticket_number, ss_net_paid_3:=ss_net_paid + │ │ │ └─ Broadcast Exchange + │ │ │ └─ Projection + │ │ │ │ Expressions: [s_state_5, s_store_name_5, s_store_sk_9, s_zip_3] + │ │ │ └─ Filter + │ │ │ │ Condition: s_market_id_9 = 8 + │ │ │ └─ TableScan tpcds1000.store + │ │ │ Where: s_market_id = 8 + │ │ │ Outputs: s_store_sk_9:=s_store_sk, s_store_name_5:=s_store_name, s_market_id_9:=s_market_id, s_state_5:=s_state, s_zip_3:=s_zip + │ │ └─ Inner Join + │ │ │ Condition: sr_item_sk_7 == i_item_sk_7 + │ │ │ Runtime Filters Builder: {i_item_sk_7} + │ │ ├─ Filter + │ │ │ │ Condition: Runtime Filters: {sr_item_sk_7} + │ │ │ └─ TableScan tpcds1000.store_returns + │ │ │ Where: Runtime Filters: {sr_item_sk} + │ │ │ Outputs: sr_item_sk_7:=sr_item_sk, sr_ticket_number_11:=sr_ticket_number + │ │ └─ Filter + │ │ │ Condition: i_color_5 = 'peach' + │ │ └─ TableScan tpcds1000.item + │ │ Where: i_color = 'peach' + │ │ Outputs: i_item_sk_7:=i_item_sk, i_current_price_5:=i_current_price, i_size_5:=i_size, i_color_5:=i_color, i_units_5:=i_units, i_manager_id_5:=i_manager_id + │ └─ Repartition Exchange + │ │ Partition by: {ca_zip_3, c_customer_sk_5} + │ └─ Inner (PARALLEL_HASH) Join + │ │ Condition: c_current_addr_sk_3 == ca_address_sk_3 + │ │ Filter: c_birth_country_3 != upper(ca_country_3) + │ ├─ Repartition Exchange + │ │ │ Partition by: {c_current_addr_sk_3} + │ │ └─ TableScan tpcds1000.customer + │ │ Outputs: c_customer_sk_5:=c_customer_sk, c_current_addr_sk_3:=c_current_addr_sk, c_first_name_5:=c_first_name, c_last_name_5:=c_last_name, c_birth_country_3:=c_birth_country + │ └─ Repartition Exchange + │ │ Partition by: {ca_address_sk_3} + │ └─ TableScan tpcds1000.customer_address + │ Outputs: ca_address_sk_3:=ca_address_sk, ca_state_5:=ca_state, ca_zip_3:=ca_zip, ca_country_3:=ca_country └─ Broadcast Exchange └─ Projection │ Expressions: expr#multiply('0.05', avg(netpaid)):='0.05' * `expr#avg(netpaid)` 
@@ -27,81 +75,126 @@ Projection └─ Gather Exchange └─ Aggregating │ Group by: {} - │ Aggregates: expr#avg(netpaid):=AggNull(avg)(expr#sum(ss_net_paid)_2) + │ Aggregates: expr#avg(netpaid):=AggNull(avg)(expr#sum(ss_net_paid)_6) └─ Buffer - └─ CTERef[0] -CTEDef [0] - MergingAggregated - └─ Repartition Exchange - │ Partition by: {c_last_name, c_first_name, s_store_name, ca_state, s_state, i_color, i_current_price, i_manager_id, i_units, i_size} - └─ Aggregating - │ Group by: {c_last_name, c_first_name, s_store_name, ca_state, s_state, i_color, i_current_price, i_manager_id, i_units, i_size} - │ Aggregates: expr#sum(ss_net_paid):=AggNull(sum)(ss_net_paid) - └─ Inner (PARALLEL_HASH) Join - │ Condition: s_zip == ca_zip, ss_customer_sk == c_customer_sk - ├─ Repartition Exchange - │ │ Partition by: {s_zip, ss_customer_sk} - │ └─ Inner (PARALLEL_HASH) Join - │ │ Condition: ss_item_sk == sr_item_sk, ss_ticket_number == sr_ticket_number - │ ├─ Inner Join - │ │ │ Condition: ss_store_sk == s_store_sk - │ │ │ Runtime Filters Builder: {s_store_sk} - │ │ ├─ Filter - │ │ │ │ Condition: Runtime Filters: {ss_store_sk} - │ │ │ └─ TableScan tpcds1000.store_sales - │ │ │ Where: Runtime Filters: {ss_store_sk} - │ │ │ Outputs: [ss_item_sk, ss_customer_sk, ss_store_sk, ss_ticket_number, ss_net_paid] - │ │ └─ Broadcast Exchange - │ │ └─ Projection - │ │ │ Expressions: [s_state, s_store_name, s_store_sk, s_zip] - │ │ └─ Filter - │ │ │ Condition: s_market_id = 8 - │ │ └─ TableScan tpcds1000.store - │ │ Where: s_market_id = 8 - │ │ Outputs: [s_store_sk, s_store_name, s_market_id, s_state, s_zip] - │ └─ Inner Join - │ │ Condition: sr_item_sk == i_item_sk - │ ├─ TableScan tpcds1000.store_returns - │ │ Outputs: [sr_item_sk, sr_ticket_number] - │ └─ TableScan tpcds1000.item - │ Outputs: [i_item_sk, i_current_price, i_size, i_color, i_units, i_manager_id] - └─ Repartition Exchange - │ Partition by: {ca_zip, c_customer_sk} - └─ Inner (PARALLEL_HASH) Join - │ Condition: c_current_addr_sk == 
ca_address_sk - │ Filter: c_birth_country != upper(ca_country) - ├─ Repartition Exchange - │ │ Partition by: {c_current_addr_sk} - │ └─ TableScan tpcds1000.customer - │ Outputs: [c_customer_sk, c_current_addr_sk, c_first_name, c_last_name, c_birth_country] - └─ Repartition Exchange - │ Partition by: {ca_address_sk} - └─ TableScan tpcds1000.customer_address - Outputs: [ca_address_sk, ca_state, ca_zip, ca_country] -note: Runtime Filter is applied for 1 times. -note: CTE(Common Table Expression) is applied for 2 times. + └─ MergingAggregated + └─ Repartition Exchange + │ Partition by: {c_last_name_6, c_first_name_6, s_store_name_6, ca_state_6, s_state_6, i_color_6, i_current_price_6, i_manager_id_6, i_units_6, i_size_6} + └─ Aggregating + │ Group by: {c_last_name_6, c_first_name_6, s_store_name_6, ca_state_6, s_state_6, i_color_6, i_current_price_6, i_manager_id_6, i_units_6, i_size_6} + │ Aggregates: expr#sum(ss_net_paid)_6:=AggNull(sum)(ss_net_paid_4) + └─ Inner (PARALLEL_HASH) Join + │ Condition: s_zip_5 == ca_zip_5, ss_customer_sk_7 == c_customer_sk_7 + ├─ Repartition Exchange + │ │ Partition by: {s_zip_5, ss_customer_sk_7} + │ └─ Inner (PARALLEL_HASH) Join + │ │ Condition: ss_item_sk_14 == sr_item_sk_11, ss_ticket_number_14 == sr_ticket_number_14 + │ ├─ Inner Join + │ │ │ Condition: ss_store_sk_13 == s_store_sk_13 + │ │ │ Runtime Filters Builder: {s_store_sk_13} + │ │ ├─ Filter + │ │ │ │ Condition: Runtime Filters: {ss_store_sk_13} + │ │ │ └─ TableScan tpcds1000.store_sales + │ │ │ Where: Runtime Filters: {ss_store_sk} + │ │ │ Outputs: ss_item_sk_14:=ss_item_sk, ss_customer_sk_7:=ss_customer_sk, ss_store_sk_13:=ss_store_sk, ss_ticket_number_14:=ss_ticket_number, ss_net_paid_4:=ss_net_paid + │ │ └─ Broadcast Exchange + │ │ └─ Projection + │ │ │ Expressions: [s_state_6, s_store_name_6, s_store_sk_13, s_zip_5] + │ │ └─ Filter + │ │ │ Condition: s_market_id_13 = 8 + │ │ └─ TableScan tpcds1000.store + │ │ Where: s_market_id = 8 + │ │ Outputs: 
s_store_sk_13:=s_store_sk, s_store_name_6:=s_store_name, s_market_id_13:=s_market_id, s_state_6:=s_state, s_zip_5:=s_zip + │ └─ Inner Join + │ │ Condition: sr_item_sk_11 == i_item_sk_10 + │ ├─ TableScan tpcds1000.store_returns + │ │ Outputs: sr_item_sk_11:=sr_item_sk, sr_ticket_number_14:=sr_ticket_number + │ └─ TableScan tpcds1000.item + │ Outputs: i_item_sk_10:=i_item_sk, i_current_price_6:=i_current_price, i_size_6:=i_size, i_color_6:=i_color, i_units_6:=i_units, i_manager_id_6:=i_manager_id + └─ Repartition Exchange + │ Partition by: {ca_zip_5, c_customer_sk_7} + └─ Inner (PARALLEL_HASH) Join + │ Condition: c_current_addr_sk_6 == ca_address_sk_6 + │ Filter: c_birth_country_4 != upper(ca_country_4) + ├─ Repartition Exchange + │ │ Partition by: {c_current_addr_sk_6} + │ └─ TableScan tpcds1000.customer + │ Outputs: c_customer_sk_7:=c_customer_sk, c_current_addr_sk_6:=c_current_addr_sk, c_first_name_6:=c_first_name, c_last_name_6:=c_last_name, c_birth_country_4:=c_birth_country + └─ Repartition Exchange + │ Partition by: {ca_address_sk_6} + └─ TableScan tpcds1000.customer_address + Outputs: ca_address_sk_6:=ca_address_sk, ca_state_6:=ca_state, ca_zip_5:=ca_zip, ca_country_4:=ca_country +note: Runtime Filter is applied for 3 times. 
Projection -│ Expressions: c_first_name:=c_first_name_6, c_last_name:=c_last_name_6, paid:=`expr#sum(netpaid)_1`, s_store_name:=s_store_name_6 +│ Expressions: c_first_name:=c_first_name_12, c_last_name:=c_last_name_12, paid:=`expr#sum(netpaid)_1`, s_store_name:=s_store_name_12 └─ Sorting - │ Order by: {c_last_name_6 ASC NULLS LAST, c_first_name_6 ASC NULLS LAST, s_store_name_6 ASC NULLS LAST} + │ Order by: {c_last_name_12 ASC NULLS LAST, c_first_name_12 ASC NULLS LAST, s_store_name_12 ASC NULLS LAST} └─ Gather Exchange └─ Sorting - │ Order by: {c_last_name_6 ASC NULLS LAST, c_first_name_6 ASC NULLS LAST, s_store_name_6 ASC NULLS LAST} + │ Order by: {c_last_name_12 ASC NULLS LAST, c_first_name_12 ASC NULLS LAST, s_store_name_12 ASC NULLS LAST} └─ Inner Join │ Condition: │ Filter: `expr#sum(netpaid)_1` > `expr#multiply('0.05', avg(netpaid))_1` ├─ MergingAggregated │ └─ Repartition Exchange - │ │ Partition by: {c_last_name_6, c_first_name_6, s_store_name_6} + │ │ Partition by: {c_last_name_12, c_first_name_12, s_store_name_12} │ └─ Aggregating - │ │ Group by: {c_last_name_6, c_first_name_6, s_store_name_6} - │ │ Aggregates: expr#sum(netpaid)_1:=AggNull(sum)(expr#sum(ss_net_paid)_6) - │ └─ Projection - │ │ Expressions: [c_first_name_6, c_last_name_6, expr#sum(ss_net_paid)_6, s_store_name_6] - │ └─ Filter - │ │ Condition: i_color_6 = 'saddle' - │ └─ Buffer - │ └─ CTERef[0] + │ │ Group by: {c_last_name_12, c_first_name_12, s_store_name_12} + │ │ Aggregates: expr#sum(netpaid)_1:=AggNull(sum)(expr#sum(ss_net_paid)_12) + │ └─ MergingAggregated + │ └─ Repartition Exchange + │ │ Partition by: {c_last_name_12, c_first_name_12, s_store_name_12, ca_state_12, s_state_12, i_color_12, i_current_price_12, i_manager_id_12, i_units_12, i_size_12} + │ └─ Aggregating + │ │ Group by: {c_last_name_12, c_first_name_12, s_store_name_12, ca_state_12, s_state_12, i_color_12, i_current_price_12, i_manager_id_12, i_units_12, i_size_12} + │ │ Aggregates: 
expr#sum(ss_net_paid)_12:=AggNull(sum)(ss_net_paid_8) + │ └─ Inner (PARALLEL_HASH) Join + │ │ Condition: s_zip_10 == ca_zip_10, ss_customer_sk_14 == c_customer_sk_14 + │ ├─ Repartition Exchange + │ │ │ Partition by: {s_zip_10, ss_customer_sk_14} + │ │ └─ Inner (PARALLEL_HASH) Join + │ │ │ Condition: ss_item_sk_28 == sr_item_sk_22, ss_ticket_number_28 == sr_ticket_number_28 + │ │ ├─ Inner Join + │ │ │ │ Condition: ss_store_sk_26 == s_store_sk_26 + │ │ │ │ Runtime Filters Builder: {s_store_sk_26} + │ │ │ ├─ Filter + │ │ │ │ │ Condition: Runtime Filters: {ss_store_sk_26} + │ │ │ │ └─ TableScan tpcds1000.store_sales + │ │ │ │ Where: Runtime Filters: {ss_store_sk} + │ │ │ │ Outputs: ss_item_sk_28:=ss_item_sk, ss_customer_sk_14:=ss_customer_sk, ss_store_sk_26:=ss_store_sk, ss_ticket_number_28:=ss_ticket_number, ss_net_paid_8:=ss_net_paid + │ │ │ └─ Broadcast Exchange + │ │ │ └─ Projection + │ │ │ │ Expressions: [s_state_12, s_store_name_12, s_store_sk_26, s_zip_10] + │ │ │ └─ Filter + │ │ │ │ Condition: s_market_id_26 = 8 + │ │ │ └─ TableScan tpcds1000.store + │ │ │ Where: s_market_id = 8 + │ │ │ Outputs: s_store_sk_26:=s_store_sk, s_store_name_12:=s_store_name, s_market_id_26:=s_market_id, s_state_12:=s_state, s_zip_10:=s_zip + │ │ └─ Inner Join + │ │ │ Condition: sr_item_sk_22 == i_item_sk_20 + │ │ │ Runtime Filters Builder: {i_item_sk_20} + │ │ ├─ Filter + │ │ │ │ Condition: Runtime Filters: {sr_item_sk_22} + │ │ │ └─ TableScan tpcds1000.store_returns + │ │ │ Where: Runtime Filters: {sr_item_sk} + │ │ │ Outputs: sr_item_sk_22:=sr_item_sk, sr_ticket_number_28:=sr_ticket_number + │ │ └─ Filter + │ │ │ Condition: i_color_12 = 'saddle' + │ │ └─ TableScan tpcds1000.item + │ │ Where: i_color = 'saddle' + │ │ Outputs: i_item_sk_20:=i_item_sk, i_current_price_12:=i_current_price, i_size_12:=i_size, i_color_12:=i_color, i_units_12:=i_units, i_manager_id_12:=i_manager_id + │ └─ Repartition Exchange + │ │ Partition by: {ca_zip_10, c_customer_sk_14} + │ └─ Inner (PARALLEL_HASH) 
Join + │ │ Condition: c_current_addr_sk_12 == ca_address_sk_12 + │ │ Filter: c_birth_country_8 != upper(ca_country_8) + │ ├─ Repartition Exchange + │ │ │ Partition by: {c_current_addr_sk_12} + │ │ └─ TableScan tpcds1000.customer + │ │ Outputs: c_customer_sk_14:=c_customer_sk, c_current_addr_sk_12:=c_current_addr_sk, c_first_name_12:=c_first_name, c_last_name_12:=c_last_name, c_birth_country_8:=c_birth_country + │ └─ Repartition Exchange + │ │ Partition by: {ca_address_sk_12} + │ └─ TableScan tpcds1000.customer_address + │ Outputs: ca_address_sk_12:=ca_address_sk, ca_state_12:=ca_state, ca_zip_10:=ca_zip, ca_country_8:=ca_country └─ Broadcast Exchange └─ Projection │ Expressions: expr#multiply('0.05', avg(netpaid))_1:='0.05' * `expr#avg(netpaid)_1` @@ -109,56 +202,53 @@ Projection └─ Gather Exchange └─ Aggregating │ Group by: {} - │ Aggregates: expr#avg(netpaid)_1:=AggNull(avg)(expr#sum(ss_net_paid)_7) + │ Aggregates: expr#avg(netpaid)_1:=AggNull(avg)(expr#sum(ss_net_paid)_13) └─ Buffer - └─ CTERef[0] -CTEDef [0] - MergingAggregated - └─ Repartition Exchange - │ Partition by: {c_last_name_5, c_first_name_5, s_store_name_5, ca_state_5, s_state_5, i_color_5, i_current_price_5, i_manager_id_5, i_units_5, i_size_5} - └─ Aggregating - │ Group by: {c_last_name_5, c_first_name_5, s_store_name_5, ca_state_5, s_state_5, i_color_5, i_current_price_5, i_manager_id_5, i_units_5, i_size_5} - │ Aggregates: expr#sum(ss_net_paid)_5:=AggNull(sum)(ss_net_paid_3) - └─ Inner (PARALLEL_HASH) Join - │ Condition: s_zip_3 == ca_zip_3, ss_customer_sk_5 == c_customer_sk_5 - ├─ Repartition Exchange - │ │ Partition by: {s_zip_3, ss_customer_sk_5} - │ └─ Inner (PARALLEL_HASH) Join - │ │ Condition: ss_item_sk_11 == sr_item_sk_7, ss_ticket_number_11 == sr_ticket_number_11 - │ ├─ Inner Join - │ │ │ Condition: ss_store_sk_9 == s_store_sk_9 - │ │ │ Runtime Filters Builder: {s_store_sk_9} - │ │ ├─ Filter - │ │ │ │ Condition: Runtime Filters: {ss_store_sk_9} - │ │ │ └─ TableScan tpcds1000.store_sales 
- │ │ │ Where: Runtime Filters: {ss_store_sk} - │ │ │ Outputs: ss_item_sk_11:=ss_item_sk, ss_customer_sk_5:=ss_customer_sk, ss_store_sk_9:=ss_store_sk, ss_ticket_number_11:=ss_ticket_number, ss_net_paid_3:=ss_net_paid - │ │ └─ Broadcast Exchange - │ │ └─ Projection - │ │ │ Expressions: [s_state_5, s_store_name_5, s_store_sk_9, s_zip_3] - │ │ └─ Filter - │ │ │ Condition: s_market_id_9 = 8 - │ │ └─ TableScan tpcds1000.store - │ │ Where: s_market_id = 8 - │ │ Outputs: s_store_sk_9:=s_store_sk, s_store_name_5:=s_store_name, s_market_id_9:=s_market_id, s_state_5:=s_state, s_zip_3:=s_zip - │ └─ Inner Join - │ │ Condition: sr_item_sk_7 == i_item_sk_7 - │ ├─ TableScan tpcds1000.store_returns - │ │ Outputs: sr_item_sk_7:=sr_item_sk, sr_ticket_number_11:=sr_ticket_number - │ └─ TableScan tpcds1000.item - │ Outputs: i_item_sk_7:=i_item_sk, i_current_price_5:=i_current_price, i_size_5:=i_size, i_color_5:=i_color, i_units_5:=i_units, i_manager_id_5:=i_manager_id - └─ Repartition Exchange - │ Partition by: {ca_zip_3, c_customer_sk_5} - └─ Inner (PARALLEL_HASH) Join - │ Condition: c_current_addr_sk_3 == ca_address_sk_3 - │ Filter: c_birth_country_3 != upper(ca_country_3) - ├─ Repartition Exchange - │ │ Partition by: {c_current_addr_sk_3} - │ └─ TableScan tpcds1000.customer - │ Outputs: c_customer_sk_5:=c_customer_sk, c_current_addr_sk_3:=c_current_addr_sk, c_first_name_5:=c_first_name, c_last_name_5:=c_last_name, c_birth_country_3:=c_birth_country - └─ Repartition Exchange - │ Partition by: {ca_address_sk_3} - └─ TableScan tpcds1000.customer_address - Outputs: ca_address_sk_3:=ca_address_sk, ca_state_5:=ca_state, ca_zip_3:=ca_zip, ca_country_3:=ca_country -note: Runtime Filter is applied for 1 times. -note: CTE(Common Table Expression) is applied for 2 times. 
+ └─ MergingAggregated + └─ Repartition Exchange + │ Partition by: {c_last_name_13, c_first_name_13, s_store_name_13, ca_state_13, s_state_13, i_color_13, i_current_price_13, i_manager_id_13, i_units_13, i_size_13} + └─ Aggregating + │ Group by: {c_last_name_13, c_first_name_13, s_store_name_13, ca_state_13, s_state_13, i_color_13, i_current_price_13, i_manager_id_13, i_units_13, i_size_13} + │ Aggregates: expr#sum(ss_net_paid)_13:=AggNull(sum)(ss_net_paid_9) + └─ Inner (PARALLEL_HASH) Join + │ Condition: s_zip_12 == ca_zip_12, ss_customer_sk_16 == c_customer_sk_16 + ├─ Repartition Exchange + │ │ Partition by: {s_zip_12, ss_customer_sk_16} + │ └─ Inner (PARALLEL_HASH) Join + │ │ Condition: ss_item_sk_31 == sr_item_sk_26, ss_ticket_number_31 == sr_ticket_number_31 + │ ├─ Inner Join + │ │ │ Condition: ss_store_sk_30 == s_store_sk_30 + │ │ │ Runtime Filters Builder: {s_store_sk_30} + │ │ ├─ Filter + │ │ │ │ Condition: Runtime Filters: {ss_store_sk_30} + │ │ │ └─ TableScan tpcds1000.store_sales + │ │ │ Where: Runtime Filters: {ss_store_sk} + │ │ │ Outputs: ss_item_sk_31:=ss_item_sk, ss_customer_sk_16:=ss_customer_sk, ss_store_sk_30:=ss_store_sk, ss_ticket_number_31:=ss_ticket_number, ss_net_paid_9:=ss_net_paid + │ │ └─ Broadcast Exchange + │ │ └─ Projection + │ │ │ Expressions: [s_state_13, s_store_name_13, s_store_sk_30, s_zip_12] + │ │ └─ Filter + │ │ │ Condition: s_market_id_30 = 8 + │ │ └─ TableScan tpcds1000.store + │ │ Where: s_market_id = 8 + │ │ Outputs: s_store_sk_30:=s_store_sk, s_store_name_13:=s_store_name, s_market_id_30:=s_market_id, s_state_13:=s_state, s_zip_12:=s_zip + │ └─ Inner Join + │ │ Condition: sr_item_sk_26 == i_item_sk_23 + │ ├─ TableScan tpcds1000.store_returns + │ │ Outputs: sr_item_sk_26:=sr_item_sk, sr_ticket_number_31:=sr_ticket_number + │ └─ TableScan tpcds1000.item + │ Outputs: i_item_sk_23:=i_item_sk, i_current_price_13:=i_current_price, i_size_13:=i_size, i_color_13:=i_color, i_units_13:=i_units, i_manager_id_13:=i_manager_id + └─ 
Repartition Exchange + │ Partition by: {ca_zip_12, c_customer_sk_16} + └─ Inner (PARALLEL_HASH) Join + │ Condition: c_current_addr_sk_15 == ca_address_sk_15 + │ Filter: c_birth_country_9 != upper(ca_country_9) + ├─ Repartition Exchange + │ │ Partition by: {c_current_addr_sk_15} + │ └─ TableScan tpcds1000.customer + │ Outputs: c_customer_sk_16:=c_customer_sk, c_current_addr_sk_15:=c_current_addr_sk, c_first_name_13:=c_first_name, c_last_name_13:=c_last_name, c_birth_country_9:=c_birth_country + └─ Repartition Exchange + │ Partition by: {ca_address_sk_15} + └─ TableScan tpcds1000.customer_address + Outputs: ca_address_sk_15:=ca_address_sk, ca_state_13:=ca_state, ca_zip_12:=ca_zip, ca_country_9:=ca_country +note: Runtime Filter is applied for 3 times. diff --git a/tests/optimizers/tpcds/explains/tpcds1000_not_show_stats/q33.explain b/tests/optimizers/tpcds/explains/tpcds1000_not_show_stats/q33.explain index 2f84c00e7ed..9cf4d45ba38 100644 --- a/tests/optimizers/tpcds/explains/tpcds1000_not_show_stats/q33.explain +++ b/tests/optimizers/tpcds/explains/tpcds1000_not_show_stats/q33.explain @@ -66,7 +66,8 @@ Projection │ │ └─ TableScan tpcds1000.item │ │ Where: Runtime Filters: {i_manufact_id} │ │ Outputs: [i_item_sk, i_manufact_id] - │ └─ CTERef[1] + │ └─ Buffer + │ └─ CTERef[1] ├─ MergingAggregated │ └─ Repartition Exchange │ │ Partition by: {i_manufact_id_2} @@ -119,7 +120,8 @@ Projection │ │ └─ TableScan tpcds1000.item │ │ Where: Runtime Filters: {i_manufact_id} │ │ Outputs: i_item_sk_1:=i_item_sk, i_manufact_id_2:=i_manufact_id - │ └─ CTERef[1] + │ └─ Buffer + │ └─ CTERef[1] └─ MergingAggregated └─ Repartition Exchange │ Partition by: {i_manufact_id_4} @@ -172,7 +174,8 @@ Projection │ └─ TableScan tpcds1000.item │ Where: Runtime Filters: {i_manufact_id} │ Outputs: i_item_sk_2:=i_item_sk, i_manufact_id_4:=i_manufact_id - └─ CTERef[1] + └─ Buffer + └─ CTERef[1] CTEDef [1] Repartition Exchange │ Partition by: {i_manufact_id_1} diff --git 
a/tests/optimizers/tpcds/explains/tpcds1000_not_show_stats/q54.explain b/tests/optimizers/tpcds/explains/tpcds1000_not_show_stats/q54.explain index e71fcbbe5d3..8343cebb2f9 100644 --- a/tests/optimizers/tpcds/explains/tpcds1000_not_show_stats/q54.explain +++ b/tests/optimizers/tpcds/explains/tpcds1000_not_show_stats/q54.explain @@ -123,7 +123,8 @@ Projection │ │ Group by: {expr#plus(d_month_seq, 1)} │ └─ Projection │ │ Expressions: expr#plus(d_month_seq, 1):=d_month_seq_1 + 1 - │ └─ CTERef[1] + │ └─ Buffer + │ └─ CTERef[1] └─ Broadcast Exchange └─ EnforceSingleRow └─ Gather Exchange @@ -136,7 +137,8 @@ Projection │ Group by: {expr#plus(d_month_seq, 3)} └─ Projection │ Expressions: expr#plus(d_month_seq, 3):=d_month_seq_2 + 3 - └─ CTERef[1] + └─ Buffer + └─ CTERef[1] CTEDef [1] Projection │ Expressions: [d_month_seq_1] diff --git a/tests/optimizers/tpcds/explains/tpcds1000_not_show_stats/q56.explain b/tests/optimizers/tpcds/explains/tpcds1000_not_show_stats/q56.explain index a9aa07c50ac..59fc0fa62fc 100644 --- a/tests/optimizers/tpcds/explains/tpcds1000_not_show_stats/q56.explain +++ b/tests/optimizers/tpcds/explains/tpcds1000_not_show_stats/q56.explain @@ -55,7 +55,8 @@ Projection │ │ │ └─ TableScan tpcds1000.item │ │ │ Where: Runtime Filters: {i_item_id} │ │ │ Outputs: [i_item_sk, i_item_id] - │ │ └─ CTERef[1] + │ │ └─ Buffer + │ │ └─ CTERef[1] │ └─ Repartition Exchange │ │ Partition by: {ca_address_sk} │ └─ Projection @@ -115,7 +116,8 @@ Projection │ │ └─ TableScan tpcds1000.item │ │ Where: Runtime Filters: {i_item_id} │ │ Outputs: i_item_sk_1:=i_item_sk, i_item_id_2:=i_item_id - │ └─ CTERef[1] + │ └─ Buffer + │ └─ CTERef[1] └─ MergingAggregated └─ Repartition Exchange │ Partition by: {i_item_id_4} @@ -166,7 +168,8 @@ Projection │ └─ TableScan tpcds1000.item │ Where: Runtime Filters: {i_item_id} │ Outputs: i_item_sk_2:=i_item_sk, i_item_id_4:=i_item_id - └─ CTERef[1] + └─ Buffer + └─ CTERef[1] CTEDef [1] Repartition Exchange │ Partition by: {i_item_id_1} diff 
--git a/tests/optimizers/tpcds/explains/tpcds1000_not_show_stats/q58.explain b/tests/optimizers/tpcds/explains/tpcds1000_not_show_stats/q58.explain index d942c07f6e3..637e40b777e 100644 --- a/tests/optimizers/tpcds/explains/tpcds1000_not_show_stats/q58.explain +++ b/tests/optimizers/tpcds/explains/tpcds1000_not_show_stats/q58.explain @@ -41,7 +41,8 @@ Projection │ │ │ Where: Runtime Filters: {d_date} │ │ │ Outputs: d_date_sk_1:=d_date_sk, d_date_3:=d_date │ │ └─ Broadcast Exchange - │ │ └─ CTERef[1] + │ │ └─ Buffer + │ │ └─ CTERef[1] │ └─ Filter │ │ Condition: Runtime Filters: {i_item_id_1} │ └─ TableScan tpcds1000.item @@ -77,7 +78,8 @@ Projection │ │ │ Where: Runtime Filters: {d_date} │ │ │ Outputs: [d_date_sk, d_date] │ │ └─ Broadcast Exchange - │ │ └─ CTERef[1] + │ │ └─ Buffer + │ │ └─ CTERef[1] │ └─ Filter │ │ Condition: Runtime Filters: {i_item_id} │ └─ TableScan tpcds1000.item @@ -109,7 +111,8 @@ Projection │ │ Where: Runtime Filters: {d_date} │ │ Outputs: d_date_sk_2:=d_date_sk, d_date_6:=d_date │ └─ Broadcast Exchange - │ └─ CTERef[1] + │ └─ Buffer + │ └─ CTERef[1] └─ TableScan tpcds1000.item Outputs: i_item_sk_2:=i_item_sk, i_item_id_2:=i_item_id CTEDef [1] diff --git a/tests/optimizers/tpcds/explains/tpcds1000_not_show_stats/q59.explain b/tests/optimizers/tpcds/explains/tpcds1000_not_show_stats/q59.explain index ca375eee23a..f0890fd8a98 100644 --- a/tests/optimizers/tpcds/explains/tpcds1000_not_show_stats/q59.explain +++ b/tests/optimizers/tpcds/explains/tpcds1000_not_show_stats/q59.explain @@ -50,13 +50,15 @@ Projection │ │ │ │ Where: Runtime Filters: {d_week_seq} │ │ │ │ Outputs: d_date_sk_1:=d_date_sk, d_week_seq_5:=d_week_seq, d_day_name_1:=d_day_name │ │ │ └─ Broadcast Exchange - │ │ │ └─ CTERef[1] + │ │ │ └─ Buffer + │ │ │ └─ CTERef[1] │ │ └─ Repartition Exchange │ │ │ Partition by: {s_store_sk} │ │ └─ TableScan tpcds1000.store │ │ Outputs: [s_store_sk, s_store_id, s_store_name] │ └─ Broadcast Exchange - │ └─ CTERef[1] + │ └─ Buffer + │ └─ 
CTERef[1] └─ Repartition Exchange │ Partition by: {expr#cast(minus(d_week_seq_3, 52), 'Int32'), s_store_id_1} └─ Projection @@ -94,13 +96,15 @@ Projection │ │ │ Where: Runtime Filters: {d_week_seq} │ │ │ Outputs: d_date_sk_2:=d_date_sk, d_week_seq_6:=d_week_seq, d_day_name_2:=d_day_name │ │ └─ Broadcast Exchange - │ │ └─ CTERef[2] + │ │ └─ Buffer + │ │ └─ CTERef[2] │ └─ Repartition Exchange │ │ Partition by: {s_store_sk_1} │ └─ TableScan tpcds1000.store │ Outputs: s_store_sk_1:=s_store_sk, s_store_id_1:=s_store_id └─ Broadcast Exchange - └─ CTERef[2] + └─ Buffer + └─ CTERef[2] CTEDef [1] Projection │ Expressions: [d_week_seq_2] diff --git a/tests/optimizers/tpcds/explains/tpcds1000_not_show_stats/q60.explain b/tests/optimizers/tpcds/explains/tpcds1000_not_show_stats/q60.explain index ad77e46747b..4b6f45bcadf 100644 --- a/tests/optimizers/tpcds/explains/tpcds1000_not_show_stats/q60.explain +++ b/tests/optimizers/tpcds/explains/tpcds1000_not_show_stats/q60.explain @@ -55,7 +55,8 @@ Projection │ │ │ └─ TableScan tpcds1000.item │ │ │ Where: Runtime Filters: {i_item_id} │ │ │ Outputs: [i_item_sk, i_item_id] - │ │ └─ CTERef[1] + │ │ └─ Buffer + │ │ └─ CTERef[1] │ └─ Repartition Exchange │ │ Partition by: {ca_address_sk} │ └─ Projection @@ -106,7 +107,8 @@ Projection │ │ │ └─ TableScan tpcds1000.item │ │ │ Where: Runtime Filters: {i_item_id} │ │ │ Outputs: i_item_sk_1:=i_item_sk, i_item_id_2:=i_item_id - │ │ └─ CTERef[1] + │ │ └─ Buffer + │ │ └─ CTERef[1] │ └─ Repartition Exchange │ │ Partition by: {ca_address_sk_1} │ └─ Projection @@ -157,7 +159,8 @@ Projection │ │ └─ TableScan tpcds1000.item │ │ Where: Runtime Filters: {i_item_id} │ │ Outputs: i_item_sk_2:=i_item_sk, i_item_id_4:=i_item_id - │ └─ CTERef[1] + │ └─ Buffer + │ └─ CTERef[1] └─ Repartition Exchange │ Partition by: {ca_address_sk_2} └─ Projection diff --git a/tests/optimizers/tpcds/explains/tpcds1000_not_show_stats/q83.explain b/tests/optimizers/tpcds/explains/tpcds1000_not_show_stats/q83.explain index 
5e340ce979e..fc465a45cf7 100644 --- a/tests/optimizers/tpcds/explains/tpcds1000_not_show_stats/q83.explain +++ b/tests/optimizers/tpcds/explains/tpcds1000_not_show_stats/q83.explain @@ -52,7 +52,8 @@ Projection │ │ │ │ Where: Runtime Filters: {d_week_seq} │ │ │ │ Outputs: d_date_4:=d_date, d_week_seq_2:=d_week_seq │ │ │ └─ Broadcast Exchange - │ │ │ └─ CTERef[1] + │ │ │ └─ Buffer + │ │ │ └─ CTERef[1] │ │ └─ Filter │ │ │ Condition: Runtime Filters: {i_item_id_1} │ │ └─ TableScan tpcds1000.item @@ -93,7 +94,8 @@ Projection │ │ │ Where: Runtime Filters: {d_week_seq} │ │ │ Outputs: [d_week_seq], d_date_1:=d_date │ │ └─ Broadcast Exchange - │ │ └─ CTERef[1] + │ │ └─ Buffer + │ │ └─ CTERef[1] │ └─ Filter │ │ Condition: Runtime Filters: {i_item_id} │ └─ TableScan tpcds1000.item @@ -134,7 +136,8 @@ Projection │ │ Where: Runtime Filters: {d_week_seq} │ │ Outputs: d_date_7:=d_date, d_week_seq_4:=d_week_seq │ └─ Broadcast Exchange - │ └─ CTERef[1] + │ └─ Buffer + │ └─ CTERef[1] └─ TableScan tpcds1000.item Outputs: i_item_sk_2:=i_item_sk, i_item_id_2:=i_item_id CTEDef [1] diff --git a/tests/optimizers/tpcds/explains/tpcds1000_sample/q1.explain b/tests/optimizers/tpcds/explains/tpcds1000_sample/q1.explain index 68ee703ed74..338daf00ae3 100644 --- a/tests/optimizers/tpcds/explains/tpcds1000_sample/q1.explain +++ b/tests/optimizers/tpcds/explains/tpcds1000_sample/q1.explain @@ -43,7 +43,8 @@ Projection Est. 100 rows │ │ │ Where: Runtime Filters: {sr_returned_date_sk, sr_store_sk} │ │ │ Outputs: sr_returned_date_sk_1:=sr_returned_date_sk, sr_customer_sk_3:=sr_customer_sk, sr_store_sk_3:=sr_store_sk, sr_return_amt_1:=sr_return_amt │ │ └─ Broadcast Exchange Est. 41 rows - │ │ └─ CTERef[1] Est. 41 rows + │ │ └─ Buffer Est. 41 rows + │ │ └─ CTERef[1] Est. 41 rows │ └─ Broadcast Exchange Est. 366 rows │ └─ Projection Est. 366 rows │ │ Expressions: [d_date_sk_1] @@ -57,7 +58,8 @@ Projection Est. 100 rows │ Condition: s_store_sk == sr_store_sk_4 ├─ Repartition Exchange Est. 
41 rows │ │ Partition by: {s_store_sk} - │ └─ CTERef[1] Est. 41 rows + │ └─ Buffer Est. 41 rows + │ └─ CTERef[1] Est. 41 rows └─ MergingAggregated Est. 40 rows └─ Repartition Exchange Est. 40 rows │ Partition by: {sr_store_sk_4} @@ -84,7 +86,8 @@ Projection Est. 100 rows │ │ Where: Runtime Filters: {sr_returned_date_sk, sr_store_sk} │ │ Outputs: sr_returned_date_sk_2:=sr_returned_date_sk, sr_customer_sk_4:=sr_customer_sk, sr_store_sk_4:=sr_store_sk, sr_return_amt_2:=sr_return_amt │ └─ Broadcast Exchange Est. 41 rows - │ └─ CTERef[1] Est. 41 rows + │ └─ Buffer Est. 41 rows + │ └─ CTERef[1] Est. 41 rows └─ Broadcast Exchange Est. 366 rows └─ Projection Est. 366 rows │ Expressions: [d_date_sk_2] diff --git a/tests/optimizers/tpcds/explains/tpcds1000_sample/q14.explain b/tests/optimizers/tpcds/explains/tpcds1000_sample/q14.explain index 7a584b120d4..096b4b4567d 100644 --- a/tests/optimizers/tpcds/explains/tpcds1000_sample/q14.explain +++ b/tests/optimizers/tpcds/explains/tpcds1000_sample/q14.explain @@ -56,9 +56,11 @@ Projection Est. 100 rows │ │ │ │ Outputs: [d_date_sk, d_year, d_moy] │ │ │ └─ TableScan tpcds1000_sample.item Est. 300000 rows │ │ │ Outputs: [i_item_sk, i_brand_id, i_class_id, i_category_id] - │ │ └─ CTERef[0] Est. 6220638 rows + │ │ └─ Buffer Est. 6220638 rows + │ │ └─ CTERef[0] Est. 6220638 rows │ └─ Broadcast Exchange Est. 1 rows - │ └─ CTERef[2] Est. 1 rows + │ └─ Buffer Est. 1 rows + │ └─ CTERef[2] Est. 1 rows ├─ Aggregating Est. 13880 rows │ │ Group by: {expr#'catalog', i_brand_id_7, i_class_id_7, i_category_id_7} │ │ Aggregates: expr#sum(sales):=AggNull(sum)(expr#sum(multiply(cs_quantity, cs_list_price))), expr#sum(number_sales):=AggNull(sum)(expr#count()_1) @@ -99,9 +101,11 @@ Projection Est. 100 rows │ │ │ │ Outputs: d_date_sk_7:=d_date_sk, d_year_7:=d_year, d_moy_1:=d_moy │ │ │ └─ TableScan tpcds1000_sample.item Est. 
300000 rows │ │ │ Outputs: i_item_sk_6:=i_item_sk, i_brand_id_7:=i_brand_id, i_class_id_7:=i_class_id, i_category_id_7:=i_category_id - │ │ └─ CTERef[0] Est. 6220638 rows + │ │ └─ Buffer Est. 6220638 rows + │ │ └─ CTERef[0] Est. 6220638 rows │ └─ Broadcast Exchange Est. 1 rows - │ └─ CTERef[2] Est. 1 rows + │ └─ Buffer Est. 1 rows + │ └─ CTERef[2] Est. 1 rows └─ Aggregating Est. 13880 rows │ Group by: {expr#'web', i_brand_id_8, i_class_id_8, i_category_id_8} │ Aggregates: expr#sum(sales):=AggNull(sum)(expr#sum(multiply(ws_quantity, ws_list_price))), expr#sum(number_sales):=AggNull(sum)(expr#count()_2) @@ -122,7 +126,7 @@ Projection Est. 100 rows │ │ Condition: ws_item_sk_1 == i_item_sk_9 │ ├─ Repartition Exchange Est. 10992147 rows │ │ │ Partition by: {ws_item_sk_1} - │ │ └─ Inner Join Est. 10992147 rows + │ │ └─ Inner Join Est. 10992146 rows │ │ │ Condition: ws_item_sk_1 == i_item_sk_8 │ │ ├─ Inner Join Est. 11453653 rows │ │ │ │ Condition: ws_sold_date_sk_2 == d_date_sk_8 @@ -142,9 +146,11 @@ Projection Est. 100 rows │ │ │ Outputs: d_date_sk_8:=d_date_sk, d_year_8:=d_year, d_moy_2:=d_moy │ │ └─ TableScan tpcds1000_sample.item Est. 300000 rows │ │ Outputs: i_item_sk_8:=i_item_sk, i_brand_id_8:=i_brand_id, i_class_id_8:=i_class_id, i_category_id_8:=i_category_id - │ └─ CTERef[0] Est. 6220638 rows + │ └─ Buffer Est. 6220638 rows + │ └─ CTERef[0] Est. 6220638 rows └─ Broadcast Exchange Est. 1 rows - └─ CTERef[2] Est. 1 rows + └─ Buffer Est. 1 rows + └─ CTERef[2] Est. 1 rows CTEDef [0] Repartition Exchange Est. 6220638 rows │ Partition by: {i_item_sk_1} @@ -358,9 +364,11 @@ Projection Est. 100 rows │ │ │ └─ TableScan tpcds1000_sample.date_dim Est. 73049 rows │ │ │ Where: (d_year = cast(2000, 'UInt32')) AND (d_moy = 12) AND (d_dom = 11) │ │ │ Outputs: [d_dom], d_week_seq_1:=d_week_seq, d_year_30:=d_year, d_moy_3:=d_moy - │ │ └─ CTERef[0] Est. 6220638 rows + │ │ └─ Buffer Est. 6220638 rows + │ │ └─ CTERef[0] Est. 6220638 rows │ └─ Broadcast Exchange Est. 
1 rows - │ └─ CTERef[3] Est. 1 rows + │ └─ Buffer Est. 1 rows + │ └─ CTERef[3] Est. 1 rows └─ Projection Est. 771 rows │ Expressions: [expr#count()_5, expr#sum(multiply(ss_quantity, ss_list_price))_3, i_brand_id_29, i_category_id_29, i_class_id_29], expr#'store'_3:='store' └─ Inner Join Est. 771 rows @@ -409,9 +417,11 @@ Projection Est. 100 rows │ │ └─ TableScan tpcds1000_sample.date_dim Est. 73049 rows │ │ Where: (d_year = 1999) AND (d_moy = 12) AND (d_dom = 11) │ │ Outputs: d_week_seq_4:=d_week_seq, d_year_37:=d_year, d_moy_4:=d_moy, d_dom_1:=d_dom - │ └─ CTERef[0] Est. 6220638 rows + │ └─ Buffer Est. 6220638 rows + │ └─ CTERef[0] Est. 6220638 rows └─ Broadcast Exchange Est. 1 rows - └─ CTERef[3] Est. 1 rows + └─ Buffer Est. 1 rows + └─ CTERef[3] Est. 1 rows CTEDef [0] Repartition Exchange Est. 6220638 rows │ Partition by: {i_item_sk_18} diff --git a/tests/optimizers/tpcds/explains/tpcds1000_sample/q23.explain b/tests/optimizers/tpcds/explains/tpcds1000_sample/q23.explain index 37d1b1eb9b5..6a389296a57 100644 --- a/tests/optimizers/tpcds/explains/tpcds1000_sample/q23.explain +++ b/tests/optimizers/tpcds/explains/tpcds1000_sample/q23.explain @@ -21,8 +21,7 @@ Projection Est. 1 rows │ │ │ Runtime Filters Builder: {cs_item_sk} │ │ ├─ Filter Est. 143861166 rows │ │ │ │ Condition: Runtime Filters: {i_item_sk_1} - │ │ │ └─ Local Exchange Est. 575444667 rows - │ │ │ └─ CTERef[0] Est. 575444667 rows + │ │ │ └─ CTERef[0] Est. 575444667 rows │ │ └─ Inner Join Est. 21960485 rows │ │ │ Condition: cs_sold_date_sk == d_date_sk │ │ │ Runtime Filters Builder: {d_date_sk} @@ -39,7 +38,8 @@ Projection Est. 1 rows │ │ └─ TableScan tpcds1000_sample.date_dim Est. 73049 rows │ │ Where: (d_year = 2000) AND (d_moy = 2) │ │ Outputs: [d_date_sk, d_year, d_moy] - │ └─ CTERef[1] Est. 5907893 rows + │ └─ Buffer Est. 5907893 rows + │ └─ CTERef[1] Est. 5907893 rows └─ Aggregating Est. 
1 rows │ Group by: {} │ Aggregates: expr#sum(sales):=AggNull(sum)(expr#multiply(ws_quantity, ws_list_price)) @@ -55,8 +55,7 @@ Projection Est. 1 rows │ │ Runtime Filters Builder: {ws_item_sk} │ ├─ Filter Est. 143861166 rows │ │ │ Condition: Runtime Filters: {i_item_sk_2} - │ │ └─ Local Exchange Est. 575444667 rows - │ │ └─ CTERef[0] Est. 575444667 rows + │ │ └─ CTERef[0] Est. 575444667 rows │ └─ Inner Join Est. 11058700 rows │ │ Condition: ws_sold_date_sk == d_date_sk_3 │ │ Runtime Filters Builder: {d_date_sk_3} @@ -74,7 +73,8 @@ Projection Est. 1 rows │ Where: (d_moy = 2) AND (d_year = 2000) │ Outputs: d_date_sk_3:=d_date_sk, d_year_3:=d_year, d_moy_1:=d_moy └─ Local Exchange Est. 5907893 rows - └─ CTERef[1] Est. 5907893 rows + └─ Buffer Est. 5907893 rows + └─ CTERef[1] Est. 5907893 rows CTEDef [0] Projection Est. 575444667 rows │ Expressions: i_item_sk:=ss_item_sk @@ -193,7 +193,8 @@ Projection Est. 100 rows │ │ ├─ Filter Est. 143861166 rows │ │ │ │ Condition: Runtime Filters: {i_item_sk_6} │ │ │ └─ Local Exchange Est. 575444667 rows - │ │ │ └─ CTERef[0] Est. 575444667 rows + │ │ │ └─ Buffer Est. 575444667 rows + │ │ │ └─ CTERef[0] Est. 575444667 rows │ │ └─ Inner Join Est. 21960485 rows │ │ │ Condition: cs_sold_date_sk_1 == d_date_sk_10 │ │ │ Runtime Filters Builder: {d_date_sk_10} @@ -210,16 +211,17 @@ Projection Est. 100 rows │ │ └─ TableScan tpcds1000_sample.date_dim Est. 73049 rows │ │ Where: (d_moy = 2) AND (d_year = 2000) │ │ Outputs: d_date_sk_10:=d_date_sk, d_year_10:=d_year, d_moy_2:=d_moy - │ └─ CTERef[1] Est. 5907893 rows + │ └─ Buffer Est. 5907893 rows + │ └─ CTERef[1] Est. 5907893 rows └─ MergingAggregated Est. 5907893 rows - └─ Repartition Exchange Est. 5907893 rows + └─ Repartition Exchange Est. 5932260 rows │ Partition by: {c_last_name_1, c_first_name_1} - └─ Aggregating Est. 5907893 rows + └─ Aggregating Est. 
5932260 rows │ Group by: {c_last_name_1, c_first_name_1} │ Aggregates: expr#sum(multiply(ws_quantity, ws_list_price)):=AggNull(sum)(expr#multiply(ws_quantity, ws_list_price)_1) - └─ Projection Est. 5907893 rows + └─ Projection Est. 5932260 rows │ Expressions: [c_first_name_1, c_last_name_1], expr#multiply(ws_quantity, ws_list_price)_1:=ws_quantity_1 * ws_list_price_1 - └─ Inner (PARALLEL_HASH) Join Est. 5907893 rows + └─ Inner (PARALLEL_HASH) Join Est. 5932260 rows │ Condition: c_customer_sk_8 == ws_bill_customer_sk_1 │ Runtime Filters Builder: {ws_bill_customer_sk_1} ├─ Repartition Exchange Est. 12000000 rows @@ -240,7 +242,8 @@ Projection Est. 100 rows │ ├─ Filter Est. 143861166 rows │ │ │ Condition: Runtime Filters: {i_item_sk_7} │ │ └─ Local Exchange Est. 575444667 rows - │ │ └─ CTERef[0] Est. 575444667 rows + │ │ └─ Buffer Est. 575444667 rows + │ │ └─ CTERef[0] Est. 575444667 rows │ └─ Inner Join Est. 11058700 rows │ │ Condition: ws_sold_date_sk_1 == d_date_sk_13 │ │ Runtime Filters Builder: {d_date_sk_13} @@ -257,7 +260,8 @@ Projection Est. 100 rows │ └─ TableScan tpcds1000_sample.date_dim Est. 73049 rows │ Where: (d_moy = 2) AND (d_year = 2000) │ Outputs: d_date_sk_13:=d_date_sk, d_year_13:=d_year, d_moy_3:=d_moy - └─ CTERef[1] Est. 5907893 rows + └─ Buffer Est. 5907893 rows + └─ CTERef[1] Est. 5907893 rows CTEDef [0] Projection Est. 575444667 rows │ Expressions: i_item_sk_5:=ss_item_sk_3 diff --git a/tests/optimizers/tpcds/explains/tpcds1000_sample/q24.explain b/tests/optimizers/tpcds/explains/tpcds1000_sample/q24.explain index 989353689be..d50241b7852 100644 --- a/tests/optimizers/tpcds/explains/tpcds1000_sample/q24.explain +++ b/tests/optimizers/tpcds/explains/tpcds1000_sample/q24.explain @@ -1,25 +1,73 @@ Projection Est. 
1360 rows -│ Expressions: c_first_name:=c_first_name_1, c_last_name:=c_last_name_1, paid:=`expr#sum(netpaid)`, s_store_name:=s_store_name_1 +│ Expressions: c_first_name:=c_first_name_5, c_last_name:=c_last_name_5, paid:=`expr#sum(netpaid)`, s_store_name:=s_store_name_5 └─ Sorting Est. 1360 rows - │ Order by: {c_last_name_1 ASC NULLS LAST, c_first_name_1 ASC NULLS LAST, s_store_name_1 ASC NULLS LAST} + │ Order by: {c_last_name_5 ASC NULLS LAST, c_first_name_5 ASC NULLS LAST, s_store_name_5 ASC NULLS LAST} └─ Gather Exchange Est. 1360 rows └─ Sorting Est. 1360 rows - │ Order by: {c_last_name_1 ASC NULLS LAST, c_first_name_1 ASC NULLS LAST, s_store_name_1 ASC NULLS LAST} + │ Order by: {c_last_name_5 ASC NULLS LAST, c_first_name_5 ASC NULLS LAST, s_store_name_5 ASC NULLS LAST} └─ Inner Join Est. 1360 rows │ Condition: │ Filter: `expr#sum(netpaid)` > `expr#multiply('0.05', avg(netpaid))` ├─ MergingAggregated Est. 2721 rows - │ └─ Repartition Exchange Est. 362812 rows - │ │ Partition by: {c_last_name_1, c_first_name_1, s_store_name_1} - │ └─ Aggregating Est. 362812 rows - │ │ Group by: {c_last_name_1, c_first_name_1, s_store_name_1} - │ │ Aggregates: expr#sum(netpaid):=AggNull(sum)(expr#sum(ss_net_paid)_1) - │ └─ Projection Est. 48749288 rows - │ │ Expressions: [c_first_name_1, c_last_name_1, expr#sum(ss_net_paid)_1, s_store_name_1] - │ └─ Filter Est. 48749288 rows - │ │ Condition: i_color_1 = 'peach' - │ └─ Buffer Est. 194997154 rows - │ └─ CTERef[0] Est. 194997154 rows + │ └─ Repartition Exchange Est. 5807249 rows + │ │ Partition by: {c_last_name_5, c_first_name_5, s_store_name_5} + │ └─ Aggregating Est. 5807249 rows + │ │ Group by: {c_last_name_5, c_first_name_5, s_store_name_5} + │ │ Aggregates: expr#sum(netpaid):=AggNull(sum)(expr#sum(ss_net_paid)_5) + │ └─ MergingAggregated Est. 194997154 rows + │ └─ Repartition Exchange Est. 
197223785 rows + │ │ Partition by: {c_last_name_5, c_first_name_5, s_store_name_5, ca_state_5, s_state_5, i_color_5, i_current_price_5, i_manager_id_5, i_units_5, i_size_5} + │ └─ Aggregating Est. 197223785 rows + │ │ Group by: {c_last_name_5, c_first_name_5, s_store_name_5, ca_state_5, s_state_5, i_color_5, i_current_price_5, i_manager_id_5, i_units_5, i_size_5} + │ │ Aggregates: expr#sum(ss_net_paid)_5:=AggNull(sum)(ss_net_paid_3) + │ └─ Inner (PARALLEL_HASH) Join Est. 197223785 rows + │ │ Condition: s_zip_3 == ca_zip_3, ss_customer_sk_5 == c_customer_sk_5 + │ ├─ Repartition Exchange Est. 487492878 rows + │ │ │ Partition by: {s_zip_3, ss_customer_sk_5} + │ │ └─ Inner (PARALLEL_HASH) Join Est. 487492879 rows + │ │ │ Condition: ss_item_sk_11 == sr_item_sk_7, ss_ticket_number_11 == sr_ticket_number_11 + │ │ ├─ Inner Join Est. 477123760 rows + │ │ │ │ Condition: ss_store_sk_9 == s_store_sk_9 + │ │ │ │ Runtime Filters Builder: {s_store_sk_9} + │ │ │ ├─ Filter Est. 2879987999 rows + │ │ │ │ │ Condition: Runtime Filters: {ss_store_sk_9} + │ │ │ │ └─ TableScan tpcds1000_sample.store_sales Est. 2879987999 rows + │ │ │ │ Where: Runtime Filters: {ss_store_sk} + │ │ │ │ Outputs: ss_item_sk_11:=ss_item_sk, ss_customer_sk_5:=ss_customer_sk, ss_store_sk_9:=ss_store_sk, ss_ticket_number_11:=ss_ticket_number, ss_net_paid_3:=ss_net_paid + │ │ │ └─ Broadcast Exchange Est. 84 rows + │ │ │ └─ Projection Est. 84 rows + │ │ │ │ Expressions: [s_state_5, s_store_name_5, s_store_sk_9, s_zip_3] + │ │ │ └─ Filter Est. 84 rows + │ │ │ │ Condition: s_market_id_9 = 8 + │ │ │ └─ TableScan tpcds1000_sample.store Est. 1002 rows + │ │ │ Where: s_market_id = 8 + │ │ │ Outputs: s_store_sk_9:=s_store_sk, s_store_name_5:=s_store_name, s_market_id_9:=s_market_id, s_state_5:=s_state, s_zip_3:=s_zip + │ │ └─ Inner Join Est. 287999764 rows + │ │ │ Condition: sr_item_sk_7 == i_item_sk_7 + │ │ │ Runtime Filters Builder: {i_item_sk_7} + │ │ ├─ Filter Est. 
287999764 rows + │ │ │ │ Condition: Runtime Filters: {sr_item_sk_7} + │ │ │ └─ TableScan tpcds1000_sample.store_returns Est. 287999764 rows + │ │ │ Where: Runtime Filters: {sr_item_sk} + │ │ │ Outputs: sr_item_sk_7:=sr_item_sk, sr_ticket_number_11:=sr_ticket_number + │ │ └─ Filter Est. 6604 rows + │ │ │ Condition: i_color_5 = 'peach' + │ │ └─ TableScan tpcds1000_sample.item Est. 300000 rows + │ │ Where: i_color = 'peach' + │ │ Outputs: i_item_sk_7:=i_item_sk, i_current_price_5:=i_current_price, i_size_5:=i_size, i_color_5:=i_color, i_units_5:=i_units, i_manager_id_5:=i_manager_id + │ └─ Repartition Exchange Est. 6000000 rows + │ │ Partition by: {ca_zip_3, c_customer_sk_5} + │ └─ Inner (PARALLEL_HASH) Join Est. 6000000 rows + │ │ Condition: c_current_addr_sk_3 == ca_address_sk_3 + │ │ Filter: c_birth_country_3 != upper(ca_country_3) + │ ├─ Repartition Exchange Est. 12000000 rows + │ │ │ Partition by: {c_current_addr_sk_3} + │ │ └─ TableScan tpcds1000_sample.customer Est. 12000000 rows + │ │ Outputs: c_customer_sk_5:=c_customer_sk, c_current_addr_sk_3:=c_current_addr_sk, c_first_name_5:=c_first_name, c_last_name_5:=c_last_name, c_birth_country_3:=c_birth_country + │ └─ Repartition Exchange Est. 6000000 rows + │ │ Partition by: {ca_address_sk_3} + │ └─ TableScan tpcds1000_sample.customer_address Est. 6000000 rows + │ Outputs: ca_address_sk_3:=ca_address_sk, ca_state_5:=ca_state, ca_zip_3:=ca_zip, ca_country_3:=ca_country └─ Broadcast Exchange Est. 1 rows └─ Projection Est. 1 rows │ Expressions: expr#multiply('0.05', avg(netpaid)):='0.05' * `expr#avg(netpaid)` @@ -27,81 +75,126 @@ Projection Est. 1360 rows └─ Gather Exchange Est. 1 rows └─ Aggregating Est. 1 rows │ Group by: {} - │ Aggregates: expr#avg(netpaid):=AggNull(avg)(expr#sum(ss_net_paid)_2) + │ Aggregates: expr#avg(netpaid):=AggNull(avg)(expr#sum(ss_net_paid)_6) └─ Buffer Est. 194997154 rows - └─ CTERef[0] Est. 194997154 rows -CTEDef [0] - MergingAggregated Est. 194997154 rows - └─ Repartition Exchange Est. 
194997154 rows - │ Partition by: {c_last_name, c_first_name, s_store_name, ca_state, s_state, i_color, i_current_price, i_manager_id, i_units, i_size} - └─ Aggregating Est. 194997154 rows - │ Group by: {c_last_name, c_first_name, s_store_name, ca_state, s_state, i_color, i_current_price, i_manager_id, i_units, i_size} - │ Aggregates: expr#sum(ss_net_paid):=AggNull(sum)(ss_net_paid) - └─ Inner (PARALLEL_HASH) Join Est. 194997154 rows - │ Condition: s_zip == ca_zip, ss_customer_sk == c_customer_sk - ├─ Repartition Exchange Est. 487492878 rows - │ │ Partition by: {s_zip, ss_customer_sk} - │ └─ Inner (PARALLEL_HASH) Join Est. 487492878 rows - │ │ Condition: ss_item_sk == sr_item_sk, ss_ticket_number == sr_ticket_number - │ ├─ Inner Join Est. 477123760 rows - │ │ │ Condition: ss_store_sk == s_store_sk - │ │ │ Runtime Filters Builder: {s_store_sk} - │ │ ├─ Filter Est. 2879987999 rows - │ │ │ │ Condition: Runtime Filters: {ss_store_sk} - │ │ │ └─ TableScan tpcds1000_sample.store_sales Est. 2879987999 rows - │ │ │ Where: Runtime Filters: {ss_store_sk} - │ │ │ Outputs: [ss_item_sk, ss_customer_sk, ss_store_sk, ss_ticket_number, ss_net_paid] - │ │ └─ Broadcast Exchange Est. 84 rows - │ │ └─ Projection Est. 84 rows - │ │ │ Expressions: [s_state, s_store_name, s_store_sk, s_zip] - │ │ └─ Filter Est. 84 rows - │ │ │ Condition: s_market_id = 8 - │ │ └─ TableScan tpcds1000_sample.store Est. 1002 rows - │ │ Where: s_market_id = 8 - │ │ Outputs: [s_store_sk, s_store_name, s_market_id, s_state, s_zip] - │ └─ Inner Join Est. 287999764 rows - │ │ Condition: sr_item_sk == i_item_sk - │ ├─ TableScan tpcds1000_sample.store_returns Est. 287999764 rows - │ │ Outputs: [sr_item_sk, sr_ticket_number] - │ └─ TableScan tpcds1000_sample.item Est. 300000 rows - │ Outputs: [i_item_sk, i_current_price, i_size, i_color, i_units, i_manager_id] - └─ Repartition Exchange Est. 6000000 rows - │ Partition by: {ca_zip, c_customer_sk} - └─ Inner (PARALLEL_HASH) Join Est. 
6000000 rows - │ Condition: c_current_addr_sk == ca_address_sk - │ Filter: c_birth_country != upper(ca_country) - ├─ Repartition Exchange Est. 12000000 rows - │ │ Partition by: {c_current_addr_sk} - │ └─ TableScan tpcds1000_sample.customer Est. 12000000 rows - │ Outputs: [c_customer_sk, c_current_addr_sk, c_first_name, c_last_name, c_birth_country] - └─ Repartition Exchange Est. 6000000 rows - │ Partition by: {ca_address_sk} - └─ TableScan tpcds1000_sample.customer_address Est. 6000000 rows - Outputs: [ca_address_sk, ca_state, ca_zip, ca_country] -note: Runtime Filter is applied for 1 times. -note: CTE(Common Table Expression) is applied for 2 times. + └─ MergingAggregated Est. 194997154 rows + └─ Repartition Exchange Est. 197223785 rows + │ Partition by: {c_last_name_6, c_first_name_6, s_store_name_6, ca_state_6, s_state_6, i_color_6, i_current_price_6, i_manager_id_6, i_units_6, i_size_6} + └─ Aggregating Est. 197223785 rows + │ Group by: {c_last_name_6, c_first_name_6, s_store_name_6, ca_state_6, s_state_6, i_color_6, i_current_price_6, i_manager_id_6, i_units_6, i_size_6} + │ Aggregates: expr#sum(ss_net_paid)_6:=AggNull(sum)(ss_net_paid_4) + └─ Inner (PARALLEL_HASH) Join Est. 197223785 rows + │ Condition: s_zip_5 == ca_zip_5, ss_customer_sk_7 == c_customer_sk_7 + ├─ Repartition Exchange Est. 487492878 rows + │ │ Partition by: {s_zip_5, ss_customer_sk_7} + │ └─ Inner (PARALLEL_HASH) Join Est. 487492879 rows + │ │ Condition: ss_item_sk_14 == sr_item_sk_11, ss_ticket_number_14 == sr_ticket_number_14 + │ ├─ Inner Join Est. 477123760 rows + │ │ │ Condition: ss_store_sk_13 == s_store_sk_13 + │ │ │ Runtime Filters Builder: {s_store_sk_13} + │ │ ├─ Filter Est. 2879987999 rows + │ │ │ │ Condition: Runtime Filters: {ss_store_sk_13} + │ │ │ └─ TableScan tpcds1000_sample.store_sales Est. 
2879987999 rows + │ │ │ Where: Runtime Filters: {ss_store_sk} + │ │ │ Outputs: ss_item_sk_14:=ss_item_sk, ss_customer_sk_7:=ss_customer_sk, ss_store_sk_13:=ss_store_sk, ss_ticket_number_14:=ss_ticket_number, ss_net_paid_4:=ss_net_paid + │ │ └─ Broadcast Exchange Est. 84 rows + │ │ └─ Projection Est. 84 rows + │ │ │ Expressions: [s_state_6, s_store_name_6, s_store_sk_13, s_zip_5] + │ │ └─ Filter Est. 84 rows + │ │ │ Condition: s_market_id_13 = 8 + │ │ └─ TableScan tpcds1000_sample.store Est. 1002 rows + │ │ Where: s_market_id = 8 + │ │ Outputs: s_store_sk_13:=s_store_sk, s_store_name_6:=s_store_name, s_market_id_13:=s_market_id, s_state_6:=s_state, s_zip_5:=s_zip + │ └─ Inner Join Est. 287999764 rows + │ │ Condition: sr_item_sk_11 == i_item_sk_10 + │ ├─ TableScan tpcds1000_sample.store_returns Est. 287999764 rows + │ │ Outputs: sr_item_sk_11:=sr_item_sk, sr_ticket_number_14:=sr_ticket_number + │ └─ TableScan tpcds1000_sample.item Est. 300000 rows + │ Outputs: i_item_sk_10:=i_item_sk, i_current_price_6:=i_current_price, i_size_6:=i_size, i_color_6:=i_color, i_units_6:=i_units, i_manager_id_6:=i_manager_id + └─ Repartition Exchange Est. 6000000 rows + │ Partition by: {ca_zip_5, c_customer_sk_7} + └─ Inner (PARALLEL_HASH) Join Est. 6000000 rows + │ Condition: c_current_addr_sk_6 == ca_address_sk_6 + │ Filter: c_birth_country_4 != upper(ca_country_4) + ├─ Repartition Exchange Est. 12000000 rows + │ │ Partition by: {c_current_addr_sk_6} + │ └─ TableScan tpcds1000_sample.customer Est. 12000000 rows + │ Outputs: c_customer_sk_7:=c_customer_sk, c_current_addr_sk_6:=c_current_addr_sk, c_first_name_6:=c_first_name, c_last_name_6:=c_last_name, c_birth_country_4:=c_birth_country + └─ Repartition Exchange Est. 6000000 rows + │ Partition by: {ca_address_sk_6} + └─ TableScan tpcds1000_sample.customer_address Est. 6000000 rows + Outputs: ca_address_sk_6:=ca_address_sk, ca_state_6:=ca_state, ca_zip_5:=ca_zip, ca_country_4:=ca_country +note: Runtime Filter is applied for 3 times. 
Projection Est. 1360 rows -│ Expressions: c_first_name:=c_first_name_6, c_last_name:=c_last_name_6, paid:=`expr#sum(netpaid)_1`, s_store_name:=s_store_name_6 +│ Expressions: c_first_name:=c_first_name_12, c_last_name:=c_last_name_12, paid:=`expr#sum(netpaid)_1`, s_store_name:=s_store_name_12 └─ Sorting Est. 1360 rows - │ Order by: {c_last_name_6 ASC NULLS LAST, c_first_name_6 ASC NULLS LAST, s_store_name_6 ASC NULLS LAST} + │ Order by: {c_last_name_12 ASC NULLS LAST, c_first_name_12 ASC NULLS LAST, s_store_name_12 ASC NULLS LAST} └─ Gather Exchange Est. 1360 rows └─ Sorting Est. 1360 rows - │ Order by: {c_last_name_6 ASC NULLS LAST, c_first_name_6 ASC NULLS LAST, s_store_name_6 ASC NULLS LAST} + │ Order by: {c_last_name_12 ASC NULLS LAST, c_first_name_12 ASC NULLS LAST, s_store_name_12 ASC NULLS LAST} └─ Inner Join Est. 1360 rows │ Condition: │ Filter: `expr#sum(netpaid)_1` > `expr#multiply('0.05', avg(netpaid))_1` ├─ MergingAggregated Est. 2721 rows - │ └─ Repartition Exchange Est. 362812 rows - │ │ Partition by: {c_last_name_6, c_first_name_6, s_store_name_6} - │ └─ Aggregating Est. 362812 rows - │ │ Group by: {c_last_name_6, c_first_name_6, s_store_name_6} - │ │ Aggregates: expr#sum(netpaid)_1:=AggNull(sum)(expr#sum(ss_net_paid)_6) - │ └─ Projection Est. 48749288 rows - │ │ Expressions: [c_first_name_6, c_last_name_6, expr#sum(ss_net_paid)_6, s_store_name_6] - │ └─ Filter Est. 48749288 rows - │ │ Condition: i_color_6 = 'saddle' - │ └─ Buffer Est. 194997154 rows - │ └─ CTERef[0] Est. 194997154 rows + │ └─ Repartition Exchange Est. 5807249 rows + │ │ Partition by: {c_last_name_12, c_first_name_12, s_store_name_12} + │ └─ Aggregating Est. 5807249 rows + │ │ Group by: {c_last_name_12, c_first_name_12, s_store_name_12} + │ │ Aggregates: expr#sum(netpaid)_1:=AggNull(sum)(expr#sum(ss_net_paid)_12) + │ └─ MergingAggregated Est. 194997154 rows + │ └─ Repartition Exchange Est. 
197223785 rows + │ │ Partition by: {c_last_name_12, c_first_name_12, s_store_name_12, ca_state_12, s_state_12, i_color_12, i_current_price_12, i_manager_id_12, i_units_12, i_size_12} + │ └─ Aggregating Est. 197223785 rows + │ │ Group by: {c_last_name_12, c_first_name_12, s_store_name_12, ca_state_12, s_state_12, i_color_12, i_current_price_12, i_manager_id_12, i_units_12, i_size_12} + │ │ Aggregates: expr#sum(ss_net_paid)_12:=AggNull(sum)(ss_net_paid_8) + │ └─ Inner (PARALLEL_HASH) Join Est. 197223785 rows + │ │ Condition: s_zip_10 == ca_zip_10, ss_customer_sk_14 == c_customer_sk_14 + │ ├─ Repartition Exchange Est. 487492878 rows + │ │ │ Partition by: {s_zip_10, ss_customer_sk_14} + │ │ └─ Inner (PARALLEL_HASH) Join Est. 487492879 rows + │ │ │ Condition: ss_item_sk_28 == sr_item_sk_22, ss_ticket_number_28 == sr_ticket_number_28 + │ │ ├─ Inner Join Est. 477123760 rows + │ │ │ │ Condition: ss_store_sk_26 == s_store_sk_26 + │ │ │ │ Runtime Filters Builder: {s_store_sk_26} + │ │ │ ├─ Filter Est. 2879987999 rows + │ │ │ │ │ Condition: Runtime Filters: {ss_store_sk_26} + │ │ │ │ └─ TableScan tpcds1000_sample.store_sales Est. 2879987999 rows + │ │ │ │ Where: Runtime Filters: {ss_store_sk} + │ │ │ │ Outputs: ss_item_sk_28:=ss_item_sk, ss_customer_sk_14:=ss_customer_sk, ss_store_sk_26:=ss_store_sk, ss_ticket_number_28:=ss_ticket_number, ss_net_paid_8:=ss_net_paid + │ │ │ └─ Broadcast Exchange Est. 84 rows + │ │ │ └─ Projection Est. 84 rows + │ │ │ │ Expressions: [s_state_12, s_store_name_12, s_store_sk_26, s_zip_10] + │ │ │ └─ Filter Est. 84 rows + │ │ │ │ Condition: s_market_id_26 = 8 + │ │ │ └─ TableScan tpcds1000_sample.store Est. 1002 rows + │ │ │ Where: s_market_id = 8 + │ │ │ Outputs: s_store_sk_26:=s_store_sk, s_store_name_12:=s_store_name, s_market_id_26:=s_market_id, s_state_12:=s_state, s_zip_10:=s_zip + │ │ └─ Inner Join Est. 287999764 rows + │ │ │ Condition: sr_item_sk_22 == i_item_sk_20 + │ │ │ Runtime Filters Builder: {i_item_sk_20} + │ │ ├─ Filter Est. 
287999764 rows + │ │ │ │ Condition: Runtime Filters: {sr_item_sk_22} + │ │ │ └─ TableScan tpcds1000_sample.store_returns Est. 287999764 rows + │ │ │ Where: Runtime Filters: {sr_item_sk} + │ │ │ Outputs: sr_item_sk_22:=sr_item_sk, sr_ticket_number_28:=sr_ticket_number + │ │ └─ Filter Est. 6628 rows + │ │ │ Condition: i_color_12 = 'saddle' + │ │ └─ TableScan tpcds1000_sample.item Est. 300000 rows + │ │ Where: i_color = 'saddle' + │ │ Outputs: i_item_sk_20:=i_item_sk, i_current_price_12:=i_current_price, i_size_12:=i_size, i_color_12:=i_color, i_units_12:=i_units, i_manager_id_12:=i_manager_id + │ └─ Repartition Exchange Est. 6000000 rows + │ │ Partition by: {ca_zip_10, c_customer_sk_14} + │ └─ Inner (PARALLEL_HASH) Join Est. 6000000 rows + │ │ Condition: c_current_addr_sk_12 == ca_address_sk_12 + │ │ Filter: c_birth_country_8 != upper(ca_country_8) + │ ├─ Repartition Exchange Est. 12000000 rows + │ │ │ Partition by: {c_current_addr_sk_12} + │ │ └─ TableScan tpcds1000_sample.customer Est. 12000000 rows + │ │ Outputs: c_customer_sk_14:=c_customer_sk, c_current_addr_sk_12:=c_current_addr_sk, c_first_name_12:=c_first_name, c_last_name_12:=c_last_name, c_birth_country_8:=c_birth_country + │ └─ Repartition Exchange Est. 6000000 rows + │ │ Partition by: {ca_address_sk_12} + │ └─ TableScan tpcds1000_sample.customer_address Est. 6000000 rows + │ Outputs: ca_address_sk_12:=ca_address_sk, ca_state_12:=ca_state, ca_zip_10:=ca_zip, ca_country_8:=ca_country └─ Broadcast Exchange Est. 1 rows └─ Projection Est. 1 rows │ Expressions: expr#multiply('0.05', avg(netpaid))_1:='0.05' * `expr#avg(netpaid)_1` @@ -109,56 +202,53 @@ Projection Est. 1360 rows └─ Gather Exchange Est. 1 rows └─ Aggregating Est. 1 rows │ Group by: {} - │ Aggregates: expr#avg(netpaid)_1:=AggNull(avg)(expr#sum(ss_net_paid)_7) + │ Aggregates: expr#avg(netpaid)_1:=AggNull(avg)(expr#sum(ss_net_paid)_13) └─ Buffer Est. 194997154 rows - └─ CTERef[0] Est. 194997154 rows -CTEDef [0] - MergingAggregated Est. 
194997154 rows - └─ Repartition Exchange Est. 194997154 rows - │ Partition by: {c_last_name_5, c_first_name_5, s_store_name_5, ca_state_5, s_state_5, i_color_5, i_current_price_5, i_manager_id_5, i_units_5, i_size_5} - └─ Aggregating Est. 194997154 rows - │ Group by: {c_last_name_5, c_first_name_5, s_store_name_5, ca_state_5, s_state_5, i_color_5, i_current_price_5, i_manager_id_5, i_units_5, i_size_5} - │ Aggregates: expr#sum(ss_net_paid)_5:=AggNull(sum)(ss_net_paid_3) - └─ Inner (PARALLEL_HASH) Join Est. 194997154 rows - │ Condition: s_zip_3 == ca_zip_3, ss_customer_sk_5 == c_customer_sk_5 - ├─ Repartition Exchange Est. 487492878 rows - │ │ Partition by: {s_zip_3, ss_customer_sk_5} - │ └─ Inner (PARALLEL_HASH) Join Est. 487492878 rows - │ │ Condition: ss_item_sk_11 == sr_item_sk_7, ss_ticket_number_11 == sr_ticket_number_11 - │ ├─ Inner Join Est. 477123760 rows - │ │ │ Condition: ss_store_sk_9 == s_store_sk_9 - │ │ │ Runtime Filters Builder: {s_store_sk_9} - │ │ ├─ Filter Est. 2879987999 rows - │ │ │ │ Condition: Runtime Filters: {ss_store_sk_9} - │ │ │ └─ TableScan tpcds1000_sample.store_sales Est. 2879987999 rows - │ │ │ Where: Runtime Filters: {ss_store_sk} - │ │ │ Outputs: ss_item_sk_11:=ss_item_sk, ss_customer_sk_5:=ss_customer_sk, ss_store_sk_9:=ss_store_sk, ss_ticket_number_11:=ss_ticket_number, ss_net_paid_3:=ss_net_paid - │ │ └─ Broadcast Exchange Est. 84 rows - │ │ └─ Projection Est. 84 rows - │ │ │ Expressions: [s_state_5, s_store_name_5, s_store_sk_9, s_zip_3] - │ │ └─ Filter Est. 84 rows - │ │ │ Condition: s_market_id_9 = 8 - │ │ └─ TableScan tpcds1000_sample.store Est. 1002 rows - │ │ Where: s_market_id = 8 - │ │ Outputs: s_store_sk_9:=s_store_sk, s_store_name_5:=s_store_name, s_market_id_9:=s_market_id, s_state_5:=s_state, s_zip_3:=s_zip - │ └─ Inner Join Est. 287999764 rows - │ │ Condition: sr_item_sk_7 == i_item_sk_7 - │ ├─ TableScan tpcds1000_sample.store_returns Est. 
287999764 rows - │ │ Outputs: sr_item_sk_7:=sr_item_sk, sr_ticket_number_11:=sr_ticket_number - │ └─ TableScan tpcds1000_sample.item Est. 300000 rows - │ Outputs: i_item_sk_7:=i_item_sk, i_current_price_5:=i_current_price, i_size_5:=i_size, i_color_5:=i_color, i_units_5:=i_units, i_manager_id_5:=i_manager_id - └─ Repartition Exchange Est. 6000000 rows - │ Partition by: {ca_zip_3, c_customer_sk_5} - └─ Inner (PARALLEL_HASH) Join Est. 6000000 rows - │ Condition: c_current_addr_sk_3 == ca_address_sk_3 - │ Filter: c_birth_country_3 != upper(ca_country_3) - ├─ Repartition Exchange Est. 12000000 rows - │ │ Partition by: {c_current_addr_sk_3} - │ └─ TableScan tpcds1000_sample.customer Est. 12000000 rows - │ Outputs: c_customer_sk_5:=c_customer_sk, c_current_addr_sk_3:=c_current_addr_sk, c_first_name_5:=c_first_name, c_last_name_5:=c_last_name, c_birth_country_3:=c_birth_country - └─ Repartition Exchange Est. 6000000 rows - │ Partition by: {ca_address_sk_3} - └─ TableScan tpcds1000_sample.customer_address Est. 6000000 rows - Outputs: ca_address_sk_3:=ca_address_sk, ca_state_5:=ca_state, ca_zip_3:=ca_zip, ca_country_3:=ca_country -note: Runtime Filter is applied for 1 times. -note: CTE(Common Table Expression) is applied for 2 times. + └─ MergingAggregated Est. 194997154 rows + └─ Repartition Exchange Est. 197223785 rows + │ Partition by: {c_last_name_13, c_first_name_13, s_store_name_13, ca_state_13, s_state_13, i_color_13, i_current_price_13, i_manager_id_13, i_units_13, i_size_13} + └─ Aggregating Est. 197223785 rows + │ Group by: {c_last_name_13, c_first_name_13, s_store_name_13, ca_state_13, s_state_13, i_color_13, i_current_price_13, i_manager_id_13, i_units_13, i_size_13} + │ Aggregates: expr#sum(ss_net_paid)_13:=AggNull(sum)(ss_net_paid_9) + └─ Inner (PARALLEL_HASH) Join Est. 197223785 rows + │ Condition: s_zip_12 == ca_zip_12, ss_customer_sk_16 == c_customer_sk_16 + ├─ Repartition Exchange Est. 
487492878 rows + │ │ Partition by: {s_zip_12, ss_customer_sk_16} + │ └─ Inner (PARALLEL_HASH) Join Est. 487492879 rows + │ │ Condition: ss_item_sk_31 == sr_item_sk_26, ss_ticket_number_31 == sr_ticket_number_31 + │ ├─ Inner Join Est. 477123760 rows + │ │ │ Condition: ss_store_sk_30 == s_store_sk_30 + │ │ │ Runtime Filters Builder: {s_store_sk_30} + │ │ ├─ Filter Est. 2879987999 rows + │ │ │ │ Condition: Runtime Filters: {ss_store_sk_30} + │ │ │ └─ TableScan tpcds1000_sample.store_sales Est. 2879987999 rows + │ │ │ Where: Runtime Filters: {ss_store_sk} + │ │ │ Outputs: ss_item_sk_31:=ss_item_sk, ss_customer_sk_16:=ss_customer_sk, ss_store_sk_30:=ss_store_sk, ss_ticket_number_31:=ss_ticket_number, ss_net_paid_9:=ss_net_paid + │ │ └─ Broadcast Exchange Est. 84 rows + │ │ └─ Projection Est. 84 rows + │ │ │ Expressions: [s_state_13, s_store_name_13, s_store_sk_30, s_zip_12] + │ │ └─ Filter Est. 84 rows + │ │ │ Condition: s_market_id_30 = 8 + │ │ └─ TableScan tpcds1000_sample.store Est. 1002 rows + │ │ Where: s_market_id = 8 + │ │ Outputs: s_store_sk_30:=s_store_sk, s_store_name_13:=s_store_name, s_market_id_30:=s_market_id, s_state_13:=s_state, s_zip_12:=s_zip + │ └─ Inner Join Est. 287999764 rows + │ │ Condition: sr_item_sk_26 == i_item_sk_23 + │ ├─ TableScan tpcds1000_sample.store_returns Est. 287999764 rows + │ │ Outputs: sr_item_sk_26:=sr_item_sk, sr_ticket_number_31:=sr_ticket_number + │ └─ TableScan tpcds1000_sample.item Est. 300000 rows + │ Outputs: i_item_sk_23:=i_item_sk, i_current_price_13:=i_current_price, i_size_13:=i_size, i_color_13:=i_color, i_units_13:=i_units, i_manager_id_13:=i_manager_id + └─ Repartition Exchange Est. 6000000 rows + │ Partition by: {ca_zip_12, c_customer_sk_16} + └─ Inner (PARALLEL_HASH) Join Est. 6000000 rows + │ Condition: c_current_addr_sk_15 == ca_address_sk_15 + │ Filter: c_birth_country_9 != upper(ca_country_9) + ├─ Repartition Exchange Est. 
12000000 rows + │ │ Partition by: {c_current_addr_sk_15} + │ └─ TableScan tpcds1000_sample.customer Est. 12000000 rows + │ Outputs: c_customer_sk_16:=c_customer_sk, c_current_addr_sk_15:=c_current_addr_sk, c_first_name_13:=c_first_name, c_last_name_13:=c_last_name, c_birth_country_9:=c_birth_country + └─ Repartition Exchange Est. 6000000 rows + │ Partition by: {ca_address_sk_15} + └─ TableScan tpcds1000_sample.customer_address Est. 6000000 rows + Outputs: ca_address_sk_15:=ca_address_sk, ca_state_13:=ca_state, ca_zip_12:=ca_zip, ca_country_9:=ca_country +note: Runtime Filter is applied for 3 times. diff --git a/tests/optimizers/tpcds/explains/tpcds1000_sample/q31.explain b/tests/optimizers/tpcds/explains/tpcds1000_sample/q31.explain index 57374cd3b3e..588c8bc9d71 100644 --- a/tests/optimizers/tpcds/explains/tpcds1000_sample/q31.explain +++ b/tests/optimizers/tpcds/explains/tpcds1000_sample/q31.explain @@ -5,9 +5,9 @@ Projection Est. 59469 rows └─ Gather Exchange Est. 59469 rows └─ Sorting Est. 59469 rows │ Order by: {ca_county_3 ASC NULLS LAST} - └─ Projection Est. 59469 rows + └─ Projection Est. 59472 rows │ Expressions: [ca_county_3, d_year_1], expr#divide(ss2.store_sales, ss1.store_sales):=`expr#sum(ss_ext_sales_price)_2` / `expr#sum(ss_ext_sales_price)_1`, expr#divide(ss3.store_sales, ss2.store_sales):=`expr#sum(ss_ext_sales_price)_3` / `expr#sum(ss_ext_sales_price)_2`, expr#divide(ws2.web_sales, ws1.web_sales):=`expr#sum(ws_ext_sales_price)_2` / `expr#sum(ws_ext_sales_price)_1`, expr#divide(ws3.web_sales, ws2.web_sales):=`expr#sum(ws_ext_sales_price)_3` / `expr#sum(ws_ext_sales_price)_2` - └─ Inner Join Est. 59469 rows + └─ Inner Join Est. 59472 rows │ Condition: ca_county_3 == ca_county_1 │ Filter: multiIf(`expr#sum(ws_ext_sales_price)_1` > 0, `expr#sum(ws_ext_sales_price)_2` / `expr#sum(ws_ext_sales_price)_1`, NULL) > multiIf(`expr#sum(ss_ext_sales_price)_1` > 0, `expr#sum(ss_ext_sales_price)_2` / `expr#sum(ss_ext_sales_price)_1`, NULL) ├─ Inner Join Est. 
16319 rows diff --git a/tests/optimizers/tpcds/explains/tpcds1000_sample/q33.explain b/tests/optimizers/tpcds/explains/tpcds1000_sample/q33.explain index 8e90ed4766c..02abcc155ae 100644 --- a/tests/optimizers/tpcds/explains/tpcds1000_sample/q33.explain +++ b/tests/optimizers/tpcds/explains/tpcds1000_sample/q33.explain @@ -66,7 +66,8 @@ Projection Est. 100 rows │ │ └─ TableScan tpcds1000_sample.item Est. 300000 rows │ │ Where: Runtime Filters: {i_manufact_id} │ │ Outputs: [i_item_sk, i_manufact_id] - │ └─ CTERef[1] Est. 29887 rows + │ └─ Buffer Est. 29887 rows + │ └─ CTERef[1] Est. 29887 rows ├─ MergingAggregated Est. 99 rows │ └─ Repartition Exchange Est. 99 rows │ │ Partition by: {i_manufact_id_2} @@ -119,7 +120,8 @@ Projection Est. 100 rows │ │ └─ TableScan tpcds1000_sample.item Est. 300000 rows │ │ Where: Runtime Filters: {i_manufact_id} │ │ Outputs: i_item_sk_1:=i_item_sk, i_manufact_id_2:=i_manufact_id - │ └─ CTERef[1] Est. 29887 rows + │ └─ Buffer Est. 29887 rows + │ └─ CTERef[1] Est. 29887 rows └─ MergingAggregated Est. 99 rows └─ Repartition Exchange Est. 99 rows │ Partition by: {i_manufact_id_4} @@ -172,7 +174,8 @@ Projection Est. 100 rows │ └─ TableScan tpcds1000_sample.item Est. 300000 rows │ Where: Runtime Filters: {i_manufact_id} │ Outputs: i_item_sk_2:=i_item_sk, i_manufact_id_4:=i_manufact_id - └─ CTERef[1] Est. 29887 rows + └─ Buffer Est. 29887 rows + └─ CTERef[1] Est. 29887 rows CTEDef [1] Repartition Exchange Est. 29887 rows │ Partition by: {i_manufact_id_1} diff --git a/tests/optimizers/tpcds/explains/tpcds1000_sample/q54.explain b/tests/optimizers/tpcds/explains/tpcds1000_sample/q54.explain index ecf86cf44fe..1623d083483 100644 --- a/tests/optimizers/tpcds/explains/tpcds1000_sample/q54.explain +++ b/tests/optimizers/tpcds/explains/tpcds1000_sample/q54.explain @@ -29,7 +29,7 @@ Projection Est. 100 rows │ Condition: ss_sold_date_sk == d_date_sk_1 ├─ Repartition Exchange Est. 1085260 rows │ │ Partition by: {ss_sold_date_sk} - │ └─ Inner Join Est. 
1085260 rows + │ └─ Inner Join Est. 1104632 rows │ │ Condition: ss_customer_sk == c_customer_sk │ │ Runtime Filters Builder: {c_customer_sk} │ ├─ Filter Est. 2879987999 rows @@ -123,7 +123,8 @@ Projection Est. 100 rows │ │ Group by: {expr#plus(d_month_seq, 1)} │ └─ Projection Est. 30 rows │ │ Expressions: expr#plus(d_month_seq, 1):=d_month_seq_1 + 1 - │ └─ CTERef[1] Est. 30 rows + │ └─ Buffer Est. 30 rows + │ └─ CTERef[1] Est. 30 rows └─ Broadcast Exchange Est. 1 rows └─ EnforceSingleRow Est. 1 rows └─ Gather Exchange Est. 1 rows @@ -136,7 +137,8 @@ Projection Est. 100 rows │ Group by: {expr#plus(d_month_seq, 3)} └─ Projection Est. 30 rows │ Expressions: expr#plus(d_month_seq, 3):=d_month_seq_2 + 3 - └─ CTERef[1] Est. 30 rows + └─ Buffer Est. 30 rows + └─ CTERef[1] Est. 30 rows CTEDef [1] Projection Est. 30 rows │ Expressions: [d_month_seq_1] diff --git a/tests/optimizers/tpcds/explains/tpcds1000_sample/q56.explain b/tests/optimizers/tpcds/explains/tpcds1000_sample/q56.explain index 6684e5b2db2..e4e6af51af9 100644 --- a/tests/optimizers/tpcds/explains/tpcds1000_sample/q56.explain +++ b/tests/optimizers/tpcds/explains/tpcds1000_sample/q56.explain @@ -20,7 +20,7 @@ Projection Est. 100 rows │ └─ Aggregating Est. 4015 rows │ │ Group by: {i_item_id} │ │ Aggregates: expr#sum(ss_ext_sales_price):=AggNull(sum)(ss_ext_sales_price) - │ └─ Inner (PARALLEL_HASH) Join Est. 856495 rows + │ └─ Inner (PARALLEL_HASH) Join Est. 2189168 rows │ │ Condition: ss_addr_sk == ca_address_sk │ │ Runtime Filters Builder: {ca_address_sk} │ ├─ Repartition Exchange Est. 5166069 rows @@ -55,7 +55,8 @@ Projection Est. 100 rows │ │ │ └─ TableScan tpcds1000_sample.item Est. 300000 rows │ │ │ Where: Runtime Filters: {i_item_id} │ │ │ Outputs: [i_item_sk, i_item_id] - │ │ └─ CTERef[1] Est. 8050 rows + │ │ └─ Buffer Est. 8050 rows + │ │ └─ CTERef[1] Est. 8050 rows │ └─ Repartition Exchange Est. 2189168 rows │ │ Partition by: {ca_address_sk} │ └─ Projection Est. 
2189168 rows @@ -71,7 +72,7 @@ Projection Est. 100 rows │ └─ Aggregating Est. 4015 rows │ │ Group by: {i_item_id_2} │ │ Aggregates: expr#sum(cs_ext_sales_price):=AggNull(sum)(cs_ext_sales_price) - │ └─ Inner Join Est. 425211 rows + │ └─ Inner Join Est. 1519060 rows │ │ Condition: ca_address_sk_1 == cs_bill_addr_sk │ │ Runtime Filters Builder: {cs_bill_addr_sk} │ ├─ Repartition Exchange Est. 2189168 rows @@ -115,14 +116,15 @@ Projection Est. 100 rows │ │ └─ TableScan tpcds1000_sample.item Est. 300000 rows │ │ Where: Runtime Filters: {i_item_id} │ │ Outputs: i_item_sk_1:=i_item_sk, i_item_id_2:=i_item_id - │ └─ CTERef[1] Est. 8050 rows + │ └─ Buffer Est. 8050 rows + │ └─ CTERef[1] Est. 8050 rows └─ MergingAggregated Est. 4015 rows └─ Repartition Exchange Est. 4015 rows │ Partition by: {i_item_id_4} └─ Aggregating Est. 4015 rows │ Group by: {i_item_id_4} │ Aggregates: expr#sum(ws_ext_sales_price):=AggNull(sum)(ws_ext_sales_price) - └─ Inner Join Est. 205978 rows + └─ Inner Join Est. 603388 rows │ Condition: ca_address_sk_2 == ws_bill_addr_sk │ Runtime Filters Builder: {ws_bill_addr_sk} ├─ Repartition Exchange Est. 2189168 rows @@ -166,7 +168,8 @@ Projection Est. 100 rows │ └─ TableScan tpcds1000_sample.item Est. 300000 rows │ Where: Runtime Filters: {i_item_id} │ Outputs: i_item_sk_2:=i_item_sk, i_item_id_4:=i_item_id - └─ CTERef[1] Est. 8050 rows + └─ Buffer Est. 8050 rows + └─ CTERef[1] Est. 8050 rows CTEDef [1] Repartition Exchange Est. 8050 rows │ Partition by: {i_item_id_1} diff --git a/tests/optimizers/tpcds/explains/tpcds1000_sample/q58.explain b/tests/optimizers/tpcds/explains/tpcds1000_sample/q58.explain index c47762e63d4..2b3adad9f26 100644 --- a/tests/optimizers/tpcds/explains/tpcds1000_sample/q58.explain +++ b/tests/optimizers/tpcds/explains/tpcds1000_sample/q58.explain @@ -21,7 +21,7 @@ Projection Est. 100 rows │ └─ Aggregating Est. 
149640 rows │ │ Group by: {i_item_id_1} │ │ Aggregates: expr#sum(cs_ext_sales_price):=AggNull(sum)(cs_ext_sales_price) - │ └─ Inner Join Est. 4721247 rows + │ └─ Inner Join Est. 4705818 rows │ │ Condition: cs_item_sk == i_item_sk_1 │ ├─ Inner Join Est. 4705818 rows │ │ │ Condition: cs_sold_date_sk == d_date_sk_1 @@ -41,7 +41,8 @@ Projection Est. 100 rows │ │ │ Where: Runtime Filters: {d_date} │ │ │ Outputs: d_date_sk_1:=d_date_sk, d_date_3:=d_date │ │ └─ Broadcast Exchange Est. 6 rows - │ │ └─ CTERef[1] Est. 6 rows + │ │ └─ Buffer Est. 6 rows + │ │ └─ CTERef[1] Est. 6 rows │ └─ Filter Est. 300000 rows │ │ Condition: Runtime Filters: {i_item_id_1} │ └─ TableScan tpcds1000_sample.item Est. 300000 rows @@ -57,7 +58,7 @@ Projection Est. 100 rows │ └─ Aggregating Est. 149640 rows │ │ Group by: {i_item_id} │ │ Aggregates: expr#sum(ss_ext_sales_price):=AggNull(sum)(ss_ext_sales_price) - │ └─ Inner Join Est. 9510141 rows + │ └─ Inner Join Est. 9478841 rows │ │ Condition: ss_item_sk == i_item_sk │ ├─ Inner Join Est. 9478841 rows │ │ │ Condition: ss_sold_date_sk == d_date_sk @@ -77,7 +78,8 @@ Projection Est. 100 rows │ │ │ Where: Runtime Filters: {d_date} │ │ │ Outputs: [d_date_sk, d_date] │ │ └─ Broadcast Exchange Est. 6 rows - │ │ └─ CTERef[1] Est. 6 rows + │ │ └─ Buffer Est. 6 rows + │ │ └─ CTERef[1] Est. 6 rows │ └─ Filter Est. 300000 rows │ │ Condition: Runtime Filters: {i_item_id} │ └─ TableScan tpcds1000_sample.item Est. 300000 rows @@ -89,7 +91,7 @@ Projection Est. 100 rows └─ Aggregating Est. 149640 rows │ Group by: {i_item_id_2} │ Aggregates: expr#sum(ws_ext_sales_price):=AggNull(sum)(ws_ext_sales_price) - └─ Inner Join Est. 2281747 rows + └─ Inner Join Est. 2274236 rows │ Condition: ws_item_sk == i_item_sk_2 ├─ Inner Join Est. 2369721 rows │ │ Condition: ws_sold_date_sk == d_date_sk_2 @@ -109,7 +111,8 @@ Projection Est. 100 rows │ │ Where: Runtime Filters: {d_date} │ │ Outputs: d_date_sk_2:=d_date_sk, d_date_6:=d_date │ └─ Broadcast Exchange Est. 
6 rows - │ └─ CTERef[1] Est. 6 rows + │ └─ Buffer Est. 6 rows + │ └─ CTERef[1] Est. 6 rows └─ TableScan tpcds1000_sample.item Est. 300000 rows Outputs: i_item_sk_2:=i_item_sk, i_item_id_2:=i_item_id CTEDef [1] diff --git a/tests/optimizers/tpcds/explains/tpcds1000_sample/q59.explain b/tests/optimizers/tpcds/explains/tpcds1000_sample/q59.explain index 2bc77737bb9..f2bb4c1001a 100644 --- a/tests/optimizers/tpcds/explains/tpcds1000_sample/q59.explain +++ b/tests/optimizers/tpcds/explains/tpcds1000_sample/q59.explain @@ -15,9 +15,9 @@ Projection Est. 100 rows │ Condition: expr#cast(minus(d_week_seq_3, 52), 'Int32') == expr#cast(d_week_seq_1, 'Int32'), s_store_id_1 == s_store_id ├─ Repartition Exchange Est. 150901 rows │ │ Partition by: {expr#cast(minus(d_week_seq_3, 52), 'Int32'), s_store_id_1} - │ └─ Projection Est. 150901 rows + │ └─ Projection Est. 150598 rows │ │ Expressions: [expr#sum(multiIf(equals(d_day_name, 'Friday'), ss_sales_price, NULL))_4, expr#sum(multiIf(equals(d_day_name, 'Monday'), ss_sales_price, NULL))_4, expr#sum(multiIf(equals(d_day_name, 'Saturday'), ss_sales_price, NULL))_4, expr#sum(multiIf(equals(d_day_name, 'Sunday'), ss_sales_price, NULL))_4, expr#sum(multiIf(equals(d_day_name, 'Thursday'), ss_sales_price, NULL))_4, expr#sum(multiIf(equals(d_day_name, 'Tuesday'), ss_sales_price, NULL))_4, expr#sum(multiIf(equals(d_day_name, 'Wednesday'), ss_sales_price, NULL))_4, s_store_id_1], expr#cast(minus(d_week_seq_3, 52), 'Int32'):=cast(d_week_seq_6 - 52, 'Int32') - │ └─ Inner Join Est. 150901 rows + │ └─ Inner Join Est. 150598 rows │ │ Condition: d_week_seq_6 == d_week_seq_4 │ │ Runtime Filters Builder: {d_week_seq_4} │ ├─ Inner Join Est. 21192 rows @@ -50,18 +50,20 @@ Projection Est. 100 rows │ │ │ │ Where: Runtime Filters: {d_week_seq} │ │ │ │ Outputs: d_date_sk_2:=d_date_sk, d_week_seq_6:=d_week_seq, d_day_name_2:=d_day_name │ │ │ └─ Broadcast Exchange Est. 334 rows - │ │ │ └─ CTERef[2] Est. 334 rows + │ │ │ └─ Buffer Est. 
334 rows + │ │ │ └─ CTERef[2] Est. 334 rows │ │ └─ Repartition Exchange Est. 1002 rows │ │ │ Partition by: {s_store_sk_1} │ │ └─ TableScan tpcds1000_sample.store Est. 1002 rows │ │ Outputs: s_store_sk_1:=s_store_sk, s_store_id_1:=s_store_id │ └─ Broadcast Exchange Est. 334 rows - │ └─ CTERef[2] Est. 334 rows + │ └─ Buffer Est. 334 rows + │ └─ CTERef[2] Est. 334 rows └─ Repartition Exchange Est. 150449 rows │ Partition by: {expr#cast(d_week_seq_1, 'Int32'), s_store_id} - └─ Projection Est. 150449 rows + └─ Projection Est. 150147 rows │ Expressions: [d_week_seq_5, expr#sum(multiIf(equals(d_day_name, 'Friday'), ss_sales_price, NULL))_3, expr#sum(multiIf(equals(d_day_name, 'Monday'), ss_sales_price, NULL))_3, expr#sum(multiIf(equals(d_day_name, 'Saturday'), ss_sales_price, NULL))_3, expr#sum(multiIf(equals(d_day_name, 'Sunday'), ss_sales_price, NULL))_3, expr#sum(multiIf(equals(d_day_name, 'Thursday'), ss_sales_price, NULL))_3, expr#sum(multiIf(equals(d_day_name, 'Tuesday'), ss_sales_price, NULL))_3, expr#sum(multiIf(equals(d_day_name, 'Wednesday'), ss_sales_price, NULL))_3, s_store_id, s_store_name], expr#cast(d_week_seq_1, 'Int32'):=cast(d_week_seq_5, 'Int32') - └─ Inner Join Est. 150449 rows + └─ Inner Join Est. 150147 rows │ Condition: d_week_seq_5 == d_week_seq_2 │ Runtime Filters Builder: {d_week_seq_2} ├─ Inner Join Est. 21192 rows @@ -94,13 +96,15 @@ Projection Est. 100 rows │ │ │ Where: Runtime Filters: {d_week_seq} │ │ │ Outputs: d_date_sk_1:=d_date_sk, d_week_seq_5:=d_week_seq, d_day_name_1:=d_day_name │ │ └─ Broadcast Exchange Est. 333 rows - │ │ └─ CTERef[1] Est. 333 rows + │ │ └─ Buffer Est. 333 rows + │ │ └─ CTERef[1] Est. 333 rows │ └─ Repartition Exchange Est. 1002 rows │ │ Partition by: {s_store_sk} │ └─ TableScan tpcds1000_sample.store Est. 1002 rows │ Outputs: [s_store_sk, s_store_id, s_store_name] └─ Broadcast Exchange Est. 333 rows - └─ CTERef[1] Est. 333 rows + └─ Buffer Est. 333 rows + └─ CTERef[1] Est. 333 rows CTEDef [1] Projection Est. 
333 rows │ Expressions: [d_week_seq_2] diff --git a/tests/optimizers/tpcds/explains/tpcds1000_sample/q60.explain b/tests/optimizers/tpcds/explains/tpcds1000_sample/q60.explain index 5cf73b1fd07..76f0b3fc9a5 100644 --- a/tests/optimizers/tpcds/explains/tpcds1000_sample/q60.explain +++ b/tests/optimizers/tpcds/explains/tpcds1000_sample/q60.explain @@ -20,7 +20,7 @@ Projection Est. 100 rows │ └─ Aggregating Est. 14965 rows │ │ Group by: {i_item_id} │ │ Aggregates: expr#sum(ss_ext_sales_price):=AggNull(sum)(ss_ext_sales_price) - │ └─ Inner (PARALLEL_HASH) Join Est. 3306347 rows + │ └─ Inner (PARALLEL_HASH) Join Est. 7178201 rows │ │ Condition: ss_addr_sk == ca_address_sk │ │ Runtime Filters Builder: {ca_address_sk} │ ├─ Repartition Exchange Est. 19943464 rows @@ -55,7 +55,8 @@ Projection Est. 100 rows │ │ │ └─ TableScan tpcds1000_sample.item Est. 300000 rows │ │ │ Where: Runtime Filters: {i_item_id} │ │ │ Outputs: [i_item_sk, i_item_id] - │ │ └─ CTERef[1] Est. 30004 rows + │ │ └─ Buffer Est. 30004 rows + │ │ └─ CTERef[1] Est. 30004 rows │ └─ Repartition Exchange Est. 2189168 rows │ │ Partition by: {ca_address_sk} │ └─ Projection Est. 2189168 rows @@ -71,7 +72,7 @@ Projection Est. 100 rows │ └─ Aggregating Est. 14965 rows │ │ Group by: {i_item_id_2} │ │ Aggregates: expr#sum(cs_ext_sales_price):=AggNull(sum)(cs_ext_sales_price) - │ └─ Inner (PARALLEL_HASH) Join Est. 1641452 rows + │ └─ Inner (PARALLEL_HASH) Join Est. 2189168 rows │ │ Condition: cs_bill_addr_sk == ca_address_sk_1 │ │ Runtime Filters Builder: {ca_address_sk_1} │ ├─ Repartition Exchange Est. 5784990 rows @@ -106,7 +107,8 @@ Projection Est. 100 rows │ │ │ └─ TableScan tpcds1000_sample.item Est. 300000 rows │ │ │ Where: Runtime Filters: {i_item_id} │ │ │ Outputs: i_item_sk_1:=i_item_sk, i_item_id_2:=i_item_id - │ │ └─ CTERef[1] Est. 30004 rows + │ │ └─ Buffer Est. 30004 rows + │ │ └─ CTERef[1] Est. 30004 rows │ └─ Repartition Exchange Est. 2189168 rows │ │ Partition by: {ca_address_sk_1} │ └─ Projection Est. 
2189168 rows @@ -122,7 +124,7 @@ Projection Est. 100 rows └─ Aggregating Est. 14965 rows │ Group by: {i_item_id_4} │ Aggregates: expr#sum(ws_ext_sales_price):=AggNull(sum)(ws_ext_sales_price) - └─ Inner (PARALLEL_HASH) Join Est. 795144 rows + └─ Inner (PARALLEL_HASH) Join Est. 2189168 rows │ Condition: ws_bill_addr_sk == ca_address_sk_2 │ Runtime Filters Builder: {ca_address_sk_2} ├─ Repartition Exchange Est. 2297866 rows @@ -157,7 +159,8 @@ Projection Est. 100 rows │ │ └─ TableScan tpcds1000_sample.item Est. 300000 rows │ │ Where: Runtime Filters: {i_item_id} │ │ Outputs: i_item_sk_2:=i_item_sk, i_item_id_4:=i_item_id - │ └─ CTERef[1] Est. 30004 rows + │ └─ Buffer Est. 30004 rows + │ └─ CTERef[1] Est. 30004 rows └─ Repartition Exchange Est. 2189168 rows │ Partition by: {ca_address_sk_2} └─ Projection Est. 2189168 rows diff --git a/tests/optimizers/tpcds/explains/tpcds1000_sample/q64.explain b/tests/optimizers/tpcds/explains/tpcds1000_sample/q64.explain index a2fd69a2c43..5e5847df360 100644 --- a/tests/optimizers/tpcds/explains/tpcds1000_sample/q64.explain +++ b/tests/optimizers/tpcds/explains/tpcds1000_sample/q64.explain @@ -24,13 +24,13 @@ CTEDef [0] Projection Est. 1225046 rows │ Expressions: [ca_city, ca_city_1, ca_street_name, ca_street_name_1, ca_street_number, ca_street_number_1, ca_zip, ca_zip_1, d_year, expr#count(), expr#sum(ss_coupon_amt), expr#sum(ss_list_price), expr#sum(ss_wholesale_cost), i_product_name, s_store_name, s_zip], i_item_sk:=ss_item_sk └─ MergingAggregated Est. 1225046 rows - └─ Repartition Exchange Est. 1225046 rows + └─ Repartition Exchange Est. 608931 rows │ Partition by: {i_product_name, ss_item_sk, s_store_name, s_zip, ca_street_number, ca_street_name, ca_city, ca_zip, ca_street_number_1, ca_street_name_1, ca_city_1, ca_zip_1, d_year, d_year_1, d_year_2} - └─ Aggregating Est. 1225046 rows + └─ Aggregating Est. 
608931 rows │ Group by: {i_product_name, ss_item_sk, s_store_name, s_zip, ca_street_number, ca_street_name, ca_city, ca_zip, ca_street_number_1, ca_street_name_1, ca_city_1, ca_zip_1, d_year, d_year_1, d_year_2} │ Group by keys not hashed: {i_product_name} │ Aggregates: expr#count():=AggNull(count)(), expr#sum(ss_wholesale_cost):=AggNull(sum)(ss_wholesale_cost), expr#sum(ss_list_price):=AggNull(sum)(ss_list_price), expr#sum(ss_coupon_amt):=AggNull(sum)(ss_coupon_amt) - └─ Inner Join Est. 1225046 rows + └─ Inner Join Est. 608931 rows │ Condition: ca_address_sk_1 == c_current_addr_sk │ Runtime Filters Builder: {c_current_addr_sk} ├─ Repartition Exchange Est. 6000000 rows @@ -42,7 +42,7 @@ CTEDef [0] │ Outputs: ca_address_sk_1:=ca_address_sk, ca_street_number_1:=ca_street_number, ca_street_name_1:=ca_street_name, ca_city_1:=ca_city, ca_zip_1:=ca_zip └─ Repartition Exchange Est. 608931 rows │ Partition by: {c_current_addr_sk} - └─ Inner Join Est. 608931 rows + └─ Inner Join Est. 604785 rows │ Condition: ca_address_sk == ss_addr_sk │ Runtime Filters Builder: {ss_addr_sk} ├─ Repartition Exchange Est. 6000000 rows diff --git a/tests/optimizers/tpcds/explains/tpcds1000_sample/q74.explain b/tests/optimizers/tpcds/explains/tpcds1000_sample/q74.explain index 1acbe5f1ad7..76d52191f81 100644 --- a/tests/optimizers/tpcds/explains/tpcds1000_sample/q74.explain +++ b/tests/optimizers/tpcds/explains/tpcds1000_sample/q74.explain @@ -9,7 +9,7 @@ Projection Est. 100 rows └─ Sorting Est. 100 rows │ Order by: {c_customer_id_7 ASC NULLS LAST, c_customer_id_7 ASC NULLS LAST, c_customer_id_7 ASC NULLS LAST} │ Limit: 100 - └─ Inner (PARALLEL_HASH) Join Est. 7365203 rows + └─ Inner (PARALLEL_HASH) Join Est. 
7340429 rows │ Condition: ss_customer_sk_5 == c_customer_sk_2 │ Filter: multiIf(`expr#sum(ss_net_paid)_4` > 0, `expr#sum(ss_net_paid)_5` / `expr#sum(ss_net_paid)_4`, NULL) > multiIf(`expr#sum(ss_net_paid)_2` > 0, `expr#sum(ss_net_paid)_3` / `expr#sum(ss_net_paid)_2`, NULL) ├─ Inner (PARALLEL_HASH) Join Est. 4785392 rows @@ -28,7 +28,7 @@ Projection Est. 100 rows │ └─ Local Exchange Est. 10368680 rows │ └─ Buffer Est. 10368680 rows │ └─ CTERef[0] Est. 10368680 rows - └─ Inner (PARALLEL_HASH) Join Est. 4546419 rows + └─ Inner (PARALLEL_HASH) Join Est. 4531127 rows │ Condition: ss_customer_sk_3 == c_customer_sk_2 ├─ Projection Est. 2592170 rows │ │ Expressions: [expr#sum(ss_net_paid)_3, ss_customer_sk_3] diff --git a/tests/optimizers/tpcds/explains/tpcds1000_sample/q83.explain b/tests/optimizers/tpcds/explains/tpcds1000_sample/q83.explain index c4d57761640..a145133c6b5 100644 --- a/tests/optimizers/tpcds/explains/tpcds1000_sample/q83.explain +++ b/tests/optimizers/tpcds/explains/tpcds1000_sample/q83.explain @@ -23,7 +23,7 @@ Projection Est. 100 rows │ │ └─ Aggregating Est. 149640 rows │ │ │ Group by: {i_item_id_1} │ │ │ Aggregates: expr#sum(cr_return_quantity):=AggNull(sum)(cr_return_quantity) - │ │ └─ Inner Join Est. 1372705 rows + │ │ └─ Inner Join Est. 1368790 rows │ │ │ Condition: cr_item_sk == i_item_sk_1 │ │ ├─ Inner Join Est. 1368790 rows │ │ │ │ Condition: cr_returned_date_sk == d_date_sk_1 @@ -52,7 +52,8 @@ Projection Est. 100 rows │ │ │ │ Where: Runtime Filters: {d_week_seq} │ │ │ │ Outputs: d_date_4:=d_date, d_week_seq_2:=d_week_seq │ │ │ └─ Broadcast Exchange Est. 3 rows - │ │ │ └─ CTERef[1] Est. 3 rows + │ │ │ └─ Buffer Est. 3 rows + │ │ │ └─ CTERef[1] Est. 3 rows │ │ └─ Filter Est. 300000 rows │ │ │ Condition: Runtime Filters: {i_item_id_1} │ │ └─ TableScan tpcds1000_sample.item Est. 300000 rows @@ -64,7 +65,7 @@ Projection Est. 100 rows │ └─ Aggregating Est. 
149640 rows │ │ Group by: {i_item_id} │ │ Aggregates: expr#sum(sr_return_quantity):=AggNull(sum)(sr_return_quantity) - │ └─ Inner Join Est. 2884324 rows + │ └─ Inner Join Est. 2875684 rows │ │ Condition: sr_item_sk == i_item_sk │ ├─ Inner Join Est. 2875684 rows │ │ │ Condition: sr_returned_date_sk == d_date_sk @@ -93,7 +94,8 @@ Projection Est. 100 rows │ │ │ Where: Runtime Filters: {d_week_seq} │ │ │ Outputs: [d_week_seq], d_date_1:=d_date │ │ └─ Broadcast Exchange Est. 3 rows - │ │ └─ CTERef[1] Est. 3 rows + │ │ └─ Buffer Est. 3 rows + │ │ └─ CTERef[1] Est. 3 rows │ └─ Filter Est. 300000 rows │ │ Condition: Runtime Filters: {i_item_id} │ └─ TableScan tpcds1000_sample.item Est. 300000 rows @@ -105,7 +107,7 @@ Projection Est. 100 rows └─ Aggregating Est. 149640 rows │ Group by: {i_item_id_2} │ Aggregates: expr#sum(wr_return_quantity):=AggNull(sum)(wr_return_quantity) - └─ Inner Join Est. 661741 rows + └─ Inner Join Est. 659619 rows │ Condition: wr_item_sk == i_item_sk_2 ├─ Inner Join Est. 659619 rows │ │ Condition: wr_returned_date_sk == d_date_sk_2 @@ -134,7 +136,8 @@ Projection Est. 100 rows │ │ Where: Runtime Filters: {d_week_seq} │ │ Outputs: d_date_7:=d_date, d_week_seq_4:=d_week_seq │ └─ Broadcast Exchange Est. 3 rows - │ └─ CTERef[1] Est. 3 rows + │ └─ Buffer Est. 3 rows + │ └─ CTERef[1] Est. 3 rows └─ TableScan tpcds1000_sample.item Est. 
300000 rows Outputs: i_item_sk_2:=i_item_sk, i_item_id_2:=i_item_id CTEDef [1] diff --git a/tests/queries/4_cnch_stateless/40091_distinct_aggregate_to_expand.reference b/tests/queries/4_cnch_stateless/40091_distinct_aggregate_to_expand.reference new file mode 100644 index 00000000000..95815b2fabd --- /dev/null +++ b/tests/queries/4_cnch_stateless/40091_distinct_aggregate_to_expand.reference @@ -0,0 +1,2 @@ +1 +nan diff --git a/tests/queries/4_cnch_stateless/40091_distinct_aggregate_to_expand.sql b/tests/queries/4_cnch_stateless/40091_distinct_aggregate_to_expand.sql new file mode 100644 index 00000000000..cebda2bdaaa --- /dev/null +++ b/tests/queries/4_cnch_stateless/40091_distinct_aggregate_to_expand.sql @@ -0,0 +1,6 @@ +drop table if exists t1; +CREATE TABLE t1(c1 UInt64, c2 String, c3 Int32) ENGINE = CnchMergeTree PARTITION BY c1 ORDER BY c1; +insert into t1 values (1, 'a', 1); +insert into t1 values (2, 'b', 1); +select count(distinct c2) / sum(c3) from t1; +select count(distinct c2) / sum(c3) from t1 where c1 > 10; diff --git a/tests/queries/4_cnch_stateless/60006_symbol_reallocate_duplicate_group_by.reference b/tests/queries/4_cnch_stateless/60006_symbol_reallocate_duplicate_group_by.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/4_cnch_stateless/60006_symbol_reallocate_duplicate_group_by.sql b/tests/queries/4_cnch_stateless/60006_symbol_reallocate_duplicate_group_by.sql new file mode 100644 index 00000000000..359cab5562b --- /dev/null +++ b/tests/queries/4_cnch_stateless/60006_symbol_reallocate_duplicate_group_by.sql @@ -0,0 +1,4 @@ +drop table if exists 60006_t1; +create table 60006_t1 (p_date Date, app_id Int32, id Int32) engine = CnchMergeTree +partition by (p_date, app_id) order by id; +select t1.app_id, t2.app_id from 60006_t1 t1, 60006_t1 t2 where t1.app_id = t2.app_id group by t1.app_id, t2.app_id; diff --git a/tests/queries/4_cnch_stateless_no_tenant/40047_duplicate_subquery_expression_planning.reference 
b/tests/queries/4_cnch_stateless_no_tenant/40047_duplicate_subquery_expression_planning.reference index b1870984b07..e6acc954e15 100644 --- a/tests/queries/4_cnch_stateless_no_tenant/40047_duplicate_subquery_expression_planning.reference +++ b/tests/queries/4_cnch_stateless_no_tenant/40047_duplicate_subquery_expression_planning.reference @@ -85,30 +85,36 @@ Projection Est. ? rows └─ TableScan test.t40047_y Est. ? rows Outputs: [c] Projection Est. ? rows -│ Expressions: _subquery1:=`expr#max(c)_1`, plus(_subquery2, 1):=`expr#plus(_subquery2, 1)` +│ Expressions: _subquery1:=`expr#max(c)_7`, plus(_subquery2, 1):=`expr#plus(_subquery2, 1)` └─ Gather Exchange Est. ? rows └─ Projection Est. ? rows - │ Expressions: [expr#max(c)_1], expr#plus(_subquery2, 1):=`expr#max(y2.c)_1` + 1 + │ Expressions: [expr#max(c)_7], expr#plus(_subquery2, 1):=`expr#max(c)_9` + 1 └─ Inner Join Est. ? rows │ Condition: ├─ Inner Join Est. ? rows │ │ Condition: │ ├─ TableScan test.t40047_x Est. ? rows │ │ Outputs: [a] - │ └─ CTERef[1] Est. ? rows - └─ CTERef[1] Est. ? rows -CTEDef [1] - Broadcast Exchange Est. ? rows - └─ Projection Est. ? rows - │ Expressions: expr#max(c)_1:=cast(`expr#max(c)`, \'Nullable(Int32)\') - └─ MergingAggregated Est. ? rows - └─ Gather Exchange Est. ? rows - └─ Aggregating Est. ? rows - │ Group by: {} - │ Aggregates: expr#max(c):=AggNull(max)(c) - └─ TableScan test.t40047_y Est. ? rows - Outputs: [c] -note: CTE(Common Table Expression) is applied for 2 times. + │ └─ Broadcast Exchange Est. ? rows + │ └─ Projection Est. ? rows + │ │ Expressions: expr#max(c)_7:=cast(`expr#max(c)_6`, \'Nullable(Int32)\') + │ └─ MergingAggregated Est. ? rows + │ └─ Gather Exchange Est. ? rows + │ └─ Aggregating Est. ? rows + │ │ Group by: {} + │ │ Aggregates: expr#max(c)_6:=AggNull(max)(c_4) + │ └─ TableScan test.t40047_y Est. ? rows + │ Outputs: c_4:=c + └─ Broadcast Exchange Est. ? rows + └─ Projection Est. ? 
rows + │ Expressions: expr#max(c)_9:=cast(`expr#max(c)_8`, \'Nullable(Int32)\') + └─ MergingAggregated Est. ? rows + └─ Gather Exchange Est. ? rows + └─ Aggregating Est. ? rows + │ Group by: {} + │ Aggregates: expr#max(c)_8:=AggNull(max)(c_5) + └─ TableScan test.t40047_y Est. ? rows + Outputs: c_5:=c Projection Est. ? rows │ Expressions: _subquery1:=`expr#max(c)_1`, plus(_subquery2, 1):=`expr#plus(_subquery2, 1)` └─ Gather Exchange Est. ? rows diff --git a/tests/queries/4_cnch_stateless_no_tenant/40052_deadlock_cte.reference b/tests/queries/4_cnch_stateless_no_tenant/40052_deadlock_cte.reference index 5fb005ecb3d..ba24dfdd25b 100644 --- a/tests/queries/4_cnch_stateless_no_tenant/40052_deadlock_cte.reference +++ b/tests/queries/4_cnch_stateless_no_tenant/40052_deadlock_cte.reference @@ -20,7 +20,8 @@ Projection Est. ? rows └─ Inner Join Est. ? rows │ Condition: expr#rand(1)_1 == expr#rand(2)_1 ├─ CTERef[0] Est. 1 rows - └─ CTERef[1] Est. 1 rows + └─ Buffer Est. 1 rows + └─ CTERef[1] Est. 1 rows CTEDef [0] Projection Est. 1 rows │ Expressions: expr#rand(1):=rand(1) @@ -40,7 +41,8 @@ Projection Est. ? rows │ │ Condition: expr#rand(1)_1 == expr#rand(2)_1 │ ├─ Buffer Est. 1 rows │ │ └─ CTERef[0] Est. 1 rows - │ └─ CTERef[1] Est. 1 rows + │ └─ Buffer Est. 1 rows + │ └─ CTERef[1] Est. 1 rows └─ Broadcast Exchange Est. 1 rows └─ Buffer Est. 1 rows └─ CTERef[0] Est. 1 rows @@ -62,8 +64,10 @@ Projection Est. ? rows ├─ Inner Join Est. ? rows │ │ Condition: expr#rand(1)_1 == expr#rand(2)_1 │ ├─ CTERef[0] Est. 1 rows - │ └─ CTERef[1] Est. 1 rows - └─ CTERef[1] Est. 1 rows + │ └─ Buffer Est. 1 rows + │ └─ CTERef[1] Est. 1 rows + └─ Buffer Est. 1 rows + └─ CTERef[1] Est. 1 rows CTEDef [0] Projection Est. 1 rows │ Expressions: expr#rand(1):=rand(1) @@ -86,7 +90,8 @@ Projection Est. ? rows │ Condition: expr#rand(1)_2 == expr#rand(2)_1 ├─ Buffer Est. 1 rows │ └─ CTERef[0] Est. 1 rows - └─ CTERef[1] Est. 1 rows + └─ Buffer Est. 1 rows + └─ CTERef[1] Est. 
1 rows CTEDef [0] Projection Est. 1 rows │ Expressions: expr#rand(1):=rand(1) @@ -107,7 +112,8 @@ Projection Est. ? rows └─ Broadcast Exchange Est. ? rows └─ Inner Join Est. ? rows │ Condition: expr#rand(1)_1 == expr#rand(2)_2 - ├─ CTERef[1] Est. 1 rows + ├─ Buffer Est. 1 rows + │ └─ CTERef[1] Est. 1 rows └─ Broadcast Exchange Est. 1 rows └─ Buffer Est. 1 rows └─ CTERef[0] Est. 1 rows @@ -128,11 +134,13 @@ Projection Est. ? rows ├─ Inner Join Est. ? rows │ │ Condition: expr#rand(1)_1 == expr#rand(2)_1 │ ├─ CTERef[0] Est. 1 rows - │ └─ CTERef[1] Est. 1 rows + │ └─ Buffer Est. 1 rows + │ └─ CTERef[1] Est. 1 rows └─ Inner Join Est. ? rows │ Condition: expr#rand(1)_2 == expr#rand(2)_2 ├─ CTERef[0] Est. 1 rows - └─ CTERef[1] Est. 1 rows + └─ Buffer Est. 1 rows + └─ CTERef[1] Est. 1 rows CTEDef [0] Projection Est. 1 rows │ Expressions: expr#rand(1):=rand(1) @@ -157,8 +165,7 @@ Projection Est. ? rows │ └─ CTERef[1] Est. 1 rows └─ Inner Join Est. ? rows │ Condition: expr#rand(2)_2 == expr#rand(1)_2 - ├─ Buffer Est. 1 rows - │ └─ CTERef[1] Est. 1 rows + ├─ CTERef[1] Est. 1 rows └─ Broadcast Exchange Est. 1 rows └─ Buffer Est. 1 rows └─ CTERef[0] Est. 1 rows @@ -181,11 +188,13 @@ Projection Est. ? rows │ ├─ Buffer Est. 1 rows │ │ └─ CTERef[0] Est. 1 rows │ └─ Broadcast Exchange Est. 1 rows - │ └─ CTERef[1] Est. 1 rows + │ └─ Buffer Est. 1 rows + │ └─ CTERef[1] Est. 1 rows └─ Broadcast Exchange Est. ? rows └─ Inner Join Est. ? rows │ Condition: expr#rand(2)_2 == expr#rand(1)_2 - ├─ CTERef[1] Est. 1 rows + ├─ Buffer Est. 1 rows + │ └─ CTERef[1] Est. 1 rows └─ Broadcast Exchange Est. 1 rows └─ Buffer Est. 1 rows └─ CTERef[0] Est. 1 rows @@ -211,9 +220,9 @@ Projection Est. ? rows │ └─ CTERef[1] Est. 1 rows └─ Inner Join Est. ? rows │ Condition: expr#rand(2)_2 == expr#rand(3)_1 - ├─ Buffer Est. 1 rows - │ └─ CTERef[1] Est. 1 rows - └─ CTERef[2] Est. 1 rows + ├─ CTERef[1] Est. 1 rows + └─ Buffer Est. 1 rows + └─ CTERef[2] Est. 1 rows CTEDef [0] Projection Est. 
1 rows │ Expressions: expr#rand(1):=rand(1) @@ -237,12 +246,15 @@ Projection Est. ? rows │ │ Condition: expr#rand(1)_1 == expr#rand(2)_1 │ ├─ CTERef[0] Est. 1 rows │ └─ Broadcast Exchange Est. 1 rows - │ └─ CTERef[1] Est. 1 rows + │ └─ Buffer Est. 1 rows + │ └─ CTERef[1] Est. 1 rows └─ Broadcast Exchange Est. ? rows └─ Inner Join Est. ? rows │ Condition: expr#rand(2)_2 == expr#rand(3)_1 - ├─ CTERef[1] Est. 1 rows - └─ CTERef[2] Est. 1 rows + ├─ Buffer Est. 1 rows + │ └─ CTERef[1] Est. 1 rows + └─ Buffer Est. 1 rows + └─ CTERef[2] Est. 1 rows CTEDef [0] Projection Est. 1 rows │ Expressions: expr#rand(1):=rand(1) @@ -257,6 +269,35 @@ CTEDef [2] │ Expressions: expr#rand(3):=rand(3) └─ Values Est. 1 rows note: CTE(Common Table Expression) is applied for 4 times. +explain with c1 as (select rand(1) x), c2 as (select rand(2) x) select j1.x from (select t1.x from c1 t1 join c1 t2 on t1.x = t2.x) j1 join (select t3.x from c1 t3 join c2 t4 on t3.x = t4.x) j2 on j1.x = j2.x; +Projection Est. ? rows +│ Expressions: x:=`expr#rand(1)_1` +└─ Inner Join Est. ? rows + │ Condition: expr#rand(1)_1 == expr#rand(1)_3 + ├─ Inner Join Est. ? rows + │ │ Condition: expr#rand(1)_1 == expr#rand(1)_2 + │ ├─ Buffer Est. 1 rows + │ │ └─ CTERef[0] Est. 1 rows + │ └─ Broadcast Exchange Est. 1 rows + │ └─ Buffer Est. 1 rows + │ └─ CTERef[0] Est. 1 rows + └─ Broadcast Exchange Est. ? rows + └─ Inner Join Est. ? rows + │ Condition: expr#rand(1)_3 == expr#rand(2)_1 + ├─ Buffer Est. 1 rows + │ └─ CTERef[0] Est. 1 rows + └─ Buffer Est. 1 rows + └─ CTERef[1] Est. 1 rows +CTEDef [0] + Projection Est. 1 rows + │ Expressions: expr#rand(1):=rand(1) + └─ Values Est. 1 rows +CTEDef [1] + Broadcast Exchange Est. 1 rows + └─ Projection Est. 1 rows + │ Expressions: expr#rand(2):=rand(2) + └─ Values Est. 1 rows +note: CTE(Common Table Expression) is applied for 4 times. explain with c1 as (select rand(1) x), c2 as (select t1.x as x from c1 t1 join c1 t2 on t1.x = t2.x) select x from c2 t3; Projection Est. ? 
rows │ Expressions: x:=`expr#rand(1)_3` diff --git a/tests/queries/4_cnch_stateless_no_tenant/40052_deadlock_cte.sql b/tests/queries/4_cnch_stateless_no_tenant/40052_deadlock_cte.sql index aa52a510557..94bb8d6afe2 100644 --- a/tests/queries/4_cnch_stateless_no_tenant/40052_deadlock_cte.sql +++ b/tests/queries/4_cnch_stateless_no_tenant/40052_deadlock_cte.sql @@ -15,5 +15,6 @@ explain with c1 as (select rand(1) x), c2 as (select rand(2) x) select t1.x as x explain with c1 as (select rand(1) x), c2 as (select rand(2) x) select j1.x from (select t1.x as x from c1 t1 join c2 t2 on t1.x = t2.x) j1 join (select t3.x as x from c2 t3 join c1 t4 on t3.x = t4.x) j2 on j1.x = j2.x; explain with c1 as (select rand(1) x), c2 as (select rand(2) x), c3 as (select rand(3) x) select t1.x from c1 t1 join c2 t2 on t1.x = t2.x union all select t3.x from c2 t3 join c3 t4 on t3.x = t4.x; explain with c1 as (select rand(1) x), c2 as (select rand(2) x), c3 as (select rand(3) x) select j1.x from (select t1.x from c1 t1 join c2 t2 on t1.x = t2.x) j1 join (select t3.x from c2 t3 join c3 t4 on t3.x = t4.x) j2 on j1.x = j2.x; +explain with c1 as (select rand(1) x), c2 as (select rand(2) x) select j1.x from (select t1.x from c1 t1 join c1 t2 on t1.x = t2.x) j1 join (select t3.x from c1 t3 join c2 t4 on t3.x = t4.x) j2 on j1.x = j2.x; explain with c1 as (select rand(1) x), c2 as (select t1.x as x from c1 t1 join c1 t2 on t1.x = t2.x) select x from c2 t3; explain with c1 as (select rand(1) x), c2 as (select t1.x as x from c1 t1 union all (select rand(2) x)) select t2.x from c2 t2 join c1 t3 on t2.x = t3.x; diff --git a/tests/queries/4_cnch_stateless_no_tenant/40075_subcolumn_with_cte.sql b/tests/queries/4_cnch_stateless_no_tenant/40075_subcolumn_with_cte.sql index e9e8814d076..ea27da66931 100644 --- a/tests/queries/4_cnch_stateless_no_tenant/40075_subcolumn_with_cte.sql +++ b/tests/queries/4_cnch_stateless_no_tenant/40075_subcolumn_with_cte.sql @@ -55,7 +55,7 @@ FROM 'zz' AS y ) t6 WHERE t2y = 
t6.x{'dd'} AND t6.y != '' -SETTINGS enable_optimizer=1, cte_mode='SHARED'; +SETTINGS enable_optimizer=1, cte_mode='SHARED', max_buffer_size_for_deadlock_cte=-1; SELECT ''; diff --git a/tests/queries/4_cnch_stateless_no_tenant/46001_remove_redundant_distinct.reference b/tests/queries/4_cnch_stateless_no_tenant/46001_remove_redundant_distinct.reference index 87bdb4bb621..a48e3f627bd 100644 --- a/tests/queries/4_cnch_stateless_no_tenant/46001_remove_redundant_distinct.reference +++ b/tests/queries/4_cnch_stateless_no_tenant/46001_remove_redundant_distinct.reference @@ -148,21 +148,25 @@ Projection Est. ? rows │ ├─ TableScan test_distinct.unique_3 Est. ? rows │ │ Outputs: [id3] │ └─ Projection Est. ? rows - │ │ Expressions: expr#divide(sum(avg_id), 25)_1:=cast(`expr#sum(avg_id)` / 25, \'Nullable(Float64)\') - │ └─ CTERef[1] Est. ? rows + │ │ Expressions: expr#divide(sum(avg_id), 25)_1:=cast(`expr#sum(avg_id)_4` / 25, \'Nullable(Float64)\') + │ └─ Broadcast Exchange Est. ? rows + │ └─ MergingAggregated Est. ? rows + │ └─ Gather Exchange Est. ? rows + │ └─ Aggregating Est. ? rows + │ │ Group by: {} + │ │ Aggregates: expr#sum(avg_id)_4:=AggNull(sum)(id_6) + │ └─ TableScan test_distinct.unique_1 Est. ? rows + │ Outputs: id_6:=id └─ Projection Est. ? rows - │ Expressions: expr#divide(sum(avg_id), 20)_1:=cast(`expr#sum(avg_id)_1` / 20, \'Nullable(Float64)\') - └─ CTERef[1] Est. ? rows -CTEDef [1] - Broadcast Exchange Est. ? rows - └─ MergingAggregated Est. ? rows - └─ Gather Exchange Est. ? rows - └─ Aggregating Est. ? rows - │ Group by: {} - │ Aggregates: expr#sum(avg_id):=AggNull(sum)(id_3) - └─ TableScan test_distinct.unique_1 Est. ? rows - Outputs: id_3:=id -note: CTE(Common Table Expression) is applied for 2 times. + │ Expressions: expr#divide(sum(avg_id), 20)_1:=cast(`expr#sum(avg_id)_5` / 20, \'Nullable(Float64)\') + └─ Broadcast Exchange Est. ? rows + └─ MergingAggregated Est. ? rows + └─ Gather Exchange Est. ? rows + └─ Aggregating Est. ? 
rows + │ Group by: {} + │ Aggregates: expr#sum(avg_id)_5:=AggNull(sum)(id_7) + └─ TableScan test_distinct.unique_1 Est. ? rows + Outputs: id_7:=id ---------test cte2 Projection Est. ? rows │ Expressions: id:=id_3 diff --git a/tests/queries/4_cnch_stateless_no_tenant/48016_cte_projection.reference b/tests/queries/4_cnch_stateless_no_tenant/48016_cte_projection.reference index 1ddeb3ef40e..45820e1f876 100644 --- a/tests/queries/4_cnch_stateless_no_tenant/48016_cte_projection.reference +++ b/tests/queries/4_cnch_stateless_no_tenant/48016_cte_projection.reference @@ -6,9 +6,9 @@ Projection Est. ? rows │ Group by: {} │ Aggregates: expr#count():=AggNull(count)() └─ Left Join Est. ? rows - │ Condition: p_date == expr#toDate(hdfs_par)_2 + │ Condition: p_date == expr#toDate(hdfs_par)_5 ├─ Left Join Est. ? rows - │ │ Condition: p_date == expr#toDate(hdfs_par)_1, user_id == user_id_2 + │ │ Condition: p_date == expr#toDate(hdfs_par)_4, user_id == user_id_5 │ ├─ Projection Est. ? rows │ │ │ Expressions: [p_date, user_id] │ │ └─ Filter Est. ? rows @@ -16,22 +16,31 @@ Projection Est. ? rows │ │ └─ TableScan test.idm_ana_xz_rpt_user_evt Est. ? rows │ │ Where: (p_date >= cast(18359, \'Date\')) AND (channel = \'APP\') │ │ Outputs: [user_id, channel, p_date] - │ └─ Filter Est. ? rows - │ │ Condition: `expr#toDate(hdfs_par)_1` >= cast(18359, \'Date\') - │ └─ CTERef[0] Est. ? rows - └─ CTERef[0] Est. ? rows -CTEDef [0] - Broadcast Exchange Est. ? rows - └─ MergingAggregated Est. ? rows - └─ Repartition Exchange Est. ? rows - │ Partition by: {user_id_1, expr#toDate(hdfs_par)} - └─ Aggregating Est. ? rows - │ Group by: {user_id_1, expr#toDate(hdfs_par)} - └─ Projection Est. ? rows - │ Expressions: [user_id_1], expr#toDate(hdfs_par):=toDate(hdfs_par) - └─ Filter Est. ? rows - │ Condition: toDate(hdfs_par) >= cast(18359, \'Date\') - └─ TableScan test.idm_ods_ods_inst_user_tags Est. ? 
rows - Where: toDate(hdfs_par) >= cast(18359, \'Date\') - Outputs: [hdfs_par], user_id_1:=user_id -note: CTE(Common Table Expression) is applied for 2 times. + │ └─ Broadcast Exchange Est. ? rows + │ └─ MergingAggregated Est. ? rows + │ └─ Repartition Exchange Est. ? rows + │ │ Partition by: {user_id_5, expr#toDate(hdfs_par)_4} + │ └─ Aggregating Est. ? rows + │ │ Group by: {user_id_5, expr#toDate(hdfs_par)_4} + │ └─ Projection Est. ? rows + │ │ Expressions: [user_id_5], expr#toDate(hdfs_par)_4:=toDate(hdfs_par_2) + │ └─ Filter Est. ? rows + │ │ Condition: toDate(hdfs_par_2) >= cast(18359, \'Date\') + │ └─ TableScan test.idm_ods_ods_inst_user_tags Est. ? rows + │ Where: toDate(hdfs_par) >= cast(18359, \'Date\') + │ Outputs: user_id_5:=user_id, hdfs_par_2:=hdfs_par + └─ Broadcast Exchange Est. ? rows + └─ Projection Est. ? rows + │ Expressions: [expr#toDate(hdfs_par)_5] + └─ MergingAggregated Est. ? rows + └─ Repartition Exchange Est. ? rows + │ Partition by: {user_id_6, expr#toDate(hdfs_par)_5} + └─ Aggregating Est. ? rows + │ Group by: {user_id_6, expr#toDate(hdfs_par)_5} + └─ Projection Est. ? rows + │ Expressions: [user_id_6], expr#toDate(hdfs_par)_5:=toDate(hdfs_par_3) + └─ Filter Est. ? rows + │ Condition: toDate(hdfs_par_3) >= cast(18359, \'Date\') + └─ TableScan test.idm_ods_ods_inst_user_tags Est. ? 
rows + Where: toDate(hdfs_par) >= cast(18359, \'Date\') + Outputs: user_id_6:=user_id, hdfs_par_3:=hdfs_par From 81a8854e746146db76bd83b74ba157d9424a471c Mon Sep 17 00:00:00 2001 From: fredwang Date: Thu, 1 Aug 2024 03:06:56 +0000 Subject: [PATCH 003/292] Merge 'fix-double-quoted-string' into 'cnch-2.2' fix(clickhousech@m-4172484611): fix typo in implementation of double quoted string See merge request: !22437 --- src/Parsers/ExpressionElementParsers.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index c25f7d1ed8f..71fdd3ee0dc 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -1079,7 +1079,7 @@ bool ParserCastOperator::parseImpl(Pos & pos, ASTPtr & node, Expected & expected if (!isOneOf(last_token)) return false; } - else if (isOneOf(pos->type)) + else if (isOneOf(pos->type)) { if (!isOneOf(last_token)) return false; From 67cb1df5ee0c422b9e46cfc387da0fcf3becd49a Mon Sep 17 00:00:00 2001 From: fredwang Date: Thu, 1 Aug 2024 03:07:20 +0000 Subject: [PATCH 004/292] Merge 'fix_40057_ansi_nullify_grouping_cnch-2.2' into 'cnch-2.2' fix(clickhousech@m-4549370198): fix 40057 ansi nullify grouping cnch 2.2 See merge request: !22485 From 7008d8013f0849e8657b7fd543088eddf26a6449 Mon Sep 17 00:00:00 2001 From: fredwang Date: Thu, 1 Aug 2024 03:08:46 +0000 Subject: [PATCH 005/292] Merge 'cherry-pick-mr-22463-3' into 'cnch-2.2' feat(clickhousech@m-4344711347): Bucket Shuffle See merge request: !22494 # Conflicts: # src/Interpreters/DistributedStages/PlanSegmentSplitter.cpp # src/Optimizer/Property/PropertyDeriver.cpp # src/Optimizer/Rewriter/BitEngineInfoDeriver.cpp --- src/Advisor/tests/gtest_materialized_view.cpp | 4 +- src/Analyzers/ASTEquals.cpp | 11 + src/Core/Settings.h | 2 + src/Core/tests/gtest_protobuf.cpp | 18 ++ src/Core/tests/gtest_protobuf_common.h | 30 ++- src/Functions/FunctionBucket.cpp | 11 + 
src/Functions/FunctionBucket.h | 249 ++++++++++++++++++ src/Functions/FunctionSipHashBuiltin.cpp | 11 + src/Functions/FunctionSipHashBuiltin.h | 51 ++++ src/Functions/IFunction.h | 11 +- src/Functions/modulo.cpp | 12 +- src/Interpreters/DAGGraph.cpp | 7 + .../DistributedStages/BSPScheduler.cpp | 6 +- .../DistributedStages/PlanSegment.cpp | 14 + .../DistributedStages/PlanSegment.h | 12 + .../DistributedStages/PlanSegmentExecutor.cpp | 6 +- .../DistributedStages/PlanSegmentSplitter.cpp | 54 +++- .../DistributedStages/PlanSegmentSplitter.h | 2 + .../DistributedStages/Scheduler.h | 2 +- src/Interpreters/NodeSelector.cpp | 68 +++-- src/Interpreters/NodeSelector.h | 22 +- src/Optimizer/Cascades/Task.cpp | 62 ++++- src/Optimizer/CostModel/ExchangeCost.cpp | 2 +- src/Optimizer/Property/Property.cpp | 133 +++++++++- src/Optimizer/Property/Property.h | 16 +- src/Optimizer/Property/PropertyDeriver.cpp | 61 ++++- src/Optimizer/Property/PropertyDeterminer.cpp | 14 +- src/Optimizer/Property/PropertyEnforcer.cpp | 4 +- src/Parsers/ASTClusterByElement.cpp | 24 ++ src/Parsers/ASTClusterByElement.h | 6 + src/Parsers/ASTSerDerHelper.cpp | 4 +- src/Parsers/IAST.h | 3 +- .../Exchange/RepartitionTransform.cpp | 7 + .../Exchange/RepartitionTransform.h | 3 + src/Processors/tests/gtest_bucket_shuffle.cpp | 125 +++++++++ src/Protos/plan_node_utils.proto | 2 + src/QueryPlan/GraphvizPrinter.cpp | 73 ++--- src/QueryPlan/PlanPrinter.cpp | 4 +- src/QueryPlan/PlanPrinter.h | 1 + src/QueryPlan/SymbolMapper.cpp | 58 ++-- .../71000_bucket_shuffle_join.reference | 108 ++++++++ .../71000_bucket_shuffle_join.sql | 63 +++++ .../40022_topn_filtering_opt.reference | 28 +- ...fix_bitmap_index_not_pushed_down.reference | 22 +- ...023_eliminate_group_by_constants.reference | 48 ++-- .../48023_eliminate_join_by_fk.reference | 16 +- ...8028_only_push_bitmap_with_index.reference | 12 +- 47 files changed, 1261 insertions(+), 241 deletions(-) create mode 100644 src/Functions/FunctionBucket.cpp create mode 100644 
src/Functions/FunctionBucket.h create mode 100644 src/Functions/FunctionSipHashBuiltin.cpp create mode 100644 src/Functions/FunctionSipHashBuiltin.h create mode 100644 src/Processors/tests/gtest_bucket_shuffle.cpp create mode 100644 tests/queries/4_cnch_stateless/71000_bucket_shuffle_join.reference create mode 100644 tests/queries/4_cnch_stateless/71000_bucket_shuffle_join.sql diff --git a/src/Advisor/tests/gtest_materialized_view.cpp b/src/Advisor/tests/gtest_materialized_view.cpp index 80792670da6..cb98795378f 100644 --- a/src/Advisor/tests/gtest_materialized_view.cpp +++ b/src/Advisor/tests/gtest_materialized_view.cpp @@ -124,7 +124,7 @@ TEST_F(MaterializedViewAdviseTest, TestMaterializedViewFilterAndProject) EXPECT_CONTAINS(advises.front()->getOptimizedValue(), "d_month_seq - 1"); } -TEST_F(MaterializedViewAdviseTest, TestMaterializedViewFilterAndProject2) +TEST_F(MaterializedViewAdviseTest, DISABLED_TestMaterializedViewFilterAndProject2) { auto advises = getAdvises( {"select d_month_seq - 1, d_date_sk + 1 from date_dim where d_week_seq = 1", @@ -196,7 +196,7 @@ TEST_F(MaterializedViewAdviseTest, TestMaterializedViewCaseWhen) EXPECT_GE(advises.size(), 1); } -TEST_F(MaterializedViewAdviseTest, TestTPCDSQ6) +TEST_F(MaterializedViewAdviseTest, DISABLED_TestTPCDSQ6) { std::string sql = tester->loadQuery("q6").sql.front().first; auto advises = getAdvises({sql}); diff --git a/src/Analyzers/ASTEquals.cpp b/src/Analyzers/ASTEquals.cpp index 29a40d52d38..b08fac5f859 100644 --- a/src/Analyzers/ASTEquals.cpp +++ b/src/Analyzers/ASTEquals.cpp @@ -24,6 +24,7 @@ #include #include #include +#include #include namespace DB::ASTEquality @@ -57,6 +58,13 @@ bool compareNode(const ASTWindowDefinition & left, const ASTWindowDefinition & r left.frame_end_preceding == right.frame_end_preceding; } +bool compareNode(const ASTClusterByElement & left, const ASTClusterByElement & right) +{ + return left.split_number == right.split_number && + left.is_with_range == right.is_with_range && + 
left.is_user_defined_expression == right.is_user_defined_expression; +} + bool compareNode(const ASTSubquery & left, const ASTSubquery & right) { return left.cte_name == right.cte_name && left.database_of_view == right.database_of_view; @@ -144,6 +152,9 @@ bool compareTree(const ASTPtr & left, const ASTPtr & right, const SubtreeCompara case ASTType::ASTTableIdentifier: node_equals = compareNode(left->as(), right->as()); break; + case ASTType::ASTClusterByElement: + node_equals = compareNode(left->as(), right->as()); + break; default: node_equals = left->getID() == right->getID(); // align with ScopeAwareHash } diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 3c2e45da361..6d62d9bcaa9 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -1657,6 +1657,7 @@ enum PreloadLevelSettings : UInt64 M(UInt64, max_expand_join_key_size, 3, "Whether enable using equivalences when property match", 0) \ M(UInt64, max_expand_agg_key_size, 3, "Max allowed agg/window keys number when expand powerset when property match", 0) \ M(Bool, enable_sharding_optimize, false, "Whether enable sharding optimization, eg. 
local join", 0) \ + M(Bool, enable_bucket_shuffle, false, "Whether enable bucket shuffle", 0) \ M(Bool, enable_magic_set, true, "Whether enable magic set rewriting for join aggregation", 0) \ M(Float, magic_set_filter_factor, 0.5, "The minimum filter factor of magic set, used for early pruning", 0) \ M(UInt64, magic_set_max_search_tree, 2, "The maximum table scans in magic set, used for early pruning", 0) \ @@ -1727,6 +1728,7 @@ enum PreloadLevelSettings : UInt64 M(Bool, exchange_force_use_buffer, false, "Force exchange use buffer as possible", 0) \ M(Bool, exchange_enable_node_stable_hash, false, "Force exchange use buffer as possible", 0) \ M(Bool, exchange_use_query_memory_tracker, true, "Use query-level memory tracker", 0) \ + M(String, exchange_shuffle_method_name, "cityHash64V2", "Shuffle method name used in exchange", 0) \ M(UInt64, wait_for_post_processing_timeout_ms, 1000, "Timeout for waiting post processing rpc from workers.", 0) \ M(UInt64, distributed_query_wait_exception_ms, 2000,"Wait final planSegment exception from segmentScheduler.", 0) \ M(UInt64, distributed_max_parallel_size, false, "Max distributed execution parallel size", 0) \ diff --git a/src/Core/tests/gtest_protobuf.cpp b/src/Core/tests/gtest_protobuf.cpp index efbf9ab22a4..64cf9f84af4 100644 --- a/src/Core/tests/gtest_protobuf.cpp +++ b/src/Core/tests/gtest_protobuf.cpp @@ -1,4 +1,5 @@ #include +#include #include "Interpreters/DistributedStages/PlanSegment.h" @@ -306,6 +307,23 @@ TEST_F(ProtobufTest, PlanSegmentInput) compareProto(pb, pb2); } +TEST_F(ProtobufTest, PlanSegmentOutput) +{ + std::default_random_engine eng(42); + // construct valid step + auto output = generatePlanSegmentOutput(eng); + // serialize to protobuf + Protos::PlanSegmentOutput pb; + output->toProto(pb); + // deserialize from protobuf + auto output2 = std::make_shared(); + output2->fillFromProto(pb); + // re-serialize to protobuf + Protos::PlanSegmentOutput pb2; + output2->toProto(pb2); + compareProto(pb, pb2); +} + 
TEST_F(ProtobufTest, InputOrderInfo) { std::default_random_engine eng(42); diff --git a/src/Core/tests/gtest_protobuf_common.h b/src/Core/tests/gtest_protobuf_common.h index 8cff2512bed..558a6a2ffe1 100644 --- a/src/Core/tests/gtest_protobuf_common.h +++ b/src/Core/tests/gtest_protobuf_common.h @@ -44,11 +44,12 @@ #include #include #include -#include "Core/NamesAndTypes.h" -#include "DataTypes/DataTypeMap.h" -#include "DataTypes/DataTypeNullable.h" -#include "IO/WriteBuffer.h" -#include "Interpreters/Context.h" +#include +#include +#include +#include +#include +#include namespace DB::UnitTest { @@ -334,7 +335,7 @@ class ProtobufTest : public testing::Test auto buckets = eng() % 1000; auto enforce_round_robin = eng() % 2 == 1; auto component = static_cast(eng() % 3); - auto result = Partitioning(handle, columns, require_handle, buckets, enforce_round_robin, component); + auto result = Partitioning(handle, columns, require_handle, buckets, nullptr, enforce_round_robin, component); return result; } @@ -526,7 +527,6 @@ class ProtobufTest : public testing::Test { Block header = {ColumnWithTypeAndName(ColumnUInt8::create(), std::make_shared(), "local_exchange_test")}; AddressInfo local_address("localhost", 0, "test", "123456"); - PlanSegmentInputs inputs; auto input = std::make_shared(header, PlanSegmentType::EXCHANGE); input->setExchangeParallelSize(2); @@ -536,6 +536,22 @@ class ProtobufTest : public testing::Test return input; } + static std::shared_ptr generatePlanSegmentOutput(std::default_random_engine & eng) + { + Block header = {ColumnWithTypeAndName(ColumnUInt8::create(), std::make_shared(), "local_exchange_test")}; + auto output = std::make_shared(header, PlanSegmentType::EXCHANGE); + output->setExchangeParallelSize(2); + output->setExchangeId(3); + output->setPlanSegmentId(4); + output->setKeepOrder(true); + output->setShuffleFunctionName("bucket"); + Array params; + params.emplace_back(generateField(eng)); + params.emplace_back(generateField(eng)); + 
output->setShuffleFunctionParams(params); + return output; + } + static WindowFrame generateWindowFrame(std::default_random_engine & eng) { WindowFrame res; diff --git a/src/Functions/FunctionBucket.cpp b/src/Functions/FunctionBucket.cpp new file mode 100644 index 00000000000..eab51171627 --- /dev/null +++ b/src/Functions/FunctionBucket.cpp @@ -0,0 +1,11 @@ +#include +#include +#include + +namespace DB +{ +REGISTER_FUNCTION(Bucket) +{ + factory.registerFunction(); +} +} diff --git a/src/Functions/FunctionBucket.h b/src/Functions/FunctionBucket.h new file mode 100644 index 00000000000..c3cc969826a --- /dev/null +++ b/src/Functions/FunctionBucket.h @@ -0,0 +1,249 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +template +class FunctionBucket : public IExecutableFunction +{ +public: + static constexpr auto name = "bucket"; + + explicit FunctionBucket( + ExecutableFunctionPtr hash_function_, + UInt64 bucket_size_, + UInt64 is_with_range_, + UInt64 split_number_) + : hash_function(std::move(hash_function_)) + , bucket_size(bucket_size_) + , is_with_range(is_with_range_) + , split_number(split_number_) + , split_number_argument(ColumnWithTypeAndName{}) + { + } + + explicit FunctionBucket( + ExecutableFunctionPtr hash_function_, + UInt64 bucket_size_, + UInt64 is_with_range_, + UInt64 split_number_, + ColumnWithTypeAndName split_number_argument_) + : hash_function(std::move(hash_function_)) + , bucket_size(bucket_size_) + , is_with_range(is_with_range_) + , split_number(split_number_) + , split_number_argument(std::move(split_number_argument_)) + { + } + + std::string getName() const override { return name; } + + bool useDefaultImplementationForConstants() const override { return true; } + bool 
useDefaultImplementationForNulls() const override { return false; } + bool useDefaultImplementationForNothing() const override { return false; } + bool useDefaultImplementationForLowCardinalityColumns() const override { return false; } + + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + { + auto result = ColumnUInt64::create(input_rows_count, 0); + auto & result_data = result->getData(); + ColumnPtr hash_result; + if constexpr (ModSplitNumberInside) + { + ColumnsWithTypeAndName full_args = arguments; + full_args.emplace_back(split_number_argument); + hash_result = hash_function->execute(full_args, result_type, input_rows_count, false); + } + else + { + hash_result = hash_function->execute(arguments, result_type, input_rows_count, false); + } + + const auto * hash_result_ptr = typeid_cast *>(hash_result.get()); + if (!hash_result_ptr) + { + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Function {} return unexpected type id: {}, it should return ColumnUInt64", + hash_function->getName(), + hash_result->getDataType()); + } + auto & hash_data = (const_cast *>(hash_result_ptr))->getData(); + if constexpr (!ModSplitNumberInside) + { + if (split_number > 0) + { + for (size_t i = 0; i < input_rows_count; i++) + { + hash_data[i] = hash_data[i] % split_number; + } + } + } + + if (!is_with_range) + { + for (size_t i = 0; i < input_rows_count; i++) + { + result_data[i] = hash_data[i] % bucket_size; + } + } + else + { + auto shard_ratio = split_number / bucket_size; + shard_ratio = shard_ratio == 0 ? 1 : shard_ratio; + for (size_t i = 0; i < input_rows_count; i++) + { + // implicit floor for shard ratio. + // split_number has no constraint to match user requirement, so a shard_ratio(0), when split_number < bucket_size , is ok for customer. + UInt64 bucket_number = hash_data[i] / shard_ratio; + bucket_number = bucket_number >= bucket_size ? 
bucket_size - 1 : bucket_number; + result_data[i] = bucket_number; + } + } + + return result; + } + +private: + ExecutableFunctionPtr hash_function; + UInt64 bucket_size; + bool is_with_range; + UInt64 split_number; + ColumnWithTypeAndName split_number_argument; +}; + +class BucketFunctionBase : public IFunctionBase +{ +public: + static constexpr auto name = "bucket"; + BucketFunctionBase(DataTypes argument_types_, ContextPtr context_) + : argument_types(std::move(argument_types_)), context(std::move(context_)) + { + } + + String getName() const override { return name; } + + const DataTypes & getArgumentTypes() const override { return argument_types; } + + virtual const DataTypePtr & getResultType() const override { return BucketFunctionBase::RESULT_DATA_TYPE; } + + virtual ExecutableFunctionPtr prepareWithParameters(const ColumnsWithTypeAndName & arguments, const Array & parameters) const override + { + if (parameters.size() != 4) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function {} requires 4 parameters", getName()); + } + const String & hash_func_name = parameters[0].safeGet(); + auto bucket_size = parameters[1].safeGet(); + if (bucket_size == 0) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function {} requires positive bucket_size", getName()); + } + auto is_with_range = parameters[2].safeGet(); + auto split_number = parameters[3].safeGet(); + + FunctionOverloadResolverPtr hash_func_builder = FunctionFactory::instance().get(hash_func_name, context); + + if (hash_func_name == "dtspartition") + { + auto split_number_column + = ColumnWithTypeAndName{ColumnInt64::create(1, split_number), BucketFunctionBase::SPLIT_NUMBER_TYPE, ""}; + auto full_args = arguments; + full_args.emplace_back(split_number_column); + FunctionBasePtr hash_func_base = hash_func_builder->build(full_args); + auto executable_hash_func = hash_func_base->prepare(full_args); + return std::make_unique>( + executable_hash_func, bucket_size, is_with_range, split_number, 
split_number_column); + } + + FunctionBasePtr hash_func_base = hash_func_builder->build(arguments); + auto executable_hash_func = hash_func_base->prepare(arguments); + return std::make_unique>(executable_hash_func, bucket_size, is_with_range, split_number); + } + + bool isDeterministic() const override { return true; } + bool isDeterministicInScopeOfQuery() const override { return true; } + + bool isSuitableForConstantFolding() const override { return false; } + + static const DataTypePtr RESULT_DATA_TYPE; + static const DataTypePtr SPLIT_NUMBER_TYPE; + +private: + DataTypes argument_types; + ContextPtr context; +}; + + +const DataTypePtr BucketFunctionBase::RESULT_DATA_TYPE = std::make_shared(); +const DataTypePtr BucketFunctionBase::SPLIT_NUMBER_TYPE = std::make_shared(); + +class FunctionBucketOverloadResolver : public IFunctionOverloadResolver +{ +public: + static constexpr auto name = "bucket"; + + explicit FunctionBucketOverloadResolver(ContextPtr context_) : context(std::move(context_)) { } + + String getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 0; } + + bool isVariadic() const override { return true; } + + static FunctionOverloadResolverPtr create(ContextPtr context_) + { + return std::make_unique(std::move(context_)); + } + + DataTypePtr getReturnTypeImpl(const DataTypes &) const override { return BucketFunctionBase::RESULT_DATA_TYPE; } + + FunctionBasePtr buildImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &) const override + { + if (arguments.size() != 1 && arguments.size() != 2) + throw Exception( + "Number of arguments for function " + getName() + " should be 1 or 2.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + DataTypes arguments_types; + + for (const auto & arg : arguments) + { + arguments_types.push_back(arg.type); + } + return std::make_unique(arguments_types, std::move(context)); + } + bool useDefaultImplementationForNulls() const override { return false; } + bool 
useDefaultImplementationForNothing() const override { return false; } + bool useDefaultImplementationForLowCardinalityColumns() const override { return false; } + +private: + ContextPtr context; +}; + +} diff --git a/src/Functions/FunctionSipHashBuiltin.cpp b/src/Functions/FunctionSipHashBuiltin.cpp new file mode 100644 index 00000000000..98765ed6ef6 --- /dev/null +++ b/src/Functions/FunctionSipHashBuiltin.cpp @@ -0,0 +1,11 @@ +#include +#include +#include + +namespace DB +{ +REGISTER_FUNCTION(SipHashBuiltin) +{ + factory.registerFunction(FunctionSipHashBuiltin::name, FunctionFactory::CaseSensitive); +} +} diff --git a/src/Functions/FunctionSipHashBuiltin.h b/src/Functions/FunctionSipHashBuiltin.h new file mode 100644 index 00000000000..4a499f00800 --- /dev/null +++ b/src/Functions/FunctionSipHashBuiltin.h @@ -0,0 +1,51 @@ +#pragma once + +#include +#include +#include +#include + + +namespace DB { + + +class FunctionSipHashBuiltin : public IFunction +{ +public: + static constexpr auto name = "sipHashBuitin"; + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + String getName() const override { return name; } + + bool isVariadic() const override { return true; } + size_t getNumberOfArguments() const override { return 0; } + bool useDefaultImplementationForConstants() const override { return false; } + + bool useDefaultImplementationForNulls() const override { return false; } + bool useDefaultImplementationForNothing() const override { return false; } + bool useDefaultImplementationForLowCardinalityColumns() const override { return false; } + + virtual DataTypePtr getReturnTypeImpl(const DataTypes & ) const override + { + return std::make_shared(); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + auto result_column = ColumnUInt64::create(input_rows_count, 0); + auto & result_date = result_column->getData(); + for (size_t i = 0; i < input_rows_count; i++) + 
{ + SipHash hash; + for (const auto & argument : arguments) + { + argument.column->updateHashWithValue(i, hash); + } + result_date[i] = hash.get64(); + } + return result_column; + } +}; + +} + diff --git a/src/Functions/IFunction.h b/src/Functions/IFunction.h index 95a938d03fc..fd866d393e9 100644 --- a/src/Functions/IFunction.h +++ b/src/Functions/IFunction.h @@ -163,7 +163,16 @@ class IFunctionBase /// Do preparations and return executable. /// sample_columns should contain data types of arguments and values of constants, if relevant. - virtual ExecutableFunctionPtr prepare(const ColumnsWithTypeAndName & arguments) const = 0; + virtual ExecutableFunctionPtr prepare(const ColumnsWithTypeAndName & /*arguments*/) const + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "prepare is not implemented for function {}", getName()); + } + + /// Do preparations with extra parameters and return executable. + virtual ExecutableFunctionPtr prepareWithParameters(const ColumnsWithTypeAndName & /*arguments*/, const Array & /*parameters*/) const + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "prepare with extra parameters is not implemented for function {}", getName()); + } #if USE_EMBEDDED_COMPILER diff --git a/src/Functions/modulo.cpp b/src/Functions/modulo.cpp index a4b001d46a5..e26224e671a 100644 --- a/src/Functions/modulo.cpp +++ b/src/Functions/modulo.cpp @@ -197,13 +197,13 @@ REGISTER_FUNCTION(ModuloLegacy) factory.registerFunction(); } -struct NameBucket {static constexpr auto name = "bucket"; }; -using FunctionBucket = BinaryArithmeticOverloadResolver; +// struct NameBucket {static constexpr auto name = "bucket"; }; +// using FunctionBucket = BinaryArithmeticOverloadResolver; -REGISTER_FUNCTION(Bucket) -{ - factory.registerFunction(); -} +// REGISTER_FUNCTION(Bucket) +// { +// factory.registerFunction(); +// } struct NamePositiveModulo { diff --git a/src/Interpreters/DAGGraph.cpp b/src/Interpreters/DAGGraph.cpp index a39e4a513e7..2702b5c3404 100644 --- 
a/src/Interpreters/DAGGraph.cpp +++ b/src/Interpreters/DAGGraph.cpp @@ -107,6 +107,13 @@ void SourcePruner::generateUnprunableSegments() unprunable_plan_segments.insert(segment_output->getPlanSegmentId()); } } + for (const auto & segment_input : node.getPlanSegment()->getPlanSegmentInputs()) + { + if (segment_input->isStable()) + { + unprunable_plan_segments.insert(node.getPlanSegment()->getPlanSegmentId()); + } + } } } diff --git a/src/Interpreters/DistributedStages/BSPScheduler.cpp b/src/Interpreters/DistributedStages/BSPScheduler.cpp index 8c5605bb84b..8e4ffb7c152 100644 --- a/src/Interpreters/DistributedStages/BSPScheduler.cpp +++ b/src/Interpreters/DistributedStages/BSPScheduler.cpp @@ -70,7 +70,7 @@ void BSPScheduler::submitTasks(PlanSegment * plan_segment_ptr, const SegmentTask source_task_index_on_workers[addr]++; } } - triggerDispatch(cluster_nodes.rank_workers); + triggerDispatch(cluster_nodes.all_workers); } void BSPScheduler::onSegmentFinished(const size_t & segment_id, bool is_succeed, bool /*is_canceled*/) @@ -254,7 +254,7 @@ bool BSPScheduler::retryTaskIfPossible(size_t segment_id, UInt64 parallel_index) // for local no repartion and local may no repartition, schedule to original node NodeSelector::tryGetLocalInput(dag_graph_ptr->getPlanSegmentPtr(segment_id)) || // in case all workers except servers are occupied, simply retry at last node - failed_workers[segment_id].size() == cluster_nodes.rank_workers.size()) + failed_workers[segment_id].size() == cluster_nodes.all_workers.size()) { auto available_worker = segment_parallel_locations[segment_id][parallel_index]; occupied_workers[segment_id].erase(available_worker); @@ -266,7 +266,7 @@ bool BSPScheduler::retryTaskIfPossible(size_t segment_id, UInt64 parallel_index) { pending_task_instances.no_prefs.insert({segment_id, parallel_index}); lk.unlock(); - triggerDispatch(cluster_nodes.rank_workers); + triggerDispatch(cluster_nodes.all_workers); } } return true; diff --git 
a/src/Interpreters/DistributedStages/PlanSegment.cpp b/src/Interpreters/DistributedStages/PlanSegment.cpp index 09ef1c4f792..739962f1019 100644 --- a/src/Interpreters/DistributedStages/PlanSegment.cpp +++ b/src/Interpreters/DistributedStages/PlanSegment.cpp @@ -211,6 +211,7 @@ String PlanSegmentInput::toString(size_t indent) const ostr << indent_str << "keep_order: " << keep_order << "\n"; ostr << indent_str << "storage_id: " << (type == PlanSegmentType::SOURCE && storage_id.has_value() ? storage_id->getNameForLogs() : "") << "\n"; + ostr << indent_str << "isStable: " << isStable() << "\n"; /* printed before the source_addresses header so the address list directly follows its header */ ostr << indent_str << "source_addresses: " << "\n"; for (auto & address : source_addresses) ostr << indent_str << indent_str << address.toString() << "\n"; @@ -239,6 +240,8 @@ void PlanSegmentOutput::toProto(Protos::PlanSegmentOutput & proto) proto.set_shuffle_hash_function(shuffle_function_name); proto.set_parallel_size(parallel_size); proto.set_keep_order(keep_order); + if (!shuffle_func_params.empty()) + serializeFieldVectorToProto(shuffle_func_params, *proto.mutable_shuffle_function_parameters()); } void PlanSegmentOutput::fillFromProto(const Protos::PlanSegmentOutput & proto) @@ -247,6 +250,8 @@ void PlanSegmentOutput::fillFromProto(const Protos::PlanSegmentOutput & proto) shuffle_function_name = proto.shuffle_hash_function(); parallel_size = proto.parallel_size(); keep_order = proto.keep_order(); + if (proto.has_shuffle_function_parameters()) + shuffle_func_params = deserializeFieldVectorFromProto(proto.shuffle_function_parameters()); } String PlanSegmentOutput::toString(size_t indent) const @@ -256,6 +261,15 @@ String PlanSegmentOutput::toString(size_t indent) const ostr << IPlanSegment::toString(indent) << "\n"; ostr << indent_str << "shuffle_function_name: " << shuffle_function_name << "\n"; + if (!shuffle_func_params.empty()) + { + ostr << indent_str << "shuffle_parameters: "; + for (auto & field : shuffle_func_params) + { + ostr << field.toString() << " "; + } + ostr
<< "\n"; + } ostr << indent_str << "parallel_size: " << parallel_size << "\n"; ostr << indent_str << "keep_order: " << keep_order; diff --git a/src/Interpreters/DistributedStages/PlanSegment.h b/src/Interpreters/DistributedStages/PlanSegment.h index 76f6e2b24be..4cf2b240410 100644 --- a/src/Interpreters/DistributedStages/PlanSegment.h +++ b/src/Interpreters/DistributedStages/PlanSegment.h @@ -172,11 +172,15 @@ class PlanSegmentInput : public IPlanSegment void setStorageID(const StorageID & storage_id_) { storage_id = storage_id_;} + void setStable(bool stable_) { stable = stable_; } + bool isStable() const { return stable; } + private: size_t parallel_index = std::numeric_limits::max(); /// no longer used bool keep_order = false; AddressInfos source_addresses; std::optional storage_id; + bool stable = false; }; using PlanSegmentInputPtr = std::shared_ptr; @@ -210,10 +214,18 @@ class PlanSegmentOutput : public IPlanSegment String toString(size_t indent = 0) const override; + void setShuffleFunctionName(const String & shuffle_function_name_) { shuffle_function_name = shuffle_function_name_; } + + const String & getShuffleFunctionName() const { return shuffle_function_name; } /* read-only accessor: callable on const outputs */ + + void setShuffleFunctionParams(const Array & shuffle_func_params_) { shuffle_func_params = shuffle_func_params_; } + const Array & getShuffleFunctionParams() const { return shuffle_func_params; } /* read-only accessor: callable on const outputs */ + private: String shuffle_function_name = "cityHash64"; size_t parallel_size; bool keep_order = false; + Array shuffle_func_params; }; using PlanSegmentOutputPtr = std::shared_ptr; diff --git a/src/Interpreters/DistributedStages/PlanSegmentExecutor.cpp b/src/Interpreters/DistributedStages/PlanSegmentExecutor.cpp index 0dad062d1b0..db9fe1c879e 100644 --- a/src/Interpreters/DistributedStages/PlanSegmentExecutor.cpp +++ b/src/Interpreters/DistributedStages/PlanSegmentExecutor.cpp @@ -829,7 +829,11 @@ Processors PlanSegmentExecutor::buildRepartitionExchangeSink(
arguments.emplace_back(plan_segment_outputs[output_index]->getHeader().getByName(column_name)); argument_numbers.emplace_back(plan_segment_outputs[output_index]->getHeader().getPositionByName(column_name)); } - auto repartition_func = RepartitionTransform::getDefaultRepartitionFunction(arguments, context); + auto repartition_func = RepartitionTransform::getRepartitionHashFunction( + plan_segment_outputs[output_index]->getShuffleFunctionName(), + arguments, + context, + plan_segment_outputs[output_index]->getShuffleFunctionParams()); size_t partition_num = senders.size(); if (keep_order && context->getSettingsRef().exchange_enable_keep_order_parallel_shuffle && partition_num > 1) diff --git a/src/Interpreters/DistributedStages/PlanSegmentSplitter.cpp b/src/Interpreters/DistributedStages/PlanSegmentSplitter.cpp index f88872ea5c4..e7dfca634e3 100644 --- a/src/Interpreters/DistributedStages/PlanSegmentSplitter.cpp +++ b/src/Interpreters/DistributedStages/PlanSegmentSplitter.cpp @@ -13,6 +13,7 @@ * limitations under the License. 
*/ +#include #include #include @@ -26,7 +27,10 @@ #include #include #include +#include #include +#include +#include namespace DB { @@ -37,7 +41,8 @@ void PlanSegmentSplitter::split(QueryPlan & query_plan, PlanSegmentContext & pla if (plan_segment_context.context->getSettingsRef().distributed_max_parallel_size != 0) SetScalable::setScalable(query_plan.getRoot(), query_plan.getCTENodes(), *plan_segment_context.context); size_t exchange_id = 0; - PlanSegmentVisitorContext split_context{{}, {}, exchange_id}; + PlanSegmentVisitorContext split_context{ + {}, {}, exchange_id, plan_segment_context.context->getSettingsRef().exchange_shuffle_method_name}; visitor.createPlanSegment(query_plan.getRoot(), split_context); std::unordered_map plan_mapping; @@ -99,7 +104,6 @@ void PlanSegmentSplitter::split(QueryPlan & query_plan, PlanSegmentContext & pla } } } - } ParallelSizeChecker checker; @@ -152,8 +156,14 @@ PlanSegmentResult PlanSegmentVisitor::visitExchangeNode(QueryPlan::Node * node, bool is_add_extremes = false; for (auto & child : node->children) { - PlanSegmentVisitorContext child_context{{}, {}, split_context.exchange_id}; - auto plan_segment = createPlanSegment(child, child_context); + String hash_func = plan_segment_context.context->getSettingsRef().exchange_shuffle_method_name; + PlanSegmentVisitorContext child_context{ + {}, + {}, + split_context.exchange_id, + step->getSchema().getHashFunc(hash_func), + step->getSchema().getParams()}; + PlanSegment * plan_segment = createPlanSegment(child, child_context); is_add_totals |= child_context.is_add_totals; is_add_extremes |= child_context.is_add_extremes; @@ -164,6 +174,7 @@ PlanSegmentResult PlanSegmentVisitor::visitExchangeNode(QueryPlan::Node * node, // TODO: Not support one ExchangeStep with multi children yet(multi children can't share one exchange id), we may need to support it later. 
input->setExchangeId(plan_segment->getPlanSegmentOutputs().back()->getExchangeId()); input->setKeepOrder(step->needKeepOrder()); + input->setStable(step->getSchema().getBucketExpr() != nullptr); inputs.push_back(input); if (auto * output = dynamic_cast(plan_segment->getPlanSegmentOutput().get())) @@ -196,7 +207,7 @@ PlanSegmentResult PlanSegmentVisitor::visitCTERefNode(QueryPlan::Node * node, Pl if (cte_node->step->getType() == IQueryPlanStep::Type::Exchange) { exchange_step = dynamic_cast(cte_node->step.get()); - PlanSegmentVisitorContext child_context{{}, {}, split_context.exchange_id, split_context.is_add_extremes, split_context.is_add_totals, exchange_step->isScalable()}; + PlanSegmentVisitorContext child_context{{}, {}, split_context.exchange_id, split_context.hash_func, split_context.params, split_context.is_add_extremes, split_context.is_add_totals, exchange_step->isScalable()}; plan_segment = createPlanSegment(cte_node->children[0], child_context); } else @@ -270,11 +281,15 @@ PlanSegment * PlanSegmentVisitor::createPlanSegment(QueryPlan::Node * node, size auto plan_segment = std::make_unique(segment_id, plan_segment_context.query_id, cluster_name); plan_segment->setQueryPlan(std::move(sub_plan)); - plan_segment->setExchangeParallelSize(plan_segment_context.context->getSettingsRef().exchange_parallel_size); + auto exchange_parallel_size = plan_segment_context.context->getSettingsRef().exchange_parallel_size; + plan_segment->setExchangeParallelSize(exchange_parallel_size); PlanSegmentType output_type = segment_id == 0 ? 
PlanSegmentType::OUTPUT : PlanSegmentType::EXCHANGE; auto output = std::make_shared(plan_segment->getQueryPlan().getRoot()->step->getOutputStream().header, output_type); + + output->setShuffleFunctionName(split_context.hash_func); + output->setShuffleFunctionParams(split_context.params); if (output_type == PlanSegmentType::OUTPUT) { plan_segment->setParallelSize(1); @@ -288,15 +303,36 @@ PlanSegment * PlanSegmentVisitor::createPlanSegment(QueryPlan::Node * node, size else output->setParallelSize(parallel); } - output->setExchangeParallelSize(plan_segment_context.context->getSettingsRef().exchange_parallel_size); + output->setExchangeParallelSize(exchange_parallel_size); output->setExchangeId(split_context.exchange_id++); plan_segment->appendPlanSegmentOutput(output); auto inputs = findInputs(plan_segment->getQueryPlan().getRoot()); if (inputs.empty()) inputs.push_back(std::make_shared(Block(), PlanSegmentType::UNKNOWN)); - for (auto & input : inputs) - input->setExchangeParallelSize(plan_segment_context.context->getSettingsRef().exchange_parallel_size); + if (unlikely(exchange_parallel_size > 1)) + { + for (auto & input : inputs) + { + if (input->isStable()) + { + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "exchange_parallel_size can't be {} when input is stable for segment {} ", + exchange_parallel_size, + plan_segment->getPlanSegmentId()); + } + input->setExchangeParallelSize(exchange_parallel_size); + } + } + else + { + for (auto & input : inputs) + { + input->setExchangeParallelSize(exchange_parallel_size); + } + } + if (inputs[0]->getExchangeMode() == ExchangeMode::GATHER) plan_segment->setParallelSize(1); diff --git a/src/Interpreters/DistributedStages/PlanSegmentSplitter.h b/src/Interpreters/DistributedStages/PlanSegmentSplitter.h index 4b86718b658..80212483e34 100644 --- a/src/Interpreters/DistributedStages/PlanSegmentSplitter.h +++ b/src/Interpreters/DistributedStages/PlanSegmentSplitter.h @@ -56,6 +56,8 @@ struct PlanSegmentVisitorContext 
PlanSegmentInputs inputs; std::vector children; size_t & exchange_id; + String hash_func; + Array params = Array(); bool is_add_totals = false; bool is_add_extremes = false; bool scalable = true; diff --git a/src/Interpreters/DistributedStages/Scheduler.h b/src/Interpreters/DistributedStages/Scheduler.h index a908dbc3e74..c13b84d140a 100644 --- a/src/Interpreters/DistributedStages/Scheduler.h +++ b/src/Interpreters/DistributedStages/Scheduler.h @@ -110,7 +110,7 @@ class Scheduler , local_address(getLocalAddress(*query_context)) , log(&Poco::Logger::get("Scheduler")) { - cluster_nodes.rank_workers.emplace_back(local_address, NodeType::Local, ""); + cluster_nodes.all_workers.emplace_back(local_address, NodeType::Local, ""); timespec query_expiration_ts = query_context->getQueryExpirationTimeStamp(); query_expiration_ms = query_expiration_ts.tv_sec * 1000 + query_expiration_ts.tv_nsec / 1000000; } diff --git a/src/Interpreters/NodeSelector.cpp b/src/Interpreters/NodeSelector.cpp index 3515d7cd8e2..101b99dce04 100644 --- a/src/Interpreters/NodeSelector.cpp +++ b/src/Interpreters/NodeSelector.cpp @@ -6,6 +6,8 @@ #include #include #include +#include +#include #include #include #include @@ -119,10 +121,11 @@ NodeSelectorResult LocalNodeSelector::select(PlanSegment *, ContextPtr query_con NodeSelectorResult SourceNodeSelector::select(PlanSegment * plan_segment_ptr, ContextPtr query_context, DAGGraph * dag_graph_ptr) { checkClusterInfo(plan_segment_ptr); + bool need_stable_schedule = needStableSchedule(plan_segment_ptr); NodeSelectorResult result; // The one worker excluded is server itself. 
- const auto worker_number = cluster_nodes.rank_workers.size() - 1; - if (plan_segment_ptr->getParallelSize() > worker_number && !query_context->getSettingsRef().bsp_mode) + const auto worker_number = cluster_nodes.all_workers.size() - 1; + if (plan_segment_ptr->getParallelSize() > worker_number && (!query_context->getSettingsRef().bsp_mode || need_stable_schedule)) { throw Exception( ErrorCodes::BAD_QUERY_PARAMETER, @@ -130,6 +133,7 @@ NodeSelectorResult SourceNodeSelector::select(PlanSegment * plan_segment_ptr, Co plan_segment_ptr->getParallelSize(), worker_number); } + // If parallelism is greater than the worker number, we split the parts according to the input size. if (plan_segment_ptr->getParallelSize() > worker_number) { @@ -218,14 +222,28 @@ NodeSelectorResult SourceNodeSelector::select(PlanSegment * plan_segment_ptr, Co } else { - size_t parallel_index = 0; - for (const auto & worker : cluster_nodes.rank_workers) + if (need_stable_schedule) { - parallel_index++; - if (parallel_index > plan_segment_ptr->getParallelSize()) - break; - if (worker.address != local_address) - result.worker_nodes.emplace_back(worker); + LOG_TRACE(log, "use stable schedule for segment:{} with {} nodes", plan_segment_ptr->getPlanSegmentId(), worker_number); + if (plan_segment_ptr->getParallelSize() != worker_number) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + " Source plan segment parallel size {} is not equal to worker number {}.", + plan_segment_ptr->getParallelSize(), + worker_number); + for (size_t parallel_index = 0; parallel_index < worker_number; parallel_index++) + { + result.worker_nodes.emplace_back(cluster_nodes.all_workers[parallel_index]); + } + } + else + { + for (size_t parallel_index = 0; parallel_index < plan_segment_ptr->getParallelSize(); parallel_index++) + { + if (parallel_index >= cluster_nodes.rank_worker_ids.size()) /* stop at the end of the ranked worker list instead of indexing past it */ + break; + result.worker_nodes.emplace_back(cluster_nodes.all_workers[cluster_nodes.rank_worker_ids[parallel_index]]); + } } } } @@ -245,17
+263,33 @@ NodeSelectorResult ComputeNodeSelector::select(PlanSegment * plan_segment_ptr, C } else { - size_t parallel_index = 0; - for (const auto & worker : cluster_nodes.rank_workers) + bool need_stable_schedule = needStableSchedule(plan_segment_ptr); + if (need_stable_schedule) { - parallel_index++; - if (parallel_index > plan_segment_ptr->getParallelSize()) - break; - if (worker.address != local_address) - result.worker_nodes.emplace_back(worker); + const auto worker_number = cluster_nodes.all_workers.size() - 1; + LOG_TRACE(log, "use stable schedule for segment:{} with {} nodes", plan_segment_ptr->getPlanSegmentId(), worker_number); + if (plan_segment_ptr->getParallelSize() != worker_number) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Plan segment {} need stable schedule, but parallel size {} is not equal to worker number {}.", + plan_segment_ptr->getPlanSegmentId(), + plan_segment_ptr->getParallelSize(), + worker_number); + for (size_t parallel_index = 0; parallel_index < worker_number; parallel_index++) + { + result.worker_nodes.emplace_back(cluster_nodes.all_workers[parallel_index]); + } + } + else + { + for (size_t parallel_index = 0; parallel_index < plan_segment_ptr->getParallelSize(); parallel_index++) + { + if (parallel_index >= cluster_nodes.rank_worker_ids.size()) /* stop at the end of the ranked worker list instead of indexing past it */ + break; + result.worker_nodes.emplace_back(cluster_nodes.all_workers[cluster_nodes.rank_worker_ids[parallel_index]]); + } } } - return result; } diff --git a/src/Interpreters/NodeSelector.h b/src/Interpreters/NodeSelector.h index 6b1c55e8c48..bd5cd743ee5 100644 --- a/src/Interpreters/NodeSelector.h +++ b/src/Interpreters/NodeSelector.h @@ -65,18 +65,18 @@ struct ClusterNodes const auto & worker_group = query_context->tryGetCurrentWorkerGroup(); if (worker_group) { - for (auto i : rank_worker_ids) + for (size_t i = 0; i < rank_worker_ids.size(); i++) { const auto & worker_endpoint = worker_group->getHostWithPortsVec()[i]; auto worker_address = getRemoteAddress(worker_endpoint,
query_context); - rank_workers.emplace_back(worker_address, NodeType::Remote, worker_endpoint.id); - rank_hosts.emplace_back(worker_endpoint); + all_workers.emplace_back(worker_address, NodeType::Remote, worker_endpoint.id); + all_hosts.emplace_back(worker_endpoint); } } } std::vector rank_worker_ids; - std::vector rank_workers; - HostWithPortsVec rank_hosts; + std::vector all_workers; + HostWithPortsVec all_hosts; }; struct NodeSelectorResult @@ -142,6 +142,12 @@ class CommonNodeSelector ErrorCodes::LOGICAL_ERROR); } } + + bool needStableSchedule(PlanSegment * plan_segment_ptr) + { + const auto & inputs = plan_segment_ptr->getPlanSegmentInputs(); + return std::any_of(inputs.begin(), inputs.end(), [](const auto & input) { return input->isStable(); }); + } void selectPrunedWorkers(DAGGraph * dag_graph_ptr, PlanSegment * plan_segment_ptr, NodeSelectorResult & result, AddressInfo & local_address) { @@ -149,7 +155,7 @@ class CommonNodeSelector if (target_hosts.empty()) { LOG_DEBUG(log, "SourcePrune plan segment {} select first worker.", plan_segment_ptr->getPlanSegmentId()); - for (const auto & worker : cluster_nodes.rank_workers) + for (const auto & worker : cluster_nodes.all_workers) { if (worker.address != local_address) { @@ -163,8 +169,8 @@ class CommonNodeSelector LOG_DEBUG(log, "SourcePrune plan segment {} select workers after source prune.", plan_segment_ptr->getPlanSegmentId()); for (size_t idx = 0; idx < cluster_nodes.rank_worker_ids.size(); idx++) { - if (target_hosts.contains(cluster_nodes.rank_hosts[idx])) - result.worker_nodes.emplace_back(cluster_nodes.rank_workers[idx]); + if (target_hosts.contains(cluster_nodes.all_hosts[idx])) + result.worker_nodes.emplace_back(cluster_nodes.all_workers[idx]); } } } diff --git a/src/Optimizer/Cascades/Task.cpp b/src/Optimizer/Cascades/Task.cpp index e668f38ef98..b75c5c31a75 100644 --- a/src/Optimizer/Cascades/Task.cpp +++ b/src/Optimizer/Cascades/Task.cpp @@ -29,6 +29,7 @@ #include #include #include +#include 
"Interpreters/Context_fwd.h" #include "QueryPlan/IQueryPlanStep.h" #include @@ -282,7 +283,7 @@ void OptimizeInput::execute() // 1. We can init input cost using non-zero value for pruning // 2. We can calculate the current operator cost if we have maintain // logical properties in group (e.g. stats, schema, cardinality) - + // Compute the cost of the root operator // 1. Collect stats needed and cache them in the group // 2. Calculate cost based on children's stats and cache it in the group expression @@ -351,7 +352,7 @@ void OptimizeInput::execute() break; } auto & input_props = input_properties[cur_prop_pair_idx]; - + // initial total cost if (cur_child_idx == 0) { @@ -420,7 +421,8 @@ void OptimizeInput::execute() single_count++; } - if (group_expr->getStep()->getType() == IQueryPlanStep::Type::Union && single_count > 0 && single_count < group_expr->getChildrenGroups().size()) + if (group_expr->getStep()->getType() == IQueryPlanStep::Type::Union && single_count > 0 + && single_count < group_expr->getChildrenGroups().size()) { auto new_child_requires = input_props; for (auto & new_child : new_child_requires) @@ -576,6 +578,44 @@ void OptimizeInput::addInputPropertiesForCTE(CTEId cte_id, CTEDescription cte_de input_properties.insert(input_properties.end(), new_properties.begin(), new_properties.end()); } +static PropertySets makeHandleSame(const PropertySet & input_props, const PropertySet & actual_props, const ContextPtr & context) +{ + PropertySets result; + auto new_child_requires = input_props; + for (auto & new_child : new_child_requires) + { + new_child.getNodePartitioningRef().setRequireHandle(true); + } + result.emplace_back(new_child_requires); + + if (actual_props[0].getNodePartitioning().isExchangeSchema(context->getSettingsRef().enable_bucket_shuffle) + && actual_props[0].getNodePartitioning().getHandle() == Partitioning::Handle::BUCKET_TABLE) + { + auto other_new_child_requires = new_child_requires; + for (auto & new_child : other_new_child_requires) + 
{ + new_child.getNodePartitioningRef().setHandle(Partitioning::Handle::BUCKET_TABLE); + new_child.getNodePartitioningRef().setBuckets(actual_props[0].getNodePartitioning().getBuckets()); + new_child.getNodePartitioningRef().setBucketExpr(actual_props[0].getNodePartitioning().getBucketExpr()); + } + result.emplace_back(other_new_child_requires); + } + + if (actual_props[1].getNodePartitioning().isExchangeSchema(context->getSettingsRef().enable_bucket_shuffle) + && actual_props[1].getNodePartitioning().getHandle() == Partitioning::Handle::BUCKET_TABLE) + { + auto other_new_child_requires = new_child_requires; + for (auto & new_child : other_new_child_requires) + { + new_child.getNodePartitioningRef().setHandle(Partitioning::Handle::BUCKET_TABLE); + new_child.getNodePartitioningRef().setBuckets(actual_props[1].getNodePartitioning().getBuckets()); + new_child.getNodePartitioningRef().setBucketExpr(actual_props[1].getNodePartitioning().getBucketExpr()); + } + result.emplace_back(other_new_child_requires); + } + return result; +} + bool OptimizeInput::checkJoinInputProperties(const PropertySet & requried_input_props, const PropertySet & actual_input_props) { bool all_fix_hash = std::all_of(requried_input_props.begin(), requried_input_props.end(), [](const auto & i_prop) { @@ -618,6 +658,7 @@ bool OptimizeInput::checkJoinInputProperties(const PropertySet & requried_input_ auto first_handle = first_props.getNodePartitioning().getHandle(); auto first_bucket_count = first_props.getNodePartitioning().getBuckets(); + auto first_sharding_expr = first_props.getNodePartitioning().getBucketExpr(); auto first_partition_column = first_props.getNodePartitioning().normalize(*left_equivalences).getColumns(); for (size_t actual_prop_index = 1; actual_prop_index < actual_input_props.size(); ++actual_prop_index) @@ -625,7 +666,8 @@ bool OptimizeInput::checkJoinInputProperties(const PropertySet & requried_input_ auto before_transformed_partition_cols = 
actual_input_props[actual_prop_index].getNodePartitioning().getColumns(); auto translated_prop = actual_input_props[actual_prop_index].normalize(*right_equivalences); if (translated_prop.getNodePartitioning().getHandle() != first_handle - || translated_prop.getNodePartitioning().getBuckets() != first_bucket_count) + || translated_prop.getNodePartitioning().getBuckets() != first_bucket_count + || !ASTEquality::compareTree(translated_prop.getNodePartitioning().getBucketExpr(), first_sharding_expr)) { match = false; break; @@ -649,14 +691,13 @@ bool OptimizeInput::checkJoinInputProperties(const PropertySet & requried_input_ if (!match) { - auto new_child_requires = requried_input_props; - for (auto & new_child : new_child_requires) + for (auto & new_child_requires : makeHandleSame(requried_input_props, actual_input_props, context->getOptimizerContext().getContext())) { - new_child.getNodePartitioningRef().setRequireHandle(true); + input_properties.emplace_back(new_child_requires); } - input_properties.emplace_back(new_child_requires); } + return match; } @@ -738,7 +779,8 @@ void OptimizeInput::enforcePropertyAndUpdateWinner( // increase cost if the cte exists both join side. disable q11 & q74 cte for tpcds. 
if (!it.second && group_expr->getStep()->getType() == IQueryPlanStep::Type::Join) { - auto coefficient = opt_context->getOptimizerContext().getContext()->getSettingsRef().cost_calculator_cte_weight_for_join_build_side; + auto coefficient + = opt_context->getOptimizerContext().getContext()->getSettingsRef().cost_calculator_cte_weight_for_join_build_side; it.first->second.second = std::max(it.first->second.second, cte_prop.second.second) * coefficient; } } @@ -848,7 +890,7 @@ void OptimizeCTE::execute() if (context->getOptimizerContext().isEnableTrace()) context->getOptimizerContext().trace("OptimizeCTE", group_expr->getGroupId(), group_expr->getProduceRule(), elapsed_ns); } - + OptimizerTask::OptimizerTask(OptContextPtr context_) : context(std::move(context_)), log(context->getOptimizerContext().getLog()) { } diff --git a/src/Optimizer/CostModel/ExchangeCost.cpp b/src/Optimizer/CostModel/ExchangeCost.cpp index 813ae024e9f..e927a1ca7e2 100644 --- a/src/Optimizer/CostModel/ExchangeCost.cpp +++ b/src/Optimizer/CostModel/ExchangeCost.cpp @@ -30,7 +30,7 @@ PlanNodeCost ExchangeCost::calculate(const ExchangeStep & step, CostContext & co if (!step.getSchema().getColumns().empty() && (step.getSchema().getHandle() == Partitioning::Handle::FIXED_HASH || step.getSchema().getHandle() == Partitioning::Handle::BUCKET_TABLE)) - base_cost += 1.0 / step.getSchema().getColumns().size(); + base_cost += 1.0 / (step.getSchema().getColumns().size() + 1); if (step.getSchema().getHandle() == Partitioning::Handle::BUCKET_TABLE) base_cost *= 1.1; diff --git a/src/Optimizer/Property/Property.cpp b/src/Optimizer/Property/Property.cpp index a27dae81f64..0a5106e6858 100644 --- a/src/Optimizer/Property/Property.cpp +++ b/src/Optimizer/Property/Property.cpp @@ -18,14 +18,17 @@ #include #include #include -#include -#include -#include #include #include +#include +#include #include +#include #include #include +#include +#include +#include "Core/Field.h" namespace DB { @@ -45,7 +48,7 @@ bool 
Partitioning::satisfy(const Partitioning & requirement, const Constants & c { if (requirement.require_handle) return getHandle() == requirement.getHandle() && getBuckets() == requirement.getBuckets() - && getColumns() == requirement.getColumns(); + && getColumns() == requirement.getColumns() && ASTEquality::compareTree(bucket_expr, requirement.bucket_expr); switch (requirement.component) { @@ -74,7 +77,7 @@ bool Partitioning::satisfy(const Partitioning & requirement, const Constants & c || (!requirement.isExactlyMatch() && this->isPartitionOn(requirement, constants)); default: return getHandle() == requirement.getHandle() && getBuckets() == requirement.getBuckets() - && getColumns() == requirement.getColumns(); + && getColumns() == requirement.getColumns() && ASTEquality::compareTree(bucket_expr, requirement.bucket_expr); } } @@ -106,6 +109,103 @@ bool Partitioning::isPartitionOn(const Partitioning & requirement, const Constan return true; } +bool Partitioning::isExchangeSchema(bool support_bucket_shuffle) const +{ + if (handle == Handle::BUCKET_TABLE) + { + if (support_bucket_shuffle && bucket_expr) + { + if (auto * cluster_by_ast_element = bucket_expr->as()) + { + if (cluster_by_ast_element->is_user_defined_expression) + { + if (!cluster_by_ast_element->getColumns()->as()) + return false; + } + + auto expression = extractKeyExpressionList(cluster_by_ast_element->getColumns()); + + if (auto * expr_list = expression->as()) + { + if (expr_list->children.size() != columns.size()) + return false; + for (const auto & col : expr_list->children) + { + if (auto * id = col->as()) + { + if (!id->name().starts_with("$")) + return false; + } + else + { + return false; + } + } + } + else + { + return false; + } + } + } + else + { + return false; + } + } + + return true; +} + +String Partitioning::getHashFunc(String default_func) const +{ + if (handle == Handle::BUCKET_TABLE) + { + if (bucket_expr) + { + if (auto * cluster_by_ast_element = bucket_expr->as()) + { + if 
(cluster_by_ast_element->is_user_defined_expression) + return "toUInt64"; + return "bucket"; + } + } + } + + return default_func; +} + + +// bucket(function_name,bucket_num,with_range,split_number)(bucket_column) +Array Partitioning::getParams() const +{ + Array result; + if (handle == Handle::BUCKET_TABLE) + { + if (bucket_expr) + { + if (auto * cluster_by_ast_element = bucket_expr->as()) + { + if (cluster_by_ast_element->is_user_defined_expression) + return result; + if (cluster_by_ast_element->split_number > 0 && columns.size() == 1) + { + result.emplace_back(Field("dtspartition")); + } + else + { + result.emplace_back(Field("sipHashBuitin")); + } + result.emplace_back(buckets); + result.emplace_back(Field(cluster_by_ast_element->is_with_range)); + result.emplace_back(Field(static_cast(cluster_by_ast_element->split_number))); + } + } + } + + return result; +} + Partitioning Partitioning::normalize(const SymbolEquivalences & symbol_equivalences) const { auto mapping = symbol_equivalences.representMap(); @@ -134,8 +234,8 @@ Partitioning Partitioning::translate(const std::unordered_map & else // note: don't discard column translate_columns.emplace_back(column); } - auto result - = Partitioning{handle, translate_columns, require_handle, buckets, enforce_round_robin, component, exactly_match, satisfy_worker}; + auto result = Partitioning{ + handle, translate_columns, require_handle, buckets, bucket_expr, enforce_round_robin, component, exactly_match, satisfy_worker}; result.setPreferred(preferred); return result; } @@ -151,6 +251,7 @@ void Partitioning::toProto(Protos::Partitioning & proto) const proto.set_enforce_round_robin(enforce_round_robin); proto.set_component(Partitioning::ComponentConverter::toProto(component)); proto.set_exactly_match(exactly_match); + serializeASTToProto(bucket_expr, *proto.mutable_bucket_expr()); } Partitioning Partitioning::fromProto(const Protos::Partitioning & proto) @@ -164,7 +265,10 @@ Partitioning Partitioning::fromProto(const 
Protos::Partitioning & proto) auto enforce_round_robin = proto.enforce_round_robin(); auto component = Partitioning::ComponentConverter::fromProto(proto.component()); auto exactly_match = proto.exactly_match(); - return Partitioning(handle, columns, require_handle, buckets, enforce_round_robin, component, exactly_match); + ASTPtr bucket_expr = nullptr; + if (proto.has_bucket_expr()) + bucket_expr = deserializeASTFromProto(proto.bucket_expr()); + return Partitioning(handle, columns, require_handle, buckets, bucket_expr, enforce_round_robin, component, exactly_match); } String Partitioning::toString() const @@ -213,7 +317,7 @@ String Partitioning::toString() const columns[0], [](String a, const String & b) { return std::move(a) + ", " + b; }) + "]"; - result += " BUCKETS " + std::to_string(getBuckets()); + result += " " + queryToString(bucket_expr); if (require_handle) result += " H"; if (preferred) @@ -299,8 +403,9 @@ Sorting Sorting::normalize(const SymbolEquivalences & symbol_equivalences) const String Sorting::toString() const { - return empty() ? "" : std::accumulate( - std::next(begin()), end(), front().toString(), [](std::string a, const auto & b) { return std::move(a) + '-' + b.toString(); }); + return empty() ? 
"" : std::accumulate(std::next(begin()), end(), front().toString(), [](std::string a, const auto & b) { + return std::move(a) + '-' + b.toString(); + }); } size_t CTEDescriptions::hash() const @@ -309,7 +414,7 @@ size_t CTEDescriptions::hash() const for (const auto & item : *this) { hash = MurmurHash3Impl64::combineHashes(hash, IntHash64Impl::apply(item.first)); - hash = MurmurHash3Impl64::combineHashes(hash, item.second.hash()); + hash = MurmurHash3Impl64::combineHashes(hash, item.second.hash()); } return hash; } @@ -377,7 +482,7 @@ Property Property::normalize(const SymbolEquivalences & symbol_equivalences) con node_partitioning.normalize(symbol_equivalences), stream_partitioning.normalize(symbol_equivalences), sorting.normalize(symbol_equivalences)}; - result.setCTEDescriptions(cte_descriptions); + result.setCTEDescriptions(cte_descriptions); return result; } @@ -428,7 +533,7 @@ CTEDescription CTEDescription::inlined() } CTEDescription CTEDescription::from(const Property & property) - { +{ return CTEDescription(false, property.getNodePartitioning()); } diff --git a/src/Optimizer/Property/Property.h b/src/Optimizer/Property/Property.h index ec9b77aca0e..cb1f7165066 100644 --- a/src/Optimizer/Property/Property.h +++ b/src/Optimizer/Property/Property.h @@ -84,6 +84,7 @@ class Partitioning Names columns_ = {}, bool require_handle_ = false, UInt64 buckets_ = 0, + ASTPtr bucket_expr_ = nullptr, bool enforce_round_robin_ = true, Component component_ = Component::ANY, bool exactly_match_ = false, @@ -92,6 +93,7 @@ class Partitioning , columns(std::move(columns_)) , require_handle(require_handle_) , buckets(buckets_) + , bucket_expr(bucket_expr_) , enforce_round_robin(enforce_round_robin_) , component(component_) , exactly_match(exactly_match_) @@ -115,6 +117,13 @@ class Partitioning void setComponent(Component component_) { component = component_; } bool isExactlyMatch() const { return exactly_match; } + bool isPartitionHandle() const { return handle == 
Handle::BUCKET_TABLE || handle == Handle::FIXED_HASH; } + + bool isExchangeSchema(bool support_bucket_shuffle) const; + + String getHashFunc(String default_func) const; + Array getParams() const; + bool isSatisfyWorker() const { return satisfy_worker; @@ -137,8 +146,12 @@ class Partitioning bool operator==(const Partitioning & other) const { return preferred == other.preferred && handle == other.handle && columns == other.columns && require_handle == other.require_handle && buckets == other.buckets - && enforce_round_robin == other.enforce_round_robin; + && enforce_round_robin == other.enforce_round_robin && ASTEquality::compareTree(bucket_expr, other.bucket_expr); } + + ASTPtr getBucketExpr() const { return bucket_expr; } + void setBucketExpr(const ASTPtr & bucket_expr_) { bucket_expr = bucket_expr_; } + String toString() const; void toProto(Protos::Partitioning & proto) const; @@ -149,6 +162,7 @@ class Partitioning Names columns; bool require_handle; UInt64 buckets; + ASTPtr bucket_expr; bool enforce_round_robin; Component component; bool exactly_match; diff --git a/src/Optimizer/Property/PropertyDeriver.cpp b/src/Optimizer/Property/PropertyDeriver.cpp index 9726b2177f4..b91a78c7dc1 100644 --- a/src/Optimizer/Property/PropertyDeriver.cpp +++ b/src/Optimizer/Property/PropertyDeriver.cpp @@ -19,11 +19,14 @@ #include #include +#include #include #include +#include #include #include #include +#include #include #include #include @@ -31,12 +34,9 @@ #include #include #include -#include -#include namespace DB { - namespace ErrorCodes { extern const int OPTIMIZER_NONSUPPORT; @@ -111,6 +111,31 @@ Property PropertyDeriver::deriveStorageProperty(const StoragePtr & storage, cons auto metadata = storage->getInMemoryMetadataPtr(); Names cluster_by; UInt64 buckets = 0; + + auto normalize_ast = [&](ASTPtr sharding_key) -> std::pair { + static SymbolVisitor visitor; + Names partition_keys; + SymbolVisitorContext symbol_context; + ASTVisitorUtil::accept(sharding_key, visitor, 
symbol_context); + + ConstASTMap expression_map; + size_t index = 0; + for (auto symbol : symbol_context.result) + { + ASTPtr name = std::make_shared(symbol); + ASTPtr id = std::make_shared("$" + std::to_string(index)); + if (!expression_map.contains(name)) + { + expression_map[name] = ConstHashAST::make(id); + partition_keys.emplace_back(symbol); + index++; + } + } + + return {partition_keys, ExpressionRewriter::rewrite(sharding_key, expression_map)}; + }; + + ASTPtr ast; if (storage->isBucketTable()) { bool clustered = storage->isTableClustered(context); @@ -125,7 +150,9 @@ Property PropertyDeriver::deriveStorageProperty(const StoragePtr & storage, cons } else { - cluster_by = metadata->cluster_by_key.column_names; + auto [columns, rewritten] = normalize_ast(metadata->cluster_by_key.definition_ast); + cluster_by = columns; + ast = rewritten; } buckets = metadata->getBucketNumberFromClusterByKey(); } @@ -143,7 +170,16 @@ Property PropertyDeriver::deriveStorageProperty(const StoragePtr & storage, cons } #endif return Property{ - Partitioning{Partitioning::Handle::BUCKET_TABLE, cluster_by, true, buckets, true, Partitioning::Component::ANY, false, satisfyBucketWorkerRelation(storage, *context)}, + Partitioning{ + Partitioning::Handle::BUCKET_TABLE, + cluster_by, + true, + buckets, + ast, + true, + Partitioning::Component::ANY, + false, + satisfyBucketWorkerRelation(storage, *context)}, Partitioning{}, sorting}; } @@ -169,9 +205,11 @@ Property PropertyDeriver::deriveStoragePropertyWhatIfMode( Names cluster_by{what_if_table_partitioning.getPartitionKey().column}; // the bucket number is only used for matching, can be set to anything UInt64 buckets = (actual_storage_property.getNodePartitioning().getHandle() == Partitioning::Handle::BUCKET_TABLE) - ? actual_storage_property.getNodePartitioning().getBuckets() : context->getSettingsRef().memory_catalog_worker_size; + ? 
actual_storage_property.getNodePartitioning().getBuckets() + : context->getSettingsRef().memory_catalog_worker_size; - Partitioning new_partitioning{Partitioning::Handle::BUCKET_TABLE, cluster_by, true, buckets, true, Partitioning::Component::ANY}; + Partitioning new_partitioning{ + Partitioning::Handle::BUCKET_TABLE, cluster_by, true, buckets, nullptr, true, Partitioning::Component::ANY}; actual_storage_property.setNodePartitioning(new_partitioning); return actual_storage_property; @@ -368,7 +406,7 @@ Property DeriverVisitor::visitAggregatingStep(const AggregatingStep &, DeriverCo Property DeriverVisitor::visitMarkDistinctStep(const MarkDistinctStep &, DeriverContext & context) { - return context.getInput()[0].clearSorting(); + return context.getInput()[0].clearSorting(); } Property DeriverVisitor::visitMergingAggregatedStep(const MergingAggregatedStep &, DeriverContext & context) @@ -453,6 +491,7 @@ Property DeriverVisitor::visitUnionStep(const UnionStep & step, DeriverContext & output_keys, true, first_child_property.getNodePartitioning().getBuckets(), + first_child_property.getNodePartitioning().getBucketExpr(), first_child_property.getNodePartitioning().isEnforceRoundRobin(), first_child_property.getNodePartitioning().getComponent(), false, @@ -466,6 +505,7 @@ Property DeriverVisitor::visitUnionStep(const UnionStep & step, DeriverContext & output_keys, true, first_child_property.getNodePartitioning().getBuckets(), + first_child_property.getNodePartitioning().getBucketExpr(), first_child_property.getNodePartitioning().isEnforceRoundRobin(), first_child_property.getNodePartitioning().getComponent(), false, @@ -536,7 +576,8 @@ Property DeriverVisitor::visitTableScanStep(const TableScanStep & step, DeriverC translation.emplace(item.first, item.second); if (!context.getRequire().getTableLayout().empty()) - return PropertyDeriver::deriveStoragePropertyWhatIfMode(step.getStorage(), context.getContext(), context.getRequire()).translate(translation); + return 
PropertyDeriver::deriveStoragePropertyWhatIfMode(step.getStorage(), context.getContext(), context.getRequire()) + .translate(translation); return PropertyDeriver::deriveStorageProperty(step.getStorage(), context.getRequire(), context.getContext()).translate(translation); } @@ -691,7 +732,7 @@ Property DeriverVisitor::visitMultiJoinStep(const MultiJoinStep &, DeriverContex return context.getInput()[0]; } -Property DeriverVisitor::visitExpandStep(const ExpandStep&, DeriverContext & context) +Property DeriverVisitor::visitExpandStep(const ExpandStep &, DeriverContext & context) { return context.getInput()[0]; } diff --git a/src/Optimizer/Property/PropertyDeterminer.cpp b/src/Optimizer/Property/PropertyDeterminer.cpp index 11730f5486d..446a571d10a 100644 --- a/src/Optimizer/Property/PropertyDeterminer.cpp +++ b/src/Optimizer/Property/PropertyDeterminer.cpp @@ -138,8 +138,8 @@ PropertySets DeterminerVisitor::visitJoinStep(const JoinStep & step, DeterminerC Partitioning left_stream{Partitioning::Handle::FIXED_HASH, left_keys_asof}; Partitioning right_stream{Partitioning::Handle::FIXED_HASH, right_keys_asof}; - Property left{Partitioning{Partitioning::Handle::FIXED_HASH, left_keys_asof, false, 0, enforce_round_robine}, left_stream}; - Property right{Partitioning{Partitioning::Handle::FIXED_HASH, right_keys_asof, false, 0, false}, right_stream}; + Property left{Partitioning{Partitioning::Handle::FIXED_HASH, left_keys_asof, false, 0, nullptr, enforce_round_robine}, left_stream}; + Property right{Partitioning{Partitioning::Handle::FIXED_HASH, right_keys_asof, false, 0, nullptr, false}, right_stream}; PropertySet set; set.emplace_back(left); set.emplace_back(right); @@ -184,8 +184,8 @@ PropertySets DeterminerVisitor::visitJoinStep(const JoinStep & step, DeterminerC Partitioning left_stream{Partitioning::Handle::FIXED_HASH, sub_left_keys}; Partitioning right_stream{Partitioning::Handle::FIXED_HASH, sub_right_keys}; - Property left{Partitioning{Partitioning::Handle::FIXED_HASH, 
sub_left_keys, false, 0, enforce_round_robine}, left_stream}; - Property right{Partitioning{Partitioning::Handle::FIXED_HASH, sub_right_keys, false, 0, false}, right_stream}; + Property left{Partitioning{Partitioning::Handle::FIXED_HASH, sub_left_keys, false, 0, nullptr, enforce_round_robine}, left_stream}; + Property right{Partitioning{Partitioning::Handle::FIXED_HASH, sub_right_keys, false, 0, nullptr, false}, right_stream}; PropertySet prop_set; prop_set.emplace_back(left); prop_set.emplace_back(right); @@ -196,8 +196,8 @@ PropertySets DeterminerVisitor::visitJoinStep(const JoinStep & step, DeterminerC { Partitioning left_stream{Partitioning::Handle::FIXED_HASH, left_keys}; Partitioning right_stream{Partitioning::Handle::FIXED_HASH, right_keys}; - Property left{Partitioning{Partitioning::Handle::FIXED_HASH, left_keys, false, 0, enforce_round_robine}, left_stream}; - Property right{Partitioning{Partitioning::Handle::FIXED_HASH, right_keys, false, 0, false}, right_stream}; + Property left{Partitioning{Partitioning::Handle::FIXED_HASH, left_keys, false, 0, nullptr, enforce_round_robine}, left_stream}; + Property right{Partitioning{Partitioning::Handle::FIXED_HASH, right_keys, false, 0, nullptr, false}, right_stream}; PropertySet prop_set; prop_set.emplace_back(left); prop_set.emplace_back(right); @@ -284,7 +284,7 @@ PropertySets DeterminerVisitor::visitAggregatingStep(const AggregatingStep & ste { keys.emplace_back("__grouping_set"); return {PropertySet{ - Property{Partitioning{Partitioning::Handle::FIXED_HASH, keys, false, 0, true, Partitioning::Component::ANY, true}}}}; + Property{Partitioning{Partitioning::Handle::FIXED_HASH, keys, false, 0, nullptr, true, Partitioning::Component::ANY, true}}}}; } return sets; diff --git a/src/Optimizer/Property/PropertyEnforcer.cpp b/src/Optimizer/Property/PropertyEnforcer.cpp index ee2a39f3ffb..d96de3c5548 100644 --- a/src/Optimizer/Property/PropertyEnforcer.cpp +++ b/src/Optimizer/Property/PropertyEnforcer.cpp @@ -86,8 +86,6 
@@ QueryPlanStepPtr PropertyEnforcer::enforceNodePartitioning( { case Partitioning::Handle::SINGLE: return std::make_unique(streams, ExchangeMode::GATHER, partitioning, keep_order); - case Partitioning::Handle::FIXED_HASH: - return std::make_unique(streams, ExchangeMode::REPARTITION, partitioning, keep_order); case Partitioning::Handle::FIXED_BROADCAST: return std::make_unique(streams, ExchangeMode::BROADCAST, partitioning, keep_order); case Partitioning::Handle::FIXED_ARBITRARY: @@ -99,7 +97,9 @@ QueryPlanStepPtr PropertyEnforcer::enforceNodePartitioning( return std::make_unique(streams, ExchangeMode::LOCAL_NO_NEED_REPARTITION, partitioning, keep_order); case Partitioning::Handle::ARBITRARY: return nullptr; + case Partitioning::Handle::FIXED_HASH: case Partitioning::Handle::BUCKET_TABLE: + return std::make_unique(streams, ExchangeMode::REPARTITION, partitioning, keep_order); default: throw Exception("Property Enforce error", ErrorCodes::ILLEGAL_ENFORCE); } diff --git a/src/Parsers/ASTClusterByElement.cpp b/src/Parsers/ASTClusterByElement.cpp index 4793b380efb..cfe739ec637 100644 --- a/src/Parsers/ASTClusterByElement.cpp +++ b/src/Parsers/ASTClusterByElement.cpp @@ -15,6 +15,7 @@ #include #include +#include #include @@ -56,5 +57,28 @@ ASTPtr ASTClusterByElement::clone() const return clone; } +void ASTClusterByElement::serialize(WriteBuffer & buf) const +{ + writeBinary(split_number, buf); + writeBinary(is_with_range, buf); + writeBinary(is_user_defined_expression, buf); + serializeASTs(children, buf); +} + +void ASTClusterByElement::deserializeImpl(ReadBuffer & buf) +{ + readBinary(split_number, buf); + readBinary(is_with_range, buf); + readBinary(is_user_defined_expression, buf); + children = deserializeASTs(buf); +} + +ASTPtr ASTClusterByElement::deserialize(ReadBuffer & buf) +{ + auto element = std::make_shared(); + element->deserializeImpl(buf); + return element; +} + } diff --git a/src/Parsers/ASTClusterByElement.h b/src/Parsers/ASTClusterByElement.h index 
45766f2247f..80b06d99fab 100644 --- a/src/Parsers/ASTClusterByElement.h +++ b/src/Parsers/ASTClusterByElement.h @@ -42,9 +42,15 @@ class ASTClusterByElement : public IAST const ASTPtr & getColumns() const { return children.front(); } const ASTPtr & getTotalBucketNumber() const { return children.back(); } + ASTType getType() const override { return ASTType::ASTClusterByElement; } + String getID(char) const override { return "ClusterByElement"; } ASTPtr clone() const override; + void serialize(WriteBuffer & buf) const override; + void deserializeImpl(ReadBuffer & buf) override; + static ASTPtr deserialize(ReadBuffer & buf); + protected: void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; }; diff --git a/src/Parsers/ASTSerDerHelper.cpp b/src/Parsers/ASTSerDerHelper.cpp index 839c324a7ef..074c98a0cda 100644 --- a/src/Parsers/ASTSerDerHelper.cpp +++ b/src/Parsers/ASTSerDerHelper.cpp @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -31,8 +32,8 @@ #include #include #include -#include #include +#include #include #include #include @@ -112,7 +113,6 @@ namespace DB { - ASTPtr createWithASTType(ASTType type, ReadBuffer & buf) { switch (type) diff --git a/src/Parsers/IAST.h b/src/Parsers/IAST.h index a6545971562..6336fcbd8c2 100644 --- a/src/Parsers/IAST.h +++ b/src/Parsers/IAST.h @@ -162,7 +162,8 @@ class ReadBuffer; M(ASTDropPreparedStatementQuery) \ M(ASTBitEngineConstraintDeclaration) \ M(ASTStorageAnalyticalMySQL) \ - M(ASTCreateQueryAnalyticalMySQL) + M(ASTCreateQueryAnalyticalMySQL) \ + M(ASTClusterByElement) #define ENUM_TYPE(ITEM) ITEM, enum class ASTType : UInt8 diff --git a/src/Processors/Exchange/RepartitionTransform.cpp b/src/Processors/Exchange/RepartitionTransform.cpp index 86c5cf5a539..8ed5c77e06f 100644 --- a/src/Processors/Exchange/RepartitionTransform.cpp +++ b/src/Processors/Exchange/RepartitionTransform.cpp @@ -118,6 +118,13 @@ ExecutableFunctionPtr 
RepartitionTransform::getDefaultRepartitionFunction(const return function_base->prepare(arguments); } +ExecutableFunctionPtr RepartitionTransform::getRepartitionHashFunction(const String & func_name, const ColumnsWithTypeAndName & arguments, ContextPtr context, const Array & params) +{ + FunctionOverloadResolverPtr func_builder = FunctionFactory::instance().get(func_name, context); + FunctionBasePtr function_base = func_builder->build(arguments); + return params.empty() ? function_base->prepare(arguments) : function_base->prepareWithParameters(arguments, params); +} + const DataTypePtr RepartitionTransform::REPARTITION_FUNC_RESULT_TYPE = std::make_shared(); const DataTypePtr RepartitionTransform::REPARTITION_FUNC_NULLABLE_RESULT_TYPE = std::make_shared(RepartitionTransform::REPARTITION_FUNC_RESULT_TYPE); } diff --git a/src/Processors/Exchange/RepartitionTransform.h b/src/Processors/Exchange/RepartitionTransform.h index 02e2c46197b..6072948590f 100644 --- a/src/Processors/Exchange/RepartitionTransform.h +++ b/src/Processors/Exchange/RepartitionTransform.h @@ -25,6 +25,7 @@ #include #include #include +#include #include namespace DB @@ -65,6 +66,8 @@ class RepartitionTransform : public ISimpleTransform static ExecutableFunctionPtr getDefaultRepartitionFunction(const ColumnsWithTypeAndName & arguments, ContextPtr context); + static ExecutableFunctionPtr getRepartitionHashFunction(const String & func_name, const ColumnsWithTypeAndName & arguments, ContextPtr context, const Array & params = {}); + protected: void transform(Chunk & chunk) override; diff --git a/src/Processors/tests/gtest_bucket_shuffle.cpp b/src/Processors/tests/gtest_bucket_shuffle.cpp new file mode 100644 index 00000000000..d11777fc018 --- /dev/null +++ b/src/Processors/tests/gtest_bucket_shuffle.cpp @@ -0,0 +1,125 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +using namespace DB; + + +Block 
generateBlockWithTwoColumns(size_t total_rows) +{ + auto col_uint64 = ColumnUInt64::create(total_rows, 0); + auto & col_uint64_data = col_uint64->getData(); + auto col_string = ColumnString::create(); + for (size_t i = 0; i < total_rows; i++) + { + col_uint64_data[i] = i; + String str = "bucket_" + std::to_string(i) ; + col_string->insertData(str.data(), str.size()); + } + ColumnWithTypeAndName column_1{std::move(col_uint64), std::make_shared(), "column_1"}; + ColumnWithTypeAndName column_2{std::move(col_string), std::make_shared(), "column_2"}; + + + ColumnsWithTypeAndName columns; + columns.emplace_back(std::move(column_1)); + columns.emplace_back(std::move(column_2)); + return Block(columns); +} + + +bool comparePrepareBucketColumnWithBucketFunction(Block & expected, ColumnPtr result) +{ + auto expected_col = expected.getByName(COLUMN_BUCKET_NUMBER).column; + + if (expected_col->size() != result->size()) + return false; + for(size_t i = 0; i < expected_col->size(); i++) + { + if(expected_col->getUInt(i) != result->getUInt(i)) + return false; + } + return true; +} + +ColumnPtr executeBucketFunction(Block & block, const Names & bucket_columns, const Int64 & split_number, const bool is_with_range, const Int64 total_shard_num, ContextPtr context) +{ + String func_name = "sipHashBuitin"; + if(split_number && bucket_columns.size() == 1) + func_name = "dtspartition"; + Array params; + params.emplace_back(Field(func_name)); + params.emplace_back(Field(static_cast(total_shard_num))); + params.emplace_back(Field(is_with_range)); + params.emplace_back(Field(static_cast(split_number))); + + + ColumnsWithTypeAndName arguments; + for (const auto & name: bucket_columns) + { + arguments.push_back(block.getByName(name)); + } + + auto func = RepartitionTransform::getRepartitionHashFunction("bucket", arguments, context, params); + return func->execute(arguments, RepartitionTransform::REPARTITION_FUNC_RESULT_TYPE, block.rows(), false); +} + + +bool 
executeAndComparePrepareBucketColumnWithBucketFunction( + Block & block, + Names bucket_columns, + const Int64 & split_number, + const bool is_with_range, + const Int64 total_shard_num, + ContextPtr context) +{ + auto expected = block; + prepareBucketColumn(expected, bucket_columns, split_number, is_with_range, total_shard_num, context, false); + auto result = executeBucketFunction(block, bucket_columns, split_number, is_with_range, total_shard_num, context); + return comparePrepareBucketColumnWithBucketFunction(expected, result); +} + +TEST(BucketShuffleTest, BucketFunctionTest) +{ + tryRegisterFunctions(); + auto block = generateBlockWithTwoColumns(5); + auto local_context = Context::createCopy(getContext().context); + + + // sipHashBuitin + ASSERT_TRUE(executeAndComparePrepareBucketColumnWithBucketFunction(block, {"column_1"}, 0, false, 33, local_context)); + ASSERT_TRUE(executeAndComparePrepareBucketColumnWithBucketFunction(block, {"column_2"}, 0, false, 100, local_context)); + ASSERT_TRUE(executeAndComparePrepareBucketColumnWithBucketFunction(block, {"column_1", "column_2"}, 300, false, 100, local_context)); + ASSERT_TRUE(executeAndComparePrepareBucketColumnWithBucketFunction(block, {"column_1", "column_2"}, 50, true, 100, local_context)); + ASSERT_TRUE(executeAndComparePrepareBucketColumnWithBucketFunction(block, {"column_1", "column_2"}, 50, true, 300, local_context)); + ASSERT_TRUE(executeAndComparePrepareBucketColumnWithBucketFunction(block, {"column_1", "column_2"}, 50, true, 1, local_context)); + ASSERT_TRUE(executeAndComparePrepareBucketColumnWithBucketFunction(block, {"column_1", "column_2"}, 1, true, 50, local_context)); + + + // dtspartition + ASSERT_TRUE(executeAndComparePrepareBucketColumnWithBucketFunction(block, {"column_1"}, 300, false, 100, local_context)); + ASSERT_TRUE(executeAndComparePrepareBucketColumnWithBucketFunction(block, {"column_1"}, 400, true, 99, local_context)); + 
ASSERT_TRUE(executeAndComparePrepareBucketColumnWithBucketFunction(block, {"column_2"}, 400, true, 99, local_context)); + ASSERT_TRUE(executeAndComparePrepareBucketColumnWithBucketFunction(block, {"column_2"}, 99, true, 101, local_context)); + ASSERT_TRUE(executeAndComparePrepareBucketColumnWithBucketFunction(block, {"column_2"}, 99, true, 1, local_context)); + ASSERT_TRUE(executeAndComparePrepareBucketColumnWithBucketFunction(block, {"column_2"}, 1, true, 102, local_context)); + + // fail test + auto expected = block; + prepareBucketColumn(expected, {"column_1"}, 0, false, 33, local_context, false); + auto result = executeBucketFunction(block, {"column_1"}, 0, false, 37, local_context); + ASSERT_FALSE(comparePrepareBucketColumnWithBucketFunction(expected, result)); +} + + diff --git a/src/Protos/plan_node_utils.proto b/src/Protos/plan_node_utils.proto index 5bcfde77164..d4bf47b69b4 100644 --- a/src/Protos/plan_node_utils.proto +++ b/src/Protos/plan_node_utils.proto @@ -109,6 +109,7 @@ message Partitioning { required bool enforce_round_robin = 5; required Component.Enum component = 6; required bool exactly_match = 7; + optional AST bucket_expr = 8; } // possibly nullptr @@ -264,6 +265,7 @@ message PlanSegmentOutput { optional string shuffle_hash_function = 2; optional uint32 parallel_size = 3; optional bool keep_order = 4; + optional FieldVector shuffle_function_parameters = 5; } message WindowFrame { diff --git a/src/QueryPlan/GraphvizPrinter.cpp b/src/QueryPlan/GraphvizPrinter.cpp index 0fae1de6f5c..7430f1ecf5c 100644 --- a/src/QueryPlan/GraphvizPrinter.cpp +++ b/src/QueryPlan/GraphvizPrinter.cpp @@ -51,6 +51,7 @@ #include #include #include +#include #include #include @@ -1314,7 +1315,7 @@ String StepPrinter::printExpandStep(const ExpandStep & step, bool) } std::string result = ss.str(); - details << step.getGroupIdSymbol() << "[" << result << "]"; + details << step.getGroupIdSymbol() << "[" << result << "]"; details << "|"; details << "Groups"; details << "|"; 
@@ -1813,6 +1814,8 @@ String StepPrinter::printExchangeStep(const ExchangeStep & step) } }; details << f(step.getExchangeMode()); + details << "|"; + details << step.getSchema().toString(); if (step.needKeepOrder()) { @@ -1989,11 +1992,11 @@ String StepPrinter::printTableScanStep(const TableScanStep & step) { ASTSampleRatio * sample = query->sampleSize()->as(); details << "Sample : \\n"; - details << "Sample Size : " << ASTSampleRatio::toString(sample->ratio)<< "\\n"; + details << "Sample Size : " << ASTSampleRatio::toString(sample->ratio) << "\\n"; if (query->sampleOffset()) { ASTSampleRatio * offset = query->sampleOffset()->as(); - details << "Sample Offset : " << ASTSampleRatio::toString(offset->ratio)<< "\\n"; + details << "Sample Offset : " << ASTSampleRatio::toString(offset->ratio) << "\\n"; } details << "|"; } @@ -3562,6 +3565,11 @@ void GraphvizPrinter::appendPlanSegmentNode(std::stringstream & out, const PlanS out << "keeporder "; } + if (input->isStable()) + { + out << "stable "; + } + out << "\n"; } out << "\n"; @@ -3578,6 +3586,14 @@ void GraphvizPrinter::appendPlanSegmentNode(std::stringstream & out, const PlanS { out << "keeporder "; } + out << "hash_func:" << input->getShuffleFunctionName(); + + auto visitor = FieldVisitorToString(); + out << " params:"; + for (auto item : input->getShuffleFunctionParams()) + { + out << " " << applyVisitor(visitor, item); + } out << "\n"; } out << "\n"; @@ -3706,7 +3722,6 @@ String GraphvizPrinter::printGroup(const Group & group, const std::unordered_map head_step = group.getLogicalExpressions()[0]->getStep().get(); auto fold = [](std::string a, GroupId b) { return std::move(a) + ", " + std::to_string(b); }; - auto fold_string = [](String a, const String & b) { return std::move(a) + ", " + b; }; auto expr_to_str = [&](const GroupExprPtr & expr) { if (!expr) @@ -3848,55 +3863,9 @@ String GraphvizPrinter::printGroup(const Group & group, const std::unordered_map // winners - auto partition_str = [&](const Partitioning & 
partitioning) { - auto component_str = " ANY"; - if (partitioning.getComponent() == Partitioning::Component::COORDINATOR) - component_str = " COORDINATOR"; - else if (partitioning.getComponent() == Partitioning::Component::WORKER) - component_str = " WORKER"; - - if (partitioning.getHandle() == Partitioning::Handle::SINGLE) - return String("SINGLE") + component_str; - else if (partitioning.getHandle() == Partitioning::Handle::FIXED_BROADCAST) - return String("BROADCAST") + component_str; - else if (partitioning.getHandle() == Partitioning::Handle::ARBITRARY) - return String("ARBITRARY") + component_str; - else if (partitioning.getHandle() == Partitioning::Handle::BUCKET_TABLE) - return String("BUCKET_TABLE") + component_str; - else if (partitioning.getHandle() == Partitioning::Handle::FIXED_ARBITRARY) - return String("FIXED_ARBITRARY") + component_str; - else if (partitioning.getHandle() == Partitioning::Handle::FIXED_HASH) - { - if (partitioning.getColumns().empty()) - { - return String("FIXED_HASH[]") + component_str; - } - else - { - auto result = String("FIXED_HASH[") - + std::accumulate( - std::next(partitioning.getColumns().begin()), - partitioning.getColumns().end(), - partitioning.getColumns()[0], - fold_string) - + "]"; - if (partitioning.isEnforceRoundRobin()) - { - result += " RoundR"; - } - if (partitioning.isRequireHandle()) - { - result += " handle"; - } - return result + component_str; - } - } - else - return String("UNKNOWN") + component_str; - }; auto property_str = [&](const Property & property) { std::stringstream ss; - ss << partition_str(property.getNodePartitioning()); + ss << property.getNodePartitioning().toString(); ss << " "; ss << property.getCTEDescriptions().toString(); return ss.str(); @@ -3936,7 +3905,7 @@ String GraphvizPrinter::printGroup(const Group & group, const std::unordered_map if (auto exchange_step = dynamic_cast(winner->getRemoteExchange()->getStep().get())) { out << "enforce: "; - out << 
partition_str(exchange_step->getSchema()); + out << exchange_step->getSchema().toString(); out << "
"; } } diff --git a/src/QueryPlan/PlanPrinter.cpp b/src/QueryPlan/PlanPrinter.cpp index 429a490feda..9fe7fb34935 100644 --- a/src/QueryPlan/PlanPrinter.cpp +++ b/src/QueryPlan/PlanPrinter.cpp @@ -253,7 +253,8 @@ String PlanPrinter::getPlanSegmentHeaderText(PlanSegmentDescriptionPtr & segment << " ExchangeId:" << input->exchange_id << " ExchangeMode:" << magic_enum::enum_name(input->mode) << " ExchangeParallelSize:" << input->exchange_parallel_size - << " KeepOrder:" << input->keep_order << ")"; + << " KeepOrder:" << input->keep_order + << (input->stable ? " Stable" : "") << ")"; first = false; } os << "]\n"; @@ -1791,6 +1792,7 @@ PlanSegmentDescriptionPtr PlanSegmentDescription::getPlanSegmentDescription(Plan input_desc.mode = input->getExchangeMode(); input_desc.exchange_parallel_size = input->getExchangeParallelSize(); input_desc.keep_order = input->needKeepOrder(); + input_desc.stable = input->isStable(); auto input_desc_ptr = std::make_shared(input_desc); plan_segment_desc->inputs_desc.emplace_back(input_desc_ptr); } diff --git a/src/QueryPlan/PlanPrinter.h b/src/QueryPlan/PlanPrinter.h index 593754ae5ca..c19432851ec 100644 --- a/src/QueryPlan/PlanPrinter.h +++ b/src/QueryPlan/PlanPrinter.h @@ -193,6 +193,7 @@ struct PlanSegmentDescription size_t exchange_id; size_t exchange_parallel_size; bool keep_order; + bool stable; }; size_t segment_id; String segment_type; diff --git a/src/QueryPlan/SymbolMapper.cpp b/src/QueryPlan/SymbolMapper.cpp index 0e6ce57b2ae..24e530b55f7 100644 --- a/src/QueryPlan/SymbolMapper.cpp +++ b/src/QueryPlan/SymbolMapper.cpp @@ -64,7 +64,7 @@ SymbolMapper SymbolMapper::symbolMapper(std::unordered_map & map while (it != mapping.end() && it->second != symbol) { if (++lookup > MAX_LOOKUP_TIMES) - throw Exception("endless loop in SymbolMapper", ErrorCodes::LOGICAL_ERROR); + throw Exception("endless loop in SymbolMapper", ErrorCodes::LOGICAL_ERROR); symbol = it->second; it = mapping.find(symbol); } @@ -82,7 +82,7 @@ SymbolMapper 
SymbolMapper::symbolReallocator(std::unordered_map while (it != mapping.end() && it->second != symbol) { if (++lookup > MAX_LOOKUP_TIMES) - throw Exception("endless loop in SymbolMapper", ErrorCodes::LOGICAL_ERROR); + throw Exception("endless loop in SymbolMapper", ErrorCodes::LOGICAL_ERROR); symbol = it->second; it = mapping.find(symbol); } @@ -214,6 +214,7 @@ Partitioning SymbolMapper::map(const Partitioning & partition) map(partition.getColumns()), partition.isRequireHandle(), partition.getBuckets(), + partition.getBucketExpr(), partition.isEnforceRoundRobin(), partition.getComponent()}; } @@ -404,10 +405,10 @@ SortDescription SymbolMapper::map(const SortDescription & sort_desc) return res; } -std::map SymbolMapper::map(const std::map & group_id_non_null_symbol) +std::map SymbolMapper::map(const std::map & group_id_non_null_symbol) { std::map res; - for(const auto & entry : group_id_non_null_symbol) + for (const auto & entry : group_id_non_null_symbol) { res[entry.first] = map(entry.second); } @@ -506,11 +507,11 @@ std::shared_ptr SymbolMapper::map(const FinalSampleStep & final std::shared_ptr SymbolMapper::map(const FinishSortingStep & finish_sorting) { return std::make_shared( - map(finish_sorting.getInputStreams()[0]), - SortDescription{map(finish_sorting.getPrefixDescription())}, - SortDescription{map(finish_sorting.getResultDescription())}, - finish_sorting.getMaxBlockSize(), - finish_sorting.getLimit()); + map(finish_sorting.getInputStreams()[0]), + SortDescription{map(finish_sorting.getPrefixDescription())}, + SortDescription{map(finish_sorting.getResultDescription())}, + finish_sorting.getMaxBlockSize(), + finish_sorting.getLimit()); } std::shared_ptr SymbolMapper::map(const IntersectStep & intersect) @@ -545,7 +546,8 @@ std::shared_ptr SymbolMapper::map(const TableScanStep & scan) // order matters as symbol mapper should traverse plan nodes bottom-up std::shared_ptr mapped_filter = scan.getPushdownFilterCast() ? 
map(*scan.getPushdownFilterCast()) : nullptr; std::shared_ptr mapped_projection = scan.getPushdownProjectionCast() ? map(*scan.getPushdownProjectionCast()) : nullptr; - std::shared_ptr mapped_aggregation = scan.getPushdownAggregationCast() ? map(*scan.getPushdownAggregationCast()) : nullptr; + std::shared_ptr mapped_aggregation + = scan.getPushdownAggregationCast() ? map(*scan.getPushdownAggregationCast()) : nullptr; auto mapped_scan = std::make_shared( std::move(mapped_output_stream), @@ -690,7 +692,11 @@ std::shared_ptr SymbolMapper::map(const ReadNothingStep & read_ std::shared_ptr SymbolMapper::map(const RemoteExchangeSourceStep & remote_exchange) { - return std::make_shared(remote_exchange.getInput(), map(remote_exchange.getInputStreams()[0]), remote_exchange.isAddTotals(), remote_exchange.isAddExtremes()); + return std::make_shared( + remote_exchange.getInput(), + map(remote_exchange.getInputStreams()[0]), + remote_exchange.isAddTotals(), + remote_exchange.isAddExtremes()); } @@ -754,7 +760,12 @@ std::shared_ptr SymbolMapper::map(const CTERefStep & cte_ref) std::shared_ptr SymbolMapper::map(const ExplainAnalyzeStep & step) { return std::make_shared( - map(step.getInputStreams()[0]), map(step.getOutputName()), step.getKind(), step.getContext(), step.getQueryPlan(), step.getSetting()); + map(step.getInputStreams()[0]), + map(step.getOutputName()), + step.getKind(), + step.getContext(), + step.getQueryPlan(), + step.getSetting()); } std::shared_ptr SymbolMapper::map(const LocalExchangeStep & step) @@ -790,7 +801,8 @@ std::shared_ptr SymbolMapper::map(const BufferStep & step) std::shared_ptr SymbolMapper::map(const TableFinishStep & step) { - return std::make_shared(map(step.getInputStreams()[0]), step.getTarget(), step.getOutputAffectedRowCountSymbol(), step.getQuery()); + return std::make_shared( + map(step.getInputStreams()[0]), step.getTarget(), step.getOutputAffectedRowCountSymbol(), step.getQuery()); } std::shared_ptr SymbolMapper::map(const 
IntermediateResultCacheStep & step) @@ -805,29 +817,31 @@ std::shared_ptr SymbolMapper::map(const MultiJoinStep & step) std::shared_ptr SymbolMapper::map(const TotalsHavingStep & step) { - return std::make_shared(map(step.getInputStreams()[0]), step.isOverflowRow(), map(step.getHavingFilter()), step.getTotalsMode(), step.getAutoIncludeThreshols(), step.isFinal()); + return std::make_shared( + map(step.getInputStreams()[0]), + step.isOverflowRow(), + map(step.getHavingFilter()), + step.getTotalsMode(), + step.getAutoIncludeThreshols(), + step.isFinal()); } std::shared_ptr SymbolMapper::map(const ExpandStep & step) { return std::make_shared( - map(step.getOutputStream()), + map(step.getOutputStream()), map(step.getAssignments()), - map(step.getNameToType()), + map(step.getNameToType()), map(step.getGroupIdSymbol()), step.getGroupIdValue(), - map(step.getGroupIdNonNullSymbol()) - ); + map(step.getGroupIdNonNullSymbol())); } class SymbolMapper::SymbolMapperVisitor : public StepVisitor { protected: #define VISITOR_DEF(TYPE) \ - QueryPlanStepPtr visit##TYPE##Step(const TYPE##Step & step, SymbolMapper & mapper) override \ - { \ - return mapper.map(step); \ - } + QueryPlanStepPtr visit##TYPE##Step(const TYPE##Step & step, SymbolMapper & mapper) override { return mapper.map(step); } APPLY_STEP_TYPES(VISITOR_DEF) #undef VISITOR_DEF }; diff --git a/tests/queries/4_cnch_stateless/71000_bucket_shuffle_join.reference b/tests/queries/4_cnch_stateless/71000_bucket_shuffle_join.reference new file mode 100644 index 00000000000..f2767fe0770 --- /dev/null +++ b/tests/queries/4_cnch_stateless/71000_bucket_shuffle_join.reference @@ -0,0 +1,108 @@ +enable bucket shuffle +dtspartition +cdw_1 1 n1 1 +cdw_2 2 n2 2 +cdw_1 5 \N \N +cdw_3 25 n3 25 +cdw_1 1 n1 1 +cdw_2 2 n2 2 +\N \N n3 7 +cdw_3 25 n3 25 +cdw_1 1 n1 1 +cdw_2 2 n2 2 +cdw_1 5 \N \N +cdw_1 6 \N \N +cdw_1 7 n3 7 +cdw_3 25 n3 25 +cdw_1 1 n1 1 +cdw_2 2 n2 2 +cdw_1 7 n3 7 +cdw_3 25 n3 25 +toUInt64 +cdw_1 0 \N \N +cdw_1 1 n1 1 +cdw_2 2 
n2 2 +cdw_3 3 \N \N +cdw_1 1 n1 1 +cdw_2 2 n2 2 +\N \N n3 7 +\N \N n3 25 +sipHash +cdw_1 1 n1 n1 1 +cdw_2 2 n2 n2 2 +cdw_1 5 n5 \N \N +cdw_3 25 n1 \N \N +cdw_1 1 n1 n1 1 +cdw_2 2 n2 n2 2 +\N \N \N n3 7 +\N \N \N n3 25 +cdw_1 1 n1 n1 1 +cdw_2 2 n2 n2 2 +cdw_1 5 n5 \N \N +cdw_3 25 n1 \N \N +cdw_1 1 n1 n1 1 +cdw_2 2 n2 n2 2 +\N \N \N n3 7 +\N \N \N n3 25 +cdw_1 1 n1 n1 1 +cdw_2 2 n2 n2 2 +cdw_1 5 n5 \N \N +cdw_3 25 n1 \N \N +cdw_1 1 n1 n1 1 +cdw_2 2 n2 n2 2 +\N \N \N n3 7 +\N \N \N n3 25 +disable bucket shuffle +dtspartition +cdw_1 1 n1 1 +cdw_2 2 n2 2 +cdw_1 5 \N \N +cdw_3 25 n3 25 +cdw_1 1 n1 1 +cdw_2 2 n2 2 +\N \N n3 7 +cdw_3 25 n3 25 +cdw_1 1 n1 1 +cdw_2 2 n2 2 +cdw_1 5 \N \N +cdw_1 6 \N \N +cdw_1 7 n3 7 +cdw_3 25 n3 25 +cdw_1 1 n1 1 +cdw_2 2 n2 2 +cdw_1 7 n3 7 +cdw_3 25 n3 25 +toUInt64 +cdw_1 0 \N \N +cdw_1 1 n1 1 +cdw_2 2 n2 2 +cdw_3 3 \N \N +cdw_1 1 n1 1 +cdw_2 2 n2 2 +\N \N n3 7 +\N \N n3 25 +sipHash +cdw_1 1 n1 n1 1 +cdw_2 2 n2 n2 2 +cdw_1 5 n5 \N \N +cdw_3 25 n1 \N \N +cdw_1 1 n1 n1 1 +cdw_2 2 n2 n2 2 +\N \N \N n3 7 +\N \N \N n3 25 +cdw_1 1 n1 n1 1 +cdw_2 2 n2 n2 2 +cdw_1 5 n5 \N \N +cdw_3 25 n1 \N \N +cdw_1 1 n1 n1 1 +cdw_2 2 n2 n2 2 +\N \N \N n3 7 +\N \N \N n3 25 +cdw_1 1 n1 n1 1 +cdw_2 2 n2 n2 2 +cdw_1 5 n5 \N \N +cdw_3 25 n1 \N \N +cdw_1 1 n1 n1 1 +cdw_2 2 n2 n2 2 +\N \N \N n3 7 +\N \N \N n3 25 diff --git a/tests/queries/4_cnch_stateless/71000_bucket_shuffle_join.sql b/tests/queries/4_cnch_stateless/71000_bucket_shuffle_join.sql new file mode 100644 index 00000000000..d4a27648c09 --- /dev/null +++ b/tests/queries/4_cnch_stateless/71000_bucket_shuffle_join.sql @@ -0,0 +1,63 @@ +use test; +DROP TABLE IF EXISTS normal; +DROP TABLE IF EXISTS bucket_dtspartition; +DROP TABLE IF EXISTS bucket_dtspartition_with_range; +DROP TABLE IF EXISTS bucket_user_expression; +DROP TABLE IF EXISTS bucket_siphash; +DROP TABLE IF EXISTS bucket_siphash_split_number; +DROP TABLE IF EXISTS bucket_siphash_with_range; + +CREATE TABLE normal (a String, b UInt64) ENGINE = 
CnchMergeTree() PARTITION BY a ORDER BY a; +CREATE TABLE bucket_dtspartition (c String, d UInt64) ENGINE = CnchMergeTree() PARTITION BY c CLUSTER BY d INTO 4 BUCKETS SPLIT_NUMBER 60 ORDER BY c; +CREATE TABLE bucket_dtspartition_with_range (c String, d UInt64) ENGINE = CnchMergeTree() PARTITION BY c CLUSTER BY d INTO 4 BUCKETS SPLIT_NUMBER 60 WITH_RANGE ORDER BY c; +CREATE TABLE bucket_user_expression (c String, d UInt64) ENGINE = CnchMergeTree() PARTITION BY c CLUSTER BY expression d INTO 4 BUCKETS ORDER BY c; +CREATE TABLE bucket_siphash (d String, e UInt64, f String) ENGINE = CnchMergeTree() PARTITION BY d CLUSTER BY (e, f) INTO 4 BUCKETS ORDER BY d; +CREATE TABLE bucket_siphash_split_number (d String, e UInt64, f String) ENGINE = CnchMergeTree() PARTITION BY d CLUSTER BY (e, f) INTO 4 BUCKETS SPLIT_NUMBER 10 ORDER BY d; +CREATE TABLE bucket_siphash_with_range (d String, e UInt64, f String) ENGINE = CnchMergeTree() PARTITION BY d CLUSTER BY (e, f) INTO 4 BUCKETS SPLIT_NUMBER 10 WITH_RANGE ORDER BY d; + + +INSERT INTO normal VALUES ('n1', 1)('n2', 2)('n3', 7)('n3', 25); +INSERT INTO bucket_dtspartition VALUES ('cdw_1', 1)('cdw_2', 2)('cdw_1', 5)('cdw_3', 25); +INSERT INTO bucket_dtspartition_with_range VALUES ('cdw_1', 1)('cdw_2', 2)('cdw_1', 5)('cdw_1', 6)('cdw_1', 7)('cdw_3', 25); +INSERT INTO bucket_user_expression VALUES ('cdw_1', 0) ('cdw_1', 1)('cdw_2', 2)('cdw_3', 3); +INSERT INTO bucket_siphash VALUES ('cdw_1', 1, 'n1')('cdw_2', 2, 'n2' )('cdw_1', 5, 'n5')('cdw_3', 25,'n1'); +INSERT INTO bucket_siphash_split_number VALUES ('cdw_1', 1, 'n1')('cdw_2', 2, 'n2' )('cdw_1', 5, 'n5')('cdw_3', 25,'n1'); +INSERT INTO bucket_siphash_with_range VALUES ('cdw_1', 1, 'n1')('cdw_2', 2, 'n2' )('cdw_1', 5, 'n5')('cdw_3', 25,'n1'); + +SET enable_optimizer=1; +SET bsp_mode=0; -- bsp mode does not support bucket join + +SET enum_replicate_no_stats=0,enable_bucket_shuffle=1; +SELECT 'enable bucket shuffle'; +SELECT 'dtspartition'; +SELECT * FROM test.bucket_dtspartition b LEFT 
JOIN test.normal n ON b.d=n.b ORDER BY d; +SELECT * FROM test.bucket_dtspartition b Right JOIN test.normal n ON b.d=n.b ORDER BY b; +SELECT * FROM test.bucket_dtspartition_with_range b LEFT JOIN test.normal n ON b.d=n.b ORDER BY d; +SELECT * FROM test.bucket_dtspartition_with_range b RIGHT JOIN test.normal n ON b.d=n.b ORDER BY b; +SELECT 'toUInt64'; +SELECT * FROM test.bucket_user_expression b LEFT JOIN test.normal n ON b.d=n.b ORDER BY d; +SELECT * FROM test.bucket_user_expression b RIGHT JOIN test.normal n ON b.d=n.b ORDER BY b; +SELECT 'sipHash'; +SELECT * FROM test.bucket_siphash b LEFT JOIN test.normal n ON b.e=n.b AND b.f = n.a ORDER BY e; +SELECT * FROM test.bucket_siphash b RIGHT JOIN test.normal n ON b.e=n.b AND b.f = n.a ORDER BY b; +SELECT * FROM test.bucket_siphash_split_number b LEFT JOIN test.normal n ON b.e=n.b AND b.f = n.a ORDER BY e; +SELECT * FROM test.bucket_siphash_split_number b RIGHT JOIN test.normal n ON b.e=n.b AND b.f = n.a ORDER BY b; +SELECT * FROM test.bucket_siphash_with_range b LEFT JOIN test.normal n ON b.e=n.b AND b.f = n.a ORDER BY e; +SELECT * FROM test.bucket_siphash_with_range b RIGHT JOIN test.normal n ON b.e=n.b AND b.f = n.a ORDER BY b; +SET enum_replicate_no_stats=1,enable_bucket_shuffle=0; +SELECT 'disable bucket shuffle'; +SELECT 'dtspartition'; +SELECT * FROM test.bucket_dtspartition b LEFT JOIN test.normal n ON b.d=n.b ORDER BY d; +SELECT * FROM test.bucket_dtspartition b Right JOIN test.normal n ON b.d=n.b ORDER BY b; +SELECT * FROM test.bucket_dtspartition_with_range b LEFT JOIN test.normal n ON b.d=n.b ORDER BY d; +SELECT * FROM test.bucket_dtspartition_with_range b RIGHT JOIN test.normal n ON b.d=n.b ORDER BY b; +SELECT 'toUInt64'; +SELECT * FROM test.bucket_user_expression b LEFT JOIN test.normal n ON b.d=n.b ORDER BY d; +SELECT * FROM test.bucket_user_expression b RIGHT JOIN test.normal n ON b.d=n.b ORDER BY b; +SELECT 'sipHash'; +SELECT * FROM test.bucket_siphash b LEFT JOIN test.normal n ON b.e=n.b AND b.f = n.a 
ORDER BY e; +SELECT * FROM test.bucket_siphash b RIGHT JOIN test.normal n ON b.e=n.b AND b.f = n.a ORDER BY b; +SELECT * FROM test.bucket_siphash_split_number b LEFT JOIN test.normal n ON b.e=n.b AND b.f = n.a ORDER BY e; +SELECT * FROM test.bucket_siphash_split_number b RIGHT JOIN test.normal n ON b.e=n.b AND b.f = n.a ORDER BY b; +SELECT * FROM test.bucket_siphash_with_range b LEFT JOIN test.normal n ON b.e=n.b AND b.f = n.a ORDER BY e; +SELECT * FROM test.bucket_siphash_with_range b RIGHT JOIN test.normal n ON b.e=n.b AND b.f = n.a ORDER BY b; diff --git a/tests/queries/4_cnch_stateless_no_tenant/40022_topn_filtering_opt.reference b/tests/queries/4_cnch_stateless_no_tenant/40022_topn_filtering_opt.reference index 5e31a755a48..3594eb7dbd4 100644 --- a/tests/queries/4_cnch_stateless_no_tenant/40022_topn_filtering_opt.reference +++ b/tests/queries/4_cnch_stateless_no_tenant/40022_topn_filtering_opt.reference @@ -1,16 +1,16 @@ -Projection Est. 10 rows, cost 1.284000e+01 +Projection Est. 10 rows, cost 1.276000e+01 │ Expressions: count():=`expr#count()`, n:=number_3 -└─ Limit Est. 10 rows, cost 1.210000e+01 +└─ Limit Est. 10 rows, cost 1.202000e+01 │ Limit: 10 - └─ Sorting Est. 10 rows, cost 1.210000e+01 + └─ Sorting Est. 10 rows, cost 1.202000e+01 │ Order by: {number_3 ASC NULLS LAST} │ Limit: 10 - └─ Gather Exchange Est. 10 rows, cost 1.210000e+01 - └─ Sorting Est. 10 rows, cost 1.034000e+01 + └─ Gather Exchange Est. 10 rows, cost 1.202000e+01 + └─ Sorting Est. 10 rows, cost 1.026000e+01 │ Order by: {number_3 ASC NULLS LAST} │ Limit: 10 - └─ MergingAggregated Est. 21 rows, cost 1.034000e+01 - └─ Repartition Exchange Est. 21 rows, cost 1.034000e+01 + └─ MergingAggregated Est. 21 rows, cost 1.026000e+01 + └─ Repartition Exchange Est. 21 rows, cost 1.026000e+01 │ Partition by: {number_3} └─ Union Est. 
21 rows, cost 6.660000e+00 │ OutputToInputs: expr#count() = [expr#count(),expr#count(),expr#count()], number_3 = [number,number_1,number_2] @@ -66,19 +66,19 @@ Projection Est. 10 rows, cost 1.284000e+01 9 1 10 2 12 2 -Projection Est. 10 rows, cost 2.820000e+00 +Projection Est. 10 rows, cost 2.740000e+00 │ Expressions: [block_number], gas_fee:=`expr#sum(multiply(gas, gas_price))` -└─ Limit Est. 10 rows, cost 2.080000e+00 +└─ Limit Est. 10 rows, cost 2.000000e+00 │ Limit: 10 - └─ Sorting Est. 10 rows, cost 2.080000e+00 + └─ Sorting Est. 10 rows, cost 2.000000e+00 │ Order by: {block_number ASC NULLS LAST} │ Limit: 10 - └─ Gather Exchange Est. 10 rows, cost 2.080000e+00 - └─ Sorting Est. 10 rows, cost 3.200000e-01 + └─ Gather Exchange Est. 10 rows, cost 2.000000e+00 + └─ Sorting Est. 10 rows, cost 2.400000e-01 │ Order by: {block_number ASC NULLS LAST} │ Limit: 10 - └─ MergingAggregated Est. ? rows, cost 3.200000e-01 - └─ Repartition Exchange Est. ? rows, cost 3.200000e-01 + └─ MergingAggregated Est. ? rows, cost 2.400000e-01 + └─ Repartition Exchange Est. ? rows, cost 2.400000e-01 │ Partition by: {block_number} └─ Aggregating Est. ? rows, cost 0.000000e+00 │ Group by: {block_number} diff --git a/tests/queries/4_cnch_stateless_no_tenant/40094_fix_bitmap_index_not_pushed_down.reference b/tests/queries/4_cnch_stateless_no_tenant/40094_fix_bitmap_index_not_pushed_down.reference index 6fbfec995b3..3df576bb06f 100644 --- a/tests/queries/4_cnch_stateless_no_tenant/40094_fix_bitmap_index_not_pushed_down.reference +++ b/tests/queries/4_cnch_stateless_no_tenant/40094_fix_bitmap_index_not_pushed_down.reference @@ -1,22 +1,22 @@ -Projection Est. 1000 rows, cost 3.086933e+02 +Projection Est. 
1000 rows, cost 3.086000e+02 │ Expressions: _1700031876736:=`expr#count()`, _1700031897102:=`expr#CAST(avg(multiIf(greaterOrEquals(mapElement(model_label, \'staytime\'), 30000), 1, 0)), \'Nullable(Float64)\')`, _1700046658469:=`expr#if(arraySetCheck(vids, \'8683112\'), \'8683112\', if(arraySetCheck(vids, \'8683113\'), \'8683113\', \'other\'))`, _1700047638968:=`expr#CAST(avg(multiIf(greaterOrEquals(mapElement(model_label, \'staytime\'), 60000), 1, 0)), \'Nullable(Float64)\')`, _avg_1700031876697_4700826bc6c9c490e9a7756937dd1f6e:=`expr#avg(mapElement(model_label, \'staytime\'))`, _countdistinct_1700031876715:=`expr#uniqExact(uid)`, _sum_1700046657800:=`expr#sum(multiIf(greater(arrayElement(label, 31), 0), 1, 0))`, _sum_1700046657811:=`expr#sum(multiIf(greaterOrEquals(arrayElement(label, 1), 1), 1, 0))`, _sum_1700046657910:=`expr#sum(arrayElement(label, 32))` -└─ Projection Est. 1000 rows, cost 2.346933e+02 +└─ Projection Est. 1000 rows, cost 2.346000e+02 │ Expressions: [expr#avg(mapElement(model_label, \'staytime\')), expr#count(), expr#if(arraySetCheck(vids, \'8683112\'), \'8683112\', if(arraySetCheck(vids, \'8683113\'), \'8683113\', \'other\')), expr#sum(arrayElement(label, 32)), expr#sum(multiIf(greater(arrayElement(label, 31), 0), 1, 0)), expr#sum(multiIf(greaterOrEquals(arrayElement(label, 1), 1), 1, 0)), expr#uniqExact(uid)], expr#CAST(avg(multiIf(greaterOrEquals(mapElement(model_label, \'staytime\'), 30000), 1, 0)), \'Nullable(Float64)\'):=CAST(`expr#avg(multiIf(greaterOrEquals(mapElement(model_label, \'staytime\'), 30000), 1, 0))`, \'Nullable(Float64)\'), expr#CAST(avg(multiIf(greaterOrEquals(mapElement(model_label, \'staytime\'), 60000), 1, 0)), \'Nullable(Float64)\'):=CAST(`expr#avg(multiIf(greaterOrEquals(mapElement(model_label, \'staytime\'), 60000), 1, 0))`, \'Nullable(Float64)\') - └─ Limit Est. 1000 rows, cost 1.606933e+02 + └─ Limit Est. 1000 rows, cost 1.606000e+02 │ Limit: 1000 - └─ Gather Exchange Est. 1000 rows, cost 1.606933e+02 - └─ Limit Est. 
1000 rows, cost 5.333333e-01 + └─ Gather Exchange Est. 1000 rows, cost 1.606000e+02 + └─ Limit Est. 1000 rows, cost 4.400000e-01 │ Limit: 1000 - └─ MergingAggregated Est. ? rows, cost 5.333333e-01 - └─ Repartition Exchange Est. ? rows, cost 5.333333e-01 + └─ MergingAggregated Est. ? rows, cost 4.400000e-01 + └─ Repartition Exchange Est. ? rows, cost 4.400000e-01 │ Partition by: {expr#if(arraySetCheck(vids, \'8683112\'), \'8683112\', if(arraySetCheck(vids, \'8683113\'), \'8683113\', \'other\'))} - └─ Aggregating Est. ? rows, cost 2.133333e-01 + └─ Aggregating Est. ? rows, cost 2.000000e-01 │ Group by: {expr#if(arraySetCheck(vids, \'8683112\'), \'8683112\', if(arraySetCheck(vids, \'8683113\'), \'8683113\', \'other\'))} │ Aggregates: expr#sum(multiIf(greater(arrayElement(label, 31), 0), 1, 0)):=AggNull(anyIf)(expr#sum(multiIf(greater(arrayElement(label, 31), 0), 1, 0)),group_id_mask), expr#sum(arrayElement(label, 32)):=AggNull(anyIf)(expr#sum(arrayElement(label, 32)),group_id_mask), expr#sum(multiIf(greaterOrEquals(arrayElement(label, 1), 1), 1, 0)):=AggNull(anyIf)(expr#sum(multiIf(greaterOrEquals(arrayElement(label, 1), 1), 1, 0)),group_id_mask), expr#count():=AggNull(anyIf)(expr#count(),group_id_mask), expr#uniqExact(uid):=AggNull(countIf)(uid,group_id_mask_1), expr#avg(mapElement(model_label, \'staytime\')):=AggNull(anyIf)(expr#avg(mapElement(model_label, \'staytime\')),group_id_mask), expr#avg(multiIf(greaterOrEquals(mapElement(model_label, \'staytime\'), 30000), 1, 0)):=AggNull(anyIf)(expr#avg(multiIf(greaterOrEquals(mapElement(model_label, \'staytime\'), 30000), 1, 0)),group_id_mask), expr#avg(multiIf(greaterOrEquals(mapElement(model_label, \'staytime\'), 60000), 1, 0)):=AggNull(anyIf)(expr#avg(multiIf(greaterOrEquals(mapElement(model_label, \'staytime\'), 60000), 1, 0)),group_id_mask) - └─ Projection Est. ? rows, cost 2.133333e-01 + └─ Projection Est. ? 
rows, cost 2.000000e-01 │ Expressions: [expr#avg(mapElement(model_label, \'staytime\')), expr#avg(multiIf(greaterOrEquals(mapElement(model_label, \'staytime\'), 30000), 1, 0)), expr#avg(multiIf(greaterOrEquals(mapElement(model_label, \'staytime\'), 60000), 1, 0)), expr#count(), expr#if(arraySetCheck(vids, \'8683112\'), \'8683112\', if(arraySetCheck(vids, \'8683113\'), \'8683113\', \'other\')), expr#sum(arrayElement(label, 32)), expr#sum(multiIf(greater(arrayElement(label, 31), 0), 1, 0)), expr#sum(multiIf(greaterOrEquals(arrayElement(label, 1), 1), 1, 0)), uid], group_id_mask:=group_id = 0, group_id_mask_1:=group_id = 1 - └─ MergingAggregated Est. ? rows, cost 2.133333e-01 - └─ Repartition Exchange Est. ? rows, cost 2.133333e-01 + └─ MergingAggregated Est. ? rows, cost 2.000000e-01 + └─ Repartition Exchange Est. ? rows, cost 2.000000e-01 │ Partition by: {expr#if(arraySetCheck(vids, \'8683112\'), \'8683112\', if(arraySetCheck(vids, \'8683113\'), \'8683113\', \'other\')), group_id, uid} └─ Aggregating Est. ? rows, cost 0.000000e+00 │ Group by: {expr#if(arraySetCheck(vids, \'8683112\'), \'8683112\', if(arraySetCheck(vids, \'8683113\'), \'8683113\', \'other\')), group_id, uid} diff --git a/tests/queries/4_cnch_stateless_no_tenant/48023_eliminate_group_by_constants.reference b/tests/queries/4_cnch_stateless_no_tenant/48023_eliminate_group_by_constants.reference index d4e390e68f3..fc3362ce988 100644 --- a/tests/queries/4_cnch_stateless_no_tenant/48023_eliminate_group_by_constants.reference +++ b/tests/queries/4_cnch_stateless_no_tenant/48023_eliminate_group_by_constants.reference @@ -1,29 +1,29 @@ -Projection Est. 100 rows, cost 2.572000e+01 +Projection Est. 100 rows, cost 2.521333e+01 │ Expressions: channel:=`expr#\'store\'_1`, i_category:=i_category_3, nn:=ss_store_sk_1, sales_cnt:=`expr#count()` -└─ Limit Est. 100 rows, cost 1.832000e+01 +└─ Limit Est. 100 rows, cost 1.781333e+01 │ Limit: 100 - └─ Sorting Est. 100 rows, cost 1.832000e+01 + └─ Sorting Est. 
100 rows, cost 1.781333e+01 │ Order by: {expr#\'store\'_1 ASC NULLS LAST, ss_store_sk_1 ASC NULLS LAST, i_category_3 ASC NULLS LAST} │ Limit: 100 - └─ Gather Exchange Est. 100 rows, cost 1.832000e+01 - └─ Sorting Est. 100 rows, cost 2.160000e+00 + └─ Gather Exchange Est. 100 rows, cost 1.781333e+01 + └─ Sorting Est. 100 rows, cost 1.653333e+00 │ Order by: {expr#\'store\'_1 ASC NULLS LAST, ss_store_sk_1 ASC NULLS LAST, i_category_3 ASC NULLS LAST} │ Limit: 100 - └─ Projection Est. ? rows, cost 2.160000e+00 + └─ Projection Est. ? rows, cost 1.653333e+00 │ Expressions: [expr#\'store\'_1, expr#count(), i_category_3], ss_store_sk_1:=NULL - └─ MergingAggregated Est. ? rows, cost 2.160000e+00 - └─ Repartition Exchange Est. ? rows, cost 2.160000e+00 + └─ MergingAggregated Est. ? rows, cost 1.653333e+00 + └─ Repartition Exchange Est. ? rows, cost 1.653333e+00 │ Partition by: {expr#\'store\'_1, i_category_3} - └─ Union Est. ? rows, cost 1.920000e+00 + └─ Union Est. ? rows, cost 1.440000e+00 │ OutputToInputs: expr#count() = [expr#count(),expr#count(),expr#count()], i_category_3 = [i_category,i_category_1,i_category_2], expr#\'store\'_1 = [expr#\'store\',expr#\'web\',expr#\'catalog\'] - ├─ Aggregating Est. ? rows, cost 6.400000e-01 + ├─ Aggregating Est. ? rows, cost 4.800000e-01 │ │ Group by: {expr#\'store\', i_category} │ │ Aggregates: expr#count():=AggNull(count)() - │ └─ Projection Est. ? rows, cost 6.400000e-01 + │ └─ Projection Est. ? rows, cost 4.800000e-01 │ │ Expressions: [i_category], expr#\'store\':=\'store\' - │ └─ Inner Join Est. ? rows, cost 6.400000e-01 + │ └─ Inner Join Est. ? rows, cost 4.800000e-01 │ │ Condition: ss_item_sk == i_item_sk - │ ├─ Repartition Exchange Est. ? rows, cost 3.200000e-01 + │ ├─ Repartition Exchange Est. ? rows, cost 2.400000e-01 │ │ │ Partition by: {ss_item_sk} │ │ └─ Projection Est. ? rows, cost 0.000000e+00 │ │ │ Expressions: [ss_item_sk] @@ -32,18 +32,18 @@ Projection Est. 
100 rows, cost 2.572000e+01 │ │ └─ TableScan test.store_sales Est. ? rows, cost 0.000000e+00 │ │ Where: isNull(ss_store_sk) │ │ Outputs: [ss_item_sk, ss_store_sk] - │ └─ Repartition Exchange Est. ? rows, cost 3.200000e-01 + │ └─ Repartition Exchange Est. ? rows, cost 2.400000e-01 │ │ Partition by: {i_item_sk} │ └─ TableScan test.item Est. ? rows, cost 0.000000e+00 │ Outputs: [i_item_sk, i_category] - ├─ Aggregating Est. ? rows, cost 6.400000e-01 + ├─ Aggregating Est. ? rows, cost 4.800000e-01 │ │ Group by: {expr#\'web\', i_category_1} │ │ Aggregates: expr#count():=AggNull(count)() - │ └─ Projection Est. ? rows, cost 6.400000e-01 + │ └─ Projection Est. ? rows, cost 4.800000e-01 │ │ Expressions: [i_category_1], expr#\'web\':=\'web\' - │ └─ Inner Join Est. ? rows, cost 6.400000e-01 + │ └─ Inner Join Est. ? rows, cost 4.800000e-01 │ │ Condition: ws_item_sk == i_item_sk_1 - │ ├─ Repartition Exchange Est. ? rows, cost 3.200000e-01 + │ ├─ Repartition Exchange Est. ? rows, cost 2.400000e-01 │ │ │ Partition by: {ws_item_sk} │ │ └─ Projection Est. ? rows, cost 0.000000e+00 │ │ │ Expressions: [ws_item_sk] @@ -52,18 +52,18 @@ Projection Est. 100 rows, cost 2.572000e+01 │ │ └─ TableScan test.web_sales Est. ? rows, cost 0.000000e+00 │ │ Where: isNull(ws_ship_customer_sk) │ │ Outputs: [ws_item_sk, ws_ship_customer_sk] - │ └─ Repartition Exchange Est. ? rows, cost 3.200000e-01 + │ └─ Repartition Exchange Est. ? rows, cost 2.400000e-01 │ │ Partition by: {i_item_sk_1} │ └─ TableScan test.item Est. ? rows, cost 0.000000e+00 │ Outputs: i_item_sk_1:=i_item_sk, i_category_1:=i_category - └─ Aggregating Est. ? rows, cost 6.400000e-01 + └─ Aggregating Est. ? rows, cost 4.800000e-01 │ Group by: {expr#\'catalog\', i_category_2} │ Aggregates: expr#count():=AggNull(count)() - └─ Projection Est. ? rows, cost 6.400000e-01 + └─ Projection Est. ? rows, cost 4.800000e-01 │ Expressions: [i_category_2], expr#\'catalog\':=\'catalog\' - └─ Inner Join Est. ? rows, cost 6.400000e-01 + └─ Inner Join Est. 
? rows, cost 4.800000e-01 │ Condition: cs_item_sk == i_item_sk_2 - ├─ Repartition Exchange Est. ? rows, cost 3.200000e-01 + ├─ Repartition Exchange Est. ? rows, cost 2.400000e-01 │ │ Partition by: {cs_item_sk} │ └─ Projection Est. ? rows, cost 0.000000e+00 │ │ Expressions: [cs_item_sk] @@ -72,7 +72,7 @@ Projection Est. 100 rows, cost 2.572000e+01 │ └─ TableScan test.catalog_sales Est. ? rows, cost 0.000000e+00 │ Where: isNull(cs_ship_addr_sk) │ Outputs: [cs_ship_addr_sk, cs_item_sk] - └─ Repartition Exchange Est. ? rows, cost 3.200000e-01 + └─ Repartition Exchange Est. ? rows, cost 2.400000e-01 │ Partition by: {i_item_sk_2} └─ TableScan test.item Est. ? rows, cost 0.000000e+00 Outputs: i_item_sk_2:=i_item_sk, i_category_2:=i_category diff --git a/tests/queries/4_cnch_stateless_no_tenant/48023_eliminate_join_by_fk.reference b/tests/queries/4_cnch_stateless_no_tenant/48023_eliminate_join_by_fk.reference index 0d56e40c31e..15f23a2b020 100644 --- a/tests/queries/4_cnch_stateless_no_tenant/48023_eliminate_join_by_fk.reference +++ b/tests/queries/4_cnch_stateless_no_tenant/48023_eliminate_join_by_fk.reference @@ -63,24 +63,24 @@ Projection Est. 10 rows, cost 2.500000e+00 2000.3 3003.5 not OK: fk right outer join pk -Projection Est. 10 rows, cost 3.140000e+00 +Projection Est. 10 rows, cost 2.980000e+00 │ Expressions: [price] -└─ Limit Est. 10 rows, cost 2.400000e+00 +└─ Limit Est. 10 rows, cost 2.240000e+00 │ Limit: 10 - └─ Sorting Est. 10 rows, cost 2.400000e+00 + └─ Sorting Est. 10 rows, cost 2.240000e+00 │ Order by: {price ASC NULLS LAST} │ Limit: 10 - └─ Gather Exchange Est. 10 rows, cost 2.400000e+00 - └─ Sorting Est. 10 rows, cost 6.400000e-01 + └─ Gather Exchange Est. 10 rows, cost 2.240000e+00 + └─ Sorting Est. 10 rows, cost 4.800000e-01 │ Order by: {price ASC NULLS LAST} │ Limit: 10 - └─ Right Join Est. ? rows, cost 6.400000e-01 + └─ Right Join Est. ? rows, cost 4.800000e-01 │ Condition: sk == expr#cast(sk_1, \'Nullable(Int64)\') - ├─ Repartition Exchange Est. ? 
rows, cost 3.200000e-01 + ├─ Repartition Exchange Est. ? rows, cost 2.400000e-01 │ │ Partition by: {sk} │ └─ TableScan test.web Est. ? rows, cost 0.000000e+00 │ Outputs: [sk, price] - └─ Repartition Exchange Est. ? rows, cost 3.200000e-01 + └─ Repartition Exchange Est. ? rows, cost 2.400000e-01 │ Partition by: {expr#cast(sk_1, \'Nullable(Int64)\')} └─ Projection Est. ? rows, cost 0.000000e+00 │ Expressions: expr#cast(sk_1, \'Nullable(Int64)\'):=cast(sk_1, \'Nullable(Int64)\') diff --git a/tests/queries/4_cnch_stateless_no_tenant/48028_only_push_bitmap_with_index.reference b/tests/queries/4_cnch_stateless_no_tenant/48028_only_push_bitmap_with_index.reference index 01e7d463c8b..db872fe6c78 100644 --- a/tests/queries/4_cnch_stateless_no_tenant/48028_only_push_bitmap_with_index.reference +++ b/tests/queries/4_cnch_stateless_no_tenant/48028_only_push_bitmap_with_index.reference @@ -1,12 +1,12 @@ -Projection Est. 100 rows, cost 2.388000e+01 +Projection Est. 100 rows, cost 2.380000e+01 │ Expressions: _1700004311268:=`expr#sum(mapElement(int_params, \'predict_step\'))`, _1700050188505:=`expr#multiIf(arraySetCheck(splitByChar(\',\', mapElement(string_params, \'local_life_deboost_ab_list\')), \'33\'), \'AA\', \'-1\')` -└─ Limit Est. 100 rows, cost 1.648000e+01 +└─ Limit Est. 100 rows, cost 1.640000e+01 │ Limit: 100 - └─ Gather Exchange Est. 100 rows, cost 1.648000e+01 - └─ Limit Est. 100 rows, cost 3.200000e-01 + └─ Gather Exchange Est. 100 rows, cost 1.640000e+01 + └─ Limit Est. 100 rows, cost 2.400000e-01 │ Limit: 100 - └─ MergingAggregated Est. ? rows, cost 3.200000e-01 - └─ Repartition Exchange Est. ? rows, cost 3.200000e-01 + └─ MergingAggregated Est. ? rows, cost 2.400000e-01 + └─ Repartition Exchange Est. ? rows, cost 2.400000e-01 │ Partition by: {expr#multiIf(arraySetCheck(splitByChar(\',\', mapElement(string_params, \'local_life_deboost_ab_list\')), \'33\'), \'AA\', \'-1\')} └─ Aggregating Est. ? 
rows, cost 0.000000e+00 │ Group by: {expr#multiIf(arraySetCheck(splitByChar(\',\', mapElement(string_params, \'local_life_deboost_ab_list\')), \'33\'), \'AA\', \'-1\')} From 08160be04ba7e83fffe2f51fff0381a20adabe6e Mon Sep 17 00:00:00 2001 From: fredwang Date: Thu, 1 Aug 2024 03:09:09 +0000 Subject: [PATCH 006/292] Merge 'fix-access-update-on-remote-servers-2p2' into 'cnch-2.2' fix(clickhousech@m-3012525130): Fix entity updates on remote servers See merge request: !22490 --- src/Access/KVAccessStorage.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Access/KVAccessStorage.cpp b/src/Access/KVAccessStorage.cpp index 191d6db3f4d..0abc7493877 100644 --- a/src/Access/KVAccessStorage.cpp +++ b/src/Access/KVAccessStorage.cpp @@ -373,6 +373,8 @@ UUID KVAccessStorage::updateCache(EntityType type, const AccessEntityModel & ent // Always get entity from KV to ensure that we have the most updated Entity at all times std::optional KVAccessStorage::findImpl(EntityType type, const String & name) const { + Notifications notifications; + SCOPE_EXIT({ notify(notifications); }); auto entity_model = catalog->tryGetAccessEntity(type, name); if (!entity_model) @@ -385,7 +387,7 @@ std::optional KVAccessStorage::findImpl(EntityType type, const String & na return std::nullopt; } - return updateCache(type, *entity_model, nullptr); + return updateCache(type, *entity_model, &notifications); } From 895413305e5bbfcc26b2f952e6501c5ef454a11c Mon Sep 17 00:00:00 2001 From: fredwang Date: Thu, 1 Aug 2024 03:09:27 +0000 Subject: [PATCH 007/292] Merge 'revoke-if-2p2' into 'cnch-2.2' fix(clickhousech@m-17322645): Implement REVOKE IF EXISTS for RBAC See merge request: !22510 --- src/Access/AccessRights.cpp | 117 ++++++++++++------ src/Access/AccessRights.h | 17 ++- src/Interpreters/InterpreterGrantQuery.cpp | 12 +- src/Parsers/ASTGrantQuery.h | 1 + src/Parsers/ParserGrantQuery.cpp | 4 + .../60000_revoke_if.reference | 1 + .../4_cnch_stateless/60000_revoke_if.sql | 16 +++ 7
files changed, 125 insertions(+), 43 deletions(-) create mode 100644 tests/queries/4_cnch_stateless/60000_revoke_if.reference create mode 100644 tests/queries/4_cnch_stateless/60000_revoke_if.sql diff --git a/src/Access/AccessRights.cpp b/src/Access/AccessRights.cpp index cbbf5cda06d..8b0856e57a8 100644 --- a/src/Access/AccessRights.cpp +++ b/src/Access/AccessRights.cpp @@ -275,30 +275,56 @@ struct AccessRightsBase::Node calculateMinMaxFlags(); } + template void revoke(const AccessFlags & flags_) { removeGrantsRec(flags_); optimizeTree(); } - template + template void revoke(const AccessFlags & flags_, const std::string_view & name, const Args &... subnames) { - auto & child = getChild(name); + if constexpr (if_exists) + { + auto * child = tryGetChild(name); - child.revoke(flags_, subnames...); - eraseChildIfPossible(child); + if (!child) + return; + + child->template revoke(flags_, subnames...); + eraseChildIfPossible(*child); + } + else + { + auto & child = getChild(name); + + child.template revoke(flags_, subnames...); + eraseChildIfPossible(child); + } calculateMinMaxFlags(); } - template + template void revoke(const AccessFlags & flags_, const std::vector & names) { for (const auto & name : names) { - auto & child = getChild(name); - child.revoke(flags_); - eraseChildIfPossible(child); + if constexpr (if_exists) + { + auto * child = tryGetChild(name); + if (!child) + continue; + + child->template revoke(flags_); + eraseChildIfPossible(*child); + } + else + { + auto & child = getChild(name); + child.template revoke(flags_); + eraseChildIfPossible(child); + } } calculateMinMaxFlags(); } @@ -930,14 +956,14 @@ void AccessRightsBase::grantWithGrantOption(const AccessRightsEleme template -template +template void AccessRightsBase::revokeImpl(const AccessFlags & flags, const Args &... 
args) { auto helper = [&](std::unique_ptr & root_node) { if (!root_node) return; - root_node->revoke(flags, args...); + root_node->template revoke(flags, args...); if (!root_node->flags && !root_node->children) root_node = nullptr; }; @@ -948,78 +974,95 @@ void AccessRightsBase::revokeImpl(const AccessFlags & flags, const } template -template +template void AccessRightsBase::revokeImplHelper(const AccessRightsElement & element) { assert(!element.grant_option || grant_option); if (element.any_database) - revokeImpl(element.access_flags); + revokeImpl(element.access_flags); else if (element.any_table) - revokeImpl(element.access_flags, element.database); + revokeImpl(element.access_flags, element.database); else if (element.any_column) - revokeImpl(element.access_flags, element.database, element.table); + revokeImpl(element.access_flags, element.database, element.table); else - revokeImpl(element.access_flags, element.database, element.table, element.columns); + revokeImpl(element.access_flags, element.database, element.table, element.columns); } template -template +template void AccessRightsBase::revokeImpl(const AccessRightsElement & element) { if constexpr (grant_option) { - revokeImplHelper(element); + revokeImplHelper(element); } else { if (element.grant_option) - revokeImplHelper(element); + revokeImplHelper(element); else - revokeImplHelper(element); + revokeImplHelper(element); } } template -template +template void AccessRightsBase::revokeImpl(const AccessRightsElements & elements) { for (const auto & element : elements) - revokeImpl(element); + revokeImpl(element); } template -void AccessRightsBase::revoke(const AccessFlags & flags) { revokeImpl(flags); } +void AccessRightsBase::revoke(const AccessFlags & flags) { revokeImpl(flags); } +template +void AccessRightsBase::revoke(const AccessFlags & flags, const std::string_view & database) { revokeImpl(flags, database); } +template +void AccessRightsBase::revoke(const AccessFlags & flags, const std::string_view 
& database, const std::string_view & table) { revokeImpl(flags, database, table); } +template +void AccessRightsBase::revoke(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::string_view & column) { revokeImpl(flags, database, table, column); } +template +void AccessRightsBase::revoke(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::vector & columns) { revokeImpl(flags, database, table, columns); } +template +void AccessRightsBase::revoke(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const Strings & columns) { revokeImpl(flags, database, table, columns); } +template +void AccessRightsBase::revoke(const AccessRightsElement & element) { revokeImpl(element); } +template +void AccessRightsBase::revoke(const AccessRightsElements & elements) { revokeImpl(elements); } + +template +void AccessRightsBase::tryRevoke(const AccessFlags & flags) { revokeImpl(flags); } template -void AccessRightsBase::revoke(const AccessFlags & flags, const std::string_view & database) { revokeImpl(flags, database); } +void AccessRightsBase::tryRevoke(const AccessFlags & flags, const std::string_view & database) { revokeImpl(flags, database); } template -void AccessRightsBase::revoke(const AccessFlags & flags, const std::string_view & database, const std::string_view & table) { revokeImpl(flags, database, table); } +void AccessRightsBase::tryRevoke(const AccessFlags & flags, const std::string_view & database, const std::string_view & table) { revokeImpl(flags, database, table); } template -void AccessRightsBase::revoke(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::string_view & column) { revokeImpl(flags, database, table, column); } +void AccessRightsBase::tryRevoke(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::string_view 
& column) { revokeImpl(flags, database, table, column); } template -void AccessRightsBase::revoke(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::vector & columns) { revokeImpl(flags, database, table, columns); } +void AccessRightsBase::tryRevoke(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::vector & columns) { revokeImpl(flags, database, table, columns); } template -void AccessRightsBase::revoke(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const Strings & columns) { revokeImpl(flags, database, table, columns); } +void AccessRightsBase::tryRevoke(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const Strings & columns) { revokeImpl(flags, database, table, columns); } template -void AccessRightsBase::revoke(const AccessRightsElement & element) { revokeImpl(element); } +void AccessRightsBase::tryRevoke(const AccessRightsElement & element) { revokeImpl(element); } template -void AccessRightsBase::revoke(const AccessRightsElements & elements) { revokeImpl(elements); } +void AccessRightsBase::tryRevoke(const AccessRightsElements & elements) { revokeImpl(elements); } template -void AccessRightsBase::revokeGrantOption(const AccessFlags & flags) { revokeImpl(flags); } +void AccessRightsBase::revokeGrantOption(const AccessFlags & flags) { revokeImpl(flags); } template -void AccessRightsBase::revokeGrantOption(const AccessFlags & flags, const std::string_view & database) { revokeImpl(flags, database); } +void AccessRightsBase::revokeGrantOption(const AccessFlags & flags, const std::string_view & database) { revokeImpl(flags, database); } template -void AccessRightsBase::revokeGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table) { revokeImpl(flags, database, table); } +void AccessRightsBase::revokeGrantOption(const 
AccessFlags & flags, const std::string_view & database, const std::string_view & table) { revokeImpl(flags, database, table); } template -void AccessRightsBase::revokeGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::string_view & column) { revokeImpl(flags, database, table, column); } +void AccessRightsBase::revokeGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::string_view & column) { revokeImpl(flags, database, table, column); } template -void AccessRightsBase::revokeGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::vector & columns) { revokeImpl(flags, database, table, columns); } +void AccessRightsBase::revokeGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::vector & columns) { revokeImpl(flags, database, table, columns); } template -void AccessRightsBase::revokeGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const Strings & columns) { revokeImpl(flags, database, table, columns); } +void AccessRightsBase::revokeGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const Strings & columns) { revokeImpl(flags, database, table, columns); } template -void AccessRightsBase::revokeGrantOption(const AccessRightsElement & element) { revokeImpl(element); } +void AccessRightsBase::revokeGrantOption(const AccessRightsElement & element) { revokeImpl(element); } template -void AccessRightsBase::revokeGrantOption(const AccessRightsElements & elements) { revokeImpl(elements); } +void AccessRightsBase::revokeGrantOption(const AccessRightsElements & elements) { revokeImpl(elements); } template diff --git a/src/Access/AccessRights.h b/src/Access/AccessRights.h index 7deb076048c..2eeea796a90 100644 --- 
a/src/Access/AccessRights.h +++ b/src/Access/AccessRights.h @@ -68,6 +68,15 @@ class AccessRightsBase void revoke(const AccessRightsElement & element); void revoke(const AccessRightsElements & elements); + void tryRevoke(const AccessFlags & flags); + void tryRevoke(const AccessFlags & flags, const std::string_view & database); + void tryRevoke(const AccessFlags & flags, const std::string_view & database, const std::string_view & table); + void tryRevoke(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::string_view & column); + void tryRevoke(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::vector & columns); + void tryRevoke(const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const Strings & columns); + void tryRevoke(const AccessRightsElement & element); + void tryRevoke(const AccessRightsElements & elements); + void revokeGrantOption(const AccessFlags & flags); void revokeGrantOption(const AccessFlags & flags, const std::string_view & database); void revokeGrantOption(const AccessFlags & flags, const std::string_view & database, const std::string_view & table); @@ -117,16 +126,16 @@ class AccessRightsBase template void grantImplHelper(const AccessRightsElement & element); - template + template void revokeImpl(const AccessFlags & flags, const Args &... 
args); - template + template void revokeImpl(const AccessRightsElement & element); - template + template void revokeImpl(const AccessRightsElements & elements); - template + template void revokeImplHelper(const AccessRightsElement & element); diff --git a/src/Interpreters/InterpreterGrantQuery.cpp b/src/Interpreters/InterpreterGrantQuery.cpp index c733cb0c992..b3472ee3096 100644 --- a/src/Interpreters/InterpreterGrantQuery.cpp +++ b/src/Interpreters/InterpreterGrantQuery.cpp @@ -32,8 +32,16 @@ namespace { if (query.is_revoke) { - grantee.access.revoke(query.access_rights_elements); - grantee.sensitive_access.revoke(query.access_rights_elements); + if (query.if_exists) + { + grantee.access.tryRevoke(query.access_rights_elements); + grantee.sensitive_access.tryRevoke(query.access_rights_elements); + } + else + { + grantee.access.revoke(query.access_rights_elements); + grantee.sensitive_access.revoke(query.access_rights_elements); + } } else { diff --git a/src/Parsers/ASTGrantQuery.h b/src/Parsers/ASTGrantQuery.h index 4964e12074b..bc56cb26b45 100644 --- a/src/Parsers/ASTGrantQuery.h +++ b/src/Parsers/ASTGrantQuery.h @@ -42,6 +42,7 @@ class ASTGrantQuery : public IAST public: bool attach_mode = false; bool is_revoke = false; + bool if_exists = false; AccessRightsElements access_rights_elements; std::shared_ptr roles; bool admin_option = false; diff --git a/src/Parsers/ParserGrantQuery.cpp b/src/Parsers/ParserGrantQuery.cpp index e6fb940ca04..1b6f8c1e631 100644 --- a/src/Parsers/ParserGrantQuery.cpp +++ b/src/Parsers/ParserGrantQuery.cpp @@ -244,10 +244,13 @@ bool ParserGrantQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) // String cluster; // parseOnCluster(pos, expected, cluster); + bool if_exists = false; bool grant_option = false; bool admin_option = false; if (is_revoke) { + if (ParserKeyword{"IF EXISTS"}.ignore(pos, expected)) + if_exists = true; if (ParserKeyword{"GRANT OPTION FOR"}.ignore(pos, expected)) grant_option = true; else if 
(ParserKeyword{"ADMIN OPTION FOR"}.ignore(pos, expected)) @@ -297,6 +300,7 @@ bool ParserGrantQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) auto query = std::make_shared(); node = query; + query->if_exists = if_exists; query->is_revoke = is_revoke; query->attach_mode = attach_mode; // query->cluster = std::move(cluster); diff --git a/tests/queries/4_cnch_stateless/60000_revoke_if.reference b/tests/queries/4_cnch_stateless/60000_revoke_if.reference new file mode 100644 index 00000000000..bb6ee7d8dab --- /dev/null +++ b/tests/queries/4_cnch_stateless/60000_revoke_if.reference @@ -0,0 +1 @@ +GRANT ALL ON *.* TO `1234.test_user` diff --git a/tests/queries/4_cnch_stateless/60000_revoke_if.sql b/tests/queries/4_cnch_stateless/60000_revoke_if.sql new file mode 100644 index 00000000000..bc1d8be8db7 --- /dev/null +++ b/tests/queries/4_cnch_stateless/60000_revoke_if.sql @@ -0,0 +1,16 @@ +-- Create a test user +CREATE USER IF NOT EXISTS test_user IDENTIFIED BY 'password'; + +-- Grant all privileges on all databases and tables to the test user +GRANT ALL ON *.* TO test_user; +GRANT SELECT ON db.tbl TO test_user; + +-- Revoke SELECT on a specific database and table only exactly matches +REVOKE IF EXISTS SELECT ON db.tbl FROM test_user; + +-- Show the grants for the test user again +SHOW GRANTS FOR test_user; +SELECT * FROM system.sensitive_grants where user_name like '%test_user' FORMAT CSV; + +-- Clean up - drop the test user after the test +DROP USER IF EXISTS test_user; From 13013da2bc6a69297e78868ed821e02c0bac1434 Mon Sep 17 00:00:00 2001 From: fredwang Date: Thu, 1 Aug 2024 03:09:46 +0000 Subject: [PATCH 008/292] Merge branch 'cherry-pick-88cf625f' into 'cnch-2.2' fix(clickhousech@m-4532738647): [cp] cnch 2.2 fix partition selector read server part log See merge request dp/ClickHouse!22477 --- src/CloudServices/CnchBGThreadPartitionSelector.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/src/CloudServices/CnchBGThreadPartitionSelector.cpp b/src/CloudServices/CnchBGThreadPartitionSelector.cpp index 0766340e8ab..29edcefa173 100644 --- a/src/CloudServices/CnchBGThreadPartitionSelector.cpp +++ b/src/CloudServices/CnchBGThreadPartitionSelector.cpp @@ -48,7 +48,7 @@ CnchBGThreadPartitionSelector::CnchBGThreadPartitionSelector(ContextMutablePtr g if (!res) break; - auto * col_uuid = checkAndGetColumn(*res.getByName("uuid").column); + auto * col_uuid = checkAndGetColumn(*res.getByName("uuid").column); auto * col_partition = checkAndGetColumn(*res.getByName("partition_id").column); auto * col_insert = checkAndGetColumn(*res.getByName("insert_parts").column); auto * col_insert_time = checkAndGetColumn(*res.getByName("last_insert_time").column); From 076051aad42eff87b6faefde4cced6b7f1a1ad0d Mon Sep 17 00:00:00 2001 From: fredwang Date: Thu, 1 Aug 2024 03:10:09 +0000 Subject: [PATCH 009/292] Merge 'cherry-pick-fix-recluster-2.2' into 'cnch-2.2' fix(clickhousech@m-3892771768): [cp] Fix recluster partition and cluster_status See merge request: !22531 --- src/Catalog/Catalog.cpp | 15 +- src/Catalog/Catalog.h | 3 +- src/CloudServices/CnchMergeMutateThread.cpp | 209 ++++++++++-------- src/CloudServices/CnchMergeMutateThread.h | 3 +- .../CnchTablePartitionMetricsHelper.cpp | 23 +- src/Storages/PartCacheManager.cpp | 8 +- src/Storages/PartCacheManager.h | 2 +- src/Storages/PartitionCommands.cpp | 4 + src/Storages/StorageCnchMergeTree.cpp | 2 +- src/Storages/TableDefinitionHash.h | 2 +- src/Storages/TableMetaEntry.h | 3 +- .../tests/gtest_part_storage_cache.cpp | 2 +- 12 files changed, 142 insertions(+), 134 deletions(-) diff --git a/src/Catalog/Catalog.cpp b/src/Catalog/Catalog.cpp index da4650d4627..15d49c97a91 100644 --- a/src/Catalog/Catalog.cpp +++ b/src/Catalog/Catalog.cpp @@ -1145,7 +1145,7 @@ namespace Catalog // Set cluster status after Alter table is successful to update PartCacheManager with new table metadata if (is_modify_cluster_by) - 
setTableClusterStatus(storage->getStorageUUID(), false, new_table->getTableHashForClusterBy().getDeterminHash()); + setTableClusterStatus(storage->getStorageUUID(), false, new_table->getTableHashForClusterBy()); if (auto cache_manager = context.getPartCacheManager(); cache_manager) { @@ -2381,7 +2381,7 @@ namespace Catalog { if (!part->deleted && !table_definition_hash.match(part->table_definition_hash)) { - setTableClusterStatus(storage->getStorageUUID(), false, table_definition_hash.getDeterminHash()); + setTableClusterStatus(storage->getStorageUUID(), false, table_definition_hash); break; } } @@ -3462,7 +3462,7 @@ namespace Catalog { if (!part->deleted && !table_definition_hash.match(part->table_definition_hash)) { - setTableClusterStatus(table->getStorageUUID(), false, table_definition_hash.getDeterminHash()); + setTableClusterStatus(table->getStorageUUID(), false, table_definition_hash); break; } } @@ -5169,6 +5169,7 @@ namespace Catalog void Catalog::createMutation(const StorageID & storage_id, const String & mutation_name, const String & mutate_text) { + LOG_TRACE(log, "createMutation: {}, {}", storage_id.getNameForLogs(), mutation_name); runWithMetricSupport( [&] { meta_proxy->createMutation(name_space, UUIDHelpers::UUIDToString(storage_id.uuid), mutation_name, mutate_text); }, ProfileEvents::CreateMutationSuccess, @@ -5177,6 +5178,7 @@ namespace Catalog void Catalog::removeMutation(const StorageID & storage_id, const String & mutation_name) { + LOG_TRACE(log, "removeMutation: {}, {}", storage_id.getNameForLogs(), mutation_name); runWithMetricSupport( [&] { meta_proxy->removeMutation(name_space, UUIDHelpers::UUIDToString(storage_id.uuid), mutation_name); }, ProfileEvents::RemoveMutationSuccess, @@ -5223,14 +5225,15 @@ namespace Catalog } } - void Catalog::setTableClusterStatus(const UUID & table_uuid, const bool clustered, const UInt64 & table_definition_hash) + void Catalog::setTableClusterStatus(const UUID & table_uuid, const bool clustered, const 
TableDefinitionHash & table_definition_hash) { + LOG_TRACE(log, "setTableClusterStatus: {} to {}", UUIDHelpers::UUIDToString(table_uuid), clustered); runWithMetricSupport( [&] { - meta_proxy->setTableClusterStatus(name_space, UUIDHelpers::UUIDToString(table_uuid), clustered, table_definition_hash); + meta_proxy->setTableClusterStatus(name_space, UUIDHelpers::UUIDToString(table_uuid), clustered, table_definition_hash.getDeterminHash()); /// keep the cache status up to date. if (context.getPartCacheManager()) - context.getPartCacheManager()->setTableClusterStatus(table_uuid, clustered); + context.getPartCacheManager()->setTableClusterStatus(table_uuid, clustered, table_definition_hash); }, ProfileEvents::SetTableClusterStatusSuccess, ProfileEvents::SetTableClusterStatusFailed); diff --git a/src/Catalog/Catalog.h b/src/Catalog/Catalog.h index ec3d2ffc64b..1f33dfddf36 100644 --- a/src/Catalog/Catalog.h +++ b/src/Catalog/Catalog.h @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #include "common/types.h" @@ -647,7 +648,7 @@ class Catalog std::multimap getAllMutations(); void fillMutationsByStorage(const StorageID & storage_id, std::map & out_mutations); - void setTableClusterStatus(const UUID & table_uuid, const bool clustered, const UInt64 & table_definition_hash); + void setTableClusterStatus(const UUID & table_uuid, const bool clustered, const TableDefinitionHash & table_definition_hash); void getTableClusterStatus(const UUID & table_uuid, bool & clustered); bool isTableClustered(const UUID & table_uuid); diff --git a/src/CloudServices/CnchMergeMutateThread.cpp b/src/CloudServices/CnchMergeMutateThread.cpp index b957eb6b28c..ad7c72e0c27 100644 --- a/src/CloudServices/CnchMergeMutateThread.cpp +++ b/src/CloudServices/CnchMergeMutateThread.cpp @@ -61,8 +61,12 @@ namespace constexpr auto DELAY_SCHEDULE_TIME_IN_SECOND = 60ul; constexpr auto DELAY_SCHEDULE_RANDOM_TIME_IN_SECOND = 3ul; - bool needMutate(const ServerDataPartPtr & part, const 
TxnTimestamp & commit_ts, bool change_schema) + bool needMutate(const ServerDataPartPtr & part, const TxnTimestamp & commit_ts, bool change_schema, bool is_recluster, const TableDefinitionHash & table_definition_hash) { + if (is_recluster) + { + return !table_definition_hash.match(part->part_model().table_definition_hash()); + } /// Some mutation commands (@see MutationCommands::changeSchema()) will not change the table schema /// which means it will not update columns_commit_time. To track those mutation commands, /// we add a new field `mutation_commit_time` in part metadata. And it's set to 0 for a new part by default. @@ -79,7 +83,7 @@ namespace TxnTimestamp getFirstMutation(const ServerDataPartPtr & part, const std::vector> & mutations) { for (const auto & [commit_time, change_schema] : mutations) - if (needMutate(part, commit_time, change_schema)) + if (needMutate(part, commit_time, change_schema, false, {})) return commit_time; return TxnTimestamp::maxTS(); } @@ -1248,10 +1252,7 @@ ClusterTaskProgress CnchMergeMutateThread::getReclusteringTaskProgress() if (partition_list.empty()) return cluster_task_progress; - if (!scheduled_mutation_partitions.empty()) - cluster_task_progress.progress = (scheduled_mutation_partitions.size() / static_cast(partition_list.size())) * 100; - else if (!finish_mutation_partitions.empty()) - cluster_task_progress.progress = (finish_mutation_partitions.size() / static_cast(partition_list.size())) * 100; + cluster_task_progress.progress = (finish_mutation_partitions.size() / static_cast(partition_list.size())) * 100; cluster_task_progress.start_time_seconds = current_mutate_entry->commit_time.toSecond(); return cluster_task_progress; } @@ -1291,6 +1292,19 @@ bool CnchMergeMutateThread::tryMutateParts(StoragePtr & istorage, StorageCnchMer std::lock_guard lock(try_mutate_parts_mutex); auto merging_mutating_parts_snapshot = copyCurrentlyMergingMutatingParts(); + auto finish_current_mutation = [this, &lock, &storage]() + { + if 
(!current_mutate_entry) + return; + + removeMutationEntryFromKV(*current_mutate_entry, lock); + storage.removeMutationEntry(current_mutate_entry->commit_time); + + scheduled_mutation_partitions.clear(); + finish_mutation_partitions.clear(); + current_mutate_entry.reset(); + }; + /// Fetch mutation entries from KV. std::map current_mutations_by_version; auto catalog = getContext()->getCnchCatalog(); @@ -1316,10 +1330,33 @@ bool CnchMergeMutateThread::tryMutateParts(StoragePtr & istorage, StorageCnchMer current_mutate_entry = std::make_optional(entry_from_catalog); } - if (current_mutate_entry->isReclusterMutation() && !getContext()->getTableReclusterTaskStatus(storage_id)) - return false; + if (current_mutate_entry->isReclusterMutation()) + { + if (!getContext()->getTableReclusterTaskStatus(storage_id)) + { + LOG_TRACE(log, "recluster task is disabled for {}", storage_id.getNameForLogs()); + return false; + } + if (current_mutate_entry->columns_commit_time < storage.commit_time) + { + /// There is newer version storage, needs to check whether `cluster by` definition changed + + /// get specific version storage + auto entry_istorage = catalog->getTableByUUID(*getContext(), toString(storage_id.uuid), current_mutate_entry->columns_commit_time); + auto & entry_cnch_table = checkAndGetCnchTable(entry_istorage); + + if (entry_cnch_table.getTableHashForClusterBy() != storage.getTableHashForClusterBy()) + { + LOG_INFO(log, "recluster task {} is canceled due to newer version cluster by", current_mutate_entry->txn_id.toString()); + finish_current_mutation(); + return false; + } + } + } bool change_schema = current_mutate_entry->commands.changeSchema(); + bool is_recluster = current_mutate_entry->isReclusterMutation(); + auto table_definition_hash = storage.getTableHashForClusterBy(); /// Function to generating new tasks. Return true if we can still generate new tasks. 
auto generate_tasks = [&](const ServerDataPartsVector & visible_parts, const NameSet & snapshot) @@ -1330,74 +1367,38 @@ bool CnchMergeMutateThread::tryMutateParts(StoragePtr & istorage, StorageCnchMer size_t curr_mutate_part_size = 0; ServerDataPartsVector alter_parts; bool remain_tasks_in_partition = false; - String command_partition_id; - if (type == ManipulationType::Clustering) + for (const auto & part : visible_parts) { - auto mutation_command = current_mutate_entry->commands[0]; - if (mutation_command.partition) - command_partition_id = storage.getPartitionIDFromQuery(mutation_command.partition, getContext()); - else if (mutation_command.predicate) + if (!needMutate(part, commit_ts, change_schema, is_recluster, table_definition_hash)) + continue; + if (snapshot.count(part->name())) { - ServerDataPartsVector parts_to_recluster; - auto table_definition_hash = storage.getTableHashForClusterBy(); - for (const auto & part : visible_parts) - { - if (!table_definition_hash.match(part->part_model().table_definition_hash())) - parts_to_recluster.push_back(part); - } - - /// TODO: (vivek, zuochuang.zema) why not filter by columns_commit_time and mutation_commit_time? 
- alter_parts = storage.getServerPartsByPredicate( - mutation_command.predicate, - [&]{ return parts_to_recluster; }, - getContext()); + remain_tasks_in_partition = true; + continue; } - } - - if (alter_parts.empty()) - { - for (const auto & part : visible_parts) + remain_tasks_in_partition = true; + alter_parts.push_back(part); + curr_mutate_part_size += part->part_model().size(); + auto p_part = part->tryGetPreviousPart(); + while (p_part) { - if (!needMutate(part, commit_ts, change_schema)) - continue; - - if (snapshot.count(part->name())) - { - remain_tasks_in_partition = true; - continue; - } - - remain_tasks_in_partition = true; - - if (type == ManipulationType::Clustering - && command_partition_id != part->partition().getID(storage)) - continue; - - alter_parts.push_back(part); - curr_mutate_part_size += part->part_model().size(); - auto p_part = part->tryGetPreviousPart(); - while (p_part) - { - curr_mutate_part_size += p_part->part_model().size(); - p_part = p_part->tryGetPreviousPart(); - } - - /// Batch n parts in one task. - if (alter_parts.size() >= storage_settings->cnch_mutate_max_parts_to_mutate - || curr_mutate_part_size >= storage_settings->cnch_mutate_max_total_bytes_to_mutate) - { - submitFutureManipulationTask( - storage, - FutureManipulationTask(*this, type) - .setMutationEntry(*current_mutate_entry) - .assignSourceParts(std::move(alter_parts))); - - alter_parts.clear(); - curr_mutate_part_size = 0; - if (running_mutation_tasks >= storage.getSettings()->max_addition_mutation_task_num) - return true; - } + curr_mutate_part_size += p_part->part_model().size(); + p_part = p_part->tryGetPreviousPart(); + } + /// Batch n parts in one task. 
+ if (alter_parts.size() >= storage_settings->cnch_mutate_max_parts_to_mutate + || curr_mutate_part_size >= storage_settings->cnch_mutate_max_total_bytes_to_mutate) + { + submitFutureManipulationTask( + storage, + FutureManipulationTask(*this, type) + .setMutationEntry(*current_mutate_entry) + .assignSourceParts(std::move(alter_parts))); + alter_parts.clear(); + curr_mutate_part_size = 0; + if (running_mutation_tasks >= storage.getSettings()->max_addition_mutation_task_num) + return true; } } @@ -1410,10 +1411,6 @@ bool CnchMergeMutateThread::tryMutateParts(StoragePtr & istorage, StorageCnchMer .setMutationEntry(*current_mutate_entry) .assignSourceParts(std::move(alter_parts))); } - else if (alter_parts.empty() && type == ManipulationType::Clustering) - { - remain_tasks_in_partition = false; - } return remain_tasks_in_partition; }; @@ -1424,12 +1421,12 @@ bool CnchMergeMutateThread::tryMutateParts(StoragePtr & istorage, StorageCnchMer const auto & commit_ts = current_mutate_entry->commit_time; for (const auto & part : visible_parts) { - if (needMutate(part, commit_ts, change_schema)) + if (needMutate(part, commit_ts, change_schema, is_recluster, table_definition_hash)) return false; } for (const auto & part : visible_staged_parts) { - if (needMutate(part, commit_ts, change_schema)) + if (needMutate(part, commit_ts, change_schema, is_recluster, table_definition_hash)) return false; } return true; @@ -1437,7 +1434,6 @@ bool CnchMergeMutateThread::tryMutateParts(StoragePtr & istorage, StorageCnchMer /// Step 1: generate mutations tasks for the earliest mutation entry. 
bool is_finish = true; - bool is_recluster_partition_finish = false; auto timestamp = getContext()->getTimestamp(); if (storage.getInMemoryMetadataPtr()->getPartitionKeyAST()) @@ -1470,7 +1466,7 @@ bool CnchMergeMutateThread::tryMutateParts(StoragePtr & istorage, StorageCnchMer visible_staged_parts = CnchPartsHelper::calcVisibleParts(staged_parts, false); } - if (check_all_done(visible_parts, visible_staged_parts) || is_recluster_partition_finish) + if (check_all_done(visible_parts, visible_staged_parts)) finish_mutation_partitions.emplace(partition_id); } /// Some parts are not scheduled, generate tasks for those parts @@ -1479,8 +1475,6 @@ bool CnchMergeMutateThread::tryMutateParts(StoragePtr & istorage, StorageCnchMer { LOG_TRACE(log, "No more mutation tasks for partition {}, mutation id: {}", partition_id, current_mutate_entry->txn_id); scheduled_mutation_partitions.emplace(partition_id); - if (current_mutate_entry->isReclusterMutation()) - is_recluster_partition_finish = true; } /// - if can't generate all tasks at this round, then go into next round. 
else @@ -1531,12 +1525,10 @@ bool CnchMergeMutateThread::tryMutateParts(StoragePtr & istorage, StorageCnchMer } } - removeMutationEntryFromKV(*current_mutate_entry, is_newest_cluster_mutation, lock); - storage.removeMutationEntry(commit_ts); + if (is_newest_cluster_mutation) + setTableClusterStatus(); - scheduled_mutation_partitions.clear(); - finish_mutation_partitions.clear(); - current_mutate_entry.reset(); + finish_current_mutation(); return false; } @@ -1557,6 +1549,7 @@ MergeTreeMutationStatusVector CnchMergeMutateThread::getAllMutationStatuses() TxnTimestamp curr_ts = context->tryGetTimestamp(__PRETTY_FUNCTION__); auto istorage = catalog->getTableByUUID(*context, UUIDHelpers::UUIDToString(storage_id.uuid), curr_ts); auto & storage = checkAndGetCnchTable(istorage); + auto table_definition_hash = storage.getTableHashForClusterBy(); auto all_parts = catalog->getAllServerDataParts(istorage, curr_ts, nullptr); auto visible_parts = CnchPartsHelper::calcVisibleParts(all_parts, false); @@ -1572,10 +1565,11 @@ MergeTreeMutationStatusVector CnchMergeMutateThread::getAllMutationStatuses() calcMutationPartitions(entry, istorage, storage); auto & partitions = entry.partition_ids.value(); bool change_schema = entry.commands.changeSchema(); + bool is_recluster = entry.isReclusterMutation(); for (auto & part : visible_parts) { bool partition_match = std::find(partitions.begin(), partitions.end(), part->info().partition_id) != partitions.end(); - if (partition_match && needMutate(part, commit_ts, change_schema)) + if (partition_match && needMutate(part, commit_ts, change_schema, is_recluster, table_definition_hash)) ++status.parts_to_do; } @@ -1593,7 +1587,40 @@ MergeTreeMutationStatusVector CnchMergeMutateThread::getAllMutationStatuses() return res; } -void CnchMergeMutateThread::removeMutationEntryFromKV(const CnchMergeTreeMutationEntry & entry, bool recluster_finish, std::lock_guard &) +void CnchMergeMutateThread::setTableClusterStatus() +{ + /// modify cluster status 
before removing recluster mutation entry. + LOG_DEBUG(log, "All reclusted tasks in table {} have been executed, check for cluster status", storage_id.getNameForLogs()); + + bool clustered = true; + auto istorage = getStorageFromCatalog(); + auto & storage = checkAndGetCnchTable(istorage); + auto table_definition_hash = storage.getTableHashForClusterBy(); + auto check_clustered = [&table_definition_hash](const ServerDataPartsVector & parts) + { + return std::all_of(parts.begin(), parts.end(), + [&table_definition_hash](const ServerDataPartPtr & part) { return table_definition_hash.match(part->part_model().table_definition_hash()); }); + }; + auto partition_ids = catalog->getPartitionIDs(istorage, nullptr); + for (const auto & partition_id : partition_ids) + { + auto parts = catalog->getServerDataPartsInPartitions(istorage, {partition_id}, TxnTimestamp::maxTS(), nullptr); + auto visible_parts = CnchPartsHelper::calcVisibleParts(parts, false); + ServerDataPartsVector visible_staged_parts; + if (storage.getInMemoryMetadataPtr()->hasUniqueKey()) + { + auto staged_parts = catalog->getStagedServerDataParts(istorage, TxnTimestamp::maxTS()); + visible_staged_parts = CnchPartsHelper::calcVisibleParts(staged_parts, false); + } + clustered = check_clustered(visible_parts) && check_clustered(visible_staged_parts); + if (!clustered) + break; + } + + catalog->setTableClusterStatus(storage.getStorageID().uuid, clustered, table_definition_hash); +} + +void CnchMergeMutateThread::removeMutationEntryFromKV(const CnchMergeTreeMutationEntry & entry, std::lock_guard &) { const auto & commit_time = entry.commit_time; /// FIXME: (zuochuang.zema) buggy: we don't touch active timestamp for insertion, @@ -1615,12 +1642,6 @@ void CnchMergeMutateThread::removeMutationEntryFromKV(const CnchMergeTreeMutatio return; } - /// modify cluster status before removing recluster mutation entry. 
- if (recluster_finish) - { - LOG_DEBUG(log, "Data parts are clustered in table {}.", storage_id.getNameForLogs()); - } - WriteBufferFromOwnString buf; entry.commands.writeText(buf); LOG_DEBUG(log, "Mutation {}(command: {}) has been done, will remove it from catalog.", commit_time, buf.str()); diff --git a/src/CloudServices/CnchMergeMutateThread.h b/src/CloudServices/CnchMergeMutateThread.h index ec5280bc771..60e9dbecc2f 100644 --- a/src/CloudServices/CnchMergeMutateThread.h +++ b/src/CloudServices/CnchMergeMutateThread.h @@ -199,6 +199,7 @@ class CnchMergeMutateThread : public ICnchBGThread void waitMutationFinish(UInt64 mutation_commit_time, UInt64 timeout_ms); MergeTreeMutationStatusVector getAllMutationStatuses(); ClusterTaskProgress getReclusteringTaskProgress(); + void setTableClusterStatus(); private: void preStart() override; @@ -217,7 +218,7 @@ class CnchMergeMutateThread : public ICnchBGThread String submitFutureManipulationTask(const StorageCnchMergeTree & storage, FutureManipulationTask & future_task, bool maybe_sync_task = false); // Mutate - void removeMutationEntryFromKV(const CnchMergeTreeMutationEntry & entry, bool recluster_finish, std::lock_guard &); + void removeMutationEntryFromKV(const CnchMergeTreeMutationEntry & entry, std::lock_guard &); void calcMutationPartitions(CnchMergeTreeMutationEntry & mutate_entry, StoragePtr & istorage, StorageCnchMergeTree & storage); bool tryMutateParts(StoragePtr & istorage, StorageCnchMergeTree & storage); diff --git a/src/Storages/CnchTablePartitionMetricsHelper.cpp b/src/Storages/CnchTablePartitionMetricsHelper.cpp index 08259c9685b..c621a5c8741 100644 --- a/src/Storages/CnchTablePartitionMetricsHelper.cpp +++ b/src/Storages/CnchTablePartitionMetricsHelper.cpp @@ -217,30 +217,18 @@ void CnchTablePartitionMetricsHelper::recalculateOrSnapshotPartitionsMetrics( return; auto cnch_catalog = getContext()->getCnchCatalog(); - /// Recalculate table level `is_fully_clustered` and `load_parts_by_partition`. 
+ /// Recalculate table level `load_parts_by_partition`. { - auto is_fully_clustered = true; { size_t total_parts_number{0}; - // store table TDH used for comparison with part TDH as it table TDH may change during comparison - UInt64 current_table_definition_hash = table_meta_ptr->table_definition_hash.load(); auto & meta_partitions = table_meta_ptr->partitions; for (auto it = meta_partitions.begin(); it != meta_partitions.end(); it++) { auto & partition_info_ptr = *it; if (partition_info_ptr == nullptr || partition_info_ptr->metrics_ptr == nullptr) continue; - auto metrics_data = partition_info_ptr->metrics_ptr->read(); total_parts_number += metrics_data.total_parts_number; - if (!metrics_data.is_deleted) - { - auto is_partition_clustered = (metrics_data.is_single_table_definition_hash - && metrics_data.table_definition_hash == current_table_definition_hash) - && !metrics_data.has_bucket_number_neg_one; - if (!is_partition_clustered) - is_fully_clustered = false; - } } { auto lock = table_meta_ptr->writeLock(); @@ -248,16 +236,7 @@ void CnchTablePartitionMetricsHelper::recalculateOrSnapshotPartitionsMetrics( /// reset load_parts_by_partition if parts number of current table is less than 5 million; if (table_meta_ptr->load_parts_by_partition && total_parts_number < 5000000) table_meta_ptr->load_parts_by_partition = false; - - if (is_fully_clustered) - { - if (table_meta_ptr->is_clustered || current_table_definition_hash != table_meta_ptr->table_definition_hash) - is_fully_clustered = false; - } } - if (is_fully_clustered) - cnch_catalog->setTableClusterStatus( - UUIDHelpers::toUUID(table_meta_ptr->table_uuid), is_fully_clustered, current_table_definition_hash); } } diff --git a/src/Storages/PartCacheManager.cpp b/src/Storages/PartCacheManager.cpp index d8d47d5ee26..8816365db73 100644 --- a/src/Storages/PartCacheManager.cpp +++ b/src/Storages/PartCacheManager.cpp @@ -183,7 +183,7 @@ void PartCacheManager::mayUpdateTableMeta(const IStorage & storage, const PairIn 
getContext()->getCnchCatalog()->getTablePreallocateVW(storage.getStorageUUID(), meta_ptr->preallocate_vw); meta_loaded = true; } - meta_ptr->table_definition_hash = storage.getTableHashForClusterBy().getDeterminHash(); + meta_ptr->table_definition_hash = storage.getTableHashForClusterBy(); /// Needs make sure no other thread force reload UInt32 loading = CacheStatus::LOADING; meta_ptr->cache_status.compare_exchange_strong(loading, CacheStatus::LOADED); @@ -399,16 +399,14 @@ UInt64 PartCacheManager::getTableLastUpdateTime(const UUID & uuid) return last_update_time; } -void PartCacheManager::setTableClusterStatus(const UUID & uuid, const bool clustered) +void PartCacheManager::setTableClusterStatus(const UUID & uuid, const bool clustered, const TableDefinitionHash & table_definition_hash) { TableMetaEntryPtr table_entry = getTableMeta(uuid); if (table_entry) { auto lock = table_entry->writeLock(); table_entry->is_clustered = clustered; - auto table = getContext()->getCnchCatalog()->getTableByUUID(*getContext(), UUIDHelpers::UUIDToString(uuid), TxnTimestamp::maxTS()); - if (table) - table_entry->table_definition_hash = table->getTableHashForClusterBy().getDeterminHash(); + table_entry->table_definition_hash = table_definition_hash; } } diff --git a/src/Storages/PartCacheManager.h b/src/Storages/PartCacheManager.h index 6d4cfc17a70..e75627e8da7 100644 --- a/src/Storages/PartCacheManager.h +++ b/src/Storages/PartCacheManager.h @@ -67,7 +67,7 @@ class PartCacheManager : WithMutableContext bool getTableClusterStatus(const UUID & uuid); - void setTableClusterStatus(const UUID & uuid, bool clustered); + void setTableClusterStatus(const UUID & uuid, bool clustered, const TableDefinitionHash & table_definition_hash); void setTablePreallocateVW(const UUID & uuid, String vw); diff --git a/src/Storages/PartitionCommands.cpp b/src/Storages/PartitionCommands.cpp index 840cbd3894e..9258f0c2251 100644 --- a/src/Storages/PartitionCommands.cpp +++ b/src/Storages/PartitionCommands.cpp 
@@ -373,6 +373,10 @@ std::string PartitionCommand::typeToString() const return "REPLACE PARTITION"; case PartitionCommand::Type::INGEST_PARTITION: return "INGEST PARTITION"; + case PartitionCommand::Type::RECLUSTER_PARTITION: + return "RECLUSTER PARTITION"; + case PartitionCommand::Type::RECLUSTER_PARTITION_WHERE: + return "RECLUSTER PARTITION WHERE"; default: throw Exception("Uninitialized partition command", ErrorCodes::LOGICAL_ERROR); } diff --git a/src/Storages/StorageCnchMergeTree.cpp b/src/Storages/StorageCnchMergeTree.cpp index d81f5f3a869..7510aa7feb1 100644 --- a/src/Storages/StorageCnchMergeTree.cpp +++ b/src/Storages/StorageCnchMergeTree.cpp @@ -2074,7 +2074,7 @@ void StorageCnchMergeTree::reclusterPartition(const PartitionCommand & command, mutation_commands.emplace_back(mutation_command); mutation_entry.commands = mutation_commands; mutation_entry.txn_id = query_context->getCurrentTransaction()->getPrimaryTransactionID().toUInt64(); - mutation_entry.commit_time = commit_time; + mutation_entry.commit_time = query_context->getTimestamp(); mutation_entry.columns_commit_time = commit_time; query_context->getCnchCatalog()->createMutation(getStorageID(), mutation_entry.txn_id.toString(), mutation_entry.toString()); } diff --git a/src/Storages/TableDefinitionHash.h b/src/Storages/TableDefinitionHash.h index 98db37fd579..343ce81e270 100644 --- a/src/Storages/TableDefinitionHash.h +++ b/src/Storages/TableDefinitionHash.h @@ -17,7 +17,7 @@ class TableDefinitionHash :determin_hash(determin_hash_), v1_hash(v1_hash_), v2_hash(v2_hash_), v1_quoted_hash(v1_quoted_hash_) {} bool operator==(const TableDefinitionHash & other) const { return this->determin_hash == other.determin_hash; } - bool match(UInt64 hash_value) + bool match(UInt64 hash_value) const { return hash_value == determin_hash || hash_value == v1_hash || hash_value == v2_hash || hash_value == v1_quoted_hash; } diff --git a/src/Storages/TableMetaEntry.h b/src/Storages/TableMetaEntry.h index 
f4ea2e0dd3c..d1d76ebd82e 100644 --- a/src/Storages/TableMetaEntry.h +++ b/src/Storages/TableMetaEntry.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -69,7 +70,7 @@ struct TableMetaEntry CacheVersion cache_version; bool is_clustered{true}; - std::atomic_uint64_t table_definition_hash{0}; + TableDefinitionHash table_definition_hash; String preallocate_vw; mutable RWLock meta_mutex; std::atomic_bool partition_metrics_loaded = false; diff --git a/src/Storages/tests/gtest_part_storage_cache.cpp b/src/Storages/tests/gtest_part_storage_cache.cpp index 0bffb25d3b1..f0dac09ffe6 100644 --- a/src/Storages/tests/gtest_part_storage_cache.cpp +++ b/src/Storages/tests/gtest_part_storage_cache.cpp @@ -476,7 +476,7 @@ TEST_F(CacheManagerTest, getAndSetStatus) { EXPECT_EQ(cache_manager->getTableClusterStatus(storage_1->getStorageUUID()), true); // Catalog is not initialized, but the value would still set. - EXPECT_THROW({ cache_manager->setTableClusterStatus(storage_1->getStorageUUID(), false); }, Exception); + EXPECT_NO_THROW({ cache_manager->setTableClusterStatus(storage_1->getStorageUUID(), false, storage_1->getTableHashForClusterBy()); }); EXPECT_EQ(cache_manager->getTableClusterStatus(storage_1->getStorageUUID()), false); From 0a1b13bbd9e8f855f640fc8fc2a19bca9518cf50 Mon Sep 17 00:00:00 2001 From: fredwang Date: Thu, 1 Aug 2024 03:10:37 +0000 Subject: [PATCH 010/292] Merge '4616856596_cnch-2.2' into 'cnch-2.2' fix(optimizer@m-4616856596): fix MultipleDistinctAggregationToExpandAggregate bugs See merge request: !22537 --- ...leDistinctAggregationToExpandAggregate.cpp | 27 +- src/Optimizer/tests/gtest_base_plan_test.cpp | 2 +- .../tpcds/explains/tpcds100/q16.explain | 12 +- .../tpcds/explains/tpcds1000/q16.explain | 12 +- .../tpcds1000_not_show_stats/q16.explain | 4 +- .../explains/tpcds1000_sample/q16.explain | 12 +- .../48044_multiple_distinct_rewrite.reference | 5 + .../48044_multiple_distinct_rewrite.sql | 516 ++++++++++++++++++ 8 files 
changed, 558 insertions(+), 32 deletions(-) diff --git a/src/Optimizer/Rule/Rewrite/MultipleDistinctAggregationToExpandAggregate.cpp b/src/Optimizer/Rule/Rewrite/MultipleDistinctAggregationToExpandAggregate.cpp index 317e640e266..92c8ada1488 100644 --- a/src/Optimizer/Rule/Rewrite/MultipleDistinctAggregationToExpandAggregate.cpp +++ b/src/Optimizer/Rule/Rewrite/MultipleDistinctAggregationToExpandAggregate.cpp @@ -34,12 +34,7 @@ const std::unordered_map MultipleDistinctAggregationToExpandAggr {"avgdistinct", "avgIf"}, {"maxdistinct", "maxIf"}, {"mindistinct", "minIf"}, - {"sumdistinct", "sumIf"}, - {"count", "anyIf"}, - {"max", "anyIf"}, - {"min", "anyIf"}, - {"avg", "anyIf"}, - {"sum", "anyIf"}}; + {"sumdistinct", "sumIf"}}; bool MultipleDistinctAggregationToExpandAggregate::hasNoDistinctWithFilterOrMask(const AggregatingStep & step) { @@ -268,10 +263,20 @@ TransformResult MultipleDistinctAggregationToExpandAggregate::transformImpl(Plan } // step 2 : add pre-compute aggregate + std::set keyset; + for(const String & key : step.getKeys()) + { + keyset.insert(key); + } + keyset.insert(group_id_symbol); + for(const String & distinct : distinct_arguments) + { + keyset.insert(distinct); + } + + // make sure keys remove duplicated value. Names keys; - keys.insert(keys.end(), step.getKeys().begin(), step.getKeys().end()); - keys.emplace_back(group_id_symbol); - keys.insert(keys.end(), distinct_arguments.begin(), distinct_arguments.end()); + keys.insert(keys.end(), keyset.begin(), keyset.end()); auto pre_agg_step = std::make_shared( expand_node->getStep()->getOutputStream(), @@ -363,7 +368,7 @@ MultipleDistinctAggregationToExpandAggregate::nonDistinctAggWithMask(const Aggre Array parameters = agg_desc.function->getParameters(); AggregateFunctionProperties properties; - String fun_remove_distinct = distinct_func_normal_func.at(Poco::toLower(agg_desc.function->getName())); + String fun = "anyIf"; /// in case count(*), agg_desc.function->getArgumentTypes() returns empty. 
/// anyIf requires 2 arguments @@ -372,7 +377,7 @@ MultipleDistinctAggregationToExpandAggregate::nonDistinctAggWithMask(const Aggre data_types.emplace_back(std::make_shared()); } - AggregateFunctionPtr new_agg_fun = AggregateFunctionFactory::instance().get(fun_remove_distinct, data_types, parameters, properties); + AggregateFunctionPtr new_agg_fun = AggregateFunctionFactory::instance().get(fun, data_types, parameters, properties); Names argument_names; argument_names.emplace_back(agg_desc.column_name); argument_names.emplace_back(mask_column); diff --git a/src/Optimizer/tests/gtest_base_plan_test.cpp b/src/Optimizer/tests/gtest_base_plan_test.cpp index f6f2a250995..d1d0a09b56b 100644 --- a/src/Optimizer/tests/gtest_base_plan_test.cpp +++ b/src/Optimizer/tests/gtest_base_plan_test.cpp @@ -76,7 +76,7 @@ BasePlanTest::BasePlanTest(const String & database_name_, const std::unordered_m setting_changes.emplace_back("enable_optimizer", true); setting_changes.emplace_back("enable_memory_catalog", true); - setting_changes.emplace_back("dialect_type", "ANSI"s); + setting_changes.emplace_back("dialect_type", "ANSI"); setting_changes.emplace_back("data_type_default_nullable", false); for (const auto & item : session_settings) diff --git a/tests/optimizers/tpcds/explains/tpcds100/q16.explain b/tests/optimizers/tpcds/explains/tpcds100/q16.explain index cdcc69cfea8..7b5c65848b0 100644 --- a/tests/optimizers/tpcds/explains/tpcds100/q16.explain +++ b/tests/optimizers/tpcds/explains/tpcds100/q16.explain @@ -10,13 +10,13 @@ Projection Est. 1 rows └─ Aggregating Est. 1 rows │ Group by: {} │ Aggregates: expr#uniqExact(cs_order_number):=AggNull(countIf)(cs_order_number,group_id_mask), expr#sum(cs_ext_ship_cost):=AggNull(anyIf)(expr#sum(cs_ext_ship_cost),group_id_mask_1), expr#sum(cs_net_profit):=AggNull(anyIf)(expr#sum(cs_net_profit),group_id_mask_1) - └─ Projection Est. 19547 rows + └─ Projection Est. 
21719 rows │ Expressions: [cs_order_number, expr#sum(cs_ext_ship_cost), expr#sum(cs_net_profit)], group_id_mask:=group_id = 1, group_id_mask_1:=group_id = 0 - └─ MergingAggregated Est. 19547 rows - └─ Repartition Exchange Est. 19547 rows - │ Partition by: {group_id, cs_order_number} - └─ Aggregating Est. 19547 rows - │ Group by: {group_id, cs_order_number} + └─ MergingAggregated Est. 21719 rows + └─ Repartition Exchange Est. 21719 rows + │ Partition by: {cs_order_number, group_id} + └─ Aggregating Est. 21719 rows + │ Group by: {cs_order_number, group_id} │ Aggregates: expr#sum(cs_ext_ship_cost):=AggNull(sum)(cs_ext_ship_cost), expr#sum(cs_net_profit):=AggNull(sum)(cs_net_profit) └─ Expand Est. 21719 rows └─ Right Semi Join Est. 21719 rows diff --git a/tests/optimizers/tpcds/explains/tpcds1000/q16.explain b/tests/optimizers/tpcds/explains/tpcds1000/q16.explain index 8ba49f99564..6de98b277f3 100644 --- a/tests/optimizers/tpcds/explains/tpcds1000/q16.explain +++ b/tests/optimizers/tpcds/explains/tpcds1000/q16.explain @@ -10,13 +10,13 @@ Projection Est. 1 rows └─ Aggregating Est. 1 rows │ Group by: {} │ Aggregates: expr#uniqExact(cs_order_number):=AggNull(countIf)(cs_order_number,group_id_mask), expr#sum(cs_ext_ship_cost):=AggNull(anyIf)(expr#sum(cs_ext_ship_cost),group_id_mask_1), expr#sum(cs_net_profit):=AggNull(anyIf)(expr#sum(cs_net_profit),group_id_mask_1) - └─ Projection Est. 56241 rows + └─ Projection Est. 62491 rows │ Expressions: [cs_order_number, expr#sum(cs_ext_ship_cost), expr#sum(cs_net_profit)], group_id_mask:=group_id = 1, group_id_mask_1:=group_id = 0 - └─ MergingAggregated Est. 56241 rows - └─ Repartition Exchange Est. 56241 rows - │ Partition by: {group_id, cs_order_number} - └─ Aggregating Est. 56241 rows - │ Group by: {group_id, cs_order_number} + └─ MergingAggregated Est. 62491 rows + └─ Repartition Exchange Est. 62491 rows + │ Partition by: {cs_order_number, group_id} + └─ Aggregating Est. 
62491 rows + │ Group by: {cs_order_number, group_id} │ Aggregates: expr#sum(cs_ext_ship_cost):=AggNull(sum)(cs_ext_ship_cost), expr#sum(cs_net_profit):=AggNull(sum)(cs_net_profit) └─ Expand Est. 62491 rows └─ Right Semi Join Est. 62491 rows diff --git a/tests/optimizers/tpcds/explains/tpcds1000_not_show_stats/q16.explain b/tests/optimizers/tpcds/explains/tpcds1000_not_show_stats/q16.explain index 9edd659176f..1f3e3913777 100644 --- a/tests/optimizers/tpcds/explains/tpcds1000_not_show_stats/q16.explain +++ b/tests/optimizers/tpcds/explains/tpcds1000_not_show_stats/q16.explain @@ -14,9 +14,9 @@ Projection │ Expressions: [cs_order_number, expr#sum(cs_ext_ship_cost), expr#sum(cs_net_profit)], group_id_mask:=group_id = 1, group_id_mask_1:=group_id = 0 └─ MergingAggregated └─ Repartition Exchange - │ Partition by: {group_id, cs_order_number} + │ Partition by: {cs_order_number, group_id} └─ Aggregating - │ Group by: {group_id, cs_order_number} + │ Group by: {cs_order_number, group_id} │ Aggregates: expr#sum(cs_ext_ship_cost):=AggNull(sum)(cs_ext_ship_cost), expr#sum(cs_net_profit):=AggNull(sum)(cs_net_profit) └─ Expand └─ Right Semi Join diff --git a/tests/optimizers/tpcds/explains/tpcds1000_sample/q16.explain b/tests/optimizers/tpcds/explains/tpcds1000_sample/q16.explain index 340071e2710..7976ddd7d33 100644 --- a/tests/optimizers/tpcds/explains/tpcds1000_sample/q16.explain +++ b/tests/optimizers/tpcds/explains/tpcds1000_sample/q16.explain @@ -10,13 +10,13 @@ Projection Est. 1 rows └─ Aggregating Est. 1 rows │ Group by: {} │ Aggregates: expr#uniqExact(cs_order_number):=AggNull(countIf)(cs_order_number,group_id_mask), expr#sum(cs_ext_ship_cost):=AggNull(anyIf)(expr#sum(cs_ext_ship_cost),group_id_mask_1), expr#sum(cs_net_profit):=AggNull(anyIf)(expr#sum(cs_net_profit),group_id_mask_1) - └─ Projection Est. 54540 rows + └─ Projection Est. 
60601 rows │ Expressions: [cs_order_number, expr#sum(cs_ext_ship_cost), expr#sum(cs_net_profit)], group_id_mask:=group_id = 1, group_id_mask_1:=group_id = 0 - └─ MergingAggregated Est. 54540 rows - └─ Repartition Exchange Est. 54540 rows - │ Partition by: {group_id, cs_order_number} - └─ Aggregating Est. 54540 rows - │ Group by: {group_id, cs_order_number} + └─ MergingAggregated Est. 60601 rows + └─ Repartition Exchange Est. 60601 rows + │ Partition by: {cs_order_number, group_id} + └─ Aggregating Est. 60601 rows + │ Group by: {cs_order_number, group_id} │ Aggregates: expr#sum(cs_ext_ship_cost):=AggNull(sum)(cs_ext_ship_cost), expr#sum(cs_net_profit):=AggNull(sum)(cs_net_profit) └─ Expand Est. 60601 rows └─ Right Semi Join Est. 60601 rows diff --git a/tests/queries/4_cnch_stateless/48044_multiple_distinct_rewrite.reference b/tests/queries/4_cnch_stateless/48044_multiple_distinct_rewrite.reference index c1088bcc012..179a06251d4 100644 --- a/tests/queries/4_cnch_stateless/48044_multiple_distinct_rewrite.reference +++ b/tests/queries/4_cnch_stateless/48044_multiple_distinct_rewrite.reference @@ -1 +1,6 @@ 5 9 4 3 +1 1 1 1 a +1 1 1 1 b +1 2 1 1 c +2 5 1 2 d +0 0 0 nan nan diff --git a/tests/queries/4_cnch_stateless/48044_multiple_distinct_rewrite.sql b/tests/queries/4_cnch_stateless/48044_multiple_distinct_rewrite.sql index cd5b16d2bc4..b7d275a303e 100644 --- a/tests/queries/4_cnch_stateless/48044_multiple_distinct_rewrite.sql +++ b/tests/queries/4_cnch_stateless/48044_multiple_distinct_rewrite.sql @@ -14,5 +14,521 @@ insert into test1 values ('d', '5', 3); select count(*), sum(c), count(distinct a), count(distinct b) from test1; +-- test duplicate group by keys +select count(*), sum(c), count(distinct a), count(distinct b), a from test1 group by a order by a; +-- test std::out_of_range exception case +CREATE TABLE aeolus_data_table_8_352783_prod +( + `row_id_kmtq3k` Int64, + `p_date` Date, + `fx_account_id` Nullable(String), + `user_id` Nullable(String), + 
`product_code` Nullable(String), + `partner_user_id` Nullable(String), + `partner_code` Nullable(String), + `person_id` Nullable(String), + `register_phone` Nullable(String), + `account_status_code` Nullable(String), + `account_create_time` Nullable(String), + `account_delete_time` Nullable(String), + `credit_id` Nullable(String), + `out_credit_no` Nullable(String), + `fund_code` Nullable(String), + `auto_audit_status` Nullable(String), + `auto_deny_reason` Nullable(String), + `all_deny_reason` Nullable(String), + `manual_audit_status` Nullable(String), + `manual_deny_reason` Nullable(String), + `credit_apply_time` Nullable(String), + `credit_apply_pass_time` Nullable(String), + `user_source` Nullable(String), + `credit_create_time` Nullable(String), + `his_credit_cum` Nullable(Int32), + `his_credit_info` Array(Nullable(String)), + `identity` Nullable(String), + `name` Nullable(String), + `identity_province` Nullable(String), + `identity_city` Nullable(String), + `birth_date` Nullable(String), + `gender` Nullable(String), + `nation` Nullable(String), + `verified_create_time` Nullable(String), + `submit_host_user_id` Nullable(String), + `submit_host_app_id` Nullable(String), + `submit_host_device_id` Nullable(String), + `submit_create_time` Nullable(String), + `submit_source` Nullable(String), + `quota_id` Nullable(String), + `quota_amount` Nullable(Float64), + `used_amount` Nullable(Float64), + `freeze_amount` Nullable(Float64), + `risk_freeze_amount` Nullable(Float64), + `daily_rate` Nullable(Float64), + `annual_rate` Nullable(Float64), + `monthly_rate` Nullable(Float64), + `penalty_daily_rate` Nullable(Float64), + `bill_penalty_daily_rate` Nullable(Float64), + `loan_product_code` Nullable(String), + `card_num` Nullable(String), + `bank_list` Array(Nullable(String)), + `efc_temp_amount` Nullable(Float64), + `efc_temp_amount_begin_time` Nullable(String), + `efc_temp_amount_end_time` Nullable(String), + `last_temp_amount` Nullable(Float64), + 
`last_temp_amount_begin_time` Nullable(String), + `last_temp_amount_end_time` Nullable(String), + `last_temp_amount_status_code` Nullable(Float64), + `used_temp_amount` Nullable(Float64), + `last_trans_quota_time` Nullable(String), + `is_trans_quota` Nullable(String), + `verified_status_code` Nullable(String), + `ocr_status_code` Nullable(String), + `ocr_create_time` Nullable(String), + `avail_quota_amount` Nullable(Float64), + `is_order` Nullable(String), + `first_order_date` Nullable(String), + `first_order_amount` Nullable(Float64), + `first_order_type` Nullable(String), + `last_order_date` Nullable(String), + `last_order_type` Nullable(String), + `first_order_settle_date` Nullable(String), + `second_order_type` Nullable(String), + `order_num` Nullable(Int64), + `trade_amount_sum` Nullable(Float64), + `term_trade_amount_sum` Nullable(Float64), + `real_trade_amount_sum` Nullable(Float64), + `first_ecom_order_date` Nullable(String), + `first_ecom_order_amount_date` Nullable(Float64), + `first_ecom_order_type` Nullable(String), + `first_ecom_name_new` Nullable(String), + `first_ecom_vertical_category_new` Nullable(String), + `first_ecom_vertical_category_ka_new` Nullable(String), + `ecom_order_num` Nullable(Int64), + `ecom_trade_amount_sum` Nullable(Float64), + `ecom_term_trade_amount_sum` Nullable(Float64), + `ecom_real_trade_amount_sum` Nullable(Float64), + `last_ecom_order_date` Nullable(String), + `last_ecom_order_type` Nullable(String), + `first_credit_order_datedif` Nullable(Int64), + `last_settle_time` Nullable(String), + `overdue_status` Nullable(String), + `history_overdue_status` Nullable(String), + `zl_amount` Nullable(Int64), + `zl_amount_begin_time` Nullable(String), + `zl_amount_end_time` Nullable(String), + `zl_amount_id` Nullable(String), + `used_zl_amount` Nullable(Int64), + `his_is_issue_zl` Nullable(String), + `is_livedetect` Nullable(String), + `is_orc` Nullable(String), + `is_auth` Nullable(String), + `is_fourelement_dou` Nullable(String), + 
`is_submit` Nullable(String), + `is_amount_success` Nullable(String), + `fx_person_id` Nullable(String), + `cust_quality_by_house_es` Nullable(String), + `cust_quality_by_loan` Nullable(String), + `credit_amount_version` Nullable(String), + `cust_quality_by_house_origin` Nullable(String), + `cust_quality_by_hkyy_origin` Nullable(String), + `most_use_ip` Nullable(String), + `location_city` Nullable(String), + `credit_risk_level` Nullable(String), + `oceanum_v2` Nullable(String), + `cust_quality_by_hjovd_amt` Nullable(String), + `cust_quality_by_loan_orgcnt` Nullable(String), + `cust_quality_by_hkyy_es` Nullable(String), + `cust_quality_level_pboc` Nullable(String), + `location_province` Nullable(String), + `cust_quality_by_card_es` Nullable(String), + `first_term_order_date` Nullable(String), + `oceanum_v1` Nullable(String), + `author_id` Nullable(String), + `cust_quality_by_ocean_card` Nullable(String), + `cust_quality_by_revenue` Nullable(String), + `cust_quality_by_career` Nullable(String), + `cust_quality_by_td_d7_applynum` Nullable(String), + `location_city_level` Nullable(String), + `cust_quality_by_hjovd_cnt` Nullable(String), + `ip_list` Nullable(String), + `first_consume_order_date` Nullable(String), + `bank_phone` Nullable(String), + `cust_quality_by_edu_es` Nullable(String), + `cust_quality_by_card_origin` Nullable(String), + `zl_amount_version` Nullable(String), + `brand` Nullable(String), + `cust_quality_by_ocean_shop` Nullable(String), + `fico_score` Nullable(String), + `ip` Nullable(String), + `first_bill_date` Nullable(String), + `cust_consume_level` Nullable(String), + `temp_amount_version` Nullable(String), + `cust_quality_level_withoutpboc` Nullable(String), + `cust_quality_by_edu_origin` Nullable(String), + `last_use_ip` Nullable(String), + `first_pay_time` Nullable(String), + `first_lowest_repay_time` Nullable(String), + `douyin_pay_time` Nullable(String), + `first_douyin_pay_type` Nullable(String), + `first_bankcard_pay_time` Nullable(String), 
+ `first_bill_to_term_time` Nullable(String), + `gmt_created` Nullable(String), + `is_dfq_super_white` Nullable(String), + `first_pay_type` Nullable(String), + `is_no_pwd` Nullable(String), + `term_order_num` Nullable(Int64), + `refund_consume_amt` Nullable(Float64), + `term_order_amt` Nullable(Float64), + `repaid_principal` Nullable(Float64), + `used_ratio` Nullable(Float64), + `refund_erase_service` Nullable(Float64), + `douyin_pay_amount` Nullable(Float64), + `consume_order_amt` Nullable(Float64), + `commodity_amount` Nullable(Float64), + `sum_bill_cnt` Nullable(Int64), + `refund_service` Nullable(Float64), + `repaid_penalty` Nullable(Float64), + `success_consume_order_amt` Nullable(Float64), + `onloan_principal_bal` Nullable(Float64), + `pay_amount` Nullable(Float64), + `his_overdue_amt` Nullable(Int64), + `term_repaid_service_fee` Nullable(Float64), + `his_overdue_days` Nullable(Int64), + `refund_term_amt` Nullable(Float64), + `success_term_order_num` Nullable(Int64), + `refund_term_cnt` Nullable(Float64), + `sum_principal_amt` Nullable(Float64), + `success_consume_order_num` Nullable(Int64), + `refund_consume_cnt` Nullable(Int64), + `cur_overdue_days` Nullable(Int64), + `refund_asset_preservation_fee` Nullable(Float64), + `consume_order_num` Nullable(Int64), + `refund_sum_cnt` Nullable(Int64), + `refund_penalty` Nullable(Float64), + `cur_overdue_amt` Nullable(Float64), + `normal_bill_cnt` Nullable(Int64), + `trade_order_amt` Nullable(Float64), + `trade_order_num` Nullable(Int64), + `refund_principal` Nullable(Float64), + `overdue_bill_cnt` Nullable(Int64), + `age` Nullable(Int64), + `no_pwd_quota_amount` Nullable(Float64), + `early_bill_cnt` Nullable(Int64), + `residence_city_level` Nullable(String), + `edu_degree` Nullable(String), + `user_group_label` Nullable(Int32), + `age_10` Nullable(String), + `residence_province_name` Nullable(String), + `occupation` Nullable(String), + `user_group_label_name` Nullable(String), + `credit_amount` Nullable(Int64), + 
`entry_ocr_num` Nullable(Int64), + `first_payment_installment_time` Nullable(String), + `first_entry_ocr_time` Nullable(String), + `repurchase_tag` Nullable(String), + `vertical_category_new` Nullable(String), + `vertical_category_new_dfq` Nullable(String), + `his_credit_apply_pass_cum` Nullable(Int32), + `first_credit_apply_time` Nullable(String), + `destroy_reason` Nullable(String), + `chinese_name` Nullable(String), + `second_category` Nullable(String), + `bind_fx_account_id` Nullable(String), + `first_credit_apply_pass_time` Nullable(String), + `is_used_zl_amount` Nullable(String), + `first_category` Nullable(String), + `second_tag` Nullable(String), + `ec_tag` Nullable(String), + `first_tag` Nullable(String), + `user_nr_ltv_level` Nullable(String), + `second_sec_tag` Nullable(String), + `account_rank_desc` Nullable(Int32), + `pre_record_annualrate` Nullable(String), + `pre_record_organization_id` Nullable(String), + `min_bind_gmt_created` Nullable(Int64), + `new_customer_dfq_order_num_30days_score` Nullable(Float64), + `old_pay_demand_score` Nullable(Float64), + `pre_record_dailyrate` Nullable(String), + `pre_record_temp_amount_begin_time` Nullable(String), + `new_pay_demand_score` Nullable(Float64), + `pre_record_billpenaltydailyrate` Nullable(String), + `min_nopw_open_time` Nullable(String), + `his_batch_no` Nullable(String), + `pre_record_temp_amount` Nullable(String), + `product_terms_price_monthly_rate` Nullable(Float64), + `new_customer_ecom_order_num_30days_score` Nullable(Float64), + `fxj_first_loan_req_pay_date` Nullable(String), + `is_fxj_per_overdue` Nullable(String), + `min_gmt_created` Nullable(Int64), + `submit_user_id` Nullable(String), + `is_pre_record_used` Nullable(Int64), + `pre_record_zx_amount` Nullable(String), + `pre_record_monthlyrate` Nullable(String), + `terms_price_monthly_rate` Nullable(Float64), + `pre_record_penaltydailyrate` Nullable(String), + `pre_record_expire_time` Nullable(Int64), + `ops_risk_lvl` Nullable(String), + 
`fxj_submit_source` Nullable(String), + `pre_record_credit_id` Nullable(String), + `first_order_second_voucher_type` Nullable(String), + `is_click_driver_banner` Nullable(String), + `pre_record_temp_amount_end_time` Nullable(String), + `new_customer_dfq_period_order_num_30days_score` Nullable(Float64), + `min_nopw_time` Nullable(String), + `is_fxj_quota_submit` Nullable(String), + `pre_record_zl_amount_end_time` Nullable(String), + `min_debit_gmt_created` Nullable(String), + `first_order_first_voucher_type` Nullable(String), + `is_pre_record_bind_after_qualified_flag` Nullable(String), + `pre_record_create_time` Nullable(Int64), + `his_voucher_name` Nullable(String), + `old_period_demand_score` Nullable(Float64), + `pre_record_zl_amount` Nullable(String), + `pre_record_zl_amount_begin_time` Nullable(String), + `pre_record_fund_code` Nullable(String), + `bill_lowest_repay_price_monthly_rate` Nullable(Float64), + `is_fxj_overdue` Nullable(String), + `pre_record_decision` Nullable(String), + `fxj_quota_begin_time` Nullable(String), + `new_period_demand_score` Nullable(Float64), + `pre_record_amount` Nullable(String), + `fxj_quota_submit_time` Nullable(String), + `fxj_quota_amount` Nullable(String), + `prior_repay_open_time_first` Nullable(String), + `is_prior_repay` Nullable(Int32), + `prior_repay_open_time_last` Nullable(String), + `is_out_assets_credit` Nullable(String), + `last_pay_type` Nullable(String), + `source_name` Nullable(String), + `out_assets_first_apply_time` Nullable(String), + `org_list` Nullable(String), + `payment_installment_cnt` Nullable(String), + `prior_repay_close_time_first` Nullable(String), + `out_assets_credit_amount` Nullable(String), + `lowest_repay_cnt` Nullable(String), + `last_payment_installment_time` Nullable(String), + `org_num` Nullable(String), + `last_pay_time` Nullable(String), + `first_label` Nullable(String), + `bill_to_term_cnt` Nullable(String), + `disable_date` Nullable(String), + `last_lowest_repay_time` Nullable(String), + 
`out_assets_first_credit_time` Nullable(String), + `last_douyin_pay_time` Nullable(String), + `prior_repay_open_source_last` Nullable(String), + `prior_repay_close_time_last` Nullable(String), + `last_douyin_pay_type` Nullable(String), + `last_bill_to_term_time` Nullable(String), + `prior_repay_open_source_first` Nullable(String), + `disable_all_reason` Nullable(String), + `product_id` Nullable(String), + `shop_id` Nullable(String), + `first_label0` Nullable(String), + `room_id` Nullable(String), + `disable_date0` Nullable(String), + `order_id` Nullable(String), + `disable_all_reason0` Nullable(String), + `livedetect_status_code` Nullable(String), + `before_credit_bytepay_sensitive_v3_score` Nullable(String), + `user_loyal_zfb_sub_pay_type` Nullable(String), + `user_loyal_pay_type` Nullable(String), + `contract_sign_time` Nullable(String), + `is_sign_contract` Nullable(String), + `before_credit_ds_ttpay_30_cnt` Nullable(String), + `before_credit_pay_type` Nullable(String), + `user_loyal_wx_sub_pay_type` Nullable(String), + `before_credit_ds_yhk_30_cnt` Nullable(String), + `before_credit_ds_dyzf_30_cnt` Nullable(String), + `before_credit_old_pay_bytepay_score` Nullable(String), + `livedetect_time` Nullable(Int64), + `user_loyal_dy_sub_pay_type` Nullable(String), + `before_credit_new_pay_demand_score` Nullable(String), + `before_credit_ds_dyzf_30_rate` Nullable(String), + `credit_user_nr_ltv_level` Nullable(String), + `after_30_ecom_amount_sum` Nullable(Float64), + `before_30_ecom_amount_sum` Nullable(Float64), + `first_ecom_amount_sum` Nullable(Float64), + `first_ecom_order_num` Nullable(String), + `before_credit_30_ecom_amount_sum` Nullable(Float64), + `after_30_ecom_order_num` Nullable(String), + `before_30_ecom_order_num` Nullable(String), + `ssls_amount` Nullable(Int64), + `ssls_used_amount` Nullable(Int64), + `is_grant_ssls` Nullable(Int32), + `first_no_pwd_quota` Nullable(Int64), + `last_close_no_pwd_time` Nullable(String), + `card_bind_source` 
Nullable(String), + `first_no_pwd_source` Nullable(String), + `first_close_no_pwd_time` Nullable(String), + `is_bind_card_1day_before_credit` Nullable(String), + `cur_act_ssls_used_amount` Nullable(Int64), + `all_ssls_amount` Nullable(Int64), + `all_act_ssls_used_amount` Nullable(Int64), + `act_ssls_amount` Nullable(Int64), + `cur_ssls_used_amount` Nullable(Int64), + `fxj_ocr_status_code` Nullable(String), + `first_dfq_ocr_create_time` Nullable(String), + `first_ocr_product` Nullable(String), + `first_dfq_ocr_update_time` Nullable(String), + `first_ocr_create_time` Nullable(String), + `first_fxj_ocr_create_time` Nullable(String), + `is_repay_date_modify` Nullable(Int64), + `target_billing_day` Nullable(Int64), + `target_repay_day` Nullable(Int64), + `origin_repay_day` Nullable(Int64), + `last_repay_date_modify_time` Nullable(Int64), + `origin_billing_day` Nullable(Int64), + `repay_date_modify_num` Nullable(String), + `all_lifecycle_first` Nullable(String), + `ecom_lifecycle_first` Nullable(String), + `all_lifecycle_third` Nullable(String), + `all_lifecycle_second` Nullable(String), + `ecom_lifecycle_second` Nullable(String), + `ecom_lifecycle_third` Nullable(String), + `last_credit_source` Nullable(String), + `cur_credit_bank_num` Nullable(String), + `user_loyal_wx_sub_pay_type_new` Nullable(String), + `first_credit_source` Nullable(String), + `credit_before_debit_bank_num` Nullable(String), + `user_loyal_dy_sub_pay_type_new` Nullable(String), + `is_bind_card_debit_1day_before_credit` Nullable(String), + `first_debit_source` Nullable(String), + `min_credit_after_source` Nullable(String), + `user_loyal_pay_type_new` Nullable(String), + `is_bind_card_credit_1day_before_credit` Nullable(String), + `user_loyal_zfb_sub_pay_type_new` Nullable(String), + `cur_debit_bank_num` Nullable(String), + `last_yq_time` Nullable(String), + `credit_before_credit_bank_num` Nullable(String), + `min_credit_after_gmt_created` Nullable(String), + `last_debit_source` Nullable(String), + 
`age_5` Nullable(String), + `consumption_level` Nullable(String), + `user_demo_group` Nullable(String), + `recommend_age` Nullable(String), + `consumption_level_new` Nullable(String), + `is_mall_new_user` Nullable(Int64), + `cust_flag` Nullable(String), + `first_un_payment_installment_time` Nullable(String), + `last_installment_time` Nullable(String), + `last_un_payment_installment_time` Nullable(String), + `first_yq_time` Nullable(String), + `cur_xels_used_amount` Nullable(String), + `all_xels_amount` Nullable(String), + `effect_card_num` Nullable(String), + `is_bind_1day_before_date_effect` Nullable(String), + `xels_used_amount` Nullable(String), + `is_bind_card_1day_before_credit_effect` Nullable(String), + `xels_amount` Nullable(String), + `first_quota_lock_time` Nullable(String), + `cur_act_xels_used_amount` Nullable(String), + `lock_status` Nullable(Int64), + `is_quota_locked_ever` Nullable(Int64), + `is_quota_locked` Nullable(Int64), + `is_bind_debit_card_1day_before_credit_effect` Nullable(String), + `last_quota_lock_time` Nullable(String), + `all_act_xels_used_amount` Nullable(String), + `act_xels_amount` Nullable(String), + `uid_rank_desc` Nullable(String), + `ranks` Nullable(String), + `cust_flag0` Nullable(String), + `uid_rank_asc` Nullable(String), + `uid_min_credit_apply_pass_time` Nullable(String), + `uid_min_first_order_date` Nullable(String), + `first_defer_one_period_repay` Nullable(String), + `last_defer_one_period_repay` Nullable(String), + `period_installment_cnt` Nullable(String), + `is_allow_later_repay` Nullable(String), + `total_order_percentile_recent_90days` Nullable(String), + `last_quota_unlock_time` Nullable(String), + `first_installment_time` Nullable(String), + `first_quota_unlock_time` Nullable(String), + `total_amount_percentile_recent_90days` Nullable(String), + `rank_type` Nullable(String), + `installment_type` Nullable(String), + `bdsh_xy_30_trade_amount` Nullable(Int64), + `bdsh_xy_30_cnt` Nullable(Int64), + 
`total_ttpay_30_cnt` Nullable(Int64), + `bdsh_ttpay_30_cnt` Nullable(Int64), + `ds_xy_30_trade_amount` Nullable(Int64), + `ds_ttpay_30_cnt` Nullable(Int64), + `ds_ttpay_30_trade_amount` Nullable(Int64), + `total_xy_30_trade_amount` Nullable(Int64), + `total_ttpay_30_trade_amount` Nullable(Int64), + `total_xy_30_cnt` Nullable(Int64), + `bdsh_ttpay_30_trade_amount` Nullable(Int64), + `ds_xy_30_cnt` Nullable(Int64), + `repay_cnt_active` Nullable(Int64), + `repay_cnt` Nullable(Int64), + `repay_cnt_withhold` Nullable(Int64), + `repay_cnt_aheadall` Nullable(Int64), + `repay_cnt_early_settle` Nullable(Int64), + `repay_cnt_prepayment` Nullable(Int64), + `first_mx_date` Nullable(String), + `mx_cnt` Nullable(Int64), + `is_huabei` Nullable(String), + `residence_city_name` Nullable(String), + `is_master` Nullable(String), + `is_sign_loan` Nullable(String), + `after_first_trade_30_bdsh_creditpay_avg_amount` Nullable(Float64), + `after_first_trade_30_bdsh_cnt` Nullable(Int64), + `before_first_trade_30_bdsh_avg_amount` Nullable(Float64), + `before_first_trade_30_bdsh_cnt` Nullable(Int64), + `period12_discount_ratio` Nullable(Int64), + `period6_discount_ratio` Nullable(Int64), + `tx_amount` Nullable(Float64), + `last_mx_date` Nullable(String), + `tx_cnt` Nullable(Int64), + `last_tx_date` Nullable(String), + `period3_discount_ratio` Nullable(Int64), + `mx_amount` Nullable(Float64), + `is_sign_withhold` Nullable(Int32), + `first_tx_date` Nullable(String), + `first_bill_date0` Nullable(String), + `is_equal_phone_num` Nullable(Int32), + `period24_discount_ratio` Nullable(Int64), + `fxj_livedetect_status_code` Nullable(Int32), + `pay_livedetect_status_code` Nullable(Int32), + `first_subscription_time` Nullable(Int64), + `max_quota_limit_set_time` Nullable(Int64), + `first_subscription_source` Nullable(String), + `last_subscription_source` Nullable(String), + `backup_org_list` Array(Nullable(String)), + `min_quota_limit_set_time` Nullable(Int64), + `master_org_id` Nullable(String), + 
`pay_ocr_status_code` Nullable(Int32), + `quota_limit_amount` Nullable(String), + `last_subscription_time` Nullable(Int64), + `subscription_status` Nullable(Int32), + `is_quota_limit` Nullable(Int32), + `bind_card_before_loan_status` Nullable(Int64), + `backup_org_num` Nullable(Int32), + `term_risk_level` Nullable(String), + `cust_list_label` Nullable(String), + `dyyf_cust_seg_v3` Nullable(String), + `payment_installment_risk_level` Nullable(String), + `last_30_fffq_cnt` Nullable(String), + `dyyf_cust_seg` Nullable(String), + `credit_tag` Nullable(String), + `account_label_json` Nullable(String), + `pay_livedetect_sync_status` Nullable(Int32), + `payment_installment_lifecycle_type` Nullable(String), + `last_order_installment_time` Nullable(String), + `first_order_installment_time` Nullable(String), + `first_yq_type` Nullable(String), + `ua_creditpay_high_natural_will` Nullable(Float32), + `last_30d_main_page_pv` Nullable(Int64), + `last_30d_main_page_days` Nullable(Int64) +) +ENGINE = CnchMergeTree() order by row_id_kmtq3k; +SELECT + count(multiIf(isNotNull(credit_apply_pass_time) AND (auto_audit_status = '3'), submit_host_user_id, NULL)) AS _count_1700010380910, + countDistinct(multiIf(isNotNull(CAST(first_term_order_date, 'Nullable(Date)')), submit_host_user_id, NULL)) AS _1700018901243, + countDistinct(multiIf(isNotNull(CAST(to_date(first_payment_installment_time), 'Nullable(Date)')), submit_host_user_id, NULL)) AS _1700018805062, + countDistinct(multiIf(isNotNull(CAST(first_term_order_date, 'Nullable(Date)')) OR (CAST(first_yq_time, 'Nullable(Date)') > '2021-07-01'), submit_host_user_id, NULL)) / countDistinct(multiIf(auto_audit_status = '3', fx_account_id, NULL)) AS _1700021754761, + uniq(multiIf(dateDiff(CAST(first_order_date, 'Nullable(Date)'), CAST(first_term_order_date, 'Nullable(Date)')) <= 30, fx_account_id, NULL)) / uniq(multiIf(is_order = '1', multiIf(isNotNull(credit_apply_pass_time) AND (auto_audit_status = '3'), submit_host_user_id, NULL), NULL)) AS 
_1700018902349 +FROM aeolus_data_table_8_352783_prod +WHERE ((p_date >= '2024-06-05') AND (p_date <= '2024-06-05')) AND (auto_audit_status = '3') AND (uid_rank_desc = '1') +LIMIT 1000; \ No newline at end of file From d60c53fda53327e5980a0c86c23c04ef60b910b7 Mon Sep 17 00:00:00 2001 From: fredwang Date: Thu, 1 Aug 2024 03:12:41 +0000 Subject: [PATCH 011/292] Merge 'leaf22' into 'cnch-2.2' fix(clickhousech@m-4549347174): count segments containing table scan in right way See merge request: !22548 # Conflicts: # .codebase/pipelines/ci.yaml --- src/Interpreters/DAGGraph.h | 9 ++++--- .../DistributedStages/BSPScheduler.cpp | 6 ++--- .../DistributedStages/PlanSegmentSplitter.cpp | 7 ++++-- .../DistributedStages/Scheduler.cpp | 24 +++++++++---------- .../DistributedStages/Scheduler.h | 6 ++--- src/Interpreters/NodeSelector.cpp | 8 +++---- src/Interpreters/NodeSelector.h | 2 +- src/Interpreters/SegmentScheduler.cpp | 16 ++++++------- 8 files changed, 40 insertions(+), 38 deletions(-) diff --git a/src/Interpreters/DAGGraph.h b/src/Interpreters/DAGGraph.h index 54896169d49..fb8880a7392 100644 --- a/src/Interpreters/DAGGraph.h +++ b/src/Interpreters/DAGGraph.h @@ -30,7 +30,7 @@ struct PlanSegmentsStatus }; using PlanSegmentsStatusPtr = std::shared_ptr; -using Source = std::unordered_set; +using SegmentIds = std::unordered_set; using WorkerInfoSet = std::unordered_set, HostWithPorts::IsSameEndpoint>; using PlanSegmentId = size_t; using StorageUnions = std::vector>; @@ -77,10 +77,9 @@ struct DAGGraph return source_pruner; } - /// all segments containing only table scan - Source sources; - /// all segments containing at least one table scan - Source any_tables; + SegmentIds leaf_segments; + /// all segments contain at least table scan + SegmentIds segments_has_table_scan; size_t final = std::numeric_limits::max(); std::set scheduled_segments; std::unordered_map id_to_segment; diff --git a/src/Interpreters/DistributedStages/BSPScheduler.cpp 
b/src/Interpreters/DistributedStages/BSPScheduler.cpp index 8e4ffb7c152..c50cac6d4c9 100644 --- a/src/Interpreters/DistributedStages/BSPScheduler.cpp +++ b/src/Interpreters/DistributedStages/BSPScheduler.cpp @@ -52,13 +52,13 @@ void BSPScheduler::submitTasks(PlanSegment * plan_segment_ptr, const SegmentTask else { pending_task_instances.for_nodes[selector_info.worker_nodes[i].address].emplace(task.task_id, i); - if (task.is_source) + if (task.has_table_scan) { source_task_count_on_workers[selector_info.worker_nodes[i].address] += 1; } } } - if (task.is_source) + if (task.has_table_scan) { std::unordered_map source_task_index_on_workers; for (size_t i = 0; i < selector_info.worker_nodes.size(); i++) @@ -250,7 +250,7 @@ bool BSPScheduler::retryTaskIfPossible(size_t segment_id, UInt64 parallel_index) } { std::unique_lock lk(nodes_alloc_mutex); - if (dag_graph_ptr->any_tables.contains(segment_id) || + if (dag_graph_ptr->segments_has_table_scan.contains(segment_id) || // for local no repartion and local may no repartition, schedule to original node NodeSelector::tryGetLocalInput(dag_graph_ptr->getPlanSegmentPtr(segment_id)) || // in case all workers except servers are occupied, simply retry at last node diff --git a/src/Interpreters/DistributedStages/PlanSegmentSplitter.cpp b/src/Interpreters/DistributedStages/PlanSegmentSplitter.cpp index e7dfca634e3..1549a6fb630 100644 --- a/src/Interpreters/DistributedStages/PlanSegmentSplitter.cpp +++ b/src/Interpreters/DistributedStages/PlanSegmentSplitter.cpp @@ -123,8 +123,11 @@ void PlanSegmentSplitter::split(QueryPlan & query_plan, PlanSegmentContext & pla auto first = sizes[0]; for (auto size : sizes) { - if (size != first) - throw Exception("Segment parallel size not match", ErrorCodes::LOGICAL_ERROR); + // TODO(wangtao.vip): check with @JingPeng whether it is right to skip (error in tpcds 05/08). 
+ if (size != first && !plan_segment_context.context->getSettingsRef().bsp_mode) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "Segment parallel size not match {} and {}", size, first); + } } } } diff --git a/src/Interpreters/DistributedStages/Scheduler.cpp b/src/Interpreters/DistributedStages/Scheduler.cpp index 09f7fa4bb1b..8e1fb1b4880 100644 --- a/src/Interpreters/DistributedStages/Scheduler.cpp +++ b/src/Interpreters/DistributedStages/Scheduler.cpp @@ -89,7 +89,7 @@ TaskResult Scheduler::scheduleTask(PlanSegment * plan_segment_ptr, const Segment NodeSelectorResult selector_info; { std::unique_lock lock(node_selector_result_mutex); - auto selector_result = node_selector_result.emplace(task.task_id, node_selector.select(plan_segment_ptr, task.is_source)); + auto selector_result = node_selector_result.emplace(task.task_id, node_selector.select(plan_segment_ptr, task.has_table_scan)); selector_info = selector_result.first->second; } prepareTask(plan_segment_ptr, selector_info.worker_nodes.size()); @@ -136,7 +136,7 @@ void Scheduler::schedule() { Stopwatch sw; genTopology(); - genSourceTasks(); + genLeafTasks(); /// Leave final segment alone. 
while (!dag_graph_ptr->plan_segment_status_ptr->is_final_stage_start) @@ -176,20 +176,20 @@ void Scheduler::schedule() LOG_DEBUG(log, "Scheduling takes {} ms", sw.elapsedMilliseconds()); } -void Scheduler::genSourceTasks() +void Scheduler::genLeafTasks() { - LOG_DEBUG(log, "Begin generate source tasks"); + LOG_DEBUG(log, "Begin generate leaf tasks"); auto batch_task = std::make_shared(); - batch_task->reserve(dag_graph_ptr->sources.size()); - for (auto source_id : dag_graph_ptr->sources) + batch_task->reserve(dag_graph_ptr->leaf_segments.size()); + for (auto leaf_id : dag_graph_ptr->leaf_segments) { - LOG_TRACE(log, "Generate task for source segment {}", source_id); - if (source_id == dag_graph_ptr->final) + LOG_TRACE(log, "Generate task for leaf segment {}", leaf_id); + if (leaf_id == dag_graph_ptr->final) continue; - batch_task->emplace_back(source_id, true); - plansegment_topology.erase(source_id); - LOG_TRACE(log, "Task for source segment {} generated", source_id); + batch_task->emplace_back(leaf_id, true); + plansegment_topology.erase(leaf_id); + LOG_TRACE(log, "Task for leaf segment {} generated", leaf_id); } addBatchTask(std::move(batch_task)); } @@ -256,7 +256,7 @@ void Scheduler::removeDepsAndEnqueueTask(const SegmentTask & task) LOG_INFO(log, "Erase dependency {} for segment {}", task_id, id); if (dependencies.empty()) { - batch_task->emplace_back(id); + batch_task->emplace_back(id, dag_graph_ptr->segments_has_table_scan.contains(id)); } } for (const auto & t : *batch_task) diff --git a/src/Interpreters/DistributedStages/Scheduler.h b/src/Interpreters/DistributedStages/Scheduler.h index c13b84d140a..947382bcf55 100644 --- a/src/Interpreters/DistributedStages/Scheduler.h +++ b/src/Interpreters/DistributedStages/Scheduler.h @@ -33,12 +33,12 @@ enum class TaskStatus : uint8_t /// Indicates a plan segment. 
struct SegmentTask { - explicit SegmentTask(size_t task_id_, bool is_source_ = false) : task_id(task_id_), is_source(is_source_) + explicit SegmentTask(size_t task_id_, bool has_table_scan_ = false) : task_id(task_id_), has_table_scan(has_table_scan_) { } // plan segment id. size_t task_id; - bool is_source; + bool has_table_scan; }; /// Indicates a plan segment instance. @@ -153,7 +153,7 @@ class Scheduler std::atomic stopped{false}; void genTopology(); - void genSourceTasks(); + void genLeafTasks(); bool getBatchTaskToSchedule(BatchTaskPtr & task); virtual void sendResources(PlanSegment * plan_segment_ptr) { diff --git a/src/Interpreters/NodeSelector.cpp b/src/Interpreters/NodeSelector.cpp index 101b99dce04..126fc5c9c75 100644 --- a/src/Interpreters/NodeSelector.cpp +++ b/src/Interpreters/NodeSelector.cpp @@ -160,7 +160,7 @@ NodeSelectorResult SourceNodeSelector::select(PlanSegment * plan_segment_ptr, Co { sum += current_size; } - size_t avg = sum / plan_segment_ptr->getParallelSize(); + size_t avg = sum / plan_segment_ptr->getParallelSize() + 1; if (sum < plan_segment_ptr->getParallelSize()) sum = 0; if (sum > 0) @@ -338,16 +338,16 @@ NodeSelectorResult LocalityNodeSelector::select(PlanSegment * plan_segment_ptr, return result; } -NodeSelectorResult NodeSelector::select(PlanSegment * plan_segment_ptr, bool is_source) +NodeSelectorResult NodeSelector::select(PlanSegment * plan_segment_ptr, bool has_table_scan) { NodeSelectorResult result; auto segment_id = plan_segment_ptr->getPlanSegmentId(); - LOG_TRACE(log, "Begin to select nodes for segment, id: {}, is_source: {}", segment_id, is_source); + LOG_TRACE(log, "Begin to select nodes for segment, id: {}, has table scan: {}", segment_id, has_table_scan); if (isLocal(plan_segment_ptr)) { result = local_node_selector.select(plan_segment_ptr, query_context); } - else if (is_source) + else if (has_table_scan) { result = source_node_selector.select(plan_segment_ptr, query_context, dag_graph_ptr); } diff --git 
a/src/Interpreters/NodeSelector.h b/src/Interpreters/NodeSelector.h index bd5cd743ee5..5794a404b1b 100644 --- a/src/Interpreters/NodeSelector.h +++ b/src/Interpreters/NodeSelector.h @@ -225,7 +225,7 @@ class NodeSelector { } - NodeSelectorResult select(PlanSegment * plan_segment_ptr, bool is_source); + NodeSelectorResult select(PlanSegment * plan_segment_ptr, bool has_table_scan); void setParallelIndexAndSourceAddrs(PlanSegment * plan_segment_ptr, NodeSelectorResult * result); static PlanSegmentInputPtr tryGetLocalInput(PlanSegment * plan_segment_ptr); diff --git a/src/Interpreters/SegmentScheduler.cpp b/src/Interpreters/SegmentScheduler.cpp index 87f1da5ae9d..8ebcaf70036 100644 --- a/src/Interpreters/SegmentScheduler.cpp +++ b/src/Interpreters/SegmentScheduler.cpp @@ -499,8 +499,8 @@ void SegmentScheduler::buildDAGGraph(PlanSegmentTree * plan_segments_ptr, std::s // value, readnothing, system table if (plan_segment_ptr->getPlanSegmentInputs().empty()) { - graph_ptr->sources.insert(plan_segment_ptr->getPlanSegmentId()); - graph_ptr->any_tables.insert(plan_segment_ptr->getPlanSegmentId()); + graph_ptr->leaf_segments.insert(plan_segment_ptr->getPlanSegmentId()); + // graph_ptr->segments_has_table_scan.insert(plan_segment_ptr->getPlanSegmentId()); } // source if (!plan_segment_ptr->getPlanSegmentInputs().empty()) @@ -515,9 +515,9 @@ void SegmentScheduler::buildDAGGraph(PlanSegmentTree * plan_segments_ptr, std::s any_tables = true; } if (all_tables) - graph_ptr->sources.insert(plan_segment_ptr->getPlanSegmentId()); + graph_ptr->leaf_segments.insert(plan_segment_ptr->getPlanSegmentId()); if (any_tables) - graph_ptr->any_tables.insert(plan_segment_ptr->getPlanSegmentId()); + graph_ptr->segments_has_table_scan.insert(plan_segment_ptr->getPlanSegmentId()); } // final stage if (plan_segment_ptr->getPlanSegmentOutput()->getPlanSegmentType() == PlanSegmentType::OUTPUT) @@ -549,9 +549,9 @@ void SegmentScheduler::buildDAGGraph(PlanSegmentTree * plan_segments_ptr, std::s } } // 
do some check - // 1. check source or final is empty - if (graph_ptr->sources.empty()) - throw Exception("Logical error: source is empty", ErrorCodes::LOGICAL_ERROR); + // 1. check if leaf segments or the final is empty + if (graph_ptr->leaf_segments.empty()) + throw Exception("Logical error: no leaf segment", ErrorCodes::LOGICAL_ERROR); if (graph_ptr->final == std::numeric_limits::max()) throw Exception("Logical error: final is empty", ErrorCodes::LOGICAL_ERROR); @@ -656,7 +656,7 @@ PlanSegmentSet SegmentScheduler::getIOPlanSegmentInstanceIDs(const String & quer throw Exception("query_id-" + query_id + " does not exist in scheduler query map", ErrorCodes::LOGICAL_ERROR); const auto & dag_ptr = iter->second; PlanSegmentSet res; - for (auto && segment_id : dag_ptr->any_tables) + for (auto && segment_id : dag_ptr->segments_has_table_scan) { /// wont wait for final segment, because it is already logged in progress_callback if (segment_id != dag_ptr->final) From 091e81afe05ffe6531ac30ddf24e43cb741cc948 Mon Sep 17 00:00:00 2001 From: fredwang Date: Thu, 1 Aug 2024 03:24:35 +0000 Subject: [PATCH 012/292] Merge 'fix-context-lock-cnch-2.2' into 'cnch-2.2' fix(clickhousech@m-4172471020): [cp] fix high context lock contention See merge request: !22560 # Conflicts: # src/Interpreters/Context.cpp --- programs/server/Server.cpp | 1 - src/Access/AccessControlManager.cpp | 6 +- src/Access/AccessControlManager.h | 2 +- src/Access/ContextAccess.cpp | 5 + src/Access/ContextAccess.h | 2 + src/Common/callOnce.h | 16 + src/Interpreters/Context.cpp | 941 ++++++++++++++-------------- src/Interpreters/Context.h | 88 +-- src/Interpreters/Context_fwd.h | 4 - src/Server/HTTPHandler.cpp | 4 +- 10 files changed, 551 insertions(+), 518 deletions(-) create mode 100644 src/Common/callOnce.h diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index e13643ca0e1..a2d09604dbc 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -595,7 +595,6 @@ int 
Server::main(const std::vector & /*args*/) global_context->initCnchConfig(config()); global_context->setBlockPrivilegedOp(config().getBool("restrict_tenanted_users_to_privileged_operations", false)); global_context->initRootConfig(config()); - global_context->initPreloadThrottler(); const auto & root_config = global_context->getRootConfig(); diff --git a/src/Access/AccessControlManager.cpp b/src/Access/AccessControlManager.cpp index 00dcbe65577..53d9380cf21 100644 --- a/src/Access/AccessControlManager.cpp +++ b/src/Access/AccessControlManager.cpp @@ -437,8 +437,7 @@ void AccessControlManager::checkSettingNameIsAllowed(const std::string_view & se custom_settings_prefixes->checkSettingNameIsAllowed(setting_name); } - -std::shared_ptr AccessControlManager::getContextAccess( +ContextAccessParams AccessControlManager::getContextAccessParams( const UUID & user_id, const std::vector & current_roles, bool use_default_roles, @@ -474,8 +473,7 @@ std::shared_ptr AccessControlManager::getContextAccess( boost::trim(last_forwarded_address); params.forwarded_address = last_forwarded_address; } - - return getContextAccess(params); + return params; } diff --git a/src/Access/AccessControlManager.h b/src/Access/AccessControlManager.h index 6e25122615b..461612a3d07 100644 --- a/src/Access/AccessControlManager.h +++ b/src/Access/AccessControlManager.h @@ -131,7 +131,7 @@ class AccessControlManager : public MultipleAccessStorage void setSelectFromMySQLRequiresGrant(bool enable) { select_from_mysql_requires_grant = enable; } bool doesSelectFromMySQLRequireGrant() const { return select_from_mysql_requires_grant; } - std::shared_ptr getContextAccess( + ContextAccessParams getContextAccessParams( const UUID & user_id, const std::vector & current_roles, bool use_default_roles, diff --git a/src/Access/ContextAccess.cpp b/src/Access/ContextAccess.cpp index 296dcf2cabb..41d8393d500 100644 --- a/src/Access/ContextAccess.cpp +++ b/src/Access/ContextAccess.cpp @@ -945,4 +945,9 @@ void 
ContextAccess::checkAdminOption(const std::vector & role_ids) const { void ContextAccess::checkAdminOption(const std::vector & role_ids, const Strings & names_of_roles) const { checkAdminOptionImpl(role_ids, names_of_roles); } void ContextAccess::checkAdminOption(const std::vector & role_ids, const std::unordered_map & names_of_roles) const { checkAdminOptionImpl(role_ids, names_of_roles); } +bool ContextAccessParams::dependsOnSettingName(std::string_view setting_name) +{ + return (setting_name == "readonly") || (setting_name == "allow_ddl") || (setting_name == "allow_introspection_functions"); +} + } diff --git a/src/Access/ContextAccess.h b/src/Access/ContextAccess.h index 513685c5f43..30028d4e607 100644 --- a/src/Access/ContextAccess.h +++ b/src/Access/ContextAccess.h @@ -60,6 +60,8 @@ struct ContextAccessParams friend bool operator >(const ContextAccessParams & lhs, const ContextAccessParams & rhs) { return rhs < lhs; } friend bool operator <=(const ContextAccessParams & lhs, const ContextAccessParams & rhs) { return !(rhs < lhs); } friend bool operator >=(const ContextAccessParams & lhs, const ContextAccessParams & rhs) { return !(lhs < rhs); } + + static bool dependsOnSettingName(std::string_view setting_name); }; diff --git a/src/Common/callOnce.h b/src/Common/callOnce.h new file mode 100644 index 00000000000..402bb7365a1 --- /dev/null +++ b/src/Common/callOnce.h @@ -0,0 +1,16 @@ +#pragma once + +#include + +namespace DB +{ + +using OnceFlag = std::once_flag; + +template +void callOnce(OnceFlag & flag, Callable && func, Args&&... 
args) +{ + std::call_once(flag, std::forward(func), std::forward(args)...); +} + +} diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index f2b3ea4a227..ed413aeadfb 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -163,6 +163,7 @@ #include #include #include +#include #include #include #include @@ -372,21 +373,31 @@ struct ContextSharedPart InterserverIOHandler interserver_io_handler; /// Handler for interserver communication. mutable std::optional buffer_flush_schedule_pool; /// A thread pool that can do background flush for Buffer tables. + mutable OnceFlag schedule_pool_initialized; mutable std::optional schedule_pool; /// A thread pool that can run different jobs in background (used in replicated tables) mutable std::optional distributed_schedule_pool; /// A thread pool that can run different jobs in background (used for distributed sends) mutable std::optional message_broker_schedule_pool; /// A thread pool that can run different jobs in background (used for message brokers, like RabbitMQ and Kafka) + mutable OnceFlag readers_initialized; mutable AsynchronousReaderPtr asynchronous_remote_fs_reader; - mutable ThrottlerPtr disk_cache_throttler; - - mutable std::array, SchedulePool::Size> extra_schedule_pools; + struct ExtraSchedulePool + { + OnceFlag is_initialized; + std::unique_ptr pool; + }; + mutable std::array extra_schedule_pools; + std::optional vector_index_loading_thread_pool; + mutable OnceFlag disk_cache_throttler_initialized; + mutable ThrottlerPtr disk_cache_throttler; + mutable OnceFlag preload_throttler_initialized; + mutable ThrottlerPtr preload_throttler; /// may be nullptr + mutable OnceFlag replicated_fetches_throttler_initialized; mutable ThrottlerPtr replicated_fetches_throttler; /// A server-wide throttler for replicated fetches + mutable OnceFlag replicated_sends_throttler_initialized; mutable ThrottlerPtr replicated_sends_throttler; /// A server-wide throttler for replicated sends - 
mutable ThrottlerPtr preload_throttler; - MultiVersion macros; /// Substitutions extracted from config. std::unique_ptr ddl_worker; /// Process ddl commands from zk. /// Rules for selecting the compression settings, depending on the size of the part. @@ -618,7 +629,7 @@ struct ContextSharedPart distributed_schedule_pool.reset(); message_broker_schedule_pool.reset(); for (auto & p : extra_schedule_pools) - p.reset(); + p.pool.reset(); ddl_worker.reset(); /// Stop trace collector if any @@ -651,10 +662,11 @@ struct ContextSharedPart } }; +ContextData::ContextData() = default; +ContextData::ContextData(const ContextData &) = default; Context::Context() = default; -Context::Context(const Context &) = default; -Context & Context::operator=(const Context &) = default; +Context::Context(const Context & rhs) : ContextData(rhs), std::enable_shared_from_this(rhs) {} SharedContextHolder::SharedContextHolder(SharedContextHolder &&) noexcept = default; SharedContextHolder & SharedContextHolder::operator=(SharedContextHolder &&) = default; @@ -669,10 +681,10 @@ void SharedContextHolder::reset() shared.reset(); } -ContextMutablePtr Context::createGlobal(ContextSharedPart * shared) +ContextMutablePtr Context::createGlobal(ContextSharedPart * shared_part) { auto res = std::shared_ptr(new Context); - res->shared = shared; + res->shared = shared_part; return res; } @@ -690,7 +702,7 @@ SharedContextHolder Context::createShared() void Context::addSessionView(StorageID view_table_id, StoragePtr view_storage) { - auto lock = getLock(); + auto lock = getLocalLock(); if (session_views_cache.find(view_table_id) != session_views_cache.end()) return; session_views_cache.emplace(view_table_id, view_storage); @@ -698,21 +710,24 @@ void Context::addSessionView(StorageID view_table_id, StoragePtr view_storage) StoragePtr Context::getSessionView(StorageID view_table_id) { - auto lock = getLock(); - auto it = session_views_cache.find(view_table_id); - if (it != session_views_cache.end()) - return 
it->second; - else { - StoragePtr view_storage = DatabaseCatalog::instance().tryGetTable(view_table_id, shared_from_this()); - if (view_storage) - session_views_cache.emplace(view_table_id, view_storage); - return view_storage; + auto lock = getLocalSharedLock(); + auto it = session_views_cache.find(view_table_id); + if (it != session_views_cache.end()) + return it->second; } + + /// should be done outside the context lock, otherwise may deadlock + StoragePtr view_storage = DatabaseCatalog::instance().tryGetTable(view_table_id, shared_from_this()); + + if (view_storage) + addSessionView(view_table_id, view_storage); + return view_storage; } ContextMutablePtr Context::createCopy(const ContextPtr & other) { + auto lock = other->getLocalSharedLock(); return std::shared_ptr(new Context(*other)); } @@ -729,16 +744,11 @@ ContextMutablePtr Context::createCopy(const ContextMutablePtr & other) return createCopy(std::const_pointer_cast(other)); } -void Context::copyFrom(const ContextPtr & other) -{ - *this = *other; -} - Context::~Context() = default; WorkerStatusManagerPtr Context::getWorkerStatusManager() { - auto lock = getLock(); + auto lock = getLock(); // checked if (!shared->worker_status_manager) shared->worker_status_manager = std::make_shared(global_context); return shared->worker_status_manager; @@ -751,7 +761,7 @@ void Context::updateAdaptiveSchdulerConfig() WorkerStatusManagerPtr Context::getWorkerStatusManager() const { - auto lock = getLock(); + auto lock = getLock(); // checked if (!shared->worker_status_manager) shared->worker_status_manager = std::make_shared(global_context); return shared->worker_status_manager; @@ -826,6 +836,22 @@ std::unique_lock Context::getLock() const return std::unique_lock(shared->mutex); } +/// NOTE: it's an non-recursive lock, caller should be aware of the deadlock risk +std::unique_lock Context::getLocalLock() const +{ + ProfileEvents::increment(ProfileEvents::ContextLock); + CurrentMetrics::Increment 
increment{CurrentMetrics::ContextLockWait}; + return std::unique_lock(mutex); +} + +/// NOTE: it's an non-recursive lock, caller should be aware of the deadlock risk +std::shared_lock Context::getLocalSharedLock() const +{ + ProfileEvents::increment(ProfileEvents::ContextLock); + CurrentMetrics::Increment increment{CurrentMetrics::ContextLockWait}; + return std::shared_lock(mutex); +} + ProcessList & Context::getProcessList() { return shared->process_list; @@ -869,7 +895,7 @@ const ReplicatedFetchList & Context::getReplicatedFetchList() const SegmentSchedulerPtr Context::getSegmentScheduler() { - auto lock = getLock(); + auto lock = getLock(); // checked if (!shared->segment_scheduler) shared->segment_scheduler = std::make_shared(); return shared->segment_scheduler; @@ -877,7 +903,7 @@ SegmentSchedulerPtr Context::getSegmentScheduler() SegmentSchedulerPtr Context::getSegmentScheduler() const { - auto lock = getLock(); + auto lock = getLock(); // checked if (!shared->segment_scheduler) shared->segment_scheduler = std::make_shared(); return shared->segment_scheduler; @@ -885,13 +911,13 @@ SegmentSchedulerPtr Context::getSegmentScheduler() const void Context::setMockExchangeDataTracker(ExchangeStatusTrackerPtr exchange_data_tracker) { - auto lock = getLock(); + auto lock = getLock(); // checked shared->exchange_data_tracker = exchange_data_tracker; } ExchangeStatusTrackerPtr Context::getExchangeDataTracker() const { - auto lock = getLock(); + auto lock = getLock(); // checked if (!shared->exchange_data_tracker) { if (shared->server_type == ServerType::cnch_server) @@ -913,7 +939,7 @@ void Context::initDiskExchangeDataManager() const DiskExchangeDataManagerPtr Context::getDiskExchangeDataManager() const { - auto lock = getLock(); + auto lock = getLock(); // checked if (!shared->disk_exchange_data_manager) { const auto & bsp_conf = getRootConfig().bulk_synchronous_parallel; @@ -944,13 +970,13 @@ DiskExchangeDataManagerPtr Context::getDiskExchangeDataManager() const void 
Context::setMockDiskExchangeDataManager(DiskExchangeDataManagerPtr disk_exchange_data_manager) { - auto lock = getLock(); + auto lock = getLock(); // checked shared->disk_exchange_data_manager = disk_exchange_data_manager; } BindingCacheManagerPtr Context::getGlobalBindingCacheManager() const { - auto lock = getLock(); + auto lock = getLock(); // checked if (this->shared->global_binding_cache_manager) return this->shared->global_binding_cache_manager; return nullptr; @@ -958,7 +984,7 @@ BindingCacheManagerPtr Context::getGlobalBindingCacheManager() const BindingCacheManagerPtr Context::getGlobalBindingCacheManager() { - auto lock = getLock(); + auto lock = getLock(); // checked if (this->shared->global_binding_cache_manager) return this->shared->global_binding_cache_manager; return nullptr; @@ -966,7 +992,7 @@ BindingCacheManagerPtr Context::getGlobalBindingCacheManager() void Context::setGlobalBindingCacheManager(std::shared_ptr && manager) { - auto lock = getLock(); + auto lock = getLock(); // checked if (shared->global_binding_cache_manager) throw Exception("Global binding cache has been already created.", ErrorCodes::LOGICAL_ERROR); shared->global_binding_cache_manager = std::move(manager); @@ -974,7 +1000,7 @@ void Context::setGlobalBindingCacheManager(std::shared_ptr std::shared_ptr Context::getSessionBindingCacheManager() const { - auto lock = getLock(); + auto lock = getLock(); // checked if (!this->session_binding_cache_manager) { this->session_binding_cache_manager = std::make_shared(); @@ -985,7 +1011,7 @@ std::shared_ptr Context::getSessionBindingCacheManager() co QueueManagerPtr Context::getQueueManager() const { - auto lock = getLock(); + auto lock = getLock(); // checked if (!shared->queue_manager) shared->queue_manager = std::make_shared(global_context); return shared->queue_manager; @@ -993,7 +1019,7 @@ QueueManagerPtr Context::getQueueManager() const AsyncQueryManagerPtr Context::getAsyncQueryManager() const { - auto lock = getLock(); + auto lock 
= getLock(); // checked if (!shared->async_query_manager) shared->async_query_manager = std::make_shared(global_context); return shared->async_query_manager; @@ -1071,18 +1097,20 @@ CnchWorkerResourcePtr Context::tryGetCnchWorkerResource() const void Context::initCnchWorkerResource() { - worker_resource = std::make_shared(); + auto lock = getLocalLock(); + if (!worker_resource) + worker_resource = std::make_shared(); } void Context::setExtendedProfileInfo(const ExtendedProfileInfo & source) const { - auto lock = getLock(); + auto lock = getLocalLock(); extended_profile_info = source; } ExtendedProfileInfo Context::getExtendedProfileInfo() const { - auto lock = getLock(); + auto lock = getLocalSharedLock(); return extended_profile_info; } @@ -1096,57 +1124,60 @@ String Context::resolveDatabase(const String & database_name) const String Context::getPath() const { - auto lock = getLock(); + auto lock = getLock(); // checked return shared->path; } String Context::getFlagsPath() const { - auto lock = getLock(); + auto lock = getLock(); // checked return shared->flags_path; } String Context::getUserFilesPath() const { - auto lock = getLock(); + auto lock = getLock(); // checked return shared->user_files_path; } String Context::getDictionariesLibPath() const { - auto lock = getLock(); + auto lock = getLock(); // checked return shared->dictionaries_lib_path; } String Context::getMetastorePath() const { - auto lock = getLock(); + auto lock = getLock(); // checked return shared->metastore_path; } VolumePtr Context::getTemporaryVolume() const { - auto lock = getLock(); + auto lock = getLock(); // checked return shared->tmp_volume; } TemporaryDataOnDiskScopePtr Context::getTempDataOnDisk() const { - auto lock = getLock(); - if (this->temp_data_on_disk) - return this->temp_data_on_disk; + { + auto lock = getLocalSharedLock(); + if (this->temp_data_on_disk) + return this->temp_data_on_disk; + } + auto lock = getLock(); // checked return shared->temp_data_on_disk; } void 
Context::setTempDataOnDisk(TemporaryDataOnDiskScopePtr temp_data_on_disk_) { - auto lock = getLock(); + auto lock = getLocalLock(); this->temp_data_on_disk = std::move(temp_data_on_disk_); } void Context::setPath(const String & path) { - auto lock = getLock(); + auto lock = getLock(); // checked shared->path = path; @@ -1331,38 +1362,43 @@ void Context::setTemporaryStoragePolicy(const String & policy_name, size_t max_s void Context::setFlagsPath(const String & path) { - auto lock = getLock(); + auto lock = getLock(); // checked shared->flags_path = path; } void Context::setUserFilesPath(const String & path) { - auto lock = getLock(); + auto lock = getLock(); // checked shared->user_files_path = path; } void Context::setDictionariesLibPath(const String & path) { - auto lock = getLock(); + auto lock = getLock(); // checked shared->dictionaries_lib_path = path; } void Context::setMetastorePath(const String & path) { - auto lock = getLock(); + auto lock = getLock(); // checked shared->metastore_path = path; } void Context::setConfig(const ConfigurationPtr & config) { - auto lock = getLock(); + auto lock = getLock(); // checked shared->config = config; shared->access_control_manager.setExternalAuthenticatorsConfig(*shared->config); } const Poco::Util::AbstractConfiguration & Context::getConfigRef() const { - auto lock = getLock(); + auto lock = getLock(); // checked + return shared->config ? *shared->config : Poco::Util::Application::instance().config(); +} + +const Poco::Util::AbstractConfiguration & Context::getConfigRefWithLock(const std::unique_lock &) const +{ return shared->config ? 
*shared->config : Poco::Util::Application::instance().config(); } @@ -1412,13 +1448,13 @@ const AccessControlManager & Context::getAccessControlManager() const void Context::setExternalAuthenticatorsConfig(const Poco::Util::AbstractConfiguration & config) { - auto lock = getLock(); + auto lock = getLock(); // checked shared->access_control_manager.setExternalAuthenticatorsConfig(config); } std::unique_ptr Context::makeGSSAcceptorContext() const { - auto lock = getLock(); + auto lock = getLock(); // checked return std::make_unique(shared->access_control_manager.getExternalAuthenticators().getKerberosParams()); } @@ -1442,7 +1478,7 @@ void Context::updateAdditionalServices(const Poco::Util::AbstractConfiguration & void Context::setUsersConfig(const ConfigurationPtr & config) { - auto lock = getLock(); + auto lock = getLock(); // checked shared->users_config = config; shared->access_control_manager.setUsersConfig(*shared->users_config); if (getServerType() == ServerType::cnch_server || getServerType() == ServerType::cnch_worker) @@ -1457,7 +1493,7 @@ void Context::setUsersConfig(const ConfigurationPtr & config) ConfigurationPtr Context::getUsersConfig() { - auto lock = getLock(); + auto lock = getLock(); // checked return shared->users_config; } @@ -1514,10 +1550,14 @@ void Context::initResourceGroupManager(const ConfigurationPtr & config) void Context::setResourceGroup(const IAST * ast) { - if (auto lock = getLock(); shared->resource_group_manager && shared->resource_group_manager->isInUse()) - resource_group = shared->resource_group_manager->selectGroup(*this, ast); - else - resource_group = nullptr; + IResourceGroup * group = nullptr; + { + auto lock = getLock(); // checked + if (shared->resource_group_manager && shared->resource_group_manager->isInUse()) + group = shared->resource_group_manager->selectGroup(*this, ast); + } + auto lock = getLocalLock(); + resource_group = group; } IResourceGroup * Context::tryGetResourceGroup() const @@ -1550,26 +1590,33 @@ void 
Context::stopResourceGroup() void Context::setUser(const Credentials & credentials, const Poco::Net::SocketAddress & address) { - client_info.current_user = credentials.getUserName(); - client_info.current_address = address; - - //#if defined(ARCADIA_BUILD) - /// This is harmful field that is used only in foreign "Arcadia" build. - client_info.current_password.clear(); - if (const auto * basic_credentials = dynamic_cast(&credentials)) - client_info.current_password = basic_credentials->getPassword(); - //#endif - /// Find a user with such name and check the credentials. /// NOTE: getAccessControlManager().login and other AccessControl's functions may require some IO work, /// so Context::getLock() must be unlocked while we're doing this. auto new_user_id = getAccessControlManager().login(credentials, address.host()); - auto new_access = getAccessControlManager().getContextAccess( - new_user_id, /* current_roles = */ {}, /* use_default_roles = */ true, settings, current_database, client_info, - has_tenant_id_in_username ? tenant_id : "", - getServerType() != ServerType::cnch_server); - auto lock = getLock(); + ContextAccessParams params; + { + auto lock = getLocalLock(); + client_info.current_user = credentials.getUserName(); + client_info.current_address = address; + + //#if defined(ARCADIA_BUILD) + /// This is harmful field that is used only in foreign "Arcadia" build. + client_info.current_password.clear(); + if (const auto * basic_credentials = dynamic_cast(&credentials)) + client_info.current_password = basic_credentials->getPassword(); + //#endif + + params = getAccessControlManager().getContextAccessParams( + new_user_id, /* current_roles = */ {}, /* use_default_roles = */ true, settings, current_database, client_info, + has_tenant_id_in_username ? 
tenant_id : "", + getServerType() != ServerType::cnch_server); + } + + auto new_access = getAccessControlManager().getContextAccess(params); + + auto lock = getLocalLock(); user_id = new_user_id; access = std::move(new_access); @@ -1578,7 +1625,7 @@ void Context::setUser(const Credentials & credentials, const Poco::Net::SocketAd current_roles.clear(); use_default_roles = true; - applySettingsChanges(default_profile_info->settings); + applySettingsChangesWithLock(default_profile_info->settings, /*internal*/ true, lock); } String Context::formatUserName(const String & name) @@ -1621,7 +1668,7 @@ std::shared_ptr Context::getUser() const void Context::setQuotaKey(String quota_key_) { - auto lock = getLock(); + auto lock = getLocalLock(); client_info.quota_key = std::move(quota_key_); } @@ -1632,29 +1679,28 @@ String Context::getUserName() const std::optional Context::getUserID() const { - auto lock = getLock(); + auto lock = getLocalSharedLock(); return user_id; } - void Context::setCurrentRoles(const std::vector & current_roles_) { - auto lock = getLock(); + auto lock = getLocalLock(); if (current_roles == current_roles_ && !use_default_roles) return; current_roles = current_roles_; use_default_roles = false; - calculateAccessRights(); + calculateAccessRightsWithLock(lock); } void Context::setCurrentRolesDefault() { - auto lock = getLock(); + auto lock = getLocalLock(); if (use_default_roles) return; current_roles.clear(); use_default_roles = true; - calculateAccessRights(); + calculateAccessRightsWithLock(lock); } boost::container::flat_set Context::getCurrentRoles() const @@ -1673,13 +1719,15 @@ std::shared_ptr Context::getRolesInfo() const } -void Context::calculateAccessRights() +void Context::calculateAccessRightsWithLock(const std::unique_lock &) { - auto lock = getLock(); if (user_id) - access = getAccessControlManager().getContextAccess( + { + auto params = getAccessControlManager().getContextAccessParams( *user_id, current_roles, use_default_roles, settings, 
current_database, client_info, has_tenant_id_in_username ? tenant_id : "", false); + access = getAccessControlManager().getContextAccess(params); + } } @@ -1747,17 +1795,18 @@ void Context::checkAccess(const AccessRightsElements & elements) const void Context::grantAllAccess() { - auto lock = getLock(); + auto lock = getLocalLock(); access = ContextAccess::getFullAccess(); } std::shared_ptr Context::getAccess() const { - auto lock = getLock(); // If its a worker node and prefer_cnch_catalog is false, this is a query from server // and access check has already been done in server. We can return full access. if (getServerType() == ServerType::cnch_worker && !getSettingsRef().prefer_cnch_catalog) return ContextAccess::getFullAccess(); + + auto lock = getLocalSharedLock(); return access ? access : ContextAccess::getFullAccess(); } @@ -1781,14 +1830,17 @@ void Context::checkAeolusTableAccess(const String & database_name, const String ASTPtr Context::getRowPolicyCondition(const String & database, const String & table_name, RowPolicy::ConditionType type) const { - auto lock = getLock(); - auto initial_condition = initial_row_policy ? initial_row_policy->getCondition(database, table_name, type) : nullptr; - return getAccess()->getRowPolicyCondition(database, table_name, type, initial_condition); + ASTPtr condition; + { + auto lock = getLocalSharedLock(); + condition = initial_row_policy ? 
initial_row_policy->getCondition(database, table_name, type) : nullptr; + } + return getAccess()->getRowPolicyCondition(database, table_name, type, condition); } void Context::setInitialRowPolicy() { - auto lock = getLock(); + auto lock = getLocalLock(); auto initial_user_id = getAccessControlManager().find(client_info.initial_user); initial_row_policy = nullptr; if (initial_user_id) @@ -1807,13 +1859,12 @@ std::optional Context::getQuotaUsage() const return getAccess()->getQuotaUsage(); } -void Context::setCurrentProfile(const String & profile_name) +void Context::setCurrentProfileWithLock(const String & profile_name, const std::unique_lock & lock) { - auto lock = getLock(); try { UUID profile_id = getAccessControlManager().getID(profile_name); - setCurrentProfile(profile_id); + setCurrentProfileWithLock(profile_id, lock); } catch (Exception & e) { @@ -1822,25 +1873,40 @@ void Context::setCurrentProfile(const String & profile_name) } } -void Context::setCurrentProfile(const UUID & profile_id) +void Context::setCurrentProfileWithLock(const UUID & profile_id, const std::unique_lock & lock) { - auto lock = getLock(); auto profile_info = getAccessControlManager().getSettingsProfileInfo(profile_id); - checkSettingsConstraints(profile_info->settings); - applySettingsChanges(profile_info->settings); - settings_constraints_and_current_profiles = profile_info->getConstraintsAndProfileIDs(settings_constraints_and_current_profiles); + setCurrentProfileWithLock(*profile_info, lock); +} + +void Context::setCurrentProfileWithLock(const SettingsProfilesInfo & profiles_info, const std::unique_lock & lock) +{ + checkSettingsConstraintsWithLock(profiles_info.settings); + applySettingsChangesWithLock(profiles_info.settings, true, lock); + settings_constraints_and_current_profiles = profiles_info.getConstraintsAndProfileIDs(settings_constraints_and_current_profiles); +} + +void Context::setCurrentProfile(const String & profile_name) +{ + auto lock = getLocalLock(); + 
setCurrentProfileWithLock(profile_name, lock); } +void Context::setCurrentProfile(const UUID & profile_id) +{ + auto lock = getLocalLock(); + setCurrentProfileWithLock(profile_id, lock); +} std::vector Context::getCurrentProfiles() const { - auto lock = getLock(); + auto lock = getLocalSharedLock(); return settings_constraints_and_current_profiles->current_profiles; } std::vector Context::getEnabledProfiles() const { - auto lock = getLock(); + auto lock = getLocalSharedLock(); return settings_constraints_and_current_profiles->enabled_profiles; } @@ -1866,7 +1932,7 @@ const Block & Context::getScalar(const String & name) const Tables Context::getExternalTables() const { assert(!isGlobalContext() || getApplicationType() == ApplicationType::LOCAL); - auto lock = getLock(); + auto lock = getLocalSharedLock(); Tables res; for (const auto & table : external_tables_mapping) @@ -1891,7 +1957,7 @@ Tables Context::getExternalTables() const void Context::addExternalTable(const String & table_name, TemporaryTableHolder && temporary_table) { assert(!isGlobalContext() || getApplicationType() == ApplicationType::LOCAL); - auto lock = getLock(); + auto lock = getLocalLock(); if (external_tables_mapping.end() != external_tables_mapping.find(table_name)) throw Exception("Temporary table " + backQuoteIfNeed(table_name) + " already exists.", ErrorCodes::TABLE_ALREADY_EXISTS); external_tables_mapping.emplace(table_name, std::make_shared(std::move(temporary_table))); @@ -1903,7 +1969,7 @@ std::shared_ptr Context::removeExternalTable(const String assert(!isGlobalContext() || getApplicationType() == ApplicationType::LOCAL); std::shared_ptr holder; { - auto lock = getLock(); + auto lock = getLocalLock(); auto iter = external_tables_mapping.find(table_name); if (iter == external_tables_mapping.end()) return {}; @@ -1945,7 +2011,7 @@ void Context::addQueryAccessInfo( void Context::addQueryFactoriesInfo(QueryLogFactories factory_type, const String & created_object) const { 
assert(!isGlobalContext() || getApplicationType() == ApplicationType::LOCAL); - auto lock = getLock(); + auto lock = getLocalLock(); switch (factory_type) { @@ -2014,142 +2080,69 @@ StoragePtr Context::getViewSource() const return view_source; } -Settings Context::getSettings() const -{ - auto lock = getLock(); - return settings; -} - - -void Context::setSettings(const Settings & settings_) -{ - auto lock = getLock(); - auto old_readonly = settings.readonly; - auto old_allow_ddl = settings.allow_ddl; - auto old_allow_introspection_functions = settings.allow_introspection_functions; - - settings = settings_; - - if ((settings.readonly != old_readonly) || (settings.allow_ddl != old_allow_ddl) - || (settings.allow_introspection_functions != old_allow_introspection_functions)) - calculateAccessRights(); -} - - -void Context::setSetting(const StringRef & name, const String & value) +void Context::setSettingWithLock(const StringRef & name, const String & value, const std::unique_lock & lock) { - auto lock = getLock(); if (name == "profile") { - setCurrentProfile(value); + setCurrentProfileWithLock(value, lock); return; } settings.set(std::string_view{name}, value); - if (name == "readonly" || name == "allow_ddl" || name == "allow_introspection_functions") - calculateAccessRights(); + if (ContextAccessParams::dependsOnSettingName(name.toView())) + calculateAccessRightsWithLock(lock); } - -void Context::setSetting(const StringRef & name, const Field & value) +void Context::setSettingWithLock(const StringRef & name, const Field & value, const std::unique_lock & lock) { - auto lock = getLock(); if (name == "profile") { - setCurrentProfile(value.safeGet()); + setCurrentProfileWithLock(value.safeGet(), lock); return; } settings.set(std::string_view{name}, value); - if (name == "readonly" || name == "allow_ddl" || name == "allow_introspection_functions") - calculateAccessRights(); + if (ContextAccessParams::dependsOnSettingName(name.toView())) + 
calculateAccessRightsWithLock(lock); } -void Context::applySettingsChanges(const JSON & changes) +Settings Context::getSettings() const { - auto lock = getLock(); - - // set ansi related settings first, as they may be overwritten explicitly later - std::optional dialect_type_opt; - std::function find_dialect_type_if_any = [&](const SettingsChanges & setting_changes) - { - for (const auto & change: setting_changes) - { - if (change.name == "profile") - { - UUID profile_id = getAccessControlManager().getID(change.value.safeGet()); - auto profile_info = getAccessControlManager().getSettingsProfileInfo(profile_id); - - find_dialect_type_if_any(profile_info->settings); - } - - if (change.name == "dialect_type") - { - auto value_str = change.value.safeGet(); - - if (!dialect_type_opt) - dialect_type_opt = value_str; - else if (*dialect_type_opt != value_str) - throw Exception(ErrorCodes::INVALID_SETTING_VALUE, "Multiple dialect_type value found"); - } - } - }; - - for (JSON::iterator it = changes.begin(); it != changes.end(); ++it) - { - auto name = it.getRawName().toView(); - auto value = it.getValue().getRawString().toView(); - Field value_field(value); - auto value_str = value_field.safeGet(); - UUID profile_id = getAccessControlManager().getID(value_str); - auto profile_info = getAccessControlManager().getSettingsProfileInfo(profile_id); - checkSettingsConstraints(profile_info->settings); - if (name == "profile") - { - find_dialect_type_if_any(profile_info->settings); - } - - if (name == "dialect_type") - { - if (!dialect_type_opt) - dialect_type_opt = value; - else if (*dialect_type_opt != value) - throw Exception(ErrorCodes::INVALID_SETTING_VALUE, "Multiple dialect_type value found"); - } + auto lock = getLocalSharedLock(); + return settings; +} - try - { - setSetting(StringRef(name), value_field); - } - catch (Exception & e) - { - e.addMessage(fmt::format("in attempt to set the value of setting '{}' to {}", - name, applyVisitor(FieldVisitorToString(), 
value_field))); - throw; - } - } +void Context::setSettings(const Settings & settings_) +{ + auto lock = getLocalLock(); + auto old_readonly = settings.readonly; + auto old_allow_ddl = settings.allow_ddl; + auto old_allow_introspection_functions = settings.allow_introspection_functions; - // skip if a previous setting change is in process - bool apply_ansi_related_settings = dialect_type_opt && !settings.dialect_type.pending; + settings = settings_; - if (apply_ansi_related_settings) - { - setSetting("dialect_type", *dialect_type_opt); - ANSI::onSettingChanged(&settings); - settings.dialect_type.pending = true; - } + if ((settings.readonly != old_readonly) || (settings.allow_ddl != old_allow_ddl) + || (settings.allow_introspection_functions != old_allow_introspection_functions)) + calculateAccessRightsWithLock(lock); +} - applySettingsQuirks(settings); +void Context::setSetting(const StringRef & name, const String & value) +{ + auto lock = getLocalLock(); + setSettingWithLock(name, value, lock); +} - if (apply_ansi_related_settings) - settings.dialect_type.pending = false; +void Context::setSetting(const StringRef & name, const Field & value) +{ + auto lock = getLocalLock(); + setSettingWithLock(name, value, lock); } -void Context::applySettingChange(const SettingChange & change) +void Context::applySettingChangeWithLock(const SettingChange & change, const std::unique_lock & lock) { try { - setSetting(change.name, change.value); + setSettingWithLock(change.name, change.value, lock); } catch (Exception & e) { @@ -2159,11 +2152,8 @@ void Context::applySettingChange(const SettingChange & change) } } - -void Context::applySettingsChanges(const SettingsChanges & changes, bool internal) +void Context::applySettingsChangesWithLock(const SettingsChanges & changes, bool internal, const std::unique_lock & lock) { - auto lock = getLock(); - // set ansi related settings first, as they may be overwritten explicitly later std::optional dialect_type_opt; std::function 
find_dialect_type_if_any = [&](const SettingsChanges & setting_changes) { @@ -2188,6 +2178,7 @@ void Context::applySettingsChanges(const SettingsChanges & changes, bool interna } } }; + find_dialect_type_if_any(changes); // NOTE: tenanted users connect to server using tenant id given in connection info. // allow only whitelisted settings for tenanted users @@ -2202,62 +2193,108 @@ void Context::applySettingsChanges(const SettingsChanges & changes, bool interna } } - find_dialect_type_if_any(changes); - // skip if a previous setting change is in process bool apply_ansi_related_settings = dialect_type_opt && !settings.dialect_type.pending; if (apply_ansi_related_settings) { - setSetting("dialect_type", *dialect_type_opt); + setSettingWithLock("dialect_type", *dialect_type_opt, lock); ANSI::onSettingChanged(&settings); settings.dialect_type.pending = true; } for (const SettingChange & change : changes) - applySettingChange(change); + applySettingChangeWithLock(change, lock); applySettingsQuirks(settings); if (apply_ansi_related_settings) settings.dialect_type.pending = false; } +void Context::applySettingChange(const SettingChange & change) +{ + try + { + setSetting(change.name, change.value); + } + catch (Exception & e) + { + e.addMessage(fmt::format( + "in attempt to set the value of setting '{}' to {}", change.name, applyVisitor(FieldVisitorToString(), change.value))); + throw; + } +} + + +void Context::applySettingsChanges(const SettingsChanges & changes, bool internal) +{ + auto lock = getLocalLock(); + applySettingsChangesWithLock(changes, internal, lock); +} + +void Context::checkSettingsConstraintsWithLock(const SettingChange & change) const +{ + getSettingsConstraintsAndCurrentProfilesWithLock()->constraints.check(settings, change); +} + +void Context::checkSettingsConstraintsWithLock(const SettingsChanges & changes) const +{ + getSettingsConstraintsAndCurrentProfilesWithLock()->constraints.check(settings, changes); +} + +void 
Context::checkSettingsConstraintsWithLock(SettingsChanges & changes) const +{ + getSettingsConstraintsAndCurrentProfilesWithLock()->constraints.check(settings, changes); +} + +void Context::clampToSettingsConstraintsWithLock(SettingsChanges & changes) const +{ + getSettingsConstraintsAndCurrentProfilesWithLock()->constraints.clamp(settings, changes); +} void Context::checkSettingsConstraints(const SettingChange & change) const { - getSettingsConstraintsAndCurrentProfiles()->constraints.check(settings, change); + auto lock = getLocalSharedLock(); + checkSettingsConstraintsWithLock(change); } void Context::checkSettingsConstraints(const SettingsChanges & changes) const { - getSettingsConstraintsAndCurrentProfiles()->constraints.check(settings, changes); + auto lock = getLocalSharedLock(); + checkSettingsConstraintsWithLock(changes); } void Context::checkSettingsConstraints(SettingsChanges & changes) const { - getSettingsConstraintsAndCurrentProfiles()->constraints.check(settings, changes); + auto lock = getLocalSharedLock(); + checkSettingsConstraintsWithLock(changes); } void Context::clampToSettingsConstraints(SettingsChanges & changes) const { - getSettingsConstraintsAndCurrentProfiles()->constraints.clamp(settings, changes); + auto lock = getLocalSharedLock(); + clampToSettingsConstraintsWithLock(changes); } -std::shared_ptr Context::getSettingsConstraintsAndCurrentProfiles() const +std::shared_ptr Context::getSettingsConstraintsAndCurrentProfilesWithLock() const { - auto lock = getLock(); if (settings_constraints_and_current_profiles) return settings_constraints_and_current_profiles; static auto no_constraints_or_profiles = std::make_shared(getAccessControlManager()); return no_constraints_or_profiles; } +std::shared_ptr Context::getSettingsConstraintsAndCurrentProfiles() const +{ + auto lock = getLocalSharedLock(); + return getSettingsConstraintsAndCurrentProfilesWithLock(); +} String Context::getCurrentDatabase() const { String tenant_db; { - auto lock = 
getLock(); + auto lock = getLocalLock(); tenant_db = current_database; } @@ -2312,7 +2349,7 @@ void Context::setCurrentDatabaseNameInGlobalContext(const String & name) throw Exception( "Cannot set current database for non global context, this method should be used during server initialization", ErrorCodes::LOGICAL_ERROR); - auto lock = getLock(); + auto lock = getLocalLock(); if (!current_database.empty()) throw Exception("Default database name cannot be changed in global context without server restart", ErrorCodes::LOGICAL_ERROR); @@ -2322,10 +2359,10 @@ void Context::setCurrentDatabaseNameInGlobalContext(const String & name) void Context::setCurrentDatabase(const String & name) { - DatabaseCatalog::instance().assertDatabaseExists(name, hasQueryContext() ? getQueryContext() : shared_from_this()); - auto lock = getLock(); + DatabaseCatalog::instance().assertDatabaseExists(name, hasQueryContext() ? getQueryContext(): shared_from_this()); + auto lock = getLocalLock(); current_database = name; - calculateAccessRights(); + calculateAccessRightsWithLock(lock); } void Context::setCurrentDatabase(const String & name, ContextPtr local_context) @@ -2351,7 +2388,7 @@ void Context::setCurrentDatabase(const String & name, ContextPtr local_context) } auto db_name_with_tenant_id = appendTenantIdOnly(database_opt.value()); - auto lock = getLock(); + auto lock = getLocalLock(); if(use_cnch_catalog){ current_catalog = ""; current_database = db_name_with_tenant_id; @@ -2361,14 +2398,14 @@ void Context::setCurrentDatabase(const String & name, ContextPtr local_context) current_database = database_opt.value(); LOG_TRACE(shared->log, "use external catalog, catalog_name: {}, db_name: {}", current_catalog, current_database); } - calculateAccessRights(); + calculateAccessRightsWithLock(lock); } void Context::setCurrentCatalog(const String & catalog_name) { if (catalog_name == "" || catalog_name == "cnch") { - auto lock = getLock(); + auto lock = getLocalLock(); current_catalog = ""; 
current_database = ""; return; @@ -2378,7 +2415,7 @@ void Context::setCurrentCatalog(const String & catalog_name) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "catalog {} does not exist", catalog_name); } - auto lock = getLock(); + auto lock = getLocalLock(); current_catalog = catalog_name; current_database = "default"; } @@ -2619,7 +2656,7 @@ void Context::loadDictionaries(const Poco::Util::AbstractConfiguration & config) SynonymsExtensions & Context::getSynonymsExtensions() const { - auto lock = getLock(); + auto lock = getLock(); // checked if (!shared->synonyms_extensions) shared->synonyms_extensions.emplace(getConfigRef()); @@ -2629,7 +2666,7 @@ void Context::loadDictionaries(const Poco::Util::AbstractConfiguration & config) Lemmatizers & Context::getLemmatizers() const { - auto lock = getLock(); + auto lock = getLock(); // checked if (!shared->lemmatizers) shared->lemmatizers.emplace(getConfigRef()); @@ -2676,13 +2713,13 @@ std::weak_ptr Context::getPlanSegmentProcessListEnt void Context::setProcessorProfileElementConsumer( std::shared_ptr> processor_log_element_consumer_) { - auto lock = getLock(); + auto lock = getLock(); // checked shared->processor_log_element_consumer = processor_log_element_consumer_; } std::shared_ptr> Context::getProcessorProfileElementConsumer() const { - auto lock = getLock(); + auto lock = getLock(); // checked if (!shared->processor_log_element_consumer) return {}; @@ -2712,7 +2749,7 @@ QueryStatus * Context::getProcessListElement() const void Context::setNvmCache(const Poco::Util::AbstractConfiguration &config) { - auto lock = getLock(); + auto lock = getLock(); // checked if (shared->nvm_cache) throw Exception("Nvmcache cache has been already created.", ErrorCodes::LOGICAL_ERROR); @@ -2749,27 +2786,27 @@ void Context::setNvmCache(const Poco::Util::AbstractConfiguration &config) NvmCachePtr Context::getNvmCache() const { - auto lock = getLock(); + auto lock = getLock(); // checked return shared->nvm_cache; } void 
Context::dropNvmCache() const { - auto lock = getLock(); + auto lock = getLock(); // checked if (shared->nvm_cache) shared->nvm_cache->reset(); } void Context::setFooterCache(size_t max_size_in_bytes) { - auto lock = getLock(); + auto lock = getLock(); // checked if (max_size_in_bytes) ArrowFooterCache::initialize(max_size_in_bytes); } void Context::setUncompressedCache(size_t max_size_in_bytes) { - auto lock = getLock(); + auto lock = getLock(); // checked if (shared->uncompressed_cache) throw Exception("Uncompressed cache has been already created.", ErrorCodes::LOGICAL_ERROR); @@ -2780,14 +2817,14 @@ void Context::setUncompressedCache(size_t max_size_in_bytes) UncompressedCachePtr Context::getUncompressedCache() const { - auto lock = getLock(); + auto lock = getLock(); // checked return shared->uncompressed_cache; } void Context::dropUncompressedCache() const { - auto lock = getLock(); + auto lock = getLock(); // checked if (shared->uncompressed_cache) shared->uncompressed_cache->reset(); } @@ -2795,7 +2832,7 @@ void Context::dropUncompressedCache() const void Context::setMarkCache(size_t cache_size_in_bytes) { - auto lock = getLock(); + auto lock = getLock(); // checked if (shared->mark_cache) throw Exception("Mark cache has been already created.", ErrorCodes::LOGICAL_ERROR); @@ -2805,20 +2842,20 @@ void Context::setMarkCache(size_t cache_size_in_bytes) MarkCachePtr Context::getMarkCache() const { - auto lock = getLock(); + auto lock = getLock(); // checked return shared->mark_cache; } void Context::dropMarkCache() const { - auto lock = getLock(); + auto lock = getLock(); // checked if (shared->mark_cache) shared->mark_cache->reset(); } void Context::setQueryCache(const Poco::Util::AbstractConfiguration & config) { - auto lock = getLock(); + auto lock = getLock(); // checked if (shared->query_cache) throw Exception(ErrorCodes::LOGICAL_ERROR, "Query cache has been already created."); @@ -2829,27 +2866,27 @@ void Context::setQueryCache(const 
Poco::Util::AbstractConfiguration & config) void Context::updateQueryCacheConfiguration(const Poco::Util::AbstractConfiguration & config) { - auto lock = getLock(); + auto lock = getLock(); // checked if (shared->query_cache) shared->query_cache->updateConfiguration(config); } QueryCachePtr Context::getQueryCache() const { - auto lock = getLock(); + auto lock = getLock(); // checked return shared->query_cache; } void Context::dropQueryCache() const { - auto lock = getLock(); + auto lock = getLock(); // checked if (shared->query_cache) shared->query_cache->reset(); } void Context::setIntermediateResultCache(size_t cache_size_in_bytes) { - auto lock = getLock(); + auto lock = getLock(); // checked if (shared->intermediate_result_cache) throw Exception("Intermediate result cache has been already created.", ErrorCodes::LOGICAL_ERROR); @@ -2859,20 +2896,20 @@ void Context::setIntermediateResultCache(size_t cache_size_in_bytes) IntermediateResultCachePtr Context::getIntermediateResultCache() const { - auto lock = getLock(); + auto lock = getLock(); // checked return shared->intermediate_result_cache; } void Context::dropIntermediateResultCache() const { - auto lock = getLock(); + auto lock = getLock(); // checked if (shared->intermediate_result_cache) shared->intermediate_result_cache->reset(); } void Context::setMMappedFileCache(size_t cache_size_in_num_entries) { - auto lock = getLock(); + auto lock = getLock(); // checked if (shared->mmap_cache) throw Exception("Mapped file cache has been already created.", ErrorCodes::LOGICAL_ERROR); @@ -2882,13 +2919,13 @@ void Context::setMMappedFileCache(size_t cache_size_in_num_entries) MMappedFileCachePtr Context::getMMappedFileCache() const { - auto lock = getLock(); + auto lock = getLock(); // checked return shared->mmap_cache; } void Context::dropMMappedFileCache() const { - auto lock = getLock(); + auto lock = getLock(); // checked if (shared->mmap_cache) shared->mmap_cache->reset(); } @@ -2896,7 +2933,7 @@ void 
Context::dropMMappedFileCache() const void Context::dropCaches() const { - auto lock = getLock(); + auto lock = getLock(); // checked if (shared->uncompressed_cache) shared->uncompressed_cache->reset(); @@ -2923,7 +2960,7 @@ void Context::setMergeSchedulerSettings(const Poco::Util::AbstractConfiguration BackgroundSchedulePool & Context::getBufferFlushSchedulePool() const { - auto lock = getLock(); + auto lock = getLock(); // checked if (!shared->buffer_flush_schedule_pool) shared->buffer_flush_schedule_pool.emplace( settings.background_buffer_flush_schedule_pool_size, CurrentMetrics::BackgroundBufferFlushSchedulePoolTask, "BgBufSchPool"); @@ -2974,15 +3011,18 @@ BackgroundTaskSchedulingSettings Context::getBackgroundMoveTaskSchedulingSetting BackgroundSchedulePool & Context::getSchedulePool() const { - auto lock = getLock(); - if (!shared->schedule_pool) - shared->schedule_pool.emplace(settings.background_schedule_pool_size, CurrentMetrics::BackgroundSchedulePoolTask, "BgSchPool"); + callOnce(shared->schedule_pool_initialized, [&]{ + shared->schedule_pool.emplace( + settings.background_schedule_pool_size, + CurrentMetrics::BackgroundSchedulePoolTask, + "BgSchPool"); + }); return *shared->schedule_pool; } BackgroundSchedulePool & Context::getDistributedSchedulePool() const { - auto lock = getLock(); + auto lock = getLock(); // checked if (!shared->distributed_schedule_pool) shared->distributed_schedule_pool.emplace( settings.background_distributed_schedule_pool_size, CurrentMetrics::BackgroundDistributedSchedulePoolTask, "BgDistSchPool"); @@ -2991,7 +3031,7 @@ BackgroundSchedulePool & Context::getDistributedSchedulePool() const BackgroundSchedulePool & Context::getMessageBrokerSchedulePool() const { - auto lock = getLock(); + auto lock = getLock(); // checked if (!shared->message_broker_schedule_pool) shared->message_broker_schedule_pool.emplace( settings.background_message_broker_schedule_pool_size, CurrentMetrics::BackgroundMessageBrokerSchedulePoolTask, 
"BgMBSchPool"); @@ -3000,159 +3040,129 @@ BackgroundSchedulePool & Context::getMessageBrokerSchedulePool() const BackgroundSchedulePool & Context::getConsumeSchedulePool() const { - auto lock = getLock(); - LOG_DEBUG(&Poco::Logger::get("BackgroundSchedulePool"), "getConsumeSchedulePool"); - if (!shared->extra_schedule_pools[SchedulePool::Consume]) - { + auto & item = shared->extra_schedule_pools[SchedulePool::Consume]; + callOnce(item.is_initialized, [&] { CpuSetPtr cpu_set; if (auto & cgroup_manager = CGroupManagerFactory::instance(); cgroup_manager.isInit()) { cpu_set = cgroup_manager.getCpuSet("hakafka"); } - shared->extra_schedule_pools[SchedulePool::Consume].emplace( + item.pool = std::make_unique( settings.background_consume_schedule_pool_size, CurrentMetrics::BackgroundConsumeSchedulePoolTask, "BgConsumePool", std::move(cpu_set)); - } - - return *shared->extra_schedule_pools[SchedulePool::Consume]; -} - -BackgroundSchedulePool & Context::getRestartSchedulePool() const -{ - auto lock = getLock(); - if (!shared->extra_schedule_pools[SchedulePool::Restart]) - shared->extra_schedule_pools[SchedulePool::Restart].emplace( - settings.background_schedule_pool_size, CurrentMetrics::BackgroundRestartSchedulePoolTask, "BgRestartPool"); - return *shared->extra_schedule_pools[SchedulePool::Restart]; -} - -BackgroundSchedulePool & Context::getHaLogSchedulePool() const -{ - auto lock = getLock(); - if (!shared->extra_schedule_pools[SchedulePool::HaLog]) - shared->extra_schedule_pools[SchedulePool::HaLog].emplace( - settings.background_schedule_pool_size, CurrentMetrics::BackgroundHaLogSchedulePoolTask, "BgHaLogPool"); - return *shared->extra_schedule_pools[SchedulePool::HaLog]; -} -BackgroundSchedulePool & Context::getMutationSchedulePool() const -{ - auto lock = getLock(); - if (!shared->extra_schedule_pools[SchedulePool::Mutation]) - shared->extra_schedule_pools[SchedulePool::Mutation].emplace( - settings.background_schedule_pool_size, 
CurrentMetrics::BackgroundMutationSchedulePoolTask, "BgMutatePool"); - return *shared->extra_schedule_pools[SchedulePool::Mutation]; + }); + return *item.pool; } BackgroundSchedulePool & Context::getLocalSchedulePool() const { - auto lock = getLock(); - if (!shared->extra_schedule_pools[SchedulePool::Local]) - shared->extra_schedule_pools[SchedulePool::Local].emplace( - settings.background_local_schedule_pool_size, CurrentMetrics::BackgroundLocalSchedulePoolTask, "BgLocalPool"); - return *shared->extra_schedule_pools[SchedulePool::Local]; + auto & item = shared->extra_schedule_pools[SchedulePool::Local]; + callOnce(item.is_initialized, [&] { + item.pool = std::make_unique( + settings.background_local_schedule_pool_size, + CurrentMetrics::BackgroundLocalSchedulePoolTask, + "BgLocalPool" + ); + }); + return *item.pool; } BackgroundSchedulePool & Context::getMergeSelectSchedulePool() const { - auto lock = getLock(); - if (!shared->extra_schedule_pools[SchedulePool::MergeSelect]) - shared->extra_schedule_pools[SchedulePool::MergeSelect].emplace( - settings.background_schedule_pool_size, CurrentMetrics::BackgroundMergeSelectSchedulePoolTask, "BgMSelectPool"); - return *shared->extra_schedule_pools[SchedulePool::MergeSelect]; + auto & item = shared->extra_schedule_pools[SchedulePool::MergeSelect]; + callOnce(item.is_initialized, [&] { + item.pool = std::make_unique( + settings.background_schedule_pool_size, + CurrentMetrics::BackgroundMergeSelectSchedulePoolTask, + "BgMSelectPool"); + }); + return *item.pool; } BackgroundSchedulePool & Context::getUniqueTableSchedulePool() const { - auto lock = getLock(); - if (!shared->extra_schedule_pools[SchedulePool::UniqueTable]) - shared->extra_schedule_pools[SchedulePool::UniqueTable].emplace( - settings.background_unique_table_schedule_pool_size, CurrentMetrics::BackgroundUniqueTableSchedulePoolTask, "BgUniqPool"); - return *shared->extra_schedule_pools[SchedulePool::UniqueTable]; -} - -BackgroundSchedulePool & 
Context::getMemoryTableSchedulePool() const -{ - auto lock = getLock(); - if (!shared->extra_schedule_pools[SchedulePool::MemoryTable]) - shared->extra_schedule_pools[SchedulePool::MemoryTable].emplace( - settings.background_memory_table_schedule_pool_size, CurrentMetrics::BackgroundMemoryTableSchedulePoolTask, "BgMemTblPool"); - return *shared->extra_schedule_pools[SchedulePool::MemoryTable]; + auto & item = shared->extra_schedule_pools[SchedulePool::UniqueTable]; + callOnce(item.is_initialized, [&] { + item.pool = std::make_unique( + settings.background_unique_table_schedule_pool_size, + CurrentMetrics::BackgroundUniqueTableSchedulePoolTask, + "BgUniqPool"); + }); + return *item.pool; } BackgroundSchedulePool & Context::getTopologySchedulePool() const { - auto lock = getLock(); - if (!shared->extra_schedule_pools[SchedulePool::CNCHTopology]) - shared->extra_schedule_pools[SchedulePool::CNCHTopology].emplace( - settings.background_topology_thread_pool_size, CurrentMetrics::BackgroundCNCHTopologySchedulePoolTask, "CNCHTopoPool"); - return *shared->extra_schedule_pools[SchedulePool::CNCHTopology]; + auto & item = shared->extra_schedule_pools[SchedulePool::CNCHTopology]; + callOnce(item.is_initialized, [&] { + item.pool = std::make_unique( + settings.background_topology_thread_pool_size, + CurrentMetrics::BackgroundCNCHTopologySchedulePoolTask, + "CNCHTopoPool"); + }); + return *item.pool; } BackgroundSchedulePool & Context::getMetricsRecalculationSchedulePool() const { - auto lock = getLock(); - if (!shared->extra_schedule_pools[SchedulePool::PartsMetrics]) - shared->extra_schedule_pools[SchedulePool::PartsMetrics].emplace( + auto & item = shared->extra_schedule_pools[SchedulePool::PartsMetrics]; + callOnce(item.is_initialized, [&] { + item.pool = std::make_unique( settings.background_metrics_recalculation_schedule_pool_size, CurrentMetrics::BackgroundPartsMetricsSchedulePoolTask, "PtMetricsPool"); - return *shared->extra_schedule_pools[SchedulePool::PartsMetrics]; 
+ }); + return *item.pool; } BackgroundSchedulePool & Context::getExtraSchedulePool( SchedulePool::Type pool_type, SettingFieldUInt64 pool_size, CurrentMetrics::Metric metric, const char * name) const { - auto lock = getLock(); - if (!shared->extra_schedule_pools[pool_type]) - shared->extra_schedule_pools[pool_type].emplace(pool_size, metric, name); - return *shared->extra_schedule_pools[pool_type]; + auto & item = shared->extra_schedule_pools[pool_type]; + callOnce(item.is_initialized, [&] { + item.pool = std::make_unique( pool_size, metric, name); + }); + return *item.pool; } ThrottlerPtr Context::getDiskCacheThrottler() const { - auto lock = getLock(); - if (!shared->disk_cache_throttler) - { + callOnce(shared->disk_cache_throttler_initialized, [&] { shared->disk_cache_throttler = std::make_shared(settings.max_bandwidth_for_disk_cache); - } - + }); return shared->disk_cache_throttler; } -ThrottlerPtr Context::getReplicatedSendsThrottler() const +ThrottlerPtr Context::tryGetPreloadThrottler() const { - auto lock = getLock(); - if (!shared->replicated_sends_throttler) - shared->replicated_sends_throttler = std::make_shared(settings.max_replicated_sends_network_bandwidth_for_server); + callOnce(shared->preload_throttler_initialized, [&] { + shared->preload_throttler = settings.parts_preload_throttler == 0 ? 
nullptr : std::make_shared(settings.parts_preload_throttler); + }); + return shared->preload_throttler; +} +ThrottlerPtr Context::getReplicatedSendsThrottler() const +{ + callOnce(shared->replicated_sends_throttler_initialized, [&] { + shared->replicated_sends_throttler = std::make_shared( + settings.max_replicated_sends_network_bandwidth_for_server); + }); return shared->replicated_sends_throttler; } ThrottlerPtr Context::getReplicatedFetchesThrottler() const { - auto lock = getLock(); - if (!shared->replicated_fetches_throttler) - shared->replicated_fetches_throttler = std::make_shared(settings.max_replicated_fetches_network_bandwidth_for_server); - + callOnce(shared->replicated_fetches_throttler_initialized, [&] { + shared->replicated_fetches_throttler = std::make_shared( + settings.max_replicated_fetches_network_bandwidth_for_server); + }); return shared->replicated_fetches_throttler; } -void Context::initPreloadThrottler() -{ - auto lock = getLock(); - shared->preload_throttler = settings.parts_preload_throttler == 0 ? 
nullptr : std::make_shared(settings.parts_preload_throttler); -} - -ThrottlerPtr Context::tryGetPreloadThrottler() const -{ - auto lock = getLock(); - return shared->preload_throttler; -} - bool Context::hasDistributedDDL() const { return getConfigRef().has("distributed_ddl"); @@ -3160,7 +3170,7 @@ bool Context::hasDistributedDDL() const void Context::setDDLWorker(std::unique_ptr ddl_worker) { - auto lock = getLock(); + auto lock = getLock(); // checked if (shared->ddl_worker) throw Exception("DDL background thread has already been initialized", ErrorCodes::LOGICAL_ERROR); ddl_worker->startup(); @@ -3169,7 +3179,7 @@ void Context::setDDLWorker(std::unique_ptr ddl_worker) DDLWorker & Context::getDDLWorker() const { - auto lock = getLock(); + auto lock = getLock(); // checked if (!shared->ddl_worker) { if (!hasZooKeeper()) @@ -3468,8 +3478,8 @@ InterserverCredentialsPtr Context::getInterserverCredentials() std::pair Context::getCnchInterserverCredentials() const { - auto lock = getLock(); String user_name = getSettingsRef().username_for_internal_communication.toString(); + auto lock = getLock(); // checked auto password = shared->users_config->getString("users." 
+ user_name + ".password", ""); return {user_name, password}; @@ -3576,8 +3586,6 @@ UInt16 Context::getTCPPort() const if (auto env_port = getPortFromEnvForConsul("PORT0")) return env_port; - auto lock = getLock(); - const auto & config = getConfigRef(); return config.getInt("tcp_port", DBMS_DEFAULT_PORT); } @@ -3599,8 +3607,6 @@ UInt16 Context::getTCPPort(const String & host, UInt16 rpc_port) const std::optional Context::getTCPPortSecure() const { - auto lock = getLock(); - const auto & config = getConfigRef(); if (config.has("tcp_port_secure")) return config.getInt("tcp_port_secure"); @@ -3623,7 +3629,6 @@ UInt16 Context::getServerPort(const String & port_name) const UInt16 Context::getHaTCPPort() const { - auto lock = getLock(); const auto & config = getConfigRef(); return config.getInt("ha_tcp_port"); } @@ -3720,7 +3725,7 @@ void Context::setCluster(const String & cluster_name, const std::shared_ptrsystem_logs = std::make_unique(getGlobalContext(), getConfigRef()); } @@ -3747,7 +3752,7 @@ PartitionSelectorPtr Context::getBGPartitionSelector() const std::shared_ptr Context::getQueryLog() const { - auto lock = getLock(); + auto lock = getLock(); // checked if (!shared->system_logs) return {}; @@ -3758,7 +3763,7 @@ std::shared_ptr Context::getQueryLog() const std::shared_ptr Context::getQueryThreadLog() const { - auto lock = getLock(); + auto lock = getLock(); // checked if (!shared->system_logs) return {}; @@ -3769,7 +3774,7 @@ std::shared_ptr Context::getQueryThreadLog() const std::shared_ptr Context::getQueryExchangeLog() const { - auto lock = getLock(); + auto lock = getLock(); // checked if (!shared->system_logs) return {}; @@ -3780,7 +3785,7 @@ std::shared_ptr Context::getQueryExchangeLog() const std::shared_ptr Context::getPartLog(const String & part_database) const { - auto lock = getLock(); + auto lock = getLock(); // checked /// No part log or system logs are shutting down. 
if (!shared->system_logs) @@ -3798,7 +3803,7 @@ std::shared_ptr Context::getPartLog(const String & part_database) const std::shared_ptr Context::getPartMergeLog() const { - auto lock = getLock(); + auto lock = getLock(); // checked if (!shared->system_logs || !shared->system_logs->part_merge_log) return {}; @@ -3809,7 +3814,7 @@ std::shared_ptr Context::getPartMergeLog() const std::shared_ptr Context::getServerPartLog() const { - auto lock = getLock(); + auto lock = getLock(); // checked if (!shared->system_logs || !shared->system_logs->server_part_log) return {}; @@ -3821,7 +3826,7 @@ void Context::initializeCnchSystemLogs() { if ((shared->server_type != ServerType::cnch_server) && (shared->server_type != ServerType::cnch_worker)) return; - auto lock = getLock(); + auto lock = getLock(); // checked shared->cnch_system_logs = std::make_unique(getGlobalContext()); } @@ -3836,7 +3841,7 @@ void Context::insertViewRefreshTaskLog(const ViewRefreshTaskLogElement & element std::shared_ptr Context::getCnchQueryLog() const { - auto lock = getLock(); + auto lock = getLock(); // checked if (!shared->cnch_system_logs) return {}; @@ -3846,7 +3851,7 @@ std::shared_ptr Context::getCnchQueryLog() const std::shared_ptr Context::getViewRefreshTaskLog() const { - auto lock = getLock(); + auto lock = getLock(); // checked if (!shared->cnch_system_logs) return {}; @@ -3856,7 +3861,7 @@ std::shared_ptr Context::getViewRefreshTaskLog() const std::shared_ptr Context::getTraceLog() const { - auto lock = getLock(); + auto lock = getLock(); // checked if (!shared->system_logs) return {}; @@ -3867,7 +3872,7 @@ std::shared_ptr Context::getTraceLog() const std::shared_ptr Context::getTextLog() const { - auto lock = getLock(); + auto lock = getLock(); // checked if (!shared->system_logs) return {}; @@ -3878,7 +3883,7 @@ std::shared_ptr Context::getTextLog() const std::shared_ptr Context::getMetricLog() const { - auto lock = getLock(); + auto lock = getLock(); // checked if (!shared->system_logs) 
return {}; @@ -3889,7 +3894,7 @@ std::shared_ptr Context::getMetricLog() const std::shared_ptr Context::getAsynchronousMetricLog() const { - auto lock = getLock(); + auto lock = getLock(); // checked if (!shared->system_logs) return {}; @@ -3900,7 +3905,7 @@ std::shared_ptr Context::getAsynchronousMetricLog() const std::shared_ptr Context::getOpenTelemetrySpanLog() const { - auto lock = getLock(); + auto lock = getLock(); // checked if (!shared->system_logs) return {}; @@ -3910,7 +3915,7 @@ std::shared_ptr Context::getOpenTelemetrySpanLog() const std::shared_ptr Context::getKafkaLog() const { - auto lock = getLock(); + auto lock = getLock(); // checked if (!shared->system_logs) return {}; @@ -3920,7 +3925,7 @@ std::shared_ptr Context::getKafkaLog() const std::shared_ptr Context::getCloudKafkaLog() const { - auto lock = getLock(); + auto lock = getLock(); // checked if (!shared->cnch_system_logs) return {}; @@ -3929,7 +3934,7 @@ std::shared_ptr Context::getCloudKafkaLog() const std::shared_ptr Context::getCloudMaterializedMySQLLog() const { - auto lock = getLock(); + auto lock = getLock(); // checked if (!shared->cnch_system_logs) return {}; @@ -3938,7 +3943,7 @@ std::shared_ptr Context::getCloudMaterializedMySQLLog std::shared_ptr Context::getCloudUniqueTableLog() const { - auto lock = getLock(); + auto lock = getLock(); // checked if (!shared->cnch_system_logs) return {}; @@ -3947,7 +3952,7 @@ std::shared_ptr Context::getCloudUniqueTableLog() const std::shared_ptr Context::getMutationLog() const { - auto lock = getLock(); + auto lock = getLock(); // checked if (!shared->system_logs) return {}; @@ -3958,7 +3963,7 @@ std::shared_ptr Context::getMutationLog() const std::shared_ptr Context::getProcessorsProfileLog() const { - auto lock = getLock(); + auto lock = getLock(); // checked if (!shared->system_logs) return {}; @@ -3968,7 +3973,7 @@ std::shared_ptr Context::getProcessorsProfileLog() const std::shared_ptr Context::getRemoteReadLog() const { - auto lock = 
getLock(); + auto lock = getLock(); // checked if (!shared->system_logs) return {}; @@ -3978,7 +3983,7 @@ std::shared_ptr Context::getRemoteReadLog() const std::shared_ptr Context::getZooKeeperLog() const { - auto lock = getLock(); + auto lock = getLock(); // checked if (!shared->system_logs) return {}; @@ -3988,7 +3993,7 @@ std::shared_ptr Context::getZooKeeperLog() const std::shared_ptr Context::getAutoStatsTaskLog() const { - auto lock = getLock(); + auto lock = getLock(); // checked if (!shared->system_logs) return {}; @@ -3998,12 +4003,12 @@ std::shared_ptr Context::getAutoStatsTaskLog() const CompressionCodecPtr Context::chooseCompressionCodec(size_t part_size, double part_size_ratio) const { - auto lock = getLock(); + auto lock = getLock(); // checked if (!shared->compression_codec_selector) { constexpr auto config_name = "compression"; - const auto & config = getConfigRef(); + const auto & config = getConfigRefWithLock(lock); if (config.has(config_name)) shared->compression_codec_selector = std::make_unique(config, "compression"); @@ -4104,11 +4109,11 @@ void Context::updateStorageConfiguration(Poco::Util::AbstractConfiguration & con const CnchHiveSettings & Context::getCnchHiveSettings() const { - auto lock = getLock(); + auto lock = getLock(); // checked if (!shared->cnchhive_settings) { - const auto & config = getConfigRef(); + const auto & config = getConfigRefWithLock(lock); CnchHiveSettings cnchhive_settings; cnchhive_settings.loadFromConfig("hive", config); shared->cnchhive_settings.emplace(cnchhive_settings); @@ -4119,11 +4124,11 @@ const CnchHiveSettings & Context::getCnchHiveSettings() const const CnchHiveSettings & Context::getCnchLasSettings() const { - auto lock = getLock(); + auto lock = getLock(); // checked if (!shared->las_settings) { - const auto & config = getConfigRef(); + const auto & config = getConfigRefWithLock(lock); CnchHiveSettings las_settings; las_settings.loadFromConfig("las", config); 
shared->las_settings.emplace(las_settings); @@ -4133,11 +4138,11 @@ const CnchHiveSettings & Context::getCnchLasSettings() const const MergeTreeSettings & Context::getMergeTreeSettings(bool skip_unknown_settings) const { - auto lock = getLock(); + auto lock = getLock(); // checked if (!shared->merge_tree_settings) { - const auto & config = getConfigRef(); + const auto & config = getConfigRefWithLock(lock); MergeTreeSettings mt_settings; mt_settings.loadFromConfig("merge_tree", config, skip_unknown_settings); shared->merge_tree_settings.emplace(mt_settings); @@ -4148,11 +4153,11 @@ const MergeTreeSettings & Context::getMergeTreeSettings(bool skip_unknown_settin const CnchFileSettings & Context::getCnchFileSettings() const { - auto lock = getLock(); + auto lock = getLock(); // checked if (!shared->cnch_file_settings) { - auto & config = getConfigRef(); + const auto & config = getConfigRefWithLock(lock); shared->cnch_file_settings.emplace(); shared->cnch_file_settings->loadFromConfig("cnch_file", config); } @@ -4162,11 +4167,11 @@ const CnchFileSettings & Context::getCnchFileSettings() const const MergeTreeSettings & Context::getReplicatedMergeTreeSettings() const { - auto lock = getLock(); + auto lock = getLock(); // checked if (!shared->replicated_merge_tree_settings) { - const auto & config = getConfigRef(); + const auto & config = getConfigRefWithLock(lock); MergeTreeSettings mt_settings; mt_settings.loadFromConfig("merge_tree", config); mt_settings.loadFromConfig("replicated_merge_tree", config); @@ -4179,11 +4184,11 @@ const MergeTreeSettings & Context::getReplicatedMergeTreeSettings() const const StorageS3Settings & Context::getStorageS3Settings() const { #if !defined(ARCADIA_BUILD) - auto lock = getLock(); + auto lock = getLock(); // checked if (!shared->storage_s3_settings) { - const auto & config = getConfigRef(); + const auto & config = getConfigRefWithLock(lock); shared->storage_s3_settings.emplace().loadFromConfig("s3", config, getSettingsRef()); } @@ 
-4326,7 +4331,7 @@ OutputFormatPtr Context::getOutputFormat(const String & name, WriteBuffer & buf, time_t Context::getUptimeSeconds() const { - auto lock = getLock(); + auto lock = getLock(); // checked return shared->uptime_watch.elapsedSeconds(); } @@ -4471,7 +4476,7 @@ void Context::setQueryParameter(const String & name, const String & value) void Context::addBridgeCommand(std::unique_ptr cmd) const { - auto lock = getLock(); + auto lock = getLock(); // checked shared->bridge_commands.emplace_back(std::move(cmd)); } @@ -4490,7 +4495,7 @@ const IHostContextPtr & Context::getHostContext() const std::shared_ptr Context::getActionLocksManager() { - auto lock = getLock(); + auto lock = getLock(); // checked if (!shared->action_locks_manager) shared->action_locks_manager = std::make_shared(shared_from_this()); @@ -4583,7 +4588,7 @@ StorageID Context::resolveStorageID(StorageID storage_id, StorageNamespace where StorageID resolved = StorageID::createEmpty(); std::optional exc; { - auto lock = getLock(); + auto lock = getLock(); // checked resolved = resolveStorageIDImpl(std::move(storage_id), where, &exc); } if (exc) @@ -4604,7 +4609,7 @@ StorageID Context::tryResolveStorageID(StorageID storage_id, StorageNamespace wh StorageID resolved = StorageID::createEmpty(); { - auto lock = getLock(); + auto lock = getLock(); // checked resolved = resolveStorageIDImpl(std::move(storage_id), where, nullptr); } if (resolved && !resolved.hasUUID() && resolved.database_name != DatabaseCatalog::TEMPORARY_DATABASE) @@ -4721,7 +4726,7 @@ ZooKeeperMetadataTransactionPtr Context::getZooKeeperMetadataTransaction() const PartUUIDsPtr Context::getPartUUIDs() const { - auto lock = getLock(); + auto lock = getLocalLock(); // checked if (!part_uuids) /// For context itself, only this initialization is not const. /// We could have done in constructor. 
@@ -4747,7 +4752,7 @@ void Context::setReadTaskCallback(ReadTaskCallback && callback) PartUUIDsPtr Context::getIgnoredPartUUIDs() const { - auto lock = getLock(); + auto lock = getLocalLock(); // checked if (!ignored_part_uuids) const_cast(ignored_part_uuids) = std::make_shared(); @@ -4805,13 +4810,13 @@ void Context::setLasfsConnectionParams(const Poco::Util::AbstractConfiguration & void Context::setVETosConnectParams(const VETosConnectionParams & connect_params) { - auto lock = getLock(); + auto lock = getLock(); // checked shared->vetos_connection_params = connect_params; } const VETosConnectionParams & Context::getVETosConnectParams() const { - auto lock = getLock(); + auto lock = getLock(); // checked return shared->vetos_connection_params; } @@ -4827,7 +4832,7 @@ const OSSConnectionParams & Context::getOSSConnectParams() const void Context::setUniqueKeyIndexBlockCache(size_t cache_size_in_bytes) { - auto lock = getLock(); + auto lock = getLock(); // checked if (shared->unique_key_index_block_cache) throw Exception("Unique key index block cache has been already created", ErrorCodes::LOGICAL_ERROR); shared->unique_key_index_block_cache = IndexFile::NewLRUCache(cache_size_in_bytes); @@ -4835,13 +4840,13 @@ void Context::setUniqueKeyIndexBlockCache(size_t cache_size_in_bytes) UniqueKeyIndexBlockCachePtr Context::getUniqueKeyIndexBlockCache() const { - auto lock = getLock(); + auto lock = getLock(); // checked return shared->unique_key_index_block_cache; } void Context::setUniqueKeyIndexFileCache(size_t cache_size_in_bytes) { - auto lock = getLock(); + auto lock = getLock(); // checked if (shared->unique_key_index_file_cache) throw Exception("Unique key index file cache has been already created", ErrorCodes::LOGICAL_ERROR); shared->unique_key_index_file_cache = std::make_shared(*this, cache_size_in_bytes); @@ -4849,13 +4854,13 @@ void Context::setUniqueKeyIndexFileCache(size_t cache_size_in_bytes) UniqueKeyIndexFileCachePtr Context::getUniqueKeyIndexFileCache() 
const { - auto lock = getLock(); + auto lock = getLock(); // checked return shared->unique_key_index_file_cache; } void Context::setUniqueKeyIndexCache(size_t cache_size_in_bytes) { - auto lock = getLock(); + auto lock = getLock(); // checked if (shared->unique_key_index_cache) throw Exception("Unique key index cache has been already created", ErrorCodes::LOGICAL_ERROR); shared->unique_key_index_cache = std::make_shared(cache_size_in_bytes); @@ -4863,13 +4868,13 @@ void Context::setUniqueKeyIndexCache(size_t cache_size_in_bytes) std::shared_ptr Context::getUniqueKeyIndexCache() const { - auto lock = getLock(); + auto lock = getLock(); // checked return shared->unique_key_index_cache; } void Context::setDeleteBitmapCache(size_t cache_size_in_bytes) { - auto lock = getLock(); + auto lock = getLock(); // checked if (shared->delete_bitmap_cache) throw Exception("Delete bitmap cache has been already created", ErrorCodes::LOGICAL_ERROR); shared->delete_bitmap_cache = std::make_shared(cache_size_in_bytes); @@ -4877,7 +4882,7 @@ void Context::setDeleteBitmapCache(size_t cache_size_in_bytes) DeleteBitmapCachePtr Context::getDeleteBitmapCache() const { - auto lock = getLock(); + auto lock = getLock(); // checked return shared->delete_bitmap_cache; } @@ -4952,12 +4957,12 @@ void Context::setMetaCheckerStatus(bool stop) shared->stop_sync = stop; } -void Context::setChecksumsCache(const ChecksumsCacheSettings & settings) +void Context::setChecksumsCache(const ChecksumsCacheSettings & settings_) { if (shared->checksums_cache) throw Exception("Checksums cache has been already created.", ErrorCodes::LOGICAL_ERROR); - shared->checksums_cache = std::make_shared(settings); + shared->checksums_cache = std::make_shared(settings_); } std::shared_ptr Context::getChecksumsCache() const @@ -4965,12 +4970,12 @@ std::shared_ptr Context::getChecksumsCache() const return shared->checksums_cache; } -void Context::setGinIndexStoreFactory(const GinIndexStoreCacheSettings & settings) +void 
Context::setGinIndexStoreFactory(const GinIndexStoreCacheSettings & settings_) { if (shared->ginindex_store_factory) throw Exception("ginindex_store_factory has been already created.", ErrorCodes::LOGICAL_ERROR); - shared->ginindex_store_factory = std::make_shared(settings); + shared->ginindex_store_factory = std::make_shared(settings_); } std::shared_ptr Context::getGinIndexStoreFactory() const @@ -5091,7 +5096,7 @@ UInt64 Context::getPhysicalTimestamp() const void Context::setPartCacheManager() { - auto lock = getLock(); + auto lock = getLock(); // checked if (shared->cache_manager) throw Exception("Part cache manager has been already created.", ErrorCodes::LOGICAL_ERROR); @@ -5101,7 +5106,8 @@ void Context::setPartCacheManager() PartCacheManagerPtr Context::getPartCacheManager() const { - auto lock = getLock(); + /// no need to lock because PartCacheManager is initialized during server start up, + /// there is no concurrent setPartCacheManager and getPartCacheManager usage. return shared->cache_manager; } @@ -5138,7 +5144,7 @@ DaemonManagerClientPtr Context::getDaemonManagerClient() const void Context::setCnchServerManager(const Poco::Util::AbstractConfiguration & config) { - auto lock = getLock(); + auto lock = getLock(); // checked if (shared->server_manager) throw Exception("Server manager has been already created.", ErrorCodes::LOGICAL_ERROR); @@ -5147,7 +5153,7 @@ void Context::setCnchServerManager(const Poco::Util::AbstractConfiguration & con std::shared_ptr Context::getCnchServerManager() const { - auto lock = getLock(); + auto lock = getLock(); // checked if (!shared->server_manager) throw Exception("Server manager is not initiailized.", ErrorCodes::LOGICAL_ERROR); @@ -5158,7 +5164,7 @@ void Context::updateServerVirtualWarehouses(const ConfigurationPtr & config) { std::shared_ptr server_manager; { - auto lock = getLock(); + auto lock = getLock(); // checked server_manager = shared->server_manager; } if (server_manager) @@ -5167,7 +5173,7 @@ void 
Context::updateServerVirtualWarehouses(const ConfigurationPtr & config) void Context::setCnchTopologyMaster() { - auto lock = getLock(); + auto lock = getLock(); // checked if (shared->topology_master) throw Exception("Topology master has been already created.", ErrorCodes::LOGICAL_ERROR); @@ -5176,7 +5182,7 @@ void Context::setCnchTopologyMaster() std::shared_ptr Context::getCnchTopologyMaster() const { - auto lock = getLock(); + auto lock = getLock(); // checked if (!shared->topology_master) throw Exception("Topology master is not initialized.", ErrorCodes::LOGICAL_ERROR); @@ -5185,7 +5191,7 @@ std::shared_ptr Context::getCnchTopologyMaster() const GlobalTxnCommitterPtr Context::getGlobalTxnCommitter() const { - auto lock = getLock(); + auto lock = getLock(); // checked if (!shared->global_txn_committer) shared->global_txn_committer = std::make_shared(shared_from_this()); return shared->global_txn_committer; @@ -5427,7 +5433,7 @@ void Context::initResourceManagerClient() String host_port; try { - auto lock = getLock(); + auto lock = getLock(); // checked shared->rm_client = std::make_shared(getGlobalContext()); LOG_DEBUG(shared->log, "Initialised Resource Manager Client on try: {}", retry_count); return; @@ -5449,7 +5455,7 @@ ResourceManagerClientPtr Context::getResourceManagerClient() const void Context::initCnchBGThreads() { - auto lock = getLock(); + auto lock = getLock(); // checked shared->cnch_bg_threads_array = std::make_unique(shared_from_this()); } @@ -5606,14 +5612,13 @@ std::multimap Context::collectMutationStatus void Context::initCnchTransactionCoordinator() { - auto lock = getLock(); - + auto lock = getLock(); // checked shared->cnch_txn_coordinator = std::make_unique(shared_from_this()); } TransactionCoordinatorRcCnch & Context::getCnchTransactionCoordinator() const { - auto lock = getLock(); + auto lock = getLock(); // checked return *shared->cnch_txn_coordinator; } @@ -5621,7 +5626,7 @@ void Context::setCurrentTransaction(TransactionCnchPtr txn, 
bool finish_txn) { TransactionCnchPtr prev_txn; { - auto lock = getLock(); + auto lock = getLocalSharedLock(); prev_txn = current_cnch_txn; } @@ -5631,7 +5636,7 @@ void Context::setCurrentTransaction(TransactionCnchPtr txn, bool finish_txn) if (current_thread && txn) CurrentThread::get().setTransactionId(txn->getTransactionID()); - auto lock = getLock(); + auto lock = getLocalLock(); current_cnch_txn = std::move(txn); } @@ -5654,28 +5659,26 @@ TransactionCnchPtr Context::setTemporaryTransaction(const TxnTimestamp & txn_id, else cnch_txn = std::make_shared(getGlobalContext(), txn_id, primary_txn_id); - auto lock = getLock(); + auto lock = getLocalLock(); std::swap(current_cnch_txn, cnch_txn); return current_cnch_txn; } TransactionCnchPtr Context::getCurrentTransaction() const { - auto lock = getLock(); - + auto lock = getLocalSharedLock(); return current_cnch_txn; } TxnTimestamp Context::tryGetCurrentTransactionID() const { - auto lock = getLock(); - + auto lock = getLocalSharedLock(); return current_cnch_txn ? 
current_cnch_txn->getTransactionID() : TxnTimestamp{}; } TxnTimestamp Context::getCurrentTransactionID() const { - auto lock = getLock(); + auto lock = getLocalSharedLock(); if (!current_cnch_txn) throw Exception("Transaction is not set (empty)", ErrorCodes::LOGICAL_ERROR); @@ -5689,11 +5692,9 @@ TxnTimestamp Context::getCurrentTransactionID() const TxnTimestamp Context::getCurrentCnchStartTime() const { - auto lock = getLock(); - + auto lock = getLocalSharedLock(); if (!current_cnch_txn) throw Exception("Transaction is not set", ErrorCodes::LOGICAL_ERROR); - return current_cnch_txn->getStartTime(); } @@ -5808,7 +5809,7 @@ void Context::createOptimizerMetrics() std::shared_ptr Context::getStatisticsMemoryStore() { - auto lock = getLock(); + auto lock = getLocalLock(); if (!this->stats_memory_store) { this->stats_memory_store = std::make_shared(); @@ -5880,25 +5881,25 @@ void Context::waitReadFromClientFinished() const void Context::setPlanCacheManager(std::unique_ptr && manager) { - auto lock = getLock(); + auto lock = getLock(); // checked shared->plan_cache_manager = std::move(manager); } PlanCacheManager* Context::getPlanCacheManager() { - auto lock = getLock(); + auto lock = getLock(); // checked return shared->plan_cache_manager ? shared->plan_cache_manager.get() : nullptr; } void Context::setPreparedStatementManager(std::unique_ptr && manager) { - auto lock = getLock(); + auto lock = getLock(); // checked shared->prepared_statement_manager = std::move(manager); } PreparedStatementManager * Context::getPreparedStatementManager() { - auto lock = getLock(); + auto lock = getLock(); // checked return shared->prepared_statement_manager ? 
shared->prepared_statement_manager.get() : nullptr; } @@ -5927,16 +5928,12 @@ void Context::setQueryExpirationTimeStamp() AsynchronousReaderPtr Context::getThreadPoolReader() const { - auto lock = getLock(); - - if (!shared->asynchronous_remote_fs_reader) - { + callOnce(shared->readers_initialized, [&] { const Poco::Util::AbstractConfiguration & config = getConfigRef(); auto pool_size = config.getUInt(".threadpool_remote_fs_reader_pool_size", 250); auto queue_size = config.getUInt(".threadpool_remote_fs_reader_queue_size", 1000000); shared->asynchronous_remote_fs_reader = std::make_shared(pool_size, queue_size); - } - + }); return shared->asynchronous_remote_fs_reader; } } diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 66901b9b3d0..4158bc02b48 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -51,6 +51,7 @@ #include #include #include +#include #include #include #include @@ -183,6 +184,7 @@ class VWResourceGroupManager; class Credentials; class GSSAcceptorContext; struct SettingsConstraintsAndProfileIDs; +struct SettingsProfilesInfo; class RemoteHostFilter; struct StorageID; class IDisk; @@ -421,15 +423,13 @@ class CopyableAtomic : public std::atomic } }; -/** A set of known objects that can be used in the query. - * Consists of a shared part (always common to all sessions and queries) - * and copied part (which can be its own for each session or query). - * - * Everything is encapsulated for all sorts of checks and locks. - */ -class Context : public std::enable_shared_from_this +class ContextData { -private: +protected: + /// Use copy constructor or createGlobal() instead + ContextData(); + ContextData(const ContextData &); + ContextSharedPart * shared; ClientInfo client_info; @@ -579,13 +579,12 @@ class Context : public std::enable_shared_from_this bool enable_worker_fault_tolerance = false; timespec query_expiration_timestamp{}; + public: // Top-level OpenTelemetry trace context for the query. 
Makes sense only for a query context. OpenTelemetryTraceContext query_trace_context; -private: - friend struct NamedCnchSession; - +protected: using SampleBlockCache = std::unordered_map; mutable SampleBlockCache sample_block_cache; @@ -627,16 +626,36 @@ class Context : public std::enable_shared_from_this ExceptionHandlerPtr plan_segment_ex_handler = nullptr; bool read_from_client_finished = false; - bool is_explain_query = false; + int step_id = 2000; + int rule_id = 3000; + String graphviz_sub_query_path; + int sub_query_id = 0; + bool has_tenant_id_in_username = false; + String tenant_id; + String current_catalog; +}; + +/** A set of known objects that can be used in the query. + * Consists of a shared part (always common to all sessions and queries) + * and copied part (which can be its own for each session or query). + * + * Everything is encapsulated for all sorts of checks and locks. + */ +class Context : public ContextData, public std::enable_shared_from_this +{ +private: + /// ContextData mutex + mutable SharedMutex mutex; Context(); Context(const Context &); - Context & operator=(const Context &); public: + friend struct NamedCnchSession; + /// Create initial Context with ContextShared and etc. 
- static ContextMutablePtr createGlobal(ContextSharedPart * shared); + static ContextMutablePtr createGlobal(ContextSharedPart * shared_part); static ContextMutablePtr createCopy(const ContextWeakPtr & other); static ContextMutablePtr createCopy(const ContextMutablePtr & other); static ContextMutablePtr createCopy(const ContextPtr & other); @@ -645,8 +664,6 @@ class Context : public std::enable_shared_from_this void addSessionView(StorageID view_table_id, StoragePtr view_storage); StoragePtr getSessionView(StorageID view_table_id); - void copyFrom(const ContextPtr & other); - ~Context(); void setExtendedProfileInfo(const ExtendedProfileInfo & source) const; @@ -710,6 +727,7 @@ class Context : public std::enable_shared_from_this /// Global application configuration settings. void setConfig(const ConfigurationPtr & config); const Poco::Util::AbstractConfiguration & getConfigRef() const; + const Poco::Util::AbstractConfiguration & getConfigRefWithLock(const std::unique_lock &) const; void initRootConfig(const Poco::Util::AbstractConfiguration & poco_config); const RootConfiguration & getRootConfig() const; @@ -941,7 +959,6 @@ class Context : public std::enable_shared_from_this void setSetting(const StringRef & name, const Field & value); void applySettingChange(const SettingChange & change); void applySettingsChanges(const SettingsChanges & changes, bool internal = true); - void applySettingsChanges(const JSON & changes); /// Checks the constraints. 
void checkSettingsConstraints(const SettingChange & change) const; @@ -1245,13 +1262,9 @@ class Context : public std::enable_shared_from_this BackgroundSchedulePool & getDistributedSchedulePool() const; BackgroundSchedulePool & getConsumeSchedulePool() const; - BackgroundSchedulePool & getRestartSchedulePool() const; - BackgroundSchedulePool & getHaLogSchedulePool() const; - BackgroundSchedulePool & getMutationSchedulePool() const; BackgroundSchedulePool & getLocalSchedulePool() const; BackgroundSchedulePool & getMergeSelectSchedulePool() const; BackgroundSchedulePool & getUniqueTableSchedulePool() const; - BackgroundSchedulePool & getMemoryTableSchedulePool() const; BackgroundSchedulePool & getTopologySchedulePool() const; BackgroundSchedulePool & getMetricsRecalculationSchedulePool() const; /// no more get pool method, use getExtraSchedulePool @@ -1259,13 +1272,10 @@ class Context : public std::enable_shared_from_this SchedulePool::Type pool_type, SettingFieldUInt64 pool_size, CurrentMetrics::Metric metric, const char * name) const; ThrottlerPtr getDiskCacheThrottler() const; - + ThrottlerPtr tryGetPreloadThrottler() const; ThrottlerPtr getReplicatedFetchesThrottler() const; ThrottlerPtr getReplicatedSendsThrottler() const; - void initPreloadThrottler(); - ThrottlerPtr tryGetPreloadThrottler() const; - /// Has distributed_ddl configuration or not. 
bool hasDistributedDDL() const; void setDDLWorker(std::unique_ptr ddl_worker); @@ -1440,17 +1450,14 @@ class Context : public std::enable_shared_from_this UInt32 nextNodeId() { return id_allocator->nextId(); } void createPlanNodeIdAllocator(int max_id = 1); - int step_id = 2000; int getStepId() const { return step_id; } void setStepId(int step_id_) { step_id = step_id_; } int getAndIncStepId() { return ++step_id; } - int rule_id = 3000; int getRuleId() const { return rule_id; } void setRuleId(int rule_id_) { rule_id = rule_id_; } void incRuleId() { ++rule_id; } - String graphviz_sub_query_path; void setExecuteSubQueryPath(String path) { graphviz_sub_query_path = std::move(path); } String getExecuteSubQueryPath() const { @@ -1461,7 +1468,6 @@ class Context : public std::enable_shared_from_this graphviz_sub_query_path = ""; } - int sub_query_id = 0; int incAndGetSubQueryId() { return ++sub_query_id; } const SymbolAllocatorPtr & getSymbolAllocator() { return symbol_allocator; } @@ -1525,10 +1531,10 @@ class Context : public std::enable_shared_from_this return settings.default_catalog.toString(); } - void setChecksumsCache(const ChecksumsCacheSettings & settings); + void setChecksumsCache(const ChecksumsCacheSettings & settings_); std::shared_ptr getChecksumsCache() const; - void setGinIndexStoreFactory(const GinIndexStoreCacheSettings & settings); + void setGinIndexStoreFactory(const GinIndexStoreCacheSettings & settings_); std::shared_ptr getGinIndexStoreFactory() const; void setPrimaryIndexCache(size_t cache_size_in_bytes); @@ -1704,16 +1710,28 @@ class Context : public std::enable_shared_from_this bool is_tenant_user() const { return has_tenant_id_in_username; } private: - bool has_tenant_id_in_username = false; - String tenant_id; - String current_catalog; std::unique_lock getLock() const; + std::unique_lock getLocalLock() const; + std::shared_lock getLocalSharedLock() const; void initGlobal(); /// Compute and set actual user settings, client_info.current_user 
should be set - void calculateAccessRights(); + void calculateAccessRightsWithLock(const std::unique_lock &); + + void setCurrentProfileWithLock(const String & profile_name, const std::unique_lock & lock); + void setCurrentProfileWithLock(const UUID & profile_id, const std::unique_lock & lock); + void setCurrentProfileWithLock(const SettingsProfilesInfo & profiles_info, const std::unique_lock & lock); + void setSettingWithLock(const StringRef & name, const String & value, const std::unique_lock & lock); + void setSettingWithLock(const StringRef & name, const Field & value, const std::unique_lock & lock); + void applySettingChangeWithLock(const SettingChange & change, const std::unique_lock & lock); + void applySettingsChangesWithLock(const SettingsChanges & changes, bool internal, const std::unique_lock & lock); + std::shared_ptr getSettingsConstraintsAndCurrentProfilesWithLock() const; + void checkSettingsConstraintsWithLock(const SettingChange & change) const; + void checkSettingsConstraintsWithLock(const SettingsChanges & changes) const; + void checkSettingsConstraintsWithLock(SettingsChanges & changes) const; + void clampToSettingsConstraintsWithLock(SettingsChanges & changes) const; template void checkAccessImpl(const Args &... 
args) const; diff --git a/src/Interpreters/Context_fwd.h b/src/Interpreters/Context_fwd.h index 4d1a56cd4fa..7674de6804c 100644 --- a/src/Interpreters/Context_fwd.h +++ b/src/Interpreters/Context_fwd.h @@ -13,13 +13,9 @@ namespace SchedulePool enum Type { Consume, - Restart, - HaLog, - Mutation, Local, MergeSelect, UniqueTable, - MemoryTable, CNCHTopology, PartsMetrics, BspGC, diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index 6d1b5646812..78232a92a1f 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -494,7 +494,9 @@ void HTTPHandler::processQuery( session = context->acquireNamedSession(session_id, session_timeout, session_check == "1"); - context->copyFrom(session->context); /// FIXME: maybe move this part to HandleRequest(), copyFrom() is used only here. + /// FIXME: maybe move this part to HandleRequest() + /// see also https://github.com/ClickHouse/ClickHouse/pull/26864 + context = Context::createCopy(session->context); context->setSessionContext(session->context); } From e564a323d26e9bb98b9b217e2c49b26dd9a41c6b Mon Sep 17 00:00:00 2001 From: fredwang Date: Thu, 1 Aug 2024 03:29:50 +0000 Subject: [PATCH 013/292] update ci to work on release/1.0.x branch --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3cc84bdf392..0501522c019 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2,9 +2,9 @@ name: CI on: # Triggers the workflow on push or pull request events but only for the "main" branch push: - branches: [ "master" , "release/0.4.x" ] + branches: [ "master" , "release/1.0.x" ] pull_request: - branches: [ "master" , "release/0.4.x" ] + branches: [ "master" , "release/1.0.x" ] concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} From 5dad9c23d0daec107bcb7bceb73bfd21895fa948 Mon Sep 17 00:00:00 2001 From: fredwang Date: Thu, 1 Aug 2024 03:31:05 
+0000 Subject: [PATCH 014/292] Merge branch 'cherry-pick-bbe0f585' into 'cnch-2.2' fix(clickhousech@m-4618741564): add some more whitelist setting and fix a bug for setting control See merge request dp/ClickHouse!22551 --- src/Common/SettingsChanges.cpp | 39 ++++++++++++++++++++++++++++++++++ src/Interpreters/Context.cpp | 2 +- 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/src/Common/SettingsChanges.cpp b/src/Common/SettingsChanges.cpp index 5ab86fe2c26..e8c585130de 100644 --- a/src/Common/SettingsChanges.cpp +++ b/src/Common/SettingsChanges.cpp @@ -243,6 +243,7 @@ std::unordered_set SettingsChanges::WHITELIST_SETTINGS = "enable_deterministic_sample_by_range", "enable_dictionary_compression", "enable_direct_insert", + "enable_distinct_remove", "enable_distributed_stages", "enable_dynamic_filter", "enable_final_for_delta", @@ -346,6 +347,15 @@ std::unordered_set SettingsChanges::WHITELIST_SETTINGS = "kafka_max_partition_fetch_bytes", "kafka_session_timeout_ms", "kms_token", + "lasfs_access_key", + "lasfs_endpoint", + "lasfs_identity_id", + "lasfs_identity_type", + "lasfs_overwrite", + "lasfs_region", + "lasfs_secret_key", + "lasfs_service_name", + "lasfs_session_token", "load_balancing_offset", "local_disk_cache_thread_pool_size", "log_id", @@ -490,6 +500,7 @@ std::unordered_set SettingsChanges::WHITELIST_SETTINGS = "preload_checksums_and_primary_index_cache", "priority", "process_list_block_time", + "profile", "query_auto_retry", "query_auto_retry_millisecond", "query_cache_min_lifetime", @@ -511,6 +522,27 @@ std::unordered_set SettingsChanges::WHITELIST_SETTINGS = "restore_table_expression_in_distributed", "result_overflow_mode", "rm_zknodes_while_alter_engine", + "s3_access_key_id", + "s3_access_key_secret", + "s3_ak_id", + "s3_ak_secret", + "s3_check_objects_after_upload", + "s3_endpoint", + "s3_gc_inter_partition_parallelism", + "s3_gc_intra_partition_parallelism", + "s3_max_connections", + "s3_max_list_nums", + "s3_max_redirects", + 
"s3_max_request_ms", + "s3_max_single_part_upload_size", + "s3_max_single_read_retries", + "s3_max_unexpected_write_error_retries", + "s3_min_upload_part_size", + "s3_region", + "s3_skip_empty_files", + "s3_upload_part_size_multiply_factor", + "s3_upload_part_size_multiply_parts_count_threshold", + "s3_use_virtual_hosted_style", "schedule_sync_thread_per_table", "select_sequential_consistency", "send_logs_level", @@ -537,6 +569,13 @@ std::unordered_set SettingsChanges::WHITELIST_SETTINGS = "tcp_keep_alive_timeout", "tealimit_order_keep", "timeout_before_checking_execution_speed", + "tos_access_key", + "tos_connection_timeout", + "tos_endpoint", + "tos_region", + "tos_request_timeout", + "tos_secret_key", + "tos_security_token", "totals_auto_threshold", "totals_mode", "underlying_dictionary_tables", diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index ed413aeadfb..f2547e0ad49 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -2182,7 +2182,7 @@ void Context::applySettingsChangesWithLock(const SettingsChanges & changes, bool // NOTE: tenanted users connect to server using tenant id given in connection info. 
// allow only whitelisted settings for tenanted users - if (is_tenant_user() && !internal && !isInternalQuery () && getIsRestrictSettingsToWhitelist() && getClientInfo().query_kind == ClientInfo::QueryKind::INITIAL_QUERY && !getCurrentTenantId().empty()) + if (is_tenant_user() && !internal && !isInternalQuery () && getIsRestrictSettingsToWhitelist() && (getClientInfo().query_kind == ClientInfo::QueryKind::INITIAL_QUERY || session_context.lock().get() == this) && !getCurrentTenantId().empty()) { for (const auto & change : changes) { From 2677368ee395ddd91b2f9c8877fbe6e930063de4 Mon Sep 17 00:00:00 2001 From: fredwang Date: Thu, 1 Aug 2024 03:31:26 +0000 Subject: [PATCH 015/292] Merge branch 'cherry-pick-32009262' into 'cnch-2.2' fix(clickhousech@m-4616285035): support muti-tenant "processes" system table and kill query See merge request dp/ClickHouse!22550 --- src/Access/ContextAccess.cpp | 1 + src/Interpreters/InterpreterKillQueryQuery.cpp | 18 +++++++++++------- src/Storages/System/StorageSystemProcesses.cpp | 15 ++++++++++++--- 3 files changed, 24 insertions(+), 10 deletions(-) diff --git a/src/Access/ContextAccess.cpp b/src/Access/ContextAccess.cpp index 41d8393d500..4e0919d3d52 100644 --- a/src/Access/ContextAccess.cpp +++ b/src/Access/ContextAccess.cpp @@ -74,6 +74,7 @@ namespace "current_roles", "enabled_roles", "quota_usage", + "processes", /// The following tables hide some rows if the current user doesn't have corresponding SHOW privileges. 
/// For IDE tools to get schema info diff --git a/src/Interpreters/InterpreterKillQueryQuery.cpp b/src/Interpreters/InterpreterKillQueryQuery.cpp index 106b7eae603..05065290968 100644 --- a/src/Interpreters/InterpreterKillQueryQuery.cpp +++ b/src/Interpreters/InterpreterKillQueryQuery.cpp @@ -45,6 +45,7 @@ #include #include #include +#include namespace DB @@ -121,24 +122,26 @@ static QueryDescriptors extractQueriesExceptMeAndCheckAccess(const Block & proce }; String query_user; + const auto & tenant_id = getCurrentTenantId(); + const auto & nontenant_client_user = getOriginalEntityName(my_client.current_user, tenant_id); for (size_t i = 0; i < num_processes; ++i) { if ((my_client.current_query_id == query_id_col.getDataAt(i).toString()) - && (my_client.current_user == user_col.getDataAt(i).toString())) + && (nontenant_client_user == user_col.getDataAt(i).toString())) continue; auto query_id = query_id_col.getDataAt(i).toString(); query_user = user_col.getDataAt(i).toString(); - if ((my_client.current_user != query_user) && !is_kill_query_granted()) + if ((nontenant_client_user != query_user) && !is_kill_query_granted()) continue; res.emplace_back(std::move(query_id), query_user, i, false); } if (res.empty() && access_denied) - throw Exception("User " + my_client.current_user + " attempts to kill query created by " + query_user, ErrorCodes::ACCESS_DENIED); + throw Exception("User " + nontenant_client_user + " attempts to kill query created by " + query_user, ErrorCodes::ACCESS_DENIED); return res; } @@ -225,6 +228,7 @@ BlockIO InterpreterKillQueryQuery::execute() { case ASTKillQueryQuery::Type::Query: { + auto context = getContext(); auto where_clause = DB::collectWhereORClausePredicate(query.where_expression, getContext()); String query_id; std::for_each(where_clause.begin(), where_clause.end(), [&query_id](const std::map & wheres) { @@ -233,13 +237,13 @@ BlockIO InterpreterKillQueryQuery::execute() query_id = iter->second.get(); }); if (!query_id.empty()) - 
getContext()->getQueueManager()->cancel(query_id); + context->getQueueManager()->cancel(query_id); Block processes_block = getSelectResult("query_id, user, query", "system.processes"); if (!processes_block) return res_io; - ProcessList & process_list = getContext()->getProcessList(); - QueryDescriptors queries_to_stop = extractQueriesExceptMeAndCheckAccess(processes_block, getContext()); + ProcessList & process_list = context->getProcessList(); + QueryDescriptors queries_to_stop = extractQueriesExceptMeAndCheckAccess(processes_block, context); auto header = processes_block.cloneEmpty(); header.insert(0, {ColumnString::create(), std::make_shared(), "kill_status"}); @@ -250,7 +254,7 @@ BlockIO InterpreterKillQueryQuery::execute() for (const auto & query_desc : queries_to_stop) { auto code = (query.test) ? CancellationCode::Unknown - : process_list.sendCancelToQuery(query_desc.query_id, query_desc.user, true); + : process_list.sendCancelToQuery(query_desc.query_id, (context->is_tenant_user() ? 
formatTenantEntityName(query_desc.user) : query_desc.user), true); insertResultRow(query_desc.source_num, code, processes_block, header, res_columns); } diff --git a/src/Storages/System/StorageSystemProcesses.cpp b/src/Storages/System/StorageSystemProcesses.cpp index 828e1dba82f..469e5953f7c 100644 --- a/src/Storages/System/StorageSystemProcesses.cpp +++ b/src/Storages/System/StorageSystemProcesses.cpp @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -105,19 +106,27 @@ NamesAndAliases StorageSystemProcesses::getNamesAndAliases() void StorageSystemProcesses::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const { ProcessList::Info info = context->getProcessList().getInfo(true, true, true); + auto tenant_id = getCurrentTenantId(); + if (!context->is_tenant_user()) + tenant_id.resize(0); for (const auto & process : info) { + if (!tenant_id.empty() && !isTenantMatchedEntityName(process.client_info.initial_user, tenant_id)) + { + continue; + } + size_t i = 0; res_columns[i++]->insert(process.client_info.query_kind == ClientInfo::QueryKind::INITIAL_QUERY); - res_columns[i++]->insert(process.client_info.current_user); + res_columns[i++]->insert(getOriginalEntityName(process.client_info.current_user, tenant_id)); res_columns[i++]->insert(process.client_info.current_query_id); res_columns[i++]->insertData(IPv6ToBinary(process.client_info.current_address.host()).data(), 16); res_columns[i++]->insert(process.client_info.current_address.port()); - res_columns[i++]->insert(process.client_info.initial_user); + res_columns[i++]->insert(getOriginalEntityName(process.client_info.initial_user, tenant_id)); res_columns[i++]->insert(process.client_info.initial_query_id); res_columns[i++]->insertData(IPv6ToBinary(process.client_info.initial_address.host()).data(), 16); res_columns[i++]->insert(process.client_info.initial_address.port()); @@ -180,7 +189,7 @@ void StorageSystemProcesses::fillData(MutableColumns & 
res_columns, ContextPtr c } } - res_columns[i++]->insert(process.current_database); + res_columns[i++]->insert(getOriginalEntityName(process.current_database, tenant_id)); } } From 61e698cc6cffb84cd4d5ed5510f41f07961523be Mon Sep 17 00:00:00 2001 From: fredwang Date: Thu, 1 Aug 2024 03:33:07 +0000 Subject: [PATCH 016/292] Merge branch 'cherry-pick-b216c48c-3' into 'cnch-2.2' fix(clickhousech@m-4549422250): [cp cnch-2.2] fix possible memory leak by librdkafka See merge request dp/ClickHouse!22568 # Conflicts: # src/Storages/Kafka/StorageCloudKafka.cpp --- .../Kafka/CnchKafkaBlockInputStream.cpp | 15 +++++++++++++++ .../Kafka/CnchReadBufferFromKafkaConsumer.cpp | 18 ++++++++++++++++-- .../Kafka/CnchReadBufferFromKafkaConsumer.h | 16 ++++++++++++++++ src/Storages/Kafka/KafkaConsumer.cpp | 15 +-------------- src/Storages/Kafka/KafkaConsumer.h | 5 +++-- 5 files changed, 51 insertions(+), 18 deletions(-) diff --git a/src/Storages/Kafka/CnchKafkaBlockInputStream.cpp b/src/Storages/Kafka/CnchKafkaBlockInputStream.cpp index 9b909e3e696..84a34a81f78 100644 --- a/src/Storages/Kafka/CnchKafkaBlockInputStream.cpp +++ b/src/Storages/Kafka/CnchKafkaBlockInputStream.cpp @@ -120,6 +120,21 @@ CnchKafkaBlockInputStream::~CnchKafkaBlockInputStream() cloud_kafka_log->add(kafka_skip_log); } + auto rdkafka_errors_buffer = read_buf->getRdkafkaErrorsBuffer(); + if (!rdkafka_errors_buffer.empty()) + { + for (const auto & err : rdkafka_errors_buffer) + { + auto kafka_error_log = storage.createKafkaLog(KafkaLogElement::EXCEPTION, consumer_index); + kafka_error_log.event_time = err.timestamp_usec; + kafka_error_log.last_exception = err.text; + kafka_error_log.has_error = true; + kafka_log->add(kafka_error_log); + if (cloud_kafka_log) + cloud_kafka_log->add(kafka_error_log); + } + } + IRowInputFormat * row_input = nullptr; if (!children.empty()) { diff --git a/src/Storages/Kafka/CnchReadBufferFromKafkaConsumer.cpp b/src/Storages/Kafka/CnchReadBufferFromKafkaConsumer.cpp index 
05f25ff3d7b..fd8ab7fd9fc 100644 --- a/src/Storages/Kafka/CnchReadBufferFromKafkaConsumer.cpp +++ b/src/Storages/Kafka/CnchReadBufferFromKafkaConsumer.cpp @@ -205,9 +205,21 @@ bool CnchReadBufferFromKafkaConsumer::nextImpl() /// thus here we don't need to check if the message has some error; /// Of course, we may get no message, e.g there are no more messages in the topic-partition now. auto new_message = consumer->poll(std::chrono::milliseconds(poll_timeout)); - if (!new_message) + if (!new_message || new_message.is_eof()) continue; + /// Must continue polling and handling the consumer queue even if the queue is filled with errors; + /// or the memory leak occurs because the consumer keeps fetching from brokers and fill the queue + if (auto error = new_message.get_error()) + { + ++rdkafka_errors; + if (consumer->is_serious_err(error)) + consumer->setDestroyed(); + + rdkafka_errors_buffer.push_back({"poll(): " + error.to_string(), static_cast(Poco::Timestamp().epochTime())}); + continue; + } + /// Get an available message, save it for committing current = std::move(new_message); read_messages += 1; @@ -276,7 +288,7 @@ bool CnchReadBufferFromKafkaConsumer::nextImpl() } /// This buffer/consumer has been expired if reached here - LOG_DEBUG(log, "Stalled. Polled {} messages", read_messages); + LOG_DEBUG(log, "Stalled. 
Polled {} messages and {} errors", read_messages, rdkafka_errors); stalled = true; return false; } @@ -292,6 +304,8 @@ void CnchReadBufferFromKafkaConsumer::reset() skipped_msgs_in_holes = 0; skipped_ofsets_hole.clear(); skip_messages_by_sample = 0; + rdkafka_errors = 0; + rdkafka_errors_buffer.clear(); } bool CnchReadBufferFromKafkaConsumer::hasExpired() diff --git a/src/Storages/Kafka/CnchReadBufferFromKafkaConsumer.h b/src/Storages/Kafka/CnchReadBufferFromKafkaConsumer.h index 35f51e3c6ba..87ff92b46bc 100644 --- a/src/Storages/Kafka/CnchReadBufferFromKafkaConsumer.h +++ b/src/Storages/Kafka/CnchReadBufferFromKafkaConsumer.h @@ -24,6 +24,7 @@ #include #include +#include namespace DB { @@ -47,6 +48,13 @@ class CnchReadBufferFromKafkaConsumer : public ReadBuffer using Message = cppkafka::Message; public: + struct RdkafkaErrorInfo + { + String text; + UInt64 timestamp_usec; + }; + using RdkafkaErrorsBuffer = boost::circular_buffer; + CnchReadBufferFromKafkaConsumer( ConsumerPtr consumer_, const String & logger_name, @@ -64,6 +72,7 @@ class CnchReadBufferFromKafkaConsumer : public ReadBuffer , run(run_) , create_time(time(nullptr)) , enable_skip_offsets_hole(enable_skip_offsets_hole_) + , rdkafka_errors_buffer(ERRORS_DEPTH) { } @@ -91,6 +100,7 @@ class CnchReadBufferFromKafkaConsumer : public ReadBuffer size_t getSkippedMessagesBySampling() const { return skip_messages_by_sample; } size_t getCreateTime() const { return create_time; } size_t getAliveTime() const { return alive_time; } + auto getRdkafkaErrorsBuffer() const { return rdkafka_errors_buffer; } // Return values for the message that's being read. 
const Message & currentMessage() const { return current; } @@ -135,6 +145,12 @@ class CnchReadBufferFromKafkaConsumer : public ReadBuffer std::int64_t, PairHash> offsets; + /// The errors from rdkafka should not be diverse; + /// so we only need to record the recent ones if they occur + size_t rdkafka_errors{0}; + const size_t ERRORS_DEPTH = 10; + RdkafkaErrorsBuffer rdkafka_errors_buffer; + bool nextImpl() override; bool hasExpired(); void drain(); diff --git a/src/Storages/Kafka/KafkaConsumer.cpp b/src/Storages/Kafka/KafkaConsumer.cpp index de90c9654c5..ab1fe33b962 100644 --- a/src/Storages/Kafka/KafkaConsumer.cpp +++ b/src/Storages/Kafka/KafkaConsumer.cpp @@ -30,7 +30,7 @@ namespace ErrorCodes using namespace cppkafka; -inline bool is_serious_err(cppkafka::Error error) +bool KafkaConsumer::is_serious_err(cppkafka::Error error) const { auto && ec = error.get_error(); return ec == RD_KAFKA_RESP_ERR__TRANSPORT @@ -51,19 +51,6 @@ inline bool is_serious_err(cppkafka::Error error) throw DB::Exception(std::string(__func__) + "(): " + _e.what(), ErrorCodes::RDKAFKA_EXCEPTION); \ } -cppkafka::Message KafkaConsumer::poll(std::chrono::milliseconds timeout) -{ - auto message = cppkafka::Consumer::poll(timeout); - if (message && message.get_error()) - { - if (is_serious_err(message.get_error())) - this->is_destroyed = true; - - throw Exception("poll(): " + message.get_error().to_string(), ErrorCodes::RDKAFKA_EXCEPTION); - } - return message; -} - void KafkaConsumer::subscribe(const std::vector& topics) { exception_wrapper({ diff --git a/src/Storages/Kafka/KafkaConsumer.h b/src/Storages/Kafka/KafkaConsumer.h index ad6eb4fad95..cfa2e00bd32 100644 --- a/src/Storages/Kafka/KafkaConsumer.h +++ b/src/Storages/Kafka/KafkaConsumer.h @@ -26,8 +26,6 @@ class KafkaConsumer : public cppkafka::Consumer public: using cppkafka::Consumer::Consumer; - cppkafka::Message poll(std::chrono::milliseconds timeout); - void subscribe(const std::vector& topics); void unsubscribe(); void assign(const 
cppkafka::TopicPartitionList& topic_partitions); @@ -47,6 +45,9 @@ class KafkaConsumer : public cppkafka::Consumer const std::vector & get_cached_subscription() const { return cached_subscription; } const cppkafka::TopicPartitionList & get_cached_assignment() const { return cached_assignment; } bool check_destroyed() const { return is_destroyed; } + void setDestroyed() const { is_destroyed = true; } + + bool is_serious_err(cppkafka::Error error) const; private: std::vector cached_subscription; From e79d561aff641c3be678b6784ba03bcfbadc3ad4 Mon Sep 17 00:00:00 2001 From: fredwang Date: Thu, 1 Aug 2024 03:34:02 +0000 Subject: [PATCH 017/292] Merge 'cherry-pick-7e6f8db0' into 'cnch-2.2' fix(optimizer@m-3460292615): Prepared statement support RBAC and multi-tenant See merge request: !22561 # Conflicts: # src/Parsers/ASTIdentifier.cpp # src/Parsers/formatTenantDatabaseName.cpp --- src/Access/AccessType.h | 2 + src/Access/ContextAccess.cpp | 5 +- .../InterpreterDropPreparedStatementQuery.cpp | 6 +- .../InterpreterSelectQueryUseOptimizer.cpp | 18 +++-- .../InterpreterShowPreparedStatementQuery.cpp | 23 ++++++- .../PreparedStatementManager.cpp | 30 +++++--- .../PreparedStatementManager.h | 9 +-- src/Parsers/ASTIdentifier.cpp | 11 ++- src/Parsers/ASTIdentifier.h | 4 +- src/Parsers/ASTPreparedStatement.cpp | 68 ++++++++++++++++++- src/Parsers/ASTPreparedStatement.h | 13 ++++ src/Parsers/ParserPreparedStatement.cpp | 55 +++++++++++++-- src/Parsers/formatTenantDatabaseName.cpp | 19 +++++- src/Parsers/formatTenantDatabaseName.h | 4 +- .../48035_prepared_statement.reference | 42 ++++++++++++ .../48035_prepared_statement.sql | 36 ++++++++++ 16 files changed, 297 insertions(+), 48 deletions(-) diff --git a/src/Access/AccessType.h b/src/Access/AccessType.h index 95b9f447c98..1d31ee4869b 100644 --- a/src/Access/AccessType.h +++ b/src/Access/AccessType.h @@ -124,6 +124,7 @@ enum class AccessType implicitly enabled by the grant CREATE_TABLE on any table */ \ M(CREATE_FUNCTION, "", 
GLOBAL, CREATE) /* allows to execute CREATE FUNCTION */ \ M(CREATE_BINDING, "", GLOBAL, CREATE) /* allows to execute CREATE BINDING */ \ + M(CREATE_PREPARED_STATEMENT, "", GLOBAL, CREATE) /* allows to execute CREATE PREPARED STATEMENT */ \ M(CREATE, "", GROUP, ALL) /* allows to execute {CREATE|ATTACH} */ \ \ M(DROP_DATABASE, "", DATABASE, DROP) /* allows to execute {DROP|DETACH} DATABASE */ \ @@ -133,6 +134,7 @@ enum class AccessType M(DROP_DICTIONARY, "", DICTIONARY, DROP) /* allows to execute {DROP|DETACH} DICTIONARY */ \ M(DROP_FUNCTION, "", GLOBAL, DROP) /* allows to execute DROP FUNCTION */\ M(DROP_BINDING, "", GLOBAL, DROP) /* allows to execute DROP BINDING */\ + M(DROP_PREPARED_STATEMENT, "", GLOBAL, DROP) /* allows to execute DROP PREPARED STATEMENT */ \ M(DROP, "", GROUP, ALL) /* allows to execute {DROP|DETACH} */ \ \ M(TRUNCATE, "TRUNCATE TABLE", TABLE, ALL) \ diff --git a/src/Access/ContextAccess.cpp b/src/Access/ContextAccess.cpp index 4e0919d3d52..f9eb6b48258 100644 --- a/src/Access/ContextAccess.cpp +++ b/src/Access/ContextAccess.cpp @@ -701,15 +701,16 @@ bool ContextAccess::checkAccessImplHelper(const AccessFlags & flags, const Args const AccessFlags dictionary_ddl = AccessType::CREATE_DICTIONARY | AccessType::DROP_DICTIONARY; const AccessFlags function_ddl = AccessType::CREATE_FUNCTION | AccessType::DROP_FUNCTION; const AccessFlags binding_ddl = AccessType::CREATE_BINDING | AccessType::DROP_BINDING; + const AccessFlags prepared_statement_ddl = AccessType::CREATE_PREPARED_STATEMENT | AccessType::DROP_PREPARED_STATEMENT; const AccessFlags table_and_dictionary_ddl = table_ddl | dictionary_ddl; - const AccessFlags table_and_dictionary_and_function_ddl_and_binding = table_ddl | dictionary_ddl | function_ddl | binding_ddl; + const AccessFlags table_and_dictionary_and_function_ddl_and_binding = table_ddl | dictionary_ddl | function_ddl | binding_ddl | prepared_statement_ddl; const AccessFlags write_table_access = AccessType::INSERT | AccessType::OPTIMIZE; 
const AccessFlags write_dcl_access = AccessType::ACCESS_MANAGEMENT - AccessType::SHOW_ACCESS; const AccessFlags not_readonly_flags = write_table_access | table_and_dictionary_and_function_ddl_and_binding | write_dcl_access | AccessType::SYSTEM | AccessType::KILL_QUERY; const AccessFlags not_readonly_1_flags = AccessType::CREATE_TEMPORARY_TABLE; - const AccessFlags ddl_flags = table_ddl | dictionary_ddl | function_ddl | binding_ddl; + const AccessFlags ddl_flags = table_ddl | dictionary_ddl | function_ddl | binding_ddl | prepared_statement_ddl; const AccessFlags introspection_flags = AccessType::INTROSPECTION; }; static const PrecalculatedFlags precalc; diff --git a/src/Interpreters/InterpreterDropPreparedStatementQuery.cpp b/src/Interpreters/InterpreterDropPreparedStatementQuery.cpp index b47dfb17ae0..d6e29064efb 100644 --- a/src/Interpreters/InterpreterDropPreparedStatementQuery.cpp +++ b/src/Interpreters/InterpreterDropPreparedStatementQuery.cpp @@ -14,11 +14,15 @@ namespace ErrorCodes BlockIO InterpreterDropPreparedStatementQuery::execute() { + auto current_context = getContext(); + AccessRightsElements access_rights_elements; + access_rights_elements.emplace_back(AccessType::DROP_PREPARED_STATEMENT); + current_context->checkAccess(access_rights_elements); + const auto * drop = query_ptr->as(); if (!drop || drop->name.empty()) throw Exception("Drop Prepare logical error", ErrorCodes::LOGICAL_ERROR); - auto current_context = getContext(); // if (!drop->cluster.empty()) // return executeDDLQueryOnCluster(query_ptr, current_context); diff --git a/src/Interpreters/InterpreterSelectQueryUseOptimizer.cpp b/src/Interpreters/InterpreterSelectQueryUseOptimizer.cpp index ef8a6422f8b..e05fbd4435d 100644 --- a/src/Interpreters/InterpreterSelectQueryUseOptimizer.cpp +++ b/src/Interpreters/InterpreterSelectQueryUseOptimizer.cpp @@ -693,6 +693,14 @@ void InterpreterSelectQueryUseOptimizer::buildQueryPlan(QueryPlanPtr & query_pla BlockIO 
InterpreterSelectQueryUseOptimizer::executeCreatePreparedStatementQuery() { + const auto & prepare = query_ptr->as(); + AccessRightsElements access_rights_elements; + access_rights_elements.emplace_back(AccessType::CREATE_PREPARED_STATEMENT); + + if (prepare.or_replace) + access_rights_elements.emplace_back(AccessType::DROP_PREPARED_STATEMENT); + context->checkAccess(access_rights_elements); + auto * prep_stat_manager = context->getPreparedStatementManager(); if (!prep_stat_manager) throw Exception("Prepare cache has to be initialized", ErrorCodes::LOGICAL_ERROR); @@ -703,10 +711,9 @@ BlockIO InterpreterSelectQueryUseOptimizer::executeCreatePreparedStatementQuery( String name; String formatted_query; SettingsChanges settings_changes; - const auto & prepare = query_ptr->as(); + ASTPtr prepare_ast = query_ptr->clone(); { name = prepare.getName(); - formatted_query = prepare.formatForErrorMessage(); settings_changes = InterpreterSetQuery::extractSettingsFromQuery(query_ptr, context); } @@ -717,15 +724,12 @@ BlockIO InterpreterSelectQueryUseOptimizer::executeCreatePreparedStatementQuery( CollectPreparedParamsVisitor(prepared_params_collector).visit(query_ptr); prep_stat_manager->addPlanToCache( name, - formatted_query, + prepare_ast, settings_changes, query_plan, analysis, std::move(prepared_params_collector.prepared_params), - context, - !prepare.if_not_exists, - prepare.or_replace, - prepare.is_permanent); + context); return {}; } diff --git a/src/Interpreters/InterpreterShowPreparedStatementQuery.cpp b/src/Interpreters/InterpreterShowPreparedStatementQuery.cpp index 387ad197656..dc8dfeea232 100644 --- a/src/Interpreters/InterpreterShowPreparedStatementQuery.cpp +++ b/src/Interpreters/InterpreterShowPreparedStatementQuery.cpp @@ -7,6 +7,7 @@ #include #include #include +#include namespace DB { @@ -26,7 +27,22 @@ BlockIO InterpreterShowPreparedStatementQuery::execute() if (show_prepared->show_create) { auto prepared_object = 
prepared_manager->getObject(show_prepared->name); - out << prepared_object.query; + if (!prepared_object.query) + out << "Null"; + if (auto * create_prep_stat = prepared_object.query->as()) + { + + if (context->getTenantId().empty()) + out << create_prep_stat->formatForErrorMessage(); + else + { + ASTPtr ast = create_prep_stat->clone(); + auto * new_create = ast->as(); + new_create->rewriteNamesWithoutTenant(); + out << new_create->formatForErrorMessage(); + } + } + result_column_name = "Create Statement"; } else if (show_prepared->show_explain) @@ -58,7 +74,10 @@ BlockIO InterpreterShowPreparedStatementQuery::execute() { auto name_list = prepared_manager->getNames(); for (auto & name : name_list) - out << name << "\n"; + { + if (context->getTenantId().empty() || isTenantMatchedEntityName(name)) + out << getOriginalEntityName(name) << "\n"; + } result_column_name = "Prepared Statement List"; } diff --git a/src/Interpreters/PreparedStatement/PreparedStatementManager.cpp b/src/Interpreters/PreparedStatement/PreparedStatementManager.cpp index f4cb8bfe945..eb1c7a8f29c 100644 --- a/src/Interpreters/PreparedStatement/PreparedStatementManager.cpp +++ b/src/Interpreters/PreparedStatement/PreparedStatementManager.cpp @@ -1,10 +1,13 @@ #include #include +#include #include #include #include #include #include +#include +#include "Parsers/IAST_fwd.h" #include @@ -18,7 +21,7 @@ namespace ErrorCodes } void PreparedObject::toProto(Protos::PreparedStatement & proto) const { - proto.set_query(query); + proto.set_query(query->formatForErrorMessage()); } void PreparedStatementManager::initialize(ContextMutablePtr context) @@ -94,7 +97,11 @@ PreparedStatementManager::CacheResultType PreparedStatementManager::getPlanFromC for (auto & [database, table_info] : prepared_object.query_detail->query_access_info) { for (auto & [table, columns] : table_info) - context->addQueryAccessInfo(database, table, columns); + { + auto storage_id = context->tryResolveStorageID(StorageID{database, table}); 
+ context->checkAccess(AccessType::SELECT, storage_id, columns); + context->addQueryAccessInfo(backQuoteIfNeed(storage_id.getDatabaseName()), storage_id.getFullTableName(), columns); + } } } @@ -105,15 +112,12 @@ PreparedStatementManager::CacheResultType PreparedStatementManager::getPlanFromC void PreparedStatementManager::addPlanToCache( const String & name, - const String & query, + ASTPtr & query, SettingsChanges settings_changes, QueryPlanPtr & plan, AnalysisPtr analysis, PreparedParameterSet prepared_params, - ContextMutablePtr & context, - bool throw_if_exists, - bool or_replace, - bool is_persistent) + ContextMutablePtr & context) { PlanNodeId max_id; PreparedObject prepared_object{}; @@ -136,12 +140,12 @@ void PreparedStatementManager::addPlanToCache( { for (const auto & column : it->second) prepared_object.query_detail - ->query_access_info[backQuoteIfNeed(storage_id.getDatabaseName())][storage_id.getFullTableName()] + ->query_access_info[storage_id.getDatabaseName()][storage_id.getTableName()] .emplace_back(column); } } - - set(name, std::move(prepared_object), throw_if_exists, or_replace, is_persistent); + const auto & prepare = query->as(); + set(name, std::move(prepared_object), !prepare.if_not_exists, prepare.or_replace, prepare.is_permanent); } PlanNodePtr PreparedStatementManager::getNewPlanNode(PlanNodePtr node, ContextMutablePtr & context, bool cache_plan, PlanNodeId & max_id) @@ -230,7 +234,11 @@ void PreparedStatementManager::loadStatementsFromDisk(ContextMutablePtr & contex throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid prepread statement query: {}", statement.second.query()); create_prep_stat->is_permanent = false; - InterpreterSelectQueryUseOptimizer interpreter{ast, context, {}}; + auto query_context = Context::createCopy(context); + query_context->setQueryContext(query_context); + SettingsChanges settings_changes = InterpreterSetQuery::extractSettingsFromQuery(ast, query_context); + 
query_context->applySettingsChanges(settings_changes); + InterpreterSelectQueryUseOptimizer interpreter{ast, query_context, {}}; interpreter.executeCreatePreparedStatementQuery(); } catch (...) diff --git a/src/Interpreters/PreparedStatement/PreparedStatementManager.h b/src/Interpreters/PreparedStatement/PreparedStatementManager.h index 4f0e7f12df0..733e3b78865 100644 --- a/src/Interpreters/PreparedStatement/PreparedStatementManager.h +++ b/src/Interpreters/PreparedStatement/PreparedStatementManager.h @@ -29,7 +29,7 @@ struct PreparedObject std::unordered_map>> query_access_info; }; - String query; + ASTPtr query; SettingsChanges settings_changes; PreparedParameterSet prepared_params; std::shared_ptr query_detail; @@ -72,15 +72,12 @@ class PreparedStatementManager // TODO @wangtao: extract common logic with InterpreterSelectQueryUseOptimizer::addPlanToCache void addPlanToCache( const String & name, - const String & query, + ASTPtr & query, SettingsChanges settings_changes, QueryPlanPtr & plan, AnalysisPtr analysis, PreparedParameterSet prepared_params, - ContextMutablePtr & context, - bool throw_if_exists, - bool or_replace, - bool is_persistent); + ContextMutablePtr & context); static void loadStatementsFromDisk(ContextMutablePtr & context); diff --git a/src/Parsers/ASTIdentifier.cpp b/src/Parsers/ASTIdentifier.cpp index d4cb34eba5f..591283307d3 100644 --- a/src/Parsers/ASTIdentifier.cpp +++ b/src/Parsers/ASTIdentifier.cpp @@ -307,7 +307,7 @@ void ASTIdentifier::appendCatalogName(const std::string & catalog_name) cnch_append_catalog = true; } -void ASTIdentifier::appendTenantId(const Context * context) +void ASTIdentifier::appendTenantId(const Context* context, bool is_datbase_name) { if (!context) return; @@ -315,7 +315,7 @@ void ASTIdentifier::appendTenantId(const Context * context) { /// Only catalogname case 1: - name_parts[0] = appendTenantIdOnly(name_parts[0]); + name_parts[0] = appendTenantIdOnly(name_parts[0], is_datbase_name); resetFullName(); break; 
default: @@ -463,7 +463,7 @@ void ASTTableIdentifier::appendCatalogName(const std::string & catalog_name) cnch_append_catalog = true; } -void ASTTableIdentifier::appendTenantId([[maybe_unused]] const Context * context) +void ASTTableIdentifier::appendTenantId([[maybe_unused]]const Context* context, bool /*is_datbase_name*/) { // this function shall not be called on TableIdentifier. throw Exception(ErrorCodes::LOGICAL_ERROR, "this function shall not be called on TableIdentifier."); @@ -535,9 +535,8 @@ void tryRewriteHiveCatalogName(ASTPtr & ast_catalog, const Context * context) return; if (auto * c = dynamic_cast(ast_catalog.get())) { - if (c->name() == "cnch") - return; - c->appendTenantId(context); + if(c->name() == "cnch") return; + c->appendTenantId(context, true); } } diff --git a/src/Parsers/ASTIdentifier.h b/src/Parsers/ASTIdentifier.h index eb85ca1b3e6..f47a9ba875d 100644 --- a/src/Parsers/ASTIdentifier.h +++ b/src/Parsers/ASTIdentifier.h @@ -91,7 +91,7 @@ class ASTIdentifier : public ASTWithAlias virtual void appendCatalogName(const std::string& catalog_name); - virtual void appendTenantId(const Context * context); + virtual void appendTenantId(const Context * context, bool is_datbase_name); String full_name; std::vector name_parts; @@ -143,7 +143,7 @@ class ASTTableIdentifier : public ASTIdentifier // void rewriteCnchDatabaseOrCatalog(const Context *context) override; void rewriteCnchDatabaseName(const Context * context = nullptr) override; virtual void appendCatalogName(const std::string& catalog_name) override; - virtual void appendTenantId(const Context * context) override; + virtual void appendTenantId(const Context * context, bool is_datbase_name) override; }; diff --git a/src/Parsers/ASTPreparedStatement.cpp b/src/Parsers/ASTPreparedStatement.cpp index 3df791a866b..dceb694c1c0 100644 --- a/src/Parsers/ASTPreparedStatement.cpp +++ b/src/Parsers/ASTPreparedStatement.cpp @@ -1,4 +1,6 @@ #include +#include +#include namespace DB { @@ -6,8 +8,10 @@ 
namespace DB ASTPtr ASTCreatePreparedStatementQuery::clone() const { auto res = std::make_shared(*this); + res->name_ast = name_ast->clone(); res->query = query->clone(); res->children.clear(); + res->children.push_back(res->name_ast); res->children.push_back(res->query); return res; } @@ -24,7 +28,7 @@ void ASTCreatePreparedStatementQuery::formatImpl(const FormatSettings & settings else if (or_replace) settings.ostr << (settings.hilite ? hilite_keyword : "") << "OR REPLACE " << (settings.hilite ? hilite_none : ""); - settings.ostr << (settings.hilite ? hilite_identifier : "") << name << (settings.hilite ? hilite_none : ""); + name_ast->formatImpl(settings, state, frame); formatOnCluster(settings); settings.ostr << (settings.hilite ? hilite_keyword : "") << " AS" << (settings.hilite ? hilite_none : ""); @@ -35,6 +39,33 @@ void ASTCreatePreparedStatementQuery::formatImpl(const FormatSettings & settings } } +void ASTCreatePreparedStatementQuery::rewriteNamesWithTenant(const Context* context) +{ + if (!context) + { + String new_name = formatTenantName(name); + if (new_name != name) + { + auto tenant_id = getCurrentTenantId(); + std::vector name_part = {tenant_id, name}; + name_ast = std::make_shared(std::move(name_part), false); + name = new_name; + } + } + + if (auto * identifier = name_ast->as()) + { + identifier->appendTenantId(context, false); + name = identifier->name(); + } +} + +void ASTCreatePreparedStatementQuery::rewriteNamesWithoutTenant() +{ + name = getOriginalEntityName(name); + name_ast = std::make_shared(name); +} + ASTPtr ASTExecutePreparedStatementQuery::clone() const { auto res = std::make_shared(*this); @@ -46,7 +77,6 @@ ASTPtr ASTExecutePreparedStatementQuery::clone() const return res; } - void ASTExecutePreparedStatementQuery::formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { settings.ostr << (settings.hilite ? hilite_keyword : "") << "EXECUTE PREPARED STATEMENT " << (settings.hilite ? 
hilite_identifier : "") @@ -61,6 +91,16 @@ void ASTExecutePreparedStatementQuery::formatQueryImpl(const FormatSettings & se } } +void ASTExecutePreparedStatementQuery::rewriteNamesWithTenant(const Context* /*context*/) +{ + name = formatTenantName(name); +} + +void ASTExecutePreparedStatementQuery::rewriteNamesWithoutTenant() +{ + name = getOriginalEntityName(name); +} + ASTPtr ASTShowPreparedStatementQuery::clone() const { auto res = std::make_shared(*this); @@ -86,6 +126,20 @@ void ASTShowPreparedStatementQuery::formatQueryImpl(const FormatSettings & setti settings.ostr << (settings.hilite ? hilite_keyword : "") << "PREPARED STATEMENTS" << (settings.hilite ? hilite_none : ""); } +void ASTShowPreparedStatementQuery::rewriteNamesWithTenant(const Context* /*context*/) +{ + if (name.empty()) + return; + name = formatTenantName(name); +} + +void ASTShowPreparedStatementQuery::rewriteNamesWithoutTenant() +{ + if (name.empty()) + return; + name = getOriginalEntityName(name); +} + ASTPtr ASTDropPreparedStatementQuery::clone() const { auto res = std::make_shared(*this); @@ -102,5 +156,15 @@ void ASTDropPreparedStatementQuery::formatImpl(const FormatSettings & settings, formatOnCluster(settings); } +void ASTDropPreparedStatementQuery::rewriteNamesWithTenant(const Context* /*context*/) +{ + name = formatTenantName(name); +} + +void ASTDropPreparedStatementQuery::rewriteNamesWithoutTenant() +{ + name = getOriginalEntityName(name); +} + } diff --git a/src/Parsers/ASTPreparedStatement.h b/src/Parsers/ASTPreparedStatement.h index 6d815e1149c..067666fcf49 100644 --- a/src/Parsers/ASTPreparedStatement.h +++ b/src/Parsers/ASTPreparedStatement.h @@ -12,6 +12,7 @@ class ASTCreatePreparedStatementQuery : public IAST, public ASTQueryWithOnCluste { public: String name; + ASTPtr name_ast; ASTPtr query; bool if_not_exists = false; @@ -45,6 +46,9 @@ class ASTCreatePreparedStatementQuery : public IAST, public ASTQueryWithOnCluste return removeOnCluster(clone()); } + void 
rewriteNamesWithTenant(const Context* context = nullptr); + void rewriteNamesWithoutTenant(); + protected: void formatImpl(const FormatSettings & format, FormatState &, FormatStateStacked) const override; }; @@ -78,6 +82,9 @@ class ASTExecutePreparedStatementQuery : public ASTQueryWithOutput return values; } + void rewriteNamesWithTenant(const Context* context = nullptr); + void rewriteNamesWithoutTenant(); + protected: void formatQueryImpl(const FormatSettings & format, FormatState &, FormatStateStacked) const override; }; @@ -103,6 +110,9 @@ class ASTShowPreparedStatementQuery : public ASTQueryWithOutput ASTPtr clone() const override; + void rewriteNamesWithTenant(const Context* context = nullptr); + void rewriteNamesWithoutTenant(); + protected: void formatQueryImpl(const FormatSettings & format, FormatState &, FormatStateStacked) const override; }; @@ -132,6 +142,9 @@ class ASTDropPreparedStatementQuery : public IAST, public ASTQueryWithOnCluster return removeOnCluster(clone()); } + void rewriteNamesWithTenant(const Context* context = nullptr); + void rewriteNamesWithoutTenant(); + protected: void formatImpl(const FormatSettings & format, FormatState &, FormatStateStacked) const override; }; diff --git a/src/Parsers/ParserPreparedStatement.cpp b/src/Parsers/ParserPreparedStatement.cpp index 7919a6f8366..921e8a2eec6 100644 --- a/src/Parsers/ParserPreparedStatement.cpp +++ b/src/Parsers/ParserPreparedStatement.cpp @@ -7,9 +7,10 @@ #include #include #include +#include namespace DB -{ +{ bool ParserCreatePreparedStatementQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ParserKeyword s_create("CREATE"); @@ -39,7 +40,14 @@ bool ParserCreatePreparedStatementQuery::parseImpl(Pos & pos, ASTPtr & node, Exp ParserCompoundIdentifier name_p; ASTPtr identifier; - if (!name_p.parse(pos, identifier, expected)) + if (name_p.parse(pos, identifier, expected)) + { + auto * name_node = identifier->as(); + if (name_node->nameParts().size() > 2 + || 
(name_node->nameParts().size() == 2 && (!getCurrentTenantId().empty() || getCurrentTenantId() == name_node->nameParts()[0]))) + return false; + } + else return false; String cluster_str; @@ -49,7 +57,6 @@ bool ParserCreatePreparedStatementQuery::parseImpl(Pos & pos, ASTPtr & node, Exp return false; } - if (!s_as.parse(pos, identifier, expected)) return false; @@ -66,7 +73,10 @@ bool ParserCreatePreparedStatementQuery::parseImpl(Pos & pos, ASTPtr & node, Exp prepare->if_not_exists = if_not_exists; prepare->or_replace = or_replace; prepare->query = query; + prepare->name_ast = identifier; + prepare->children.push_back(prepare->name_ast); prepare->children.push_back(prepare->query); + prepare->rewriteNamesWithTenant(pos.getContext()); node = prepare; return true; @@ -83,7 +93,14 @@ bool ParserExecutePreparedStatementQuery::parseImpl(Pos & pos, ASTPtr & node, Ex ParserCompoundIdentifier name_p; ASTPtr identifier; - if (!name_p.parse(pos, identifier, expected)) + if (name_p.parse(pos, identifier, expected)) + { + auto * name_node = identifier->as(); + if (name_node->nameParts().size() > 2 + || (name_node->nameParts().size() == 2 && (!getCurrentTenantId().empty() || getCurrentTenantId() == name_node->nameParts()[0]))) + return false; + } + else return false; ASTPtr settings; @@ -104,6 +121,7 @@ bool ParserExecutePreparedStatementQuery::parseImpl(Pos & pos, ASTPtr & node, Ex tryGetIdentifierNameInto(identifier, execute->name); execute->values = settings; + execute->rewriteNamesWithTenant(); node = execute; return true; } @@ -127,9 +145,16 @@ bool ParserShowPreparedStatementQuery::parseImpl(Pos & pos, ASTPtr & node, Expec create = true; if (!s_prepared_statement.ignore(pos, expected)) return false; - if (!name_p.parse(pos, identifier, expected)) + if (name_p.parse(pos, identifier, expected)) + { + auto * name_node = identifier->as(); + if (name_node->nameParts().size() > 2 + || (name_node->nameParts().size() == 2 && (!getCurrentTenantId().empty() || getCurrentTenantId() == 
name_node->nameParts()[0]))) return false; } + else + return false; + } else if (s_prepared_statements.ignore(pos, expected)) { } @@ -141,7 +166,14 @@ bool ParserShowPreparedStatementQuery::parseImpl(Pos & pos, ASTPtr & node, Expec explain = true; if (!s_prepared_statement.ignore(pos, expected)) return false; - if (!name_p.parse(pos, identifier, expected)) + if (name_p.parse(pos, identifier, expected)) + { + auto * name_node = identifier->as(); + if (name_node->nameParts().size() > 2 + || (name_node->nameParts().size() == 2 && (!getCurrentTenantId().empty() || getCurrentTenantId() == name_node->nameParts()[0]))) + return false; + } + else return false; } else @@ -152,6 +184,7 @@ bool ParserShowPreparedStatementQuery::parseImpl(Pos & pos, ASTPtr & node, Expec tryGetIdentifierNameInto(identifier, show_prepare->name); show_prepare->show_create = create; show_prepare->show_explain = explain; + show_prepare->rewriteNamesWithTenant(); node = show_prepare; return true; } @@ -172,7 +205,14 @@ bool ParserDropPreparedStatementQuery::parseImpl(Pos & pos, ASTPtr & node, Expec ParserCompoundIdentifier name_p; ASTPtr identifier; - if (!name_p.parse(pos, identifier, expected)) + if (name_p.parse(pos, identifier, expected)) + { + auto * name_node = identifier->as(); + if (name_node->nameParts().size() > 2 + || (name_node->nameParts().size() == 2 && (!getCurrentTenantId().empty() || getCurrentTenantId() == name_node->nameParts()[0]))) + return false; + } + else return false; String cluster_str; @@ -186,6 +226,7 @@ bool ParserDropPreparedStatementQuery::parseImpl(Pos & pos, ASTPtr & node, Expec tryGetIdentifierNameInto(identifier, drop->name); drop->cluster = std::move(cluster_str); drop->if_exists = if_exists; + drop->rewriteNamesWithTenant(); node = drop; return true; } diff --git a/src/Parsers/formatTenantDatabaseName.cpp b/src/Parsers/formatTenantDatabaseName.cpp index 5fc06f9e0fe..43a5956adde 100644 --- a/src/Parsers/formatTenantDatabaseName.cpp +++ 
b/src/Parsers/formatTenantDatabaseName.cpp @@ -61,6 +61,21 @@ static bool isInternalDatabaseName(const String & database_name) return false; } +//Format pattern {tenant_id}.{name} +String formatTenantName(const String & name, char separator) +{ + auto tenant_id = getCurrentTenantId(); + if (!tenant_id.empty() && + (name.find(tenant_id) != 0 || name.size() == tenant_id.size() || name[tenant_id.size()] != separator)) + { + String result = tenant_id; + result += separator; + result += name; + return result; + } + return name; +} + //Format pattern {tenant_id}.{database_name} static String formatTenantDatabaseNameImpl(const String & database_name, char separator = '.') { @@ -106,8 +121,10 @@ String formatTenantDatabaseName(const String & database_name) } } -String appendTenantIdOnly(const String & name) +String appendTenantIdOnly(const String& name, bool is_datbase_name) { + if (!is_datbase_name) + return formatTenantName(name); return formatTenantDatabaseNameImpl(name); } diff --git a/src/Parsers/formatTenantDatabaseName.h b/src/Parsers/formatTenantDatabaseName.h index 3cbe9a115d9..21c0bccedfb 100644 --- a/src/Parsers/formatTenantDatabaseName.h +++ b/src/Parsers/formatTenantDatabaseName.h @@ -14,7 +14,7 @@ String formatTenantDatabaseName(const String & database_name); // name -> tenant_id.name // no catalog information will be attached. 
-String appendTenantIdOnly(const String & name); +String appendTenantIdOnly(const String & name, bool is_datbase_name = true); String formatTenantConnectDefaultDatabaseName(const String & database_name); @@ -32,6 +32,8 @@ String getOriginalDatabaseName(const String & tenant_database_name); String getOriginalDatabaseName(const String & tenant_database_name, const String & tenant_id); +String formatTenantName(const String & name, char separator = '.'); + void pushTenantId(const String &tenant_id); void popTenantId(); diff --git a/tests/queries/4_cnch_stateless_no_tenant/48035_prepared_statement.reference b/tests/queries/4_cnch_stateless_no_tenant/48035_prepared_statement.reference index 7bb90bb97fc..7849671e91b 100644 --- a/tests/queries/4_cnch_stateless_no_tenant/48035_prepared_statement.reference +++ b/tests/queries/4_cnch_stateless_no_tenant/48035_prepared_statement.reference @@ -19,3 +19,45 @@ CREATE PREPARED STATEMENT IF NOT EXISTS prep1 AS SELECT count() FROM (SELECT num 444444 prep3 prep2 +prep1 +Projection +│ Expressions: count():=`expr#count()` +└─ Aggregating + │ Group by: {} + │ Aggregates: expr#count():=AggNull(count)() + └─ Filter + │ Condition: number < [x:UInt32] + └─ Limit + │ Limit: 8 + └─ TableScan system.numbers + Limit: 8 + Outputs: [number] +prep1 +12345.prep1 +CREATE PREPARED STATEMENT IF NOT EXISTS `12345.prep1` AS SELECT count() FROM (SELECT number FROM system.numbers LIMIT 8) WHERE number < [x:UInt32] +CREATE PREPARED STATEMENT IF NOT EXISTS prep1 AS SELECT count() FROM (SELECT number FROM system.numbers LIMIT 9) WHERE number < [x:UInt32] +Projection +│ Expressions: count():=`expr#count()` +└─ Aggregating + │ Group by: {} + │ Aggregates: expr#count():=AggNull(count)() + └─ Filter + │ Condition: number < [x:UInt32] + └─ Limit + │ Limit: 8 + └─ TableScan system.numbers + Limit: 8 + Outputs: [number] +Projection +│ Expressions: count():=`expr#count()` +└─ Aggregating + │ Group by: {} + │ Aggregates: expr#count():=AggNull(count)() + └─ Filter + │ 
Condition: number < [x:UInt32] + └─ Limit + │ Limit: 9 + └─ TableScan system.numbers + Limit: 9 + Outputs: [number] +prep1 diff --git a/tests/queries/4_cnch_stateless_no_tenant/48035_prepared_statement.sql b/tests/queries/4_cnch_stateless_no_tenant/48035_prepared_statement.sql index 622b55a0b26..25d3daeea4d 100644 --- a/tests/queries/4_cnch_stateless_no_tenant/48035_prepared_statement.sql +++ b/tests/queries/4_cnch_stateless_no_tenant/48035_prepared_statement.sql @@ -44,5 +44,41 @@ EXECUTE PREPARED STATEMENT prep3 SETTINGS iterative_optimizer_timeout=444444; SHOW PREPARED STATEMENTS; +DROP PREPARED STATEMENT IF EXISTS prep1; +DROP PREPARED STATEMENT IF EXISTS prep2; +DROP PREPARED STATEMENT IF EXISTS prep3; + +set tenant_id='12345'; + +CREATE PREPARED STATEMENT IF NOT EXISTS prep1 AS +SELECT count() +FROM (SELECT number FROM system.numbers LIMIT 8) +WHERE number < [x: UInt32]; + +SHOW PREPARED STATEMENTS; +EXPLAIN PREPARED STATEMENT prep1; + +set tenant_id=''; + +CREATE PREPARED STATEMENT IF NOT EXISTS prep1 AS +SELECT count() +FROM (SELECT number FROM system.numbers LIMIT 9) +WHERE number < [x: UInt32]; + +SHOW PREPARED STATEMENTS; + +SHOW CREATE PREPARED STATEMENT `12345.prep1`; +SHOW CREATE PREPARED STATEMENT `prep1`; + +EXPLAIN PREPARED STATEMENT `12345.prep1`; +EXPLAIN PREPARED STATEMENT prep1; + +set tenant_id='12345'; +SHOW PREPARED STATEMENTS; +DROP PREPARED STATEMENT IF EXISTS prep1; + +set tenant_id=''; +DROP PREPARED STATEMENT IF EXISTS prep1; + DROP PREPARED STATEMENT IF EXISTS prep2; DROP PREPARED STATEMENT IF EXISTS prep3; From f0e2dcd712eba4a738874fea3e08ce38061b2879 Mon Sep 17 00:00:00 2001 From: fredwang Date: Thu, 1 Aug 2024 03:34:24 +0000 Subject: [PATCH 018/292] Merge 'fix_dumper_hdfs_2.2' into 'cnch-2.2' fix(clickhousech@m-4619228507): [cp]dumper tool fix nnproxy See merge request: !22546 --- programs/dumper/Dumper.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/programs/dumper/Dumper.cpp b/programs/dumper/Dumper.cpp 
index 822eb81435f..9c883a638d8 100644 --- a/programs/dumper/Dumper.cpp +++ b/programs/dumper/Dumper.cpp @@ -329,9 +329,8 @@ void ClickHouseDumper::initHDFS() /// Options load from command line argument use priority -100 in layeredconfiguration, so construct /// hdfs params from config directly rather than from config file - HDFSConnectionParams hdfs_params = HDFSConnectionParams(HDFSConnectionParams::CONN_NNPROXY, - config().getString("hdfs_user", "clickhouse"), config().getString("output_hdfs_nnproxy", "nnproxy")); - hdfs_params.lookupOnNeed(); + HDFSConnectionParams hdfs_params = HDFSConnectionParams::parseFromMisusedNNProxyStr( + config().getString("output_hdfs_nnproxy", "nnproxy"), config().getString("hdfs_user", "clickhouse")); global_context->setHdfsConnectionParams(hdfs_params); /// register default hdfs file system bool has_hdfs_disk = false; From d0feece91f20170719c398cc780d027afde1706a Mon Sep 17 00:00:00 2001 From: fredwang Date: Thu, 1 Aug 2024 03:35:06 +0000 Subject: [PATCH 019/292] Merge branch 'fix_date_out_of_range_2.2' into 'cnch-2.2' fix(clickhousech@m-4503623503): [cp]fix DateTime Overflow Behavior See merge request dp/ClickHouse!22544 # Conflicts: # src/Formats/FormatFactory.cpp --- src/Core/Settings.h | 1 + src/Core/SettingsEnums.cpp | 5 + src/Core/SettingsEnums.h | 2 + src/Formats/FormatFactory.cpp | 1 + src/Formats/FormatSettings.h | 10 + .../Formats/Impl/ArrowBlockInputFormat.cpp | 1 + .../Formats/Impl/ArrowColumnToCHColumn.cpp | 290 +++++++++++++----- .../Formats/Impl/ArrowColumnToCHColumn.h | 11 + .../Formats/Impl/ORCBlockInputFormat.cpp | 1 + .../Formats/Impl/OrcChunkReader.cpp | 7 +- .../Impl/Parquet/ParquetArrowColReader.cpp | 1 + .../Formats/Impl/ParquetBlockInputFormat.cpp | 1 + .../DataLakes/HiveFile/JNIArrowSource.cpp | 4 +- .../00900_orc_out_of_date.reference | 4 + .../4_cnch_stateless/00900_orc_out_of_date.sh | 25 ++ .../00900_parquet_out_of_date.reference | 4 + .../00900_parquet_out_of_date.sh | 24 ++ 
.../02716_parquet_invalid_date32.reference | 2 +- .../000000_0_copy_2 | Bin 0 -> 291 bytes .../000000_0_copy_2 | Bin 0 -> 382 bytes 20 files changed, 315 insertions(+), 79 deletions(-) create mode 100644 tests/queries/4_cnch_stateless/00900_orc_out_of_date.reference create mode 100755 tests/queries/4_cnch_stateless/00900_orc_out_of_date.sh create mode 100644 tests/queries/4_cnch_stateless/00900_parquet_out_of_date.reference create mode 100755 tests/queries/4_cnch_stateless/00900_parquet_out_of_date.sh create mode 100644 tests/queries/4_cnch_stateless/data_orc/test_orc_date_out_of_range/000000_0_copy_2 create mode 100644 tests/queries/4_cnch_stateless/data_parquet/test_parquet_date_out_of_range/000000_0_copy_2 diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 6d62d9bcaa9..ff7a4edcd2b 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -1969,6 +1969,7 @@ enum PreloadLevelSettings : UInt64 M(Bool, input_format_parquet_coalesce_read, true, "Merge small IO ranges, See arrow::ReadRangeCache", 0) \ M(Bool, input_format_parquet_use_lazy_io_cache, true, "Lazy caching will trigger io requests when they are requested for the first time. See arrow::ReadRangeCache", 0) \ M(Bool, input_format_orc_filter_push_down, true, "When reading Orc files, skip whole row groups based on the WHERE/PREWHERE expressions and min/max statistics in the Parquet metadata.", 0) \ + M(DateTimeOverflowBehavior, date_time_overflow_behavior, "ignore", "Overflow mode for Date, Date32, DateTime, DateTime64 types. 
Possible values: 'ignore', 'throw', 'saturate'.", 0) \ \ M(Bool, input_format_orc_allow_missing_columns, false, "Allow missing columns while reading ORC input formats", 0) \ M(Bool, input_format_arrow_import_nested, false, "Allow to insert array of structs into Nested table in Arrow input format.", 0) \ diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index 3eae3d7b9e1..c43d93819c2 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -244,4 +244,9 @@ IMPLEMENT_SETTING_ENUM(SchemaInferenceMode, ErrorCodes::BAD_ARGUMENTS, {{"default", SchemaInferenceMode::DEFAULT}, {"union", SchemaInferenceMode::UNION}}) +IMPLEMENT_SETTING_ENUM(DateTimeOverflowBehavior, ErrorCodes::BAD_ARGUMENTS, + {{"throw", FormatSettings::DateTimeOverflowBehavior::Throw}, + {"ignore", FormatSettings::DateTimeOverflowBehavior::Ignore}, + {"saturate", FormatSettings::DateTimeOverflowBehavior::Saturate}}) + } // namespace DB diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h index f8795fa669e..a3b978632c9 100644 --- a/src/Core/SettingsEnums.h +++ b/src/Core/SettingsEnums.h @@ -412,4 +412,6 @@ enum class SchemaInferenceMode DECLARE_SETTING_ENUM(SchemaInferenceMode) +DECLARE_SETTING_ENUM_WITH_RENAME(DateTimeOverflowBehavior, FormatSettings::DateTimeOverflowBehavior) + } diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index f604cf15835..634fb158c08 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -167,6 +167,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.map.skip_null_map_value = settings.input_format_skip_null_map_value; format_settings.map.max_map_key_length = settings.input_format_max_map_key_long; format_settings.check_data_overflow = settings.check_data_overflow; + format_settings.date_time_overflow_behavior = settings.date_time_overflow_behavior; /// Validate avro_schema_registry_url with RemoteHostFilter when non-empty and in 
Server context if (format_settings.schema.is_server) diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 389a4cdaa48..b2cde5bbf67 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -82,6 +82,16 @@ struct FormatSettings DateTimeOutputFormat date_time_output_format = DateTimeOutputFormat::Simple; + enum class DateTimeOverflowBehavior + { + Ignore, + Throw, + Saturate + }; + + DateTimeOverflowBehavior date_time_overflow_behavior = DateTimeOverflowBehavior::Ignore; + + UInt64 input_allow_errors_num = 0; Float32 input_allow_errors_ratio = 0; diff --git a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp index 28b136c6526..f9e2cb9717f 100644 --- a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp @@ -108,6 +108,7 @@ void ArrowBlockInputFormat::prepareReader() format_settings.arrow.import_nested, format_settings.arrow.allow_missing_columns, format_settings.null_as_default, + format_settings.date_time_overflow_behavior, format_settings.arrow.case_insensitive_column_matching); if (stream) diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index 3d652fc06ff..9827cf9ae8a 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -102,7 +102,7 @@ namespace ErrorCodes /// Inserts numeric data right into internal column data to reduce an overhead template > -static ColumnWithTypeAndName readColumnWithNumericData(std::shared_ptr & arrow_column, const String & column_name) +static ColumnWithTypeAndName readColumnWithNumericData(const std::shared_ptr & arrow_column, const String & column_name) { auto internal_type = std::make_shared>(); auto internal_column = internal_type->createColumn(); @@ -127,7 +127,7 @@ static ColumnWithTypeAndName 
readColumnWithNumericData(std::shared_ptr -static ColumnWithTypeAndName readColumnWithStringData(std::shared_ptr & arrow_column, const String & column_name) +static ColumnWithTypeAndName readColumnWithStringData(const std::shared_ptr & arrow_column, const String & column_name) { auto internal_type = std::make_shared(); auto internal_column = internal_type->createColumn(); @@ -171,7 +171,7 @@ static ColumnWithTypeAndName readColumnWithStringData(std::shared_ptr & arrow_column, const String & column_name) +static ColumnWithTypeAndName readColumnWithFixedStringData(const std::shared_ptr & arrow_column, const String & column_name) { const auto * fixed_type = assert_cast(arrow_column->type().get()); size_t fixed_len = fixed_type->byte_width(); @@ -190,7 +190,7 @@ static ColumnWithTypeAndName readColumnWithFixedStringData(std::shared_ptr -static ColumnWithTypeAndName readColumnWithBigIntegerFromFixedBinaryData(std::shared_ptr & arrow_column, const String & column_name, const DataTypePtr & column_type) +static ColumnWithTypeAndName readColumnWithBigIntegerFromFixedBinaryData(const std::shared_ptr & arrow_column, const String & column_name, const DataTypePtr & column_type) { const auto * fixed_type = assert_cast(arrow_column->type().get()); size_t fixed_len = fixed_type->byte_width(); @@ -218,7 +218,7 @@ static ColumnWithTypeAndName readColumnWithBigIntegerFromFixedBinaryData(std::sh } template -static ColumnWithTypeAndName readColumnWithBigNumberFromBinaryData(std::shared_ptr & arrow_column, const String & column_name, const DataTypePtr & column_type) +static ColumnWithTypeAndName readColumnWithBigNumberFromBinaryData(const std::shared_ptr & arrow_column, const String & column_name, const DataTypePtr & column_type) { size_t total_size = 0; for (int chunk_i = 0, num_chunks = arrow_column->num_chunks(); chunk_i < num_chunks; ++chunk_i) @@ -259,7 +259,7 @@ static ColumnWithTypeAndName readColumnWithBigNumberFromBinaryData(std::shared_p return {std::move(internal_column), 
column_type, column_name}; } -static ColumnWithTypeAndName readColumnWithBooleanData(std::shared_ptr & arrow_column, const String & column_name) +static ColumnWithTypeAndName readColumnWithBooleanData(const std::shared_ptr & arrow_column, const String & column_name) { auto internal_type = DataTypeFactory::instance().get("Bool"); auto internal_column = internal_type->createColumn(); @@ -278,7 +278,8 @@ static ColumnWithTypeAndName readColumnWithBooleanData(std::shared_ptr & arrow_column, const String & column_name, const DataTypePtr & type_hint) +static ColumnWithTypeAndName readColumnWithDate32Data(const std::shared_ptr & arrow_column, const String & column_name, + const DataTypePtr & type_hint, FormatSettings::DateTimeOverflowBehavior date_time_overflow_behavior) { DataTypePtr internal_type; bool check_date_range = false; @@ -310,11 +311,21 @@ static ColumnWithTypeAndName readColumnWithDate32Data(std::shared_ptr(chunk.Value(value_i)); if (days_num > DATE_LUT_MAX_EXTEND_DAY_NUM || days_num < -DAYNUM_OFFSET_EPOCH) { - throw Exception{ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, - "Input value {} of a column \"{}\" is out of allowed Date32 range, which is [{}, {}]", days_num, column_name, DAYNUM_OFFSET_EPOCH, DATE_LUT_MAX_EXTEND_DAY_NUM}; + switch (date_time_overflow_behavior) + { + case FormatSettings::DateTimeOverflowBehavior::Saturate: + days_num = (days_num < -DAYNUM_OFFSET_EPOCH) ? -DAYNUM_OFFSET_EPOCH : DATE_LUT_MAX_EXTEND_DAY_NUM; + break; + default: + /// Prior to introducing `date_time_overflow_behavior`, this function threw an error in case value was out of range. + /// In order to leave this behavior as default, we also throw when `date_time_overflow_mode == ignore`, as it is the setting's default value + /// (As we want to make this backwards compatible, not break any workflows.) 
+ throw Exception{ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, + "Input value {} of a column \"{}\" is out of allowed Date32 range, which is [{}, {}]", + days_num,column_name, -DAYNUM_OFFSET_EPOCH, DATE_LUT_MAX_EXTEND_DAY_NUM}; + } } - else - column_data.emplace_back(days_num); + column_data.emplace_back(days_num); } } else @@ -328,7 +339,7 @@ static ColumnWithTypeAndName readColumnWithDate32Data(std::shared_ptr & arrow_column, const String & column_name) +static ColumnWithTypeAndName readColumnWithDate64Data(const std::shared_ptr & arrow_column, const String & column_name) { auto internal_type = std::make_shared(); auto internal_column = internal_type->createColumn(); @@ -347,7 +358,7 @@ static ColumnWithTypeAndName readColumnWithDate64Data(std::shared_ptr & arrow_column, const String & column_name) +static ColumnWithTypeAndName readColumnWithTimestampData(const std::shared_ptr & arrow_column, const String & column_name) { const auto & arrow_type = static_cast(*(arrow_column->type())); const UInt8 scale = arrow_type.unit() * 3; @@ -368,7 +379,7 @@ static ColumnWithTypeAndName readColumnWithTimestampData(std::shared_ptr -static ColumnWithTypeAndName readColumnWithTimeData(std::shared_ptr & arrow_column, const String & column_name) +static ColumnWithTypeAndName readColumnWithTimeData(const std::shared_ptr & arrow_column, const String & column_name) { const auto & arrow_type = static_cast(*(arrow_column->type())); const UInt8 scale = arrow_type.unit() * 3; @@ -391,18 +402,18 @@ static ColumnWithTypeAndName readColumnWithTimeData(std::shared_ptr & arrow_column, const String & column_name) +static ColumnWithTypeAndName readColumnWithTime32Data(const std::shared_ptr & arrow_column, const String & column_name) { return readColumnWithTimeData(arrow_column, column_name); } -static ColumnWithTypeAndName readColumnWithTime64Data(std::shared_ptr & arrow_column, const String & column_name) +static ColumnWithTypeAndName readColumnWithTime64Data(const std::shared_ptr & 
arrow_column, const String & column_name) { return readColumnWithTimeData(arrow_column, column_name); } template -static ColumnWithTypeAndName readColumnWithDecimalDataImpl(std::shared_ptr & arrow_column, const String & column_name, DataTypePtr internal_type) +static ColumnWithTypeAndName readColumnWithDecimalDataImpl(const std::shared_ptr & arrow_column, const String & column_name, DataTypePtr internal_type) { auto internal_column = internal_type->createColumn(); auto & column = assert_cast &>(*internal_column); @@ -421,7 +432,7 @@ static ColumnWithTypeAndName readColumnWithDecimalDataImpl(std::shared_ptr -static ColumnWithTypeAndName readColumnWithDecimalData(std::shared_ptr & arrow_column, const String & column_name) +static ColumnWithTypeAndName readColumnWithDecimalData(const std::shared_ptr & arrow_column, const String & column_name) { const auto * arrow_decimal_type = static_cast(arrow_column->type().get()); size_t precision = arrow_decimal_type->precision(); @@ -436,7 +447,7 @@ static ColumnWithTypeAndName readColumnWithDecimalData(std::shared_ptr & arrow_column) +static ColumnPtr readByteMapFromArrowColumn(const std::shared_ptr & arrow_column) { if (!arrow_column->null_count()) return ColumnUInt8::create(arrow_column->length(), 0); @@ -455,7 +466,7 @@ static ColumnPtr readByteMapFromArrowColumn(std::shared_ptr return nullmap_column; } -static ColumnPtr readOffsetsFromArrowListColumn(std::shared_ptr & arrow_column) +static ColumnPtr readOffsetsFromArrowListColumn(const std::shared_ptr & arrow_column) { auto offsets_column = ColumnUInt64::create(); ColumnArray::Offsets & offsets_data = assert_cast &>(*offsets_column).getData(); @@ -502,7 +513,8 @@ static ColumnPtr readOffsetsFromArrowListColumn(std::shared_ptr> -static ColumnWithTypeAndName readColumnWithIndexesDataImpl(std::shared_ptr & arrow_column, const String & column_name, Int64 default_value_index, NumericType dict_size, bool is_nullable) +static ColumnWithTypeAndName 
readColumnWithIndexesDataImpl(const std::shared_ptr & arrow_column, + const String & column_name, Int64 default_value_index, NumericType dict_size, bool is_nullable) { auto internal_type = std::make_shared>(); auto internal_column = internal_type->createColumn(); @@ -600,7 +612,7 @@ static ColumnWithTypeAndName readColumnWithIndexesDataImpl(std::shared_ptr & arrow_column, Int64 default_value_index, UInt64 dict_size, bool is_nullable) +static ColumnPtr readColumnWithIndexesData(const std::shared_ptr & arrow_column, Int64 default_value_index, UInt64 dict_size, bool is_nullable) { switch (arrow_column->type()->id()) { @@ -617,7 +629,7 @@ static ColumnPtr readColumnWithIndexesData(std::shared_ptr } } -static std::shared_ptr getNestedArrowColumn(std::shared_ptr & arrow_column) +static std::shared_ptr getNestedArrowColumn(const std::shared_ptr & arrow_column) { arrow::ArrayVector array_vector; array_vector.reserve(arrow_column->num_chunks()); @@ -702,34 +714,84 @@ static std::shared_ptr getNestedArrowColumn(std::shared_ptr // return {std::move(internal_column), std::move(internal_type), column_name}; // } +struct ReadColumnFromArrowColumnSettings +{ + std::string format_name; + FormatSettings::DateTimeOverflowBehavior date_time_overflow_behavior; + bool allow_arrow_null_type; + bool skip_columns_with_unsupported_types; +}; + +static ColumnWithTypeAndName readNonNullableColumnFromArrowColumn( + const std::shared_ptr & arrow_column, + std::string column_name, + std::unordered_map dictionary_infos, + DataTypePtr type_hint, + bool is_map_nested_column, + const ReadColumnFromArrowColumnSettings & settings); + static ColumnWithTypeAndName readColumnFromArrowColumn( - std::shared_ptr & arrow_column, - const std::string & column_name, - const std::string & format_name, - bool is_nullable, - std::unordered_map & dictionary_infos, - bool allow_null_type, - bool skip_columns_with_unsupported_types, - bool & skipped, - DataTypePtr type_hint = nullptr, - bool is_map_nested = false) + 
const std::shared_ptr & arrow_column, + std::string column_name, + std::unordered_map dictionary_infos, + DataTypePtr type_hint, + bool is_nullable_column, + bool is_map_nested_column, + const ReadColumnFromArrowColumnSettings & settings) { - if (!is_nullable && (arrow_column->null_count() || (type_hint && type_hint->isNullable())) && arrow_column->type()->id() != arrow::Type::LIST - && arrow_column->type()->id() != arrow::Type::MAP && arrow_column->type()->id() != arrow::Type::STRUCT && + /// read as Nullable (only in basic data type): + /// case 1: arrow column has null vaules, but clickhouse schema is not nullable + /// step 1: read column as Nullable(xxx) + /// step 2: clickhouse column is Bitmap column + /// castBitmapColumn: Array(Nullable(int)) / Array(Nullable(String)) -> Array(int) / Array(String) -> BitMap + /// castColumn: Nullable(xxx) -> xxx + /// case 2: arrow column has null values, clickhouse schema is Nullable(xxx) + /// step 1: read column as Nullable(xxx) + bool read_as_nullable_column = arrow_column->null_count() || is_nullable_column || (type_hint && type_hint->isNullable()); + if (read_as_nullable_column && + arrow_column->type()->id() != arrow::Type::LIST && + arrow_column->type()->id() != arrow::Type::LARGE_LIST && + arrow_column->type()->id() != arrow::Type::MAP && + arrow_column->type()->id() != arrow::Type::STRUCT && arrow_column->type()->id() != arrow::Type::DICTIONARY) { DataTypePtr nested_type_hint; if (type_hint) nested_type_hint = removeNullable(type_hint); - auto nested_column = readColumnFromArrowColumn(arrow_column, column_name, format_name, true, dictionary_infos, allow_null_type, skip_columns_with_unsupported_types, skipped, nested_type_hint); - if (skipped) + + auto nested_column = readNonNullableColumnFromArrowColumn(arrow_column, + column_name, + dictionary_infos, + nested_type_hint, + is_map_nested_column, + settings); + + if (!nested_column.column) return {}; + auto nullmap_column = readByteMapFromArrowColumn(arrow_column); 
auto nullable_type = std::make_shared(std::move(nested_column.type)); auto nullable_column = ColumnNullable::create(nested_column.column, nullmap_column); + return {std::move(nullable_column), std::move(nullable_type), column_name}; } + return readNonNullableColumnFromArrowColumn(arrow_column, + column_name, + dictionary_infos, + type_hint, + is_map_nested_column, + settings); +} + +static ColumnWithTypeAndName readNonNullableColumnFromArrowColumn( + const std::shared_ptr & arrow_column, + std::string column_name, + std::unordered_map dictionary_infos, + DataTypePtr type_hint, + bool is_map_nested_column, + const ReadColumnFromArrowColumnSettings & settings) +{ switch (arrow_column->type()->id()) { case arrow::Type::STRING: @@ -784,7 +846,7 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( case arrow::Type::BOOL: return readColumnWithBooleanData(arrow_column, column_name); case arrow::Type::DATE32: - return readColumnWithDate32Data(arrow_column, column_name, type_hint); + return readColumnWithDate32Data(arrow_column, column_name, type_hint, settings.date_time_overflow_behavior); case arrow::Type::DATE64: return readColumnWithDate64Data(arrow_column, column_name); // ClickHouse writes Date as arrow UINT16 and DateTime as arrow UINT32, @@ -832,8 +894,15 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( } } auto arrow_nested_column = getNestedArrowColumn(arrow_column); - auto nested_column = readColumnFromArrowColumn(arrow_nested_column, column_name, format_name, false, dictionary_infos, allow_null_type, skip_columns_with_unsupported_types, skipped, nested_type_hint, true); - if (skipped) + auto nested_column = readColumnFromArrowColumn( + arrow_nested_column, + column_name, + dictionary_infos, + nested_type_hint, + /*is_nullable_column*/ false, + /*is_map_nested_column*/ true, + settings); + if (!nested_column.column) return {}; auto offsets_column = readOffsetsFromArrowListColumn(arrow_column); @@ -866,10 +935,20 @@ static ColumnWithTypeAndName 
readColumnFromArrowColumn( if (array_type_hint) nested_type_hint = array_type_hint->getNestedType(); } + auto * arrow_list_type = assert_cast(arrow_column->type().get()); + bool is_nested_nullable_column = arrow_list_type->value_field()->nullable(); auto arrow_nested_column = getNestedArrowColumn(arrow_column); - auto nested_column = readColumnFromArrowColumn(arrow_nested_column, column_name, format_name, false, dictionary_infos, allow_null_type, skip_columns_with_unsupported_types, skipped, nested_type_hint); - if (skipped) + auto nested_column = readColumnFromArrowColumn( + arrow_nested_column, + column_name, + dictionary_infos, + nested_type_hint, + is_nested_nullable_column, + false /*is_map_nested_column*/, + settings); + if (!nested_column.column) return {}; + auto offsets_column = readOffsetsFromArrowListColumn(arrow_column); auto array_column = ColumnArray::create(nested_column.column, offsets_column); auto array_type = std::make_shared(nested_column.type); @@ -894,11 +973,12 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( for (int i = 0; i != arrow_struct_type->num_fields(); ++i) { - auto field_name = arrow_struct_type->field(i)->name(); + auto field = arrow_struct_type->field(i); + auto field_name = field->name(); DataTypePtr nested_type_hint; if (tuple_type_hint) { - if (tuple_type_hint->haveExplicitNames() && !is_map_nested) + if (tuple_type_hint->haveExplicitNames() && !is_map_nested_column) { auto pos = tuple_type_hint->tryGetPositionByName(field_name); if (pos) @@ -908,9 +988,17 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( nested_type_hint = tuple_type_hint->getElement(i); } auto nested_arrow_column = std::make_shared(nested_arrow_columns[i]); - auto element = readColumnFromArrowColumn(nested_arrow_column, field_name, format_name, false, dictionary_infos, allow_null_type, skip_columns_with_unsupported_types, skipped, nested_type_hint); - if (skipped) + auto element = readColumnFromArrowColumn( + nested_arrow_column, + 
field_name, + dictionary_infos, + nested_type_hint, + field->nullable(), + false /*is_map_nested_column*/, + settings); + if (!element.column) return {}; + tuple_elements.emplace_back(std::move(element.column)); tuple_types.emplace_back(std::move(element.type)); tuple_names.emplace_back(std::move(element.name)); @@ -935,7 +1023,17 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( dict_array.emplace_back(dict_chunk.dictionary()); } auto arrow_dict_column = std::make_shared(dict_array); - auto dict_column = readColumnFromArrowColumn(arrow_dict_column, column_name, format_name, false, dictionary_infos, allow_null_type, skip_columns_with_unsupported_types, skipped); + auto dict_column = readColumnFromArrowColumn(arrow_dict_column, + column_name, + dictionary_infos, + nullptr /*nested_type_hint*/, + false /*is_nullable_column*/, + false /*is_map_nested_column*/, + settings); + + if (!dict_column.column) + return {}; + for (size_t i = 0; i != dict_column.column->size(); ++i) { if (dict_column.column->isDefaultAt(i)) @@ -983,7 +1081,7 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( // TODO: read UUID as a string? 
case arrow::Type::NA: { - if (allow_null_type) + if (settings.allow_arrow_null_type) { auto type = std::make_shared(); auto column = ColumnNothing::create(arrow_column->length()); @@ -993,11 +1091,8 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( } default: { - if (skip_columns_with_unsupported_types) - { - skipped = true; + if (settings.skip_columns_with_unsupported_types) return {}; - } throw Exception( ErrorCodes::UNKNOWN_TYPE, @@ -1005,10 +1100,10 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( "If it happens during schema inference and you want to skip columns with " "unsupported types, you can enable setting input_format_{}" "_skip_columns_with_unsupported_types_in_schema_inference", - format_name, + settings.format_name, arrow_column->type()->name(), column_name, - boost::algorithm::to_lower_copy(format_name)); + boost::algorithm::to_lower_copy(settings.format_name)); } } } @@ -1026,6 +1121,14 @@ static void checkStatus(const arrow::Status & status, const String & column_name Block ArrowColumnToCHColumn::arrowSchemaToCHHeader( const arrow::Schema & schema, const std::string & format_name, bool skip_columns_with_unsupported_types, const Block * hint_header, bool ignore_case) { + ReadColumnFromArrowColumnSettings settings + { + .format_name = format_name, + .date_time_overflow_behavior = FormatSettings::DateTimeOverflowBehavior::Ignore, + .allow_arrow_null_type = false, + .skip_columns_with_unsupported_types = skip_columns_with_unsupported_types + }; + ColumnsWithTypeAndName sample_columns; std::unordered_set nested_table_names; if (hint_header) @@ -1050,13 +1153,19 @@ Block ArrowColumnToCHColumn::arrowSchemaToCHHeader( arrow::ArrayVector array_vector = {arrow_array}; auto arrow_column = std::make_shared(array_vector); std::unordered_map dict_infos; - bool skipped = false; - bool allow_null_type = false; if (hint_header && hint_header->has(field->name()) && hint_header->getByName(field->name()).type->isNullable()) - allow_null_type = 
true; - ColumnWithTypeAndName sample_column = readColumnFromArrowColumn( - arrow_column, field->name(), format_name, false, dict_infos, allow_null_type, skip_columns_with_unsupported_types, skipped); - if (!skipped) + settings.allow_arrow_null_type = true; + + auto sample_column = readColumnFromArrowColumn( + arrow_column, + field->name(), + dict_infos, + nullptr /*nested_type_hint*/, + field->nullable() /*is_nullable_column*/, + false /*is_map_nested_column*/, + settings); + + if (sample_column.column) sample_columns.emplace_back(std::move(sample_column)); } return Block(std::move(sample_columns)); @@ -1068,40 +1177,51 @@ ArrowColumnToCHColumn::ArrowColumnToCHColumn( bool import_nested_, bool allow_missing_columns_, bool null_as_default_, + FormatSettings::DateTimeOverflowBehavior date_time_overflow_behavior_, bool case_insensitive_matching_) : header(header_) , format_name(format_name_) , import_nested(import_nested_) , allow_missing_columns(allow_missing_columns_) , null_as_default(null_as_default_) + , date_time_overflow_behavior(date_time_overflow_behavior_) , case_insensitive_matching(case_insensitive_matching_) { } void ArrowColumnToCHColumn::arrowTableToCHChunk(Chunk & res, std::shared_ptr & table, size_t num_rows, BlockMissingValues * block_missing_values) { - NameToColumnPtr name_to_column_ptr; + NameToArrowColumn name_to_arrow_column; for (auto column_name : table->ColumnNames()) { std::shared_ptr arrow_column = table->GetColumnByName(column_name); if (!arrow_column) throw Exception(ErrorCodes::DUPLICATE_COLUMN, "Column '{}' is duplicated", column_name); + auto arrow_field = table->schema()->GetFieldByName(column_name); + if (case_insensitive_matching) boost::to_lower(column_name); - name_to_column_ptr[std::move(column_name)] = arrow_column; + name_to_arrow_column[std::move(column_name)] = {std::move(arrow_column), std::move(arrow_field)}; } - arrowColumnsToCHChunk(res, name_to_column_ptr, num_rows, block_missing_values); + arrowColumnsToCHChunk(res, 
name_to_arrow_column, num_rows, block_missing_values); } -void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr & name_to_column_ptr, size_t num_rows, BlockMissingValues * block_missing_values) +void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, const NameToArrowColumn & name_to_arrow_column, size_t num_rows, BlockMissingValues * block_missing_values) { + ReadColumnFromArrowColumnSettings settings + { + .format_name = format_name, + .date_time_overflow_behavior = date_time_overflow_behavior, + .allow_arrow_null_type = true, + .skip_columns_with_unsupported_types = false + }; + Columns columns_list; columns_list.reserve(header.columns()); std::unordered_map>> nested_tables; - bool skipped = false; for (size_t column_i = 0, columns = header.columns(); column_i < columns; ++column_i) { const ColumnWithTypeAndName & header_column = header.getByPosition(column_i); @@ -1111,7 +1231,7 @@ void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr & boost::to_lower(search_column_name); ColumnWithTypeAndName column; - if (!name_to_column_ptr.contains(search_column_name)) + if (!name_to_arrow_column.contains(search_column_name)) { bool read_from_nested = false; /// Check if it's a column from nested table. 
@@ -1121,7 +1241,7 @@ void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr & String search_nested_table_name = nested_table_name; if (case_insensitive_matching) boost::to_lower(search_nested_table_name); - if (name_to_column_ptr.contains(search_nested_table_name)) + if (name_to_arrow_column.contains(search_nested_table_name)) { if (!nested_tables.contains(search_nested_table_name)) { @@ -1133,9 +1253,17 @@ void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr & } auto nested_table_type = Nested::collect(nested_columns).front().type; - std::shared_ptr arrow_column = name_to_column_ptr[search_nested_table_name]; - ColumnsWithTypeAndName cols = {readColumnFromArrowColumn( - arrow_column, nested_table_name, format_name, false, dictionary_infos, true, false, skipped, nested_table_type)}; + const auto & arrow_column = name_to_arrow_column.find(search_nested_table_name)->second; + ColumnsWithTypeAndName cols = + { + readColumnFromArrowColumn(arrow_column.column, + nested_table_name, + dictionary_infos, + nested_table_type, + arrow_column.field->nullable() /*is_nullable_column*/, + false /*is_map_nested_column*/, + settings) + }; BlockPtr block_ptr = std::make_shared(cols); auto column_extractor = std::make_shared(*block_ptr, case_insensitive_matching); nested_tables[search_nested_table_name] = {block_ptr, column_extractor}; @@ -1170,9 +1298,15 @@ void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr & } else { - auto arrow_column = name_to_column_ptr[search_column_name]; + const auto & arrow_column = name_to_arrow_column.find(search_column_name)->second; column = readColumnFromArrowColumn( - arrow_column, header_column.name, format_name, false, dictionary_infos, true, false, skipped, header_column.type); + arrow_column.column, + header_column.name, + dictionary_infos, + header_column.type, + arrow_column.field->nullable(), + /*is_map_nested_column*/ false, + settings); } if (null_as_default) @@ 
-1214,7 +1348,16 @@ ColumnPtr ArrowColumnToCHColumn::castArrayColumnToBitmapColumn(ColumnWithTypeAnd "ClickHouse BitMap64 can only be converted from Array, but column {} is {}", column.name, column.type->getName()); - DataTypePtr internal_nested = array->getNestedType(); + + if (array->getNestedType()->isNullable()) + { + DataTypePtr adapter_type = std::make_shared(removeNullable(array->getNestedType())); + column.column = castColumn(column, adapter_type); + column.type = adapter_type; + } + + DataTypePtr internal_nested = checkAndGetDataType(column.type.get())->getNestedType(); + if (isString(internal_nested)) { throw Exception("String list to Bitmap is not support In cnch", ErrorCodes::NOT_IMPLEMENTED); @@ -1227,6 +1370,7 @@ ColumnPtr ArrowColumnToCHColumn::castArrayColumnToBitmapColumn(ColumnWithTypeAnd column.column = castColumn(column, adapter_type); column.type = adapter_type; } + return castToBitmap64Column(column, target_type); } } diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h index 0d5b23b8583..30dadbc4774 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h @@ -53,6 +53,7 @@ class ArrowColumnToCHColumn bool import_nested_, bool allow_missing_columns_, bool null_as_default_, + FormatSettings::DateTimeOverflowBehavior date_time_overflow_behavior_, bool case_insensitive_matching_ = false); void arrowTableToCHChunk(Chunk & res, std::shared_ptr & table, size_t num_rows, BlockMissingValues * block_missing_values = nullptr); @@ -76,6 +77,15 @@ class ArrowColumnToCHColumn }; private: + struct ArrowColumn + { + std::shared_ptr column; + std::shared_ptr field; + }; + + using NameToArrowColumn = std::unordered_map; + void arrowColumnsToCHChunk(Chunk & res, const NameToArrowColumn & name_to_arrow_column, size_t num_rows, BlockMissingValues * block_missing_values = nullptr); + static ColumnPtr 
castArrayColumnToBitmapColumn(ColumnWithTypeAndName & column, const DataTypePtr & target_type); const Block & header; @@ -84,6 +94,7 @@ class ArrowColumnToCHColumn /// If false, throw exception if some columns in header not exists in arrow table. bool allow_missing_columns; bool null_as_default; + FormatSettings::DateTimeOverflowBehavior date_time_overflow_behavior; bool case_insensitive_matching; /// Map {column name : dictionary column}. diff --git a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp index 657a624c1be..ce1de06a9de 100644 --- a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp @@ -169,6 +169,7 @@ void ORCBlockInputFormat::prepareReader() format_settings.orc.import_nested, format_settings.orc.allow_missing_columns, format_settings.null_as_default, + format_settings.date_time_overflow_behavior, format_settings.orc.case_insensitive_column_matching); include_indices = getColumnIndices(schema, getPort().getHeader(), format_settings.orc.case_insensitive_column_matching, format_settings.orc.import_nested); diff --git a/src/Processors/Formats/Impl/OrcChunkReader.cpp b/src/Processors/Formats/Impl/OrcChunkReader.cpp index 61622fdf15d..ae5355bd4d4 100644 --- a/src/Processors/Formats/Impl/OrcChunkReader.cpp +++ b/src/Processors/Formats/Impl/OrcChunkReader.cpp @@ -514,14 +514,15 @@ Status OrcChunkReader::initBlock() bool allow_missing_columns = format_settings.orc.allow_missing_columns; bool null_as_default = format_settings.null_as_default; bool case_insenstive = format_settings.orc.case_insensitive_column_matching; + bool allow_out_of_range = format_settings.date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Saturate ? true : false; //TODO fix this. 
active_orc_column_to_ch_column - = std::make_unique(active_block, allow_missing_columns, null_as_default, case_insenstive); + = std::make_unique(active_block, allow_missing_columns, null_as_default, case_insenstive, allow_out_of_range); lazy_orc_column_to_ch_column - = std::make_unique(lazy_block, allow_missing_columns, null_as_default, case_insenstive); + = std::make_unique(lazy_block, allow_missing_columns, null_as_default, case_insenstive, allow_out_of_range); orc_column_to_ch_column - = std::make_unique(chunk_reader_params.header, allow_missing_columns, null_as_default, case_insenstive); + = std::make_unique(chunk_reader_params.header, allow_missing_columns, null_as_default, case_insenstive, allow_out_of_range); return Status::OK(); } diff --git a/src/Processors/Formats/Impl/Parquet/ParquetArrowColReader.cpp b/src/Processors/Formats/Impl/Parquet/ParquetArrowColReader.cpp index 6597c67b952..4474896ef37 100644 --- a/src/Processors/Formats/Impl/Parquet/ParquetArrowColReader.cpp +++ b/src/Processors/Formats/Impl/Parquet/ParquetArrowColReader.cpp @@ -52,6 +52,7 @@ ParquetArrowColReader::ParquetArrowColReader( format_settings.parquet.import_nested, format_settings.parquet.allow_missing_columns, format_settings.null_as_default, + format_settings.date_time_overflow_behavior, format_settings.parquet.case_insensitive_column_matching); } diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp index 927d12ded51..887a7249815 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp @@ -568,6 +568,7 @@ void ParquetBlockInputFormat::initializeRowGroupReaderIfNeeded(size_t row_group_ format_settings.parquet.import_nested, format_settings.parquet.allow_missing_columns, format_settings.null_as_default, + format_settings.date_time_overflow_behavior, format_settings.parquet.case_insensitive_column_matching); // if (auto context = 
getContext()) diff --git a/src/Storages/DataLakes/HiveFile/JNIArrowSource.cpp b/src/Storages/DataLakes/HiveFile/JNIArrowSource.cpp index dc9caa4506c..6dc1d40432c 100644 --- a/src/Storages/DataLakes/HiveFile/JNIArrowSource.cpp +++ b/src/Storages/DataLakes/HiveFile/JNIArrowSource.cpp @@ -64,8 +64,8 @@ void JNIArrowSource::prepareReader() "JNI", false, /*import_nested*/ false, /*allow_missing_columns*/ - true /*null_as_default*/ - ); + true /*null_as_default*/, + FormatSettings::DateTimeOverflowBehavior::Saturate); } } diff --git a/tests/queries/4_cnch_stateless/00900_orc_out_of_date.reference b/tests/queries/4_cnch_stateless/00900_orc_out_of_date.reference new file mode 100644 index 00000000000..d406dffbd97 --- /dev/null +++ b/tests/queries/4_cnch_stateless/00900_orc_out_of_date.reference @@ -0,0 +1,4 @@ +Code: 321. DB::Ex---tion: Input value 2567653 of a column "last_login_time" is out of allowed Date32 range, which is [-25567, 120530]: While executing ORCBlockInputFormat: data for INSERT was parsed from stdin SQLSTATE: 22003 + +3 2299-12-31 +4 1900-01-01 diff --git a/tests/queries/4_cnch_stateless/00900_orc_out_of_date.sh b/tests/queries/4_cnch_stateless/00900_orc_out_of_date.sh new file mode 100755 index 00000000000..1a20b858e2e --- /dev/null +++ b/tests/queries/4_cnch_stateless/00900_orc_out_of_date.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +DATA_FILE=$CUR_DIR/data_orc/test_orc_date_out_of_range/000000_0_copy_2 + +${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS test.test_orc_date_out_of_range_32;" + +${CLICKHOUSE_CLIENT} --query="CREATE TABLE test.test_orc_date_out_of_range_32(user_id Int64, last_login_time Date32) ENGINE = CnchMergeTree ORDER BY tuple()" +cat "$DATA_FILE" | ${CLICKHOUSE_CLIENT} -q "INSERT INTO test.test_orc_date_out_of_range_32 format ORC SETTINGS input_format_orc_use_fast_decoder = 0" 2>&1 | sed 's/Exception/Ex---tion/' + +cat "$DATA_FILE" | ${CLICKHOUSE_CLIENT} -q "INSERT INTO test.test_orc_date_out_of_range_32 format ORC SETTINGS date_time_overflow_behavior = 'saturate', input_format_orc_use_fast_decoder = 0" + +${CLICKHOUSE_CLIENT} --query="SELECT * FROM test.test_orc_date_out_of_range_32;" +${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS test.test_orc_date_out_of_range_32;" + + + +${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS test.test_orc_date_out_of_range;" +${CLICKHOUSE_CLIENT} --query="CREATE TABLE test.test_orc_date_out_of_range(user_id Int64, last_login_time Date) ENGINE = CnchMergeTree ORDER BY tuple()" +cat "$DATA_FILE" | ${CLICKHOUSE_CLIENT} -q "INSERT INTO test.test_orc_date_out_of_range format ORC SETTINGS input_format_orc_use_fast_decoder = 0" 2>&1 | sed 's/Exception/Ex---tion/' + +${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS test.test_orc_date_out_of_range;" diff --git a/tests/queries/4_cnch_stateless/00900_parquet_out_of_date.reference b/tests/queries/4_cnch_stateless/00900_parquet_out_of_date.reference new file mode 100644 index 00000000000..b2314819f36 --- /dev/null +++ b/tests/queries/4_cnch_stateless/00900_parquet_out_of_date.reference @@ -0,0 +1,4 @@ +Code: 321. 
DB::Ex---tion: Input value 2567653 of a column "last_login_time" is out of allowed Date32 range, which is [-25567, 120530]: While executing ParquetBlockInputFormat: data for INSERT was parsed from stdin SQLSTATE: 22003 + +3 2299-12-31 +4 1900-01-01 diff --git a/tests/queries/4_cnch_stateless/00900_parquet_out_of_date.sh b/tests/queries/4_cnch_stateless/00900_parquet_out_of_date.sh new file mode 100755 index 00000000000..c881ad94c08 --- /dev/null +++ b/tests/queries/4_cnch_stateless/00900_parquet_out_of_date.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +DATA_FILE=$CUR_DIR/data_parquet/test_parquet_date_out_of_range/000000_0_copy_2 + +${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS test.test_parquet_date_out_of_range32;" +${CLICKHOUSE_CLIENT} --query="CREATE TABLE test.test_parquet_date_out_of_range32(user_id Int64, last_login_time Date32) ENGINE = CnchMergeTree ORDER BY tuple()" +cat "$DATA_FILE" | ${CLICKHOUSE_CLIENT} -q "INSERT INTO test.test_parquet_date_out_of_range32 format Parquet" 2>&1 | sed 's/Exception/Ex---tion/' + +cat "$DATA_FILE" | ${CLICKHOUSE_CLIENT} -q "INSERT INTO test.test_parquet_date_out_of_range32 format Parquet SETTINGS date_time_overflow_behavior = 'saturate' " + +${CLICKHOUSE_CLIENT} --query="SELECT * FROM test.test_parquet_date_out_of_range32;" +${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS test.test_parquet_date_out_of_range32;" + + + +${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS test.test_parquet_date_out_of_range;" +${CLICKHOUSE_CLIENT} --query="CREATE TABLE test.test_parquet_date_out_of_range(user_id Int64, last_login_time Date) ENGINE = CnchMergeTree ORDER BY tuple()" +cat "$DATA_FILE" | ${CLICKHOUSE_CLIENT} -q "INSERT INTO test.test_parquet_date_out_of_range format Parquet" 2>&1 | sed 's/Exception/Ex---tion/' + +${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS 
test.test_parquet_date_out_of_range;" \ No newline at end of file diff --git a/tests/queries/4_cnch_stateless/02716_parquet_invalid_date32.reference b/tests/queries/4_cnch_stateless/02716_parquet_invalid_date32.reference index 74a6c799865..bb144b655f3 100644 --- a/tests/queries/4_cnch_stateless/02716_parquet_invalid_date32.reference +++ b/tests/queries/4_cnch_stateless/02716_parquet_invalid_date32.reference @@ -1,3 +1,3 @@ -Code: 321. DB::Ex---tion: Input value 200000 of a column "date" is out of allowed Date32 range, which is [25567, 120530]: While executing ParquetBlockInputFormat: data for INSERT was parsed from stdin SQLSTATE: 22003 +Code: 321. DB::Ex---tion: Input value 200000 of a column "date" is out of allowed Date32 range, which is [-25567, 120530]: While executing ParquetBlockInputFormat: data for INSERT was parsed from stdin SQLSTATE: 22003 200000 diff --git a/tests/queries/4_cnch_stateless/data_orc/test_orc_date_out_of_range/000000_0_copy_2 b/tests/queries/4_cnch_stateless/data_orc/test_orc_date_out_of_range/000000_0_copy_2 new file mode 100644 index 0000000000000000000000000000000000000000..bc79bd96a432aebb3fa165a3e156b3907d597be6 GIT binary patch literal 291 zcmeYdau#G@;9?VE;b012&;~MvxtJLk7=(B@n1t9k*aSEv_yQOVf#PBiaefXaD^8B1 zC)fo}ZQlvh!p^|p#+U&#BahK9`V`|6S04t3$4QS9gcBw_c*bBPs{)S literal 0 HcmV?d00001 diff --git a/tests/queries/4_cnch_stateless/data_parquet/test_parquet_date_out_of_range/000000_0_copy_2 b/tests/queries/4_cnch_stateless/data_parquet/test_parquet_date_out_of_range/000000_0_copy_2 new file mode 100644 index 0000000000000000000000000000000000000000..cdce9c407f6398192c9e343b051c7524acef4a71 GIT binary patch literal 382 zcmZ{g&q~8U5XLvjQWsP_&62<#a%rrP7SmMHLN4C)R(yblO?KA?l2-Gl#~%BXdh|K! 
z##$)_7nYs<=KIYIGk<&u0D%v_4_F61Aj~Eh9ga<`--vdvsjp=nrK3qx4A=>dzlOKS z#-G>w{R!5Iodl*6KV2^KjZErnDN7C*0q&djMO(`%$pr#a|85FiH%YO6%~wg2m$H9_ zD0Z0RdxQaY9#D@se+cLt-O{Z`;(v$tb_UbE58!$Ap8^ZI(5BcvpPzmh6r;iO8?WA5 z*$m6d-N>rW*DE&|O-F${NZY&+?u4g&BDjpw^j=J)NHZ19#vzwM7|zsqszj(Fm8m;~ J4TR=;^WSa0N}T`z literal 0 HcmV?d00001 From fcd365eaf7ed652b6b571393ec8b24cb7bd1389d Mon Sep 17 00:00:00 2001 From: fredwang Date: Thu, 1 Aug 2024 03:35:27 +0000 Subject: [PATCH 020/292] Merge 'fix-source-split-2.2' into 'cnch-2.2' fix(clickhousech@m-4619228743): retry with same target worker for source partitioning[CNCH-2.2] See merge request: !22603 --- src/Interpreters/DistributedStages/BSPScheduler.cpp | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/src/Interpreters/DistributedStages/BSPScheduler.cpp b/src/Interpreters/DistributedStages/BSPScheduler.cpp index c50cac6d4c9..bdd7cfc9728 100644 --- a/src/Interpreters/DistributedStages/BSPScheduler.cpp +++ b/src/Interpreters/DistributedStages/BSPScheduler.cpp @@ -153,19 +153,6 @@ void BSPScheduler::updateSegmentStatusCounter(size_t segment_id, UInt64 parallel std::unique_lock lk(nodes_alloc_mutex); auto failed_worker = segment_parallel_locations[segment_id][parallel_index]; failed_workers[segment_id].insert(failed_worker); - auto iter = pending_task_instances.for_nodes[failed_worker].begin(); - while (iter != pending_task_instances.for_nodes[failed_worker].end()) - { - if (iter->task_id == segment_id) - { - pending_task_instances.no_prefs.insert({iter->task_id, iter->parallel_index}); - iter = pending_task_instances.for_nodes[failed_worker].erase(iter); - } - else - { - iter++; - } - } } } From cfeb76a6311c7845c71efaf720a8006b5abef3c8 Mon Sep 17 00:00:00 2001 From: fredwang Date: Thu, 1 Aug 2024 03:36:48 +0000 Subject: [PATCH 021/292] Merge 'cherry-pick-mr-22414' into 'cnch-2.2' fix(clickhousech@m-4345257817): insert_select_with_profiles See merge request: !22611 # Conflicts: # src/Core/Settings.h # 
src/Interpreters/InterpreterInsertQuery.cpp --- src/Core/Settings.h | 12 ++++ src/Interpreters/InterpreterInsertQuery.cpp | 32 +++++++-- src/Optimizer/Rewriter/ColumnPruning.cpp | 11 ++- .../Transforms/ProcessorToOutputStream.cpp | 72 +++++++++++++++++++ .../Transforms/ProcessorToOutputStream.h | 41 +++++++++++ src/Protos/plan_node.proto | 1 + src/QueryPlan/QueryPlanner.cpp | 5 +- src/QueryPlan/SymbolMapper.cpp | 4 +- src/QueryPlan/TableFinishStep.cpp | 14 +++- src/QueryPlan/TableFinishStep.h | 15 +++- src/QueryPlan/TableWriteStep.cpp | 57 ++++++++++++--- src/QueryPlan/TableWriteStep.h | 5 +- src/QueryPlan/tests/gtest_protobuf.cpp | 4 +- ...test_insert_select_with_profiles.reference | 9 +++ ...10098_test_insert_select_with_profiles.sql | 27 +++++++ 15 files changed, 278 insertions(+), 31 deletions(-) create mode 100644 src/Processors/Transforms/ProcessorToOutputStream.cpp create mode 100644 src/Processors/Transforms/ProcessorToOutputStream.h create mode 100644 tests/queries/4_cnch_stateless/10098_test_insert_select_with_profiles.reference create mode 100644 tests/queries/4_cnch_stateless/10098_test_insert_select_with_profiles.sql diff --git a/src/Core/Settings.h b/src/Core/Settings.h index ff7a4edcd2b..1023bcbcc04 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -506,6 +506,7 @@ enum PreloadLevelSettings : UInt64 M(Bool, enable_join_on_1_equals_1, false, "Enable join on 1=1.", 0) \ \ M(UInt64, preferred_block_size_bytes, 1000000, "", 0) \ +<<<<<<< HEAD \ M(UInt64, \ max_replica_delay_for_distributed_queries, \ @@ -542,6 +543,17 @@ enum PreloadLevelSettings : UInt64 "Timeout for DDL query responses from all hosts in cluster. If a ddl request has not been performed on all hosts, a response will " \ "contain a timeout error and a request will be executed in an async mode. Negative value means infinite. 
Zero means async mode.", \ 0) \ +======= + \ + M(UInt64, max_replica_delay_for_distributed_queries, 300, "If set, distributed queries of Replicated tables will choose servers with replication delay in seconds less than the specified value (not inclusive). Zero means do not take delay into account.", 0) \ + M(Bool, fallback_to_stale_replicas_for_distributed_queries, 1, "Suppose max_replica_delay_for_distributed_queries is set and all replicas for the queried table are stale. If this setting is enabled, the query will be performed anyway, otherwise the error will be reported.", 0) \ + M(UInt64, preferred_max_column_in_block_size_bytes, 0, "Limit on max column size in block while reading. Helps to decrease cache misses count. Should be close to L2 cache size.", 0) \ + \ + M(Bool, insert_select_with_profiles, false, "If setting is enabled, return the total inserted (selected) rows.", 0) \ + M(Bool, insert_distributed_sync, false, "If setting is enabled, insert query into distributed waits until data will be sent to all nodes in cluster.", 0) \ + M(UInt64, insert_distributed_timeout, 0, "Timeout for insert query into distributed. Setting is used only with insert_distributed_sync enabled. Zero value means no timeout.", 0) \ + M(Int64, distributed_ddl_task_timeout, 180, "Timeout for DDL query responses from all hosts in cluster. If a ddl request has not been performed on all hosts, a response will contain a timeout error and a request will be executed in an async mode. Negative value means infinite. 
Zero means async mode.", 0) \ +>>>>>>> cceecdccd5 (Merge 'cherry-pick-mr-22414' into 'cnch-2.2') M(Milliseconds, stream_flush_interval_ms, 7500, "Timeout for flushing data from streaming storages.", 0) \ M(Milliseconds, stream_poll_timeout_ms, 500, "Timeout for polling data from/to streaming storages.", 0) \ \ diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index c5ffe56af39..7c50b387fd3 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -571,15 +572,32 @@ BlockIO InterpreterInsertQuery::execute() res.pipeline.addSimpleTransform( [&](const Block & in_header) -> ProcessorPtr { return std::make_shared(in_header, actions); }); - res.pipeline.setSinks([&](const Block &, QueryPipeline::StreamType type) -> ProcessorPtr { - if (type != QueryPipeline::StreamType::Main) - return nullptr; + if (settings.insert_select_with_profiles) + { + res.pipeline.addSimpleTransform([&](const Block &, QueryPipeline::StreamType type) -> ProcessorPtr + { + if (type != QueryPipeline::StreamType::Main) + return nullptr; - auto stream = std::move(out_streams.back()); - out_streams.pop_back(); + auto stream = std::move(out_streams.back()); + out_streams.pop_back(); - return std::make_shared(std::move(stream)); - }); + return std::make_shared(std::move(stream)); + }); + } + else + { + res.pipeline.setSinks([&](const Block &, QueryPipeline::StreamType type) -> ProcessorPtr + { + if (type != QueryPipeline::StreamType::Main) + return nullptr; + + auto stream = std::move(out_streams.back()); + out_streams.pop_back(); + + return std::make_shared(std::move(stream)); + }); + } if (!allow_materialized) { diff --git a/src/Optimizer/Rewriter/ColumnPruning.cpp b/src/Optimizer/Rewriter/ColumnPruning.cpp index 56bba602d5d..ca92d659496 100644 --- a/src/Optimizer/Rewriter/ColumnPruning.cpp +++ 
b/src/Optimizer/Rewriter/ColumnPruning.cpp @@ -161,9 +161,16 @@ PlanNodePtr ColumnPruningVisitor::visitOffsetNode(OffsetNode & node, ColumnPruni return visitDefault(node, column_pruning_context); } -PlanNodePtr ColumnPruningVisitor::visitTableFinishNode(TableFinishNode & node, ColumnPruningContext & column_pruning_context) +PlanNodePtr ColumnPruningVisitor::visitTableFinishNode(TableFinishNode & node, ColumnPruningContext &) { - return visitPlanNode(node, column_pruning_context); + NameSet require; + PlanNodePtr child = node.getChildren()[0]; + for (const auto & item : child->getCurrentDataStream().header) + require.insert(item.name); + ColumnPruningContext child_column_pruning_context{.name_set = require}; + PlanNodePtr new_child = VisitorUtil::accept(*child, *this, child_column_pruning_context); + node.replaceChildren({new_child}); + return node.shared_from_this(); } PlanNodePtr ColumnPruningVisitor::visitOutfileFinishNode(OutfileFinishNode & node, ColumnPruningContext & column_pruning_context) diff --git a/src/Processors/Transforms/ProcessorToOutputStream.cpp b/src/Processors/Transforms/ProcessorToOutputStream.cpp new file mode 100644 index 00000000000..d1fc64bc479 --- /dev/null +++ b/src/Processors/Transforms/ProcessorToOutputStream.cpp @@ -0,0 +1,72 @@ +#include +#include +#include +#include + +namespace DB +{ + +Block ProcessorToOutputStream::newHeader() +{ + return {ColumnWithTypeAndName(ColumnUInt64::create(), std::make_shared(), "inserted_rows")}; +} + +ProcessorToOutputStream::ProcessorToOutputStream(BlockOutputStreamPtr stream_) + : IProcessor({stream_->getHeader()}, {newHeader()}) + , input(inputs.front()) + , output(outputs.front()) + , stream(std::move(stream_)) +{ + total_rows = 0; + stream->writePrefix(); +} + +void ProcessorToOutputStream::consume(Chunk chunk) +{ + total_rows += chunk.getNumRows(); + stream->write(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns())); +} + +Chunk ProcessorToOutputStream::getReturnChunk() +{ + auto 
total_rows_column = DataTypeUInt64().createColumnConst(1, total_rows); + return Chunk({total_rows_column}, 1); +} + +void ProcessorToOutputStream::onFinish() +{ + stream->writeSuffix(); + + auto return_chunk = getReturnChunk(); + output_data.chunk = std::move(return_chunk); + output.pushData(std::move(output_data)); +} + +ProcessorToOutputStream::Status ProcessorToOutputStream::prepare() +{ + if (has_input) + return Status::Ready; + + if (input.isFinished()) + { + onFinish(); + output.finish(); + return Status::Finished; + } + + input.setNeeded(); + if (!input.hasData()) + return Status::NeedData; + + current_chunk = input.pull(true); + has_input = true; + return Status::Ready; +} + +void ProcessorToOutputStream::work() +{ + consume(std::move(current_chunk)); + has_input = false; +} + +} diff --git a/src/Processors/Transforms/ProcessorToOutputStream.h b/src/Processors/Transforms/ProcessorToOutputStream.h new file mode 100644 index 00000000000..c72f11555cd --- /dev/null +++ b/src/Processors/Transforms/ProcessorToOutputStream.h @@ -0,0 +1,41 @@ +#pragma once +#include +#include + +namespace DB +{ + +class ProcessorToOutputStream : public IProcessor +{ +public: + explicit ProcessorToOutputStream(BlockOutputStreamPtr stream_); + + String getName() const override { return "ProcessorToOutputStream"; } + + static Block newHeader(); + Chunk getReturnChunk(); + + Status prepare() override; + void work() override; + + InputPort & getInputPort() { return input; } + OutputPort & getOutputPort() { return output; } + +protected: + InputPort & input; + OutputPort & output; + + Chunk current_chunk; + Port::Data output_data; + bool has_input = false; + + void consume(Chunk chunk); + void onFinish(); + +private: + BlockOutputStreamPtr stream; + size_t total_rows; + +}; + +} diff --git a/src/Protos/plan_node.proto b/src/Protos/plan_node.proto index 21e14a875b7..e74926417ca 100644 --- a/src/Protos/plan_node.proto +++ b/src/Protos/plan_node.proto @@ -207,6 +207,7 @@ message 
TableWriteStep { required ITransformingStep query_plan_base = 1; required Target target = 2; + optional bool insert_select_with_profiles = 3; } message TableFinishStep { diff --git a/src/QueryPlan/QueryPlanner.cpp b/src/QueryPlan/QueryPlanner.cpp index 051c696ef88..41054d2a9ef 100644 --- a/src/QueryPlan/QueryPlanner.cpp +++ b/src/QueryPlan/QueryPlanner.cpp @@ -319,12 +319,13 @@ RelationPlan QueryPlannerVisitor::visitASTInsertQuery(ASTPtr & node, const Void auto & insert = *analysis.getInsert(); auto select_plan = process(insert_query.select); select_plan.withNewRoot(planOutput(select_plan, insert_query.select, analysis, context)); + auto insert_select_with_profiles = context->getSettingsRef().insert_select_with_profiles; auto target = std::make_shared(insert.storage, insert.storage_id, insert.columns, node); auto insert_node = select_plan.getRoot()->addStep( context->nextNodeId(), - std::make_shared(select_plan.getRoot()->getCurrentDataStream(), target), + std::make_shared(select_plan.getRoot()->getCurrentDataStream(), target, insert_select_with_profiles), {select_plan.getRoot()}); auto total_affected_row_count_symbol = context->getSymbolAllocator()->newSymbol("rows"); @@ -335,7 +336,7 @@ RelationPlan QueryPlannerVisitor::visitASTInsertQuery(ASTPtr & node, const Void auto return_node = PlanNodeBase::createPlanNode( context->nextNodeId(), - std::make_shared(insert_node->getCurrentDataStream(), target, total_affected_row_count_symbol, node), + std::make_shared(insert_node->getCurrentDataStream(), target, total_affected_row_count_symbol, node, insert_select_with_profiles), {insert_node}); PRINT_PLAN(return_node, plan_insert); diff --git a/src/QueryPlan/SymbolMapper.cpp b/src/QueryPlan/SymbolMapper.cpp index 24e530b55f7..022469397b0 100644 --- a/src/QueryPlan/SymbolMapper.cpp +++ b/src/QueryPlan/SymbolMapper.cpp @@ -775,7 +775,7 @@ std::shared_ptr SymbolMapper::map(const LocalExchangeStep & s std::shared_ptr SymbolMapper::map(const TableWriteStep & step) { - return 
std::make_shared(map(step.getInputStreams()[0]), step.getTarget()); + return std::make_shared(map(step.getInputStreams()[0]), step.getTarget(), step.isOutputProfiles()); } std::shared_ptr SymbolMapper::map(const OutfileWriteStep & step) @@ -802,7 +802,7 @@ std::shared_ptr SymbolMapper::map(const BufferStep & step) std::shared_ptr SymbolMapper::map(const TableFinishStep & step) { return std::make_shared( - map(step.getInputStreams()[0]), step.getTarget(), step.getOutputAffectedRowCountSymbol(), step.getQuery()); + map(step.getInputStreams()[0]), step.getTarget(), step.getOutputAffectedRowCountSymbol(), step.getQuery(), step.isOutputProfiles()); } std::shared_ptr SymbolMapper::map(const IntermediateResultCacheStep & step) diff --git a/src/QueryPlan/TableFinishStep.cpp b/src/QueryPlan/TableFinishStep.cpp index 5b2fd2b9f7e..70cf88bf537 100644 --- a/src/QueryPlan/TableFinishStep.cpp +++ b/src/QueryPlan/TableFinishStep.cpp @@ -20,18 +20,26 @@ static ITransformingStep::Traits getTraits() TableFinishStep::TableFinishStep( const DataStream & input_stream_, TableWriteStep::TargetPtr target_, - String output_affected_row_count_symbol_, ASTPtr query_) - : ITransformingStep(input_stream_, input_stream_.header, getTraits()) + String output_affected_row_count_symbol_, ASTPtr query_, bool insert_select_with_profiles_) + : ITransformingStep(input_stream_, {}, getTraits()) , target(std::move(target_)) , output_affected_row_count_symbol(std::move(output_affected_row_count_symbol_)) , query(query_) + , insert_select_with_profiles(insert_select_with_profiles_) , log(&Poco::Logger::get("TableFinishStep")) { + if (insert_select_with_profiles) + { + Block new_header = {ColumnWithTypeAndName(ColumnUInt64::create(), std::make_shared(), "inserted_rows")}; + output_stream = DataStream{.header = std::move(new_header)}; + } + else + output_stream = {input_stream_.header}; } std::shared_ptr TableFinishStep::copy(ContextPtr) const { - return std::make_shared(input_streams[0], target, 
output_affected_row_count_symbol, query); + return std::make_shared(input_streams[0], target, output_affected_row_count_symbol, query, insert_select_with_profiles); } void TableFinishStep::transformPipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings & settings) diff --git a/src/QueryPlan/TableFinishStep.h b/src/QueryPlan/TableFinishStep.h index 1aa8714883e..435de564733 100644 --- a/src/QueryPlan/TableFinishStep.h +++ b/src/QueryPlan/TableFinishStep.h @@ -3,13 +3,15 @@ #include #include #include +#include +#include namespace DB { class TableFinishStep : public ITransformingStep { public: - TableFinishStep(const DataStream & input_stream_, TableWriteStep::TargetPtr target_, String output_affected_row_count_symbol_, ASTPtr query_); + TableFinishStep(const DataStream & input_stream_, TableWriteStep::TargetPtr target_, String output_affected_row_count_symbol_, ASTPtr query_, bool insert_select_with_profiles_ = false); String getName() const override { @@ -25,7 +27,13 @@ class TableFinishStep : public ITransformingStep void setInputStreams(const DataStreams & input_streams_) override { input_streams = input_streams_; - output_stream = DataStream{.header = std::move((input_streams_[0].header))}; + if (insert_select_with_profiles) + { + Block new_header = {ColumnWithTypeAndName(ColumnUInt64::create(), std::make_shared(), "inserted_rows")}; + output_stream = DataStream{.header = std::move(new_header)}; + } + else + output_stream = DataStream{.header = std::move((input_streams_[0].header))}; } TableWriteStep::TargetPtr getTarget() const @@ -38,6 +46,8 @@ class TableFinishStep : public ITransformingStep void setQuery(const ASTPtr & query_) { query = query_; } ASTPtr getQuery() const { return query; } + bool isOutputProfiles() const { return insert_select_with_profiles; } + void toProto(Protos::TableFinishStep & proto, bool for_hash_equals = false) const; static std::shared_ptr fromProto(const Protos::TableFinishStep & proto, ContextPtr context); @@ -45,6 +55,7 
@@ class TableFinishStep : public ITransformingStep TableWriteStep::TargetPtr target; String output_affected_row_count_symbol; ASTPtr query; + bool insert_select_with_profiles; Poco::Logger * log; }; } diff --git a/src/QueryPlan/TableWriteStep.cpp b/src/QueryPlan/TableWriteStep.cpp index 2a62c248d6d..db435f47f4a 100644 --- a/src/QueryPlan/TableWriteStep.cpp +++ b/src/QueryPlan/TableWriteStep.cpp @@ -12,9 +12,13 @@ #include #include #include +#include "QueryPlan/IQueryPlanStep.h" #include #include #include +#include +#include +#include namespace DB { @@ -29,9 +33,18 @@ static ITransformingStep::Traits getTraits() {.preserves_number_of_rows = true}}; } -TableWriteStep::TableWriteStep(const DataStream & input_stream_, TargetPtr target_) - : ITransformingStep(input_stream_, input_stream_.header, getTraits()), target(target_) +TableWriteStep::TableWriteStep(const DataStream & input_stream_, TargetPtr target_, bool insert_select_with_profiles_) + : ITransformingStep(input_stream_, {}, getTraits()) + , target(target_) + , insert_select_with_profiles(insert_select_with_profiles_) { + if (insert_select_with_profiles) + { + Block new_header = {ColumnWithTypeAndName(ColumnUInt64::create(), std::make_shared(), "inserted_rows")}; + output_stream = DataStream{.header = std::move(new_header)}; + } + else + output_stream = {input_stream_.header}; } Block TableWriteStep::getHeader(const NamesAndTypes & input_columns) @@ -158,13 +171,29 @@ void TableWriteStep::transformPipeline(QueryPipeline & pipeline, const BuildQuer pipeline.resize(out_streams.size()); LOG_INFO(&Poco::Logger::get("TableWriteStep"), fmt::format("pipeline size: {}, out streams size {}", pipeline.getNumStreams(), out_streams.size())); - pipeline.addSimpleTransform( - [&]([[maybe_unused]] const Block & in_header) -> ProcessorPtr { + if (insert_select_with_profiles) + { + pipeline.addSimpleTransform([&](const Block &, QueryPipeline::StreamType type) -> ProcessorPtr + { + if (type != QueryPipeline::StreamType::Main) + 
return nullptr; + auto stream = std::move(out_streams.back()); out_streams.pop_back(); - return std::make_shared(stream, insert_target_header, insert_target->getStorage(), settings.context);} - ); - break; + + return std::make_shared(std::move(stream)); + }); + } + else + { + pipeline.addSimpleTransform( + [&]([[maybe_unused]] const Block & in_header) -> ProcessorPtr { + auto stream = std::move(out_streams.back()); + out_streams.pop_back(); + return std::make_shared(stream, insert_target_header, insert_target->getStorage(), settings.context);} + ); + break; + } } } } @@ -172,12 +201,18 @@ void TableWriteStep::transformPipeline(QueryPipeline & pipeline, const BuildQuer void TableWriteStep::setInputStreams(const DataStreams & input_streams_) { input_streams = input_streams_; - output_stream = DataStream{.header = std::move((input_streams_[0].header))}; + if (insert_select_with_profiles) + { + Block new_header = {ColumnWithTypeAndName(ColumnUInt64::create(), std::make_shared(), "inserted_rows")}; + output_stream = DataStream{.header = std::move(new_header)}; + } + else + output_stream = DataStream{.header = std::move((input_streams_[0].header))}; } std::shared_ptr TableWriteStep::copy(ContextPtr) const { - return std::make_shared(input_streams[0], target); + return std::make_shared(input_streams[0], target, insert_select_with_profiles); } void TableWriteStep::toProto(Protos::TableWriteStep & proto, bool) const @@ -187,13 +222,15 @@ void TableWriteStep::toProto(Protos::TableWriteStep & proto, bool) const if (!target) throw Exception("Target cannot be nullptr", ErrorCodes::LOGICAL_ERROR); target->toProto(*proto.mutable_target()); + proto.set_insert_select_with_profiles(insert_select_with_profiles); } std::shared_ptr TableWriteStep::fromProto(const Protos::TableWriteStep & proto, ContextPtr context) { auto [step_description, base_input_stream] = ITransformingStep::deserializeFromProtoBase(proto.query_plan_base()); auto target = 
TableWriteStep::Target::fromProto(proto.target(), context); - auto step = std::make_shared(base_input_stream, target); + bool insert_select_with_profiles = proto.has_insert_select_with_profiles() ? proto.insert_select_with_profiles() : context->getSettingsRef().insert_select_with_profiles; + auto step = std::make_shared(base_input_stream, target, insert_select_with_profiles); step->setStepDescription(step_description); return step; } diff --git a/src/QueryPlan/TableWriteStep.h b/src/QueryPlan/TableWriteStep.h index f4ffc21257f..a6e3742571d 100644 --- a/src/QueryPlan/TableWriteStep.h +++ b/src/QueryPlan/TableWriteStep.h @@ -18,7 +18,7 @@ class TableWriteStep : public ITransformingStep INSERT, }; - TableWriteStep(const DataStream & input_stream_, TargetPtr target_); + TableWriteStep(const DataStream & input_stream_, TargetPtr target_, bool insert_select_with_profiles_ = false); String getName() const override { @@ -43,6 +43,8 @@ class TableWriteStep : public ITransformingStep void allocate(const ContextPtr & context); + bool isOutputProfiles() const { return insert_select_with_profiles; } + void toProto(Protos::TableWriteStep & proto, bool for_hash_equals = false) const; static std::shared_ptr fromProto(const Protos::TableWriteStep & proto, ContextPtr context); @@ -58,6 +60,7 @@ class TableWriteStep : public ITransformingStep ASTPtr query); TargetPtr target; + bool insert_select_with_profiles; }; class TableWriteStep::Target diff --git a/src/QueryPlan/tests/gtest_protobuf.cpp b/src/QueryPlan/tests/gtest_protobuf.cpp index a3bdab48472..de20f745abd 100644 --- a/src/QueryPlan/tests/gtest_protobuf.cpp +++ b/src/QueryPlan/tests/gtest_protobuf.cpp @@ -674,7 +674,7 @@ TEST_F(ProtobufTest, TableWriteStep) std::string step_description = fmt::format("description {}", eng() % 100); auto base_input_stream = generateDataStream(eng); auto target = generateTableWriteStepInsertTarget(eng); - auto s = std::make_shared(base_input_stream, target); + auto s = 
std::make_shared(base_input_stream, target, false); s->setStepDescription(step_description); return s; }(); @@ -700,7 +700,7 @@ TEST_F(ProtobufTest, TableFinishStep) auto base_input_stream = generateDataStream(eng); auto target = generateTableWriteStepInsertTarget(eng); auto output_affected_row_count_symbol = fmt::format("text{}", eng() % 100); - auto s = std::make_shared(base_input_stream, target, output_affected_row_count_symbol, nullptr); + auto s = std::make_shared(base_input_stream, target, output_affected_row_count_symbol, nullptr, false); s->setStepDescription(step_description); return s; }(); diff --git a/tests/queries/4_cnch_stateless/10098_test_insert_select_with_profiles.reference b/tests/queries/4_cnch_stateless/10098_test_insert_select_with_profiles.reference new file mode 100644 index 00000000000..4d98a7fc2a1 --- /dev/null +++ b/tests/queries/4_cnch_stateless/10098_test_insert_select_with_profiles.reference @@ -0,0 +1,9 @@ +insert_select_with_profiles = 0 +0 +2946865 +insert_select_with_profiles = 1 +947 +52346 +2893572 +0 +2946865 diff --git a/tests/queries/4_cnch_stateless/10098_test_insert_select_with_profiles.sql b/tests/queries/4_cnch_stateless/10098_test_insert_select_with_profiles.sql new file mode 100644 index 00000000000..4d9ea995cc3 --- /dev/null +++ b/tests/queries/4_cnch_stateless/10098_test_insert_select_with_profiles.sql @@ -0,0 +1,27 @@ +USE test; + +set enable_optimizer=0; + +DROP TABLE IF EXISTS test_insert_all; +DROP TABLE IF EXISTS test_insert_all2; +CREATE TABLE test_insert_all (`id` UInt64, `name` String) ENGINE = CnchMergeTree() ORDER BY id SETTINGS index_granularity = 8192; + +SET insert_select_with_profiles = 0; +SELECT 'insert_select_with_profiles = 0'; +INSERT INTO test_insert_all SELECT number, toString(number) FROM system.numbers LIMIT 947; +INSERT INTO test_insert_all SELECT number, toString(number) FROM system.numbers LIMIT 52346; +INSERT INTO test_insert_all SELECT number, toString(number) FROM system.numbers LIMIT 
2893572; +select sleep(2); +SELECT count() FROM test_insert_all; + +CREATE TABLE test_insert_all2 (`id` UInt64, `name` String) ENGINE = CnchMergeTree() ORDER BY id SETTINGS index_granularity = 8192; +SET insert_select_with_profiles = 1; +SELECT 'insert_select_with_profiles = 1'; +INSERT INTO test_insert_all2 SELECT number, toString(number) FROM system.numbers LIMIT 947; +INSERT INTO test_insert_all2 SELECT number, toString(number) FROM system.numbers LIMIT 52346; +INSERT INTO test_insert_all2 SELECT number, toString(number) FROM system.numbers LIMIT 2893572; +select sleep(2); +SELECT count() FROM test_insert_all2; + +DROP TABLE IF EXISTS test_insert_all; +DROP TABLE IF EXISTS test_insert_all2; From ff8a2785992c946bdb3001233c8e569bfcf8fcf1 Mon Sep 17 00:00:00 2001 From: fredwang Date: Thu, 1 Aug 2024 03:37:08 +0000 Subject: [PATCH 022/292] Merge 'cherry-pick-mr-22451' into 'cnch-2.2' fix(optimizer@m-4505507062): fix rewrite like See merge request: !22617 --- src/Analyzers/SimpleFunctionVisitor.cpp | 7 ++++++- .../48011_rewrite_like_function.reference | 2 ++ .../48011_rewrite_like_function.sql | 3 +++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/Analyzers/SimpleFunctionVisitor.cpp b/src/Analyzers/SimpleFunctionVisitor.cpp index 47da564ef8f..0aaa351f043 100644 --- a/src/Analyzers/SimpleFunctionVisitor.cpp +++ b/src/Analyzers/SimpleFunctionVisitor.cpp @@ -3,6 +3,7 @@ #include #include #include +#include "common/types.h" namespace DB @@ -27,7 +28,11 @@ void SimpleFunctionVisitor::visit(ASTFunction * func) if ((func->name == "like" || func->name == "notLike") && func->arguments->children.size() == 2 && func->arguments->children[1]->as()) { - Field converted = convertFieldToType(func->arguments->children[1]->as()->value, DataTypeString()); + auto & pattern = func->arguments->children[1]->as()->value; + if (pattern.getType() != Field::Types::String) + return; + + Field converted = convertFieldToType(pattern, DataTypeString()); String text = 
converted.safeGet(); for (auto & s : text) diff --git a/tests/queries/4_cnch_stateless_no_tenant/48011_rewrite_like_function.reference b/tests/queries/4_cnch_stateless_no_tenant/48011_rewrite_like_function.reference index 125a489c483..f063b524823 100644 --- a/tests/queries/4_cnch_stateless_no_tenant/48011_rewrite_like_function.reference +++ b/tests/queries/4_cnch_stateless_no_tenant/48011_rewrite_like_function.reference @@ -16,6 +16,8 @@ 1 0 0 +\N +\N SELECT arrayJoin([\'hello\', \'world\']) = \'hello\' SELECT arrayJoin([\'hello\', \'world\']) LIKE \'%hello%\' SELECT arrayJoin([\'hello\', \'world\']) LIKE \'hell_\' diff --git a/tests/queries/4_cnch_stateless_no_tenant/48011_rewrite_like_function.sql b/tests/queries/4_cnch_stateless_no_tenant/48011_rewrite_like_function.sql index 66555587ab1..acbcef7ead3 100644 --- a/tests/queries/4_cnch_stateless_no_tenant/48011_rewrite_like_function.sql +++ b/tests/queries/4_cnch_stateless_no_tenant/48011_rewrite_like_function.sql @@ -11,6 +11,9 @@ SELECT arrayJoin(['hello', 'world']) NOT LIKE 'hell_'; SELECT '_hello' NOT LIKE '\_hello'; SELECT '%hello' NOT LIKE '\%hello'; +SELECT 'hello' NOT LIKE Null; +SELECT 'hello' LIKE Null; + set enable_optimizer=1; set rewrite_like_function=1; EXPLAIN SYNTAX SELECT arrayJoin(['hello', 'world']) LIKE 'hello'; From fd28525514e5acbd4de5d9b3c78abb129e314e15 Mon Sep 17 00:00:00 2001 From: fredwang Date: Thu, 1 Aug 2024 04:21:57 +0000 Subject: [PATCH 023/292] Merge 'cherry-pick-mr-22640' into 'cnch-2.2' feat(clickhousech@m-4655966021): Merge 'brpc-overcrowded' into 'cnch-2.2' See merge request: !22676 # Conflicts: # contrib/incubator-brpc --- contrib/incubator-brpc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/incubator-brpc b/contrib/incubator-brpc index 275bf4ff355..aa0318be51b 160000 --- a/contrib/incubator-brpc +++ b/contrib/incubator-brpc @@ -1 +1 @@ -Subproject commit 275bf4ff35537eab940a84c615da17eee2b4cd9b +Subproject commit 
aa0318be51bc4aed735b7759452b7cd25e3c34dd From 4b02c10a0dff95216866dda279afff83285d6141 Mon Sep 17 00:00:00 2001 From: fredwang Date: Thu, 1 Aug 2024 04:23:14 +0000 Subject: [PATCH 024/292] remove not used header --- src/Interpreters/DistributedStages/PlanSegmentSplitter.cpp | 1 - src/Storages/MergeTree/MergeTreeDataPartCNCH.cpp | 1 - 2 files changed, 2 deletions(-) diff --git a/src/Interpreters/DistributedStages/PlanSegmentSplitter.cpp b/src/Interpreters/DistributedStages/PlanSegmentSplitter.cpp index 1549a6fb630..e68549f10c0 100644 --- a/src/Interpreters/DistributedStages/PlanSegmentSplitter.cpp +++ b/src/Interpreters/DistributedStages/PlanSegmentSplitter.cpp @@ -30,7 +30,6 @@ #include #include #include -#include namespace DB { diff --git a/src/Storages/MergeTree/MergeTreeDataPartCNCH.cpp b/src/Storages/MergeTree/MergeTreeDataPartCNCH.cpp index 1c891c1254b..b57f9d9f5c0 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCNCH.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartCNCH.cpp @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include From 9e50faa07305dd10dadea82098940b874b7d4fb1 Mon Sep 17 00:00:00 2001 From: fredwang Date: Thu, 1 Aug 2024 04:24:12 +0000 Subject: [PATCH 025/292] Merge '4656096097_cnch-2.2' into 'cnch-2.2' fix(optimizer@m-4656096097): make MultipleDistinctAggregationToExpandAggregate skip unsupported case See merge request: !22675 --- ...leDistinctAggregationToExpandAggregate.cpp | 36 +++++-- ...ipleDistinctAggregationToExpandAggregate.h | 5 +- .../48044_multiple_distinct_rewrite.reference | 2 + .../48044_multiple_distinct_rewrite.sql | 97 ++++++++++++++++++- 4 files changed, 130 insertions(+), 10 deletions(-) diff --git a/src/Optimizer/Rule/Rewrite/MultipleDistinctAggregationToExpandAggregate.cpp b/src/Optimizer/Rule/Rewrite/MultipleDistinctAggregationToExpandAggregate.cpp index 92c8ada1488..98d4b9a87eb 100644 --- a/src/Optimizer/Rule/Rewrite/MultipleDistinctAggregationToExpandAggregate.cpp +++ 
b/src/Optimizer/Rule/Rewrite/MultipleDistinctAggregationToExpandAggregate.cpp @@ -28,6 +28,9 @@ const std::set MultipleDistinctAggregationToExpandAggregate::distinct_fu const std::set MultipleDistinctAggregationToExpandAggregate::distinct_func_with_if{ "uniqexactif", "countdistinctif", "avgdistinctif", "maxdistinctif", "mindistinctif", "sumdistinctif"}; +const std::set MultipleDistinctAggregationToExpandAggregate::non_distinct_func_with_if{ + "sumif", "countif", "avgif", "maxif", "minif"}; + const std::unordered_map MultipleDistinctAggregationToExpandAggregate::distinct_func_normal_func{ {"uniqexact", "countIf"}, {"countdistinct", "countIf"}, @@ -36,7 +39,9 @@ const std::unordered_map MultipleDistinctAggregationToExpandAggr {"mindistinct", "minIf"}, {"sumdistinct", "sumIf"}}; -bool MultipleDistinctAggregationToExpandAggregate::hasNoDistinctWithFilterOrMask(const AggregatingStep & step) +const std::set MultipleDistinctAggregationToExpandAggregate::un_supported_func{"hllsketchestimate"}; + +bool MultipleDistinctAggregationToExpandAggregate::hasNoFilterOrMask(const AggregatingStep & step) { const AggregateDescriptions & agg_descs = step.getAggregates(); for (const auto & agg_desc : agg_descs) @@ -46,6 +51,9 @@ bool MultipleDistinctAggregationToExpandAggregate::hasNoDistinctWithFilterOrMask if (distinct_func_with_if.contains(Poco::toLower(agg_desc.function->getName()))) return false; + + if (non_distinct_func_with_if.contains(Poco::toLower(agg_desc.function->getName()))) + return false; } return true; } @@ -113,19 +121,31 @@ bool MultipleDistinctAggregationToExpandAggregate::allCountHasAtMostOneArguments if (Poco::toLower(agg.function->getName()) == "uniqexact" || Poco::toLower(agg.function->getName()) == "countdistinct") { if (agg.argument_names.size() > 1) - return false; + return false; } } return true; } +bool MultipleDistinctAggregationToExpandAggregate::hasNoUnSupportedFunc(const AggregatingStep & step) +{ + const AggregateDescriptions & agg_descs = 
step.getAggregates(); + for (const auto & agg_desc : agg_descs) + { + if (un_supported_func.contains(Poco::toLower(agg_desc.function->getName()))) + return false; + } + return true; +} + ConstRefPatternPtr MultipleDistinctAggregationToExpandAggregate::getPattern() const { static auto pattern = Patterns::aggregating() - .matchingStep([](const AggregatingStep & s) { - return hasNoDistinctWithFilterOrMask(s) && (hasMultipleDistincts(s) || hasMixedDistinctAndNonDistincts(s)) && hasUniqueArgument(s) && allCountHasAtMostOneArguments(s); - }) - .result(); + .matchingStep([](const AggregatingStep & s) { + return hasNoFilterOrMask(s) && (hasMultipleDistincts(s) || hasMixedDistinctAndNonDistincts(s)) + && hasUniqueArgument(s) && allCountHasAtMostOneArguments(s) && hasNoUnSupportedFunc(s); + }) + .result(); return pattern; } @@ -264,12 +284,12 @@ TransformResult MultipleDistinctAggregationToExpandAggregate::transformImpl(Plan // step 2 : add pre-compute aggregate std::set keyset; - for(const String & key : step.getKeys()) + for (const String & key : step.getKeys()) { keyset.insert(key); } keyset.insert(group_id_symbol); - for(const String & distinct : distinct_arguments) + for (const String & distinct : distinct_arguments) { keyset.insert(distinct); } diff --git a/src/Optimizer/Rule/Rewrite/MultipleDistinctAggregationToExpandAggregate.h b/src/Optimizer/Rule/Rewrite/MultipleDistinctAggregationToExpandAggregate.h index f8a67ef0349..377550b2660 100644 --- a/src/Optimizer/Rule/Rewrite/MultipleDistinctAggregationToExpandAggregate.h +++ b/src/Optimizer/Rule/Rewrite/MultipleDistinctAggregationToExpandAggregate.h @@ -93,10 +93,13 @@ class MultipleDistinctAggregationToExpandAggregate : public Rule private: static const std::set distinct_func; static const std::set distinct_func_with_if; + static const std::set non_distinct_func_with_if; + static const std::set un_supported_func; static const std::unordered_map distinct_func_normal_func; - static bool 
hasNoDistinctWithFilterOrMask(const AggregatingStep & step); + static bool hasNoFilterOrMask(const AggregatingStep & step); static bool hasMultipleDistincts(const AggregatingStep & step); static bool hasMixedDistinctAndNonDistincts(const AggregatingStep & step); + static bool hasNoUnSupportedFunc(const AggregatingStep & step); /** * Distinct/Non-distinct aggregate function's arguments must unique. diff --git a/tests/queries/4_cnch_stateless/48044_multiple_distinct_rewrite.reference b/tests/queries/4_cnch_stateless/48044_multiple_distinct_rewrite.reference index 179a06251d4..0e632cac56f 100644 --- a/tests/queries/4_cnch_stateless/48044_multiple_distinct_rewrite.reference +++ b/tests/queries/4_cnch_stateless/48044_multiple_distinct_rewrite.reference @@ -4,3 +4,5 @@ 1 2 1 1 c 2 5 1 2 d 0 0 0 nan nan +0 0 0 0 \N 0 +0 0 0 diff --git a/tests/queries/4_cnch_stateless/48044_multiple_distinct_rewrite.sql b/tests/queries/4_cnch_stateless/48044_multiple_distinct_rewrite.sql index b7d275a303e..3ef9c9b3431 100644 --- a/tests/queries/4_cnch_stateless/48044_multiple_distinct_rewrite.sql +++ b/tests/queries/4_cnch_stateless/48044_multiple_distinct_rewrite.sql @@ -531,4 +531,99 @@ SELECT uniq(multiIf(dateDiff(CAST(first_order_date, 'Nullable(Date)'), CAST(first_term_order_date, 'Nullable(Date)')) <= 30, fx_account_id, NULL)) / uniq(multiIf(is_order = '1', multiIf(isNotNull(credit_apply_pass_time) AND (auto_audit_status = '3'), submit_host_user_id, NULL), NULL)) AS _1700018902349 FROM aeolus_data_table_8_352783_prod WHERE ((p_date >= '2024-06-05') AND (p_date <= '2024-06-05')) AND (auto_audit_status = '3') AND (uid_rank_desc = '1') -LIMIT 1000; \ No newline at end of file +LIMIT 1000; + +CREATE TABLE app_instant_partner_shop_di +( + `partner_id` Int64, + `partner_name` String, + `root_shop_id` Int64, + `root_shop_name` String, + `shop_logo_uri` String, + `ofo_commission_amt` Decimal(38, 6), + `ofo_live_commission_amt` Decimal(38, 6), + `pay_amt` Decimal(38, 6), + `pay_prod_cnt` 
Int64, + `prod_expose_uids` Nullable(SketchBinary), + `prod_click_uids` Nullable(SketchBinary), + `refund_amt` Decimal(38, 6), + `prod_reture_ord_cnt_3d` Int64, + `accept_ord_cnt` Int64, + `bad_comment_ord_cnt_3d` Int64, + `complain_ord_cnt_7d` Int64, + `pay_ord_cnt` Int64, + `date_type` String, + `date` Date +) +ENGINE = CnchMergeTree +PARTITION BY date +ORDER BY (partner_id, root_shop_id, date_type, intHash64(partner_id)); + +SELECT + sum(ofo_commission_amt + ofo_live_commission_amt) AS commission_amt, + sum(pay_amt) AS pay_amt_cnt, + sum(pay_prod_cnt) AS pay_prod_cnt, + countDistinct(IF(pay_amt > 0, root_shop_id, NULL)) AS has_paid_shop_cnt, + hllSketchEstimate(12, 1)(prod_click_uids) / hllSketchEstimate(12, 1)(prod_expose_uids) AS click_expose_rate, + sum(refund_amt) AS refund_amt +FROM app_instant_partner_shop_di +WHERE ((date >= '2024-05-24') AND (date <= '2024-05-30')) AND (partner_id = 7368703292081193255) AND (date_type = '1d'); + +CREATE TABLE ads_agroup_24year_brand_stat_df +( + `date_time` String, + `brand_id` Int64, + `brand_name` Nullable(String), + `brand_s_level` Nullable(String), + `shop_settle_type` String, + `first_mgt_cate` Nullable(String), + `second_mgt_cate` Nullable(String), + `cate_attribution` Nullable(String), + `tier` Nullable(String), + `tier_name` Nullable(String), + `brand_layer` Nullable(String), + `strategy_brand_struct` Nullable(String), + `strategy_cate_props` Nullable(String), + `strategy_agroup_industry` Nullable(String), + `strategy_second_vbline_id` Nullable(Int64), + `strategy_cate_attribution` Nullable(String), + `strategy_tier` Nullable(String), + `is_gmv_task` Int8, + `is_mall_shelf_task` Int8, + `is_low_brand_task` Int8, + `is_high_price_task` Int8, + `op_emp_id` Nullable(Int64), + `op_emp_name` Nullable(String), + `op_emp_email` Nullable(String), + `second_vbline_id` Nullable(Int64), + `second_vbline_name` Nullable(String), + `third_vbline_id` Nullable(Int64), + `third_vbline_name` Nullable(String), + `fourth_vbline_id` 
Nullable(Int64), + `fourth_vbline_name` Nullable(String), + `slice_id` UInt32, + `td_gmv` Int64, + `gmv` Int64, + `history_gmv` Int64, + `history_td_gmv` Int64, + `p_date` Date, + `date_type` String, + `platform` Int32, + `op_td_gmv` Int64, + `strategy_agroup_industry_name` Nullable(String), + `strategy_first_vbline_name` Nullable(String), + `first_vbline_name` Nullable(String), + `first_vbline_id` Nullable(Int64), + `strategy_first_vbline_id` Nullable(Int64) +) +ENGINE = CnchMergeTree +PARTITION BY (p_date, date_type, platform) +CLUSTER BY slice_id INTO 24 BUCKETS SPLIT_NUMBER 96 +ORDER BY (date_time, brand_id, shop_settle_type, slice_id, p_date, date_type, platform, intHash64(slice_id)); + +SELECT + countDistinct(brand_id) AS brand_nums, + sumIf(gmv, platform = 1) AS douyin_index, + sumIf(gmv, platform IN (2, 4)) AS outer_index +FROM ads_agroup_24year_brand_stat_df +WHERE (p_date = '2024-06-06') AND (date_type = 'month') AND ((platform IN (2, 4)) OR (platform = 1)) AND (date_time IN ('20221130', '20221231', '20230131', '20230228', '20230331', '20230430')) AND (brand_layer IN ('0份额品牌', '极劣势品牌', '劣势品牌', '胶着品牌', '优势品牌')) AND (shop_settle_type IN ('旗舰店', '经销商')) AND (strategy_first_vbline_id = 11230509000002) AND isNotNull(strategy_second_vbline_id) AND isNotNull(strategy_agroup_industry_name) \ No newline at end of file From 53cc356f56a59381dc902564cd44ee39c0b89d4b Mon Sep 17 00:00:00 2001 From: fredwang Date: Thu, 1 Aug 2024 04:24:32 +0000 Subject: [PATCH 026/292] Merge branch 'cherry-pick-82f2ae22' into 'cnch-2.2' feat(clickhousech@m-4505469713): Support single quoted identifier as alias for MYSQL IDE (CP) See merge request dp/ClickHouse!22659 --- src/Parsers/ExpressionElementParsers.cpp | 13 ++++++++----- src/Parsers/ExpressionElementParsers.h | 7 +++++-- .../60201_mysql_quote_identifier.reference | 2 ++ .../60201_mysql_quote_identifier.sql | 3 +++ 4 files changed, 18 insertions(+), 7 deletions(-) create mode 100644 
tests/queries/4_cnch_stateless/60201_mysql_quote_identifier.reference create mode 100644 tests/queries/4_cnch_stateless/60201_mysql_quote_identifier.sql diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 71fdd3ee0dc..d88de00ec0b 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -181,15 +181,18 @@ bool ParserSubquery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) bool ParserIdentifier::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - /// Identifier in backquotes or in double quotes - if (pos->type == TokenType::BackQuotedIdentifier || pos->type == TokenType::DoubleQuotedIdentifier) + /// Identifier in backquotes or in double quotes or single quotes + if (pos->type == TokenType::BackQuotedIdentifier || pos->type == TokenType::DoubleQuotedIdentifier + || (allow_single_quoted_identifier && pos->type == TokenType::StringLiteral)) { ReadBufferFromMemory buf(pos->begin, pos->size()); String s; if (*pos->begin == '`') readBackQuotedStringWithSQLStyle(s, buf); - else + else if (*pos->begin == '\'') + readQuotedStringWithSQLStyle(s, buf); + else if (*pos->begin == '"') readDoubleQuotedStringWithSQLStyle(s, buf); if (s.empty()) /// Identifiers "empty string" are not allowed. 
@@ -2272,7 +2275,7 @@ const char * ParserAlias::restricted_keywords[] = bool ParserAlias::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ParserKeyword s_as("AS"); - ParserIdentifier id_p; + ParserIdentifier id_p(false, allow_single_quoted_identifier); bool has_as_word = s_as.ignore(pos, expected); if (!allow_alias_without_as_keyword && !has_as_word) @@ -2707,7 +2710,7 @@ bool ParserWithOptionalAlias::parseImpl(Pos & pos, ASTPtr & node, Expected & exp allow_alias_without_as_keyword_now = false; ASTPtr alias_node; - if (ParserAlias(allow_alias_without_as_keyword_now).parse(pos, alias_node, expected)) + if (ParserAlias(allow_alias_without_as_keyword_now, dt.parse_mysql_ddl).parse(pos, alias_node, expected)) { /// FIXME: try to prettify this cast using `as<>()` if (auto * ast_with_alias = dynamic_cast(node.get())) diff --git a/src/Parsers/ExpressionElementParsers.h b/src/Parsers/ExpressionElementParsers.h index 248882225fb..87013de0d31 100644 --- a/src/Parsers/ExpressionElementParsers.h +++ b/src/Parsers/ExpressionElementParsers.h @@ -72,12 +72,13 @@ class ParserSubquery : public IParserDialectBase class ParserIdentifier : public IParserBase { public: - explicit ParserIdentifier(bool allow_query_parameter_ = false) : allow_query_parameter(allow_query_parameter_) {} + explicit ParserIdentifier(bool allow_query_parameter_ = false, bool allow_single_quoted_identifier_ = false) : allow_query_parameter(allow_query_parameter_), allow_single_quoted_identifier(allow_single_quoted_identifier_) {} protected: const char * getName() const override { return "identifier"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; bool allow_query_parameter; + bool allow_single_quoted_identifier; }; @@ -480,12 +481,14 @@ class ParserLiteral : public IParserDialectBase class ParserAlias : public IParserBase { public: - explicit ParserAlias(bool allow_alias_without_as_keyword_) : allow_alias_without_as_keyword(allow_alias_without_as_keyword_) { } + explicit 
ParserAlias(bool allow_alias_without_as_keyword_, bool allow_single_quoted_identifier_ = false) : allow_alias_without_as_keyword(allow_alias_without_as_keyword_), allow_single_quoted_identifier(allow_single_quoted_identifier_) { } private: static const char * restricted_keywords[]; bool allow_alias_without_as_keyword; + /// default false; set to true for mysql, which allows: select 123 as 'offset' + bool allow_single_quoted_identifier; const char * getName() const override { return "alias"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; diff --git a/tests/queries/4_cnch_stateless/60201_mysql_quote_identifier.reference b/tests/queries/4_cnch_stateless/60201_mysql_quote_identifier.reference new file mode 100644 index 00000000000..3e67fe1ac4f --- /dev/null +++ b/tests/queries/4_cnch_stateless/60201_mysql_quote_identifier.reference @@ -0,0 +1,2 @@ +123 +123 diff --git a/tests/queries/4_cnch_stateless/60201_mysql_quote_identifier.sql b/tests/queries/4_cnch_stateless/60201_mysql_quote_identifier.sql new file mode 100644 index 00000000000..1e7bfd31c3f --- /dev/null +++ b/tests/queries/4_cnch_stateless/60201_mysql_quote_identifier.sql @@ -0,0 +1,3 @@ +set dialect_type='MYSQL'; +select 123 as 'offset'; +select 123 as 'offset''s'; From db367ac97718c43f9902a972d91c964d9041beb2 Mon Sep 17 00:00:00 2001 From: fredwang Date: Thu, 1 Aug 2024 04:31:08 +0000 Subject: [PATCH 027/292] Merge 'fix/cnch-2.2-bitengine_preattach_fix' into 'cnch-2.2' fix(clickhousech@m-4502944525): [cp]minor fix and optimization about preattach partition See merge request: !22632 # Conflicts: # src/Storages/BitEngineEncodePartitionHelper.cpp # src/Storages/StorageCnchMergeTree.cpp # src/Storages/StorageCnchMergeTree.h --- src/Interpreters/InterpreterAlterQuery.cpp | 2 +- .../BitEngineEncodePartitionHelper.cpp | 465 ++++++++++++++++++ src/Storages/IStorage.cpp | 2 +- src/Storages/IStorage.h | 6 +- src/Storages/MergeTree/MergeTreeData.cpp | 3 +- 
src/Storages/MergeTree/MergeTreeData.h | 3 +- src/Storages/StorageCloudMergeTree.cpp | 3 +- src/Storages/StorageCloudMergeTree.h | 3 +- src/Storages/StorageCnchMergeTree.cpp | 2 +- src/Storages/StorageCnchMergeTree.h | 2 +- src/Storages/StorageMaterializedView.cpp | 5 +- src/Storages/StorageMaterializedView.h | 2 +- src/Storages/StorageProxy.h | 4 +- 13 files changed, 489 insertions(+), 13 deletions(-) create mode 100644 src/Storages/BitEngineEncodePartitionHelper.cpp diff --git a/src/Interpreters/InterpreterAlterQuery.cpp b/src/Interpreters/InterpreterAlterQuery.cpp index 6f17b7744d1..4cb25a5f8a5 100644 --- a/src/Interpreters/InterpreterAlterQuery.cpp +++ b/src/Interpreters/InterpreterAlterQuery.cpp @@ -167,7 +167,7 @@ BlockIO InterpreterAlterQuery::executeToTable(const ASTAlterQuery & alter) if (!partition_commands.empty()) { table->checkAlterPartitionIsPossible(partition_commands, metadata_snapshot, getContext()->getSettingsRef()); - auto partition_commands_pipe = table->alterPartition(metadata_snapshot, partition_commands, getContext()); + auto partition_commands_pipe = table->alterPartition(metadata_snapshot, partition_commands, getContext(), query_ptr); if (!partition_commands_pipe.empty()) res.pipeline.init(std::move(partition_commands_pipe)); table->setUpdateTimeNow(); diff --git a/src/Storages/BitEngineEncodePartitionHelper.cpp b/src/Storages/BitEngineEncodePartitionHelper.cpp new file mode 100644 index 00000000000..d535b57d31a --- /dev/null +++ b/src/Storages/BitEngineEncodePartitionHelper.cpp @@ -0,0 +1,465 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "DataStreams/IBlockStream_fwd.h" +#include "DataStreams/UnionBlockInputStream.h" +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int SYSTEM_ERROR; + extern const int NOT_ENOUGH_SPACE; +} + +/// To do 
mutate, reserve amount of space equals to sum size of parts times specified coefficient. +static const double DISK_USAGE_COEFFICIENT_TO_RESERVE = 1.1; + +Pipe StorageCnchMergeTree::preattachPartition(const PartitionCommand & command, const ContextPtr & local_context, const ASTPtr & query) +{ + if (!isBitEngineTable()) + return {}; + + String partition_id = getPartitionIDFromQuery(command.partition, local_context); + auto catalog = local_context->getCnchCatalog(); + auto cur_txn = local_context->getCurrentTransaction(); + + /// 1. get partition lock + LockInfoPtr partition_lock = std::make_shared(cur_txn->getTransactionID()); + partition_lock->setMode(LockMode::X); + partition_lock->setTimeout(local_context->getSettingsRef().ingest_column_memory_lock_timeout.value.totalMilliseconds()); // default 5s + partition_lock->setUUIDAndPrefix(getStorageUUID(), LockInfo::task_domain); + partition_lock->setPartition(partition_id); + + Stopwatch lock_watch; + auto cnch_lock = std::make_shared(local_context, std::move(partition_lock)); + cnch_lock->lock(); + LOG_DEBUG(log, "Acquired lock in {} ms", lock_watch.elapsedMilliseconds()); + + /// 2. 
stop merges of the table + /// remove the merge mutate tasks that could cause WW conflict before get server part + auto daemon_manager_client_ptr = local_context->getDaemonManagerClient(); + if (!daemon_manager_client_ptr) + throw Exception("Failed to get daemon manager client", ErrorCodes::SYSTEM_ERROR); + + std::optional merge_job_info + = daemon_manager_client_ptr->getDMBGJobInfo(getStorageUUID(), CnchBGThreadType::MergeMutate, local_context->getCurrentQueryId()); + if (!merge_job_info || merge_job_info->host_port.empty()) + LOG_DEBUG( + log, + "Will skip removing related merge tasks as there is no valid host server for table's merge job: {}", + getStorageID().getNameForLogs()); + else + { + auto server_client_ptr = local_context->getCnchServerClient(merge_job_info->host_port); + if (!server_client_ptr) + throw Exception("Failed to get server client with host port " + merge_job_info->host_port, ErrorCodes::SYSTEM_ERROR); + if (!server_client_ptr->removeMergeMutateTasksOnPartitions(getStorageID(), {partition_id})) + throw Exception( + "Failed to get remove MergeMutateTasks on partition_id " + partition_id + " for table " + getStorageID().getNameForLogs(), + ErrorCodes::SYSTEM_ERROR); + } + + /// 3. get source_parts of the partition + ServerDataPartsVector source_parts = catalog->getServerDataPartsInPartitions( + shared_from_this(), {partition_id}, local_context->getCurrentCnchStartTime(), local_context.get()); + ServerDataPartsVector visible_source_parts + = CnchPartsHelper::calcVisibleParts(source_parts, false, CnchPartsHelper::LoggingOption::EnableLogging); + LOG_DEBUG( + log, + "In partition_id: {}, number of server source parts: {}, visible source parts: {}", + partition_id, + source_parts.size(), + visible_source_parts.size()); + + /// 4. 
allocate dict table and parts + auto underlying_dicts_mapping = getUnderlyDictionaryTables(); + for (auto & entry : underlying_dicts_mapping) + { + auto storage_underlying_dict + = DatabaseCatalog::instance().tryGetTable(StorageID{entry.second.first, entry.second.second}, local_context); + + auto * storage_underlying_dict_cnch = dynamic_cast(storage_underlying_dict.get()); + if (storage_underlying_dict_cnch) + { + storage_underlying_dict_cnch->allocateForBitEngine(local_context, std::set{}, WorkerEngineType::DICT); + } + } + + /// 5. allocate bitengine table and visible_source_parts + String local_table_name = getCloudTableName(local_context); + collectResource(local_context, visible_source_parts, local_table_name); + + /// 6. send Alter query to each worker, and get the Remote Stream to construct an pipe + /// 6.1 rewrite the query to cloud worker + auto query_send = query->clone(); + ASTAlterQuery & query_send_ref = query_send->as(); + query_send_ref.database = getStorageID().getDatabaseName(); + query_send_ref.table = local_table_name; + String query_send_to_worker = queryToString(query_send); + + /// 6.2 collect worker group + auto worker_group = getWorkerGroupForTable(*this, local_context); + local_context->setCurrentWorkerGroup(worker_group); + healthCheckForWorkerGroup(local_context, worker_group); + + /// 6.3 construct remote_stream/pipe to send query to each worker + std::vector remote_streams; + for (const auto & shard_info : worker_group->getShardsInfo()) + { + auto preattach_stream = CnchStorageCommonHelper::sendQueryPerShard(local_context, query_send_to_worker, shard_info); + remote_streams.emplace_back(preattach_stream); + } + + cur_txn->setMainTableUUID(getStorageUUID()); + auto union_stream = std::make_shared(remote_streams, nullptr, local_context->getSettingsRef().max_threads); + auto transaction_stream = std::make_shared(union_stream, std::move(cur_txn), std::move(cnch_lock)); + + return Pipe{std::make_shared(std::move(transaction_stream))}; +} + 
+BitEngineEncodePartitionStream::BitEngineEncodePartitionStream( + const StorageCloudMergeTree & cloud_merge_tree, const PartitionCommand & command_, ContextPtr local_context_) + : storage(cloud_merge_tree), command(command_), local_context(local_context_) +{ +} + +FutureMergedMutatedPart getFuturePart(const MergeTreeMetaBase::DataPartPtr & part, ContextPtr & local_context) +{ + auto new_part_info = part->info; + new_part_info.level += 1; + new_part_info.hint_mutation = new_part_info.mutation; + new_part_info.mutation = local_context->getCurrentTransactionID().toUInt64(); + + FutureMergedMutatedPart future_part; + future_part.uuid = UUIDHelpers::generateV4(); + future_part.parts.push_back(part); + future_part.part_info = new_part_info; + future_part.name = new_part_info.getPartName(); + future_part.type = part->getType(); + return future_part; +} + +Block BitEngineEncodePartitionStream::readImpl() +{ + Stopwatch watch; + String partition_id = storage.getPartitionIDFromQuery(command.partition, local_context); + auto parts_to_encode = storage.getDataPartsVectorInPartition(MergeTreeMetaBase::DataPartState::Committed, partition_id); + + /// 1. encapsulate FutureParts for parts_to_encode + std::vector future_parts; + for (const auto & part : parts_to_encode) + { + future_parts.emplace_back(getFuturePart(part, local_context)); + } + + /// 2. BitEngineDictionaryManager encode parts + auto dict_manager = storage.getBitEngineDictionaryManager(); + auto temp_parts = dict_manager->encodeParts(storage, future_parts, local_context); + + /// 3. 
commit encoded parts + CnchDataWriter cnch_writer(const_cast(storage), local_context, ManipulationType::Insert); + cnch_writer.dumpAndCommitCnchParts(temp_parts); + + LOG_DEBUG( + &Poco::Logger::get("BitEngineEncodePartition"), + "({}) BitEngine encode partition_id {} with {} parts cost {} s.", + storage.getStorageID().getNameForLogs(), + partition_id, + parts_to_encode.size(), + watch.elapsedSeconds()); + + return {}; +} + +PartsEncoder::PartsEncoder(const StorageCloudMergeTree & storage_, BitEngineDictionaryManager & dict_manager_, ContextPtr local_context_) + : storage(storage_), dict_manager(dict_manager_), local_context(local_context_) +{ +} + +MergeTreeMetaBase::MutableDataPartsVector PartsEncoder::encodeBitEngineParts(std::vector & future_parts) +{ + MergeTreeMetaBase::MutableDataPartsVector result_parts; + for (auto & part : future_parts) + { + auto encoded_part = encodeBitEnginePart(part); + result_parts.emplace_back(encoded_part); + } + return result_parts; +} + +static bool needSyncPart(size_t input_rows, size_t input_bytes, const MergeTreeSettings & settings) +{ + return ( + (settings.min_rows_to_fsync_after_merge && input_rows >= settings.min_rows_to_fsync_after_merge) + || (settings.min_compressed_bytes_to_fsync_after_merge && input_bytes >= settings.min_compressed_bytes_to_fsync_after_merge)); +} + +MergeTreeMetaBase::MutableDataPartPtr PartsEncoder::encodeBitEnginePart(FutureMergedMutatedPart & future_part) +{ + auto & source_part = future_part.parts.at(0); + auto columns_to_encode = getBitEngineColumnsInPart(source_part); + if (columns_to_encode.empty()) + return nullptr; + + bool need_sync = needSyncPart(source_part->rows_count, source_part->getBytesOnDisk(), *storage.getSettings()); + auto compression_codec = source_part->default_codec; + + if (!compression_codec) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown codec for bitengine encode part: {}", source_part->name); + + auto new_partial_part = createEmptyTempPart(future_part); + + auto 
input_stream = createInputStream(source_part, columns_to_encode.getNames()); + auto output_stream = createOutputStream(source_part, input_stream->getHeader(), new_partial_part, compression_codec); + encodeTransform(*input_stream, *output_stream, new_partial_part, need_sync); + + finalizeTempPart(source_part, new_partial_part, compression_codec); + + return new_partial_part; +} + +NamesAndTypesList PartsEncoder::getBitEngineColumnsInPart(const IMergeTreeDataPartPtr & part) +{ + const auto & columns = part->getColumns(); + NamesAndTypesList columns_to_encode; + for (const auto & column : columns) + { + if (!isBitmap64(column.type)) + continue; + + bool bitengine_type = column.type->isBitEngineEncode(); + if (bitengine_type || storage.isBitEngineEncodeColumn(column.name)) + { + if (!bitengine_type) + const_cast(column.type.get())->setFlags(TYPE_BITENGINE_ENCODE_FLAG); + columns_to_encode.push_back(column); + } + } + + return columns_to_encode; +} + +IMutableMergeTreeDataPartPtr PartsEncoder::createEmptyTempPart(FutureMergedMutatedPart & future_part) +{ + auto & part = future_part.parts.at(0); + auto estimated_space_for_result = static_cast(part->getBytesOnDisk() * DISK_USAGE_COEFFICIENT_TO_RESERVE); + ReservationPtr reserved_space = storage.reserveSpace(estimated_space_for_result, IStorage::StorageLocation::AUXILITY); + + if (!reserved_space) + throw Exception("Not enough space for encoding part '" + part->name + "' ", ErrorCodes::NOT_ENOUGH_SPACE); + + auto single_disk_volume = std::make_shared("volume_" + future_part.name, reserved_space->getDisk(), 0); + + auto new_partial_part = storage.createPart( + future_part.name, + MergeTreeDataPartType::WIDE, + future_part.part_info, + single_disk_volume, + "tmp_enc_" + future_part.name, + nullptr, + IStorage::StorageLocation::AUXILITY); + + new_partial_part->uuid = future_part.uuid; + new_partial_part->is_temp = true; + new_partial_part->ttl_infos = part->ttl_infos; + new_partial_part->versions = part->versions; + + 
new_partial_part->index_granularity_info = part->index_granularity_info; + new_partial_part->setColumns(part->getColumns()); + new_partial_part->partition.assign(part->partition); + new_partial_part->columns_commit_time = part->columns_commit_time; + new_partial_part->mutation_commit_time = part->mutation_commit_time; + if (storage.isBucketTable()) + new_partial_part->bucket_number = part->bucket_number; + + new_partial_part->checksums_ptr = std::make_shared(); + + auto disk = new_partial_part->volume->getDisk(); + String new_part_tmp_path = new_partial_part->getFullRelativePath(); + + SyncGuardPtr sync_guard; + if (storage.getSettings()->fsync_part_directory) + sync_guard = disk->getDirectorySyncGuard(new_part_tmp_path); + + /// calculate which columns can be skipped in encoding + // NameSet files_to_skip = source_part->getFileNamesWithoutChecksums(); + disk->createDirectories(new_part_tmp_path); + + return new_partial_part; +} + +BlockInputStreamPtr PartsEncoder::createInputStream(const IMergeTreeDataPartPtr & part, Names column_names) +{ + auto input_source = std::make_unique( + storage, + storage.getStorageSnapshot(storage.getInMemoryMetadataPtr(), nullptr), + part, + column_names, + /*read_with_direct_io*/ false, + /*take_column_types_from_storage*/ true); + + QueryPipeline pipeline; + pipeline.init(Pipe(std::move(input_source))); + pipeline.setMaxThreads(1); + BlockInputStreamPtr pipeline_input_stream = std::make_shared(std::move(pipeline)); + + pipeline_input_stream = std::make_shared(pipeline_input_stream, + local_context->getSettingsRef().min_insert_block_size_rows, + local_context->getSettingsRef().min_insert_block_size_bytes * 2U); + + return pipeline_input_stream; +} + +BlockOutputStreamPtr PartsEncoder::createOutputStream( + const IMergeTreeDataPartPtr & source_part, + const Block & header_in, + IMutableMergeTreeDataPartPtr & new_temp_part, + const CompressionCodecPtr & codec) +{ + /// Calc header + Block header_to_write; + for (const auto & column : 
header_in.getColumnsWithTypeAndName()) + { + if (isBitmap64(column.type)) + { + header_to_write.insert(ColumnWithTypeAndName(column.column, column.type, column.name + BITENGINE_COLUMN_EXTENSION)); + } + } + + MergeTreeWriterSettings writer_settings( + storage.getContext()->getSettings(), + storage.getSettings(), + /*can_use_adaptive_granularity = */ false, + false); + + return std::make_shared( + new_temp_part, + storage.getInMemoryMetadataPtr(), + writer_settings, + header_to_write, + codec, + std::vector{}, + nullptr, + source_part->index_granularity); +} + +void PartsEncoder::encodeTransform( + IBlockInputStream & in, IBlockOutputStream & out, IMutableMergeTreeDataPartPtr & new_temp_part, bool need_sync) +{ + in.readPrefix(); + out.writePrefix(); + + Block block; + while ((block = in.read())) + { + writeImplicitColumnForBitEngine(block, new_temp_part->bucket_number); + out.write(block); + } + + in.readSuffix(); + auto changed_checksums = dynamic_cast(out).writeSuffixAndGetChecksums( + new_temp_part, *new_temp_part->getChecksums(), need_sync); + new_temp_part->checksums_ptr->add(std::move(changed_checksums)); +} + +void PartsEncoder::writeImplicitColumnForBitEngine(Block & block, Int64 bucket_number) +{ + ColumnsWithTypeAndName encoded_columns; + const auto & columns = block.getColumnsWithTypeAndName(); + + for (const auto & column : columns) + { + if (!isBitmap64(column.type)) + continue; + + /// check whether the column is a legal BitEngine column in table + if (!storage.isBitEngineEncodeColumn(column.name)) + continue; + + try + { + auto encoded_column = dict_manager.encodeColumn(column, column.name, bucket_number, local_context, BitEngineEncodeSettings{}); + + encoded_columns.push_back(encoded_column); + } + catch (Exception & e) + { + // LOG_ERROR(&Poco::Logger::get("BitEnginePartsEncoder"), "BitEngine encode column exception: {}", e.message()); + // tryLogCurrentException(__PRETTY_FUNCTION__); + throw Exception("BitEngine encode exception. 
reason: " + String(e.message()), ErrorCodes::LOGICAL_ERROR); + } + } + + if (!encoded_columns.empty()) + { + for (auto & encoded_column : encoded_columns) + block.insertUnique(encoded_column); + } +} + +void PartsEncoder::finalizeTempPart( + const MergeTreeDataPartPtr & source_part, const MergeTreeMutableDataPartPtr & new_partial_part, const CompressionCodecPtr & codec) +{ + auto disk = new_partial_part->volume->getDisk(); + auto new_part_checksums_ptr = new_partial_part->getChecksums(); + + if (new_partial_part->uuid != UUIDHelpers::Nil) + { + auto out = disk->writeFile(new_partial_part->getFullRelativePath() + IMergeTreeDataPart::UUID_FILE_NAME, {.buffer_size = 4096}); + HashingWriteBuffer out_hashing(*out); + writeUUIDText(new_partial_part->uuid, out_hashing); + new_part_checksums_ptr->files[IMergeTreeDataPart::UUID_FILE_NAME].file_size = out_hashing.count(); + new_part_checksums_ptr->files[IMergeTreeDataPart::UUID_FILE_NAME].file_hash = out_hashing.getHash(); + } + + { + /// Write file with checksums. + auto out_checksums = disk->writeFile(fs::path(new_partial_part->getFullRelativePath()) / "checksums.txt", {.buffer_size = 4096}); + new_part_checksums_ptr->versions = new_partial_part->versions; + new_part_checksums_ptr->write(*out_checksums); + } /// close fd + + { + auto out = disk->writeFile( + new_partial_part->getFullRelativePath() + IMergeTreeDataPart::DEFAULT_COMPRESSION_CODEC_FILE_NAME, {.buffer_size = 4096}); + DB::writeText(queryToString(codec->getFullCodecDesc()), *out); + } + + { + /// Write a file with a description of columns. 
+ auto out_columns = disk->writeFile(fs::path(new_partial_part->getFullRelativePath()) / "columns.txt", {.buffer_size = 4096}); + new_partial_part->getColumns().writeText(*out_columns); + } /// close fd + + new_partial_part->rows_count = source_part->rows_count; + new_partial_part->index_granularity = source_part->index_granularity; + new_partial_part->index = source_part->getIndex(); + new_partial_part->minmax_idx = source_part->minmax_idx; + new_partial_part->modification_time = time(nullptr); + new_partial_part->loadProjections(false, false); + new_partial_part->setBytesOnDisk( + MergeTreeData::DataPart::calculateTotalSizeOnDisk(new_partial_part->volume->getDisk(), new_partial_part->getFullRelativePath())); + new_partial_part->default_codec = codec; +} +} diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index 2fe40493346..6e19a4bccec 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -179,7 +179,7 @@ void IStorage::read( } Pipe IStorage::alterPartition( - const StorageMetadataPtr & /* metadata_snapshot */, const PartitionCommands & /* commands */, ContextPtr /* context */) + const StorageMetadataPtr & /* metadata_snapshot */, const PartitionCommands & /* commands */, ContextPtr /* context */, const ASTPtr & /* query */) { throw Exception("Partition operations are not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); } diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 81dc4df7538..49edcdffde7 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -531,11 +531,15 @@ class IStorage : public std::enable_shared_from_this, public TypePromo /** ALTER tables with regard to its partitions. * Should handle locks for each command on its own. 
+ * + * Use the last `query` argument to keep the alter query ast since in some case we need to forward the + * query to workers */ virtual Pipe alterPartition( const StorageMetadataPtr & /* metadata_snapshot */, const PartitionCommands & /* commands */, - ContextPtr /* context */); + ContextPtr /* context */, + const ASTPtr & query = nullptr); /// Checks that partition commands can be applied to storage. virtual void checkAlterPartitionIsPossible(const PartitionCommands & commands, const StorageMetadataPtr & metadata_snapshot, const Settings & settings) const; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index cc29131c22c..4fd48565ed0 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -2893,7 +2893,8 @@ void MergeTreeData::repairPartition(const ASTPtr & , bool , const String & , Con Pipe MergeTreeData::alterPartition( const StorageMetadataPtr & metadata_snapshot, const PartitionCommands & commands, - ContextPtr query_context) + ContextPtr query_context, + const ASTPtr & /* query */) { PartitionCommandsResultInfo result; for (const PartitionCommand & command : commands) diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 86102c65346..e300ddd0d23 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -486,7 +486,8 @@ class MergeTreeData : public MergeTreeMetaBase Pipe alterPartition( const StorageMetadataPtr & metadata_snapshot, const PartitionCommands & commands, - ContextPtr query_context) override; + ContextPtr query_context, + const ASTPtr & query = nullptr) override; /// Creates description of columns of data type Object from the range of data parts. 
static ColumnsDescription getConcreteObjectColumns( diff --git a/src/Storages/StorageCloudMergeTree.cpp b/src/Storages/StorageCloudMergeTree.cpp index f515d3ec25f..76419233f07 100644 --- a/src/Storages/StorageCloudMergeTree.cpp +++ b/src/Storages/StorageCloudMergeTree.cpp @@ -206,7 +206,8 @@ void StorageCloudMergeTree::checkAlterPartitionIsPossible(const PartitionCommand Pipe StorageCloudMergeTree::alterPartition( const StorageMetadataPtr & metadata_snapshot, const PartitionCommands & commands, - ContextPtr local_context) + ContextPtr local_context, + const ASTPtr & /* query */) { if (commands.size() > 1U) throw Exception( diff --git a/src/Storages/StorageCloudMergeTree.h b/src/Storages/StorageCloudMergeTree.h index ef506bfe6b1..a4e44dce276 100644 --- a/src/Storages/StorageCloudMergeTree.h +++ b/src/Storages/StorageCloudMergeTree.h @@ -91,7 +91,8 @@ class StorageCloudMergeTree : public shared_ptr_helper, p Pipe alterPartition( const StorageMetadataPtr & /* metadata_snapshot */, const PartitionCommands & /* commands */, - ContextPtr /* context */) override; + ContextPtr /* context */, + const ASTPtr & query = nullptr) override; Pipe ingestPartition(const StorageMetadataPtr &, const PartitionCommand & command, ContextPtr local_context); diff --git a/src/Storages/StorageCnchMergeTree.cpp b/src/Storages/StorageCnchMergeTree.cpp index 7510aa7feb1..dc0515f95f8 100644 --- a/src/Storages/StorageCnchMergeTree.cpp +++ b/src/Storages/StorageCnchMergeTree.cpp @@ -1983,7 +1983,7 @@ void StorageCnchMergeTree::checkAlterPartitionIsPossible( } Pipe StorageCnchMergeTree::alterPartition( - const StorageMetadataPtr & metadata_snapshot, const PartitionCommands & commands, ContextPtr query_context) + const StorageMetadataPtr & metadata_snapshot, const PartitionCommands & commands, ContextPtr query_context, const ASTPtr & query) { if (unlikely(!query_context->getCurrentTransaction())) throw Exception("Transaction is not set", ErrorCodes::LOGICAL_ERROR); diff --git 
a/src/Storages/StorageCnchMergeTree.h b/src/Storages/StorageCnchMergeTree.h index a47a088ecd5..f51ed0e48a7 100644 --- a/src/Storages/StorageCnchMergeTree.h +++ b/src/Storages/StorageCnchMergeTree.h @@ -158,7 +158,7 @@ class StorageCnchMergeTree final : public shared_ptr_helperalterPartition(metadata_snapshot, commands, local_context); diff --git a/src/Storages/StorageMaterializedView.h b/src/Storages/StorageMaterializedView.h index 2e5d2bcf594..806e8d6b08e 100644 --- a/src/Storages/StorageMaterializedView.h +++ b/src/Storages/StorageMaterializedView.h @@ -77,7 +77,7 @@ class StorageMaterializedView final : public shared_ptr_helperalterPartition(metadata_snapshot, commands, context); } @@ -182,4 +183,3 @@ class StorageProxy : public IStorage } - From a69bf6e91128d4d5e8804559706aef8f11f21c18 Mon Sep 17 00:00:00 2001 From: fredwang Date: Thu, 1 Aug 2024 04:31:34 +0000 Subject: [PATCH 028/292] Merge 'cherry-pick-mr-22606' into 'cnch-2.2' fix(optimizer@m-3010175307):prepared statement catalog See merge request: !22645 --- src/Catalog/Catalog.cpp | 44 ++++++++++++++++ src/Catalog/Catalog.h | 10 +++- src/Catalog/MetastoreProxy.cpp | 51 +++++++++++++++++++ src/Catalog/MetastoreProxy.h | 21 ++++++++ src/Common/ProfileEvents.cpp | 8 +++ .../InterpreterDropPreparedStatementQuery.cpp | 2 +- .../PreparedStatementCatalog.cpp | 27 ++++++++++ .../PreparedStatementCatalog.h | 46 +++++++++++++++++ .../PreparedStatementManager.cpp | 43 ++++++++-------- .../PreparedStatementManager.h | 11 ++-- src/Protos/data_models.proto | 5 ++ .../48035_prepared_statement.reference | 1 + .../48035_prepared_statement.sql | 9 ++++ 13 files changed, 247 insertions(+), 31 deletions(-) create mode 100644 src/Interpreters/PreparedStatement/PreparedStatementCatalog.cpp create mode 100644 src/Interpreters/PreparedStatement/PreparedStatementCatalog.h diff --git a/src/Catalog/Catalog.cpp b/src/Catalog/Catalog.cpp index 15d49c97a91..c8e4cd2cbad 100644 --- a/src/Catalog/Catalog.cpp +++ b/src/Catalog/Catalog.cpp 
@@ -112,6 +112,14 @@ namespace ProfileEvents extern const Event GetSQLBindingsFailed; extern const Event RemoveSQLBindingSuccess; extern const Event RemoveSQLBindingFailed; + extern const Event UpdatePreparedStatementSuccess; + extern const Event UpdatePreparedStatementFailed; + extern const Event GetPreparedStatementSuccess; + extern const Event GetPreparedStatementFailed; + extern const Event GetPreparedStatementsSuccess; + extern const Event GetPreparedStatementsFailed; + extern const Event RemovePreparedStatementSuccess; + extern const Event RemovePreparedStatementFailed; extern const Event CreateDatabaseSuccess; extern const Event CreateDatabaseFailed; extern const Event GetDatabaseSuccess; @@ -6626,6 +6634,42 @@ namespace Catalog ProfileEvents::RemoveSQLBindingFailed); } + void Catalog::updatePreparedStatement(const PreparedStatementItemPtr & data) + { + runWithMetricSupport( + [&] { meta_proxy->updatePreparedStatement(name_space, data); }, + ProfileEvents::UpdatePreparedStatementSuccess, + ProfileEvents::UpdatePreparedStatementFailed); + } + + PreparedStatements Catalog::getPreparedStatements() + { + PreparedStatements res; + runWithMetricSupport( + [&] { res = meta_proxy->getPreparedStatements(name_space); }, + ProfileEvents::GetPreparedStatementSuccess, + ProfileEvents::GetPreparedStatementFailed); + return res; + } + + PreparedStatementItemPtr Catalog::getPreparedStatement(const String & name) + { + PreparedStatementItemPtr res; + runWithMetricSupport( + [&] { res = meta_proxy->getPreparedStatement(name_space, name); }, + ProfileEvents::GetPreparedStatementSuccess, + ProfileEvents::GetPreparedStatementFailed); + return res; + } + + void Catalog::removePreparedStatement(const String & name) + { + runWithMetricSupport( + [&] { meta_proxy->removePreparedStatement(name_space, name); }, + ProfileEvents::RemovePreparedStatementSuccess, + ProfileEvents::RemovePreparedStatementFailed); + } + void Catalog::setMergeMutateThreadStartTime(const StorageID & 
storage_id, const UInt64 & startup_time) const { meta_proxy->setMergeMutateThreadStartTime(name_space, UUIDHelpers::UUIDToString(storage_id.uuid), startup_time); diff --git a/src/Catalog/Catalog.h b/src/Catalog/Catalog.h index 1f33dfddf36..ca34604e772 100644 --- a/src/Catalog/Catalog.h +++ b/src/Catalog/Catalog.h @@ -130,7 +130,7 @@ class Catalog ////////////// - void updateSQLBinding(const SQLBindingItemPtr data); + void updateSQLBinding(SQLBindingItemPtr data); SQLBindings getSQLBindings(); @@ -140,6 +140,14 @@ class Catalog void removeSQLBinding(const String & uuid, const String & tenant_id, const bool & is_re_expression); + void updatePreparedStatement(const PreparedStatementItemPtr & data); + + PreparedStatements getPreparedStatements(); + + PreparedStatementItemPtr getPreparedStatement(const String & name); + + void removePreparedStatement(const String & name); + ///////////////////////////// /// Database related API ///////////////////////////// diff --git a/src/Catalog/MetastoreProxy.cpp b/src/Catalog/MetastoreProxy.cpp index edcd952b9a1..4fa0be0559e 100644 --- a/src/Catalog/MetastoreProxy.cpp +++ b/src/Catalog/MetastoreProxy.cpp @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -2372,6 +2373,56 @@ void MetastoreProxy::removeSQLBinding(const String & name_space, const String & metastore_ptr->batchWrite(batch_write, resp); } +void MetastoreProxy::updatePreparedStatement(const String & name_space, const PreparedStatementItemPtr & data) +{ + BatchCommitRequest batch_write; + + Protos::PreparedStatementItem prepared_statement; + prepared_statement.set_name(data->name); + prepared_statement.set_create_statement(data->create_statement); + batch_write.AddPut(SinglePutRequest(preparedStatementKey(name_space, data->name), prepared_statement.SerializeAsString())); + BatchCommitResponse resp; + metastore_ptr->batchWrite(batch_write, resp); +} + +PreparedStatements MetastoreProxy::getPreparedStatements(const String & name_space) +{ + 
PreparedStatements res; + auto prepared_prefix = preparedStatementPrefix(name_space); + auto it = metastore_ptr->getByPrefix(prepared_prefix); + while (it->next()) + { + Protos::PreparedStatementItem prepared_statement; + prepared_statement.ParseFromString(it->value()); + PreparedStatementItemPtr statement = std::make_shared(prepared_statement.name(), prepared_statement.create_statement()); + res.emplace_back(statement); + } + + return res; +} +PreparedStatementItemPtr MetastoreProxy::getPreparedStatement(const String & name_space, const String & name) +{ + String value; + auto prepared_statement_key = preparedStatementKey(name_space, name); + metastore_ptr->get(prepared_statement_key, value); + + if (value.empty()) + return nullptr; + + Protos::PreparedStatementItem prepared_statement; + prepared_statement.ParseFromString(value); + PreparedStatementItemPtr prepared = std::make_shared(prepared_statement.name(), prepared_statement.create_statement()); + return prepared; +} + +void MetastoreProxy::removePreparedStatement(const String & name_space, const String & name) +{ + BatchCommitRequest batch_write; + batch_write.AddDelete(preparedStatementKey(name_space, name)); + BatchCommitResponse resp; + metastore_ptr->batchWrite(batch_write, resp); +} + void MetastoreProxy::createVirtualWarehouse(const String & name_space, const String & vw_name, const VirtualWarehouseData & data) { auto vw_key = VWKey(name_space, vw_name); diff --git a/src/Catalog/MetastoreProxy.h b/src/Catalog/MetastoreProxy.h index 0040066ff66..a9c2b0350a8 100644 --- a/src/Catalog/MetastoreProxy.h +++ b/src/Catalog/MetastoreProxy.h @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -113,6 +114,7 @@ namespace DB::Catalog #define COLUMN_STATISTICS_PREFIX "CS_" #define COLUMN_STATISTICS_TAG_PREFIX "CST_" // deprecated, just remove it #define SQL_BINDING_PREFIX "SBI_" +#define PREPARED_STATEMENT_PREFIX "PSTAT_" #define FILESYS_LOCK_PREFIX "FSLK_" #define UDF_STORE_PREFIX 
"UDF_" #define MERGEMUTATE_THREAD_START_TIME "MTST_" @@ -742,6 +744,20 @@ class MetastoreProxy return ss.str(); } + static String preparedStatementKey(const String name_space, const String & key) + { + std::stringstream ss; + ss << escapeString(name_space) << '_' << PREPARED_STATEMENT_PREFIX << '_' << key; + return ss.str(); + } + + static String preparedStatementPrefix(const String name_space) + { + std::stringstream ss; + ss << escapeString(name_space) << '_' << PREPARED_STATEMENT_PREFIX; + return ss.str(); + } + static String tableStatisticKey(const String name_space, const String & uuid, const StatisticsTag & tag) { std::stringstream ss; @@ -1224,6 +1240,11 @@ class MetastoreProxy SQLBindingItemPtr getSQLBinding(const String & name_space, const String & uuid, const String & tenant_id, const bool & is_re_expression); void removeSQLBinding(const String & name_space, const String & uuid, const String & tenant_id, const bool & is_re_expression); + void updatePreparedStatement(const String & name_space, const PreparedStatementItemPtr & data); + PreparedStatements getPreparedStatements(const String & name_space); + PreparedStatementItemPtr getPreparedStatement(const String & name_space, const String & name); + void removePreparedStatement(const String & name_space, const String & name); + void updateTableStatistics(const String & name_space, const String & uuid, const std::unordered_map & data); // new api std::unordered_map getTableStatistics(const String & name_space, const String & uuid); diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index dbac6c90e81..49d0127f6de 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -521,6 +521,14 @@ M(GetSQLBindingsSuccess, "") \ M(RemoveSQLBindingFailed, "") \ M(RemoveSQLBindingSuccess, "") \ + M(UpdatePreparedStatementFailed, "") \ + M(UpdatePreparedStatementSuccess, "") \ + M(GetPreparedStatementFailed, "") \ + M(GetPreparedStatementSuccess, "") \ + 
M(GetPreparedStatementsFailed, "") \ + M(GetSPreparedStatementsSuccess, "") \ + M(RemovePreparedStatementFailed, "") \ + M(RemovePreparedStatementSuccess, "") \ M(CreateDatabaseSuccess, "") \ M(CreateDatabaseFailed, "") \ M(GetDatabaseSuccess, "") \ diff --git a/src/Interpreters/InterpreterDropPreparedStatementQuery.cpp b/src/Interpreters/InterpreterDropPreparedStatementQuery.cpp index d6e29064efb..32026fb8417 100644 --- a/src/Interpreters/InterpreterDropPreparedStatementQuery.cpp +++ b/src/Interpreters/InterpreterDropPreparedStatementQuery.cpp @@ -32,7 +32,7 @@ BlockIO InterpreterDropPreparedStatementQuery::execute() if (!prepared_manager) throw Exception("Prepare cache has to be initialized", ErrorCodes::LOGICAL_ERROR); - prepared_manager->remove(drop->name, !drop->if_exists); + prepared_manager->remove(drop->name, !drop->if_exists, current_context); return {}; } } diff --git a/src/Interpreters/PreparedStatement/PreparedStatementCatalog.cpp b/src/Interpreters/PreparedStatement/PreparedStatementCatalog.cpp new file mode 100644 index 00000000000..85a5e92fd0e --- /dev/null +++ b/src/Interpreters/PreparedStatement/PreparedStatementCatalog.cpp @@ -0,0 +1,27 @@ +#include +#include + +namespace DB +{ +void PreparedStatementCatalogManager::updatePreparedStatement(PreparedStatementItemPtr data) +{ + catalog->updatePreparedStatement(data); +} + +PreparedStatements PreparedStatementCatalogManager::getPreparedStatements() +{ + return catalog->getPreparedStatements(); +} + +PreparedStatementItemPtr PreparedStatementCatalogManager::getPreparedStatement(const String & name) +{ + return catalog->getPreparedStatement(name); +} + +void PreparedStatementCatalogManager::removePreparedStatement(const String & name) +{ + catalog->removePreparedStatement(name); +} + + +} diff --git a/src/Interpreters/PreparedStatement/PreparedStatementCatalog.h b/src/Interpreters/PreparedStatement/PreparedStatementCatalog.h new file mode 100644 index 00000000000..2711ffdb742 --- /dev/null +++ 
b/src/Interpreters/PreparedStatement/PreparedStatementCatalog.h @@ -0,0 +1,46 @@ +#pragma once +#include +#include +#include + +namespace DB +{ + +class PreparedStatementItem +{ + +public: + PreparedStatementItem(String name_, String create_statement_) + : name(std::move(name_)) + , create_statement(std::move(create_statement_)) + { + } + + String name; + String create_statement; +}; + +using PreparedStatementItemPtr = std::shared_ptr; +using PreparedStatements = std::vector; + +class PreparedStatementCatalogManager +{ +public: + explicit PreparedStatementCatalogManager(const ContextPtr & context) + { + catalog = context->getCnchCatalog(); + } + + void updatePreparedStatement(PreparedStatementItemPtr); + + PreparedStatements getPreparedStatements(); + + PreparedStatementItemPtr getPreparedStatement(const String & name); + + void removePreparedStatement(const String & name); + +private: + std::shared_ptr catalog; +}; + +} diff --git a/src/Interpreters/PreparedStatement/PreparedStatementManager.cpp b/src/Interpreters/PreparedStatement/PreparedStatementManager.cpp index eb1c7a8f29c..31c4ecc365b 100644 --- a/src/Interpreters/PreparedStatement/PreparedStatementManager.cpp +++ b/src/Interpreters/PreparedStatement/PreparedStatementManager.cpp @@ -7,6 +7,7 @@ #include #include #include +#include "Interpreters/PreparedStatement/PreparedStatementCatalog.h" #include "Parsers/IAST_fwd.h" #include @@ -29,17 +30,13 @@ void PreparedStatementManager::initialize(ContextMutablePtr context) if (!context->getPreparedStatementManager()) { auto manager_instance = std::make_unique(); - const auto & config = context->getConfigRef(); - String default_path = fs::path{context->getPath()} / "prepared_statement/"; - String path = config.getString("prepared_statement_path", default_path); - manager_instance->prepared_statement_loader = std::make_unique(path); context->setPreparedStatementManager(std::move(manager_instance)); - loadStatementsFromDisk(context); + 
loadStatementsFromCatalog(context); } } void PreparedStatementManager::set( - const String & name, PreparedObject prepared_object, bool throw_if_exists, bool or_replace, bool is_persistent) + const String & name, PreparedObject prepared_object, bool throw_if_exists, bool or_replace, bool is_persistent, ContextMutablePtr context) { std::unique_lock lock(mutex); @@ -47,10 +44,12 @@ void PreparedStatementManager::set( { if (is_persistent) { + if (!context) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Context is nullptr"); Protos::PreparedStatement proto; - prepared_object.toProto(proto); - throw_if_exists = throw_if_exists && !or_replace; - prepared_statement_loader->storeObject(name, proto, throw_if_exists, or_replace); + PreparedStatementCatalogManager catalog(context); + PreparedStatementItemPtr prepared = std::make_shared(name, prepared_object.query->formatForErrorMessage()); + catalog.updatePreparedStatement(prepared); } cache[name] = std::move(prepared_object); } @@ -70,11 +69,15 @@ SettingsChanges PreparedStatementManager::getSettings(const String & name) const return getUnsafe(name).settings_changes; } -void PreparedStatementManager::remove(const String & name, bool throw_if_not_exists) +void PreparedStatementManager::remove(const String & name, bool throw_if_not_exists, ContextMutablePtr context) { std::unique_lock lock(mutex); + if (context) + { + PreparedStatementCatalogManager catalog(context); + catalog.removePreparedStatement(name); + } - prepared_statement_loader->removeObject(name, false); if (hasUnsafe(name)) cache.erase(name); else if (throw_if_not_exists) @@ -145,7 +148,7 @@ void PreparedStatementManager::addPlanToCache( } } const auto & prepare = query->as(); - set(name, std::move(prepared_object), !prepare.if_not_exists, prepare.or_replace, prepare.is_permanent); + set(name, std::move(prepared_object), !prepare.if_not_exists, prepare.or_replace, prepare.is_permanent, context); } PlanNodePtr PreparedStatementManager::getNewPlanNode(PlanNodePtr 
node, ContextMutablePtr & context, bool cache_plan, PlanNodeId & max_id) @@ -209,29 +212,25 @@ void PreparedStatementManager::clearCache() cache.clear(); } -NamesAndPreparedStatements PreparedStatementManager::getAllStatementsFromDisk(ContextMutablePtr & context) -{ - return prepared_statement_loader->getAllObjects(context); -} - -void PreparedStatementManager::loadStatementsFromDisk(ContextMutablePtr & context) +void PreparedStatementManager::loadStatementsFromCatalog(ContextMutablePtr & context) { if (!context->getPreparedStatementManager()) throw Exception("PreparedStatement cache has to be initialized", ErrorCodes::LOGICAL_ERROR); auto * manager = context->getPreparedStatementManager(); manager->clearCache(); - auto statements = manager->getAllStatementsFromDisk(context); + PreparedStatementCatalogManager catalog(context); + auto statements = catalog.getPreparedStatements(); for (auto & statement : statements) { try { ParserCreatePreparedStatementQuery parser(ParserSettings::valueOf(context->getSettingsRef())); - auto ast = parseQuery(parser, statement.second.query(), "", 0, context->getSettings().max_parser_depth); + auto ast = parseQuery(parser, statement->create_statement, "", 0, context->getSettings().max_parser_depth); auto * create_prep_stat = ast->as(); if (!create_prep_stat) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid prepread statement query: {}", statement.second.query()); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid prepread statement query: {}", statement->create_statement); create_prep_stat->is_permanent = false; auto query_context = Context::createCopy(context); @@ -245,7 +244,7 @@ void PreparedStatementManager::loadStatementsFromDisk(ContextMutablePtr & contex { tryLogWarningCurrentException( &Poco::Logger::get("PreparedStatementManager"), - fmt::format("while build prepared statement {} plan", backQuote(statement.first))); + fmt::format("while build prepared statement {} plan", backQuote(statement->name))); continue; } } diff 
--git a/src/Interpreters/PreparedStatement/PreparedStatementManager.h b/src/Interpreters/PreparedStatement/PreparedStatementManager.h index 733e3b78865..840a644d928 100644 --- a/src/Interpreters/PreparedStatement/PreparedStatementManager.h +++ b/src/Interpreters/PreparedStatement/PreparedStatementManager.h @@ -52,15 +52,14 @@ class PreparedStatementManager PreparedObject prepared_object, bool throw_if_exists = true, bool or_replace = false, - bool is_persistent = true); + bool is_persistent = true, + ContextMutablePtr context = nullptr); PreparedObject getObject(const String & name) const; SettingsChanges getSettings(const String & name) const; - void remove(const String & name, bool throw_if_not_exists); + void remove(const String & name, bool throw_if_not_exists, ContextMutablePtr context = nullptr); void clearCache(); Strings getNames() const; bool has(const String & name) const; - NamesAndPreparedStatements getAllStatementsFromDisk(ContextMutablePtr & context); - struct CacheResultType { QueryPlanPtr plan; @@ -79,14 +78,12 @@ class PreparedStatementManager PreparedParameterSet prepared_params, ContextMutablePtr & context); - static void loadStatementsFromDisk(ContextMutablePtr & context); + static void loadStatementsFromCatalog(ContextMutablePtr & context); private: CacheType cache; mutable std::shared_mutex mutex; - std::unique_ptr prepared_statement_loader; - bool hasUnsafe(const String & name) const { return cache.contains(name); diff --git a/src/Protos/data_models.proto b/src/Protos/data_models.proto index a22533755e7..90bbd2bbfff 100644 --- a/src/Protos/data_models.proto +++ b/src/Protos/data_models.proto @@ -393,6 +393,11 @@ message SQLBinding { required string tenant_id = 6; } +message PreparedStatementItem { + required string name = 1; + required string create_statement = 2; +} + message VirtualWarehouseSettings { /// basic information /// // {READ, WRITE, TASK} diff --git a/tests/queries/4_cnch_stateless_no_tenant/48035_prepared_statement.reference 
b/tests/queries/4_cnch_stateless_no_tenant/48035_prepared_statement.reference index 7849671e91b..b4527d9ca63 100644 --- a/tests/queries/4_cnch_stateless_no_tenant/48035_prepared_statement.reference +++ b/tests/queries/4_cnch_stateless_no_tenant/48035_prepared_statement.reference @@ -61,3 +61,4 @@ Projection Limit: 9 Outputs: [number] prep1 +prep4 diff --git a/tests/queries/4_cnch_stateless_no_tenant/48035_prepared_statement.sql b/tests/queries/4_cnch_stateless_no_tenant/48035_prepared_statement.sql index 25d3daeea4d..9a72799fc40 100644 --- a/tests/queries/4_cnch_stateless_no_tenant/48035_prepared_statement.sql +++ b/tests/queries/4_cnch_stateless_no_tenant/48035_prepared_statement.sql @@ -82,3 +82,12 @@ DROP PREPARED STATEMENT IF EXISTS prep1; DROP PREPARED STATEMENT IF EXISTS prep2; DROP PREPARED STATEMENT IF EXISTS prep3; + +DROP PREPARED STATEMENT IF EXISTS prep4; +CREATE PREPARED STATEMENT IF NOT EXISTS prep4 AS +SELECT count() +FROM (SELECT number FROM system.numbers LIMIT 10) +WHERE number < [x: UInt32]; +SHOW PREPARED STATEMENTS; +DROP PREPARED STATEMENT IF EXISTS prep4; +SHOW PREPARED STATEMENTS; From 9785714c45bcf0e59a00e544b007f3ffddf44286 Mon Sep 17 00:00:00 2001 From: fredwang Date: Thu, 1 Aug 2024 04:33:17 +0000 Subject: [PATCH 029/292] fix compile --- src/Core/Settings.h | 39 --------------------------------------- 1 file changed, 39 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 1023bcbcc04..2f5d6f88ce0 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -506,44 +506,6 @@ enum PreloadLevelSettings : UInt64 M(Bool, enable_join_on_1_equals_1, false, "Enable join on 1=1.", 0) \ \ M(UInt64, preferred_block_size_bytes, 1000000, "", 0) \ -<<<<<<< HEAD -\ - M(UInt64, \ - max_replica_delay_for_distributed_queries, \ - 300, \ - "If set, distributed queries of Replicated tables will choose servers with replication delay in seconds less than the specified " \ - "value (not inclusive). 
Zero means do not take delay into account.", \ - 0) \ - M(Bool, \ - fallback_to_stale_replicas_for_distributed_queries, \ - 1, \ - "Suppose max_replica_delay_for_distributed_queries is set and all replicas for the queried table are stale. If this setting is " \ - "enabled, the query will be performed anyway, otherwise the error will be reported.", \ - 0) \ - M(UInt64, \ - preferred_max_column_in_block_size_bytes, \ - 0, \ - "Limit on max column size in block while reading. Helps to decrease cache misses count. Should be close to L2 cache size.", \ - 0) \ -\ - M(Bool, \ - insert_distributed_sync, \ - false, \ - "If setting is enabled, insert query into distributed waits until data will be sent to all nodes in cluster.", \ - 0) \ - M(UInt64, \ - insert_distributed_timeout, \ - 0, \ - "Timeout for insert query into distributed. Setting is used only with insert_distributed_sync enabled. Zero value means no " \ - "timeout.", \ - 0) \ - M(Int64, \ - distributed_ddl_task_timeout, \ - 180, \ - "Timeout for DDL query responses from all hosts in cluster. If a ddl request has not been performed on all hosts, a response will " \ - "contain a timeout error and a request will be executed in an async mode. Negative value means infinite. Zero means async mode.", \ - 0) \ -======= \ M(UInt64, max_replica_delay_for_distributed_queries, 300, "If set, distributed queries of Replicated tables will choose servers with replication delay in seconds less than the specified value (not inclusive). Zero means do not take delay into account.", 0) \ M(Bool, fallback_to_stale_replicas_for_distributed_queries, 1, "Suppose max_replica_delay_for_distributed_queries is set and all replicas for the queried table are stale. 
If this setting is enabled, the query will be performed anyway, otherwise the error will be reported.", 0) \ @@ -553,7 +515,6 @@ enum PreloadLevelSettings : UInt64 M(Bool, insert_distributed_sync, false, "If setting is enabled, insert query into distributed waits until data will be sent to all nodes in cluster.", 0) \ M(UInt64, insert_distributed_timeout, 0, "Timeout for insert query into distributed. Setting is used only with insert_distributed_sync enabled. Zero value means no timeout.", 0) \ M(Int64, distributed_ddl_task_timeout, 180, "Timeout for DDL query responses from all hosts in cluster. If a ddl request has not been performed on all hosts, a response will contain a timeout error and a request will be executed in an async mode. Negative value means infinite. Zero means async mode.", 0) \ ->>>>>>> cceecdccd5 (Merge 'cherry-pick-mr-22414' into 'cnch-2.2') M(Milliseconds, stream_flush_interval_ms, 7500, "Timeout for flushing data from streaming storages.", 0) \ M(Milliseconds, stream_poll_timeout_ms, 500, "Timeout for polling data from/to streaming storages.", 0) \ \ From ca24ae1da9afd2c41ca2968ec98604ca5eea263a Mon Sep 17 00:00:00 2001 From: fredwang Date: Thu, 1 Aug 2024 04:34:32 +0000 Subject: [PATCH 030/292] Merge 'cherry-pick-mr-22014' into 'cnch-2.2' feat(clickhousech@m-3011087571):add alias for json function[cp] See merge request: !22717 # Conflicts: # tests/queries/0_stateless/01889_sql_json_functions.reference # tests/queries/0_stateless/01889_sql_json_functions.sql --- .../DummyJSONParser.h~cnch-ce-merge | 111 -- src/Functions/FunctionSQLJSON.cpp | 15 +- src/Functions/FunctionSQLJSON.h | 1349 +++++++++++++++-- src/Functions/FunctionsJSON.cpp | 1 + src/Functions/FunctionsJSON.h | 122 +- .../ObjectJSONVisitorJSONPathMemberAccess.h | 4 + .../Parsers/ParserJSONPathArrayIndex.cpp | 51 + .../Parsers/ParserJSONPathArrayIndex.h | 14 + .../JSONPath/Parsers/ParserJSONPathQuery.cpp | 5 +- .../JSONPath/Parsers/ParserJSONPathRange.cpp | 4 + 
.../01889_sql_json_functions.reference | 89 ++ .../0_stateless/01889_sql_json_functions.sql | 113 ++ .../01889_sql_object_json_functions.reference | 58 + .../01889_sql_object_json_functions.sql | 97 ++ 14 files changed, 1715 insertions(+), 318 deletions(-) delete mode 100644 src/Common/JSONParsers/DummyJSONParser.h~cnch-ce-merge create mode 100644 src/Functions/JSONPath/Parsers/ParserJSONPathArrayIndex.cpp create mode 100644 src/Functions/JSONPath/Parsers/ParserJSONPathArrayIndex.h create mode 100644 tests/queries/0_stateless/01889_sql_json_functions.reference create mode 100644 tests/queries/0_stateless/01889_sql_json_functions.sql create mode 100644 tests/queries/0_stateless/01889_sql_object_json_functions.reference create mode 100644 tests/queries/0_stateless/01889_sql_object_json_functions.sql diff --git a/src/Common/JSONParsers/DummyJSONParser.h~cnch-ce-merge b/src/Common/JSONParsers/DummyJSONParser.h~cnch-ce-merge deleted file mode 100644 index 6266ed48f65..00000000000 --- a/src/Common/JSONParsers/DummyJSONParser.h~cnch-ce-merge +++ /dev/null @@ -1,111 +0,0 @@ -#pragma once - -#include -#include -#include -#include "ElementTypes.h" - - -namespace DB -{ -namespace ErrorCodes -{ - extern const int NOT_IMPLEMENTED; -} - -/// This class can be used as an argument for the template class FunctionJSON when we unable to parse JSONs. -/// It can't do anything useful and just throws an exception. -struct DummyJSONParser -{ - class Array; - class Object; - - /// References an element in a JSON document, representing a JSON null, boolean, string, number, - /// array or object. 
- class Element - { - public: - Element() = default; - static ElementType type() { return ElementType::NULL_VALUE; } - static bool isInt64() { return false; } - static bool isUInt64() { return false; } - static bool isDouble() { return false; } - static bool isString() { return false; } - static bool isArray() { return false; } - static bool isObject() { return false; } - static bool isBool() { return false; } - static bool isNull() { return false; } - - static Int64 getInt64() { return 0; } - static UInt64 getUInt64() { return 0; } - static double getDouble() { return 0; } - static bool getBool() { return false; } - static std::string_view getString() { return {}; } - static Array getArray() { return {}; } - static Object getObject() { return {}; } - - static Element getElement() { return {}; } - }; - - /// References an array in a JSON document. - class Array - { - public: - class Iterator - { - public: - Element operator*() const { return {}; } - Iterator & operator++() { return *this; } - Iterator operator++(int) { return *this; } /// NOLINT - friend bool operator==(const Iterator &, const Iterator &) { return true; } - friend bool operator!=(const Iterator &, const Iterator &) { return false; } - }; - - static Iterator begin() { return {}; } - static Iterator end() { return {}; } - static size_t size() { return 0; } - Element operator[](size_t) const { return {}; } - }; - - using KeyValuePair = std::pair; - - /// References an object in a JSON document. 
- class Object - { - public: - class Iterator - { - public: - KeyValuePair operator*() const { return {}; } - Iterator & operator++() { return *this; } - Iterator operator++(int) { return *this; } /// NOLINT - friend bool operator==(const Iterator &, const Iterator &) { return true; } - friend bool operator!=(const Iterator &, const Iterator &) { return false; } - }; - - static Iterator begin() { return {}; } - static Iterator end() { return {}; } - static size_t size() { return 0; } - bool find(std::string_view, Element &) const { return false; } /// NOLINT - -#if 0 - /// Optional: Provides access to an object's element by index. - KeyValuePair operator[](size_t) const { return {}; } -#endif - }; - - /// Parses a JSON document, returns the reference to its root element if succeeded. - bool parse(std::string_view, Element &) { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Functions JSON* are not supported"); } /// NOLINT - -#if 0 - /// Optional: Allocates memory to parse JSON documents faster. 
- void reserve(size_t max_size); -#endif -}; - -inline ALWAYS_INLINE std::ostream& operator<<(std::ostream& out, DummyJSONParser::Element) -{ - return out; -} - -} diff --git a/src/Functions/FunctionSQLJSON.cpp b/src/Functions/FunctionSQLJSON.cpp index 7fa853ae850..3be29bbaf14 100644 --- a/src/Functions/FunctionSQLJSON.cpp +++ b/src/Functions/FunctionSQLJSON.cpp @@ -7,14 +7,17 @@ namespace DB REGISTER_FUNCTION(SQLJSON) { - factory.registerFunction>(); - factory.registerFunction>(); - factory.registerFunction>(); - factory.registerFunction>(); + factory.registerFunction>(FunctionFactory::CaseInsensitive); + factory.registerFunction>(FunctionFactory::CaseInsensitive); + factory.registerFunction>(FunctionFactory::CaseInsensitive); + factory.registerFunction>(FunctionFactory::CaseInsensitive); + factory.registerFunction>(FunctionFactory::CaseInsensitive); + factory.registerFunction>(FunctionFactory::CaseInsensitive); + factory.registerAlias("JSON_SIZE", "JSON_LENGTH", FunctionFactory::CaseInsensitive); + // factory.registerAlias("JSON_ARRAY_LENGTH", "JSON_LENGTH", FunctionFactory::CaseInsensitive); factory.registerFunction>(FunctionFactory::CaseInsensitive); factory.registerFunction>(FunctionFactory::CaseInsensitive); - factory.registerFunction>(FunctionFactory::CaseInsensitive); - + factory.registerFunction>(FunctionFactory::CaseInsensitive); } } diff --git a/src/Functions/FunctionSQLJSON.h b/src/Functions/FunctionSQLJSON.h index b32aed0973c..f96c29bbcff 100644 --- a/src/Functions/FunctionSQLJSON.h +++ b/src/Functions/FunctionSQLJSON.h @@ -22,18 +22,20 @@ #include #include #include -#include "Common/assert_cast.h" +#include #include #include #include #include -#include "Columns/ColumnObject.h" -#include "Columns/IColumn.h" -#include "Core/ColumnsWithTypeAndName.h" -#include "DataTypes/IDataType.h" -#include "Functions/FunctionsComparison.h" -#include "Parsers/IAST_fwd.h" +#include +#include +#include +#include +#include +#include +#include #include +#include #if 
!defined(ARCADIA_BUILD) #include "config_functions.h" @@ -48,23 +50,145 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } +template +class JSONUtils +{ +public: + using Element = typename JSONParser::Element; + using Object = typename JSONParser::Object; + using Array = typename JSONParser::Array; + + static bool jsonElementEqual(const Element & left, const Element & right) + { + if (left.isInt64() && right.isInt64()) + { + return left.getInt64() == right.getInt64(); + } + else if (left.isUInt64() && right.isUInt64()) + { + return left.getUInt64() == right.getUInt64(); + } + else if (left.isDouble() && right.isDouble()) + { + return left.getDouble() == right.getDouble(); + } + else if (left.isString() && right.isString()) + { + return left.getString() == right.getString(); + } + else if (left.isBool() && right.isBool()) + { + return left.getBool() == right.getBool(); + } + else if (left.isNull() && right.isNull()) + { + return true; + } + + return false; + } + + static bool jsonArrayContains(const Array & json_array, const Element & sub_element) + { + if (sub_element.isArray()) + { + const auto & sub_array = sub_element.getArray(); + for (auto it = sub_array.begin(); it != sub_array.end(); ++it) + { + if (!jsonArrayContains(json_array, *it)) + { + return false; + } + } + } + else if (sub_element.isObject()) + { + return false; + } + else + { + for (auto it = json_array.begin(); it != json_array.end(); ++it) + { + if (jsonElementEqual(*it, sub_element)) + { + return true; + } + } + + return false; + } + + return true; + } + + static bool jsonObjectContains(const Object & json_object, const Element & sub_element) + { + if (sub_element.isObject()) + { + for (const auto & [key, value] : sub_element.getObject()) + { + Element temp_element; + bool contains_key = json_object.find(key, temp_element); + if (!contains_key) + return false; + + if (temp_element.isObject()) + { + if (!jsonObjectContains(temp_element.getObject(), value)) + return false; + else + 
continue; + } + + if (temp_element.isArray()) + { + if (!jsonArrayContains(temp_element.getArray(), value)) + return false; + else + continue; + } + + if (!jsonElementEqual(temp_element, value)) + return false; + } + } + else + { + return false; + } + + return true; + } + + static bool contains(const Element & parent_element, const Element & sub_element) + { + if (parent_element.isObject()) + { + return jsonObjectContains(parent_element.getObject(), sub_element); + } + else if (parent_element.isArray()) + { + return jsonArrayContains(parent_element.getArray(), sub_element); + } + else + { + return jsonElementEqual(parent_element, sub_element); + } + } +}; + class FunctionSQLJSONHelpers { public: template typename Impl, class JSONParser> - class Executor + class ExecutorString { public: static ColumnPtr - run(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, uint32_t parse_depth) + run(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, uint32_t parse_depth, DialectType dialect_type) { MutableColumnPtr to{result_type->createColumn()}; to->reserve(input_rows_count); - // TODO: add logic to handle single argument - if (arguments.size() < 2) - { - throw Exception{"JSONPath functions require at least 2 arguments", ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION}; - } const auto & json_column = arguments[0]; @@ -118,7 +242,13 @@ class FunctionSQLJSONHelpers const bool parse_res = parser.parse(token_iterator, res, expected); if (!parse_res) { - throw Exception{"Unable to parse JSONPath", ErrorCodes::BAD_ARGUMENTS}; + if (dialect_type != DialectType::MYSQL) + throw Exception{"Unable to parse JSONPath", ErrorCodes::BAD_ARGUMENTS}; + else + { + to->insertManyDefaults(input_rows_count); + return to; + } } /// Get data and offsets for 2 argument (JSON) @@ -131,7 +261,15 @@ class FunctionSQLJSONHelpers bool document_ok = false; /// Parse JSON for every row - Impl impl; + Impl> impl; + + 
constexpr bool has_member_prepare = requires + { + impl.prepare("", DataTypePtr{}); + }; + + if constexpr (has_member_prepare) + impl.prepare(Name::name, result_type); for (const auto i : collections::range(0, input_rows_count)) { @@ -140,9 +278,10 @@ class FunctionSQLJSONHelpers document_ok = json_parser.parse(json, document); bool added_to_column = false; + ElementIterator iterator(document); if (document_ok) { - added_to_column = impl.insertResultToColumn(*to, document, res); + added_to_column = impl.insertResultToColumn(*to, iterator, res, dialect_type); } if (!added_to_column) { @@ -152,41 +291,479 @@ class FunctionSQLJSONHelpers return to; } }; + + + template typename Impl, class JSONParser> + class ExecutorObject + { + public: + template + static ColumnPtr run(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, uint32_t parse_depth, DialectType dialect_type) + { + MutableColumnPtr to{result_type->createColumn()}; + to->reserve(input_rows_count); + + const auto & json_column = arguments[0]; + + if (!isObject(json_column.type) && !isTuple(json_column.type)) + { + throw Exception( + "JSONPath functions require first argument to be JSON of Object or Tuple, illegal type: " + json_column.type->getName(), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + + const auto & json_path_column = arguments[1]; + + if (!isString(json_path_column.type)) + { + throw Exception( + "JSONPath functions require second argument to be JSONPath of type string, illegal type: " + + json_path_column.type->getName(), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + if (!isColumnConst(*json_path_column.column)) + { + throw Exception("Second argument (JSONPath) must be constant string", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + + const ColumnPtr & arg_jsonpath = json_path_column.column; + const auto * arg_jsonpath_const = typeid_cast(arg_jsonpath.get()); + const auto * arg_jsonpath_string = 
typeid_cast(arg_jsonpath_const->getDataColumnPtr().get()); + + const ColumnPtr & arg_json = json_column.column; + const auto * col_json_const = typeid_cast(arg_json.get()); + const auto * col_json_object + = typeid_cast(col_json_const ? col_json_const->getDataColumnPtr().get() : arg_json.get()); + + ColumnPtr column_tuple; + DataTypePtr type_tuple; + + if constexpr (std::is_same_v) + { + std::tie(column_tuple, type_tuple) = unflattenObjectToTuple(*col_json_object); + } + else + { + column_tuple = col_json_object->getPtr(); + type_tuple = json_column.type; + } + + /// Get data and offsets for 1 argument (JSONPath) + const ColumnString::Chars & chars_path = arg_jsonpath_string->getChars(); + const ColumnString::Offsets & offsets_path = arg_jsonpath_string->getOffsets(); + + /// Prepare to parse 1 argument (JSONPath) + const char * query_begin = reinterpret_cast(&chars_path[0]); + const char * query_end = query_begin + offsets_path[0] - 1; + + /// Tokenize query + Tokens tokens(query_begin, query_end); + /// Max depth 0 indicates that depth is not limited + IParser::Pos token_iterator(tokens, parse_depth); + + /// Parse query and create AST tree + Expected expected; + ASTPtr res; + ParserJSONPath parser; + const bool parse_res = parser.parse(token_iterator, res, expected); + if (!parse_res) + { + if (dialect_type != DialectType::MYSQL) + throw Exception{"Unable to parse JSONPath", ErrorCodes::BAD_ARGUMENTS}; + else + { + to->insertManyDefaults(input_rows_count); + return to; + } + } + + // Element document; + + /// Parse JSON for every row + Impl impl; + + constexpr bool has_member_prepare = requires + { + impl.prepare("", DataTypePtr{}); + }; + + if constexpr (has_member_prepare) + impl.prepare(Name::name, result_type); + + for (const auto i : collections::range(0, input_rows_count)) + { + ObjectIterator iterator(type_tuple, column_tuple, col_json_const ? 
0 : i); + bool added_to_column = impl.insertResultToColumn(*to, iterator, res, dialect_type); + + if (!added_to_column) + { + to->insertDefault(); + } + } + return to; + } + }; }; -template typename Impl> -class FunctionSQLJSON : public IFunction, WithConstContext +template +class ExecutableFunctionSQLJSONBase : public IExecutableFunction { + public: - static FunctionPtr create(ContextPtr context_) { return std::make_shared(context_); } - explicit FunctionSQLJSON(ContextPtr context_) : WithConstContext(context_) { } + explicit ExecutableFunctionSQLJSONBase(const NullPresence & null_presence_, const DataTypePtr & json_return_type_, uint32_t parser_depth_, DialectType dialect_type_) + : null_presence(null_presence_), json_return_type(json_return_type_), parser_depth(parser_depth_), dialect_type(dialect_type_) + { + } - static constexpr auto name = Name::name; String getName() const override { return Name::name; } - bool isVariadic() const override { return true; } - size_t getNumberOfArguments() const override { return 0; } + bool useDefaultImplementationForNulls() const override { return false; } bool useDefaultImplementationForConstants() const override { return true; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } - bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } - DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { - return Impl::getReturnType(Name::name, arguments); + if (null_presence.has_null_constant) + return result_type->createColumnConstWithDefaultValue(input_rows_count); + + auto temp_arguments = null_presence.has_nullable ? 
createBlockWithNestedColumns(arguments) : arguments; + auto temporary_result = Derived::run(temp_arguments, json_return_type, input_rows_count, parser_depth, dialect_type); + if (null_presence.has_nullable) + return wrapInNullable(temporary_result, arguments, result_type, input_rows_count); + return temporary_result; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override +private: + NullPresence null_presence; + DataTypePtr json_return_type; + uint32_t parser_depth; + DialectType dialect_type; +}; + +template typename Impl, bool allow_simdjson> +class ExecutableFunctionSQLJSONString : public ExecutableFunctionSQLJSONBase> +{ +public: + using Base = ExecutableFunctionSQLJSONBase; + + ExecutableFunctionSQLJSONString(const NullPresence & null_presence_, const DataTypePtr & json_return_type_, uint32_t parser_depth_, DialectType dialect_type_) + : Base(null_presence_, json_return_type_, parser_depth_, dialect_type_) + { + } + + static ColumnPtr run(const ColumnsWithTypeAndName & arguments, const DataTypePtr & json_return_type, size_t input_rows_count, uint32_t parser_depth, const DialectType & dialect_type) + { + auto temp_arguments = arguments; + if (temp_arguments.size() < 2) + { + DataTypePtr default_path_type = std::make_shared(); + MutableColumnPtr default_path_string_column = default_path_type->createColumn(); + default_path_string_column->insert("$"); + MutableColumnPtr default_path_column = ColumnConst::create(std::move(default_path_string_column), 1); + temp_arguments.emplace_back(ColumnWithTypeAndName(std::move(default_path_column), default_path_type, "$")); + } + + return chooseAndRunJSONParser(temp_arguments, json_return_type, input_rows_count, parser_depth, dialect_type); + } + +private: + static ColumnPtr chooseAndRunJSONParser(const ColumnsWithTypeAndName & arguments, const DataTypePtr & json_return_type, size_t input_rows_count, uint32_t parser_depth, const DialectType 
& dialect_type) { - /// Choose JSONParser. - /// 1. Lexer(path) -> Tokens - /// 2. Create ASTPtr - /// 3. Parser(Tokens, ASTPtr) -> complete AST - /// 4. Execute functions: call getNextItem on generator and handle each item - uint32_t parse_depth = getContext()->getSettingsRef().max_parser_depth; #if USE_SIMDJSON - if (getContext()->getSettingsRef().allow_simdjson) - return FunctionSQLJSONHelpers::Executor::run(arguments, result_type, input_rows_count, parse_depth); + if constexpr (allow_simdjson) + return FunctionSQLJSONHelpers::ExecutorString::run(arguments, json_return_type, input_rows_count, parser_depth, dialect_type); +#endif + + return FunctionSQLJSONHelpers::ExecutorString::run(arguments, json_return_type, input_rows_count, parser_depth, dialect_type); + } +}; + +template typename Impl, bool allow_simdjson> +class ExecutableFunctionSQLJSONObject : public ExecutableFunctionSQLJSONBase> +{ +public: + using Base = ExecutableFunctionSQLJSONBase; + + ExecutableFunctionSQLJSONObject(const NullPresence & null_presence_, const DataTypePtr & json_return_type_, uint32_t parser_depth_, DialectType dialect_type_) + : Base(null_presence_, json_return_type_, parser_depth_, dialect_type_) + { + } + + static ColumnPtr run(const ColumnsWithTypeAndName & arguments, const DataTypePtr & json_return_type, size_t input_rows_count, uint32_t parser_depth, DialectType dialect_type) + { + assert(!arguments.empty()); + + auto temp_arguments = arguments; + if (temp_arguments.size() < 2) + { + DataTypePtr default_path_type = std::make_shared(); + MutableColumnPtr default_path_string_column = default_path_type->createColumn(); + default_path_string_column->insert("$"); + MutableColumnPtr default_path_column = ColumnConst::create(std::move(default_path_string_column), 1); + temp_arguments.emplace_back(ColumnWithTypeAndName(std::move(default_path_column), default_path_type, "$")); + } + const auto & type_object = assert_cast(*temp_arguments[0].type); + const auto & arg_object = 
temp_arguments[0].column; + const auto * column_const = typeid_cast(arg_object.get()); + const auto * column_object + = typeid_cast(column_const ? column_const->getDataColumnPtr().get() : arg_object.get()); + + assert(column_object); + if (column_object->hasNullableSubcolumns()) + { + auto non_nullable_object = ColumnObject::create(false); + for (const auto & entry : column_object->getSubcolumns()) + { + auto new_subcolumn = recursiveAssumeNotNullable(entry->data.getFinalizedColumnPtr()); + non_nullable_object->addSubcolumn(entry->path, new_subcolumn->assumeMutable()); + } + + temp_arguments[0].type = std::make_shared(type_object.getSchemaFormat(), false); + temp_arguments[0].column = std::move(non_nullable_object); + + if (column_const) + temp_arguments[0].column = ColumnConst::create(temp_arguments[0].column, column_const->size()); + } + +#if USE_SIMDJSON + if constexpr (allow_simdjson) + { + return FunctionSQLJSONHelpers::ExecutorObject::template run( + temp_arguments, json_return_type, input_rows_count, parser_depth, dialect_type); + } #endif - return FunctionSQLJSONHelpers::Executor::run(arguments, result_type, input_rows_count, parse_depth); + + return FunctionSQLJSONHelpers::ExecutorObject::template run( + temp_arguments, json_return_type, input_rows_count, parser_depth, dialect_type); + } +}; + +template typename Impl, bool allow_simdjson> +class ExecutableFunctionSQLJSONTuple : public ExecutableFunctionSQLJSONBase> +{ +public: + using Base = ExecutableFunctionSQLJSONBase; + + ExecutableFunctionSQLJSONTuple(const NullPresence & null_presence_, const DataTypePtr & json_return_type_, uint32_t parser_depth_, DialectType dialect_type_) + : Base(null_presence_, json_return_type_, parser_depth_, dialect_type_) + { + } + + static ColumnPtr + run(const ColumnsWithTypeAndName & arguments, const DataTypePtr & json_return_type, size_t input_rows_count, uint32_t parser_depth, DialectType dialect_type) + { + auto temp_arguments = arguments; + if (temp_arguments.size() < 
2) + { + DataTypePtr default_path_type = std::make_shared(); + MutableColumnPtr default_path_string_column = default_path_type->createColumn(); + default_path_string_column->insert("$"); + MutableColumnPtr default_path_column = ColumnConst::create(std::move(default_path_string_column), 1); + temp_arguments.emplace_back(ColumnWithTypeAndName(std::move(default_path_column), default_path_type, "$")); + } +#if USE_SIMDJSON + if constexpr (allow_simdjson) + { + return FunctionSQLJSONHelpers::ExecutorObject::template run( + temp_arguments, json_return_type, input_rows_count, parser_depth, dialect_type); + } +#endif + + return FunctionSQLJSONHelpers::ExecutorObject::template run( + temp_arguments, json_return_type, input_rows_count, parser_depth, dialect_type); + } +}; + + +template +class FunctionBaseFunctionSQLJSON : public IFunctionBase +{ +public: + String getName() const override { return Name::name; } + + const DataTypes & getArgumentTypes() const override { return argument_types; } + + const DataTypePtr & getResultType() const override { return return_type; } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + +protected: + explicit FunctionBaseFunctionSQLJSON( + const NullPresence & null_presence_, + DataTypes argument_types_, + DataTypePtr return_type_, + DataTypePtr json_return_type_, + uint32_t parser_depth_, + DialectType dialect_type_) + : null_presence(null_presence_) + , argument_types(std::move(argument_types_)) + , return_type(std::move(return_type_)) + , json_return_type(std::move(json_return_type_)) + , parser_depth(parser_depth_) + , dialect_type(dialect_type_) + { + } + + NullPresence null_presence; + bool allow_simdjson; + DataTypes argument_types; + DataTypePtr return_type; + DataTypePtr json_return_type; + uint32_t parser_depth; + DialectType dialect_type; +}; + +template typename Impl> +class FunctionBaseFunctionSQLJSONString : public FunctionBaseFunctionSQLJSON +{ 
+public: + template + explicit FunctionBaseFunctionSQLJSONString(bool allow_simdjson_, Args &&... args) + : FunctionBaseFunctionSQLJSON{std::forward(args)...} + , allow_simdjson(allow_simdjson_) + { + } + + ExecutableFunctionPtr prepare(const ColumnsWithTypeAndName &) const override + { + if (this->allow_simdjson) + return std::make_unique>(this->null_presence, this->json_return_type, this->parser_depth, this->dialect_type); + + return std::make_unique>(this->null_presence, this->json_return_type, this->parser_depth, this->dialect_type); + } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } +private: + bool allow_simdjson; +}; + +template typename Impl> +class FunctionBaseFunctionSQLJSONObject : public FunctionBaseFunctionSQLJSON +{ +public: + template + explicit FunctionBaseFunctionSQLJSONObject(bool allow_simdjson_, Args &&... args) + : FunctionBaseFunctionSQLJSON{std::forward(args)...} + , allow_simdjson(allow_simdjson_) + { + } + + ExecutableFunctionPtr prepare(const ColumnsWithTypeAndName &) const override + { + if (this->allow_simdjson) + return std::make_unique>(this->null_presence, this->json_return_type, this->parser_depth, this->dialect_type); + + return std::make_unique>(this->null_presence, this->json_return_type, this->parser_depth, this->dialect_type); + } + +private: + bool allow_simdjson; +}; + +template typename Impl> +class FunctionBaseFunctionSQLJSONTuple : public FunctionBaseFunctionSQLJSON +{ +public: + template + explicit FunctionBaseFunctionSQLJSONTuple(bool allow_simdjson_, Args &&... 
args) + : FunctionBaseFunctionSQLJSON{std::forward(args)...} + , allow_simdjson(allow_simdjson_) + { + } + + ExecutableFunctionPtr prepare(const ColumnsWithTypeAndName &) const override + { + if (this->allow_simdjson) + return std::make_unique>(this->null_presence, this->json_return_type, this->parser_depth, this->dialect_type); + + return std::make_unique>(this->null_presence, this->json_return_type, this->parser_depth, this->dialect_type); + } +private: + bool allow_simdjson; +}; + +using ObjectIterator = FunctionJSONHelpers::ObjectIterator; +template +using ElementIterator = FunctionJSONHelpers::JSONElementIterator; + +/// We use IFunctionOverloadResolver instead of IFunction to handle non-default NULL processing. +/// Both NULL and JSON NULL should generate NULL value. If any argument is NULL, return NULL. +template typename Impl> +class SQLJSONOverloadResolver : public IFunctionOverloadResolver, WithContext +{ +public: + static constexpr auto name = Name::name; + + String getName() const override { return name; } + + static FunctionOverloadResolverPtr create(ContextPtr context_) + { + return std::make_unique(context_); + } + + explicit SQLJSONOverloadResolver(ContextPtr context_) : WithContext(context_) {} + + bool isVariadic() const override { return true; } + size_t getNumberOfArguments() const override { return 0; } + bool useDefaultImplementationForNulls() const override { return false; } + + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } + + FunctionBasePtr build(const ColumnsWithTypeAndName & arguments) const override + { + bool has_nothing_argument = false; + for (const auto & arg : arguments) + has_nothing_argument |= isNothing(arg.type); + + if (arguments.empty()) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Function {} requires at least one argument", Name::name); + + const auto & first_column = arguments[0]; + auto first_type_base = removeNullable(removeLowCardinality(first_column.type)); + + 
bool is_string = isString(first_type_base); + bool is_object = isObject(first_type_base); + bool is_tuple = isTuple(first_type_base); + bool is_nothing = isNothing(first_type_base); + + if (!is_string && !is_object && !is_tuple && !is_nothing) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "The first argument of function {} should be a string containing JSON or Object or Tuple, illegal type: {}", + Name::name, first_column.type->getName()); + + auto json_return_type = Impl::getReturnType(Name::name, createBlockWithNestedColumns(arguments)); + NullPresence null_presence = getNullPresense(arguments); + DataTypePtr return_type; + if (has_nothing_argument) + return_type = std::make_shared(); + else if (null_presence.has_null_constant) + return_type = makeNullable(std::make_shared()); + else if (null_presence.has_nullable) + return_type = makeNullable(json_return_type); + else + return_type = json_return_type; + + /// Top-level LowCardinality columns are processed outside JSON parser. 
+ json_return_type = removeLowCardinality(json_return_type); + + DataTypes argument_types; + argument_types.reserve(arguments.size()); + for (const auto & argument : arguments) + argument_types.emplace_back(argument.type); + + auto allow_simdjson = getContext()->getSettingsRef().allow_simdjson; + uint32_t parser_depth = getContext()->getSettingsRef().max_parser_depth; + DialectType dialect_type = getContext()->getSettingsRef().dialect_type; + if (is_string || is_nothing) + return std::make_unique>( + allow_simdjson, null_presence, argument_types, return_type, json_return_type, parser_depth, dialect_type); + else if (is_object) + return std::make_unique>( + allow_simdjson, null_presence, argument_types, return_type, json_return_type, parser_depth, dialect_type); + else + return std::make_unique>( + allow_simdjson, null_presence, argument_types, return_type, json_return_type, parser_depth, dialect_type); } }; @@ -220,25 +797,142 @@ struct NameSQLJSONContainsPath static constexpr auto name{"JSON_CONTAINS_PATH"}; }; -struct NameSQLJSONExtractPath +struct NameSQLJSONArrayContains +{ + static constexpr auto name{"JSON_ARRAY_CONTAINS"}; +}; + +struct NameSQLJSONKeys +{ + static constexpr auto name{"JSON_KEYS"}; +}; + +struct NameSQLJSONExtract { static constexpr auto name{"JSON_EXTRACT"}; }; -template +template +class SQLJSONKeysImpl +{ +public: + + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) + { + return std::make_unique(std::make_shared()); + } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } + + static bool insertResultToColumn(IColumn & dest, Iterator & iterator, ASTPtr & query_ptr, DialectType /*dialect_type*/) requires IsElementIterator + { + using Element = typename Iterator::Element; + using JSONParser = typename Iterator::JSONParserType; + GeneratorJSONPath generator_json_path(query_ptr); + Element current_element = iterator.getElement(); + VisitorStatus 
status; + while ((status = generator_json_path.getNextItem(current_element)) != VisitorStatus::Exhausted) + { + if (status == VisitorStatus::Ok) + { + break; + } + current_element = iterator.getElement(); + } + + Iterator sub_iterator{current_element}; + return JSONExtractKeysImpl::insertResultToColumn(dest, sub_iterator); + } + + static bool insertResultToColumn(IColumn & dest, ObjectIterator & iterator, ASTPtr & query_ptr, DialectType /*dialect_type*/) requires IsObjectIterator + { + ObjectJSONGeneratorJSONPath generator_json_path(query_ptr); + VisitorStatus status; + while ((status = generator_json_path.getNextItem(iterator)) != VisitorStatus::Exhausted) + { + if (status == VisitorStatus::Ok) + { + break; + } + } + + return JSONExtractKeysImpl::insertResultToColumn(dest, iterator); + } +}; + +template +class SQLJSONExtractImpl +{ +public: + + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) + { + return std::make_shared(); + } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } + + static bool insertResultToColumn(IColumn & dest, Iterator & iterator, ASTPtr & query_ptr, DialectType dialect_type) requires IsElementIterator + { + using Element = typename Iterator::Element; + using JSONParser = typename Iterator::JSONParserType; + GeneratorJSONPath generator_json_path(query_ptr); + Element current_element = iterator.getElement(); + VisitorStatus status; + while ((status = generator_json_path.getNextItem(current_element)) != VisitorStatus::Exhausted) + { + if (status == VisitorStatus::Ok) + { + break; + } + current_element = iterator.getElement(); + } + + if (status == VisitorStatus::Exhausted) + { + return false; + } + + Iterator sub_iterator{current_element}; + return JSONExtractRawImpl::insertResultToColumn(dest, sub_iterator, dialect_type); + } + + static bool insertResultToColumn(IColumn & dest, ObjectIterator & iterator, ASTPtr & query_ptr, DialectType 
dialect_type) requires IsObjectIterator + { + ObjectJSONGeneratorJSONPath generator_json_path(query_ptr); + VisitorStatus status; + while ((status = generator_json_path.getNextItem(iterator)) != VisitorStatus::Exhausted) + { + if (status == VisitorStatus::Ok) + { + break; + } + } + + if (status == VisitorStatus::Exhausted) + { + return false; + } + + return JSONExtractRawImpl::insertResultToColumn(dest, iterator, dialect_type); + } +}; + +template class SQLJSONExistsImpl { public: - using Element = typename JSONParser::Element; static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) { return std::make_shared(); } static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - static bool insertResultToColumn(IColumn & dest, const Element & root, ASTPtr & query_ptr) + static bool insertResultToColumn(IColumn & dest, Iterator & iterator, ASTPtr & query_ptr, DialectType /*dialect_type*/) requires IsElementIterator { + using Element = typename Iterator::Element; + using JSONParser = typename Iterator::JSONParserType; GeneratorJSONPath generator_json_path(query_ptr); - Element current_element = root; + Element current_element = iterator.getElement(); VisitorStatus status; while ((status = generator_json_path.getNextItem(current_element)) != VisitorStatus::Exhausted) { @@ -246,7 +940,33 @@ class SQLJSONExistsImpl { break; } - current_element = root; + current_element = iterator.getElement(); + } + + /// insert result, status can be either Ok (if we found the item) + /// or Exhausted (if we never found the item) + ColumnUInt8 & col_bool = assert_cast(dest); + if (status == VisitorStatus::Ok) + { + col_bool.insert(1); + } + else + { + col_bool.insert(0); + } + return true; + } + + static bool insertResultToColumn(IColumn & dest, ObjectIterator & iterator, ASTPtr & query_ptr, DialectType /*dialect_type*/) requires IsObjectIterator + { + ObjectJSONGeneratorJSONPath generator_json_path(query_ptr); + 
VisitorStatus status; + while ((status = generator_json_path.getNextItem(iterator)) != VisitorStatus::Exhausted) + { + if (status == VisitorStatus::Ok) + { + break; + } } /// insert result, status can be either Ok (if we found the item) @@ -264,20 +984,21 @@ class SQLJSONExistsImpl } }; -template +template class SQLJSONValueImpl { public: - using Element = typename JSONParser::Element; static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) { return std::make_shared(); } static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - static bool insertResultToColumn(IColumn & dest, const Element & root, ASTPtr & query_ptr) + static bool insertResultToColumn(IColumn & dest, Iterator & iterator, ASTPtr & query_ptr, DialectType /*dialect_type*/) requires IsElementIterator { + using Element = typename Iterator::Element; + using JSONParser = typename Iterator::JSONParserType; GeneratorJSONPath generator_json_path(query_ptr); - Element current_element = root; + Element current_element = iterator.getElement(); VisitorStatus status; Element res; while ((status = generator_json_path.getNextItem(current_element)) != VisitorStatus::Exhausted) @@ -295,7 +1016,7 @@ class SQLJSONValueImpl /// Here it is possible to handle errors with ON ERROR (as described in ISO/IEC TR 19075-6), /// however this functionality is not implemented yet } - current_element = root; + current_element = iterator.getElement(); } if (status == VisitorStatus::Exhausted) @@ -310,26 +1031,56 @@ class SQLJSONValueImpl col_str.insertData(output_str.data(), output_str.size()); return true; } + + static bool insertResultToColumn(IColumn & dest, ObjectIterator & iterator, ASTPtr & query_ptr, DialectType /*dialect_type*/) requires IsObjectIterator + { + ObjectJSONGeneratorJSONPath generator_json_path(query_ptr); + VisitorStatus status; + while ((status = generator_json_path.getNextItem(iterator)) != VisitorStatus::Exhausted) + { + if 
(status == VisitorStatus::Ok) + { + const auto & element_type = iterator.getType(); + if (!(isArray(element_type) || isObject(element_type) || isTuple(element_type))) + break; + } + } + + if (status == VisitorStatus::Exhausted) + { + return false; + } + + auto row = iterator.getRow(); + if (const auto * column_string = typeid_cast(iterator.getColumn().get())) + { + dest.insertFrom(*column_string, row); + return true; + } + + return JSONExtractRawImpl::insertResultToColumn(dest, iterator); + } }; /** * Function to test jsonpath member access, will be removed in final PR * @tparam JSONParser parser */ -template +template class SQLJSONQueryImpl { public: - using Element = typename JSONParser::Element; static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) { return std::make_shared(); } static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - static bool insertResultToColumn(IColumn & dest, const Element & root, ASTPtr & query_ptr) + static bool insertResultToColumn(IColumn & dest, Iterator & iterator, ASTPtr & query_ptr, DialectType /*dialect_type*/) requires IsElementIterator { + using Element = typename Iterator::Element; + using JSONParser = typename Iterator::JSONParserType; GeneratorJSONPath generator_json_path(query_ptr); - Element current_element = root; + Element current_element = iterator.getElement(); VisitorStatus status; std::stringstream out; // STYLE_CHECK_ALLOW_STD_STRING_STREAM /// Create json array of results: [res1, res2, ...] 
@@ -352,7 +1103,7 @@ class SQLJSONQueryImpl /// Here it is possible to handle errors with ON ERROR (as described in ISO/IEC TR 19075-6), /// however this functionality is not implemented yet } - current_element = root; + current_element = iterator.getElement(); } out << "]"; if (!success) @@ -364,13 +1115,17 @@ class SQLJSONQueryImpl col_str.insertData(output_str.data(), output_str.size()); return true; } + + static bool insertResultToColumn(IColumn & /*dest*/, ObjectIterator & /*iterator*/, ASTPtr & /*query_ptr*/, DialectType /*dialect_type*/) requires IsObjectIterator + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "JSON_QUERY is not implemented for Object or Tuple."); + } }; -template +template class SQLJSONLengthImpl { public: - using Element = typename JSONParser::Element; static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) { @@ -379,10 +1134,12 @@ class SQLJSONLengthImpl static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - static bool insertResultToColumn(IColumn & dest, const Element & root, ASTPtr & query_ptr) + static bool insertResultToColumn(IColumn & dest, Iterator & iterator, ASTPtr & query_ptr, DialectType dialect_type) requires IsElementIterator { + using Element = typename Iterator::Element; + using JSONParser = typename Iterator::JSONParserType; GeneratorJSONPath generator_json_path(query_ptr); - Element current_element = root; + Element current_element = iterator.getElement(); VisitorStatus status; ColumnNullable & col = assert_cast(dest); @@ -392,7 +1149,43 @@ class SQLJSONLengthImpl { break; } - current_element = root; + current_element = iterator.getElement(); + } + + if (status == VisitorStatus::Exhausted) + { + col.insertData(nullptr, 0); + return false; + } + + size_t size; + if (current_element.isArray()) + size = current_element.getArray().size(); + else if (current_element.isObject()) + size = current_element.getObject().size(); + else + { + if 
(dialect_type == DialectType::MYSQL) + size = 0; + else + size = 1; + } + + col.insert(size); + return true; + } + + static bool insertResultToColumn(IColumn & dest, ObjectIterator & iterator, ASTPtr & query_ptr, DialectType dialect_type) requires IsObjectIterator + { + ColumnNullable & col = assert_cast(dest); + ObjectJSONGeneratorJSONPath generator_json_path(query_ptr); + VisitorStatus status; + while ((status = generator_json_path.getNextItem(iterator)) != VisitorStatus::Exhausted) + { + if (status == VisitorStatus::Ok) + { + break; + } } if (status == VisitorStatus::Exhausted) @@ -400,16 +1193,37 @@ class SQLJSONLengthImpl col.insertData(nullptr, 0); return false; } - - size_t size; - if (current_element.isArray()) - size = current_element.getArray().size(); - else if (current_element.isObject()) - size = current_element.getObject().size(); + + const auto * column_array = typeid_cast(iterator.getColumn().get()); + if (column_array) + { + const auto & offsets = column_array->getOffsets(); + auto row = iterator.getRow(); + UInt64 size = offsets[row] - offsets[row - 1]; + col.insert(size); + return true; + } + + const auto * column_tuple = typeid_cast(iterator.getColumn().get()); + if (column_tuple) + { + if (isDummyTuple(*iterator.getType())) + return false; + + UInt64 size = column_tuple->getColumns().size(); + col.insert(size); + return true; + } + + if (dialect_type == DialectType::MYSQL) + { + col.insert(0); + } else - size = 1; + { + col.insert(1); + } - col.insert(size); return true; } }; @@ -566,6 +1380,51 @@ class FunctionSQLJSONContains : public IFunction, WithConstContext return true; } + bool + insertResultToColumn(IColumn & dest, ElementIterator & iterator, const ColumnWithTypeAndName & candidate, ASTPtr & query_ptr) const + { + ColumnUInt8 & col_bool = assert_cast(dest); + + auto current_element = iterator.getElement(); + if (query_ptr) + { + GeneratorJSONPath generator_json_path(query_ptr); + VisitorStatus status; + while ((status = 
generator_json_path.getNextItem(current_element)) != VisitorStatus::Exhausted) + { + if (status == VisitorStatus::Ok) + { + break; + } + current_element = iterator.getElement(); + } + + if (status == VisitorStatus::Exhausted) + { + return false; + } + } + + const auto & candidate_json_column = candidate.column; + const auto * candidate_json_const = typeid_cast(candidate_json_column.get()); + const auto * candidate_json_string = typeid_cast( + candidate_json_const ? candidate_json_const->getDataColumnPtr().get() : candidate_json_column.get()); + + std::string_view json{candidate_json_string ? candidate_json_string->getDataAt(0) : ""}; + SimdJSONParser json_parser; + using Element = typename SimdJSONParser::Element; + Element sub_document; + const bool parse_ok = json_parser.parse(json, sub_document); + + if (parse_ok) + { + bool contains = JSONUtils::contains(current_element, sub_document); + col_bool.insert(contains ? 1 : 0); + } + + return parse_ok; + } + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { //Only support Object JSON @@ -585,13 +1444,14 @@ class FunctionSQLJSONContains : public IFunction, WithConstContext const auto & json_column = arguments[0]; auto first_type_base = removeNullable(removeLowCardinality(json_column.type)); + bool is_string = isString(first_type_base); bool is_object = isObject(first_type_base); bool is_tuple = isTuple(first_type_base); - if (!is_object && !is_tuple) + if (!is_string && !is_object && !is_tuple) throw Exception( ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "The first argument of function {} should be a string containing Object or Tuple, illegal type: {}", + "The first argument of function {} should be a string containing Object or Tuple or JSON, illegal type: {}", Name::name, json_column.type->getName()); @@ -602,27 +1462,6 @@ class FunctionSQLJSONContains : public IFunction, WithConstContext MutableColumnPtr 
to{result_type->createColumn()}; to->reserve(input_rows_count); - const auto & arg_json = json_column.column; - const auto * col_json_const = typeid_cast(arg_json.get()); - const auto * col_json_object - = typeid_cast(col_json_const ? col_json_const->getDataColumnPtr().get() : arg_json.get()); - - if (!col_json_object) - throw Exception{ErrorCodes::ILLEGAL_COLUMN, "Illegal column {}", arg_json->getName()}; - - ColumnPtr column_tuple; - DataTypePtr type_tuple; - - if (is_object) - { - std::tie(column_tuple, type_tuple) = unflattenObjectToTuple(*col_json_object); - } - else - { - column_tuple = col_json_object->getPtr(); - type_tuple = json_column.type; - } - ASTPtr res; if (arguments.size() == 3) { @@ -669,14 +1508,69 @@ class FunctionSQLJSONContains : public IFunction, WithConstContext } } - for (const auto i : collections::range(0, input_rows_count)) + const auto & arg_json = json_column.column; + const auto * col_json_const = typeid_cast(arg_json.get()); + + if (is_object || is_tuple) { - ObjectIterator iterator(type_tuple, column_tuple, col_json_const ? 0 : i); + const auto * col_json_object + = typeid_cast(col_json_const ? col_json_const->getDataColumnPtr().get() : arg_json.get()); - bool added_to_column = this->insertResultToColumn(*to, iterator, candidate, res); - if (!added_to_column) + if (!col_json_object) + throw Exception{ErrorCodes::ILLEGAL_COLUMN, "Illegal column {}", arg_json->getName()}; + + ColumnPtr column_tuple; + DataTypePtr type_tuple; + + if (is_object) { - to->insertDefault(); + std::tie(column_tuple, type_tuple) = unflattenObjectToTuple(*col_json_object); + } + else + { + column_tuple = col_json_object->getPtr(); + type_tuple = json_column.type; + } + for (const auto i : collections::range(0, input_rows_count)) + { + ObjectIterator iterator(type_tuple, column_tuple, col_json_const ? 
0 : i); + + bool added_to_column = this->insertResultToColumn(*to, iterator, candidate, res); + if (!added_to_column) + { + to->insertDefault(); + } + } + } + else + { + const auto * col_json_string + = typeid_cast(col_json_const ? col_json_const->getDataColumnPtr().get() : arg_json.get()); + + const ColumnString::Chars & chars_json = col_json_string->getChars(); + const ColumnString::Offsets & offsets_json = col_json_string->getOffsets(); + + SimdJSONParser json_parser; + using Element = typename SimdJSONParser::Element; + Element document; + bool document_ok = false; + + for (const auto i : collections::range(0, input_rows_count)) + { + std::string_view json{ + reinterpret_cast(&chars_json[offsets_json[i - 1]]), offsets_json[i] - offsets_json[i - 1] - 1}; + document_ok = json_parser.parse(json, document); + + bool added_to_column = false; + ElementIterator iterator(document); + if (document_ok) + { + added_to_column = this->insertResultToColumn(*to, iterator, candidate, res); + } + if (!added_to_column) + { + to->insertDefault(); + } } } @@ -713,16 +1607,50 @@ class FunctionSQLJSONContainsPath : public IFunction, WithConstContext for (const auto & query_ptr : query_ptrs) { + auto temp_iterator = iterator; ObjectJSONGeneratorJSONPath generator_json_path(query_ptr); VisitorStatus status; - while ((status = generator_json_path.getNextItem(iterator)) != VisitorStatus::Exhausted) + while ((status = generator_json_path.getNextItem(temp_iterator)) != VisitorStatus::Exhausted) + { + if (status == VisitorStatus::Ok) + { + contains = true; + break; + } + } + + if (status == VisitorStatus::Exhausted) + { + if (contains_all) + return false; + else + continue; + } + } + + col_bool.insert(contains ? 
1 : 0); + return true; + } + + bool insertResultToColumn(IColumn & dest, ElementIterator & iterator, ASTs & query_ptrs, bool contains_all) const + { + ColumnUInt8 & col_bool = assert_cast(dest); + + bool contains = false; + for (const auto & query_ptr : query_ptrs) + { + GeneratorJSONPath generator_json_path(query_ptr); + auto current_element = iterator.getElement(); + VisitorStatus status; + + while ((status = generator_json_path.getNextItem(current_element)) != VisitorStatus::Exhausted) { if (status == VisitorStatus::Ok) { - if (!contains_all) - contains = true; + contains = true; break; } + current_element = iterator.getElement(); } if (status == VisitorStatus::Exhausted) @@ -757,44 +1685,21 @@ class FunctionSQLJSONContainsPath : public IFunction, WithConstContext const auto & json_column = arguments[0]; auto first_type_base = removeNullable(removeLowCardinality(json_column.type)); + bool is_string = isString(first_type_base); bool is_object = isObject(first_type_base); bool is_tuple = isTuple(first_type_base); - if (!is_object && !is_tuple) + if (!is_string && !is_object && !is_tuple) throw Exception( ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "The first argument of function {} should be a string containing Object or Tuple, illegal type: {}", + "The first argument of function {} should be a string containing Object or Tuple or JSON, illegal type: {}", Name::name, json_column.type->getName()); const auto & any_or_all_column = arguments[1]; if (!isString(any_or_all_column.type) || !isColumnConst(*any_or_all_column.column)) throw Exception("Second argument (any or all) must be constant string", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - MutableColumnPtr to{result_type->createColumn()}; - to->reserve(input_rows_count); - - const auto & arg_json = json_column.column; - const auto * col_json_const = typeid_cast(arg_json.get()); - const auto * col_json_object - = typeid_cast(col_json_const ? 
col_json_const->getDataColumnPtr().get() : arg_json.get()); - - if (!col_json_object) - throw Exception{ErrorCodes::ILLEGAL_COLUMN, "Illegal column {}", arg_json->getName()}; - - ColumnPtr column_tuple; - DataTypePtr type_tuple; - if (is_object) - { - std::tie(column_tuple, type_tuple) = unflattenObjectToTuple(*col_json_object); - } - else - { - column_tuple = col_json_object->getPtr(); - type_tuple = json_column.type; - } - const ColumnPtr & arg_any_or_all = any_or_all_column.column; const auto * arg_any_or_all_const = typeid_cast(arg_any_or_all.get()); const auto * arg_any_or_all_string = typeid_cast(arg_any_or_all_const->getDataColumnPtr().get()); @@ -850,14 +1755,74 @@ class FunctionSQLJSONContainsPath : public IFunction, WithConstContext json_path_ast_ptrs.emplace_back(res); } - for (const auto i : collections::range(0, input_rows_count)) + MutableColumnPtr to{result_type->createColumn()}; + to->reserve(input_rows_count); + + const auto & arg_json = json_column.column; + const auto * col_json_const = typeid_cast(arg_json.get()); + + if (is_object || is_tuple) { - ObjectIterator iterator(type_tuple, column_tuple, col_json_const ? 0 : i); + const auto * col_json_object + = typeid_cast(col_json_const ? col_json_const->getDataColumnPtr().get() : arg_json.get()); + + if (!col_json_object) + throw Exception{ErrorCodes::ILLEGAL_COLUMN, "Illegal column {}", arg_json->getName()}; - bool added_to_column = this->insertResultToColumn(*to, iterator, json_path_ast_ptrs, contains_all); - if (!added_to_column) + ColumnPtr column_tuple; + DataTypePtr type_tuple; + + if (is_object) { - to->insertDefault(); + std::tie(column_tuple, type_tuple) = unflattenObjectToTuple(*col_json_object); + } + else + { + column_tuple = col_json_object->getPtr(); + type_tuple = json_column.type; + } + + + for (const auto i : collections::range(0, input_rows_count)) + { + ObjectIterator iterator(type_tuple, column_tuple, col_json_const ? 
0 : i); + + bool added_to_column = this->insertResultToColumn(*to, iterator, json_path_ast_ptrs, contains_all); + if (!added_to_column) + { + to->insertDefault(); + } + } + } + else + { + const auto * col_json_string + = typeid_cast(col_json_const ? col_json_const->getDataColumnPtr().get() : arg_json.get()); + + const ColumnString::Chars & chars_json = col_json_string->getChars(); + const ColumnString::Offsets & offsets_json = col_json_string->getOffsets(); + + SimdJSONParser json_parser; + using Element = typename SimdJSONParser::Element; + Element document; + bool document_ok = false; + + for (const auto i : collections::range(0, input_rows_count)) + { + std::string_view json{ + reinterpret_cast(&chars_json[offsets_json[i - 1]]), offsets_json[i] - offsets_json[i - 1] - 1}; + document_ok = json_parser.parse(json, document); + + bool added_to_column = false; + ElementIterator iterator(document); + if (document_ok) + { + added_to_column = this->insertResultToColumn(*to, iterator, json_path_ast_ptrs, contains_all); + } + if (!added_to_column) + { + to->insertDefault(); + } } } @@ -865,37 +1830,129 @@ class FunctionSQLJSONContainsPath : public IFunction, WithConstContext } }; -template -class SQLJSONExtractPathImpl +template +class FunctionSQLJSONArrayContains : public IFunction, WithConstContext { public: - using Element = typename JSONParser::Element; + static FunctionPtr create(ContextPtr context_) { return std::make_shared(context_); } + explicit FunctionSQLJSONArrayContains(ContextPtr context_) : WithConstContext(context_) + {} - static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) { return std::make_shared(); } + static constexpr auto name = Name::name; + String getName() const override { return Name::name; } + bool isVariadic() const override { return true; } + size_t getNumberOfArguments() const override { return 0; } + bool useDefaultImplementationForConstants() const override { return true; } + ColumnNumbers 
getArgumentsThatAreAlwaysConstant() const override { return {1}; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } - static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName &) const override + { + return std::make_shared(); + } - static bool insertResultToColumn(IColumn & dest, const Element & root, ASTPtr & query_ptr) + template + ColumnPtr internalExecuteImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, uint32_t /*parse_depth*/) const { - GeneratorJSONPath generator_json_path(query_ptr); - Element current_element = root; - VisitorStatus status; - while ((status = generator_json_path.getNextItem(current_element)) != VisitorStatus::Exhausted) + if (arguments.size() != 2) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires exactly two arguments", Name::name); + + const auto & json_column = arguments[0]; + auto first_type_base = removeNullable(removeLowCardinality(json_column.type)); + + bool is_string = isString(first_type_base); + + if (!is_string) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "The first argument of function {} should be a string, illegal type: {}", + Name::name, + json_column.type->getName()); + + + const ColumnPtr & arg_json = json_column.column; + const auto * col_json_const = typeid_cast(arg_json.get()); + const auto * col_json_string + = typeid_cast(col_json_const ? 
col_json_const->getDataColumnPtr().get() : arg_json.get()); + /// Get data and offsets for 2 argument (JSON) + const ColumnString::Chars & chars_json = col_json_string->getChars(); + const ColumnString::Offsets & offsets_json = col_json_string->getOffsets(); + + const auto & target_value_column = arguments[1]; + + JSONParser json_parser; + using Element = typename JSONParser::Element; + Element document; + bool document_ok = false; + + MutableColumnPtr to{result_type->createColumn()}; + to->reserve(input_rows_count); + + auto compare = [&target_value_column](const Element & json_array_element) { - if (status == VisitorStatus::Ok) + if (isString(target_value_column.type) && json_array_element.isString()) { - break; + return target_value_column.column->getDataAt(0).toString() == json_array_element.getString(); } - current_element = root; - } - if (status == VisitorStatus::Exhausted) + if (isNumber(target_value_column.type) && json_array_element.isInt64()) + { + return target_value_column.column->getInt(0) == json_array_element.getInt64(); + } + + if (isBool(target_value_column.type) && json_array_element.isBool()) + return target_value_column.column->getBool(0) == json_array_element.getBool(); + + return false; + }; + + for (const auto i : collections::range(0, input_rows_count)) { - return false; + + std::string_view json{ + reinterpret_cast(&chars_json[offsets_json[i - 1]]), offsets_json[i] - offsets_json[i - 1] - 1}; + document_ok = json_parser.parse(json, document); + if (!document_ok) + { + to->insertDefault(); + continue; + } + + if (document.isArray()) + { + const auto & json_array = document.getArray(); + for (auto it = json_array.begin(); it != json_array.end(); ++it) + { + if (compare(*it)) + { + to->insert(1); + break; + } + } + to->insertDefault(); + } + else + { + to->insertDefault(); + } } - ElementIterator iterator(current_element); - return JSONExtractRawImpl>::insertResultToColumn(dest, iterator); + return to; + } + + ColumnPtr executeImpl(const 
ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + { + /// Choose JSONParser. + /// 1. Lexer(path) -> Tokens + /// 2. Create ASTPtr + /// 3. Parser(Tokens, ASTPtr) -> complete AST + /// 4. Execute functions: call getNextItem on generator and handle each item + uint32_t parse_depth = getContext()->getSettingsRef().max_parser_depth; +#if USE_SIMDJSON + if (getContext()->getSettingsRef().allow_simdjson) + return this->template internalExecuteImpl(arguments, result_type, input_rows_count, parse_depth); +#endif + return this->template internalExecuteImpl(arguments, result_type, input_rows_count, parse_depth); } }; diff --git a/src/Functions/FunctionsJSON.cpp b/src/Functions/FunctionsJSON.cpp index aa2a116c0ba..0dc9f3488f3 100644 --- a/src/Functions/FunctionsJSON.cpp +++ b/src/Functions/FunctionsJSON.cpp @@ -177,6 +177,7 @@ REGISTER_FUNCTION(JSON) factory.registerFunction>(); factory.registerFunction>(); factory.registerFunction>(FunctionFactory::CaseSensitiveness::CaseInsensitive); + factory.registerAlias("JSON_UNQUOTE", "JSONUnquote", FunctionFactory::CaseInsensitive); } } diff --git a/src/Functions/FunctionsJSON.h b/src/Functions/FunctionsJSON.h index b5a7cf6158d..073fb053ec9 100644 --- a/src/Functions/FunctionsJSON.h +++ b/src/Functions/FunctionsJSON.h @@ -34,6 +34,7 @@ #include #include #include +#include "Core/SettingsEnums.h" #include #include #include @@ -316,6 +317,8 @@ class FunctionJSONHelpers const Element & getElement() const { return element; } std::string_view getLastKey() const { return last_key; } + using JSONParserType = JSONParser; + private: Element element; std::string_view last_key; @@ -618,7 +621,6 @@ class ExecutableFunctionJSONTuple : public ExecutableFunctionJSONBase class FunctionBaseFunctionJSON : public IFunctionBase { @@ -939,7 +941,7 @@ class JSONHasImpl static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - static bool 
insertResultToColumn(IColumn & dest, const Iterator &) + static bool insertResultToColumn(IColumn & dest, Iterator &) { auto & col_vec = assert_cast &>(dest); col_vec.insertValue(1); @@ -967,7 +969,7 @@ class IsValidJSONImpl static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName &) { return 0; } - static bool insertResultToColumn(IColumn & dest, const Iterator &) + static bool insertResultToColumn(IColumn & dest, Iterator &) { /// This function is called only if JSON is valid. /// If JSON isn't valid then `FunctionJSON::Executor::run()` adds default value (=zero) to `dest` without calling this function. @@ -990,7 +992,7 @@ class JSONLengthImpl static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - static bool insertResultToColumn(IColumn & dest, const Iterator & iterator) requires IsElementIterator + static bool insertResultToColumn(IColumn & dest, Iterator & iterator) requires IsElementIterator { const auto & element = iterator.getElement(); @@ -1007,7 +1009,7 @@ class JSONLengthImpl return true; } - static bool insertResultToColumn(IColumn & dest, const Iterator & iterator) requires IsObjectIterator + static bool insertResultToColumn(IColumn & dest, Iterator & iterator) requires IsObjectIterator { auto & to_vec = assert_cast &>(dest); @@ -1049,7 +1051,7 @@ class JSONKeyImpl static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - static bool insertResultToColumn(IColumn & dest, const Iterator & iterator) + static bool insertResultToColumn(IColumn & dest, Iterator & iterator) { auto last_key = iterator.getLastKey(); if (last_key.empty()) @@ -1083,7 +1085,7 @@ class JSONTypeImpl static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - static bool insertResultToColumn(IColumn & dest, const Iterator & iterator) requires IsElementIterator + static bool 
insertResultToColumn(IColumn & dest, Iterator & iterator) requires IsElementIterator { const auto & element = iterator.getElement(); UInt8 type; @@ -1111,7 +1113,7 @@ class JSONTypeImpl return true; } - static bool insertResultToColumn(IColumn & dest, const Iterator & iterator) requires IsObjectIterator + static bool insertResultToColumn(IColumn & dest, Iterator & iterator) requires IsObjectIterator { WhichDataType which(iterator.getType()); UInt8 type; @@ -1149,7 +1151,7 @@ class JSONExtractNumericImpl static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - static bool insertResultToColumn(IColumn & dest, const Iterator & iterator) requires IsElementIterator + static bool insertResultToColumn(IColumn & dest, Iterator & iterator) requires IsElementIterator { const auto & element = iterator.getElement(); NumberType value; @@ -1188,7 +1190,7 @@ class JSONExtractNumericImpl return true; } - static bool insertResultToColumn(IColumn & dest, const Iterator & iterator) requires IsObjectIterator + static bool insertResultToColumn(IColumn & dest, Iterator & iterator) requires IsObjectIterator { const auto & from = iterator.getColumn(); UInt64 row = iterator.getRow(); @@ -1230,7 +1232,7 @@ class JSONExtractBoolImpl static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - static bool insertResultToColumn(IColumn & dest, const Iterator & iterator) requires IsElementIterator + static bool insertResultToColumn(IColumn & dest, Iterator & iterator) requires IsElementIterator { const auto & element = iterator.getElement(); if (!element.isBool()) @@ -1241,7 +1243,7 @@ class JSONExtractBoolImpl return true; } - static bool insertResultToColumn(IColumn & dest, const Iterator & iterator) requires IsObjectIterator + static bool insertResultToColumn(IColumn & dest, Iterator & iterator) requires IsObjectIterator { return JSONExtractUInt8Impl::insertResultToColumn(dest, 
iterator); } @@ -1258,20 +1260,24 @@ class JSONExtractRawImpl static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - static bool insertResultToColumn(IColumn & dest, const Iterator & iterator) requires IsElementIterator + static bool insertResultToColumn(IColumn & dest, Iterator & iterator, DialectType dialect_type = DialectType::CLICKHOUSE) requires IsElementIterator { const auto & element = iterator.getElement(); auto & col_str = assert_cast(dest); auto & chars = col_str.getChars(); WriteBufferFromVector buf(chars, AppendModeTag()); - Traverse::traverse(element, buf); + if (dialect_type == DialectType::MYSQL) + Traverse::traverse(element, buf, true); + else + Traverse::traverse(element, buf); + buf.finalize(); chars.push_back(0); col_str.getOffsets().push_back(chars.size()); return true; } - static bool insertResultToColumn(IColumn & dest, const Iterator & iterator) requires IsObjectIterator + static bool insertResultToColumn(IColumn & dest, ObjectIterator & iterator, DialectType dialect_type = DialectType::CLICKHOUSE) requires IsObjectIterator { const auto & type = iterator.getType(); const auto & column = iterator.getColumn(); @@ -1284,10 +1290,13 @@ class JSONExtractRawImpl WriteBufferFromVector buf(to_chars, AppendModeTag{}); + const auto & format_setting + = dialect_type == DialectType::MYSQL ? 
Traverse::unquote_format_settings() : Traverse::format_settings(); + if (isDummyTuple(*type)) writeString("{}", buf); else - serialization->serializeTextJSON(*column, row, buf, Traverse::format_settings()); + serialization->serializeTextJSON(*column, row, buf, format_setting); writeChar(0, buf); buf.finalize(); @@ -1314,7 +1323,7 @@ class JSONUnquoteImpl col_str.insertData(json_data.data(), json_data.size()); } - static bool insertResultToColumn(IColumn & dest, const Iterator & iterator) requires IsElementIterator + static bool insertResultToColumn(IColumn & dest, Iterator & iterator) requires IsElementIterator { const auto & element = iterator.getElement(); auto & col_str = assert_cast(dest); @@ -1322,7 +1331,7 @@ class JSONUnquoteImpl return true; } - static bool insertResultToColumn(IColumn & dest, const Iterator & iterator) requires IsObjectIterator + static bool insertResultToColumn(IColumn & dest, Iterator & iterator) requires IsObjectIterator { const auto & type = iterator.getType(); const auto & column = iterator.getColumn(); @@ -1359,7 +1368,7 @@ class JSONExtractStringImpl static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - static bool insertResultToColumn(IColumn & dest, const Iterator & iterator) requires IsElementIterator + static bool insertResultToColumn(IColumn & dest, Iterator & iterator) requires IsElementIterator { const auto & element = iterator.getElement(); if (!element.isString()) @@ -1371,7 +1380,7 @@ class JSONExtractStringImpl return true; } - static bool insertResultToColumn(IColumn & dest, const Iterator & iterator) requires IsObjectIterator + static bool insertResultToColumn(IColumn & dest, ObjectIterator & iterator) requires IsObjectIterator { const auto & column = iterator.getColumn(); UInt64 row = iterator.getRow(); @@ -1396,14 +1405,14 @@ struct JSONExtractTree public: Node() = default; virtual ~Node() = default; - virtual bool insertResultToColumn(IColumn &, const 
Iterator &) = 0; + virtual bool insertResultToColumn(IColumn &, Iterator &) = 0; }; template class NumericNode : public Node { public: - bool insertResultToColumn(IColumn & dest, const Iterator & iterator) override + bool insertResultToColumn(IColumn & dest, Iterator & iterator) override { return JSONExtractNumericImpl::insertResultToColumn(dest, iterator); } @@ -1417,7 +1426,7 @@ struct JSONExtractTree { } - bool insertResultToColumn(IColumn & dest, const Iterator & iterator) override + bool insertResultToColumn(IColumn & dest, Iterator & iterator) override { auto from_col = dictionary_type->createColumn(); if (impl->insertResultToColumn(*from_col, iterator)) @@ -1436,7 +1445,7 @@ struct JSONExtractTree class UUIDNodeString : public Node { public: - bool insertResultToColumn(IColumn & dest, const Iterator & iterator) override + bool insertResultToColumn(IColumn & dest, Iterator & iterator) override { const auto & element = iterator.getElement(); if (!element.isString()) @@ -1451,7 +1460,7 @@ struct JSONExtractTree class UUIDNodeObject : public Node { public: - bool insertResultToColumn(IColumn & dest, const Iterator & iterator) override + bool insertResultToColumn(IColumn & dest, Iterator & iterator) override { if (!isString(iterator.getType())) return false; @@ -1473,7 +1482,7 @@ struct JSONExtractTree public: explicit DecimalNodeString(DataTypePtr data_type_) : data_type(std::move(data_type_)) {} - bool insertResultToColumn(IColumn & dest, const Iterator & iterator) override + bool insertResultToColumn(IColumn & dest, Iterator & iterator) override { const auto & element = iterator.getElement(); const auto * type = assert_cast *>(data_type.get()); @@ -1503,7 +1512,7 @@ struct JSONExtractTree public: explicit DecimalNodeObject(DataTypePtr data_type_) : data_type(std::move(data_type_)) {} - bool insertResultToColumn(IColumn & dest, const Iterator & iterator) override + bool insertResultToColumn(IColumn & dest, Iterator & iterator) override { const auto * 
decimal_type = assert_cast *>(data_type.get()); const auto & from = iterator.getColumn(); @@ -1533,7 +1542,7 @@ struct JSONExtractTree class StringNodeString : public Node { public: - bool insertResultToColumn(IColumn & dest, const Iterator & iterator) override + bool insertResultToColumn(IColumn & dest, Iterator & iterator) override { const auto & element = iterator.getElement(); if (element.isString()) @@ -1548,7 +1557,7 @@ struct JSONExtractTree class StringNodeObject : public Node { public: - bool insertResultToColumn(IColumn & dest, const Iterator & iterator) override + bool insertResultToColumn(IColumn & dest, Iterator & iterator) override { return JSONExtractStringImpl::insertResultToColumn(dest, iterator); } @@ -1559,7 +1568,7 @@ struct JSONExtractTree class FixedStringNodeString : public Node { public: - bool insertResultToColumn(IColumn & dest, const Iterator & iterator) override + bool insertResultToColumn(IColumn & dest, Iterator & iterator) override { const auto & element = iterator.getElement(); if (!element.isString()) @@ -1576,7 +1585,7 @@ struct JSONExtractTree class FixedStringNodeObject : public Node { public: - bool insertResultToColumn(IColumn & dest, const Iterator & iterator) override + bool insertResultToColumn(IColumn & dest, Iterator & iterator) override { if (!isString(iterator.getType())) return false; @@ -1622,7 +1631,7 @@ struct JSONExtractTree { } - bool insertResultToColumn(IColumn & dest, const Iterator & iterator) override + bool insertResultToColumn(IColumn & dest, Iterator & iterator) override { const auto & element = iterator.getElement(); auto & col_vec = assert_cast &>(dest); @@ -1667,7 +1676,7 @@ struct JSONExtractTree { } - bool insertResultToColumn(IColumn & dest, const Iterator & iterator) override + bool insertResultToColumn(IColumn & dest, Iterator & iterator) override { auto & to_vec = assert_cast &>(dest); const auto & from = iterator.getColumn(); @@ -1711,7 +1720,7 @@ struct JSONExtractTree public: explicit 
NullableNode(std::unique_ptr nested_) : nested(std::move(nested_)) {} - bool insertResultToColumn(IColumn & dest, const Iterator & iterator) override + bool insertResultToColumn(IColumn & dest, Iterator & iterator) override { auto & col_null = assert_cast(dest); if (!nested->insertResultToColumn(col_null.getNestedColumn(), iterator)) @@ -1729,7 +1738,7 @@ struct JSONExtractTree public: explicit ArrayNodeString(std::unique_ptr nested_) : nested(std::move(nested_)) {} - bool insertResultToColumn(IColumn & dest, const Iterator & iterator) override + bool insertResultToColumn(IColumn & dest, Iterator & iterator) override { const auto & element = iterator.getElement(); if (!element.isArray()) @@ -1744,7 +1753,8 @@ struct JSONExtractTree for (auto value : array) { - if (nested->insertResultToColumn(data, Iterator{value})) + auto temp_it = Iterator{value}; + if (nested->insertResultToColumn(data, temp_it)) were_valid_elements = true; else data.insertDefault(); @@ -1769,7 +1779,7 @@ struct JSONExtractTree public: explicit ArrayNodeObject(std::unique_ptr nested_) : nested(std::move(nested_)) {} - bool insertResultToColumn(IColumn & dest, const Iterator & iterator) override + bool insertResultToColumn(IColumn & dest, Iterator & iterator) override { const auto * from_array = typeid_cast(iterator.getColumn().get()); if (!from_array) @@ -1834,7 +1844,7 @@ struct JSONExtractTree { } - bool insertResultToColumn(IColumn & dest, const Iterator & iterator) override + bool insertResultToColumn(IColumn & dest, Iterator & iterator) override { ColumnTuple & tuple = assert_cast(dest); size_t old_size = dest.size(); @@ -1865,7 +1875,8 @@ struct JSONExtractTree for (size_t index = 0; (index != this->nested.size()) && (it != array.end()); ++index) { - if (this->nested[index]->insertResultToColumn(tuple.getColumn(index), Iterator{*it++})) + auto temp_it = Iterator{*it++}; + if (this->nested[index]->insertResultToColumn(tuple.getColumn(index), temp_it)) were_valid_elements = true; else 
tuple.getColumn(index).insertDefault(); @@ -1882,7 +1893,8 @@ struct JSONExtractTree auto it = object.begin(); for (size_t index = 0; (index != this->nested.size()) && (it != object.end()); ++index) { - if (this->nested[index]->insertResultToColumn(tuple.getColumn(index), Iterator{(*it++).second})) + auto temp_it = Iterator{(*it++).second}; + if (this->nested[index]->insertResultToColumn(tuple.getColumn(index), temp_it)) were_valid_elements = true; else tuple.getColumn(index).insertDefault(); @@ -1895,7 +1907,8 @@ struct JSONExtractTree auto index = this->name_to_index_map.find(key); if (index != this->name_to_index_map.end()) { - if (this->nested[index->second]->insertResultToColumn(tuple.getColumn(index->second), Iterator{value})) + auto temp_it = Iterator{value}; + if (this->nested[index->second]->insertResultToColumn(tuple.getColumn(index->second), temp_it)) were_valid_elements = true; } } @@ -1916,7 +1929,7 @@ struct JSONExtractTree { } - bool insertResultToColumn(IColumn & dest, const Iterator & iterator) override + bool insertResultToColumn(IColumn & dest, Iterator & iterator) override { auto & to_tuple = assert_cast(dest); size_t old_size = dest.size(); @@ -1984,7 +1997,7 @@ struct JSONExtractTree public: MapNodeString(std::unique_ptr key_, std::unique_ptr value_) : MapNodeBase(std::move(key_), std::move(value_)) { } - bool insertResultToColumn(IColumn & dest, const Iterator & iterator) override + bool insertResultToColumn(IColumn & dest, Iterator & iterator) override { const auto & element = iterator.getElement(); if (!element.isObject()) @@ -2006,7 +2019,8 @@ struct JSONExtractTree key_col.insertData(pair.first.data(), pair.first.size()); /// Insert value - if (!this->value->insertResultToColumn(value_col, Iterator{pair.second})) + auto temp_it = Iterator{pair.second}; + if (!this->value->insertResultToColumn(value_col, temp_it)) value_col.insertDefault(); } @@ -2020,7 +2034,7 @@ struct JSONExtractTree public: MapNodeObject(std::unique_ptr key_, 
std::unique_ptr value_) : MapNodeBase(std::move(key_), std::move(value_)) { } - bool insertResultToColumn(IColumn & dest, const Iterator & iterator) override + bool insertResultToColumn(IColumn & dest, Iterator & iterator) override { const auto * from_tuple_type = typeid_cast(iterator.getType().get()); if (!from_tuple_type) @@ -2152,7 +2166,7 @@ class JSONExtractImpl extract_tree = JSONExtractTree::build(function_name, result_type); } - bool insertResultToColumn(IColumn & dest, const Iterator & iterator) + bool insertResultToColumn(IColumn & dest, Iterator & iterator) { return extract_tree->insertResultToColumn(dest, iterator); } @@ -2195,7 +2209,7 @@ class JSONExtractKeysAndValuesImpl extract_tree = JSONExtractTree::build(function_name, value_type); } - bool insertResultToColumn(IColumn & dest, const Iterator & iterator) requires IsElementIterator + bool insertResultToColumn(IColumn & dest, Iterator & iterator) requires IsElementIterator { const auto & element = iterator.getElement(); if (!element.isObject()) @@ -2223,7 +2237,7 @@ class JSONExtractKeysAndValuesImpl return true; } - bool insertResultToColumn(IColumn & dest, const Iterator & iterator) requires IsObjectIterator + bool insertResultToColumn(IColumn & dest, Iterator & iterator) requires IsObjectIterator { const auto * from_tuple_type = typeid_cast(iterator.getType().get()); if (!from_tuple_type || !from_tuple_type->haveExplicitNames()) @@ -2273,7 +2287,7 @@ class GetJsonObjectImpl static size_t getResolveArgumentIndex(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - static bool insertResultToColumn(IColumn & dest, const Iterator & iterator) requires IsElementIterator + static bool insertResultToColumn(IColumn & dest, Iterator & iterator) requires IsElementIterator { const auto & element = iterator.getElement(); ColumnString & col_str = assert_cast(dest); @@ 
-2286,7 +2300,7 @@ class GetJsonObjectImpl return true; } - static bool insertResultToColumn(IColumn & dest, const Iterator & iterator) requires IsObjectIterator + static bool insertResultToColumn(IColumn & dest, Iterator & iterator) requires IsObjectIterator { const auto & type = iterator.getType(); const auto & column = iterator.getColumn(); @@ -2324,7 +2338,7 @@ class JSONExtractArrayRawImpl static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - static bool insertResultToColumn(IColumn & dest, const Iterator & iterator) requires IsElementIterator + static bool insertResultToColumn(IColumn & dest, Iterator & iterator) requires IsElementIterator { const auto & element = iterator.getElement(); if (!element.isArray()) @@ -2384,7 +2398,7 @@ class JSONExtractKeysAndValuesRawImpl static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - static bool insertResultToColumn(IColumn & dest, const Iterator & iterator) requires IsElementIterator + static bool insertResultToColumn(IColumn & dest, Iterator & iterator) requires IsElementIterator { const auto & element = iterator.getElement(); if (!element.isObject()) @@ -2450,7 +2464,7 @@ class JSONExtractKeysImpl static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - static bool insertResultToColumn(IColumn & dest, const Iterator & iterator) requires IsElementIterator + static bool insertResultToColumn(IColumn & dest, Iterator & iterator) requires IsElementIterator { const auto & element = iterator.getElement(); if (!element.isObject()) @@ -2470,7 +2484,7 @@ class JSONExtractKeysImpl return true; } - bool insertResultToColumn(IColumn & dest, const ObjectIterator & iterator) requires IsObjectIterator + static bool insertResultToColumn(IColumn & dest, ObjectIterator & iterator) requires IsObjectIterator { const auto * type_tuple = 
typeid_cast(iterator.getType().get()); if (!type_tuple || !type_tuple->haveExplicitNames()) diff --git a/src/Functions/JSONPath/Generator/ObjectJSONVisitorJSONPathMemberAccess.h b/src/Functions/JSONPath/Generator/ObjectJSONVisitorJSONPathMemberAccess.h index 0caa7f38c8b..0dc8e82930f 100644 --- a/src/Functions/JSONPath/Generator/ObjectJSONVisitorJSONPathMemberAccess.h +++ b/src/Functions/JSONPath/Generator/ObjectJSONVisitorJSONPathMemberAccess.h @@ -17,7 +17,11 @@ class ObjectJSONVisitorJSONPathMemberAccess : public IObjectJSONVisitor VisitorStatus apply(ObjectIterator & iterator) override { const auto * type_tuple = typeid_cast(iterator.getType().get()); + if (!type_tuple || !type_tuple->haveExplicitNames()) + return VisitorStatus::Error; auto pos = type_tuple->tryGetPositionByName(member_access_ptr->member_name); + if (!pos) + return VisitorStatus::Error; const auto& type = type_tuple->getElement(*pos); auto subcolumn = assert_cast(*iterator.getColumn()).getColumnPtr(*pos); iterator.setType(type); diff --git a/src/Functions/JSONPath/Parsers/ParserJSONPathArrayIndex.cpp b/src/Functions/JSONPath/Parsers/ParserJSONPathArrayIndex.cpp new file mode 100644 index 00000000000..2a533984d64 --- /dev/null +++ b/src/Functions/JSONPath/Parsers/ParserJSONPathArrayIndex.cpp @@ -0,0 +1,51 @@ +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ +/** + * + * @param pos token iterator + * @param node node of ParserJSONPathArrayIndex + * @param expected stuff for logging + * @return was parse successful + * '$.a.1' -> is_start_with_dot = true + * '$.1' -> is_start_with_dot = false + */ +bool ParserJSONPathArrayIndex::parseImpl(Pos & pos, ASTPtr & node, Expected & /*expected*/) +{ + bool is_start_with_dot = false; + if (pos->type == TokenType::Dot) + { + is_start_with_dot = true; + ++pos; + } + + if (pos->type != TokenType::Number) + return false; + + auto range = std::make_shared(); + node = range; + + std::pair range_indices; + + std::string 
number_str; + number_str.assign(is_start_with_dot ? pos->begin : pos->begin + 1, pos->end); + UInt32 index; + if (!Poco::NumberParser::tryParseUnsigned(number_str, index)) + return false; + range_indices.first = index; + range_indices.second = range_indices.first + 1; + range->ranges.push_back(std::move(range_indices)); + + ++pos; + + return !range->ranges.empty(); +} + +} diff --git a/src/Functions/JSONPath/Parsers/ParserJSONPathArrayIndex.h b/src/Functions/JSONPath/Parsers/ParserJSONPathArrayIndex.h new file mode 100644 index 00000000000..7b0836f54d8 --- /dev/null +++ b/src/Functions/JSONPath/Parsers/ParserJSONPathArrayIndex.h @@ -0,0 +1,14 @@ +#pragma once + +#include + +namespace DB +{ +class ParserJSONPathArrayIndex : public IParserBase +{ + const char * getName() const override { return "ParserJSONPathArrayIndex"; } + + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/src/Functions/JSONPath/Parsers/ParserJSONPathQuery.cpp b/src/Functions/JSONPath/Parsers/ParserJSONPathQuery.cpp index c18b2ad9b31..b43063ed1f4 100644 --- a/src/Functions/JSONPath/Parsers/ParserJSONPathQuery.cpp +++ b/src/Functions/JSONPath/Parsers/ParserJSONPathQuery.cpp @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB @@ -22,6 +23,7 @@ bool ParserJSONPathQuery::parseImpl(Pos & pos, ASTPtr & query, Expected & expect ParserJSONPathRange parser_jsonpath_range; ParserJSONPathStar parser_jsonpath_star; ParserJSONPathRoot parser_jsonpath_root; + ParserJSONPathArrayIndex parser_jsonpath_array_index; ASTPtr path_root; if (!parser_jsonpath_root.parse(pos, path_root, expected)) @@ -33,7 +35,8 @@ bool ParserJSONPathQuery::parseImpl(Pos & pos, ASTPtr & query, Expected & expect ASTPtr accessor; while (parser_jsonpath_member_access.parse(pos, accessor, expected) || parser_jsonpath_range.parse(pos, accessor, expected) - || parser_jsonpath_star.parse(pos, accessor, expected)) + || parser_jsonpath_star.parse(pos, accessor, expected) + || 
parser_jsonpath_array_index.parse(pos, accessor, expected)) { if (accessor) { diff --git a/src/Functions/JSONPath/Parsers/ParserJSONPathRange.cpp b/src/Functions/JSONPath/Parsers/ParserJSONPathRange.cpp index b51e59ac2f3..aeb7197747e 100644 --- a/src/Functions/JSONPath/Parsers/ParserJSONPathRange.cpp +++ b/src/Functions/JSONPath/Parsers/ParserJSONPathRange.cpp @@ -43,6 +43,10 @@ namespace ErrorCodes */ bool ParserJSONPathRange::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { + if (pos->type == TokenType::Dot) + { + ++pos; + } if (pos->type != TokenType::OpeningSquareBracket) { diff --git a/tests/queries/0_stateless/01889_sql_json_functions.reference b/tests/queries/0_stateless/01889_sql_json_functions.reference new file mode 100644 index 00000000000..c12a0e6cefa --- /dev/null +++ b/tests/queries/0_stateless/01889_sql_json_functions.reference @@ -0,0 +1,89 @@ +--JSON_VALUE-- + +1 +1.2 +true +"world" +null + + + + +--JSON_QUERY-- +[{"hello":1}] +[1] +[1.2] +[true] +["world"] +[null] +[["world","world2"]] +[{"world":"!"}] + + +[0, 1, 4, 0, -1, -4] +--JSON_EXISTS-- +1 +0 +1 +1 +1 +0 +1 +0 +0 +1 +1 +0 +1 +0 +1 +--JSON_LENGTH-- +3 +2 +1 +0 +2 +1 +\N +\N +0 +--JSON_EXTRACT-- +1 +John Doe +John Doe +green +20 +20 +20 + +--JSON_CONTAINS-- +1 +1 +1 +1 +0 +0 +0 +1 +1 +--JSON_CONTAINS_PATH-- +1 +1 +1 +0 +--JSON_KEYS-- +['c'] +['a','b'] +--JSON_SIZE-- +2 +0 +--JSON_ARRAY_CONTAINS-- +1 +0 +0 +--JSON_ARRAY_LENGTH-- +3 +--MANY ROWS-- +0 ["Vasily", "Kostya"] +1 ["Tihon", "Ernest"] +2 ["Katya", "Anatoliy"] diff --git a/tests/queries/0_stateless/01889_sql_json_functions.sql b/tests/queries/0_stateless/01889_sql_json_functions.sql new file mode 100644 index 00000000000..e91db49a076 --- /dev/null +++ b/tests/queries/0_stateless/01889_sql_json_functions.sql @@ -0,0 +1,113 @@ +set dialect_type = 'MYSQL'; +SELECT '--JSON_VALUE--'; +SELECT JSON_VALUE('{"hello":1}', '$'); -- root is a complex object => default value (empty string) +SELECT JSON_VALUE('{"hello":1}', '$.hello'); 
+SELECT JSON_VALUE('{"hello":1.2}', '$.hello'); +SELECT JSON_VALUE('{"hello":true}', '$.hello'); +SELECT JSON_VALUE('{"hello":"world"}', '$.hello'); +SELECT JSON_VALUE('{"hello":null}', '$.hello'); +SELECT JSON_VALUE('{"hello":["world","world2"]}', '$.hello'); +SELECT JSON_VALUE('{"hello":{"world":"!"}}', '$.hello'); +SELECT JSON_VALUE('{hello:world}', '$.hello'); -- invalid json => default value (empty string) +SELECT JSON_VALUE('', '$.hello'); + +SELECT '--JSON_QUERY--'; +SELECT JSON_QUERY('{"hello":1}', '$'); +SELECT JSON_QUERY('{"hello":1}', '$.hello'); +SELECT JSON_QUERY('{"hello":1.2}', '$.hello'); +SELECT JSON_QUERY('{"hello":true}', '$.hello'); +SELECT JSON_QUERY('{"hello":"world"}', '$.hello'); +SELECT JSON_QUERY('{"hello":null}', '$.hello'); +SELECT JSON_QUERY('{"hello":["world","world2"]}', '$.hello'); +SELECT JSON_QUERY('{"hello":{"world":"!"}}', '$.hello'); +SELECT JSON_QUERY( '{hello:{"world":"!"}}}', '$.hello'); -- invalid json => default value (empty string) +SELECT JSON_QUERY('', '$.hello'); +SELECT JSON_QUERY('{"array":[[0, 1, 2, 3, 4, 5], [0, -1, -2, -3, -4, -5]]}', '$.array[*][0 to 2, 4]'); + +SELECT '--JSON_EXISTS--'; +SELECT JSON_EXISTS('{"hello":1}', '$'); +SELECT JSON_EXISTS('', '$'); +SELECT JSON_EXISTS('{}', '$'); +SELECT JSON_EXISTS('{"hello":1}', '$.hello'); +SELECT JSON_EXISTS('{"hello":1,"world":2}', '$.world'); +SELECT JSON_EXISTS('{"hello":{"world":1}}', '$.world'); +SELECT JSON_EXISTS('{"hello":{"world":1}}', '$.hello.world'); +SELECT JSON_EXISTS('{hello:world}', '$.hello'); -- invalid json => default value (zero integer) +SELECT JSON_EXISTS('', '$.hello'); +SELECT JSON_EXISTS('{"hello":["world"]}', '$.hello[*]'); +SELECT JSON_EXISTS('{"hello":["world"]}', '$.hello[0]'); +SELECT JSON_EXISTS('{"hello":["world"]}', '$.hello[1]'); +SELECT JSON_EXISTS('{"a":[{"b":1},{"c":2}]}', '$.a[*].b'); +SELECT JSON_EXISTS('{"a":[{"b":1},{"c":2}]}', '$.a[*].f'); +SELECT JSON_EXISTS('{"a":[[{"b":1}, {"g":1}],[{"h":1},{"y":1}]]}', '$.a[*][0].h'); + 
+SELECT '--JSON_LENGTH--'; +SELECT JSON_LENGTH('[1, 2, {"a": 3}]', '$'); +SELECT JSON_LENGTH('{"a": 1, "b": {"c": 30}}', '$'); +SELECT JSON_LENGTH('{"a": 1, "b": {"c": 30}}', '$.b'); +SELECT JSON_LENGTH('{"hello":["world"]}', '$.hello[*]'); +SELECT JSON_LENGTH('{"hello":["world","world2"]}', '$.hello'); +SELECT JSON_LENGTH('{"hello":{"world":"!"}}', '$.hello'); +SELECT JSON_LENGTH( '{hello:{"world":"!"}}}', '$.hello'); -- invalid json => default value (empty string) +SELECT JSON_LENGTH('', '$.hello'); +SELECT JSON_LENGTH('{"array":[[0, 1, 2, 3, 4, 5], [0, -1, -2, -3, -4, -5]]}', '$.array[*][0 to 2, 4]'); + +-- SELECT '--JSON_VALID--'; +-- SELECT JSON_VALID('{"name": "John Doe", "age": 30}'); +-- SELECT JSON_VALID('{"name: "John Doe", "age": 30}'); +-- SELECT JSON_VALID('{"name": 123, "age": 30}'); +-- SELECT JSON_VALID('{"name": "John Doe"}'); +-- SELECT JSON_VALID(NULL); + +SELECT '--JSON_EXTRACT--'; +SELECT JSON_EXTRACT('{"hello":1}', '$.hello'); +SELECT JSON_EXTRACT('{"name": "John Doe", "age": 30}', '$.name'); +SELECT JSON_EXTRACT('{"person": {"name": "John Doe", "age": 30}}', '$.person.name'); +SELECT JSON_EXTRACT('{"colors": ["red", "green", "blue"]}', '$.colors[1]'); +SELECT JSON_EXTRACT('{"name": "John Doe", "age": 30}', '$.address'); +SELECT JSON_EXTRACT('{"name: "John Doe", "age": 30}', '$.name'); +SELECT json_extract('[10, 20, [30, 40]]', '$.[1]'); +SELECT json_extract('[10, 20, [30, 40]]', '$.1'); +SELECT json_extract('{"a":[10, 20, [30, 40]]}', '$.a.1'); + +SELECT '--JSON_CONTAINS--'; +SELECT JSON_CONTAINS('{"a": "hello", "b": [-100, 200.0, 300]}', '"hello"', '$.a'); +SELECT JSON_CONTAINS('{"a": "hello", "b": [-100, 200.0, 300]}', '-100', '$.b[0]'); +SELECT JSON_CONTAINS('{"a": "hello", "b": [-100, 200.0, 300]}', '-100', '$.b'); +SELECT JSON_CONTAINS('{"a": "hello", "b": [-100, 200.0, 300]}', '-100', '$.b[*]'); +SELECT JSON_CONTAINS('{"a": "hello", "b": [-100, 200.0, 300]}', '"hello"'); +SELECT JSON_CONTAINS('{"a": "hello", "b": [-100, 200.0, 300]}', 
'"world"', '$.a'); +SELECT JSON_CONTAINS('{"a": "hello", "b": [-100, 200.0, 300]}', '-1', '$.b[0]'); +select JSON_CONTAINS('{"a": 1, "b": 2, "c": {"d": 4}}', '{"a":1, "c": {"d": 4}}'); +SELECT JSON_CONTAINS('{"a": "hello", "b": [-100, 200.0, 300]}', '[-100, 200.0]', '$.b'); + +SELECT '--JSON_CONTAINS_PATH--'; +SELECT JSON_CONTAINS_PATH('{"a": "hello", "b": [-100, 200.0, 300]}', 'all', '$.a'); +SELECT JSON_CONTAINS_PATH('{"a": "hello", "b": [-100, 200.0, 300]}', 'all', '$.a', '$.b'); +SELECT JSON_CONTAINS_PATH('{"a": "hello", "b": [-100, 200.0, 300]}', 'one', '$.a', '$.c'); +SELECT JSON_CONTAINS_PATH('{"a": "hello", "b": [-100, 200.0, 300]}', 'all', '$.a', '$.c'); + +SELECT '--JSON_KEYS--'; +SELECT json_keys('{"a": 1, "b": {"c": 30}}','$.b'); +SELECT JSON_KEYS('{"a": 1, "b": {"c": 30}}'); + +SELECT '--JSON_SIZE--'; +SELECT json_size('{"x":{"a":1, "b": 2}}', '$.x'); +SELECT json_size('{"x": {"a": 1, "b": 2}}', '$.x.a'); + +SELECT '--JSON_ARRAY_CONTAINS--'; +SELECT json_array_contains('[1, 2, 3]', 2); +SELECT json_array_contains('[1, 2, 3]', 10); +SELECT json_array_contains("['1', '2', '3']", '3'); + +SELECT '--JSON_ARRAY_LENGTH--'; +SELECT json_array_length('[1, 2, 3]'); + +SELECT '--MANY ROWS--'; +DROP TABLE IF EXISTS 01889_sql_json; +CREATE TABLE 01889_sql_json (id UInt8, json String) ENGINE = MergeTree ORDER BY id; +INSERT INTO 01889_sql_json(id, json) VALUES(0, '{"name":"Ivan","surname":"Ivanov","friends":["Vasily","Kostya","Artyom"]}'); +INSERT INTO 01889_sql_json(id, json) VALUES(1, '{"name":"Katya","surname":"Baltica","friends":["Tihon","Ernest","Innokentiy"]}'); +INSERT INTO 01889_sql_json(id, json) VALUES(2, '{"name":"Vitali","surname":"Brown","friends":["Katya","Anatoliy","Ivan","Oleg"]}'); +SELECT id, JSON_QUERY(json, '$.friends[0 to 2]') FROM 01889_sql_json ORDER BY id; +DROP TABLE 01889_sql_json; diff --git a/tests/queries/0_stateless/01889_sql_object_json_functions.reference b/tests/queries/0_stateless/01889_sql_object_json_functions.reference new file 
mode 100644 index 00000000000..95ce2c48098 --- /dev/null +++ b/tests/queries/0_stateless/01889_sql_object_json_functions.reference @@ -0,0 +1,58 @@ +--JSON_VALUE-- + +1 +1.2 +true +"world" + + + +--JSON_QUERY-- +--JSON_EXISTS-- +1 +1 +1 +0 +1 +1 +1 +0 +1 +0 +1 +--JSON_LENGTH-- +2 +1 +0 +2 +1 +0 +--JSON_EXTRACT-- +1 +John Doe +John Doe +green +[20] + + +--JSON_CONTAINS-- +1 +1 +1 +1 +0 +0 +0 +1 +1 +--JSON_CONTAINS_PATH-- +1 +1 +1 +0 +--JSON_KEYS-- +['c'] +['a','b'] +--JSON_SIZE-- +2 +0 diff --git a/tests/queries/0_stateless/01889_sql_object_json_functions.sql b/tests/queries/0_stateless/01889_sql_object_json_functions.sql new file mode 100644 index 00000000000..6bcb0f152ee --- /dev/null +++ b/tests/queries/0_stateless/01889_sql_object_json_functions.sql @@ -0,0 +1,97 @@ +set dialect_type = 'MYSQL'; +SELECT '--JSON_VALUE--'; +SELECT JSON_VALUE('{"hello":1}'::Object('json'), '$'); -- root is a complex object => default value (empty string) +SELECT JSON_VALUE('{"hello":1}'::Object('json'), '$.hello'); +SELECT JSON_VALUE('{"hello":1.2}'::Object('json'), '$.hello'); +SELECT JSON_VALUE('{"hello":true}'::Object('json'), '$.hello'); +SELECT JSON_VALUE('{"hello":"world"}'::Object('json'), '$.hello'); +SELECT JSON_VALUE('{"hello":null}'::Object('json'), '$.hello'); +SELECT JSON_VALUE('{"hello":["world","world2"]}'::Object('json'), '$.hello'); +SELECT JSON_VALUE('{"hello":{"world":"!"}}'::Object('json'), '$.hello'); + +SELECT '--JSON_QUERY--'; +-- SELECT JSON_QUERY('{"hello":1}'::Object('json'), '$'); +-- SELECT JSON_QUERY('{"hello":1}'::Object('json'), '$.hello'); +-- SELECT JSON_QUERY('{"hello":1.2}'::Object('json'), '$.hello'); +-- SELECT JSON_QUERY('{"hello":true}'::Object('json'), '$.hello'); +-- SELECT JSON_QUERY('{"hello":"world"}'::Object('json'), '$.hello'); +-- SELECT JSON_QUERY('{"hello":null}'::Object('json'), '$.hello'); +-- SELECT JSON_QUERY('{"hello":["world","world2"]}'::Object('json'), '$.hello'); +-- SELECT 
JSON_QUERY('{"hello":{"world":"!"}}'::Object('json'), '$.hello'); +-- SELECT JSON_QUERY( '{hello:{"world":"!"}}}'::Object('json'), '$.hello'); -- invalid json => default value (empty string) +-- SELECT JSON_QUERY('{"array":[[0, 1, 2, 3, 4, 5], [0, -1, -2, -3, -4, -5]]}'::Object('json'), '$.array[*][0 to 2, 4]'); + +SELECT '--JSON_EXISTS--'; +SELECT JSON_EXISTS('{"hello":1}'::Object('json'), '$'); +SELECT JSON_EXISTS('{"hello":1}'::Object('json'), '$.hello'); +SELECT JSON_EXISTS('{"hello":1,"world":2}'::Object('json'), '$.world'); +SELECT JSON_EXISTS('{"hello":{"world":1}}'::Object('json'), '$.world'); +SELECT JSON_EXISTS('{"hello":{"world":1}}'::Object('json'), '$.hello.world'); +SELECT JSON_EXISTS('{"hello":["world"]}'::Object('json'), '$.hello[*]'); +SELECT JSON_EXISTS('{"hello":["world"]}'::Object('json'), '$.hello[0]'); +SELECT JSON_EXISTS('{"hello":["world"]}'::Object('json'), '$.hello[1]'); +SELECT JSON_EXISTS('{"a":[{"b":1},{"c":2}]}'::Object('json'), '$.a[*].b'); +SELECT JSON_EXISTS('{"a":[{"b":1},{"c":2}]}'::Object('json'), '$.a[*].f'); +SELECT JSON_EXISTS('{"a":[[{"b":1}, {"g":1}],[{"h":1},{"y":1}]]}'::Object('json'), '$.a[*][0].h'); + +SELECT '--JSON_LENGTH--'; +-- SELECT JSON_LENGTH('[1, 2, {"a": 3}]'::Object('json'), '$'); +SELECT JSON_LENGTH('{"a": 1, "b": {"c": 30}}'::Object('json'), '$'); +SELECT JSON_LENGTH('{"a": 1, "b": {"c": 30}}'::Object('json'), '$.b'); +SELECT JSON_LENGTH('{"hello":["world"]}'::Object('json'), '$.hello[*]'); +SELECT JSON_LENGTH('{"hello":["world","world2"]}'::Object('json'), '$.hello'); +SELECT JSON_LENGTH('{"hello":{"world":"!"}}'::Object('json'), '$.hello'); +-- SELECT JSON_LENGTH( '{hello:{"world":"!"}}}'::Object('json'), '$.hello'); -- invalid json => default value (empty string) +SELECT JSON_LENGTH('{"array":[[0, 1, 2, 3, 4, 5], [0, -1, -2, -3, -4, -5]]}'::Object('json'), '$.array[*][0 to 2, 4]'); + +-- SELECT '--JSON_VALID--'; +-- SELECT JSON_VALID('{"name": "John Doe", "age": 30}'::Object('json')); +-- SELECT 
JSON_VALID('{"name: "John Doe", "age": 30}'::Object('json')); +-- SELECT JSON_VALID('{"name": 123, "age": 30}'::Object('json')); +-- SELECT JSON_VALID('{"name": "John Doe"}'::Object('json')); +-- SELECT JSON_VALID(NULL); + +SELECT '--JSON_EXTRACT--'; +SELECT JSON_EXTRACT('{"hello":1}'::Object('json'), '$.hello'); +SELECT JSON_EXTRACT('{"name": "John Doe", "age": 30}'::Object('json'), '$.name'); +SELECT JSON_EXTRACT('{"person": {"name": "John Doe", "age": 30}}'::Object('json'), '$.person.name'); +SELECT JSON_EXTRACT('{"colors": ["red", "green", "blue"]}'::Object('json'), '$.colors[1]'); +SELECT JSON_EXTRACT('{"name": "John Doe", "age": 30}'::Object('json'), '$.address'); +-- SELECT JSON_EXTRACT('{"name": "John Doe", "age": 30}'::Object('json'), '$.name'); +SELECT json_extract('{"a":[10, 20, [30, 40]]}'::Object('json'), '$.a.1') + +SELECT '--JSON_CONTAINS--'; +SELECT JSON_CONTAINS('{"a": "hello", "b": [-100, 200.0, 300]}'::Object('json'), 'hello', '$.a'); +SELECT JSON_CONTAINS('{"a": "hello", "b": [-100, 200.0, 300]}'::Object('json'), -100, '$.b[0]'); +SELECT JSON_CONTAINS('{"a": "hello", "b": [-100, 200.0, 300]}'::Object('json'), -100, '$.b'); +SELECT JSON_CONTAINS('{"a": "hello", "b": [-100, 200.0, 300]}'::Object('json'), -100, '$.b[*]'); +SELECT JSON_CONTAINS('{"a": "hello", "b": [-100, 200.0, 300]}'::Object('json'), 'hello'); +SELECT JSON_CONTAINS('{"a": "hello", "b": [-100, 200.0, 300]}'::Object('json'), 'world', '$.a'); +SELECT JSON_CONTAINS('{"a": "hello", "b": [-100, 200.0, 300]}'::Object('json'), -1, '$.b[0]'); +select JSON_CONTAINS('{"a": 1, "b": 2, "c": {"d": 4}}'::Object('json'), '{"a":1, "c": {"d": 4}}'::Object('json')); +SELECT JSON_CONTAINS('{"a": "hello", "b": [-100, 200.0, 300]}'::Object('json'), [-100, 200.0], '$.b') settings parse_literal_as_decimal = 0; + +SELECT '--JSON_CONTAINS_PATH--'; +SELECT JSON_CONTAINS_PATH('{"a": "hello", "b": [-100, 200.0, 300]}'::Object('json'), 'all', '$.a'); +SELECT JSON_CONTAINS_PATH('{"a": "hello", "b": [-100, 
200.0, 300]}'::Object('json'), 'all', '$.a', '$.b'); +SELECT JSON_CONTAINS_PATH('{"a": "hello", "b": [-100, 200.0, 300]}'::Object('json'), 'one', '$.a', '$.c'); +SELECT JSON_CONTAINS_PATH('{"a": "hello", "b": [-100, 200.0, 300]}'::Object('json'), 'all', '$.a', '$.c'); + +SELECT '--JSON_KEYS--'; +SELECT json_keys('{"a": 1, "b": {"c": 30}}'::Object('json'),'$.b'); +SELECT JSON_KEYS('{"a": 1, "b": {"c": 30}}'::Object('json')); + +SELECT '--JSON_SIZE--'; +SELECT json_size('{"x":{"a":1, "b": 2}}'::Object('json'), '$.x'); +SELECT json_size('{"x": {"a": 1, "b": 2}}'::Object('json'), '$.x.a'); + +-- SELECT '--MANY ROWS--'; +-- DROP TABLE IF EXISTS 01889_sql_json; +-- set allow_experimental_object_type = 1; +-- set enable_optimizer = 0; +-- CREATE TABLE 01889_sql_json (id UInt8, json Object('json')) ENGINE = MergeTree ORDER BY id; +-- INSERT INTO 01889_sql_json(id, json) VALUES(0, '{"name":"Ivan","surname":"Ivanov","friends":["Vasily","Kostya","Artyom"]}'); +-- INSERT INTO 01889_sql_json(id, json) VALUES(1, '{"name":"Katya","surname":"Baltica","friends":["Tihon","Ernest","Innokentiy"]}'); +-- INSERT INTO 01889_sql_json(id, json) VALUES(2, '{"name":"Vitali","surname":"Brown","friends":["Katya","Anatoliy","Ivan","Oleg"]}'); +-- SELECT id, JSON_QUERY(json, '$.friends[0 to 2]') FROM 01889_sql_json ORDER BY id; +-- DROP TABLE 01889_sql_json; From e74c4dc79bf196a340661399809d561fdfad81c0 Mon Sep 17 00:00:00 2001 From: fredwang Date: Thu, 1 Aug 2024 05:26:52 +0000 Subject: [PATCH 031/292] remove bit engine code --- .../BitEngineEncodePartitionHelper.cpp | 465 ------------------ 1 file changed, 465 deletions(-) delete mode 100644 src/Storages/BitEngineEncodePartitionHelper.cpp diff --git a/src/Storages/BitEngineEncodePartitionHelper.cpp b/src/Storages/BitEngineEncodePartitionHelper.cpp deleted file mode 100644 index d535b57d31a..00000000000 --- a/src/Storages/BitEngineEncodePartitionHelper.cpp +++ /dev/null @@ -1,465 +0,0 @@ -#include -#include -#include -#include -#include 
-#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "DataStreams/IBlockStream_fwd.h" -#include "DataStreams/UnionBlockInputStream.h" -#include -#include - -namespace DB -{ -namespace ErrorCodes -{ - extern const int SYSTEM_ERROR; - extern const int NOT_ENOUGH_SPACE; -} - -/// To do mutate, reserve amount of space equals to sum size of parts times specified coefficient. -static const double DISK_USAGE_COEFFICIENT_TO_RESERVE = 1.1; - -Pipe StorageCnchMergeTree::preattachPartition(const PartitionCommand & command, const ContextPtr & local_context, const ASTPtr & query) -{ - if (!isBitEngineTable()) - return {}; - - String partition_id = getPartitionIDFromQuery(command.partition, local_context); - auto catalog = local_context->getCnchCatalog(); - auto cur_txn = local_context->getCurrentTransaction(); - - /// 1. get partition lock - LockInfoPtr partition_lock = std::make_shared(cur_txn->getTransactionID()); - partition_lock->setMode(LockMode::X); - partition_lock->setTimeout(local_context->getSettingsRef().ingest_column_memory_lock_timeout.value.totalMilliseconds()); // default 5s - partition_lock->setUUIDAndPrefix(getStorageUUID(), LockInfo::task_domain); - partition_lock->setPartition(partition_id); - - Stopwatch lock_watch; - auto cnch_lock = std::make_shared(local_context, std::move(partition_lock)); - cnch_lock->lock(); - LOG_DEBUG(log, "Acquired lock in {} ms", lock_watch.elapsedMilliseconds()); - - /// 2. 
stop merges of the table - /// remove the merge mutate tasks that could cause WW conflict before get server part - auto daemon_manager_client_ptr = local_context->getDaemonManagerClient(); - if (!daemon_manager_client_ptr) - throw Exception("Failed to get daemon manager client", ErrorCodes::SYSTEM_ERROR); - - std::optional merge_job_info - = daemon_manager_client_ptr->getDMBGJobInfo(getStorageUUID(), CnchBGThreadType::MergeMutate, local_context->getCurrentQueryId()); - if (!merge_job_info || merge_job_info->host_port.empty()) - LOG_DEBUG( - log, - "Will skip removing related merge tasks as there is no valid host server for table's merge job: {}", - getStorageID().getNameForLogs()); - else - { - auto server_client_ptr = local_context->getCnchServerClient(merge_job_info->host_port); - if (!server_client_ptr) - throw Exception("Failed to get server client with host port " + merge_job_info->host_port, ErrorCodes::SYSTEM_ERROR); - if (!server_client_ptr->removeMergeMutateTasksOnPartitions(getStorageID(), {partition_id})) - throw Exception( - "Failed to get remove MergeMutateTasks on partition_id " + partition_id + " for table " + getStorageID().getNameForLogs(), - ErrorCodes::SYSTEM_ERROR); - } - - /// 3. get source_parts of the partition - ServerDataPartsVector source_parts = catalog->getServerDataPartsInPartitions( - shared_from_this(), {partition_id}, local_context->getCurrentCnchStartTime(), local_context.get()); - ServerDataPartsVector visible_source_parts - = CnchPartsHelper::calcVisibleParts(source_parts, false, CnchPartsHelper::LoggingOption::EnableLogging); - LOG_DEBUG( - log, - "In partition_id: {}, number of server source parts: {}, visible source parts: {}", - partition_id, - source_parts.size(), - visible_source_parts.size()); - - /// 4. 
allocate dict table and parts - auto underlying_dicts_mapping = getUnderlyDictionaryTables(); - for (auto & entry : underlying_dicts_mapping) - { - auto storage_underlying_dict - = DatabaseCatalog::instance().tryGetTable(StorageID{entry.second.first, entry.second.second}, local_context); - - auto * storage_underlying_dict_cnch = dynamic_cast(storage_underlying_dict.get()); - if (storage_underlying_dict_cnch) - { - storage_underlying_dict_cnch->allocateForBitEngine(local_context, std::set{}, WorkerEngineType::DICT); - } - } - - /// 5. allocate bitengine table and visible_source_parts - String local_table_name = getCloudTableName(local_context); - collectResource(local_context, visible_source_parts, local_table_name); - - /// 6. send Alter query to each worker, and get the Remote Stream to construct an pipe - /// 6.1 rewrite the query to cloud worker - auto query_send = query->clone(); - ASTAlterQuery & query_send_ref = query_send->as(); - query_send_ref.database = getStorageID().getDatabaseName(); - query_send_ref.table = local_table_name; - String query_send_to_worker = queryToString(query_send); - - /// 6.2 collect worker group - auto worker_group = getWorkerGroupForTable(*this, local_context); - local_context->setCurrentWorkerGroup(worker_group); - healthCheckForWorkerGroup(local_context, worker_group); - - /// 6.3 construct remote_stream/pipe to send query to each worker - std::vector remote_streams; - for (const auto & shard_info : worker_group->getShardsInfo()) - { - auto preattach_stream = CnchStorageCommonHelper::sendQueryPerShard(local_context, query_send_to_worker, shard_info); - remote_streams.emplace_back(preattach_stream); - } - - cur_txn->setMainTableUUID(getStorageUUID()); - auto union_stream = std::make_shared(remote_streams, nullptr, local_context->getSettingsRef().max_threads); - auto transaction_stream = std::make_shared(union_stream, std::move(cur_txn), std::move(cnch_lock)); - - return Pipe{std::make_shared(std::move(transaction_stream))}; -} - 
-BitEngineEncodePartitionStream::BitEngineEncodePartitionStream( - const StorageCloudMergeTree & cloud_merge_tree, const PartitionCommand & command_, ContextPtr local_context_) - : storage(cloud_merge_tree), command(command_), local_context(local_context_) -{ -} - -FutureMergedMutatedPart getFuturePart(const MergeTreeMetaBase::DataPartPtr & part, ContextPtr & local_context) -{ - auto new_part_info = part->info; - new_part_info.level += 1; - new_part_info.hint_mutation = new_part_info.mutation; - new_part_info.mutation = local_context->getCurrentTransactionID().toUInt64(); - - FutureMergedMutatedPart future_part; - future_part.uuid = UUIDHelpers::generateV4(); - future_part.parts.push_back(part); - future_part.part_info = new_part_info; - future_part.name = new_part_info.getPartName(); - future_part.type = part->getType(); - return future_part; -} - -Block BitEngineEncodePartitionStream::readImpl() -{ - Stopwatch watch; - String partition_id = storage.getPartitionIDFromQuery(command.partition, local_context); - auto parts_to_encode = storage.getDataPartsVectorInPartition(MergeTreeMetaBase::DataPartState::Committed, partition_id); - - /// 1. encapsulate FutureParts for parts_to_encode - std::vector future_parts; - for (const auto & part : parts_to_encode) - { - future_parts.emplace_back(getFuturePart(part, local_context)); - } - - /// 2. BitEngineDictionaryManager encode parts - auto dict_manager = storage.getBitEngineDictionaryManager(); - auto temp_parts = dict_manager->encodeParts(storage, future_parts, local_context); - - /// 3. 
commit encoded parts - CnchDataWriter cnch_writer(const_cast(storage), local_context, ManipulationType::Insert); - cnch_writer.dumpAndCommitCnchParts(temp_parts); - - LOG_DEBUG( - &Poco::Logger::get("BitEngineEncodePartition"), - "({}) BitEngine encode partition_id {} with {} parts cost {} s.", - storage.getStorageID().getNameForLogs(), - partition_id, - parts_to_encode.size(), - watch.elapsedSeconds()); - - return {}; -} - -PartsEncoder::PartsEncoder(const StorageCloudMergeTree & storage_, BitEngineDictionaryManager & dict_manager_, ContextPtr local_context_) - : storage(storage_), dict_manager(dict_manager_), local_context(local_context_) -{ -} - -MergeTreeMetaBase::MutableDataPartsVector PartsEncoder::encodeBitEngineParts(std::vector & future_parts) -{ - MergeTreeMetaBase::MutableDataPartsVector result_parts; - for (auto & part : future_parts) - { - auto encoded_part = encodeBitEnginePart(part); - result_parts.emplace_back(encoded_part); - } - return result_parts; -} - -static bool needSyncPart(size_t input_rows, size_t input_bytes, const MergeTreeSettings & settings) -{ - return ( - (settings.min_rows_to_fsync_after_merge && input_rows >= settings.min_rows_to_fsync_after_merge) - || (settings.min_compressed_bytes_to_fsync_after_merge && input_bytes >= settings.min_compressed_bytes_to_fsync_after_merge)); -} - -MergeTreeMetaBase::MutableDataPartPtr PartsEncoder::encodeBitEnginePart(FutureMergedMutatedPart & future_part) -{ - auto & source_part = future_part.parts.at(0); - auto columns_to_encode = getBitEngineColumnsInPart(source_part); - if (columns_to_encode.empty()) - return nullptr; - - bool need_sync = needSyncPart(source_part->rows_count, source_part->getBytesOnDisk(), *storage.getSettings()); - auto compression_codec = source_part->default_codec; - - if (!compression_codec) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown codec for bitengine encode part: {}", source_part->name); - - auto new_partial_part = createEmptyTempPart(future_part); - - auto 
input_stream = createInputStream(source_part, columns_to_encode.getNames()); - auto output_stream = createOutputStream(source_part, input_stream->getHeader(), new_partial_part, compression_codec); - encodeTransform(*input_stream, *output_stream, new_partial_part, need_sync); - - finalizeTempPart(source_part, new_partial_part, compression_codec); - - return new_partial_part; -} - -NamesAndTypesList PartsEncoder::getBitEngineColumnsInPart(const IMergeTreeDataPartPtr & part) -{ - const auto & columns = part->getColumns(); - NamesAndTypesList columns_to_encode; - for (const auto & column : columns) - { - if (!isBitmap64(column.type)) - continue; - - bool bitengine_type = column.type->isBitEngineEncode(); - if (bitengine_type || storage.isBitEngineEncodeColumn(column.name)) - { - if (!bitengine_type) - const_cast(column.type.get())->setFlags(TYPE_BITENGINE_ENCODE_FLAG); - columns_to_encode.push_back(column); - } - } - - return columns_to_encode; -} - -IMutableMergeTreeDataPartPtr PartsEncoder::createEmptyTempPart(FutureMergedMutatedPart & future_part) -{ - auto & part = future_part.parts.at(0); - auto estimated_space_for_result = static_cast(part->getBytesOnDisk() * DISK_USAGE_COEFFICIENT_TO_RESERVE); - ReservationPtr reserved_space = storage.reserveSpace(estimated_space_for_result, IStorage::StorageLocation::AUXILITY); - - if (!reserved_space) - throw Exception("Not enough space for encoding part '" + part->name + "' ", ErrorCodes::NOT_ENOUGH_SPACE); - - auto single_disk_volume = std::make_shared("volume_" + future_part.name, reserved_space->getDisk(), 0); - - auto new_partial_part = storage.createPart( - future_part.name, - MergeTreeDataPartType::WIDE, - future_part.part_info, - single_disk_volume, - "tmp_enc_" + future_part.name, - nullptr, - IStorage::StorageLocation::AUXILITY); - - new_partial_part->uuid = future_part.uuid; - new_partial_part->is_temp = true; - new_partial_part->ttl_infos = part->ttl_infos; - new_partial_part->versions = part->versions; - - 
new_partial_part->index_granularity_info = part->index_granularity_info; - new_partial_part->setColumns(part->getColumns()); - new_partial_part->partition.assign(part->partition); - new_partial_part->columns_commit_time = part->columns_commit_time; - new_partial_part->mutation_commit_time = part->mutation_commit_time; - if (storage.isBucketTable()) - new_partial_part->bucket_number = part->bucket_number; - - new_partial_part->checksums_ptr = std::make_shared(); - - auto disk = new_partial_part->volume->getDisk(); - String new_part_tmp_path = new_partial_part->getFullRelativePath(); - - SyncGuardPtr sync_guard; - if (storage.getSettings()->fsync_part_directory) - sync_guard = disk->getDirectorySyncGuard(new_part_tmp_path); - - /// calculate which columns can be skipped in encoding - // NameSet files_to_skip = source_part->getFileNamesWithoutChecksums(); - disk->createDirectories(new_part_tmp_path); - - return new_partial_part; -} - -BlockInputStreamPtr PartsEncoder::createInputStream(const IMergeTreeDataPartPtr & part, Names column_names) -{ - auto input_source = std::make_unique( - storage, - storage.getStorageSnapshot(storage.getInMemoryMetadataPtr(), nullptr), - part, - column_names, - /*read_with_direct_io*/ false, - /*take_column_types_from_storage*/ true); - - QueryPipeline pipeline; - pipeline.init(Pipe(std::move(input_source))); - pipeline.setMaxThreads(1); - BlockInputStreamPtr pipeline_input_stream = std::make_shared(std::move(pipeline)); - - pipeline_input_stream = std::make_shared(pipeline_input_stream, - local_context->getSettingsRef().min_insert_block_size_rows, - local_context->getSettingsRef().min_insert_block_size_bytes * 2U); - - return pipeline_input_stream; -} - -BlockOutputStreamPtr PartsEncoder::createOutputStream( - const IMergeTreeDataPartPtr & source_part, - const Block & header_in, - IMutableMergeTreeDataPartPtr & new_temp_part, - const CompressionCodecPtr & codec) -{ - /// Calc header - Block header_to_write; - for (const auto & column : 
header_in.getColumnsWithTypeAndName()) - { - if (isBitmap64(column.type)) - { - header_to_write.insert(ColumnWithTypeAndName(column.column, column.type, column.name + BITENGINE_COLUMN_EXTENSION)); - } - } - - MergeTreeWriterSettings writer_settings( - storage.getContext()->getSettings(), - storage.getSettings(), - /*can_use_adaptive_granularity = */ false, - false); - - return std::make_shared( - new_temp_part, - storage.getInMemoryMetadataPtr(), - writer_settings, - header_to_write, - codec, - std::vector{}, - nullptr, - source_part->index_granularity); -} - -void PartsEncoder::encodeTransform( - IBlockInputStream & in, IBlockOutputStream & out, IMutableMergeTreeDataPartPtr & new_temp_part, bool need_sync) -{ - in.readPrefix(); - out.writePrefix(); - - Block block; - while ((block = in.read())) - { - writeImplicitColumnForBitEngine(block, new_temp_part->bucket_number); - out.write(block); - } - - in.readSuffix(); - auto changed_checksums = dynamic_cast(out).writeSuffixAndGetChecksums( - new_temp_part, *new_temp_part->getChecksums(), need_sync); - new_temp_part->checksums_ptr->add(std::move(changed_checksums)); -} - -void PartsEncoder::writeImplicitColumnForBitEngine(Block & block, Int64 bucket_number) -{ - ColumnsWithTypeAndName encoded_columns; - const auto & columns = block.getColumnsWithTypeAndName(); - - for (const auto & column : columns) - { - if (!isBitmap64(column.type)) - continue; - - /// check whether the column is a legal BitEngine column in table - if (!storage.isBitEngineEncodeColumn(column.name)) - continue; - - try - { - auto encoded_column = dict_manager.encodeColumn(column, column.name, bucket_number, local_context, BitEngineEncodeSettings{}); - - encoded_columns.push_back(encoded_column); - } - catch (Exception & e) - { - // LOG_ERROR(&Poco::Logger::get("BitEnginePartsEncoder"), "BitEngine encode column exception: {}", e.message()); - // tryLogCurrentException(__PRETTY_FUNCTION__); - throw Exception("BitEngine encode exception. 
reason: " + String(e.message()), ErrorCodes::LOGICAL_ERROR); - } - } - - if (!encoded_columns.empty()) - { - for (auto & encoded_column : encoded_columns) - block.insertUnique(encoded_column); - } -} - -void PartsEncoder::finalizeTempPart( - const MergeTreeDataPartPtr & source_part, const MergeTreeMutableDataPartPtr & new_partial_part, const CompressionCodecPtr & codec) -{ - auto disk = new_partial_part->volume->getDisk(); - auto new_part_checksums_ptr = new_partial_part->getChecksums(); - - if (new_partial_part->uuid != UUIDHelpers::Nil) - { - auto out = disk->writeFile(new_partial_part->getFullRelativePath() + IMergeTreeDataPart::UUID_FILE_NAME, {.buffer_size = 4096}); - HashingWriteBuffer out_hashing(*out); - writeUUIDText(new_partial_part->uuid, out_hashing); - new_part_checksums_ptr->files[IMergeTreeDataPart::UUID_FILE_NAME].file_size = out_hashing.count(); - new_part_checksums_ptr->files[IMergeTreeDataPart::UUID_FILE_NAME].file_hash = out_hashing.getHash(); - } - - { - /// Write file with checksums. - auto out_checksums = disk->writeFile(fs::path(new_partial_part->getFullRelativePath()) / "checksums.txt", {.buffer_size = 4096}); - new_part_checksums_ptr->versions = new_partial_part->versions; - new_part_checksums_ptr->write(*out_checksums); - } /// close fd - - { - auto out = disk->writeFile( - new_partial_part->getFullRelativePath() + IMergeTreeDataPart::DEFAULT_COMPRESSION_CODEC_FILE_NAME, {.buffer_size = 4096}); - DB::writeText(queryToString(codec->getFullCodecDesc()), *out); - } - - { - /// Write a file with a description of columns. 
- auto out_columns = disk->writeFile(fs::path(new_partial_part->getFullRelativePath()) / "columns.txt", {.buffer_size = 4096}); - new_partial_part->getColumns().writeText(*out_columns); - } /// close fd - - new_partial_part->rows_count = source_part->rows_count; - new_partial_part->index_granularity = source_part->index_granularity; - new_partial_part->index = source_part->getIndex(); - new_partial_part->minmax_idx = source_part->minmax_idx; - new_partial_part->modification_time = time(nullptr); - new_partial_part->loadProjections(false, false); - new_partial_part->setBytesOnDisk( - MergeTreeData::DataPart::calculateTotalSizeOnDisk(new_partial_part->volume->getDisk(), new_partial_part->getFullRelativePath())); - new_partial_part->default_codec = codec; -} -} From 6b2da7008ee95870dc726d63ff133b61bc9fb677 Mon Sep 17 00:00:00 2001 From: fredwang Date: Thu, 1 Aug 2024 05:28:54 +0000 Subject: [PATCH 032/292] Merge 'cherry-pick-ba73c2a3' into 'cnch-2.2' fix(clickhousech@m-4505967287): [cp]core bug in destructor of LockHolder See merge request: !22721 --- src/Interpreters/Context.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index f2547e0ad49..81b08cedd52 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -564,7 +564,11 @@ struct ContextSharedPart cnch_bg_threads_array->shutdown(); if (cnch_txn_coordinator) + { cnch_txn_coordinator->shutdown(); + /// Need to reset cnch_txn_coordinator before schedule_pool reset, otherwise it may core. 
+ cnch_txn_coordinator.reset(); + } if (server_manager) server_manager->shutDown(); From 713543f113f7853acb339f450addde22a07a538f Mon Sep 17 00:00:00 2001 From: fredwang Date: Thu, 1 Aug 2024 05:29:32 +0000 Subject: [PATCH 033/292] Merge 'fix-unknown-bg-thread-core-cnch-2.2' into 'cnch-2.2' fix(clickhousech@m-4655971924): [cp] fix server core at unknown bg thread type See merge request: !22724 --- src/CloudServices/CnchBGThreadCommon.h | 54 ++++++++++++++----- src/CloudServices/CnchBGThreadsMap.cpp | 10 ++-- src/CloudServices/CnchBGThreadsMap.h | 28 +++++----- src/CloudServices/CnchServerServiceImpl.cpp | 19 ++----- src/Common/ErrorCodes.cpp | 2 + .../System/StorageSystemBGThreads.cpp | 4 +- 6 files changed, 68 insertions(+), 49 deletions(-) diff --git a/src/CloudServices/CnchBGThreadCommon.h b/src/CloudServices/CnchBGThreadCommon.h index 2725e0b8ac2..9323b53a076 100644 --- a/src/CloudServices/CnchBGThreadCommon.h +++ b/src/CloudServices/CnchBGThreadCommon.h @@ -15,8 +15,17 @@ #pragma once +#include +#include + namespace DB { +namespace ErrorCodes +{ + extern const int UNKNOWN_CNCH_BG_THREAD_ACTION; + extern const int UNKNOWN_CNCH_BG_THREAD_TYPE; +} + /** * Use enum in nested namespace instead enum class. * Because we want to pass it to protos easily, while it offers weaker compile-time check. @@ -24,10 +33,14 @@ namespace DB */ namespace CnchBGThread { + /// NOTE: when introducing a new type, remember to update + /// 1. {Server|Daemon}{Min|Max}Type accordingly + /// 2. 
toString(CnchBGThreadType type) enum Type : unsigned int { Empty = 0, + /// server types PartGC = 1, MergeMutate = 2, Consumer = 3, @@ -40,22 +53,22 @@ namespace CnchBGThread PartMover = 10, ManifestCheckpoint = 11, - ServerMinType = PartGC, - ServerMaxType = ManifestCheckpoint, - + /// DM types GlobalGC = 20, /// reserve several entries TxnGC = 21, AutoStatistics = 22, - DaemonMinType = GlobalGC, - DaemonMaxType = AutoStatistics, - ResourceReport = 30, /// worker - WorkerMinType = ResourceReport, /// Enum to mark start of worker types - WorkerMaxType = ResourceReport, /// Enum to mark end of worker types + /// worker types (perhaps this should not be included in CnchBGThread?) + ResourceReport = 30, }; - constexpr unsigned int NumType = WorkerMaxType + 1; + constexpr unsigned int ServerMinType = PartGC; + constexpr unsigned int ServerMaxType = ManifestCheckpoint; + constexpr unsigned int NumServerType = ServerMaxType + 1; + constexpr unsigned int DaemonMinType = GlobalGC; + constexpr unsigned int DaemonMaxType = AutoStatistics; + /// when introducing a new type, remember to update toCnchBGThreadAction() enum Action : unsigned int { Start = 0, @@ -116,14 +129,29 @@ constexpr auto toString(CnchBGThreadType type) __builtin_unreachable(); } -constexpr auto isServerBGThreadType(CnchBGThreadType t) +constexpr auto isServerBGThreadType(size_t t) +{ + return CnchBGThread::ServerMinType <= t && t <= CnchBGThread::ServerMaxType; +} + +inline CnchBGThreadType toServerBGThreadType(size_t t) +{ + if (unlikely(!isServerBGThreadType(t))) + throw Exception(ErrorCodes::UNKNOWN_CNCH_BG_THREAD_TYPE, "Unknown server bg thread type: {}", t); + return static_cast(t); +} + +constexpr auto iDaemonBGThreadType(size_t t) { - return CnchBGThreadType::ServerMinType <= t && t <= CnchBGThreadType::ServerMaxType; + return CnchBGThread::DaemonMinType <= t && t <= CnchBGThread::DaemonMaxType; } -constexpr auto iDaemonBGThreadType(CnchBGThreadType t) +inline CnchBGThreadAction 
toCnchBGThreadAction(size_t action) { - return CnchBGThreadType::DaemonMinType <= t && t <= CnchBGThreadType::DaemonMaxType; + if (unlikely(action > CnchBGThreadAction::Wakeup)) + throw Exception(ErrorCodes::UNKNOWN_CNCH_BG_THREAD_ACTION, "Unknown bg thread action: {}", action); + + return static_cast(action); } constexpr auto toString(CnchBGThreadAction action) diff --git a/src/CloudServices/CnchBGThreadsMap.cpp b/src/CloudServices/CnchBGThreadsMap.cpp index 26a6d729252..00532b7e4c6 100644 --- a/src/CloudServices/CnchBGThreadsMap.cpp +++ b/src/CloudServices/CnchBGThreadsMap.cpp @@ -237,8 +237,8 @@ void CnchBGThreadsMap::cleanup() CnchBGThreadsMapArray::CnchBGThreadsMapArray(ContextPtr global_context_) : WithContext(global_context_) { - for (auto i = size_t(CnchBGThreadType::ServerMinType); i <= size_t(CnchBGThreadType::ServerMaxType); ++i) - threads_array[i] = std::make_unique(global_context_, CnchBGThreadType(i)); + for (auto i = CnchBGThread::ServerMinType; i <= CnchBGThread::ServerMaxType; ++i) + threads_array[i] = std::make_unique(global_context_, static_cast(i)); if (global_context_->getServerType() == ServerType::cnch_worker && global_context_->getResourceManagerClient()) { @@ -263,9 +263,9 @@ CnchBGThreadsMapArray::~CnchBGThreadsMapArray() void CnchBGThreadsMapArray::shutdown() { - ThreadPool pool(size_t(CnchBGThreadType::ServerMaxType) - size_t(CnchBGThreadType::ServerMinType) + 1); + ThreadPool pool(CnchBGThread::ServerMaxType - CnchBGThread::ServerMinType + 1); - for (auto i = size_t(CnchBGThreadType::ServerMinType); i <= size_t(CnchBGThreadType::ServerMaxType); ++i) + for (auto i = CnchBGThread::ServerMinType; i <= CnchBGThread::ServerMaxType; ++i) { if (auto * t = threads_array[i].get()) pool.scheduleOrThrowOnError([t] { t->stopAll(); }); @@ -285,7 +285,7 @@ void CnchBGThreadsMapArray::cleanThread() { try { - for (auto i = size_t(CnchBGThreadType::ServerMinType); i <= size_t(CnchBGThreadType::ServerMaxType); ++i) + for (auto i = 
CnchBGThread::ServerMinType; i <= CnchBGThread::ServerMaxType; ++i) threads_array[i]->cleanup(); } catch (...) diff --git a/src/CloudServices/CnchBGThreadsMap.h b/src/CloudServices/CnchBGThreadsMap.h index 73f348c62ad..6cf6a32b316 100644 --- a/src/CloudServices/CnchBGThreadsMap.h +++ b/src/CloudServices/CnchBGThreadsMap.h @@ -25,6 +25,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int UNKNOWN_CNCH_BG_THREAD_TYPE; +} + using UUIDToBGThreads = std::unordered_map; namespace ResourceManagement { @@ -90,20 +95,13 @@ class CnchBGThreadsMapArray : protected WithContext, private boost::noncopyable inline CnchBGThreadsMap * at(size_t type) { - try - { - auto * res = threads_array.at(type).get(); - if (unlikely(!res)) - { - throw Exception(ErrorCodes::LOGICAL_ERROR, "CnchBGThread for type {} is not initialized.", toString(static_cast(type))); - } - return res; - } - catch(...) - { - /// Show a better exception message. - throw Exception(ErrorCodes::LOGICAL_ERROR, "CnchBGThread for type {} is not initialized. 
Maybe the enum CnchBGThread is mismatch.", toString(static_cast(type))); - } + if (unlikely(!isServerBGThreadType(type))) + throw Exception(ErrorCodes::UNKNOWN_CNCH_BG_THREAD_TYPE, "Unknown server bg thread type: {}", type); + + auto * res = threads_array.at(type).get(); + if (unlikely(!res)) + throw Exception(ErrorCodes::LOGICAL_ERROR, "CnchBGThread for type {} is not initialized.", type); + return res; } void cleanThread(); @@ -113,7 +111,7 @@ class CnchBGThreadsMapArray : protected WithContext, private boost::noncopyable bool isResourceReportRegistered(); private: - std::array, CnchBGThread::NumType> threads_array; + std::array, CnchBGThread::NumServerType> threads_array; std::unique_ptr resource_reporter_task; diff --git a/src/CloudServices/CnchServerServiceImpl.cpp b/src/CloudServices/CnchServerServiceImpl.cpp index b8abe86fcae..78387c10f91 100644 --- a/src/CloudServices/CnchServerServiceImpl.cpp +++ b/src/CloudServices/CnchServerServiceImpl.cpp @@ -778,18 +778,8 @@ void CnchServerServiceImpl::getBackgroundThreadStatus( try { - std::map res; - - auto type = CnchBGThreadType(request->type()); - if (type >= CnchBGThreadType::ServerMinType && type <= CnchBGThreadType::ServerMaxType) - { - auto threads = global_context->getCnchBGThreadsMap(type); - res = threads->getStatusMap(); - } - else - { - throw Exception("Not support type " + toString(int(request->type())), ErrorCodes::NOT_IMPLEMENTED); - } + auto type = toServerBGThreadType(request->type()); + std::map res = global_context->getCnchBGThreadsMap(type)->getStatusMap(); for (const auto & [storage_id, status] : res) { @@ -832,8 +822,9 @@ void CnchServerServiceImpl::controlCnchBGThread( StorageID storage_id = StorageID::createEmpty(); if (!request->storage_id().table().empty()) storage_id = RPCHelpers::createStorageID(request->storage_id()); - auto type = CnchBGThreadType(request->type()); - auto action = CnchBGThreadAction(request->action()); + + auto type = toServerBGThreadType(request->type()); + auto action 
= toCnchBGThreadAction(request->action()); auto & controller = static_cast(*cntl); LOG_DEBUG(log, "Received controlBGThread for {} type {} action {} from {}", storage_id.empty() ? "empty storage" : storage_id.getNameForLogs(), diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index 619f5cb0faa..1883368903b 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -818,6 +818,8 @@ M(5025, VIRTUAL_WAREHOUSE_NOT_FOUND) \ M(5027, CNCH_SERVER_NOT_FOUND) \ M(5030, CNCH_BG_THREAD_NOT_FOUND) \ + M(5031, UNKNOWN_CNCH_BG_THREAD_TYPE) \ + M(5032, UNKNOWN_CNCH_BG_THREAD_ACTION) \ M(5035, INSERTION_LABEL_ALREADY_EXISTS) \ M(5036, FAILED_TO_PUT_INSERTION_LABEL) \ M(5037, VIRTUAL_WAREHOUSE_ALREADY_EXISTS) \ diff --git a/src/Storages/System/StorageSystemBGThreads.cpp b/src/Storages/System/StorageSystemBGThreads.cpp index a3a8b7aed28..f80d62507c9 100644 --- a/src/Storages/System/StorageSystemBGThreads.cpp +++ b/src/Storages/System/StorageSystemBGThreads.cpp @@ -43,9 +43,9 @@ NamesAndTypesList StorageSystemBGThreads::getNamesAndTypes() void StorageSystemBGThreads::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const { - for (auto i = CnchBGThreadType::ServerMinType; i <= CnchBGThreadType::ServerMaxType; i = CnchBGThreadType(size_t(i) + 1)) + for (auto i = CnchBGThread::ServerMinType; i <= CnchBGThread::ServerMaxType; ++i) { - for (auto && [_, t] : context->getCnchBGThreadsMap(i)->getAll()) + for (auto && [_, t] : context->getCnchBGThreadsMap(static_cast(i))->getAll()) { size_t c = 0; res_columns[c++]->insert(toString(t->getType())); From 4cbb48ea9fcb718a4486fd3f48f8fc518fc6a1ae Mon Sep 17 00:00:00 2001 From: fredwang Date: Thu, 1 Aug 2024 05:31:32 +0000 Subject: [PATCH 034/292] Merge branch 'jiashuo_cherry-pick-some-disk-cache-fix-2.2' into 'cnch-2.2' fix(clickhousech@m-4656089224): [CP to 2.2]disk cache & preload performace See merge request dp/ClickHouse!22743 # Conflicts: # src/CloudServices/CnchDataWriter.cpp 
# src/CloudServices/CnchWorkerServiceImpl.cpp # src/Interpreters/PartLog.h # src/Storages/MergeTree/MergeTreeDataPartCNCH.cpp --- programs/server/Server.cpp | 1 - src/CloudServices/CnchDataWriter.cpp | 17 +-- src/CloudServices/CnchServerClient.cpp | 16 +-- src/CloudServices/CnchServerClient.h | 2 +- src/CloudServices/CnchServerResource.cpp | 3 +- src/CloudServices/CnchWorkerClient.cpp | 3 +- src/CloudServices/CnchWorkerServiceImpl.cpp | 58 ++++---- src/Core/Settings.h | 3 +- src/IO/WriteBufferFromFile.cpp | 5 +- src/IO/WriteBufferFromFile.h | 4 +- src/IO/WriteBufferFromFileDescriptor.cpp | 8 +- src/IO/WriteBufferFromFileDescriptor.h | 5 +- src/Interpreters/Context.cpp | 36 +---- src/Interpreters/Context.h | 6 +- .../InterpreterAlterDiskCacheQuery.cpp | 2 +- src/Interpreters/PartLog.cpp | 6 +- src/Interpreters/PartLog.h | 3 +- src/MergeTreeCommon/TableVersion.cpp | 2 +- src/MergeTreeCommon/assignCnchParts.cpp | 24 ++-- src/MergeTreeCommon/assignCnchParts.h | 2 +- src/Storages/DiskCache/DiskCacheLRU.cpp | 8 +- src/Storages/DiskCache/IDiskCache.cpp | 16 ++- src/Storages/DiskCache/IDiskCache.h | 2 + src/Storages/MergeTree/IMergeTreeDataPart.cpp | 7 +- .../MergeTree/MergeTreeDataPartCNCH.cpp | 127 +++++++++--------- .../MergeTree/MergeTreeDataPartCNCH.h | 2 +- src/Storages/MergeTree/MergeTreeSettings.h | 2 +- src/Storages/StorageCnchMergeTree.cpp | 4 +- 28 files changed, 185 insertions(+), 189 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index a2d09604dbc..e5ab12d7248 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -597,7 +597,6 @@ int Server::main(const std::vector & /*args*/) global_context->initRootConfig(config()); const auto & root_config = global_context->getRootConfig(); - // Initialize global thread pool. Do it before we fetch configs from zookeeper // nodes (`from_zk`), because ZooKeeper interface uses the pool. We will // ignore `max_thread_pool_size` in configs we fetch from ZK, but oh well. 
diff --git a/src/CloudServices/CnchDataWriter.cpp b/src/CloudServices/CnchDataWriter.cpp index 86c6a9b52df..2c24e0b8cd4 100644 --- a/src/CloudServices/CnchDataWriter.cpp +++ b/src/CloudServices/CnchDataWriter.cpp @@ -50,9 +50,9 @@ namespace ProfileEvents { -extern const Event CnchWriteDataElapsedMilliseconds; + extern const Event CnchWriteDataElapsedMilliseconds; + extern const Event PreloadSubmitTotalOps; } - namespace DB { namespace ErrorCodes @@ -715,10 +715,12 @@ void CnchDataWriter::publishStagedParts(const MergeTreeDataPartsCNCHVector & sta void CnchDataWriter::preload(const MutableMergeTreeDataPartsCNCHVector & dumped_parts) { - if (context->tryGetPreloadThrottler()) - context->tryGetPreloadThrottler()->add(1); - const auto & settings = context->getSettingsRef(); + + if (!settings.parts_preload_level || !storage.getSettings()->parts_preload_level || !storage.getSettings()->enable_local_disk_cache + || !storage.getSettings()->enable_preload_parts) + return; + try { Stopwatch timer; @@ -730,8 +732,8 @@ void CnchDataWriter::preload(const MutableMergeTreeDataPartsCNCHVector & dumped_ if (!preload_parts.empty()) { - auto max_timeout = std::max(30 * 1000L, settings.max_execution_time.totalMilliseconds()); - server_client->submitPreloadTask(storage, preload_parts, max_timeout); + ProfileEvents::increment(ProfileEvents::PreloadSubmitTotalOps, 1, Metrics::MetricType::Rate); + server_client->submitPreloadTask(storage, preload_parts, settings.preload_send_rpc_max_ms); LOG_DEBUG( storage.getLogger(), "Finish submit preload {} task for {} parts to server {}, elapsed {} ms", @@ -740,7 +742,6 @@ void CnchDataWriter::preload(const MutableMergeTreeDataPartsCNCHVector & dumped_ server_client->getRPCAddress(), timer.elapsedMilliseconds()); } - // TODO: invalidate deleted part's disk cache } catch (...) 
{ diff --git a/src/CloudServices/CnchServerClient.cpp b/src/CloudServices/CnchServerClient.cpp index efbc7d43c2b..84698a2fe4d 100644 --- a/src/CloudServices/CnchServerClient.cpp +++ b/src/CloudServices/CnchServerClient.cpp @@ -984,18 +984,19 @@ CnchServerClient::getBackGroundStatus(const CnchBGThreadType & type) return response.status(); } -void CnchServerClient::submitPreloadTask(const MergeTreeMetaBase & storage, const MutableMergeTreeDataPartsCNCHVector & parts, UInt64 timeout_ms) +brpc::CallId CnchServerClient::submitPreloadTask(const MergeTreeMetaBase & storage, const MutableMergeTreeDataPartsCNCHVector & parts, UInt64 timeout_ms) { + auto * cntl = new brpc::Controller(); + auto call_id = cntl->call_id(); if (parts.empty()) - return; + return call_id; - brpc::Controller cntl; Protos::SubmitPreloadTaskReq request; request.set_ts(time(nullptr)); - Protos::SubmitPreloadTaskResp response; + auto response = new Protos::SubmitPreloadTaskResp(); if (timeout_ms) - cntl.set_timeout_ms(timeout_ms); + cntl->set_timeout_ms(timeout_ms); /// prefer to get cnch table uuid from settings as multiple CloudMergeTrees cannot share a same uuid, /// thus most CloudMergeTrees have no uuids on the worker side @@ -1010,9 +1011,8 @@ void CnchServerClient::submitPreloadTask(const MergeTreeMetaBase & storage, cons fillPartModel(storage, *part, *new_part); } - stub->submitPreloadTask(&cntl, &request, &response, nullptr); - assertController(cntl); - RPCHelpers::checkResponse(response); + stub->submitPreloadTask(cntl, &request, response, brpc::NewCallback(RPCHelpers::onAsyncCallDone, response, cntl, std::make_shared())); + return call_id; } UInt32 CnchServerClient::reportDeduperHeartbeat(const StorageID & cnch_storage_id, const String & worker_table_name) diff --git a/src/CloudServices/CnchServerClient.h b/src/CloudServices/CnchServerClient.h index a8496d87289..3ab0bb7c1ba 100644 --- a/src/CloudServices/CnchServerClient.h +++ b/src/CloudServices/CnchServerClient.h @@ -186,7 +186,7 @@ class 
CnchServerClient : public RpcClientBase google::protobuf::RepeatedPtrField getBackGroundStatus(const CnchBGThreadType & type); - void submitPreloadTask(const MergeTreeMetaBase & storage, const MutableMergeTreeDataPartsCNCHVector & parts, UInt64 timeout_ms); + brpc::CallId submitPreloadTask(const MergeTreeMetaBase & storage, const MutableMergeTreeDataPartsCNCHVector & parts, UInt64 timeout_ms); UInt32 reportDeduperHeartbeat(const StorageID & cnch_storage_id, const String & worker_table_name); diff --git a/src/CloudServices/CnchServerResource.cpp b/src/CloudServices/CnchServerResource.cpp index ac430cf34d2..11850960f12 100644 --- a/src/CloudServices/CnchServerResource.cpp +++ b/src/CloudServices/CnchServerResource.cpp @@ -433,7 +433,6 @@ void CnchServerResource::allocateResource( leftover_server_parts.size()); ProfileEvents::increment(ProfileEvents::CnchPartAllocationSplits); } - // If the # of parts over vw size is not zero, // only go through hybrid allocation logic when that is smaller than a configurable ratio if ((context->getSettingsRef().enable_hybrid_allocation || cnch_table->getSettings()->enable_hybrid_allocation) @@ -451,7 +450,7 @@ void CnchServerResource::allocateResource( } else { - assigned_map = assignCnchParts(worker_group, leftover_server_parts, context); + assigned_map = assignCnchParts(worker_group, leftover_server_parts, context, cnch_table->getSettings()); } moveBucketTablePartsToAssignedParts( assigned_map, bucket_parts, worker_group->getWorkerIDVec(), required_bucket_numbers, replicated); diff --git a/src/CloudServices/CnchWorkerClient.cpp b/src/CloudServices/CnchWorkerClient.cpp index 09e9ccc9f17..6d3e45d3a6a 100644 --- a/src/CloudServices/CnchWorkerClient.cpp +++ b/src/CloudServices/CnchWorkerClient.cpp @@ -302,8 +302,7 @@ brpc::CallId CnchWorkerClient::preloadDataParts( auto * response = new Protos::PreloadDataPartsResp(); /// adjust the timeout to prevent timeout if there are too many parts to send, const auto & settings = 
context->getSettingsRef(); - auto send_timeout = std::max(settings.max_execution_time.value.totalMilliseconds() >> 1, settings.brpc_data_parts_timeout_ms.totalMilliseconds()); - cntl->set_timeout_ms(send_timeout); + cntl->set_timeout_ms(settings.preload_send_rpc_max_ms); auto call_id = cntl->call_id(); stub->preloadDataParts(cntl, &request, response, brpc::NewCallback(RPCHelpers::onAsyncCallDone, response, cntl, handler)); diff --git a/src/CloudServices/CnchWorkerServiceImpl.cpp b/src/CloudServices/CnchWorkerServiceImpl.cpp index d2fc6fb2615..10a71836f65 100644 --- a/src/CloudServices/CnchWorkerServiceImpl.cpp +++ b/src/CloudServices/CnchWorkerServiceImpl.cpp @@ -20,7 +20,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -54,6 +56,9 @@ #include #include #include +#include +#include +#include #if USE_RDKAFKA # include @@ -74,11 +79,6 @@ namespace ProfileEvents extern const Event PreloadExecTotalOps; } -namespace ProfileEvents -{ -extern const Event PreloadExecTotalOps; -} - namespace DB { namespace ErrorCodes @@ -614,8 +614,6 @@ void CnchWorkerServiceImpl::preloadDataParts( google::protobuf::Closure * done) { SUBMIT_THREADPOOL({ - SCOPE_EXIT({ProfileEvents::increment(ProfileEvents::PreloadExecTotalOps, 1, Metrics::MetricType::Rate);}); - Stopwatch watch; auto rpc_context = RPCHelpers::createSessionContextForRPC(getContext(), *cntl); StoragePtr storage = createStorageFromQuery(request->create_table_query(), rpc_context); @@ -637,30 +635,40 @@ void CnchWorkerServiceImpl::preloadDataParts( || (!cloud_merge_tree.getSettings()->parts_preload_level && !cloud_merge_tree.getSettings()->enable_preload_parts)) return; - std::unique_ptr pool; - ThreadPool * pool_ptr; + auto preload_level = request->preload_level(); + auto submit_ts = request->submit_ts(); + if (request->sync()) { - pool = std::make_unique(std::min(data_parts.size(), cloud_merge_tree.getSettings()->cnch_parallel_preloading.value)); - pool_ptr = pool.get(); + auto & 
settings = getContext()->getSettingsRef(); + auto pool = std::make_unique(std::min(data_parts.size(), settings.cnch_parallel_preloading.value)); + for (const auto & part : data_parts) + { + pool->scheduleOrThrowOnError([part, preload_level, submit_ts, storage] { + part->disk_cache_mode = DiskCacheMode::SKIP_DISK_CACHE;// avoid getCheckum & getIndex re-cache + part->preload(preload_level, submit_ts); + }); + } + pool->wait(); + LOG_DEBUG( + log, + "Finish preload tasks in {} ms, level: {}, sync: {}, size: {}", + watch.elapsedMilliseconds(), + preload_level, + sync, + data_parts.size()); } else - pool_ptr = &(IDiskCache::getThreadPool()); - - for (const auto & part : data_parts) { - part->preload(request->preload_level(), *pool_ptr, request->submit_ts()); + ThreadPool * preload_thread_pool = &(IDiskCache::getPreloadPool()); + for (const auto & part : data_parts) + { + preload_thread_pool->scheduleOrThrowOnError([part, preload_level, submit_ts, storage] { + part->disk_cache_mode = DiskCacheMode::SKIP_DISK_CACHE;// avoid getCheckum & getIndex re-cache + part->preload(preload_level, submit_ts); + }); + } } - - if (request->sync()) - pool->wait(); - - LOG_DEBUG( - storage->getLogger(), - "Finish preload tasks in {} ms, level: {}, sync: {}", - watch.elapsedMilliseconds(), - request->preload_level(), - request->sync()); }) } diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 2f5d6f88ce0..fba93a9e103 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -1287,7 +1287,8 @@ enum PreloadLevelSettings : UInt64 M(UInt64, cloud_task_auto_stop_timeout, 60, "We will remove this task when heartbeat can't find this task more than retries_count times.", 0)\ M(Bool, enable_local_disk_cache, 1, "enable global local disk cache", 0) \ M(UInt64, parts_preload_level, 1, "used for global preload(manual alter&table auto), 0=close preload;1=preload meta;2=preload data;3=preload meta&data, Note: for table auto preload, 0 will disable all table preload, > 0 will use table 
preload setting", 0) \ - M(UInt64, parts_preload_throttler, 0, "used for max preload rpc concurrent count", 0) \ + M(MaxThreads, cnch_parallel_preloading, 0, "Max threads when worker preload parts", 0) \ + M(UInt64, preload_send_rpc_max_ms, 3000, "Max rpc ms when send preload parts reqeust", 0) \ M(DiskCacheMode, disk_cache_mode, DiskCacheMode::AUTO, "Whether to use local disk cache", 0) \ M(Bool, enable_vw_customized_setting, false, "Allow vw customized overwrite profile settings", 0) \ M(Bool, enable_async_execution, false, "Whether to enable async execution", 0) \ diff --git a/src/IO/WriteBufferFromFile.cpp b/src/IO/WriteBufferFromFile.cpp index 67cd7ba27d6..a8937ffa0ac 100644 --- a/src/IO/WriteBufferFromFile.cpp +++ b/src/IO/WriteBufferFromFile.cpp @@ -31,8 +31,9 @@ WriteBufferFromFile::WriteBufferFromFile( int flags, mode_t mode, char * existing_memory, - size_t alignment) - : WriteBufferFromFileDescriptor(-1, buf_size, existing_memory, alignment), file_name(file_name_) + size_t alignment, + ThrottlerPtr throttler_) + : WriteBufferFromFileDescriptor(-1, buf_size, existing_memory, alignment, throttler_), file_name(file_name_) { ProfileEvents::increment(ProfileEvents::FileOpen); diff --git a/src/IO/WriteBufferFromFile.h b/src/IO/WriteBufferFromFile.h index 8c535e5461f..23fa1a3e900 100644 --- a/src/IO/WriteBufferFromFile.h +++ b/src/IO/WriteBufferFromFile.h @@ -2,6 +2,7 @@ #include +#include #include #include @@ -35,7 +36,8 @@ class WriteBufferFromFile : public WriteBufferFromFileDescriptor int flags = -1, mode_t mode = 0666, char * existing_memory = nullptr, - size_t alignment = 0); + size_t alignment = 0, + ThrottlerPtr throttler = nullptr); /// Use pre-opened file descriptor. 
WriteBufferFromFile( diff --git a/src/IO/WriteBufferFromFileDescriptor.cpp b/src/IO/WriteBufferFromFileDescriptor.cpp index 6b6f2034f6c..baef0522af1 100644 --- a/src/IO/WriteBufferFromFileDescriptor.cpp +++ b/src/IO/WriteBufferFromFileDescriptor.cpp @@ -68,6 +68,9 @@ void WriteBufferFromFileDescriptor::nextImpl() Stopwatch watch; + if (throttler) + throttler->add(offset()); + size_t bytes_written = 0; while (bytes_written != offset()) { @@ -106,8 +109,9 @@ WriteBufferFromFileDescriptor::WriteBufferFromFileDescriptor( int fd_, size_t buf_size, char * existing_memory, - size_t alignment) - : WriteBufferFromFileBase(buf_size, existing_memory, alignment), fd(fd_) {} + size_t alignment, + ThrottlerPtr throttler_) + : WriteBufferFromFileBase(buf_size, existing_memory, alignment), fd(fd_), throttler(throttler_) {} WriteBufferFromFileDescriptor::~WriteBufferFromFileDescriptor() diff --git a/src/IO/WriteBufferFromFileDescriptor.h b/src/IO/WriteBufferFromFileDescriptor.h index 1d341852ead..38af396a2b9 100644 --- a/src/IO/WriteBufferFromFileDescriptor.h +++ b/src/IO/WriteBufferFromFileDescriptor.h @@ -22,6 +22,7 @@ #pragma once #include +#include namespace DB @@ -33,6 +34,7 @@ class WriteBufferFromFileDescriptor : public WriteBufferFromFileBase { protected: int fd; + ThrottlerPtr throttler; void nextImpl() override; @@ -44,7 +46,8 @@ class WriteBufferFromFileDescriptor : public WriteBufferFromFileBase int fd_ = -1, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, - size_t alignment = 0); + size_t alignment = 0, + ThrottlerPtr throttler = nullptr); /** Could be used before initialization if needed 'fd' was not passed to constructor. * It's not possible to change 'fd' during work. 
diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 81b08cedd52..51491ac32c6 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -3141,14 +3141,6 @@ ThrottlerPtr Context::getDiskCacheThrottler() const return shared->disk_cache_throttler; } -ThrottlerPtr Context::tryGetPreloadThrottler() const -{ - callOnce(shared->preload_throttler_initialized, [&] { - shared->preload_throttler = settings.parts_preload_throttler == 0 ? nullptr : std::make_shared(settings.parts_preload_throttler); - }); - return shared->preload_throttler; -} - ThrottlerPtr Context::getReplicatedSendsThrottler() const { callOnce(shared->replicated_sends_throttler_initialized, [&] { @@ -5743,31 +5735,11 @@ std::vector> Context::getAllWorkerResou return shared->named_cnch_sessions->getAllWorkerResources(); } -Context::PartAllocator Context::getPartAllocationAlgo() const +Context::PartAllocator Context::getPartAllocationAlgo(MergeTreeSettingsPtr table_settings) const { - /// we prefer the config setting first - if (getConfigRef().has("part_allocation_algorithm")) - { - LOG_DEBUG( - shared->log, - "Using part allocation algorithm from config: {}.", - getConfigRef().getInt("part_allocation_algorithm")); - switch (getConfigRef().getInt("part_allocation_algorithm")) - { - case 0: - return PartAllocator::JUMP_CONSISTENT_HASH; - case 1: - return PartAllocator::RING_CONSISTENT_HASH; - case 2: - return PartAllocator::STRICT_RING_CONSISTENT_HASH; - case 3: - return PartAllocator::SIMPLE_HASH; - default: - return PartAllocator::JUMP_CONSISTENT_HASH; - } - } + auto algorithm = table_settings->cnch_part_allocation_algorithm >= 0 ? 
table_settings->cnch_part_allocation_algorithm : settings.cnch_part_allocation_algorithm; + LOG_DEBUG(shared->log, "Send query with cnch_part_allocation_algorithm = {}, system setting = {}, table setting = {}", algorithm, settings.cnch_part_allocation_algorithm, table_settings->cnch_part_allocation_algorithm); - /// if not set, we use the query settings switch (settings.cnch_part_allocation_algorithm) { case 0: @@ -5777,7 +5749,7 @@ Context::PartAllocator Context::getPartAllocationAlgo() const case 2: return PartAllocator::STRICT_RING_CONSISTENT_HASH; case 3: - return PartAllocator::SIMPLE_HASH; + return PartAllocator::DISK_CACHE_STEALING_DEBUG; default: return PartAllocator::JUMP_CONSISTENT_HASH; } diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 4158bc02b48..706632b6f9b 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -55,6 +55,7 @@ #include #include #include +#include #include #if !defined(ARCADIA_BUILD) # include @@ -1272,7 +1273,6 @@ class Context : public ContextData, public std::enable_shared_from_this SchedulePool::Type pool_type, SettingFieldUInt64 pool_size, CurrentMetrics::Metric metric, const char * name) const; ThrottlerPtr getDiskCacheThrottler() const; - ThrottlerPtr tryGetPreloadThrottler() const; ThrottlerPtr getReplicatedFetchesThrottler() const; ThrottlerPtr getReplicatedSendsThrottler() const; @@ -1658,10 +1658,10 @@ class Context : public ContextData, public std::enable_shared_from_this JUMP_CONSISTENT_HASH = 0, RING_CONSISTENT_HASH = 1, STRICT_RING_CONSISTENT_HASH = 2, - SIMPLE_HASH = 3,//Note: Now just used for test disk cache stealing so not used for online + DISK_CACHE_STEALING_DEBUG = 3,//Note: Now just used for test disk cache stealing so not used for online }; - PartAllocator getPartAllocationAlgo() const; + PartAllocator getPartAllocationAlgo(MergeTreeSettingsPtr settings) const; /// Consistent hash algorithm for hybrid part allocation enum HybridPartAllocator : int diff --git 
a/src/Interpreters/InterpreterAlterDiskCacheQuery.cpp b/src/Interpreters/InterpreterAlterDiskCacheQuery.cpp index d8b8b9fb24e..b20a6e6ccec 100644 --- a/src/Interpreters/InterpreterAlterDiskCacheQuery.cpp +++ b/src/Interpreters/InterpreterAlterDiskCacheQuery.cpp @@ -44,7 +44,7 @@ BlockIO InterpreterAlterDiskCacheQuery::execute() if (query.type == ASTAlterDiskCacheQuery::Type::PRELOAD) { - storage->sendPreloadTasks(getContext(), std::move(parts), query.sync, getContext()->getSettings().parts_preload_level); + storage->sendPreloadTasks(getContext(), std::move(parts), query.sync, getContext()->getSettings().parts_preload_level, time(nullptr)); } else if (query.type == ASTAlterDiskCacheQuery::Type::DROP) { diff --git a/src/Interpreters/PartLog.cpp b/src/Interpreters/PartLog.cpp index 55c6c6839b9..94cc51f2b38 100644 --- a/src/Interpreters/PartLog.cpp +++ b/src/Interpreters/PartLog.cpp @@ -11,6 +11,7 @@ #include #include +#include "common/types.h" #include #include @@ -55,6 +56,7 @@ NamesAndTypesList PartLogElement::getNamesAndTypes() {"rows", std::make_shared()}, {"segments", std::make_shared()}, + {"preload_level", std::make_shared()}, {"size_in_bytes", std::make_shared()}, // On disk /// Merge-specific info @@ -91,6 +93,7 @@ void PartLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insert(rows); columns[i++]->insert(segments); + columns[i++]->insert(preload_level); columns[i++]->insert(bytes_compressed_on_disk); Array source_part_names_array; @@ -177,7 +180,7 @@ bool PartLog::addNewParts( return true; } -PartLogElement PartLog::createElement(PartLogElement::Type event_type, const IMergeTreeDataPartPtr & part, UInt64 elapsed_ns, const String & exception, UInt64 submit_ts, UInt64 segments) +PartLogElement PartLog::createElement(PartLogElement::Type event_type, const IMergeTreeDataPartPtr & part, UInt64 elapsed_ns, const String & exception, UInt64 submit_ts, UInt64 segments, UInt64 preload_level) { PartLogElement elem; @@ -193,6 +196,7 @@ 
PartLogElement PartLog::createElement(PartLogElement::Type event_type, const IMe elem.rows = part->rows_count; elem.segments = segments; + elem.preload_level = preload_level; elem.bytes_compressed_on_disk = part->bytes_on_disk; elem.exception = exception; diff --git a/src/Interpreters/PartLog.h b/src/Interpreters/PartLog.h index 8cc7c98ae3f..55060120751 100644 --- a/src/Interpreters/PartLog.h +++ b/src/Interpreters/PartLog.h @@ -40,6 +40,7 @@ struct PartLogElement /// Size of the part UInt64 rows = 0; UInt64 segments = 0; + UInt64 preload_level = 0; /// Size of files in filesystem UInt64 bytes_compressed_on_disk = 0; @@ -81,7 +82,7 @@ class PartLog : public SystemLog static bool addNewParts(ContextPtr context, const MutableDataPartsVector & parts, UInt64 elapsed_ns, const ExecutionStatus & execution_status = {}); static PartLogElement createElement(PartLogElement::Type event_type, const IMergeTreeDataPartPtr & part, - UInt64 elapsed_ns = 0, const String & exception = "", UInt64 submit_ts = 0, UInt64 segments = 0); + UInt64 elapsed_ns = 0, const String & exception = "", UInt64 submit_ts = 0, UInt64 segments = 0, UInt64 preload_level = 0); }; } diff --git a/src/MergeTreeCommon/TableVersion.cpp b/src/MergeTreeCommon/TableVersion.cpp index 218c2ffea3b..58543ec7525 100644 --- a/src/MergeTreeCommon/TableVersion.cpp +++ b/src/MergeTreeCommon/TableVersion.cpp @@ -128,7 +128,7 @@ void TableVersion::fileterDataByWorkerInfo(const MergeTreeMetaBase & storage, st WorkerGroupHandle mock_wg = WorkerGroupHandleImpl::mockWorkerGroupHandle(worker_id_prefix, worker_info->num_workers, getContext()); // Use the same allocation algorithm as preaload. can work with parts as well as delete bitmap. 
- auto allocate_res = assignCnchParts(mock_wg, data_vector, getContext()); + auto allocate_res = assignCnchParts(mock_wg, data_vector, getContext(), storage.getSettings()); // only get the allocated data which belongs to current worker worker_hold_data = std::move(allocate_res[worker_id]); diff --git a/src/MergeTreeCommon/assignCnchParts.cpp b/src/MergeTreeCommon/assignCnchParts.cpp index 889dad7f0d9..7262b0e64f0 100644 --- a/src/MergeTreeCommon/assignCnchParts.cpp +++ b/src/MergeTreeCommon/assignCnchParts.cpp @@ -67,20 +67,16 @@ inline void reportStats(Poco::Logger * log, const M & map, const String & name, } /// explicit instantiation for server part and cnch data part. -template ServerAssignmentMap assignCnchParts(const WorkerGroupHandle & worker_group, const ServerDataPartsVector & parts, const ContextPtr & query_context); -template AssignmentMap assignCnchParts(const WorkerGroupHandle & worker_group, const MergeTreeDataPartsCNCHVector & parts, const ContextPtr & query_context); -template std::unordered_map assignCnchParts(const WorkerGroupHandle & worker_group, const DataModelPartWrapperVector &, const ContextPtr & query_context); -template std::unordered_map assignCnchParts(const WorkerGroupHandle & worker_group, const DeleteBitmapMetaPtrVector &, const ContextPtr & query_context); +template ServerAssignmentMap assignCnchParts(const WorkerGroupHandle & worker_group, const ServerDataPartsVector & parts, const ContextPtr & query_context, MergeTreeSettingsPtr settings); +template AssignmentMap assignCnchParts(const WorkerGroupHandle & worker_group, const MergeTreeDataPartsCNCHVector & parts, const ContextPtr & query_context, MergeTreeSettingsPtr settings); +template std::unordered_map assignCnchParts(const WorkerGroupHandle & worker_group, const DataModelPartWrapperVector &, const ContextPtr & query_context, MergeTreeSettingsPtr settings); +template std::unordered_map assignCnchParts(const WorkerGroupHandle & worker_group, const DeleteBitmapMetaPtrVector &, 
const ContextPtr & query_context, MergeTreeSettingsPtr settings); template -std::unordered_map assignCnchParts(const WorkerGroupHandle & worker_group, const DataPartsCnchVector & parts, const ContextPtr & query_context) +std::unordered_map assignCnchParts(const WorkerGroupHandle & worker_group, const DataPartsCnchVector & parts, const ContextPtr & query_context, MergeTreeSettingsPtr settings) { static auto * log = &Poco::Logger::get("assignCnchParts"); - Context::PartAllocator part_allocation_algorithm; - if (query_context->getSettingsRef().cnch_part_allocation_algorithm.changed) - part_allocation_algorithm = query_context->getPartAllocationAlgo(); - else - part_allocation_algorithm = worker_group->getContext()->getPartAllocationAlgo(); + Context::PartAllocator part_allocation_algorithm = query_context->getPartAllocationAlgo(settings); switch (part_allocation_algorithm) { @@ -112,17 +108,17 @@ std::unordered_map assignCnchParts(const WorkerGrou reportStats(log, ret, "Strict Consistent Hash", worker_group->getRing().size()); return ret; } - case Context::PartAllocator::SIMPLE_HASH: //Note: Now just used for test disk cache stealing so not used for online + case Context::PartAllocator::DISK_CACHE_STEALING_DEBUG: //Note: Now just used for test disk cache stealing so not used for online { - auto ret = assignCnchPartsWithSimpleHash(worker_group->getWorkerIDVec(), worker_group->getIdHostPortsMap(), parts); - reportStats(log, ret, "Simple Hash", worker_group->getWorkerIDVec().size()); + auto ret = assignCnchPartsWithStealingCache(worker_group->getWorkerIDVec(), worker_group->getIdHostPortsMap(), parts); + reportStats(log, ret, "disk cache stealing debug", worker_group->getWorkerIDVec().size()); return ret; } } } template -std::unordered_map assignCnchPartsWithSimpleHash(WorkerList worker_ids, const std::unordered_map & worker_hosts, const DataPartsCnchVector & parts) +std::unordered_map assignCnchPartsWithStealingCache(WorkerList worker_ids, const std::unordered_map & 
worker_hosts, const DataPartsCnchVector & parts) { std::unordered_map ret; /// we don't know the order of workers returned from consul so sort then explicitly now diff --git a/src/MergeTreeCommon/assignCnchParts.h b/src/MergeTreeCommon/assignCnchParts.h index 3f410284fb7..dbccb269345 100644 --- a/src/MergeTreeCommon/assignCnchParts.h +++ b/src/MergeTreeCommon/assignCnchParts.h @@ -48,7 +48,7 @@ FilePartsAssignMap assignCnchFileParts(const WorkerGroupHandle & worker_group, c HivePartsAssignMap assignCnchHiveParts(const WorkerGroupHandle & worker_group, const HiveFiles & parts); template -std::unordered_map assignCnchParts(const WorkerGroupHandle & worker_group, const DataPartsCnchVector & parts, const ContextPtr & query_context); +std::unordered_map assignCnchParts(const WorkerGroupHandle & worker_group, const DataPartsCnchVector & parts, const ContextPtr & context, MergeTreeSettingsPtr settings); /** * splitCnchParts will split server parts into bucketed parts and leftover server parts. diff --git a/src/Storages/DiskCache/DiskCacheLRU.cpp b/src/Storages/DiskCache/DiskCacheLRU.cpp index c5795f2f111..7d4830fc744 100644 --- a/src/Storages/DiskCache/DiskCacheLRU.cpp +++ b/src/Storages/DiskCache/DiskCacheLRU.cpp @@ -223,11 +223,6 @@ void DiskCacheLRU::set(const String& seg_name, ReadBuffer& value, size_t weight_ ProfileEvents::increment(ProfileEvents::DiskCacheSetTotalOps, 1, Metrics::MetricType::Rate, {{"type", (is_preload ? 
"preload": "query")}}); - if (set_throughput_throttler) - { - set_throughput_throttler->add(weight_hint); - } - auto key = hash(seg_name); auto& shard = containers.shard(key); // Insert cache meta first, if there is a entry already there, skip this insert @@ -307,7 +302,8 @@ size_t DiskCacheLRU::writeSegment(const String& seg_key, ReadBuffer& buffer, Res // Write into temporary file, by default it will truncate this file size_t written_size = 0; { - WriteBufferFromFile to(fs::path(disk->getPath()) / temp_cache_rel_path); + WriteBufferFromFile to( + fs::path(disk->getPath()) / temp_cache_rel_path, DBMS_DEFAULT_BUFFER_SIZE, -1, 0666, nullptr, 0, set_throughput_throttler); copyData(buffer, to, reservation.get()); to.finalize(); written_size = to.count(); diff --git a/src/Storages/DiskCache/IDiskCache.cpp b/src/Storages/DiskCache/IDiskCache.cpp index 2ff1a9c8c53..776c15adada 100644 --- a/src/Storages/DiskCache/IDiskCache.cpp +++ b/src/Storages/DiskCache/IDiskCache.cpp @@ -40,7 +40,7 @@ namespace ErrorCodes std::unique_ptr IDiskCache::local_disk_cache_thread_pool; std::unique_ptr IDiskCache::local_disk_cache_evict_thread_pool; - +std::unique_ptr IDiskCache::local_disk_cache_preload_thread_pool; void IDiskCache::init(const Context & global_context) { @@ -62,6 +62,11 @@ void IDiskCache::init(const Context & global_context) settings.local_disk_cache_evict_thread_pool_size, settings.local_disk_cache_evict_thread_pool_size, settings.local_disk_cache_evict_thread_pool_size * 100); + + local_disk_cache_preload_thread_pool = std::make_unique( + settings.cnch_parallel_preloading, + settings.cnch_parallel_preloading, + settings.cnch_parallel_preloading * 100); } void IDiskCache::close() @@ -70,6 +75,8 @@ void IDiskCache::close() local_disk_cache_thread_pool.reset(); if (local_disk_cache_evict_thread_pool) local_disk_cache_evict_thread_pool.reset(); + if (local_disk_cache_preload_thread_pool) + local_disk_cache_preload_thread_pool.reset(); } ThreadPool & 
IDiskCache::getThreadPool() @@ -86,6 +93,13 @@ ThreadPool & IDiskCache::getEvictPool() return *local_disk_cache_evict_thread_pool; } +ThreadPool & IDiskCache::getPreloadPool() +{ + if (!local_disk_cache_preload_thread_pool) + throw Exception("Uninitialized disk cache thread pool", ErrorCodes::CANNOT_SCHEDULE_TASK); + return *local_disk_cache_preload_thread_pool; +} + IDiskCache::IDiskCache( const String & name_, diff --git a/src/Storages/DiskCache/IDiskCache.h b/src/Storages/DiskCache/IDiskCache.h index 28583e59ddb..3756560dc84 100644 --- a/src/Storages/DiskCache/IDiskCache.h +++ b/src/Storages/DiskCache/IDiskCache.h @@ -48,6 +48,7 @@ class IDiskCache : public std::enable_shared_from_this static void close(); static ThreadPool & getThreadPool(); static ThreadPool & getEvictPool(); + static ThreadPool & getPreloadPool(); enum class DataType { @@ -151,6 +152,7 @@ class IDiskCache : public std::enable_shared_from_this private: bool scheduleCacheTask(const std::function & task); + static std::unique_ptr local_disk_cache_preload_thread_pool; static std::unique_ptr local_disk_cache_thread_pool; static std::unique_ptr local_disk_cache_evict_thread_pool; }; diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 4d927eaf11d..e90344ecaf9 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -914,13 +914,10 @@ void IMergeTreeDataPart::loadColumnsChecksumsIndexes(bool require_columns_checks loadUUID(); loadColumns(require_columns_checksums); - { - std::lock_guard lock(checksums_mutex); - checksums_ptr = loadChecksums(require_columns_checksums); - } + getChecksums(); loadIndexGranularity(); calculateColumnsSizesOnDisk(); - loadIndex(); /// Must be called after loadIndexGranularity as it uses the value of `index_granularity` + getIndex(); /// Must be called after loadIndexGranularity as it uses the value of `index_granularity` loadRowsCount(); /// Must be called 
after loadIndexGranularity() as it uses the value of `index_granularity`. loadPartitionAndMinMaxIndex(); if (!parent_part) diff --git a/src/Storages/MergeTree/MergeTreeDataPartCNCH.cpp b/src/Storages/MergeTree/MergeTreeDataPartCNCH.cpp index b57f9d9f5c0..609cb094bfe 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCNCH.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartCNCH.cpp @@ -1217,8 +1217,9 @@ void MergeTreeDataPartCNCH::fillProjectionNamesFromChecksums(const MergeTreeData } } -void MergeTreeDataPartCNCH::preload(UInt64 preload_level, ThreadPool & pool, UInt64 submit_ts) const +void MergeTreeDataPartCNCH::preload(UInt64 preload_level, UInt64 submit_ts) const { + Stopwatch watch; String full_path = getFullPath(); if (isPartial()) { @@ -1232,36 +1233,35 @@ void MergeTreeDataPartCNCH::preload(UInt64 preload_level, ThreadPool & pool, UIn LOG_WARNING(storage.log, "Can't find {} when preload level: {} before caching", full_path + DATA_FILE, preload_level); return; } - - LOG_TRACE(storage.log, "Start preload part: {}", name); - - Stopwatch watch; - - auto cache = DiskCacheFactory::instance().get(DiskCacheType::MergeTree); - auto cache_strategy = cache->getStrategy(); + auto disk_cache = DiskCacheFactory::instance().get(DiskCacheType::MergeTree); + auto cache_strategy = disk_cache->getStrategy(); MarkRanges all_mark_ranges{MarkRange(0, getMarksCount())}; IDiskCacheSegmentsVector segments; MarkCachePtr mark_cache_holder = storage.getContext()->getMarkCache(); - auto add_segments = [&, this, strategy = cache_strategy, disk_cache = cache]( - const NameAndTypePair & real_column) { - ISerialization::StreamCallback callback = [&](const ISerialization::SubstreamPath & substream_path) { - + auto add_segments = [&, this](const NameAndTypePair & real_column) { + ISerialization::StreamCallback stream_callback = [&](const ISerialization::SubstreamPath & substream_path) { String stream_name = ISerialization::getFileNameForStream(real_column, substream_path); String file_name = 
stream_name; ChecksumsPtr checksums = getChecksums(); if (!checksums->files.count(file_name + DATA_FILE_EXTENSION)) { - LOG_WARNING(storage.log, "Can't find {} in checksum info and skip cache it: column = {}, stream = {}", real_column.name, stream_name, file_name + DATA_FILE_EXTENSION); + LOG_WARNING( + storage.log, + "Can't find {} in checksum info and skip cache it: column = {}, stream = {}", + real_column.name, + stream_name, + file_name + DATA_FILE_EXTENSION); return; } String mark_file_name = index_granularity_info.getMarksFilePath(stream_name); String data_file_name = stream_name + DATA_FILE_EXTENSION; - IMergeTreeDataPartPtr source_data_part = isProjectionPart() ? shared_from_this() : getMvccDataPart(stream_name + DATA_FILE_EXTENSION); - auto seg = strategy->transferRangesToSegments( + IMergeTreeDataPartPtr source_data_part + = isProjectionPart() ? shared_from_this() : getMvccDataPart(stream_name + DATA_FILE_EXTENSION); + auto seg = cache_strategy->transferRangesToSegments( all_mark_ranges, source_data_part, PartFileDiskCacheSegment::FileOffsetAndSize{getFileOffsetOrZero(mark_file_name), getFileSizeOrZero(mark_file_name)}, @@ -1276,7 +1276,7 @@ void MergeTreeDataPartCNCH::preload(UInt64 preload_level, ThreadPool & pool, UIn }; auto serialization = getSerializationForColumn(real_column); - serialization->enumerateStreams(callback); + serialization->enumerateStreams(stream_callback); }; for (const NameAndTypePair & column : *columns_ptr) @@ -1312,75 +1312,72 @@ void MergeTreeDataPartCNCH::preload(UInt64 preload_level, ThreadPool & pool, UIn segments.emplace_back(std::make_shared(shared_from_this(), preload_level)); } - std::function callback; + String last_exception{}; + int real_cache_segments_count = 0; - if (auto part_log = storage.getContext()->getPartLog(storage.getDatabaseName())) + auto meta_disk_cache = disk_cache->getMetaCache(); + auto data_disk_cache = disk_cache->getDataCache(); + for (const auto & segment : segments) { - callback = [w = watch, 
part_log, part = shared_from_this(), submit_ts](const String & exception, const int & segments_count) { - part_log->add(PartLog::createElement(PartLogElement::PRELOAD_PART, part, w.elapsedNanoseconds(), exception, submit_ts, segments_count)); - }; - } - - pool.scheduleOrThrowOnError([this, part_path, full_path, level = preload_level, segments = std::move(segments), cb = std::move(callback), disk_cache = cache] { - String last_exception{}; - int real_cache_segments_count = 0; - - auto meta_disk_cache = disk_cache->getMetaCache(); - auto data_disk_cache = disk_cache->getDataCache(); - for (const auto & segment : segments) + try { - try + String mark_key = segment->getMarkName(); + String seg_key = segment->getSegmentName(); + if (!mark_key.empty()) // means this is PartFileDiskCacheSegment { - String mark_key = segment->getMarkName(); - String seg_key = segment->getSegmentName(); - if (!mark_key.empty()) // means this is PartFileDiskCacheSegment + if (preload_level == PreloadLevelSettings::MetaPreload) { - if (level == PreloadLevelSettings::MetaPreload) - { - if (meta_disk_cache->get(mark_key).second.empty()) - { - segment->cacheToDisk(*meta_disk_cache); - real_cache_segments_count++; - } - } - else if (level == PreloadLevelSettings::DataPreload) + if (meta_disk_cache->get(mark_key).second.empty()) { - if (data_disk_cache->get(seg_key).second.empty()) - { - segment->cacheToDisk(*data_disk_cache); - real_cache_segments_count++; - } + segment->cacheToDisk(*meta_disk_cache); + real_cache_segments_count++; } - else + } + else if (preload_level == PreloadLevelSettings::DataPreload) + { + if (data_disk_cache->get(seg_key).second.empty()) { - if (data_disk_cache->get(seg_key).second.empty() || meta_disk_cache->get(mark_key).second.empty()) - { - segment->cacheToDisk(*disk_cache); - real_cache_segments_count++; - } + segment->cacheToDisk(*data_disk_cache); + real_cache_segments_count++; } } - else // means this is ChecksumsDiskCacheSegment or PrimaryIndexDiskCacheSegment + 
else { - if (meta_disk_cache->get(seg_key).second.empty()) + if (data_disk_cache->get(seg_key).second.empty() || meta_disk_cache->get(mark_key).second.empty()) { - segment->cacheToDisk(*meta_disk_cache); + segment->cacheToDisk(*disk_cache); real_cache_segments_count++; } } } - catch (const Exception & e) + else // means this is ChecksumsDiskCacheSegment or PrimaryIndexDiskCacheSegment { - last_exception = e.message(); - /// no exception thrown + if (meta_disk_cache->get(seg_key).second.empty()) + { + segment->cacheToDisk(*meta_disk_cache); + real_cache_segments_count++; + } } } + catch (const Exception & e) + { + last_exception = e.message(); + /// no exception thrown + } + } - if (cb) - cb(last_exception, real_cache_segments_count); - - LOG_TRACE(storage.log, "Preloaded part: {}, marks_count: {}, segments_count: {}, cached_count: {} ", name, getMarksCount(), segments.size(), real_cache_segments_count); - }); + auto part_log = storage.getContext()->getPartLog(storage.getDatabaseName()); + part_log->add(PartLog::createElement( + PartLogElement::PRELOAD_PART, shared_from_this(), watch.elapsedNanoseconds(), last_exception, submit_ts, real_cache_segments_count, preload_level)); + + LOG_TRACE( + storage.log, + "Preloaded part: {}, marks_count: {}, segments_count: {}, cached_count: {}, time_ns: {}", + name, + getMarksCount(), + segments.size(), + real_cache_segments_count, + watch.elapsedNanoseconds()); } void MergeTreeDataPartCNCH::dropDiskCache(ThreadPool & pool, bool drop_vw_disk_cache) const diff --git a/src/Storages/MergeTree/MergeTreeDataPartCNCH.h b/src/Storages/MergeTree/MergeTreeDataPartCNCH.h index 5ba5bf46480..1389d8d1004 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCNCH.h +++ b/src/Storages/MergeTree/MergeTreeDataPartCNCH.h @@ -107,7 +107,7 @@ class MergeTreeDataPartCNCH : public IMergeTreeDataPart /// it's a no-op because in CNCH, projection parts are uploaded to parent part's data file virtual void projectionRemove(const String &, bool) const 
override { } - void preload(UInt64 preload_level, ThreadPool & pool, UInt64 submit_ts) const; + void preload(UInt64 preload_level, UInt64 submit_ts) const; void dropDiskCache(ThreadPool & pool, bool drop_vw_disk_cache = false) const; void setColumnsPtr(const NamesAndTypesListPtr & new_columns_ptr) override {columns_ptr = new_columns_ptr;} diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index a112a82db2b..c2e24ecb4c1 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -510,7 +510,6 @@ enum StealingCacheMode : UInt64 M(UInt64, parts_preload_level, 0, "0=close preload;1=preload meta;2=preload data;3=preload meta&data", 0) \ M(Bool, enable_parts_sync_preload, 0, "Enable sync preload parts", 0) \ M(Bool, enable_gc_evict_disk_cache, false, "Enable gc evict disk cache", 0) \ - M(MaxThreads, cnch_parallel_preloading, 0, "Max threads when worker preload parts", 0) \ M(UInt64, disk_cache_stealing_mode, 0, "Read/write remote vw local disk cache if cur local disk cache empty, 0: close; 1: read 2: write 3: read&write", 0) \ \ /* Renamed settings - cannot be ignored */\ @@ -567,6 +566,7 @@ enum StealingCacheMode : UInt64 /** JSON related settings start*/ \ M(UInt64, json_subcolumns_threshold, 1000, "Max number of json sub columns", 0) \ M(UInt64, json_partial_schema_assemble_batch_size, 100, "Batch size to assemble dynamic object column schema", 0) \ + M(Int64, cnch_part_allocation_algorithm, -1, "Part allocation algorithm, -1: disable table setting and use query setting, 0: jump consistent hashing, 1: bounded hash ring consistent hashing, 2: strict ring consistent hashing.", 0) \ /** JSON related settings end*/ \ \ /// Settings that should not change after the creation of a table. 
diff --git a/src/Storages/StorageCnchMergeTree.cpp b/src/Storages/StorageCnchMergeTree.cpp index dc0515f95f8..09ede2b049f 100644 --- a/src/Storages/StorageCnchMergeTree.cpp +++ b/src/Storages/StorageCnchMergeTree.cpp @@ -1077,7 +1077,7 @@ CheckResults StorageCnchMergeTree::checkDataCommon(const ASTPtr & query, Context std::mutex mutex; CheckResults results; - auto assignment = assignCnchParts(worker_group, parts, local_context); + auto assignment = assignCnchParts(worker_group, parts, local_context, getSettings()); ThreadPool allocate_pool(std::min(local_context->getSettingsRef().parts_preallocate_pool_size, num_of_workers)); @@ -2255,8 +2255,8 @@ void StorageCnchMergeTree::checkAlterSettings(const AlterCommands & commands) co "parts_preload_level", "cnch_parallel_prefetching", "enable_prefetch_checksums", - "cnch_parallel_preloading", "disk_cache_stealing_mode", + "cnch_part_allocation_algorithm", "enable_addition_bg_task", "max_addition_bg_task_num", From 31d6d0b427a67e358d0edec854a22159f1db3427 Mon Sep 17 00:00:00 2001 From: fredwang Date: Thu, 1 Aug 2024 05:31:53 +0000 Subject: [PATCH 035/292] Merge 'fix/fix_makeSetsForIndex_sub_columns_cnch_2.2' into 'cnch-2.2' fix(optimizer@m-4548778590): fix makeSetsForIndex don't support subcolumns cnch-2.2 See merge request: !22682 --- src/QueryPlan/TableScanStep.cpp | 6 ++-- ...032_mark_set_for_index_optimizer.reference | 2 ++ .../53032_mark_set_for_index_optimizer.sql | 31 +++++++++++++++++++ 3 files changed, 37 insertions(+), 2 deletions(-) create mode 100644 tests/queries/4_cnch_stateless/53032_mark_set_for_index_optimizer.reference create mode 100644 tests/queries/4_cnch_stateless/53032_mark_set_for_index_optimizer.sql diff --git a/src/QueryPlan/TableScanStep.cpp b/src/QueryPlan/TableScanStep.cpp index 606c6c23be3..187cedd2926 100644 --- a/src/QueryPlan/TableScanStep.cpp +++ b/src/QueryPlan/TableScanStep.cpp @@ -758,10 +758,12 @@ void TableScanStep::makeSetsForIndex(const ASTPtr & node, ContextPtr context, Pr } else { 
- auto input = storage->getInMemoryMetadataPtr()->getColumns().getAll(); + Block header = storage->getStorageSnapshot(storage->getInMemoryMetadataPtr(), context) + ->getSampleBlockForColumns(getRequiredColumns()); + Names output; output.emplace_back(left_in_operand->getColumnName()); - auto temp_actions = createExpressionActions(context, input, output, left_in_operand); + auto temp_actions = createExpressionActions(context, header.getNamesAndTypesList(), output, left_in_operand); if (temp_actions->tryFindInOutputs(left_in_operand->getColumnName())) { makeExplicitSet(func, *temp_actions, true, context, size_limits_for_set, prepared_sets); diff --git a/tests/queries/4_cnch_stateless/53032_mark_set_for_index_optimizer.reference b/tests/queries/4_cnch_stateless/53032_mark_set_for_index_optimizer.reference new file mode 100644 index 00000000000..aa47d0d46d4 --- /dev/null +++ b/tests/queries/4_cnch_stateless/53032_mark_set_for_index_optimizer.reference @@ -0,0 +1,2 @@ +0 +0 diff --git a/tests/queries/4_cnch_stateless/53032_mark_set_for_index_optimizer.sql b/tests/queries/4_cnch_stateless/53032_mark_set_for_index_optimizer.sql new file mode 100644 index 00000000000..7803242b9aa --- /dev/null +++ b/tests/queries/4_cnch_stateless/53032_mark_set_for_index_optimizer.sql @@ -0,0 +1,31 @@ +create table 53032_mark_set_for_index_optimizer ( + id UInt32, name Nullable(String), dim_data Map(String, String), p_date Date +) ENGINE=CnchMergeTree +order by p_date; + +SELECT count() +FROM 53032_mark_set_for_index_optimizer +WHERE ((multiIf(isNull(`name`), '1', dim_data{'a'} LIKE '1%', '1', '1'), p_date) IN [('1', 1982)]); + +-- SELECT count() +-- FROM 53032_mark_set_for_index_optimizer +-- WHERE ((multiIf(isNull(`name`), '1', `__dim_data__'a'` LIKE '1%', '1', '1'), p_date) IN [('1', 1982)]); + +SELECT count() +FROM 53032_mark_set_for_index_optimizer +WHERE ((multiIf(isNull(name), '1', dim_data{'a'} LIKE '1%', '1', '1'), p_date) IN ( + SELECT + multiIf(isNull(name), '1', dim_data{'a'} 
LIKE '1%', '1', '1'), + p_date + FROM + ( + SELECT + name, + dim_data, + p_date + FROM 53032_mark_set_for_index_optimizer + WHERE p_date = '2024-06-05' + LIMIT 1 + ) +)) +LIMIT 1; From b0403f7fafca77440d67ebf65cada3c682dbc71e Mon Sep 17 00:00:00 2001 From: fredwang Date: Thu, 1 Aug 2024 05:32:54 +0000 Subject: [PATCH 036/292] Merge 'cherry-pick-f8a20558' into 'cnch-2.2' feat(clickhousech@m-4209342798): [cp] catalog support large KV See merge request: !22744 # Conflicts: # ci_scripts/config/users.xml --- ci_scripts/config/users.xml | 1 + .../meta-inspector/MetastoreInspector.cpp | 9 + src/Catalog/Catalog.cpp | 18 +- src/Catalog/Catalog.h | 6 + src/Catalog/CatalogBackgroundTask.cpp | 118 +++++++++++++ src/Catalog/CatalogBackgroundTask.h | 41 +++++ src/Catalog/IMetastore.h | 5 + src/Catalog/LargeKVHandler.cpp | 167 ++++++++++++++++++ src/Catalog/LargeKVHandler.h | 53 ++++++ src/Catalog/MetastoreByteKVImpl.h | 3 + src/Catalog/MetastoreFDBImpl.h | 5 +- src/Catalog/MetastoreProxy.cpp | 112 ++++++++++-- src/Catalog/MetastoreProxy.h | 28 ++- src/CloudServices/CnchServerServiceImpl.cpp | 3 +- src/Interpreters/Context.cpp | 2 + src/Protos/data_models.proto | 7 + src/Storages/StorageCnchMergeTree.cpp | 2 +- src/Transaction/Actions/DDLAlterAction.cpp | 2 +- src/Transaction/ICnchTransaction.h | 7 + .../13005_test_create_big_table.reference | 2 + .../13005_test_create_big_table.sh | 33 ++++ 21 files changed, 599 insertions(+), 25 deletions(-) create mode 100644 src/Catalog/CatalogBackgroundTask.cpp create mode 100644 src/Catalog/CatalogBackgroundTask.h create mode 100644 src/Catalog/LargeKVHandler.cpp create mode 100644 src/Catalog/LargeKVHandler.h create mode 100644 tests/queries/4_cnch_stateless/13005_test_create_big_table.reference create mode 100755 tests/queries/4_cnch_stateless/13005_test_create_big_table.sh diff --git a/ci_scripts/config/users.xml b/ci_scripts/config/users.xml index a7859c3d20b..4aa4a732bc3 100644 --- a/ci_scripts/config/users.xml +++ 
b/ci_scripts/config/users.xml @@ -21,6 +21,7 @@ 6 8589934592 50000 + 0 diff --git a/programs/meta-inspector/MetastoreInspector.cpp b/programs/meta-inspector/MetastoreInspector.cpp index dbc300525c1..4106a0b914a 100644 --- a/programs/meta-inspector/MetastoreInspector.cpp +++ b/programs/meta-inspector/MetastoreInspector.cpp @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -362,6 +363,14 @@ class MetastoreInspector : public Poco::Util::Application { std::string value; metastore_ptr->get(full_key, value); + // try parse large KV before really dump metadata. + DB::Protos::DataModelLargeKVMeta large_kv_model; + if (Catalog::tryParseLargeKVMetaModel(value, large_kv_model)) + { + std::cout << "Large KV base value: \n" << large_kv_model.DebugString() << std::endl; + tryGetLargeValue(metastore_ptr, name_space, full_key, value); + std::cout << "Original value : " << std::endl; + } dumpMetadata(cmd.key, value); break; } diff --git a/src/Catalog/Catalog.cpp b/src/Catalog/Catalog.cpp index c8e4cd2cbad..1c69ff08770 100644 --- a/src/Catalog/Catalog.cpp +++ b/src/Catalog/Catalog.cpp @@ -24,6 +24,8 @@ #include #include #include +#include +#include #include #include #include @@ -565,6 +567,9 @@ namespace Catalog topology_key = name_space; else topology_key = name_space + "_" + config.topology_key; + + // Add background task to do some GC job for Catalog. 
+ bg_task = std::make_shared(Context::createCopy(context.shared_from_this()), meta_proxy->getMetastore(), name_space); }, ProfileEvents::CatalogConstructorSuccess, ProfileEvents::CatalogConstructorFailed); @@ -965,7 +970,11 @@ namespace Catalog return; } - auto storage = tryGetTableByUUID(context, UUIDHelpers::UUIDToString(uuid), TxnTimestamp::maxTS()); + StoragePtr storage; + if (auto query_context = CurrentThread::getGroup()->query_context.lock()) + storage = tryGetTableByUUID(*query_context, UUIDHelpers::UUIDToString(uuid), TxnTimestamp::maxTS()); + else + storage = tryGetTableByUUID(context, UUIDHelpers::UUIDToString(uuid), TxnTimestamp::maxTS()); if (auto pcm = context.getPartCacheManager(); pcm && storage) { @@ -6241,7 +6250,12 @@ namespace Catalog for (const String & dependency : dependencies) batch_write.AddDelete(MetastoreProxy::viewDependencyKey(name_space, dependency, table_id.uuid())); - batch_write.AddPut(SinglePutRequest(MetastoreProxy::tableStoreKey(name_space, table_id.uuid(), ts.toUInt64()), table.SerializeAsString())); + addPotentialLargeKVToBatchwrite( + meta_proxy->getMetastore(), + batch_write, + name_space, + MetastoreProxy::tableStoreKey(name_space, table_id.uuid(), ts.toUInt64()), + table.SerializeAsString()); // use database name and table name in table_id is required because it may different with that in table data model. 
batch_write.AddPut(SinglePutRequest( MetastoreProxy::tableTrashKey(name_space, table_id.database(), table_id.name(), ts.toUInt64()), table_id.SerializeAsString())); diff --git a/src/Catalog/Catalog.h b/src/Catalog/Catalog.h index ca34604e772..844ef3540f7 100644 --- a/src/Catalog/Catalog.h +++ b/src/Catalog/Catalog.h @@ -82,6 +82,8 @@ enum class VisibilityLevel All }; +class CatalogBackgroundTask; + class Catalog { public: @@ -898,6 +900,8 @@ class Catalog void commitCheckpointVersion(const UUID & uuid, std::shared_ptr checkpoint_version); void cleanTableVersions(const UUID & uuid, std::vector> versions_to_clean); + void shutDown() {bg_task.reset();} + private: Poco::Logger * log = &Poco::Logger::get("Catalog"); Context & context; @@ -909,6 +913,8 @@ class Catalog std::mutex all_storage_nhut_mutex; CatalogSettings settings; + std::shared_ptr bg_task; + std::shared_ptr tryGetDatabaseFromMetastore(const String & database, const UInt64 & ts); std::shared_ptr tryGetTableFromMetastore(const String & table_uuid, const UInt64 & ts, bool with_prev_versions = false, bool with_deleted = false); diff --git a/src/Catalog/CatalogBackgroundTask.cpp b/src/Catalog/CatalogBackgroundTask.cpp new file mode 100644 index 00000000000..912f5811f4f --- /dev/null +++ b/src/Catalog/CatalogBackgroundTask.cpp @@ -0,0 +1,118 @@ +#include +#include +#include +#include + + +namespace DB +{ + +namespace Catalog +{ + +CatalogBackgroundTask::CatalogBackgroundTask( + const ContextPtr & context_, + const std::shared_ptr & metastore_, + const String & name_space_) + : context(context_), + metastore(metastore_), + name_space(name_space_) +{ + task_holder = context->getSchedulePool().createTask( + "CatalogBGTask", + [this](){ + execute(); + } + ); + + task_holder->activate(); + // wait for server startup + task_holder->scheduleAfter(30*1000); +} + +CatalogBackgroundTask::~CatalogBackgroundTask() +{ + try + { + task_holder->deactivate(); + } + catch (...) 
+ { + tryLogCurrentException(log); + } +} + +void CatalogBackgroundTask::execute() +{ + // only server can perform catalog bg task + if (context->getServerType() != ServerType::cnch_server) + return; + + LOG_DEBUG(log, "Try execute catalog bg task."); + try + { + cleanStaleLargeKV(); + } + catch (...) + { + tryLogCurrentException(log, "Exception happens while executing catalog bg task."); + } + + // execute every 1 hour. + task_holder->scheduleAfter(60*60*1000); +} + +void CatalogBackgroundTask::cleanStaleLargeKV() +{ + // only leader can execute clean job + if (!context->getCnchServerManager()->isLeader()) + return; + + // scan large kv records + std::unordered_map uuid_to_key; + String large_kv_reference_prefix = MetastoreProxy::largeKVReferencePrefix(name_space); + auto it = metastore->getByPrefix(large_kv_reference_prefix); + + while (it->next()) + { + String uuid = it->key().substr(large_kv_reference_prefix.size()); + uuid_to_key.emplace(uuid, it->value()); + } + + // check for each large KV if still been referenced by stored key + for (const auto & [uuid, key] : uuid_to_key) + { + String value; + metastore->get(key, value); + if (!value.empty()) + { + Protos::DataModelLargeKVMeta large_kv_model; + if (tryParseLargeKVMetaModel(value, large_kv_model) && large_kv_model.uuid() == uuid) + continue; + } + + // remove large KV because it is not been referenced by original key + BatchCommitRequest batch_write; + BatchCommitResponse resp; + + auto large_kv_it = metastore->getByPrefix(MetastoreProxy::largeKVDataPrefix(name_space, uuid)); + while (large_kv_it->next()) + batch_write.AddDelete(large_kv_it->key()); + + batch_write.AddDelete(MetastoreProxy::largeKVReferenceKey(name_space, uuid)); + + try + { + metastore->batchWrite(batch_write, resp); + LOG_DEBUG(log, "Removed large KV(uuid: {}) from metastore.", uuid); + } + catch (...) 
+ { + tryLogCurrentException(log, "Error occurs while removing large kv."); + } + } +} + +} + +} diff --git a/src/Catalog/CatalogBackgroundTask.h b/src/Catalog/CatalogBackgroundTask.h new file mode 100644 index 00000000000..d252cf040ec --- /dev/null +++ b/src/Catalog/CatalogBackgroundTask.h @@ -0,0 +1,41 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ + +namespace Catalog +{ + +class CatalogBackgroundTask +{ + +public: + CatalogBackgroundTask( + const ContextPtr & context_, + const std::shared_ptr & metastore_, + const String & name_space_); + + ~CatalogBackgroundTask(); + + void execute(); + +private: + + void cleanStaleLargeKV(); + + Poco::Logger * log = &Poco::Logger::get("CatalogBGTask"); + + ContextPtr context; + std::shared_ptr metastore; + String name_space; + + BackgroundSchedulePool::TaskHolder task_holder; +}; + +} + +} diff --git a/src/Catalog/IMetastore.h b/src/Catalog/IMetastore.h index 1496d76f252..e9dbe959b48 100644 --- a/src/Catalog/IMetastore.h +++ b/src/Catalog/IMetastore.h @@ -117,6 +117,11 @@ class IMetaStore * get limitations of the kv store */ virtual uint32_t getMaxBatchSize() = 0; + + /*** + * get limitation single a KV size + */ + virtual uint32_t getMaxKVSize() = 0; }; } diff --git a/src/Catalog/LargeKVHandler.cpp b/src/Catalog/LargeKVHandler.cpp new file mode 100644 index 00000000000..1624e123a50 --- /dev/null +++ b/src/Catalog/LargeKVHandler.cpp @@ -0,0 +1,167 @@ +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int CORRUPTED_DATA; +} + +namespace Catalog +{ + +// Using SHA-1 value of the KV as its UUID so that we can perform CAS based on it. 
+String getUUIDForLargeKV(const String & key, const String & value) +{ + Poco::SHA1Engine engine; + engine.update(key.data(), key.size()); + engine.update(value.data(), value.size()); + const std::vector & sha1_value = engine.digest(); + String hexed_hash; + hexed_hash.resize(sha1_value.size() * 2); + boost::algorithm::hex(sha1_value.begin(), sha1_value.end(), hexed_hash.data()); + return hexed_hash; +} + +bool tryParseLargeKVMetaModel(const String & serialized, Protos::DataModelLargeKVMeta & model) +{ + if (serialized.compare(0, 4, MAGIC_NUMBER) == 0) + return model.ParseFromArray(serialized.c_str() + 4, serialized.size()-4); + + return false; +} + +void tryGetLargeValue(const std::shared_ptr & metastore, const String & name_space, const String & key, String & value) +{ + Protos::DataModelLargeKVMeta large_kv_model; + + if (!tryParseLargeKVMetaModel(value, large_kv_model)) + return; + + String kv_id = large_kv_model.uuid(); + UInt32 subkv_number = large_kv_model.subkv_number(); + + String resolved; + + if (large_kv_model.has_value_size()) + resolved.reserve(large_kv_model.value_size()); + + if (subkv_number < 10) + { + std::vector request_keys(subkv_number); + for (size_t i=0; imultiGet(request_keys); + for (const auto & [subvalue, _] : sub_values) + resolved += subvalue; + } + else + { + auto it = metastore->getByPrefix(MetastoreProxy::largeKVDataPrefix(name_space, kv_id)); + while (it->next()) + resolved += it->value(); + } + + //check kv uuid(KV hash) to verity the data integrity + if (getUUIDForLargeKV(key, resolved) != kv_id) + throw Exception(fmt::format("Cannot resolve value of big KV. Data may be corrupted. 
Origin value size : {}, resolved size : {}" + , large_kv_model.value_size(), resolved.size()), ErrorCodes::CORRUPTED_DATA); + + value.swap(resolved); +} + +LargeKVWrapperPtr tryGetLargeKVWrapper( + const std::shared_ptr & metastore, + const String & name_space, + const String & key, + const String & value, + bool if_not_exists, + const String & expected) +{ + const size_t max_allowed_kv_size = metastore->getMaxKVSize(); + size_t value_size = value.size(); + + if (value_size > max_allowed_kv_size) + { + String large_kv_id = getUUIDForLargeKV(key, value); + + std::vector puts; + UInt64 sub_key_index = 0; + // split serialized data to make substrings match the KV size limitation + for (size_t i=0; i max_allowed_kv_size) + { + Protos::DataModelLargeKVMeta expected_large_kv_model; + expected_large_kv_model.set_uuid(getUUIDForLargeKV(key, expected)); + expected_large_kv_model.set_subkv_number(1 + ((expected.size() - 1) / max_allowed_kv_size)); + expected_large_kv_model.set_value_size(expected.size()); + + base_req.expected_value = MAGIC_NUMBER + expected_large_kv_model.SerializeAsString(); + } + else + base_req.expected_value = expected; + } + + LargeKVWrapperPtr wrapper = std::make_shared(std::move(base_req)); + wrapper->sub_requests.swap(puts); + + return wrapper; + } + else + { + SinglePutRequest base_req(key, value); + base_req.if_not_exists = if_not_exists; + if (!expected.empty()) + base_req.expected_value = expected; + LargeKVWrapperPtr wrapper = std::make_shared(std::move(base_req)); + return wrapper; + } +} + +void addPotentialLargeKVToBatchwrite( + const std::shared_ptr & metastore, + BatchCommitRequest & batch_request, + const String & name_space, + const String & key, + const String & value, + bool if_not_eixts, + const String & expected) +{ + LargeKVWrapperPtr largekv_wrapper = tryGetLargeKVWrapper(metastore, name_space, key, value, if_not_eixts, expected); + + for (auto & sub_req : largekv_wrapper->sub_requests) + batch_request.AddPut(sub_req); + + 
batch_request.AddPut(largekv_wrapper->base_request); +} + +} + +} diff --git a/src/Catalog/LargeKVHandler.h b/src/Catalog/LargeKVHandler.h new file mode 100644 index 00000000000..5d9f0397482 --- /dev/null +++ b/src/Catalog/LargeKVHandler.h @@ -0,0 +1,53 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ + +namespace Catalog +{ + +static const char * MAGIC_NUMBER = "LGKV"; + +struct LargeKVWrapper +{ + LargeKVWrapper(SinglePutRequest && base) + : base_request(std::move(base)) + { + } + + SinglePutRequest base_request; + std::vector sub_requests; + + bool isLargeKV() { return sub_requests.size() > 0; } +}; + +using LargeKVWrapperPtr = std::shared_ptr; + +LargeKVWrapperPtr tryGetLargeKVWrapper( + const std::shared_ptr & metastore, + const String & name_space, + const String & key, + const String & value, + bool if_not_exists = false, + const String & expected = ""); + + +bool tryParseLargeKVMetaModel(const String & serialized, Protos::DataModelLargeKVMeta & model); + +void tryGetLargeValue(const std::shared_ptr & metastore, const String & name_space, const String & key, String & value); + +void addPotentialLargeKVToBatchwrite( + const std::shared_ptr & metastore, + BatchCommitRequest & batch_request, + const String & name_space, + const String & key, + const String & value, + bool if_not_eixts = false, + const String & expected = ""); +} + +} diff --git a/src/Catalog/MetastoreByteKVImpl.h b/src/Catalog/MetastoreByteKVImpl.h index b58b858c77d..c3426193cd8 100644 --- a/src/Catalog/MetastoreByteKVImpl.h +++ b/src/Catalog/MetastoreByteKVImpl.h @@ -122,6 +122,9 @@ class MetastoreByteKVImpl : public IMetaStore // leave some margin uint32_t getMaxBatchSize() final { return MAX_BYTEKV_BATCH_SIZE - 1000; } + // leave some margin + uint32_t getMaxKVSize() final { return MAX_BYTEKV_KV_SIZE - 200; } + public: std::shared_ptr client; diff --git a/src/Catalog/MetastoreFDBImpl.h b/src/Catalog/MetastoreFDBImpl.h index 5465dbb43b0..9b8d01b17b8 100644 --- 
a/src/Catalog/MetastoreFDBImpl.h +++ b/src/Catalog/MetastoreFDBImpl.h @@ -33,7 +33,7 @@ namespace Catalog class MetastoreFDBImpl : public IMetaStore { // Limitations of FDB (in bytes) -#define MAX_FDB_KV_SIZE 10000 +#define MAX_FDB_KV_SIZE 100000 //Hard limit.Keys cannot exceed 10,000 bytes in size. Values cannot exceed 100,000 bytes in size #define MAX_FDB_TRANSACTION_SIZE 10000000 public: @@ -105,6 +105,9 @@ class MetastoreFDBImpl : public IMetaStore // leave some margin uint32_t getMaxBatchSize() final { return MAX_FDB_TRANSACTION_SIZE - 1000; } + // leave some margin + uint32_t getMaxKVSize() final { return MAX_FDB_KV_SIZE - 200; } + private: /// convert metastore specific error code to Clickhouse error code for processing convenience in upper layer. static int toCommonErrorCode(const fdb_error_t & error_t); diff --git a/src/Catalog/MetastoreProxy.cpp b/src/Catalog/MetastoreProxy.cpp index 4fa0be0559e..015f33c000f 100644 --- a/src/Catalog/MetastoreProxy.cpp +++ b/src/Catalog/MetastoreProxy.cpp @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -343,8 +344,16 @@ void MetastoreProxy::createTable(const String & name_space, const UUID & db_uuid BatchCommitRequest batch_write; batch_write.AddPut(SinglePutRequest(nonHostUpdateKey(name_space, uuid), "0", true)); - // insert table meta - batch_write.AddPut(SinglePutRequest(tableStoreKey(name_space, uuid, table_data.commit_time()), serialized_meta, true)); + + // insert table meta. 
Handle by largeKVHandler in case the table meta exceeds KV size limitation + addPotentialLargeKVToBatchwrite( + metastore_ptr, + batch_write, + name_space, + tableStoreKey(name_space, uuid, table_data.commit_time()), + serialized_meta, + true/*if_not_exists*/); + /// add dependency mapping if need for (const String & dependency : dependencies) batch_write.AddPut(SinglePutRequest(viewDependencyKey(name_space, dependency, uuid), uuid)); @@ -420,14 +429,34 @@ void MetastoreProxy::dropUDF(const String & name_space, const String &resolved_n void MetastoreProxy::updateTable(const String & name_space, const String & table_uuid, const String & table_info_new, const UInt64 & ts) { - metastore_ptr->put(tableStoreKey(name_space, table_uuid, ts), table_info_new); + if (table_info_new.size() > metastore_ptr->getMaxKVSize()) + { + BatchCommitRequest batch_write; + addPotentialLargeKVToBatchwrite( + metastore_ptr, + batch_write, + name_space, + tableStoreKey(name_space, table_uuid, ts), + table_info_new); + BatchCommitResponse resp; + metastore_ptr->batchWrite(batch_write, resp); + } + else + metastore_ptr->put(tableStoreKey(name_space, table_uuid, ts), table_info_new); } void MetastoreProxy::updateTableWithID(const String & name_space, const Protos::TableIdentifier & table_id, const DB::Protos::DataModelTable & table_data) { BatchCommitRequest batch_write; batch_write.AddPut(SinglePutRequest(tableUUIDMappingKey(name_space, table_id.database(), table_id.name()), table_id.SerializeAsString())); - batch_write.AddPut(SinglePutRequest(tableStoreKey(name_space, table_id.uuid(), table_data.commit_time()), table_data.SerializeAsString())); + + addPotentialLargeKVToBatchwrite( + metastore_ptr, + batch_write, + name_space, + tableStoreKey(name_space, table_id.uuid(), table_data.commit_time()), + table_data.SerializeAsString()); + BatchCommitResponse resp; metastore_ptr->batchWrite(batch_write, resp); } @@ -437,7 +466,10 @@ void MetastoreProxy::getTableByUUID(const String & name_space, 
const String & ta auto it = metastore_ptr->getByPrefix(tableStorePrefix(name_space, table_uuid)); while(it->next()) { - tables_info.emplace_back(it->value()); + String table_meta = it->value(); + /// NOTE: Too many large KVs will cause severe performance regression. It rarely happens + tryGetLargeValue(metastore_ptr, name_space, it->key(), table_meta); + tables_info.emplace_back(std::move(table_meta)); } } @@ -831,10 +863,14 @@ void MetastoreProxy::prepareRenameTable(const String & name_space, RPCHelpers::fillUUID(to_db_uuid, *identifier.mutable_db_uuid()); batch_write.AddPut(SinglePutRequest(tableUUIDMappingKey(name_space, to_table.database(), to_table.name()), identifier.SerializeAsString(), true)); - String meta_data; - to_table.SerializeToString(&meta_data); /// add new table meta data with new name - batch_write.AddPut(SinglePutRequest(tableStoreKey(name_space, table_uuid, to_table.commit_time()), meta_data, true)); + addPotentialLargeKVToBatchwrite( + metastore_ptr, + batch_write, + name_space, + tableStoreKey(name_space, table_uuid, to_table.commit_time()), + to_table.SerializeAsString(), + true/*if_not_exists*/); } bool MetastoreProxy::alterTable(const String & name_space, const Protos::DataModelTable & table, const Strings & masks_to_remove, const Strings & masks_to_add) @@ -842,7 +878,14 @@ bool MetastoreProxy::alterTable(const String & name_space, const Protos::DataMod BatchCommitRequest batch_write; String table_uuid = UUIDHelpers::UUIDToString(RPCHelpers::createUUID(table.uuid())); - batch_write.AddPut(SinglePutRequest(tableStoreKey(name_space, table_uuid, table.commit_time()), table.SerializeAsString(), true)); + + addPotentialLargeKVToBatchwrite( + metastore_ptr, + batch_write, + name_space, + tableStoreKey(name_space, table_uuid, table.commit_time()), + table.SerializeAsString(), + true/*if_not_exists*/); Protos::TableIdentifier identifier; identifier.set_database(table.database()); @@ -3422,7 +3465,9 @@ std::shared_ptr MetastoreProxy::getSensitive 
String MetastoreProxy::getAccessEntity(EntityType type, const String & name_space, const String & name) const { String data; - metastore_ptr->get(accessEntityKey(type, name_space, name), data); + String access_entity_key = accessEntityKey(type, name_space, name); + metastore_ptr->get(access_entity_key, data); + tryGetLargeValue(metastore_ptr, name_space, access_entity_key, data); return data; } @@ -3445,7 +3490,16 @@ std::vector> MetastoreProxy::getEntities(EntityType ty requests.push_back(accessEntityKey(type, name_space, s)); } - return metastore_ptr->multiGet(requests); + auto res = metastore_ptr->multiGet(requests); + + for (size_t i=0; igetByPrefix(accessEntityPrefix(type, name_space)); while (it->next()) { - models.push_back(it->value()); + String value = it->value(); + /// NOTE: Too many large KVs will cause severe performance regression. + tryGetLargeValue(metastore_ptr, name_space, it->key(), value); + models.push_back(std::move(value)); } return models; } @@ -3482,12 +3539,30 @@ bool MetastoreProxy::putAccessEntity(EntityType type, const String & name_space, BatchCommitRequest batch_write; BatchCommitResponse resp; auto is_rename = !old_access_entity.name().empty() && new_access_entity.name() != old_access_entity.name(); - auto put_access_entity_request = SinglePutRequest(accessEntityKey(type, name_space, new_access_entity.name()), new_access_entity.SerializeAsString(), !replace_if_exists); String uuid = UUIDHelpers::UUIDToString(RPCHelpers::createUUID(new_access_entity.uuid())); String serialized_old_access_entity = old_access_entity.SerializeAsString(); if (!serialized_old_access_entity.empty() && !is_rename) - put_access_entity_request.expected_value = serialized_old_access_entity; - batch_write.AddPut(put_access_entity_request); + { + addPotentialLargeKVToBatchwrite( + metastore_ptr, + batch_write, + name_space, + accessEntityKey(type, name_space, new_access_entity.name()), + new_access_entity.SerializeAsString(), + !replace_if_exists, + 
serialized_old_access_entity); + } + else + { + addPotentialLargeKVToBatchwrite( + metastore_ptr, + batch_write, + name_space, + accessEntityKey(type, name_space, new_access_entity.name()), + new_access_entity.SerializeAsString(), + !replace_if_exists); + } + batch_write.AddPut(SinglePutRequest(accessEntityUUIDNameMappingKey(name_space, uuid), new_access_entity.name(), !replace_if_exists)); if (is_rename) batch_write.AddDelete(accessEntityKey(type, name_space, old_access_entity.name())); // delete old one in case of rename @@ -3497,21 +3572,22 @@ bool MetastoreProxy::putAccessEntity(EntityType type, const String & name_space, } catch (Exception & e) { + auto puts_size = batch_write.puts.size(); if (e.code() == ErrorCodes::METASTORE_COMMIT_CAS_FAILURE) { - if (resp.puts.count(0) && replace_if_exists && !serialized_old_access_entity.empty()) + if (resp.puts.count(puts_size-2) && replace_if_exists && !serialized_old_access_entity.empty()) { throw Exception( "Access Entity has recently been changed in catalog. Please try the request again.", ErrorCodes::METASTORE_ACCESS_ENTITY_CAS_ERROR); } - else if (resp.puts.count(0) && !replace_if_exists) + else if (resp.puts.count(puts_size-2) && !replace_if_exists) { throw Exception( "Access Entity with the same name already exists in catalog. Please use another name and try again.", ErrorCodes::METASTORE_ACCESS_ENTITY_EXISTS_ERROR); } - else if (resp.puts.count(1) && !replace_if_exists) + else if (resp.puts.count(puts_size-1) && !replace_if_exists) { throw Exception( "Access Entity with the same UUID already exists in catalog. 
Please use another name and try again.", diff --git a/src/Catalog/MetastoreProxy.h b/src/Catalog/MetastoreProxy.h index a9c2b0350a8..f872a78f163 100644 --- a/src/Catalog/MetastoreProxy.h +++ b/src/Catalog/MetastoreProxy.h @@ -134,6 +134,9 @@ namespace DB::Catalog #define MANIFEST_DATA_PREFIX "MFST_" #define MANIFEST_LIST_PREFIX "MFSTS_" +#define LARGE_KV_DATA_PREFIX "LGKV_" +#define LARGE_KV_REFERENCE "LGKVRF_" + using EntityType = IAccessEntity::Type; struct EntityMetastorePrefix { @@ -970,6 +973,29 @@ class MetastoreProxy return manifestListPrefix(name_space, uuid) + toString(table_version); } + static String largeKVDataPrefix(const String & name_space, const String & uuid) + { + return escapeString(name_space) + '_' + LARGE_KV_DATA_PREFIX + uuid + '_'; + } + + static String largeKVDataKey(const String & name_space, const String & uuid, UInt64 index) + { + // keep records in the kv storage with the same order as index. Support at most 10k sub-kv + std::ostringstream oss; + oss << std::setw(5) << std::setfill('0') << index; + return largeKVDataPrefix(name_space, uuid) + oss.str(); + } + + static String largeKVReferencePrefix(const String & name_space) + { + return escapeString(name_space) + '_' + LARGE_KV_REFERENCE; + } + + static String largeKVReferenceKey(const String & name_space, const String & uuid) + { + return largeKVReferencePrefix(name_space) + uuid; + } + // parse the first key in format of '{prefix}{escapedString(first_key)}_postfix' // note that prefix should contains _, like TCS_ // return [first_key, postfix] @@ -1053,7 +1079,7 @@ class MetastoreProxy void updateTableWithID(const String & name_space, const Protos::TableIdentifier & table_id, const DB::Protos::DataModelTable & table_data); void getTableByUUID(const String & name_space, const String & table_uuid, Strings & tables_info); void clearTableMeta(const String & name_space, const String & database, const String & table, const String & uuid, const Strings & dependencies, const UInt64 & ts = 0); 
- static void prepareRenameTable(const String & name_space, const String & table_uuid, const String & from_db, const String & from_table, const UUID & to_db_uuid, Protos::DataModelTable & to_table, BatchCommitRequest & batch_write); + void prepareRenameTable(const String & name_space, const String & table_uuid, const String & from_db, const String & from_table, const UUID & to_db_uuid, Protos::DataModelTable & to_table, BatchCommitRequest & batch_write); bool alterTable(const String & name_space, const Protos::DataModelTable & table, const Strings & masks_to_remove, const Strings & masks_to_add); Strings getAllTablesInDB(const String & name_space, const String & database); IMetaStore::IteratorPtr getAllTablesMeta(const String & name_space); diff --git a/src/CloudServices/CnchServerServiceImpl.cpp b/src/CloudServices/CnchServerServiceImpl.cpp index 78387c10f91..b2e5edf017b 100644 --- a/src/CloudServices/CnchServerServiceImpl.cpp +++ b/src/CloudServices/CnchServerServiceImpl.cpp @@ -1828,7 +1828,8 @@ void CnchServerServiceImpl::notifyTableCreated( catch (...) 
{ tryLogCurrentException(log, __PRETTY_FUNCTION__); - RPCHelpers::handleException(response->mutable_exception()); + (void)response; + //RPCHelpers::handleException(response->mutable_exception()); } }); } diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 51491ac32c6..b439c7cd605 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -588,6 +588,8 @@ struct ContextSharedPart if (worker_status_manager) worker_status_manager->shutdown(); + if (cnch_catalog) + cnch_catalog->shutDown(); std::unique_ptr delete_system_logs; std::unique_ptr delete_cnch_system_logs; diff --git a/src/Protos/data_models.proto b/src/Protos/data_models.proto index 90bbd2bbfff..d8943dbf337 100644 --- a/src/Protos/data_models.proto +++ b/src/Protos/data_models.proto @@ -816,3 +816,10 @@ message ManifestListModel repeated uint64 txn_ids = 2; optional bool checkpoint = 3; } + +message DataModelLargeKVMeta +{ + required bytes uuid = 1; //uuid of the large KV + required uint64 subkv_number = 2; + optional uint64 value_size = 3; // record the value size of the large KV +} diff --git a/src/Storages/StorageCnchMergeTree.cpp b/src/Storages/StorageCnchMergeTree.cpp index 09ede2b049f..c8f6587cc2c 100644 --- a/src/Storages/StorageCnchMergeTree.cpp +++ b/src/Storages/StorageCnchMergeTree.cpp @@ -2126,7 +2126,7 @@ void StorageCnchMergeTree::alter(const AlterCommands & commands, ContextPtr loca StorageInMemoryMetadata new_metadata = getInMemoryMetadata(); TransactionCnchPtr txn = local_context->getCurrentTransaction(); - auto action = txn->createAction(shared_from_this(), local_context->getSettingsRef(), local_context->getCurrentQueryId()); + auto action = txn->createActionWithLocalContext(local_context, shared_from_this(), local_context->getSettingsRef(), local_context->getCurrentQueryId()); auto & alter_act = action->as(); alter_act.setMutationCommands(mutation_commands); diff --git a/src/Transaction/Actions/DDLAlterAction.cpp 
b/src/Transaction/Actions/DDLAlterAction.cpp index 01da6922db9..58693e6f96b 100644 --- a/src/Transaction/Actions/DDLAlterAction.cpp +++ b/src/Transaction/Actions/DDLAlterAction.cpp @@ -89,7 +89,7 @@ void DDLAlterAction::executeV1(TxnTimestamp commit_time) // updateTsCache(table->getStorageUUID(), commit_time); if (!new_schema.empty() && new_schema!=old_schema) { - catalog->alterTable(global_context, query_settings, table, new_schema, table->commit_time, txn_id, commit_time, is_modify_cluster_by); + catalog->alterTable(*getContext(), query_settings, table, new_schema, table->commit_time, txn_id, commit_time, is_modify_cluster_by); LOG_DEBUG(log, "Successfully change schema in catalog."); } else diff --git a/src/Transaction/ICnchTransaction.h b/src/Transaction/ICnchTransaction.h index 4d81a6ba1c2..414006c2853 100644 --- a/src/Transaction/ICnchTransaction.h +++ b/src/Transaction/ICnchTransaction.h @@ -120,6 +120,13 @@ class ICnchTransaction : public std::enable_shared_from_this, { return std::make_shared(global_context, txn_record.txnID(), std::forward(args)...); } + + template + ActionPtr createActionWithLocalContext(const ContextPtr & local_context, Args &&... args) const + { + return std::make_shared(local_context, txn_record.txnID(), std::forward(args)...); + } + template IntentLockPtr createIntentLock(const String & lock_prefix, Args &&... 
args) const { diff --git a/tests/queries/4_cnch_stateless/13005_test_create_big_table.reference b/tests/queries/4_cnch_stateless/13005_test_create_big_table.reference new file mode 100644 index 00000000000..6ed281c757a --- /dev/null +++ b/tests/queries/4_cnch_stateless/13005_test_create_big_table.reference @@ -0,0 +1,2 @@ +1 +1 diff --git a/tests/queries/4_cnch_stateless/13005_test_create_big_table.sh b/tests/queries/4_cnch_stateless/13005_test_create_big_table.sh new file mode 100755 index 00000000000..9973758a51c --- /dev/null +++ b/tests/queries/4_cnch_stateless/13005_test_create_big_table.sh @@ -0,0 +1,33 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. $CURDIR/../shell_config.sh + + +COL_NUMBER=5000 + +$CLICKHOUSE_CLIENT --multiquery < 1000000) FROM system.cnch_tables where database='test' AND name='big_table'; +ALTER TABLE test.big_table ADD COLUMN extra String; +SELECT count() FROM system.cnch_columns WHERE database='test' AND table='big_table' AND name='extra'; +DROP TABLE test.big_table; +EOF + +rm -f $TEMP_QUERY_FILE \ No newline at end of file From f1a9dbd20a41f98c73c20f5a8c3d9862ed8632be Mon Sep 17 00:00:00 2001 From: fredwang Date: Thu, 1 Aug 2024 05:37:24 +0000 Subject: [PATCH 037/292] Merge 'cherry-pick-d9648a52' into 'cnch-2.2' fix(clickhousech@m-4649517515): [cp] fix aeolus check exception See merge request: !22723 --- src/Interpreters/DatabaseCatalog.cpp | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index 4cc828c7f3f..236d6eaad12 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -302,8 +302,17 @@ DatabaseAndTable DatabaseCatalog::getTableImpl( } } - if (context_->getServerType() == ServerType::cnch_server) + auto aeolus_check = [&table_id, &context_](const StoragePtr & storage) + { + // check aeolus table access before return required storage. 
+ if (context_->getServerType() != ServerType::cnch_server) + return; + + if (!storage || storage->getName() == "MaterializedView") + return; + context_->checkAeolusTableAccess(table_id.database_name, table_id.table_name); + }; if (table_id.hasUUID() && table_id.database_name == TEMPORARY_DATABASE) { @@ -333,6 +342,8 @@ DatabaseAndTable DatabaseCatalog::getTableImpl( db_and_table.second = std::make_shared(std::move(db_and_table.second), db_and_table.first.get()); } #endif + + aeolus_check(db_and_table.second); return db_and_table; } @@ -394,6 +405,7 @@ DatabaseAndTable DatabaseCatalog::getTableImpl( cnch_table->resetObjectColumns(context_); } + aeolus_check(table); return {database, table}; } From cb89cac934e48e8ac7ce807b8cfc7ae9bd16906a Mon Sep 17 00:00:00 2001 From: fredwang Date: Thu, 1 Aug 2024 05:38:07 +0000 Subject: [PATCH 038/292] Merge 'cherry-pick-86eb4e86' into 'cnch-2.2' fix(clickhousech@m-4548795671): [cp] fix high cpu when attaching parts/partitions See merge request: !22718 --- src/Catalog/MetastoreProxy.cpp | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/src/Catalog/MetastoreProxy.cpp b/src/Catalog/MetastoreProxy.cpp index 015f33c000f..e6bcb6d69e8 100644 --- a/src/Catalog/MetastoreProxy.cpp +++ b/src/Catalog/MetastoreProxy.cpp @@ -980,7 +980,6 @@ void MetastoreProxy::prepareAddDataParts( if (parts.empty()) return; - std::unordered_set existing_partitions{current_partitions.begin(), current_partitions.end()}; std::unordered_set partitions_found_in_deleting_set; std::unordered_map partition_map; @@ -1003,18 +1002,23 @@ void MetastoreProxy::prepareAddDataParts( batch_write.AddPut(SinglePutRequest(manifestKeyForPart(name_space, table_uuid, txn_id, info_ptr->getPartName()), part_meta)); if (deleting_partitions.count(info_ptr->partition_id) && !partitions_found_in_deleting_set.count(info_ptr->partition_id)) - { partitions_found_in_deleting_set.emplace(info_ptr->partition_id); - 
partition_map.emplace(info_ptr->partition_id, it->partition_minmax()); - } - if (!existing_partitions.count(info_ptr->partition_id) && !partition_map.count(info_ptr->partition_id)) + if (!partition_map.count(info_ptr->partition_id)) partition_map.emplace(info_ptr->partition_id, it->partition_minmax()); } if (update_sync_list) batch_write.AddPut(SinglePutRequest(syncListKey(name_space, table_uuid, commit_time), std::to_string(commit_time))); + // Prepare partition metadata. Skip those already exists non-deleting partitions + for (const auto & exist_partition : current_partitions) + { + auto it = partition_map.find(exist_partition); + if (it != partition_map.end() && !partitions_found_in_deleting_set.count(exist_partition)) + partition_map.erase(it); + } + Protos::PartitionMeta partition_model; for (auto it = partition_map.begin(); it != partition_map.end(); it++) { @@ -1038,7 +1042,6 @@ void MetastoreProxy::prepareAddStagedParts( if (parts.empty()) return; - std::unordered_set existing_partitions{current_partitions.begin(), current_partitions.end()}; std::unordered_map partition_map; size_t expected_staged_part_size = expected_staged_parts.size(); if (expected_staged_part_size != static_cast(parts.size())) @@ -1050,10 +1053,18 @@ void MetastoreProxy::prepareAddStagedParts( String part_meta = it->SerializeAsString(); batch_write.AddPut(SinglePutRequest(stagedDataPartKey(name_space, table_uuid, info_ptr->getPartName()), part_meta, expected_staged_parts[it - parts.begin()])); - if (!existing_partitions.count(info_ptr->partition_id) && !partition_map.count(info_ptr->partition_id)) + if (!partition_map.count(info_ptr->partition_id)) partition_map.emplace(info_ptr->partition_id, it->partition_minmax()); } + // Prepare partition metadata. 
Skip those already exists partitions + for (const auto & exist_partition : current_partitions) + { + auto it = partition_map.find(exist_partition); + if (it != partition_map.end()) + partition_map.erase(it); + } + Protos::PartitionMeta partition_model; for (auto & it : partition_map) { From e6466cf47e6f56dd97f6418a50eb7429b21e5830 Mon Sep 17 00:00:00 2001 From: fredwang Date: Thu, 1 Aug 2024 05:38:35 +0000 Subject: [PATCH 039/292] Merge 'cherry-pick-mr-22696' into 'cnch-2.2' fix(optimizer@m-4655303703): prepared statement support array type See merge request: !22752 --- src/Optimizer/ExpressionInterpreter.cpp | 10 ++++++ src/Parsers/ExpressionElementParsers.cpp | 7 ++++ src/Parsers/ExpressionElementParsers.h | 11 ++++++ src/Parsers/ParserPreparedParameter.cpp | 7 ++-- src/Parsers/ParserPreparedStatement.cpp | 35 +++++++++++++++++-- .../48035_prepared_statement.reference | 10 ++++++ .../48035_prepared_statement.sql | 26 ++++++++++++++ 7 files changed, 100 insertions(+), 6 deletions(-) diff --git a/src/Optimizer/ExpressionInterpreter.cpp b/src/Optimizer/ExpressionInterpreter.cpp index 47c6f2b3d5c..95a68de4b4d 100644 --- a/src/Optimizer/ExpressionInterpreter.cpp +++ b/src/Optimizer/ExpressionInterpreter.cpp @@ -710,6 +710,16 @@ InterpretIMResult ExpressionInterpreter::visitInFunction(const ASTFunction & fun if (left_arg_result.isAST() && !setting.enable_function_simplify) return {getType(rewritten_in_func), rewritten_in_func}; + if (const auto * ast_prepared_param = right_arg->as()) + { + auto riget_arg_result = visitASTPreparedParameter(*ast_prepared_param, right_arg); + ColumnsWithTypeAndName columns_with_types; + columns_with_types.emplace_back(left_arg_result.value, left_arg_result.type, ""); + columns_with_types.emplace_back(riget_arg_result.value, riget_arg_result.type, ""); + auto overload_resolver = FunctionFactory::instance().tryGet(function.name, context); + return {overload_resolver->getReturnType(columns_with_types), rewritten_in_func}; + } + // build set 
for IN statement(see also ActionsVisitor) SetPtr set; { diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index d88de00ec0b..0d47feb9526 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -3152,4 +3152,11 @@ bool ParserEscapeExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expe return true; } +bool ParserExecuteValue::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + return ParserTupleOfLiterals(dt).parse(pos, node, expected) + || ParserArrayOfLiterals(dt).parse(pos, node, expected) + || ParserLiteral(dt).parse(pos, node, expected); +} + } diff --git a/src/Parsers/ExpressionElementParsers.h b/src/Parsers/ExpressionElementParsers.h index 87013de0d31..dc1250df57e 100644 --- a/src/Parsers/ExpressionElementParsers.h +++ b/src/Parsers/ExpressionElementParsers.h @@ -642,4 +642,15 @@ class ParserEscapeExpression : public IParserDialectBase using IParserDialectBase::IParserDialectBase; }; +/** The Execute Value is one of: an expression in parentheses, an array of literals, a literal, a function. 
+ */ +class ParserExecuteValue : public IParserDialectBase +{ +protected: + const char * getName() const override { return "element of execute value"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +public: + using IParserDialectBase::IParserDialectBase; +}; + } diff --git a/src/Parsers/ParserPreparedParameter.cpp b/src/Parsers/ParserPreparedParameter.cpp index 4746007b55c..bfac5dc8636 100644 --- a/src/Parsers/ParserPreparedParameter.cpp +++ b/src/Parsers/ParserPreparedParameter.cpp @@ -7,6 +7,7 @@ #include #include #include +#include "Parsers/queryToString.h" namespace DB { @@ -27,14 +28,14 @@ bool ParserPreparedParameter::parseImpl(Pos & pos, ASTPtr & node, Expected & exp if (!ParserToken(TokenType::Colon).ignore(pos, expected)) return false; - if (!name_p.parse(pos, type_node, expected)) - return false; + ParserDataType type_parser(dt); + type_parser.parse(pos, type_node, expected); if (!ParserToken(TokenType::ClosingSquareBracket).ignore(pos, expected)) return false; tryGetIdentifierNameInto(identifier, prepared_parameter->name); - tryGetIdentifierNameInto(type_node, prepared_parameter->type); + prepared_parameter->type = queryToString(type_node); node = std::move(prepared_parameter); return true; } diff --git a/src/Parsers/ParserPreparedStatement.cpp b/src/Parsers/ParserPreparedStatement.cpp index 921e8a2eec6..7daf0370eab 100644 --- a/src/Parsers/ParserPreparedStatement.cpp +++ b/src/Parsers/ParserPreparedStatement.cpp @@ -8,6 +8,7 @@ #include #include #include +#include namespace DB { @@ -86,6 +87,7 @@ bool ParserExecutePreparedStatementQuery::parseImpl(Pos & pos, ASTPtr & node, Ex { ParserKeyword s_execute("EXECUTE PREPARED STATEMENT"); ParserKeyword s_using("USING"); + ParserToken s_comma(TokenType::Comma); if (!s_execute.ignore(pos, expected)) return false; @@ -107,9 +109,36 @@ bool ParserExecutePreparedStatementQuery::parseImpl(Pos & pos, ASTPtr & node, Ex if (s_using.ignore(pos, expected)) { - ParserSetQuery 
parser_settings(true); - if (!parser_settings.parse(pos, settings, expected)) - return false; + SettingsChanges changes; + ParserExecuteValue value_p(ParserSettings::CLICKHOUSE); + ParserToken s_eq(TokenType::Equals); + while (true) + { + if (!changes.empty() && !s_comma.ignore(pos)) + break; + + changes.push_back(SettingChange{}); + ASTPtr name; + ASTPtr value; + + if (!name_p.parse(pos, name, expected)) + return false; + + if (!s_eq.ignore(pos, expected)) + return false; + + if (!value_p.parse(pos, value, expected)) + return false; + + if (!value->as()) + return false; + + tryGetIdentifierNameInto(name, changes.back().name); + changes.back().value = value->as().value; + } + auto set_ast = std::make_shared(); + settings = set_ast; + set_ast->changes = std::move(changes); } else { diff --git a/tests/queries/4_cnch_stateless_no_tenant/48035_prepared_statement.reference b/tests/queries/4_cnch_stateless_no_tenant/48035_prepared_statement.reference index b4527d9ca63..94474c4a3eb 100644 --- a/tests/queries/4_cnch_stateless_no_tenant/48035_prepared_statement.reference +++ b/tests/queries/4_cnch_stateless_no_tenant/48035_prepared_statement.reference @@ -62,3 +62,13 @@ Projection Outputs: [number] prep1 prep4 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 diff --git a/tests/queries/4_cnch_stateless_no_tenant/48035_prepared_statement.sql b/tests/queries/4_cnch_stateless_no_tenant/48035_prepared_statement.sql index 9a72799fc40..2774cbf6dd6 100644 --- a/tests/queries/4_cnch_stateless_no_tenant/48035_prepared_statement.sql +++ b/tests/queries/4_cnch_stateless_no_tenant/48035_prepared_statement.sql @@ -91,3 +91,29 @@ WHERE number < [x: UInt32]; SHOW PREPARED STATEMENTS; DROP PREPARED STATEMENT IF EXISTS prep4; SHOW PREPARED STATEMENTS; + +CREATE PREPARED STATEMENT OR REPLACE prep1 AS +SELECT number +FROM +( + SELECT number + FROM system.numbers + LIMIT 10 +) +WHERE (number > [i:UInt64]) AND (number IN ([x:Tuple(UInt64)])); + +execute PREPARED STATEMENT prep1 using x=(2,4,6), i=6; + +CREATE 
PREPARED STATEMENT OR REPLACE prep1 AS +SELECT number +FROM +( + SELECT number + FROM system.numbers + LIMIT 10 +) +WHERE has([x:Array(Nullable(String))], NULL); + +execute PREPARED STATEMENT prep1 using x=['1','2',null]; + +DROP PREPARED STATEMENT IF EXISTS prep1; From f686248b21e9c5169e1d52f91ca98ef448a5330a Mon Sep 17 00:00:00 2001 From: fredwang Date: Thu, 1 Aug 2024 05:38:53 +0000 Subject: [PATCH 040/292] Merge 'cherry-pick-commit-63c4ebc6' into 'cnch-2.2' fix(clickhousech@m-4619222763): fix alter materialized view core and view tables type mismatch See merge request: !22614 --- src/Storages/IStorage.cpp | 7 +++++-- src/Storages/System/StorageSystemCnchViewTables.cpp | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index 6e19a4bccec..33897d8d3a8 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -273,8 +273,11 @@ NameDependencies IStorage::getDependentViewsByColumn(ContextPtr context) const { Names required_columns; const auto & select_query = view->getInMemoryMetadataPtr()->select.inner_query; - if (auto * select = select_query->as(); select->list_of_selects->children.size() == 1) - required_columns = InterpreterSelectQuery(select->list_of_selects->children.at(0)->clone(), context, SelectQueryOptions{}.noModify()).getRequiredColumns(); + if (auto * select = select_query->as()) + { + if (select->list_of_selects->children.size() == 1) + required_columns = InterpreterSelectQuery(select->list_of_selects->children.at(0)->clone(), context, SelectQueryOptions{}.noModify()).getRequiredColumns(); + } else if (select_query->as()) required_columns = InterpreterSelectQuery(select_query->clone(), context, SelectQueryOptions{}.noModify()).getRequiredColumns(); for (const auto & col_name : required_columns) diff --git a/src/Storages/System/StorageSystemCnchViewTables.cpp b/src/Storages/System/StorageSystemCnchViewTables.cpp index 3708f120e5d..33d37b4a3d1 100644 --- 
a/src/Storages/System/StorageSystemCnchViewTables.cpp +++ b/src/Storages/System/StorageSystemCnchViewTables.cpp @@ -49,7 +49,7 @@ StorageSystemCnchViewTables::StorageSystemCnchViewTables(const StorageID & table {"latest_visible_partitions", std::make_shared(std::make_shared())}, {"previous_partitions", std::make_shared(std::make_shared())}, {"refresh_type", std::make_shared()}, - {"refresh_start_time", std::make_shared()}, + {"refresh_start_time", std::make_shared()}, {"refresh_interval", std::make_shared()}, {"is_refeshable", std::make_shared()}, })); From 1e33f1a2d2fb39ef7cb45b279ae6a5784de9b2ba Mon Sep 17 00:00:00 2001 From: fredwang Date: Thu, 1 Aug 2024 05:39:13 +0000 Subject: [PATCH 041/292] Merge branch 'cherry-pick-ca8c314c' into 'cnch-2.2' fix(clickhousech@m-4655288004): fix unknown func [cp2.2] See merge request dp/ClickHouse!22674 From b2bb19155927ea452333b156599fec6f0939c2a5 Mon Sep 17 00:00:00 2001 From: fredwang Date: Thu, 1 Aug 2024 05:41:48 +0000 Subject: [PATCH 042/292] Merge 'cherry-pick-e4c028c5' into 'cnch-2.2' feat(optimizer@m-3987302752): support view grant check See merge request: !22781 --- .../ReplaceViewWithSubqueryVisitor.h | 20 ++++++++++ .../48055_view_grant_check.reference | 4 ++ .../48055_view_grant_check.sh | 40 +++++++++++++++++++ 3 files changed, 64 insertions(+) create mode 100755 tests/queries/4_cnch_stateless/48055_view_grant_check.reference create mode 100755 tests/queries/4_cnch_stateless/48055_view_grant_check.sh diff --git a/src/Analyzers/ReplaceViewWithSubqueryVisitor.h b/src/Analyzers/ReplaceViewWithSubqueryVisitor.h index 3cc3249fb15..e6091d26c0d 100644 --- a/src/Analyzers/ReplaceViewWithSubqueryVisitor.h +++ b/src/Analyzers/ReplaceViewWithSubqueryVisitor.h @@ -19,10 +19,17 @@ #include #include #include "Parsers/ASTIdentifier.h" +#include +#include namespace DB { +namespace ErrorCodes +{ + extern const int ACCESS_DENIED; +} + struct ReplaceViewWithSubquery { using TypeToVisit = ASTTableExpression; @@ -47,6 +54,19 @@ 
struct ReplaceViewWithSubquery if (dynamic_cast(table.get())) { auto table_metadata_snapshot = table->getInMemoryMetadataPtr(); + { + // check access rights. + auto access = context->getAccess(); + if (!access->isGranted(AccessType::SELECT, database_name, table_name)) + { + throw Exception( + ErrorCodes::ACCESS_DENIED, + "{}: Not enough privileges. To execute this query it's necessary to have grant SELECT on {}", + context->getUserName(), + table->getStorageID().getFullTableName()); + } + } + auto subquery = table_metadata_snapshot->getSelectQuery().inner_query->clone(); const auto alias = table_expression.database_and_table_name->tryGetAlias(); table_expression.database_and_table_name = {}; diff --git a/tests/queries/4_cnch_stateless/48055_view_grant_check.reference b/tests/queries/4_cnch_stateless/48055_view_grant_check.reference new file mode 100755 index 00000000000..6807202170c --- /dev/null +++ b/tests/queries/4_cnch_stateless/48055_view_grant_check.reference @@ -0,0 +1,4 @@ +--- +Not enough privileges +Not enough privileges +--- diff --git a/tests/queries/4_cnch_stateless/48055_view_grant_check.sh b/tests/queries/4_cnch_stateless/48055_view_grant_check.sh new file mode 100755 index 00000000000..329252f624f --- /dev/null +++ b/tests/queries/4_cnch_stateless/48055_view_grant_check.sh @@ -0,0 +1,40 @@ +#!/usr/bin/env bash +# Tags: no-parallel +# Tag no-parallel: create user + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. 
$CURDIR/../shell_config.sh + +set -e + +$CLICKHOUSE_CLIENT --query "DROP USER IF EXISTS user_test_02184;" +$CLICKHOUSE_CLIENT --query "DROP VIEW IF EXISTS view2;" +$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS ttt;" + +$CLICKHOUSE_CLIENT --query "CREATE USER user_test_02184 IDENTIFIED WITH plaintext_password BY 'user_test_02184';" +$CLICKHOUSE_CLIENT --query "REVOKE ALL ON *.* FROM user_test_02184;" + +$CLICKHOUSE_CLIENT --query "CREATE TABLE IF NOT EXISTS ttt (a String, b String) ENGINE = CnchMergeTree ORDER BY (a, b);" +$CLICKHOUSE_CLIENT --query "CREATE VIEW view2 (a String, b String) AS SELECT * FROM ttt WHERE a = 'aaa';" + +$CLICKHOUSE_CLIENT --query "GRANT SELECT ON ttt TO user_test_02184 WITH GRANT OPTION;" + +[ -v TENANT_ID ] && NEW_USER="${TENANT_ID}\`user_test_02184" +$CLICKHOUSE_CLIENT --user=$NEW_USER --password=user_test_02184 --query "SELECT * FROM ttt settings enable_optimizer=0;" 2>&1| grep -Fo "Not enough privileges" | uniq +$CLICKHOUSE_CLIENT --user=$NEW_USER --password=user_test_02184 --query "SELECT * FROM ttt settings enable_optimizer=1;" 2>&1| grep -Fo "Not enough privileges" | uniq + +echo "---" + +$CLICKHOUSE_CLIENT --user=$NEW_USER --password=user_test_02184 --query "SELECT a FROM view2 settings enable_optimizer=0;" 2>&1| grep -Fo "Not enough privileges" | uniq +$CLICKHOUSE_CLIENT --user=$NEW_USER --password=user_test_02184 --query "SELECT a FROM view2 settings enable_optimizer=1;" 2>&1| grep -Fo "Not enough privileges" | uniq + +$CLICKHOUSE_CLIENT --query "GRANT SELECT ON view2 TO user_test_02184 WITH GRANT OPTION;" + +echo "---" + +$CLICKHOUSE_CLIENT --user=$NEW_USER --password=user_test_02184 --query "SELECT a FROM view2 settings enable_optimizer=0;" 2>&1| grep -Fo "Not enough privileges" | uniq +$CLICKHOUSE_CLIENT --user=$NEW_USER --password=user_test_02184 --query "SELECT a FROM view2 settings enable_optimizer=1;" 2>&1| grep -Fo "Not enough privileges" | uniq + +$CLICKHOUSE_CLIENT --query "DROP VIEW view2;" +$CLICKHOUSE_CLIENT 
--query "DROP TABLE ttt;" +$CLICKHOUSE_CLIENT --query "DROP USER user_test_02184;" From ec3fd95ef71a809db14710ad2ab09f25b1e7a989 Mon Sep 17 00:00:00 2001 From: fredwang Date: Thu, 1 Aug 2024 05:42:06 +0000 Subject: [PATCH 043/292] remove annhelper --- src/Storages/MergeTree/MergeTreeDataPartCNCH.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeDataPartCNCH.cpp b/src/Storages/MergeTree/MergeTreeDataPartCNCH.cpp index 609cb094bfe..030030d10b1 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCNCH.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartCNCH.cpp @@ -44,7 +44,6 @@ #include #include "Storages/MergeTree/IMergeTreeDataPart_fwd.h" -#include "Interpreters/ANNHelper.h" namespace ProfileEvents { From 56e818f643d488f5e2261695fa896da0d3106142 Mon Sep 17 00:00:00 2001 From: fredwang Date: Thu, 1 Aug 2024 05:53:58 +0000 Subject: [PATCH 044/292] fix compile error --- src/CloudServices/CnchBGThreadCommon.h | 1 + src/CloudServices/CnchWorkerServiceImpl.cpp | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/src/CloudServices/CnchBGThreadCommon.h b/src/CloudServices/CnchBGThreadCommon.h index 9323b53a076..73822fd1fb5 100644 --- a/src/CloudServices/CnchBGThreadCommon.h +++ b/src/CloudServices/CnchBGThreadCommon.h @@ -17,6 +17,7 @@ #include #include +#include namespace DB { diff --git a/src/CloudServices/CnchWorkerServiceImpl.cpp b/src/CloudServices/CnchWorkerServiceImpl.cpp index 10a71836f65..ec297ea765e 100644 --- a/src/CloudServices/CnchWorkerServiceImpl.cpp +++ b/src/CloudServices/CnchWorkerServiceImpl.cpp @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include From c2094e356eaf6d372ab61cc6796251a2ca8ffff9 Mon Sep 17 00:00:00 2001 From: fredwang Date: Thu, 1 Aug 2024 07:16:22 +0000 Subject: [PATCH 045/292] skip the intermediate result cache --- tests/queries/skip_list.json | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json index 
2a66c0bf90a..328bb962beb 100644 --- a/tests/queries/skip_list.json +++ b/tests/queries/skip_list.json @@ -102,6 +102,7 @@ "01755_client_highlight_multi_line_comment_regression" ], "release-build": [ + "10102_intermediate_result_cache", // skip it temporarily "01801_s3_cluster", "01269_toStartOfSecond", "01103_check_cpu_instructions_at_startup", From 67482a7cbffb3338dbeecc34ebd7a1ab987a4789 Mon Sep 17 00:00:00 2001 From: fredwang Date: Thu, 1 Aug 2024 08:08:19 +0000 Subject: [PATCH 046/292] fix ci --- tests/queries/4_cnch_stateless/60003_mysql_user.sql | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/queries/4_cnch_stateless/60003_mysql_user.sql b/tests/queries/4_cnch_stateless/60003_mysql_user.sql index b35b717765a..cbf6802f47c 100644 --- a/tests/queries/4_cnch_stateless/60003_mysql_user.sql +++ b/tests/queries/4_cnch_stateless/60003_mysql_user.sql @@ -40,6 +40,10 @@ SELECT '' as Db,Host,User,Select_priv,Insert_priv,Update_priv,Delete_pri SELECT '' as scope,user,host FROM mysql.user where user = 'hello'; SELECT user, host, ssl_type, ssl_cipher, x509_issuer, x509_subject, max_questions, max_updates, max_connections, super_priv, max_user_connections FROM mysql.user where user = 'hello'; +drop role 'r1'; +drop role 'r2'; drop user hello; +drop table userpriv.xx; drop database userpriv; +drop table dbpriv.yy; drop database dbpriv; From 8f718998aa9f27ceb5c485bb183d71a7cabc0ebf Mon Sep 17 00:00:00 2001 From: fredwang Date: Thu, 1 Aug 2024 09:44:33 +0000 Subject: [PATCH 047/292] Merge 'cherry-pick-mr-22756' into 'cnch-2.2' fix(optimizer@m-4619270650): optimizer ban some function with subquery See merge request: !22771 # Conflicts: # src/Optimizer/QueryUseOptimizerChecker.cpp --- src/Optimizer/QueryUseOptimizerChecker.cpp | 20 +++++++++++++++----- src/Optimizer/QueryUseOptimizerChecker.h | 1 + 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/src/Optimizer/QueryUseOptimizerChecker.cpp b/src/Optimizer/QueryUseOptimizerChecker.cpp index 
8acf59fa875..9109e82bb5d 100644 --- a/src/Optimizer/QueryUseOptimizerChecker.cpp +++ b/src/Optimizer/QueryUseOptimizerChecker.cpp @@ -192,7 +192,7 @@ bool QueryUseOptimizerChecker::check(ASTPtr node, ContextMutablePtr context, boo if (!checkDatabaseAndTable(database, insert_query->table_id.getTableName(), context, {})) { - reason = "unsupported storage"; + reason = "unsupported storage, database: " + database + ", table: " + insert_query->table_id.getTableName(); support = false; } } @@ -247,6 +247,12 @@ bool QueryUseOptimizerVisitor::visitASTSelectQuery(ASTPtr & node, QueryUseOptimi return false; } + if (context.disallow_subquery) + { + reason = "lambda/nullIn/globalNullIn/notNullIn/globalNotNullIn function with subquery not implemented"; + return false; + } + if (select->group_by_with_totals && context.disallow_with_totals) { reason = "group by with totals only supports with totals at outmost select"; @@ -261,7 +267,7 @@ bool QueryUseOptimizerVisitor::visitASTSelectQuery(ASTPtr & node, QueryUseOptimi { if (!checkDatabaseAndTable(*table_expression, child_context.context, child_context.ctes)) { - reason = "unsupported storage"; + reason = "unsupported storage: " + table_expression->formatForErrorMessage(); return false; } if (table_expression->table_function) @@ -297,7 +303,7 @@ bool QueryUseOptimizerVisitor::visitASTFunction(ASTPtr & node, QueryUseOptimizer auto & fun = node->as(); if (fun.name == "untuple") { - reason = "unsupported function"; + reason = "unsupported untuple function"; return false; } @@ -311,13 +317,17 @@ bool QueryUseOptimizerVisitor::visitASTFunction(ASTPtr & node, QueryUseOptimizer table_expression.database_and_table_name = table; if (!checkDatabaseAndTable(table_expression, context.context, context.ctes)) { - reason = "unsupported storage"; + reason = "unsupported storage: " + table_expression.formatForErrorMessage(); return false; } } } } - return visitNode(node, context); + bool disallow_subquery = context.disallow_subquery; + 
context.disallow_subquery = disallow_subquery || (fun.name == "lambda" || fun.name == "nullIn" || fun.name == "globalNullIn" || fun.name == "notNullIn" || fun.name == "globalNotNullIn"); + bool support = visitNode(node, context); + context.disallow_subquery = disallow_subquery; + return support; } bool QueryUseOptimizerVisitor::visitASTQuantifiedComparison(ASTPtr & node, QueryUseOptimizerContext & context) diff --git a/src/Optimizer/QueryUseOptimizerChecker.h b/src/Optimizer/QueryUseOptimizerChecker.h index 141c7d0e41d..9ecd3ee1e5f 100644 --- a/src/Optimizer/QueryUseOptimizerChecker.h +++ b/src/Optimizer/QueryUseOptimizerChecker.h @@ -39,6 +39,7 @@ struct QueryUseOptimizerContext NameSet ctes; Tables external_tables; bool disallow_with_totals = false; + bool disallow_subquery = false; }; class QueryUseOptimizerVisitor : public ASTVisitor From 1aceb6154e6284fb07e3899cf1fc457d3185ec88 Mon Sep 17 00:00:00 2001 From: fredwang Date: Thu, 1 Aug 2024 09:44:57 +0000 Subject: [PATCH 048/292] Merge 'fix/fix_share_common_plan_for_join_cnch_2.2' into 'cnch-2.2' fix(optimizer@m-4655932058): fix share common plan node for join cnch-2.2 See merge request: !22710 --- src/Optimizer/Signature/StepNormalizer.cpp | 26 +------ src/Optimizer/tests/gtest_plan_signature.cpp | 2 +- .../tpcds/explains/tpcds100/q83.explain | 68 +++++++------------ .../tpcds/explains/tpcds1000/q83.explain | 68 +++++++------------ .../tpcds1000_not_show_stats/q83.explain | 66 +++++++----------- .../explains/tpcds1000_sample/q83.explain | 68 +++++++------------ .../51005_share_common_plan_node.reference | 29 ++++++++ .../51005_share_common_plan_node.sql | 28 ++++++++ .../40052_deadlock_cte.reference | 26 ++++--- 9 files changed, 172 insertions(+), 209 deletions(-) create mode 100644 tests/queries/4_cnch_stateless/51005_share_common_plan_node.reference create mode 100644 tests/queries/4_cnch_stateless/51005_share_common_plan_node.sql diff --git a/src/Optimizer/Signature/StepNormalizer.cpp 
b/src/Optimizer/Signature/StepNormalizer.cpp index 624b0b59326..d6f863b45e8 100644 --- a/src/Optimizer/Signature/StepNormalizer.cpp +++ b/src/Optimizer/Signature/StepNormalizer.cpp @@ -418,33 +418,13 @@ StepAndOutputOrder StepNormalizer::visitJoinStep(const JoinStep & step, StepsAnd DataStreams normal_input_streams = processInputStreams(step.getInputStreams(), inputs, symbol_mapping, cumulative_pos); createOutputSymbolMapping(step.getOutputStream().header, symbol_mapping, cumulative_pos); SymbolMapper symbol_mapper = SymbolMapper::simpleMapper(symbol_mapping); - QueryPlanStepPtr normal_step = symbol_mapper.map(step); + auto normal_step = symbol_mapper.map(step); auto output_header = normal_step->getOutputStream().header.getColumnsWithTypeAndName(); ExpressionReorderNormalizer::reorder(output_header); // replace the input_stream & output_stream because of reordering - normal_step = std::make_shared( - normal_input_streams, - DataStream{output_header}, - step.getKind(), - step.getStrictness(), - step.getMaxStreams(), - step.getKeepLeftReadInOrder(), - step.getLeftKeys(), - step.getRightKeys(), - step.getFilter(), - step.isHasUsing(), - step.getRequireRightKeys(), - step.getAsofInequality(), - step.getDistributionType(), - step.getJoinAlgorithm(), - step.isMagic(), - step.isOrdered(), - step.isSimpleReordered(), - // step.isParallel(), - // step.isBucket(), - step.getRuntimeFilterBuilders(), - step.getHints()); + normal_step->setInputStreams(normal_input_streams); + normal_step->setOutputStream(DataStream{output_header}); Block output_order = getOutputOrder(step, *normal_step, symbol_mapper); return StepAndOutputOrder{normal_step, std::move(output_order)}; diff --git a/src/Optimizer/tests/gtest_plan_signature.cpp b/src/Optimizer/tests/gtest_plan_signature.cpp index 03317a35745..569e77f9dd4 100644 --- a/src/Optimizer/tests/gtest_plan_signature.cpp +++ b/src/Optimizer/tests/gtest_plan_signature.cpp @@ -216,7 +216,7 @@ TEST_F(PlanSignatureTest, 
testTpcdsAllSignaturesWithoutRuntimeFilter) } std::sort( sorted_by_freq.begin(), sorted_by_freq.end(), [](const auto & left, const auto & right) { return left.size() > right.size(); }); - EXPECT_EQ(sorted_by_freq.size(), 11); + EXPECT_EQ(sorted_by_freq.size(), 12); // all binary mappings EXPECT_EQ(sorted_by_freq[0].size(), 2); // std::unordered_map query_mapping; diff --git a/tests/optimizers/tpcds/explains/tpcds100/q83.explain b/tests/optimizers/tpcds/explains/tpcds100/q83.explain index ccc9972d354..c94667b1bfa 100644 --- a/tests/optimizers/tpcds/explains/tpcds100/q83.explain +++ b/tests/optimizers/tpcds/explains/tpcds100/q83.explain @@ -45,17 +45,8 @@ Projection Est. 100 rows │ │ Where: Runtime Filters: {d_date} │ │ Outputs: d_date_sk_1:=d_date_sk, d_date_3:=d_date │ └─ Broadcast Exchange Est. 20 rows - │ └─ Left Semi Join Est. 20 rows - │ │ Condition: d_week_seq_2 == d_week_seq_3 - │ │ Runtime Filters Builder: {d_week_seq_3} - │ ├─ Filter Est. 73049 rows - │ │ │ Condition: Runtime Filters: {d_week_seq_2} - │ │ └─ TableScan tpcds100.date_dim Est. 73049 rows - │ │ Where: Runtime Filters: {d_week_seq} - │ │ Outputs: d_date_4:=d_date, d_week_seq_2:=d_week_seq - │ └─ Broadcast Exchange Est. 3 rows - │ └─ Buffer Est. 3 rows - │ └─ CTERef[1] Est. 3 rows + │ └─ Buffer Est. 20 rows + │ └─ CTERef[1] Est. 20 rows └─ Inner Join Est. 65882 rows │ Condition: i_item_id == i_item_id_2 │ Runtime Filters Builder: {i_item_id_2} @@ -85,17 +76,8 @@ Projection Est. 100 rows │ │ │ Where: Runtime Filters: {d_date} │ │ │ Outputs: [d_date_sk, d_date] │ │ └─ Broadcast Exchange Est. 20 rows - │ │ └─ Left Semi Join Est. 20 rows - │ │ │ Condition: d_week_seq == d_week_seq_1 - │ │ │ Runtime Filters Builder: {d_week_seq_1} - │ │ ├─ Filter Est. 73049 rows - │ │ │ │ Condition: Runtime Filters: {d_week_seq} - │ │ │ └─ TableScan tpcds100.date_dim Est. 73049 rows - │ │ │ Where: Runtime Filters: {d_week_seq} - │ │ │ Outputs: [d_week_seq], d_date_1:=d_date - │ │ └─ Broadcast Exchange Est. 
3 rows - │ │ └─ Buffer Est. 3 rows - │ │ └─ CTERef[1] Est. 3 rows + │ │ └─ Buffer Est. 20 rows + │ │ └─ CTERef[1] Est. 20 rows │ └─ Filter Est. 204000 rows │ │ Condition: Runtime Filters: {i_item_id} │ └─ TableScan tpcds100.item Est. 204000 rows @@ -133,26 +115,26 @@ Projection Est. 100 rows │ Where: Runtime Filters: {d_date} │ Outputs: d_date_sk_2:=d_date_sk, d_date_6:=d_date └─ Broadcast Exchange Est. 20 rows - └─ Left Semi Join Est. 20 rows - │ Condition: d_week_seq_4 == d_week_seq_5 - │ Runtime Filters Builder: {d_week_seq_5} - ├─ Filter Est. 73049 rows - │ │ Condition: Runtime Filters: {d_week_seq_4} - │ └─ TableScan tpcds100.date_dim Est. 73049 rows - │ Where: Runtime Filters: {d_week_seq} - │ Outputs: d_date_7:=d_date, d_week_seq_4:=d_week_seq - └─ Broadcast Exchange Est. 3 rows - └─ Buffer Est. 3 rows - └─ CTERef[1] Est. 3 rows + └─ Buffer Est. 20 rows + └─ CTERef[1] Est. 20 rows CTEDef [1] - Repartition Exchange Est. 3 rows - │ Partition by: {d_week_seq_1} - └─ Projection Est. 3 rows - │ Expressions: [d_week_seq_1] - └─ Filter Est. 3 rows - │ Condition: d_date_2 IN (cast(11138, 'Date32'), cast(11227, 'Date32'), cast(11278, 'Date32')) - └─ TableScan tpcds100.date_dim Est. 73049 rows - Where: d_date IN (cast(11138, 'Date32'), cast(11227, 'Date32'), cast(11278, 'Date32')) - Outputs: d_date_2:=d_date, d_week_seq_1:=d_week_seq -note: Runtime Filter is applied for 12 times. + Repartition Exchange Est. 20 rows + │ Partition by: {d_date_1} + └─ Left Semi Join Est. 20 rows + │ Condition: d_week_seq == d_week_seq_1 + │ Runtime Filters Builder: {d_week_seq_1} + ├─ Filter Est. 73049 rows + │ │ Condition: Runtime Filters: {d_week_seq} + │ └─ TableScan tpcds100.date_dim Est. 73049 rows + │ Where: Runtime Filters: {d_week_seq} + │ Outputs: [d_week_seq], d_date_1:=d_date + └─ Broadcast Exchange Est. 3 rows + └─ Projection Est. 3 rows + │ Expressions: [d_week_seq_1] + └─ Filter Est. 
3 rows + │ Condition: d_date_2 IN (cast(11138, 'Date32'), cast(11227, 'Date32'), cast(11278, 'Date32')) + └─ TableScan tpcds100.date_dim Est. 73049 rows + Where: d_date IN (cast(11138, 'Date32'), cast(11227, 'Date32'), cast(11278, 'Date32')) + Outputs: d_date_2:=d_date, d_week_seq_1:=d_week_seq +note: Runtime Filter is applied for 10 times. note: CTE(Common Table Expression) is applied for 3 times. diff --git a/tests/optimizers/tpcds/explains/tpcds1000/q83.explain b/tests/optimizers/tpcds/explains/tpcds1000/q83.explain index 4620b7aa3f3..cafa6aebdc3 100644 --- a/tests/optimizers/tpcds/explains/tpcds1000/q83.explain +++ b/tests/optimizers/tpcds/explains/tpcds1000/q83.explain @@ -43,17 +43,8 @@ Projection Est. 100 rows │ │ │ │ Where: Runtime Filters: {d_date} │ │ │ │ Outputs: d_date_sk_1:=d_date_sk, d_date_3:=d_date │ │ │ └─ Broadcast Exchange Est. 20 rows - │ │ │ └─ Left Semi Join Est. 20 rows - │ │ │ │ Condition: d_week_seq_2 == d_week_seq_3 - │ │ │ │ Runtime Filters Builder: {d_week_seq_3} - │ │ │ ├─ Filter Est. 73049 rows - │ │ │ │ │ Condition: Runtime Filters: {d_week_seq_2} - │ │ │ │ └─ TableScan tpcds1000.date_dim Est. 73049 rows - │ │ │ │ Where: Runtime Filters: {d_week_seq} - │ │ │ │ Outputs: d_date_4:=d_date, d_week_seq_2:=d_week_seq - │ │ │ └─ Broadcast Exchange Est. 3 rows - │ │ │ └─ Buffer Est. 3 rows - │ │ │ └─ CTERef[1] Est. 3 rows + │ │ │ └─ Buffer Est. 20 rows + │ │ │ └─ CTERef[1] Est. 20 rows │ │ └─ Filter Est. 300000 rows │ │ │ Condition: Runtime Filters: {i_item_id_1} │ │ └─ TableScan tpcds1000.item Est. 300000 rows @@ -85,17 +76,8 @@ Projection Est. 100 rows │ │ │ Where: Runtime Filters: {d_date} │ │ │ Outputs: [d_date_sk, d_date] │ │ └─ Broadcast Exchange Est. 20 rows - │ │ └─ Left Semi Join Est. 20 rows - │ │ │ Condition: d_week_seq == d_week_seq_1 - │ │ │ Runtime Filters Builder: {d_week_seq_1} - │ │ ├─ Filter Est. 73049 rows - │ │ │ │ Condition: Runtime Filters: {d_week_seq} - │ │ │ └─ TableScan tpcds1000.date_dim Est. 
73049 rows - │ │ │ Where: Runtime Filters: {d_week_seq} - │ │ │ Outputs: [d_week_seq], d_date_1:=d_date - │ │ └─ Broadcast Exchange Est. 3 rows - │ │ └─ Buffer Est. 3 rows - │ │ └─ CTERef[1] Est. 3 rows + │ │ └─ Buffer Est. 20 rows + │ │ └─ CTERef[1] Est. 20 rows │ └─ Filter Est. 300000 rows │ │ Condition: Runtime Filters: {i_item_id} │ └─ TableScan tpcds1000.item Est. 300000 rows @@ -127,28 +109,28 @@ Projection Est. 100 rows │ │ Where: Runtime Filters: {d_date} │ │ Outputs: d_date_sk_2:=d_date_sk, d_date_6:=d_date │ └─ Broadcast Exchange Est. 20 rows - │ └─ Left Semi Join Est. 20 rows - │ │ Condition: d_week_seq_4 == d_week_seq_5 - │ │ Runtime Filters Builder: {d_week_seq_5} - │ ├─ Filter Est. 73049 rows - │ │ │ Condition: Runtime Filters: {d_week_seq_4} - │ │ └─ TableScan tpcds1000.date_dim Est. 73049 rows - │ │ Where: Runtime Filters: {d_week_seq} - │ │ Outputs: d_date_7:=d_date, d_week_seq_4:=d_week_seq - │ └─ Broadcast Exchange Est. 3 rows - │ └─ Buffer Est. 3 rows - │ └─ CTERef[1] Est. 3 rows + │ └─ Buffer Est. 20 rows + │ └─ CTERef[1] Est. 20 rows └─ TableScan tpcds1000.item Est. 300000 rows Outputs: i_item_sk_2:=i_item_sk, i_item_id_2:=i_item_id CTEDef [1] - Repartition Exchange Est. 3 rows - │ Partition by: {d_week_seq_1} - └─ Projection Est. 3 rows - │ Expressions: [d_week_seq_1] - └─ Filter Est. 3 rows - │ Condition: d_date_2 IN (cast(11138, 'Date32'), cast(11227, 'Date32'), cast(11278, 'Date32')) - └─ TableScan tpcds1000.date_dim Est. 73049 rows - Where: d_date IN (cast(11138, 'Date32'), cast(11227, 'Date32'), cast(11278, 'Date32')) - Outputs: d_date_2:=d_date, d_week_seq_1:=d_week_seq -note: Runtime Filter is applied for 12 times. + Repartition Exchange Est. 20 rows + │ Partition by: {d_date_1} + └─ Left Semi Join Est. 20 rows + │ Condition: d_week_seq == d_week_seq_1 + │ Runtime Filters Builder: {d_week_seq_1} + ├─ Filter Est. 73049 rows + │ │ Condition: Runtime Filters: {d_week_seq} + │ └─ TableScan tpcds1000.date_dim Est. 
73049 rows + │ Where: Runtime Filters: {d_week_seq} + │ Outputs: [d_week_seq], d_date_1:=d_date + └─ Broadcast Exchange Est. 3 rows + └─ Projection Est. 3 rows + │ Expressions: [d_week_seq_1] + └─ Filter Est. 3 rows + │ Condition: d_date_2 IN (cast(11138, 'Date32'), cast(11227, 'Date32'), cast(11278, 'Date32')) + └─ TableScan tpcds1000.date_dim Est. 73049 rows + Where: d_date IN (cast(11138, 'Date32'), cast(11227, 'Date32'), cast(11278, 'Date32')) + Outputs: d_date_2:=d_date, d_week_seq_1:=d_week_seq +note: Runtime Filter is applied for 10 times. note: CTE(Common Table Expression) is applied for 3 times. diff --git a/tests/optimizers/tpcds/explains/tpcds1000_not_show_stats/q83.explain b/tests/optimizers/tpcds/explains/tpcds1000_not_show_stats/q83.explain index fc465a45cf7..28d78212c69 100644 --- a/tests/optimizers/tpcds/explains/tpcds1000_not_show_stats/q83.explain +++ b/tests/optimizers/tpcds/explains/tpcds1000_not_show_stats/q83.explain @@ -43,17 +43,8 @@ Projection │ │ │ │ Where: Runtime Filters: {d_date} │ │ │ │ Outputs: d_date_sk_1:=d_date_sk, d_date_3:=d_date │ │ │ └─ Broadcast Exchange - │ │ │ └─ Left Semi Join - │ │ │ │ Condition: d_week_seq_2 == d_week_seq_3 - │ │ │ │ Runtime Filters Builder: {d_week_seq_3} - │ │ │ ├─ Filter - │ │ │ │ │ Condition: Runtime Filters: {d_week_seq_2} - │ │ │ │ └─ TableScan tpcds1000.date_dim - │ │ │ │ Where: Runtime Filters: {d_week_seq} - │ │ │ │ Outputs: d_date_4:=d_date, d_week_seq_2:=d_week_seq - │ │ │ └─ Broadcast Exchange - │ │ │ └─ Buffer - │ │ │ └─ CTERef[1] + │ │ │ └─ Buffer + │ │ │ └─ CTERef[1] │ │ └─ Filter │ │ │ Condition: Runtime Filters: {i_item_id_1} │ │ └─ TableScan tpcds1000.item @@ -85,17 +76,8 @@ Projection │ │ │ Where: Runtime Filters: {d_date} │ │ │ Outputs: [d_date_sk, d_date] │ │ └─ Broadcast Exchange - │ │ └─ Left Semi Join - │ │ │ Condition: d_week_seq == d_week_seq_1 - │ │ │ Runtime Filters Builder: {d_week_seq_1} - │ │ ├─ Filter - │ │ │ │ Condition: Runtime Filters: {d_week_seq} - │ │ │ └─ TableScan 
tpcds1000.date_dim - │ │ │ Where: Runtime Filters: {d_week_seq} - │ │ │ Outputs: [d_week_seq], d_date_1:=d_date - │ │ └─ Broadcast Exchange - │ │ └─ Buffer - │ │ └─ CTERef[1] + │ │ └─ Buffer + │ │ └─ CTERef[1] │ └─ Filter │ │ Condition: Runtime Filters: {i_item_id} │ └─ TableScan tpcds1000.item @@ -127,28 +109,28 @@ Projection │ │ Where: Runtime Filters: {d_date} │ │ Outputs: d_date_sk_2:=d_date_sk, d_date_6:=d_date │ └─ Broadcast Exchange - │ └─ Left Semi Join - │ │ Condition: d_week_seq_4 == d_week_seq_5 - │ │ Runtime Filters Builder: {d_week_seq_5} - │ ├─ Filter - │ │ │ Condition: Runtime Filters: {d_week_seq_4} - │ │ └─ TableScan tpcds1000.date_dim - │ │ Where: Runtime Filters: {d_week_seq} - │ │ Outputs: d_date_7:=d_date, d_week_seq_4:=d_week_seq - │ └─ Broadcast Exchange - │ └─ Buffer - │ └─ CTERef[1] + │ └─ Buffer + │ └─ CTERef[1] └─ TableScan tpcds1000.item Outputs: i_item_sk_2:=i_item_sk, i_item_id_2:=i_item_id CTEDef [1] Repartition Exchange - │ Partition by: {d_week_seq_1} - └─ Projection - │ Expressions: [d_week_seq_1] - └─ Filter - │ Condition: d_date_2 IN (cast(11138, 'Date32'), cast(11227, 'Date32'), cast(11278, 'Date32')) - └─ TableScan tpcds1000.date_dim - Where: d_date IN (cast(11138, 'Date32'), cast(11227, 'Date32'), cast(11278, 'Date32')) - Outputs: d_date_2:=d_date, d_week_seq_1:=d_week_seq -note: Runtime Filter is applied for 12 times. 
+ │ Partition by: {d_date_1} + └─ Left Semi Join + │ Condition: d_week_seq == d_week_seq_1 + │ Runtime Filters Builder: {d_week_seq_1} + ├─ Filter + │ │ Condition: Runtime Filters: {d_week_seq} + │ └─ TableScan tpcds1000.date_dim + │ Where: Runtime Filters: {d_week_seq} + │ Outputs: [d_week_seq], d_date_1:=d_date + └─ Broadcast Exchange + └─ Projection + │ Expressions: [d_week_seq_1] + └─ Filter + │ Condition: d_date_2 IN (cast(11138, 'Date32'), cast(11227, 'Date32'), cast(11278, 'Date32')) + └─ TableScan tpcds1000.date_dim + Where: d_date IN (cast(11138, 'Date32'), cast(11227, 'Date32'), cast(11278, 'Date32')) + Outputs: d_date_2:=d_date, d_week_seq_1:=d_week_seq +note: Runtime Filter is applied for 10 times. note: CTE(Common Table Expression) is applied for 3 times. diff --git a/tests/optimizers/tpcds/explains/tpcds1000_sample/q83.explain b/tests/optimizers/tpcds/explains/tpcds1000_sample/q83.explain index a145133c6b5..841bcd22cab 100644 --- a/tests/optimizers/tpcds/explains/tpcds1000_sample/q83.explain +++ b/tests/optimizers/tpcds/explains/tpcds1000_sample/q83.explain @@ -43,17 +43,8 @@ Projection Est. 100 rows │ │ │ │ Where: Runtime Filters: {d_date} │ │ │ │ Outputs: d_date_sk_1:=d_date_sk, d_date_3:=d_date │ │ │ └─ Broadcast Exchange Est. 20 rows - │ │ │ └─ Left Semi Join Est. 20 rows - │ │ │ │ Condition: d_week_seq_2 == d_week_seq_3 - │ │ │ │ Runtime Filters Builder: {d_week_seq_3} - │ │ │ ├─ Filter Est. 73049 rows - │ │ │ │ │ Condition: Runtime Filters: {d_week_seq_2} - │ │ │ │ └─ TableScan tpcds1000_sample.date_dim Est. 73049 rows - │ │ │ │ Where: Runtime Filters: {d_week_seq} - │ │ │ │ Outputs: d_date_4:=d_date, d_week_seq_2:=d_week_seq - │ │ │ └─ Broadcast Exchange Est. 3 rows - │ │ │ └─ Buffer Est. 3 rows - │ │ │ └─ CTERef[1] Est. 3 rows + │ │ │ └─ Buffer Est. 20 rows + │ │ │ └─ CTERef[1] Est. 20 rows │ │ └─ Filter Est. 300000 rows │ │ │ Condition: Runtime Filters: {i_item_id_1} │ │ └─ TableScan tpcds1000_sample.item Est. 
300000 rows @@ -85,17 +76,8 @@ Projection Est. 100 rows │ │ │ Where: Runtime Filters: {d_date} │ │ │ Outputs: [d_date_sk, d_date] │ │ └─ Broadcast Exchange Est. 20 rows - │ │ └─ Left Semi Join Est. 20 rows - │ │ │ Condition: d_week_seq == d_week_seq_1 - │ │ │ Runtime Filters Builder: {d_week_seq_1} - │ │ ├─ Filter Est. 73049 rows - │ │ │ │ Condition: Runtime Filters: {d_week_seq} - │ │ │ └─ TableScan tpcds1000_sample.date_dim Est. 73049 rows - │ │ │ Where: Runtime Filters: {d_week_seq} - │ │ │ Outputs: [d_week_seq], d_date_1:=d_date - │ │ └─ Broadcast Exchange Est. 3 rows - │ │ └─ Buffer Est. 3 rows - │ │ └─ CTERef[1] Est. 3 rows + │ │ └─ Buffer Est. 20 rows + │ │ └─ CTERef[1] Est. 20 rows │ └─ Filter Est. 300000 rows │ │ Condition: Runtime Filters: {i_item_id} │ └─ TableScan tpcds1000_sample.item Est. 300000 rows @@ -127,28 +109,28 @@ Projection Est. 100 rows │ │ Where: Runtime Filters: {d_date} │ │ Outputs: d_date_sk_2:=d_date_sk, d_date_6:=d_date │ └─ Broadcast Exchange Est. 20 rows - │ └─ Left Semi Join Est. 20 rows - │ │ Condition: d_week_seq_4 == d_week_seq_5 - │ │ Runtime Filters Builder: {d_week_seq_5} - │ ├─ Filter Est. 73049 rows - │ │ │ Condition: Runtime Filters: {d_week_seq_4} - │ │ └─ TableScan tpcds1000_sample.date_dim Est. 73049 rows - │ │ Where: Runtime Filters: {d_week_seq} - │ │ Outputs: d_date_7:=d_date, d_week_seq_4:=d_week_seq - │ └─ Broadcast Exchange Est. 3 rows - │ └─ Buffer Est. 3 rows - │ └─ CTERef[1] Est. 3 rows + │ └─ Buffer Est. 20 rows + │ └─ CTERef[1] Est. 20 rows └─ TableScan tpcds1000_sample.item Est. 300000 rows Outputs: i_item_sk_2:=i_item_sk, i_item_id_2:=i_item_id CTEDef [1] - Repartition Exchange Est. 3 rows - │ Partition by: {d_week_seq_1} - └─ Projection Est. 3 rows - │ Expressions: [d_week_seq_1] - └─ Filter Est. 3 rows - │ Condition: d_date_2 IN (cast(11138, 'Date32'), cast(11227, 'Date32'), cast(11278, 'Date32')) - └─ TableScan tpcds1000_sample.date_dim Est. 
73049 rows - Where: d_date IN (cast(11138, 'Date32'), cast(11227, 'Date32'), cast(11278, 'Date32')) - Outputs: d_date_2:=d_date, d_week_seq_1:=d_week_seq -note: Runtime Filter is applied for 12 times. + Repartition Exchange Est. 20 rows + │ Partition by: {d_date_1} + └─ Left Semi Join Est. 20 rows + │ Condition: d_week_seq == d_week_seq_1 + │ Runtime Filters Builder: {d_week_seq_1} + ├─ Filter Est. 73049 rows + │ │ Condition: Runtime Filters: {d_week_seq} + │ └─ TableScan tpcds1000_sample.date_dim Est. 73049 rows + │ Where: Runtime Filters: {d_week_seq} + │ Outputs: [d_week_seq], d_date_1:=d_date + └─ Broadcast Exchange Est. 3 rows + └─ Projection Est. 3 rows + │ Expressions: [d_week_seq_1] + └─ Filter Est. 3 rows + │ Condition: d_date_2 IN (cast(11138, 'Date32'), cast(11227, 'Date32'), cast(11278, 'Date32')) + └─ TableScan tpcds1000_sample.date_dim Est. 73049 rows + Where: d_date IN (cast(11138, 'Date32'), cast(11227, 'Date32'), cast(11278, 'Date32')) + Outputs: d_date_2:=d_date, d_week_seq_1:=d_week_seq +note: Runtime Filter is applied for 10 times. note: CTE(Common Table Expression) is applied for 3 times. diff --git a/tests/queries/4_cnch_stateless/51005_share_common_plan_node.reference b/tests/queries/4_cnch_stateless/51005_share_common_plan_node.reference new file mode 100644 index 00000000000..6d408ff9947 --- /dev/null +++ b/tests/queries/4_cnch_stateless/51005_share_common_plan_node.reference @@ -0,0 +1,29 @@ +Projection Est. ? rows +│ Expressions: id:=id_4 +└─ Gather Exchange Est. ? rows + └─ Union Est. ? rows + │ OutputToInputs: id_4 = [id,id_2] + ├─ Projection Est. ? rows + │ │ Expressions: [id] + │ └─ Filter Est. ? rows + │ │ Condition: k1 >= 1 + │ └─ CTERef[1] Est. ? rows + └─ Projection Est. ? rows + │ Expressions: [id_2] + └─ Filter Est. ? rows + │ Condition: k1_1 >= 2 + └─ CTERef[1] Est. ? rows +CTEDef [1] + Left Join Est. ? rows + │ Condition: id == id_1 + │ Filter: k1 > 0 + ├─ Filter Est. ? 
rows + │ │ Condition: k1 >= 1 + │ └─ TableScan 1234.default.51005_share_common_plan_node Est. ? rows + │ Where: k1 >= 1 + │ Outputs: [id, k1] + └─ Broadcast Exchange Est. ? rows + └─ TableScan 1234.default.51005_share_common_plan_node Est. ? rows + Outputs: id_1:=id +note: CTE(Common Table Expression) is applied for 2 times. +1 diff --git a/tests/queries/4_cnch_stateless/51005_share_common_plan_node.sql b/tests/queries/4_cnch_stateless/51005_share_common_plan_node.sql new file mode 100644 index 00000000000..7e417edcaae --- /dev/null +++ b/tests/queries/4_cnch_stateless/51005_share_common_plan_node.sql @@ -0,0 +1,28 @@ +CREATE TABLE 51005_share_common_plan_node +( + `id` UInt32, + `k1` UInt32, + `k2` String +) +ENGINE = CnchMergeTree +ORDER BY id; + +insert into 51005_share_common_plan_node values (1,1,'1'); + +set enable_optimizer=1; +set enable_share_common_plan_node=1; +set dialect_type='ANSI'; + +explain select id from +( + select t1.id from 51005_share_common_plan_node t1 left join 51005_share_common_plan_node t2 on t1.id = t2.id and t1.k1 > 0 where t1.k1 >= 1 +) union all ( + select t1.id from 51005_share_common_plan_node t1 left join 51005_share_common_plan_node t2 on t1.id = t2.id and t1.k1 > 0 where t1.k1 >= 2 +); + +select id from +( + select t1.id from 51005_share_common_plan_node t1 left join 51005_share_common_plan_node t2 on t1.id = t2.id and t1.k1 > 0 where t1.k1 >= 1 +) union all ( + select t1.id from 51005_share_common_plan_node t1 left join 51005_share_common_plan_node t2 on t1.id = t2.id and t1.k1 > 0 where t1.k1 >= 2 +); \ No newline at end of file diff --git a/tests/queries/4_cnch_stateless_no_tenant/40052_deadlock_cte.reference b/tests/queries/4_cnch_stateless_no_tenant/40052_deadlock_cte.reference index ba24dfdd25b..48f65e07e47 100644 --- a/tests/queries/4_cnch_stateless_no_tenant/40052_deadlock_cte.reference +++ b/tests/queries/4_cnch_stateless_no_tenant/40052_deadlock_cte.reference @@ -131,25 +131,23 @@ Projection Est. ? 
rows │ Expressions: x:=`expr#rand(1)_3` └─ Union Est. ? rows │ OutputToInputs: expr#rand(1)_3 = [expr#rand(1)_1,expr#rand(1)_2] - ├─ Inner Join Est. ? rows - │ │ Condition: expr#rand(1)_1 == expr#rand(2)_1 - │ ├─ CTERef[0] Est. 1 rows - │ └─ Buffer Est. 1 rows - │ └─ CTERef[1] Est. 1 rows - └─ Inner Join Est. ? rows - │ Condition: expr#rand(1)_2 == expr#rand(2)_2 - ├─ CTERef[0] Est. 1 rows - └─ Buffer Est. 1 rows - └─ CTERef[1] Est. 1 rows + ├─ CTERef[2] Est. ? rows + └─ CTERef[2] Est. ? rows CTEDef [0] Projection Est. 1 rows │ Expressions: expr#rand(1):=rand(1) └─ Values Est. 1 rows CTEDef [1] - Broadcast Exchange Est. 1 rows - └─ Projection Est. 1 rows - │ Expressions: expr#rand(2):=rand(2) - └─ Values Est. 1 rows + Projection Est. 1 rows + │ Expressions: expr#rand(2):=rand(2) + └─ Values Est. 1 rows +CTEDef [2] + Inner Join Est. ? rows + │ Condition: expr#rand(1)_1 == expr#rand(2)_1 + ├─ CTERef[0] Est. 1 rows + └─ Broadcast Exchange Est. 1 rows + └─ Buffer Est. 1 rows + └─ CTERef[1] Est. 1 rows note: CTE(Common Table Expression) is applied for 4 times. explain with c1 as (select rand(1) x), c2 as (select rand(2) x) select t1.x as x from c1 t1 join c2 t2 on t1.x = t2.x union all select t3.x as x from c2 t3 join c1 t4 on t3.x = t4.x; Projection Est. ? 
rows From 1944972b3d515e31e4e28c7dc68fcddfb0b7e997 Mon Sep 17 00:00:00 2001 From: fredwang Date: Thu, 1 Aug 2024 09:46:50 +0000 Subject: [PATCH 049/292] Merge 'cherry-pick-commit-38fc4b64' into 'cnch-2.2' fix(clickhousech@m-4505503868): remove mv materialized column check during create table See merge request: !22623 # Conflicts: # src/Interpreters/InterpreterCreateQuery.cpp --- src/Interpreters/InterpreterCreateQuery.cpp | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 9547ba35054..01b5113db77 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -1572,23 +1572,6 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create, try { res->checkColumnsValidity(properties.columns); - if (auto * view = dynamic_cast(res.get())) - { - // if (view->async() && getContext()->getSettingsRef().enable_non_partitioned_base_refresh_throw_exception) - // view->validatePartitionBased(getContext()); - - if (view->tryGetTargetTable() && !view->hasInnerTable()) - { - StoragePtr target_table = view->tryGetTargetTable(); - if (!target_table->getInMemoryMetadataPtr()->getColumns().getMaterialized().empty()) - throw Exception( - ErrorCodes::ILLEGAL_COLUMN, - "Cannot create materialized view {} to target table {} with materialized columns {}", - view->getStorageID().getNameForLogs(), - target_table->getStorageID().getNameForLogs(), - target_table->getInMemoryMetadataPtr()->getColumns().getMaterialized().toString()); - } - } } catch (...) 
{ From dce00e96b7231ef419bc27350fe23d226f186cb3 Mon Sep 17 00:00:00 2001 From: fredwang Date: Thu, 1 Aug 2024 09:47:12 +0000 Subject: [PATCH 050/292] Merge 'cherry-pick-adb9de03-2' into 'cnch-2.2' fix(optimizer@m-4549369693): cherry-pick 2 fixes from ce-dev See merge request: !22815 --- src/Optimizer/Rewriter/GroupByKeysPruning.cpp | 7 +++++- src/Optimizer/Rewriter/PredicatePushdown.cpp | 8 ++++-- .../48054_group_by_pruning_type.reference | 1 + .../48054_group_by_pruning_type.sql | 12 +++++++++ .../48055_grouping_sets_predicates.reference | 25 +++++++++++++++++++ .../48055_grouping_sets_predicates.sql | 17 +++++++++++++ 6 files changed, 67 insertions(+), 3 deletions(-) create mode 100644 tests/queries/4_cnch_stateless_no_tenant/48054_group_by_pruning_type.reference create mode 100644 tests/queries/4_cnch_stateless_no_tenant/48054_group_by_pruning_type.sql create mode 100644 tests/queries/4_cnch_stateless_no_tenant/48055_grouping_sets_predicates.reference create mode 100644 tests/queries/4_cnch_stateless_no_tenant/48055_grouping_sets_predicates.sql diff --git a/src/Optimizer/Rewriter/GroupByKeysPruning.cpp b/src/Optimizer/Rewriter/GroupByKeysPruning.cpp index fc10bc8cebe..790acab5df4 100644 --- a/src/Optimizer/Rewriter/GroupByKeysPruning.cpp +++ b/src/Optimizer/Rewriter/GroupByKeysPruning.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -12,6 +13,7 @@ #include #include #include +#include namespace DB { @@ -178,7 +180,10 @@ PlanAndDataDependencyWithConstants GroupByKeysPruning::Rewriter::visitAggregatin } for (const auto & [name, literal] : constants_values) { - new_assignments.emplace(name, std::make_shared(literal.value)); + // date/datetime should make a cast function + // but nullable(UInt64) shouldn't make a cast function + auto literal_ast = LiteralEncoder::encodeForComparisonExpr(literal.value, literal.type, context); + new_assignments.emplace(name, std::move(literal_ast)); new_name_to_type[name] = literal.type; } diff 
--git a/src/Optimizer/Rewriter/PredicatePushdown.cpp b/src/Optimizer/Rewriter/PredicatePushdown.cpp index 7138cb4cdf8..5fa72ec6b6c 100644 --- a/src/Optimizer/Rewriter/PredicatePushdown.cpp +++ b/src/Optimizer/Rewriter/PredicatePushdown.cpp @@ -225,13 +225,17 @@ PlanNodePtr PredicateVisitor::visitAggregatingNode(AggregatingNode & node, Predi const auto & step = *node.getStep(); const auto & keys = step.getKeys(); - // TODO: in case of grouping sets, we should be able to push the filters over grouping keys below the aggregation - // and also preserve the filter above the aggregation if it has an empty grouping set if (keys.empty()) { return visitPlanNode(node, predicate_context); } + // never push predicate through grouping sets agg + if (step.isGroupingSet()) + { + return visitPlanNode(node, predicate_context); + } + ConstASTPtr inherited_predicate = predicate_context.predicate; EqualityInference equality_inference = EqualityInference::newInstance(inherited_predicate, context); diff --git a/tests/queries/4_cnch_stateless_no_tenant/48054_group_by_pruning_type.reference b/tests/queries/4_cnch_stateless_no_tenant/48054_group_by_pruning_type.reference new file mode 100644 index 00000000000..e3f9ae63ec5 --- /dev/null +++ b/tests/queries/4_cnch_stateless_no_tenant/48054_group_by_pruning_type.reference @@ -0,0 +1 @@ +2024-01-02 1 diff --git a/tests/queries/4_cnch_stateless_no_tenant/48054_group_by_pruning_type.sql b/tests/queries/4_cnch_stateless_no_tenant/48054_group_by_pruning_type.sql new file mode 100644 index 00000000000..d6a48a80f28 --- /dev/null +++ b/tests/queries/4_cnch_stateless_no_tenant/48054_group_by_pruning_type.sql @@ -0,0 +1,12 @@ +SET enable_group_by_keys_pruning = 1, cte_mode='INLINED', dialect_type='ANSI'; +drop table if exists group_by_pruning; +drop table if exists group_by_pruning_local; + +create table group_by_pruning (day Date, id UInt64) engine=CnchMergeTree() order by tuple(); + +insert into group_by_pruning select '2024-01-02', 1; + +select 
b.day, count() from group_by_pruning a, group_by_pruning b where a.day = b.day and a.day = '2024-01-02' group by b.day, b.id; + +drop table if exists group_by_pruning; +drop table if exists group_by_pruning_local; diff --git a/tests/queries/4_cnch_stateless_no_tenant/48055_grouping_sets_predicates.reference b/tests/queries/4_cnch_stateless_no_tenant/48055_grouping_sets_predicates.reference new file mode 100644 index 00000000000..0af808118e0 --- /dev/null +++ b/tests/queries/4_cnch_stateless_no_tenant/48055_grouping_sets_predicates.reference @@ -0,0 +1,25 @@ +how many days in each year/month +2021 \N 365 +2022 \N 365 +\N 1 62 +\N 2 56 +\N 3 62 +\N 4 60 +\N 5 62 +\N 6 60 +\N 7 62 +\N 8 62 +\N 9 60 +\N 10 62 +\N 11 60 +\N 12 62 +how many days in Jan./Feb./Mar. +\N 1 62 +\N 2 56 +\N 3 62 +how many days in Jan./Feb./Mar., and each year +2021 \N 365 +2022 \N 365 +\N 1 62 +\N 2 56 +\N 3 62 diff --git a/tests/queries/4_cnch_stateless_no_tenant/48055_grouping_sets_predicates.sql b/tests/queries/4_cnch_stateless_no_tenant/48055_grouping_sets_predicates.sql new file mode 100644 index 00000000000..473cafccdf1 --- /dev/null +++ b/tests/queries/4_cnch_stateless_no_tenant/48055_grouping_sets_predicates.sql @@ -0,0 +1,17 @@ +drop table if exists date_dim; +drop table if exists date_dim_local; +set dialect_type='ANSI'; +create table date_dim (date Date, year Int64, month Int64) engine=CnchMergeTree() order by tuple(); + +-- 2 year data +insert into date_dim select toDate('2021-01-01') + number as date, toYear(date), toMonth(date) from system.numbers limit 730; + +select 'how many days in each year/month'; +select year, month, count() from date_dim group by grouping sets (year, month) order by (year, month); +select 'how many days in Jan./Feb./Mar.'; +select year, month, count() from date_dim group by grouping sets (year, month) having month in (1, 2, 3) order by (year, month); +select 'how many days in Jan./Feb./Mar., and each year'; +select year, month, count() from date_dim group 
by grouping sets (year, month) having month in (1, 2, 3) or (month is Null) order by (year, month); + +drop table if exists date_dim; +drop table if exists date_dim_local; \ No newline at end of file From 83842a3218117ae99a68614615fdb98d04cca2aa Mon Sep 17 00:00:00 2001 From: fredwang Date: Thu, 1 Aug 2024 09:47:55 +0000 Subject: [PATCH 051/292] Merge branch 'cherry-pick-46a8344b-2' into 'cnch-2.2' fix(clickhousech@m-4549346716): [cp] minor fixes for merge mutation See merge request dp/ClickHouse!22804 --- src/CloudServices/CnchMergeMutateThread.cpp | 6 +++++ src/CloudServices/CnchWorkerServiceImpl.cpp | 1 + src/Common/time.h | 9 ++++++++ .../PlanSegmentManagerRpcService.cpp | 17 +++++++++++++- .../DistributedStages/executePlanSegment.cpp | 2 ++ src/Protos/plan_segment_manager.proto | 1 + src/Storages/AlterCommands.cpp | 23 ++++++++++--------- src/Storages/AlterCommands.h | 2 +- src/Storages/StorageCnchMergeTree.cpp | 10 ++++++++ src/Storages/TTLDescription.cpp | 2 +- .../4_cnch_stateless/01001_alter_delete.sql | 7 ++++++ .../10017_nullable_sorting_key.sql | 9 +++++++- 12 files changed, 74 insertions(+), 15 deletions(-) diff --git a/src/CloudServices/CnchMergeMutateThread.cpp b/src/CloudServices/CnchMergeMutateThread.cpp index ad7c72e0c27..27e23565eea 100644 --- a/src/CloudServices/CnchMergeMutateThread.cpp +++ b/src/CloudServices/CnchMergeMutateThread.cpp @@ -654,6 +654,12 @@ bool CnchMergeMutateThread::trySelectPartsToMerge(StoragePtr & istorage, Storage bool only_realtime_partition = storage_settings->cnch_merge_only_realtime_partition; auto partitions = partition_selector->selectForMerge(istorage, num_partitions, only_realtime_partition); + if (partitions.empty()) + { + LOG_TRACE(log, "Skip empty table"); + return false; + } + metrics.num_partitions = partitions.size(); partitions = removeLockedPartition(partitions); metrics.num_unlock_partitions = partitions.size(); diff --git a/src/CloudServices/CnchWorkerServiceImpl.cpp 
b/src/CloudServices/CnchWorkerServiceImpl.cpp index ec297ea765e..cfb230f7ea2 100644 --- a/src/CloudServices/CnchWorkerServiceImpl.cpp +++ b/src/CloudServices/CnchWorkerServiceImpl.cpp @@ -300,6 +300,7 @@ void CnchWorkerServiceImpl::submitManipulationTask( rpc_context->initCnchServerResource(txn_id); rpc_context->setSetting("prefer_localhost_replica", false); rpc_context->setSetting("prefer_cnch_catalog", true); + rpc_context->setSetting("max_execution_time", 3600); trySetVirtualWarehouseAndWorkerGroup(data->getSettings()->cnch_vw_default.value, rpc_context); } diff --git a/src/Common/time.h b/src/Common/time.h index 08e27548a53..456d5817512 100644 --- a/src/Common/time.h +++ b/src/Common/time.h @@ -31,3 +31,12 @@ inline std::chrono::time_point{ std::chrono::duration_cast(timespec_to_duration(ts))}; } + +/// return duration in ms from now to timestamp_ms, if now exceeded timestamp_ms, return empty +inline std::optional duration_ms_from_now(UInt64 timestamp_ms) +{ + auto now = time_in_milliseconds(std::chrono::system_clock::now()); + if (timestamp_ms < now) + return {}; + return timestamp_ms - now; +} diff --git a/src/Interpreters/DistributedStages/PlanSegmentManagerRpcService.cpp b/src/Interpreters/DistributedStages/PlanSegmentManagerRpcService.cpp index f0d2b327cf3..8bec509ef91 100644 --- a/src/Interpreters/DistributedStages/PlanSegmentManagerRpcService.cpp +++ b/src/Interpreters/DistributedStages/PlanSegmentManagerRpcService.cpp @@ -39,6 +39,7 @@ namespace ErrorCodes extern const int BRPC_PROTOCOL_VERSION_UNSUPPORT; extern const int QUERY_WAS_CANCELLED; extern const int QUERY_WAS_CANCELLED_INTERNAL; + extern const int TIMEOUT_EXCEEDED; } WorkerNodeResourceData ResourceMonitorTimer::getResourceData() const { @@ -359,7 +360,21 @@ void PlanSegmentManagerRpcService::submitPlanSegment( /// Create session context for worker if (context->getServerType() == ServerType::cnch_worker) { - auto named_session = context->acquireNamedCnchSession(txn_id, {}, 
query_common->check_session()); + size_t max_execution_time_ms = 0; + if (query_common->has_query_expiration_timestamp()) + { + auto duration_ms = duration_ms_from_now(query_common->query_expiration_timestamp()); + if (!duration_ms) + throw Exception( + ErrorCodes::TIMEOUT_EXCEEDED, + "Max execution time exceeded before submit plan segment, try increase max_execution_time, current timestamp:{} " + "expires at:{}", + time_in_milliseconds(std::chrono::system_clock::now()), + query_common->query_expiration_timestamp()); + max_execution_time_ms = duration_ms.value(); + } + auto named_session + = context->acquireNamedCnchSession(txn_id, (max_execution_time_ms / 1000) + 1, query_common->check_session()); query_context = Context::createCopy(named_session->context); query_context->setSessionContext(query_context); query_context->setTemporaryTransaction(txn_id, primary_txn_id); diff --git a/src/Interpreters/DistributedStages/executePlanSegment.cpp b/src/Interpreters/DistributedStages/executePlanSegment.cpp index 15506bf7a20..a5d08f55d08 100644 --- a/src/Interpreters/DistributedStages/executePlanSegment.cpp +++ b/src/Interpreters/DistributedStages/executePlanSegment.cpp @@ -163,6 +163,8 @@ void prepareQueryCommonBuf( query_common.set_check_session(!context->getSettingsRef().bsp_mode && !context->getSettingsRef().enable_prune_empty_resource); query_common.set_txn_id(context->getCurrentTransactionID().toUInt64()); query_common.set_primary_txn_id(context->getCurrentTransaction()->getPrimaryTransactionID().toUInt64()); + auto query_expiration_ts = context->getQueryExpirationTimeStamp(); + query_common.set_query_expiration_timestamp(query_expiration_ts.tv_sec * 1000 + query_expiration_ts.tv_nsec / 1000000); const String & quota_key = client_info.quota_key; if (!client_info.quota_key.empty()) query_common.set_quota(quota_key); diff --git a/src/Protos/plan_segment_manager.proto b/src/Protos/plan_segment_manager.proto index da78625b4c3..b5c4d98472b 100644 --- 
a/src/Protos/plan_segment_manager.proto +++ b/src/Protos/plan_segment_manager.proto @@ -116,6 +116,7 @@ message QueryCommon { optional TraceMeta trace_meta = 12; optional bool check_session = 13; + optional uint64 query_expiration_timestamp = 14; } message SubmitPlanSegmentRequest { diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index b7824b74ea7..c467569a4b9 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -497,7 +497,7 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ } -void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context) const +void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context, bool allow_nullable_key) const { if (type == ADD_COLUMN) { @@ -811,7 +811,7 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context) } else if (type == MODIFY_TTL) { - metadata.table_ttl = TTLTableDescription::getTTLForTableFromAST(ttl, metadata.columns, context, metadata.primary_key); + metadata.table_ttl = TTLTableDescription::getTTLForTableFromAST(ttl, metadata.columns, context, metadata.primary_key, allow_nullable_key); } else if (type == REMOVE_TTL) { @@ -1196,9 +1196,18 @@ void AlterCommands::apply(StorageInMemoryMetadata & metadata, ContextPtr context auto metadata_copy = metadata; + bool allow_nullable_key = false; + if (metadata_copy.hasSettingsChanges()) + { + auto settings_changes = metadata_copy.getSettingsChanges()->as().changes; + auto * field = settings_changes.tryGet("allow_nullable_key"); + if (field && field->get()) + allow_nullable_key = true; + } + for (const AlterCommand & command : *this) if (!command.ignore) - command.apply(metadata_copy, context); + command.apply(metadata_copy, context, allow_nullable_key); /// Changes in columns may lead to changes in keys expression. 
metadata_copy.sorting_key.recalculateWithNewAST(metadata_copy.sorting_key.definition_ast, metadata_copy.columns, context); @@ -1262,14 +1271,6 @@ void AlterCommands::apply(StorageInMemoryMetadata & metadata, ContextPtr context metadata_copy.column_ttls_by_name[name] = new_ttl_entry; } - bool allow_nullable_key = false; - if (metadata_copy.hasSettingsChanges()) - { - auto settings_changes = metadata_copy.getSettingsChanges()->as().changes; - auto * field = settings_changes.tryGet("allow_nullable_key"); - if (field && field->get()) - allow_nullable_key = true; - } if (metadata_copy.table_ttl.definition_ast != nullptr) metadata_copy.table_ttl = TTLTableDescription::getTTLForTableFromAST( diff --git a/src/Storages/AlterCommands.h b/src/Storages/AlterCommands.h index e509a7ad9af..b69481366d1 100644 --- a/src/Storages/AlterCommands.h +++ b/src/Storages/AlterCommands.h @@ -203,7 +203,7 @@ struct AlterCommand static std::optional parse(const ASTAlterCommand * command, ContextPtr context); - void apply(StorageInMemoryMetadata & metadata, ContextPtr context) const; + void apply(StorageInMemoryMetadata & metadata, ContextPtr context, bool allow_nullable_key = false) const; /// Check that alter command require data modification (mutation) to be /// executed. For example, cast from Date to UInt16 type can be executed diff --git a/src/Storages/StorageCnchMergeTree.cpp b/src/Storages/StorageCnchMergeTree.cpp index c8f6587cc2c..6f0a0088efb 100644 --- a/src/Storages/StorageCnchMergeTree.cpp +++ b/src/Storages/StorageCnchMergeTree.cpp @@ -3263,6 +3263,16 @@ void StorageCnchMergeTree::mutate(const MutationCommands & commands, ContextPtr if (commands.empty()) return; + /// Check whether PARTITION (ID) is valid. Will throw exception if partition is an invalid value. 
+ for (const auto & c : commands) + { + if (c.partition) + { + auto p = getPartitionIDFromQuery(c.partition, query_context); + LOG_TRACE(log, "Extract partition id from command: {}", p); + } + } + auto txn = query_context->getCurrentTransaction(); auto action = txn->createAction(shared_from_this(), query_context->getSettingsRef(), query_context->getCurrentQueryId()); auto & alter_act = action->as(); diff --git a/src/Storages/TTLDescription.cpp b/src/Storages/TTLDescription.cpp index 03a8433a23e..35a3cc041bd 100644 --- a/src/Storages/TTLDescription.cpp +++ b/src/Storages/TTLDescription.cpp @@ -121,7 +121,7 @@ void checkTTLExpression(const ExpressionActionsPtr & ttl_expression, const Strin if (!allow_nullable_type) { throw Exception(ErrorCodes::BAD_TTL_EXPRESSION, - "TTL expression result type is {} but allow_nullable_type is false.", result_type->getName()); + "TTL expression result type is {} but allow_nullable_key is false.", result_type->getName()); } auto nested_type = static_cast(result_type)->getNestedType(); diff --git a/tests/queries/4_cnch_stateless/01001_alter_delete.sql b/tests/queries/4_cnch_stateless/01001_alter_delete.sql index 0a6a02d1e66..a57209a3812 100644 --- a/tests/queries/4_cnch_stateless/01001_alter_delete.sql +++ b/tests/queries/4_cnch_stateless/01001_alter_delete.sql @@ -57,10 +57,17 @@ DROP TABLE t_alter_ids; SELECT '----- DELETE IN INVALID PARTITION -----'; CREATE TABLE t_alter_d_partition(d Date, k Int32, m Int32) ENGINE = CnchMergeTree PARTITION BY (d, k) ORDER BY m; +SYSTEM START MERGES t_alter_d_partition; ALTER TABLE t_alter_d_partition DELETE IN PARTITION '20231010-10' WHERE m = 10; -- { serverError 248} DROP TABLE t_alter_d_partition; +CREATE TABLE t_alter_bad_partition(p DateTime, k Int32) ENGINE = CnchMergeTree PARTITION BY toYYYYMMDD(p) ORDER BY k; +SYSTEM START MERGES t_alter_bad_partition; +ALTER TABLE t_alter_bad_partition ADD INDEX ik(k) TYPE minmax GRANULARITY 1; +ALTER TABLE t_alter_bad_partition MATERIALIZE INDEX ik IN 
PARTITION '2024-01-01'; -- { serverError 72 } +DROP TABLE t_alter_bad_partition; + CREATE TABLE wrong_column_row_exists(k Int32, _row_exists Int32) ENGINE = CnchMergeTree ORDER BY k; -- { serverError 44 } SELECT '----- TRIVIAL COUNT AFTER DELETING DATA -----'; diff --git a/tests/queries/4_cnch_stateless/10017_nullable_sorting_key.sql b/tests/queries/4_cnch_stateless/10017_nullable_sorting_key.sql index ac662c877f7..468af300c62 100644 --- a/tests/queries/4_cnch_stateless/10017_nullable_sorting_key.sql +++ b/tests/queries/4_cnch_stateless/10017_nullable_sorting_key.sql @@ -11,4 +11,11 @@ select * from null_test where isNotNull(name); select * from null_test where name < 'xyz' and age > 0; select max(name), max(age) from null_test; -DROP TABLE null_test; \ No newline at end of file +DROP TABLE null_test; + +DROP TABLE IF EXISTS null_ttl_key; +CREATE TABLE null_ttl_key (p Nullable(DateTime), id Int32) ENGINE = CnchMergeTree() ORDER BY id; +ALTER TABLE null_ttl_key MODIFY TTL p + INTERVAL 30 DAY; -- { serverError 450} +ALTER TABLE null_ttl_key MODIFY SETTING allow_nullable_key = 1; +ALTER TABLE null_ttl_key MODIFY TTL p + INTERVAL 30 DAY; +DROP TABLE null_ttl_key; From bd62cc42b8fd2051e0ce95ddcf07d556b61de381 Mon Sep 17 00:00:00 2001 From: fredwang Date: Thu, 1 Aug 2024 09:48:40 +0000 Subject: [PATCH 052/292] Merge 'cherry-pick-153ae250' into 'cnch-2.2' fix(clickhousech@m-4616855063): [cp 2.2] fix bytes_on_disk for CNCH part See merge request: !22775 # Conflicts: # src/Storages/MergeTree/MergeTreeDataPartCNCH.h --- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 4 ++++ .../MergeTree/MergeTreeDataPartCNCH.cpp | 22 +++++++++---------- .../MergeTree/MergeTreeDataPartCNCH.h | 2 +- .../01495_attach_check_bytes.reference | 1 + .../01495_attach_check_bytes.sql | 11 ++++++++++ 5 files changed, 28 insertions(+), 12 deletions(-) create mode 100644 tests/queries/4_cnch_stateless/01495_attach_check_bytes.reference create mode 100644 
tests/queries/4_cnch_stateless/01495_attach_check_bytes.sql diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index e90344ecaf9..dc46766352c 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -2557,6 +2557,10 @@ void writePartBinary(const IMergeTreeDataPart & part, WriteBuffer & buf) flags |= IMergeTreeDataPart::LOW_PRIORITY_FLAG; writeIntBinary(flags, buf); + /// WARNING: For CNCH, bytes_on_disk is always 0. Keep it here for compatibility. + /// We can only get the value of bytes_on_disk after the whole data file wrote to vfs. + /// So actually we have no way to store correct bytes_on_disk when writing part. + /// It's corrected when loading part. See MergeTreeDataPartCNCH::loadMetaInfoFromBuffer and MergeTreeDataPartCNCH::loadChecksumsFromRemote. writeVarUInt(part.bytes_on_disk, buf); writeVarUInt(part.rows_count, buf); if (auto cnch_part = std::dynamic_pointer_cast(part.shared_from_this())) diff --git a/src/Storages/MergeTree/MergeTreeDataPartCNCH.cpp b/src/Storages/MergeTree/MergeTreeDataPartCNCH.cpp index 030030d10b1..f9f4ab98aef 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCNCH.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartCNCH.cpp @@ -643,18 +643,18 @@ ImmutableDeleteBitmapPtr MergeTreeDataPartCNCH::getDeleteBitmap(bool allow_null) return getCombinedDeleteBitmapForNormalTable(allow_null); } -MergeTreeDataPartChecksums::FileChecksums MergeTreeDataPartCNCH::loadPartDataFooter() const +MergeTreeDataPartChecksums::FileChecksums MergeTreeDataPartCNCH::loadPartDataFooter(size_t & out_file_size) const { ProfileEvents::increment(ProfileEvents::LoadDataPartFooter); const String data_file_path = fs::path(getFullRelativePath()) / DATA_FILE; - size_t data_file_size = volume->getDisk()->getFileSize(data_file_path); + out_file_size = volume->getDisk()->getFileSize(data_file_path); auto data_file = openForReading(volume->getDisk(), 
data_file_path, MERGE_TREE_STORAGE_CNCH_DATA_FOOTER_SIZE, "footer"); if (!parent_part) { - data_file->setReadUntilPosition(data_file_size); - data_file->seek(data_file_size - MERGE_TREE_STORAGE_CNCH_DATA_FOOTER_SIZE); + data_file->setReadUntilPosition(out_file_size); + data_file->seek(out_file_size - MERGE_TREE_STORAGE_CNCH_DATA_FOOTER_SIZE); } else { @@ -830,7 +830,8 @@ IMergeTreeDataPart::ChecksumsPtr MergeTreeDataPartCNCH::loadChecksumsFromRemote( return checksums; String data_rel_path = fs::path(getFullRelativePath()) / DATA_FILE; - auto data_footer = loadPartDataFooter(); + size_t cnch_file_size = 0; + auto data_footer = loadPartDataFooter(cnch_file_size); const auto & checksum_file = data_footer["checksums.txt"]; if (checksum_file.file_size == 0 /* && isDeleted() */) @@ -842,11 +843,6 @@ IMergeTreeDataPart::ChecksumsPtr MergeTreeDataPartCNCH::loadChecksumsFromRemote( if (checksums->read(buf)) { assertEOF(buf); - /// bytes_on_disk += delta_checksums->getTotalSizeOnDisk(); - } - else - { - /// bytes_on_disk += delta_checksums->getTotalSizeOnDisk(); } // merge with data footer @@ -895,6 +891,9 @@ IMergeTreeDataPart::ChecksumsPtr MergeTreeDataPartCNCH::loadChecksumsFromRemote( disk_cache->cacheSegmentsToLocalDisk({std::move(segment)}); } + if (!bytes_on_disk) + bytes_on_disk = cnch_file_size; + return checksums; } @@ -1018,7 +1017,8 @@ void MergeTreeDataPartCNCH::loadMetaInfoFromBuffer(ReadBuffer & buf, bool load_h if (flags & IMergeTreeDataPart::LOW_PRIORITY_FLAG) low_priority = true; - readVarUInt(bytes_on_disk, buf); + size_t skip_bytes_on_disk; + readVarUInt(skip_bytes_on_disk, buf); readVarUInt(rows_count, buf); size_t marks_count = 0; readVarUInt(marks_count, buf); diff --git a/src/Storages/MergeTree/MergeTreeDataPartCNCH.h b/src/Storages/MergeTree/MergeTreeDataPartCNCH.h index 1389d8d1004..48a7404cece 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCNCH.h +++ b/src/Storages/MergeTree/MergeTreeDataPartCNCH.h @@ -126,7 +126,7 @@ class 
MergeTreeDataPartCNCH : public IMergeTreeDataPart void loadIndex() override; IndexPtr loadIndexFromStorage() const; - MergeTreeDataPartChecksums::FileChecksums loadPartDataFooter() const; + MergeTreeDataPartChecksums::FileChecksums loadPartDataFooter(size_t & out_file_size) const; ChecksumsPtr loadChecksums(bool require) override; ChecksumsPtr loadChecksumsFromRemote(bool follow_part_chain); diff --git a/tests/queries/4_cnch_stateless/01495_attach_check_bytes.reference b/tests/queries/4_cnch_stateless/01495_attach_check_bytes.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/4_cnch_stateless/01495_attach_check_bytes.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/4_cnch_stateless/01495_attach_check_bytes.sql b/tests/queries/4_cnch_stateless/01495_attach_check_bytes.sql new file mode 100644 index 00000000000..6835395e894 --- /dev/null +++ b/tests/queries/4_cnch_stateless/01495_attach_check_bytes.sql @@ -0,0 +1,11 @@ +DROP TABLE IF EXISTS test_attach_check_bytes; + +CREATE TABLE test_attach_check_bytes(k Int, m Int) ENGINE = CnchMergeTree() ORDER BY k; + +INSERT INTO test_attach_check_bytes VALUES (1, 1); +ALTER TABLE test_attach_check_bytes DETACH PARTITION ID 'all'; +ALTER TABLE test_attach_check_bytes ATTACH PARTITION ID 'all'; + +SELECT bytes > 0 FROM system.cnch_parts WHERE database = currentDatabase() AND table = 'test_attach_check_bytes' AND active SETTINGS enable_multiple_tables_for_cnch_parts = 1; + +DROP TABLE test_attach_check_bytes; From 1511ba2d181b94dfd8d31c7b0203d282fc19fec6 Mon Sep 17 00:00:00 2001 From: fredwang Date: Thu, 1 Aug 2024 09:49:02 +0000 Subject: [PATCH 053/292] Merge 'cherry-pick-mr-22732' into 'cnch-2.2' fix(optimizer@m-4548822622): QueryAnalyzer process join_use_nulls See merge request: !22792 --- src/Analyzers/QueryAnalyzer.cpp | 37 ++++++++++++++++-- .../48023_eliminate_join_by_fk.reference | 32 ++++++++------- .../48050_fix_unify_nullable.reference | 1 + 
.../48050_fix_unify_nullable.sql | 39 +++++++++++++++++++ 4 files changed, 90 insertions(+), 19 deletions(-) create mode 100644 tests/queries/4_cnch_stateless_no_tenant/48050_fix_unify_nullable.reference create mode 100644 tests/queries/4_cnch_stateless_no_tenant/48050_fix_unify_nullable.sql diff --git a/src/Analyzers/QueryAnalyzer.cpp b/src/Analyzers/QueryAnalyzer.cpp index 6933b9188a8..1e4c0fae9dd 100644 --- a/src/Analyzers/QueryAnalyzer.cpp +++ b/src/Analyzers/QueryAnalyzer.cpp @@ -869,6 +869,9 @@ ScopePtr QueryAnalyzerVisitor::analyzeJoinUsing( NameSet seen_names; FieldDescriptions output_fields; + bool make_nullable_for_left = isRightOrFull(table_join.kind) && context->getSettingsRef().join_use_nulls; + bool make_nullable_for_right = isLeftOrFull(table_join.kind) && context->getSettingsRef().join_use_nulls; + if (use_ansi_semantic) { auto resolve_join_key @@ -936,18 +939,22 @@ ScopePtr QueryAnalyzerVisitor::analyzeJoinUsing( } /// Step 2. add non join fields - auto add_non_join_fields = [&](ScopePtr scope, std::vector & join_fields_list) { + auto add_non_join_fields = [&](ScopePtr scope, std::vector & join_fields_list, bool make_nullable) { std::unordered_set join_fields{join_fields_list.begin(), join_fields_list.end()}; for (size_t i = 0; i < scope->size(); ++i) { if (join_fields.find(i) == join_fields.end()) + { output_fields.push_back(scope->at(i)); + if (make_nullable) + output_fields.back().type = JoinCommon::convertTypeToNullable(output_fields.back().type); + } } }; - add_non_join_fields(left_scope, left_join_fields); - add_non_join_fields(right_scope, right_join_fields); + add_non_join_fields(left_scope, left_join_fields, make_nullable_for_left); + add_non_join_fields(right_scope, right_join_fields, make_nullable_for_right); } else { @@ -1042,6 +1049,8 @@ ScopePtr QueryAnalyzerVisitor::analyzeJoinUsing( else { output_fields.emplace_back(input_field); + if (make_nullable_for_left) + output_fields.back().type = 
JoinCommon::convertTypeToNullable(output_fields.back().type); } } @@ -1063,6 +1072,8 @@ ScopePtr QueryAnalyzerVisitor::analyzeJoinUsing( if (!right_join_field_reverse_map.count(i)) { output_fields.emplace_back(input_field.withNewName(new_name)); + if (make_nullable_for_right) + output_fields.back().type = JoinCommon::convertTypeToNullable(output_fields.back().type); } else if (required_columns.count(new_name) && !source_columns.count(new_name)) { @@ -1096,18 +1107,35 @@ ScopePtr QueryAnalyzerVisitor::analyzeJoinOn( ScopePtr output_scope; { FieldDescriptions output_fields; - + bool make_nullable_for_left = isRightOrFull(table_join.kind) && context->getSettingsRef().join_use_nulls; + bool make_nullable_for_right = isLeftOrFull(table_join.kind) && context->getSettingsRef().join_use_nulls; + auto update_type = [&](DataTypePtr & type, bool make_nullable) + { + if (make_nullable) + return JoinCommon::convertTypeToNullable(type); + return type; + }; + if (use_ansi_semantic) { for (const auto & f : left_scope->getFields()) + { output_fields.emplace_back(f); + output_fields.back().type = update_type(output_fields.back().type, make_nullable_for_left); + } for (const auto & f : right_scope->getFields()) + { output_fields.emplace_back(f); + output_fields.back().type = update_type(output_fields.back().type, make_nullable_for_right); + } } else { for (const auto & f : left_scope->getFields()) + { output_fields.emplace_back(f); + output_fields.back().type = update_type(output_fields.back().type, make_nullable_for_left); + } auto source_names = collectNames(left_scope); bool check_identifier_begin_valid = context->getSettingsRef().check_identifier_begin_valid; @@ -1116,6 +1144,7 @@ ScopePtr QueryAnalyzerVisitor::analyzeJoinOn( { auto new_name = qualifyJoinedName(f.name, right_table_qualifier, source_names, check_identifier_begin_valid); output_fields.emplace_back(f.withNewName(new_name)); + output_fields.back().type = update_type(output_fields.back().type, 
make_nullable_for_right); } } diff --git a/tests/queries/4_cnch_stateless_no_tenant/48023_eliminate_join_by_fk.reference b/tests/queries/4_cnch_stateless_no_tenant/48023_eliminate_join_by_fk.reference index 15f23a2b020..0a24dc961f1 100644 --- a/tests/queries/4_cnch_stateless_no_tenant/48023_eliminate_join_by_fk.reference +++ b/tests/queries/4_cnch_stateless_no_tenant/48023_eliminate_join_by_fk.reference @@ -119,17 +119,17 @@ Projection Est. ? rows └─ Sorting Est. ? rows │ Order by: {price ASC NULLS LAST} └─ Inner Join Est. ? rows - │ Condition: expr#cast(sk, \'Nullable(Int64)\') == expr#cast(sk_2, \'Nullable(Int64)\') + │ Condition: expr#cast(sk, \'Nullable(Int64)\')_1 == expr#cast(sk_2, \'Nullable(Int64)\') ├─ Repartition Exchange Est. ? rows - │ │ Partition by: {expr#cast(sk, \'Nullable(Int64)\')} + │ │ Partition by: {expr#cast(sk, \'Nullable(Int64)\')_1} │ └─ Projection Est. ? rows - │ │ Expressions: [sk], expr#cast(sk, \'Nullable(Int64)\'):=cast(sk, \'Nullable(Int64)\') + │ │ Expressions: [sk], expr#cast(sk, \'Nullable(Int64)\')_1:=cast(sk, \'Nullable(Int64)\') │ └─ Left Join Est. ? rows - │ │ Condition: expr#cast(sk, \'Int64\') == expr#sum(sk) + │ │ Condition: expr#cast(sk, \'Nullable(Int64)\') == expr#sum(sk) │ ├─ Repartition Exchange Est. ? rows - │ │ │ Partition by: {expr#cast(sk, \'Int64\')} + │ │ │ Partition by: {expr#cast(sk, \'Nullable(Int64)\')} │ │ └─ Projection Est. ? rows - │ │ │ Expressions: [sk], expr#cast(sk, \'Int64\'):=cast(sk, \'Int64\') + │ │ │ Expressions: [sk], expr#cast(sk, \'Nullable(Int64)\'):=cast(sk, \'Nullable(Int64)\') │ │ └─ TableScan test.cust Est. ? rows │ │ Outputs: [sk] │ └─ Repartition Exchange Est. ? rows @@ -161,23 +161,25 @@ Projection Est. ? rows └─ Sorting Est. ? rows │ Order by: {price ASC NULLS LAST} └─ Inner Join Est. ? 
rows - │ Condition: expr#cast(sk, \'Nullable(Int64)\') == expr#cast(sk_1, \'Nullable(Int64)\') + │ Condition: expr#cast(sk, \'Nullable(Int64)\')_1 == expr#cast(sk_1, \'Nullable(Int64)\') ├─ Repartition Exchange Est. ? rows - │ │ Partition by: {expr#cast(sk, \'Nullable(Int64)\')} + │ │ Partition by: {expr#cast(sk, \'Nullable(Int64)\')_1} │ └─ Projection Est. ? rows - │ │ Expressions: [sk], expr#cast(sk, \'Nullable(Int64)\'):=cast(sk, \'Nullable(Int64)\') + │ │ Expressions: [sk], expr#cast(sk, \'Nullable(Int64)\')_1:=cast(sk, \'Nullable(Int64)\') │ └─ Inner Join Est. ? rows - │ │ Condition: expr#cast(sk, \'Int64\') == ws_item_sk + │ │ Condition: expr#cast(sk, \'Nullable(Int64)\') == expr#cast(ws_item_sk, \'Nullable(Int64)\') │ ├─ Repartition Exchange Est. ? rows - │ │ │ Partition by: {expr#cast(sk, \'Int64\')} + │ │ │ Partition by: {expr#cast(sk, \'Nullable(Int64)\')} │ │ └─ Projection Est. ? rows - │ │ │ Expressions: [sk], expr#cast(sk, \'Int64\'):=cast(sk, \'Int64\') + │ │ │ Expressions: [sk], expr#cast(sk, \'Nullable(Int64)\'):=cast(sk, \'Nullable(Int64)\') │ │ └─ TableScan test.cust Est. ? rows │ │ Outputs: [sk] │ └─ Repartition Exchange Est. ? rows - │ │ Partition by: {ws_item_sk} - │ └─ TableScan test.web Est. ? rows - │ Outputs: [ws_item_sk] + │ │ Partition by: {expr#cast(ws_item_sk, \'Nullable(Int64)\')} + │ └─ Projection Est. ? rows + │ │ Expressions: expr#cast(ws_item_sk, \'Nullable(Int64)\'):=cast(ws_item_sk, \'Nullable(Int64)\') + │ └─ TableScan test.web Est. ? rows + │ Outputs: [ws_item_sk] └─ Repartition Exchange Est. ? rows │ Partition by: {expr#cast(sk_1, \'Nullable(Int64)\')} └─ Projection Est. ? 
rows diff --git a/tests/queries/4_cnch_stateless_no_tenant/48050_fix_unify_nullable.reference b/tests/queries/4_cnch_stateless_no_tenant/48050_fix_unify_nullable.reference new file mode 100644 index 00000000000..7ed6ff82de6 --- /dev/null +++ b/tests/queries/4_cnch_stateless_no_tenant/48050_fix_unify_nullable.reference @@ -0,0 +1 @@ +5 diff --git a/tests/queries/4_cnch_stateless_no_tenant/48050_fix_unify_nullable.sql b/tests/queries/4_cnch_stateless_no_tenant/48050_fix_unify_nullable.sql new file mode 100644 index 00000000000..2ef638fbd64 --- /dev/null +++ b/tests/queries/4_cnch_stateless_no_tenant/48050_fix_unify_nullable.sql @@ -0,0 +1,39 @@ +set join_use_nulls=1; +create database if not exists test; +use test; +drop table if exists test.ad_positions; +drop table if exists test.ad_request_logs; +drop table if exists test.scenes; + +CREATE TABLE test.ad_positions +( + `id` UInt64, + `scene_id` UInt8 COMMENT '广告场景ID' +) +ENGINE = CnchMergeTree +ORDER BY id; + +CREATE TABLE test.ad_request_logs +( + `id` UInt64, + `ad_position_id` UInt16 COMMENT '广告位ID' +) +ENGINE = CnchMergeTree +ORDER BY id; + +CREATE TABLE test.scenes +( + `id` UInt64 +) +ENGINE = CnchMergeTree +ORDER BY id; + +insert into test.ad_positions values(1,2)(2,3)(3,4)(9,1); +insert into test.ad_request_logs values(1,2)(2,3)(3,4)(6,7)(11,12); +insert into test.scenes values(1)(2)(3)(7)(9)(10); + +SELECT + count(*) +FROM test.ad_request_logs + LEFT JOIN test.ad_positions ON ad_request_logs.ad_position_id = ad_positions.id + LEFT JOIN test.scenes ON ad_positions.scene_id = scenes.id; From 50395c15da89e7d9a7e3795b425cddbb43874379 Mon Sep 17 00:00:00 2001 From: fredwang Date: Thu, 1 Aug 2024 09:49:20 +0000 Subject: [PATCH 054/292] Merge 'cherry-pick-commit-be60339a-4' into 'cnch-2.2' fix(clickhousech@m-4671125918): hll memory leak See merge request: !22825 --- src/Common/HyperLogLogWithSmallSetOptimization.h | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git 
a/src/Common/HyperLogLogWithSmallSetOptimization.h b/src/Common/HyperLogLogWithSmallSetOptimization.h index 39c00660ebe..0df5786e513 100644 --- a/src/Common/HyperLogLogWithSmallSetOptimization.h +++ b/src/Common/HyperLogLogWithSmallSetOptimization.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include @@ -28,9 +29,10 @@ class HyperLogLogWithSmallSetOptimization : private boost::noncopyable using Small = SmallSet; using Large = HyperLogLogCounter; using LargeValueType = typename Large::value_type; + using LargePtr = std::shared_ptr; Small small; - Large * large = nullptr; + LargePtr large; bool isLarge() const { @@ -40,22 +42,18 @@ class HyperLogLogWithSmallSetOptimization : private boost::noncopyable void toLarge() { /// At the time of copying data from `tiny`, setting the value of `large` is still not possible (otherwise it will overwrite some data). - Large * tmp_large = new Large; + LargePtr tmp_large = std::make_shared(); for (const auto & x : small) tmp_large->insert(static_cast(x.getValue())); - large = tmp_large; + large = std::move(tmp_large); } public: using value_type = Key; - ~HyperLogLogWithSmallSetOptimization() - { - if (isLarge()) - delete large; - } + ~HyperLogLogWithSmallSetOptimization() = default; /// ALWAYS_INLINE is required to have better code layout for uniqHLL12 function void ALWAYS_INLINE insert(Key value) From 5ca7fc350eac7a2430e6fca29e0def7f56599cef Mon Sep 17 00:00:00 2001 From: fredwang Date: Thu, 1 Aug 2024 09:49:42 +0000 Subject: [PATCH 055/292] Merge 'fix_logical_optimizer_alias_cnch2.2' into 'cnch-2.2' fix(clickhousech@m-4505531776): Disable logical expression optimizer for expression with aliases. 
#47451 See merge request: !22849 --- src/Interpreters/LogicalExpressionsOptimizer.cpp | 11 ++++++++--- .../25340_logical_optimizer_alias_bug.reference | 2 ++ .../25340_logical_optimizer_alias_bug.sql | 4 ++++ 3 files changed, 14 insertions(+), 3 deletions(-) create mode 100644 tests/queries/4_cnch_stateless/25340_logical_optimizer_alias_bug.reference create mode 100644 tests/queries/4_cnch_stateless/25340_logical_optimizer_alias_bug.sql diff --git a/src/Interpreters/LogicalExpressionsOptimizer.cpp b/src/Interpreters/LogicalExpressionsOptimizer.cpp index 936ed0149d2..50d24b4a7ae 100644 --- a/src/Interpreters/LogicalExpressionsOptimizer.cpp +++ b/src/Interpreters/LogicalExpressionsOptimizer.cpp @@ -112,7 +112,9 @@ void LogicalExpressionsOptimizer::collectDisjunctiveEqualityChains() bool found_chain = false; auto * function = to_node->as(); - if (function && function->name == "or" && function->children.size() == 1) + /// Optimization does not respect aliases properly, which can lead to MULTIPLE_EXPRESSION_FOR_ALIAS error. + /// Disable it if an expression has an alias. Proper implementation is done with the new analyzer. + if (function && function->alias.empty() && function->name == "or" && function->children.size() == 1) { const auto * expression_list = function->children[0]->as(); if (expression_list) @@ -121,14 +123,14 @@ void LogicalExpressionsOptimizer::collectDisjunctiveEqualityChains() for (const auto & child : expression_list->children) { auto * equals = child->as(); - if (equals && equals->name == "equals" && equals->children.size() == 1) + if (equals && equals->alias.empty() && equals->name == "equals" && equals->children.size() == 1) { const auto * equals_expression_list = equals->children[0]->as(); if (equals_expression_list && equals_expression_list->children.size() == 2) { /// Equality expr = xN. 
const auto * literal = equals_expression_list->children[1]->as(); - if (literal) + if (literal && literal->alias.empty()) { auto expr_lhs = equals_expression_list->children[0]->getTreeHash(); OrWithExpression or_with_expression{function, expr_lhs, function->tryGetAlias()}; @@ -199,6 +201,9 @@ bool LogicalExpressionsOptimizer::mayOptimizeDisjunctiveEqualityChain(const Disj const auto & equalities = chain.second; const auto & equality_functions = equalities.functions; + if (settings.optimize_min_equality_disjunction_chain_length == 0) + return false; + /// We eliminate too short chains. if (equality_functions.size() < settings.optimize_min_equality_disjunction_chain_length) return false; diff --git a/tests/queries/4_cnch_stateless/25340_logical_optimizer_alias_bug.reference b/tests/queries/4_cnch_stateless/25340_logical_optimizer_alias_bug.reference new file mode 100644 index 00000000000..0d66ea1aee9 --- /dev/null +++ b/tests/queries/4_cnch_stateless/25340_logical_optimizer_alias_bug.reference @@ -0,0 +1,2 @@ +0 +1 diff --git a/tests/queries/4_cnch_stateless/25340_logical_optimizer_alias_bug.sql b/tests/queries/4_cnch_stateless/25340_logical_optimizer_alias_bug.sql new file mode 100644 index 00000000000..b16ca8ba180 --- /dev/null +++ b/tests/queries/4_cnch_stateless/25340_logical_optimizer_alias_bug.sql @@ -0,0 +1,4 @@ +create table test_local (id UInt32, path LowCardinality(String)) engine = CnchMergeTree order by id; +WITH ((position(path, '/a') > 0) AND (NOT (position(path, 'a') > 0))) OR (path = '/b') OR (path = '/b/') as alias1 SELECT max(alias1) FROM test_local WHERE (id = 299386662); + +SELECT (a = '1') OR (a = '2') OR (a like '2%') OR (a = '3') AS _1700057091683 FROM (select '1' as a); From 9f33c3d968889fb5ded2ac345612a091336dfb8b Mon Sep 17 00:00:00 2001 From: fredwang Date: Thu, 1 Aug 2024 09:52:35 +0000 Subject: [PATCH 056/292] Merge 'cherry-pick-eb87e2d3' into 'cnch-2.2' fix(optimizer@m-4670748330): fix multi distinct property See merge request: !22890 
--- src/Optimizer/Property/Property.h | 13 +++++++++++++ src/Optimizer/Property/PropertyDeriver.cpp | 5 ++++- ...48056_expand_distinct_sharding_table.reference | 5 +++++ .../48056_expand_distinct_sharding_table.sql | 15 +++++++++++++++ 4 files changed, 37 insertions(+), 1 deletion(-) create mode 100644 tests/queries/4_cnch_stateless_no_tenant/48056_expand_distinct_sharding_table.reference create mode 100644 tests/queries/4_cnch_stateless_no_tenant/48056_expand_distinct_sharding_table.sql diff --git a/src/Optimizer/Property/Property.h b/src/Optimizer/Property/Property.h index cb1f7165066..93df8bdc33b 100644 --- a/src/Optimizer/Property/Property.h +++ b/src/Optimizer/Property/Property.h @@ -124,6 +124,18 @@ class Partitioning String getHashFunc(String default_func) const; Array getParams() const; + void resetIfPartitionHandle() + { + if (!isPartitionHandle()) + { + return; + } + this->columns = {}; + this->handle = Handle::UNKNOWN; + this->bucket_expr = nullptr; + this->buckets = 0; + } + bool isSatisfyWorker() const { return satisfy_worker; @@ -397,6 +409,7 @@ class Property const Partitioning & getNodePartitioning() const { return node_partitioning; } Partitioning & getNodePartitioningRef() { return node_partitioning; } const Partitioning & getStreamPartitioning() const { return stream_partitioning; } + Partitioning & getStreamPartitioningRef() { return stream_partitioning; } const Sorting & getSorting() const { return sorting; } const CTEDescriptions & getCTEDescriptions() const { return cte_descriptions; } CTEDescriptions & getCTEDescriptions() { return cte_descriptions; } diff --git a/src/Optimizer/Property/PropertyDeriver.cpp b/src/Optimizer/Property/PropertyDeriver.cpp index b91a78c7dc1..1d9e04c2030 100644 --- a/src/Optimizer/Property/PropertyDeriver.cpp +++ b/src/Optimizer/Property/PropertyDeriver.cpp @@ -734,7 +734,10 @@ Property DeriverVisitor::visitMultiJoinStep(const MultiJoinStep &, DeriverContex Property DeriverVisitor::visitExpandStep(const ExpandStep 
&, DeriverContext & context) { - return context.getInput()[0]; + auto prop = context.getInput()[0].clearSorting(); + prop.getNodePartitioningRef().resetIfPartitionHandle(); + prop.getStreamPartitioningRef().resetIfPartitionHandle(); + return prop; } } diff --git a/tests/queries/4_cnch_stateless_no_tenant/48056_expand_distinct_sharding_table.reference b/tests/queries/4_cnch_stateless_no_tenant/48056_expand_distinct_sharding_table.reference new file mode 100644 index 00000000000..d1ec5bf0ebe --- /dev/null +++ b/tests/queries/4_cnch_stateless_no_tenant/48056_expand_distinct_sharding_table.reference @@ -0,0 +1,5 @@ +32768 +32 +32 +32 +32768 32 32 32 diff --git a/tests/queries/4_cnch_stateless_no_tenant/48056_expand_distinct_sharding_table.sql b/tests/queries/4_cnch_stateless_no_tenant/48056_expand_distinct_sharding_table.sql new file mode 100644 index 00000000000..e9910a7efe1 --- /dev/null +++ b/tests/queries/4_cnch_stateless_no_tenant/48056_expand_distinct_sharding_table.sql @@ -0,0 +1,15 @@ +drop table if exists tt; +create table tt (a UInt64, b UInt64, c UInt64) Engine = CnchMergeTree +cluster by a into 16 buckets +order by b; + +insert into tt select intDiv(number, 32 * 32), intDiv(number, 32) % 32, number % 32 from system.numbers limit 32768; + +set enable_expand_distinct_optimization=1; +select count() from tt; +select uniqExact(a) from tt; +select uniqExact(b) from tt; +select uniqExact(c) from tt; +select count(), uniqExact(a), uniqExact(b), uniqExact(c) from tt; + +drop table if exists tt; \ No newline at end of file From 655fc0ec867b3b0ac000db13acd9a6e2c58c98d0 Mon Sep 17 00:00:00 2001 From: fredwang Date: Thu, 1 Aug 2024 09:52:53 +0000 Subject: [PATCH 057/292] Merge 'cherry-pick-92c1d177' into 'cnch-2.2' fix(optimizer@m-4674652509): fix unstable row count See merge request: !22887 --- src/Statistics/CatalogAdaptor.cpp | 15 +-------------- src/Statistics/CatalogAdaptor.h | 2 +- 2 files changed, 2 insertions(+), 15 deletions(-) diff --git 
a/src/Statistics/CatalogAdaptor.cpp b/src/Statistics/CatalogAdaptor.cpp index 20dd93e7625..bf2157f60b5 100644 --- a/src/Statistics/CatalogAdaptor.cpp +++ b/src/Statistics/CatalogAdaptor.cpp @@ -130,20 +130,7 @@ std::optional CatalogAdaptor::queryRowCount(const StatsTableIdentifier & const auto * cnch_merge_tree = dynamic_cast(storage.get()); if (!cnch_merge_tree) return std::nullopt; - - auto sql = fmt::format( - FMT_STRING("select sum(rows) from system.cnch_parts where database='{}' and table = '{}'"), - getOriginalDatabaseName(table_id.getDatabaseName()), - table_id.getTableName()); - auto helper = SubqueryHelper::create(context, sql); - Block block = getOnlyRowFrom(helper); - if (block.columns() != 1) - { - throw Exception("wrong column", ErrorCodes::LOGICAL_ERROR); - } - auto col = block.getColumns().at(0); - auto row_count = col->getInt(0); - return row_count; + return cnch_merge_tree->totalRows(context); } ColumnDescVector CatalogAdaptor::getAllCollectableColumns(const StatsTableIdentifier & identifier) diff --git a/src/Statistics/CatalogAdaptor.h b/src/Statistics/CatalogAdaptor.h index b9ae6968850..ce655b13d07 100644 --- a/src/Statistics/CatalogAdaptor.h +++ b/src/Statistics/CatalogAdaptor.h @@ -45,7 +45,7 @@ class CatalogAdaptor virtual void invalidateAllServerStatsCache() = 0; // fast way to query row count - std::optional queryRowCount(const StatsTableIdentifier & top_storage); + std::optional queryRowCount(const StatsTableIdentifier & table_id); virtual std::vector getAllTablesID(const String & database_name) = 0; virtual std::optional getTableIdByName(const String & database_name, const String & table) = 0; From 29b89c82faa517f863eafa3fd258590a0b817d4a Mon Sep 17 00:00:00 2001 From: fredwang Date: Thu, 1 Aug 2024 09:53:14 +0000 Subject: [PATCH 058/292] Merge 'cnch_2.2_fix_ivt_idx_mvcc' into 'cnch-2.2' fix(clickhousech@m-4655966539): Fix inverted index with mvcc part See merge request: !22798 --- src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp | 5 
++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 60d9e18f73a..c643d32352f 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -1885,7 +1885,10 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex( std::unique_ptr gin_part_helper = nullptr; if (part->getType() == IMergeTreeDataPart::Type::CNCH) { - gin_part_helper = std::make_unique(part, + /// Need to follow the part chain and find the right part with this index + String index_version_file_name = index_helper->getFileName() + ".idx"; + gin_part_helper = std::make_unique( + part->getMvccDataPart(index_version_file_name), DiskCacheFactory::instance().get(DiskCacheType::MergeTree)); } else From 529cfb167343e22f73c1e88b8132783df113d805 Mon Sep 17 00:00:00 2001 From: fredwang Date: Thu, 1 Aug 2024 09:53:32 +0000 Subject: [PATCH 059/292] Merge 'filter22' into 'cnch-2.2' fix(clickhousech@m-4675069041): use reference in resource filtering See merge request: !22902 --- src/CloudServices/CnchServerResource.cpp | 2 +- src/CloudServices/CnchServerResource.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/CloudServices/CnchServerResource.cpp b/src/CloudServices/CnchServerResource.cpp index 11850960f12..9bd1c3800ed 100644 --- a/src/CloudServices/CnchServerResource.cpp +++ b/src/CloudServices/CnchServerResource.cpp @@ -123,7 +123,7 @@ void AssignedResource::addDataParts(const FileDataPartsCNCHVector & parts) } } -void ResourceStageInfo::filterResource(std::optional resource_option) +void ResourceStageInfo::filterResource(std::optional & resource_option) { if (resource_option) { diff --git a/src/CloudServices/CnchServerResource.h b/src/CloudServices/CnchServerResource.h index 0fb908cd133..5285a8d1ea1 100644 --- a/src/CloudServices/CnchServerResource.h +++ 
b/src/CloudServices/CnchServerResource.h @@ -110,7 +110,7 @@ struct ResourceOption struct ResourceStageInfo { std::unordered_set sent_resource; - void filterResource(std::optional resource_option); + void filterResource(std::optional & resource_option); }; class CnchServerResource { From c6ced0e0bf25642e24b519fd89951443fc459eeb Mon Sep 17 00:00:00 2001 From: fredwang Date: Thu, 1 Aug 2024 09:55:04 +0000 Subject: [PATCH 060/292] Merge 'cherry-pick-54c9d575' into 'cnch-2.2' feat(clickhousech@m-3030898703): [cp 2.2] add delete_rows in system.cnch_parts See merge request: !22931 # Conflicts: # src/MergeTreeCommon/MergeTreeMetaBase.h --- src/Catalog/DataModelPartWrapper.cpp | 13 ++- src/Catalog/DataModelPartWrapper.h | 2 +- src/MergeTreeCommon/MergeTreeMetaBase.cpp | 101 ++++++++++++------ src/MergeTreeCommon/MergeTreeMetaBase.h | 3 +- .../System/StorageSystemCnchParts.cpp | 23 ++-- .../4_cnch_stateless/01001_alter_delete.sql | 2 + .../01001_trivial_count_cnch_parts.reference | 2 + .../01001_trivial_count_cnch_parts.sql | 18 ++++ ...0117_uniquekey_test_bucket_table.reference | 16 +-- 9 files changed, 131 insertions(+), 49 deletions(-) create mode 100644 tests/queries/4_cnch_stateless/01001_trivial_count_cnch_parts.reference create mode 100644 tests/queries/4_cnch_stateless/01001_trivial_count_cnch_parts.sql diff --git a/src/Catalog/DataModelPartWrapper.cpp b/src/Catalog/DataModelPartWrapper.cpp index 86315896629..91ef3610a73 100644 --- a/src/Catalog/DataModelPartWrapper.cpp +++ b/src/Catalog/DataModelPartWrapper.cpp @@ -196,14 +196,23 @@ void ServerDataPart::setVirtualPartSize(const UInt64 & vp_size) const { virtual_ UInt64 ServerDataPart::getVirtualPartSize() const { return virtual_part_size; } -UInt64 ServerDataPart::deletedRowsCount(const MergeTreeMetaBase & storage) const +UInt64 ServerDataPart::deletedRowsCount(const MergeTreeMetaBase & storage, bool ignore_error) const { UInt64 res = 0; /// For unique table, deletedRowsCount is calculated from delete_bitmap. 
if (storage.getInMemoryMetadataPtr()->hasUniqueKey()) { if (delete_bitmap_metas.empty()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Delete bitmap meta for part {} is empty whose engine is unique table, it's a bug!", name()); + { + if (ignore_error) + { + LOG_DEBUG(storage.getLogger(), "Delete bitmap meta for part {} is empty whose engine is unique table, it's a bug!", name()); + return 0; + } + else + throw Exception(ErrorCodes::LOGICAL_ERROR, "Delete bitmap meta for part {} is empty whose engine is unique table, it's a bug!", name()); + + } for (const auto & delete_bitmap_meta: delete_bitmap_metas) res += delete_bitmap_meta->cardinality(); diff --git a/src/Catalog/DataModelPartWrapper.h b/src/Catalog/DataModelPartWrapper.h index 72fb4420246..ea20a1b5e06 100644 --- a/src/Catalog/DataModelPartWrapper.h +++ b/src/Catalog/DataModelPartWrapper.h @@ -94,7 +94,7 @@ class ServerDataPart : public std::enable_shared_from_this, publ mutable std::forward_list delete_bitmap_metas; - UInt64 deletedRowsCount(const MergeTreeMetaBase & storage) const; + UInt64 deletedRowsCount(const MergeTreeMetaBase & storage, bool ignore_error = false) const; const ImmutableDeleteBitmapPtr & getDeleteBitmap(const MergeTreeMetaBase & storage, bool is_unique_new_part) const; diff --git a/src/MergeTreeCommon/MergeTreeMetaBase.cpp b/src/MergeTreeCommon/MergeTreeMetaBase.cpp index 262fbd69d8f..744f6c14b95 100644 --- a/src/MergeTreeCommon/MergeTreeMetaBase.cpp +++ b/src/MergeTreeCommon/MergeTreeMetaBase.cpp @@ -2290,7 +2290,7 @@ Strings MergeTreeMetaBase::selectPartitionsByPredicate( return res_partitions; } -void MergeTreeMetaBase::getDeleteBitmapMetaForServerParts(const ServerDataPartsVector & parts, DeleteBitmapMetaPtrVector & all_bitmaps) const +void MergeTreeMetaBase::getDeleteBitmapMetaForServerParts(const ServerDataPartsVector & parts, DeleteBitmapMetaPtrVector & all_bitmaps, bool force_found) const { DeleteBitmapMetaPtrVector bitmaps; CnchPartsHelper::calcVisibleDeleteBitmaps(all_bitmaps, 
bitmaps); @@ -2299,47 +2299,86 @@ void MergeTreeMetaBase::getDeleteBitmapMetaForServerParts(const ServerDataPartsV auto bitmap_it = bitmaps.begin(); for (const auto & part : parts) { - /// search for the first bitmap - while (bitmap_it != bitmaps.end() && !(*bitmap_it)->sameBlock(part->info())) - bitmap_it++; - - if (bitmap_it == bitmaps.end()) + if (force_found) { - if (auto unique_table_log = getContext()->getCloudUniqueTableLog()) + /// search for the first bitmap + while (bitmap_it != bitmaps.end() && !(*bitmap_it)->sameBlock(part->info())) + bitmap_it++; + + if (bitmap_it == bitmaps.end()) { - auto current_log = UniqueTable::createUniqueTableLog(UniqueTableLogElement::ERROR, getCnchStorageID()); - current_log.metric = ErrorCodes::LOGICAL_ERROR; - current_log.event_msg = "Delete bitmap metadata of " + part->name() + " is not found"; - unique_table_log->add(current_log); + if (auto unique_table_log = getContext()->getCloudUniqueTableLog()) + { + auto current_log = UniqueTable::createUniqueTableLog(UniqueTableLogElement::ERROR, getCnchStorageID()); + current_log.metric = ErrorCodes::LOGICAL_ERROR; + current_log.event_msg = "Delete bitmap metadata of " + part->name() + " is not found"; + unique_table_log->add(current_log); + } + throw Exception("Delete bitmap metadata of " + part->name() + " is not found", ErrorCodes::LOGICAL_ERROR); } - throw Exception("Delete bitmap metadata of " + part->name() + " is not found", ErrorCodes::LOGICAL_ERROR); - } - /// add all visible bitmaps (from new to old) part - bool found_base = false; - auto list_it = part->delete_bitmap_metas.before_begin(); - for (auto bitmap_meta = *bitmap_it; bitmap_meta; bitmap_meta = bitmap_meta->tryGetPrevious()) - { - list_it = part->delete_bitmap_metas.insert_after(list_it, bitmap_meta->getModel()); - if (bitmap_meta->getType() == DeleteBitmapMetaType::Base) + /// add all visible bitmaps (from new to old) part + bool found_base = false; + auto list_it = part->delete_bitmap_metas.before_begin(); + 
for (auto bitmap_meta = *bitmap_it; bitmap_meta; bitmap_meta = bitmap_meta->tryGetPrevious()) { - found_base = true; - break; + list_it = part->delete_bitmap_metas.insert_after(list_it, bitmap_meta->getModel()); + if (bitmap_meta->getType() == DeleteBitmapMetaType::Base) + { + found_base = true; + break; + } } + if (!found_base) + { + if (auto unique_table_log = getContext()->getCloudUniqueTableLog()) + { + auto current_log = UniqueTable::createUniqueTableLog(UniqueTableLogElement::ERROR, getCnchStorageID()); + current_log.metric = ErrorCodes::LOGICAL_ERROR; + current_log.event_msg = "Base delete bitmap of " + part->name() + " is not found"; + unique_table_log->add(current_log); + } + throw Exception("Base delete bitmap of " + part->name() + " is not found", ErrorCodes::LOGICAL_ERROR); + } + + bitmap_it++; } - if (!found_base) + else { - if (auto unique_table_log = getContext()->getCloudUniqueTableLog()) + while (bitmap_it != bitmaps.end() && (*(*bitmap_it)) <= part->info()) { - auto current_log = UniqueTable::createUniqueTableLog(UniqueTableLogElement::ERROR, getCnchStorageID()); - current_log.metric = ErrorCodes::LOGICAL_ERROR; - current_log.event_msg = "Base delete bitmap of " + part->name() + " is not found"; - unique_table_log->add(current_log); + if (!(*bitmap_it)->sameBlock(part->info())) + bitmap_it++; + else + { + /// add all visible bitmaps (from new to old) part + bool found_base = false; + auto list_it = part->delete_bitmap_metas.before_begin(); + for (auto bitmap_meta = *bitmap_it; bitmap_meta; bitmap_meta = bitmap_meta->tryGetPrevious()) + { + list_it = part->delete_bitmap_metas.insert_after(list_it, bitmap_meta->getModel()); + if (bitmap_meta->getType() == DeleteBitmapMetaType::Base) + { + found_base = true; + break; + } + } + if (!found_base) + { + if (auto unique_table_log = getContext()->getCloudUniqueTableLog()) + { + auto current_log = UniqueTable::createUniqueTableLog(UniqueTableLogElement::ERROR, getCnchStorageID()); + current_log.metric = 
ErrorCodes::LOGICAL_ERROR; + current_log.event_msg = "Base delete bitmap of " + part->name() + " is not found"; + unique_table_log->add(current_log); + } + throw Exception("Base delete bitmap of " + part->name() + " is not found", ErrorCodes::LOGICAL_ERROR); + } + bitmap_it++; + } } - throw Exception("Base delete bitmap of " + part->name() + " is not found", ErrorCodes::LOGICAL_ERROR); } - - bitmap_it++; + } } diff --git a/src/MergeTreeCommon/MergeTreeMetaBase.h b/src/MergeTreeCommon/MergeTreeMetaBase.h index f946aecb0d1..bc98bb9a290 100644 --- a/src/MergeTreeCommon/MergeTreeMetaBase.h +++ b/src/MergeTreeCommon/MergeTreeMetaBase.h @@ -463,7 +463,8 @@ class MergeTreeMetaBase : public IStorage, public WithMutableContext, public Mer /** * @param parts input parts, must be sorted in PartComparator order */ - void getDeleteBitmapMetaForServerParts(const ServerDataPartsVector & parts, DeleteBitmapMetaPtrVector & delete_bitmap_metas) const; + void getDeleteBitmapMetaForServerParts(const ServerDataPartsVector & parts, DeleteBitmapMetaPtrVector & delete_bitmap_metas, bool force_found = true) const; + void getDeleteBitmapMetaForCnchParts(MutableMergeTreeDataPartsCNCHVector & parts, DeleteBitmapMetaPtrVector & delete_bitmap_metas, bool force_found = true); void getDeleteBitmapMetaForCnchParts(const MergeTreeDataPartsCNCHVector & parts, DeleteBitmapMetaPtrVector & delete_bitmap_metas, bool force_found = true); void getDeleteBitmapMetaForParts(IMergeTreeDataPartsVector & parts, DeleteBitmapMetaPtrVector & delete_bitmap_metas, bool force_found = true); void getDeleteBitmapMetaForStagedParts(const MergeTreeDataPartsCNCHVector & parts, ContextPtr context, TxnTimestamp start_time); diff --git a/src/Storages/System/StorageSystemCnchParts.cpp b/src/Storages/System/StorageSystemCnchParts.cpp index 45bc8883015..c788363996c 100644 --- a/src/Storages/System/StorageSystemCnchParts.cpp +++ b/src/Storages/System/StorageSystemCnchParts.cpp @@ -60,6 +60,7 @@ NamesAndTypesList 
StorageSystemCnchParts::getNamesAndTypes() {"name", std::make_shared()}, {"bytes_on_disk", std::make_shared()}, {"rows_count", std::make_shared()}, + {"delete_rows", std::make_shared()}, {"columns", std::make_shared()}, {"marks_count", std::make_shared()}, {"index_granularity", std::make_shared(std::make_shared())}, @@ -203,15 +204,23 @@ void StorageSystemCnchParts::fillData(MutableColumns & res_columns, ContextPtr c } /// use committed visibility to include dropped parts (and exclude intermediates) in system table - auto all_parts = enable_filter_by_partition - ? cnch_catalog->getServerDataPartsInPartitions(table, {only_selected_partition_id}, start_time, nullptr, Catalog::VisibilityLevel::Committed) - : cnch_catalog->getAllServerDataParts(table, start_time, nullptr, Catalog::VisibilityLevel::Committed); - - const FormatSettings format_settings; + auto [all_parts, all_bitmaps] = enable_filter_by_partition + ? cnch_catalog->getServerDataPartsInPartitionsWithDBM(table, {only_selected_partition_id}, start_time, nullptr, Catalog::VisibilityLevel::Committed) + : cnch_catalog->getAllServerDataPartsWithDBM(table, start_time, nullptr, Catalog::VisibilityLevel::Committed); ServerDataPartsVector visible_parts; CnchPartsHelper::calcPartsForGC(all_parts, nullptr, &visible_parts); + if (visible_parts.empty()) + continue; + + if (cnch_merge_tree->getInMemoryMetadataPtr()->hasUniqueKey()) + { + cnch_merge_tree->getDeleteBitmapMetaForServerParts(visible_parts, all_bitmaps, /*force_found*/false); + } + + const FormatSettings format_settings; + for (auto & part : visible_parts) { bool latest_in_mvcc = true; @@ -229,7 +238,9 @@ void StorageSystemCnchParts::fillData(MutableColumns & res_columns, ContextPtr c } res_columns[col_num++]->insert(curr_part->name()); res_columns[col_num++]->insert(curr_part->part_model().size()); - res_columns[col_num++]->insert(curr_part->part_model().rows_count()); + auto delete_rows = curr_part->deletedRowsCount(*cnch_merge_tree, /*ignore_error*/true); + 
res_columns[col_num++]->insert(curr_part->part_model().rows_count() - delete_rows); + res_columns[col_num++]->insert(delete_rows); res_columns[col_num++]->insert(curr_part->part_model().columns()); res_columns[col_num++]->insert(curr_part->part_model().marks_count()); Array index_granularity; diff --git a/tests/queries/4_cnch_stateless/01001_alter_delete.sql b/tests/queries/4_cnch_stateless/01001_alter_delete.sql index a57209a3812..db0fd632a7c 100644 --- a/tests/queries/4_cnch_stateless/01001_alter_delete.sql +++ b/tests/queries/4_cnch_stateless/01001_alter_delete.sql @@ -72,6 +72,7 @@ CREATE TABLE wrong_column_row_exists(k Int32, _row_exists Int32) ENGINE = CnchMe SELECT '----- TRIVIAL COUNT AFTER DELETING DATA -----'; CREATE TABLE t_delete_and_trivial_count(d Date, k Int32, m Int32) ENGINE = CnchMergeTree PARTITION BY d ORDER BY k; +SYSTEM START MERGES t_delete_and_trivial_count; INSERT INTO t_delete_and_trivial_count SELECT '2024-01-01', number, number FROM numbers(5); INSERT INTO t_delete_and_trivial_count SELECT '2024-01-02', number, number FROM numbers(5); ALTER TABLE t_delete_and_trivial_count DELETE WHERE m < 3; @@ -82,6 +83,7 @@ SELECT count() FROM t_delete_and_trivial_count WHERE _partition_id = '20240102' DROP TABLE t_delete_and_trivial_count CREATE TABLE t_delete_and_trivial_count_u(d Date, k Int32, m Int32) ENGINE = CnchMergeTree PARTITION BY d ORDER BY k UNIQUE KEY k; +SYSTEM START MERGES t_delete_and_trivial_count_u; INSERT INTO t_delete_and_trivial_count_u SELECT '2024-01-01', number, number FROM numbers(5); INSERT INTO t_delete_and_trivial_count_u SELECT '2024-01-02', number, number FROM numbers(5); ALTER TABLE t_delete_and_trivial_count_u DELETE WHERE m < 3; -- { serverError 36 } diff --git a/tests/queries/4_cnch_stateless/01001_trivial_count_cnch_parts.reference b/tests/queries/4_cnch_stateless/01001_trivial_count_cnch_parts.reference new file mode 100644 index 00000000000..f6705ccafc4 --- /dev/null +++ 
b/tests/queries/4_cnch_stateless/01001_trivial_count_cnch_parts.reference @@ -0,0 +1,2 @@ +2 3 +2 3 diff --git a/tests/queries/4_cnch_stateless/01001_trivial_count_cnch_parts.sql b/tests/queries/4_cnch_stateless/01001_trivial_count_cnch_parts.sql new file mode 100644 index 00000000000..4c9af0ffb0f --- /dev/null +++ b/tests/queries/4_cnch_stateless/01001_trivial_count_cnch_parts.sql @@ -0,0 +1,18 @@ +set mutations_sync = 1; +DROP TABLE IF EXISTS t_delete_rows; +DROP TABLE IF EXISTS t_delete_rows_u; + +CREATE TABLE t_delete_rows(d Date, k Int32, m Int32) ENGINE = CnchMergeTree PARTITION BY d ORDER BY k; +SYSTEM START MERGES t_delete_rows; +INSERT INTO t_delete_rows SELECT '2024-01-01', number, number FROM numbers(5); +ALTER TABLE t_delete_rows DELETE WHERE m < 3; +SELECT rows, delete_rows FROM system.cnch_parts WHERE database = currentDatabase() AND table = 't_delete_rows' AND active SETTINGS enable_multiple_tables_for_cnch_parts = 1; + +CREATE TABLE t_delete_rows_u(d Date, k Int32, m Int32) ENGINE = CnchMergeTree PARTITION BY d ORDER BY k UNIQUE KEY k; +SYSTEM START MERGES t_delete_rows_u; +INSERT INTO t_delete_rows_u SELECT '2024-01-01', number, number FROM numbers(5); +DELETE FROM t_delete_rows_u WHERE m < 3; +SELECT sum(rows), sum(delete_rows) FROM system.cnch_parts WHERE database = currentDatabase() AND table = 't_delete_rows' AND active SETTINGS enable_multiple_tables_for_cnch_parts = 1; + +DROP TABLE t_delete_rows; +DROP TABLE t_delete_rows_u; diff --git a/tests/queries/4_cnch_stateless/10117_uniquekey_test_bucket_table.reference b/tests/queries/4_cnch_stateless/10117_uniquekey_test_bucket_table.reference index 8154684045f..ac491bafcfa 100644 --- a/tests/queries/4_cnch_stateless/10117_uniquekey_test_bucket_table.reference +++ b/tests/queries/4_cnch_stateless/10117_uniquekey_test_bucket_table.reference @@ -17,8 +17,8 @@ ALTER MODIFY CLUSTER KEY DEFINITION 2023-06-26 2 2a 2023-06-26 3 3b 2023-06-26 4 4a -0 3 264443531437019481 -0 3 264443531437019481 +0 0 
264443531437019481 +0 2 264443531437019481 0 1 6841829813474502258 2 2 6841829813474502258 AFTER MODIFY CLUSTER KEY, test insert one row @@ -52,8 +52,8 @@ ALTER MODIFY CLUSTER KEY DEFINITION 2023-06-26 2 2a 2023-06-26 3 3b 2023-06-26 4 4a -0 3 7626063805666248791 -0 3 7626063805666248791 +0 0 7626063805666248791 +0 2 7626063805666248791 0 2 10103628939067059924 2 1 10103628939067059924 DROP bucket table definition, INSERT, ensure bucket number of new part is -1, ban recluster commands @@ -95,8 +95,8 @@ ALTER MODIFY CLUSTER KEY DEFINITION 2023-06-26 2 2a 2023-06-26 3 3b 2023-06-26 4 4a -0 3 264443531437019481 -0 3 264443531437019481 +0 0 264443531437019481 +0 2 264443531437019481 0 1 6841829813474502258 2 2 6841829813474502258 AFTER MODIFY CLUSTER KEY, test insert one row @@ -131,8 +131,8 @@ ALTER MODIFY CLUSTER KEY DEFINITION 2023-06-26 2 2a 2023-06-26 3 3b 2023-06-26 4 4a -0 3 7626063805666248791 -0 3 7626063805666248791 +0 0 7626063805666248791 +0 2 7626063805666248791 0 2 10103628939067059924 2 1 10103628939067059924 DROP bucket table definition, INSERT, ensure bucket number of new part is -1, ban recluster commands From ac491adbb9d8b385d01d2358d17e83a2de9b35f0 Mon Sep 17 00:00:00 2001 From: fredwang Date: Thu, 1 Aug 2024 09:57:10 +0000 Subject: [PATCH 061/292] Merge 'cherry-pick-mr-22867' into 'cnch-2.2' feat(clickhousech@m-4674646055): add hasTokens function and standard tokenizer for inverted index See merge request: !22924 # Conflicts: # src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp # src/Storages/MergeTree/MergeTreeIndices.h --- src/Core/Settings.h | 1 + src/Functions/HasTokenImpl.h | 9 +- src/Functions/hasToken.cpp | 9 +- src/Interpreters/GinFilter.cpp | 10 ++ src/Interpreters/GinFilter.h | 3 + src/Interpreters/ITokenExtractor.cpp | 112 +++++++++++++++++ src/Interpreters/ITokenExtractor.h | 11 ++ .../tests/gtest_standard_token.cpp | 118 ++++++++++++++++++ .../MergeTree/MergeTreeDataSelectExecutor.cpp | 4 + .../MergeTree/MergeTreeIndexInverted.cpp 
| 41 +++++- .../MergeTree/MergeTreeIndexInverted.h | 2 + src/Storages/MergeTree/MergeTreeIndices.h | 2 + ...3_has_tokens_with_inverted_index.reference | 58 +++++++++ .../53013_has_tokens_with_inverted_index.sql | 100 +++++++++++++++ .../53014_inv_standard_token.reference | 7 ++ .../53014_inv_standard_token.sql | 21 ++++ 16 files changed, 500 insertions(+), 8 deletions(-) create mode 100644 src/Interpreters/tests/gtest_standard_token.cpp create mode 100644 tests/queries/4_cnch_stateless/53013_has_tokens_with_inverted_index.reference create mode 100644 tests/queries/4_cnch_stateless/53013_has_tokens_with_inverted_index.sql create mode 100644 tests/queries/4_cnch_stateless/53014_inv_standard_token.reference create mode 100644 tests/queries/4_cnch_stateless/53014_inv_standard_token.sql diff --git a/src/Core/Settings.h b/src/Core/Settings.h index fba93a9e103..7bceba3025b 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -1825,6 +1825,7 @@ enum PreloadLevelSettings : UInt64 M(Bool, force_manipulate_materialized_mysql_table, false, "For tables of materialized mysql engine, force to manipulate it.", 0) \ M(Bool, throw_exception_when_mysql_connection_failed, false, "For mysql database engine, whether throw exception when mysql connection failed. 
If it is set to true, clickhouse may shutdown during restarting due to mysql connection failure", 0) \ /** for inverted index*/ \ + M(Bool, enable_inverted_index, true, "Enable inverted index", 0) \ M(UInt64, skip_inverted_index_term_size, 512, "If term size bigger than size, do not filter with inverted index", 0) \ M(Bool, disable_str_to_array_cast, false, "disable String to Array(XXX) CAST", 0) \ /** materialized view async refresh related settings */ \ diff --git a/src/Functions/HasTokenImpl.h b/src/Functions/HasTokenImpl.h index ac5303e361e..498cb73dcbd 100644 --- a/src/Functions/HasTokenImpl.h +++ b/src/Functions/HasTokenImpl.h @@ -18,7 +18,7 @@ namespace ErrorCodes /** Token search the string, means that needle must be surrounded by some separator chars, like whitespace or puctuation. */ -template +template struct HasTokenImpl { using ResultType = UInt8; @@ -47,7 +47,8 @@ struct HasTokenImpl const UInt8 * const end = haystack_data.data() + haystack_data.size(); const UInt8 * pos = begin; - if (const auto has_separator = std::any_of(pattern.cbegin(), pattern.cend(), isTokenSeparator); has_separator || pattern.empty()) + if (const auto has_separator = std::any_of(pattern.cbegin(), pattern.cend(), isTokenSeparator); + (has_separator && !enable_separator_inside) || pattern.empty()) { if (res_null) { @@ -74,8 +75,8 @@ struct HasTokenImpl while (pos < end && end != (pos = searcher.search(pos, end - pos))) { /// The found substring is a token - if ((pos == begin || isTokenSeparator(pos[-1])) - && (pos + pattern_size == end || isTokenSeparator(pos[pattern_size]))) + if (enable_separator_inside + || ((pos == begin || isTokenSeparator(pos[-1])) && (pos + pattern_size == end || isTokenSeparator(pos[pattern_size])))) { /// Let's determine which index it refers to. 
while (begin + haystack_offsets[i] <= pos) diff --git a/src/Functions/hasToken.cpp b/src/Functions/hasToken.cpp index 2ef4fffb3a9..77fded692ae 100644 --- a/src/Functions/hasToken.cpp +++ b/src/Functions/hasToken.cpp @@ -12,6 +12,11 @@ struct NameHasToken static constexpr auto name = "hasToken"; }; +struct NameHasTokens +{ + static constexpr auto name = "hasTokens"; +}; + struct NameHasTokenOrNull { static constexpr auto name = "hasTokenOrNull"; @@ -19,13 +24,15 @@ struct NameHasTokenOrNull using FunctionHasToken = FunctionsStringSearch>; +using FunctionHasTokens + = FunctionsStringSearch>; using FunctionHasTokenOrNull = FunctionsStringSearch, ExecutionErrorPolicy::Null>; REGISTER_FUNCTION(HasToken) { factory.registerFunction(FunctionFactory::CaseSensitive); - + factory.registerFunction(FunctionFactory::CaseSensitive); factory.registerFunction(FunctionFactory::CaseSensitive); } diff --git a/src/Interpreters/GinFilter.cpp b/src/Interpreters/GinFilter.cpp index 238f8b8c650..18b066504b0 100644 --- a/src/Interpreters/GinFilter.cpp +++ b/src/Interpreters/GinFilter.cpp @@ -200,4 +200,14 @@ bool GinFilter::match(const GinPostingsCache & postings_cache , roaring::Roaring return false; } +String GinFilter::getTermsInString() const +{ + String result; + for (const String & term : terms) + { + result += " " + term; + } + return result; +} + } diff --git a/src/Interpreters/GinFilter.h b/src/Interpreters/GinFilter.h index 128cddbb5fc..6a5261690fd 100644 --- a/src/Interpreters/GinFilter.h +++ b/src/Interpreters/GinFilter.h @@ -78,6 +78,9 @@ class GinFilter void filpWithRange(roaring::Roaring & result) const; size_t getAllRangeSize() const; + // for log trace + String getTermsInString() const; + private: /// Filter parameters [[__maybe_unused__]] const GinFilterParameters & params; diff --git a/src/Interpreters/ITokenExtractor.cpp b/src/Interpreters/ITokenExtractor.cpp index 26355996f37..07986cc8a2c 100644 --- a/src/Interpreters/ITokenExtractor.cpp +++ 
b/src/Interpreters/ITokenExtractor.cpp @@ -240,4 +240,116 @@ bool SplitTokenExtractor::nextInStringLike(const char * data, size_t length, siz return !bad_token && !token.empty(); } +bool StandardTokenExtractor::nextInString( + const char * data, size_t length, size_t * __restrict pos, size_t * __restrict token_start, size_t * __restrict token_length) const +{ + *token_start = *pos; + *token_length = 0; + + while (*pos < length) + { + if (isASCII(data[*pos])) + { + if (isAlphaNumericASCII(data[*pos])) + { + // if is Alpha or Numeric just continue + ++*pos; + ++*token_length; + continue; + } + + /// Finish current token if have any + if (*token_length > 0) + { + return true; + } + else + { + // skip current and split continue + *token_start = ++*pos; + } + } + else // UTF-8 case + { + // Finish current token if have any + if (*token_length > 0) + return true; + + // get length and return + const size_t sz = UTF8::seqLength(static_cast(data[*pos])); + (*pos)+=sz; + (*token_length)+=sz; + // submit utf-8 token + if(*token_length > 0) + { + return true; + } + + } + } + + return *token_length > 0; +} + +bool StandardTokenExtractor::nextInStringLike( + const char * data, size_t length, size_t * pos, String & out) const +{ + out.clear(); + bool bad_token = false; // % or _ before token + bool escaped = false; + + while (*pos < length) + { + if (!escaped && (data[*pos] == '%' || data[*pos] == '_')) + { + out.clear(); + bad_token = true; + ++*pos; + } + else if (!escaped && data[*pos] == '\\') + { + escaped = true; + ++*pos; + } + else if (isASCII(data[*pos])) + { + if(isAlphaNumericASCII(data[*pos])) + { + out += data[*pos]; + ++*pos; + escaped = false; + continue; + } + + if (!bad_token && !out.empty()) + return true; + + out.clear(); + bad_token = false; + escaped = false; + ++*pos; + } + else + { + // before cut utf-8 submit token if has any + if (!bad_token && !out.empty()) + return true; + + out.clear(); + bad_token = false; + escaped = false; + + const size_t sz = 
UTF8::seqLength(static_cast(data[*pos])); + + out.append((data + *pos), sz); + (*pos) += sz; + + // submit token after cut utf-8 + if (!out.empty()) + return true; + } + } + return !bad_token && !out.empty(); +} + } diff --git a/src/Interpreters/ITokenExtractor.h b/src/Interpreters/ITokenExtractor.h index 7b335e02d16..3e12e85c946 100644 --- a/src/Interpreters/ITokenExtractor.h +++ b/src/Interpreters/ITokenExtractor.h @@ -104,7 +104,18 @@ struct SplitTokenExtractor final : public ITokenExtractor }; +class StandardTokenExtractor final : public ITokenExtractor +{ +public: + static const char * getName() { return "standard"; } + + bool nextInString(const char * data, size_t length, size_t * __restrict pos, size_t * __restrict token_start, size_t * __restrict token_length) const override; + + bool nextInStringLike(const char * data, size_t length, size_t * pos, String & out) const override; +}; + } + diff --git a/src/Interpreters/tests/gtest_standard_token.cpp b/src/Interpreters/tests/gtest_standard_token.cpp new file mode 100644 index 00000000000..81e578c9d2f --- /dev/null +++ b/src/Interpreters/tests/gtest_standard_token.cpp @@ -0,0 +1,118 @@ +#include +#include +#include +#include +#include +#include + +using namespace DB; + + +TEST(TokenExtractor, StandardToken) +{ + size_t pos = 0; + StandardTokenExtractor tokenizer; + + size_t token_start = 0; + size_t token_length = 0; + + + size_t index = 0; + std::string test_str_1 = "ByConity是分布式的云原生SQL数仓引擎"; + std::vector test_token_1 = {"ByConity","是","分","布","式","的","云","原","生","SQL","数","仓","引","擎"}; + while(tokenizer.nextInString( + test_str_1.data(), test_str_1.length(), &pos, &token_start, &token_length)) + { + auto tmp_str = std::string(test_str_1.data()+token_start, token_length); + ASSERT_EQ(tmp_str, test_token_1[index]); + ++index; + } + + pos = 0; + index = 0; + token_start = 0; + token_length = 0; + std::string test_str_2 = "StandardToken:分词器,可以跳过ASCII符号.,/!@#$@()-空格等并整块切分english token,123456789和单个切分中文"; + 
std::vector test_token_2 = { + "StandardToken","分","词","器","可","以","跳","过","ASCII", + "符","号","空","格","等","并","整","块","切","分", + "english","token","123456789","和","单","个","切","分","中","文" + }; + + while(tokenizer.nextInString( + test_str_2.data(), test_str_2.length(), &pos, &token_start, &token_length)) + { + auto tmp_str = std::string(test_str_2.data()+token_start, token_length); + ASSERT_EQ(tmp_str, test_token_2[index]); + ++index; + } +} + +TEST(TokenExtractor, StandardTokenLike) +{ + size_t pos = 0; + StandardTokenExtractor tokenizer; + std::string tmp_token; + size_t index = 0; + + pos = 0; + std::string test_str_1 = "%NOTOKEN%"; + while(tokenizer.nextInStringLike(test_str_1.data(), test_str_1.length(), &pos, tmp_token)) + { + if(!tmp_token.empty()) + { + throw std::runtime_error("should no token here"); + } + } + + pos = 0; + std::string test_str_2 = "%NOTOKEN"; + while(tokenizer.nextInStringLike(test_str_2.data(), test_str_2.length(), &pos, tmp_token)) + { + if(!tmp_token.empty()) + { + throw std::runtime_error("should no token here"); + } + } + + pos = 0; + std::string test_str_3 = "NOTOKEN%"; + while(tokenizer.nextInStringLike(test_str_3.data(), test_str_3.length(), &pos, tmp_token)) + { + if(!tmp_token.empty()) + { + throw std::runtime_error("should no token here"); + } + } + + + pos = 0; + std::string test_str_4 = "NO_TOKEN"; + while(tokenizer.nextInStringLike(test_str_4.data(), test_str_4.length(), &pos, tmp_token)) + { + if(!tmp_token.empty()) + { + throw std::runtime_error("should no token here"); + } + } + + index = 0; + pos = 0; + std::string test_str_5 = "%这里_会有中文token%"; + std::vector test_tokens_5 = {"这","里","会","有","中","文"}; + while(tokenizer.nextInStringLike(test_str_5.data(), test_str_5.length(), &pos, tmp_token)) + { + ASSERT_EQ(tmp_token, test_tokens_5[index]); + index++; + } + + index = 0; + pos = 0; + std::string test_str_6 = "%这里_,english %Token也有%"; + std::vector test_tokens_6 = {"这","里","english","也","有"}; + 
while(tokenizer.nextInStringLike(test_str_6.data(), test_str_6.length(), &pos, tmp_token)) + { + ASSERT_EQ(tmp_token, test_tokens_6[index]); + index++; + } +} diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index c643d32352f..fd30686f040 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -916,6 +916,10 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd { LOG_TRACE(&Poco::Logger::get("filterPartsByPrimaryKeyAndSkipIndexes"),"Creating index {} {}\n", index.name, index.type); auto index_helper = MergeTreeIndexFactory::instance().get(index); + if (!settings.enable_inverted_index && index_helper->isInvertedIndex()) + { + continue; + } auto condition = index_helper->createIndexCondition(query_info, context); if (!condition->alwaysUnknownOrTrue()) useful_indices.emplace_back(index_helper, condition); diff --git a/src/Storages/MergeTree/MergeTreeIndexInverted.cpp b/src/Storages/MergeTree/MergeTreeIndexInverted.cpp index 992a37e013a..378422b7aee 100644 --- a/src/Storages/MergeTree/MergeTreeIndexInverted.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexInverted.cpp @@ -56,7 +56,7 @@ bool MergeTreeConditionInverted::createFunctionEqualsCondition( { ChineseTokenExtractor::stringLikeToGinFilter(value, nlp_extractor, *out.gin_filter); } - + return true; } #if USE_TSQUERY @@ -524,7 +524,7 @@ bool MergeTreeConditionInverted::atomFromAST(const ASTPtr & node, Block & block_ } return true; } - else if (func_name == "hasToken") + else if (func_name == "hasToken" || func_name == "hasTokens") { out.key_column = key_column_num; out.function = RPNElement::FUNCTION_EQUALS; @@ -538,7 +538,11 @@ bool MergeTreeConditionInverted::atomFromAST(const ASTPtr & node, Block & block_ { ChineseTokenExtractor::stringToGinFilter(value, nlp_extractor, *out.gin_filter); } + + LOG_TRACE(&Poco::Logger::get("inverted 
index"),"search string: {} with token : [ {} ] ", value, out.gin_filter->getTermsInString()); + return true; + } else if (func_name == "startsWith") { @@ -723,8 +727,33 @@ bool MergeTreeIndexInverted::mayBenefitFromIndexForIn(const ASTPtr & node) const != std::cend(index.column_names); } -MergeTreeIndexPtr ginIndexCreator(const IndexDescription & index) +// just a tmp function to fit ce branch, will pick soon +bool parseWithNewInvertedIndexArguments(const FieldVector& arguments_) { + return arguments_.size() == 2 && arguments_[0].getType() == Field::Types::String && arguments_[1].getType() == Field::Types::String; +} + +void checkWithNewInvertedIndexArguments(const FieldVector & arguments_) +{ + String config_type = arguments_[0].get(); + String config_value = arguments_[1].get(); + + if (config_type != StandardTokenExtractor::getName() || config_value != "{}") + { + throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown config type {} and value should only {} now", config_type); + } +} + +MergeTreeIndexPtr ginIndexCreator(const IndexDescription & index) +{ + // just a tmp code to fit ce branch, will pick soon + if (parseWithNewInvertedIndexArguments(index.arguments)) + { + GinFilterParameters params(0, 1.0); + auto tokenizer = std::make_unique(); + return std::make_shared(index, params, std::move(tokenizer)); + } + if (index.arguments.size() > 2) { // String type_name = index.arguments[0].get(); use for select nlp @@ -766,6 +795,12 @@ void ginIndexValidator(const IndexDescription & index, bool /*attach*/) throw Exception("Inverted index can be used only with `String`, `FixedString`", ErrorCodes::INCORRECT_QUERY); } + if (parseWithNewInvertedIndexArguments(index.arguments)) + { + checkWithNewInvertedIndexArguments(index.arguments); + return; + } + if (index.arguments.size() > 2) /// NLP tokenizer [type_name , config_name , density] { if (index.arguments.size() > 3) diff --git a/src/Storages/MergeTree/MergeTreeIndexInverted.h 
b/src/Storages/MergeTree/MergeTreeIndexInverted.h index 0f60f3eb42e..01813c39159 100644 --- a/src/Storages/MergeTree/MergeTreeIndexInverted.h +++ b/src/Storages/MergeTree/MergeTreeIndexInverted.h @@ -212,6 +212,8 @@ class MergeTreeIndexInverted final : public IMergeTreeIndex bool mayBenefitFromIndexForIn(const ASTPtr & node) const override; + bool isInvertedIndex() const override { return true; } + GinFilterParameters params; /// Function for selecting next token. std::unique_ptr token_extractor; diff --git a/src/Storages/MergeTree/MergeTreeIndices.h b/src/Storages/MergeTree/MergeTreeIndices.h index 6ae69a27c6a..8368170a2f5 100644 --- a/src/Storages/MergeTree/MergeTreeIndices.h +++ b/src/Storages/MergeTree/MergeTreeIndices.h @@ -92,6 +92,8 @@ struct IMergeTreeIndex virtual MergeTreeIndexConditionPtr createIndexCondition( const SelectQueryInfo & query_info, ContextPtr context) const = 0; + virtual bool isInvertedIndex() const { return false; } + Names getColumnsRequiredForIndexCalc() const { return index.expression->getRequiredColumns(); } const IndexDescription & index; diff --git a/tests/queries/4_cnch_stateless/53013_has_tokens_with_inverted_index.reference b/tests/queries/4_cnch_stateless/53013_has_tokens_with_inverted_index.reference new file mode 100644 index 00000000000..abe3cc02883 --- /dev/null +++ b/tests/queries/4_cnch_stateless/53013_has_tokens_with_inverted_index.reference @@ -0,0 +1,58 @@ +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +true +----- +0 begin-first-second-thrid-end +----- +2 end-begin +----- +0 begin-first-second-thrid-end +1 begin-second-first-thrid-end +----- +0 begin-first-second-thrid-end +1 begin-second-first-thrid-end +2 end-begin +3 begin-second-thrid-end +false +without index +0 begin-first-second-thrid-end +4 abc你好ABC +0 begin-first-second-thrid-end +0 begin-first-second-thrid-end +use index +has value +0 begin-first-second-thrid-end +0 begin-first-second-thrid-end +true +----- +0 begin-first-second-thrid-end +----- +2 end-begin +----- +0 
begin-first-second-thrid-end +1 begin-second-first-thrid-end +----- +0 begin-first-second-thrid-end +1 begin-second-first-thrid-end +2 end-begin +3 begin-second-thrid-end +false +without index +0 begin-first-second-thrid-end +0 begin-first-second-thrid-end +0 begin-first-second-thrid-end +4 abc你好ABC +use index +has value +0 begin-first-second-thrid-end +0 begin-first-second-thrid-end +0 begin-first-second-thrid-end +4 abc你好ABC diff --git a/tests/queries/4_cnch_stateless/53013_has_tokens_with_inverted_index.sql b/tests/queries/4_cnch_stateless/53013_has_tokens_with_inverted_index.sql new file mode 100644 index 00000000000..d119afab41d --- /dev/null +++ b/tests/queries/4_cnch_stateless/53013_has_tokens_with_inverted_index.sql @@ -0,0 +1,100 @@ +-- hasTokens() is same with like '%%' without inverted index filter +-- test hasTokens with true +SELECT hasTokens('abcd-efg-hi-jk-lm-n-opqrst-uvwxyz','hi-jk'); +SELECT hasTokens('abcd-efg-hi-jk-lm-n-opqrst-uvwxyz','abcd'); +SELECT hasTokens('abcd-efg-hi-jk-lm-n-opqrst-uvwxyz','uvwxyz'); +SELECT hasTokens('abcd-efg-hi-jk-lm--n--opqrst-uvwxyz','n'); +SELECT hasTokens('abcd-efg-hi-jk-lm--n--opqrst-uvwxyz','-n-'); +SELECT hasTokens('abcd-efg-hi-jk-lm-n-opqrst-uvwxyz','-hi-'); +SELECT hasTokens('abcd-efg-hi-jk-lm-n-opqrst-uvwxyz','xyz'); +SELECT hasTokens('abcd-efg-hi-jk-lm-n-opqrst-uvwxyz','-hi-jk-'); +SELECT hasTokens('abc你好ABC','你好'); +SELECT hasTokens('abc你好ABC','好'); + + +-- test hasTokens with token inverted index + +drop table if exists test_inverted_with_token; + +CREATE TABLE test_inverted_with_token +( + `key` UInt64, + `doc` String, + INDEX doc_idx doc TYPE inverted GRANULARITY 1 +) +ENGINE = CnchMergeTree() +ORDER BY key +settings index_granularity = 1; + +INSERT INTO test_inverted_with_token VALUES (0, 'begin-first-second-thrid-end'), (1, 'begin-second-first-thrid-end'), (2, 'end-begin'), (3, 'begin-second-thrid-end'), (4, 'abc你好ABC'); + +-- 1 +select 'true'; +select '-----'; +SELECT * FROM test_inverted_with_token 
WHERE hasTokens(doc, 'first-second-thrid'); +select '-----'; +SELECT * FROM test_inverted_with_token WHERE hasTokens(doc, 'end-begin'); +select '-----'; +SELECT * FROM test_inverted_with_token WHERE hasTokens(doc, 'first'); +select '-----'; +SELECT * FROM test_inverted_with_token WHERE hasTokens(doc, 'begin'); + +-- 0 +select 'false'; +select 'without index'; +SELECT * FROM test_inverted_with_token WHERE hasTokens(doc, 'st-second-th') settings enable_inverted_index = 0; +SELECT * FROM test_inverted_with_token WHERE hasTokens(doc, '你好') settings enable_inverted_index = 0; +SELECT * FROM test_inverted_with_token WHERE hasTokens(doc, 'begin-first-') settings enable_inverted_index = 0; +SELECT * FROM test_inverted_with_token WHERE hasTokens(doc, '-first-second-') settings enable_inverted_index = 0; +select 'use index'; +SELECT * FROM test_inverted_with_token WHERE hasTokens(doc, 'st-second-th'); +SELECT * FROM test_inverted_with_token WHERE hasTokens(doc, '你好'); +select 'has value'; +SELECT * FROM test_inverted_with_token WHERE hasTokens(doc, 'begin-first-'); +SELECT * FROM test_inverted_with_token WHERE hasTokens(doc, '-first-second-'); + + +drop table if exists test_inverted_with_token; + +drop table if exists test_inverted_with_ngram; + +-- test hasTokens with ngram inverted index +CREATE TABLE test_inverted_with_ngram +( + `key` UInt64, + `doc` String, + INDEX doc_idx doc TYPE inverted(2) GRANULARITY 1 +) +ENGINE = CnchMergeTree() +ORDER BY key +settings index_granularity = 1; + +INSERT INTO test_inverted_with_ngram VALUES (0, 'begin-first-second-thrid-end'), (1, 'begin-second-first-thrid-end'), (2, 'end-begin'), (3, 'begin-second-thrid-end'), (4, 'abc你好ABC'); + +-- 1 +select 'true'; +select '-----'; +SELECT * FROM test_inverted_with_ngram WHERE hasTokens(doc, 'first-second-thrid'); +select '-----'; +SELECT * FROM test_inverted_with_ngram WHERE hasTokens(doc, 'end-begin'); +select '-----'; +SELECT * FROM test_inverted_with_ngram WHERE hasTokens(doc, 'first'); 
+select '-----'; +SELECT * FROM test_inverted_with_ngram WHERE hasTokens(doc, 'begin'); + +-- 0 +select 'false'; +select 'without index'; +SELECT * FROM test_inverted_with_ngram WHERE hasTokens(doc, 'st-second-th') settings enable_inverted_index = 0; +SELECT * FROM test_inverted_with_ngram WHERE hasTokens(doc, 'begin-first-') settings enable_inverted_index = 0; +SELECT * FROM test_inverted_with_ngram WHERE hasTokens(doc, '-first-second-') settings enable_inverted_index = 0; +SELECT * FROM test_inverted_with_ngram WHERE hasTokens(doc, '你好') settings enable_inverted_index = 0; +select 'use index'; +select 'has value'; +SELECT * FROM test_inverted_with_ngram WHERE hasTokens(doc, 'st-second-th'); +SELECT * FROM test_inverted_with_ngram WHERE hasTokens(doc, 'begin-first-'); +SELECT * FROM test_inverted_with_ngram WHERE hasTokens(doc, '-first-second-'); +SELECT * FROM test_inverted_with_ngram WHERE hasTokens(doc, '你好'); + +drop table if exists test_inverted_with_ngram; + diff --git a/tests/queries/4_cnch_stateless/53014_inv_standard_token.reference b/tests/queries/4_cnch_stateless/53014_inv_standard_token.reference new file mode 100644 index 00000000000..cda121730c0 --- /dev/null +++ b/tests/queries/4_cnch_stateless/53014_inv_standard_token.reference @@ -0,0 +1,7 @@ +1 擅长交互式查询和即席查询 +2 具有支持多表关联复杂查询 +1 擅长交互式查询和即席查询 +2 具有支持多表关联复杂查询 +with disable has result +0 ByConity 是分布式的云原生SQL数仓引擎 +with enable not has result diff --git a/tests/queries/4_cnch_stateless/53014_inv_standard_token.sql b/tests/queries/4_cnch_stateless/53014_inv_standard_token.sql new file mode 100644 index 00000000000..24be41b16d5 --- /dev/null +++ b/tests/queries/4_cnch_stateless/53014_inv_standard_token.sql @@ -0,0 +1,21 @@ +DROP TABLE IF EXISTS test_inverted_with_standard; + +CREATE TABLE test_inverted_with_standard +( + `key` UInt64, + `doc` String, + INDEX inv_stand `doc` TYPE inverted('standard', '{}') GRANULARITY 1 +) +ENGINE = CnchMergeTree() +ORDER BY key SETTINGS index_granularity = 1; + +INSERT INTO 
test_inverted_with_standard VALUES (0, 'ByConity 是分布式的云原生SQL数仓引擎'), (1, '擅长交互式查询和即席查询'), (2, '具有支持多表关联复杂查询'), (3, '集群扩容无感'), (4, '离线批数据和实时数据流统一汇总等特点。'); + +select * from test_inverted_with_standard where doc like '%查询%'; +select * from test_inverted_with_standard where hasTokens(doc, '查询'); +select 'with disable has result'; +select * from test_inverted_with_standard where hasTokens(doc, 'Con') SETTINGS enable_inverted_index = 0; +select 'with enable not has result'; +select * from test_inverted_with_standard where hasTokens(doc, 'Con'); + +DROP TABLE IF EXISTS test_inverted_with_standard; \ No newline at end of file From 02558d671088504312fb21661f53c6c39e02faf6 Mon Sep 17 00:00:00 2001 From: fredwang Date: Thu, 1 Aug 2024 09:57:47 +0000 Subject: [PATCH 062/292] Merge 'explain_ignore_format_cnch2.2' into 'cnch-2.2' fix(optimizer@m-4675314011): explain metadata output OutputDescriptions See merge request: !22920 --- src/Analyzers/Analysis.cpp | 8 +++++++ src/Analyzers/Analysis.h | 1 + src/Interpreters/ExplainSettings.h | 4 +++- src/Interpreters/InterpreterExplainQuery.cpp | 1 + .../InterpreterSelectQueryUseOptimizer.h | 3 +++ src/Parsers/ASTQueryWithOutput.h | 1 + src/Parsers/ParserDumpQuery.cpp | 11 ++++++++++ src/Parsers/ParserExplainQuery.cpp | 4 ++++ src/Parsers/ParserQueryWithOutput.cpp | 12 +++++++---- src/QueryPlan/PlanPrinter.cpp | 13 ++++++++++++ .../48022_explain_metadata.reference | 1 + .../48022_explain_metadata.sql | 1 + .../48045_query_lineage.reference | 21 +++++++++++++++++++ 13 files changed, 76 insertions(+), 5 deletions(-) diff --git a/src/Analyzers/Analysis.cpp b/src/Analyzers/Analysis.cpp index 08251361410..13b90e66af3 100644 --- a/src/Analyzers/Analysis.cpp +++ b/src/Analyzers/Analysis.cpp @@ -344,6 +344,14 @@ FieldDescriptions & Analysis::getOutputDescription(IAST & ast) MAP_GET(output_descriptions, &ast); } +bool Analysis::hasOutputDescription(IAST & ast) +{ + if (auto * subquery = ast.as()) + return 
hasOutputDescription(*subquery->children[0]); + + return output_descriptions.contains(&ast); +} + void Analysis::setRegisteredWindow(ASTSelectQuery & select_query, const String & name, ResolvedWindowPtr & window) { MAP_SET(registered_windows[&select_query], name, window); diff --git a/src/Analyzers/Analysis.h b/src/Analyzers/Analysis.h index 81bfe451182..e47da9f2618 100644 --- a/src/Analyzers/Analysis.h +++ b/src/Analyzers/Analysis.h @@ -401,6 +401,7 @@ struct Analysis std::unordered_map output_descriptions; void setOutputDescription(IAST & ast, const FieldDescriptions & field_descs); FieldDescriptions & getOutputDescription(IAST & ast); + bool hasOutputDescription(IAST & ast); /// Sub column optimization std::unordered_map sub_column_references; diff --git a/src/Interpreters/ExplainSettings.h b/src/Interpreters/ExplainSettings.h index e2740eaec68..94fe5fa4c4d 100644 --- a/src/Interpreters/ExplainSettings.h +++ b/src/Interpreters/ExplainSettings.h @@ -19,6 +19,7 @@ struct QueryMetadataSettings bool lineage = false; bool format_json = false; bool lineage_use_optimizer = false; + bool ignore_format = false; constexpr static char name[] = "METADATA"; @@ -27,7 +28,8 @@ struct QueryMetadataSettings {"json", json}, {"lineage", lineage}, {"lineage_use_optimizer", lineage_use_optimizer}, - {"format_json", format_json} + {"format_json", format_json}, + {"ignore_format", ignore_format} }; }; diff --git a/src/Interpreters/InterpreterExplainQuery.cpp b/src/Interpreters/InterpreterExplainQuery.cpp index 35dc831201e..bc420c84adf 100644 --- a/src/Interpreters/InterpreterExplainQuery.cpp +++ b/src/Interpreters/InterpreterExplainQuery.cpp @@ -810,6 +810,7 @@ BlockInputStreamPtr InterpreterExplainQuery::explainMetaData() { InterpreterSelectQueryUseOptimizer interpreter(query_ptr, contxt, SelectQueryOptions()); interpreter.buildQueryPlan(query_plan, analysis, !metadata_settings.lineage_use_optimizer); + query_ptr = interpreter.getQuery(); } catch (...) 
{ diff --git a/src/Interpreters/InterpreterSelectQueryUseOptimizer.h b/src/Interpreters/InterpreterSelectQueryUseOptimizer.h index f536ab6591a..224765417a6 100644 --- a/src/Interpreters/InterpreterSelectQueryUseOptimizer.h +++ b/src/Interpreters/InterpreterSelectQueryUseOptimizer.h @@ -26,6 +26,7 @@ #include #include #include +#include "Parsers/IAST_fwd.h" namespace Poco { @@ -87,6 +88,8 @@ class InterpreterSelectQueryUseOptimizer : public IInterpreter BlockIO executeCreatePreparedStatementQuery(); bool isCreatePreparedStatement(); + ASTPtr & getQuery() { return query_ptr; } + private: ASTPtr query_ptr; PlanNodePtr sub_plan_ptr; diff --git a/src/Parsers/ASTQueryWithOutput.h b/src/Parsers/ASTQueryWithOutput.h index c60ec79c7e9..f4c33ac8e21 100644 --- a/src/Parsers/ASTQueryWithOutput.h +++ b/src/Parsers/ASTQueryWithOutput.h @@ -39,6 +39,7 @@ class ASTQueryWithOutput : public IAST ASTPtr settings_ast; ASTPtr compression_method; ASTPtr compression_level; + bool ignore_format = false; void formatOutput(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const; diff --git a/src/Parsers/ParserDumpQuery.cpp b/src/Parsers/ParserDumpQuery.cpp index 90f39181a44..b1265bb75fc 100644 --- a/src/Parsers/ParserDumpQuery.cpp +++ b/src/Parsers/ParserDumpQuery.cpp @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB { @@ -48,6 +49,7 @@ bool ParserDumpQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ASTPtr where_expression; ASTPtr dump_path; ASTPtr settings; + bool ignore_format = false; /// DUMP if (!s_dump.ignore(pos, expected)) @@ -58,6 +60,14 @@ bool ParserDumpQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!parser_settings.parse(pos, settings, expected)) pos = begin; + if (settings) + { + auto & settings_ast = settings->as(); + auto * ignore_format_setting = settings_ast.changes.tryGet("ignore_format"); + if (ignore_format_setting && ignore_format_setting->toString() == "1") + ignore_format = true; + } + /// DDL 
if (s_ddl.ignore(pos, expected)) { @@ -149,6 +159,7 @@ bool ParserDumpQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) dump_query->setExpression(ASTDumpQuery::Expression::CLUSTER, std::move(cluster_name)); dump_query->setExpression(ASTDumpQuery::Expression::DUMP_PATH, std::move(dump_path)); dump_query->setExpression(ASTDumpQuery::Expression::SETTING, std::move(settings)); + dump_query->ignore_format = ignore_format; if (output_client) { diff --git a/src/Parsers/ParserExplainQuery.cpp b/src/Parsers/ParserExplainQuery.cpp index ee5984082ab..033d8a52893 100644 --- a/src/Parsers/ParserExplainQuery.cpp +++ b/src/Parsers/ParserExplainQuery.cpp @@ -110,6 +110,10 @@ bool ParserExplainQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected explain_query->format = std::make_shared("JSON"); setIdentifierSpecial(explain_query->format); } + + auto * ignore_format = settings_ast.changes.tryGet("ignore_format"); + if (ignore_format && ignore_format->toString() == "1") + explain_query->ignore_format = true; explain_query->setSettings(std::move(settings)); } else diff --git a/src/Parsers/ParserQueryWithOutput.cpp b/src/Parsers/ParserQueryWithOutput.cpp index afb0fd2d75a..055c4599ad3 100644 --- a/src/Parsers/ParserQueryWithOutput.cpp +++ b/src/Parsers/ParserQueryWithOutput.cpp @@ -180,12 +180,16 @@ bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (s_format.ignore(pos, expected)) { ParserIdentifier format_p; + ASTPtr format; - if (!format_p.parse(pos, query_with_output.format, expected)) + if (!format_p.parse(pos, format, expected)) return false; - setIdentifierSpecial(query_with_output.format); - - query_with_output.children.push_back(query_with_output.format); + if (!query_with_output.ignore_format) + { + query_with_output.format = format; + setIdentifierSpecial(query_with_output.format); + query_with_output.children.push_back(query_with_output.format); + } } return true; diff --git a/src/QueryPlan/PlanPrinter.cpp 
b/src/QueryPlan/PlanPrinter.cpp index 9fe7fb34935..17e61e60c32 100644 --- a/src/QueryPlan/PlanPrinter.cpp +++ b/src/QueryPlan/PlanPrinter.cpp @@ -34,6 +34,7 @@ #include #include #include +#include "Parsers/ASTCreateQuery.h" #include #include @@ -1864,6 +1865,18 @@ String PlanPrinter::jsonMetaData( query_used_settings->set(setting.name, setting.value.toString()); metadata_json->set("UsedSettings", query_used_settings); + Poco::JSON::Array output_descs; + ASTPtr & select_ast = query; + if (auto * insert_query = query->as()) + select_ast = insert_query->select; + + if (analysis->hasOutputDescription(*select_ast)) + { + for (const auto & desc : analysis->getOutputDescription(*select_ast)) + output_descs.add(desc.name); + } + metadata_json->set("OutputDescriptions", output_descs); + // get InsertInfo Poco::JSON::Object::Ptr insert_table_info = new Poco::JSON::Object(true); if (analysis->getInsert()) diff --git a/tests/queries/4_cnch_stateless_no_tenant/48022_explain_metadata.reference b/tests/queries/4_cnch_stateless_no_tenant/48022_explain_metadata.reference index 360fea81f2e..f20e0366ce1 100644 --- a/tests/queries/4_cnch_stateless_no_tenant/48022_explain_metadata.reference +++ b/tests/queries/4_cnch_stateless_no_tenant/48022_explain_metadata.reference @@ -15,3 +15,4 @@ ['metadata3'] ['test'] [['b']] [] {} [] {} ['metadata3'] ['test'] [['c']] [] {} [] {} [] [] [] ['getSetting'] {} [] {'getSetting':['enable_optimizer','enable_optimizer_fallback']} +[] [] [] ['plus'] {} [] {} diff --git a/tests/queries/4_cnch_stateless_no_tenant/48022_explain_metadata.sql b/tests/queries/4_cnch_stateless_no_tenant/48022_explain_metadata.sql index ca5c93732d0..14f935d5cd0 100644 --- a/tests/queries/4_cnch_stateless_no_tenant/48022_explain_metadata.sql +++ b/tests/queries/4_cnch_stateless_no_tenant/48022_explain_metadata.sql @@ -49,3 +49,4 @@ set enable_optimizer=1; set enable_optimizer_fallback=0; explain metadata select getSetting('enable_optimizer'), 
getSetting('enable_optimizer_fallback'); +explain metadata ignore_format=1 select 1+1 format JSON; diff --git a/tests/queries/4_cnch_stateless_no_tenant/48045_query_lineage.reference b/tests/queries/4_cnch_stateless_no_tenant/48045_query_lineage.reference index 4078ac5099e..ea2ae6bafda 100644 --- a/tests/queries/4_cnch_stateless_no_tenant/48045_query_lineage.reference +++ b/tests/queries/4_cnch_stateless_no_tenant/48045_query_lineage.reference @@ -24,6 +24,11 @@ ], "UsedSettings" : { }, + "OutputDescriptions" : [ + "a", + "b", + "2" + ], "InsertInfo" : { }, "FunctionsInfo" : [ @@ -148,6 +153,10 @@ ], "UsedSettings" : { }, + "OutputDescriptions" : [ + "a", + "t2.b" + ], "InsertInfo" : { }, "FunctionsInfo" : [ @@ -225,6 +234,10 @@ ], "UsedSettings" : { }, + "OutputDescriptions" : [ + "a", + "t2.b" + ], "InsertInfo" : { }, "FunctionsInfo" : [ @@ -302,6 +315,10 @@ ], "UsedSettings" : { }, + "OutputDescriptions" : [ + "a", + "t2.b" + ], "InsertInfo" : { }, "FunctionsInfo" : [ @@ -371,6 +388,10 @@ ], "UsedSettings" : { }, + "OutputDescriptions" : [ + "c", + "a" + ], "InsertInfo" : { "Database" : "test", "Table" : "lineage_test", From d249a04aba9ebf71283992482b7c84266c44d02e Mon Sep 17 00:00:00 2001 From: fredwang Date: Thu, 1 Aug 2024 09:58:06 +0000 Subject: [PATCH 063/292] Merge branch 'zema/cnch-2.2-cp-adbMinorFix' into 'cnch-2.2' fix(clickhousech@m-4655931866): [cp 2.2] minor fix for msyql compatibility See merge request dp/ClickHouse!22904 --- src/Functions/partitionId.cpp | 3 +- src/Interpreters/InterpreterDeleteQuery.cpp | 9 ++- src/Interpreters/InterpreterInsertQuery.cpp | 5 +- src/Interpreters/InterpreterInsertQuery.h | 5 +- src/Interpreters/InterpreterUpdateQuery.cpp | 73 ++++++++++++++++--- src/MergeTreeCommon/MergeTreeMetaBase.cpp | 21 ++++-- src/MergeTreeCommon/MergeTreeMetaBase.h | 9 ++- src/Parsers/ASTAssignment.h | 14 +++- src/Parsers/ExpressionElementParsers.cpp | 26 +++++++ src/Parsers/ExpressionElementParsers.h | 9 +++ src/Parsers/ParserUpdateQuery.cpp 
| 2 +- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 9 +-- .../MergeTree/MergeTreeDataWriter.cpp | 8 +- src/Storages/MergeTree/MergeTreePartition.cpp | 20 ++++- src/Storages/MergeTree/MergeTreePartition.h | 2 +- src/Storages/MergeTree/MergeTreeSettings.h | 1 + src/Storages/StorageCnchMergeTree.cpp | 39 +++++++++- src/Transaction/Actions/DDLAlterAction.cpp | 2 +- .../01001_alter_drop_partition.reference | 5 ++ .../01001_alter_drop_partition.sql | 11 +++ .../01001_trivial_count_cnch_parts.reference | 2 + .../01001_trivial_count_cnch_parts.sql | 9 +++ .../10050_update_join.reference | 5 ++ .../10050_update_join.sql | 20 ++++- .../convert-month-partitioned-parts/main.cpp | 2 +- 25 files changed, 268 insertions(+), 43 deletions(-) rename tests/queries/{4_cnch_stateless => 4_cnch_stateless_no_tenant}/10050_update_join.reference (60%) rename tests/queries/{4_cnch_stateless => 4_cnch_stateless_no_tenant}/10050_update_join.sql (62%) diff --git a/src/Functions/partitionId.cpp b/src/Functions/partitionId.cpp index 3d4f588c1b0..9db64ded8ca 100644 --- a/src/Functions/partitionId.cpp +++ b/src/Functions/partitionId.cpp @@ -57,7 +57,8 @@ class FunctionPartitionId : public IFunction for (size_t i = 0; i < size; ++i) arguments[i].column->get(j, row[i]); MergeTreePartition partition(std::move(row)); - result_column->insert(partition.getID(sample_block)); + /// TODO: (zuochuang.zema) how to get extract_nullable_date_value + result_column->insert(partition.getID(sample_block, false)); } return result_column; } diff --git a/src/Interpreters/InterpreterDeleteQuery.cpp b/src/Interpreters/InterpreterDeleteQuery.cpp index 85eeb84289c..2da012c58eb 100644 --- a/src/Interpreters/InterpreterDeleteQuery.cpp +++ b/src/Interpreters/InterpreterDeleteQuery.cpp @@ -87,7 +87,14 @@ BlockIO InterpreterDeleteQuery::execute() 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); - InterpreterInsertQuery insert_interpreter(insert_ast, getContext()); + InterpreterInsertQuery insert_interpreter( + insert_ast, + 
getContext(), + /*allow_materialized_*/false, + /*no_squash_*/false, + /*no_destination_*/false, + AccessType::ALTER_DELETE); + return insert_interpreter.execute(); } else diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 7c50b387fd3..1ed068fa77e 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -88,12 +88,13 @@ namespace ErrorCodes } InterpreterInsertQuery::InterpreterInsertQuery( - const ASTPtr & query_ptr_, ContextPtr context_, bool allow_materialized_, bool no_squash_, bool no_destination_) + const ASTPtr & query_ptr_, ContextPtr context_, bool allow_materialized_, bool no_squash_, bool no_destination_, AccessType access_type_) : WithContext(context_) , query_ptr(query_ptr_) , allow_materialized(allow_materialized_) , no_squash(no_squash_) , no_destination(no_destination_) + , access_type(access_type_) { checkStackSize(); } @@ -311,7 +312,7 @@ BlockIO InterpreterInsertQuery::execute() auto query_sample_block = getSampleBlock(insert_query, table, metadata_snapshot); if (!insert_query.table_function) - getContext()->checkAccess(AccessType::INSERT, insert_query.table_id, query_sample_block.getNames()); + getContext()->checkAccess(access_type, insert_query.table_id, query_sample_block.getNames()); bool is_distributed_insert_select = false; diff --git a/src/Interpreters/InterpreterInsertQuery.h b/src/Interpreters/InterpreterInsertQuery.h index c5e9848acfd..2693b974a20 100644 --- a/src/Interpreters/InterpreterInsertQuery.h +++ b/src/Interpreters/InterpreterInsertQuery.h @@ -21,6 +21,7 @@ #pragma once +#include #include #include #include @@ -40,7 +41,8 @@ class InterpreterInsertQuery : public IInterpreter, WithContext ContextPtr context_, bool allow_materialized_ = false, bool no_squash_ = false, - bool no_destination_ = false); + bool no_destination_ = false, + AccessType access_type_ = AccessType::INSERT); /** Prepare a request for execution. 
Return block streams * - the stream into which you can write data to execute the query, if INSERT; @@ -71,6 +73,7 @@ class InterpreterInsertQuery : public IInterpreter, WithContext const bool allow_materialized; const bool no_squash; const bool no_destination; + AccessType access_type{AccessType::INSERT}; }; diff --git a/src/Interpreters/InterpreterUpdateQuery.cpp b/src/Interpreters/InterpreterUpdateQuery.cpp index 9469ac4fec9..a71a93ca586 100644 --- a/src/Interpreters/InterpreterUpdateQuery.cpp +++ b/src/Interpreters/InterpreterUpdateQuery.cpp @@ -58,6 +58,34 @@ BlockIO InterpreterUpdateQuery::execute() } +static ASTTableExpression * getFirstTableExpression(const ASTUpdateQuery & update) +{ + if (!update.tables) + return {}; + + auto & tables_in_update_query = update.tables->as(); + if (tables_in_update_query.children.empty()) + return {}; + + auto & tables_element = tables_in_update_query.children[0]->as(); + if (!tables_element.table_expression) + return {}; + + return tables_element.table_expression->as(); +} + +static String getTableExpressionAlias(const ASTTableExpression * table_expression) +{ + if (table_expression->subquery) + return table_expression->subquery->tryGetAlias(); + else if (table_expression->table_function) + return table_expression->table_function->tryGetAlias(); + else if (table_expression->database_and_table_name) + return table_expression->database_and_table_name->tryGetAlias(); + + return String(); +} + ASTPtr InterpreterUpdateQuery::prepareInterpreterSelectQuery(const StoragePtr & storage) { auto res = std::make_shared(); @@ -78,20 +106,28 @@ ASTPtr InterpreterUpdateQuery::prepareInterpreterSelectQuery(const StoragePtr & //collect assignments std::unordered_map assignments; + String update_table_alias; for (const auto & child : ast_update.assignment_list->children) { - if (const ASTAssignment * assignment = child->as()) + const ASTAssignment * assignment = child->as(); + if (!assignment) + throw Exception("Syntax error in update 
statement. " + child->getID(), ErrorCodes::SYNTAX_ERROR); + + if (const auto & t = assignment->table_name; !t.empty()) { - if (immutable_columns.count(assignment->column_name)) - throw Exception("Updating partition/unique keys is not allowed.", ErrorCodes::BAD_ARGUMENTS); + if (update_table_alias.empty()) + update_table_alias = t; + else if (update_table_alias != t) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "UPDATE multi tables is not supported. Tables: {}, {}", update_table_alias, t); + } - if (!ordinary_columns.count(assignment->column_name)) - throw Exception("There is no column named " + assignment->column_name, ErrorCodes::BAD_ARGUMENTS); + if (immutable_columns.count(assignment->column_name)) + throw Exception("Updating partition/unique keys is not allowed.", ErrorCodes::BAD_ARGUMENTS); - assignments.emplace(assignment->column_name, assignment->expression()->clone()); - } - else - throw Exception("Syntax error in update statement. " + child->getID(), ErrorCodes::SYNTAX_ERROR); + if (!ordinary_columns.count(assignment->column_name)) + throw Exception("There is no column named " + assignment->column_name, ErrorCodes::BAD_ARGUMENTS); + + assignments.emplace(assignment->column_name, assignment->expression()->clone()); } auto select_list = std::make_shared(); @@ -119,6 +155,8 @@ ASTPtr InterpreterUpdateQuery::prepareInterpreterSelectQuery(const StoragePtr & if (ast_update.single_table) { + if (!update_table_alias.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "No table alias found: {}", update_table_alias); res->setExpression(ASTSelectQuery::Expression::TABLES, std::make_shared()); auto tables = res->tables(); auto tables_elem = std::make_shared(); @@ -131,6 +169,23 @@ ASTPtr InterpreterUpdateQuery::prepareInterpreterSelectQuery(const StoragePtr & } else { + const auto & first_table = getFirstTableExpression(ast_update); + auto first_table_alias = getTableExpressionAlias(first_table); + + /// Check that only the first table is updated. 
+ if (update_table_alias.empty()) + { + /// By default, if update_table is empty, it means the first table is updated. + } + else + { + if (first_table_alias.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "SET with table alias but there is no table alias. {}, {}", update_table_alias); + else if (first_table_alias != update_table_alias) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "It's only allowed to update the first table `{}`, but `{}` is given.", first_table_alias, update_table_alias); + } + res->setExpression(ASTSelectQuery::Expression::TABLES, ast_update.tables->clone()); } diff --git a/src/MergeTreeCommon/MergeTreeMetaBase.cpp b/src/MergeTreeCommon/MergeTreeMetaBase.cpp index 744f6c14b95..d93ec676a63 100644 --- a/src/MergeTreeCommon/MergeTreeMetaBase.cpp +++ b/src/MergeTreeCommon/MergeTreeMetaBase.cpp @@ -836,9 +836,11 @@ Block MergeTreeMetaBase::getBlockWithVirtualPartitionColumns( for (const auto & partition : partition_list) { partition_id_column->insert(partition->getID(*this)); - Tuple tuple(partition->value.begin(), partition->value.end()); if (has_partition_value) + { + Tuple tuple(partition->value.begin(), partition->value.end()); partition_value_column->insert(std::move(tuple)); + } } block.setColumns(std::move(columns)); if (!has_partition_value) @@ -2100,12 +2102,18 @@ ASTPtr MergeTreeMetaBase::applyFilter( return PredicateUtils::combineConjuncts(conjuncts); } -void MergeTreeMetaBase::filterPartitionByTTL(std::vector> & partition_list, ContextPtr local_context) const +bool MergeTreeMetaBase::canFilterPartitionByTTL() const { auto metadata_snapshot = getInMemoryMetadataPtr(); TTLTableDescription table_ttl = metadata_snapshot->getTableTTLs(); - if (metadata_snapshot->hasPartitionLevelTTL() && table_ttl.definition_ast && local_context->getCurrentTransaction()) + return metadata_snapshot->hasPartitionLevelTTL() && table_ttl.definition_ast; +} + +void MergeTreeMetaBase::filterPartitionByTTL(std::vector> & partition_list, time_t query_time) 
const +{ + if (canFilterPartitionByTTL()) { + const auto & metadata_snapshot = getInMemoryMetadataPtr(); if (!metadata_snapshot->hasRowsTTL()) return; @@ -2142,8 +2150,9 @@ void MergeTreeMetaBase::filterPartitionByTTL(std::vectorsize() != partition_list.size()) throw Exception("Calculated TTL column size cannot match input partitions column size.", ErrorCodes::LOGICAL_ERROR); - TxnTimestamp start_ts = local_context->getCurrentTransactionID(); - time_t query_time = start_ts.toSecond(); + if (query_time == 0) + query_time = std::time(nullptr); + std::vector> filtered_result; if (column->isNullable()) @@ -2213,7 +2222,7 @@ Strings MergeTreeMetaBase::selectPartitionsByPredicate( /// (3) `_partition_id` or `_partition_value` if they're in predicate /// (1) Prune partition by partition level TTL - filterPartitionByTTL(partition_list, local_context); + filterPartitionByTTL(partition_list, local_context->tryGetCurrentTransactionID().toSecond()); const auto partition_key = MergeTreePartition::adjustPartitionKey(getInMemoryMetadataPtr(), local_context); const auto & partition_key_expr = partition_key.expression; diff --git a/src/MergeTreeCommon/MergeTreeMetaBase.h b/src/MergeTreeCommon/MergeTreeMetaBase.h index bc98bb9a290..11a87f5ab84 100644 --- a/src/MergeTreeCommon/MergeTreeMetaBase.h +++ b/src/MergeTreeCommon/MergeTreeMetaBase.h @@ -284,6 +284,12 @@ class MergeTreeMetaBase : public IStorage, public WithMutableContext, public Mer /// For ATTACH/DETACH/DROP PARTITION. 
String getPartitionIDFromQuery(const ASTPtr & ast, ContextPtr context) const; + bool extractNullableForPartitionID() const + { + const auto & settings = getSettings(); + return settings->allow_nullable_key && settings->extract_partition_nullable_date; + } + MutableDataPartPtr cloneAndLoadDataPartOnSameDisk(const DataPartPtr & src_part, const String & tmp_part_prefix, const MergeTreePartInfo & dst_part_info, const StorageMetadataPtr & metadata_snapshot); @@ -452,7 +458,8 @@ class MergeTreeMetaBase : public IStorage, public WithMutableContext, public Mer /// partition filters /// TODO: make partition_list constant - void filterPartitionByTTL(std::vector> & partition_list, ContextPtr local_context) const; + void filterPartitionByTTL(std::vector> & partition_list, time_t query_time) const; + bool canFilterPartitionByTTL() const; Strings selectPartitionsByPredicate( const SelectQueryInfo & query_info, diff --git a/src/Parsers/ASTAssignment.h b/src/Parsers/ASTAssignment.h index 3da54d717b4..389a56e008e 100644 --- a/src/Parsers/ASTAssignment.h +++ b/src/Parsers/ASTAssignment.h @@ -26,10 +26,11 @@ namespace DB { -/// Part of the ALTER UPDATE statement of the form: column = expr +/// Part of the ALTER UPDATE statement of the form: column = expr or tbl_alias.col = expr class ASTAssignment : public IAST { public: + String table_name; String column_name; ASTPtr expression() const @@ -37,7 +38,9 @@ class ASTAssignment : public IAST return children.at(0); } - String getID(char delim) const override { return "Assignment" + (delim + column_name); } + String tablePrefix() const { return table_name.empty() ? 
"" : table_name + "."; } + + String getID(char delim) const override { return "Assignment" + (delim + tablePrefix() + column_name); } ASTType getType() const override { return ASTType::ASTAssignment; } @@ -50,11 +53,13 @@ class ASTAssignment : public IAST void toLowerCase() override { + boost::to_lower(table_name); boost::to_lower(column_name); } void toUpperCase() override { + boost::to_upper(table_name); boost::to_upper(column_name); } @@ -62,6 +67,11 @@ class ASTAssignment : public IAST void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override { settings.ostr << (settings.hilite ? hilite_identifier : ""); + if (!table_name.empty()) + { + settings.writeIdentifier(table_name); + settings.ostr << "."; + } settings.writeIdentifier(column_name); settings.ostr << (settings.hilite ? hilite_none : ""); diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 0d47feb9526..01190040e7c 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -68,6 +68,7 @@ #include #include #include "ASTColumnsMatcher.h" +#include "Parsers/parseDatabaseAndTableName.h" #include #include @@ -3133,6 +3134,31 @@ bool ParserAssignment::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return true; } +/// a.col = _expression_ or col = _expression_ +/// Reuse `parseDatabaseAndTableName` for extracting table alias and column name. 
+bool ParserAssignmentWithAlias::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + auto assignment = std::make_shared(); + node = assignment; + + ParserToken s_equals(TokenType::Equals); + ParserExpression p_expression(dt); + + parseDatabaseAndTableName(pos, expected, assignment->table_name, assignment->column_name); + + if (!s_equals.ignore(pos, expected)) + return false; + + ASTPtr expression; + if (!p_expression.parse(pos, expression, expected)) + return false; + + if (expression) + assignment->children.push_back(expression); + + return true; +} + bool ParserEscapeExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ParserStringLiteral escape_exp(dt); diff --git a/src/Parsers/ExpressionElementParsers.h b/src/Parsers/ExpressionElementParsers.h index dc1250df57e..63b5590e0ef 100644 --- a/src/Parsers/ExpressionElementParsers.h +++ b/src/Parsers/ExpressionElementParsers.h @@ -634,6 +634,15 @@ class ParserAssignment : public IParserDialectBase using IParserDialectBase::IParserDialectBase; }; +class ParserAssignmentWithAlias : public IParserDialectBase +{ +protected: + const char * getName() const override{ return "column assignment with alias"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +public: + using IParserDialectBase::IParserDialectBase; +}; + class ParserEscapeExpression : public IParserDialectBase { const char * getName() const override { return "ESCAPE clause"; } diff --git a/src/Parsers/ParserUpdateQuery.cpp b/src/Parsers/ParserUpdateQuery.cpp index 7b8960fd870..012d92f3fea 100644 --- a/src/Parsers/ParserUpdateQuery.cpp +++ b/src/Parsers/ParserUpdateQuery.cpp @@ -26,7 +26,7 @@ bool ParserUpdateQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserKeyword s_limit("LIMIT"); ParserKeyword s_settings("SETTINGS"); - ParserList parser_assignment_list(std::make_unique(dt), std::make_unique(TokenType::Comma), false); + ParserList parser_assignment_list(std::make_unique(dt), 
std::make_unique(TokenType::Comma), false); ParserExpression exp_list(dt); ParserOrderByExpressionList order_list(dt); ParserExpressionWithOptionalAlias exp_elem(false, dt); diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index dc46766352c..11739fc533b 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -1151,8 +1151,7 @@ void IMergeTreeDataPart::loadPartitionAndMinMaxIndex() if (info.isFakeDropRangePart()) /// Skip check if drop_range_part return; - auto metadata_snapshot = storage.getInMemoryMetadataPtr(); - String calculated_partition_id = partition.getID(metadata_snapshot->getPartitionKey().sample_block); + String calculated_partition_id = partition.getID(storage); if (calculated_partition_id != info.partition_id) throw Exception( "While loading part " + getFullPath() + ": calculated partition ID: " + calculated_partition_id @@ -2328,8 +2327,7 @@ void IMergeTreeDataPart::loadPartitionAndMinMaxIndex(ReadBuffer & buf) if (info.isFakeDropRangePart()) /// Skip check if drop_range_part return; - auto metadata_snapshot = storage.getInMemoryMetadataPtr(); - String calculated_partition_id = partition.getID(metadata_snapshot->getPartitionKey().sample_block); + String calculated_partition_id = partition.getID(storage); if (calculated_partition_id != info.partition_id) throw Exception( "While loading part " + getFullPath() + ": calculated partition ID: " + calculated_partition_id @@ -2510,8 +2508,7 @@ void IMergeTreeDataPart::deserializePartitionAndMinMaxIndex(ReadBuffer & buffer) minmax_idx.load(storage, buffer); } - auto metadata_snapshot = storage.getInMemoryMetadataPtr(); - String calculated_partition_id = partition.getID(metadata_snapshot->getPartitionKey().sample_block); + String calculated_partition_id = partition.getID(storage); if (calculated_partition_id != info.partition_id) throw Exception( "While loading part " + getFullPath() + ": 
calculated partition ID: " + calculated_partition_id diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 65d52746c4b..8b8995cce91 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -381,7 +381,13 @@ MergeTreeMetaBase::MutableDataPartPtr MergeTreeDataWriter::writeTempPart( MergeTreePartition partition(std::move(block_with_partition.partition)); - MergeTreePartInfo new_part_info(partition.getID(metadata_snapshot->getPartitionKey().sample_block), temp_index, temp_index, 0, mutation, hint_mutation); + MergeTreePartInfo new_part_info( + partition.getID(metadata_snapshot->getPartitionKey().sample_block, data.extractNullableForPartitionID()), + /*min_block_*/temp_index, + /*max_block_*/temp_index, + /*level_*/0, + mutation, + hint_mutation); String part_name; if (data.format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) { diff --git a/src/Storages/MergeTree/MergeTreePartition.cpp b/src/Storages/MergeTree/MergeTreePartition.cpp index b4a47262f5a..cf72575b010 100644 --- a/src/Storages/MergeTree/MergeTreePartition.cpp +++ b/src/Storages/MergeTree/MergeTreePartition.cpp @@ -32,6 +32,7 @@ #include #include #include +#include "DataTypes/DataTypeNullable.h" #include "IO/ReadSettings.h" #include @@ -267,12 +268,12 @@ static std::unique_ptr openForReading(const DiskPtr & di String MergeTreePartition::getID(const MergeTreeMetaBase & storage) const { - return getID(storage.getInMemoryMetadataPtr()->getPartitionKey().sample_block); + return getID(storage.getInMemoryMetadataPtr()->getPartitionKey().sample_block, storage.extractNullableForPartitionID()); } /// NOTE: This ID is used to create part names which are then persisted in ZK and as directory names on the file system. /// So if you want to change this method, be sure to guarantee compatibility with existing table data. 
-String MergeTreePartition::getID(const Block & partition_key_sample) const +String MergeTreePartition::getID(const Block & partition_key_sample, bool extract_nullable_date_value) const { if (value.size() != partition_key_sample.columns()) throw Exception("Invalid partition key size: " + toString(value.size()), ErrorCodes::LOGICAL_ERROR); @@ -301,8 +302,21 @@ String MergeTreePartition::getID(const Block & partition_key_sample) const { if (i > 0) result += '-'; + + const auto & col = partition_key_sample.getByPosition(i); + auto type = col.type; + + /// As we already support nullable key, user may create a PARTITION BY with Nullable(Date) column. + /// For such scenario, we need to try to format the nested date value if possible. + /// It's disabled by default as it would break the compatibility. + /// It's safe and good to enable the feature for new (or empty) tables, by setting allow_nullable_key = 1 and extract_partition_nullable_date = 1. + if (extract_nullable_date_value) + { + if (type->isNullable()) + type = static_cast(col.type.get())->getNestedType(); + } - if (typeid_cast(partition_key_sample.getByPosition(i).type.get())) + if (typeid_cast(type.get())) result += toString(DateLUT::instance().toNumYYYYMMDD(DayNum(value[i].safeGet()))); else result += applyVisitor(to_string_visitor, value[i]); diff --git a/src/Storages/MergeTree/MergeTreePartition.h b/src/Storages/MergeTree/MergeTreePartition.h index a65ce106ec4..c720cee6037 100644 --- a/src/Storages/MergeTree/MergeTreePartition.h +++ b/src/Storages/MergeTree/MergeTreePartition.h @@ -52,7 +52,7 @@ struct MergeTreePartition explicit MergeTreePartition(UInt32 yyyymm) : value(1, yyyymm) {} String getID(const MergeTreeMetaBase & storage) const; - String getID(const Block & partition_key_sample) const; + String getID(const Block & partition_key_sample, bool extract_nullable_date_value) const; void serializeText(const MergeTreeMetaBase & storage, WriteBuffer & out, const FormatSettings & format_settings) const; 
diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index c2e24ecb4c1..02ec8d342f9 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -365,6 +365,7 @@ enum StealingCacheMode : UInt64 0) \ M(String, storage_policy, "default", "Name of storage disk policy", 0) \ M(Bool, allow_nullable_key, false, "Allow Nullable types as primary keys.", 0) \ + M(Bool, extract_partition_nullable_date, false, "Extract date value from Nullable Date column when allow_nullable_key is true", 0) \ M(Bool, allow_remote_fs_zero_copy_replication, false, "Allow Zero-copy replication over remote fs", 0) \ M(Bool, remove_empty_parts, true, "Remove empty parts after they were pruned by TTL, mutation, or collapsing merge algorithm", 0) \ M(Bool, assign_part_uuids, false, "Generate UUIDs for parts. Before enabling check that all replicas support new format.", 0) \ diff --git a/src/Storages/StorageCnchMergeTree.cpp b/src/Storages/StorageCnchMergeTree.cpp index 6f0a0088efb..0c63acfa0c0 100644 --- a/src/Storages/StorageCnchMergeTree.cpp +++ b/src/Storages/StorageCnchMergeTree.cpp @@ -13,6 +13,7 @@ * limitations under the License. 
*/ +#include #include #include @@ -421,7 +422,7 @@ Strings StorageCnchMergeTree::getPartitionsByPredicate(const ASTPtr & predicate, LOG_TRACE(log, "Return all partitions by predicate"); Names all_partition_ids; for (const auto & partition : partition_list) - all_partition_ids.emplace_back(partition->getID(partition_key_sample)); + all_partition_ids.emplace_back(partition->getID(partition_key_sample, extractNullableForPartitionID())); return all_partition_ids; } @@ -459,7 +460,7 @@ Strings StorageCnchMergeTree::getPartitionsByPredicate(const ASTPtr & predicate, for (size_t i = 0; i < partition_list.size(); ++i) { if (res_column->getBool(i)) - filtered_partition_ids.emplace_back(partition_list[i]->getID(partition_key_sample)); + filtered_partition_ids.emplace_back(partition_list[i]->getID(partition_key_sample, extractNullableForPartitionID())); } LOG_TRACE(log, fmt::format("Return partitions by predicate:{}", fmt::join(filtered_partition_ids, ","))); @@ -564,6 +565,9 @@ time_t StorageCnchMergeTree::getTTLForPartition(const MergeTreePartition & parti if (column->size() > 1) throw Exception("Cannot get TTL value from table ttl ast since there are multiple ttl value", ErrorCodes::LOGICAL_ERROR); + if (column->isNullable()) + column = static_cast(column)->getNestedColumnPtr().get(); + if (const ColumnUInt16 * column_date = typeid_cast(column)) { const auto & date_lut = DateLUT::instance(); @@ -3151,17 +3155,44 @@ String StorageCnchMergeTree::genCreateTableQueryForWorker(const String & suffix) std::optional StorageCnchMergeTree::totalRows(const ContextPtr & query_context) const { + const auto & metadata_snapshot = getInMemoryMetadataPtr(); + auto partition_sample_block = getInMemoryMetadataPtr()->getPartitionKey().sample_block; + + /// Prune partitions by partition level TTL even there is no WHERE condition. 
+ std::optional partition_ids = std::nullopt; + if (canFilterPartitionByTTL()) + { + partition_ids = std::make_optional(); + auto partition_list = query_context->getCnchCatalog()->getPartitionList(shared_from_this(), query_context.get()); + if (partition_list.empty()) + return 0; + auto num_total_partition = partition_list.size(); + + filterPartitionByTTL(partition_list, query_context->tryGetCurrentTransactionID().toSecond()); + if (partition_list.empty()) + return 0; + partition_ids->reserve(partition_list.size()); + + for (const auto & p : partition_list) + partition_ids->insert(p->getID(partition_sample_block, extractNullableForPartitionID())); + + LOG_TRACE(log, "[TrivialCount] after filter partition by TTL {}/{}", partition_ids->size(), num_total_partition); + } + auto parts_with_dbm = getAllPartsWithDBM(query_context); if (parts_with_dbm.first.empty()) return 0; - const auto & metadata_snapshot = getInMemoryMetadataPtr(); if (metadata_snapshot->hasUniqueKey()) getDeleteBitmapMetaForServerParts(parts_with_dbm.first, parts_with_dbm.second); size_t rows = 0; for (const auto & part : parts_with_dbm.first) + { + if (partition_ids.has_value() && !partition_ids->contains(part->partition().getID(partition_sample_block, extractNullableForPartitionID()))) + continue; rows += part->rowsCount() - part->deletedRowsCount(*this); + } - LOG_TRACE(log, "Shortcut: calculate total_rows from metadata {}", rows); + LOG_TRACE(log, "[TrivialCount] calculate total_rows from metadata {}", rows); return rows; } diff --git a/src/Transaction/Actions/DDLAlterAction.cpp b/src/Transaction/Actions/DDLAlterAction.cpp index 58693e6f96b..0f59968be54 100644 --- a/src/Transaction/Actions/DDLAlterAction.cpp +++ b/src/Transaction/Actions/DDLAlterAction.cpp @@ -95,7 +95,7 @@ void DDLAlterAction::executeV1(TxnTimestamp commit_time) else { LOG_DEBUG(log, "Skip change table schema because {}", - new_schema.empty() ? "new shema is empty." 
: ("new shema is the same as old one : " + old_schema)); + new_schema.empty() ? "new schema is empty." : ("new schema is the same as old one : " + old_schema)); } } catch (...) diff --git a/tests/queries/4_cnch_stateless/01001_alter_drop_partition.reference b/tests/queries/4_cnch_stateless/01001_alter_drop_partition.reference index 406a4e681c7..ae61e3a4d8a 100644 --- a/tests/queries/4_cnch_stateless/01001_alter_drop_partition.reference +++ b/tests/queries/4_cnch_stateless/01001_alter_drop_partition.reference @@ -18,3 +18,8 @@ 3 9 0 +------ TRUNCATE CHECK PARTITION ------ +2 +20240101-1 +6a7327258d2de4c6a04830c43786747a +1 diff --git a/tests/queries/4_cnch_stateless/01001_alter_drop_partition.sql b/tests/queries/4_cnch_stateless/01001_alter_drop_partition.sql index 714f2c871fc..543891399d6 100644 --- a/tests/queries/4_cnch_stateless/01001_alter_drop_partition.sql +++ b/tests/queries/4_cnch_stateless/01001_alter_drop_partition.sql @@ -55,3 +55,14 @@ TRUNCATE TABLE t_truncate; SELECT count() FROM t_truncate; -- 0 DROP TABLE t_truncate; + +SELECT '------ TRUNCATE CHECK PARTITION ------'; +CREATE TABLE t_truncate_nullable_partition(p Nullable(DateTime), k Int32, m Int32) +ENGINE = CnchMergeTree() PARTITION BY (toDate(p), k) ORDER BY m +SETTINGS allow_nullable_key = 1, extract_partition_nullable_date = 1; + +INSERT INTO t_truncate_nullable_partition VALUES ('2024-01-01 10:00:00', 1, 1), (NULL, 1, 1); +SELECT count() FROM t_truncate_nullable_partition; -- 2 +SELECT _partition_id FROM t_truncate_nullable_partition ORDER BY _partition_id; +TRUNCATE TABLE t_truncate_nullable_partition PARTITION '20240101-1'; +SELECT count() FROM t_truncate_nullable_partition; -- 1 diff --git a/tests/queries/4_cnch_stateless/01001_trivial_count_cnch_parts.reference b/tests/queries/4_cnch_stateless/01001_trivial_count_cnch_parts.reference index f6705ccafc4..6f7644b741a 100644 --- a/tests/queries/4_cnch_stateless/01001_trivial_count_cnch_parts.reference +++ 
b/tests/queries/4_cnch_stateless/01001_trivial_count_cnch_parts.reference @@ -1,2 +1,4 @@ 2 3 2 3 +3 +1 diff --git a/tests/queries/4_cnch_stateless/01001_trivial_count_cnch_parts.sql b/tests/queries/4_cnch_stateless/01001_trivial_count_cnch_parts.sql index 4c9af0ffb0f..5f6525c89e3 100644 --- a/tests/queries/4_cnch_stateless/01001_trivial_count_cnch_parts.sql +++ b/tests/queries/4_cnch_stateless/01001_trivial_count_cnch_parts.sql @@ -16,3 +16,12 @@ SELECT sum(rows), sum(delete_rows) FROM system.cnch_parts WHERE database = curre DROP TABLE t_delete_rows; DROP TABLE t_delete_rows_u; + +DROP TABLE IF EXISTS t_trivial_count_ttl; +CREATE TABLE t_trivial_count_ttl (p DateTime, k Int32) ENGINE = CnchMergeTree() PARTITION BY toDate(p) ORDER BY k TTL toDate(p) + INTERVAL 10 DAY; +INSERT INTO t_trivial_count_ttl VALUES (now() - INTERVAL 3 DAY, 1), (now() - INTERVAL 5 DAY, 1), (now(), 1); +SELECT count() FROM t_trivial_count_ttl; +ALTER TABLE t_trivial_count_ttl MODIFY TTL toDate(p) + INTERVAL 1 DAY; +SELECT count() FROM t_trivial_count_ttl; +DROP TABLE t_trivial_count_ttl; + diff --git a/tests/queries/4_cnch_stateless/10050_update_join.reference b/tests/queries/4_cnch_stateless_no_tenant/10050_update_join.reference similarity index 60% rename from tests/queries/4_cnch_stateless/10050_update_join.reference rename to tests/queries/4_cnch_stateless_no_tenant/10050_update_join.reference index 3a390ce099c..2bcb28d6976 100644 --- a/tests/queries/4_cnch_stateless/10050_update_join.reference +++ b/tests/queries/4_cnch_stateless_no_tenant/10050_update_join.reference @@ -7,3 +7,8 @@ cn 3 40 Try all combinations +3 0 +10 100 +20 200 +0 0 +40 400 diff --git a/tests/queries/4_cnch_stateless/10050_update_join.sql b/tests/queries/4_cnch_stateless_no_tenant/10050_update_join.sql similarity index 62% rename from tests/queries/4_cnch_stateless/10050_update_join.sql rename to tests/queries/4_cnch_stateless_no_tenant/10050_update_join.sql index 204811a8ad5..97ca4750efe 100644 --- 
a/tests/queries/4_cnch_stateless/10050_update_join.sql +++ b/tests/queries/4_cnch_stateless_no_tenant/10050_update_join.sql @@ -39,10 +39,10 @@ DROP TABLE update_join_test_new; DROP TABLE IF EXISTS test.update_join_tuj; DROP TABLE IF EXISTS test.update_join_tt; -CREATE TABLE test.update_join_tuj(k Int32, m Int32) ENGINE = CnchMergeTree ORDER BY k UNIQUE KEY k; +CREATE TABLE test.update_join_tuj(k Int32, m Int32, m2 Int32) ENGINE = CnchMergeTree ORDER BY k UNIQUE KEY k; CREATE TABLE test.update_join_tt(k Int32, m Int32) ENGINE = CnchMergeTree ORDER BY k; -INSERT INTO test.update_join_tuj SELECT number, number FROM numbers(5); +INSERT INTO test.update_join_tuj SELECT number, number, number FROM numbers(5); INSERT INTO test.update_join_tt VALUES (1, 10), (2, 20), (4, 40), (6, 60); SELECT 'Try all combinations'; @@ -51,4 +51,20 @@ UPDATE test.update_join_tuj SET m = m * 2; UPDATE test.update_join_tuj SET m = 2; UPDATE test.update_join_tuj SET m = 2 WHERE k >= 3; UPDATE test.update_join_tuj AS a SET m = 3; +UPDATE test.update_join_tuj AS a SET a.m = 3; UPDATE test.update_join_tuj AS a LEFT JOIN test.update_join_tt AS b ON a.k = b.k SET m = b.m WHERE k >= 1; +UPDATE test.update_join_tuj AS a LEFT JOIN test.update_join_tt AS b ON a.k = b.k SET a.m = b.m, a.m2 = b.m * 10 WHERE k >= 1; + +-- No table alias found: x +UPDATE test.update_join_tuj SET x.m = 2; -- { serverError 36 } +-- only allowed to update the first table +UPDATE test.update_join_tuj AS a SET b.m = 3; -- { serverError 36 } +-- UPDATE multi tables is not supported. 
+UPDATE test.update_join_tuj AS a LEFT JOIN test.update_join_tt AS b ON a.k = b.k SET a.m = b.m, b.m = b.m * 10 WHERE k >= 1; -- { serverError 36 } +-- only allowed to update the first table +UPDATE test.update_join_tuj AS a LEFT JOIN test.update_join_tt AS b ON a.k = b.k SET b.m = a.m WHERE k >= 1; -- { serverError 36 } + +SELECT m, m2 FROM test.update_join_tuj ORDER BY k; + +DROP TABLE test.update_join_tuj; +DROP TABLE test.update_join_tt; diff --git a/utils/convert-month-partitioned-parts/main.cpp b/utils/convert-month-partitioned-parts/main.cpp index a7d1f94990b..fa4923d6317 100644 --- a/utils/convert-month-partitioned-parts/main.cpp +++ b/utils/convert-month-partitioned-parts/main.cpp @@ -122,7 +122,7 @@ void run(String part_path, String date_column, String dest_path) MergeTreePartition partition(yyyymm); partition.store(partition_key_sample, disk, new_tmp_part_path_str, checksums); - String partition_id = partition.getID(partition_key_sample); + String partition_id = partition.getID(partition_key_sample, /*extract_nullable_date_value*/false); Poco::File(new_tmp_part_path_str + "checksums.txt").setWriteable(); WriteBufferFromFile checksums_out(new_tmp_part_path_str + "checksums.txt", 4096); From d902af68ab41d814f6b189c5a6499672ca29154e Mon Sep 17 00:00:00 2001 From: fredwang Date: Thu, 1 Aug 2024 10:00:11 +0000 Subject: [PATCH 064/292] Merge 'feat/cp_enable_table_scan_build_pipeline_optimization_cnch_2.2' into 'cnch-2.2' feat(optimizer@m-4172435018): add settings enable_short_circuit to skip optimizer rules cnch-2.2 See merge request: !22938 # Conflicts: # src/Core/Settings.h --- src/Core/Settings.h | 5 ++ src/Optimizer/PlanOptimizer.cpp | 21 ++++- src/Optimizer/PlanOptimizer.h | 1 + src/Optimizer/ShortCircuitPlanner.cpp | 120 ++++++++++++++++++++++++++ src/Optimizer/ShortCircuitPlanner.h | 17 ++++ src/QueryPlan/PlanPrinter.cpp | 5 ++ src/QueryPlan/QueryPlan.h | 11 +++ src/QueryPlan/TableScanStep.cpp | 84 +++++++++++++----- src/QueryPlan/TableScanStep.h | 1 + 
9 files changed, 244 insertions(+), 21 deletions(-) create mode 100644 src/Optimizer/ShortCircuitPlanner.cpp create mode 100644 src/Optimizer/ShortCircuitPlanner.h diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 7bceba3025b..98c3ab56d5f 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -2075,6 +2075,11 @@ enum PreloadLevelSettings : UInt64 M(Bool, enable_cnch_engine_conversion, false, "Whether to converse MergeTree engine to CnchMergeTree engine", 0) \ /** End of BitEngine related settings */ \ \ + M(Bool, enable_short_circuit, false, "Whether to enable topn short path", 0) \ + M(Bool, enable_table_scan_build_pipeline_optimization, false, "Whether to enable table scan build pipeline optimization", 0) \ + /** End of gis related settings */ \ + \ + // End of FORMAT_FACTORY_SETTINGS diff --git a/src/Optimizer/PlanOptimizer.cpp b/src/Optimizer/PlanOptimizer.cpp index a81df3535d2..5ee917ee87f 100644 --- a/src/Optimizer/PlanOptimizer.cpp +++ b/src/Optimizer/PlanOptimizer.cpp @@ -42,6 +42,7 @@ #include #include #include +#include #include #include #include @@ -327,6 +328,18 @@ const Rewriters & PlanOptimizer::getFullRewriters() return full_rewrites; } +const Rewriters & PlanOptimizer::getShortCircuitRewriters() +{ + static Rewriters short_circuit_rewriters = { + std::make_shared(), + std::make_shared(Rules::pushDownLimitRules(), "PushDownLimit"), + std::make_shared(Rules::removeRedundantRules(), "RemoveRedundant"), + std::make_shared(Rules::pushIntoTableScanRules(), "PushIntoTableScan"), + std::make_shared(Rules::explainAnalyzeRules(), "ExplainAnalyze"), + }; + return short_circuit_rewriters; +} + void PlanOptimizer::optimize(QueryPlan & plan, ContextMutablePtr context) { int i = GraphvizPrinter::PRINT_PLAN_OPTIMIZE_INDEX; @@ -338,7 +351,13 @@ void PlanOptimizer::optimize(QueryPlan & plan, ContextMutablePtr context) Stopwatch rule_watch, total_watch; total_watch.start(); - if (PlanPattern::isSimpleQuery(plan)) + if 
(ShortCircuitPlanner::isShortCircuitPlan(plan, context)) + { + plan.setShortCircuit(true); + optimize(plan, context, getShortCircuitRewriters()); + ShortCircuitPlanner::addExchangeIfNeeded(plan, context); + } + else if (PlanPattern::isSimpleQuery(plan)) { optimize(plan, context, getSimpleRewriters()); } diff --git a/src/Optimizer/PlanOptimizer.h b/src/Optimizer/PlanOptimizer.h index 3ea2401877a..e7b55fc485a 100644 --- a/src/Optimizer/PlanOptimizer.h +++ b/src/Optimizer/PlanOptimizer.h @@ -28,6 +28,7 @@ class PlanOptimizer static void optimize(QueryPlan & plan, ContextMutablePtr context, const Rewriters & rewriters); static const Rewriters & getSimpleRewriters(); static const Rewriters & getFullRewriters(); + static const Rewriters & getShortCircuitRewriters(); }; } diff --git a/src/Optimizer/ShortCircuitPlanner.cpp b/src/Optimizer/ShortCircuitPlanner.cpp new file mode 100644 index 00000000000..af68a4e1ad1 --- /dev/null +++ b/src/Optimizer/ShortCircuitPlanner.cpp @@ -0,0 +1,120 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +class ShortCircuitPlanner::ShortCircuitPlanVisitor : public DB::PlanNodeVisitor +{ +public: + explicit ShortCircuitPlanVisitor(ContextPtr context_) : context(context_) + { + } + + bool visitPlanNode(PlanNodeBase & plan, Void & c) override + { + if (plan.getChildren().size() == 1) + return VisitorUtil::accept(plan.getChildren()[0], *this, c); + return false; + } + + bool visitLimitNode(LimitNode & plan, Void & v) override + { + return VisitorUtil::accept(plan.getChildren()[0], *this, v); + } + + bool visitProjectionNode(ProjectionNode & plan, Void & v) override + { + return VisitorUtil::accept(plan.getChildren()[0], *this, v); + } + + bool visitFilterNode(FilterNode & plan, Void &) override + { + if (plan.getChildren()[0]->getType() == IQueryPlanStep::Type::TableScan) + return 
checkTableScan(dynamic_cast(*plan.getChildren()[0]->getStep()), plan.getStep()->getFilter()); + return false; + } + + static bool checkTableScan(TableScanStep & table_scan, ConstASTPtr filter) + { + auto constraints = extractConstraints(filter); + auto metadata = table_scan.getStorage()->getInMemoryMetadataPtr(); + return isPointScan(metadata->getUniqueKey(), constraints); + } + + static std::unordered_set extractConstraints(ConstASTPtr filter) + { + std::unordered_set constraints; + for (const auto & conjunct : PredicateUtils::extractConjuncts(filter)) + { + const auto * func = conjunct->as(); + if (!func || func->name != "equals") + continue; + const auto * column = func->arguments->children[0]->as(); + if (!column) + continue; + if (func->arguments->children[1]->getType() != ASTType::ASTLiteral + && func->arguments->children[1]->getType() != ASTType::ASTPreparedParameter) + continue; + constraints.emplace(column->name()); + } + return constraints; + } + + /** + * Check whether the filter constrains every unique key column.
+ */ + static bool isPointScan(const KeyDescription & primary_key, const std::unordered_set & constraints) + { + return std::all_of( + primary_key.column_names.begin(), primary_key.column_names.end(), [&](const auto & key) { return constraints.contains(key); }); + } + +private: + ContextPtr context; +}; + +bool ShortCircuitPlanner::isShortCircuitPlan(QueryPlan & query_plan, ContextPtr context) +{ + if (!context->getSettingsRef().enable_short_circuit) + return false; + + ShortCircuitPlanVisitor visitor{context}; + Void v; + return query_plan.getCTEInfo().empty() && VisitorUtil::accept(query_plan.getPlanNode(), visitor, v); +} + +void ShortCircuitPlanner::addExchangeIfNeeded(QueryPlan & query_plan, ContextMutablePtr context) +{ + // todo: analyze optimized cluster + auto output = query_plan.getPlanNode(); + if (output->getType() != IQueryPlanStep::Type::Projection) + throw Exception(ErrorCodes::LOGICAL_ERROR, "output node is expected a project"); + auto child = output->getChildren()[0]; + auto gather = PlanNodeBase::createPlanNode( + query_plan.getIdAllocator()->nextId(), + std::make_unique( + DataStreams{child->getStep()->getOutputStream()}, + ExchangeMode::GATHER, + Partitioning(Names{}), + context->getSettingsRef().enable_shuffle_with_order), + {child}); + output->replaceChildren({gather}); +} +} diff --git a/src/Optimizer/ShortCircuitPlanner.h b/src/Optimizer/ShortCircuitPlanner.h new file mode 100644 index 00000000000..cc45f5702e5 --- /dev/null +++ b/src/Optimizer/ShortCircuitPlanner.h @@ -0,0 +1,17 @@ +#pragma once + +#include +#include + +namespace DB +{ +class ShortCircuitPlanner +{ +public: + static bool isShortCircuitPlan(QueryPlan & query_plan, ContextPtr context); + static void addExchangeIfNeeded(QueryPlan & query_plan, ContextMutablePtr context); + +private: + class ShortCircuitPlanVisitor; +}; +} diff --git a/src/QueryPlan/PlanPrinter.cpp b/src/QueryPlan/PlanPrinter.cpp index 17e61e60c32..57bef782f3e 100644 --- a/src/QueryPlan/PlanPrinter.cpp +++ 
b/src/QueryPlan/PlanPrinter.cpp @@ -154,6 +154,11 @@ String PlanPrinter::textLogicalPlan( output += "."; } + if (plan.isShortCircuit()) + { + output += "note: Short Circuit is applied.\n"; + } + return output; } diff --git a/src/QueryPlan/QueryPlan.h b/src/QueryPlan/QueryPlan.h index 74967235f0b..9f2643e3a4c 100644 --- a/src/QueryPlan/QueryPlan.h +++ b/src/QueryPlan/QueryPlan.h @@ -132,6 +132,15 @@ class QueryPlan void setMaxThreads(size_t max_threads_) { max_threads = max_threads_; } size_t getMaxThreads() const { return max_threads; } + void setShortCircuit(bool short_circuit_) + { + short_circuit = short_circuit_; + } + bool isShortCircuit() const + { + return short_circuit; + } + void addInterpreterContext(std::shared_ptr context); void serialize(WriteBuffer & buffer) const; @@ -217,6 +226,8 @@ class QueryPlan std::shared_ptr max_node_id; //Whether reset step id in serialize(),use for explain analyze. bool reset_step_id = true; + + bool short_circuit = false; }; std::string debugExplainStep(const IQueryPlanStep & step); diff --git a/src/QueryPlan/TableScanStep.cpp b/src/QueryPlan/TableScanStep.cpp index 187cedd2926..ab00bb6f4d8 100644 --- a/src/QueryPlan/TableScanStep.cpp +++ b/src/QueryPlan/TableScanStep.cpp @@ -1225,31 +1225,37 @@ void TableScanStep::initializePipeline(QueryPipeline & pipeline, const BuildQuer options.ignoreProjections(); stage_watch.restart(); - ASTPtr partition_filter; - auto mutable_context = Context::createCopy(build_context.context); - if (query_info.partition_filter) - partition_filter = query_info.partition_filter->clone(); - // FIXME: It is used to work around partition keys being chosen as PREWHERE. 
In long term, we should rely on - // enable_partition_filter_push_down = 1 to do the stuff - if (mutable_context->getSettingsRef().remove_partition_filter_on_worker) - mutable_context->setSetting("enable_partition_filter_push_down", 1U); - - options.cache_info = query_info.cache_info; - auto interpreter = std::make_shared(query_info.query, mutable_context, options); - interpreter->execute(true); - auto backup_input_order_info = query_info.input_order_info; - query_info = interpreter->getQueryInfo(); - query_info = fillQueryInfo(build_context.context); - query_info.input_order_info = backup_input_order_info; + if (build_context.context->getSettingsRef().enable_table_scan_build_pipeline_optimization) + { + fillQueryInfoV2(build_context.context); + } + else + { + ASTPtr partition_filter; + auto mutable_context = Context::createCopy(build_context.context); + if (query_info.partition_filter) + partition_filter = query_info.partition_filter->clone(); + // FIXME: It is used to work around partition keys being chosen as PREWHERE. 
In long term, we should rely on + // enable_partition_filter_push_down = 1 to do the stuff + if (mutable_context->getSettingsRef().remove_partition_filter_on_worker) + mutable_context->setSetting("enable_partition_filter_push_down", 1U); + + options.cache_info = query_info.cache_info; + auto interpreter = std::make_shared(query_info.query, mutable_context, options); + interpreter->execute(true); + auto backup_input_order_info = query_info.input_order_info; + query_info = interpreter->getQueryInfo(); + query_info = fillQueryInfo(build_context.context); + query_info.input_order_info = backup_input_order_info; + if (partition_filter) + query_info.partition_filter = partition_filter; + } LOG_DEBUG(log, "init pipeline stage run time: make up query info, {} ms", stage_watch.elapsedMillisecondsAsDouble()); // always do filter underneath, as WHERE filter won't reuse PREWHERE result in optimizer mode if (query_info.prewhere_info) query_info.prewhere_info->need_filter = true; - if (partition_filter) - query_info.partition_filter = partition_filter; - if (use_projection_index) { auto storage_snapshot = storage->getStorageSnapshot(storage->getInMemoryMetadataPtr(), build_context.context); @@ -1321,7 +1327,13 @@ void TableScanStep::initializePipeline(QueryPipeline & pipeline, const BuildQuer } // flag = Output auto pipe = storage->read( - interpreter->getRequiredColumns(), storage_snapshot, query_info, build_context.context, QueryProcessingStage::Enum::FetchColumns, max_block_size, max_streams); + getRequiredColumns(), + storage_snapshot, + query_info, + build_context.context, + QueryProcessingStage::Enum::FetchColumns, + max_block_size, + max_streams); if (pipe.getCacheHolder()) pipeline.addCacheHolder(pipe.getCacheHolder()); @@ -2004,4 +2016,36 @@ bool TableScanStep::hasFunctionCanUseBitmapIndex() const } return false; } + +void TableScanStep::fillQueryInfoV2(ContextPtr context) +{ + assert(storage); + auto required_columns = getRequiredColumns(); + auto metadata_snapshot = 
storage->getStorageSnapshot(storage->getInMemoryMetadataPtr(), context); + auto block = metadata_snapshot->getSampleBlockForColumns(required_columns); + + /// 1. build tree rewriter result + auto syntax_analyzer_result = std::make_shared(block.getNamesAndTypesList(), storage, metadata_snapshot); + syntax_analyzer_result->analyzed_join = std::make_shared(); + query_info.syntax_analyzer_result = syntax_analyzer_result; + + /// 2. build prepared sets + if (auto where = query_info.getSelectQuery()->where()) + makeSetsForIndex(where, context, query_info.sets); + if (auto prewhere = query_info.getSelectQuery()->prewhere()) + makeSetsForIndex(prewhere, context, query_info.sets); + // TODO: atomic_predicates_expr + if (query_info.partition_filter) + makeSetsForIndex(query_info.partition_filter, context, query_info.sets); + + /// 3. build prewhere info + if (auto prewhere = query_info.getSelectQuery()->prewhere()) + { + auto prewhere_action = IQueryPlanStep::createFilterExpressionActions(context, prewhere, block); + query_info.prewhere_info = std::make_shared(prewhere_action, prewhere->getColumnName()); + } + + /// 4. 
build index context + query_info.index_context = std::make_shared(); +} } diff --git a/src/QueryPlan/TableScanStep.h b/src/QueryPlan/TableScanStep.h index 764fc390ad8..7c81390276c 100644 --- a/src/QueryPlan/TableScanStep.h +++ b/src/QueryPlan/TableScanStep.h @@ -151,6 +151,7 @@ class TableScanStep : public ISourceStep SelectQueryInfo fillQueryInfo(ContextPtr context); void fillPrewhereInfo(ContextPtr context); void makeSetsForIndex(const ASTPtr & node, ContextPtr context, PreparedSets & prepared_sets) const; + void fillQueryInfoV2(ContextPtr context); void allocate(ContextPtr context); Int32 getUniqueId() const { return unique_id; } From ff1e72a47f6ed0976e4505613b1765608c9c2898 Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 02:29:37 +0000 Subject: [PATCH 065/292] Merge 'cherry-pick-mr-22709' into 'cnch-2.2' fix(clickhousech@m-4656104342): Merge 'fix-send-dummy-source-session-expire' into 'cnch-2.2' See merge request: !22772 From 8904d1d098c9960bb079505ef27ceaea3aa0754a Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 02:33:04 +0000 Subject: [PATCH 066/292] Merge branch 'cherry-pick-a43b3d66' into 'cnch-2.2' fix(clickhousech@m-4675847030): Fix BRPC Deadlock 2.2 See merge request dp/ClickHouse!22983 --- src/Interpreters/Context.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index b439c7cd605..6494afd8052 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -1846,11 +1846,15 @@ ASTPtr Context::getRowPolicyCondition(const String & database, const String & ta void Context::setInitialRowPolicy() { + String initial_user_copy; + { + auto lock = getLocalLock(); + initial_user_copy = client_info.initial_user; + } + auto initial_user_id = getAccessControlManager().find(initial_user_copy); + auto initial_row_policy_local = initial_user_id ? 
getAccessControlManager().getEnabledRowPolicies(*initial_user_id, {}) : nullptr; auto lock = getLocalLock(); - auto initial_user_id = getAccessControlManager().find(client_info.initial_user); - initial_row_policy = nullptr; - if (initial_user_id) - initial_row_policy = getAccessControlManager().getEnabledRowPolicies(*initial_user_id, {}); + initial_row_policy = initial_row_policy_local; } From 022af2a137dabfea23c4a9ec18426842b6818554 Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 02:34:01 +0000 Subject: [PATCH 067/292] Merge branch 'fix_json_merge' into 'cnch-2.2' fix(clickhousech@m-4649412027):fix non-host server optimize merge json table See merge request dp/ClickHouse!22834 # Conflicts: # src/Catalog/Catalog.cpp --- src/Catalog/Catalog.cpp | 2 ++ src/CloudServices/CnchServerServiceImpl.cpp | 11 +++++++ src/CloudServices/CnchWorkerServiceImpl.cpp | 8 +++++ src/Storages/StorageCnchMergeTree.cpp | 1 + .../01825_type_json_merge.reference | 16 ++++++++++ .../01825_type_json_merge.sql | 29 +++++++++++++++++++ 6 files changed, 67 insertions(+) create mode 100644 tests/queries/4_cnch_stateless/01825_type_json_merge.reference create mode 100644 tests/queries/4_cnch_stateless/01825_type_json_merge.sql diff --git a/src/Catalog/Catalog.cpp b/src/Catalog/Catalog.cpp index 1c69ff08770..5e1df13c0a1 100644 --- a/src/Catalog/Catalog.cpp +++ b/src/Catalog/Catalog.cpp @@ -1358,6 +1358,8 @@ namespace Catalog if (cached_storage && cached_storage->commit_time <= ts && cached_storage->getStorageID().database_name == database && cached_storage->getStorageID().table_name == name) { res = cached_storage; + //TODO:(@lianwenlong) force fetch global object schema from catalog + initStorageObjectSchema(res); return; } } diff --git a/src/CloudServices/CnchServerServiceImpl.cpp b/src/CloudServices/CnchServerServiceImpl.cpp index b2e5edf017b..d5f796fa5bf 100644 --- a/src/CloudServices/CnchServerServiceImpl.cpp +++ b/src/CloudServices/CnchServerServiceImpl.cpp @@ -1730,6 +1730,17 
@@ void CnchServerServiceImpl::executeOptimize( auto & database_catalog = DatabaseCatalog::instance(); auto istorage = database_catalog.getTable(storage_id, global_context); + if (istorage && istorage->getInMemoryMetadataPtr()->hasDynamicSubcolumns()) + { + if (auto * cnch_table = dynamic_cast(istorage.get())) + { + LOG_TRACE( + log, + "Object schema snapshot:{}", + cnch_table->getStorageSnapshot(cnch_table->getInMemoryMetadataPtr(), nullptr)->object_columns.toString()); + } + } + auto * merge_mutate_thread = dynamic_cast(bg_thread.get()); auto task_id = merge_mutate_thread->triggerPartMerge(istorage, partition_id, false, enable_try, false); if (request->mutations_sync()) diff --git a/src/CloudServices/CnchWorkerServiceImpl.cpp b/src/CloudServices/CnchWorkerServiceImpl.cpp index cfb230f7ea2..1f751ea3627 100644 --- a/src/CloudServices/CnchWorkerServiceImpl.cpp +++ b/src/CloudServices/CnchWorkerServiceImpl.cpp @@ -263,7 +263,15 @@ void CnchWorkerServiceImpl::submitManipulationTask( if (!data) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table {} is not CloudMergeTree", storage->getStorageID().getNameForLogs()); if (request->has_dynamic_object_column_schema()) + { + LOG_TRACE( + log, + "Received table:{}.{} with dynamic object column schema:{}.", + data->getCnchDatabase(), + data->getCnchTable(), + request->dynamic_object_column_schema()); data->resetObjectColumns(ColumnsDescription::parse(request->dynamic_object_column_schema())); + } auto params = ManipulationTaskParams(storage); params.type = static_cast(request->type()); diff --git a/src/Storages/StorageCnchMergeTree.cpp b/src/Storages/StorageCnchMergeTree.cpp index 0c63acfa0c0..2336f1646f6 100644 --- a/src/Storages/StorageCnchMergeTree.cpp +++ b/src/Storages/StorageCnchMergeTree.cpp @@ -3344,6 +3344,7 @@ void StorageCnchMergeTree::mutate(const MutationCommands & commands, ContextPtr void StorageCnchMergeTree::resetObjectColumns(ContextPtr query_context) { object_columns = 
object_schemas.assembleSchema(query_context, getInMemoryMetadataPtr()); + LOG_TRACE(log, "Global object schema snapshot:" + object_columns.toString()); } void StorageCnchMergeTree::appendObjectPartialSchema(const TxnTimestamp & txn_id, ObjectPartialSchema partial_schema) diff --git a/tests/queries/4_cnch_stateless/01825_type_json_merge.reference b/tests/queries/4_cnch_stateless/01825_type_json_merge.reference new file mode 100644 index 00000000000..b37329616b0 --- /dev/null +++ b/tests/queries/4_cnch_stateless/01825_type_json_merge.reference @@ -0,0 +1,16 @@ +----------------------before merge------------------------------- +6 +1 +2 +3 +4 +5 +6 +-------------------after first merge----------------------------- +1 +1 +2 +3 +4 +5 +6 diff --git a/tests/queries/4_cnch_stateless/01825_type_json_merge.sql b/tests/queries/4_cnch_stateless/01825_type_json_merge.sql new file mode 100644 index 00000000000..93ec8955d16 --- /dev/null +++ b/tests/queries/4_cnch_stateless/01825_type_json_merge.sql @@ -0,0 +1,29 @@ +-- drop database if EXISTS 002_merge_and_complex_json; +-- create database 002_merge_and_complex_json; +-- use 002_merge_and_complex_json; +SET allow_experimental_object_type = 1; +set enable_optimizer = 0; +set describe_extend_object_types=1; +CREATE TABLE if not exists t_alter_ids_8( + `id` UInt64, + `date` Date, + `json` JSON +) ENGINE = CnchMergeTree ORDER BY id partition by date; +system stop merges t_alter_ids_8; +insert into t_alter_ids_8 values(1, '2023-12-13', '{"_id": "65790d3f5ee1fd8bdefd1e8e","index": 1,"guid": "534c7df4-78f6-4f3f-b631-70af0d14a309","isActive": true,"balance": "$2,491.01","picture": "http://placehold.it/32x32","age": 36,"eyeColor": "green","name": "Macias Snow","gender": "male","company": "TRASOLA","email": "maciassnow@trasola.com","phone": "+1 (961) 529-2559","address": "288 Hinckley Place, Carlos, Florida, 6654","about": "Veniam cillum magna eu ut occaecat. Consectetur aliqua elit enim aliqua. 
Nulla dolore sint deserunt enim magna eiusmod do. Pariatur eiusmod consequat ea excepteur culpa ipsum duis esse esse.","registered": "2018-05-28T01:27:13 -08:00","latitude": -48.198628,"longitude": 166.730178,"tags": ["commodo", "velit", "reprehenderit","commodo","quis","Lorem","Lorem"],"friends": [{"id": 0,"name": "Blanca Lawson"},{"id": 1,"name":"Karyn Russell"}, {"id": 2,"name":"Ola Joyce"}],"greeting": "Hello, Macias Snow! You have 8 unread messages.","favoriteFruit": "banana"}'); +insert into t_alter_ids_8 values(2, '2023-12-13', '{"_id": "65790d3f5ee1fd8bdefd1e8e","index": 2,"guid": "534c7df4-78f6-4f3f-b631-70af0d14a309","isActive": true,"balance": "$2,491.01","picture": "http://placehold.it/32x32","age": 36,"eyeColor": "green","name": "Macias Snow","gender": "male","company": "TRASOLA","email": "maciassnow@trasola.com","phone": "+1 (961) 529-2559","address": "288 Hinckley Place, Carlos, Florida, 6654","about": "Veniam cillum magna eu ut occaecat. Consectetur aliqua elit enim aliqua. Nulla dolore sint deserunt enim magna eiusmod do. Pariatur eiusmod consequat ea excepteur culpa ipsum duis esse esse.","registered": "2018-05-28T01:27:13 -08:00","latitude": -48.198628,"longitude": 166.730178,"tags": ["commodo", "velit", "reprehenderit","commodo","quis","Lorem","Lorem"],"friends": [{"id": 0,"name": "Blanca Lawson"},{"id": 1,"name":"Karyn Russell"}, {"id": 2,"name":"Ola Joyce"}],"greeting": "Hello, Macias Snow! You have 8 unread messages.","favoriteFruit": "banana"}'); +insert into t_alter_ids_8 values(3, '2023-12-13', '{"_id": "65790d3f5ee1fd8bdefd1e8e","index": 3,"guid": "534c7df4-78f6-4f3f-b631-70af0d14a309","isActive": true,"balance": "$2,491.01","picture": "http://placehold.it/32x32","age": 36,"eyeColor": "green","name": "Macias Snow","gender": "male","company": "TRASOLA","email": "maciassnow@trasola.com","phone": "+1 (961) 529-2559","address": "288 Hinckley Place, Carlos, Florida, 6654","about": "Veniam cillum magna eu ut occaecat. 
Consectetur aliqua elit enim aliqua. Nulla dolore sint deserunt enim magna eiusmod do. Pariatur eiusmod consequat ea excepteur culpa ipsum duis esse esse.","registered": "2018-05-28T01:27:13 -08:00","latitude": -48.198628,"longitude": 166.730178,"tags": ["commodo", "velit", "reprehenderit","commodo","quis","Lorem","Lorem"],"friends": [{"id": 0,"name": "Blanca Lawson"},{"id": 1,"name":"Karyn Russell"}, {"id": 2,"name":"Ola Joyce"}],"greeting": "Hello, Macias Snow! You have 8 unread messages.","favoriteFruit": "banana"}'); +insert into t_alter_ids_8 values(4, '2023-12-13', '{"_id": "65790d3f5ee1fd8bdefd1e8e","index": 4,"guid": "534c7df4-78f6-4f3f-b631-70af0d14a309","isActive": true,"balance": "$2,491.01","picture": "http://placehold.it/32x32","age": 36,"eyeColor": "green","name": "Macias Snow","gender": "male","company": "TRASOLA","email": "maciassnow@trasola.com","phone": "+1 (961) 529-2559","address": "288 Hinckley Place, Carlos, Florida, 6654","about": "Veniam cillum magna eu ut occaecat. Consectetur aliqua elit enim aliqua. Nulla dolore sint deserunt enim magna eiusmod do. Pariatur eiusmod consequat ea excepteur culpa ipsum duis esse esse.","registered": "2018-05-28T01:27:13 -08:00","latitude": -48.198628,"longitude": 166.730178,"tags": ["commodo", "velit", "reprehenderit","commodo","quis","Lorem","Lorem"],"friends": [{"id": 0,"name": "Blanca Lawson"},{"id": 1,"name":"Karyn Russell"}, {"id": 2,"name":"Ola Joyce"}],"greeting": "Hello, Macias Snow! 
You have 8 unread messages.","favoriteFruit": "banana"}'); +insert into t_alter_ids_8 values(5, '2023-12-13', '{"_id": "65790d3f5ee1fd8bdefd1e8e","index": 5,"guid": "534c7df4-78f6-4f3f-b631-70af0d14a309","isActive": true,"balance": "$2,491.01","picture": "http://placehold.it/32x32","age": 36,"eyeColor": "green","name": "Macias Snow","gender": "male","company": "TRASOLA","email": "maciassnow@trasola.com","phone": "+1 (961) 529-2559","address": "288 Hinckley Place, Carlos, Florida, 6654","about": "Veniam cillum magna eu ut occaecat. Consectetur aliqua elit enim aliqua. Nulla dolore sint deserunt enim magna eiusmod do. Pariatur eiusmod consequat ea excepteur culpa ipsum duis esse esse.","registered": "2018-05-28T01:27:13 -08:00","latitude": -48.198628,"longitude": 166.730178,"tags": ["commodo", "velit", "reprehenderit","commodo","quis","Lorem","Lorem"],"friends": [{"id": 0,"name": "Blanca Lawson"},{"id": 1,"name":"Karyn Russell"}, {"id": 2,"name":"Ola Joyce"}],"greeting": "Hello, Macias Snow! You have 8 unread messages.","favoriteFruit": "banana"}'); +insert into t_alter_ids_8 values(6, '2023-12-13', '{"_id": "65790d3f5ee1fd8bdefd1e8e","index": 6,"guid": "534c7df4-78f6-4f3f-b631-70af0d14a309","isActive": true,"balance": "$2,491.01","picture": "http://placehold.it/32x32","age": 36,"eyeColor": "green","name": "Macias Snow","gender": "male","company": "TRASOLA","email": "maciassnow@trasola.com","phone": "+1 (961) 529-2559","address": "288 Hinckley Place, Carlos, Florida, 6654","about": "Veniam cillum magna eu ut occaecat. Consectetur aliqua elit enim aliqua. Nulla dolore sint deserunt enim magna eiusmod do. 
Pariatur eiusmod consequat ea excepteur culpa ipsum duis esse esse.","registered": "2018-05-28T01:27:13 -08:00","latitude": -48.198628,"longitude": 166.730178,"tags": ["commodo", "velit", "reprehenderit","commodo","quis","Lorem","Lorem"],"friends": [{"id": 0,"name": "Blanca Lawson"},{"id": 1,"name":"Karyn Russell"}, {"id": 2,"name":"Ola Joyce"}],"greeting": "Hello, Macias Snow! You have 8 unread messages.","favoriteFruit": "banana"}'); +select '----------------------before merge-------------------------------'; +select count() from system.cnch_parts where database=currentDatabase() and table='t_alter_ids_8' and part_type='VisiblePart'; +select json.index as index from t_alter_ids_8 order by index; +system start merges t_alter_ids_8; +set disable_optimize_final=0; +optimize table t_alter_ids_8 final; +SELECT sleepEachRow(3) FROM numbers(6) FORMAT Null; +select '-------------------after first merge-----------------------------'; +select count() from system.cnch_parts where database=currentDatabase() and table='t_alter_ids_8' and part_type='VisiblePart'; +select json.index as index from t_alter_ids_8 order by index; +drop table if exists t_alter_ids_8; From 78256ef082c0565d5120f3ce5dee6dda9ab91ec4 Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 02:34:21 +0000 Subject: [PATCH 068/292] Merge 'cherry-pick-mr-22985' into 'cnch-2.2' fix(clickhousech@m-4676475056): Merge 'disable-row-policy-in-worker' into 'cnch-2.2' See merge request: !23007 --- .../DistributedStages/PlanSegmentManagerRpcService.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/DistributedStages/PlanSegmentManagerRpcService.cpp b/src/Interpreters/DistributedStages/PlanSegmentManagerRpcService.cpp index 8bec509ef91..dc2626617b3 100644 --- a/src/Interpreters/DistributedStages/PlanSegmentManagerRpcService.cpp +++ b/src/Interpreters/DistributedStages/PlanSegmentManagerRpcService.cpp @@ -469,8 +469,8 @@ void PlanSegmentManagerRpcService::submitPlanSegment( 
if (!settings_io_buf->empty()) { ReadBufferFromBrpcBuf settings_read_buf(*settings_io_buf); - /// Sets an extra row policy based on `client_info.initial_user` - query_context->setInitialRowPolicy(); + /// Sets an extra row policy based on `client_info.initial_user`, problematic for now + // query_context->setInitialRowPolicy(); /// apply settings changed const size_t MIN_MINOR_VERSION_ENABLE_STRINGS_WITH_FLAGS = 4; if (query_common->brpc_protocol_minor_revision() >= MIN_MINOR_VERSION_ENABLE_STRINGS_WITH_FLAGS) From deb63542702a718d060e02353dc65abd71e55f35 Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 02:34:39 +0000 Subject: [PATCH 069/292] Merge 'cherry-pick-723a4b68' into 'cnch-2.2' fix(clickhousech@m-4505469713): fix incorrect value for the table_type column in information_schema.tables [CP] See merge request: !22693 --- .../System/attachInformationSchemaTables.cpp | 2 +- .../01161_information_schema.reference | 16 ++++++++-------- ...06_information_schema_show_database.reference | 8 ++++---- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/Storages/System/attachInformationSchemaTables.cpp b/src/Storages/System/attachInformationSchemaTables.cpp index c835d028649..96e61477ae1 100644 --- a/src/Storages/System/attachInformationSchemaTables.cpp +++ b/src/Storages/System/attachInformationSchemaTables.cpp @@ -110,7 +110,7 @@ static constexpr std::string_view tables = R"( multiIf(any(T.is_temporary), 'LOCAL TEMPORARY', any(T.engine) LIKE '%View', 'VIEW', any(T.engine) LIKE 'System%', 'SYSTEM VIEW', - 'TABLE' + 'BASE TABLE' ) AS table_type, any(T.engine) AS engine, NULL AS version, diff --git a/tests/queries/4_cnch_stateless/01161_information_schema.reference b/tests/queries/4_cnch_stateless/01161_information_schema.reference index d35f8e19b06..3c230e42bf3 100644 --- a/tests/queries/4_cnch_stateless/01161_information_schema.reference +++ b/tests/queries/4_cnch_stateless/01161_information_schema.reference @@ -60,12 +60,12 @@ triggers 
views INFORMATION_SCHEMA INFORMATION_SCHEMA default \N \N utf8mb4 utf8mb4_0900_ai_ci \N INFORMATION_SCHEMA INFORMATION_SCHEMA default \N \N utf8mb4 utf8mb4_0900_ai_ci \N information_schema information_schema default \N \N utf8mb4 utf8mb4_0900_ai_ci \N information_schema information_schema default \N \N utf8mb4 utf8mb4_0900_ai_ci \N -01161_information_schema 01161_information_schema kcu TABLE CnchMergeTree \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N 01161_information_schema 01161_information_schema kcu TABLE CnchMergeTree \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N +01161_information_schema 01161_information_schema kcu BASE TABLE CnchMergeTree \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N 01161_information_schema 01161_information_schema kcu BASE TABLE CnchMergeTree \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N tmp LOCAL TEMPORARY Memory \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N tmp LOCAL TEMPORARY Memory \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N 01161_information_schema 01161_information_schema v VIEW View \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N 01161_information_schema 01161_information_schema v VIEW View \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N -01161_information_schema 01161_information_schema t TABLE CnchMergeTree \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N 01161_information_schema 01161_information_schema t TABLE CnchMergeTree \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N -01161_information_schema 01161_information_schema partitioned TABLE CnchMergeTree \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N 01161_information_schema 01161_information_schema partitioned TABLE CnchMergeTree \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N -01161_information_schema 01161_information_schema kcu2 TABLE CnchMergeTree \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N 
01161_information_schema 01161_information_schema kcu2 TABLE CnchMergeTree \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N +01161_information_schema 01161_information_schema t BASE TABLE CnchMergeTree \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N 01161_information_schema 01161_information_schema t BASE TABLE CnchMergeTree \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N +01161_information_schema 01161_information_schema partitioned BASE TABLE CnchMergeTree \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N 01161_information_schema 01161_information_schema partitioned BASE TABLE CnchMergeTree \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N +01161_information_schema 01161_information_schema kcu2 BASE TABLE CnchMergeTree \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N 01161_information_schema 01161_information_schema kcu2 BASE TABLE CnchMergeTree \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N 01161_information_schema 01161_information_schema v SELECT n, f FROM `1234.01161_information_schema`.t NONE utf8mb4 utf8mb4_0900_ai_ci NO NO NO NO NO 01161_information_schema 01161_information_schema v SELECT n, f FROM `1234.01161_information_schema`.t NONE utf8mb4 utf8mb4_0900_ai_ci NO NO NO NO NO 01161_information_schema 01161_information_schema kcu i 1 0 UInt32 \N \N 32 2 0 \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N UInt32 \N \N 01161_information_schema 01161_information_schema kcu i 1 0 UInt32 \N \N 32 2 0 \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N UInt32 \N \N 01161_information_schema 01161_information_schema kcu s 2 0 String \N \N \N \N \N \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N String \N \N 01161_information_schema 01161_information_schema kcu s 2 0 String \N \N \N \N \N \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N String \N \N @@ -86,11 +86,11 @@ information_schema information_schema default \N \N utf8mb4 utf8mb4_0900_ai_ci \ tmp dtms 3 0 DateTime64(3) \N \N 
\N \N \N 3 \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N DateTime64(3) \N \N tmp dtms 3 0 DateTime64(3) \N \N \N \N \N 3 \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N DateTime64(3) \N \N INFORMATION_SCHEMA INFORMATION_SCHEMA default \N \N utf8mb4 utf8mb4_0900_ai_ci \N INFORMATION_SCHEMA INFORMATION_SCHEMA default \N \N utf8mb4 utf8mb4_0900_ai_ci \N information_schema information_schema default \N \N utf8mb4 utf8mb4_0900_ai_ci \N information_schema information_schema default \N \N utf8mb4 utf8mb4_0900_ai_ci \N -01161_information_schema 01161_information_schema kcu TABLE CnchMergeTree \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N 01161_information_schema 01161_information_schema kcu TABLE CnchMergeTree \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N +01161_information_schema 01161_information_schema kcu BASE TABLE CnchMergeTree \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N 01161_information_schema 01161_information_schema kcu BASE TABLE CnchMergeTree \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N 01161_information_schema 01161_information_schema v VIEW View \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N 01161_information_schema 01161_information_schema v VIEW View \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N -01161_information_schema 01161_information_schema t TABLE CnchMergeTree \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N 01161_information_schema 01161_information_schema t TABLE CnchMergeTree \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N -01161_information_schema 01161_information_schema partitioned TABLE CnchMergeTree \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N 01161_information_schema 01161_information_schema partitioned TABLE CnchMergeTree \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N -01161_information_schema 01161_information_schema kcu2 TABLE CnchMergeTree \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N 
01161_information_schema 01161_information_schema kcu2 TABLE CnchMergeTree \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N +01161_information_schema 01161_information_schema t BASE TABLE CnchMergeTree \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N 01161_information_schema 01161_information_schema t BASE TABLE CnchMergeTree \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N +01161_information_schema 01161_information_schema partitioned BASE TABLE CnchMergeTree \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N 01161_information_schema 01161_information_schema partitioned BASE TABLE CnchMergeTree \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N +01161_information_schema 01161_information_schema kcu2 BASE TABLE CnchMergeTree \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N 01161_information_schema 01161_information_schema kcu2 BASE TABLE CnchMergeTree \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N 01161_information_schema 01161_information_schema v SELECT n, f FROM `1234.01161_information_schema`.t NONE utf8mb4 utf8mb4_0900_ai_ci 0 0 0 0 0 01161_information_schema 01161_information_schema v SELECT n, f FROM `1234.01161_information_schema`.t NONE utf8mb4 utf8mb4_0900_ai_ci 0 0 0 0 0 01161_information_schema 01161_information_schema kcu i 1 0 UInt32 \N \N 32 2 0 \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N UInt32 \N \N 01161_information_schema 01161_information_schema kcu i 1 0 UInt32 \N \N 32 2 0 \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N UInt32 \N \N 01161_information_schema 01161_information_schema kcu s 2 0 String \N \N \N \N \N \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N String \N \N 01161_information_schema 01161_information_schema kcu s 2 0 String \N \N \N \N \N \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N String \N \N diff --git a/tests/queries/4_cnch_stateless/02206_information_schema_show_database.reference 
b/tests/queries/4_cnch_stateless/02206_information_schema_show_database.reference index 85dba33d109..a86321cd19f 100644 --- a/tests/queries/4_cnch_stateless/02206_information_schema_show_database.reference +++ b/tests/queries/4_cnch_stateless/02206_information_schema_show_database.reference @@ -1,6 +1,6 @@ CREATE DATABASE INFORMATION_SCHEMA\nENGINE = Memory() CREATE VIEW INFORMATION_SCHEMA.COLUMNS\n(\n `table_catalog` String,\n `table_schema` String,\n `table_name` String,\n `column_name` String,\n `ordinal_position` UInt64,\n `column_default` String,\n `is_nullable` String,\n `data_type` String,\n `character_maximum_length` Nullable(UInt64),\n `character_octet_length` Nullable(UInt64),\n `numeric_precision` Nullable(UInt64),\n `numeric_precision_radix` Nullable(UInt64),\n `numeric_scale` Nullable(UInt64),\n `datetime_precision` Nullable(UInt64),\n `character_set_catalog` Nullable(String),\n `character_set_schema` Nullable(String),\n `character_set_name` Nullable(String),\n `collation_catalog` Nullable(String),\n `collation_schema` Nullable(String),\n `collation_name` Nullable(String),\n `domain_catalog` Nullable(String),\n `domain_schema` Nullable(String),\n `domain_name` Nullable(String),\n `column_comment` String,\n `column_type` String,\n `extra` Nullable(String),\n `privileges` Nullable(String),\n `TABLE_CATALOG` String,\n `TABLE_SCHEMA` String,\n `TABLE_NAME` String,\n `COLUMN_NAME` String,\n `ORDINAL_POSITION` UInt64,\n `COLUMN_DEFAULT` String,\n `IS_NULLABLE` String,\n `DATA_TYPE` String,\n `CHARACTER_MAXIMUM_LENGTH` Nullable(UInt64),\n `CHARACTER_OCTET_LENGTH` Nullable(UInt64),\n `NUMERIC_PRECISION` Nullable(UInt64),\n `NUMERIC_PRECISION_RADIX` Nullable(UInt64),\n `NUMERIC_SCALE` Nullable(UInt64),\n `DATETIME_PRECISION` Nullable(UInt64),\n `CHARACTER_SET_CATALOG` Nullable(String),\n `CHARACTER_SET_SCHEMA` Nullable(String),\n `CHARACTER_SET_NAME` Nullable(String),\n `COLLATION_CATALOG` Nullable(String),\n `COLLATION_SCHEMA` Nullable(String),\n 
`COLLATION_NAME` Nullable(String),\n `DOMAIN_CATALOG` Nullable(String),\n `DOMAIN_SCHEMA` Nullable(String),\n `DOMAIN_NAME` Nullable(String),\n `COLUMN_COMMENT` String,\n `COLUMN_TYPE` String,\n `EXTRA` Nullable(String),\n `PRIVILEGES` Nullable(String)\n) AS\nSELECT\n database AS table_catalog,\n database AS table_schema,\n table AS table_name,\n name AS column_name,\n position AS ordinal_position,\n default_expression AS column_default,\n type LIKE \'Nullable(%)\' AS is_nullable,\n convertToDialectDataType(type) AS data_type,\n character_octet_length AS character_maximum_length,\n character_octet_length,\n numeric_precision,\n numeric_precision_radix,\n numeric_scale,\n datetime_precision,\n NULL AS character_set_catalog,\n NULL AS character_set_schema,\n \'utf8mb4\' AS character_set_name,\n NULL AS collation_catalog,\n NULL AS collation_schema,\n \'utf8mb4_0900_ai_ci\' AS collation_name,\n NULL AS domain_catalog,\n NULL AS domain_schema,\n NULL AS domain_name,\n comment AS column_comment,\n convertToDialectColumnType(type) AS column_type,\n NULL AS extra,\n NULL AS privileges,\n table_catalog AS TABLE_CATALOG,\n table_schema AS TABLE_SCHEMA,\n table_name AS TABLE_NAME,\n column_name AS COLUMN_NAME,\n ordinal_position AS ORDINAL_POSITION,\n column_default AS COLUMN_DEFAULT,\n is_nullable AS IS_NULLABLE,\n data_type AS DATA_TYPE,\n character_maximum_length AS CHARACTER_MAXIMUM_LENGTH,\n character_octet_length AS CHARACTER_OCTET_LENGTH,\n numeric_precision AS NUMERIC_PRECISION,\n numeric_precision_radix AS NUMERIC_PRECISION_RADIX,\n numeric_scale AS NUMERIC_SCALE,\n datetime_precision AS DATETIME_PRECISION,\n character_set_catalog AS CHARACTER_SET_CATALOG,\n character_set_schema AS CHARACTER_SET_SCHEMA,\n character_set_name AS CHARACTER_SET_NAME,\n collation_catalog AS COLLATION_CATALOG,\n collation_schema AS COLLATION_SCHEMA,\n collation_name AS COLLATION_NAME,\n domain_catalog AS DOMAIN_CATALOG,\n domain_schema AS DOMAIN_SCHEMA,\n domain_name AS DOMAIN_NAME,\n 
column_comment AS COLUMN_COMMENT,\n column_type AS COLUMN_TYPE,\n extra AS EXTRA,\n privileges AS PRIVILEGES\nFROM system.columns -CREATE VIEW INFORMATION_SCHEMA.TABLES (`table_catalog` Nullable(String), `table_schema` Nullable(String), `table_name` Nullable(String), `table_type` Nullable(String), `engine` Nullable(String), `version` Nullable(String), `row_format` Nullable(String), `table_rows` Nullable(UInt64), `avg_row_length` Nullable(UInt64), `data_length` Nullable(UInt64), `max_data_length` Nullable(UInt64), `index_length` Nullable(UInt64), `data_free` Nullable(UInt64), `auto_increment` Nullable(UInt64), `create_time` Nullable(DateTime), `update_time` Nullable(DateTime), `check_time` Nullable(DateTime), `checksum` Nullable(UInt64), `table_collation` Nullable(String), `table_comment` Nullable(String), `create_options` Nullable(String), `TABLE_CATALOG` Nullable(String), `TABLE_SCHEMA` Nullable(String), `TABLE_NAME` Nullable(String), `TABLE_TYPE` Nullable(String), `ENGINE` Nullable(String), `VERSION` Nullable(String), `ROW_FORMAT` Nullable(String), `TABLE_ROWS` Nullable(UInt64), `AVG_ROW_LENGTH` Nullable(UInt64), `DATA_LENGTH` Nullable(UInt64), `MAX_DATA_LENGTH` Nullable(UInt64), `INDEX_LENGTH` Nullable(UInt64), `DATA_FREE` Nullable(UInt64), `AUTO_INCREMENT` Nullable(UInt64), `CREATE_TIME` Nullable(DateTime), `UPDATE_TIME` Nullable(DateTime), `CHECK_TIME` Nullable(DateTime), `CHECKSUM` Nullable(UInt64), `TABLE_COLLATION` Nullable(String), `TABLE_COMMENT` Nullable(String), `CREATE_OPTIONS` Nullable(String)) AS SELECT T.database AS table_catalog, T.database AS table_schema, T.name AS table_name, multiIf(any(T.is_temporary), \'LOCAL TEMPORARY\', any(T.engine) LIKE \'%View\', \'VIEW\', any(T.engine) LIKE \'System%\', \'SYSTEM VIEW\', \'TABLE\') AS table_type, any(T.engine) AS engine, NULL AS version, NULL AS row_format, sum(rows_count) AS table_rows, intDiv(data_length, table_rows) AS avg_row_length, sum(bytes_on_disk) AS data_length, NULL AS max_data_length, 0 AS 
index_length, NULL AS data_free, NULL AS auto_increment, NULL AS create_time, NULL AS update_time, NULL AS check_time, NULL AS checksum, \'utf8mb4_0900_ai_ci\' AS table_collation, any(T.comment) AS table_comment, NULL AS create_options, table_catalog AS TABLE_CATALOG, table_schema AS TABLE_SCHEMA, table_name AS TABLE_NAME, table_type AS TABLE_TYPE, any(T.engine) AS ENGINE, version AS VERSION, row_format AS ROW_FORMAT, table_rows AS TABLE_ROWS, avg_row_length AS AVG_ROW_LENGTH, data_length AS DATA_LENGTH, max_data_length AS MAX_DATA_LENGTH, index_length AS INDEX_LENGTH, data_free AS DATA_FREE, auto_increment AS AUTO_INCREMENT, create_time AS CREATE_TIME, update_time AS UPDATE_TIME, check_time AS CHECK_TIME, checksum AS CHECKSUM, table_collation AS TABLE_COLLATION, table_comment AS TABLE_COMMENT, create_options AS CREATE_OPTIONS FROM system.tables AS T LEFT JOIN (SELECT * FROM system.cnch_parts WHERE visible = 1) AS P ON (T.database = P.database) AND (T.name = P.table) GROUP BY table_catalog, table_name SETTINGS enable_multiple_tables_for_cnch_parts = 1, join_use_nulls = 1 -CREATE VIEW INFORMATION_SCHEMA.tables (`table_catalog` Nullable(String), `table_schema` Nullable(String), `table_name` Nullable(String), `table_type` Nullable(String), `engine` Nullable(String), `version` Nullable(String), `row_format` Nullable(String), `table_rows` Nullable(UInt64), `avg_row_length` Nullable(UInt64), `data_length` Nullable(UInt64), `max_data_length` Nullable(UInt64), `index_length` Nullable(UInt64), `data_free` Nullable(UInt64), `auto_increment` Nullable(UInt64), `create_time` Nullable(DateTime), `update_time` Nullable(DateTime), `check_time` Nullable(DateTime), `checksum` Nullable(UInt64), `table_collation` Nullable(String), `table_comment` Nullable(String), `create_options` Nullable(String), `TABLE_CATALOG` Nullable(String), `TABLE_SCHEMA` Nullable(String), `TABLE_NAME` Nullable(String), `TABLE_TYPE` Nullable(String), `ENGINE` Nullable(String), `VERSION` Nullable(String), 
`ROW_FORMAT` Nullable(String), `TABLE_ROWS` Nullable(UInt64), `AVG_ROW_LENGTH` Nullable(UInt64), `DATA_LENGTH` Nullable(UInt64), `MAX_DATA_LENGTH` Nullable(UInt64), `INDEX_LENGTH` Nullable(UInt64), `DATA_FREE` Nullable(UInt64), `AUTO_INCREMENT` Nullable(UInt64), `CREATE_TIME` Nullable(DateTime), `UPDATE_TIME` Nullable(DateTime), `CHECK_TIME` Nullable(DateTime), `CHECKSUM` Nullable(UInt64), `TABLE_COLLATION` Nullable(String), `TABLE_COMMENT` Nullable(String), `CREATE_OPTIONS` Nullable(String)) AS SELECT T.database AS table_catalog, T.database AS table_schema, T.name AS table_name, multiIf(any(T.is_temporary), \'LOCAL TEMPORARY\', any(T.engine) LIKE \'%View\', \'VIEW\', any(T.engine) LIKE \'System%\', \'SYSTEM VIEW\', \'TABLE\') AS table_type, any(T.engine) AS engine, NULL AS version, NULL AS row_format, sum(rows_count) AS table_rows, intDiv(data_length, table_rows) AS avg_row_length, sum(bytes_on_disk) AS data_length, NULL AS max_data_length, 0 AS index_length, NULL AS data_free, NULL AS auto_increment, NULL AS create_time, NULL AS update_time, NULL AS check_time, NULL AS checksum, \'utf8mb4_0900_ai_ci\' AS table_collation, any(T.comment) AS table_comment, NULL AS create_options, table_catalog AS TABLE_CATALOG, table_schema AS TABLE_SCHEMA, table_name AS TABLE_NAME, table_type AS TABLE_TYPE, any(T.engine) AS ENGINE, version AS VERSION, row_format AS ROW_FORMAT, table_rows AS TABLE_ROWS, avg_row_length AS AVG_ROW_LENGTH, data_length AS DATA_LENGTH, max_data_length AS MAX_DATA_LENGTH, index_length AS INDEX_LENGTH, data_free AS DATA_FREE, auto_increment AS AUTO_INCREMENT, create_time AS CREATE_TIME, update_time AS UPDATE_TIME, check_time AS CHECK_TIME, checksum AS CHECKSUM, table_collation AS TABLE_COLLATION, table_comment AS TABLE_COMMENT, create_options AS CREATE_OPTIONS FROM system.tables AS T LEFT JOIN (SELECT * FROM system.cnch_parts WHERE visible = 1) AS P ON (T.database = P.database) AND (T.name = P.table) GROUP BY table_catalog, table_name SETTINGS 
enable_multiple_tables_for_cnch_parts = 1, join_use_nulls = 1 -CREATE VIEW information_schema.TABLES (`table_catalog` Nullable(String), `table_schema` Nullable(String), `table_name` Nullable(String), `table_type` Nullable(String), `engine` Nullable(String), `version` Nullable(String), `row_format` Nullable(String), `table_rows` Nullable(UInt64), `avg_row_length` Nullable(UInt64), `data_length` Nullable(UInt64), `max_data_length` Nullable(UInt64), `index_length` Nullable(UInt64), `data_free` Nullable(UInt64), `auto_increment` Nullable(UInt64), `create_time` Nullable(DateTime), `update_time` Nullable(DateTime), `check_time` Nullable(DateTime), `checksum` Nullable(UInt64), `table_collation` Nullable(String), `table_comment` Nullable(String), `create_options` Nullable(String), `TABLE_CATALOG` Nullable(String), `TABLE_SCHEMA` Nullable(String), `TABLE_NAME` Nullable(String), `TABLE_TYPE` Nullable(String), `ENGINE` Nullable(String), `VERSION` Nullable(String), `ROW_FORMAT` Nullable(String), `TABLE_ROWS` Nullable(UInt64), `AVG_ROW_LENGTH` Nullable(UInt64), `DATA_LENGTH` Nullable(UInt64), `MAX_DATA_LENGTH` Nullable(UInt64), `INDEX_LENGTH` Nullable(UInt64), `DATA_FREE` Nullable(UInt64), `AUTO_INCREMENT` Nullable(UInt64), `CREATE_TIME` Nullable(DateTime), `UPDATE_TIME` Nullable(DateTime), `CHECK_TIME` Nullable(DateTime), `CHECKSUM` Nullable(UInt64), `TABLE_COLLATION` Nullable(String), `TABLE_COMMENT` Nullable(String), `CREATE_OPTIONS` Nullable(String)) AS SELECT T.database AS table_catalog, T.database AS table_schema, T.name AS table_name, multiIf(any(T.is_temporary), \'LOCAL TEMPORARY\', any(T.engine) LIKE \'%View\', \'VIEW\', any(T.engine) LIKE \'System%\', \'SYSTEM VIEW\', \'TABLE\') AS table_type, any(T.engine) AS engine, NULL AS version, NULL AS row_format, sum(rows_count) AS table_rows, intDiv(data_length, table_rows) AS avg_row_length, sum(bytes_on_disk) AS data_length, NULL AS max_data_length, 0 AS index_length, NULL AS data_free, NULL AS auto_increment, NULL AS 
create_time, NULL AS update_time, NULL AS check_time, NULL AS checksum, \'utf8mb4_0900_ai_ci\' AS table_collation, any(T.comment) AS table_comment, NULL AS create_options, table_catalog AS TABLE_CATALOG, table_schema AS TABLE_SCHEMA, table_name AS TABLE_NAME, table_type AS TABLE_TYPE, any(T.engine) AS ENGINE, version AS VERSION, row_format AS ROW_FORMAT, table_rows AS TABLE_ROWS, avg_row_length AS AVG_ROW_LENGTH, data_length AS DATA_LENGTH, max_data_length AS MAX_DATA_LENGTH, index_length AS INDEX_LENGTH, data_free AS DATA_FREE, auto_increment AS AUTO_INCREMENT, create_time AS CREATE_TIME, update_time AS UPDATE_TIME, check_time AS CHECK_TIME, checksum AS CHECKSUM, table_collation AS TABLE_COLLATION, table_comment AS TABLE_COMMENT, create_options AS CREATE_OPTIONS FROM system.tables AS T LEFT JOIN (SELECT * FROM system.cnch_parts WHERE visible = 1) AS P ON (T.database = P.database) AND (T.name = P.table) GROUP BY table_catalog, table_name SETTINGS enable_multiple_tables_for_cnch_parts = 1, join_use_nulls = 1 -CREATE VIEW information_schema.tables (`table_catalog` Nullable(String), `table_schema` Nullable(String), `table_name` Nullable(String), `table_type` Nullable(String), `engine` Nullable(String), `version` Nullable(String), `row_format` Nullable(String), `table_rows` Nullable(UInt64), `avg_row_length` Nullable(UInt64), `data_length` Nullable(UInt64), `max_data_length` Nullable(UInt64), `index_length` Nullable(UInt64), `data_free` Nullable(UInt64), `auto_increment` Nullable(UInt64), `create_time` Nullable(DateTime), `update_time` Nullable(DateTime), `check_time` Nullable(DateTime), `checksum` Nullable(UInt64), `table_collation` Nullable(String), `table_comment` Nullable(String), `create_options` Nullable(String), `TABLE_CATALOG` Nullable(String), `TABLE_SCHEMA` Nullable(String), `TABLE_NAME` Nullable(String), `TABLE_TYPE` Nullable(String), `ENGINE` Nullable(String), `VERSION` Nullable(String), `ROW_FORMAT` Nullable(String), `TABLE_ROWS` Nullable(UInt64), 
`AVG_ROW_LENGTH` Nullable(UInt64), `DATA_LENGTH` Nullable(UInt64), `MAX_DATA_LENGTH` Nullable(UInt64), `INDEX_LENGTH` Nullable(UInt64), `DATA_FREE` Nullable(UInt64), `AUTO_INCREMENT` Nullable(UInt64), `CREATE_TIME` Nullable(DateTime), `UPDATE_TIME` Nullable(DateTime), `CHECK_TIME` Nullable(DateTime), `CHECKSUM` Nullable(UInt64), `TABLE_COLLATION` Nullable(String), `TABLE_COMMENT` Nullable(String), `CREATE_OPTIONS` Nullable(String)) AS SELECT T.database AS table_catalog, T.database AS table_schema, T.name AS table_name, multiIf(any(T.is_temporary), \'LOCAL TEMPORARY\', any(T.engine) LIKE \'%View\', \'VIEW\', any(T.engine) LIKE \'System%\', \'SYSTEM VIEW\', \'TABLE\') AS table_type, any(T.engine) AS engine, NULL AS version, NULL AS row_format, sum(rows_count) AS table_rows, intDiv(data_length, table_rows) AS avg_row_length, sum(bytes_on_disk) AS data_length, NULL AS max_data_length, 0 AS index_length, NULL AS data_free, NULL AS auto_increment, NULL AS create_time, NULL AS update_time, NULL AS check_time, NULL AS checksum, \'utf8mb4_0900_ai_ci\' AS table_collation, any(T.comment) AS table_comment, NULL AS create_options, table_catalog AS TABLE_CATALOG, table_schema AS TABLE_SCHEMA, table_name AS TABLE_NAME, table_type AS TABLE_TYPE, any(T.engine) AS ENGINE, version AS VERSION, row_format AS ROW_FORMAT, table_rows AS TABLE_ROWS, avg_row_length AS AVG_ROW_LENGTH, data_length AS DATA_LENGTH, max_data_length AS MAX_DATA_LENGTH, index_length AS INDEX_LENGTH, data_free AS DATA_FREE, auto_increment AS AUTO_INCREMENT, create_time AS CREATE_TIME, update_time AS UPDATE_TIME, check_time AS CHECK_TIME, checksum AS CHECKSUM, table_collation AS TABLE_COLLATION, table_comment AS TABLE_COMMENT, create_options AS CREATE_OPTIONS FROM system.tables AS T LEFT JOIN (SELECT * FROM system.cnch_parts WHERE visible = 1) AS P ON (T.database = P.database) AND (T.name = P.table) GROUP BY table_catalog, table_name SETTINGS enable_multiple_tables_for_cnch_parts = 1, join_use_nulls = 1 +CREATE VIEW 
INFORMATION_SCHEMA.TABLES (`table_catalog` Nullable(String), `table_schema` Nullable(String), `table_name` Nullable(String), `table_type` Nullable(String), `engine` Nullable(String), `version` Nullable(String), `row_format` Nullable(String), `table_rows` Nullable(UInt64), `avg_row_length` Nullable(UInt64), `data_length` Nullable(UInt64), `max_data_length` Nullable(UInt64), `index_length` Nullable(UInt64), `data_free` Nullable(UInt64), `auto_increment` Nullable(UInt64), `create_time` Nullable(DateTime), `update_time` Nullable(DateTime), `check_time` Nullable(DateTime), `checksum` Nullable(UInt64), `table_collation` Nullable(String), `table_comment` Nullable(String), `create_options` Nullable(String), `TABLE_CATALOG` Nullable(String), `TABLE_SCHEMA` Nullable(String), `TABLE_NAME` Nullable(String), `TABLE_TYPE` Nullable(String), `ENGINE` Nullable(String), `VERSION` Nullable(String), `ROW_FORMAT` Nullable(String), `TABLE_ROWS` Nullable(UInt64), `AVG_ROW_LENGTH` Nullable(UInt64), `DATA_LENGTH` Nullable(UInt64), `MAX_DATA_LENGTH` Nullable(UInt64), `INDEX_LENGTH` Nullable(UInt64), `DATA_FREE` Nullable(UInt64), `AUTO_INCREMENT` Nullable(UInt64), `CREATE_TIME` Nullable(DateTime), `UPDATE_TIME` Nullable(DateTime), `CHECK_TIME` Nullable(DateTime), `CHECKSUM` Nullable(UInt64), `TABLE_COLLATION` Nullable(String), `TABLE_COMMENT` Nullable(String), `CREATE_OPTIONS` Nullable(String)) AS SELECT T.database AS table_catalog, T.database AS table_schema, T.name AS table_name, multiIf(any(T.is_temporary), \'LOCAL TEMPORARY\', any(T.engine) LIKE \'%View\', \'VIEW\', any(T.engine) LIKE \'System%\', \'SYSTEM VIEW\', \'BASE TABLE\') AS table_type, any(T.engine) AS engine, NULL AS version, NULL AS row_format, sum(rows_count) AS table_rows, intDiv(data_length, table_rows) AS avg_row_length, sum(bytes_on_disk) AS data_length, NULL AS max_data_length, 0 AS index_length, NULL AS data_free, NULL AS auto_increment, NULL AS create_time, NULL AS update_time, NULL AS check_time, NULL AS checksum, 
\'utf8mb4_0900_ai_ci\' AS table_collation, any(T.comment) AS table_comment, NULL AS create_options, table_catalog AS TABLE_CATALOG, table_schema AS TABLE_SCHEMA, table_name AS TABLE_NAME, table_type AS TABLE_TYPE, any(T.engine) AS ENGINE, version AS VERSION, row_format AS ROW_FORMAT, table_rows AS TABLE_ROWS, avg_row_length AS AVG_ROW_LENGTH, data_length AS DATA_LENGTH, max_data_length AS MAX_DATA_LENGTH, index_length AS INDEX_LENGTH, data_free AS DATA_FREE, auto_increment AS AUTO_INCREMENT, create_time AS CREATE_TIME, update_time AS UPDATE_TIME, check_time AS CHECK_TIME, checksum AS CHECKSUM, table_collation AS TABLE_COLLATION, table_comment AS TABLE_COMMENT, create_options AS CREATE_OPTIONS FROM system.tables AS T LEFT JOIN (SELECT * FROM system.cnch_parts WHERE visible = 1) AS P ON (T.database = P.database) AND (T.name = P.table) GROUP BY table_catalog, table_name SETTINGS enable_multiple_tables_for_cnch_parts = 1, join_use_nulls = 1 +CREATE VIEW INFORMATION_SCHEMA.tables (`table_catalog` Nullable(String), `table_schema` Nullable(String), `table_name` Nullable(String), `table_type` Nullable(String), `engine` Nullable(String), `version` Nullable(String), `row_format` Nullable(String), `table_rows` Nullable(UInt64), `avg_row_length` Nullable(UInt64), `data_length` Nullable(UInt64), `max_data_length` Nullable(UInt64), `index_length` Nullable(UInt64), `data_free` Nullable(UInt64), `auto_increment` Nullable(UInt64), `create_time` Nullable(DateTime), `update_time` Nullable(DateTime), `check_time` Nullable(DateTime), `checksum` Nullable(UInt64), `table_collation` Nullable(String), `table_comment` Nullable(String), `create_options` Nullable(String), `TABLE_CATALOG` Nullable(String), `TABLE_SCHEMA` Nullable(String), `TABLE_NAME` Nullable(String), `TABLE_TYPE` Nullable(String), `ENGINE` Nullable(String), `VERSION` Nullable(String), `ROW_FORMAT` Nullable(String), `TABLE_ROWS` Nullable(UInt64), `AVG_ROW_LENGTH` Nullable(UInt64), `DATA_LENGTH` Nullable(UInt64), 
`MAX_DATA_LENGTH` Nullable(UInt64), `INDEX_LENGTH` Nullable(UInt64), `DATA_FREE` Nullable(UInt64), `AUTO_INCREMENT` Nullable(UInt64), `CREATE_TIME` Nullable(DateTime), `UPDATE_TIME` Nullable(DateTime), `CHECK_TIME` Nullable(DateTime), `CHECKSUM` Nullable(UInt64), `TABLE_COLLATION` Nullable(String), `TABLE_COMMENT` Nullable(String), `CREATE_OPTIONS` Nullable(String)) AS SELECT T.database AS table_catalog, T.database AS table_schema, T.name AS table_name, multiIf(any(T.is_temporary), \'LOCAL TEMPORARY\', any(T.engine) LIKE \'%View\', \'VIEW\', any(T.engine) LIKE \'System%\', \'SYSTEM VIEW\', \'BASE TABLE\') AS table_type, any(T.engine) AS engine, NULL AS version, NULL AS row_format, sum(rows_count) AS table_rows, intDiv(data_length, table_rows) AS avg_row_length, sum(bytes_on_disk) AS data_length, NULL AS max_data_length, 0 AS index_length, NULL AS data_free, NULL AS auto_increment, NULL AS create_time, NULL AS update_time, NULL AS check_time, NULL AS checksum, \'utf8mb4_0900_ai_ci\' AS table_collation, any(T.comment) AS table_comment, NULL AS create_options, table_catalog AS TABLE_CATALOG, table_schema AS TABLE_SCHEMA, table_name AS TABLE_NAME, table_type AS TABLE_TYPE, any(T.engine) AS ENGINE, version AS VERSION, row_format AS ROW_FORMAT, table_rows AS TABLE_ROWS, avg_row_length AS AVG_ROW_LENGTH, data_length AS DATA_LENGTH, max_data_length AS MAX_DATA_LENGTH, index_length AS INDEX_LENGTH, data_free AS DATA_FREE, auto_increment AS AUTO_INCREMENT, create_time AS CREATE_TIME, update_time AS UPDATE_TIME, check_time AS CHECK_TIME, checksum AS CHECKSUM, table_collation AS TABLE_COLLATION, table_comment AS TABLE_COMMENT, create_options AS CREATE_OPTIONS FROM system.tables AS T LEFT JOIN (SELECT * FROM system.cnch_parts WHERE visible = 1) AS P ON (T.database = P.database) AND (T.name = P.table) GROUP BY table_catalog, table_name SETTINGS enable_multiple_tables_for_cnch_parts = 1, join_use_nulls = 1 +CREATE VIEW information_schema.TABLES (`table_catalog` Nullable(String), 
`table_schema` Nullable(String), `table_name` Nullable(String), `table_type` Nullable(String), `engine` Nullable(String), `version` Nullable(String), `row_format` Nullable(String), `table_rows` Nullable(UInt64), `avg_row_length` Nullable(UInt64), `data_length` Nullable(UInt64), `max_data_length` Nullable(UInt64), `index_length` Nullable(UInt64), `data_free` Nullable(UInt64), `auto_increment` Nullable(UInt64), `create_time` Nullable(DateTime), `update_time` Nullable(DateTime), `check_time` Nullable(DateTime), `checksum` Nullable(UInt64), `table_collation` Nullable(String), `table_comment` Nullable(String), `create_options` Nullable(String), `TABLE_CATALOG` Nullable(String), `TABLE_SCHEMA` Nullable(String), `TABLE_NAME` Nullable(String), `TABLE_TYPE` Nullable(String), `ENGINE` Nullable(String), `VERSION` Nullable(String), `ROW_FORMAT` Nullable(String), `TABLE_ROWS` Nullable(UInt64), `AVG_ROW_LENGTH` Nullable(UInt64), `DATA_LENGTH` Nullable(UInt64), `MAX_DATA_LENGTH` Nullable(UInt64), `INDEX_LENGTH` Nullable(UInt64), `DATA_FREE` Nullable(UInt64), `AUTO_INCREMENT` Nullable(UInt64), `CREATE_TIME` Nullable(DateTime), `UPDATE_TIME` Nullable(DateTime), `CHECK_TIME` Nullable(DateTime), `CHECKSUM` Nullable(UInt64), `TABLE_COLLATION` Nullable(String), `TABLE_COMMENT` Nullable(String), `CREATE_OPTIONS` Nullable(String)) AS SELECT T.database AS table_catalog, T.database AS table_schema, T.name AS table_name, multiIf(any(T.is_temporary), \'LOCAL TEMPORARY\', any(T.engine) LIKE \'%View\', \'VIEW\', any(T.engine) LIKE \'System%\', \'SYSTEM VIEW\', \'BASE TABLE\') AS table_type, any(T.engine) AS engine, NULL AS version, NULL AS row_format, sum(rows_count) AS table_rows, intDiv(data_length, table_rows) AS avg_row_length, sum(bytes_on_disk) AS data_length, NULL AS max_data_length, 0 AS index_length, NULL AS data_free, NULL AS auto_increment, NULL AS create_time, NULL AS update_time, NULL AS check_time, NULL AS checksum, \'utf8mb4_0900_ai_ci\' AS table_collation, any(T.comment) AS 
table_comment, NULL AS create_options, table_catalog AS TABLE_CATALOG, table_schema AS TABLE_SCHEMA, table_name AS TABLE_NAME, table_type AS TABLE_TYPE, any(T.engine) AS ENGINE, version AS VERSION, row_format AS ROW_FORMAT, table_rows AS TABLE_ROWS, avg_row_length AS AVG_ROW_LENGTH, data_length AS DATA_LENGTH, max_data_length AS MAX_DATA_LENGTH, index_length AS INDEX_LENGTH, data_free AS DATA_FREE, auto_increment AS AUTO_INCREMENT, create_time AS CREATE_TIME, update_time AS UPDATE_TIME, check_time AS CHECK_TIME, checksum AS CHECKSUM, table_collation AS TABLE_COLLATION, table_comment AS TABLE_COMMENT, create_options AS CREATE_OPTIONS FROM system.tables AS T LEFT JOIN (SELECT * FROM system.cnch_parts WHERE visible = 1) AS P ON (T.database = P.database) AND (T.name = P.table) GROUP BY table_catalog, table_name SETTINGS enable_multiple_tables_for_cnch_parts = 1, join_use_nulls = 1 +CREATE VIEW information_schema.tables (`table_catalog` Nullable(String), `table_schema` Nullable(String), `table_name` Nullable(String), `table_type` Nullable(String), `engine` Nullable(String), `version` Nullable(String), `row_format` Nullable(String), `table_rows` Nullable(UInt64), `avg_row_length` Nullable(UInt64), `data_length` Nullable(UInt64), `max_data_length` Nullable(UInt64), `index_length` Nullable(UInt64), `data_free` Nullable(UInt64), `auto_increment` Nullable(UInt64), `create_time` Nullable(DateTime), `update_time` Nullable(DateTime), `check_time` Nullable(DateTime), `checksum` Nullable(UInt64), `table_collation` Nullable(String), `table_comment` Nullable(String), `create_options` Nullable(String), `TABLE_CATALOG` Nullable(String), `TABLE_SCHEMA` Nullable(String), `TABLE_NAME` Nullable(String), `TABLE_TYPE` Nullable(String), `ENGINE` Nullable(String), `VERSION` Nullable(String), `ROW_FORMAT` Nullable(String), `TABLE_ROWS` Nullable(UInt64), `AVG_ROW_LENGTH` Nullable(UInt64), `DATA_LENGTH` Nullable(UInt64), `MAX_DATA_LENGTH` Nullable(UInt64), `INDEX_LENGTH` Nullable(UInt64), 
`DATA_FREE` Nullable(UInt64), `AUTO_INCREMENT` Nullable(UInt64), `CREATE_TIME` Nullable(DateTime), `UPDATE_TIME` Nullable(DateTime), `CHECK_TIME` Nullable(DateTime), `CHECKSUM` Nullable(UInt64), `TABLE_COLLATION` Nullable(String), `TABLE_COMMENT` Nullable(String), `CREATE_OPTIONS` Nullable(String)) AS SELECT T.database AS table_catalog, T.database AS table_schema, T.name AS table_name, multiIf(any(T.is_temporary), \'LOCAL TEMPORARY\', any(T.engine) LIKE \'%View\', \'VIEW\', any(T.engine) LIKE \'System%\', \'SYSTEM VIEW\', \'BASE TABLE\') AS table_type, any(T.engine) AS engine, NULL AS version, NULL AS row_format, sum(rows_count) AS table_rows, intDiv(data_length, table_rows) AS avg_row_length, sum(bytes_on_disk) AS data_length, NULL AS max_data_length, 0 AS index_length, NULL AS data_free, NULL AS auto_increment, NULL AS create_time, NULL AS update_time, NULL AS check_time, NULL AS checksum, \'utf8mb4_0900_ai_ci\' AS table_collation, any(T.comment) AS table_comment, NULL AS create_options, table_catalog AS TABLE_CATALOG, table_schema AS TABLE_SCHEMA, table_name AS TABLE_NAME, table_type AS TABLE_TYPE, any(T.engine) AS ENGINE, version AS VERSION, row_format AS ROW_FORMAT, table_rows AS TABLE_ROWS, avg_row_length AS AVG_ROW_LENGTH, data_length AS DATA_LENGTH, max_data_length AS MAX_DATA_LENGTH, index_length AS INDEX_LENGTH, data_free AS DATA_FREE, auto_increment AS AUTO_INCREMENT, create_time AS CREATE_TIME, update_time AS UPDATE_TIME, check_time AS CHECK_TIME, checksum AS CHECKSUM, table_collation AS TABLE_COLLATION, table_comment AS TABLE_COMMENT, create_options AS CREATE_OPTIONS FROM system.tables AS T LEFT JOIN (SELECT * FROM system.cnch_parts WHERE visible = 1) AS P ON (T.database = P.database) AND (T.name = P.table) GROUP BY table_catalog, table_name SETTINGS enable_multiple_tables_for_cnch_parts = 1, join_use_nulls = 1 From 577de4a0c9ca063654769518b1ee406975d2b413 Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 02:35:31 +0000 Subject: [PATCH 
070/292] Merge 'cherry-pick-a05a04ee' into 'cnch-2.2' feat(clickhousech@m-4670794032): Support SHOW KEYS in MySQLHandler [CP] See merge request: !22963 --- src/Server/MySQLHandler.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Server/MySQLHandler.cpp b/src/Server/MySQLHandler.cpp index 17d72747ecf..f92f07bb28f 100644 --- a/src/Server/MySQLHandler.cpp +++ b/src/Server/MySQLHandler.cpp @@ -201,6 +201,7 @@ MySQLHandler::MySQLHandler(IServer & server_, TCPServer & tcp_server_, const Poc queries_replacements.emplace_back("SHOW GLOBAL VARIABLES", showVariableReplacementQuery); queries_replacements.emplace_back("SHOW INDEXES", showIndexReplacementQuery); queries_replacements.emplace_back("SHOW INDEX", showIndexReplacementQuery); + queries_replacements.emplace_back("SHOW KEYS", showIndexReplacementQuery); queries_replacements.emplace_back("SHOW PLUGINS", selectEmptyReplacementQuery); queries_replacements.emplace_back("SHOW PRIVILEGES", ReplaceWith::fn); queries_replacements.emplace_back("SHOW PROCEDURE STATUS", selectEmptySetQuery); From bb68f2b9e829dc3f3008dca9995000718a13604b Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 02:35:46 +0000 Subject: [PATCH 071/292] Merge 'cherry-pick-0d297728' into 'cnch-2.2' fix(clickhousech@m-4548841616): Adjust MySQL DDL rewrite 2.2 See merge request: !23012 --- src/Core/Settings.h | 1 + .../InterpretersAnalyticalMySQLDDLQuery.cpp | 148 ++++++------------ .../60000_mysql_ddl_create.reference | 5 +- .../60000_mysql_ddl_create.sql | 33 ++++ .../60001_mysql_ddl_create_select.reference | 2 +- 5 files changed, 89 insertions(+), 100 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 98c3ab56d5f..c86104a54f8 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -1069,6 +1069,7 @@ enum PreloadLevelSettings : UInt64 0) \ \ M(Bool, handle_division_by_zero, false, "If set true, return null for division by zero (MySQL Behavior)", 0) \ + M(Bool, enable_bucket_for_distribute, true, "If set true, 
enable distribute by keyword by replacing with distribute", 0) \ \ M(Bool, optimize_rewrite_sum_if_to_count_if, true, "Rewrite sumIf() and sum(if()) function countIf() function when logically equivalent", 0) \ M(UInt64, insert_shard_id, 0, "If non zero, when insert into a distributed table, the data will be inserted into the shard `insert_shard_id` synchronously. Possible values range from 1 to `shards_number` of corresponding distributed table", 0) \ diff --git a/src/Interpreters/MySQL/InterpretersAnalyticalMySQLDDLQuery.cpp b/src/Interpreters/MySQL/InterpretersAnalyticalMySQLDDLQuery.cpp index c77edce2094..68577804628 100644 --- a/src/Interpreters/MySQL/InterpretersAnalyticalMySQLDDLQuery.cpp +++ b/src/Interpreters/MySQL/InterpretersAnalyticalMySQLDDLQuery.cpp @@ -283,61 +283,6 @@ static std::tuple ASTPtr - { - if (type_max_size <= 1000) - return std::make_shared(column_name); - - return makeASTFunction("intDiv", std::make_shared(column_name), - std::make_shared(UInt64(type_max_size / 1000))); - }; - - ASTPtr best_partition; - size_t best_size = 0; - for (const auto & primary_key : primary_keys) - { - DataTypePtr type = primary_key.type; - WhichDataType which(type); - - if (which.isNullable()) - throw Exception("LOGICAL ERROR: MySQL primary key must be not null, it is a bug.", ErrorCodes::LOGICAL_ERROR); - - if (which.isDate() || which.isDate32() || which.isDateTime() || which.isDateTime64()) - { - /// In any case, date or datetime is always the best partitioning key - return makeASTFunction("toYYYYMM", std::make_shared(primary_key.name)); - } - - if (type->haveMaximumSizeOfValue() && (!best_size || type->getSizeOfValueInMemory() < best_size)) - { - if (which.isInt8() || which.isUInt8()) - { - best_size = type->getSizeOfValueInMemory(); - best_partition = numbers_partition(primary_key.name, std::numeric_limits::max()); - } - else if (which.isInt16() || which.isUInt16()) - { - best_size = type->getSizeOfValueInMemory(); - best_partition = 
numbers_partition(primary_key.name, std::numeric_limits::max()); - } - else if (which.isInt32() || which.isUInt32()) - { - best_size = type->getSizeOfValueInMemory(); - best_partition = numbers_partition(primary_key.name, std::numeric_limits::max()); - } - else if (which.isInt64() || which.isUInt64()) - { - best_size = type->getSizeOfValueInMemory(); - best_partition = numbers_partition(primary_key.name, std::numeric_limits::max()); - } - } - } - - return best_partition; -} - static ASTPtr getOrderByPolicy( const NamesAndTypesList & primary_keys, const NamesAndTypesList & keys = NamesAndTypesList(), const NamesAndTypesList & cluster_keys = NamesAndTypesList()) { @@ -551,7 +496,6 @@ void InterpreterCreateAnalyticMySQLImpl::validate(const InterpreterCreateAnalyti validateTTLExpression(mysql_storage->ttl_table->ptr()); } - if (mysql_storage->engine) { auto upper_name = Poco::toUpper(mysql_storage->engine->name); @@ -615,7 +559,7 @@ ASTPtr InterpreterCreateAnalyticMySQLImpl::getRewrittenQuery( const TQuery & cre engine_name = Poco::toUpper(mysql_storage->mysql_engine->as()->value.get()); if (engine_names.find(engine_name) == engine_names.end()) { - throw Exception ("Unsupported String Engine Name, please remove quotes", ErrorCodes::MYSQL_SYNTAX_ERROR); + throw Exception ("Unsupported Engine Name", ErrorCodes::MYSQL_SYNTAX_ERROR); } } @@ -625,6 +569,7 @@ ASTPtr InterpreterCreateAnalyticMySQLImpl::getRewrittenQuery( const TQuery & cre return query; } + // table if (has_table_definition) { NamesAndTypesList columns_name_and_type = getColumnsList(create_defines->columns); @@ -633,15 +578,6 @@ ASTPtr InterpreterCreateAnalyticMySQLImpl::getRewrittenQuery( const TQuery & cre setNotNullModifier(create_defines->columns, primary_keys); convertDecimal(create_defines->columns, primary_keys); - /// The `partition by` expression must use primary keys, otherwise the primary keys will not be merge. 
- if (mysql_storage->mysql_partition_by) - { - storage->set(storage->partition_by, mysql_storage->mysql_partition_by->clone()); - } - else if (ASTPtr partition_expression = getPartitionPolicy(primary_keys)) - storage->set(storage->partition_by, partition_expression); - - /// The `order by` expression must use primary keys, otherwise the primary keys will not be merge. if (ASTPtr order_by_expression = getOrderByPolicy(primary_keys, keys, cluster_keys)) { auto & list = order_by_expression->as()->arguments; @@ -660,46 +596,41 @@ ASTPtr InterpreterCreateAnalyticMySQLImpl::getRewrittenQuery( const TQuery & cre rewritten_query->columns_list->mysql_indices = nullptr; } - if (!storage->engine || engine_names.find(Poco::toUpper(storage->engine->name)) != engine_names.end()) - storage->set(storage->engine, makeASTFunction("CnchMergeTree")); - if (!storage->order_by) - storage->set(storage->order_by, makeASTFunction("tuple")); - if (!storage->unique_key) + // storage { - if (storage->primary_key) + if (!storage->engine || engine_names.find(Poco::toUpper(storage->engine->name)) != engine_names.end()) + storage->set(storage->engine, makeASTFunction("CnchMergeTree")); + + if (!storage->order_by) + storage->set(storage->order_by, makeASTFunction("tuple")); + + if (!storage->unique_key) { - storage->set(storage->unique_key, storage->primary_key->clone()); - storage->primary_key = nullptr; + // clickhouse syntax for primary key + if (storage->primary_key) + { + storage->set(storage->unique_key, storage->primary_key->clone()); + storage->primary_key = nullptr; + } + else + { + storage->set(storage->unique_key, makeASTFunction("tuple")); + } } - else + + if (mysql_storage->mysql_partition_by) { - storage->set(storage->unique_key, makeASTFunction("tuple")); + storage->set(storage->partition_by, mysql_storage->mysql_partition_by->clone()); } - } - - if (mysql_storage->distributed_by) - { - // distributed by hash(col) -> cluster by col - const String vw_name = "vw_default"; - auto vw = 
context->getVirtualWarehousePool().get(vw_name); - // context->setCurrentVW(std::move(vw_handle)); - // auto vw = context->tryGetCurrentVW(); - int total_bucket_number = vw ? vw->getNumWorkers() : 1; - auto cluster_by_ast = std::make_shared(mysql_storage->distributed_by->clone(), std::make_shared(total_bucket_number), 0, false, false); - storage->set(storage->cluster_by, cluster_by_ast); - } - else if (mysql_storage->cluster_by) - { - storage->set(storage->cluster_by, mysql_storage->cluster_by->clone()); - } - { // settings ASTPtr settings = std::make_shared(); auto *settings_ast = settings->as(); settings_ast->is_standalone = false; bool has_index_granularity_setting = false; bool has_partition_level_unique_keys_setting = false; + bool has_enable_bucket_level_unique_keys = false; + bool has_enable_bucket_for_distribute = context->getSettingsRef().enable_bucket_for_distribute; if (auto *const mysql_settings = mysql_storage->settings->as()) { for (const auto & change: mysql_settings->changes) @@ -708,16 +639,39 @@ ASTPtr InterpreterCreateAnalyticMySQLImpl::getRewrittenQuery( const TQuery & cre has_index_granularity_setting = true; if (change.name == "partition_level_unique_keys") has_partition_level_unique_keys_setting = true; + if (change.name == "enable_bucket_level_unique_keys") + has_enable_bucket_level_unique_keys = true; } } - // It's not recommended to mix mysql and clickhosue dialects - // but we have to provide this in case of fall back + + // block_size -> index_granularity if (mysql_storage->block_size && !has_index_granularity_setting) - // block_size -> index_granularity settings_ast->changes.push_back({"index_granularity", mysql_storage->block_size->as()->value.get()}); + // distributed by hash(col) -> cluster by col + if (mysql_storage->distributed_by && has_enable_bucket_for_distribute) + { + const String vw_name = "vw_default"; + auto vw = context->getVirtualWarehousePool().get(vw_name); + + int total_bucket_number = vw ? 
vw->getNumWorkers() : 1; + auto cluster_by_ast = std::make_shared(mysql_storage->distributed_by->clone(), std::make_shared(total_bucket_number), 0, false, false); + storage->set(storage->cluster_by, cluster_by_ast); + + // distribute by must contain unique key + if (!has_enable_bucket_level_unique_keys) + settings_ast->changes.push_back({"enable_bucket_level_unique_keys", 1}); + } + else if (mysql_storage->cluster_by) + { + // clickhouse cluster by syntax + storage->set(storage->cluster_by, mysql_storage->cluster_by->clone()); + } + + // storage settings for mysql behavior if (!has_partition_level_unique_keys_setting) settings_ast->changes.push_back({"partition_level_unique_keys", 0}); + if (const auto mysql_settings = mysql_storage->settings->as()) settings_ast->changes.insert(settings_ast->changes.end(), mysql_settings->changes.begin(), mysql_settings->changes.end()); diff --git a/tests/queries/4_cnch_stateless/60000_mysql_ddl_create.reference b/tests/queries/4_cnch_stateless/60000_mysql_ddl_create.reference index 07af9d8ff0d..1b235b12870 100644 --- a/tests/queries/4_cnch_stateless/60000_mysql_ddl_create.reference +++ b/tests/queries/4_cnch_stateless/60000_mysql_ddl_create.reference @@ -17,12 +17,13 @@ mysql_create_ddl4 CREATE TABLE test.mysql_create_ddl4 (\n `id` Int32 NULL,\n id Int32 false val1 DateTime64(3) false 中文 val2 String false DEFAULT \'a\' -mysql_create_ddl5 CREATE TABLE test.mysql_create_ddl5 (\n `id` Int32 NOT NULL,\n `val1` DateTime64(3) NOT NULL COMMENT \'中文\',\n `val2` String NOT NULL DEFAULT \'a\',\n CONSTRAINT un1 UNIQUE (id)\n)\nENGINE = CnchMergeTree\nPARTITION BY intDiv(id, 4294967)\nORDER BY tuple(id)\nUNIQUE KEY tuple(id)\nSETTINGS partition_level_unique_keys = 0, storage_policy = \'cnch_default_hdfs\', allow_nullable_key = 1, storage_dialect_type = \'MYSQL\', index_granularity = 8192 +mysql_create_ddl5 CREATE TABLE test.mysql_create_ddl5 (\n `id` Int32 NOT NULL,\n `val1` DateTime64(3) NOT NULL COMMENT \'中文\',\n `val2` String NOT NULL 
DEFAULT \'a\',\n CONSTRAINT un1 UNIQUE (id)\n)\nENGINE = CnchMergeTree\nORDER BY tuple(id)\nUNIQUE KEY tuple(id)\nSETTINGS partition_level_unique_keys = 0, storage_policy = \'cnch_default_hdfs\', allow_nullable_key = 1, storage_dialect_type = \'MYSQL\', index_granularity = 8192 id Int32 false id2 Decimal(18, 0) false val1 DateTime64(3) false 中文 val2 String false DEFAULT \'a\' -mysql_create_ddl6 CREATE TABLE test.mysql_create_ddl6 (\n `id` Int32 NOT NULL,\n `id2` Decimal(18, 0) NOT NULL,\n `val1` DateTime64(3) NOT NULL COMMENT \'中文\',\n `val2` String NOT NULL DEFAULT \'a\'\n)\nENGINE = CnchMergeTree\nPARTITION BY intDiv(id, 4294967)\nORDER BY (id, id2)\nUNIQUE KEY (id, id2)\nSETTINGS index_granularity = 8192, partition_level_unique_keys = 0, storage_policy = \'cnch_default_hdfs\', allow_nullable_key = 1, storage_dialect_type = \'MYSQL\' +mysql_create_ddl6 CREATE TABLE test.mysql_create_ddl6 (\n `id` Int32 NOT NULL,\n `id2` Decimal(18, 0) NOT NULL,\n `val1` DateTime64(3) NOT NULL COMMENT \'中文\',\n `val2` String NOT NULL DEFAULT \'a\'\n)\nENGINE = CnchMergeTree\nORDER BY (id, id2)\nUNIQUE KEY (id, id2)\nSETTINGS index_granularity = 8192, partition_level_unique_keys = 0, storage_policy = \'cnch_default_hdfs\', allow_nullable_key = 1, storage_dialect_type = \'MYSQL\' +test_create_table_unique1 CREATE TABLE test.test_create_table_unique1 (\n `int_col_1` UInt64 NOT NULL,\n `int_col_2` UInt64 NULL,\n `int_col_3` LowCardinality(Int8 NULL) NOT NULL,\n `int_col_4` Bool NULL,\n `int_col_5` Int8 NULL,\n `int_col_6` Int64 NULL,\n `str_col_1` String NOT NULL,\n `str_col_2` String NULL,\n `float_col_1` Float64 NULL,\n `float_col_2` Decimal(3, 2) NULL,\n `date_col_1` Date32 NULL,\n `date_col_2` DateTime64(3, \'Asia/Istanbul\') NULL,\n `enum_col_1` Enum8(\'a\' = 1, \'b\' = 2, \'c\' = 3, \'d\' = 4) NULL,\n `map_col_1` Map(String, String) NOT NULL,\n `map_col_2` Map(String, UInt64) NOT NULL\n)\nENGINE = CnchMergeTree\nPARTITION BY (int_col_1, date_col_1)\nORDER BY (str_col_1, 
int_col_1)\nUNIQUE KEY (int_col_1, str_col_1)\nTTL toDate(date_col_1) + toIntervalDay(30)\nSETTINGS index_granularity = 4096, partition_level_unique_keys = 0, storage_policy = \'cnch_default_hdfs\', allow_nullable_key = 1, storage_dialect_type = \'MYSQL\' 1 1 0 1 0 0 test_create_table_unique2 CREATE TABLE test.test_create_table_unique2 (\n `int_col_1` UInt64 NOT NULL,\n `int_col_2` UInt64 NOT NULL,\n `int_col_3` LowCardinality(Int8 NULL) NOT NULL,\n `int_col_4` Bool NULL,\n `int_col_5` Int8 NULL,\n `int_col_6` Int64 NULL,\n `str_col_1` String NOT NULL,\n `str_col_2` String NULL,\n `float_col_1` Float64 NULL,\n `float_col_2` Decimal(3, 2) NULL,\n `date_col_1` Date32 NOT NULL,\n `date_col_2` DateTime64(3) NULL,\n `map_col_1` Map(String, String) NOT NULL,\n `map_col_2` Map(String, UInt64) NOT NULL\n)\nENGINE = CnchMergeTree\nPARTITION BY (int_col_1, date_col_1)\nORDER BY (str_col_1, int_col_1)\nUNIQUE KEY (int_col_1, str_col_1)\nTTL toDate(date_col_1) + toIntervalDay(30)\nSETTINGS index_granularity = 4096, partition_level_unique_keys = 0, storage_policy = \'cnch_default_hdfs\', allow_nullable_key = 1, storage_dialect_type = \'MYSQL\' diff --git a/tests/queries/4_cnch_stateless/60000_mysql_ddl_create.sql b/tests/queries/4_cnch_stateless/60000_mysql_ddl_create.sql index 2a9a8eae2d7..5df51b8422d 100644 --- a/tests/queries/4_cnch_stateless/60000_mysql_ddl_create.sql +++ b/tests/queries/4_cnch_stateless/60000_mysql_ddl_create.sql @@ -131,6 +131,39 @@ BLOCK_SIZE = 4096 TABLE_PROPERTIES = '{"format":"columnstore"}' TTL toDate(date_col_1) + INTERVAL 30 DAY; +set enable_bucket_for_distribute=0; + +DROP TABLE test_create_table_unique1; +CREATE TABLE test_create_table_unique1 +( + `int_col_1` UInt64 NOT NULL, + `int_col_2` Nullable(UInt64), + `int_col_3` LowCardinality(Int8), + `int_col_4` boolean, + `int_col_5` tinyint, + `int_col_6` bigint, + `str_col_1` String NOT NULL, + `str_col_2` varchar, + `float_col_1` Float64, + `float_col_2` decimal(3, 2), + `date_col_1` Date32, + 
`date_col_2` DateTime('Asia/Istanbul'), + `enum_col_1` Enum('a', 'b', 'c', 'd'), + `map_col_1` Map(String, String) NOT NULL, + `map_col_2` Map(String, UInt64) NOT NULL, + CLUSTERED KEY(int_col_1), + PRIMARY KEY(int_col_1, str_col_1) +) +ENGINE = 'XUANWU' +PARTITION BY VALUE((int_col_1, date_col_1)) +DISTRIBUTED BY HASH(int_col_1) +STORAGE_POLICY = 'MIXED' +hot_partition_count = 10 +BLOCK_SIZE = 4096 +TABLE_PROPERTIES = '{"format":"columnstore"}' +TTL toDate(date_col_1) + INTERVAL 30 DAY; +SHOW CREATE TABLE test_create_table_unique1; + CREATE TABLE test_create_table_unique2 ( `int_col_1` UInt64 NOT NULL, diff --git a/tests/queries/4_cnch_stateless/60001_mysql_ddl_create_select.reference b/tests/queries/4_cnch_stateless/60001_mysql_ddl_create_select.reference index a73513f119c..65f5c979f75 100644 --- a/tests/queries/4_cnch_stateless/60001_mysql_ddl_create_select.reference +++ b/tests/queries/4_cnch_stateless/60001_mysql_ddl_create_select.reference @@ -12,4 +12,4 @@ mysql_create_select_ddl2 CREATE TABLE test.mysql_create_select_ddl2 (\n `id` mysql_create_select_ddl3 CREATE TABLE test.mysql_create_select_ddl3 (\n `id` Int32 NOT NULL,\n `val1` DateTime64(3) NOT NULL,\n `val2` String NOT NULL\n)\nENGINE = CnchMergeTree\nORDER BY id\nUNIQUE KEY tuple()\nSETTINGS partition_level_unique_keys = 0, storage_policy = \'cnch_default_hdfs\', allow_nullable_key = 1, storage_dialect_type = \'MYSQL\', index_granularity = 8192 mysql_create_select_ddl4 CREATE TABLE test.mysql_create_select_ddl4 (\n `id` Int32 NOT NULL,\n `val1` DateTime64(3) NOT NULL,\n `val2` String NOT NULL\n)\nENGINE = CnchMergeTree\nORDER BY tuple()\nUNIQUE KEY tuple()\nSETTINGS partition_level_unique_keys = 0, storage_policy = \'cnch_default_hdfs\', allow_nullable_key = 1, storage_dialect_type = \'MYSQL\', index_granularity = 8192 mysql_create_select_ddl5 CREATE TABLE test.mysql_create_select_ddl5 (\n `id` Int32 NOT NULL\n)\nENGINE = CnchMergeTree\nORDER BY tuple()\nUNIQUE KEY id\nSETTINGS 
partition_level_unique_keys = 0, storage_policy = \'cnch_default_hdfs\', allow_nullable_key = 1, storage_dialect_type = \'MYSQL\', index_granularity = 8192 -mysql_create_select_ddl6 CREATE TABLE test.mysql_create_select_ddl6 (\n `id` Int32 NOT NULL\n)\nENGINE = CnchMergeTree\nPARTITION BY intDiv(id, 4294967)\nORDER BY tuple(id)\nUNIQUE KEY tuple(id)\nSETTINGS partition_level_unique_keys = 0, storage_policy = \'cnch_default_hdfs\', allow_nullable_key = 1, storage_dialect_type = \'MYSQL\', index_granularity = 8192 +mysql_create_select_ddl6 CREATE TABLE test.mysql_create_select_ddl6 (\n `id` Int32 NOT NULL\n)\nENGINE = CnchMergeTree\nORDER BY tuple(id)\nUNIQUE KEY tuple(id)\nSETTINGS partition_level_unique_keys = 0, storage_policy = \'cnch_default_hdfs\', allow_nullable_key = 1, storage_dialect_type = \'MYSQL\', index_granularity = 8192 From fd6590c2a475290348f8e47100b64abbe2818a84 Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 02:36:14 +0000 Subject: [PATCH 072/292] Merge 'cherry-pick-14a37c0a-2' into 'cnch-2.2' fix(clickhousech@m-4676540004): [cp] cnch 2.2 fix read map and map key with 0 rows See merge request: !23004 --- src/Storages/MergeTree/IMergeTreeReader.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/IMergeTreeReader.cpp b/src/Storages/MergeTree/IMergeTreeReader.cpp index 21b5da116da..d83c0ab30d3 100644 --- a/src/Storages/MergeTree/IMergeTreeReader.cpp +++ b/src/Storages/MergeTree/IMergeTreeReader.cpp @@ -516,7 +516,9 @@ void IMergeTreeReader::readMapDataNotKV( if (dup_implicit_keys.count(impl_key_name) != 0) { auto idx = res_col_to_idx[impl_key_name]; - impl_key_values[kit->second] = std::pair(column_size_before_reading, res_columns[idx].get()); + /// Duplicated implicit key column may be dropped if empty + if (res_columns[idx]) + impl_key_values[kit->second] = std::pair(column_size_before_reading, res_columns[idx].get()); continue; } From e089c2b0fed1874d7175ba309a2cc5424d702023 Mon Sep 17 
00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 02:36:39 +0000 Subject: [PATCH 073/292] Merge 'cherry-pick-mr-22605-1' into 'cnch-2.2' fix(clickhousech@m-4655150073): fix StrictResizeProcessor losing chunks See merge request: !22627 --- src/Processors/ResizeProcessor.cpp | 51 ++++++++++++++++++++++-------- 1 file changed, 38 insertions(+), 13 deletions(-) diff --git a/src/Processors/ResizeProcessor.cpp b/src/Processors/ResizeProcessor.cpp index d652a342150..c39186edc63 100644 --- a/src/Processors/ResizeProcessor.cpp +++ b/src/Processors/ResizeProcessor.cpp @@ -1,4 +1,5 @@ #include +#include #include namespace DB @@ -263,6 +264,8 @@ IProcessor::Status ResizeProcessor::prepare(const PortNumbers & updated_inputs, IProcessor::Status StrictResizeProcessor::prepare(const PortNumbers & updated_inputs, const PortNumbers & updated_outputs) { + static auto * logger = &Poco::Logger::get("MultiPartitionExchangeSink"); + if (!initialized) { initialized = true; @@ -320,8 +323,15 @@ IProcessor::Status StrictResizeProcessor::prepare(const PortNumbers & updated_in { input.status = InputStatus::Finished; ++num_finished_inputs; - - waiting_outputs.push(input.waiting_output); + /// Avoid pushing data to outputs which already have data or are finished + auto & output = output_ports[input.waiting_output]; + if (!output.port->isFinished() && output.port->canPush()) + { + /// reset status to avoid error: Invalid status NotActive for associated output + /// for example, if output with NotActive status is pushed to waiting_outputs and then assigned to another input. 
+ output.status = OutputStatus::NeedData; + waiting_outputs.push(input.waiting_output); + } } continue; } @@ -347,10 +357,8 @@ IProcessor::Status StrictResizeProcessor::prepare(const PortNumbers & updated_in auto & waiting_output = output_ports[input_with_data.waiting_output]; - if (waiting_output.status == OutputStatus::NotActive) - throw Exception("Invalid status NotActive for associated output.", ErrorCodes::LOGICAL_ERROR); - - if (waiting_output.status != OutputStatus::Finished) + /// Output status could be NotActive when abandoned_chunks are pushed to it. + if (waiting_output.status == OutputStatus::NeedData) { waiting_output.port->pushData(input_with_data.port->pullData(/* set_not_needed = */ true)); waiting_output.status = OutputStatus::NotActive; @@ -367,7 +375,8 @@ IProcessor::Status StrictResizeProcessor::prepare(const PortNumbers & updated_in disabled_input_ports.push(input_number); } - if (num_finished_inputs == inputs.size()) + /// Only finish once abandoned_chunks is empty; otherwise the abandoned chunks would be lost. + if (num_finished_inputs == inputs.size() && abandoned_chunks.empty()) { for (auto & output : outputs) output.finish(); @@ -380,11 +389,17 @@ IProcessor::Status StrictResizeProcessor::prepare(const PortNumbers & updated_in { auto & waiting_output = output_ports[waiting_outputs.front()]; waiting_outputs.pop(); - - waiting_output.port->pushData(std::move(abandoned_chunks.back())); - abandoned_chunks.pop_back(); - - waiting_output.status = OutputStatus::NotActive; + // pushing a chunk to a finished port would lose it + if (waiting_output.status == OutputStatus::NeedData) + { + waiting_output.port->pushData(std::move(abandoned_chunks.back())); + abandoned_chunks.pop_back(); + waiting_output.status = OutputStatus::NotActive; + } + else + { + LOG_WARNING(logger, "One output in waiting_outputs is finished"); + } } /// Enable more inputs if needed. 
@@ -406,9 +421,19 @@ IProcessor::Status StrictResizeProcessor::prepare(const PortNumbers & updated_in auto & output = output_ports[waiting_outputs.front()]; waiting_outputs.pop(); + if (output.status != OutputStatus::Finished) + ++num_finished_outputs; + output.status = OutputStatus::Finished; output.port->finish(); - ++num_finished_outputs; + } + + if (num_finished_outputs == outputs.size()) + { + for (auto & input : inputs) + input.close(); + + return Status::Finished; } if (disabled_input_ports.empty()) From a98c7279a14f7d7351369bc0cb49706bc4965cb3 Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 02:36:57 +0000 Subject: [PATCH 074/292] Merge 'cnch2.2-cancel-client' into 'cnch-2.2' fix(clickhousech@m-4656097116): cancel from client See merge request: !23023 --- src/Interpreters/Context.cpp | 4 ++++ src/Server/TCPHandler.cpp | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 6494afd8052..d6815be1eb4 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -265,6 +265,7 @@ namespace ErrorCodes extern const int NOT_A_LEADER; extern const int INVALID_SETTING_VALUE; extern const int DATABASE_ACCESS_DENIED; + extern const int QUERY_WAS_CANCELLED; } /** Set of known objects (environment), that could be used in query. 
@@ -2497,6 +2498,9 @@ void Context::killCurrentQuery() { process_list_elem->cancelQuery(true, false); } + getSegmentScheduler()->cancelPlanSegmentsFromCoordinator( + client_info.initial_query_id, ErrorCodes::QUERY_WAS_CANCELLED, "Cancelled by Client.", shared_from_this()); + getPlanSegmentProcessList().tryCancelPlanSegmentGroup(client_info.initial_query_id); }; String Context::getDefaultFormat() const diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 45ea71a41d3..6b48a79e877 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -898,6 +898,10 @@ void TCPHandler::processOrdinaryQueryWithProcessors() { /// A packet was received requesting to stop execution of the request. executor.cancel(); + if (state.io.coordinator && state.is_cancelled) + { + throw Exception("Cancelled by client.", ErrorCodes::QUERY_WAS_CANCELLED); + } break; } From 03a81b438d3372b72c064b70a61b71b73bde82ed Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 02:38:38 +0000 Subject: [PATCH 075/292] Merge '4655162137_cnch-2.2' into 'cnch-2.2' fix(optimizer@m-4655162137): log json format query plan into query_log system table See merge request: !22989 # Conflicts: # contrib/bytehouse-gis --- src/Core/Settings.h | 1 + src/Interpreters/Context.h | 9 ++++ .../InterpreterSelectQueryUseOptimizer.cpp | 43 +++++++++++++++---- .../InterpreterSelectQueryUseOptimizer.h | 2 + src/Interpreters/QueryLog.cpp | 4 +- src/Interpreters/QueryLog.h | 1 + src/Interpreters/executeQuery.cpp | 5 +++ 7 files changed, 55 insertions(+), 10 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index c86104a54f8..01c09483281 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -377,6 +377,7 @@ enum PreloadLevelSettings : UInt64 0) \ \ M(Bool, log_queries, 1, "Log requests and write the log to the system table.", 0) \ + M(Bool, log_query_plan, 0, "Log json format query plan to the system query_log table.", 0) \ M(Bool, log_max_io_thread_queries, 1, "Log 
max io time thread requests and write the log to the system table", 0) \ M(LogQueriesType, \ log_queries_min_type, \ diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 706632b6f9b..6b8dbf5db3c 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -649,6 +649,8 @@ class Context : public ContextData, public std::enable_shared_from_this /// ContextData mutex mutable SharedMutex mutex; + String query_plan; + Context(); Context(const Context &); @@ -1188,6 +1190,13 @@ class Context : public ContextData, public std::enable_shared_from_this UInt32 getZooKeeperSessionUptime() const; + void addQueryPlanInfo(String & query_plan_) + { + this->query_plan = query_plan_; + } + + String getQueryPlan() {return query_plan;} + #if USE_NURAFT std::shared_ptr & getKeeperDispatcher() const; #endif diff --git a/src/Interpreters/InterpreterSelectQueryUseOptimizer.cpp b/src/Interpreters/InterpreterSelectQueryUseOptimizer.cpp index e05fbd4435d..4936d1021f9 100644 --- a/src/Interpreters/InterpreterSelectQueryUseOptimizer.cpp +++ b/src/Interpreters/InterpreterSelectQueryUseOptimizer.cpp @@ -16,6 +16,10 @@ #include #include +#include +#include +#include +#include #include #include #include @@ -38,6 +42,7 @@ #include #include #include +#include #include #include #include @@ -52,11 +57,15 @@ #include #include #include +#include +#include +#include "common/defines.h" #include #include #include +#include "Interpreters/ClientInfo.h" +#include "Interpreters/Context_fwd.h" -#include namespace ProfileEvents { @@ -603,7 +612,6 @@ void InterpreterSelectQueryUseOptimizer::resetFinalSampleSize(PlanSegmentTreePtr size_t sample_size = (sample->getSampleSize() + 1) / plan_segment.getPlanSegment()->getParallelSize(); sample->setSampleSize(sample_size); } - } } } @@ -655,6 +663,23 @@ void InterpreterSelectQueryUseOptimizer::setUnsupportedSettings(ContextMutablePt context->applySettingsChanges(setting_changes); } +void 
InterpreterSelectQueryUseOptimizer::fillQueryPlan(ContextPtr context, QueryPlan & query_plan) +{ + WriteBufferFromOwnString buffer; + Protos::QueryPlan plan_pb; + query_plan.toProto(plan_pb); + String json_msg; + google::protobuf::util::JsonPrintOptions pb_options; + pb_options.preserve_proto_field_names = true; + pb_options.always_print_primitive_fields = true; + pb_options.add_whitespace = false; + + google::protobuf::util::MessageToJsonString(plan_pb, &json_msg, pb_options); + buffer << json_msg; + + context->getQueryContext()->addQueryPlanInfo(buffer.str()); +} + void InterpreterSelectQueryUseOptimizer::buildQueryPlan(QueryPlanPtr & query_plan, AnalysisPtr & analysis, bool skip_optimize) { context->createPlanNodeIdAllocator(); @@ -685,6 +710,12 @@ void InterpreterSelectQueryUseOptimizer::buildQueryPlan(QueryPlanPtr & query_pla { stage_watch.restart(); PlanOptimizer::optimize(*query_plan, context); + + if (context->getSettingsRef().log_query_plan) + { + fillQueryPlan(context, *query_plan); + } + context->logOptimizerProfile( log, "Optimizer stage run time: ", "Optimizer", std::to_string(stage_watch.elapsedMillisecondsAsDouble()) + "ms"); ProfileEvents::increment(ProfileEvents::QueryOptimizerTime, stage_watch.elapsedMilliseconds()); @@ -723,13 +754,7 @@ BlockIO InterpreterSelectQueryUseOptimizer::executeCreatePreparedStatementQuery( CollectPreparedParams prepared_params_collector; CollectPreparedParamsVisitor(prepared_params_collector).visit(query_ptr); prep_stat_manager->addPlanToCache( - name, - prepare_ast, - settings_changes, - query_plan, - analysis, - std::move(prepared_params_collector.prepared_params), - context); + name, prepare_ast, settings_changes, query_plan, analysis, std::move(prepared_params_collector.prepared_params), context); return {}; } diff --git a/src/Interpreters/InterpreterSelectQueryUseOptimizer.h b/src/Interpreters/InterpreterSelectQueryUseOptimizer.h index 224765417a6..ab68c8fe070 100644 --- 
a/src/Interpreters/InterpreterSelectQueryUseOptimizer.h +++ b/src/Interpreters/InterpreterSelectQueryUseOptimizer.h @@ -80,6 +80,8 @@ class InterpreterSelectQueryUseOptimizer : public IInterpreter static void fillContextQueryAccessInfo(ContextPtr context, AnalysisPtr & analysis); + static void fillQueryPlan(ContextPtr context, QueryPlan & query_plan); + Block getSampleBlock(); static void setUnsupportedSettings(ContextMutablePtr & context); diff --git a/src/Interpreters/QueryLog.cpp b/src/Interpreters/QueryLog.cpp index 7fc7ac8ce3d..78b4f589759 100644 --- a/src/Interpreters/QueryLog.cpp +++ b/src/Interpreters/QueryLog.cpp @@ -150,7 +150,8 @@ NamesAndTypesList QueryLogElement::getNamesAndTypes() {"fallback_reason", std::make_shared()}, {"segment_profiles", std::make_shared(std::make_shared())}, {"virtual_warehouse", std::make_shared()}, - {"worker_group", std::make_shared()} + {"worker_group", std::make_shared()}, + {"query_plan", std::make_shared()} }; } @@ -353,6 +354,7 @@ void QueryLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insert(virtual_warehouse); columns[i++]->insert(worker_group); + columns[i++]->insert(query_plan); } void QueryLogElement::appendClientInfo(const ClientInfo & client_info, MutableColumns & columns, size_t & i) diff --git a/src/Interpreters/QueryLog.h b/src/Interpreters/QueryLog.h index 457551a180b..f3abe3cab7e 100644 --- a/src/Interpreters/QueryLog.h +++ b/src/Interpreters/QueryLog.h @@ -104,6 +104,7 @@ struct QueryLogElement String virtual_warehouse; String worker_group; + String query_plan; static std::string name() { return "QueryLog"; } diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index f0f98bcc7bf..ee9bfa3261d 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -1440,6 +1440,11 @@ static std::tuple executeQueryImpl( } } + if (settings.log_query_plan) + { + elem.query_plan = context->getQueryContext()->getQueryPlan(); + } + 
interpreter->extendQueryLogElem(elem, ast, context, query_database, query_table); if (settings.log_query_settings) From 56e1b96b71900a567958c3742e47de85b131c6c6 Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 02:40:09 +0000 Subject: [PATCH 076/292] Merge 'cherry-pick-a46f9673' into 'cnch-2.2' feat(clickhousech@m-4676088037): set allow_mysql_having_name_resolution=1 for mysqlhandler [CP] See merge request: !23028 --- src/Server/MySQLHandler.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Server/MySQLHandler.cpp b/src/Server/MySQLHandler.cpp index f92f07bb28f..d5424912d12 100644 --- a/src/Server/MySQLHandler.cpp +++ b/src/Server/MySQLHandler.cpp @@ -560,6 +560,8 @@ void MySQLHandler::comQuery(ReadBuffer & payload, bool binary_protocol) query_context->setSetting("mysql_map_fixed_string_to_text_in_show_columns", 1); /// TODO(fredwang) change it to a smaller threshold? query_context->setSetting("max_execution_time", 18000); + /// required by quickbi, otherwise it would fail to get table info + query_context->setSetting("allow_mysql_having_name_resolution", 1); CurrentThread::QueryScope query_scope{query_context}; std::atomic affected_rows {0}; From cc89b685b04faa49a39321f78bdbd91b5da1d4a3 Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 02:40:27 +0000 Subject: [PATCH 077/292] Merge branch 'fky@cnch-2.2@buildselect-coredump-fix' into 'cnch-2.2' fix(clickhousech@m-4675644088): defend against empty tables_with_columns. 
See merge request dp/ClickHouse!23035 --- src/CloudServices/CnchServerServiceImpl.cpp | 7 ++++++ src/Interpreters/MonotonicityCheckVisitor.h | 6 +++++ .../02147_order_by_optimizations.reference | 0 .../02147_order_by_optimizations.sql | 25 +++++++++++++++++++ 4 files changed, 38 insertions(+) create mode 100644 tests/queries/4_cnch_stateless/02147_order_by_optimizations.reference create mode 100644 tests/queries/4_cnch_stateless/02147_order_by_optimizations.sql diff --git a/src/CloudServices/CnchServerServiceImpl.cpp b/src/CloudServices/CnchServerServiceImpl.cpp index d5f796fa5bf..107bac38f3e 100644 --- a/src/CloudServices/CnchServerServiceImpl.cpp +++ b/src/CloudServices/CnchServerServiceImpl.cpp @@ -738,6 +738,13 @@ void CnchServerServiceImpl::fetchPartitions( session_context->setCurrentDatabase(request->database()); ReadBufferFromString rb(request->predicate()); ASTPtr query_ptr = deserializeAST(rb); + /// We should to add `database` into AST before calling `buildSelectQueryInfoForQuery`. + { + ASTSelectQuery * select_query = query_ptr->as(); + if (!select_query) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected AST type found in buildSelectQueryInfoForQuery"); + select_query->replaceDatabaseAndTable(request->database(), request->table()); + } SelectQueryInfo query_info = buildSelectQueryInfoForQuery(query_ptr, session_context); session_context->setTemporaryTransaction(TxnTimestamp(request->has_txnid() ? request->txnid() : session_context->getTimestamp()), 0, false); diff --git a/src/Interpreters/MonotonicityCheckVisitor.h b/src/Interpreters/MonotonicityCheckVisitor.h index a26f62a0829..2d47a7d673d 100644 --- a/src/Interpreters/MonotonicityCheckVisitor.h +++ b/src/Interpreters/MonotonicityCheckVisitor.h @@ -69,6 +69,12 @@ class MonotonicityCheckMatcher if (!pos) return false; + /// It is possible that tables list is empty. + /// IdentifierSemantic get the position from AST, and it can be not valid to use it. 
+ /// One example is `fetchPartitions` from 02147_order_by_optimizations.sql + if (*pos >= tables.size()) + return false; + if (auto data_type_and_name = tables[*pos].columns.tryGetByName(identifier->shortName())) { arg_data_type = data_type_and_name->type; diff --git a/tests/queries/4_cnch_stateless/02147_order_by_optimizations.reference b/tests/queries/4_cnch_stateless/02147_order_by_optimizations.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/4_cnch_stateless/02147_order_by_optimizations.sql b/tests/queries/4_cnch_stateless/02147_order_by_optimizations.sql new file mode 100644 index 00000000000..62f0487f234 --- /dev/null +++ b/tests/queries/4_cnch_stateless/02147_order_by_optimizations.sql @@ -0,0 +1,25 @@ +SET enable_optimizer=0; + +CREATE TABLE clickhouse_types_test +( + `id` UInt64, + `int8_t` Int8, + `uint8_t` UInt8, + `int16_t` Int16, + `int32_t` Int32, + `int64_t` Int64, + `uint64_t` UInt64, + `float32_t` Float32, + `float64_t` Float64, + `date_t` Date, + `varchar_t` String, + `datetime_str1` String, + `datetime_str2` String, + `date` Date +) +ENGINE = CnchMergeTree +PARTITION BY date +ORDER BY (id, date, intHash64(id)) +SAMPLE BY intHash64(id); + +SELECT * FROM `clickhouse_types_test` clickhouse_types_test WHERE `clickhouse_types_test`.`datetime_str1` > '1970-01-01 00:00:00' ORDER BY toDateOrNull(`clickhouse_types_test`.`datetime_str1`) From d56d58b9f61d21183c17b726b29991f56d96d9f4 Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 02:41:51 +0000 Subject: [PATCH 078/292] Merge '4675551081_cnch-2.2' into 'cnch-2.2' fix(clickhousech@m-4675551081): add settings max_in_value_list_to_pushdown See merge request: !23054 # Conflicts: # src/Core/Settings.h # src/QueryPlan/FilterStep.cpp --- src/Core/Settings.h | 1 + src/Optimizer/Rewriter/PredicatePushdown.cpp | 55 ++++- src/QueryPlan/FilterStep.cpp | 64 ++++++ src/QueryPlan/FilterStep.h | 3 + ...095_optimize_large_in_value_list.reference | 188 
++++++++++++++++++ .../48095_optimize_large_in_value_list.sql | 154 ++++++++++++++ 6 files changed, 458 insertions(+), 7 deletions(-) create mode 100644 tests/queries/4_cnch_stateless_no_tenant/48095_optimize_large_in_value_list.reference create mode 100644 tests/queries/4_cnch_stateless_no_tenant/48095_optimize_large_in_value_list.sql diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 01c09483281..7e7f49da852 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -647,6 +647,7 @@ enum PreloadLevelSettings : UInt64 M(Bool, allow_experimental_data_skipping_indices, true, "Emulate data skipping indices", 0) \ M(Bool, enable_predicate_pushdown, false, "Where to push down predicate", 0) \ M(Bool, dict_table_full_mode, false, "If encode / decode table is not bucket table, try to dispatch dict to all workers, if false, throw exception instead", 0) \ + M(UInt64, max_in_value_list_to_pushdown, 10000, "Max size of in value list in filter", 0) \ M(UInt64, pathgraph_threshold_y, 0, "maximum point number in each level", 0) \ M(Bool, to_string_extra_arguments, true, "Whether to allow an extra argument in toString Function", 0) \ \ diff --git a/src/Optimizer/Rewriter/PredicatePushdown.cpp b/src/Optimizer/Rewriter/PredicatePushdown.cpp index 5fa72ec6b6c..7f1e0bcf4fa 100644 --- a/src/Optimizer/Rewriter/PredicatePushdown.cpp +++ b/src/Optimizer/Rewriter/PredicatePushdown.cpp @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -154,10 +155,14 @@ PlanNodePtr PredicateVisitor::visitProjectionNode(ProjectionNode & node, Predica auto pushdown_predicate = PredicateUtils::combineConjuncts(inlined_deterministic_conjuncts); LOG_DEBUG( - &Poco::Logger::get("Debugger"), "node {}, pushdown_predicate : {}", node.getId(), pushdown_predicate->formatForErrorMessage()); + &Poco::Logger::get("PredicateVisitor"), + "project node {}, pushdown_predicate : {}", + node.getId(), + pushdown_predicate->formatForErrorMessage()); if (!pushdown_predicate->as()) 
- pushdown_predicate = ExpressionInterpreter::optimizePredicate(pushdown_predicate, step.getInputStreams()[0].getNamesToTypes(), context); + pushdown_predicate + = ExpressionInterpreter::optimizePredicate(pushdown_predicate, step.getInputStreams()[0].getNamesToTypes(), context); PredicateContext expression_context{ .predicate = pushdown_predicate, .extra_predicate_for_simplify_outer_join @@ -184,12 +189,29 @@ PlanNodePtr PredicateVisitor::visitProjectionNode(ProjectionNode & node, Predica PlanNodePtr PredicateVisitor::visitFilterNode(FilterNode & node, PredicateContext & predicate_context) { const auto & step = *node.getStep(); - auto predicates = std::vector{step.getFilter(), predicate_context.predicate}; + + // handle in function has large value list + UInt64 limit = predicate_context.context->getSettingsRef().max_in_value_list_to_pushdown; + std::pair split_in_filter = FilterStep::splitLargeInValueList(step.getFilter(), limit); + + LOG_DEBUG( + &Poco::Logger::get("PredicateVisitor"), + "filter node {}, split_in_filter.first : {}, split_in_filter.second : {}", + node.getId(), + split_in_filter.first->formatForErrorMessage(), + split_in_filter.second->formatForErrorMessage() + ); + + auto predicates = std::vector{split_in_filter.first, predicate_context.predicate}; ConstASTPtr predicate = PredicateUtils::combineConjuncts(predicates); + if (simplify_common_filter) { predicate = CommonPredicatesRewriter::rewrite(predicate, context); } + + LOG_DEBUG(&Poco::Logger::get("PredicateVisitor"), "filter node {}, pushdown_predicate : {}", node.getId(), predicate->formatForErrorMessage()); + PredicateContext filter_context{ .predicate = predicate, .extra_predicate_for_simplify_outer_join = predicate_context.extra_predicate_for_simplify_outer_join, @@ -198,6 +220,11 @@ PlanNodePtr PredicateVisitor::visitFilterNode(FilterNode & node, PredicateContex if (rewritten->getStep()->getType() != IQueryPlanStep::Type::Filter) { + if 
(!PredicateUtils::isTruePredicate(split_in_filter.second)) + { + auto filter_step = std::make_shared(rewritten->getStep()->getOutputStream(), split_in_filter.second); + return std::make_shared(context->nextNodeId(), std::move(filter_step), PlanNodes{rewritten}); + } return rewritten; } @@ -205,6 +232,11 @@ PlanNodePtr PredicateVisitor::visitFilterNode(FilterNode & node, PredicateContex { if (rewritten->getChildren()[0] != node.getChildren()[0]) { + if (!PredicateUtils::isTruePredicate(split_in_filter.second)) + { + auto filter_step = std::make_shared(rewritten->getStep()->getOutputStream(), split_in_filter.second); + return std::make_shared(context->nextNodeId(), std::move(filter_step), PlanNodes{rewritten}); + } return rewritten; } auto rewritten_step_ptr = rewritten->getStep(); @@ -214,6 +246,11 @@ PlanNodePtr PredicateVisitor::visitFilterNode(FilterNode & node, PredicateContex // see ExpressionEquivalence if (step.getFilter() != rewritten_step.getFilter()) { + if (!PredicateUtils::isTruePredicate(split_in_filter.second)) + { + auto filter_step = std::make_shared(rewritten->getStep()->getOutputStream(), split_in_filter.second); + return std::make_shared(context->nextNodeId(), std::move(filter_step), PlanNodes{rewritten}); + } return rewritten; } } @@ -329,6 +366,7 @@ PlanNodePtr PredicateVisitor::visitJoinNode(JoinNode & node, PredicateContext & PlanNodePtr & right = node.getChildren()[1]; ConstASTPtr left_effective_predicate = EffectivePredicateExtractor::extract(left, context); ConstASTPtr right_effective_predicate = EffectivePredicateExtractor::extract(right, context); + ConstASTPtr join_predicate = PredicateUtils::extractJoinPredicate(node); std::set left_symbols; @@ -355,8 +393,8 @@ PlanNodePtr PredicateVisitor::visitJoinNode(JoinNode & node, PredicateContext & ASTTableJoin::Kind kind = step->getKind(); - LOG_TRACE( - logger, + LOG_DEBUG( + &Poco::Logger::get("PredicateVisitor"), "join node {}, inherited_predicate : {}, left effective predicate: {} , right 
effective predicate: {}, join_predicate : {}", node.getId(), inherited_predicate->formatForErrorMessage(), @@ -1602,9 +1640,12 @@ ASTPtr EffectivePredicateVisitor::visitFilterNode(FilterNode & node, ContextMuta removed_inconsistent_type_filters.emplace_back(ptr); } + std::vector removed_large_in_value_list + = FilterStep::removeLargeInValueList(removed_inconsistent_type_filters, context->getSettingsRef().max_in_value_list_to_pushdown); + // Adds on underlying_predicate - removed_inconsistent_type_filters.emplace_back(underlying_predicate); - return PredicateUtils::combineConjuncts(removed_inconsistent_type_filters); + removed_large_in_value_list.emplace_back(underlying_predicate); + return PredicateUtils::combineConjuncts(removed_large_in_value_list); } ASTPtr EffectivePredicateVisitor::visitAggregatingNode(AggregatingNode & node, ContextMutablePtr & context) diff --git a/src/QueryPlan/FilterStep.cpp b/src/QueryPlan/FilterStep.cpp index f410b489d05..dea55faf738 100644 --- a/src/QueryPlan/FilterStep.cpp +++ b/src/QueryPlan/FilterStep.cpp @@ -13,6 +13,7 @@ * limitations under the License. 
*/ +#include #include #include @@ -30,6 +31,7 @@ #include #include #include +#include namespace DB { @@ -210,4 +212,66 @@ void FilterStep::prepare(const PreparedStatementContext & prepared_context) { prepared_context.prepare(filter); } + +std::pair FilterStep::splitLargeInValueList(const ConstASTPtr & filter, UInt64 limit) +{ + std::vector removed_large_in_value_list; + std::vector large_in_value_list; + for (auto & predicate : PredicateUtils::extractConjuncts(filter)) + { + LOG_DEBUG(&Poco::Logger::get("FilterStep"), " predicate : {}", predicate->formatForErrorMessage()); + + if (predicate->as() && + (predicate->as().name == "in" || + predicate->as().name == "globalIn" || + predicate->as().name == "notIn" || + predicate->as().name == "globalNotIn")) + { + const auto & function = predicate->as(); + if (function.arguments->getChildren()[1]->as()) + { + ASTFunction & tuple = function.arguments->getChildren()[1]->as(); + size_t size = tuple.arguments->getChildren().size(); + if (size > limit) + { + large_in_value_list.emplace_back(predicate); + continue; + } + } + } + removed_large_in_value_list.emplace_back(predicate); + } + + return std::make_pair( + PredicateUtils::combineConjuncts(removed_large_in_value_list), PredicateUtils::combineConjuncts(large_in_value_list)); +} + +std::vector FilterStep::removeLargeInValueList(const std::vector & filters, UInt64 limit) +{ + std::vector removed_large_in_value_list; + for (const auto & predicate : filters) + { + if (predicate->as() && + (predicate->as().name == "in" || + predicate->as().name == "globalIn" || + predicate->as().name == "notIn" || + predicate->as().name == "globalNotIn") + ) + { + const auto & function = predicate->as(); + if (function.arguments->getChildren()[1]->as()) + { + ASTFunction & tuple = function.arguments->getChildren()[1]->as(); + size_t size = tuple.arguments->getChildren().size(); + if (size > limit) + { + continue; + } + } + } + removed_large_in_value_list.emplace_back(predicate); + } + return 
removed_large_in_value_list; +} + } diff --git a/src/QueryPlan/FilterStep.h b/src/QueryPlan/FilterStep.h index bca2a1fada1..8fc9a5a8cf0 100644 --- a/src/QueryPlan/FilterStep.h +++ b/src/QueryPlan/FilterStep.h @@ -15,6 +15,7 @@ #pragma once #include +#include namespace DB { @@ -61,6 +62,8 @@ class FilterStep : public ITransformingStep void prepare(const PreparedStatementContext & prepared_context) override; + static std::pair splitLargeInValueList(const ConstASTPtr & filter, UInt64 limit); + static std::vector removeLargeInValueList(const std::vector & filters, UInt64 limit); private: ActionsDAGPtr actions_dag; ConstASTPtr filter; diff --git a/tests/queries/4_cnch_stateless_no_tenant/48095_optimize_large_in_value_list.reference b/tests/queries/4_cnch_stateless_no_tenant/48095_optimize_large_in_value_list.reference new file mode 100644 index 00000000000..54560b09643 --- /dev/null +++ b/tests/queries/4_cnch_stateless_no_tenant/48095_optimize_large_in_value_list.reference @@ -0,0 +1,188 @@ +Projection +│ Expressions: count():=`expr#count()` +└─ MergingAggregated + └─ Gather Exchange + └─ Aggregating + │ Group by: {} + │ Aggregates: expr#count():=AggNull(count)() + └─ Left Join + │ Condition: ad_plan_id == expr#CAST(apid, \'text\') + ├─ Projection + │ │ Expressions: [ad_plan_id] + │ └─ Filter + │ │ Condition: (ymd = cast(19901, \'Date\')) AND (recv_timestamp >= \'2024-06-27 00:00:00\') AND (recv_timestamp <= \'2024-06-27 23:59:59\') AND (kind = \'\') AND (dsp_id = \'1000001\') AND (ad_plan_id IN (\'131061\', \'131078\', \'154808\', \'154810\', \'154811\', \'89270\', \'89272\', \'89274\', \'89275\')) AND (uid IN (\'4f640b0110e647668df11478573dab9b\', \'534757bd6e5340309cc119e54f23d4d7\', \'67921ae675474d49b96f982ea6936363\')) + │ └─ TableScan 48095_test.dws_ad_logs_rt_not_distinct + │ Where: (ymd = cast(19901, \'Date\')) AND (recv_timestamp >= \'2024-06-27 00:00:00\') AND (recv_timestamp <= \'2024-06-27 23:59:59\') AND (kind = \'\') AND (dsp_id = \'1000001\') AND 
(ad_plan_id IN (\'131061\', \'131078\', \'154808\', \'154810\', \'154811\', \'89270\', \'89272\', \'89274\', \'89275\')) AND (uid IN (\'4f640b0110e647668df11478573dab9b\', \'534757bd6e5340309cc119e54f23d4d7\', \'67921ae675474d49b96f982ea6936363\')) + │ Outputs: [kind, dsp_id, ad_plan_id, uid, recv_timestamp, ymd] + └─ Broadcast Exchange + └─ Projection + │ Expressions: expr#CAST(apid, \'text\'):=CAST(apid, \'text\') + └─ Filter + │ Condition: CAST(apid, \'text\') IN (\'131061\', \'131078\', \'154808\', \'154810\', \'154811\', \'89270\', \'89272\', \'89274\', \'89275\') + └─ TableScan 48095_test.platform_ad_plan + Where: CAST(apid, \'text\') IN (\'131061\', \'131078\', \'154808\', \'154810\', \'154811\', \'89270\', \'89272\', \'89274\', \'89275\') + Outputs: [apid] +Projection +│ Expressions: count():=`expr#count()` +└─ MergingAggregated + └─ Gather Exchange + └─ Aggregating + │ Group by: {} + │ Aggregates: expr#count():=AggNull(count)() + └─ Left Join + │ Condition: ad_plan_id == expr#CAST(apid, \'text\') + ├─ Projection + │ │ Expressions: [ad_plan_id] + │ └─ Filter + │ │ Condition: (ymd = cast(19901, \'Date\')) AND (recv_timestamp >= \'2024-06-27 00:00:00\') AND (recv_timestamp <= \'2024-06-27 23:59:59\') AND (kind = \'\') AND (dsp_id = \'1000001\') AND (uid IN (\'4f640b0110e647668df11478573dab9b\', \'534757bd6e5340309cc119e54f23d4d7\', \'67921ae675474d49b96f982ea6936363\')) AND (ad_plan_id GLOBAL IN (\'131078\', \'131061\', \'89270\', \'154810\', \'89272\', \'154811\', \'89275\', \'154808\', \'89274\')) + │ └─ TableScan 48095_test.dws_ad_logs_rt_not_distinct + │ Where: (ymd = cast(19901, \'Date\')) AND (recv_timestamp >= \'2024-06-27 00:00:00\') AND (recv_timestamp <= \'2024-06-27 23:59:59\') AND (kind = \'\') AND (dsp_id = \'1000001\') AND (uid IN (\'4f640b0110e647668df11478573dab9b\', \'534757bd6e5340309cc119e54f23d4d7\', \'67921ae675474d49b96f982ea6936363\')) AND (ad_plan_id GLOBAL IN (\'131078\', \'131061\', \'89270\', \'154810\', \'89272\', \'154811\', 
\'89275\', \'154808\', \'89274\')) + │ Outputs: [kind, dsp_id, ad_plan_id, uid, recv_timestamp, ymd] + └─ Broadcast Exchange + └─ Projection + │ Expressions: expr#CAST(apid, \'text\'):=CAST(apid, \'text\') + └─ Filter + │ Condition: CAST(apid, \'text\') GLOBAL IN (\'131078\', \'131061\', \'89270\', \'154810\', \'89272\', \'154811\', \'89275\', \'154808\', \'89274\') + └─ TableScan 48095_test.platform_ad_plan + Where: CAST(apid, \'text\') GLOBAL IN (\'131078\', \'131061\', \'89270\', \'154810\', \'89272\', \'154811\', \'89275\', \'154808\', \'89274\') + Outputs: [apid] +Projection +│ Expressions: count():=`expr#count()` +└─ MergingAggregated + └─ Gather Exchange + └─ Aggregating + │ Group by: {} + │ Aggregates: expr#count():=AggNull(count)() + └─ Left Join + │ Condition: ad_plan_id == expr#CAST(apid, \'text\') + ├─ Projection + │ │ Expressions: [ad_plan_id] + │ └─ Filter + │ │ Condition: (ymd = cast(19901, \'Date\')) AND (recv_timestamp >= \'2024-06-27 00:00:00\') AND (recv_timestamp <= \'2024-06-27 23:59:59\') AND (kind = \'\') AND (dsp_id = \'1000001\') AND (uid IN (\'4f640b0110e647668df11478573dab9b\', \'534757bd6e5340309cc119e54f23d4d7\', \'67921ae675474d49b96f982ea6936363\')) AND (ad_plan_id NOT IN (\'131078\', \'131061\', \'89270\', \'154810\', \'89272\', \'154811\', \'89275\', \'154808\', \'89274\')) + │ └─ TableScan 48095_test.dws_ad_logs_rt_not_distinct + │ Where: (ymd = cast(19901, \'Date\')) AND (recv_timestamp >= \'2024-06-27 00:00:00\') AND (recv_timestamp <= \'2024-06-27 23:59:59\') AND (kind = \'\') AND (dsp_id = \'1000001\') AND (uid IN (\'4f640b0110e647668df11478573dab9b\', \'534757bd6e5340309cc119e54f23d4d7\', \'67921ae675474d49b96f982ea6936363\')) AND (ad_plan_id NOT IN (\'131078\', \'131061\', \'89270\', \'154810\', \'89272\', \'154811\', \'89275\', \'154808\', \'89274\')) + │ Outputs: [kind, dsp_id, ad_plan_id, uid, recv_timestamp, ymd] + └─ Broadcast Exchange + └─ Projection + │ Expressions: expr#CAST(apid, \'text\'):=CAST(apid, \'text\') + └─ 
Filter + │ Condition: CAST(apid, \'text\') NOT IN (\'131078\', \'131061\', \'89270\', \'154810\', \'89272\', \'154811\', \'89275\', \'154808\', \'89274\') + └─ TableScan 48095_test.platform_ad_plan + Where: CAST(apid, \'text\') NOT IN (\'131078\', \'131061\', \'89270\', \'154810\', \'89272\', \'154811\', \'89275\', \'154808\', \'89274\') + Outputs: [apid] +Projection +│ Expressions: count():=`expr#count()` +└─ MergingAggregated + └─ Gather Exchange + └─ Aggregating + │ Group by: {} + │ Aggregates: expr#count():=AggNull(count)() + └─ Left Join + │ Condition: ad_plan_id == expr#CAST(apid, \'text\') + ├─ Projection + │ │ Expressions: [ad_plan_id] + │ └─ Filter + │ │ Condition: (ymd = cast(19901, \'Date\')) AND (recv_timestamp >= \'2024-06-27 00:00:00\') AND (recv_timestamp <= \'2024-06-27 23:59:59\') AND (kind = \'\') AND (dsp_id = \'1000001\') AND (uid IN (\'4f640b0110e647668df11478573dab9b\', \'534757bd6e5340309cc119e54f23d4d7\', \'67921ae675474d49b96f982ea6936363\')) AND (ad_plan_id GLOBAL NOT IN (\'131078\', \'131061\', \'89270\', \'154810\', \'89272\', \'154811\', \'89275\', \'154808\', \'89274\')) + │ └─ TableScan 48095_test.dws_ad_logs_rt_not_distinct + │ Where: (ymd = cast(19901, \'Date\')) AND (recv_timestamp >= \'2024-06-27 00:00:00\') AND (recv_timestamp <= \'2024-06-27 23:59:59\') AND (kind = \'\') AND (dsp_id = \'1000001\') AND (uid IN (\'4f640b0110e647668df11478573dab9b\', \'534757bd6e5340309cc119e54f23d4d7\', \'67921ae675474d49b96f982ea6936363\')) AND (ad_plan_id GLOBAL NOT IN (\'131078\', \'131061\', \'89270\', \'154810\', \'89272\', \'154811\', \'89275\', \'154808\', \'89274\')) + │ Outputs: [kind, dsp_id, ad_plan_id, uid, recv_timestamp, ymd] + └─ Broadcast Exchange + └─ Projection + │ Expressions: expr#CAST(apid, \'text\'):=CAST(apid, \'text\') + └─ Filter + │ Condition: CAST(apid, \'text\') GLOBAL NOT IN (\'131078\', \'131061\', \'89270\', \'154810\', \'89272\', \'154811\', \'89275\', \'154808\', \'89274\') + └─ TableScan 
48095_test.platform_ad_plan + Where: CAST(apid, \'text\') GLOBAL NOT IN (\'131078\', \'131061\', \'89270\', \'154810\', \'89272\', \'154811\', \'89275\', \'154808\', \'89274\') + Outputs: [apid] +Projection +│ Expressions: count():=`expr#count()` +└─ MergingAggregated + └─ Gather Exchange + └─ Aggregating + │ Group by: {} + │ Aggregates: expr#count():=AggNull(count)() + └─ Filter + │ Condition: ad_plan_id IN (\'131061\', \'131078\', \'154808\', \'154810\', \'154811\', \'89270\', \'89272\', \'89274\', \'89275\') + └─ Left Join + │ Condition: ad_plan_id == expr#CAST(apid, \'text\') + ├─ Projection + │ │ Expressions: [ad_plan_id] + │ └─ Filter + │ │ Condition: (ymd = cast(19901, \'Date\')) AND (recv_timestamp >= \'2024-06-27 00:00:00\') AND (recv_timestamp <= \'2024-06-27 23:59:59\') AND (kind = \'\') AND (dsp_id = \'1000001\') AND (uid IN (\'4f640b0110e647668df11478573dab9b\', \'534757bd6e5340309cc119e54f23d4d7\', \'67921ae675474d49b96f982ea6936363\')) + │ └─ TableScan 48095_test.dws_ad_logs_rt_not_distinct + │ Where: (ymd = cast(19901, \'Date\')) AND (recv_timestamp >= \'2024-06-27 00:00:00\') AND (recv_timestamp <= \'2024-06-27 23:59:59\') AND (kind = \'\') AND (dsp_id = \'1000001\') AND (uid IN (\'4f640b0110e647668df11478573dab9b\', \'534757bd6e5340309cc119e54f23d4d7\', \'67921ae675474d49b96f982ea6936363\')) + │ Outputs: [kind, dsp_id, ad_plan_id, uid, recv_timestamp, ymd] + └─ Broadcast Exchange + └─ Projection + │ Expressions: expr#CAST(apid, \'text\'):=CAST(apid, \'text\') + └─ TableScan 48095_test.platform_ad_plan + Outputs: [apid] +Projection +│ Expressions: count():=`expr#count()` +└─ MergingAggregated + └─ Gather Exchange + └─ Aggregating + │ Group by: {} + │ Aggregates: expr#count():=AggNull(count)() + └─ Filter + │ Condition: ad_plan_id GLOBAL IN (\'131078\', \'131061\', \'89270\', \'154810\', \'89272\', \'154811\', \'89275\', \'154808\', \'89274\') + └─ Left Join + │ Condition: ad_plan_id == expr#CAST(apid, \'text\') + ├─ Projection + │ │ Expressions: 
[ad_plan_id] + │ └─ Filter + │ │ Condition: (ymd = cast(19901, \'Date\')) AND (recv_timestamp >= \'2024-06-27 00:00:00\') AND (recv_timestamp <= \'2024-06-27 23:59:59\') AND (kind = \'\') AND (dsp_id = \'1000001\') AND (uid IN (\'4f640b0110e647668df11478573dab9b\', \'534757bd6e5340309cc119e54f23d4d7\', \'67921ae675474d49b96f982ea6936363\')) + │ └─ TableScan 48095_test.dws_ad_logs_rt_not_distinct + │ Where: (ymd = cast(19901, \'Date\')) AND (recv_timestamp >= \'2024-06-27 00:00:00\') AND (recv_timestamp <= \'2024-06-27 23:59:59\') AND (kind = \'\') AND (dsp_id = \'1000001\') AND (uid IN (\'4f640b0110e647668df11478573dab9b\', \'534757bd6e5340309cc119e54f23d4d7\', \'67921ae675474d49b96f982ea6936363\')) + │ Outputs: [kind, dsp_id, ad_plan_id, uid, recv_timestamp, ymd] + └─ Broadcast Exchange + └─ Projection + │ Expressions: expr#CAST(apid, \'text\'):=CAST(apid, \'text\') + └─ TableScan 48095_test.platform_ad_plan + Outputs: [apid] +Projection +│ Expressions: count():=`expr#count()` +└─ MergingAggregated + └─ Gather Exchange + └─ Aggregating + │ Group by: {} + │ Aggregates: expr#count():=AggNull(count)() + └─ Filter + │ Condition: ad_plan_id NOT IN (\'131078\', \'131061\', \'89270\', \'154810\', \'89272\', \'154811\', \'89275\', \'154808\', \'89274\') + └─ Left Join + │ Condition: ad_plan_id == expr#CAST(apid, \'text\') + ├─ Projection + │ │ Expressions: [ad_plan_id] + │ └─ Filter + │ │ Condition: (ymd = cast(19901, \'Date\')) AND (recv_timestamp >= \'2024-06-27 00:00:00\') AND (recv_timestamp <= \'2024-06-27 23:59:59\') AND (kind = \'\') AND (dsp_id = \'1000001\') AND (uid IN (\'4f640b0110e647668df11478573dab9b\', \'534757bd6e5340309cc119e54f23d4d7\', \'67921ae675474d49b96f982ea6936363\')) + │ └─ TableScan 48095_test.dws_ad_logs_rt_not_distinct + │ Where: (ymd = cast(19901, \'Date\')) AND (recv_timestamp >= \'2024-06-27 00:00:00\') AND (recv_timestamp <= \'2024-06-27 23:59:59\') AND (kind = \'\') AND (dsp_id = \'1000001\') AND (uid IN 
(\'4f640b0110e647668df11478573dab9b\', \'534757bd6e5340309cc119e54f23d4d7\', \'67921ae675474d49b96f982ea6936363\')) + │ Outputs: [kind, dsp_id, ad_plan_id, uid, recv_timestamp, ymd] + └─ Broadcast Exchange + └─ Projection + │ Expressions: expr#CAST(apid, \'text\'):=CAST(apid, \'text\') + └─ TableScan 48095_test.platform_ad_plan + Outputs: [apid] +Projection +│ Expressions: count():=`expr#count()` +└─ MergingAggregated + └─ Gather Exchange + └─ Aggregating + │ Group by: {} + │ Aggregates: expr#count():=AggNull(count)() + └─ Filter + │ Condition: ad_plan_id GLOBAL NOT IN (\'131078\', \'131061\', \'89270\', \'154810\', \'89272\', \'154811\', \'89275\', \'154808\', \'89274\') + └─ Left Join + │ Condition: ad_plan_id == expr#CAST(apid, \'text\') + ├─ Projection + │ │ Expressions: [ad_plan_id] + │ └─ Filter + │ │ Condition: (ymd = cast(19901, \'Date\')) AND (recv_timestamp >= \'2024-06-27 00:00:00\') AND (recv_timestamp <= \'2024-06-27 23:59:59\') AND (kind = \'\') AND (dsp_id = \'1000001\') AND (uid IN (\'4f640b0110e647668df11478573dab9b\', \'534757bd6e5340309cc119e54f23d4d7\', \'67921ae675474d49b96f982ea6936363\')) + │ └─ TableScan 48095_test.dws_ad_logs_rt_not_distinct + │ Where: (ymd = cast(19901, \'Date\')) AND (recv_timestamp >= \'2024-06-27 00:00:00\') AND (recv_timestamp <= \'2024-06-27 23:59:59\') AND (kind = \'\') AND (dsp_id = \'1000001\') AND (uid IN (\'4f640b0110e647668df11478573dab9b\', \'534757bd6e5340309cc119e54f23d4d7\', \'67921ae675474d49b96f982ea6936363\')) + │ Outputs: [kind, dsp_id, ad_plan_id, uid, recv_timestamp, ymd] + └─ Broadcast Exchange + └─ Projection + │ Expressions: expr#CAST(apid, \'text\'):=CAST(apid, \'text\') + └─ TableScan 48095_test.platform_ad_plan + Outputs: [apid] diff --git a/tests/queries/4_cnch_stateless_no_tenant/48095_optimize_large_in_value_list.sql b/tests/queries/4_cnch_stateless_no_tenant/48095_optimize_large_in_value_list.sql new file mode 100644 index 00000000000..ffb6aa9f3a4 --- /dev/null +++ 
b/tests/queries/4_cnch_stateless_no_tenant/48095_optimize_large_in_value_list.sql @@ -0,0 +1,154 @@ +CREATE DATABASE IF NOT EXISTS 48095_test; + +USE 48095_test; + +DROP TABLE IF EXISTS 48095_test.dws_ad_logs_rt_not_distinct_local; +DROP TABLE IF EXISTS 48095_test.dws_ad_logs_rt_not_distinct; + +DROP TABLE IF EXISTS 48095_test.platform_ad_plan_local; +DROP TABLE IF EXISTS 48095_test.platform_ad_plan; + +CREATE TABLE dws_ad_logs_rt_not_distinct +( + `kind` LowCardinality(String), + `id` String, + `request_id` String NOT NULL, + `bid` Nullable(String), + `app_version_name` LowCardinality(Nullable(String)), + `sdk_version` LowCardinality(Nullable(String)), + `channel_package` LowCardinality(Nullable(String)), + `sex` LowCardinality(Nullable(String)), + `age_range` LowCardinality(Nullable(String)), + `area_province` LowCardinality(Nullable(String)), + `device_brand` LowCardinality(Nullable(String)), + `device_range_sys_version` LowCardinality(Nullable(String)), + `internal_install` LowCardinality(Nullable(String)), + `network_state` LowCardinality(Nullable(String)), + `unit_id` LowCardinality(String), + `dsp_id` LowCardinality(Nullable(String)), + `type` LowCardinality(Nullable(String)), + `pay_type` LowCardinality(Nullable(String)), + `success_code` LowCardinality(Nullable(String)), + `ad_plan_id` Nullable(String), + `ad_group_id` Nullable(String), + `uid` String, + `ad_cost` Nullable(Float64), + `ad_gsp_cost` Nullable(Float64), + `is_supplyment` LowCardinality(Nullable(String)), + `recv_timestamp` String, + `insert_timestamp` String, + `ocpm_bid` Nullable(String), + `ymd` Date +) +ENGINE = CnchMergeTree() order by id; + + +CREATE TABLE platform_ad_plan +( + `apid` Int32 NOT NULL, + `packagename` Nullable(String), + `rec_game_flag` Nullable(String) +) +ENGINE = CnchMergeTree() order by apid; + +set enable_optimizer=1; + +EXPLAIN stats = 0 +SELECT + count(*) +FROM +( + SELECT * + FROM dws_ad_logs_rt_not_distinct + WHERE (ymd = toDate('2024-06-27 00:00:00')) AND 
(recv_timestamp >= '2024-06-27 00:00:00') AND (recv_timestamp <= '2024-06-27 23:59:59') +) AS t1 +LEFT JOIN platform_ad_plan AS t2 ON t1.ad_plan_id = CAST(t2.apid, 'text') +WHERE (ad_plan_id IN ('131078', '131061', '89270', '154810', '89272', '154811', '89275', '154808', '89274')) AND (uid IN ('67921ae675474d49b96f982ea6936363', '534757bd6e5340309cc119e54f23d4d7', '4f640b0110e647668df11478573dab9b')) AND (dsp_id IN ('1000001')) AND (kind IN ('', '')) settings max_in_value_list_to_pushdown= 100; + +EXPLAIN stats = 0 +SELECT + count(*) +FROM +( + SELECT * + FROM dws_ad_logs_rt_not_distinct + WHERE (ymd = toDate('2024-06-27 00:00:00')) AND (recv_timestamp >= '2024-06-27 00:00:00') AND (recv_timestamp <= '2024-06-27 23:59:59') +) AS t1 +LEFT JOIN platform_ad_plan AS t2 ON t1.ad_plan_id = CAST(t2.apid, 'text') +WHERE (ad_plan_id GLOBAL IN ('131078', '131061', '89270', '154810', '89272', '154811', '89275', '154808', '89274')) AND (uid IN ('67921ae675474d49b96f982ea6936363', '534757bd6e5340309cc119e54f23d4d7', '4f640b0110e647668df11478573dab9b')) AND (dsp_id IN ('1000001')) AND (kind IN ('', '')) settings max_in_value_list_to_pushdown= 100; + +EXPLAIN stats = 0 +SELECT + count(*) +FROM +( + SELECT * + FROM dws_ad_logs_rt_not_distinct + WHERE (ymd = toDate('2024-06-27 00:00:00')) AND (recv_timestamp >= '2024-06-27 00:00:00') AND (recv_timestamp <= '2024-06-27 23:59:59') +) AS t1 +LEFT JOIN platform_ad_plan AS t2 ON t1.ad_plan_id = CAST(t2.apid, 'text') +WHERE (ad_plan_id NOT IN ('131078', '131061', '89270', '154810', '89272', '154811', '89275', '154808', '89274')) AND (uid IN ('67921ae675474d49b96f982ea6936363', '534757bd6e5340309cc119e54f23d4d7', '4f640b0110e647668df11478573dab9b')) AND (dsp_id IN ('1000001')) AND (kind IN ('', '')) settings max_in_value_list_to_pushdown= 100; + +EXPLAIN stats = 0 +SELECT + count(*) +FROM +( + SELECT * + FROM dws_ad_logs_rt_not_distinct + WHERE (ymd = toDate('2024-06-27 00:00:00')) AND (recv_timestamp >= '2024-06-27 00:00:00') AND 
(recv_timestamp <= '2024-06-27 23:59:59') +) AS t1 +LEFT JOIN platform_ad_plan AS t2 ON t1.ad_plan_id = CAST(t2.apid, 'text') +WHERE (ad_plan_id GLOBAL NOT IN ('131078', '131061', '89270', '154810', '89272', '154811', '89275', '154808', '89274')) AND (uid IN ('67921ae675474d49b96f982ea6936363', '534757bd6e5340309cc119e54f23d4d7', '4f640b0110e647668df11478573dab9b')) AND (dsp_id IN ('1000001')) AND (kind IN ('', '')) settings max_in_value_list_to_pushdown= 100; + +-- set max_in_value_list_to_pushdown = 5, +-- ad_plan_id IN ('131078', '131061', '89270', '154810', '89272', '154811', '89275', '154808', '89274') will not be pushdown. +EXPLAIN stats = 0 +SELECT + count(*) +FROM +( + SELECT * + FROM dws_ad_logs_rt_not_distinct + WHERE (ymd = toDate('2024-06-27 00:00:00')) AND (recv_timestamp >= '2024-06-27 00:00:00') AND (recv_timestamp <= '2024-06-27 23:59:59') +) AS t1 +LEFT JOIN platform_ad_plan AS t2 ON t1.ad_plan_id = CAST(t2.apid, 'text') +WHERE (ad_plan_id IN ('131078', '131061', '89270', '154810', '89272', '154811', '89275', '154808', '89274')) AND (uid IN ('67921ae675474d49b96f982ea6936363', '534757bd6e5340309cc119e54f23d4d7', '4f640b0110e647668df11478573dab9b')) AND (dsp_id IN ('1000001')) AND (kind IN ('', '')) settings max_in_value_list_to_pushdown= 5; + +EXPLAIN stats = 0 +SELECT + count(*) +FROM +( + SELECT * + FROM dws_ad_logs_rt_not_distinct + WHERE (ymd = toDate('2024-06-27 00:00:00')) AND (recv_timestamp >= '2024-06-27 00:00:00') AND (recv_timestamp <= '2024-06-27 23:59:59') +) AS t1 +LEFT JOIN platform_ad_plan AS t2 ON t1.ad_plan_id = CAST(t2.apid, 'text') +WHERE (ad_plan_id GLOBAL IN ('131078', '131061', '89270', '154810', '89272', '154811', '89275', '154808', '89274')) AND (uid IN ('67921ae675474d49b96f982ea6936363', '534757bd6e5340309cc119e54f23d4d7', '4f640b0110e647668df11478573dab9b')) AND (dsp_id IN ('1000001')) AND (kind IN ('', '')) settings max_in_value_list_to_pushdown= 5; + +EXPLAIN stats = 0 +SELECT + count(*) +FROM +( + SELECT * + FROM 
dws_ad_logs_rt_not_distinct + WHERE (ymd = toDate('2024-06-27 00:00:00')) AND (recv_timestamp >= '2024-06-27 00:00:00') AND (recv_timestamp <= '2024-06-27 23:59:59') +) AS t1 +LEFT JOIN platform_ad_plan AS t2 ON t1.ad_plan_id = CAST(t2.apid, 'text') +WHERE (ad_plan_id NOT IN ('131078', '131061', '89270', '154810', '89272', '154811', '89275', '154808', '89274')) AND (uid IN ('67921ae675474d49b96f982ea6936363', '534757bd6e5340309cc119e54f23d4d7', '4f640b0110e647668df11478573dab9b')) AND (dsp_id IN ('1000001')) AND (kind IN ('', '')) settings max_in_value_list_to_pushdown= 5; + +EXPLAIN stats = 0 +SELECT + count(*) +FROM +( + SELECT * + FROM dws_ad_logs_rt_not_distinct + WHERE (ymd = toDate('2024-06-27 00:00:00')) AND (recv_timestamp >= '2024-06-27 00:00:00') AND (recv_timestamp <= '2024-06-27 23:59:59') +) AS t1 +LEFT JOIN platform_ad_plan AS t2 ON t1.ad_plan_id = CAST(t2.apid, 'text') +WHERE (ad_plan_id GLOBAL NOT IN ('131078', '131061', '89270', '154810', '89272', '154811', '89275', '154808', '89274')) AND (uid IN ('67921ae675474d49b96f982ea6936363', '534757bd6e5340309cc119e54f23d4d7', '4f640b0110e647668df11478573dab9b')) AND (dsp_id IN ('1000001')) AND (kind IN ('', '')) settings max_in_value_list_to_pushdown= 5; + +DROP DATABASE IF EXISTS 48095_test; \ No newline at end of file From b39fd1d37993e5e86fb8f272a44a5e5fb24ab53c Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 02:42:07 +0000 Subject: [PATCH 079/292] Merge 'fix_create_view_settings_in_cnch22' into 'cnch-2.2' fix(optimizer@m-4086569214): Fix create view with settings See merge request: !22994 --- src/Interpreters/executeQuery.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index ee9bfa3261d..35957ed1e62 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -768,6 +768,16 @@ void interpretSettings(ASTPtr ast, ContextMutablePtr context) } } } + else if (const auto * 
create_select_query = ast->as(); create_select_query && create_select_query->select) + { + const auto * select_in_query = create_select_query->select->as(); + if (select_in_query && !select_in_query->list_of_selects->children.empty()) + { + const auto * last_select = select_in_query->list_of_selects->children.back()->as(); + if (last_select && last_select->settings()) + InterpreterSetQuery(last_select->settings(), context).executeForCurrentContext(); + } + } else if (const auto * query_with_output = dynamic_cast(ast.get())) { if (query_with_output->settings_ast) From 4d5e4c1b0cdae6a22b833f7162d03081b5ee538e Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 02:42:24 +0000 Subject: [PATCH 080/292] Merge branch 'fix_unique_delete_flag_column_2.2' into 'cnch-2.2' fix(clickhousech@m-4679215614): [cp-cnch-2.2] bugfix the delete flag column may cannot be correctly initialized See merge request dp/ClickHouse!23089 --- src/Storages/MergeTree/CloudMergeTreeBlockOutputStream.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/CloudMergeTreeBlockOutputStream.cpp b/src/Storages/MergeTree/CloudMergeTreeBlockOutputStream.cpp index 52b2b250924..6d65c2af62b 100644 --- a/src/Storages/MergeTree/CloudMergeTreeBlockOutputStream.cpp +++ b/src/Storages/MergeTree/CloudMergeTreeBlockOutputStream.cpp @@ -568,7 +568,7 @@ CloudMergeTreeBlockOutputStream::FilterInfo CloudMergeTreeBlockOutputStream::ded res.filter.assign(block_size, UInt8(1)); ColumnWithTypeAndName delete_flag_column; - if (version_column && block.has(StorageInMemoryMetadata::DELETE_FLAG_COLUMN_NAME)) + if (block.has(StorageInMemoryMetadata::DELETE_FLAG_COLUMN_NAME)) delete_flag_column = block.getByName(StorageInMemoryMetadata::DELETE_FLAG_COLUMN_NAME); auto is_delete_row = [&](int rowid) { return delete_flag_column.column && delete_flag_column.column->getBool(rowid); }; From 3aee68f17580745eae78f1db2bb8d188809a3b86 Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 
2024 02:42:39 +0000 Subject: [PATCH 081/292] Merge 'cherry-pick-2b2ba473-3' into 'cnch-2.2' fix(clickhousech@m-4675985170): [To cnch-2.2] Make gzip compression ends with 'gz' See merge request: !23039 --- src/Disks/DiskLocal.cpp | 2 +- src/IO/CompressionMethod.cpp | 21 +++++++++++++++++++++ src/IO/CompressionMethod.h | 2 ++ src/IO/OutfileCommon.cpp | 2 +- 4 files changed, 25 insertions(+), 2 deletions(-) diff --git a/src/Disks/DiskLocal.cpp b/src/Disks/DiskLocal.cpp index ba263d7dd01..da065748ddf 100644 --- a/src/Disks/DiskLocal.cpp +++ b/src/Disks/DiskLocal.cpp @@ -437,7 +437,7 @@ void registerDiskLocal(DiskFactory & factory) config.getUInt64(config_prefix + ".keep_free_space_inodes", 0), config.getUInt64("global_keep_free_space_inodes", 0)); double ratio = std::max( config.getDouble(config_prefix + ".keep_free_space_ratio", 0), - config.getDouble(config_prefix + "global_keep_free_space_ratio", 0.05)); + config.getDouble("global_keep_free_space_ratio", 0.05)); if (ratio < 0 || ratio > 1) throw Exception("'keep_free_space_ratio' have to be between 0 and 1", ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG); diff --git a/src/IO/CompressionMethod.cpp b/src/IO/CompressionMethod.cpp index ce2f4026353..e078d839ce0 100644 --- a/src/IO/CompressionMethod.cpp +++ b/src/IO/CompressionMethod.cpp @@ -49,6 +49,27 @@ std::string toContentEncodingName(CompressionMethod method) __builtin_unreachable(); } +std::string getFileSuffix(CompressionMethod method) +{ + switch (method) + { + case CompressionMethod::Gzip: + return "gz"; + case CompressionMethod::Zlib: + return "deflate"; + case CompressionMethod::Brotli: + return "br"; + case CompressionMethod::Xz: + return "xz"; + case CompressionMethod::Zstd: + return "zstd"; + case CompressionMethod::Snappy: + return "snappy"; + case CompressionMethod::None: + return ""; + } +} + CompressionMethod chooseCompressionMethod(const std::string & path, const std::string & hint) { std::string file_extension; diff --git a/src/IO/CompressionMethod.h 
b/src/IO/CompressionMethod.h index 105f8baae1c..a95e520b455 100644 --- a/src/IO/CompressionMethod.h +++ b/src/IO/CompressionMethod.h @@ -38,6 +38,8 @@ enum class CompressionMethod /// How the compression method is named in HTTP. std::string toContentEncodingName(CompressionMethod method); +std::string getFileSuffix(CompressionMethod method); + /** Choose compression method from path and hint. * if hint is "auto" or empty string, then path is analyzed, * otherwise path parameter is ignored and hint is used as compression method name. diff --git a/src/IO/OutfileCommon.cpp b/src/IO/OutfileCommon.cpp index ce193bb21bb..8af772b31bf 100644 --- a/src/IO/OutfileCommon.cpp +++ b/src/IO/OutfileCommon.cpp @@ -69,7 +69,7 @@ String getFullOutPath(String & format_name, String & path, int serial_no, Compre } if (compression_method != CompressionMethod::None) - out_path += "." + toContentEncodingName(compression_method); + out_path += "." + getFileSuffix(compression_method); return out_path; } From 9e17ed130f5cb40a4078d762024f44413cfb9ad9 Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 02:43:03 +0000 Subject: [PATCH 082/292] Merge 'cherry-pick-mr-23090' into 'cnch-2.2' fix(clickhousech@m-4678887102): add obsolete setting funnel_old_rule See merge request: !23104 --- src/Core/Settings.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 7e7f49da852..5efb9275ad9 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -1859,6 +1859,7 @@ enum PreloadLevelSettings : UInt64 MAKE_OBSOLETE(M, UInt64, exchange_local_no_repartition_extra_threads, 32) \ MAKE_OBSOLETE(M, UInt64, filtered_ratio_to_use_skip_read, 0) \ MAKE_OBSOLETE(M, Bool, enable_two_stages_prewhere, false) \ + MAKE_OBSOLETE(M, Bool, funnel_old_rule, false) \ /** End of OBSOLETE_SETTINGS */ \ #define FORMAT_FACTORY_SETTINGS(M) \ From 9eae1eea448614dbce93fd0d2926215c7493bbbf Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 02:43:22 +0000 Subject: 
[PATCH 083/292] Merge 'fix_mysql_having_name_resolution_for_agg-cnch-2.2' into 'cnch-2.2' fix(clickhousech@m-4209994537): fix mysql having name resolution for agg cnch 2.2 See merge request: !23056 --- src/Analyzers/resolveNamesAsMySQL.cpp | 17 ++++++++++++----- ...40099_mysql_having_name_resolution.reference | 7 +++++++ .../40099_mysql_having_name_resolution.sql | 5 +++++ 3 files changed, 24 insertions(+), 5 deletions(-) diff --git a/src/Analyzers/resolveNamesAsMySQL.cpp b/src/Analyzers/resolveNamesAsMySQL.cpp index 49a6c8b838d..702deadfe53 100644 --- a/src/Analyzers/resolveNamesAsMySQL.cpp +++ b/src/Analyzers/resolveNamesAsMySQL.cpp @@ -1,5 +1,6 @@ #include +#include #include #include #include @@ -55,6 +56,11 @@ namespace { } void rewriteName(ASTPtr & ast, const ASTPtr & root_expression); + void rewriteChildren(ASTPtr & ast, const ASTPtr & root_expression) + { + for (auto & child : ast->children) + rewriteName(child, root_expression); + } private: std::vector levels; @@ -73,11 +79,12 @@ namespace break; } } - else if ((ast->as() && ast->as()->name != "lambda") || ast->as()) - { - for (auto & child : ast->children) - rewriteName(child, root_expression); - } + else if (const auto * func = ast->as(); func + && !AggregateUtils::isAggregateFunction(*func) /* prefer source column under aggregate function */ + && func->name != "lambda") + rewriteChildren(ast, root_expression); + else if (ast->as()) + rewriteChildren(ast, root_expression); } void collectNamedExpressions(const ASTPtr & expression, NamedExpressions & named_expressions) diff --git a/tests/queries/4_cnch_stateless_no_tenant/40099_mysql_having_name_resolution.reference b/tests/queries/4_cnch_stateless_no_tenant/40099_mysql_having_name_resolution.reference index d028d75e2e4..fe0e7e67b0a 100644 --- a/tests/queries/4_cnch_stateless_no_tenant/40099_mysql_having_name_resolution.reference +++ b/tests/queries/4_cnch_stateless_no_tenant/40099_mysql_having_name_resolution.reference @@ -96,3 +96,10 @@ Projection Est. ? 
rows Outputs: [b], a_1:=a -- "b" is ambiguous as there are 2 items with name "b" in GROUP BY SELECT t40099_x.b, t40099_y.b, sum(c) as b FROM t40099_x JOIN t40099_y ON t40099_x.a = t40099_y.a GROUP BY t40099_x.b, t40099_y.b HAVING b = 10; -- { serverError 179 } +-- prefer source column under aggregation +SELECT sum(b) as b FROM t40099_x HAVING b = 10; +10 +SELECT sum(b) as b FROM t40099_x HAVING sum(b) = 10; +10 +SELECT sum(b) as b FROM t40099_x HAVING floor(b) = 10; +10 diff --git a/tests/queries/4_cnch_stateless_no_tenant/40099_mysql_having_name_resolution.sql b/tests/queries/4_cnch_stateless_no_tenant/40099_mysql_having_name_resolution.sql index 45a5c8ddaf8..be2f1057aaf 100644 --- a/tests/queries/4_cnch_stateless_no_tenant/40099_mysql_having_name_resolution.sql +++ b/tests/queries/4_cnch_stateless_no_tenant/40099_mysql_having_name_resolution.sql @@ -41,6 +41,11 @@ EXPLAIN SELECT t40099_y.b, sum(c) as b FROM t40099_x JOIN t40099_y ON t40099_x.a -- "b" is ambiguous as there are 2 items with name "b" in GROUP BY SELECT t40099_x.b, t40099_y.b, sum(c) as b FROM t40099_x JOIN t40099_y ON t40099_x.a = t40099_y.a GROUP BY t40099_x.b, t40099_y.b HAVING b = 10; -- { serverError 179 } +-- prefer source column under aggregation +SELECT sum(b) as b FROM t40099_x HAVING b = 10; +SELECT sum(b) as b FROM t40099_x HAVING sum(b) = 10; +SELECT sum(b) as b FROM t40099_x HAVING floor(b) = 10; + -- { echoOff } drop table if exists t40099_x; From 3c3b601a9b12db6ebeddabbe4a7299bd663f180d Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 02:43:40 +0000 Subject: [PATCH 084/292] Merge 'feat/improve_validate_materialized_view_cnch_2.2' into 'cnch-2.2' fix(optimizer@m-4676553539): fix share common plan node coredump or identifier resolve error for join cnch-2.2 See merge request: !23100 --- src/Core/tests/gtest_protobuf_common.h | 4 +- .../Rewriter/ShareCommonPlanNode.cpp | 4 +- src/Optimizer/Signature/PlanSignature.h | 5 +- src/QueryPlan/IQueryPlanStep.cpp | 4 +- 
src/QueryPlan/IQueryPlanStep.h | 2 +- src/QueryPlan/PlanSerDerHelper.cpp | 8 +-- src/QueryPlan/PlanSerDerHelper.h | 2 +- ...1005_share_common_plan_node_join.reference | 0 .../51005_share_common_plan_node_join.sql | 60 +++++++++++++++++++ 9 files changed, 73 insertions(+), 16 deletions(-) create mode 100644 tests/queries/4_cnch_stateless/51005_share_common_plan_node_join.reference create mode 100644 tests/queries/4_cnch_stateless/51005_share_common_plan_node_join.sql diff --git a/src/Core/tests/gtest_protobuf_common.h b/src/Core/tests/gtest_protobuf_common.h index 558a6a2ffe1..e75698161da 100644 --- a/src/Core/tests/gtest_protobuf_common.h +++ b/src/Core/tests/gtest_protobuf_common.h @@ -202,8 +202,8 @@ class ProtobufTest : public testing::Test { auto is_equal = isPlanStepEqual(*a, *b); ASSERT_TRUE(is_equal); - auto ha = hashPlanStep(*a); - auto hb = hashPlanStep(*b); + auto ha = hashPlanStep(*a, true); + auto hb = hashPlanStep(*b, true); ASSERT_EQ(ha, hb); } diff --git a/src/Optimizer/Rewriter/ShareCommonPlanNode.cpp b/src/Optimizer/Rewriter/ShareCommonPlanNode.cpp index 8809a2d9cf7..a9e204a395b 100644 --- a/src/Optimizer/Rewriter/ShareCommonPlanNode.cpp +++ b/src/Optimizer/Rewriter/ShareCommonPlanNode.cpp @@ -57,12 +57,12 @@ class ShareCommonPlanNode::Rewriter : public SimplePlanRewriter auto cte_id = cte.first; auto forward_order = plan_signature_output_orders.at(node_ptr); - auto reserve_order = plan_signature_output_orders.at(cte.second); + auto reverse_order = plan_signature_output_orders.at(cte.second); std::unordered_map output_columns; for (const auto & output : node.getOutputNames()) { - auto input_column = reserve_order.getByPosition(forward_order.getPositionByName(output)).name; + auto input_column = reverse_order.getByPosition(forward_order.getPositionByName(output)).name; output_columns.emplace(output, input_column); } return PlanNodeBase::createPlanNode( diff --git a/src/Optimizer/Signature/PlanSignature.h b/src/Optimizer/Signature/PlanSignature.h 
index a19e99e3c33..518fd633ffd 100644 --- a/src/Optimizer/Signature/PlanSignature.h +++ b/src/Optimizer/Signature/PlanSignature.h @@ -56,10 +56,7 @@ class PlanSignatureProvider } protected: - virtual PlanSignature computeStepHash(PlanNodePtr node) - { - return normalizer.computeNormalStep(node)->hash(); - } + virtual PlanSignature computeStepHash(PlanNodePtr node) { return normalizer.computeNormalStep(node)->hash(false); } static size_t combine(const std::vector & hashes); diff --git a/src/QueryPlan/IQueryPlanStep.cpp b/src/QueryPlan/IQueryPlanStep.cpp index ea815d6c3d3..f7ff7f4d6ba 100644 --- a/src/QueryPlan/IQueryPlanStep.cpp +++ b/src/QueryPlan/IQueryPlanStep.cpp @@ -282,9 +282,9 @@ String IQueryPlanStep::toString(Type type) return "Unknown"; } -size_t IQueryPlanStep::hash() const +size_t IQueryPlanStep::hash(bool ignore_output_stream) const { - return hashPlanStep(*this); + return hashPlanStep(*this, ignore_output_stream); } } diff --git a/src/QueryPlan/IQueryPlanStep.h b/src/QueryPlan/IQueryPlanStep.h index 65b4b5f2e9a..8e0c2330630 100644 --- a/src/QueryPlan/IQueryPlanStep.h +++ b/src/QueryPlan/IQueryPlanStep.h @@ -291,7 +291,7 @@ class IQueryPlanStep virtual std::shared_ptr copy(ContextPtr) const = 0; - size_t hash() const; + size_t hash(bool ignore_output_stream = true) const; bool operator==(const IQueryPlanStep & r) const { diff --git a/src/QueryPlan/PlanSerDerHelper.cpp b/src/QueryPlan/PlanSerDerHelper.cpp index ed8724109fb..dc48ad1e53f 100644 --- a/src/QueryPlan/PlanSerDerHelper.cpp +++ b/src/QueryPlan/PlanSerDerHelper.cpp @@ -367,23 +367,23 @@ bool isPlanStepEqual(const IQueryPlanStep & a, const IQueryPlanStep & b) } template -UInt64 hashPlanStepImpl(const IQueryPlanStep & raw_step) +UInt64 hashPlanStepImpl(const IQueryPlanStep & raw_step, bool ignore_output_stream) { const auto & step = reinterpret_cast(raw_step); ProtoType proto; - step.toProto(proto, true); + step.toProto(proto, ignore_output_stream); auto res = sipHash64Protobuf(proto); return res; 
} -UInt64 hashPlanStep(const IQueryPlanStep & step) +UInt64 hashPlanStep(const IQueryPlanStep & step, bool ignore_output_stream) { switch (step.getType()) { #define CASE_DEF(TYPE, VAR_NAME) \ case IQueryPlanStep::Type::TYPE: { \ - return hashPlanStepImpl(step); \ + return hashPlanStepImpl(step, ignore_output_stream); \ } APPLY_STEP_PROTOBUF_TYPES_AND_NAMES(CASE_DEF) diff --git a/src/QueryPlan/PlanSerDerHelper.h b/src/QueryPlan/PlanSerDerHelper.h index 0eb5d890ac1..15c033a809e 100644 --- a/src/QueryPlan/PlanSerDerHelper.h +++ b/src/QueryPlan/PlanSerDerHelper.h @@ -270,5 +270,5 @@ void serializeQueryPlanStepToProto(const QueryPlanStepPtr & step, Protos::QueryP QueryPlanStepPtr deserializeQueryPlanStepFromProto(const Protos::QueryPlanStep & proto, ContextPtr context); bool isPlanStepEqual(const IQueryPlanStep & a, const IQueryPlanStep & b); -UInt64 hashPlanStep(const IQueryPlanStep & step); +UInt64 hashPlanStep(const IQueryPlanStep & step, bool ignore_output_stream); } diff --git a/tests/queries/4_cnch_stateless/51005_share_common_plan_node_join.reference b/tests/queries/4_cnch_stateless/51005_share_common_plan_node_join.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/4_cnch_stateless/51005_share_common_plan_node_join.sql b/tests/queries/4_cnch_stateless/51005_share_common_plan_node_join.sql new file mode 100644 index 00000000000..d25e0bfb145 --- /dev/null +++ b/tests/queries/4_cnch_stateless/51005_share_common_plan_node_join.sql @@ -0,0 +1,60 @@ +DROP TABLE IF EXISTS 51005_share_common_plan_node_join; + +CREATE TABLE 51005_share_common_plan_node_join (`id` UInt32, `k1` UInt32, `k2` String) ENGINE = CnchMergeTree +ORDER BY id; + +-- not hint +SELECT + subquery1.id, + subquery1.t1k1, + subquery2.t1k1, + subquery2.t2k1 +FROM + ( + SELECT + t1.id, + t1.k1 t1k1 + FROM + 51005_share_common_plan_node_join t1 + LEFT JOIN 51005_share_common_plan_node_join t2 ON t1.id = t2.id + AND t1.k1 = t2.k1 + ) subquery1 + LEFT JOIN ( + SELECT + 
t1.id, + t1.k1 t1k1, + t2.k1 t2k1 + FROM + 51005_share_common_plan_node_join t1 + LEFT JOIN 51005_share_common_plan_node_join t2 ON t1.id = t2.id + AND t1.k1 = t2.k1 + ) subquery2 ON subquery1.id = subquery2.id settings enable_share_common_plan_node = 1, + max_buffer_size_for_deadlock_cte = -1, + cte_mode = 'SHARED'; + +-- hint share common plan node +SELECT + subquery1.id, + subquery1.t1k1, + subquery2.t1k1 +FROM + ( + SELECT + t1.id, + t1.k1 t1k1 + FROM + 51005_share_common_plan_node_join t1 + LEFT JOIN 51005_share_common_plan_node_join t2 ON t1.id = t2.id + AND t1.k1 = t2.k1 + ) subquery1 + LEFT JOIN ( + SELECT + t1.id, + t1.k1 t1k1 + FROM + 51005_share_common_plan_node_join t1 + LEFT JOIN 51005_share_common_plan_node_join t2 ON t1.id = t2.id + AND t1.k1 = t2.k1 + ) subquery2 ON subquery1.id = subquery2.id settings enable_share_common_plan_node = 1, + max_buffer_size_for_deadlock_cte = -1, + cte_mode = 'SHARED'; From ce5ed89b5eeabbafb13276f542e23768fa8d0120 Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 02:43:56 +0000 Subject: [PATCH 085/292] Merge 'fix-write-part-core-cnch-2.2' into 'cnch-2.2' fix(clickhousech@m-4676838541): [cp] avoid coredump when disk is full See merge request: !23074 --- src/Common/StatusFile.cpp | 13 ++++++++++++- .../MergeTree/MergeTreeDataPartChecksum.cpp | 1 + 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/src/Common/StatusFile.cpp b/src/Common/StatusFile.cpp index ef00bb41cb2..36b17320749 100644 --- a/src/Common/StatusFile.cpp +++ b/src/Common/StatusFile.cpp @@ -85,7 +85,18 @@ StatusFile::StatusFile(std::string path_, FillFunction fill_) /// Write information about current server instance to the file. WriteBufferFromFileDescriptor out(fd, 1024); - fill(out); + try + { + fill(out); + /// Finalize here to avoid throwing exceptions in destructor. + out.finalize(); + } + catch (...) + { + /// Finalize in case of exception to avoid throwing exceptions in destructor + out.finalize(); + throw; + } } catch (...) 
{ diff --git a/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp b/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp index 160213c4ad9..39b156654e5 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp @@ -455,6 +455,7 @@ void MergeTreeDataPartChecksums::write(WriteBuffer & to) const // writeBinary(sum.is_encrypted, out); writeBinary(sum.is_deleted, out); } + out.finalize(); } void MergeTreeDataPartChecksums::addFile(const String & file_name, UInt64 file_size, MergeTreeDataPartChecksum::uint128 file_hash) From ded93e6108d057693a5fa2314dafbe4cf671567e Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 02:44:15 +0000 Subject: [PATCH 086/292] Merge 'youzhiyuan_refine_cityHash64V2_2_2' into 'cnch-2.2' fix(clickhousech@m-4677211522): improve performance for cityHash64V2 handling nullable type See merge request: !23041 --- src/Functions/FunctionsHashing.h | 36 +++++++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h index 22c3ce0259c..4fea62d135c 100644 --- a/src/Functions/FunctionsHashing.h +++ b/src/Functions/FunctionsHashing.h @@ -1610,13 +1610,39 @@ class FunctionAnyHash : public IFunction if (const ColumnNullable * nullable = typeid_cast(column)) { const IColumn * nullable_column = &nullable->getNestedColumn(); - executeAny(key, nullable_type, nullable_column, vec_to); const auto & null_map_data = nullable->getNullMapData(); auto s = nullable_column->size(); - /// Use fixed data for nulls. - for (size_t row = 0; row < s; ++row) - if (null_map_data[row]) - vec_to[row] = value; + if (first) + { + executeAny(key, nullable_type, nullable_column, vec_to); + /// Use fixed data for nulls. 
+ for (size_t row = 0; row < s; ++row) + if (null_map_data[row]) + vec_to[row] = value; + } + else + { + std::vector null_list; + std::vector null_list_value_before; + null_list.reserve(s); + null_list_value_before.reserve(s); + for (size_t row = 0; row < s; ++row) + { + if (null_map_data[row]) + { + null_list.push_back(row); + null_list_value_before.push_back(vec_to[row]); + } + } + executeAny(key, nullable_type, nullable_column, vec_to); + + for (size_t i = 0; i < null_list.size(); ++i) + { + size_t row = null_list[i]; + ToType value_before = null_list_value_before[i]; + vec_to[row] = value_before; + } + } } // else if (const ColumnNullable * nullable_const = checkAndGetColumnConstData(column)) // { From a48a5276f64dce99abac2ba6de75d61d0ff4ebd4 Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 02:44:33 +0000 Subject: [PATCH 087/292] Merge 'fix/fix_mark_distinct_optimization_2.2' into 'cnch-2.2' fix(optimizer@m-4674753509): MultipleDistinctAggregationToMarkDistinct supports count distinct with multi columns cnch-2.2 See merge request: !23102 --- ...leDistinctAggregationToExpandAggregate.cpp | 82 +++++++++++++------ ...ipleDistinctAggregationToExpandAggregate.h | 6 +- ...tipleDistinctAggregationToMarkDistinct.cpp | 59 +++++++++++-- ...091_distinct_aggregate_to_expand.reference | 10 +++ .../40091_distinct_aggregate_to_expand.sql | 23 +++++- 5 files changed, 143 insertions(+), 37 deletions(-) diff --git a/src/Optimizer/Rule/Rewrite/MultipleDistinctAggregationToExpandAggregate.cpp b/src/Optimizer/Rule/Rewrite/MultipleDistinctAggregationToExpandAggregate.cpp index 98d4b9a87eb..7b909b30465 100644 --- a/src/Optimizer/Rule/Rewrite/MultipleDistinctAggregationToExpandAggregate.cpp +++ b/src/Optimizer/Rule/Rewrite/MultipleDistinctAggregationToExpandAggregate.cpp @@ -8,6 +8,8 @@ #include #include #include +#include +#include #include #include #include @@ -17,7 +19,6 @@ #include #include #include -#include "Interpreters/join_common.h" namespace DB { @@ -114,19 
+115,6 @@ bool MultipleDistinctAggregationToExpandAggregate::hasUniqueArgument(const Aggre return true; } -bool MultipleDistinctAggregationToExpandAggregate::allCountHasAtMostOneArguments(const AggregatingStep & s) -{ - for (const auto & agg : s.getAggregates()) - { - if (Poco::toLower(agg.function->getName()) == "uniqexact" || Poco::toLower(agg.function->getName()) == "countdistinct") - { - if (agg.argument_names.size() > 1) - return false; - } - } - return true; -} - bool MultipleDistinctAggregationToExpandAggregate::hasNoUnSupportedFunc(const AggregatingStep & step) { const AggregateDescriptions & agg_descs = step.getAggregates(); @@ -143,7 +131,7 @@ ConstRefPatternPtr MultipleDistinctAggregationToExpandAggregate::getPattern() co static auto pattern = Patterns::aggregating() .matchingStep([](const AggregatingStep & s) { return hasNoFilterOrMask(s) && (hasMultipleDistincts(s) || hasMixedDistinctAndNonDistincts(s)) - && hasUniqueArgument(s) && allCountHasAtMostOneArguments(s) && hasNoUnSupportedFunc(s); + && hasUniqueArgument(s) && hasNoUnSupportedFunc(s); }) .result(); return pattern; @@ -199,6 +187,9 @@ TransformResult MultipleDistinctAggregationToExpandAggregate::transformImpl(Plan AggregateDescriptions aggs_with_mask; String non_distinct_agg_group_id_mask; + + Assignments new_argument_assignments; + for (const auto & agg_desc : agg_descs) { String group_id_mask; @@ -222,7 +213,7 @@ TransformResult MultipleDistinctAggregationToExpandAggregate::transformImpl(Plan makeASTFunction( "equals", std::make_shared(group_id_symbol), std::make_shared(distinct_group_id))); - aggs_with_mask.emplace_back(distinctAggWithMask(agg_desc, group_id_mask)); + aggs_with_mask.emplace_back(distinctAggWithMask(agg_desc, group_id_mask, new_argument_assignments, rule_context.context)); distinct_group_id++; } else @@ -329,11 +320,27 @@ TransformResult MultipleDistinctAggregationToExpandAggregate::transformImpl(Plan } auto mask_step = 
std::make_shared(pre_agg_node->getStep()->getOutputStream(), mask_assignments, mask_null_name_to_type); - auto mask_node = std::make_shared(rule_context.context->nextNodeId(), std::move(mask_step), PlanNodes{pre_agg_node}); + child = PlanNodeBase::createPlanNode(rule_context.context->nextNodeId(), std::move(mask_step), PlanNodes{pre_agg_node}); + + if (!new_argument_assignments.empty()) + { + NameToType name_to_type; + for (const auto & assignment : new_argument_assignments) + name_to_type.emplace(assignment.first, std::make_shared()); + + for (const auto & input_column : child->getStep()->getOutputStream().header) + { + new_argument_assignments.emplace(input_column.name, makeASTIdentifier(input_column.name)); + name_to_type.emplace(input_column.name, input_column.type); + } + auto new_argument_projection_step + = std::make_shared(child->getStep()->getOutputStream(), new_argument_assignments, name_to_type); + child = PlanNodeBase::createPlanNode(rule_context.context->nextNodeId(), std::move(new_argument_projection_step), {child}); + } // step 4 : final aggregate auto count_agg_step = std::make_shared( - mask_node->getStep()->getOutputStream(), + child->getStep()->getOutputStream(), step.getKeys(), step.getKeysNotHashed(), aggs_with_mask, @@ -346,25 +353,46 @@ TransformResult MultipleDistinctAggregationToExpandAggregate::transformImpl(Plan step.isNoShuffle(), step.isStreamingForCache(), step.getHints()); - auto count_agg_node = PlanNodeBase::createPlanNode(rule_context.context->nextNodeId(), std::move(count_agg_step), {mask_node}); + auto count_agg_node = PlanNodeBase::createPlanNode(rule_context.context->nextNodeId(), std::move(count_agg_step), {child}); return count_agg_node; } -AggregateDescription -MultipleDistinctAggregationToExpandAggregate::distinctAggWithMask(const AggregateDescription & agg_desc, String & mask_column) +AggregateDescription MultipleDistinctAggregationToExpandAggregate::distinctAggWithMask( + const AggregateDescription & agg_desc, String & 
mask_column, Assignments & new_argument_assignments, ContextMutablePtr context) { - DataTypes data_types = agg_desc.function->getArgumentTypes(); + String fun_remove_distinct = distinct_func_normal_func.at(Poco::toLower(agg_desc.function->getName())); + Names argument_names; + DataTypes data_types; + if (fun_remove_distinct == "countIf" && agg_desc.argument_names.size() > 1) + { + // countDistinct(arg1, arg2) cannot convert to count(arg1, arg2), because clickhousedon't support count multi arguments. + // As an alternative we can rewrite it to count(IF(arg1 is null, null, arg2 is null, null, 1)), + // or sum((arg1 is not null) AND (arg2 is not null)) + fun_remove_distinct = "sumIf"; + + ASTs argument_functions; + for (const auto & argument : agg_desc.argument_names) + argument_functions.emplace_back(makeASTFunction("isNotNull", makeASTIdentifier(argument))); + auto new_argument = PredicateUtils::combineConjuncts(argument_functions); + auto new_argument_name = context->getSymbolAllocator()->newSymbol(new_argument); + new_argument_assignments.emplace_back(new_argument_name, new_argument); + + argument_names.emplace_back(new_argument_name); + data_types.emplace_back(std::make_shared()); + } + else + { + argument_names = agg_desc.argument_names; + data_types = agg_desc.function->getArgumentTypes(); + } + + argument_names.emplace_back(mask_column); data_types.emplace_back(std::make_shared()); Array parameters = agg_desc.function->getParameters(); AggregateFunctionProperties properties; - - String fun_remove_distinct = distinct_func_normal_func.at(Poco::toLower(agg_desc.function->getName())); AggregateFunctionPtr new_agg_fun = AggregateFunctionFactory::instance().get(fun_remove_distinct, data_types, parameters, properties); - Names argument_names = agg_desc.argument_names; - - argument_names.emplace_back(mask_column); AggregateDescription agg_with_mask; diff --git a/src/Optimizer/Rule/Rewrite/MultipleDistinctAggregationToExpandAggregate.h 
b/src/Optimizer/Rule/Rewrite/MultipleDistinctAggregationToExpandAggregate.h index 377550b2660..48e8cfb87e9 100644 --- a/src/Optimizer/Rule/Rewrite/MultipleDistinctAggregationToExpandAggregate.h +++ b/src/Optimizer/Rule/Rewrite/MultipleDistinctAggregationToExpandAggregate.h @@ -1,6 +1,7 @@ #pragma once #include #include +#include "Interpreters/Context_fwd.h" #include "Optimizer/Rule/Pattern.h" namespace DB @@ -110,10 +111,9 @@ class MultipleDistinctAggregationToExpandAggregate : public Rule */ static bool hasUniqueArgument(const AggregatingStep & step); - // All Count Aggregate Functions must have at most one argument. - static bool allCountHasAtMostOneArguments(const AggregatingStep & step); + static AggregateDescription distinctAggWithMask( + const AggregateDescription & agg_desc, String & mask_column, Assignments & new_argument_assignments, ContextMutablePtr context); - static AggregateDescription distinctAggWithMask(const AggregateDescription & agg_desc, String & mask_column); static AggregateDescription nonDistinctAggWithMask(const AggregateDescription & agg_desc, String & mask_column); static PlanNodePtr makeUnionNode( diff --git a/src/Optimizer/Rule/Rewrite/MultipleDistinctAggregationToMarkDistinct.cpp b/src/Optimizer/Rule/Rewrite/MultipleDistinctAggregationToMarkDistinct.cpp index 9b75bc1bc56..5ea703b644a 100644 --- a/src/Optimizer/Rule/Rewrite/MultipleDistinctAggregationToMarkDistinct.cpp +++ b/src/Optimizer/Rule/Rewrite/MultipleDistinctAggregationToMarkDistinct.cpp @@ -1,6 +1,13 @@ +#include #include +#include +#include +#include +#include #include #include +#include +#include #include #include @@ -75,6 +82,8 @@ TransformResult MultipleDistinctAggregationToMarkDistinct::transformImpl(PlanNod AggregateDescriptions new_agg_descs; PlanNodePtr child = node->getChildren()[0]; + Assignments new_argument_assignments; + for (const auto & agg_desc : agg_descs) { if (distinct_func.contains(Poco::toLower(agg_desc.function->getName())) && agg_desc.mask_column.empty()) 
@@ -114,18 +123,40 @@ TransformResult MultipleDistinctAggregationToMarkDistinct::transformImpl(PlanNod child = PlanNodeBase::createPlanNode(rule_context.context->nextNodeId(), std::move(mark_distinct_step), PlanNodes{child}); } - DataTypes data_types = agg_desc.function->getArgumentTypes(); + // remove the distinct flag and set the distinct marker + String fun_remove_distinct = distinct_func_normal_func.at(Poco::toLower(agg_desc.function->getName())); + Names argument_names; + DataTypes data_types; + if (fun_remove_distinct == "countIf" && agg_desc.argument_names.size() > 1) + { + // countDistinct(arg1, arg2) cannot convert to count(arg1, arg2), because clickhousedon't support count multi arguments. + // As an alternative we can rewrite it to count(IF(arg1 is null, null, arg2 is null, null, 1)), + // or sum((arg1 is not null) AND (arg2 is not null)) + fun_remove_distinct = "sumIf"; + + ASTs argument_functions; + for (const auto & argument : agg_desc.argument_names) + argument_functions.emplace_back(makeASTFunction("isNotNull", makeASTIdentifier(argument))); + auto new_argument = PredicateUtils::combineConjuncts(argument_functions); + auto new_argument_name = rule_context.context->getSymbolAllocator()->newSymbol(new_argument); + new_argument_assignments.emplace_back(new_argument_name, new_argument); + + argument_names.emplace_back(new_argument_name); + data_types.emplace_back(std::make_shared()); + } + else + { + argument_names = agg_desc.argument_names; + data_types = agg_desc.function->getArgumentTypes(); + } + + argument_names.emplace_back(marker); data_types.emplace_back(std::make_shared()); Array parameters = agg_desc.function->getParameters(); AggregateFunctionProperties properties; - - // remove the distinct flag and set the distinct marker - String fun_remove_distinct = distinct_func_normal_func.at(Poco::toLower(agg_desc.function->getName())); AggregateFunctionPtr new_agg_fun = AggregateFunctionFactory::instance().get(fun_remove_distinct, data_types, 
parameters, properties); - Names argument_names = agg_desc.argument_names; - argument_names.emplace_back(marker); AggregateDescription new_agg_desc; @@ -145,6 +176,22 @@ TransformResult MultipleDistinctAggregationToMarkDistinct::transformImpl(PlanNod } } + if (!new_argument_assignments.empty()) + { + NameToType name_to_type; + for (const auto & assignment : new_argument_assignments) + name_to_type.emplace(assignment.first, std::make_shared()); + + for (const auto & input : child->getStep()->getOutputStream().header) + { + new_argument_assignments.emplace(input.name, makeASTIdentifier(input.name)); + name_to_type.emplace(input.name, input.type); + } + auto new_argument_projection_step + = std::make_shared(child->getStep()->getOutputStream(), new_argument_assignments, name_to_type); + child = PlanNodeBase::createPlanNode(rule_context.context->nextNodeId(), std::move(new_argument_projection_step), {child}); + } + auto count_agg_step = std::make_shared( child->getStep()->getOutputStream(), step.getKeys(), diff --git a/tests/queries/4_cnch_stateless/40091_distinct_aggregate_to_expand.reference b/tests/queries/4_cnch_stateless/40091_distinct_aggregate_to_expand.reference index 95815b2fabd..05734ddcb80 100644 --- a/tests/queries/4_cnch_stateless/40091_distinct_aggregate_to_expand.reference +++ b/tests/queries/4_cnch_stateless/40091_distinct_aggregate_to_expand.reference @@ -1,2 +1,12 @@ 1 nan +1 +nan +1 +nan +1 +nan +1 +nan +1 +nan diff --git a/tests/queries/4_cnch_stateless/40091_distinct_aggregate_to_expand.sql b/tests/queries/4_cnch_stateless/40091_distinct_aggregate_to_expand.sql index cebda2bdaaa..b0b0ffe7701 100644 --- a/tests/queries/4_cnch_stateless/40091_distinct_aggregate_to_expand.sql +++ b/tests/queries/4_cnch_stateless/40091_distinct_aggregate_to_expand.sql @@ -1,6 +1,27 @@ drop table if exists t1; -CREATE TABLE t1(c1 UInt64, c2 String, c3 Int32) ENGINE = CnchMergeTree PARTITION BY c1 ORDER BY c1; +CREATE TABLE t1(c1 UInt64, c2 Nullable(String), c3 Int32) 
ENGINE = CnchMergeTree PARTITION BY c1 ORDER BY c1; + insert into t1 values (1, 'a', 1); insert into t1 values (2, 'b', 1); +insert into t1 values (3, null, 0); + select count(distinct c2) / sum(c3) from t1; select count(distinct c2) / sum(c3) from t1 where c1 > 10; + +select count(distinct c2, c3) / sum(c3) from t1; +select count(distinct c2, c3) / sum(c3) from t1 where c1 > 10; + +set enable_expand_distinct_optimization=1; + +select count(distinct c2) / sum(c3) from t1; +select count(distinct c2) / sum(c3) from t1 where c1 > 10; + +select count(distinct c2, c3) / sum(c3) from t1; +select count(distinct c2, c3) / sum(c3) from t1 where c1 > 10; + +set enable_mark_distinct_optimzation=1; +select count(distinct c2) / sum(c3) from t1; +select count(distinct c2) / sum(c3) from t1 where c1 > 10; + +select count(distinct c2, c3) / sum(c3) from t1; +select count(distinct c2, c3) / sum(c3) from t1 where c1 > 10; From 63c45b7a8021b5ffe65277ef6ee8ade8803a4cad Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 02:44:51 +0000 Subject: [PATCH 088/292] Merge 'cherry-pick-mr-22947' into 'cnch-2.2' fix(clickhousech@m-4675484022): [cp]fix wrong database name in loadDictsForCnchServer See merge request: !22981 --- src/Functions/FunctionsBitEngineHelper.h | 9 +-- ...bitengine_server_cloud_table_fix.reference | 1 + ...20014_bitengine_server_cloud_table_fix.sql | 57 +++++++++++++++++++ 3 files changed, 63 insertions(+), 4 deletions(-) create mode 100644 tests/queries/4_cnch_stateless/20014_bitengine_server_cloud_table_fix.reference create mode 100644 tests/queries/4_cnch_stateless/20014_bitengine_server_cloud_table_fix.sql diff --git a/src/Functions/FunctionsBitEngineHelper.h b/src/Functions/FunctionsBitEngineHelper.h index 10fd8b5e0e7..84d53b7add7 100644 --- a/src/Functions/FunctionsBitEngineHelper.h +++ b/src/Functions/FunctionsBitEngineHelper.h @@ -138,8 +138,9 @@ StorageCloudMergeTree * loadDictsForCnchServer( const auto & dicts_mapping = 
storage_bitengine_cnch->getUnderlyDictionaryTables(); for (const auto & entry : dicts_mapping) { + StorageID dict_table_id{entry.second.first, entry.second.second}; auto storage_underlying_dict - = DatabaseCatalog::instance().tryGetTable(StorageID{entry.second.first, entry.second.second}, local_context); + = DatabaseCatalog::instance().tryGetTable(dict_table_id, local_context); StorageCnchMergeTree * storage_underlying_dict_cnch = dynamic_cast(storage_underlying_dict.get()); if (storage_underlying_dict_cnch) { @@ -158,7 +159,7 @@ StorageCloudMergeTree * loadDictsForCnchServer( /// try find dict_cloud_table first, maybe it's created already, like insert into select DecodeBitmap() auto storage_underlying_dict_cloud - = worker_resource->getTable(StorageID{storage_bitengine_cnch->getDatabaseName(), dict_table_name_cloud}); + = worker_resource->getTable(StorageID{dict_table_id.getDatabaseName(), dict_table_name_cloud}); bool dict_cloud_already_exists{true}; if (!storage_underlying_dict_cloud) @@ -169,7 +170,7 @@ StorageCloudMergeTree * loadDictsForCnchServer( /// after dict_cloud_table created, now get and load parts storage_underlying_dict_cloud - = worker_resource->getTable(StorageID{storage_bitengine_cnch->getDatabaseName(), dict_table_name_cloud}); + = worker_resource->getTable(StorageID{dict_table_id.getDatabaseName(), dict_table_name_cloud}); auto * underlying_dict_cloud_table = dynamic_cast(storage_underlying_dict_cloud.get()); if (!underlying_dict_cloud_table) @@ -177,7 +178,7 @@ StorageCloudMergeTree * loadDictsForCnchServer( throw Exception( fmt::format( "In decoding, cannot get DictCloudMergeTree for table:<`{}`.`{}`>", - storage_bitengine_cnch->getDatabaseName(), + dict_table_id.getDatabaseName(), dict_table_name_cloud), ErrorCodes::UNKNOWN_TABLE); } diff --git a/tests/queries/4_cnch_stateless/20014_bitengine_server_cloud_table_fix.reference b/tests/queries/4_cnch_stateless/20014_bitengine_server_cloud_table_fix.reference new file mode 100644 index 
00000000000..6e3d4bf4d55 --- /dev/null +++ b/tests/queries/4_cnch_stateless/20014_bitengine_server_cloud_table_fix.reference @@ -0,0 +1 @@ +{48,66} diff --git a/tests/queries/4_cnch_stateless/20014_bitengine_server_cloud_table_fix.sql b/tests/queries/4_cnch_stateless/20014_bitengine_server_cloud_table_fix.sql new file mode 100644 index 00000000000..47b3ce56560 --- /dev/null +++ b/tests/queries/4_cnch_stateless/20014_bitengine_server_cloud_table_fix.sql @@ -0,0 +1,57 @@ +create database if not exists dict_db; +create database if not exists bitmap_db; + +create table if not exists dict_db.tag_bitmaps_did_cdp_dict_20014 (`key` UInt64, `value` UInt64, `split_id` UInt64, BITENGINE_CONSTRAINT key_constraint CHECK toUInt64(intHash64(`key`) % 2)) ENGINE = CnchMergeTree CLUSTER BY `split_id` INTO 2 BUCKETS PRIMARY KEY `key` ORDER BY `key`; + +CREATE TABLE if not exists bitmap_db.tag_bitmaps_did_cdp_20014 (`split_id` UInt64, `tag_id` Int32, `p_date` Date, `tag_value_double` Float64, `tag_value` String, `id_map_cnt` UInt64, `id_type` Int32, `id_map` BitMap64 BitEngineEncode, `app_id` Int32, `tag_type` Int8) ENGINE = CnchMergeTree PARTITION BY (toDate(toStartOfDay(`p_date`)), `tag_id`, `tag_type`, `id_type`) CLUSTER BY `split_id` INTO 2 BUCKETS PRIMARY KEY (`tag_value`, `tag_value_double`, cityHash64(`tag_value`)) ORDER BY (`tag_value`, `tag_value_double`, cityHash64(`tag_value`)) SETTINGS underlying_dictionary_tables = '{"id_map":"`dict_db`.`tag_bitmaps_did_cdp_dict_20014`"}'; + +insert into bitmap_db.tag_bitmaps_did_cdp_20014 select 0, 1014834, '2024-03-14', 0, 'aaa', 2, 1358, arrayToBitmap([48,66]), 0, 2; + +select DecodeBitmap(id_map, 'bitmap_db', 'tag_bitmaps_did_cdp_20014', 'id_map') +from ( + select id_map from bitmap_db.tag_bitmaps_did_cdp_20014 where tag_id = 1014834 +); + +SELECT * +FROM +( + SELECT toUInt64(base_id) AS base_id + FROM + ( + SELECT + toUInt64(0) AS base_id, + map('', '') AS string_map, + map('', 0) AS bigint_map, + map('', 0) AS double_map, + map('', 
'') AS date_map, + map('', '') AS datetime_map, + map('', ['']) AS array_string_map, + map('', [0]) AS array_bigint_map, + map('', [0]) AS array_double_map, + map('', ['']) AS array_date_map, + map('', ['']) AS array_datetime_map, + NULL AS id_type, + NULL AS p_date + FROM numbers(0) + ) + WHERE (((p_date >= '2023-06-27') AND (p_date <= '2023-06-27')) AND (bigint_map{'5002743'} = 0)) AND (id_type = 1358) +) as l inner join ( +select arrayJoin(bitmapToArrayWithDecode(id_map, 'bitmap_db', 'tag_bitmaps_did_cdp_20014', 'id_map')) as id +from ( + select bitmapExtract('0')(idx, id_map) as id_map, split_id + from ( + select bitmapColumnOr(id_map) as id_map, + toInt32(0) as idx, + split_id + from bitmap_db.tag_bitmaps_did_cdp_20014 where tag_id = 1014834 + group by split_id + ) + group by split_id +) SETTINGS dict_table_full_mode = 1 + ) as r on l.base_id = r.id ORDER BY base_id, id; + + +drop table bitmap_db.tag_bitmaps_did_cdp_20014; +drop table dict_db.tag_bitmaps_did_cdp_dict_20014; +drop database bitmap_db; +drop database dict_db; \ No newline at end of file From b884e8f7b526bd26dc87ee791a2c53876ec95d38 Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 02:45:12 +0000 Subject: [PATCH 089/292] Merge 'cherry-pick-454ed2f6-5' into 'cnch-2.2' fix(clickhousech@m-4678891507): [cp] cnch 2.2 fix try get map implicit column See merge request: !23083 --- src/Storages/ColumnsDescription.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp index a46987639d3..7154a0befa4 100644 --- a/src/Storages/ColumnsDescription.cpp +++ b/src/Storages/ColumnsDescription.cpp @@ -613,11 +613,11 @@ std::optional ColumnsDescription::tryGetMapImplicitColumn(const { if (isMapImplicitKey(column_name)) { - auto ordinary_columns = getOrdinary(); - for (auto & nt : ordinary_columns) + const String & map_name = parseMapNameFromImplicitColName(column_name); + if (auto map_col = 
tryGetColumn(GetColumnsOptions::Ordinary, map_name)) { - if (nt.type->isByteMap() && isMapImplicitKeyOfSpecialMapName(column_name, nt.name)) - return NameAndTypePair(column_name, typeid_cast(*nt.type).getValueTypeForImplicitColumn()); + if (map_col->type->isByteMap()) + return NameAndTypePair(column_name, typeid_cast(*map_col->type).getValueTypeForImplicitColumn()); } } return {}; From 38a9dcb36a25c9d25e2ec942a856359267a430b0 Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 02:46:08 +0000 Subject: [PATCH 090/292] Merge 'cherry-pick-ed87de0e-3' into 'cnch-2.2' fix(clickhousech@m-4692133136): [cp cnch-2.2] minor fix reloadFormatSchema to support cfs See merge request: !23134 --- src/Interpreters/loadMetadata.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/loadMetadata.cpp b/src/Interpreters/loadMetadata.cpp index af095d2b89a..cfded4748f1 100644 --- a/src/Interpreters/loadMetadata.cpp +++ b/src/Interpreters/loadMetadata.cpp @@ -246,7 +246,7 @@ void reloadFormatSchema(ContextMutablePtr context, String remote_format_schema_p remote_format_schema_path += "/"; // add it by default // try download files from remote_format_schema_path to format_schema_path Poco::URI remote_uri(remote_format_schema_path); - if (remote_uri.getScheme() == "hdfs") + if (isHdfsOrCfsScheme(remote_uri.getScheme())) { HDFSBuilderPtr builder = context->getHdfsConnectionParams().createBuilder(remote_uri); HDFSFSPtr fs = createHDFSFS(builder.get()); @@ -289,7 +289,7 @@ void reloadFormatSchema(ContextMutablePtr context, String remote_format_schema_p } else { - if(log) {LOG_ERROR(log, "remote_format_schema_path only support hdfs");} + if(log) {LOG_ERROR(log, "remote_format_schema_path only support hdfs and cfs");} } } #endif From 7554a0d696a859294fb2f1b1b74c016874a34546 Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 02:46:25 +0000 Subject: [PATCH 091/292] Merge 'fix_get_json_object_for_lc' into 'cnch-2.2' 
fix(clickhousech@m-3000759214):fix lc(nullable(string)) as get_json_object arg See merge request: !23154 --- src/Functions/FunctionsJSON.h | 8 +++++--- .../12294_function_get_json_object.reference | 2 ++ .../4_cnch_stateless/12294_function_get_json_object.sql | 3 +++ 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/Functions/FunctionsJSON.h b/src/Functions/FunctionsJSON.h index 073fb053ec9..074715c792c 100644 --- a/src/Functions/FunctionsJSON.h +++ b/src/Functions/FunctionsJSON.h @@ -108,11 +108,13 @@ class FunctionJSONHelpers throw Exception{"Function " + String(Name::name) + " requires at least one argument", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH}; const auto & first_column = arguments[0]; - if (!isString(first_column.type)) + auto first_type_base = removeNullable(removeLowCardinality(first_column.type)); + + if (!isString(first_type_base)) throw Exception{"The first argument of function " + String(Name::name) + " should be a string containing JSON, illegal type: " + first_column.type->getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; - - const ColumnPtr & arg_json = first_column.column; + + const ColumnPtr & arg_json = recursiveAssumeNotNullable(first_column.column); const auto * col_json_const = typeid_cast(arg_json.get()); const auto * col_json_string = typeid_cast(col_json_const ? 
col_json_const->getDataColumnPtr().get() : arg_json.get()); diff --git a/tests/queries/4_cnch_stateless/12294_function_get_json_object.reference b/tests/queries/4_cnch_stateless/12294_function_get_json_object.reference index f004a481444..95f7c49aa64 100644 --- a/tests/queries/4_cnch_stateless/12294_function_get_json_object.reference +++ b/tests/queries/4_cnch_stateless/12294_function_get_json_object.reference @@ -4,6 +4,8 @@ abc "abc" {"ac":"abc","xz":"xz"} [{"ac":"abc","xz":"xz"},{"def":"def"}] +100 +100 {"a":"b"} 2017-08-31 18:36:48 -1 1504193808 ["a","c"] 1504193808 diff --git a/tests/queries/4_cnch_stateless/12294_function_get_json_object.sql b/tests/queries/4_cnch_stateless/12294_function_get_json_object.sql index 1ac1c643455..a2a72d97966 100644 --- a/tests/queries/4_cnch_stateless/12294_function_get_json_object.sql +++ b/tests/queries/4_cnch_stateless/12294_function_get_json_object.sql @@ -5,6 +5,9 @@ SELECT JSONExtractRaw('{"n_s" : [{"ac":"abc","xz":"xz"}, {"def":"def"}], "n_i" : SELECT JSONExtractRaw('{"n_s" : [{"ac":"abc","xz":"xz"}, {"def":"def"}], "n_i" : [1, 23]}', 'n_s', 1); SELECT JSONExtractRaw('{"n_s" : [{"ac":"abc","xz":"xz"}, {"def":"def"}], "n_i" : [1, 23]}', 'n_s'); +select get_json_object('{"a":100}'::Nullable(String), '$.a'); +select get_json_object('{"a":100}'::LowCardinality(Nullable(String)), '$.a'); + DROP TABLE IF EXISTS test.test; CREATE TABLE test.test(a Nullable(String)) ENGINE = CnchMergeTree ORDER BY tuple() PARTITION BY tuple(); SELECT get_json_object('{"test": "test"}', a) FROM test.test; From e4c1134baadea0c7747e64551fd7836964ee7435 Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 03:23:54 +0000 Subject: [PATCH 092/292] Merge branch 'lta@ETL_perf_optimize4cnch-dev' into 'cnch-dev' feat(clickhousech@m-3012442647): optimize ETL perf for unique table See merge request dp/ClickHouse!21744 # Conflicts: # src/CloudServices/CnchDataWriter.cpp # src/CloudServices/CnchWorkerClient.cpp # 
src/CloudServices/CnchWorkerServiceImpl.cpp # src/MergeTreeCommon/MergeTreeMetaBase.cpp # src/MergeTreeCommon/MergeTreeMetaBase.h # src/Protos/cnch_worker_rpc.proto # src/Storages/BitEngineEncodePartitionHelper.cpp # src/Storages/MergeTree/CloudMergeTreeBlockOutputStream.h # src/Transaction/Actions/InsertAction.cpp # src/Transaction/Actions/InsertAction.h # src/Transaction/CnchWorkerTransaction.cpp --- docker/CI/multi-servers/worker.yml | 5 + docker/CI/s3/worker.yml | 5 + .../CloudMergeTreeDedupWorker.cpp | 4 +- src/CloudServices/CnchDataWriter.cpp | 45 +++-- src/CloudServices/CnchDataWriter.h | 11 ++ src/CloudServices/CnchDedupHelper.cpp | 113 ++++++++++++ src/CloudServices/CnchDedupHelper.h | 72 ++++++++ src/CloudServices/CnchMergeMutateThread.cpp | 6 +- src/CloudServices/CnchServerClient.cpp | 37 ++-- src/CloudServices/CnchServerClient.h | 13 +- src/CloudServices/CnchServerServiceImpl.cpp | 9 +- src/CloudServices/CnchWorkerClient.cpp | 78 ++++++++ src/CloudServices/CnchWorkerClient.h | 34 ++-- src/CloudServices/CnchWorkerServiceImpl.cpp | 59 ++++++ src/CloudServices/CnchWorkerServiceImpl.h | 6 + src/Core/Settings.h | 3 + .../DistributedStages/BSPScheduler.cpp | 8 +- src/Interpreters/InterpreterInsertQuery.cpp | 16 +- src/Interpreters/InterpreterSystemQuery.cpp | 2 +- src/MergeTreeCommon/MergeTreeDataDeduper.cpp | 30 ++-- src/MergeTreeCommon/MergeTreeDataDeduper.h | 10 +- src/MergeTreeCommon/MergeTreeMetaBase.cpp | 22 +-- src/MergeTreeCommon/MergeTreeMetaBase.h | 3 - src/Optimizer/Property/PropertyDeterminer.cpp | 11 +- .../Transforms/TableFinishTransform.cpp | 16 +- src/Protos/cnch_server_rpc.proto | 1 + src/Protos/cnch_worker_rpc.proto | 23 +++ .../IngestColumnCnch/IngestColumnCnch.cpp | 2 +- .../CloudMergeTreeBlockOutputStream.cpp | 169 +++--------------- .../CloudMergeTreeBlockOutputStream.h | 3 - .../MergeTree/MergeTreeDataPartCNCH.cpp | 23 ++- src/Storages/MergeTree/MergeTreeSettings.h | 2 +- src/Storages/StorageCloudMergeTree.h | 2 +- 
src/Storages/StorageCnchMergeTree.cpp | 9 +- src/Storages/StorageCnchMergeTree.h | 2 +- src/Transaction/Actions/InsertAction.cpp | 37 ++++ src/Transaction/Actions/InsertAction.h | 6 + src/Transaction/CnchLock.cpp | 8 + src/Transaction/CnchLock.h | 1 + src/Transaction/CnchServerTransaction.cpp | 168 ++++++++++++++++- src/Transaction/CnchServerTransaction.h | 10 ++ src/Transaction/CnchWorkerTransaction.cpp | 13 +- src/Transaction/CnchWorkerTransaction.h | 2 + src/Transaction/ICnchTransaction.cpp | 21 +-- src/Transaction/ICnchTransaction.h | 7 +- .../CloudUniqueMergeTreeMergeTask.cpp | 4 +- .../48035_insert_select_no_gather.reference | 11 ++ .../48035_insert_select_no_gather.sql | 27 +++ 48 files changed, 859 insertions(+), 310 deletions(-) diff --git a/docker/CI/multi-servers/worker.yml b/docker/CI/multi-servers/worker.yml index b6f9b141788..314f12df597 100644 --- a/docker/CI/multi-servers/worker.yml +++ b/docker/CI/multi-servers/worker.yml @@ -55,6 +55,11 @@ storage_configuration: default: local_disk disk: local_disk hdfs_addr: "hdfs://hdfs-namenode:9000" +cnch_unique_table_log: + database: cnch_system + table: cnch_unique_table_log + flush_max_row_count: 10000 + flush_interval_milliseconds: 7500 query_log: database: system table: query_log diff --git a/docker/CI/s3/worker.yml b/docker/CI/s3/worker.yml index bf0b8b4bfed..503691fab5b 100644 --- a/docker/CI/s3/worker.yml +++ b/docker/CI/s3/worker.yml @@ -63,6 +63,11 @@ storage_configuration: disk: s3_disk # To avoid break hard-coded test cases. 
cnch_default_policy: cnch_default_hdfs +cnch_unique_table_log: + database: cnch_system + table: cnch_unique_table_log + flush_max_row_count: 10000 + flush_interval_milliseconds: 7500 query_log: database: system table: query_log diff --git a/src/CloudServices/CloudMergeTreeDedupWorker.cpp b/src/CloudServices/CloudMergeTreeDedupWorker.cpp index 364dc981ce1..eb901f40edc 100644 --- a/src/CloudServices/CloudMergeTreeDedupWorker.cpp +++ b/src/CloudServices/CloudMergeTreeDedupWorker.cpp @@ -218,7 +218,7 @@ void CloudMergeTreeDedupWorker::iterate() std::vector locks_to_acquire = CnchDedupHelper::getLocksToAcquire( scope, txn->getTransactionID(), *cnch_table, storage.getSettings()->unique_acquire_write_lock_timeout.value.totalMilliseconds()); lock_watch.restart(); - cnch_lock = txn->createLockHolder(std::move(locks_to_acquire)); + cnch_lock = std::make_shared(context, std::move(locks_to_acquire)); if (!cnch_lock->tryLock()) { if (auto unique_table_log = context->getCloudUniqueTableLog()) @@ -261,6 +261,8 @@ void CloudMergeTreeDedupWorker::iterate() return; } + txn->appendLockHolder(cnch_lock); + /// Sorts by commit time std::sort(staged_parts.begin(), staged_parts.end(), [](auto & lhs, auto & rhs) { return lhs->commit_time < rhs->commit_time; diff --git a/src/CloudServices/CnchDataWriter.cpp b/src/CloudServices/CnchDataWriter.cpp index 2c24e0b8cd4..3da4496070b 100644 --- a/src/CloudServices/CnchDataWriter.cpp +++ b/src/CloudServices/CnchDataWriter.cpp @@ -63,9 +63,17 @@ namespace ErrorCodes extern const int BUCKET_TABLE_ENGINE_MISMATCH; } +bool DumpedData::isEmpty() +{ + return parts.empty() && bitmaps.empty() && staged_parts.empty(); +} + void DumpedData::extend(DumpedData && data) { - auto extendImpl = [](auto & src, auto && dst) { + if (data.isEmpty()) + return; + + auto extendImpl = [] (auto & src, auto && dst) { if (src.empty()) { src = std::move(dst); @@ -80,6 +88,10 @@ void DumpedData::extend(DumpedData && data) extendImpl(parts, std::move(data.parts)); 
extendImpl(bitmaps, std::move(data.bitmaps)); extendImpl(staged_parts, std::move(data.staged_parts)); + + if (dedup_mode != data.dedup_mode) + throw Exception( + ErrorCodes::LOGICAL_ERROR, "Dedup mode is mismatch, {}/{}", typeToString(dedup_mode), typeToString(data.dedup_mode)); } using DumpCancelPred = std::function; @@ -145,7 +157,7 @@ DumpedData CnchDataWriter::dumpAndCommitCnchParts( { if (temp_parts.empty() && temp_bitmaps.empty() && temp_staged_parts.empty()) // Nothing to dump and commit, returns - return {}; + return {.dedup_mode = dedup_mode}; LOG_DEBUG( storage.getLogger(), @@ -172,7 +184,7 @@ DumpedData CnchDataWriter::dumpCnchParts( { if (temp_parts.empty() && temp_bitmaps.empty() && temp_staged_parts.empty()) // Nothing to dump, returns - return {}; + return {.dedup_mode = dedup_mode}; Stopwatch watch; @@ -269,7 +281,7 @@ DumpedData CnchDataWriter::dumpCnchParts( } /// Parallel dumping to shared storage - DumpedData result; + DumpedData result{.dedup_mode = dedup_mode}; S3ObjectMetadata::PartGeneratorID part_generator_id(S3ObjectMetadata::PartGeneratorID::TRANSACTION, curr_txn->getTransactionID().toString()); MergeTreeCNCHDataDumper dumper(storage, part_generator_id); @@ -335,8 +347,7 @@ void CnchDataWriter::commitDumpedParts(const DumpedData & dumped_data) if (settings.debug_cnch_force_commit_parts_rpc) { auto server_client = context->getCnchServerClient("0.0.0.0", context->getRPCPort()); - server_client->commitParts(txn_id, type, storage, dumped_parts, delete_bitmaps, dumped_staged_parts, task_id, false, - consumer_group, tpl, binlog, peak_memory_usage); + server_client->commitParts(txn_id, type, storage, dumped_data, task_id, false, consumer_group, tpl, binlog, peak_memory_usage); } else { @@ -362,8 +373,7 @@ void CnchDataWriter::commitDumpedParts(const DumpedData & dumped_data) throw Exception("Server with transaction " + txn_id.toString() + " is unknown", ErrorCodes::LOGICAL_ERROR); } - server_client->precommitParts( - context, txn_id, type, 
storage, dumped_parts, delete_bitmaps, dumped_staged_parts, task_id, is_server, consumer_group, tpl, binlog, peak_memory_usage); + server_client->precommitParts(context, txn_id, type, storage, dumped_data, task_id, is_server, consumer_group, tpl, binlog, peak_memory_usage); } } catch (const Exception &) @@ -380,12 +390,13 @@ void CnchDataWriter::commitDumpedParts(const DumpedData & dumped_data) LOG_DEBUG( storage.getLogger(), - "Committed {} parts, {} bitmaps, {} staged parts in transaction {}, elapsed {} ms", + "Committed {} parts, {} bitmaps, {} staged parts in transaction {}, elapsed {} ms, dedup mode is {}", dumped_parts.size(), delete_bitmaps.size(), dumped_staged_parts.size(), toString(UInt64(txn_id)), - watch.elapsedMilliseconds()); + watch.elapsedMilliseconds(), + typeToString(dumped_data.dedup_mode)); } void CnchDataWriter::initialize(size_t max_threads) @@ -560,7 +571,9 @@ void CnchDataWriter::commitPreparedCnchParts(const DumpedData & dumped_data, con } // Precommit stage. Write intermediate parts to KV - auto action = txn->createAction(storage_ptr, dumped_data.parts, dumped_data.bitmaps, dumped_data.staged_parts); + auto action + = txn->createAction(storage_ptr, dumped_data.parts, dumped_data.bitmaps, dumped_data.staged_parts); + action->as()->checkAndSetDedupMode(dumped_data.dedup_mode); txn->appendAction(action); action->executeV2(); } @@ -650,10 +663,18 @@ void CnchDataWriter::commitPreparedCnchParts(const DumpedData & dumped_data, con void CnchDataWriter::publishStagedParts(const MergeTreeDataPartsCNCHVector & staged_parts, const LocalDeleteBitmaps & bitmaps_to_dump) { + if (dedup_mode != CnchDedupHelper::DedupMode::APPEND) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Dedup mode is not append, but got {} when publish staged parts for table {}, it's a bug!", + typeToString(dedup_mode), + storage.getCnchStorageID().getNameForLogs()); DumpedData items; + items.dedup_mode = dedup_mode; + TxnTimestamp txn_id = context->getCurrentTransactionID(); - 
for (auto & staged_part : staged_parts) + for (const auto & staged_part : staged_parts) { // new part that shares the data file with the staged part Protos::DataModelPart new_part_model; diff --git a/src/CloudServices/CnchDataWriter.h b/src/CloudServices/CnchDataWriter.h index 91eb44b6209..640adc571f0 100644 --- a/src/CloudServices/CnchDataWriter.h +++ b/src/CloudServices/CnchDataWriter.h @@ -26,6 +26,7 @@ #include #include #include +#include namespace DB { @@ -36,7 +37,9 @@ struct DumpedData MutableMergeTreeDataPartsCNCHVector parts; DeleteBitmapMetaPtrVector bitmaps; MutableMergeTreeDataPartsCNCHVector staged_parts; + CnchDedupHelper::DedupMode dedup_mode = CnchDedupHelper::DedupMode::APPEND; + bool isEmpty(); void extend(DumpedData && data); }; @@ -87,6 +90,12 @@ class CnchDataWriter : private boost::noncopyable void setPeakMemoryUsage(UInt64 peak_memory_usage_) { peak_memory_usage = peak_memory_usage_; } + void setDedupMode(CnchDedupHelper::DedupMode dedup_mode_) + { + dedup_mode = dedup_mode_; + res.dedup_mode = dedup_mode; + } + DumpedData res; private: @@ -109,6 +118,8 @@ class CnchDataWriter : private boost::noncopyable UInt64 peak_memory_usage; + CnchDedupHelper::DedupMode dedup_mode = CnchDedupHelper::DedupMode::APPEND; + UUID newPartID(const MergeTreePartInfo& part_info, UInt64 txn_timestamp); }; diff --git a/src/CloudServices/CnchDedupHelper.cpp b/src/CloudServices/CnchDedupHelper.cpp index 37faf3706c1..ccc6eba0715 100644 --- a/src/CloudServices/CnchDedupHelper.cpp +++ b/src/CloudServices/CnchDedupHelper.cpp @@ -18,10 +18,17 @@ #include #include #include +#include +#include +#include +#include +#include namespace DB::ErrorCodes { extern const int LOGICAL_ERROR; +extern const int ABORTED; +extern const int CNCH_LOCK_ACQUIRE_FAILED; } namespace DB::CnchDedupHelper @@ -259,4 +266,110 @@ void DedupScope::filterParts(MergeTreeDataPartsCNCHVector & parts) const parts.end()); } +UInt64 getWriteLockTimeout(StorageCnchMergeTree & cnch_table, ContextPtr 
local_context) +{ + UInt64 session_value = local_context->getSettingsRef().unique_acquire_write_lock_timeout.value.totalMilliseconds(); + return session_value == 0 ? cnch_table.getSettings()->unique_acquire_write_lock_timeout.value.totalMilliseconds() : session_value; +} + +void acquireLockAndFillDedupTask(StorageCnchMergeTree & cnch_table, DedupTask & dedup_task, CnchServerTransaction & txn, ContextPtr local_context) +{ + /// Note: when txn is launched by worker, local_context is global context which means session settings will not take effect. TBD: support later. + TxnTimestamp ts; + std::sort(dedup_task.new_parts.begin(), dedup_task.new_parts.end(), [](auto & lhs, auto & rhs) { return lhs->info < rhs->info; }); + std::sort(dedup_task.delete_bitmaps_for_new_parts.begin(), dedup_task.delete_bitmaps_for_new_parts.end(), LessDeleteBitmapMeta()); + CnchLockHolderPtr cnch_lock; + MergeTreeDataPartsCNCHVector visible_parts, staged_parts; + bool force_normal_dedup = false; + Stopwatch watch; + do + { + CnchDedupHelper::DedupScope scope = CnchDedupHelper::getDedupScope(cnch_table, dedup_task.new_parts, force_normal_dedup); + + std::vector locks_to_acquire = CnchDedupHelper::getLocksToAcquire( + scope, txn.getTransactionID(), cnch_table, CnchDedupHelper::getWriteLockTimeout(cnch_table, local_context)); + watch.restart(); + cnch_lock = std::make_shared(local_context, std::move(locks_to_acquire)); + if (!cnch_lock->tryLock()) + { + if (auto unique_table_log = local_context->getCloudUniqueTableLog()) + { + auto current_log = UniqueTable::createUniqueTableLog(UniqueTableLogElement::ERROR, cnch_table.getCnchStorageID()); + current_log.txn_id = txn.getTransactionID(); + current_log.metric = ErrorCodes::CNCH_LOCK_ACQUIRE_FAILED; + current_log.event_msg = "Failed to acquire lock for txn " + txn.getTransactionID().toString(); + unique_table_log->add(current_log); + } + throw Exception("Failed to acquire lock for txn " + txn.getTransactionID().toString(), 
ErrorCodes::CNCH_LOCK_ACQUIRE_FAILED); + } + dedup_task.statistics.acquire_lock_cost += watch.elapsedMilliseconds(); + + watch.restart(); + ts = local_context->getTimestamp(); /// must get a new ts after locks are acquired + visible_parts = CnchDedupHelper::getVisiblePartsToDedup(scope, cnch_table, ts); + staged_parts = CnchDedupHelper::getStagedPartsToDedup(scope, cnch_table, ts); + dedup_task.statistics.get_metadata_cost += watch.elapsedMilliseconds(); + + /// In some case, visible parts or staged parts doesn't have same bucket definition or not a bucket part, we need to convert bucket lock to normal lock. + /// Otherwise, it may lead to duplicated data. + if (scope.isBucketLock() && !cnch_table.getSettings()->enable_bucket_level_unique_keys + && !CnchDedupHelper::checkBucketParts(cnch_table, visible_parts, staged_parts)) + { + force_normal_dedup = true; + cnch_lock->unlock(); + LOG_TRACE(txn.getLogger(), "Check bucket parts failed, switch to normal lock to dedup."); + continue; + } + else + { + /// Filter staged parts if lock scope is bucket level + scope.filterParts(staged_parts); + break; + } + } while (true); + + if (unlikely(local_context->getSettingsRef().unique_sleep_seconds_after_acquire_lock.totalSeconds())) + { + /// Test purpose only + std::this_thread::sleep_for(std::chrono::seconds(local_context->getSettingsRef().unique_sleep_seconds_after_acquire_lock.totalSeconds())); + } + + for (auto & visible_part: visible_parts) + { + dedup_task.visible_parts.emplace_back(std::const_pointer_cast(visible_part)); + for (const auto & bitmap_model : visible_part->delete_bitmap_metas) + dedup_task.delete_bitmaps_for_visible_parts.emplace_back(createFromModel(cnch_table, *bitmap_model)); + } + for (auto & staged_part: staged_parts) + { + dedup_task.staged_parts.emplace_back(std::const_pointer_cast(staged_part)); + for (const auto & bitmap_model: staged_part->delete_bitmap_metas) + dedup_task.delete_bitmaps_for_staged_parts.emplace_back(createFromModel(cnch_table, 
*bitmap_model)); + } + txn.appendLockHolder(cnch_lock); +} + +void executeDedupTask(StorageCnchMergeTree & cnch_table, DedupTask & dedup_task, const TxnTimestamp & txn_id, ContextPtr local_context) +{ + /// Precondition: parts already be sorted. + cnch_table.getDeleteBitmapMetaForCnchParts(dedup_task.visible_parts, dedup_task.delete_bitmaps_for_visible_parts, /*force_found=*/true); + cnch_table.getDeleteBitmapMetaForCnchParts(dedup_task.new_parts, dedup_task.delete_bitmaps_for_new_parts, /*force_found=*/false); + cnch_table.getDeleteBitmapMetaForCnchParts(dedup_task.staged_parts, dedup_task.delete_bitmaps_for_staged_parts, /*force_found=*/false); + MergeTreeDataDeduper deduper(cnch_table, local_context, dedup_task.dedup_mode); + LocalDeleteBitmaps bitmaps_to_dump = deduper.dedupParts( + txn_id, + {dedup_task.visible_parts.begin(), dedup_task.visible_parts.end()}, + {dedup_task.staged_parts.begin(), dedup_task.staged_parts.end()}, + {dedup_task.new_parts.begin(), dedup_task.new_parts.end()}); + + Stopwatch watch; + CnchDataWriter cnch_writer(cnch_table, local_context, ManipulationType::Insert); + cnch_writer.publishStagedParts({dedup_task.staged_parts.begin(), dedup_task.staged_parts.end()}, bitmaps_to_dump); + LOG_DEBUG( + cnch_table.getLogger(), + "Publish staged parts take {} ms, txn id: {}, dedup mode: {}", + watch.elapsedMilliseconds(), + txn_id.toUInt64(), + typeToString(dedup_task.dedup_mode)); +} } diff --git a/src/CloudServices/CnchDedupHelper.h b/src/CloudServices/CnchDedupHelper.h index 33b590d1e90..93430550bd5 100644 --- a/src/CloudServices/CnchDedupHelper.h +++ b/src/CloudServices/CnchDedupHelper.h @@ -20,6 +20,8 @@ #include #include #include +#include +#include #include #include #include @@ -28,12 +30,38 @@ namespace DB { class MergeTreeMetaBase; class StorageCnchMergeTree; +class DeleteBitmapMeta; +using DeleteBitmapMetaPtr = std::shared_ptr; +using DeleteBitmapMetaPtrVector = std::vector; +class CnchServerTransaction; } namespace DB::CnchDedupHelper 
{ +enum class DedupMode : unsigned int +{ + APPEND = 0, + UPSERT, + THROW +}; + +inline String typeToString(DedupMode type) +{ + switch (type) + { + case DedupMode::APPEND: + return "APPEND"; + case DedupMode::UPSERT: + return "UPSERT"; + case DedupMode::THROW: + return "THROW"; + default: + return "Unknown"; + } +} + class DedupScope { public: @@ -141,4 +169,48 @@ bool checkBucketParts( const MergeTreeDataPartsCNCHVector & visible_parts, const MergeTreeDataPartsCNCHVector & staged_parts); +struct DedupTask +{ + DedupMode dedup_mode; + StorageID storage_id; + MutableMergeTreeDataPartsCNCHVector new_parts; + DeleteBitmapMetaPtrVector delete_bitmaps_for_new_parts; + + MutableMergeTreeDataPartsCNCHVector staged_parts; + DeleteBitmapMetaPtrVector delete_bitmaps_for_staged_parts; + + MutableMergeTreeDataPartsCNCHVector visible_parts; + DeleteBitmapMetaPtrVector delete_bitmaps_for_visible_parts; + + struct Statistics + { + /// Record time cost for each stage(ms) + UInt64 acquire_lock_cost = 0; + UInt64 get_metadata_cost = 0; + UInt64 execute_task_cost = 0; + UInt64 other_cost = 0; + UInt64 total_cost = 0; + + String toString() + { + return fmt::format( + "[acquire lock cost {} ms, get metadata cost {} ms, execute task cost {} ms, other cost {} ms, total cost {} ms]", + acquire_lock_cost, + get_metadata_cost, + execute_task_cost, + other_cost, + total_cost); + } + } statistics; + + explicit DedupTask(const DedupMode & dedup_mode_, const StorageID & storage_id_) : dedup_mode(dedup_mode_), storage_id(storage_id_) { } +}; +using DedupTaskPtr = std::shared_ptr; + +UInt64 getWriteLockTimeout(StorageCnchMergeTree & cnch_table, ContextPtr local_context); + +void acquireLockAndFillDedupTask(StorageCnchMergeTree & cnch_table, DedupTask & dedup_task, CnchServerTransaction & txn, ContextPtr local_context); + +void executeDedupTask(StorageCnchMergeTree & cnch_table, DedupTask & dedup_task, const TxnTimestamp & txn_id, ContextPtr local_context); + } diff --git 
a/src/CloudServices/CnchMergeMutateThread.cpp b/src/CloudServices/CnchMergeMutateThread.cpp index 27e23565eea..6bcd50d641a 100644 --- a/src/CloudServices/CnchMergeMutateThread.cpp +++ b/src/CloudServices/CnchMergeMutateThread.cpp @@ -820,14 +820,14 @@ Strings CnchMergeMutateThread::removeLockedPartition(const Strings & partitions) auto txn_id = transaction->getTransactionID(); Strings res; std::for_each(partitions.begin(), partitions.end(), - [& res, & transaction, txn_id, this] (const String & partition) + [& res, txn_id, this] (const String & partition) { LockInfoPtr partition_lock = std::make_shared(txn_id); partition_lock->setMode(LockMode::X); partition_lock->setUUIDAndPrefix(getStorageID().uuid, LockInfo::task_domain); partition_lock->setPartition(partition); - auto cnch_lock = transaction->createLockHolder({std::move(partition_lock)}); + auto cnch_lock = std::make_shared(getContext(), std::move(partition_lock)); if (cnch_lock->tryLock()) { LOG_TRACE(log, "partition {} is not lock", partition); @@ -897,7 +897,7 @@ String CnchMergeMutateThread::submitFutureManipulationTask( } } - auto cnch_lock = transaction->createLockHolder({std::move(partition_lock)}); + auto cnch_lock = std::make_shared(getContext(), std::move(partition_lock)); if (type == ManipulationType::Merge || type == ManipulationType::Mutate || type == ManipulationType::Clustering) cnch_lock->lock(); diff --git a/src/CloudServices/CnchServerClient.cpp b/src/CloudServices/CnchServerClient.cpp index 84698a2fe4d..d59f0bd87a7 100644 --- a/src/CloudServices/CnchServerClient.cpp +++ b/src/CloudServices/CnchServerClient.cpp @@ -24,6 +24,7 @@ #include #include #include +#include namespace DB @@ -90,10 +91,10 @@ CnchServerClient::commitTransaction(const ICnchTransaction & txn, const StorageI return response.commit_ts(); } -void CnchServerClient::precommitTransaction(const TxnTimestamp & txn_id, const UUID & uuid) +void CnchServerClient::precommitTransaction(const ContextPtr & context, const TxnTimestamp & 
txn_id, const UUID & uuid) { brpc::Controller cntl; - cntl.set_timeout_ms(10 * 1000); + cntl.set_timeout_ms(context->getSettingsRef().max_dedup_execution_time.totalMilliseconds()); Protos::PrecommitTransactionReq request; Protos::PrecommitTransactionResp response; @@ -514,9 +515,7 @@ void CnchServerClient::commitParts( const TxnTimestamp & txn_id, ManipulationType type, MergeTreeMetaBase & storage, - const MutableMergeTreeDataPartsCNCHVector & parts, - const DeleteBitmapMetaPtrVector & delete_bitmaps, - const MutableMergeTreeDataPartsCNCHVector & staged_parts, + const DumpedData & dumped_data, const String & task_id, const bool from_server, const String & consumer_group, @@ -526,6 +525,10 @@ void CnchServerClient::commitParts( { /// TODO: check txn_id & start_ts + const auto & parts = dumped_data.parts; + const auto & delete_bitmaps = dumped_data.bitmaps; + const auto & staged_parts = dumped_data.staged_parts; + brpc::Controller cntl; cntl.set_timeout_ms(storage.getSettings()->cnch_meta_rpc_timeout_ms); Protos::CommitPartsReq request; @@ -605,6 +608,8 @@ void CnchServerClient::commitParts( new_bitmap->CopyFrom(*(delete_bitmap->getModel())); } + request.set_dedup_mode(static_cast(dumped_data.dedup_mode)); + stub->commitParts(&cntl, &request, &response, nullptr); assertController(cntl); RPCHelpers::checkResponse(response); @@ -617,9 +622,7 @@ void CnchServerClient::precommitParts( const TxnTimestamp & txn_id, ManipulationType type, MergeTreeMetaBase & storage, - const MutableMergeTreeDataPartsCNCHVector & parts, - const DeleteBitmapMetaPtrVector & delete_bitmaps, - const MutableMergeTreeDataPartsCNCHVector & staged_parts, + const DumpedData & dumped_data, const String & task_id, const bool from_server, const String & consumer_group, @@ -628,6 +631,9 @@ void CnchServerClient::precommitParts( const UInt64 peak_memory_usage) { const UInt64 batch_size = context->getSettingsRef().catalog_max_commit_size; + const auto & parts = dumped_data.parts; + const auto & 
delete_bitmaps = dumped_data.bitmaps; + const auto & staged_parts = dumped_data.staged_parts; // Precommit parts in batches {batch_begin, batch_end} const size_t max_size = std::max({parts.size(), delete_bitmaps.size(), staged_parts.size()}); @@ -646,7 +652,7 @@ void CnchServerClient::precommitParts( LOG_DEBUG( log, "Precommit: parts in batch: [{} ~ {}] of total: {}; delete_bitmaps in batch [{} ~ {}] of total {}; staged parts in batch [{} " - "~ {}] of total {}.", + "~ {}] of total {}; dedup mode is {}", part_batch_begin, part_batch_end, parts.size(), @@ -655,15 +661,20 @@ void CnchServerClient::precommitParts( delete_bitmaps.size(), staged_part_batch_begin, staged_part_batch_end, - staged_parts.size()); + staged_parts.size(), + typeToString(dumped_data.dedup_mode)); + + DumpedData new_dumped_data; + new_dumped_data.parts = {parts.begin() + part_batch_begin, parts.begin() + part_batch_end}; + new_dumped_data.bitmaps = {delete_bitmaps.begin() + bitmap_batch_begin, delete_bitmaps.begin() + bitmap_batch_end}; + new_dumped_data.staged_parts = {staged_parts.begin() + staged_part_batch_begin, staged_parts.begin() + staged_part_batch_end}; + new_dumped_data.dedup_mode = dumped_data.dedup_mode; commitParts( txn_id, type, storage, - {parts.begin() + part_batch_begin, parts.begin() + part_batch_end}, - {delete_bitmaps.begin() + bitmap_batch_begin, delete_bitmaps.begin() + bitmap_batch_end}, - {staged_parts.begin() + staged_part_batch_begin, staged_parts.begin() + staged_part_batch_end}, + new_dumped_data, task_id, from_server, consumer_group, diff --git a/src/CloudServices/CnchServerClient.h b/src/CloudServices/CnchServerClient.h index 3ab0bb7c1ba..caf978b97b7 100644 --- a/src/CloudServices/CnchServerClient.h +++ b/src/CloudServices/CnchServerClient.h @@ -27,7 +27,7 @@ #include #include #include -#include "Storages/MergeTree/MarkRange.h" +#include #include namespace DB @@ -42,6 +42,7 @@ class CnchServerTransaction; using CnchServerTransactionPtr = std::shared_ptr; struct 
PrunedPartitions; class StorageCloudMergeTree; +struct DumpedData; class CnchServerClient : public RpcClientBase { @@ -58,7 +59,7 @@ class CnchServerClient : public RpcClientBase std::pair createTransactionForKafka(const StorageID & storage_id, const size_t consumer_index); TxnTimestamp commitTransaction( const ICnchTransaction & txn, const StorageID & kafka_storage_id = StorageID::createEmpty(), const size_t consumer_index = 0); - void precommitTransaction(const TxnTimestamp & txn_id, const UUID & uuid = UUIDHelpers::Nil); + void precommitTransaction(const ContextPtr & context, const TxnTimestamp & txn_id, const UUID & uuid = UUIDHelpers::Nil); TxnTimestamp rollbackTransaction(const TxnTimestamp & txn_id); void finishTransaction(const TxnTimestamp & txn_id); @@ -138,9 +139,7 @@ class CnchServerClient : public RpcClientBase const TxnTimestamp & txn_id, ManipulationType type, MergeTreeMetaBase & storage, - const MutableMergeTreeDataPartsCNCHVector & parts, - const DeleteBitmapMetaPtrVector & delete_bitmaps, - const MutableMergeTreeDataPartsCNCHVector & staged_parts, + const DumpedData & dumped_data, const String & task_id = {}, const bool from_server = false, const String & consumer_group = {}, @@ -153,9 +152,7 @@ class CnchServerClient : public RpcClientBase const TxnTimestamp & txn_id, ManipulationType type, MergeTreeMetaBase & storage, - const MutableMergeTreeDataPartsCNCHVector & parts, - const DeleteBitmapMetaPtrVector & delete_bitmaps, - const MutableMergeTreeDataPartsCNCHVector & staged_parts, + const DumpedData & dumped_data, const String & task_id = {}, const bool from_server = false, const String & consumer_group = {}, diff --git a/src/CloudServices/CnchServerServiceImpl.cpp b/src/CloudServices/CnchServerServiceImpl.cpp index 107bac38f3e..ad878eb36a6 100644 --- a/src/CloudServices/CnchServerServiceImpl.cpp +++ b/src/CloudServices/CnchServerServiceImpl.cpp @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -189,14 +190,18 
@@ void CnchServerServiceImpl::commitParts( CnchDataWriter cnch_writer( *cnch, rpc_context, - ManipulationType(req->type()), + static_cast(req->type()), req->task_id(), std::move(consumer_group), tpl, binlog, peak_memory_usage); - cnch_writer.commitPreparedCnchParts(DumpedData{std::move(parts), std::move(delete_bitmaps), std::move(staged_parts)}); + auto dedup_mode = static_cast(req->dedup_mode()); + cnch_writer.setDedupMode(dedup_mode); + + cnch_writer.commitPreparedCnchParts( + DumpedData{std::move(parts), std::move(delete_bitmaps), std::move(staged_parts), dedup_mode}); } catch (...) { diff --git a/src/CloudServices/CnchWorkerClient.cpp b/src/CloudServices/CnchWorkerClient.cpp index 6d3e45d3a6a..661e73afe68 100644 --- a/src/CloudServices/CnchWorkerClient.cpp +++ b/src/CloudServices/CnchWorkerClient.cpp @@ -28,6 +28,9 @@ #include #include #include +#include +#include +#include #include #include #include "Storages/Hive/HiveFile/IHiveFile.h" @@ -515,6 +518,81 @@ brpc::CallId CnchWorkerClient::sendResources( return call_id; } +static void onDedupTaskDone(Protos::ExecuteDedupTaskResp * response, brpc::Controller * cntl, ExceptionHandlerPtr handler, std::function funcOnCallback) +{ + try + { + std::unique_ptr response_guard(response); + std::unique_ptr cntl_guard(cntl); + RPCHelpers::assertController(*cntl); + RPCHelpers::checkResponse(*response); + funcOnCallback(/*success*/ true); + } + catch (...) 
+ { + handler->setException(std::current_exception()); + funcOnCallback(/*success*/ false); + } +} + +brpc::CallId CnchWorkerClient::executeDedupTask( + const ContextPtr & context, + const TxnTimestamp & txn_id, + UInt16 rpc_port, + const IStorage & storage, + const CnchDedupHelper::DedupTask & dedup_task, + const ExceptionHandlerPtr & handler, + std::function funcOnCallback) +{ + Protos::ExecuteDedupTaskReq request; + request.set_txn_id(txn_id); + request.set_rpc_port(rpc_port); + RPCHelpers::fillUUID(dedup_task.storage_id.uuid, *request.mutable_table_uuid()); + request.set_dedup_mode(static_cast(dedup_task.dedup_mode)); + /// New parts + for (const auto & new_part : dedup_task.new_parts) + { + fillPartModel(storage, *new_part, *request.add_new_parts()); + request.add_new_parts_paths()->assign(new_part->relative_path); + } + for (const auto & delete_bitmap : dedup_task.delete_bitmaps_for_new_parts) + { + auto * new_bitmap = request.add_delete_bitmaps_for_new_parts(); + new_bitmap->CopyFrom(*(delete_bitmap->getModel())); + } + + /// Staged parts + for (const auto & staged_part : dedup_task.staged_parts) + { + fillPartModel(storage, *staged_part, *request.add_staged_parts()); + request.add_staged_parts_paths()->assign(staged_part->relative_path); + } + for (const auto & delete_bitmap : dedup_task.delete_bitmaps_for_staged_parts) + { + auto * new_bitmap = request.add_delete_bitmaps_for_staged_parts(); + new_bitmap->CopyFrom(*(delete_bitmap->getModel())); + } + + /// Visible parts + for (const auto & visible_part : dedup_task.visible_parts) + { + fillPartModel(storage, *visible_part, *request.add_visible_parts()); + request.add_visible_parts_paths()->assign(visible_part->relative_path); + } + for (const auto & delete_bitmap : dedup_task.delete_bitmaps_for_visible_parts) + { + auto * new_bitmap = request.add_delete_bitmaps_for_visible_parts(); + new_bitmap->CopyFrom(*(delete_bitmap->getModel())); + } + + auto * cntl = new brpc::Controller; + 
cntl->set_timeout_ms(context->getSettingsRef().max_dedup_execution_time.totalMilliseconds()); + const auto call_id = cntl->call_id(); + auto * response = new Protos::ExecuteDedupTaskResp; + stub->executeDedupTask(cntl, &request, response, brpc::NewCallback(onDedupTaskDone, response, cntl, handler, funcOnCallback)); + return call_id; +} + brpc::CallId CnchWorkerClient::removeWorkerResource(TxnTimestamp txn_id, ExceptionHandlerPtr handler) { brpc::Controller * cntl = new brpc::Controller; diff --git a/src/CloudServices/CnchWorkerClient.h b/src/CloudServices/CnchWorkerClient.h index 7a8b4ed29e1..c9c0ab66210 100644 --- a/src/CloudServices/CnchWorkerClient.h +++ b/src/CloudServices/CnchWorkerClient.h @@ -28,8 +28,8 @@ #include #include #include -#include "Storages/Hive/HiveFile/IHiveFile_fwd.h" -#include "Storages/MergeTree/MergeTreeDataPartCNCH_fwd.h" +#include +#include #include #include @@ -50,6 +50,11 @@ namespace IngestColumnCnch struct IngestPartitionParam; } +namespace CnchDedupHelper +{ + struct DedupTask; +} + class MergeTreeMetaBase; class StorageMaterializedView; struct MarkRange; @@ -117,14 +122,14 @@ class CnchWorkerClient : public RpcClientBase UInt64 parts_preload_level, UInt64 submit_ts); - brpc::CallId dropPartDiskCache( - const ContextPtr & context, - const TxnTimestamp & txn_id, - const IStorage & storage, - const String & create_local_table_query, - const ServerDataPartsVector & parts, - bool sync, - bool drop_vw_disk_cache); + brpc::CallId dropPartDiskCache( + const ContextPtr & context, + const TxnTimestamp & txn_id, + const IStorage & storage, + const String & create_local_table_query, + const ServerDataPartsVector & parts, + bool sync, + bool drop_vw_disk_cache); brpc::CallId sendOffloadingInfo( const ContextPtr & context, @@ -140,6 +145,15 @@ class CnchWorkerClient : public RpcClientBase const WorkerId & worker_id, bool with_mutations = false); + brpc::CallId executeDedupTask( + const ContextPtr & context, + const TxnTimestamp & txn_id, + UInt16 
rpc_port, + const IStorage & storage, + const CnchDedupHelper::DedupTask & dedup_task, + const ExceptionHandlerPtr & handler, + std::function funcOnCallback); + brpc::CallId removeWorkerResource(TxnTimestamp txn_id, ExceptionHandlerPtr handler); void createDedupWorker(const StorageID & storage_id, const String & create_table_query, const HostWithPorts & host_ports, const size_t & deduper_index); diff --git a/src/CloudServices/CnchWorkerServiceImpl.cpp b/src/CloudServices/CnchWorkerServiceImpl.cpp index 1f751ea3627..1530bf35479 100644 --- a/src/CloudServices/CnchWorkerServiceImpl.cpp +++ b/src/CloudServices/CnchWorkerServiceImpl.cpp @@ -88,6 +88,7 @@ namespace ErrorCodes extern const int PREALLOCATE_TOPOLOGY_ERROR; extern const int PREALLOCATE_QUERY_INTENT_NOT_FOUND; extern const int SESSION_NOT_FOUND; + extern const int ABORTED; } CnchWorkerServiceImpl::CnchWorkerServiceImpl(ContextMutablePtr context_) @@ -1112,6 +1113,64 @@ void CnchWorkerServiceImpl::getDedupWorkerStatus( }) } +void CnchWorkerServiceImpl::executeDedupTask( + google::protobuf::RpcController * cntl, + const Protos::ExecuteDedupTaskReq * request, + Protos::ExecuteDedupTaskResp * response, + google::protobuf::Closure * done) +{ + SUBMIT_THREADPOOL({ + auto txn_id = TxnTimestamp(request->txn_id()); + auto rpc_context = RPCHelpers::createSessionContextForRPC(getContext(), *cntl); + rpc_context->getClientInfo().rpc_port = request->rpc_port(); + auto server_client + = rpc_context->getCnchServerClient(rpc_context->getClientInfo().current_address.host().toString(), request->rpc_port()); + auto worker_txn = std::make_shared(rpc_context, txn_id, server_client); + /// This stage is in commit process, we can not finish transaction here. 
+ worker_txn->setIsInitiator(false); + rpc_context->setCurrentTransaction(worker_txn); + + auto catalog = getContext()->getCnchCatalog(); + TxnTimestamp ts = getContext()->getTimestamp(); + auto table_uuid_str = UUIDHelpers::UUIDToString(RPCHelpers::createUUID(request->table_uuid())); + auto table = catalog->tryGetTableByUUID(*getContext(), table_uuid_str, ts); + if (!table) + throw Exception(ErrorCodes::ABORTED, "Table {} has been dropped", table_uuid_str); + auto cnch_table = dynamic_pointer_cast(table); + if (!cnch_table) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Table {} is not cnch merge tree", table_uuid_str); + + auto new_parts = createPartVectorFromModels(*cnch_table, request->new_parts(), &request->new_parts_paths()); + DeleteBitmapMetaPtrVector delete_bitmaps_for_new_parts; + delete_bitmaps_for_new_parts.reserve(request->delete_bitmaps_for_new_parts_size()); + for (const auto & bitmap_model : request->delete_bitmaps_for_new_parts()) + delete_bitmaps_for_new_parts.emplace_back(createFromModel(*cnch_table, bitmap_model)); + + auto staged_parts = createPartVectorFromModels(*cnch_table, request->staged_parts(), &request->staged_parts_paths()); + DeleteBitmapMetaPtrVector delete_bitmaps_for_staged_parts; + delete_bitmaps_for_staged_parts.reserve(request->delete_bitmaps_for_staged_parts_size()); + for (const auto & bitmap_model : request->delete_bitmaps_for_staged_parts()) + delete_bitmaps_for_staged_parts.emplace_back(createFromModel(*cnch_table, bitmap_model)); + + auto visible_parts = createPartVectorFromModels(*cnch_table, request->visible_parts(), &request->visible_parts_paths()); + DeleteBitmapMetaPtrVector delete_bitmaps_for_visible_parts; + delete_bitmaps_for_visible_parts.reserve(request->delete_bitmaps_for_visible_parts_size()); + for (const auto & bitmap_model : request->delete_bitmaps_for_visible_parts()) + delete_bitmaps_for_visible_parts.emplace_back(createFromModel(*cnch_table, bitmap_model)); + + auto dedup_mode = 
static_cast(request->dedup_mode()); + auto dedup_task = std::make_shared(dedup_mode, cnch_table->getCnchStorageID()); + dedup_task->new_parts = std::move(new_parts); + dedup_task->delete_bitmaps_for_new_parts = std::move(delete_bitmaps_for_new_parts); + dedup_task->staged_parts = std::move(staged_parts); + dedup_task->delete_bitmaps_for_staged_parts = std::move(delete_bitmaps_for_staged_parts); + dedup_task->visible_parts = std::move(visible_parts); + dedup_task->delete_bitmaps_for_visible_parts = std::move(delete_bitmaps_for_visible_parts); + + CnchDedupHelper::executeDedupTask(*cnch_table, *dedup_task, txn_id, rpc_context); + }) +} + #if USE_RDKAFKA void CnchWorkerServiceImpl::submitKafkaConsumeTask( google::protobuf::RpcController * cntl, diff --git a/src/CloudServices/CnchWorkerServiceImpl.h b/src/CloudServices/CnchWorkerServiceImpl.h index 0c86f5aa70f..e2963ed4a11 100644 --- a/src/CloudServices/CnchWorkerServiceImpl.h +++ b/src/CloudServices/CnchWorkerServiceImpl.h @@ -133,6 +133,12 @@ class CnchWorkerServiceImpl : protected WithMutableContext, public DB::Protos::C Protos::GetDedupWorkerStatusResp * response, google::protobuf::Closure * done) override; + void executeDedupTask( + google::protobuf::RpcController *, + const Protos::ExecuteDedupTaskReq * request, + Protos::ExecuteDedupTaskResp * response, + google::protobuf::Closure * done) override; + #if USE_RDKAFKA void submitKafkaConsumeTask( google::protobuf::RpcController * cntl, diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 5efb9275ad9..598c2a4cb0c 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -1320,6 +1320,9 @@ enum PreloadLevelSettings : UInt64 M(Bool, enable_unique_table_detach_ignore_delete_bitmap, false, "Enable ignore delete bitmap info when handling detach commands for unique table, for example: delete bitmap has been broken, we can just ignore it via this parameter.", 0) \ M(DedupKeyMode, dedup_key_mode, DedupKeyMode::REPLACE, "Handle different deduplication modes, 
current valid values: REPLACE, THROW, APPEND. THROW mode can only be used in non-staging area scenarios. APPEND mode will not execute dedup process, which is suitable for historical non-duplicated data import scenarios", 0) \ M(Seconds, unique_sleep_seconds_after_acquire_lock, 0, "Only for test", 0) \ + M(Seconds, unique_acquire_write_lock_timeout, 0, "It has higher priority than table setting. Only when it's zero, use table setting", 0) \ + M(Seconds, max_dedup_execution_time, 21600, "Set default value to 6h", 0) \ + M(UInt64, max_dedup_retry_time, 1, "Dedup task retry num", 0) \ \ /** Settings for Map */ \ M(Bool, optimize_map_column_serialization, false, "Construct map value columns in advance during serialization", 0) \ diff --git a/src/Interpreters/DistributedStages/BSPScheduler.cpp b/src/Interpreters/DistributedStages/BSPScheduler.cpp index bdd7cfc9728..28c329ede5e 100644 --- a/src/Interpreters/DistributedStages/BSPScheduler.cpp +++ b/src/Interpreters/DistributedStages/BSPScheduler.cpp @@ -181,13 +181,7 @@ bool BSPScheduler::retryTaskIfPossible(size_t segment_id, UInt64 parallel_index) { if (auto step = std::dynamic_pointer_cast(node.step)) { - if (auto cnch_table = step->getTarget()->getStorage()) - { - // unique table can't support retry - if (cnch_table->getInMemoryMetadataPtr()->hasUniqueKey()) - return false; - is_table_write = true; - } + is_table_write = true; } else if (node.step->getType() == IQueryPlanStep::Type::TableFinish) return false; diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 1ed068fa77e..020441f3955 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -343,18 +343,10 @@ BlockIO InterpreterInsertQuery::execute() /// Handle the insert commit for insert select/infile case in cnch server. 
BlockInputStreamPtr in = cnch_merge_tree->writeInWorker(query_ptr, metadata_snapshot, getContext()); - if (const auto * cnch_table = dynamic_cast(table.get()); - cnch_table && cnch_table->commitTxnFromWorkerSide(metadata_snapshot, getContext())) - { - /// for unique table, insert select|infile is committed from worker side - res.in = std::move(in); - } - else - { - auto txn = getContext()->getCurrentTransaction(); - txn->setMainTableUUID(table->getStorageUUID()); - res.in = std::make_shared(in, std::move(txn)); - } + auto txn = getContext()->getCurrentTransaction(); + txn->setMainTableUUID(table->getStorageUUID()); + res.in = std::make_shared(in, std::move(txn)); + if (insert_query.is_overwrite && !lock_holders.empty()) { /// Make sure lock is release after txn commit diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index 46c3fde9d09..962ac2b04d9 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -1818,7 +1818,7 @@ void InterpreterSystemQuery::lockMemoryLock(const ASTSystemQuery & query, const Stopwatch lock_watch; - auto cnch_lock = transaction->createLockHolder({std::move(partition_lock)}); + auto cnch_lock = std::make_shared(local_context, std::move(partition_lock)); cnch_lock->lock(); LOG_DEBUG(log, "Acquired lock in {} ms", lock_watch.elapsedMilliseconds()); sleepForSeconds(query.seconds); diff --git a/src/MergeTreeCommon/MergeTreeDataDeduper.cpp b/src/MergeTreeCommon/MergeTreeDataDeduper.cpp index ad4d16956be..439c094dbdf 100644 --- a/src/MergeTreeCommon/MergeTreeDataDeduper.cpp +++ b/src/MergeTreeCommon/MergeTreeDataDeduper.cpp @@ -22,6 +22,7 @@ #include #include #include +#include namespace DB { @@ -37,8 +38,9 @@ namespace ErrorCodes using IndexFileIteratorPtr = std::unique_ptr; using IndexFileIterators = std::vector; -MergeTreeDataDeduper::MergeTreeDataDeduper(const MergeTreeMetaBase & data_, ContextPtr context_) - : data(data_), 
context(context_), log(&Poco::Logger::get(data_.getLogName() + " (Deduper)")) +MergeTreeDataDeduper::MergeTreeDataDeduper( + const MergeTreeMetaBase & data_, ContextPtr context_, const CnchDedupHelper::DedupMode & dedup_mode_) + : data(data_), context(context_), log(&Poco::Logger::get(data_.getLogName() + " (Deduper)")), dedup_mode(dedup_mode_) { if (data.merging_params.hasExplicitVersionColumn()) version_mode = VersionMode::ExplicitVersion; @@ -46,6 +48,12 @@ MergeTreeDataDeduper::MergeTreeDataDeduper(const MergeTreeMetaBase & data_, Cont version_mode = VersionMode::PartitionValueAsVersion; else version_mode = VersionMode::NoVersion; + + if (dedup_mode == CnchDedupHelper::DedupMode::APPEND) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Dedup mode in dedup process is APPEND for table {}, it's a bug!", + data.getCnchStorageID().getNameForLogs()); } namespace @@ -108,8 +116,7 @@ void MergeTreeDataDeduper::dedupKeysWithParts( const IMergeTreeDataPartsVector & parts, DeleteBitmapVector & delta_bitmaps, DedupTaskProgressReporter reporter, - DedupTaskPtr & dedup_task, - DedupKeyMode dedup_key_mode) + DedupTaskPtr & dedup_task) { const IndexFile::Comparator * comparator = IndexFile::BytewiseComparator(); @@ -209,7 +216,7 @@ void MergeTreeDataDeduper::dedupKeysWithParts( else { exact_match = true; - if (dedup_key_mode == DedupKeyMode::THROW) + if (dedup_mode == CnchDedupHelper::DedupMode::THROW) throw Exception("Found duplication when insert with setting dedup_key_mode=DedupKeyMode::THROW", ErrorCodes::INCORRECT_DATA); } @@ -442,20 +449,22 @@ LocalDeleteBitmaps MergeTreeDataDeduper::dedupParts( size_t num_bitmaps_to_dump = prepare_bitmaps_to_dump(visible_parts, new_parts, bitmaps); LOG_DEBUG( log, - "Dedup {} in {} ms, visible parts={}, new parts={}, result bitmaps={}, txn_id: {}", + "Dedup {} in {} ms, visible parts={}, new parts={}, result bitmaps={}, txn_id: {}, dedup mode: {}", dedup_task_local->getDedupLevelInfo(), sub_task_watch.elapsedMilliseconds(), 
visible_parts.size(), new_parts.size(), num_bitmaps_to_dump, - txn_id.toUInt64()); + txn_id.toUInt64(), + CnchDedupHelper::typeToString(dedup_mode)); }); } dedup_pool.wait(); LOG_DEBUG( log, - "Dedup {} tasks in {} ms, thread pool={}, visible parts={}, staged parts={}, uncommitted_parts = {}, result bitmaps={}, txn_id: {}", + "Dedup {} tasks in {} ms, thread pool={}, visible parts={}, staged parts={}, uncommitted_parts = {}, result bitmaps={}, txn_id: " + "{}, dedup mode: {}", dedup_tasks.size(), watch.elapsedMilliseconds(), dedup_pool_size, @@ -463,7 +472,8 @@ LocalDeleteBitmaps MergeTreeDataDeduper::dedupParts( all_staged_parts.size(), all_uncommitted_parts.size(), res.size(), - txn_id.toUInt64()); + txn_id.toUInt64(), + CnchDedupHelper::typeToString(dedup_mode)); return res; } @@ -722,7 +732,7 @@ MergeTreeDataDeduper::dedupImpl(const IMergeTreeDataPartsVector & visible_parts, return os.str(); }; - dedupKeysWithParts(dedup_task->iter, visible_parts, res, task_progress_reporter, dedup_task, context->getSettings().dedup_key_mode); + dedupKeysWithParts(dedup_task->iter, visible_parts, res, task_progress_reporter, dedup_task); return res; } diff --git a/src/MergeTreeCommon/MergeTreeDataDeduper.h b/src/MergeTreeCommon/MergeTreeDataDeduper.h index 857464180e8..5ba21bd1d77 100644 --- a/src/MergeTreeCommon/MergeTreeDataDeduper.h +++ b/src/MergeTreeCommon/MergeTreeDataDeduper.h @@ -19,6 +19,7 @@ #include #include #include +#include namespace DB { @@ -34,7 +35,10 @@ class MergeTreeDataDeduper using RowPos = ReplacingSortedKeysIterator::RowPos; using DeleteCallback = ReplacingSortedKeysIterator::DeleteCallback; - MergeTreeDataDeduper(const MergeTreeMetaBase & data_, ContextPtr context_); + MergeTreeDataDeduper( + const MergeTreeMetaBase & data_, + ContextPtr context_, + const CnchDedupHelper::DedupMode & dedup_mode_ = CnchDedupHelper::DedupMode::UPSERT); /// Remove duplicate keys among visible, staged, and uncommitted parts. 
/// Assumes that @@ -102,8 +106,7 @@ class MergeTreeDataDeduper const IMergeTreeDataPartsVector & parts, DeleteBitmapVector & delta_bitmaps, DedupTaskProgressReporter reporter, - DedupTaskPtr & dedup_task, - DedupKeyMode dedup_key_mode = DedupKeyMode::REPLACE); + DedupTaskPtr & dedup_task); /// Convert dedup task into multiple sub dedup tasks. If valid_bucket_table is true, it will split dedup task into bucket granule. DedupTasks convertIntoSubDedupTasks( @@ -124,6 +127,7 @@ class MergeTreeDataDeduper ContextPtr context; Poco::Logger * log; VersionMode version_mode; + CnchDedupHelper::DedupMode dedup_mode; }; } diff --git a/src/MergeTreeCommon/MergeTreeMetaBase.cpp b/src/MergeTreeCommon/MergeTreeMetaBase.cpp index d93ec676a63..2dc927ced63 100644 --- a/src/MergeTreeCommon/MergeTreeMetaBase.cpp +++ b/src/MergeTreeCommon/MergeTreeMetaBase.cpp @@ -47,7 +47,7 @@ #include #include #include -#include +#include #include #include #include @@ -1921,15 +1921,6 @@ void MergeTreeMetaBase::checkColumnsValidity(const ColumnsDescription & columns, } } -bool MergeTreeMetaBase::commitTxnFromWorkerSide(const StorageMetadataPtr & metadata_snapshot, ContextPtr query_context) const -{ - if (!metadata_snapshot->hasUniqueKey()) - return false; - bool enable_staging_area = query_context->getSettingsRef().enable_staging_area_for_write || getSettings()->cloud_enable_staging_area; - bool enable_append_mode = query_context->getSettingsRef().dedup_key_mode == DedupKeyMode::APPEND; - return !enable_append_mode && !enable_staging_area; -} - ColumnSize MergeTreeMetaBase::getMapColumnSizes(const DataPartPtr & part, const String & map_implicit_column_name) const { auto part_checksums = part->getChecksums(); @@ -2387,10 +2378,19 @@ void MergeTreeMetaBase::getDeleteBitmapMetaForServerParts(const ServerDataPartsV } } } - + } } +void MergeTreeMetaBase::getDeleteBitmapMetaForCnchParts(MutableMergeTreeDataPartsCNCHVector & parts, DeleteBitmapMetaPtrVector & all_bitmaps, bool force_found) +{ + 
MergeTreeDataPartsCNCHVector cnch_parts; + cnch_parts.reserve(parts.size()); + for (auto & part : parts) + cnch_parts.emplace_back(const_pointer_cast(part)); + getDeleteBitmapMetaForCnchParts(cnch_parts, all_bitmaps, force_found); +} + void MergeTreeMetaBase::getDeleteBitmapMetaForCnchParts(const MergeTreeDataPartsCNCHVector & parts, DeleteBitmapMetaPtrVector & all_bitmaps, bool force_found) { DeleteBitmapMetaPtrVector bitmaps; diff --git a/src/MergeTreeCommon/MergeTreeMetaBase.h b/src/MergeTreeCommon/MergeTreeMetaBase.h index 11a87f5ab84..2215380f965 100644 --- a/src/MergeTreeCommon/MergeTreeMetaBase.h +++ b/src/MergeTreeCommon/MergeTreeMetaBase.h @@ -441,9 +441,6 @@ class MergeTreeMetaBase : public IStorage, public WithMutableContext, public Mer virtual bool supportsOptimizer() const override { return true; } - bool commitTxnFromWorkerSide(const StorageMetadataPtr & metadata_snapshot, ContextPtr query_context) const; - virtual bool supportIntermedicateResultCache() const override { return true; } - ColumnSize calculateMapColumnSizesImpl(const String & map_implicit_column_name) const; void resetObjectColumns(const ColumnsDescription & object_columns_) { object_columns = object_columns_; } diff --git a/src/Optimizer/Property/PropertyDeterminer.cpp b/src/Optimizer/Property/PropertyDeterminer.cpp index 446a571d10a..349f8b47472 100644 --- a/src/Optimizer/Property/PropertyDeterminer.cpp +++ b/src/Optimizer/Property/PropertyDeterminer.cpp @@ -24,7 +24,6 @@ #include #include #include -#include namespace DB { @@ -546,17 +545,9 @@ PropertySets DeterminerVisitor::visitFillingStep(const FillingStep &, Determiner return {{Property{Partitioning{Partitioning::Handle::SINGLE}}}}; } -PropertySets DeterminerVisitor::visitTableWriteStep(const TableWriteStep & step, DeterminerContext & context) +PropertySets DeterminerVisitor::visitTableWriteStep(const TableWriteStep &, DeterminerContext &) { auto node = Partitioning{Partitioning::Handle::FIXED_ARBITRARY}; - if (const auto * 
cnch_table = dynamic_cast(step.getTarget()->getStorage().get())) - { - // unique table can't support do TableWrite in many workers. - if (cnch_table->getInMemoryMetadataPtr()->hasUniqueKey() && !context.getContext().getSettingsRef().enable_staging_area_for_write) - { - node = Partitioning{Partitioning::Handle::SINGLE}; - } - } node.setComponent(Partitioning::Component::WORKER); return {{Property{node}}}; } diff --git a/src/Processors/Transforms/TableFinishTransform.cpp b/src/Processors/Transforms/TableFinishTransform.cpp index e7d1b688eb1..ff22c942431 100644 --- a/src/Processors/Transforms/TableFinishTransform.cpp +++ b/src/Processors/Transforms/TableFinishTransform.cpp @@ -99,18 +99,10 @@ void TableFinishTransform::consume(Chunk chunk) void TableFinishTransform::onFinish() { - if (const auto * cnch_table = dynamic_cast(storage.get()); - cnch_table && cnch_table->commitTxnFromWorkerSide(cnch_table->getInMemoryMetadataPtr(), context)) - { - /// for unique table, insert select|infile is committed from worker side - /// TODO: should also commit in server side - } - else - { - TransactionCnchPtr txn = context->getCurrentTransaction(); - txn->setMainTableUUID(storage->getStorageUUID()); - txn->commitV2(); - } + TransactionCnchPtr txn = context->getCurrentTransaction(); + txn->setMainTableUUID(storage->getStorageUUID()); + txn->commitV2(); + /// Make sure locks are release after transaction commit if (!lock_holders.empty()) lock_holders.clear(); diff --git a/src/Protos/cnch_server_rpc.proto b/src/Protos/cnch_server_rpc.proto index 51ba12d1b0b..eea6ca3d3ac 100644 --- a/src/Protos/cnch_server_rpc.proto +++ b/src/Protos/cnch_server_rpc.proto @@ -114,6 +114,7 @@ message CommitPartsReq { // Binlog will be committed with parts while sync MaterializedMySQL optional MySQLBinlogModel binlog = 14; optional uint64 peak_memory_usage = 15; + optional uint32 dedup_mode = 16; }; message CommitPartsResp { diff --git a/src/Protos/cnch_worker_rpc.proto b/src/Protos/cnch_worker_rpc.proto 
index 99cd8a50906..10a2e5be2ec 100644 --- a/src/Protos/cnch_worker_rpc.proto +++ b/src/Protos/cnch_worker_rpc.proto @@ -551,6 +551,28 @@ message CheckMySQLSyncThreadStatusResp optional bool is_running = 2; } +message ExecuteDedupTaskReq +{ + required uint64 txn_id = 1; + required uint32 rpc_port = 2; + required UUID table_uuid = 3; + repeated DataModelPart new_parts = 4; + repeated string new_parts_paths = 5; + repeated DataModelDeleteBitmap delete_bitmaps_for_new_parts = 6; + repeated DataModelPart staged_parts = 7; + repeated string staged_parts_paths = 8; + repeated DataModelDeleteBitmap delete_bitmaps_for_staged_parts = 9; + repeated DataModelPart visible_parts = 10; + repeated string visible_parts_paths = 11; + repeated DataModelDeleteBitmap delete_bitmaps_for_visible_parts = 12; + required uint32 dedup_mode = 13; +} + +message ExecuteDedupTaskResp +{ + optional string exception = 1; +} + service CnchWorkerService { rpc executeSimpleQuery(ExecuteSimpleQueryReq) returns (ExecuteSimpleQueryResp); @@ -596,4 +618,5 @@ service CnchWorkerService rpc removeWorkerResource(RemoveWorkerResourceReq) returns (RemoveWorkerResourceResp); rpc preloadDataParts(PreloadDataPartsReq) returns (PreloadDataPartsResp); rpc dropPartDiskCache(DropPartDiskCacheReq) returns (DropPartDiskCacheResp); + rpc executeDedupTask(ExecuteDedupTaskReq) returns (ExecuteDedupTaskResp); } diff --git a/src/Storages/IngestColumnCnch/IngestColumnCnch.cpp b/src/Storages/IngestColumnCnch/IngestColumnCnch.cpp index a0c5fcdf6a8..4e9b27bad97 100644 --- a/src/Storages/IngestColumnCnch/IngestColumnCnch.cpp +++ b/src/Storages/IngestColumnCnch/IngestColumnCnch.cpp @@ -323,7 +323,7 @@ Pipe ingestPartitionInServer( Stopwatch lock_watch; - auto cnch_lock = cur_txn->createLockHolder({std::move(partition_lock)}); + auto cnch_lock = std::make_shared(local_context, std::move(partition_lock)); cnch_lock->lock(); LOG_DEBUG(log, "Acquired lock in {} ms", lock_watch.elapsedMilliseconds()); diff --git 
a/src/Storages/MergeTree/CloudMergeTreeBlockOutputStream.cpp b/src/Storages/MergeTree/CloudMergeTreeBlockOutputStream.cpp index 6d65c2af62b..ff5dddc3c10 100644 --- a/src/Storages/MergeTree/CloudMergeTreeBlockOutputStream.cpp +++ b/src/Storages/MergeTree/CloudMergeTreeBlockOutputStream.cpp @@ -32,6 +32,7 @@ #include #include #include +#include #include namespace DB { @@ -75,8 +76,30 @@ void CloudMergeTreeBlockOutputStream::checkAndInit() throw Exception(ErrorCodes::BAD_ARGUMENTS, "Insert VALUES into staging area with dedup_key_mode=DedupKeyMode::THROW is not allowed"); LOG_DEBUG(log, "enable staging area for write"); } - if (dedup_parameters.enable_append_mode) - LOG_DEBUG(log, "enable append dedup key mode"); + else + { + switch (context->getSettings().dedup_key_mode) { + case DedupKeyMode::APPEND: + /// case 1(unique table with async insert): commit all the temp parts as staged parts, which will be converted to visible parts later by dedup worker + /// case 2(unique table with append mode): just commit all the temp parts as visible parts with empty delete bitmaps. Insert is lock-free and faster than upsert due to its simplicity. + cnch_writer.setDedupMode(CnchDedupHelper::DedupMode::APPEND); + LOG_DEBUG(log, "enable append dedup key mode"); + break; + case DedupKeyMode::THROW: + /// case 3(unique table with sync insert and throw when there has same key with existing parts) + cnch_writer.setDedupMode(CnchDedupHelper::DedupMode::THROW); + LOG_DEBUG(log, "enable throw dedup key mode"); + break; + case DedupKeyMode::REPLACE: + /// case 4(unique table with sync insert): In commit stage, acquire the necessary locks to avoid write-write conflicts and then remove duplicate keys between visible parts and temp parts. 
+ cnch_writer.setDedupMode(CnchDedupHelper::DedupMode::UPSERT); + LOG_TRACE(log, "enable upsert dedup mode"); + break; + default: + throw Exception( + ErrorCodes::LOGICAL_ERROR, "Unsupported dedup key mode: {}", context->getSettings().dedup_key_mode.toString()); + } + } } initOverwritePartitionPruner(); @@ -335,26 +358,6 @@ void CloudMergeTreeBlockOutputStream::writeSuffixImpl() { cnch_writer.preload(preload_parts); - if (!metadata_snapshot->hasUniqueKey() || dedup_parameters.enable_staging_area || dedup_parameters.enable_append_mode) - { - /// case1(normal table): commit all the temp parts as visible parts - /// case2(unique table with async insert): commit all the temp parts as staged parts, - /// which will be converted to visible parts later by dedup worker - /// case3(unique table with append mode): just commit all the temp parts as visible parts with empty delete bitmaps. - /// insert is lock-free and faster than upsert due to its simplicity. - writeSuffixForInsert(); - } - else - { - /// case(unique table with sync insert): acquire the necessary locks to avoid write-write conflicts - /// and then remove duplicate keys between visible parts and temp parts. - writeSuffixForUpsert(); - } -} - -void CloudMergeTreeBlockOutputStream::writeSuffixForInsert() -{ - // Commit for insert values in server side. 
auto txn = context->getCurrentTransaction(); if (dynamic_pointer_cast(txn) && !disable_transaction_commit) { @@ -415,128 +418,6 @@ namespace }; } -void CloudMergeTreeBlockOutputStream::writeSuffixForUpsert() -{ - auto txn = context->getCurrentTransaction(); - if (!txn) - throw Exception("Transaction is not set", ErrorCodes::LOGICAL_ERROR); - - UUID uuid = storage.getCnchStorageUUID(); - String uuid_str = UUIDHelpers::UUIDToString(uuid); - txn->setMainTableUUID(uuid); - if (auto worker_txn = dynamic_pointer_cast(txn); worker_txn && !worker_txn->tryGetServerClient()) - { - /// case: server initiated "insert select/infile" txn, need to set server client here in order to commit from worker - if (const auto & client_info = context->getClientInfo(); client_info.rpc_port) - worker_txn->setServerClient(context->getCnchServerClient(client_info.current_address.host().toString(), client_info.rpc_port)); - else - throw Exception("Missing rpc_port, can't obtain server client to commit txn", ErrorCodes::LOGICAL_ERROR); - } - else - { - /// no need to set server client - /// case: server initiated "insert values" txn, server client not required - /// case: worker initiated "insert values|select|infile" txn, server client already set - } - - auto catalog = context->getCnchCatalog(); - /// must use cnch table to construct staged parts. 
- TxnTimestamp ts = context->getTimestamp(); - auto table = catalog->tryGetTableByUUID(*context, uuid_str, ts); - if (!table) - throw Exception("Table " + storage.getStorageID().getNameForLogs() + " has been dropped", ErrorCodes::ABORTED); - auto cnch_table = dynamic_pointer_cast(table); - if (!cnch_table) - throw Exception("Table " + storage.getStorageID().getNameForLogs() + " is not cnch merge tree", ErrorCodes::LOGICAL_ERROR); - - if (preload_parts.empty()) - { - Stopwatch watch; - txn->commitV2(); - LOG_INFO( - log, - "Committed transaction {} in {} ms, preload_parts is empty", - txn->getTransactionID(), - watch.elapsedMilliseconds(), - preload_parts.size()); - return; - } - - CnchLockHolderPtr cnch_lock; - MergeTreeDataPartsCNCHVector visible_parts, staged_parts; - bool force_normal_dedup = false; - Stopwatch lock_watch; - do - { - CnchDedupHelper::DedupScope scope = CnchDedupHelper::getDedupScope(*cnch_table, preload_parts, force_normal_dedup); - - std::vector locks_to_acquire = CnchDedupHelper::getLocksToAcquire( - scope, txn->getTransactionID(), *cnch_table, storage.getSettings()->unique_acquire_write_lock_timeout.value.totalMilliseconds()); - lock_watch.restart(); - cnch_lock = txn->createLockHolder(std::move(locks_to_acquire)); - if (!cnch_lock->tryLock()) - { - if (auto unique_table_log = context->getCloudUniqueTableLog()) - { - auto current_log = UniqueTable::createUniqueTableLog(UniqueTableLogElement::ERROR, cnch_table->getCnchStorageID()); - current_log.txn_id = txn->getTransactionID(); - current_log.metric = ErrorCodes::CNCH_LOCK_ACQUIRE_FAILED; - current_log.event_msg = "Failed to acquire lock for txn " + txn->getTransactionID().toString(); - unique_table_log->add(current_log); - } - throw Exception("Failed to acquire lock for txn " + txn->getTransactionID().toString(), ErrorCodes::CNCH_LOCK_ACQUIRE_FAILED); - } - - lock_watch.restart(); - ts = context->getTimestamp(); /// must get a new ts after locks are acquired - visible_parts = 
CnchDedupHelper::getVisiblePartsToDedup(scope, *cnch_table, ts); - staged_parts = CnchDedupHelper::getStagedPartsToDedup(scope, *cnch_table, ts); - - /// In some case, visible parts or staged parts doesn't have same bucket definition or not a bucket part, we need to convert bucket lock to normal lock. - /// Otherwise, it may lead to duplicated data. - if (scope.isBucketLock() && !cnch_table->getSettings()->enable_bucket_level_unique_keys - && !CnchDedupHelper::checkBucketParts(*cnch_table, visible_parts, staged_parts)) - { - force_normal_dedup = true; - cnch_lock->unlock(); - LOG_TRACE(log, "Check bucket parts failed, switch to normal lock to dedup."); - continue; - } - else - { - /// Filter staged parts if lock scope is bucket level - scope.filterParts(staged_parts); - break; - } - } while (true); - - if (unlikely(context->getSettingsRef().unique_sleep_seconds_after_acquire_lock.totalSeconds())) - { - /// Test purpose only - std::this_thread::sleep_for(std::chrono::seconds(context->getSettingsRef().unique_sleep_seconds_after_acquire_lock.totalSeconds())); - } - - MergeTreeDataDeduper deduper(*cnch_table, context); - LocalDeleteBitmaps bitmaps_to_dump = deduper.dedupParts( - txn->getTransactionID(), - CnchPartsHelper::toIMergeTreeDataPartsVector(visible_parts), - CnchPartsHelper::toIMergeTreeDataPartsVector(staged_parts), - {preload_parts.begin(), preload_parts.end()}); - - Stopwatch watch; - cnch_writer.publishStagedParts(staged_parts, bitmaps_to_dump); - LOG_DEBUG(log, "Publishing staged parts take {} ms", watch.elapsedMilliseconds()); - - watch.restart(); - txn->commitV2(); - LOG_INFO( - log, - "Committed transaction {} in {} ms (with {} ms holding lock)", - txn->getTransactionID(), - watch.elapsedMilliseconds(), - lock_watch.elapsedMilliseconds()); -} - CloudMergeTreeBlockOutputStream::FilterInfo CloudMergeTreeBlockOutputStream::dedupWithUniqueKey(const Block & block) { if (!metadata_snapshot->hasUniqueKey()) diff --git 
a/src/Storages/MergeTree/CloudMergeTreeBlockOutputStream.h b/src/Storages/MergeTree/CloudMergeTreeBlockOutputStream.h index 9c9d49667bb..dbadd34f3f0 100644 --- a/src/Storages/MergeTree/CloudMergeTreeBlockOutputStream.h +++ b/src/Storages/MergeTree/CloudMergeTreeBlockOutputStream.h @@ -54,9 +54,6 @@ class CloudMergeTreeBlockOutputStream : public IBlockOutputStream using FilterInfo = CnchDedupHelper::FilterInfo; FilterInfo dedupWithUniqueKey(const Block & block); - void writeSuffixForInsert(); - void writeSuffixForUpsert(); - void initOverwritePartitionPruner(); void checkAndInit(); diff --git a/src/Storages/MergeTree/MergeTreeDataPartCNCH.cpp b/src/Storages/MergeTree/MergeTreeDataPartCNCH.cpp index f9f4ab98aef..1d6e548b1ef 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCNCH.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartCNCH.cpp @@ -457,7 +457,14 @@ ImmutableDeleteBitmapPtr MergeTreeDataPartCNCH::getCombinedDeleteBitmapForUnique DeleteBitmapPtr bitmap = std::make_shared(); std::forward_list to_reads; /// store meta in ascending order of commit time - if (cached_version > target_version) + bool skip_cache = false; + if (delete_flag && target_version == 0) + { + /// case: insert with _delete_flag_ & sync dedup + skip_cache = true; + } + + if (cached_version > target_version || skip_cache) { /// case: querying an older version than the cached version /// then cached bitmap can't be used and we need to build the bitmap from all metas @@ -475,8 +482,13 @@ ImmutableDeleteBitmapPtr MergeTreeDataPartCNCH::getCombinedDeleteBitmapForUnique { to_reads.insert_after(to_reads.before_begin(), meta); } - else if (meta->commit_time() == cached_version) + else if (hit_cache && meta->commit_time() == cached_version) { + /// Make sure that cached bitmap is not nullptr + if (!cached_bitmap) + throw Exception( + ErrorCodes::LOGICAL_ERROR, "Cached bitmap is nullptr at version: {}, part: {}", cached_version, name); + *bitmap = *cached_bitmap; /// copy the cached bitmap as the base 
break; } @@ -504,19 +516,20 @@ ImmutableDeleteBitmapPtr MergeTreeDataPartCNCH::getCombinedDeleteBitmapForUnique combineWithRowExists(bitmap); delete_bitmap = std::move(bitmap); - if (target_version > cached_version) + if (target_version > cached_version && !skip_cache) { cache->insert(cache_key, target_version, delete_bitmap); } LOG_DEBUG( storage.log, - "Loaded delete bitmap for unique table part {} in {} ms, commit_time: {}, bitmap cardinality: {}, generated in txn: {}", + "Loaded delete bitmap for unique table part {} in {} ms, commit_time: {}, bitmap cardinality: {}, generated in txn: {}{}", name, watch.elapsedMilliseconds(), target_version, delete_bitmap->cardinality(), - txn_id); + txn_id, + skip_cache ? ", skip cache": ""); } } diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index 02ec8d342f9..a5b820b9a63 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -476,7 +476,7 @@ enum StealingCacheMode : UInt64 M(UInt64, dedup_worker_max_heartbeat_interval, 16, "", 0) \ M(Bool, partition_level_unique_keys, true, "", 0) \ M(UInt64, staged_part_lifetime_threshold_ms_to_block_kafka_consume, 10000, "", 0) \ - M(Seconds, unique_acquire_write_lock_timeout, 300, "", 0) \ + M(Seconds, unique_acquire_write_lock_timeout, 300, "It has lower priority than session setting. 
Only when session setting is zero, use this setting", 0) \ M(MaxThreads, cnch_parallel_dumping_threads, 8, "", 0) \ M(MaxThreads, unique_table_dedup_threads, 8, "", 0) \ M(Seconds, dedup_worker_progress_log_interval, 120, "", 0) \ diff --git a/src/Storages/StorageCloudMergeTree.h b/src/Storages/StorageCloudMergeTree.h index a4e44dce276..a6984fd7d05 100644 --- a/src/Storages/StorageCloudMergeTree.h +++ b/src/Storages/StorageCloudMergeTree.h @@ -41,7 +41,7 @@ class StorageCloudMergeTree : public shared_ptr_helper, p std::string getName() const override { return "CloudMergeTree"; } - bool supportsParallelInsert() const override { return !getInMemoryMetadataPtr()->hasUniqueKey(); } + bool supportsParallelInsert() const override { return true; } bool supportsSampling() const override { return true; } bool supportsFinal() const override { return true; } bool supportsPrewhere() const override { return true; } diff --git a/src/Storages/StorageCnchMergeTree.cpp b/src/Storages/StorageCnchMergeTree.cpp index 2336f1646f6..b87e2d9df21 100644 --- a/src/Storages/StorageCnchMergeTree.cpp +++ b/src/Storages/StorageCnchMergeTree.cpp @@ -1330,8 +1330,11 @@ void StorageCnchMergeTree::executeDedupForRepair(const ASTSystemQuery & query, C scope = CnchDedupHelper::DedupScope::TableDedupWithBucket(buckets); } - auto cnch_lock = txn->createLockHolder(CnchDedupHelper::getLocksToAcquire( - scope, txn->getTransactionID(), *this, getSettings()->unique_acquire_write_lock_timeout.value.totalMilliseconds())); + auto cnch_lock = std::make_shared( + local_context, + CnchDedupHelper::getLocksToAcquire( + scope, txn->getTransactionID(), *this, CnchDedupHelper::getWriteLockTimeout(*this, local_context))); + txn->appendLockHolder(cnch_lock); cnch_lock->lock(); TxnTimestamp ts = local_context->getTimestamp(); @@ -2439,7 +2442,7 @@ void StorageCnchMergeTree::dropPartitionOrPart( } /// else { lock all partitions } Stopwatch lock_watch; - auto cnch_lock = 
cur_txn->createLockHolder({std::move(partition_lock)}); + auto cnch_lock = std::make_shared(local_context, std::move(partition_lock)); cnch_lock->lock(); LOG_DEBUG(log, "DROP PARTITION acquired lock in {} ms", lock_watch.elapsedMilliseconds()); diff --git a/src/Storages/StorageCnchMergeTree.h b/src/Storages/StorageCnchMergeTree.h index f51ed0e48a7..7e71ada7a4e 100644 --- a/src/Storages/StorageCnchMergeTree.h +++ b/src/Storages/StorageCnchMergeTree.h @@ -43,7 +43,7 @@ class StorageCnchMergeTree final : public shared_ptr_helperhasUniqueKey(); } + bool supportsParallelInsert() const override { return true; } bool supportsSampling() const override { return true; } bool supportsFinal() const override { return true; } bool supportsPrewhere() const override { return true; } diff --git a/src/Transaction/Actions/InsertAction.cpp b/src/Transaction/Actions/InsertAction.cpp index 35501bfe9a5..a763cc8a76e 100644 --- a/src/Transaction/Actions/InsertAction.cpp +++ b/src/Transaction/Actions/InsertAction.cpp @@ -19,6 +19,9 @@ #include #include #include +#include +#include +#include namespace DB @@ -131,4 +134,38 @@ UInt32 InsertAction::collectNewParts(MutableMergeTreeDataPartsCNCHVector const & return size; } +void InsertAction::checkAndSetDedupMode(CnchDedupHelper::DedupMode dedup_mode_) +{ + if (dedup_mode_ >= CnchDedupHelper::DedupMode::UPSERT) + { + if (!table->getInMemoryMetadataPtr()->hasUniqueKey()) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Table {} is not unique engine, but dedup mode is {}, it's a bug!", + table->getCnchStorageID().getNameForLogs(), + typeToString(dedup_mode_)); + + if (!staged_parts.empty()) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Dedup mode is {}, but staged parts are not empty for table {}, it's a bug!", + table->getCnchStorageID().getNameForLogs(), + typeToString(dedup_mode_)); + + LOG_TRACE(log, "Table {} is in {} mode.", table->getCnchStorageID().getNameForLogs(), typeToString(dedup_mode_)); + } + dedup_mode = dedup_mode_; +} + 
+CnchDedupHelper::DedupTaskPtr InsertAction::getDedupTask() const +{ + /// If parts are empty or dedup mode is append, just skip dedup stage. + if (dedup_mode == CnchDedupHelper::DedupMode::APPEND || parts.empty()) + return nullptr; + + auto dedup_task = std::make_shared(dedup_mode, table->getCnchStorageID()); + dedup_task->new_parts = parts; + dedup_task->delete_bitmaps_for_new_parts = delete_bitmaps; + return dedup_task; +} } diff --git a/src/Transaction/Actions/InsertAction.h b/src/Transaction/Actions/InsertAction.h index d986c8c4f0d..c37bfc1c5c5 100644 --- a/src/Transaction/Actions/InsertAction.h +++ b/src/Transaction/Actions/InsertAction.h @@ -20,6 +20,7 @@ #include #include #include +#include namespace DB { @@ -62,11 +63,16 @@ class InsertAction : public IAction UInt32 getSize() const override { return parts.size() + delete_bitmaps.size() + staged_parts.size(); } + void checkAndSetDedupMode(CnchDedupHelper::DedupMode dedup_mode_); + + CnchDedupHelper::DedupTaskPtr getDedupTask() const; + private: const StoragePtr table; MutableMergeTreeDataPartsCNCHVector parts; DeleteBitmapMetaPtrVector delete_bitmaps; MutableMergeTreeDataPartsCNCHVector staged_parts; + CnchDedupHelper::DedupMode dedup_mode = CnchDedupHelper::DedupMode::APPEND; bool executed{false}; Poco::Logger * log{&Poco::Logger::get("InsertAction")}; diff --git a/src/Transaction/CnchLock.cpp b/src/Transaction/CnchLock.cpp index 8cb99ebde23..6d7f6993b50 100644 --- a/src/Transaction/CnchLock.cpp +++ b/src/Transaction/CnchLock.cpp @@ -122,6 +122,14 @@ friend class CnchLockHolder; CnchServerClientPtr server_client; }; +CnchLockHolder::CnchLockHolder(const ContextPtr & context_, LockInfoPtr && elem) : WithContext(context_) +{ + txn_id = elem->txn_id; + assert(txn_id); + elem->setLockID(context_->getTimestamp()); + cnch_locks.push_back(std::make_unique(context_, elem)); +} + CnchLockHolder::CnchLockHolder(const ContextPtr & context_, std::vector && elems) : WithContext(context_) { assert(!elems.empty()); diff 
--git a/src/Transaction/CnchLock.h b/src/Transaction/CnchLock.h index 27048ad2fcd..7d7860074b4 100644 --- a/src/Transaction/CnchLock.h +++ b/src/Transaction/CnchLock.h @@ -16,6 +16,7 @@ class CnchLockHolder : private boost::noncopyable, WithContext public: friend class ICnchTransaction; explicit CnchLockHolder(const ContextPtr & context_, std::vector && elems); + explicit CnchLockHolder(const ContextPtr & context_, LockInfoPtr && elem); ~CnchLockHolder(); diff --git a/src/Transaction/CnchServerTransaction.cpp b/src/Transaction/CnchServerTransaction.cpp index bb8fc0d9c48..3ad359a76b4 100644 --- a/src/Transaction/CnchServerTransaction.cpp +++ b/src/Transaction/CnchServerTransaction.cpp @@ -26,6 +26,11 @@ #include #include #include +#include +#include +#include +#include +#include namespace ProfileEvents { @@ -59,7 +64,7 @@ namespace ErrorCodes extern const int INSERTION_LABEL_ALREADY_EXISTS; extern const int FAILED_TO_PUT_INSERTION_LABEL; extern const int BRPC_TIMEOUT; - // extern const int BAD_CAST; + extern const int ABORTED; } CnchServerTransaction::CnchServerTransaction(const ContextPtr & context_, TransactionRecord txn_record_) @@ -146,7 +151,6 @@ TxnTimestamp CnchServerTransaction::commitV2() try { precommit(); - assertLockAcquired(); /// XXX: If a topo switch occurs during the commit phase, it may lead to parallel lock holding. /// While this problem is difficult to solve because committed transactions are not supported to be rolled back. 
Temporarily use the time window of topo switching to avoid this problem return commit(); @@ -185,14 +189,162 @@ void CnchServerTransaction::precommit() Stopwatch watch(CLOCK_MONOTONIC_COARSE); SCOPE_EXIT({ ProfileEvents::increment(ProfileEvents::CnchTxnPrecommitElapsedMilliseconds, watch.elapsedMilliseconds()); }); - auto lock = getLock(); - if (auto status = getStatus(); status != CnchTransactionStatus::Running) - throw Exception("Transaction is not in running status, but in " + String(txnStatusToString(status)), ErrorCodes::LOGICAL_ERROR); + { + auto lock = getLock(); + if (auto status = getStatus(); status != CnchTransactionStatus::Running) + throw Exception("Transaction is not in running status, but in " + String(txnStatusToString(status)), ErrorCodes::LOGICAL_ERROR); + + for (auto & action : actions) + action->executeV2(); + + txn_record.prepared = true; + action_size_before_dedup = actions.size(); + } + + auto retry_time = getContext()->getSettingsRef().max_dedup_retry_time.value; + do + { + try + { + executeDedupStage(); + assertLockAcquired(); + } + catch (...) 
+ { + if (retry_time == 0) + throw; + else if (action_size_before_dedup < actions.size()) + { + /// TODO: Impl retry in this case, especially handle undo buffer + LOG_WARNING( + log, + "Dedup stage failed, but result is not empty({}/{}), unable to retry, retry time: {}", + actions.size(), + action_size_before_dedup, + retry_time); + throw; + } + else + { + LOG_WARNING(log, "Dedup stage failed, retry time: {}, reason: {}", retry_time, getCurrentExceptionMessage(false)); + retry_time--; + dedup_stage_flag = false; + continue; + } + } + break; + } while (true); +} + +void CnchServerTransaction::executeDedupStage() +{ + auto expected_value = false; + if (!dedup_stage_flag.compare_exchange_strong(expected_value, true)) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Execute dedup stage concurrently which may lead to dirty dedup result, it's bug for current impl."); + + /// Currently, lock holder only serves for dedup stage for unique table, we can just clear it in case of retry. + lock_holders.clear(); + std::unordered_map dedup_task_map; + for (size_t i = 0 ; i < action_size_before_dedup; ++i) + { + auto & action = actions[i]; + if (auto * insert_action = dynamic_cast(action.get())) + { + auto dedup_task = insert_action->getDedupTask(); + if (dedup_task) + { + if (dedup_task_map.count(dedup_task->storage_id)) + { + auto & final_dedup_info = dedup_task_map[dedup_task->storage_id]; + final_dedup_info->new_parts.insert( + final_dedup_info->new_parts.end(), dedup_task->new_parts.begin(), dedup_task->new_parts.end()); + final_dedup_info->delete_bitmaps_for_new_parts.insert( + final_dedup_info->delete_bitmaps_for_new_parts.end(), + dedup_task->delete_bitmaps_for_new_parts.begin(), + dedup_task->delete_bitmaps_for_new_parts.end()); + } + else + dedup_task_map[dedup_task->storage_id] = std::move(dedup_task); + } + } + } + + if (dedup_task_map.empty()) + return; + + Stopwatch watch; + auto txn_id = getTransactionID(); + if (dedup_task_map.size() > 1) + LOG_TRACE(log, "Start 
handle dedup stage for {} tables, txn id: {}", dedup_task_map.size(), txn_id.toUInt64()); + + auto handler = std::make_shared(); + std::vector call_ids; + for (auto & it : dedup_task_map) + { + Stopwatch total_task_watch; + const auto & storage_id = it.first; + auto & dedup_task = it.second; + if (!dedup_task) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Dedup task for table {} is nullptr, this is a bug!", storage_id.getNameForLogs()); + + if (dedup_task->new_parts.empty()) + continue; + + LOG_TRACE( + log, + "Start handle dedup stage for table {}, part size: {}, delete bitmap size: {}, txn: {}", + storage_id.getNameForLogs(), + dedup_task->new_parts.size(), + dedup_task->delete_bitmaps_for_new_parts.size(), + txn_id.toUInt64()); + + auto catalog = getContext()->getCnchCatalog(); + TxnTimestamp ts = getContext()->getTimestamp(); + auto table = catalog->tryGetTableByUUID(*getContext(), UUIDHelpers::UUIDToString(dedup_task->storage_id.uuid), ts); + if (!table) + throw Exception(ErrorCodes::ABORTED, "Table {} has been dropped", dedup_task->storage_id.getNameForLogs()); + auto cnch_table = dynamic_pointer_cast(table); + if (!cnch_table) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Table {} is not cnch merge tree", dedup_task->storage_id.getNameForLogs()); + + /// 1. Acquire lock and fill task + CnchDedupHelper::acquireLockAndFillDedupTask(*cnch_table, *dedup_task, *this, getContext()); + /// 2. Random pick one worker to execute dedup task + auto vw_handle = getContext()->getVirtualWarehousePool().get(cnch_table->getSettings()->cnch_vw_write); + auto worker_client = vw_handle->getWorker(); + LOG_DEBUG(log, "Choose worker: {} to execute dedup task for txn {}", worker_client->getHostWithPorts().toDebugString(), txn_id.toUInt64()); + + /// 3. 
Execute dedup task and wait result + Stopwatch inner_watch; + auto funcOnCallback = [&, dedup_task, inner_watch, pre_cost = total_task_watch.elapsedMilliseconds()](bool success) { + dedup_task->statistics.execute_task_cost = inner_watch.elapsedMilliseconds(); + dedup_task->statistics.total_cost = dedup_task->statistics.execute_task_cost + pre_cost; + dedup_task->statistics.other_cost = pre_cost - dedup_task->statistics.acquire_lock_cost - dedup_task->statistics.get_metadata_cost; + LOG_DEBUG( + log, + "{} handle dedup stage for table {}, part size: {}, delete bitmap size: {}, txn id: {}, statistics: {}", + success ? "Finish" : "Failed", + dedup_task->storage_id.getNameForLogs(), + dedup_task->new_parts.size(), + dedup_task->delete_bitmaps_for_new_parts.size(), + txn_id.toUInt64(), + dedup_task->statistics.toString()); + }; + auto call_id + = worker_client->executeDedupTask(getContext(), txn_id, getContext()->getRPCPort(), *cnch_table, *dedup_task, handler, funcOnCallback); + call_ids.emplace_back(call_id); + } + + /// 4. 
Wait result + for (auto & call_id : call_ids) + brpc::Join(call_id); - for (auto & action : actions) - action->executeV2(); + handler->throwIfException(); - txn_record.prepared = true; + if (dedup_task_map.size() > 1) + LOG_TRACE(log, "Finish handle dedup stage for {} tables, total cost {} ms, txn id: {}", dedup_task_map.size(), watch.elapsedMilliseconds(), txn_id.toUInt64()); } TxnTimestamp CnchServerTransaction::commit() diff --git a/src/Transaction/CnchServerTransaction.h b/src/Transaction/CnchServerTransaction.h index 821a0fe57e7..e93fae11c3a 100644 --- a/src/Transaction/CnchServerTransaction.h +++ b/src/Transaction/CnchServerTransaction.h @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -63,6 +64,9 @@ class CnchServerTransaction : public ICnchTransaction void removeIntermediateData() override; void incrementModifiedCount(const Statistics::AutoStats::ModifiedCounter& new_counts); + + Poco::Logger * getLogger() { return log; } + protected: static constexpr size_t MAX_RETRY = 3; std::vector actions; @@ -73,6 +77,12 @@ class CnchServerTransaction : public ICnchTransaction Poco::Logger * log {&Poco::Logger::get("CnchServerTransaction")}; + std::atomic_bool dedup_stage_flag{false}; + + size_t action_size_before_dedup = 0; + + void executeDedupStage(); + }; using CnchServerTransactionPtr = std::shared_ptr; diff --git a/src/Transaction/CnchWorkerTransaction.cpp b/src/Transaction/CnchWorkerTransaction.cpp index d704cc99e30..d1308c46fb2 100644 --- a/src/Transaction/CnchWorkerTransaction.cpp +++ b/src/Transaction/CnchWorkerTransaction.cpp @@ -118,10 +118,14 @@ void CnchWorkerTransaction::precommit() if (auto status = getStatus(); status != CnchTransactionStatus::Running) throw Exception("Cannot precommit a transaction that is " + String(txnStatusToString(status)), ErrorCodes::LOGICAL_ERROR); checkServerClient(); - auto lock = getLock(); - server_client->precommitTransaction(getTransactionID(), getMainTableUUID()); - txn_record.prepared = true; 
- LOG_DEBUG(log, "Transaction {} successfully finished pre commit."); + { + auto lock = getLock(); + server_client->precommitTransaction(getContext(), getTransactionID(), getMainTableUUID()); + txn_record.prepared = true; + } + + assertLockAcquired(); + LOG_DEBUG(log, "Transaction {} successfully finished pre commit.", txn_record.txnID().toUInt64()); } TxnTimestamp CnchWorkerTransaction::commit() @@ -188,7 +192,6 @@ TxnTimestamp CnchWorkerTransaction::commitV2() try { - assertLockAcquired(); /// XXX: If a topo switch occurs during the commit phase, it may lead to parallel lock holding. /// While this problem is difficult to solve because committed transactions are not supported to be rolled back. Temporarily use the time window of topo switching to avoid this problem return commit(); diff --git a/src/Transaction/CnchWorkerTransaction.h b/src/Transaction/CnchWorkerTransaction.h index f141666bf8b..87a57fcb2e9 100644 --- a/src/Transaction/CnchWorkerTransaction.h +++ b/src/Transaction/CnchWorkerTransaction.h @@ -84,6 +84,8 @@ class CnchWorkerTransaction : public ICnchTransaction throw Exception("abort is not supported for " + getTxnType(), ErrorCodes::NOT_IMPLEMENTED); } + void setIsInitiator(bool is_initiator_) { is_initiator = is_initiator_; } + private: void checkServerClient() const; diff --git a/src/Transaction/ICnchTransaction.cpp b/src/Transaction/ICnchTransaction.cpp index e96b71efcef..400abf26b3e 100644 --- a/src/Transaction/ICnchTransaction.cpp +++ b/src/Transaction/ICnchTransaction.cpp @@ -59,30 +59,19 @@ void ICnchTransaction::setTransactionRecord(TransactionRecord record) txn_record = std::move(record); } -std::shared_ptr ICnchTransaction::createLockHolder(std::vector && elems) +void ICnchTransaction::appendLockHolder(CnchLockHolderPtr & lock_holder) { - // if (lock_holder.has_value()) - // throw Exception("Invalid operation, should only acquired lock once", ErrorCodes::LOGICAL_ERROR); - /// TODO: should avoid acquired lock multiple time - auto holder = 
std::make_shared(global_context, std::move(elems)); - lock_holder = holder; - return holder; + auto lock = getLock(); + lock_holders.emplace_back(lock_holder); } void ICnchTransaction::assertLockAcquired() const { /// threadsafe - if (auto impl = lock_holder.lock()) - { - impl->assertLockAcquired(); - } + for (const auto & lock_holder: lock_holders) + lock_holder->assertLockAcquired(); } -// IntentLockPtr ICnchTransaction::createIntentLock(const LockEntity & entity, const Strings & intent_names) -// { -// return std::make_unique(context, getTransactionRecord(), entity, intent_names); -// } - void ICnchTransaction::setKafkaTpl(const String & consumer_group_, const cppkafka::TopicPartitionList & tpl_) { this->consumer_group = consumer_group_; diff --git a/src/Transaction/ICnchTransaction.h b/src/Transaction/ICnchTransaction.h index 414006c2853..2394aaf0b5b 100644 --- a/src/Transaction/ICnchTransaction.h +++ b/src/Transaction/ICnchTransaction.h @@ -50,6 +50,8 @@ namespace DB { struct TxnCleanTask; class CnchLockHolder; +using CnchLockHolderPtr = std::shared_ptr; +using CnchLockHolderPtrs = std::vector; namespace ErrorCodes { @@ -217,7 +219,7 @@ class ICnchTransaction : public std::enable_shared_from_this, // Clean intermediate parts synchronously virtual void removeIntermediateData() { } - std::shared_ptr createLockHolder(std::vector && elems); + void appendLockHolder(CnchLockHolderPtr & lock_holder); bool force_clean_by_dm = false; @@ -237,7 +239,6 @@ class ICnchTransaction : public std::enable_shared_from_this, void setStatus(CnchTransactionStatus status); void setTransactionRecord(TransactionRecord record); void assertLockAcquired() const; - void setLockHolder(std::shared_ptr p) { lock_holder = p; } /// Clean CurrentlyMergingPartsTagger for merge txn after the txn finished. 
void tryCleanMergeTagger(); @@ -263,7 +264,7 @@ class ICnchTransaction : public std::enable_shared_from_this, #endif InsertionLabelPtr insertion_label; - std::weak_ptr lock_holder; + CnchLockHolderPtrs lock_holders; /// Currently it only serves for unique dedup stage std::vector extern_commit_functions; diff --git a/src/WorkerTasks/CloudUniqueMergeTreeMergeTask.cpp b/src/WorkerTasks/CloudUniqueMergeTreeMergeTask.cpp index 3e6438332fd..f52283dbb20 100644 --- a/src/WorkerTasks/CloudUniqueMergeTreeMergeTask.cpp +++ b/src/WorkerTasks/CloudUniqueMergeTreeMergeTask.cpp @@ -272,7 +272,7 @@ void CloudUniqueMergeTreeMergeTask::executeImpl() if (locks_to_acquire.size() != 1) throw Exception(ErrorCodes::LOGICAL_ERROR, "Merge task {} acquires more than one lock.", params.task_id); String lock_debug_info = locks_to_acquire[0]->toDebugString(); - cnch_lock = txn->createLockHolder(std::move(locks_to_acquire)); + cnch_lock = std::make_shared(getContext(), std::move(locks_to_acquire)); while (num_try--) { LOG_TRACE(log, "Try lock: {}", lock_debug_info); @@ -319,6 +319,8 @@ void CloudUniqueMergeTreeMergeTask::executeImpl() if (!lock_success) throw Exception("Failed to acquire lock for merge task " + params.task_id, ErrorCodes::ABORTED); + txn->appendLockHolder(cnch_lock); + lock_watch.restart(); /// there may be new deletes before we acquired the lock since last update, handle them here diff --git a/tests/queries/4_cnch_stateless_no_tenant/48035_insert_select_no_gather.reference b/tests/queries/4_cnch_stateless_no_tenant/48035_insert_select_no_gather.reference index ed81cc35f2c..f1db849473d 100644 --- a/tests/queries/4_cnch_stateless_no_tenant/48035_insert_select_no_gather.reference +++ b/tests/queries/4_cnch_stateless_no_tenant/48035_insert_select_no_gather.reference @@ -7,3 +7,14 @@ TableFinish Est. ? rows │ Expressions: [id, k, p_date] └─ TableScan test.test_insert_bucket_source Est. ? rows Outputs: [p_date, id, k] + +test unique table +TableFinish Est. ? 
rows +└─ Gather Exchange Est. ? rows + └─ TableWrite Est. ? rows + │ Insert test.test_unique_insert_bucket + └─ Local Exchange Est. ? rows + └─ Projection Est. ? rows + │ Expressions: [id, k, p_date] + └─ TableScan test.test_insert_bucket_source Est. ? rows + Outputs: [p_date, id, k] diff --git a/tests/queries/4_cnch_stateless_no_tenant/48035_insert_select_no_gather.sql b/tests/queries/4_cnch_stateless_no_tenant/48035_insert_select_no_gather.sql index 1b4bbb3da9b..32e9b03d8b3 100644 --- a/tests/queries/4_cnch_stateless_no_tenant/48035_insert_select_no_gather.sql +++ b/tests/queries/4_cnch_stateless_no_tenant/48035_insert_select_no_gather.sql @@ -34,3 +34,30 @@ INSERT INTO test_insert_bucket SELECT id, k FROM test_insert_bucket_source; + +select ''; +select 'test unique table'; +DROP TABLE IF EXISTS test_unique_insert_bucket; + +CREATE TABLE test_unique_insert_bucket +( + `p_date` Date, + `id` Int32, + `k` Int32 +) +ENGINE = CnchMergeTree +PARTITION BY p_date +UNIQUE KEY id +CLUSTER BY id INTO 1 BUCKETS +ORDER BY id; + +EXPLAIN +INSERT INTO test_unique_insert_bucket SELECT + p_date, + id, + k +FROM test_insert_bucket_source; + +DROP TABLE IF EXISTS test_insert_bucket; +DROP TABLE IF EXISTS test_unique_insert_bucket; +DROP TABLE IF EXISTS test_insert_bucket_source; From 62aff06954a3e4dd06edc0aff8180bc36ec4d12b Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 04:15:23 +0000 Subject: [PATCH 093/292] Merge branch 'cherry-pick-fda414cd' into 'cnch-2.2' feat(clickhousech@m-4172455166): [To cnch2.2] introduce send_cacheable_table_definitions See merge request dp/ClickHouse!23021 --- src/CloudServices/CnchCreateQueryHelper.cpp | 118 ++++++++-- src/CloudServices/CnchCreateQueryHelper.h | 30 ++- src/CloudServices/CnchServerResource.cpp | 39 +++- src/CloudServices/CnchServerResource.h | 21 +- src/CloudServices/CnchWorkerClient.cpp | 22 +- src/CloudServices/CnchWorkerResource.cpp | 221 +++++++++++------- src/CloudServices/CnchWorkerResource.h | 15 +- 
src/CloudServices/CnchWorkerServiceImpl.cpp | 33 ++- src/CloudServices/DedupWorkerManager.cpp | 16 +- src/Common/ProfileEvents.cpp | 5 + src/Core/Settings.h | 1 + src/Databases/DatabasesCommon.cpp | 4 +- src/FormaterTool/PartMergerImpl.cpp | 11 +- src/Interpreters/AsynchronousMetrics.cpp | 8 + src/Interpreters/Context.cpp | 16 ++ src/Interpreters/Context.h | 4 + src/MergeTreeCommon/CnchStorageCommon.cpp | 69 +----- src/MergeTreeCommon/CnchStorageCommon.h | 20 +- src/MergeTreeCommon/MergeTreeMetaBase.cpp | 73 +++--- src/MergeTreeCommon/MergeTreeMetaBase.h | 9 +- .../gtest_create_query_for_cloud_table.cpp | 5 - src/Protos/cnch_worker_rpc.proto | 23 +- .../MergeTree/CloudTableDefinitionCache.h | 43 ++++ src/Storages/MergeTree/MergeTreeCloudData.cpp | 3 +- src/Storages/MergeTree/MergeTreeCloudData.h | 1 - src/Storages/MergeTree/MergeTreeData.cpp | 3 + .../MergeTree/registerStorageMergeTree.cpp | 3 +- src/Storages/StorageCloudMergeTree.cpp | 20 +- src/Storages/StorageCloudMergeTree.h | 7 +- src/Storages/StorageCnchMergeTree.cpp | 21 +- src/Storages/StorageDictCloudMergeTree.cpp | 3 - src/Storages/StorageDictCloudMergeTree.h | 3 +- src/Transaction/CnchServerTransaction.cpp | 1 + 33 files changed, 579 insertions(+), 292 deletions(-) create mode 100644 src/Storages/MergeTree/CloudTableDefinitionCache.h diff --git a/src/CloudServices/CnchCreateQueryHelper.cpp b/src/CloudServices/CnchCreateQueryHelper.cpp index 8d660cc7c7d..faed0b9d04a 100644 --- a/src/CloudServices/CnchCreateQueryHelper.cpp +++ b/src/CloudServices/CnchCreateQueryHelper.cpp @@ -15,12 +15,15 @@ #include +#include #include #include #include #include +#include #include #include +#include #include #include #include @@ -32,6 +35,12 @@ namespace DB { +namespace ErrorCodes +{ + extern const int DUPLICATE_COLUMN; + extern const int INCORRECT_QUERY; +} + std::shared_ptr getASTCreateQueryFromString(const String & query, const ContextPtr & context) { ParserCreateQuery parser_create; @@ -51,6 +60,77 @@ std::shared_ptr 
getASTCreateQueryFromStorage(const IStorage & st return getASTCreateQueryFromString(storage.getCreateTableSql(), context); } +StoragePtr createStorageFromQuery(ASTCreateQuery & create_query, ContextMutablePtr context) +{ + ColumnsDescription columns; + IndicesDescription indices; + ConstraintsDescription constraints; + ForeignKeysDescription foreign_keys; + UniqueNotEnforcedDescription unique_not_enforced; + + if (create_query.columns_list) + { + if (create_query.columns_list->columns) + { + // Set attach = true to avoid making columns nullable due to ANSI settings, because the dialect change + // should NOT affect existing tables. + columns = InterpreterCreateQuery::getColumnsDescription(*create_query.columns_list->columns, context, /* attach= */ true); + } + + if (create_query.columns_list->indices) + for (const auto & index : create_query.columns_list->indices->children) + indices.push_back(IndexDescription::getIndexFromAST(index->clone(), columns, context)); + + if (create_query.columns_list->constraints) + for (const auto & constraint : create_query.columns_list->constraints->children) + constraints.constraints.push_back(std::dynamic_pointer_cast(constraint->clone())); + + if (create_query.columns_list->foreign_keys) + for (const auto & foreign_key : create_query.columns_list->foreign_keys->children) + foreign_keys.foreign_keys.push_back(std::dynamic_pointer_cast(foreign_key->clone())); + + if (create_query.columns_list->unique) + for (const auto & unique : create_query.columns_list->unique->children) + unique_not_enforced.unique.push_back(std::dynamic_pointer_cast(unique->clone())); + } + else + throw Exception("Incorrect CREATE query: required list of column descriptions or AS section or SELECT.", ErrorCodes::INCORRECT_QUERY); + + /// Even if query has list of columns, canonicalize it (unfold Nested columns). 
+ ASTPtr new_columns = InterpreterCreateQuery::formatColumns(columns, ParserSettings::valueOf(context->getSettingsRef())); + ASTPtr new_indices = InterpreterCreateQuery::formatIndices(indices); + ASTPtr new_constraints = InterpreterCreateQuery::formatConstraints(constraints); + ASTPtr new_foreign_keys = InterpreterCreateQuery::formatForeignKeys(foreign_keys); + ASTPtr new_unique_not_enforced = InterpreterCreateQuery::formatUnique(unique_not_enforced); + + if (create_query.columns_list->columns) + create_query.columns_list->replace(create_query.columns_list->columns, new_columns); + + if (create_query.columns_list->indices) + create_query.columns_list->replace(create_query.columns_list->indices, new_indices); + + if (create_query.columns_list->constraints) + create_query.columns_list->replace(create_query.columns_list->constraints, new_constraints); + + if (create_query.columns_list->foreign_keys) + create_query.columns_list->replace(create_query.columns_list->foreign_keys, new_foreign_keys); + + if (create_query.columns_list->unique) + create_query.columns_list->replace(create_query.columns_list->unique, new_unique_not_enforced); + + /// Check for duplicates + std::set all_columns; + for (const auto & column : columns) + { + if (!all_columns.emplace(column.name).second) + throw Exception("Column " + backQuoteIfNeed(column.name) + " already exists", ErrorCodes::DUPLICATE_COLUMN); + } + + /// Table constructing + return StorageFactory::instance().get(create_query, "", context, context->getGlobalContext(), columns, constraints, foreign_keys, unique_not_enforced, false); +} + +/// TODO: impl based on createStorageFromQuery(create_query, context) ? 
StoragePtr createStorageFromQuery(const String & query, const ContextPtr & context) { auto ast = getASTCreateQueryFromString(query, context); @@ -90,23 +170,35 @@ StoragePtr createStorageFromQuery(const String & query, const ContextPtr & conte false /*has_force_restore_data_flag*/); } -void replaceCnchWithCloud(ASTCreateQuery & create_query, const String & new_table_name, const String & cnch_db, const String & cnch_table) +void replaceCnchWithCloud( + ASTStorage * storage, + const String & cnch_database, + const String & cnch_table, + WorkerEngineType engine_type, + const Strings & engine_args) { - if (!new_table_name.empty()) - create_query.table = new_table_name; - - auto * storage = create_query.storage; + if (!startsWith(storage->engine->name, "Cnch")) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expect Cnch-family Engine but got {}", storage->engine->name); auto engine = std::make_shared(); - if (auto pos = storage->engine->name.find("Cnch"); pos != std::string::npos) - engine->name = String(storage->engine->name).replace(pos, strlen("Cnch"), "Cloud"); - + engine->name = storage->engine->name.replace(0, strlen("Cnch"), toString(engine_type)); engine->arguments = std::make_shared(); - engine->arguments->children.push_back(std::make_shared(cnch_db)); - engine->arguments->children.push_back(std::make_shared(cnch_table)); - if (storage->unique_key && storage->engine->arguments && storage->engine->arguments->children.size()) - /// NOTE: Used to pass the version column for unique table here. 
- engine->arguments->children.push_back(storage->engine->arguments->children[0]); + engine->arguments->children.emplace_back(std::make_shared(cnch_database)); + engine->arguments->children.emplace_back(std::make_shared(cnch_table)); + if (!engine_args.empty()) + { + for (const auto & arg : engine_args) + { + engine->arguments->children.emplace_back(std::make_shared(arg)); + } + } + else if (storage->engine->arguments) + { + for (const auto & arg : storage->engine->arguments->children) + { + engine->arguments->children.push_back(arg); + } + } storage->set(storage->engine, engine); } diff --git a/src/CloudServices/CnchCreateQueryHelper.h b/src/CloudServices/CnchCreateQueryHelper.h index fe8d5afa12d..e64371ef9e3 100644 --- a/src/CloudServices/CnchCreateQueryHelper.h +++ b/src/CloudServices/CnchCreateQueryHelper.h @@ -16,6 +16,7 @@ #pragma once #include +#include #include #include #include @@ -23,8 +24,27 @@ namespace DB { +/// used in worker RPC, don't break backward compatibility +enum class WorkerEngineType : uint8_t +{ + CLOUD = 0, // CloudMergeTree + DICT = 1, // DictCloudMergeTree (for BitEngine dict table) +}; + +inline static String toString(WorkerEngineType type) +{ + switch (type) + { + case WorkerEngineType::CLOUD: + return "Cloud"; + case WorkerEngineType::DICT: + return "DictCloud"; + } +} + class ASTCreateQuery; class ASTSetQuery; +class ASTStorage; /// see Databases/DatabaseOnDisk.h extern String getObjectDefinitionFromCreateQuery(const ASTPtr & query, std::optional attach); @@ -32,9 +52,17 @@ extern String getObjectDefinitionFromCreateQuery(const ASTPtr & query, std::opti std::shared_ptr getASTCreateQueryFromString(const String & query, const ContextPtr & context); std::shared_ptr getASTCreateQueryFromStorage(const IStorage & storage, const ContextPtr & context); +StoragePtr createStorageFromQuery(ASTCreateQuery & create_query, ContextMutablePtr context); StoragePtr createStorageFromQuery(const String & query, const ContextPtr & context); -void 
replaceCnchWithCloud(ASTCreateQuery & create_query, const String & new_table_name, const String & cnch_db, const String & cnch_table); +/// change storage engine from Cnch-family to Cloud-family +/// TODO: can we get rid of engine_args? +void replaceCnchWithCloud( + ASTStorage * storage, + const String & cnch_database, + const String & cnch_table, + WorkerEngineType engine_type = WorkerEngineType::CLOUD, + const Strings & engine_args = {}); void modifyOrAddSetting(ASTSetQuery & set_query, const String & name, Field value); void modifyOrAddSetting(ASTCreateQuery & create_query, const String & name, Field value); diff --git a/src/CloudServices/CnchServerResource.cpp b/src/CloudServices/CnchServerResource.cpp index 9bd1c3800ed..35f9270da1c 100644 --- a/src/CloudServices/CnchServerResource.cpp +++ b/src/CloudServices/CnchServerResource.cpp @@ -55,13 +55,12 @@ AssignedResource::AssignedResource(const StoragePtr & storage_) : storage(storag AssignedResource::AssignedResource(AssignedResource && resource) { storage = resource.storage; - worker_table_name = resource.worker_table_name; - create_table_query = resource.create_table_query; + table_version = resource.table_version; + table_definition = resource.table_definition; sent_create_query = resource.sent_create_query; bucket_numbers = resource.bucket_numbers; replicated = resource.replicated; - table_version = resource.table_version; server_parts = std::move(resource.server_parts); hive_parts = std::move(resource.hive_parts); file_parts = std::move(resource.file_parts); @@ -223,8 +222,35 @@ void CnchServerResource::addCreateQuery( if (it == assigned_table_resource.end()) it = assigned_table_resource.emplace(storage->getStorageUUID(), AssignedResource{storage}).first; - it->second.create_table_query = create_query; - it->second.worker_table_name = worker_table_name; + it->second.table_definition.definition = create_query; + it->second.table_definition.local_table_name = worker_table_name; + 
it->second.table_definition.cacheable = false; +} + +void CnchServerResource::addCacheableCreateQuery( + const StoragePtr & storage, + const String & worker_table_name, + WorkerEngineType engine_type, + String underlying_dictionary_tables) +{ + auto uuid = storage->getStorageUUID(); + if (uuid == UUIDHelpers::Nil) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Cannot add definition for {} : UUID is empty", storage->getStorageID().getNameForLogs()); + + auto lock = getLock(); + + auto it = assigned_table_resource.find(uuid); + if (it == assigned_table_resource.end()) + it = assigned_table_resource.emplace(uuid, AssignedResource{storage}).first; + + it->second.table_definition = TableDefinitionResource { + storage->getCreateTableSql(), + worker_table_name, + /*cacheable=*/ true, + engine_type, + underlying_dictionary_tables + }; } void CnchServerResource::setTableVersion(const UUID & storage_uuid, const UInt64 table_version) @@ -578,8 +604,7 @@ void CnchServerResource::allocateResource( worker_resource.addDataParts(assigned_file_parts); worker_resource.sent_create_query = resource.sent_create_query; worker_resource.table_version = resource.table_version; - worker_resource.create_table_query = resource.create_table_query; - worker_resource.worker_table_name = resource.worker_table_name; + worker_resource.table_definition = resource.table_definition; worker_resource.object_columns = resource.object_columns; } } diff --git a/src/CloudServices/CnchServerResource.h b/src/CloudServices/CnchServerResource.h index 5285a8d1ea1..ae38ab84372 100644 --- a/src/CloudServices/CnchServerResource.h +++ b/src/CloudServices/CnchServerResource.h @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -69,6 +70,17 @@ struct SendLock ServerResourceLockManager & manager; }; +struct TableDefinitionResource +{ + /// if cacheable == 0, it's the rewritten table definition for worker; + /// otherwise, it's the original definition for cnch table + String definition; 
+ String local_table_name; + bool cacheable = false; + WorkerEngineType engine_type = WorkerEngineType::CLOUD; + String underlying_dictionary_tables; // local dictionary table names for bitengine +}; + struct AssignedResource { explicit AssignedResource(const StoragePtr & storage); @@ -77,8 +89,7 @@ struct AssignedResource StoragePtr storage; UInt64 table_version{0}; //send table version instead of parts if set - String worker_table_name; - String create_table_query; + TableDefinitionResource table_definition; bool sent_create_query{false}; bool replicated{false}; @@ -131,6 +142,12 @@ class CnchServerResource const String & worker_table_name, bool create_local_table = true); + void addCacheableCreateQuery( + const StoragePtr & storage, + const String & worker_table_name, + WorkerEngineType engine_type, + String underlying_dictionary_tables); + void setTableVersion(const UUID & storage_uuid, const UInt64 table_version); void setAggregateWorker(HostWithPorts aggregate_worker_) { aggregate_worker = std::move(aggregate_worker_); } diff --git a/src/CloudServices/CnchWorkerClient.cpp b/src/CloudServices/CnchWorkerClient.cpp index 661e73afe68..9bc151d4111 100644 --- a/src/CloudServices/CnchWorkerClient.cpp +++ b/src/CloudServices/CnchWorkerClient.cpp @@ -420,8 +420,24 @@ brpc::CallId CnchWorkerClient::sendResources( { if (!resource.sent_create_query) { - request.add_create_queries(resource.create_table_query); - request.add_dynamic_object_column_schema(resource.object_columns.toString()); + const auto & def = resource.table_definition; + if (resource.table_definition.cacheable) + { + auto * cacheable = request.add_cacheable_create_queries(); + RPCHelpers::fillStorageID(resource.storage->getStorageID(), *cacheable->mutable_storage_id()); + cacheable->set_definition(def.definition); + if (!resource.object_columns.empty()) + cacheable->set_dynamic_object_column_schema(resource.object_columns.toString()); + cacheable->set_local_engine_type(static_cast(def.engine_type)); + 
cacheable->set_local_table_name(def.local_table_name); + if (!def.underlying_dictionary_tables.empty()) + cacheable->set_local_underlying_dictionary_tables(def.underlying_dictionary_tables); + } + else + { + request.add_create_queries(def.definition); + request.add_dynamic_object_column_schema(resource.object_columns.toString()); + } } /// parts @@ -442,7 +458,7 @@ brpc::CallId CnchWorkerClient::sendResources( } table_data_parts.set_database(resource.storage->getDatabaseName()); - table_data_parts.set_table(resource.worker_table_name); + table_data_parts.set_table(resource.table_definition.local_table_name); if (resource.table_version) { require_worker_info = true; diff --git a/src/CloudServices/CnchWorkerResource.cpp b/src/CloudServices/CnchWorkerResource.cpp index b101932f3df..bc42995bfc1 100644 --- a/src/CloudServices/CnchWorkerResource.cpp +++ b/src/CloudServices/CnchWorkerResource.cpp @@ -16,23 +16,23 @@ #include #include +#include #include #include #include #include -#include #include -#include +#include +#include #include -#include -#include +#include #include -#include +#include #include -#include #include -#include -#include +#include +#include +#include namespace DB @@ -40,122 +40,142 @@ namespace DB namespace ErrorCodes { - extern const int DUPLICATE_COLUMN; - extern const int INCORRECT_QUERY; + extern const int BAD_ARGUMENTS; extern const int TABLE_ALREADY_EXISTS; } -void CnchWorkerResource::executeCreateQuery(ContextMutablePtr context, const String & create_query, bool skip_if_exists, const ColumnsDescription & object_columns) +static ASTPtr parseCreateQuery(ContextMutablePtr context, const String & create_query) { - LOG_DEBUG(&Poco::Logger::get("WorkerResource"), "start create cloud table {}", create_query); const char * begin = create_query.data(); const char * end = create_query.data() + create_query.size(); ParserQueryWithOutput parser{end}; const auto & settings = context->getSettingsRef(); - ASTPtr ast_query = parseQuery(parser, begin, end, 
"CreateCloudTable", settings.max_query_size, settings.max_parser_depth); + return parseQuery(parser, begin, end, "CreateCloudTable", settings.max_query_size, settings.max_parser_depth); +} + +void CnchWorkerResource::executeCreateQuery(ContextMutablePtr context, const String & create_query, bool skip_if_exists, const ColumnsDescription & object_columns) +{ + LOG_DEBUG(&Poco::Logger::get("WorkerResource"), "start create cloud table {}", create_query); + auto ast_query = parseCreateQuery(context, create_query); auto & ast_create_query = ast_query->as(); /// set query settings + /// TODO: can we remove this? i.e., don't rely on create query to pass query setting if (ast_create_query.settings_ast) InterpreterSetQuery(ast_create_query.settings_ast, context).executeForCurrentContext(); + auto res = createStorageFromQuery(ast_create_query, context); + if (auto cloud_table = std::dynamic_pointer_cast(res)) + cloud_table->resetObjectColumns(object_columns); + res->startup(); + + bool throw_if_exists = !ast_create_query.if_not_exists && !skip_if_exists; const auto & database_name = ast_create_query.database; // not empty. const auto & table_name = ast_create_query.table; String tenant_db = formatTenantDatabaseName(database_name); - { - auto lock = getLock(); - if (cloud_tables.find({tenant_db, table_name}) != cloud_tables.end()) - { - if (ast_create_query.if_not_exists || skip_if_exists) - return; - else - throw Exception("Table " + tenant_db + "." 
+ table_name + " already exists.", ErrorCodes::TABLE_ALREADY_EXISTS); - } - } + insertCloudTable({tenant_db, table_name}, res, context, throw_if_exists); +} - ColumnsDescription columns; - IndicesDescription indices; - ConstraintsDescription constraints; - ForeignKeysDescription foreign_keys; - UniqueNotEnforcedDescription unique_not_enforced; +void CnchWorkerResource::executeCacheableCreateQuery( + ContextMutablePtr context, + const StorageID & cnch_storage_id, + const String & definition, + const String & local_table_name, + WorkerEngineType engine_type, + const String & underlying_dictionary_tables, + const ColumnsDescription & object_columns) +{ + static auto * log = &Poco::Logger::get("WorkerResource"); - if (ast_create_query.columns_list) + std::shared_ptr cached; + if (auto cache = context->tryGetCloudTableDefinitionCache()) { - if (ast_create_query.columns_list->columns) + auto load = [&]() -> std::shared_ptr { - // Set attach = true to avoid making columns nullable due to ANSI settings, because the dialect change - // should NOT affect existing tables. 
- columns = InterpreterCreateQuery::getColumnsDescription(*ast_create_query.columns_list->columns, context, /* attach= */ true); - } - - if (ast_create_query.columns_list->indices) - for (const auto & index : ast_create_query.columns_list->indices->children) - indices.push_back(IndexDescription::getIndexFromAST(index->clone(), columns, context)); - - if (ast_create_query.columns_list->constraints) - for (const auto & constraint : ast_create_query.columns_list->constraints->children) - constraints.constraints.push_back(std::dynamic_pointer_cast(constraint->clone())); - - if (ast_create_query.columns_list->foreign_keys) - for (const auto & foreign_key : ast_create_query.columns_list->foreign_keys->children) - foreign_keys.foreign_keys.push_back(std::dynamic_pointer_cast(foreign_key->clone())); - - if (ast_create_query.columns_list->unique) - for (const auto & unique : ast_create_query.columns_list->unique->children) - unique_not_enforced.unique.push_back(std::dynamic_pointer_cast(unique->clone())); + auto ast_query = parseCreateQuery(context, definition); + auto & create_query = ast_query->as(); + + replaceCnchWithCloud( + create_query.storage, + cnch_storage_id.getDatabaseName(), + cnch_storage_id.getTableName(), + engine_type); + + auto table = createStorageFromQuery(create_query, context); + if (auto cloud_table = std::dynamic_pointer_cast(table)) + return cloud_table; + return {}; + }; + + cached = cache->getOrSet(CloudTableDefinitionCache::hash(definition), std::move(load)).first; } - else - throw Exception("Incorrect CREATE query: required list of column descriptions or AS section or SELECT.", ErrorCodes::INCORRECT_QUERY); - /// Even if query has list of columns, canonicalize it (unfold Nested columns). 
- ASTPtr new_columns = InterpreterCreateQuery::formatColumns(columns, ParserSettings::valueOf(context->getSettingsRef())); - ASTPtr new_indices = InterpreterCreateQuery::formatIndices(indices); - ASTPtr new_constraints = InterpreterCreateQuery::formatConstraints(constraints); - ASTPtr new_foreign_keys = InterpreterCreateQuery::formatForeignKeys(foreign_keys); - ASTPtr new_unique_not_enforced = InterpreterCreateQuery::formatUnique(unique_not_enforced); + StoragePtr res; + if (cached) + { + LOG_DEBUG(log, "Creating cloud table {} from cached template of definition {}", local_table_name, definition); + StorageID actual_table_id = cached->getStorageID(); + actual_table_id.table_name = local_table_name; - if (ast_create_query.columns_list->columns) - ast_create_query.columns_list->replace(ast_create_query.columns_list->columns, new_columns); + std::unique_ptr new_settings = std::make_unique(*cached->getSettings()); + if (!underlying_dictionary_tables.empty()) + new_settings->underlying_dictionary_tables = underlying_dictionary_tables; - if (ast_create_query.columns_list->indices) - ast_create_query.columns_list->replace(ast_create_query.columns_list->indices, new_indices); + switch (engine_type) + { + case WorkerEngineType::CLOUD: + res = StorageCloudMergeTree::create( + actual_table_id, + cnch_storage_id.database_name, + cnch_storage_id.table_name, + *cached->getInMemoryMetadataPtr(), + context, + /*date_column_name*/ "", + cached->getMergingParams(), + std::move(new_settings)); + break; + case WorkerEngineType::DICT: + /// NOTE: StorageDictCloudMergeTree::create is broken, don't use it + res = std::make_shared( + actual_table_id, + cnch_storage_id.database_name, + cnch_storage_id.table_name, + *cached->getInMemoryMetadataPtr(), + context, + /*date_column_name*/ "", + cached->getMergingParams(), + std::move(new_settings)); + break; + default: + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown value for engine_type: {}", static_cast(engine_type)); + } - if 
(ast_create_query.columns_list->constraints) - ast_create_query.columns_list->replace(ast_create_query.columns_list->constraints, new_constraints); + } + else /// for cloud table other than CloudMergeTree. e.g., CloudS3, CloudHive, ... + { + auto ast_query = parseCreateQuery(context, definition); + auto & create_query = ast_query->as(); - if (ast_create_query.columns_list->foreign_keys) - ast_create_query.columns_list->replace(ast_create_query.columns_list->foreign_keys, new_foreign_keys); + replaceCnchWithCloud( + create_query.storage, + cnch_storage_id.getDatabaseName(), + cnch_storage_id.getTableName(), + engine_type); - if (ast_create_query.columns_list->unique) - ast_create_query.columns_list->replace(ast_create_query.columns_list->unique, new_unique_not_enforced); + create_query.table = local_table_name; + if (!underlying_dictionary_tables.empty()) + modifyOrAddSetting(create_query, "underlying_dictionary_tables", Field(underlying_dictionary_tables)); - /// Check for duplicates - std::set all_columns; - for (const auto & column : columns) - { - if (!all_columns.emplace(column.name).second) - throw Exception("Column " + backQuoteIfNeed(column.name) + " already exists", ErrorCodes::DUPLICATE_COLUMN); + LOG_DEBUG(log, "Creating cloud table {} from rewritted definition {}", local_table_name, serializeAST(create_query)); + res = createStorageFromQuery(create_query, context); } - /// Table constructing - StoragePtr res = StorageFactory::instance().get(ast_create_query, "", context, context->getGlobalContext(), columns, constraints, foreign_keys, unique_not_enforced, false); - res->startup(); - if (auto cloud_table = std::dynamic_pointer_cast(res)) cloud_table->resetObjectColumns(object_columns); + res->startup(); - { - auto lock = getLock(); - cloud_tables.emplace(std::make_pair(tenant_db, table_name), res); - auto it = memory_databases.find(tenant_db); - if (it == memory_databases.end()) - { - DatabasePtr database = std::make_shared(tenant_db, 
context->getGlobalContext()); - memory_databases.insert(std::make_pair(tenant_db, std::move(database))); - } - } - - LOG_DEBUG(&Poco::Logger::get("WorkerResource"), "Successfully create cloud table {} and database {}", res->getStorageID().getNameForLogs(), database_name); + auto res_table_id = res->getStorageID(); + insertCloudTable({res_table_id.getDatabaseName(), res_table_id.getTableName()}, res, context, /*throw_if_exists=*/ false); } StoragePtr CnchWorkerResource::getTable(const StorageID & table_id) const @@ -184,6 +204,27 @@ DatabasePtr CnchWorkerResource::getDatabase(const String & database_name) const return {}; } +void CnchWorkerResource::insertCloudTable(DatabaseAndTableName key, const StoragePtr & storage, ContextPtr context, bool throw_if_exists) +{ + auto & tenant_db = key.first; + { + auto lock = getLock(); + bool inserted = cloud_tables.emplace(key, storage).second; + if (!inserted && throw_if_exists) + throw Exception(ErrorCodes::TABLE_ALREADY_EXISTS, "Table {} already exists", storage->getStorageID().getFullTableName()); + auto it = memory_databases.find(tenant_db); + if (it == memory_databases.end()) + { + DatabasePtr database = std::make_shared(tenant_db, context->getGlobalContext()); + memory_databases.insert(std::make_pair(tenant_db, std::move(database))); + } + } + + static auto * log = &Poco::Logger::get("WorkerResource"); + LOG_DEBUG(log, "Successfully create database {} and table {} {}", + tenant_db, storage->getName(), storage->getStorageID().getNameForLogs()); +} + bool CnchWorkerResource::isCnchTableInWorker(const StorageID & table_id) const { String tenant_db = formatTenantDatabaseName(table_id.getDatabaseName()); diff --git a/src/CloudServices/CnchWorkerResource.h b/src/CloudServices/CnchWorkerResource.h index c4b422dacf6..24da35455dd 100644 --- a/src/CloudServices/CnchWorkerResource.h +++ b/src/CloudServices/CnchWorkerResource.h @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -35,11 +36,21 @@ 
class CnchWorkerResource { public: void executeCreateQuery(ContextMutablePtr context, const String & create_query, bool skip_if_exists = false, const ColumnsDescription & object_columns = {}); + + void executeCacheableCreateQuery( + ContextMutablePtr context, + const StorageID & cnch_storage_id, + const String & definition, + const String & local_table_name, + WorkerEngineType engine_type, + const String & underlying_dictionary_tables, + const ColumnsDescription & object_columns); + StoragePtr getTable(const StorageID & table_id) const; DatabasePtr getDatabase(const String & database_name) const; bool isCnchTableInWorker(const StorageID & table_id) const; - ~CnchWorkerResource() + ~CnchWorkerResource() { clearResource(); } @@ -83,6 +94,8 @@ class CnchWorkerResource TablesMap cloud_tables; std::unordered_map memory_databases; + void insertCloudTable(DatabaseAndTableName key, const StoragePtr & storage, ContextPtr context, bool throw_if_exists); + /// for offloading query TablesSet cnch_tables; std::map worker_table_names; diff --git a/src/CloudServices/CnchWorkerServiceImpl.cpp b/src/CloudServices/CnchWorkerServiceImpl.cpp index 1530bf35479..bf4ae5faa80 100644 --- a/src/CloudServices/CnchWorkerServiceImpl.cpp +++ b/src/CloudServices/CnchWorkerServiceImpl.cpp @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -78,6 +79,12 @@ namespace ProfileEvents extern const Event PreloadExecTotalOps; } +namespace ProfileEvents +{ + extern const Event QueryCreateTablesMicroseconds; + extern const Event QuerySendResourcesMicroseconds; +} + namespace DB { namespace ErrorCodes @@ -749,6 +756,7 @@ void CnchWorkerServiceImpl::sendResources( /// store cloud tables in cnch_session_resource. 
{ + Stopwatch create_timer; /// create a copy of session_context to avoid modify settings in SessionResource auto context_for_create = Context::createCopy(query_context); for (int i = 0; i < request->create_queries_size(); i++) @@ -758,9 +766,24 @@ void CnchWorkerServiceImpl::sendResources( worker_resource->executeCreateQuery(context_for_create, create_query, false, ColumnsDescription::parse(object_columns)); } - - - LOG_DEBUG(log, "Successfully create {} queries for Session: {}", request->create_queries_size(), request->txn_id()); + for (int i = 0; i < request->cacheable_create_queries_size(); i++) + { + auto & item = request->cacheable_create_queries().at(i); + ColumnsDescription object_columns; + if (item.has_dynamic_object_column_schema()) + object_columns = ColumnsDescription::parse(item.dynamic_object_column_schema()); + worker_resource->executeCacheableCreateQuery( + context_for_create, + RPCHelpers::createStorageID(item.storage_id()), + item.definition(), + item.local_table_name(), + static_cast(item.local_engine_type()), + item.local_underlying_dictionary_tables(), + object_columns); + } + create_timer.stop(); + LOG_INFO(log, "Prepared {} tables for session {} in {} us", request->create_queries_size() + request->cacheable_create_queries_size(), request->txn_id(), create_timer.elapsedMicroseconds()); + ProfileEvents::increment(ProfileEvents::QueryCreateTablesMicroseconds, create_timer.elapsedMicroseconds()); } for (const auto & data : request->data_parts()) @@ -879,7 +902,9 @@ void CnchWorkerServiceImpl::sendResources( throw Exception("Unknown table engine: " + storage->getName(), ErrorCodes::UNKNOWN_TABLE); } - LOG_TRACE(log, "Received all resource for session: {}, elapsed: {}ms.", request->txn_id(), watch.elapsedMilliseconds()); + watch.stop(); + LOG_INFO(log, "Received all resources for session {} in {} us.", request->txn_id(), watch.elapsedMicroseconds()); + ProfileEvents::increment(ProfileEvents::QuerySendResourcesMicroseconds, 
watch.elapsedMicroseconds()); }) } diff --git a/src/CloudServices/DedupWorkerManager.cpp b/src/CloudServices/DedupWorkerManager.cpp index 30ab8fe94b2..5dacbd2fce0 100644 --- a/src/CloudServices/DedupWorkerManager.cpp +++ b/src/CloudServices/DedupWorkerManager.cpp @@ -169,7 +169,8 @@ void DedupWorkerManager::createDeduperOnWorker(StoragePtr & storage, StorageCnch return; try { - info->worker_storage_id = {storage->getStorageID().getDatabaseName(), storage->getStorageID().getTableName()}; + auto cnch_storage_id = storage->getStorageID(); + info->worker_storage_id = {cnch_storage_id.getDatabaseName(), cnch_storage_id.getTableName()}; selectDedupWorker(cnch_table, info, info_lock); /// create a unique table suffix @@ -177,14 +178,15 @@ void DedupWorkerManager::createDeduperOnWorker(StoragePtr & storage, StorageCnch info->worker_storage_id.table_name = storage_id.table_name + deduper_table_suffix; auto create_ast = getASTCreateQueryFromStorage(*storage, getContext()); - replaceCnchWithCloud( - *create_ast, info->worker_storage_id.table_name, storage->getStorageID().getDatabaseName(), storage->getStorageID().getTableName()); - modifyOrAddSetting(*create_ast, "cloud_enable_dedup_worker", Field(UInt64(1))); - modifyOrAddSetting(*create_ast, "allow_nullable_key", Field(UInt64(1))); + auto & create = *create_ast; + create.table = info->worker_storage_id.table_name; + replaceCnchWithCloud(create.storage, cnch_storage_id.getDatabaseName(), cnch_storage_id.getTableName()); + modifyOrAddSetting(create, "cloud_enable_dedup_worker", Field(UInt64(1))); + modifyOrAddSetting(create, "allow_nullable_key", Field(UInt64(1))); /// Set cnch uuid for CloudMergeTree to commit data on worker side - modifyOrAddSetting(*create_ast, "cnch_table_uuid", Field(static_cast(UUIDHelpers::UUIDToString(create_ast->uuid)))); + modifyOrAddSetting(create, "cnch_table_uuid", Field(static_cast(UUIDHelpers::UUIDToString(create_ast->uuid)))); /// It's not allowed to create multi tables with same uuid on 
Cnch-Worker side now - create_ast->uuid = UUIDHelpers::Nil; + create.uuid = UUIDHelpers::Nil; String create_query = getTableDefinitionFromCreateQuery(static_pointer_cast(create_ast), false); LOG_TRACE(log, "Create table query of dedup worker: {}", create_query); diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index 49d0127f6de..6afe834798d 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -316,6 +316,11 @@ M(PerfInequalConditionAppendMicroseconds, "") \ M(PerfJoinElapsedMicroseconds, "") \ M(PerfFilterElapsedMicroseconds, "") \ +\ + M(QueryCreateTablesMicroseconds, "") \ + M(QuerySendResourcesMicroseconds, "") \ + M(CloudTableDefinitionCacheHits, "") \ + M(CloudTableDefinitionCacheMisses, "") \ \ M(CreatedHTTPConnections, "Total amount of created HTTP connections (closed or opened).") \ \ diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 598c2a4cb0c..30a6b7d69b8 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -1410,6 +1410,7 @@ enum PreloadLevelSettings : UInt64 M(Bool, enable_prune_source_plan_segment, false, "Whether prune source plan segment", 0) \ M(Bool, enable_prune_empty_resource, false, "Whether prune resource sending", 0) \ M(Bool, enable_prune_compute_plan_segment, false, "Whether prune compute plan segment", 0) \ + M(Bool, send_cacheable_table_definitions, false, "Whether to send cacheable table definitions to worker, which reduces parsing overhead and is particularly beneficial for high concurrency workload", 0) \ M(Bool, enable_optimizer_for_create_select, false, "Whether enable query optimizer for CREATE TABLE SELECT queries", 0) \ M(Bool, log_optimizer_run_time, false, "Whether Log optimizer runtime", 0) \ M(UInt64, plan_optimizer_timeout, 600000, "Max running time of a plan rewriter optimizer in ms", 0) \ diff --git a/src/Databases/DatabasesCommon.cpp b/src/Databases/DatabasesCommon.cpp index 91241a92e67..20c8680d088 100644 --- a/src/Databases/DatabasesCommon.cpp +++ 
b/src/Databases/DatabasesCommon.cpp @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -188,7 +189,8 @@ StoragePtr DatabaseWithOwnTablesBase::detachTableUnlocked(const String & table_n auto table_id = res->getStorageID(); if (table_id.hasUUID()) { - assert(database_name == DatabaseCatalog::TEMPORARY_DATABASE || getUUID() != UUIDHelpers::Nil || ((res->getName() == "CloudMergeTree") && (getEngineName() == "Memory"))); + [[maybe_unused]] bool is_cloud = dynamic_cast(res.get()) != nullptr; + assert(database_name == DatabaseCatalog::TEMPORARY_DATABASE || getUUID() != UUIDHelpers::Nil || (is_cloud && (getEngineName() == "Memory"))); DatabaseCatalog::instance().removeUUIDMapping(table_id.uuid); } diff --git a/src/FormaterTool/PartMergerImpl.cpp b/src/FormaterTool/PartMergerImpl.cpp index b30b80b0fd2..bf8cabd2fef 100644 --- a/src/FormaterTool/PartMergerImpl.cpp +++ b/src/FormaterTool/PartMergerImpl.cpp @@ -47,14 +47,21 @@ void PartMergerImpl::copyPartData(const DiskPtr & from_disk, const String & from std::shared_ptr PartMergerImpl::createStorage(const String & path, const String & create_table_query) { auto context = getContext(); - auto storage = createStorageFromQuery(create_table_query, context); + auto ast = getASTCreateQueryFromString(create_table_query, context); + ASTCreateQuery & create_query = *ast; + /// CloudMergeTree checks for non-empty UUID in its constructor, + /// let's fake it (not used in part-merger anyway) + UUID fake_cnch_uuid = UUIDHelpers::generateV4(); + modifyOrAddSetting(create_query, "cnch_table_uuid", Field(UUIDHelpers::UUIDToString(fake_cnch_uuid))); + auto storage = createStorageFromQuery(create_query, context); auto merge_tree = std::dynamic_pointer_cast(storage); - merge_tree->setRelativeDataPath(IStorage::StorageLocation::MAIN, path); if (!merge_tree) { /// Must use part-merger with `ENGINE = CloudMergeTree`. 
throw Exception("Please choose `CloudMergeTree` as the engine.", ErrorCodes::INVALID_CONFIG_PARAMETER); } + /// IMPORTANT: reset table relative path to the requested value + merge_tree->setRelativeDataPath(IStorage::StorageLocation::MAIN, path); return merge_tree; } diff --git a/src/Interpreters/AsynchronousMetrics.cpp b/src/Interpreters/AsynchronousMetrics.cpp index e98b8493eda..43fb7dcd2cd 100644 --- a/src/Interpreters/AsynchronousMetrics.cpp +++ b/src/Interpreters/AsynchronousMetrics.cpp @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -574,6 +575,13 @@ void AsynchronousMetrics::update(std::chrono::system_clock::time_point update_ti /// This is also a good indicator of system responsiveness. new_values["Jitter"] = std::chrono::duration_cast(current_time - update_time).count() / 1e9; + { + if (auto cloud_table_definition_cache = getContext()->tryGetCloudTableDefinitionCache()) + { + new_values["CloudTableDefinitionCacheCells"] = cloud_table_definition_cache->count(); + } + } + { if (auto mark_cache = getContext()->getMarkCache()) { diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index d6815be1eb4..e62ba863145 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -126,6 +126,7 @@ #include #include #include +#include #include #include #include @@ -360,6 +361,10 @@ struct ContextSharedPart mutable IntermediateResultCachePtr intermediate_result_cache; /// part cache of queries' results. mutable MMappedFileCachePtr mmap_cache; /// Cache of mmapped files to avoid frequent open/map/unmap/close and to reuse from several threads. + mutable OnceFlag cloud_table_definition_cache_initialized; + /// Cache of CloudMergeTree objects to speed up table creation during query execution. + /// Used when send_cacheable_table_definitions is enabled + mutable CloudTableDefinitionCachePtr cloud_table_definition_cache; ProcessList process_list; /// Executing queries at the moment. 
SegmentSchedulerPtr segment_scheduler; ExchangeStatusTrackerPtr exchange_data_tracker; @@ -2867,6 +2872,17 @@ void Context::dropMarkCache() const shared->mark_cache->reset(); } +std::shared_ptr Context::tryGetCloudTableDefinitionCache() const +{ + callOnce(shared->cloud_table_definition_cache_initialized, [&] { + const Poco::Util::AbstractConfiguration & config = getConfigRef(); + auto cache_size = config.getUInt(".cloud_table_definition_cache_size", 50000); + if (getServerType() == ServerType::cnch_worker && cache_size) + shared->cloud_table_definition_cache = std::make_shared(cache_size); + }); + return shared->cloud_table_definition_cache; +} + void Context::setQueryCache(const Poco::Util::AbstractConfiguration & config) { auto lock = getLock(); // checked diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 6b8dbf5db3c..b3108460ac7 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -123,6 +123,7 @@ class ManipulationList; class ReplicatedFetchList; class Cluster; class Compiler; +class CloudTableDefinitionCache; class MarkCache; class MMappedFileCache; class UncompressedCache; @@ -1235,6 +1236,9 @@ class Context : public ContextData, public std::enable_shared_from_this std::shared_ptr getMarkCache() const; void dropMarkCache() const; + /// result maybe nullptr + std::shared_ptr tryGetCloudTableDefinitionCache() const; + /// Create a cache of mapped files to avoid frequent open/map/unmap/close and to reuse from several threads. 
void setMMappedFileCache(size_t cache_size_in_num_entries); std::shared_ptr getMMappedFileCache() const; diff --git a/src/MergeTreeCommon/CnchStorageCommon.cpp b/src/MergeTreeCommon/CnchStorageCommon.cpp index 49d97236cc8..e0800c45d3d 100644 --- a/src/MergeTreeCommon/CnchStorageCommon.cpp +++ b/src/MergeTreeCommon/CnchStorageCommon.cpp @@ -18,7 +18,6 @@ #include #include -#include #include #include #include @@ -303,7 +302,7 @@ void CnchStorageCommonHelper::filterCondition( String CnchStorageCommonHelper::getCreateQueryForCloudTable( const String & query, const String & local_table_name, - const ContextPtr & context, + const ContextPtr & /*context*/, bool enable_staging_area, const std::optional & cnch_storage_id, const Strings & engine_args, @@ -318,63 +317,19 @@ String CnchStorageCommonHelper::getCreateQueryForCloudTable( if (!local_database_name.empty()) create_query.database = local_database_name; - auto * storage = create_query.storage; + replaceCnchWithCloud( + create_query.storage, + cnch_storage_id.value_or(table_id).getDatabaseName(), + cnch_storage_id.value_or(table_id).getTableName(), + engine_type, + engine_args); - auto engine = std::make_shared(); - engine->name = storage->engine->name.replace(0, strlen("Cnch"), "Cloud"); - engine->arguments = std::make_shared(); - engine->arguments->children.emplace_back(std::make_shared(cnch_storage_id.value_or(table_id).getDatabaseName())); - engine->arguments->children.emplace_back(std::make_shared(cnch_storage_id.value_or(table_id).getTableName())); - if (!engine_args.empty()) - { - for (const auto & arg : engine_args) - { - engine->arguments->children.emplace_back(std::make_shared(arg)); - } - } - else if (storage->engine->arguments) - { - for (const auto & arg : storage->engine->arguments->children) - { - engine->arguments->children.push_back(arg); - } - } - - storage->set(storage->engine, engine); - - if (startsWith(engine->name, "Cloud")) /// table settings for *MergeTree engines - { - 
modifyOrAddSetting(create_query, "cnch_temporary_table", Field(UInt64(1))); - - if (enable_staging_area) - modifyOrAddSetting(create_query, "cloud_enable_staging_area", Field(UInt64(1))); - } - else if(engine->name == "CnchHive" || engine->name == "CnchHDFS" || engine->name == "CnchS3") - { - modifyOrAddSetting(create_query, "cnch_temporary_table", Field(UInt64(1))); - } - - /// query settings - auto query_settings = std::make_shared(); - query_settings->is_standalone = false; - - if (context) - query_settings->changes = context->getSettingsRef().getChangedSettings(); - - if (create_query.settings_ast) - { - auto & settings_ast = create_query.settings_ast->as(); - if (!query_settings->changes.empty()) - { - for (const auto & change: settings_ast.changes) - modifyOrAddSetting(*query_settings, change.name, std::move(change.value)); - } - else - query_settings->changes = std::move(settings_ast.changes); - } + // perhaps better to enable if_not_exists by default + if (engine_type == WorkerEngineType::DICT) + create_query.if_not_exists = true; - if (!query_settings->changes.empty()) - create_query.setOrReplaceAST(create_query.settings_ast, query_settings); + if (enable_staging_area) + modifyOrAddSetting(create_query, "cloud_enable_staging_area", Field(UInt64(1))); WriteBufferFromOwnString statement_buf; formatAST(create_query, statement_buf, false); diff --git a/src/MergeTreeCommon/CnchStorageCommon.h b/src/MergeTreeCommon/CnchStorageCommon.h index c610fde086d..59542bc252e 100644 --- a/src/MergeTreeCommon/CnchStorageCommon.h +++ b/src/MergeTreeCommon/CnchStorageCommon.h @@ -15,6 +15,7 @@ #pragma once +#include #include #include #include @@ -69,23 +70,6 @@ enum class CNCHStorageMediumType String toStr(CNCHStorageMediumType tp); CNCHStorageMediumType fromStr(const String & type_str); -enum class WorkerEngineType : uint8_t -{ - CLOUD, - DICT, -}; - -inline static String toString(WorkerEngineType type) -{ - switch (type) - { - case WorkerEngineType::CLOUD: - return "Cloud"; 
- case WorkerEngineType::DICT: - return "DictCloud"; - } -} - class CnchStorageCommonHelper { public: @@ -121,6 +105,8 @@ class CnchStorageCommonHelper // when move these conditions from where to implicit_where. static ASTs getConditions(const ASTPtr & ast); + // TODO: too many arguments, try remove `enable_staging_area', `cnch_storage_id', `engine_args', `local_database_name'. + // check StorageCnchMergeTree::genViewDependencyCreateQueries to see whether it's possible String getCreateQueryForCloudTable( const String & query, const String & local_table_name, diff --git a/src/MergeTreeCommon/MergeTreeMetaBase.cpp b/src/MergeTreeCommon/MergeTreeMetaBase.cpp index 2dc927ced63..8ae7ad520ac 100644 --- a/src/MergeTreeCommon/MergeTreeMetaBase.cpp +++ b/src/MergeTreeCommon/MergeTreeMetaBase.cpp @@ -141,9 +141,7 @@ MergeTreeMetaBase::MergeTreeMetaBase( { try { - checkPartitionKeyAndInitMinMax(metadata_.partition_key); - setProperties(metadata_, metadata_, false); if (minmax_idx_date_column_pos == -1) throw Exception("Could not find Date column", ErrorCodes::BAD_TYPE_OF_FIELD); } @@ -158,39 +156,18 @@ MergeTreeMetaBase::MergeTreeMetaBase( { is_custom_partitioned = true; checkPartitionKeyAndInitMinMax(metadata_.partition_key); - setProperties(metadata_, metadata_, false); } - format_version = MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING; + storage_address = fmt::format("{}", fmt::ptr(this)); /// NOTE: using the same columns list as is read when performing actual merges. 
- merging_params.check(metadata_, metadata_.hasUniqueKey()); - - if (merging_params.partitionValueAsVersion()) - { - if (metadata_.partition_key.sample_block.columns() == 0) - throw Exception("Table is not partitioned, can't use partition value as version", ErrorCodes::BAD_ARGUMENTS); - if (metadata_.partition_key.sample_block.columns() > 1) - throw Exception("Partition key contains more than one column, can't use it as version", ErrorCodes::BAD_ARGUMENTS); - auto partition_key_type = metadata_.partition_key.sample_block.getDataTypes()[0]; - if (!partition_key_type->canBeUsedAsVersion()) - throw Exception("Partition key has type " + partition_key_type->getName() + ", can't be used as version", ErrorCodes::BAD_ARGUMENTS); - } - - if (metadata_.hasUniqueKey() && !attach_) - checkVersionColumnConstraint(); + merging_params.check(metadata_, attach_); if (metadata_.sampling_key.definition_ast != nullptr) { /// This is for backward compatibility. checkSampleExpression(metadata_, getSettings()->compatibility_allow_sampling_expression_not_in_primary_key); } - - checkTTLExpressions(metadata_, metadata_); - - storage_address = fmt::format("{}", fmt::ptr(this)); - - setServerVwName(getSettings()->cnch_server_vw); } StoragePolicyPtr MergeTreeMetaBase::getStoragePolicy(StorageLocation location) const @@ -213,7 +190,7 @@ const String& MergeTreeMetaBase::getRelativeDataPath(StorageLocation location) c return relative_data_path; } -void MergeTreeMetaBase::setRelativeDataPath(StorageLocation location, const String& rel_path) +void MergeTreeMetaBase::setRelativeDataPath(StorageLocation location, const String & rel_path) { if (unlikely(location == StorageLocation::AUXILITY)) { @@ -1434,22 +1411,9 @@ MergeTreeMetaBase::DataPartPtr MergeTreeMetaBase::getAnyPartInPartition( return nullptr; } -void MergeTreeMetaBase::checkVersionColumnConstraint() -{ - if (merging_params.partitionValueAsVersion()) - { - auto partition_types = 
getInMemoryMetadataPtr()->partition_key.sample_block.getDataTypes(); - if (partition_types.size() >= 1) - { - auto & type = partition_types[0]; - if (TypeIndex::UInt64 < type->getTypeId() && type->getTypeId() <= TypeIndex::Int256) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "The type of version column is {}, it is not compatible with UInt64", type->getName()); - } - } -} - -void MergeTreeMetaBase::MergingParams::check(const StorageInMemoryMetadata & metadata, bool has_unique_key) const +void MergeTreeMetaBase::MergingParams::check(const StorageInMemoryMetadata & metadata, bool attach) const { + const bool has_unique_key = metadata.hasUniqueKey(); const auto columns = metadata.getColumns().getAllPhysical(); if (!sign_column.empty() && mode != MergingParams::Collapsing && mode != MergingParams::VersionedCollapsing) @@ -1553,8 +1517,31 @@ void MergeTreeMetaBase::MergingParams::check(const StorageInMemoryMetadata & met } } - if (has_unique_key && !partitionValueAsVersion()) - check_version_column(true, "Unique Key"); + + if (has_unique_key) + { + if (partitionValueAsVersion()) + { + if (metadata.partition_key.sample_block.columns() == 0) + throw Exception("Table is not partitioned, can't use partition value as version", ErrorCodes::BAD_ARGUMENTS); + if (metadata.partition_key.sample_block.columns() > 1) + throw Exception("Partition key contains more than one column, can't use it as version", ErrorCodes::BAD_ARGUMENTS); + auto partition_key_type = metadata.partition_key.sample_block.getDataTypes()[0]; + if (!partition_key_type->canBeUsedAsVersion()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Partition key has type {}, can't be used as version", partition_key_type->getName()); + // singed integer and types larger than 64 bits are not supported currently + if (!attach && TypeIndex::UInt64 < partition_key_type->getTypeId() && partition_key_type->getTypeId() <= TypeIndex::Int256) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Partition key has type {}, can't be used as 
version", partition_key_type->getName()); + } + else + { + check_version_column(true, "Unique Key"); + } + } + else if (partitionValueAsVersion()) + { + throw Exception("Table doesn't have UNIQUE KEY, can't use partition value as version", ErrorCodes::BAD_ARGUMENTS); + } if (mode == MergingParams::Replacing) check_version_column(true, "ReplacingMergeTree"); diff --git a/src/MergeTreeCommon/MergeTreeMetaBase.h b/src/MergeTreeCommon/MergeTreeMetaBase.h index 2215380f965..affa1fe1144 100644 --- a/src/MergeTreeCommon/MergeTreeMetaBase.h +++ b/src/MergeTreeCommon/MergeTreeMetaBase.h @@ -145,7 +145,7 @@ class MergeTreeMetaBase : public IStorage, public WithMutableContext, public Mer Graphite::Params graphite_params; /// Check that needed columns are present and have correct types. - void check(const StorageInMemoryMetadata & metadata, bool has_unique_key) const; + void check(const StorageInMemoryMetadata & metadata, bool attach) const; String getModeName() const; @@ -172,7 +172,7 @@ class MergeTreeMetaBase : public IStorage, public WithMutableContext, public Mer StoragePolicyPtr getStoragePolicy(StorageLocation location) const override; virtual const String& getRelativeDataPath(StorageLocation location) const; - virtual void setRelativeDataPath(StorageLocation location, const String& rel_path); + void setRelativeDataPath(StorageLocation location, const String & rel_path); bool supportsFinal() const override { @@ -204,6 +204,8 @@ class MergeTreeMetaBase : public IStorage, public WithMutableContext, public Mer /// If uuid is empty, throw exception UUID getCnchStorageUUID() const; + const MergingParams & getMergingParams() const { return merging_params; } + //// Data parts /// Returns a copy of the list so that the caller shouldn't worry about locks. 
DataParts getDataParts(const DataPartStates & affordable_states) const; @@ -599,9 +601,6 @@ class MergeTreeMetaBase : public IStorage, public WithMutableContext, public Mer void checkProperties(const StorageInMemoryMetadata & new_metadata, const StorageInMemoryMetadata & old_metadata, bool attach = false) const; - /// Check version column constrains when create table - void checkVersionColumnConstraint(); - void setProperties(const StorageInMemoryMetadata & new_metadata, const StorageInMemoryMetadata & old_metadata, bool attach = false); void checkPartitionKeyAndInitMinMax(const KeyDescription & new_partition_key); diff --git a/src/MergeTreeCommon/tests/gtest_create_query_for_cloud_table.cpp b/src/MergeTreeCommon/tests/gtest_create_query_for_cloud_table.cpp index 0ae38b147f3..581ed395600 100644 --- a/src/MergeTreeCommon/tests/gtest_create_query_for_cloud_table.cpp +++ b/src/MergeTreeCommon/tests/gtest_create_query_for_cloud_table.cpp @@ -30,7 +30,6 @@ PARTITION BY toDate(event_time) PRIMARY KEY s ORDER BY (s, id) UNIQUE KEY id -SETTINGS cnch_temporary_table = 1 )#"; EXPECT_EQ(res, expected); } @@ -55,7 +54,6 @@ PARTITION BY toDate(event_time) PRIMARY KEY s ORDER BY (s, id) UNIQUE KEY id -SETTINGS cnch_temporary_table = 1 )#"; EXPECT_EQ(res, expected); } @@ -76,7 +74,6 @@ TEST(test_create_query_for_cloud_table, version_collapse) ) ENGINE = CloudVersionedCollapsingMergeTree(db1, tb1, Sign, Version) ORDER BY UserID -SETTINGS cnch_temporary_table = 1 )#"; EXPECT_EQ(res, expected); } @@ -94,7 +91,6 @@ TEST(test_create_query_for_cloud_table, s3) `age` String ) ENGINE = CloudS3(db1, tb1, `http://some_link/some_path/some_file.csv`, CSV, none, AKkkkkkkkkk, sKkkkkkkkkkkkkkkkkkkk) -SETTINGS cnch_temporary_table = 1 )#"; EXPECT_EQ(res, expected); } @@ -118,7 +114,6 @@ ENGINE = CloudMergeTree(db1, tb1) PARTITION BY toDate(event_time) PRIMARY KEY s ORDER BY (s, id) -SETTINGS cnch_temporary_table = 1 )#"; EXPECT_EQ(res, expected); } diff --git a/src/Protos/cnch_worker_rpc.proto 
b/src/Protos/cnch_worker_rpc.proto index 10a2e5be2ec..b637752deb7 100644 --- a/src/Protos/cnch_worker_rpc.proto +++ b/src/Protos/cnch_worker_rpc.proto @@ -490,19 +490,38 @@ message TableDataParts optional uint64 table_version = 11; } +// Send original (cnch) table definition and override to worker, in order to +// 1. remove server's parsing & formatting overhead +// before +// server: parse(create query) -> rewrite(ast) -> format(ast) -> send(new create query) +// worker: parse(new create query) -> create table(ast) +// after +// server: send(create query, override) +// worker: parse(create query) -> rewrite(ast) -> create table(ast, override) +// 2. be able to cache table template at worker +message CacheableTableDefinition +{ + required StorageID storage_id = 1; + required string definition = 2; + optional string dynamic_object_column_schema = 3; // present if not empty + required uint32 local_engine_type = 4; // WorkerEngineType + required string local_table_name = 5; + optional string local_underlying_dictionary_tables = 6; // for bitengine +} + message SendResourcesReq { required uint64 txn_id = 1; required uint64 primary_txn_id = 2; required uint64 timeout = 3; - /// create queries repeated string create_queries = 4; - /// data parts repeated TableDataParts data_parts = 5; optional string disk_cache_mode = 6; repeated UDFInfo udf_infos = 7; repeated string dynamic_object_column_schema = 8; optional WorkerInfo worker_info = 9; + // can coexist with `create_queries' + repeated CacheableTableDefinition cacheable_create_queries = 10; } message SendResourcesResp diff --git a/src/Storages/MergeTree/CloudTableDefinitionCache.h b/src/Storages/MergeTree/CloudTableDefinitionCache.h new file mode 100644 index 00000000000..d012f2b85e2 --- /dev/null +++ b/src/Storages/MergeTree/CloudTableDefinitionCache.h @@ -0,0 +1,43 @@ +#pragma once + +#include +#include +#include +#include + +namespace ProfileEvents +{ +extern const Event CloudTableDefinitionCacheHits; +extern const 
Event CloudTableDefinitionCacheMisses; +} + +namespace DB +{ +class StorageCloudMergeTree; + +/// cache value can be nullptr, client should decide how to handle it +class CloudTableDefinitionCache : public LRUCache +{ + using Base = LRUCache; + +public: + using Base::Base; + + explicit CloudTableDefinitionCache(size_t max_size_in_bytes) : Base(max_size_in_bytes) { } + + static UInt128 hash(const String & create_query) { return sipHash128(create_query.data(), create_query.length()); } + + template + std::pair getOrSet(const Key & key, LoadFunc && load) + { + auto result = Base::getOrSet(key, load); + if (result.second) + ProfileEvents::increment(ProfileEvents::CloudTableDefinitionCacheMisses); + else + ProfileEvents::increment(ProfileEvents::CloudTableDefinitionCacheHits); + return result; + } +}; + +using CloudTableDefinitionCachePtr = std::shared_ptr; +} diff --git a/src/Storages/MergeTree/MergeTreeCloudData.cpp b/src/Storages/MergeTree/MergeTreeCloudData.cpp index 22a5f10eead..de493a0d515 100644 --- a/src/Storages/MergeTree/MergeTreeCloudData.cpp +++ b/src/Storages/MergeTree/MergeTreeCloudData.cpp @@ -45,7 +45,6 @@ namespace ErrorCodes MergeTreeCloudData::MergeTreeCloudData( const StorageID & table_id_, - const String & relative_data_path_, const StorageInMemoryMetadata & metadata_, ContextMutablePtr context_, const String & date_column_name_, @@ -53,7 +52,7 @@ MergeTreeCloudData::MergeTreeCloudData( std::unique_ptr settings_) : MergeTreeMetaBase( table_id_, - relative_data_path_, + "", // relative_data_path will be set later metadata_, context_, date_column_name_, diff --git a/src/Storages/MergeTree/MergeTreeCloudData.h b/src/Storages/MergeTree/MergeTreeCloudData.h index 18feee7d1b3..ffebde648c6 100644 --- a/src/Storages/MergeTree/MergeTreeCloudData.h +++ b/src/Storages/MergeTree/MergeTreeCloudData.h @@ -72,7 +72,6 @@ class MergeTreeCloudData : public MergeTreeMetaBase MergeTreeCloudData( const StorageID & table_id_, - const String & relative_data_path_, const 
StorageInMemoryMetadata & metadata_, ContextMutablePtr context_, const String & date_column_name_, diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 4fd48565ed0..9d3a03e3fb0 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -177,6 +177,9 @@ MergeTreeData::MergeTreeData( , replicated_sends_throttler( std::make_shared(getSettings()->max_replicated_sends_network_bandwidth, getContext()->getReplicatedSendsThrottler())) { + setProperties(metadata_, metadata_, false); + checkTTLExpressions(metadata_, metadata_); + const auto settings = getSettings(); enable_metastore = settings->enable_metastore; diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index 6b23a620ba1..7ea0393da9b 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -865,7 +865,7 @@ static StoragePtr create(const StorageFactory::Arguments & args) if (storage_settings->storage_dialect_type.value != DialectType::CLICKHOUSE && !args.storage_def->settings->changes.tryGet("storage_dialect_type")) { args.storage_def->settings->changes.push_back(SettingChange( - "storage_dialect_type", + "storage_dialect_type", SettingFieldDialectTypeTraits::toString(storage_settings->storage_dialect_type.value))); } } @@ -1000,7 +1000,6 @@ static StoragePtr create(const StorageFactory::Arguments & args) args.table_id, cnch_database_name, cnch_table_name, - "", /// Do NOT set relative path for CloudMergeTree args.relative_data_path, metadata, args.getContext(), date_column_name, diff --git a/src/Storages/StorageCloudMergeTree.cpp b/src/Storages/StorageCloudMergeTree.cpp index 76419233f07..5736c2b52ef 100644 --- a/src/Storages/StorageCloudMergeTree.cpp +++ b/src/Storages/StorageCloudMergeTree.cpp @@ -63,7 +63,6 @@ StorageCloudMergeTree::StorageCloudMergeTree( const StorageID & table_id_, 
String cnch_database_name_, String cnch_table_name_, - const String & relative_data_path_, const StorageInMemoryMetadata & metadata_, ContextMutablePtr context_, const String & date_column_name_, @@ -71,7 +70,6 @@ StorageCloudMergeTree::StorageCloudMergeTree( std::unique_ptr settings_) : MergeTreeCloudData( // NOLINT table_id_, - relative_data_path_, metadata_, context_, date_column_name_, @@ -80,21 +78,15 @@ StorageCloudMergeTree::StorageCloudMergeTree( , cnch_database_name(std::move(cnch_database_name_)) , cnch_table_name(std::move(cnch_table_name_)) { - const String & cnch_uuid = getSettings()->cnch_table_uuid.toString(); - String relative_table_path(cnch_uuid); - - if (relative_table_path.empty()) - relative_table_path = UUIDHelpers::UUIDToString(table_id_.uuid); - - relative_table_path = getStoragePolicy(IStorage::StorageLocation::MAIN)->getAnyDisk()->getTableRelativePathOnDisk(relative_table_path); - - if (relative_data_path_.empty() || relative_table_path.empty()) - MergeTreeMetaBase::setRelativeDataPath(IStorage::StorageLocation::MAIN, relative_table_path); + setServerVwName(getSettings()->cnch_server_vw); + setInMemoryMetadata(metadata_); + format_version = MERGE_TREE_CHCH_DATA_STORAGTE_VERSION; + String cnch_uuid = UUIDHelpers::UUIDToString(getCnchStorageUUID()); + String relative_table_path = getStoragePolicy(IStorage::StorageLocation::MAIN)->getAnyDisk()->getTableRelativePathOnDisk(cnch_uuid); + MergeTreeMetaBase::setRelativeDataPath(IStorage::StorageLocation::MAIN, relative_table_path); relative_auxility_storage_path = fs::path("auxility_store") / relative_table_path / ""; - format_version = MERGE_TREE_CHCH_DATA_STORAGTE_VERSION; - if (getInMemoryMetadataPtr()->hasUniqueKey() && getSettings()->cloud_enable_dedup_worker) dedup_worker = std::make_unique(*this); } diff --git a/src/Storages/StorageCloudMergeTree.h b/src/Storages/StorageCloudMergeTree.h index a6984fd7d05..f84d60708bc 100644 --- a/src/Storages/StorageCloudMergeTree.h +++ 
b/src/Storages/StorageCloudMergeTree.h @@ -39,7 +39,7 @@ class StorageCloudMergeTree : public shared_ptr_helper, p public: virtual ~StorageCloudMergeTree() override; - std::string getName() const override { return "CloudMergeTree"; } + std::string getName() const override { return "Cloud" + merging_params.getModeName() + "MergeTree"; } bool supportsParallelInsert() const override { return true; } bool supportsSampling() const override { return true; } @@ -109,7 +109,7 @@ class StorageCloudMergeTree : public shared_ptr_helper, p QueryProcessingStage::Enum getQueryProcessingStage(ContextPtr, QueryProcessingStage::Enum, const StorageSnapshotPtr &, SelectQueryInfo &) const override; bool getQueryProcessingStageWithAggregateProjection(ContextPtr query_context, const StorageSnapshotPtr & storage_snapshot, SelectQueryInfo & query_info) const; - void resetObjectColumns(const ColumnsDescription & object_columns_) { object_columns = object_columns_; } + void resetObjectColumns(const ColumnsDescription & object_columns_) { object_columns = object_columns_; } protected: MutationCommands getFirstAlterMutationCommandsForPart(const DataPartPtr & part) const override; @@ -118,14 +118,13 @@ class StorageCloudMergeTree : public shared_ptr_helper, p const StorageID & table_id_, String cnch_database_name_, String cnch_table_name_, - const String & relative_data_path_, const StorageInMemoryMetadata & metadata_, ContextMutablePtr context_, const String & date_column_name_, const MergeTreeMetaBase::MergingParams & merging_params_, std::unique_ptr settings_); - + std::unique_ptr getDefaultSettings() const override; const String cnch_database_name; diff --git a/src/Storages/StorageCnchMergeTree.cpp b/src/Storages/StorageCnchMergeTree.cpp index b87e2d9df21..342597faf1b 100644 --- a/src/Storages/StorageCnchMergeTree.cpp +++ b/src/Storages/StorageCnchMergeTree.cpp @@ -195,6 +195,10 @@ StorageCnchMergeTree::StorageCnchMergeTree( [](const String &) {}) , CnchStorageCommonHelper(table_id_, 
getDatabaseName(), getTableName()) { + setServerVwName(getSettings()->cnch_server_vw); + setProperties(metadata_, metadata_, false); + checkTTLExpressions(metadata_, metadata_); + String relative_table_path = getStoragePolicy(IStorage::StorageLocation::MAIN) ->getAnyDisk() ->getTableRelativePathOnDisk(UUIDHelpers::UUIDToString(table_id_.uuid)); @@ -1408,13 +1412,22 @@ void StorageCnchMergeTree::collectResource( const String & local_table_name, const std::set & required_bucket_numbers, const StorageSnapshotPtr & storage_snapshot, - WorkerEngineType /*engine_type*/, + WorkerEngineType engine_type, bool replicated) { auto cnch_resource = local_context->getCnchServerResource(); - auto create_table_query = getCreateQueryForCloudTable(getCreateTableSql(), local_table_name, local_context); + if (local_context->getSettingsRef().send_cacheable_table_definitions) + { + String local_dictionary_tables; + cnch_resource->addCacheableCreateQuery(shared_from_this(), local_table_name, engine_type, local_dictionary_tables); + } + else + { + auto create_table_query = getCreateQueryForCloudTable( + getCreateTableSql(), local_table_name, local_context, false, std::nullopt, {}, {}, engine_type); + cnch_resource->addCreateQuery(local_context, shared_from_this(), create_table_query, local_table_name, false); + } - cnch_resource->addCreateQuery(local_context, shared_from_this(), create_table_query, local_table_name, false); // if (local_context.getSettingsRef().enable_virtual_part) // setVirtualPartSize(local_context, parts, worker_group->getReadWorkers().size()); @@ -3170,7 +3183,7 @@ std::optional StorageCnchMergeTree::totalRows(const ContextPtr & query_c if (partition_list.empty()) return 0; auto num_total_partition = partition_list.size(); - + filterPartitionByTTL(partition_list, query_context->tryGetCurrentTransactionID().toSecond()); if (partition_list.empty()) return 0; diff --git a/src/Storages/StorageDictCloudMergeTree.cpp b/src/Storages/StorageDictCloudMergeTree.cpp index 
7d06ffbd503..f9287035f7e 100644 --- a/src/Storages/StorageDictCloudMergeTree.cpp +++ b/src/Storages/StorageDictCloudMergeTree.cpp @@ -24,7 +24,6 @@ StorageDictCloudMergeTree::StorageDictCloudMergeTree( const StorageID & table_id_, String cnch_database_name_, String cnch_table_name_, - const String & relative_data_path_, const StorageInMemoryMetadata & metadata_, ContextMutablePtr context_, const String & date_column_name_, @@ -34,7 +33,6 @@ StorageDictCloudMergeTree::StorageDictCloudMergeTree( table_id_, std::move(cnch_database_name_), std::move(cnch_table_name_), - relative_data_path_, metadata_, context_, date_column_name_, @@ -255,7 +253,6 @@ void registerStorageDictCloud(StorageFactory & factory) args.table_id, cnch_database_name, cnch_table_name, - "",/// Do NOT set relative path for CloudMergeTree args.relative_data_path, metadata, args.getContext(), date_column_name, diff --git a/src/Storages/StorageDictCloudMergeTree.h b/src/Storages/StorageDictCloudMergeTree.h index be127fcf938..9e74acb5b5a 100644 --- a/src/Storages/StorageDictCloudMergeTree.h +++ b/src/Storages/StorageDictCloudMergeTree.h @@ -24,14 +24,13 @@ class StorageDictCloudMergeTree final : public StorageCloudMergeTree { public: - std::string getName() const override { return merging_params.getModeName() + "DictCloudMergeTree"; } + std::string getName() const override { return "DictCloud" + merging_params.getModeName() + "MergeTree"; } String getQualifiedTableName() const { return getDatabaseName() + "." 
+ getTableName(); } StorageDictCloudMergeTree( const StorageID & table_id_, String cnch_database_name_, String cnch_table_name_, - const String & relative_data_path_, const StorageInMemoryMetadata & metadata_, ContextMutablePtr context_, const String & date_column_name_, diff --git a/src/Transaction/CnchServerTransaction.cpp b/src/Transaction/CnchServerTransaction.cpp index 3ad359a76b4..f7735ed96bc 100644 --- a/src/Transaction/CnchServerTransaction.cpp +++ b/src/Transaction/CnchServerTransaction.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include From 7104fe16e7acf0a92248d466181b24040b558241 Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 06:49:59 +0000 Subject: [PATCH 094/292] remove bitengine test cases --- ...bitengine_server_cloud_table_fix.reference | 1 - ...20014_bitengine_server_cloud_table_fix.sql | 57 ------------------- 2 files changed, 58 deletions(-) delete mode 100644 tests/queries/4_cnch_stateless/20014_bitengine_server_cloud_table_fix.reference delete mode 100644 tests/queries/4_cnch_stateless/20014_bitengine_server_cloud_table_fix.sql diff --git a/tests/queries/4_cnch_stateless/20014_bitengine_server_cloud_table_fix.reference b/tests/queries/4_cnch_stateless/20014_bitengine_server_cloud_table_fix.reference deleted file mode 100644 index 6e3d4bf4d55..00000000000 --- a/tests/queries/4_cnch_stateless/20014_bitengine_server_cloud_table_fix.reference +++ /dev/null @@ -1 +0,0 @@ -{48,66} diff --git a/tests/queries/4_cnch_stateless/20014_bitengine_server_cloud_table_fix.sql b/tests/queries/4_cnch_stateless/20014_bitengine_server_cloud_table_fix.sql deleted file mode 100644 index 47b3ce56560..00000000000 --- a/tests/queries/4_cnch_stateless/20014_bitengine_server_cloud_table_fix.sql +++ /dev/null @@ -1,57 +0,0 @@ -create database if not exists dict_db; -create database if not exists bitmap_db; - -create table if not exists dict_db.tag_bitmaps_did_cdp_dict_20014 (`key` UInt64, `value` UInt64, `split_id` UInt64, 
BITENGINE_CONSTRAINT key_constraint CHECK toUInt64(intHash64(`key`) % 2)) ENGINE = CnchMergeTree CLUSTER BY `split_id` INTO 2 BUCKETS PRIMARY KEY `key` ORDER BY `key`; - -CREATE TABLE if not exists bitmap_db.tag_bitmaps_did_cdp_20014 (`split_id` UInt64, `tag_id` Int32, `p_date` Date, `tag_value_double` Float64, `tag_value` String, `id_map_cnt` UInt64, `id_type` Int32, `id_map` BitMap64 BitEngineEncode, `app_id` Int32, `tag_type` Int8) ENGINE = CnchMergeTree PARTITION BY (toDate(toStartOfDay(`p_date`)), `tag_id`, `tag_type`, `id_type`) CLUSTER BY `split_id` INTO 2 BUCKETS PRIMARY KEY (`tag_value`, `tag_value_double`, cityHash64(`tag_value`)) ORDER BY (`tag_value`, `tag_value_double`, cityHash64(`tag_value`)) SETTINGS underlying_dictionary_tables = '{"id_map":"`dict_db`.`tag_bitmaps_did_cdp_dict_20014`"}'; - -insert into bitmap_db.tag_bitmaps_did_cdp_20014 select 0, 1014834, '2024-03-14', 0, 'aaa', 2, 1358, arrayToBitmap([48,66]), 0, 2; - -select DecodeBitmap(id_map, 'bitmap_db', 'tag_bitmaps_did_cdp_20014', 'id_map') -from ( - select id_map from bitmap_db.tag_bitmaps_did_cdp_20014 where tag_id = 1014834 -); - -SELECT * -FROM -( - SELECT toUInt64(base_id) AS base_id - FROM - ( - SELECT - toUInt64(0) AS base_id, - map('', '') AS string_map, - map('', 0) AS bigint_map, - map('', 0) AS double_map, - map('', '') AS date_map, - map('', '') AS datetime_map, - map('', ['']) AS array_string_map, - map('', [0]) AS array_bigint_map, - map('', [0]) AS array_double_map, - map('', ['']) AS array_date_map, - map('', ['']) AS array_datetime_map, - NULL AS id_type, - NULL AS p_date - FROM numbers(0) - ) - WHERE (((p_date >= '2023-06-27') AND (p_date <= '2023-06-27')) AND (bigint_map{'5002743'} = 0)) AND (id_type = 1358) -) as l inner join ( -select arrayJoin(bitmapToArrayWithDecode(id_map, 'bitmap_db', 'tag_bitmaps_did_cdp_20014', 'id_map')) as id -from ( - select bitmapExtract('0')(idx, id_map) as id_map, split_id - from ( - select bitmapColumnOr(id_map) as id_map, - toInt32(0) as 
idx, - split_id - from bitmap_db.tag_bitmaps_did_cdp_20014 where tag_id = 1014834 - group by split_id - ) - group by split_id -) SETTINGS dict_table_full_mode = 1 - ) as r on l.base_id = r.id ORDER BY base_id, id; - - -drop table bitmap_db.tag_bitmaps_did_cdp_20014; -drop table dict_db.tag_bitmaps_did_cdp_dict_20014; -drop database bitmap_db; -drop database dict_db; \ No newline at end of file From bec8b74ae84dab137441f8c22fb8f081e2b6179a Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 08:06:10 +0000 Subject: [PATCH 095/292] Merge branch 'jiashuo_support-attach-preload-2.2' into 'cnch-2.2' fix(clickhousech@m-4711548586): [CP to 2.2]support preload for attach operation See merge request dp/ClickHouse!23224 # Conflicts: # src/CloudServices/CnchDataWriter.cpp # src/Storages/MergeTree/CnchAttachProcessor.cpp --- src/CloudServices/CnchDataWriter.cpp | 2 +- src/Protos/DataModelHelpers.cpp | 10 ++++++ src/Protos/DataModelHelpers.h | 1 + .../DiskCache/PartFileDiskCacheSegment.cpp | 2 +- .../MergeTree/CnchAttachProcessor.cpp | 35 +++++++++++++++++++ src/Storages/MergeTree/CnchAttachProcessor.h | 2 ++ .../MergeTree/MergeTreeDataPartCNCH.cpp | 5 --- src/Storages/StorageCnchMergeTree.cpp | 9 +++-- 8 files changed, 57 insertions(+), 9 deletions(-) diff --git a/src/CloudServices/CnchDataWriter.cpp b/src/CloudServices/CnchDataWriter.cpp index 3da4496070b..dd7937f392b 100644 --- a/src/CloudServices/CnchDataWriter.cpp +++ b/src/CloudServices/CnchDataWriter.cpp @@ -748,7 +748,7 @@ void CnchDataWriter::preload(const MutableMergeTreeDataPartsCNCHVector & dumped_ auto server_client = context->getCnchServerClientPool().get(); MutableMergeTreeDataPartsCNCHVector preload_parts; std::copy_if(dumped_parts.begin(), dumped_parts.end(), std::back_inserter(preload_parts), [](const auto & part) { - return !part->deleted && !part->isPartial(); + return !part->deleted; }); if (!preload_parts.empty()) diff --git a/src/Protos/DataModelHelpers.cpp b/src/Protos/DataModelHelpers.cpp index 
8b96c9d6ef2..7f4fcdba5b4 100644 --- a/src/Protos/DataModelHelpers.cpp +++ b/src/Protos/DataModelHelpers.cpp @@ -35,6 +35,7 @@ #include "common/logger_useful.h" #include #include +#include #include #include #include @@ -533,6 +534,15 @@ ServerDataPartsVector createServerPartsFromDataParts(const MergeTreeMetaBase & s return res; } +ServerDataPartsVector createServerPartsFromDataParts(const MergeTreeMetaBase & storage, const MutableMergeTreeDataPartsCNCHVector & parts) +{ + ServerDataPartsVector res; + res.reserve(parts.size()); + for (const auto & part : parts) + res.push_back(createServerPartFromDataPart(storage, part)); + return res; +} + IMergeTreeDataPartsVector createPartVectorFromServerParts( const MergeTreeMetaBase & storage, const ServerDataPartsVector & parts) { diff --git a/src/Protos/DataModelHelpers.h b/src/Protos/DataModelHelpers.h index 1b8ff6a1f90..ca205411abf 100644 --- a/src/Protos/DataModelHelpers.h +++ b/src/Protos/DataModelHelpers.h @@ -349,6 +349,7 @@ ServerDataPartsVector createServerPartsFromModels(const MergeTreeMetaBase & storage, const pb::RepeatedPtrField & parts_model); ServerDataPartsVector createServerPartsFromDataParts(const MergeTreeMetaBase & storage, const MergeTreeDataPartsCNCHVector & parts); +ServerDataPartsVector createServerPartsFromDataParts(const MergeTreeMetaBase & storage, const MutableMergeTreeDataPartsCNCHVector & parts); IMergeTreeDataPartsVector createPartVectorFromServerParts( const MergeTreeMetaBase & storage, diff --git a/src/Storages/DiskCache/PartFileDiskCacheSegment.cpp b/src/Storages/DiskCache/PartFileDiskCacheSegment.cpp index 8cc8695e245..691bd319fc3 100644 --- a/src/Storages/DiskCache/PartFileDiskCacheSegment.cpp +++ b/src/Storages/DiskCache/PartFileDiskCacheSegment.cpp @@ -139,7 +139,7 @@ void PartFileDiskCacheSegment::cacheToDisk(IDiskCache & disk_cache, bool throw_e String data_path = data_part->getFullRelativePath() + "data"; auto disk = data_part->volume->getDisk(); auto source_buffer = std::make_unique( - 
disk->readFile(data_path, merge_tree_reader_settings.read_settings), stream_file_pos.file_offset, + disk->readFile(data_path, merge_tree_reader_settings.read_settings), false, stream_file_pos.file_offset, stream_file_pos.file_size, true); const auto & right_mark_pos = marks_loader.getMark(right_mark); diff --git a/src/Storages/MergeTree/CnchAttachProcessor.cpp b/src/Storages/MergeTree/CnchAttachProcessor.cpp index 7368b2e1670..9747e8606f4 100644 --- a/src/Storages/MergeTree/CnchAttachProcessor.cpp +++ b/src/Storages/MergeTree/CnchAttachProcessor.cpp @@ -14,6 +14,10 @@ */ #include +#include +#include +#include +#include #include #include #include @@ -336,6 +340,7 @@ void CnchAttachProcessor::exec() std::vector attached_partitions; AttachFilter filter; + MutableMergeTreeDataPartsCNCHVector preload_parts; try { // Find all parts which matches filter, these parts will retain it's origin @@ -346,6 +351,7 @@ void CnchAttachProcessor::exec() // Assign new part name and rename it to target location PartsWithHistory prepared_parts = prepareParts(parts_from_sources, attach_ctx); + preload_parts = prepared_parts.second; if (command.replace) { @@ -400,6 +406,8 @@ void CnchAttachProcessor::exec() } attach_ctx.commit(); + + tryPreload(preload_parts); } std::vector CnchAttachProcessor::getDetachedParts(const AttachFilter& filter) @@ -1763,4 +1771,31 @@ void CnchAttachProcessor::injectFailure(AttachFailurePoint point) const } } +void CnchAttachProcessor::tryPreload(MutableMergeTreeDataPartsCNCHVector & attached_parts) +{ + const auto & settings = query_ctx->getSettingsRef(); + if (!settings.parts_preload_level || (!target_tbl.getSettings()->parts_preload_level && !target_tbl.getSettings()->enable_preload_parts) + || !target_tbl.getSettings()->enable_local_disk_cache) + return; + + try + { + if (!attached_parts.empty()) + { + ServerDataPartsVector preload_parts = createServerPartsFromDataParts(target_tbl, attached_parts); + target_tbl.sendPreloadTasks( + query_ctx, + preload_parts, 
+ false, + (target_tbl.getSettings()->enable_preload_parts ? PreloadLevelSettings::AllPreload + : target_tbl.getSettings()->parts_preload_level.value), + time(nullptr)); + } + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__, "Fail to preload"); + } +} + } diff --git a/src/Storages/MergeTree/CnchAttachProcessor.h b/src/Storages/MergeTree/CnchAttachProcessor.h index c1df170b3e3..3cacc1ff91a 100644 --- a/src/Storages/MergeTree/CnchAttachProcessor.h +++ b/src/Storages/MergeTree/CnchAttachProcessor.h @@ -236,6 +236,8 @@ class CnchAttachProcessor void loadUniqueDeleteMeta(IMergeTreeDataPartPtr & part, const MergeTreePartInfo & info); void waitingForDedup(const NameSet & partitions_filter, const NameSet & staged_parts_name); + void tryPreload(MutableMergeTreeDataPartsCNCHVector & preload_parts); + UInt64 failure_injection_knob; StorageCnchMergeTree& target_tbl; diff --git a/src/Storages/MergeTree/MergeTreeDataPartCNCH.cpp b/src/Storages/MergeTree/MergeTreeDataPartCNCH.cpp index 1d6e548b1ef..6ec1d463b2e 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCNCH.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartCNCH.cpp @@ -1233,11 +1233,6 @@ void MergeTreeDataPartCNCH::preload(UInt64 preload_level, UInt64 submit_ts) cons { Stopwatch watch; String full_path = getFullPath(); - if (isPartial()) - { - LOG_WARNING(storage.log, "Preload partial parts in invalid: {}", full_path); - return; - } String part_path = fs::path(getFullRelativePath()) / DATA_FILE; if (!volume->getDisk()->fileExists(part_path)) diff --git a/src/Storages/StorageCnchMergeTree.cpp b/src/Storages/StorageCnchMergeTree.cpp index 342597faf1b..7103924ee99 100644 --- a/src/Storages/StorageCnchMergeTree.cpp +++ b/src/Storages/StorageCnchMergeTree.cpp @@ -105,6 +105,7 @@ extern const Event PrunePartsTime; extern const Event TotalPartitions; extern const Event PrunedPartitions; extern const Event SelectedParts; +extern const Event PreloadSubmitTotalOps; } namespace DB @@ -1464,6 +1465,9 @@ void 
StorageCnchMergeTree::collectResourceWithTableVersion( void StorageCnchMergeTree::sendPreloadTasks(ContextPtr local_context, ServerDataPartsVector parts, bool enable_parts_sync_preload, UInt64 parts_preload_level, UInt64 ts) { + ProfileEvents::increment(ProfileEvents::PreloadSubmitTotalOps, 1, Metrics::MetricType::Rate); + Stopwatch timer; + auto worker_group = getWorkerGroupForTable(*this, local_context); local_context->setCurrentWorkerGroup(worker_group); @@ -1505,11 +1509,12 @@ void StorageCnchMergeTree::sendPreloadTasks(ContextPtr local_context, ServerData ids.emplace_back(id); LOG_TRACE( log, - "send preload data parts size = {}, enable_parts_sync_preload = {}, enable_parts_sync_preload = {}, submit_ts = {}", + "send preload data parts size = {}, enable_parts_sync_preload = {}, parts_preload_level = {}, submit_ts = {}, time_ms = {}", resource.server_parts.size(), enable_parts_sync_preload, parts_preload_level, - ts); + ts, + timer.elapsedMilliseconds()); } return ids; }); From 0303dd80423586ca15df06ab3f121f39903be5c5 Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 08:06:30 +0000 Subject: [PATCH 096/292] Merge branch 'cherry-pick-8d6a3ec3' into 'cnch-2.2' fix(clickhousech@m-4691886549): [TO CHCH-2.2] fix hybrid allocation bug when query multiple tables See merge request dp/ClickHouse!23252 --- src/CloudServices/CnchServerResource.cpp | 2 +- .../10074_hybrid_allocation_join.reference | 3 +++ .../10074_hybrid_allocation_join.sql | 15 +++++++++++++++ 3 files changed, 19 insertions(+), 1 deletion(-) create mode 100644 tests/queries/4_cnch_stateless/10074_hybrid_allocation_join.reference create mode 100644 tests/queries/4_cnch_stateless/10074_hybrid_allocation_join.sql diff --git a/src/CloudServices/CnchServerResource.cpp b/src/CloudServices/CnchServerResource.cpp index 35f9270da1c..cbb6ebea5df 100644 --- a/src/CloudServices/CnchServerResource.cpp +++ b/src/CloudServices/CnchServerResource.cpp @@ -536,7 +536,7 @@ void 
CnchServerResource::allocateResource( LOG_TRACE( log, "Send {} virtual data part (hybrid_allocation) to worker {} for table {}", - assigned_parts.size(), + assigned_virtual_parts.size(), host_ports.toDebugString(), storage->getStorageID().getNameForLogs()); } diff --git a/tests/queries/4_cnch_stateless/10074_hybrid_allocation_join.reference b/tests/queries/4_cnch_stateless/10074_hybrid_allocation_join.reference new file mode 100644 index 00000000000..03283f06f42 --- /dev/null +++ b/tests/queries/4_cnch_stateless/10074_hybrid_allocation_join.reference @@ -0,0 +1,3 @@ +1024 +1024 +1024 diff --git a/tests/queries/4_cnch_stateless/10074_hybrid_allocation_join.sql b/tests/queries/4_cnch_stateless/10074_hybrid_allocation_join.sql new file mode 100644 index 00000000000..f161c0b38f6 --- /dev/null +++ b/tests/queries/4_cnch_stateless/10074_hybrid_allocation_join.sql @@ -0,0 +1,15 @@ +DROP TABLE IF EXISTS test_hybrid_join; +DROP TABLE IF EXISTS test_normal; + +CREATE TABLE test_hybrid_join (a UInt64, b UInt64) ENGINE = CnchMergeTree() ORDER BY (a, b) PARTITION BY (a % 4) SETTINGS index_granularity=128, enable_hybrid_allocation = 1, min_rows_per_virtual_part = 128; +INSERT INTO test_hybrid_join SELECT number, xor(number, 223344) FROM numbers(1024); + +CREATE TABLE test_normal (a UInt64, b UInt64) ENGINE = CnchMergeTree() ORDER BY (a, b) PARTITION BY (a % 4) SETTINGS index_granularity=128; +INSERT INTO test_normal SELECT number, xor(number, 223344) FROM numbers(1024); + +SELECT count() FROM (SELECT * from test_hybrid_join as t1 LEFT JOIN test_normal as t2 ON t1.a = t2.a); +SELECT count() FROM (SELECT * from test_hybrid_join as t1 RIGHT JOIN test_normal as t2 ON t1.a = t2.a) SETTINGS enable_optimizer = 1; +SELECT count() FROM (SELECT * from test_hybrid_join as t1 INNER JOIN test_normal as t2 ON t1.a = t2.a); + +DROP TABLE test_hybrid_join; +DROP TABLE test_normal; From 36ef8450ca0054c941b2d70c268ec8d53b1d6a65 Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 
08:07:15 +0000 Subject: [PATCH 097/292] Merge branch 'fix-sensitive-permission' into 'cnch-2.2' fix(clickhousech@m-17322645): Fix sensitive permission See merge request dp/ClickHouse!22909 # Conflicts: # src/Access/ContextAccess.cpp --- src/Access/AccessControlManager.cpp | 20 ++- src/Access/AccessControlManager.h | 6 + src/Access/AccessRights.cpp | 97 +++++------ src/Access/AccessRights.h | 25 +-- src/Access/ContextAccess.cpp | 91 ++++++----- src/Access/ContextAccess.h | 17 +- src/Access/DiskAccessStorage.cpp | 17 +- src/Access/EnabledRolesInfo.h | 1 + src/Access/KVAccessStorage.cpp | 24 ++- src/Access/RoleCache.cpp | 2 + src/Interpreters/Context.cpp | 6 +- src/Interpreters/InterpreterGrantQuery.cpp | 41 +++-- src/Interpreters/InterpreterGrantQuery.h | 4 +- .../InterpreterShowAccessQuery.cpp | 6 +- .../InterpreterShowGrantsQuery.cpp | 14 +- src/Interpreters/InterpreterShowGrantsQuery.h | 4 +- src/Parsers/ASTGrantQuery.cpp | 10 +- src/Parsers/ASTGrantQuery.h | 1 + src/Parsers/ParserGrantQuery.cpp | 4 + src/Parsers/formatTenantDatabaseName.cpp | 8 +- src/Server/HTTPHandler.cpp | 73 +++++---- src/Server/MySQLHandler.cpp | 10 +- src/Server/TCPHandler.cpp | 74 +++++---- .../01074_partial_revokes.reference | 12 ++ .../60000_rbac_sensitive.reference | 19 +-- .../4_cnch_stateless/60000_rbac_sensitive.sh | 152 +++++++++++------- 26 files changed, 445 insertions(+), 293 deletions(-) diff --git a/src/Access/AccessControlManager.cpp b/src/Access/AccessControlManager.cpp index 53d9380cf21..b5160391164 100644 --- a/src/Access/AccessControlManager.cpp +++ b/src/Access/AccessControlManager.cpp @@ -148,6 +148,20 @@ AccessControlManager::AccessControlManager() { } +bool AccessControlManager::isSensitiveGrantee(const String & grantee) const +{ + auto pos = grantee.find('.'); + + if (pos == String::npos || pos == 0) + return false; + + return isSensitiveTenant(grantee.substr(0, pos)); +} + +bool AccessControlManager::isSensitiveTenant(const String & tenant) const +{ + return 
sensitive_permission_tenants->isSensitivePermissionEnabled(tenant); +} bool AccessControlManager::isSensitiveTenant(const String & tenant) const { @@ -445,6 +459,7 @@ ContextAccessParams AccessControlManager::getContextAccessParams( const String & current_database, const ClientInfo & client_info, const String & tenant, + bool has_tenant_id_in_username, bool load_roles) const { ContextAccessParams params; @@ -459,8 +474,9 @@ ContextAccessParams AccessControlManager::getContextAccessParams( params.http_method = client_info.http_method; params.address = client_info.current_address.host(); params.quota_key = client_info.quota_key; - params.has_tenant_id_in_username = !tenant.empty(); - params.enable_sensitive_permission = sensitive_permission_tenants->isSensitivePermissionEnabled(tenant); + params.has_tenant_id_in_username = has_tenant_id_in_username; + params.enable_sensitive_permission = + has_tenant_id_in_username ? isSensitiveTenant(tenant) : false; params.load_roles = load_roles; /// Extract the last entry from comma separated list of X-Forwarded-For addresses. 
diff --git a/src/Access/AccessControlManager.h b/src/Access/AccessControlManager.h index 461612a3d07..a92c0321523 100644 --- a/src/Access/AccessControlManager.h +++ b/src/Access/AccessControlManager.h @@ -139,6 +139,7 @@ class AccessControlManager : public MultipleAccessStorage const String & current_database, const ClientInfo & client_info, const String & tenant, + bool has_tenant_id_in_username, bool load_roles) const; std::shared_ptr getContextAccess(const ContextAccessParams & params) const; @@ -170,8 +171,13 @@ class AccessControlManager : public MultipleAccessStorage const ExternalAuthenticators & getExternalAuthenticators() const; + bool isSensitiveGrantee(const String & grantee) const; + std::function sensitive_resource_getter; +private: + bool isSensitiveTenant(const String & tenant) const; + private: class ContextAccessCache; class CustomSettingsPrefixes; diff --git a/src/Access/AccessRights.cpp b/src/Access/AccessRights.cpp index 8b0856e57a8..90455108bba 100644 --- a/src/Access/AccessRights.cpp +++ b/src/Access/AccessRights.cpp @@ -192,6 +192,7 @@ namespace }; + /* must be synced with the Level definition in ContextAccess.cpp */ enum Level { GLOBAL_LEVEL, @@ -376,58 +377,59 @@ struct AccessRightsBase::Node return true; } - bool isGranted(const std::unordered_set &, const AccessFlags & flags_) const requires Permission + bool isGranted(int sensitive_level, const AccessFlags & flags_) const requires Permission { + /* sensitive resource is not granted */ + if (level < sensitive_level) + return false; + return isGranted(flags_); } template - bool isGranted(const std::unordered_set & sensitive_columns, const AccessFlags & flags_, const std::string_view & name, const Args &... subnames) const requires Permission + bool isGranted(int sensitive_level, const AccessFlags & flags_, const std::string_view & name, const Args &... 
subnames) const requires Permission { AccessFlags flags_to_check = flags_ - min_flags_with_children; + if (!max_flags_with_children.contains(flags_to_check)) + return false; - const Node * child = tryGetChild(name); // to reject, this should fail + const Node * child = tryGetChild(name); if (child) - { - return child->isGranted(sensitive_columns, flags_to_check, subnames...); - } - else - { - auto current_node_name = node_name ? *node_name : "NULL"; - return name == current_node_name && flags.contains(flags_to_check); - } + return child->isGranted(sensitive_level, flags_to_check, subnames...); + + /* sensitive resource is not granted */ + if (level < sensitive_level) + return false; + + return flags.contains(flags_to_check); } template - bool isGranted(const std::unordered_set & sensitive_columns, const AccessFlags & flags_, const std::vector & names) const requires Permission + bool isGranted(int sensitive_level, const AccessFlags & flags_, const std::unordered_set & names) const requires Permission { AccessFlags flags_to_check = flags_ - min_flags_with_children; + if (!max_flags_with_children.contains(flags_to_check)) + return false; for (const auto & name : names) { const Node * child = tryGetChild(name); if (child) { - if (sensitive_columns.contains(name)) - { - if (!child->isGranted(sensitive_columns, flags_to_check, name) || !flags.contains(flags_to_check)) // For sensitive column, must have permissions granted for both table and the column - return false; - } - else if (!child->isGranted(sensitive_columns, flags_to_check, name)) + if (!child->isGranted(sensitive_level, flags_to_check, name)) return false; } else { - if (sensitive_columns.contains(name)) - { - auto current_node_name = node_name ? 
*node_name : "NULL"; - if (name != current_node_name || !flags.contains(flags_to_check)) - return false; - } + /* sensitive resource is not granted */ + if (level < sensitive_level) + return false; + if (!flags.contains(flags_to_check)) return false; } } + return true; } @@ -633,8 +635,8 @@ struct AccessRightsBase::Node auto flags_go = node_go ? node_go->flags : parent_fl_go; auto revokes = parent_fl - flags; auto revokes_go = parent_fl_go - flags_go - revokes; - auto grants_go = flags_go - parent_fl_go; - auto grants = flags - parent_fl - grants_go; + auto grants_go = IsSensitive ? flags_go : flags_go - parent_fl_go; + auto grants = IsSensitive ? flags - grants_go : flags - parent_fl - grants_go; if (revokes) res.push_back(ProtoElement{revokes, full_name, false, true}); @@ -1262,14 +1264,14 @@ void AccessRightsBase::logTree() const } template -bool SensitiveAccessRights::isGrantedImpl(const std::unordered_set & sensitive_columns, const AccessFlags & flags, const Args &... args) const +bool SensitiveAccessRights::isGrantedImpl(int sensitive_level, const AccessFlags & flags, const Args &... 
args) const { auto helper = [&](const std::unique_ptr & root_node) -> bool { if (!root_node) return flags.isEmpty(); - return root_node->isGranted(sensitive_columns, flags, args...); + return root_node->isGranted(sensitive_level, flags, args...); }; if constexpr (grant_option) return helper(root_with_grant_option); @@ -1278,52 +1280,51 @@ bool SensitiveAccessRights::isGrantedImpl(const std::unordered_set -bool SensitiveAccessRights::isGrantedImplHelper(const std::unordered_set & sensitive_columns, const AccessRightsElement & element) const +bool SensitiveAccessRights::isGrantedImplHelper(int sensitive_level, const std::unordered_set & sensitive_columns, const AccessRightsElement & element) const { assert(!element.grant_option || grant_option); if (element.any_database) - return isGrantedImpl(sensitive_columns, element.access_flags); + return isGrantedImpl(sensitive_level, element.access_flags); else if (element.any_table) - return isGrantedImpl(sensitive_columns, element.access_flags, element.database); + return isGrantedImpl(sensitive_level, element.access_flags, element.database); else if (element.any_column) - return isGrantedImpl(sensitive_columns, element.access_flags, element.database, element.table); + return isGrantedImpl(sensitive_level, element.access_flags, element.database, element.table); else - return isGrantedImpl(sensitive_columns, element.access_flags, element.database, element.table, element.columns); + return isGrantedImpl(sensitive_level, element.access_flags, element.database, element.table, sensitive_columns); } template -bool SensitiveAccessRights::isGrantedImpl(const std::unordered_set & sensitive_columns, const AccessRightsElement & element) const +bool SensitiveAccessRights::isGrantedImpl(int sensitive_level, const std::unordered_set & sensitive_columns, const AccessRightsElement & element) const { if constexpr (grant_option) { - return isGrantedImplHelper(sensitive_columns, element); + return isGrantedImplHelper(sensitive_level, 
sensitive_columns, element); } else { if (element.grant_option) - return isGrantedImplHelper(sensitive_columns, element); + return isGrantedImplHelper(sensitive_level, sensitive_columns, element); else - return isGrantedImplHelper(sensitive_columns, element); + return isGrantedImplHelper(sensitive_level, sensitive_columns, element); } } template -bool SensitiveAccessRights::isGrantedImpl(const std::unordered_set & sensitive_columns, const AccessRightsElements & elements) const +bool SensitiveAccessRights::isGrantedImpl(int sensitive_level, const std::unordered_set & sensitive_columns, const AccessRightsElements & elements) const { for (const auto & element : elements) - if (!isGrantedImpl(sensitive_columns, element)) + if (!isGrantedImpl(sensitive_level, sensitive_columns, element)) return false; return true; } - -bool SensitiveAccessRights::isGranted(const std::unordered_set & sensitive_columns, const AccessFlags & flags) const { return isGrantedImpl(sensitive_columns, flags); } -bool SensitiveAccessRights::isGranted(const std::unordered_set & sensitive_columns, const AccessFlags & flags, const std::string_view & database) const { return isGrantedImpl(sensitive_columns, flags, database); } -bool SensitiveAccessRights::isGranted(const std::unordered_set & sensitive_columns, const AccessFlags & flags, const std::string_view & database, const std::string_view & table) const { return isGrantedImpl(sensitive_columns, flags, database, table); } -bool SensitiveAccessRights::isGranted(const std::unordered_set & sensitive_columns, const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::string_view & column) const { return isGrantedImpl(sensitive_columns, flags, database, table, column); } -bool SensitiveAccessRights::isGranted(const std::unordered_set & sensitive_columns, const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::vector & columns) const { return 
isGrantedImpl(sensitive_columns, flags, database, table, columns); } -bool SensitiveAccessRights::isGranted(const std::unordered_set & sensitive_columns, const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const Strings & columns) const { return isGrantedImpl(sensitive_columns, flags, database, table, columns); } -bool SensitiveAccessRights::isGranted(const std::unordered_set & sensitive_columns, const AccessRightsElement & element) const { return isGrantedImpl(sensitive_columns, element); } -bool SensitiveAccessRights::isGranted(const std::unordered_set & sensitive_columns, const AccessRightsElements & elements) const { return isGrantedImpl(sensitive_columns, elements); } +bool SensitiveAccessRights::isGranted(int sensitive_level, const std::unordered_set &, const AccessFlags & flags) const { return isGrantedImpl(sensitive_level, flags); } +bool SensitiveAccessRights::isGranted(int sensitive_level, const std::unordered_set &, const AccessFlags & flags, const std::string_view & database) const { return isGrantedImpl(sensitive_level, flags, database); } +bool SensitiveAccessRights::isGranted(int sensitive_level, const std::unordered_set &, const AccessFlags & flags, const std::string_view & database, const std::string_view & table) const { return isGrantedImpl(sensitive_level, flags, database, table); } +bool SensitiveAccessRights::isGranted(int sensitive_level, const std::unordered_set &, const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::string_view & column) const { return isGrantedImpl(sensitive_level, flags, database, table, column); } +bool SensitiveAccessRights::isGranted(int sensitive_level, const std::unordered_set & sensitive_columns, const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::vector &) const { return isGrantedImpl(sensitive_level, flags, database, table, sensitive_columns); } +bool 
SensitiveAccessRights::isGranted(int sensitive_level, const std::unordered_set & sensitive_columns, const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const Strings &) const { return isGrantedImpl(sensitive_level, flags, database, table, sensitive_columns); } +bool SensitiveAccessRights::isGranted(int sensitive_level, const std::unordered_set & sensitive_columns, const AccessRightsElement & element) const { return isGrantedImpl(sensitive_level, sensitive_columns, element); } +bool SensitiveAccessRights::isGranted(int sensitive_level, const std::unordered_set & sensitive_columns, const AccessRightsElements & elements) const { return isGrantedImpl(sensitive_level, sensitive_columns, elements); } template class AccessRightsBase; template class AccessRightsBase; diff --git a/src/Access/AccessRights.h b/src/Access/AccessRights.h index 2eeea796a90..ad75fb3f3ff 100644 --- a/src/Access/AccessRights.h +++ b/src/Access/AccessRights.h @@ -191,26 +191,27 @@ class SensitiveAccessRights : public AccessRightsBase public: using Base = AccessRightsBase; using Base::Base; - bool isGranted(const std::unordered_set & sensitive_columns, const AccessFlags & flags) const; - bool isGranted(const std::unordered_set & sensitive_columns, const AccessFlags & flags, const std::string_view & database) const; - bool isGranted(const std::unordered_set & sensitive_columns, const AccessFlags & flags, const std::string_view & database, const std::string_view & table) const; - bool isGranted(const std::unordered_set & sensitive_columns, const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::string_view & column) const; - bool isGranted(const std::unordered_set & sensitive_columns, const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::vector & columns) const; - bool isGranted(const std::unordered_set & sensitive_columns, const AccessFlags & flags, const 
std::string_view & database, const std::string_view & table, const Strings & columns) const; - bool isGranted(const std::unordered_set & sensitive_columns, const AccessRightsElement & element) const; - bool isGranted(const std::unordered_set & sensitive_columns, const AccessRightsElements & elements) const; + + bool isGranted(int sensitive_level, const std::unordered_set & sensitive_columns, const AccessFlags & flags) const; + bool isGranted(int sensitive_level, const std::unordered_set & sensitive_columns, const AccessFlags & flags, const std::string_view & database) const; + bool isGranted(int sensitive_level, const std::unordered_set & sensitive_columns, const AccessFlags & flags, const std::string_view & database, const std::string_view & table) const; + bool isGranted(int sensitive_level, const std::unordered_set & sensitive_columns, const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::string_view & column) const; + bool isGranted(int sensitive_level, const std::unordered_set & sensitive_columns, const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const std::vector & columns) const; + bool isGranted(int sensitive_level, const std::unordered_set & sensitive_columns, const AccessFlags & flags, const std::string_view & database, const std::string_view & table, const Strings & columns) const; + bool isGranted(int sensitive_level, const std::unordered_set & sensitive_columns, const AccessRightsElement & element) const; + bool isGranted(int sensitive_level, const std::unordered_set & sensitive_columns, const AccessRightsElements & elements) const; private: template - bool isGrantedImpl(const std::unordered_set & sensitive_columns, const AccessFlags & flags, const Args &... args) const; + bool isGrantedImpl(int sensitive_level, const AccessFlags & flags, const Args &... 
args) const; template - bool isGrantedImpl(const std::unordered_set & sensitive_columns, const AccessRightsElement & element) const; + bool isGrantedImpl(int sensitive_level, const std::unordered_set & sensitive_columns, const AccessRightsElement & element) const; template - bool isGrantedImpl(const std::unordered_set & sensitive_columns, const AccessRightsElements & elements) const; + bool isGrantedImpl(int sensitive_level, const std::unordered_set & sensitive_columns, const AccessRightsElements & elements) const; template - bool isGrantedImplHelper(const std::unordered_set & sensitive_columns, const AccessRightsElement & element) const; + bool isGrantedImplHelper(int sensitive_level, const std::unordered_set & sensitive_columns, const AccessRightsElement & element) const; }; } diff --git a/src/Access/ContextAccess.cpp b/src/Access/ContextAccess.cpp index f9eb6b48258..084fce7b878 100644 --- a/src/Access/ContextAccess.cpp +++ b/src/Access/ContextAccess.cpp @@ -40,6 +40,15 @@ namespace ErrorCodes namespace { + /* must be synced with the Level definition in AccessRights.cpp */ + enum Level + { + GLOBAL_LEVEL, + DATABASE_LEVEL, + TABLE_LEVEL, + COLUMN_LEVEL, + }; + static const std::unordered_set always_accessible_tables { /// Constant tables "one", @@ -388,7 +397,8 @@ void ContextAccess::calculateAccessRights() const } LOG_TRACE(trace_log, "Settings: readonly={}, allow_ddl={}, allow_introspection_functions={}", params.readonly, params.allow_ddl, params.allow_introspection); LOG_TRACE(trace_log, "List of all grants: {}", access->toString()); - LOG_TRACE(trace_log, "List of all sensitive grants: {}", sensitive_access->toString()); + if (params.enable_sensitive_permission) + LOG_TRACE(trace_log, "List of all sensitive grants: {}", sensitive_access->toString()); LOG_TRACE(trace_log, "List of all grants including implicit: {}", access_with_implicit->toString()); } } @@ -514,7 +524,7 @@ std::shared_ptr ContextAccess::getSensitiveAccessRi return nothing_granted; } -bool 
ContextAccess::isSensitiveImpl(std::unordered_set & cols, const std::string_view & database, const std::string_view & table = {}, const std::vector & columns = {}) const +int ContextAccess::isSensitiveImpl(std::unordered_set & cols, const std::string_view & database, const std::string_view & table = {}, const std::vector & columns = {}) const { auto sensitive_resource = manager->sensitive_resource_getter(formatTenantDatabaseName(std::string(database))); if (!sensitive_resource) @@ -537,7 +547,7 @@ bool ContextAccess::isSensitiveImpl(std::unordered_set & cols, } if (!cols.empty()) - return true; + return COLUMN_LEVEL; } } @@ -545,33 +555,21 @@ bool ContextAccess::isSensitiveImpl(std::unordered_set & cols, { for (auto & sensitive_table : sensitive_resource->tables()) { - if (sensitive_table.table() == table) - return sensitive_table.is_sensitive(); + if (sensitive_table.table() != table) + continue; + + if (sensitive_table.is_sensitive()) + return TABLE_LEVEL; } } if (!database.empty()) { - return sensitive_resource->is_sensitive(); + if (sensitive_resource->is_sensitive()) + return DATABASE_LEVEL; } - return false; -} - -template -bool ContextAccess::checkSensitivePermissions(std::unordered_set & cols, const Args &... 
args) const -{ - auto tenant_id = getCurrentTenantId(); - - // Only apply sensitive permission checks on tenanted users only - if (tenant_id.empty()) - return false; - - // Only enable sensitive permission check for selected tenants - if (!params.enable_sensitive_permission) - return false; - - return isSensitive(cols, args...); + return GLOBAL_LEVEL; } template @@ -658,21 +656,42 @@ bool ContextAccess::checkAccessImplHelper(const AccessFlags & flags, const Args auto acs = getAccessRightsWithImplicit(); bool granted; bool check_sensitive_permissions = false; - std::unordered_set sensitive_columns; if constexpr (grant_option) granted = acs->hasGrantOption(flags, args...); - else if (checkSensitivePermissions(sensitive_columns, args...)) - { - check_sensitive_permissions = true; - granted = getSensitiveAccessRights()->isGranted(sensitive_columns, flags, args...); - } else granted = acs->isGranted(flags, args...); if (granted) granted = checkTenantsAccess(args...); + while (granted) + { + auto tenant_id = getCurrentTenantId(); + + // Only apply sensitive permission checks on tenanted users only + if (tenant_id.empty()) + break; + + // Only enable sensitive permission check for selected tenants + if (!params.enable_sensitive_permission) + break; + + if (roles_info->is_admin) + break; + + std::unordered_set sensitive_columns; + int sensitive_level = isSensitive(sensitive_columns, args...); + + if (sensitive_level != GLOBAL_LEVEL) + { + check_sensitive_permissions = true; + //std::vector cols{sensitive_columns.begin(), sensitive_columns.end()}; + granted = getSensitiveAccessRights()->isGranted(sensitive_level, sensitive_columns, flags, args...); + } + break; + } + if (!granted) { if (grant_option && acs->isGranted(flags, args...)) @@ -687,7 +706,7 @@ bool ContextAccess::checkAccessImplHelper(const AccessFlags & flags, const Args return access_denied( "Not enough privileges. 
To execute this query it's necessary to have grant" - + (check_sensitive_permissions && !grant_option ? std::string("(sensitive) ") : std::string(" ")) + + (check_sensitive_permissions && !grant_option ? std::string(" SENSITIVE ") : std::string(" ")) + AccessRightsElement{flags, args...}.toStringWithoutOptions() + (grant_option ? " WITH GRANT OPTION" : ""), ErrorCodes::ACCESS_DENIED); } @@ -800,12 +819,12 @@ bool ContextAccess::checkAccessImpl(const AccessRightsElements & elements) const return true; } -bool ContextAccess::isSensitive(std::unordered_set & /*cols*/) const { return false; } -bool ContextAccess::isSensitive(std::unordered_set & cols, const std::string_view & database) const { return isSensitiveImpl(cols, database); } -bool ContextAccess::isSensitive(std::unordered_set & cols, const std::string_view & database, const std::string_view & table) const { return isSensitiveImpl(cols, database, table); } -bool ContextAccess::isSensitive(std::unordered_set & cols, const std::string_view & database, const std::string_view & table, const std::string_view & column) const { return isSensitiveImpl(cols, database, table, {column}); } -bool ContextAccess::isSensitive(std::unordered_set & cols, const std::string_view & database, const std::string_view & table, const std::vector & columns) const { return isSensitiveImpl(cols, database, table, columns); } -bool ContextAccess::isSensitive(std::unordered_set & cols, const std::string_view & database, const std::string_view & table, const Strings & columns) const { return isSensitiveImpl(cols, database, table, {columns.begin(), columns.end()}); } +int ContextAccess::isSensitive(std::unordered_set & /*cols*/) { return GLOBAL_LEVEL; } +int ContextAccess::isSensitive(std::unordered_set & cols, const std::string_view & database) const { return isSensitiveImpl(cols, database); } +int ContextAccess::isSensitive(std::unordered_set & cols, const std::string_view & database, const std::string_view & table) const { return 
isSensitiveImpl(cols, database, table); } +int ContextAccess::isSensitive(std::unordered_set & cols, const std::string_view & database, const std::string_view & table, const std::string_view & column) const { return isSensitiveImpl(cols, database, table, {column}); } +int ContextAccess::isSensitive(std::unordered_set & cols, const std::string_view & database, const std::string_view & table, const std::vector & columns) const { return isSensitiveImpl(cols, database, table, columns); } +int ContextAccess::isSensitive(std::unordered_set & cols, const std::string_view & database, const std::string_view & table, const Strings & columns) const { return isSensitiveImpl(cols, database, table, {columns.begin(), columns.end()}); } bool ContextAccess::isGranted(const AccessFlags & flags) const { return checkAccessImpl(flags); } bool ContextAccess::isGranted(const AccessFlags & flags, const std::string_view & database) const { return checkAccessImpl(flags, database); } diff --git a/src/Access/ContextAccess.h b/src/Access/ContextAccess.h index 30028d4e607..91098e95ef3 100644 --- a/src/Access/ContextAccess.h +++ b/src/Access/ContextAccess.h @@ -118,13 +118,13 @@ class ContextAccess : public std::enable_shared_from_this void checkGrantOption(const AccessRightsElement & element) const; void checkGrantOption(const AccessRightsElements & elements) const; - bool isSensitiveImpl(std::unordered_set & cols, const std::string_view & database, const std::string_view & table, const std::vector & columns) const; - bool isSensitive(std::unordered_set & cols) const; - bool isSensitive(std::unordered_set & cols, const std::string_view & database) const; - bool isSensitive(std::unordered_set & cols, const std::string_view & database, const std::string_view & table) const; - bool isSensitive(std::unordered_set & cols, const std::string_view & database, const std::string_view & table, const std::string_view & column) const; - bool isSensitive(std::unordered_set & cols, const std::string_view & 
database, const std::string_view & table, const std::vector & columns) const; - bool isSensitive(std::unordered_set & cols, const std::string_view & database, const std::string_view & table, const Strings & columns) const; + int isSensitiveImpl(std::unordered_set & cols, const std::string_view & database, const std::string_view & table, const std::vector & columns) const; + static int isSensitive(std::unordered_set & cols); + int isSensitive(std::unordered_set & cols, const std::string_view & database) const; + int isSensitive(std::unordered_set & cols, const std::string_view & database, const std::string_view & table) const; + int isSensitive(std::unordered_set & cols, const std::string_view & database, const std::string_view & table, const std::string_view & column) const; + int isSensitive(std::unordered_set & cols, const std::string_view & database, const std::string_view & table, const std::vector & columns) const; + int isSensitive(std::unordered_set & cols, const std::string_view & database, const std::string_view & table, const Strings & columns) const; /// Checks if a specified access is granted, and returns false if not. /// Empty database means the current database. @@ -220,9 +220,6 @@ class ContextAccess : public std::enable_shared_from_this template bool checkAdminOptionImplHelper(const Container & role_ids, const GetNameFunction & get_name_function) const; - template - bool checkSensitivePermissions(std::unordered_set & cols, const Args &... 
args) const; - const AccessControlManager * manager = nullptr; const Params params; bool is_full_access = false; diff --git a/src/Access/DiskAccessStorage.cpp b/src/Access/DiskAccessStorage.cpp index 11b2065a0a0..0d017d55681 100644 --- a/src/Access/DiskAccessStorage.cpp +++ b/src/Access/DiskAccessStorage.cpp @@ -136,6 +136,7 @@ namespace std::shared_ptr quota; std::shared_ptr profile; AccessEntityPtr res; + bool sensitive_tenant = false; for (const auto & query : queries) { @@ -176,12 +177,16 @@ namespace } else if (auto * grant_query = query->as()) { + /* sensitive permissions were serialized first */ + if (grant_query->is_sensitive) + sensitive_tenant = true; + if (!user && !role) throw Exception("A user or role should be attached before grant in file " + file_path, ErrorCodes::INCORRECT_ACCESS_ENTITY_DEFINITION); if (user) - InterpreterGrantQuery::updateUserFromQuery(*user, *grant_query); + InterpreterGrantQuery::updateUserFromQuery(*user, *grant_query, sensitive_tenant); else - InterpreterGrantQuery::updateRoleFromQuery(*role, *grant_query); + InterpreterGrantQuery::updateRoleFromQuery(*role, *grant_query, sensitive_tenant); } else throw Exception("No interpreter found for query " + query->getID(), ErrorCodes::INCORRECT_ACCESS_ENTITY_DEFINITION); @@ -215,7 +220,11 @@ namespace ASTs queries; queries.push_back(InterpreterShowCreateAccessEntityQuery::getAttachQuery(entity)); if ((entity.getType() == EntityType::USER) || (entity.getType() == EntityType::ROLE)) - boost::range::push_back(queries, InterpreterShowGrantsQuery::getAttachGrantQueries(entity)); + { + /* The true/false order must be kept, to be used for detecting sensitive tenant in KVAccessStorage.cpp */ + boost::range::push_back(queries, InterpreterShowGrantsQuery::getAttachGrantQueries(entity, true)); + boost::range::push_back(queries, InterpreterShowGrantsQuery::getAttachGrantQueries(entity, false)); + } /// Serialize the list of ATTACH queries to a string. 
WriteBufferFromOwnString buf; @@ -396,7 +405,7 @@ void DiskAccessStorage::clear() { entries_by_id.clear(); for (auto type : collections::range(EntityType::MAX)) - // collections::range(MAX_CONDITION_TYPE) give us a range of [0, MAX_CONDITION_TYPE) + // collections::range(MAX_CONDITION_TYPE) give us a range of [0, MAX_CONDITION_TYPE) // coverity[overrun-local] entries_by_name_and_type[static_cast(type)].clear(); } diff --git a/src/Access/EnabledRolesInfo.h b/src/Access/EnabledRolesInfo.h index 1795a573bee..872822d7dd4 100644 --- a/src/Access/EnabledRolesInfo.h +++ b/src/Access/EnabledRolesInfo.h @@ -20,6 +20,7 @@ struct EnabledRolesInfo AccessRights access; SensitiveAccessRights sensitive_access; SettingsProfileElements settings_from_enabled_roles; + bool is_admin = false; Strings getCurrentRolesNames() const; Strings getEnabledRolesNames() const; diff --git a/src/Access/KVAccessStorage.cpp b/src/Access/KVAccessStorage.cpp index 0abc7493877..cf99fa5f373 100644 --- a/src/Access/KVAccessStorage.cpp +++ b/src/Access/KVAccessStorage.cpp @@ -132,6 +132,7 @@ namespace std::shared_ptr quota; std::shared_ptr profile; AccessEntityPtr res; + bool sensitive_tenant = false; for (const auto & query : queries) { @@ -172,12 +173,16 @@ namespace } else if (auto * grant_query = query->as()) { + /* sensitive permissions were serialized first */ + if (grant_query->is_sensitive) + sensitive_tenant = true; + if (!user && !role) throw Exception("A user or role should be attached before grant in sql: " + create_sql, ErrorCodes::INCORRECT_ACCESS_ENTITY_DEFINITION); if (user) - InterpreterGrantQuery::updateUserFromQuery(*user, *grant_query); + InterpreterGrantQuery::updateUserFromQuery(*user, *grant_query, sensitive_tenant); else - InterpreterGrantQuery::updateRoleFromQuery(*role, *grant_query); + InterpreterGrantQuery::updateRoleFromQuery(*role, *grant_query, sensitive_tenant); } else throw Exception("No interpreter found for query " + query->getID(), 
ErrorCodes::INCORRECT_ACCESS_ENTITY_DEFINITION); @@ -197,7 +202,11 @@ namespace ASTs queries; queries.push_back(InterpreterShowCreateAccessEntityQuery::getAttachQuery(entity)); if ((entity.getType() == EntityType::USER) || (entity.getType() == EntityType::ROLE)) - boost::range::push_back(queries, InterpreterShowGrantsQuery::getAttachGrantQueries(entity)); + { + /* The true/false order must be kept, to be used for detecting sensitive tenant in KVAccessStorage.cpp */ + boost::range::push_back(queries, InterpreterShowGrantsQuery::getAttachGrantQueries(entity, true)); + boost::range::push_back(queries, InterpreterShowGrantsQuery::getAttachGrantQueries(entity, false)); + } /// Serialize the list of ATTACH queries to a string. WriteBufferFromOwnString buf; @@ -211,7 +220,10 @@ namespace class ConcurrentAccessGuard { public: - ConcurrentAccessGuard(const UUID &uuid) + ConcurrentAccessGuard & operator=(const ConcurrentAccessGuard &) = delete; + ConcurrentAccessGuard(const ConcurrentAccessGuard &) = delete; + ConcurrentAccessGuard() = delete; + explicit ConcurrentAccessGuard(const UUID &uuid) { { std::scoped_lock lock(map_mtx); @@ -554,9 +566,9 @@ void KVAccessStorage::updateImpl(const UUID & uuid, const UpdateFunc & update_fu if (new_entity_model.commit_time() < entry->commit_time) throw Exception("Concurrent rbac update, model had been overwritten by another server", ErrorCodes::CONCURRENT_RBAC_UPDATE); - entry->entity = new_entity; + entry->entity = std::move(new_entity); entry->commit_time = new_entity_model.commit_time(); - entry->entity_model = new_entity_model; + entry->entity_model = std::move(new_entity_model); if (name_changed) { diff --git a/src/Access/RoleCache.cpp b/src/Access/RoleCache.cpp index 96c41b0f9f7..db2d6a22d9e 100644 --- a/src/Access/RoleCache.cpp +++ b/src/Access/RoleCache.cpp @@ -43,6 +43,8 @@ namespace roles_info.enabled_roles_with_admin_option.emplace(role_id); roles_info.names_of_roles[role_id] = role->getName(); + if 
(roles_info.names_of_roles[role_id].ends_with("AccountAdmin") && is_current_role) + roles_info.is_admin = true; roles_info.access.makeUnion(role->access); roles_info.sensitive_access.makeUnion(role->sensitive_access); roles_info.settings_from_enabled_roles.merge(role->settings); diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index e62ba863145..2d0d6b12990 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -1620,9 +1620,11 @@ void Context::setUser(const Credentials & credentials, const Poco::Net::SocketAd client_info.current_password = basic_credentials->getPassword(); //#endif + String tenant = getTenantId(); params = getAccessControlManager().getContextAccessParams( new_user_id, /* current_roles = */ {}, /* use_default_roles = */ true, settings, current_database, client_info, - has_tenant_id_in_username ? tenant_id : "", + tenant, + has_tenant_id_in_username, getServerType() != ServerType::cnch_server); } @@ -1737,7 +1739,7 @@ void Context::calculateAccessRightsWithLock(const std::unique_lock { auto params = getAccessControlManager().getContextAccessParams( *user_id, current_roles, use_default_roles, settings, current_database, client_info, - has_tenant_id_in_username ? 
tenant_id : "", false); + tenant_id, has_tenant_id_in_username, false); access = getAccessControlManager().getContextAccess(params); } } diff --git a/src/Interpreters/InterpreterGrantQuery.cpp b/src/Interpreters/InterpreterGrantQuery.cpp index b3472ee3096..e831eb9825b 100644 --- a/src/Interpreters/InterpreterGrantQuery.cpp +++ b/src/Interpreters/InterpreterGrantQuery.cpp @@ -26,7 +26,8 @@ namespace void updateFromQueryTemplate( T & grantee, const ASTGrantQuery & query, - const std::vector & roles_to_grant_or_revoke) + const std::vector & roles_to_grant_or_revoke, + bool sensitive_tenant) { if (!query.access_rights_elements.empty()) { @@ -34,19 +35,25 @@ namespace { if (query.if_exists) { - grantee.access.tryRevoke(query.access_rights_elements); - grantee.sensitive_access.tryRevoke(query.access_rights_elements); + if (!query.is_sensitive) + grantee.access.tryRevoke(query.access_rights_elements); + if (sensitive_tenant) + grantee.sensitive_access.tryRevoke(query.access_rights_elements); } else { - grantee.access.revoke(query.access_rights_elements); - grantee.sensitive_access.revoke(query.access_rights_elements); + if (!query.is_sensitive) + grantee.access.revoke(query.access_rights_elements); + if (sensitive_tenant) + grantee.sensitive_access.revoke(query.access_rights_elements); } } else { - grantee.access.grant(query.access_rights_elements); - grantee.sensitive_access.grant(query.access_rights_elements); + if (!query.is_sensitive) + grantee.access.grant(query.access_rights_elements); + if (sensitive_tenant) + grantee.sensitive_access.grant(query.access_rights_elements); } } @@ -72,12 +79,13 @@ namespace void updateFromQueryImpl( IAccessEntity & grantee, const ASTGrantQuery & query, - const std::vector & roles_to_grant_or_revoke) + const std::vector & roles_to_grant_or_revoke, + bool sensitive_tenant) { if (auto * user = typeid_cast(&grantee)) - updateFromQueryTemplate(*user, query, roles_to_grant_or_revoke); + updateFromQueryTemplate(*user, query, 
roles_to_grant_or_revoke, sensitive_tenant); else if (auto * role = typeid_cast(&grantee)) - updateFromQueryTemplate(*role, query, roles_to_grant_or_revoke); + updateFromQueryTemplate(*role, query, roles_to_grant_or_revoke, sensitive_tenant); } void checkGranteeIsAllowed(const ContextAccess & access, const UUID & grantee_id, const IAccessEntity & grantee) @@ -275,10 +283,11 @@ BlockIO InterpreterGrantQuery::execute() checkGrantOption(access_control, *getContext()->getAccess(), query, grantees); /// Update roles and users listed in `grantees`. - auto update_func = [&](const AccessEntityPtr & entity) -> AccessEntityPtr + auto update_func = [&, ctx = getContext()](const AccessEntityPtr & entity) -> AccessEntityPtr { auto clone = entity->clone(); - updateFromQueryImpl(*clone, query, roles); + bool sensitive_tenant = ctx->getAccessControlManager().isSensitiveGrantee(clone->getName()); + updateFromQueryImpl(*clone, query, roles, sensitive_tenant); return clone; }; @@ -288,21 +297,21 @@ BlockIO InterpreterGrantQuery::execute() } -void InterpreterGrantQuery::updateUserFromQuery(User & user, const ASTGrantQuery & query) +void InterpreterGrantQuery::updateUserFromQuery(User & user, const ASTGrantQuery & query, bool sensitive_tenant) { std::vector roles_to_grant_or_revoke; if (query.roles) roles_to_grant_or_revoke = RolesOrUsersSet{*query.roles}.getMatchingIDs(); - updateFromQueryImpl(user, query, roles_to_grant_or_revoke); + updateFromQueryImpl(user, query, roles_to_grant_or_revoke, sensitive_tenant); } -void InterpreterGrantQuery::updateRoleFromQuery(Role & role, const ASTGrantQuery & query) +void InterpreterGrantQuery::updateRoleFromQuery(Role & role, const ASTGrantQuery & query, bool sensitive_tenant) { std::vector roles_to_grant_or_revoke; if (query.roles) roles_to_grant_or_revoke = RolesOrUsersSet{*query.roles}.getMatchingIDs(); - updateFromQueryImpl(role, query, roles_to_grant_or_revoke); + updateFromQueryImpl(role, query, roles_to_grant_or_revoke, sensitive_tenant); } 
void InterpreterGrantQuery::extendQueryLogElemImpl(QueryLogElement & elem, const ASTPtr & /*ast*/, ContextPtr) const diff --git a/src/Interpreters/InterpreterGrantQuery.h b/src/Interpreters/InterpreterGrantQuery.h index abaddcc599b..fe24361a005 100644 --- a/src/Interpreters/InterpreterGrantQuery.h +++ b/src/Interpreters/InterpreterGrantQuery.h @@ -19,8 +19,8 @@ class InterpreterGrantQuery : public IInterpreter, WithMutableContext BlockIO execute() override; - static void updateUserFromQuery(User & user, const ASTGrantQuery & query); - static void updateRoleFromQuery(Role & role, const ASTGrantQuery & query); + static void updateUserFromQuery(User & user, const ASTGrantQuery & query, bool sensitive_tenant); + static void updateRoleFromQuery(Role & role, const ASTGrantQuery & query, bool sensitive_tenant); void extendQueryLogElemImpl(QueryLogElement &, const ASTPtr &, ContextPtr) const override; private: diff --git a/src/Interpreters/InterpreterShowAccessQuery.cpp b/src/Interpreters/InterpreterShowAccessQuery.cpp index 5533e07c415..2ea8394d75e 100644 --- a/src/Interpreters/InterpreterShowAccessQuery.cpp +++ b/src/Interpreters/InterpreterShowAccessQuery.cpp @@ -83,7 +83,11 @@ ASTs InterpreterShowAccessQuery::getCreateAndGrantQueries() const { create_queries.push_back(InterpreterShowCreateAccessEntityQuery::getCreateQuery(*entity, access_control)); if (entity->isTypeOf(EntityType::USER) || entity->isTypeOf(EntityType::ROLE)) - boost::range::push_back(grant_queries, InterpreterShowGrantsQuery::getGrantQueries(*entity, access_control)); + { + /* The true/false order must be kept, to be used for detecting sensitive tenant in KVAccessStorage.cpp */ + boost::range::push_back(grant_queries, InterpreterShowGrantsQuery::getGrantQueries(*entity, access_control, true)); + boost::range::push_back(grant_queries, InterpreterShowGrantsQuery::getGrantQueries(*entity, access_control, false)); + } } ASTs result = std::move(create_queries); diff --git 
a/src/Interpreters/InterpreterShowGrantsQuery.cpp b/src/Interpreters/InterpreterShowGrantsQuery.cpp index b274e2775fa..3cfa8d9814b 100644 --- a/src/Interpreters/InterpreterShowGrantsQuery.cpp +++ b/src/Interpreters/InterpreterShowGrantsQuery.cpp @@ -56,6 +56,7 @@ namespace if (!current_query) { current_query = std::make_shared(); + current_query->is_sensitive = sensitive_mode; current_query->grantees = grantees; current_query->attach_mode = attach_mode; if (element.is_partial_revoke) @@ -66,6 +67,9 @@ namespace current_query->access_rights_elements.emplace_back(std::move(element)); } + if (sensitive_mode) + return res; + for (const auto & element : grantee.granted_roles.getElements()) { if (element.empty()) @@ -167,15 +171,19 @@ ASTs InterpreterShowGrantsQuery::getGrantQueries() const ASTs grant_queries; for (const auto & entity : entities) - boost::range::push_back(grant_queries, getGrantQueries(*entity, access_control)); + { + /* The true/false order must be kept, to be used for detecting sensitive tenant in KVAccessStorage.cpp */ + boost::range::push_back(grant_queries, getGrantQueries(*entity, access_control, true)); + boost::range::push_back(grant_queries, getGrantQueries(*entity, access_control, false)); + } return grant_queries; } -ASTs InterpreterShowGrantsQuery::getGrantQueries(const IAccessEntity & user_or_role, const AccessControlManager & access_control) +ASTs InterpreterShowGrantsQuery::getGrantQueries(const IAccessEntity & user_or_role, const AccessControlManager & access_control, bool sensitive_mode) { - return getGrantQueriesImpl(user_or_role, &access_control, false); + return getGrantQueriesImpl(user_or_role, &access_control, false, sensitive_mode); } diff --git a/src/Interpreters/InterpreterShowGrantsQuery.h b/src/Interpreters/InterpreterShowGrantsQuery.h index 9550bbdf387..8113f15a3ea 100644 --- a/src/Interpreters/InterpreterShowGrantsQuery.h +++ b/src/Interpreters/InterpreterShowGrantsQuery.h @@ -20,8 +20,8 @@ class InterpreterShowGrantsQuery : 
public IInterpreter, WithContext BlockIO execute() override; - static ASTs getGrantQueries(const IAccessEntity & user_or_role, const AccessControlManager & access_control); - static ASTs getAttachGrantQueries(const IAccessEntity & user_or_role, bool sensitive_mode = false); + static ASTs getGrantQueries(const IAccessEntity & user_or_role, const AccessControlManager & access_control, bool sensitive_mode); + static ASTs getAttachGrantQueries(const IAccessEntity & user_or_role, bool sensitive_mode); bool ignoreQuota() const override { return true; } bool ignoreLimits() const override { return true; } diff --git a/src/Parsers/ASTGrantQuery.cpp b/src/Parsers/ASTGrantQuery.cpp index baa5045704e..cd547e29a7b 100644 --- a/src/Parsers/ASTGrantQuery.cpp +++ b/src/Parsers/ASTGrantQuery.cpp @@ -108,6 +108,12 @@ void ASTGrantQuery::formatImpl(const FormatSettings & settings, FormatState &, F settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << (attach_mode ? "ATTACH " : "") << (is_revoke ? "REVOKE" : "GRANT") << (settings.hilite ? IAST::hilite_none : ""); + if (is_sensitive) + { + settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << (" SENSITIVE") + << (settings.hilite ? IAST::hilite_none : ""); + } + if (!access_rights_elements.sameOptions()) throw Exception("Elements of an ASTGrantQuery are expected to have the same options", ErrorCodes::LOGICAL_ERROR); if (!access_rights_elements.empty() && access_rights_elements[0].is_partial_revoke && !is_revoke) @@ -118,6 +124,8 @@ void ASTGrantQuery::formatImpl(const FormatSettings & settings, FormatState &, F if (is_revoke) { + if (if_exists) + settings.ostr << (settings.hilite ? hilite_keyword : "") << " IF EXISTS" << (settings.hilite ? hilite_none : ""); if (grant_option) settings.ostr << (settings.hilite ? hilite_keyword : "") << " GRANT OPTION FOR" << (settings.hilite ? 
hilite_none : ""); else if (admin_option) @@ -180,7 +188,7 @@ void ASTGrantQuery::rewriteNamesWithTenant(const Context *) } tenant_rewritten = true; - } + } } void ASTGrantQuery::rewriteNamesWithoutTenant(const Context *) diff --git a/src/Parsers/ASTGrantQuery.h b/src/Parsers/ASTGrantQuery.h index bc56cb26b45..a36dbaf77d9 100644 --- a/src/Parsers/ASTGrantQuery.h +++ b/src/Parsers/ASTGrantQuery.h @@ -41,6 +41,7 @@ class ASTGrantQuery : public IAST { public: bool attach_mode = false; + bool is_sensitive = false; bool is_revoke = false; bool if_exists = false; AccessRightsElements access_rights_elements; diff --git a/src/Parsers/ParserGrantQuery.cpp b/src/Parsers/ParserGrantQuery.cpp index 1b6f8c1e631..51eb4bcca15 100644 --- a/src/Parsers/ParserGrantQuery.cpp +++ b/src/Parsers/ParserGrantQuery.cpp @@ -243,6 +243,9 @@ bool ParserGrantQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) // String cluster; // parseOnCluster(pos, expected, cluster); + bool is_sensitive = false; + if (ParserKeyword{"SENSITIVE"}.ignore(pos, expected)) + is_sensitive = true; bool if_exists = false; bool grant_option = false; @@ -300,6 +303,7 @@ bool ParserGrantQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) auto query = std::make_shared(); node = query; + query->is_sensitive = is_sensitive; query->if_exists = if_exists; query->is_revoke = is_revoke; query->attach_mode = attach_mode; diff --git a/src/Parsers/formatTenantDatabaseName.cpp b/src/Parsers/formatTenantDatabaseName.cpp index 43a5956adde..d6d9cb8afca 100644 --- a/src/Parsers/formatTenantDatabaseName.cpp +++ b/src/Parsers/formatTenantDatabaseName.cpp @@ -66,7 +66,7 @@ String formatTenantName(const String & name, char separator) { auto tenant_id = getCurrentTenantId(); if (!tenant_id.empty() && - (name.find(tenant_id) != 0 || name.size() == tenant_id.size() || name[tenant_id.size()] != separator)) + (!name.starts_with(tenant_id) || name.size() == tenant_id.size() || name[tenant_id.size()] != separator)) { 
String result = tenant_id; result += separator; @@ -81,7 +81,7 @@ static String formatTenantDatabaseNameImpl(const String & database_name, char se { auto tenant_id = getCurrentTenantId(); if (!tenant_id.empty() && !isInternalDatabaseName(database_name) && - (database_name.find(tenant_id) != 0 || database_name.size() == tenant_id.size() || database_name[tenant_id.size()] != separator)) + (!database_name.starts_with(tenant_id) || database_name.size() == tenant_id.size() || database_name[tenant_id.size()] != separator)) { String result = tenant_id; result += separator; @@ -96,7 +96,7 @@ static String formatTenantUserNameImpl(const String & user_name, char separator { auto tenant_id = getCurrentTenantId(); if (!tenant_id.empty() && - (user_name.find(tenant_id) != 0 || user_name.size() == tenant_id.size() || user_name[tenant_id.size()] != separator)) + (!user_name.starts_with(tenant_id) || user_name.size() == tenant_id.size() || user_name[tenant_id.size()] != separator)) { String result = tenant_id; result += separator; @@ -130,7 +130,7 @@ String appendTenantIdOnly(const String& name, bool is_datbase_name) String formatTenantDatabaseNameWithTenantId(const String & database_name, const String & tenant_id, char separator) { - if (!tenant_id.empty() && !isInternalDatabaseName(database_name) && database_name.find(tenant_id) != 0) + if (!tenant_id.empty() && !isInternalDatabaseName(database_name) && !database_name.starts_with(tenant_id)) { String result = tenant_id; result += separator; diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index 78232a92a1f..53a0ab7b8d4 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -474,6 +474,42 @@ void HTTPHandler::processQuery( using namespace Poco::Net; LOG_TRACE(log, "Request URI: {}", request.getURI()); + std::string tenant_id = params.getParsed("tenant_id", ""); + std::string database = request.get("X-ClickHouse-Database", ""); + if (database.empty()) + database = 
params.getParsed("database", ""); + + if (auto pos = database.find('`'); pos != String::npos) + { + //CNCH multi-tenant default database pattern from gateway client: {tenant_id}`{default_database} + //Even this is a GET request or with "readonly=1" setting, we force to apply the tenant_id setting change. + auto tenant_id_from_db = String(database.c_str(), pos); + if (tenant_id.empty()) + tenant_id = tenant_id_from_db; + else if (tenant_id != tenant_id_from_db && !tenant_id_from_db.empty()) + throw Exception("tenant id " + tenant_id + " from setting doesn't match tenant id from database " + tenant_id_from_db, ErrorCodes::UNKNOWN_USER); + + ///multi-tenant default database storage pattern: {tenant_id}.{database} + if (pos + 1 != database.size()) + { + auto sub_str = database.substr(pos + 1); + if (sub_str == "default" || sub_str == "system") + database = std::move(sub_str); + else + database[pos] = '.'; + } + else /// {tenant_id}` + database.clear(); + } + + if (!database.empty()) + context->setCurrentDatabase(database); + + if (!tenant_id.empty()) + { + context->setSetting("tenant_id", tenant_id); + context->setTenantId(tenant_id); + } if (!authenticateUser(context, request, params, response)) return; // '401 Unauthorized' response with 'Negotiate' has been sent at this point. @@ -702,28 +738,20 @@ void HTTPHandler::processQuery( reserved_param_suffixes.emplace_back("_structure"); } - std::string database = request.get("X-ClickHouse-Database", ""); std::string default_format = request.get("X-ClickHouse-Format", ""); SettingsChanges settings_changes; for (const auto & [key, value] : params) { if (key == "database") - { - if (database.empty()) - database = value; - } + continue; else if (key == "default_format") { if (default_format.empty()) default_format = value; } else if (key == "tenant_id") - { - //Even this is a GET request or with "readonly=1" setting, we force to apply the tenant_id setting change. 
- context->setSetting("tenant_id", value); - context->setTenantId(value); - } + continue; else if (param_could_be_skipped(key)) { } @@ -735,31 +763,6 @@ void HTTPHandler::processQuery( } } - if (!database.empty()) - { - auto &default_database = database; - auto &connection_context = context; - //CNCH multi-tenant default database pattern from gateway client: {tenant_id}`{default_database} - if (auto pos = default_database.find('`'); pos != String::npos) - { - //Even this is a GET request or with "readonly=1" setting, we force to apply the tenant_id setting change. - connection_context->setSetting("tenant_id", String(default_database.c_str(), pos)); - connection_context->setTenantId(String(default_database.c_str(), pos)); - if (pos + 1 != default_database.size()) ///multi-tenant default database storage pattern: {tenant_id}.{default_database} - { - auto sub_str = default_database.substr(pos + 1); - if (sub_str == "default" || sub_str == "system") - default_database = std::move(sub_str); - else - default_database[pos] = '.'; - } - else /// {tenant_id}` - default_database.clear(); - } - if (!default_database.empty()) - connection_context->setCurrentDatabase(default_database); - } - if (!default_format.empty()) context->setDefaultFormat(default_format); diff --git a/src/Server/MySQLHandler.cpp b/src/Server/MySQLHandler.cpp index d5424912d12..9d9bd66535e 100644 --- a/src/Server/MySQLHandler.cpp +++ b/src/Server/MySQLHandler.cpp @@ -262,11 +262,6 @@ void MySQLHandler::run() if (!(client_capabilities & CLIENT_PROTOCOL_41)) throw Exception("Required capability: CLIENT_PROTOCOL_41.", ErrorCodes::MYSQL_CLIENT_INSUFFICIENT_CAPABILITIES); - handshake_response.username = connection_context->formatUserName(handshake_response.username); - authenticate(handshake_response.username, handshake_response.auth_plugin_name, handshake_response.auth_response); - - connection_context->getClientInfo().initial_user = handshake_response.username; - try { auto &default_database = 
handshake_response.database; @@ -303,6 +298,11 @@ void MySQLHandler::run() packet_endpoint->sendPacket(ERRPacket(exc.code(), "HY000", exc.message()), true); } + handshake_response.username = connection_context->formatUserName(handshake_response.username); + authenticate(handshake_response.username, handshake_response.auth_plugin_name, handshake_response.auth_response); + + connection_context->getClientInfo().initial_user = handshake_response.username; + OKPacket ok_packet(0, handshake_response.capability_flags, 0, 0, 0); packet_endpoint->sendPacket(ok_packet, true); diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 6b48a79e877..fc331b8f307 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -221,32 +221,14 @@ void TCPHandler::runImpl() /// When connecting, the default database can be specified. if (!default_database.empty()) { - //CNCH multi-tenant default database pattern from gateway client: {tenant_id}`{default_database} - if (auto pos = default_database.find('`'); pos != String::npos) - { - connection_context->setSetting("tenant_id", String(default_database.c_str(), pos)); /// {tenant_id}`* - connection_context->setTenantId(String(default_database.c_str(), pos)); - if (pos + 1 != default_database.size()) ///multi-tenant default database storage pattern: {tenant_id}.{default_database} - { - auto sub_str = default_database.substr(pos + 1); - if (sub_str == "default" || sub_str == "system") - default_database = std::move(sub_str); - else - default_database[pos] = '.'; - } - else /// {tenant_id}` - default_database.clear(); - } - - if ((!default_database.empty()) && (!DatabaseCatalog::instance().isDatabaseExist(default_database, connection_context))) + if (!DatabaseCatalog::instance().isDatabaseExist(default_database, connection_context)) { Exception e("Database " + backQuote(default_database) + " doesn't exist", ErrorCodes::UNKNOWN_DATABASE); LOG_ERROR(log, "Code: {}, e.displayText() = {}, Stack trace:\n\n{}", e.code(), 
e.displayText(), e.getStackTraceString()); sendException(e, connection_context->getSettingsRef().calculate_text_stack_trace); return; } - if (!default_database.empty()) - connection_context->setCurrentDatabase(default_database); + connection_context->setCurrentDatabase(default_database); } UInt64 idle_connection_timeout = connection_settings.idle_connection_timeout; @@ -1179,26 +1161,52 @@ void TCPHandler::receiveHello() throw NetException("Unexpected packet from client (no user in Hello package)", ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT); String tenant_id_from_db; - String tenant_id_from_user; - if (auto pos = user.find('`'); pos != String::npos) - tenant_id_from_user = String(user.c_str(), pos); - if (!default_database.empty()) + /* need to support below cases: + tenanted user + tenanted db: gateway 2.0 user access + tenanted user + db: internal dictionary access + user + tenanted db: 2.0 server backwards compatible user access + user + db: devops/developers + */ + + if (auto pos = default_database.find('`'); pos != String::npos) { - if (auto pos = default_database.find('`'); pos != String::npos) - tenant_id_from_db = String(default_database.c_str(), pos); + tenant_id_from_db = String(default_database.c_str(), pos); + connection_context->setSetting("tenant_id", tenant_id_from_db); /// {tenant_id}`* + connection_context->setTenantId(tenant_id_from_db); + ///multi-tenant default database storage pattern: {tenant_id}.{default_database} + if (pos + 1 != default_database.size()) + { + auto sub_str = default_database.substr(pos + 1); + if (sub_str == "default" || sub_str == "system") + default_database = std::move(sub_str); + else + default_database[pos] = '.'; + } + else /// {tenant_id}` + { + default_database.clear(); + } } - if (!tenant_id_from_user.empty() && tenant_id_from_db.empty()) + if (auto pos = user.find('`'); pos != String::npos) { - default_database = formatTenantDatabaseNameWithTenantId(default_database, tenant_id_from_user, '`'); - if (auto pos = 
user.find('`'); pos != String::npos) // remove tenant id for server and worker communication + String tenant_id_from_user = String(user.c_str(), pos); + + if (tenant_id_from_db.empty()) + { + /// internal dictionary access user = user.substr(pos + 1); + + if (!default_database.empty()) + default_database = formatTenantDatabaseNameWithTenantId(default_database, tenant_id_from_user, '`'); + } + else + { + if (!tenant_id_from_user.empty() && tenant_id_from_user != tenant_id_from_db) + throw NetException("Tenant ID of user and default database are not matching", ErrorCodes::LOGICAL_ERROR); + } } - // else if (tenant_id_from_user.empty() && !tenant_id_from_db.empty()) - // user = tenant_id_from_db + '`' + user; - else if (!tenant_id_from_user.empty() && !tenant_id_from_db.empty() && tenant_id_from_user != tenant_id_from_db) - throw NetException("Tenant ID of user and default database are not matching", ErrorCodes::LOGICAL_ERROR); LOG_DEBUG( log, diff --git a/tests/queries/4_cnch_stateless/01074_partial_revokes.reference b/tests/queries/4_cnch_stateless/01074_partial_revokes.reference index 6d8f46fdbab..5a5f2a2e928 100644 --- a/tests/queries/4_cnch_stateless/01074_partial_revokes.reference +++ b/tests/queries/4_cnch_stateless/01074_partial_revokes.reference @@ -3,10 +3,14 @@ GRANT SELECT ON *.* TO `1234.test_user_01074` REVOKE SELECT ON `1234.db`.* FROM `1234.test_user_01074` --cleanup --simple 2 +GRANT SENSITIVE SELECT ON `1234.db`.* TO `1234.test_user_01074` +REVOKE SENSITIVE SELECT ON `1234.db`.table FROM `1234.test_user_01074` GRANT SELECT ON `1234.db`.* TO `1234.test_user_01074` REVOKE SELECT ON `1234.db`.table FROM `1234.test_user_01074` --cleanup --simple 3 +GRANT SENSITIVE SELECT ON `1234.db`.table TO `1234.test_user_01074` +REVOKE SENSITIVE SELECT(col1) ON `1234.db`.table FROM `1234.test_user_01074` GRANT SELECT ON `1234.db`.table TO `1234.test_user_01074` REVOKE SELECT(col1) ON `1234.db`.table FROM `1234.test_user_01074` --cleanup @@ -15,6 +19,8 @@ GRANT SELECT 
ON *.* TO `1234.test_user_01074` REVOKE SELECT(col1, col2) ON `1234.db`.table FROM `1234.test_user_01074` --cleanup --complex 2 +GRANT SENSITIVE SELECT ON `1234.db`.table TO `1234.test_user_01074` +REVOKE SENSITIVE SELECT(col1, col2) ON `1234.db`.table FROM `1234.test_user_01074` GRANT SELECT ON *.* TO `1234.test_user_01074` REVOKE SELECT ON `1234.db`.* FROM `1234.test_user_01074` GRANT SELECT ON `1234.db`.table TO `1234.test_user_01074` @@ -36,6 +42,8 @@ GRANT SELECT ON *.* TO `1234.test_user_01074` REVOKE SELECT ON `1234.db`.* FROM `1234.test_user_01074` --cleanup --revoke 2 +GRANT SENSITIVE SELECT ON `1234.db`.* TO `1234.test_user_01074` +REVOKE SENSITIVE SELECT ON `1234.db`.table FROM `1234.test_user_01074` GRANT SELECT ON *.* TO `1234.test_user_01074` --cleanup --grant option 1 @@ -57,5 +65,9 @@ REVOKE SELECT(col1) ON `1234.db`.table FROM `1234.test_user_01074` GRANT SELECT ON *.* TO `1234.test_user_01074` --cleanup --grant option 4 +GRANT SENSITIVE SELECT ON `1234.db`.* TO `1234.test_user_01074` WITH GRANT OPTION +GRANT SENSITIVE SELECT ON `1234.db`.table TO `1234.test_user_01074` WITH GRANT OPTION +REVOKE SENSITIVE SELECT ON `1234.db`.table FROM `1234.test_user_01074` +GRANT SENSITIVE SELECT(col1, col2) ON `1234.db`.table TO `1234.test_user_01074` WITH GRANT OPTION GRANT SELECT ON *.* TO `1234.test_user_01074` GRANT SELECT ON `1234.db`.* TO `1234.test_user_01074` WITH GRANT OPTION diff --git a/tests/queries/4_cnch_stateless/60000_rbac_sensitive.reference b/tests/queries/4_cnch_stateless/60000_rbac_sensitive.reference index 374e3431cd2..174228c8059 100644 --- a/tests/queries/4_cnch_stateless/60000_rbac_sensitive.reference +++ b/tests/queries/4_cnch_stateless/60000_rbac_sensitive.reference @@ -1,17 +1,8 @@ -Not enough privileges -Not enough privileges -Not enough privileges -Not enough privileges -Not enough privileges -Not enough privileges -Not enough privileges -Not enough privileges -Not enough privileges -Not enough privileges -Not enough privileges -Not 
enough privileges -Not enough privileges -Not enough privileges +-- drop db +-- drop table +-- select table +-- select column +-- select denied "1234.my_user",\N,"SELECT","1234.db",\N,\N,0 "1234.my_user",\N,"SELECT","1234.db","tb",\N,0 "1234.my_user",\N,"SELECT","1234.db","tb","id",1 diff --git a/tests/queries/4_cnch_stateless/60000_rbac_sensitive.sh b/tests/queries/4_cnch_stateless/60000_rbac_sensitive.sh index 12eb93f3a56..0624d0eedb0 100755 --- a/tests/queries/4_cnch_stateless/60000_rbac_sensitive.sh +++ b/tests/queries/4_cnch_stateless/60000_rbac_sensitive.sh @@ -6,89 +6,127 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . $CURDIR/../shell_config.sh set -e +[ -v TENANT_ID ] && NEW_USER="${TENANT_ID}\`my_user" -$CLICKHOUSE_CLIENT --query "DROP USER IF EXISTS my_user;" -$CLICKHOUSE_CLIENT --query "DROP DATABASE IF EXISTS db;" -$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS db.tb;" - -$CLICKHOUSE_CLIENT --query "CREATE DATABASE db;" -$CLICKHOUSE_CLIENT --query "CREATE USER my_user NOT IDENTIFIED;" -$CLICKHOUSE_CLIENT --query "REVOKE ALL ON *.* FROM my_user;" -$CLICKHOUSE_CLIENT --query "REVOKE ALL ON db.* FROM my_user;" - -$CLICKHOUSE_CLIENT --query "SET SENSITIVE DATABASE db = 0;" -$CLICKHOUSE_CLIENT --query "SET SENSITIVE TABLE db.tb = 0;" -$CLICKHOUSE_CLIENT --query "SET SENSITIVE COLUMN db.tb(id) = 0;" -$CLICKHOUSE_CLIENT --query "SET SENSITIVE COLUMN db.tb(a) = 0;" -$CLICKHOUSE_CLIENT --query "SET SENSITIVE COLUMN db.tb(b) = 0;" +${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ +DROP USER IF EXISTS my_user; +DROP DATABASE IF EXISTS db; +DROP TABLE IF EXISTS db.tb; -[ -v TENANT_ID ] && NEW_USER="${TENANT_ID}\`my_user" -$CLICKHOUSE_CLIENT --query "SET SENSITIVE DATABASE db = 1;" -$CLICKHOUSE_CLIENT --query "GRANT DROP DATABASE ON *.* TO my_user" +CREATE DATABASE db; +CREATE USER my_user NOT IDENTIFIED; +REVOKE ALL ON *.* FROM my_user; +REVOKE ALL ON db.* FROM my_user; -$CLICKHOUSE_CLIENT --user=$NEW_USER --query "DROP DATABASE db" 2>&1| grep -Fo 
"Not enough privileges" | uniq +SET SENSITIVE DATABASE db = 0; +SET SENSITIVE TABLE db.tb = 0; +SET SENSITIVE COLUMN db.tb(id) = 0; +SET SENSITIVE COLUMN db.tb(a) = 0; +SET SENSITIVE COLUMN db.tb(b) = 0; +""" +${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ +SET SENSITIVE DATABASE db = 1; +GRANT DROP DATABASE ON *.* TO my_user; +SELECT '-- drop db'; +""" +$CLICKHOUSE_CLIENT --multiline --multiquery --user=$NEW_USER --testmode --query """ +DROP DATABASE db; -- { serverError ACCESS_DENIED } +""" $CLICKHOUSE_CLIENT --query "GRANT DROP DATABASE ON db.* TO my_user" $CLICKHOUSE_CLIENT --user=$NEW_USER --query "DROP DATABASE db" -$CLICKHOUSE_CLIENT --query "CREATE DATABASE db;" -$CLICKHOUSE_CLIENT --query "CREATE TABLE db.tb (id UInt64, a String, b String) ENGINE = CnchMergeTree ORDER BY id;" -$CLICKHOUSE_CLIENT --query "SET SENSITIVE TABLE db.tb = 1" -$CLICKHOUSE_CLIENT --query "GRANT DROP TABLE ON *.* TO my_user" -$CLICKHOUSE_CLIENT --user=$NEW_USER --query "DROP TABLE db.tb" 2>&1| grep -Fo "Not enough privileges" | uniq +${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ +CREATE DATABASE db; +CREATE TABLE db.tb (id UInt64, a String, b String) ENGINE = CnchMergeTree ORDER BY id; +SET SENSITIVE TABLE db.tb = 1; +GRANT DROP TABLE ON *.* TO my_user; +SELECT '-- drop table'; +""" +$CLICKHOUSE_CLIENT --user=$NEW_USER --multiline --multiquery --testmode --query """ +DROP TABLE db.tb; -- { serverError ACCESS_DENIED } +""" $CLICKHOUSE_CLIENT --query "GRANT DROP TABLE ON db.* TO my_user" -$CLICKHOUSE_CLIENT --user=$NEW_USER --query "DROP TABLE db.tb" 2>&1| grep -Fo "Not enough privileges" | uniq +$CLICKHOUSE_CLIENT --user=$NEW_USER --multiline --multiquery --testmode --query """ +DROP TABLE db.tb; -- { serverError ACCESS_DENIED } +""" $CLICKHOUSE_CLIENT --query "GRANT DROP TABLE ON db.tb TO my_user" $CLICKHOUSE_CLIENT --user=$NEW_USER --query "DROP TABLE db.tb" -$CLICKHOUSE_CLIENT --query "CREATE TABLE db.tb (id UInt64, a String, b String) ENGINE = CnchMergeTree ORDER BY 
id;" -$CLICKHOUSE_CLIENT --query "REVOKE ALL ON db.* FROM my_user;" -$CLICKHOUSE_CLIENT --query "SET SENSITIVE TABLE db.tb = 1" -$CLICKHOUSE_CLIENT --query "GRANT SELECT ON *.* TO my_user" -$CLICKHOUSE_CLIENT --user=$NEW_USER --query "SELECT id FROM db.tb" 2>&1| grep -Fo "Not enough privileges" | uniq -$CLICKHOUSE_CLIENT --user=$NEW_USER --query "SELECT * FROM db.tb" 2>&1| grep -Fo "Not enough privileges" | uniq +${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ +CREATE TABLE db.tb (id UInt64, a String, b String) ENGINE = CnchMergeTree ORDER BY id; +REVOKE ALL ON db.* FROM my_user; +SET SENSITIVE TABLE db.tb = 1; +GRANT SELECT ON *.* TO my_user; +SELECT '-- select table'; +""" +$CLICKHOUSE_CLIENT --user=$NEW_USER --multiline --multiquery --testmode --query """ +SELECT id FROM db.tb; -- { serverError ACCESS_DENIED } +SELECT * FROM db.tb; -- { serverError ACCESS_DENIED } +""" $CLICKHOUSE_CLIENT --query "GRANT SELECT ON db.* TO my_user" -$CLICKHOUSE_CLIENT --user=$NEW_USER --query "SELECT id FROM db.tb" 2>&1| grep -Fo "Not enough privileges" | uniq -$CLICKHOUSE_CLIENT --user=$NEW_USER --query "SELECT * FROM db.tb" 2>&1| grep -Fo "Not enough privileges" | uniq +$CLICKHOUSE_CLIENT --user=$NEW_USER --multiline --multiquery --testmode --query """ +SELECT id FROM db.tb; -- { serverError ACCESS_DENIED } +SELECT * FROM db.tb; -- { serverError ACCESS_DENIED } +""" $CLICKHOUSE_CLIENT --query "GRANT SELECT ON db.tb TO my_user" $CLICKHOUSE_CLIENT --user=$NEW_USER --query "SELECT id FROM db.tb FORMAT CSV" $CLICKHOUSE_CLIENT --user=$NEW_USER --query "SELECT * FROM db.tb FORMAT CSV" -$CLICKHOUSE_CLIENT --query "REVOKE ALL ON db.* FROM my_user;" -$CLICKHOUSE_CLIENT --query "SET SENSITIVE COLUMN db.tb(id) = 1" -$CLICKHOUSE_CLIENT --query "GRANT SELECT ON *.* TO my_user" -$CLICKHOUSE_CLIENT --user=$NEW_USER --query "SELECT id FROM db.tb" 2>&1| grep -Fo "Not enough privileges" | uniq +${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ +REVOKE ALL ON db.* FROM my_user; +SET 
SENSITIVE COLUMN db.tb(id) = 1; +GRANT SELECT ON *.* TO my_user; +SELECT '-- select column'; +""" +$CLICKHOUSE_CLIENT --user=$NEW_USER --multiline --multiquery --testmode --query """ +SELECT id FROM db.tb; -- { serverError ACCESS_DENIED } +""" $CLICKHOUSE_CLIENT --query "GRANT SELECT ON db.* TO my_user" -$CLICKHOUSE_CLIENT --user=$NEW_USER --query "SELECT id FROM db.tb" 2>&1| grep -Fo "Not enough privileges" | uniq +$CLICKHOUSE_CLIENT --user=$NEW_USER --multiline --multiquery --testmode --query """ +SELECT id FROM db.tb; -- { serverError ACCESS_DENIED } +""" $CLICKHOUSE_CLIENT --query "GRANT SELECT ON db.tb TO my_user" -$CLICKHOUSE_CLIENT --user=$NEW_USER --query "SELECT id FROM db.tb" 2>&1| grep -Fo "Not enough privileges" | uniq +$CLICKHOUSE_CLIENT --user=$NEW_USER --multiline --multiquery --testmode --query """ +SELECT id FROM db.tb; -- { serverError ACCESS_DENIED } +""" $CLICKHOUSE_CLIENT --query "REVOKE ALL ON db.* FROM my_user;" $CLICKHOUSE_CLIENT --query "GRANT SELECT(id) ON db.tb TO my_user" -$CLICKHOUSE_CLIENT --user=$NEW_USER --query "SELECT id FROM db.tb" 2>&1| grep -Fo "Not enough privileges" | uniq +$CLICKHOUSE_CLIENT --user=$NEW_USER --query "SELECT id FROM db.tb" $CLICKHOUSE_CLIENT --query "GRANT SELECT ON db.tb TO my_user" $CLICKHOUSE_CLIENT --user=$NEW_USER --query "SELECT id FROM db.tb" $CLICKHOUSE_CLIENT --user=$NEW_USER --query "SELECT * FROM db.tb" -$CLICKHOUSE_CLIENT --query "REVOKE ALL ON db.* FROM my_user;" -$CLICKHOUSE_CLIENT --query "SET SENSITIVE COLUMN db.tb(id) = 0" -$CLICKHOUSE_CLIENT --query "SET SENSITIVE COLUMN db.tb(a) = 1" -$CLICKHOUSE_CLIENT --query "SET SENSITIVE COLUMN db.tb(b) = 1" -$CLICKHOUSE_CLIENT --query "GRANT SELECT ON *.* TO my_user" -$CLICKHOUSE_CLIENT --user=$NEW_USER --query "SELECT * FROM db.tb" 2>&1| grep -Fo "Not enough privileges" | uniq +${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ +REVOKE ALL ON db.* FROM my_user; +SET SENSITIVE COLUMN db.tb(id) = 0; +SET SENSITIVE COLUMN db.tb(a) = 1; +SET SENSITIVE 
COLUMN db.tb(b) = 1; +GRANT SELECT ON *.* TO my_user; +SELECT '-- select denied'; +""" +$CLICKHOUSE_CLIENT --user=$NEW_USER --multiline --multiquery --testmode --query """ +SELECT * FROM db.tb; -- { serverError ACCESS_DENIED } +""" $CLICKHOUSE_CLIENT --query "GRANT SELECT ON db.* TO my_user" -$CLICKHOUSE_CLIENT --user=$NEW_USER --query "SELECT * FROM db.tb" 2>&1| grep -Fo "Not enough privileges" | uniq +$CLICKHOUSE_CLIENT --user=$NEW_USER --multiline --multiquery --testmode --query """ +SELECT * FROM db.tb; -- { serverError ACCESS_DENIED } +""" $CLICKHOUSE_CLIENT --query "GRANT SELECT ON db.tb TO my_user" -$CLICKHOUSE_CLIENT --user=$NEW_USER --query "SELECT * FROM db.tb" 2>&1| grep -Fo "Not enough privileges" | uniq +$CLICKHOUSE_CLIENT --user=$NEW_USER --multiline --multiquery --testmode --query """ +SELECT * FROM db.tb; -- { serverError ACCESS_DENIED } +""" # select from system table -$CLICKHOUSE_CLIENT --query "SELECT * FROM system.sensitive_grants where user_name like '%my_user' FORMAT CSV" - -$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS db.tb;" -$CLICKHOUSE_CLIENT --query "DROP DATABASE IF EXISTS db;" -$CLICKHOUSE_CLIENT --query "DROP USER my_user;" +${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ +SELECT * FROM system.sensitive_grants where user_name like '%my_user' FORMAT CSV; +DROP TABLE IF EXISTS db.tb; +DROP DATABASE IF EXISTS db; +DROP USER my_user; +SET SENSITIVE DATABASE db = 0; +SET SENSITIVE TABLE db.tb = 0; +SET SENSITIVE COLUMN db.tb(id) = 0; +SET SENSITIVE COLUMN db.tb(a) = 0; +SET SENSITIVE COLUMN db.tb(b) = 0; +""" -$CLICKHOUSE_CLIENT --query "SET SENSITIVE DATABASE db = 0;" -$CLICKHOUSE_CLIENT --query "SET SENSITIVE TABLE db.tb = 0;" -$CLICKHOUSE_CLIENT --query "SET SENSITIVE COLUMN db.tb(id) = 0;" -$CLICKHOUSE_CLIENT --query "SET SENSITIVE COLUMN db.tb(a) = 0;" -$CLICKHOUSE_CLIENT --query "SET SENSITIVE COLUMN db.tb(b) = 0;" From bd683e6dc3c1dc48ee0b278c4dcc2f5bbf651b19 Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 
08:07:36 +0000 Subject: [PATCH 098/292] Merge branch 'fix_intersection_of_access_rights_2p2' into 'cnch-2.2' fix(clickhousech@m-3594234201): Fix calculating of intersection of access rights See merge request dp/ClickHouse!23238 --- src/Access/AccessRights.cpp | 18 ++-- src/Access/tests/gtest_access_rights_ops.cpp | 102 +++++++++++++++++++ src/Interpreters/InterpreterGrantQuery.cpp | 64 +++++++++--- 3 files changed, 162 insertions(+), 22 deletions(-) create mode 100644 src/Access/tests/gtest_access_rights_ops.cpp diff --git a/src/Access/AccessRights.cpp b/src/Access/AccessRights.cpp index 90455108bba..8a1cce5393a 100644 --- a/src/Access/AccessRights.cpp +++ b/src/Access/AccessRights.cpp @@ -741,7 +741,7 @@ struct AccessRightsBase::Node for (auto & [lhs_childname, lhs_child] : *children) { if (!rhs.tryGetChild(lhs_childname)) - lhs_child.flags |= rhs.flags & lhs_child.getAllGrantableFlags(); + lhs_child.addGrantsRec(rhs.flags, COLUMN_LEVEL); } } } @@ -759,7 +759,7 @@ struct AccessRightsBase::Node for (auto & [lhs_childname, lhs_child] : *children) { if (!rhs.tryGetChild(lhs_childname)) - lhs_child.flags &= rhs.flags; + lhs_child.removeGrantsRec(~rhs.flags); } } } @@ -1200,17 +1200,15 @@ void AccessRightsBase::makeIntersection(const AccessRightsBase & root_node, const std::unique_ptr & other_root_node) { if (!root_node) - { - if (other_root_node) - root_node = std::make_unique(*other_root_node); return; - } - if (other_root_node) + if (!other_root_node) { - root_node->makeIntersection(*other_root_node); - if (!root_node->flags && !root_node->children) - root_node = nullptr; + root_node = nullptr; + return; } + root_node->makeIntersection(*other_root_node); + if (!root_node->flags && !root_node->children) + root_node = nullptr; }; helper(root, other.root); helper(root_with_grant_option, other.root_with_grant_option); diff --git a/src/Access/tests/gtest_access_rights_ops.cpp b/src/Access/tests/gtest_access_rights_ops.cpp new file mode 100644 index 
00000000000..0a985a06aae --- /dev/null +++ b/src/Access/tests/gtest_access_rights_ops.cpp @@ -0,0 +1,102 @@ +#include +#include + +using namespace DB; + + +TEST(AccessRights, Union) +{ + AccessRights lhs, rhs; + lhs.grant(AccessType::CREATE_TABLE, "db1", "tb1"); + rhs.grant(AccessType::SELECT, "db2"); + lhs.makeUnion(rhs); + ASSERT_EQ(lhs.toString(), "GRANT CREATE TABLE ON db1.tb1, GRANT SELECT ON db2.*"); + + lhs.clear(); + rhs.clear(); + rhs.grant(AccessType::SELECT, "db2"); + lhs.grant(AccessType::CREATE_TABLE, "db1", "tb1"); + lhs.makeUnion(rhs); + ASSERT_EQ(lhs.toString(), "GRANT CREATE TABLE ON db1.tb1, GRANT SELECT ON db2.*"); + + lhs = {}; + rhs = {}; + lhs.grant(AccessType::SELECT); + rhs.grant(AccessType::SELECT, "db1", "tb1"); + lhs.makeUnion(rhs); + ASSERT_EQ(lhs.toString(), "GRANT SELECT ON *.*"); + + lhs = {}; + rhs = {}; + lhs.grant(AccessType::SELECT, "db1", "tb1", Strings{"col1", "col2"}); + rhs.grant(AccessType::SELECT, "db1", "tb1", Strings{"col2", "col3"}); + lhs.makeUnion(rhs); + ASSERT_EQ(lhs.toString(), "GRANT SELECT(col1, col2, col3) ON db1.tb1"); + + lhs = {}; + rhs = {}; + lhs.grant(AccessType::SELECT, "db1", "tb1", Strings{"col1", "col2"}); + rhs.grantWithGrantOption(AccessType::SELECT, "db1", "tb1", Strings{"col2", "col3"}); + lhs.makeUnion(rhs); + ASSERT_EQ(lhs.toString(), "GRANT SELECT(col1) ON db1.tb1, GRANT SELECT(col2, col3) ON db1.tb1 WITH GRANT OPTION"); + + lhs = {}; + rhs = {}; + lhs.grant(AccessType::INSERT); + rhs.grant(AccessType::ALL, "db1"); + lhs.makeUnion(rhs); + ASSERT_EQ(lhs.toString(), + "GRANT INSERT ON *.*, " + "GRANT SHOW, SELECT, ALTER, CREATE DATABASE, CREATE TABLE, CREATE VIEW, " + "CREATE DICTIONARY, DROP DATABASE, DROP TABLE, DROP VIEW, DROP DICTIONARY, " + "TRUNCATE, OPTIMIZE, " + "SYSTEM MERGES, SYSTEM TTL MERGES, SYSTEM FETCHES, " + "SYSTEM MOVES, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, " + "SYSTEM DROP REPLICA, SYSTEM SYNC REPLICA, SYSTEM RESTART REPLICA, " + "SYSTEM RESTORE REPLICA, SYSTEM RECALCULATE 
METRICS, SYSTEM FLUSH DISTRIBUTED, SYSTEM CONSUME, dictGet ON db1.*"); +} + + +TEST(AccessRights, Intersection) +{ + AccessRights lhs, rhs; + lhs.grant(AccessType::CREATE_TABLE, "db1", "tb1"); + rhs.grant(AccessType::SELECT, "db2"); + lhs.makeIntersection(rhs); + ASSERT_EQ(lhs.toString(), "GRANT USAGE ON *.*"); + + lhs.clear(); + rhs.clear(); + lhs.grant(AccessType::SELECT, "db2"); + rhs.grant(AccessType::CREATE_TABLE, "db1", "tb1"); + lhs.makeIntersection(rhs); + ASSERT_EQ(lhs.toString(), "GRANT USAGE ON *.*"); + + lhs = {}; + rhs = {}; + lhs.grant(AccessType::SELECT); + rhs.grant(AccessType::SELECT, "db1", "tb1"); + lhs.makeIntersection(rhs); + ASSERT_EQ(lhs.toString(), "GRANT SELECT ON db1.tb1"); + + lhs = {}; + rhs = {}; + lhs.grant(AccessType::SELECT, "db1", "tb1", Strings{"col1", "col2"}); + rhs.grant(AccessType::SELECT, "db1", "tb1", Strings{"col2", "col3"}); + lhs.makeIntersection(rhs); + ASSERT_EQ(lhs.toString(), "GRANT SELECT(col2) ON db1.tb1"); + + lhs = {}; + rhs = {}; + lhs.grant(AccessType::SELECT, "db1", "tb1", Strings{"col1", "col2"}); + rhs.grantWithGrantOption(AccessType::SELECT, "db1", "tb1", Strings{"col2", "col3"}); + lhs.makeIntersection(rhs); + ASSERT_EQ(lhs.toString(), "GRANT SELECT(col2) ON db1.tb1"); + + lhs = {}; + rhs = {}; + lhs.grant(AccessType::INSERT); + rhs.grant(AccessType::ALL, "db1"); + lhs.makeIntersection(rhs); + ASSERT_EQ(lhs.toString(), "GRANT INSERT ON db1.*"); +} diff --git a/src/Interpreters/InterpreterGrantQuery.cpp b/src/Interpreters/InterpreterGrantQuery.cpp index e831eb9825b..36524902c7f 100644 --- a/src/Interpreters/InterpreterGrantQuery.cpp +++ b/src/Interpreters/InterpreterGrantQuery.cpp @@ -115,24 +115,28 @@ namespace const AccessControlManager & access_control, const ContextAccess & access, const ASTGrantQuery & query, - const std::vector & grantees_from_query) + const std::vector & grantees_from_query, + bool & need_check_grantees_are_allowed) { const auto & elements = query.access_rights_elements; + 
need_check_grantees_are_allowed = true; if (elements.empty()) + { + /// No access rights to grant or revoke. + need_check_grantees_are_allowed = false; return; + } - /// To execute the command GRANT the current user needs to have the access granted - /// with GRANT OPTION. if (!query.is_revoke) { + /// To execute the command GRANT the current user needs to have the access granted with GRANT OPTION. access.checkGrantOption(elements); - checkGranteesAreAllowed(access_control, access, grantees_from_query); return; } if (access.hasGrantOption(elements)) { - checkGranteesAreAllowed(access_control, access, grantees_from_query); + /// Simple case: the current user has the grant option for all the access rights specified for REVOKE. return; } @@ -159,6 +163,7 @@ namespace all_granted_access.makeUnion(user->access); } } + need_check_grantees_are_allowed = false; /// already checked AccessRights required_access; if (elements[0].is_partial_revoke) @@ -180,21 +185,28 @@ namespace } } - std::vector getRoleIDsAndCheckAdminOption( const AccessControlManager & access_control, const ContextAccess & access, const ASTGrantQuery & query, const RolesOrUsersSet & roles_from_query, - const std::vector & grantees_from_query) + const std::vector & grantees_from_query, + bool & need_check_grantees_are_allowed) { - std::vector matching_ids; + need_check_grantees_are_allowed = true; + if (roles_from_query.empty()) + { + /// No roles to grant or revoke. + need_check_grantees_are_allowed = false; + return {}; + } + std::vector matching_ids; if (!query.is_revoke) { + /// To execute the command GRANT the current user needs to have the roles granted with ADMIN OPTION. 
matching_ids = roles_from_query.getMatchingIDs(access_control); access.checkAdminOption(matching_ids); - checkGranteesAreAllowed(access_control, access, grantees_from_query); return matching_ids; } @@ -203,7 +215,7 @@ namespace matching_ids = roles_from_query.getMatchingIDs(); if (access.hasAdminOption(matching_ids)) { - checkGranteesAreAllowed(access_control, access, grantees_from_query); + /// Simple case: the current user has the admin option for all the roles specified for REVOKE. return matching_ids; } } @@ -231,6 +243,7 @@ namespace all_granted_roles.makeUnion(user->granted_roles); } } + need_check_grantees_are_allowed = false; /// already checked const auto & all_granted_roles_set = query.admin_option ? all_granted_roles.getGrantedWithAdminOption() : all_granted_roles.getGranted(); if (roles_from_query.all) @@ -240,6 +253,33 @@ namespace access.checkAdminOption(matching_ids); return matching_ids; } + + void checkGrantOptionAndGrantees( + const AccessControlManager & access_control, + const ContextAccess & access, + const ASTGrantQuery & query, + const std::vector & grantees_from_query) + { + bool need_check_grantees_are_allowed = true; + checkGrantOption(access_control, access, query, grantees_from_query, need_check_grantees_are_allowed); + if (need_check_grantees_are_allowed) + checkGranteesAreAllowed(access_control, access, grantees_from_query); + } + + std::vector getRoleIDsAndCheckAdminOptionAndGrantees( + const AccessControlManager & access_control, + const ContextAccess & access, + const ASTGrantQuery & query, + const RolesOrUsersSet & roles_from_query, + const std::vector & grantees_from_query) + { + bool need_check_grantees_are_allowed = true; + auto role_ids = getRoleIDsAndCheckAdminOption( + access_control, access, query, roles_from_query, grantees_from_query, need_check_grantees_are_allowed); + if (need_check_grantees_are_allowed) + checkGranteesAreAllowed(access_control, access, grantees_from_query); + return role_ids; + } } @@ -265,7 +305,7 @@ 
BlockIO InterpreterGrantQuery::execute() /// Check if the current user has corresponding roles granted with admin option. std::vector roles; if (roles_set) - roles = getRoleIDsAndCheckAdminOption(access_control, *getContext()->getAccess(), query, *roles_set, grantees); + roles = getRoleIDsAndCheckAdminOptionAndGrantees(access_control, *getContext()->getAccess(), query, *roles_set, grantees); // if (!query.cluster.empty()) // { @@ -280,7 +320,7 @@ BlockIO InterpreterGrantQuery::execute() /// Check if the current user has corresponding access rights with grant option. if (!query.access_rights_elements.empty()) - checkGrantOption(access_control, *getContext()->getAccess(), query, grantees); + checkGrantOptionAndGrantees(access_control, *getContext()->getAccess(), query, grantees); /// Update roles and users listed in `grantees`. auto update_func = [&, ctx = getContext()](const AccessEntityPtr & entity) -> AccessEntityPtr From acb3f0e6f48b2c7f909c806f6373f6a814b13681 Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 08:07:56 +0000 Subject: [PATCH 099/292] Merge 'cherry-pick-mr-23213' into 'cnch-2.2' fix(clickhousech@m-4691623503): [cp] fix active txn list leak for CTAS See merge request: !23257 --- src/Interpreters/InterpreterCreateQuery.cpp | 25 ++++++++----- src/Interpreters/trySetVirtualWarehouse.cpp | 13 ++++++- .../StorageSystemCnchTableTransactions.cpp | 37 +++++++++++++++++++ .../StorageSystemCnchTableTransactions.h | 25 +++++++++++++ .../System/StorageSystemCnchTables.cpp | 2 +- src/Storages/System/attachSystemTables.cpp | 3 +- .../TransactionCoordinatorRcCnch.cpp | 12 +++++- .../TransactionCoordinatorRcCnch.h | 6 +++ .../10800_ctas_txn_finish.reference | 12 ++++++ .../10800_ctas_txn_finish.sql | 25 +++++++++++++ 10 files changed, 144 insertions(+), 16 deletions(-) create mode 100644 src/Storages/System/StorageSystemCnchTableTransactions.cpp create mode 100644 src/Storages/System/StorageSystemCnchTableTransactions.h create mode 100644 
tests/queries/4_cnch_stateless/10800_ctas_txn_finish.reference create mode 100644 tests/queries/4_cnch_stateless/10800_ctas_txn_finish.sql diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 01b5113db77..933db40f14d 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -1670,7 +1670,7 @@ BlockIO InterpreterCreateQuery::doCreateOrReplaceTable(ASTCreateQuery & create, BlockIO InterpreterCreateQuery::fillTableIfNeeded(const ASTCreateQuery & create) { - /// If the query is a CREATE SELECT, insert the data into the table. + /// If the query is a CREATE SELECT, insert the data into the table via INSERT INTO ... SELECT FROM if (create.select && !create.attach && !create.is_ordinary_view && !create.is_live_view && (!create.is_materialized_view || create.is_populate)) { @@ -1689,22 +1689,27 @@ BlockIO InterpreterCreateQuery::fillTableIfNeeded(const ASTCreateQuery & create) } else { - /// Just run it as new INSET INTO ... SELECT FROM - /// Cannot directly use InterpreterInsertQuery here, because Cnch requires some resouce initialization (txn, vw, session resource) - /// all done in executeQuery now. Directly initialization didn't work. 
- auto insert_context = Context::createCopy(getContext()->getSessionContext()); - insert_context->makeQueryContext(); - insert_context->setSettings(getContext()->getSettingsRef()); + /// reuse the query context for INSERT instead of creating a new context, + /// because we want the outermost executeQuery to finish the INSERT txn rather than the DDL txn + auto insert_context = getContext()->getQueryContext(); + auto & coordinator = insert_context->getCnchTransactionCoordinator(); + if (insert_context->getCurrentTransaction()) + { + /// finish the last txn (for DDL) and create a new one for INSERT + insert_context->setCurrentTransaction(coordinator.createTransaction()); + } + bool is_internal = true; // TODO @wangtao.2077: review this when internal queries are fully supported by optimizer if (insert_context->getSettingsRef().enable_optimizer && insert_context->getSettingsRef().enable_optimizer_for_create_select) { + /// optimizer doesn't support internal query + is_internal = false; + /// in order to add the insert query to processlist, need to allocate a new query id insert_context->setCurrentQueryId(""); - CurrentThread::attachQueryContext(insert_context); - return executeQuery(insert->formatForErrorMessage(), insert_context, /*internal=*/false); } - return executeQuery(insert->formatForErrorMessage(), insert_context, /*internal=*/true); + return executeQuery(insert->formatForErrorMessage(), insert_context, is_internal); } } diff --git a/src/Interpreters/trySetVirtualWarehouse.cpp b/src/Interpreters/trySetVirtualWarehouse.cpp index ee64dbd2330..31b10baa4a3 100644 --- a/src/Interpreters/trySetVirtualWarehouse.cpp +++ b/src/Interpreters/trySetVirtualWarehouse.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -68,8 +69,9 @@ static bool trySetVirtualWarehouseFromStorageID(const StorageID table_id, Contex LOG_DEBUG( &Poco::Logger::get("trySetVirtualWarehouse"), - "try get warehouse from {}, type is WRITE {}", + "set vw to {} from 
cnch table {}, type is WRITE {}", vw_name, + table_id.getNameForLogs(), VirtualWarehouseType::Write == vw_type); setVirtualWarehouseByName(vw_name, context); return true; @@ -111,8 +113,9 @@ static bool trySetVirtualWarehouseFromStorageID(const StorageID table_id, Contex LOG_DEBUG( &Poco::Logger::get("trySetVirtualWarehouse"), - "try get warehouse from {}, type is WRITE {}", + "set vw to {} from nested cnch table {}, type is WRITE {}", nested_vw_name, + nested_table->getStorageID().getNameForLogs(), VirtualWarehouseType::Write == vw_type); setVirtualWarehouseByName(nested_vw_name, context); return true; @@ -294,6 +297,12 @@ static bool trySetVirtualWarehouseFromAST(const ASTPtr & ast, ContextMutablePtr if (trySetVirtualWarehouseFromTable(database, refresh_mv->table, context)) return true; } + else if (auto * create = ast->as()) + { + /// No need to set vw for create query. + /// For CTAS, the data filling work is implemented as ASTInsertQuery (insert select) + return false; + } } while (false); diff --git a/src/Storages/System/StorageSystemCnchTableTransactions.cpp b/src/Storages/System/StorageSystemCnchTableTransactions.cpp new file mode 100644 index 00000000000..d01820b124d --- /dev/null +++ b/src/Storages/System/StorageSystemCnchTableTransactions.cpp @@ -0,0 +1,37 @@ +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ +NamesAndTypesList StorageSystemCnchTableTransactions::getNamesAndTypes() +{ + return { + {"table_uuid", std::make_shared()}, + {"txn_id", std::make_shared()}, + {"create_time", std::make_shared()}, + }; +} + +void StorageSystemCnchTableTransactions::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & /*query_info*/) const +{ + if (context->getServerType() != ServerType::cnch_server) + throw Exception("Table system.cnch_table_transctions only support cnch server", ErrorCodes::NOT_IMPLEMENTED); + + auto table_to_xids = 
context->getCnchTransactionCoordinator().getActiveXIDsPerTable(); + for (const auto & pair : table_to_xids) + { + for (const TxnTimestamp & xid : pair.second) + { + size_t col = 0; + res_columns[col++]->insert(pair.first); + res_columns[col++]->insert(xid.toUInt64()); + res_columns[col++]->insert(xid.toSecond()); + } + } +} +} diff --git a/src/Storages/System/StorageSystemCnchTableTransactions.h b/src/Storages/System/StorageSystemCnchTableTransactions.h new file mode 100644 index 00000000000..55568e35ecb --- /dev/null +++ b/src/Storages/System/StorageSystemCnchTableTransactions.h @@ -0,0 +1,25 @@ +#pragma once + +#include +#include + +namespace DB +{ +class Context; + +/// Used to provide active transaction list for each table running on the CURRENT server +class StorageSystemCnchTableTransactions : public shared_ptr_helper, + public IStorageSystemOneBlock +{ +public: + std::string getName() const override { return "SystemCnchTableTransactions"; } + + static NamesAndTypesList getNamesAndTypes(); + +protected: + using IStorageSystemOneBlock::IStorageSystemOneBlock; + + void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; +}; + +} diff --git a/src/Storages/System/StorageSystemCnchTables.cpp b/src/Storages/System/StorageSystemCnchTables.cpp index fadafdb5e61..3e6e977162c 100644 --- a/src/Storages/System/StorageSystemCnchTables.cpp +++ b/src/Storages/System/StorageSystemCnchTables.cpp @@ -174,7 +174,7 @@ Pipe StorageSystemCnchTables::read( Catalog::CatalogPtr cnch_catalog = context->getCnchCatalog(); if (context->getServerType() != ServerType::cnch_server || !cnch_catalog) - throw Exception("Table system.cnch_tables_history only support cnch_server", ErrorCodes::LOGICAL_ERROR); + throw Exception("Table system.cnch_tables only support cnch_server", ErrorCodes::LOGICAL_ERROR); bool require_key_columns = false; bool require_storage = false; diff --git a/src/Storages/System/attachSystemTables.cpp 
b/src/Storages/System/attachSystemTables.cpp index 54cf329b8d7..7cada3439e1 100644 --- a/src/Storages/System/attachSystemTables.cpp +++ b/src/Storages/System/attachSystemTables.cpp @@ -77,7 +77,6 @@ #include #endif #include -#include #include #include "Storages/System/StorageSystemExternalTables.h" @@ -135,6 +134,7 @@ #include #include #include +#include #include #include #include @@ -274,6 +274,7 @@ void attachSystemTablesServer(IDatabase & system_database, bool has_zookeeper) attach(system_database, "cnch_trash_items_info_local"); attach(system_database, "cnch_trash_items_info"); attach(system_database, "cnch_table_info"); + attach(system_database, "cnch_table_transactions"); attach(system_database, "cnch_tables_history"); attach(system_database, "cnch_databases"); attach(system_database, "cnch_databases_history"); diff --git a/src/Transaction/TransactionCoordinatorRcCnch.cpp b/src/Transaction/TransactionCoordinatorRcCnch.cpp index 0d38c003721..5fd8d381d82 100644 --- a/src/Transaction/TransactionCoordinatorRcCnch.cpp +++ b/src/Transaction/TransactionCoordinatorRcCnch.cpp @@ -100,8 +100,16 @@ TransactionCnchPtr TransactionCoordinatorRcCnch::createTransaction(const CreateT txn->force_clean_by_dm = opt.force_clean_by_dm; txn->async_post_commit = opt.async_post_commit; - ProfileEvents::increment((opt.read_only ? 
ProfileEvents::CnchTxnReadTxnCreated : ProfileEvents::CnchTxnWriteTxnCreated)); - LOG_DEBUG(log, "Created txn {}", txn->getTransactionRecord().toString()); + if (opt.read_only) + { + ProfileEvents::increment(ProfileEvents::CnchTxnReadTxnCreated); + LOG_DEBUG(log, "Created read-only txn {}", txn->getTransactionRecord().toString()); + } + else + { + ProfileEvents::increment(ProfileEvents::CnchTxnWriteTxnCreated); + LOG_DEBUG(log, "Created write txn {}", txn->getTransactionRecord().toString()); + } return txn; } diff --git a/src/Transaction/TransactionCoordinatorRcCnch.h b/src/Transaction/TransactionCoordinatorRcCnch.h index 479cb8d2fbb..27b09a78ede 100644 --- a/src/Transaction/TransactionCoordinatorRcCnch.h +++ b/src/Transaction/TransactionCoordinatorRcCnch.h @@ -168,6 +168,12 @@ class TransactionCoordinatorRcCnch : WithContext return active_txn_list; } + auto getActiveXIDsPerTable() const + { + std::lock_guard lock(min_ts_mutex); + return table_to_timestamps; + } + void shutdown() { scan_active_txns_task->deactivate(); diff --git a/tests/queries/4_cnch_stateless/10800_ctas_txn_finish.reference b/tests/queries/4_cnch_stateless/10800_ctas_txn_finish.reference new file mode 100644 index 00000000000..9bbb245d11b --- /dev/null +++ b/tests/queries/4_cnch_stateless/10800_ctas_txn_finish.reference @@ -0,0 +1,12 @@ +test CTAS in non-optimizer +0 +1 +2 +3 +4 +test CTAS in optimizer +0 +1 +2 +3 +4 diff --git a/tests/queries/4_cnch_stateless/10800_ctas_txn_finish.sql b/tests/queries/4_cnch_stateless/10800_ctas_txn_finish.sql new file mode 100644 index 00000000000..05a2d66da58 --- /dev/null +++ b/tests/queries/4_cnch_stateless/10800_ctas_txn_finish.sql @@ -0,0 +1,25 @@ +set dialect_type = 'ANSI'; + +drop table if exists ctas_src; +drop table if exists ctas_dst; + +create table ctas_src (id Int64) engine = CnchMergeTree order by id; +insert into ctas_src select number from numbers(5); + +set enable_optimizer_for_create_select = 0; +select 'test CTAS in non-optimizer'; +create 
table ctas_dst engine = CnchMergeTree order by id as select id from ctas_src; +select * from ctas_dst; +-- verify no leak in active txn list +select * from cnch(server, system.cnch_table_transactions) where table_uuid = (select uuid from system.cnch_tables where database = currentDatabase() and name = 'ctas_src'); +drop table ctas_dst; + +set enable_optimizer_for_create_select = 1; +select 'test CTAS in optimizer'; +create table ctas_dst engine = CnchMergeTree order by id as select id from ctas_src; +select * from ctas_dst; +-- verify no leak in active txn list +select * from cnch(server, system.cnch_table_transactions) where table_uuid = (select uuid from system.cnch_tables where database = currentDatabase() and name = 'ctas_src'); +drop table ctas_dst; + +drop table if exists ctas_src; From 9fa9a11c53593151f80ae80d480f45148e287558 Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 08:08:13 +0000 Subject: [PATCH 100/292] Merge 'cnch_2.2_update_cache_cfg' into 'cnch-2.2' fix(clickhousech@m-4711213091): Change index cache default argument See merge request: !23233 --- programs/server/Server.cpp | 4 ++-- src/Interpreters/AsynchronousMetrics.cpp | 8 ++++++++ src/Storages/MergeTree/GinIndexStore.h | 7 +++++-- src/Storages/MergeTree/MergeTreeIndexInverted.cpp | 2 +- 4 files changed, 16 insertions(+), 5 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index e5ab12d7248..af4a77245c7 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1222,8 +1222,8 @@ int Server::main(const std::vector & /*args*/) /// A cache for gin index store GinIndexStoreCacheSettings ginindex_store_cache_settings; ginindex_store_cache_settings.lru_max_size = config().getUInt64("ginindex_store_cache_size", 5368709120); //5GB - ginindex_store_cache_settings.mapping_bucket_size = config().getUInt64("ginindex_store_cache_bucket", 5000); //5000 - ginindex_store_cache_settings.cache_shard_num = 
config().getUInt64("ginindex_store_cache_shard", 8); //8 + ginindex_store_cache_settings.mapping_bucket_size = config().getUInt64("ginindex_store_cache_bucket", 1000); //1000 + ginindex_store_cache_settings.cache_shard_num = config().getUInt64("ginindex_store_cache_shard", 2); //2 ginindex_store_cache_settings.lru_update_interval = config().getUInt64("ginindex_store_cache_lru_update_interval", 60); //60 seconds global_context->setGinIndexStoreFactory(ginindex_store_cache_settings); diff --git a/src/Interpreters/AsynchronousMetrics.cpp b/src/Interpreters/AsynchronousMetrics.cpp index 43fb7dcd2cd..cab46bbb432 100644 --- a/src/Interpreters/AsynchronousMetrics.cpp +++ b/src/Interpreters/AsynchronousMetrics.cpp @@ -722,6 +722,14 @@ void AsynchronousMetrics::update(std::chrono::system_clock::time_point update_ti } } + { + if (auto gin_store_cache = getContext()->getGinIndexStoreFactory()) + { + new_values["GinStoreCacheCount"] = gin_store_cache->count(); + new_values["GinStoreCacheWeight"] = gin_store_cache->weight(); + } + } + #if USE_EMBEDDED_COMPILER { if (auto * compiled_expression_cache = CompiledExpressionCacheFactory::instance().tryGetCache()) diff --git a/src/Storages/MergeTree/GinIndexStore.h b/src/Storages/MergeTree/GinIndexStore.h index 3ae0e4b62be..c3caa6eca07 100644 --- a/src/Storages/MergeTree/GinIndexStore.h +++ b/src/Storages/MergeTree/GinIndexStore.h @@ -315,12 +315,12 @@ struct GinIndexStoreCacheSettings size_t lru_max_size {5368709120/*5GB*/}; // Cache mapping bucket size - size_t mapping_bucket_size {5000}; + size_t mapping_bucket_size {1000}; // LRU queue update interval in seconds size_t lru_update_interval {60}; - size_t cache_shard_num {8}; + size_t cache_shard_num {2}; }; struct GinIndexStoreWeightFunction @@ -340,6 +340,9 @@ class GinIndexStoreFactory : private boost::noncopyable ///Get GinIndexStore by using index name and data part GinIndexStorePtr get(const String & name, GinDataPartHelperPtr && storage_info); + size_t count() const { return 
stores_lru_cache.count(); } + size_t weight() const { return stores_lru_cache.weight(); } + /// GinIndexStores indexed by part file path using GinIndexStores = std::unordered_map; diff --git a/src/Storages/MergeTree/MergeTreeIndexInverted.cpp b/src/Storages/MergeTree/MergeTreeIndexInverted.cpp index 378422b7aee..fbeda9533fb 100644 --- a/src/Storages/MergeTree/MergeTreeIndexInverted.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexInverted.cpp @@ -740,7 +740,7 @@ void checkWithNewInvertedIndexArguments(const FieldVector & arguments_) if (config_type != StandardTokenExtractor::getName() || config_value != "{}") { - throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown config type {} and value should only {} now", config_type); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown config type {} and value should only {{}} now", config_type); } } From 6895d74b9ef2283dea476d9f94dafc557c2303fb Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 08:08:46 +0000 Subject: [PATCH 101/292] Merge 'feat/push_order_by_desc_to_table_scan_cnch_2.2' into 'cnch-2.2' feat(optimizer@m-4670772522): push order by desc into table scan cnch-2.2 See merge request: !22940 # Conflicts: # src/QueryPlan/SortingStep.cpp --- src/Core/Settings.h | 1 + src/Core/SortDescription.cpp | 15 -- src/Core/SortDescription.h | 6 +- src/Optimizer/Property/Property.cpp | 33 +++ src/Optimizer/Property/Property.h | 11 +- src/Optimizer/Property/PropertyDeriver.cpp | 10 +- src/Optimizer/Property/PropertyMatcher.cpp | 33 ++- src/Optimizer/Property/PropertyMatcher.h | 4 +- src/Optimizer/Rewriter/UseSortingProperty.cpp | 216 +++++++++++---- src/Optimizer/Rewriter/UseSortingProperty.h | 48 +++- .../PushPartialStepThroughExchangeRules.cpp | 61 ++++- .../PushPartialStepThroughExchangeRules.h | 14 + src/Optimizer/Rule/Rule.h | 1 + src/Optimizer/Rule/Rules.cpp | 1 + src/Protos/plan_node.proto | 1 + src/QueryPlan/GraphvizPrinter.cpp | 5 + src/QueryPlan/PlanPrinter.cpp | 8 + src/QueryPlan/SortingStep.cpp | 71 
+++-- src/QueryPlan/SortingStep.h | 3 +- src/QueryPlan/TableScanStep.cpp | 11 +- src/QueryPlan/TableScanStep.h | 1 + .../tpcds/explains/tpcds100/q23.explain | 204 +++++++------- .../tpcds/explains/tpcds1000/q23.explain | 204 +++++++------- .../tpcds1000_not_show_stats/q23.explain | 198 +++++++------- .../explains/tpcds1000_sample/q23.explain | 204 +++++++------- .../10020_test_fusion_merge.reference | 162 +++++++----- .../40022_topn_filtering_opt.reference | 1 + .../40023_mv_with_topn_filtering.reference | 3 +- .../48015_simplify_multiIf_function.reference | 18 +- ...cute_uncorrelated_subquery_first.reference | 12 +- .../48018_push_limit_into_sorting.reference | 14 +- .../48023_foreign_key_definition.reference | 2 + ...8049_optimzier_use_sort_property.reference | 250 ++++++++++++++++++ .../48049_optimzier_use_sort_property.sql | 56 ++++ 34 files changed, 1285 insertions(+), 597 deletions(-) create mode 100644 tests/queries/4_cnch_stateless_no_tenant/48049_optimzier_use_sort_property.reference create mode 100644 tests/queries/4_cnch_stateless_no_tenant/48049_optimzier_use_sort_property.sql diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 30a6b7d69b8..c219b96789e 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -1545,6 +1545,7 @@ enum PreloadLevelSettings : UInt64 M(Bool, enable_push_partial_agg_through_exchange, true, "Whether to enable PushPartialAggThroughExchange rules", 0) \ M(Bool, enable_push_partial_agg_through_union, true, "Whether to enable PushPartialAggThroughUnion rules", 0) \ M(Bool, enable_push_partial_sorting_through_exchange, true, "Whether to enable PushPartialSortingThroughExchange rules", 0) \ + M(Bool, enable_push_partial_sorting_through_union, true, "Whether to enable PushPartialSortingThroughUnion rules", 0) \ M(Bool, enable_push_partial_limit_through_exchange, true, "Whether to enable PushPartialLimitThroughExchange rules", 0) \ M(Bool, enable_push_partial_distinct_through_exchange, true, "Whether to enable 
PushPartialDistinctThroughExchange rules", 0) \ M(UInt64, max_rows_to_use_topn_filtering, 0, "The maximum N of TopN to use topn filtering optimization. Set 0 to choose this value adaptively.", 0) \ diff --git a/src/Core/SortDescription.cpp b/src/Core/SortDescription.cpp index ff5da536005..4c9d904567b 100644 --- a/src/Core/SortDescription.cpp +++ b/src/Core/SortDescription.cpp @@ -137,19 +137,4 @@ JSONBuilder::ItemPtr explainSortDescription(const SortDescription & description, return json_array; } -bool SortDescription::hasPrefix(const SortDescription & prefix) const -{ - if (prefix.empty()) - return true; - - if (prefix.size() > size()) - return false; - - for (size_t i = 0; i < prefix.size(); ++i) - { - if ((*this)[i] != prefix[i]) - return false; - } - return true; -} } diff --git a/src/Core/SortDescription.h b/src/Core/SortDescription.h index 956b312fa9a..f9cd4b73305 100644 --- a/src/Core/SortDescription.h +++ b/src/Core/SortDescription.h @@ -93,8 +93,8 @@ struct SortColumnDescription bool operator == (const SortColumnDescription & other) const { - return column_name == other.column_name && column_number == other.column_number - && direction == other.direction && (nulls_direction == other.nulls_direction || nulls_direction == 0 || other.nulls_direction == 0); + return column_name == other.column_name && column_number == other.column_number && direction == other.direction + && nulls_direction == other.nulls_direction; } bool operator != (const SortColumnDescription & other) const @@ -130,7 +130,7 @@ struct SortColumnDescription class SortDescription : public std::vector { public: - bool hasPrefix(const SortDescription & prefix) const; + using vector::vector; }; /// Outputs user-readable description into `out`. 
diff --git a/src/Optimizer/Property/Property.cpp b/src/Optimizer/Property/Property.cpp index 0a5106e6858..92586a37d73 100644 --- a/src/Optimizer/Property/Property.cpp +++ b/src/Optimizer/Property/Property.cpp @@ -333,6 +333,30 @@ String Partitioning::toString() const } } +SortOrder SortColumn::toReverseOrder(SortOrder sort_order) +{ + switch (sort_order) + { + case SortOrder::ASC_NULLS_FIRST: + return SortOrder::DESC_NULLS_LAST; + case SortOrder::ASC_NULLS_LAST: + return SortOrder::DESC_NULLS_FIRST; + case SortOrder::ASC_ANY: + return SortOrder::DESC_ANY; + case SortOrder::DESC_NULLS_FIRST: + return SortOrder::ASC_NULLS_LAST; + case SortOrder::DESC_NULLS_LAST: + return SortOrder::ASC_NULLS_FIRST; + case SortOrder::DESC_ANY: + return SortOrder::ASC_ANY; + case SortOrder::ANY: + return SortOrder::ANY; + case SortOrder::UNKNOWN: + return SortOrder::UNKNOWN; + } + throw Exception(ErrorCodes::LOGICAL_ERROR, "unknown sort order"); +} + size_t SortColumn::hash() const { size_t hash = MurmurHash3Impl64::apply(name.c_str(), name.size()); @@ -364,6 +388,15 @@ String SortColumn::toString() const return "unknown"; } +Sorting Sorting::toReverseOrder() const +{ + Sorting ret; + ret.reserve(size()); + for (const SortColumn & sort_column : *this) + ret.emplace_back(sort_column.toReverseOrder()); + return ret; +} + size_t Sorting::hash() const { size_t hash = IntHash64Impl::apply(this->size()); diff --git a/src/Optimizer/Property/Property.h b/src/Optimizer/Property/Property.h index 93df8bdc33b..f28233899c2 100644 --- a/src/Optimizer/Property/Property.h +++ b/src/Optimizer/Property/Property.h @@ -205,7 +205,8 @@ class SortColumn return SortOrder::ASC_NULLS_LAST; else if (nulls_direction == -1) return SortOrder::ASC_NULLS_FIRST; - // else if (nulls_direction == 0) // no need, this case should return ASC_NULLS_LAST. 
+ else if (nulls_direction == 0) + return SortOrder::ASC_ANY; } else if (direction == -1) { @@ -213,9 +214,14 @@ class SortColumn return SortOrder::DESC_NULLS_LAST; else if (nulls_direction == -1) return SortOrder::DESC_NULLS_FIRST; + else if (nulls_direction == 0) + return SortOrder::DESC_ANY; } + else if (direction == 0 && nulls_direction == 0) + return SortOrder::ANY; return SortOrder::UNKNOWN; } + static SortOrder toReverseOrder(SortOrder sort_order); SortColumn(String name_, SortOrder order_) : name(std::move(name_)), order(order_) { } explicit SortColumn(const SortColumnDescription & sort_column_description) : name(sort_column_description.column_name) @@ -225,6 +231,7 @@ class SortColumn const String & getName() const { return name; } SortOrder getOrder() const { return order; } + SortColumn toReverseOrder() const { return SortColumn{name, toReverseOrder(order)}; } SortColumnDescription toSortColumnDesc() const { @@ -307,6 +314,8 @@ class Sorting : public std::vector return res; } + Sorting toReverseOrder() const; + size_t hash() const; String toString() const; }; diff --git a/src/Optimizer/Property/PropertyDeriver.cpp b/src/Optimizer/Property/PropertyDeriver.cpp index 1d9e04c2030..a6e242ea7b3 100644 --- a/src/Optimizer/Property/PropertyDeriver.cpp +++ b/src/Optimizer/Property/PropertyDeriver.cpp @@ -89,7 +89,7 @@ static String getClusterByHint(const StoragePtr & storage) return ""; } -Property PropertyDeriver::deriveStorageProperty(const StoragePtr & storage, const Property &, ContextMutablePtr & context) +Property PropertyDeriver::deriveStorageProperty(const StoragePtr & storage, const Property & required, ContextMutablePtr & context) { if (storage->getDatabaseName() == "system") { @@ -108,12 +108,18 @@ Property PropertyDeriver::deriveStorageProperty(const StoragePtr & storage, cons sorting.emplace_back(SortColumn(descs.column_names[i], SortOrder::ASC_NULLS_FIRST)); } + bool use_reverse_sorting = !required.getSorting().empty() + && 
(required.getSorting()[0].getOrder() == SortOrder::DESC_ANY || required.getSorting()[0].getOrder() == SortOrder::DESC_NULLS_FIRST + || required.getSorting()[0].getOrder() == SortOrder::DESC_NULLS_LAST); + if (use_reverse_sorting) + sorting = sorting.toReverseOrder(); + auto metadata = storage->getInMemoryMetadataPtr(); Names cluster_by; UInt64 buckets = 0; auto normalize_ast = [&](ASTPtr sharding_key) -> std::pair { - static SymbolVisitor visitor; + SymbolVisitor visitor; Names partition_keys; SymbolVisitorContext symbol_context; ASTVisitorUtil::accept(sharding_key, visitor, symbol_context); diff --git a/src/Optimizer/Property/PropertyMatcher.cpp b/src/Optimizer/Property/PropertyMatcher.cpp index 38d0d81d2cb..c6c7bfbadf3 100644 --- a/src/Optimizer/Property/PropertyMatcher.cpp +++ b/src/Optimizer/Property/PropertyMatcher.cpp @@ -68,9 +68,9 @@ bool PropertyMatcher::matchStreamPartitioning( } Sorting PropertyMatcher::matchSorting( - const Context & context, const Sorting & required, const Sorting & actual, const SymbolEquivalences & equivalences) + const Context & context, const Sorting & required, const Sorting & actual, const SymbolEquivalences & equivalences, const Constants & constants) { - return matchSorting(context, required.toSortDesc(), actual, equivalences); + return matchSorting(context, required.toSortDesc(), actual, equivalences, constants); } /// Optimize in case of exact match with order key element @@ -129,15 +129,10 @@ SortOrder matchSortDescription(const SortColumnDescription & require, const Sort return SortOrder::UNKNOWN; } -Sorting PropertyMatcher::matchSorting(const Context &, const SortDescription & required, const Sorting & actual, const SymbolEquivalences &) +Sorting PropertyMatcher::matchSorting(const Context &, const SortDescription & required, const Sorting & actual, const SymbolEquivalences &, const Constants & constants) { if (!actual.empty()) { - SortOrder read_direction = SortOrder::UNKNOWN; - - // todo@jingpeng.mt constant - // auto 
fixed_sorting_columns = getFixedSortingColumns(query, sorting_key_columns, context); - SortDescription sort_description_for_merging; sort_description_for_merging.reserve(required.size()); @@ -147,24 +142,26 @@ Sorting PropertyMatcher::matchSorting(const Context &, const SortDescription & r while (desc_pos < required.size() && key_pos < actual.size()) { auto match = matchSortDescription(required[desc_pos], actual[key_pos].toSortColumnDesc()); - bool is_matched = match != SortOrder::UNKNOWN && (desc_pos == 0 || match == read_direction); - + bool is_matched = match != SortOrder::UNKNOWN; if (!is_matched) { /// If one of the sorting columns is constant after filtering, /// skip it, because it won't affect order anymore. - // if (fixed_sorting_columns.contains(sorting_key_columns[key_pos])) - // { - // ++key_pos; - // continue; - // } + if (constants.contains(actual[key_pos].getName())) + { + ++key_pos; + continue; + } + else if (constants.contains(required[desc_pos].column_name)) + { + sort_description_for_merging.push_back(required[desc_pos]); + ++desc_pos; + continue; + } break; } - if (desc_pos == 0) - read_direction = match; - sort_description_for_merging.push_back(required[desc_pos]); ++desc_pos; diff --git a/src/Optimizer/Property/PropertyMatcher.h b/src/Optimizer/Property/PropertyMatcher.h index c76dded39e8..2908a746b53 100644 --- a/src/Optimizer/Property/PropertyMatcher.h +++ b/src/Optimizer/Property/PropertyMatcher.h @@ -32,10 +32,10 @@ class PropertyMatcher const Context & context, const Partitioning & required, const Partitioning & actual, const SymbolEquivalences & equivalences = {}, const Constants & constants = {}, bool match_local_exchange = true); static Sorting - matchSorting(const Context & context, const Sorting & required, const Sorting & actual, const SymbolEquivalences & equivalences = {}); + matchSorting(const Context & context, const Sorting & required, const Sorting & actual, const SymbolEquivalences & equivalences = {}, const Constants & 
constants = {}); static Sorting matchSorting( - const Context & context, const SortDescription & required, const Sorting & actual, const SymbolEquivalences & equivalences = {}); + const Context & context, const SortDescription & required, const Sorting & actual, const SymbolEquivalences & equivalences = {}, const Constants & constants = {}); static Property compatibleCommonRequiredProperty(const std::unordered_set & properties); }; diff --git a/src/Optimizer/Rewriter/UseSortingProperty.cpp b/src/Optimizer/Rewriter/UseSortingProperty.cpp index ee82915d820..4c5c1160ec6 100644 --- a/src/Optimizer/Rewriter/UseSortingProperty.cpp +++ b/src/Optimizer/Rewriter/UseSortingProperty.cpp @@ -1,62 +1,73 @@ +#include +#include #include +#include +#include #include #include #include +#include #include +#include #include +#include #include -#include +#include +#include namespace DB { void SortingOrderedSource::rewrite(QueryPlan & plan, ContextMutablePtr context) const { SortingOrderedSource::Rewriter rewriter{context, plan.getCTEInfo()}; - Void require; - auto result = VisitorUtil::accept(plan.getPlanNode(), rewriter, require); + SortDescription required; + auto result = VisitorUtil::accept(plan.getPlanNode(), rewriter, required); - PushSortingInfoRewriter push_rewriter{context, plan.getCTEInfo()}; + PruneSortingInfoRewriter push_rewriter{context, plan.getCTEInfo()}; SortInfo sort_info; auto plan_node = VisitorUtil::accept(result.plan, push_rewriter, sort_info); plan.update(plan_node); } -PlanAndProp SortingOrderedSource::Rewriter::visitPlanNode(PlanNodeBase & node, Void &) +PlanAndPropConstants SortingOrderedSource::Rewriter::visitPlanNode(PlanNodeBase & node, SortDescription &) { PlanNodes children; - Void require; + SortDescription required; PropertySet input_properties; + ConstantsSet input_constants; for (const auto & child : node.getChildren()) { - auto result = VisitorUtil::accept(child, *this, require); + auto result = VisitorUtil::accept(child, *this, required); 
children.emplace_back(result.plan); input_properties.emplace_back(result.property); + input_constants.emplace_back(result.constants); } node.replaceChildren(children); Property any_prop; Property prop = PropertyDeriver::deriveProperty(node.getStep(), input_properties, any_prop, context); - return {node.shared_from_this(), prop}; + Constants constants = ConstantsDeriver::deriveConstants(node.getStep(), input_constants, cte_helper.getCTEInfo(), context); + return {node.shared_from_this(), prop, constants}; } -PlanAndProp SortingOrderedSource::Rewriter::visitSortingNode(SortingNode & node, Void & v) +PlanAndPropConstants SortingOrderedSource::Rewriter::visitSortingNode(SortingNode & node, SortDescription &) { - auto result = VisitorUtil::accept(node.getChildren()[0], *this, v); - auto step = node.getStep(); - auto prefix_sorting = PropertyMatcher::matchSorting(*context, step->getSortDescription(), result.property.getSorting()); + auto required = step->getSortDescription(); + auto result = VisitorUtil::accept(node.getChildren()[0], *this, required); + + Constants constants = ConstantsDeriver::deriveConstants(node.getStep(), {result.constants}, cte_helper.getCTEInfo(), context); + auto prefix_sorting = PropertyMatcher::matchSorting(*context, step->getSortDescription(), result.property.getSorting(), {}, constants); step->setPrefixDescription(prefix_sorting.toSortDesc()); Property any_prop; Property prop = PropertyDeriver::deriveProperty(node.getStep(), {result.property}, any_prop, context); - return {node.shared_from_this(), prop}; + return {node.shared_from_this(), prop, constants}; } -PlanAndProp SortingOrderedSource::Rewriter::visitAggregatingNode(AggregatingNode & node, Void & v) +PlanAndPropConstants SortingOrderedSource::Rewriter::visitAggregatingNode(AggregatingNode & node, SortDescription & required) { - auto result = VisitorUtil::accept(node.getChildren()[0], *this, v); const auto & settings = context->getSettingsRef(); - if 
(settings.optimize_aggregation_in_order /* && !settings.optimize_aggregate_function_type */) { auto step = node.getStep(); @@ -68,19 +79,21 @@ PlanAndProp SortingOrderedSource::Rewriter::visitAggregatingNode(AggregatingNode order_descr.emplace_back(name, 1, 1); } + PlanAndPropConstants result = VisitorUtil::accept(node.getChildren()[0], *this, order_descr); - auto prefix_sorting = PropertyMatcher::matchSorting(*context, order_descr, result.property.getSorting()); + Constants constants = ConstantsDeriver::deriveConstants(node.getStep(), {result.constants}, cte_helper.getCTEInfo(), context); + auto prefix_sorting = PropertyMatcher::matchSorting(*context, order_descr, result.property.getSorting(), {}, constants); step->setGroupBySortDescription(prefix_sorting.toSortDesc()); + + Property any_prop; + Property prop = PropertyDeriver::deriveProperty(node.getStep(), {result.property}, any_prop, context); + return {node.shared_from_this(), prop, constants}; } - Property any_prop; - Property prop = PropertyDeriver::deriveProperty(node.getStep(), {result.property}, any_prop, context); - return {node.shared_from_this(), prop}; + return visitPlanNode(node, required); } -PlanAndProp SortingOrderedSource::Rewriter::visitWindowNode(WindowNode & node, Void & v) +PlanAndPropConstants SortingOrderedSource::Rewriter::visitWindowNode(WindowNode & node, SortDescription & required) { - auto result = VisitorUtil::accept(node.getChildren()[0], *this, v); - #if 0 if (context->getSettingsRef().optimize_read_in_window_order) { @@ -93,65 +106,178 @@ PlanAndProp SortingOrderedSource::Rewriter::visitWindowNode(WindowNode & node, V const auto & order_by = step->getWindowDescription().order_by; order_descr.insert(order_descr.end(), order_by.begin(), order_by.end()); - auto prefix_sorting = PropertyMatcher::matchSorting(*context, order_descr, result.property.getSorting()); + auto result = VisitorUtil::accept(node.getChildren()[0], *this, order_descr); + Constants constants = 
ConstantsDeriver::deriveConstants(node.getStep(), {result.constants}, cte_helper.getCTEInfo(), context); + auto prefix_sorting = PropertyMatcher::matchSorting(*context, order_descr, result.property.getSorting(), {}, constants); step->setPrefixDescription(prefix_sorting.toSortDesc()); + + Property any_prop; + Property prop = PropertyDeriver::deriveProperty(node.getStep(), {result.property}, any_prop, context); + return {node.shared_from_this(), prop, constants}; } #endif + + return visitPlanNode(node, required); +} + +PlanAndPropConstants SortingOrderedSource::Rewriter::visitCTERefNode(CTERefNode & node, SortDescription &) +{ + const auto * step = node.getStep().get(); + SortDescription required; + auto cte_plan = cte_helper.acceptAndUpdate(step->getId(), *this, required, [](auto & result) { return result.plan; }); + return {node.shared_from_this(), Property{}, cte_plan.constants}; +} + +PlanAndPropConstants SortingOrderedSource::Rewriter::visitTopNFilteringNode(TopNFilteringNode & node, SortDescription &) +{ + auto & topn_filtering = node.getStep(); + auto required_sorting = topn_filtering->getSortDescription(); + + auto result = VisitorUtil::accept(node.getChildren()[0], *this, required_sorting); + + Constants constants = ConstantsDeriver::deriveConstants(node.getStep(), {result.constants}, cte_helper.getCTEInfo(), context); + auto prefix_sorting = PropertyMatcher::matchSorting(*context, required_sorting, result.property.getSorting(), {}, constants); + if (prefix_sorting.size() == required_sorting.size()) + topn_filtering->setAlgorithm(TopNFilteringAlgorithm::Limit); + Property any_prop; Property prop = PropertyDeriver::deriveProperty(node.getStep(), {result.property}, any_prop, context); - return {node.shared_from_this(), prop}; + return {node.shared_from_this(), prop, constants}; } -PlanAndProp SortingOrderedSource::Rewriter::visitCTERefNode(CTERefNode & node, Void & v) +PlanAndPropConstants SortingOrderedSource::Rewriter::visitTableScanNode(TableScanNode & 
node, SortDescription & required) { - const auto * step = node.getStep().get(); + auto & step = node.getStep(); - auto cte_plan = cte_helper.acceptAndUpdate(step->getId(), *this, v, [](auto & result) { return result.plan; }); - return {node.shared_from_this(), Property{}}; + Property any_prop; + any_prop.setSorting(Sorting{required}); + Property prop = PropertyDeriver::deriveProperty(step, context, any_prop); + step->setReadOrder(prop.getSorting().translate(node.getStep()->getAliasToColumnMap()).toSortDesc()); + Constants constants = ConstantsDeriver::deriveConstants(step, cte_helper.getCTEInfo(), context); + return {node.shared_from_this(), prop, constants}; } -PlanAndProp SortingOrderedSource::Rewriter::visitTopNFilteringNode(TopNFilteringNode & node, Void & ctx) +PlanAndPropConstants SortingOrderedSource::Rewriter::visitFilterNode(FilterNode & node, SortDescription & required) { - auto result = VisitorUtil::accept(node.getChildren()[0], *this, ctx); - auto actual_sorting = result.property.getSorting().toSortDesc(); + auto result = VisitorUtil::accept(node.getChildren()[0], *this, required); + Property any_prop; + Property prop = PropertyDeriver::deriveProperty(node.getStep(), {result.property}, any_prop, context); + Constants constants = ConstantsDeriver::deriveConstants(node.getStep(), {result.constants}, cte_helper.getCTEInfo(), context); + return {node.shared_from_this(), prop, constants}; +} - auto & topn_filtering = dynamic_cast(*node.getStep()); - const auto & required_sorting = topn_filtering.getSortDescription(); +PlanAndPropConstants SortingOrderedSource::Rewriter::visitProjectionNode(ProjectionNode & node, SortDescription & require) +{ + auto mappings = Utils::computeIdentityTranslations(node.getStep()->getAssignments()); - if (actual_sorting.hasPrefix(required_sorting)) - topn_filtering.setAlgorithm(TopNFilteringAlgorithm::Limit); + SortDescription push_down_sort_description; + for (const auto & column : require) + { + if 
(!mappings.contains(column.column_name)) + break; + push_down_sort_description.emplace_back( + SortColumnDescription{mappings.at(column.column_name), column.direction, column.nulls_direction}); + } + auto result = VisitorUtil::accept(node.getChildren()[0], *this, push_down_sort_description); + Property any_prop; Property prop = PropertyDeriver::deriveProperty(node.getStep(), {result.property}, any_prop, context); - return {node.shared_from_this(), prop}; + Constants constants = ConstantsDeriver::deriveConstants(node.getStep(), {result.constants}, cte_helper.getCTEInfo(), context); + return {node.shared_from_this(), prop, constants}; +} + +PlanNodePtr PruneSortingInfoRewriter::visitPlanNode(PlanNodeBase & node, SortInfo & required) +{ + SortInfo s{required.sort_desc, size_t{0}}; + return SimplePlanRewriter::visitPlanNode(node, s); } -PlanNodePtr PushSortingInfoRewriter::visitSortingNode(SortingNode & node, SortInfo &) +PlanNodePtr PruneSortingInfoRewriter::visitSortingNode(SortingNode & node, SortInfo &) { auto prefix_desc = node.getStep()->getPrefixDescription(); SortInfo s{prefix_desc, node.getStep()->getLimit()}; - return visitPlanNode(node, s); + return SimplePlanRewriter::visitPlanNode(node, s); } -PlanNodePtr PushSortingInfoRewriter::visitAggregatingNode(AggregatingNode & node, SortInfo &) +PlanNodePtr PruneSortingInfoRewriter::visitAggregatingNode(AggregatingNode & node, SortInfo &) { auto prefix_desc = node.getStep()->getGroupBySortDescription(); SortInfo s{prefix_desc, size_t{0}}; - return visitPlanNode(node, s); + return SimplePlanRewriter::visitPlanNode(node, s); } -PlanNodePtr PushSortingInfoRewriter::visitWindowNode(WindowNode & node, SortInfo &) +PlanNodePtr PruneSortingInfoRewriter::visitWindowNode(WindowNode & node, SortInfo &) { auto prefix_desc = node.getStep()->getPrefixDescription(); SortInfo s{prefix_desc, size_t{0}}; - return visitPlanNode(node, s); + return SimplePlanRewriter::visitPlanNode(node, s); +} + +PlanNodePtr 
PruneSortingInfoRewriter::visitTopNFilteringNode(TopNFilteringNode & node, SortInfo &) +{ + auto prefix_desc = node.getStep()->getSortDescription(); + SortInfo s{prefix_desc, size_t{0}}; + return SimplePlanRewriter::visitPlanNode(node, s); } -PlanNodePtr PushSortingInfoRewriter::visitTableScanNode(TableScanNode & node, SortInfo & s) +PlanNodePtr PruneSortingInfoRewriter::visitTableScanNode(TableScanNode & node, SortInfo & required) { - node.getStep()->setReadOrder(s.sort_desc); + auto & step = node.getStep(); + + NameSet required_columns; + auto mappings = step->getAliasToColumnMap(); + for (const auto & column : required.sort_desc) + { + auto column_name = mappings.contains(column.column_name) ? mappings.at(column.column_name) : column.column_name; + required_columns.emplace(column_name); + } + + // prune unused read order columns + // eg, select * from table(order by a,b,c) where a = 'x' and d = 'y' order by b,d + // required sort columns may be: b,d; read order columns should be a,b + auto read_order = step->getReadOrder(); + auto it = std::find_if(read_order.rbegin(), read_order.rend(), [&](const SortColumnDescription & sort_column) { + return required_columns.contains(sort_column.column_name); + }); + + SortDescription pruned_read_order(read_order.begin(), read_order.begin() + std::distance(it, read_order.rend())); + + if (!required.sort_desc.empty() && pruned_read_order.empty()) + { + // do nothing if all columns in required don't exist in table + if (logger->error()) + { + Names names; + for (const auto & desc : required.sort_desc) + names.emplace_back(desc.column_name); + LOG_WARNING(logger, "unkown required sorting: {}", fmt::format("{}", fmt::join(names, ", "))); + } + } + else + { + node.getStep()->setReadOrder(pruned_read_order); + } + return node.shared_from_this(); } +PlanNodePtr PruneSortingInfoRewriter::visitProjectionNode(ProjectionNode & node, SortInfo & required) +{ + auto mappings = 
Utils::computeIdentityTranslations(node.getStep()->getAssignments()); + + SortDescription push_down_sort_description; + for (const auto & column : required.sort_desc) + { + if (!mappings.contains(column.column_name)) + break; + push_down_sort_description.emplace_back( + SortColumnDescription{mappings.at(column.column_name), column.direction, column.nulls_direction}); + } + + SortInfo child_required{push_down_sort_description, Utils::canChangeOutputRows(*node.getStep(), context) ? required.limit : 0ul}; + return SimplePlanRewriter::visitPlanNode(node, child_required); +} + } diff --git a/src/Optimizer/Rewriter/UseSortingProperty.h b/src/Optimizer/Rewriter/UseSortingProperty.h index c6143119acd..c936d1c4997 100644 --- a/src/Optimizer/Rewriter/UseSortingProperty.h +++ b/src/Optimizer/Rewriter/UseSortingProperty.h @@ -5,13 +5,23 @@ #include #include #include +#include #include #include #include #include +#include namespace DB { + +struct PlanAndPropConstants +{ + PlanNodePtr plan; + Property property; + Constants constants; +}; + class SortingOrderedSource : public Rewriter { public: @@ -25,39 +35,51 @@ class SortingOrderedSource : public Rewriter class Rewriter; }; -class SortingOrderedSource::Rewriter : public PlanNodeVisitor +class SortingOrderedSource::Rewriter : public PlanNodeVisitor { public: Rewriter(ContextMutablePtr context_, CTEInfo & cte_info_) : context(context_), cte_helper(cte_info_) { } - PlanAndProp visitPlanNode(PlanNodeBase &, Void &) override; - PlanAndProp visitSortingNode(SortingNode &, Void &) override; - PlanAndProp visitAggregatingNode(AggregatingNode &, Void &) override; - PlanAndProp visitWindowNode(WindowNode &, Void &) override; - PlanAndProp visitCTERefNode(CTERefNode & node, Void &) override; - PlanAndProp visitTopNFilteringNode(TopNFilteringNode & node, Void &) override; + + PlanAndPropConstants visitPlanNode(PlanNodeBase &, SortDescription & required) override; + PlanAndPropConstants visitSortingNode(SortingNode &, SortDescription & 
required) override; + PlanAndPropConstants visitAggregatingNode(AggregatingNode &, SortDescription & required) override; + PlanAndPropConstants visitWindowNode(WindowNode &, SortDescription & required) override; + PlanAndPropConstants visitTopNFilteringNode(TopNFilteringNode & node, SortDescription & required) override; + + PlanAndPropConstants visitCTERefNode(CTERefNode & node, SortDescription & required) override; + PlanAndPropConstants visitProjectionNode(ProjectionNode & node, SortDescription & required) override; + PlanAndPropConstants visitFilterNode(FilterNode & node, SortDescription & required) override; + PlanAndPropConstants visitTableScanNode(TableScanNode & node, SortDescription & required) override; private: ContextMutablePtr context; - SimpleCTEVisitHelper cte_helper; + SimpleCTEVisitHelper cte_helper; }; - struct SortInfo { SortDescription sort_desc; - SizeOrVariable limit; + SizeOrVariable limit = 0ul; }; -class PushSortingInfoRewriter : public SimplePlanRewriter +class PruneSortingInfoRewriter : public SimplePlanRewriter { public: - PushSortingInfoRewriter(ContextMutablePtr context_, CTEInfo & cte_info_) : SimplePlanRewriter(context_, cte_info_) + PruneSortingInfoRewriter(ContextMutablePtr context_, CTEInfo & cte_info_) + : SimplePlanRewriter(context_, cte_info_), logger(&Poco::Logger::get("PruneSortingInfoRewriter")) { } + + PlanNodePtr visitPlanNode(PlanNodeBase & node, SortInfo & required) override; PlanNodePtr visitSortingNode(SortingNode &, SortInfo &) override; PlanNodePtr visitAggregatingNode(AggregatingNode &, SortInfo &) override; PlanNodePtr visitWindowNode(WindowNode &, SortInfo &) override; - PlanNodePtr visitTableScanNode(TableScanNode &, SortInfo &) override; + PlanNodePtr visitTopNFilteringNode(TopNFilteringNode & node, SortInfo &) override; + PlanNodePtr visitProjectionNode(ProjectionNode & node, SortInfo & required) override; + PlanNodePtr visitTableScanNode(TableScanNode &, SortInfo & required) override; + +private: + Poco::Logger 
* logger; }; } diff --git a/src/Optimizer/Rule/Rewrite/PushPartialStepThroughExchangeRules.cpp b/src/Optimizer/Rule/Rewrite/PushPartialStepThroughExchangeRules.cpp index 5cf70e7c222..b48ea35d715 100644 --- a/src/Optimizer/Rule/Rewrite/PushPartialStepThroughExchangeRules.cpp +++ b/src/Optimizer/Rule/Rewrite/PushPartialStepThroughExchangeRules.cpp @@ -31,6 +31,7 @@ #include #include #include +#include namespace DB { @@ -295,8 +296,12 @@ TransformResult PushPartialAggThroughUnion::transformImpl(PlanNodePtr node, cons ConstRefPatternPtr PushPartialSortingThroughExchange::getPattern() const { - static auto pattern = Patterns::sorting().withSingle(Patterns::exchange().matchingStep( - [](const ExchangeStep & step) { return step.getExchangeMode() == ExchangeMode::GATHER; })).result(); + static auto pattern + = Patterns::sorting() + .matchingStep([](const SortingStep & step) { return step.getStage() == SortingStep::Stage::FULL; }) + .withSingle(Patterns::exchange().matchingStep( + [](const ExchangeStep & step) { return step.getExchangeMode() == ExchangeMode::GATHER; })) + .result(); return pattern; } @@ -347,6 +352,58 @@ TransformResult PushPartialSortingThroughExchange::transformImpl(PlanNodePtr nod return final_sort_node; } +ConstRefPatternPtr PushPartialSortingThroughUnion::getPattern() const +{ + static auto pattern + = Patterns::sorting() + .matchingStep([](const SortingStep & step) { return step.getStage() == SortingStep::Stage::PARTIAL; }) + .withSingle(Patterns::unionn()) + .result(); + return pattern; +} + +TransformResult PushPartialSortingThroughUnion::transformImpl(PlanNodePtr node, const Captures &, RuleContext & context) +{ + const auto * step = dynamic_cast(node->getStep().get()); + auto union_node = node->getChildren()[0]; + const auto * union_step = dynamic_cast(union_node->getStep().get()); + + PlanNodes union_inputs; + for (size_t index = 0; index < union_node->getChildren().size(); index++) + { + auto exchange_child = union_node->getChildren()[index]; + 
if (dynamic_cast(exchange_child.get())) + return {}; + + SortDescription new_sort_desc; + for (const auto & desc : step->getSortDescription()) + { + auto new_desc = desc; + const auto & out_to_inputs = union_step->getOutToInputs(); + if (!out_to_inputs.contains(desc.column_name) || out_to_inputs.at(desc.column_name).size() <= index) + throw Exception( + ErrorCodes::LOGICAL_ERROR, "PushPartialSortingThroughUnion: Can not find {} in out_to_inputs.", desc.column_name); + new_desc.column_name = union_step->getOutToInputs().at(desc.column_name).at(index); + new_sort_desc.emplace_back(new_desc); + } + + auto partial_sorting = std::make_unique( + exchange_child->getStep()->getOutputStream(), new_sort_desc, step->getLimit(), SortingStep::Stage::PARTIAL_NO_MERGE, SortDescription{}); + PlanNodes children{exchange_child}; + auto before_exchange_sort_node + = PlanNodeBase::createPlanNode(context.context->nextNodeId(), std::move(partial_sorting), children, node->getStatistics()); + union_inputs.emplace_back(before_exchange_sort_node); + } + + auto merging_sorted = std::make_unique( + step->getOutputStream(), step->getSortDescription(), step->getLimit(), SortingStep::Stage::MERGE, SortDescription{}); + + return PlanNodeBase::createPlanNode( + context.context->nextNodeId(), + std::move(merging_sorted), + {PlanNodeBase::createPlanNode(context.context->nextNodeId(), union_node->getStep(), union_inputs)}); +} + static bool isLimitNeeded(const LimitStep & limit, const PlanNodePtr & node) { auto range = PlanNodeCardinality::extractCardinality(*node); diff --git a/src/Optimizer/Rule/Rewrite/PushPartialStepThroughExchangeRules.h b/src/Optimizer/Rule/Rewrite/PushPartialStepThroughExchangeRules.h index f2973558e9f..648cdf381ab 100644 --- a/src/Optimizer/Rule/Rewrite/PushPartialStepThroughExchangeRules.h +++ b/src/Optimizer/Rule/Rewrite/PushPartialStepThroughExchangeRules.h @@ -62,6 +62,20 @@ class PushPartialSortingThroughExchange : public Rule TransformResult transformImpl(PlanNodePtr 
node, const Captures & captures, RuleContext & context) override; }; +class PushPartialSortingThroughUnion : public Rule +{ +public: + RuleType getType() const override { return RuleType::PUSH_PARTIAL_SORTING_THROUGH_UNION; } + String getName() const override { return "PUSH_PARTIAL_SORTING_THROUGH_UNION"; } + bool isEnabled(ContextPtr context) const override + { + return context->getSettingsRef().enable_push_partial_sorting_through_union; + } + ConstRefPatternPtr getPattern() const override; + + TransformResult transformImpl(PlanNodePtr node, const Captures & captures, RuleContext & context) override; +}; + class PushPartialLimitThroughExchange : public Rule { public: diff --git a/src/Optimizer/Rule/Rule.h b/src/Optimizer/Rule/Rule.h index 8045f40cf2d..0cfef52c85a 100644 --- a/src/Optimizer/Rule/Rule.h +++ b/src/Optimizer/Rule/Rule.h @@ -65,6 +65,7 @@ enum class RuleType : UInt32 PUSH_PARTIAL_AGG_THROUGH_EXCHANGE, PUSH_PARTIAL_AGG_THROUGH_UNION, PUSH_PARTIAL_SORTING_THROUGH_EXCHANGE, + PUSH_PARTIAL_SORTING_THROUGH_UNION, PUSH_PARTIAL_LIMIT_THROUGH_EXCHANGE, PUSH_PARTIAL_DISTINCT_THROUGH_EXCHANGE, diff --git a/src/Optimizer/Rule/Rules.cpp b/src/Optimizer/Rule/Rules.cpp index 9e5ccb588cc..f7042a8d924 100644 --- a/src/Optimizer/Rule/Rules.cpp +++ b/src/Optimizer/Rule/Rules.cpp @@ -97,6 +97,7 @@ std::vector Rules::pushPartialStepRules() std::make_shared(), std::make_shared(), std::make_shared(), + std::make_shared(), std::make_shared(), std::make_shared(), std::make_shared()}; diff --git a/src/Protos/plan_node.proto b/src/Protos/plan_node.proto index e74926417ca..421d203fdf5 100644 --- a/src/Protos/plan_node.proto +++ b/src/Protos/plan_node.proto @@ -89,6 +89,7 @@ message SortingStep { FULL = 0; MERGE = 1; PARTIAL = 2; + PARTIAL_NO_MERGE = 3; } } diff --git a/src/QueryPlan/GraphvizPrinter.cpp b/src/QueryPlan/GraphvizPrinter.cpp index 7430f1ecf5c..8bd72d9343e 100644 --- a/src/QueryPlan/GraphvizPrinter.cpp +++ b/src/QueryPlan/GraphvizPrinter.cpp @@ -2239,6 +2239,11 @@ 
String StepPrinter::printSortingStep(const SortingStep & step) details << "|"; details << "partial"; } + if (step.getStage() == SortingStep::Stage::PARTIAL_NO_MERGE) + { + details << "|"; + details << "partial no merge"; + } details << "|"; details << "Output |"; for (const auto & column : step.getOutputStream().header) diff --git a/src/QueryPlan/PlanPrinter.cpp b/src/QueryPlan/PlanPrinter.cpp index 57bef782f3e..b4ee30b8669 100644 --- a/src/QueryPlan/PlanPrinter.cpp +++ b/src/QueryPlan/PlanPrinter.cpp @@ -915,6 +915,14 @@ String PlanPrinter::TextPrinter::printDetail(QueryPlanStepPtr plan, const TextPr sort_columns.emplace_back(desc.format()); out << intent.detailIntent() << "Order by: " << join(sort_columns, ", ", "{", "}"); + if (!sort->getPrefixDescription().empty()) + { + std::vector prefix_sort_columns; + for (const auto & desc : sort->getPrefixDescription()) + prefix_sort_columns.emplace_back(desc.column_name); + out << intent.detailIntent() << "Prefix Order: " << join(prefix_sort_columns, ", ", "{", "}"); + } + std::visit( overloaded{ [&](size_t x) { diff --git a/src/QueryPlan/SortingStep.cpp b/src/QueryPlan/SortingStep.cpp index 50aa9f80a53..e789ea5261e 100644 --- a/src/QueryPlan/SortingStep.cpp +++ b/src/QueryPlan/SortingStep.cpp @@ -13,19 +13,20 @@ * limitations under the License. 
*/ +#include #include #include +#include #include #include #include #include #include #include -#include #include +#include +#include #include -#include "Core/SettingsEnums.h" -#include "QueryPlan/PlanSerDerHelper.h" namespace DB { @@ -51,7 +52,7 @@ SortingStep::SortingStep( Stage stage_, SortDescription prefix_description_, bool enable_adaptive_spill_) - : ITransformingStep(input_stream_, input_stream_.header, getTraits(limit_, stage_ != Stage::PARTIAL)) + : ITransformingStep(input_stream_, input_stream_.header, getTraits(limit_, stage_ != Stage::PARTIAL && stage_ != Stage::PARTIAL_NO_MERGE)) , result_description(result_description_) , limit(limit_) , stage(stage_) @@ -62,7 +63,7 @@ SortingStep::SortingStep( /// TODO: support mannual/auto spill output_stream->sort_description = result_description; output_stream->sort_mode - = (input_stream_.has_single_port || stage_ != Stage::PARTIAL) ? DataStream::SortMode::Stream : DataStream::SortMode::Port; + = (input_stream_.has_single_port || (stage_ != Stage::PARTIAL && stage_ != Stage::PARTIAL_NO_MERGE)) ? 
DataStream::SortMode::Stream : DataStream::SortMode::Port; } void SortingStep::setInputStreams(const DataStreams & input_streams_) @@ -87,36 +88,52 @@ void SortingStep::transformPipeline(QueryPipeline & pipeline, const BuildQueryPi auto desc_copy = result_description; - if (stage == Stage::FULL || stage == Stage::PARTIAL) + if (stage == Stage::FULL || stage == Stage::PARTIAL || stage == Stage::PARTIAL_NO_MERGE) { // finish sorting if (!prefix_description.empty()) { bool need_finish_sorting = (prefix_description.size() < result_description.size()); + + if (!need_finish_sorting) + { + if (pipeline.getNumStreams() > 1 && stage != Stage::PARTIAL_NO_MERGE) + { + auto transform = std::make_shared( + pipeline.getHeader(), pipeline.getNumStreams(), prefix_description, local_settings.max_block_size, getLimitValue()); + + pipeline.addTransform(std::move(transform)); + } + if (getLimitValue() > 0) + { + auto transform = std::make_shared( + pipeline.getHeader(), getLimitValue(), 0, pipeline.getNumStreams(), false, false, result_description); + pipeline.addTransform(std::move(transform)); + } + return; + } + if (pipeline.getNumStreams() > 1) { - UInt64 limit_for_merging = (need_finish_sorting ? 
0 : getLimitValue()); + UInt64 limit_for_merging = 0; // need_finish_sorting auto transform = std::make_shared( pipeline.getHeader(), pipeline.getNumStreams(), prefix_description, local_settings.max_block_size, limit_for_merging); pipeline.addTransform(std::move(transform)); } - if (need_finish_sorting) - { - pipeline.addSimpleTransform([&](const Block & header, QueryPipeline::StreamType stream_type) -> ProcessorPtr { - if (stream_type != QueryPipeline::StreamType::Main) - return nullptr; - - return std::make_shared(header, result_description, getLimitValue()); - }); - - /// NOTE limits are not applied to the size of temporary sets in FinishSortingTransform - pipeline.addSimpleTransform([&](const Block & header) -> ProcessorPtr { - return std::make_shared( - header, prefix_description, result_description, local_settings.max_block_size, getLimitValue()); - }); - } + pipeline.addSimpleTransform([&](const Block & header, QueryPipeline::StreamType stream_type) -> ProcessorPtr { + if (stream_type != QueryPipeline::StreamType::Main) + return nullptr; + + return std::make_shared(header, result_description, getLimitValue()); + }); + + /// NOTE limits are not applied to the size of temporary sets in FinishSortingTransform + pipeline.addSimpleTransform([&](const Block & header) -> ProcessorPtr { + return std::make_shared( + header, prefix_description, result_description, local_settings.max_block_size, getLimitValue()); + }); return; } @@ -155,6 +172,16 @@ void SortingStep::transformPipeline(QueryPipeline & pipeline, const BuildQueryPi local_settings.min_free_disk_space_for_temporary_data, local_settings.spill_mode == SpillMode::AUTO); }); + + /// If there are several streams, then we merge them into one + if (pipeline.getNumStreams() > 1 && stage != Stage::PARTIAL_NO_MERGE) + { + auto transform = std::make_shared( + pipeline.getHeader(), pipeline.getNumStreams(), desc_copy, local_settings.max_block_size, getLimitValue()); + + pipeline.addTransform(std::move(transform)); + } 
+ return; } /// If there are several streams, then we merge them into one diff --git a/src/QueryPlan/SortingStep.h b/src/QueryPlan/SortingStep.h index 3af210406cd..4454a2b1cf4 100644 --- a/src/QueryPlan/SortingStep.h +++ b/src/QueryPlan/SortingStep.h @@ -31,7 +31,8 @@ class SortingStep : public ITransformingStep Protos::SortingStep::Stage, // proto enum message (FULL), (MERGE), - (PARTIAL) + (PARTIAL), + (PARTIAL_NO_MERGE) ); explicit SortingStep(const DataStream & input_stream, SortDescription description_, SizeOrVariable limit_, Stage stage_, SortDescription prefix_description_ = {}, bool enable_adaptive_spill_ = false); diff --git a/src/QueryPlan/TableScanStep.cpp b/src/QueryPlan/TableScanStep.cpp index ab00bb6f4d8..f079c4df848 100644 --- a/src/QueryPlan/TableScanStep.cpp +++ b/src/QueryPlan/TableScanStep.cpp @@ -1934,9 +1934,16 @@ void TableScanStep::setQuotaAndLimits(QueryPipeline & pipeline, const SelectQuer void TableScanStep::setReadOrder(SortDescription read_order) { if (!read_order.empty()) - { query_info.input_order_info = std::make_shared(read_order, read_order[0].direction); - } + else + query_info.input_order_info = nullptr; +} + +SortDescription TableScanStep::getReadOrder() const +{ + if (query_info.input_order_info) + return query_info.input_order_info->order_key_prefix_descr; + return SortDescription{}; } Names TableScanStep::getRequiredColumns(GetFlags flags) const diff --git a/src/QueryPlan/TableScanStep.h b/src/QueryPlan/TableScanStep.h index 7c81390276c..ce71efae9ae 100644 --- a/src/QueryPlan/TableScanStep.h +++ b/src/QueryPlan/TableScanStep.h @@ -138,6 +138,7 @@ class TableScanStep : public ISourceStep } void setReadOrder(SortDescription read_order); + SortDescription getReadOrder() const; void formatOutputStream(ContextPtr context); diff --git a/tests/optimizers/tpcds/explains/tpcds100/q23.explain b/tests/optimizers/tpcds/explains/tpcds100/q23.explain index 90a7db1d2a2..7bd8c0b44b3 100644 --- 
a/tests/optimizers/tpcds/explains/tpcds100/q23.explain +++ b/tests/optimizers/tpcds/explains/tpcds100/q23.explain @@ -162,106 +162,112 @@ Projection Est. 100 rows └─ Sorting Est. 100 rows │ Order by: {c_last_name_2 ASC NULLS LAST, c_first_name_2 ASC NULLS LAST, expr#sum(multiply(cs_quantity, cs_list_price))_1 ASC NULLS LAST} │ Limit: 100 - └─ Union Est. 2094756 rows + └─ Union Est. 200 rows │ OutputToInputs: c_first_name_2 = [c_first_name,c_first_name_1], expr#sum(multiply(cs_quantity, cs_list_price))_1 = [expr#sum(multiply(cs_quantity, cs_list_price)),expr#sum(multiply(ws_quantity, ws_list_price))], c_last_name_2 = [c_last_name,c_last_name_1] - ├─ MergingAggregated Est. 1098322 rows - │ └─ Repartition Exchange Est. 1098322 rows - │ │ Partition by: {c_last_name, c_first_name} - │ └─ Aggregating Est. 1098322 rows - │ │ Group by: {c_last_name, c_first_name} - │ │ Aggregates: expr#sum(multiply(cs_quantity, cs_list_price)):=AggNull(sum)(expr#multiply(cs_quantity, cs_list_price)_2) - │ └─ Projection Est. 1098322 rows - │ │ Expressions: [c_first_name, c_last_name], expr#multiply(cs_quantity, cs_list_price)_2:=cs_quantity_1 * cs_list_price_1 - │ └─ Inner Join Est. 1098322 rows - │ │ Condition: c_customer_sk_4 == cs_bill_customer_sk_1 - │ │ Runtime Filters Builder: {cs_bill_customer_sk_1} - │ ├─ Repartition Exchange Est. 2000000 rows - │ │ │ Partition by: {c_customer_sk_4} - │ │ └─ Filter Est. 2000000 rows - │ │ │ Condition: Runtime Filters: {c_customer_sk_4} - │ │ └─ TableScan tpcds100.customer Est. 2000000 rows - │ │ Where: Runtime Filters: {c_customer_sk} - │ │ Outputs: [c_first_name, c_last_name], c_customer_sk_4:=c_customer_sk - │ └─ Left Semi Join Est. 1098322 rows - │ │ Condition: cs_bill_customer_sk_1 == c_customer_sk_7 - │ │ Runtime Filters Builder: {c_customer_sk_7} - │ ├─ Repartition Exchange Est. 2196033 rows - │ │ │ Partition by: {cs_bill_customer_sk_1} - │ │ └─ Right Semi (PARALLEL_HASH) Join Est. 
2196033 rows - │ │ │ Condition: i_item_sk_6 == cs_item_sk_1 - │ │ │ Runtime Filters Builder: {cs_item_sk_1} - │ │ ├─ Filter Est. 14386027 rows - │ │ │ │ Condition: Runtime Filters: {i_item_sk_6} - │ │ │ └─ Local Exchange Est. 57544111 rows - │ │ │ └─ Buffer Est. 57544111 rows - │ │ │ └─ CTERef[0] Est. 57544111 rows - │ │ └─ Inner Join Est. 2196033 rows - │ │ │ Condition: cs_sold_date_sk_1 == d_date_sk_10 - │ │ │ Runtime Filters Builder: {d_date_sk_10} - │ │ ├─ Filter Est. 143997065 rows - │ │ │ │ Condition: Runtime Filters: {cs_bill_customer_sk_1, cs_sold_date_sk_1} - │ │ │ └─ TableScan tpcds100.catalog_sales Est. 143997065 rows - │ │ │ Where: Runtime Filters: {cs_bill_customer_sk, cs_sold_date_sk} - │ │ │ Outputs: cs_sold_date_sk_1:=cs_sold_date_sk, cs_bill_customer_sk_1:=cs_bill_customer_sk, cs_item_sk_1:=cs_item_sk, cs_quantity_1:=cs_quantity, cs_list_price_1:=cs_list_price - │ │ └─ Broadcast Exchange Est. 28 rows - │ │ └─ Projection Est. 28 rows - │ │ │ Expressions: [d_date_sk_10] - │ │ └─ Filter Est. 28 rows - │ │ │ Condition: (d_moy_2 = 2) AND (d_year_10 = 2000) - │ │ └─ TableScan tpcds100.date_dim Est. 73049 rows - │ │ Where: (d_moy = 2) AND (d_year = 2000) - │ │ Outputs: d_date_sk_10:=d_date_sk, d_year_10:=d_year, d_moy_2:=d_moy - │ └─ Buffer Est. 996434 rows - │ └─ CTERef[1] Est. 996434 rows - └─ MergingAggregated Est. 996434 rows - └─ Repartition Exchange Est. 999986 rows - │ Partition by: {c_last_name_1, c_first_name_1} - └─ Aggregating Est. 999986 rows - │ Group by: {c_last_name_1, c_first_name_1} - │ Aggregates: expr#sum(multiply(ws_quantity, ws_list_price)):=AggNull(sum)(expr#multiply(ws_quantity, ws_list_price)_1) - └─ Projection Est. 999986 rows - │ Expressions: [c_first_name_1, c_last_name_1], expr#multiply(ws_quantity, ws_list_price)_1:=ws_quantity_1 * ws_list_price_1 - └─ Inner Join Est. 999986 rows - │ Condition: c_customer_sk_8 == ws_bill_customer_sk_1 - │ Runtime Filters Builder: {ws_bill_customer_sk_1} - ├─ Repartition Exchange Est. 
2000000 rows - │ │ Partition by: {c_customer_sk_8} - │ └─ Filter Est. 2000000 rows - │ │ Condition: Runtime Filters: {c_customer_sk_8} - │ └─ TableScan tpcds100.customer Est. 2000000 rows - │ Where: Runtime Filters: {c_customer_sk} - │ Outputs: c_customer_sk_8:=c_customer_sk, c_first_name_1:=c_first_name, c_last_name_1:=c_last_name - └─ Left Semi Join Est. 996434 rows - │ Condition: ws_bill_customer_sk_1 == c_customer_sk_9 - │ Runtime Filters Builder: {c_customer_sk_9} - ├─ Repartition Exchange Est. 1105888 rows - │ │ Partition by: {ws_bill_customer_sk_1} - │ └─ Right Semi Join Est. 1105888 rows - │ │ Condition: i_item_sk_7 == ws_item_sk_1 - │ │ Runtime Filters Builder: {ws_item_sk_1} - │ ├─ Filter Est. 14386027 rows - │ │ │ Condition: Runtime Filters: {i_item_sk_7} - │ │ └─ Local Exchange Est. 57544111 rows - │ │ └─ Buffer Est. 57544111 rows - │ │ └─ CTERef[0] Est. 57544111 rows - │ └─ Inner Join Est. 1105888 rows - │ │ Condition: ws_sold_date_sk_1 == d_date_sk_13 - │ │ Runtime Filters Builder: {d_date_sk_13} - │ ├─ Filter Est. 72001237 rows - │ │ │ Condition: Runtime Filters: {ws_bill_customer_sk_1, ws_sold_date_sk_1} - │ │ └─ TableScan tpcds100.web_sales Est. 72001237 rows - │ │ Where: Runtime Filters: {ws_bill_customer_sk, ws_sold_date_sk} - │ │ Outputs: ws_sold_date_sk_1:=ws_sold_date_sk, ws_item_sk_1:=ws_item_sk, ws_bill_customer_sk_1:=ws_bill_customer_sk, ws_quantity_1:=ws_quantity, ws_list_price_1:=ws_list_price - │ └─ Broadcast Exchange Est. 28 rows - │ └─ Projection Est. 28 rows - │ │ Expressions: [d_date_sk_13] - │ └─ Filter Est. 28 rows - │ │ Condition: (d_moy_3 = 2) AND (d_year_13 = 2000) - │ └─ TableScan tpcds100.date_dim Est. 73049 rows - │ Where: (d_moy = 2) AND (d_year = 2000) - │ Outputs: d_date_sk_13:=d_date_sk, d_year_13:=d_year, d_moy_3:=d_moy - └─ Buffer Est. 996434 rows - └─ CTERef[1] Est. 996434 rows + ├─ Sorting Est. 
100 rows + │ │ Order by: {c_last_name ASC NULLS LAST, c_first_name ASC NULLS LAST, expr#sum(multiply(cs_quantity, cs_list_price)) ASC NULLS LAST} + │ │ Limit: 100 + │ └─ MergingAggregated Est. 1098322 rows + │ └─ Repartition Exchange Est. 1098322 rows + │ │ Partition by: {c_last_name, c_first_name} + │ └─ Aggregating Est. 1098322 rows + │ │ Group by: {c_last_name, c_first_name} + │ │ Aggregates: expr#sum(multiply(cs_quantity, cs_list_price)):=AggNull(sum)(expr#multiply(cs_quantity, cs_list_price)_2) + │ └─ Projection Est. 1098322 rows + │ │ Expressions: [c_first_name, c_last_name], expr#multiply(cs_quantity, cs_list_price)_2:=cs_quantity_1 * cs_list_price_1 + │ └─ Inner Join Est. 1098322 rows + │ │ Condition: c_customer_sk_4 == cs_bill_customer_sk_1 + │ │ Runtime Filters Builder: {cs_bill_customer_sk_1} + │ ├─ Repartition Exchange Est. 2000000 rows + │ │ │ Partition by: {c_customer_sk_4} + │ │ └─ Filter Est. 2000000 rows + │ │ │ Condition: Runtime Filters: {c_customer_sk_4} + │ │ └─ TableScan tpcds100.customer Est. 2000000 rows + │ │ Where: Runtime Filters: {c_customer_sk} + │ │ Outputs: [c_first_name, c_last_name], c_customer_sk_4:=c_customer_sk + │ └─ Left Semi Join Est. 1098322 rows + │ │ Condition: cs_bill_customer_sk_1 == c_customer_sk_7 + │ │ Runtime Filters Builder: {c_customer_sk_7} + │ ├─ Repartition Exchange Est. 2196033 rows + │ │ │ Partition by: {cs_bill_customer_sk_1} + │ │ └─ Right Semi (PARALLEL_HASH) Join Est. 2196033 rows + │ │ │ Condition: i_item_sk_6 == cs_item_sk_1 + │ │ │ Runtime Filters Builder: {cs_item_sk_1} + │ │ ├─ Filter Est. 14386027 rows + │ │ │ │ Condition: Runtime Filters: {i_item_sk_6} + │ │ │ └─ Local Exchange Est. 57544111 rows + │ │ │ └─ Buffer Est. 57544111 rows + │ │ │ └─ CTERef[0] Est. 57544111 rows + │ │ └─ Inner Join Est. 2196033 rows + │ │ │ Condition: cs_sold_date_sk_1 == d_date_sk_10 + │ │ │ Runtime Filters Builder: {d_date_sk_10} + │ │ ├─ Filter Est. 
143997065 rows + │ │ │ │ Condition: Runtime Filters: {cs_bill_customer_sk_1, cs_sold_date_sk_1} + │ │ │ └─ TableScan tpcds100.catalog_sales Est. 143997065 rows + │ │ │ Where: Runtime Filters: {cs_bill_customer_sk, cs_sold_date_sk} + │ │ │ Outputs: cs_sold_date_sk_1:=cs_sold_date_sk, cs_bill_customer_sk_1:=cs_bill_customer_sk, cs_item_sk_1:=cs_item_sk, cs_quantity_1:=cs_quantity, cs_list_price_1:=cs_list_price + │ │ └─ Broadcast Exchange Est. 28 rows + │ │ └─ Projection Est. 28 rows + │ │ │ Expressions: [d_date_sk_10] + │ │ └─ Filter Est. 28 rows + │ │ │ Condition: (d_moy_2 = 2) AND (d_year_10 = 2000) + │ │ └─ TableScan tpcds100.date_dim Est. 73049 rows + │ │ Where: (d_moy = 2) AND (d_year = 2000) + │ │ Outputs: d_date_sk_10:=d_date_sk, d_year_10:=d_year, d_moy_2:=d_moy + │ └─ Buffer Est. 996434 rows + │ └─ CTERef[1] Est. 996434 rows + └─ Sorting Est. 100 rows + │ Order by: {c_last_name_1 ASC NULLS LAST, c_first_name_1 ASC NULLS LAST, expr#sum(multiply(ws_quantity, ws_list_price)) ASC NULLS LAST} + │ Limit: 100 + └─ MergingAggregated Est. 996434 rows + └─ Repartition Exchange Est. 999986 rows + │ Partition by: {c_last_name_1, c_first_name_1} + └─ Aggregating Est. 999986 rows + │ Group by: {c_last_name_1, c_first_name_1} + │ Aggregates: expr#sum(multiply(ws_quantity, ws_list_price)):=AggNull(sum)(expr#multiply(ws_quantity, ws_list_price)_1) + └─ Projection Est. 999986 rows + │ Expressions: [c_first_name_1, c_last_name_1], expr#multiply(ws_quantity, ws_list_price)_1:=ws_quantity_1 * ws_list_price_1 + └─ Inner Join Est. 999986 rows + │ Condition: c_customer_sk_8 == ws_bill_customer_sk_1 + │ Runtime Filters Builder: {ws_bill_customer_sk_1} + ├─ Repartition Exchange Est. 2000000 rows + │ │ Partition by: {c_customer_sk_8} + │ └─ Filter Est. 2000000 rows + │ │ Condition: Runtime Filters: {c_customer_sk_8} + │ └─ TableScan tpcds100.customer Est. 
2000000 rows + │ Where: Runtime Filters: {c_customer_sk} + │ Outputs: c_customer_sk_8:=c_customer_sk, c_first_name_1:=c_first_name, c_last_name_1:=c_last_name + └─ Left Semi Join Est. 996434 rows + │ Condition: ws_bill_customer_sk_1 == c_customer_sk_9 + │ Runtime Filters Builder: {c_customer_sk_9} + ├─ Repartition Exchange Est. 1105888 rows + │ │ Partition by: {ws_bill_customer_sk_1} + │ └─ Right Semi Join Est. 1105888 rows + │ │ Condition: i_item_sk_7 == ws_item_sk_1 + │ │ Runtime Filters Builder: {ws_item_sk_1} + │ ├─ Filter Est. 14386027 rows + │ │ │ Condition: Runtime Filters: {i_item_sk_7} + │ │ └─ Local Exchange Est. 57544111 rows + │ │ └─ Buffer Est. 57544111 rows + │ │ └─ CTERef[0] Est. 57544111 rows + │ └─ Inner Join Est. 1105888 rows + │ │ Condition: ws_sold_date_sk_1 == d_date_sk_13 + │ │ Runtime Filters Builder: {d_date_sk_13} + │ ├─ Filter Est. 72001237 rows + │ │ │ Condition: Runtime Filters: {ws_bill_customer_sk_1, ws_sold_date_sk_1} + │ │ └─ TableScan tpcds100.web_sales Est. 72001237 rows + │ │ Where: Runtime Filters: {ws_bill_customer_sk, ws_sold_date_sk} + │ │ Outputs: ws_sold_date_sk_1:=ws_sold_date_sk, ws_item_sk_1:=ws_item_sk, ws_bill_customer_sk_1:=ws_bill_customer_sk, ws_quantity_1:=ws_quantity, ws_list_price_1:=ws_list_price + │ └─ Broadcast Exchange Est. 28 rows + │ └─ Projection Est. 28 rows + │ │ Expressions: [d_date_sk_13] + │ └─ Filter Est. 28 rows + │ │ Condition: (d_moy_3 = 2) AND (d_year_13 = 2000) + │ └─ TableScan tpcds100.date_dim Est. 73049 rows + │ Where: (d_moy = 2) AND (d_year = 2000) + │ Outputs: d_date_sk_13:=d_date_sk, d_year_13:=d_year, d_moy_3:=d_moy + └─ Buffer Est. 996434 rows + └─ CTERef[1] Est. 996434 rows CTEDef [0] Projection Est. 
57544111 rows │ Expressions: i_item_sk_5:=ss_item_sk_3 diff --git a/tests/optimizers/tpcds/explains/tpcds1000/q23.explain b/tests/optimizers/tpcds/explains/tpcds1000/q23.explain index a2a5ddb4ef6..68f9d9a82e5 100644 --- a/tests/optimizers/tpcds/explains/tpcds1000/q23.explain +++ b/tests/optimizers/tpcds/explains/tpcds1000/q23.explain @@ -162,106 +162,112 @@ Projection Est. 100 rows └─ Sorting Est. 100 rows │ Order by: {c_last_name_2 ASC NULLS LAST, c_first_name_2 ASC NULLS LAST, expr#sum(multiply(cs_quantity, cs_list_price))_1 ASC NULLS LAST} │ Limit: 100 - └─ Union Est. 16955595 rows + └─ Union Est. 200 rows │ OutputToInputs: c_first_name_2 = [c_first_name,c_first_name_1], expr#sum(multiply(cs_quantity, cs_list_price))_1 = [expr#sum(multiply(cs_quantity, cs_list_price)),expr#sum(multiply(ws_quantity, ws_list_price))], c_last_name_2 = [c_last_name,c_last_name_1] - ├─ MergingAggregated Est. 10980241 rows - │ └─ Repartition Exchange Est. 10980241 rows - │ │ Partition by: {c_last_name, c_first_name} - │ └─ Aggregating Est. 10980241 rows - │ │ Group by: {c_last_name, c_first_name} - │ │ Aggregates: expr#sum(multiply(cs_quantity, cs_list_price)):=AggNull(sum)(expr#multiply(cs_quantity, cs_list_price)_2) - │ └─ Projection Est. 10980241 rows - │ │ Expressions: [c_first_name, c_last_name], expr#multiply(cs_quantity, cs_list_price)_2:=cs_quantity_1 * cs_list_price_1 - │ └─ Inner (PARALLEL_HASH) Join Est. 10980241 rows - │ │ Condition: c_customer_sk_4 == cs_bill_customer_sk_1 - │ │ Runtime Filters Builder: {cs_bill_customer_sk_1} - │ ├─ Repartition Exchange Est. 12000000 rows - │ │ │ Partition by: {c_customer_sk_4} - │ │ └─ Filter Est. 12000000 rows - │ │ │ Condition: Runtime Filters: {c_customer_sk_4} - │ │ └─ TableScan tpcds1000.customer Est. 12000000 rows - │ │ Where: Runtime Filters: {c_customer_sk} - │ │ Outputs: [c_first_name, c_last_name], c_customer_sk_4:=c_customer_sk - │ └─ Left Semi (PARALLEL_HASH) Join Est. 
10980241 rows - │ │ Condition: cs_bill_customer_sk_1 == c_customer_sk_7 - │ │ Runtime Filters Builder: {c_customer_sk_7} - │ ├─ Repartition Exchange Est. 21960485 rows - │ │ │ Partition by: {cs_bill_customer_sk_1} - │ │ └─ Right Semi (PARALLEL_HASH) Join Est. 21960485 rows - │ │ │ Condition: i_item_sk_6 == cs_item_sk_1 - │ │ │ Runtime Filters Builder: {cs_item_sk_1} - │ │ ├─ Filter Est. 143861166 rows - │ │ │ │ Condition: Runtime Filters: {i_item_sk_6} - │ │ │ └─ Local Exchange Est. 575444667 rows - │ │ │ └─ Buffer Est. 575444667 rows - │ │ │ └─ CTERef[0] Est. 575444667 rows - │ │ └─ Inner Join Est. 21960485 rows - │ │ │ Condition: cs_sold_date_sk_1 == d_date_sk_10 - │ │ │ Runtime Filters Builder: {d_date_sk_10} - │ │ ├─ Filter Est. 1439980416 rows - │ │ │ │ Condition: Runtime Filters: {cs_bill_customer_sk_1, cs_sold_date_sk_1} - │ │ │ └─ TableScan tpcds1000.catalog_sales Est. 1439980416 rows - │ │ │ Where: Runtime Filters: {cs_bill_customer_sk, cs_sold_date_sk} - │ │ │ Outputs: cs_sold_date_sk_1:=cs_sold_date_sk, cs_bill_customer_sk_1:=cs_bill_customer_sk, cs_item_sk_1:=cs_item_sk, cs_quantity_1:=cs_quantity, cs_list_price_1:=cs_list_price - │ │ └─ Broadcast Exchange Est. 28 rows - │ │ └─ Projection Est. 28 rows - │ │ │ Expressions: [d_date_sk_10] - │ │ └─ Filter Est. 28 rows - │ │ │ Condition: (d_moy_2 = 2) AND (d_year_10 = 2000) - │ │ └─ TableScan tpcds1000.date_dim Est. 73049 rows - │ │ Where: (d_moy = 2) AND (d_year = 2000) - │ │ Outputs: d_date_sk_10:=d_date_sk, d_year_10:=d_year, d_moy_2:=d_moy - │ └─ Buffer Est. 5975354 rows - │ └─ CTERef[1] Est. 5975354 rows - └─ MergingAggregated Est. 5975354 rows - └─ Repartition Exchange Est. 5999999 rows - │ Partition by: {c_last_name_1, c_first_name_1} - └─ Aggregating Est. 5999999 rows - │ Group by: {c_last_name_1, c_first_name_1} - │ Aggregates: expr#sum(multiply(ws_quantity, ws_list_price)):=AggNull(sum)(expr#multiply(ws_quantity, ws_list_price)_1) - └─ Projection Est. 
5999999 rows - │ Expressions: [c_first_name_1, c_last_name_1], expr#multiply(ws_quantity, ws_list_price)_1:=ws_quantity_1 * ws_list_price_1 - └─ Inner (PARALLEL_HASH) Join Est. 5999999 rows - │ Condition: c_customer_sk_8 == ws_bill_customer_sk_1 - │ Runtime Filters Builder: {ws_bill_customer_sk_1} - ├─ Repartition Exchange Est. 12000000 rows - │ │ Partition by: {c_customer_sk_8} - │ └─ Filter Est. 12000000 rows - │ │ Condition: Runtime Filters: {c_customer_sk_8} - │ └─ TableScan tpcds1000.customer Est. 12000000 rows - │ Where: Runtime Filters: {c_customer_sk} - │ Outputs: c_customer_sk_8:=c_customer_sk, c_first_name_1:=c_first_name, c_last_name_1:=c_last_name - └─ Left Semi (PARALLEL_HASH) Join Est. 5975354 rows - │ Condition: ws_bill_customer_sk_1 == c_customer_sk_9 - │ Runtime Filters Builder: {c_customer_sk_9} - ├─ Repartition Exchange Est. 11058700 rows - │ │ Partition by: {ws_bill_customer_sk_1} - │ └─ Right Semi (PARALLEL_HASH) Join Est. 11058700 rows - │ │ Condition: i_item_sk_7 == ws_item_sk_1 - │ │ Runtime Filters Builder: {ws_item_sk_1} - │ ├─ Filter Est. 143861166 rows - │ │ │ Condition: Runtime Filters: {i_item_sk_7} - │ │ └─ Local Exchange Est. 575444667 rows - │ │ └─ Buffer Est. 575444667 rows - │ │ └─ CTERef[0] Est. 575444667 rows - │ └─ Inner Join Est. 11058700 rows - │ │ Condition: ws_sold_date_sk_1 == d_date_sk_13 - │ │ Runtime Filters Builder: {d_date_sk_13} - │ ├─ Filter Est. 720000376 rows - │ │ │ Condition: Runtime Filters: {ws_bill_customer_sk_1, ws_sold_date_sk_1} - │ │ └─ TableScan tpcds1000.web_sales Est. 720000376 rows - │ │ Where: Runtime Filters: {ws_bill_customer_sk, ws_sold_date_sk} - │ │ Outputs: ws_sold_date_sk_1:=ws_sold_date_sk, ws_item_sk_1:=ws_item_sk, ws_bill_customer_sk_1:=ws_bill_customer_sk, ws_quantity_1:=ws_quantity, ws_list_price_1:=ws_list_price - │ └─ Broadcast Exchange Est. 28 rows - │ └─ Projection Est. 28 rows - │ │ Expressions: [d_date_sk_13] - │ └─ Filter Est. 
28 rows - │ │ Condition: (d_moy_3 = 2) AND (d_year_13 = 2000) - │ └─ TableScan tpcds1000.date_dim Est. 73049 rows - │ Where: (d_moy = 2) AND (d_year = 2000) - │ Outputs: d_date_sk_13:=d_date_sk, d_year_13:=d_year, d_moy_3:=d_moy - └─ Buffer Est. 5975354 rows - └─ CTERef[1] Est. 5975354 rows + ├─ Sorting Est. 100 rows + │ │ Order by: {c_last_name ASC NULLS LAST, c_first_name ASC NULLS LAST, expr#sum(multiply(cs_quantity, cs_list_price)) ASC NULLS LAST} + │ │ Limit: 100 + │ └─ MergingAggregated Est. 10980241 rows + │ └─ Repartition Exchange Est. 10980241 rows + │ │ Partition by: {c_last_name, c_first_name} + │ └─ Aggregating Est. 10980241 rows + │ │ Group by: {c_last_name, c_first_name} + │ │ Aggregates: expr#sum(multiply(cs_quantity, cs_list_price)):=AggNull(sum)(expr#multiply(cs_quantity, cs_list_price)_2) + │ └─ Projection Est. 10980241 rows + │ │ Expressions: [c_first_name, c_last_name], expr#multiply(cs_quantity, cs_list_price)_2:=cs_quantity_1 * cs_list_price_1 + │ └─ Inner (PARALLEL_HASH) Join Est. 10980241 rows + │ │ Condition: c_customer_sk_4 == cs_bill_customer_sk_1 + │ │ Runtime Filters Builder: {cs_bill_customer_sk_1} + │ ├─ Repartition Exchange Est. 12000000 rows + │ │ │ Partition by: {c_customer_sk_4} + │ │ └─ Filter Est. 12000000 rows + │ │ │ Condition: Runtime Filters: {c_customer_sk_4} + │ │ └─ TableScan tpcds1000.customer Est. 12000000 rows + │ │ Where: Runtime Filters: {c_customer_sk} + │ │ Outputs: [c_first_name, c_last_name], c_customer_sk_4:=c_customer_sk + │ └─ Left Semi (PARALLEL_HASH) Join Est. 10980241 rows + │ │ Condition: cs_bill_customer_sk_1 == c_customer_sk_7 + │ │ Runtime Filters Builder: {c_customer_sk_7} + │ ├─ Repartition Exchange Est. 21960485 rows + │ │ │ Partition by: {cs_bill_customer_sk_1} + │ │ └─ Right Semi (PARALLEL_HASH) Join Est. 21960485 rows + │ │ │ Condition: i_item_sk_6 == cs_item_sk_1 + │ │ │ Runtime Filters Builder: {cs_item_sk_1} + │ │ ├─ Filter Est. 
143861166 rows + │ │ │ │ Condition: Runtime Filters: {i_item_sk_6} + │ │ │ └─ Local Exchange Est. 575444667 rows + │ │ │ └─ Buffer Est. 575444667 rows + │ │ │ └─ CTERef[0] Est. 575444667 rows + │ │ └─ Inner Join Est. 21960485 rows + │ │ │ Condition: cs_sold_date_sk_1 == d_date_sk_10 + │ │ │ Runtime Filters Builder: {d_date_sk_10} + │ │ ├─ Filter Est. 1439980416 rows + │ │ │ │ Condition: Runtime Filters: {cs_bill_customer_sk_1, cs_sold_date_sk_1} + │ │ │ └─ TableScan tpcds1000.catalog_sales Est. 1439980416 rows + │ │ │ Where: Runtime Filters: {cs_bill_customer_sk, cs_sold_date_sk} + │ │ │ Outputs: cs_sold_date_sk_1:=cs_sold_date_sk, cs_bill_customer_sk_1:=cs_bill_customer_sk, cs_item_sk_1:=cs_item_sk, cs_quantity_1:=cs_quantity, cs_list_price_1:=cs_list_price + │ │ └─ Broadcast Exchange Est. 28 rows + │ │ └─ Projection Est. 28 rows + │ │ │ Expressions: [d_date_sk_10] + │ │ └─ Filter Est. 28 rows + │ │ │ Condition: (d_moy_2 = 2) AND (d_year_10 = 2000) + │ │ └─ TableScan tpcds1000.date_dim Est. 73049 rows + │ │ Where: (d_moy = 2) AND (d_year = 2000) + │ │ Outputs: d_date_sk_10:=d_date_sk, d_year_10:=d_year, d_moy_2:=d_moy + │ └─ Buffer Est. 5975354 rows + │ └─ CTERef[1] Est. 5975354 rows + └─ Sorting Est. 100 rows + │ Order by: {c_last_name_1 ASC NULLS LAST, c_first_name_1 ASC NULLS LAST, expr#sum(multiply(ws_quantity, ws_list_price)) ASC NULLS LAST} + │ Limit: 100 + └─ MergingAggregated Est. 5975354 rows + └─ Repartition Exchange Est. 5999999 rows + │ Partition by: {c_last_name_1, c_first_name_1} + └─ Aggregating Est. 5999999 rows + │ Group by: {c_last_name_1, c_first_name_1} + │ Aggregates: expr#sum(multiply(ws_quantity, ws_list_price)):=AggNull(sum)(expr#multiply(ws_quantity, ws_list_price)_1) + └─ Projection Est. 5999999 rows + │ Expressions: [c_first_name_1, c_last_name_1], expr#multiply(ws_quantity, ws_list_price)_1:=ws_quantity_1 * ws_list_price_1 + └─ Inner (PARALLEL_HASH) Join Est. 
5999999 rows + │ Condition: c_customer_sk_8 == ws_bill_customer_sk_1 + │ Runtime Filters Builder: {ws_bill_customer_sk_1} + ├─ Repartition Exchange Est. 12000000 rows + │ │ Partition by: {c_customer_sk_8} + │ └─ Filter Est. 12000000 rows + │ │ Condition: Runtime Filters: {c_customer_sk_8} + │ └─ TableScan tpcds1000.customer Est. 12000000 rows + │ Where: Runtime Filters: {c_customer_sk} + │ Outputs: c_customer_sk_8:=c_customer_sk, c_first_name_1:=c_first_name, c_last_name_1:=c_last_name + └─ Left Semi (PARALLEL_HASH) Join Est. 5975354 rows + │ Condition: ws_bill_customer_sk_1 == c_customer_sk_9 + │ Runtime Filters Builder: {c_customer_sk_9} + ├─ Repartition Exchange Est. 11058700 rows + │ │ Partition by: {ws_bill_customer_sk_1} + │ └─ Right Semi (PARALLEL_HASH) Join Est. 11058700 rows + │ │ Condition: i_item_sk_7 == ws_item_sk_1 + │ │ Runtime Filters Builder: {ws_item_sk_1} + │ ├─ Filter Est. 143861166 rows + │ │ │ Condition: Runtime Filters: {i_item_sk_7} + │ │ └─ Local Exchange Est. 575444667 rows + │ │ └─ Buffer Est. 575444667 rows + │ │ └─ CTERef[0] Est. 575444667 rows + │ └─ Inner Join Est. 11058700 rows + │ │ Condition: ws_sold_date_sk_1 == d_date_sk_13 + │ │ Runtime Filters Builder: {d_date_sk_13} + │ ├─ Filter Est. 720000376 rows + │ │ │ Condition: Runtime Filters: {ws_bill_customer_sk_1, ws_sold_date_sk_1} + │ │ └─ TableScan tpcds1000.web_sales Est. 720000376 rows + │ │ Where: Runtime Filters: {ws_bill_customer_sk, ws_sold_date_sk} + │ │ Outputs: ws_sold_date_sk_1:=ws_sold_date_sk, ws_item_sk_1:=ws_item_sk, ws_bill_customer_sk_1:=ws_bill_customer_sk, ws_quantity_1:=ws_quantity, ws_list_price_1:=ws_list_price + │ └─ Broadcast Exchange Est. 28 rows + │ └─ Projection Est. 28 rows + │ │ Expressions: [d_date_sk_13] + │ └─ Filter Est. 28 rows + │ │ Condition: (d_moy_3 = 2) AND (d_year_13 = 2000) + │ └─ TableScan tpcds1000.date_dim Est. 
73049 rows + │ Where: (d_moy = 2) AND (d_year = 2000) + │ Outputs: d_date_sk_13:=d_date_sk, d_year_13:=d_year, d_moy_3:=d_moy + └─ Buffer Est. 5975354 rows + └─ CTERef[1] Est. 5975354 rows CTEDef [0] Projection Est. 575444667 rows │ Expressions: i_item_sk_5:=ss_item_sk_3 diff --git a/tests/optimizers/tpcds/explains/tpcds1000_not_show_stats/q23.explain b/tests/optimizers/tpcds/explains/tpcds1000_not_show_stats/q23.explain index 81a23f0e26d..63f83fe28ba 100644 --- a/tests/optimizers/tpcds/explains/tpcds1000_not_show_stats/q23.explain +++ b/tests/optimizers/tpcds/explains/tpcds1000_not_show_stats/q23.explain @@ -164,104 +164,110 @@ Projection │ Limit: 100 └─ Union │ OutputToInputs: c_first_name_2 = [c_first_name,c_first_name_1], expr#sum(multiply(cs_quantity, cs_list_price))_1 = [expr#sum(multiply(cs_quantity, cs_list_price)),expr#sum(multiply(ws_quantity, ws_list_price))], c_last_name_2 = [c_last_name,c_last_name_1] - ├─ MergingAggregated - │ └─ Repartition Exchange - │ │ Partition by: {c_last_name, c_first_name} - │ └─ Aggregating - │ │ Group by: {c_last_name, c_first_name} - │ │ Aggregates: expr#sum(multiply(cs_quantity, cs_list_price)):=AggNull(sum)(expr#multiply(cs_quantity, cs_list_price)_2) - │ └─ Projection - │ │ Expressions: [c_first_name, c_last_name], expr#multiply(cs_quantity, cs_list_price)_2:=cs_quantity_1 * cs_list_price_1 - │ └─ Inner (PARALLEL_HASH) Join - │ │ Condition: c_customer_sk_4 == cs_bill_customer_sk_1 - │ │ Runtime Filters Builder: {cs_bill_customer_sk_1} - │ ├─ Repartition Exchange - │ │ │ Partition by: {c_customer_sk_4} - │ │ └─ Filter - │ │ │ Condition: Runtime Filters: {c_customer_sk_4} - │ │ └─ TableScan tpcds1000.customer - │ │ Where: Runtime Filters: {c_customer_sk} - │ │ Outputs: [c_first_name, c_last_name], c_customer_sk_4:=c_customer_sk - │ └─ Left Semi (PARALLEL_HASH) Join - │ │ Condition: cs_bill_customer_sk_1 == c_customer_sk_7 - │ │ Runtime Filters Builder: {c_customer_sk_7} + ├─ Sorting + │ │ Order by: {c_last_name ASC NULLS 
LAST, c_first_name ASC NULLS LAST, expr#sum(multiply(cs_quantity, cs_list_price)) ASC NULLS LAST} + │ │ Limit: 100 + │ └─ MergingAggregated + │ └─ Repartition Exchange + │ │ Partition by: {c_last_name, c_first_name} + │ └─ Aggregating + │ │ Group by: {c_last_name, c_first_name} + │ │ Aggregates: expr#sum(multiply(cs_quantity, cs_list_price)):=AggNull(sum)(expr#multiply(cs_quantity, cs_list_price)_2) + │ └─ Projection + │ │ Expressions: [c_first_name, c_last_name], expr#multiply(cs_quantity, cs_list_price)_2:=cs_quantity_1 * cs_list_price_1 + │ └─ Inner (PARALLEL_HASH) Join + │ │ Condition: c_customer_sk_4 == cs_bill_customer_sk_1 + │ │ Runtime Filters Builder: {cs_bill_customer_sk_1} │ ├─ Repartition Exchange - │ │ │ Partition by: {cs_bill_customer_sk_1} - │ │ └─ Right Semi (PARALLEL_HASH) Join - │ │ │ Condition: i_item_sk_6 == cs_item_sk_1 - │ │ │ Runtime Filters Builder: {cs_item_sk_1} - │ │ ├─ Filter - │ │ │ │ Condition: Runtime Filters: {i_item_sk_6} - │ │ │ └─ Local Exchange - │ │ │ └─ Buffer - │ │ │ └─ CTERef[0] - │ │ └─ Inner Join - │ │ │ Condition: cs_sold_date_sk_1 == d_date_sk_10 - │ │ │ Runtime Filters Builder: {d_date_sk_10} - │ │ ├─ Filter - │ │ │ │ Condition: Runtime Filters: {cs_bill_customer_sk_1, cs_sold_date_sk_1} - │ │ │ └─ TableScan tpcds1000.catalog_sales - │ │ │ Where: Runtime Filters: {cs_bill_customer_sk, cs_sold_date_sk} - │ │ │ Outputs: cs_sold_date_sk_1:=cs_sold_date_sk, cs_bill_customer_sk_1:=cs_bill_customer_sk, cs_item_sk_1:=cs_item_sk, cs_quantity_1:=cs_quantity, cs_list_price_1:=cs_list_price - │ │ └─ Broadcast Exchange - │ │ └─ Projection - │ │ │ Expressions: [d_date_sk_10] - │ │ └─ Filter - │ │ │ Condition: (d_moy_2 = 2) AND (d_year_10 = 2000) - │ │ └─ TableScan tpcds1000.date_dim - │ │ Where: (d_moy = 2) AND (d_year = 2000) - │ │ Outputs: d_date_sk_10:=d_date_sk, d_year_10:=d_year, d_moy_2:=d_moy - │ └─ Buffer - │ └─ CTERef[1] - └─ MergingAggregated - └─ Repartition Exchange - │ Partition by: {c_last_name_1, c_first_name_1} - └─ 
Aggregating - │ Group by: {c_last_name_1, c_first_name_1} - │ Aggregates: expr#sum(multiply(ws_quantity, ws_list_price)):=AggNull(sum)(expr#multiply(ws_quantity, ws_list_price)_1) - └─ Projection - │ Expressions: [c_first_name_1, c_last_name_1], expr#multiply(ws_quantity, ws_list_price)_1:=ws_quantity_1 * ws_list_price_1 - └─ Inner (PARALLEL_HASH) Join - │ Condition: c_customer_sk_8 == ws_bill_customer_sk_1 - │ Runtime Filters Builder: {ws_bill_customer_sk_1} - ├─ Repartition Exchange - │ │ Partition by: {c_customer_sk_8} - │ └─ Filter - │ │ Condition: Runtime Filters: {c_customer_sk_8} - │ └─ TableScan tpcds1000.customer - │ Where: Runtime Filters: {c_customer_sk} - │ Outputs: c_customer_sk_8:=c_customer_sk, c_first_name_1:=c_first_name, c_last_name_1:=c_last_name - └─ Left Semi (PARALLEL_HASH) Join - │ Condition: ws_bill_customer_sk_1 == c_customer_sk_9 - │ Runtime Filters Builder: {c_customer_sk_9} + │ │ │ Partition by: {c_customer_sk_4} + │ │ └─ Filter + │ │ │ Condition: Runtime Filters: {c_customer_sk_4} + │ │ └─ TableScan tpcds1000.customer + │ │ Where: Runtime Filters: {c_customer_sk} + │ │ Outputs: [c_first_name, c_last_name], c_customer_sk_4:=c_customer_sk + │ └─ Left Semi (PARALLEL_HASH) Join + │ │ Condition: cs_bill_customer_sk_1 == c_customer_sk_7 + │ │ Runtime Filters Builder: {c_customer_sk_7} + │ ├─ Repartition Exchange + │ │ │ Partition by: {cs_bill_customer_sk_1} + │ │ └─ Right Semi (PARALLEL_HASH) Join + │ │ │ Condition: i_item_sk_6 == cs_item_sk_1 + │ │ │ Runtime Filters Builder: {cs_item_sk_1} + │ │ ├─ Filter + │ │ │ │ Condition: Runtime Filters: {i_item_sk_6} + │ │ │ └─ Local Exchange + │ │ │ └─ Buffer + │ │ │ └─ CTERef[0] + │ │ └─ Inner Join + │ │ │ Condition: cs_sold_date_sk_1 == d_date_sk_10 + │ │ │ Runtime Filters Builder: {d_date_sk_10} + │ │ ├─ Filter + │ │ │ │ Condition: Runtime Filters: {cs_bill_customer_sk_1, cs_sold_date_sk_1} + │ │ │ └─ TableScan tpcds1000.catalog_sales + │ │ │ Where: Runtime Filters: {cs_bill_customer_sk, 
cs_sold_date_sk} + │ │ │ Outputs: cs_sold_date_sk_1:=cs_sold_date_sk, cs_bill_customer_sk_1:=cs_bill_customer_sk, cs_item_sk_1:=cs_item_sk, cs_quantity_1:=cs_quantity, cs_list_price_1:=cs_list_price + │ │ └─ Broadcast Exchange + │ │ └─ Projection + │ │ │ Expressions: [d_date_sk_10] + │ │ └─ Filter + │ │ │ Condition: (d_moy_2 = 2) AND (d_year_10 = 2000) + │ │ └─ TableScan tpcds1000.date_dim + │ │ Where: (d_moy = 2) AND (d_year = 2000) + │ │ Outputs: d_date_sk_10:=d_date_sk, d_year_10:=d_year, d_moy_2:=d_moy + │ └─ Buffer + │ └─ CTERef[1] + └─ Sorting + │ Order by: {c_last_name_1 ASC NULLS LAST, c_first_name_1 ASC NULLS LAST, expr#sum(multiply(ws_quantity, ws_list_price)) ASC NULLS LAST} + │ Limit: 100 + └─ MergingAggregated + └─ Repartition Exchange + │ Partition by: {c_last_name_1, c_first_name_1} + └─ Aggregating + │ Group by: {c_last_name_1, c_first_name_1} + │ Aggregates: expr#sum(multiply(ws_quantity, ws_list_price)):=AggNull(sum)(expr#multiply(ws_quantity, ws_list_price)_1) + └─ Projection + │ Expressions: [c_first_name_1, c_last_name_1], expr#multiply(ws_quantity, ws_list_price)_1:=ws_quantity_1 * ws_list_price_1 + └─ Inner (PARALLEL_HASH) Join + │ Condition: c_customer_sk_8 == ws_bill_customer_sk_1 + │ Runtime Filters Builder: {ws_bill_customer_sk_1} ├─ Repartition Exchange - │ │ Partition by: {ws_bill_customer_sk_1} - │ └─ Right Semi (PARALLEL_HASH) Join - │ │ Condition: i_item_sk_7 == ws_item_sk_1 - │ │ Runtime Filters Builder: {ws_item_sk_1} - │ ├─ Filter - │ │ │ Condition: Runtime Filters: {i_item_sk_7} - │ │ └─ Local Exchange - │ │ └─ Buffer - │ │ └─ CTERef[0] - │ └─ Inner Join - │ │ Condition: ws_sold_date_sk_1 == d_date_sk_13 - │ │ Runtime Filters Builder: {d_date_sk_13} - │ ├─ Filter - │ │ │ Condition: Runtime Filters: {ws_bill_customer_sk_1, ws_sold_date_sk_1} - │ │ └─ TableScan tpcds1000.web_sales - │ │ Where: Runtime Filters: {ws_bill_customer_sk, ws_sold_date_sk} - │ │ Outputs: ws_sold_date_sk_1:=ws_sold_date_sk, ws_item_sk_1:=ws_item_sk, 
ws_bill_customer_sk_1:=ws_bill_customer_sk, ws_quantity_1:=ws_quantity, ws_list_price_1:=ws_list_price - │ └─ Broadcast Exchange - │ └─ Projection - │ │ Expressions: [d_date_sk_13] - │ └─ Filter - │ │ Condition: (d_moy_3 = 2) AND (d_year_13 = 2000) - │ └─ TableScan tpcds1000.date_dim - │ Where: (d_moy = 2) AND (d_year = 2000) - │ Outputs: d_date_sk_13:=d_date_sk, d_year_13:=d_year, d_moy_3:=d_moy - └─ Buffer - └─ CTERef[1] + │ │ Partition by: {c_customer_sk_8} + │ └─ Filter + │ │ Condition: Runtime Filters: {c_customer_sk_8} + │ └─ TableScan tpcds1000.customer + │ Where: Runtime Filters: {c_customer_sk} + │ Outputs: c_customer_sk_8:=c_customer_sk, c_first_name_1:=c_first_name, c_last_name_1:=c_last_name + └─ Left Semi (PARALLEL_HASH) Join + │ Condition: ws_bill_customer_sk_1 == c_customer_sk_9 + │ Runtime Filters Builder: {c_customer_sk_9} + ├─ Repartition Exchange + │ │ Partition by: {ws_bill_customer_sk_1} + │ └─ Right Semi (PARALLEL_HASH) Join + │ │ Condition: i_item_sk_7 == ws_item_sk_1 + │ │ Runtime Filters Builder: {ws_item_sk_1} + │ ├─ Filter + │ │ │ Condition: Runtime Filters: {i_item_sk_7} + │ │ └─ Local Exchange + │ │ └─ Buffer + │ │ └─ CTERef[0] + │ └─ Inner Join + │ │ Condition: ws_sold_date_sk_1 == d_date_sk_13 + │ │ Runtime Filters Builder: {d_date_sk_13} + │ ├─ Filter + │ │ │ Condition: Runtime Filters: {ws_bill_customer_sk_1, ws_sold_date_sk_1} + │ │ └─ TableScan tpcds1000.web_sales + │ │ Where: Runtime Filters: {ws_bill_customer_sk, ws_sold_date_sk} + │ │ Outputs: ws_sold_date_sk_1:=ws_sold_date_sk, ws_item_sk_1:=ws_item_sk, ws_bill_customer_sk_1:=ws_bill_customer_sk, ws_quantity_1:=ws_quantity, ws_list_price_1:=ws_list_price + │ └─ Broadcast Exchange + │ └─ Projection + │ │ Expressions: [d_date_sk_13] + │ └─ Filter + │ │ Condition: (d_moy_3 = 2) AND (d_year_13 = 2000) + │ └─ TableScan tpcds1000.date_dim + │ Where: (d_moy = 2) AND (d_year = 2000) + │ Outputs: d_date_sk_13:=d_date_sk, d_year_13:=d_year, d_moy_3:=d_moy + └─ Buffer + └─ CTERef[1] 
CTEDef [0] Projection │ Expressions: i_item_sk_5:=ss_item_sk_3 diff --git a/tests/optimizers/tpcds/explains/tpcds1000_sample/q23.explain b/tests/optimizers/tpcds/explains/tpcds1000_sample/q23.explain index 6a389296a57..6685087a8a8 100644 --- a/tests/optimizers/tpcds/explains/tpcds1000_sample/q23.explain +++ b/tests/optimizers/tpcds/explains/tpcds1000_sample/q23.explain @@ -162,106 +162,112 @@ Projection Est. 100 rows └─ Sorting Est. 100 rows │ Order by: {c_last_name_2 ASC NULLS LAST, c_first_name_2 ASC NULLS LAST, expr#sum(multiply(cs_quantity, cs_list_price))_1 ASC NULLS LAST} │ Limit: 100 - └─ Union Est. 16988347 rows + └─ Union Est. 200 rows │ OutputToInputs: c_first_name_2 = [c_first_name,c_first_name_1], expr#sum(multiply(cs_quantity, cs_list_price))_1 = [expr#sum(multiply(cs_quantity, cs_list_price)),expr#sum(multiply(ws_quantity, ws_list_price))], c_last_name_2 = [c_last_name,c_last_name_1] - ├─ MergingAggregated Est. 11080454 rows - │ └─ Repartition Exchange Est. 11080454 rows - │ │ Partition by: {c_last_name, c_first_name} - │ └─ Aggregating Est. 11080454 rows - │ │ Group by: {c_last_name, c_first_name} - │ │ Aggregates: expr#sum(multiply(cs_quantity, cs_list_price)):=AggNull(sum)(expr#multiply(cs_quantity, cs_list_price)_2) - │ └─ Projection Est. 11080454 rows - │ │ Expressions: [c_first_name, c_last_name], expr#multiply(cs_quantity, cs_list_price)_2:=cs_quantity_1 * cs_list_price_1 - │ └─ Inner (PARALLEL_HASH) Join Est. 11080454 rows - │ │ Condition: c_customer_sk_4 == cs_bill_customer_sk_1 - │ │ Runtime Filters Builder: {cs_bill_customer_sk_1} - │ ├─ Repartition Exchange Est. 12000000 rows - │ │ │ Partition by: {c_customer_sk_4} - │ │ └─ Filter Est. 12000000 rows - │ │ │ Condition: Runtime Filters: {c_customer_sk_4} - │ │ └─ TableScan tpcds1000_sample.customer Est. 12000000 rows - │ │ Where: Runtime Filters: {c_customer_sk} - │ │ Outputs: [c_first_name, c_last_name], c_customer_sk_4:=c_customer_sk - │ └─ Left Semi (PARALLEL_HASH) Join Est. 
11080454 rows - │ │ Condition: cs_bill_customer_sk_1 == c_customer_sk_7 - │ │ Runtime Filters Builder: {c_customer_sk_7} - │ ├─ Repartition Exchange Est. 21960485 rows - │ │ │ Partition by: {cs_bill_customer_sk_1} - │ │ └─ Right Semi (PARALLEL_HASH) Join Est. 21960485 rows - │ │ │ Condition: i_item_sk_6 == cs_item_sk_1 - │ │ │ Runtime Filters Builder: {cs_item_sk_1} - │ │ ├─ Filter Est. 143861166 rows - │ │ │ │ Condition: Runtime Filters: {i_item_sk_6} - │ │ │ └─ Local Exchange Est. 575444667 rows - │ │ │ └─ Buffer Est. 575444667 rows - │ │ │ └─ CTERef[0] Est. 575444667 rows - │ │ └─ Inner Join Est. 21960485 rows - │ │ │ Condition: cs_sold_date_sk_1 == d_date_sk_10 - │ │ │ Runtime Filters Builder: {d_date_sk_10} - │ │ ├─ Filter Est. 1439980416 rows - │ │ │ │ Condition: Runtime Filters: {cs_bill_customer_sk_1, cs_sold_date_sk_1} - │ │ │ └─ TableScan tpcds1000_sample.catalog_sales Est. 1439980416 rows - │ │ │ Where: Runtime Filters: {cs_bill_customer_sk, cs_sold_date_sk} - │ │ │ Outputs: cs_sold_date_sk_1:=cs_sold_date_sk, cs_bill_customer_sk_1:=cs_bill_customer_sk, cs_item_sk_1:=cs_item_sk, cs_quantity_1:=cs_quantity, cs_list_price_1:=cs_list_price - │ │ └─ Broadcast Exchange Est. 28 rows - │ │ └─ Projection Est. 28 rows - │ │ │ Expressions: [d_date_sk_10] - │ │ └─ Filter Est. 28 rows - │ │ │ Condition: (d_moy_2 = 2) AND (d_year_10 = 2000) - │ │ └─ TableScan tpcds1000_sample.date_dim Est. 73049 rows - │ │ Where: (d_moy = 2) AND (d_year = 2000) - │ │ Outputs: d_date_sk_10:=d_date_sk, d_year_10:=d_year, d_moy_2:=d_moy - │ └─ Buffer Est. 5907893 rows - │ └─ CTERef[1] Est. 5907893 rows - └─ MergingAggregated Est. 5907893 rows - └─ Repartition Exchange Est. 5932260 rows - │ Partition by: {c_last_name_1, c_first_name_1} - └─ Aggregating Est. 5932260 rows - │ Group by: {c_last_name_1, c_first_name_1} - │ Aggregates: expr#sum(multiply(ws_quantity, ws_list_price)):=AggNull(sum)(expr#multiply(ws_quantity, ws_list_price)_1) - └─ Projection Est. 
5932260 rows - │ Expressions: [c_first_name_1, c_last_name_1], expr#multiply(ws_quantity, ws_list_price)_1:=ws_quantity_1 * ws_list_price_1 - └─ Inner (PARALLEL_HASH) Join Est. 5932260 rows - │ Condition: c_customer_sk_8 == ws_bill_customer_sk_1 - │ Runtime Filters Builder: {ws_bill_customer_sk_1} - ├─ Repartition Exchange Est. 12000000 rows - │ │ Partition by: {c_customer_sk_8} - │ └─ Filter Est. 12000000 rows - │ │ Condition: Runtime Filters: {c_customer_sk_8} - │ └─ TableScan tpcds1000_sample.customer Est. 12000000 rows - │ Where: Runtime Filters: {c_customer_sk} - │ Outputs: c_customer_sk_8:=c_customer_sk, c_first_name_1:=c_first_name, c_last_name_1:=c_last_name - └─ Left Semi (PARALLEL_HASH) Join Est. 5907893 rows - │ Condition: ws_bill_customer_sk_1 == c_customer_sk_9 - │ Runtime Filters Builder: {c_customer_sk_9} - ├─ Repartition Exchange Est. 11058700 rows - │ │ Partition by: {ws_bill_customer_sk_1} - │ └─ Right Semi (PARALLEL_HASH) Join Est. 11058700 rows - │ │ Condition: i_item_sk_7 == ws_item_sk_1 - │ │ Runtime Filters Builder: {ws_item_sk_1} - │ ├─ Filter Est. 143861166 rows - │ │ │ Condition: Runtime Filters: {i_item_sk_7} - │ │ └─ Local Exchange Est. 575444667 rows - │ │ └─ Buffer Est. 575444667 rows - │ │ └─ CTERef[0] Est. 575444667 rows - │ └─ Inner Join Est. 11058700 rows - │ │ Condition: ws_sold_date_sk_1 == d_date_sk_13 - │ │ Runtime Filters Builder: {d_date_sk_13} - │ ├─ Filter Est. 720000376 rows - │ │ │ Condition: Runtime Filters: {ws_bill_customer_sk_1, ws_sold_date_sk_1} - │ │ └─ TableScan tpcds1000_sample.web_sales Est. 720000376 rows - │ │ Where: Runtime Filters: {ws_bill_customer_sk, ws_sold_date_sk} - │ │ Outputs: ws_sold_date_sk_1:=ws_sold_date_sk, ws_item_sk_1:=ws_item_sk, ws_bill_customer_sk_1:=ws_bill_customer_sk, ws_quantity_1:=ws_quantity, ws_list_price_1:=ws_list_price - │ └─ Broadcast Exchange Est. 28 rows - │ └─ Projection Est. 28 rows - │ │ Expressions: [d_date_sk_13] - │ └─ Filter Est. 
28 rows - │ │ Condition: (d_moy_3 = 2) AND (d_year_13 = 2000) - │ └─ TableScan tpcds1000_sample.date_dim Est. 73049 rows - │ Where: (d_moy = 2) AND (d_year = 2000) - │ Outputs: d_date_sk_13:=d_date_sk, d_year_13:=d_year, d_moy_3:=d_moy - └─ Buffer Est. 5907893 rows - └─ CTERef[1] Est. 5907893 rows + ├─ Sorting Est. 100 rows + │ │ Order by: {c_last_name ASC NULLS LAST, c_first_name ASC NULLS LAST, expr#sum(multiply(cs_quantity, cs_list_price)) ASC NULLS LAST} + │ │ Limit: 100 + │ └─ MergingAggregated Est. 11080454 rows + │ └─ Repartition Exchange Est. 11080454 rows + │ │ Partition by: {c_last_name, c_first_name} + │ └─ Aggregating Est. 11080454 rows + │ │ Group by: {c_last_name, c_first_name} + │ │ Aggregates: expr#sum(multiply(cs_quantity, cs_list_price)):=AggNull(sum)(expr#multiply(cs_quantity, cs_list_price)_2) + │ └─ Projection Est. 11080454 rows + │ │ Expressions: [c_first_name, c_last_name], expr#multiply(cs_quantity, cs_list_price)_2:=cs_quantity_1 * cs_list_price_1 + │ └─ Inner (PARALLEL_HASH) Join Est. 11080454 rows + │ │ Condition: c_customer_sk_4 == cs_bill_customer_sk_1 + │ │ Runtime Filters Builder: {cs_bill_customer_sk_1} + │ ├─ Repartition Exchange Est. 12000000 rows + │ │ │ Partition by: {c_customer_sk_4} + │ │ └─ Filter Est. 12000000 rows + │ │ │ Condition: Runtime Filters: {c_customer_sk_4} + │ │ └─ TableScan tpcds1000_sample.customer Est. 12000000 rows + │ │ Where: Runtime Filters: {c_customer_sk} + │ │ Outputs: [c_first_name, c_last_name], c_customer_sk_4:=c_customer_sk + │ └─ Left Semi (PARALLEL_HASH) Join Est. 11080454 rows + │ │ Condition: cs_bill_customer_sk_1 == c_customer_sk_7 + │ │ Runtime Filters Builder: {c_customer_sk_7} + │ ├─ Repartition Exchange Est. 21960485 rows + │ │ │ Partition by: {cs_bill_customer_sk_1} + │ │ └─ Right Semi (PARALLEL_HASH) Join Est. 21960485 rows + │ │ │ Condition: i_item_sk_6 == cs_item_sk_1 + │ │ │ Runtime Filters Builder: {cs_item_sk_1} + │ │ ├─ Filter Est. 
143861166 rows + │ │ │ │ Condition: Runtime Filters: {i_item_sk_6} + │ │ │ └─ Local Exchange Est. 575444667 rows + │ │ │ └─ Buffer Est. 575444667 rows + │ │ │ └─ CTERef[0] Est. 575444667 rows + │ │ └─ Inner Join Est. 21960485 rows + │ │ │ Condition: cs_sold_date_sk_1 == d_date_sk_10 + │ │ │ Runtime Filters Builder: {d_date_sk_10} + │ │ ├─ Filter Est. 1439980416 rows + │ │ │ │ Condition: Runtime Filters: {cs_bill_customer_sk_1, cs_sold_date_sk_1} + │ │ │ └─ TableScan tpcds1000_sample.catalog_sales Est. 1439980416 rows + │ │ │ Where: Runtime Filters: {cs_bill_customer_sk, cs_sold_date_sk} + │ │ │ Outputs: cs_sold_date_sk_1:=cs_sold_date_sk, cs_bill_customer_sk_1:=cs_bill_customer_sk, cs_item_sk_1:=cs_item_sk, cs_quantity_1:=cs_quantity, cs_list_price_1:=cs_list_price + │ │ └─ Broadcast Exchange Est. 28 rows + │ │ └─ Projection Est. 28 rows + │ │ │ Expressions: [d_date_sk_10] + │ │ └─ Filter Est. 28 rows + │ │ │ Condition: (d_moy_2 = 2) AND (d_year_10 = 2000) + │ │ └─ TableScan tpcds1000_sample.date_dim Est. 73049 rows + │ │ Where: (d_moy = 2) AND (d_year = 2000) + │ │ Outputs: d_date_sk_10:=d_date_sk, d_year_10:=d_year, d_moy_2:=d_moy + │ └─ Buffer Est. 5907893 rows + │ └─ CTERef[1] Est. 5907893 rows + └─ Sorting Est. 100 rows + │ Order by: {c_last_name_1 ASC NULLS LAST, c_first_name_1 ASC NULLS LAST, expr#sum(multiply(ws_quantity, ws_list_price)) ASC NULLS LAST} + │ Limit: 100 + └─ MergingAggregated Est. 5907893 rows + └─ Repartition Exchange Est. 5932260 rows + │ Partition by: {c_last_name_1, c_first_name_1} + └─ Aggregating Est. 5932260 rows + │ Group by: {c_last_name_1, c_first_name_1} + │ Aggregates: expr#sum(multiply(ws_quantity, ws_list_price)):=AggNull(sum)(expr#multiply(ws_quantity, ws_list_price)_1) + └─ Projection Est. 5932260 rows + │ Expressions: [c_first_name_1, c_last_name_1], expr#multiply(ws_quantity, ws_list_price)_1:=ws_quantity_1 * ws_list_price_1 + └─ Inner (PARALLEL_HASH) Join Est. 
5932260 rows + │ Condition: c_customer_sk_8 == ws_bill_customer_sk_1 + │ Runtime Filters Builder: {ws_bill_customer_sk_1} + ├─ Repartition Exchange Est. 12000000 rows + │ │ Partition by: {c_customer_sk_8} + │ └─ Filter Est. 12000000 rows + │ │ Condition: Runtime Filters: {c_customer_sk_8} + │ └─ TableScan tpcds1000_sample.customer Est. 12000000 rows + │ Where: Runtime Filters: {c_customer_sk} + │ Outputs: c_customer_sk_8:=c_customer_sk, c_first_name_1:=c_first_name, c_last_name_1:=c_last_name + └─ Left Semi (PARALLEL_HASH) Join Est. 5907893 rows + │ Condition: ws_bill_customer_sk_1 == c_customer_sk_9 + │ Runtime Filters Builder: {c_customer_sk_9} + ├─ Repartition Exchange Est. 11058700 rows + │ │ Partition by: {ws_bill_customer_sk_1} + │ └─ Right Semi (PARALLEL_HASH) Join Est. 11058700 rows + │ │ Condition: i_item_sk_7 == ws_item_sk_1 + │ │ Runtime Filters Builder: {ws_item_sk_1} + │ ├─ Filter Est. 143861166 rows + │ │ │ Condition: Runtime Filters: {i_item_sk_7} + │ │ └─ Local Exchange Est. 575444667 rows + │ │ └─ Buffer Est. 575444667 rows + │ │ └─ CTERef[0] Est. 575444667 rows + │ └─ Inner Join Est. 11058700 rows + │ │ Condition: ws_sold_date_sk_1 == d_date_sk_13 + │ │ Runtime Filters Builder: {d_date_sk_13} + │ ├─ Filter Est. 720000376 rows + │ │ │ Condition: Runtime Filters: {ws_bill_customer_sk_1, ws_sold_date_sk_1} + │ │ └─ TableScan tpcds1000_sample.web_sales Est. 720000376 rows + │ │ Where: Runtime Filters: {ws_bill_customer_sk, ws_sold_date_sk} + │ │ Outputs: ws_sold_date_sk_1:=ws_sold_date_sk, ws_item_sk_1:=ws_item_sk, ws_bill_customer_sk_1:=ws_bill_customer_sk, ws_quantity_1:=ws_quantity, ws_list_price_1:=ws_list_price + │ └─ Broadcast Exchange Est. 28 rows + │ └─ Projection Est. 28 rows + │ │ Expressions: [d_date_sk_13] + │ └─ Filter Est. 28 rows + │ │ Condition: (d_moy_3 = 2) AND (d_year_13 = 2000) + │ └─ TableScan tpcds1000_sample.date_dim Est. 
73049 rows + │ Where: (d_moy = 2) AND (d_year = 2000) + │ Outputs: d_date_sk_13:=d_date_sk, d_year_13:=d_year, d_moy_3:=d_moy + └─ Buffer Est. 5907893 rows + └─ CTERef[1] Est. 5907893 rows CTEDef [0] Projection Est. 575444667 rows │ Expressions: i_item_sk_5:=ss_item_sk_3 diff --git a/tests/queries/4_cnch_stateless_no_tenant/10020_test_fusion_merge.reference b/tests/queries/4_cnch_stateless_no_tenant/10020_test_fusion_merge.reference index b65d8a4e8aa..2d9094a3040 100644 --- a/tests/queries/4_cnch_stateless_no_tenant/10020_test_fusion_merge.reference +++ b/tests/queries/4_cnch_stateless_no_tenant/10020_test_fusion_merge.reference @@ -7,16 +7,20 @@ Projection Est. ? rows │ Order by: {name_2 ASC NULLS LAST} └─ Union Est. ? rows │ OutputToInputs: name_2 = [name,name_1], server_time_2 = [server_time,server_time_1], event_date_2 = [event_date,event_date_1] - ├─ Filter Est. ? rows - │ │ Condition: (event_date = cast(18262, \'Date\')) AND (server_time >= 1577854800) AND (server_time <= 1577858400) - │ └─ TableScan test.test_fusion_merge_history Est. ? rows - │ Where: (event_date = cast(18262, \'Date\')) AND (server_time >= 1577854800) AND (server_time <= 1577858400) - │ Outputs: [event_date, server_time, name] - └─ Filter Est. ? rows - │ Condition: (event_date_1 = cast(18263, \'Date\')) AND (server_time_1 >= 1577941200) AND (server_time_1 <= 1577944800) - └─ TableScan test.test_fusion_merge_real Est. ? rows - Where: (event_date = cast(18263, \'Date\')) AND (server_time >= 1577941200) AND (server_time <= 1577944800) - Outputs: event_date_1:=event_date, server_time_1:=server_time, name_1:=name + ├─ Sorting Est. ? rows + │ │ Order by: {name ASC NULLS LAST} + │ └─ Filter Est. ? rows + │ │ Condition: (event_date = cast(18262, \'Date\')) AND (server_time >= 1577854800) AND (server_time <= 1577858400) + │ └─ TableScan test.test_fusion_merge_history Est. ? 
rows + │ Where: (event_date = cast(18262, \'Date\')) AND (server_time >= 1577854800) AND (server_time <= 1577858400) + │ Outputs: [event_date, server_time, name] + └─ Sorting Est. ? rows + │ Order by: {name_1 ASC NULLS LAST} + └─ Filter Est. ? rows + │ Condition: (event_date_1 = cast(18263, \'Date\')) AND (server_time_1 >= 1577941200) AND (server_time_1 <= 1577944800) + └─ TableScan test.test_fusion_merge_real Est. ? rows + Where: (event_date = cast(18263, \'Date\')) AND (server_time >= 1577941200) AND (server_time <= 1577944800) + Outputs: event_date_1:=event_date, server_time_1:=server_time, name_1:=name Projection Est. ? rows │ Expressions: event_date:=event_date_2, name:=name_2, server_time:=server_time_2 └─ Sorting Est. ? rows @@ -26,16 +30,20 @@ Projection Est. ? rows │ Order by: {name_2 ASC NULLS LAST} └─ Union Est. ? rows │ OutputToInputs: name_2 = [name,name_1], server_time_2 = [server_time,server_time_1], event_date_2 = [event_date,event_date_1] - ├─ Filter Est. ? rows - │ │ Condition: (event_date = cast(18262, \'Date\')) AND (server_time >= 1577854800) AND (server_time < 1577858400) - │ └─ TableScan test.test_fusion_merge_history Est. ? rows - │ Where: (event_date = cast(18262, \'Date\')) AND (server_time >= 1577854800) AND (server_time < 1577858400) - │ Outputs: [event_date, server_time, name] - └─ Filter Est. ? rows - │ Condition: (event_date_1 = cast(18263, \'Date\')) AND (server_time_1 >= 1577941200) AND (server_time_1 < 1577944800) - └─ TableScan test.test_fusion_merge_real Est. ? rows - Where: (event_date = cast(18263, \'Date\')) AND (server_time >= 1577941200) AND (server_time < 1577944800) - Outputs: event_date_1:=event_date, server_time_1:=server_time, name_1:=name + ├─ Sorting Est. ? rows + │ │ Order by: {name ASC NULLS LAST} + │ └─ Filter Est. ? rows + │ │ Condition: (event_date = cast(18262, \'Date\')) AND (server_time >= 1577854800) AND (server_time < 1577858400) + │ └─ TableScan test.test_fusion_merge_history Est. ? 
rows + │ Where: (event_date = cast(18262, \'Date\')) AND (server_time >= 1577854800) AND (server_time < 1577858400) + │ Outputs: [event_date, server_time, name] + └─ Sorting Est. ? rows + │ Order by: {name_1 ASC NULLS LAST} + └─ Filter Est. ? rows + │ Condition: (event_date_1 = cast(18263, \'Date\')) AND (server_time_1 >= 1577941200) AND (server_time_1 < 1577944800) + └─ TableScan test.test_fusion_merge_real Est. ? rows + Where: (event_date = cast(18263, \'Date\')) AND (server_time >= 1577941200) AND (server_time < 1577944800) + Outputs: event_date_1:=event_date, server_time_1:=server_time, name_1:=name Projection Est. ? rows │ Expressions: event_date:=event_date_2, name:=name_2, server_time:=server_time_2 └─ Sorting Est. ? rows @@ -45,13 +53,17 @@ Projection Est. ? rows │ Order by: {name_2 ASC NULLS LAST} └─ Union Est. ? rows │ OutputToInputs: name_2 = [name,name_1], server_time_2 = [server_time,server_time_1], event_date_2 = [event_date,event_date_1] - ├─ TableScan test.test_fusion_merge_history Est. ? rows - │ Outputs: [event_date, server_time, name] - └─ Filter Est. ? rows - │ Condition: (event_date_1 = cast(18263, \'Date\')) AND (server_time_1 >= 1577941200) AND (server_time_1 <= 1577944800) - └─ TableScan test.test_fusion_merge_real Est. ? rows - Where: (event_date = cast(18263, \'Date\')) AND (server_time >= 1577941200) AND (server_time <= 1577944800) - Outputs: event_date_1:=event_date, server_time_1:=server_time, name_1:=name + ├─ Sorting Est. ? rows + │ │ Order by: {name ASC NULLS LAST} + │ └─ TableScan test.test_fusion_merge_history Est. ? rows + │ Outputs: [event_date, server_time, name] + └─ Sorting Est. ? rows + │ Order by: {name_1 ASC NULLS LAST} + └─ Filter Est. ? rows + │ Condition: (event_date_1 = cast(18263, \'Date\')) AND (server_time_1 >= 1577941200) AND (server_time_1 <= 1577944800) + └─ TableScan test.test_fusion_merge_real Est. ? 
rows + Where: (event_date = cast(18263, \'Date\')) AND (server_time >= 1577941200) AND (server_time <= 1577944800) + Outputs: event_date_1:=event_date, server_time_1:=server_time, name_1:=name Projection Est. ? rows │ Expressions: event_date:=event_date_2, name:=name_2, server_time:=server_time_2 └─ Sorting Est. ? rows @@ -61,10 +73,14 @@ Projection Est. ? rows │ Order by: {name_2 ASC NULLS LAST} └─ Union Est. ? rows │ OutputToInputs: name_2 = [name,name_1], server_time_2 = [server_time,server_time_1], event_date_2 = [event_date,event_date_1] - ├─ TableScan test.test_fusion_merge_history Est. ? rows - │ Outputs: [event_date, server_time, name] - └─ TableScan test.test_fusion_merge_real Est. ? rows - Outputs: event_date_1:=event_date, server_time_1:=server_time, name_1:=name + ├─ Sorting Est. ? rows + │ │ Order by: {name ASC NULLS LAST} + │ └─ TableScan test.test_fusion_merge_history Est. ? rows + │ Outputs: [event_date, server_time, name] + └─ Sorting Est. ? rows + │ Order by: {name_1 ASC NULLS LAST} + └─ TableScan test.test_fusion_merge_real Est. ? rows + Outputs: event_date_1:=event_date, server_time_1:=server_time, name_1:=name Projection Est. ? rows │ Expressions: event_date:=event_date_2, name:=name_2, server_time:=server_time_2 └─ Sorting Est. ? rows @@ -74,16 +90,20 @@ Projection Est. ? rows │ Order by: {name_2 ASC NULLS LAST} └─ Union Est. ? rows │ OutputToInputs: name_2 = [name,name_1], server_time_2 = [server_time,server_time_1], event_date_2 = [event_date,event_date_1] - ├─ Filter Est. ? rows - │ │ Condition: (event_date = cast(18262, \'Date\')) AND (server_time >= 1577854800) AND (server_time <= 1577858400) AND (name = \'test_fusion_merge_history1\') - │ └─ TableScan test.test_fusion_merge_history Est. ? rows - │ Where: (event_date = cast(18262, \'Date\')) AND (server_time >= 1577854800) AND (server_time <= 1577858400) AND (name = \'test_fusion_merge_history1\') - │ Outputs: [event_date, server_time, name] - └─ Filter Est. ? 
rows - │ Condition: (event_date_1 = cast(18263, \'Date\')) AND (server_time_1 >= 1577941200) AND (server_time_1 <= 1577944800) AND (name_1 = \'test_fusion_merge_history1\') - └─ TableScan test.test_fusion_merge_real Est. ? rows - Where: (event_date = cast(18263, \'Date\')) AND (server_time >= 1577941200) AND (server_time <= 1577944800) AND (name = \'test_fusion_merge_history1\') - Outputs: event_date_1:=event_date, server_time_1:=server_time, name_1:=name + ├─ Sorting Est. ? rows + │ │ Order by: {name ASC NULLS LAST} + │ └─ Filter Est. ? rows + │ │ Condition: (event_date = cast(18262, \'Date\')) AND (server_time >= 1577854800) AND (server_time <= 1577858400) AND (name = \'test_fusion_merge_history1\') + │ └─ TableScan test.test_fusion_merge_history Est. ? rows + │ Where: (event_date = cast(18262, \'Date\')) AND (server_time >= 1577854800) AND (server_time <= 1577858400) AND (name = \'test_fusion_merge_history1\') + │ Outputs: [event_date, server_time, name] + └─ Sorting Est. ? rows + │ Order by: {name_1 ASC NULLS LAST} + └─ Filter Est. ? rows + │ Condition: (event_date_1 = cast(18263, \'Date\')) AND (server_time_1 >= 1577941200) AND (server_time_1 <= 1577944800) AND (name_1 = \'test_fusion_merge_history1\') + └─ TableScan test.test_fusion_merge_real Est. ? rows + Where: (event_date = cast(18263, \'Date\')) AND (server_time >= 1577941200) AND (server_time <= 1577944800) AND (name = \'test_fusion_merge_history1\') + Outputs: event_date_1:=event_date, server_time_1:=server_time, name_1:=name Projection Est. ? rows │ Expressions: event_date:=event_date_2, name:=name_2, server_time:=server_time_2 └─ Sorting Est. ? rows @@ -93,16 +113,20 @@ Projection Est. ? rows │ Order by: {name_2 ASC NULLS LAST} └─ Union Est. ? rows │ OutputToInputs: name_2 = [name,name_1], server_time_2 = [server_time,server_time_1], event_date_2 = [event_date,event_date_1] - ├─ Filter Est. ? 
rows - │ │ Condition: (event_date = cast(18262, \'Date\')) AND (server_time >= 1577854800000) AND (server_time <= 1577858400000) - │ └─ TableScan test.test_fusion_merge_history Est. ? rows - │ Where: (event_date = cast(18262, \'Date\')) AND (server_time >= 1577854800000) AND (server_time <= 1577858400000) - │ Outputs: [event_date, server_time, name] - └─ Filter Est. ? rows - │ Condition: (event_date_1 = cast(18263, \'Date\')) AND (server_time_1 >= 1577941200000) AND (server_time_1 <= 1577944800000) - └─ TableScan test.test_fusion_merge_real Est. ? rows - Where: (event_date = cast(18263, \'Date\')) AND (server_time >= 1577941200000) AND (server_time <= 1577944800000) - Outputs: event_date_1:=event_date, server_time_1:=server_time, name_1:=name + ├─ Sorting Est. ? rows + │ │ Order by: {name ASC NULLS LAST} + │ └─ Filter Est. ? rows + │ │ Condition: (event_date = cast(18262, \'Date\')) AND (server_time >= 1577854800000) AND (server_time <= 1577858400000) + │ └─ TableScan test.test_fusion_merge_history Est. ? rows + │ Where: (event_date = cast(18262, \'Date\')) AND (server_time >= 1577854800000) AND (server_time <= 1577858400000) + │ Outputs: [event_date, server_time, name] + └─ Sorting Est. ? rows + │ Order by: {name_1 ASC NULLS LAST} + └─ Filter Est. ? rows + │ Condition: (event_date_1 = cast(18263, \'Date\')) AND (server_time_1 >= 1577941200000) AND (server_time_1 <= 1577944800000) + └─ TableScan test.test_fusion_merge_real Est. ? rows + Where: (event_date = cast(18263, \'Date\')) AND (server_time >= 1577941200000) AND (server_time <= 1577944800000) + Outputs: event_date_1:=event_date, server_time_1:=server_time, name_1:=name Projection Est. ? rows │ Expressions: event_date:=event_date_2, name:=name_2, server_time:=server_time_2, time:=`expr#if(lessOrEquals(server_time, 2000000000), multiply(server_time, 1000), server_time)_2` └─ Sorting Est. ? rows @@ -113,23 +137,31 @@ Projection Est. ? rows └─ Union Est. ? 
rows │ OutputToInputs: expr#if(lessOrEquals(server_time, 2000000000), multiply(server_time, 1000), server_time)_2 = [expr#if(lessOrEquals(server_time, 2000000000), multiply(server_time, 1000), server_time),expr#if(lessOrEquals(server_time, 2000000000), multiply(server_time, 1000), server_time)_1], name_2 = [name,name_1], server_time_2 = [server_time,server_time_1], event_date_2 = [event_date,event_date_1] ├─ Projection Est. ? rows - │ │ Expressions: [event_date, name, server_time], expr#if(lessOrEquals(server_time, 2000000000), multiply(server_time, 1000), server_time):=`expr#if(lessOrEquals(server_time, 2000000000), multiply(server_time, 1000), server_time)_3` - │ └─ Filter Est. ? rows - │ │ Condition: (event_date = cast(18262, \'Date\')) AND (`expr#if(lessOrEquals(server_time, 2000000000), multiply(server_time, 1000), server_time)_3` >= 1577854800000) AND (`expr#if(lessOrEquals(server_time, 2000000000), multiply(server_time, 1000), server_time)_3` <= 1577858400000) + │ │ Expressions: [event_date, expr#if(lessOrEquals(server_time, 2000000000), multiply(server_time, 1000), server_time), name, server_time] + │ └─ Sorting Est. ? rows + │ │ Order by: {name ASC NULLS LAST} │ └─ Projection Est. ? rows - │ │ Expressions: [event_date, name, server_time], expr#if(lessOrEquals(server_time, 2000000000), multiply(server_time, 1000), server_time)_3:=if(server_time <= 2000000000, server_time * 1000, server_time) - │ └─ TableScan test.test_fusion_merge_history Est. ? 
rows - │ Where: (event_date = cast(18262, \'Date\')) AND (if(server_time <= 2000000000, server_time * 1000, server_time) >= 1577854800000) AND (if(server_time <= 2000000000, server_time * 1000, server_time) <= 1577858400000) - │ Outputs: [event_date, server_time, name] + │ │ Expressions: [event_date, name, server_time], expr#if(lessOrEquals(server_time, 2000000000), multiply(server_time, 1000), server_time):=`expr#if(lessOrEquals(server_time, 2000000000), multiply(server_time, 1000), server_time)_3` + │ └─ Filter Est. ? rows + │ │ Condition: (event_date = cast(18262, \'Date\')) AND (`expr#if(lessOrEquals(server_time, 2000000000), multiply(server_time, 1000), server_time)_3` >= 1577854800000) AND (`expr#if(lessOrEquals(server_time, 2000000000), multiply(server_time, 1000), server_time)_3` <= 1577858400000) + │ └─ Projection Est. ? rows + │ │ Expressions: [event_date, name, server_time], expr#if(lessOrEquals(server_time, 2000000000), multiply(server_time, 1000), server_time)_3:=if(server_time <= 2000000000, server_time * 1000, server_time) + │ └─ TableScan test.test_fusion_merge_history Est. ? rows + │ Where: (event_date = cast(18262, \'Date\')) AND (if(server_time <= 2000000000, server_time * 1000, server_time) >= 1577854800000) AND (if(server_time <= 2000000000, server_time * 1000, server_time) <= 1577858400000) + │ Outputs: [event_date, server_time, name] └─ Projection Est. ? rows - │ Expressions: [event_date_1, name_1, server_time_1], expr#if(lessOrEquals(server_time, 2000000000), multiply(server_time, 1000), server_time)_1:=`expr#if(lessOrEquals(server_time_1, 2000000000), multiply(server_time_1, 1000), server_time_1)` - └─ Filter Est. ? 
rows - │ Condition: (event_date_1 = cast(18263, \'Date\')) AND (`expr#if(lessOrEquals(server_time_1, 2000000000), multiply(server_time_1, 1000), server_time_1)` >= 1577941200000) AND (`expr#if(lessOrEquals(server_time_1, 2000000000), multiply(server_time_1, 1000), server_time_1)` <= 1577944800000) + │ Expressions: [event_date_1, expr#if(lessOrEquals(server_time, 2000000000), multiply(server_time, 1000), server_time)_1, name_1, server_time_1] + └─ Sorting Est. ? rows + │ Order by: {name_1 ASC NULLS LAST} └─ Projection Est. ? rows - │ Expressions: [event_date_1, name_1, server_time_1], expr#if(lessOrEquals(server_time_1, 2000000000), multiply(server_time_1, 1000), server_time_1):=if(server_time_1 <= 2000000000, server_time_1 * 1000, server_time_1) - └─ TableScan test.test_fusion_merge_real Est. ? rows - Where: (event_date = cast(18263, \'Date\')) AND (if(server_time <= 2000000000, server_time * 1000, server_time) >= 1577941200000) AND (if(server_time <= 2000000000, server_time * 1000, server_time) <= 1577944800000) - Outputs: event_date_1:=event_date, server_time_1:=server_time, name_1:=name + │ Expressions: [event_date_1, name_1, server_time_1], expr#if(lessOrEquals(server_time, 2000000000), multiply(server_time, 1000), server_time)_1:=`expr#if(lessOrEquals(server_time_1, 2000000000), multiply(server_time_1, 1000), server_time_1)` + └─ Filter Est. ? rows + │ Condition: (event_date_1 = cast(18263, \'Date\')) AND (`expr#if(lessOrEquals(server_time_1, 2000000000), multiply(server_time_1, 1000), server_time_1)` >= 1577941200000) AND (`expr#if(lessOrEquals(server_time_1, 2000000000), multiply(server_time_1, 1000), server_time_1)` <= 1577944800000) + └─ Projection Est. ? rows + │ Expressions: [event_date_1, name_1, server_time_1], expr#if(lessOrEquals(server_time_1, 2000000000), multiply(server_time_1, 1000), server_time_1):=if(server_time_1 <= 2000000000, server_time_1 * 1000, server_time_1) + └─ TableScan test.test_fusion_merge_real Est. ? 
rows + Where: (event_date = cast(18263, \'Date\')) AND (if(server_time <= 2000000000, server_time * 1000, server_time) >= 1577941200000) AND (if(server_time <= 2000000000, server_time * 1000, server_time) <= 1577944800000) + Outputs: event_date_1:=event_date, server_time_1:=server_time, name_1:=name 2020-01-01 1577854800 test_fusion_merge_history1 2020-01-02 1577944800 test_fusion_merge_real2 2020-01-02 1577944800 test_fusion_merge_real2 diff --git a/tests/queries/4_cnch_stateless_no_tenant/40022_topn_filtering_opt.reference b/tests/queries/4_cnch_stateless_no_tenant/40022_topn_filtering_opt.reference index 3594eb7dbd4..86754c3daea 100644 --- a/tests/queries/4_cnch_stateless_no_tenant/40022_topn_filtering_opt.reference +++ b/tests/queries/4_cnch_stateless_no_tenant/40022_topn_filtering_opt.reference @@ -92,5 +92,6 @@ Projection Est. 10 rows, cost 2.740000e+00 └─ Filter Est. ? rows, cost 0.000000e+00 │ Condition: (block_number > 10000000) AND (block_number < 20000000) └─ TableScan test.polygon_tx_v87 Est. ? rows, cost 0.000000e+00 + Input Order Info: {block_number ASC ANY} Where: (block_number > 10000000) AND (block_number < 20000000) Outputs: [block_number, gas, gas_price] diff --git a/tests/queries/4_cnch_stateless_no_tenant/40023_mv_with_topn_filtering.reference b/tests/queries/4_cnch_stateless_no_tenant/40023_mv_with_topn_filtering.reference index 474c22cfd52..eb2cf141ecc 100644 --- a/tests/queries/4_cnch_stateless_no_tenant/40023_mv_with_topn_filtering.reference +++ b/tests/queries/4_cnch_stateless_no_tenant/40023_mv_with_topn_filtering.reference @@ -22,8 +22,9 @@ Projection └─ TopNFiltering │ Order by: {block_number ASC NULLS LAST} │ Size: 10 - │ Algorithm: Unspecified + │ Algorithm: Limit └─ TableScan test.block_number_agg_sum_hit_mv_target + Input Order Info: {block_number ASC ANY} Outputs: [gas_fee, block_number] note: Materialized Views is applied for 1 times: test.mv_block_number_agg_hit. 
Projection diff --git a/tests/queries/4_cnch_stateless_no_tenant/48015_simplify_multiIf_function.reference b/tests/queries/4_cnch_stateless_no_tenant/48015_simplify_multiIf_function.reference index 63788bf0c2a..7e70c857d62 100644 --- a/tests/queries/4_cnch_stateless_no_tenant/48015_simplify_multiIf_function.reference +++ b/tests/queries/4_cnch_stateless_no_tenant/48015_simplify_multiIf_function.reference @@ -15,10 +15,11 @@ Projection Est. ? rows └─ Gather Exchange Est. ? rows └─ Sorting Est. ? rows │ Order by: {a ASC NULLS LAST} + │ Prefix Order: {a} └─ Filter Est. ? rows │ Condition: multiIf(a = 6, a + a, b + a) > 10 └─ TableScan test.multiIf Est. ? rows - Input Order Info: {a ASC NULLS LAST} + Input Order Info: {a ASC ANY} Where: multiIf(a = 6, a + a, b + a) > 10 Outputs: [a, b] Projection Est. ? rows @@ -28,10 +29,11 @@ Projection Est. ? rows └─ Gather Exchange Est. ? rows └─ Sorting Est. ? rows │ Order by: {a ASC NULLS LAST} + │ Prefix Order: {a} └─ Filter Est. ? rows │ Condition: multiIf(a = 3, b + b, a * a) > 10 └─ TableScan test.multiIf Est. ? rows - Input Order Info: {a ASC NULLS LAST} + Input Order Info: {a ASC ANY} Where: multiIf(a = 3, b + b, a * a) > 10 Outputs: [a, b] 5 6 @@ -49,10 +51,11 @@ Projection Est. ? rows └─ Gather Exchange Est. ? rows └─ Sorting Est. ? rows │ Order by: {a ASC NULLS LAST} + │ Prefix Order: {a} └─ Filter Est. ? rows │ Condition: (a + 2) < 10 └─ TableScan test.multiIf Est. ? rows - Input Order Info: {a ASC NULLS LAST} + Input Order Info: {a ASC ANY} Where: (a + 2) < 10 Outputs: [a, b] Projection Est. ? rows @@ -62,8 +65,9 @@ Projection Est. ? rows └─ Gather Exchange Est. ? rows └─ Sorting Est. ? rows │ Order by: {a ASC NULLS LAST} + │ Prefix Order: {a} └─ TableScan test.multiIf Est. ? rows - Input Order Info: {a ASC NULLS LAST} + Input Order Info: {a ASC ANY} Outputs: [a, b] 5 6 5 6 @@ -78,10 +82,11 @@ Projection Est. ? rows └─ Gather Exchange Est. ? rows └─ Sorting Est. ? 
rows │ Order by: {a ASC NULLS LAST} + │ Prefix Order: {a} └─ Filter Est. ? rows │ Condition: (a + 4) < 10 └─ TableScan test.multiIf Est. ? rows - Input Order Info: {a ASC NULLS LAST} + Input Order Info: {a ASC ANY} Where: (a + 4) < 10 Outputs: [a, b] Projection Est. ? rows @@ -91,6 +96,7 @@ Projection Est. ? rows └─ Gather Exchange Est. ? rows └─ Sorting Est. ? rows │ Order by: {a ASC NULLS LAST} + │ Prefix Order: {a} └─ TableScan test.multiIf Est. ? rows - Input Order Info: {a ASC NULLS LAST} + Input Order Info: {a ASC ANY} Outputs: [a, b] diff --git a/tests/queries/4_cnch_stateless_no_tenant/48016_execute_uncorrelated_subquery_first.reference b/tests/queries/4_cnch_stateless_no_tenant/48016_execute_uncorrelated_subquery_first.reference index bb9ce7302e9..64cf772f571 100644 --- a/tests/queries/4_cnch_stateless_no_tenant/48016_execute_uncorrelated_subquery_first.reference +++ b/tests/queries/4_cnch_stateless_no_tenant/48016_execute_uncorrelated_subquery_first.reference @@ -57,10 +57,11 @@ Projection Est. ? rows └─ Gather Exchange Est. ? rows └─ Sorting Est. ? rows │ Order by: {a ASC NULLS LAST, b ASC NULLS LAST} + │ Prefix Order: {a} └─ Filter Est. ? rows │ Condition: a < 5 └─ TableScan test.uncorrelated Est. ? rows - Input Order Info: {a ASC NULLS LAST} + Input Order Info: {a ASC ANY} Where: a < 5 Outputs: [a, b] Projection Est. ? rows @@ -70,10 +71,11 @@ Projection Est. ? rows └─ Gather Exchange Est. ? rows └─ Sorting Est. ? rows │ Order by: {a ASC NULLS LAST, b ASC NULLS LAST} + │ Prefix Order: {a} └─ Filter Est. ? rows │ Condition: a < 10 └─ TableScan test.uncorrelated Est. ? rows - Input Order Info: {a ASC NULLS LAST} + Input Order Info: {a ASC ANY} Where: a < 10 Outputs: [a, b] Projection Est. ? rows @@ -83,10 +85,11 @@ Projection Est. ? rows └─ Gather Exchange Est. ? rows └─ Sorting Est. ? rows │ Order by: {a ASC NULLS LAST, b ASC NULLS LAST} + │ Prefix Order: {a} └─ Filter Est. ? rows │ Condition: a < 11 └─ TableScan test.uncorrelated Est. ? 
rows - Input Order Info: {a ASC NULLS LAST} + Input Order Info: {a ASC ANY} Where: a < 11 Outputs: [a, b] Projection Est. ? rows @@ -96,8 +99,9 @@ Projection Est. ? rows └─ Gather Exchange Est. ? rows └─ Sorting Est. ? rows │ Order by: {a ASC NULLS LAST} + │ Prefix Order: {a} └─ TableScan test.uncorrelated Est. ? rows - Input Order Info: {a ASC NULLS LAST} + Input Order Info: {a ASC ANY} Outputs: [a] Projection Est. 0 rows, cost 0.000000e+00 │ Expressions: [a] diff --git a/tests/queries/4_cnch_stateless_no_tenant/48018_push_limit_into_sorting.reference b/tests/queries/4_cnch_stateless_no_tenant/48018_push_limit_into_sorting.reference index c692b362f91..7503aad66e8 100644 --- a/tests/queries/4_cnch_stateless_no_tenant/48018_push_limit_into_sorting.reference +++ b/tests/queries/4_cnch_stateless_no_tenant/48018_push_limit_into_sorting.reference @@ -8,6 +8,7 @@ Projection Est. 3 rows, cost 1.502000e+00 └─ Gather Exchange Est. 7 rows, cost 1.280000e+00 └─ Sorting Est. 7 rows, cost 0.000000e+00 │ Order by: {a ASC NULLS FIRST} + │ Prefix Order: {a} │ Limit: 7 └─ TableScan test.t1 Est. ? rows, cost 0.000000e+00 Input Order Info: {a ASC NULLS FIRST} @@ -22,8 +23,10 @@ Projection Est. 3 rows, cost 1.502000e+00 └─ Gather Exchange Est. 7 rows, cost 1.280000e+00 └─ Sorting Est. 7 rows, cost 0.000000e+00 │ Order by: {a ASC NULLS LAST} + │ Prefix Order: {a} │ Limit: 7 └─ TableScan test.t1 Est. ? rows, cost 0.000000e+00 + Input Order Info: {a ASC NULLS FIRST} Outputs: [k, a, b, c] Projection Est. 10 rows, cost 2.500000e+00 │ Expressions: [a] @@ -35,8 +38,10 @@ Projection Est. 10 rows, cost 2.500000e+00 └─ Gather Exchange Est. 10 rows, cost 1.760000e+00 └─ Sorting Est. 10 rows, cost 0.000000e+00 │ Order by: {a ASC NULLS LAST} + │ Prefix Order: {a} │ Limit: 10 └─ TableScan test.t1 Est. ? rows, cost 0.000000e+00 + Input Order Info: {a ASC NULLS FIRST} Outputs: [a] Projection Est. 3 rows, cost 1.502000e+00 │ Expressions: [b, c, k] @@ -48,9 +53,10 @@ Projection Est. 
3 rows, cost 1.502000e+00 └─ Gather Exchange Est. 7 rows, cost 1.280000e+00 └─ Sorting Est. 7 rows, cost 0.000000e+00 │ Order by: {b ASC NULLS FIRST} + │ Prefix Order: {b} │ Limit: 7 └─ TableScan test.t1 Est. ? rows, cost 0.000000e+00 - Input Order Info: {b ASC NULLS FIRST} + Input Order Info: {b ASC ANY} Outputs: [k, b, c] Projection Est. 3 rows, cost 1.502000e+00 │ Expressions: [b, c, k] @@ -62,9 +68,10 @@ Projection Est. 3 rows, cost 1.502000e+00 └─ Gather Exchange Est. 7 rows, cost 1.280000e+00 └─ Sorting Est. 7 rows, cost 0.000000e+00 │ Order by: {b ASC NULLS LAST} + │ Prefix Order: {b} │ Limit: 7 └─ TableScan test.t1 Est. ? rows, cost 0.000000e+00 - Input Order Info: {b ASC NULLS LAST} + Input Order Info: {b ASC ANY} Outputs: [k, b, c] Projection Est. 3 rows, cost 1.502000e+00 │ Expressions: [c, k] @@ -76,6 +83,7 @@ Projection Est. 3 rows, cost 1.502000e+00 └─ Gather Exchange Est. 7 rows, cost 1.280000e+00 └─ Sorting Est. 7 rows, cost 0.000000e+00 │ Order by: {c ASC NULLS FIRST} + │ Prefix Order: {c} │ Limit: 7 └─ TableScan test.t1 Est. ? rows, cost 0.000000e+00 Input Order Info: {c ASC NULLS FIRST} @@ -90,6 +98,8 @@ Projection Est. 3 rows, cost 1.502000e+00 └─ Gather Exchange Est. 7 rows, cost 1.280000e+00 └─ Sorting Est. 7 rows, cost 0.000000e+00 │ Order by: {c ASC NULLS LAST} + │ Prefix Order: {c} │ Limit: 7 └─ TableScan test.t1 Est. ? rows, cost 0.000000e+00 + Input Order Info: {c ASC NULLS FIRST} Outputs: [k, c] diff --git a/tests/queries/4_cnch_stateless_no_tenant/48023_foreign_key_definition.reference b/tests/queries/4_cnch_stateless_no_tenant/48023_foreign_key_definition.reference index 39cc4bc2719..5d1f1939845 100644 --- a/tests/queries/4_cnch_stateless_no_tenant/48023_foreign_key_definition.reference +++ b/tests/queries/4_cnch_stateless_no_tenant/48023_foreign_key_definition.reference @@ -7,9 +7,11 @@ Projection Est. ? rows └─ Gather Exchange Est. ? rows └─ Sorting Est. ? rows │ Order by: {id ASC NULLS LAST} + │ Prefix Order: {id} └─ Filter Est. ? 
rows │ Condition: isNotNull(parent_id) └─ TableScan test.child Est. ? rows + Input Order Info: {id ASC NULLS FIRST} Where: isNotNull(parent_id) Outputs: [id, parent_id, description] 1 1 CHILD ONE diff --git a/tests/queries/4_cnch_stateless_no_tenant/48049_optimzier_use_sort_property.reference b/tests/queries/4_cnch_stateless_no_tenant/48049_optimzier_use_sort_property.reference new file mode 100644 index 00000000000..9e21896fa09 --- /dev/null +++ b/tests/queries/4_cnch_stateless_no_tenant/48049_optimzier_use_sort_property.reference @@ -0,0 +1,250 @@ +-- { echoOn } +explain select * from test order by a, b, c, d; +Projection Est. ? rows +│ Expressions: [a, b, c, d] +└─ Sorting Est. ? rows + │ Order by: {a ASC NULLS LAST, b ASC NULLS LAST, c ASC NULLS LAST, d ASC NULLS LAST} + └─ Gather Exchange Est. ? rows + └─ Sorting Est. ? rows + │ Order by: {a ASC NULLS LAST, b ASC NULLS LAST, c ASC NULLS LAST, d ASC NULLS LAST} + │ Prefix Order: {a, b, c} + └─ TableScan default.test Est. ? rows + Input Order Info: {a ASC ANY, b ASC ANY, c ASC ANY} + Outputs: [a, b, c, d] +explain select * from test order by a desc, b desc, c desc, d desc; +Projection Est. ? rows +│ Expressions: [a, b, c, d] +└─ Sorting Est. ? rows + │ Order by: {a DESC NULLS LAST, b DESC NULLS LAST, c DESC NULLS LAST, d DESC NULLS LAST} + └─ Gather Exchange Est. ? rows + └─ Sorting Est. ? rows + │ Order by: {a DESC NULLS LAST, b DESC NULLS LAST, c DESC NULLS LAST, d DESC NULLS LAST} + │ Prefix Order: {a, b, c} + └─ TableScan default.test Est. ? rows + Input Order Info: {a DESC ANY, b DESC ANY, c DESC ANY} + Outputs: [a, b, c, d] +explain select * from test order by a, b desc, c; +Projection Est. ? rows +│ Expressions: [a, b, c, d] +└─ Sorting Est. ? rows + │ Order by: {a ASC NULLS LAST, b DESC NULLS LAST, c ASC NULLS LAST} + └─ Gather Exchange Est. ? rows + └─ Sorting Est. ? rows + │ Order by: {a ASC NULLS LAST, b DESC NULLS LAST, c ASC NULLS LAST} + │ Prefix Order: {a} + └─ TableScan default.test Est. ? 
rows + Input Order Info: {a ASC ANY} + Outputs: [a, b, c, d] +explain select * from test order by a desc, b, c desc; +Projection Est. ? rows +│ Expressions: [a, b, c, d] +└─ Sorting Est. ? rows + │ Order by: {a DESC NULLS LAST, b ASC NULLS LAST, c DESC NULLS LAST} + └─ Gather Exchange Est. ? rows + └─ Sorting Est. ? rows + │ Order by: {a DESC NULLS LAST, b ASC NULLS LAST, c DESC NULLS LAST} + │ Prefix Order: {a} + └─ TableScan default.test Est. ? rows + Input Order Info: {a DESC ANY} + Outputs: [a, b, c, d] +explain select a, b, concat(c, d) as e from test order by a, b, e; +Projection Est. ? rows +│ Expressions: [a, b], e:=`expr#concat(c, d)` +└─ Sorting Est. ? rows + │ Order by: {a ASC NULLS LAST, b ASC NULLS LAST, expr#concat(c, d) ASC NULLS LAST} + └─ Gather Exchange Est. ? rows + └─ Sorting Est. ? rows + │ Order by: {a ASC NULLS LAST, b ASC NULLS LAST, expr#concat(c, d) ASC NULLS LAST} + │ Prefix Order: {a, b} + └─ Projection Est. ? rows + │ Expressions: [a, b], expr#concat(c, d):=concat(c, d) + └─ TableScan default.test Est. ? rows + Input Order Info: {a ASC ANY, b ASC ANY} + Outputs: [a, b, c, d] +explain select a as e, b, concat(c, d) as f from test order by e, b, f; +Projection Est. ? rows +│ Expressions: [b], e:=a, f:=`expr#concat(c, d)` +└─ Sorting Est. ? rows + │ Order by: {a ASC NULLS LAST, b ASC NULLS LAST, expr#concat(c, d) ASC NULLS LAST} + └─ Gather Exchange Est. ? rows + └─ Sorting Est. ? rows + │ Order by: {a ASC NULLS LAST, b ASC NULLS LAST, expr#concat(c, d) ASC NULLS LAST} + │ Prefix Order: {a, b} + └─ Projection Est. ? rows + │ Expressions: [a, b], expr#concat(c, d):=concat(c, d) + └─ TableScan default.test Est. ? rows + Input Order Info: {a ASC ANY, b ASC ANY} + Outputs: [a, b, c, d] +explain select a as e, b, concat(c, d) as f from test order by e desc, b desc, f desc; +Projection Est. ? rows +│ Expressions: [b], e:=a, f:=`expr#concat(c, d)` +└─ Sorting Est. ? 
rows + │ Order by: {a DESC NULLS LAST, b DESC NULLS LAST, expr#concat(c, d) DESC NULLS LAST} + └─ Gather Exchange Est. ? rows + └─ Sorting Est. ? rows + │ Order by: {a DESC NULLS LAST, b DESC NULLS LAST, expr#concat(c, d) DESC NULLS LAST} + │ Prefix Order: {a, b} + └─ Projection Est. ? rows + │ Expressions: [a, b], expr#concat(c, d):=concat(c, d) + └─ TableScan default.test Est. ? rows + Input Order Info: {a DESC ANY, b DESC ANY} + Outputs: [a, b, c, d] +explain select * from test where a = 'x' order by b, c, d; +Projection Est. ? rows +│ Expressions: [a, b, c, d] +└─ Sorting Est. ? rows + │ Order by: {b ASC NULLS LAST, c ASC NULLS LAST, d ASC NULLS LAST} + └─ Gather Exchange Est. ? rows + └─ Sorting Est. ? rows + │ Order by: {b ASC NULLS LAST, c ASC NULLS LAST, d ASC NULLS LAST} + │ Prefix Order: {b, c} + └─ Filter Est. ? rows + │ Condition: a = \'x\' + └─ TableScan default.test Est. ? rows + Input Order Info: {a ASC ANY, b ASC ANY, c ASC ANY} + Where: a = \'x\' + Outputs: [a, b, c, d] +explain select * from test where a = 'x' order by b desc, c desc, d desc; +Projection Est. ? rows +│ Expressions: [a, b, c, d] +└─ Sorting Est. ? rows + │ Order by: {b DESC NULLS LAST, c DESC NULLS LAST, d DESC NULLS LAST} + └─ Gather Exchange Est. ? rows + └─ Sorting Est. ? rows + │ Order by: {b DESC NULLS LAST, c DESC NULLS LAST, d DESC NULLS LAST} + │ Prefix Order: {b, c} + └─ Filter Est. ? rows + │ Condition: a = \'x\' + └─ TableScan default.test Est. ? rows + Input Order Info: {a DESC ANY, b DESC ANY, c DESC ANY} + Where: a = \'x\' + Outputs: [a, b, c, d] +explain select * from test where a = 'x' and d = 'z' order by d, b, c; +Projection Est. ? rows +│ Expressions: [a, b, c, d] +└─ Sorting Est. ? rows + │ Order by: {d ASC NULLS LAST, b ASC NULLS LAST, c ASC NULLS LAST} + └─ Gather Exchange Est. ? rows + └─ Sorting Est. ? rows + │ Order by: {d ASC NULLS LAST, b ASC NULLS LAST, c ASC NULLS LAST} + │ Prefix Order: {d, b, c} + └─ Filter Est. ? 
rows + │ Condition: (a = \'x\') AND (d = \'z\') + └─ TableScan default.test Est. ? rows + Input Order Info: {a ASC ANY, b ASC ANY, c ASC ANY} + Where: (a = \'x\') AND (d = \'z\') + Outputs: [a, b, c, d] +explain select * from test where a = 'x' and d = 'z' order by d desc, b desc, c desc; +Projection Est. ? rows +│ Expressions: [a, b, c, d] +└─ Sorting Est. ? rows + │ Order by: {d DESC NULLS LAST, b DESC NULLS LAST, c DESC NULLS LAST} + └─ Gather Exchange Est. ? rows + └─ Sorting Est. ? rows + │ Order by: {d DESC NULLS LAST, b DESC NULLS LAST, c DESC NULLS LAST} + │ Prefix Order: {d, b, c} + └─ Filter Est. ? rows + │ Condition: (a = \'x\') AND (d = \'z\') + └─ TableScan default.test Est. ? rows + Input Order Info: {a DESC ANY, b DESC ANY, c DESC ANY} + Where: (a = \'x\') AND (d = \'z\') + Outputs: [a, b, c, d] +explain pipeline select * from test where a = 'x' order by b, c limit 10; + +Segment[ 1 ] : +(Sorting) +Limit + (Filter) + FilterTransform + (TableScan) # + NullSource 0 → 1 + +------------------------------------------ + +Segment[ 0 ] : +(Projection) +ExpressionTransform + (Limit) + Limit + (Sorting) + (RemoteExchangeSource) + Source segment_id : [ 1 ] + ExchangeSource 0 → 1 + +------------------------------------------ +explain select a, b, c from (select a, b, c from test union all select a, b, c from test) order by a desc, b desc, c desc; +Projection Est. ? rows +│ Expressions: a:=a_2, b:=b_2, c:=c_2 +└─ Sorting Est. ? rows + │ Order by: {a_2 DESC NULLS LAST, b_2 DESC NULLS LAST, c_2 DESC NULLS LAST} + └─ Gather Exchange Est. ? rows + └─ Sorting Est. ? rows + │ Order by: {a_2 DESC NULLS LAST, b_2 DESC NULLS LAST, c_2 DESC NULLS LAST} + └─ Union Est. ? rows + │ OutputToInputs: c_2 = [c,c_1], b_2 = [b,b_1], a_2 = [a,a_1] + ├─ Sorting Est. ? rows + │ │ Order by: {a DESC NULLS LAST, b DESC NULLS LAST, c DESC NULLS LAST} + │ │ Prefix Order: {a, b, c} + │ └─ TableScan default.test Est. ? 
rows + │ Input Order Info: {a DESC ANY, b DESC ANY, c DESC ANY} + │ Outputs: [a, b, c] + └─ Sorting Est. ? rows + │ Order by: {a_1 DESC NULLS LAST, b_1 DESC NULLS LAST, c_1 DESC NULLS LAST} + │ Prefix Order: {a_1, b_1, c_1} + └─ TableScan default.test Est. ? rows + Input Order Info: {a DESC ANY, b DESC ANY, c DESC ANY} + Outputs: a_1:=a, b_1:=b, c_1:=c +explain select a, b, c from (select a, b, c from test where a = '1' and b = '1' union all select a, b, c from test where a = '3' and b = '3') order by c desc limit 10; +Projection Est. 10 rows, cost 2.500000e+00 +│ Expressions: a:=a_2, b:=b_2, c:=c_2 +└─ Limit Est. 10 rows, cost 1.760000e+00 + │ Limit: 10 + └─ Sorting Est. 10 rows, cost 1.760000e+00 + │ Order by: {c_2 DESC NULLS LAST} + │ Limit: 10 + └─ Gather Exchange Est. 10 rows, cost 1.760000e+00 + └─ Sorting Est. 10 rows, cost 0.000000e+00 + │ Order by: {c_2 DESC NULLS LAST} + │ Limit: 10 + └─ Union Est. 20 rows, cost 0.000000e+00 + │ OutputToInputs: c_2 = [c,c_1], b_2 = [b,b_1], a_2 = [a,a_1] + ├─ Sorting Est. 10 rows, cost 0.000000e+00 + │ │ Order by: {c DESC NULLS LAST} + │ │ Prefix Order: {c} + │ │ Limit: 10 + │ └─ Filter Est. ? rows, cost 0.000000e+00 + │ │ Condition: (a = \'1\') AND (b = \'1\') + │ └─ TableScan default.test Est. ? rows, cost 0.000000e+00 + │ Input Order Info: {a DESC ANY, b DESC ANY, c DESC ANY} + │ Where: (a = \'1\') AND (b = \'1\') + │ Outputs: [a, b, c] + └─ Sorting Est. 10 rows, cost 0.000000e+00 + │ Order by: {c_1 DESC NULLS LAST} + │ Prefix Order: {c_1} + │ Limit: 10 + └─ Filter Est. ? rows, cost 0.000000e+00 + │ Condition: (a_1 = \'3\') AND (b_1 = \'3\') + └─ TableScan default.test Est. ? 
rows, cost 0.000000e+00 + Input Order Info: {a DESC ANY, b DESC ANY, c DESC ANY} + Where: (a = \'3\') AND (b = \'3\') + Outputs: a_1:=a, b_1:=b, c_1:=c +-- { echoOn } +select a as e, b, concat(c, d) as f from test order by e desc, b desc, f desc; +3 3 44 +3 3 33 +1 1 22 +1 1 11 +select a, b, c from (select a, b, c from test union all select a, b, c from test) order by a desc, b desc, c desc; +3 3 4 +3 3 4 +3 3 3 +3 3 3 +1 1 2 +1 1 2 +1 1 1 +1 1 1 +select a, b, c from (select a, b, c from test where a = '1' and b = '1' union all select a, b, c from test where a = '3' and b = '3') order by c desc limit 10; +3 3 4 +3 3 3 +1 1 2 +1 1 1 diff --git a/tests/queries/4_cnch_stateless_no_tenant/48049_optimzier_use_sort_property.sql b/tests/queries/4_cnch_stateless_no_tenant/48049_optimzier_use_sort_property.sql new file mode 100644 index 00000000000..55c8ad0bbed --- /dev/null +++ b/tests/queries/4_cnch_stateless_no_tenant/48049_optimzier_use_sort_property.sql @@ -0,0 +1,56 @@ +CREATE TABLE test +( + `a` String, + `b` String, + `c` String, + `d` Nullable(String) +) +ENGINE = CnchMergeTree +ORDER BY (a, b, c) +SETTINGS storage_policy = 'cnch_default_hdfs', index_granularity = 8192; + +set enable_optimizer = 1; +set enable_sorting_property = 1; + +-- { echoOn } +explain select * from test order by a, b, c, d; + +explain select * from test order by a desc, b desc, c desc, d desc; + +explain select * from test order by a, b desc, c; + +explain select * from test order by a desc, b, c desc; + +explain select a, b, concat(c, d) as e from test order by a, b, e; + +explain select a as e, b, concat(c, d) as f from test order by e, b, f; + +explain select a as e, b, concat(c, d) as f from test order by e desc, b desc, f desc; + +explain select * from test where a = 'x' order by b, c, d; + +explain select * from test where a = 'x' order by b desc, c desc, d desc; + +explain select * from test where a = 'x' and d = 'z' order by d, b, c; + +explain select * from test where a = 'x' and d = 
'z' order by d desc, b desc, c desc; + +explain pipeline select * from test where a = 'x' order by b, c limit 10; + +explain select a, b, c from (select a, b, c from test union all select a, b, c from test) order by a desc, b desc, c desc; + +explain select a, b, c from (select a, b, c from test where a = '1' and b = '1' union all select a, b, c from test where a = '3' and b = '3') order by c desc limit 10; + +-- { echoOff } +insert into test values ('1', '1', '1', '1'); +insert into test values ('1', '1', '2', '2'); + +insert into test values ('3', '3', '3', '3'); +insert into test values ('3', '3', '4', '4'); + +-- { echoOn } +select a as e, b, concat(c, d) as f from test order by e desc, b desc, f desc; + +select a, b, c from (select a, b, c from test union all select a, b, c from test) order by a desc, b desc, c desc; + +select a, b, c from (select a, b, c from test where a = '1' and b = '1' union all select a, b, c from test where a = '3' and b = '3') order by c desc limit 10; From 662d0ca29cc7d2d75eb7f38233cb15f7a17e7713 Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 08:09:02 +0000 Subject: [PATCH 102/292] Merge 'fix_static_ast_visitor_cnch22' into 'cnch-2.2' fix(optimizer@m-4711238875): fix static ast visitor See merge request: !23244 From 7323c4f96fa8063bb26ba17d78eb8f86427858e4 Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 08:09:21 +0000 Subject: [PATCH 103/292] Merge 'cherry-pick-mr-23196-1' into 'cnch-2.2' fix(clickhousech@m-4711215564): Merge 'fix-getBatchTaskToSchedule' into 'cnch-2.2' See merge request: !23203 --- src/Interpreters/DistributedStages/Scheduler.cpp | 6 +++++- .../4_cnch_stateless/10300_bsp_schedule_timeout.reference | 0 .../queries/4_cnch_stateless/10300_bsp_schedule_timeout.sql | 3 +++ 3 files changed, 8 insertions(+), 1 deletion(-) create mode 100644 tests/queries/4_cnch_stateless/10300_bsp_schedule_timeout.reference create mode 100644 tests/queries/4_cnch_stateless/10300_bsp_schedule_timeout.sql diff 
--git a/src/Interpreters/DistributedStages/Scheduler.cpp b/src/Interpreters/DistributedStages/Scheduler.cpp index 8e1fb1b4880..69ce416c121 100644 --- a/src/Interpreters/DistributedStages/Scheduler.cpp +++ b/src/Interpreters/DistributedStages/Scheduler.cpp @@ -34,7 +34,11 @@ bool Scheduler::addBatchTask(BatchTaskPtr batch_task) bool Scheduler::getBatchTaskToSchedule(BatchTaskPtr & task) { - return queue.tryPop(task, query_expiration_ms); + auto now = time_in_milliseconds(std::chrono::system_clock::now()); + if (query_expiration_ms <= now) + return false; + else + return queue.tryPop(task, query_expiration_ms - now); } void Scheduler::dispatchTask(PlanSegment * plan_segment_ptr, const SegmentTask & task, const size_t idx) diff --git a/tests/queries/4_cnch_stateless/10300_bsp_schedule_timeout.reference b/tests/queries/4_cnch_stateless/10300_bsp_schedule_timeout.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/4_cnch_stateless/10300_bsp_schedule_timeout.sql b/tests/queries/4_cnch_stateless/10300_bsp_schedule_timeout.sql new file mode 100644 index 00000000000..1c368499be4 --- /dev/null +++ b/tests/queries/4_cnch_stateless/10300_bsp_schedule_timeout.sql @@ -0,0 +1,3 @@ +CREATE TABLE 10300_bsp_schedule_timeout(a UInt32) ENGINE=CnchMergeTree() ORDER BY a; +INSERT INTO 10300_bsp_schedule_timeout (a) VALUES (1); +SELECT sleepEachRow(2) FROM 10300_bsp_schedule_timeout settings bsp_mode=1,enable_optimizer=1,max_execution_time=1; -- { serverError 49 } From a52a48f70415b8dd9803e67d330669734751424d Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 08:09:50 +0000 Subject: [PATCH 104/292] Merge 'cherry-pick-mr-23262-1' into 'cnch-2.2' fix(optimizer@m-4679217588): fix symbol transform See merge request: !23267 --- src/Optimizer/SymbolTransformMap.cpp | 2 +- ...expression_share_with_projection.reference | 1 + ...ommon_expression_share_with_projection.sql | 36 +++++++++++++++++++ 3 files changed, 38 insertions(+), 1 deletion(-) diff 
--git a/src/Optimizer/SymbolTransformMap.cpp b/src/Optimizer/SymbolTransformMap.cpp index 7b274719253..50c5b27cbfb 100644 --- a/src/Optimizer/SymbolTransformMap.cpp +++ b/src/Optimizer/SymbolTransformMap.cpp @@ -181,7 +181,7 @@ String SymbolTransformMap::toString() const bool SymbolTransformMap::addSymbolMapping(const String & symbol, ConstASTPtr expr) { for (const auto & symbol_in_expr : SymbolsExtractor::extract(expr)) - if (symbol_to_expressions.contains(symbol_in_expr)) + if (symbol == symbol_in_expr || symbol_to_expressions.contains(symbol_in_expr)) return false; return symbol_to_expressions.emplace(symbol, std::move(expr)).second; } diff --git a/tests/queries/4_cnch_stateless_no_tenant/40069_common_expression_share_with_projection.reference b/tests/queries/4_cnch_stateless_no_tenant/40069_common_expression_share_with_projection.reference index 69d51df0238..0bf71f72ca7 100644 --- a/tests/queries/4_cnch_stateless_no_tenant/40069_common_expression_share_with_projection.reference +++ b/tests/queries/4_cnch_stateless_no_tenant/40069_common_expression_share_with_projection.reference @@ -12,3 +12,4 @@ Projection Est. ? 
rows Group by: {} Aggregates: expr#sum(val):=AggNull(sum)(val) 200 +11 111 diff --git a/tests/queries/4_cnch_stateless_no_tenant/40069_common_expression_share_with_projection.sql b/tests/queries/4_cnch_stateless_no_tenant/40069_common_expression_share_with_projection.sql index 4e69794789f..917272468fd 100644 --- a/tests/queries/4_cnch_stateless_no_tenant/40069_common_expression_share_with_projection.sql +++ b/tests/queries/4_cnch_stateless_no_tenant/40069_common_expression_share_with_projection.sql @@ -54,3 +54,39 @@ SELECT sum(val) FROM t40069_ces PREWHERE (multiIf((((key + 1) + 2) + 3) % 10 = 1, 'a', 'b') as x) != 'xx' WHERE (multiIf((((key + 1) + 2) + 3) % 10 = 2, 'c', 'd') as y) != 'xx'; + + +set dialect_type = 'MYSQL'; +DROP TABLE IF EXISTS tthenghamdext_full_att_adid_active_hourly_report_v1; +DROP TABLE IF EXISTS tthenghamdext_agent_account; + +CREATE TABLE tthenghamdext_full_att_adid_active_hourly_report_v1 +( + `id` Int64 NOT NULL, + `date` Date NOT NULL DEFAULT '2018-01-01', + `active` Int32 NOT NULL DEFAULT '0', + `agent_account_id` Int64 NOT NULL DEFAULT '0' COMMENT '账号id' +) +ENGINE = CnchMergeTree +ORDER BY (id, date) +UNIQUE KEY (id, date); + +CREATE TABLE tthenghamdext_agent_account +( + `id` Int64 NOT NULL, + `account_type` Int8 NOT NULL DEFAULT '1' COMMENT '账号类型 1新增 2内拉新 3召回 4 地推' +) +ENGINE = CnchMergeTree +ORDER BY tuple(id); + +SELECT + concat('490', '1', aa.id) AS idx_hash, + sum(aadr.active) AS inner_active +FROM tthenghamdext_full_att_adid_active_hourly_report_v1 AS aadr LEFT JOIN tthenghamdext_agent_account AS aa ON aa.id = aadr.agent_account_id +WHERE aa.account_type = 1 +GROUP BY idx_hash +HAVING inner_active > 0 +UNION +SELECT + '11' AS idx, + 111 AS inner; From 2bfd4c40046c8c26ad69ebbf40de3a5149825c06 Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 08:10:15 +0000 Subject: [PATCH 105/292] Merge 'cherry-pick-4d3a1df7-3' into 'cnch-2.2' fix(clickhousech@m-4691623503): [cp 2.2] make sure clean outdated txn for GC See merge 
request: !23149 --- src/Transaction/TransactionCoordinatorRcCnch.cpp | 8 ++------ src/Transaction/TransactionCoordinatorRcCnch.h | 1 - 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/src/Transaction/TransactionCoordinatorRcCnch.cpp b/src/Transaction/TransactionCoordinatorRcCnch.cpp index 5fd8d381d82..8f93c055865 100644 --- a/src/Transaction/TransactionCoordinatorRcCnch.cpp +++ b/src/Transaction/TransactionCoordinatorRcCnch.cpp @@ -27,6 +27,7 @@ #include #include #include +#include "Transaction/TxnTimestamp.h" #include // #include @@ -272,21 +273,16 @@ void TransactionCoordinatorRcCnch::eraseActiveTimestamp(const TransactionCnchPtr std::optional TransactionCoordinatorRcCnch::getMinActiveTimestamp(const StorageID & storage_id) { const UInt64 expired_interval = getContext()->getRootConfig().cnch_transaction_ts_expire_time; // default 2h - auto now = UInt64(time(nullptr)) * 1000; std::lock_guard lock(min_ts_mutex); auto timestamps_it = table_to_timestamps.find(storage_id.uuid); if (timestamps_it == table_to_timestamps.end() || timestamps_it->second.empty()) return std::nullopt; - if (now - last_time_clean_timestamps < expired_interval) - return *(timestamps_it->second.begin()); - try { /// Try to clean all outdated Txns. 
- last_time_clean_timestamps = now; - TxnTimestamp cur_ts = getContext()->getTimestamp(); + TxnTimestamp cur_ts = TxnTimestamp::fromUnixTimestamp(static_cast(time(nullptr))); for (auto it = timestamps_it->second.begin(); it != timestamps_it->second.end();) { if ((cur_ts.toMillisecond() - it->toMillisecond()) > expired_interval) diff --git a/src/Transaction/TransactionCoordinatorRcCnch.h b/src/Transaction/TransactionCoordinatorRcCnch.h index 27b09a78ede..05f9b43141d 100644 --- a/src/Transaction/TransactionCoordinatorRcCnch.h +++ b/src/Transaction/TransactionCoordinatorRcCnch.h @@ -194,7 +194,6 @@ class TransactionCoordinatorRcCnch : WithContext mutable std::mutex min_ts_mutex; std::map> timestamp_to_tables; std::map> table_to_timestamps; - uint64_t last_time_clean_timestamps; // TimestampCacheManagerPtr ts_cache_manager; TransactionCleanerPtr txn_cleaner; From 894de028699d95e64b582b0825347914930f2235 Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 08:10:31 +0000 Subject: [PATCH 106/292] Merge 'cnch_fix_tenant_on_mv_2_2' into 'cnch-2.2' fix(clickhousech@m-4716918150): only async mv support tenant id settings See merge request: !23282 --- src/Interpreters/InterpreterCreateQuery.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 933db40f14d..c5360ec6f00 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -1406,7 +1406,8 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) return doCreateOrReplaceTable(create, properties); /// when create materialized view and tenant id is not empty add setting tenant_id to select query - if (create.is_materialized_view && !getCurrentTenantId().empty()) + if (create.is_materialized_view && create.refresh_strategy && (create.refresh_strategy->schedule_kind == RefreshScheduleKind::ASYNC || + create.refresh_strategy->schedule_kind == 
RefreshScheduleKind::MANUAL) && !getCurrentTenantId().empty()) { ASTPtr settings = std::make_shared(); settings->as().is_standalone = false; From 4cd777676a7d50fc1a82d48f755dd27ce84634bb Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 08:10:46 +0000 Subject: [PATCH 107/292] Merge 'jiashuo_fix-assign-table-settings-2.2' into 'cnch-2.2' fix(clickhousech@m-4716941540): [CP to 2.2]fix parts assign table settings failed See merge request: !23273 --- src/Interpreters/Context.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 2d0d6b12990..c633c2754a9 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -5768,7 +5768,7 @@ Context::PartAllocator Context::getPartAllocationAlgo(MergeTreeSettingsPtr table auto algorithm = table_settings->cnch_part_allocation_algorithm >= 0 ? table_settings->cnch_part_allocation_algorithm : settings.cnch_part_allocation_algorithm; LOG_DEBUG(shared->log, "Send query with cnch_part_allocation_algorithm = {}, system setting = {}, table setting = {}", algorithm, settings.cnch_part_allocation_algorithm, table_settings->cnch_part_allocation_algorithm); - switch (settings.cnch_part_allocation_algorithm) + switch (algorithm) { case 0: return PartAllocator::JUMP_CONSISTENT_HASH; From 861b30246d7da8d7f4ff8b1fb8398b1436bd88ef Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 08:11:04 +0000 Subject: [PATCH 108/292] Merge branch 'cherry-pick-fa7bc04b' into 'cnch-2.2' feat(clickhousech@m-4678879016): support write insert rows to query_log [CP] See merge request dp/ClickHouse!23336 --- .../DistributedStages/PlanSegmentExecutor.cpp | 2 + src/Processors/QueryPipeline.cpp | 5 ++ .../Transforms/TableFinishTransform.cpp | 49 ++++++++++++++++--- .../Transforms/TableFinishTransform.h | 20 +++++++- src/QueryPlan/TableFinishStep.cpp | 3 +- 5 files changed, 70 insertions(+), 9 deletions(-) diff --git 
a/src/Interpreters/DistributedStages/PlanSegmentExecutor.cpp b/src/Interpreters/DistributedStages/PlanSegmentExecutor.cpp index db9fe1c879e..5e6e6ede606 100644 --- a/src/Interpreters/DistributedStages/PlanSegmentExecutor.cpp +++ b/src/Interpreters/DistributedStages/PlanSegmentExecutor.cpp @@ -67,6 +67,7 @@ #include #include #include +#include #include #include #include @@ -229,6 +230,7 @@ std::optional PlanSegmentExecutor::execute BlockIO PlanSegmentExecutor::lazyExecute(bool /*add_output_processors*/) { + LOG_DEBUG(&Poco::Logger::get("PlanSegmentExecutor"), "lazyExecute: {}", plan_segment->getPlanSegmentId()); BlockIO res; // Will run as master query and already initialized if (!CurrentThread::get().getQueryContext() || CurrentThread::get().getQueryContext().get() != context.get()) diff --git a/src/Processors/QueryPipeline.cpp b/src/Processors/QueryPipeline.cpp index 4f1af41d9c0..6fce02279ab 100644 --- a/src/Processors/QueryPipeline.cpp +++ b/src/Processors/QueryPipeline.cpp @@ -49,6 +49,7 @@ #include #include #include +#include "Processors/Transforms/TableFinishTransform.h" #include @@ -578,6 +579,8 @@ void QueryPipeline::setProgressCallback(const ProgressCallback & callback) { if (auto * source = dynamic_cast(processor.get())) source->setProgressCallback(callback); + if (auto * finish_transform = dynamic_cast(processor.get())) + finish_transform->setProgressCallback(callback); } } @@ -603,6 +606,8 @@ void QueryPipeline::setProcessListElement(QueryStatus * elem) { if (auto * source = dynamic_cast(processor.get())) source->setProcessListElement(elem); + if (auto * finish_transform = dynamic_cast(processor.get())) + finish_transform->setProcessListElement(elem); } } diff --git a/src/Processors/Transforms/TableFinishTransform.cpp b/src/Processors/Transforms/TableFinishTransform.cpp index ff22c942431..4e55b5bfaf2 100644 --- a/src/Processors/Transforms/TableFinishTransform.cpp +++ b/src/Processors/Transforms/TableFinishTransform.cpp @@ -1,26 +1,42 @@ #include #include 
+#include #include #include #include #include #include -#include +#include "Common/StackTrace.h" #include +#include "Interpreters/ActionsVisitor.h" +#include "Interpreters/ProcessList.h" + +namespace ProfileEvents +{ +extern const Event InsertedRows; +extern const Event InsertedBytes; +} namespace DB { -TableFinishTransform::TableFinishTransform(const Block & header_, const StoragePtr & storage_, - const ContextPtr & context_, ASTPtr & query_) - : IProcessor({header_}, {header_}), input(inputs.front()) +TableFinishTransform::TableFinishTransform( + const Block & header_, const StoragePtr & storage_, const ContextPtr & context_, ASTPtr & query_, bool insert_select_with_profiles_) + : IProcessor({header_}, {header_}) + , input(inputs.front()) , output(outputs.front()) , storage(storage_) , context(context_) , query(query_) + , insert_select_with_profiles(insert_select_with_profiles_) { } +void TableFinishTransform::setProcessListElement(QueryStatus * elem) +{ + process_list_elem = elem; +} + Block TableFinishTransform::getHeader() { return header; @@ -80,20 +96,41 @@ TableFinishTransform::Status TableFinishTransform::prepare() if (!input.hasData()) return Status::NeedData; - current_chunk = input.pull(true); + current_output_chunk = input.pull(true); has_input = true; return Status::Ready; } void TableFinishTransform::work() { - consume(std::move(current_chunk)); + consume(std::move(current_output_chunk)); has_input = false; } void TableFinishTransform::consume(Chunk chunk) { output_chunk = std::move(chunk); + + if (insert_select_with_profiles && !output_chunk.empty()) + { + auto & column = output_chunk.getColumns()[0]; + + ReadProgress local_progress(column->get64(0), 0); + + ProfileEvents::increment(ProfileEvents::InsertedRows, local_progress.read_rows); + ProfileEvents::increment(ProfileEvents::InsertedBytes, local_progress.read_bytes); + + if (process_list_elem) + { + process_list_elem->updateProgressOut(Progress(local_progress)); + } + + if (progress_callback) + 
{ + progress_callback(Progress(local_progress)); + } + } + has_output = true; } diff --git a/src/Processors/Transforms/TableFinishTransform.h b/src/Processors/Transforms/TableFinishTransform.h index c0bcfb44052..a4da03b286e 100644 --- a/src/Processors/Transforms/TableFinishTransform.h +++ b/src/Processors/Transforms/TableFinishTransform.h @@ -4,6 +4,7 @@ #include #include #include +#include "Processors/Sources/SourceWithProgress.h" namespace DB { @@ -11,7 +12,12 @@ namespace DB class TableFinishTransform : public IProcessor { public: - TableFinishTransform(const Block & header_, const StoragePtr & storage_, const ContextPtr & context_, ASTPtr & query_); + TableFinishTransform( + const Block & header_, + const StoragePtr & storage_, + const ContextPtr & context_, + ASTPtr & query_, + bool insert_select_with_profiles_ = false); String getName() const override { @@ -30,6 +36,12 @@ class TableFinishTransform : public IProcessor return output; } + void setProcessListElement(QueryStatus * elem); + void setProgressCallback(const ProgressCallback & callback) + { + progress_callback = callback; + } + private: void consume(Chunk block); void onFinish(); @@ -40,14 +52,18 @@ class TableFinishTransform : public IProcessor Block header; - Chunk current_chunk; + Chunk current_output_chunk; Chunk output_chunk; bool has_input = false; bool has_output = false; + ProgressCallback progress_callback; + QueryStatus * process_list_elem = nullptr; + StoragePtr storage; ContextPtr context; ASTPtr query; + bool insert_select_with_profiles; CnchLockHolderPtrs lock_holders; }; diff --git a/src/QueryPlan/TableFinishStep.cpp b/src/QueryPlan/TableFinishStep.cpp index 70cf88bf537..70688132f6a 100644 --- a/src/QueryPlan/TableFinishStep.cpp +++ b/src/QueryPlan/TableFinishStep.cpp @@ -45,7 +45,8 @@ std::shared_ptr TableFinishStep::copy(ContextPtr) const void TableFinishStep::transformPipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings & settings) { pipeline.resize(1); - 
pipeline.addTransform(std::make_shared(getInputStreams()[0].header, target->getStorage(), settings.context, query)); + pipeline.addTransform(std::make_shared( + getInputStreams()[0].header, target->getStorage(), settings.context, query, insert_select_with_profiles)); } void TableFinishStep::toProto(Protos::TableFinishStep & proto, bool) const From 36c3d764fc38995c787af2baf7ed27b33189e0f3 Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 08:11:20 +0000 Subject: [PATCH 109/292] Merge branch 'cherry-pick-3954f39f' into 'cnch-2.2' feat(clickhousech@m-4675508046): fix issues for connecting from PowerBI via MySQL protocol [CP] See merge request dp/ClickHouse!23077 --- src/Core/MySQL/PacketsProtocolText.cpp | 27 ++++- src/Core/MySQL/PacketsProtocolText.h | 1 + src/Server/MySQLHandler.cpp | 4 +- .../System/attachInformationSchemaTables.cpp | 34 +++--- .../01161_information_schema.reference | 106 +++++++++--------- ...information_schema_show_database.reference | 10 +- 6 files changed, 104 insertions(+), 78 deletions(-) diff --git a/src/Core/MySQL/PacketsProtocolText.cpp b/src/Core/MySQL/PacketsProtocolText.cpp index d67e9b01609..fa29df257d5 100644 --- a/src/Core/MySQL/PacketsProtocolText.cpp +++ b/src/Core/MySQL/PacketsProtocolText.cpp @@ -158,68 +158,89 @@ void ColumnDefinition::writePayloadImpl(WriteBuffer & buffer) const ColumnDefinition getColumnDefinition(const String & column_name, const DataTypePtr & data_type) { ColumnType column_type; + /// max column length after serialize into text + /// however, this func is called before serializing data. 
+ /// we therefore do not have the exact max length + /// if set to 0, power BI would treat the column as null and reports error + /// to avoid that, we return the theoretical max length based on data type + uint32_t column_length = 0; CharacterSet charset = CharacterSet::binary; int flags = 0; uint8_t decimals = 0; + if (!data_type->isNullable()) + flags = ColumnDefinitionFlags::NOT_NULL_FLAG; DataTypePtr normalized_data_type = removeLowCardinalityAndNullable(data_type); TypeIndex type_index = normalized_data_type->getTypeId(); switch (type_index) { case TypeIndex::UInt8: column_type = ColumnType::MYSQL_TYPE_TINY; + column_length = 3; // max val 255 flags = ColumnDefinitionFlags::BINARY_FLAG | ColumnDefinitionFlags::UNSIGNED_FLAG; break; case TypeIndex::UInt16: column_type = ColumnType::MYSQL_TYPE_SHORT; + column_length = 5; flags = ColumnDefinitionFlags::BINARY_FLAG | ColumnDefinitionFlags::UNSIGNED_FLAG; break; case TypeIndex::UInt32: column_type = ColumnType::MYSQL_TYPE_LONG; + column_length = 10; flags = ColumnDefinitionFlags::BINARY_FLAG | ColumnDefinitionFlags::UNSIGNED_FLAG; break; case TypeIndex::UInt64: column_type = ColumnType::MYSQL_TYPE_LONGLONG; + column_length = 20; flags = ColumnDefinitionFlags::BINARY_FLAG | ColumnDefinitionFlags::UNSIGNED_FLAG; break; case TypeIndex::Int8: column_type = ColumnType::MYSQL_TYPE_TINY; + column_length = 4; // min val -127 flags = ColumnDefinitionFlags::BINARY_FLAG; break; case TypeIndex::Int16: column_type = ColumnType::MYSQL_TYPE_SHORT; + column_length = 6; flags = ColumnDefinitionFlags::BINARY_FLAG; break; case TypeIndex::Int32: column_type = ColumnType::MYSQL_TYPE_LONG; + column_length = 11; flags = ColumnDefinitionFlags::BINARY_FLAG; break; case TypeIndex::Int64: column_type = ColumnType::MYSQL_TYPE_LONGLONG; + column_length = 21; flags = ColumnDefinitionFlags::BINARY_FLAG; break; case TypeIndex::Float32: column_type = ColumnType::MYSQL_TYPE_FLOAT; flags = ColumnDefinitionFlags::BINARY_FLAG; decimals = 31; + 
column_length = 14; break; case TypeIndex::Float64: column_type = ColumnType::MYSQL_TYPE_DOUBLE; flags = ColumnDefinitionFlags::BINARY_FLAG; decimals = 31; + column_length = 24; break; case TypeIndex::Date: case TypeIndex::Date32: column_type = ColumnType::MYSQL_TYPE_DATE; + column_length = 10; // e.g., 2020-12-12 flags = ColumnDefinitionFlags::BINARY_FLAG; break; case TypeIndex::DateTime: case TypeIndex::DateTime64: column_type = ColumnType::MYSQL_TYPE_DATETIME; + column_length = 26; // e.g., 2020-12-12 11:11:11.123456 flags = ColumnDefinitionFlags::BINARY_FLAG; break; case TypeIndex::Decimal32: case TypeIndex::Decimal64: column_type = ColumnType::MYSQL_TYPE_DECIMAL; + column_length = 20; // 18 (precision) + 1 (sign) + 1 (point) flags = ColumnDefinitionFlags::BINARY_FLAG; break; case TypeIndex::Decimal128: { @@ -237,14 +258,16 @@ ColumnDefinition getColumnDefinition(const String & column_name, const DataTypeP column_type = ColumnType::MYSQL_TYPE_DECIMAL; flags = ColumnDefinitionFlags::BINARY_FLAG; } + column_length = 67; // 65 + 1 (sign) + 1 (point) break; } default: - column_type = ColumnType::MYSQL_TYPE_STRING; + column_type = ColumnType::MYSQL_TYPE_VAR_STRING; + column_length = 65535; // max mysql var string len charset = CharacterSet::utf8_general_ci; break; } - return ColumnDefinition(column_name, charset, 0, column_type, flags, decimals); + return ColumnDefinition(column_name, charset, column_length, column_type, flags, decimals); } } diff --git a/src/Core/MySQL/PacketsProtocolText.h b/src/Core/MySQL/PacketsProtocolText.h index 07969a1ed93..309b07eed30 100644 --- a/src/Core/MySQL/PacketsProtocolText.h +++ b/src/Core/MySQL/PacketsProtocolText.h @@ -24,6 +24,7 @@ enum CharacterSet // https://dev.mysql.com/doc/dev/mysql-server/latest/group__group__cs__column__definition__flags.html enum ColumnDefinitionFlags { + NOT_NULL_FLAG = 1, UNSIGNED_FLAG = 32, BINARY_FLAG = 128 }; diff --git a/src/Server/MySQLHandler.cpp b/src/Server/MySQLHandler.cpp index 
9d9bd66535e..dc75b3ff923 100644 --- a/src/Server/MySQLHandler.cpp +++ b/src/Server/MySQLHandler.cpp @@ -175,7 +175,9 @@ MySQLHandler::MySQLHandler(IServer & server_, TCPServer & tcp_server_, const Poc server_capabilities |= CLIENT_SSL; static constexpr const char SHOW_CHARSET[] = "SELECT 'utf8mb4' AS charset, 'UTF-8 Unicode' AS Description, 'utf8mb4_0900_ai_ci' AS `Default collation`, 4 AS Maxlen"; - static constexpr const char SHOW_COLLATION[] = "SELECT 'utf8mb4_0900_ai_ci' AS collation, 'utf8mb4' AS Charset, '255' AS Id, 'Yes' AS Default, 'Yes' AS Compiled, 0 AS Sortlen, 'NO PAD' AS Pad_attribute"; + static constexpr const char SHOW_COLLATION[] = "SELECT 'utf8_general_ci' AS collation, 'utf8' AS charset, 33 AS id, 'Yes' AS default, 'Yes' AS Compiled, 1 AS Sortlen, 'NO PAD' AS Pad_attribute " + "UNION SELECT 'binary' AS collation, 'binary' AS charset, 63 AS id, 'Yes' AS default, 'Yes' AS Compiled, 1 AS Sortlen, 'NO PAD' AS Pad_attribute " + "UNION SELECT 'utf8mb4_0900_ai_ci' AS collation, 'utf8mb4' AS Charset, '255' AS Id, 'Yes' AS Default, 'Yes' AS Compiled, 0 AS Sortlen, 'NO PAD' AS Pad_attribute"; static constexpr const char SHOW_ENGINES[] = "SELECT name AS Engine, 'Yes' AS Support, concat(name, ' engine') AS Comment, 'NO' AS Transcations, 'NO' AS XA, 'NO' AS Savepoints FROM system.table_engines"; static constexpr const char SHOW_PRIVILEGES[] = "SELECT '' AS Privilege, '' AS Context, '' AS Comment"; diff --git a/src/Storages/System/attachInformationSchemaTables.cpp b/src/Storages/System/attachInformationSchemaTables.cpp index 96e61477ae1..c628f1a1f97 100644 --- a/src/Storages/System/attachInformationSchemaTables.cpp +++ b/src/Storages/System/attachInformationSchemaTables.cpp @@ -60,10 +60,10 @@ static constexpr std::string_view schemata = R"( static constexpr std::string_view tables = R"( ATTACH VIEW tables ( - `table_catalog` Nullable(String), - `table_schema` Nullable(String), - `table_name` Nullable(String), - `table_type` Nullable(String), + `table_catalog` 
String, + `table_schema` String, + `table_name` String, + `table_type` String, `engine` Nullable(String), `version` Nullable(String), `row_format` Nullable(String), @@ -79,12 +79,12 @@ static constexpr std::string_view tables = R"( `check_time` Nullable(DateTime), `checksum` Nullable(UInt64), `table_collation` Nullable(String), - `table_comment` Nullable(String), + `table_comment` String, `create_options` Nullable(String), - `TABLE_CATALOG` Nullable(String), - `TABLE_SCHEMA` Nullable(String), - `TABLE_NAME` Nullable(String), - `TABLE_TYPE` Nullable(String), + `TABLE_CATALOG` String, + `TABLE_SCHEMA` String, + `TABLE_NAME` String, + `TABLE_TYPE` String, `ENGINE` Nullable(String), `VERSION` Nullable(String), `ROW_FORMAT` Nullable(String), @@ -100,11 +100,11 @@ static constexpr std::string_view tables = R"( `CHECK_TIME` Nullable(DateTime), `CHECKSUM` Nullable(UInt64), `TABLE_COLLATION` Nullable(String), - `TABLE_COMMENT` Nullable(String), + `TABLE_COMMENT` String, `CREATE_OPTIONS` Nullable(String) ) AS SELECT - T.database AS table_catalog, + 'def' AS table_catalog, T.database AS table_schema, T.name AS table_name, multiIf(any(T.is_temporary), 'LOCAL TEMPORARY', @@ -153,7 +153,7 @@ static constexpr std::string_view tables = R"( FROM system.tables as T LEFT OUTER JOIN (select * from system.cnch_parts where visible = 1) as P on T.database = P.database and T.name = P.table - GROUP BY table_catalog, table_name + GROUP BY table_schema, table_name SETTINGS enable_multiple_tables_for_cnch_parts=1, join_use_nulls=1; )"; @@ -190,7 +190,7 @@ static constexpr std::string_view views = R"( `IS_TRIGGER_INSERTABLE_INTO` Enum8('NO' = 0, 'YES' = 1) ) AS SELECT - database AS table_catalog, + 'def' AS table_catalog, database AS table_schema, name AS table_name, as_select AS view_definition, @@ -281,7 +281,7 @@ static constexpr std::string_view columns = R"( `PRIVILEGES` Nullable(String) ) AS SELECT - database AS table_catalog, + 'def' AS table_catalog, database AS table_schema, table AS 
table_name, name AS column_name, @@ -488,7 +488,7 @@ static constexpr std::string_view statistics = R"( `EXPRESSION` Nullable(String) ) AS SELECT - '' AS table_catalog, + 'def' AS table_catalog, database AS table_schema, name AS table_name, 1 AS non_unique, @@ -537,7 +537,7 @@ static constexpr std::string_view statistics = R"( ) UNION ALL SELECT - '' AS table_catalog, + 'def' AS table_catalog, database AS table_schema, table AS table_name, 1 AS non_unique, @@ -963,7 +963,7 @@ static constexpr std::string_view partitions = R"( `TABLESPACE_NAME` Nullable(String) ) AS SELECT - system.cnch_parts.database AS table_catalog, + 'def' AS table_catalog, system.cnch_parts.database AS table_schema, system.cnch_parts.table AS table_name, partition AS partition_name, diff --git a/tests/queries/4_cnch_stateless/01161_information_schema.reference b/tests/queries/4_cnch_stateless/01161_information_schema.reference index 3c230e42bf3..add97e1ebec 100644 --- a/tests/queries/4_cnch_stateless/01161_information_schema.reference +++ b/tests/queries/4_cnch_stateless/01161_information_schema.reference @@ -60,55 +60,55 @@ triggers views INFORMATION_SCHEMA INFORMATION_SCHEMA default \N \N utf8mb4 utf8mb4_0900_ai_ci \N INFORMATION_SCHEMA INFORMATION_SCHEMA default \N \N utf8mb4 utf8mb4_0900_ai_ci \N information_schema information_schema default \N \N utf8mb4 utf8mb4_0900_ai_ci \N information_schema information_schema default \N \N utf8mb4 utf8mb4_0900_ai_ci \N -01161_information_schema 01161_information_schema kcu BASE TABLE CnchMergeTree \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N 01161_information_schema 01161_information_schema kcu BASE TABLE CnchMergeTree \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N - tmp LOCAL TEMPORARY Memory \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N tmp LOCAL TEMPORARY Memory \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N -01161_information_schema 01161_information_schema v VIEW View \N \N \N \N \N \N 0 \N \N 
\N \N \N \N utf8mb4_0900_ai_ci \N 01161_information_schema 01161_information_schema v VIEW View \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N -01161_information_schema 01161_information_schema t BASE TABLE CnchMergeTree \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N 01161_information_schema 01161_information_schema t BASE TABLE CnchMergeTree \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N -01161_information_schema 01161_information_schema partitioned BASE TABLE CnchMergeTree \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N 01161_information_schema 01161_information_schema partitioned BASE TABLE CnchMergeTree \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N -01161_information_schema 01161_information_schema kcu2 BASE TABLE CnchMergeTree \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N 01161_information_schema 01161_information_schema kcu2 BASE TABLE CnchMergeTree \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N -01161_information_schema 01161_information_schema v SELECT n, f FROM `1234.01161_information_schema`.t NONE utf8mb4 utf8mb4_0900_ai_ci NO NO NO NO NO 01161_information_schema 01161_information_schema v SELECT n, f FROM `1234.01161_information_schema`.t NONE utf8mb4 utf8mb4_0900_ai_ci NO NO NO NO NO -01161_information_schema 01161_information_schema kcu i 1 0 UInt32 \N \N 32 2 0 \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N UInt32 \N \N 01161_information_schema 01161_information_schema kcu i 1 0 UInt32 \N \N 32 2 0 \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N UInt32 \N \N -01161_information_schema 01161_information_schema kcu s 2 0 String \N \N \N \N \N \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N String \N \N 01161_information_schema 01161_information_schema kcu s 2 0 String \N \N \N \N \N \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N String \N \N -01161_information_schema 01161_information_schema kcu2 i 1 0 UInt32 \N \N 32 2 0 \N \N \N utf8mb4 \N \N 
utf8mb4_0900_ai_ci \N \N \N UInt32 \N \N 01161_information_schema 01161_information_schema kcu2 i 1 0 UInt32 \N \N 32 2 0 \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N UInt32 \N \N -01161_information_schema 01161_information_schema kcu2 d 2 0 Date \N \N \N \N \N 0 \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N Date \N \N 01161_information_schema 01161_information_schema kcu2 d 2 0 Date \N \N \N \N \N 0 \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N Date \N \N -01161_information_schema 01161_information_schema kcu2 u 3 0 UUID \N \N \N \N \N \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N UUID \N \N 01161_information_schema 01161_information_schema kcu2 u 3 0 UUID \N \N \N \N \N \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N UUID \N \N -01161_information_schema 01161_information_schema partitioned i 1 0 UInt32 \N \N 32 2 0 \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N UInt32 \N \N 01161_information_schema 01161_information_schema partitioned i 1 0 UInt32 \N \N 32 2 0 \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N UInt32 \N \N -01161_information_schema 01161_information_schema partitioned s 2 0 String \N \N \N \N \N \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N String \N \N 01161_information_schema 01161_information_schema partitioned s 2 0 String \N \N \N \N \N \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N String \N \N -01161_information_schema 01161_information_schema t n 1 0 UInt64 \N \N 64 2 0 \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N UInt64 \N \N 01161_information_schema 01161_information_schema t n 1 0 UInt64 \N \N 64 2 0 \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N UInt64 \N \N -01161_information_schema 01161_information_schema t f 2 0 Float32 \N \N \N \N \N \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N Float32 \N \N 01161_information_schema 01161_information_schema t f 2 0 Float32 \N \N \N \N \N \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N Float32 \N \N -01161_information_schema 
01161_information_schema t s 3 0 String \N \N \N \N \N \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N String \N \N 01161_information_schema 01161_information_schema t s 3 0 String \N \N \N \N \N \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N String \N \N -01161_information_schema 01161_information_schema t fs 4 0 FixedString(42) 42 42 \N \N \N \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N FixedString(42) \N \N 01161_information_schema 01161_information_schema t fs 4 0 FixedString(42) 42 42 \N \N \N \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N FixedString(42) \N \N -01161_information_schema 01161_information_schema t d 5 0 Decimal(9, 6) \N \N 9 10 6 \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N Decimal(9, 6) \N \N 01161_information_schema 01161_information_schema t d 5 0 Decimal(9, 6) \N \N 9 10 6 \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N Decimal(9, 6) \N \N -01161_information_schema 01161_information_schema v n 1 1 Nullable(Int32) \N \N 32 2 0 \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N Nullable(Int32) \N \N 01161_information_schema 01161_information_schema v n 1 1 Nullable(Int32) \N \N 32 2 0 \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N Nullable(Int32) \N \N -01161_information_schema 01161_information_schema v f 2 0 Float64 \N \N \N \N \N \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N Float64 \N \N 01161_information_schema 01161_information_schema v f 2 0 Float64 \N \N \N \N \N \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N Float64 \N \N - tmp d 1 0 Date \N \N \N \N \N 0 \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N Date \N \N tmp d 1 0 Date \N \N \N \N \N 0 \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N Date \N \N - tmp dt 2 0 DateTime \N \N \N \N \N 0 \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N DateTime \N \N tmp dt 2 0 DateTime \N \N \N \N \N 0 \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N DateTime \N \N - tmp dtms 3 0 DateTime64(3) \N \N \N \N \N 3 \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N 
DateTime64(3) \N \N tmp dtms 3 0 DateTime64(3) \N \N \N \N \N 3 \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N DateTime64(3) \N \N +def 01161_information_schema kcu BASE TABLE CnchMergeTree \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N def 01161_information_schema kcu BASE TABLE CnchMergeTree \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N +def tmp LOCAL TEMPORARY Memory \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N def tmp LOCAL TEMPORARY Memory \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N +def 01161_information_schema v VIEW View \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N def 01161_information_schema v VIEW View \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N +def 01161_information_schema t BASE TABLE CnchMergeTree \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N def 01161_information_schema t BASE TABLE CnchMergeTree \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N +def 01161_information_schema partitioned BASE TABLE CnchMergeTree \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N def 01161_information_schema partitioned BASE TABLE CnchMergeTree \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N +def 01161_information_schema kcu2 BASE TABLE CnchMergeTree \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N def 01161_information_schema kcu2 BASE TABLE CnchMergeTree \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N +def 01161_information_schema v SELECT n, f FROM `1234.01161_information_schema`.t NONE utf8mb4 utf8mb4_0900_ai_ci NO NO NO NO NO def 01161_information_schema v SELECT n, f FROM `1234.01161_information_schema`.t NONE utf8mb4 utf8mb4_0900_ai_ci NO NO NO NO NO +def 01161_information_schema kcu i 1 0 UInt32 \N \N 32 2 0 \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N UInt32 \N \N def 01161_information_schema kcu i 1 0 UInt32 \N \N 32 2 0 \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N UInt32 \N \N +def 
01161_information_schema kcu s 2 0 String \N \N \N \N \N \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N String \N \N def 01161_information_schema kcu s 2 0 String \N \N \N \N \N \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N String \N \N +def 01161_information_schema kcu2 i 1 0 UInt32 \N \N 32 2 0 \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N UInt32 \N \N def 01161_information_schema kcu2 i 1 0 UInt32 \N \N 32 2 0 \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N UInt32 \N \N +def 01161_information_schema kcu2 d 2 0 Date \N \N \N \N \N 0 \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N Date \N \N def 01161_information_schema kcu2 d 2 0 Date \N \N \N \N \N 0 \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N Date \N \N +def 01161_information_schema kcu2 u 3 0 UUID \N \N \N \N \N \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N UUID \N \N def 01161_information_schema kcu2 u 3 0 UUID \N \N \N \N \N \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N UUID \N \N +def 01161_information_schema partitioned i 1 0 UInt32 \N \N 32 2 0 \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N UInt32 \N \N def 01161_information_schema partitioned i 1 0 UInt32 \N \N 32 2 0 \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N UInt32 \N \N +def 01161_information_schema partitioned s 2 0 String \N \N \N \N \N \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N String \N \N def 01161_information_schema partitioned s 2 0 String \N \N \N \N \N \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N String \N \N +def 01161_information_schema t n 1 0 UInt64 \N \N 64 2 0 \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N UInt64 \N \N def 01161_information_schema t n 1 0 UInt64 \N \N 64 2 0 \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N UInt64 \N \N +def 01161_information_schema t f 2 0 Float32 \N \N \N \N \N \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N Float32 \N \N def 01161_information_schema t f 2 0 Float32 \N \N \N \N \N \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N 
Float32 \N \N +def 01161_information_schema t s 3 0 String \N \N \N \N \N \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N String \N \N def 01161_information_schema t s 3 0 String \N \N \N \N \N \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N String \N \N +def 01161_information_schema t fs 4 0 FixedString(42) 42 42 \N \N \N \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N FixedString(42) \N \N def 01161_information_schema t fs 4 0 FixedString(42) 42 42 \N \N \N \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N FixedString(42) \N \N +def 01161_information_schema t d 5 0 Decimal(9, 6) \N \N 9 10 6 \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N Decimal(9, 6) \N \N def 01161_information_schema t d 5 0 Decimal(9, 6) \N \N 9 10 6 \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N Decimal(9, 6) \N \N +def 01161_information_schema v n 1 1 Nullable(Int32) \N \N 32 2 0 \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N Nullable(Int32) \N \N def 01161_information_schema v n 1 1 Nullable(Int32) \N \N 32 2 0 \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N Nullable(Int32) \N \N +def 01161_information_schema v f 2 0 Float64 \N \N \N \N \N \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N Float64 \N \N def 01161_information_schema v f 2 0 Float64 \N \N \N \N \N \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N Float64 \N \N +def tmp d 1 0 Date \N \N \N \N \N 0 \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N Date \N \N def tmp d 1 0 Date \N \N \N \N \N 0 \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N Date \N \N +def tmp dt 2 0 DateTime \N \N \N \N \N 0 \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N DateTime \N \N def tmp dt 2 0 DateTime \N \N \N \N \N 0 \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N DateTime \N \N +def tmp dtms 3 0 DateTime64(3) \N \N \N \N \N 3 \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N DateTime64(3) \N \N def tmp dtms 3 0 DateTime64(3) \N \N \N \N \N 3 \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N DateTime64(3) \N \N INFORMATION_SCHEMA 
INFORMATION_SCHEMA default \N \N utf8mb4 utf8mb4_0900_ai_ci \N INFORMATION_SCHEMA INFORMATION_SCHEMA default \N \N utf8mb4 utf8mb4_0900_ai_ci \N information_schema information_schema default \N \N utf8mb4 utf8mb4_0900_ai_ci \N information_schema information_schema default \N \N utf8mb4 utf8mb4_0900_ai_ci \N -01161_information_schema 01161_information_schema kcu BASE TABLE CnchMergeTree \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N 01161_information_schema 01161_information_schema kcu BASE TABLE CnchMergeTree \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N -01161_information_schema 01161_information_schema v VIEW View \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N 01161_information_schema 01161_information_schema v VIEW View \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N -01161_information_schema 01161_information_schema t BASE TABLE CnchMergeTree \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N 01161_information_schema 01161_information_schema t BASE TABLE CnchMergeTree \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N -01161_information_schema 01161_information_schema partitioned BASE TABLE CnchMergeTree \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N 01161_information_schema 01161_information_schema partitioned BASE TABLE CnchMergeTree \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N -01161_information_schema 01161_information_schema kcu2 BASE TABLE CnchMergeTree \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N 01161_information_schema 01161_information_schema kcu2 BASE TABLE CnchMergeTree \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N -01161_information_schema 01161_information_schema v SELECT n, f FROM `1234.01161_information_schema`.t NONE utf8mb4 utf8mb4_0900_ai_ci 0 0 0 0 0 01161_information_schema 01161_information_schema v SELECT n, f FROM `1234.01161_information_schema`.t NONE utf8mb4 utf8mb4_0900_ai_ci 0 0 0 0 0 
-01161_information_schema 01161_information_schema kcu i 1 0 UInt32 \N \N 32 2 0 \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N UInt32 \N \N 01161_information_schema 01161_information_schema kcu i 1 0 UInt32 \N \N 32 2 0 \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N UInt32 \N \N -01161_information_schema 01161_information_schema kcu s 2 0 String \N \N \N \N \N \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N String \N \N 01161_information_schema 01161_information_schema kcu s 2 0 String \N \N \N \N \N \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N String \N \N -01161_information_schema 01161_information_schema kcu2 i 1 0 UInt32 \N \N 32 2 0 \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N UInt32 \N \N 01161_information_schema 01161_information_schema kcu2 i 1 0 UInt32 \N \N 32 2 0 \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N UInt32 \N \N -01161_information_schema 01161_information_schema kcu2 d 2 0 Date \N \N \N \N \N 0 \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N Date \N \N 01161_information_schema 01161_information_schema kcu2 d 2 0 Date \N \N \N \N \N 0 \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N Date \N \N -01161_information_schema 01161_information_schema kcu2 u 3 0 UUID \N \N \N \N \N \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N UUID \N \N 01161_information_schema 01161_information_schema kcu2 u 3 0 UUID \N \N \N \N \N \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N UUID \N \N -01161_information_schema 01161_information_schema partitioned i 1 0 UInt32 \N \N 32 2 0 \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N UInt32 \N \N 01161_information_schema 01161_information_schema partitioned i 1 0 UInt32 \N \N 32 2 0 \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N UInt32 \N \N -01161_information_schema 01161_information_schema partitioned s 2 0 String \N \N \N \N \N \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N String \N \N 01161_information_schema 01161_information_schema partitioned s 2 0 String \N \N \N \N \N \N 
\N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N String \N \N -01161_information_schema 01161_information_schema t n 1 0 UInt64 \N \N 64 2 0 \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N UInt64 \N \N 01161_information_schema 01161_information_schema t n 1 0 UInt64 \N \N 64 2 0 \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N UInt64 \N \N -01161_information_schema 01161_information_schema t f 2 0 Float32 \N \N \N \N \N \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N Float32 \N \N 01161_information_schema 01161_information_schema t f 2 0 Float32 \N \N \N \N \N \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N Float32 \N \N -01161_information_schema 01161_information_schema t s 3 0 String \N \N \N \N \N \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N String \N \N 01161_information_schema 01161_information_schema t s 3 0 String \N \N \N \N \N \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N String \N \N -01161_information_schema 01161_information_schema t fs 4 0 FixedString(42) 42 42 \N \N \N \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N FixedString(42) \N \N 01161_information_schema 01161_information_schema t fs 4 0 FixedString(42) 42 42 \N \N \N \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N FixedString(42) \N \N -01161_information_schema 01161_information_schema t d 5 0 Decimal(9, 6) \N \N 9 10 6 \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N Decimal(9, 6) \N \N 01161_information_schema 01161_information_schema t d 5 0 Decimal(9, 6) \N \N 9 10 6 \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N Decimal(9, 6) \N \N -01161_information_schema 01161_information_schema v n 1 1 Nullable(Int32) \N \N 32 2 0 \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N Nullable(Int32) \N \N 01161_information_schema 01161_information_schema v n 1 1 Nullable(Int32) \N \N 32 2 0 \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N Nullable(Int32) \N \N -01161_information_schema 01161_information_schema v f 2 0 Float64 \N \N \N \N \N \N \N \N utf8mb4 \N \N 
utf8mb4_0900_ai_ci \N \N \N Float64 \N \N 01161_information_schema 01161_information_schema v f 2 0 Float64 \N \N \N \N \N \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N Float64 \N \N - tmp d 1 0 Date \N \N \N \N \N 0 \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N Date \N \N tmp d 1 0 Date \N \N \N \N \N 0 \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N Date \N \N - tmp dt 2 0 DateTime \N \N \N \N \N 0 \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N DateTime \N \N tmp dt 2 0 DateTime \N \N \N \N \N 0 \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N DateTime \N \N - tmp dtms 3 0 DateTime64(3) \N \N \N \N \N 3 \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N DateTime64(3) \N \N tmp dtms 3 0 DateTime64(3) \N \N \N \N \N 3 \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N DateTime64(3) \N \N +def 01161_information_schema kcu BASE TABLE CnchMergeTree \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N def 01161_information_schema kcu BASE TABLE CnchMergeTree \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N +def 01161_information_schema v VIEW View \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N def 01161_information_schema v VIEW View \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N +def 01161_information_schema t BASE TABLE CnchMergeTree \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N def 01161_information_schema t BASE TABLE CnchMergeTree \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N +def 01161_information_schema partitioned BASE TABLE CnchMergeTree \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N def 01161_information_schema partitioned BASE TABLE CnchMergeTree \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N +def 01161_information_schema kcu2 BASE TABLE CnchMergeTree \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N def 01161_information_schema kcu2 BASE TABLE CnchMergeTree \N \N \N \N \N \N 0 \N \N \N \N \N \N utf8mb4_0900_ai_ci \N +def 01161_information_schema v 
SELECT n, f FROM `1234.01161_information_schema`.t NONE utf8mb4 utf8mb4_0900_ai_ci 0 0 0 0 0 def 01161_information_schema v SELECT n, f FROM `1234.01161_information_schema`.t NONE utf8mb4 utf8mb4_0900_ai_ci 0 0 0 0 0 +def 01161_information_schema kcu i 1 0 UInt32 \N \N 32 2 0 \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N UInt32 \N \N def 01161_information_schema kcu i 1 0 UInt32 \N \N 32 2 0 \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N UInt32 \N \N +def 01161_information_schema kcu s 2 0 String \N \N \N \N \N \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N String \N \N def 01161_information_schema kcu s 2 0 String \N \N \N \N \N \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N String \N \N +def 01161_information_schema kcu2 i 1 0 UInt32 \N \N 32 2 0 \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N UInt32 \N \N def 01161_information_schema kcu2 i 1 0 UInt32 \N \N 32 2 0 \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N UInt32 \N \N +def 01161_information_schema kcu2 d 2 0 Date \N \N \N \N \N 0 \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N Date \N \N def 01161_information_schema kcu2 d 2 0 Date \N \N \N \N \N 0 \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N Date \N \N +def 01161_information_schema kcu2 u 3 0 UUID \N \N \N \N \N \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N UUID \N \N def 01161_information_schema kcu2 u 3 0 UUID \N \N \N \N \N \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N UUID \N \N +def 01161_information_schema partitioned i 1 0 UInt32 \N \N 32 2 0 \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N UInt32 \N \N def 01161_information_schema partitioned i 1 0 UInt32 \N \N 32 2 0 \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N UInt32 \N \N +def 01161_information_schema partitioned s 2 0 String \N \N \N \N \N \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N String \N \N def 01161_information_schema partitioned s 2 0 String \N \N \N \N \N \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N String \N \N +def 
01161_information_schema t n 1 0 UInt64 \N \N 64 2 0 \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N UInt64 \N \N def 01161_information_schema t n 1 0 UInt64 \N \N 64 2 0 \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N UInt64 \N \N +def 01161_information_schema t f 2 0 Float32 \N \N \N \N \N \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N Float32 \N \N def 01161_information_schema t f 2 0 Float32 \N \N \N \N \N \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N Float32 \N \N +def 01161_information_schema t s 3 0 String \N \N \N \N \N \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N String \N \N def 01161_information_schema t s 3 0 String \N \N \N \N \N \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N String \N \N +def 01161_information_schema t fs 4 0 FixedString(42) 42 42 \N \N \N \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N FixedString(42) \N \N def 01161_information_schema t fs 4 0 FixedString(42) 42 42 \N \N \N \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N FixedString(42) \N \N +def 01161_information_schema t d 5 0 Decimal(9, 6) \N \N 9 10 6 \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N Decimal(9, 6) \N \N def 01161_information_schema t d 5 0 Decimal(9, 6) \N \N 9 10 6 \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N Decimal(9, 6) \N \N +def 01161_information_schema v n 1 1 Nullable(Int32) \N \N 32 2 0 \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N Nullable(Int32) \N \N def 01161_information_schema v n 1 1 Nullable(Int32) \N \N 32 2 0 \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N Nullable(Int32) \N \N +def 01161_information_schema v f 2 0 Float64 \N \N \N \N \N \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N Float64 \N \N def 01161_information_schema v f 2 0 Float64 \N \N \N \N \N \N \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N Float64 \N \N +def tmp d 1 0 Date \N \N \N \N \N 0 \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N Date \N \N def tmp d 1 0 Date \N \N \N \N \N 0 \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N 
\N Date \N \N +def tmp dt 2 0 DateTime \N \N \N \N \N 0 \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N DateTime \N \N def tmp dt 2 0 DateTime \N \N \N \N \N 0 \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N DateTime \N \N +def tmp dtms 3 0 DateTime64(3) \N \N \N \N \N 3 \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N DateTime64(3) \N \N def tmp dtms 3 0 DateTime64(3) \N \N \N \N \N 3 \N \N utf8mb4 \N \N utf8mb4_0900_ai_ci \N \N \N DateTime64(3) \N \N 1 1 def 01161_information_schema PRIMARY def 01161_information_schema kcu i 1 \N \N \N \N def 01161_information_schema PRIMARY def 01161_information_schema kcu i 1 \N \N \N \N @@ -116,12 +116,12 @@ def 01161_information_schema PRIMARY def 01161_information_schema kcu2 d 1 \N \N def 01161_information_schema PRIMARY def 01161_information_schema kcu2 u 1 \N \N \N \N def 01161_information_schema PRIMARY def 01161_information_schema kcu2 u 1 \N \N \N \N -- information_schema.referential_constraints -- information_schema.statistics - 01161_information_schema kcu 1 01161_information_schema PRIMARY 0 i utf8mb4_0900_ai_ci 0 \N \N PRIMARY \N 01161_information_schema kcu 1 01161_information_schema PRIMARY 0 i utf8mb4_0900_ai_ci 0 \N \N PRIMARY \N - 01161_information_schema kcu2 1 01161_information_schema PRIMARY 0 u utf8mb4_0900_ai_ci 0 \N \N PRIMARY \N 01161_information_schema kcu2 1 01161_information_schema PRIMARY 0 u utf8mb4_0900_ai_ci 0 \N \N PRIMARY \N - 01161_information_schema kcu2 1 01161_information_schema PRIMARY 1 d utf8mb4_0900_ai_ci 0 \N \N PRIMARY \N 01161_information_schema kcu2 1 01161_information_schema PRIMARY 1 d utf8mb4_0900_ai_ci 0 \N \N PRIMARY \N - 01161_information_schema partitioned 1 01161_information_schema PRIMARY 0 utf8mb4_0900_ai_ci 0 \N \N PRIMARY \N 01161_information_schema partitioned 1 01161_information_schema PRIMARY 0 utf8mb4_0900_ai_ci 0 \N \N PRIMARY \N - 01161_information_schema t 1 01161_information_schema PRIMARY 0 utf8mb4_0900_ai_ci 0 \N \N PRIMARY \N 01161_information_schema t 1 
01161_information_schema PRIMARY 0 utf8mb4_0900_ai_ci 0 \N \N PRIMARY \N - 01161_information_schema v 1 01161_information_schema PRIMARY 0 utf8mb4_0900_ai_ci 0 \N \N PRIMARY \N 01161_information_schema v 1 01161_information_schema PRIMARY 0 utf8mb4_0900_ai_ci 0 \N \N PRIMARY \N +def 01161_information_schema kcu 1 01161_information_schema PRIMARY 0 i utf8mb4_0900_ai_ci 0 \N \N PRIMARY \N def 01161_information_schema kcu 1 01161_information_schema PRIMARY 0 i utf8mb4_0900_ai_ci 0 \N \N PRIMARY \N +def 01161_information_schema kcu2 1 01161_information_schema PRIMARY 0 u utf8mb4_0900_ai_ci 0 \N \N PRIMARY \N def 01161_information_schema kcu2 1 01161_information_schema PRIMARY 0 u utf8mb4_0900_ai_ci 0 \N \N PRIMARY \N +def 01161_information_schema kcu2 1 01161_information_schema PRIMARY 1 d utf8mb4_0900_ai_ci 0 \N \N PRIMARY \N def 01161_information_schema kcu2 1 01161_information_schema PRIMARY 1 d utf8mb4_0900_ai_ci 0 \N \N PRIMARY \N +def 01161_information_schema partitioned 1 01161_information_schema PRIMARY 0 utf8mb4_0900_ai_ci 0 \N \N PRIMARY \N def 01161_information_schema partitioned 1 01161_information_schema PRIMARY 0 utf8mb4_0900_ai_ci 0 \N \N PRIMARY \N +def 01161_information_schema t 1 01161_information_schema PRIMARY 0 utf8mb4_0900_ai_ci 0 \N \N PRIMARY \N def 01161_information_schema t 1 01161_information_schema PRIMARY 0 utf8mb4_0900_ai_ci 0 \N \N PRIMARY \N +def 01161_information_schema v 1 01161_information_schema PRIMARY 0 utf8mb4_0900_ai_ci 0 \N \N PRIMARY \N def 01161_information_schema v 1 01161_information_schema PRIMARY 0 utf8mb4_0900_ai_ci 0 \N \N PRIMARY \N -- information_schema.events -- information_schema.routines -- information_schema.triggers diff --git a/tests/queries/4_cnch_stateless/02206_information_schema_show_database.reference b/tests/queries/4_cnch_stateless/02206_information_schema_show_database.reference index a86321cd19f..94b1a1b9ec1 100644 --- a/tests/queries/4_cnch_stateless/02206_information_schema_show_database.reference +++ 
b/tests/queries/4_cnch_stateless/02206_information_schema_show_database.reference @@ -1,6 +1,6 @@ CREATE DATABASE INFORMATION_SCHEMA\nENGINE = Memory() -CREATE VIEW INFORMATION_SCHEMA.COLUMNS\n(\n `table_catalog` String,\n `table_schema` String,\n `table_name` String,\n `column_name` String,\n `ordinal_position` UInt64,\n `column_default` String,\n `is_nullable` String,\n `data_type` String,\n `character_maximum_length` Nullable(UInt64),\n `character_octet_length` Nullable(UInt64),\n `numeric_precision` Nullable(UInt64),\n `numeric_precision_radix` Nullable(UInt64),\n `numeric_scale` Nullable(UInt64),\n `datetime_precision` Nullable(UInt64),\n `character_set_catalog` Nullable(String),\n `character_set_schema` Nullable(String),\n `character_set_name` Nullable(String),\n `collation_catalog` Nullable(String),\n `collation_schema` Nullable(String),\n `collation_name` Nullable(String),\n `domain_catalog` Nullable(String),\n `domain_schema` Nullable(String),\n `domain_name` Nullable(String),\n `column_comment` String,\n `column_type` String,\n `extra` Nullable(String),\n `privileges` Nullable(String),\n `TABLE_CATALOG` String,\n `TABLE_SCHEMA` String,\n `TABLE_NAME` String,\n `COLUMN_NAME` String,\n `ORDINAL_POSITION` UInt64,\n `COLUMN_DEFAULT` String,\n `IS_NULLABLE` String,\n `DATA_TYPE` String,\n `CHARACTER_MAXIMUM_LENGTH` Nullable(UInt64),\n `CHARACTER_OCTET_LENGTH` Nullable(UInt64),\n `NUMERIC_PRECISION` Nullable(UInt64),\n `NUMERIC_PRECISION_RADIX` Nullable(UInt64),\n `NUMERIC_SCALE` Nullable(UInt64),\n `DATETIME_PRECISION` Nullable(UInt64),\n `CHARACTER_SET_CATALOG` Nullable(String),\n `CHARACTER_SET_SCHEMA` Nullable(String),\n `CHARACTER_SET_NAME` Nullable(String),\n `COLLATION_CATALOG` Nullable(String),\n `COLLATION_SCHEMA` Nullable(String),\n `COLLATION_NAME` Nullable(String),\n `DOMAIN_CATALOG` Nullable(String),\n `DOMAIN_SCHEMA` Nullable(String),\n `DOMAIN_NAME` Nullable(String),\n `COLUMN_COMMENT` String,\n `COLUMN_TYPE` String,\n `EXTRA` Nullable(String),\n 
`PRIVILEGES` Nullable(String)\n) AS\nSELECT\n database AS table_catalog,\n database AS table_schema,\n table AS table_name,\n name AS column_name,\n position AS ordinal_position,\n default_expression AS column_default,\n type LIKE \'Nullable(%)\' AS is_nullable,\n convertToDialectDataType(type) AS data_type,\n character_octet_length AS character_maximum_length,\n character_octet_length,\n numeric_precision,\n numeric_precision_radix,\n numeric_scale,\n datetime_precision,\n NULL AS character_set_catalog,\n NULL AS character_set_schema,\n \'utf8mb4\' AS character_set_name,\n NULL AS collation_catalog,\n NULL AS collation_schema,\n \'utf8mb4_0900_ai_ci\' AS collation_name,\n NULL AS domain_catalog,\n NULL AS domain_schema,\n NULL AS domain_name,\n comment AS column_comment,\n convertToDialectColumnType(type) AS column_type,\n NULL AS extra,\n NULL AS privileges,\n table_catalog AS TABLE_CATALOG,\n table_schema AS TABLE_SCHEMA,\n table_name AS TABLE_NAME,\n column_name AS COLUMN_NAME,\n ordinal_position AS ORDINAL_POSITION,\n column_default AS COLUMN_DEFAULT,\n is_nullable AS IS_NULLABLE,\n data_type AS DATA_TYPE,\n character_maximum_length AS CHARACTER_MAXIMUM_LENGTH,\n character_octet_length AS CHARACTER_OCTET_LENGTH,\n numeric_precision AS NUMERIC_PRECISION,\n numeric_precision_radix AS NUMERIC_PRECISION_RADIX,\n numeric_scale AS NUMERIC_SCALE,\n datetime_precision AS DATETIME_PRECISION,\n character_set_catalog AS CHARACTER_SET_CATALOG,\n character_set_schema AS CHARACTER_SET_SCHEMA,\n character_set_name AS CHARACTER_SET_NAME,\n collation_catalog AS COLLATION_CATALOG,\n collation_schema AS COLLATION_SCHEMA,\n collation_name AS COLLATION_NAME,\n domain_catalog AS DOMAIN_CATALOG,\n domain_schema AS DOMAIN_SCHEMA,\n domain_name AS DOMAIN_NAME,\n column_comment AS COLUMN_COMMENT,\n column_type AS COLUMN_TYPE,\n extra AS EXTRA,\n privileges AS PRIVILEGES\nFROM system.columns -CREATE VIEW INFORMATION_SCHEMA.TABLES (`table_catalog` Nullable(String), `table_schema` 
Nullable(String), `table_name` Nullable(String), `table_type` Nullable(String), `engine` Nullable(String), `version` Nullable(String), `row_format` Nullable(String), `table_rows` Nullable(UInt64), `avg_row_length` Nullable(UInt64), `data_length` Nullable(UInt64), `max_data_length` Nullable(UInt64), `index_length` Nullable(UInt64), `data_free` Nullable(UInt64), `auto_increment` Nullable(UInt64), `create_time` Nullable(DateTime), `update_time` Nullable(DateTime), `check_time` Nullable(DateTime), `checksum` Nullable(UInt64), `table_collation` Nullable(String), `table_comment` Nullable(String), `create_options` Nullable(String), `TABLE_CATALOG` Nullable(String), `TABLE_SCHEMA` Nullable(String), `TABLE_NAME` Nullable(String), `TABLE_TYPE` Nullable(String), `ENGINE` Nullable(String), `VERSION` Nullable(String), `ROW_FORMAT` Nullable(String), `TABLE_ROWS` Nullable(UInt64), `AVG_ROW_LENGTH` Nullable(UInt64), `DATA_LENGTH` Nullable(UInt64), `MAX_DATA_LENGTH` Nullable(UInt64), `INDEX_LENGTH` Nullable(UInt64), `DATA_FREE` Nullable(UInt64), `AUTO_INCREMENT` Nullable(UInt64), `CREATE_TIME` Nullable(DateTime), `UPDATE_TIME` Nullable(DateTime), `CHECK_TIME` Nullable(DateTime), `CHECKSUM` Nullable(UInt64), `TABLE_COLLATION` Nullable(String), `TABLE_COMMENT` Nullable(String), `CREATE_OPTIONS` Nullable(String)) AS SELECT T.database AS table_catalog, T.database AS table_schema, T.name AS table_name, multiIf(any(T.is_temporary), \'LOCAL TEMPORARY\', any(T.engine) LIKE \'%View\', \'VIEW\', any(T.engine) LIKE \'System%\', \'SYSTEM VIEW\', \'BASE TABLE\') AS table_type, any(T.engine) AS engine, NULL AS version, NULL AS row_format, sum(rows_count) AS table_rows, intDiv(data_length, table_rows) AS avg_row_length, sum(bytes_on_disk) AS data_length, NULL AS max_data_length, 0 AS index_length, NULL AS data_free, NULL AS auto_increment, NULL AS create_time, NULL AS update_time, NULL AS check_time, NULL AS checksum, \'utf8mb4_0900_ai_ci\' AS table_collation, any(T.comment) AS table_comment, 
NULL AS create_options, table_catalog AS TABLE_CATALOG, table_schema AS TABLE_SCHEMA, table_name AS TABLE_NAME, table_type AS TABLE_TYPE, any(T.engine) AS ENGINE, version AS VERSION, row_format AS ROW_FORMAT, table_rows AS TABLE_ROWS, avg_row_length AS AVG_ROW_LENGTH, data_length AS DATA_LENGTH, max_data_length AS MAX_DATA_LENGTH, index_length AS INDEX_LENGTH, data_free AS DATA_FREE, auto_increment AS AUTO_INCREMENT, create_time AS CREATE_TIME, update_time AS UPDATE_TIME, check_time AS CHECK_TIME, checksum AS CHECKSUM, table_collation AS TABLE_COLLATION, table_comment AS TABLE_COMMENT, create_options AS CREATE_OPTIONS FROM system.tables AS T LEFT JOIN (SELECT * FROM system.cnch_parts WHERE visible = 1) AS P ON (T.database = P.database) AND (T.name = P.table) GROUP BY table_catalog, table_name SETTINGS enable_multiple_tables_for_cnch_parts = 1, join_use_nulls = 1 -CREATE VIEW INFORMATION_SCHEMA.tables (`table_catalog` Nullable(String), `table_schema` Nullable(String), `table_name` Nullable(String), `table_type` Nullable(String), `engine` Nullable(String), `version` Nullable(String), `row_format` Nullable(String), `table_rows` Nullable(UInt64), `avg_row_length` Nullable(UInt64), `data_length` Nullable(UInt64), `max_data_length` Nullable(UInt64), `index_length` Nullable(UInt64), `data_free` Nullable(UInt64), `auto_increment` Nullable(UInt64), `create_time` Nullable(DateTime), `update_time` Nullable(DateTime), `check_time` Nullable(DateTime), `checksum` Nullable(UInt64), `table_collation` Nullable(String), `table_comment` Nullable(String), `create_options` Nullable(String), `TABLE_CATALOG` Nullable(String), `TABLE_SCHEMA` Nullable(String), `TABLE_NAME` Nullable(String), `TABLE_TYPE` Nullable(String), `ENGINE` Nullable(String), `VERSION` Nullable(String), `ROW_FORMAT` Nullable(String), `TABLE_ROWS` Nullable(UInt64), `AVG_ROW_LENGTH` Nullable(UInt64), `DATA_LENGTH` Nullable(UInt64), `MAX_DATA_LENGTH` Nullable(UInt64), `INDEX_LENGTH` Nullable(UInt64), `DATA_FREE` 
Nullable(UInt64), `AUTO_INCREMENT` Nullable(UInt64), `CREATE_TIME` Nullable(DateTime), `UPDATE_TIME` Nullable(DateTime), `CHECK_TIME` Nullable(DateTime), `CHECKSUM` Nullable(UInt64), `TABLE_COLLATION` Nullable(String), `TABLE_COMMENT` Nullable(String), `CREATE_OPTIONS` Nullable(String)) AS SELECT T.database AS table_catalog, T.database AS table_schema, T.name AS table_name, multiIf(any(T.is_temporary), \'LOCAL TEMPORARY\', any(T.engine) LIKE \'%View\', \'VIEW\', any(T.engine) LIKE \'System%\', \'SYSTEM VIEW\', \'BASE TABLE\') AS table_type, any(T.engine) AS engine, NULL AS version, NULL AS row_format, sum(rows_count) AS table_rows, intDiv(data_length, table_rows) AS avg_row_length, sum(bytes_on_disk) AS data_length, NULL AS max_data_length, 0 AS index_length, NULL AS data_free, NULL AS auto_increment, NULL AS create_time, NULL AS update_time, NULL AS check_time, NULL AS checksum, \'utf8mb4_0900_ai_ci\' AS table_collation, any(T.comment) AS table_comment, NULL AS create_options, table_catalog AS TABLE_CATALOG, table_schema AS TABLE_SCHEMA, table_name AS TABLE_NAME, table_type AS TABLE_TYPE, any(T.engine) AS ENGINE, version AS VERSION, row_format AS ROW_FORMAT, table_rows AS TABLE_ROWS, avg_row_length AS AVG_ROW_LENGTH, data_length AS DATA_LENGTH, max_data_length AS MAX_DATA_LENGTH, index_length AS INDEX_LENGTH, data_free AS DATA_FREE, auto_increment AS AUTO_INCREMENT, create_time AS CREATE_TIME, update_time AS UPDATE_TIME, check_time AS CHECK_TIME, checksum AS CHECKSUM, table_collation AS TABLE_COLLATION, table_comment AS TABLE_COMMENT, create_options AS CREATE_OPTIONS FROM system.tables AS T LEFT JOIN (SELECT * FROM system.cnch_parts WHERE visible = 1) AS P ON (T.database = P.database) AND (T.name = P.table) GROUP BY table_catalog, table_name SETTINGS enable_multiple_tables_for_cnch_parts = 1, join_use_nulls = 1 -CREATE VIEW information_schema.TABLES (`table_catalog` Nullable(String), `table_schema` Nullable(String), `table_name` Nullable(String), `table_type` 
Nullable(String), `engine` Nullable(String), `version` Nullable(String), `row_format` Nullable(String), `table_rows` Nullable(UInt64), `avg_row_length` Nullable(UInt64), `data_length` Nullable(UInt64), `max_data_length` Nullable(UInt64), `index_length` Nullable(UInt64), `data_free` Nullable(UInt64), `auto_increment` Nullable(UInt64), `create_time` Nullable(DateTime), `update_time` Nullable(DateTime), `check_time` Nullable(DateTime), `checksum` Nullable(UInt64), `table_collation` Nullable(String), `table_comment` Nullable(String), `create_options` Nullable(String), `TABLE_CATALOG` Nullable(String), `TABLE_SCHEMA` Nullable(String), `TABLE_NAME` Nullable(String), `TABLE_TYPE` Nullable(String), `ENGINE` Nullable(String), `VERSION` Nullable(String), `ROW_FORMAT` Nullable(String), `TABLE_ROWS` Nullable(UInt64), `AVG_ROW_LENGTH` Nullable(UInt64), `DATA_LENGTH` Nullable(UInt64), `MAX_DATA_LENGTH` Nullable(UInt64), `INDEX_LENGTH` Nullable(UInt64), `DATA_FREE` Nullable(UInt64), `AUTO_INCREMENT` Nullable(UInt64), `CREATE_TIME` Nullable(DateTime), `UPDATE_TIME` Nullable(DateTime), `CHECK_TIME` Nullable(DateTime), `CHECKSUM` Nullable(UInt64), `TABLE_COLLATION` Nullable(String), `TABLE_COMMENT` Nullable(String), `CREATE_OPTIONS` Nullable(String)) AS SELECT T.database AS table_catalog, T.database AS table_schema, T.name AS table_name, multiIf(any(T.is_temporary), \'LOCAL TEMPORARY\', any(T.engine) LIKE \'%View\', \'VIEW\', any(T.engine) LIKE \'System%\', \'SYSTEM VIEW\', \'BASE TABLE\') AS table_type, any(T.engine) AS engine, NULL AS version, NULL AS row_format, sum(rows_count) AS table_rows, intDiv(data_length, table_rows) AS avg_row_length, sum(bytes_on_disk) AS data_length, NULL AS max_data_length, 0 AS index_length, NULL AS data_free, NULL AS auto_increment, NULL AS create_time, NULL AS update_time, NULL AS check_time, NULL AS checksum, \'utf8mb4_0900_ai_ci\' AS table_collation, any(T.comment) AS table_comment, NULL AS create_options, table_catalog AS TABLE_CATALOG, 
table_schema AS TABLE_SCHEMA, table_name AS TABLE_NAME, table_type AS TABLE_TYPE, any(T.engine) AS ENGINE, version AS VERSION, row_format AS ROW_FORMAT, table_rows AS TABLE_ROWS, avg_row_length AS AVG_ROW_LENGTH, data_length AS DATA_LENGTH, max_data_length AS MAX_DATA_LENGTH, index_length AS INDEX_LENGTH, data_free AS DATA_FREE, auto_increment AS AUTO_INCREMENT, create_time AS CREATE_TIME, update_time AS UPDATE_TIME, check_time AS CHECK_TIME, checksum AS CHECKSUM, table_collation AS TABLE_COLLATION, table_comment AS TABLE_COMMENT, create_options AS CREATE_OPTIONS FROM system.tables AS T LEFT JOIN (SELECT * FROM system.cnch_parts WHERE visible = 1) AS P ON (T.database = P.database) AND (T.name = P.table) GROUP BY table_catalog, table_name SETTINGS enable_multiple_tables_for_cnch_parts = 1, join_use_nulls = 1 -CREATE VIEW information_schema.tables (`table_catalog` Nullable(String), `table_schema` Nullable(String), `table_name` Nullable(String), `table_type` Nullable(String), `engine` Nullable(String), `version` Nullable(String), `row_format` Nullable(String), `table_rows` Nullable(UInt64), `avg_row_length` Nullable(UInt64), `data_length` Nullable(UInt64), `max_data_length` Nullable(UInt64), `index_length` Nullable(UInt64), `data_free` Nullable(UInt64), `auto_increment` Nullable(UInt64), `create_time` Nullable(DateTime), `update_time` Nullable(DateTime), `check_time` Nullable(DateTime), `checksum` Nullable(UInt64), `table_collation` Nullable(String), `table_comment` Nullable(String), `create_options` Nullable(String), `TABLE_CATALOG` Nullable(String), `TABLE_SCHEMA` Nullable(String), `TABLE_NAME` Nullable(String), `TABLE_TYPE` Nullable(String), `ENGINE` Nullable(String), `VERSION` Nullable(String), `ROW_FORMAT` Nullable(String), `TABLE_ROWS` Nullable(UInt64), `AVG_ROW_LENGTH` Nullable(UInt64), `DATA_LENGTH` Nullable(UInt64), `MAX_DATA_LENGTH` Nullable(UInt64), `INDEX_LENGTH` Nullable(UInt64), `DATA_FREE` Nullable(UInt64), `AUTO_INCREMENT` Nullable(UInt64), 
`CREATE_TIME` Nullable(DateTime), `UPDATE_TIME` Nullable(DateTime), `CHECK_TIME` Nullable(DateTime), `CHECKSUM` Nullable(UInt64), `TABLE_COLLATION` Nullable(String), `TABLE_COMMENT` Nullable(String), `CREATE_OPTIONS` Nullable(String)) AS SELECT T.database AS table_catalog, T.database AS table_schema, T.name AS table_name, multiIf(any(T.is_temporary), \'LOCAL TEMPORARY\', any(T.engine) LIKE \'%View\', \'VIEW\', any(T.engine) LIKE \'System%\', \'SYSTEM VIEW\', \'BASE TABLE\') AS table_type, any(T.engine) AS engine, NULL AS version, NULL AS row_format, sum(rows_count) AS table_rows, intDiv(data_length, table_rows) AS avg_row_length, sum(bytes_on_disk) AS data_length, NULL AS max_data_length, 0 AS index_length, NULL AS data_free, NULL AS auto_increment, NULL AS create_time, NULL AS update_time, NULL AS check_time, NULL AS checksum, \'utf8mb4_0900_ai_ci\' AS table_collation, any(T.comment) AS table_comment, NULL AS create_options, table_catalog AS TABLE_CATALOG, table_schema AS TABLE_SCHEMA, table_name AS TABLE_NAME, table_type AS TABLE_TYPE, any(T.engine) AS ENGINE, version AS VERSION, row_format AS ROW_FORMAT, table_rows AS TABLE_ROWS, avg_row_length AS AVG_ROW_LENGTH, data_length AS DATA_LENGTH, max_data_length AS MAX_DATA_LENGTH, index_length AS INDEX_LENGTH, data_free AS DATA_FREE, auto_increment AS AUTO_INCREMENT, create_time AS CREATE_TIME, update_time AS UPDATE_TIME, check_time AS CHECK_TIME, checksum AS CHECKSUM, table_collation AS TABLE_COLLATION, table_comment AS TABLE_COMMENT, create_options AS CREATE_OPTIONS FROM system.tables AS T LEFT JOIN (SELECT * FROM system.cnch_parts WHERE visible = 1) AS P ON (T.database = P.database) AND (T.name = P.table) GROUP BY table_catalog, table_name SETTINGS enable_multiple_tables_for_cnch_parts = 1, join_use_nulls = 1 +CREATE VIEW INFORMATION_SCHEMA.COLUMNS\n(\n `table_catalog` String,\n `table_schema` String,\n `table_name` String,\n `column_name` String,\n `ordinal_position` UInt64,\n `column_default` String,\n 
`is_nullable` String,\n `data_type` String,\n `character_maximum_length` Nullable(UInt64),\n `character_octet_length` Nullable(UInt64),\n `numeric_precision` Nullable(UInt64),\n `numeric_precision_radix` Nullable(UInt64),\n `numeric_scale` Nullable(UInt64),\n `datetime_precision` Nullable(UInt64),\n `character_set_catalog` Nullable(String),\n `character_set_schema` Nullable(String),\n `character_set_name` Nullable(String),\n `collation_catalog` Nullable(String),\n `collation_schema` Nullable(String),\n `collation_name` Nullable(String),\n `domain_catalog` Nullable(String),\n `domain_schema` Nullable(String),\n `domain_name` Nullable(String),\n `column_comment` String,\n `column_type` String,\n `extra` Nullable(String),\n `privileges` Nullable(String),\n `TABLE_CATALOG` String,\n `TABLE_SCHEMA` String,\n `TABLE_NAME` String,\n `COLUMN_NAME` String,\n `ORDINAL_POSITION` UInt64,\n `COLUMN_DEFAULT` String,\n `IS_NULLABLE` String,\n `DATA_TYPE` String,\n `CHARACTER_MAXIMUM_LENGTH` Nullable(UInt64),\n `CHARACTER_OCTET_LENGTH` Nullable(UInt64),\n `NUMERIC_PRECISION` Nullable(UInt64),\n `NUMERIC_PRECISION_RADIX` Nullable(UInt64),\n `NUMERIC_SCALE` Nullable(UInt64),\n `DATETIME_PRECISION` Nullable(UInt64),\n `CHARACTER_SET_CATALOG` Nullable(String),\n `CHARACTER_SET_SCHEMA` Nullable(String),\n `CHARACTER_SET_NAME` Nullable(String),\n `COLLATION_CATALOG` Nullable(String),\n `COLLATION_SCHEMA` Nullable(String),\n `COLLATION_NAME` Nullable(String),\n `DOMAIN_CATALOG` Nullable(String),\n `DOMAIN_SCHEMA` Nullable(String),\n `DOMAIN_NAME` Nullable(String),\n `COLUMN_COMMENT` String,\n `COLUMN_TYPE` String,\n `EXTRA` Nullable(String),\n `PRIVILEGES` Nullable(String)\n) AS\nSELECT\n \'def\' AS table_catalog,\n database AS table_schema,\n table AS table_name,\n name AS column_name,\n position AS ordinal_position,\n default_expression AS column_default,\n type LIKE \'Nullable(%)\' AS is_nullable,\n convertToDialectDataType(type) AS data_type,\n character_octet_length AS 
character_maximum_length,\n character_octet_length,\n numeric_precision,\n numeric_precision_radix,\n numeric_scale,\n datetime_precision,\n NULL AS character_set_catalog,\n NULL AS character_set_schema,\n \'utf8mb4\' AS character_set_name,\n NULL AS collation_catalog,\n NULL AS collation_schema,\n \'utf8mb4_0900_ai_ci\' AS collation_name,\n NULL AS domain_catalog,\n NULL AS domain_schema,\n NULL AS domain_name,\n comment AS column_comment,\n convertToDialectColumnType(type) AS column_type,\n NULL AS extra,\n NULL AS privileges,\n table_catalog AS TABLE_CATALOG,\n table_schema AS TABLE_SCHEMA,\n table_name AS TABLE_NAME,\n column_name AS COLUMN_NAME,\n ordinal_position AS ORDINAL_POSITION,\n column_default AS COLUMN_DEFAULT,\n is_nullable AS IS_NULLABLE,\n data_type AS DATA_TYPE,\n character_maximum_length AS CHARACTER_MAXIMUM_LENGTH,\n character_octet_length AS CHARACTER_OCTET_LENGTH,\n numeric_precision AS NUMERIC_PRECISION,\n numeric_precision_radix AS NUMERIC_PRECISION_RADIX,\n numeric_scale AS NUMERIC_SCALE,\n datetime_precision AS DATETIME_PRECISION,\n character_set_catalog AS CHARACTER_SET_CATALOG,\n character_set_schema AS CHARACTER_SET_SCHEMA,\n character_set_name AS CHARACTER_SET_NAME,\n collation_catalog AS COLLATION_CATALOG,\n collation_schema AS COLLATION_SCHEMA,\n collation_name AS COLLATION_NAME,\n domain_catalog AS DOMAIN_CATALOG,\n domain_schema AS DOMAIN_SCHEMA,\n domain_name AS DOMAIN_NAME,\n column_comment AS COLUMN_COMMENT,\n column_type AS COLUMN_TYPE,\n extra AS EXTRA,\n privileges AS PRIVILEGES\nFROM system.columns +CREATE VIEW INFORMATION_SCHEMA.TABLES (`table_catalog` String, `table_schema` String, `table_name` String, `table_type` String, `engine` Nullable(String), `version` Nullable(String), `row_format` Nullable(String), `table_rows` Nullable(UInt64), `avg_row_length` Nullable(UInt64), `data_length` Nullable(UInt64), `max_data_length` Nullable(UInt64), `index_length` Nullable(UInt64), `data_free` Nullable(UInt64), `auto_increment` 
Nullable(UInt64), `create_time` Nullable(DateTime), `update_time` Nullable(DateTime), `check_time` Nullable(DateTime), `checksum` Nullable(UInt64), `table_collation` Nullable(String), `table_comment` String, `create_options` Nullable(String), `TABLE_CATALOG` String, `TABLE_SCHEMA` String, `TABLE_NAME` String, `TABLE_TYPE` String, `ENGINE` Nullable(String), `VERSION` Nullable(String), `ROW_FORMAT` Nullable(String), `TABLE_ROWS` Nullable(UInt64), `AVG_ROW_LENGTH` Nullable(UInt64), `DATA_LENGTH` Nullable(UInt64), `MAX_DATA_LENGTH` Nullable(UInt64), `INDEX_LENGTH` Nullable(UInt64), `DATA_FREE` Nullable(UInt64), `AUTO_INCREMENT` Nullable(UInt64), `CREATE_TIME` Nullable(DateTime), `UPDATE_TIME` Nullable(DateTime), `CHECK_TIME` Nullable(DateTime), `CHECKSUM` Nullable(UInt64), `TABLE_COLLATION` Nullable(String), `TABLE_COMMENT` String, `CREATE_OPTIONS` Nullable(String)) AS SELECT \'def\' AS table_catalog, T.database AS table_schema, T.name AS table_name, multiIf(any(T.is_temporary), \'LOCAL TEMPORARY\', any(T.engine) LIKE \'%View\', \'VIEW\', any(T.engine) LIKE \'System%\', \'SYSTEM VIEW\', \'BASE TABLE\') AS table_type, any(T.engine) AS engine, NULL AS version, NULL AS row_format, sum(rows_count) AS table_rows, intDiv(data_length, table_rows) AS avg_row_length, sum(bytes_on_disk) AS data_length, NULL AS max_data_length, 0 AS index_length, NULL AS data_free, NULL AS auto_increment, NULL AS create_time, NULL AS update_time, NULL AS check_time, NULL AS checksum, \'utf8mb4_0900_ai_ci\' AS table_collation, any(T.comment) AS table_comment, NULL AS create_options, table_catalog AS TABLE_CATALOG, table_schema AS TABLE_SCHEMA, table_name AS TABLE_NAME, table_type AS TABLE_TYPE, any(T.engine) AS ENGINE, version AS VERSION, row_format AS ROW_FORMAT, table_rows AS TABLE_ROWS, avg_row_length AS AVG_ROW_LENGTH, data_length AS DATA_LENGTH, max_data_length AS MAX_DATA_LENGTH, index_length AS INDEX_LENGTH, data_free AS DATA_FREE, auto_increment AS AUTO_INCREMENT, create_time AS 
CREATE_TIME, update_time AS UPDATE_TIME, check_time AS CHECK_TIME, checksum AS CHECKSUM, table_collation AS TABLE_COLLATION, table_comment AS TABLE_COMMENT, create_options AS CREATE_OPTIONS FROM system.tables AS T LEFT JOIN (SELECT * FROM system.cnch_parts WHERE visible = 1) AS P ON (T.database = P.database) AND (T.name = P.table) GROUP BY table_schema, table_name SETTINGS enable_multiple_tables_for_cnch_parts = 1, join_use_nulls = 1 +CREATE VIEW INFORMATION_SCHEMA.tables (`table_catalog` String, `table_schema` String, `table_name` String, `table_type` String, `engine` Nullable(String), `version` Nullable(String), `row_format` Nullable(String), `table_rows` Nullable(UInt64), `avg_row_length` Nullable(UInt64), `data_length` Nullable(UInt64), `max_data_length` Nullable(UInt64), `index_length` Nullable(UInt64), `data_free` Nullable(UInt64), `auto_increment` Nullable(UInt64), `create_time` Nullable(DateTime), `update_time` Nullable(DateTime), `check_time` Nullable(DateTime), `checksum` Nullable(UInt64), `table_collation` Nullable(String), `table_comment` String, `create_options` Nullable(String), `TABLE_CATALOG` String, `TABLE_SCHEMA` String, `TABLE_NAME` String, `TABLE_TYPE` String, `ENGINE` Nullable(String), `VERSION` Nullable(String), `ROW_FORMAT` Nullable(String), `TABLE_ROWS` Nullable(UInt64), `AVG_ROW_LENGTH` Nullable(UInt64), `DATA_LENGTH` Nullable(UInt64), `MAX_DATA_LENGTH` Nullable(UInt64), `INDEX_LENGTH` Nullable(UInt64), `DATA_FREE` Nullable(UInt64), `AUTO_INCREMENT` Nullable(UInt64), `CREATE_TIME` Nullable(DateTime), `UPDATE_TIME` Nullable(DateTime), `CHECK_TIME` Nullable(DateTime), `CHECKSUM` Nullable(UInt64), `TABLE_COLLATION` Nullable(String), `TABLE_COMMENT` String, `CREATE_OPTIONS` Nullable(String)) AS SELECT \'def\' AS table_catalog, T.database AS table_schema, T.name AS table_name, multiIf(any(T.is_temporary), \'LOCAL TEMPORARY\', any(T.engine) LIKE \'%View\', \'VIEW\', any(T.engine) LIKE \'System%\', \'SYSTEM VIEW\', \'BASE TABLE\') AS table_type, 
any(T.engine) AS engine, NULL AS version, NULL AS row_format, sum(rows_count) AS table_rows, intDiv(data_length, table_rows) AS avg_row_length, sum(bytes_on_disk) AS data_length, NULL AS max_data_length, 0 AS index_length, NULL AS data_free, NULL AS auto_increment, NULL AS create_time, NULL AS update_time, NULL AS check_time, NULL AS checksum, \'utf8mb4_0900_ai_ci\' AS table_collation, any(T.comment) AS table_comment, NULL AS create_options, table_catalog AS TABLE_CATALOG, table_schema AS TABLE_SCHEMA, table_name AS TABLE_NAME, table_type AS TABLE_TYPE, any(T.engine) AS ENGINE, version AS VERSION, row_format AS ROW_FORMAT, table_rows AS TABLE_ROWS, avg_row_length AS AVG_ROW_LENGTH, data_length AS DATA_LENGTH, max_data_length AS MAX_DATA_LENGTH, index_length AS INDEX_LENGTH, data_free AS DATA_FREE, auto_increment AS AUTO_INCREMENT, create_time AS CREATE_TIME, update_time AS UPDATE_TIME, check_time AS CHECK_TIME, checksum AS CHECKSUM, table_collation AS TABLE_COLLATION, table_comment AS TABLE_COMMENT, create_options AS CREATE_OPTIONS FROM system.tables AS T LEFT JOIN (SELECT * FROM system.cnch_parts WHERE visible = 1) AS P ON (T.database = P.database) AND (T.name = P.table) GROUP BY table_schema, table_name SETTINGS enable_multiple_tables_for_cnch_parts = 1, join_use_nulls = 1 +CREATE VIEW information_schema.TABLES (`table_catalog` String, `table_schema` String, `table_name` String, `table_type` String, `engine` Nullable(String), `version` Nullable(String), `row_format` Nullable(String), `table_rows` Nullable(UInt64), `avg_row_length` Nullable(UInt64), `data_length` Nullable(UInt64), `max_data_length` Nullable(UInt64), `index_length` Nullable(UInt64), `data_free` Nullable(UInt64), `auto_increment` Nullable(UInt64), `create_time` Nullable(DateTime), `update_time` Nullable(DateTime), `check_time` Nullable(DateTime), `checksum` Nullable(UInt64), `table_collation` Nullable(String), `table_comment` String, `create_options` Nullable(String), `TABLE_CATALOG` String, 
`TABLE_SCHEMA` String, `TABLE_NAME` String, `TABLE_TYPE` String, `ENGINE` Nullable(String), `VERSION` Nullable(String), `ROW_FORMAT` Nullable(String), `TABLE_ROWS` Nullable(UInt64), `AVG_ROW_LENGTH` Nullable(UInt64), `DATA_LENGTH` Nullable(UInt64), `MAX_DATA_LENGTH` Nullable(UInt64), `INDEX_LENGTH` Nullable(UInt64), `DATA_FREE` Nullable(UInt64), `AUTO_INCREMENT` Nullable(UInt64), `CREATE_TIME` Nullable(DateTime), `UPDATE_TIME` Nullable(DateTime), `CHECK_TIME` Nullable(DateTime), `CHECKSUM` Nullable(UInt64), `TABLE_COLLATION` Nullable(String), `TABLE_COMMENT` String, `CREATE_OPTIONS` Nullable(String)) AS SELECT \'def\' AS table_catalog, T.database AS table_schema, T.name AS table_name, multiIf(any(T.is_temporary), \'LOCAL TEMPORARY\', any(T.engine) LIKE \'%View\', \'VIEW\', any(T.engine) LIKE \'System%\', \'SYSTEM VIEW\', \'BASE TABLE\') AS table_type, any(T.engine) AS engine, NULL AS version, NULL AS row_format, sum(rows_count) AS table_rows, intDiv(data_length, table_rows) AS avg_row_length, sum(bytes_on_disk) AS data_length, NULL AS max_data_length, 0 AS index_length, NULL AS data_free, NULL AS auto_increment, NULL AS create_time, NULL AS update_time, NULL AS check_time, NULL AS checksum, \'utf8mb4_0900_ai_ci\' AS table_collation, any(T.comment) AS table_comment, NULL AS create_options, table_catalog AS TABLE_CATALOG, table_schema AS TABLE_SCHEMA, table_name AS TABLE_NAME, table_type AS TABLE_TYPE, any(T.engine) AS ENGINE, version AS VERSION, row_format AS ROW_FORMAT, table_rows AS TABLE_ROWS, avg_row_length AS AVG_ROW_LENGTH, data_length AS DATA_LENGTH, max_data_length AS MAX_DATA_LENGTH, index_length AS INDEX_LENGTH, data_free AS DATA_FREE, auto_increment AS AUTO_INCREMENT, create_time AS CREATE_TIME, update_time AS UPDATE_TIME, check_time AS CHECK_TIME, checksum AS CHECKSUM, table_collation AS TABLE_COLLATION, table_comment AS TABLE_COMMENT, create_options AS CREATE_OPTIONS FROM system.tables AS T LEFT JOIN (SELECT * FROM system.cnch_parts WHERE visible = 1) 
AS P ON (T.database = P.database) AND (T.name = P.table) GROUP BY table_schema, table_name SETTINGS enable_multiple_tables_for_cnch_parts = 1, join_use_nulls = 1 +CREATE VIEW information_schema.tables (`table_catalog` String, `table_schema` String, `table_name` String, `table_type` String, `engine` Nullable(String), `version` Nullable(String), `row_format` Nullable(String), `table_rows` Nullable(UInt64), `avg_row_length` Nullable(UInt64), `data_length` Nullable(UInt64), `max_data_length` Nullable(UInt64), `index_length` Nullable(UInt64), `data_free` Nullable(UInt64), `auto_increment` Nullable(UInt64), `create_time` Nullable(DateTime), `update_time` Nullable(DateTime), `check_time` Nullable(DateTime), `checksum` Nullable(UInt64), `table_collation` Nullable(String), `table_comment` String, `create_options` Nullable(String), `TABLE_CATALOG` String, `TABLE_SCHEMA` String, `TABLE_NAME` String, `TABLE_TYPE` String, `ENGINE` Nullable(String), `VERSION` Nullable(String), `ROW_FORMAT` Nullable(String), `TABLE_ROWS` Nullable(UInt64), `AVG_ROW_LENGTH` Nullable(UInt64), `DATA_LENGTH` Nullable(UInt64), `MAX_DATA_LENGTH` Nullable(UInt64), `INDEX_LENGTH` Nullable(UInt64), `DATA_FREE` Nullable(UInt64), `AUTO_INCREMENT` Nullable(UInt64), `CREATE_TIME` Nullable(DateTime), `UPDATE_TIME` Nullable(DateTime), `CHECK_TIME` Nullable(DateTime), `CHECKSUM` Nullable(UInt64), `TABLE_COLLATION` Nullable(String), `TABLE_COMMENT` String, `CREATE_OPTIONS` Nullable(String)) AS SELECT \'def\' AS table_catalog, T.database AS table_schema, T.name AS table_name, multiIf(any(T.is_temporary), \'LOCAL TEMPORARY\', any(T.engine) LIKE \'%View\', \'VIEW\', any(T.engine) LIKE \'System%\', \'SYSTEM VIEW\', \'BASE TABLE\') AS table_type, any(T.engine) AS engine, NULL AS version, NULL AS row_format, sum(rows_count) AS table_rows, intDiv(data_length, table_rows) AS avg_row_length, sum(bytes_on_disk) AS data_length, NULL AS max_data_length, 0 AS index_length, NULL AS data_free, NULL AS auto_increment, NULL AS 
create_time, NULL AS update_time, NULL AS check_time, NULL AS checksum, \'utf8mb4_0900_ai_ci\' AS table_collation, any(T.comment) AS table_comment, NULL AS create_options, table_catalog AS TABLE_CATALOG, table_schema AS TABLE_SCHEMA, table_name AS TABLE_NAME, table_type AS TABLE_TYPE, any(T.engine) AS ENGINE, version AS VERSION, row_format AS ROW_FORMAT, table_rows AS TABLE_ROWS, avg_row_length AS AVG_ROW_LENGTH, data_length AS DATA_LENGTH, max_data_length AS MAX_DATA_LENGTH, index_length AS INDEX_LENGTH, data_free AS DATA_FREE, auto_increment AS AUTO_INCREMENT, create_time AS CREATE_TIME, update_time AS UPDATE_TIME, check_time AS CHECK_TIME, checksum AS CHECKSUM, table_collation AS TABLE_COLLATION, table_comment AS TABLE_COMMENT, create_options AS CREATE_OPTIONS FROM system.tables AS T LEFT JOIN (SELECT * FROM system.cnch_parts WHERE visible = 1) AS P ON (T.database = P.database) AND (T.name = P.table) GROUP BY table_schema, table_name SETTINGS enable_multiple_tables_for_cnch_parts = 1, join_use_nulls = 1 From 538e9ba029aa740654d52a04073e535ba360bfa5 Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 08:11:47 +0000 Subject: [PATCH 110/292] Merge branch 'jiashuo_fix-disk-reserve-inode-2.2' into 'cnch-2.2' fix(clickhousech@m-4724047605): [CP to 2.2]fix reserved inode not been released See merge request dp/ClickHouse!23357 --- src/Disks/DiskLocal.cpp | 25 ++++++++++++++++++---- src/Disks/DiskLocal.h | 2 +- src/Storages/System/StorageSystemDisks.cpp | 8 +++++++ src/WorkerTasks/MergeTreeDataMerger.cpp | 7 ++++-- 4 files changed, 35 insertions(+), 7 deletions(-) diff --git a/src/Disks/DiskLocal.cpp b/src/Disks/DiskLocal.cpp index da065748ddf..3fed0b43f9b 100644 --- a/src/Disks/DiskLocal.cpp +++ b/src/Disks/DiskLocal.cpp @@ -136,16 +136,33 @@ bool DiskLocal::tryReserve(UInt64 bytes) } auto available_space = getAvailableSpace(); - auto unreserved_space = available_space - DiskStats{std::min(available_space.bytes, reserved_bytes), 
std::min(available_space.inodes, reserved_inodes)}; + auto unreserved_space + = available_space - DiskStats{std::min(available_space.bytes, reserved_bytes), std::min(available_space.inodes, reserved_inodes)}; if (!unreserved_space.isEmpty()) { - LOG_DEBUG(log, "Reserving {} on disk {}, having unreserved {}({}).", - ReadableSize(bytes), backQuote(name), ReadableSize(unreserved_space.bytes), unreserved_space.inodes); + LOG_TRACE( + log, + "Reserving {} on disk {}(free {}({})), having unreserved {}({}).", + ReadableSize(bytes), + backQuote(name), + ReadableSize(available_space.bytes), + available_space.inodes, + ReadableSize(unreserved_space.bytes), + unreserved_space.inodes); ++reservation_count; reserved_bytes += bytes; - reserved_inodes += 1; return true; } + + LOG_WARNING( + log, + "Can't reserving {} on disk {}(free {}({})), having unreserved {}({}).", + ReadableSize(bytes), + backQuote(name), + ReadableSize(available_space.bytes), + available_space.inodes, + ReadableSize(unreserved_space.bytes), + unreserved_space.inodes); return false; } diff --git a/src/Disks/DiskLocal.h b/src/Disks/DiskLocal.h index fdc6fc9cbba..2fe954bbebe 100644 --- a/src/Disks/DiskLocal.h +++ b/src/Disks/DiskLocal.h @@ -130,7 +130,7 @@ class DiskLocal : public IDisk const DiskStats keep_free_disk_stats; UInt64 reserved_bytes = 0; - UInt64 reserved_inodes = 0; + UInt64 reserved_inodes = 0; // TODO: placeholder and not implemented yet UInt64 reservation_count = 0; static std::mutex reservation_mutex; diff --git a/src/Storages/System/StorageSystemDisks.cpp b/src/Storages/System/StorageSystemDisks.cpp index 50981ca9be7..e2630722b3b 100644 --- a/src/Storages/System/StorageSystemDisks.cpp +++ b/src/Storages/System/StorageSystemDisks.cpp @@ -43,9 +43,11 @@ StorageSystemDisks::StorageSystemDisks(const StorageID & table_id_) {"free_space", std::make_shared()}, {"total_space", std::make_shared()}, {"keep_free_space", std::make_shared()}, + {"unreserved_space", std::make_shared()}, {"free_inodes", 
std::make_shared()}, {"total_inodes", std::make_shared()}, {"keep_free_inodes", std::make_shared()}, + {"unreserved_inode", std::make_shared()}, {"type", std::make_shared()}, })); setInMemoryMetadata(storage_metadata); @@ -68,9 +70,11 @@ Pipe StorageSystemDisks::read( MutableColumnPtr col_free_bytes = ColumnUInt64::create(); MutableColumnPtr col_total_bytes = ColumnUInt64::create(); MutableColumnPtr col_keep_bytes = ColumnUInt64::create(); + MutableColumnPtr col_unreserved_bytes = ColumnUInt64::create(); MutableColumnPtr col_free_inodes = ColumnUInt64::create(); MutableColumnPtr col_total_inodes = ColumnUInt64::create(); MutableColumnPtr col_keep_inodes = ColumnUInt64::create(); + MutableColumnPtr col_unreserved_inodes = ColumnUInt64::create(); MutableColumnPtr col_type = ColumnString::create(); for (const auto & [disk_name, disk_ptr] : context->getDisksMap()) @@ -81,9 +85,11 @@ Pipe StorageSystemDisks::read( col_free_bytes->insert(disk_ptr->getAvailableSpace().bytes); col_total_bytes->insert(disk_ptr->getTotalSpace().bytes); col_keep_bytes->insert(disk_ptr->getKeepingFreeSpace().bytes); + col_unreserved_bytes->insert(disk_ptr->getUnreservedSpace().bytes); col_free_inodes->insert(disk_ptr->getAvailableSpace().inodes); col_total_inodes->insert(disk_ptr->getTotalSpace().inodes); col_keep_inodes->insert(disk_ptr->getKeepingFreeSpace().inodes); + col_unreserved_inodes->insert(disk_ptr->getUnreservedSpace().inodes); col_type->insert(DiskType::toString(disk_ptr->getType())); } @@ -94,9 +100,11 @@ Pipe StorageSystemDisks::read( res_columns.emplace_back(std::move(col_free_bytes)); res_columns.emplace_back(std::move(col_total_bytes)); res_columns.emplace_back(std::move(col_keep_bytes)); + res_columns.emplace_back(std::move(col_unreserved_bytes)); res_columns.emplace_back(std::move(col_free_inodes)); res_columns.emplace_back(std::move(col_total_inodes)); res_columns.emplace_back(std::move(col_keep_inodes)); + res_columns.emplace_back(std::move(col_unreserved_inodes)); 
res_columns.emplace_back(std::move(col_type)); UInt64 num_rows = res_columns.at(0)->size(); diff --git a/src/WorkerTasks/MergeTreeDataMerger.cpp b/src/WorkerTasks/MergeTreeDataMerger.cpp index 0218a347804..4df7a94fa24 100644 --- a/src/WorkerTasks/MergeTreeDataMerger.cpp +++ b/src/WorkerTasks/MergeTreeDataMerger.cpp @@ -379,8 +379,6 @@ MergeTreeMutableDataPartPtr MergeTreeDataMerger::mergePartsToTemporaryPartImpl( for (const auto & part : source_data_parts) part->accumulateColumnSizes(merged_column_to_size); - column_sizes = ColumnSizeEstimator(merged_column_to_size, merging_column_names, gathering_column_names); - tmp_disk = context->getTemporaryVolume()->getDisk(); rows_sources_file = createTemporaryFile(tmp_disk->getPath()); rows_sources_uncompressed_write_buf = std::make_unique(rows_sources_file->path()); @@ -396,8 +394,12 @@ MergeTreeMutableDataPartPtr MergeTreeDataMerger::mergePartsToTemporaryPartImpl( /// Implicit column names in merged_column_to_size is unescaped, so column.name need keep unescaped either. auto [curr, end] = getFileRangeFromOrderedFilesByPrefix(getMapKeyPrefix(column.name), merged_column_to_size); for (; curr != end; ++curr) + { + /// Treat map implicit columns as ordinary columns, need add to ColumnSizeEstimator to get the right total size. 
+ gathering_column_names.emplace_back(curr->first); gathering_columns.emplace_back( curr->first, dynamic_cast(column.type.get())->getValueTypeForImplicitColumn()); + } } else { @@ -405,6 +407,7 @@ MergeTreeMutableDataPartPtr MergeTreeDataMerger::mergePartsToTemporaryPartImpl( } } gathering_columns.sort(); /// It gains better performance if gathering by sorted columns + column_sizes = ColumnSizeEstimator(merged_column_to_size, merging_column_names, gathering_column_names); } else { From e959d31f4154fddd6583c5c10b6c42341719e9cf Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 08:12:06 +0000 Subject: [PATCH 111/292] Merge 'optimizer_respect_mysql_implicit_cast_rules_cnch-2.2' into 'cnch-2.2' fix(clickhousech@m-4052801080): optimizer respect mysql implicit cast rules cnch 2.2 See merge request: !23364 --- src/Analyzers/QueryAnalyzer.cpp | 22 +++++++++++++------ .../40097_mysql_implicit_cast_rules.reference | 0 .../40097_mysql_implicit_cast_rules.sql | 9 ++++++++ 3 files changed, 24 insertions(+), 7 deletions(-) create mode 100644 tests/queries/4_cnch_stateless_no_tenant/40097_mysql_implicit_cast_rules.reference create mode 100644 tests/queries/4_cnch_stateless_no_tenant/40097_mysql_implicit_cast_rules.sql diff --git a/src/Analyzers/QueryAnalyzer.cpp b/src/Analyzers/QueryAnalyzer.cpp index 1e4c0fae9dd..b3f20774918 100644 --- a/src/Analyzers/QueryAnalyzer.cpp +++ b/src/Analyzers/QueryAnalyzer.cpp @@ -133,6 +133,7 @@ class QueryAnalyzerVisitor : public ASTVisitor , enable_implicit_type_conversion(context->getSettingsRef().enable_implicit_type_conversion) , allow_extended_conversion(context->getSettingsRef().allow_extended_type_conversion) , enable_subcolumn_optimization_through_union(context->getSettingsRef().enable_subcolumn_optimization_through_union) + , enable_implicit_arg_type_convert(context->getSettingsRef().enable_implicit_arg_type_convert) { } @@ -145,6 +146,7 @@ class QueryAnalyzerVisitor : public ASTVisitor const bool 
enable_implicit_type_conversion; const bool allow_extended_conversion; const bool enable_subcolumn_optimization_through_union; + const bool enable_implicit_arg_type_convert; // MySQL implicit cast rules Poco::Logger * logger = &Poco::Logger::get("QueryAnalyzerVisitor"); @@ -204,6 +206,7 @@ class QueryAnalyzerVisitor : public ASTVisitor void rewriteSelectInANSIMode(ASTSelectQuery & select_query, const Aliases & aliases, const NameSet & source_columns_set); void normalizeAliases(ASTPtr & expr, ASTPtr & aliases_expr); void normalizeAliases(ASTPtr & expr, const Aliases & aliases, const NameSet & source_columns_set); + DataTypePtr getCommonType(const DataTypes & types); }; static NameSet collectNames(ScopePtr scope); @@ -448,10 +451,7 @@ void QueryAnalyzerVisitor::analyzeSetOperation(ASTPtr & node, ASTs & selects) DataTypePtr output_type; // promote output type to super type if necessary - if (context->getSettingsRef().enable_implicit_arg_type_convert) - output_type = getLeastSupertype(elem_types, true); - else - output_type = getLeastSupertype(elem_types, allow_extended_conversion); + output_type = getCommonType(elem_types); output_desc.emplace_back( first_input_desc[column_idx].name, output_type, @@ -919,7 +919,7 @@ ScopePtr QueryAnalyzerVisitor::analyzeJoinUsing( { try { - output_type = getLeastSupertype(DataTypes{left_type, right_type}, allow_extended_conversion); + output_type = getCommonType(DataTypes{left_type, right_type}); } catch (DB::Exception & ex) { @@ -1014,7 +1014,7 @@ ScopePtr QueryAnalyzerVisitor::analyzeJoinUsing( { try { - output_type = getLeastSupertype(DataTypes{left_type, right_type}, allow_extended_conversion); + output_type = getCommonType(DataTypes{left_type, right_type}); } catch (DB::Exception & ex) { @@ -1233,7 +1233,7 @@ ScopePtr QueryAnalyzerVisitor::analyzeJoinOn( { try { - super_type = getLeastSupertype(DataTypes{left_type, right_type}, allow_extended_conversion); + super_type = getCommonType(DataTypes{left_type, right_type}); } catch 
(DB::Exception & ex) { @@ -2181,6 +2181,14 @@ void QueryAnalyzerVisitor::normalizeAliases(ASTPtr & expr, const Aliases & alias QueryNormalizer(normalizer_data).visit(expr); } +DataTypePtr QueryAnalyzerVisitor::getCommonType(const DataTypes & types) +{ + if (enable_implicit_arg_type_convert) + return getLeastSupertype(types, true); + else + return getLeastSupertype(types, allow_extended_conversion); +} + NameSet collectNames(ScopePtr scope) { NameSet result; diff --git a/tests/queries/4_cnch_stateless_no_tenant/40097_mysql_implicit_cast_rules.reference b/tests/queries/4_cnch_stateless_no_tenant/40097_mysql_implicit_cast_rules.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/4_cnch_stateless_no_tenant/40097_mysql_implicit_cast_rules.sql b/tests/queries/4_cnch_stateless_no_tenant/40097_mysql_implicit_cast_rules.sql new file mode 100644 index 00000000000..65975fb2f55 --- /dev/null +++ b/tests/queries/4_cnch_stateless_no_tenant/40097_mysql_implicit_cast_rules.sql @@ -0,0 +1,9 @@ +SELECT 1 as a UNION ALL SELECT '1' as a; -- { serverError 386 } +SELECT count() FROM (SELECT 1 as a) JOIN (SELECT '1' as a) USING a; -- { serverError 53 } +SELECT count() FROM (SELECT 1 as a) t1 JOIN (SELECT '1' as a) t2 ON t1.a = t2.a; -- { serverError 53 } + +SET dialect_type = 'MYSQL'; + +SELECT 1 as a UNION ALL SELECT '1' as a FORMAT Null; +SELECT count() FROM (SELECT 1 as a) JOIN (SELECT '1' as a) USING a FORMAT Null; +SELECT count() FROM (SELECT 1 as a) t1 JOIN (SELECT '1' as a) t2 ON t1.a = t2.a FORMAT Null; From 1b587bca63d5cb6eaf6ef5034c82ffdc3d1c4f48 Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 08:12:23 +0000 Subject: [PATCH 112/292] Merge 'implicit_cast_for_subcolumn-cnch-2.2' into 'cnch-2.2' fix(clickhousech@m-4671091203): implicit cast for subcolumn cnch 2.2 See merge request: !23362 --- src/QueryPlan/GraphvizPrinter.cpp | 99 ++++++++++++------- src/QueryPlan/QueryPlanner.cpp | 59 +++++++++-- 
...100_implicit_cast_for_sub_column.reference | 11 +++ .../40100_implicit_cast_for_sub_column.sql | 26 +++++ 4 files changed, 154 insertions(+), 41 deletions(-) create mode 100644 tests/queries/4_cnch_stateless_no_tenant/40100_implicit_cast_for_sub_column.reference create mode 100644 tests/queries/4_cnch_stateless_no_tenant/40100_implicit_cast_for_sub_column.sql diff --git a/src/QueryPlan/GraphvizPrinter.cpp b/src/QueryPlan/GraphvizPrinter.cpp index 8bd72d9343e..0ad1ba7e774 100644 --- a/src/QueryPlan/GraphvizPrinter.cpp +++ b/src/QueryPlan/GraphvizPrinter.cpp @@ -1235,7 +1235,8 @@ String StepPrinter::printStep(const IQueryPlanStep & step, bool include_output) for (const auto & column : step.getOutputStream().header) { details << column.name << ":"; - details << column.type->getName() << "\\n"; + details << column.type->getName() << " "; + details << (column.column ? column.column->getName() : "") << "\\n"; } } return details.str(); @@ -1351,7 +1352,8 @@ String StepPrinter::printFilterStep(const FilterStep & step, bool include_output for (const auto & column : step.getOutputStream().header) { details << column.name << ":"; - details << column.type->getName() << "\\n"; + details << column.type->getName() << " "; + details << (column.column ? column.column->getName() : "") << "\\n"; } } @@ -1483,7 +1485,8 @@ String StepPrinter::printJoinStep(const JoinStep & step) for (const auto & item : step.getOutputStream().header) { details << item.name << ":"; - details << item.type->getName() << "\\n"; + details << item.type->getName() << " "; + details << (item.column ? item.column->getName() : "") << "\\n"; } return details.str(); } @@ -1501,7 +1504,8 @@ String StepPrinter::printArrayJoinStep(const ArrayJoinStep & step) for (const auto & column : step.getOutputStream().header) { details << column.name << ":"; - details << column.type->getName() << "\\n"; + details << column.type->getName() << " "; + details << (column.column ? 
column.column->getName() : "") << "\\n"; } return details.str(); } @@ -1592,7 +1596,8 @@ String StepPrinter::printAggregatingStep(const AggregatingStep & step, bool incl for (const auto & column : step.getOutputStream().header) { details << column.name << ":"; - details << column.type->getName() << "\\n"; + details << column.type->getName() << " "; + details << (column.column ? column.column->getName() : "") << "\\n"; } } @@ -1636,7 +1641,8 @@ String StepPrinter::printMarkDistinctStep(const MarkDistinctStep & step, bool /* for (const auto & column : step.getOutputStream().header) { details << column.name << ":"; - details << column.type->getName() << "\\n"; + details << column.type->getName() << " "; + details << (column.column ? column.column->getName() : "") << "\\n"; } return details.str(); } @@ -1742,7 +1748,8 @@ String StepPrinter::printUnionStep(const UnionStep & step) for (const auto & column : step.getOutputStream().header) { details << column.name << ":"; - details << column.type->getName() << "\\n"; + details << column.type->getName() << " "; + details << (column.column ? column.column->getName() : "") << "\\n"; } return details.str(); } @@ -1756,7 +1763,8 @@ String StepPrinter::printIntersectOrExceptStep(const IntersectOrExceptStep & ste for (const auto & column : step.getOutputStream().header) { details << column.name << ":"; - details << column.type->getName() << "\\n"; + details << column.type->getName() << " "; + details << (column.column ? column.column->getName() : "") << "\\n"; } return details.str(); } @@ -1772,7 +1780,8 @@ String StepPrinter::printIntersectStep(const IntersectStep & step) for (const auto & column : step.getOutputStream().header) { details << column.name << ":"; - details << column.type->getName() << "\\n"; + details << column.type->getName() << " "; + details << (column.column ? 
column.column->getName() : "") << "\\n"; } return details.str(); } @@ -1786,7 +1795,8 @@ String StepPrinter::printExceptStep(const ExceptStep & step) for (const auto & column : step.getOutputStream().header) { details << column.name << ":"; - details << column.type->getName() << "\\n"; + details << column.type->getName() << " "; + details << (column.column ? column.column->getName() : "") << "\\n"; } return details.str(); } @@ -1833,7 +1843,8 @@ String StepPrinter::printExchangeStep(const ExchangeStep & step) for (const auto & column : step.getOutputStream().header) { details << column.name << ":"; - details << column.type->getName() << "\\n"; + details << column.type->getName() << " "; + details << (column.column ? column.column->getName() : "") << "\\n"; } return details.str(); } @@ -1860,7 +1871,8 @@ String StepPrinter::printRemoteExchangeSourceStep(const RemoteExchangeSourceStep for (const auto & column : step.getOutputStream().header) { details << column.name << ":"; - details << column.type->getName() << "\\n"; + details << column.type->getName() << " "; + details << (column.column ? column.column->getName() : "") << "\\n"; } return details.str(); } @@ -1903,7 +1915,8 @@ String StepPrinter::printTableFinishStep(const TableFinishStep & step) for (auto & column : step.getOutputStream().header) { details << column.name << ":"; - details << column.type->getName() << "\\n"; + details << column.type->getName() << " "; + details << (column.column ? column.column->getName() : "") << "\\n"; } return details.str(); } @@ -2049,7 +2062,8 @@ String StepPrinter::printTableScanStep(const TableScanStep & step) for (const auto & column : step.getOutputStream().header) { details << column.name << ":"; - details << column.type->getName() << "\\n"; + details << column.type->getName() << " "; + details << (column.column ? 
column.column->getName() : "") << "\\n"; } return details.str(); @@ -2141,7 +2155,8 @@ String StepPrinter::printLimitStep(const LimitStep & step) for (const auto & column : step.getOutputStream().header) { details << column.name << ":"; - details << column.type->getName() << "\\n"; + details << column.type->getName() << " "; + details << (column.column ? column.column->getName() : "") << "\\n"; } if (step.isPartial()) details << "|" @@ -2159,7 +2174,8 @@ String StepPrinter::printOffsetStep(const OffsetStep & step) for (const auto & column : step.getOutputStream().header) { details << column.name << ":"; - details << column.type->getName() << "\\n"; + details << column.type->getName() << " "; + details << (column.column ? column.column->getName() : "") << "\\n"; } return details.str(); } @@ -2177,7 +2193,8 @@ String StepPrinter::printLimitByStep(const LimitByStep & step) for (const auto & column : step.getOutputStream().header) { details << column.name << ":"; - details << column.type->getName() << "\\n"; + details << column.type->getName() << " "; + details << (column.column ? column.column->getName() : "") << "\\n"; } return details.str(); } @@ -2198,7 +2215,8 @@ String StepPrinter::printMergeSortingStep(const MergeSortingStep & step) for (const auto & column : step.getOutputStream().header) { details << column.name << ":"; - details << column.type->getName() << "\\n"; + details << column.type->getName() << " "; + details << (column.column ? column.column->getName() : "") << "\\n"; } return details.str(); } @@ -2249,7 +2267,8 @@ String StepPrinter::printSortingStep(const SortingStep & step) for (const auto & column : step.getOutputStream().header) { details << column.name << ":"; - details << column.type->getName() << "\\n"; + details << column.type->getName() << " "; + details << (column.column ? 
column.column->getName() : "") << "\\n"; } return details.str(); } @@ -2271,7 +2290,8 @@ String StepPrinter::printPartialSortingStep(const PartialSortingStep & step) for (auto & column : step_ptr->getOutputStream().header) { details << column.name << ":"; - details << column.type->getName() << "\\n"; + details << column.type->getName() << " "; + details << (column.column ? column.column->getName() : "") << "\\n"; } */ return details.str(); @@ -2295,7 +2315,8 @@ String StepPrinter::printMergingSortedStep(const MergingSortedStep & step) for (const auto & column : step.getOutputStream().header) { details << column.name << ":"; - details << column.type->getName() << "\\n"; + details << column.type->getName() << " "; + details << (column.column ? column.column->getName() : "") << "\\n"; } return details.str(); } @@ -2316,7 +2337,8 @@ String StepPrinter::printDistinctStep(const DistinctStep & step) for (const auto & column : step.getOutputStream().header) { details << column.name << ":"; - details << column.type->getName() << "\\n"; + details << column.type->getName() << " "; + details << (column.column ? column.column->getName() : "") << "\\n"; } return details.str(); } @@ -2380,7 +2402,8 @@ String StepPrinter::printApplyStep(const ApplyStep & step) for (const auto & column : step.getOutputStream().header) { details << column.name << ":"; - details << column.type->getName() << "\\n"; + details << column.type->getName() << " "; + details << (column.column ? column.column->getName() : "") << "\\n"; } return details.str(); } @@ -2392,7 +2415,8 @@ String StepPrinter::printEnforceSingleRowStep(const EnforceSingleRowStep & step) for (const auto & column : step.getOutputStream().header) { details << column.name << ":"; - details << column.type->getName() << "\\n"; + details << column.type->getName() << " "; + details << (column.column ? 
column.column->getName() : "") << "\\n"; } return details.str(); } @@ -2403,7 +2427,8 @@ String StepPrinter::printAssignUniqueIdStep(const AssignUniqueIdStep & step) for (const auto & column : step.getOutputStream().header) { details << column.name << ":"; - details << column.type->getName() << "\\n"; + details << column.type->getName() << " "; + details << (column.column ? column.column->getName() : "") << "\\n"; } return details.str(); } @@ -2422,7 +2447,8 @@ String StepPrinter::printCTERefStep(const CTERefStep & step) for (const auto & column : step.getOutputStream().header) { details << column.name << ":"; - details << column.type->getName() << "\\n"; + details << column.type->getName() << " "; + details << (column.column ? column.column->getName() : "") << "\\n"; } return details.str(); @@ -2455,7 +2481,8 @@ String StepPrinter::printPartitionTopNStep(const PartitionTopNStep & step) for (const auto & column : step.getOutputStream().header) { details << column.name << ":"; - details << column.type->getName() << "\\n"; + details << column.type->getName() << " "; + details << (column.column ? column.column->getName() : "") << "\\n"; } return details.str(); } @@ -2512,7 +2539,8 @@ String StepPrinter::printWindowStep(const WindowStep & step) for (auto & column : step_ptr->getOutputStream().header) { details << column.name << ":"; - details << column.type->getName() << "\\n"; + details << column.type->getName() << " "; + details << (column.column ? column.column->getName() : "") << "\\n"; } */ return details.str(); @@ -2555,7 +2583,8 @@ String StepPrinter::printExplainAnalyzeStep(const ExplainAnalyzeStep & step) for (auto & column : step.getOutputStream().header) { details << column.name << ":"; - details << column.type->getName() << "\\n"; + details << column.type->getName() << " "; + details << (column.column ? 
column.column->getName() : "") << "\\n"; } return details.str(); } @@ -2579,7 +2608,8 @@ String StepPrinter::printTopNFilteringStep(const TopNFilteringStep & step) for (auto & column : stepPtr->getOutputStream().header) { details << column.name << ":"; - details << column.type->getName() << "\\n"; + details << column.type->getName() << " "; + details << (column.column ? column.column->getName() : "") << "\\n"; } */ return details.str(); @@ -2606,7 +2636,8 @@ String StepPrinter::printFillingStep(const FillingStep & step) for (const auto & column : step.getOutputStream().header) { details << column.name << ":"; - details << column.type->getName() << "\\n"; + details << column.type->getName() << " "; + details << (column.column ? column.column->getName() : "") << "\\n"; } return details.str(); } @@ -2621,7 +2652,8 @@ String StepPrinter::printTotalsHavingStep(const TotalsHavingStep & step) for (const auto & column : step.getOutputStream().header) { details << column.name << ":"; - details << column.type->getName() << "\\n"; + details << column.type->getName() << " "; + details << (column.column ? column.column->getName() : "") << "\\n"; } return details.str(); } @@ -2633,7 +2665,8 @@ String StepPrinter::printExtremesStep(const ExtremesStep & step) for (const auto & column : step.getOutputStream().header) { details << column.name << ":"; - details << column.type->getName() << "\\n"; + details << column.type->getName() << " "; + details << (column.column ? 
column.column->getName() : "") << "\\n"; } return details.str(); } diff --git a/src/QueryPlan/QueryPlanner.cpp b/src/QueryPlan/QueryPlanner.cpp index 41054d2a9ef..bdcc2d1134b 100644 --- a/src/QueryPlan/QueryPlanner.cpp +++ b/src/QueryPlan/QueryPlanner.cpp @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -2251,7 +2252,13 @@ RelationPlan QueryPlannerVisitor::planSetOperation(ASTs & selects, ASTSelectWith if (sub_plans.size() == 1) return sub_plans.front(); + std::vector sub_plan_types; + for (const auto & sub_plan : sub_plans) + sub_plan_types.emplace_back(sub_plan.getRoot()->getOutputNamesToTypes()); + FieldSubColumnIDs sub_column_positions; + DataTypes sub_column_type_coercions; + // compute common subcolumns for each field if (enable_subcolumn_optimization_through_union) { @@ -2274,6 +2281,24 @@ RelationPlan QueryPlannerVisitor::planSetOperation(ASTs & selects, ASTSelectWith for (const auto & sub_col_id : common_sub_col_ids) sub_column_positions.emplace_back(field_id, sub_col_id); } + + if (enable_implicit_type_conversion) + { + for (const auto & field_sub_col_id : sub_column_positions) + { + DataTypes sub_col_types; + for (size_t select_id = 0; select_id < selects.size(); ++select_id) + { + auto sub_col_symbol = sub_plans[select_id] + .getFieldSymbolInfos() + .at(field_sub_col_id.first) + .sub_column_symbols.at(field_sub_col_id.second); + sub_col_types.push_back(sub_plan_types[select_id].at(sub_col_symbol)); + } + sub_column_type_coercions.emplace_back( + getLeastSupertype(sub_col_types, context->getSettingsRef().allow_extended_type_conversion)); + } + } } // 2. 
prepare sub plan & collect input info @@ -2284,6 +2309,7 @@ RelationPlan QueryPlannerVisitor::planSetOperation(ASTs & selects, ASTSelectWith { auto & select = selects[select_id]; auto & sub_plan = sub_plans[select_id]; + const auto & name_to_type = sub_plan_types[select_id]; // prune invisible columns, copy duplicated columns, sort columns by a specific order(primary columns + sub columns) sub_plan = projectFieldSymbols(sub_plan, sub_column_positions); @@ -2291,20 +2317,37 @@ RelationPlan QueryPlannerVisitor::planSetOperation(ASTs & selects, ASTSelectWith auto column_names1 = sub_plan.getRoot()->getOutputNames(); #endif // coerce to common type - if (enable_implicit_type_conversion && analysis.hasRelationTypeCoercion(*select)) + if (enable_implicit_type_conversion) { - auto field_symbol_infos = sub_plan.getFieldSymbolInfos(); - const auto & target_types = analysis.getRelationTypeCoercion(*select); - assert(target_types.size() == field_symbol_infos.size()); NameToType symbols_and_types; + auto field_symbol_infos = sub_plan.getFieldSymbolInfos(); - for (size_t i = 0; i < target_types.size(); ++i) + if (analysis.hasRelationTypeCoercion(*select)) { - auto target_type = target_types[i]; - if (target_type) - symbols_and_types.emplace(field_symbol_infos[i].getPrimarySymbol(), target_type); + const auto & target_types = analysis.getRelationTypeCoercion(*select); + assert(target_types.size() == field_symbol_infos.size()); + + for (size_t i = 0; i < target_types.size(); ++i) + { + auto target_type = target_types[i]; + if (target_type) + symbols_and_types.emplace(field_symbol_infos[i].getPrimarySymbol(), target_type); + } } + if (!sub_column_type_coercions.empty()) + { + for (size_t pos = 0; pos < sub_column_type_coercions.size(); ++pos) + { + const auto & field_sub_col_id = sub_column_positions.at(pos); + const auto & sub_col_symbol + = field_symbol_infos.at(field_sub_col_id.first).sub_column_symbols.at(field_sub_col_id.second); + auto sub_col_type = 
name_to_type.at(sub_col_symbol); + auto target_type = sub_column_type_coercions[pos]; + if (!target_type->equals(*sub_col_type)) + symbols_and_types.emplace(sub_col_symbol, target_type); + } + } auto coercion_result = coerceTypesForSymbols(sub_plan.getRoot(), symbols_and_types, true); mapFieldSymbolInfos(field_symbol_infos, coercion_result.mappings, false); sub_plan = RelationPlan{coercion_result.plan, field_symbol_infos}; diff --git a/tests/queries/4_cnch_stateless_no_tenant/40100_implicit_cast_for_sub_column.reference b/tests/queries/4_cnch_stateless_no_tenant/40100_implicit_cast_for_sub_column.reference new file mode 100644 index 00000000000..64e9a1fd1af --- /dev/null +++ b/tests/queries/4_cnch_stateless_no_tenant/40100_implicit_cast_for_sub_column.reference @@ -0,0 +1,11 @@ +Projection +│ Expressions: c:=`__string_params__\'ff\'_3` +└─ Gather Exchange + └─ Union + │ OutputToInputs: __string_params__\'ff\'_3 = [__string_params__\'ff\'_2,__string_params__\'ff\'_1] + ├─ Projection + │ │ Expressions: __string_params__\'ff\'_2:=cast(`__string_params__\'ff\'`, \'Nullable(String)\') + │ └─ TableScan default.t40100_imp_cast_1 + │ Outputs: [__string_params__\'ff\'] + └─ TableScan default.t40100_imp_cast_2 + Outputs: __string_params__\'ff\'_1:=__string_params__\'ff\' diff --git a/tests/queries/4_cnch_stateless_no_tenant/40100_implicit_cast_for_sub_column.sql b/tests/queries/4_cnch_stateless_no_tenant/40100_implicit_cast_for_sub_column.sql new file mode 100644 index 00000000000..382a98d4668 --- /dev/null +++ b/tests/queries/4_cnch_stateless_no_tenant/40100_implicit_cast_for_sub_column.sql @@ -0,0 +1,26 @@ +DROP TABLE IF EXISTS t40100_imp_cast_1; +DROP TABLE IF EXISTS t40100_imp_cast_2; + +CREATE TABLE t40100_imp_cast_1 +( + `string_params` Map(String, LowCardinality(Nullable(String))) CODEC(ZSTD(1)) +) +engine = CnchMergeTree() order by tuple(); + + +CREATE TABLE t40100_imp_cast_2 +( + `string_params` Map(String, String) CODEC(ZSTD(1)) +) +engine = CnchMergeTree() order 
by tuple(); + +explain stats = 0 +select string_params{'ff'} as c +from ( + select string_params from t40100_imp_cast_1 + union all + select string_params from t40100_imp_cast_2 +) settings enable_optimizer=1; + +DROP TABLE IF EXISTS t40100_imp_cast_1; +DROP TABLE IF EXISTS t40100_imp_cast_2; From d0b260fc77efd384611127f70e337f7cae6272ff Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 08:12:40 +0000 Subject: [PATCH 113/292] Merge 'fky@cnch-2.2@fix@pin-VERSION_STRING' into 'cnch-2.2' fix(clickhousech@m-4731013521): pin to 21.8.7.1 to make it compatible. See merge request: !23369 --- utils/release/release_lib.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/utils/release/release_lib.sh b/utils/release/release_lib.sh index 27ef1ca3959..6bfab43c764 100644 --- a/utils/release/release_lib.sh +++ b/utils/release/release_lib.sh @@ -52,6 +52,9 @@ function gen_revision_author { VERSION_DATE=$(git show -s --format="%cd" --date=format:'%Y-%m-%d' $git_hash) git_describe="${VERSION_BRANCH}-${VERSION_STRING}.${VERSION_REVISION}-${git_hash:0:8}-${BUILD_DATE}-${BUILD_SCM}" + # NOTE: To make CNCH compatible to community release version, pin it to this version. 
+ VERSION_STRING=21.8.7.1 + sed -i -e "s/SET(VERSION_REVISION [^) ]*/SET(VERSION_REVISION $VERSION_REVISION/g;" \ -e "s/SET(VERSION_DESCRIBE [^) ]*/SET(VERSION_DESCRIBE ${git_describe//\//\\\/}/g;" \ -e "s/SET(VERSION_GITHASH [^) ]*/SET(VERSION_GITHASH $git_hash/g;" \ From 3ea056c5f4ff521a8da4d205fbd2e8786138f017 Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 08:12:56 +0000 Subject: [PATCH 114/292] Merge branch 'cherry-pick-b2868154-2' into 'cnch-2.2' fix(clickhousech@m-4679236657):[To CNCH-2.2] Fix tos error message when using compression or result empty See merge request dp/ClickHouse!23374 --- src/IO/OutfileCommon.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/IO/OutfileCommon.cpp b/src/IO/OutfileCommon.cpp index 8af772b31bf..b963ebee188 100644 --- a/src/IO/OutfileCommon.cpp +++ b/src/IO/OutfileCommon.cpp @@ -145,9 +145,10 @@ void OutfileTarget::getRawBuffer() else if (scheme == "tos") { if (out_uri.getQueryParameters().empty()) - { throw Exception("Missing access key, please check configuration.", ErrorCodes::BAD_ARGUMENTS); - } + if (compression_method != CompressionMethod::None) + throw Exception("Compression is not supported for tos outfile", ErrorCodes::BAD_ARGUMENTS); + out_buf_raw = std::make_unique(); } #if USE_HDFS @@ -291,7 +292,10 @@ void OutfileTarget::flushFile() ConnectionTimeouts timeouts(settings.http_connection_timeout, settings.http_send_timeout, settings.http_receive_timeout); HTTPSender http_sender(tos_uri, Poco::Net::HTTPRequest::HTTP_PUT, timeouts, http_headers); - http_sender.send((*out_tos_buf).str()); + String res = (*out_tos_buf).str(); + if (res.empty()) + res = "\n"; + http_sender.send(res); http_sender.handleResponse(); } catch (...) 
From b32ec39fdd727ad99c899324f43b2deef134aa2c Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 08:13:12 +0000 Subject: [PATCH 115/292] Merge 'fix_in_subquery_no_super_type_error-cnch-2.2' into 'cnch-2.2' fix(clickhousech@m-4724154113): fix in subquery no super type error cnch 2.2 See merge request: !23430 --- src/Analyzers/ExprAnalyzer.cpp | 18 +++++++- src/Core/Settings.h | 1 + ...40103_fix_in_subquery_type_issue.reference | 41 ++++++++++++++++++ .../40103_fix_in_subquery_type_issue.sql | 42 +++++++++++++++++++ 4 files changed, 101 insertions(+), 1 deletion(-) create mode 100644 tests/queries/4_cnch_stateless/40103_fix_in_subquery_type_issue.reference create mode 100644 tests/queries/4_cnch_stateless/40103_fix_in_subquery_type_issue.sql diff --git a/src/Analyzers/ExprAnalyzer.cpp b/src/Analyzers/ExprAnalyzer.cpp index d9e377bcea2..82ab8207b9f 100644 --- a/src/Analyzers/ExprAnalyzer.cpp +++ b/src/Analyzers/ExprAnalyzer.cpp @@ -693,7 +693,23 @@ void ExprAnalyzerVisitor::processSubqueryArgsWithCoercion(ASTPtr & lhs_ast, ASTP { DataTypePtr super_type = nullptr; if (enable_implicit_type_conversion) - super_type = getLeastSupertype(DataTypes{lhs_type, rhs_type}, allow_extended_conversion); + { + if (context->getSettingsRef().convert_to_right_type_for_in_subquery) + { + if (const auto * type_tuple = typeid_cast(rhs_type.get())) + { + DataTypes elem_types = type_tuple->getElements(); + std::transform(elem_types.begin(), elem_types.end(), elem_types.begin(), &JoinCommon::convertTypeToNullable); + super_type = std::make_shared(elem_types, type_tuple->getElementNames()); + } + else + { + super_type = JoinCommon::convertTypeToNullable(rhs_type); + } + } + else + super_type = getLeastSupertype(DataTypes{lhs_type, rhs_type}, allow_extended_conversion); + } if (!super_type) throw Exception("Incompatible types for IN prediacte", ErrorCodes::TYPE_MISMATCH); if (!lhs_type->equals(*super_type)) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 
c219b96789e..5bdb217d1e7 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -1431,6 +1431,7 @@ enum PreloadLevelSettings : UInt64 M(UInt64, global_bindings_update_time, 60*60, "Interval to update global binding cache from catalog, in seconds.", 0) \ /** */ \ M(Bool, late_materialize_aggressive_push_down, false, "When table use early materialize strategy, this setting enable aggressively moving predicates to read chain w/o considering other factor like columns size or number of columns in the query", 0) \ + M(Bool, convert_to_right_type_for_in_subquery, true, "For IN subquery, whether convert arguments to the right type", 0) \ /** Optimizer relative settings, Plan build and RBO */ \ M(Bool, enable_auto_prepared_statement, false, "Whether to enable automatic prepared statement", 0) \ M(Bool, enable_nested_loop_join, true, "Whether enable nest loop join for outer join with filter", 0)\ diff --git a/tests/queries/4_cnch_stateless/40103_fix_in_subquery_type_issue.reference b/tests/queries/4_cnch_stateless/40103_fix_in_subquery_type_issue.reference new file mode 100644 index 00000000000..293971b58de --- /dev/null +++ b/tests/queries/4_cnch_stateless/40103_fix_in_subquery_type_issue.reference @@ -0,0 +1,41 @@ +-- { echoOn } +/* test incompatible types */ +select k, i1 in (select s1 from t40103) from t40103 order by k; +1 1 +2 0 +3 1 +-- bsp mode generate different output +-- select k, s1 in (select i1 from t40103) from t40103 order by k; + +/* test null */ +select k, i1 in (select i2 from t40103) from t40103 order by k; +1 1 +2 1 +3 0 +-- slightly different +-- select k, i2 in (select i1 from t40103) from t40103 order by k; + +-- select k, i2 in (select i2 from t40103) from t40103 order by k; + +-- select k, i2 in (select i2 from t40103) from t40103 order by k settings transform_null_in=1; + +/* test tuple */ +select k, (i1, i1) in (select s1, s1 from t40103) from t40103 order by k; +1 1 +2 0 +3 1 +select k, (s1, s1) in (select i1, i1 from t40103) from t40103 
order by k; +1 1 +2 0 +3 1 +-- slightly different +-- select k, (i1, i2) in (select s1, s2 from t40103) from t40103 order by k; + +select k, (i1, i1) in (select i2, i2 from t40103) from t40103 order by k; +1 1 +2 1 +3 0 +select k, (i2, i2) in (select i1, i1 from t40103) from t40103 order by k; +1 1 +2 1 +3 0 diff --git a/tests/queries/4_cnch_stateless/40103_fix_in_subquery_type_issue.sql b/tests/queries/4_cnch_stateless/40103_fix_in_subquery_type_issue.sql new file mode 100644 index 00000000000..460255cdb20 --- /dev/null +++ b/tests/queries/4_cnch_stateless/40103_fix_in_subquery_type_issue.sql @@ -0,0 +1,42 @@ +drop table if exists t40103; + +create table t40103 (k Int32, i1 Int32, i2 Nullable(Int32), s1 String, s2 Nullable(String)) engine = CnchMergeTree() order by tuple(); + +insert into t40103 values (1, 1, 1, '1', '1') , (2, 2, 2, 'a', 'a') , (3, 3, NULL, '3', NULL) ; + +-- { echoOn } +/* test incompatible types */ +select k, i1 in (select s1 from t40103) from t40103 order by k; + +-- bsp mode generate different output +-- select k, s1 in (select i1 from t40103) from t40103 order by k; + +/* test null */ +select k, i1 in (select i2 from t40103) from t40103 order by k; + +-- slightly different +-- select k, i2 in (select i1 from t40103) from t40103 order by k; + +-- select k, i2 in (select i2 from t40103) from t40103 order by k; + +-- select k, i2 in (select i2 from t40103) from t40103 order by k settings transform_null_in=1; + +/* test tuple */ +select k, (i1, i1) in (select s1, s1 from t40103) from t40103 order by k; + +select k, (s1, s1) in (select i1, i1 from t40103) from t40103 order by k; + +-- slightly different +-- select k, (i1, i2) in (select s1, s2 from t40103) from t40103 order by k; + +select k, (i1, i1) in (select i2, i2 from t40103) from t40103 order by k; + +select k, (i2, i2) in (select i1, i1 from t40103) from t40103 order by k; + +-- slightly different +-- select k, (i2, i2) in (select i2, i2 from t40103) from t40103 order by k; + +-- select 
k, (i2, i2) in (select i2, i2 from t40103) from t40103 order by k settings transform_null_in=1; + +-- { echoOff } +drop table if exists t40103; From 60f8eaf9f80adb98366a8c94a03ac9c697ef7226 Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 08:13:46 +0000 Subject: [PATCH 116/292] Merge 'cnch_22_fix_sketch' into 'cnch-2.2' feat(clickhousech@m-13693971): add composite estimate for hll sketch See merge request: !23445 # Conflicts: # src/DataTypes/IDataType.h --- .../AggregateFunctionSketchEstimate.cpp | 83 +++++----- .../AggregateFunctionSketchEstimate.h | 142 +++++++++++++----- .../AggregateFunctionThetaSketchEstimate.cpp | 137 +++++++++++++++++ .../AggregateFunctionThetaSketchEstimate.h | 121 +++++++++++++++ .../registerAggregateFunctions.cpp | 2 + src/DataTypes/IDataType.h | 1 + src/Functions/FunctionSketch.h | 16 +- .../20000_theta_sketch_estimate.reference | 2 + .../20000_theta_sketch_estimate.sql | 17 +++ 9 files changed, 439 insertions(+), 82 deletions(-) create mode 100644 src/AggregateFunctions/AggregateFunctionThetaSketchEstimate.cpp create mode 100644 src/AggregateFunctions/AggregateFunctionThetaSketchEstimate.h create mode 100644 tests/queries/0_stateless/20000_theta_sketch_estimate.reference create mode 100644 tests/queries/0_stateless/20000_theta_sketch_estimate.sql diff --git a/src/AggregateFunctions/AggregateFunctionSketchEstimate.cpp b/src/AggregateFunctions/AggregateFunctionSketchEstimate.cpp index eab9defba88..c16b89cee42 100644 --- a/src/AggregateFunctions/AggregateFunctionSketchEstimate.cpp +++ b/src/AggregateFunctions/AggregateFunctionSketchEstimate.cpp @@ -31,16 +31,18 @@ AggregateFunctionPtr createAggregateFunctionWithK(const DataTypes & argument_typ const IDataType & argument_type = *argument_types[0]; WhichDataType which(argument_type); + bool ignore_wrong_date = argument_types.size() == 2; + if (which.idx == TypeIndex::SketchBinary) { - return std::make_shared::template AggregateFunction>(argument_types, params); + return 
std::make_shared::template AggregateFunction>(argument_types, params, ignore_wrong_date); } else if (which.isAggregateFunction()) { - return std::make_shared::template AggregateFunction>(argument_types, params); + return std::make_shared::template AggregateFunction>(argument_types, params, ignore_wrong_date); } - return std::make_shared::template AggregateFunction>(argument_types, params); + return std::make_shared::template AggregateFunction>(argument_types, params, ignore_wrong_date); } AggregateFunctionPtr createAggregateFunctionHllSketchEstimate @@ -67,7 +69,7 @@ AggregateFunctionPtr createAggregateFunctionHllSketchEstimate precision = precision_param; } - if (argument_types.size() != 1) + if (argument_types.size() != 1 && argument_types.size() != 2) throw Exception("Incorrect number of arguments for aggregate function " + name, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); switch (precision) @@ -137,7 +139,7 @@ AggregateFunctionPtr createAggregateFunctionHllSketchUnion precision = precision_param; } - if (argument_types.size() != 1) + if (argument_types.size() != 1 && argument_types.size() != 2) throw Exception("Incorrect number of arguments for aggregate function " + name, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); if (argument_types[0]->getTypeId() != TypeIndex::SketchBinary) throw Exception("Incorrect type of arguments for aggregate function " + name, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); @@ -190,7 +192,7 @@ AggregateFunctionPtr createAggregateFunctionHllSketchUnion AggregateFunctionPtr createAggregateFunctionKllSketchEstimate (const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *) { - if (argument_types.size() != 1) + if (argument_types.size() != 1 && argument_types.size() != 2) throw Exception("Incorrect number of arguments for aggregate function " + name, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); Float64 quantile; @@ -208,27 +210,28 @@ AggregateFunctionPtr createAggregateFunctionKllSketchEstimate 
"Aggregate function " + name + " first parameter should between 0 and 1.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); String type_name = params[1].safeGet(); - + bool ignore_wrong_date = argument_types.size() == 2; + if (type_name == "UInt8") - return std::make_shared>(quantile, argument_types, params); + return std::make_shared>(quantile, argument_types, params, ignore_wrong_date); else if (type_name == "UInt16") - return std::make_shared>(quantile, argument_types, params); + return std::make_shared>(quantile, argument_types, params, ignore_wrong_date); else if (type_name == "UInt32") - return std::make_shared>(quantile, argument_types, params); + return std::make_shared>(quantile, argument_types, params, ignore_wrong_date); else if (type_name == "UInt64") - return std::make_shared>(quantile, argument_types, params); + return std::make_shared>(quantile, argument_types, params, ignore_wrong_date); else if (type_name == "Int8") - return std::make_shared>(quantile, argument_types, params); + return std::make_shared>(quantile, argument_types, params, ignore_wrong_date); else if (type_name == "Int16") - return std::make_shared>(quantile, argument_types, params); + return std::make_shared>(quantile, argument_types, params, ignore_wrong_date); else if (type_name == "Int32") - return std::make_shared>(quantile, argument_types, params); + return std::make_shared>(quantile, argument_types, params, ignore_wrong_date); else if (type_name == "Int64") - return std::make_shared>(quantile, argument_types, params); + return std::make_shared>(quantile, argument_types, params, ignore_wrong_date); else if (type_name == "Float32") - return std::make_shared>(quantile, argument_types, params); + return std::make_shared>(quantile, argument_types, params, ignore_wrong_date); else if (type_name == "Float64") - return std::make_shared>(quantile, argument_types, params); + return std::make_shared>(quantile, argument_types, params, ignore_wrong_date); else throw Exception( "Aggregate 
function " + name + " second parameter not correct.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); @@ -242,7 +245,7 @@ AggregateFunctionPtr createAggregateFunctionKllSketchEstimate AggregateFunctionPtr createAggregateFunctionQuantilesSketchEstimate (const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *) { - if (argument_types.size() != 1) + if (argument_types.size() != 1 && argument_types.size() != 2) throw Exception("Incorrect number of arguments for aggregate function " + name, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); Float64 quantile; @@ -260,27 +263,28 @@ AggregateFunctionPtr createAggregateFunctionQuantilesSketchEstimate "Aggregate function " + name + " first parameter should between 0 and 1.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); String type_name = params[1].safeGet(); + bool ignore_wrong_date = argument_types.size() == 2; if (type_name == "UInt8") - return std::make_shared>(quantile, argument_types, params); + return std::make_shared>(quantile, argument_types, params, ignore_wrong_date); else if (type_name == "UInt16") - return std::make_shared>(quantile, argument_types, params); + return std::make_shared>(quantile, argument_types, params, ignore_wrong_date); else if (type_name == "UInt32") - return std::make_shared>(quantile, argument_types, params); + return std::make_shared>(quantile, argument_types, params, ignore_wrong_date); else if (type_name == "UInt64") - return std::make_shared>(quantile, argument_types, params); + return std::make_shared>(quantile, argument_types, params, ignore_wrong_date); else if (type_name == "Int8") - return std::make_shared>(quantile, argument_types, params); + return std::make_shared>(quantile, argument_types, params, ignore_wrong_date); else if (type_name == "Int16") - return std::make_shared>(quantile, argument_types, params); + return std::make_shared>(quantile, argument_types, params, ignore_wrong_date); else if (type_name == "Int32") - return 
std::make_shared>(quantile, argument_types, params); + return std::make_shared>(quantile, argument_types, params, ignore_wrong_date); else if (type_name == "Int64") - return std::make_shared>(quantile, argument_types, params); + return std::make_shared>(quantile, argument_types, params, ignore_wrong_date); else if (type_name == "Float32") - return std::make_shared>(quantile, argument_types, params); + return std::make_shared>(quantile, argument_types, params, ignore_wrong_date); else if (type_name == "Float64") - return std::make_shared>(quantile, argument_types, params); + return std::make_shared>(quantile, argument_types, params, ignore_wrong_date); else throw Exception( "Aggregate function " + name + " second parameter not correct.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); @@ -293,7 +297,7 @@ AggregateFunctionPtr createAggregateFunctionQuantilesSketchEstimate AggregateFunctionPtr createAggregateFunctionQuantilesSketchUnion (const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *) { - if (argument_types.size() != 1) + if (argument_types.size() != 1 && argument_types.size() != 2) throw Exception("Incorrect number of arguments for aggregate function " + name, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); if (!params.empty()) @@ -305,27 +309,28 @@ AggregateFunctionPtr createAggregateFunctionQuantilesSketchUnion } String type_name = params[0].safeGet(); + bool ignore_wrong_date = argument_types.size() == 2; if (type_name == "UInt8") - return std::make_shared>(argument_types, params); + return std::make_shared>(argument_types, params, ignore_wrong_date); else if (type_name == "UInt16") - return std::make_shared>(argument_types, params); + return std::make_shared>(argument_types, params, ignore_wrong_date); else if (type_name == "UInt32") - return std::make_shared>(argument_types, params); + return std::make_shared>(argument_types, params, ignore_wrong_date); else if (type_name == "UInt64") - return 
std::make_shared>(argument_types, params); + return std::make_shared>(argument_types, params, ignore_wrong_date); else if (type_name == "Int8") - return std::make_shared>(argument_types, params); + return std::make_shared>(argument_types, params, ignore_wrong_date); else if (type_name == "Int16") - return std::make_shared>(argument_types, params); + return std::make_shared>(argument_types, params, ignore_wrong_date); else if (type_name == "Int32") - return std::make_shared>(argument_types, params); + return std::make_shared>(argument_types, params, ignore_wrong_date); else if (type_name == "Int64") - return std::make_shared>(argument_types, params); + return std::make_shared>(argument_types, params, ignore_wrong_date); else if (type_name == "Float32") - return std::make_shared>(argument_types, params); + return std::make_shared>(argument_types, params, ignore_wrong_date); else if (type_name == "Float64") - return std::make_shared>(argument_types, params); + return std::make_shared>(argument_types, params, ignore_wrong_date); else throw Exception( "Aggregate function " + name + " second parameter not correct.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); diff --git a/src/AggregateFunctions/AggregateFunctionSketchEstimate.h b/src/AggregateFunctions/AggregateFunctionSketchEstimate.h index 826e5e3ec62..0623daeaa60 100644 --- a/src/AggregateFunctions/AggregateFunctionSketchEstimate.h +++ b/src/AggregateFunctions/AggregateFunctionSketchEstimate.h @@ -37,8 +37,8 @@ class AggregateFunctionHLLSketchUnion final : public IAggregateFunctionDataHelper, AggregateFunctionHLLSketchUnion> { public: - AggregateFunctionHLLSketchUnion(const DataTypes & argument_types_, const Array & params_) - : IAggregateFunctionDataHelper, AggregateFunctionHLLSketchUnion>(argument_types_, params_){} + AggregateFunctionHLLSketchUnion(const DataTypes & argument_types_, const Array & params_, bool ignore_wrong_data_ = false) + : IAggregateFunctionDataHelper, 
AggregateFunctionHLLSketchUnion>(argument_types_, params_), ignore_wrong_data(ignore_wrong_data_) {} String getName() const override { @@ -52,9 +52,19 @@ class AggregateFunctionHLLSketchUnion final void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override { - const auto & value = static_cast(*columns[0]).getDataAt(row_num); - datasketches::hll_sketch hll_sketch_data = datasketches::hll_sketch::deserialize(value.data, value.size, AggregateFunctionHllSketchAllocator()); - this->data(place).u.update(hll_sketch_data); + try + { + const auto & value = static_cast(*columns[0]).getDataAt(row_num); + if (ignore_wrong_data && value.size == 0) + return; + datasketches::hll_sketch hll_sketch_data = datasketches::hll_sketch::deserialize(value.data, value.size, AggregateFunctionHllSketchAllocator()); + this->data(place).u.update(hll_sketch_data); + } + catch (std::exception & e) + { + if (!ignore_wrong_data) + throw e; + } } void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override @@ -84,6 +94,7 @@ class AggregateFunctionHLLSketchUnion final bool allocatesMemoryInArena() const override { return true; } private: + bool ignore_wrong_data = false; inline datasketches::hll_sketch readHLLSketch(ReadBuffer & buf) const { String d; @@ -97,8 +108,12 @@ class AggregateFunctionHllSketchEstimate final : public IAggregateFunctionDataHelper, AggregateFunctionHllSketchEstimate> { public: - AggregateFunctionHllSketchEstimate(const DataTypes & argument_types_, const Array & params_) - : IAggregateFunctionDataHelper, AggregateFunctionHllSketchEstimate>(argument_types_, params_) {} + AggregateFunctionHllSketchEstimate(const DataTypes & argument_types_, const Array & params_, bool ignore_wrong_data_ = false) + : IAggregateFunctionDataHelper, AggregateFunctionHllSketchEstimate>(argument_types_, params_), ignore_wrong_data(ignore_wrong_data_) + { + if (params_.size() == 2) + use_composite_estimate = true; + } String 
getName() const override { @@ -112,24 +127,36 @@ class AggregateFunctionHllSketchEstimate final void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override { - // String is for new datatype "Sketch" - if constexpr (std::is_same_v) + try { - const auto & value = static_cast(*columns[0]).getDataAt(row_num); - datasketches::hll_sketch hllSketch = datasketches::hll_sketch::deserialize(value.data, value.size, AggregateFunctionHllSketchAllocator()); - this->data(place).u.update(hllSketch); + // String is for new datatype "Sketch" + if constexpr (std::is_same_v) + { + const auto & value = static_cast(*columns[0]).getDataAt(row_num); + if (ignore_wrong_data && value.size == 0) + return; + datasketches::hll_sketch hllSketch = datasketches::hll_sketch::deserialize(value.data, value.size, AggregateFunctionHllSketchAllocator()); + this->data(place).u.update(hllSketch); + } + else if constexpr (std::is_same_v) + { + //the format of this value should be the same with serialize + const auto & value = static_cast(*columns[0]).getDataAt(row_num); + if (ignore_wrong_data && value.size == 0) + return; + ReadBuffer buf(const_cast(value.data), value.size); + this->data(place).u.update(readHllSketch(buf)); + } + else + { + StringRef value = columns[0]->getDataAt(row_num); + this->data(place).u.update(value.toString()); + } } - else if constexpr (std::is_same_v) + catch (std::exception & e) { - //the format of this value should be the same with serialize - const auto & value = static_cast(*columns[0]).getDataAt(row_num); - ReadBuffer buf(const_cast(value.data), value.size); - this->data(place).u.update(readHllSketch(buf)); - } - else - { - StringRef value = columns[0]->getDataAt(row_num); - this->data(place).u.update(value.toString()); + if (!ignore_wrong_data) + throw e; } } @@ -152,12 +179,17 @@ class AggregateFunctionHllSketchEstimate final void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * ) const override { - 
static_cast(to).getData().push_back(this->data(place).u.get_estimate()); + if (use_composite_estimate) + static_cast(to).getData().push_back(this->data(place).u.get_composite_estimate()); + else + static_cast(to).getData().push_back(this->data(place).u.get_estimate()); } bool allocatesMemoryInArena() const override { return true; } private: + bool ignore_wrong_data = false; + bool use_composite_estimate = false; inline datasketches::hll_sketch readHllSketch(ReadBuffer & buf) const { String d; @@ -181,8 +213,8 @@ class AggregateFunctionKllSketchEstimate final : public IAggregateFunctionDataHelper, AggregateFunctionKllSketchEstimate> { public: - AggregateFunctionKllSketchEstimate(const double quantile_, const DataTypes & argument_types_, const Array & params_) - : IAggregateFunctionDataHelper, AggregateFunctionKllSketchEstimate>(argument_types_, params_),quantile(quantile_) {} + AggregateFunctionKllSketchEstimate(const double quantile_, const DataTypes & argument_types_, const Array & params_, bool ignore_wrong_data_ = false) + : IAggregateFunctionDataHelper, AggregateFunctionKllSketchEstimate>(argument_types_, params_),quantile(quantile_), ignore_wrong_data(ignore_wrong_data_) {} Float64 quantile = 0; @@ -198,9 +230,18 @@ class AggregateFunctionKllSketchEstimate final void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override { - const auto & value = static_cast(*columns[0]).getDataAt(row_num); - datasketches::kll_sketch kll_sketch_data = datasketches::kll_sketch::deserialize(value.data, value.size, datasketches::serde(), std::less(), AggregateFunctionHllSketchAllocator()); - this->data(place).u.merge(kll_sketch_data); + try { + const auto & value = static_cast(*columns[0]).getDataAt(row_num); + if (ignore_wrong_data && value.size == 0) + return; + datasketches::kll_sketch kll_sketch_data = datasketches::kll_sketch::deserialize(value.data, value.size, datasketches::serde(), std::less(), AggregateFunctionHllSketchAllocator()); 
+ this->data(place).u.merge(kll_sketch_data); + } + catch (std::exception & e) + { + if (!ignore_wrong_data) + throw e; + } } void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override @@ -229,6 +270,7 @@ class AggregateFunctionKllSketchEstimate final bool allocatesMemoryInArena() const override { return true; } private: + bool ignore_wrong_data = false; inline datasketches::kll_sketch readKllSketch(ReadBuffer & buf) const { String d; @@ -252,8 +294,8 @@ class AggregateFunctionQuantilesSketchEstimate final : public IAggregateFunctionDataHelper, AggregateFunctionQuantilesSketchEstimate> { public: - AggregateFunctionQuantilesSketchEstimate(const double quantile_, const DataTypes & argument_types_, const Array & params_) - : IAggregateFunctionDataHelper, AggregateFunctionQuantilesSketchEstimate>(argument_types_, params_),quantile(quantile_) {} + AggregateFunctionQuantilesSketchEstimate(const double quantile_, const DataTypes & argument_types_, const Array & params_, bool ignore_wrong_data_ = false) + : IAggregateFunctionDataHelper, AggregateFunctionQuantilesSketchEstimate>(argument_types_, params_),quantile(quantile_), ignore_wrong_data(ignore_wrong_data_) {} Float64 quantile = 0; @@ -269,9 +311,19 @@ class AggregateFunctionQuantilesSketchEstimate final void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override { - const auto & value = static_cast(*columns[0]).getDataAt(row_num); - datasketches::quantiles_sketch quantiles_sketch_data = datasketches::quantiles_sketch::deserialize(value.data, value.size, datasketches::serde(), std::less(), AggregateFunctionHllSketchAllocator()); - this->data(place).u.merge(quantiles_sketch_data); + try + { + const auto & value = static_cast(*columns[0]).getDataAt(row_num); + if (ignore_wrong_data && value.size == 0) + return; + datasketches::quantiles_sketch quantiles_sketch_data = datasketches::quantiles_sketch::deserialize(value.data, value.size, 
datasketches::serde(), std::less(), AggregateFunctionHllSketchAllocator()); + this->data(place).u.merge(quantiles_sketch_data); + } + catch (std::exception & e) + { + if (!ignore_wrong_data) + throw e; + } } void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override @@ -299,6 +351,7 @@ class AggregateFunctionQuantilesSketchEstimate final bool allocatesMemoryInArena() const override { return true; } private: + bool ignore_wrong_data = false; inline datasketches::quantiles_sketch readQuantilesSketch(ReadBuffer & buf) const { String d; @@ -309,11 +362,11 @@ class AggregateFunctionQuantilesSketchEstimate final template class AggregateFunctionQuantilesSketchUnion final - : public IAggregateFunctionDataHelper, AggregateFunctionQuantilesSketchEstimate> + : public IAggregateFunctionDataHelper, AggregateFunctionQuantilesSketchUnion> { public: - AggregateFunctionQuantilesSketchUnion(const DataTypes & argument_types_, const Array & params_) - : IAggregateFunctionDataHelper, AggregateFunctionQuantilesSketchEstimate>(argument_types_, params_){} + AggregateFunctionQuantilesSketchUnion(const DataTypes & argument_types_, const Array & params_, bool ignore_wrong_data_ = false) + : IAggregateFunctionDataHelper, AggregateFunctionQuantilesSketchUnion>(argument_types_, params_), ignore_wrong_data(ignore_wrong_data_) {} String getName() const override { @@ -327,9 +380,19 @@ class AggregateFunctionQuantilesSketchUnion final void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override { - const auto & value = static_cast(*columns[0]).getDataAt(row_num); - datasketches::quantiles_sketch quantiles_sketch_data = datasketches::quantiles_sketch::deserialize(value.data, value.size, datasketches::serde(), std::less(), AggregateFunctionHllSketchAllocator()); - this->data(place).u.merge(quantiles_sketch_data); + try + { + const auto & value = static_cast(*columns[0]).getDataAt(row_num); + if (ignore_wrong_data && 
value.size == 0) + return; + datasketches::quantiles_sketch quantiles_sketch_data = datasketches::quantiles_sketch::deserialize(value.data, value.size, datasketches::serde(), std::less(), AggregateFunctionHllSketchAllocator()); + this->data(place).u.merge(quantiles_sketch_data); + } + catch (std::exception & e) + { + if (!ignore_wrong_data) + throw e; + } } void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override @@ -359,6 +422,7 @@ class AggregateFunctionQuantilesSketchUnion final bool allocatesMemoryInArena() const override { return true; } private: + bool ignore_wrong_data = false; inline datasketches::quantiles_sketch readQuantilesSketch(ReadBuffer & buf) const { String d; diff --git a/src/AggregateFunctions/AggregateFunctionThetaSketchEstimate.cpp b/src/AggregateFunctions/AggregateFunctionThetaSketchEstimate.cpp new file mode 100644 index 00000000000..20125088e65 --- /dev/null +++ b/src/AggregateFunctions/AggregateFunctionThetaSketchEstimate.cpp @@ -0,0 +1,137 @@ +#include + +#include +#include + +#include +#include + +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +namespace +{ + + template + struct WithK + { + template + using AggregateFunction = AggregateFunctionThetaSketchEstimate; + }; + + template + AggregateFunctionPtr createAggregateFunctionWithK(const DataTypes & argument_types, const Array & params) + { + const IDataType & argument_type = *argument_types[0]; + WhichDataType which(argument_type); + + bool ignore_wrong_date = argument_types.size() == 2; + + if (which.isSketchBinary()) + { + return std::make_shared::template AggregateFunction>(argument_types, params, ignore_wrong_date); + } + else if (which.isAggregateFunction()) + { + return std::make_shared::template AggregateFunction>(argument_types, params, ignore_wrong_date); + } + else if (which.isString()) + { + return std::make_shared::template 
AggregateFunction>(argument_types, params, ignore_wrong_date); + } + else + { + throw Exception("Incorrect columns type for aggregate function: " + argument_type.getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + } + + + AggregateFunctionPtr createAggregateFunctionThetaSketchEstimate + (const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *) + { + UInt8 precision = 15; + if (!params.empty()) + { + if (params.size() != 1) + { + throw Exception( + "Aggregate function " + name + " requires one parameter or less.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + } + + UInt64 precision_param = applyVisitor(FieldVisitorConvertToNumber(), params[0]); + // This range is hardcoded below + if (precision_param > 26 || precision_param < 5) + { + throw Exception( + "Parameter for aggregate function " + name + "is out or range: [5, 26].", ErrorCodes::ARGUMENT_OUT_OF_BOUND); + } + + precision = precision_param; + } + if (argument_types.size() != 1 && argument_types.size() != 2) + throw Exception("Incorrect number of arguments for aggregate function " + name, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + switch (precision) + { + case 5: + return createAggregateFunctionWithK<5>(argument_types, params); + case 6: + return createAggregateFunctionWithK<6>(argument_types, params); + case 7: + return createAggregateFunctionWithK<7>(argument_types, params); + case 8: + return createAggregateFunctionWithK<8>(argument_types, params); + case 9: + return createAggregateFunctionWithK<9>(argument_types, params); + case 10: + return createAggregateFunctionWithK<10>(argument_types, params); + case 11: + return createAggregateFunctionWithK<11>(argument_types, params); + case 12: + return createAggregateFunctionWithK<12>(argument_types, params); + case 13: + return createAggregateFunctionWithK<13>(argument_types, params); + case 14: + return createAggregateFunctionWithK<14>(argument_types, params); + case 15: + return 
createAggregateFunctionWithK<15>(argument_types, params); + case 16: + return createAggregateFunctionWithK<16>(argument_types, params); + case 17: + return createAggregateFunctionWithK<17>(argument_types, params); + case 18: + return createAggregateFunctionWithK<18>(argument_types, params); + case 19: + return createAggregateFunctionWithK<19>(argument_types, params); + case 20: + return createAggregateFunctionWithK<20>(argument_types, params); + case 21: + return createAggregateFunctionWithK<21>(argument_types, params); + case 22: + return createAggregateFunctionWithK<22>(argument_types, params); + case 23: + return createAggregateFunctionWithK<23>(argument_types, params); + case 24: + return createAggregateFunctionWithK<24>(argument_types, params); + case 25: + return createAggregateFunctionWithK<25>(argument_types, params); + case 26: + return createAggregateFunctionWithK<26>(argument_types, params); + } + + __builtin_unreachable(); + } +} + +void registerAggregateFunctionThetaSketchEstimate(AggregateFunctionFactory & factory) +{ + factory.registerFunction("thetaSketchEstimate", createAggregateFunctionThetaSketchEstimate); +} +} diff --git a/src/AggregateFunctions/AggregateFunctionThetaSketchEstimate.h b/src/AggregateFunctions/AggregateFunctionThetaSketchEstimate.h new file mode 100644 index 00000000000..3341aded950 --- /dev/null +++ b/src/AggregateFunctions/AggregateFunctionThetaSketchEstimate.h @@ -0,0 +1,121 @@ +// +// Created by vita.lai on 2022/7/9. 
+// +#pragma once + +#include + +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + + +namespace DB +{ +template +struct AggregateFunctionThetaSketchEstimateData +{ + datasketches::theta_union sk_union; + AggregateFunctionThetaSketchEstimateData():sk_union(datasketches::theta_union::builder().set_lg_k(K).build()){} + static String getName() { return "theta_sketch"; } +}; + +template +class AggregateFunctionThetaSketchEstimate final + : public IAggregateFunctionDataHelper, AggregateFunctionThetaSketchEstimate> +{ +public: + AggregateFunctionThetaSketchEstimate(const DataTypes & argument_types_, const Array & params_, bool ignore_wrong_data_ = false) + : IAggregateFunctionDataHelper, AggregateFunctionThetaSketchEstimate>(argument_types_, params_), ignore_wrong_data(ignore_wrong_data_) {} + + String getName() const override + { + return "thetaSketchEstimate"; + } + + DataTypePtr getReturnType() const override + { + return std::make_shared(); + } + + void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override + { + try + { + // String is for new datatype "Sketch" + if constexpr (std::is_same_v) + { + const auto & value = static_cast(*columns[0]).getDataAt(row_num); + if (ignore_wrong_data && value.size == 0) + return; + // datasketches::compact_theta_sketch thetaSketch = datasketches::compact_theta_sketch::deserialize(value.data, value.size, datasketches::DEFAULT_SEED, AggregateFunctionThetaSketchAllocator()); + this->data(place).sk_union.update(datasketches::wrapped_compact_theta_sketch::wrap(value.data, value.size)); + } + else if constexpr (std::is_same_v) + { + //the format of this value should be the same with serialize + const auto & value = static_cast(*columns[0]).getDataAt(row_num); + if (ignore_wrong_data && value.size == 0) + return; + // ReadBuffer buf(const_cast(value.data), value.size); + // this->data(place).sk_union.update(readThetaSketch(buf)); + 
this->data(place).sk_union.update(datasketches::wrapped_compact_theta_sketch::wrap(value.data, value.size)); + } + else + { + StringRef value = columns[0]->getDataAt(row_num); + datasketches::update_theta_sketch sk_update = datasketches::update_theta_sketch::builder().build(); + sk_update.update(value.toString()); + this->data(place).sk_union.update(sk_update); + } + } + catch (std::exception & e) + { + if (!ignore_wrong_data) + throw e; + } + } + + void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override + { + this->data(place).sk_union.update(this->data(rhs).sk_union.get_result()); + } + + void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override + { + std::ostringstream oss; + this->data(place).sk_union.get_result().serialize(oss); + writeBinary(oss.str(), buf); + } + + void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena *) const override + { + this->data(place).sk_union.update(readThetaSketch(buf)); + } + + void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * ) const override + { + static_cast(to).getData().push_back(this->data(place).sk_union.get_result().get_estimate()); + } + + bool allocatesMemoryInArena() const override { return true; } + +private: + bool ignore_wrong_data = false; + inline datasketches::compact_theta_sketch readThetaSketch(ReadBuffer & buf) const + { + String d; + readBinary(d, buf); + return datasketches::compact_theta_sketch::deserialize(d.data(), d.size(), datasketches::DEFAULT_SEED, AggregateFunctionThetaSketchAllocator()); + } +}; +} diff --git a/src/AggregateFunctions/registerAggregateFunctions.cpp b/src/AggregateFunctions/registerAggregateFunctions.cpp index 2e04caddb1e..838d11cdb4e 100644 --- a/src/AggregateFunctions/registerAggregateFunctions.cpp +++ b/src/AggregateFunctions/registerAggregateFunctions.cpp @@ -124,6 +124,7 @@ void registerAggregateFunctionNdvBuckets(AggregateFunctionFactory & factory); void 
registerAggregateFunctionNdvBucketsExtend(AggregateFunctionFactory & factory); void registerAggregateFunctionNothing(AggregateFunctionFactory & factory); void registerAggregateFunctionHllSketchEstimate(AggregateFunctionFactory &); +void registerAggregateFunctionThetaSketchEstimate(AggregateFunctionFactory &); void registerAggregateFunctionAuc(AggregateFunctionFactory &); void registerAggregateFunctionFastAuc(AggregateFunctionFactory &); void registerAggregateFunctionFastAuc2(AggregateFunctionFactory &); @@ -259,6 +260,7 @@ void registerAggregateFunctions() registerAggregateFunctionNdvBucketsExtend(factory); registerAggregateFunctionNothing(factory); registerAggregateFunctionHllSketchEstimate(factory); + registerAggregateFunctionThetaSketchEstimate(factory); registerAggregateFunctionAuc(factory); registerAggregateFunctionFastAuc(factory); registerAggregateFunctionFastAuc2(factory); diff --git a/src/DataTypes/IDataType.h b/src/DataTypes/IDataType.h index 50ce606e0f7..7be66a7f936 100644 --- a/src/DataTypes/IDataType.h +++ b/src/DataTypes/IDataType.h @@ -487,6 +487,7 @@ struct WhichDataType constexpr bool isSimple() const { return isInt() || isUInt() || isFloat() || isString(); } constexpr bool isBitmap64() const { return idx == TypeIndex::BitMap64; } constexpr bool isLowCardinality() const { return idx == TypeIndex::LowCardinality; } + constexpr bool isSketchBinary() const { return idx == TypeIndex::SketchBinary; } }; /// IDataType helpers (alternative for IDataType virtual methods with single point of truth) diff --git a/src/Functions/FunctionSketch.h b/src/Functions/FunctionSketch.h index 81f0fcd2e8e..5920d63ea73 100644 --- a/src/Functions/FunctionSketch.h +++ b/src/Functions/FunctionSketch.h @@ -176,13 +176,14 @@ class FunctionHLLSketch : public IFunction return name; } - size_t getNumberOfArguments() const override { return 1; } + bool isVariadic() const override { return true; } + size_t getNumberOfArguments() const override { return 0; } bool 
useDefaultImplementationForNulls() const override { return false; } bool useDefaultImplementationForConstants() const override { return true; } DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { - if (arguments.size() != 1) + if (arguments.size() != 1 && arguments.size() != 2) throw Exception("Illegal argument size of function " + getName(), ErrorCodes::BAD_ARGUMENTS); @@ -207,6 +208,7 @@ class FunctionHLLSketch : public IFunction ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { + bool use_composite_estimate = arguments.size() > 1 ? true : false; if (arguments[0].column->isNullable()) { auto result_type = std::make_shared(std::make_shared()); @@ -222,7 +224,10 @@ class FunctionHLLSketch : public IFunction else { datasketches::hll_sketch hll_sketch_data = datasketches::hll_sketch::deserialize(nullable_sketch.getDataAt(i).data, nullable_sketch.getDataAt(i).size, AggregateFunctionHllSketchAllocator()); - result_column->insert(hll_sketch_data.get_estimate()); + if (use_composite_estimate) + result_column->insert(hll_sketch_data.get_composite_estimate()); + else + result_column->insert(hll_sketch_data.get_estimate()); } } return result_column; @@ -238,7 +243,10 @@ class FunctionHLLSketch : public IFunction { auto value = value_column.getDataAt(i); datasketches::hll_sketch hll_sketch_data = datasketches::hll_sketch::deserialize(value.data, value.size, AggregateFunctionHllSketchAllocator()); - dst_data[i] = hll_sketch_data.get_estimate(); + if (use_composite_estimate) + dst_data[i] = hll_sketch_data.get_composite_estimate(); + else + dst_data[i] = hll_sketch_data.get_estimate(); } return result_column; diff --git a/tests/queries/0_stateless/20000_theta_sketch_estimate.reference b/tests/queries/0_stateless/20000_theta_sketch_estimate.reference new file mode 100644 index 00000000000..57877db9980 --- /dev/null +++ 
b/tests/queries/0_stateless/20000_theta_sketch_estimate.reference @@ -0,0 +1,2 @@ +5 +9 diff --git a/tests/queries/0_stateless/20000_theta_sketch_estimate.sql b/tests/queries/0_stateless/20000_theta_sketch_estimate.sql new file mode 100644 index 00000000000..9f15109e123 --- /dev/null +++ b/tests/queries/0_stateless/20000_theta_sketch_estimate.sql @@ -0,0 +1,17 @@ +DROP TABLE IF EXISTS test.test_theta_sketch; + +CREATE TABLE test.test_theta_sketch +( + `test_col` Int8, + `test_col2` SketchBinary +) ENGINE = MergeTree() ORDER BY test_col; + +INSERT INTO test.test_theta_sketch values (1, 'AgMDAAAazJMFAAAAAACAPxX5fcu9hqEFw5f8EoFwnR4oBDhkJpYOMJ+FWhAWLpBs2C0jd0u5NX4=') + +SELECT thetaSketchEstimate(test_col2) FROM test.test_theta_sketch; + +INSERT INTO test.test_theta_sketch values (2, 'AgMDAAAazJMEAAAAAACAP+egZA96NAcHbakWvEpmYR6sUwzUAj6rLtyf/LHURyhv') + +SELECT thetaSketchEstimate(test_col2) FROM test.test_theta_sketch; + +DROP TABLE IF EXISTS test.test_theta_sketch; \ No newline at end of file From 5799da8923c377d97eb7c52a3d1d3cc2d44f9357 Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 08:15:13 +0000 Subject: [PATCH 117/292] Merge 'cherry-pick-mr-23389' into 'cnch-2.2' perf(clickhousech@m-4731388683): [cp] add optimize_read_in_partition_order to improve topn See merge request: !23458 # Conflicts: # src/QueryPlan/ReadFromMergeTree.cpp --- src/Core/Settings.h | 2 + src/Optimizer/Property/Constants.cpp | 10 + src/Optimizer/Property/Constants.h | 1 + src/QueryPlan/ReadFromMergeTree.cpp | 213 ++++++++++++++++-- src/QueryPlan/ReadFromMergeTree.h | 10 + .../00941_read_in_partition_order.reference | 33 +++ .../00941_read_in_partition_order.sql | 81 +++++++ 7 files changed, 335 insertions(+), 15 deletions(-) create mode 100644 tests/queries/4_cnch_stateless/00941_read_in_partition_order.reference create mode 100644 tests/queries/4_cnch_stateless/00941_read_in_partition_order.sql diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 5bdb217d1e7..cba6346fc04 
100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -869,6 +869,8 @@ enum PreloadLevelSettings : UInt64 "longest one.", \ 0) \ M(Bool, optimize_read_in_order, true, "Enable ORDER BY optimization for reading data in corresponding order in MergeTree tables.", 0) \ + M(Bool, optimize_read_in_partition_order, false, "In optimize_read_in_order mode, whether to read parts partition-by-partition if applicable", 0) \ + M(Bool, force_read_in_partition_order, 0, "Similar to optimize_read_in_partition_order, but throw an exception if it cannot be applied to the query, mainly for testing", 0) \ M(Bool, optimize_aggregation_in_order, false, "Enable GROUP BY optimization for aggregating data in corresponding order in MergeTree tables.", 0) \ M(UInt64, read_in_order_two_level_merge_threshold, 100, "Minimal number of parts to read to run preliminary merge step during multithread reading in order of primary key.", 0) \ M(Bool, low_cardinality_allow_in_native_format, true, "Use LowCardinality type in Native format. 
Otherwise, convert LowCardinality columns to ordinary for select query, and convert ordinary columns to required LowCardinality for insert query.", 0) \ diff --git a/src/Optimizer/Property/Constants.cpp b/src/Optimizer/Property/Constants.cpp index 13f6f8f4fb0..1ab851453af 100644 --- a/src/Optimizer/Property/Constants.cpp +++ b/src/Optimizer/Property/Constants.cpp @@ -27,4 +27,14 @@ Constants Constants::normalize(const SymbolEquivalences & symbol_equivalences) c return translate(mapping); } +String Constants::toString() const +{ + std::stringstream output; + output << "{"; + for (const auto & item : values) + output << " " << item.first << "=" << item.second.value.toString(); + output << "}"; + return output.str(); +} + } diff --git a/src/Optimizer/Property/Constants.h b/src/Optimizer/Property/Constants.h index cd588263ec1..4328a657ccb 100644 --- a/src/Optimizer/Property/Constants.h +++ b/src/Optimizer/Property/Constants.h @@ -26,6 +26,7 @@ class Constants Constants translate(const std::unordered_map & identities) const; Constants normalize(const SymbolEquivalences & symbol_equivalences) const; + String toString() const; private: std::map values{}; diff --git a/src/QueryPlan/ReadFromMergeTree.cpp b/src/QueryPlan/ReadFromMergeTree.cpp index b3f236742a8..5d5fdde0ee1 100644 --- a/src/QueryPlan/ReadFromMergeTree.cpp +++ b/src/QueryPlan/ReadFromMergeTree.cpp @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -163,6 +164,98 @@ static Array extractMapColumnKeys(const MergeTreeMetaBase & data, const MergeTre return res; } +static bool isSamePartition(const RangesInDataPart & lhs, const RangesInDataPart & rhs) +{ + return lhs.data_part->partition.value == rhs.data_part->partition.value; +} + +static bool canReadInPartitionOrder( + const StorageInMemoryMetadata & metadata, + const InputOrderInfo & input_order_info, + const ASTSelectQuery & select) +{ + if (!metadata.isPartitionKeyDefined() || !metadata.isSortingKeyDefined()) + return false; + 
+ const auto & partition_key = metadata.getPartitionKey(); + Names minmax_columns = partition_key.expression->getRequiredColumns(); + /// for simplicity, only support table with one partition key + if (partition_key.column_names.size() != 1 || minmax_columns.size() != 1) + return false; + + String partition_column = minmax_columns[0]; + Names sorting_columns = metadata.getSortingKeyColumns(); + chassert(sorting_columns.size() >= input_order_info.order_key_prefix_descr.size()); + /// optimizer guarantees that order_key_prefix is a prefix of sorting columns + sorting_columns.resize(input_order_info.order_key_prefix_descr.size()); + + /// sorting columns should contain partition column + auto partition_column_it = std::find(sorting_columns.begin(), sorting_columns.end(), partition_column); + if (partition_column_it == sorting_columns.end()) + return false; + + /// Allow table "partition by c order by (a, b, c)" for query "where a={} and b={} order by c", + /// where all sorting columns before partition column match single value, + /// note that in this case, input order is (a, b, c) + if (partition_column_it != sorting_columns.begin()) + { + NameSet single_value_columns; + auto collect = [&](const ASTPtr & filter) + { + if (!filter) + return; + + for (const auto & conjunct : PredicateUtils::extractConjuncts(filter->clone())) + { + const auto * func = conjunct->as(); + if (!func || func->name != "equals") + continue; + const auto * column = func->arguments->children[0]->as(); + const auto * literal = func->arguments->children[1]->as(); + if (column && literal) + single_value_columns.insert(column->name()); + } + }; + collect(select.where()); + collect(select.prewhere()); + auto match_single_value = [&](const String & name) { return single_value_columns.count(name); }; + if (!std::all_of(sorting_columns.begin(), partition_column_it, match_single_value)) + return false; + } + + /// fast path for: order by sort_column partition by sort_column + if 
(partition_key.column_names.front() == *partition_column_it) + return true; + + /// Allow "partition by func(x) order by (x)" where func is monotonic nondecreasing + IFunction::Monotonicity monotonicity; + for (const auto & action : partition_key.expression->getActions()) + { + if (action.node->type != ActionsDAG::ActionType::FUNCTION) + { + continue; + } + + /// Allow only one simple monotonic functions with one argument + if (monotonicity.is_monotonic) + { + monotonicity.is_monotonic = false; + break; + } + + if (action.node->children.size() != 1 || action.node->children.at(0)->result_name != *partition_column_it) + break; + + const auto & func = *action.node->function_base; + if (!func.hasInformationAboutMonotonicity()) + break; + + monotonicity = func.getMonotonicityForRange(*func.getArgumentTypes().at(0), {}, {}); + } + + return monotonicity.is_monotonic && monotonicity.is_positive; +} + ReadFromMergeTree::ReadFromMergeTree( MergeTreeMetaBase::DataPartsVector parts_, MergeTreeMetaBase::DeleteBitmapGetter delete_bitmap_getter_, @@ -495,12 +588,75 @@ static ActionsDAGPtr createProjection(const Block & header) return projection; } -Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsWithOrder( +namespace +{ +template +struct PartitionValueComparator +{ + bool operator()(const RangesInDataPart & lhs, const RangesInDataPart & rhs) const + { + const auto & l = lhs.data_part->partition.value[0]; + const auto & r = rhs.data_part->partition.value[0]; + if constexpr (ascend) + return l < r; + else + return l > r; + } +}; +} // anonymouse namespace + +Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsWithPartitionOrder( RangesInDataParts && parts_with_ranges, const Names & column_names, const ActionsDAGPtr & sorting_key_prefix_expr, ActionsDAGPtr & out_projection, const InputOrderInfoPtr & input_order_info) +{ + chassert(!parts_with_ranges.empty()); + + /// sort parts by partition value + if (input_order_info->direction > 0) + std::sort(parts_with_ranges.begin(), 
parts_with_ranges.end(), PartitionValueComparator{}); + else + std::sort(parts_with_ranges.begin(), parts_with_ranges.end(), PartitionValueComparator{}); + + Pipes pipes; + auto prev = parts_with_ranges.begin(); + auto end = parts_with_ranges.end(); + + while (prev != end) + { + auto curr = std::next(prev); + while (curr != end && isSamePartition(*prev, *curr)) + ++curr; + + auto pipe = spreadMarkRangesAmongStreamsWithOrder( + {std::make_move_iterator(prev), std::make_move_iterator(curr)}, + column_names, + sorting_key_prefix_expr, + out_projection, + input_order_info, + // for the result pipe to output ordered tuples for this partition + 1 /*num_streams*/, true /*need_preliminary_merge*/); + + pipes.emplace_back(std::move(pipe)); + prev = curr; + } + + auto res = Pipe::unitePipes(std::move(pipes)); + if (res.numOutputPorts() > 1) + res.addTransform(std::make_shared(res.getHeader(), res.numOutputPorts())); + return res; +} + +Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsWithOrder( + RangesInDataParts && parts_with_ranges, + const Names & column_names, + const ActionsDAGPtr & sorting_key_prefix_expr, + ActionsDAGPtr & out_projection, + const InputOrderInfoPtr & input_order_info, + size_t num_streams, + bool need_preliminary_merge) { const auto & settings = context->getSettingsRef(); const auto data_settings = data.getSettings(); @@ -558,12 +714,11 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsWithOrder( return new_ranges; }; - const size_t min_marks_per_stream = (info.sum_marks - 1) / requested_num_streams + 1; - bool need_preliminary_merge = (parts_with_ranges.size() > settings.read_in_order_two_level_merge_threshold); + const size_t min_marks_per_stream = (info.sum_marks - 1) / num_streams + 1; Pipes pipes; - for (size_t i = 0; i < requested_num_streams && !parts_with_ranges.empty(); ++i) + for (size_t i = 0; i < num_streams && !parts_with_ranges.empty(); ++i) { size_t need_marks = min_marks_per_stream; RangesInDataParts new_parts; @@ -630,10 
+785,10 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsWithOrder( : ReadFromMergeTree::ReadType::InReverseOrder; pipes.emplace_back(read(std::move(new_parts), column_names, read_type, - requested_num_streams, info.min_marks_for_concurrent_read, info.use_uncompressed_cache)); + num_streams, info.min_marks_for_concurrent_read, info.use_uncompressed_cache)); } - if (need_preliminary_merge) + if (need_preliminary_merge && !pipes.empty()) { SortDescription sort_description; for (size_t j = 0; j < input_order_info->order_key_prefix_descr.size(); ++j) @@ -644,9 +799,6 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsWithOrder( for (auto & pipe : pipes) { - /// Drop temporary columns, added by 'sorting_key_prefix_expr' - out_projection = createProjection(pipe.getHeader()); - pipe.addSimpleTransform([sorting_key_expr](const Block & header) { return std::make_shared(header, sorting_key_expr); @@ -663,6 +815,12 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsWithOrder( pipe.addTransform(std::move(transform)); } } + + if (!out_projection) + { + /// Drop temporary columns, added by 'sorting_key_prefix_expr' + out_projection = createProjection(pipes.front().getHeader()); + } } return Pipe::unitePipes(std::move(pipes)); @@ -1154,12 +1312,14 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToRead( size_t sum_marks = 0; size_t sum_ranges = 0; size_t sum_rows = 0; + NameSet partition_ids; for (const auto & part : result.parts_with_ranges) { sum_ranges += part.ranges.size(); sum_marks += part.getMarksCount(); sum_rows += part.getRowsCount(); + partition_ids.insert(part.data_part->info.partition_id); } result.total_parts = total_parts; @@ -1170,6 +1330,7 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToRead( result.selected_marks_pk = sum_marks_pk; result.total_marks_pk = total_marks_pk; result.selected_rows = sum_rows; + result.selected_partitions = partition_ids.size(); const auto & input_order_info = 
query_info.input_order_info ? query_info.input_order_info @@ -1249,6 +1410,7 @@ void ReadFromMergeTree::initializePipeline(QueryPipeline & pipeline, const Build Pipe pipe; const auto & settings = context->getSettingsRef(); + bool can_read_in_partition_order = false; if (select.final()) { @@ -1278,12 +1440,30 @@ void ReadFromMergeTree::initializePipeline(QueryPipeline & pipeline, const Build auto syntax_result = TreeRewriter(context).analyze(order_key_prefix_ast, metadata_for_reading->getColumns().getAllPhysical()); auto sorting_key_prefix_expr = ExpressionAnalyzer(order_key_prefix_ast, syntax_result, context).getActionsDAG(false); - pipe = spreadMarkRangesAmongStreamsWithOrder( - std::move(result.parts_with_ranges), - column_names_to_read, - sorting_key_prefix_expr, - result_projection, - input_order_info); + can_read_in_partition_order = (settings.optimize_read_in_partition_order || settings.force_read_in_partition_order) + && canReadInPartitionOrder(*metadata_for_reading, *input_order_info, query_info.query->as()); + + if (can_read_in_partition_order && result.selected_partitions > 1) + { + pipe = spreadMarkRangesAmongStreamsWithPartitionOrder( + std::move(result.parts_with_ranges), + column_names_to_read, + sorting_key_prefix_expr, + result_projection, + input_order_info); + } + else + { + bool need_preliminary_merge = (result.parts_with_ranges.size() > settings.read_in_order_two_level_merge_threshold); + pipe = spreadMarkRangesAmongStreamsWithOrder( + std::move(result.parts_with_ranges), + column_names_to_read, + sorting_key_prefix_expr, + result_projection, + input_order_info, + requested_num_streams, + need_preliminary_merge); + } } else { @@ -1292,6 +1472,9 @@ void ReadFromMergeTree::initializePipeline(QueryPipeline & pipeline, const Build column_names_to_read); } + if (settings.force_read_in_partition_order && !can_read_in_partition_order) + throw Exception(ErrorCodes::INDEX_NOT_USED, "Cannot read in partition order but 'force_read_in_partition_order' is 
set"); + if (pipe.empty()) { pipeline.init(Pipe(std::make_shared(getOutputStream().header))); diff --git a/src/QueryPlan/ReadFromMergeTree.h b/src/QueryPlan/ReadFromMergeTree.h index bcba9d5f286..e22f3cb7f0a 100644 --- a/src/QueryPlan/ReadFromMergeTree.h +++ b/src/QueryPlan/ReadFromMergeTree.h @@ -104,6 +104,7 @@ class ReadFromMergeTree final : public ISourceStep UInt64 selected_marks_pk = 0; UInt64 total_marks_pk = 0; UInt64 selected_rows = 0; + UInt64 selected_partitions = 0; }; ReadFromMergeTree( @@ -205,6 +206,15 @@ class ReadFromMergeTree final : public ISourceStep const Names & column_names); Pipe spreadMarkRangesAmongStreamsWithOrder( + RangesInDataParts && parts_with_ranges, + const Names & column_names, + const ActionsDAGPtr & sorting_key_prefix_expr, + ActionsDAGPtr & out_projection, + const InputOrderInfoPtr & input_order_info, + size_t num_streams, + bool need_preliminary_merge); + + Pipe spreadMarkRangesAmongStreamsWithPartitionOrder( RangesInDataParts && parts_with_ranges, const Names & column_names, const ActionsDAGPtr & sorting_key_prefix_expr, diff --git a/tests/queries/4_cnch_stateless/00941_read_in_partition_order.reference b/tests/queries/4_cnch_stateless/00941_read_in_partition_order.reference new file mode 100644 index 00000000000..ee967d4d74b --- /dev/null +++ b/tests/queries/4_cnch_stateless/00941_read_in_partition_order.reference @@ -0,0 +1,33 @@ +order by c1 +1 1 +2 2 +3 3 +order by c1 desc +3 3 +2 2 +1 1 +order by ts +2024-06-01 10:00:00 +2024-06-02 11:00:00 +2024-06-03 12:00:00 +order by ts desc +2024-06-03 12:00:00 +2024-06-02 11:00:00 +2024-06-01 10:00:00 +order by d, c3 +1 a 2024-06-01 10 +1 a 2024-06-03 40 +1 a 2024-06-03 50 +order by d desc, c3 +1 a 2024-06-03 40 +1 a 2024-06-03 50 +1 a 2024-06-01 10 +first 5 2024-06-01 10:00:00 5 a +first 6 2024-06-01 10:00:00 6 a +last 5 2024-06-02 11:00:00 5 a +last 6 2024-06-02 11:00:00 6 a +norder1 +0 +norder2 +2 1 +1 2 diff --git 
a/tests/queries/4_cnch_stateless/00941_read_in_partition_order.sql b/tests/queries/4_cnch_stateless/00941_read_in_partition_order.sql new file mode 100644 index 00000000000..ded67a9008d --- /dev/null +++ b/tests/queries/4_cnch_stateless/00941_read_in_partition_order.sql @@ -0,0 +1,81 @@ +set enable_optimizer = 1; +set optimize_read_in_order = 1; +set optimize_read_in_partition_order = 1; +set force_read_in_partition_order = 1; + +-- case: order by partition column +drop table if exists porder1; +create table porder1 (c1 Int64, c2 Int64) engine = CnchMergeTree partition by c1 order by (c1, c2); +insert into porder1 values (1, 1), (2, 2), (3, 3); +select 'order by c1'; +select * from porder1 order by c1; +select 'order by c1 desc'; +select * from porder1 order by c1 desc; +drop table porder1; + +-- case: partition by func(sort_column) order by sort_column +drop table if exists porder2; +create table porder2 (ts DateTime) engine = CnchMergeTree partition by toDate(ts) order by ts; +insert into porder2 values ('2024-06-01 10:00:00'), ('2024-06-02 11:00:00'), ('2024-06-03 12:00:00'); +select 'order by ts'; +select * from porder2 order by ts; +select 'order by ts desc'; +select * from porder2 order by ts desc; +drop table porder2; + +-- case: order by (.., pc, ..) 
partitoin by pc +drop table if exists porder3; +create table porder3 (c1 Int64, c2 String, d Date, c3 Int64) engine = CnchMergeTree partition by d order by (c1, c2, d, c3); +insert into porder3 values (1, 'a', '2024-06-01', 10), (2, 'b', '2024-06-01', 20), (3, 'c', '2024-06-02', 30), (1, 'a', '2024-06-03', 40), (1, 'a', '2024-06-03', 50); +select 'order by d, c3'; +select * from porder3 where c1=1 and c2='a' and d < '2024-06-10' and c3 < 100 order by d, c3; +select 'order by d desc, c3'; +select * from porder3 where c1=1 and c2='a' and d < '2024-06-10' and c3 < 100 order by d desc, c3; +drop table porder3; + +-- case: partition has more than 1 part +drop table if exists porder4; +create table porder4 (ts DateTime, c1 Int64, c2 String) engine = CnchMergeTree partition by toYYYYMMDD(ts) order by (c1, c2, ts); +system stop merges porder4; +insert into porder4 values ('2024-06-01 11:00:00', 5, 'a'), ('2024-06-01 10:00:00', 6, 'a'); +insert into porder4 values ('2024-06-01 10:00:00', 5, 'a'), ('2024-06-01 11:00:00', 6, 'a'); +insert into porder4 values ('2024-06-02 11:00:00', 5, 'a'), ('2024-06-02 10:00:00', 6, 'a'); +insert into porder4 values ('2024-06-02 10:00:00', 5, 'a'), ('2024-06-02 11:00:00', 6, 'a'); +select 'first 5', * from porder4 where c1=5 and c2='a' order by ts limit 1; +select 'first 6', * from porder4 where c1=6 and c2='a' order by ts limit 1; +select 'last 5', * from porder4 where c1=5 and c2='a' order by ts desc limit 1; +select 'last 6', * from porder4 where c1=6 and c2='a' order by ts desc limit 1; +drop table porder4; + +-- negative case: partition by non-atomic function +drop table if exists norder1; +create table norder1 (c1 Int64) engine = CnchMergeTree partition by c1 % 4 order by c1; +insert into norder1 select number from numbers(10); +select * from norder1 order by c1 limit 1; -- { serverError 277 } +select 'norder1'; +select * from norder1 order by c1 limit 1 settings force_read_in_partition_order=0; +drop table norder1; + +-- negative 
case: sort by non-partition column +drop table if exists norder2; +create table norder2 (c1 Int64, c2 Int64) engine = CnchMergeTree partition by c1 order by c2; +insert into norder2 values (1, 2), (2, 1); +select * from norder2 order by c2; -- { serverError 277 } +select 'norder2'; +select * from norder2 order by c2 settings force_read_in_partition_order=0; +drop table norder2; + +-- negative case: partition by multi-func +drop table if exists norder3; +create table norder3 (ts DateTime) engine = CnchMergeTree partition by (toYYYYMMDD(ts) % 2) order by ts; +insert into norder3 values ('2024-06-01 00:00:00'), ('2024-06-02 00:00:00'); +select * from norder3 order by ts; -- { serverError 277 } +drop table norder3; + +-- negative case: no equal predicate on prefix sort column +drop table if exists norder4; +create table norder4 (c1 Int64, c2 Int64) engine = CnchMergeTree order by (c1, c2) partition by c2; +insert into norder4 select number, 1 from numbers(5); +select * from norder4 order by c2; -- { serverError 277 } +select * from norder4 where c1 < 2 order by c2; -- { serverError 277 } +drop table norder4; From d2a38035b089201a14f9723f15c826ed0880067f Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 08:17:10 +0000 Subject: [PATCH 118/292] Merge 'cnch_2.2_v2_fix_cherrypick' into 'cnch-2.2' fix(clickhousech@m-4679216622): [cp] fix some issues in catalog v2 See merge request: !23367 # Conflicts: # src/Storages/MergeTree/MergeTreeCloudData.cpp # src/Storages/StorageCloudMergeTree.cpp --- src/CloudServices/CnchWorkerServiceImpl.cpp | 15 +-- src/Common/ProfileEvents.cpp | 6 +- src/MergeTreeCommon/GlobalDataManager.cpp | 6 +- src/MergeTreeCommon/GlobalDataManager.h | 3 +- src/MergeTreeCommon/MergeTreeMetaBase.cpp | 27 ------ src/MergeTreeCommon/MergeTreeMetaBase.h | 15 --- src/MergeTreeCommon/StorageDataManager.cpp | 47 +++++++++- src/MergeTreeCommon/StorageDataManager.h | 8 +- src/MergeTreeCommon/TableVersion.cpp | 32 +++++-- src/MergeTreeCommon/TableVersion.h 
| 2 + src/MergeTreeCommon/assignCnchParts.cpp | 12 +-- src/MergeTreeCommon/assignCnchParts.h | 2 +- src/Storages/MergeTree/MergeTreeCloudData.cpp | 94 +++++++++++++------ src/Storages/MergeTree/MergeTreeCloudData.h | 13 ++- src/Storages/StorageCloudMergeTree.cpp | 39 +------- src/Storages/StorageCloudMergeTree.h | 2 - 16 files changed, 171 insertions(+), 152 deletions(-) diff --git a/src/CloudServices/CnchWorkerServiceImpl.cpp b/src/CloudServices/CnchWorkerServiceImpl.cpp index bf4ae5faa80..57ce0be051a 100644 --- a/src/CloudServices/CnchWorkerServiceImpl.cpp +++ b/src/CloudServices/CnchWorkerServiceImpl.cpp @@ -55,10 +55,10 @@ #include #include #include -#include #include #include #include +#include #if USE_RDKAFKA # include @@ -796,17 +796,8 @@ void CnchWorkerServiceImpl::sendResources( { WGWorkerInfoPtr worker_info = RPCHelpers::createWorkerInfo(request->worker_info()); UInt64 version = data.table_version(); - ServerDataPartsWithDBM server_parts_with_dbms; - query_context->getGlobalDataManager()->loadDataPartsWithDBM(*cloud_merge_tree, cloud_merge_tree->getStorageUUID(), version, worker_info, server_parts_with_dbms); - size_t server_part_size = server_parts_with_dbms.first.size(); - size_t delete_bitmap_size = server_parts_with_dbms.second.size(); - cloud_merge_tree->loadServerDataPartsWithDBM(std::move(server_parts_with_dbms)); - - LOG_DEBUG( - log, - "Loaded {} server parts and {} delete bitmap for table {} with version {}", - server_part_size, - delete_bitmap_size, + cloud_merge_tree->setDataDescription(std::move(worker_info), version); + LOG_DEBUG(log, "Received table {} with data version {}", cloud_merge_tree->getStorageID().getNameForLogs(), version); } diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index 6afe834798d..298830c9694 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -1126,7 +1126,11 @@ M(OrcIOSharedBytes, "") \ M(OrcIODirectCount, "") \ M(OrcIODirectBytes, "") \ - 
M(PreparePartsForReadMilliseconds, "The time spend on loading CNCH part from ServerPart on worker when query with table version") + M(PreparePartsForReadMilliseconds, "The time spend on loading CNCH part from ServerPart on worker when query with table version") \ + M(LoadedServerParts, "Total server parts loaded from storage manager by version") \ + M(LoadServerPartsMilliseconds, "The time spend on loading server parts by version from storage data manager.") \ + M(LoadManifestPartsCacheHits, "Cache(disk) hit count of loading parts from manifest") \ + M(LoadManifestPartsCacheMisses, "Cache(disk) miss count of loading parts from manifest") namespace ProfileEvents { diff --git a/src/MergeTreeCommon/GlobalDataManager.cpp b/src/MergeTreeCommon/GlobalDataManager.cpp index 574d47ca695..e73839f5b25 100644 --- a/src/MergeTreeCommon/GlobalDataManager.cpp +++ b/src/MergeTreeCommon/GlobalDataManager.cpp @@ -13,11 +13,11 @@ void GlobalDataManager::loadDataPartsWithDBM( const UUID & storage_uuid, const UInt64 table_version, const WGWorkerInfoPtr & runtime_worker_info, - ServerDataPartsWithDBM & server_parts) + std::unordered_map & server_parts, + std::vector> & partitions) { auto storage_manager = getStorageDataManager(storage_uuid, runtime_worker_info); - - return storage_manager->loadDataPartsWithDBM(storage, table_version, server_parts); + return storage_manager->loadDataPartsWithDBM(storage, table_version, server_parts, partitions); } StorageDataManagerPtr GlobalDataManager::getStorageDataManager(const UUID & storage_uuid, const WGWorkerInfoPtr & runtime_worker_info) diff --git a/src/MergeTreeCommon/GlobalDataManager.h b/src/MergeTreeCommon/GlobalDataManager.h index 00f9abf6c9a..e22aa2eb43d 100644 --- a/src/MergeTreeCommon/GlobalDataManager.h +++ b/src/MergeTreeCommon/GlobalDataManager.h @@ -15,7 +15,8 @@ class GlobalDataManager : public WithContext const UUID & storage_uuid, const UInt64 table_version, const WGWorkerInfoPtr & runtime_worker_info, - ServerDataPartsWithDBM & 
server_parts); + std::unordered_map & server_parts, + std::vector> & partitions); private: diff --git a/src/MergeTreeCommon/MergeTreeMetaBase.cpp b/src/MergeTreeCommon/MergeTreeMetaBase.cpp index 8ae7ad520ac..6844528f5c2 100644 --- a/src/MergeTreeCommon/MergeTreeMetaBase.cpp +++ b/src/MergeTreeCommon/MergeTreeMetaBase.cpp @@ -1049,33 +1049,6 @@ MergeTreeMetaBase::getDataPartsVectorInPartition(MergeTreeMetaBase::DataPartStat data_parts_by_state_and_info.lower_bound(state_with_partition), data_parts_by_state_and_info.upper_bound(state_with_partition)); } -ServerDataPartsVector MergeTreeMetaBase::getServerDataPartsInPartitions(const Strings & required_partitions) -{ - ServerDataPartsVector server_parts; - DeleteBitmapMetaPtrVector delete_bitmaps; - { - auto lock = lockPartsRead(); - for (const String & partition_id : required_partitions) - { - const auto & parts_with_dbm = server_data_parts[partition_id]; - server_parts.insert(server_parts.end(), parts_with_dbm.first.begin(), parts_with_dbm.first.end()); - delete_bitmaps.insert(delete_bitmaps.end(), parts_with_dbm.second.begin(), parts_with_dbm.second.end()); - } - } - auto visible_server_parts = CnchPartsHelper::calcVisibleParts(server_parts, false, CnchPartsHelper::LoggingOption::DisableLogging, true); - - if (getInMemoryMetadataPtr()->hasUniqueKey() && !visible_server_parts.empty()) - getDeleteBitmapMetaForServerParts(visible_server_parts, delete_bitmaps); - - return visible_server_parts; -} - -MergeTreeMetaBase::MergeTreePartitions MergeTreeMetaBase::getAllPartitions() const -{ - auto lock = lockPartsRead(); - return data_partitions; -} - MergeTreeMetaBase::DataParts MergeTreeMetaBase::getDataParts() const { return getDataParts({DataPartState::Committed}); diff --git a/src/MergeTreeCommon/MergeTreeMetaBase.h b/src/MergeTreeCommon/MergeTreeMetaBase.h index affa1fe1144..758743d9ff8 100644 --- a/src/MergeTreeCommon/MergeTreeMetaBase.h +++ b/src/MergeTreeCommon/MergeTreeMetaBase.h @@ -54,9 +54,6 @@ class 
MergeTreeMetaBase : public IStorage, public WithMutableContext, public Mer using MetaStorePtr = std::shared_ptr; - using MergeTreePartitions = std::vector>; - using ServerDataParts = std::unordered_map; - /// Alter conversions which should be applied on-fly for part. Build from of /// the most recent mutation commands for part. Now we have only rename_map /// here (from ALTER_RENAME) command, because for all other type of alters @@ -223,14 +220,10 @@ class MergeTreeMetaBase : public IStorage, public WithMutableContext, public Mer /// Returns all parts in specified partition DataPartsVector getDataPartsVectorInPartition(DataPartState /*state*/, const String & /*partition_id*/) const; - MergeTreePartitions getAllPartitions() const; - /// Returns Committed parts DataParts getDataParts() const; DataPartsVector getDataPartsVector() const; - ServerDataPartsVector getServerDataPartsInPartitions(const Strings & required_partitions); - /// Returns the part with the given name and state or nullptr if no such part. DataPartPtr getPartIfExists(const String & part_name, const DataPartStates & valid_states); DataPartPtr getPartIfExistsWithoutLock(const String & part_name, const DataPartStates & valid_states); @@ -626,14 +619,6 @@ class MergeTreeMetaBase : public IStorage, public WithMutableContext, public Mer /// Returns default settings for storage with possible changes from global config. virtual std::unique_ptr getDefaultSettings() const = 0; - /// track runtime server parts by partition id. Used when query by table version - MergeTreePartitions data_partitions; - // Server dataparts with delete bitmap. should be protected by data part lock - ServerDataParts server_data_parts; - - mutable std::mutex server_data_mutex; - mutable std::atomic has_server_part_to_load{false}; - private: // Record all query ids which access the table. It's guarded by `query_id_set_mutex` and is always mutable. 
mutable std::set query_id_set; diff --git a/src/MergeTreeCommon/StorageDataManager.cpp b/src/MergeTreeCommon/StorageDataManager.cpp index 7996a008b81..9723d36363f 100644 --- a/src/MergeTreeCommon/StorageDataManager.cpp +++ b/src/MergeTreeCommon/StorageDataManager.cpp @@ -2,9 +2,19 @@ #include +namespace ProfileEvents +{ + extern const Event LoadedServerParts; +} + namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + StorageDataManager::StorageDataManager(const ContextPtr context_,const UUID & uuid_, const WGWorkerInfoPtr & worker_info_ ) : WithContext(context_), storage_uuid(uuid_), @@ -12,20 +22,47 @@ StorageDataManager::StorageDataManager(const ContextPtr context_,const UUID & uu { } -void StorageDataManager::loadDataPartsWithDBM(const MergeTreeMetaBase & storage, const UInt64 & version, ServerDataPartsWithDBM & server_parts) +void StorageDataManager::loadDataPartsWithDBM( + const MergeTreeMetaBase & storage, + const UInt64 & version, + std::unordered_map & res_server_parts, + std::vector> & res_partitions) { auto table_versions_ptr = getRequiredTableVersions(version); - LOG_TRACE(&Poco::Logger::get("StorageDataManager"), "Get required table versions from {} to {}", + LOG_TRACE(log, "Get required table versions from {} to {}", table_versions_ptr.back()->getVersion(), table_versions_ptr.front()->getVersion()); + size_t loaded_parts_count = 0; for (auto it=table_versions_ptr.begin(); itgetAllPartsWithDBM(storage); - server_parts.first.insert(server_parts.first.end(), parts_with_dbm.first.begin(), parts_with_dbm.first.end()); - server_parts.second.insert(server_parts.second.end(), parts_with_dbm.second.begin(), parts_with_dbm.second.end()); + + for (auto & server_part : parts_with_dbm.first) + { + const String & partition_id = server_part->info().partition_id; + auto inner_it = res_server_parts.find(partition_id); + if (inner_it == res_server_parts.end()) + { + // add to result partition list + 
res_partitions.emplace_back(server_part->part_model_wrapper->partition); + } + res_server_parts[partition_id].first.emplace_back(std::move(server_part)); + loaded_parts_count++; + } + + for (auto & delete_bitmap : parts_with_dbm.second) + { + const String & partition_id = delete_bitmap->getModel()->partition_id(); + if (res_server_parts.find(partition_id) == res_server_parts.end()) + throw Exception("Load delete bitmap mismatch server data part. Its a logic error. ", ErrorCodes::LOGICAL_ERROR); + + res_server_parts[partition_id].second.emplace_back(std::move(delete_bitmap)); + } } + + ProfileEvents::increment(ProfileEvents::LoadedServerParts, loaded_parts_count); } UInt64 StorageDataManager::getLatestVersion() @@ -41,7 +78,7 @@ std::vector StorageDataManager::getRequiredTableVersions(const UInt64 latest_version = getLatestVersion(); if (latest_version < required_version) { - LOG_TRACE(&Poco::Logger::get("StorageDataManager"), "Latest version {} less than required version {}. Will reload table versions.", + LOG_TRACE(log, "Latest version {} less than required version {}. 
Will reload table versions.", latest_version, required_version); reloadTableVersions(); } diff --git a/src/MergeTreeCommon/StorageDataManager.h b/src/MergeTreeCommon/StorageDataManager.h index 08cc72e34c4..f9e8bc918bc 100644 --- a/src/MergeTreeCommon/StorageDataManager.h +++ b/src/MergeTreeCommon/StorageDataManager.h @@ -13,7 +13,11 @@ class StorageDataManager : public WithContext public: StorageDataManager(const ContextPtr context, const UUID & uuid_, const WGWorkerInfoPtr & worker_info_); - void loadDataPartsWithDBM(const MergeTreeMetaBase & storage, const UInt64 & version, ServerDataPartsWithDBM & server_parts); + void loadDataPartsWithDBM( + const MergeTreeMetaBase & storage, + const UInt64 & version, + std::unordered_map & server_parts, + std::vector> & partitions); WGWorkerInfoPtr getWorkerInfo() const { return worker_info; } @@ -33,6 +37,8 @@ class StorageDataManager : public WithContext WGWorkerInfoPtr worker_info; std::shared_mutex mutex; std::map versions; + + Poco::Logger * log = &Poco::Logger::get("StorageDataManager"); }; using StorageDataManagerPtr = std::shared_ptr; diff --git a/src/MergeTreeCommon/TableVersion.cpp b/src/MergeTreeCommon/TableVersion.cpp index 58543ec7525..5081077f554 100644 --- a/src/MergeTreeCommon/TableVersion.cpp +++ b/src/MergeTreeCommon/TableVersion.cpp @@ -10,6 +10,13 @@ #include #include + +namespace ProfileEvents +{ + extern const Event LoadManifestPartsCacheHits; + extern const Event LoadManifestPartsCacheMisses; +} + namespace DB { @@ -127,8 +134,8 @@ void TableVersion::fileterDataByWorkerInfo(const MergeTreeMetaBase & storage, st String worker_id_prefix = worker_id.substr(0, worker_id.find_last_of('-') + 1); WorkerGroupHandle mock_wg = WorkerGroupHandleImpl::mockWorkerGroupHandle(worker_id_prefix, worker_info->num_workers, getContext()); - // Use the same allocation algorithm as preaload. can work with parts as well as delete bitmap. 
- auto allocate_res = assignCnchParts(mock_wg, data_vector, getContext(), storage.getSettings()); + // Use consistent hash to make sure the parts with the same basic name are always allocated to the same worker + auto allocate_res = assignCnchParts(mock_wg, data_vector, getContext(), storage.getSettings(), Context::PartAllocator::JUMP_CONSISTENT_HASH); // only get the allocated data which belongs to current worker worker_hold_data = std::move(allocate_res[worker_id]); @@ -179,15 +186,19 @@ void TableVersion::loadManifestData(const MergeTreeMetaBase & storage) { data_parts.swap(loaded_parts); delete_bitmaps.swap(loaded_dbm); - loaded_from_manifest = true; } - - LOG_TRACE(&Poco::Logger::get("TableVersion"), "Loaded {} data parts and {} delete bitmaps from manifest disk cache {}.", - data_parts.size(), - delete_bitmaps.size(), - manifest_seg->getSegmentName()); - return; } + + // Disk may be empty if no server parts assigned to this worker. Then, nothin will be loaded. + LOG_TRACE(log, "Loaded {} data parts and {} delete bitmaps from manifest disk cache {}. Path : {}", + data_parts.size(), + delete_bitmaps.size(), + manifest_seg->getSegmentName(), + segment_path); + + loaded_from_manifest = true; + ProfileEvents::increment(ProfileEvents::LoadManifestPartsCacheHits); + return; } } @@ -216,6 +227,7 @@ void TableVersion::loadManifestData(const MergeTreeMetaBase & storage) loaded_dbm = catalog->getDeleteBitmapsFromManifest(storage, txn_list); } + ProfileEvents::increment(ProfileEvents::LoadManifestPartsCacheMisses); // filter parts by worker info. 
if (worker_info) { @@ -231,7 +243,7 @@ void TableVersion::loadManifestData(const MergeTreeMetaBase & storage) loaded_from_manifest = true; } - LOG_TRACE(&Poco::Logger::get("TableVersion"), "Loaded {} parts and {} delete bitmap in table version {} from {}.", + LOG_TRACE(log, "Loaded {} parts and {} delete bitmap in table version {} from {}.", data_parts.size(), delete_bitmaps.size(), version, diff --git a/src/MergeTreeCommon/TableVersion.h b/src/MergeTreeCommon/TableVersion.h index a6b2c95590e..bb0da390876 100644 --- a/src/MergeTreeCommon/TableVersion.h +++ b/src/MergeTreeCommon/TableVersion.h @@ -59,6 +59,8 @@ class TableVersion : public std::enable_shared_from_this, public W std::shared_mutex mutex; DataModelPartWrapperVector data_parts; DeleteBitmapMetaPtrVector delete_bitmaps; + + Poco::Logger * log = &Poco::Logger::get("TableVersion"); }; using TableVersionPtr = std::shared_ptr; diff --git a/src/MergeTreeCommon/assignCnchParts.cpp b/src/MergeTreeCommon/assignCnchParts.cpp index 7262b0e64f0..b4ffe7478eb 100644 --- a/src/MergeTreeCommon/assignCnchParts.cpp +++ b/src/MergeTreeCommon/assignCnchParts.cpp @@ -67,16 +67,16 @@ inline void reportStats(Poco::Logger * log, const M & map, const String & name, } /// explicit instantiation for server part and cnch data part. 
-template ServerAssignmentMap assignCnchParts(const WorkerGroupHandle & worker_group, const ServerDataPartsVector & parts, const ContextPtr & query_context, MergeTreeSettingsPtr settings); -template AssignmentMap assignCnchParts(const WorkerGroupHandle & worker_group, const MergeTreeDataPartsCNCHVector & parts, const ContextPtr & query_context, MergeTreeSettingsPtr settings); -template std::unordered_map assignCnchParts(const WorkerGroupHandle & worker_group, const DataModelPartWrapperVector &, const ContextPtr & query_context, MergeTreeSettingsPtr settings); -template std::unordered_map assignCnchParts(const WorkerGroupHandle & worker_group, const DeleteBitmapMetaPtrVector &, const ContextPtr & query_context, MergeTreeSettingsPtr settings); +template ServerAssignmentMap assignCnchParts(const WorkerGroupHandle & worker_group, const ServerDataPartsVector & parts, const ContextPtr & query_context, MergeTreeSettingsPtr settings, std::optional allocator = std::nullopt); +template AssignmentMap assignCnchParts(const WorkerGroupHandle & worker_group, const MergeTreeDataPartsCNCHVector & parts, const ContextPtr & query_context, MergeTreeSettingsPtr settings, std::optional allocator = std::nullopt); +template std::unordered_map assignCnchParts(const WorkerGroupHandle & worker_group, const DataModelPartWrapperVector &, const ContextPtr & query_context, MergeTreeSettingsPtr settings, std::optional allocator = std::nullopt); +template std::unordered_map assignCnchParts(const WorkerGroupHandle & worker_group, const DeleteBitmapMetaPtrVector &, const ContextPtr & query_context, MergeTreeSettingsPtr settings, std::optional allocator = std::nullopt); template -std::unordered_map assignCnchParts(const WorkerGroupHandle & worker_group, const DataPartsCnchVector & parts, const ContextPtr & query_context, MergeTreeSettingsPtr settings) +std::unordered_map assignCnchParts(const WorkerGroupHandle & worker_group, const DataPartsCnchVector & parts, const ContextPtr & query_context, 
MergeTreeSettingsPtr settings, std::optional allocator) { static auto * log = &Poco::Logger::get("assignCnchParts"); - Context::PartAllocator part_allocation_algorithm = query_context->getPartAllocationAlgo(settings); + Context::PartAllocator part_allocation_algorithm = allocator.value_or(query_context->getPartAllocationAlgo(settings)); switch (part_allocation_algorithm) { diff --git a/src/MergeTreeCommon/assignCnchParts.h b/src/MergeTreeCommon/assignCnchParts.h index dbccb269345..73dc72c788d 100644 --- a/src/MergeTreeCommon/assignCnchParts.h +++ b/src/MergeTreeCommon/assignCnchParts.h @@ -48,7 +48,7 @@ FilePartsAssignMap assignCnchFileParts(const WorkerGroupHandle & worker_group, c HivePartsAssignMap assignCnchHiveParts(const WorkerGroupHandle & worker_group, const HiveFiles & parts); template -std::unordered_map assignCnchParts(const WorkerGroupHandle & worker_group, const DataPartsCnchVector & parts, const ContextPtr & context, MergeTreeSettingsPtr settings); +std::unordered_map assignCnchParts(const WorkerGroupHandle & worker_group, const DataPartsCnchVector & parts, const ContextPtr & context, MergeTreeSettingsPtr settings, std::optional allocator = std::nullopt); /** * splitCnchParts will split server parts into bucketed parts and leftover server parts. 
diff --git a/src/Storages/MergeTree/MergeTreeCloudData.cpp b/src/Storages/MergeTree/MergeTreeCloudData.cpp index de493a0d515..6974fc5f0ed 100644 --- a/src/Storages/MergeTree/MergeTreeCloudData.cpp +++ b/src/Storages/MergeTree/MergeTreeCloudData.cpp @@ -14,10 +14,19 @@ */ #include +#include +#include #include "Processors/QueryPipeline.h" #include #include +namespace ProfileEvents +{ + extern const Event PrunedPartitions; + extern const Event PreparePartsForReadMilliseconds; + extern const Event LoadServerPartsMilliseconds; +} + namespace DB { @@ -221,49 +230,74 @@ void MergeTreeCloudData::loadDataParts(MutableDataPartsVector & parts, UInt64) LOG_DEBUG(log, "Loaded {} data parts in {} ms", data_parts_indexes.size(), stopwatch.elapsedMilliseconds()); } -void MergeTreeCloudData::loadServerDataPartsWithDBM(ServerDataPartsWithDBM && parts_with_dbm) +void MergeTreeCloudData::setDataDescription(WGWorkerInfoPtr && worker_info_, UInt64 data_version_) { - if (parts_with_dbm.first.empty()) - return; - - size_t part_counter=0, delete_bitmap_counter=0; - auto lock = lockParts(); - for (auto & server_part : parts_with_dbm.first) + // resuse load parts lock + std::lock_guard lock(load_data_parts_mutex); + if (data_version == 0) { - const String & partition_id = server_part->info().partition_id; - auto it = server_data_parts.find(partition_id); - if (it == server_data_parts.end()) - { - // add to partition list - data_partitions.emplace_back(server_part->part_model_wrapper->partition); - // add new server parts vector for this partition - server_data_parts.emplace(partition_id, std::make_pair(ServerDataPartsVector{}, DeleteBitmapMetaPtrVector{})); - } - server_data_parts[partition_id].first.emplace_back(std::move(server_part)); - part_counter++; + worker_info = std::move(worker_info_); + data_version = data_version_; } +} - for (auto & delete_bitmap : parts_with_dbm.second) - { - const String & partition_id = delete_bitmap->getModel()->partition_id(); - if 
(server_data_parts.find(partition_id) == server_data_parts.end()) - throw Exception("Load delete bitmap mismatch server data part. Its a logic error. ", ErrorCodes::LOGICAL_ERROR); +void MergeTreeCloudData::prepareVersionedPartsForRead(ContextPtr local_context, SelectQueryInfo & query_info, const Names & column_names) +{ + Stopwatch watch; - server_data_parts[partition_id].second.emplace_back(std::move(delete_bitmap)); - delete_bitmap_counter++; - } + std::lock_guard lock(load_data_parts_mutex); + if (data_parts_loaded) + return; + + SCOPE_EXIT_SAFE(data_parts_loaded=true); + + std::unordered_map server_parts_by_partition; + std::vector> partition_list; + //load server parts by data version + local_context->getGlobalDataManager()->loadDataPartsWithDBM(*this, getStorageUUID(), data_version, worker_info, server_parts_by_partition, partition_list); + ProfileEvents::increment(ProfileEvents::LoadServerPartsMilliseconds, watch.elapsedMilliseconds()); - has_server_part_to_load = true; + if (server_parts_by_partition.empty()) + return; + + watch.restart(); + + // load data parts for read + Strings required_partitions = selectPartitionsByPredicate(query_info, partition_list, column_names, local_context); + + size_t loaded_parts_count = loadFromServerPartsInPartition(required_partitions, server_parts_by_partition); + + LOG_DEBUG(log, "Loaded {} server data parts in {} partitions, elapsed: {}ms.", + loaded_parts_count, + required_partitions.size(), + watch.elapsedMilliseconds()); + + ProfileEvents::increment(ProfileEvents::PrunedPartitions, required_partitions.size()); + ProfileEvents::increment(ProfileEvents::PreparePartsForReadMilliseconds, watch.elapsedMilliseconds()); } -size_t MergeTreeCloudData::loadFromServerPartsInPartition(const Strings & required_partitions) +size_t MergeTreeCloudData::loadFromServerPartsInPartition(const Strings & required_partitions, std::unordered_map & server_parts_by_partition) { if (required_partitions.empty()) return 0; - 
ServerDataPartsVector visible_server_parts = getServerDataPartsInPartitions(required_partitions); - MergeTreeMutableDataPartsVector data_parts; + ServerDataPartsVector server_parts; + DeleteBitmapMetaPtrVector delete_bitmaps; + { + for (const String & partition_id : required_partitions) + { + const auto & parts_with_dbm = server_parts_by_partition[partition_id]; + server_parts.insert(server_parts.end(), parts_with_dbm.first.begin(), parts_with_dbm.first.end()); + delete_bitmaps.insert(delete_bitmaps.end(), parts_with_dbm.second.begin(), parts_with_dbm.second.end()); + } + } + auto visible_server_parts = CnchPartsHelper::calcVisibleParts(server_parts, false, CnchPartsHelper::LoggingOption::DisableLogging, true); + + if (getInMemoryMetadataPtr()->hasUniqueKey() && !visible_server_parts.empty()) + getDeleteBitmapMetaForServerParts(visible_server_parts, delete_bitmaps); + + MergeTreeMutableDataPartsVector data_parts; for (const auto & server_part : visible_server_parts) { auto part = createPartFromModelCommon(*this, *(server_part->part_model_wrapper->part_model)); diff --git a/src/Storages/MergeTree/MergeTreeCloudData.h b/src/Storages/MergeTree/MergeTreeCloudData.h index ffebde648c6..49aec3c3dae 100644 --- a/src/Storages/MergeTree/MergeTreeCloudData.h +++ b/src/Storages/MergeTree/MergeTreeCloudData.h @@ -49,6 +49,9 @@ class MergeTreeCloudData : public MergeTreeMetaBase /// DO NOT check reference count of parts. 
void unloadOldPartsByTimestamp(Int64 expired_ts); + /// set data description in sendResource stage if query with table version + void setDataDescription(WGWorkerInfoPtr && worker_info_, UInt64 data_version_); + void prepareVersionedPartsForRead(ContextPtr local_context, SelectQueryInfo & query_info, const Names & column_names); protected: void addPreparedPart(MutableDataPartPtr & part, DataPartsLock &); @@ -62,7 +65,7 @@ class MergeTreeCloudData : public MergeTreeMetaBase void deactivateOutdatedParts(); - size_t loadFromServerPartsInPartition(const Strings & required_partitions); + size_t loadFromServerPartsInPartition(const Strings & required_partitions, std::unordered_map & server_parts_by_partition); void loadDataPartsInParallel(MutableDataPartsVector & parts); @@ -79,6 +82,14 @@ class MergeTreeCloudData : public MergeTreeMetaBase std::unique_ptr settings_); ~MergeTreeCloudData() override = default; + + /// guard for loading received data_parts and virtual_data_parts. + std::mutex load_data_parts_mutex; + bool data_parts_loaded{false}; + + // data description for query with table version; + WGWorkerInfoPtr worker_info; + UInt64 data_version {0}; }; } diff --git a/src/Storages/StorageCloudMergeTree.cpp b/src/Storages/StorageCloudMergeTree.cpp index 5736c2b52ef..5d23e1181c5 100644 --- a/src/Storages/StorageCloudMergeTree.cpp +++ b/src/Storages/StorageCloudMergeTree.cpp @@ -117,8 +117,8 @@ void StorageCloudMergeTree::read( size_t max_block_size, unsigned num_streams) { - // need create IMergeTreeDataPart from loaded server parts when query with table version - prepareDataPartsForRead(local_context, query_info, column_names); + if (data_version) + prepareVersionedPartsForRead(local_context, query_info, column_names); if (auto plan = MergeTreeDataSelectExecutor(*this).read( column_names, storage_snapshot, query_info, local_context, max_block_size, num_streams, processed_stage)) @@ -134,9 +134,6 @@ Pipe StorageCloudMergeTree::read( const size_t max_block_size, const 
unsigned num_streams) { - // need create IMergeTreeDataPart from loaded server parts when query with table version - prepareDataPartsForRead(local_context, query_info, column_names); - QueryPlan plan; read(plan, column_names, storage_snapshot, query_info, local_context, processed_stage, max_block_size, num_streams); return plan.convertToPipe( @@ -750,36 +747,4 @@ std::unique_ptr StorageCloudMergeTree::getDefaultSettings() c return std::make_unique(getContext()->getMergeTreeSettings()); } -void StorageCloudMergeTree::prepareDataPartsForRead(ContextPtr local_context, SelectQueryInfo & query_info, const Names & column_names) -{ - Stopwatch watch; - - std::lock_guard lock(server_data_mutex); - - if (!has_server_part_to_load) - return; - - auto partition_list = getAllPartitions(); - - if (partition_list.empty()) - return; - - Strings required_partitions = selectPartitionsByPredicate(query_info, partition_list, column_names, local_context); - - SCOPE_EXIT({ - ProfileEvents::increment(ProfileEvents::PrunedPartitions, required_partitions.size()); - ProfileEvents::increment(ProfileEvents::PreparePartsForReadMilliseconds, watch.elapsedMilliseconds()); - }); - - size_t loaded_parts_count = loadFromServerPartsInPartition(required_partitions); - - /// data part only need to be loaded once - has_server_part_to_load = false; - - LOG_TRACE(log, "Loaded {} data parts in {} partitions elapsed {}ms.", - loaded_parts_count, - required_partitions.size(), - watch.elapsedMilliseconds()); -} - } diff --git a/src/Storages/StorageCloudMergeTree.h b/src/Storages/StorageCloudMergeTree.h index f84d60708bc..2b09a27619f 100644 --- a/src/Storages/StorageCloudMergeTree.h +++ b/src/Storages/StorageCloudMergeTree.h @@ -131,8 +131,6 @@ class StorageCloudMergeTree : public shared_ptr_helper, p const String cnch_table_name; private: - void prepareDataPartsForRead(ContextPtr local_context, SelectQueryInfo & query_info, const Names & column_names); - // Relative path to auxility storage disk root String 
relative_auxility_storage_path; From f66840259a49059d1c7a4b394a7a02dd615b1639 Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 08:17:37 +0000 Subject: [PATCH 119/292] Merge 'cherry-pick-337e26ca-2' into 'cnch-2.2' fix(clickhousech@m-4656018162): [cp] cnch 2.2 fix map comparision See merge request: !23486 --- src/Interpreters/convertFieldToType.cpp | 37 ++++++++++--------- .../02682_map_bug_sqls.reference | 1 + .../4_cnch_stateless/02682_map_bug_sqls.sql | 4 ++ 3 files changed, 25 insertions(+), 17 deletions(-) create mode 100644 tests/queries/4_cnch_stateless/02682_map_bug_sqls.reference create mode 100644 tests/queries/4_cnch_stateless/02682_map_bug_sqls.sql diff --git a/src/Interpreters/convertFieldToType.cpp b/src/Interpreters/convertFieldToType.cpp index fd3e0c37973..61b06c160fe 100644 --- a/src/Interpreters/convertFieldToType.cpp +++ b/src/Interpreters/convertFieldToType.cpp @@ -399,29 +399,32 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID } else if (const DataTypeMap * type_map = typeid_cast(&type)) { - const auto & key_type = *type_map->getKeyType(); - const auto & value_type = *type_map->getValueType(); + if (src.getType() == Field::Types::Map) + { + const auto & key_type = *type_map->getKeyType(); + const auto & value_type = *type_map->getValueType(); - const auto & map = src.get(); - size_t map_size = map.size(); + const auto & map = src.get(); + size_t map_size = map.size(); - Map res(map_size); + Map res(map_size); - bool have_unconvertible_element = false; - for (size_t i = 0; i < map_size; ++i) - { - const auto & key = map[i].first; - const auto & value = map[i].second; + bool have_unconvertible_element = false; + for (size_t i = 0; i < map_size; ++i) + { + const auto & key = map[i].first; + const auto & value = map[i].second; - res[i] = {convertFieldToType(key, key_type), convertFieldToType(value, value_type)}; - if (res[i].first.isNull() && !key_type.isNullable()) - have_unconvertible_element = true; 
+ res[i] = {convertFieldToType(key, key_type), convertFieldToType(value, value_type)}; + if (res[i].first.isNull() && !key_type.isNullable()) + have_unconvertible_element = true; - if (res[i].second.isNull() && !value_type.isNullable()) - have_unconvertible_element = true; - } + if (res[i].second.isNull() && !value_type.isNullable()) + have_unconvertible_element = true; + } - return have_unconvertible_element ? Field(Null()) : Field(res); + return have_unconvertible_element ? Field(Null()) : Field(res); + } } else if (const DataTypeAggregateFunction * agg_func_type = typeid_cast(&type)) { diff --git a/tests/queries/4_cnch_stateless/02682_map_bug_sqls.reference b/tests/queries/4_cnch_stateless/02682_map_bug_sqls.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/4_cnch_stateless/02682_map_bug_sqls.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/4_cnch_stateless/02682_map_bug_sqls.sql b/tests/queries/4_cnch_stateless/02682_map_bug_sqls.sql new file mode 100644 index 00000000000..9196741e62c --- /dev/null +++ b/tests/queries/4_cnch_stateless/02682_map_bug_sqls.sql @@ -0,0 +1,4 @@ +-- Type checks in comparision. 
If using const string, will try to deserialize to corrosponding types +SELECT map('a', 'b') = ''; -- { serverError 27 } +SELECT map('a', 'b') = '{\'a\': \'b\'}'; +SELECT map('a', 'b') = materialize('{\'a\': \'b\'}'); -- { serverError 386 } From 658ebea868e41fb6be30f29d35fc2d79ab3e27cc Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 08:17:54 +0000 Subject: [PATCH 120/292] Merge 'fix_interpreter_optimizer_execute_nullptr-cnch-2.2' into 'cnch-2.2' fix(clickhousech@m-4770113765): fix interpreter optimizer execute nullptr cnch 2.2 See merge request: !23491 --- src/Interpreters/InterpreterSelectQueryUseOptimizer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/InterpreterSelectQueryUseOptimizer.cpp b/src/Interpreters/InterpreterSelectQueryUseOptimizer.cpp index 4936d1021f9..aebc2d02d0a 100644 --- a/src/Interpreters/InterpreterSelectQueryUseOptimizer.cpp +++ b/src/Interpreters/InterpreterSelectQueryUseOptimizer.cpp @@ -554,7 +554,7 @@ BlockIO InterpreterSelectQueryUseOptimizer::readFromQueryCache(ContextPtr local_ BlockIO InterpreterSelectQueryUseOptimizer::execute() { - if (auto * create_prepared = query_ptr->as()) + if (query_ptr && query_ptr->as()) { // if (!create_prepared->cluster.empty()) // return executeDDLQueryOnCluster(query_ptr, context); From baca783a4287cd2ea748ed91311896118466f069 Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 08:18:11 +0000 Subject: [PATCH 121/292] Merge 'dog-cnch-2.2-fix-histogram' into 'cnch-2.2' fix(optimizer@m-4717122209): statistics_expand_to_current handle only tail of histogram See merge request: !23462 --- src/Core/Settings.h | 3 +- src/Statistics/StatisticsCollector.cpp | 81 ++++++---- .../45023_expand_stats.reference | 141 ++++++++++++++++++ .../45023_expand_stats.sql | 37 +++++ .../45023_expand_stats_no_hist.sql | 2 + 5 files changed, 238 insertions(+), 26 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index cba6346fc04..7c55cf6c0bf 100644 --- 
a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -1589,9 +1589,10 @@ enum PreloadLevelSettings : UInt64 M(Bool, statistics_simplify_histogram, false, "Reduce buckets of histogram with simplifying", 0) \ M(Float, statistics_simplify_histogram_ndv_density_threshold, 0.2, "Histogram simplifying threshold for ndv", 0) \ M(Float, statistics_simplify_histogram_range_density_threshold, 0.2, "Histogram simplifying threshold for range", 0) \ - M(Bool, statistics_expand_to_current, false, "Expand Date/Date32/DateTime/DateTime64 columns stats to current timestamp", 0) \ + M(Bool, statistics_expand_to_current, true, "Expand Date/Date32/DateTime/DateTime64 columns stats to current timestamp", 0) \ M(UInt64, statistics_current_timestamp, 0, "Timestamp used for statistics_expand_to_current, 0 to use now(), for testing purpose", 0) \ M(UInt64, statistics_expand_to_current_threshold_days, 31, "If abs(stats_timestamp - stats_column_max) is within this threshold, we will expand this column", 0) \ + M(Float, statistics_expand_to_current_histogram_ratio, 0.10, "For histogram, only expand last buckets containing rows with this ratio", 0) \ M(StatisticsCachePolicy, statistics_cache_policy, StatisticsCachePolicy::Default, "Cache policy for stats command and SQLs: (default|cache|catalog)", 0) \ M(Bool, statistics_query_cnch_parts_for_row_count, true, "Use cnch parts instead of count(*) for row count to speed up test", 0) \ /** Optimizer relative settings, cost model and estimation */ \ diff --git a/src/Statistics/StatisticsCollector.cpp b/src/Statistics/StatisticsCollector.cpp index 54016bce03e..2fe3e37adee 100644 --- a/src/Statistics/StatisticsCollector.cpp +++ b/src/Statistics/StatisticsCollector.cpp @@ -34,15 +34,16 @@ #include #include #include "Storages/ColumnsDescription.h" -#include #include +#include +#include namespace DB::Statistics { StorageMetadataPtr getStorageMetaData(ContextPtr context, StoragePtr storage) { - (void) context; + (void)context; StorageMetadataPtr 
storage_metadata = storage->getInMemoryMetadataPtr(); // for Cnch, no need to workaround Distribtued return storage_metadata; @@ -187,7 +188,7 @@ PlanNodeStatisticsPtr expandToCurrent(ContextPtr context, PlanNodeStatisticsPtr auto date_helper = TypeHelper{.now = datetime_helper.now / (24 * 3600), .skip = datetime_helper.skip / (24 * 3600)}; std::set modified_cols; - auto all_scale_factor = 0.0; + UInt64 all_delta_count = 0; for (auto & [col_name, symbol] : plan_node_stats->getSymbolStatistics()) { auto type = symbol->getType(); @@ -200,9 +201,8 @@ PlanNodeStatisticsPtr expandToCurrent(ContextPtr context, PlanNodeStatisticsPtr { auto helper = is_date ? date_helper : datetime_helper; - auto stats_min = symbol_stats->getMin(); auto stats_max = symbol_stats->getMax(); - if (std::isnan(stats_min) || std::isnan(stats_max)) + if (std::isnan(stats_max)) { continue; } @@ -219,27 +219,58 @@ PlanNodeStatisticsPtr expandToCurrent(ContextPtr context, PlanNodeStatisticsPtr continue; } - // +1 to avoid inf - auto scale_factor = (helper.now - stats_min + 1) / (stats_max - stats_min + 1); - - // we hard code scale_factor limit to 2, to avoid unexpected situations - // like some column stores a fixed old date_time - scale_factor = std::min(scale_factor, 2.0); - - // we just set max here, scale it in next step - symbol_stats->setMax(helper.now); auto & histogram = symbol_stats->getHistogram(); - if (!histogram.empty()) + UInt64 delta_count = 0; + if (histogram.empty()) + { + auto stats_min = symbol_stats->getMin(); + if (isnan(stats_min)) + continue; + // +1 to avoid inf + auto expand_factor = (helper.now - stats_max) / (stats_max - stats_min + 1); + + // we hard code expand_factor limit to 1, to avoid unexpected situations + // like some column stores a fixed old date_time + expand_factor = std::min(expand_factor, 1.0); + + // we just set max here, scale it in next step + symbol_stats->setNdv(symbol_stats->getNdv() * (1 + expand_factor)); + symbol_stats->setMax(helper.now); + 
delta_count = expand_factor * full_count; + } + else { - auto count = (scale_factor - 1) * (full_count - symbol_stats->getNullsCount()); - auto ndv = (scale_factor - 1) * symbol_stats->getNdv(); - auto new_bucket = Bucket(stats_max, helper.now, ndv, count, false, true); - histogram.emplaceBackBucket(new_bucket); + double hist_ratio = context->getSettingsRef().statistics_expand_to_current_histogram_ratio; + UInt64 bucket_id = static_cast(histogram.getBucketSize() * (1 - hist_ratio)); + + if (bucket_id == histogram.getBucketSize()) + { + --bucket_id; + } + + auto stats_min = histogram.getBucket(bucket_id)->getLowerBound(); + UInt64 tail_hist_count = 0; + UInt64 tail_hist_ndv = 0; + for(auto i = bucket_id; i < histogram.getBucketSize(); ++i) + { + auto bucket = histogram.getBuckets()[i]; + tail_hist_count += bucket.getCount(); + tail_hist_ndv += bucket.getNumDistinct(); + } + auto local_expand_factor = 1.0 * (helper.now - stats_max) / (stats_max - stats_min + 1); + local_expand_factor = std::min(1.0 * full_count / tail_hist_count, local_expand_factor); + auto local_count = local_expand_factor * tail_hist_count; + auto local_ndv = local_expand_factor * tail_hist_ndv; + auto new_bucket = Bucket(stats_max, helper.now, local_ndv, local_count, false, true); + histogram.emplaceBackBucket(std::move(new_bucket)); + + delta_count = local_count; + symbol_stats->setMax(helper.now); + symbol_stats->setNdv(symbol_stats->getNdv() + local_ndv); } - symbol_stats->setNdv(symbol_stats->getNdv() * scale_factor); modified_cols.insert(col_name); - all_scale_factor = std::max(all_scale_factor, scale_factor); + all_delta_count = std::max(all_delta_count, delta_count); } } @@ -248,14 +279,14 @@ PlanNodeStatisticsPtr expandToCurrent(ContextPtr context, PlanNodeStatisticsPtr return plan_node_stats; } - if (all_scale_factor == 0.0) + if (all_delta_count == 0) { return plan_node_stats; } - auto old_count = plan_node_stats->getRowCount(); - plan_node_stats->updateRowCount(old_count * 
all_scale_factor); + plan_node_stats->updateRowCount(full_count + all_delta_count); + auto scale_factor = 1.0 * (full_count + all_delta_count) / full_count; for (auto & [col, symbol] : plan_node_stats->getSymbolStatistics()) { if (modified_cols.count(col)) @@ -265,7 +296,7 @@ PlanNodeStatisticsPtr expandToCurrent(ContextPtr context, PlanNodeStatisticsPtr // NOTE: we have made sure selectivity can be larger than 1 using this API // NOTE: future improvement should consider this scenario - symbol = symbol->applySelectivity(all_scale_factor, 1); + symbol = symbol->applySelectivity(scale_factor, 1); } return plan_node_stats; diff --git a/tests/queries/4_cnch_stateless_no_tenant/45023_expand_stats.reference b/tests/queries/4_cnch_stateless_no_tenant/45023_expand_stats.reference index 3da406587cd..406fc6e31b6 100644 --- a/tests/queries/4_cnch_stateless_no_tenant/45023_expand_stats.reference +++ b/tests/queries/4_cnch_stateless_no_tenant/45023_expand_stats.reference @@ -139,3 +139,144 @@ Projection Est. 1 rows, cost 6.516680e+02 └─ Projection Est. 1 rows, cost 6.512740e+02 └─ Filter Est. 1 rows, cost 6.512000e+02 └─ TableScan default.tb Est. 800 rows, cost 5.920000e+02 +tb 9 801 +tb.* 801 +tb.id UInt64 801 0 100 0 99 1 +tb.date Date 801 0 101 0 19856.513888888891 1 +tb.date32 Date32 801 0 101 -25567 19856.513888888891 1 +tb.datetime DateTime 801 0 101 0 1715602800 1 +tb.datetime64 DateTime64(3) 801 0 101 0 1715602800 1 +tb.old_date Date 801 0 101 0 18252 1 +tb.old_date32 Date32 801 0 101 -25567 18252 1 +tb.old_datetime DateTime 801 0 101 0 1577825990 1 +tb.old_datetime64 DateTime64(3) 801 0 101 0 1577825990 1 +Projection Est. 1 rows, cost 6.524820e+02 +└─ Gather Exchange Est. 1 rows, cost 6.524080e+02 + └─ Projection Est. 1 rows, cost 6.520880e+02 + └─ Filter Est. 1 rows, cost 6.520140e+02 + └─ TableScan default.tb Est. 801 rows, cost 5.927400e+02 +Projection Est. 7 rows, cost 6.543300e+02 +└─ Gather Exchange Est. 7 rows, cost 6.538120e+02 + └─ Projection Est. 
7 rows, cost 6.525320e+02 + └─ Filter Est. 7 rows, cost 6.520140e+02 + └─ TableScan default.tb Est. 801 rows, cost 5.927400e+02 +Projection Est. 1 rows, cost 6.524820e+02 +└─ Gather Exchange Est. 1 rows, cost 6.524080e+02 + └─ Projection Est. 1 rows, cost 6.520880e+02 + └─ Filter Est. 1 rows, cost 6.520140e+02 + └─ TableScan default.tb Est. 801 rows, cost 5.927400e+02 +Projection Est. 7 rows, cost 6.543300e+02 +└─ Gather Exchange Est. 7 rows, cost 6.538120e+02 + └─ Projection Est. 7 rows, cost 6.525320e+02 + └─ Filter Est. 7 rows, cost 6.520140e+02 + └─ TableScan default.tb Est. 801 rows, cost 5.927400e+02 +Projection Est. 1 rows, cost 6.524820e+02 +└─ Gather Exchange Est. 1 rows, cost 6.524080e+02 + └─ Projection Est. 1 rows, cost 6.520880e+02 + └─ Filter Est. 1 rows, cost 6.520140e+02 + └─ TableScan default.tb Est. 801 rows, cost 5.927400e+02 +Projection Est. 7 rows, cost 6.543300e+02 +└─ Gather Exchange Est. 7 rows, cost 6.538120e+02 + └─ Projection Est. 7 rows, cost 6.525320e+02 + └─ Filter Est. 7 rows, cost 6.520140e+02 + └─ TableScan default.tb Est. 801 rows, cost 5.927400e+02 +Projection Est. 1 rows, cost 6.524820e+02 +└─ Gather Exchange Est. 1 rows, cost 6.524080e+02 + └─ Projection Est. 1 rows, cost 6.520880e+02 + └─ Filter Est. 1 rows, cost 6.520140e+02 + └─ TableScan default.tb Est. 801 rows, cost 5.927400e+02 +Projection Est. 7 rows, cost 6.543300e+02 +└─ Gather Exchange Est. 7 rows, cost 6.538120e+02 + └─ Projection Est. 7 rows, cost 6.525320e+02 + └─ Filter Est. 7 rows, cost 6.520140e+02 + └─ TableScan default.tb Est. 801 rows, cost 5.927400e+02 +Projection Est. 1 rows, cost 6.524820e+02 +└─ Gather Exchange Est. 1 rows, cost 6.524080e+02 + └─ Projection Est. 1 rows, cost 6.520880e+02 + └─ Filter Est. 1 rows, cost 6.520140e+02 + └─ TableScan default.tb Est. 801 rows, cost 5.927400e+02 +Projection Est. 1 rows, cost 6.524820e+02 +└─ Gather Exchange Est. 1 rows, cost 6.524080e+02 + └─ Projection Est. 1 rows, cost 6.520880e+02 + └─ Filter Est. 
1 rows, cost 6.520140e+02 + └─ TableScan default.tb Est. 801 rows, cost 5.927400e+02 +Projection Est. 1 rows, cost 6.524820e+02 +└─ Gather Exchange Est. 1 rows, cost 6.524080e+02 + └─ Projection Est. 1 rows, cost 6.520880e+02 + └─ Filter Est. 1 rows, cost 6.520140e+02 + └─ TableScan default.tb Est. 801 rows, cost 5.927400e+02 +Projection Est. 1 rows, cost 6.524820e+02 +└─ Gather Exchange Est. 1 rows, cost 6.524080e+02 + └─ Projection Est. 1 rows, cost 6.520880e+02 + └─ Filter Est. 1 rows, cost 6.520140e+02 + └─ TableScan default.tb Est. 801 rows, cost 5.927400e+02 +tb.* 888 +tb.id UInt64 888 0 100 0 99 1 +tb.date Date 888 0 111 0 19856.513888888891 1 +tb.date32 Date32 888 0 111 -25567 19856.513888888891 1 +tb.datetime DateTime 888 0 111 0 1715602800 1 +tb.datetime64 DateTime64(3) 888 0 111 0 1715602800 1 +tb.old_date Date 888 0 101 0 18252 1 +tb.old_date32 Date32 888 0 101 -25567 18252 1 +tb.old_datetime DateTime 888 0 101 0 1577825990 1 +tb.old_datetime64 DateTime64(3) 888 0 101 0 1577825990 1 +Projection Est. 43 rows, cost 7.337940e+02 +└─ Gather Exchange Est. 43 rows, cost 7.306120e+02 + └─ Projection Est. 43 rows, cost 7.235720e+02 + └─ Filter Est. 43 rows, cost 7.203900e+02 + └─ TableScan default.tb Est. 885 rows, cost 6.549000e+02 +Projection Est. 7 rows, cost 7.227060e+02 +└─ Gather Exchange Est. 7 rows, cost 7.221880e+02 + └─ Projection Est. 7 rows, cost 7.209080e+02 + └─ Filter Est. 7 rows, cost 7.203900e+02 + └─ TableScan default.tb Est. 885 rows, cost 6.549000e+02 +Projection Est. 43 rows, cost 7.337940e+02 +└─ Gather Exchange Est. 43 rows, cost 7.306120e+02 + └─ Projection Est. 43 rows, cost 7.235720e+02 + └─ Filter Est. 43 rows, cost 7.203900e+02 + └─ TableScan default.tb Est. 885 rows, cost 6.549000e+02 +Projection Est. 7 rows, cost 7.227060e+02 +└─ Gather Exchange Est. 7 rows, cost 7.221880e+02 + └─ Projection Est. 7 rows, cost 7.209080e+02 + └─ Filter Est. 7 rows, cost 7.203900e+02 + └─ TableScan default.tb Est. 
885 rows, cost 6.549000e+02 +Projection Est. 42 rows, cost 7.359280e+02 +└─ Gather Exchange Est. 42 rows, cost 7.328200e+02 + └─ Projection Est. 42 rows, cost 7.259400e+02 + └─ Filter Est. 42 rows, cost 7.228320e+02 + └─ TableScan default.tb Est. 888 rows, cost 6.571200e+02 +Projection Est. 7 rows, cost 7.251480e+02 +└─ Gather Exchange Est. 7 rows, cost 7.246300e+02 + └─ Projection Est. 7 rows, cost 7.233500e+02 + └─ Filter Est. 7 rows, cost 7.228320e+02 + └─ TableScan default.tb Est. 888 rows, cost 6.571200e+02 +Projection Est. 42 rows, cost 7.359280e+02 +└─ Gather Exchange Est. 42 rows, cost 7.328200e+02 + └─ Projection Est. 42 rows, cost 7.259400e+02 + └─ Filter Est. 42 rows, cost 7.228320e+02 + └─ TableScan default.tb Est. 888 rows, cost 6.571200e+02 +Projection Est. 7 rows, cost 7.251480e+02 +└─ Gather Exchange Est. 7 rows, cost 7.246300e+02 + └─ Projection Est. 7 rows, cost 7.233500e+02 + └─ Filter Est. 7 rows, cost 7.228320e+02 + └─ TableScan default.tb Est. 888 rows, cost 6.571200e+02 +Projection Est. 1 rows, cost 6.524820e+02 +└─ Gather Exchange Est. 1 rows, cost 6.524080e+02 + └─ Projection Est. 1 rows, cost 6.520880e+02 + └─ Filter Est. 1 rows, cost 6.520140e+02 + └─ TableScan default.tb Est. 801 rows, cost 5.927400e+02 +Projection Est. 1 rows, cost 6.524820e+02 +└─ Gather Exchange Est. 1 rows, cost 6.524080e+02 + └─ Projection Est. 1 rows, cost 6.520880e+02 + └─ Filter Est. 1 rows, cost 6.520140e+02 + └─ TableScan default.tb Est. 801 rows, cost 5.927400e+02 +Projection Est. 1 rows, cost 6.524820e+02 +└─ Gather Exchange Est. 1 rows, cost 6.524080e+02 + └─ Projection Est. 1 rows, cost 6.520880e+02 + └─ Filter Est. 1 rows, cost 6.520140e+02 + └─ TableScan default.tb Est. 801 rows, cost 5.927400e+02 +Projection Est. 1 rows, cost 6.524820e+02 +└─ Gather Exchange Est. 1 rows, cost 6.524080e+02 + └─ Projection Est. 1 rows, cost 6.520880e+02 + └─ Filter Est. 1 rows, cost 6.520140e+02 + └─ TableScan default.tb Est. 
801 rows, cost 5.927400e+02 diff --git a/tests/queries/4_cnch_stateless_no_tenant/45023_expand_stats.sql b/tests/queries/4_cnch_stateless_no_tenant/45023_expand_stats.sql index becead46cdd..04c9f1415a9 100644 --- a/tests/queries/4_cnch_stateless_no_tenant/45023_expand_stats.sql +++ b/tests/queries/4_cnch_stateless_no_tenant/45023_expand_stats.sql @@ -39,6 +39,7 @@ insert into tb select * from tb; insert into tb select * from tb; insert into tb select * from tb; +set statistics_expand_to_current=0; create stats tb; show stats tb; explain stats=1, verbose=0 select id from tb where date > toDate('2024-05-13') - 5; @@ -57,6 +58,7 @@ explain stats=1, verbose=0 select id from tb where old_datetime64 > toDateTime(' set statistics_expand_to_current=1; +set statistics_expand_to_current_histogram_ratio=1; show stats tb; explain stats=1, verbose=0 select id from tb where date > toDate('2024-05-13') - 5; explain stats=1, verbose=0 select id from tb where date = toDate('2024-05-13'); @@ -71,3 +73,38 @@ explain stats=1, verbose=0 select id from tb where old_date32 > toDate('2024-05- explain stats=1, verbose=0 select id from tb where old_datetime > toDateTime('2024-05-13 15:20:00') - toIntervalDay(5); explain stats=1, verbose=0 select id from tb where old_datetime64 > toDateTime('2024-05-13 15:20:00') - toIntervalDay(5); + +-- insert default data so that estimator will be affected +insert into tb(id) select 0; +create stats tb; +show stats tb; +explain stats=1, verbose=0 select id from tb where date > toDate('2024-05-13') - 5; +explain stats=1, verbose=0 select id from tb where date = toDate('2024-05-13'); +explain stats=1, verbose=0 select id from tb where date32 > toDate('2024-05-13') - 5; +explain stats=1, verbose=0 select id from tb where date32 = toDate('2024-05-13'); +explain stats=1, verbose=0 select id from tb where datetime > toDateTime('2024-05-13 15:20:00') - toIntervalDay(5); +explain stats=1, verbose=0 select id from tb where datetime = toDateTime('2024-05-13 
15:20:00'); +explain stats=1, verbose=0 select id from tb where datetime64 > toDateTime('2024-05-13 15:20:00') - toIntervalDay(5); +explain stats=1, verbose=0 select id from tb where datetime64 = toDateTime('2024-05-13 15:20:00'); +explain stats=1, verbose=0 select id from tb where old_date > toDate('2024-05-13') - 5; +explain stats=1, verbose=0 select id from tb where old_date32 > toDate('2024-05-13') - 5; +explain stats=1, verbose=0 select id from tb where old_datetime > toDateTime('2024-05-13 15:20:00') - toIntervalDay(5); +explain stats=1, verbose=0 select id from tb where old_datetime64 > toDateTime('2024-05-13 15:20:00') - toIntervalDay(5); + +set statistics_expand_to_current_histogram_ratio=0.1; +show stats tb; +explain stats=1, verbose=0 select id from tb where date > toDate('2024-05-13') - 5; +explain stats=1, verbose=0 select id from tb where date = toDate('2024-05-13'); +explain stats=1, verbose=0 select id from tb where date32 > toDate('2024-05-13') - 5; +explain stats=1, verbose=0 select id from tb where date32 = toDate('2024-05-13'); +explain stats=1, verbose=0 select id from tb where datetime > toDateTime('2024-05-13 15:20:00') - toIntervalDay(5); +explain stats=1, verbose=0 select id from tb where datetime = toDateTime('2024-05-13 15:20:00'); +explain stats=1, verbose=0 select id from tb where datetime64 > toDateTime('2024-05-13 15:20:00') - toIntervalDay(5); +explain stats=1, verbose=0 select id from tb where datetime64 = toDateTime('2024-05-13 15:20:00'); +explain stats=1, verbose=0 select id from tb where old_date > toDate('2024-05-13') - 5; +explain stats=1, verbose=0 select id from tb where old_date32 > toDate('2024-05-13') - 5; +explain stats=1, verbose=0 select id from tb where old_datetime > toDateTime('2024-05-13 15:20:00') - toIntervalDay(5); +explain stats=1, verbose=0 select id from tb where old_datetime64 > toDateTime('2024-05-13 15:20:00') - toIntervalDay(5); + + + diff --git 
a/tests/queries/4_cnch_stateless_no_tenant/45023_expand_stats_no_hist.sql b/tests/queries/4_cnch_stateless_no_tenant/45023_expand_stats_no_hist.sql index 85f1f72e2b1..d73e9a28307 100644 --- a/tests/queries/4_cnch_stateless_no_tenant/45023_expand_stats_no_hist.sql +++ b/tests/queries/4_cnch_stateless_no_tenant/45023_expand_stats_no_hist.sql @@ -40,6 +40,7 @@ insert into tb select * from tb; insert into tb select * from tb; insert into tb select * from tb; +set statistics_expand_to_current=0; create stats tb; show stats tb; explain stats=1, verbose=0 select id from tb where date > toDate('2024-05-13') - 5; @@ -58,6 +59,7 @@ explain stats=1, verbose=0 select id from tb where old_datetime64 > toDateTime(' set statistics_expand_to_current=1; +set statistics_expand_to_current_histogram_ratio=1; show stats tb; explain stats=1, verbose=0 select id from tb where date > toDate('2024-05-13') - 5; explain stats=1, verbose=0 select id from tb where date = toDate('2024-05-13'); From 78f830b46c7ec4033ea0fbf57310833b0d93fa58 Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 08:18:26 +0000 Subject: [PATCH 122/292] Merge 'cherry-pick-mr-23481' into 'cnch-2.2' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fix(clickhousech@m-4691895692): 【CP】fix cache core for reorder See merge request: !23511 --- .../SourceFromIntermediateResultCache.h | 7 +++++-- .../AggregatingStreamingTransform.cpp | 11 ++++++---- .../IntermediateResultCacheTransform.cpp | 7 +++++-- src/QueryPlan/IntermediateResultCacheStep.cpp | 8 ++++++++ ...ntermediate_result_cache_reorder.reference | 6 ++++++ ...0102_intermediate_result_cache_reorder.sql | 20 +++++++++++++++++++ 6 files changed, 51 insertions(+), 8 deletions(-) create mode 100644 tests/queries/4_cnch_stateless/10102_intermediate_result_cache_reorder.reference create mode 100644 tests/queries/4_cnch_stateless/10102_intermediate_result_cache_reorder.sql diff --git 
a/src/Processors/Sources/SourceFromIntermediateResultCache.h b/src/Processors/Sources/SourceFromIntermediateResultCache.h index 19f36628d5d..c0f8f6b8860 100644 --- a/src/Processors/Sources/SourceFromIntermediateResultCache.h +++ b/src/Processors/Sources/SourceFromIntermediateResultCache.h @@ -30,9 +30,12 @@ class SourceFromIntermediateResultCache : public ISource if (!chunk.empty()) { size_t num_columns = chunk.getNumColumns(); - auto columns = chunk.detachColumns(); + size_t num_rows = chunk.getNumRows(); + auto cache_columns = chunk.detachColumns(); + Columns output_columns(num_columns); for (size_t i = 0; i < num_columns; ++i) - chunk.addColumn(std::move(columns[cache_pos_to_output_pos[i]])); + output_columns[cache_pos_to_output_pos[i]] = std::move(cache_columns[i]); + chunk.setColumns(std::move(output_columns), num_rows); return chunk; } else diff --git a/src/Processors/Transforms/AggregatingStreamingTransform.cpp b/src/Processors/Transforms/AggregatingStreamingTransform.cpp index 129d2ac9897..dd35c00424c 100644 --- a/src/Processors/Transforms/AggregatingStreamingTransform.cpp +++ b/src/Processors/Transforms/AggregatingStreamingTransform.cpp @@ -110,7 +110,8 @@ ISimpleTransform::Status AggregatingStreamingTransform::prepare() /// To do this, we pass a block with zero rows to aggregate. 
if (params->params.keys_size == 0 && !params->params.empty_result_for_aggregation_by_empty_set) { - params->aggregator.executeOnBlock(getInputs().front().getHeader(), variants, key_columns, aggregate_columns, no_more_keys); + params->aggregator.executeOnBlock( + getInputs().front().getHeader(), variants, key_columns, aggregate_columns, no_more_keys); has_left = true; start_generated = true; return Status::Ready; @@ -172,12 +173,12 @@ void AggregatingStreamingTransform::transform(DB::Chunk & chunk) if (!is_generated) { generate(chunk); - is_generated = true; + if (!is_without_key) + is_generated = true; return; } - output_data.chunk = std::move(chunks[chunk_idx++]); - output.pushData(std::move(output_data)); + chunk = std::move(chunks[chunk_idx++]); has_left = chunk_idx != chunks.size(); } return; @@ -278,5 +279,7 @@ void AggregatingStreamingTransform::generate(DB::Chunk & chunk) chunk = std::move(chunks[chunk_idx++]); rows_returned += chunk.getNumRows(); has_left = chunk_idx != chunks.size(); + LOG_TRACE( + log, "{} blocks generate, {} chunks remain, {} rows return", blocks_list.size(), chunks.size() - chunk_idx, chunk.getNumRows()); } } diff --git a/src/Processors/Transforms/IntermediateResultCacheTransform.cpp b/src/Processors/Transforms/IntermediateResultCacheTransform.cpp index ef1b11db845..d4a777eab84 100644 --- a/src/Processors/Transforms/IntermediateResultCacheTransform.cpp +++ b/src/Processors/Transforms/IntermediateResultCacheTransform.cpp @@ -79,9 +79,12 @@ void IntermediateResultCacheTransform::transform(DB::Chunk & chunk) auto cache_chunk = chunk.clone(); size_t num_columns = cache_chunk.getNumColumns(); - auto columns = cache_chunk.detachColumns(); + size_t num_rows = cache_chunk.getNumRows(); + auto output_columns = cache_chunk.detachColumns(); + Columns cache_columns(num_columns); for (size_t i = 0; i < num_columns; ++i) - cache_chunk.addColumn(std::move(columns[cache_param.output_pos_to_cache_pos[i]])); + 
cache_columns[cache_param.output_pos_to_cache_pos[i]] = std::move(output_columns[i]); + cache_chunk.setColumns(std::move(cache_columns), num_rows); if (value) value->addChunk(cache_chunk); diff --git a/src/QueryPlan/IntermediateResultCacheStep.cpp b/src/QueryPlan/IntermediateResultCacheStep.cpp index 3d2c97033f6..2ea8e90c469 100644 --- a/src/QueryPlan/IntermediateResultCacheStep.cpp +++ b/src/QueryPlan/IntermediateResultCacheStep.cpp @@ -42,6 +42,14 @@ QueryPipelinePtr IntermediateResultCacheStep::processCacheTransform( if (!cache) return std::move(pipelines[0]); + LOG_DEBUG( + log, + "process cache transform for digest:{}, write:{}, read:{}, all_part_in_cache:{}", + cache_param.digest, + cache_holder->write_cache.size(), + cache_holder->read_cache.size(), + cache_holder->all_part_in_cache); + const auto & settings = build_settings.context->getSettingsRef(); // write cache or skip pipeline if (!cache_holder->write_cache.empty() || cache_holder->all_part_in_cache) diff --git a/tests/queries/4_cnch_stateless/10102_intermediate_result_cache_reorder.reference b/tests/queries/4_cnch_stateless/10102_intermediate_result_cache_reorder.reference new file mode 100644 index 00000000000..d9a35dfe7c9 --- /dev/null +++ b/tests/queries/4_cnch_stateless/10102_intermediate_result_cache_reorder.reference @@ -0,0 +1,6 @@ +3 5 2 +5 2 3 5 +5 3 2 5 +2 3 5 +3 2 5 +3 5 2 diff --git a/tests/queries/4_cnch_stateless/10102_intermediate_result_cache_reorder.sql b/tests/queries/4_cnch_stateless/10102_intermediate_result_cache_reorder.sql new file mode 100644 index 00000000000..21b8c158dcf --- /dev/null +++ b/tests/queries/4_cnch_stateless/10102_intermediate_result_cache_reorder.sql @@ -0,0 +1,20 @@ +set enable_optimizer=1; +set wait_intermediate_result_cache=0; +set enable_optimizer_fallback=0; +set enable_intermediate_result_cache=1; + +DROP TABLE if exists reorder_cache_table_all; + +CREATE TABLE reorder_cache_table_all(c1 UInt64, c2 String) ENGINE = CnchMergeTree ORDER BY c1; + +insert into 
reorder_cache_table_all values (1, 'a'), (2, 'b'), (2, 'c'); + +select countDistinct(c2), sum(c1), countDistinct(c1) from reorder_cache_table_all; + +select sum(c1), countDistinct(c1), countDistinct(c2), sum(c1) from reorder_cache_table_all settings max_bytes_to_read = 1; +select sum(c1), countDistinct(c2), countDistinct(c1), sum(c1) from reorder_cache_table_all settings max_bytes_to_read = 1; +select countDistinct(c1), countDistinct(c2), sum(c1) from reorder_cache_table_all settings max_bytes_to_read = 1; +select countDistinct(c2), countDistinct(c1), sum(c1) from reorder_cache_table_all settings max_bytes_to_read = 1; +select countDistinct(c2), sum(c1), countDistinct(c1) from reorder_cache_table_all settings max_bytes_to_read = 1; + +DROP TABLE reorder_cache_table_all; From 9261bfb7b53b5adda0cedf673ec1c98d0190c1ce Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 08:19:07 +0000 Subject: [PATCH 123/292] Merge branch 'cherry-pick-08b93a57' into 'cnch-2.2' fix(clickhousech@m-4717110049): fix hualloc config reload See merge request dp/ClickHouse!23523 --- contrib/hualloc/hu_alloc.cpp | 4 ++-- contrib/hualloc/hu_alloc.h | 2 +- programs/server/Server.cpp | 46 ++++++++++++++++++++---------------- src/Common/HuAllocator.h | 16 ++++++++----- 4 files changed, 38 insertions(+), 30 deletions(-) diff --git a/contrib/hualloc/hu_alloc.cpp b/contrib/hualloc/hu_alloc.cpp index 0a2f598a9c6..44093081bd1 100644 --- a/contrib/hualloc/hu_alloc.cpp +++ b/contrib/hualloc/hu_alloc.cpp @@ -71,7 +71,7 @@ void* ReclaimThread(void *args) { // keep & max can be separate for large & segment spaces const char * sleep_second = std::getenv("HUALLOC_CLAIM_INTERVAL"); - int sleep = 3; + int sleep = 1; try { if (sleep_second && std::strlen(sleep_second) > 0) @@ -79,7 +79,7 @@ void* ReclaimThread(void *args) } catch(...) 
{ - sleep = 3; + sleep = 1; } yint cached = *(yint *) args; diff --git a/contrib/hualloc/hu_alloc.h b/contrib/hualloc/hu_alloc.h index 036b3bcd47b..2a0b5484959 100644 --- a/contrib/hualloc/hu_alloc.h +++ b/contrib/hualloc/hu_alloc.h @@ -1843,7 +1843,7 @@ static void DumpLocalAllocMasksLocked(char *segment) //////////////////////////////////////////////////////////////////////////////////////////////////////// static yint ReclaimKeepSize = 1 * 1024 * (1ull << 20); -static yint ReclaimMaxReclaim = 512 * (1ull << 20); +static yint ReclaimMaxReclaim = 1* 1024 * (1ull << 20); static void hu_init() { diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index af4a77245c7..d07b70032b5 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1021,27 +1021,31 @@ int Server::main(const std::vector & /*args*/) #if USE_HUALLOC if (config->getBool("hualloc_numa_aware", false)) { - size_t max_numa_node = SystemUtils::getMaxNumaNode(); - std::vector numa_nodes_cpu_mask = SystemUtils::getNumaNodesCpuMask(); - bool hualloc_enable_mbind = config->getBool("hualloc_enable_mbind", false); - int mbind_mode = config->getInt("hualloc_mbind_mode", 1); - - /* - *mbind mode - #define MPOL_DEFAULT 0 - #define MPOL_PREFERRED 1 - #define MPOL_BIND 2 - #define MPOL_INTERLEAVE 3 - #define MPOL_LOCAL 4 - #define MPOL_MAX 5 - */ - huallocSetNumaInfo( - max_numa_node, - numa_nodes_cpu_mask, - hualloc_enable_mbind, - mbind_mode, - huallocLogPrint - ); + static std::once_flag numa_aware_init_flag; + std::call_once(numa_aware_init_flag, [&]() + { + size_t max_numa_node = SystemUtils::getMaxNumaNode(); + std::vector numa_nodes_cpu_mask = SystemUtils::getNumaNodesCpuMask(); + bool hualloc_enable_mbind = config->getBool("hualloc_enable_mbind", false); + int mbind_mode = config->getInt("hualloc_mbind_mode", 1); + + /* + *mbind mode + #define MPOL_DEFAULT 0 + #define MPOL_PREFERRED 1 + #define MPOL_BIND 2 + #define MPOL_INTERLEAVE 3 + #define MPOL_LOCAL 4 + #define 
MPOL_MAX 5 + */ + huallocSetNumaInfo( + max_numa_node, + numa_nodes_cpu_mask, + hualloc_enable_mbind, + mbind_mode, + huallocLogPrint + ); + }); } double default_hualloc_cache_ratio = config->getDouble("hualloc_cache_ratio", 0.25); diff --git a/src/Common/HuAllocator.h b/src/Common/HuAllocator.h index 7f627808917..5a4ea1e84d3 100644 --- a/src/Common/HuAllocator.h +++ b/src/Common/HuAllocator.h @@ -121,12 +121,16 @@ class HuAllocator static void InitHuAlloc(size_t cached) { - hu_check_init_w(); - pthread_t tid; - size_t use_cache = cached / 2; - if (use_cache <= 0) - use_cache = 1024 * (1ull << 20); /// If not set properly use 1G as default - pthread_create(&tid, nullptr, ReclaimThread, &use_cache); + static std::once_flag hualloc_init_flag; + std::call_once(hualloc_init_flag, [&]() + { + hu_check_init_w(); + pthread_t tid; + size_t use_cache = cached / 2; + if (use_cache <= 0) + use_cache = 1024 * (1ull << 20); /// If not set properly use 1G as default + pthread_create(&tid, nullptr, ReclaimThread, &use_cache); + }); } protected: From efd695fea2710c04b648fb8acfec3d755330ccb7 Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 08:19:24 +0000 Subject: [PATCH 124/292] Merge 'cherry-pick-b6702db1' into 'cnch-2.2' fix(clickhousech@m-4679209062): [cp] flatten parts after calculating new part names See merge request: !23528 --- src/CloudServices/CnchMergeMutateThread.cpp | 104 ++++++++++++------ src/CloudServices/CnchMergeMutateThread.h | 7 +- src/Common/ErrorCodes.cpp | 2 + src/WorkerTasks/ManipulationTaskParams.cpp | 42 +++++-- src/WorkerTasks/ManipulationTaskParams.h | 5 +- .../00718_low_cardinality_alter.sql | 28 ++--- 6 files changed, 121 insertions(+), 67 deletions(-) diff --git a/src/CloudServices/CnchMergeMutateThread.cpp b/src/CloudServices/CnchMergeMutateThread.cpp index 6bcd50d641a..87360a13d72 100644 --- a/src/CloudServices/CnchMergeMutateThread.cpp +++ b/src/CloudServices/CnchMergeMutateThread.cpp @@ -27,6 +27,7 @@ #include #include #include 
+#include #include #include #include @@ -137,7 +138,7 @@ namespace } /// We maintain merging_mutating_parts based on the merge task's lifecycle. -/// Source parts are added to merging_mutating_parts when task is created, see FutureManipulationTask::assignSourceParts. +/// Source parts are added to merging_mutating_parts when task is created, see FutureManipulationTask::tagSourceParts. /// And they are removed from merging_mutating_parts when task record is destroyed. /// As the merge txn is committed in a 2-phase style, we need to hold the task record until txn phase-2 finish (success or fail). /// * phase 1 - ::finishTask is called. We mark the task record as committing by set commit_start_time, instead of destroy the record. @@ -150,7 +151,15 @@ ManipulationTaskRecord::~ManipulationTaskRecord() { std::lock_guard lock(parent.currently_merging_mutating_parts_mutex); for (auto & part : parts) + { parent.currently_merging_mutating_parts.erase(part->name()); + auto prev_part = part->tryGetPreviousPart(); + while(prev_part) + { + parent.currently_merging_mutating_parts.erase(prev_part->name()); + prev_part = prev_part->tryGetPreviousPart(); + } + } } { @@ -165,11 +174,22 @@ ManipulationTaskRecord::~ManipulationTaskRecord() } } -Strings ManipulationTaskRecord::getSourcePartNames() const +Strings ManipulationTaskRecord::getSourcePartNames(bool flatten) const { Strings res; for (const auto & part : parts) + { res.emplace_back(part->name()); + if (likely(flatten)) + { + auto prev_part = part->tryGetPreviousPart(); + while (prev_part) + { + res.emplace_back(prev_part->name()); + prev_part = prev_part->tryGetPreviousPart(); + } + } + } return res; } @@ -181,7 +201,15 @@ FutureManipulationTask::~FutureManipulationTask() { std::lock_guard lock(parent.currently_merging_mutating_parts_mutex); for (auto & part : parts) + { parent.currently_merging_mutating_parts.erase(part->name()); + auto prev_part = part->tryGetPreviousPart(); + while(prev_part) + { + 
parent.currently_merging_mutating_parts.erase(prev_part->name()); + prev_part = prev_part->tryGetPreviousPart(); + } + } } } catch (...) @@ -190,26 +218,38 @@ FutureManipulationTask::~FutureManipulationTask() } } -FutureManipulationTask & FutureManipulationTask::assignSourceParts(ServerDataPartsVector && parts_) +/// Add source parts (include invisible parts) to merging_mutating_parts. +FutureManipulationTask & FutureManipulationTask::tagSourceParts(ServerDataPartsVector && parts_) { - for (auto & part : parts_) - { - LOG_DEBUG(&Poco::Logger::get("MergeMutateDEBUG"), "assignSourceParts part {} name {}", static_cast(part.get()), part->name()); - } - - /// flatten the parts - CnchPartsHelper::flattenPartsVector(parts_); + auto check_and_add = [&](const auto & part_name) { + if (parent.currently_merging_mutating_parts.count(part_name)) + throw Exception("Part '" + part_name + "' was already in other Task, cancel merge.", ErrorCodes::ABORTED); + parent.currently_merging_mutating_parts.emplace(part_name); + }; if (!record->try_execute) { std::lock_guard lock(parent.currently_merging_mutating_parts_mutex); - for (auto & part : parts_) - if (parent.currently_merging_mutating_parts.count(part->name())) - throw Exception("Part '" + part->name() + "' was already in other Task, cancel merge.", ErrorCodes::ABORTED); + for (const auto & p : parts_) + { + check_and_add(p->name()); - for (auto & part : parts_) - parent.currently_merging_mutating_parts.emplace(part->name()); + auto prev_part = p->tryGetPreviousPart(); + while (prev_part) + { + check_and_add(prev_part->name()); + prev_part = prev_part->tryGetPreviousPart(); + } + } + } + + if (parent.log->trace()) + { + WriteBufferFromOwnString wb; + for (const auto & p : parts_) + wb << p->name() << " "; + LOG_TRACE(parent.log, "Added parts to merging_mutating_parts: {}", wb.str()); } parts = std::move(parts_); @@ -612,16 +652,14 @@ bool CnchMergeMutateThread::tryMergeParts(StoragePtr & istorage, StorageCnchMerg 
submitFutureManipulationTask(storage, *future_task); } - try - { - /// TODO: catch the exception during tryMergeParts() ? - - writePartMergeLogElement(istorage, part_merge_log_elem, metrics); - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - } + // try + // { + // writePartMergeLogElement(istorage, part_merge_log_elem, metrics); + // } + // catch (...) + // { + // tryLogCurrentException(__PRETTY_FUNCTION__); + // } return result; } @@ -785,7 +823,7 @@ bool CnchMergeMutateThread::trySelectPartsToMerge(StoragePtr & istorage, Storage postpone_partitions.erase(selected_parts.front()->info().partition_id); auto future_task = std::make_unique(*this, ManipulationType::Merge); - future_task->assignSourceParts(std::move(selected_parts)); + future_task->tagSourceParts(std::move(selected_parts)); merge_pending_queue.push(std::move(future_task)); } @@ -800,7 +838,7 @@ bool CnchMergeMutateThread::trySelectPartsToMerge(StoragePtr & istorage, Storage Strings CnchMergeMutateThread::removeLockedPartition(const Strings & partitions) { - constexpr UInt64 SLOW_THRESHOLD_MS = 200; + constexpr UInt64 slow_threshold_ms = 200; Stopwatch watch; auto & txn_coordinator = getContext()->getCnchTransactionCoordinator(); auto transaction = txn_coordinator.createTransaction( @@ -844,7 +882,7 @@ Strings CnchMergeMutateThread::removeLockedPartition(const Strings & partitions) /// And finishTransaction in the SCOPE_EXIT make sure the txn is clean by server but not DM. 
transaction->commitV2(); UInt64 milliseconds = watch.elapsedMilliseconds(); - if (milliseconds >= SLOW_THRESHOLD_MS) + if (milliseconds >= slow_threshold_ms) LOG_INFO(log, "removeLockedPartition took {} ms.", milliseconds); return res; } @@ -955,7 +993,6 @@ String CnchMergeMutateThread::submitFutureManipulationTask( task_record.task_id = params.task_id; task_record.worker = worker_client; - task_record.result_part_name = params.new_part_names.front(); task_record.manipulation_entry = local_context->getGlobalContext()->getManipulationList().insert(params, true, getContext()); task_record.manipulation_entry->get()->related_node = worker_client->getRPCAddress(); @@ -1032,6 +1069,7 @@ String CnchMergeMutateThread::triggerPartMerge( std::map mutation_entries; std::vector> mutation_timestamps; catalog->fillMutationsByStorage(storage_id, mutation_entries); + mutation_timestamps.reserve(mutation_entries.size()); for (const auto & [_, mutation_entry] : mutation_entries) mutation_timestamps.emplace_back(mutation_entry.commit_time, mutation_entry.commands.changeSchema()); @@ -1091,7 +1129,7 @@ String CnchMergeMutateThread::triggerPartMerge( storage, FutureManipulationTask(*this, ManipulationType::Merge) .setTryExecute(try_execute) - .assignSourceParts(std::move(res.front())), + .tagSourceParts(std::move(res.front())), true); } @@ -1196,7 +1234,7 @@ void CnchMergeMutateThread::finishTask(const String & task_id, std::functionsecond->commit_start_time = time(nullptr); partition_id = it->second->parts.front()->info().partition_id; - source_part_names = it->second->getSourcePartNames(); + source_part_names = it->second->getSourcePartNames(/*flatten*/true); try_execute = it->second->try_execute; manipulation_submit_time_ns = it->second->submit_time_ns; } @@ -1400,7 +1438,7 @@ bool CnchMergeMutateThread::tryMutateParts(StoragePtr & istorage, StorageCnchMer storage, FutureManipulationTask(*this, type) .setMutationEntry(*current_mutate_entry) - 
.assignSourceParts(std::move(alter_parts))); + .tagSourceParts(std::move(alter_parts))); alter_parts.clear(); curr_mutate_part_size = 0; if (running_mutation_tasks >= storage.getSettings()->max_addition_mutation_task_num) @@ -1415,7 +1453,7 @@ bool CnchMergeMutateThread::tryMutateParts(StoragePtr & istorage, StorageCnchMer storage, FutureManipulationTask(*this, type) .setMutationEntry(*current_mutate_entry) - .assignSourceParts(std::move(alter_parts))); + .tagSourceParts(std::move(alter_parts))); } return remain_tasks_in_partition; diff --git a/src/CloudServices/CnchMergeMutateThread.h b/src/CloudServices/CnchMergeMutateThread.h index 60e9dbecc2f..5fb80b2a310 100644 --- a/src/CloudServices/CnchMergeMutateThread.h +++ b/src/CloudServices/CnchMergeMutateThread.h @@ -74,10 +74,7 @@ struct ManipulationTaskRecord CnchWorkerClientPtr worker; size_t lost_count{0}; - /// for system.part_merge_log & system.server_part_log - String result_part_name; - - Strings getSourcePartNames() const; + Strings getSourcePartNames(bool flatten = false) const; }; struct FutureManipulationTask @@ -102,7 +99,7 @@ struct FutureManipulationTask } TxnTimestamp calcColumnsCommitTime() const; - FutureManipulationTask & assignSourceParts(ServerDataPartsVector && parts); + FutureManipulationTask & tagSourceParts(ServerDataPartsVector && parts); FutureManipulationTask & prepareTransaction(); std::unique_ptr moveRecord(); diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index 1883368903b..74c6ced4e5c 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -861,6 +861,8 @@ M(7112, RESOURCE_MANAGER_WRONG_COORDINATE_MODE) \ M(7113, RESOURCE_MANAGER_REMOVE_WORKER_ERROR) \ M(7114, RESOURCE_MANAGER_LEADER_NOT_WORK_WELL) \ +\ + M(7150, MERGE_BAD_PART_NAME) \ \ M(7200, UNIQUE_KEY_STRING_SIZE_LIMIT_EXCEEDED) \ M(7201, UNIQUE_TABLE_DUPLICATE_KEY_FOUND) \ diff --git a/src/WorkerTasks/ManipulationTaskParams.cpp b/src/WorkerTasks/ManipulationTaskParams.cpp index 
c10e978cfb5..834f6e18428 100644 --- a/src/WorkerTasks/ManipulationTaskParams.cpp +++ b/src/WorkerTasks/ManipulationTaskParams.cpp @@ -27,6 +27,7 @@ namespace DB namespace ErrorCodes { extern const int LOG_ERROR; + extern const int MERGE_BAD_PART_NAME; } String ManipulationTaskParams::toDebugString() const @@ -74,7 +75,16 @@ String ManipulationTaskParams::toDebugString() const } template -void ManipulationTaskParams::assignSourcePartsImpl(const Vec & parts, UInt64 ts) +static String toPartNames(const Vec & parts) +{ + WriteBufferFromOwnString wb; + for (const auto & p : parts) + wb << p->get_name() << ", "; + return wb.str(); +} + +template +void ManipulationTaskParams::calcNewPartNames(const Vec & parts, UInt64 ts) { if (unlikely(type == Type::Empty)) throw Exception("Expected non-empty manipulate type", ErrorCodes::LOGICAL_ERROR); @@ -114,9 +124,6 @@ void ManipulationTaskParams::assignSourcePartsImpl(const Vec & parts, UInt64 ts) part_info.min_block = (*left)->get_info().min_block; part_info.max_block = (*std::prev(right))->get_info().max_block; part_info.level = (*left)->get_info().level + 1; - - // TODO: Double check any issue: previously the mutation is set to max part's mutation, now set mutation to current txn id. - // part_info.mutation = (*std::prev(right))->info.mutation; part_info.mutation = txn_id; for (auto it = left; it != right; ++it) @@ -124,6 +131,20 @@ void ManipulationTaskParams::assignSourcePartsImpl(const Vec & parts, UInt64 ts) part_info.level = std::max(part_info.level, (*it)->get_info().level + 1); } + /// If merged_part's name is same with some source part, it means there will be duplicate part names in result parts + /// (merged_part and some tombstone part). It's undefined behavior when committing such parts to KV. + /// So check the part name before executing. + /// Skip the check for single-part merge (parts.size == 1), as it acquire new block id on worker. 
+ if (type == ManipulationType::Merge && parts.size() > 1) + { + for (const auto & p : parts) + { + if (p->get_info().min_block == part_info.min_block && p->get_info().max_block == part_info.max_block) + throw Exception(ErrorCodes::MERGE_BAD_PART_NAME, + "Merged part has the same part name with some source part: {}", toPartNames(parts)); + } + } + new_part_names.push_back(part_info.getPartName()); left = right; @@ -133,24 +154,29 @@ void ManipulationTaskParams::assignSourcePartsImpl(const Vec & parts, UInt64 ts) /// For server (CnchMergeMutateThread) void ManipulationTaskParams::assignSourceParts(ServerDataPartsVector parts) { - assignSourcePartsImpl(parts); + /// Make sure there are only visible parts when doing calculating new part names. + calcNewPartNames(parts); + /// Then, flatten parts so that the RPC request contains all parts. + CnchPartsHelper::flattenPartsVector(parts); source_parts = std::move(parts); } /// For part merger void ManipulationTaskParams::assignSourceParts(MergeTreeDataPartsVector parts) { - assignSourcePartsImpl(parts); + calcNewPartNames(parts); + /// Do we need flatten parts for part merger? source_data_parts = std::move(parts); } -/// For worker +/// For worker. The input parts are flattened. void ManipulationTaskParams::assignParts(MergeTreeMutableDataPartsVector parts, const std::function & ts_getter) { for (auto & part: parts) all_parts.emplace_back(std::move(part)); + /// Make sure there are only visible parts when doing calculating new part names. source_data_parts = CnchPartsHelper::calcVisibleParts(all_parts, false); - assignSourcePartsImpl(source_data_parts, (source_data_parts.size() == 1 && type == Type::Merge) ? ts_getter() : 0); + calcNewPartNames(source_data_parts, (source_data_parts.size() == 1 && type == Type::Merge) ? 
ts_getter() : 0); } } diff --git a/src/WorkerTasks/ManipulationTaskParams.h b/src/WorkerTasks/ManipulationTaskParams.h index 11c6cdad5a3..cfa088df6b1 100644 --- a/src/WorkerTasks/ManipulationTaskParams.h +++ b/src/WorkerTasks/ManipulationTaskParams.h @@ -75,8 +75,9 @@ struct ManipulationTaskParams void assignParts(MergeTreeMutableDataPartsVector parts, const std::function & ts_getter); private: - template - void assignSourcePartsImpl(const Vec & parts, UInt64 ts = 0); + /// Calculate new part names based on source parts. + /// `parts` should be visible parts (not flattened). + template void calcNewPartNames(const Vec & parts, UInt64 ts = 0); }; } diff --git a/tests/queries/4_cnch_stateless/00718_low_cardinality_alter.sql b/tests/queries/4_cnch_stateless/00718_low_cardinality_alter.sql index 8311bfded54..0bbf5034e10 100644 --- a/tests/queries/4_cnch_stateless/00718_low_cardinality_alter.sql +++ b/tests/queries/4_cnch_stateless/00718_low_cardinality_alter.sql @@ -1,40 +1,30 @@ set allow_suspicious_low_cardinality_types = 1; +SET mutations_sync = 1; drop table if exists tab_00718; -create table tab_00718 (a String, b LowCardinality(UInt32)) engine = CnchMergeTree order by a; + +-- set max_addition_bg_task_num = 0 to stop merge select. 
+create table tab_00718 (a String, b LowCardinality(UInt32)) engine = CnchMergeTree order by a SETTINGS max_addition_bg_task_num = 0; +SYSTEM START MERGES tab_00718; + insert into tab_00718 values ('a', 1); select *, toTypeName(b) from tab_00718; alter table tab_00718 modify column b UInt32; --- wait task finish -SELECT sleepEachRow(3) FROM numbers(30) FORMAT Null; - select *, toTypeName(b) from tab_00718; alter table tab_00718 modify column b LowCardinality(UInt32); --- wait task finish -SELECT sleepEachRow(3) FROM numbers(30) FORMAT Null; - select *, toTypeName(b) from tab_00718; alter table tab_00718 modify column b StringWithDictionary; --- wait task finish -SELECT sleepEachRow(3) FROM numbers(30) FORMAT Null; - select *, toTypeName(b) from tab_00718; alter table tab_00718 modify column b LowCardinality(UInt32); --- wait task finish -SELECT sleepEachRow(3) FROM numbers(30) FORMAT Null; - select *, toTypeName(b) from tab_00718; -alter table tab_00718 modify column b String; --- wait task finish -SELECT sleepEachRow(3) FROM numbers(30) FORMAT Null; +alter table tab_00718 modify column b String; select *, toTypeName(b) from tab_00718; -alter table tab_00718 modify column b LowCardinality(UInt32); --- wait task finish -SELECT sleepEachRow(3) FROM numbers(30) FORMAT Null; +alter table tab_00718 modify column b LowCardinality(UInt32); select *, toTypeName(b) from tab_00718; + drop table if exists tab_00718; From e51016e201ed63489864b99710c89eff544e10d4 Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 08:20:11 +0000 Subject: [PATCH 125/292] Merge 'cherry-pick-ea50d18e-2' into 'cnch-2.2' feat(clickhousech@m-4674720504): [cp 2.2] support CNCH unique table insert ignore See merge request: !23526 --- src/CloudServices/CnchDataWriter.cpp | 87 +++++++++++++------ src/CloudServices/CnchDedupHelper.h | 27 +++--- src/Core/Settings.h | 2 +- src/Core/SettingsEnums.cpp | 2 +- src/Core/SettingsEnums.h | 1 + src/MergeTreeCommon/MergeTreeDataDeduper.cpp | 2 +- 
.../CloudMergeTreeBlockOutputStream.cpp | 77 +++++++++++----- .../MergeTree/MergeTreeDataWriter.cpp | 2 +- src/Storages/StorageCnchMergeTree.cpp | 7 +- ...st_exception_on_unique_key_duplication.sql | 4 + ...niquekey_test_insert_ignore_mode.reference | 11 +++ ...0045_uniquekey_test_insert_ignore_mode.sql | 47 ++++++++++ 12 files changed, 205 insertions(+), 64 deletions(-) create mode 100644 tests/queries/4_cnch_stateless/10045_uniquekey_test_insert_ignore_mode.reference create mode 100644 tests/queries/4_cnch_stateless/10045_uniquekey_test_insert_ignore_mode.sql diff --git a/src/CloudServices/CnchDataWriter.cpp b/src/CloudServices/CnchDataWriter.cpp index dd7937f392b..780293d464e 100644 --- a/src/CloudServices/CnchDataWriter.cpp +++ b/src/CloudServices/CnchDataWriter.cpp @@ -16,7 +16,7 @@ #include #include #include - +#include #include #include #include @@ -223,7 +223,7 @@ DumpedData CnchDataWriter::dumpCnchParts( auto txn_id = curr_txn->getTransactionID(); /// Write undo buffer first before dump to vfs - std::vector undo_resources; + UndoResources undo_resources; undo_resources.reserve(temp_parts.size() + temp_bitmaps.size() + temp_staged_parts.size()); /// For local parts and stage parts, the remote parts can be at different disk, /// so we record the disk name of each part in the undo buffer. @@ -287,33 +287,74 @@ DumpedData CnchDataWriter::dumpCnchParts( MergeTreeCNCHDataDumper dumper(storage, part_generator_id); watch.restart(); - ThreadPool dump_pool(std::min( - static_cast(storage.getSettings()->cnch_parallel_dumping_threads), std::max(temp_staged_parts.size(), temp_parts.size()))); + size_t pool_size = std::min(static_cast(storage.getSettings()->cnch_parallel_dumping_threads), std::max(temp_staged_parts.size(), temp_parts.size())); + /// make sure pool_size >= 1 + pool_size = pool_size >= 1 ? 
pool_size : 1; result.parts.resize(temp_parts.size()); - /// TODO: only use pool if > 1 parts - for (size_t i = 0; i < temp_parts.size(); ++i) - { - dump_pool.scheduleOrThrowOnError([&, i]() { + /// parallel dump delete bitmaps + // TODO: dump all bitmaps to one file to avoid creating too many small files on vfs + result.bitmaps = dumpDeleteBitmaps(storage, temp_bitmaps); + result.staged_parts.resize(temp_staged_parts.size()); + + auto dump_parts = [&, this](size_t i) -> void { + for (; i < temp_parts.size(); i += pool_size) + { const auto & temp_part = temp_parts[i]; auto dumped_part = dumper.dumpTempPart(temp_part, part_disks[i]); LOG_TRACE(storage.getLogger(), "Dumped part {}", temp_part->name); result.parts[i] = std::move(dumped_part); - }); - } - dump_pool.wait(); - // TODO: dump all bitmaps to one file to avoid creating too many small files on vfs - result.bitmaps = dumpDeleteBitmaps(storage, temp_bitmaps); - result.staged_parts.resize(temp_staged_parts.size()); - for (size_t i = 0; i < temp_staged_parts.size(); ++i) - { - dump_pool.scheduleOrThrowOnError([&, i]() { + } + }; + + auto dump_staged_parts = [&, this](size_t i) -> void { + for (; i < temp_staged_parts.size(); i += pool_size) + { const auto & temp_staged_part = temp_staged_parts[i]; auto staged_part = dumper.dumpTempPart(temp_staged_part, part_disks[i + temp_parts.size()]); LOG_TRACE(storage.getLogger(), "Dumped staged part {}", temp_staged_part->name); result.staged_parts[i] = std::move(staged_part); - }); + } + }; + + if (pool_size > 1) + { + ThreadPool dump_pool(pool_size); + for (size_t thread_id = 1; thread_id <= pool_size; thread_id++) + { + dump_pool.scheduleOrThrowOnError([&dump_parts, i = thread_id - 1, thread_group = CurrentThread::getGroup()] + { + SCOPE_EXIT_SAFE({ + if (thread_group) + CurrentThread::detachQueryIfNotDetached(); + }); + if (thread_group) + CurrentThread::attachTo(thread_group); + dump_parts(i); + }); + } + dump_pool.wait(); + + for (size_t thread_id = 1; thread_id <= 
pool_size; thread_id++) + { + dump_pool.scheduleOrThrowOnError([&dump_staged_parts, i = thread_id - 1, thread_group = CurrentThread::getGroup()] + { + SCOPE_EXIT_SAFE({ + if (thread_group) + CurrentThread::detachQueryIfNotDetached(); + }); + if (thread_group) + CurrentThread::attachTo(thread_group); + dump_staged_parts(i); + }); + } + dump_pool.wait(); + } + else + { + assert(pool_size == 1); + dump_parts(0); + dump_staged_parts(0); } - dump_pool.wait(); LOG_DEBUG( storage.getLogger(), @@ -382,11 +423,7 @@ void CnchDataWriter::commitDumpedParts(const DumpedData & dumped_data) throw; } - if (auto part_log = context->getPartLog(storage.getDatabaseName())) - { - // for (auto & dumped_part : dumped_parts) - // part_log->add(PartLog::createElement(PartLogElement::COMMIT_PART, dumped_part, watch.elapsed())); - } + /// part log will be written in InsertAction::postCommit LOG_DEBUG( storage.getLogger(), @@ -704,7 +741,7 @@ void CnchDataWriter::publishStagedParts(const MergeTreeDataPartsCNCHVector & sta /// prepare undo resources /// setMetadata() return reference, so need to cast move - std::vector undo_resources; + UndoResources undo_resources; for (auto & part : items.parts) undo_resources.emplace_back( std::move(UndoResource(txn_id, UndoResourceType::Part, part->info.getPartNameWithHintMutation()).setMetadataOnly(true))); diff --git a/src/CloudServices/CnchDedupHelper.h b/src/CloudServices/CnchDedupHelper.h index 93430550bd5..f5ed9ce548f 100644 --- a/src/CloudServices/CnchDedupHelper.h +++ b/src/CloudServices/CnchDedupHelper.h @@ -44,7 +44,8 @@ enum class DedupMode : unsigned int { APPEND = 0, UPSERT, - THROW + THROW, + IGNORE }; inline String typeToString(DedupMode type) @@ -57,6 +58,8 @@ inline String typeToString(DedupMode type) return "UPSERT"; case DedupMode::THROW: return "THROW"; + case DedupMode::IGNORE: + return "IGNORE"; default: return "Unknown"; } @@ -66,13 +69,13 @@ class DedupScope { public: - enum class DedupMode + enum class DedupLevel { TABLE, 
PARTITION, }; - enum class LockMode + enum class LockLevel { NORMAL, /// For NORMAL lock mode, if dedup mode is table, it's table level. Otherwise, it's partition level. BUCKET, /// BUCKET level lock mode. @@ -91,35 +94,35 @@ class DedupScope static DedupScope TableDedup() { - static DedupScope table_scope{DedupMode::TABLE}; + static DedupScope table_scope{DedupLevel::TABLE}; return table_scope; } static DedupScope TableDedupWithBucket(const BucketSet & buckets_) { - DedupScope table_scope{DedupMode::TABLE, LockMode::BUCKET}; + DedupScope table_scope{DedupLevel::TABLE, LockLevel::BUCKET}; table_scope.buckets = buckets_; return table_scope; } static DedupScope PartitionDedup(const NameOrderedSet & partitions_) { - DedupScope partition_scope{DedupMode::PARTITION}; + DedupScope partition_scope{DedupLevel::PARTITION}; partition_scope.partitions = partitions_; return partition_scope; } static DedupScope PartitionDedupWithBucket(const BucketWithPartitionSet & bucket_with_partition_set_) { - DedupScope partition_scope{DedupMode::PARTITION, LockMode::BUCKET}; + DedupScope partition_scope{DedupLevel::PARTITION, LockLevel::BUCKET}; partition_scope.bucket_with_partition_set = bucket_with_partition_set_; for (const auto & bucket_with_partition : partition_scope.bucket_with_partition_set) partition_scope.partitions.insert(bucket_with_partition.first); return partition_scope; } - bool isTableDedup() const { return dedup_mode == DedupMode::TABLE; } - bool isBucketLock() const { return lock_mode == LockMode::BUCKET; } + bool isTableDedup() const { return dedup_level == DedupLevel::TABLE; } + bool isBucketLock() const { return lock_level == LockLevel::BUCKET; } const NameOrderedSet & getPartitions() const { return partitions; } @@ -131,10 +134,10 @@ class DedupScope void filterParts(MergeTreeDataPartsCNCHVector & parts) const; private: - DedupScope(DedupMode dedup_mode_, LockMode lock_mode_ = LockMode::NORMAL) : dedup_mode(dedup_mode_), lock_mode(lock_mode_) { } + 
DedupScope(DedupLevel dedup_level_, LockLevel lock_level_ = LockLevel::NORMAL) : dedup_level(dedup_level_), lock_level(lock_level_) { } - DedupMode dedup_mode; - LockMode lock_mode; + DedupLevel dedup_level; + LockLevel lock_level; NameOrderedSet partitions; BucketSet buckets; diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 7c55cf6c0bf..a7bd124ba64 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -1320,7 +1320,7 @@ enum PreloadLevelSettings : UInt64 M(Seconds, unique_key_attach_partition_timeout, 3600, "Default timeout (seconds) for attaching partition for unique key", 0) \ M(Bool, enable_unique_table_attach_without_dedup, false, "Enable directly make attached parts visible without dedup for unique table, for example: override mode of offline loading", 0) \ M(Bool, enable_unique_table_detach_ignore_delete_bitmap, false, "Enable ignore delete bitmap info when handling detach commands for unique table, for example: delete bitmap has been broken, we can just ignore it via this parameter.", 0) \ - M(DedupKeyMode, dedup_key_mode, DedupKeyMode::REPLACE, "Handle different deduplication modes, current valid values: REPLACE, THROW, APPEND. THROW mode can only be used in non-staging area scenarios. APPEND mode will not execute dedup process, which is suitable for historical non-duplicated data import scenarios", 0) \ + M(DedupKeyMode, dedup_key_mode, DedupKeyMode::REPLACE, "Handle different deduplication modes, current valid values: REPLACE, THROW, APPEND, IGNORE. THROW mode and IGNORE mode can only be used in non-staging area scenarios. APPEND mode will not execute dedup process, which is suitable for historical non-duplicated data import scenarios", 0) \ M(Seconds, unique_sleep_seconds_after_acquire_lock, 0, "Only for test", 0) \ M(Seconds, unique_acquire_write_lock_timeout, 0, "It has higher priority than table setting. 
Only when it's zero, use table setting", 0) \ M(Seconds, max_dedup_execution_time, 21600, "Set default value to 6h", 0) \ diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index c43d93819c2..82a72828946 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -223,7 +223,7 @@ IMPLEMENT_SETTING_ENUM(ShortCircuitFunctionEvaluation, ErrorCodes::BAD_ARGUMENTS IMPLEMENT_SETTING_ENUM( DedupKeyMode, ErrorCodes::BAD_ARGUMENTS, - {{"replace", DedupKeyMode::REPLACE}, {"append", DedupKeyMode::APPEND}, {"throw", DedupKeyMode::THROW}}) + {{"replace", DedupKeyMode::REPLACE}, {"append", DedupKeyMode::APPEND}, {"throw", DedupKeyMode::THROW}, {"ignore", DedupKeyMode::IGNORE}}) IMPLEMENT_SETTING_ENUM( RefreshViewTaskStatus, diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h index a3b978632c9..c591a088b5f 100644 --- a/src/Core/SettingsEnums.h +++ b/src/Core/SettingsEnums.h @@ -381,6 +381,7 @@ enum class DedupKeyMode REPLACE, THROW, APPEND, + IGNORE, }; DECLARE_SETTING_ENUM(DedupKeyMode) diff --git a/src/MergeTreeCommon/MergeTreeDataDeduper.cpp b/src/MergeTreeCommon/MergeTreeDataDeduper.cpp index 439c094dbdf..201e31f4215 100644 --- a/src/MergeTreeCommon/MergeTreeDataDeduper.cpp +++ b/src/MergeTreeCommon/MergeTreeDataDeduper.cpp @@ -224,7 +224,7 @@ void MergeTreeDataDeduper::dedupKeysWithParts( { RowPos lhs = ReplacingSortedKeysIterator::decodeCurrentRowPos(base_iter, version_mode, parts, base_implicit_versions); const RowPos & rhs = keys->CurrentRowPos(); - if (keys->IsCurrentLowPriority()) + if (keys->IsCurrentLowPriority() || dedup_mode == CnchDedupHelper::DedupMode::IGNORE) addRowIdToBitmap(delta_bitmaps[rhs.child + parts.size()], rhs.rowid); else { diff --git a/src/Storages/MergeTree/CloudMergeTreeBlockOutputStream.cpp b/src/Storages/MergeTree/CloudMergeTreeBlockOutputStream.cpp index ff5dddc3c10..d1916b0e350 100644 --- a/src/Storages/MergeTree/CloudMergeTreeBlockOutputStream.cpp +++ 
b/src/Storages/MergeTree/CloudMergeTreeBlockOutputStream.cpp @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -40,6 +41,7 @@ namespace ErrorCodes { extern const int ABORTED; extern const int CNCH_LOCK_ACQUIRE_FAILED; + extern const int INCORRECT_DATA; extern const int INSERTION_LABEL_ALREADY_EXISTS; extern const int LOGICAL_ERROR; extern const int UNIQUE_KEY_STRING_SIZE_LIMIT_EXCEEDED; @@ -70,10 +72,8 @@ void CloudMergeTreeBlockOutputStream::checkAndInit() if (dedup_parameters.enable_staging_area) { - if (dedup_parameters.enable_append_mode) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "In APPEND dedup key mode, can't write to staging area."); - if (context->getSettings().dedup_key_mode == DedupKeyMode::THROW) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Insert VALUES into staging area with dedup_key_mode=DedupKeyMode::THROW is not allowed"); + if (context->getSettings().dedup_key_mode != DedupKeyMode::REPLACE) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Only UPSERT mode can write to staging area."); LOG_DEBUG(log, "enable staging area for write"); } else @@ -95,6 +95,11 @@ void CloudMergeTreeBlockOutputStream::checkAndInit() cnch_writer.setDedupMode(CnchDedupHelper::DedupMode::UPSERT); LOG_TRACE(log, "enable upsert dedup mode"); break; + case DedupKeyMode::IGNORE: + /// case 5(unique table with sync insert, when there has same keys, only keep the first occurrences of the row and ignore subsequent occurrences rows) + cnch_writer.setDedupMode(CnchDedupHelper::DedupMode::IGNORE); + LOG_TRACE(log, "enable insert ignore dedup mode"); + break; default: throw Exception( ErrorCodes::LOGICAL_ERROR, "Unsupported dedup key mode: {}", context->getSettings().dedup_key_mode.toString()); @@ -405,15 +410,25 @@ namespace bool operator()(size_t lhs, size_t rhs) const { - for (auto & key : keys) - { - int cmp = key.column->compareAt(lhs, rhs, *key.column, /*nan_direction_hint=*/1); - if (cmp < 0) - return true; - if (cmp > 0) + for (const auto 
& key : keys) + if (key.column->compareAt(lhs, rhs, *key.column, /*nan_direction_hint=*/1)) return false; - } - return false; + return true; + } + }; + + struct BlockUniqueKeyHasher + { + const ColumnsWithTypeAndName & keys; + explicit BlockUniqueKeyHasher(const ColumnsWithTypeAndName & keys_) : keys(keys_) { } + + size_t operator()(size_t rowid) const + { + size_t hash_value{0}; + std::hash hash_function; + for (const auto & key : keys) + hash_value ^= hash_function(key.column.get()->getDataAt(rowid).toView()); + return hash_value; } }; } @@ -441,8 +456,10 @@ CloudMergeTreeBlockOutputStream::FilterInfo CloudMergeTreeBlockOutputStream::ded } BlockUniqueKeyComparator comparator(keys); - /// first rowid of key -> rowid of the last occurrence of the same key - std::map index(comparator); + BlockUniqueKeyHasher hasher(keys); + /// first rowid of key -> rowid of the last occurrence of the same key in replace/append/throw mode; + /// first rowid of key -> rowid of the first occurrence of the same key in insert ignore mode. + phmap::flat_hash_map index(keys[0].column->size(), hasher, comparator); auto block_size = block_copy.rows(); FilterInfo res; @@ -468,16 +485,36 @@ CloudMergeTreeBlockOutputStream::FilterInfo CloudMergeTreeBlockOutputStream::ded /// Otherwise use value from version column size_t old_pos = it->second; size_t new_pos = rowid; - if (version_column && !delete_ignore_version(rowid) - && version_column->column->getUInt(old_pos) > version_column->column->getUInt(new_pos)) - std::swap(old_pos, new_pos); - res.filter[old_pos] = 0; - it->second = new_pos; - res.num_filtered++; + if (context->getSettings().dedup_key_mode == DedupKeyMode::THROW) + { + /// In insert throw mode, when multiple records with the same unique key are found, + /// we will not consider the delete flag column, instead, we will immediately throw an exception. 
+ throw Exception("Found duplication in the block when insert with setting dedup_key_mode=DedupKeyMode::THROW", ErrorCodes::INCORRECT_DATA); + } + else if (context->getSettings().dedup_key_mode == DedupKeyMode::REPLACE || context->getSettings().dedup_key_mode == DedupKeyMode::APPEND) + { + if (version_column && !delete_ignore_version(rowid) && version_column->column->getUInt(old_pos) > version_column->column->getUInt(new_pos)) + std::swap(old_pos, new_pos); + res.filter[old_pos] = 0; + it->second = new_pos; + res.num_filtered++; + } + else + { + /// In insert ignore mode, when multiple records with the same unique key are found, + /// we will ignore version column, and save the first row(not deleted) of duplicated keys. + if (is_delete_row(old_pos)) + std::swap(old_pos, new_pos); + res.filter[new_pos] = 0; + it->second = old_pos; + res.num_filtered++; + } } else + { index[rowid] = rowid; + } /// Check the length limit for string type. size_t unique_string_keys_size = 0; diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 8b8995cce91..e6592ba2183 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -506,7 +506,7 @@ MergeTreeMetaBase::MutableDataPartPtr MergeTreeDataWriter::writeTempPart( nullptr, write_location); - LOG_DEBUG(log, "Writing temp part to {}...\n", new_data_part->getFullRelativePath()); + LOG_DEBUG(log, "Writing temp part to {}", new_data_part->getFullRelativePath()); if (data.storage_settings.get()->assign_part_uuids) new_data_part->uuid = UUIDHelpers::generateV4(); diff --git a/src/Storages/StorageCnchMergeTree.cpp b/src/Storages/StorageCnchMergeTree.cpp index 7103924ee99..22f0e82e326 100644 --- a/src/Storages/StorageCnchMergeTree.cpp +++ b/src/Storages/StorageCnchMergeTree.cpp @@ -223,16 +223,17 @@ void StorageCnchMergeTree::loadMutations() { getContext()->getCnchCatalog()->fillMutationsByStorage(getStorageID(), 
mutations_by_version); - auto mutations_debug_str = [&]() -> String { + auto print_mutations_debug_str = [&]() -> void { String res; for (auto const & [_, mutation] : mutations_by_version) { res += mutation.toString() + "\n"; } - return res; + if (!mutations_by_version.empty()) + LOG_TRACE(log, "All mutations:\n{}", res); }; - LOG_TRACE(log, "All mutations:\n{}", mutations_debug_str()); + print_mutations_debug_str(); } catch(...) { diff --git a/tests/queries/4_cnch_stateless/10044_uniquekey_test_exception_on_unique_key_duplication.sql b/tests/queries/4_cnch_stateless/10044_uniquekey_test_exception_on_unique_key_duplication.sql index 47e3c1e10a2..0c682a6f57e 100644 --- a/tests/queries/4_cnch_stateless/10044_uniquekey_test_exception_on_unique_key_duplication.sql +++ b/tests/queries/4_cnch_stateless/10044_uniquekey_test_exception_on_unique_key_duplication.sql @@ -32,6 +32,10 @@ INSERT INTO unique_with_exception_on_unique_key_duplication VALUES ('2020-10-29 INSERT INTO unique_with_exception_on_unique_key_duplication Format Values SETTINGS dedup_key_mode = 'throw' ('2020-10-29 23:50:00', 10004, 'Beijing', '男装', 3, 300), ('2020-10-29 23:50:00', 10005, 'Beijing', '男装', 2, 200); -- { serverError 117 } +-- block contains duplicate unique keys +INSERT INTO unique_with_exception_on_unique_key_duplication Format Values SETTINGS dedup_key_mode = 'throw' +('2020-10-29 23:50:00', 10006, 'Beijing', '男装', 6, 600), ('2020-10-29 23:50:00', 10006, 'Beijing', '男装', 6, 600); -- { serverError 117 } + SELECT * FROM unique_with_exception_on_unique_key_duplication order by event_time, product_id; DROP TABLE IF EXISTS unique_with_exception_on_unique_key_duplication; diff --git a/tests/queries/4_cnch_stateless/10045_uniquekey_test_insert_ignore_mode.reference b/tests/queries/4_cnch_stateless/10045_uniquekey_test_insert_ignore_mode.reference new file mode 100644 index 00000000000..a24d85ec97c --- /dev/null +++ b/tests/queries/4_cnch_stateless/10045_uniquekey_test_insert_ignore_mode.reference 
@@ -0,0 +1,11 @@ +test1 2020-10-29 23:40:00 10001 10001AA 11 1100 +test2 2020-10-29 23:40:00 10001 10001AA 11 1100 +test3 2020-10-29 23:40:00 10001 10001AA 11 1100 +test3 2020-10-29 23:40:00 10003 10001C 3 300 +test4 2020-10-29 23:40:00 10001 10001AA 11 1100 +test4 2020-10-29 23:40:00 10003 10001C 3 300 +test5 2020-10-29 23:40:00 10001 10001AA 11 1100 +test5 2020-10-29 23:40:00 10003 10001CCCC 3333 333300 +test6 2020-10-29 23:50:00 10001 10001AA 11 1100 +test6 2020-10-29 23:40:00 10002 10002BB 22 2200 +test6 2020-10-29 23:40:00 10003 10003C 3 300 diff --git a/tests/queries/4_cnch_stateless/10045_uniquekey_test_insert_ignore_mode.sql b/tests/queries/4_cnch_stateless/10045_uniquekey_test_insert_ignore_mode.sql new file mode 100644 index 00000000000..c0fcd72e038 --- /dev/null +++ b/tests/queries/4_cnch_stateless/10045_uniquekey_test_insert_ignore_mode.sql @@ -0,0 +1,47 @@ +use test; +DROP TABLE IF EXISTS test_unique_ignore_mode; +DROP TABLE IF EXISTS test_unique_ignore_mode_version; + +CREATE TABLE test_unique_ignore_mode (event_time DateTime, id UInt64, s String, m1 UInt32, m2 UInt64) ENGINE = CnchMergeTree() PARTITION BY toDate(event_time) ORDER BY (s, id) PRIMARY KEY s UNIQUE KEY id; + +SET enable_staging_area_for_write = 1, dedup_key_mode = 'ignore'; +-- DB::Exception: Only UPSERT mode can write to staging area +INSERT INTO test_unique_ignore_mode VALUES ('2020-10-29 23:40:00', 10001, '10001A', 5, 500); -- { serverError 36 } + +SET enable_staging_area_for_write = 0; +-- delete the first row +INSERT INTO test_unique_ignore_mode (*, _delete_flag_) VALUES ('2020-10-29 23:40:00', 10001, '10001A', 1, 100, 1), ('2020-10-29 23:40:00', 10001, '10001AA', 11, 1100, 0); +select 'test1', * from test_unique_ignore_mode order by event_time, id; + +-- empty block +INSERT INTO test_unique_ignore_mode (*, _delete_flag_) VALUES ('2020-10-29 23:40:00', 10002, '10001B', 2, 200, 1), ('2020-10-29 23:40:00', 10002, '10001BB', 22, 2200, 1); +select 'test2', * from test_unique_ignore_mode 
order by event_time, id; + +-- keep the first row +INSERT INTO test_unique_ignore_mode (*) VALUES ('2020-10-29 23:40:00', 10003, '10001C', 3, 300), ('2020-10-29 23:40:00', 10003, '10001CC', 3, 3300); +select 'test3', * from test_unique_ignore_mode order by event_time, id; + +INSERT INTO test_unique_ignore_mode (*) VALUES ('2020-10-29 23:40:00', 10003, '10001CCC', 333, 33300); +select 'test4', * from test_unique_ignore_mode order by event_time, id; + +SET dedup_key_mode = 'replace'; +INSERT INTO test_unique_ignore_mode (*) VALUES ('2020-10-29 23:40:00', 10003, '10001CCCC', 3333, 333300); +select 'test5', * from test_unique_ignore_mode order by event_time, id; + +-- test insert throw mode +SET dedup_key_mode = 'throw'; +INSERT INTO test_unique_ignore_mode (*) VALUES ('2020-10-29 23:40:00', 10003, 'test throw', 3333, 333300); -- { serverError 117 } +INSERT INTO test_unique_ignore_mode (*) VALUES ('2020-10-29 23:40:00', 10004, 'test throw', 3333, 333300), ('2020-10-29 23:40:00', 10004, 'test throw', 3333, 333300); -- { serverError 117 } + +DROP TABLE IF EXISTS test_unique_ignore_mode_version; +CREATE TABLE test_unique_ignore_mode_version (event_time DateTime, id UInt64, s String, m1 UInt32, m2 UInt64) ENGINE = CnchMergeTree(event_time) PARTITION BY toDate(event_time) ORDER BY (s, id) PRIMARY KEY s UNIQUE KEY id; + +SET enable_staging_area_for_write = 0, dedup_key_mode = 'ignore'; +INSERT INTO test_unique_ignore_mode_version (*, _delete_flag_) VALUES ('2020-10-29 23:40:00', 10001, '10001A', 1, 100, 1), ('2020-10-29 23:50:00', 10001, '10001AA', 11, 1100, 0), ('2020-10-29 23:55:00', 10001, '10001AA', 111, 11100, 0); +INSERT INTO test_unique_ignore_mode_version (*, _delete_flag_) VALUES ('2020-10-29 23:40:00', 10002, '10002B', 2, 200, 1), ('2020-10-29 23:40:00', 10002, '10002BB', 22, 2200, 0); +INSERT INTO test_unique_ignore_mode_version (*, _delete_flag_) VALUES ('2020-10-29 23:40:00', 10002, '10002BBB', 222, 22200, 1); +INSERT INTO test_unique_ignore_mode_version (*) 
VALUES ('2020-10-29 23:40:00', 10003, '10003C', 3, 300); +select 'test6', * from test_unique_ignore_mode_version order by id, event_time; + +DROP TABLE IF EXISTS test_unique_ignore_mode; +DROP TABLE IF EXISTS test_unique_ignore_mode_version; From ff62397b7ddd173723d068553180018a6ebe3a0e Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 08:20:19 +0000 Subject: [PATCH 126/292] Merge 'cherry-pick-63156edf' into 'cnch-2.2' feat(clickhousech@m-17367449): [cp-cnch-2.2] set the ByteHouse logo to light cyan. See merge request: !23507 --- programs/client/Client.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 07cfc6c187e..78861d3ba3c 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -2471,7 +2471,10 @@ class Client : public Poco::Util::Application static void showClientVersion() { - std::cout << R"( + #define RESET_ "\033[0m" + #define LIGHT_CYAN_ "\033[96m" + + std::cout << LIGHT_CYAN_ << R"( ______ _ _ _ | ___ \ | | | | | | | |_/ /_ _| |_ ___| |_| | ___ _ _ ___ ___ @@ -2480,7 +2483,7 @@ class Client : public Poco::Util::Application \____/ \__, |\__\___\_| |_/\___/ \__,_|___/\___| __/ | |___/ - )" << std::endl; + )" << RESET_ << std::endl; std::cout << VERSION_NAME << " client version " << VERSION_STRING << VERSION_OFFICIAL << "." 
<< std::endl; } From 24a7486fbfed87cefb1c6d70555966d1b65fe5ae Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 08:34:53 +0000 Subject: [PATCH 127/292] Merge 'cherry-pick-61f43b27' into 'cnch-2.2' feat(clickhousech@m-3010810018):[TO CNCH2.2] Support regex match while infile from local, HDFS and S3 See merge request: !23472 # Conflicts: # .codebase/pipelines/ci.yaml # docker/CI/docker-compose-s3.yml # docker/CI/s3/docker-compose-patch.yml # src/Interpreters/InterpreterInsertQuery.cpp --- src/Common/FilePathMatcher.cpp | 68 +++++++ src/Common/FilePathMatcher.h | 36 ++++ src/Common/HDFSFilePathMatcher.cpp | 27 +++ src/Common/HDFSFilePathMatcher.h | 23 +++ src/Common/LocalFilePathMatcher.cpp | 17 ++ src/Common/LocalFilePathMatcher.h | 16 ++ src/Common/S3FilePathMatcher.cpp | 64 ++++++ src/Common/S3FilePathMatcher.h | 28 +++ src/IO/S3Common.cpp | 58 +++++- src/IO/S3Common.h | 2 + src/Interpreters/InterpreterInsertQuery.cpp | 183 ++++++++++++------ src/Storages/HDFS/HDFSCommon.cpp | 1 + .../02460_regex_infile_hdfs.reference | 2 + .../02460_regex_infile_hdfs.sh | 23 +++ .../00001_regex_infile_s3.reference | 2 + .../8_cnch_S3_only/00001_regex_infile_s3.sql | 15 ++ tests/queries/skip_list.json | 3 +- 17 files changed, 505 insertions(+), 63 deletions(-) create mode 100644 src/Common/FilePathMatcher.cpp create mode 100644 src/Common/FilePathMatcher.h create mode 100644 src/Common/HDFSFilePathMatcher.cpp create mode 100644 src/Common/HDFSFilePathMatcher.h create mode 100644 src/Common/LocalFilePathMatcher.cpp create mode 100644 src/Common/LocalFilePathMatcher.h create mode 100644 src/Common/S3FilePathMatcher.cpp create mode 100644 src/Common/S3FilePathMatcher.h create mode 100644 tests/queries/4_cnch_stateless/02460_regex_infile_hdfs.reference create mode 100755 tests/queries/4_cnch_stateless/02460_regex_infile_hdfs.sh create mode 100644 tests/queries/8_cnch_S3_only/00001_regex_infile_s3.reference create mode 100755 
tests/queries/8_cnch_S3_only/00001_regex_infile_s3.sql diff --git a/src/Common/FilePathMatcher.cpp b/src/Common/FilePathMatcher.cpp new file mode 100644 index 00000000000..2600b24fc84 --- /dev/null +++ b/src/Common/FilePathMatcher.cpp @@ -0,0 +1,68 @@ +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +/** + * @brief Recursive directory listing with matched paths as a result. + */ +Strings FilePathMatcher::regexMatchFiles(const String & path_for_ls, const String & for_match) +{ + const size_t first_glob = for_match.find_first_of("*?{"); + + const size_t end_of_path_without_globs = for_match.substr(0, first_glob).rfind('/'); + const String suffix_with_globs = for_match.substr(end_of_path_without_globs); /// begin with '/' + String prefix_without_globs = path_for_ls + for_match.substr(1, end_of_path_without_globs); /// ends with '/' + + const size_t next_slash = suffix_with_globs.find('/', 1); + re2::RE2 matcher(makeRegexpPatternFromGlobs(suffix_with_globs.substr(0, next_slash))); + + Strings result; + FileInfos file_infos = getFileInfos(prefix_without_globs); + for (const FileInfo & file_info : file_infos) + { + const size_t last_slash = file_info.file_path.rfind('/'); + const String file_name = file_info.file_path.substr(last_slash); + const bool looking_for_directory = next_slash != std::string::npos; + /// Condition with type of current file_info means what kind of path is it in current iteration of ls + if (!file_info.is_directory && !looking_for_directory) + { + if (re2::RE2::FullMatch(file_name, matcher)) + { + result.push_back(getSchemeAndPrefix() + file_info.file_path); + } + } + else if (file_info.is_directory && looking_for_directory) + { + if (re2::RE2::FullMatch(file_name, matcher)) + { + Strings result_part + = regexMatchFiles(std::filesystem::path(file_info.file_path) / "", suffix_with_globs.substr(next_slash)); + /// Recursion depth is limited by pattern. '*' works only for depth = 1, for depth = 2 pattern path is '*/*'. 
So we do not need additional check. + std::move(result_part.begin(), result_part.end(), std::back_inserter(result)); + } + } + } + + return result; +} + +String FilePathMatcher::removeSchemeAndPrefix(const String & full_path) +{ + String match_path = full_path; + // remove scheme from path + Poco::URI uri(full_path); + // If there is a '?', substring after '?' will be recognized as a query + if (!uri.getQuery().empty()) + match_path = uri.getPathAndQuery(); + else + match_path = uri.getPath(); + + return match_path; +} +} diff --git a/src/Common/FilePathMatcher.h b/src/Common/FilePathMatcher.h new file mode 100644 index 00000000000..72f2097e1d5 --- /dev/null +++ b/src/Common/FilePathMatcher.h @@ -0,0 +1,36 @@ +#pragma once + +#include + +namespace DB +{ + +struct FileInfo +{ + String file_path; + bool is_directory; + + FileInfo(const String & file_path_, bool is_directory_) : file_path(file_path_), is_directory(is_directory_) { } +}; + +using FileInfos = std::vector; + +class FilePathMatcher +{ +public: + virtual ~FilePathMatcher() = default; + + Strings regexMatchFiles(const String & path_for_ls, const String & for_match); + + // For regex match, we remove scheme and prefix(S3 bucket) from full path. + virtual String removeSchemeAndPrefix(const String & full_path); + +protected: + virtual FileInfos getFileInfos(const String & prefix_path) = 0; + + // For regex match, we remove scheme and prefix(S3 bucket) from full path. + // But these prefix are needed when infile from some file system, so we will add it back. 
+ virtual String getSchemeAndPrefix() { return ""; } +}; + +} diff --git a/src/Common/HDFSFilePathMatcher.cpp b/src/Common/HDFSFilePathMatcher.cpp new file mode 100644 index 00000000000..de37b049f34 --- /dev/null +++ b/src/Common/HDFSFilePathMatcher.cpp @@ -0,0 +1,27 @@ +#include +#include + +namespace DB +{ + +HDFSFilePathMatcher::HDFSFilePathMatcher(String & path, const ContextPtr & context_ptr) +{ + Poco::URI uri(path); + HDFSConnectionParams hdfs_params = context_ptr->getHdfsConnectionParams(); + HDFSBuilderPtr builder = hdfs_params.createBuilder(uri); + hdfs_fs = createHDFSFS(builder.get()); +} + +FileInfos HDFSFilePathMatcher::getFileInfos(const String & prefix_path) +{ + FileInfos file_infos; + HDFSFileInfo ls; + ls.file_info = hdfsListDirectory(hdfs_fs.get(), prefix_path.data(), &ls.length); + for (int i = 0; i < ls.length; i++) + { + file_infos.emplace_back(ls.file_info[i].mName, ls.file_info[i].mKind == kObjectKindDirectory); + } + return file_infos; +} + +} diff --git a/src/Common/HDFSFilePathMatcher.h b/src/Common/HDFSFilePathMatcher.h new file mode 100644 index 00000000000..308e85ad965 --- /dev/null +++ b/src/Common/HDFSFilePathMatcher.h @@ -0,0 +1,23 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ + +class HDFSFilePathMatcher : public FilePathMatcher +{ +public: + HDFSFilePathMatcher(String & path, const ContextPtr & context_ptr); + + ~HDFSFilePathMatcher() override = default; + + FileInfos getFileInfos(const String & prefix_path) override; + +private: + HDFSFSPtr hdfs_fs; +}; + +} diff --git a/src/Common/LocalFilePathMatcher.cpp b/src/Common/LocalFilePathMatcher.cpp new file mode 100644 index 00000000000..c8c4b5b4779 --- /dev/null +++ b/src/Common/LocalFilePathMatcher.cpp @@ -0,0 +1,17 @@ +#include +#include + +namespace DB +{ + +FileInfos LocalFilePathMatcher::getFileInfos(const String & prefix_path) +{ + FileInfos file_infos; + for (const auto & entry : std::filesystem::directory_iterator(prefix_path)) + { + 
file_infos.emplace_back(entry.path(), entry.is_directory()); + } + return file_infos; +} + +} diff --git a/src/Common/LocalFilePathMatcher.h b/src/Common/LocalFilePathMatcher.h new file mode 100644 index 00000000000..200e88bd0f6 --- /dev/null +++ b/src/Common/LocalFilePathMatcher.h @@ -0,0 +1,16 @@ +#pragma once + +#include + +namespace DB +{ + +class LocalFilePathMatcher : public FilePathMatcher +{ +public: + ~LocalFilePathMatcher() override = default; + + FileInfos getFileInfos(const String & prefix_path) override; +}; + +} diff --git a/src/Common/S3FilePathMatcher.cpp b/src/Common/S3FilePathMatcher.cpp new file mode 100644 index 00000000000..e1e77808f6d --- /dev/null +++ b/src/Common/S3FilePathMatcher.cpp @@ -0,0 +1,64 @@ +#include +#include + +namespace DB +{ + +S3FilePathMatcher::S3FilePathMatcher(const String & path, const ContextPtr & context_ptr) +{ + const auto & settings = context_ptr->getSettingsRef(); + S3::URI s3_uri(path); + String endpoint = !s3_uri.endpoint.empty() ? s3_uri.endpoint : settings.s3_endpoint.toString(); + S3::S3Config s3_cfg( + endpoint, + settings.s3_region.toString(), + s3_uri.bucket, + settings.s3_ak_id.toString(), + settings.s3_ak_secret.toString(), + "", + "", + settings.s3_use_virtual_hosted_style); + const std::shared_ptr client = s3_cfg.create(); + s3_util = std::make_unique(client, s3_uri.bucket, false); +} + + +FileInfos S3FilePathMatcher::getFileInfos(const String & prefix_path) +{ + FileInfos file_infos; + + // erase '/' at first to list objects in the bucket + String prefix_without_slash = prefix_path; + size_t pos = prefix_without_slash.find_first_not_of('/'); + if (pos != std::string::npos) + prefix_without_slash.erase(0, pos); + else + prefix_without_slash.clear(); + + S3::S3Util::S3ListResult s3_list_result = s3_util->listObjectsWithDelimiter(prefix_without_slash, "/", false); + + if (s3_list_result.object_names.empty()) + return file_infos; + + int ls_length = s3_list_result.object_names.size(); + for (int i = 0; i < 
ls_length; i++) + { + // add '/' at first to keep the same with other file system + String file_path = std::filesystem::path("/") / s3_list_result.object_names[i]; + file_infos.emplace_back(file_path, s3_list_result.is_common_prefix[i]); + } + + return file_infos; +} + +String S3FilePathMatcher::getSchemeAndPrefix() +{ + return S3_SCHEME + s3_util->getBucket(); +} + +String S3FilePathMatcher::removeSchemeAndPrefix(const String & full_path) +{ + // remove scheme and bucket from path, add '/' at first to keep the same with other file system + return std::filesystem::path("/") / S3::URI(full_path).key; +} +} diff --git a/src/Common/S3FilePathMatcher.h b/src/Common/S3FilePathMatcher.h new file mode 100644 index 00000000000..a48e3719407 --- /dev/null +++ b/src/Common/S3FilePathMatcher.h @@ -0,0 +1,28 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ +const static String S3_SCHEME = "s3://"; + +class S3FilePathMatcher : public FilePathMatcher +{ +public: + S3FilePathMatcher(const String & path, const ContextPtr & context_ptr); + + ~S3FilePathMatcher() override = default; + + FileInfos getFileInfos(const String & prefix_path) override; + + String getSchemeAndPrefix() override; + + String removeSchemeAndPrefix(const String & full_path) override; + +private: + std::unique_ptr s3_util; +}; + +} diff --git a/src/IO/S3Common.cpp b/src/IO/S3Common.cpp index 0e75e5091c8..e8a23bd5a51 100644 --- a/src/IO/S3Common.cpp +++ b/src/IO/S3Common.cpp @@ -355,7 +355,10 @@ namespace S3 validateBucket(bucket, uri); if (uri.getPath().length() <= 1) throw Exception("Invalid S3 URI: no key: " + uri.toString(), ErrorCodes::BAD_ARGUMENTS); - key = uri.getPath().substr(1); + if (!uri.getQuery().empty()) + key = uri.getPathAndQuery().substr(1); + else + key = uri.getPath().substr(1); is_virtual_hosted_style = false; return; } @@ -693,6 +696,59 @@ namespace S3 } } + S3Util::S3ListResult S3Util::listObjectsWithDelimiter(const String & prefix, String delimiter, bool 
include_delimiter) const + { + ProfileEvents::increment(ProfileEvents::S3ListObjects); + Aws::S3::Model::ListObjectsV2Request request; + request.SetBucket(bucket); + request.SetPrefix(prefix); + request.SetDelimiter(delimiter); + + S3Util::S3ListResult result; + + while (result.has_more) + { + if (result.token) + request.SetContinuationToken(result.token.value()); + + Aws::S3::Model::ListObjectsV2Outcome outcome = client->ListObjectsV2(request); + + if (outcome.IsSuccess()) + { + const auto & list_result = outcome.GetResult(); + result.has_more = outcome.GetResult().GetIsTruncated(); + result.token = outcome.GetResult().GetNextContinuationToken(); + + size_t reserver_size = result.object_names.size() + list_result.GetContents().size() + list_result.GetCommonPrefixes().size(); + result.object_names.reserve(reserver_size); + result.object_sizes.reserve(reserver_size); + result.is_common_prefix.reserve(reserver_size); + for (const auto & content : list_result.GetContents()) + { + result.object_names.push_back(content.GetKey()); + result.object_sizes.push_back(content.GetSize()); + result.is_common_prefix.push_back(false); + } + for (const auto & common_prefix : list_result.GetCommonPrefixes()) + { + String prefix_path = common_prefix.GetPrefix(); + if (!include_delimiter) + prefix_path.erase(prefix_path.find_last_of(delimiter), delimiter.size()); + + result.object_names.push_back(prefix_path); + result.object_sizes.push_back(0); + result.is_common_prefix.push_back(true); + } + return result; + } + else + { + throw S3Exception(outcome.GetError(), fmt::format("Could not list objects in bucket {} with prefix {}", bucket, prefix)); + } + } + return result; + } + S3Util::S3ListResult S3Util::listObjectsWithPrefix(const String & prefix, const std::optional & token, int limit) const { ProfileEvents::increment(ProfileEvents::S3ListObjects); diff --git a/src/IO/S3Common.h b/src/IO/S3Common.h index de34aa26855..50ba91e8cc8 100644 --- a/src/IO/S3Common.h +++ b/src/IO/S3Common.h 
@@ -226,7 +226,9 @@ class S3Util std::optional token; Strings object_names; std::vector object_sizes; + std::vector is_common_prefix; }; + S3ListResult listObjectsWithDelimiter(const String & prefix, String delimiter = "/", bool include_delimiter = false) const; S3ListResult listObjectsWithPrefix(const String & prefix, const std::optional & token, int limit = 1000) const; // Write object diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 020441f3955..0d4a67eb420 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -19,17 +19,18 @@ * All Bytedance's Modifications are Copyright (2023) Bytedance Ltd. and/or its affiliates. */ +#include #include #include #include #include +#include #include #include #include #include #include -#include #include #include #include @@ -61,6 +62,7 @@ #include #include #include +#include #include #include #include @@ -69,6 +71,11 @@ #include #include #include +#include +#include +#include +#include +#include #include #include "Interpreters/Context_fwd.h" #include @@ -644,6 +651,72 @@ void InterpreterInsertQuery::extendQueryLogElemImpl(QueryLogElement & elem, cons } } +void parseFuzzyName(const ContextPtr & context_ptr, std::vector & file_path_list, const String & source_uri, const String & scheme) +{ + // Assume no query and fragment in uri, todo, add sanity check + String fuzzy_file_name; + String uri_prefix = source_uri.substr(0, source_uri.find_last_of('/')); + if (uri_prefix.length() == source_uri.length()) + { + fuzzy_file_name = source_uri; + uri_prefix.clear(); + } + else + { + uri_prefix += "/"; + fuzzy_file_name = source_uri.substr(uri_prefix.length()); + } + + auto max_files = context_ptr->getSettingsRef().fuzzy_max_files; + std::vector parent_list = parseDescription(fuzzy_file_name, 0, fuzzy_file_name.length(), ',', max_files); + for (const auto & fuzzy_name : parent_list) + { + std::vector child_list = 
parseDescription(fuzzy_name, 0, fuzzy_name.length(), '|', max_files); + for (const auto & star_name : child_list) + { + String full_path = uri_prefix + star_name; + if (star_name.find_first_of("*?{") == std::string::npos) + { + file_path_list.emplace_back(full_path); + continue; + } + + std::shared_ptr matcher; + if (scheme.empty() || scheme == "file") + { + matcher = std::make_shared(); + } +#if USE_HDFS + else if (DB::isHdfsOrCfsScheme(scheme)) + { + matcher = std::make_shared(full_path, context_ptr); + } +#endif +#if USE_AWS_S3 + else if (isS3URIScheme(scheme)) + { + matcher = std::make_shared(full_path, context_ptr); + } +#endif + else + { + file_path_list.emplace_back(full_path); + } + + if (matcher) + { + // match files + String match_path = matcher->removeSchemeAndPrefix(full_path); + Strings match_file_list = matcher->regexMatchFiles("/", match_path); + file_path_list.insert(file_path_list.end(), match_file_list.begin(), match_file_list.end()); + } + + if (file_path_list.size() > max_files) + throw Exception(uri_prefix + fuzzy_file_name + " generates too many files, please modify the value of fuzzy_max_files.", ErrorCodes::BAD_ARGUMENTS); + } + } +} + BlockInputStreamPtr InterpreterInsertQuery::buildInputStreamFromSource( const ContextPtr context_ptr, const ColumnsDescription & columns, @@ -654,83 +727,71 @@ BlockInputStreamPtr InterpreterInsertQuery::buildInputStreamFromSource( bool is_enable_squash, const String & compression_method) { - // Assume no query and fragment in uri, todo, add sanity check - String fuzzyFileNames; - String uriPrefix = source_uri.substr(0, source_uri.find_last_of('/')); - if (uriPrefix.length() == source_uri.length()) - { - fuzzyFileNames = source_uri; - uriPrefix.clear(); - } - else - { - uriPrefix += "/"; - fuzzyFileNames = source_uri.substr(uriPrefix.length()); - } - - Poco::URI uri(uriPrefix); + Poco::URI uri(source_uri); const String & scheme = uri.getScheme(); BlockInputStreams inputs; { - auto max_files = 
context_ptr->getSettingsRef().fuzzy_max_files; - std::vector fuzzyNameList = parseDescription(fuzzyFileNames, 0, fuzzyFileNames.length(), ',' , max_files); - std::vector > fileNames; - for (auto fuzzyName : fuzzyNameList) - fileNames.push_back(parseDescription(fuzzyName, 0, fuzzyName.length(), '|', max_files)); + std::vector file_path_list; + parseFuzzyName(context_ptr, file_path_list, source_uri, scheme); - for (auto & vecNames : fileNames) + for (auto & file_path : file_path_list) { - for (auto & name : vecNames) - { - std::unique_ptr read_buf = nullptr; + std::unique_ptr read_buf = nullptr; - if (scheme.empty() || scheme == "file") - { - read_buf = std::make_unique(Poco::URI(uriPrefix + name).getPath()); - } + if (scheme.empty() || scheme == "file") + { + read_buf = std::make_unique(Poco::URI(file_path).getPath()); + } #if USE_HDFS - else if (DB::isHdfsOrCfsScheme(scheme)) - { - ReadSettings read_settings; - read_settings.remote_throttler = context_ptr->getProcessList().getHDFSDownloadThrottler(); - read_buf = std::make_unique(uriPrefix + name, context_ptr->getHdfsConnectionParams(), read_settings); - } + else if (DB::isHdfsOrCfsScheme(scheme)) + { + ReadSettings read_settings; + read_settings.remote_throttler = context_ptr->getProcessList().getHDFSDownloadThrottler(); + read_buf = std::make_unique(file_path, context_ptr->getHdfsConnectionParams(), read_settings); + } #endif #if USE_AWS_S3 - else if (isS3URIScheme(scheme)) - { - S3::URI s3_uri(Poco::URI(uriPrefix + name)); - String endpoint = s3_uri.endpoint.empty() ? 
context_ptr->getSettingsRef().s3_endpoint.toString() : s3_uri.endpoint; - String bucket = s3_uri.bucket; - String key = s3_uri.key; - S3::S3Config s3_cfg(endpoint, context_ptr->getSettingsRef().s3_region.toString(), bucket, - context_ptr->getSettingsRef().s3_ak_id.toString(), context_ptr->getSettingsRef().s3_ak_secret.toString(), - "", "", context_ptr->getSettingsRef().s3_use_virtual_hosted_style); - const std::shared_ptr client = s3_cfg.create(); - read_buf = std::make_unique(client, bucket, key, context_ptr->getReadSettings()); - } + else if (isS3URIScheme(scheme)) + { + S3::URI s3_uri(file_path); + String endpoint = s3_uri.endpoint.empty() ? context_ptr->getSettingsRef().s3_endpoint.toString() : s3_uri.endpoint; + String bucket = s3_uri.bucket; + String key = s3_uri.key; + S3::S3Config s3_cfg( + endpoint, + context_ptr->getSettingsRef().s3_region.toString(), + bucket, + context_ptr->getSettingsRef().s3_ak_id.toString(), + context_ptr->getSettingsRef().s3_ak_secret.toString(), + "", + "", + context_ptr->getSettingsRef().s3_use_virtual_hosted_style); + const std::shared_ptr client = s3_cfg.create(); + read_buf = std::make_unique(client, bucket, key, context_ptr->getReadSettings()); + } #endif - else - { - throw Exception("URI scheme " + scheme + " is not supported with insert statement yet", ErrorCodes::NOT_IMPLEMENTED); - } + else + { + throw Exception("URI scheme " + scheme + " is not supported with insert statement yet", ErrorCodes::NOT_IMPLEMENTED); + } - read_buf = wrapReadBufferWithCompressionMethod(std::move(read_buf), chooseCompressionMethod(name, compression_method), settings.snappy_format_blocked); + read_buf = wrapReadBufferWithCompressionMethod( + std::move(read_buf), chooseCompressionMethod(file_path, compression_method), settings.snappy_format_blocked); - inputs.emplace_back( - std::make_shared>( - context_ptr->getInputStreamByFormatNameAndBuffer(format, *read_buf, - sample, // sample_block - settings.max_insert_block_size, - columns), + 
inputs.emplace_back(std::make_shared>( + context_ptr->getInputStreamByFormatNameAndBuffer( + format, + *read_buf, + sample, // sample_block + settings.max_insert_block_size, + columns), std::move(read_buf))); - } } } - if (inputs.size() == 0) - throw Exception("Inputs interpreter error", ErrorCodes::LOGICAL_ERROR); + if (inputs.empty()) + throw Exception("Input files is empty.", ErrorCodes::LOGICAL_ERROR); auto stream = inputs[0]; if (inputs.size() > 1) diff --git a/src/Storages/HDFS/HDFSCommon.cpp b/src/Storages/HDFS/HDFSCommon.cpp index 0f0514af7a1..76ebce489a9 100644 --- a/src/Storages/HDFS/HDFSCommon.cpp +++ b/src/Storages/HDFS/HDFSCommon.cpp @@ -320,6 +320,7 @@ std::optional hdfsParamsFromUrl(const Poco::URI & uri) user_info = "clickhouse"; return HDFSConnectionParams(HDFSConnectionParams::HDFSConnectionType::CONN_HDFS, user_info, {{uri.getHost(), uri.getPort()}}); } + } #endif diff --git a/tests/queries/4_cnch_stateless/02460_regex_infile_hdfs.reference b/tests/queries/4_cnch_stateless/02460_regex_infile_hdfs.reference new file mode 100644 index 00000000000..da7f84740bd --- /dev/null +++ b/tests/queries/4_cnch_stateless/02460_regex_infile_hdfs.reference @@ -0,0 +1,2 @@ +2 +4 diff --git a/tests/queries/4_cnch_stateless/02460_regex_infile_hdfs.sh b/tests/queries/4_cnch_stateless/02460_regex_infile_hdfs.sh new file mode 100755 index 00000000000..70420bcc193 --- /dev/null +++ b/tests/queries/4_cnch_stateless/02460_regex_infile_hdfs.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS test_regex_infile;" + +${CLICKHOUSE_CLIENT} --query "SELECT '1' INTO OUTFILE 'hdfs://${HDFS_PATH_ROOT}/outfile_02460/clickhouse_outfile_1.csv' FORMAT CSV COMPRESSION 'none' SETTINGS outfile_in_server_with_tcp = 1, overwrite_current_file=1;" + +${CLICKHOUSE_CLIENT} --query "SELECT '2' INTO OUTFILE 'hdfs://${HDFS_PATH_ROOT}/outfile_02460/clickhouse_outfile_2.csv' FORMAT CSV COMPRESSION 'none' SETTINGS outfile_in_server_with_tcp = 1, overwrite_current_file=1;" + +${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS test_regex_infile;" + +${CLICKHOUSE_CLIENT} --query "Create TABLE test_regex_infile (a UInt8) ENGINE = CnchMergeTree() ORDER BY a;" + +${CLICKHOUSE_CLIENT} --query "INSERT INTO test_regex_infile FORMAT CSV INFILE 'hdfs://${HDFS_PATH_ROOT}/outfile_02460/*';" + +${CLICKHOUSE_CLIENT} --query "SELECT count() FROM test_regex_infile;" + +${CLICKHOUSE_CLIENT} --query "INSERT INTO test_regex_infile FORMAT CSV INFILE 'hdfs://${HDFS_PATH_ROOT}/outfile_02460/clickhouse_outfile_?.csv';" + +${CLICKHOUSE_CLIENT} --query "SELECT count() FROM test_regex_infile;" diff --git a/tests/queries/8_cnch_S3_only/00001_regex_infile_s3.reference b/tests/queries/8_cnch_S3_only/00001_regex_infile_s3.reference new file mode 100644 index 00000000000..da7f84740bd --- /dev/null +++ b/tests/queries/8_cnch_S3_only/00001_regex_infile_s3.reference @@ -0,0 +1,2 @@ +2 +4 diff --git a/tests/queries/8_cnch_S3_only/00001_regex_infile_s3.sql b/tests/queries/8_cnch_S3_only/00001_regex_infile_s3.sql new file mode 100755 index 00000000000..c83f3cfc71b --- /dev/null +++ b/tests/queries/8_cnch_S3_only/00001_regex_infile_s3.sql @@ -0,0 +1,15 @@ +SELECT '1' INTO OUTFILE 's3://cnch/test_00001/clickhouse_outfile_1.csv' FORMAT CSV SETTINGS s3_ak_id = 'minio', s3_ak_secret = 'minio123', s3_region = 'cn-beijing', s3_endpoint = 'http://minio:9000', outfile_in_server_with_tcp = 1; + +SELECT '2' INTO OUTFILE 
's3://cnch/test_00001/clickhouse_outfile_2.csv' FORMAT CSV SETTINGS s3_ak_id = 'minio', s3_ak_secret = 'minio123', s3_region = 'cn-beijing', s3_endpoint = 'http://minio:9000', outfile_in_server_with_tcp = 1; + +DROP TABLE IF EXISTS test_regex_infile; + +Create TABLE test_regex_infile (a UInt8) ENGINE = CnchMergeTree() ORDER BY a; + +INSERT INTO test_regex_infile FORMAT CSV INFILE 's3://cnch/test_00001/*' SETTINGS s3_ak_id = 'minio', s3_ak_secret = 'minio123', s3_region = 'cn-beijing', s3_endpoint = 'http://minio:9000'; + +SELECT count() FROM test_regex_infile; + +INSERT INTO test_regex_infile FORMAT CSV INFILE 's3://cnch/test_00001/clickhouse_outfile_?.csv' SETTINGS s3_ak_id = 'minio', s3_ak_secret = 'minio123', s3_region = 'cn-beijing', s3_endpoint = 'http://minio:9000'; + +SELECT count() FROM test_regex_infile; diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json index 328bb962beb..1a4b7e93071 100644 --- a/tests/queries/skip_list.json +++ b/tests/queries/skip_list.json @@ -637,7 +637,8 @@ "11007_atomic_attach_parts", "11007_attach_commit_time_bug", "00828_test_bitmap_format_parquet", - "02681_partial_part_columns" + "02681_partial_part_columns", + "02460_regex_infile_hdfs" ], /// Please do not add new tests to this list. It is only for backward compatibility. 
From 3b8aed3a95b6f24366026cd9b565efe8f4686f34 Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 08:35:31 +0000 Subject: [PATCH 128/292] Merge 'fix_hybrid_again_2.2' into 'cnch-2.2' fix(clickhousech@m-4691886549): [TO CHCH-2.2] fix hybrid allocation bug when query multiple tables[2] See merge request: !23534 --- src/CloudServices/CnchServerResource.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/CloudServices/CnchServerResource.cpp b/src/CloudServices/CnchServerResource.cpp index cbb6ebea5df..986177af9b7 100644 --- a/src/CloudServices/CnchServerResource.cpp +++ b/src/CloudServices/CnchServerResource.cpp @@ -62,6 +62,7 @@ AssignedResource::AssignedResource(AssignedResource && resource) replicated = resource.replicated; server_parts = std::move(resource.server_parts); + virtual_parts = std::move(resource.virtual_parts); hive_parts = std::move(resource.hive_parts); file_parts = std::move(resource.file_parts); part_names = resource.part_names; // don't call move here From adc6631c73edd03a9a67d810ebe1a9a5c93f22f9 Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 08:35:49 +0000 Subject: [PATCH 129/292] Merge 'youzhiyuan_add_timeout_for_send_query_2_2' into 'cnch-2.2' fix(clickhousech@m-4770368288): add timeout for sendCreateQuery See merge request: !23495 --- src/CloudServices/CnchWorkerClient.cpp | 1 + src/CloudServices/CnchWorkerServiceImpl.cpp | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/CloudServices/CnchWorkerClient.cpp b/src/CloudServices/CnchWorkerClient.cpp index 9bc151d4111..042697eb8f2 100644 --- a/src/CloudServices/CnchWorkerClient.cpp +++ b/src/CloudServices/CnchWorkerClient.cpp @@ -221,6 +221,7 @@ void CnchWorkerClient::sendCreateQueries( for (const auto & cnch_table_create_query : cnch_table_create_queries) *request.mutable_cnch_table_create_queries()->Add() = cnch_table_create_query; + cntl.set_timeout_ms(settings.send_plan_segment_timeout_ms.totalMilliseconds()); stub->sendCreateQuery(&cntl, 
&request, &response, nullptr); assertController(cntl); diff --git a/src/CloudServices/CnchWorkerServiceImpl.cpp b/src/CloudServices/CnchWorkerServiceImpl.cpp index 57ce0be051a..47cad35f870 100644 --- a/src/CloudServices/CnchWorkerServiceImpl.cpp +++ b/src/CloudServices/CnchWorkerServiceImpl.cpp @@ -143,7 +143,10 @@ CnchWorkerServiceImpl::~CnchWorkerServiceImpl() RPCHelpers::handleException(response->mutable_exception()); \ } \ }; \ - THREADPOOL_SCHEDULE(_func); + Stopwatch watch; \ + THREADPOOL_SCHEDULE(_func); \ + UInt64 milliseconds = watch.elapsedMilliseconds(); \ + if (milliseconds > 100) LOG_DEBUG(log, "CnchWorkerService rpc request threadpool schedule cost : {} ", milliseconds); void CnchWorkerServiceImpl::executeSimpleQuery( From e328ab3a8a0cc721e520f6407a29e826238358ca Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 08:36:07 +0000 Subject: [PATCH 130/292] Merge 'cherry-pick-mr-23226-1' into 'cnch-2.2' fix(clickhousech@m-4691925668): Merge 'bsp-schedule-send-progress' into 'cnch-2.2' See merge request: !23467 --- src/Interpreters/Context.cpp | 10 +++++ src/Interpreters/Context.h | 4 ++ .../DistributedStages/MPPQueryCoordinator.cpp | 19 +++++++--- .../DistributedStages/ProgressManager.cpp | 37 +++++++++++++++++++ .../DistributedStages/ProgressManager.h | 21 +++++++++++ src/Interpreters/SegmentScheduler.h | 5 +++ src/Server/TCPHandler.cpp | 15 +++++--- .../4_cnch_stateless/13004_query_cost.sh | 8 ++-- 8 files changed, 103 insertions(+), 16 deletions(-) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index c633c2754a9..0bd1daf6003 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -2707,6 +2707,16 @@ ProgressCallback Context::getProgressCallback() const return progress_callback; } +void Context::setSendTCPProgress(std::function callback) +{ + send_tcp_progress = callback; +} + +std::function Context::getSendTCPProgress() const +{ + return send_tcp_progress; +} + void 
Context::setProcessListEntry(std::shared_ptr process_list_entry_) { process_list_entry = process_list_entry_; diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index b3108460ac7..46120fcf15a 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -452,6 +452,7 @@ class ContextData using ProgressCallback = std::function; ProgressCallback progress_callback; /// Callback for tracking progress of query execution. + std::function send_tcp_progress{nullptr}; using FileProgressCallback = std::function; FileProgressCallback file_progress_callback; /// Callback for tracking progress of file loading. @@ -1104,6 +1105,9 @@ class Context : public ContextData, public std::enable_shared_from_this void setProgressCallback(ProgressCallback callback); /// Used in InterpreterSelectQuery to pass it to the IBlockInputStream. ProgressCallback getProgressCallback() const; + void setSendTCPProgress(std::function callback); + /// Used in InterpreterSelectQuery to pass it to the IBlockInputStream. 
+ std::function getSendTCPProgress() const; void setFileProgressCallback(FileProgressCallback && callback) { file_progress_callback = callback; } FileProgressCallback getFileProgressCallback() const { return file_progress_callback; } diff --git a/src/Interpreters/DistributedStages/MPPQueryCoordinator.cpp b/src/Interpreters/DistributedStages/MPPQueryCoordinator.cpp index 66c50ff06b0..0646c99ec59 100644 --- a/src/Interpreters/DistributedStages/MPPQueryCoordinator.cpp +++ b/src/Interpreters/DistributedStages/MPPQueryCoordinator.cpp @@ -6,17 +6,18 @@ #include #include #include -#include -#include -#include #include #include #include +#include +#include +#include +#include #include #include -#include -#include #include +#include +#include "Interpreters/DistributedStages/ProgressManager.h" #include @@ -208,7 +209,13 @@ BlockIO MPPQueryCoordinator::execute() process_list_elem_ptr->get().updateProgressIn(p); }); - scheduler_status = query_context->getSegmentScheduler()->insertPlanSegments(query_id, plan_segment_tree.get(), query_context); + { + /// only send progress before executing final plan segment, + /// working thread will join when this tcp progress sender is destroyed + auto sender = std::make_unique( + query_context->getSendTCPProgress(), query_context->getSettingsRef().interactive_delay / 1000); + scheduler_status = query_context->getSegmentScheduler()->insertPlanSegments(query_id, plan_segment_tree.get(), query_context); + } if (scheduler_status && !scheduler_status->exception.empty()) { diff --git a/src/Interpreters/DistributedStages/ProgressManager.cpp b/src/Interpreters/DistributedStages/ProgressManager.cpp index e5f8ad336d5..f0ebb1eebbb 100644 --- a/src/Interpreters/DistributedStages/ProgressManager.cpp +++ b/src/Interpreters/DistributedStages/ProgressManager.cpp @@ -1,8 +1,45 @@ +#include +#include +#include #include +#include +#include #include +#include namespace DB { + +TCPProgressSender::TCPProgressSender(std::function send_tcp_progress_, size_t 
interval_) + : logger(&Poco::Logger::get("ProgressManager")), send_tcp_progress(send_tcp_progress_), interval(interval_) +{ + if (send_tcp_progress && interval) + { + LOG_TRACE(logger, "TCPProgressSender started"); + thread = std::make_unique([&]() { + while (true) + { + std::unique_lock lock(mu); + var.wait_for(lock, std::chrono::milliseconds(this->interval), [&]() { return this->shutdown.load(); }); + if (shutdown) + { + LOG_TRACE(logger, "TCPProgressSender shutdown"); + break; + } + this->send_tcp_progress(); + } + }); + } +} + +TCPProgressSender::~TCPProgressSender() +{ + shutdown = true; + var.notify_all(); + if (thread && thread->joinable()) + thread->join(); +} + void ProgressManager::onProgress(UInt32 segment_id, UInt32 parallel_index, const Progress & progress_) { std::unique_lock lock(segment_progress_mutex); diff --git a/src/Interpreters/DistributedStages/ProgressManager.h b/src/Interpreters/DistributedStages/ProgressManager.h index e07bd18c29f..9f97bcdcae8 100644 --- a/src/Interpreters/DistributedStages/ProgressManager.h +++ b/src/Interpreters/DistributedStages/ProgressManager.h @@ -1,12 +1,33 @@ #pragma once +#include +#include #include #include #include #include +#include namespace DB { + +// send progress repeatedly +class TCPProgressSender +{ +public: + TCPProgressSender(std::function send_tcp_progress_, size_t interval_); + ~TCPProgressSender(); + +private: + Poco::Logger * logger; + std::atomic_bool shutdown = {false}; + std::mutex mu; + std::condition_variable var; + std::function send_tcp_progress; + std::unique_ptr thread; + size_t interval; +}; + class ProgressManager { public: diff --git a/src/Interpreters/SegmentScheduler.h b/src/Interpreters/SegmentScheduler.h index fb75643a4c2..d54bf80219e 100644 --- a/src/Interpreters/SegmentScheduler.h +++ b/src/Interpreters/SegmentScheduler.h @@ -62,6 +62,11 @@ using SegmentStatusMap = std::unordered_map>; enum class OverflowMode; +struct SegmentSchedulerOptions +{ + std::function 
send_progress_callback; +}; + class SegmentScheduler { public: diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index fc331b8f307..0c762aa1b70 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -396,6 +396,12 @@ void TCPHandler::runImpl() interpretSettings(ast, query_context); } + if (query_context->getSettingsRef().bsp_mode) + { + /// for bsp mode, progress needs to be sent during scheduling. + query_context->setSendTCPProgress([&]() { this->sendProgress(); }); + } + auto * insert_query = ast->as(); if (!(insert_query && insert_query->data) && query_context->isAsyncMode()) { @@ -2110,12 +2116,9 @@ void TCPHandler::updateProgress(const Progress & value) void TCPHandler::sendProgress() { auto increment = state.progress.fetchAndResetPiecewiseAtomically(); - if (!increment.empty()) - { - writeVarUInt(Protocol::Server::Progress, *out); - increment.write(*out, client_tcp_protocol_version); - out->next(); - } + writeVarUInt(Protocol::Server::Progress, *out); + increment.write(*out, client_tcp_protocol_version); + out->next(); } void TCPHandler::sendLogs() diff --git a/tests/queries/4_cnch_stateless/13004_query_cost.sh b/tests/queries/4_cnch_stateless/13004_query_cost.sh index 5543840f6ed..2206d5dee5a 100755 --- a/tests/queries/4_cnch_stateless/13004_query_cost.sh +++ b/tests/queries/4_cnch_stateless/13004_query_cost.sh @@ -17,10 +17,10 @@ INSERT INTO test_query_cost_48021.t1_48021 (a, b) VALUES (1, 1); INSERT INTO test_query_cost_48021.t2_48021 (a, c) VALUES (1, 1); INSERT INTO test_query_cost_48021.t3_48021 (a, d) VALUES (1, 1); INSERT INTO test_query_cost_48021.t4_48021 (a, e) VALUES (1, 1); -SELECT * FROM test_query_cost_48021.t1_48021 SETTINGS enable_optimizer=1,enable_wait_for_post_processing=1,wait_for_post_processing_timeout_ms=10000 FORMAT JSON; -SELECT sum(b) FROM test_query_cost_48021.t1_48021 GROUP BY a SETTINGS enable_optimizer=1,enable_wait_for_post_processing=1,wait_for_post_processing_timeout_ms=10000 FORMAT JSON; 
-SELECT * FROM test_query_cost_48021.t1_48021,test_query_cost_48021.t2_48021,test_query_cost_48021.t3_48021,test_query_cost_48021.t4_48021 WHERE test_query_cost_48021.t1_48021.a=test_query_cost_48021.t2_48021.a and test_query_cost_48021.t2_48021.a=test_query_cost_48021.t3_48021.a and test_query_cost_48021.t3_48021.a=test_query_cost_48021.t4_48021.a SETTINGS enable_optimizer=1,enable_wait_for_post_processing=1,wait_for_post_processing_timeout_ms=10000 FORMAT JSON; -SELECT a FROM test_query_cost_48021.t1_48021 UNION ALL SELECT 1 SETTINGS enable_optimizer=1,enable_wait_for_post_processing=1,wait_for_post_processing_timeout_ms=10000 FORMAT JSON; +SELECT * FROM test_query_cost_48021.t1_48021 SETTINGS enable_optimizer=1,enable_wait_for_post_processing=1,wait_for_post_processing_timeout_ms=10000,interactive_delay=10000000 FORMAT JSON; +SELECT sum(b) FROM test_query_cost_48021.t1_48021 GROUP BY a SETTINGS enable_optimizer=1,enable_wait_for_post_processing=1,wait_for_post_processing_timeout_ms=10000,interactive_delay=10000000 FORMAT JSON; +SELECT * FROM test_query_cost_48021.t1_48021,test_query_cost_48021.t2_48021,test_query_cost_48021.t3_48021,test_query_cost_48021.t4_48021 WHERE test_query_cost_48021.t1_48021.a=test_query_cost_48021.t2_48021.a and test_query_cost_48021.t2_48021.a=test_query_cost_48021.t3_48021.a and test_query_cost_48021.t3_48021.a=test_query_cost_48021.t4_48021.a SETTINGS enable_optimizer=1,enable_wait_for_post_processing=1,wait_for_post_processing_timeout_ms=10000,interactive_delay=10000000 FORMAT JSON; +SELECT a FROM test_query_cost_48021.t1_48021 UNION ALL SELECT 1 SETTINGS enable_optimizer=1,enable_wait_for_post_processing=1,wait_for_post_processing_timeout_ms=10000,interactive_delay=10000000 FORMAT JSON; " | grep -E "rows_read|bytes_read"; ${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}?max_block_size=1&send_progress_in_http_headers=1&http_headers_progress_interval_ms=100000" -d 'SELECT * FROM test_query_cost_48021.t1_48021 SETTINGS 
enable_optimizer=1,enable_wait_for_post_processing=1,wait_for_post_processing_timeout_ms=10000 FORMAT JSON' 2>&1 | grep 'X-ClickHouse-Summary' From c563b1e992399901c09bffff2f6b8389ebdee728 Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 08:36:25 +0000 Subject: [PATCH 131/292] Merge 'cherry-pick-commit-dc5a8bbe' into 'cnch-2.2' fix(clickhousech@m-4770366732): DM scan TR in a round-robin way. See merge request: !23545 --- src/Catalog/Catalog.cpp | 27 ++++++++++++++++++++++---- src/Catalog/Catalog.h | 2 +- src/Catalog/MetastoreProxy.cpp | 6 ++++-- src/Catalog/MetastoreProxy.h | 3 ++- src/DaemonManager/DaemonJobTxnGC.cpp | 6 ++++-- src/DaemonManager/DaemonJobTxnGC.h | 1 + src/Transaction/TransactionCleaner.cpp | 7 ++++--- 7 files changed, 39 insertions(+), 13 deletions(-) diff --git a/src/Catalog/Catalog.cpp b/src/Catalog/Catalog.cpp index 5e1df13c0a1..b824b1562d6 100644 --- a/src/Catalog/Catalog.cpp +++ b/src/Catalog/Catalog.cpp @@ -4184,7 +4184,7 @@ namespace Catalog return getTransactionRecords(std::vector(txn_ids.begin(), txn_ids.end()), 100000); } - std::vector Catalog::getTransactionRecordsForGC(size_t max_result_number) + std::vector Catalog::getTransactionRecordsForGC(String & start_key, size_t max_result_number) { std::vector res; /// if exception occurs during get txn record, just return the partial result; @@ -4193,9 +4193,20 @@ namespace Catalog [&] { try { - auto it = meta_proxy->getAllTransactionRecord(name_space, max_result_number); + auto it = meta_proxy->getAllTransactionRecord(name_space, start_key, max_result_number); - while (it->next()) + if (!it->next()) + { + if (start_key.empty()) + return; + + start_key.clear(); + auto it = meta_proxy->getAllTransactionRecord(name_space, start_key, max_result_number); + if (!it->next()) + return; + } + + do { auto record = TransactionRecord::deserialize(it->value()); if (record.isSecondary()) @@ -4222,7 +4233,15 @@ namespace Catalog } res.push_back(std::move(record)); } - } + + } while 
(it->next()); + + // Save key so we can resume iteration in the next call. + if (!res.empty()) + start_key = meta_proxy->transactionRecordKey(name_space, res.back().txnID()); + + if (res.size() < max_result_number || max_result_number == 0) + start_key.clear(); } catch (...) { diff --git a/src/Catalog/Catalog.h b/src/Catalog/Catalog.h index 844ef3540f7..44e237ca360 100644 --- a/src/Catalog/Catalog.h +++ b/src/Catalog/Catalog.h @@ -525,7 +525,7 @@ class Catalog std::vector getTransactionRecords(const std::vector & txn_ids, size_t batch_size = 0); /// clean zombie records. If the total transaction record number is too large, it may be impossible to get all of them. We can /// pass a max_result_number to only get part of them and clean zombie records repeatedlly - std::vector getTransactionRecordsForGC(size_t max_result_number); + std::vector getTransactionRecordsForGC(String & start_key, size_t max_result_number); TransactionRecords getTransactionRecords(const ServerDataPartsVector & parts, const DeleteBitmapMetaPtrVector & bitmaps); /// Clear intents written by zombie transaction. 
diff --git a/src/Catalog/MetastoreProxy.cpp b/src/Catalog/MetastoreProxy.cpp index e6bcb6d69e8..1121e69b49b 100644 --- a/src/Catalog/MetastoreProxy.cpp +++ b/src/Catalog/MetastoreProxy.cpp @@ -1274,9 +1274,11 @@ std::vector> MetastoreProxy::getTransactionRecords(con return metastore_ptr->multiGet(txn_keys); } -IMetaStore::IteratorPtr MetastoreProxy::getAllTransactionRecord(const String & name_space, const size_t & max_result_number) +IMetaStore::IteratorPtr +MetastoreProxy::getAllTransactionRecord(const String & name_space, const String & start_key, const size_t & max_result_number) { - return metastore_ptr->getByPrefix(escapeString(name_space) + "_" + TRANSACTION_RECORD_PREFIX, max_result_number); + return metastore_ptr->getByPrefix( + escapeString(name_space) + "_" + TRANSACTION_RECORD_PREFIX, max_result_number, DEFAULT_SCAN_BATCH_COUNT, start_key); } std::pair MetastoreProxy::updateTransactionRecord(const String & name_space, const UInt64 & txn_id, const String & txn_data_old, const String & txn_data_new) diff --git a/src/Catalog/MetastoreProxy.h b/src/Catalog/MetastoreProxy.h index f872a78f163..c4e3269c2b5 100644 --- a/src/Catalog/MetastoreProxy.h +++ b/src/Catalog/MetastoreProxy.h @@ -1007,7 +1007,8 @@ class MetastoreProxy void removeTransactionRecord(const String & name_space, const UInt64 & txn_id); void removeTransactionRecords(const String & name_space, const std::vector & txn_ids); String getTransactionRecord(const String & name_space, const UInt64 & txn_id); - IMetaStore::IteratorPtr getAllTransactionRecord(const String & name_space, const size_t & max_result_number = 0); + IMetaStore::IteratorPtr + getAllTransactionRecord(const String & name_space, const String & start_key = "", const size_t & max_result_number = 0); std::pair updateTransactionRecord(const String & name_space, const UInt64 & txn_id, const String & txn_data_old, const String & txn_data_new); std::vector> getTransactionRecords(const String & name_space, const std::vector & txn_ids); diff 
--git a/src/DaemonManager/DaemonJobTxnGC.cpp b/src/DaemonManager/DaemonJobTxnGC.cpp index a841a6c490d..77c8bc3b832 100644 --- a/src/DaemonManager/DaemonJobTxnGC.cpp +++ b/src/DaemonManager/DaemonJobTxnGC.cpp @@ -39,8 +39,10 @@ bool DaemonJobTxnGC::executeImpl() { const Context & context = *getContext(); - auto txn_records - = context.getCnchCatalog()->getTransactionRecordsForGC(context.getConfigRef().getInt("cnch_txn_clean_batch_size", 200000)); + String last_start_key = start_key; + auto txn_records = context.getCnchCatalog()->getTransactionRecordsForGC( + start_key, context.getConfigRef().getInt("cnch_txn_clean_batch_size", 200000)); + LOG_DEBUG(log, "start_key changed from: {} to {}", last_start_key, start_key); if (!txn_records.empty()) { cleanTxnRecords(txn_records); diff --git a/src/DaemonManager/DaemonJobTxnGC.h b/src/DaemonManager/DaemonJobTxnGC.h index f82f168acbf..b670b91681e 100644 --- a/src/DaemonManager/DaemonJobTxnGC.h +++ b/src/DaemonManager/DaemonJobTxnGC.h @@ -69,6 +69,7 @@ class DaemonJobTxnGC : public DaemonJob using TransactionRecords = std::vector; private: + String start_key; void cleanTxnRecords(const TransactionRecords & records); void cleanUndoBuffers(const TransactionRecords & records); void cleanTxnRecord(const TransactionRecord & record, TxnTimestamp current_time, std::vector & cleanTxnIds, TxnGCLog & summary); diff --git a/src/Transaction/TransactionCleaner.cpp b/src/Transaction/TransactionCleaner.cpp index b3372a78066..4539792752f 100644 --- a/src/Transaction/TransactionCleaner.cpp +++ b/src/Transaction/TransactionCleaner.cpp @@ -125,10 +125,11 @@ void TransactionCleaner::cleanCommittedTxn(const TransactionRecord & txn_record) auto rpc_address = host_port.getRPCAddress(); if (!isLocalServer(rpc_address, std::to_string(global_context.getRPCPort()))) { - // TODO: need to fix for multi-table txn - LOG_DEBUG(log, "Forward clean task for txn {} to server {}", txn_record.txnID().toUInt64(), rpc_address); - 
global_context.getCnchServerClientPool().get(rpc_address)->cleanTransaction(txn_record); + + LOG_DEBUG(log, "(dry-run) Forward clean task for txn {} to server {}", txn_record.txnID().toUInt64(), rpc_address); return; + // TODO: need to fix for multi-table txn + // global_context.getCnchServerClientPool().get(rpc_address)->cleanTransaction(txn_record); } UndoResourceNames names = integrateResources(resources); From aed11a7352843038aff05c8a298408a66f51c67b Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 08:36:46 +0000 Subject: [PATCH 132/292] Merge 'cherry-pick-mr-23331' into 'cnch-2.2' fix(clickhousech@m-4724042591):fix sendResource parse object column schema forward compatibility See merge request: !23569 --- src/CloudServices/CnchWorkerServiceImpl.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/CloudServices/CnchWorkerServiceImpl.cpp b/src/CloudServices/CnchWorkerServiceImpl.cpp index 47cad35f870..d9ff8ef10ee 100644 --- a/src/CloudServices/CnchWorkerServiceImpl.cpp +++ b/src/CloudServices/CnchWorkerServiceImpl.cpp @@ -765,9 +765,12 @@ void CnchWorkerServiceImpl::sendResources( for (int i = 0; i < request->create_queries_size(); i++) { auto create_query = request->create_queries().at(i); - auto object_columns = request->dynamic_object_column_schema().at(i); - worker_resource->executeCreateQuery(context_for_create, create_query, false, ColumnsDescription::parse(object_columns)); + ColumnsDescription object_columns; + if (i < request->dynamic_object_column_schema_size()) + object_columns = ColumnsDescription::parse(request->dynamic_object_column_schema().at(i)); + + worker_resource->executeCreateQuery(context_for_create, create_query, false, object_columns); } for (int i = 0; i < request->cacheable_create_queries_size(); i++) { From 7cb0a5ad1c9df224836b55a53112d03c6aae6180 Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 08:37:03 +0000 Subject: [PATCH 133/292] Merge branch 'cherry-pick-7827f2d4' into 
'cnch-2.2' fix(clickhousech@m-4717110049): fix hualloc cache size set properly See merge request dp/ClickHouse!23570 --- src/Common/HuAllocator.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/Common/HuAllocator.h b/src/Common/HuAllocator.h index 5a4ea1e84d3..a4a9b768677 100644 --- a/src/Common/HuAllocator.h +++ b/src/Common/HuAllocator.h @@ -122,13 +122,14 @@ class HuAllocator static void InitHuAlloc(size_t cached) { static std::once_flag hualloc_init_flag; + static size_t use_cache = cached / 2; + if (use_cache <= 0) + use_cache = 1024 * (1ull << 20); /// If not set properly use 1G as default + std::call_once(hualloc_init_flag, [&]() { hu_check_init_w(); pthread_t tid; - size_t use_cache = cached / 2; - if (use_cache <= 0) - use_cache = 1024 * (1ull << 20); /// If not set properly use 1G as default pthread_create(&tid, nullptr, ReclaimThread, &use_cache); }); } From 8ef5b9c83bc48d13031b6d08e8d30214b6d70906 Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 08:37:21 +0000 Subject: [PATCH 134/292] Merge 'fix_add_buffer_cte_cnch2.2' into 'cnch-2.2' fix(optimizer@m-4770066004): add UnifyNullableType for AddBufferForDeadlockCTE cnch-2.2 See merge request: !23576 --- .../Rewriter/AddBufferForDeadlockCTE.cpp | 2 ++ .../40052_deadlock_cte.reference | 1 + .../40052_deadlock_cte.sql | 28 +++++++++++++++++++ 3 files changed, 31 insertions(+) diff --git a/src/Optimizer/Rewriter/AddBufferForDeadlockCTE.cpp b/src/Optimizer/Rewriter/AddBufferForDeadlockCTE.cpp index ad80a8adeb2..961d03a2fbf 100644 --- a/src/Optimizer/Rewriter/AddBufferForDeadlockCTE.cpp +++ b/src/Optimizer/Rewriter/AddBufferForDeadlockCTE.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -393,6 +394,7 @@ void AddBufferForDeadlockCTE::rewrite(QueryPlan & plan, ContextMutablePtr contex std::make_shared(), std::make_shared(false, true), std::make_shared(Rules::inlineProjectionRules(), "InlineProjection"), + std::make_shared(), 
std::make_shared(Rules::normalizeExpressionRules(), "NormalizeExpression"), std::make_shared(Rules::swapPredicateRules(), "SwapPredicate"), std::make_shared(Rules::simplifyExpressionRules(), "SimplifyExpression"), diff --git a/tests/queries/4_cnch_stateless_no_tenant/40052_deadlock_cte.reference b/tests/queries/4_cnch_stateless_no_tenant/40052_deadlock_cte.reference index 48f65e07e47..151f92bab2e 100644 --- a/tests/queries/4_cnch_stateless_no_tenant/40052_deadlock_cte.reference +++ b/tests/queries/4_cnch_stateless_no_tenant/40052_deadlock_cte.reference @@ -335,3 +335,4 @@ CTEDef [1] │ Expressions: expr#rand(1):=rand(1) └─ Values Est. 1 rows note: CTE(Common Table Expression) is applied for 3 times. +1 \N diff --git a/tests/queries/4_cnch_stateless_no_tenant/40052_deadlock_cte.sql b/tests/queries/4_cnch_stateless_no_tenant/40052_deadlock_cte.sql index 94bb8d6afe2..6ed0dca591d 100644 --- a/tests/queries/4_cnch_stateless_no_tenant/40052_deadlock_cte.sql +++ b/tests/queries/4_cnch_stateless_no_tenant/40052_deadlock_cte.sql @@ -18,3 +18,31 @@ explain with c1 as (select rand(1) x), c2 as (select rand(2) x), c3 as (select r explain with c1 as (select rand(1) x), c2 as (select rand(2) x) select j1.x from (select t1.x from c1 t1 join c1 t2 on t1.x = t2.x) j1 join (select t3.x from c1 t3 join c2 t4 on t3.x = t4.x) j2 on j1.x = j2.x; explain with c1 as (select rand(1) x), c2 as (select t1.x as x from c1 t1 join c1 t2 on t1.x = t2.x) select x from c2 t3; explain with c1 as (select rand(1) x), c2 as (select t1.x as x from c1 t1 union all (select rand(2) x)) select t2.x from c2 t2 join c1 t3 on t2.x = t3.x; + +-- { echoOff } +set join_use_nulls=1; +WITH + c1 AS + ( + SELECT 1 AS x + ), + c2 AS + ( + SELECT 1 AS x + ) +SELECT * +FROM +( + SELECT + t5.x AS a, + t6.x AS b + FROM + ( + select t1.x from c1 t1 join c2 t2 on t1.x = t2.x + ) AS t5 + LEFT JOIN + ( + SELECT 2 AS x + ) AS t6 ON t5.x = t6.x +) +WHERE isNull(b); From d2f4e62035e2eebfc1597d52a5bf64b02842cde9 Mon Sep 17 
00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 08:37:51 +0000 Subject: [PATCH 135/292] Merge branch 'fix-bitmapColumnCardinality-miss-partial-agg-cnch-2.2' into 'cnch-2.2' fix(clickhousech@m-4812137604): fix bitmapColumnCardinality miss partial agg cnch 2.2 See merge request dp/ClickHouse!23605 --- .../PushPartialStepThroughExchangeRules.cpp | 26 ++++++++++--------- ...fix_bitmap_func_miss_partial_agg.reference | 19 ++++++++++++++ ...40106_fix_bitmap_func_miss_partial_agg.sql | 8 ++++++ 3 files changed, 41 insertions(+), 12 deletions(-) create mode 100644 tests/queries/4_cnch_stateless_no_tenant/40106_fix_bitmap_func_miss_partial_agg.reference create mode 100644 tests/queries/4_cnch_stateless_no_tenant/40106_fix_bitmap_func_miss_partial_agg.sql diff --git a/src/Optimizer/Rule/Rewrite/PushPartialStepThroughExchangeRules.cpp b/src/Optimizer/Rule/Rewrite/PushPartialStepThroughExchangeRules.cpp index b48ea35d715..a43e344b730 100644 --- a/src/Optimizer/Rule/Rewrite/PushPartialStepThroughExchangeRules.cpp +++ b/src/Optimizer/Rule/Rewrite/PushPartialStepThroughExchangeRules.cpp @@ -37,12 +37,20 @@ namespace DB { NameSet PushPartialAggThroughExchange::BLOCK_AGGS{ - "pathCount", - "attributionAnalysis", - "attributionCorrelationFuse", + "pathcount", + "attributionanalysis", + "attributioncorrelationfuse", "attribution", - "attributionCorrelation", -}; + "attributioncorrelation", + "bitmapjoinandcard", + "bitmapjoinandcard2", + "bitmapjoin", + "bitmapcount", + "bitmapextract", + "bitmapmulticount", + "bitmapmulticountwithdate", + "bitmapmaxlevel", + "bitmapcolumndiff"}; static std::pair canPushPartialWithHint(const AggregatingStep * step) { @@ -201,13 +209,7 @@ TransformResult PushPartialAggThroughExchange::transformImpl(PlanNodePtr node, c for (const auto & agg : step->getAggregates()) { - if (BLOCK_AGGS.count(agg.function->getName())) - { - return {}; - } - - // fixme: remove bitmap* if correctness problem fixed - if 
(Poco::toLower(agg.function->getName()).starts_with("bitmap")) + if (BLOCK_AGGS.count(Poco::toLower(agg.function->getName()))) { return {}; } diff --git a/tests/queries/4_cnch_stateless_no_tenant/40106_fix_bitmap_func_miss_partial_agg.reference b/tests/queries/4_cnch_stateless_no_tenant/40106_fix_bitmap_func_miss_partial_agg.reference new file mode 100644 index 00000000000..12309d0c324 --- /dev/null +++ b/tests/queries/4_cnch_stateless_no_tenant/40106_fix_bitmap_func_miss_partial_agg.reference @@ -0,0 +1,19 @@ +Projection +│ Expressions: BitMapColumnCardinality(bm):=`expr#BitMapColumnCardinality(bm)` +└─ MergingAggregated + └─ Gather Exchange + └─ Aggregating + │ Group by: {} + │ Aggregates: expr#BitMapColumnCardinality(bm):=AggNull(bitmapColumnCardinality)(bm) + └─ TableScan default.t40106 + Outputs: [bm] +Projection +│ Expressions: BitMapCount(\'1\')(toInt64(1), bm):=`expr#BitMapCount(\'1\')(toInt64(1), bm)` +└─ Aggregating + │ Group by: {} + │ Aggregates: expr#BitMapCount(\'1\')(toInt64(1), bm):=AggNull(bitmapCount)(expr#toInt64(1),bm) + └─ Gather Exchange + └─ Projection + │ Expressions: [bm], expr#toInt64(1):=cast(1, \'Int64\') + └─ TableScan default.t40106 + Outputs: [bm] diff --git a/tests/queries/4_cnch_stateless_no_tenant/40106_fix_bitmap_func_miss_partial_agg.sql b/tests/queries/4_cnch_stateless_no_tenant/40106_fix_bitmap_func_miss_partial_agg.sql new file mode 100644 index 00000000000..4beb674077a --- /dev/null +++ b/tests/queries/4_cnch_stateless_no_tenant/40106_fix_bitmap_func_miss_partial_agg.sql @@ -0,0 +1,8 @@ +drop table if exists t40106; + +create table t40106(bm BitMap64) engine = CnchMergeTree() order by tuple(); + +explain stats=0 select bitmapColumnCardinality(bm) from t40106 settings enable_optimizer=1; +explain stats=0 select bitmapCount('1')(toInt64(1), bm) from t40106 settings enable_optimizer=1; + +drop table if exists t40106; From 7818b8d92943e9f1df81e110b1e8dca9211f7d27 Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 
08:38:08 +0000 Subject: [PATCH 136/292] Merge 'aeolus_table_access_2p2' into 'cnch-2.2' feat(clickhousech@m-4770461610): set accessible_table_names as alias to access_table_names See merge request: !23585 --- src/Common/SettingsChanges.cpp | 1 + src/Core/Settings.h | 1 + .../ClusterProxy/executeQuery.cpp | 3 + src/Interpreters/Context.cpp | 8 +- src/Interpreters/Context.h | 2 +- src/Interpreters/executeQuery.cpp | 89 ++++++++++--------- 6 files changed, 60 insertions(+), 44 deletions(-) diff --git a/src/Common/SettingsChanges.cpp b/src/Common/SettingsChanges.cpp index e8c585130de..492f3f040a3 100644 --- a/src/Common/SettingsChanges.cpp +++ b/src/Common/SettingsChanges.cpp @@ -162,6 +162,7 @@ void SettingsChanges::fillFromProto(const Protos::SettingsChanges & proto) } std::unordered_set SettingsChanges::WHITELIST_SETTINGS = { + "access_table_names", "accessible_table_names", "active_role", "add_http_cors_header", diff --git a/src/Core/Settings.h b/src/Core/Settings.h index a7bd124ba64..1ce6272912d 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -1242,6 +1242,7 @@ enum PreloadLevelSettings : UInt64 M(Bool, rewrite_unknown_left_join_identifier, true, "Whether to rewrite unknown left join identifier, this is a deprecated feature but Aeolus SQL depends on it", 0) \ M(Bool, allow_mysql_having_name_resolution, false, "Whether to use MySQL special name resolution rules for HAVING clauses ", 0) \ M(String, access_table_names, "", "Session level restricted tables query can access", 0) \ + M(String, accessible_table_names, "", "Session level restricted tables query can access", 0) \ \ /** settings in cnch **/ \ M(Seconds, drop_range_memory_lock_timeout, 5, "The time that spend on wait for memory lock when doing drop range", 0) \ diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index 3c9791da74b..44aa4114618 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ 
b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -126,6 +126,9 @@ ContextMutablePtr updateSettingsForCluster(const Cluster & cluster, ContextPtr c if (!settings.access_table_names.toString().empty()) new_settings.setString("access_table_names", ""); + if (!settings.accessible_table_names.toString().empty()) + new_settings.setString("accessible_table_names", ""); + auto new_context = Context::createCopy(context); new_context->setSettings(new_settings); return new_context; diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 0bd1daf6003..d4c032a8fe9 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -1826,9 +1826,13 @@ std::shared_ptr Context::getAccess() const void Context::checkAeolusTableAccess(const String & database_name, const String & table_name) const { - String table_names = this->getSettingsRef().access_table_names; + String table_names = getSettingsRef().access_table_names; if (table_names.empty()) - return; + { + table_names = getSettingsRef().accessible_table_names; + if (table_names.empty()) + return; + } std::vector tables; boost::split(tables, table_names, boost::is_any_of(" ,")); /// avoid check temporary table. 
diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 46120fcf15a..1d1cffc01eb 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -1195,7 +1195,7 @@ class Context : public ContextData, public std::enable_shared_from_this UInt32 getZooKeeperSessionUptime() const; - void addQueryPlanInfo(String & query_plan_) + void addQueryPlanInfo(String & query_plan_) { this->query_plan = query_plan_; } diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 35957ed1e62..272562865b1 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -1450,11 +1450,11 @@ static std::tuple executeQueryImpl( } } - if (settings.log_query_plan) + if (settings.log_query_plan) { elem.query_plan = context->getQueryContext()->getQueryPlan(); } - + interpreter->extendQueryLogElem(elem, ast, context, query_database, query_table); if (settings.log_query_settings) @@ -2444,48 +2444,55 @@ void adjustAccessTablesIfNeeded(ContextMutablePtr & context) { // In case access_table_names is set, this query will be readonly and // access right will be propagated to remote tables + bool is_access_table_names = true; String access_table_names = context->getSettingsRef().access_table_names; - if (!access_table_names.empty()) + if (access_table_names.empty()) { - auto add_access_table_name = [&](const String & db, const String & tbl) - { - access_table_names.append(",").append(db).append(".").append(tbl); - context->setSetting("access_table_names", access_table_names); - }; - std::vector tables; - boost::split(tables, access_table_names, boost::is_any_of(" ,")); + access_table_names = context->getSettingsRef().accessible_table_names; + is_access_table_names = false; + } - for (auto & table : tables) - { - char * begin = table.data(); - char * end = begin + table.size(); - Tokens tokens(begin, end); - IParser::Pos token_iterator(tokens, context->getSettingsRef().max_parser_depth); - auto pos = token_iterator; 
- Expected expected; - String database_name, table_name; - if (!parseDatabaseAndTableName(pos, expected, database_name, table_name)) - continue; - - StorageID table_id{database_name, table_name}; - /// tryGetTable below requires resolved table id - StorageID resolved = context->tryResolveStorageID(table_id); - if (!resolved) - continue; - - // continue if current table is temporary table. - if (resolved.database_name == DatabaseCatalog::TEMPORARY_DATABASE) - continue; - - /// access_table_names need to have resolved name, otherwise tryGetTable below will fail - if (table_id.database_name.empty() && !resolved.database_name.empty()) - add_access_table_name(resolved.getDatabaseName(), resolved.getTableName()); - - // auto storage_ptr = DatabaseCatalog::instance().tryGetTable(resolved, context); - // auto * distributed = dynamic_cast(storage_ptr.get()); - // if (distributed && !distributed->getRemoteTableName().empty()) - // add_access_table_name(distributed->getRemoteDatabaseName(), distributed->getRemoteTableName()); - } + if (access_table_names.empty()) + return; + + auto add_access_table_name = [&](const String & db, const String & tbl) + { + access_table_names.append(",").append(db).append(".").append(tbl); + context->setSetting(is_access_table_names ? 
"access_table_names" : "accessible_table_names", access_table_names); + }; + std::vector tables; + boost::split(tables, access_table_names, boost::is_any_of(" ,")); + + for (auto & table : tables) + { + char * begin = table.data(); + char * end = begin + table.size(); + Tokens tokens(begin, end); + IParser::Pos token_iterator(tokens, context->getSettingsRef().max_parser_depth); + auto pos = token_iterator; + Expected expected; + String database_name, table_name; + if (!parseDatabaseAndTableName(pos, expected, database_name, table_name)) + continue; + + StorageID table_id{database_name, table_name}; + /// tryGetTable below requires resolved table id + StorageID resolved = context->tryResolveStorageID(table_id); + if (!resolved) + continue; + + // continue if current table is temporary table. + if (resolved.database_name == DatabaseCatalog::TEMPORARY_DATABASE) + continue; + + /// access_table_names need to have resolved name, otherwise tryGetTable below will fail + if (table_id.database_name.empty() && !resolved.database_name.empty()) + add_access_table_name(resolved.getDatabaseName(), resolved.getTableName()); + + // auto storage_ptr = DatabaseCatalog::instance().tryGetTable(resolved, context); + // auto * distributed = dynamic_cast(storage_ptr.get()); + // if (distributed && !distributed->getRemoteTableName().empty()) + // add_access_table_name(distributed->getRemoteDatabaseName(), distributed->getRemoteTableName()); } } From e0875e76bca03cece8e0bdf01554e428619c9b8e Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 08:38:24 +0000 Subject: [PATCH 137/292] Merge branch 'cherry-pick-1ba8c804-2' into 'cnch-2.2' fix(clickhousech@m-4789434239): set correct relative path for UndoResourceType::Part See merge request dp/ClickHouse!23598 --- src/Transaction/TransactionCommon.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Transaction/TransactionCommon.cpp b/src/Transaction/TransactionCommon.cpp index e85d7020d7e..76ce515d28d 100644 
--- a/src/Transaction/TransactionCommon.cpp +++ b/src/Transaction/TransactionCommon.cpp @@ -142,7 +142,9 @@ void UndoResource::clean(Catalog::Catalog & , [[maybe_unused]]MergeTreeMetaBase || type() == UndoResourceType::S3DetachDeleteBitmap || type() == UndoResourceType::S3AttachDeleteBitmap) { const auto & resource_relative_path = type() == UndoResourceType::S3AttachDeleteBitmap ? placeholders(4) : placeholders(1); - String rel_path = storage->getRelativeDataPath(IStorage::StorageLocation::MAIN) + resource_relative_path; + /// For HDFS, rel_path is {table_uuid} / {part_id}. + /// For S3, as storage->getRelativeDataPath returns "", rel_path is just {part_id} + String rel_path = fs::path(storage->getRelativeDataPath(IStorage::StorageLocation::MAIN)) / resource_relative_path; if (disk->exists(rel_path)) { if ((type() == UndoResourceType::Part || type() == UndoResourceType::StagedPart) @@ -226,7 +228,7 @@ void UndoResource::commit(const Context & context) const DataModelDeleteBitmapPtr model_ptr = std::make_shared(); model_ptr->ParseFromString(former_bitmap_meta); const auto & relative_path = DeleteBitmapMeta::deleteBitmapFileRelativePath(*model_ptr); - String rel_path = storage->getRelativeDataPath(IStorage::StorageLocation::MAIN) + relative_path; + String rel_path = fs::path(storage->getRelativeDataPath(IStorage::StorageLocation::MAIN)) / relative_path; if (disk->exists(rel_path)) { LOG_DEBUG(log, "Will remove Disk {} undo path {}", disk->getPath(), rel_path); From 3e617d334f589e1b9894be32780bee92e33df6d8 Mon Sep 17 00:00:00 2001 From: fredwang Date: Mon, 5 Aug 2024 08:43:53 +0000 Subject: [PATCH 138/292] Merge 'support_timezone_2.2' into 'cnch-2.2' feat(clickhousech@m-4692103199): [TO CNCH-2.2] Support session level timezone See merge request: !23614 # Conflicts: # .gitmodules # contrib/bytehouse-gis # programs/client/Client.cpp # src/Core/Settings.h # src/Processors/Exchange/DataTrans/Brpc/BrpcRemoteBroadcastReceiver.cpp --- .github/workflows/ci.yml | 1 + 
base/common/chrono_io.h | 2 +- base/daemon/BaseDaemon.cpp | 2 +- programs/client/Client.cpp | 62 ++++++++++++++++--- programs/copier/ClusterCopierApp.cpp | 2 +- programs/keeper/Keeper.cpp | 4 +- programs/obfuscator/Obfuscator.cpp | 2 +- programs/server/Server.cpp | 4 +- .../AggregateFunctionGenArrayMonth.cpp | 2 +- src/Analyzers/RewriteFusionMerge.cpp | 4 +- src/Catalog/Catalog.cpp | 3 +- src/Client/Connection.cpp | 5 ++ src/Client/Connection.h | 2 + src/Client/HedgedConnections.cpp | 1 + src/Client/MultiplexedConnections.cpp | 2 + src/CloudServices/CnchWorkerClient.cpp | 2 + src/CloudServices/CnchWorkerResource.cpp | 2 +- src/CloudServices/CnchWorkerServiceImpl.cpp | 4 ++ src/Common/DateLUT.cpp | 47 +++++++++++++- src/Common/DateLUT.h | 26 ++++++-- src/Common/LocalDate.h | 29 +++------ src/Common/LocalDateTime.h | 9 +-- src/Common/ThreadStatus.h | 5 ++ src/Common/Trace/DirectSystemLogExporter.cpp | 2 +- src/Core/MySQL/MySQLReplication.cpp | 18 ++++-- src/Core/Protocol.h | 6 +- src/Core/ProtocolDefines.h | 4 +- src/Core/Settings.h | 53 ++++------------ src/Core/SettingsFields.cpp | 19 ++++++ src/Core/SettingsFields.h | 31 ++++++++++ src/DataStreams/ITTLAlgorithm.cpp | 5 +- src/DataStreams/MongoDBBlockInputStream.cpp | 2 +- src/DataTypes/DataTypeDate32.h | 5 +- src/DataTypes/DataTypeDateTime.h | 4 +- .../Serializations/SerializationDate.cpp | 8 +-- .../Serializations/SerializationDate.h | 6 ++ .../Serializations/SerializationDate32.cpp | 8 +-- .../Serializations/SerializationDate32.h | 6 ++ .../ClickHouseDictionarySource.cpp | 2 +- src/Dictionaries/MySQLDictionarySource.cpp | 2 +- .../PostgreSQLDictionarySource.cpp | 2 +- src/Dictionaries/XDBCDictionarySource.cpp | 2 +- src/Functions/FunctionAddTime.cpp | 6 +- src/Functions/FunctionCustomWeekToSomething.h | 2 +- .../FunctionDateOrDateTimeToSomething.h | 2 +- src/Functions/FunctionsConversion.h | 46 ++++++++------ src/Functions/IFunctionCustomWeek.h | 2 +- src/Functions/IFunctionDateOrDateTime.h | 2 +- 
src/Functions/currentTime.cpp | 2 +- src/Functions/dateDiff.cpp | 6 +- src/Functions/dateName.cpp | 2 +- .../extractTimeZoneFromFunctionArguments.cpp | 4 +- src/Functions/formatDateTime.cpp | 2 +- src/Functions/fromDaysAndToDays.cpp | 4 +- src/Functions/makeDate.cpp | 4 +- src/Functions/parseDateTime.cpp | 2 +- src/Functions/serverConstants.cpp | 22 ++++++- src/Functions/timestamp.cpp | 2 +- src/Functions/timezone.cpp | 2 +- src/Functions/today.cpp | 2 +- src/Functions/yesterday.cpp | 2 +- src/IO/ReadHelpers.h | 48 ++++++++------ src/IO/WriteHelpers.h | 27 +++++--- .../examples/parse_date_time_best_effort.cpp | 2 +- src/Interpreters/AsynchronousMetricLog.cpp | 2 +- src/Interpreters/Context.cpp | 5 ++ src/Interpreters/Context.h | 4 ++ src/Interpreters/CrashLog.cpp | 2 +- src/Interpreters/DatabaseCatalog.cpp | 2 +- src/Interpreters/KafkaLog.cpp | 2 +- src/Interpreters/MaterializedMySQLLog.cpp | 2 +- src/Interpreters/MetricLog.cpp | 2 +- src/Interpreters/MutationLog.cpp | 2 +- src/Interpreters/OpenTelemetrySpanLog.cpp | 2 +- src/Interpreters/PartLog.cpp | 2 +- src/Interpreters/PartMergeLog.cpp | 2 +- src/Interpreters/ProcessorsProfileLog.cpp | 2 +- src/Interpreters/QueryExchangeLog.cpp | 2 +- src/Interpreters/QueryLog.cpp | 2 +- src/Interpreters/QueryThreadLog.cpp | 2 +- src/Interpreters/RemoteReadLog.cpp | 2 +- src/Interpreters/ServerPartLog.cpp | 2 +- src/Interpreters/TextLog.cpp | 2 +- src/Interpreters/TraceLog.cpp | 2 +- src/Interpreters/UniqueTableLog.cpp | 2 +- src/Interpreters/ZooKeeperLog.cpp | 2 +- src/Interpreters/executeQuery.cpp | 6 +- src/MergeTreeCommon/MergeTreeMetaBase.cpp | 4 +- .../Brpc/BrpcRemoteBroadcastReceiver.cpp | 10 +-- .../DataTrans/Local/LocalBroadcastChannel.cpp | 2 +- .../Exchange/DataTrans/MultiPathReceiver.cpp | 3 +- .../GraphiteRollupSortedAlgorithm.cpp | 2 +- src/Protos/cnch_worker_rpc.proto | 1 + src/Server/TCPHandler.cpp | 16 ++++- src/Server/TCPHandler.h | 1 + src/Statistics/AutoStatisticsHelper.cpp | 6 +- 
src/Statistics/AutoStatisticsManager.cpp | 4 +- .../CnchTablePartitionMetricsHelper.cpp | 5 +- .../MaterializedView/ViewRefreshTaskLog.cpp | 2 +- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 6 +- .../MergeTree/MergeTreeDataWriter.cpp | 6 +- .../MergeTree/MergeTreeMutationEntry.cpp | 10 ++- src/Storages/MergeTree/MergeTreePartInfo.cpp | 4 +- src/Storages/MergeTree/MergeTreePartition.cpp | 2 +- .../MergeTree/ReplicatedMergeTreeLogEntry.cpp | 10 ++- .../ReplicatedMergeTreeMutationEntry.cpp | 10 ++- src/Storages/StorageCnchMergeTree.cpp | 2 +- src/Storages/StorageReplicatedMergeTree.cpp | 13 ++-- src/mysqlxx/Value.h | 6 +- .../02552_client_format_settings.reference | 19 ++++++ .../02552_client_format_settings.sql | 5 ++ .../02737_timezone_setting.reference | 9 +++ .../02737_timezone_setting.sql | 26 ++++++++ .../02738_timezone_insert_query.reference | 11 ++++ .../02738_timezone_insert_query.sql | 31 ++++++++++ ...39_datetime_column_with_timezone.reference | 11 ++++ .../02739_datetime_column_with_timezone.sql | 29 +++++++++ .../convert-month-partitioned-parts/main.cpp | 2 +- 118 files changed, 652 insertions(+), 271 deletions(-) create mode 100644 tests/queries/4_cnch_stateless/02552_client_format_settings.reference create mode 100644 tests/queries/4_cnch_stateless/02552_client_format_settings.sql create mode 100644 tests/queries/4_cnch_stateless/02737_timezone_setting.reference create mode 100644 tests/queries/4_cnch_stateless/02737_timezone_setting.sql create mode 100644 tests/queries/4_cnch_stateless/02738_timezone_insert_query.reference create mode 100644 tests/queries/4_cnch_stateless/02738_timezone_insert_query.sql create mode 100644 tests/queries/4_cnch_stateless/02739_datetime_column_with_timezone.reference create mode 100644 tests/queries/4_cnch_stateless/02739_datetime_column_with_timezone.sql diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0501522c019..49afef71a0a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml 
@@ -102,6 +102,7 @@ jobs: cp -r $GITHUB_WORKSPACE/tests/clickhouse-test ./ mkdir queries cp -r $GITHUB_WORKSPACE/tests/queries/4_cnch_stateless queries/ + cp -r $GITHUB_WORKSPACE/tests/queries/8_cnch_S3_only queries/ cp -r $GITHUB_WORKSPACE/tests/queries/shell_config.sh queries/ # We need skip-list to skip some tests. cp $GITHUB_WORKSPACE/tests/queries/skip_list.json queries/ diff --git a/base/common/chrono_io.h b/base/common/chrono_io.h index cb70b94a354..4c8b7ccc6cf 100644 --- a/base/common/chrono_io.h +++ b/base/common/chrono_io.h @@ -39,7 +39,7 @@ std::string to_string(const std::chrono::time_point & tp) // Don't use DateLUT because it shows weird characters for // TimePoint::max(). I wish we could use C++20 format, but it's not // there yet. - // return DateLUT::instance().timeToString(std::chrono::system_clock::to_time_t(tp)); + // return DateLUT::serverTimezoneInstance().timeToString(std::chrono::system_clock::to_time_t(tp)); auto in_time_t = std::chrono::system_clock::to_time_t(tp); return to_string(in_time_t); diff --git a/base/daemon/BaseDaemon.cpp b/base/daemon/BaseDaemon.cpp index 9a345a72cbf..fbe49056370 100644 --- a/base/daemon/BaseDaemon.cpp +++ b/base/daemon/BaseDaemon.cpp @@ -1128,7 +1128,7 @@ void BaseDaemon::shouldSetupWatchdog(char * argv0_) void BaseDaemon::setupWatchdog() { /// Initialize in advance to avoid double initialization in forked processes. - DateLUT::instance(); + DateLUT::serverTimezoneInstance(); std::string original_process_name; if (argv0) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 78861d3ba3c..df1c42f2110 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -19,7 +19,9 @@ * All Bytedance's Modifications are Copyright (2023) Bytedance Ltd. and/or its affiliates. 
*/ +#include "Common/CurrentThread.h" #include "ConnectionParameters.h" +#include "Core/Protocol.h" #include "QueryFuzzer.h" #include "Storages/HDFS/HDFSCommon.h" #include "Suggest.h" @@ -457,7 +459,7 @@ class Client : public Poco::Util::Application if (current_time % 3 != 0) return false; - auto days = DateLUT::instance().toDayNum(current_time).toUnderType(); + auto days = DateLUT::sessionInstance().toDayNum(current_time).toUnderType(); for (auto d : chineseNewYearIndicators) { /// Let's celebrate until Lantern Festival @@ -552,11 +554,20 @@ class Client : public Poco::Util::Application int mainImpl() { UseSSL use_ssl; + MainThreadStatus::getInstance(); registerFormats(); registerFunctions(); registerAggregateFunctions(); + { + // All that just to set DB::CurrentThread::get().getGlobalContext() + // which is required for client timezone (pushed from server) to work. + auto thread_group = std::make_shared(); + const_cast(thread_group->global_context) = context; + CurrentThread::attachTo(thread_group); + } + /// Batch mode is enabled if one of the following is true: /// - -e (--query) command line option is present. /// The value of the option is used as the text of query (or of multiple queries). @@ -612,7 +623,7 @@ class Client : public Poco::Util::Application connect(); /// Initialize DateLUT here to avoid counting time spent here as query execution time. 
- const auto local_tz = DateLUT::instance().getTimeZone(); + const auto local_tz = DateLUT::sessionInstance().getTimeZone(); if (is_interactive) { @@ -1686,12 +1697,28 @@ class Client : public Poco::Util::Application context->applySettingsChanges(settings_ast.as()->changes); }; const auto * insert = parsed_query->as(); - if (insert && insert->settings_ast) + if (const auto * select = parsed_query->as(); select && select->settings()) + apply_query_settings(*select->settings()); + else if (const auto * select_with_union = parsed_query->as()) + { + const ASTs & children = select_with_union->list_of_selects->children; + if (!children.empty()) + { + // On the client it is enough to apply settings only for the + // last SELECT, since the only thing that is important to apply + // on the client is format settings. + const auto * last_select = children.back()->as(); + if (last_select && last_select->settings()) + { + apply_query_settings(*last_select->settings()); + } + } + } + else if (const auto * query_with_output = parsed_query->as(); + query_with_output && query_with_output->settings_ast) + apply_query_settings(*query_with_output->settings_ast); + else if (insert && insert->settings_ast) apply_query_settings(*insert->settings_ast); - /// FIXME: try to prettify this cast using `as<>()` - const auto * with_output = dynamic_cast(parsed_query.get()); - if (with_output && with_output->settings_ast) - apply_query_settings(*with_output->settings_ast); if (!connection->checkConnected()) connect(); @@ -2149,6 +2176,10 @@ class Client : public Poco::Util::Application case Protocol::Server::QueryMetrics: return true; + case Protocol::Server::TimezoneUpdate: + onTimezoneUpdate(packet.server_timezone); + return true; + default: throw Exception( ErrorCodes::UNKNOWN_PACKET_FROM_SERVER, "Unknown packet {} from server {}", packet.type, connection->getDescription()); @@ -2181,9 +2212,13 @@ class Client : public Poco::Util::Application columns_description = 
ColumnsDescription::parse(packet.multistring_message[1]); return receiveSampleBlock(out, columns_description); + case Protocol::Server::TimezoneUpdate: + onTimezoneUpdate(packet.server_timezone); + break; + default: throw NetException( - "Unexpected packet from server (expected Data, Exception or Log, got " + "Unexpected packet from server (expected Data, Exception or Log or TimezoneUpdate , got " + String(Protocol::Server::toString(packet.type)) + ")", ErrorCodes::UNEXPECTED_PACKET_FROM_SERVER); } @@ -2212,6 +2247,10 @@ class Client : public Poco::Util::Application onLogData(packet.block); break; + case Protocol::Server::TimezoneUpdate: + onTimezoneUpdate(packet.server_timezone); + break; + default: throw NetException( "Unexpected packet from server (expected Exception, EndOfStream or Log, got " @@ -2226,7 +2265,7 @@ class Client : public Poco::Util::Application { auto packet_type = connection->checkPacket(); - while (packet_type && *packet_type == Protocol::Server::Log) + while (packet_type && (*packet_type == Protocol::Server::Log || *packet_type == Protocol::Server::TimezoneUpdate)) { receiveAndProcessPacket(false); packet_type = connection->checkPacket(); @@ -2469,6 +2508,11 @@ class Client : public Poco::Util::Application } } + void onTimezoneUpdate(const String & tz) + { + context->setSetting("session_timezone", tz); + } + static void showClientVersion() { #define RESET_ "\033[0m" diff --git a/programs/copier/ClusterCopierApp.cpp b/programs/copier/ClusterCopierApp.cpp index 69f2fd3383c..3a3b0bf9cb6 100644 --- a/programs/copier/ClusterCopierApp.cpp +++ b/programs/copier/ClusterCopierApp.cpp @@ -39,7 +39,7 @@ void ClusterCopierApp::initialize(Poco::Util::Application & self) time_t timestamp = Poco::Timestamp().epochTime(); auto curr_pid = Poco::Process::id(); - process_id = std::to_string(DateLUT::instance().toNumYYYYMMDDhhmmss(timestamp)) + "_" + std::to_string(curr_pid); + process_id = 
std::to_string(DateLUT::serverTimezoneInstance().toNumYYYYMMDDhhmmss(timestamp)) + "_" + std::to_string(curr_pid); host_id = escapeForFileName(getFQDNOrHostName()) + '#' + process_id; process_path = fs::weakly_canonical(fs::path(base_dir) / ("clickhouse-copier_" + process_id)); fs::create_directories(process_path); diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index 7ad6fd6ae7b..f50afe46097 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -379,8 +379,8 @@ int Keeper::main(const std::vector & /*args*/) /// Initialize DateLUT early, to not interfere with running time of first query. LOG_DEBUG(log, "Initializing DateLUT."); - DateLUT::instance(); - LOG_TRACE(log, "Initialized DateLUT with time zone '{}'.", DateLUT::instance().getTimeZone()); + DateLUT::serverTimezoneInstance(); + LOG_TRACE(log, "Initialized DateLUT with time zone '{}'.", DateLUT::serverTimezoneInstance().getTimeZone()); /// Don't want to use DNS cache DNSResolver::instance().setDisableCacheFlag(); diff --git a/programs/obfuscator/Obfuscator.cpp b/programs/obfuscator/Obfuscator.cpp index 00020fb1cef..a3327438a73 100644 --- a/programs/obfuscator/Obfuscator.cpp +++ b/programs/obfuscator/Obfuscator.cpp @@ -461,7 +461,7 @@ class DateTimeModel : public IModel const DateLUTImpl & date_lut; public: - explicit DateTimeModel(UInt64 seed_) : seed(seed_), date_lut(DateLUT::instance()) {} + explicit DateTimeModel(UInt64 seed_) : seed(seed_), date_lut(DateLUT::serverTimezoneInstance()) { } void train(const IColumn &) override {} void finalize() override {} diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index d07b70032b5..fbc145e771e 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -852,8 +852,8 @@ int Server::main(const std::vector & /*args*/) /// Initialize DateLUT early, to not interfere with running time of first query. 
LOG_DEBUG(log, "Initializing DateLUT."); - DateLUT::instance(); - LOG_TRACE(log, "Initialized DateLUT with time zone '{}'.", DateLUT::instance().getTimeZone()); + DateLUT::serverTimezoneInstance(); + LOG_TRACE(log, "Initialized DateLUT with time zone '{}'.", DateLUT::serverTimezoneInstance().getTimeZone()); /// Storage with temporary data for processing of heavy queries. { diff --git a/src/AggregateFunctions/AggregateFunctionGenArrayMonth.cpp b/src/AggregateFunctions/AggregateFunctionGenArrayMonth.cpp index 6b3783caccf..c459e50d611 100644 --- a/src/AggregateFunctions/AggregateFunctionGenArrayMonth.cpp +++ b/src/AggregateFunctions/AggregateFunctionGenArrayMonth.cpp @@ -35,7 +35,7 @@ AggregateFunctionPtr createAggregateFunctionGenArrayMonth(const std::string & na String date_start = parameters[1].safeGet(); // use local timezone on default - String timezone = DateLUT::instance().getTimeZone(); + String timezone = DateLUT::sessionInstance().getTimeZone(); if (parameters.size() == 3) { timezone = parameters[2].safeGet(); } diff --git a/src/Analyzers/RewriteFusionMerge.cpp b/src/Analyzers/RewriteFusionMerge.cpp index 4021aea448e..8f1b053a1b9 100644 --- a/src/Analyzers/RewriteFusionMerge.cpp +++ b/src/Analyzers/RewriteFusionMerge.cpp @@ -88,12 +88,12 @@ namespace UInt64 field_time = timestamp.safeGet(); if (field_time > mills_test) { - String date = DateLUT::instance().dateToString(field_time / 1000); + String date = DateLUT::serverTimezoneInstance().dateToString(field_time / 1000); return std::make_shared(Field(date)); } else { - String date = DateLUT::instance().dateToString(field_time); + String date = DateLUT::serverTimezoneInstance().dateToString(field_time); return std::make_shared(Field(date)); } } diff --git a/src/Catalog/Catalog.cpp b/src/Catalog/Catalog.cpp index b824b1562d6..4e8ef0050f1 100644 --- a/src/Catalog/Catalog.cpp +++ b/src/Catalog/Catalog.cpp @@ -1344,7 +1344,6 @@ namespace Catalog throw Exception("Table not found: " + database + "." 
+ name, ErrorCodes::UNKNOWN_TABLE); } - auto cache_manager = context.getPartCacheManager(); bool is_host_server = false; const auto host_server = context.getCnchTopologyMaster()->getTargetServer(table_id->uuid(), getServerVwNameFrom(*table_id), true); @@ -1352,7 +1351,7 @@ namespace Catalog if (!host_server.empty()) is_host_server = isLocalServer(host_server.getRPCAddress(), std::to_string(context.getRPCPort())); - if (is_host_server && cache_manager) + if (is_host_server && cache_manager && !query_context.hasSessionTimeZone()) { auto cached_storage = cache_manager->getStorageFromCache(UUIDHelpers::toUUID(table_id->uuid()), host_server.topology_version); if (cached_storage && cached_storage->commit_time <= ts && cached_storage->getStorageID().database_name == database && cached_storage->getStorageID().table_name == name) diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index 223f713e273..14abe7f8897 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -1040,6 +1040,11 @@ Packet Connection::receivePacket() case Protocol::Server::ReadTaskRequest: return res; + + case Protocol::Server::TimezoneUpdate: + readStringBinary(server_timezone, *in); + res.server_timezone = server_timezone; + return res; default: /// In unknown state, disconnect - to not leave unsynchronised connection. 
diff --git a/src/Client/Connection.h b/src/Client/Connection.h index 7de4ceac26d..8ad348c71d4 100644 --- a/src/Client/Connection.h +++ b/src/Client/Connection.h @@ -96,6 +96,8 @@ struct Packet BlockStreamProfileInfo profile_info; std::vector part_uuids; + std::string server_timezone; + Packet() : type(Protocol::Server::Hello) {} }; diff --git a/src/Client/HedgedConnections.cpp b/src/Client/HedgedConnections.cpp index 8455ef3117e..355cd67544f 100644 --- a/src/Client/HedgedConnections.cpp +++ b/src/Client/HedgedConnections.cpp @@ -262,6 +262,7 @@ Packet HedgedConnections::drain() case Protocol::Server::Totals: case Protocol::Server::Extremes: case Protocol::Server::EndOfStream: + case Protocol::Server::TimezoneUpdate: break; case Protocol::Server::Exception: diff --git a/src/Client/MultiplexedConnections.cpp b/src/Client/MultiplexedConnections.cpp index 3eee6eb7f0f..5489e85f4e6 100644 --- a/src/Client/MultiplexedConnections.cpp +++ b/src/Client/MultiplexedConnections.cpp @@ -305,6 +305,7 @@ Packet MultiplexedConnections::drain() case Protocol::Server::Totals: case Protocol::Server::Extremes: case Protocol::Server::EndOfStream: + case Protocol::Server::TimezoneUpdate: break; case Protocol::Server::ProfileInfo: @@ -383,6 +384,7 @@ Packet MultiplexedConnections::receivePacketUnlocked(AsyncCallback async_callbac case Protocol::Server::Totals: case Protocol::Server::Extremes: case Protocol::Server::Log: + case Protocol::Server::TimezoneUpdate: break; case Protocol::Server::EndOfStream: diff --git a/src/CloudServices/CnchWorkerClient.cpp b/src/CloudServices/CnchWorkerClient.cpp index 042697eb8f2..174fbbcd414 100644 --- a/src/CloudServices/CnchWorkerClient.cpp +++ b/src/CloudServices/CnchWorkerClient.cpp @@ -415,6 +415,8 @@ brpc::CallId CnchWorkerClient::sendResources( /// so it should be larger than max_execution_time to make sure the session is not to be destroyed in advance. UInt64 recycle_timeout = max_execution_time > 0 ? 
max_execution_time + 60UL : 3600; request.set_timeout(recycle_timeout); + if (!settings.session_timezone.value.empty()) + request.set_session_timezone(settings.session_timezone.value); bool require_worker_info = false; for (const auto & resource: resources_to_send) diff --git a/src/CloudServices/CnchWorkerResource.cpp b/src/CloudServices/CnchWorkerResource.cpp index bc42995bfc1..a5e8fcacc16 100644 --- a/src/CloudServices/CnchWorkerResource.cpp +++ b/src/CloudServices/CnchWorkerResource.cpp @@ -88,7 +88,7 @@ void CnchWorkerResource::executeCacheableCreateQuery( static auto * log = &Poco::Logger::get("WorkerResource"); std::shared_ptr cached; - if (auto cache = context->tryGetCloudTableDefinitionCache()) + if (auto cache = context->tryGetCloudTableDefinitionCache(); cache && !context->hasSessionTimeZone()) { auto load = [&]() -> std::shared_ptr { diff --git a/src/CloudServices/CnchWorkerServiceImpl.cpp b/src/CloudServices/CnchWorkerServiceImpl.cpp index d9ff8ef10ee..d2b6a2832b4 100644 --- a/src/CloudServices/CnchWorkerServiceImpl.cpp +++ b/src/CloudServices/CnchWorkerServiceImpl.cpp @@ -755,6 +755,10 @@ void CnchWorkerServiceImpl::sendResources( auto session = rpc_context->acquireNamedCnchSession(request->txn_id(), request->timeout(), false); auto query_context = session->context; query_context->setTemporaryTransaction(request->txn_id(), request->primary_txn_id()); + if (request->has_session_timezone()) + query_context->setSetting("session_timezone", request->session_timezone()); + + CurrentThread::QueryScope query_scope(query_context); auto worker_resource = query_context->getCnchWorkerResource(); /// store cloud tables in cnch_session_resource. 
diff --git a/src/Common/DateLUT.cpp b/src/Common/DateLUT.cpp index 2ab457099c6..b9d51f85d2f 100644 --- a/src/Common/DateLUT.cpp +++ b/src/Common/DateLUT.cpp @@ -1,5 +1,9 @@ #include +#include +#include +#include + #include #include #include @@ -29,12 +33,12 @@ std::string determineDefaultTimeZone() { namespace fs = std::filesystem; - const char * tzdir_env_var = std::getenv("TZDIR"); + const char * tzdir_env_var = std::getenv("TZDIR"); // NOLINT(concurrency-mt-unsafe) // ok, because it does not run concurrently with other getenv calls fs::path tz_database_path = tzdir_env_var ? tzdir_env_var : "/usr/share/zoneinfo/"; fs::path tz_file_path; std::string error_prefix; - const char * tz_env_var = std::getenv("TZ"); + const char * tz_env_var = std::getenv("TZ"); // NOLINT(concurrency-mt-unsafe) // ok, because it does not run concurrently with other getenv calls /// In recent tzdata packages some files now are symlinks and canonical path resolution /// may give wrong timezone names - store the name as it is, if possible. 
@@ -138,6 +142,38 @@ std::string determineDefaultTimeZone() } +const DateLUTImpl & DateLUT::sessionInstance() +{ + const auto & date_lut = getInstance(); + + if (DB::CurrentThread::isInitialized()) + { + std::string timezone_from_context; + const DB::ContextPtr query_context = DB::CurrentThread::get().getQueryContext(); + + if (query_context) + { + timezone_from_context = extractTimezoneFromContext(query_context); + + if (!timezone_from_context.empty()) + return date_lut.getImplementation(timezone_from_context); + } + + /// On the server side, timezone is passed in query_context, + /// but on CH-client side we have no query context, + /// and each time we modify client's global context + const DB::ContextPtr global_context = DB::CurrentThread::get().getGlobalContext(); + if (global_context) + { + timezone_from_context = extractTimezoneFromContext(global_context); + + if (!timezone_from_context.empty()) + return date_lut.getImplementation(timezone_from_context); + } + } + return serverTimezoneInstance(); +} + DateLUT::DateLUT() { /// Initialize the pointer to the default DateLUTImpl. 
@@ -148,7 +184,7 @@ DateLUT::DateLUT() const DateLUTImpl & DateLUT::getImplementation(const std::string & time_zone) const { - std::lock_guard lock(mutex); + std::lock_guard lock(mutex); auto it = impls.emplace(time_zone, nullptr).first; if (!it->second) @@ -162,3 +198,8 @@ DateLUT & DateLUT::getInstance() static DateLUT ret; return ret; } + +std::string DateLUT::extractTimezoneFromContext(DB::ContextPtr query_context) +{ + return query_context->getSettingsRef().session_timezone.value; +} diff --git a/src/Common/DateLUT.h b/src/Common/DateLUT.h index bef65d8a026..ecb1cd541d1 100644 --- a/src/Common/DateLUT.h +++ b/src/Common/DateLUT.h @@ -3,6 +3,7 @@ #include #include +#include #include @@ -11,13 +12,22 @@ #include #include +namespace DB +{ +class Context; +using ContextPtr = std::shared_ptr; +} + +class DateLUTImpl; + /// This class provides lazy initialization and lookup of singleton DateLUTImpl objects for a given timezone. class DateLUT : private boost::noncopyable { public: - /// Return singleton DateLUTImpl instance for the default time zone. - static ALWAYS_INLINE const DateLUTImpl & instance() // -V1071 + /// The default instance will return singleton DateLUTImpl for the server time zone. + /// It may be set using 'timezone' server setting. + static ALWAYS_INLINE const DateLUTImpl & serverTimezoneInstance() { const auto & date_lut = getInstance(); return *date_lut.default_impl.load(std::memory_order_acquire); @@ -26,12 +36,18 @@ class DateLUT : private boost::noncopyable /// Return singleton DateLUTImpl instance for a given time zone. static ALWAYS_INLINE const DateLUTImpl & instance(const std::string & time_zone) { - const auto & date_lut = getInstance(); if (time_zone.empty()) - return *date_lut.default_impl.load(std::memory_order_acquire); + return sessionInstance(); + const auto & date_lut = getInstance(); return date_lut.getImplementation(time_zone); } + + /// Return DateLUTImpl instance for session timezone. 
+ /// session_timezone is a session-level setting. + /// If setting is not set, returns the server timezone. + static const DateLUTImpl & sessionInstance(); + static void setDefaultTimezone(const std::string & time_zone) { auto & date_lut = getInstance(); @@ -45,6 +61,8 @@ class DateLUT : private boost::noncopyable private: static DateLUT & getInstance(); + static std::string extractTimezoneFromContext(DB::ContextPtr query_context); + const DateLUTImpl & getImplementation(const std::string & time_zone) const; using DateLUTImplPtr = std::unique_ptr; diff --git a/src/Common/LocalDate.h b/src/Common/LocalDate.h index 0b8783e27d7..7284d46ede1 100644 --- a/src/Common/LocalDate.h +++ b/src/Common/LocalDate.h @@ -46,9 +46,8 @@ class LocalDate unsigned char m_month; unsigned char m_day; - void init(time_t time) + void init(time_t time, const DateLUTImpl & date_lut) { - const auto & date_lut = DateLUT::instance(); const auto & values = date_lut.getValues(time); m_year = values.year; @@ -78,22 +77,19 @@ class LocalDate } public: - explicit LocalDate(time_t time) - { - init(time); - } + explicit LocalDate(time_t time, const DateLUTImpl & time_zone = DateLUT::serverTimezoneInstance()) { init(time, time_zone); } - LocalDate(DayNum day_num) + LocalDate(DayNum day_num, const DateLUTImpl & time_zone = DateLUT::serverTimezoneInstance()) /// NOLINT { - const auto & values = DateLUT::instance().getValues(day_num); + const auto & values = time_zone.getValues(day_num); m_year = values.year; m_month = values.month; m_day = values.day_of_month; } - explicit LocalDate(ExtendedDayNum day_num) + explicit LocalDate(ExtendedDayNum day_num, const DateLUTImpl & time_zone = DateLUT::serverTimezoneInstance()) { - const auto & values = DateLUT::instance().getValues(day_num); + const auto & values = time_zone.getValues(day_num); m_year = values.year; m_month = values.month; m_day = values.day_of_month; @@ -121,16 +117,14 @@ class LocalDate LocalDate(const LocalDate &) noexcept = default; LocalDate 
& operator= (const LocalDate &) noexcept = default; - DayNum getDayNum() const + DayNum getDayNum(const DateLUTImpl & lut = DateLUT::serverTimezoneInstance()) const { - const auto & lut = DateLUT::instance(); return DayNum(lut.makeDayNum(m_year, m_month, m_day).toUnderType()); } - ExtendedDayNum getExtendedDayNum() const + ExtendedDayNum getExtendedDayNum(const DateLUTImpl & lut = DateLUT::serverTimezoneInstance()) const { - const auto & lut = DateLUT::instance(); - return ExtendedDayNum (lut.makeDayNum(m_year, m_month, m_day).toUnderType()); + return ExtendedDayNum(lut.makeDayNum(m_year, m_month, m_day).toUnderType()); } operator DayNum() const @@ -138,10 +132,7 @@ class LocalDate return getDayNum(); } - operator time_t() const - { - return DateLUT::instance().makeDate(m_year, m_month, m_day); - } + operator time_t() const { return DateLUT::serverTimezoneInstance().makeDate(m_year, m_month, m_day); } unsigned short year() const { return m_year; } unsigned char month() const { return m_month; } diff --git a/src/Common/LocalDateTime.h b/src/Common/LocalDateTime.h index 0dc89ce11ca..54e785c8b52 100644 --- a/src/Common/LocalDateTime.h +++ b/src/Common/LocalDateTime.h @@ -81,10 +81,7 @@ class LocalDateTime } public: - explicit LocalDateTime(time_t time, const DateLUTImpl & time_zone = DateLUT::instance()) - { - init(time, time_zone); - } + explicit LocalDateTime(time_t time, const DateLUTImpl & time_zone = DateLUT::serverTimezoneInstance()) { init(time, time_zone); } LocalDateTime(unsigned short year_, unsigned char month_, unsigned char day_, unsigned char hour_, unsigned char minute_, unsigned char second_) @@ -115,9 +112,7 @@ class LocalDateTime operator time_t() const { - return m_year == 0 - ? 0 - : DateLUT::instance().makeDateTime(m_year, m_month, m_day, m_hour, m_minute, m_second); + return m_year == 0 ? 
0 : DateLUT::serverTimezoneInstance().makeDateTime(m_year, m_month, m_day, m_hour, m_minute, m_second); } unsigned short year() const { return m_year; } diff --git a/src/Common/ThreadStatus.h b/src/Common/ThreadStatus.h index 7f76cd5716d..744a34da4c1 100644 --- a/src/Common/ThreadStatus.h +++ b/src/Common/ThreadStatus.h @@ -313,6 +313,11 @@ class ThreadStatus : public boost::noncopyable return query_context.lock(); } + ContextPtr getGlobalContext() const + { + return global_context.lock(); + } + /// Starts new query and create new thread group for it, current thread becomes master thread of the query void initializeQuery(MemoryTracker * memory_tracker_ = nullptr); diff --git a/src/Common/Trace/DirectSystemLogExporter.cpp b/src/Common/Trace/DirectSystemLogExporter.cpp index cf7be55699e..177f2d6c8df 100644 --- a/src/Common/Trace/DirectSystemLogExporter.cpp +++ b/src/Common/Trace/DirectSystemLogExporter.cpp @@ -61,7 +61,7 @@ void OTELTraceLogElement::appendToBlock(MutableColumns & columns) const size_t i = 0; columns[i++]->insert(event_time); - columns[i++]->insert(DateLUT::instance().toDayNum(event_time / 1000000000).toUnderType()); + columns[i++]->insert(DateLUT::serverTimezoneInstance().toDayNum(event_time / 1000000000).toUnderType()); columns[i++]->insert(trace_id); columns[i++]->insert(span_id); columns[i++]->insert(parent_span_id); diff --git a/src/Core/MySQL/MySQLReplication.cpp b/src/Core/MySQL/MySQLReplication.cpp index e29958ba2e7..83e67b45726 100644 --- a/src/Core/MySQL/MySQLReplication.cpp +++ b/src/Core/MySQL/MySQLReplication.cpp @@ -430,8 +430,11 @@ namespace MySQLReplication UInt32 i24 = 0; payload.readStrict(reinterpret_cast(&i24), 3); - const DayNum date_day_number(DateLUT::instance().makeDayNum( - static_cast((i24 >> 9) & 0x7fff), static_cast((i24 >> 5) & 0xf), static_cast(i24 & 0x1f)).toUnderType()); + const DayNum date_day_number( + DateLUT::serverTimezoneInstance() + .makeDayNum( + static_cast((i24 >> 9) & 0x7fff), static_cast((i24 >> 5) & 0xf), 
static_cast(i24 & 0x1f)) + .toUnderType()); row.push_back(Field(date_day_number.toUnderType())); break; @@ -536,10 +539,13 @@ namespace MySQLReplication readTimeFractionalPart(payload, fsp, meta); UInt32 year_month = readBits(val, 1, 17, 40); - time_t date_time = DateLUT::instance().makeDateTime( - year_month / 13, year_month % 13, readBits(val, 18, 5, 40) - , readBits(val, 23, 5, 40), readBits(val, 28, 6, 40), readBits(val, 34, 6, 40) - ); + time_t date_time = DateLUT::serverTimezoneInstance().makeDateTime( + year_month / 13, + year_month % 13, + readBits(val, 18, 5, 40), + readBits(val, 23, 5, 40), + readBits(val, 28, 6, 40), + readBits(val, 34, 6, 40)); if (!meta) // The max value of the 64 bit int flagged here exceeds the year value that is diff --git a/src/Core/Protocol.h b/src/Core/Protocol.h index bb24d5d9d94..ffd8bcb59c1 100644 --- a/src/Core/Protocol.h +++ b/src/Core/Protocol.h @@ -101,7 +101,8 @@ namespace Protocol /// This is such an inverted logic, where server sends requests /// And client returns back response QueryMetrics = 14, /// Query metrics in cnch worker side - MAX = QueryMetrics, + TimezoneUpdate = 15, + MAX = TimezoneUpdate, }; @@ -126,7 +127,8 @@ namespace Protocol "TableColumns", "PartUUIDs", "ReadTaskRequest", - "QueryMetrics" + "QueryMetrics", + "TimezoneUpdate" }; return packet <= MAX ? data[packet] diff --git a/src/Core/ProtocolDefines.h b/src/Core/ProtocolDefines.h index bfb0ef1adbc..f6764f099df 100644 --- a/src/Core/ProtocolDefines.h +++ b/src/Core/ProtocolDefines.h @@ -59,9 +59,11 @@ /// NOTE: DBMS_TCP_PROTOCOL_VERSION has nothing common with VERSION_REVISION, /// later is just a number for server version (one number instead of commit SHA) /// for simplicity (sometimes it may be more convenient in some use cases). 
-#define DBMS_TCP_PROTOCOL_VERSION 54450 +#define DBMS_TCP_PROTOCOL_VERSION 54451 #define DBMS_MIN_PROTOCOL_VERSION_WITH_INITIAL_QUERY_START_TIME 54449 #define DBMS_MIN_REVISION_WITH_QUERY_METRICS 54450 +static constexpr auto DBMS_MIN_PROTOCOL_VERSION_WITH_TIMEZONE_UPDATES = 54451; + diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 1ce6272912d..8ee3163f403 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -460,46 +460,19 @@ enum PreloadLevelSettings : UInt64 M(String, count_distinct_implementation, "uniqExact", "What aggregate function to use for implementation of count(DISTINCT ...)", 0) \ \ M(Bool, add_http_cors_header, false, "Write add http CORS header.", 0) \ -\ - M(UInt64, \ - max_http_get_redirects, \ - 0, \ - "Max number of http GET redirects hops allowed. Make sure additional security measures are in place to prevent a malicious server " \ - "to redirect your requests to unexpected services.", \ - 0) \ -\ - M(Bool, \ - use_client_time_zone, \ - false, \ - "Use client timezone for interpreting DateTime string values, instead of adopting server timezone.", \ - 0) \ -\ - M(Bool, \ - send_progress_in_http_headers, \ - false, \ - "Send progress notifications using X-ClickHouse-Progress headers. Some clients do not support high amount of HTTP headers (Python " \ - "requests in particular), so it is disabled by default.", \ - 0) \ -\ - M(UInt64, \ - http_headers_progress_interval_ms, \ - 100, \ - "Do not send HTTP headers X-ClickHouse-Progress more frequently than at each specified interval.", \ - 0) \ -\ - M(Bool, \ - fsync_metadata, \ - 1, \ - "Do fsync after changing metadata for tables and databases (.sql files). Could be disabled in case of poor latency on server with " \ - "high load of DDL queries and high load of disk subsystem.", \ - 0) \ -\ - M(Bool, \ - join_use_nulls, \ - 1, \ - "Use NULLs for non-joined rows of outer JOINs for types that can be inside Nullable. 
If false, use default value of corresponding " \ - "columns data type.", \ - IMPORTANT) \ + \ + M(UInt64, max_http_get_redirects, 0, "Max number of http GET redirects hops allowed. Make sure additional security measures are in place to prevent a malicious server to redirect your requests to unexpected services.", 0) \ + \ + M(Bool, use_client_time_zone, false, "Use client timezone for interpreting DateTime string values, instead of adopting server timezone.", 0) \ + M(Timezone, session_timezone, "", "The default timezone for current session or query. The default value is server default timezone if empty.", 0) \ + \ + M(Bool, send_progress_in_http_headers, false, "Send progress notifications using X-ClickHouse-Progress headers. Some clients do not support high amount of HTTP headers (Python requests in particular), so it is disabled by default.", 0) \ + \ + M(UInt64, http_headers_progress_interval_ms, 100, "Do not send HTTP headers X-ClickHouse-Progress more frequently than at each specified interval.", 0) \ + \ + M(Bool, fsync_metadata, 1, "Do fsync after changing metadata for tables and databases (.sql files). Could be disabled in case of poor latency on server with high load of DDL queries and high load of disk subsystem.", 0) \ + \ + M(Bool, join_use_nulls, 1, "Use NULLs for non-joined rows of outer JOINs for types that can be inside Nullable. If false, use default value of corresponding columns data type.", IMPORTANT) \ M(Bool, join_using_null_safe, 0, "Force null safe equal comparison for USING keys except the last key of ASOF join", 0) \ \ M(JoinStrictness, join_default_strictness, JoinStrictness::ALL, "Set default strictness in JOIN query. Possible values: empty string, 'ANY', 'ALL'. 
If empty, query without strictness will throw exception.", 0) \ diff --git a/src/Core/SettingsFields.cpp b/src/Core/SettingsFields.cpp index 0812fe0886c..c9f2bd9365b 100644 --- a/src/Core/SettingsFields.cpp +++ b/src/Core/SettingsFields.cpp @@ -31,6 +31,7 @@ #include #include #include +#include namespace DB @@ -327,6 +328,24 @@ String SettingFieldEnumHelpers::readBinary(ReadBuffer & in) return str; } +void SettingFieldTimezone::writeBinary(WriteBuffer & out) const +{ + writeStringBinary(value, out); +} + +void SettingFieldTimezone::readBinary(ReadBuffer & in) +{ + String str; + readStringBinary(str, in); + *this = std::move(str); +} + +void SettingFieldTimezone::validateTimezone(const std::string & tz_str) +{ + cctz::time_zone validated_tz; + if (!tz_str.empty() && !cctz::load_time_zone(tz_str, &validated_tz)) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Invalid time zone: {}", tz_str); +} String SettingFieldCustom::toString() const { diff --git a/src/Core/SettingsFields.h b/src/Core/SettingsFields.h index 74cf852f9f5..5602a108f0b 100644 --- a/src/Core/SettingsFields.h +++ b/src/Core/SettingsFields.h @@ -460,6 +460,37 @@ void SettingFieldMultiEnum::readBinary(ReadBuffer & in) return std::initializer_list> __VA_ARGS__ .size();\ } +/// Setting field for specifying user-defined timezone. It is basically a string, but it needs validation. 
+struct SettingFieldTimezone +{ + String value; + bool changed = false; + + explicit SettingFieldTimezone(std::string_view str = {}) { validateTimezone(std::string(str)); value = str; } + explicit SettingFieldTimezone(const String & str) { validateTimezone(str); value = str; } + explicit SettingFieldTimezone(String && str) { validateTimezone(str); value = std::move(str); } + explicit SettingFieldTimezone(const char * str) { validateTimezone(str); value = str; } + explicit SettingFieldTimezone(const Field & f) { const String & str = f.safeGet(); validateTimezone(str); value = str; } + + SettingFieldTimezone & operator =(std::string_view str) { validateTimezone(std::string(str)); value = str; changed = true; return *this; } + SettingFieldTimezone & operator =(const String & str) { *this = std::string_view{str}; return *this; } + SettingFieldTimezone & operator =(String && str) { validateTimezone(str); value = std::move(str); changed = true; return *this; } + SettingFieldTimezone & operator =(const char * str) { *this = std::string_view{str}; return *this; } + SettingFieldTimezone & operator =(const Field & f) { *this = f.safeGet(); return *this; } + + operator const String &() const { return value; } /// NOLINT + explicit operator Field() const { return value; } + + const String & toString() const { return value; } + void parseFromString(const String & str) { *this = str; } + + void writeBinary(WriteBuffer & out) const; + void readBinary(ReadBuffer & in); + +private: + void validateTimezone(const std::string & tz_str); +}; + /// Can keep a value of any type. Used for user-defined settings. 
struct SettingFieldCustom { diff --git a/src/DataStreams/ITTLAlgorithm.cpp b/src/DataStreams/ITTLAlgorithm.cpp index 7513e0c6ce0..854bcf53768 100644 --- a/src/DataStreams/ITTLAlgorithm.cpp +++ b/src/DataStreams/ITTLAlgorithm.cpp @@ -10,13 +10,12 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -ITTLAlgorithm::ITTLAlgorithm( - const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_) +ITTLAlgorithm::ITTLAlgorithm(const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_) : description(description_) , old_ttl_info(old_ttl_info_) , current_time(current_time_) , force(force_) - , date_lut(DateLUT::instance()) + , date_lut(DateLUT::serverTimezoneInstance()) { } diff --git a/src/DataStreams/MongoDBBlockInputStream.cpp b/src/DataStreams/MongoDBBlockInputStream.cpp index d583cb0d5b4..dc5920a1140 100644 --- a/src/DataStreams/MongoDBBlockInputStream.cpp +++ b/src/DataStreams/MongoDBBlockInputStream.cpp @@ -270,7 +270,7 @@ namespace throw Exception{"Type mismatch, expected Timestamp, got type id = " + toString(value.type()) + " for column " + name, ErrorCodes::TYPE_MISMATCH}; - assert_cast(column).getData().push_back(static_cast(DateLUT::instance().toDayNum( + assert_cast(column).getData().push_back(static_cast(DateLUT::serverTimezoneInstance().toDayNum( static_cast &>(value).value().epochTime()))); break; } diff --git a/src/DataTypes/DataTypeDate32.h b/src/DataTypes/DataTypeDate32.h index cc2e850c970..d97c466bb48 100644 --- a/src/DataTypes/DataTypeDate32.h +++ b/src/DataTypes/DataTypeDate32.h @@ -28,10 +28,7 @@ class DataTypeDate32 final : public DataTypeNumberBase TypeIndex getTypeId() const override { return TypeIndex::Date32; } const char * getFamilyName() const override { return family_name; } - Field getDefault() const override - { - return -static_cast(DateLUT::instance().getDayNumOffsetEpoch()); - } + Field getDefault() const override { return 
-static_cast(DateLUT::serverTimezoneInstance().getDayNumOffsetEpoch()); } bool canBeUsedAsVersion() const override { return true; } bool canBeInsideNullable() const override { return true; } diff --git a/src/DataTypes/DataTypeDateTime.h b/src/DataTypes/DataTypeDateTime.h index 926d529a5d8..fa44099d8af 100644 --- a/src/DataTypes/DataTypeDateTime.h +++ b/src/DataTypes/DataTypeDateTime.h @@ -43,7 +43,9 @@ class TimezoneMixin * all types with different time zones are equivalent and may be used interchangingly. * Time zone only affects parsing and displaying in text formats. * - * If time zone is not specified (example: DateTime without parameter), then default time zone is used. + * If time zone is not specified (example: DateTime without parameter), + * then `session_timezone` setting value is used. + * If `session_timezone` is not set (or empty string), server default time zone is used. * Default time zone is server time zone, if server is doing transformations * and if client is doing transformations, unless 'use_client_time_zone' setting is passed to client; * Server time zone is the time zone specified in 'timezone' parameter in configuration file, diff --git a/src/DataTypes/Serializations/SerializationDate.cpp b/src/DataTypes/Serializations/SerializationDate.cpp index 942a0449323..87962d10947 100644 --- a/src/DataTypes/Serializations/SerializationDate.cpp +++ b/src/DataTypes/Serializations/SerializationDate.cpp @@ -33,7 +33,7 @@ void SerializationDate::checkDataOverflow(const FormatSettings & settings) void SerializationDate::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const { - writeDateText(DayNum(assert_cast(column).getData()[row_num]), ostr); + writeDateText(DayNum(assert_cast(column).getData()[row_num]), ostr, time_zone); } void SerializationDate::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const @@ -44,7 +44,7 @@ void 
SerializationDate::deserializeWholeText(IColumn & column, ReadBuffer & istr void SerializationDate::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { DayNum x; - readDateText(x, istr); + readDateText(x, istr, time_zone); checkDataOverflow(settings); assert_cast(column).getData().push_back(x); } @@ -65,7 +65,7 @@ void SerializationDate::deserializeTextQuoted(IColumn & column, ReadBuffer & ist { DayNum x; assertChar('\'', istr); - readDateText(x, istr); + readDateText(x, istr, time_zone); assertChar('\'', istr); checkDataOverflow(settings); assert_cast(column).getData().push_back(x); /// It's important to do this at the end - for exception safety. @@ -82,7 +82,7 @@ void SerializationDate::deserializeTextJSON(IColumn & column, ReadBuffer & istr, { DayNum x; assertChar('"', istr); - readDateText(x, istr); + readDateText(x, istr, time_zone); assertChar('"', istr); checkDataOverflow(settings); assert_cast(column).getData().push_back(x); diff --git a/src/DataTypes/Serializations/SerializationDate.h b/src/DataTypes/Serializations/SerializationDate.h index 1cc21dbccbf..e76883f2649 100644 --- a/src/DataTypes/Serializations/SerializationDate.h +++ b/src/DataTypes/Serializations/SerializationDate.h @@ -1,6 +1,7 @@ #pragma once #include +#include namespace DB { @@ -8,6 +9,8 @@ namespace DB class SerializationDate final : public SerializationNumber { public: + explicit SerializationDate(const DateLUTImpl & time_zone_ = DateLUT::sessionInstance()): time_zone(time_zone_) {} + void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; @@ -19,6 +22,9 @@ class SerializationDate final : public SerializationNumber void serializeTextCSV(const IColumn & 
column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; +protected: + const DateLUTImpl & time_zone; + private: static void checkDataOverflow(const FormatSettings & settings); }; diff --git a/src/DataTypes/Serializations/SerializationDate32.cpp b/src/DataTypes/Serializations/SerializationDate32.cpp index cb45c09b25d..03977accbc2 100644 --- a/src/DataTypes/Serializations/SerializationDate32.cpp +++ b/src/DataTypes/Serializations/SerializationDate32.cpp @@ -47,7 +47,7 @@ void SerializationDate32::checkDataOverflow(const FormatSettings & settings) void SerializationDate32::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const { - writeDateText(ExtendedDayNum(assert_cast(column).getData()[row_num]), ostr); + writeDateText(ExtendedDayNum(assert_cast(column).getData()[row_num]), ostr, time_zone); } void SerializationDate32::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const @@ -60,7 +60,7 @@ void SerializationDate32::deserializeWholeText(IColumn & column, ReadBuffer & is void SerializationDate32::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { ExtendedDayNum x; - readDateText(x, istr); + readDateText(x, istr, time_zone); checkDataOverflow(settings); assert_cast(column).getData().push_back(x); } @@ -81,7 +81,7 @@ void SerializationDate32::deserializeTextQuoted(IColumn & column, ReadBuffer & i { ExtendedDayNum x; assertChar('\'', istr); - readDateText(x, istr); + readDateText(x, istr, time_zone); assertChar('\'', istr); checkDataOverflow(settings); assert_cast(column).getData().push_back(x); /// It's important to do this at the end - for exception safety. 
@@ -98,7 +98,7 @@ void SerializationDate32::deserializeTextJSON(IColumn & column, ReadBuffer & ist { ExtendedDayNum x; assertChar('"', istr); - readDateText(x, istr); + readDateText(x, istr, time_zone); assertChar('"', istr); checkDataOverflow(settings); assert_cast(column).getData().push_back(x); diff --git a/src/DataTypes/Serializations/SerializationDate32.h b/src/DataTypes/Serializations/SerializationDate32.h index e4fde183c4d..3063d82da3c 100644 --- a/src/DataTypes/Serializations/SerializationDate32.h +++ b/src/DataTypes/Serializations/SerializationDate32.h @@ -16,12 +16,15 @@ #pragma once #include +#include namespace DB { class SerializationDate32 final : public SerializationNumber { public: + explicit SerializationDate32(const DateLUTImpl & time_zone_ = DateLUT::sessionInstance()): time_zone(time_zone_) {} + void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; @@ -33,6 +36,9 @@ class SerializationDate32 final : public SerializationNumber void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; +protected: + const DateLUTImpl & time_zone; + private: static void checkDataOverflow(const FormatSettings & settings); }; diff --git a/src/Dictionaries/ClickHouseDictionarySource.cpp b/src/Dictionaries/ClickHouseDictionarySource.cpp index a1057589e5e..aa37212bfaf 100644 --- a/src/Dictionaries/ClickHouseDictionarySource.cpp +++ b/src/Dictionaries/ClickHouseDictionarySource.cpp @@ -157,7 +157,7 @@ std::string ClickHouseDictionarySource::getUpdateFieldAndDate() if (update_time != 
std::chrono::system_clock::from_time_t(0)) { time_t hr_time = std::chrono::system_clock::to_time_t(update_time) - configuration.update_lag; - std::string str_time = DateLUT::instance().timeToString(hr_time); + std::string str_time = DateLUT::serverTimezoneInstance().timeToString(hr_time); update_time = std::chrono::system_clock::now(); return query_builder.composeUpdateQuery(configuration.update_field, str_time); } diff --git a/src/Dictionaries/MySQLDictionarySource.cpp b/src/Dictionaries/MySQLDictionarySource.cpp index c6f6a0684fd..1d706c176d2 100644 --- a/src/Dictionaries/MySQLDictionarySource.cpp +++ b/src/Dictionaries/MySQLDictionarySource.cpp @@ -120,7 +120,7 @@ std::string MySQLDictionarySource::getUpdateFieldAndDate() if (update_time != std::chrono::system_clock::from_time_t(0)) { time_t hr_time = std::chrono::system_clock::to_time_t(update_time) - configuration.update_lag; - std::string str_time = DateLUT::instance().timeToString(hr_time); + std::string str_time = DateLUT::serverTimezoneInstance().timeToString(hr_time); update_time = std::chrono::system_clock::now(); return query_builder.composeUpdateQuery(configuration.update_field, str_time); } diff --git a/src/Dictionaries/PostgreSQLDictionarySource.cpp b/src/Dictionaries/PostgreSQLDictionarySource.cpp index f1ca3d4855b..ed80f67df86 100644 --- a/src/Dictionaries/PostgreSQLDictionarySource.cpp +++ b/src/Dictionaries/PostgreSQLDictionarySource.cpp @@ -145,7 +145,7 @@ std::string PostgreSQLDictionarySource::getUpdateFieldAndDate() if (update_time != std::chrono::system_clock::from_time_t(0)) { time_t hr_time = std::chrono::system_clock::to_time_t(update_time) - configuration.update_lag; - std::string str_time = DateLUT::instance().timeToString(hr_time); + std::string str_time = DateLUT::serverTimezoneInstance().timeToString(hr_time); update_time = std::chrono::system_clock::now(); return query_builder.composeUpdateQuery(configuration.update_field, str_time); } diff --git 
a/src/Dictionaries/XDBCDictionarySource.cpp b/src/Dictionaries/XDBCDictionarySource.cpp index 0a2c375f961..83ce0f7e5bc 100644 --- a/src/Dictionaries/XDBCDictionarySource.cpp +++ b/src/Dictionaries/XDBCDictionarySource.cpp @@ -143,7 +143,7 @@ std::string XDBCDictionarySource::getUpdateFieldAndDate() if (update_time != std::chrono::system_clock::from_time_t(0)) { time_t hr_time = std::chrono::system_clock::to_time_t(update_time) - configuration.update_lag; - std::string str_time = DateLUT::instance().timeToString(hr_time); + std::string str_time = DateLUT::serverTimezoneInstance().timeToString(hr_time); update_time = std::chrono::system_clock::now(); return query_builder.composeUpdateQuery(configuration.update_field, str_time); } diff --git a/src/Functions/FunctionAddTime.cpp b/src/Functions/FunctionAddTime.cpp index 9b1cb06c8f7..c189fc7d9f8 100644 --- a/src/Functions/FunctionAddTime.cpp +++ b/src/Functions/FunctionAddTime.cpp @@ -238,12 +238,12 @@ class FunctionAddOrSubTime : public IFunction switch (base_type->getTypeId()) { case TypeIndex::Date: { - const auto & time_zone = DateLUT::instance(); + const auto & time_zone = DateLUT::sessionInstance(); executeInternal(base_col, delta_arg, result_col.get(), time_zone, 0); break; } case TypeIndex::Date32: { - const auto & time_zone = DateLUT::instance(); + const auto & time_zone = DateLUT::sessionInstance(); executeInternal(base_col, delta_arg, result_col.get(), time_zone, 0); break; } @@ -260,7 +260,7 @@ class FunctionAddOrSubTime : public IFunction } case TypeIndex::Time: { const auto & t = assert_cast(*arguments[0].type); - const auto & time_zone = DateLUT::instance(); + const auto & time_zone = DateLUT::sessionInstance(); executeInternal(base_col, delta_arg, result_col.get(), time_zone, t.getScale()); break; } diff --git a/src/Functions/FunctionCustomWeekToSomething.h b/src/Functions/FunctionCustomWeekToSomething.h index a62b7cce92e..e73cf771915 100644 --- a/src/Functions/FunctionCustomWeekToSomething.h +++ 
b/src/Functions/FunctionCustomWeekToSomething.h @@ -211,7 +211,7 @@ class FunctionCustomWeekToSomething : public IFunction } /// This method is called only if the function has one argument. Therefore, we do not care about the non-local time zone. - const DateLUTImpl & date_lut = DateLUT::instance(); + const DateLUTImpl & date_lut = DateLUT::sessionInstance(); if (left.isNull() || right.isNull()) return is_not_monotonic; diff --git a/src/Functions/FunctionDateOrDateTimeToSomething.h b/src/Functions/FunctionDateOrDateTimeToSomething.h index 9bff4839037..981a836775b 100644 --- a/src/Functions/FunctionDateOrDateTimeToSomething.h +++ b/src/Functions/FunctionDateOrDateTimeToSomething.h @@ -180,7 +180,7 @@ class FunctionDateOrDateTimeToSomething : public IFunctionDateOrDateTime(&type)) date_lut = &timezone->getTimeZone(); if (left.isNull() || right.isNull()) diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index 6b40cf4c4fb..c803256df4e 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -499,9 +499,9 @@ struct ConvertImpl && !std { static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, UInt16 = 0) { - const auto & time_zone = DateLUT::instance(); + const auto & time_zone = DateLUT::sessionInstance(); const auto today = time_zone.toDayNum(time(nullptr)); - auto date_time = DateLUT::instance().fromDayNum(today); + auto date_time = DateLUT::sessionInstance().fromDayNum(today); if constexpr (std::is_same_v || std::is_same_v) { @@ -709,7 +709,7 @@ struct ToDate32Transform32Or64Signed static inline NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl & time_zone) { - static const Int32 daynum_min_offset = -static_cast(DateLUT::instance().getDayNumOffsetEpoch()); + static const Int32 daynum_min_offset = -static_cast(time_zone.getDayNumOffsetEpoch()); if (from < daynum_min_offset) return daynum_min_offset; 
return (from < DATE_LUT_MAX_EXTEND_DAY_NUM) @@ -1020,18 +1020,18 @@ struct FormatImpl template <> struct FormatImpl { - static void execute(const DataTypeDate::FieldType x, WriteBuffer & wb, const DataTypeDate *, const DateLUTImpl *) + static void execute(const DataTypeDate::FieldType x, WriteBuffer & wb, const DataTypeDate *, const DateLUTImpl * time_zone) { - writeDateText(DayNum(x), wb); + writeDateText(DayNum(x), wb, *time_zone); } }; template <> struct FormatImpl { - static void execute(const DataTypeDate32::FieldType x, WriteBuffer & wb, const DataTypeDate32 *, const DateLUTImpl *) + static void execute(const DataTypeDate32::FieldType x, WriteBuffer & wb, const DataTypeDate32 *, const DateLUTImpl * time_zone) { - writeDateText(ExtendedDayNum(x), wb); + writeDateText(ExtendedDayNum(x), wb, *time_zone); } }; @@ -1109,7 +1109,9 @@ struct ConvertImplconvertToFullColumnIfConst(); const DateLUTImpl * time_zone = nullptr; - /// For argument of DateTime type, second argument with time zone could be specified. + if constexpr (std::is_same_v || std::is_same_v) + time_zone = &DateLUT::sessionInstance(); + /// For argument of Date or DateTime type, second argument with time zone could be specified. 
if constexpr (std::is_same_v || std::is_same_v) time_zone = &extractTimeZoneFromFunctionArguments(arguments, 1, 0); @@ -1264,18 +1266,18 @@ void parseImpl(typename DataType::FieldType & x, ReadBuffer & rb, const DateLUTI } template <> -inline void parseImpl(DataTypeDate::FieldType & x, ReadBuffer & rb, const DateLUTImpl *) +inline void parseImpl(DataTypeDate::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone) { DayNum tmp(0); - readDateText(tmp, rb); + readDateText(tmp, rb, *time_zone); x = tmp; } template <> -inline void parseImpl(DataTypeDate32::FieldType & x, ReadBuffer & rb, const DateLUTImpl *) +inline void parseImpl(DataTypeDate32::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone) { ExtendedDayNum tmp(0); - readDateText(tmp, rb); + readDateText(tmp, rb, *time_zone); x = tmp; } @@ -1322,20 +1324,20 @@ bool tryParseImpl(typename DataType::FieldType & x, ReadBuffer & rb, const DateL } template <> -inline bool tryParseImpl(DataTypeDate::FieldType & x, ReadBuffer & rb, const DateLUTImpl *) +inline bool tryParseImpl(DataTypeDate::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone) { DayNum tmp(0); - if (!tryReadDateText(tmp, rb)) + if (!tryReadDateText(tmp, rb, *time_zone)) return false; x = tmp; return true; } template <> -inline bool tryParseImpl(DataTypeDate32::FieldType & x, ReadBuffer & rb, const DateLUTImpl *) +inline bool tryParseImpl(DataTypeDate32::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone) { ExtendedDayNum tmp(0); - if (!tryReadDateText(tmp, rb)) + if (!tryReadDateText(tmp, rb, *time_zone)) return false; // ExtendedDayNum is int32 and DataTypeData32::FieldType is also int32 // coverity[store_truncates_time_t] @@ -1549,7 +1551,7 @@ struct ConvertThroughParsing const DateLUTImpl * local_time_zone [[maybe_unused]] = nullptr; const DateLUTImpl * utc_time_zone [[maybe_unused]] = nullptr; - /// For conversion to DateTime type, second argument with time zone could be specified. 
+ /// For conversion to Date or DateTime type, second argument with time zone could be specified. if constexpr (std::is_same_v || to_datetime64) { const auto result_type = removeNullable(res_type); @@ -1564,6 +1566,12 @@ struct ConvertThroughParsing if constexpr (parsing_mode == ConvertFromStringParsingMode::BestEffort || parsing_mode == ConvertFromStringParsingMode::BestEffortUS) utc_time_zone = &DateLUT::instance("UTC"); } + else if constexpr (std::is_same_v || std::is_same_v) + { + // Timezone is more or less dummy when parsing Date/Date32 from string. + local_time_zone = &DateLUT::sessionInstance(); + utc_time_zone = &DateLUT::instance("UTC"); + } const IColumn * col_from = arguments[0].column.get(); const ColumnString * col_from_string = checkAndGetColumn(col_from); @@ -1807,7 +1815,7 @@ struct ConvertThroughParsing { if constexpr (std::is_same_v) { - vec_to[i] = -static_cast(DateLUT::instance().getDayNumOffsetEpoch()); + vec_to[i] = -static_cast(DateLUT::sessionInstance().getDayNumOffsetEpoch()); } else { @@ -2230,7 +2238,7 @@ class FunctionConvert : public IFunction || std::is_same_v // toDate(value[, timezone : String]) || std::is_same_v // TODO: shall we allow timestamp argument for toDate? DateTime knows nothing about timezones and this argument is ignored below. - // toDate(value[, timezone : String]) + // toDate32(value[, timezone : String]) || std::is_same_v // toDateTime(value[, timezone: String]) || std::is_same_v diff --git a/src/Functions/IFunctionCustomWeek.h b/src/Functions/IFunctionCustomWeek.h index f1408acf73e..9f81410fdfa 100644 --- a/src/Functions/IFunctionCustomWeek.h +++ b/src/Functions/IFunctionCustomWeek.h @@ -55,7 +55,7 @@ class IFunctionCustomWeek : public IFunction const IFunction::Monotonicity is_not_monotonic; /// This method is called only if the function has one argument. Therefore, we do not care about the non-local time zone. 
- const DateLUTImpl & date_lut = DateLUT::instance(); + const DateLUTImpl & date_lut = DateLUT::sessionInstance(); if (left.isNull() || right.isNull()) return {}; diff --git a/src/Functions/IFunctionDateOrDateTime.h b/src/Functions/IFunctionDateOrDateTime.h index 600faba068c..b40705cbfad 100644 --- a/src/Functions/IFunctionDateOrDateTime.h +++ b/src/Functions/IFunctionDateOrDateTime.h @@ -63,7 +63,7 @@ class IFunctionDateOrDateTime : public IFunction const IFunction::Monotonicity is_monotonic(/* is_monotonic */ true, /* is_positive */ true, /* is_always_monotonic */false); const IFunction::Monotonicity is_not_monotonic; - const DateLUTImpl * date_lut = &DateLUT::instance(); + const DateLUTImpl * date_lut = &DateLUT::sessionInstance(); if (const auto * timezone = dynamic_cast(&type)) date_lut = &timezone->getTimeZone(); diff --git a/src/Functions/currentTime.cpp b/src/Functions/currentTime.cpp index ec60c3e3285..3871d04b343 100644 --- a/src/Functions/currentTime.cpp +++ b/src/Functions/currentTime.cpp @@ -130,7 +130,7 @@ class CurrentTimeOverloadResolver : public IFunctionOverloadResolver } DateTime64 dt64 = DB::nowSubsecondDt64(scale); ToTimeTransform transformer(scale, scale); - Decimal64::NativeType t = transformer.execute(dt64, intExp10(scale), DateLUT::instance()); + Decimal64::NativeType t = transformer.execute(dt64, intExp10(scale), DateLUT::sessionInstance()); return std::make_unique(t, scale, std::make_shared(scale)); } }; diff --git a/src/Functions/dateDiff.cpp b/src/Functions/dateDiff.cpp index 49a54658c70..e5c0ee12fdb 100644 --- a/src/Functions/dateDiff.cpp +++ b/src/Functions/dateDiff.cpp @@ -444,7 +444,7 @@ class DateDiffImpl */ // Note: It is impossible to take the civil-time diff of 2 different timezones to mysql's timestampdiff. // Because, mysql will convert both absolute times to the same timezone given by the session time_zone variable. 
- const DateLUTImpl & date_lut = DateLUT::instance(); + const DateLUTImpl & date_lut = DateLUT::sessionInstance(); bool should_swap = seconds_x > seconds_y; struct DateTimeComponents { const DateLUTImpl::Values & values; @@ -803,13 +803,13 @@ class FunctionTimeDiff : public IFunction { auto res = ColumnTime::create(rows, 0); - impl.dispatchForColumns(x, y, DateLUT::instance(), DateLUT::instance(), res->getData()); + impl.dispatchForColumns(x, y, DateLUT::sessionInstance(), DateLUT::sessionInstance(), res->getData()); return res; } else { auto res = ColumnInt64::create(rows); - impl.dispatchForColumns(x, y, DateLUT::instance(), DateLUT::instance(), res->getData()); + impl.dispatchForColumns(x, y, DateLUT::sessionInstance(), DateLUT::sessionInstance(), res->getData()); return res; } } diff --git a/src/Functions/dateName.cpp b/src/Functions/dateName.cpp index ae3a4edb32e..b55cc9e01b0 100644 --- a/src/Functions/dateName.cpp +++ b/src/Functions/dateName.cpp @@ -146,7 +146,7 @@ class FunctionDateNameImpl : public IFunction if (std::is_same_v || std::is_same_v) time_zone_tmp = &extractTimeZoneFromFunctionArguments(arguments, 2, 1); else - time_zone_tmp = &DateLUT::instance(); + time_zone_tmp = &DateLUT::sessionInstance(); const auto & times_data = times->getData(); const DateLUTImpl & time_zone = *time_zone_tmp; diff --git a/src/Functions/extractTimeZoneFromFunctionArguments.cpp b/src/Functions/extractTimeZoneFromFunctionArguments.cpp index b6646c6d252..fdc4c930f64 100644 --- a/src/Functions/extractTimeZoneFromFunctionArguments.cpp +++ b/src/Functions/extractTimeZoneFromFunctionArguments.cpp @@ -66,7 +66,7 @@ const DateLUTImpl & extractTimeZoneFromFunctionArguments(const ColumnsWithTypeAn else { if (arguments.size() <= datetime_arg_num) - return DateLUT::instance(); + return DateLUT::sessionInstance(); const auto & dt_arg = arguments[datetime_arg_num].type.get(); /// If time zone is attached to an argument of type DateTime. 
@@ -75,7 +75,7 @@ const DateLUTImpl & extractTimeZoneFromFunctionArguments(const ColumnsWithTypeAn if (const auto * type = checkAndGetDataType(dt_arg)) return type->getTimeZone(); - return DateLUT::instance(); + return DateLUT::sessionInstance(); } } diff --git a/src/Functions/formatDateTime.cpp b/src/Functions/formatDateTime.cpp index e1f33899e6d..676ee02f638 100644 --- a/src/Functions/formatDateTime.cpp +++ b/src/Functions/formatDateTime.cpp @@ -1320,7 +1320,7 @@ namespace else if (std::is_same_v || std::is_same_v) time_zone_tmp = &extractTimeZoneFromFunctionArguments(arguments, 2, 0); else - time_zone_tmp = &DateLUT::instance(); + time_zone_tmp = &DateLUT::sessionInstance(); const DateLUTImpl & time_zone = *time_zone_tmp; const auto & vec = times->getData(); diff --git a/src/Functions/fromDaysAndToDays.cpp b/src/Functions/fromDaysAndToDays.cpp index d125d7caebf..8676f8602e5 100644 --- a/src/Functions/fromDaysAndToDays.cpp +++ b/src/Functions/fromDaysAndToDays.cpp @@ -109,7 +109,7 @@ namespace { const auto * col_from = checkAndGetColumn(column); - static const Int32 daynum_min_offset = -static_cast(DateLUT::instance().getDayNumOffsetEpoch()); + static const Int32 daynum_min_offset = -static_cast(DateLUT::sessionInstance().getDayNumOffsetEpoch()); MutableColumnPtr res = DataTypeDate32().createColumn(); auto & res_data = dynamic_cast *>(res.get())->getData(); @@ -239,7 +239,7 @@ namespace if (col == nullptr) throw Exception("Column type does not match to the data type", ErrorCodes::ILLEGAL_COLUMN); - const auto & timezone = DateLUT::instance(); + const auto & timezone = DateLUT::sessionInstance(); auto & data = col->getData(); const auto row_size = data.size(); res_data.resize(row_size); diff --git a/src/Functions/makeDate.cpp b/src/Functions/makeDate.cpp index 5efeb6ad822..20925291efa 100644 --- a/src/Functions/makeDate.cpp +++ b/src/Functions/makeDate.cpp @@ -131,7 +131,7 @@ namespace auto res_column = Traits::ReturnDataType::ColumnType::create(input_rows_count); 
auto & result_data = res_column->getData(); - const auto & date_lut = DateLUT::instance(); + const auto & date_lut = DateLUT::sessionInstance(); const Int32 max_days_since_epoch = date_lut.makeDayNum(Traits::MAX_DATE[0], Traits::MAX_DATE[1], Traits::MAX_DATE[2]); if (is_year_month_day_variant) @@ -573,7 +573,7 @@ namespace const auto & minute_data = typeid_cast(*converted_arguments[1]).getData(); const auto & second_data = typeid_cast(*converted_arguments[2]).getData(); - const auto & date_lut = DateLUT::instance(); + const auto & date_lut = DateLUT::sessionInstance(); const auto max_fraction = pow(10, precision) - 1; const auto min_time = minTime(date_lut); const auto max_time = maxTime(date_lut); diff --git a/src/Functions/parseDateTime.cpp b/src/Functions/parseDateTime.cpp index 5beebe2a6ea..de23335dbfd 100644 --- a/src/Functions/parseDateTime.cpp +++ b/src/Functions/parseDateTime.cpp @@ -1970,7 +1970,7 @@ namespace const DateLUTImpl & getTimeZone(const ColumnsWithTypeAndName & arguments) const { if (arguments.size() < 3) - return DateLUT::instance(); + return DateLUT::sessionInstance(); const auto * col = checkAndGetColumnConst(arguments[2].column.get()); if (!col) diff --git a/src/Functions/serverConstants.cpp b/src/Functions/serverConstants.cpp index 13a9a1cbab6..d03ed92242b 100644 --- a/src/Functions/serverConstants.cpp +++ b/src/Functions/serverConstants.cpp @@ -59,16 +59,26 @@ namespace explicit FunctionTcpPort(ContextPtr context) : FunctionConstantBase(context->getTCPPort(), context->isDistributed()) {} }; - - /// Returns the server time zone. + /// Returns timezone for current session. 
class FunctionTimezone : public FunctionConstantBase { public: static constexpr auto name = "timezone"; static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } - explicit FunctionTimezone(ContextPtr context) : FunctionConstantBase(String{DateLUT::instance().getTimeZone()}, context->isDistributed()) {} + explicit FunctionTimezone(ContextPtr context) : FunctionConstantBase(DateLUT::sessionInstance().getTimeZone(), context->isDistributed()) {} }; + /// Returns the server time zone (timezone in which server runs). + class FunctionServerTimezone : public FunctionConstantBase + { + public: + static constexpr auto name = "serverTimezone"; + static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } + explicit FunctionServerTimezone(ContextPtr context) + : FunctionConstantBase(DateLUT::serverTimezoneInstance().getTimeZone(), context->isDistributed()) + { + } + }; /// Returns server uptime in seconds. class FunctionUptime : public FunctionConstantBase @@ -146,6 +156,12 @@ REGISTER_FUNCTION(Timezone) factory.registerAlias("timeZone", "timezone"); } +REGISTER_FUNCTION(ServerTimezone) +{ + factory.registerFunction(); + factory.registerAlias("serverTimeZone", "serverTimezone"); +} + REGISTER_FUNCTION(Uptime) { factory.registerFunction(); diff --git a/src/Functions/timestamp.cpp b/src/Functions/timestamp.cpp index a32356ca763..b7604b6ede4 100644 --- a/src/Functions/timestamp.cpp +++ b/src/Functions/timestamp.cpp @@ -60,7 +60,7 @@ class FunctionTimestamp : public IFunction ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { - const DateLUTImpl * local_time_zone = &DateLUT::instance(); + const DateLUTImpl * local_time_zone = &DateLUT::sessionInstance(); auto col_result = ColumnDateTime64::create(input_rows_count, DATETIME_SCALE); ColumnDateTime64::Container & vec_result = col_result->getData(); diff --git a/src/Functions/timezone.cpp 
b/src/Functions/timezone.cpp index 07f0d03a57b..40db95862dc 100644 --- a/src/Functions/timezone.cpp +++ b/src/Functions/timezone.cpp @@ -41,7 +41,7 @@ class FunctionTimezone : public IFunction ColumnPtr executeImpl(const ColumnsWithTypeAndName &, const DataTypePtr &, size_t input_rows_count) const override { - return DataTypeString().createColumnConst(input_rows_count, DateLUT::instance().getTimeZone()); + return DataTypeString().createColumnConst(input_rows_count, DateLUT::sessionInstance().getTimeZone()); } }; diff --git a/src/Functions/today.cpp b/src/Functions/today.cpp index fb73603fde6..f9f6940255f 100644 --- a/src/Functions/today.cpp +++ b/src/Functions/today.cpp @@ -101,7 +101,7 @@ class TodayOverloadResolver : public IFunctionOverloadResolver FunctionBasePtr buildImpl(const ColumnsWithTypeAndName &, const DataTypePtr &) const override { - return std::make_unique(DayNum(DateLUT::instance().toDayNum(time(nullptr)).toUnderType())); + return std::make_unique(DayNum(DateLUT::sessionInstance().toDayNum(time(nullptr)).toUnderType())); } }; diff --git a/src/Functions/yesterday.cpp b/src/Functions/yesterday.cpp index fd1701b3335..314745207e9 100644 --- a/src/Functions/yesterday.cpp +++ b/src/Functions/yesterday.cpp @@ -78,7 +78,7 @@ class YesterdayOverloadResolver : public IFunctionOverloadResolver FunctionBasePtr buildImpl(const ColumnsWithTypeAndName &, const DataTypePtr &) const override { - auto day_num = DateLUT::instance().toDayNum(time(nullptr)) - 1; + auto day_num = DateLUT::sessionInstance().toDayNum(time(nullptr)) - 1; return std::make_unique(static_cast(day_num)); } }; diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index ae7d78fa437..6984b705079 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -738,7 +738,7 @@ inline void convertToDayNum(DayNum & date, ExtendedDayNum & from) } template -inline ReturnType readDateTextImpl(DayNum & date, ReadBuffer & buf) +inline ReturnType readDateTextImpl(DayNum & date, ReadBuffer & buf, const 
DateLUTImpl & date_lut) { static constexpr bool throw_exception = std::is_same_v; @@ -749,13 +749,13 @@ inline ReturnType readDateTextImpl(DayNum & date, ReadBuffer & buf) else if (!readDateTextImpl(local_date, buf)) return false; - ExtendedDayNum ret = DateLUT::instance().makeDayNum(local_date.year(), local_date.month(), local_date.day()); - convertToDayNum(date,ret); + ExtendedDayNum ret = date_lut.makeDayNum(local_date.year(), local_date.month(), local_date.day()); + convertToDayNum(date, ret); return ReturnType(true); } template -inline ReturnType readDateTextImpl(ExtendedDayNum & date, ReadBuffer & buf) +inline ReturnType readDateTextImpl(ExtendedDayNum & date, ReadBuffer & buf, const DateLUTImpl & date_lut) { static constexpr bool throw_exception = std::is_same_v; @@ -765,8 +765,8 @@ inline ReturnType readDateTextImpl(ExtendedDayNum & date, ReadBuffer & buf) readDateTextImpl(local_date, buf); else if (!readDateTextImpl(local_date, buf)) return false; - /// When the parameter is out of rule or out of range, Date32 uses 1925-01-01 as the default value (-DateLUT::instance().getDayNumOffsetEpoch(), -16436) and Date uses 1970-01-01. - date = DateLUT::instance().makeDayNum(local_date.year(), local_date.month(), local_date.day(), -static_cast(DateLUT::instance().getDayNumOffsetEpoch())); + /// When the parameter is out of rule or out of range, Date32 uses 1925-01-01 as the default value (-DateLUT::serverTimezoneInstance().getDayNumOffsetEpoch(), -16436) and Date uses 1970-01-01. 
+ date = date_lut.makeDayNum(local_date.year(), local_date.month(), local_date.day(), -static_cast(date_lut.getDayNumOffsetEpoch())); return ReturnType(true); } @@ -776,14 +776,14 @@ inline void readDateText(LocalDate & date, ReadBuffer & buf) readDateTextImpl(date, buf); } -inline void readDateText(DayNum & date, ReadBuffer & buf) +inline void readDateText(DayNum & date, ReadBuffer & buf, const DateLUTImpl & date_lut = DateLUT::serverTimezoneInstance()) { - readDateTextImpl(date, buf); + readDateTextImpl(date, buf, date_lut); } -inline void readDateText(ExtendedDayNum & date, ReadBuffer & buf) +inline void readDateText(ExtendedDayNum & date, ReadBuffer & buf, const DateLUTImpl & date_lut = DateLUT::serverTimezoneInstance()) { - readDateTextImpl(date, buf); + readDateTextImpl(date, buf, date_lut); } inline bool tryReadDateText(LocalDate & date, ReadBuffer & buf) @@ -791,14 +791,14 @@ inline bool tryReadDateText(LocalDate & date, ReadBuffer & buf) return readDateTextImpl(date, buf); } -inline bool tryReadDateText(DayNum & date, ReadBuffer & buf) +inline bool tryReadDateText(DayNum & date, ReadBuffer & buf, const DateLUTImpl & date_lut = DateLUT::serverTimezoneInstance()) { - return readDateTextImpl(date, buf); + return readDateTextImpl(date, buf, date_lut); } -inline bool tryReadDateText(ExtendedDayNum & date, ReadBuffer & buf) +inline bool tryReadDateText(ExtendedDayNum & date, ReadBuffer & buf, const DateLUTImpl & date_lut = DateLUT::serverTimezoneInstance()) { - return readDateTextImpl(date, buf); + return readDateTextImpl(date, buf, date_lut); } template @@ -1277,12 +1277,13 @@ inline ReturnType readTimeTextImpl(Decimal64 & time, UInt32 scale, ReadBuffer & return ReturnType(true); } -inline void readDateTimeText(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance()) +inline void readDateTimeText(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & time_zone = DateLUT::serverTimezoneInstance()) { 
readDateTimeTextImpl(datetime, buf, time_zone); } -inline void readDateTime64Text(DateTime64 & datetime64, UInt32 scale, ReadBuffer & buf, const DateLUTImpl & date_lut = DateLUT::instance()) +inline void readDateTime64Text( + DateTime64 & datetime64, UInt32 scale, ReadBuffer & buf, const DateLUTImpl & date_lut = DateLUT::serverTimezoneInstance()) { readDateTimeTextImpl(datetime64, scale, buf, date_lut); } @@ -1297,12 +1298,13 @@ inline bool tryReadTimeText(Decimal64 & time, UInt32 scale, ReadBuffer & buf) return readTimeTextImpl(time, scale, buf); } -inline bool tryReadDateTimeText(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance()) +inline bool tryReadDateTimeText(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & time_zone = DateLUT::serverTimezoneInstance()) { return readDateTimeTextImpl(datetime, buf, time_zone); } -inline bool tryReadDateTime64Text(DateTime64 & datetime64, UInt32 scale, ReadBuffer & buf, const DateLUTImpl & date_lut = DateLUT::instance()) +inline bool tryReadDateTime64Text( + DateTime64 & datetime64, UInt32 scale, ReadBuffer & buf, const DateLUTImpl & date_lut = DateLUT::serverTimezoneInstance()) { return readDateTimeTextImpl(datetime64, scale, buf, date_lut); } @@ -1505,8 +1507,14 @@ tryReadText(T & x, ReadBuffer & buf) { return tryReadFloatText(x, buf); } inline void readText(bool & x, ReadBuffer & buf) { readBoolText(x, buf); } inline void readText(String & x, ReadBuffer & buf) { readEscapedString(x, buf); } -inline void readText(DayNum & x, ReadBuffer & buf) { readDateText(x, buf); } -inline void readText(ExtendedDayNum & x, ReadBuffer & buf) { readDateText(x, buf); } +inline void readText(DayNum & x, ReadBuffer & buf, const DateLUTImpl & time_zone = DateLUT::serverTimezoneInstance()) +{ + readDateText(x, buf, time_zone); +} +inline void readText(ExtendedDayNum & x, ReadBuffer & buf, const DateLUTImpl & time_zone = DateLUT::serverTimezoneInstance()) +{ + readDateText(x, buf, time_zone); +} inline 
void readText(LocalDate & x, ReadBuffer & buf) { readDateText(x, buf); } inline void readText(LocalDateTime & x, ReadBuffer & buf) { readDateTimeText(x, buf); } inline void readText(UUID & x, ReadBuffer & buf) { readUUIDText(x, buf); } diff --git a/src/IO/WriteHelpers.h b/src/IO/WriteHelpers.h index 8e62c3d8943..64d04a57903 100644 --- a/src/IO/WriteHelpers.h +++ b/src/IO/WriteHelpers.h @@ -757,15 +757,15 @@ inline void writeDateText(const LocalDate & date, WriteBuffer & buf) } template -inline void writeDateText(DayNum date, WriteBuffer & buf) +inline void writeDateText(DayNum date, WriteBuffer & buf, const DateLUTImpl & time_zone = DateLUT::serverTimezoneInstance()) { - writeDateText(LocalDate(date), buf); + writeDateText(LocalDate(date, time_zone), buf); } template -inline void writeDateText(ExtendedDayNum date, WriteBuffer & buf) +inline void writeDateText(ExtendedDayNum date, WriteBuffer & buf, const DateLUTImpl & time_zone = DateLUT::serverTimezoneInstance()) { - writeDateText(LocalDate(date), buf); + writeDateText(LocalDate(date, time_zone), buf); } /// In the format YYYY-MM-DD HH:MM:SS @@ -818,14 +818,19 @@ inline void writeDateTimeText(const LocalDateTime & datetime, WriteBuffer & buf) /// In the format YYYY-MM-DD HH:MM:SS, according to the specified time zone. template -inline void writeDateTimeText(time_t datetime, WriteBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance()) +inline void writeDateTimeText(time_t datetime, WriteBuffer & buf, const DateLUTImpl & time_zone = DateLUT::serverTimezoneInstance()) { writeDateTimeText(LocalDateTime(datetime, time_zone), buf); } /// In the format YYYY-MM-DD HH:MM:SS.NNNNNNNNN, according to the specified time zone. 
-template -inline void writeDateTimeText(DateTime64 datetime64, UInt32 scale, WriteBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance()) +template < + char date_delimeter = '-', + char time_delimeter = ':', + char between_date_time_delimiter = ' ', + char fractional_time_delimiter = '.'> +inline void +writeDateTimeText(DateTime64 datetime64, UInt32 scale, WriteBuffer & buf, const DateLUTImpl & time_zone = DateLUT::serverTimezoneInstance()) { static constexpr UInt32 MaxScale = DecimalUtils::max_precision; scale = scale > MaxScale ? MaxScale : scale; @@ -892,7 +897,7 @@ inline void writeTimeText(Decimal64 time, UInt32 scale, WriteBuffer & buf) /// In the RFC 1123 format: "Tue, 03 Dec 2019 00:11:50 GMT". You must provide GMT DateLUT. /// This is needed for HTTP requests. -inline void writeDateTimeTextRFC1123(time_t datetime, WriteBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance()) +inline void writeDateTimeTextRFC1123(time_t datetime, WriteBuffer & buf, const DateLUTImpl & time_zone = DateLUT::serverTimezoneInstance()) { const auto & values = time_zone.getValues(datetime); @@ -996,8 +1001,10 @@ template <> inline void writeText(const bool & x, WriteBuffer & buf) { wri /// assumes here that `x` is a null-terminated string. 
inline void writeText(const char * x, WriteBuffer & buf) { writeCString(x, buf); } inline void writeText(const char * x, size_t size, WriteBuffer & buf) { writeString(x, size, buf); } - -inline void writeText(const DayNum & x, WriteBuffer & buf) { writeDateText(LocalDate(x), buf); } +inline void writeText(const DayNum & x, WriteBuffer & buf, const DateLUTImpl & time_zone = DateLUT::serverTimezoneInstance()) +{ + writeDateText(LocalDate(x, time_zone), buf); +} inline void writeText(const LocalDate & x, WriteBuffer & buf) { writeDateText(x, buf); } inline void writeText(const LocalDateTime & x, WriteBuffer & buf) { writeDateTimeText(x, buf); } inline void writeText(const UUID & x, WriteBuffer & buf) { writeUUIDText(x, buf); } diff --git a/src/IO/examples/parse_date_time_best_effort.cpp b/src/IO/examples/parse_date_time_best_effort.cpp index fc5755f1f95..f238125a142 100644 --- a/src/IO/examples/parse_date_time_best_effort.cpp +++ b/src/IO/examples/parse_date_time_best_effort.cpp @@ -12,7 +12,7 @@ using namespace DB; int main(int, char **) try { - const DateLUTImpl & local_time_zone = DateLUT::instance(); + const DateLUTImpl & local_time_zone = DateLUT::serverTimezoneInstance(); const DateLUTImpl & utc_time_zone = DateLUT::instance("UTC"); ReadBufferFromFileDescriptor in(STDIN_FILENO); diff --git a/src/Interpreters/AsynchronousMetricLog.cpp b/src/Interpreters/AsynchronousMetricLog.cpp index 09345ecca7c..e7110b8286b 100644 --- a/src/Interpreters/AsynchronousMetricLog.cpp +++ b/src/Interpreters/AsynchronousMetricLog.cpp @@ -44,7 +44,7 @@ void AsynchronousMetricLog::addValues(const AsynchronousMetricValues & values) const auto now = std::chrono::system_clock::now(); element.event_time = time_in_seconds(now); element.event_time_microseconds = time_in_microseconds(now); - element.event_date = DateLUT::instance().toDayNum(element.event_time); + element.event_date = DateLUT::serverTimezoneInstance().toDayNum(element.event_time); for (const auto & [key, value] : values) { diff 
--git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index d4c032a8fe9..5a9ef6508e0 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -5810,6 +5810,11 @@ Context::HybridPartAllocator Context::getHybridPartAllocationAlgo() const } } +bool Context::hasSessionTimeZone() const +{ + return !settings.session_timezone.value.empty(); +} + void Context::createPlanNodeIdAllocator(int max_id) { id_allocator = std::make_shared(max_id); diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 1d1cffc01eb..87f16a81d94 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -1691,6 +1691,10 @@ class Context : public ContextData, public std::enable_shared_from_this }; HybridPartAllocator getHybridPartAllocationAlgo() const; + // If session timezone is specified, some cache which involves creating table/storage can't be used. + // Because it may use wrong timezone for DateTime column, which leads to incorrect result. + bool hasSessionTimeZone() const; + String getDefaultCnchPolicyName() const; String getCnchAuxilityPolicyName() const; diff --git a/src/Interpreters/CrashLog.cpp b/src/Interpreters/CrashLog.cpp index a9da804f1d2..3b695547cd9 100644 --- a/src/Interpreters/CrashLog.cpp +++ b/src/Interpreters/CrashLog.cpp @@ -40,7 +40,7 @@ void CrashLogElement::appendToBlock(MutableColumns & columns) const { size_t i = 0; - columns[i++]->insert(DateLUT::instance().toDayNum(event_time).toUnderType()); + columns[i++]->insert(DateLUT::serverTimezoneInstance().toDayNum(event_time).toUnderType()); columns[i++]->insert(event_time); columns[i++]->insert(timestamp_ns); columns[i++]->insert(signal); diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index 236d6eaad12..ddc61b22870 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -1216,7 +1216,7 @@ DatabasePtr DatabaseCatalog::tryGetDatabaseCnch(const String & database_name, Co 
return res; res = getDatabaseFromCnchOrHiveCatalog( database_name, - getContext(), + local_context, txn ? txn->getStartTime() : TxnTimestamp::maxTS(), local_context->getSettingsRef().enable_three_part_identifier); if (res && txn) diff --git a/src/Interpreters/KafkaLog.cpp b/src/Interpreters/KafkaLog.cpp index 42e9c3c6246..356fefcfb87 100644 --- a/src/Interpreters/KafkaLog.cpp +++ b/src/Interpreters/KafkaLog.cpp @@ -67,7 +67,7 @@ void KafkaLogElement::appendToBlock(MutableColumns & columns) const size_t i = 0; columns[i++]->insert(UInt64(event_type)); - columns[i++]->insert(UInt64(DateLUT::instance().toDayNum(event_time))); + columns[i++]->insert(UInt64(DateLUT::serverTimezoneInstance().toDayNum(event_time))); columns[i++]->insert(UInt64(event_time)); columns[i++]->insert(UInt64(duration_ms)); diff --git a/src/Interpreters/MaterializedMySQLLog.cpp b/src/Interpreters/MaterializedMySQLLog.cpp index 97054b34bf3..36145d11d81 100644 --- a/src/Interpreters/MaterializedMySQLLog.cpp +++ b/src/Interpreters/MaterializedMySQLLog.cpp @@ -62,7 +62,7 @@ void MaterializedMySQLLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insert(nameset_2_array(tables)); columns[i++]->insert(type); - columns[i++]->insert(DateLUT::instance().toDayNum(event_time).toUnderType()); + columns[i++]->insert(DateLUT::serverTimezoneInstance().toDayNum(event_time).toUnderType()); columns[i++]->insert(event_time); columns[i++]->insert(resync_table); diff --git a/src/Interpreters/MetricLog.cpp b/src/Interpreters/MetricLog.cpp index ae0b85d4d8d..c625a9c0731 100644 --- a/src/Interpreters/MetricLog.cpp +++ b/src/Interpreters/MetricLog.cpp @@ -42,7 +42,7 @@ void MetricLogElement::appendToBlock(MutableColumns & columns) const { size_t column_idx = 0; - columns[column_idx++]->insert(DateLUT::instance().toDayNum(event_time).toUnderType()); + columns[column_idx++]->insert(DateLUT::serverTimezoneInstance().toDayNum(event_time).toUnderType()); columns[column_idx++]->insert(event_time); 
columns[column_idx++]->insert(event_time_microseconds); columns[column_idx++]->insert(milliseconds); diff --git a/src/Interpreters/MutationLog.cpp b/src/Interpreters/MutationLog.cpp index 87d071cc672..790999c06c2 100644 --- a/src/Interpreters/MutationLog.cpp +++ b/src/Interpreters/MutationLog.cpp @@ -56,7 +56,7 @@ void MutationLogElement::appendToBlock(MutableColumns & columns) const size_t i = 0; columns[i++]->insert(UInt64(event_type)); - columns[i++]->insert(UInt64(DateLUT::instance().toDayNum(event_time))); + columns[i++]->insert(UInt64(DateLUT::serverTimezoneInstance().toDayNum(event_time))); columns[i++]->insert(UInt64(event_time)); columns[i++]->insert(database_name); diff --git a/src/Interpreters/OpenTelemetrySpanLog.cpp b/src/Interpreters/OpenTelemetrySpanLog.cpp index 46c67a8e4e7..058558b21e7 100644 --- a/src/Interpreters/OpenTelemetrySpanLog.cpp +++ b/src/Interpreters/OpenTelemetrySpanLog.cpp @@ -58,7 +58,7 @@ void OpenTelemetrySpanLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insert(operation_name); columns[i++]->insert(start_time_us); columns[i++]->insert(finish_time_us); - columns[i++]->insert(DateLUT::instance().toDayNum(finish_time_us / 1000000).toUnderType()); + columns[i++]->insert(DateLUT::serverTimezoneInstance().toDayNum(finish_time_us / 1000000).toUnderType()); // The user might add some ints values, and we will have Int Field, and the // insert will fail because the column requires Strings. Convert the fields // here, because it's hard to remember to convert them in all other places. 
diff --git a/src/Interpreters/PartLog.cpp b/src/Interpreters/PartLog.cpp index 94cc51f2b38..35d13338366 100644 --- a/src/Interpreters/PartLog.cpp +++ b/src/Interpreters/PartLog.cpp @@ -78,7 +78,7 @@ void PartLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insert(query_id); columns[i++]->insert(event_type); - columns[i++]->insert(DateLUT::instance().toDayNum(event_time).toUnderType()); + columns[i++]->insert(DateLUT::serverTimezoneInstance().toDayNum(event_time).toUnderType()); columns[i++]->insert(start_time); columns[i++]->insert(event_time); columns[i++]->insert(event_time_microseconds); diff --git a/src/Interpreters/PartMergeLog.cpp b/src/Interpreters/PartMergeLog.cpp index feb3e1721f3..c7e9dcaacb2 100644 --- a/src/Interpreters/PartMergeLog.cpp +++ b/src/Interpreters/PartMergeLog.cpp @@ -64,7 +64,7 @@ void PartMergeLogElement::appendToBlock(MutableColumns & columns) const size_t i = 0; columns[i++]->insert(UInt64(event_type)); - columns[i++]->insert(DateLUT::instance().toDayNum(event_time).toUnderType()); + columns[i++]->insert(DateLUT::serverTimezoneInstance().toDayNum(event_time).toUnderType()); columns[i++]->insert(UInt64(event_time)); columns[i++]->insert(database); diff --git a/src/Interpreters/ProcessorsProfileLog.cpp b/src/Interpreters/ProcessorsProfileLog.cpp index 5d44b61e064..5fa5b4a1047 100644 --- a/src/Interpreters/ProcessorsProfileLog.cpp +++ b/src/Interpreters/ProcessorsProfileLog.cpp @@ -60,7 +60,7 @@ void ProcessorProfileLogElement::appendToBlock(MutableColumns & columns) const { size_t i = 0; - columns[i++]->insert(DateLUT::instance().toDayNum(event_time).toUnderType()); + columns[i++]->insert(DateLUT::serverTimezoneInstance().toDayNum(event_time).toUnderType()); columns[i++]->insert(event_time); columns[i++]->insert(event_time_microseconds); diff --git a/src/Interpreters/QueryExchangeLog.cpp b/src/Interpreters/QueryExchangeLog.cpp index 97a4d636ed0..5281d88bf60 100644 --- a/src/Interpreters/QueryExchangeLog.cpp +++ 
b/src/Interpreters/QueryExchangeLog.cpp @@ -85,7 +85,7 @@ void QueryExchangeLogElement::appendToBlock(MutableColumns & columns) const { size_t i = 0; columns[i++]->insert(initial_query_id); - columns[i++]->insert(DateLUT::instance().toDayNum(event_time).toUnderType()); + columns[i++]->insert(DateLUT::serverTimezoneInstance().toDayNum(event_time).toUnderType()); columns[i++]->insert(event_time); columns[i++]->insertData(type.data(), type.size()); columns[i++]->insert(exchange_id); diff --git a/src/Interpreters/QueryLog.cpp b/src/Interpreters/QueryLog.cpp index 78b4f589759..40aa8b93f3e 100644 --- a/src/Interpreters/QueryLog.cpp +++ b/src/Interpreters/QueryLog.cpp @@ -174,7 +174,7 @@ void QueryLogElement::appendToBlock(MutableColumns & columns) const size_t i = 0; columns[i++]->insert(type); - columns[i++]->insert(DateLUT::instance().toDayNum(event_time).toUnderType()); + columns[i++]->insert(DateLUT::serverTimezoneInstance().toDayNum(event_time).toUnderType()); columns[i++]->insert(event_time); columns[i++]->insert(event_time_microseconds); columns[i++]->insert(query_start_time); diff --git a/src/Interpreters/QueryThreadLog.cpp b/src/Interpreters/QueryThreadLog.cpp index 647a1d89c7a..55d369e4ebf 100644 --- a/src/Interpreters/QueryThreadLog.cpp +++ b/src/Interpreters/QueryThreadLog.cpp @@ -109,7 +109,7 @@ void QueryThreadLogElement::appendToBlock(MutableColumns & columns) const { size_t i = 0; - columns[i++]->insert(DateLUT::instance().toDayNum(event_time).toUnderType()); + columns[i++]->insert(DateLUT::serverTimezoneInstance().toDayNum(event_time).toUnderType()); columns[i++]->insert(event_time); columns[i++]->insert(event_time_microseconds); columns[i++]->insert(query_start_time); diff --git a/src/Interpreters/RemoteReadLog.cpp b/src/Interpreters/RemoteReadLog.cpp index 7b812b98358..21a7a025117 100644 --- a/src/Interpreters/RemoteReadLog.cpp +++ b/src/Interpreters/RemoteReadLog.cpp @@ -35,7 +35,7 @@ void RemoteReadLogElement::appendToBlock(MutableColumns & columns) 
const { size_t i = 0; - columns[i++]->insert(DateLUT::instance().toDayNum(event_time).toUnderType()); + columns[i++]->insert(DateLUT::sessionInstance().toDayNum(event_time).toUnderType()); columns[i++]->insert(event_time); columns[i++]->insert(request_time_microseconds); columns[i++]->insert(context); diff --git a/src/Interpreters/ServerPartLog.cpp b/src/Interpreters/ServerPartLog.cpp index de6d1ce6460..210ee797710 100644 --- a/src/Interpreters/ServerPartLog.cpp +++ b/src/Interpreters/ServerPartLog.cpp @@ -82,7 +82,7 @@ void ServerPartLogElement::appendToBlock(MutableColumns & columns) const size_t i = 0; columns[i++]->insert(static_cast(event_type)); - columns[i++]->insert(DateLUT::instance().toDayNum(event_time).toUnderType()); + columns[i++]->insert(DateLUT::serverTimezoneInstance().toDayNum(event_time).toUnderType()); columns[i++]->insert(event_time); columns[i++]->insert(txn_id); diff --git a/src/Interpreters/TextLog.cpp b/src/Interpreters/TextLog.cpp index baf98b6771d..b129c06b2c5 100644 --- a/src/Interpreters/TextLog.cpp +++ b/src/Interpreters/TextLog.cpp @@ -55,7 +55,7 @@ void TextLogElement::appendToBlock(MutableColumns & columns) const { size_t i = 0; - columns[i++]->insert(DateLUT::instance().toDayNum(event_time).toUnderType()); + columns[i++]->insert(DateLUT::serverTimezoneInstance().toDayNum(event_time).toUnderType()); columns[i++]->insert(event_time); columns[i++]->insert(event_time_microseconds); columns[i++]->insert(microseconds); diff --git a/src/Interpreters/TraceLog.cpp b/src/Interpreters/TraceLog.cpp index dac27aebe58..742c275acab 100644 --- a/src/Interpreters/TraceLog.cpp +++ b/src/Interpreters/TraceLog.cpp @@ -42,7 +42,7 @@ void TraceLogElement::appendToBlock(MutableColumns & columns) const { size_t i = 0; - columns[i++]->insert(DateLUT::instance().toDayNum(event_time).toUnderType()); + columns[i++]->insert(DateLUT::serverTimezoneInstance().toDayNum(event_time).toUnderType()); columns[i++]->insert(event_time); 
columns[i++]->insert(event_time_microseconds); columns[i++]->insert(timestamp_ns); diff --git a/src/Interpreters/UniqueTableLog.cpp b/src/Interpreters/UniqueTableLog.cpp index eec6e295083..b2b8eddd5d7 100644 --- a/src/Interpreters/UniqueTableLog.cpp +++ b/src/Interpreters/UniqueTableLog.cpp @@ -44,7 +44,7 @@ void UniqueTableLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insert(table); columns[i++]->insert(type); - columns[i++]->insert(DateLUT::instance().toDayNum(event_time).toUnderType()); + columns[i++]->insert(DateLUT::sessionInstance().toDayNum(event_time).toUnderType()); columns[i++]->insert(event_time); columns[i++]->insert(txn_id); diff --git a/src/Interpreters/ZooKeeperLog.cpp b/src/Interpreters/ZooKeeperLog.cpp index d24fa78ea17..f3d86d6453a 100644 --- a/src/Interpreters/ZooKeeperLog.cpp +++ b/src/Interpreters/ZooKeeperLog.cpp @@ -172,7 +172,7 @@ void ZooKeeperLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insert(type); auto event_time_seconds = event_time / 1000000; - columns[i++]->insert(DateLUT::instance().toDayNum(event_time_seconds).toUnderType()); + columns[i++]->insert(DateLUT::serverTimezoneInstance().toDayNum(event_time_seconds).toUnderType()); columns[i++]->insert(event_time); columns[i++]->insertData(IPv6ToBinary(address.host()).data(), 16); columns[i++]->insert(address.port()); diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 272562865b1..58e124d6534 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -2135,7 +2135,7 @@ void executeQuery( if (set_result_details) set_result_details( - context->getClientInfo().current_query_id, out->getContentType(), format_name, DateLUT::instance().getTimeZone(), streams.coordinator); + context->getClientInfo().current_query_id, out->getContentType(), format_name, DateLUT::serverTimezoneInstance().getTimeZone(), streams.coordinator); copyData( *streams.in, *out, []() { return false; }, 
[&out](const Block &) { out->flush(); }); @@ -2192,7 +2192,7 @@ void executeQuery( if (set_result_details) set_result_details( - context->getClientInfo().current_query_id, out->getContentType(), format_name, DateLUT::instance().getTimeZone(), streams.coordinator); + context->getClientInfo().current_query_id, out->getContentType(), format_name, DateLUT::serverTimezoneInstance().getTimeZone(), streams.coordinator); pipeline.setOutputFormat(std::move(out)); } @@ -2339,7 +2339,7 @@ void executeHttpQueryInAsyncMode( query.data(), query.data() + query.size(), ast, context, false, QueryProcessingStage::Complete, has_query_tail, istr); auto & pipeline = streams.pipeline; if (set_result_details_cp) - set_result_details_cp(query_id, "text/plain; charset=UTF-8", format_name1_cp, DateLUT::instance().getTimeZone(), streams.coordinator); + set_result_details_cp(query_id, "text/plain; charset=UTF-8", format_name1_cp, DateLUT::serverTimezoneInstance().getTimeZone(), streams.coordinator); if (streams.in) { const auto * ast_query_with_output = dynamic_cast(ast.get()); diff --git a/src/MergeTreeCommon/MergeTreeMetaBase.cpp b/src/MergeTreeCommon/MergeTreeMetaBase.cpp index 6844528f5c2..c2684c06c11 100644 --- a/src/MergeTreeCommon/MergeTreeMetaBase.cpp +++ b/src/MergeTreeCommon/MergeTreeMetaBase.cpp @@ -2111,7 +2111,7 @@ void MergeTreeMetaBase::filterPartitionByTTL(std::vector(column)) { - const auto & date_lut = DateLUT::instance(); + const auto & date_lut = DateLUT::serverTimezoneInstance(); for (size_t index = 0; index < column->size(); index++) { auto ttl_value = date_lut.fromDayNum(DayNum(column_date->getElement(index))); @@ -2136,7 +2136,7 @@ void MergeTreeMetaBase::filterPartitionByTTL(std::vector(column)) // { - // const auto & date_lut = DateLUT::instance(); + // const auto & date_lut = DateLUT::serverTimezoneInstance(); // ttl_value = date_lut.fromDayNum(DayNum(column_date->getElement(index))); // } // else if (const ColumnUInt32 * column_date_time = typeid_cast(column)) diff 
--git a/src/Processors/Exchange/DataTrans/Brpc/BrpcRemoteBroadcastReceiver.cpp b/src/Processors/Exchange/DataTrans/Brpc/BrpcRemoteBroadcastReceiver.cpp index ad0d89b7090..4d8e8530c4b 100644 --- a/src/Processors/Exchange/DataTrans/Brpc/BrpcRemoteBroadcastReceiver.cpp +++ b/src/Processors/Exchange/DataTrans/Brpc/BrpcRemoteBroadcastReceiver.cpp @@ -184,8 +184,9 @@ void BrpcRemoteBroadcastReceiver::pushReceiveQueue(MultiPathDataPacket packet) return; } throw Exception( - "Push exchange data to receiver for " + getName() + " timeout from " + DateLUT::instance().timeToString(context->getClientInfo().initial_query_start_time) + - " to " + DateLUT::instance().timeToString(context->getQueryExpirationTimeStamp().tv_sec), + "Push exchange data to receiver for " + getName() + " timeout from " + + DateLUT::serverTimezoneInstance().timeToString(context->getClientInfo().initial_query_start_time) + " to " + + DateLUT::serverTimezoneInstance().timeToString(context->getQueryExpirationTimeStamp().tv_sec), ErrorCodes::DISTRIBUTE_STAGE_QUERY_EXCEPTION); } } @@ -196,8 +197,9 @@ RecvDataPacket BrpcRemoteBroadcastReceiver::recv(timespec timeout_ts) noexcept MultiPathDataPacket data_packet; if (!queue->tryPopUntil(data_packet, timeout_ts)) { - const auto error_msg = "Try pop receive queue for " + getName() + " timeout, from " + - DateLUT::instance().timeToString(context->getClientInfo().initial_query_start_time) + " to " + DateLUT::instance().timeToString(timeout_ts.tv_sec); + const auto error_msg = "Try pop receive queue for " + getName() + " timeout, from " + + DateLUT::serverTimezoneInstance().timeToString(context->getClientInfo().initial_query_start_time) + " to " + + DateLUT::serverTimezoneInstance().timeToString(timeout_ts.tv_sec); BroadcastStatus current_status = finish(BroadcastStatusCode::RECV_TIMEOUT, error_msg); return std::move(current_status); } diff --git a/src/Processors/Exchange/DataTrans/Local/LocalBroadcastChannel.cpp 
b/src/Processors/Exchange/DataTrans/Local/LocalBroadcastChannel.cpp index 1889494dc96..10db83fc261 100644 --- a/src/Processors/Exchange/DataTrans/Local/LocalBroadcastChannel.cpp +++ b/src/Processors/Exchange/DataTrans/Local/LocalBroadcastChannel.cpp @@ -85,7 +85,7 @@ RecvDataPacket LocalBroadcastChannel::recv(timespec timeout_ts) BroadcastStatus current_status = finish( BroadcastStatusCode::RECV_TIMEOUT, - "Receive from channel " + name + " timeout after ms: " + DateLUT::instance().timeToString(timeout_ts.tv_sec)); + "Receive from channel " + name + " timeout after ms: " + DateLUT::serverTimezoneInstance().timeToString(timeout_ts.tv_sec)); if (enable_receiver_metrics) receiver_metrics.recv_time_ms << s.elapsedMilliseconds(); return current_status; diff --git a/src/Processors/Exchange/DataTrans/MultiPathReceiver.cpp b/src/Processors/Exchange/DataTrans/MultiPathReceiver.cpp index 3d7ab72e27a..69176902cbf 100644 --- a/src/Processors/Exchange/DataTrans/MultiPathReceiver.cpp +++ b/src/Processors/Exchange/DataTrans/MultiPathReceiver.cpp @@ -274,7 +274,8 @@ RecvDataPacket MultiPathReceiver::recv(timespec timeout_ts) { bool collector_closed = collector->closed(); String error_msg = "Try pop receive collector for " + name; - error_msg.append(collector_closed ? " interrupted" : " timeout at " + DateLUT::instance().timeToString(timeout_ts.tv_sec)); + error_msg.append( + collector_closed ? " interrupted" : " timeout at " + DateLUT::serverTimezoneInstance().timeToString(timeout_ts.tv_sec)); BroadcastStatus current_status = finish(collector_closed ? 
BroadcastStatusCode::RECV_UNKNOWN_ERROR : BroadcastStatusCode::RECV_TIMEOUT, error_msg); diff --git a/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp index c987b26b6be..d115480ea87 100644 --- a/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp @@ -145,7 +145,7 @@ static time_t roundTimeToPrecision(const DateLUTImpl & date_lut, time_t time, UI IMergingAlgorithm::Status GraphiteRollupSortedAlgorithm::merge() { - const DateLUTImpl & date_lut = DateLUT::instance(); + const DateLUTImpl & date_lut = DateLUT::serverTimezoneInstance(); /// Take rows in needed order and put them into `merged_data` until we get `max_block_size` rows. /// diff --git a/src/Protos/cnch_worker_rpc.proto b/src/Protos/cnch_worker_rpc.proto index b637752deb7..f6456eb3e84 100644 --- a/src/Protos/cnch_worker_rpc.proto +++ b/src/Protos/cnch_worker_rpc.proto @@ -522,6 +522,7 @@ message SendResourcesReq optional WorkerInfo worker_info = 9; // can coexist with `create_queries' repeated CacheableTableDefinition cacheable_create_queries = 10; + optional string session_timezone = 11; } message SendResourcesResp diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 0c762aa1b70..0c303688308 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -349,6 +349,7 @@ void TCPHandler::runImpl() /// Send block to the client - input storage structure. 
state.input_header = metadata_snapshot->getSampleBlock(); sendData(state.input_header); + sendTimezone(); }); query_context->setInputBlocksReaderCallback([&connection_settings, this](ContextPtr context) -> Block { @@ -1052,6 +1053,19 @@ void TCPHandler::sendExtremes(const Block & extremes) } } +void TCPHandler::sendTimezone() +{ + if (client_tcp_protocol_version < DBMS_MIN_PROTOCOL_VERSION_WITH_TIMEZONE_UPDATES) + return; + + const String & tz = query_context->getSettingsRef().session_timezone.value; + + LOG_DEBUG(log, "TCPHandler::sendTimezone(): {}", tz); + writeVarUInt(Protocol::Server::TimezoneUpdate, *out); + writeStringBinary(tz, *out); + out->next(); +} + bool TCPHandler::receiveProxyHeader() { if (in->eof()) @@ -1262,7 +1276,7 @@ void TCPHandler::sendHello() writeVarUInt(VERSION_MINOR, *out); writeVarUInt(DBMS_TCP_PROTOCOL_VERSION, *out); if (client_tcp_protocol_version >= DBMS_MIN_REVISION_WITH_SERVER_TIMEZONE) - writeStringBinary(DateLUT::instance().getTimeZone(), *out); + writeStringBinary(DateLUT::serverTimezoneInstance().getTimeZone(), *out); if (client_tcp_protocol_version >= DBMS_MIN_REVISION_WITH_SERVER_DISPLAY_NAME) writeStringBinary(server_display_name, *out); if (client_tcp_protocol_version >= DBMS_MIN_REVISION_WITH_VERSION_PATCH) diff --git a/src/Server/TCPHandler.h b/src/Server/TCPHandler.h index e0bfae14568..fbbd1be7dc1 100644 --- a/src/Server/TCPHandler.h +++ b/src/Server/TCPHandler.h @@ -242,6 +242,7 @@ class TCPHandler : public Poco::Net::TCPServerConnection void sendProfileInfo(const BlockStreamProfileInfo & info); void sendTotals(const Block & totals); void sendExtremes(const Block & extremes); + void sendTimezone(); /// Creates state.block_in/block_out for blocks read/write, depending on whether compression is enabled. 
void initBlockInput(); diff --git a/src/Statistics/AutoStatisticsHelper.cpp b/src/Statistics/AutoStatisticsHelper.cpp index ae4e31d33c5..e580d7e6358 100644 --- a/src/Statistics/AutoStatisticsHelper.cpp +++ b/src/Statistics/AutoStatisticsHelper.cpp @@ -86,7 +86,7 @@ TimePoint nowTimePoint() ExtendedDayNum convertToDate(DateTime64 time) { time_t ts = time.value / DecimalUtils::scaleMultiplier(DataTypeDateTime64::default_scale); - auto date = DateLUT::instance().toDayNum(ts); + auto date = DateLUT::serverTimezoneInstance().toDayNum(ts); return date; } @@ -107,7 +107,7 @@ std::optional