diff --git a/DEPS.bzl b/DEPS.bzl index b4000908564c9..24e2920abe663 100644 --- a/DEPS.bzl +++ b/DEPS.bzl @@ -6582,13 +6582,13 @@ def go_deps(): name = "com_github_pingcap_tipb", build_file_proto_mode = "disable_global", importpath = "github.com/pingcap/tipb", - sha256 = "68768a27ed6c35716fcb01a0b4a15ff13e5c1a5dc11acc7a3d44ba02a2742077", - strip_prefix = "github.com/pingcap/tipb@v0.0.0-20260414032333-da912b84de6f", + sha256 = "a47ec816b2fa1924a4db5c2270a3bfb70f7c5bcc790b59287b5e9680b71bfbcd", + strip_prefix = "github.com/pingcap/tipb@v0.0.0-20260515142222-a4d204a193b4", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/github.com/pingcap/tipb/com_github_pingcap_tipb-v0.0.0-20260414032333-da912b84de6f.zip", - "http://ats.apps.svc/gomod/github.com/pingcap/tipb/com_github_pingcap_tipb-v0.0.0-20260414032333-da912b84de6f.zip", - "https://cache.hawkingrei.com/gomod/github.com/pingcap/tipb/com_github_pingcap_tipb-v0.0.0-20260414032333-da912b84de6f.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/github.com/pingcap/tipb/com_github_pingcap_tipb-v0.0.0-20260414032333-da912b84de6f.zip", + "http://bazel-cache.pingcap.net:8080/gomod/github.com/pingcap/tipb/com_github_pingcap_tipb-v0.0.0-20260515142222-a4d204a193b4.zip", + "http://ats.apps.svc/gomod/github.com/pingcap/tipb/com_github_pingcap_tipb-v0.0.0-20260515142222-a4d204a193b4.zip", + "https://cache.hawkingrei.com/gomod/github.com/pingcap/tipb/com_github_pingcap_tipb-v0.0.0-20260515142222-a4d204a193b4.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/github.com/pingcap/tipb/com_github_pingcap_tipb-v0.0.0-20260515142222-a4d204a193b4.zip", ], ) go_repository( diff --git a/cmd/tidb-server/BUILD.bazel b/cmd/tidb-server/BUILD.bazel index 4ff8cdac2f33f..039c810a79eb3 100644 --- a/cmd/tidb-server/BUILD.bazel +++ b/cmd/tidb-server/BUILD.bazel @@ -107,7 +107,7 @@ go_test( srcs = ["main_test.go"], embed = [":tidb-server_lib"], flaky = True, - shard_count = 6, + shard_count = 7, deps = [ "//pkg/config", "//pkg/config/deploymode", diff --git a/go.mod b/go.mod index 5025a5547552e..646b0843c1fd4 100644 --- a/go.mod +++ b/go.mod @@ -106,7 +106,7 @@ require ( github.com/pingcap/metering_sdk v0.0.0-20260324055927-14fead745f1d github.com/pingcap/sysutil v1.0.1-0.20240311050922-ae81ee01f3a5 github.com/pingcap/tidb/pkg/parser v0.0.0-20211011031125-9b13dc409c5e - github.com/pingcap/tipb v0.0.0-20260414032333-da912b84de6f + github.com/pingcap/tipb v0.0.0-20260515142222-a4d204a193b4 github.com/prometheus/client_golang v1.23.0 github.com/prometheus/client_model v0.6.2 github.com/prometheus/common v0.65.0 @@ -352,7 +352,7 @@ require ( google.golang.org/genproto/googleapis/rpc v0.0.0-20250707201910-8d1bb00bc6a7 // indirect google.golang.org/protobuf v1.36.10 gopkg.in/inf.v0 v0.9.1 // indirect - gopkg.in/natefinch/lumberjack.v2 v2.2.1 + gopkg.in/natefinch/lumberjack.v2 v2.2.1 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect k8s.io/apimachinery v0.29.11 // indirect k8s.io/klog/v2 v2.120.1 // indirect diff --git a/go.sum b/go.sum index f572097e64f7e..f57340a8a3fc6 100644 --- a/go.sum +++ b/go.sum @@ -738,8 +738,8 @@ github.com/pingcap/metering_sdk v0.0.0-20260324055927-14fead745f1d h1:5JCgncG9X7 github.com/pingcap/metering_sdk v0.0.0-20260324055927-14fead745f1d/go.mod h1:HMNxmg0/lrn3SPGJ6LTZqP0WwEpcXMu9s/4TWJbzT8w= github.com/pingcap/sysutil v1.0.1-0.20240311050922-ae81ee01f3a5 h1:T4pXRhBflzDeAhmOQHNPRRogMYxP13V7BkYw3ZsoSfE= github.com/pingcap/sysutil v1.0.1-0.20240311050922-ae81ee01f3a5/go.mod h1:rlimy0GcTvjiJqvD5mXTRr8O2eNZPBrcUgiWVYp9530= -github.com/pingcap/tipb v0.0.0-20260414032333-da912b84de6f h1:+IEEq1wl/kxfGK/qOCe9Bu0Kk9ERqxrzeGoKazevWrw= -github.com/pingcap/tipb v0.0.0-20260414032333-da912b84de6f/go.mod h1:RM8iRcMalzOthG2XJxnNBniM4xFGb/lDwHUwqkaVzt4= +github.com/pingcap/tipb v0.0.0-20260515142222-a4d204a193b4 h1:7kN995aOhNamG8IOnN7Rj6nNqq+F3Z2AyfPGjCNdqoI= +github.com/pingcap/tipb v0.0.0-20260515142222-a4d204a193b4/go.mod h1:RM8iRcMalzOthG2XJxnNBniM4xFGb/lDwHUwqkaVzt4= github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c h1:+mdjkGKdHQG3305AYmdv1U2eRNDiU2ErMBj1gwrq8eQ= github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c/go.mod h1:7rwL4CYBLnjLxUqIJNnCWiEdr3bn6IUYi15bNlnbCCU= github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA= diff --git a/pkg/executor/analyze_col_sampling.go b/pkg/executor/analyze_col_sampling.go index 53b62e4bedd91..5bf5017acbd0b 100644 --- a/pkg/executor/analyze_col_sampling.go +++ b/pkg/executor/analyze_col_sampling.go @@ -217,9 +217,9 @@ func (e *AnalyzeColumnsExec) buildSamplingStats( } }() - l := len(e.analyzePB.ColReq.ColumnsInfo) + len(e.analyzePB.ColReq.ColumnGroups) - rootRowCollector := statistics.NewRowSampleCollector(int(e.analyzePB.ColReq.SampleSize), e.analyzePB.ColReq.GetSampleRate(), l) - for range l { + totalLen := len(e.analyzePB.ColReq.ColumnsInfo) + len(e.analyzePB.ColReq.ColumnGroups) + rootRowCollector := statistics.NewRowSampleCollector(int(e.analyzePB.ColReq.SampleSize), e.analyzePB.ColReq.GetSampleRate(), totalLen) + for range totalLen { rootRowCollector.Base().FMSketches = append(rootRowCollector.Base().FMSketches, statistics.NewFMSketch(statistics.MaxSketchSize)) } @@ -251,7 +251,7 @@ func (e *AnalyzeColumnsExec) buildSamplingStats( for i := range samplingStatsConcurrency { id := i gp.Go(func() { - e.subMergeWorker(mergeCtx, taskCancel, mergeResultCh, mergeTaskCh, l, id) + e.subMergeWorker(mergeCtx, taskCancel, mergeResultCh, mergeTaskCh, totalLen, id) }) } // Merge the result from collectors. @@ -342,7 +342,6 @@ func (e *AnalyzeColumnsExec) buildSamplingStats( return i.Handle.Compare(j.Handle) }) - totalLen := len(e.colsInfo) + len(e.indexes) hists = make([]*statistics.Histogram, totalLen) topns = make([]*statistics.TopN, totalLen) fmSketches = make([]*statistics.FMSketch, 0, totalLen) @@ -602,7 +601,7 @@ func (e *AnalyzeColumnsExec) subMergeWorker( cancel context.CancelCauseFunc, resultCh chan<- *samplingMergeResult, taskCh <-chan []byte, - l int, + totalLen int, index int, ) { // Only close the resultCh in the first worker. @@ -640,8 +639,8 @@ func (e *AnalyzeColumnsExec) subMergeWorker( } }) // Keep one private collector per merge worker and flush it when taskCh is closed. - retCollector := statistics.NewRowSampleCollector(int(e.analyzePB.ColReq.SampleSize), e.analyzePB.ColReq.GetSampleRate(), l) - for range l { + retCollector := statistics.NewRowSampleCollector(int(e.analyzePB.ColReq.SampleSize), e.analyzePB.ColReq.GetSampleRate(), totalLen) + for range totalLen { retCollector.Base().FMSketches = append(retCollector.Base().FMSketches, statistics.NewFMSketch(statistics.MaxSketchSize)) } // Early-return paths need to release the worker-local collector explicitly. @@ -671,7 +670,7 @@ func (e *AnalyzeColumnsExec) subMergeWorker( inflightRespSize = int64(colResp.Size()) e.memTracker.Consume(inflightRespSize) - subCollector := statistics.NewRowSampleCollector(int(e.analyzePB.ColReq.SampleSize), e.analyzePB.ColReq.GetSampleRate(), l) + subCollector := statistics.NewRowSampleCollector(int(e.analyzePB.ColReq.SampleSize), e.analyzePB.ColReq.GetSampleRate(), totalLen) subCollector.Base().FromProto(colResp.RowCollector, e.memTracker) statsHandle.UpdateAnalyzeJobProgress(e.job, subCollector.Base().Count) diff --git a/pkg/statistics/fmsketch.go b/pkg/statistics/fmsketch.go index c3d667c3a71a9..4c624fc760c6b 100644 --- a/pkg/statistics/fmsketch.go +++ b/pkg/statistics/fmsketch.go @@ -121,24 +121,41 @@ func (s *FMSketch) insertHashValue(hashVal uint64) { // InsertValue inserts a value into the FM sketch. func (s *FMSketch) InsertValue(sc *stmtctx.StatementContext, value types.Datum) error { + hashVal, err := hashDatum(sc, value) + if err != nil { + return errors.Trace(err) + } + s.insertHashValue(hashVal) + return nil +} + +// InsertRowValue inserts multi-column values to the sketch. +func (s *FMSketch) InsertRowValue(sc *stmtctx.StatementContext, values []types.Datum) error { + hashVal, err := hashRow(sc, values) + if err != nil { + return errors.Trace(err) + } + s.insertHashValue(hashVal) + return nil +} + +func hashDatum(sc *stmtctx.StatementContext, value types.Datum) (uint64, error) { bytes, err := codec.EncodeValue(sc.TimeZone(), nil, value) err = sc.HandleError(err) if err != nil { - return errors.Trace(err) + return 0, err } hashFunc := murmur3Pool.Get().(hash.Hash64) hashFunc.Reset() defer murmur3Pool.Put(hashFunc) _, err = hashFunc.Write(bytes) if err != nil { - return errors.Trace(err) + return 0, err } - s.insertHashValue(hashFunc.Sum64()) - return nil + return hashFunc.Sum64(), nil } -// InsertRowValue inserts multi-column values to the sketch. -func (s *FMSketch) InsertRowValue(sc *stmtctx.StatementContext, values []types.Datum) error { +func hashRow(sc *stmtctx.StatementContext, values []types.Datum) (uint64, error) { b := make([]byte, 0, 8) hashFunc := murmur3Pool.Get().(hash.Hash64) hashFunc.Reset() @@ -150,15 +167,14 @@ func (s *FMSketch) InsertRowValue(sc *stmtctx.StatementContext, values []types.D b, err := codec.EncodeValue(sc.TimeZone(), b, v) err = errCtx.HandleError(err) if err != nil { - return err + return 0, err } _, err = hashFunc.Write(b) if err != nil { - return err + return 0, err } } - s.insertHashValue(hashFunc.Sum64()) - return nil + return hashFunc.Sum64(), nil } // MergeFMSketch merges two FM Sketch.