From 9e7c6f1d8e2c39efd47098174387c37ff9685945 Mon Sep 17 00:00:00 2001 From: Doc Date: Tue, 23 Jun 2026 17:12:11 +0530 Subject: [PATCH 01/23] feat: implement CatchUpSync functionality for improved block synchronization - Added CatchUpSync method to CommandHandler for syncing from a specified block. - Updated main.go to trigger CatchUpSync on startup based on configuration settings. - Enhanced FastSyncSettings struct to include CatchUpFromBlock and CatchUpPeer options. - Updated CLI commands to support new catchup functionality and provide usage instructions. - Added relevant entries to .gitignore for new files and documentation. This change enhances the synchronization process by allowing nodes to catch up from a specific block, improving overall efficiency and reliability in block data management. --- .gitignore | 11 ++ CLI/CLI.go | 38 ++++- CLI/CLI_GRPC.go | 31 ++++ CLI/GRPC_Server.go | 16 +- CLI/client.go | 9 + CLI/proto/Connection.pb.go | 196 ++++++++++++++-------- CLI/proto/Connection.proto | 6 + CLI/proto/Connection_grpc.pb.go | 40 ++++- FastsyncV2/catchup.go | 289 ++++++++++++++++++++++++++++++++ config/settings/config.go | 11 ++ config/settings/defaults.go | 4 +- config/settings/loader.go | 2 + main.go | 102 +++++------ 13 files changed, 629 insertions(+), 126 deletions(-) create mode 100644 FastsyncV2/catchup.go diff --git a/.gitignore b/.gitignore index e3be5b64..9b11e888 100644 --- a/.gitignore +++ b/.gitignore @@ -53,3 +53,14 @@ internal/WAL/.tmp/* .code-review-graph/* .cursor/* test_results/ +docs/FASTSYNC_V3_MIGRATION_PLAN.md +eventlog.duckdb +Scripts/sign_tx.go +storage/thebe-kv/000017.vlog +storage/thebe-kv/000018.vlog +storage/thebe-kv/DISCARD +storage/thebe-kv/KEYREGISTRY +storage/thebe-kv/MANIFEST +storage/thebe-kv/outbox.db +dlq/DLQ.log +jmdn2 diff --git a/CLI/CLI.go b/CLI/CLI.go index 87e33bb1..92481754 100644 --- a/CLI/CLI.go +++ b/CLI/CLI.go @@ -6,6 +6,7 @@ import ( "fmt" "log" "os" + "strconv" "strings" "time" @@ -107,8 +108,9 @@ func PrintFuncs() { fmt.Println(" mempoolStats - Show mempool statistics") fmt.Println(" stats - Show messaging statistics") fmt.Println(" broadcast - Broadcast a message to all connected peers") - fmt.Println(" fastsync - Fast sync blockchain data with a peer (V2 Engine)") - fmt.Println(" accountsync - Sync missing accounts only (skip block sync)") + fmt.Println(" fastsync - Fast sync blockchain data with a peer (V2 Engine)") + fmt.Println(" catchup - Catch up from a known block to chain tip (post-bootstrap reconciliation)") + fmt.Println(" accountsync - Sync missing accounts only (skip block sync)") fmt.Println(" dbstate - Show current ImmuDB database state") fmt.Println(" propagateDID - Propagate a DID to the network") fmt.Println(" getDID - Get a DID document from the network") @@ -268,6 +270,8 @@ func (h *CommandHandler) handleCommand(parts []string) { h.handleBroadcast(parts) case "fastsync", "fastsyncv2", "firstsync": h.handleFastSync(parts) + case "catchup": + h.handleCatchUpSync(parts) case "accountsync": h.handleAccountSync(parts) case "propagateDID": @@ -632,6 +636,36 @@ func (h *CommandHandler) handleFastSync(parts []string) { printDashes() } +func (h *CommandHandler) handleCatchUpSync(parts []string) { + if len(parts) != 3 { + fmt.Println("Usage: catchup ") + fmt.Println(" peer_multiaddr full multiaddr with peer ID, e.g. /ip4/1.2.3.4/tcp/15000/p2p/12D3KooW...") + fmt.Println(" from_block first block NOT in your local DB (bootstrap snapshot tip + 1)") + return + } + if h.FastSyncerV2 == nil { + fmt.Println("Error: FastsyncV2 engine is not initialized") + return + } + + fromBlock, err := strconv.ParseUint(parts[2], 10, 64) + if err != nil { + fmt.Printf("Invalid from_block %q: %v\n", parts[2], err) + return + } + + fmt.Printf("Starting catch-up sync from block %d with peer %s\n", fromBlock, parts[1]) + startTime := time.Now() + + if err := h.FastSyncerV2.HandleCatchUpSync(fromBlock, parts[1]); err != nil { + fmt.Printf("CatchUpSync failed: %v\n", err) + return + } + + fmt.Printf("CatchUpSync completed in %v\n", time.Since(startTime)) + printDashes() +} + func (h *CommandHandler) handleAccountSync(parts []string) { if len(parts) != 2 { fmt.Println("Usage: accountsync ") diff --git a/CLI/CLI_GRPC.go b/CLI/CLI_GRPC.go index efc2fa58..b5ea6e06 100644 --- a/CLI/CLI_GRPC.go +++ b/CLI/CLI_GRPC.go @@ -330,6 +330,37 @@ func (h *CommandHandler) HandleFastSyncV2(peeraddr string) (SyncStats, error) { }, nil } +func (h *CommandHandler) HandleCatchUpSync(peeraddr string, fromBlock uint64) (SyncStats, error) { + if peeraddr == "" || fromBlock == 0 { + return SyncStats{}, fmt.Errorf("usage: catchup ") + } + if !h.PullAllowed { + return SyncStats{}, fmt.Errorf("node is configured as a serve-only participant (pulling disabled). cannot pull data") + } + if h.FastSyncerV2 == nil { + return SyncStats{}, fmt.Errorf("FastsyncV2 engine is inactive") + } + + startTime := time.Now().UTC() + if err := h.FastSyncerV2.HandleCatchUpSync(fromBlock, peeraddr); err != nil { + return SyncStats{}, fmt.Errorf("CatchUpSync failed: %w", err) + } + + var newMainState, newAccountsState *schema.ImmutableState + if h.MainClient != nil { + newMainState, _ = DB_OPs.GetDatabaseState(h.MainClient.Client) + } + if h.DIDClient != nil { + newAccountsState, _ = DB_OPs.GetDatabaseState(h.DIDClient.Client) + } + + return SyncStats{ + TimeTaken: time.Since(startTime), + MainState: newMainState, + AccountsState: newAccountsState, + }, nil +} + func (h *CommandHandler) HandleAccountSync(peeraddr string) (SyncStats, error) { if peeraddr == "" { return SyncStats{}, fmt.Errorf("usage: accountsync ") diff --git a/CLI/GRPC_Server.go b/CLI/GRPC_Server.go index 1c849a9d..0c8e3ee2 100644 --- a/CLI/GRPC_Server.go +++ b/CLI/GRPC_Server.go @@ -256,9 +256,19 @@ func (s *CLIServer) AccountSync(ctx context.Context, req *pb.PeerRequest) (*pb.S func (s *CLIServer) FirstSync(ctx context.Context, req *pb.FirstSyncRequest) (*pb.SyncStats, error) { stats, err := s.handler.HandleFirstSync(req.Peer, req.Mode) if err != nil { - return &pb.SyncStats{ - Error: err.Error(), - }, nil + return &pb.SyncStats{Error: err.Error()}, nil + } + return &pb.SyncStats{ + TimeTaken: int64(stats.TimeTaken.Seconds()), + MainState: convertDBState(stats.MainState), + AccountsState: convertDBState(stats.AccountsState), + }, nil +} + +func (s *CLIServer) CatchUpSync(ctx context.Context, req *pb.CatchUpRequest) (*pb.SyncStats, error) { + stats, err := s.handler.HandleCatchUpSync(req.Peer, req.FromBlock) + if err != nil { + return &pb.SyncStats{Error: err.Error()}, nil } return &pb.SyncStats{ TimeTaken: int64(stats.TimeTaken.Seconds()), diff --git a/CLI/client.go b/CLI/client.go index a019353f..3c5d11fa 100644 --- a/CLI/client.go +++ b/CLI/client.go @@ -179,6 +179,15 @@ func (c *Client) FirstSync(peerAddr string, mode string) (*pb.SyncStats, error) }) } +// CatchUpSync reconciles blocks [fromBlock..remoteTip] without Merkle bisection. +func (c *Client) CatchUpSync(peerAddr string, fromBlock uint64) (*pb.SyncStats, error) { + ctx := context.Background() + return c.conn.CatchUpSync(ctx, &pb.CatchUpRequest{ + Peer: peerAddr, + FromBlock: fromBlock, + }) +} + // GetDatabaseState returns the current database state func (c *Client) GetDatabaseState() (*pb.DatabaseStates, error) { ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) diff --git a/CLI/proto/Connection.pb.go b/CLI/proto/Connection.pb.go index 39e3c52a..cae3c9bf 100644 --- a/CLI/proto/Connection.pb.go +++ b/CLI/proto/Connection.pb.go @@ -1,7 +1,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. // versions: // protoc-gen-go v1.36.11 -// protoc v7.34.1 +// protoc v4.25.3 // source: Connection.proto package proto @@ -893,6 +893,58 @@ func (x *FirstSyncRequest) GetMode() string { return "" } +type CatchUpRequest struct { + state protoimpl.MessageState `protogen:"open.v1"` + Peer string `protobuf:"bytes,1,opt,name=peer,proto3" json:"peer,omitempty"` // full libp2p multiaddr with peer ID + FromBlock uint64 `protobuf:"varint,2,opt,name=from_block,json=fromBlock,proto3" json:"from_block,omitempty"` // first block NOT in local DB (bootstrap tip + 1) + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *CatchUpRequest) Reset() { + *x = CatchUpRequest{} + mi := &file_Connection_proto_msgTypes[14] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *CatchUpRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CatchUpRequest) ProtoMessage() {} + +func (x *CatchUpRequest) ProtoReflect() protoreflect.Message { + mi := &file_Connection_proto_msgTypes[14] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CatchUpRequest.ProtoReflect.Descriptor instead. +func (*CatchUpRequest) Descriptor() ([]byte, []int) { + return file_Connection_proto_rawDescGZIP(), []int{14} +} + +func (x *CatchUpRequest) GetPeer() string { + if x != nil { + return x.Peer + } + return "" +} + +func (x *CatchUpRequest) GetFromBlock() uint64 { + if x != nil { + return x.FromBlock + } + return 0 +} + type SyncInfo struct { state protoimpl.MessageState `protogen:"open.v1"` BatchSize int64 `protobuf:"varint,1,opt,name=batch_size,json=batchSize,proto3" json:"batch_size,omitempty"` @@ -904,7 +956,7 @@ type SyncInfo struct { func (x *SyncInfo) Reset() { *x = SyncInfo{} - mi := &file_Connection_proto_msgTypes[14] + mi := &file_Connection_proto_msgTypes[15] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -916,7 +968,7 @@ func (x *SyncInfo) String() string { func (*SyncInfo) ProtoMessage() {} func (x *SyncInfo) ProtoReflect() protoreflect.Message { - mi := &file_Connection_proto_msgTypes[14] + mi := &file_Connection_proto_msgTypes[15] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -929,7 +981,7 @@ func (x *SyncInfo) ProtoReflect() protoreflect.Message { // Deprecated: Use SyncInfo.ProtoReflect.Descriptor instead. func (*SyncInfo) Descriptor() ([]byte, []int) { - return file_Connection_proto_rawDescGZIP(), []int{14} + return file_Connection_proto_rawDescGZIP(), []int{15} } func (x *SyncInfo) GetBatchSize() int64 { @@ -964,7 +1016,7 @@ type GethStatus struct { func (x *GethStatus) Reset() { *x = GethStatus{} - mi := &file_Connection_proto_msgTypes[15] + mi := &file_Connection_proto_msgTypes[16] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -976,7 +1028,7 @@ func (x *GethStatus) String() string { func (*GethStatus) ProtoMessage() {} func (x *GethStatus) ProtoReflect() protoreflect.Message { - mi := &file_Connection_proto_msgTypes[15] + mi := &file_Connection_proto_msgTypes[16] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -989,7 +1041,7 @@ func (x *GethStatus) ProtoReflect() protoreflect.Message { // Deprecated: Use GethStatus.ProtoReflect.Descriptor instead. func (*GethStatus) Descriptor() ([]byte, []int) { - return file_Connection_proto_rawDescGZIP(), []int{15} + return file_Connection_proto_rawDescGZIP(), []int{16} } func (x *GethStatus) GetChainId() int32 { @@ -1022,7 +1074,7 @@ type AliasList struct { func (x *AliasList) Reset() { *x = AliasList{} - mi := &file_Connection_proto_msgTypes[16] + mi := &file_Connection_proto_msgTypes[17] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -1034,7 +1086,7 @@ func (x *AliasList) String() string { func (*AliasList) ProtoMessage() {} func (x *AliasList) ProtoReflect() protoreflect.Message { - mi := &file_Connection_proto_msgTypes[16] + mi := &file_Connection_proto_msgTypes[17] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -1047,7 +1099,7 @@ func (x *AliasList) ProtoReflect() protoreflect.Message { // Deprecated: Use AliasList.ProtoReflect.Descriptor instead. func (*AliasList) Descriptor() ([]byte, []int) { - return file_Connection_proto_rawDescGZIP(), []int{16} + return file_Connection_proto_rawDescGZIP(), []int{17} } func (x *AliasList) GetAliases() []string { @@ -1067,7 +1119,7 @@ type OperationResponse struct { func (x *OperationResponse) Reset() { *x = OperationResponse{} - mi := &file_Connection_proto_msgTypes[17] + mi := &file_Connection_proto_msgTypes[18] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -1079,7 +1131,7 @@ func (x *OperationResponse) String() string { func (*OperationResponse) ProtoMessage() {} func (x *OperationResponse) ProtoReflect() protoreflect.Message { - mi := &file_Connection_proto_msgTypes[17] + mi := &file_Connection_proto_msgTypes[18] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -1092,7 +1144,7 @@ func (x *OperationResponse) ProtoReflect() protoreflect.Message { // Deprecated: Use OperationResponse.ProtoReflect.Descriptor instead. func (*OperationResponse) Descriptor() ([]byte, []int) { - return file_Connection_proto_rawDescGZIP(), []int{17} + return file_Connection_proto_rawDescGZIP(), []int{18} } func (x *OperationResponse) GetSuccess() bool { @@ -1119,7 +1171,7 @@ type CleanPeersResponse struct { func (x *CleanPeersResponse) Reset() { *x = CleanPeersResponse{} - mi := &file_Connection_proto_msgTypes[18] + mi := &file_Connection_proto_msgTypes[19] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -1131,7 +1183,7 @@ func (x *CleanPeersResponse) String() string { func (*CleanPeersResponse) ProtoMessage() {} func (x *CleanPeersResponse) ProtoReflect() protoreflect.Message { - mi := &file_Connection_proto_msgTypes[18] + mi := &file_Connection_proto_msgTypes[19] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -1144,7 +1196,7 @@ func (x *CleanPeersResponse) ProtoReflect() protoreflect.Message { // Deprecated: Use CleanPeersResponse.ProtoReflect.Descriptor instead. func (*CleanPeersResponse) Descriptor() ([]byte, []int) { - return file_Connection_proto_rawDescGZIP(), []int{18} + return file_Connection_proto_rawDescGZIP(), []int{19} } func (x *CleanPeersResponse) GetCleanedCount() int32 { @@ -1171,7 +1223,7 @@ type DatabaseStates struct { func (x *DatabaseStates) Reset() { *x = DatabaseStates{} - mi := &file_Connection_proto_msgTypes[19] + mi := &file_Connection_proto_msgTypes[20] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -1183,7 +1235,7 @@ func (x *DatabaseStates) String() string { func (*DatabaseStates) ProtoMessage() {} func (x *DatabaseStates) ProtoReflect() protoreflect.Message { - mi := &file_Connection_proto_msgTypes[19] + mi := &file_Connection_proto_msgTypes[20] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -1196,7 +1248,7 @@ func (x *DatabaseStates) ProtoReflect() protoreflect.Message { // Deprecated: Use DatabaseStates.ProtoReflect.Descriptor instead. func (*DatabaseStates) Descriptor() ([]byte, []int) { - return file_Connection_proto_rawDescGZIP(), []int{19} + return file_Connection_proto_rawDescGZIP(), []int{20} } func (x *DatabaseStates) GetMainDb() *DatabaseState { @@ -1287,7 +1339,11 @@ const file_Connection_proto_rawDesc = "" + "\abalance\x18\x03 \x01(\tR\abalance\":\n" + "\x10FirstSyncRequest\x12\x12\n" + "\x04peer\x18\x01 \x01(\tR\x04peer\x12\x12\n" + - "\x04mode\x18\x02 \x01(\tR\x04mode\"\x8b\x01\n" + + "\x04mode\x18\x02 \x01(\tR\x04mode\"C\n" + + "\x0eCatchUpRequest\x12\x12\n" + + "\x04peer\x18\x01 \x01(\tR\x04peer\x12\x1d\n" + + "\n" + + "from_block\x18\x02 \x01(\x04R\tfromBlock\"\x8b\x01\n" + "\bSyncInfo\x12\x1d\n" + "\n" + "batch_size\x18\x01 \x01(\x03R\tbatchSize\x12.\n" + @@ -1310,7 +1366,7 @@ const file_Connection_proto_rawDesc = "" + "\x0eDatabaseStates\x12+\n" + "\amain_db\x18\x01 \x01(\v2\x12.cli.DatabaseStateR\x06mainDb\x123\n" + "\vaccounts_db\x18\x02 \x01(\v2\x12.cli.DatabaseStateR\n" + - "accountsDb2\x9e\n" + + "accountsDb2\xd4\n" + "\n" + "\n" + "CLIService\x124\n" + @@ -1331,7 +1387,8 @@ const file_Connection_proto_rawDesc = "" + "\n" + "FastSyncV2\x12\x10.cli.PeerRequest\x1a\x0e.cli.SyncStats\"\x00\x121\n" + "\vAccountSync\x12\x10.cli.PeerRequest\x1a\x0e.cli.SyncStats\"\x00\x124\n" + - "\tFirstSync\x12\x15.cli.FirstSyncRequest\x1a\x0e.cli.SyncStats\"\x00\x12A\n" + + "\tFirstSync\x12\x15.cli.FirstSyncRequest\x1a\x0e.cli.SyncStats\"\x00\x124\n" + + "\vCatchUpSync\x12\x13.cli.CatchUpRequest\x1a\x0e.cli.SyncStats\"\x00\x12A\n" + "\x10GetDatabaseState\x12\x16.google.protobuf.Empty\x1a\x13.cli.DatabaseStates\"\x00\x123\n" + "\vReturnAddrs\x12\x16.google.protobuf.Empty\x1a\n" + ".cli.Addrs\"\x00\x126\n" + @@ -1353,7 +1410,7 @@ func file_Connection_proto_rawDescGZIP() []byte { return file_Connection_proto_rawDescData } -var file_Connection_proto_msgTypes = make([]protoimpl.MessageInfo, 20) +var file_Connection_proto_msgTypes = make([]protoimpl.MessageInfo, 21) var file_Connection_proto_goTypes = []any{ (*Peer)(nil), // 0: cli.Peer (*PeerList)(nil), // 1: cli.PeerList @@ -1369,69 +1426,72 @@ var file_Connection_proto_goTypes = []any{ (*DIDRequest)(nil), // 11: cli.DIDRequest (*DIDPropagationRequest)(nil), // 12: cli.DIDPropagationRequest (*FirstSyncRequest)(nil), // 13: cli.FirstSyncRequest - (*SyncInfo)(nil), // 14: cli.SyncInfo - (*GethStatus)(nil), // 15: cli.GethStatus - (*AliasList)(nil), // 16: cli.AliasList - (*OperationResponse)(nil), // 17: cli.OperationResponse - (*CleanPeersResponse)(nil), // 18: cli.CleanPeersResponse - (*DatabaseStates)(nil), // 19: cli.DatabaseStates - (*timestamppb.Timestamp)(nil), // 20: google.protobuf.Timestamp - (*emptypb.Empty)(nil), // 21: google.protobuf.Empty + (*CatchUpRequest)(nil), // 14: cli.CatchUpRequest + (*SyncInfo)(nil), // 15: cli.SyncInfo + (*GethStatus)(nil), // 16: cli.GethStatus + (*AliasList)(nil), // 17: cli.AliasList + (*OperationResponse)(nil), // 18: cli.OperationResponse + (*CleanPeersResponse)(nil), // 19: cli.CleanPeersResponse + (*DatabaseStates)(nil), // 20: cli.DatabaseStates + (*timestamppb.Timestamp)(nil), // 21: google.protobuf.Timestamp + (*emptypb.Empty)(nil), // 22: google.protobuf.Empty } var file_Connection_proto_depIdxs = []int32{ 0, // 0: cli.PeerList.peers:type_name -> cli.Peer - 20, // 1: cli.DIDDocument.created_at:type_name -> google.protobuf.Timestamp - 20, // 2: cli.DIDDocument.updated_at:type_name -> google.protobuf.Timestamp + 21, // 1: cli.DIDDocument.created_at:type_name -> google.protobuf.Timestamp + 21, // 2: cli.DIDDocument.updated_at:type_name -> google.protobuf.Timestamp 3, // 3: cli.SyncStats.main_state:type_name -> cli.DatabaseState 3, // 4: cli.SyncStats.accounts_state:type_name -> cli.DatabaseState 3, // 5: cli.DatabaseStates.main_db:type_name -> cli.DatabaseState 3, // 6: cli.DatabaseStates.accounts_db:type_name -> cli.DatabaseState - 21, // 7: cli.CLIService.ListPeers:input_type -> google.protobuf.Empty + 22, // 7: cli.CLIService.ListPeers:input_type -> google.protobuf.Empty 8, // 8: cli.CLIService.AddPeer:input_type -> cli.PeerRequest 8, // 9: cli.CLIService.RemovePeer:input_type -> cli.PeerRequest - 21, // 10: cli.CLIService.CleanPeers:input_type -> google.protobuf.Empty + 22, // 10: cli.CLIService.CleanPeers:input_type -> google.protobuf.Empty 9, // 11: cli.CLIService.SendMessage:input_type -> cli.MessageRequest 9, // 12: cli.CLIService.SendYggdrasilMessage:input_type -> cli.MessageRequest 10, // 13: cli.CLIService.SendFile:input_type -> cli.FileRequest 9, // 14: cli.CLIService.BroadcastMessage:input_type -> cli.MessageRequest - 21, // 15: cli.CLIService.GetMessageStats:input_type -> google.protobuf.Empty + 22, // 15: cli.CLIService.GetMessageStats:input_type -> google.protobuf.Empty 11, // 16: cli.CLIService.GetDID:input_type -> cli.DIDRequest 12, // 17: cli.CLIService.PropagateDID:input_type -> cli.DIDPropagationRequest 8, // 18: cli.CLIService.FastSync:input_type -> cli.PeerRequest 8, // 19: cli.CLIService.FastSyncV2:input_type -> cli.PeerRequest 8, // 20: cli.CLIService.AccountSync:input_type -> cli.PeerRequest 13, // 21: cli.CLIService.FirstSync:input_type -> cli.FirstSyncRequest - 21, // 22: cli.CLIService.GetDatabaseState:input_type -> google.protobuf.Empty - 21, // 23: cli.CLIService.ReturnAddrs:input_type -> google.protobuf.Empty - 21, // 24: cli.CLIService.GetSyncInfo:input_type -> google.protobuf.Empty - 21, // 25: cli.CLIService.GetGethStatus:input_type -> google.protobuf.Empty - 21, // 26: cli.CLIService.DiscoverNeighbors:input_type -> google.protobuf.Empty - 21, // 27: cli.CLIService.ListAliases:input_type -> google.protobuf.Empty - 21, // 28: cli.CLIService.GetNodeVersion:input_type -> google.protobuf.Empty - 1, // 29: cli.CLIService.ListPeers:output_type -> cli.PeerList - 17, // 30: cli.CLIService.AddPeer:output_type -> cli.OperationResponse - 17, // 31: cli.CLIService.RemovePeer:output_type -> cli.OperationResponse - 18, // 32: cli.CLIService.CleanPeers:output_type -> cli.CleanPeersResponse - 17, // 33: cli.CLIService.SendMessage:output_type -> cli.OperationResponse - 17, // 34: cli.CLIService.SendYggdrasilMessage:output_type -> cli.OperationResponse - 17, // 35: cli.CLIService.SendFile:output_type -> cli.OperationResponse - 17, // 36: cli.CLIService.BroadcastMessage:output_type -> cli.OperationResponse - 2, // 37: cli.CLIService.GetMessageStats:output_type -> cli.MessageStats - 4, // 38: cli.CLIService.GetDID:output_type -> cli.DIDDocument - 17, // 39: cli.CLIService.PropagateDID:output_type -> cli.OperationResponse - 5, // 40: cli.CLIService.FastSync:output_type -> cli.SyncStats - 5, // 41: cli.CLIService.FastSyncV2:output_type -> cli.SyncStats - 5, // 42: cli.CLIService.AccountSync:output_type -> cli.SyncStats - 5, // 43: cli.CLIService.FirstSync:output_type -> cli.SyncStats - 19, // 44: cli.CLIService.GetDatabaseState:output_type -> cli.DatabaseStates - 6, // 45: cli.CLIService.ReturnAddrs:output_type -> cli.Addrs - 14, // 46: cli.CLIService.GetSyncInfo:output_type -> cli.SyncInfo - 15, // 47: cli.CLIService.GetGethStatus:output_type -> cli.GethStatus - 17, // 48: cli.CLIService.DiscoverNeighbors:output_type -> cli.OperationResponse - 16, // 49: cli.CLIService.ListAliases:output_type -> cli.AliasList - 7, // 50: cli.CLIService.GetNodeVersion:output_type -> cli.VersionInfo - 29, // [29:51] is the sub-list for method output_type - 7, // [7:29] is the sub-list for method input_type + 14, // 22: cli.CLIService.CatchUpSync:input_type -> cli.CatchUpRequest + 22, // 23: cli.CLIService.GetDatabaseState:input_type -> google.protobuf.Empty + 22, // 24: cli.CLIService.ReturnAddrs:input_type -> google.protobuf.Empty + 22, // 25: cli.CLIService.GetSyncInfo:input_type -> google.protobuf.Empty + 22, // 26: cli.CLIService.GetGethStatus:input_type -> google.protobuf.Empty + 22, // 27: cli.CLIService.DiscoverNeighbors:input_type -> google.protobuf.Empty + 22, // 28: cli.CLIService.ListAliases:input_type -> google.protobuf.Empty + 22, // 29: cli.CLIService.GetNodeVersion:input_type -> google.protobuf.Empty + 1, // 30: cli.CLIService.ListPeers:output_type -> cli.PeerList + 18, // 31: cli.CLIService.AddPeer:output_type -> cli.OperationResponse + 18, // 32: cli.CLIService.RemovePeer:output_type -> cli.OperationResponse + 19, // 33: cli.CLIService.CleanPeers:output_type -> cli.CleanPeersResponse + 18, // 34: cli.CLIService.SendMessage:output_type -> cli.OperationResponse + 18, // 35: cli.CLIService.SendYggdrasilMessage:output_type -> cli.OperationResponse + 18, // 36: cli.CLIService.SendFile:output_type -> cli.OperationResponse + 18, // 37: cli.CLIService.BroadcastMessage:output_type -> cli.OperationResponse + 2, // 38: cli.CLIService.GetMessageStats:output_type -> cli.MessageStats + 4, // 39: cli.CLIService.GetDID:output_type -> cli.DIDDocument + 18, // 40: cli.CLIService.PropagateDID:output_type -> cli.OperationResponse + 5, // 41: cli.CLIService.FastSync:output_type -> cli.SyncStats + 5, // 42: cli.CLIService.FastSyncV2:output_type -> cli.SyncStats + 5, // 43: cli.CLIService.AccountSync:output_type -> cli.SyncStats + 5, // 44: cli.CLIService.FirstSync:output_type -> cli.SyncStats + 5, // 45: cli.CLIService.CatchUpSync:output_type -> cli.SyncStats + 20, // 46: cli.CLIService.GetDatabaseState:output_type -> cli.DatabaseStates + 6, // 47: cli.CLIService.ReturnAddrs:output_type -> cli.Addrs + 15, // 48: cli.CLIService.GetSyncInfo:output_type -> cli.SyncInfo + 16, // 49: cli.CLIService.GetGethStatus:output_type -> cli.GethStatus + 18, // 50: cli.CLIService.DiscoverNeighbors:output_type -> cli.OperationResponse + 17, // 51: cli.CLIService.ListAliases:output_type -> cli.AliasList + 7, // 52: cli.CLIService.GetNodeVersion:output_type -> cli.VersionInfo + 30, // [30:53] is the sub-list for method output_type + 7, // [7:30] is the sub-list for method input_type 7, // [7:7] is the sub-list for extension type_name 7, // [7:7] is the sub-list for extension extendee 0, // [0:7] is the sub-list for field type_name @@ -1448,7 +1508,7 @@ func file_Connection_proto_init() { GoPackagePath: reflect.TypeOf(x{}).PkgPath(), RawDescriptor: unsafe.Slice(unsafe.StringData(file_Connection_proto_rawDesc), len(file_Connection_proto_rawDesc)), NumEnums: 0, - NumMessages: 20, + NumMessages: 21, NumExtensions: 0, NumServices: 1, }, diff --git a/CLI/proto/Connection.proto b/CLI/proto/Connection.proto index 95ee21a0..c8908efe 100644 --- a/CLI/proto/Connection.proto +++ b/CLI/proto/Connection.proto @@ -94,6 +94,7 @@ service CLIService { rpc FastSyncV2(PeerRequest) returns (SyncStats) {} rpc AccountSync(PeerRequest) returns (SyncStats) {} rpc FirstSync(FirstSyncRequest) returns (SyncStats) {} + rpc CatchUpSync(CatchUpRequest) returns (SyncStats) {} rpc GetDatabaseState(google.protobuf.Empty) returns (DatabaseStates) {} // Node Operations @@ -140,6 +141,11 @@ message FirstSyncRequest { string mode = 2; // "server" or "client" } +message CatchUpRequest { + string peer = 1; // full libp2p multiaddr with peer ID + uint64 from_block = 2; // first block NOT in local DB (bootstrap tip + 1) +} + message SyncInfo { int64 batch_size = 1; int64 request_timeout_sec = 2; diff --git a/CLI/proto/Connection_grpc.pb.go b/CLI/proto/Connection_grpc.pb.go index 4c4d24b5..b086e325 100644 --- a/CLI/proto/Connection_grpc.pb.go +++ b/CLI/proto/Connection_grpc.pb.go @@ -1,7 +1,7 @@ // Code generated by protoc-gen-go-grpc. DO NOT EDIT. // versions: // - protoc-gen-go-grpc v1.6.2 -// - protoc v7.34.1 +// - protoc v4.25.3 // source: Connection.proto package proto @@ -35,6 +35,7 @@ const ( CLIService_FastSyncV2_FullMethodName = "/cli.CLIService/FastSyncV2" CLIService_AccountSync_FullMethodName = "/cli.CLIService/AccountSync" CLIService_FirstSync_FullMethodName = "/cli.CLIService/FirstSync" + CLIService_CatchUpSync_FullMethodName = "/cli.CLIService/CatchUpSync" CLIService_GetDatabaseState_FullMethodName = "/cli.CLIService/GetDatabaseState" CLIService_ReturnAddrs_FullMethodName = "/cli.CLIService/ReturnAddrs" CLIService_GetSyncInfo_FullMethodName = "/cli.CLIService/GetSyncInfo" @@ -69,6 +70,7 @@ type CLIServiceClient interface { FastSyncV2(ctx context.Context, in *PeerRequest, opts ...grpc.CallOption) (*SyncStats, error) AccountSync(ctx context.Context, in *PeerRequest, opts ...grpc.CallOption) (*SyncStats, error) FirstSync(ctx context.Context, in *FirstSyncRequest, opts ...grpc.CallOption) (*SyncStats, error) + CatchUpSync(ctx context.Context, in *CatchUpRequest, opts ...grpc.CallOption) (*SyncStats, error) GetDatabaseState(ctx context.Context, in *emptypb.Empty, opts ...grpc.CallOption) (*DatabaseStates, error) // Node Operations ReturnAddrs(ctx context.Context, in *emptypb.Empty, opts ...grpc.CallOption) (*Addrs, error) @@ -239,6 +241,16 @@ func (c *cLIServiceClient) FirstSync(ctx context.Context, in *FirstSyncRequest, return out, nil } +func (c *cLIServiceClient) CatchUpSync(ctx context.Context, in *CatchUpRequest, opts ...grpc.CallOption) (*SyncStats, error) { + cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...) + out := new(SyncStats) + err := c.cc.Invoke(ctx, CLIService_CatchUpSync_FullMethodName, in, out, cOpts...) + if err != nil { + return nil, err + } + return out, nil +} + func (c *cLIServiceClient) GetDatabaseState(ctx context.Context, in *emptypb.Empty, opts ...grpc.CallOption) (*DatabaseStates, error) { cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...) out := new(DatabaseStates) @@ -334,6 +346,7 @@ type CLIServiceServer interface { FastSyncV2(context.Context, *PeerRequest) (*SyncStats, error) AccountSync(context.Context, *PeerRequest) (*SyncStats, error) FirstSync(context.Context, *FirstSyncRequest) (*SyncStats, error) + CatchUpSync(context.Context, *CatchUpRequest) (*SyncStats, error) GetDatabaseState(context.Context, *emptypb.Empty) (*DatabaseStates, error) // Node Operations ReturnAddrs(context.Context, *emptypb.Empty) (*Addrs, error) @@ -399,6 +412,9 @@ func (UnimplementedCLIServiceServer) AccountSync(context.Context, *PeerRequest) func (UnimplementedCLIServiceServer) FirstSync(context.Context, *FirstSyncRequest) (*SyncStats, error) { return nil, status.Error(codes.Unimplemented, "method FirstSync not implemented") } +func (UnimplementedCLIServiceServer) CatchUpSync(context.Context, *CatchUpRequest) (*SyncStats, error) { + return nil, status.Error(codes.Unimplemented, "method CatchUpSync not implemented") +} func (UnimplementedCLIServiceServer) GetDatabaseState(context.Context, *emptypb.Empty) (*DatabaseStates, error) { return nil, status.Error(codes.Unimplemented, "method GetDatabaseState not implemented") } @@ -711,6 +727,24 @@ func _CLIService_FirstSync_Handler(srv interface{}, ctx context.Context, dec fun return interceptor(ctx, in, info, handler) } +func _CLIService_CatchUpSync_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(CatchUpRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(CLIServiceServer).CatchUpSync(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: CLIService_CatchUpSync_FullMethodName, + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(CLIServiceServer).CatchUpSync(ctx, req.(*CatchUpRequest)) + } + return interceptor(ctx, in, info, handler) +} + func _CLIService_GetDatabaseState_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { in := new(emptypb.Empty) if err := dec(in); err != nil { @@ -904,6 +938,10 @@ var CLIService_ServiceDesc = grpc.ServiceDesc{ MethodName: "FirstSync", Handler: _CLIService_FirstSync_Handler, }, + { + MethodName: "CatchUpSync", + Handler: _CLIService_CatchUpSync_Handler, + }, { MethodName: "GetDatabaseState", Handler: _CLIService_GetDatabaseState_Handler, diff --git a/FastsyncV2/catchup.go b/FastsyncV2/catchup.go new file mode 100644 index 00000000..67fd0e30 --- /dev/null +++ b/FastsyncV2/catchup.go @@ -0,0 +1,289 @@ +package FastsyncV2 + +// HandleCatchUpSync syncs blocks [fromBlock..remoteTip] without Merkle bisection. +// +// Use this after a bootstrap snapshot has loaded blocks [0..X]: call +// HandleCatchUpSync(X+1, targetPeer) to reconcile the remaining blocks to the +// current chain tip. +// +// Unlike HandleSync / HandleStartupSync, this path skips PriorSync entirely. +// It builds the missing range directly from the availability response: +// +// Phase 1 — Availability → get auth token, discover remoteTip +// Phase 2 — HeaderSync → fetch headers [fromBlock..remoteTip] (no Merkle confirmation) +// Phase 3 — DataSync → fetch block bodies +// Phase 4 — AccountSync → sync zero-tx accounts not covered by DataSync +// Phase 5 — Reconciliation → replay txs, commit account balances +// Phase 6 — Re-auth → refresh expired token before PoTS +// Phase 7 — PoTS → fetch blocks produced while phases 2-5 ran +// +// targetPeer must be a libp2p multiaddr with an embedded peer ID, e.g.: +// +// /ip4/192.168.1.5/tcp/15000/p2p/12D3KooW... +import ( + "context" + "fmt" + "log" + "math" + "time" + + availabilitypb "github.com/JupiterMetaLabs/JMDN-FastSync/common/proto/availability" + headersyncpb "github.com/JupiterMetaLabs/JMDN-FastSync/common/proto/headersync" + taggingpb "github.com/JupiterMetaLabs/JMDN-FastSync/common/proto/tagging" + "github.com/JupiterMetaLabs/JMDN-FastSync/common/types" + "github.com/libp2p/go-libp2p/core/peer" + "github.com/multiformats/go-multiaddr" +) + +// Note: tryRefreshAuth is defined below but currently unused (AUTH_TTL = 48h). +// Kept for reference — re-enable the commented blocks above if TTL is reduced. + +// HandleCatchUpSync is the public entry point. See package-level doc above. +func (fs *FastsyncV2) HandleCatchUpSync(fromBlock uint64, targetPeer string) error { + catchUpStart := time.Now() + + // Use a generous timeout — catching up on days of blocks takes much longer + // than a normal incremental sync. Callers can wrap in their own deadline if needed. + ctx, cancel := context.WithTimeout(context.Background(), fs.syncTimeout) + defer cancel() + + // ── Parse and connect ───────────────────────────────────────────────── + maddr, err := multiaddr.NewMultiaddr(targetPeer) + if err != nil { + return fmt.Errorf("catchup: invalid multiaddr %q: %w", targetPeer, err) + } + info, err := peer.AddrInfoFromP2pAddr(maddr) + if err != nil { + return fmt.Errorf("catchup: extract peer info: %w", err) + } + if err := fs.Host.Connect(ctx, *info); err != nil { + return fmt.Errorf("catchup: connect to peer %s: %w", info.ID, err) + } + + peerAddrs := fs.Host.Peerstore().Addrs(info.ID) + if len(peerAddrs) == 0 { + peerAddrs = info.Addrs + } + targetNodeInfo := &types.Nodeinfo{ + PeerID: info.ID, + Multiaddr: peerAddrs, + Version: commsVersion, + } + + log.Printf("[CatchUpSync] starting from block %d → peer %s", fromBlock, info.ID) + + // ── Phase 1: Availability ───────────────────────────────────────────── + log.Printf("[CatchUpSync] phase 1: availability probe") + + availResp, err := fs.AvailRouter.SendAvailabilityRequest( + ctx, fs.PriorRouter.GetSyncVars(), *targetNodeInfo, fromBlock, math.MaxUint64, + ) + if err != nil { + return fmt.Errorf("catchup: availability: %w", err) + } + if !availResp.IsAvailable { + return fmt.Errorf("catchup: peer %s not available", info.ID) + } + if availResp.Auth == nil || availResp.Auth.UUID == "" { + return fmt.Errorf("catchup: peer %s returned no auth token", info.ID) + } + + // remoteTip is the highest block the peer reported. + remoteTip := uint64(availResp.BlockMerge) + if remoteTip < fromBlock { + return fmt.Errorf("catchup: remoteTip %d < fromBlock %d — nothing to sync", remoteTip, fromBlock) + } + + log.Printf("[CatchUpSync] phase 1 complete: remoteTip=%d auth=%s", remoteTip, availResp.Auth.UUID) + + remotes := []*availabilitypb.AvailabilityResponse{availResp} + + // ── Build the catch-up tag (sparse gap detection) ──────────────────── + // Scan local DB for blocks already present in [fromBlock..remoteTip] and + // compute the complement — only the gaps are fetched, not the full range. + catchUpTag, err := fs.buildMissingTag(fromBlock, remoteTip) + if err != nil { + return fmt.Errorf("catchup: scan local blocks: %w", err) + } + if len(catchUpTag.Range) == 0 && len(catchUpTag.BlockNumber) == 0 { + log.Printf("[CatchUpSync] all blocks [%d..%d] already present locally", fromBlock, remoteTip) + return nil + } + log.Printf("[CatchUpSync] %d missing range(s) to fetch", len(catchUpTag.Range)) + + // ── Phase 2: HeaderSync ─────────────────────────────────────────────── + log.Printf("[CatchUpSync] phase 2: header sync [%d..%d]", fromBlock, remoteTip) + + dataSyncReq, err := fs.HeaderRouter.HeaderSync( + &headersyncpb.HeaderSyncRequest{Tag: catchUpTag}, + remotes, + false, // syncConfirmation=false: skip Merkle, we know the exact range + ) + if err != nil { + return fmt.Errorf("catchup: header sync: %w", err) + } + log.Printf("[CatchUpSync] phase 2 complete") + + // ── Phase 3: DataSync ───────────────────────────────────────────────── + log.Printf("[CatchUpSync] phase 3: data sync") + + // dataSyncReq is nil if HeaderSync found no blocks to write (range already + // present locally). Skip DataSync in that case — same behaviour as HandleSync. + if dataSyncReq == nil { + log.Printf("[CatchUpSync] phase 3 skipped: no DataSync request from HeaderSync") + return nil + } + + taggedAccounts, err := fs.DataRouter.DataSync(dataSyncReq, remotes) + if err != nil { + return fmt.Errorf("catchup: data sync: %w", err) + } + log.Printf("[CatchUpSync] phase 3 complete") + + // Refresh the local block marker after writing a large batch of data. + fs.reconcileLocalLatestBlock() + + // ── Phase 3.5: FetchAccounts — pull tagged accounts missing locally ─── + if taggedAccounts != nil && len(taggedAccounts.Accounts) > 0 { + // AUTH_TTL is now 48h so no re-auth needed here. + // if refreshed, ok := fs.tryRefreshAuth(ctx, targetNodeInfo, fromBlock); ok { + // availResp = refreshed + // remotes = []*availabilitypb.AvailabilityResponse{availResp} + // } + + missingMap := make(map[string]bool) + accountMgr := fs.blockInfoAdapter.NewAccountManager() + for addr := range taggedAccounts.Accounts { + acc, err := accountMgr.GetAccountByAddress(addr) + if err == nil && acc == nil { + missingMap[addr] = true + } + } + if len(missingMap) > 0 { + log.Printf("[CatchUpSync] phase 3.5: fetching %d missing tagged accounts", len(missingMap)) + resp, err := fs.AccountSyncRouter.FetchAccounts(availResp, missingMap) + if err != nil { + log.Printf("[CatchUpSync] phase 3.5 warning: FetchAccounts failed: %v", err) + } else if resp != nil && len(resp.GetAccounts()) > 0 { + accounts := protoAccountsToTypes(resp.GetAccounts()) + if writeErr := accountMgr.WriteAccounts(accounts); writeErr != nil { + log.Printf("[CatchUpSync] phase 3.5 warning: WriteAccounts failed: %v", writeErr) + } else { + log.Printf("[CatchUpSync] phase 3.5 complete: wrote %d accounts", len(accounts)) + } + } + } + } + + // ── Phase 4: AccountSync ────────────────────────────────────────────── + // Syncs zero-tx accounts not covered by DataSync TaggedAccounts. + log.Printf("[CatchUpSync] phase 4: account sync") + + totalMissing, err := fs.AccountSyncRouter.AccountSync(availResp) + if err != nil { + log.Printf("[CatchUpSync] phase 4 warning: account sync failed: %v", err) + } else { + log.Printf("[CatchUpSync] phase 4 complete: %d accounts synced", totalMissing) + } + + // ── Phase 5: Reconciliation ─────────────────────────────────────────── + log.Printf("[CatchUpSync] phase 5: reconciliation") + + reconCount, failedAccounts, err := fs.ReconRouter.Reconcile(taggedAccounts, availResp) + if err != nil { + log.Printf("[CatchUpSync] phase 5 warning: %v", err) + } + log.Printf("[CatchUpSync] phase 5 complete: %d committed, %d failed", reconCount, len(failedAccounts)) + + // ── Phase 6: Re-auth before PoTS (disabled — AUTH_TTL is now 48h) ───── + // if refreshed, ok := fs.tryRefreshAuth(ctx, targetNodeInfo, 0); ok { + // availResp = refreshed + // remotes = []*availabilitypb.AvailabilityResponse{availResp} + // log.Printf("[CatchUpSync] phase 6: re-auth ok (UUID=%s)", availResp.Auth.UUID) + // } else { + // log.Printf("[CatchUpSync] phase 6: re-auth failed — proceeding with stale token") + // } + + // ── Phase 7: PoTS ───────────────────────────────────────────────────── + log.Printf("[CatchUpSync] phase 7: PoTS gap fill") + + if err := fs.executePoTS(ctx, targetNodeInfo, remotes, availResp); err != nil { + log.Printf("[CatchUpSync] phase 7 warning: PoTS failed: %v", err) + } else { + log.Printf("[CatchUpSync] phase 7 complete") + } + + log.Printf("[CatchUpSync] done in %s", time.Since(catchUpStart).Round(time.Millisecond)) + return nil +} + +// tryRefreshAuth sends a fresh availability request and returns the new response +// if the peer is still available and returns a valid token. +func (fs *FastsyncV2) tryRefreshAuth(ctx context.Context, targetNodeInfo *types.Nodeinfo, startBlock uint64) (*availabilitypb.AvailabilityResponse, bool) { + resp, err := fs.AvailRouter.SendAvailabilityRequest( + ctx, fs.PriorRouter.GetSyncVars(), *targetNodeInfo, startBlock, math.MaxUint64, + ) + if err != nil { + log.Printf("[CatchUpSync] auth refresh failed: %v", err) + return nil, false + } + if !resp.IsAvailable || resp.Auth == nil || resp.Auth.UUID == "" { + return nil, false + } + return resp, true +} + +// buildMissingTag scans the local DB over [fromBlock..remoteTip] and returns a +// Tag containing only the ranges absent locally. +// +// Algorithm — O(n) time, O(batch) space: +// +// 1. Iterate local blocks in ascending order via BlockIterator. +// 2. Keep a "cursor" at the next expected block number, starting at fromBlock. +// 3. For each present block B: +// - If cursor < B → gap [cursor..B-1] is missing → emit RangeTag. +// - Advance cursor to B+1. +// 4. After iteration: if cursor ≤ remoteTip, emit the trailing gap. +// +// This produces the minimal set of contiguous ranges to request from the peer. +// Example: present={0,1,3,7,9,10}, fromBlock=0, remoteTip=10 +// +// → gaps: [2..2], [4..6], [8..8] +const catchUpBatchSize = 500 + +func (fs *FastsyncV2) buildMissingTag(fromBlock, remoteTip uint64) (*taggingpb.Tag, error) { + iter := fs.blockInfoAdapter.NewBlockIterator(fromBlock, remoteTip, catchUpBatchSize) + defer iter.Close() + + var ranges []*taggingpb.RangeTag + cursor := fromBlock + + for { + batch, err := iter.Next() + if err != nil { + return nil, fmt.Errorf("block iterator: %w", err) + } + if len(batch) == 0 { + break // end of iteration + } + + for _, blk := range batch { + b := blk.BlockNumber + if b < cursor { + continue // already accounted for (shouldn't happen with sorted iterator) + } + if b > cursor { + // Gap: [cursor .. b-1] is missing + ranges = append(ranges, &taggingpb.RangeTag{Start: cursor, End: b - 1}) + } + cursor = b + 1 + } + } + + // Trailing gap: blocks after the last present one up to remoteTip + if cursor <= remoteTip { + ranges = append(ranges, &taggingpb.RangeTag{Start: cursor, End: remoteTip}) + } + + return &taggingpb.Tag{Range: ranges}, nil +} diff --git a/config/settings/config.go b/config/settings/config.go index d22dcf52..cea614be 100644 --- a/config/settings/config.go +++ b/config/settings/config.go @@ -167,4 +167,15 @@ type FastSyncSettings struct { // AllowedPeers is an optional whitelist of libp2p peer IDs this node will // accept sync data FROM. Empty list = accept from any peer. AllowedPeers []string `mapstructure:"allowed_peers" yaml:"allowed_peers"` + + // CatchUpFromBlock, when non-zero, triggers HandleCatchUpSync on startup + // instead of HandleStartupSync. Set to the first block NOT present in the + // local DB after a bootstrap snapshot load (i.e. snapshot_tip + 1). + // Requires CatchUpPeer to be set. + CatchUpFromBlock uint64 `mapstructure:"catch_up_from_block" yaml:"catch_up_from_block"` + + // CatchUpPeer is the libp2p multiaddr (with embedded peer ID) of the node + // to catch up from. Example: /ip4/1.2.3.4/tcp/15000/p2p/12D3KooW... + // Only used when CatchUpFromBlock > 0. + CatchUpPeer string `mapstructure:"catch_up_peer" yaml:"catch_up_peer"` } diff --git a/config/settings/defaults.go b/config/settings/defaults.go index 60ae4cac..bf41097f 100644 --- a/config/settings/defaults.go +++ b/config/settings/defaults.go @@ -86,7 +86,9 @@ func DefaultConfig() NodeConfig { EnablePulling: true, PullOnStartup: true, SyncTimeout: 10 * time.Minute, - AllowedPeers: []string{}, + AllowedPeers: []string{}, + CatchUpFromBlock: 0, + CatchUpPeer: "", }, Security: DefaultSecurityConfig(), Alerts: DefaultAlertsConfig(), diff --git a/config/settings/loader.go b/config/settings/loader.go index 3c60233a..095d2df3 100644 --- a/config/settings/loader.go +++ b/config/settings/loader.go @@ -170,6 +170,8 @@ func setDefaults(v *viper.Viper) { v.SetDefault("fastsync.pull_on_startup", d.FastSync.PullOnStartup) v.SetDefault("fastsync.sync_timeout", d.FastSync.SyncTimeout) v.SetDefault("fastsync.allowed_peers", d.FastSync.AllowedPeers) + v.SetDefault("fastsync.catch_up_from_block", d.FastSync.CatchUpFromBlock) + v.SetDefault("fastsync.catch_up_peer", d.FastSync.CatchUpPeer) // Security v.SetDefault("security.enabled", d.Security.Enabled) diff --git a/main.go b/main.go index 433e69c6..96d8d2c0 100644 --- a/main.go +++ b/main.go @@ -260,8 +260,9 @@ func runCommand(command string, args []string, grpcPort int) { fmt.Println(" broadcast - Broadcast message") fmt.Println(" getdid - Get DID document") fmt.Println(" propagatedid [balance] - Propagate DID to network") - fmt.Println(" fastsync - Fast sync with peer (V2 Engine)") - fmt.Println(" accountsync - Sync missing accounts only (skip block sync)") + fmt.Println(" fastsync - Fast sync with peer (V2 Engine)") + fmt.Println(" catchup - Catch up from a known block to chain tip (post-bootstrap)") + fmt.Println(" accountsync - Sync missing accounts only (skip block sync)") fmt.Println("\nUsage: ./jmdn -cmd [args...]") fmt.Println("\nNote: Some interactive commands (mempoolStats, seednodeStats, etc.)") fmt.Println("are only available in interactive mode.") @@ -449,6 +450,30 @@ func runCommand(command string, args []string, grpcPort int) { fmt.Printf(" Accounts DB TxID: %d\n", stats.AccountsState.TxId) } + case "catchup": + if len(args) < 2 { + fmt.Println("Usage: jmdn -cmd catchup ") + os.Exit(1) + } + fromBlock, err := strconv.ParseUint(args[1], 10, 64) + if err != nil { + fmt.Printf("Invalid from_block %q: %v\n", args[1], err) + os.Exit(1) + } + fmt.Printf("Starting CatchUpSync from block %d...\n", fromBlock) + stats, err := client.CatchUpSync(args[0], fromBlock) + if err != nil { + fmt.Printf("Error: %v\n", err) + os.Exit(1) + } + if stats != nil && stats.Error != "" { + fmt.Printf("CatchUpSync failed: %s\n", stats.Error) + os.Exit(1) + } + if stats != nil { + fmt.Printf("CatchUpSync completed in %ds\n", stats.TimeTaken) + } + case "accountsync": if len(args) < 1 { fmt.Println("Usage: jmdn -cmd accountsync ") @@ -509,8 +534,9 @@ func runCommand(command string, args []string, grpcPort int) { fmt.Println(" sendfile - Send file") fmt.Println(" broadcast - Broadcast message") fmt.Println(" getdid - Get DID document") - fmt.Println(" fastsync - Fast sync with peer (V2 Engine)") - fmt.Println(" accountsync - Sync missing accounts only (skip block sync)") + fmt.Println(" fastsync - Fast sync with peer (V2 Engine)") + fmt.Println(" catchup - Catch up from a known block to chain tip (post-bootstrap)") + fmt.Println(" accountsync - Sync missing accounts only (skip block sync)") os.Exit(1) } } @@ -967,57 +993,31 @@ func main() { log.Info().Msg("[FastSync] disabled by config — protocol handlers not registered") } - // Startup sync: catch up on blocks missed while offline. - if fastSyncerV2 != nil && cfg.FastSync.EnablePulling && cfg.FastSync.PullOnStartup { - if err := goMaybeTracked(MainLM, GRO.MainAM, GRO.MainLM, GRO.StartupSyncThread, func(ctx context.Context) error { - // Wait for peer connections to establish after node startup - time.Sleep(5 * time.Second) - - peers := n.Host.Network().Peers() - if len(peers) == 0 { - // TODO: Query seed node for available sync peers when no direct peers are connected - log.Info().Msg("[StartupSync] No peers connected, skipping startup sync") - return nil - } - - log.Info().Int("peers", len(peers)).Msg("[StartupSync] Attempting startup sync with connected peers") - - for _, peerID := range peers { - // Honour allowed_peers whitelist if configured - if len(cfg.FastSync.AllowedPeers) > 0 { - allowed := false - for _, ap := range cfg.FastSync.AllowedPeers { - if ap == peerID.String() { - allowed = true - break - } - } - if !allowed { - log.Info().Str("peer", peerID.String()).Msg("[StartupSync] Skipping peer not in allowed_peers") - continue - } - } - - addrs := n.Host.Peerstore().Addrs(peerID) - if len(addrs) == 0 { - continue - } - - log.Info().Str("peer", peerID.String()).Msg("[StartupSync] Trying peer") - if err := fastSyncerV2.HandleStartupSync(peerID, addrs); err != nil { - log.Warn().Err(err).Str("peer", peerID.String()).Msg("[StartupSync] Failed, trying next peer") - continue + // CatchUp sync: post-bootstrap reconciliation from a known block to realtime. + // Triggered when catch_up_from_block > 0 in config. Runs once on startup, + // then the node falls through to normal operation / pubsub. + if fastSyncerV2 != nil && cfg.FastSync.EnablePulling && cfg.FastSync.CatchUpFromBlock > 0 { + if cfg.FastSync.CatchUpPeer == "" { + log.Error().Msg("[CatchUpSync] catch_up_from_block is set but catch_up_peer is empty — skipping") + } else { + fromBlock := cfg.FastSync.CatchUpFromBlock + peer := cfg.FastSync.CatchUpPeer + if err := goMaybeTracked(MainLM, GRO.MainAM, GRO.MainLM, GRO.StartupSyncThread, func(ctx context.Context) error { + time.Sleep(5 * time.Second) // allow peer connections to establish + log.Info().Uint64("from_block", fromBlock).Str("peer", peer).Msg("[CatchUpSync] starting post-bootstrap catch-up") + if err := fastSyncerV2.HandleCatchUpSync(fromBlock, peer); err != nil { + log.Error().Err(err).Msg("[CatchUpSync] failed") + return err } - - log.Info().Str("peer", peerID.String()).Msg("[StartupSync] Sync completed successfully") + log.Info().Msg("[CatchUpSync] completed successfully") return nil + }); err != nil { + log.Error().Err(err).Str("thread", GRO.StartupSyncThread).Msg("Failed to start CatchUpSync goroutine") } - - log.Warn().Msg("[StartupSync] Failed to sync with any connected peer") - return nil - }); err != nil { - log.Error().Err(err).Str("thread", GRO.StartupSyncThread).Msg("Failed to start startup sync goroutine") } + // StartupSync (HandleStartupSync) disabled — catchup is the only startup sync path. + // } else if fastSyncerV2 != nil && cfg.FastSync.EnablePulling && cfg.FastSync.PullOnStartup { + // ... } else if fastSyncerV2 != nil && !cfg.FastSync.EnablePulling { log.Info().Msg("[FastSync] Node configured with enable_pulling=false (serve-only participant); skipping StartupSync") } From 4de0d71d5d1d912a0269a33afc0f010a50048f15 Mon Sep 17 00:00:00 2001 From: Doc Date: Tue, 23 Jun 2026 17:29:13 +0530 Subject: [PATCH 02/23] feat: enhance catchup command and synchronization logic - Updated CLI commands to reflect new usage patterns for the catchup command, allowing an optional from_block parameter. - Improved error handling and user feedback for catchup synchronization, defaulting from_block to 0 for auto-detection. - Enhanced FastSyncSettings to clarify the purpose of CatchUpFromBlock and CatchUpPeer. - Refactored the HandleCatchUpSync method to support improved synchronization logic and peer address resolution. These changes streamline the catchup process, making it more user-friendly and robust for block synchronization. --- CLI/CLI.go | 25 +++++++----- CLI/CLI_GRPC.go | 5 ++- FastsyncV2/catchup.go | 23 +++++++++++ config/settings/config.go | 20 ++++++---- main.go | 82 ++++++++++++++++++++++++++------------- 5 files changed, 108 insertions(+), 47 deletions(-) diff --git a/CLI/CLI.go b/CLI/CLI.go index 92481754..37800a44 100644 --- a/CLI/CLI.go +++ b/CLI/CLI.go @@ -109,7 +109,7 @@ func PrintFuncs() { fmt.Println(" stats - Show messaging statistics") fmt.Println(" broadcast - Broadcast a message to all connected peers") fmt.Println(" fastsync - Fast sync blockchain data with a peer (V2 Engine)") - fmt.Println(" catchup - Catch up from a known block to chain tip (post-bootstrap reconciliation)") + fmt.Println(" catchup [from_block] - Catch up to chain tip; from_block defaults to auto-detect (localTip+1)") fmt.Println(" accountsync - Sync missing accounts only (skip block sync)") fmt.Println(" dbstate - Show current ImmuDB database state") fmt.Println(" propagateDID - Propagate a DID to the network") @@ -637,10 +637,11 @@ func (h *CommandHandler) handleFastSync(parts []string) { } func (h *CommandHandler) handleCatchUpSync(parts []string) { - if len(parts) != 3 { - fmt.Println("Usage: catchup ") + if len(parts) < 2 { + fmt.Println("Usage: catchup [from_block]") fmt.Println(" peer_multiaddr full multiaddr with peer ID, e.g. /ip4/1.2.3.4/tcp/15000/p2p/12D3KooW...") - fmt.Println(" from_block first block NOT in your local DB (bootstrap snapshot tip + 1)") + fmt.Println(" from_block optional; defaults to 0 (auto-detect from local DB tip)") + fmt.Println(" pass 1 to force a full scan from genesis") return } if h.FastSyncerV2 == nil { @@ -648,13 +649,17 @@ func (h *CommandHandler) handleCatchUpSync(parts []string) { return } - fromBlock, err := strconv.ParseUint(parts[2], 10, 64) - if err != nil { - fmt.Printf("Invalid from_block %q: %v\n", parts[2], err) - return - } + var fromBlock uint64 + if len(parts) >= 3 { + var err error + fromBlock, err = strconv.ParseUint(parts[2], 10, 64) + if err != nil { + fmt.Printf("Invalid from_block %q: %v\n", parts[2], err) + return + } + } // fromBlock=0 → auto-detect inside HandleCatchUpSync - fmt.Printf("Starting catch-up sync from block %d with peer %s\n", fromBlock, parts[1]) + fmt.Printf("Starting catch-up sync (from_block=%d) with peer %s\n", fromBlock, parts[1]) startTime := time.Now() if err := h.FastSyncerV2.HandleCatchUpSync(fromBlock, parts[1]); err != nil { diff --git a/CLI/CLI_GRPC.go b/CLI/CLI_GRPC.go index b5ea6e06..84f71f65 100644 --- a/CLI/CLI_GRPC.go +++ b/CLI/CLI_GRPC.go @@ -331,9 +331,10 @@ func (h *CommandHandler) HandleFastSyncV2(peeraddr string) (SyncStats, error) { } func (h *CommandHandler) HandleCatchUpSync(peeraddr string, fromBlock uint64) (SyncStats, error) { - if peeraddr == "" || fromBlock == 0 { - return SyncStats{}, fmt.Errorf("usage: catchup ") + if peeraddr == "" { + return SyncStats{}, fmt.Errorf("usage: catchup [from_block]") } + // fromBlock=0 → auto-detect inside HandleCatchUpSync if !h.PullAllowed { return SyncStats{}, fmt.Errorf("node is configured as a serve-only participant (pulling disabled). cannot pull data") } diff --git a/FastsyncV2/catchup.go b/FastsyncV2/catchup.go index 67fd0e30..9ef82369 100644 --- a/FastsyncV2/catchup.go +++ b/FastsyncV2/catchup.go @@ -39,9 +39,32 @@ import ( // Kept for reference — re-enable the commented blocks above if TTL is reduced. // HandleCatchUpSync is the public entry point. See package-level doc above. +// +// fromBlock is the first block AFTER the guaranteed-complete bootstrap range. +// It anchors the gap scan — buildMissingTag scans [fromBlock..remoteTip] and +// fetches only what is absent locally. +// +// Lifecycle: +// +// Stage 1 — bootstrap loads [0..X] (complete, no gaps) +// Stage 2 — HandleCatchUpSync(X+1, peer) → syncs [X+1..T1], no gaps expected +// Stage 3 — node offline, misses Y blocks; HandleCatchUpSync(X+1, peer) again +// → buildMissingTag finds any Stage-2 gaps + new [lastSynced+1..T2] +// +// fromBlock should always be bootstrapTip+1 (set in fastsync.catch_up_from_block +// config). Never use localTip+1: if Stage 2 was partial, localTip may be in the +// middle of a gap and the scan would skip missing blocks below it. func (fs *FastsyncV2) HandleCatchUpSync(fromBlock uint64, targetPeer string) error { catchUpStart := time.Now() + // fromBlock=0 is a safety fallback only — callers should always pass + // bootstrapTip+1 (from config catch_up_from_block). Using localTip+1 here + // would silently skip gaps below localTip if Stage 2 was interrupted. + if fromBlock == 0 { + fromBlock = 1 + log.Printf("[CatchUpSync] fromBlock not set, defaulting to 1 (full scan from genesis)") + } + // Use a generous timeout — catching up on days of blocks takes much longer // than a normal incremental sync. Callers can wrap in their own deadline if needed. ctx, cancel := context.WithTimeout(context.Background(), fs.syncTimeout) diff --git a/config/settings/config.go b/config/settings/config.go index cea614be..365cedc0 100644 --- a/config/settings/config.go +++ b/config/settings/config.go @@ -168,14 +168,20 @@ type FastSyncSettings struct { // accept sync data FROM. Empty list = accept from any peer. AllowedPeers []string `mapstructure:"allowed_peers" yaml:"allowed_peers"` - // CatchUpFromBlock, when non-zero, triggers HandleCatchUpSync on startup - // instead of HandleStartupSync. Set to the first block NOT present in the - // local DB after a bootstrap snapshot load (i.e. snapshot_tip + 1). - // Requires CatchUpPeer to be set. + // CatchUpFromBlock is the first block AFTER the bootstrap snapshot + // (i.e. bootstrapTip + 1). Set this once after loading the bootstrap and + // never change it. Every catchup run — including after the node goes offline + // and comes back — scans from this block to remoteTip to find all gaps. + // + // 0 = full scan from block 1 (genesis). Use this if no bootstrap was loaded. + // N = scan from N; bootstrap guaranteed to cover [0..N-1] with no gaps. + // + // Do NOT set this to localTip+1: if a previous catchup was partial, + // localTip may be ahead of gaps that would be silently skipped. CatchUpFromBlock uint64 `mapstructure:"catch_up_from_block" yaml:"catch_up_from_block"` - // CatchUpPeer is the libp2p multiaddr (with embedded peer ID) of the node - // to catch up from. Example: /ip4/1.2.3.4/tcp/15000/p2p/12D3KooW... - // Only used when CatchUpFromBlock > 0. + // CatchUpPeer is the libp2p peer ID of the node to catch up from. + // Example: 12D3KooWAbCdEf... + // The node must be connected (in peerstore) at startup for addresses to resolve. CatchUpPeer string `mapstructure:"catch_up_peer" yaml:"catch_up_peer"` } diff --git a/main.go b/main.go index 96d8d2c0..4e8e3509 100644 --- a/main.go +++ b/main.go @@ -51,6 +51,7 @@ import ( "github.com/libp2p/go-libp2p/core/host" "github.com/libp2p/go-libp2p/core/network" + "github.com/libp2p/go-libp2p/core/peer" _ "github.com/mattn/go-sqlite3" "github.com/redis/go-redis/v9" "github.com/rs/zerolog/log" @@ -261,7 +262,7 @@ func runCommand(command string, args []string, grpcPort int) { fmt.Println(" getdid - Get DID document") fmt.Println(" propagatedid [balance] - Propagate DID to network") fmt.Println(" fastsync - Fast sync with peer (V2 Engine)") - fmt.Println(" catchup - Catch up from a known block to chain tip (post-bootstrap)") + fmt.Println(" catchup [from_block] - Catch up to chain tip; from_block defaults to auto-detect (localTip+1)") fmt.Println(" accountsync - Sync missing accounts only (skip block sync)") fmt.Println("\nUsage: ./jmdn -cmd [args...]") fmt.Println("\nNote: Some interactive commands (mempoolStats, seednodeStats, etc.)") @@ -451,16 +452,21 @@ func runCommand(command string, args []string, grpcPort int) { } case "catchup": - if len(args) < 2 { - fmt.Println("Usage: jmdn -cmd catchup ") + if len(args) < 1 { + fmt.Println("Usage: jmdn -cmd catchup [from_block]") + fmt.Println(" from_block defaults to 0 (auto-detect from local DB tip)") os.Exit(1) } - fromBlock, err := strconv.ParseUint(args[1], 10, 64) - if err != nil { - fmt.Printf("Invalid from_block %q: %v\n", args[1], err) - os.Exit(1) + var fromBlock uint64 + if len(args) >= 2 { + var err error + fromBlock, err = strconv.ParseUint(args[1], 10, 64) + if err != nil { + fmt.Printf("Invalid from_block %q: %v\n", args[1], err) + os.Exit(1) + } } - fmt.Printf("Starting CatchUpSync from block %d...\n", fromBlock) + fmt.Printf("Starting CatchUpSync (from_block=%d)...\n", fromBlock) stats, err := client.CatchUpSync(args[0], fromBlock) if err != nil { fmt.Printf("Error: %v\n", err) @@ -535,7 +541,7 @@ func runCommand(command string, args []string, grpcPort int) { fmt.Println(" broadcast - Broadcast message") fmt.Println(" getdid - Get DID document") fmt.Println(" fastsync - Fast sync with peer (V2 Engine)") - fmt.Println(" catchup - Catch up from a known block to chain tip (post-bootstrap)") + fmt.Println(" catchup [from_block] - Catch up to chain tip; from_block defaults to auto-detect (localTip+1)") fmt.Println(" accountsync - Sync missing accounts only (skip block sync)") os.Exit(1) } @@ -994,26 +1000,46 @@ func main() { } // CatchUp sync: post-bootstrap reconciliation from a known block to realtime. - // Triggered when catch_up_from_block > 0 in config. Runs once on startup, - // then the node falls through to normal operation / pubsub. - if fastSyncerV2 != nil && cfg.FastSync.EnablePulling && cfg.FastSync.CatchUpFromBlock > 0 { - if cfg.FastSync.CatchUpPeer == "" { - log.Error().Msg("[CatchUpSync] catch_up_from_block is set but catch_up_peer is empty — skipping") - } else { - fromBlock := cfg.FastSync.CatchUpFromBlock - peer := cfg.FastSync.CatchUpPeer - if err := goMaybeTracked(MainLM, GRO.MainAM, GRO.MainLM, GRO.StartupSyncThread, func(ctx context.Context) error { - time.Sleep(5 * time.Second) // allow peer connections to establish - log.Info().Uint64("from_block", fromBlock).Str("peer", peer).Msg("[CatchUpSync] starting post-bootstrap catch-up") - if err := fastSyncerV2.HandleCatchUpSync(fromBlock, peer); err != nil { - log.Error().Err(err).Msg("[CatchUpSync] failed") - return err - } - log.Info().Msg("[CatchUpSync] completed successfully") - return nil - }); err != nil { - log.Error().Err(err).Str("thread", GRO.StartupSyncThread).Msg("Failed to start CatchUpSync goroutine") + // catch_up_peer is a plain peer ID (e.g. 12D3KooW...) — resolved from peerstore, + // same pattern as the old HandleStartupSync startup path. + if fastSyncerV2 != nil && cfg.FastSync.EnablePulling && cfg.FastSync.CatchUpPeer != "" { + if cfg.FastSync.CatchUpFromBlock == 0 { + log.Warn().Msg("[CatchUpSync] catch_up_from_block not set — defaulting to 1 (full scan). Set to bootstrapTip+1 to limit scan range.") + } + catchUpPeerIDStr := cfg.FastSync.CatchUpPeer + fromBlock := cfg.FastSync.CatchUpFromBlock + if err := goMaybeTracked(MainLM, GRO.MainAM, GRO.MainLM, GRO.StartupSyncThread, func(ctx context.Context) error { + time.Sleep(5 * time.Second) // allow peer connections to establish + + // Resolve the configured peer ID to a libp2p peer.ID. + catchUpPeerID, err := peer.Decode(catchUpPeerIDStr) + if err != nil { + return fmt.Errorf("[CatchUpSync] invalid catch_up_peer %q: %w", catchUpPeerIDStr, err) + } + + // Get addresses from peerstore — same as HandleStartupSync. + addrs := n.Host.Peerstore().Addrs(catchUpPeerID) + if len(addrs) == 0 { + return fmt.Errorf("[CatchUpSync] peer %s not in peerstore — not connected yet", catchUpPeerIDStr) } + + // Build full multiaddr with embedded peer ID, matching HandleStartupSync pattern. + targetMultiaddr := fmt.Sprintf("%s/p2p/%s", addrs[0].String(), catchUpPeerID.String()) + + log.Info(). + Uint64("from_block", fromBlock). + Str("peer", catchUpPeerIDStr). + Str("addr", targetMultiaddr). + Msg("[CatchUpSync] starting") + + if err := fastSyncerV2.HandleCatchUpSync(fromBlock, targetMultiaddr); err != nil { + log.Error().Err(err).Msg("[CatchUpSync] failed") + return err + } + log.Info().Msg("[CatchUpSync] completed successfully") + return nil + }); err != nil { + log.Error().Err(err).Str("thread", GRO.StartupSyncThread).Msg("Failed to start CatchUpSync goroutine") } // StartupSync (HandleStartupSync) disabled — catchup is the only startup sync path. // } else if fastSyncerV2 != nil && cfg.FastSync.EnablePulling && cfg.FastSync.PullOnStartup { From 58069c2507358b4d7c540608d4260e0c2c41f74d Mon Sep 17 00:00:00 2001 From: Doc Date: Tue, 23 Jun 2026 17:54:30 +0530 Subject: [PATCH 03/23] fix: update catchup synchronization logic and dependency cleanup - Modified the HandleCatchUpSync method to use BlockHeight instead of BlockMerge for determining the latest block number from peers, improving accuracy in synchronization. - Added error handling for cases where the remote block height is zero, enhancing robustness against outdated server responses. - Cleaned up go.sum by removing unused dependencies related to JMDN-FastSync, streamlining the dependency management. These changes enhance the reliability of the catchup process and ensure a cleaner project structure. --- FastsyncV2/catchup.go | 8 ++++++-- go.sum | 2 -- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/FastsyncV2/catchup.go b/FastsyncV2/catchup.go index 9ef82369..e2509091 100644 --- a/FastsyncV2/catchup.go +++ b/FastsyncV2/catchup.go @@ -111,8 +111,12 @@ func (fs *FastsyncV2) HandleCatchUpSync(fromBlock uint64, targetPeer string) err return fmt.Errorf("catchup: peer %s returned no auth token", info.ID) } - // remoteTip is the highest block the peer reported. - remoteTip := uint64(availResp.BlockMerge) + // remoteTip is the peer's latest block number (BlockHeight field). + // Note: BlockMerge is a Merkle tree parameter, NOT the block count. + remoteTip := availResp.BlockHeight + if remoteTip == 0 { + return fmt.Errorf("catchup: peer %s returned block_height=0 (server may be outdated)", info.ID) + } if remoteTip < fromBlock { return fmt.Errorf("catchup: remoteTip %d < fromBlock %d — nothing to sync", remoteTip, fromBlock) } diff --git a/go.sum b/go.sum index 4f204b47..a324e23f 100644 --- a/go.sum +++ b/go.sum @@ -1,8 +1,6 @@ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= -github.com/JupiterMetaLabs/JMDN-FastSync v0.0.0-20260623100612-88e18669b3f7 h1:PQloBFnhSs5YBpbvVNOyWMyV3aaM6x3XiLP1Lzl7ZVE= -github.com/JupiterMetaLabs/JMDN-FastSync v0.0.0-20260623100612-88e18669b3f7/go.mod h1:0erT7gGH4TYtitRik+Y3GfxSa5KGLacr9rJovV3vNB0= github.com/JupiterMetaLabs/JMDN_Merkletree v0.0.0-20260413092720-b819e61566f8 h1:yPrYb6g6NnqGsiCVqMf0zndEYTuelL3B03Fee+utLWA= github.com/JupiterMetaLabs/JMDN_Merkletree v0.0.0-20260413092720-b819e61566f8/go.mod h1:zM8F31G2SiPXzTo1WzbDFZ5iOOAkqrkuZjS0QVDW4ew= github.com/JupiterMetaLabs/goroutine-orchestrator v0.1.5 h1:S9+s6JeWSrGJ6ooYb4f8iRlJxwPUZ8X/EA4EgxKS3zc= From 57c57c103dc5e09452ec3f2b479ca1aaee41b0b8 Mon Sep 17 00:00:00 2001 From: Doc Date: Tue, 23 Jun 2026 17:55:37 +0530 Subject: [PATCH 04/23] Fix fastsy --- go.mod | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go.mod b/go.mod index d30ace05..330d81ba 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module gossipnode go 1.25.0 require ( - github.com/JupiterMetaLabs/JMDN-FastSync v0.0.0-20260623100612-88e18669b3f7 + github.com/JupiterMetaLabs/JMDN-FastSync v0.0.0-20260623122144-03caeb87c1bc github.com/JupiterMetaLabs/JMDN_Merkletree v0.0.0-20260413092720-b819e61566f8 github.com/JupiterMetaLabs/goroutine-orchestrator v0.1.5 github.com/JupiterMetaLabs/ion v0.4.2 From b83a402f2ee1999155773a9121dba30d22c173a1 Mon Sep 17 00:00:00 2001 From: Doc Date: Tue, 23 Jun 2026 21:59:13 +0530 Subject: [PATCH 05/23] chore(deps): update JMDN-FastSync dependency version in go.mod - Updated the JMDN-FastSync dependency to version v0.0.0-20260623122144-03caeb87c1bc in go.mod to ensure compatibility with recent changes. --- FastsyncV2/catchup.go | 210 +++++++++++++++++++++++++++++++++++------- 1 file changed, 178 insertions(+), 32 deletions(-) diff --git a/FastsyncV2/catchup.go b/FastsyncV2/catchup.go index e2509091..94c4656e 100644 --- a/FastsyncV2/catchup.go +++ b/FastsyncV2/catchup.go @@ -28,9 +28,14 @@ import ( "time" availabilitypb "github.com/JupiterMetaLabs/JMDN-FastSync/common/proto/availability" + authpb "github.com/JupiterMetaLabs/JMDN-FastSync/common/proto/availability/auth" + ackpb "github.com/JupiterMetaLabs/JMDN-FastSync/common/proto/ack" + datasyncpb "github.com/JupiterMetaLabs/JMDN-FastSync/common/proto/datasync" headersyncpb "github.com/JupiterMetaLabs/JMDN-FastSync/common/proto/headersync" + phasepb "github.com/JupiterMetaLabs/JMDN-FastSync/common/proto/phase" taggingpb "github.com/JupiterMetaLabs/JMDN-FastSync/common/proto/tagging" "github.com/JupiterMetaLabs/JMDN-FastSync/common/types" + "github.com/JupiterMetaLabs/JMDN-FastSync/common/types/constants" "github.com/libp2p/go-libp2p/core/peer" "github.com/multiformats/go-multiaddr" ) @@ -111,64 +116,94 @@ func (fs *FastsyncV2) HandleCatchUpSync(fromBlock uint64, targetPeer string) err return fmt.Errorf("catchup: peer %s returned no auth token", info.ID) } - // remoteTip is the peer's latest block number (BlockHeight field). - // Note: BlockMerge is a Merkle tree parameter, NOT the block count. + // remoteTip is the peer's latest block number (BlockHeight field, added in add/catchup). + // Old peers (pre-add/catchup binary) leave this as 0 — fall back to our own local tip + // so we can at least close any gaps within our already-downloaded range. + // New blocks beyond our local tip will be picked up once the peer is updated. remoteTip := availResp.BlockHeight if remoteTip == 0 { - return fmt.Errorf("catchup: peer %s returned block_height=0 (server may be outdated)", info.ID) + localTip := fs.blockInfoAdapter.GetBlockNumber() + if localTip == 0 { + return fmt.Errorf("catchup: peer %s returned block_height=0 and local tip is also 0 — peer needs the add/catchup binary update", info.ID) + } + log.Printf("[CatchUpSync] WARNING: peer %s returned block_height=0 (pre-add/catchup binary). "+ + "Falling back to local tip %d. Update the peer node to sync new blocks beyond local tip.", + info.ID, localTip) + remoteTip = localTip } if remoteTip < fromBlock { - return fmt.Errorf("catchup: remoteTip %d < fromBlock %d — nothing to sync", remoteTip, fromBlock) + log.Printf("[CatchUpSync] remoteTip %d < fromBlock %d — nothing to sync", remoteTip, fromBlock) + return nil } log.Printf("[CatchUpSync] phase 1 complete: remoteTip=%d auth=%s", remoteTip, availResp.Auth.UUID) remotes := []*availabilitypb.AvailabilityResponse{availResp} - // ── Build the catch-up tag (sparse gap detection) ──────────────────── + // ── Build header-missing tag (sparse gap detection) ───────────────── // Scan local DB for blocks already present in [fromBlock..remoteTip] and - // compute the complement — only the gaps are fetched, not the full range. + // compute the complement — only header-missing blocks are fetched here. + // NOTE: PubSub announcements may have already written block headers without + // transaction data. Those blocks appear "present" to the iterator but lack + // NonHeaders data. Phase 3 (DataSync) always runs for the full range to fix + // this independently of whether Phase 2 (HeaderSync) found anything to do. catchUpTag, err := fs.buildMissingTag(fromBlock, remoteTip) if err != nil { return fmt.Errorf("catchup: scan local blocks: %w", err) } - if len(catchUpTag.Range) == 0 && len(catchUpTag.BlockNumber) == 0 { - log.Printf("[CatchUpSync] all blocks [%d..%d] already present locally", fromBlock, remoteTip) - return nil - } - log.Printf("[CatchUpSync] %d missing range(s) to fetch", len(catchUpTag.Range)) // ── Phase 2: HeaderSync ─────────────────────────────────────────────── log.Printf("[CatchUpSync] phase 2: header sync [%d..%d]", fromBlock, remoteTip) - dataSyncReq, err := fs.HeaderRouter.HeaderSync( - &headersyncpb.HeaderSyncRequest{Tag: catchUpTag}, - remotes, - false, // syncConfirmation=false: skip Merkle, we know the exact range - ) - if err != nil { - return fmt.Errorf("catchup: header sync: %w", err) + if len(catchUpTag.Range) > 0 || len(catchUpTag.BlockNumber) > 0 { + log.Printf("[CatchUpSync] %d missing header range(s) to fetch", len(catchUpTag.Range)) + _, err = fs.HeaderRouter.HeaderSync( + &headersyncpb.HeaderSyncRequest{Tag: catchUpTag}, + remotes, + false, // syncConfirmation=false: skip Merkle, we know the exact range + ) + if err != nil { + return fmt.Errorf("catchup: header sync: %w", err) + } + log.Printf("[CatchUpSync] phase 2 complete") + } else { + log.Printf("[CatchUpSync] phase 2 skipped: all headers present in [%d..%d]", fromBlock, remoteTip) } - log.Printf("[CatchUpSync] phase 2 complete") // ── Phase 3: DataSync ───────────────────────────────────────────────── - log.Printf("[CatchUpSync] phase 3: data sync") + // Scan local blocks to find which ones are missing NonHeaders data. + // StarkProof is written ONLY by DataSync (immudb_data_writer.go) — absent or + // empty means the block needs DataSync regardless of whether HeaderSync ran. + // Blocks written only by PubSub/HeaderSync will have StarkProof==nil. + log.Printf("[CatchUpSync] phase 3: scanning for data-missing blocks [%d..%d]", fromBlock, remoteTip) - // dataSyncReq is nil if HeaderSync found no blocks to write (range already - // present locally). Skip DataSync in that case — same behaviour as HandleSync. - if dataSyncReq == nil { - log.Printf("[CatchUpSync] phase 3 skipped: no DataSync request from HeaderSync") - return nil - } - - taggedAccounts, err := fs.DataRouter.DataSync(dataSyncReq, remotes) + dataMissingTag, err := fs.buildDataMissingTag(fromBlock, remoteTip) if err != nil { - return fmt.Errorf("catchup: data sync: %w", err) + return fmt.Errorf("catchup: scan data-missing blocks: %w", err) } - log.Printf("[CatchUpSync] phase 3 complete") - // Refresh the local block marker after writing a large batch of data. - fs.reconcileLocalLatestBlock() + var taggedAccounts *taggingpb.TaggedAccounts + if len(dataMissingTag.Range) == 0 && len(dataMissingTag.BlockNumber) == 0 { + log.Printf("[CatchUpSync] phase 3 skipped: all blocks in [%d..%d] already have data", fromBlock, remoteTip) + } else { + log.Printf("[CatchUpSync] phase 3: %d data-missing range(s) to fetch", len(dataMissingTag.Range)) + dataSyncReq := &datasyncpb.DataSyncRequest{ + Tag: dataMissingTag, + Version: uint32(commsVersion), + Ack: &ackpb.Ack{Ok: true}, + Phase: &phasepb.Phase{ + PresentPhase: constants.HEADER_SYNC_RESPONSE, + SuccessivePhase: constants.DATA_SYNC_REQUEST, + Success: true, + Auth: &authpb.Auth{UUID: availResp.Auth.UUID}, + }, + } + taggedAccounts, err = fs.DataRouter.DataSync(dataSyncReq, remotes) + if err != nil { + return fmt.Errorf("catchup: data sync: %w", err) + } + log.Printf("[CatchUpSync] phase 3 complete") + } // ── Phase 3.5: FetchAccounts — pull tagged accounts missing locally ─── if taggedAccounts != nil && len(taggedAccounts.Accounts) > 0 { @@ -240,6 +275,32 @@ func (fs *FastsyncV2) HandleCatchUpSync(fromBlock uint64, targetPeer string) err log.Printf("[CatchUpSync] phase 7 complete") } + // Always update latest_block regardless of which phases ran. + // This is critical when PubSub blocks were header-only before this run. + fs.reconcileLocalLatestBlock() + + // ── Phase 8: Post-sync verification ────────────────────────────────── + // Re-run buildDataMissingTag over the same range. If sync succeeded, the + // returned tag will be empty (all blocks now have StarkProof set). + // Any non-empty ranges indicate blocks that are still data-incomplete. + log.Printf("[CatchUpSync] phase 8: verifying sync completeness [%d..%d]", fromBlock, remoteTip) + + verifyTag, verifyErr := fs.buildDataMissingTag(fromBlock, remoteTip) + if verifyErr != nil { + log.Printf("[CatchUpSync] phase 8 warning: verification scan failed: %v", verifyErr) + } else if len(verifyTag.Range) == 0 && len(verifyTag.BlockNumber) == 0 { + log.Printf("[CatchUpSync] phase 8: PASS — all blocks in [%d..%d] have data", fromBlock, remoteTip) + } else { + log.Printf("[CatchUpSync] phase 8: INCOMPLETE — %d range(s) still missing data:", len(verifyTag.Range)) + for _, r := range verifyTag.Range { + log.Printf("[CatchUpSync] missing data: blocks [%d..%d] (%d blocks)", + r.Start, r.End, r.End-r.Start+1) + } + for _, bn := range verifyTag.BlockNumber { + log.Printf("[CatchUpSync] missing data: block %d", bn) + } + } + log.Printf("[CatchUpSync] done in %s", time.Since(catchUpStart).Round(time.Millisecond)) return nil } @@ -278,6 +339,23 @@ func (fs *FastsyncV2) tryRefreshAuth(ctx context.Context, targetNodeInfo *types. // → gaps: [2..2], [4..6], [8..8] const catchUpBatchSize = 500 +// buildDataMissingTag scans [fromBlock..remoteTip] and returns a Tag covering +// blocks that need DataSync — i.e. blocks where NonHeaders (txs, ZK proof) have +// not been written yet. +// +// A block needs DataSync when: +// - It is absent from the local DB entirely (gap in the iterator), OR +// - It is present but StarkProof is empty. StarkProof is written ONLY by +// DataSync (immudb_data_writer.go:59); HeaderSync and PubSub never set it. +// +// Limitation: blocks with a genuinely empty ZK proof will always have +// len(StarkProof)==0 even after DataSync. They will be re-fetched on every +// catchup run. This is safe (DataSync is idempotent) and rare in practice on a +// ZK L2 where every finalized block carries a proof. +// +// Consecutive blocks needing DataSync are coalesced into a single RangeTag to +// minimise round-trips. + func (fs *FastsyncV2) buildMissingTag(fromBlock, remoteTip uint64) (*taggingpb.Tag, error) { iter := fs.blockInfoAdapter.NewBlockIterator(fromBlock, remoteTip, catchUpBatchSize) defer iter.Close() @@ -314,3 +392,71 @@ func (fs *FastsyncV2) buildMissingTag(fromBlock, remoteTip uint64) (*taggingpb.T return &taggingpb.Tag{Range: ranges}, nil } + +func (fs *FastsyncV2) buildDataMissingTag(fromBlock, remoteTip uint64) (*taggingpb.Tag, error) { + iter := fs.blockInfoAdapter.NewBlockIterator(fromBlock, remoteTip, catchUpBatchSize) + defer iter.Close() + + var ranges []*taggingpb.RangeTag + cursor := fromBlock + runStart := uint64(0) + inRun := false + + // Start a new run at b (or extend if already in one). + addToRun := func(b uint64) { + if !inRun { + runStart = b + inRun = true + } + } + // Close the active run, capping it at end. + endRunAt := func(end uint64) { + if inRun { + ranges = append(ranges, &taggingpb.RangeTag{Start: runStart, End: end}) + inRun = false + } + } + + for { + batch, err := iter.Next() + if err != nil { + return nil, fmt.Errorf("data-missing block iterator: %w", err) + } + if len(batch) == 0 { + // Remaining [cursor..remoteTip] are absent — include them. + if cursor <= remoteTip { + addToRun(cursor) + endRunAt(remoteTip) + } + break + } + + for _, blk := range batch { + b := blk.BlockNumber + if b < cursor { + continue // shouldn't happen with a sorted iterator + } + + // Absent blocks [cursor..b-1]: they need DataSync — extend or start run. + if b > cursor { + addToRun(cursor) + // Run is now active through at least b-1. + // We decide below whether b also extends it or closes it. + } + + if len(blk.StarkProof) == 0 { + // Block b is present but data-incomplete — keep the run going. + addToRun(b) + } else { + // Block b is complete — close any active run just before b. + if inRun { + endRunAt(b - 1) + } + } + + cursor = b + 1 + } + } + + return &taggingpb.Tag{Range: ranges}, nil +} From 5e27aae271f91abb446c0b9a70e339edaed6a3cb Mon Sep 17 00:00:00 2001 From: Doc Date: Tue, 23 Jun 2026 22:15:09 +0530 Subject: [PATCH 06/23] fix: improve immudb data writing and synchronization logic - Introduced a variable to track the highest block number written during the batch process, ensuring accurate updates to the latest block. - Removed the previous guard that prevented overwriting transactions in the DataSync response, allowing for proper handling of blocks with no transactions. - Enhanced error handling for block updates, ensuring that stale data is cleared and the latest block number is updated correctly after processing. - Updated comments for clarity on the logic and implications of changes made to transaction handling and block synchronization. These changes enhance the reliability and correctness of the immudb data writing process. --- DB_OPs/Nodeinfo/immudb_data_writer.go | 33 ++++++++++++++++------ FastsyncV2/catchup.go | 40 +++++++++++++++++---------- 2 files changed, 50 insertions(+), 23 deletions(-) diff --git a/DB_OPs/Nodeinfo/immudb_data_writer.go b/DB_OPs/Nodeinfo/immudb_data_writer.go index d5a04b97..312a6481 100644 --- a/DB_OPs/Nodeinfo/immudb_data_writer.go +++ b/DB_OPs/Nodeinfo/immudb_data_writer.go @@ -36,6 +36,8 @@ func (dw *DataWriter) WriteData(data []*blockpb.NonHeaders) error { return err } + var highestWritten uint64 + for _, nh := range data { if nh == nil { continue @@ -140,12 +142,15 @@ func (dw *DataWriter) WriteData(data []*blockpb.NonHeaders) error { txs = append(txs, cfgTx) } - if len(txs) > 0 { - b.Transactions = txs - } + // Always overwrite Transactions from the DataSync response. + // The previous guard (if len(txs) > 0) was wrong: if the server sends + // transactions for this block, they must be written; if it sends none, + // the block genuinely has no transactions and we must clear any stale + // data left by PubSub/HeaderSync skeleton writes. + b.Transactions = txs if err := DB_OPs.StoreZKBlock(conn, b); err != nil { - // if err not nill, then force write or update + // if err not nil, then force write or update if strings.Contains(err.Error(), "already exists") { blockKey := fmt.Sprintf("%s%d", DB_OPs.PREFIX_BLOCK, b.BlockNumber) if err2 := DB_OPs.Update(blockKey, b); err2 != nil { @@ -157,14 +162,10 @@ func (dw *DataWriter) WriteData(data []*blockpb.NonHeaders) error { return fmt.Errorf("force update hash mapping failed: %w", err2) } - if err2 := DB_OPs.Update("latest_block", b.BlockNumber); err2 != nil { - return fmt.Errorf("force update latest block failed: %w", err2) - } - // Write tx: → blockNumber index for each transaction. // WriteHeaders stores blocks without transactions, so StoreZKBlock's tx // indexing loop runs 0 times there. This is the only place those index - // entries get written — required for GetTransactionByHash to work. + // entries get written for existing blocks — required for GetTransactionByHash. for _, tx := range b.Transactions { txKey := fmt.Sprintf("%s%s", DB_OPs.DEFAULT_PREFIX_TX, tx.Hash) if err2 := DB_OPs.Create(conn, txKey, b.BlockNumber); err2 != nil { @@ -177,6 +178,20 @@ func (dw *DataWriter) WriteData(data []*blockpb.NonHeaders) error { return err } } + + if b.BlockNumber > highestWritten { + highestWritten = b.BlockNumber + } + } + + // Update latest_block once to the highest block number written in this batch. + // Per-block updates (done inside the loop above) are non-deterministic when + // DataSync workers run concurrently — the last worker to finish may not hold + // the highest block. A single update at the end is authoritative. + if highestWritten > 0 { + if err2 := DB_OPs.Update("latest_block", highestWritten); err2 != nil { + return fmt.Errorf("update latest_block to %d failed: %w", highestWritten, err2) + } } return nil diff --git a/FastsyncV2/catchup.go b/FastsyncV2/catchup.go index 94c4656e..f9a18ed9 100644 --- a/FastsyncV2/catchup.go +++ b/FastsyncV2/catchup.go @@ -281,7 +281,7 @@ func (fs *FastsyncV2) HandleCatchUpSync(fromBlock uint64, targetPeer string) err // ── Phase 8: Post-sync verification ────────────────────────────────── // Re-run buildDataMissingTag over the same range. If sync succeeded, the - // returned tag will be empty (all blocks now have StarkProof set). + // returned tag will be empty (all blocks pass blockNeedsDataSync check). // Any non-empty ranges indicate blocks that are still data-incomplete. log.Printf("[CatchUpSync] phase 8: verifying sync completeness [%d..%d]", fromBlock, remoteTip) @@ -339,20 +339,32 @@ func (fs *FastsyncV2) tryRefreshAuth(ctx context.Context, targetNodeInfo *types. // → gaps: [2..2], [4..6], [8..8] const catchUpBatchSize = 500 -// buildDataMissingTag scans [fromBlock..remoteTip] and returns a Tag covering -// blocks that need DataSync — i.e. blocks where NonHeaders (txs, ZK proof) have -// not been written yet. -// -// A block needs DataSync when: -// - It is absent from the local DB entirely (gap in the iterator), OR -// - It is present but StarkProof is empty. StarkProof is written ONLY by -// DataSync (immudb_data_writer.go:59); HeaderSync and PubSub never set it. +// blockNeedsDataSync returns true when a locally-present block is missing its +// NonHeaders data and must be (re-)fetched via DataSync. // -// Limitation: blocks with a genuinely empty ZK proof will always have -// len(StarkProof)==0 even after DataSync. They will be re-fetched on every -// catchup run. This is safe (DataSync is idempotent) and rare in practice on a -// ZK L2 where every finalized block carries a proof. +// Two conditions trigger a re-fetch: +// 1. StarkProof is empty — DataSync has never written ZK proof data for this +// block. StarkProof is set ONLY by DataSync (immudb_data_writer.go:59). +// 2. GasUsed > 0 but Transactions is empty — the block consumed gas so it +// must have transactions, but none were stored. This catches blocks where a +// previous DataSync run set StarkProof but failed to persist transactions +// (e.g. due to the old "if len(txs) > 0" guard that has since been removed). // +// Limitation: a block with GasUsed=0 and an empty ZK proof (legitimately no +// transactions and no proof) will be re-fetched on every run. This is safe +// (DataSync is idempotent) and extremely rare on a ZK L2 in practice. +func blockNeedsDataSync(blk *types.ZKBlock) bool { + if len(blk.StarkProof) == 0 { + return true + } + if blk.GasUsed > 0 && len(blk.Transactions) == 0 { + return true + } + return false +} + +// buildDataMissingTag scans [fromBlock..remoteTip] and returns a Tag covering +// blocks that need DataSync (absent or data-incomplete per blockNeedsDataSync). // Consecutive blocks needing DataSync are coalesced into a single RangeTag to // minimise round-trips. @@ -444,7 +456,7 @@ func (fs *FastsyncV2) buildDataMissingTag(fromBlock, remoteTip uint64) (*tagging // We decide below whether b also extends it or closes it. } - if len(blk.StarkProof) == 0 { + if blockNeedsDataSync(&blk) { // Block b is present but data-incomplete — keep the run going. addToRun(b) } else { From f6f26bb656f37b54d4e9dd6b5fde8c13733b2569 Mon Sep 17 00:00:00 2001 From: Doc Date: Tue, 23 Jun 2026 22:24:29 +0530 Subject: [PATCH 07/23] fix: correct block data synchronization logic in catchup process - Updated the blockNeedsDataSync function call to pass the block directly instead of as a pointer, improving clarity and consistency in the synchronization logic. - This change ensures that the synchronization process accurately assesses the need for data synchronization for each block, enhancing overall reliability in the catchup mechanism. --- FastsyncV2/catchup.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/FastsyncV2/catchup.go b/FastsyncV2/catchup.go index f9a18ed9..a59d5714 100644 --- a/FastsyncV2/catchup.go +++ b/FastsyncV2/catchup.go @@ -456,7 +456,7 @@ func (fs *FastsyncV2) buildDataMissingTag(fromBlock, remoteTip uint64) (*tagging // We decide below whether b also extends it or closes it. } - if blockNeedsDataSync(&blk) { + if blockNeedsDataSync(blk) { // Block b is present but data-incomplete — keep the run going. addToRun(b) } else { From f522abb893d73ccc5e1e93cff518422a5fcd10dd Mon Sep 17 00:00:00 2001 From: Doc Date: Tue, 23 Jun 2026 22:43:22 +0530 Subject: [PATCH 08/23] fix: update JMDN-FastSync dependency version and enhance catchup synchronization logic - Updated the JMDN-FastSync dependency to version v0.0.0-20260623170445-89b4a3381bf4 in go.mod for compatibility with recent changes. - Added logic in HandleCatchUpSync to advance the latest_block to remoteTip, ensuring accurate updates during the catchup process. - Improved logging for error handling when updating the latest_block, enhancing visibility into synchronization issues. --- FastsyncV2/catchup.go | 17 +++++++++++++++-- go.mod | 2 +- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/FastsyncV2/catchup.go b/FastsyncV2/catchup.go index a59d5714..c08b3cdf 100644 --- a/FastsyncV2/catchup.go +++ b/FastsyncV2/catchup.go @@ -27,6 +27,8 @@ import ( "math" "time" + "gossipnode/DB_OPs" + availabilitypb "github.com/JupiterMetaLabs/JMDN-FastSync/common/proto/availability" authpb "github.com/JupiterMetaLabs/JMDN-FastSync/common/proto/availability/auth" ackpb "github.com/JupiterMetaLabs/JMDN-FastSync/common/proto/ack" @@ -290,6 +292,14 @@ func (fs *FastsyncV2) HandleCatchUpSync(fromBlock uint64, targetPeer string) err log.Printf("[CatchUpSync] phase 8 warning: verification scan failed: %v", verifyErr) } else if len(verifyTag.Range) == 0 && len(verifyTag.BlockNumber) == 0 { log.Printf("[CatchUpSync] phase 8: PASS — all blocks in [%d..%d] have data", fromBlock, remoteTip) + // Advance latest_block to remoteTip. This is the authoritative write: + // phases 2/3 may have been skipped (data already present), so WriteData + // never ran and the DB key was never updated on this run. + if updateErr := DB_OPs.Update("latest_block", remoteTip); updateErr != nil { + log.Printf("[CatchUpSync] phase 8 warning: failed to update latest_block to %d: %v", remoteTip, updateErr) + } else { + log.Printf("[CatchUpSync] phase 8: latest_block advanced to %d", remoteTip) + } } else { log.Printf("[CatchUpSync] phase 8: INCOMPLETE — %d range(s) still missing data:", len(verifyTag.Range)) for _, r := range verifyTag.Range { @@ -435,11 +445,14 @@ func (fs *FastsyncV2) buildDataMissingTag(fromBlock, remoteTip uint64) (*tagging return nil, fmt.Errorf("data-missing block iterator: %w", err) } if len(batch) == 0 { - // Remaining [cursor..remoteTip] are absent — include them. + // Remaining [cursor..remoteTip] are absent from the DB — include them. if cursor <= remoteTip { addToRun(cursor) - endRunAt(remoteTip) } + // Close any open run (covers both absent trailing blocks AND the case + // where the last DB block needed DataSync: cursor advanced past remoteTip + // but inRun is still true). endRunAt is a no-op when inRun==false. + endRunAt(remoteTip) break } diff --git a/go.mod b/go.mod index 330d81ba..f7b36034 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module gossipnode go 1.25.0 require ( - github.com/JupiterMetaLabs/JMDN-FastSync v0.0.0-20260623122144-03caeb87c1bc + github.com/JupiterMetaLabs/JMDN-FastSync v0.0.0-20260623170445-89b4a3381bf4 github.com/JupiterMetaLabs/JMDN_Merkletree v0.0.0-20260413092720-b819e61566f8 github.com/JupiterMetaLabs/goroutine-orchestrator v0.1.5 github.com/JupiterMetaLabs/ion v0.4.2 From 03cbc7f5acbc551b6327e04ffc7c295dacb4f213 Mon Sep 17 00:00:00 2001 From: Doc Date: Tue, 23 Jun 2026 22:54:03 +0530 Subject: [PATCH 09/23] chore(deps): update JMDN-FastSync dependency version in go.mod and go.sum - Updated the JMDN-FastSync dependency to version v0.0.0-20260623172225-ee9db84a5609 in go.mod for compatibility with recent changes. - Added new checksum entries in go.sum to reflect the updated dependency version. --- go.mod | 2 +- go.sum | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index f7b36034..50b9806f 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module gossipnode go 1.25.0 require ( - github.com/JupiterMetaLabs/JMDN-FastSync v0.0.0-20260623170445-89b4a3381bf4 + github.com/JupiterMetaLabs/JMDN-FastSync v0.0.0-20260623172225-ee9db84a5609 github.com/JupiterMetaLabs/JMDN_Merkletree v0.0.0-20260413092720-b819e61566f8 github.com/JupiterMetaLabs/goroutine-orchestrator v0.1.5 github.com/JupiterMetaLabs/ion v0.4.2 diff --git a/go.sum b/go.sum index a324e23f..19a5a9cb 100644 --- a/go.sum +++ b/go.sum @@ -1,6 +1,10 @@ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= +github.com/JupiterMetaLabs/JMDN-FastSync v0.0.0-20260623171945-d171b0aa6d80 h1:LyfJvMbuIbidZqIHbs2z2mrXGn6i2vsCXeLMd2t5EPY= +github.com/JupiterMetaLabs/JMDN-FastSync v0.0.0-20260623171945-d171b0aa6d80/go.mod h1:0erT7gGH4TYtitRik+Y3GfxSa5KGLacr9rJovV3vNB0= +github.com/JupiterMetaLabs/JMDN-FastSync v0.0.0-20260623172225-ee9db84a5609 h1:4rGUBgm+T0UUZ9WdZ0xlgnZNbHej6pq5VEMq2Ugfpdo= +github.com/JupiterMetaLabs/JMDN-FastSync v0.0.0-20260623172225-ee9db84a5609/go.mod h1:0erT7gGH4TYtitRik+Y3GfxSa5KGLacr9rJovV3vNB0= github.com/JupiterMetaLabs/JMDN_Merkletree v0.0.0-20260413092720-b819e61566f8 h1:yPrYb6g6NnqGsiCVqMf0zndEYTuelL3B03Fee+utLWA= github.com/JupiterMetaLabs/JMDN_Merkletree v0.0.0-20260413092720-b819e61566f8/go.mod h1:zM8F31G2SiPXzTo1WzbDFZ5iOOAkqrkuZjS0QVDW4ew= github.com/JupiterMetaLabs/goroutine-orchestrator v0.1.5 h1:S9+s6JeWSrGJ6ooYb4f8iRlJxwPUZ8X/EA4EgxKS3zc= From 5094817fb4cdd60c80e5352d889bde872b1b39c2 Mon Sep 17 00:00:00 2001 From: Doc Date: Tue, 23 Jun 2026 23:09:56 +0530 Subject: [PATCH 10/23] fix: enhance account writing and synchronization logic in immudb_account_manager - Increased timeout for GetTransactionsForAccount to 60 seconds to accommodate longer processing times. - Added writeAccountsDirect function to handle account writing directly to ImmuDB when Redis is unavailable, ensuring data integrity without external dependencies. - Improved logging for direct writes to provide better visibility into account processing. - Introduced a delay in HandleCatchUpSync to allow ImmuDB to settle before reconciliation, enhancing synchronization reliability. These changes improve the robustness and reliability of account management and synchronization processes. --- DB_OPs/Nodeinfo/immudb_account_manager.go | 64 ++++++++++++++++++++++- FastsyncV2/catchup.go | 6 +++ 2 files changed, 68 insertions(+), 2 deletions(-) diff --git a/DB_OPs/Nodeinfo/immudb_account_manager.go b/DB_OPs/Nodeinfo/immudb_account_manager.go index 8774029c..61eb3e2b 100644 --- a/DB_OPs/Nodeinfo/immudb_account_manager.go +++ b/DB_OPs/Nodeinfo/immudb_account_manager.go @@ -5,6 +5,7 @@ import ( "encoding/json" "errors" "fmt" + "log" "math/big" "sort" "strings" @@ -85,7 +86,7 @@ func chunkCount(n int) int { // Time Complexity: O(N) where N is the total number of transactions scanned or retrieved func (am *account_manager) GetTransactionsForAccount(accountAddress string) ([]types.DBTransaction, error) { - ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second) defer cancel() conn, err := DB_OPs.GetMainDBConnectionandPutBack(ctx) @@ -245,7 +246,10 @@ func (am *account_manager) WriteAccounts(accounts []*types.Account) error { } s, mgr := getAccountQueue() if s == nil { - return fmt.Errorf("WriteAccounts: account queue not initialized; call StartAccountSyncWorker before use") + // Redis not available — write directly to ImmuDB synchronously. + // Slower (~15 s/batch) but correct; no external dependency required. + log.Printf("[accountqueue] Redis not available — writing %d accounts directly to ImmuDB", len(accounts)) + return writeAccountsDirect(accounts) } mgr.EnsureActive() @@ -258,6 +262,62 @@ func (am *account_manager) WriteAccounts(accounts []*types.Account) error { return nil } +// writeAccountsDirect writes accounts synchronously to ImmuDB without going through Redis. +// Used when the Redis queue is not configured. Uses the same dbEntry/BatchRestoreAccounts +// path as the worker so the write is LWW-idempotent. +func writeAccountsDirect(accounts []*types.Account) error { + entries := make([]dbEntry, 0, len(accounts)*2) + for _, acc := range accounts { + if acc == nil { + continue + } + dbAcc := &DB_OPs.Account{ + DIDAddress: acc.DIDAddress, + Address: acc.Address, + Balance: acc.Balance, + Nonce: acc.Nonce, + TxNonce: acc.TxNonce, + TxCountSent: acc.TxCountSent, + AccountType: acc.AccountType, + CreatedAt: acc.CreatedAt, + UpdatedAt: acc.UpdatedAt, + Metadata: acc.Metadata, + } + val, err := json.Marshal(dbAcc) + if err != nil { + return fmt.Errorf("writeAccountsDirect: marshal %s: %w", acc.Address.Hex(), err) + } + entries = append(entries, dbEntry{Key: DB_OPs.Prefix + acc.Address.Hex(), Value: val}) + if acc.DIDAddress != "" { + entries = append(entries, dbEntry{Key: DB_OPs.DIDPrefix + acc.DIDAddress, Value: val}) + } + } + + const batchSize = 500 + // Generous timeout: 60 s base + 2 s per batch to cover ImmuDB commit latency. + timeout := 60*time.Second + time.Duration(len(entries)/batchSize+1)*2*time.Second + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() + + conn, err := DB_OPs.GetAccountsConnections(ctx) + if err != nil { + return fmt.Errorf("writeAccountsDirect: get connection: %w", err) + } + defer DB_OPs.PutAccountsConnection(conn) + + for i := 0; i < len(entries); i += batchSize { + end := i + batchSize + if end > len(entries) { + end = len(entries) + } + if err := DB_OPs.BatchRestoreAccounts(ctx, conn, entries[i:end]); err != nil { + return fmt.Errorf("writeAccountsDirect: batch [%d:%d]: %w", i, end, err) + } + } + log.Printf("[accountqueue] direct write complete: %d accounts written to ImmuDB", len(accounts)) + return nil +} + // NewAccountNonceIterator returns a cursor-based iterator over all accounts. // Each NextBatch call advances a seekKey cursor — O(N) total scan across all batches. func (am *account_manager) NewAccountNonceIterator(batchSize int) types.AccountNonceIterator { diff --git a/FastsyncV2/catchup.go b/FastsyncV2/catchup.go index c08b3cdf..243b0fb9 100644 --- a/FastsyncV2/catchup.go +++ b/FastsyncV2/catchup.go @@ -251,6 +251,12 @@ func (fs *FastsyncV2) HandleCatchUpSync(fromBlock uint64, targetPeer string) err } // ── Phase 5: Reconciliation ─────────────────────────────────────────── + // Brief pause before reconciliation: DataSync may have just written thousands + // of blocks and ImmuDB needs a moment to settle its commit queue before read + // queries (GetTransactionsForAccount) can complete within their deadline. + const reconDelay = 5 * time.Second + log.Printf("[CatchUpSync] phase 5: waiting %s for ImmuDB to settle before reconciliation", reconDelay) + time.Sleep(reconDelay) log.Printf("[CatchUpSync] phase 5: reconciliation") reconCount, failedAccounts, err := fs.ReconRouter.Reconcile(taggedAccounts, availResp) From e0956657c638bceb76318e8df5b3a0a66dbc8e5e Mon Sep 17 00:00:00 2001 From: Doc Date: Tue, 23 Jun 2026 23:22:38 +0530 Subject: [PATCH 11/23] fix: enhance account reconciliation during catchup synchronization - Added logic to collect tagged accounts from local blocks when data synchronization is skipped, ensuring that account balances are updated during reconciliation. - Implemented the collectTaggedAccountsFromBlocks function to scan local transactions and gather unique sender and receiver addresses for improved balance management. These changes improve the accuracy of account balance updates in the catchup process, enhancing overall synchronization reliability. --- FastsyncV2/catchup.go | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/FastsyncV2/catchup.go b/FastsyncV2/catchup.go index 243b0fb9..6a1a7279 100644 --- a/FastsyncV2/catchup.go +++ b/FastsyncV2/catchup.go @@ -187,6 +187,12 @@ func (fs *FastsyncV2) HandleCatchUpSync(fromBlock uint64, targetPeer string) err var taggedAccounts *taggingpb.TaggedAccounts if len(dataMissingTag.Range) == 0 && len(dataMissingTag.BlockNumber) == 0 { log.Printf("[CatchUpSync] phase 3 skipped: all blocks in [%d..%d] already have data", fromBlock, remoteTip) + // DataSync skipped → scan local blocks to collect accounts with transactions + // so reconciliation can still update balances on re-runs. + taggedAccounts = fs.collectTaggedAccountsFromBlocks(fromBlock, remoteTip) + if taggedAccounts != nil { + log.Printf("[CatchUpSync] phase 3: collected %d accounts from local blocks for reconciliation", len(taggedAccounts.Accounts)) + } } else { log.Printf("[CatchUpSync] phase 3: %d data-missing range(s) to fetch", len(dataMissingTag.Range)) dataSyncReq := &datasyncpb.DataSyncRequest{ @@ -421,6 +427,37 @@ func (fs *FastsyncV2) buildMissingTag(fromBlock, remoteTip uint64) (*taggingpb.T return &taggingpb.Tag{Range: ranges}, nil } +// collectTaggedAccountsFromBlocks scans local blocks [fromBlock..remoteTip] and +// returns a TaggedAccounts containing every unique sender and receiver address +// found in stored transactions. Used when DataSync is skipped (data already +// present) so that Phase 5 reconciliation still updates account balances. +func (fs *FastsyncV2) collectTaggedAccountsFromBlocks(fromBlock, remoteTip uint64) *taggingpb.TaggedAccounts { + iter := fs.blockInfoAdapter.NewBlockIterator(fromBlock, remoteTip, catchUpBatchSize) + defer iter.Close() + + accounts := make(map[string]bool) + for { + batch, err := iter.Next() + if err != nil || len(batch) == 0 { + break + } + for _, blk := range batch { + for _, tx := range blk.Transactions { + if tx.From != nil { + accounts[tx.From.Hex()] = true + } + if tx.To != nil { + accounts[tx.To.Hex()] = true + } + } + } + } + if len(accounts) == 0 { + return nil + } + return &taggingpb.TaggedAccounts{Accounts: accounts} +} + func (fs *FastsyncV2) buildDataMissingTag(fromBlock, remoteTip uint64) (*taggingpb.Tag, error) { iter := fs.blockInfoAdapter.NewBlockIterator(fromBlock, remoteTip, catchUpBatchSize) defer iter.Close() From 3b5ebb5f839bb7844f497dce8a4887e01e21140a Mon Sep 17 00:00:00 2001 From: Doc Date: Tue, 23 Jun 2026 23:33:50 +0530 Subject: [PATCH 12/23] fix: add diagnostic counters for block synchronization in catchup process - Introduced diagnostic counters to track absent blocks, blocks without proofs, blocks with gas but no transactions, and complete blocks during the catchup synchronization. - Enhanced logging to provide insights into the synchronization process, improving visibility into the reasons for block pass/fail outcomes. These changes improve the monitoring and debugging capabilities of the catchup synchronization logic. --- FastsyncV2/catchup.go | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/FastsyncV2/catchup.go b/FastsyncV2/catchup.go index 6a1a7279..1c849c6d 100644 --- a/FastsyncV2/catchup.go +++ b/FastsyncV2/catchup.go @@ -467,6 +467,9 @@ func (fs *FastsyncV2) buildDataMissingTag(fromBlock, remoteTip uint64) (*tagging runStart := uint64(0) inRun := false + // Diagnostic counters — logged at end to explain why blocks pass/fail. + var nAbsent, nNoProof, nGasNoTx, nComplete uint64 + // Start a new run at b (or extend if already in one). addToRun := func(b uint64) { if !inRun { @@ -490,6 +493,7 @@ func (fs *FastsyncV2) buildDataMissingTag(fromBlock, remoteTip uint64) (*tagging if len(batch) == 0 { // Remaining [cursor..remoteTip] are absent from the DB — include them. if cursor <= remoteTip { + nAbsent += remoteTip - cursor + 1 addToRun(cursor) } // Close any open run (covers both absent trailing blocks AND the case @@ -507,11 +511,20 @@ func (fs *FastsyncV2) buildDataMissingTag(fromBlock, remoteTip uint64) (*tagging // Absent blocks [cursor..b-1]: they need DataSync — extend or start run. if b > cursor { + nAbsent += b - cursor addToRun(cursor) // Run is now active through at least b-1. // We decide below whether b also extends it or closes it. } + if len(blk.StarkProof) == 0 { + nNoProof++ + } else if blk.GasUsed > 0 && len(blk.Transactions) == 0 { + nGasNoTx++ + } else { + nComplete++ + } + if blockNeedsDataSync(blk) { // Block b is present but data-incomplete — keep the run going. addToRun(b) @@ -526,5 +539,8 @@ func (fs *FastsyncV2) buildDataMissingTag(fromBlock, remoteTip uint64) (*tagging } } + log.Printf("[CatchUpSync] phase 3 scan: absent=%d noProof=%d gasNoTx=%d complete=%d", + nAbsent, nNoProof, nGasNoTx, nComplete) + return &taggingpb.Tag{Range: ranges}, nil } From b3be6feb1800f2bdcfd5127baee754eb329fcbe8 Mon Sep 17 00:00:00 2001 From: Doc Date: Wed, 24 Jun 2026 00:00:26 +0530 Subject: [PATCH 13/23] fix: optimize transaction retrieval and account writing logic - Increased timeout for context in GetTransactionsByAccount to 120 seconds to accommodate larger batch processing. - Updated batch size from 100 to 500 to reduce the number of round-trips during block scans, significantly improving performance. - Refactored block retrieval to use GetBlocksRange for batch processing, enhancing efficiency and error handling. - Improved logging for error scenarios when retrieving block batches, ensuring better visibility into issues during account transaction processing. These changes enhance the performance and reliability of transaction retrieval and account management in the system. --- DB_OPs/Nodeinfo/immudb_account_manager.go | 10 +++-- DB_OPs/account_immuclient.go | 52 +++++++++++------------ 2 files changed, 33 insertions(+), 29 deletions(-) diff --git a/DB_OPs/Nodeinfo/immudb_account_manager.go b/DB_OPs/Nodeinfo/immudb_account_manager.go index 61eb3e2b..d39a7140 100644 --- a/DB_OPs/Nodeinfo/immudb_account_manager.go +++ b/DB_OPs/Nodeinfo/immudb_account_manager.go @@ -251,14 +251,18 @@ func (am *account_manager) WriteAccounts(accounts []*types.Account) error { log.Printf("[accountqueue] Redis not available — writing %d accounts directly to ImmuDB", len(accounts)) return writeAccountsDirect(accounts) } - mgr.EnsureActive() - chunks := chunkCount(len(accounts)) ctx, cancel := context.WithTimeout(context.Background(), enqueueTimeout(chunks)) defer cancel() if err := enqueueRecordsChunked(ctx, s, payloadTypeAccounts, accounts); err != nil { - return fmt.Errorf("WriteAccounts: enqueue %d accounts in %d messages: %w", len(accounts), chunks, err) + // Redis is configured but unreachable (server down, connection refused, etc). + // Fall back to direct ImmuDB write rather than dropping the accounts entirely. + // Do NOT call EnsureActive — no point starting the worker if Redis is down. + log.Printf("[accountqueue] Redis enqueue failed (%v) — falling back to direct ImmuDB write for %d accounts", err, len(accounts)) + return writeAccountsDirect(accounts) } + // Enqueue succeeded — ensure the drain worker is running to process it. + mgr.EnsureActive() return nil } diff --git a/DB_OPs/account_immuclient.go b/DB_OPs/account_immuclient.go index 417f6013..47cd6dda 100644 --- a/DB_OPs/account_immuclient.go +++ b/DB_OPs/account_immuclient.go @@ -1266,8 +1266,10 @@ func GetTransactionsByAccount(PooledConnection *config.PooledConnection, account var err error var shouldReturnConnection = false - // Define Function wide context for timeout - ctx, cancel := context.WithTimeout(context.Background(), 8*time.Second) + // Define Function wide context for timeout. + // The scan reads every block from 0..latestBlock via batch GetAll calls (~24 batches + // for 11605 blocks). 120s gives ample headroom even under ImmuDB load. + ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second) defer cancel() if PooledConnection == nil || PooledConnection.Client == nil { @@ -1318,40 +1320,38 @@ func GetTransactionsByAccount(PooledConnection *config.PooledConnection, account } var matchingTxs []*config.Transaction - batchSize := uint64(100) // Process 100 blocks at a time + // Use large batches so GetAll makes ~24 round-trips for 11605 blocks instead + // of 11605 individual reads. This cuts scan time from minutes to seconds. + const batchSize = uint64(500) - // Start from block 0 (genesis block) to include all blocks for startBlock := uint64(0); startBlock <= latestBlockNumber; startBlock += batchSize { + if ctx.Err() != nil { + return nil, ctx.Err() + } endBlock := startBlock + batchSize - 1 if endBlock > latestBlockNumber { endBlock = latestBlockNumber } - // Process current batch of blocks - for i := startBlock; i <= endBlock; i++ { - if ctx.Err() != nil { - return nil, ctx.Err() - } - block, err := ReadZKBlockByNumber(ctx, PooledConnection, i) - if err != nil { - loggerCtx, cancel := context.WithCancel(context.Background()) - ic.Logger.Warn(loggerCtx, "Error retrieving block, skipping", - ion.String("error", err.Error()), - ion.Uint64("block_number", i), - ion.String("database", config.AccountsDBName), - ion.String("created_at", time.Now().UTC().Format(time.RFC3339)), - ion.String("log_file", LOG_FILE), - ion.String("topic", TOPIC), - ion.String("function", "DB_OPs.GetTransactionsByAccount")) - cancel() - continue - } + blocks, err := GetBlocksRange(PooledConnection, startBlock, endBlock) + if err != nil { + loggerCtx, cancel := context.WithCancel(context.Background()) + ic.Logger.Warn(loggerCtx, "Error retrieving block batch, skipping", + ion.String("error", err.Error()), + ion.Uint64("start_block", startBlock), + ion.Uint64("end_block", endBlock), + ion.String("database", config.DBName), + ion.String("created_at", time.Now().UTC().Format(time.RFC3339)), + ion.String("log_file", LOG_FILE), + ion.String("topic", TOPIC), + ion.String("function", "DB_OPs.GetTransactionsByAccount")) + cancel() + continue + } - // Check each transaction in the current block + for _, block := range blocks { for _, tx := range block.Transactions { - // Check if the transaction involves the given account if isTransactionInvolvingAccount(tx, accountAddr) { - // Create a copy of the transaction to avoid referencing the loop variable txCopy := tx matchingTxs = append(matchingTxs, &txCopy) } From 901add59ad26f56e33108d4beffd7cf319cbcc57 Mon Sep 17 00:00:00 2001 From: Doc Date: Wed, 24 Jun 2026 00:15:02 +0530 Subject: [PATCH 14/23] fix: enhance account reconciliation in catchup synchronization - Updated HandleCatchUpSync to always scan local blocks for tagged accounts, improving reconciliation accuracy when DataSync is skipped or partially completed. - Merged results from local scans with DataSync outcomes to ensure all relevant accounts are included for balance updates. - Improved logging to provide better insights into the number of accounts collected and merged during the reconciliation process. These changes enhance the reliability of account balance updates during the catchup synchronization, ensuring comprehensive account management. --- FastsyncV2/catchup.go | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/FastsyncV2/catchup.go b/FastsyncV2/catchup.go index 1c849c6d..924ceb23 100644 --- a/FastsyncV2/catchup.go +++ b/FastsyncV2/catchup.go @@ -187,12 +187,6 @@ func (fs *FastsyncV2) HandleCatchUpSync(fromBlock uint64, targetPeer string) err var taggedAccounts *taggingpb.TaggedAccounts if len(dataMissingTag.Range) == 0 && len(dataMissingTag.BlockNumber) == 0 { log.Printf("[CatchUpSync] phase 3 skipped: all blocks in [%d..%d] already have data", fromBlock, remoteTip) - // DataSync skipped → scan local blocks to collect accounts with transactions - // so reconciliation can still update balances on re-runs. - taggedAccounts = fs.collectTaggedAccountsFromBlocks(fromBlock, remoteTip) - if taggedAccounts != nil { - log.Printf("[CatchUpSync] phase 3: collected %d accounts from local blocks for reconciliation", len(taggedAccounts.Accounts)) - } } else { log.Printf("[CatchUpSync] phase 3: %d data-missing range(s) to fetch", len(dataMissingTag.Range)) dataSyncReq := &datasyncpb.DataSyncRequest{ @@ -213,6 +207,28 @@ func (fs *FastsyncV2) HandleCatchUpSync(fromBlock uint64, targetPeer string) err log.Printf("[CatchUpSync] phase 3 complete") } + // Always scan local blocks [fromBlock..remoteTip] for tagged accounts and merge + // with DataSync's results. This serves two purposes: + // 1. When DataSync was skipped (data already present), this is the only source + // of tagged accounts for reconciliation. + // 2. When DataSync ran for SOME new blocks only, previously-failed reconciliation + // accounts from already-synced blocks are re-included here so they are retried. + // With 500-block batch reads this is ~3 DB round-trips for a 1200-block range. + localTagged := fs.collectTaggedAccountsFromBlocks(fromBlock, remoteTip) + if localTagged != nil { + if taggedAccounts == nil { + taggedAccounts = localTagged + log.Printf("[CatchUpSync] phase 3: %d accounts from local scan", len(taggedAccounts.Accounts)) + } else { + before := len(taggedAccounts.Accounts) + for addr := range localTagged.Accounts { + taggedAccounts.Accounts[addr] = true + } + log.Printf("[CatchUpSync] phase 3: merged local scan (+%d) → %d total accounts for reconciliation", + len(taggedAccounts.Accounts)-before, len(taggedAccounts.Accounts)) + } + } + // ── Phase 3.5: FetchAccounts — pull tagged accounts missing locally ─── if taggedAccounts != nil && len(taggedAccounts.Accounts) > 0 { // AUTH_TTL is now 48h so no re-auth needed here. From 9eb826bf695579e68e60e71b96a286aabdeb53a2 Mon Sep 17 00:00:00 2001 From: Doc Date: Wed, 24 Jun 2026 00:26:23 +0530 Subject: [PATCH 15/23] fix: improve batch account update handling in immudb_account_manager - Enhanced the BatchUpdateAccounts function to handle cases where the account queue is not initialized by logging the situation and directly writing updates to ImmuDB. - Introduced a new batchUpdateAccountsDirect function to manage synchronous updates to ImmuDB, ensuring data integrity when Redis is unavailable. - Improved error logging for enqueue failures, providing better visibility into fallback scenarios during account updates. These changes enhance the robustness of account updates and ensure reliable processing in various operational conditions. --- DB_OPs/Nodeinfo/immudb_account_manager.go | 26 ++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/DB_OPs/Nodeinfo/immudb_account_manager.go b/DB_OPs/Nodeinfo/immudb_account_manager.go index d39a7140..6d8f8120 100644 --- a/DB_OPs/Nodeinfo/immudb_account_manager.go +++ b/DB_OPs/Nodeinfo/immudb_account_manager.go @@ -460,9 +460,10 @@ func (am *account_manager) BatchUpdateAccounts(updates []types.AccountUpdate) er } s, mgr := getAccountQueue() if s == nil { - return fmt.Errorf("BatchUpdateAccounts: account queue not initialized; call StartAccountSyncWorker before use") + log.Printf("[accountqueue] BatchUpdateAccounts: queue not initialized — writing %d updates directly to ImmuDB", len(updates)) + return batchUpdateAccountsDirect(am, updates) } - mgr.EnsureActive() + // Convert to wire type for stable JSON serialization. // big.Int.String() produces a decimal string; accountUpdateWire makes the format explicit. wires := make([]accountUpdateWire, len(updates)) @@ -478,7 +479,26 @@ func (am *account_manager) BatchUpdateAccounts(updates []types.AccountUpdate) er ctx, cancel := context.WithTimeout(context.Background(), enqueueTimeout(chunks)) defer cancel() if err := enqueueRecordsChunked(ctx, s, payloadTypeUpdates, wires); err != nil { - return fmt.Errorf("BatchUpdateAccounts: enqueue %d updates in %d messages: %w", len(updates), chunks, err) + log.Printf("[accountqueue] Redis enqueue failed (%v) — falling back to direct ImmuDB write for %d updates", err, len(updates)) + return batchUpdateAccountsDirect(am, updates) + } + mgr.EnsureActive() + return nil +} + +// batchUpdateAccountsDirect writes account balance updates synchronously to ImmuDB, +// bypassing Redis. Used when Redis is unavailable. +func batchUpdateAccountsDirect(am *account_manager, updates []types.AccountUpdate) error { + for _, u := range updates { + if u.IsNewAccount { + if err := am.CreateAccount(u.Address, u.NewBalance, u.Nonce); err != nil { + return fmt.Errorf("batchUpdateAccountsDirect: create %s: %w", u.Address, err) + } + } else { + if err := am.UpdateAccountBalance(u.Address, u.NewBalance, u.Nonce); err != nil { + return fmt.Errorf("batchUpdateAccountsDirect: update %s: %w", u.Address, err) + } + } } return nil } From 83e230750b356b109308c2210bf26921116314d6 Mon Sep 17 00:00:00 2001 From: Doc Date: Wed, 24 Jun 2026 00:35:17 +0530 Subject: [PATCH 16/23] fix: update JMDN-FastSync dependency version and enhance logging in catchup synchronization - Updated the JMDN-FastSync dependency to version v0.0.0-20260623190349-1e218bd06f7b in go.mod and go.sum for compatibility with recent changes. - Improved logging in HandleCatchUpSync to include the number of accounts processed during reconciliation and the duration of the reconciliation phase, enhancing visibility into the synchronization process. --- FastsyncV2/catchup.go | 10 ++++++++-- go.mod | 2 +- go.sum | 2 ++ 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/FastsyncV2/catchup.go b/FastsyncV2/catchup.go index 924ceb23..d2446176 100644 --- a/FastsyncV2/catchup.go +++ b/FastsyncV2/catchup.go @@ -279,13 +279,19 @@ func (fs *FastsyncV2) HandleCatchUpSync(fromBlock uint64, targetPeer string) err const reconDelay = 5 * time.Second log.Printf("[CatchUpSync] phase 5: waiting %s for ImmuDB to settle before reconciliation", reconDelay) time.Sleep(reconDelay) - log.Printf("[CatchUpSync] phase 5: reconciliation") + accountCount := 0 + if taggedAccounts != nil { + accountCount = len(taggedAccounts.Accounts) + } + log.Printf("[CatchUpSync] phase 5: reconciliation — %d accounts to process", accountCount) + reconStart := time.Now() reconCount, failedAccounts, err := fs.ReconRouter.Reconcile(taggedAccounts, availResp) if err != nil { log.Printf("[CatchUpSync] phase 5 warning: %v", err) } - log.Printf("[CatchUpSync] phase 5 complete: %d committed, %d failed", reconCount, len(failedAccounts)) + log.Printf("[CatchUpSync] phase 5 complete: %d committed, %d failed, took %s", + reconCount, len(failedAccounts), time.Since(reconStart).Round(time.Millisecond)) // ── Phase 6: Re-auth before PoTS (disabled — AUTH_TTL is now 48h) ───── // if refreshed, ok := fs.tryRefreshAuth(ctx, targetNodeInfo, 0); ok { diff --git a/go.mod b/go.mod index 50b9806f..0559f99e 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module gossipnode go 1.25.0 require ( - github.com/JupiterMetaLabs/JMDN-FastSync v0.0.0-20260623172225-ee9db84a5609 + github.com/JupiterMetaLabs/JMDN-FastSync v0.0.0-20260623190349-1e218bd06f7b github.com/JupiterMetaLabs/JMDN_Merkletree v0.0.0-20260413092720-b819e61566f8 github.com/JupiterMetaLabs/goroutine-orchestrator v0.1.5 github.com/JupiterMetaLabs/ion v0.4.2 diff --git a/go.sum b/go.sum index 19a5a9cb..4e1da05f 100644 --- a/go.sum +++ b/go.sum @@ -5,6 +5,8 @@ github.com/JupiterMetaLabs/JMDN-FastSync v0.0.0-20260623171945-d171b0aa6d80 h1:L github.com/JupiterMetaLabs/JMDN-FastSync v0.0.0-20260623171945-d171b0aa6d80/go.mod h1:0erT7gGH4TYtitRik+Y3GfxSa5KGLacr9rJovV3vNB0= github.com/JupiterMetaLabs/JMDN-FastSync v0.0.0-20260623172225-ee9db84a5609 h1:4rGUBgm+T0UUZ9WdZ0xlgnZNbHej6pq5VEMq2Ugfpdo= github.com/JupiterMetaLabs/JMDN-FastSync v0.0.0-20260623172225-ee9db84a5609/go.mod h1:0erT7gGH4TYtitRik+Y3GfxSa5KGLacr9rJovV3vNB0= +github.com/JupiterMetaLabs/JMDN-FastSync v0.0.0-20260623190349-1e218bd06f7b h1:58QlmKb4FGT8F8xFFQ6gUG61bbAXNnrTan+PNgh67VU= +github.com/JupiterMetaLabs/JMDN-FastSync v0.0.0-20260623190349-1e218bd06f7b/go.mod h1:0erT7gGH4TYtitRik+Y3GfxSa5KGLacr9rJovV3vNB0= github.com/JupiterMetaLabs/JMDN_Merkletree v0.0.0-20260413092720-b819e61566f8 h1:yPrYb6g6NnqGsiCVqMf0zndEYTuelL3B03Fee+utLWA= github.com/JupiterMetaLabs/JMDN_Merkletree v0.0.0-20260413092720-b819e61566f8/go.mod h1:zM8F31G2SiPXzTo1WzbDFZ5iOOAkqrkuZjS0QVDW4ew= github.com/JupiterMetaLabs/goroutine-orchestrator v0.1.5 h1:S9+s6JeWSrGJ6ooYb4f8iRlJxwPUZ8X/EA4EgxKS3zc= From 61bffd847b9b6be5dea10a4b068bd7bd56e762df Mon Sep 17 00:00:00 2001 From: Doc Date: Wed, 24 Jun 2026 00:45:29 +0530 Subject: [PATCH 17/23] fix: update JMDN-FastSync dependency version in go.mod and go.sum - Updated the JMDN-FastSync dependency to version v0.0.0-20260623191406-56133298595b in go.mod and added corresponding checksum entries in go.sum to ensure compatibility with recent changes. --- go.mod | 2 +- go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 0559f99e..08cc1e6e 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module gossipnode go 1.25.0 require ( - github.com/JupiterMetaLabs/JMDN-FastSync v0.0.0-20260623190349-1e218bd06f7b + github.com/JupiterMetaLabs/JMDN-FastSync v0.0.0-20260623191406-56133298595b github.com/JupiterMetaLabs/JMDN_Merkletree v0.0.0-20260413092720-b819e61566f8 github.com/JupiterMetaLabs/goroutine-orchestrator v0.1.5 github.com/JupiterMetaLabs/ion v0.4.2 diff --git a/go.sum b/go.sum index 4e1da05f..4739c584 100644 --- a/go.sum +++ b/go.sum @@ -7,6 +7,8 @@ github.com/JupiterMetaLabs/JMDN-FastSync v0.0.0-20260623172225-ee9db84a5609 h1:4 github.com/JupiterMetaLabs/JMDN-FastSync v0.0.0-20260623172225-ee9db84a5609/go.mod h1:0erT7gGH4TYtitRik+Y3GfxSa5KGLacr9rJovV3vNB0= github.com/JupiterMetaLabs/JMDN-FastSync v0.0.0-20260623190349-1e218bd06f7b h1:58QlmKb4FGT8F8xFFQ6gUG61bbAXNnrTan+PNgh67VU= github.com/JupiterMetaLabs/JMDN-FastSync v0.0.0-20260623190349-1e218bd06f7b/go.mod h1:0erT7gGH4TYtitRik+Y3GfxSa5KGLacr9rJovV3vNB0= +github.com/JupiterMetaLabs/JMDN-FastSync v0.0.0-20260623191406-56133298595b h1:/Tz6f/DmUW+3AH6zvlfzZk987yd0Xla0+cve9YgNM1Y= +github.com/JupiterMetaLabs/JMDN-FastSync v0.0.0-20260623191406-56133298595b/go.mod h1:0erT7gGH4TYtitRik+Y3GfxSa5KGLacr9rJovV3vNB0= github.com/JupiterMetaLabs/JMDN_Merkletree v0.0.0-20260413092720-b819e61566f8 h1:yPrYb6g6NnqGsiCVqMf0zndEYTuelL3B03Fee+utLWA= github.com/JupiterMetaLabs/JMDN_Merkletree v0.0.0-20260413092720-b819e61566f8/go.mod h1:zM8F31G2SiPXzTo1WzbDFZ5iOOAkqrkuZjS0QVDW4ew= github.com/JupiterMetaLabs/goroutine-orchestrator v0.1.5 h1:S9+s6JeWSrGJ6ooYb4f8iRlJxwPUZ8X/EA4EgxKS3zc= From 5b2126105d7a1dec7f8fecce13673fd8fb899ee1 Mon Sep 17 00:00:00 2001 From: Doc Date: Wed, 24 Jun 2026 10:20:27 +0530 Subject: [PATCH 18/23] fix: update JMDN-FastSync dependency version and add transaction retrieval in range - Updated the JMDN-FastSync dependency to version v0.0.0-20260624044452-d59628408292 in go.mod and go.sum for compatibility with recent changes. - Introduced GetTransactionsByAccountInRange function to retrieve transactions for an account within a specified block range, enhancing transaction management capabilities. - Added GetTransactionsForAccountInRange method in immudb_account_manager to facilitate transaction retrieval in a specified range, improving account transaction handling. --- DB_OPs/Nodeinfo/immudb_account_manager.go | 19 +++++ DB_OPs/account_immuclient.go | 70 +++++++++++++++++ FastsyncV2/catchup.go | 21 +++-- FastsyncV2/fastsyncv2.go | 94 ++++++++++++++++++++--- go.mod | 2 +- go.sum | 10 +-- 6 files changed, 192 insertions(+), 24 deletions(-) diff --git a/DB_OPs/Nodeinfo/immudb_account_manager.go b/DB_OPs/Nodeinfo/immudb_account_manager.go index 6d8f8120..4e608811 100644 --- a/DB_OPs/Nodeinfo/immudb_account_manager.go +++ b/DB_OPs/Nodeinfo/immudb_account_manager.go @@ -107,6 +107,25 @@ func (am *account_manager) GetTransactionsForAccount(accountAddress string) ([]t return result, nil } +func (am *account_manager) GetTransactionsForAccountInRange(accountAddress string, fromBlock, toBlock uint64) ([]types.DBTransaction, error) { + conn, err := DB_OPs.GetMainDBConnectionandPutBack(context.Background()) + if err != nil { + return nil, fmt.Errorf("failed to get main DB connection: %w", err) + } + + addr := common.HexToAddress(accountAddress) + cfgTxs, err := DB_OPs.GetTransactionsByAccountInRange(conn, &addr, fromBlock, toBlock) + if err != nil { + return nil, fmt.Errorf("failed to get transactions in range [%d..%d]: %w", fromBlock, toBlock, err) + } + + result := make([]types.DBTransaction, 0, len(cfgTxs)) + for _, tx := range cfgTxs { + result = append(result, configTxToDBTx(tx)) + } + return result, nil +} + // Time Complexity: O(1) func (am *account_manager) GetAccountBalance(accountAddress string) (*big.Int, uint64, error) { ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) diff --git a/DB_OPs/account_immuclient.go b/DB_OPs/account_immuclient.go index 47cd6dda..2223af1e 100644 --- a/DB_OPs/account_immuclient.go +++ b/DB_OPs/account_immuclient.go @@ -1373,6 +1373,76 @@ func GetTransactionsByAccount(PooledConnection *config.PooledConnection, account return matchingTxs, nil } +// GetTransactionsByAccountInRange retrieves transactions for an account in [fromBlock, toBlock]. +// Pass math.MaxUint64 for toBlock to scan up to the latest block in the DB. +// Identical to GetTransactionsByAccount but scans a bounded block range instead of 0..latest, +// enabling delta-only reconciliation so each sync run replays only new transactions. +func GetTransactionsByAccountInRange(PooledConnection *config.PooledConnection, accountAddr *common.Address, fromBlock, toBlock uint64) ([]*config.Transaction, error) { + var err error + var shouldReturnConnection = false + + ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second) + defer cancel() + + if PooledConnection == nil || PooledConnection.Client == nil { + PooledConnection, err = GetMainDBConnectionandPutBack(ctx) + if err != nil { + return nil, fmt.Errorf("failed to get main DB connection from pool: %w - GetTransactionsByAccountInRange", err) + } + shouldReturnConnection = true + } + if shouldReturnConnection { + defer PutMainDBConnection(PooledConnection) + } + + latestBlockNumber, err := GetLatestBlockNumber(ctx, PooledConnection) + if err != nil { + return nil, fmt.Errorf("failed to get latest block number: %w", err) + } + + if toBlock > latestBlockNumber { + toBlock = latestBlockNumber + } + if fromBlock > toBlock { + // Nothing to scan — no new blocks in range + return nil, nil + } + + var matchingTxs []*config.Transaction + const batchSize = uint64(500) + + for startBlock := fromBlock; startBlock <= toBlock; startBlock += batchSize { + if ctx.Err() != nil { + return nil, ctx.Err() + } + endBlock := startBlock + batchSize - 1 + if endBlock > toBlock { + endBlock = toBlock + } + + blocks, err := GetBlocksRange(PooledConnection, startBlock, endBlock) + if err != nil { + PooledConnection.Client.Logger.Warn(ctx, "Error retrieving block batch, skipping", + ion.String("error", err.Error()), + ion.Uint64("start_block", startBlock), + ion.Uint64("end_block", endBlock), + ion.String("function", "DB_OPs.GetTransactionsByAccountInRange")) + continue + } + + for _, block := range blocks { + for _, tx := range block.Transactions { + if isTransactionInvolvingAccount(tx, accountAddr) { + txCopy := tx + matchingTxs = append(matchingTxs, &txCopy) + } + } + } + } + + return matchingTxs, nil +} + // isTransactionInvolvingAccount checks if a transaction involves a specific account func isTransactionInvolvingAccount(tx config.Transaction, accountAddr *common.Address) bool { // Compare address values, not pointers diff --git a/FastsyncV2/catchup.go b/FastsyncV2/catchup.go index d2446176..7e42e233 100644 --- a/FastsyncV2/catchup.go +++ b/FastsyncV2/catchup.go @@ -286,12 +286,23 @@ func (fs *FastsyncV2) HandleCatchUpSync(fromBlock uint64, targetPeer string) err log.Printf("[CatchUpSync] phase 5: reconciliation — %d accounts to process", accountCount) reconStart := time.Now() - reconCount, failedAccounts, err := fs.ReconRouter.Reconcile(taggedAccounts, availResp) - if err != nil { - log.Printf("[CatchUpSync] phase 5 warning: %v", err) + reconFrom, reconSkip := fs.effectiveReconRange(fromBlock, remoteTip) + if reconSkip { + log.Printf("[CatchUpSync] phase 5 skipped: range [%d..%d] already reconciled", fromBlock, remoteTip) + } else { + if reconFrom > fromBlock { + log.Printf("[CatchUpSync] phase 5: advancing fromBlock %d → %d (already reconciled)", fromBlock, reconFrom) + } + reconCount, failedAccounts, err := fs.ReconRouter.Reconcile(taggedAccounts, availResp, reconFrom, remoteTip) + if err != nil { + log.Printf("[CatchUpSync] phase 5 warning: %v", err) + } + log.Printf("[CatchUpSync] phase 5 complete: %d committed, %d failed, took %s", + reconCount, len(failedAccounts), time.Since(reconStart).Round(time.Millisecond)) + if err == nil { + fs.markReconComplete(remoteTip) + } } - log.Printf("[CatchUpSync] phase 5 complete: %d committed, %d failed, took %s", - reconCount, len(failedAccounts), time.Since(reconStart).Round(time.Millisecond)) // ── Phase 6: Re-auth before PoTS (disabled — AUTH_TTL is now 48h) ───── // if refreshed, ok := fs.tryRefreshAuth(ctx, targetNodeInfo, 0); ok { diff --git a/FastsyncV2/fastsyncv2.go b/FastsyncV2/fastsyncv2.go index 5158a9e5..f25f497e 100644 --- a/FastsyncV2/fastsyncv2.go +++ b/FastsyncV2/fastsyncv2.go @@ -20,9 +20,11 @@ import ( "math" "os" "path/filepath" + "strconv" "time" NodeInfo "gossipnode/DB_OPs/Nodeinfo" + "gossipnode/DB_OPs/sqlops" "github.com/JupiterMetaLabs/JMDN-FastSync/common/WAL" accountspb "github.com/JupiterMetaLabs/JMDN-FastSync/common/proto/accounts" @@ -468,13 +470,28 @@ func (fs *FastsyncV2) handleSyncInternal(targetPeer string, startBlock uint64) e // 3. Atomic DB commit via AccountManager.BatchUpdateAccounts log.Println("[FastsyncV2] Phase 5: Reconciliation") - reconciledCount, failedAccounts, err := fs.ReconRouter.Reconcile(taggedAccounts, availResp) - if err != nil { - log.Printf("[FastsyncV2] Phase 5 warning: reconciliation returned error: %v", err) + remoteBlockNum := availResp.BlockHeight + if remoteBlockNum == 0 { + remoteBlockNum = math.MaxUint64 } - log.Printf("[FastsyncV2] Phase 5 complete: %d accounts reconciled, %d failed", reconciledCount, len(failedAccounts)) - if len(failedAccounts) > 0 { - log.Printf("[FastsyncV2] Failed accounts: %v", failedAccounts) + reconFrom, reconSkip := fs.effectiveReconRange(localBlockNum+1, remoteBlockNum) + if reconSkip { + log.Printf("[FastsyncV2] Phase 5 skipped: range [%d..%d] already reconciled", localBlockNum+1, remoteBlockNum) + } else { + if reconFrom > localBlockNum+1 { + log.Printf("[FastsyncV2] Phase 5: advancing fromBlock %d → %d (already reconciled)", localBlockNum+1, reconFrom) + } + reconciledCount, failedAccounts, err := fs.ReconRouter.Reconcile(taggedAccounts, availResp, reconFrom, remoteBlockNum) + if err != nil { + log.Printf("[FastsyncV2] Phase 5 warning: reconciliation returned error: %v", err) + } + log.Printf("[FastsyncV2] Phase 5 complete: %d accounts reconciled, %d failed", reconciledCount, len(failedAccounts)) + if len(failedAccounts) > 0 { + log.Printf("[FastsyncV2] Failed accounts: %v", failedAccounts) + } + if err == nil { + fs.markReconComplete(remoteBlockNum) + } } // ========================================================================= @@ -570,12 +587,25 @@ func (fs *FastsyncV2) executePoTS( } // Secondary Reconciliation for accounts affected by PoTS blocks. + // PoTS blocks are produced after availResp.BlockHeight, so fromBlock = BlockHeight+1. if potsTaggedAccts != nil { - reconCount, failed, err := fs.ReconRouter.Reconcile(potsTaggedAccts, availResp) - if err != nil { - log.Printf("[FastsyncV2] PoTS reconciliation warning: %v", err) + potsFromBlock := availResp.BlockHeight + 1 + if availResp.BlockHeight == 0 { + potsFromBlock = 1 + } + potsReconFrom, potsReconSkip := fs.effectiveReconRange(potsFromBlock, math.MaxUint64) + if potsReconSkip { + log.Printf("[FastsyncV2] PoTS reconciliation skipped: already reconciled") + } else { + reconCount, failed, err := fs.ReconRouter.Reconcile(potsTaggedAccts, availResp, potsReconFrom, math.MaxUint64) + if err != nil { + log.Printf("[FastsyncV2] PoTS reconciliation warning: %v", err) + } + log.Printf("[FastsyncV2] PoTS reconciled %d accounts, %d failed", reconCount, len(failed)) + if err == nil { + fs.markReconComplete(fs.blockInfoAdapter.GetBlockDetails().Blocknumber) + } } - log.Printf("[FastsyncV2] PoTS reconciled %d accounts, %d failed", reconCount, len(failed)) } } } else { @@ -637,6 +667,50 @@ func (fs *FastsyncV2) dumpPoTSWALToDB(ctx context.Context) error { return nil } +// reconBlockKey is the SQLite key_value key used to persist the last successfully +// reconciled block number. Reading it before each Reconcile call prevents +// double-counting on re-runs that cover an already-reconciled range. +const reconBlockKey = "fastsync:last_reconciled_block" + +// effectiveReconRange returns the adjusted [from, to] range that hasn't been +// reconciled yet, plus a skip flag when the entire range is already done. +// +// Algorithm: +// +// lastBlock = SQLite key_value["fastsync:last_reconciled_block"] (0 if absent) +// effectiveFrom = max(fromBlock, lastBlock+1) +// skip = effectiveFrom > toBlock +func (fs *FastsyncV2) effectiveReconRange(fromBlock, toBlock uint64) (from uint64, skip bool) { + udb, err := sqlops.NewUnifiedDB() + if err != nil { + log.Printf("[FastsyncV2] recon anchor: open SQLite failed (%v) — using fromBlock=%d as-is", err, fromBlock) + return fromBlock, false + } + defer udb.Close() + + from = fromBlock + if raw, err := udb.GetKeyValue(reconBlockKey); err == nil && raw != "" { + if last, err := strconv.ParseUint(raw, 10, 64); err == nil && last+1 > fromBlock { + from = last + 1 + } + } + return from, from > toBlock +} + +// markReconComplete stores toBlock as the last successfully reconciled block. +func (fs *FastsyncV2) markReconComplete(toBlock uint64) { + udb, err := sqlops.NewUnifiedDB() + if err != nil { + log.Printf("[FastsyncV2] recon anchor: open SQLite failed (%v) — last_reconciled_block not persisted", err) + return + } + defer udb.Close() + + if err := udb.StoreKeyValue(reconBlockKey, strconv.FormatUint(toBlock, 10)); err != nil { + log.Printf("[FastsyncV2] recon anchor: store failed (%v) — last_reconciled_block not persisted", err) + } +} + // Close tears down all routers and flushes WALs. // Call this when the node shuts down. func (fs *FastsyncV2) Close() { diff --git a/go.mod b/go.mod index 08cc1e6e..acd72007 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module gossipnode go 1.25.0 require ( - github.com/JupiterMetaLabs/JMDN-FastSync v0.0.0-20260623191406-56133298595b + github.com/JupiterMetaLabs/JMDN-FastSync v0.0.0-20260624044452-d59628408292 github.com/JupiterMetaLabs/JMDN_Merkletree v0.0.0-20260413092720-b819e61566f8 github.com/JupiterMetaLabs/goroutine-orchestrator v0.1.5 github.com/JupiterMetaLabs/ion v0.4.2 diff --git a/go.sum b/go.sum index 4739c584..00205acc 100644 --- a/go.sum +++ b/go.sum @@ -1,14 +1,8 @@ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= -github.com/JupiterMetaLabs/JMDN-FastSync v0.0.0-20260623171945-d171b0aa6d80 h1:LyfJvMbuIbidZqIHbs2z2mrXGn6i2vsCXeLMd2t5EPY= -github.com/JupiterMetaLabs/JMDN-FastSync v0.0.0-20260623171945-d171b0aa6d80/go.mod h1:0erT7gGH4TYtitRik+Y3GfxSa5KGLacr9rJovV3vNB0= -github.com/JupiterMetaLabs/JMDN-FastSync v0.0.0-20260623172225-ee9db84a5609 h1:4rGUBgm+T0UUZ9WdZ0xlgnZNbHej6pq5VEMq2Ugfpdo= -github.com/JupiterMetaLabs/JMDN-FastSync v0.0.0-20260623172225-ee9db84a5609/go.mod h1:0erT7gGH4TYtitRik+Y3GfxSa5KGLacr9rJovV3vNB0= -github.com/JupiterMetaLabs/JMDN-FastSync v0.0.0-20260623190349-1e218bd06f7b h1:58QlmKb4FGT8F8xFFQ6gUG61bbAXNnrTan+PNgh67VU= -github.com/JupiterMetaLabs/JMDN-FastSync v0.0.0-20260623190349-1e218bd06f7b/go.mod h1:0erT7gGH4TYtitRik+Y3GfxSa5KGLacr9rJovV3vNB0= -github.com/JupiterMetaLabs/JMDN-FastSync v0.0.0-20260623191406-56133298595b h1:/Tz6f/DmUW+3AH6zvlfzZk987yd0Xla0+cve9YgNM1Y= -github.com/JupiterMetaLabs/JMDN-FastSync v0.0.0-20260623191406-56133298595b/go.mod h1:0erT7gGH4TYtitRik+Y3GfxSa5KGLacr9rJovV3vNB0= +github.com/JupiterMetaLabs/JMDN-FastSync v0.0.0-20260624044452-d59628408292 h1:9zgOGNN6oXq4tfui2K31KiZrbmz7XVHuhBNmEF8mxW0= +github.com/JupiterMetaLabs/JMDN-FastSync v0.0.0-20260624044452-d59628408292/go.mod h1:0erT7gGH4TYtitRik+Y3GfxSa5KGLacr9rJovV3vNB0= github.com/JupiterMetaLabs/JMDN_Merkletree v0.0.0-20260413092720-b819e61566f8 h1:yPrYb6g6NnqGsiCVqMf0zndEYTuelL3B03Fee+utLWA= github.com/JupiterMetaLabs/JMDN_Merkletree v0.0.0-20260413092720-b819e61566f8/go.mod h1:zM8F31G2SiPXzTo1WzbDFZ5iOOAkqrkuZjS0QVDW4ew= github.com/JupiterMetaLabs/goroutine-orchestrator v0.1.5 h1:S9+s6JeWSrGJ6ooYb4f8iRlJxwPUZ8X/EA4EgxKS3zc= From 3b1d92e1fb018b3c7b96e69bbd4c005fa76b4eea Mon Sep 17 00:00:00 2001 From: Doc Date: Wed, 24 Jun 2026 11:51:00 +0530 Subject: [PATCH 19/23] fix: update JMDN-FastSync dependency version and increase max connections in connection pool - Updated the JMDN-FastSync dependency to version v0.0.0-20260624061943-72fc8e50b45c in go.mod and go.sum for compatibility with recent changes. - Increased the MaxConnections in the DefaultConnectionPoolConfig from 20 to 30 to enhance connection handling capabilities. --- config/ConnectionPool.go | 2 +- go.mod | 2 +- go.sum | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/config/ConnectionPool.go b/config/ConnectionPool.go index 4ba92bee..84f846d9 100644 --- a/config/ConnectionPool.go +++ b/config/ConnectionPool.go @@ -59,7 +59,7 @@ type PoolingConfig struct { func DefaultConnectionPoolConfig() *ConnectionPoolConfig { return &ConnectionPoolConfig{ MinConnections: 2, - MaxConnections: 20, + MaxConnections: 30, ConnectionTimeout: 30 * time.Second, IdleTimeout: 5 * time.Minute, MaxLifetime: 30 * time.Minute, diff --git a/go.mod b/go.mod index acd72007..fa4ddd14 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module gossipnode go 1.25.0 require ( - github.com/JupiterMetaLabs/JMDN-FastSync v0.0.0-20260624044452-d59628408292 + github.com/JupiterMetaLabs/JMDN-FastSync v0.0.0-20260624061943-72fc8e50b45c github.com/JupiterMetaLabs/JMDN_Merkletree v0.0.0-20260413092720-b819e61566f8 github.com/JupiterMetaLabs/goroutine-orchestrator v0.1.5 github.com/JupiterMetaLabs/ion v0.4.2 diff --git a/go.sum b/go.sum index 00205acc..736d52a3 100644 --- a/go.sum +++ b/go.sum @@ -1,8 +1,8 @@ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= -github.com/JupiterMetaLabs/JMDN-FastSync v0.0.0-20260624044452-d59628408292 h1:9zgOGNN6oXq4tfui2K31KiZrbmz7XVHuhBNmEF8mxW0= -github.com/JupiterMetaLabs/JMDN-FastSync v0.0.0-20260624044452-d59628408292/go.mod h1:0erT7gGH4TYtitRik+Y3GfxSa5KGLacr9rJovV3vNB0= +github.com/JupiterMetaLabs/JMDN-FastSync v0.0.0-20260624061943-72fc8e50b45c h1:DxH55rS7byJmCYL++vQq6aICLTLNIX/dqJ/MTWfTHSM= +github.com/JupiterMetaLabs/JMDN-FastSync v0.0.0-20260624061943-72fc8e50b45c/go.mod h1:0erT7gGH4TYtitRik+Y3GfxSa5KGLacr9rJovV3vNB0= github.com/JupiterMetaLabs/JMDN_Merkletree v0.0.0-20260413092720-b819e61566f8 h1:yPrYb6g6NnqGsiCVqMf0zndEYTuelL3B03Fee+utLWA= github.com/JupiterMetaLabs/JMDN_Merkletree v0.0.0-20260413092720-b819e61566f8/go.mod h1:zM8F31G2SiPXzTo1WzbDFZ5iOOAkqrkuZjS0QVDW4ew= github.com/JupiterMetaLabs/goroutine-orchestrator v0.1.5 h1:S9+s6JeWSrGJ6ooYb4f8iRlJxwPUZ8X/EA4EgxKS3zc= From 49a4b77be87de47c1f0c0476c4d37c87764cb509 Mon Sep 17 00:00:00 2001 From: Doc Date: Wed, 24 Jun 2026 12:36:39 +0530 Subject: [PATCH 20/23] fix: enhance account update structure and reconciliation logic - Added new fields `TxNonce` and `TxCountSent` to the `accountUpdateWire` struct to support additional transaction metadata. - Updated the `BatchUpdateAccounts` function to include the new fields during account updates, improving data accuracy. - Refactored reconciliation logic in `HandleCatchUpSync` and `handleSyncInternal` to utilize a single-pass delta approach for account updates, optimizing performance and reducing database scans. These changes enhance the efficiency and reliability of account updates and reconciliation processes. --- DB_OPs/Nodeinfo/account_sync_worker.go | 12 +- DB_OPs/Nodeinfo/immudb_account_manager.go | 8 +- FastsyncV2/catchup.go | 18 +-- FastsyncV2/deltas.go | 165 ++++++++++++++++++++++ FastsyncV2/fastsyncv2.go | 17 ++- go.mod | 2 +- go.sum | 4 +- 7 files changed, 197 insertions(+), 29 deletions(-) create mode 100644 FastsyncV2/deltas.go diff --git a/DB_OPs/Nodeinfo/account_sync_worker.go b/DB_OPs/Nodeinfo/account_sync_worker.go index 84926a5b..c7444a1c 100644 --- a/DB_OPs/Nodeinfo/account_sync_worker.go +++ b/DB_OPs/Nodeinfo/account_sync_worker.go @@ -71,11 +71,13 @@ type dbEntry = struct { // surprises (math/big.Int marshals as a quoted decimal string, but that behaviour // is implementation-defined and not guaranteed across versions). // -// Stored in the stream as: {"address":"0x...","new_balance":"1000000","nonce":42} +// Stored in the stream as: {"address":"0x...","new_balance":"1000000","nonce":42,"tx_nonce":43,"tx_count_sent":5} type accountUpdateWire struct { - Address string `json:"address"` - NewBalance string `json:"new_balance"` // decimal string from big.Int.String() - Nonce uint64 `json:"nonce"` + Address string `json:"address"` + NewBalance string `json:"new_balance"` // decimal string from big.Int.String() + Nonce uint64 `json:"nonce"` + TxNonce uint64 `json:"tx_nonce"` + TxCountSent uint64 `json:"tx_count_sent"` } // ─── Configuration ──────────────────────────────────────────────────────────── @@ -462,6 +464,8 @@ func parseUpdatesPayload(dataStr string) ([]dbEntry, error) { Address: addr, Balance: balance.String(), Nonce: w.Nonce, + TxNonce: w.TxNonce, + TxCountSent: w.TxCountSent, AccountType: "user", UpdatedAt: time.Now().UTC().UnixNano(), } diff --git a/DB_OPs/Nodeinfo/immudb_account_manager.go b/DB_OPs/Nodeinfo/immudb_account_manager.go index 4e608811..31d00ced 100644 --- a/DB_OPs/Nodeinfo/immudb_account_manager.go +++ b/DB_OPs/Nodeinfo/immudb_account_manager.go @@ -488,9 +488,11 @@ func (am *account_manager) BatchUpdateAccounts(updates []types.AccountUpdate) er wires := make([]accountUpdateWire, len(updates)) for i, u := range updates { wires[i] = accountUpdateWire{ - Address: u.Address, - NewBalance: u.NewBalance.String(), - Nonce: u.Nonce, + Address: u.Address, + NewBalance: u.NewBalance.String(), + Nonce: u.Nonce, + TxNonce: u.TxNonce, + TxCountSent: u.TxCountSent, } } diff --git a/FastsyncV2/catchup.go b/FastsyncV2/catchup.go index 7e42e233..0f18e7e1 100644 --- a/FastsyncV2/catchup.go +++ b/FastsyncV2/catchup.go @@ -273,17 +273,9 @@ func (fs *FastsyncV2) HandleCatchUpSync(fromBlock uint64, targetPeer string) err } // ── Phase 5: Reconciliation ─────────────────────────────────────────── - // Brief pause before reconciliation: DataSync may have just written thousands - // of blocks and ImmuDB needs a moment to settle its commit queue before read - // queries (GetTransactionsForAccount) can complete within their deadline. - const reconDelay = 5 * time.Second - log.Printf("[CatchUpSync] phase 5: waiting %s for ImmuDB to settle before reconciliation", reconDelay) - time.Sleep(reconDelay) - accountCount := 0 - if taggedAccounts != nil { - accountCount = len(taggedAccounts.Accounts) - } - log.Printf("[CatchUpSync] phase 5: reconciliation — %d accounts to process", accountCount) + // Single-pass delta approach: one BlockIterator scan over [reconFrom..remoteTip] + // computes all account deltas in memory — no per-account DB scan needed. + log.Printf("[CatchUpSync] phase 5: reconciliation (delta approach)") reconStart := time.Now() reconFrom, reconSkip := fs.effectiveReconRange(fromBlock, remoteTip) @@ -293,7 +285,9 @@ func (fs *FastsyncV2) HandleCatchUpSync(fromBlock uint64, targetPeer string) err if reconFrom > fromBlock { log.Printf("[CatchUpSync] phase 5: advancing fromBlock %d → %d (already reconciled)", fromBlock, reconFrom) } - reconCount, failedAccounts, err := fs.ReconRouter.Reconcile(taggedAccounts, availResp, reconFrom, remoteTip) + deltas := fs.computeAccountDeltas(reconFrom, remoteTip) + log.Printf("[CatchUpSync] phase 5: computed deltas for %d accounts", len(deltas)) + reconCount, failedAccounts, err := fs.ReconRouter.ReconcileWithDeltas(deltas, availResp) if err != nil { log.Printf("[CatchUpSync] phase 5 warning: %v", err) } diff --git a/FastsyncV2/deltas.go b/FastsyncV2/deltas.go new file mode 100644 index 00000000..24533401 --- /dev/null +++ b/FastsyncV2/deltas.go @@ -0,0 +1,165 @@ +package FastsyncV2 + +// computeAccountDeltas performs a single forward pass over the locally stored blocks +// in [fromBlock..toBlock] and computes per-account balance/nonce deltas. +// +// This replaces the prior per-account GetTransactionsForAccountInRange scan: +// instead of O(accounts × blocks) DB queries, this is one O(blocks) iterator pass. +// +// Balance rules follow processBlockTransactions in messaging/BlockProcessing/Processing.go: +// +// Sender → deduct value + gasFee; advance Nonce and TxCountSent +// Receiver → credit value +// Coinbase → credit gasFee/2 + gasFee%2 (half + remainder) +// ZKVM → credit gasFee/2 +// +// Gas fee: +// +// EIP-1559 (type 2): effectiveGasPrice = MaxFee ?? MaxPriorityFee ?? GasPrice ?? 1e9 +// Legacy (type 0/1): effectiveGasPrice = GasPrice ?? MaxFee ?? MaxPriorityFee ?? 1e9 +// gasFee = gasLimit * effectiveGasPrice + +import ( + "math/big" + "strings" + + "github.com/JupiterMetaLabs/JMDN-FastSync/common/types" +) + +// computeAccountDeltas iterates all blocks in [fromBlock..toBlock] and returns a map +// of lowercase-hex-address → *types.AccountDelta. Accounts not touched in the range +// are absent from the map. +func (fs *FastsyncV2) computeAccountDeltas(fromBlock, toBlock uint64) map[string]*types.AccountDelta { + const batchSize = 500 + iter := fs.blockInfoAdapter.NewBlockIterator(fromBlock, toBlock, batchSize) + defer iter.Close() + + deltas := make(map[string]*types.AccountDelta) + + for { + batch, err := iter.Next() + if err != nil || len(batch) == 0 { + break + } + for _, blk := range batch { + applyBlockDeltas(blk, deltas) + } + } + + return deltas +} + +// applyBlockDeltas applies the transaction effects of one ZKBlock to the delta map. +func applyBlockDeltas(blk *types.ZKBlock, deltas map[string]*types.AccountDelta) { + var coinbaseAddr, zkvmAddr string + if blk.CoinbaseAddr != nil { + coinbaseAddr = strings.ToLower(blk.CoinbaseAddr.Hex()) + } + if blk.ZKVMAddr != nil { + zkvmAddr = strings.ToLower(blk.ZKVMAddr.Hex()) + } + + for i := range blk.Transactions { + tx := &blk.Transactions[i] + + var fromAddr, toAddr string + if tx.From != nil { + fromAddr = strings.ToLower(tx.From.Hex()) + } + if tx.To != nil { + toAddr = strings.ToLower(tx.To.Hex()) + } + + gasFee := computeGasFee(tx) + + halfGas := new(big.Int).Div(gasFee, big.NewInt(2)) + remainder := new(big.Int).Mod(gasFee, big.NewInt(2)) + coinbaseGas := new(big.Int).Add(halfGas, remainder) + zkvmGas := new(big.Int).Set(halfGas) + + // Sender: deduct value + gasFee; advance nonce; increment TxCountSent + if fromAddr != "" { + d := getDelta(deltas, fromAddr) + d.BalanceDelta.Sub(d.BalanceDelta, gasFee) + if tx.Value != nil && tx.Value.Sign() > 0 { + d.BalanceDelta.Sub(d.BalanceDelta, tx.Value) + } + if tx.Nonce > d.Nonce { + d.Nonce = tx.Nonce + d.TxNonce = tx.Nonce + 1 + } + d.TxCountSent++ + d.IsSender = true + } + + // Receiver: credit value only + if toAddr != "" && tx.Value != nil && tx.Value.Sign() > 0 { + d := getDelta(deltas, toAddr) + d.BalanceDelta.Add(d.BalanceDelta, tx.Value) + } + + // Coinbase: credit half + remainder of gasFee + if coinbaseAddr != "" { + d := getDelta(deltas, coinbaseAddr) + d.BalanceDelta.Add(d.BalanceDelta, coinbaseGas) + } + + // ZKVM: credit exact half of gasFee + if zkvmAddr != "" { + d := getDelta(deltas, zkvmAddr) + d.BalanceDelta.Add(d.BalanceDelta, zkvmGas) + } + } +} + +// getDelta returns the existing delta for addr, creating a zero entry if absent. +func getDelta(deltas map[string]*types.AccountDelta, addr string) *types.AccountDelta { + d, ok := deltas[addr] + if !ok { + d = &types.AccountDelta{BalanceDelta: big.NewInt(0)} + deltas[addr] = d + } + return d +} + +// computeGasFee returns gasLimit * effectiveGasPrice following Processing.go rules. +func computeGasFee(tx *types.Transaction) *big.Int { + if tx.GasLimit == 0 { + return big.NewInt(0) + } + gasLimit := new(big.Int).SetUint64(tx.GasLimit) + effectivePrice := effectiveGasPrice(tx) + return new(big.Int).Mul(gasLimit, effectivePrice) +} + +var oneGwei = big.NewInt(1_000_000_000) + +// effectiveGasPrice returns the effective gas price for a transaction. +// +// EIP-1559 (type 2): MaxFee → MaxPriorityFee → GasPrice → 1 Gwei +// Legacy (0/1): GasPrice → MaxFee → MaxPriorityFee → 1 Gwei +func effectiveGasPrice(tx *types.Transaction) *big.Int { + switch tx.Type { + case 2: // EIP-1559 + if tx.MaxFee != nil && tx.MaxFee.Sign() > 0 { + return tx.MaxFee + } + if tx.MaxPriorityFee != nil && tx.MaxPriorityFee.Sign() > 0 { + return tx.MaxPriorityFee + } + if tx.GasPrice != nil && tx.GasPrice.Sign() > 0 { + return tx.GasPrice + } + default: // Legacy / EIP-2930 + if tx.GasPrice != nil && tx.GasPrice.Sign() > 0 { + return tx.GasPrice + } + if tx.MaxFee != nil && tx.MaxFee.Sign() > 0 { + return tx.MaxFee + } + if tx.MaxPriorityFee != nil && tx.MaxPriorityFee.Sign() > 0 { + return tx.MaxPriorityFee + } + } + return oneGwei +} diff --git a/FastsyncV2/fastsyncv2.go b/FastsyncV2/fastsyncv2.go index f25f497e..1589bcfa 100644 --- a/FastsyncV2/fastsyncv2.go +++ b/FastsyncV2/fastsyncv2.go @@ -464,10 +464,8 @@ func (fs *FastsyncV2) handleSyncInternal(targetPeer string, startBlock uint64) e // ========================================================================= // PHASE 5: Reconciliation — recompute and commit account balances // ========================================================================= - // Three-phase atomic operation: - // 1. Concurrent balance computation (up to 15 goroutines replay transactions) - // 2. WAL batch write (single ReconciliationBatchEvent for crash recovery) - // 3. Atomic DB commit via AccountManager.BatchUpdateAccounts + // Single-pass approach: one BlockIterator scan computes all account deltas + // (O(blocks)), then applies them — no per-account DB scan. log.Println("[FastsyncV2] Phase 5: Reconciliation") remoteBlockNum := availResp.BlockHeight @@ -481,7 +479,9 @@ func (fs *FastsyncV2) handleSyncInternal(targetPeer string, startBlock uint64) e if reconFrom > localBlockNum+1 { log.Printf("[FastsyncV2] Phase 5: advancing fromBlock %d → %d (already reconciled)", localBlockNum+1, reconFrom) } - reconciledCount, failedAccounts, err := fs.ReconRouter.Reconcile(taggedAccounts, availResp, reconFrom, remoteBlockNum) + deltas := fs.computeAccountDeltas(reconFrom, remoteBlockNum) + log.Printf("[FastsyncV2] Phase 5: computed deltas for %d accounts", len(deltas)) + reconciledCount, failedAccounts, err := fs.ReconRouter.ReconcileWithDeltas(deltas, availResp) if err != nil { log.Printf("[FastsyncV2] Phase 5 warning: reconciliation returned error: %v", err) } @@ -597,13 +597,16 @@ func (fs *FastsyncV2) executePoTS( if potsReconSkip { log.Printf("[FastsyncV2] PoTS reconciliation skipped: already reconciled") } else { - reconCount, failed, err := fs.ReconRouter.Reconcile(potsTaggedAccts, availResp, potsReconFrom, math.MaxUint64) + potsLatest := fs.blockInfoAdapter.GetBlockDetails().Blocknumber + potsDeltas := fs.computeAccountDeltas(potsReconFrom, potsLatest) + log.Printf("[FastsyncV2] PoTS: computed deltas for %d accounts", len(potsDeltas)) + reconCount, failed, err := fs.ReconRouter.ReconcileWithDeltas(potsDeltas, availResp) if err != nil { log.Printf("[FastsyncV2] PoTS reconciliation warning: %v", err) } log.Printf("[FastsyncV2] PoTS reconciled %d accounts, %d failed", reconCount, len(failed)) if err == nil { - fs.markReconComplete(fs.blockInfoAdapter.GetBlockDetails().Blocknumber) + fs.markReconComplete(potsLatest) } } } diff --git a/go.mod b/go.mod index fa4ddd14..055357e7 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module gossipnode go 1.25.0 require ( - github.com/JupiterMetaLabs/JMDN-FastSync v0.0.0-20260624061943-72fc8e50b45c + github.com/JupiterMetaLabs/JMDN-FastSync v0.0.0-20260624070201-0e662a93ce62 github.com/JupiterMetaLabs/JMDN_Merkletree v0.0.0-20260413092720-b819e61566f8 github.com/JupiterMetaLabs/goroutine-orchestrator v0.1.5 github.com/JupiterMetaLabs/ion v0.4.2 diff --git a/go.sum b/go.sum index 736d52a3..673f4163 100644 --- a/go.sum +++ b/go.sum @@ -1,8 +1,8 @@ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= -github.com/JupiterMetaLabs/JMDN-FastSync v0.0.0-20260624061943-72fc8e50b45c h1:DxH55rS7byJmCYL++vQq6aICLTLNIX/dqJ/MTWfTHSM= -github.com/JupiterMetaLabs/JMDN-FastSync v0.0.0-20260624061943-72fc8e50b45c/go.mod h1:0erT7gGH4TYtitRik+Y3GfxSa5KGLacr9rJovV3vNB0= +github.com/JupiterMetaLabs/JMDN-FastSync v0.0.0-20260624070201-0e662a93ce62 h1:o2bXM609Fa3BlDfN0FRuo2MKyd15fOOPeeAERew5AzY= +github.com/JupiterMetaLabs/JMDN-FastSync v0.0.0-20260624070201-0e662a93ce62/go.mod h1:0erT7gGH4TYtitRik+Y3GfxSa5KGLacr9rJovV3vNB0= github.com/JupiterMetaLabs/JMDN_Merkletree v0.0.0-20260413092720-b819e61566f8 h1:yPrYb6g6NnqGsiCVqMf0zndEYTuelL3B03Fee+utLWA= github.com/JupiterMetaLabs/JMDN_Merkletree v0.0.0-20260413092720-b819e61566f8/go.mod h1:zM8F31G2SiPXzTo1WzbDFZ5iOOAkqrkuZjS0QVDW4ew= github.com/JupiterMetaLabs/goroutine-orchestrator v0.1.5 h1:S9+s6JeWSrGJ6ooYb4f8iRlJxwPUZ8X/EA4EgxKS3zc= From c9f4eda855cc1f4ca6e4bd77b249ac8ca05d641a Mon Sep 17 00:00:00 2001 From: Doc Date: Wed, 24 Jun 2026 14:36:30 +0530 Subject: [PATCH 21/23] fix: add EnableCatchup configuration for automatic catchup synchronization - Introduced the EnableCatchup field in FastSyncSettings to control automatic execution of HandleCatchUpSync on startup, requiring EnablePulling to be true. - Updated main logic to default catch_up_from_block to 0 for full scans from genesis when EnableCatchup is enabled. - Set default value for EnableCatchup to false in the default configuration. These changes enhance the flexibility of the fast synchronization process by allowing users to enable or disable automatic catchup behavior. --- FastsyncV2/catchup.go | 18 +----------------- config/settings/config.go | 5 +++++ config/settings/defaults.go | 9 +++++---- config/settings/loader.go | 1 + main.go | 10 +++++----- 5 files changed, 17 insertions(+), 26 deletions(-) diff --git a/FastsyncV2/catchup.go b/FastsyncV2/catchup.go index 0f18e7e1..043dcb81 100644 --- a/FastsyncV2/catchup.go +++ b/FastsyncV2/catchup.go @@ -29,9 +29,9 @@ import ( "gossipnode/DB_OPs" + ackpb "github.com/JupiterMetaLabs/JMDN-FastSync/common/proto/ack" availabilitypb "github.com/JupiterMetaLabs/JMDN-FastSync/common/proto/availability" authpb "github.com/JupiterMetaLabs/JMDN-FastSync/common/proto/availability/auth" - ackpb "github.com/JupiterMetaLabs/JMDN-FastSync/common/proto/ack" datasyncpb "github.com/JupiterMetaLabs/JMDN-FastSync/common/proto/datasync" headersyncpb "github.com/JupiterMetaLabs/JMDN-FastSync/common/proto/headersync" phasepb "github.com/JupiterMetaLabs/JMDN-FastSync/common/proto/phase" @@ -354,22 +354,6 @@ func (fs *FastsyncV2) HandleCatchUpSync(fromBlock uint64, targetPeer string) err return nil } -// tryRefreshAuth sends a fresh availability request and returns the new response -// if the peer is still available and returns a valid token. -func (fs *FastsyncV2) tryRefreshAuth(ctx context.Context, targetNodeInfo *types.Nodeinfo, startBlock uint64) (*availabilitypb.AvailabilityResponse, bool) { - resp, err := fs.AvailRouter.SendAvailabilityRequest( - ctx, fs.PriorRouter.GetSyncVars(), *targetNodeInfo, startBlock, math.MaxUint64, - ) - if err != nil { - log.Printf("[CatchUpSync] auth refresh failed: %v", err) - return nil, false - } - if !resp.IsAvailable || resp.Auth == nil || resp.Auth.UUID == "" { - return nil, false - } - return resp, true -} - // buildMissingTag scans the local DB over [fromBlock..remoteTip] and returns a // Tag containing only the ranges absent locally. // diff --git a/config/settings/config.go b/config/settings/config.go index 365cedc0..06b4ba8e 100644 --- a/config/settings/config.go +++ b/config/settings/config.go @@ -156,6 +156,11 @@ type FastSyncSettings struct { // local DB. false = read-only participant (serves data, never updates itself). EnablePulling bool `mapstructure:"enable_pulling" yaml:"enable_pulling"` + // EnableCatchup controls whether the node automatically runs HandleCatchUpSync on + // startup using catch_up_peer and catch_up_from_block. Requires enable_pulling=true. + // Set false to disable automatic catchup while still allowing manual CLI catchup. + EnableCatchup bool `mapstructure:"enable_catchup" yaml:"enable_catchup"` + // PullOnStartup controls whether the node attempts to catch up on missed blocks // automatically when it (re)starts and connects to peers. PullOnStartup bool `mapstructure:"pull_on_startup" yaml:"pull_on_startup"` diff --git a/config/settings/defaults.go b/config/settings/defaults.go index bf41097f..747ce7b5 100644 --- a/config/settings/defaults.go +++ b/config/settings/defaults.go @@ -82,10 +82,11 @@ func DefaultConfig() NodeConfig { GROTrack: false, }, FastSync: FastSyncSettings{ - Enabled: true, - EnablePulling: true, - PullOnStartup: true, - SyncTimeout: 10 * time.Minute, + Enabled: true, + EnablePulling: true, + EnableCatchup: false, + PullOnStartup: true, + SyncTimeout: 10 * time.Minute, AllowedPeers: []string{}, CatchUpFromBlock: 0, CatchUpPeer: "", diff --git a/config/settings/loader.go b/config/settings/loader.go index 095d2df3..017c4765 100644 --- a/config/settings/loader.go +++ b/config/settings/loader.go @@ -167,6 +167,7 @@ func setDefaults(v *viper.Viper) { // FastSync v.SetDefault("fastsync.enabled", d.FastSync.Enabled) v.SetDefault("fastsync.enable_pulling", d.FastSync.EnablePulling) + v.SetDefault("fastsync.enable_catchup", d.FastSync.EnableCatchup) v.SetDefault("fastsync.pull_on_startup", d.FastSync.PullOnStartup) v.SetDefault("fastsync.sync_timeout", d.FastSync.SyncTimeout) v.SetDefault("fastsync.allowed_peers", d.FastSync.AllowedPeers) diff --git a/main.go b/main.go index 4e8e3509..cc906517 100644 --- a/main.go +++ b/main.go @@ -1002,9 +1002,9 @@ func main() { // CatchUp sync: post-bootstrap reconciliation from a known block to realtime. // catch_up_peer is a plain peer ID (e.g. 12D3KooW...) — resolved from peerstore, // same pattern as the old HandleStartupSync startup path. - if fastSyncerV2 != nil && cfg.FastSync.EnablePulling && cfg.FastSync.CatchUpPeer != "" { + if fastSyncerV2 != nil && cfg.FastSync.EnablePulling && cfg.FastSync.EnableCatchup && cfg.FastSync.CatchUpPeer != "" { if cfg.FastSync.CatchUpFromBlock == 0 { - log.Warn().Msg("[CatchUpSync] catch_up_from_block not set — defaulting to 1 (full scan). Set to bootstrapTip+1 to limit scan range.") + log.Warn().Msg("[CatchUpSync] catch_up_from_block not set — defaulting to 0 (full scan from genesis). Set to bootstrapTip+1 to limit scan range.") } catchUpPeerIDStr := cfg.FastSync.CatchUpPeer fromBlock := cfg.FastSync.CatchUpFromBlock @@ -1041,9 +1041,9 @@ func main() { }); err != nil { log.Error().Err(err).Str("thread", GRO.StartupSyncThread).Msg("Failed to start CatchUpSync goroutine") } - // StartupSync (HandleStartupSync) disabled — catchup is the only startup sync path. - // } else if fastSyncerV2 != nil && cfg.FastSync.EnablePulling && cfg.FastSync.PullOnStartup { - // ... + // StartupSync (HandleStartupSync) disabled — catchup is the only startup sync path. + // } else if fastSyncerV2 != nil && cfg.FastSync.EnablePulling && cfg.FastSync.PullOnStartup { + // ... } else if fastSyncerV2 != nil && !cfg.FastSync.EnablePulling { log.Info().Msg("[FastSync] Node configured with enable_pulling=false (serve-only participant); skipping StartupSync") } From 9bfbbd7db848b9f9f8fae64784eeab25c32c58f1 Mon Sep 17 00:00:00 2001 From: Doc Date: Wed, 24 Jun 2026 15:31:56 +0530 Subject: [PATCH 22/23] refactor: simplify HandleCatchUpSync logic by removing default fromBlock handling - Removed the fallback logic for fromBlock in HandleCatchUpSync, which previously defaulted to 1 when fromBlock was 0. This change encourages callers to explicitly set fromBlock for clarity and correctness. - Updated comments to reflect the new expectations for the fromBlock parameter, enhancing code readability. These changes streamline the catchup synchronization process and improve the clarity of parameter usage. --- FastsyncV2/catchup.go | 8 -------- 1 file changed, 8 deletions(-) diff --git a/FastsyncV2/catchup.go b/FastsyncV2/catchup.go index 043dcb81..8e7d7995 100644 --- a/FastsyncV2/catchup.go +++ b/FastsyncV2/catchup.go @@ -64,14 +64,6 @@ import ( func (fs *FastsyncV2) HandleCatchUpSync(fromBlock uint64, targetPeer string) error { catchUpStart := time.Now() - // fromBlock=0 is a safety fallback only — callers should always pass - // bootstrapTip+1 (from config catch_up_from_block). Using localTip+1 here - // would silently skip gaps below localTip if Stage 2 was interrupted. - if fromBlock == 0 { - fromBlock = 1 - log.Printf("[CatchUpSync] fromBlock not set, defaulting to 1 (full scan from genesis)") - } - // Use a generous timeout — catching up on days of blocks takes much longer // than a normal incremental sync. Callers can wrap in their own deadline if needed. ctx, cancel := context.WithTimeout(context.Background(), fs.syncTimeout) From e9f087ef5c7310bb3260ba02dc1792b1b32edf56 Mon Sep 17 00:00:00 2001 From: Doc Date: Wed, 24 Jun 2026 15:37:21 +0530 Subject: [PATCH 23/23] enhance: improve logging and progress tracking in HandleCatchUpSync - Added detailed logging for header and block fetching phases in HandleCatchUpSync, including the number of headers/blocks being processed and the duration of each phase. - Introduced tagBlockCount function to calculate the total number of blocks described by a Tag, improving clarity in block counting. - Implemented watchProgress function to log ongoing progress during header and block synchronization, enhancing visibility into the catchup process. These changes improve the observability and performance tracking of the catchup synchronization process, providing better insights during execution. --- FastsyncV2/catchup.go | 58 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 54 insertions(+), 4 deletions(-) diff --git a/FastsyncV2/catchup.go b/FastsyncV2/catchup.go index 8e7d7995..53773bc3 100644 --- a/FastsyncV2/catchup.go +++ b/FastsyncV2/catchup.go @@ -150,16 +150,23 @@ func (fs *FastsyncV2) HandleCatchUpSync(fromBlock uint64, targetPeer string) err log.Printf("[CatchUpSync] phase 2: header sync [%d..%d]", fromBlock, remoteTip) if len(catchUpTag.Range) > 0 || len(catchUpTag.BlockNumber) > 0 { - log.Printf("[CatchUpSync] %d missing header range(s) to fetch", len(catchUpTag.Range)) + p2Blocks := tagBlockCount(catchUpTag) + p2Batches := (p2Blocks + constants.MAX_HEADERS_PER_REQUEST - 1) / constants.MAX_HEADERS_PER_REQUEST + log.Printf("[CatchUpSync] phase 2: fetching %d headers across ~%d batches (%d blocks/batch)", + p2Blocks, p2Batches, constants.MAX_HEADERS_PER_REQUEST) + p2Start := time.Now() + stopP2 := watchProgress("[CatchUpSync] phase 2", "headers", p2Blocks, p2Batches, p2Start) _, err = fs.HeaderRouter.HeaderSync( &headersyncpb.HeaderSyncRequest{Tag: catchUpTag}, remotes, false, // syncConfirmation=false: skip Merkle, we know the exact range ) + stopP2() if err != nil { return fmt.Errorf("catchup: header sync: %w", err) } - log.Printf("[CatchUpSync] phase 2 complete") + log.Printf("[CatchUpSync] phase 2 complete: %d headers in %s", + p2Blocks, time.Since(p2Start).Round(time.Millisecond)) } else { log.Printf("[CatchUpSync] phase 2 skipped: all headers present in [%d..%d]", fromBlock, remoteTip) } @@ -180,7 +187,12 @@ func (fs *FastsyncV2) HandleCatchUpSync(fromBlock uint64, targetPeer string) err if len(dataMissingTag.Range) == 0 && len(dataMissingTag.BlockNumber) == 0 { log.Printf("[CatchUpSync] phase 3 skipped: all blocks in [%d..%d] already have data", fromBlock, remoteTip) } else { - log.Printf("[CatchUpSync] phase 3: %d data-missing range(s) to fetch", len(dataMissingTag.Range)) + p3Blocks := tagBlockCount(dataMissingTag) + p3Batches := (p3Blocks + constants.MAX_DATA_PER_REQUEST - 1) / constants.MAX_DATA_PER_REQUEST + log.Printf("[CatchUpSync] phase 3: fetching %d blocks across ~%d batches (%d blocks/batch)", + p3Blocks, p3Batches, constants.MAX_DATA_PER_REQUEST) + p3Start := time.Now() + stopP3 := watchProgress("[CatchUpSync] phase 3", "blocks", p3Blocks, p3Batches, p3Start) dataSyncReq := &datasyncpb.DataSyncRequest{ Tag: dataMissingTag, Version: uint32(commsVersion), @@ -193,10 +205,12 @@ func (fs *FastsyncV2) HandleCatchUpSync(fromBlock uint64, targetPeer string) err }, } taggedAccounts, err = fs.DataRouter.DataSync(dataSyncReq, remotes) + stopP3() if err != nil { return fmt.Errorf("catchup: data sync: %w", err) } - log.Printf("[CatchUpSync] phase 3 complete") + log.Printf("[CatchUpSync] phase 3 complete: %d blocks in %s", + p3Blocks, time.Since(p3Start).Round(time.Millisecond)) } // Always scan local blocks [fromBlock..remoteTip] for tagged accounts and merge @@ -388,6 +402,42 @@ func blockNeedsDataSync(blk *types.ZKBlock) bool { return false } +// tagBlockCount returns the total number of blocks described by a Tag. +// Range entries contribute (end-start+1) each; individual BlockNumbers contribute 1 each. +func tagBlockCount(tag *taggingpb.Tag) uint64 { + if tag == nil { + return 0 + } + var n uint64 + for _, r := range tag.Range { + if r.End >= r.Start { + n += r.End - r.Start + 1 + } + } + n += uint64(len(tag.BlockNumber)) + return n +} + +// watchProgress starts a background goroutine that logs a "[phase] still running…" +// line every 10 s. Call the returned stop func to shut it down before logging completion. +func watchProgress(label, unit string, total, batches uint64, start time.Time) func() { + done := make(chan struct{}) + go func() { + ticker := time.NewTicker(10 * time.Second) + defer ticker.Stop() + for { + select { + case <-done: + return + case <-ticker.C: + log.Printf("%s: still running — %d %s / ~%d batches (elapsed %s)", + label, total, unit, batches, time.Since(start).Round(time.Second)) + } + } + }() + return func() { close(done) } +} + // buildDataMissingTag scans [fromBlock..remoteTip] and returns a Tag covering // blocks that need DataSync (absent or data-incomplete per blockNeedsDataSync). // Consecutive blocks needing DataSync are coalesced into a single RangeTag to