From 52c188da56ce94c89693f2c2bb669d4a8086eecb Mon Sep 17 00:00:00 2001 From: sumanjeet0012 Date: Tue, 14 Apr 2026 15:55:03 +0530 Subject: [PATCH 01/37] feat(bitswap): implement batch fetching of blocks and enhance file retrieval process for compatibility wth kubo --- examples/bitswap/bitswap.py | 28 ++-- libp2p/bitswap/cid.py | 11 +- libp2p/bitswap/client.py | 81 +++++++++- libp2p/bitswap/dag.py | 302 ++++++++++++++++++++---------------- 4 files changed, 270 insertions(+), 152 deletions(-) diff --git a/examples/bitswap/bitswap.py b/examples/bitswap/bitswap.py index 1a9c31cac..24437a772 100755 --- a/examples/bitswap/bitswap.py +++ b/examples/bitswap/bitswap.py @@ -76,9 +76,8 @@ async def run_provider(file_path: str, port: int = 0): # Create host host = new_host() - async with host.run(listen_addrs=listen_addrs): - peer_id = host.get_id() - logger.info(f"Peer ID: {peer_id}") + async with host.run(listen_addrs=listen_addrs), trio.open_nursery() as nursery: + logger.info(f"Peer ID: {host.get_id()}") # Get actual listening addresses addrs = host.get_addrs() @@ -91,7 +90,8 @@ async def run_provider(file_path: str, port: int = 0): await bitswap.start() logger.info("✓ Bitswap started") - # Create Merkle DAG + # Set nursery so bitswap can spawn background tasks + bitswap.set_nursery(nursery) dag = MerkleDag(bitswap) logger.info("") @@ -198,13 +198,14 @@ async def run_client( # Create host host = new_host() - async with host.run(listen_addrs=listen_addrs): + async with host.run(listen_addrs=listen_addrs), trio.open_nursery() as nursery: logger.info(f"Client Peer ID: {host.get_id()}") # Start Bitswap bitswap = BitswapClient(host) await bitswap.start() logger.info("✓ Bitswap started") + bitswap.set_nursery(nursery) try: # Connect to provider @@ -214,7 +215,6 @@ async def run_client( await host.connect(peer_info) logger.info("✓ Connected") - # Create Merkle DAG dag = MerkleDag(bitswap) logger.info("") @@ -232,7 +232,7 @@ def progress_callback(current: int, total: int, status: str): # Fetch file with automatic filename extraction try: file_data, filename = await dag.fetch_file( - root_cid, progress_callback=progress_callback + root_cid, progress_callback=progress_callback, timeout=120.0 ) # Show fetch statistics @@ -284,18 +284,18 @@ def progress_callback(current: int, total: int, status: str): logger.info("=" * 70) logger.info(f"Size: {format_size(len(file_data))}") - # Determine output filename + # Determine output filename (priority: metadata > generated) if filename: - output_filename = filename - logger.info(f"Filename: {filename} (from metadata)") + final_filename = filename + logger.info(f"Filename: {final_filename} (from metadata)") else: - output_filename = ( + final_filename = ( f"file_{format_cid_for_display(root_cid, max_len=16)}.bin" ) - logger.info(f"Filename: {output_filename} (no metadata)") + logger.info(f"Filename: {final_filename} (generated from CID)") # Handle filename conflicts - output_file = output_path / output_filename + output_file = output_path / final_filename if output_file.exists(): stem = output_file.stem suffix = output_file.suffix @@ -315,7 +315,9 @@ def progress_callback(current: int, total: int, status: str): except Exception as e: logger.error(f"Failed: {e}") logger.exception("Full traceback:") + raise finally: + pass # Nursery will cleanup background tasks await bitswap.stop() diff --git a/libp2p/bitswap/cid.py b/libp2p/bitswap/cid.py index 9f21d90de..0056d0710 100644 --- a/libp2p/bitswap/cid.py +++ b/libp2p/bitswap/cid.py @@ -209,7 +209,16 @@ def parse_cid(value: CIDInput) -> CIDv0 | CIDv1: return value if isinstance(value, bytes): - return make_cid(value) + try: + return make_cid(value) + except ValueError: + # make_cid(bytes) fails for raw CIDv0 buffers (multihash bytes). + # CIDv0 is simply a bare multihash, so try constructing directly. + try: + return CIDv0(value) + except Exception: + pass + raise if isinstance(value, str): cid_str = value.strip() diff --git a/libp2p/bitswap/client.py b/libp2p/bitswap/client.py index 96913567f..d9380bea9 100644 --- a/libp2p/bitswap/client.py +++ b/libp2p/bitswap/client.py @@ -153,6 +153,85 @@ async def add_block(self, cid: CIDInput, data: bytes) -> None: # Notify peers who wanted this block await self._notify_peers_about_block(cid_obj, data) + async def get_blocks_batch( + self, + cids: list[CIDInput], + peer_id: PeerID | None = None, + timeout: float = DEFAULT_TIMEOUT, + batch_size: int = 32, + ) -> dict[bytes, bytes]: + """ + Fetch multiple blocks in batches using a single wantlist per batch. + + Sends all CIDs in one wantlist message, waits for all responses on the + same stream. This avoids opening hundreds of individual streams which + causes Kubo to send GO_AWAY. + + Args: + cids: List of CIDs to fetch + peer_id: Optional specific peer to request from + timeout: Timeout per batch in seconds + batch_size: How many CIDs to request per wantlist message + + Returns: + Dict mapping cid_bytes -> block_data for all successfully fetched blocks + """ + results: dict[bytes, bytes] = {} + cid_objs = [parse_cid(c) for c in cids] + + # Check local store first + remaining: list[CIDObject] = [] + for cid_obj in cid_objs: + data = await self.block_store.get_block(cid_obj) + if data is not None: + results[cid_obj.buffer] = data + else: + remaining.append(cid_obj) + + if not remaining: + return results + + # Process in batches to avoid overwhelming the peer + for batch_start in range(0, len(remaining), batch_size): + batch = remaining[batch_start : batch_start + batch_size] + + # Register pending events for all CIDs in batch + for cid_obj in batch: + if cid_obj not in self._pending_requests: + self._pending_requests[cid_obj] = trio.Event() + await self.want_block(cid_obj, send_dont_have=True) + + # Send all CIDs in a single wantlist to the peer + if peer_id: + await self._send_wantlist_to_peer(peer_id, batch) + else: + await self._broadcast_wantlist(batch) + + # Wait for all blocks in this batch + try: + with trio.fail_after(timeout): + for cid_obj in batch: + if cid_obj in self._pending_requests: + await self._pending_requests[cid_obj].wait() + except trio.TooSlowError: + logger.warning(f"Batch timeout: {len(batch)} blocks, got partial results") + + # Collect results and clean up + for cid_obj in batch: + data = await self.block_store.get_block(cid_obj) + if data is not None: + results[cid_obj.buffer] = data + else: + logger.warning(f"Block not received: {format_cid_for_display(cid_obj)}") + + # Cleanup + if cid_obj in self._pending_requests: + del self._pending_requests[cid_obj] + if cid_obj in self._wantlist: + del self._wantlist[cid_obj] + + return results + async def get_block( self, cid: CIDInput, @@ -286,10 +365,8 @@ async def _request_block( # Send wantlist to peers if peer_id: - logger.info(f" → Sending wantlist to peer {peer_id}") await self._send_wantlist_to_peer(peer_id, [cid]) else: - logger.info(" → Broadcasting wantlist") await self._broadcast_wantlist([cid]) # Wait for block to arrive diff --git a/libp2p/bitswap/dag.py b/libp2p/bitswap/dag.py index 98ce469db..f2671357f 100644 --- a/libp2p/bitswap/dag.py +++ b/libp2p/bitswap/dag.py @@ -32,6 +32,7 @@ verify_cid, ) from .client import BitswapClient +from .errors import BlockNotFoundError from .dag_pb import ( create_file_node, decode_dag_pb, @@ -417,160 +418,189 @@ async def fetch_file( """ root_cid_bytes = cid_to_bytes(root_cid) - logger.info( - f"Fetching file: {format_cid_for_display(root_cid_bytes, max_len=16)}" - ) - logger.info( - "=== Starting file fetch for CID: " - f"{format_cid_for_display(root_cid_bytes)} ===" - ) + logger.info(f"Fetching file: {format_cid_for_display(root_cid_bytes)}") - # Get root block + # Step 1: Fetch the root block root_data = await self.bitswap.get_block(root_cid_bytes, peer_id, timeout) - - # Verify root block if not verify_cid(root_cid_bytes, root_data): - raise ValueError( - "Root block verification failed: " - f"{format_cid_for_display(root_cid_bytes)}" - ) + raise ValueError(f"Root block CID verification failed: {format_cid_for_display(root_cid_bytes)}") - # Check if it's a directory wrapper (IPFS-standard way for filename) + # Step 2: Handle directory wrapper (produced by `ipfs add --wrap-with-directory`) filename = None actual_file_cid = root_cid_bytes actual_file_data = root_data if is_directory_node(root_data): - logger.info("Root is a directory node, extracting file entry...") - links, _ = decode_dag_pb(root_data) - - if links: - # Get the first (and typically only) file entry - first_link = links[0] - filename = first_link.name if first_link.name else None + logger.info("Root is a directory node — extracting filename and file CID") + dir_links, _ = decode_dag_pb(root_data) + if dir_links: + first_link = dir_links[0] + filename = first_link.name or None actual_file_cid = first_link.cid - - logger.info(f"Extracted filename: {filename}") - logger.info( - f"Actual file CID: " - f"{format_cid_for_display(actual_file_cid, max_len=16)}" - ) - - # Fetch the actual file block - actual_file_data = await self.bitswap.get_block( - actual_file_cid, peer_id, timeout - ) - + logger.info(f"Filename from directory: {filename!r}") + actual_file_data = await self.bitswap.get_block(actual_file_cid, peer_id, timeout) if not verify_cid(actual_file_cid, actual_file_data): - raise ValueError( - "File block verification failed: " - f"{format_cid_for_display(actual_file_cid)}" - ) - - # Now process the actual file data - # Check if it's a DAG-PB file node - if is_file_node(actual_file_data): - logger.debug("Root is a DAG-PB file node, resolving chunks...") - - # Decode to get links and metadata - links, unixfs_data = decode_dag_pb(actual_file_data) - - if not links: - # File with inline data (small file) - logger.debug("File has inline data") - file_data = ( - unixfs_data.data if unixfs_data and unixfs_data.data else b"" - ) - - # Notify progress callback with metadata - if progress_callback: - await _call_progress_callback( - progress_callback, - len(file_data), - len(file_data), - f"metadata: size={len(file_data)}, chunks=0", - ) - - return file_data, filename - - # File with multiple chunks - total_size = unixfs_data.filesize if unixfs_data else 0 - logger.debug(f"File has {len(links)} chunks, total size: {total_size}") - logger.info( - f"Fetching multi-chunk file: {len(links)} chunks, {total_size} bytes" - ) - logger.info("=== Chunk CIDs to fetch ===") - for i, link in enumerate(links): - logger.info( - f" Chunk {i}: {format_cid_for_display(link.cid)} " - f"({link.size} bytes)" - ) - logger.info("=" * 50) - - # Notify progress callback with file metadata at the start + raise ValueError(f"File block CID verification failed: {format_cid_for_display(actual_file_cid)}") + + # Step 3: Handle raw block (not a DAG-PB node at all) + if not is_file_node(actual_file_data): + logger.info(f"Root is a raw block: {len(actual_file_data)} bytes") + return actual_file_data, filename + + # Step 4: Parse the file node + top_links, top_unixfs = decode_dag_pb(actual_file_data) + total_size = (top_unixfs.filesize if top_unixfs else 0) or sum(lnk.size for lnk in top_links) + logger.info(f"File node: {len(top_links)} top-level links, total size={total_size} bytes") + + # Step 5: Small file with inline data (no links) + if not top_links: + file_data = top_unixfs.data if top_unixfs and top_unixfs.data else b"" + logger.info(f"Inline file data: {len(file_data)} bytes") if progress_callback: - await _call_progress_callback( - progress_callback, - 0, - total_size, - f"metadata: size={total_size}, chunks={len(links)}", - ) - - file_data = b"" - bytes_fetched = 0 - - # Fetch each chunk - for i, link in enumerate(links): - if progress_callback: - await _call_progress_callback( - progress_callback, - bytes_fetched, - total_size, - f"fetching chunk {i + 1}/{len(links)}", - ) - - logger.info( - f"Fetching chunk {i + 1}/{len(links)}: " - f"CID={format_cid_for_display(link.cid)}" - ) - - # Fetch chunk - chunk_data = await self.bitswap.get_block(link.cid, peer_id, timeout) - - # Verify chunk - if not verify_cid(link.cid, chunk_data): - raise ValueError( - f"Chunk verification failed: {format_cid_for_display(link.cid)}" - ) + await _call_progress_callback(progress_callback, len(file_data), len(file_data), "completed") + return file_data, filename - file_data += chunk_data - bytes_fetched += len(chunk_data) + # Step 6: Collect all leaf CIDs without opening streams for intermediate nodes + # Strategy: Recursively batch-fetch all DAG nodes, then traverse locally to collect leaves + + logger.info(f"[DAG] Recursively batch-fetching DAG tree ({len(top_links)} top links)...") + print(f"[FETCH] Recursively batch-fetching DAG tree ({len(top_links)} top links)...", flush=True) + + # Map to store ALL fetched blocks (both intermediate and leaves) + all_blocks_map: dict[bytes, bytes] = {} + + async def _batch_fetch_tree(cid_list: list[bytes], depth: int) -> None: + """Recursively batch-fetch a level of DAG nodes and queue their children.""" + if not cid_list: + return + + logger.info(f"[DAG] Depth {depth}: batch-fetching {len(cid_list)} blocks...") + print(f"[FETCH] Depth {depth}: batch-fetching {len(cid_list)} blocks...", flush=True) + + # Batch-fetch this level's blocks + level_blocks = await self.bitswap.get_blocks_batch( + cid_list, peer_id=peer_id, timeout=timeout, batch_size=32 + ) + logger.info(f"[DAG] Depth {depth}: ✓ received {len(level_blocks)} blocks") + all_blocks_map.update(level_blocks) + + # Collect child CIDs for recursion + child_cids: list[bytes] = [] + for cid_bytes in cid_list: + block_data = level_blocks.get(cid_bytes) + if block_data is None: + logger.warning(f"[DAG] Depth {depth}: block {format_cid_for_display(cid_bytes)} missing after fetch") + continue + + if is_file_node(block_data): + node_links, _ = decode_dag_pb(block_data) + logger.debug(f"[DAG] Depth {depth}: {format_cid_for_display(cid_bytes)} has {len(node_links)} children") + for link in node_links: + child_cids.append(link.cid) + + # Recursively fetch next level if there are children + if child_cids: + logger.info(f"[DAG] Depth {depth}: found {len(child_cids)} child CIDs, fetching next level...") + await _batch_fetch_tree(child_cids, depth + 1) + + # Starting from the top-level links + await _batch_fetch_tree([top_link.cid for top_link in top_links], depth=1) + logger.info(f"[DAG] ✓ Tree fetch complete: {len(all_blocks_map)} total blocks") + print(f"[FETCH] ✓ Tree fetch complete: {len(all_blocks_map)} total blocks", flush=True) + + # Now traverse locally to collect leaf CIDs in order + ordered_leaf_cids: list[bytes] = [] + + def _collect_leaves_local(cid_bytes: bytes, depth: int = 1) -> None: + """Traverse locally-fetched blocks to collect leaf CIDs.""" + block_data = all_blocks_map.get(cid_bytes) + if block_data is None: + logger.warning(f"[DAG] Depth {depth}: block {format_cid_for_display(cid_bytes)} not in map") + return + + if not is_file_node(block_data): + # Raw block - it's a leaf + logger.debug(f"[DAG] Depth {depth}: raw block (leaf)") + ordered_leaf_cids.append(cid_bytes) + return + + node_links, _ = decode_dag_pb(block_data) + logger.debug(f"[DAG] Depth {depth}: {len(node_links)} links") + + if not node_links: + # Leaf node (no children, data is inline in UnixFS) + logger.debug(f"[DAG] Depth {depth}: file node with inline data (leaf)") + ordered_leaf_cids.append(cid_bytes) + return + + # Intermediate node - recursively process children + for j, child_link in enumerate(node_links): + logger.debug(f"[DAG] Depth {depth}: processing child {j+1}/{len(node_links)}") + _collect_leaves_local(child_link.cid, depth + 1) + + # Traverse each top-level block + for i, top_link in enumerate(top_links): + logger.info(f"[DAG] Traversing top-level {i+1}/{len(top_links)}...") + _collect_leaves_local(top_link.cid, depth=1) + + logger.info(f"[DAG] ✓ Collected {len(ordered_leaf_cids)} leaf blocks") + + # Step 7: Batch-fetch all leaf blocks (single wantlist per batch → avoids GO_AWAY) + if progress_callback: + await _call_progress_callback( + progress_callback, 0, total_size, + f"fetching {len(ordered_leaf_cids)} leaf blocks in batches" + ) - logger.info( - f"✓ Chunk {i + 1} fetched and verified: " - f"{len(chunk_data)} bytes (total: {bytes_fetched}/{total_size})" - ) - logger.debug( - f"Fetched chunk {i + 1}/{len(links)}: " - f"{format_cid_for_display(link.cid, max_len=16)} " - f"({len(chunk_data)} bytes)" - ) + logger.info(f"[DAG] Starting batch fetch of {len(ordered_leaf_cids)} leaves with batch_size=32, timeout={timeout}s") + print(f"[FETCH] Batch fetching {len(ordered_leaf_cids)} leaves (batch_size=32, timeout={timeout}s)", flush=True) + block_map = await self.bitswap.get_blocks_batch( + ordered_leaf_cids, peer_id=peer_id, timeout=timeout, batch_size=32 + ) + logger.info(f"[DAG] ✓ Batch fetch complete: {len(block_map)} blocks received") + print(f"[FETCH] ✓ Batch fetch complete: {len(block_map)} blocks", flush=True) + + # Step 8: Reassemble data in order, extracting UnixFS inline data from leaf nodes + file_data = b"" + bytes_fetched = 0 + missing_blocks = [] + for idx, leaf_cid in enumerate(ordered_leaf_cids): + leaf_raw = block_map.get(bytes(leaf_cid)) + if leaf_raw is None: + logger.error(f"[DAG] Leaf block {idx+1}/{len(ordered_leaf_cids)} MISSING: {format_cid_for_display(leaf_cid)}") + print(f"[FETCH] ✗ Leaf {idx+1}/{len(ordered_leaf_cids)} MISSING", flush=True) + missing_blocks.append(leaf_cid) + continue + + # Extract data: leaf blocks are UnixFS file nodes with inline data + if is_file_node(leaf_raw): + _, leaf_unixfs = decode_dag_pb(leaf_raw) + chunk = leaf_unixfs.data if leaf_unixfs and leaf_unixfs.data else b"" + logger.debug(f"[DAG] Leaf {idx+1}: extracted {len(chunk)} bytes from file node") + else: + chunk = leaf_raw + logger.debug(f"[DAG] Leaf {idx+1}: raw block {len(chunk)} bytes") + + file_data += chunk + bytes_fetched += len(chunk) + + if (idx + 1) % 10 == 0 or idx == len(ordered_leaf_cids) - 1: + logger.info(f"[DAG] Reassembled {idx+1}/{len(ordered_leaf_cids)} leaves: {bytes_fetched}/{total_size} bytes") + print(f"[FETCH] Reassembled {idx+1}/{len(ordered_leaf_cids)} leaves: {bytes_fetched}/{total_size} bytes", flush=True) if progress_callback: - await _call_progress_callback( - progress_callback, total_size, total_size, "completed" - ) + await _call_progress_callback(progress_callback, bytes_fetched, total_size, "downloading") - logger.info("=== File fetch completed ===") - logger.info(f"Total bytes fetched: {len(file_data)}") - logger.info(f"All {len(links)} chunks verified successfully") - logger.info("=" * 50) - logger.info(f"Fetched file: {len(file_data)} bytes") - return file_data, filename + if missing_blocks: + logger.error(f"[DAG] ✗ {len(missing_blocks)} blocks missing after batch fetch!") + raise BlockNotFoundError(f"{len(missing_blocks)} leaf blocks missing: {[format_cid_for_display(cid) for cid in missing_blocks[:5]]}...") + + if progress_callback: + await _call_progress_callback(progress_callback, total_size, total_size, "completed") - # Not a DAG-PB file node - return as raw data - logger.debug("Root is a raw block, returning directly") - return actual_file_data, filename + logger.info(f"[DAG] ✓ File fetch complete: {len(file_data)} bytes, filename={filename!r}") + print(f"[FETCH] ✓ DOWNLOAD COMPLETE: {len(file_data)} bytes", flush=True) + return file_data, filename async def get_file_info( self, root_cid: CIDInput, peer_id: PeerID | None = None, timeout: float = 30.0 From c7d74b0ee619eb10428fc7c9c8411724ccd00ac7 Mon Sep 17 00:00:00 2001 From: sumanjeet0012 Date: Sat, 18 Apr 2026 17:39:18 +0530 Subject: [PATCH 02/37] feat: enhance DHT record handling with signing and verification for icompatibility with ipfs kubo - Added support for signed records in the DHT by introducing `make_signed_put_record` function. - Updated `ValueStore` to create signed records when storing values. - Enhanced `Envelope` class to handle raw payload types for peer records. - Introduced utility functions for signing and verifying DHT records. - Updated protobuf definitions to include author and signature fields in records. - Improved logging and debug messages for better traceability. --- libp2p/bitswap/client.py | 7 +- libp2p/bitswap/dag.py | 180 ++++++++++++++++-------- libp2p/kad_dht/__init__.py | 2 + libp2p/kad_dht/kad_dht.py | 4 +- libp2p/kad_dht/pb/kademlia.proto | 6 +- libp2p/kad_dht/pb/kademlia_pb2.py | 38 ++--- libp2p/kad_dht/pb/kademlia_pb2.pyi | 218 ++++++++++------------------- libp2p/kad_dht/value_store.py | 16 ++- libp2p/peer/envelope.py | 61 +++++--- libp2p/peer/peer_record.py | 7 +- libp2p/records/record.py | 34 +++++ libp2p/records/utils.py | 52 +++++++ 12 files changed, 364 insertions(+), 261 deletions(-) diff --git a/libp2p/bitswap/client.py b/libp2p/bitswap/client.py index d9380bea9..8534c5444 100644 --- a/libp2p/bitswap/client.py +++ b/libp2p/bitswap/client.py @@ -175,6 +175,7 @@ async def get_blocks_batch( Returns: Dict mapping cid_bytes -> block_data for all successfully fetched blocks + """ results: dict[bytes, bytes] = {} cid_objs = [parse_cid(c) for c in cids] @@ -214,7 +215,8 @@ async def get_blocks_batch( if cid_obj in self._pending_requests: await self._pending_requests[cid_obj].wait() except trio.TooSlowError: - logger.warning(f"Batch timeout: {len(batch)} blocks, got partial results") + msg = f"Batch timeout: {len(batch)} blocks, got partial results" + logger.warning(msg) # Collect results and clean up for cid_obj in batch: @@ -222,7 +224,8 @@ async def get_blocks_batch( if data is not None: results[cid_obj.buffer] = data else: - logger.warning(f"Block not received: {format_cid_for_display(cid_obj)}") + cid_str = format_cid_for_display(cid_obj) + logger.warning(f"Block not received: {cid_str}") # Cleanup if cid_obj in self._pending_requests: diff --git a/libp2p/bitswap/dag.py b/libp2p/bitswap/dag.py index f2671357f..3112f21e5 100644 --- a/libp2p/bitswap/dag.py +++ b/libp2p/bitswap/dag.py @@ -32,13 +32,13 @@ verify_cid, ) from .client import BitswapClient -from .errors import BlockNotFoundError from .dag_pb import ( create_file_node, decode_dag_pb, is_directory_node, is_file_node, ) +from .errors import BlockNotFoundError logger = logging.getLogger(__name__) @@ -423,9 +423,11 @@ async def fetch_file( # Step 1: Fetch the root block root_data = await self.bitswap.get_block(root_cid_bytes, peer_id, timeout) if not verify_cid(root_cid_bytes, root_data): - raise ValueError(f"Root block CID verification failed: {format_cid_for_display(root_cid_bytes)}") + root_cid_str = format_cid_for_display(root_cid_bytes) + raise ValueError(f"Root block CID verification failed: {root_cid_str}") - # Step 2: Handle directory wrapper (produced by `ipfs add --wrap-with-directory`) + # Step 2: Handle directory wrapper + # (produced by `ipfs add --wrap-with-directory`) filename = None actual_file_cid = root_cid_bytes actual_file_data = root_data @@ -438,9 +440,13 @@ async def fetch_file( filename = first_link.name or None actual_file_cid = first_link.cid logger.info(f"Filename from directory: {filename!r}") - actual_file_data = await self.bitswap.get_block(actual_file_cid, peer_id, timeout) + actual_file_data = await self.bitswap.get_block( + actual_file_cid, peer_id, timeout + ) if not verify_cid(actual_file_cid, actual_file_data): - raise ValueError(f"File block CID verification failed: {format_cid_for_display(actual_file_cid)}") + f_cid_str = format_cid_for_display(actual_file_cid) + err_msg = f"File block CID verification failed: {f_cid_str}" + raise ValueError(err_msg) # Step 3: Handle raw block (not a DAG-PB node at all) if not is_file_node(actual_file_data): @@ -449,157 +455,211 @@ async def fetch_file( # Step 4: Parse the file node top_links, top_unixfs = decode_dag_pb(actual_file_data) - total_size = (top_unixfs.filesize if top_unixfs else 0) or sum(lnk.size for lnk in top_links) - logger.info(f"File node: {len(top_links)} top-level links, total size={total_size} bytes") + filesize = top_unixfs.filesize if top_unixfs else 0 + total_size = filesize or sum(lnk.size for lnk in top_links) + msg = f"File node: {len(top_links)} top-level links, total size={total_size}" + logger.info(f"{msg} bytes") # Step 5: Small file with inline data (no links) if not top_links: file_data = top_unixfs.data if top_unixfs and top_unixfs.data else b"" logger.info(f"Inline file data: {len(file_data)} bytes") if progress_callback: - await _call_progress_callback(progress_callback, len(file_data), len(file_data), "completed") + data_len = len(file_data) + await _call_progress_callback( + progress_callback, data_len, data_len, "completed" + ) return file_data, filename - # Step 6: Collect all leaf CIDs without opening streams for intermediate nodes - # Strategy: Recursively batch-fetch all DAG nodes, then traverse locally to collect leaves - - logger.info(f"[DAG] Recursively batch-fetching DAG tree ({len(top_links)} top links)...") - print(f"[FETCH] Recursively batch-fetching DAG tree ({len(top_links)} top links)...", flush=True) - + # Step 6: Collect all leaf CIDs without opening streams + # Strategy: Recursively batch-fetch all DAG nodes + # then traverse locally to collect leaves + + top_len = len(top_links) + msg1 = f"[DAG] Recursively batch-fetching DAG tree ({top_len} top links)..." + logger.info(msg1) + msg2 = f"[FETCH] Recursively batch-fetching DAG tree ({top_len} top links)..." + print(msg2, flush=True) + # Map to store ALL fetched blocks (both intermediate and leaves) all_blocks_map: dict[bytes, bytes] = {} - + async def _batch_fetch_tree(cid_list: list[bytes], depth: int) -> None: """Recursively batch-fetch a level of DAG nodes and queue their children.""" if not cid_list: return - - logger.info(f"[DAG] Depth {depth}: batch-fetching {len(cid_list)} blocks...") - print(f"[FETCH] Depth {depth}: batch-fetching {len(cid_list)} blocks...", flush=True) - + + c_count = len(cid_list) + msg1 = f"[DAG] Depth {depth}: batch-fetching {c_count} blocks..." + logger.info(msg1) + msg2 = f"[FETCH] Depth {depth}: batch-fetching {c_count} blocks..." + print(msg2, flush=True) + # Batch-fetch this level's blocks level_blocks = await self.bitswap.get_blocks_batch( - cid_list, peer_id=peer_id, timeout=timeout, batch_size=32 + list(cid_list), peer_id=peer_id, timeout=timeout, batch_size=32 ) logger.info(f"[DAG] Depth {depth}: ✓ received {len(level_blocks)} blocks") all_blocks_map.update(level_blocks) - + # Collect child CIDs for recursion child_cids: list[bytes] = [] for cid_bytes in cid_list: block_data = level_blocks.get(cid_bytes) if block_data is None: - logger.warning(f"[DAG] Depth {depth}: block {format_cid_for_display(cid_bytes)} missing after fetch") + c_str = format_cid_for_display(cid_bytes) + msg = f"[DAG] Depth {depth}: block {c_str} missing after" + logger.warning(f"{msg} fetch") continue - + if is_file_node(block_data): node_links, _ = decode_dag_pb(block_data) - logger.debug(f"[DAG] Depth {depth}: {format_cid_for_display(cid_bytes)} has {len(node_links)} children") + cid_str = format_cid_for_display(cid_bytes) + msg = f"[DAG] Depth {depth}: {cid_str} has {len(node_links)}" + logger.debug(f"{msg} children") for link in node_links: child_cids.append(link.cid) - + # Recursively fetch next level if there are children if child_cids: - logger.info(f"[DAG] Depth {depth}: found {len(child_cids)} child CIDs, fetching next level...") + ch_count = len(child_cids) + msg = f"[DAG] Depth {depth}: found {ch_count} child CIDs" + logger.info(f"{msg}, fetching next level...") await _batch_fetch_tree(child_cids, depth + 1) - + # Starting from the top-level links await _batch_fetch_tree([top_link.cid for top_link in top_links], depth=1) - logger.info(f"[DAG] ✓ Tree fetch complete: {len(all_blocks_map)} total blocks") - print(f"[FETCH] ✓ Tree fetch complete: {len(all_blocks_map)} total blocks", flush=True) - + blocks_count = len(all_blocks_map) + logger.info(f"[DAG] ✓ Tree fetch complete: {blocks_count} total blocks") + print(f"[FETCH] ✓ Tree fetch complete: {blocks_count} total blocks", flush=True) + # Now traverse locally to collect leaf CIDs in order ordered_leaf_cids: list[bytes] = [] - + def _collect_leaves_local(cid_bytes: bytes, depth: int = 1) -> None: """Traverse locally-fetched blocks to collect leaf CIDs.""" block_data = all_blocks_map.get(cid_bytes) if block_data is None: - logger.warning(f"[DAG] Depth {depth}: block {format_cid_for_display(cid_bytes)} not in map") + cid_str = format_cid_for_display(cid_bytes) + logger.warning(f"[DAG] Depth {depth}: block {cid_str} not in map") return - + if not is_file_node(block_data): # Raw block - it's a leaf logger.debug(f"[DAG] Depth {depth}: raw block (leaf)") ordered_leaf_cids.append(cid_bytes) return - + node_links, _ = decode_dag_pb(block_data) logger.debug(f"[DAG] Depth {depth}: {len(node_links)} links") - + if not node_links: # Leaf node (no children, data is inline in UnixFS) logger.debug(f"[DAG] Depth {depth}: file node with inline data (leaf)") ordered_leaf_cids.append(cid_bytes) return - + # Intermediate node - recursively process children for j, child_link in enumerate(node_links): - logger.debug(f"[DAG] Depth {depth}: processing child {j+1}/{len(node_links)}") + c_idx = j + 1 + c_tot = len(node_links) + msg = f"[DAG] Depth {depth}: processing child {c_idx}/{c_tot}" + logger.debug(msg) _collect_leaves_local(child_link.cid, depth + 1) - + # Traverse each top-level block for i, top_link in enumerate(top_links): - logger.info(f"[DAG] Traversing top-level {i+1}/{len(top_links)}...") + logger.info(f"[DAG] Traversing top-level {i + 1}/{len(top_links)}...") _collect_leaves_local(top_link.cid, depth=1) - + logger.info(f"[DAG] ✓ Collected {len(ordered_leaf_cids)} leaf blocks") - # Step 7: Batch-fetch all leaf blocks (single wantlist per batch → avoids GO_AWAY) + # Step 7: Batch-fetch all leaf blocks + # (single wantlist per batch → avoids GO_AWAY) if progress_callback: await _call_progress_callback( - progress_callback, 0, total_size, - f"fetching {len(ordered_leaf_cids)} leaf blocks in batches" + progress_callback, + 0, + total_size, + f"fetching {len(ordered_leaf_cids)} leaf blocks in batches", ) - logger.info(f"[DAG] Starting batch fetch of {len(ordered_leaf_cids)} leaves with batch_size=32, timeout={timeout}s") - print(f"[FETCH] Batch fetching {len(ordered_leaf_cids)} leaves (batch_size=32, timeout={timeout}s)", flush=True) + l_count = len(ordered_leaf_cids) + msg1 = f"[DAG] Starting batch fetch of {l_count} leaves with batch_size=32" + logger.info(f"{msg1}, timeout={timeout}s") + msg2 = ( + f"[FETCH] Batch fetching {l_count} leaves " + f"(batch_size=32, timeout={timeout}s)" + ) + print(msg2, flush=True) block_map = await self.bitswap.get_blocks_batch( - ordered_leaf_cids, peer_id=peer_id, timeout=timeout, batch_size=32 + list(ordered_leaf_cids), peer_id=peer_id, timeout=timeout, batch_size=32 ) logger.info(f"[DAG] ✓ Batch fetch complete: {len(block_map)} blocks received") print(f"[FETCH] ✓ Batch fetch complete: {len(block_map)} blocks", flush=True) - # Step 8: Reassemble data in order, extracting UnixFS inline data from leaf nodes + # Step 8: Reassemble data in order + # extracting UnixFS inline data from leaf nodes file_data = b"" bytes_fetched = 0 missing_blocks = [] for idx, leaf_cid in enumerate(ordered_leaf_cids): leaf_raw = block_map.get(bytes(leaf_cid)) if leaf_raw is None: - logger.error(f"[DAG] Leaf block {idx+1}/{len(ordered_leaf_cids)} MISSING: {format_cid_for_display(leaf_cid)}") - print(f"[FETCH] ✗ Leaf {idx+1}/{len(ordered_leaf_cids)} MISSING", flush=True) + l_idx = idx + 1 + t_leaves = len(ordered_leaf_cids) + c_str = format_cid_for_display(leaf_cid) + msg = f"[DAG] Leaf block {l_idx}/{t_leaves} MISSING: {c_str}" + logger.error(msg) + print(f"[FETCH] ✗ Leaf {l_idx}/{t_leaves} MISSING", flush=True) missing_blocks.append(leaf_cid) continue # Extract data: leaf blocks are UnixFS file nodes with inline data if is_file_node(leaf_raw): _, leaf_unixfs = decode_dag_pb(leaf_raw) - chunk = leaf_unixfs.data if leaf_unixfs and leaf_unixfs.data else b"" - logger.debug(f"[DAG] Leaf {idx+1}: extracted {len(chunk)} bytes from file node") + if leaf_unixfs is not None and leaf_unixfs.data: + chunk = leaf_unixfs.data + else: + chunk = b"" + chunk_len = len(chunk) + msg = f"[DAG] Leaf {idx + 1}: extracted {chunk_len} bytes" + logger.debug(f"{msg} from file node") else: chunk = leaf_raw - logger.debug(f"[DAG] Leaf {idx+1}: raw block {len(chunk)} bytes") + logger.debug(f"[DAG] Leaf {idx + 1}: raw block {len(chunk)} bytes") file_data += chunk bytes_fetched += len(chunk) if (idx + 1) % 10 == 0 or idx == len(ordered_leaf_cids) - 1: - logger.info(f"[DAG] Reassembled {idx+1}/{len(ordered_leaf_cids)} leaves: {bytes_fetched}/{total_size} bytes") - print(f"[FETCH] Reassembled {idx+1}/{len(ordered_leaf_cids)} leaves: {bytes_fetched}/{total_size} bytes", flush=True) + i_p = idx + 1 + t_l = len(ordered_leaf_cids) + p_str = f"{bytes_fetched}/{total_size} bytes" + logger.info(f"[DAG] Reassembled {i_p}/{t_l} leaves: {p_str}") + print(f"[FETCH] Reassembled {i_p}/{t_l} leaves: {p_str}", flush=True) if progress_callback: - await _call_progress_callback(progress_callback, bytes_fetched, total_size, "downloading") + await _call_progress_callback( + progress_callback, bytes_fetched, total_size, "downloading" + ) if missing_blocks: - logger.error(f"[DAG] ✗ {len(missing_blocks)} blocks missing after batch fetch!") - raise BlockNotFoundError(f"{len(missing_blocks)} leaf blocks missing: {[format_cid_for_display(cid) for cid in missing_blocks[:5]]}...") + missing_count = len(missing_blocks) + logger.error(f"[DAG] ✗ {missing_count} blocks missing after batch fetch!") + missing_list = [format_cid_for_display(cid) for cid in missing_blocks[:5]] + msg = f"{missing_count} leaf blocks missing: {missing_list}..." + raise BlockNotFoundError(msg) if progress_callback: - await _call_progress_callback(progress_callback, total_size, total_size, "completed") + await _call_progress_callback( + progress_callback, total_size, total_size, "completed" + ) - logger.info(f"[DAG] ✓ File fetch complete: {len(file_data)} bytes, filename={filename!r}") - print(f"[FETCH] ✓ DOWNLOAD COMPLETE: {len(file_data)} bytes", flush=True) + file_len = len(file_data) + msg = f"[DAG] ✓ File fetch complete: {file_len} bytes, filename={filename!r}" + logger.info(msg) + print(f"[FETCH] ✓ DOWNLOAD COMPLETE: {file_len} bytes", flush=True) return file_data, filename async def get_file_info( diff --git a/libp2p/kad_dht/__init__.py b/libp2p/kad_dht/__init__.py index 690d37bae..cf58e878f 100644 --- a/libp2p/kad_dht/__init__.py +++ b/libp2p/kad_dht/__init__.py @@ -7,6 +7,7 @@ from .kad_dht import ( KadDHT, + DHTMode, ) from .peer_routing import ( PeerRouting, @@ -23,6 +24,7 @@ __all__ = [ "KadDHT", + "DHTMode", "RoutingTable", "PeerRouting", "ValueStore", diff --git a/libp2p/kad_dht/kad_dht.py b/libp2p/kad_dht/kad_dht.py index 01aa23afc..bb11f1cb6 100644 --- a/libp2p/kad_dht/kad_dht.py +++ b/libp2p/kad_dht/kad_dht.py @@ -1058,7 +1058,7 @@ async def query_one(peer: ID) -> None: values = [rec.value for _p, rec in valid_records] best_idx = self.validator.select(key, values) logger.debug( - f"Selected best value at index {best_idx}using validator.select()" + f"Selected best value at index {best_idx} using validator.select()" ) best_peer, best_rec = valid_records[best_idx] @@ -1074,7 +1074,7 @@ async def query_one(peer: ID) -> None: if outdated_peers: logger.debug( - f"Propagating best value to {len(outdated_peers)}" + f"Propagating best value to {len(outdated_peers)} " "peers with outdated values" ) diff --git a/libp2p/kad_dht/pb/kademlia.proto b/libp2p/kad_dht/pb/kademlia.proto index 8d66cca5c..93fe526c3 100644 --- a/libp2p/kad_dht/pb/kademlia.proto +++ b/libp2p/kad_dht/pb/kademlia.proto @@ -4,6 +4,11 @@ message Record { bytes key = 1; bytes value = 2; string timeReceived = 5; + // author is the serialized public key of the record author (for unsigned records) + optional bytes author = 3; + // signature is the Ed25519/Secp256k1 signature over the record + // signing payload: "libp2p-record:" + key + value + optional bytes signature = 4; }; message Message { @@ -39,4 +44,3 @@ message Message { optional bytes senderRecord = 11; // Envelope(PeerRecord) encoded } -` diff --git a/libp2p/kad_dht/pb/kademlia_pb2.py b/libp2p/kad_dht/pb/kademlia_pb2.py index e41bb5292..19b4c2ca2 100644 --- a/libp2p/kad_dht/pb/kademlia_pb2.py +++ b/libp2p/kad_dht/pb/kademlia_pb2.py @@ -1,22 +1,12 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! -# NO CHECKED-IN PROTOBUF GENCODE # source: libp2p/kad_dht/pb/kademlia.proto -# Protobuf Python Version: 5.29.3 +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool -from google.protobuf import runtime_version as _runtime_version from google.protobuf import symbol_database as _symbol_database from google.protobuf.internal import builder as _builder -_runtime_version.ValidateProtobufRuntimeVersion( - _runtime_version.Domain.PUBLIC, - 5, - 29, - 3, - '', - 'libp2p/kad_dht/pb/kademlia.proto' -) # @@protoc_insertion_point(imports) _sym_db = _symbol_database.Default() @@ -24,21 +14,21 @@ -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n libp2p/kad_dht/pb/kademlia.proto\":\n\x06Record\x12\x0b\n\x03key\x18\x01 \x01(\x0c\x12\r\n\x05value\x18\x02 \x01(\x0c\x12\x14\n\x0ctimeReceived\x18\x05 \x01(\t\"\xa2\x04\n\x07Message\x12\"\n\x04type\x18\x01 \x01(\x0e\x32\x14.Message.MessageType\x12\x17\n\x0f\x63lusterLevelRaw\x18\n \x01(\x05\x12\x0b\n\x03key\x18\x02 \x01(\x0c\x12\x17\n\x06record\x18\x03 \x01(\x0b\x32\x07.Record\x12\"\n\x0b\x63loserPeers\x18\x08 \x03(\x0b\x32\r.Message.Peer\x12$\n\rproviderPeers\x18\t \x03(\x0b\x32\r.Message.Peer\x12\x19\n\x0csenderRecord\x18\x0b \x01(\x0cH\x00\x88\x01\x01\x1az\n\x04Peer\x12\n\n\x02id\x18\x01 \x01(\x0c\x12\r\n\x05\x61\x64\x64rs\x18\x02 \x03(\x0c\x12+\n\nconnection\x18\x03 \x01(\x0e\x32\x17.Message.ConnectionType\x12\x19\n\x0csignedRecord\x18\x04 \x01(\x0cH\x00\x88\x01\x01\x42\x0f\n\r_signedRecord\"i\n\x0bMessageType\x12\r\n\tPUT_VALUE\x10\x00\x12\r\n\tGET_VALUE\x10\x01\x12\x10\n\x0c\x41\x44\x44_PROVIDER\x10\x02\x12\x11\n\rGET_PROVIDERS\x10\x03\x12\r\n\tFIND_NODE\x10\x04\x12\x08\n\x04PING\x10\x05\"W\n\x0e\x43onnectionType\x12\x11\n\rNOT_CONNECTED\x10\x00\x12\r\n\tCONNECTED\x10\x01\x12\x0f\n\x0b\x43\x41N_CONNECT\x10\x02\x12\x12\n\x0e\x43\x41NNOT_CONNECT\x10\x03\x42\x0f\n\r_senderRecordb\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n libp2p/kad_dht/pb/kademlia.proto\"\x80\x01\n\x06Record\x12\x0b\n\x03key\x18\x01 \x01(\x0c\x12\r\n\x05value\x18\x02 \x01(\x0c\x12\x14\n\x0ctimeReceived\x18\x05 \x01(\t\x12\x13\n\x06\x61uthor\x18\x03 \x01(\x0cH\x00\x88\x01\x01\x12\x16\n\tsignature\x18\x04 \x01(\x0cH\x01\x88\x01\x01\x42\t\n\x07_authorB\x0c\n\n_signature\"\xa2\x04\n\x07Message\x12\"\n\x04type\x18\x01 \x01(\x0e\x32\x14.Message.MessageType\x12\x17\n\x0f\x63lusterLevelRaw\x18\n \x01(\x05\x12\x0b\n\x03key\x18\x02 \x01(\x0c\x12\x17\n\x06record\x18\x03 \x01(\x0b\x32\x07.Record\x12\"\n\x0b\x63loserPeers\x18\x08 \x03(\x0b\x32\r.Message.Peer\x12$\n\rproviderPeers\x18\t \x03(\x0b\x32\r.Message.Peer\x12\x19\n\x0csenderRecord\x18\x0b \x01(\x0cH\x00\x88\x01\x01\x1az\n\x04Peer\x12\n\n\x02id\x18\x01 \x01(\x0c\x12\r\n\x05\x61\x64\x64rs\x18\x02 \x03(\x0c\x12+\n\nconnection\x18\x03 \x01(\x0e\x32\x17.Message.ConnectionType\x12\x19\n\x0csignedRecord\x18\x04 \x01(\x0cH\x00\x88\x01\x01\x42\x0f\n\r_signedRecord\"i\n\x0bMessageType\x12\r\n\tPUT_VALUE\x10\x00\x12\r\n\tGET_VALUE\x10\x01\x12\x10\n\x0c\x41\x44\x44_PROVIDER\x10\x02\x12\x11\n\rGET_PROVIDERS\x10\x03\x12\r\n\tFIND_NODE\x10\x04\x12\x08\n\x04PING\x10\x05\"W\n\x0e\x43onnectionType\x12\x11\n\rNOT_CONNECTED\x10\x00\x12\r\n\tCONNECTED\x10\x01\x12\x0f\n\x0b\x43\x41N_CONNECT\x10\x02\x12\x12\n\x0e\x43\x41NNOT_CONNECT\x10\x03\x42\x0f\n\r_senderRecordb\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'libp2p.kad_dht.pb.kademlia_pb2', _globals) -if not _descriptor._USE_C_DESCRIPTORS: - DESCRIPTOR._loaded_options = None - _globals['_RECORD']._serialized_start=36 - _globals['_RECORD']._serialized_end=94 - _globals['_MESSAGE']._serialized_start=97 - _globals['_MESSAGE']._serialized_end=643 - _globals['_MESSAGE_PEER']._serialized_start=308 - _globals['_MESSAGE_PEER']._serialized_end=430 - _globals['_MESSAGE_MESSAGETYPE']._serialized_start=432 - _globals['_MESSAGE_MESSAGETYPE']._serialized_end=537 - _globals['_MESSAGE_CONNECTIONTYPE']._serialized_start=539 - _globals['_MESSAGE_CONNECTIONTYPE']._serialized_end=626 +if _descriptor._USE_C_DESCRIPTORS == False: + DESCRIPTOR._options = None + _globals['_RECORD']._serialized_start=37 + _globals['_RECORD']._serialized_end=165 + _globals['_MESSAGE']._serialized_start=168 + _globals['_MESSAGE']._serialized_end=714 + _globals['_MESSAGE_PEER']._serialized_start=379 + _globals['_MESSAGE_PEER']._serialized_end=501 + _globals['_MESSAGE_MESSAGETYPE']._serialized_start=503 + _globals['_MESSAGE_MESSAGETYPE']._serialized_end=608 + _globals['_MESSAGE_CONNECTIONTYPE']._serialized_start=610 + _globals['_MESSAGE_CONNECTIONTYPE']._serialized_end=697 # @@protoc_insertion_point(module_scope) diff --git a/libp2p/kad_dht/pb/kademlia_pb2.pyi b/libp2p/kad_dht/pb/kademlia_pb2.pyi index 641ae66ae..9caf65ec9 100644 --- a/libp2p/kad_dht/pb/kademlia_pb2.pyi +++ b/libp2p/kad_dht/pb/kademlia_pb2.pyi @@ -1,144 +1,74 @@ -""" -@generated by mypy-protobuf. Do not edit manually! -isort:skip_file -""" - -import builtins -import collections.abc -import google.protobuf.descriptor -import google.protobuf.internal.containers -import google.protobuf.internal.enum_type_wrapper -import google.protobuf.message -import sys -import typing - -if sys.version_info >= (3, 10): - import typing as typing_extensions -else: - import typing_extensions - -DESCRIPTOR: google.protobuf.descriptor.FileDescriptor - -@typing.final -class Record(google.protobuf.message.Message): - DESCRIPTOR: google.protobuf.descriptor.Descriptor - - KEY_FIELD_NUMBER: builtins.int - VALUE_FIELD_NUMBER: builtins.int - TIMERECEIVED_FIELD_NUMBER: builtins.int - key: builtins.bytes - value: builtins.bytes - timeReceived: builtins.str - def __init__( - self, - *, - key: builtins.bytes = ..., - value: builtins.bytes = ..., - timeReceived: builtins.str = ..., - ) -> None: ... - def ClearField(self, field_name: typing.Literal["key", b"key", "timeReceived", b"timeReceived", "value", b"value"]) -> None: ... - -global___Record = Record - -@typing.final -class Message(google.protobuf.message.Message): - DESCRIPTOR: google.protobuf.descriptor.Descriptor - - class _MessageType: - ValueType = typing.NewType("ValueType", builtins.int) - V: typing_extensions.TypeAlias = ValueType - - class _MessageTypeEnumTypeWrapper(google.protobuf.internal.enum_type_wrapper._EnumTypeWrapper[Message._MessageType.ValueType], builtins.type): - DESCRIPTOR: google.protobuf.descriptor.EnumDescriptor - PUT_VALUE: Message._MessageType.ValueType # 0 - GET_VALUE: Message._MessageType.ValueType # 1 - ADD_PROVIDER: Message._MessageType.ValueType # 2 - GET_PROVIDERS: Message._MessageType.ValueType # 3 - FIND_NODE: Message._MessageType.ValueType # 4 - PING: Message._MessageType.ValueType # 5 - - class MessageType(_MessageType, metaclass=_MessageTypeEnumTypeWrapper): ... - PUT_VALUE: Message.MessageType.ValueType # 0 - GET_VALUE: Message.MessageType.ValueType # 1 - ADD_PROVIDER: Message.MessageType.ValueType # 2 - GET_PROVIDERS: Message.MessageType.ValueType # 3 - FIND_NODE: Message.MessageType.ValueType # 4 - PING: Message.MessageType.ValueType # 5 - - class _ConnectionType: - ValueType = typing.NewType("ValueType", builtins.int) - V: typing_extensions.TypeAlias = ValueType - - class _ConnectionTypeEnumTypeWrapper(google.protobuf.internal.enum_type_wrapper._EnumTypeWrapper[Message._ConnectionType.ValueType], builtins.type): - DESCRIPTOR: google.protobuf.descriptor.EnumDescriptor - NOT_CONNECTED: Message._ConnectionType.ValueType # 0 - CONNECTED: Message._ConnectionType.ValueType # 1 - CAN_CONNECT: Message._ConnectionType.ValueType # 2 - CANNOT_CONNECT: Message._ConnectionType.ValueType # 3 - - class ConnectionType(_ConnectionType, metaclass=_ConnectionTypeEnumTypeWrapper): ... - NOT_CONNECTED: Message.ConnectionType.ValueType # 0 - CONNECTED: Message.ConnectionType.ValueType # 1 - CAN_CONNECT: Message.ConnectionType.ValueType # 2 - CANNOT_CONNECT: Message.ConnectionType.ValueType # 3 - - @typing.final - class Peer(google.protobuf.message.Message): - DESCRIPTOR: google.protobuf.descriptor.Descriptor - - ID_FIELD_NUMBER: builtins.int - ADDRS_FIELD_NUMBER: builtins.int - CONNECTION_FIELD_NUMBER: builtins.int - SIGNEDRECORD_FIELD_NUMBER: builtins.int - id: builtins.bytes - connection: global___Message.ConnectionType.ValueType - signedRecord: builtins.bytes - """Envelope(PeerRecord) encoded""" - @property - def addrs(self) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.bytes]: ... - def __init__( - self, - *, - id: builtins.bytes = ..., - addrs: collections.abc.Iterable[builtins.bytes] | None = ..., - connection: global___Message.ConnectionType.ValueType = ..., - signedRecord: builtins.bytes | None = ..., - ) -> None: ... - def HasField(self, field_name: typing.Literal["_signedRecord", b"_signedRecord", "signedRecord", b"signedRecord"]) -> builtins.bool: ... - def ClearField(self, field_name: typing.Literal["_signedRecord", b"_signedRecord", "addrs", b"addrs", "connection", b"connection", "id", b"id", "signedRecord", b"signedRecord"]) -> None: ... - def WhichOneof(self, oneof_group: typing.Literal["_signedRecord", b"_signedRecord"]) -> typing.Literal["signedRecord"] | None: ... - - TYPE_FIELD_NUMBER: builtins.int - CLUSTERLEVELRAW_FIELD_NUMBER: builtins.int - KEY_FIELD_NUMBER: builtins.int - RECORD_FIELD_NUMBER: builtins.int - CLOSERPEERS_FIELD_NUMBER: builtins.int - PROVIDERPEERS_FIELD_NUMBER: builtins.int - SENDERRECORD_FIELD_NUMBER: builtins.int - type: global___Message.MessageType.ValueType - clusterLevelRaw: builtins.int - key: builtins.bytes - senderRecord: builtins.bytes - """Envelope(PeerRecord) encoded""" - @property - def record(self) -> global___Record: ... - @property - def closerPeers(self) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[global___Message.Peer]: ... - @property - def providerPeers(self) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[global___Message.Peer]: ... - def __init__( - self, - *, - type: global___Message.MessageType.ValueType = ..., - clusterLevelRaw: builtins.int = ..., - key: builtins.bytes = ..., - record: global___Record | None = ..., - closerPeers: collections.abc.Iterable[global___Message.Peer] | None = ..., - providerPeers: collections.abc.Iterable[global___Message.Peer] | None = ..., - senderRecord: builtins.bytes | None = ..., - ) -> None: ... - def HasField(self, field_name: typing.Literal["_senderRecord", b"_senderRecord", "record", b"record", "senderRecord", b"senderRecord"]) -> builtins.bool: ... - def ClearField(self, field_name: typing.Literal["_senderRecord", b"_senderRecord", "closerPeers", b"closerPeers", "clusterLevelRaw", b"clusterLevelRaw", "key", b"key", "providerPeers", b"providerPeers", "record", b"record", "senderRecord", b"senderRecord", "type", b"type"]) -> None: ... - def WhichOneof(self, oneof_group: typing.Literal["_senderRecord", b"_senderRecord"]) -> typing.Literal["senderRecord"] | None: ... - -global___Message = Message +from google.protobuf.internal import containers as _containers +from google.protobuf.internal import enum_type_wrapper as _enum_type_wrapper +from google.protobuf import descriptor as _descriptor +from google.protobuf import message as _message +from typing import ClassVar as _ClassVar, Iterable as _Iterable, Mapping as _Mapping, Optional as _Optional, Union as _Union + +DESCRIPTOR: _descriptor.FileDescriptor + +class Record(_message.Message): + __slots__ = ("key", "value", "timeReceived", "author", "signature") + KEY_FIELD_NUMBER: _ClassVar[int] + VALUE_FIELD_NUMBER: _ClassVar[int] + TIMERECEIVED_FIELD_NUMBER: _ClassVar[int] + AUTHOR_FIELD_NUMBER: _ClassVar[int] + SIGNATURE_FIELD_NUMBER: _ClassVar[int] + key: bytes + value: bytes + timeReceived: str + author: bytes + signature: bytes + def __init__(self, key: _Optional[bytes] = ..., value: _Optional[bytes] = ..., timeReceived: _Optional[str] = ..., author: _Optional[bytes] = ..., signature: _Optional[bytes] = ...) -> None: ... + +class Message(_message.Message): + __slots__ = ("type", "clusterLevelRaw", "key", "record", "closerPeers", "providerPeers", "senderRecord") + class MessageType(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): + __slots__ = () + PUT_VALUE: _ClassVar[Message.MessageType] + GET_VALUE: _ClassVar[Message.MessageType] + ADD_PROVIDER: _ClassVar[Message.MessageType] + GET_PROVIDERS: _ClassVar[Message.MessageType] + FIND_NODE: _ClassVar[Message.MessageType] + PING: _ClassVar[Message.MessageType] + PUT_VALUE: Message.MessageType + GET_VALUE: Message.MessageType + ADD_PROVIDER: Message.MessageType + GET_PROVIDERS: Message.MessageType + FIND_NODE: Message.MessageType + PING: Message.MessageType + class ConnectionType(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): + __slots__ = () + NOT_CONNECTED: _ClassVar[Message.ConnectionType] + CONNECTED: _ClassVar[Message.ConnectionType] + CAN_CONNECT: _ClassVar[Message.ConnectionType] + CANNOT_CONNECT: _ClassVar[Message.ConnectionType] + NOT_CONNECTED: Message.ConnectionType + CONNECTED: Message.ConnectionType + CAN_CONNECT: Message.ConnectionType + CANNOT_CONNECT: Message.ConnectionType + class Peer(_message.Message): + __slots__ = ("id", "addrs", "connection", "signedRecord") + ID_FIELD_NUMBER: _ClassVar[int] + ADDRS_FIELD_NUMBER: _ClassVar[int] + CONNECTION_FIELD_NUMBER: _ClassVar[int] + SIGNEDRECORD_FIELD_NUMBER: _ClassVar[int] + id: bytes + addrs: _containers.RepeatedScalarFieldContainer[bytes] + connection: Message.ConnectionType + signedRecord: bytes + def __init__(self, id: _Optional[bytes] = ..., addrs: _Optional[_Iterable[bytes]] = ..., connection: _Optional[_Union[Message.ConnectionType, str]] = ..., signedRecord: _Optional[bytes] = ...) -> None: ... + TYPE_FIELD_NUMBER: _ClassVar[int] + CLUSTERLEVELRAW_FIELD_NUMBER: _ClassVar[int] + KEY_FIELD_NUMBER: _ClassVar[int] + RECORD_FIELD_NUMBER: _ClassVar[int] + CLOSERPEERS_FIELD_NUMBER: _ClassVar[int] + PROVIDERPEERS_FIELD_NUMBER: _ClassVar[int] + SENDERRECORD_FIELD_NUMBER: _ClassVar[int] + type: Message.MessageType + clusterLevelRaw: int + key: bytes + record: Record + closerPeers: _containers.RepeatedCompositeFieldContainer[Message.Peer] + providerPeers: _containers.RepeatedCompositeFieldContainer[Message.Peer] + senderRecord: bytes + def __init__(self, type: _Optional[_Union[Message.MessageType, str]] = ..., clusterLevelRaw: _Optional[int] = ..., key: _Optional[bytes] = ..., record: _Optional[_Union[Record, _Mapping]] = ..., closerPeers: _Optional[_Iterable[_Union[Message.Peer, _Mapping]]] = ..., providerPeers: _Optional[_Iterable[_Union[Message.Peer, _Mapping]]] = ..., senderRecord: _Optional[bytes] = ...) -> None: ... diff --git a/libp2p/kad_dht/value_store.py b/libp2p/kad_dht/value_store.py index 90cd77ae4..a101a7289 100644 --- a/libp2p/kad_dht/value_store.py +++ b/libp2p/kad_dht/value_store.py @@ -20,6 +20,7 @@ ID, ) from libp2p.peer.peerstore import env_to_send_in_RPC +from libp2p.records.record import make_signed_put_record from .common import ( DEFAULT_TTL, @@ -65,14 +66,17 @@ def put(self, key: bytes, value: bytes, validity: float = 0.0) -> None: None """ - from libp2p.records.record import make_put_record - if validity == 0.0: validity = time.time() + DEFAULT_TTL logger.debug( "Storing value for key %s... with validity %s", key.hex(), validity ) - record = make_put_record(key, value) + + # Create a signed record using the host's private key + private_key = self.host.get_private_key() + record = make_signed_put_record(key, value, private_key) + + # Set timeReceived when storing locally record.timeReceived = str(time.time()) self.store[key] = (record, validity) @@ -127,7 +131,7 @@ async def _store_at_peer(self, peer_id: ID, key: bytes, value: bytes) -> bool: message.key = key message.record.key = key message.record.value = value - message.record.timeReceived = str(time.time()) + # Note: timeReceived will be set by the receiving peer when storing # Serialize and send the protobuf message with length prefix proto_bytes = message.SerializeToString() @@ -320,6 +324,10 @@ async def _get_from_peer( logger.debug( f"Received value for key {key.hex()} from peer {peer_id}" ) + + # Update timeReceived to current time (when we received it locally) + response.record.timeReceived = str(time.time()) + return response.record if return_record else response.record.value # Handle case where value is not found but peer infos are returned diff --git a/libp2p/peer/envelope.py b/libp2p/peer/envelope.py index 1fcbb1c75..9a7f6466f 100644 --- a/libp2p/peer/envelope.py +++ b/libp2p/peer/envelope.py @@ -1,7 +1,7 @@ from typing import Any, cast import multiaddr -from multicodec import Code, get_codec, get_prefix +from multicodec import Code, get_prefix from multicodec.code_table import LIBP2P_PEER_RECORD from libp2p.crypto.ed25519 import Ed25519PublicKey @@ -12,6 +12,7 @@ import libp2p.peer.pb.envelope_pb2 as pb import libp2p.peer.pb.peer_record_pb2 as record_pb from libp2p.peer.peer_record import ( + PEER_RECORD_ENVELOPE_PAYLOAD_TYPE, PeerRecord, peer_record_from_protobuf, unmarshal_record, @@ -19,9 +20,10 @@ from libp2p.utils.varint import encode_uvarint ENVELOPE_DOMAIN = "libp2p-peer-record" -# Multicodec-based codec for peer records +# Multicodec Code object (for internal use / comparison only) PEER_RECORD_CODE: Code = LIBP2P_PEER_RECORD -PEER_RECORD_CODEC: bytes = get_prefix(str(PEER_RECORD_CODE)) +# Wire-format payload type bytes — matches go-libp2p: []byte{0x03, 0x01} +PEER_RECORD_CODEC: bytes = PEER_RECORD_ENVELOPE_PAYLOAD_TYPE class Envelope: @@ -40,7 +42,9 @@ class Envelope: """ public_key: PublicKey - payload_type_code: Code + # payload_type is stored as raw bytes (wire format), matching go-libp2p. + # For PeerRecord envelopes this is bytes([0x03, 0x01]), NOT varint-encoded. + _payload_type: bytes raw_payload: bytes signature: bytes @@ -56,28 +60,42 @@ def __init__( ): self.public_key = public_key - # Normalise payload_type to a Code instance + # Normalise payload_type to raw bytes if isinstance(payload_type, bytes): - try: - codec_name = get_codec(payload_type) - self.payload_type_code = Code.from_string(codec_name) - except Exception as e: - raise ValueError(f"Invalid codec: {e}") + # Already raw bytes — use as-is (this is the go-libp2p wire format) + self._payload_type = payload_type elif isinstance(payload_type, str): - try: - self.payload_type_code = Code.from_string(payload_type) - except Exception as e: - raise ValueError(f"Invalid codec: {e}") + # Treat as codec name, encode to raw prefix bytes + self._payload_type = get_prefix(payload_type) + elif isinstance(payload_type, Code): + if payload_type == PEER_RECORD_CODE: + # Use the go-libp2p compatible raw bytes, not varint + self._payload_type = PEER_RECORD_ENVELOPE_PAYLOAD_TYPE + else: + self._payload_type = get_prefix(str(payload_type)) else: - self.payload_type_code = payload_type + self._payload_type = bytes(payload_type) self.raw_payload = raw_payload self.signature = signature @property def payload_type(self) -> bytes: - """Return the multicodec-prefixed payload type.""" - return get_prefix(str(self.payload_type_code)) + """Return the raw payload type bytes (wire format).""" + return self._payload_type + + @property + def payload_type_code(self) -> Code: + """Return the multicodec Code for this payload type (best-effort).""" + return PEER_RECORD_CODE + + @payload_type_code.setter + def payload_type_code(self, value: Code) -> None: + """Update the raw payload_type bytes from a Code value.""" + if value == PEER_RECORD_CODE: + self._payload_type = PEER_RECORD_ENVELOPE_PAYLOAD_TYPE + else: + self._payload_type = get_prefix(str(value)) def marshal_envelope(self) -> bytes: """ @@ -125,10 +143,9 @@ def record(self) -> PeerRecord: return self._cached_record try: - if self.payload_type_code != PEER_RECORD_CODE: + if self._payload_type != PEER_RECORD_ENVELOPE_PAYLOAD_TYPE: raise ValueError( - f"Unsupported payload type in envelope: " - f"{self.payload_type_code.name}" + f"Unsupported payload type in envelope: {self._payload_type.hex()}" ) msg = record_pb.PeerRecord() msg.ParseFromString(self.raw_payload) @@ -154,7 +171,7 @@ def equal(self, other: Any) -> bool: if isinstance(other, Envelope): return ( self.public_key.__eq__(other.public_key) - and self.payload_type_code == other.payload_type_code + and self._payload_type == other._payload_type and self.signature == other.signature and self.raw_payload == other.raw_payload ) @@ -217,7 +234,7 @@ def seal_record(record: PeerRecord, private_key: PrivateKey) -> Envelope: return Envelope( public_key=private_key.get_public_key(), - payload_type=PEER_RECORD_CODE, + payload_type=PEER_RECORD_ENVELOPE_PAYLOAD_TYPE, raw_payload=payload, signature=signature, ) diff --git a/libp2p/peer/peer_record.py b/libp2p/peer/peer_record.py index 0fff196f0..26676f983 100644 --- a/libp2p/peer/peer_record.py +++ b/libp2p/peer/peer_record.py @@ -4,7 +4,7 @@ from typing import Any from multiaddr import Multiaddr -from multicodec import Code, get_prefix +from multicodec import Code from multicodec.code_table import LIBP2P_PEER_RECORD from libp2p.abc import IPeerRecord @@ -14,7 +14,10 @@ PEER_RECORD_ENVELOPE_DOMAIN = "libp2p-peer-record" PEER_RECORD_ENVELOPE_CODE: Code = LIBP2P_PEER_RECORD -PEER_RECORD_ENVELOPE_PAYLOAD_TYPE = get_prefix(str(PEER_RECORD_ENVELOPE_CODE)) +# go-libp2p uses raw bytes [0x03, 0x01] for the peer-record payload type +# (NOT varint-encoded). See: https://github.com/libp2p/go-libp2p/blob/master/core/peer/record.go +# PeerRecordEnvelopePayloadType = []byte{0x03, 0x01} +PEER_RECORD_ENVELOPE_PAYLOAD_TYPE = bytes([0x03, 0x01]) _last_timestamp_lock = threading.Lock() _last_timestamp: int = 0 diff --git a/libp2p/records/record.py b/libp2p/records/record.py index 8644e3c09..87dd96b1c 100644 --- a/libp2p/records/record.py +++ b/libp2p/records/record.py @@ -1,4 +1,6 @@ +from libp2p.crypto.keys import PrivateKey from libp2p.kad_dht.pb import kademlia_pb2 as record_pb2 +from libp2p.records.utils import sign_record def make_put_record(key: bytes, value: bytes) -> record_pb2.Record: @@ -17,3 +19,35 @@ def make_put_record(key: bytes, value: bytes) -> record_pb2.Record: record.key = key record.value = value return record + + +def make_signed_put_record( + key: bytes, value: bytes, private_key: PrivateKey +) -> record_pb2.Record: + """ + Create a signed Record object with the specified key, value, and signature. + + The record is signed using the libp2p record signing convention: + signature = sign("libp2p-record:" + key + value) + + This matches go-libp2p's record signing behavior for DHT PUT_VALUE. + + Args: + key (bytes): The key for the record. + value (bytes): The value to associate with the key in the record. + private_key (PrivateKey): The private key to sign the record with. + + Returns: + record_pb2.Record: A signed Record object. + + """ + record = record_pb2.Record() + record.key = key + record.value = value + + # Sign the record + signature, author_public_key = sign_record(private_key, key, value) + record.signature = signature + record.author = author_public_key + + return record diff --git a/libp2p/records/utils.py b/libp2p/records/utils.py index 82161beb3..35d760dc3 100644 --- a/libp2p/records/utils.py +++ b/libp2p/records/utils.py @@ -1,7 +1,59 @@ +from libp2p.crypto.ed25519 import Ed25519PublicKey +from libp2p.crypto.keys import PrivateKey + + class InvalidRecordType(Exception): pass +def sign_record( + private_key: PrivateKey, key: bytes, value: bytes +) -> tuple[bytes, bytes]: + """ + Sign a DHT record using the given private key. + + The signature is computed over "libp2p-record:" + key + value. + + Args: + private_key: The private key to sign with + key: The record key + value: The record value + + Returns: + tuple[bytes, bytes]: A tuple of (signature, author_public_key_bytes) + + """ + signing_payload = b"libp2p-record:" + key + value + signature = private_key.sign(signing_payload) + public_key = private_key.get_public_key() + author_bytes = public_key.to_bytes() + return signature, author_bytes + + +def verify_record( + signature: bytes, author_public_key: bytes, key: bytes, value: bytes +) -> bool: + """ + Verify a signed DHT record. + + Args: + signature: The record signature + author_public_key: The serialized public key of the author + key: The record key + value: The record value + + Returns: + bool: True if the signature is valid, False otherwise + + """ + try: + public_key = Ed25519PublicKey.from_bytes(author_public_key) + signing_payload = b"libp2p-record:" + key + value + return public_key.verify(signing_payload, signature) + except Exception: + return False + + def split_key(key: str) -> tuple[str, str]: """ Split a record key into its type and the rest. The key must start with From 5637cc30cfd456b77fddb3741dcd8f2ad6bcb9ec Mon Sep 17 00:00:00 2001 From: sumanjeet0012 Date: Sun, 3 May 2026 12:33:41 +0530 Subject: [PATCH 03/37] feat: add FilesystemBlockStore for persistent block storage and enhance DAG-PB encoding Co-authored-by: Copilot --- libp2p/bitswap/__init__.py | 3 +- libp2p/bitswap/block_store.py | 98 +++++++++++++++++++++++++++++++++++ libp2p/bitswap/dag_pb.py | 43 +++++++++------ 3 files changed, 127 insertions(+), 17 deletions(-) diff --git a/libp2p/bitswap/__init__.py b/libp2p/bitswap/__init__.py index 756ad5793..12abf467c 100644 --- a/libp2p/bitswap/__init__.py +++ b/libp2p/bitswap/__init__.py @@ -31,7 +31,7 @@ New code should prefer the object-returning variants above. """ -from .block_store import BlockStore, MemoryBlockStore +from .block_store import BlockStore, FilesystemBlockStore, MemoryBlockStore from .cid import ( CID_V0, CID_V1, @@ -71,6 +71,7 @@ "BitswapClient", "BlockStore", "MemoryBlockStore", + "FilesystemBlockStore", # CID types "CIDInput", "CIDObject", diff --git a/libp2p/bitswap/block_store.py b/libp2p/bitswap/block_store.py index 12eee5aab..39e6c85d2 100644 --- a/libp2p/bitswap/block_store.py +++ b/libp2p/bitswap/block_store.py @@ -3,6 +3,9 @@ """ from abc import ABC, abstractmethod +from pathlib import Path + +import trio from .cid import CIDInput, CIDObject, parse_cid @@ -118,3 +121,98 @@ def get_all_cids(self) -> list[bytes]: def size(self) -> int: """Get the number of blocks in the store.""" return len(self._blocks) + + +class FilesystemBlockStore(BlockStore): + """ + Filesystem-based block store. Persists blocks to disk as files. + + Each block is stored as a file at: + // + + This two-level directory structure avoids having too many files in a + single directory and matches the layout used by py-ipfs-lite. + + Args: + base_path: Root directory for block storage. Created if it doesn't exist. + + Example: + >>> store = FilesystemBlockStore("/var/lib/myapp/blocks") + >>> bitswap = BitswapClient(host, store) + >>> # Blocks now survive process restarts! + + >>> # Drop-in replacement for MemoryBlockStore: + >>> # store = MemoryBlockStore() # before + >>> store = FilesystemBlockStore("./blocks") # after — persistent + """ + + def __init__(self, base_path: str | Path) -> None: + """Initialize the filesystem block store.""" + self._path = Path(base_path) + self._path.mkdir(parents=True, exist_ok=True) + + def _cid_to_path(self, cid: CIDInput) -> Path: + """Convert a CID to a filesystem path using 2-char prefix directories.""" + cid_str = str(_normalize_cid(cid)) + # e.g. bafybeiabc... → /ba/fybeiabc... + return self._path / cid_str[:2] / cid_str[2:] + + async def get_block(self, cid: CIDInput) -> bytes | None: + """Get a block by CID. Returns None if not found on disk.""" + path = self._cid_to_path(cid) + if not path.exists(): + return None + return await trio.to_thread.run_sync(path.read_bytes) + + async def put_block(self, cid: CIDInput, data: bytes) -> None: + """Write a block to disk.""" + path = self._cid_to_path(cid) + await trio.to_thread.run_sync( + lambda: path.parent.mkdir(parents=True, exist_ok=True) + ) + await trio.to_thread.run_sync(path.write_bytes, data) + + async def has_block(self, cid: CIDInput) -> bool: + """Check if a block file exists on disk.""" + return self._cid_to_path(cid).exists() + + async def delete_block(self, cid: CIDInput) -> None: + """Delete a block file from disk.""" + path = self._cid_to_path(cid) + if path.exists(): + await trio.to_thread.run_sync(path.unlink) + + def get_all_cids(self) -> list[bytes]: + """Return all stored CIDs as bytes by scanning the directory tree.""" + cids = [] + if not self._path.exists(): + return cids + for subdir in self._path.iterdir(): + if not subdir.is_dir(): + continue + for entry in subdir.iterdir(): + if not entry.is_file(): + continue + cid_str = subdir.name + entry.name + try: + cid_obj = _normalize_cid(cid_str) + cids.append(cid_obj.buffer) + except Exception: + pass # skip files that aren't valid CIDs + return cids + + def size(self) -> int: + """Return the number of stored blocks.""" + if not self._path.exists(): + return 0 + return sum( + 1 + for d in self._path.iterdir() + if d.is_dir() + for f in d.iterdir() + if f.is_file() + ) + + def base_path(self) -> Path: + """Return the root directory where blocks are stored.""" + return self._path diff --git a/libp2p/bitswap/dag_pb.py b/libp2p/bitswap/dag_pb.py index 74bbcddc2..dadb71996 100644 --- a/libp2p/bitswap/dag_pb.py +++ b/libp2p/bitswap/dag_pb.py @@ -10,12 +10,22 @@ import logging from .cid import CIDInput, cid_to_bytes -from .pb.dag_pb_pb2 import PBNode +from .pb.dag_pb_pb2 import PBLink, PBNode from .pb.unixfs_pb2 import Data as PBUnixFSData logger = logging.getLogger(__name__) +def _encode_varint(value: int) -> bytes: + """Encode an unsigned integer as a protobuf varint.""" + buf = [] + while value > 0x7F: + buf.append((value & 0x7F) | 0x80) + value >>= 7 + buf.append(value & 0x7F) + return bytes(buf) + + def _normalize_link_cid(cid: CIDInput) -> bytes: """Normalize CID input for DAG links while preserving raw-bytes compatibility.""" if isinstance(cid, bytes): @@ -103,38 +113,39 @@ def encode_dag_pb(links: list[Link], unixfs_data: UnixFSData | None = None) -> b >>> encoded = encode_dag_pb(links, data) """ - # Create PBNode - pb_node = PBNode() + # DAG-PB canonical format requires Links (field 2) BEFORE Data (field 1). + # Standard protobuf SerializeToString() emits fields in field-number order + # (Data=1 first, Links=2 second), producing different bytes and a different + # CID than Kubo for the same logical content. + # We manually construct the wire format to enforce the correct ordering. + + result = b"" - # Add links + # 1. Serialize each Link first — field 2, wire type 2 (length-delimited) = tag 0x12 for link in links: - pb_link = pb_node.Links.add() + pb_link = PBLink() pb_link.Hash = link.cid pb_link.Name = link.name pb_link.Tsize = link.size + link_bytes = pb_link.SerializeToString() + result += b"\x12" + _encode_varint(len(link_bytes)) + link_bytes - # Add UnixFS data if provided - if unixfs_data: - # Create UnixFS data structure + # 2. Serialize Data after Links — field 1, wire type 2 = tag 0x0a + if unixfs_data is not None: pb_unixfs = PBUnixFSData() pb_unixfs.Type = UnixFSData.TYPE_MAP[unixfs_data.type] # type: ignore[assignment] pb_unixfs.Data = unixfs_data.data pb_unixfs.filesize = unixfs_data.filesize - - # Add blocksizes for blocksize in unixfs_data.blocksizes: pb_unixfs.blocksizes.append(blocksize) - if unixfs_data.hash_type: pb_unixfs.hashType = unixfs_data.hash_type if unixfs_data.fanout: pb_unixfs.fanout = unixfs_data.fanout + data_bytes = pb_unixfs.SerializeToString() + result += b"\x0a" + _encode_varint(len(data_bytes)) + data_bytes - # Serialize UnixFS data and add to PBNode - pb_node.Data = pb_unixfs.SerializeToString() - - # Serialize PBNode - return pb_node.SerializeToString() + return result def decode_dag_pb(data: bytes) -> tuple[list[Link], UnixFSData | None]: From c5982e7bdbe9b274958faccb387e7294b7e43966 Mon Sep 17 00:00:00 2001 From: sumanjeet0012 Date: Sun, 3 May 2026 12:40:16 +0530 Subject: [PATCH 04/37] feat: enhance Merkle DAG handling with DAG-PB leaf nodes and balanced layout Co-authored-by: Copilot --- libp2p/bitswap/dag.py | 67 +++++++++++++--------------- libp2p/bitswap/dag_pb.py | 96 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 126 insertions(+), 37 deletions(-) diff --git a/libp2p/bitswap/dag.py b/libp2p/bitswap/dag.py index 3112f21e5..146501a5a 100644 --- a/libp2p/bitswap/dag.py +++ b/libp2p/bitswap/dag.py @@ -33,7 +33,8 @@ ) from .client import BitswapClient from .dag_pb import ( - create_file_node, + balanced_layout, + create_leaf_node, decode_dag_pb, is_directory_node, is_file_node, @@ -155,16 +156,17 @@ async def add_file( logger.debug(f"Using chunk size: {chunk_size} bytes") - # If file is small enough, store as single RAW block + # If file is small enough, store as single dag-pb leaf block if file_size <= chunk_size: logger.debug("File fits in single block") with open(file_path, "rb") as f: data = f.read() - cid = compute_cid_v1(data, codec=CODEC_RAW) + leaf_block = create_leaf_node(data) + cid = compute_cid_v1(leaf_block, codec=CODEC_DAG_PB) - await self.bitswap.add_block(cid, data) + await self.bitswap.add_block(cid, leaf_block) if progress_callback: await _call_progress_callback( @@ -203,19 +205,18 @@ async def add_file( logger.debug(f"Chunking file into ~{estimated_chunks} chunks") logger.info("=== Starting file chunking process ===") - chunks_data: list[tuple[bytes, int]] = [] + # leaf_triples: (cid_bytes, leaf_block_bytes, raw_data_size) + leaf_triples: list[tuple[bytes, bytes, int]] = [] bytes_processed = 0 # Process file in chunks (memory efficient) for i, chunk_data in enumerate(chunk_file(file_path, chunk_size)): - # Compute CID for chunk - chunk_cid = compute_cid_v1(chunk_data, codec=CODEC_RAW) + # Wrap chunk in UnixFS dag-pb leaf (matches Kubo's RawLeaves=false) + leaf_block = create_leaf_node(chunk_data) + chunk_cid = compute_cid_v1(leaf_block, codec=CODEC_DAG_PB) - # Store chunk - await self.bitswap.add_block(chunk_cid, chunk_data) - - # Track chunk info - chunks_data.append((chunk_cid, len(chunk_data))) + await self.bitswap.add_block(chunk_cid, leaf_block) + leaf_triples.append((chunk_cid, leaf_block, len(chunk_data))) bytes_processed += len(chunk_data) # Progress callback @@ -227,43 +228,36 @@ async def add_file( f"chunking ({i + 1} chunks)", ) - # Enhanced logging with full CID logger.info( f"Chunk {i + 1}: CID={format_cid_for_display(chunk_cid)}, " f"Size={len(chunk_data)} bytes, " f"Progress={bytes_processed}/{file_size}" ) logger.debug( - f"Stored chunk {i}: {format_cid_for_display(chunk_cid, max_len=16)} " + f"Stored leaf {i}: {format_cid_for_display(chunk_cid, max_len=16)} " f"({len(chunk_data)} bytes)" ) - # Create root node with links to all chunks + # Build balanced DAG tree (max 174 links/node, matches Kubo) if progress_callback: await _call_progress_callback( progress_callback, file_size, file_size, "creating root node" ) - root_data = create_file_node(chunks_data) - root_cid = compute_cid_v1(root_data, codec=CODEC_DAG_PB) + root_cid, root_data = balanced_layout(leaf_triples) await self.bitswap.add_block(root_cid, root_data) # Enhanced logging for root CID logger.info("=== File chunking completed ===") logger.info( f"Root CID: {format_cid_for_display(root_cid)} " - f"(Links to {len(chunks_data)} chunks)" + f"(Balanced DAG over {len(leaf_triples)} leaves)" ) logger.info(f"Total file size: {file_size} bytes") - logger.info("=== Chunk CIDs ===") - for i, (chunk_cid, chunk_size) in enumerate(chunks_data): - logger.info( - f" Chunk {i}: {format_cid_for_display(chunk_cid)} ({chunk_size} bytes)" - ) logger.info("=" * 50) logger.info( - f"Added file with {len(chunks_data)} chunks. " + f"Added file with {len(leaf_triples)} leaves. " f"Root CID: {format_cid_for_display(root_cid, max_len=16)}" ) @@ -323,10 +317,11 @@ async def add_bytes( if chunk_size is None: chunk_size = DEFAULT_CHUNK_SIZE - # If data is small, store as single block + # If data is small, store as single dag-pb leaf block if file_size <= chunk_size: - cid = compute_cid_v1(data, codec=CODEC_RAW) - await self.bitswap.add_block(cid, data) + leaf_block = create_leaf_node(data) + cid = compute_cid_v1(leaf_block, codec=CODEC_DAG_PB) + await self.bitswap.add_block(cid, leaf_block) if progress_callback: await _call_progress_callback( @@ -335,17 +330,18 @@ async def add_bytes( return cid - # Chunk the data + # Chunk the data and wrap each chunk as a dag-pb leaf chunks = chunk_bytes(data, chunk_size) - chunks_data: list[tuple[bytes, int]] = [] + leaf_triples: list[tuple[bytes, bytes, int]] = [] for i, chunk_data in enumerate(chunks): - chunk_cid = compute_cid_v1(chunk_data, codec=CODEC_RAW) - await self.bitswap.add_block(chunk_cid, chunk_data) - chunks_data.append((chunk_cid, len(chunk_data))) + leaf_block = create_leaf_node(chunk_data) + chunk_cid = compute_cid_v1(leaf_block, codec=CODEC_DAG_PB) + await self.bitswap.add_block(chunk_cid, leaf_block) + leaf_triples.append((chunk_cid, leaf_block, len(chunk_data))) if progress_callback: - bytes_processed = sum(size for _, size in chunks_data) + bytes_processed = sum(s for _, _, s in leaf_triples) await _call_progress_callback( progress_callback, bytes_processed, @@ -353,9 +349,8 @@ async def add_bytes( f"chunking ({i + 1}/{len(chunks)})", ) - # Create root node - root_data = create_file_node(chunks_data) - root_cid = compute_cid_v1(root_data, codec=CODEC_DAG_PB) + # Build balanced DAG tree + root_cid, root_data = balanced_layout(leaf_triples) await self.bitswap.add_block(root_cid, root_data) if progress_callback: diff --git a/libp2p/bitswap/dag_pb.py b/libp2p/bitswap/dag_pb.py index dadb71996..2825f448b 100644 --- a/libp2p/bitswap/dag_pb.py +++ b/libp2p/bitswap/dag_pb.py @@ -9,10 +9,13 @@ from dataclasses import dataclass, field import logging -from .cid import CIDInput, cid_to_bytes +from .cid import CODEC_DAG_PB, CIDInput, cid_to_bytes, compute_cid_v1 from .pb.dag_pb_pb2 import PBLink, PBNode from .pb.unixfs_pb2 import Data as PBUnixFSData +# Maximum links per internal DAG-PB node — matches Go's balanced.Layout default +MAX_LINKS_PER_NODE = 174 + logger = logging.getLogger(__name__) @@ -293,3 +296,94 @@ def get_file_size(data: bytes) -> int: if unixfs_data and unixfs_data.type == "file": return unixfs_data.filesize return 0 + + +def create_leaf_node(data: bytes) -> bytes: + """ + Create a DAG-PB leaf node for a single file chunk. + + Wraps raw bytes in UnixFS Data(type=File, data=chunk, filesize=len(chunk)) + inside a PBNode with no links. This matches Kubo's default behaviour + (RawLeaves=false), ensuring leaf CIDs are byte-identical to those + produced by `ipfs add`. + + Args: + data: Raw chunk bytes (may be empty for an empty file) + + Returns: + Encoded DAG-PB bytes, suitable for storage as a dag-pb block + """ + unixfs_data = UnixFSData(type="file", data=data, filesize=len(data)) + return encode_dag_pb([], unixfs_data) + + +def balanced_layout( + leaves: list[tuple[bytes, bytes, int]], + max_links: int = MAX_LINKS_PER_NODE, +) -> tuple[bytes, bytes]: + """ + Build a balanced Merkle DAG from a flat list of leaf blocks. + + Groups leaves into batches of `max_links` (default 174), creates an + internal DAG-PB node for each batch, then repeats level by level until + a single root remains. Matches Go's balanced.Layout exactly. + + Args: + leaves: List of (cid_bytes, block_bytes, file_data_size) tuples where + - cid_bytes: CID of the leaf block as raw bytes + - block_bytes: The encoded dag-pb leaf block bytes + - file_data_size: Size of the raw file data inside this leaf + (i.e. len(original chunk), NOT len(block)) + max_links: Max links per internal node (default 174, matches Kubo) + + Returns: + (root_cid_bytes, root_block_bytes) + + Raises: + ValueError: If leaves is empty + """ + if not leaves: + raise ValueError("Cannot build balanced layout from empty leaf list") + + if len(leaves) == 1: + return leaves[0][0], leaves[0][1] + + # Each level entry: (cid_bytes, block_bytes, file_data_size, cumulative_block_size) + # cumulative_block_size = len(this block) + sum(children's cumulative sizes) + level: list[tuple[bytes, bytes, int, int]] = [ + (cid, blk, fsize, len(blk)) for cid, blk, fsize in leaves + ] + + while len(level) > 1: + next_level: list[tuple[bytes, bytes, int, int]] = [] + for i in range(0, len(level), max_links): + batch = level[i : i + max_links] + if len(batch) == 1: + next_level.append(batch[0]) + continue + + # Build internal node: links to each child, UnixFS blocksizes + internal_links: list[Link] = [] + blocksizes: list[int] = [] + total_filesize = 0 + total_cum = 0 + for cid_b, _, fsize, cum in batch: + # Tsize = cumulative block size of the subtree rooted at this child + internal_links.append(Link(cid=cid_b, name="", size=cum)) + blocksizes.append(fsize) + total_filesize += fsize + total_cum += cum + + unixfs_data = UnixFSData( + type="file", filesize=total_filesize, blocksizes=blocksizes + ) + internal_block = encode_dag_pb(internal_links, unixfs_data) + internal_cid = compute_cid_v1(internal_block, codec=CODEC_DAG_PB) + # This node's cumulative size = its own block + sum of children's cumulative sizes + cum_size = len(internal_block) + total_cum + next_level.append( + (internal_cid, internal_block, total_filesize, cum_size) + ) + level = next_level + + return level[0][0], level[0][1] From 5f4e18a18674ae9645a49e8ee5b39798d9d014a6 Mon Sep 17 00:00:00 2001 From: sumanjeet0012 Date: Sun, 3 May 2026 13:01:44 +0530 Subject: [PATCH 05/37] feat: introduce BlockService for enhanced block retrieval and caching in MerkleDag Co-authored-by: Copilot --- libp2p/bitswap/__init__.py | 2 + libp2p/bitswap/block_service.py | 189 ++++++++++++++++++++++++++++++++ libp2p/bitswap/dag.py | 83 +++++++++++--- 3 files changed, 260 insertions(+), 14 deletions(-) create mode 100644 libp2p/bitswap/block_service.py diff --git a/libp2p/bitswap/__init__.py b/libp2p/bitswap/__init__.py index 12abf467c..9412fb9aa 100644 --- a/libp2p/bitswap/__init__.py +++ b/libp2p/bitswap/__init__.py @@ -31,6 +31,7 @@ New code should prefer the object-returning variants above. """ +from .block_service import BlockService from .block_store import BlockStore, FilesystemBlockStore, MemoryBlockStore from .cid import ( CID_V0, @@ -69,6 +70,7 @@ __all__ = [ # Core "BitswapClient", + "BlockService", "BlockStore", "MemoryBlockStore", "FilesystemBlockStore", diff --git a/libp2p/bitswap/block_service.py b/libp2p/bitswap/block_service.py new file mode 100644 index 000000000..82f84e715 --- /dev/null +++ b/libp2p/bitswap/block_service.py @@ -0,0 +1,189 @@ +""" +BlockService: transparent local→network fallback for block retrieval. + +Sits between MerkleDag and BitswapClient, providing: + - Local-first lookup (no network cost if block is already stored) + - Automatic caching of network-fetched blocks into the local store + - Peer announcement when new blocks are stored locally + - A clean abstraction so MerkleDag is not hardwired to BitswapClient +""" +from __future__ import annotations + +import logging +from typing import TYPE_CHECKING + +from .block_store import BlockStore +from .cid import CIDInput, cid_to_bytes, format_cid_for_display, parse_cid + +if TYPE_CHECKING: + from libp2p.peer.id import ID as PeerID + from .client import BitswapClient + +logger = logging.getLogger(__name__) + + +class BlockService: + """ + Combines a local BlockStore with a BitswapClient into one unified interface. + + get_block() flow: + 1. Check local BlockStore → return immediately if found (no network) + 2. Fetch via BitswapClient → goes to the network + 3. Auto-cache the result → store locally so next call is free + + put_block() flow: + 1. Write to local BlockStore + 2. Call bitswap.add_block() so peers who have this CID in their + wantlist are notified and can receive it + + This is a drop-in wrapper: MerkleDag can use BlockService instead of + calling bitswap directly, and the behaviour is identical but with the + caching and announcement benefits added transparently. + + Example: + >>> store = FilesystemBlockStore("./blocks") + >>> service = BlockService(store, bitswap) + >>> dag = MerkleDag(bitswap, block_service=service) + """ + + def __init__(self, store: BlockStore, bitswap: "BitswapClient") -> None: + self.store = store + self.bitswap = bitswap + + async def get_block( + self, + cid: CIDInput, + peer_id: "PeerID | None" = None, + timeout: float = 30.0, + ) -> bytes | None: + """ + Get a block. Checks local store first, then fetches from network. + Any block fetched from the network is automatically cached locally. + + Args: + cid: The CID of the block to retrieve + peer_id: Optional specific peer to fetch from (passed to bitswap) + timeout: Network timeout in seconds + + Returns: + Block data bytes, or None if not found anywhere + """ + cid_bytes = cid_to_bytes(cid) + cid_obj = parse_cid(cid_bytes) + + # 1. Local lookup — instant, no network cost + data = await self.store.get_block(cid_obj) + if data is not None: + logger.debug( + f"BlockService: local hit {format_cid_for_display(cid_obj, max_len=12)}" + ) + return data + + # 2. Network fetch via Bitswap + logger.debug( + f"BlockService: local miss, fetching from network " + f"{format_cid_for_display(cid_obj, max_len=12)}" + ) + try: + data = await self.bitswap.get_block(cid_bytes, peer_id, timeout) + except Exception as e: + logger.warning(f"BlockService: network fetch failed: {e}") + return None + + if data is not None: + # 3. Auto-cache locally — future requests for this block are free + await self.store.put_block(cid_obj, data) + logger.debug( + f"BlockService: cached fetched block " + f"{format_cid_for_display(cid_obj, max_len=12)}" + ) + + return data + + async def put_block(self, cid: CIDInput, data: bytes) -> None: + """ + Store a block locally and announce it to waiting peers. + + Calling bitswap.add_block() both writes to bitswap's own store AND + notifies any peers who have this CID in their pending wantlist. + We also write to our own store so get_block() local-hits on it. + + Args: + cid: The CID of the block + data: The block data bytes + """ + cid_obj = parse_cid(cid_to_bytes(cid)) + + # Write to our local store + await self.store.put_block(cid_obj, data) + + # add_block() writes to bitswap's internal store AND calls + # _notify_peers_about_block() for any peers waiting on this CID + await self.bitswap.add_block(cid_obj, data) + + logger.debug( + f"BlockService: stored and announced " + f"{format_cid_for_display(cid_obj, max_len=12)}" + ) + + async def get_blocks_batch( + self, + cids: list[CIDInput], + peer_id: "PeerID | None" = None, + timeout: float = 30.0, + batch_size: int = 32, + ) -> dict[bytes, bytes]: + """ + Batch-fetch multiple blocks. Local hits are returned immediately; + only missing blocks go to the network. All network-fetched blocks + are auto-cached locally. + + Args: + cids: List of CIDs to fetch + peer_id: Optional specific peer to fetch from + timeout: Network timeout in seconds + batch_size: Wantlist batch size passed to bitswap + + Returns: + Dict mapping cid_bytes -> block_data for all found blocks + """ + results: dict[bytes, bytes] = {} + missing_cids: list[CIDInput] = [] + + # Local pass first + for cid in cids: + cid_bytes = cid_to_bytes(cid) + cid_obj = parse_cid(cid_bytes) + data = await self.store.get_block(cid_obj) + if data is not None: + results[cid_bytes] = data + else: + missing_cids.append(cid) + + if not missing_cids: + logger.debug(f"BlockService.get_blocks_batch: all {len(cids)} blocks local") + return results + + local_hits = len(cids) - len(missing_cids) + logger.debug( + f"BlockService.get_blocks_batch: {local_hits} local hits, " + f"{len(missing_cids)} fetching from network" + ) + + # Network pass for missing blocks + network_results = await self.bitswap.get_blocks_batch( + missing_cids, peer_id=peer_id, timeout=timeout, batch_size=batch_size + ) + + # Auto-cache all network-fetched blocks + for cid_bytes, data in network_results.items(): + cid_obj = parse_cid(cid_bytes) + await self.store.put_block(cid_obj, data) + results[cid_bytes] = data + + return results + + @property + def block_store(self) -> BlockStore: + """Expose the underlying BlockStore (used by MerkleDag internals).""" + return self.store diff --git a/libp2p/bitswap/dag.py b/libp2p/bitswap/dag.py index 146501a5a..89b742a8d 100644 --- a/libp2p/bitswap/dag.py +++ b/libp2p/bitswap/dag.py @@ -14,6 +14,7 @@ from libp2p.peer.id import ID as PeerID +from .block_service import BlockService from .block_store import BlockStore from .chunker import ( DEFAULT_CHUNK_SIZE, @@ -100,17 +101,71 @@ class MerkleDag: """ - def __init__(self, bitswap: BitswapClient, block_store: BlockStore | None = None): + def __init__( + self, + bitswap: BitswapClient, + block_store: BlockStore | None = None, + block_service: BlockService | None = None, + ): """ Initialize Merkle DAG manager. Args: bitswap: Bitswap client for block exchange block_store: Optional block store (uses bitswap's store if None) + block_service: Optional BlockService for transparent local→network + fallback with auto-caching. When provided, all block + reads/writes go through it instead of bitswap directly. + Construct with: BlockService(your_store, bitswap) """ self.bitswap = bitswap self.block_store = block_store or bitswap.block_store + # If a BlockService is provided use it; otherwise fall back to + # calling bitswap directly (existing behaviour, no regression). + self._service: BlockService | None = block_service + + # ── private routing helpers ─────────────────────────────────────────────── + + async def _put_block(self, cid: CIDInput, data: bytes) -> None: + """Store a block. Routes through BlockService when available.""" + if self._service is not None: + await self._service.put_block(cid, data) + else: + await self.bitswap.add_block(cid, data) + + async def _get_block( + self, + cid: CIDInput, + peer_id: PeerID | None = None, + timeout: float = 30.0, + ) -> bytes: + """Fetch a block. Routes through BlockService when available.""" + if self._service is not None: + data = await self._service.get_block(cid, peer_id=peer_id, timeout=timeout) + if data is None: + from .cid import format_cid_for_display, cid_to_bytes + raise BlockNotFoundError( + f"Block not found: {format_cid_for_display(cid_to_bytes(cid))}" + ) + return data + return await self.bitswap.get_block(cid, peer_id, timeout) + + async def _get_blocks_batch( + self, + cids: list[CIDInput], + peer_id: PeerID | None = None, + timeout: float = 30.0, + batch_size: int = 32, + ) -> dict[bytes, bytes]: + """Batch-fetch blocks. Routes through BlockService when available.""" + if self._service is not None: + return await self._service.get_blocks_batch( + cids, peer_id=peer_id, timeout=timeout, batch_size=batch_size + ) + return await self.bitswap.get_blocks_batch( + cids, peer_id=peer_id, timeout=timeout, batch_size=batch_size + ) async def add_file( self, @@ -166,7 +221,7 @@ async def add_file( leaf_block = create_leaf_node(data) cid = compute_cid_v1(leaf_block, codec=CODEC_DAG_PB) - await self.bitswap.add_block(cid, leaf_block) + await self._put_block(cid, leaf_block) if progress_callback: await _call_progress_callback( @@ -190,7 +245,7 @@ async def add_file( dir_data = create_directory_node([(filename, cid, file_size)]) dir_cid = compute_cid_v1(dir_data, codec=CODEC_DAG_PB) - await self.bitswap.add_block(dir_cid, dir_data) + await self._put_block(dir_cid, dir_data) logger.info( f"Created directory wrapper. Directory CID: " @@ -215,7 +270,7 @@ async def add_file( leaf_block = create_leaf_node(chunk_data) chunk_cid = compute_cid_v1(leaf_block, codec=CODEC_DAG_PB) - await self.bitswap.add_block(chunk_cid, leaf_block) + await self._put_block(chunk_cid, leaf_block) leaf_triples.append((chunk_cid, leaf_block, len(chunk_data))) bytes_processed += len(chunk_data) @@ -245,7 +300,7 @@ async def add_file( ) root_cid, root_data = balanced_layout(leaf_triples) - await self.bitswap.add_block(root_cid, root_data) + await self._put_block(root_cid, root_data) # Enhanced logging for root CID logger.info("=== File chunking completed ===") @@ -278,7 +333,7 @@ async def add_file( # Create directory node with single entry pointing to the file dir_data = create_directory_node([(filename, root_cid, file_size)]) dir_cid = compute_cid_v1(dir_data, codec=CODEC_DAG_PB) - await self.bitswap.add_block(dir_cid, dir_data) + await self._put_block(dir_cid, dir_data) logger.info( "Created directory wrapper. Directory CID: " @@ -321,7 +376,7 @@ async def add_bytes( if file_size <= chunk_size: leaf_block = create_leaf_node(data) cid = compute_cid_v1(leaf_block, codec=CODEC_DAG_PB) - await self.bitswap.add_block(cid, leaf_block) + await self._put_block(cid, leaf_block) if progress_callback: await _call_progress_callback( @@ -337,7 +392,7 @@ async def add_bytes( for i, chunk_data in enumerate(chunks): leaf_block = create_leaf_node(chunk_data) chunk_cid = compute_cid_v1(leaf_block, codec=CODEC_DAG_PB) - await self.bitswap.add_block(chunk_cid, leaf_block) + await self._put_block(chunk_cid, leaf_block) leaf_triples.append((chunk_cid, leaf_block, len(chunk_data))) if progress_callback: @@ -351,7 +406,7 @@ async def add_bytes( # Build balanced DAG tree root_cid, root_data = balanced_layout(leaf_triples) - await self.bitswap.add_block(root_cid, root_data) + await self._put_block(root_cid, root_data) if progress_callback: await _call_progress_callback( @@ -416,7 +471,7 @@ async def fetch_file( logger.info(f"Fetching file: {format_cid_for_display(root_cid_bytes)}") # Step 1: Fetch the root block - root_data = await self.bitswap.get_block(root_cid_bytes, peer_id, timeout) + root_data = await self._get_block(root_cid_bytes, peer_id, timeout) if not verify_cid(root_cid_bytes, root_data): root_cid_str = format_cid_for_display(root_cid_bytes) raise ValueError(f"Root block CID verification failed: {root_cid_str}") @@ -435,7 +490,7 @@ async def fetch_file( filename = first_link.name or None actual_file_cid = first_link.cid logger.info(f"Filename from directory: {filename!r}") - actual_file_data = await self.bitswap.get_block( + actual_file_data = await self._get_block( actual_file_cid, peer_id, timeout ) if not verify_cid(actual_file_cid, actual_file_data): @@ -491,7 +546,7 @@ async def _batch_fetch_tree(cid_list: list[bytes], depth: int) -> None: print(msg2, flush=True) # Batch-fetch this level's blocks - level_blocks = await self.bitswap.get_blocks_batch( + level_blocks = await self._get_blocks_batch( list(cid_list), peer_id=peer_id, timeout=timeout, batch_size=32 ) logger.info(f"[DAG] Depth {depth}: ✓ received {len(level_blocks)} blocks") @@ -587,7 +642,7 @@ def _collect_leaves_local(cid_bytes: bytes, depth: int = 1) -> None: f"(batch_size=32, timeout={timeout}s)" ) print(msg2, flush=True) - block_map = await self.bitswap.get_blocks_batch( + block_map = await self._get_blocks_batch( list(ordered_leaf_cids), peer_id=peer_id, timeout=timeout, batch_size=32 ) logger.info(f"[DAG] ✓ Batch fetch complete: {len(block_map)} blocks received") @@ -682,7 +737,7 @@ async def get_file_info( """ # Get root block root_cid_bytes = cid_to_bytes(root_cid) - root_data = await self.bitswap.get_block(root_cid_bytes, peer_id, timeout) + root_data = await self._get_block(root_cid_bytes, peer_id, timeout) # Check if it's a DAG-PB file node if is_file_node(root_data): From 796b5d5f9c35b22b09e44f1db85f0897085e8a10 Mon Sep 17 00:00:00 2001 From: sumanjeet0012 Date: Sun, 3 May 2026 13:11:51 +0530 Subject: [PATCH 06/37] feat: add chunk_stream function for efficient streaming of file chunks and implement add_stream method in MerkleDag for handling io.IOBase streams Co-authored-by: Copilot --- libp2p/bitswap/chunker.py | 43 +++++++++++++++++++ libp2p/bitswap/dag.py | 89 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 132 insertions(+) diff --git a/libp2p/bitswap/chunker.py b/libp2p/bitswap/chunker.py index 10cb869b0..106534b51 100644 --- a/libp2p/bitswap/chunker.py +++ b/libp2p/bitswap/chunker.py @@ -7,6 +7,7 @@ """ from collections.abc import Callable, Iterator +import io from pathlib import Path # Default chunk size: 63 KB (py-libp2p accepts less than 64 KB) @@ -82,6 +83,48 @@ def chunk_file(file_path: str, chunk_size: int = DEFAULT_CHUNK_SIZE) -> Iterator yield chunk +def chunk_stream( + stream: io.IOBase, chunk_size: int = DEFAULT_CHUNK_SIZE +) -> Iterator[bytes]: + """ + Stream chunks from any readable io.IOBase object. + + Memory efficient — reads one chunk at a time without loading the + entire content into memory. Works with any Python stream: + open() file handles, BytesIO, GzipFile, BZ2File, network sockets, + or any object that implements io.IOBase.read(). + + Args: + stream: Any readable io.IOBase (open(), BytesIO, GzipFile, etc.) + chunk_size: Size of each chunk in bytes + + Yields: + Chunks of up to chunk_size bytes. The final chunk may be smaller. + + Example: + >>> import io + >>> data = b"hello world " * 100000 + >>> chunks = list(chunk_stream(io.BytesIO(data), chunk_size=256*1024)) + >>> print(f"Split into {len(chunks)} chunks") + + >>> # From a real file handle + >>> with open("movie.mp4", "rb") as f: + ... for chunk in chunk_stream(f): + ... process(chunk) + + >>> # From a gzip stream (decompress on-the-fly) + >>> import gzip + >>> with gzip.open("archive.gz", "rb") as f: + ... for chunk in chunk_stream(f): + ... process(chunk) + """ + while True: + chunk = stream.read(chunk_size) + if not chunk: + break + yield chunk + + def estimate_chunk_count(file_size: int, chunk_size: int = DEFAULT_CHUNK_SIZE) -> int: """ Estimate number of chunks for a given file size. diff --git a/libp2p/bitswap/dag.py b/libp2p/bitswap/dag.py index 89b742a8d..f9f1d82b4 100644 --- a/libp2p/bitswap/dag.py +++ b/libp2p/bitswap/dag.py @@ -9,6 +9,7 @@ from collections.abc import Awaitable, Callable import inspect +import io import logging from typing import Union @@ -20,6 +21,7 @@ DEFAULT_CHUNK_SIZE, chunk_bytes, chunk_file, + chunk_stream, estimate_chunk_count, get_file_size, ) @@ -415,6 +417,93 @@ async def add_bytes( return root_cid + async def add_stream( + self, + stream: io.IOBase, + chunk_size: int | None = None, + progress_callback: ProgressCallback | None = None, + ) -> bytes: + """ + Add data from any io.IOBase stream to the DAG. + + More flexible than add_file() (accepts any stream, not just file paths) + and more memory efficient than add_bytes() (reads one chunk at a time, + so total memory usage is O(chunk_size) regardless of file size). + + Args: + stream: Any readable io.IOBase — open() handles, BytesIO, + GzipFile, BZ2File, network streams, pipes, etc. + chunk_size: Optional chunk size in bytes (auto-selected if None) + progress_callback: Optional callback(current, total, status). + Note: total is unknown for streams, so current + is reported as bytes processed so far. + + Returns: + Root CID bytes of the stored DAG + + Example: + >>> import io + >>> root_cid = await dag.add_stream(io.BytesIO(b"hello world")) + + >>> # Memory-efficient large file (no full read into RAM) + >>> with open("movie.mp4", "rb") as f: + ... root_cid = await dag.add_stream(f) + + >>> # Decompress and add in one pass + >>> import gzip + >>> with gzip.open("archive.gz", "rb") as f: + ... root_cid = await dag.add_stream(f) + + >>> # With BlockService for persistent caching + >>> service = BlockService(FilesystemBlockStore("./blocks"), bitswap) + >>> dag = MerkleDag(bitswap, block_service=service) + >>> with open("large.bin", "rb") as f: + ... root_cid = await dag.add_stream(f) # cached to disk + """ + if chunk_size is None: + chunk_size = DEFAULT_CHUNK_SIZE + + leaf_triples: list[tuple[bytes, bytes, int]] = [] + bytes_processed = 0 + + for i, chunk_data in enumerate(chunk_stream(stream, chunk_size)): + leaf_block = create_leaf_node(chunk_data) + chunk_cid = compute_cid_v1(leaf_block, codec=CODEC_DAG_PB) + await self._put_block(chunk_cid, leaf_block) + leaf_triples.append((chunk_cid, leaf_block, len(chunk_data))) + bytes_processed += len(chunk_data) + + if progress_callback: + # total is unknown for streams — report bytes processed so far + await _call_progress_callback( + progress_callback, + bytes_processed, + bytes_processed, + f"chunking ({i + 1} chunks, {bytes_processed} bytes)", + ) + + # Empty stream — store a single empty leaf + if not leaf_triples: + leaf_block = create_leaf_node(b"") + cid = compute_cid_v1(leaf_block, codec=CODEC_DAG_PB) + await self._put_block(cid, leaf_block) + return cid + + # Single chunk — return the leaf CID directly (no root node needed) + if len(leaf_triples) == 1: + return leaf_triples[0][0] + + # Multiple chunks — build balanced DAG tree + root_cid, root_data = balanced_layout(leaf_triples) + await self._put_block(root_cid, root_data) + + if progress_callback: + await _call_progress_callback( + progress_callback, bytes_processed, bytes_processed, "completed" + ) + + return root_cid + async def fetch_file( self, root_cid: CIDInput, From 3e8b881ad25daf381a05bf48f321fe4a5807ca0c Mon Sep 17 00:00:00 2001 From: sumanjeet0012 Date: Sun, 3 May 2026 13:43:26 +0530 Subject: [PATCH 07/37] feat: enhance BlockService and FilesystemBlockStore with type hints, improve chunk_stream documentation, and add Wantlist functionality Co-authored-by: Copilot --- libp2p/bitswap/block_service.py | 12 +- libp2p/bitswap/block_store.py | 3 +- libp2p/bitswap/chunker.py | 1 + libp2p/bitswap/dag.py | 5 +- libp2p/bitswap/dag_pb.py | 8 +- libp2p/bitswap/wantlist.py | 368 ++++++++++++++++++++++++++++++++ 6 files changed, 387 insertions(+), 10 deletions(-) create mode 100644 libp2p/bitswap/wantlist.py diff --git a/libp2p/bitswap/block_service.py b/libp2p/bitswap/block_service.py index 82f84e715..21dca450c 100644 --- a/libp2p/bitswap/block_service.py +++ b/libp2p/bitswap/block_service.py @@ -7,6 +7,7 @@ - Peer announcement when new blocks are stored locally - A clean abstraction so MerkleDag is not hardwired to BitswapClient """ + from __future__ import annotations import logging @@ -17,6 +18,7 @@ if TYPE_CHECKING: from libp2p.peer.id import ID as PeerID + from .client import BitswapClient logger = logging.getLogger(__name__) @@ -44,16 +46,17 @@ class BlockService: >>> store = FilesystemBlockStore("./blocks") >>> service = BlockService(store, bitswap) >>> dag = MerkleDag(bitswap, block_service=service) + """ - def __init__(self, store: BlockStore, bitswap: "BitswapClient") -> None: + def __init__(self, store: BlockStore, bitswap: BitswapClient) -> None: self.store = store self.bitswap = bitswap async def get_block( self, cid: CIDInput, - peer_id: "PeerID | None" = None, + peer_id: PeerID | None = None, timeout: float = 30.0, ) -> bytes | None: """ @@ -67,6 +70,7 @@ async def get_block( Returns: Block data bytes, or None if not found anywhere + """ cid_bytes = cid_to_bytes(cid) cid_obj = parse_cid(cid_bytes) @@ -111,6 +115,7 @@ async def put_block(self, cid: CIDInput, data: bytes) -> None: Args: cid: The CID of the block data: The block data bytes + """ cid_obj = parse_cid(cid_to_bytes(cid)) @@ -129,7 +134,7 @@ async def put_block(self, cid: CIDInput, data: bytes) -> None: async def get_blocks_batch( self, cids: list[CIDInput], - peer_id: "PeerID | None" = None, + peer_id: PeerID | None = None, timeout: float = 30.0, batch_size: int = 32, ) -> dict[bytes, bytes]: @@ -146,6 +151,7 @@ async def get_blocks_batch( Returns: Dict mapping cid_bytes -> block_data for all found blocks + """ results: dict[bytes, bytes] = {} missing_cids: list[CIDInput] = [] diff --git a/libp2p/bitswap/block_store.py b/libp2p/bitswap/block_store.py index 39e6c85d2..bc36269ce 100644 --- a/libp2p/bitswap/block_store.py +++ b/libp2p/bitswap/block_store.py @@ -144,6 +144,7 @@ class FilesystemBlockStore(BlockStore): >>> # Drop-in replacement for MemoryBlockStore: >>> # store = MemoryBlockStore() # before >>> store = FilesystemBlockStore("./blocks") # after — persistent + """ def __init__(self, base_path: str | Path) -> None: @@ -184,7 +185,7 @@ async def delete_block(self, cid: CIDInput) -> None: def get_all_cids(self) -> list[bytes]: """Return all stored CIDs as bytes by scanning the directory tree.""" - cids = [] + cids: list[bytes] = [] if not self._path.exists(): return cids for subdir in self._path.iterdir(): diff --git a/libp2p/bitswap/chunker.py b/libp2p/bitswap/chunker.py index 106534b51..2a05beae4 100644 --- a/libp2p/bitswap/chunker.py +++ b/libp2p/bitswap/chunker.py @@ -117,6 +117,7 @@ def chunk_stream( >>> with gzip.open("archive.gz", "rb") as f: ... for chunk in chunk_stream(f): ... process(chunk) + """ while True: chunk = stream.read(chunk_size) diff --git a/libp2p/bitswap/dag.py b/libp2p/bitswap/dag.py index f9f1d82b4..cf2bc6bfa 100644 --- a/libp2p/bitswap/dag.py +++ b/libp2p/bitswap/dag.py @@ -27,7 +27,6 @@ ) from .cid import ( CODEC_DAG_PB, - CODEC_RAW, CIDInput, cid_to_bytes, compute_cid_v1, @@ -146,7 +145,8 @@ async def _get_block( if self._service is not None: data = await self._service.get_block(cid, peer_id=peer_id, timeout=timeout) if data is None: - from .cid import format_cid_for_display, cid_to_bytes + from .cid import cid_to_bytes, format_cid_for_display + raise BlockNotFoundError( f"Block not found: {format_cid_for_display(cid_to_bytes(cid))}" ) @@ -459,6 +459,7 @@ async def add_stream( >>> dag = MerkleDag(bitswap, block_service=service) >>> with open("large.bin", "rb") as f: ... root_cid = await dag.add_stream(f) # cached to disk + """ if chunk_size is None: chunk_size = DEFAULT_CHUNK_SIZE diff --git a/libp2p/bitswap/dag_pb.py b/libp2p/bitswap/dag_pb.py index 2825f448b..164add080 100644 --- a/libp2p/bitswap/dag_pb.py +++ b/libp2p/bitswap/dag_pb.py @@ -312,6 +312,7 @@ def create_leaf_node(data: bytes) -> bytes: Returns: Encoded DAG-PB bytes, suitable for storage as a dag-pb block + """ unixfs_data = UnixFSData(type="file", data=data, filesize=len(data)) return encode_dag_pb([], unixfs_data) @@ -341,6 +342,7 @@ def balanced_layout( Raises: ValueError: If leaves is empty + """ if not leaves: raise ValueError("Cannot build balanced layout from empty leaf list") @@ -379,11 +381,9 @@ def balanced_layout( ) internal_block = encode_dag_pb(internal_links, unixfs_data) internal_cid = compute_cid_v1(internal_block, codec=CODEC_DAG_PB) - # This node's cumulative size = its own block + sum of children's cumulative sizes + # cumulative size = own block + sum of children's cumulative sizes cum_size = len(internal_block) + total_cum - next_level.append( - (internal_cid, internal_block, total_filesize, cum_size) - ) + next_level.append((internal_cid, internal_block, total_filesize, cum_size)) level = next_level return level[0][0], level[0][1] diff --git a/libp2p/bitswap/wantlist.py b/libp2p/bitswap/wantlist.py new file mode 100644 index 000000000..30ba5f228 --- /dev/null +++ b/libp2p/bitswap/wantlist.py @@ -0,0 +1,368 @@ +""" +Typed dataclass wrappers for Bitswap wantlist entries and messages. + +Provides a clean, self-documenting Python API over the raw protobuf +Message format. All types here are pure Python dataclasses — no +protobuf dependency. Convert to/from protobuf via messages.py helpers. + +Usage: + from libp2p.bitswap.wantlist import ( + WantType, BlockPresenceType, + WantlistEntry, Wantlist, + BlockPresence, BitswapMessage, + ) + + # Build a wantlist + wl = Wantlist() + wl.add(my_cid, want_type=WantType.Block, send_dont_have=True) + wl.add(other_cid, want_type=WantType.Have) + + # Build a full message + msg = BitswapMessage() + msg.add_want(my_cid, want_type=WantType.Block) + msg.add_block(root_cid, block_data) + msg.add_have(peer_cid) + msg.add_dont_have(missing_cid) +""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from enum import Enum +from typing import List, Optional + +from .cid import CIDInput, cid_to_bytes +from .pb.bitswap_pb2 import Message as PBMessage + +# ── enums ───────────────────────────────────────────────────────────────────── + + +class WantType(Enum): + """ + Type of want request (Bitswap 1.2.0 wantType field). + + Block = 0 → "Send me the full block bytes." + Have = 1 → "Just tell me if you have it (HAVE/DONT_HAVE response)." + Cheaper than Block — useful for presence checks before + committing to a full block transfer. + """ + + Block = 0 + Have = 1 + + +class BlockPresenceType(Enum): + """ + Type of block presence response (Bitswap 1.2.0 BlockPresence.type field). + + Have = 0 → Peer has the block and can send it. + DontHave = 1 → Peer does not have the block. + """ + + Have = 0 + DontHave = 1 + + +# ── wantlist dataclasses ────────────────────────────────────────────────────── + + +@dataclass +class WantlistEntry: + """ + A single entry in a Bitswap wantlist. + + Prefer constructing via WantlistEntry.from_cid() which normalises + any CIDInput form to raw bytes. + + Attributes: + cid: CID of the requested block as raw bytes. + priority: Request urgency. Higher = more urgent. Default 1. + cancel: True to cancel a previously sent want for this CID. + want_type: WantType.Block (full data) or WantType.Have (presence). + send_dont_have: If True, ask the peer to send an explicit DontHave + response when it doesn't have the block. + + """ + + cid: bytes + priority: int = 1 + cancel: bool = False + want_type: WantType = WantType.Block + send_dont_have: bool = False + + @classmethod + def from_cid( + cls, + cid: CIDInput, + priority: int = 1, + cancel: bool = False, + want_type: WantType = WantType.Block, + send_dont_have: bool = False, + ) -> WantlistEntry: + """Create a WantlistEntry from any CIDInput form.""" + return cls( + cid=cid_to_bytes(cid), + priority=priority, + cancel=cancel, + want_type=want_type, + send_dont_have=send_dont_have, + ) + + +@dataclass +class Wantlist: + """ + A collection of wantlist entries. + + Attributes: + entries: List of WantlistEntry items. + full: True = this replaces the peer's entire wantlist. + False (default) = delta update, adds/cancels entries. + + Example: + >>> wl = Wantlist() + >>> wl.add(cid1, want_type=WantType.Block, send_dont_have=True) + >>> wl.add(cid2, want_type=WantType.Have) + >>> wl.cancel(cid3) + >>> print(len(wl)) # 3 + + """ + + entries: list[WantlistEntry] = field(default_factory=list) + full: bool = False + + def add( + self, + cid: CIDInput, + priority: int = 1, + want_type: WantType = WantType.Block, + send_dont_have: bool = False, + ) -> None: + """Add a want entry for the given CID.""" + self.entries.append( + WantlistEntry.from_cid( + cid, + priority=priority, + want_type=want_type, + send_dont_have=send_dont_have, + ) + ) + + def cancel(self, cid: CIDInput) -> None: + """Add a cancel entry for a previously wanted CID.""" + self.entries.append(WantlistEntry.from_cid(cid, cancel=True)) + + def contains(self, cid: CIDInput) -> bool: + """Return True if any non-cancel entry exists for this CID.""" + cid_bytes = cid_to_bytes(cid) + return any(e.cid == cid_bytes and not e.cancel for e in self.entries) + + def __len__(self) -> int: + return len(self.entries) + + def __bool__(self) -> bool: + return bool(self.entries) + + +# ── message dataclasses ─────────────────────────────────────────────────────── + + +@dataclass +class BlockPresence: + """ + A HAVE or DONT_HAVE response for a specific CID (Bitswap 1.2.0). + + Use the class-method constructors for convenience: + BlockPresence.have(cid) + BlockPresence.dont_have(cid) + """ + + cid: bytes + type: BlockPresenceType + + @classmethod + def have(cls, cid: CIDInput) -> BlockPresence: + """Create a HAVE response.""" + return cls(cid=cid_to_bytes(cid), type=BlockPresenceType.Have) + + @classmethod + def dont_have(cls, cid: CIDInput) -> BlockPresence: + """Create a DONT_HAVE response.""" + return cls(cid=cid_to_bytes(cid), type=BlockPresenceType.DontHave) + + +@dataclass +class BitswapMessage: + """ + High-level typed representation of a Bitswap protocol message. + + Wraps the three main message components with typed fields and + convenience builder methods. Does not depend on protobuf directly — + convert to/from protobuf using to_proto() / from_proto(). + + Attributes: + wantlist: Optional wantlist (want/cancel entries). + blocks: List of (cid_bytes, block_data) block payloads. + block_presences: List of HAVE/DONT_HAVE presence responses. + pending_bytes: Bytes queued to send (v1.2.0 flow-control hint). + + Properties: + is_want True if the message contains want entries. + has_blocks True if the message contains block payloads. + has_presences True if the message contains HAVE/DONT_HAVE entries. + + Example: + >>> msg = BitswapMessage() + >>> msg.add_want(cid1, want_type=WantType.Block, send_dont_have=True) + >>> msg.add_want(cid2, want_type=WantType.Have) + >>> msg.add_block(root_cid, data) + >>> msg.add_have(cid3) + >>> msg.add_dont_have(cid4) + >>> assert msg.is_want and msg.has_blocks and msg.has_presences + + """ + + wantlist: Wantlist | None = None + blocks: list[tuple[bytes, bytes]] = field(default_factory=list) # (cid, data) + block_presences: list[BlockPresence] = field(default_factory=list) + pending_bytes: int = 0 + + # ── read-only properties ────────────────────────────────────────────────── + + @property + def is_want(self) -> bool: + """True if this message contains wantlist entries.""" + return self.wantlist is not None and bool(self.wantlist) + + @property + def has_blocks(self) -> bool: + """True if this message carries block payloads.""" + return bool(self.blocks) + + @property + def has_presences(self) -> bool: + """True if this message carries HAVE/DONT_HAVE responses.""" + return bool(self.block_presences) + + # ── builder methods ─────────────────────────────────────────────────────── + + def add_want( + self, + cid: CIDInput, + priority: int = 1, + want_type: WantType = WantType.Block, + send_dont_have: bool = False, + ) -> None: + """Add a want entry. Creates the wantlist if not yet present.""" + if self.wantlist is None: + self.wantlist = Wantlist() + self.wantlist.add( + cid, + priority=priority, + want_type=want_type, + send_dont_have=send_dont_have, + ) + + def cancel_want(self, cid: CIDInput) -> None: + """Add a cancel entry for a previously wanted CID.""" + if self.wantlist is None: + self.wantlist = Wantlist() + self.wantlist.cancel(cid) + + def add_block(self, cid: CIDInput, data: bytes) -> None: + """Add a block payload to this message.""" + self.blocks.append((cid_to_bytes(cid), data)) + + def add_have(self, cid: CIDInput) -> None: + """Add a HAVE presence response.""" + self.block_presences.append(BlockPresence.have(cid)) + + def add_dont_have(self, cid: CIDInput) -> None: + """Add a DONT_HAVE presence response.""" + self.block_presences.append(BlockPresence.dont_have(cid)) + + # ── protobuf conversion ─────────────────────────────────────────────────── + + def to_proto(self) -> PBMessage: + """ + Convert to a raw protobuf Message object (pb.bitswap_pb2.Message). + + Returns: + A populated protobuf Message ready for serialisation. + + """ + proto = PBMessage() + + if self.wantlist is not None: + for entry in self.wantlist.entries: + pb_entry = proto.wantlist.entries.add() + pb_entry.block = entry.cid + pb_entry.priority = entry.priority + pb_entry.cancel = entry.cancel + pb_entry.wantType = entry.want_type.value # type: ignore[assignment] + pb_entry.sendDontHave = entry.send_dont_have + proto.wantlist.full = self.wantlist.full + + for cid_bytes, data in self.blocks: + from .cid import get_cid_prefix + + pb_block = proto.payload.add() + pb_block.prefix = get_cid_prefix(cid_bytes) + pb_block.data = data + + for presence in self.block_presences: + pb_presence = proto.blockPresences.add() + pb_presence.cid = presence.cid + pb_presence.type = presence.type.value # type: ignore[assignment] + + if self.pending_bytes: + proto.pendingBytes = self.pending_bytes + + return proto + + @classmethod + def from_proto(cls, proto: PBMessage) -> BitswapMessage: + """ + Build a BitswapMessage from a raw protobuf Message object. + + Args: + proto: A pb.bitswap_pb2.Message instance. + + Returns: + A populated BitswapMessage dataclass. + + """ + from .cid import reconstruct_cid_from_prefix_and_data + + msg = cls() + + if proto.HasField("wantlist") and proto.wantlist.entries: + wl = Wantlist(full=proto.wantlist.full) + for e in proto.wantlist.entries: + wl.entries.append( + WantlistEntry( + cid=bytes(e.block), + priority=e.priority, + cancel=e.cancel, + want_type=WantType(e.wantType), + send_dont_have=e.sendDontHave, + ) + ) + msg.wantlist = wl + + for pb_block in proto.payload: + cid_bytes = reconstruct_cid_from_prefix_and_data( + bytes(pb_block.prefix), bytes(pb_block.data) + ) + msg.blocks.append((cid_bytes, bytes(pb_block.data))) + + for pb_presence in proto.blockPresences: + msg.block_presences.append( + BlockPresence( + cid=bytes(pb_presence.cid), + type=BlockPresenceType(pb_presence.type), + ) + ) + + msg.pending_bytes = proto.pendingBytes + return msg From 5df7ca885f93907be42a5530ed3d4c74b849a384 Mon Sep 17 00:00:00 2001 From: sumanjeet0012 Date: Sun, 3 May 2026 14:01:05 +0530 Subject: [PATCH 08/37] Add comprehensive tests for Bitswap functionality - Introduced `test_block_service.py` to validate BlockService behavior including local hits, network fetches, auto-caching, and block storage. - Created `test_filesystem_blockstore.py` to manually test FilesystemBlockStore for basic operations, persistence, and directory structure. - Added `test_io_stream.py` to verify io.IOBase input support with chunk_stream and MerkleDag.add_stream functionalities. - Implemented `test_unixfs_encoding.py` to ensure add_file and add_bytes produce dag-pb leaf blocks and validate balanced layout tree structures. - Developed `test_wantlist.py` to test Wantlist and Message dataclasses, including backward compatibility and public API exports. --- tests/core/bitswap/test_block_service.py | 228 +++++++++++++++ .../bitswap/test_filesystem_blockstore.py | 189 ++++++++++++ tests/core/bitswap/test_io_stream.py | 269 ++++++++++++++++++ tests/core/bitswap/test_unixfs_encoding.py | 249 ++++++++++++++++ tests/core/bitswap/test_wantlist.py | 265 +++++++++++++++++ 5 files changed, 1200 insertions(+) create mode 100644 tests/core/bitswap/test_block_service.py create mode 100644 tests/core/bitswap/test_filesystem_blockstore.py create mode 100644 tests/core/bitswap/test_io_stream.py create mode 100644 tests/core/bitswap/test_unixfs_encoding.py create mode 100644 tests/core/bitswap/test_wantlist.py diff --git a/tests/core/bitswap/test_block_service.py b/tests/core/bitswap/test_block_service.py new file mode 100644 index 000000000..75a51a4b6 --- /dev/null +++ b/tests/core/bitswap/test_block_service.py @@ -0,0 +1,228 @@ +""" +Test BlockService — transparent local→network fallback with auto-caching. + +Run with: + python test_block_service.py +""" +import trio +from unittest.mock import AsyncMock, MagicMock, call + +from libp2p.bitswap.block_service import BlockService +from libp2p.bitswap.block_store import MemoryBlockStore +from libp2p.bitswap.cid import compute_cid_v1, CODEC_RAW, cid_to_text +from libp2p.bitswap.client import BitswapClient + + +def make_block(content: bytes): + cid = compute_cid_v1(content, codec=CODEC_RAW) + return cid, content + + +def ok(label): print(f" OK {label}") + + +# ── helpers ─────────────────────────────────────────────────────────────────── + +def make_service(network_blocks: dict = None): + """ + Build a BlockService with a real MemoryBlockStore and a mock BitswapClient. + network_blocks: cid_bytes -> data that the mock 'network' can return. + """ + store = MemoryBlockStore() + mock_bitswap = MagicMock(spec=BitswapClient) + mock_bitswap.block_store = store + network_blocks = network_blocks or {} + + async def fake_get_block(cid, peer_id=None, timeout=30.0): + return network_blocks.get(bytes(cid)) + + async def fake_add_block(cid, data): + pass # just accept it + + async def fake_get_blocks_batch(cids, peer_id=None, timeout=30.0, batch_size=32): + return {bytes(c): network_blocks[bytes(c)] + for c in cids if bytes(c) in network_blocks} + + mock_bitswap.get_block = AsyncMock(side_effect=fake_get_block) + mock_bitswap.add_block = AsyncMock(side_effect=fake_add_block) + mock_bitswap.get_blocks_batch = AsyncMock(side_effect=fake_get_blocks_batch) + + service = BlockService(store, mock_bitswap) + return service, store, mock_bitswap + + +# ── tests ───────────────────────────────────────────────────────────────────── + +async def test_local_hit_no_network(): + print("\n[1] Local hit — network is never called") + cid, data = make_block(b"already stored locally") + service, store, mock_bitswap = make_service() + + # Pre-populate local store + await store.put_block(cid, data) + + result = await service.get_block(cid) + assert result == data + ok("get_block returns local data") + + mock_bitswap.get_block.assert_not_called() + ok("network (bitswap.get_block) was NOT called") + + +async def test_local_miss_goes_to_network(): + print("\n[2] Local miss — fetches from network") + cid, data = make_block(b"only on the network") + service, store, mock_bitswap = make_service(network_blocks={bytes(cid): data}) + + result = await service.get_block(cid) + assert result == data + ok("get_block returns network data") + + mock_bitswap.get_block.assert_called_once() + ok("network (bitswap.get_block) was called exactly once") + + +async def test_auto_cache_after_network_fetch(): + print("\n[3] Auto-cache — network-fetched block stored locally") + cid, data = make_block(b"fetch and cache me") + service, store, mock_bitswap = make_service(network_blocks={bytes(cid): data}) + + # First call: local miss → network fetch → auto-cache + result1 = await service.get_block(cid) + assert result1 == data + + # Verify it's now in the local store + cached = await store.get_block(cid) + assert cached == data + ok("block is in local store after first network fetch") + + # Second call: must be a local hit, no second network call + result2 = await service.get_block(cid) + assert result2 == data + assert mock_bitswap.get_block.call_count == 1 # still only 1 network call + ok("second get_block is a local hit (network called only once total)") + + +async def test_put_block_stores_and_announces(): + print("\n[4] put_block — stores locally AND calls bitswap.add_block") + cid, data = make_block(b"new block to store") + service, store, mock_bitswap = make_service() + + await service.put_block(cid, data) + + # Must be in local store + cached = await store.get_block(cid) + assert cached == data + ok("block is in local store after put_block") + + # Must have called bitswap.add_block (announces to waiting peers) + mock_bitswap.add_block.assert_called_once() + ok("bitswap.add_block was called (peers notified)") + + +async def test_get_blocks_batch_local_hits_skip_network(): + print("\n[5] get_blocks_batch — local hits skip network") + blocks = [make_block(f"block {i}".encode()) for i in range(5)] + service, store, mock_bitswap = make_service() + + # Store all 5 locally + for cid, data in blocks: + await store.put_block(cid, data) + + cids = [cid for cid, _ in blocks] + results = await service.get_blocks_batch(cids) + + assert len(results) == 5 + ok("all 5 blocks returned from local store") + mock_bitswap.get_blocks_batch.assert_not_called() + ok("network batch fetch was NOT called") + + +async def test_get_blocks_batch_partial_local(): + print("\n[6] get_blocks_batch — partial local, rest from network") + local_blocks = [make_block(f"local {i}".encode()) for i in range(3)] + net_blocks = [make_block(f"remote {i}".encode()) for i in range(2)] + network_dict = {bytes(cid): data for cid, data in net_blocks} + + service, store, mock_bitswap = make_service(network_blocks=network_dict) + + # Store only local blocks + for cid, data in local_blocks: + await store.put_block(cid, data) + + all_cids = [cid for cid, _ in local_blocks + net_blocks] + results = await service.get_blocks_batch(all_cids) + + assert len(results) == 5 + ok("all 5 blocks returned (3 local + 2 network)") + mock_bitswap.get_blocks_batch.assert_called_once() + ok("network batch fetch called exactly once (only for 2 missing blocks)") + + # Network blocks must now be cached locally + for cid, data in net_blocks: + cached = await store.get_block(cid) + assert cached == data + ok("network-fetched blocks are now cached locally") + + +async def test_missing_block_returns_none(): + print("\n[7] get_block returns None when block not found anywhere") + cid, _ = make_block(b"this block does not exist") + service, store, mock_bitswap = make_service(network_blocks={}) # empty network + + result = await service.get_block(cid) + assert result is None + ok("get_block returns None for unknown block") + + +async def test_merkledag_uses_block_service(): + print("\n[8] MerkleDag.add_bytes routes through BlockService") + from libp2p.bitswap.dag import MerkleDag + from libp2p.bitswap.dag_pb import is_file_node + + service, store, mock_bitswap = make_service() + dag = MerkleDag(mock_bitswap, block_service=service) + + data = b"hello block service" * 100 + root_cid = await dag.add_bytes(data) + + # All blocks must be in the local store via BlockService + cached = await store.get_block(root_cid) + assert cached is not None + ok("root block is in local store via BlockService") + + # bitswap.add_block was called (for peer announcement) + assert mock_bitswap.add_block.called + ok("bitswap.add_block was called for peer announcement") + + # MerkleDag without BlockService still works (no regression) + service2, store2, mock_bitswap2 = make_service() + dag2 = MerkleDag(mock_bitswap2) # no block_service + root_cid2 = await dag2.add_bytes(data) + assert root_cid2 is not None + ok("MerkleDag without BlockService still works (no regression)") + + +# ── main ────────────────────────────────────────────────────────────────────── + +async def main(): + print("=" * 60) + print("BlockService — Test Suite") + print("=" * 60) + + await test_local_hit_no_network() + await test_local_miss_goes_to_network() + await test_auto_cache_after_network_fetch() + await test_put_block_stores_and_announces() + await test_get_blocks_batch_local_hits_skip_network() + await test_get_blocks_batch_partial_local() + await test_missing_block_returns_none() + await test_merkledag_uses_block_service() + + print("\n" + "=" * 60) + print("All tests passed!") + print("=" * 60) + + +if __name__ == "__main__": + trio.run(main) diff --git a/tests/core/bitswap/test_filesystem_blockstore.py b/tests/core/bitswap/test_filesystem_blockstore.py new file mode 100644 index 000000000..2bf1e2709 --- /dev/null +++ b/tests/core/bitswap/test_filesystem_blockstore.py @@ -0,0 +1,189 @@ +""" +Manual test for FilesystemBlockStore. + +Tests: + 1. Basic put/get/has/delete round-trip + 2. Persistence: blocks survive store re-creation (simulates process restart) + 3. get_all_cids: scans the directory tree and returns all stored CIDs + 4. Drop-in replacement: swapping MemoryBlockStore → FilesystemBlockStore + +Run with: + python test_filesystem_blockstore.py +""" + +import shutil +import tempfile +from pathlib import Path + +import trio + +from libp2p.bitswap.block_store import FilesystemBlockStore, MemoryBlockStore +from libp2p.bitswap.cid import compute_cid_v1, CODEC_RAW, cid_to_text + + +# ── helpers ────────────────────────────────────────────────────────────────── + +def make_block(content: bytes) -> tuple[bytes, bytes]: + """Return (cid_bytes, data) for a raw block.""" + cid = compute_cid_v1(content, codec=CODEC_RAW) + return cid, content + + +def pass_fail(label: str, ok: bool) -> None: + icon = "✅" if ok else "❌" + print(f" {icon} {label}") + if not ok: + raise AssertionError(f"FAILED: {label}") + + +# ── tests ───────────────────────────────────────────────────────────────────── + +async def test_basic_round_trip(store_path: str) -> None: + print("\n[1] Basic put / get / has / delete") + store = FilesystemBlockStore(store_path) + + cid, data = make_block(b"hello filesystem blockstore") + + # has_block → False before put + pass_fail("has_block returns False before put", + not await store.has_block(cid)) + + # put_block + await store.put_block(cid, data) + pass_fail("block file exists on disk after put", + store._cid_to_path(cid).exists()) + + # get_block + fetched = await store.get_block(cid) + pass_fail("get_block returns correct data", fetched == data) + + # has_block → True after put + pass_fail("has_block returns True after put", + await store.has_block(cid)) + + # delete_block + await store.delete_block(cid) + pass_fail("block file gone after delete", + not store._cid_to_path(cid).exists()) + pass_fail("get_block returns None after delete", + await store.get_block(cid) is None) + + +async def test_persistence(store_path: str) -> None: + print("\n[2] Persistence across store re-creation (simulates process restart)") + + # Write with first instance + store1 = FilesystemBlockStore(store_path) + cid1, data1 = make_block(b"block that should survive restart") + cid2, data2 = make_block(b"another persistent block") + await store1.put_block(cid1, data1) + await store1.put_block(cid2, data2) + pass_fail("2 blocks written by store1", + store1.size() == 2) + + # Create a brand-new store object pointing to the same path + # (simulates a process restart) + store2 = FilesystemBlockStore(store_path) + pass_fail("store2 sees block1 written by store1", + await store2.get_block(cid1) == data1) + pass_fail("store2 sees block2 written by store1", + await store2.get_block(cid2) == data2) + pass_fail("store2.size() == 2", + store2.size() == 2) + + print(f" Block directory: {store2.base_path()}") + print(f" CID1: {cid_to_text(cid1)}") + print(f" CID2: {cid_to_text(cid2)}") + + +async def test_get_all_cids(store_path: str) -> None: + print("\n[3] get_all_cids scans directory tree") + store = FilesystemBlockStore(store_path) + + blocks = [make_block(f"block {i}".encode()) for i in range(5)] + for cid, data in blocks: + await store.put_block(cid, data) + + all_cids = store.get_all_cids() + pass_fail(f"get_all_cids returns {len(blocks)} CIDs", + len(all_cids) == len(blocks)) + + stored_set = {bytes(c) for c in all_cids} + for cid, _ in blocks: + pass_fail(f"CID {cid_to_text(cid)[:20]}... is in get_all_cids", + bytes(cid) in stored_set) + + +async def test_get_missing_returns_none(store_path: str) -> None: + print("\n[4] get_block returns None for missing CID") + store = FilesystemBlockStore(store_path) + cid, _ = make_block(b"this block was never stored") + result = await store.get_block(cid) + pass_fail("get_block returns None for unknown CID", result is None) + + +async def test_drop_in_for_memory_store(store_path: str) -> None: + print("\n[5] Drop-in replacement for MemoryBlockStore") + + async def use_store(store) -> bytes: + """Same code works for both store types.""" + cid, data = make_block(b"drop-in replacement test") + await store.put_block(cid, data) + return await store.get_block(cid) + + mem_result = await use_store(MemoryBlockStore()) + fs_result = await use_store(FilesystemBlockStore(store_path)) + + pass_fail("MemoryBlockStore and FilesystemBlockStore return same data", + mem_result == fs_result) + + +async def test_directory_structure(store_path: str) -> None: + print("\n[6] 2-char prefix directory structure") + store = FilesystemBlockStore(store_path) + cid, data = make_block(b"check directory layout") + await store.put_block(cid, data) + + cid_str = cid_to_text(cid) + expected_dir = Path(store_path) / cid_str[:2] + expected_file = expected_dir / cid_str[2:] + + pass_fail(f"2-char prefix dir '{cid_str[:2]}' exists", + expected_dir.is_dir()) + pass_fail(f"block file '{cid_str[2:8]}...' exists inside prefix dir", + expected_file.exists()) + pass_fail("file contents match original data", + expected_file.read_bytes() == data) + + print(f" Path: {expected_file}") + + +# ── main ────────────────────────────────────────────────────────────────────── + +async def main() -> None: + print("=" * 60) + print("FilesystemBlockStore — Manual Test Suite") + print("=" * 60) + + # Each test gets its own temp directory so they don't interfere + dirs = [tempfile.mkdtemp(prefix="fs_blockstore_test_") for _ in range(6)] + + try: + await test_basic_round_trip(dirs[0]) + await test_persistence(dirs[1]) + await test_get_all_cids(dirs[2]) + await test_get_missing_returns_none(dirs[3]) + await test_drop_in_for_memory_store(dirs[4]) + await test_directory_structure(dirs[5]) + + print("\n" + "=" * 60) + print("✅ All tests passed!") + print("=" * 60) + + finally: + for d in dirs: + shutil.rmtree(d, ignore_errors=True) + + +if __name__ == "__main__": + trio.run(main) diff --git a/tests/core/bitswap/test_io_stream.py b/tests/core/bitswap/test_io_stream.py new file mode 100644 index 000000000..18386c87b --- /dev/null +++ b/tests/core/bitswap/test_io_stream.py @@ -0,0 +1,269 @@ +""" +Test io.IOBase input support — chunk_stream() and MerkleDag.add_stream(). + +Run with: + python test_io_stream.py +""" +import gzip +import io +import os +import tempfile + +import trio + +from libp2p.bitswap.block_store import MemoryBlockStore +from libp2p.bitswap.chunker import chunk_stream, DEFAULT_CHUNK_SIZE +from libp2p.bitswap.cid import compute_cid_v1, CODEC_DAG_PB, cid_to_text +from libp2p.bitswap.dag_pb import decode_dag_pb, is_file_node + + +def ok(label): print(f" OK {label}") + + +# ── 1. chunk_stream basics ──────────────────────────────────────────────────── + +def test_chunk_stream_bytesio(): + print("\n[1] chunk_stream — BytesIO") + data = b"x" * (DEFAULT_CHUNK_SIZE * 3 + 100) # 3 full + 1 partial chunk + chunks = list(chunk_stream(io.BytesIO(data), DEFAULT_CHUNK_SIZE)) + assert len(chunks) == 4 + assert b"".join(chunks) == data + assert len(chunks[0]) == DEFAULT_CHUNK_SIZE + assert len(chunks[-1]) == 100 + ok(f"4 chunks, sizes: {[len(c) for c in chunks]}") + + +def test_chunk_stream_empty(): + print("\n[2] chunk_stream — empty stream yields nothing") + chunks = list(chunk_stream(io.BytesIO(b""))) + assert chunks == [] + ok("empty stream yields no chunks") + + +def test_chunk_stream_file_handle(): + print("\n[3] chunk_stream — real file handle") + data = b"file handle test " * 5000 + with tempfile.NamedTemporaryFile(delete=False) as f: + f.write(data) + tmp = f.name + try: + with open(tmp, "rb") as fh: + chunks = list(chunk_stream(fh)) + assert b"".join(chunks) == data + ok(f"file handle: {len(chunks)} chunks, {len(data)} bytes total") + finally: + os.unlink(tmp) + + +def test_chunk_stream_gzip(): + print("\n[4] chunk_stream — gzip stream (decompress on-the-fly)") + original = b"compressed data " * 10000 + buf = io.BytesIO() + with gzip.GzipFile(fileobj=buf, mode="wb") as gz: + gz.write(original) + buf.seek(0) + + with gzip.GzipFile(fileobj=buf, mode="rb") as gz: + chunks = list(chunk_stream(gz)) + + assert b"".join(chunks) == original + ok(f"gzip stream: {len(chunks)} chunks, {len(original)} bytes decompressed") + + +def test_chunk_stream_matches_chunk_bytes(): + print("\n[5] chunk_stream produces same chunks as chunk_bytes") + from libp2p.bitswap.chunker import chunk_bytes + data = os.urandom(DEFAULT_CHUNK_SIZE * 5 + 777) + stream_chunks = list(chunk_stream(io.BytesIO(data))) + bytes_chunks = chunk_bytes(data) + assert stream_chunks == bytes_chunks + ok(f"chunk_stream == chunk_bytes for {len(data)} bytes of random data") + + +# ── 2. MerkleDag.add_stream ─────────────────────────────────────────────────── + +async def test_add_stream_bytesio(): + print("\n[6] add_stream — BytesIO produces same CID as add_bytes") + from unittest.mock import AsyncMock, MagicMock + from libp2p.bitswap.client import BitswapClient + from libp2p.bitswap.dag import MerkleDag + + store = MemoryBlockStore() + mock = MagicMock(spec=BitswapClient) + mock.block_store = store + stored: dict[bytes, bytes] = {} + + async def add_block(cid, data): stored[bytes(cid)] = data + mock.add_block = AsyncMock(side_effect=add_block) + + dag = MerkleDag(mock) + data = b"same content " * 5000 + + cid_bytes = await dag.add_bytes(data) + stored.clear() + cid_stream = await dag.add_stream(io.BytesIO(data)) + + assert bytes(cid_bytes) == bytes(cid_stream), ( + f"CIDs differ:\n add_bytes: {cid_to_text(cid_bytes)}\n" + f" add_stream: {cid_to_text(cid_stream)}" + ) + ok(f"add_stream CID == add_bytes CID: {cid_to_text(cid_stream)[:30]}...") + + +async def test_add_stream_empty(): + print("\n[7] add_stream — empty stream stores single empty leaf") + from unittest.mock import AsyncMock, MagicMock + from libp2p.bitswap.client import BitswapClient + from libp2p.bitswap.dag import MerkleDag + + store = MemoryBlockStore() + mock = MagicMock(spec=BitswapClient) + mock.block_store = store + stored: dict[bytes, bytes] = {} + + async def add_block(cid, data): stored[bytes(cid)] = data + mock.add_block = AsyncMock(side_effect=add_block) + + dag = MerkleDag(mock) + root_cid = await dag.add_stream(io.BytesIO(b"")) + + assert len(stored) == 1 + block = list(stored.values())[0] + assert is_file_node(block) + _, unixfs = decode_dag_pb(block) + assert unixfs.filesize == 0 + ok("empty stream → 1 empty dag-pb leaf block stored") + + +async def test_add_stream_single_chunk(): + print("\n[8] add_stream — single chunk returns leaf CID directly (no root node)") + from unittest.mock import AsyncMock, MagicMock + from libp2p.bitswap.client import BitswapClient + from libp2p.bitswap.dag import MerkleDag + + store = MemoryBlockStore() + mock = MagicMock(spec=BitswapClient) + mock.block_store = store + stored: dict[bytes, bytes] = {} + + async def add_block(cid, data): stored[bytes(cid)] = data + mock.add_block = AsyncMock(side_effect=add_block) + + dag = MerkleDag(mock) + data = b"small enough to be one chunk" + root_cid = await dag.add_stream(io.BytesIO(data)) + + assert len(stored) == 1, f"expected 1 block, got {len(stored)}" + block = stored[bytes(root_cid)] + _, unixfs = decode_dag_pb(block) + assert unixfs.data == data + ok("single chunk: leaf CID returned directly, inline data correct") + + +async def test_add_stream_gzip(): + print("\n[9] add_stream — gzip stream decompresses and adds correctly") + from unittest.mock import AsyncMock, MagicMock + from libp2p.bitswap.client import BitswapClient + from libp2p.bitswap.dag import MerkleDag + + original = b"gzip content " * 20000 # ~260 KB — 2 chunks after decompress + + buf = io.BytesIO() + with gzip.GzipFile(fileobj=buf, mode="wb") as gz: + gz.write(original) + compressed_size = buf.tell() + buf.seek(0) + + store = MemoryBlockStore() + mock = MagicMock(spec=BitswapClient) + mock.block_store = store + stored: dict[bytes, bytes] = {} + + async def add_block(cid, data): stored[bytes(cid)] = data + mock.add_block = AsyncMock(side_effect=add_block) + + dag = MerkleDag(mock) + + with gzip.GzipFile(fileobj=buf, mode="rb") as gz: + root_cid = await dag.add_stream(gz) + + # Reassemble all leaf data + root_block = stored[bytes(root_cid)] + links, _ = decode_dag_pb(root_block) + reassembled = b"" + for link in links: + leaf = stored[bytes(link.cid)] + _, leaf_unixfs = decode_dag_pb(leaf) + reassembled += leaf_unixfs.data + + assert reassembled == original + ok(f"gzip stream: {compressed_size} compressed → {len(original)} bytes added " + f"in {len(links)} chunks") + + +async def test_add_stream_vs_add_file_same_cid(): + print("\n[10] add_stream(open(f)) produces same CID as add_file(path)") + from unittest.mock import AsyncMock, MagicMock + from libp2p.bitswap.client import BitswapClient + from libp2p.bitswap.dag import MerkleDag + + data = b"compare stream vs file " * 8000 # ~176 KB, 3 chunks + + with tempfile.NamedTemporaryFile(delete=False) as f: + f.write(data) + tmp = f.name + + try: + def make_dag(): + store = MemoryBlockStore() + mock = MagicMock(spec=BitswapClient) + mock.block_store = store + stored = {} + async def add_block(cid, d): stored[bytes(cid)] = d + mock.add_block = AsyncMock(side_effect=add_block) + return MerkleDag(mock) + + dag1 = make_dag() + cid_file = await dag1.add_file(tmp, wrap_with_directory=False) + + dag2 = make_dag() + with open(tmp, "rb") as fh: + cid_stream = await dag2.add_stream(fh) + + assert bytes(cid_file) == bytes(cid_stream), ( + f"CIDs differ:\n add_file: {cid_to_text(cid_file)}\n" + f" add_stream: {cid_to_text(cid_stream)}" + ) + ok(f"add_file == add_stream CID: {cid_to_text(cid_file)[:30]}...") + finally: + os.unlink(tmp) + + +# ── main ────────────────────────────────────────────────────────────────────── + +async def main(): + print("=" * 60) + print("io.IOBase Input Support — Test Suite") + print("=" * 60) + + # sync tests + test_chunk_stream_bytesio() + test_chunk_stream_empty() + test_chunk_stream_file_handle() + test_chunk_stream_gzip() + test_chunk_stream_matches_chunk_bytes() + + # async tests + await test_add_stream_bytesio() + await test_add_stream_empty() + await test_add_stream_single_chunk() + await test_add_stream_gzip() + await test_add_stream_vs_add_file_same_cid() + + print("\n" + "=" * 60) + print("All tests passed!") + print("=" * 60) + + +if __name__ == "__main__": + trio.run(main) diff --git a/tests/core/bitswap/test_unixfs_encoding.py b/tests/core/bitswap/test_unixfs_encoding.py new file mode 100644 index 000000000..355590da7 --- /dev/null +++ b/tests/core/bitswap/test_unixfs_encoding.py @@ -0,0 +1,249 @@ +""" +Test that add_file / add_bytes now produce dag-pb leaf blocks (UnixFS-wrapped) +and that balanced_layout builds the correct tree structure. + +Run with: + python test_unixfs_encoding.py +""" +import hashlib +import tempfile +import os + +import trio + +from libp2p.bitswap.block_store import MemoryBlockStore +from libp2p.bitswap.cid import compute_cid_v1, CODEC_DAG_PB, CODEC_RAW, cid_to_text +from libp2p.bitswap.dag_pb import ( + create_leaf_node, + balanced_layout, + decode_dag_pb, + is_file_node, + UnixFSData, + Link, + encode_dag_pb, + MAX_LINKS_PER_NODE, +) + + +def ok(label): + print(f" OK {label}") + + +def fail(label, detail=""): + raise AssertionError(f"FAIL {label} {detail}") + + +# ── 1. create_leaf_node wraps data in dag-pb + UnixFS ──────────────────────── +def test_create_leaf_node(): + print("\n[1] create_leaf_node") + data = b"hello leaf" + leaf = create_leaf_node(data) + + # Must be a valid dag-pb file node + assert is_file_node(leaf), "leaf must be a dag-pb file node" + ok("create_leaf_node produces a dag-pb file node") + + # Decode and check inline data + links, unixfs = decode_dag_pb(leaf) + assert links == [], "leaf must have no links" + assert unixfs is not None + assert unixfs.data == data, f"inline data mismatch: {unixfs.data!r} != {data!r}" + assert unixfs.filesize == len(data) + ok(f"leaf contains inline data ({len(data)} bytes), filesize={unixfs.filesize}") + + # CID must be dag-pb, not raw + cid = compute_cid_v1(leaf, codec=CODEC_DAG_PB) + raw_cid = compute_cid_v1(data, codec=CODEC_RAW) + assert bytes(cid) != bytes(raw_cid), "dag-pb leaf CID must differ from raw CID" + ok(f"leaf CID is dag-pb (not raw): {cid_to_text(cid)[:30]}...") + + # Empty leaf + empty_leaf = create_leaf_node(b"") + _, empty_unixfs = decode_dag_pb(empty_leaf) + assert empty_unixfs.filesize == 0 + ok("empty leaf node is valid") + + +# ── 2. balanced_layout single leaf ─────────────────────────────────────────── +def test_balanced_layout_single(): + print("\n[2] balanced_layout — single leaf returns leaf unchanged") + data = b"only chunk" + leaf = create_leaf_node(data) + cid = compute_cid_v1(leaf, codec=CODEC_DAG_PB) + + root_cid, root_block = balanced_layout([(cid, leaf, len(data))]) + assert bytes(root_cid) == bytes(cid) + assert root_block == leaf + ok("single leaf: root_cid == leaf_cid") + + +# ── 3. balanced_layout two leaves ──────────────────────────────────────────── +def test_balanced_layout_two_leaves(): + print("\n[3] balanced_layout — two leaves builds one root") + leaves = [] + for i in range(2): + data = f"chunk {i}".encode() * 100 + leaf = create_leaf_node(data) + cid = compute_cid_v1(leaf, codec=CODEC_DAG_PB) + leaves.append((cid, leaf, len(data))) + + root_cid, root_block = balanced_layout(leaves) + + # Root must be a dag-pb file node with 2 links + assert is_file_node(root_block) + links, unixfs = decode_dag_pb(root_block) + assert len(links) == 2, f"expected 2 links, got {len(links)}" + assert unixfs.filesize == sum(s for _, _, s in leaves) + assert len(unixfs.blocksizes) == 2 + ok(f"root has 2 links, filesize={unixfs.filesize}, blocksizes={unixfs.blocksizes}") + + +# ── 4. balanced_layout 175 leaves builds 2-level tree ──────────────────────── +def test_balanced_layout_two_levels(): + print("\n[4] balanced_layout — 175 leaves builds 2-level tree (174 + 1)") + n = MAX_LINKS_PER_NODE + 1 # 175 + chunk_size = 100 + leaves = [] + for i in range(n): + data = bytes([i % 256]) * chunk_size + leaf = create_leaf_node(data) + cid = compute_cid_v1(leaf, codec=CODEC_DAG_PB) + leaves.append((cid, leaf, chunk_size)) + + root_cid, root_block = balanced_layout(leaves) + links, unixfs = decode_dag_pb(root_block) + + # Root should link to 2 internal nodes (174 + 1) + assert len(links) == 2, f"expected 2 top-level links, got {len(links)}" + assert unixfs.filesize == n * chunk_size + ok(f"175 leaves → root has 2 links (174-leaf node + 1-leaf node)") + ok(f"root filesize = {unixfs.filesize} = 175 * {chunk_size}") + + +# ── 5. balanced_layout 174 leaves stays flat ───────────────────────────────── +def test_balanced_layout_flat(): + print("\n[5] balanced_layout — exactly 174 leaves stays flat (1 level)") + n = MAX_LINKS_PER_NODE # 174 + leaves = [] + for i in range(n): + data = bytes([i % 256]) * 50 + leaf = create_leaf_node(data) + cid = compute_cid_v1(leaf, codec=CODEC_DAG_PB) + leaves.append((cid, leaf, 50)) + + root_cid, root_block = balanced_layout(leaves) + links, unixfs = decode_dag_pb(root_block) + + assert len(links) == 174, f"expected 174 direct links, got {len(links)}" + ok(f"174 leaves → flat root with 174 direct links") + + +# ── 6. add_file produces dag-pb leaves (not raw) via MerkleDag ─────────────── +async def test_add_file_produces_dag_pb_leaves(): + print("\n[6] MerkleDag.add_file produces dag-pb leaf blocks") + from unittest.mock import AsyncMock, MagicMock + from libp2p.bitswap.client import BitswapClient + from libp2p.bitswap.dag import MerkleDag + + store = MemoryBlockStore() + mock_client = MagicMock(spec=BitswapClient) + mock_client.block_store = store + stored: dict[bytes, bytes] = {} + + async def add_block_impl(cid, data): + stored[bytes(cid)] = data + mock_client.add_block = AsyncMock(side_effect=add_block_impl) + + dag = MerkleDag(mock_client) + + # Write a 3-chunk file + chunk_size = 63 * 1024 + content = b"x" * (chunk_size * 3 - 7) # 3 chunks + with tempfile.NamedTemporaryFile(delete=False) as f: + f.write(content) + tmp = f.name + + try: + root_cid = await dag.add_file(tmp, wrap_with_directory=False) + finally: + os.unlink(tmp) + + # Every stored block must be a dag-pb file node (no raw blocks) + raw_blocks = [] + for cid_bytes, block_data in stored.items(): + if not is_file_node(block_data): + raw_blocks.append(cid_to_text(cid_bytes)[:20]) + + assert raw_blocks == [], f"Found non-dag-pb blocks: {raw_blocks}" + ok(f"All {len(stored)} stored blocks are dag-pb file nodes (no raw blocks)") + + # Root must link to 3 leaves + root_block = stored[bytes(root_cid)] + links, unixfs = decode_dag_pb(root_block) + assert len(links) == 3, f"expected 3 links on root, got {len(links)}" + assert unixfs.filesize == len(content) + ok(f"root has 3 links, filesize={unixfs.filesize}") + + # Each leaf must contain inline UnixFS data + for link in links: + leaf_block = stored[bytes(link.cid)] + leaf_links, leaf_unixfs = decode_dag_pb(leaf_block) + assert leaf_links == [], "leaf must have no links" + assert leaf_unixfs is not None and leaf_unixfs.data != b"" + ok("each leaf contains inline UnixFS data") + + +# ── 7. add_bytes produces dag-pb leaves ────────────────────────────────────── +async def test_add_bytes_produces_dag_pb_leaves(): + print("\n[7] MerkleDag.add_bytes produces dag-pb leaf blocks") + from unittest.mock import AsyncMock, MagicMock + from libp2p.bitswap.client import BitswapClient + from libp2p.bitswap.dag import MerkleDag + + store = MemoryBlockStore() + mock_client = MagicMock(spec=BitswapClient) + mock_client.block_store = store + stored: dict[bytes, bytes] = {} + + async def add_block_impl(cid, data): + stored[bytes(cid)] = data + mock_client.add_block = AsyncMock(side_effect=add_block_impl) + + dag = MerkleDag(mock_client) + content = b"y" * (63 * 1024 * 2 + 500) # 3 chunks + root_cid = await dag.add_bytes(content) + + raw_blocks = [ + cid_to_text(c)[:20] for c, d in stored.items() if not is_file_node(d) + ] + assert raw_blocks == [], f"Found non-dag-pb blocks: {raw_blocks}" + ok(f"All {len(stored)} stored blocks are dag-pb file nodes") + + root_block = stored[bytes(root_cid)] + links, unixfs = decode_dag_pb(root_block) + assert len(links) == 3 + assert unixfs.filesize == len(content) + ok(f"root has 3 links, filesize={unixfs.filesize}") + + +# ── main ────────────────────────────────────────────────────────────────────── +async def main(): + print("=" * 60) + print("UnixFSFile / Balanced DAG — Test Suite") + print("=" * 60) + + test_create_leaf_node() + test_balanced_layout_single() + test_balanced_layout_two_leaves() + test_balanced_layout_two_levels() + test_balanced_layout_flat() + await test_add_file_produces_dag_pb_leaves() + await test_add_bytes_produces_dag_pb_leaves() + + print("\n" + "=" * 60) + print("All tests passed!") + print("=" * 60) + + +if __name__ == "__main__": + trio.run(main) diff --git a/tests/core/bitswap/test_wantlist.py b/tests/core/bitswap/test_wantlist.py new file mode 100644 index 000000000..40232560f --- /dev/null +++ b/tests/core/bitswap/test_wantlist.py @@ -0,0 +1,265 @@ +""" +Test Wantlist / Message dataclasses. + +Run with: + python test_wantlist.py +""" +from libp2p.bitswap.cid import compute_cid_v1, CODEC_RAW, cid_to_bytes +from libp2p.bitswap.wantlist import ( + WantType, BlockPresenceType, + WantlistEntry, Wantlist, + BlockPresence, BitswapMessage, +) +from libp2p.bitswap.messages import create_wantlist_entry + + +def make_cid(content: bytes) -> bytes: + return cid_to_bytes(compute_cid_v1(content, codec=CODEC_RAW)) + + +def ok(label): print(f" OK {label}") + + +# ── WantType enum ───────────────────────────────────────────────────────────── + +def test_want_type_values(): + print("\n[1] WantType enum values match protobuf") + assert WantType.Block.value == 0 + assert WantType.Have.value == 1 + ok("WantType.Block == 0, WantType.Have == 1") + + +# ── WantlistEntry ───────────────────────────────────────────────────────────── + +def test_wantlist_entry_from_cid(): + print("\n[2] WantlistEntry.from_cid normalises any CIDInput") + cid = compute_cid_v1(b"entry test", codec=CODEC_RAW) + cid_bytes = cid_to_bytes(cid) + + # from bytes + e1 = WantlistEntry.from_cid(cid_bytes) + assert e1.cid == cid_bytes + assert e1.want_type == WantType.Block + assert e1.priority == 1 + assert not e1.cancel + ok("from bytes — defaults correct") + + # from CIDObject + e2 = WantlistEntry.from_cid(cid, want_type=WantType.Have, send_dont_have=True) + assert e2.want_type == WantType.Have + assert e2.send_dont_have + ok("from CIDObject — WantType.Have, send_dont_have=True") + + # cancel entry + e3 = WantlistEntry.from_cid(cid_bytes, cancel=True) + assert e3.cancel + ok("cancel entry") + + +# ── Wantlist ────────────────────────────────────────────────────────────────── + +def test_wantlist_add_cancel_contains(): + print("\n[3] Wantlist.add / cancel / contains") + cid1 = make_cid(b"block 1") + cid2 = make_cid(b"block 2") + cid3 = make_cid(b"block 3") + + wl = Wantlist() + assert len(wl) == 0 + assert not wl + + wl.add(cid1, want_type=WantType.Block, send_dont_have=True) + wl.add(cid2, want_type=WantType.Have) + wl.cancel(cid3) + + assert len(wl) == 3 + assert bool(wl) + ok("len(wl) == 3 after 2 adds + 1 cancel") + + assert wl.contains(cid1) + assert wl.contains(cid2) + assert not wl.contains(cid3) # cancel entry → not "contained" + ok("contains() returns True for non-cancel entries only") + + # Check entry fields + e1 = wl.entries[0] + assert e1.want_type == WantType.Block + assert e1.send_dont_have + e2 = wl.entries[1] + assert e2.want_type == WantType.Have + e3 = wl.entries[2] + assert e3.cancel + ok("entry fields correct (want_type, send_dont_have, cancel)") + + +def test_wantlist_full_flag(): + print("\n[4] Wantlist.full flag") + wl = Wantlist(full=True) + assert wl.full + ok("full=True preserved") + + +# ── BlockPresence ───────────────────────────────────────────────────────────── + +def test_block_presence(): + print("\n[5] BlockPresence constructors") + cid = make_cid(b"presence test") + + have = BlockPresence.have(cid) + assert have.cid == cid + assert have.type == BlockPresenceType.Have + ok("BlockPresence.have()") + + dont = BlockPresence.dont_have(cid) + assert dont.cid == cid + assert dont.type == BlockPresenceType.DontHave + ok("BlockPresence.dont_have()") + + assert BlockPresenceType.Have.value == 0 + assert BlockPresenceType.DontHave.value == 1 + ok("BlockPresenceType values match protobuf (Have=0, DontHave=1)") + + +# ── BitswapMessage ──────────────────────────────────────────────────────────── + +def test_bitswap_message_properties(): + print("\n[6] BitswapMessage builder + properties") + cid1 = make_cid(b"want me") + cid2 = make_cid(b"block data") + cid3 = make_cid(b"i have this") + cid4 = make_cid(b"i dont have this") + data = b"actual block content" + + msg = BitswapMessage() + assert not msg.is_want + assert not msg.has_blocks + assert not msg.has_presences + + msg.add_want(cid1, want_type=WantType.Block, send_dont_have=True) + assert msg.is_want + ok("is_want True after add_want()") + + msg.add_block(cid2, data) + assert msg.has_blocks + assert msg.blocks[0] == (cid2, data) + ok("has_blocks True after add_block()") + + msg.add_have(cid3) + msg.add_dont_have(cid4) + assert msg.has_presences + assert len(msg.block_presences) == 2 + assert msg.block_presences[0].type == BlockPresenceType.Have + assert msg.block_presences[1].type == BlockPresenceType.DontHave + ok("has_presences True, HAVE and DONT_HAVE entries correct") + + +def test_bitswap_message_cancel_want(): + print("\n[7] BitswapMessage.cancel_want()") + cid = make_cid(b"cancel me") + msg = BitswapMessage() + msg.cancel_want(cid) + assert msg.is_want + assert msg.wantlist.entries[0].cancel + ok("cancel_want() adds cancel entry") + + +# ── to_proto / from_proto round-trip ───────────────────────────────────────── + +def test_to_proto_from_proto_roundtrip(): + print("\n[8] BitswapMessage to_proto() / from_proto() round-trip") + cid1 = make_cid(b"want block") + cid2 = make_cid(b"block payload") + cid3 = make_cid(b"have this") + data = b"block payload data" + + original = BitswapMessage() + original.add_want(cid1, want_type=WantType.Block, send_dont_have=True) + original.add_block(cid2, data) + original.add_have(cid3) + original.add_dont_have(make_cid(b"dont have")) + + proto = original.to_proto() + restored = BitswapMessage.from_proto(proto) + + # Wantlist + assert restored.wantlist is not None + assert len(restored.wantlist.entries) == 1 + e = restored.wantlist.entries[0] + assert e.cid == cid1 + assert e.want_type == WantType.Block + assert e.send_dont_have + ok("wantlist entry round-trips correctly") + + # Block payload + assert len(restored.blocks) == 1 + restored_cid, restored_data = restored.blocks[0] + assert restored_data == data + ok("block payload round-trips correctly") + + # Block presences + assert len(restored.block_presences) == 2 + assert restored.block_presences[0].type == BlockPresenceType.Have + assert restored.block_presences[1].type == BlockPresenceType.DontHave + ok("block presences round-trip correctly") + + +# ── backward compat: create_wantlist_entry accepts int OR WantType ──────────── + +def test_create_wantlist_entry_backward_compat(): + print("\n[9] create_wantlist_entry — backward compat (int OR WantType)") + cid = make_cid(b"compat test") + + # Old style: raw int + e_int = create_wantlist_entry(cid, want_type=0) + assert e_int.wantType == 0 + ok("want_type=0 (int) still works") + + e_int2 = create_wantlist_entry(cid, want_type=1) + assert e_int2.wantType == 1 + ok("want_type=1 (int) still works") + + # New style: WantType enum + e_enum = create_wantlist_entry(cid, want_type=WantType.Block) + assert e_enum.wantType == 0 + ok("want_type=WantType.Block works") + + e_enum2 = create_wantlist_entry(cid, want_type=WantType.Have) + assert e_enum2.wantType == 1 + ok("want_type=WantType.Have works") + + +# ── public API exports ──────────────────────────────────────────────────────── + +def test_public_exports(): + print("\n[10] All types exported from libp2p.bitswap") + from libp2p.bitswap import ( + WantType, WantlistEntry, Wantlist, + BlockPresence, BlockPresenceType, BitswapMessage, + ) + assert WantType.Block.value == 0 + assert WantType.Have.value == 1 + ok("WantType, WantlistEntry, Wantlist, BlockPresence, BlockPresenceType, " + "BitswapMessage all importable from libp2p.bitswap") + + +# ── main ────────────────────────────────────────────────────────────────────── + +if __name__ == "__main__": + print("=" * 60) + print("Wantlist / Message Dataclasses — Test Suite") + print("=" * 60) + + test_want_type_values() + test_wantlist_entry_from_cid() + test_wantlist_add_cancel_contains() + test_wantlist_full_flag() + test_block_presence() + test_bitswap_message_properties() + test_bitswap_message_cancel_want() + test_to_proto_from_proto_roundtrip() + test_create_wantlist_entry_backward_compat() + test_public_exports() + + print("\n" + "=" * 60) + print("All tests passed!") + print("=" * 60) From 58719a78ba64fd512991937d190a90bc45d748b5 Mon Sep 17 00:00:00 2001 From: sumanjeet0012 Date: Sun, 3 May 2026 14:20:20 +0530 Subject: [PATCH 09/37] refactor: clean up imports and improve code formatting across multiple test files --- libp2p/bitswap/wantlist.py | 1 - tests/core/bitswap/test_block_service.py | 22 ++++-- .../bitswap/test_filesystem_blockstore.py | 70 +++++++++---------- tests/core/bitswap/test_io_stream.py | 57 ++++++++++----- tests/core/bitswap/test_unixfs_encoding.py | 25 ++++--- tests/core/bitswap/test_wantlist.py | 37 +++++++--- 6 files changed, 129 insertions(+), 83 deletions(-) diff --git a/libp2p/bitswap/wantlist.py b/libp2p/bitswap/wantlist.py index 30ba5f228..8c3f80519 100644 --- a/libp2p/bitswap/wantlist.py +++ b/libp2p/bitswap/wantlist.py @@ -29,7 +29,6 @@ from dataclasses import dataclass, field from enum import Enum -from typing import List, Optional from .cid import CIDInput, cid_to_bytes from .pb.bitswap_pb2 import Message as PBMessage diff --git a/tests/core/bitswap/test_block_service.py b/tests/core/bitswap/test_block_service.py index 75a51a4b6..a7a02f7dc 100644 --- a/tests/core/bitswap/test_block_service.py +++ b/tests/core/bitswap/test_block_service.py @@ -4,12 +4,14 @@ Run with: python test_block_service.py """ + +from unittest.mock import AsyncMock, MagicMock + import trio -from unittest.mock import AsyncMock, MagicMock, call from libp2p.bitswap.block_service import BlockService from libp2p.bitswap.block_store import MemoryBlockStore -from libp2p.bitswap.cid import compute_cid_v1, CODEC_RAW, cid_to_text +from libp2p.bitswap.cid import CODEC_RAW, compute_cid_v1 from libp2p.bitswap.client import BitswapClient @@ -18,11 +20,13 @@ def make_block(content: bytes): return cid, content -def ok(label): print(f" OK {label}") +def ok(label): + print(f" OK {label}") # ── helpers ─────────────────────────────────────────────────────────────────── + def make_service(network_blocks: dict = None): """ Build a BlockService with a real MemoryBlockStore and a mock BitswapClient. @@ -40,8 +44,11 @@ async def fake_add_block(cid, data): pass # just accept it async def fake_get_blocks_batch(cids, peer_id=None, timeout=30.0, batch_size=32): - return {bytes(c): network_blocks[bytes(c)] - for c in cids if bytes(c) in network_blocks} + return { + bytes(c): network_blocks[bytes(c)] + for c in cids + if bytes(c) in network_blocks + } mock_bitswap.get_block = AsyncMock(side_effect=fake_get_block) mock_bitswap.add_block = AsyncMock(side_effect=fake_add_block) @@ -53,6 +60,7 @@ async def fake_get_blocks_batch(cids, peer_id=None, timeout=30.0, batch_size=32) # ── tests ───────────────────────────────────────────────────────────────────── + async def test_local_hit_no_network(): print("\n[1] Local hit — network is never called") cid, data = make_block(b"already stored locally") @@ -141,7 +149,7 @@ async def test_get_blocks_batch_local_hits_skip_network(): async def test_get_blocks_batch_partial_local(): print("\n[6] get_blocks_batch — partial local, rest from network") local_blocks = [make_block(f"local {i}".encode()) for i in range(3)] - net_blocks = [make_block(f"remote {i}".encode()) for i in range(2)] + net_blocks = [make_block(f"remote {i}".encode()) for i in range(2)] network_dict = {bytes(cid): data for cid, data in net_blocks} service, store, mock_bitswap = make_service(network_blocks=network_dict) @@ -178,7 +186,6 @@ async def test_missing_block_returns_none(): async def test_merkledag_uses_block_service(): print("\n[8] MerkleDag.add_bytes routes through BlockService") from libp2p.bitswap.dag import MerkleDag - from libp2p.bitswap.dag_pb import is_file_node service, store, mock_bitswap = make_service() dag = MerkleDag(mock_bitswap, block_service=service) @@ -205,6 +212,7 @@ async def test_merkledag_uses_block_service(): # ── main ────────────────────────────────────────────────────────────────────── + async def main(): print("=" * 60) print("BlockService — Test Suite") diff --git a/tests/core/bitswap/test_filesystem_blockstore.py b/tests/core/bitswap/test_filesystem_blockstore.py index 2bf1e2709..ffb51a9f2 100644 --- a/tests/core/bitswap/test_filesystem_blockstore.py +++ b/tests/core/bitswap/test_filesystem_blockstore.py @@ -11,18 +11,18 @@ python test_filesystem_blockstore.py """ +from pathlib import Path import shutil import tempfile -from pathlib import Path import trio from libp2p.bitswap.block_store import FilesystemBlockStore, MemoryBlockStore -from libp2p.bitswap.cid import compute_cid_v1, CODEC_RAW, cid_to_text - +from libp2p.bitswap.cid import CODEC_RAW, cid_to_text, compute_cid_v1 # ── helpers ────────────────────────────────────────────────────────────────── + def make_block(content: bytes) -> tuple[bytes, bytes]: """Return (cid_bytes, data) for a raw block.""" cid = compute_cid_v1(content, codec=CODEC_RAW) @@ -38,6 +38,7 @@ def pass_fail(label: str, ok: bool) -> None: # ── tests ───────────────────────────────────────────────────────────────────── + async def test_basic_round_trip(store_path: str) -> None: print("\n[1] Basic put / get / has / delete") store = FilesystemBlockStore(store_path) @@ -45,28 +46,23 @@ async def test_basic_round_trip(store_path: str) -> None: cid, data = make_block(b"hello filesystem blockstore") # has_block → False before put - pass_fail("has_block returns False before put", - not await store.has_block(cid)) + pass_fail("has_block returns False before put", not await store.has_block(cid)) # put_block await store.put_block(cid, data) - pass_fail("block file exists on disk after put", - store._cid_to_path(cid).exists()) + pass_fail("block file exists on disk after put", store._cid_to_path(cid).exists()) # get_block fetched = await store.get_block(cid) pass_fail("get_block returns correct data", fetched == data) # has_block → True after put - pass_fail("has_block returns True after put", - await store.has_block(cid)) + pass_fail("has_block returns True after put", await store.has_block(cid)) # delete_block await store.delete_block(cid) - pass_fail("block file gone after delete", - not store._cid_to_path(cid).exists()) - pass_fail("get_block returns None after delete", - await store.get_block(cid) is None) + pass_fail("block file gone after delete", not store._cid_to_path(cid).exists()) + pass_fail("get_block returns None after delete", await store.get_block(cid) is None) async def test_persistence(store_path: str) -> None: @@ -78,18 +74,18 @@ async def test_persistence(store_path: str) -> None: cid2, data2 = make_block(b"another persistent block") await store1.put_block(cid1, data1) await store1.put_block(cid2, data2) - pass_fail("2 blocks written by store1", - store1.size() == 2) + pass_fail("2 blocks written by store1", store1.size() == 2) # Create a brand-new store object pointing to the same path # (simulates a process restart) store2 = FilesystemBlockStore(store_path) - pass_fail("store2 sees block1 written by store1", - await store2.get_block(cid1) == data1) - pass_fail("store2 sees block2 written by store1", - await store2.get_block(cid2) == data2) - pass_fail("store2.size() == 2", - store2.size() == 2) + pass_fail( + "store2 sees block1 written by store1", await store2.get_block(cid1) == data1 + ) + pass_fail( + "store2 sees block2 written by store1", await store2.get_block(cid2) == data2 + ) + pass_fail("store2.size() == 2", store2.size() == 2) print(f" Block directory: {store2.base_path()}") print(f" CID1: {cid_to_text(cid1)}") @@ -105,13 +101,14 @@ async def test_get_all_cids(store_path: str) -> None: await store.put_block(cid, data) all_cids = store.get_all_cids() - pass_fail(f"get_all_cids returns {len(blocks)} CIDs", - len(all_cids) == len(blocks)) + pass_fail(f"get_all_cids returns {len(blocks)} CIDs", len(all_cids) == len(blocks)) stored_set = {bytes(c) for c in all_cids} for cid, _ in blocks: - pass_fail(f"CID {cid_to_text(cid)[:20]}... is in get_all_cids", - bytes(cid) in stored_set) + pass_fail( + f"CID {cid_to_text(cid)[:20]}... is in get_all_cids", + bytes(cid) in stored_set, + ) async def test_get_missing_returns_none(store_path: str) -> None: @@ -132,10 +129,12 @@ async def use_store(store) -> bytes: return await store.get_block(cid) mem_result = await use_store(MemoryBlockStore()) - fs_result = await use_store(FilesystemBlockStore(store_path)) + fs_result = await use_store(FilesystemBlockStore(store_path)) - pass_fail("MemoryBlockStore and FilesystemBlockStore return same data", - mem_result == fs_result) + pass_fail( + "MemoryBlockStore and FilesystemBlockStore return same data", + mem_result == fs_result, + ) async def test_directory_structure(store_path: str) -> None: @@ -145,21 +144,22 @@ async def test_directory_structure(store_path: str) -> None: await store.put_block(cid, data) cid_str = cid_to_text(cid) - expected_dir = Path(store_path) / cid_str[:2] + expected_dir = Path(store_path) / cid_str[:2] expected_file = expected_dir / cid_str[2:] - pass_fail(f"2-char prefix dir '{cid_str[:2]}' exists", - expected_dir.is_dir()) - pass_fail(f"block file '{cid_str[2:8]}...' exists inside prefix dir", - expected_file.exists()) - pass_fail("file contents match original data", - expected_file.read_bytes() == data) + pass_fail(f"2-char prefix dir '{cid_str[:2]}' exists", expected_dir.is_dir()) + pass_fail( + f"block file '{cid_str[2:8]}...' exists inside prefix dir", + expected_file.exists(), + ) + pass_fail("file contents match original data", expected_file.read_bytes() == data) print(f" Path: {expected_file}") # ── main ────────────────────────────────────────────────────────────────────── + async def main() -> None: print("=" * 60) print("FilesystemBlockStore — Manual Test Suite") diff --git a/tests/core/bitswap/test_io_stream.py b/tests/core/bitswap/test_io_stream.py index 18386c87b..6a778cb50 100644 --- a/tests/core/bitswap/test_io_stream.py +++ b/tests/core/bitswap/test_io_stream.py @@ -4,6 +4,7 @@ Run with: python test_io_stream.py """ + import gzip import io import os @@ -12,19 +13,21 @@ import trio from libp2p.bitswap.block_store import MemoryBlockStore -from libp2p.bitswap.chunker import chunk_stream, DEFAULT_CHUNK_SIZE -from libp2p.bitswap.cid import compute_cid_v1, CODEC_DAG_PB, cid_to_text +from libp2p.bitswap.chunker import DEFAULT_CHUNK_SIZE, chunk_stream +from libp2p.bitswap.cid import cid_to_text from libp2p.bitswap.dag_pb import decode_dag_pb, is_file_node -def ok(label): print(f" OK {label}") +def ok(label): + print(f" OK {label}") # ── 1. chunk_stream basics ──────────────────────────────────────────────────── + def test_chunk_stream_bytesio(): print("\n[1] chunk_stream — BytesIO") - data = b"x" * (DEFAULT_CHUNK_SIZE * 3 + 100) # 3 full + 1 partial chunk + data = b"x" * (DEFAULT_CHUNK_SIZE * 3 + 100) # 3 full + 1 partial chunk chunks = list(chunk_stream(io.BytesIO(data), DEFAULT_CHUNK_SIZE)) assert len(chunks) == 4 assert b"".join(chunks) == data @@ -73,18 +76,21 @@ def test_chunk_stream_gzip(): def test_chunk_stream_matches_chunk_bytes(): print("\n[5] chunk_stream produces same chunks as chunk_bytes") from libp2p.bitswap.chunker import chunk_bytes + data = os.urandom(DEFAULT_CHUNK_SIZE * 5 + 777) stream_chunks = list(chunk_stream(io.BytesIO(data))) - bytes_chunks = chunk_bytes(data) + bytes_chunks = chunk_bytes(data) assert stream_chunks == bytes_chunks ok(f"chunk_stream == chunk_bytes for {len(data)} bytes of random data") # ── 2. MerkleDag.add_stream ─────────────────────────────────────────────────── + async def test_add_stream_bytesio(): print("\n[6] add_stream — BytesIO produces same CID as add_bytes") from unittest.mock import AsyncMock, MagicMock + from libp2p.bitswap.client import BitswapClient from libp2p.bitswap.dag import MerkleDag @@ -93,13 +99,15 @@ async def test_add_stream_bytesio(): mock.block_store = store stored: dict[bytes, bytes] = {} - async def add_block(cid, data): stored[bytes(cid)] = data + async def add_block(cid, data): + stored[bytes(cid)] = data + mock.add_block = AsyncMock(side_effect=add_block) dag = MerkleDag(mock) data = b"same content " * 5000 - cid_bytes = await dag.add_bytes(data) + cid_bytes = await dag.add_bytes(data) stored.clear() cid_stream = await dag.add_stream(io.BytesIO(data)) @@ -113,6 +121,7 @@ async def add_block(cid, data): stored[bytes(cid)] = data async def test_add_stream_empty(): print("\n[7] add_stream — empty stream stores single empty leaf") from unittest.mock import AsyncMock, MagicMock + from libp2p.bitswap.client import BitswapClient from libp2p.bitswap.dag import MerkleDag @@ -121,11 +130,13 @@ async def test_add_stream_empty(): mock.block_store = store stored: dict[bytes, bytes] = {} - async def add_block(cid, data): stored[bytes(cid)] = data + async def add_block(cid, data): + stored[bytes(cid)] = data + mock.add_block = AsyncMock(side_effect=add_block) dag = MerkleDag(mock) - root_cid = await dag.add_stream(io.BytesIO(b"")) + await dag.add_stream(io.BytesIO(b"")) assert len(stored) == 1 block = list(stored.values())[0] @@ -138,6 +149,7 @@ async def add_block(cid, data): stored[bytes(cid)] = data async def test_add_stream_single_chunk(): print("\n[8] add_stream — single chunk returns leaf CID directly (no root node)") from unittest.mock import AsyncMock, MagicMock + from libp2p.bitswap.client import BitswapClient from libp2p.bitswap.dag import MerkleDag @@ -146,7 +158,9 @@ async def test_add_stream_single_chunk(): mock.block_store = store stored: dict[bytes, bytes] = {} - async def add_block(cid, data): stored[bytes(cid)] = data + async def add_block(cid, data): + stored[bytes(cid)] = data + mock.add_block = AsyncMock(side_effect=add_block) dag = MerkleDag(mock) @@ -163,10 +177,11 @@ async def add_block(cid, data): stored[bytes(cid)] = data async def test_add_stream_gzip(): print("\n[9] add_stream — gzip stream decompresses and adds correctly") from unittest.mock import AsyncMock, MagicMock + from libp2p.bitswap.client import BitswapClient from libp2p.bitswap.dag import MerkleDag - original = b"gzip content " * 20000 # ~260 KB — 2 chunks after decompress + original = b"gzip content " * 20000 # ~260 KB — 2 chunks after decompress buf = io.BytesIO() with gzip.GzipFile(fileobj=buf, mode="wb") as gz: @@ -179,7 +194,9 @@ async def test_add_stream_gzip(): mock.block_store = store stored: dict[bytes, bytes] = {} - async def add_block(cid, data): stored[bytes(cid)] = data + async def add_block(cid, data): + stored[bytes(cid)] = data + mock.add_block = AsyncMock(side_effect=add_block) dag = MerkleDag(mock) @@ -197,29 +214,36 @@ async def add_block(cid, data): stored[bytes(cid)] = data reassembled += leaf_unixfs.data assert reassembled == original - ok(f"gzip stream: {compressed_size} compressed → {len(original)} bytes added " - f"in {len(links)} chunks") + ok( + f"gzip stream: {compressed_size} compressed → {len(original)} bytes added " + f"in {len(links)} chunks" + ) async def test_add_stream_vs_add_file_same_cid(): print("\n[10] add_stream(open(f)) produces same CID as add_file(path)") from unittest.mock import AsyncMock, MagicMock + from libp2p.bitswap.client import BitswapClient from libp2p.bitswap.dag import MerkleDag - data = b"compare stream vs file " * 8000 # ~176 KB, 3 chunks + data = b"compare stream vs file " * 8000 # ~176 KB, 3 chunks with tempfile.NamedTemporaryFile(delete=False) as f: f.write(data) tmp = f.name try: + def make_dag(): store = MemoryBlockStore() mock = MagicMock(spec=BitswapClient) mock.block_store = store stored = {} - async def add_block(cid, d): stored[bytes(cid)] = d + + async def add_block(cid, d): + stored[bytes(cid)] = d + mock.add_block = AsyncMock(side_effect=add_block) return MerkleDag(mock) @@ -241,6 +265,7 @@ async def add_block(cid, d): stored[bytes(cid)] = d # ── main ────────────────────────────────────────────────────────────────────── + async def main(): print("=" * 60) print("io.IOBase Input Support — Test Suite") diff --git a/tests/core/bitswap/test_unixfs_encoding.py b/tests/core/bitswap/test_unixfs_encoding.py index 355590da7..d638189fe 100644 --- a/tests/core/bitswap/test_unixfs_encoding.py +++ b/tests/core/bitswap/test_unixfs_encoding.py @@ -5,23 +5,20 @@ Run with: python test_unixfs_encoding.py """ -import hashlib -import tempfile + import os +import tempfile import trio from libp2p.bitswap.block_store import MemoryBlockStore -from libp2p.bitswap.cid import compute_cid_v1, CODEC_DAG_PB, CODEC_RAW, cid_to_text +from libp2p.bitswap.cid import CODEC_DAG_PB, CODEC_RAW, cid_to_text, compute_cid_v1 from libp2p.bitswap.dag_pb import ( - create_leaf_node, + MAX_LINKS_PER_NODE, balanced_layout, + create_leaf_node, decode_dag_pb, is_file_node, - UnixFSData, - Link, - encode_dag_pb, - MAX_LINKS_PER_NODE, ) @@ -116,7 +113,7 @@ def test_balanced_layout_two_levels(): # Root should link to 2 internal nodes (174 + 1) assert len(links) == 2, f"expected 2 top-level links, got {len(links)}" assert unixfs.filesize == n * chunk_size - ok(f"175 leaves → root has 2 links (174-leaf node + 1-leaf node)") + ok("175 leaves → root has 2 links (174-leaf node + 1-leaf node)") ok(f"root filesize = {unixfs.filesize} = 175 * {chunk_size}") @@ -135,13 +132,14 @@ def test_balanced_layout_flat(): links, unixfs = decode_dag_pb(root_block) assert len(links) == 174, f"expected 174 direct links, got {len(links)}" - ok(f"174 leaves → flat root with 174 direct links") + ok("174 leaves → flat root with 174 direct links") # ── 6. add_file produces dag-pb leaves (not raw) via MerkleDag ─────────────── async def test_add_file_produces_dag_pb_leaves(): print("\n[6] MerkleDag.add_file produces dag-pb leaf blocks") from unittest.mock import AsyncMock, MagicMock + from libp2p.bitswap.client import BitswapClient from libp2p.bitswap.dag import MerkleDag @@ -152,6 +150,7 @@ async def test_add_file_produces_dag_pb_leaves(): async def add_block_impl(cid, data): stored[bytes(cid)] = data + mock_client.add_block = AsyncMock(side_effect=add_block_impl) dag = MerkleDag(mock_client) @@ -197,6 +196,7 @@ async def add_block_impl(cid, data): async def test_add_bytes_produces_dag_pb_leaves(): print("\n[7] MerkleDag.add_bytes produces dag-pb leaf blocks") from unittest.mock import AsyncMock, MagicMock + from libp2p.bitswap.client import BitswapClient from libp2p.bitswap.dag import MerkleDag @@ -207,15 +207,14 @@ async def test_add_bytes_produces_dag_pb_leaves(): async def add_block_impl(cid, data): stored[bytes(cid)] = data + mock_client.add_block = AsyncMock(side_effect=add_block_impl) dag = MerkleDag(mock_client) content = b"y" * (63 * 1024 * 2 + 500) # 3 chunks root_cid = await dag.add_bytes(content) - raw_blocks = [ - cid_to_text(c)[:20] for c, d in stored.items() if not is_file_node(d) - ] + raw_blocks = [cid_to_text(c)[:20] for c, d in stored.items() if not is_file_node(d)] assert raw_blocks == [], f"Found non-dag-pb blocks: {raw_blocks}" ok(f"All {len(stored)} stored blocks are dag-pb file nodes") diff --git a/tests/core/bitswap/test_wantlist.py b/tests/core/bitswap/test_wantlist.py index 40232560f..effc59b5d 100644 --- a/tests/core/bitswap/test_wantlist.py +++ b/tests/core/bitswap/test_wantlist.py @@ -4,24 +4,30 @@ Run with: python test_wantlist.py """ -from libp2p.bitswap.cid import compute_cid_v1, CODEC_RAW, cid_to_bytes + +from libp2p.bitswap.cid import CODEC_RAW, cid_to_bytes, compute_cid_v1 +from libp2p.bitswap.messages import create_wantlist_entry from libp2p.bitswap.wantlist import ( - WantType, BlockPresenceType, - WantlistEntry, Wantlist, - BlockPresence, BitswapMessage, + BitswapMessage, + BlockPresence, + BlockPresenceType, + Wantlist, + WantlistEntry, + WantType, ) -from libp2p.bitswap.messages import create_wantlist_entry def make_cid(content: bytes) -> bytes: return cid_to_bytes(compute_cid_v1(content, codec=CODEC_RAW)) -def ok(label): print(f" OK {label}") +def ok(label): + print(f" OK {label}") # ── WantType enum ───────────────────────────────────────────────────────────── + def test_want_type_values(): print("\n[1] WantType enum values match protobuf") assert WantType.Block.value == 0 @@ -31,6 +37,7 @@ def test_want_type_values(): # ── WantlistEntry ───────────────────────────────────────────────────────────── + def test_wantlist_entry_from_cid(): print("\n[2] WantlistEntry.from_cid normalises any CIDInput") cid = compute_cid_v1(b"entry test", codec=CODEC_RAW) @@ -58,6 +65,7 @@ def test_wantlist_entry_from_cid(): # ── Wantlist ────────────────────────────────────────────────────────────────── + def test_wantlist_add_cancel_contains(): print("\n[3] Wantlist.add / cancel / contains") cid1 = make_cid(b"block 1") @@ -78,7 +86,7 @@ def test_wantlist_add_cancel_contains(): assert wl.contains(cid1) assert wl.contains(cid2) - assert not wl.contains(cid3) # cancel entry → not "contained" + assert not wl.contains(cid3) # cancel entry → not "contained" ok("contains() returns True for non-cancel entries only") # Check entry fields @@ -101,6 +109,7 @@ def test_wantlist_full_flag(): # ── BlockPresence ───────────────────────────────────────────────────────────── + def test_block_presence(): print("\n[5] BlockPresence constructors") cid = make_cid(b"presence test") @@ -122,6 +131,7 @@ def test_block_presence(): # ── BitswapMessage ──────────────────────────────────────────────────────────── + def test_bitswap_message_properties(): print("\n[6] BitswapMessage builder + properties") cid1 = make_cid(b"want me") @@ -165,6 +175,7 @@ def test_bitswap_message_cancel_want(): # ── to_proto / from_proto round-trip ───────────────────────────────────────── + def test_to_proto_from_proto_roundtrip(): print("\n[8] BitswapMessage to_proto() / from_proto() round-trip") cid1 = make_cid(b"want block") @@ -205,6 +216,7 @@ def test_to_proto_from_proto_roundtrip(): # ── backward compat: create_wantlist_entry accepts int OR WantType ──────────── + def test_create_wantlist_entry_backward_compat(): print("\n[9] create_wantlist_entry — backward compat (int OR WantType)") cid = make_cid(b"compat test") @@ -230,16 +242,19 @@ def test_create_wantlist_entry_backward_compat(): # ── public API exports ──────────────────────────────────────────────────────── + def test_public_exports(): print("\n[10] All types exported from libp2p.bitswap") from libp2p.bitswap import ( - WantType, WantlistEntry, Wantlist, - BlockPresence, BlockPresenceType, BitswapMessage, + WantType, ) + assert WantType.Block.value == 0 assert WantType.Have.value == 1 - ok("WantType, WantlistEntry, Wantlist, BlockPresence, BlockPresenceType, " - "BitswapMessage all importable from libp2p.bitswap") + ok( + "WantType, WantlistEntry, Wantlist, BlockPresence, BlockPresenceType, " + "BitswapMessage all importable from libp2p.bitswap" + ) # ── main ────────────────────────────────────────────────────────────────────── From ead47b0cf575722b4dae28688dfd9840184826c1 Mon Sep 17 00:00:00 2001 From: sumanjeet0012 Date: Sun, 3 May 2026 15:22:53 +0530 Subject: [PATCH 10/37] Refactor type hints and add assertions in tests - Updated type hints in `make_service` function to allow for None. - Specified type hints for lists of bytes in block retrieval tests. - Added assertions to check for non-null `unixfs` in various tests to ensure proper decoding of DAG PB blocks. - Enhanced type hints for observer and subscriber peers in Gossipsub tests. - Improved type hints for candidate lists in opportunistic grafting tests. - Added type ignore comments for factory Meta classes to suppress type checker warnings. - Updated import statements for ID to include type ignore comments in interop utilities. --- .gitignore | 2 + libp2p/bitswap/__init__.py | 15 + libp2p/bitswap/block_service.py | 3 +- libp2p/bitswap/messages.py | 6 +- libp2p/kad_dht/pb/kademlia_pb2.pyi | 4 +- logs.txt | 544 ++++++++++++++++++ tests/core/bitswap/test_block_service.py | 6 +- tests/core/bitswap/test_io_stream.py | 3 + tests/core/bitswap/test_unixfs_encoding.py | 5 + tests/core/bitswap/test_wantlist.py | 1 + .../pubsub/test_gossipsub_v1_3_extensions.py | 11 +- tests/core/pubsub/test_gossipsub_v2_0.py | 4 +- tests/utils/factories.py | 12 +- tests/utils/interop/utils.py | 4 +- 14 files changed, 600 insertions(+), 20 deletions(-) create mode 100644 logs.txt diff --git a/.gitignore b/.gitignore index 525f5696d..78a738d8d 100644 --- a/.gitignore +++ b/.gitignore @@ -200,3 +200,5 @@ libp2p-forge # OSO health report generated outputs reports/*.json reports/*.md + +my_blocks/ diff --git a/libp2p/bitswap/__init__.py b/libp2p/bitswap/__init__.py index 9412fb9aa..dcad9d1aa 100644 --- a/libp2p/bitswap/__init__.py +++ b/libp2p/bitswap/__init__.py @@ -66,6 +66,14 @@ MessageTooLargeError, TimeoutError, ) +from .wantlist import ( + BitswapMessage, + BlockPresence, + BlockPresenceType, + Wantlist, + WantlistEntry, + WantType, +) __all__ = [ # Core @@ -74,6 +82,13 @@ "BlockStore", "MemoryBlockStore", "FilesystemBlockStore", + # Messages + "BitswapMessage", + "BlockPresence", + "BlockPresenceType", + "Wantlist", + "WantlistEntry", + "WantType", # CID types "CIDInput", "CIDObject", diff --git a/libp2p/bitswap/block_service.py b/libp2p/bitswap/block_service.py index 21dca450c..c4e452d9a 100644 --- a/libp2p/bitswap/block_service.py +++ b/libp2p/bitswap/block_service.py @@ -10,6 +10,7 @@ from __future__ import annotations +from collections.abc import Sequence import logging from typing import TYPE_CHECKING @@ -133,7 +134,7 @@ async def put_block(self, cid: CIDInput, data: bytes) -> None: async def get_blocks_batch( self, - cids: list[CIDInput], + cids: Sequence[CIDInput], peer_id: PeerID | None = None, timeout: float = 30.0, batch_size: int = 32, diff --git a/libp2p/bitswap/messages.py b/libp2p/bitswap/messages.py index 8eea6535d..df8a1d16a 100644 --- a/libp2p/bitswap/messages.py +++ b/libp2p/bitswap/messages.py @@ -4,16 +4,20 @@ """ from collections.abc import Sequence +from typing import TYPE_CHECKING, Union from .cid import CIDInput, cid_to_bytes from .pb.bitswap_pb2 import Message +if TYPE_CHECKING: + from .wantlist import WantType + def create_wantlist_entry( block_cid: CIDInput, priority: int = 1, cancel: bool = False, - want_type: int = 0, # 0 = Block, 1 = Have (v1.2.0) + want_type: Union[int, "WantType"] = 0, # 0 = Block, 1 = Have (v1.2.0) send_dont_have: bool = False, # v1.2.0 ) -> Message.Wantlist.Entry: """ diff --git a/libp2p/kad_dht/pb/kademlia_pb2.pyi b/libp2p/kad_dht/pb/kademlia_pb2.pyi index 9caf65ec9..ae32c2361 100644 --- a/libp2p/kad_dht/pb/kademlia_pb2.pyi +++ b/libp2p/kad_dht/pb/kademlia_pb2.pyi @@ -2,7 +2,7 @@ from google.protobuf.internal import containers as _containers from google.protobuf.internal import enum_type_wrapper as _enum_type_wrapper from google.protobuf import descriptor as _descriptor from google.protobuf import message as _message -from typing import ClassVar as _ClassVar, Iterable as _Iterable, Mapping as _Mapping, Optional as _Optional, Union as _Union +from typing import Any, ClassVar as _ClassVar, Iterable as _Iterable, Mapping as _Mapping, Optional as _Optional, Union as _Union DESCRIPTOR: _descriptor.FileDescriptor @@ -71,4 +71,4 @@ class Message(_message.Message): closerPeers: _containers.RepeatedCompositeFieldContainer[Message.Peer] providerPeers: _containers.RepeatedCompositeFieldContainer[Message.Peer] senderRecord: bytes - def __init__(self, type: _Optional[_Union[Message.MessageType, str]] = ..., clusterLevelRaw: _Optional[int] = ..., key: _Optional[bytes] = ..., record: _Optional[_Union[Record, _Mapping]] = ..., closerPeers: _Optional[_Iterable[_Union[Message.Peer, _Mapping]]] = ..., providerPeers: _Optional[_Iterable[_Union[Message.Peer, _Mapping]]] = ..., senderRecord: _Optional[bytes] = ...) -> None: ... + def __init__(self, type: _Optional[_Union[Message.MessageType, str]] = ..., clusterLevelRaw: _Optional[int] = ..., key: _Optional[bytes] = ..., record: _Optional[_Union[Record, _Mapping[str, Any]]] = ..., closerPeers: _Optional[_Iterable[_Union[Message.Peer, _Mapping[str, Any]]]] = ..., providerPeers: _Optional[_Iterable[_Union[Message.Peer, _Mapping[str, Any]]]] = ..., senderRecord: _Optional[bytes] = ...) -> None: ... diff --git a/logs.txt b/logs.txt new file mode 100644 index 000000000..083501d75 --- /dev/null +++ b/logs.txt @@ -0,0 +1,544 @@ +2026-05-03T08:50:39.1652278Z Current runner version: '2.334.0' +2026-05-03T08:50:39.1670799Z ##[group]Runner Image Provisioner +2026-05-03T08:50:39.1671487Z Hosted Compute Agent +2026-05-03T08:50:39.1671885Z Version: 20260213.493 +2026-05-03T08:50:39.1672360Z Commit: 5c115507f6dd24b8de37d8bbe0bb4509d0cc0fa3 +2026-05-03T08:50:39.1672899Z Build Date: 2026-02-13T00:28:41Z +2026-05-03T08:50:39.1673585Z Worker ID: {6bdef167-2765-47d3-be27-c534a59125f5} +2026-05-03T08:50:39.1674159Z Azure Region: westcentralus +2026-05-03T08:50:39.1674611Z ##[endgroup] +2026-05-03T08:50:39.1675886Z ##[group]Operating System +2026-05-03T08:50:39.1676329Z Ubuntu +2026-05-03T08:50:39.1676754Z 24.04.4 +2026-05-03T08:50:39.1677106Z LTS +2026-05-03T08:50:39.1677514Z ##[endgroup] +2026-05-03T08:50:39.1677886Z ##[group]Runner Image +2026-05-03T08:50:39.1678386Z Image: ubuntu-24.04 +2026-05-03T08:50:39.1678799Z Version: 20260413.86.1 +2026-05-03T08:50:39.1679737Z Included Software: https://github.com/actions/runner-images/blob/ubuntu24/20260413.86/images/ubuntu/Ubuntu2404-Readme.md +2026-05-03T08:50:39.1680838Z Image Release: https://github.com/actions/runner-images/releases/tag/ubuntu24%2F20260413.86 +2026-05-03T08:50:39.1681544Z ##[endgroup] +2026-05-03T08:50:39.1683863Z ##[group]GITHUB_TOKEN Permissions +2026-05-03T08:50:39.1685352Z Actions: read +2026-05-03T08:50:39.1685843Z ArtifactMetadata: read +2026-05-03T08:50:39.1686316Z Attestations: read +2026-05-03T08:50:39.1686742Z Checks: read +2026-05-03T08:50:39.1687161Z Contents: read +2026-05-03T08:50:39.1687566Z Deployments: read +2026-05-03T08:50:39.1687939Z Discussions: read +2026-05-03T08:50:39.1688398Z Issues: read +2026-05-03T08:50:39.1688753Z Metadata: read +2026-05-03T08:50:39.1689144Z Models: read +2026-05-03T08:50:39.1689517Z Packages: read +2026-05-03T08:50:39.1689937Z Pages: read +2026-05-03T08:50:39.1690388Z PullRequests: read +2026-05-03T08:50:39.1690811Z RepositoryProjects: read +2026-05-03T08:50:39.1691299Z SecurityEvents: read +2026-05-03T08:50:39.1691685Z Statuses: read +2026-05-03T08:50:39.1692128Z VulnerabilityAlerts: read +2026-05-03T08:50:39.1692593Z ##[endgroup] +2026-05-03T08:50:39.1694465Z Secret source: None +2026-05-03T08:50:39.1695044Z Prepare workflow directory +2026-05-03T08:50:39.2012185Z Prepare all required actions +2026-05-03T08:50:39.2054775Z Getting action download info +2026-05-03T08:50:39.6250067Z Download action repository 'actions/checkout@v4' (SHA:34e114876b0b11c390a56381ad16ebd13914f8d5) +2026-05-03T08:50:39.7960802Z Download action repository 'actions/setup-python@v5' (SHA:a26af69be951a213d495a4c3e4e4022e16d87065) +2026-05-03T08:50:40.0799901Z Download action repository 'astral-sh/setup-uv@v4' (SHA:38f3f104447c67c051c4a08e39b64a148898af3a) +2026-05-03T08:50:41.1424247Z Download action repository 'actions/cache@v4' (SHA:0057852bfaa89a56745cba8c7296529d2fc39830) +2026-05-03T08:50:41.5475363Z Download action repository 'actions/setup-node@v4' (SHA:49933ea5288caeca8642d1e84afbd3f7d6820020) +2026-05-03T08:50:42.0285390Z Complete job name: tox (3.10, lint) +2026-05-03T08:50:42.0909652Z ##[group]Run if [[ "$toxenv" == 'docs' ]]; then +2026-05-03T08:50:42.0910618Z if [[ "$toxenv" == 'docs' ]]; then +2026-05-03T08:50:42.0911474Z  echo 'TOXENV=docs' | tee -a $GITHUB_ENV +2026-05-03T08:50:42.0912286Z else +2026-05-03T08:50:42.0913122Z  echo "TOXENV=py${python}-${toxenv}" | tr -d '.' | tee -a $GITHUB_ENV +2026-05-03T08:50:42.0914306Z fi +2026-05-03T08:50:42.0937059Z shell: /usr/bin/bash --noprofile --norc -e -o pipefail {0} +2026-05-03T08:50:42.0938114Z env: +2026-05-03T08:50:42.0938606Z python: 3.10 +2026-05-03T08:50:42.0939145Z toxenv: lint +2026-05-03T08:50:42.0939680Z ##[endgroup] +2026-05-03T08:50:42.1045953Z TOXENV=py310-lint +2026-05-03T08:50:42.1242272Z ##[group]Run actions/checkout@v4 +2026-05-03T08:50:42.1243023Z with: +2026-05-03T08:50:42.1243820Z repository: libp2p/py-libp2p +2026-05-03T08:50:42.1244733Z token: *** +2026-05-03T08:50:42.1245260Z ssh-strict: true +2026-05-03T08:50:42.1245828Z ssh-user: git +2026-05-03T08:50:42.1246401Z persist-credentials: true +2026-05-03T08:50:42.1247270Z clean: true +2026-05-03T08:50:42.1247848Z sparse-checkout-cone-mode: true +2026-05-03T08:50:42.1248556Z fetch-depth: 1 +2026-05-03T08:50:42.1249122Z fetch-tags: false +2026-05-03T08:50:42.1249712Z show-progress: true +2026-05-03T08:50:42.1250297Z lfs: false +2026-05-03T08:50:42.1250828Z submodules: false +2026-05-03T08:50:42.1251426Z set-safe-directory: true +2026-05-03T08:50:42.1252067Z env: +2026-05-03T08:50:42.1252570Z TOXENV: py310-lint +2026-05-03T08:50:42.1253147Z ##[endgroup] +2026-05-03T08:50:42.2189958Z Syncing repository: libp2p/py-libp2p +2026-05-03T08:50:42.2192771Z ##[group]Getting Git version info +2026-05-03T08:50:42.2194470Z Working directory is '/home/runner/work/py-libp2p/py-libp2p' +2026-05-03T08:50:42.2196501Z [command]/usr/bin/git version +2026-05-03T08:50:42.2769151Z git version 2.53.0 +2026-05-03T08:50:42.2788313Z ##[endgroup] +2026-05-03T08:50:42.2800674Z Temporarily overriding HOME='/home/runner/work/_temp/280f354b-3234-4524-abbb-4bf3e2f57d2d' before making global git config changes +2026-05-03T08:50:42.2803212Z Adding repository directory to the temporary git global config as a safe directory +2026-05-03T08:50:42.2805476Z [command]/usr/bin/git config --global --add safe.directory /home/runner/work/py-libp2p/py-libp2p +2026-05-03T08:50:42.2831018Z Deleting the contents of '/home/runner/work/py-libp2p/py-libp2p' +2026-05-03T08:50:42.2833973Z ##[group]Initializing the repository +2026-05-03T08:50:42.2837052Z [command]/usr/bin/git init /home/runner/work/py-libp2p/py-libp2p +2026-05-03T08:50:42.2997621Z hint: Using 'master' as the name for the initial branch. This default branch name +2026-05-03T08:50:42.2999318Z hint: will change to "main" in Git 3.0. To configure the initial branch name +2026-05-03T08:50:42.3000789Z hint: to use in all of your new repositories, which will suppress this warning, +2026-05-03T08:50:42.3002234Z hint: call: +2026-05-03T08:50:42.3002900Z hint: +2026-05-03T08:50:42.3003888Z hint: git config --global init.defaultBranch +2026-05-03T08:50:42.3004879Z hint: +2026-05-03T08:50:42.3005833Z hint: Names commonly chosen instead of 'master' are 'main', 'trunk' and +2026-05-03T08:50:42.3007603Z hint: 'development'. The just-created branch can be renamed via this command: +2026-05-03T08:50:42.3009009Z hint: +2026-05-03T08:50:42.3009721Z hint: git branch -m +2026-05-03T08:50:42.3010559Z hint: +2026-05-03T08:50:42.3011975Z hint: Disable this message with "git config set advice.defaultBranchName false" +2026-05-03T08:50:42.3013886Z Initialized empty Git repository in /home/runner/work/py-libp2p/py-libp2p/.git/ +2026-05-03T08:50:42.3016138Z [command]/usr/bin/git remote add origin https://github.com/libp2p/py-libp2p +2026-05-03T08:50:42.3035030Z ##[endgroup] +2026-05-03T08:50:42.3036478Z ##[group]Disabling automatic garbage collection +2026-05-03T08:50:42.3037981Z [command]/usr/bin/git config --local gc.auto 0 +2026-05-03T08:50:42.3061996Z ##[endgroup] +2026-05-03T08:50:42.3063521Z ##[group]Setting up auth +2026-05-03T08:50:42.3068223Z [command]/usr/bin/git config --local --name-only --get-regexp core\.sshCommand +2026-05-03T08:50:42.3092887Z [command]/usr/bin/git submodule foreach --recursive sh -c "git config --local --name-only --get-regexp 'core\.sshCommand' && git config --local --unset-all 'core.sshCommand' || :" +2026-05-03T08:50:42.3331807Z [command]/usr/bin/git config --local --name-only --get-regexp http\.https\:\/\/github\.com\/\.extraheader +2026-05-03T08:50:42.3361911Z [command]/usr/bin/git submodule foreach --recursive sh -c "git config --local --name-only --get-regexp 'http\.https\:\/\/github\.com\/\.extraheader' && git config --local --unset-all 'http.https://github.com/.extraheader' || :" +2026-05-03T08:50:42.3543069Z [command]/usr/bin/git config --local --name-only --get-regexp ^includeIf\.gitdir: +2026-05-03T08:50:42.3567245Z [command]/usr/bin/git submodule foreach --recursive git config --local --show-origin --name-only --get-regexp remote.origin.url +2026-05-03T08:50:42.3749227Z [command]/usr/bin/git config --local http.https://github.com/.extraheader AUTHORIZATION: basic *** +2026-05-03T08:50:42.3777229Z ##[endgroup] +2026-05-03T08:50:42.3783576Z ##[group]Fetching the repository +2026-05-03T08:50:42.3785524Z [command]/usr/bin/git -c protocol.version=2 fetch --no-tags --prune --no-recurse-submodules --depth=1 origin +79fa6a4965dd094f1168364b06f70f446048452d:refs/remotes/pull/1321/merge +2026-05-03T08:50:43.8242606Z From https://github.com/libp2p/py-libp2p +2026-05-03T08:50:43.8243185Z * [new ref] 79fa6a4965dd094f1168364b06f70f446048452d -> pull/1321/merge +2026-05-03T08:50:43.8265852Z ##[endgroup] +2026-05-03T08:50:43.8266310Z ##[group]Determining the checkout info +2026-05-03T08:50:43.8268290Z ##[endgroup] +2026-05-03T08:50:43.8273080Z [command]/usr/bin/git sparse-checkout disable +2026-05-03T08:50:43.8303282Z [command]/usr/bin/git config --local --unset-all extensions.worktreeConfig +2026-05-03T08:50:43.8323260Z ##[group]Checking out the ref +2026-05-03T08:50:43.8326267Z [command]/usr/bin/git checkout --progress --force refs/remotes/pull/1321/merge +2026-05-03T08:50:43.8851236Z Note: switching to 'refs/remotes/pull/1321/merge'. +2026-05-03T08:50:43.8851608Z +2026-05-03T08:50:43.8851869Z You are in 'detached HEAD' state. You can look around, make experimental +2026-05-03T08:50:43.8852442Z changes and commit them, and you can discard any commits you make in this +2026-05-03T08:50:43.8852995Z state without impacting any branches by switching back to a branch. +2026-05-03T08:50:43.8853500Z +2026-05-03T08:50:43.8853789Z If you want to create a new branch to retain commits you create, you may +2026-05-03T08:50:43.8854348Z do so (now or later) by using -c with the switch command. Example: +2026-05-03T08:50:43.8854551Z +2026-05-03T08:50:43.8854651Z git switch -c +2026-05-03T08:50:43.8854803Z +2026-05-03T08:50:43.8854888Z Or undo this operation with: +2026-05-03T08:50:43.8855021Z +2026-05-03T08:50:43.8855095Z git switch - +2026-05-03T08:50:43.8855229Z +2026-05-03T08:50:43.8855393Z Turn off this advice by setting config variable advice.detachedHead to false +2026-05-03T08:50:43.8855623Z +2026-05-03T08:50:43.8855896Z HEAD is now at 79fa6a4 Merge 58719a78ba64fd512991937d190a90bc45d748b5 into d9c50c0bec064e097a01ccafbb452e5b78c434e7 +2026-05-03T08:50:43.8858240Z ##[endgroup] +2026-05-03T08:50:43.8890211Z [command]/usr/bin/git log -1 --format=%H +2026-05-03T08:50:43.8908299Z 79fa6a4965dd094f1168364b06f70f446048452d +2026-05-03T08:50:43.9060008Z ##[group]Run actions/setup-python@v5 +2026-05-03T08:50:43.9060230Z with: +2026-05-03T08:50:43.9060397Z python-version: 3.10 +2026-05-03T08:50:43.9060575Z check-latest: false +2026-05-03T08:50:43.9060849Z token: *** +2026-05-03T08:50:43.9061019Z update-environment: true +2026-05-03T08:50:43.9061211Z allow-prereleases: false +2026-05-03T08:50:43.9061425Z freethreaded: false +2026-05-03T08:50:43.9061583Z env: +2026-05-03T08:50:43.9061738Z TOXENV: py310-lint +2026-05-03T08:50:43.9061893Z ##[endgroup] +2026-05-03T08:50:44.0434859Z ##[group]Installed versions +2026-05-03T08:50:44.0534645Z Successfully set up CPython (3.10.20) +2026-05-03T08:50:44.0539173Z ##[endgroup] +2026-05-03T08:50:44.0684971Z ##[group]Run astral-sh/setup-uv@v4 +2026-05-03T08:50:44.0685167Z with: +2026-05-03T08:50:44.0685310Z version: latest +2026-05-03T08:50:44.0685570Z github-token: *** +2026-05-03T08:50:44.0685732Z enable-cache: false +2026-05-03T08:50:44.0685897Z cache-dependency-glob: **/uv.lock +2026-05-03T08:50:44.0686094Z prune-cache: true +2026-05-03T08:50:44.0686258Z ignore-nothing-to-cache: false +2026-05-03T08:50:44.0686437Z env: +2026-05-03T08:50:44.0686577Z TOXENV: py310-lint +2026-05-03T08:50:44.0686781Z pythonLocation: /opt/hostedtoolcache/Python/3.10.20/x64 +2026-05-03T08:50:44.0687107Z PKG_CONFIG_PATH: /opt/hostedtoolcache/Python/3.10.20/x64/lib/pkgconfig +2026-05-03T08:50:44.0687417Z Python_ROOT_DIR: /opt/hostedtoolcache/Python/3.10.20/x64 +2026-05-03T08:50:44.0687694Z Python2_ROOT_DIR: /opt/hostedtoolcache/Python/3.10.20/x64 +2026-05-03T08:50:44.0687958Z Python3_ROOT_DIR: /opt/hostedtoolcache/Python/3.10.20/x64 +2026-05-03T08:50:44.0688266Z LD_LIBRARY_PATH: /opt/hostedtoolcache/Python/3.10.20/x64/lib +2026-05-03T08:50:44.0688631Z ##[endgroup] +2026-05-03T08:50:44.5566543Z Downloading uv from "https://github.com/astral-sh/uv/releases/download/0.11.8/uv-x86_64-unknown-linux-gnu.tar.gz" ... +2026-05-03T08:50:45.1270603Z [command]/usr/bin/tar xz --warning=no-unknown-keyword --overwrite -C /home/runner/work/_temp/eb456ef0-5fd2-4cca-80a6-59d4cc913925 -f /home/runner/work/_temp/425368e2-6920-4da8-9c29-e2eb033fc778 +2026-05-03T08:50:45.8868294Z Added /opt/hostedtoolcache/uv/0.11.8/x86_64 to the path +2026-05-03T08:50:45.8869251Z Added /home/runner/.local/bin to the path +2026-05-03T08:50:45.8878341Z Set UV_CACHE_DIR to /home/runner/work/_temp/setup-uv-cache +2026-05-03T08:50:45.8878773Z Successfully installed uv version 0.11.8 +2026-05-03T08:50:45.8964694Z ##[group]Run sudo apt-get update +2026-05-03T08:50:45.8964943Z sudo apt-get update +2026-05-03T08:50:45.8965146Z sudo apt-get install -y libgmp-dev +2026-05-03T08:50:45.9186725Z shell: /usr/bin/bash --noprofile --norc -e -o pipefail {0} +2026-05-03T08:50:45.9187010Z env: +2026-05-03T08:50:45.9187185Z TOXENV: py310-lint +2026-05-03T08:50:45.9187400Z pythonLocation: /opt/hostedtoolcache/Python/3.10.20/x64 +2026-05-03T08:50:45.9187715Z PKG_CONFIG_PATH: /opt/hostedtoolcache/Python/3.10.20/x64/lib/pkgconfig +2026-05-03T08:50:45.9188020Z Python_ROOT_DIR: /opt/hostedtoolcache/Python/3.10.20/x64 +2026-05-03T08:50:45.9188291Z Python2_ROOT_DIR: /opt/hostedtoolcache/Python/3.10.20/x64 +2026-05-03T08:50:45.9188573Z Python3_ROOT_DIR: /opt/hostedtoolcache/Python/3.10.20/x64 +2026-05-03T08:50:45.9188862Z LD_LIBRARY_PATH: /opt/hostedtoolcache/Python/3.10.20/x64/lib +2026-05-03T08:50:45.9189133Z UV_CACHE_DIR: /home/runner/work/_temp/setup-uv-cache +2026-05-03T08:50:45.9189384Z ##[endgroup] +2026-05-03T08:50:46.0201616Z Get:1 file:/etc/apt/apt-mirrors.txt Mirrorlist [144 B] +2026-05-03T08:50:46.0559674Z Hit:2 http://azure.archive.ubuntu.com/ubuntu noble InRelease +2026-05-03T08:50:46.0573271Z Get:3 http://azure.archive.ubuntu.com/ubuntu noble-updates InRelease [126 kB] +2026-05-03T08:50:46.0611129Z Get:4 http://azure.archive.ubuntu.com/ubuntu noble-backports InRelease [126 kB] +2026-05-03T08:50:46.0644610Z Get:5 http://azure.archive.ubuntu.com/ubuntu noble-security InRelease [126 kB] +2026-05-03T08:50:46.0734115Z Hit:6 https://packages.microsoft.com/repos/azure-cli noble InRelease +2026-05-03T08:50:46.0954101Z Get:7 https://packages.microsoft.com/ubuntu/24.04/prod noble InRelease [3600 B] +2026-05-03T08:50:46.1492561Z Get:8 https://dl.google.com/linux/chrome-stable/deb stable InRelease [1825 B] +2026-05-03T08:50:46.2162179Z Get:9 http://azure.archive.ubuntu.com/ubuntu noble-updates/main amd64 Packages [1946 kB] +2026-05-03T08:50:46.2311865Z Get:10 http://azure.archive.ubuntu.com/ubuntu noble-updates/main Translation-en [348 kB] +2026-05-03T08:50:46.2335550Z Get:11 http://azure.archive.ubuntu.com/ubuntu noble-updates/main amd64 Components [177 kB] +2026-05-03T08:50:46.2356328Z Get:12 http://azure.archive.ubuntu.com/ubuntu noble-updates/main amd64 c-n-f Metadata [17.1 kB] +2026-05-03T08:50:46.2374633Z Get:13 http://azure.archive.ubuntu.com/ubuntu noble-updates/universe amd64 Packages [1685 kB] +2026-05-03T08:50:46.2439427Z Get:14 http://azure.archive.ubuntu.com/ubuntu noble-updates/universe Translation-en [324 kB] +2026-05-03T08:50:46.2460309Z Get:15 http://azure.archive.ubuntu.com/ubuntu noble-updates/universe amd64 Components [386 kB] +2026-05-03T08:50:46.2490312Z Get:16 http://azure.archive.ubuntu.com/ubuntu noble-updates/universe amd64 c-n-f Metadata [34.5 kB] +2026-05-03T08:50:46.2524149Z Get:17 http://azure.archive.ubuntu.com/ubuntu noble-updates/restricted amd64 Packages [3095 kB] +2026-05-03T08:50:46.2620778Z Get:18 http://azure.archive.ubuntu.com/ubuntu noble-updates/restricted Translation-en [715 kB] +2026-05-03T08:50:46.3066836Z Get:19 http://azure.archive.ubuntu.com/ubuntu noble-updates/restricted amd64 Components [212 B] +2026-05-03T08:50:46.3078519Z Get:20 http://azure.archive.ubuntu.com/ubuntu noble-updates/restricted amd64 c-n-f Metadata [480 B] +2026-05-03T08:50:46.3091347Z Get:21 http://azure.archive.ubuntu.com/ubuntu noble-updates/multiverse amd64 Packages [44.4 kB] +2026-05-03T08:50:46.3105067Z Get:22 http://azure.archive.ubuntu.com/ubuntu noble-updates/multiverse Translation-en [10.2 kB] +2026-05-03T08:50:46.3114449Z Get:23 http://azure.archive.ubuntu.com/ubuntu noble-updates/multiverse amd64 Components [940 B] +2026-05-03T08:50:46.3126258Z Get:24 http://azure.archive.ubuntu.com/ubuntu noble-updates/multiverse amd64 c-n-f Metadata [656 B] +2026-05-03T08:50:46.3139181Z Get:25 http://azure.archive.ubuntu.com/ubuntu noble-backports/main amd64 Packages [64.5 kB] +2026-05-03T08:50:46.3153707Z Get:26 http://azure.archive.ubuntu.com/ubuntu noble-backports/main Translation-en [9172 B] +2026-05-03T08:50:46.3164013Z Get:27 http://azure.archive.ubuntu.com/ubuntu noble-backports/main amd64 Components [7368 B] +2026-05-03T08:50:46.3176514Z Get:28 http://azure.archive.ubuntu.com/ubuntu noble-backports/main amd64 c-n-f Metadata [368 B] +2026-05-03T08:50:46.3187200Z Get:29 http://azure.archive.ubuntu.com/ubuntu noble-backports/universe amd64 Packages [34.1 kB] +2026-05-03T08:50:46.3195820Z Get:30 http://azure.archive.ubuntu.com/ubuntu noble-backports/universe Translation-en [18.2 kB] +2026-05-03T08:50:46.3204977Z Get:31 http://azure.archive.ubuntu.com/ubuntu noble-backports/universe amd64 Components [10.5 kB] +2026-05-03T08:50:46.3214686Z Get:32 http://azure.archive.ubuntu.com/ubuntu noble-backports/universe amd64 c-n-f Metadata [1484 B] +2026-05-03T08:50:46.3670235Z Get:33 http://azure.archive.ubuntu.com/ubuntu noble-backports/restricted amd64 Components [212 B] +2026-05-03T08:50:46.3680882Z Get:34 http://azure.archive.ubuntu.com/ubuntu noble-backports/multiverse amd64 Packages [748 B] +2026-05-03T08:50:46.3695651Z Get:35 http://azure.archive.ubuntu.com/ubuntu noble-backports/multiverse amd64 Components [212 B] +2026-05-03T08:50:46.3710818Z Get:36 http://azure.archive.ubuntu.com/ubuntu noble-security/main amd64 Packages [1625 kB] +2026-05-03T08:50:46.3796468Z Get:37 http://azure.archive.ubuntu.com/ubuntu noble-security/main Translation-en [259 kB] +2026-05-03T08:50:46.3812984Z Get:38 http://azure.archive.ubuntu.com/ubuntu noble-security/main amd64 Components [21.9 kB] +2026-05-03T08:50:46.3824854Z Get:39 http://azure.archive.ubuntu.com/ubuntu noble-security/main amd64 c-n-f Metadata [11.0 kB] +2026-05-03T08:50:46.3835830Z Get:40 http://azure.archive.ubuntu.com/ubuntu noble-security/universe amd64 Packages [1182 kB] +2026-05-03T08:50:46.3893992Z Get:41 http://azure.archive.ubuntu.com/ubuntu noble-security/universe Translation-en [227 kB] +2026-05-03T08:50:46.3915962Z Get:42 http://azure.archive.ubuntu.com/ubuntu noble-security/universe amd64 Components [74.2 kB] +2026-05-03T08:50:46.3926311Z Get:43 http://azure.archive.ubuntu.com/ubuntu noble-security/universe amd64 c-n-f Metadata [23.1 kB] +2026-05-03T08:50:46.3938181Z Get:44 http://azure.archive.ubuntu.com/ubuntu noble-security/restricted amd64 Packages [2844 kB] +2026-05-03T08:50:46.4058462Z Get:45 http://azure.archive.ubuntu.com/ubuntu noble-security/restricted Translation-en [666 kB] +2026-05-03T08:50:46.4069280Z Get:50 https://packages.microsoft.com/ubuntu/24.04/prod noble/main arm64 Packages [107 kB] +2026-05-03T08:50:46.4093580Z Get:46 http://azure.archive.ubuntu.com/ubuntu noble-security/restricted amd64 Components [212 B] +2026-05-03T08:50:46.4103090Z Get:47 http://azure.archive.ubuntu.com/ubuntu noble-security/multiverse amd64 Packages [28.8 kB] +2026-05-03T08:50:46.4117081Z Get:51 https://packages.microsoft.com/ubuntu/24.04/prod noble/main armhf Packages [11.6 kB] +2026-05-03T08:50:46.4145608Z Get:52 https://packages.microsoft.com/ubuntu/24.04/prod noble/main amd64 Packages [132 kB] +2026-05-03T08:50:46.4548104Z Get:48 http://azure.archive.ubuntu.com/ubuntu noble-security/multiverse Translation-en [7172 B] +2026-05-03T08:50:46.4558458Z Get:49 http://azure.archive.ubuntu.com/ubuntu noble-security/multiverse amd64 Components [208 B] +2026-05-03T08:50:46.4951182Z Get:53 https://dl.google.com/linux/chrome-stable/deb stable/main amd64 Packages [1216 B] +2026-05-03T08:50:56.0749376Z Fetched 16.5 MB in 2s (10.6 MB/s) +2026-05-03T08:50:56.6803818Z Reading package lists... +2026-05-03T08:50:56.7978051Z Reading package lists... +2026-05-03T08:50:56.9602716Z Building dependency tree... +2026-05-03T08:50:56.9609431Z Reading state information... +2026-05-03T08:50:57.1250664Z libgmp-dev is already the newest version (2:6.3.0+dfsg-2ubuntu6.1). +2026-05-03T08:50:57.1251258Z 0 upgraded, 0 newly installed, 0 to remove and 53 not upgraded. +2026-05-03T08:50:57.1331388Z ##[group]Run uv venv venv +2026-05-03T08:50:57.1331614Z uv venv venv +2026-05-03T08:50:57.1331780Z source venv/bin/activate +2026-05-03T08:50:57.1331994Z uv pip install --upgrade pip +2026-05-03T08:50:57.1332196Z uv pip install tox +2026-05-03T08:50:57.1351181Z shell: /usr/bin/bash --noprofile --norc -e -o pipefail {0} +2026-05-03T08:50:57.1351413Z env: +2026-05-03T08:50:57.1351553Z TOXENV: py310-lint +2026-05-03T08:50:57.1351758Z pythonLocation: /opt/hostedtoolcache/Python/3.10.20/x64 +2026-05-03T08:50:57.1352072Z PKG_CONFIG_PATH: /opt/hostedtoolcache/Python/3.10.20/x64/lib/pkgconfig +2026-05-03T08:50:57.1352388Z Python_ROOT_DIR: /opt/hostedtoolcache/Python/3.10.20/x64 +2026-05-03T08:50:57.1352658Z Python2_ROOT_DIR: /opt/hostedtoolcache/Python/3.10.20/x64 +2026-05-03T08:50:57.1352928Z Python3_ROOT_DIR: /opt/hostedtoolcache/Python/3.10.20/x64 +2026-05-03T08:50:57.1353193Z LD_LIBRARY_PATH: /opt/hostedtoolcache/Python/3.10.20/x64/lib +2026-05-03T08:50:57.1353645Z UV_CACHE_DIR: /home/runner/work/_temp/setup-uv-cache +2026-05-03T08:50:57.1353861Z ##[endgroup] +2026-05-03T08:50:58.5508259Z Using CPython 3.10.20 interpreter at: /opt/hostedtoolcache/Python/3.10.20/x64/bin/python3 +2026-05-03T08:50:58.5508833Z Creating virtual environment at: venv +2026-05-03T08:50:58.5512459Z Activate with: venv/bin/activate +2026-05-03T08:50:58.6254497Z Using Python 3.10.20 environment at: venv +2026-05-03T08:50:58.7900612Z Resolved 1 package in 164ms +2026-05-03T08:50:58.8077019Z Downloading pip (1.7MiB) +2026-05-03T08:50:58.9464109Z Downloaded pip +2026-05-03T08:50:58.9466143Z Prepared 1 package in 156ms +2026-05-03T08:50:58.9565192Z Installed 1 package in 9ms +2026-05-03T08:50:58.9565667Z + pip==26.1 +2026-05-03T08:50:58.9634712Z Using Python 3.10.20 environment at: venv +2026-05-03T08:50:59.2420869Z Resolved 14 packages in 278ms +2026-05-03T08:50:59.2865079Z Downloading virtualenv (7.2MiB) +2026-05-03T08:50:59.4887062Z Downloaded virtualenv +2026-05-03T08:50:59.4889022Z Prepared 14 packages in 246ms +2026-05-03T08:50:59.4966270Z Installed 14 packages in 7ms +2026-05-03T08:50:59.4966562Z + cachetools==7.1.0 +2026-05-03T08:50:59.4966804Z + colorama==0.4.6 +2026-05-03T08:50:59.4967020Z + distlib==0.4.0 +2026-05-03T08:50:59.4967244Z + filelock==3.29.0 +2026-05-03T08:50:59.4967461Z + packaging==26.2 +2026-05-03T08:50:59.4967692Z + platformdirs==4.9.6 +2026-05-03T08:50:59.4968036Z + pluggy==1.6.0 +2026-05-03T08:50:59.4968363Z + pyproject-api==1.10.0 +2026-05-03T08:50:59.4968707Z + python-discovery==1.2.2 +2026-05-03T08:50:59.4968996Z + tomli==2.4.1 +2026-05-03T08:50:59.4969332Z + tomli-w==1.2.0 +2026-05-03T08:50:59.4969687Z + tox==4.53.1 +2026-05-03T08:50:59.4970049Z + typing-extensions==4.15.0 +2026-05-03T08:50:59.4970362Z + virtualenv==21.3.0 +2026-05-03T08:50:59.5036958Z ##[group]Run source venv/bin/activate +2026-05-03T08:50:59.5037223Z source venv/bin/activate +2026-05-03T08:50:59.5037428Z if [[ "lint" == 'docs' ]]; then +2026-05-03T08:50:59.5037632Z  export TOXENV=docs +2026-05-03T08:50:59.5037810Z else +2026-05-03T08:50:59.5038013Z  export TOXENV=$(echo "py3.10-lint" | tr -d '.') +2026-05-03T08:50:59.5038240Z fi +2026-05-03T08:50:59.5038413Z # Set PATH for nim commands during tox +2026-05-03T08:50:59.5038638Z if [[ "lint" == 'interop' ]]; then +2026-05-03T08:50:59.5038965Z  export PATH="$HOME/.nimble/bin:$HOME/.choosenim/toolchains/nim-stable/bin:$PATH" +2026-05-03T08:50:59.5039290Z fi +2026-05-03T08:50:59.5039603Z python -m tox run -r +2026-05-03T08:50:59.5058565Z shell: /usr/bin/bash --noprofile --norc -e -o pipefail {0} +2026-05-03T08:50:59.5058820Z env: +2026-05-03T08:50:59.5058968Z TOXENV: py310-lint +2026-05-03T08:50:59.5059224Z pythonLocation: /opt/hostedtoolcache/Python/3.10.20/x64 +2026-05-03T08:50:59.5059536Z PKG_CONFIG_PATH: /opt/hostedtoolcache/Python/3.10.20/x64/lib/pkgconfig +2026-05-03T08:50:59.5059846Z Python_ROOT_DIR: /opt/hostedtoolcache/Python/3.10.20/x64 +2026-05-03T08:50:59.5060119Z Python2_ROOT_DIR: /opt/hostedtoolcache/Python/3.10.20/x64 +2026-05-03T08:50:59.5060391Z Python3_ROOT_DIR: /opt/hostedtoolcache/Python/3.10.20/x64 +2026-05-03T08:50:59.5060686Z LD_LIBRARY_PATH: /opt/hostedtoolcache/Python/3.10.20/x64/lib +2026-05-03T08:50:59.5060960Z UV_CACHE_DIR: /home/runner/work/_temp/setup-uv-cache +2026-05-03T08:50:59.5061188Z ##[endgroup] +2026-05-03T08:51:02.3471735Z .pkg: remove tox env folder /home/runner/work/py-libp2p/py-libp2p/.tox/.pkg +2026-05-03T08:51:02.6301868Z py310-lint: install_deps> python -I -m pip install pre-commit +2026-05-03T08:51:05.9671352Z .pkg: install_requires> python -I -m pip install 'setuptools>=42' wheel +2026-05-03T08:51:07.1268925Z .pkg: _optional_hooks> python /home/runner/work/py-libp2p/py-libp2p/venv/lib/python3.10/site-packages/pyproject_api/_backend.py True setuptools.build_meta +2026-05-03T08:51:07.4136028Z .pkg: get_requires_for_build_editable> python /home/runner/work/py-libp2p/py-libp2p/venv/lib/python3.10/site-packages/pyproject_api/_backend.py True setuptools.build_meta +2026-05-03T08:51:07.8991783Z .pkg: freeze> python -m pip freeze --all +2026-05-03T08:51:08.0790106Z .pkg: packaging==26.2,pip==26.1,setuptools==82.0.1,wheel==0.47.0 +2026-05-03T08:51:08.0794350Z .pkg: build_editable> python /home/runner/work/py-libp2p/py-libp2p/venv/lib/python3.10/site-packages/pyproject_api/_backend.py True setuptools.build_meta +2026-05-03T08:51:08.8517413Z py310-lint: install_package_deps> python -I -m pip install 'aioquic>=1.2.0' 'anyio>=4.0' 'base58>=1.0.3' 'cbor2>=5.4.0' coincurve==21.0.0 'exceptiongroup>=1.2.0; python_version < "3.11"' 'fastecdsa==2.3.2; sys_platform != "win32"' 'grpcio>=1.41.0' 'httpx>=0.25.0' 'lru-dict>=1.1.6' 'miniupnpc<3.0,>=2.3' multiaddr==0.0.11 'mypy-protobuf>=3.0.0' 'noiseprotocol>=0.3.0' 'protobuf<7.0.0,>=4.25.0' 'py-cid>=0.5.0' 'py-multibase>=2.0.0' 'py-multicodec>=1.0.0' 'py-multihash>=3.0.0' 'pycryptodome>=3.9.2' 'pynacl>=1.3.0' 'requests>=2.28.0' 'rpcudp>=3.0.0' 'trio-typing>=0.0.4' 'trio-websocket>=0.11.0' 'trio>=0.26.0' types-requests 'zeroconf<0.148.0,>=0.147.0' +2026-05-03T08:51:32.5705371Z py310-lint: install_package> python -I -m pip install --force-reinstall --no-deps /home/runner/work/py-libp2p/py-libp2p/.tox/.tmp/package/1/libp2p-0.6.0-0.editable-py3-none-any.whl +2026-05-03T08:51:32.9081407Z py310-lint: freeze> python -m pip freeze --all +2026-05-03T08:51:33.1369371Z py310-lint: aioquic==1.3.0,anyio==4.13.0,async-generator==1.10,attrs==26.1.0,base58==2.1.1,blake3==1.0.8,cbor2==6.0.1,certifi==2026.4.22,cffi==2.0.0,cfgv==3.5.0,charset-normalizer==3.4.7,coincurve==21.0.0,cryptography==47.0.0,distlib==0.4.0,dnspython==2.8.0,exceptiongroup==1.3.1,fastecdsa==2.3.2,filelock==3.29.0,grpcio==1.80.0,h11==0.16.0,httpcore==1.0.9,httpx==0.28.1,identify==2.6.19,idna==3.13,ifaddr==0.2.0,importlib_metadata==9.0.0,libp2p @ file:///home/runner/work/py-libp2p/py-libp2p/.tox/.tmp/package/1/libp2p-0.6.0-0.editable-py3-none-any.whl#sha256=6f0fe388ecd3df637e7630940338cbf3d3eb067e9792e9fcbd17064693405a1b,lru-dict==1.4.1,miniupnpc==2.3.3,mmh3==5.2.1,morphys==1.0,multiaddr==0.0.11,mypy-protobuf==5.1.0,mypy_extensions==1.1.0,netaddr==1.3.0,nodeenv==1.10.0,noiseprotocol==0.3.1,outcome==1.3.0.post0,packaging==26.2,pip==26.1,platformdirs==4.9.6,pre_commit==4.6.0,protobuf==6.33.6,psutil==7.2.2,py-cid==0.5.0,py-multibase==2.0.0,py-multicodec==1.0.0,py-multihash==3.0.0,pyasn1==0.6.3,pyasn1_modules==0.4.2,pycparser==3.0,pycryptodome==3.23.0,pylsqpack==0.3.24,PyNaCl==1.6.2,pyOpenSSL==26.1.0,python-baseconv==1.2.2,python-discovery==1.2.2,PyYAML==6.0.3,requests==2.33.1,rpcudp==5.0.1,service-identity==24.2.0,setuptools==82.0.1,six==1.17.0,sniffio==1.3.1,sortedcontainers==2.4.0,trio==0.33.0,trio-typing==0.10.0,trio-websocket==0.12.2,types-protobuf==7.34.1.20260503,types-requests==2.33.0.20260503,typing_extensions==4.15.0,u-msgpack-python==2.8.0,urllib3==2.6.3,varint==1.0.2,virtualenv==21.3.0,wsproto==1.3.2,zeroconf==0.147.3,zipp==3.23.1 +2026-05-03T08:51:33.1374547Z py310-lint: commands_pre[0]> uv pip install --upgrade pip +2026-05-03T08:51:33.1900042Z Using Python 3.10.20 environment at: .tox/py310-lint +2026-05-03T08:51:33.2954857Z Resolved 1 package in 103ms +2026-05-03T08:51:33.2955970Z Checked 1 package in 0.06ms +2026-05-03T08:51:33.2972424Z py310-lint: commands_pre[1]> uv pip install --group dev -e . +2026-05-03T08:51:33.3514785Z Using Python 3.10.20 environment at: .tox/py310-lint +2026-05-03T08:51:34.0461313Z Resolved 166 packages in 693ms +2026-05-03T08:51:34.0500055Z Building libp2p @ file:///home/runner/work/py-libp2p/py-libp2p +2026-05-03T08:51:34.0662910Z Downloading babel (9.7MiB) +2026-05-03T08:51:34.1167498Z Downloading jedi (4.7MiB) +2026-05-03T08:51:34.1177079Z Downloading pygments (1.2MiB) +2026-05-03T08:51:34.1182166Z Downloading sphinx-rtd-theme (7.3MiB) +2026-05-03T08:51:34.1184711Z Downloading pyrefly (5.5MiB) +2026-05-03T08:51:34.1186040Z Downloading faker (1.9MiB) +2026-05-03T08:51:34.1190040Z Downloading ruff (10.8MiB) +2026-05-03T08:51:34.1191694Z Downloading mypy (14.0MiB) +2026-05-03T08:51:34.1197431Z Downloading pydantic-core (2.0MiB) +2026-05-03T08:51:34.1201466Z Downloading sphinx (3.3MiB) +2026-05-03T08:51:34.5484924Z Downloaded pydantic-core +2026-05-03T08:51:34.5491558Z Downloaded pygments +2026-05-03T08:51:35.1384876Z Downloaded sphinx +2026-05-03T08:51:35.2074896Z Downloaded pyrefly +2026-05-03T08:51:35.2793799Z Downloaded sphinx-rtd-theme +2026-05-03T08:51:35.2849741Z Downloaded faker +2026-05-03T08:51:35.3720244Z Downloaded ruff +2026-05-03T08:51:35.3824326Z Downloaded babel +2026-05-03T08:51:35.6597558Z Downloaded mypy +2026-05-03T08:51:35.9119524Z Downloaded jedi +2026-05-03T08:51:36.8250305Z Built libp2p @ file:///home/runner/work/py-libp2p/py-libp2p +2026-05-03T08:51:36.8377725Z Prepared 90 packages in 2.78s +2026-05-03T08:51:36.8388198Z Uninstalled 1 package in 0.96ms +2026-05-03T08:51:37.6322126Z Installed 90 packages in 793ms +2026-05-03T08:51:37.6322945Z + alabaster==1.0.0 +2026-05-03T08:51:37.6323192Z + annotated-types==0.7.0 +2026-05-03T08:51:37.6323607Z + asttokens==3.0.1 +2026-05-03T08:51:37.6323933Z + async-exit-stack==1.0.1 +2026-05-03T08:51:37.6324180Z + babel==2.18.0 +2026-05-03T08:51:37.6324642Z + backports-tarfile==1.2.0 +2026-05-03T08:51:37.6325045Z + bracex==2.6 +2026-05-03T08:51:37.6325252Z + build==1.5.0 +2026-05-03T08:51:37.6325450Z + bump-my-version==1.3.0 +2026-05-03T08:51:37.6325690Z + cachetools==7.1.0 +2026-05-03T08:51:37.6325892Z + click==8.3.3 +2026-05-03T08:51:37.6326099Z + colorama==0.4.6 +2026-05-03T08:51:37.6326317Z + decorator==5.2.1 +2026-05-03T08:51:37.6326536Z + docutils==0.21.2 +2026-05-03T08:51:37.6326747Z + execnet==2.1.2 +2026-05-03T08:51:37.6326981Z + executing==2.2.1 +2026-05-03T08:51:37.6327184Z + factory-boy==2.12.0 +2026-05-03T08:51:37.6327397Z + faker==40.15.0 +2026-05-03T08:51:37.6327585Z + id==1.6.1 +2026-05-03T08:51:37.6328039Z + imagesize==2.0.0 +2026-05-03T08:51:37.6328276Z + iniconfig==2.3.0 +2026-05-03T08:51:37.6328531Z + ipython==8.39.0 +2026-05-03T08:51:37.6328772Z + jaraco-classes==3.4.0 +2026-05-03T08:51:37.6330166Z + jaraco-context==6.1.2 +2026-05-03T08:51:37.6330457Z + jaraco-functools==4.4.0 +2026-05-03T08:51:37.6330721Z + jedi==0.20.0 +2026-05-03T08:51:37.6330932Z + jeepney==0.9.0 +2026-05-03T08:51:37.6331133Z + jinja2==3.1.6 +2026-05-03T08:51:37.6331341Z + keyring==25.7.0 +2026-05-03T08:51:37.6331938Z - libp2p==0.6.0 (from file:///home/runner/work/py-libp2p/py-libp2p/.tox/.tmp/package/1/libp2p-0.6.0-0.editable-py3-none-any.whl) +2026-05-03T08:51:37.6332709Z + libp2p==0.6.0 (from file:///home/runner/work/py-libp2p/py-libp2p) +2026-05-03T08:51:37.6333071Z + librt==0.9.0 +2026-05-03T08:51:37.6333697Z + markdown-it-py==4.0.0 +2026-05-03T08:51:37.6333943Z + markupsafe==3.0.3 +2026-05-03T08:51:37.6334182Z + matplotlib-inline==0.2.1 +2026-05-03T08:51:37.6334439Z + mdurl==0.1.2 +2026-05-03T08:51:37.6334648Z + more-itertools==11.0.2 +2026-05-03T08:51:37.6334893Z + mypy==1.20.2 +2026-05-03T08:51:37.6335079Z + nh3==0.3.5 +2026-05-03T08:51:37.6335281Z + p2pclient==0.2.1 +2026-05-03T08:51:37.6335491Z + parso==0.8.7 +2026-05-03T08:51:37.6335698Z + pathspec==1.1.1 +2026-05-03T08:51:37.6335905Z + pexpect==4.9.0 +2026-05-03T08:51:37.6336110Z + pluggy==1.6.0 +2026-05-03T08:51:37.6336325Z + prompt-toolkit==3.0.52 +2026-05-03T08:51:37.6336541Z + ptyprocess==0.7.0 +2026-05-03T08:51:37.6336735Z + pure-eval==0.2.3 +2026-05-03T08:51:37.6336915Z + pydantic==2.13.3 +2026-05-03T08:51:37.6337155Z + pydantic-core==2.46.3 +2026-05-03T08:51:37.6337401Z + pydantic-settings==2.14.0 +2026-05-03T08:51:37.6337624Z + pygments==2.20.0 +2026-05-03T08:51:37.6337807Z + pyproject-api==1.10.0 +2026-05-03T08:51:37.6338052Z + pyproject-hooks==1.2.0 +2026-05-03T08:51:37.6338267Z + pyrefly==0.17.1 +2026-05-03T08:51:37.6338469Z + pytest==9.0.3 +2026-05-03T08:51:37.6338687Z + pytest-mock==3.15.1 +2026-05-03T08:51:37.6338930Z + pytest-rerunfailures==16.1 +2026-05-03T08:51:37.6339179Z + pytest-timeout==2.4.0 +2026-05-03T08:51:37.6339390Z + pytest-trio==0.8.0 +2026-05-03T08:51:37.6339602Z + pytest-xdist==3.8.0 +2026-05-03T08:51:37.6339794Z + python-dotenv==1.2.2 +2026-05-03T08:51:37.6340003Z + questionary==2.1.1 +2026-05-03T08:51:37.6340195Z + readme-renderer==44.0 +2026-05-03T08:51:37.6340412Z + requests-toolbelt==1.0.0 +2026-05-03T08:51:37.6340766Z + rfc3986==2.0.0 +2026-05-03T08:51:37.6340979Z + rich==15.0.0 +2026-05-03T08:51:37.6341170Z + rich-click==1.9.7 +2026-05-03T08:51:37.6341350Z + ruff==0.15.12 +2026-05-03T08:51:37.6341550Z + secretstorage==3.5.0 +2026-05-03T08:51:37.6341855Z + snowballstemmer==3.0.1 +2026-05-03T08:51:37.6342111Z + sphinx==8.1.3 +2026-05-03T08:51:37.6342342Z + sphinx-rtd-theme==3.1.0 +2026-05-03T08:51:37.6342645Z + sphinxcontrib-applehelp==2.0.0 +2026-05-03T08:51:37.6343043Z + sphinxcontrib-devhelp==2.0.0 +2026-05-03T08:51:37.6343579Z + sphinxcontrib-htmlhelp==2.1.0 +2026-05-03T08:51:37.6343989Z + sphinxcontrib-jquery==4.1 +2026-05-03T08:51:37.6344237Z + sphinxcontrib-jsmath==1.0.1 +2026-05-03T08:51:37.6344492Z + sphinxcontrib-qthelp==2.0.0 +2026-05-03T08:51:37.6344754Z + sphinxcontrib-serializinghtml==2.0.0 +2026-05-03T08:51:37.6345177Z + stack-data==0.6.3 +2026-05-03T08:51:37.6345402Z + tomli==2.4.1 +2026-05-03T08:51:37.6345602Z + tomli-w==1.2.0 +2026-05-03T08:51:37.6345795Z + tomlkit==0.14.0 +2026-05-03T08:51:37.6345987Z + towncrier==25.8.0 +2026-05-03T08:51:37.6346222Z + tox==4.53.1 +2026-05-03T08:51:37.6346423Z + traitlets==5.14.3 +2026-05-03T08:51:37.6346625Z + twine==6.2.0 +2026-05-03T08:51:37.6346821Z + typing-inspection==0.4.2 +2026-05-03T08:51:37.6347029Z + wcmatch==10.1 +2026-05-03T08:51:37.6347214Z + wcwidth==0.7.0 +2026-05-03T08:51:37.6347386Z + wheel==0.47.0 +2026-05-03T08:51:37.6463895Z py310-lint: commands[0]> pre-commit install +2026-05-03T08:51:38.5706338Z pre-commit installed at .git/hooks/pre-commit +2026-05-03T08:51:38.5843977Z py310-lint: commands[1]> pre-commit run --all-files --show-diff-on-failure +2026-05-03T08:51:38.6811482Z [INFO] Initializing environment for https://github.com/pre-commit/pre-commit-hooks. +2026-05-03T08:51:39.4189459Z [INFO] Initializing environment for https://github.com/asottile/pyupgrade. +2026-05-03T08:51:40.4277806Z [INFO] Initializing environment for https://github.com/astral-sh/ruff-pre-commit. +2026-05-03T08:51:40.9532681Z [INFO] Initializing environment for https://github.com/executablebooks/mdformat. +2026-05-03T08:51:41.5220706Z [INFO] Initializing environment for https://github.com/executablebooks/mdformat:mdformat-gfm. +2026-05-03T08:51:42.0485756Z [INFO] Installing environment for https://github.com/pre-commit/pre-commit-hooks. +2026-05-03T08:51:42.0486348Z [INFO] Once installed this environment will be reused. +2026-05-03T08:51:42.0486678Z [INFO] This may take a few minutes... +2026-05-03T08:51:46.2720141Z [INFO] Installing environment for https://github.com/asottile/pyupgrade. +2026-05-03T08:51:46.2720708Z [INFO] Once installed this environment will be reused. +2026-05-03T08:51:46.2721128Z [INFO] This may take a few minutes... +2026-05-03T08:51:49.1167487Z [INFO] Installing environment for https://github.com/astral-sh/ruff-pre-commit. +2026-05-03T08:51:49.1168075Z [INFO] Once installed this environment will be reused. +2026-05-03T08:51:49.1168471Z [INFO] This may take a few minutes... +2026-05-03T08:51:52.4952081Z [INFO] Installing environment for https://github.com/executablebooks/mdformat. +2026-05-03T08:51:52.4952599Z [INFO] Once installed this environment will be reused. +2026-05-03T08:51:52.4952856Z [INFO] This may take a few minutes... +2026-05-03T08:51:55.7988540Z check yaml...............................................................Passed +2026-05-03T08:51:55.8606936Z check toml...............................................................Passed +2026-05-03T08:51:55.9314584Z fix end of files.........................................................Passed +2026-05-03T08:51:56.0326423Z trim trailing whitespace.................................................Passed +2026-05-03T08:51:58.5665786Z pyupgrade................................................................Passed +2026-05-03T08:51:58.6946122Z ruff (legacy alias)......................................................Passed +2026-05-03T08:51:58.8404559Z ruff format..............................................................Passed +2026-05-03T08:51:59.9613232Z mdformat.................................................................Passed +2026-05-03T08:52:07.6623116Z run mypy with all dev dependencies present...............................Failed +2026-05-03T08:52:07.6623813Z - hook id: mypy-local +2026-05-03T08:52:07.6624064Z - exit code: 1 +2026-05-03T08:52:07.6624161Z +2026-05-03T08:52:07.6624401Z libp2p/kad_dht/pb/kademlia_pb2.pyi:74: error: Missing type arguments for generic type "Mapping" [type-arg] +2026-05-03T08:52:07.6624809Z Found 1 error in 1 file (checked 335 source files) +2026-05-03T08:52:07.6625014Z +2026-05-03T08:52:08.7611656Z run pyrefly typecheck locally............................................Failed +2026-05-03T08:52:08.7612177Z - hook id: pyrefly-local +2026-05-03T08:52:08.7612392Z - exit code: 1 +2026-05-03T08:52:08.7612484Z +2026-05-03T08:52:08.7612961Z WARN PYTHONPATH environment variable is set to `/home/runner/work/py-libp2p/py-libp2p/.tox/py310-lint/lib/python3.10/site-packages:`. Checks in other environments may not include these paths. +2026-05-03T08:52:08.7613892Z INFO Checking project configured at `/home/runner/work/py-libp2p/py-libp2p/pyproject.toml` +2026-05-03T08:52:08.7614582Z WARN PYTHONPATH environment variable is set to `/home/runner/work/py-libp2p/py-libp2p/.tox/py310-lint/lib/python3.10/site-packages:`. Checks in other environments may not include these paths. +2026-05-03T08:52:08.7615585Z ERROR /home/runner/work/py-libp2p/py-libp2p/tests/core/bitswap/test_block_service.py:30:41-45: Default `None` is not assignable to parameter `network_blocks` with type `dict[Unknown, Unknown]` [bad-function-definition] +2026-05-03T08:52:08.7617189Z ERROR /home/runner/work/py-libp2p/py-libp2p/tests/core/bitswap/test_block_service.py:141:46-50: Argument `list[bytes]` is not assignable to parameter `cids` with type `list[CIDv0 | CIDv1 | bytes | str]` in function `libp2p.bitswap.block_service.BlockService.get_blocks_batch` [bad-argument-type] +2026-05-03T08:52:08.7618588Z ERROR /home/runner/work/py-libp2p/py-libp2p/tests/core/bitswap/test_block_service.py:162:46-54: Argument `list[bytes]` is not assignable to parameter `cids` with type `list[CIDv0 | CIDv1 | bytes | str]` in function `libp2p.bitswap.block_service.BlockService.get_blocks_batch` [bad-argument-type] +2026-05-03T08:52:08.7619958Z ERROR /home/runner/work/py-libp2p/py-libp2p/tests/core/bitswap/test_io_stream.py:145:12-27: Object of class `NoneType` has no attribute `filesize` [missing-attribute] +2026-05-03T08:52:08.7621000Z ERROR /home/runner/work/py-libp2p/py-libp2p/tests/core/bitswap/test_io_stream.py:173:12-23: Object of class `NoneType` has no attribute `data` [missing-attribute] +2026-05-03T08:52:08.7621920Z ERROR /home/runner/work/py-libp2p/py-libp2p/tests/core/bitswap/test_io_stream.py:214:24-40: Object of class `NoneType` has no attribute `data` [missing-attribute] +2026-05-03T08:52:08.7622682Z ERROR /home/runner/work/py-libp2p/py-libp2p/tests/core/bitswap/test_unixfs_encoding.py:60:12-33: Object of class `NoneType` has no attribute `filesize` [missing-attribute] +2026-05-03T08:52:08.7623567Z ERROR /home/runner/work/py-libp2p/py-libp2p/tests/core/bitswap/test_unixfs_encoding.py:93:12-27: Object of class `NoneType` has no attribute `filesize` [missing-attribute] +2026-05-03T08:52:08.7624342Z ERROR /home/runner/work/py-libp2p/py-libp2p/tests/core/bitswap/test_unixfs_encoding.py:94:16-33: Object of class `NoneType` has no attribute `blocksizes` [missing-attribute] +2026-05-03T08:52:08.7625119Z ERROR /home/runner/work/py-libp2p/py-libp2p/tests/core/bitswap/test_unixfs_encoding.py:115:12-27: Object of class `NoneType` has no attribute `filesize` [missing-attribute] +2026-05-03T08:52:08.7625897Z ERROR /home/runner/work/py-libp2p/py-libp2p/tests/core/bitswap/test_unixfs_encoding.py:183:12-27: Object of class `NoneType` has no attribute `filesize` [missing-attribute] +2026-05-03T08:52:08.7626664Z ERROR /home/runner/work/py-libp2p/py-libp2p/tests/core/bitswap/test_unixfs_encoding.py:224:12-27: Object of class `NoneType` has no attribute `filesize` [missing-attribute] +2026-05-03T08:52:08.7627415Z ERROR /home/runner/work/py-libp2p/py-libp2p/tests/core/bitswap/test_wantlist.py:172:12-32: Object of class `NoneType` has no attribute `entries` [missing-attribute] +2026-05-03T08:52:08.7628396Z ERROR /home/runner/work/py-libp2p/py-libp2p/tests/core/bitswap/test_wantlist.py:234:51-65: Argument `Literal[WantType.Block]` is not assignable to parameter `want_type` with type `int` in function `libp2p.bitswap.messages.create_wantlist_entry` [bad-argument-type] +2026-05-03T08:52:08.7629607Z ERROR /home/runner/work/py-libp2p/py-libp2p/tests/core/bitswap/test_wantlist.py:238:52-65: Argument `Literal[WantType.Have]` is not assignable to parameter `want_type` with type `int` in function `libp2p.bitswap.messages.create_wantlist_entry` [bad-argument-type] +2026-05-03T08:52:08.7630595Z ERROR /home/runner/work/py-libp2p/py-libp2p/tests/core/bitswap/test_wantlist.py:249:9-17: Could not import `WantType` from `libp2p.bitswap` [missing-module-attribute] +2026-05-03T08:52:08.7631305Z INFO 16 errors shown, 184 errors ignored, 734 modules, 1,662 transitive dependencies, 657,839 lines, took 1.04s, peak memory physical 204.9 MiB +2026-05-03T08:52:08.7631662Z +2026-05-03T08:52:08.8142574Z Check for .rst files in the top-level directory..........................Passed +2026-05-03T08:52:09.5667299Z Cross-platform path handling audit (P0/P1)...............................Passed +2026-05-03T08:52:09.5821611Z py310-lint: exit 1 (31.00 seconds) /home/runner/work/py-libp2p/py-libp2p> pre-commit run --all-files --show-diff-on-failure pid=3010 +2026-05-03T08:52:09.5848132Z py310-lint: FAIL code 1 (67.24=setup[30.80]+cmd[0.16,4.35,0.94,31.00] seconds) +2026-05-03T08:52:09.5848539Z evaluation failed :( (69.20 seconds) +2026-05-03T08:52:09.6166338Z ##[error]Process completed with exit code 1. +2026-05-03T08:52:09.6249957Z Post job cleanup. +2026-05-03T08:52:09.7055343Z [command]/usr/bin/git version +2026-05-03T08:52:09.7085377Z git version 2.53.0 +2026-05-03T08:52:09.7117444Z Temporarily overriding HOME='/home/runner/work/_temp/5d400f3f-c4d4-4ba4-ab16-52bd147b9418' before making global git config changes +2026-05-03T08:52:09.7118330Z Adding repository directory to the temporary git global config as a safe directory +2026-05-03T08:52:09.7121633Z [command]/usr/bin/git config --global --add safe.directory /home/runner/work/py-libp2p/py-libp2p +2026-05-03T08:52:09.8851499Z [command]/usr/bin/git config --local --name-only --get-regexp core\.sshCommand +2026-05-03T08:52:09.8882670Z [command]/usr/bin/git submodule foreach --recursive sh -c "git config --local --name-only --get-regexp 'core\.sshCommand' && git config --local --unset-all 'core.sshCommand' || :" +2026-05-03T08:52:09.9066439Z fatal: No url found for submodule path 'extra/multihash-spec' in .gitmodules +2026-05-03T08:52:10.0343154Z ##[warning]The process '/usr/bin/git' failed with exit code 128 +2026-05-03T08:52:10.0429730Z Cleaning up orphan processes +2026-05-03T08:52:10.0667254Z ##[warning]Node.js 20 actions are deprecated. The following actions are running on Node.js 20 and may not work as expected: actions/checkout@v4, actions/setup-python@v5, astral-sh/setup-uv@v4. Actions will be forced to run with Node.js 24 by default starting June 2nd, 2026. Node.js 20 will be removed from the runner on September 16th, 2026. Please check if updated versions of these actions are available that support Node.js 24. To opt into Node.js 24 now, set the FORCE_JAVASCRIPT_ACTIONS_TO_NODE24=true environment variable on the runner or in your workflow file. Once Node.js 24 becomes the default, you can temporarily opt out by setting ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION=true. For more information see: https://github.blog/changelog/2025-09-19-deprecation-of-node-20-on-github-actions-runners/ \ No newline at end of file diff --git a/tests/core/bitswap/test_block_service.py b/tests/core/bitswap/test_block_service.py index a7a02f7dc..f4754dd7c 100644 --- a/tests/core/bitswap/test_block_service.py +++ b/tests/core/bitswap/test_block_service.py @@ -27,7 +27,7 @@ def ok(label): # ── helpers ─────────────────────────────────────────────────────────────────── -def make_service(network_blocks: dict = None): +def make_service(network_blocks: dict | None = None): """ Build a BlockService with a real MemoryBlockStore and a mock BitswapClient. network_blocks: cid_bytes -> data that the mock 'network' can return. @@ -137,7 +137,7 @@ async def test_get_blocks_batch_local_hits_skip_network(): for cid, data in blocks: await store.put_block(cid, data) - cids = [cid for cid, _ in blocks] + cids: list[bytes] = [cid for cid, _ in blocks] results = await service.get_blocks_batch(cids) assert len(results) == 5 @@ -158,7 +158,7 @@ async def test_get_blocks_batch_partial_local(): for cid, data in local_blocks: await store.put_block(cid, data) - all_cids = [cid for cid, _ in local_blocks + net_blocks] + all_cids: list[bytes] = [cid for cid, _ in local_blocks + net_blocks] results = await service.get_blocks_batch(all_cids) assert len(results) == 5 diff --git a/tests/core/bitswap/test_io_stream.py b/tests/core/bitswap/test_io_stream.py index 6a778cb50..bd1ecdecb 100644 --- a/tests/core/bitswap/test_io_stream.py +++ b/tests/core/bitswap/test_io_stream.py @@ -142,6 +142,7 @@ async def add_block(cid, data): block = list(stored.values())[0] assert is_file_node(block) _, unixfs = decode_dag_pb(block) + assert unixfs is not None assert unixfs.filesize == 0 ok("empty stream → 1 empty dag-pb leaf block stored") @@ -170,6 +171,7 @@ async def add_block(cid, data): assert len(stored) == 1, f"expected 1 block, got {len(stored)}" block = stored[bytes(root_cid)] _, unixfs = decode_dag_pb(block) + assert unixfs is not None assert unixfs.data == data ok("single chunk: leaf CID returned directly, inline data correct") @@ -211,6 +213,7 @@ async def add_block(cid, data): for link in links: leaf = stored[bytes(link.cid)] _, leaf_unixfs = decode_dag_pb(leaf) + assert leaf_unixfs is not None reassembled += leaf_unixfs.data assert reassembled == original diff --git a/tests/core/bitswap/test_unixfs_encoding.py b/tests/core/bitswap/test_unixfs_encoding.py index d638189fe..11ba00a56 100644 --- a/tests/core/bitswap/test_unixfs_encoding.py +++ b/tests/core/bitswap/test_unixfs_encoding.py @@ -57,6 +57,7 @@ def test_create_leaf_node(): # Empty leaf empty_leaf = create_leaf_node(b"") _, empty_unixfs = decode_dag_pb(empty_leaf) + assert empty_unixfs is not None assert empty_unixfs.filesize == 0 ok("empty leaf node is valid") @@ -90,6 +91,7 @@ def test_balanced_layout_two_leaves(): assert is_file_node(root_block) links, unixfs = decode_dag_pb(root_block) assert len(links) == 2, f"expected 2 links, got {len(links)}" + assert unixfs is not None assert unixfs.filesize == sum(s for _, _, s in leaves) assert len(unixfs.blocksizes) == 2 ok(f"root has 2 links, filesize={unixfs.filesize}, blocksizes={unixfs.blocksizes}") @@ -112,6 +114,7 @@ def test_balanced_layout_two_levels(): # Root should link to 2 internal nodes (174 + 1) assert len(links) == 2, f"expected 2 top-level links, got {len(links)}" + assert unixfs is not None assert unixfs.filesize == n * chunk_size ok("175 leaves → root has 2 links (174-leaf node + 1-leaf node)") ok(f"root filesize = {unixfs.filesize} = 175 * {chunk_size}") @@ -180,6 +183,7 @@ async def add_block_impl(cid, data): root_block = stored[bytes(root_cid)] links, unixfs = decode_dag_pb(root_block) assert len(links) == 3, f"expected 3 links on root, got {len(links)}" + assert unixfs is not None assert unixfs.filesize == len(content) ok(f"root has 3 links, filesize={unixfs.filesize}") @@ -221,6 +225,7 @@ async def add_block_impl(cid, data): root_block = stored[bytes(root_cid)] links, unixfs = decode_dag_pb(root_block) assert len(links) == 3 + assert unixfs is not None assert unixfs.filesize == len(content) ok(f"root has 3 links, filesize={unixfs.filesize}") diff --git a/tests/core/bitswap/test_wantlist.py b/tests/core/bitswap/test_wantlist.py index effc59b5d..a632fc80b 100644 --- a/tests/core/bitswap/test_wantlist.py +++ b/tests/core/bitswap/test_wantlist.py @@ -169,6 +169,7 @@ def test_bitswap_message_cancel_want(): msg = BitswapMessage() msg.cancel_want(cid) assert msg.is_want + assert msg.wantlist is not None assert msg.wantlist.entries[0].cancel ok("cancel_want() adds cancel entry") diff --git a/tests/core/pubsub/test_gossipsub_v1_3_extensions.py b/tests/core/pubsub/test_gossipsub_v1_3_extensions.py index 79ff2ad2b..d8be5d50e 100644 --- a/tests/core/pubsub/test_gossipsub_v1_3_extensions.py +++ b/tests/core/pubsub/test_gossipsub_v1_3_extensions.py @@ -285,7 +285,8 @@ async def test_handle_observe_and_unobserve_manage_observers() -> None: assert isinstance(router, GossipSub) topic = "obs-topic" - observer_peer = IDFactory() + + observer_peer: ID = IDFactory() # Simulate that the peer advertised topicObservation support via extensions. router.extensions_state._peer_extensions[observer_peer] = PeerExtensions( @@ -310,7 +311,7 @@ async def test_handle_observe_ignored_when_peer_did_not_advertise_extension() -> assert isinstance(router, GossipSub) topic = "obs-topic" - observer_peer = IDFactory() + observer_peer: ID = IDFactory() # Peer exists, but its advertised extensions do NOT include topicObservation. router.extensions_state._peer_extensions[observer_peer] = PeerExtensions( @@ -330,7 +331,7 @@ async def test_emit_observe_and_unobserve_update_observing_state() -> None: assert isinstance(router, GossipSub) topic = "obs-topic" - subscriber_peer = IDFactory() + subscriber_peer: ID = IDFactory() # Stub pubsub.peers so emit_control_message sees the peer as connected. class DummyPubsub: @@ -362,7 +363,7 @@ async def test_notify_observers_sends_ihave_to_each_observer() -> None: assert isinstance(router, GossipSub) topic = "obs-topic" - observer_peer = IDFactory() + observer_peer: ID = IDFactory() msg_id = b"message-id" # Configure TopicObservationState with a single observer. @@ -395,7 +396,7 @@ async def test_start_and_stop_observing_topic_high_level_api() -> None: assert isinstance(router, GossipSub) topic = "obs-topic" - subscriber_peer = IDFactory() + subscriber_peer: ID = IDFactory() # Simulate pubsub state: subscriber_peer is subscribed to topic. class DummyPubsub: diff --git a/tests/core/pubsub/test_gossipsub_v2_0.py b/tests/core/pubsub/test_gossipsub_v2_0.py index 68e575fd7..c2f639cb4 100644 --- a/tests/core/pubsub/test_gossipsub_v2_0.py +++ b/tests/core/pubsub/test_gossipsub_v2_0.py @@ -16,6 +16,7 @@ import pytest import trio +from libp2p.peer.id import ID from libp2p.pubsub.gossipsub import PROTOCOL_ID_V20, GossipSub from libp2p.pubsub.pb import rpc_pb2 from libp2p.pubsub.pubsub import ValidationCache, ValidationResult @@ -465,7 +466,8 @@ def test_candidate_selection_for_diversity(self): scorer.ip_by_peer[peer] = "192.168.1.1" # Candidates from different IPs - candidates = [ + + candidates: list[tuple[ID, float]] = [ (IDFactory(), 5.0), # Different IP (IDFactory(), 4.0), # Same IP as mesh ] diff --git a/tests/utils/factories.py b/tests/utils/factories.py index 2198d2503..762044f74 100644 --- a/tests/utils/factories.py +++ b/tests/utils/factories.py @@ -447,7 +447,7 @@ async def upgrade_remote_conn(remote_conn: IRawConnection) -> None: class SwarmFactory(factory.Factory): - class Meta: + class Meta: # type: ignore[override] model = Swarm class Params: @@ -511,7 +511,7 @@ async def create_batch_and_listen( class HostFactory(factory.Factory): - class Meta: + class Meta: # type: ignore[override] model = BasicHost class Params: @@ -555,7 +555,7 @@ async def find_peer(self, peer_id: ID) -> PeerInfo | None: class RoutedHostFactory(factory.Factory): - class Meta: + class Meta: # type: ignore[override] model = RoutedHost class Params: @@ -591,14 +591,14 @@ async def create_batch_and_listen( class FloodsubFactory(factory.Factory): - class Meta: + class Meta: # type: ignore[override] model = FloodSub protocols = (FLOODSUB_PROTOCOL_ID,) class GossipsubFactory(factory.Factory): - class Meta: + class Meta: # type: ignore[override] model = GossipSub protocols = (GOSSIPSUB_PROTOCOL_ID,) @@ -623,7 +623,7 @@ class Meta: class PubsubFactory(factory.Factory): - class Meta: + class Meta: # type: ignore[override] model = Pubsub host = factory.SubFactory(HostFactory) diff --git a/tests/utils/interop/utils.py b/tests/utils/interop/utils.py index 30b89197c..db299dcb5 100644 --- a/tests/utils/interop/utils.py +++ b/tests/utils/interop/utils.py @@ -5,7 +5,9 @@ from multiaddr import ( Multiaddr, ) -from p2pclient.libp2p_stubs.peer.id import ID as StubID +from p2pclient.libp2p_stubs.peer.id import ( + ID as StubID, # type: ignore[import-untyped, import-error] +) import trio from libp2p.abc import IHost From 6acceb273cdecb7fe080de681ec18701aecb2f33 Mon Sep 17 00:00:00 2001 From: sumanjeet0012 Date: Sun, 3 May 2026 15:46:06 +0530 Subject: [PATCH 11/37] removed logs file --- logs.txt | 544 ------------------------------------------------------- 1 file changed, 544 deletions(-) delete mode 100644 logs.txt diff --git a/logs.txt b/logs.txt deleted file mode 100644 index 083501d75..000000000 --- a/logs.txt +++ /dev/null @@ -1,544 +0,0 @@ -2026-05-03T08:50:39.1652278Z Current runner version: '2.334.0' -2026-05-03T08:50:39.1670799Z ##[group]Runner Image Provisioner -2026-05-03T08:50:39.1671487Z Hosted Compute Agent -2026-05-03T08:50:39.1671885Z Version: 20260213.493 -2026-05-03T08:50:39.1672360Z Commit: 5c115507f6dd24b8de37d8bbe0bb4509d0cc0fa3 -2026-05-03T08:50:39.1672899Z Build Date: 2026-02-13T00:28:41Z -2026-05-03T08:50:39.1673585Z Worker ID: {6bdef167-2765-47d3-be27-c534a59125f5} -2026-05-03T08:50:39.1674159Z Azure Region: westcentralus -2026-05-03T08:50:39.1674611Z ##[endgroup] -2026-05-03T08:50:39.1675886Z ##[group]Operating System -2026-05-03T08:50:39.1676329Z Ubuntu -2026-05-03T08:50:39.1676754Z 24.04.4 -2026-05-03T08:50:39.1677106Z LTS -2026-05-03T08:50:39.1677514Z ##[endgroup] -2026-05-03T08:50:39.1677886Z ##[group]Runner Image -2026-05-03T08:50:39.1678386Z Image: ubuntu-24.04 -2026-05-03T08:50:39.1678799Z Version: 20260413.86.1 -2026-05-03T08:50:39.1679737Z Included Software: https://github.com/actions/runner-images/blob/ubuntu24/20260413.86/images/ubuntu/Ubuntu2404-Readme.md -2026-05-03T08:50:39.1680838Z Image Release: https://github.com/actions/runner-images/releases/tag/ubuntu24%2F20260413.86 -2026-05-03T08:50:39.1681544Z ##[endgroup] -2026-05-03T08:50:39.1683863Z ##[group]GITHUB_TOKEN Permissions -2026-05-03T08:50:39.1685352Z Actions: read -2026-05-03T08:50:39.1685843Z ArtifactMetadata: read -2026-05-03T08:50:39.1686316Z Attestations: read -2026-05-03T08:50:39.1686742Z Checks: read -2026-05-03T08:50:39.1687161Z Contents: read -2026-05-03T08:50:39.1687566Z Deployments: read -2026-05-03T08:50:39.1687939Z Discussions: read -2026-05-03T08:50:39.1688398Z Issues: read -2026-05-03T08:50:39.1688753Z Metadata: read -2026-05-03T08:50:39.1689144Z Models: read -2026-05-03T08:50:39.1689517Z Packages: read -2026-05-03T08:50:39.1689937Z Pages: read -2026-05-03T08:50:39.1690388Z PullRequests: read -2026-05-03T08:50:39.1690811Z RepositoryProjects: read -2026-05-03T08:50:39.1691299Z SecurityEvents: read -2026-05-03T08:50:39.1691685Z Statuses: read -2026-05-03T08:50:39.1692128Z VulnerabilityAlerts: read -2026-05-03T08:50:39.1692593Z ##[endgroup] -2026-05-03T08:50:39.1694465Z Secret source: None -2026-05-03T08:50:39.1695044Z Prepare workflow directory -2026-05-03T08:50:39.2012185Z Prepare all required actions -2026-05-03T08:50:39.2054775Z Getting action download info -2026-05-03T08:50:39.6250067Z Download action repository 'actions/checkout@v4' (SHA:34e114876b0b11c390a56381ad16ebd13914f8d5) -2026-05-03T08:50:39.7960802Z Download action repository 'actions/setup-python@v5' (SHA:a26af69be951a213d495a4c3e4e4022e16d87065) -2026-05-03T08:50:40.0799901Z Download action repository 'astral-sh/setup-uv@v4' (SHA:38f3f104447c67c051c4a08e39b64a148898af3a) -2026-05-03T08:50:41.1424247Z Download action repository 'actions/cache@v4' (SHA:0057852bfaa89a56745cba8c7296529d2fc39830) -2026-05-03T08:50:41.5475363Z Download action repository 'actions/setup-node@v4' (SHA:49933ea5288caeca8642d1e84afbd3f7d6820020) -2026-05-03T08:50:42.0285390Z Complete job name: tox (3.10, lint) -2026-05-03T08:50:42.0909652Z ##[group]Run if [[ "$toxenv" == 'docs' ]]; then -2026-05-03T08:50:42.0910618Z if [[ "$toxenv" == 'docs' ]]; then -2026-05-03T08:50:42.0911474Z  echo 'TOXENV=docs' | tee -a $GITHUB_ENV -2026-05-03T08:50:42.0912286Z else -2026-05-03T08:50:42.0913122Z  echo "TOXENV=py${python}-${toxenv}" | tr -d '.' | tee -a $GITHUB_ENV -2026-05-03T08:50:42.0914306Z fi -2026-05-03T08:50:42.0937059Z shell: /usr/bin/bash --noprofile --norc -e -o pipefail {0} -2026-05-03T08:50:42.0938114Z env: -2026-05-03T08:50:42.0938606Z python: 3.10 -2026-05-03T08:50:42.0939145Z toxenv: lint -2026-05-03T08:50:42.0939680Z ##[endgroup] -2026-05-03T08:50:42.1045953Z TOXENV=py310-lint -2026-05-03T08:50:42.1242272Z ##[group]Run actions/checkout@v4 -2026-05-03T08:50:42.1243023Z with: -2026-05-03T08:50:42.1243820Z repository: libp2p/py-libp2p -2026-05-03T08:50:42.1244733Z token: *** -2026-05-03T08:50:42.1245260Z ssh-strict: true -2026-05-03T08:50:42.1245828Z ssh-user: git -2026-05-03T08:50:42.1246401Z persist-credentials: true -2026-05-03T08:50:42.1247270Z clean: true -2026-05-03T08:50:42.1247848Z sparse-checkout-cone-mode: true -2026-05-03T08:50:42.1248556Z fetch-depth: 1 -2026-05-03T08:50:42.1249122Z fetch-tags: false -2026-05-03T08:50:42.1249712Z show-progress: true -2026-05-03T08:50:42.1250297Z lfs: false -2026-05-03T08:50:42.1250828Z submodules: false -2026-05-03T08:50:42.1251426Z set-safe-directory: true -2026-05-03T08:50:42.1252067Z env: -2026-05-03T08:50:42.1252570Z TOXENV: py310-lint -2026-05-03T08:50:42.1253147Z ##[endgroup] -2026-05-03T08:50:42.2189958Z Syncing repository: libp2p/py-libp2p -2026-05-03T08:50:42.2192771Z ##[group]Getting Git version info -2026-05-03T08:50:42.2194470Z Working directory is '/home/runner/work/py-libp2p/py-libp2p' -2026-05-03T08:50:42.2196501Z [command]/usr/bin/git version -2026-05-03T08:50:42.2769151Z git version 2.53.0 -2026-05-03T08:50:42.2788313Z ##[endgroup] -2026-05-03T08:50:42.2800674Z Temporarily overriding HOME='/home/runner/work/_temp/280f354b-3234-4524-abbb-4bf3e2f57d2d' before making global git config changes -2026-05-03T08:50:42.2803212Z Adding repository directory to the temporary git global config as a safe directory -2026-05-03T08:50:42.2805476Z [command]/usr/bin/git config --global --add safe.directory /home/runner/work/py-libp2p/py-libp2p -2026-05-03T08:50:42.2831018Z Deleting the contents of '/home/runner/work/py-libp2p/py-libp2p' -2026-05-03T08:50:42.2833973Z ##[group]Initializing the repository -2026-05-03T08:50:42.2837052Z [command]/usr/bin/git init /home/runner/work/py-libp2p/py-libp2p -2026-05-03T08:50:42.2997621Z hint: Using 'master' as the name for the initial branch. This default branch name -2026-05-03T08:50:42.2999318Z hint: will change to "main" in Git 3.0. To configure the initial branch name -2026-05-03T08:50:42.3000789Z hint: to use in all of your new repositories, which will suppress this warning, -2026-05-03T08:50:42.3002234Z hint: call: -2026-05-03T08:50:42.3002900Z hint: -2026-05-03T08:50:42.3003888Z hint: git config --global init.defaultBranch -2026-05-03T08:50:42.3004879Z hint: -2026-05-03T08:50:42.3005833Z hint: Names commonly chosen instead of 'master' are 'main', 'trunk' and -2026-05-03T08:50:42.3007603Z hint: 'development'. The just-created branch can be renamed via this command: -2026-05-03T08:50:42.3009009Z hint: -2026-05-03T08:50:42.3009721Z hint: git branch -m -2026-05-03T08:50:42.3010559Z hint: -2026-05-03T08:50:42.3011975Z hint: Disable this message with "git config set advice.defaultBranchName false" -2026-05-03T08:50:42.3013886Z Initialized empty Git repository in /home/runner/work/py-libp2p/py-libp2p/.git/ -2026-05-03T08:50:42.3016138Z [command]/usr/bin/git remote add origin https://github.com/libp2p/py-libp2p -2026-05-03T08:50:42.3035030Z ##[endgroup] -2026-05-03T08:50:42.3036478Z ##[group]Disabling automatic garbage collection -2026-05-03T08:50:42.3037981Z [command]/usr/bin/git config --local gc.auto 0 -2026-05-03T08:50:42.3061996Z ##[endgroup] -2026-05-03T08:50:42.3063521Z ##[group]Setting up auth -2026-05-03T08:50:42.3068223Z [command]/usr/bin/git config --local --name-only --get-regexp core\.sshCommand -2026-05-03T08:50:42.3092887Z [command]/usr/bin/git submodule foreach --recursive sh -c "git config --local --name-only --get-regexp 'core\.sshCommand' && git config --local --unset-all 'core.sshCommand' || :" -2026-05-03T08:50:42.3331807Z [command]/usr/bin/git config --local --name-only --get-regexp http\.https\:\/\/github\.com\/\.extraheader -2026-05-03T08:50:42.3361911Z [command]/usr/bin/git submodule foreach --recursive sh -c "git config --local --name-only --get-regexp 'http\.https\:\/\/github\.com\/\.extraheader' && git config --local --unset-all 'http.https://github.com/.extraheader' || :" -2026-05-03T08:50:42.3543069Z [command]/usr/bin/git config --local --name-only --get-regexp ^includeIf\.gitdir: -2026-05-03T08:50:42.3567245Z [command]/usr/bin/git submodule foreach --recursive git config --local --show-origin --name-only --get-regexp remote.origin.url -2026-05-03T08:50:42.3749227Z [command]/usr/bin/git config --local http.https://github.com/.extraheader AUTHORIZATION: basic *** -2026-05-03T08:50:42.3777229Z ##[endgroup] -2026-05-03T08:50:42.3783576Z ##[group]Fetching the repository -2026-05-03T08:50:42.3785524Z [command]/usr/bin/git -c protocol.version=2 fetch --no-tags --prune --no-recurse-submodules --depth=1 origin +79fa6a4965dd094f1168364b06f70f446048452d:refs/remotes/pull/1321/merge -2026-05-03T08:50:43.8242606Z From https://github.com/libp2p/py-libp2p -2026-05-03T08:50:43.8243185Z * [new ref] 79fa6a4965dd094f1168364b06f70f446048452d -> pull/1321/merge -2026-05-03T08:50:43.8265852Z ##[endgroup] -2026-05-03T08:50:43.8266310Z ##[group]Determining the checkout info -2026-05-03T08:50:43.8268290Z ##[endgroup] -2026-05-03T08:50:43.8273080Z [command]/usr/bin/git sparse-checkout disable -2026-05-03T08:50:43.8303282Z [command]/usr/bin/git config --local --unset-all extensions.worktreeConfig -2026-05-03T08:50:43.8323260Z ##[group]Checking out the ref -2026-05-03T08:50:43.8326267Z [command]/usr/bin/git checkout --progress --force refs/remotes/pull/1321/merge -2026-05-03T08:50:43.8851236Z Note: switching to 'refs/remotes/pull/1321/merge'. -2026-05-03T08:50:43.8851608Z -2026-05-03T08:50:43.8851869Z You are in 'detached HEAD' state. You can look around, make experimental -2026-05-03T08:50:43.8852442Z changes and commit them, and you can discard any commits you make in this -2026-05-03T08:50:43.8852995Z state without impacting any branches by switching back to a branch. -2026-05-03T08:50:43.8853500Z -2026-05-03T08:50:43.8853789Z If you want to create a new branch to retain commits you create, you may -2026-05-03T08:50:43.8854348Z do so (now or later) by using -c with the switch command. Example: -2026-05-03T08:50:43.8854551Z -2026-05-03T08:50:43.8854651Z git switch -c -2026-05-03T08:50:43.8854803Z -2026-05-03T08:50:43.8854888Z Or undo this operation with: -2026-05-03T08:50:43.8855021Z -2026-05-03T08:50:43.8855095Z git switch - -2026-05-03T08:50:43.8855229Z -2026-05-03T08:50:43.8855393Z Turn off this advice by setting config variable advice.detachedHead to false -2026-05-03T08:50:43.8855623Z -2026-05-03T08:50:43.8855896Z HEAD is now at 79fa6a4 Merge 58719a78ba64fd512991937d190a90bc45d748b5 into d9c50c0bec064e097a01ccafbb452e5b78c434e7 -2026-05-03T08:50:43.8858240Z ##[endgroup] -2026-05-03T08:50:43.8890211Z [command]/usr/bin/git log -1 --format=%H -2026-05-03T08:50:43.8908299Z 79fa6a4965dd094f1168364b06f70f446048452d -2026-05-03T08:50:43.9060008Z ##[group]Run actions/setup-python@v5 -2026-05-03T08:50:43.9060230Z with: -2026-05-03T08:50:43.9060397Z python-version: 3.10 -2026-05-03T08:50:43.9060575Z check-latest: false -2026-05-03T08:50:43.9060849Z token: *** -2026-05-03T08:50:43.9061019Z update-environment: true -2026-05-03T08:50:43.9061211Z allow-prereleases: false -2026-05-03T08:50:43.9061425Z freethreaded: false -2026-05-03T08:50:43.9061583Z env: -2026-05-03T08:50:43.9061738Z TOXENV: py310-lint -2026-05-03T08:50:43.9061893Z ##[endgroup] -2026-05-03T08:50:44.0434859Z ##[group]Installed versions -2026-05-03T08:50:44.0534645Z Successfully set up CPython (3.10.20) -2026-05-03T08:50:44.0539173Z ##[endgroup] -2026-05-03T08:50:44.0684971Z ##[group]Run astral-sh/setup-uv@v4 -2026-05-03T08:50:44.0685167Z with: -2026-05-03T08:50:44.0685310Z version: latest -2026-05-03T08:50:44.0685570Z github-token: *** -2026-05-03T08:50:44.0685732Z enable-cache: false -2026-05-03T08:50:44.0685897Z cache-dependency-glob: **/uv.lock -2026-05-03T08:50:44.0686094Z prune-cache: true -2026-05-03T08:50:44.0686258Z ignore-nothing-to-cache: false -2026-05-03T08:50:44.0686437Z env: -2026-05-03T08:50:44.0686577Z TOXENV: py310-lint -2026-05-03T08:50:44.0686781Z pythonLocation: /opt/hostedtoolcache/Python/3.10.20/x64 -2026-05-03T08:50:44.0687107Z PKG_CONFIG_PATH: /opt/hostedtoolcache/Python/3.10.20/x64/lib/pkgconfig -2026-05-03T08:50:44.0687417Z Python_ROOT_DIR: /opt/hostedtoolcache/Python/3.10.20/x64 -2026-05-03T08:50:44.0687694Z Python2_ROOT_DIR: /opt/hostedtoolcache/Python/3.10.20/x64 -2026-05-03T08:50:44.0687958Z Python3_ROOT_DIR: /opt/hostedtoolcache/Python/3.10.20/x64 -2026-05-03T08:50:44.0688266Z LD_LIBRARY_PATH: /opt/hostedtoolcache/Python/3.10.20/x64/lib -2026-05-03T08:50:44.0688631Z ##[endgroup] -2026-05-03T08:50:44.5566543Z Downloading uv from "https://github.com/astral-sh/uv/releases/download/0.11.8/uv-x86_64-unknown-linux-gnu.tar.gz" ... -2026-05-03T08:50:45.1270603Z [command]/usr/bin/tar xz --warning=no-unknown-keyword --overwrite -C /home/runner/work/_temp/eb456ef0-5fd2-4cca-80a6-59d4cc913925 -f /home/runner/work/_temp/425368e2-6920-4da8-9c29-e2eb033fc778 -2026-05-03T08:50:45.8868294Z Added /opt/hostedtoolcache/uv/0.11.8/x86_64 to the path -2026-05-03T08:50:45.8869251Z Added /home/runner/.local/bin to the path -2026-05-03T08:50:45.8878341Z Set UV_CACHE_DIR to /home/runner/work/_temp/setup-uv-cache -2026-05-03T08:50:45.8878773Z Successfully installed uv version 0.11.8 -2026-05-03T08:50:45.8964694Z ##[group]Run sudo apt-get update -2026-05-03T08:50:45.8964943Z sudo apt-get update -2026-05-03T08:50:45.8965146Z sudo apt-get install -y libgmp-dev -2026-05-03T08:50:45.9186725Z shell: /usr/bin/bash --noprofile --norc -e -o pipefail {0} -2026-05-03T08:50:45.9187010Z env: -2026-05-03T08:50:45.9187185Z TOXENV: py310-lint -2026-05-03T08:50:45.9187400Z pythonLocation: /opt/hostedtoolcache/Python/3.10.20/x64 -2026-05-03T08:50:45.9187715Z PKG_CONFIG_PATH: /opt/hostedtoolcache/Python/3.10.20/x64/lib/pkgconfig -2026-05-03T08:50:45.9188020Z Python_ROOT_DIR: /opt/hostedtoolcache/Python/3.10.20/x64 -2026-05-03T08:50:45.9188291Z Python2_ROOT_DIR: /opt/hostedtoolcache/Python/3.10.20/x64 -2026-05-03T08:50:45.9188573Z Python3_ROOT_DIR: /opt/hostedtoolcache/Python/3.10.20/x64 -2026-05-03T08:50:45.9188862Z LD_LIBRARY_PATH: /opt/hostedtoolcache/Python/3.10.20/x64/lib -2026-05-03T08:50:45.9189133Z UV_CACHE_DIR: /home/runner/work/_temp/setup-uv-cache -2026-05-03T08:50:45.9189384Z ##[endgroup] -2026-05-03T08:50:46.0201616Z Get:1 file:/etc/apt/apt-mirrors.txt Mirrorlist [144 B] -2026-05-03T08:50:46.0559674Z Hit:2 http://azure.archive.ubuntu.com/ubuntu noble InRelease -2026-05-03T08:50:46.0573271Z Get:3 http://azure.archive.ubuntu.com/ubuntu noble-updates InRelease [126 kB] -2026-05-03T08:50:46.0611129Z Get:4 http://azure.archive.ubuntu.com/ubuntu noble-backports InRelease [126 kB] -2026-05-03T08:50:46.0644610Z Get:5 http://azure.archive.ubuntu.com/ubuntu noble-security InRelease [126 kB] -2026-05-03T08:50:46.0734115Z Hit:6 https://packages.microsoft.com/repos/azure-cli noble InRelease -2026-05-03T08:50:46.0954101Z Get:7 https://packages.microsoft.com/ubuntu/24.04/prod noble InRelease [3600 B] -2026-05-03T08:50:46.1492561Z Get:8 https://dl.google.com/linux/chrome-stable/deb stable InRelease [1825 B] -2026-05-03T08:50:46.2162179Z Get:9 http://azure.archive.ubuntu.com/ubuntu noble-updates/main amd64 Packages [1946 kB] -2026-05-03T08:50:46.2311865Z Get:10 http://azure.archive.ubuntu.com/ubuntu noble-updates/main Translation-en [348 kB] -2026-05-03T08:50:46.2335550Z Get:11 http://azure.archive.ubuntu.com/ubuntu noble-updates/main amd64 Components [177 kB] -2026-05-03T08:50:46.2356328Z Get:12 http://azure.archive.ubuntu.com/ubuntu noble-updates/main amd64 c-n-f Metadata [17.1 kB] -2026-05-03T08:50:46.2374633Z Get:13 http://azure.archive.ubuntu.com/ubuntu noble-updates/universe amd64 Packages [1685 kB] -2026-05-03T08:50:46.2439427Z Get:14 http://azure.archive.ubuntu.com/ubuntu noble-updates/universe Translation-en [324 kB] -2026-05-03T08:50:46.2460309Z Get:15 http://azure.archive.ubuntu.com/ubuntu noble-updates/universe amd64 Components [386 kB] -2026-05-03T08:50:46.2490312Z Get:16 http://azure.archive.ubuntu.com/ubuntu noble-updates/universe amd64 c-n-f Metadata [34.5 kB] -2026-05-03T08:50:46.2524149Z Get:17 http://azure.archive.ubuntu.com/ubuntu noble-updates/restricted amd64 Packages [3095 kB] -2026-05-03T08:50:46.2620778Z Get:18 http://azure.archive.ubuntu.com/ubuntu noble-updates/restricted Translation-en [715 kB] -2026-05-03T08:50:46.3066836Z Get:19 http://azure.archive.ubuntu.com/ubuntu noble-updates/restricted amd64 Components [212 B] -2026-05-03T08:50:46.3078519Z Get:20 http://azure.archive.ubuntu.com/ubuntu noble-updates/restricted amd64 c-n-f Metadata [480 B] -2026-05-03T08:50:46.3091347Z Get:21 http://azure.archive.ubuntu.com/ubuntu noble-updates/multiverse amd64 Packages [44.4 kB] -2026-05-03T08:50:46.3105067Z Get:22 http://azure.archive.ubuntu.com/ubuntu noble-updates/multiverse Translation-en [10.2 kB] -2026-05-03T08:50:46.3114449Z Get:23 http://azure.archive.ubuntu.com/ubuntu noble-updates/multiverse amd64 Components [940 B] -2026-05-03T08:50:46.3126258Z Get:24 http://azure.archive.ubuntu.com/ubuntu noble-updates/multiverse amd64 c-n-f Metadata [656 B] -2026-05-03T08:50:46.3139181Z Get:25 http://azure.archive.ubuntu.com/ubuntu noble-backports/main amd64 Packages [64.5 kB] -2026-05-03T08:50:46.3153707Z Get:26 http://azure.archive.ubuntu.com/ubuntu noble-backports/main Translation-en [9172 B] -2026-05-03T08:50:46.3164013Z Get:27 http://azure.archive.ubuntu.com/ubuntu noble-backports/main amd64 Components [7368 B] -2026-05-03T08:50:46.3176514Z Get:28 http://azure.archive.ubuntu.com/ubuntu noble-backports/main amd64 c-n-f Metadata [368 B] -2026-05-03T08:50:46.3187200Z Get:29 http://azure.archive.ubuntu.com/ubuntu noble-backports/universe amd64 Packages [34.1 kB] -2026-05-03T08:50:46.3195820Z Get:30 http://azure.archive.ubuntu.com/ubuntu noble-backports/universe Translation-en [18.2 kB] -2026-05-03T08:50:46.3204977Z Get:31 http://azure.archive.ubuntu.com/ubuntu noble-backports/universe amd64 Components [10.5 kB] -2026-05-03T08:50:46.3214686Z Get:32 http://azure.archive.ubuntu.com/ubuntu noble-backports/universe amd64 c-n-f Metadata [1484 B] -2026-05-03T08:50:46.3670235Z Get:33 http://azure.archive.ubuntu.com/ubuntu noble-backports/restricted amd64 Components [212 B] -2026-05-03T08:50:46.3680882Z Get:34 http://azure.archive.ubuntu.com/ubuntu noble-backports/multiverse amd64 Packages [748 B] -2026-05-03T08:50:46.3695651Z Get:35 http://azure.archive.ubuntu.com/ubuntu noble-backports/multiverse amd64 Components [212 B] -2026-05-03T08:50:46.3710818Z Get:36 http://azure.archive.ubuntu.com/ubuntu noble-security/main amd64 Packages [1625 kB] -2026-05-03T08:50:46.3796468Z Get:37 http://azure.archive.ubuntu.com/ubuntu noble-security/main Translation-en [259 kB] -2026-05-03T08:50:46.3812984Z Get:38 http://azure.archive.ubuntu.com/ubuntu noble-security/main amd64 Components [21.9 kB] -2026-05-03T08:50:46.3824854Z Get:39 http://azure.archive.ubuntu.com/ubuntu noble-security/main amd64 c-n-f Metadata [11.0 kB] -2026-05-03T08:50:46.3835830Z Get:40 http://azure.archive.ubuntu.com/ubuntu noble-security/universe amd64 Packages [1182 kB] -2026-05-03T08:50:46.3893992Z Get:41 http://azure.archive.ubuntu.com/ubuntu noble-security/universe Translation-en [227 kB] -2026-05-03T08:50:46.3915962Z Get:42 http://azure.archive.ubuntu.com/ubuntu noble-security/universe amd64 Components [74.2 kB] -2026-05-03T08:50:46.3926311Z Get:43 http://azure.archive.ubuntu.com/ubuntu noble-security/universe amd64 c-n-f Metadata [23.1 kB] -2026-05-03T08:50:46.3938181Z Get:44 http://azure.archive.ubuntu.com/ubuntu noble-security/restricted amd64 Packages [2844 kB] -2026-05-03T08:50:46.4058462Z Get:45 http://azure.archive.ubuntu.com/ubuntu noble-security/restricted Translation-en [666 kB] -2026-05-03T08:50:46.4069280Z Get:50 https://packages.microsoft.com/ubuntu/24.04/prod noble/main arm64 Packages [107 kB] -2026-05-03T08:50:46.4093580Z Get:46 http://azure.archive.ubuntu.com/ubuntu noble-security/restricted amd64 Components [212 B] -2026-05-03T08:50:46.4103090Z Get:47 http://azure.archive.ubuntu.com/ubuntu noble-security/multiverse amd64 Packages [28.8 kB] -2026-05-03T08:50:46.4117081Z Get:51 https://packages.microsoft.com/ubuntu/24.04/prod noble/main armhf Packages [11.6 kB] -2026-05-03T08:50:46.4145608Z Get:52 https://packages.microsoft.com/ubuntu/24.04/prod noble/main amd64 Packages [132 kB] -2026-05-03T08:50:46.4548104Z Get:48 http://azure.archive.ubuntu.com/ubuntu noble-security/multiverse Translation-en [7172 B] -2026-05-03T08:50:46.4558458Z Get:49 http://azure.archive.ubuntu.com/ubuntu noble-security/multiverse amd64 Components [208 B] -2026-05-03T08:50:46.4951182Z Get:53 https://dl.google.com/linux/chrome-stable/deb stable/main amd64 Packages [1216 B] -2026-05-03T08:50:56.0749376Z Fetched 16.5 MB in 2s (10.6 MB/s) -2026-05-03T08:50:56.6803818Z Reading package lists... -2026-05-03T08:50:56.7978051Z Reading package lists... -2026-05-03T08:50:56.9602716Z Building dependency tree... -2026-05-03T08:50:56.9609431Z Reading state information... -2026-05-03T08:50:57.1250664Z libgmp-dev is already the newest version (2:6.3.0+dfsg-2ubuntu6.1). -2026-05-03T08:50:57.1251258Z 0 upgraded, 0 newly installed, 0 to remove and 53 not upgraded. -2026-05-03T08:50:57.1331388Z ##[group]Run uv venv venv -2026-05-03T08:50:57.1331614Z uv venv venv -2026-05-03T08:50:57.1331780Z source venv/bin/activate -2026-05-03T08:50:57.1331994Z uv pip install --upgrade pip -2026-05-03T08:50:57.1332196Z uv pip install tox -2026-05-03T08:50:57.1351181Z shell: /usr/bin/bash --noprofile --norc -e -o pipefail {0} -2026-05-03T08:50:57.1351413Z env: -2026-05-03T08:50:57.1351553Z TOXENV: py310-lint -2026-05-03T08:50:57.1351758Z pythonLocation: /opt/hostedtoolcache/Python/3.10.20/x64 -2026-05-03T08:50:57.1352072Z PKG_CONFIG_PATH: /opt/hostedtoolcache/Python/3.10.20/x64/lib/pkgconfig -2026-05-03T08:50:57.1352388Z Python_ROOT_DIR: /opt/hostedtoolcache/Python/3.10.20/x64 -2026-05-03T08:50:57.1352658Z Python2_ROOT_DIR: /opt/hostedtoolcache/Python/3.10.20/x64 -2026-05-03T08:50:57.1352928Z Python3_ROOT_DIR: /opt/hostedtoolcache/Python/3.10.20/x64 -2026-05-03T08:50:57.1353193Z LD_LIBRARY_PATH: /opt/hostedtoolcache/Python/3.10.20/x64/lib -2026-05-03T08:50:57.1353645Z UV_CACHE_DIR: /home/runner/work/_temp/setup-uv-cache -2026-05-03T08:50:57.1353861Z ##[endgroup] -2026-05-03T08:50:58.5508259Z Using CPython 3.10.20 interpreter at: /opt/hostedtoolcache/Python/3.10.20/x64/bin/python3 -2026-05-03T08:50:58.5508833Z Creating virtual environment at: venv -2026-05-03T08:50:58.5512459Z Activate with: venv/bin/activate -2026-05-03T08:50:58.6254497Z Using Python 3.10.20 environment at: venv -2026-05-03T08:50:58.7900612Z Resolved 1 package in 164ms -2026-05-03T08:50:58.8077019Z Downloading pip (1.7MiB) -2026-05-03T08:50:58.9464109Z Downloaded pip -2026-05-03T08:50:58.9466143Z Prepared 1 package in 156ms -2026-05-03T08:50:58.9565192Z Installed 1 package in 9ms -2026-05-03T08:50:58.9565667Z + pip==26.1 -2026-05-03T08:50:58.9634712Z Using Python 3.10.20 environment at: venv -2026-05-03T08:50:59.2420869Z Resolved 14 packages in 278ms -2026-05-03T08:50:59.2865079Z Downloading virtualenv (7.2MiB) -2026-05-03T08:50:59.4887062Z Downloaded virtualenv -2026-05-03T08:50:59.4889022Z Prepared 14 packages in 246ms -2026-05-03T08:50:59.4966270Z Installed 14 packages in 7ms -2026-05-03T08:50:59.4966562Z + cachetools==7.1.0 -2026-05-03T08:50:59.4966804Z + colorama==0.4.6 -2026-05-03T08:50:59.4967020Z + distlib==0.4.0 -2026-05-03T08:50:59.4967244Z + filelock==3.29.0 -2026-05-03T08:50:59.4967461Z + packaging==26.2 -2026-05-03T08:50:59.4967692Z + platformdirs==4.9.6 -2026-05-03T08:50:59.4968036Z + pluggy==1.6.0 -2026-05-03T08:50:59.4968363Z + pyproject-api==1.10.0 -2026-05-03T08:50:59.4968707Z + python-discovery==1.2.2 -2026-05-03T08:50:59.4968996Z + tomli==2.4.1 -2026-05-03T08:50:59.4969332Z + tomli-w==1.2.0 -2026-05-03T08:50:59.4969687Z + tox==4.53.1 -2026-05-03T08:50:59.4970049Z + typing-extensions==4.15.0 -2026-05-03T08:50:59.4970362Z + virtualenv==21.3.0 -2026-05-03T08:50:59.5036958Z ##[group]Run source venv/bin/activate -2026-05-03T08:50:59.5037223Z source venv/bin/activate -2026-05-03T08:50:59.5037428Z if [[ "lint" == 'docs' ]]; then -2026-05-03T08:50:59.5037632Z  export TOXENV=docs -2026-05-03T08:50:59.5037810Z else -2026-05-03T08:50:59.5038013Z  export TOXENV=$(echo "py3.10-lint" | tr -d '.') -2026-05-03T08:50:59.5038240Z fi -2026-05-03T08:50:59.5038413Z # Set PATH for nim commands during tox -2026-05-03T08:50:59.5038638Z if [[ "lint" == 'interop' ]]; then -2026-05-03T08:50:59.5038965Z  export PATH="$HOME/.nimble/bin:$HOME/.choosenim/toolchains/nim-stable/bin:$PATH" -2026-05-03T08:50:59.5039290Z fi -2026-05-03T08:50:59.5039603Z python -m tox run -r -2026-05-03T08:50:59.5058565Z shell: /usr/bin/bash --noprofile --norc -e -o pipefail {0} -2026-05-03T08:50:59.5058820Z env: -2026-05-03T08:50:59.5058968Z TOXENV: py310-lint -2026-05-03T08:50:59.5059224Z pythonLocation: /opt/hostedtoolcache/Python/3.10.20/x64 -2026-05-03T08:50:59.5059536Z PKG_CONFIG_PATH: /opt/hostedtoolcache/Python/3.10.20/x64/lib/pkgconfig -2026-05-03T08:50:59.5059846Z Python_ROOT_DIR: /opt/hostedtoolcache/Python/3.10.20/x64 -2026-05-03T08:50:59.5060119Z Python2_ROOT_DIR: /opt/hostedtoolcache/Python/3.10.20/x64 -2026-05-03T08:50:59.5060391Z Python3_ROOT_DIR: /opt/hostedtoolcache/Python/3.10.20/x64 -2026-05-03T08:50:59.5060686Z LD_LIBRARY_PATH: /opt/hostedtoolcache/Python/3.10.20/x64/lib -2026-05-03T08:50:59.5060960Z UV_CACHE_DIR: /home/runner/work/_temp/setup-uv-cache -2026-05-03T08:50:59.5061188Z ##[endgroup] -2026-05-03T08:51:02.3471735Z .pkg: remove tox env folder /home/runner/work/py-libp2p/py-libp2p/.tox/.pkg -2026-05-03T08:51:02.6301868Z py310-lint: install_deps> python -I -m pip install pre-commit -2026-05-03T08:51:05.9671352Z .pkg: install_requires> python -I -m pip install 'setuptools>=42' wheel -2026-05-03T08:51:07.1268925Z .pkg: _optional_hooks> python /home/runner/work/py-libp2p/py-libp2p/venv/lib/python3.10/site-packages/pyproject_api/_backend.py True setuptools.build_meta -2026-05-03T08:51:07.4136028Z .pkg: get_requires_for_build_editable> python /home/runner/work/py-libp2p/py-libp2p/venv/lib/python3.10/site-packages/pyproject_api/_backend.py True setuptools.build_meta -2026-05-03T08:51:07.8991783Z .pkg: freeze> python -m pip freeze --all -2026-05-03T08:51:08.0790106Z .pkg: packaging==26.2,pip==26.1,setuptools==82.0.1,wheel==0.47.0 -2026-05-03T08:51:08.0794350Z .pkg: build_editable> python /home/runner/work/py-libp2p/py-libp2p/venv/lib/python3.10/site-packages/pyproject_api/_backend.py True setuptools.build_meta -2026-05-03T08:51:08.8517413Z py310-lint: install_package_deps> python -I -m pip install 'aioquic>=1.2.0' 'anyio>=4.0' 'base58>=1.0.3' 'cbor2>=5.4.0' coincurve==21.0.0 'exceptiongroup>=1.2.0; python_version < "3.11"' 'fastecdsa==2.3.2; sys_platform != "win32"' 'grpcio>=1.41.0' 'httpx>=0.25.0' 'lru-dict>=1.1.6' 'miniupnpc<3.0,>=2.3' multiaddr==0.0.11 'mypy-protobuf>=3.0.0' 'noiseprotocol>=0.3.0' 'protobuf<7.0.0,>=4.25.0' 'py-cid>=0.5.0' 'py-multibase>=2.0.0' 'py-multicodec>=1.0.0' 'py-multihash>=3.0.0' 'pycryptodome>=3.9.2' 'pynacl>=1.3.0' 'requests>=2.28.0' 'rpcudp>=3.0.0' 'trio-typing>=0.0.4' 'trio-websocket>=0.11.0' 'trio>=0.26.0' types-requests 'zeroconf<0.148.0,>=0.147.0' -2026-05-03T08:51:32.5705371Z py310-lint: install_package> python -I -m pip install --force-reinstall --no-deps /home/runner/work/py-libp2p/py-libp2p/.tox/.tmp/package/1/libp2p-0.6.0-0.editable-py3-none-any.whl -2026-05-03T08:51:32.9081407Z py310-lint: freeze> python -m pip freeze --all -2026-05-03T08:51:33.1369371Z py310-lint: aioquic==1.3.0,anyio==4.13.0,async-generator==1.10,attrs==26.1.0,base58==2.1.1,blake3==1.0.8,cbor2==6.0.1,certifi==2026.4.22,cffi==2.0.0,cfgv==3.5.0,charset-normalizer==3.4.7,coincurve==21.0.0,cryptography==47.0.0,distlib==0.4.0,dnspython==2.8.0,exceptiongroup==1.3.1,fastecdsa==2.3.2,filelock==3.29.0,grpcio==1.80.0,h11==0.16.0,httpcore==1.0.9,httpx==0.28.1,identify==2.6.19,idna==3.13,ifaddr==0.2.0,importlib_metadata==9.0.0,libp2p @ file:///home/runner/work/py-libp2p/py-libp2p/.tox/.tmp/package/1/libp2p-0.6.0-0.editable-py3-none-any.whl#sha256=6f0fe388ecd3df637e7630940338cbf3d3eb067e9792e9fcbd17064693405a1b,lru-dict==1.4.1,miniupnpc==2.3.3,mmh3==5.2.1,morphys==1.0,multiaddr==0.0.11,mypy-protobuf==5.1.0,mypy_extensions==1.1.0,netaddr==1.3.0,nodeenv==1.10.0,noiseprotocol==0.3.1,outcome==1.3.0.post0,packaging==26.2,pip==26.1,platformdirs==4.9.6,pre_commit==4.6.0,protobuf==6.33.6,psutil==7.2.2,py-cid==0.5.0,py-multibase==2.0.0,py-multicodec==1.0.0,py-multihash==3.0.0,pyasn1==0.6.3,pyasn1_modules==0.4.2,pycparser==3.0,pycryptodome==3.23.0,pylsqpack==0.3.24,PyNaCl==1.6.2,pyOpenSSL==26.1.0,python-baseconv==1.2.2,python-discovery==1.2.2,PyYAML==6.0.3,requests==2.33.1,rpcudp==5.0.1,service-identity==24.2.0,setuptools==82.0.1,six==1.17.0,sniffio==1.3.1,sortedcontainers==2.4.0,trio==0.33.0,trio-typing==0.10.0,trio-websocket==0.12.2,types-protobuf==7.34.1.20260503,types-requests==2.33.0.20260503,typing_extensions==4.15.0,u-msgpack-python==2.8.0,urllib3==2.6.3,varint==1.0.2,virtualenv==21.3.0,wsproto==1.3.2,zeroconf==0.147.3,zipp==3.23.1 -2026-05-03T08:51:33.1374547Z py310-lint: commands_pre[0]> uv pip install --upgrade pip -2026-05-03T08:51:33.1900042Z Using Python 3.10.20 environment at: .tox/py310-lint -2026-05-03T08:51:33.2954857Z Resolved 1 package in 103ms -2026-05-03T08:51:33.2955970Z Checked 1 package in 0.06ms -2026-05-03T08:51:33.2972424Z py310-lint: commands_pre[1]> uv pip install --group dev -e . -2026-05-03T08:51:33.3514785Z Using Python 3.10.20 environment at: .tox/py310-lint -2026-05-03T08:51:34.0461313Z Resolved 166 packages in 693ms -2026-05-03T08:51:34.0500055Z Building libp2p @ file:///home/runner/work/py-libp2p/py-libp2p -2026-05-03T08:51:34.0662910Z Downloading babel (9.7MiB) -2026-05-03T08:51:34.1167498Z Downloading jedi (4.7MiB) -2026-05-03T08:51:34.1177079Z Downloading pygments (1.2MiB) -2026-05-03T08:51:34.1182166Z Downloading sphinx-rtd-theme (7.3MiB) -2026-05-03T08:51:34.1184711Z Downloading pyrefly (5.5MiB) -2026-05-03T08:51:34.1186040Z Downloading faker (1.9MiB) -2026-05-03T08:51:34.1190040Z Downloading ruff (10.8MiB) -2026-05-03T08:51:34.1191694Z Downloading mypy (14.0MiB) -2026-05-03T08:51:34.1197431Z Downloading pydantic-core (2.0MiB) -2026-05-03T08:51:34.1201466Z Downloading sphinx (3.3MiB) -2026-05-03T08:51:34.5484924Z Downloaded pydantic-core -2026-05-03T08:51:34.5491558Z Downloaded pygments -2026-05-03T08:51:35.1384876Z Downloaded sphinx -2026-05-03T08:51:35.2074896Z Downloaded pyrefly -2026-05-03T08:51:35.2793799Z Downloaded sphinx-rtd-theme -2026-05-03T08:51:35.2849741Z Downloaded faker -2026-05-03T08:51:35.3720244Z Downloaded ruff -2026-05-03T08:51:35.3824326Z Downloaded babel -2026-05-03T08:51:35.6597558Z Downloaded mypy -2026-05-03T08:51:35.9119524Z Downloaded jedi -2026-05-03T08:51:36.8250305Z Built libp2p @ file:///home/runner/work/py-libp2p/py-libp2p -2026-05-03T08:51:36.8377725Z Prepared 90 packages in 2.78s -2026-05-03T08:51:36.8388198Z Uninstalled 1 package in 0.96ms -2026-05-03T08:51:37.6322126Z Installed 90 packages in 793ms -2026-05-03T08:51:37.6322945Z + alabaster==1.0.0 -2026-05-03T08:51:37.6323192Z + annotated-types==0.7.0 -2026-05-03T08:51:37.6323607Z + asttokens==3.0.1 -2026-05-03T08:51:37.6323933Z + async-exit-stack==1.0.1 -2026-05-03T08:51:37.6324180Z + babel==2.18.0 -2026-05-03T08:51:37.6324642Z + backports-tarfile==1.2.0 -2026-05-03T08:51:37.6325045Z + bracex==2.6 -2026-05-03T08:51:37.6325252Z + build==1.5.0 -2026-05-03T08:51:37.6325450Z + bump-my-version==1.3.0 -2026-05-03T08:51:37.6325690Z + cachetools==7.1.0 -2026-05-03T08:51:37.6325892Z + click==8.3.3 -2026-05-03T08:51:37.6326099Z + colorama==0.4.6 -2026-05-03T08:51:37.6326317Z + decorator==5.2.1 -2026-05-03T08:51:37.6326536Z + docutils==0.21.2 -2026-05-03T08:51:37.6326747Z + execnet==2.1.2 -2026-05-03T08:51:37.6326981Z + executing==2.2.1 -2026-05-03T08:51:37.6327184Z + factory-boy==2.12.0 -2026-05-03T08:51:37.6327397Z + faker==40.15.0 -2026-05-03T08:51:37.6327585Z + id==1.6.1 -2026-05-03T08:51:37.6328039Z + imagesize==2.0.0 -2026-05-03T08:51:37.6328276Z + iniconfig==2.3.0 -2026-05-03T08:51:37.6328531Z + ipython==8.39.0 -2026-05-03T08:51:37.6328772Z + jaraco-classes==3.4.0 -2026-05-03T08:51:37.6330166Z + jaraco-context==6.1.2 -2026-05-03T08:51:37.6330457Z + jaraco-functools==4.4.0 -2026-05-03T08:51:37.6330721Z + jedi==0.20.0 -2026-05-03T08:51:37.6330932Z + jeepney==0.9.0 -2026-05-03T08:51:37.6331133Z + jinja2==3.1.6 -2026-05-03T08:51:37.6331341Z + keyring==25.7.0 -2026-05-03T08:51:37.6331938Z - libp2p==0.6.0 (from file:///home/runner/work/py-libp2p/py-libp2p/.tox/.tmp/package/1/libp2p-0.6.0-0.editable-py3-none-any.whl) -2026-05-03T08:51:37.6332709Z + libp2p==0.6.0 (from file:///home/runner/work/py-libp2p/py-libp2p) -2026-05-03T08:51:37.6333071Z + librt==0.9.0 -2026-05-03T08:51:37.6333697Z + markdown-it-py==4.0.0 -2026-05-03T08:51:37.6333943Z + markupsafe==3.0.3 -2026-05-03T08:51:37.6334182Z + matplotlib-inline==0.2.1 -2026-05-03T08:51:37.6334439Z + mdurl==0.1.2 -2026-05-03T08:51:37.6334648Z + more-itertools==11.0.2 -2026-05-03T08:51:37.6334893Z + mypy==1.20.2 -2026-05-03T08:51:37.6335079Z + nh3==0.3.5 -2026-05-03T08:51:37.6335281Z + p2pclient==0.2.1 -2026-05-03T08:51:37.6335491Z + parso==0.8.7 -2026-05-03T08:51:37.6335698Z + pathspec==1.1.1 -2026-05-03T08:51:37.6335905Z + pexpect==4.9.0 -2026-05-03T08:51:37.6336110Z + pluggy==1.6.0 -2026-05-03T08:51:37.6336325Z + prompt-toolkit==3.0.52 -2026-05-03T08:51:37.6336541Z + ptyprocess==0.7.0 -2026-05-03T08:51:37.6336735Z + pure-eval==0.2.3 -2026-05-03T08:51:37.6336915Z + pydantic==2.13.3 -2026-05-03T08:51:37.6337155Z + pydantic-core==2.46.3 -2026-05-03T08:51:37.6337401Z + pydantic-settings==2.14.0 -2026-05-03T08:51:37.6337624Z + pygments==2.20.0 -2026-05-03T08:51:37.6337807Z + pyproject-api==1.10.0 -2026-05-03T08:51:37.6338052Z + pyproject-hooks==1.2.0 -2026-05-03T08:51:37.6338267Z + pyrefly==0.17.1 -2026-05-03T08:51:37.6338469Z + pytest==9.0.3 -2026-05-03T08:51:37.6338687Z + pytest-mock==3.15.1 -2026-05-03T08:51:37.6338930Z + pytest-rerunfailures==16.1 -2026-05-03T08:51:37.6339179Z + pytest-timeout==2.4.0 -2026-05-03T08:51:37.6339390Z + pytest-trio==0.8.0 -2026-05-03T08:51:37.6339602Z + pytest-xdist==3.8.0 -2026-05-03T08:51:37.6339794Z + python-dotenv==1.2.2 -2026-05-03T08:51:37.6340003Z + questionary==2.1.1 -2026-05-03T08:51:37.6340195Z + readme-renderer==44.0 -2026-05-03T08:51:37.6340412Z + requests-toolbelt==1.0.0 -2026-05-03T08:51:37.6340766Z + rfc3986==2.0.0 -2026-05-03T08:51:37.6340979Z + rich==15.0.0 -2026-05-03T08:51:37.6341170Z + rich-click==1.9.7 -2026-05-03T08:51:37.6341350Z + ruff==0.15.12 -2026-05-03T08:51:37.6341550Z + secretstorage==3.5.0 -2026-05-03T08:51:37.6341855Z + snowballstemmer==3.0.1 -2026-05-03T08:51:37.6342111Z + sphinx==8.1.3 -2026-05-03T08:51:37.6342342Z + sphinx-rtd-theme==3.1.0 -2026-05-03T08:51:37.6342645Z + sphinxcontrib-applehelp==2.0.0 -2026-05-03T08:51:37.6343043Z + sphinxcontrib-devhelp==2.0.0 -2026-05-03T08:51:37.6343579Z + sphinxcontrib-htmlhelp==2.1.0 -2026-05-03T08:51:37.6343989Z + sphinxcontrib-jquery==4.1 -2026-05-03T08:51:37.6344237Z + sphinxcontrib-jsmath==1.0.1 -2026-05-03T08:51:37.6344492Z + sphinxcontrib-qthelp==2.0.0 -2026-05-03T08:51:37.6344754Z + sphinxcontrib-serializinghtml==2.0.0 -2026-05-03T08:51:37.6345177Z + stack-data==0.6.3 -2026-05-03T08:51:37.6345402Z + tomli==2.4.1 -2026-05-03T08:51:37.6345602Z + tomli-w==1.2.0 -2026-05-03T08:51:37.6345795Z + tomlkit==0.14.0 -2026-05-03T08:51:37.6345987Z + towncrier==25.8.0 -2026-05-03T08:51:37.6346222Z + tox==4.53.1 -2026-05-03T08:51:37.6346423Z + traitlets==5.14.3 -2026-05-03T08:51:37.6346625Z + twine==6.2.0 -2026-05-03T08:51:37.6346821Z + typing-inspection==0.4.2 -2026-05-03T08:51:37.6347029Z + wcmatch==10.1 -2026-05-03T08:51:37.6347214Z + wcwidth==0.7.0 -2026-05-03T08:51:37.6347386Z + wheel==0.47.0 -2026-05-03T08:51:37.6463895Z py310-lint: commands[0]> pre-commit install -2026-05-03T08:51:38.5706338Z pre-commit installed at .git/hooks/pre-commit -2026-05-03T08:51:38.5843977Z py310-lint: commands[1]> pre-commit run --all-files --show-diff-on-failure -2026-05-03T08:51:38.6811482Z [INFO] Initializing environment for https://github.com/pre-commit/pre-commit-hooks. -2026-05-03T08:51:39.4189459Z [INFO] Initializing environment for https://github.com/asottile/pyupgrade. -2026-05-03T08:51:40.4277806Z [INFO] Initializing environment for https://github.com/astral-sh/ruff-pre-commit. -2026-05-03T08:51:40.9532681Z [INFO] Initializing environment for https://github.com/executablebooks/mdformat. -2026-05-03T08:51:41.5220706Z [INFO] Initializing environment for https://github.com/executablebooks/mdformat:mdformat-gfm. -2026-05-03T08:51:42.0485756Z [INFO] Installing environment for https://github.com/pre-commit/pre-commit-hooks. -2026-05-03T08:51:42.0486348Z [INFO] Once installed this environment will be reused. -2026-05-03T08:51:42.0486678Z [INFO] This may take a few minutes... -2026-05-03T08:51:46.2720141Z [INFO] Installing environment for https://github.com/asottile/pyupgrade. -2026-05-03T08:51:46.2720708Z [INFO] Once installed this environment will be reused. -2026-05-03T08:51:46.2721128Z [INFO] This may take a few minutes... -2026-05-03T08:51:49.1167487Z [INFO] Installing environment for https://github.com/astral-sh/ruff-pre-commit. -2026-05-03T08:51:49.1168075Z [INFO] Once installed this environment will be reused. -2026-05-03T08:51:49.1168471Z [INFO] This may take a few minutes... -2026-05-03T08:51:52.4952081Z [INFO] Installing environment for https://github.com/executablebooks/mdformat. -2026-05-03T08:51:52.4952599Z [INFO] Once installed this environment will be reused. -2026-05-03T08:51:52.4952856Z [INFO] This may take a few minutes... -2026-05-03T08:51:55.7988540Z check yaml...............................................................Passed -2026-05-03T08:51:55.8606936Z check toml...............................................................Passed -2026-05-03T08:51:55.9314584Z fix end of files.........................................................Passed -2026-05-03T08:51:56.0326423Z trim trailing whitespace.................................................Passed -2026-05-03T08:51:58.5665786Z pyupgrade................................................................Passed -2026-05-03T08:51:58.6946122Z ruff (legacy alias)......................................................Passed -2026-05-03T08:51:58.8404559Z ruff format..............................................................Passed -2026-05-03T08:51:59.9613232Z mdformat.................................................................Passed -2026-05-03T08:52:07.6623116Z run mypy with all dev dependencies present...............................Failed -2026-05-03T08:52:07.6623813Z - hook id: mypy-local -2026-05-03T08:52:07.6624064Z - exit code: 1 -2026-05-03T08:52:07.6624161Z -2026-05-03T08:52:07.6624401Z libp2p/kad_dht/pb/kademlia_pb2.pyi:74: error: Missing type arguments for generic type "Mapping" [type-arg] -2026-05-03T08:52:07.6624809Z Found 1 error in 1 file (checked 335 source files) -2026-05-03T08:52:07.6625014Z -2026-05-03T08:52:08.7611656Z run pyrefly typecheck locally............................................Failed -2026-05-03T08:52:08.7612177Z - hook id: pyrefly-local -2026-05-03T08:52:08.7612392Z - exit code: 1 -2026-05-03T08:52:08.7612484Z -2026-05-03T08:52:08.7612961Z WARN PYTHONPATH environment variable is set to `/home/runner/work/py-libp2p/py-libp2p/.tox/py310-lint/lib/python3.10/site-packages:`. Checks in other environments may not include these paths. -2026-05-03T08:52:08.7613892Z INFO Checking project configured at `/home/runner/work/py-libp2p/py-libp2p/pyproject.toml` -2026-05-03T08:52:08.7614582Z WARN PYTHONPATH environment variable is set to `/home/runner/work/py-libp2p/py-libp2p/.tox/py310-lint/lib/python3.10/site-packages:`. Checks in other environments may not include these paths. -2026-05-03T08:52:08.7615585Z ERROR /home/runner/work/py-libp2p/py-libp2p/tests/core/bitswap/test_block_service.py:30:41-45: Default `None` is not assignable to parameter `network_blocks` with type `dict[Unknown, Unknown]` [bad-function-definition] -2026-05-03T08:52:08.7617189Z ERROR /home/runner/work/py-libp2p/py-libp2p/tests/core/bitswap/test_block_service.py:141:46-50: Argument `list[bytes]` is not assignable to parameter `cids` with type `list[CIDv0 | CIDv1 | bytes | str]` in function `libp2p.bitswap.block_service.BlockService.get_blocks_batch` [bad-argument-type] -2026-05-03T08:52:08.7618588Z ERROR /home/runner/work/py-libp2p/py-libp2p/tests/core/bitswap/test_block_service.py:162:46-54: Argument `list[bytes]` is not assignable to parameter `cids` with type `list[CIDv0 | CIDv1 | bytes | str]` in function `libp2p.bitswap.block_service.BlockService.get_blocks_batch` [bad-argument-type] -2026-05-03T08:52:08.7619958Z ERROR /home/runner/work/py-libp2p/py-libp2p/tests/core/bitswap/test_io_stream.py:145:12-27: Object of class `NoneType` has no attribute `filesize` [missing-attribute] -2026-05-03T08:52:08.7621000Z ERROR /home/runner/work/py-libp2p/py-libp2p/tests/core/bitswap/test_io_stream.py:173:12-23: Object of class `NoneType` has no attribute `data` [missing-attribute] -2026-05-03T08:52:08.7621920Z ERROR /home/runner/work/py-libp2p/py-libp2p/tests/core/bitswap/test_io_stream.py:214:24-40: Object of class `NoneType` has no attribute `data` [missing-attribute] -2026-05-03T08:52:08.7622682Z ERROR /home/runner/work/py-libp2p/py-libp2p/tests/core/bitswap/test_unixfs_encoding.py:60:12-33: Object of class `NoneType` has no attribute `filesize` [missing-attribute] -2026-05-03T08:52:08.7623567Z ERROR /home/runner/work/py-libp2p/py-libp2p/tests/core/bitswap/test_unixfs_encoding.py:93:12-27: Object of class `NoneType` has no attribute `filesize` [missing-attribute] -2026-05-03T08:52:08.7624342Z ERROR /home/runner/work/py-libp2p/py-libp2p/tests/core/bitswap/test_unixfs_encoding.py:94:16-33: Object of class `NoneType` has no attribute `blocksizes` [missing-attribute] -2026-05-03T08:52:08.7625119Z ERROR /home/runner/work/py-libp2p/py-libp2p/tests/core/bitswap/test_unixfs_encoding.py:115:12-27: Object of class `NoneType` has no attribute `filesize` [missing-attribute] -2026-05-03T08:52:08.7625897Z ERROR /home/runner/work/py-libp2p/py-libp2p/tests/core/bitswap/test_unixfs_encoding.py:183:12-27: Object of class `NoneType` has no attribute `filesize` [missing-attribute] -2026-05-03T08:52:08.7626664Z ERROR /home/runner/work/py-libp2p/py-libp2p/tests/core/bitswap/test_unixfs_encoding.py:224:12-27: Object of class `NoneType` has no attribute `filesize` [missing-attribute] -2026-05-03T08:52:08.7627415Z ERROR /home/runner/work/py-libp2p/py-libp2p/tests/core/bitswap/test_wantlist.py:172:12-32: Object of class `NoneType` has no attribute `entries` [missing-attribute] -2026-05-03T08:52:08.7628396Z ERROR /home/runner/work/py-libp2p/py-libp2p/tests/core/bitswap/test_wantlist.py:234:51-65: Argument `Literal[WantType.Block]` is not assignable to parameter `want_type` with type `int` in function `libp2p.bitswap.messages.create_wantlist_entry` [bad-argument-type] -2026-05-03T08:52:08.7629607Z ERROR /home/runner/work/py-libp2p/py-libp2p/tests/core/bitswap/test_wantlist.py:238:52-65: Argument `Literal[WantType.Have]` is not assignable to parameter `want_type` with type `int` in function `libp2p.bitswap.messages.create_wantlist_entry` [bad-argument-type] -2026-05-03T08:52:08.7630595Z ERROR /home/runner/work/py-libp2p/py-libp2p/tests/core/bitswap/test_wantlist.py:249:9-17: Could not import `WantType` from `libp2p.bitswap` [missing-module-attribute] -2026-05-03T08:52:08.7631305Z INFO 16 errors shown, 184 errors ignored, 734 modules, 1,662 transitive dependencies, 657,839 lines, took 1.04s, peak memory physical 204.9 MiB -2026-05-03T08:52:08.7631662Z -2026-05-03T08:52:08.8142574Z Check for .rst files in the top-level directory..........................Passed -2026-05-03T08:52:09.5667299Z Cross-platform path handling audit (P0/P1)...............................Passed -2026-05-03T08:52:09.5821611Z py310-lint: exit 1 (31.00 seconds) /home/runner/work/py-libp2p/py-libp2p> pre-commit run --all-files --show-diff-on-failure pid=3010 -2026-05-03T08:52:09.5848132Z py310-lint: FAIL code 1 (67.24=setup[30.80]+cmd[0.16,4.35,0.94,31.00] seconds) -2026-05-03T08:52:09.5848539Z evaluation failed :( (69.20 seconds) -2026-05-03T08:52:09.6166338Z ##[error]Process completed with exit code 1. -2026-05-03T08:52:09.6249957Z Post job cleanup. -2026-05-03T08:52:09.7055343Z [command]/usr/bin/git version -2026-05-03T08:52:09.7085377Z git version 2.53.0 -2026-05-03T08:52:09.7117444Z Temporarily overriding HOME='/home/runner/work/_temp/5d400f3f-c4d4-4ba4-ab16-52bd147b9418' before making global git config changes -2026-05-03T08:52:09.7118330Z Adding repository directory to the temporary git global config as a safe directory -2026-05-03T08:52:09.7121633Z [command]/usr/bin/git config --global --add safe.directory /home/runner/work/py-libp2p/py-libp2p -2026-05-03T08:52:09.8851499Z [command]/usr/bin/git config --local --name-only --get-regexp core\.sshCommand -2026-05-03T08:52:09.8882670Z [command]/usr/bin/git submodule foreach --recursive sh -c "git config --local --name-only --get-regexp 'core\.sshCommand' && git config --local --unset-all 'core.sshCommand' || :" -2026-05-03T08:52:09.9066439Z fatal: No url found for submodule path 'extra/multihash-spec' in .gitmodules -2026-05-03T08:52:10.0343154Z ##[warning]The process '/usr/bin/git' failed with exit code 128 -2026-05-03T08:52:10.0429730Z Cleaning up orphan processes -2026-05-03T08:52:10.0667254Z ##[warning]Node.js 20 actions are deprecated. The following actions are running on Node.js 20 and may not work as expected: actions/checkout@v4, actions/setup-python@v5, astral-sh/setup-uv@v4. Actions will be forced to run with Node.js 24 by default starting June 2nd, 2026. Node.js 20 will be removed from the runner on September 16th, 2026. Please check if updated versions of these actions are available that support Node.js 24. To opt into Node.js 24 now, set the FORCE_JAVASCRIPT_ACTIONS_TO_NODE24=true environment variable on the runner or in your workflow file. Once Node.js 24 becomes the default, you can temporarily opt out by setting ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION=true. For more information see: https://github.blog/changelog/2025-09-19-deprecation-of-node-20-on-github-actions-runners/ \ No newline at end of file From a1456f53bcf12454a05f692e2476c21f4bd502b9 Mon Sep 17 00:00:00 2001 From: sumanjeet0012 Date: Sun, 3 May 2026 22:41:40 +0530 Subject: [PATCH 12/37] fix: adjust DEFAULT_CHUNK_SIZE for DAG-PB overhead, enhance wantType handling, and update tests for dag-pb leaf blocks Co-authored-by: Copilot --- libp2p/bitswap/chunker.py | 4 +- libp2p/bitswap/messages.py | 5 +- tests/core/bitswap/test_dag.py | 70 +++++++++++++------ .../bitswap/test_filesystem_blockstore.py | 16 +++++ tests/core/bitswap/test_unixfs_encoding.py | 4 +- 5 files changed, 72 insertions(+), 27 deletions(-) diff --git a/libp2p/bitswap/chunker.py b/libp2p/bitswap/chunker.py index 2a05beae4..4739da3de 100644 --- a/libp2p/bitswap/chunker.py +++ b/libp2p/bitswap/chunker.py @@ -11,7 +11,9 @@ from pathlib import Path # Default chunk size: 63 KB (py-libp2p accepts less than 64 KB) -DEFAULT_CHUNK_SIZE = 63 * 1024 +# 63 KB minus 32 bytes to leave room for the dag-pb leaf envelope overhead, +# ensuring wrapped blocks never exceed MAX_BLOCK_SIZE (63 * 1024). +DEFAULT_CHUNK_SIZE = 63 * 1024 - 32 def chunk_bytes(data: bytes, chunk_size: int = DEFAULT_CHUNK_SIZE) -> list[bytes]: diff --git a/libp2p/bitswap/messages.py b/libp2p/bitswap/messages.py index df8a1d16a..3772359c4 100644 --- a/libp2p/bitswap/messages.py +++ b/libp2p/bitswap/messages.py @@ -40,8 +40,9 @@ def create_wantlist_entry( entry.block = cid_to_bytes(block_cid) entry.priority = priority entry.cancel = cancel - # Type checkers don't like int assignment to enum, but protobuf accepts it - entry.wantType = want_type # type: ignore[assignment] # v1.2.0 field + # Accept both plain int and WantType enum (extract .value for enum) + want_type_int = want_type.value if hasattr(want_type, "value") else int(want_type) + entry.wantType = want_type_int # type: ignore[assignment] # v1.2.0 field entry.sendDontHave = send_dont_have # v1.2.0 field return entry diff --git a/tests/core/bitswap/test_dag.py b/tests/core/bitswap/test_dag.py index e94fb0f1a..4c7c72645 100644 --- a/tests/core/bitswap/test_dag.py +++ b/tests/core/bitswap/test_dag.py @@ -52,6 +52,8 @@ class TestAddBytes: @pytest.mark.trio async def test_add_small_bytes(self): """Test adding small data (single block).""" + from libp2p.bitswap.dag_pb import create_leaf_node + # Setup mock_client = MagicMock(spec=BitswapClient) mock_client.block_store = MemoryBlockStore() @@ -66,13 +68,18 @@ async def test_add_small_bytes(self): # Verify assert root_cid is not None assert len(root_cid) > 0 - assert verify_cid(root_cid, data) - # Should be single block (RAW codec) + # Small data is stored as a dag-pb leaf node (not raw codec) + leaf_block = create_leaf_node(data) + expected_cid = compute_cid_v1(leaf_block, codec=CODEC_DAG_PB) + assert root_cid == expected_cid + assert verify_cid(root_cid, leaf_block) + + # Should be single block (DAG-PB codec) mock_client.add_block.assert_called_once() call_args = mock_client.add_block.call_args assert call_args[0][0] == root_cid # CID - assert call_args[0][1] == data # Data + assert call_args[0][1] == leaf_block # dag-pb wrapped data @pytest.mark.trio async def test_add_large_bytes(self): @@ -161,9 +168,15 @@ async def test_add_small_file(self): assert root_cid is not None mock_client.add_block.assert_called_once() - # Should be single RAW block + # Small file is stored as a dag-pb leaf node + from libp2p.bitswap.dag_pb import create_leaf_node + call_args = mock_client.add_block.call_args - assert verify_cid(call_args[0][0], data) + stored_cid = call_args[0][0] + stored_block = call_args[0][1] + leaf_block = create_leaf_node(data) + assert stored_block == leaf_block + assert verify_cid(stored_cid, leaf_block) finally: Path(temp_path).unlink() @@ -285,16 +298,22 @@ async def test_fetch_small_file(self, cid_input_kind: str): @pytest.mark.trio async def test_fetch_chunked_file(self): """Test fetching multi-chunk file.""" - # Create chunks + from libp2p.bitswap.dag_pb import create_leaf_node + + # Create dag-pb leaf blocks (matching what add_bytes/add_file produces) chunk1 = b"chunk1" * 1000 chunk2 = b"chunk2" * 1000 chunk3 = b"chunk3" * 1000 - cid1 = compute_cid_v1(chunk1, codec=CODEC_RAW) - cid2 = compute_cid_v1(chunk2, codec=CODEC_RAW) - cid3 = compute_cid_v1(chunk3, codec=CODEC_RAW) + leaf1 = create_leaf_node(chunk1) + leaf2 = create_leaf_node(chunk2) + leaf3 = create_leaf_node(chunk3) - # Create DAG-PB root node + cid1 = compute_cid_v1(leaf1, codec=CODEC_DAG_PB) + cid2 = compute_cid_v1(leaf2, codec=CODEC_DAG_PB) + cid3 = compute_cid_v1(leaf3, codec=CODEC_DAG_PB) + + # Create DAG-PB root node linking to the leaves chunks_data = [ (cid1, len(chunk1)), (cid2, len(chunk2)), @@ -308,11 +327,11 @@ def get_block_side_effect(cid, peer_id, timeout): if cid == root_cid: return root_data elif cid == cid1: - return chunk1 + return leaf1 elif cid == cid2: - return chunk2 + return leaf2 elif cid == cid3: - return chunk3 + return leaf3 raise ValueError(f"Unknown CID: {cid.hex()}") mock_client = MagicMock(spec=BitswapClient) @@ -324,23 +343,28 @@ def get_block_side_effect(cid, peer_id, timeout): # Fetch fetched_data, filename = await dag.fetch_file(root_cid, timeout=30.0) - # Verify + # Verify reconstructed data expected_data = chunk1 + chunk2 + chunk3 assert fetched_data == expected_data assert filename is None # File node without directory wrapper - # Should have fetched root + 3 chunks - assert mock_client.get_block.call_count == 4 + # root fetch (1) + tree-level batch fallback (3) + leaf batch fallback (3) = 7 + assert mock_client.get_block.call_count == 7 @pytest.mark.trio async def test_fetch_file_with_progress(self): """Test fetching with progress callback.""" - # Create chunked file + from libp2p.bitswap.dag_pb import create_leaf_node + + # Create dag-pb leaf blocks (matching what add_bytes/add_file produces) chunk1 = b"x" * 1000 chunk2 = b"y" * 1000 - cid1 = compute_cid_v1(chunk1, codec=CODEC_RAW) - cid2 = compute_cid_v1(chunk2, codec=CODEC_RAW) + leaf1 = create_leaf_node(chunk1) + leaf2 = create_leaf_node(chunk2) + + cid1 = compute_cid_v1(leaf1, codec=CODEC_DAG_PB) + cid2 = compute_cid_v1(leaf2, codec=CODEC_DAG_PB) root_data = create_file_node([(cid1, len(chunk1)), (cid2, len(chunk2))]) root_cid = compute_cid_v1(root_data, codec=CODEC_DAG_PB) @@ -350,9 +374,9 @@ def get_block_side_effect(cid, peer_id, timeout): if cid == root_cid: return root_data elif cid == cid1: - return chunk1 + return leaf1 elif cid == cid2: - return chunk2 + return leaf2 mock_client = MagicMock(spec=BitswapClient) mock_client.block_store = MemoryBlockStore() @@ -370,8 +394,8 @@ def progress_callback(current, total, status): # Verify progress assert len(progress_calls) > 0 - # Should report progress for each chunk - assert any("fetching chunk" in call[2] for call in progress_calls) + # Implementation emits "downloading" per leaf and "completed" at end + assert any(call[2] in ("downloading", "completed") for call in progress_calls) # Last call should be completion assert progress_calls[-1][2] == "completed" diff --git a/tests/core/bitswap/test_filesystem_blockstore.py b/tests/core/bitswap/test_filesystem_blockstore.py index ffb51a9f2..305408242 100644 --- a/tests/core/bitswap/test_filesystem_blockstore.py +++ b/tests/core/bitswap/test_filesystem_blockstore.py @@ -15,6 +15,7 @@ import shutil import tempfile +import pytest import trio from libp2p.bitswap.block_store import FilesystemBlockStore, MemoryBlockStore @@ -36,9 +37,19 @@ def pass_fail(label: str, ok: bool) -> None: raise AssertionError(f"FAILED: {label}") +# ── pytest fixtures ─────────────────────────────────────────────────────────── + + +@pytest.fixture +def store_path(tmp_path): + """Provide a fresh temporary directory path for each test.""" + return str(tmp_path) + + # ── tests ───────────────────────────────────────────────────────────────────── +@pytest.mark.trio async def test_basic_round_trip(store_path: str) -> None: print("\n[1] Basic put / get / has / delete") store = FilesystemBlockStore(store_path) @@ -65,6 +76,7 @@ async def test_basic_round_trip(store_path: str) -> None: pass_fail("get_block returns None after delete", await store.get_block(cid) is None) +@pytest.mark.trio async def test_persistence(store_path: str) -> None: print("\n[2] Persistence across store re-creation (simulates process restart)") @@ -92,6 +104,7 @@ async def test_persistence(store_path: str) -> None: print(f" CID2: {cid_to_text(cid2)}") +@pytest.mark.trio async def test_get_all_cids(store_path: str) -> None: print("\n[3] get_all_cids scans directory tree") store = FilesystemBlockStore(store_path) @@ -111,6 +124,7 @@ async def test_get_all_cids(store_path: str) -> None: ) +@pytest.mark.trio async def test_get_missing_returns_none(store_path: str) -> None: print("\n[4] get_block returns None for missing CID") store = FilesystemBlockStore(store_path) @@ -119,6 +133,7 @@ async def test_get_missing_returns_none(store_path: str) -> None: pass_fail("get_block returns None for unknown CID", result is None) +@pytest.mark.trio async def test_drop_in_for_memory_store(store_path: str) -> None: print("\n[5] Drop-in replacement for MemoryBlockStore") @@ -137,6 +152,7 @@ async def use_store(store) -> bytes: ) +@pytest.mark.trio async def test_directory_structure(store_path: str) -> None: print("\n[6] 2-char prefix directory structure") store = FilesystemBlockStore(store_path) diff --git a/tests/core/bitswap/test_unixfs_encoding.py b/tests/core/bitswap/test_unixfs_encoding.py index 11ba00a56..cff119430 100644 --- a/tests/core/bitswap/test_unixfs_encoding.py +++ b/tests/core/bitswap/test_unixfs_encoding.py @@ -166,7 +166,9 @@ async def add_block_impl(cid, data): tmp = f.name try: - root_cid = await dag.add_file(tmp, wrap_with_directory=False) + root_cid = await dag.add_file( + tmp, chunk_size=chunk_size, wrap_with_directory=False + ) finally: os.unlink(tmp) From f7d27b61454cc279f82c060115c1b5b587c710a6 Mon Sep 17 00:00:00 2001 From: sumanjeet0012 Date: Mon, 4 May 2026 01:05:31 +0530 Subject: [PATCH 13/37] feat: implement batch sending for Bitswap blocks and enhance error handling in DAG fetching Co-authored-by: Copilot --- libp2p/bitswap/client.py | 92 ++++++++++++++++--- libp2p/bitswap/config.py | 3 +- libp2p/bitswap/dag.py | 49 ++++++++-- libp2p/bitswap/messages.py | 9 +- tests/core/bitswap/test_dag.py | 5 +- .../bitswap/test_filesystem_blockstore.py | 2 + .../test_kad_dht_quorum_sliding_window.py | 1 + tests/core/kad_dht/test_unit_value_store.py | 6 +- 8 files changed, 141 insertions(+), 26 deletions(-) diff --git a/libp2p/bitswap/client.py b/libp2p/bitswap/client.py index 8534c5444..272f1da7f 100644 --- a/libp2p/bitswap/client.py +++ b/libp2p/bitswap/client.py @@ -730,25 +730,91 @@ async def _process_wantlist( # Send DontHave (v1.2.0) presences_to_send.append((entry_cid, False)) - # Send responses + # Send responses in batches to stay under MAX_MESSAGE_SIZE + # and Noise protocol limit (65535 bytes) if blocks_to_send_v100 or blocks_to_send_v110 or presences_to_send: - response_msg = create_message( - blocks_v100=blocks_to_send_v100 if blocks_to_send_v100 else None, - blocks_v110=blocks_to_send_v110 if blocks_to_send_v110 else None, - block_presences=presences_to_send if presences_to_send else None, - ) - logger.debug(f"Sending response message to {peer_id} on stream {stream}") - await self._write_message(stream, response_msg) - logger.debug(f"Response message sent to {peer_id}") - - if blocks_to_send_v100 or blocks_to_send_v110: - count = len(blocks_to_send_v100) + len(blocks_to_send_v110) - logger.debug(f"Sent {count} blocks to peer {peer_id}") + # Send blocks in batches + if blocks_to_send_v100: + await self._send_blocks_in_batches_v100( + blocks_to_send_v100, peer_id, stream + ) + if blocks_to_send_v110: + await self._send_blocks_in_batches_v110( + blocks_to_send_v110, peer_id, stream + ) + # Send presences (usually small, can send all at once) if presences_to_send: + presence_msg = create_message(block_presences=presences_to_send) + await self._write_message(stream, presence_msg) logger.debug( f"Sent {len(presences_to_send)} block presences to peer {peer_id}" ) + async def _send_blocks_in_batches_v100( + self, blocks: list[bytes], peer_id: PeerID, stream: INetStream + ) -> None: + """Send blocks in batches to stay under message size limit.""" + # Noise protocol limit is 65535 bytes per message + # Reserve some space for protobuf overhead + MAX_BATCH_SIZE = 60000 # ~60KB per message for safety + + batch = [] + batch_size = 0 + + for block_data in blocks: + block_size = len(block_data) + + # If adding this block would exceed limit, send current batch first + if batch and (batch_size + block_size > MAX_BATCH_SIZE): + msg = create_message(blocks_v100=batch) + await self._write_message(stream, msg) + logger.debug(f"Sent batch of {len(batch)} blocks to peer {peer_id}") + batch = [] + batch_size = 0 + + batch.append(block_data) + batch_size += block_size + + # Send remaining blocks + if batch: + msg = create_message(blocks_v100=batch) + await self._write_message(stream, msg) + logger.debug(f"Sent final batch of {len(batch)} blocks to peer {peer_id}") + + async def _send_blocks_in_batches_v110( + self, + blocks: list[tuple[bytes, bytes]], + peer_id: PeerID, + stream: INetStream, + ) -> None: + """Send blocks (v1.1.0+ format) in batches to stay under message size limit.""" + # Noise protocol limit is 65535 bytes per message + # Reserve some space for protobuf overhead + MAX_BATCH_SIZE = 60000 # ~60KB per message for safety + + batch = [] + batch_size = 0 + + for prefix, block_data in blocks: + block_size = len(prefix) + len(block_data) + + # If adding this block would exceed limit, send current batch first + if batch and (batch_size + block_size > MAX_BATCH_SIZE): + msg = create_message(blocks_v110=batch) + await self._write_message(stream, msg) + logger.debug(f"Sent batch of {len(batch)} blocks to peer {peer_id}") + batch = [] + batch_size = 0 + + batch.append((prefix, block_data)) + batch_size += block_size + + # Send remaining blocks + if batch: + msg = create_message(blocks_v110=batch) + await self._write_message(stream, msg) + logger.debug(f"Sent final batch of {len(batch)} blocks to peer {peer_id}") + async def _process_blocks_v100(self, blocks: list[bytes], peer_id: PeerID) -> None: """ Process received blocks (v1.0.0 format). diff --git a/libp2p/bitswap/config.py b/libp2p/bitswap/config.py index 87ba26e0e..6fc3f2bfb 100644 --- a/libp2p/bitswap/config.py +++ b/libp2p/bitswap/config.py @@ -22,8 +22,9 @@ # Maximum message size (4MiB as per spec) MAX_MESSAGE_SIZE = 4 * 1024 * 1024 -# Maximum block size (63 KB - matches DEFAULT_CHUNK_SIZE in chunker.py) +# Maximum block size (63 KB - after DAG-PB/UnixFS encoding) # py-libp2p stream limit is ~64 KB, so we use 63 KB to be safe +# Note: Raw chunk data should be smaller to account for DAG-PB overhead (~14 bytes) MAX_BLOCK_SIZE = 63 * 1024 # Default timeout for operations (in seconds) diff --git a/libp2p/bitswap/dag.py b/libp2p/bitswap/dag.py index cf2bc6bfa..355d36c21 100644 --- a/libp2p/bitswap/dag.py +++ b/libp2p/bitswap/dag.py @@ -165,9 +165,30 @@ async def _get_blocks_batch( return await self._service.get_blocks_batch( cids, peer_id=peer_id, timeout=timeout, batch_size=batch_size ) - return await self.bitswap.get_blocks_batch( - cids, peer_id=peer_id, timeout=timeout, batch_size=batch_size - ) + # Check if the client supports native batch fetching + get_blocks_batch = getattr(self.bitswap, "get_blocks_batch", None) + if get_blocks_batch is not None and callable(get_blocks_batch): + try: + result = await get_blocks_batch( + cids, peer_id=peer_id, timeout=timeout, batch_size=batch_size + ) + # Ensure the result is a plain dict (not a coroutine from a mock) + if isinstance(result, dict): + return result + except Exception: + pass + # Fall back to individual _get_block calls + results: dict[bytes, bytes] = {} + for cid in cids: + from .cid import cid_to_bytes + + cid_bytes = cid_to_bytes(cid) + try: + data = await self._get_block(cid_bytes, peer_id=peer_id, timeout=timeout) + results[cid_bytes] = data + except Exception: + pass + return results async def add_file( self, @@ -732,9 +753,25 @@ def _collect_leaves_local(cid_bytes: bytes, depth: int = 1) -> None: f"(batch_size=32, timeout={timeout}s)" ) print(msg2, flush=True) - block_map = await self._get_blocks_batch( - list(ordered_leaf_cids), peer_id=peer_id, timeout=timeout, batch_size=32 - ) + + # First try to get blocks from the already-fetched tree + block_map: dict[bytes, bytes] = {} + missing_cids: list[bytes] = [] + for leaf_cid in ordered_leaf_cids: + leaf_data = all_blocks_map.get(leaf_cid) + if leaf_data is not None: + block_map[leaf_cid] = leaf_data + else: + missing_cids.append(leaf_cid) + + # If some leaves weren't in the tree fetch, fetch them now + if missing_cids: + logger.info(f"[DAG] Fetching {len(missing_cids)} missing leaves") + missing_blocks = await self._get_blocks_batch( + missing_cids, peer_id=peer_id, timeout=timeout, batch_size=32 + ) + block_map.update(missing_blocks) + logger.info(f"[DAG] ✓ Batch fetch complete: {len(block_map)} blocks received") print(f"[FETCH] ✓ Batch fetch complete: {len(block_map)} blocks", flush=True) diff --git a/libp2p/bitswap/messages.py b/libp2p/bitswap/messages.py index 3772359c4..0c4264bce 100644 --- a/libp2p/bitswap/messages.py +++ b/libp2p/bitswap/messages.py @@ -40,9 +40,12 @@ def create_wantlist_entry( entry.block = cid_to_bytes(block_cid) entry.priority = priority entry.cancel = cancel - # Accept both plain int and WantType enum (extract .value for enum) - want_type_int = want_type.value if hasattr(want_type, "value") else int(want_type) - entry.wantType = want_type_int # type: ignore[assignment] # v1.2.0 field + # Handle both int and WantType enum + if isinstance(want_type, int): + entry.wantType = want_type # type: ignore[assignment] + else: + # Extract .value from WantType enum + entry.wantType = want_type.value # type: ignore[assignment] entry.sendDontHave = send_dont_have # v1.2.0 field return entry diff --git a/tests/core/bitswap/test_dag.py b/tests/core/bitswap/test_dag.py index 4c7c72645..5c4b759d3 100644 --- a/tests/core/bitswap/test_dag.py +++ b/tests/core/bitswap/test_dag.py @@ -348,8 +348,9 @@ def get_block_side_effect(cid, peer_id, timeout): assert fetched_data == expected_data assert filename is None # File node without directory wrapper - # root fetch (1) + tree-level batch fallback (3) + leaf batch fallback (3) = 7 - assert mock_client.get_block.call_count == 7 + # root fetch (1) + tree-level batch fallback (3) = 4 + # Leaves are already fetched during tree traversal, no separate leaf fetch needed + assert mock_client.get_block.call_count == 4 @pytest.mark.trio async def test_fetch_file_with_progress(self): diff --git a/tests/core/bitswap/test_filesystem_blockstore.py b/tests/core/bitswap/test_filesystem_blockstore.py index 305408242..8596f26c1 100644 --- a/tests/core/bitswap/test_filesystem_blockstore.py +++ b/tests/core/bitswap/test_filesystem_blockstore.py @@ -9,6 +9,8 @@ Run with: python test_filesystem_blockstore.py + or + pytest test_filesystem_blockstore.py """ from pathlib import Path diff --git a/tests/core/kad_dht/test_kad_dht_quorum_sliding_window.py b/tests/core/kad_dht/test_kad_dht_quorum_sliding_window.py index 87e669cc0..b1be7cdb8 100644 --- a/tests/core/kad_dht/test_kad_dht_quorum_sliding_window.py +++ b/tests/core/kad_dht/test_kad_dht_quorum_sliding_window.py @@ -41,6 +41,7 @@ def _make_dht() -> KadDHT: host = MagicMock() key_pair = create_new_key_pair() host.get_id.return_value = ID.from_pubkey(key_pair.public_key) + host.get_private_key.return_value = key_pair.private_key host.get_addrs.return_value = [Multiaddr("/ip4/127.0.0.1/tcp/8000")] host.get_peerstore.return_value = MagicMock() host.new_stream = AsyncMock() diff --git a/tests/core/kad_dht/test_unit_value_store.py b/tests/core/kad_dht/test_unit_value_store.py index bdaaacd9c..3e2d58e01 100644 --- a/tests/core/kad_dht/test_unit_value_store.py +++ b/tests/core/kad_dht/test_unit_value_store.py @@ -15,6 +15,7 @@ import pytest +from libp2p.crypto.secp256k1 import create_new_key_pair from libp2p.kad_dht.value_store import ( DEFAULT_TTL, ValueStore, @@ -24,8 +25,11 @@ ) from libp2p.records.record import make_put_record +# Create a real key pair for signing +key_pair = create_new_key_pair() mock_host = Mock() -peer_id = ID.from_base58("QmTest123") +mock_host.get_private_key.return_value = key_pair.private_key +peer_id = ID.from_pubkey(key_pair.public_key) class TestValueStore: From 54d7ebf965858370a21f98516681ed41b48f28b3 Mon Sep 17 00:00:00 2001 From: sumanjeet0012 Date: Mon, 4 May 2026 01:08:56 +0530 Subject: [PATCH 14/37] refactor: clean up whitespace and improve code readability in Bitswap and MerkleDag implementations --- libp2p/bitswap/client.py | 20 ++++++++++---------- libp2p/bitswap/dag.py | 10 ++++++---- tests/core/bitswap/test_dag.py | 3 ++- 3 files changed, 18 insertions(+), 15 deletions(-) diff --git a/libp2p/bitswap/client.py b/libp2p/bitswap/client.py index 272f1da7f..5170c6526 100644 --- a/libp2p/bitswap/client.py +++ b/libp2p/bitswap/client.py @@ -757,13 +757,13 @@ async def _send_blocks_in_batches_v100( # Noise protocol limit is 65535 bytes per message # Reserve some space for protobuf overhead MAX_BATCH_SIZE = 60000 # ~60KB per message for safety - + batch = [] batch_size = 0 - + for block_data in blocks: block_size = len(block_data) - + # If adding this block would exceed limit, send current batch first if batch and (batch_size + block_size > MAX_BATCH_SIZE): msg = create_message(blocks_v100=batch) @@ -771,10 +771,10 @@ async def _send_blocks_in_batches_v100( logger.debug(f"Sent batch of {len(batch)} blocks to peer {peer_id}") batch = [] batch_size = 0 - + batch.append(block_data) batch_size += block_size - + # Send remaining blocks if batch: msg = create_message(blocks_v100=batch) @@ -791,13 +791,13 @@ async def _send_blocks_in_batches_v110( # Noise protocol limit is 65535 bytes per message # Reserve some space for protobuf overhead MAX_BATCH_SIZE = 60000 # ~60KB per message for safety - + batch = [] batch_size = 0 - + for prefix, block_data in blocks: block_size = len(prefix) + len(block_data) - + # If adding this block would exceed limit, send current batch first if batch and (batch_size + block_size > MAX_BATCH_SIZE): msg = create_message(blocks_v110=batch) @@ -805,10 +805,10 @@ async def _send_blocks_in_batches_v110( logger.debug(f"Sent batch of {len(batch)} blocks to peer {peer_id}") batch = [] batch_size = 0 - + batch.append((prefix, block_data)) batch_size += block_size - + # Send remaining blocks if batch: msg = create_message(blocks_v110=batch) diff --git a/libp2p/bitswap/dag.py b/libp2p/bitswap/dag.py index 355d36c21..6073fe194 100644 --- a/libp2p/bitswap/dag.py +++ b/libp2p/bitswap/dag.py @@ -184,7 +184,9 @@ async def _get_blocks_batch( cid_bytes = cid_to_bytes(cid) try: - data = await self._get_block(cid_bytes, peer_id=peer_id, timeout=timeout) + data = await self._get_block( + cid_bytes, peer_id=peer_id, timeout=timeout + ) results[cid_bytes] = data except Exception: pass @@ -753,7 +755,7 @@ def _collect_leaves_local(cid_bytes: bytes, depth: int = 1) -> None: f"(batch_size=32, timeout={timeout}s)" ) print(msg2, flush=True) - + # First try to get blocks from the already-fetched tree block_map: dict[bytes, bytes] = {} missing_cids: list[bytes] = [] @@ -763,7 +765,7 @@ def _collect_leaves_local(cid_bytes: bytes, depth: int = 1) -> None: block_map[leaf_cid] = leaf_data else: missing_cids.append(leaf_cid) - + # If some leaves weren't in the tree fetch, fetch them now if missing_cids: logger.info(f"[DAG] Fetching {len(missing_cids)} missing leaves") @@ -771,7 +773,7 @@ def _collect_leaves_local(cid_bytes: bytes, depth: int = 1) -> None: missing_cids, peer_id=peer_id, timeout=timeout, batch_size=32 ) block_map.update(missing_blocks) - + logger.info(f"[DAG] ✓ Batch fetch complete: {len(block_map)} blocks received") print(f"[FETCH] ✓ Batch fetch complete: {len(block_map)} blocks", flush=True) diff --git a/tests/core/bitswap/test_dag.py b/tests/core/bitswap/test_dag.py index 5c4b759d3..d1144f707 100644 --- a/tests/core/bitswap/test_dag.py +++ b/tests/core/bitswap/test_dag.py @@ -349,7 +349,8 @@ def get_block_side_effect(cid, peer_id, timeout): assert filename is None # File node without directory wrapper # root fetch (1) + tree-level batch fallback (3) = 4 - # Leaves are already fetched during tree traversal, no separate leaf fetch needed + # Leaves are already fetched during tree traversal, + # no separate leaf fetch needed assert mock_client.get_block.call_count == 4 @pytest.mark.trio From fe1a1523b4699688e207456d56e0b7367eef29f4 Mon Sep 17 00:00:00 2001 From: sumanjeet0012 Date: Mon, 4 May 2026 01:13:52 +0530 Subject: [PATCH 15/37] refactor: enhance type hints for batch processing in Bitswap and MerkleDag Co-authored-by: Copilot --- libp2p/bitswap/client.py | 4 ++-- libp2p/bitswap/dag.py | 12 +++++++----- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/libp2p/bitswap/client.py b/libp2p/bitswap/client.py index 5170c6526..f77565d92 100644 --- a/libp2p/bitswap/client.py +++ b/libp2p/bitswap/client.py @@ -758,7 +758,7 @@ async def _send_blocks_in_batches_v100( # Reserve some space for protobuf overhead MAX_BATCH_SIZE = 60000 # ~60KB per message for safety - batch = [] + batch: list[bytes] = [] batch_size = 0 for block_data in blocks: @@ -792,7 +792,7 @@ async def _send_blocks_in_batches_v110( # Reserve some space for protobuf overhead MAX_BATCH_SIZE = 60000 # ~60KB per message for safety - batch = [] + batch: list[tuple[bytes, bytes]] = [] batch_size = 0 for prefix, block_data in blocks: diff --git a/libp2p/bitswap/dag.py b/libp2p/bitswap/dag.py index 6073fe194..9283fdcf9 100644 --- a/libp2p/bitswap/dag.py +++ b/libp2p/bitswap/dag.py @@ -166,7 +166,9 @@ async def _get_blocks_batch( cids, peer_id=peer_id, timeout=timeout, batch_size=batch_size ) # Check if the client supports native batch fetching - get_blocks_batch = getattr(self.bitswap, "get_blocks_batch", None) + get_blocks_batch: Callable[..., Awaitable[dict[bytes, bytes]]] | None = getattr( + self.bitswap, "get_blocks_batch", None + ) if get_blocks_batch is not None and callable(get_blocks_batch): try: result = await get_blocks_batch( @@ -758,7 +760,7 @@ def _collect_leaves_local(cid_bytes: bytes, depth: int = 1) -> None: # First try to get blocks from the already-fetched tree block_map: dict[bytes, bytes] = {} - missing_cids: list[bytes] = [] + missing_cids: list[CIDInput] = [] for leaf_cid in ordered_leaf_cids: leaf_data = all_blocks_map.get(leaf_cid) if leaf_data is not None: @@ -769,10 +771,10 @@ def _collect_leaves_local(cid_bytes: bytes, depth: int = 1) -> None: # If some leaves weren't in the tree fetch, fetch them now if missing_cids: logger.info(f"[DAG] Fetching {len(missing_cids)} missing leaves") - missing_blocks = await self._get_blocks_batch( + fetched_blocks = await self._get_blocks_batch( missing_cids, peer_id=peer_id, timeout=timeout, batch_size=32 ) - block_map.update(missing_blocks) + block_map.update(fetched_blocks) logger.info(f"[DAG] ✓ Batch fetch complete: {len(block_map)} blocks received") print(f"[FETCH] ✓ Batch fetch complete: {len(block_map)} blocks", flush=True) @@ -781,7 +783,7 @@ def _collect_leaves_local(cid_bytes: bytes, depth: int = 1) -> None: # extracting UnixFS inline data from leaf nodes file_data = b"" bytes_fetched = 0 - missing_blocks = [] + missing_blocks: list[bytes] = [] for idx, leaf_cid in enumerate(ordered_leaf_cids): leaf_raw = block_map.get(bytes(leaf_cid)) if leaf_raw is None: From 49ad3ef3434695bca228404ff3c8b60eb7754925 Mon Sep 17 00:00:00 2001 From: sumanjeet0012 Date: Tue, 5 May 2026 00:54:20 +0530 Subject: [PATCH 16/37] feat: add ProviderQueryManager for DHT-based provider discovery and caching in Bitswap Co-authored-by: Copilot --- libp2p/bitswap/client.py | 42 +- libp2p/bitswap/provider_query.py | 455 ++++++++++++++++++++++ tests/core/bitswap/test_provider_query.py | 433 ++++++++++++++++++++ 3 files changed, 927 insertions(+), 3 deletions(-) create mode 100644 libp2p/bitswap/provider_query.py create mode 100644 tests/core/bitswap/test_provider_query.py diff --git a/libp2p/bitswap/client.py b/libp2p/bitswap/client.py index f77565d92..82a1e0286 100644 --- a/libp2p/bitswap/client.py +++ b/libp2p/bitswap/client.py @@ -15,6 +15,7 @@ from libp2p.custom_types import TProtocol from libp2p.network.stream.exceptions import StreamEOF from libp2p.peer.id import ID as PeerID +from libp2p.peer.peerinfo import PeerInfo # noqa: F401 from .block_store import BlockStore, MemoryBlockStore from .cid import ( @@ -43,6 +44,7 @@ ) from .messages import create_message, create_wantlist_entry from .pb.bitswap_pb2 import Message +from .provider_query import ProviderQueryManager logger = logging.getLogger(__name__) @@ -60,6 +62,7 @@ def __init__( host: IHost, block_store: BlockStore | None = None, protocol_version: str = BITSWAP_PROTOCOL_V120, + provider_query_manager: ProviderQueryManager | None = None, ): """ Initialize Bitswap client. @@ -68,11 +71,18 @@ def __init__( host: The libp2p host block_store: Block storage backend (defaults to in-memory) protocol_version: Preferred protocol version (defaults to v1.2.0) + provider_query_manager: Optional ProviderQueryManager for automatic + DHT-based provider discovery. When supplied, + ``get_block()`` will query the DHT for providers before + broadcasting to all connected peers. """ self.host = host self.block_store = block_store or MemoryBlockStore() self.protocol_version = protocol_version + self.provider_query_manager: ProviderQueryManager | None = ( + provider_query_manager + ) self._wantlist: dict[ CIDObject, dict[str, Any] ] = {} # CID -> {priority, want_type, send_dont_have} @@ -244,9 +254,16 @@ async def get_block( """ Get a block, fetching from peers if not available locally. + If a ``ProviderQueryManager`` was supplied at construction time and no + explicit ``peer_id`` is given, the manager is consulted first to + discover which peers have the block via the DHT. The first discovered + provider is used; if none is found the request falls back to + broadcasting to all connected peers. + Args: cid: The CID of the block to fetch - peer_id: Optional specific peer to request from + peer_id: Optional specific peer to request from. When given, + DHT discovery is skipped. timeout: Timeout in seconds Returns: @@ -259,12 +276,31 @@ async def get_block( """ cid_obj = parse_cid(cid) - # Check local store first + # 1. Check local store first data = await self.block_store.get_block(cid_obj) if data is not None: return data - # Request from network + # 2. If no explicit peer given, try DHT provider discovery + if peer_id is None and self.provider_query_manager is not None: + try: + providers = await self.provider_query_manager.find_providers_single( + cid, timeout=min(5.0, timeout / 2) + ) + if providers: + peer_id = providers[0] + logger.debug( + "DHT discovered provider %s for %s", + peer_id, + format_cid_for_display(cid_obj, max_len=12), + ) + except Exception as exc: + logger.debug( + "Provider query failed, falling back to broadcast: %s", + exc, + ) + + # 3. Request from network (specific peer or broadcast) return await self._request_block(cid_obj, peer_id, timeout) async def want_block( diff --git a/libp2p/bitswap/provider_query.py b/libp2p/bitswap/provider_query.py new file mode 100644 index 000000000..e32a4c3db --- /dev/null +++ b/libp2p/bitswap/provider_query.py @@ -0,0 +1,455 @@ +""" +Provider Query Manager for Bitswap. + +This module provides DHT integration for automatic provider discovery with +caching, parallelization, and error handling. It's a critical component for +enabling automatic peer discovery in Bitswap without manual peer specification. +""" + +from __future__ import annotations + +from collections.abc import Sequence +from dataclasses import dataclass, field +import logging +import time +from typing import TYPE_CHECKING + +import trio + +from libp2p.peer.id import ID as PeerID + +from .cid import CIDInput, cid_to_bytes, format_cid_for_display + +if TYPE_CHECKING: + from libp2p.kad_dht.kad_dht import KadDHT + +logger = logging.getLogger(__name__) + + +@dataclass +class ProviderCacheEntry: + """ + Cached provider information for a CID. + + Attributes: + providers: List of peer IDs that provide this content + timestamp: When this entry was cached + ttl: Time-to-live in seconds (how long the cache is valid) + + """ + + providers: list[PeerID] + timestamp: float = field(default_factory=time.time) + ttl: float = 300 # 5 minutes default + + def is_expired(self) -> bool: + """Check if this cache entry has expired.""" + return (time.time() - self.timestamp) > self.ttl + + def age(self) -> float: + """Get the age of this cache entry in seconds.""" + return time.time() - self.timestamp + + +class ProviderCache: + """ + LRU cache for provider records with TTL support. + + Caches DHT provider query results to reduce network load and improve + performance for repeated queries. + """ + + def __init__(self, max_size: int = 1000, default_ttl: float = 300): + """ + Initialize provider cache. + + Args: + max_size: Maximum number of entries to cache + default_ttl: Default time-to-live in seconds + + """ + self.max_size = max_size + self.default_ttl: float = default_ttl + self._cache: dict[bytes, ProviderCacheEntry] = {} + self._access_order: list[bytes] = [] # For LRU tracking + + def get(self, cid_bytes: bytes) -> list[PeerID] | None: + """ + Get cached providers for a CID. + + Args: + cid_bytes: CID as bytes + + Returns: + List of provider peer IDs if cached and not expired, None otherwise + + """ + if cid_bytes not in self._cache: + return None + + entry = self._cache[cid_bytes] + + # Check if expired + if entry.is_expired(): + self._remove(cid_bytes) + return None + + # Update access order (LRU) + self._mark_accessed(cid_bytes) + + return entry.providers + + def put( + self, + cid_bytes: bytes, + providers: list[PeerID], + ttl: float | None = None, + ) -> None: + """ + Cache providers for a CID. + + Args: + cid_bytes: CID as bytes + providers: List of provider peer IDs + ttl: Optional custom TTL (uses default if not specified) + + """ + # Evict oldest entry if cache is full + if len(self._cache) >= self.max_size and cid_bytes not in self._cache: + self._evict_oldest() + + # Store entry + entry = ProviderCacheEntry( + providers=providers, + timestamp=time.time(), + ttl=ttl or self.default_ttl, + ) + self._cache[cid_bytes] = entry + self._mark_accessed(cid_bytes) + + def _mark_accessed(self, cid_bytes: bytes) -> None: + """Mark a cache entry as recently accessed (for LRU).""" + # Remove from current position if exists + if cid_bytes in self._access_order: + self._access_order.remove(cid_bytes) + # Add to end (most recently used) + self._access_order.append(cid_bytes) + + def _evict_oldest(self) -> None: + """Evict the least recently used cache entry.""" + if not self._access_order: + return + oldest = self._access_order.pop(0) + self._remove(oldest) + + def _remove(self, cid_bytes: bytes) -> None: + """Remove an entry from the cache.""" + if cid_bytes in self._cache: + del self._cache[cid_bytes] + if cid_bytes in self._access_order: + self._access_order.remove(cid_bytes) + + def clear(self) -> None: + """Clear all cache entries.""" + self._cache.clear() + self._access_order.clear() + + def cleanup_expired(self) -> int: + """ + Remove all expired entries from the cache. + + Returns: + Number of entries removed + + """ + expired = [ + cid_bytes for cid_bytes, entry in self._cache.items() if entry.is_expired() + ] + + for cid_bytes in expired: + self._remove(cid_bytes) + + return len(expired) + + def size(self) -> int: + """Get current cache size.""" + return len(self._cache) + + def stats(self) -> dict[str, int]: + """ + Get cache statistics. + + Returns: + Dictionary with cache statistics + + """ + return { + "size": len(self._cache), + "max_size": self.max_size, + "expired": sum(1 for e in self._cache.values() if e.is_expired()), + } + + +class ProviderQueryManager: + """ + Manages DHT provider queries with caching and parallelization. + + This component integrates Bitswap with the Kademlia DHT to automatically + discover which peers have specific content. It provides: + + - Automatic provider discovery via DHT + - Parallel queries for multiple CIDs + - Provider caching to reduce DHT load + - Configurable limits and timeouts + - Error handling and retry logic + + Example: + >>> dht = KadDHT(host) + >>> manager = ProviderQueryManager(dht) + >>> providers = await manager.find_providers([cid1, cid2]) + >>> print(f"Found {len(providers)} provider mappings") + + """ + + def __init__( + self, + dht: KadDHT, + max_providers: int = 10, + cache_ttl: float = 300, # 5 minutes + cache_size: int = 1000, + max_concurrent_queries: int = 20, + ): + """ + Initialize Provider Query Manager. + + Args: + dht: Kademlia DHT instance for provider queries + max_providers: Maximum number of providers to return per CID + cache_ttl: Cache time-to-live in seconds + cache_size: Maximum number of CIDs to cache + max_concurrent_queries: Maximum parallel DHT queries + + """ + self.dht = dht + self.max_providers = max_providers + self.cache = ProviderCache(max_size=cache_size, default_ttl=cache_ttl) + self.query_semaphore = trio.Semaphore(max_concurrent_queries) + + # Statistics + self._stats = { + "queries": 0, + "cache_hits": 0, + "cache_misses": 0, + "errors": 0, + "providers_found": 0, + } + + async def find_providers( + self, + cids: Sequence[CIDInput], + timeout: float = 5.0, + use_cache: bool = True, + ) -> dict[bytes, list[PeerID]]: + """ + Find providers for multiple CIDs in parallel. + + This is the main entry point for provider discovery. It: + 1. Checks cache for each CID + 2. Queries DHT in parallel for cache misses + 3. Updates cache with results + 4. Returns combined results + + Args: + cids: List of CIDs to find providers for + timeout: Timeout per DHT query in seconds + use_cache: Whether to use cached results + + Returns: + Dictionary mapping CID bytes to list of provider peer IDs + + Example: + >>> cids = [cid1, cid2, cid3] + >>> results = await manager.find_providers(cids) + >>> for cid_bytes, providers in results.items(): + ... n = len(providers) + ... print(f"CID {cid_bytes.hex()[:8]}... has {n} providers") + + """ + results: dict[bytes, list[PeerID]] = {} + missing: list[tuple[CIDInput, bytes]] = [] + + # Phase 1: Check cache + for cid in cids: + cid_bytes = cid_to_bytes(cid) + + if use_cache: + cached = self.cache.get(cid_bytes) + if cached is not None: + results[cid_bytes] = cached + self._stats["cache_hits"] += 1 + logger.debug( + f"Cache hit for {format_cid_for_display(cid, max_len=12)}: " + f"{len(cached)} providers" + ) + continue + + # Not in cache or cache disabled + missing.append((cid, cid_bytes)) + self._stats["cache_misses"] += 1 + + if not missing: + logger.debug(f"All {len(cids)} CIDs found in cache") + return results + + logger.info( + f"Querying DHT for {len(missing)} CIDs (cache hits: {len(results)})" + ) + + # Phase 2: Query DHT in parallel for missing CIDs + async with trio.open_nursery() as nursery: + for cid, cid_bytes in missing: + nursery.start_soon( + self._query_single, + cid, + cid_bytes, + results, + timeout, + ) + + logger.info( + f"Provider discovery complete: {len(results)}/{len(cids)} CIDs resolved" + ) + + return results + + async def _query_single( + self, + cid: CIDInput, + cid_bytes: bytes, + results: dict[bytes, list[PeerID]], + timeout: float, + ) -> None: + """ + Query DHT for providers of a single CID. + + This method is called concurrently for each CID. It uses a semaphore + to limit parallelism and handles errors gracefully. + + Args: + cid: CID to query (for display) + cid_bytes: CID as bytes (for DHT query) + results: Shared results dictionary to update + timeout: Query timeout in seconds + + """ + async with self.query_semaphore: + self._stats["queries"] += 1 + + try: + with trio.fail_after(timeout): + # Query DHT provider store + provider_infos = self.dht.provider_store.get_providers(cid_bytes) + + # Extract peer IDs from PeerInfo objects + providers = [info.peer_id for info in provider_infos] + + # Limit to max_providers + if len(providers) > self.max_providers: + providers = providers[: self.max_providers] + + if providers: + # Update results + results[cid_bytes] = providers + + # Update cache + self.cache.put(cid_bytes, providers) + + # Update stats + self._stats["providers_found"] += len(providers) + + logger.debug( + f"Found {len(providers)} providers for " + f"{format_cid_for_display(cid, max_len=12)}" + ) + else: + logger.debug( + f"No providers found for " + f"{format_cid_for_display(cid, max_len=12)}" + ) + + except trio.TooSlowError: + self._stats["errors"] += 1 + logger.warning( + f"DHT query timeout for {format_cid_for_display(cid, max_len=12)}" + ) + except Exception as e: + self._stats["errors"] += 1 + cid_disp = format_cid_for_display(cid, max_len=12) + logger.error(f"DHT query error for {cid_disp}: {e}") + + async def find_providers_single( + self, + cid: CIDInput, + timeout: float = 5.0, + use_cache: bool = True, + ) -> list[PeerID]: + """ + Find providers for a single CID (convenience method). + + Args: + cid: CID to find providers for + timeout: Query timeout in seconds + use_cache: Whether to use cached results + + Returns: + List of provider peer IDs + + Example: + >>> providers = await manager.find_providers_single(cid) + >>> for peer_id in providers: + ... print(f"Provider: {peer_id}") + + """ + results = await self.find_providers([cid], timeout, use_cache) + cid_bytes = cid_to_bytes(cid) + return results.get(cid_bytes, []) + + def get_stats(self) -> dict[str, int]: + """ + Get provider query statistics. + + Returns: + Dictionary with statistics: + - queries: Total DHT queries made + - cache_hits: Number of cache hits + - cache_misses: Number of cache misses + - errors: Number of query errors + - providers_found: Total providers discovered + - cache_size: Current cache size + + Example: + >>> stats = manager.get_stats() + >>> print(f"Cache hit rate: {stats['cache_hits'] / stats['queries']:.1%}") + + """ + stats = self._stats.copy() + stats.update(self.cache.stats()) + return stats + + def clear_cache(self) -> None: + """Clear the provider cache.""" + self.cache.clear() + logger.info("Provider cache cleared") + + async def cleanup_expired_cache(self) -> int: + """ + Remove expired entries from cache. + + Returns: + Number of entries removed + + """ + removed = self.cache.cleanup_expired() + if removed > 0: + logger.debug(f"Removed {removed} expired cache entries") + return removed diff --git a/tests/core/bitswap/test_provider_query.py b/tests/core/bitswap/test_provider_query.py new file mode 100644 index 000000000..8edf6a318 --- /dev/null +++ b/tests/core/bitswap/test_provider_query.py @@ -0,0 +1,433 @@ +""" +Tests for ProviderQueryManager and its integration with BitswapClient. + +Covers: +- ProviderCacheEntry – TTL, expiry +- ProviderCache – LRU eviction, TTL, cleanup, stats +- ProviderQueryManager – single/batch queries, cache hit/miss, + max_providers cap, error handling, stats +- BitswapClient integration – provider_query_manager wired at construction, + get_block() uses DHT discovery +""" + +from __future__ import annotations + +import time +from unittest.mock import Mock + +import pytest +import trio + +from libp2p.bitswap.block_store import MemoryBlockStore +from libp2p.bitswap.cid import cid_to_bytes, compute_cid_v0, parse_cid +from libp2p.bitswap.client import BitswapClient +from libp2p.bitswap.provider_query import ( + ProviderCache, + ProviderCacheEntry, + ProviderQueryManager, +) +from libp2p.peer.id import ID as PeerID +from libp2p.peer.peerinfo import PeerInfo + +# ── helpers ─────────────────────────────────────────────────────────────────── + +PEER_A = PeerID.from_base58("QmNnooDu7bfjPFoTZYxMNLWUQJyrVwtbZg5gBMjTezGAJN") +PEER_B = PeerID.from_base58("QmaCpDMGvV2BGHeYERUEnRQAwe3N8SzbUtfsmvsqQLuvuJ") +PEER_C = PeerID.from_base58("QmSoLV4Bbm51jM9C4gDYZQ9Cy3U6aXMJDAbzgu2fzaDs64") + +SAMPLE_PEERS = [PEER_A, PEER_B, PEER_C] + +CID_1 = parse_cid(compute_cid_v0(b"block-one")) +CID_2 = parse_cid(compute_cid_v0(b"block-two")) +CID_3 = parse_cid(compute_cid_v0(b"block-three")) + +SAMPLE_CIDS = [CID_1, CID_2, CID_3] + + +def _mock_dht(return_peers: list[PeerID] | None = None) -> Mock: + """Return a minimal mock DHT whose provider_store returns *return_peers*.""" + dht = Mock() + dht.provider_store = Mock() + peer_infos = [PeerInfo(p, []) for p in (return_peers or [])] + dht.provider_store.get_providers = Mock(return_value=peer_infos) + return dht + + +# ═════════════════════════════════════════════════════════════════════════════ +# ProviderCacheEntry +# ═════════════════════════════════════════════════════════════════════════════ + + +class TestProviderCacheEntry: + def test_fresh_entry_not_expired(self) -> None: + entry = ProviderCacheEntry(providers=SAMPLE_PEERS, ttl=300) + assert not entry.is_expired() + assert entry.age() < 1.0 + + def test_entry_with_past_timestamp_is_expired(self) -> None: + entry = ProviderCacheEntry( + providers=SAMPLE_PEERS, + timestamp=time.time() - 10, + ttl=5, + ) + assert entry.is_expired() + + def test_default_ttl_applied(self) -> None: + entry = ProviderCacheEntry(providers=[PEER_A]) + assert entry.ttl == 300 + + +# ═════════════════════════════════════════════════════════════════════════════ +# ProviderCache +# ═════════════════════════════════════════════════════════════════════════════ + + +class TestProviderCache: + def test_put_and_get(self) -> None: + cache = ProviderCache(max_size=10, default_ttl=60) + cache.put(b"k1", SAMPLE_PEERS) + assert cache.get(b"k1") == SAMPLE_PEERS + + def test_miss_returns_none(self) -> None: + cache = ProviderCache() + assert cache.get(b"no-such-key") is None + + def test_expired_entry_returns_none(self) -> None: + cache = ProviderCache(max_size=10, default_ttl=300) + cache.put(b"k1", SAMPLE_PEERS, ttl=0.01) + time.sleep(0.05) + assert cache.get(b"k1") is None + + def test_lru_evicts_oldest(self) -> None: + cache = ProviderCache(max_size=3, default_ttl=300) + cache.put(b"a", [PEER_A]) + cache.put(b"b", [PEER_B]) + cache.put(b"c", [PEER_C]) + cache.get(b"a") # mark 'a' recently used + cache.put(b"d", [PEER_A]) # 'b' should be evicted + assert cache.get(b"b") is None + assert cache.get(b"a") is not None + assert cache.get(b"d") is not None + + def test_clear_empties_cache(self) -> None: + cache = ProviderCache(max_size=10, default_ttl=300) + cache.put(b"k1", [PEER_A]) + cache.put(b"k2", [PEER_B]) + cache.clear() + assert cache.size() == 0 + + def test_cleanup_expired_removes_stale(self) -> None: + cache = ProviderCache(max_size=10, default_ttl=300) + cache.put(b"stale", [PEER_A], ttl=0.01) + cache.put(b"fresh", [PEER_B], ttl=300) + time.sleep(0.05) + removed = cache.cleanup_expired() + assert removed == 1 + assert cache.size() == 1 + + def test_stats_keys_present(self) -> None: + cache = ProviderCache(max_size=5, default_ttl=300) + cache.put(b"k", [PEER_A]) + stats = cache.stats() + assert {"size", "max_size", "expired"} <= stats.keys() + assert stats["size"] == 1 + assert stats["max_size"] == 5 + + +# ═════════════════════════════════════════════════════════════════════════════ +# ProviderQueryManager +# ═════════════════════════════════════════════════════════════════════════════ + + +class TestProviderQueryManager: + @pytest.mark.trio + async def test_cache_miss_queries_dht(self) -> None: + dht = _mock_dht(return_peers=[PEER_A]) + mgr = ProviderQueryManager(dht) + + providers = await mgr.find_providers_single(CID_1, timeout=5.0) + + assert providers == [PEER_A] + stats = mgr.get_stats() + assert stats["queries"] == 1 + assert stats["cache_misses"] == 1 + assert stats["cache_hits"] == 0 + assert stats["providers_found"] == 1 + + @pytest.mark.trio + async def test_cache_hit_skips_dht(self) -> None: + dht = _mock_dht() + mgr = ProviderQueryManager(dht) + mgr.cache.put(cid_to_bytes(CID_1), [PEER_B]) + + providers = await mgr.find_providers_single(CID_1) + + assert providers == [PEER_B] + dht.provider_store.get_providers.assert_not_called() + assert mgr.get_stats()["cache_hits"] == 1 + + @pytest.mark.trio + async def test_second_call_uses_cache(self) -> None: + dht = _mock_dht(return_peers=[PEER_A]) + mgr = ProviderQueryManager(dht) + + await mgr.find_providers_single(CID_1) # miss + await mgr.find_providers_single(CID_1) # hit + + stats = mgr.get_stats() + assert stats["queries"] == 1 # no extra DHT call + assert stats["cache_hits"] == 1 + + @pytest.mark.trio + async def test_max_providers_cap(self) -> None: + dht = _mock_dht(return_peers=SAMPLE_PEERS) + mgr = ProviderQueryManager(dht, max_providers=1) + + providers = await mgr.find_providers_single(CID_1) + assert len(providers) == 1 + + @pytest.mark.trio + async def test_no_providers_returns_empty(self) -> None: + dht = _mock_dht(return_peers=[]) + mgr = ProviderQueryManager(dht) + providers = await mgr.find_providers_single(CID_1) + assert providers == [] + + @pytest.mark.trio + async def test_dht_error_increments_errors(self) -> None: + dht = _mock_dht() + dht.provider_store.get_providers = Mock(side_effect=RuntimeError("dht down")) + mgr = ProviderQueryManager(dht) + + providers = await mgr.find_providers_single(CID_1, timeout=5.0) + + assert providers == [] + assert mgr.get_stats()["errors"] == 1 + + @pytest.mark.trio + async def test_batch_all_cache_hits(self) -> None: + dht = _mock_dht() + mgr = ProviderQueryManager(dht) + for cid in SAMPLE_CIDS: + mgr.cache.put(cid_to_bytes(cid), [PEER_A]) + + results = await mgr.find_providers(SAMPLE_CIDS) + + assert len(results) == 3 + dht.provider_store.get_providers.assert_not_called() + + @pytest.mark.trio + async def test_batch_partial_cache(self) -> None: + dht = _mock_dht(return_peers=[PEER_B]) + mgr = ProviderQueryManager(dht) + # Pre-cache only first CID + mgr.cache.put(cid_to_bytes(CID_1), [PEER_A]) + + results = await mgr.find_providers(SAMPLE_CIDS) + + assert len(results) == 3 + # Only 2 DHT calls (CID_2 and CID_3 are cache misses) + assert dht.provider_store.get_providers.call_count == 2 + + @pytest.mark.trio + async def test_use_cache_false_always_queries_dht(self) -> None: + dht = _mock_dht(return_peers=[PEER_A]) + mgr = ProviderQueryManager(dht) + mgr.cache.put(cid_to_bytes(CID_1), [PEER_B]) # pre-populated + + providers = await mgr.find_providers_single(CID_1, use_cache=False) + + # DHT was queried despite cache having an entry + dht.provider_store.get_providers.assert_called_once() + assert providers == [PEER_A] + + @pytest.mark.trio + async def test_clear_cache_forces_new_query(self) -> None: + dht = _mock_dht(return_peers=[PEER_A]) + mgr = ProviderQueryManager(dht) + + await mgr.find_providers_single(CID_1) # miss → cached + await mgr.find_providers_single(CID_1) # hit + mgr.clear_cache() + await mgr.find_providers_single(CID_1) # miss again + + assert mgr.get_stats()["cache_misses"] == 2 + assert dht.provider_store.get_providers.call_count == 2 + + @pytest.mark.trio + async def test_cleanup_expired_cache(self) -> None: + dht = _mock_dht() + mgr = ProviderQueryManager(dht) + mgr.cache.put(cid_to_bytes(CID_1), [PEER_A], ttl=0.01) + mgr.cache.put(cid_to_bytes(CID_2), [PEER_B], ttl=300) + await trio.sleep(0.05) + + removed = await mgr.cleanup_expired_cache() + + assert removed == 1 + assert mgr.cache.size() == 1 + + def test_get_stats_initial_values(self) -> None: + mgr = ProviderQueryManager(_mock_dht()) + stats = mgr.get_stats() + assert stats["queries"] == 0 + assert stats["cache_hits"] == 0 + assert stats["cache_misses"] == 0 + assert stats["errors"] == 0 + assert stats["providers_found"] == 0 + + @pytest.mark.trio + async def test_empty_cid_list(self) -> None: + mgr = ProviderQueryManager(_mock_dht()) + assert await mgr.find_providers([]) == {} + + +# ═════════════════════════════════════════════════════════════════════════════ +# BitswapClient integration +# ═════════════════════════════════════════════════════════════════════════════ + + +class TestBitswapClientProviderQueryIntegration: + """Verify that BitswapClient wires ProviderQueryManager into get_block().""" + + def _make_client( + self, + mock_host: Mock, + pqm: ProviderQueryManager | None = None, + ) -> BitswapClient: + store = MemoryBlockStore() + return BitswapClient(mock_host, block_store=store, provider_query_manager=pqm) + + def test_provider_query_manager_stored_on_client(self, mock_host: Mock) -> None: + dht = _mock_dht() + pqm = ProviderQueryManager(dht) + client = self._make_client(mock_host, pqm) + assert client.provider_query_manager is pqm + + def test_no_pqm_by_default(self, mock_host: Mock) -> None: + client = self._make_client(mock_host) + assert client.provider_query_manager is None + + @pytest.mark.trio + async def test_get_block_returns_local_without_dht(self, mock_host: Mock) -> None: + """Local cache hit must never touch the DHT.""" + dht = _mock_dht(return_peers=[PEER_A]) + pqm = ProviderQueryManager(dht) + client = self._make_client(mock_host, pqm) + + block_data = b"local block" + cid = parse_cid(compute_cid_v0(block_data)) + await client.block_store.put_block(cid, block_data) + + result = await client.block_store.get_block(cid) + assert result == block_data + # DHT must not have been consulted + dht.provider_store.get_providers.assert_not_called() + + @pytest.mark.trio + async def test_get_block_uses_pqm_to_pick_peer(self, mock_host: Mock) -> None: + """ + When the block is not local, get_block() should call + provider_query_manager.find_providers_single() and use the + returned peer_id. + """ + discovered_peer = PEER_A + block_data = b"remote block" + cid = parse_cid(compute_cid_v0(block_data)) + + dht = _mock_dht(return_peers=[discovered_peer]) + pqm = ProviderQueryManager(dht) + client = self._make_client(mock_host, pqm) + + # Patch _request_block so we can inspect the peer_id it receives + captured: dict[str, object] = {} + + async def _fake_request(cid_obj, peer_id, timeout): # noqa: ANN001 + captured["peer_id"] = peer_id + return block_data + + client._request_block = _fake_request # type: ignore[method-assign] + + result = await client.get_block(cid) + + assert result == block_data + assert captured["peer_id"] == discovered_peer + + @pytest.mark.trio + async def test_get_block_falls_back_to_broadcast_when_no_providers( + self, mock_host: Mock + ) -> None: + """ + When the DHT returns no providers, get_block() must still call + _request_block with peer_id=None (broadcast fallback). + """ + dht = _mock_dht(return_peers=[]) + pqm = ProviderQueryManager(dht) + client = self._make_client(mock_host, pqm) + + block_data = b"broadcast block" + cid = parse_cid(compute_cid_v0(block_data)) + + captured: dict[str, object] = {} + + async def _fake_request(cid_obj, peer_id, timeout): # noqa: ANN001 + captured["peer_id"] = peer_id + return block_data + + client._request_block = _fake_request # type: ignore[method-assign] + + result = await client.get_block(cid) + + assert result == block_data + assert captured["peer_id"] is None # broadcast + + @pytest.mark.trio + async def test_explicit_peer_id_skips_pqm(self, mock_host: Mock) -> None: + """An explicit peer_id argument must bypass DHT discovery.""" + dht = _mock_dht(return_peers=[PEER_B]) + pqm = ProviderQueryManager(dht) + client = self._make_client(mock_host, pqm) + + block_data = b"explicit peer block" + cid = parse_cid(compute_cid_v0(block_data)) + + captured: dict[str, object] = {} + + async def _fake_request(cid_obj, peer_id, timeout): # noqa: ANN001 + captured["peer_id"] = peer_id + return block_data + + client._request_block = _fake_request # type: ignore[method-assign] + + await client.get_block(cid, peer_id=PEER_A) + + # DHT must NOT have been called + dht.provider_store.get_providers.assert_not_called() + # The explicit peer_id must be passed through unchanged + assert captured["peer_id"] == PEER_A + + @pytest.mark.trio + async def test_pqm_error_falls_back_gracefully(self, mock_host: Mock) -> None: + """A crashing PQM must not prevent the block fetch from proceeding.""" + dht = _mock_dht() + dht.provider_store.get_providers = Mock( + side_effect=RuntimeError("dht exploded") + ) + pqm = ProviderQueryManager(dht) + client = self._make_client(mock_host, pqm) + + block_data = b"fallback block" + cid = parse_cid(compute_cid_v0(block_data)) + + captured: dict[str, object] = {} + + async def _fake_request(cid_obj, peer_id, timeout): # noqa: ANN001 + captured["peer_id"] = peer_id + return block_data + + client._request_block = _fake_request # type: ignore[method-assign] + + result = await client.get_block(cid) + + assert result == block_data + assert captured["peer_id"] is None # graceful broadcast fallback From e88f3dcc6f54062274f0e6e5c63e1f939c6ac077 Mon Sep 17 00:00:00 2001 From: sumanjeet0012 Date: Tue, 5 May 2026 01:03:34 +0530 Subject: [PATCH 17/37] refactor: clean up type hints and remove unnecessary whitespace in test files and factories --- .gitignore | 2 -- tests/core/pubsub/test_gossipsub_v1_3_extensions.py | 6 +++--- tests/core/pubsub/test_gossipsub_v2_0.py | 3 +-- tests/utils/factories.py | 12 ++++++------ tests/utils/interop/utils.py | 4 +--- 5 files changed, 11 insertions(+), 16 deletions(-) diff --git a/.gitignore b/.gitignore index 78a738d8d..525f5696d 100644 --- a/.gitignore +++ b/.gitignore @@ -200,5 +200,3 @@ libp2p-forge # OSO health report generated outputs reports/*.json reports/*.md - -my_blocks/ diff --git a/tests/core/pubsub/test_gossipsub_v1_3_extensions.py b/tests/core/pubsub/test_gossipsub_v1_3_extensions.py index d8be5d50e..cb54386de 100644 --- a/tests/core/pubsub/test_gossipsub_v1_3_extensions.py +++ b/tests/core/pubsub/test_gossipsub_v1_3_extensions.py @@ -286,7 +286,7 @@ async def test_handle_observe_and_unobserve_manage_observers() -> None: topic = "obs-topic" - observer_peer: ID = IDFactory() + observer_peer = IDFactory() # Simulate that the peer advertised topicObservation support via extensions. router.extensions_state._peer_extensions[observer_peer] = PeerExtensions( @@ -311,7 +311,7 @@ async def test_handle_observe_ignored_when_peer_did_not_advertise_extension() -> assert isinstance(router, GossipSub) topic = "obs-topic" - observer_peer: ID = IDFactory() + observer_peer = IDFactory() # Peer exists, but its advertised extensions do NOT include topicObservation. router.extensions_state._peer_extensions[observer_peer] = PeerExtensions( @@ -363,7 +363,7 @@ async def test_notify_observers_sends_ihave_to_each_observer() -> None: assert isinstance(router, GossipSub) topic = "obs-topic" - observer_peer: ID = IDFactory() + observer_peer = IDFactory() msg_id = b"message-id" # Configure TopicObservationState with a single observer. diff --git a/tests/core/pubsub/test_gossipsub_v2_0.py b/tests/core/pubsub/test_gossipsub_v2_0.py index c2f639cb4..518ed6683 100644 --- a/tests/core/pubsub/test_gossipsub_v2_0.py +++ b/tests/core/pubsub/test_gossipsub_v2_0.py @@ -16,7 +16,6 @@ import pytest import trio -from libp2p.peer.id import ID from libp2p.pubsub.gossipsub import PROTOCOL_ID_V20, GossipSub from libp2p.pubsub.pb import rpc_pb2 from libp2p.pubsub.pubsub import ValidationCache, ValidationResult @@ -467,7 +466,7 @@ def test_candidate_selection_for_diversity(self): # Candidates from different IPs - candidates: list[tuple[ID, float]] = [ + candidates = [ (IDFactory(), 5.0), # Different IP (IDFactory(), 4.0), # Same IP as mesh ] diff --git a/tests/utils/factories.py b/tests/utils/factories.py index 762044f74..2198d2503 100644 --- a/tests/utils/factories.py +++ b/tests/utils/factories.py @@ -447,7 +447,7 @@ async def upgrade_remote_conn(remote_conn: IRawConnection) -> None: class SwarmFactory(factory.Factory): - class Meta: # type: ignore[override] + class Meta: model = Swarm class Params: @@ -511,7 +511,7 @@ async def create_batch_and_listen( class HostFactory(factory.Factory): - class Meta: # type: ignore[override] + class Meta: model = BasicHost class Params: @@ -555,7 +555,7 @@ async def find_peer(self, peer_id: ID) -> PeerInfo | None: class RoutedHostFactory(factory.Factory): - class Meta: # type: ignore[override] + class Meta: model = RoutedHost class Params: @@ -591,14 +591,14 @@ async def create_batch_and_listen( class FloodsubFactory(factory.Factory): - class Meta: # type: ignore[override] + class Meta: model = FloodSub protocols = (FLOODSUB_PROTOCOL_ID,) class GossipsubFactory(factory.Factory): - class Meta: # type: ignore[override] + class Meta: model = GossipSub protocols = (GOSSIPSUB_PROTOCOL_ID,) @@ -623,7 +623,7 @@ class Meta: # type: ignore[override] class PubsubFactory(factory.Factory): - class Meta: # type: ignore[override] + class Meta: model = Pubsub host = factory.SubFactory(HostFactory) diff --git a/tests/utils/interop/utils.py b/tests/utils/interop/utils.py index db299dcb5..30b89197c 100644 --- a/tests/utils/interop/utils.py +++ b/tests/utils/interop/utils.py @@ -5,9 +5,7 @@ from multiaddr import ( Multiaddr, ) -from p2pclient.libp2p_stubs.peer.id import ( - ID as StubID, # type: ignore[import-untyped, import-error] -) +from p2pclient.libp2p_stubs.peer.id import ID as StubID import trio from libp2p.abc import IHost From f65ca7339efd1fbbdac69e87cd090ba01d011f53 Mon Sep 17 00:00:00 2001 From: sumanjeet0012 Date: Tue, 5 May 2026 01:06:52 +0530 Subject: [PATCH 18/37] refactor: remove unnecessary whitespace in Gossipsub test files --- tests/core/pubsub/test_gossipsub_v1_3_extensions.py | 5 ++--- tests/core/pubsub/test_gossipsub_v2_0.py | 1 - 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/core/pubsub/test_gossipsub_v1_3_extensions.py b/tests/core/pubsub/test_gossipsub_v1_3_extensions.py index cb54386de..79ff2ad2b 100644 --- a/tests/core/pubsub/test_gossipsub_v1_3_extensions.py +++ b/tests/core/pubsub/test_gossipsub_v1_3_extensions.py @@ -285,7 +285,6 @@ async def test_handle_observe_and_unobserve_manage_observers() -> None: assert isinstance(router, GossipSub) topic = "obs-topic" - observer_peer = IDFactory() # Simulate that the peer advertised topicObservation support via extensions. @@ -331,7 +330,7 @@ async def test_emit_observe_and_unobserve_update_observing_state() -> None: assert isinstance(router, GossipSub) topic = "obs-topic" - subscriber_peer: ID = IDFactory() + subscriber_peer = IDFactory() # Stub pubsub.peers so emit_control_message sees the peer as connected. class DummyPubsub: @@ -396,7 +395,7 @@ async def test_start_and_stop_observing_topic_high_level_api() -> None: assert isinstance(router, GossipSub) topic = "obs-topic" - subscriber_peer: ID = IDFactory() + subscriber_peer = IDFactory() # Simulate pubsub state: subscriber_peer is subscribed to topic. class DummyPubsub: diff --git a/tests/core/pubsub/test_gossipsub_v2_0.py b/tests/core/pubsub/test_gossipsub_v2_0.py index 518ed6683..68e575fd7 100644 --- a/tests/core/pubsub/test_gossipsub_v2_0.py +++ b/tests/core/pubsub/test_gossipsub_v2_0.py @@ -465,7 +465,6 @@ def test_candidate_selection_for_diversity(self): scorer.ip_by_peer[peer] = "192.168.1.1" # Candidates from different IPs - candidates = [ (IDFactory(), 5.0), # Different IP (IDFactory(), 4.0), # Same IP as mesh From 55a91e0608a23da61af6e63ee18f0aa516554cb2 Mon Sep 17 00:00:00 2001 From: sumanjeet0012 Date: Tue, 5 May 2026 01:33:39 +0530 Subject: [PATCH 19/37] newsfragment added Co-authored-by: Copilot --- newsfragments/1321.feature.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 newsfragments/1321.feature.rst diff --git a/newsfragments/1321.feature.rst b/newsfragments/1321.feature.rst new file mode 100644 index 000000000..af0c9d04a --- /dev/null +++ b/newsfragments/1321.feature.rst @@ -0,0 +1,10 @@ +Comprehensive Bitswap overhaul for Kubo compatibility and performance: + +- **Batch block fetching** — send multiple CIDs in a single wantlist message. +- **Kubo-compatible DAG-PB encoding** — produce identical CIDs to Kubo's ``ipfs add``. +- **FilesystemBlockStore** — persistent storage surviving process restarts. +- **BlockService** — local-first lookup with automatic block caching and announcement. +- **Streaming support** — ``chunk_stream`` and ``MerkleDag.add_stream`` for efficient DAG building. +- **Bitswap 1.2.0 wantlist API** — ``WantType``, ``BlockPresence``, ``WantlistEntry``, ``BitswapMessage``. +- **DHT record signing/verification** — Kubo-compatible provider and value record signing. +- **ProviderQueryManager** — automatic DHT-based peer discovery in ``BitswapClient.get_block()`` with LRU caching. From 21fb31641fe1d6904146321da8332745c7feadd5 Mon Sep 17 00:00:00 2001 From: sumanjeet0012 Date: Tue, 5 May 2026 01:47:10 +0530 Subject: [PATCH 20/37] refactor: improve formatting of docstring in add_block method Co-authored-by: Copilot --- libp2p/bitswap/client.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/libp2p/bitswap/client.py b/libp2p/bitswap/client.py index 82a1e0286..7a445b808 100644 --- a/libp2p/bitswap/client.py +++ b/libp2p/bitswap/client.py @@ -262,8 +262,7 @@ async def get_block( Args: cid: The CID of the block to fetch - peer_id: Optional specific peer to request from. When given, - DHT discovery is skipped. + peer_id: Optional specific peer to request from. When given, DHT discovery is skipped. timeout: Timeout in seconds Returns: From 0cbec92f1feb7b6561e0a08c4f670527104501fa Mon Sep 17 00:00:00 2001 From: sumanjeet0012 Date: Tue, 5 May 2026 01:53:11 +0530 Subject: [PATCH 21/37] refactor: improve docstring clarity for add_block method parameters Co-authored-by: Copilot --- libp2p/bitswap/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libp2p/bitswap/client.py b/libp2p/bitswap/client.py index 7a445b808..3d3acefc0 100644 --- a/libp2p/bitswap/client.py +++ b/libp2p/bitswap/client.py @@ -262,7 +262,7 @@ async def get_block( Args: cid: The CID of the block to fetch - peer_id: Optional specific peer to request from. When given, DHT discovery is skipped. + peer_id: Optional peer to request from; DHT discovery is skipped when set. timeout: Timeout in seconds Returns: From 0f0b6bb6351de2f49a010cb4256ce135b5e65e2e Mon Sep 17 00:00:00 2001 From: sumanjeet0012 Date: Wed, 6 May 2026 23:19:50 +0530 Subject: [PATCH 22/37] refactor: update provider_query_manager to use find_providers for DHT lookups and always send signed records. Co-authored-by: Copilot --- libp2p/bitswap/provider_query.py | 8 +- libp2p/kad_dht/value_store.py | 15 +- tests/core/bitswap/test_provider_query.py | 41 +++-- tests/core/kad_dht/test_unit_value_store.py | 172 ++++++++++++++++++++ 4 files changed, 218 insertions(+), 18 deletions(-) diff --git a/libp2p/bitswap/provider_query.py b/libp2p/bitswap/provider_query.py index e32a4c3db..47fcf98ad 100644 --- a/libp2p/bitswap/provider_query.py +++ b/libp2p/bitswap/provider_query.py @@ -347,8 +347,10 @@ async def _query_single( try: with trio.fail_after(timeout): - # Query DHT provider store - provider_infos = self.dht.provider_store.get_providers(cid_bytes) + # Perform a network DHT provider lookup (not a local-store read) + provider_infos = await self.dht.provider_store.find_providers( + cid_bytes, self.max_providers + ) # Extract peer IDs from PeerInfo objects providers = [info.peer_id for info in provider_infos] @@ -361,7 +363,7 @@ async def _query_single( # Update results results[cid_bytes] = providers - # Update cache + # Update cache with remote results self.cache.put(cid_bytes, providers) # Update stats diff --git a/libp2p/kad_dht/value_store.py b/libp2p/kad_dht/value_store.py index a101a7289..459e7487e 100644 --- a/libp2p/kad_dht/value_store.py +++ b/libp2p/kad_dht/value_store.py @@ -127,11 +127,20 @@ async def _store_at_peer(self, peer_id: ID, key: bytes, value: bytes) -> bool: envelope_bytes, _ = env_to_send_in_RPC(self.host) message.senderRecord = envelope_bytes - # Set message fields + # Build the outbound record from the locally-stored signed record when + # available (normal put() path), otherwise sign the record now so the + # outbound message always carries signature and author fields. + local_entry = self.store.get(key) + if local_entry is not None: + signed_record, _ = local_entry + message.record.CopyFrom(signed_record) + else: + private_key = self.host.get_private_key() + signed_record = make_signed_put_record(key, value, private_key) + message.record.CopyFrom(signed_record) message.key = key - message.record.key = key - message.record.value = value # Note: timeReceived will be set by the receiving peer when storing + message.record.ClearField("timeReceived") # Serialize and send the protobuf message with length prefix proto_bytes = message.SerializeToString() diff --git a/tests/core/bitswap/test_provider_query.py b/tests/core/bitswap/test_provider_query.py index 8edf6a318..8617cc6eb 100644 --- a/tests/core/bitswap/test_provider_query.py +++ b/tests/core/bitswap/test_provider_query.py @@ -45,11 +45,20 @@ def _mock_dht(return_peers: list[PeerID] | None = None) -> Mock: - """Return a minimal mock DHT whose provider_store returns *return_peers*.""" + """ + Return a mock DHT whose provider_store.find_providers returns *return_peers*. + + find_providers is the async network lookup path; get_providers is the + local-store read that ProviderQueryManager no longer calls directly. + """ dht = Mock() dht.provider_store = Mock() peer_infos = [PeerInfo(p, []) for p in (return_peers or [])] - dht.provider_store.get_providers = Mock(return_value=peer_infos) + + async def _async_find_providers(key: bytes, count: int = 20) -> list[PeerInfo]: + return peer_infos[:count] + + dht.provider_store.find_providers = Mock(side_effect=_async_find_providers) return dht @@ -153,6 +162,8 @@ async def test_cache_miss_queries_dht(self) -> None: assert stats["cache_misses"] == 1 assert stats["cache_hits"] == 0 assert stats["providers_found"] == 1 + # Verify the async network path was used, not the local store read + dht.provider_store.find_providers.assert_called_once() @pytest.mark.trio async def test_cache_hit_skips_dht(self) -> None: @@ -163,7 +174,7 @@ async def test_cache_hit_skips_dht(self) -> None: providers = await mgr.find_providers_single(CID_1) assert providers == [PEER_B] - dht.provider_store.get_providers.assert_not_called() + dht.provider_store.find_providers.assert_not_called() assert mgr.get_stats()["cache_hits"] == 1 @pytest.mark.trio @@ -196,7 +207,11 @@ async def test_no_providers_returns_empty(self) -> None: @pytest.mark.trio async def test_dht_error_increments_errors(self) -> None: dht = _mock_dht() - dht.provider_store.get_providers = Mock(side_effect=RuntimeError("dht down")) + + async def _raise(*_args: object, **_kwargs: object) -> None: + raise RuntimeError("dht down") + + dht.provider_store.find_providers = Mock(side_effect=_raise) mgr = ProviderQueryManager(dht) providers = await mgr.find_providers_single(CID_1, timeout=5.0) @@ -214,7 +229,7 @@ async def test_batch_all_cache_hits(self) -> None: results = await mgr.find_providers(SAMPLE_CIDS) assert len(results) == 3 - dht.provider_store.get_providers.assert_not_called() + dht.provider_store.find_providers.assert_not_called() @pytest.mark.trio async def test_batch_partial_cache(self) -> None: @@ -227,7 +242,7 @@ async def test_batch_partial_cache(self) -> None: assert len(results) == 3 # Only 2 DHT calls (CID_2 and CID_3 are cache misses) - assert dht.provider_store.get_providers.call_count == 2 + assert dht.provider_store.find_providers.call_count == 2 @pytest.mark.trio async def test_use_cache_false_always_queries_dht(self) -> None: @@ -238,7 +253,7 @@ async def test_use_cache_false_always_queries_dht(self) -> None: providers = await mgr.find_providers_single(CID_1, use_cache=False) # DHT was queried despite cache having an entry - dht.provider_store.get_providers.assert_called_once() + dht.provider_store.find_providers.assert_called_once() assert providers == [PEER_A] @pytest.mark.trio @@ -252,7 +267,7 @@ async def test_clear_cache_forces_new_query(self) -> None: await mgr.find_providers_single(CID_1) # miss again assert mgr.get_stats()["cache_misses"] == 2 - assert dht.provider_store.get_providers.call_count == 2 + assert dht.provider_store.find_providers.call_count == 2 @pytest.mark.trio async def test_cleanup_expired_cache(self) -> None: @@ -322,7 +337,7 @@ async def test_get_block_returns_local_without_dht(self, mock_host: Mock) -> Non result = await client.block_store.get_block(cid) assert result == block_data # DHT must not have been consulted - dht.provider_store.get_providers.assert_not_called() + dht.provider_store.find_providers.assert_not_called() @pytest.mark.trio async def test_get_block_uses_pqm_to_pick_peer(self, mock_host: Mock) -> None: @@ -410,9 +425,11 @@ async def _fake_request(cid_obj, peer_id, timeout): # noqa: ANN001 async def test_pqm_error_falls_back_gracefully(self, mock_host: Mock) -> None: """A crashing PQM must not prevent the block fetch from proceeding.""" dht = _mock_dht() - dht.provider_store.get_providers = Mock( - side_effect=RuntimeError("dht exploded") - ) + + async def _raise(*_args: object, **_kwargs: object) -> None: + raise RuntimeError("dht exploded") + + dht.provider_store.find_providers = Mock(side_effect=_raise) pqm = ProviderQueryManager(dht) client = self._make_client(mock_host, pqm) diff --git a/tests/core/kad_dht/test_unit_value_store.py b/tests/core/kad_dht/test_unit_value_store.py index 3e2d58e01..6a5d7d4a7 100644 --- a/tests/core/kad_dht/test_unit_value_store.py +++ b/tests/core/kad_dht/test_unit_value_store.py @@ -449,6 +449,178 @@ async def test_store_at_peer_local_peer(self): assert result is True + @pytest.mark.trio + async def test_store_at_peer_propagates_signature_and_author(self): + """ + _store_at_peer must include signature and author from the locally-stored + signed record in the outbound PUT_VALUE message. + + This ensures signed-record authenticity is preserved when replicating + values to remote peers, matching go-libp2p interoperability requirements. + """ + import varint + + from libp2p.kad_dht.pb.kademlia_pb2 import Message + + # Build a host with a real key pair so put() creates a genuine signed record + kp = create_new_key_pair() + remote_peer_id = ID.from_base58("QmRemote123456789") + local_peer_id = ID.from_pubkey(kp.public_key) + + # Capture the bytes written to the mock stream + written: list[bytes] = [] + + mock_stream = Mock() + + async def _write(data: bytes) -> None: + written.append(data) + + async def _read(n: int) -> bytes: + # Simulate a minimal valid PUT_VALUE acknowledgement + resp = Message() + resp.type = Message.MessageType.PUT_VALUE + resp.key = b"test_key" + raw = resp.SerializeToString() + length = varint.encode(len(raw)) + # Return one byte at a time for the varint reader, then the body + full = length + raw + if not hasattr(_read, "_buf"): + _read._buf = iter(full) # type: ignore[attr-defined] + byte_val = next(_read._buf, b"") # type: ignore[attr-defined] + return bytes([byte_val]) if isinstance(byte_val, int) else byte_val + + mock_stream.write = Mock(side_effect=_write) + mock_stream.read = Mock(side_effect=_read) + mock_stream.close = Mock(return_value=None) + + # Patch close to be awaitable + async def _close() -> None: + pass + + mock_stream.close = _close + + h = Mock() + h.get_private_key.return_value = kp.private_key + h.get_peerstore.return_value = Mock() + + # env_to_send_in_RPC is called; return empty bytes to keep test simple + from libp2p.peer.peerstore import env_to_send_in_RPC + + original_env = env_to_send_in_RPC + + import libp2p.kad_dht.value_store as vs_module + + vs_module.env_to_send_in_RPC = Mock(return_value=(b"", None)) # type: ignore[attr-defined] + + async def _new_stream(*_args: object, **_kwargs: object) -> object: + return mock_stream + + h.new_stream = _new_stream + + try: + store = ValueStore(host=h, local_peer_id=local_peer_id) + key = b"test_key" + value = b"test_value" + + # Store locally first (creates signed record) + store.put(key, value) + + # Confirm the local record has signature and author set + local_record, _ = store.store[key] + assert local_record.signature, "put() must produce a non-empty signature" + assert local_record.author, "put() must populate the author field" + + # Now replicate to a remote peer + await store._store_at_peer(remote_peer_id, key, value) + + # Reconstruct the serialized message from what was written + # written[0] is the varint length prefix, written[1] is the proto body + assert len(written) >= 2, "Expected varint + proto body to be written" + sent_msg = Message() + sent_msg.ParseFromString(written[1]) + + assert sent_msg.HasField("record"), "Outbound message must contain a record" + assert sent_msg.record.signature == local_record.signature, ( + "Outbound record must carry the signature from the signed record" + ) + assert sent_msg.record.author == local_record.author, ( + "Outbound record must carry the author from the signed record" + ) + finally: + vs_module.env_to_send_in_RPC = original_env # type: ignore[attr-defined] + + @pytest.mark.trio + async def test_store_at_peer_signs_record_without_prior_put(self): + """ + When _store_at_peer is called without a prior put() (e.g. the get_value + propagation path), it must still produce a signed outbound record — + never a bare unsigned one. + """ + import varint + + from libp2p.kad_dht.pb.kademlia_pb2 import Message + + kp = create_new_key_pair() + remote_peer_id = ID.from_base58("QmRemote999") + local_peer_id = ID.from_pubkey(kp.public_key) + + written: list[bytes] = [] + + async def _write(data: bytes) -> None: + written.append(data) + + mock_stream = Mock() + resp = Message() + resp.type = Message.MessageType.PUT_VALUE + resp.key = b"bare_key" + raw = resp.SerializeToString() + resp_bytes = varint.encode(len(raw)) + raw + resp_iter = iter(resp_bytes) + + async def _read(n: int) -> bytes: + byte_val = next(resp_iter, b"") + return bytes([byte_val]) if isinstance(byte_val, int) else byte_val + + mock_stream.write = Mock(side_effect=_write) + mock_stream.read = Mock(side_effect=_read) + + async def _close() -> None: + pass + + mock_stream.close = _close + + h = Mock() + h.get_private_key.return_value = kp.private_key + + import libp2p.kad_dht.value_store as vs_module + + original_env = vs_module.env_to_send_in_RPC + vs_module.env_to_send_in_RPC = Mock(return_value=(b"", None)) # type: ignore[attr-defined] + + async def _new_stream(*_args: object, **_kwargs: object) -> object: + return mock_stream + + h.new_stream = _new_stream + + try: + store = ValueStore(host=h, local_peer_id=local_peer_id) + key = b"bare_key" + value = b"bare_value" + + # Do NOT call store.put() — _store_at_peer must sign the record itself + await store._store_at_peer(remote_peer_id, key, value) + + assert len(written) >= 2 + sent_msg = Message() + sent_msg.ParseFromString(written[1]) + assert sent_msg.record.key == key + assert sent_msg.record.value == value + # The record must be signed even without a prior put() + assert sent_msg.record.signature, "record must be signed inline" + assert sent_msg.record.author, "record must carry author field" + finally: + vs_module.env_to_send_in_RPC = original_env # type: ignore[attr-defined] + @pytest.mark.trio async def test_get_from_peer_local_peer(self): """Test _get_from_peer returns None when querying local peer.""" From 4d1137a90ec192a9557e3436dea34bd16c43c6b6 Mon Sep 17 00:00:00 2001 From: sumanjeet0012 Date: Thu, 7 May 2026 00:48:17 +0530 Subject: [PATCH 23/37] refactor: enhance verify_record to support multiple key types and improve unmarshal_public_key functionality --- libp2p/records/utils.py | 41 ++++++++++++- tests/core/records/test_validator.py | 89 +++++++++++++++++++++++++++- 2 files changed, 126 insertions(+), 4 deletions(-) diff --git a/libp2p/records/utils.py b/libp2p/records/utils.py index 35d760dc3..2dcc6620f 100644 --- a/libp2p/records/utils.py +++ b/libp2p/records/utils.py @@ -1,11 +1,37 @@ from libp2p.crypto.ed25519 import Ed25519PublicKey -from libp2p.crypto.keys import PrivateKey +from libp2p.crypto.keys import PrivateKey, PublicKey +from libp2p.crypto.pb import crypto_pb2 +from libp2p.crypto.rsa import RSAPublicKey +from libp2p.crypto.secp256k1 import Secp256k1PublicKey class InvalidRecordType(Exception): pass +def _unmarshal_public_key(data: bytes) -> PublicKey: + """ + Deserialize a ``crypto_pb2.PublicKey`` protobuf into a concrete + ``PublicKey`` instance. + + Kept private to this module to avoid the circular import that arises + when importing from ``libp2p.records.pubkey`` (which itself imports + from this module). + """ + proto_key = crypto_pb2.PublicKey.FromString(data) + key_type = proto_key.key_type + key_data = proto_key.data + + if key_type == crypto_pb2.KeyType.RSA: + return RSAPublicKey.from_bytes(key_data) + elif key_type == crypto_pb2.KeyType.Ed25519: + return Ed25519PublicKey.from_bytes(key_data) + elif key_type == crypto_pb2.KeyType.Secp256k1: + return Secp256k1PublicKey.from_bytes(key_data) + else: + raise ValueError(f"Unsupported key type: {key_type}") + + def sign_record( private_key: PrivateKey, key: bytes, value: bytes ) -> tuple[bytes, bytes]: @@ -26,7 +52,9 @@ def sign_record( signing_payload = b"libp2p-record:" + key + value signature = private_key.sign(signing_payload) public_key = private_key.get_public_key() - author_bytes = public_key.to_bytes() + # Serialize as a protobuf-wrapped PublicKey so that verify_record (and + # remote peers) can reconstruct the key without knowing its type in advance. + author_bytes = public_key.serialize() return signature, author_bytes @@ -36,9 +64,16 @@ def verify_record( """ Verify a signed DHT record. + Supports all key types that libp2p serialises in a protobuf PublicKey + envelope (Ed25519, RSA, Secp256k1). The author field is treated as a + serialised ``crypto_pb2.PublicKey`` message and dispatched through + ``unmarshal_public_key`` so that non-Ed25519 peers are not silently + rejected. + Args: signature: The record signature author_public_key: The serialized public key of the author + (``crypto_pb2.PublicKey`` protobuf bytes) key: The record key value: The record value @@ -47,7 +82,7 @@ def verify_record( """ try: - public_key = Ed25519PublicKey.from_bytes(author_public_key) + public_key = _unmarshal_public_key(author_public_key) signing_payload = b"libp2p-record:" + key + value return public_key.verify(signing_payload, signature) except Exception: diff --git a/tests/core/records/test_validator.py b/tests/core/records/test_validator.py index 4a0efc0f7..9faf3bb6c 100644 --- a/tests/core/records/test_validator.py +++ b/tests/core/records/test_validator.py @@ -5,7 +5,12 @@ from libp2p.peer.id import ID from libp2p.records.pubkey import PublicKeyValidator, unmarshal_public_key from libp2p.records.record import make_put_record -from libp2p.records.utils import InvalidRecordType, split_key +from libp2p.records.utils import ( + InvalidRecordType, + sign_record, + split_key, + verify_record, +) from libp2p.records.validator import NamespacedValidator, Validator bad_paths = [ @@ -243,3 +248,85 @@ def select(self, key: str, values: list[bytes]) -> int: # Non-namespaced key uses custom fallback that rejects with pytest.raises(ValueError, match="Rejected by fallback"): validators.validate("plain-key", b"value") + + +# ───────────────────────────────────────────────────────────────────────────── +# verify_record — multi-key-type coverage +# ───────────────────────────────────────────────────────────────────────────── + + +class TestVerifyRecord: + """ + verify_record must accept signatures from every key type that libp2p + serialises via crypto_pb2.PublicKey (Ed25519, Secp256k1, RSA). + + Previously the implementation hard-coded Ed25519PublicKey.from_bytes, + causing it to silently return False for RSA and Secp256k1 peers and + breaking DHT interoperability with non-Ed25519 nodes. + """ + + def _round_trip(self, key_pair) -> None: # noqa: ANN001 + """Sign with *key_pair* and assert verify_record returns True.""" + key = b"/test/mykey" + value = b"hello world" + sig, author = sign_record(key_pair.private_key, key, value) + assert verify_record(sig, author, key, value), ( + f"verify_record returned False for key type " + f"{key_pair.private_key.get_type()}" + ) + + def _tampered_fails(self, key_pair) -> None: # noqa: ANN001 + """Tampered payload must make verify_record return False.""" + key = b"/test/mykey" + value = b"hello world" + sig, author = sign_record(key_pair.private_key, key, value) + assert not verify_record(sig, author, key, b"tampered"), ( + f"verify_record accepted tampered value for key type " + f"{key_pair.private_key.get_type()}" + ) + + def test_ed25519_valid_signature(self) -> None: + from libp2p.crypto.ed25519 import create_new_key_pair as ed_kp + + self._round_trip(ed_kp()) + + def test_ed25519_tampered_value_rejected(self) -> None: + from libp2p.crypto.ed25519 import create_new_key_pair as ed_kp + + self._tampered_fails(ed_kp()) + + def test_secp256k1_valid_signature(self) -> None: + from libp2p.crypto.secp256k1 import create_new_key_pair as secp_kp + + self._round_trip(secp_kp()) + + def test_secp256k1_tampered_value_rejected(self) -> None: + from libp2p.crypto.secp256k1 import create_new_key_pair as secp_kp + + self._tampered_fails(secp_kp()) + + def test_rsa_valid_signature(self) -> None: + from libp2p.crypto.rsa import create_new_key_pair as rsa_kp + + self._round_trip(rsa_kp()) + + def test_rsa_tampered_value_rejected(self) -> None: + from libp2p.crypto.rsa import create_new_key_pair as rsa_kp + + self._tampered_fails(rsa_kp()) + + def test_garbage_author_bytes_returns_false(self) -> None: + """Completely invalid author bytes must return False, not raise.""" + assert not verify_record(b"sig", b"not-a-valid-protobuf", b"key", b"value") + + def test_wrong_key_returns_false(self) -> None: + """Signature verified against a different key must return False.""" + from libp2p.crypto.ed25519 import create_new_key_pair as ed_kp + + kp1 = ed_kp() + kp2 = ed_kp() + key = b"/test/k" + value = b"v" + sig, _ = sign_record(kp1.private_key, key, value) + _, author2 = sign_record(kp2.private_key, key, value) + assert not verify_record(sig, author2, key, value) From 9b359fd81267022b40745e7a1efc86bd9e26af69 Mon Sep 17 00:00:00 2001 From: sumanjeet0012 Date: Thu, 14 May 2026 00:52:41 +0530 Subject: [PATCH 24/37] feat: Implement Bitswap 1.3.0 with payment gating - Added PaymentGatedDecisionEngine to handle payment-required logic for block serving. - Introduced PaymentTerms, PaymentAuthorization, PaymentReceipt, and PaymentRejection messages in the new bitswap_1_3_0.proto. - Enhanced existing MerkleDag class to store internal nodes with a callback for payment gating. - Created BitswapPaymentClient_1_3 to manage client-side payment authorizations and receipts. - Updated balanced_layout function to support payment gating and internal node storage. - Added necessary protobuf definitions and generated Python files for Bitswap 1.3.0. --- libp2p/bitswap/__init__.py | 4 + libp2p/bitswap/client.py | 191 +++++++++++++- libp2p/bitswap/config.py | 4 +- libp2p/bitswap/dag.py | 16 ++ libp2p/bitswap/dag_pb.py | 10 +- libp2p/bitswap/gated_decision_engine.py | 327 ++++++++++++++++++++++++ libp2p/bitswap/payment_client_1_3.py | 224 ++++++++++++++++ libp2p/bitswap/pb/bitswap_1_3_0.proto | 104 ++++++++ libp2p/bitswap/pb/bitswap_1_3_0_pb2.py | 46 ++++ libp2p/bitswap/pb/bitswap_1_3_0_pb2.pyi | 141 ++++++++++ 10 files changed, 1059 insertions(+), 8 deletions(-) create mode 100644 libp2p/bitswap/gated_decision_engine.py create mode 100644 libp2p/bitswap/payment_client_1_3.py create mode 100644 libp2p/bitswap/pb/bitswap_1_3_0.proto create mode 100644 libp2p/bitswap/pb/bitswap_1_3_0_pb2.py create mode 100644 libp2p/bitswap/pb/bitswap_1_3_0_pb2.pyi diff --git a/libp2p/bitswap/__init__.py b/libp2p/bitswap/__init__.py index dcad9d1aa..b3d838b94 100644 --- a/libp2p/bitswap/__init__.py +++ b/libp2p/bitswap/__init__.py @@ -33,6 +33,8 @@ from .block_service import BlockService from .block_store import BlockStore, FilesystemBlockStore, MemoryBlockStore +from .gated_decision_engine import PaymentGatedDecisionEngine +from .payment_client_1_3 import BitswapPaymentClient_1_3 from .cid import ( CID_V0, CID_V1, @@ -78,6 +80,8 @@ __all__ = [ # Core "BitswapClient", + "BitswapPaymentClient_1_3", + "PaymentGatedDecisionEngine", "BlockService", "BlockStore", "MemoryBlockStore", diff --git a/libp2p/bitswap/client.py b/libp2p/bitswap/client.py index 3d3acefc0..52de979c6 100644 --- a/libp2p/bitswap/client.py +++ b/libp2p/bitswap/client.py @@ -1,6 +1,6 @@ """ Bitswap client implementation for block exchange. -Supports v1.0.0, v1.1.0, and v1.2.0 protocols. +Supports v1.0.0, v1.1.0, v1.2.0, and v1.3.0 protocols. """ from collections.abc import Sequence @@ -30,6 +30,7 @@ from .config import ( BITSWAP_PROTOCOL_V100, BITSWAP_PROTOCOL_V120, + BITSWAP_PROTOCOL_V130, BITSWAP_PROTOCOLS, DEFAULT_PRIORITY, DEFAULT_TIMEOUT, @@ -43,6 +44,7 @@ TimeoutError as BitswapTimeoutError, ) from .messages import create_message, create_wantlist_entry +from .pb.bitswap_1_3_0_pb2 import Message as Message_1_3 from .pb.bitswap_pb2 import Message from .provider_query import ProviderQueryManager @@ -53,8 +55,10 @@ class BitswapClient: """ Bitswap client for exchanging blocks with other peers. - Supports Bitswap protocol versions 1.0.0, 1.1.0, and 1.2.0 for content - discovery and file sharing in a peer-to-peer network. + Supports Bitswap protocol versions 1.0.0, 1.1.0, 1.2.0, and 1.3.0 for + content discovery and file sharing in a peer-to-peer network. + + For 1.3.0 payment support, pass a payment_client and payment_engine. """ def __init__( @@ -63,6 +67,8 @@ def __init__( block_store: BlockStore | None = None, protocol_version: str = BITSWAP_PROTOCOL_V120, provider_query_manager: ProviderQueryManager | None = None, + payment_client: Any = None, # BitswapPaymentClient_1_3 (optional) + payment_engine: Any = None, # PaymentGatedDecisionEngine (optional) ): """ Initialize Bitswap client. @@ -75,6 +81,10 @@ def __init__( DHT-based provider discovery. When supplied, ``get_block()`` will query the DHT for providers before broadcasting to all connected peers. + payment_client: Optional BitswapPaymentClient_1_3 for client-side + payment handling (auto-pays for blocks in 1.3.0 mode). + payment_engine: Optional PaymentGatedDecisionEngine for server-side + payment gating (gates block serving behind payment in 1.3.0 mode). """ self.host = host @@ -83,6 +93,10 @@ def __init__( self.provider_query_manager: ProviderQueryManager | None = ( provider_query_manager ) + # 1.3.0 payment components (optional) + self.payment_client = payment_client + self.payment_engine = payment_engine + self._wantlist: dict[ CIDObject, dict[str, Any] ] = {} # CID -> {priority, want_type, send_dont_have} @@ -234,8 +248,23 @@ async def get_blocks_batch( if data is not None: results[cid_obj.buffer] = data else: - cid_str = format_cid_for_display(cid_obj) - logger.warning(f"Block not received: {cid_str}") + # Block may have arrived late (e.g. after payment round-trip). + # Check if the pending event was set after the timeout fired. + event = self._pending_requests.get(cid_obj) + if event and event.is_set(): + data = await self.block_store.get_block(cid_obj) + if data is not None: + results[cid_obj.buffer] = data + logger.info( + f"Late block received (post-timeout): " + f"{format_cid_for_display(cid_obj)}" + ) + else: + cid_str = format_cid_for_display(cid_obj) + logger.warning(f"Block not received: {cid_str}") + else: + cid_str = format_cid_for_display(cid_obj) + logger.warning(f"Block not received: {cid_str}") # Cleanup if cid_obj in self._pending_requests: @@ -687,11 +716,78 @@ async def _process_message( self, msg: Message, peer_id: PeerID, stream: INetStream ) -> None: """Process a received Bitswap message.""" + peer_id_str = str(peer_id)[:16] + if msg.HasField("wantlist"): + logger.warning("=" * 70) + logger.warning(f"📥 RECEIVED WANTLIST from peer {peer_id_str}") + logger.warning(f" Entries: {len(msg.wantlist.entries)}") + logger.warning(f" Full: {msg.wantlist.full}") + logger.warning("=" * 70) + print(f"\n📥 RECEIVED WANTLIST from peer {peer_id_str} with {len(msg.wantlist.entries)} entries", flush=True) + # Detect peer protocol version from stream protocol = stream.get_protocol() if protocol: self._peer_protocols[peer_id] = str(protocol) + peer_protocol = str(protocol) if protocol else BITSWAP_PROTOCOL_V100 + + # ── Bitswap 1.3.0 payment message handling ─────────────────────── + if peer_protocol == str(BITSWAP_PROTOCOL_V130): + # Re-parse as 1.3.0 message to access payment fields + msg_1_3 = Message_1_3() + try: + msg_1_3.ParseFromString(msg.SerializeToString()) + except Exception: + msg_1_3 = None + + if msg_1_3 is not None: + # Client-side: handle PaymentTerms / PaymentReceipts / PaymentRejections + if self.payment_client and ( + msg_1_3.payment_terms + or msg_1_3.payment_receipts + or msg_1_3.payment_rejections + ): + response = await self.payment_client.process_incoming_message( + str(peer_id), msg_1_3 + ) + if response is not None: + await self._write_message_bytes( + stream, response.SerializeToString() + ) + + # Process any blocks delivered alongside a payment receipt + if msg_1_3.payload: + await self._process_blocks_v110(msg_1_3.payload) + if msg_1_3.blocks: + await self._process_blocks_v100(list(msg_1_3.blocks), peer_id) + + # Server-side: handle PaymentAuthorizations + if self.payment_engine and msg_1_3.payment_authorizations: + response = await self.payment_engine.process_incoming_1_3_message( + str(peer_id), msg_1_3 + ) + if response is not None: + await self._write_message_bytes( + stream, response.SerializeToString() + ) + + # Handle PaymentRequired block presences specially + if msg_1_3.blockPresences: + await self._process_block_presences_1_3( + msg_1_3.blockPresences, peer_id + ) + # Don't fall through to normal presence processing + # (already handled above) + if msg.HasField("wantlist"): + await self._process_wantlist(msg.wantlist, peer_id, stream) + if msg.blocks: + await self._process_blocks_v100(list(msg.blocks), peer_id) + if msg.payload: + await self._process_blocks_v110(msg.payload) + return + + # ── Standard 1.0.0–1.2.0 message handling ──────────────────────── # Process wantlist if msg.HasField("wantlist"): await self._process_wantlist(msg.wantlist, peer_id, stream) @@ -725,6 +821,35 @@ async def _process_wantlist( # Get peer protocol for response format peer_protocol = self._peer_protocols.get(peer_id, BITSWAP_PROTOCOL_V100) + # ── 1.3.0 payment-gated wantlist handling ────────────────────────── + if str(peer_protocol) == str(BITSWAP_PROTOCOL_V130) and self.payment_engine: + for entry in wantlist.entries: + entry_cid = parse_cid(entry.block) + if entry.cancel: + if entry_cid in peer_wantlist: + del peer_wantlist[entry_cid] + continue + + peer_wantlist[entry_cid] = { + "priority": entry.priority, + "want_type": entry.wantType, + "send_dont_have": entry.sendDontHave, + } + + response_msg = await self.payment_engine.handle_want( + peer_id=str(peer_id), + cid=entry.block, + want_type=entry.wantType, + send_dont_have=entry.sendDontHave, + peer_protocol=str(BITSWAP_PROTOCOL_V130), + ) + if response_msg is not None: + await self._write_message_bytes( + stream, response_msg.SerializeToString() + ) + return + + # ── Standard 1.0.0–1.2.0 wantlist handling ──────────────────────── # Process entries blocks_to_send_v100 = [] # For v1.0.0 blocks_to_send_v110 = [] # For v1.1.0+ @@ -1086,3 +1211,59 @@ async def _write_message(self, stream: INetStream, msg: Message) -> None: # Write length prefix and message length_prefix = varint.encode(len(msg_bytes)) await stream.write(length_prefix + msg_bytes) + + async def _write_message_bytes( + self, stream: INetStream, msg_bytes: bytes + ) -> None: + """ + Write pre-serialized message bytes (for 1.3.0 Message_1_3 objects). + """ + if len(msg_bytes) > MAX_MESSAGE_SIZE: + raise MessageTooLargeError( + f"Message size {len(msg_bytes)} exceeds maximum {MAX_MESSAGE_SIZE}" + ) + length_prefix = varint.encode(len(msg_bytes)) + await stream.write(length_prefix + msg_bytes) + + async def _process_block_presences_1_3( + self, presences: Any, peer_id: PeerID + ) -> None: + """ + Process block presences from a 1.3.0 message. + Handles PaymentRequired (type=2) in addition to Have/DontHave. + """ + for presence in presences: + cid_bytes = bytes(presence.cid) + try: + cid = parse_cid(cid_bytes) + except Exception: + continue + + presence_type = presence.type + + if presence_type == 0: # Have + if peer_id not in self._expected_blocks: + self._expected_blocks[peer_id] = set() + self._expected_blocks[peer_id].add(cid) + logger.debug( + f"[1.3.0] Peer {peer_id} has block " + f"{format_cid_for_display(cid, max_len=16)}" + ) + elif presence_type == 1: # DontHave + if cid not in self._dont_have_responses: + self._dont_have_responses[cid] = set() + self._dont_have_responses[cid].add(peer_id) + logger.info( + f"[1.3.0] Peer {peer_id} doesn't have block " + f"{format_cid_for_display(cid, max_len=16)}" + ) + elif presence_type == 2: # PaymentRequired + logger.info( + f"[1.3.0] Peer {peer_id} requires payment for block " + f"{format_cid_for_display(cid, max_len=16)} " + f"(PaymentTerms will follow in same message)" + ) + # The payment_client will handle PaymentTerms + # in process_incoming_message + + diff --git a/libp2p/bitswap/config.py b/libp2p/bitswap/config.py index 6fc3f2bfb..5de84aca5 100644 --- a/libp2p/bitswap/config.py +++ b/libp2p/bitswap/config.py @@ -8,9 +8,11 @@ BITSWAP_PROTOCOL_V100 = TProtocol("/ipfs/bitswap/1.0.0") BITSWAP_PROTOCOL_V110 = TProtocol("/ipfs/bitswap/1.1.0") BITSWAP_PROTOCOL_V120 = TProtocol("/ipfs/bitswap/1.2.0") +BITSWAP_PROTOCOL_V130 = TProtocol("/ipfs/bitswap/1.3.0") # All supported protocols (ordered from newest to oldest for negotiation) BITSWAP_PROTOCOLS = [ + BITSWAP_PROTOCOL_V130, BITSWAP_PROTOCOL_V120, BITSWAP_PROTOCOL_V110, BITSWAP_PROTOCOL_V100, @@ -28,7 +30,7 @@ MAX_BLOCK_SIZE = 63 * 1024 # Default timeout for operations (in seconds) -DEFAULT_TIMEOUT = 30 +DEFAULT_TIMEOUT = 90 # Maximum number of concurrent block requests MAX_CONCURRENT_REQUESTS = 100 diff --git a/libp2p/bitswap/dag.py b/libp2p/bitswap/dag.py index 9283fdcf9..5a6087ebe 100644 --- a/libp2p/bitswap/dag.py +++ b/libp2p/bitswap/dag.py @@ -327,6 +327,22 @@ async def add_file( ) root_cid, root_data = balanced_layout(leaf_triples) + # Create a sync wrapper for the async _put_block method + # We'll collect (cid, data) pairs and store them after + internal_nodes: list[tuple[bytes, bytes]] = [] + + def store_internal_node(cid: bytes, data: bytes) -> None: + """Callback to collect internal nodes for storage.""" + internal_nodes.append((cid, data)) + + root_cid, root_data = balanced_layout(leaf_triples, put_block_callback=store_internal_node) + + # Store all internal nodes + logger.info(f"Storing {len(internal_nodes)} internal DAG nodes...") + for cid, data in internal_nodes: + await self._put_block(cid, data) + + # Store the root node await self._put_block(root_cid, root_data) # Enhanced logging for root CID diff --git a/libp2p/bitswap/dag_pb.py b/libp2p/bitswap/dag_pb.py index 164add080..08d22e0c4 100644 --- a/libp2p/bitswap/dag_pb.py +++ b/libp2p/bitswap/dag_pb.py @@ -5,7 +5,7 @@ which is used by IPFS to represent files and directories as Merkle DAGs. """ -from collections.abc import Sequence +from collections.abc import Callable, Sequence from dataclasses import dataclass, field import logging @@ -321,6 +321,7 @@ def create_leaf_node(data: bytes) -> bytes: def balanced_layout( leaves: list[tuple[bytes, bytes, int]], max_links: int = MAX_LINKS_PER_NODE, + put_block_callback: Callable[[bytes, bytes], None] | None = None, ) -> tuple[bytes, bytes]: """ Build a balanced Merkle DAG from a flat list of leaf blocks. @@ -336,7 +337,8 @@ def balanced_layout( - file_data_size: Size of the raw file data inside this leaf (i.e. len(original chunk), NOT len(block)) max_links: Max links per internal node (default 174, matches Kubo) - + put_block_callback: Optional async callback to store each internal node + Signature: callback(cid_bytes, block_bytes) Returns: (root_cid_bytes, root_block_bytes) @@ -381,6 +383,10 @@ def balanced_layout( ) internal_block = encode_dag_pb(internal_links, unixfs_data) internal_cid = compute_cid_v1(internal_block, codec=CODEC_DAG_PB) + + # Store internal node if callback provided + if put_block_callback is not None: + put_block_callback(internal_cid, internal_block) # cumulative size = own block + sum of children's cumulative sizes cum_size = len(internal_block) + total_cum next_level.append((internal_cid, internal_block, total_filesize, cum_size)) diff --git a/libp2p/bitswap/gated_decision_engine.py b/libp2p/bitswap/gated_decision_engine.py new file mode 100644 index 000000000..827cffeed --- /dev/null +++ b/libp2p/bitswap/gated_decision_engine.py @@ -0,0 +1,327 @@ +""" +Payment-Gated Decision Engine for Bitswap 1.3.0. + +Extends the standard Bitswap block serving logic with payment gating: +- If a block is free (small), serve it directly. +- If a block requires payment and the peer has NOT paid, respond with + PaymentRequired (type=2) + PaymentTerms in-band (1.3.0 path) or + DONT_HAVE + side-channel (1.2.0 fallback path). +- If the peer HAS paid, serve the block normally. + +This module lives in py-libp2p so it's importable as libp2p.bitswap. +""" + +import logging +import os +import time +from typing import Any, Optional + +from libp2p.bitswap.block_store import BlockStore +from libp2p.bitswap.cid import parse_cid +from libp2p.bitswap.pb.bitswap_1_3_0_pb2 import Message as Message_1_3 +from libp2p.bitswap.pb.bitswap_pb2 import Message as Message_1_2 + +logger = logging.getLogger(__name__) + +BITSWAP_PROTOCOL_V120 = "/ipfs/bitswap/1.2.0" +BITSWAP_PROTOCOL_V130 = "/ipfs/bitswap/1.3.0" + + +class PaymentGatedDecisionEngine: + """ + Decides whether to serve a block or gate it behind payment. + + Integrates with: + - gooseswarm.payments.ledger.PaymentLedger — tracks paid (peer, cid) pairs + - gooseswarm.payments.pricing.BlockPricingEngine — computes prices + - gooseswarm.payments.facilitator.FacilitatorClient — verifies EIP-712 sigs + + Usage: + engine = PaymentGatedDecisionEngine( + blockstore=my_blockstore, + ledger=my_ledger, + pricing=my_pricing, + facilitator=my_facilitator, + server_wallet="0x...", + ) + # Wire into BitswapClient as a message handler + """ + + def __init__( + self, + blockstore: BlockStore, + ledger: Any, # gooseswarm.payments.ledger.PaymentLedger + pricing: Any, # gooseswarm.payments.pricing.BlockPricingEngine + facilitator: Any, # gooseswarm.payments.facilitator.FacilitatorClient + server_wallet: str = "", + host: Any = None, + ): + self.blockstore = blockstore + self.ledger = ledger + self.pricing = pricing + self.facilitator = facilitator + self.server_wallet = server_wallet or ( + facilitator.server_wallet if facilitator else "" + ) + self.host = host + + # Pending payment offers: nonce_bytes → offer_dict + self._pending_offers: dict[bytes, dict] = {} + + # Callbacks for sending messages back to peers + # Set externally: engine.send_message_callback = async_fn(peer_id, msg_bytes) + self.send_message_callback = None + + async def handle_want( + self, + peer_id: str, + cid: str | bytes, + want_type: int, # 0 = WANT_BLOCK, 1 = WANT_HAVE + send_dont_have: bool, + peer_protocol: str = BITSWAP_PROTOCOL_V120, + ) -> Optional[Message_1_3 | Message_1_2]: + """ + Process a WANT request from a peer. + + Returns a Message to send back, or None if nothing should be sent. + """ + cid_str = _cid_to_str(cid) + cid_bytes = _cid_to_bytes(cid) + cid_obj = parse_cid(cid_bytes) + + # Check blockstore + block_data = await self.blockstore.get_block(cid_obj) + + if block_data is None: + # We don't have the block + if send_dont_have: + return self._make_dont_have(cid_bytes, peer_protocol) + return None + + block_size = len(block_data) + + # Compute price + price = self.pricing.compute_price(cid_str, block_size) + + if price == 0 or self.ledger.is_paid(peer_id, cid_str, block_size): + # Free block or already paid — serve it + if want_type == 1: # WANT_HAVE + return self._make_have(cid_bytes, peer_protocol) + else: # WANT_BLOCK + return self._make_block_response(cid_bytes, block_data, peer_protocol) + else: + # Payment required + if peer_protocol == BITSWAP_PROTOCOL_V130: + return await self._make_payment_required_1_3( + peer_id, cid_bytes, block_size, price + ) + else: + # 1.2.0 fallback: send DONT_HAVE (side-channel is handled separately) + if send_dont_have: + return self._make_dont_have(cid_bytes, peer_protocol) + return None + + async def handle_payment_authorization( + self, + peer_id: str, + auth: Any, # pb_1_3.Message.PaymentAuthorization + ) -> Message_1_3: + """ + Process a PaymentAuthorization from a client. + Returns a PaymentReceipt or PaymentRejection message. + """ + nonce = bytes(auth.nonce) + + # Validate against pending offer + offer = self._pending_offers.pop(nonce, None) + if offer is None: + logger.warning(f"No pending offer for nonce {nonce.hex()[:10]}... from {peer_id[:20]}...") + return self._make_payment_rejection(auth.cid, "NO_PENDING_OFFER") + + if offer["peer_id"] != peer_id: + return self._make_payment_rejection(auth.cid, "PEER_MISMATCH") + + # Check nonce replay + if self.ledger.is_nonce_used(nonce): + return self._make_payment_rejection(auth.cid, "NONCE_USED") + + # Check amount + if auth.value < offer["amount"]: + reason = f"WRONG_AMOUNT:need={offer['amount']},got={auth.value}" + return self._make_payment_rejection(auth.cid, reason) + + # Check expiry + if offer["valid_before"] < int(time.time()): + return self._make_payment_rejection(auth.cid, "EXPIRED") + + # Verify EIP-712 signature + result = await self.facilitator.verify( + from_address=auth.from_address, + to_address=auth.to_address, + value=auth.value, + valid_after=auth.valid_after, + valid_before=auth.valid_before, + nonce=nonce, + v=auth.v, + r=bytes(auth.r), + s=bytes(auth.s), + ) + + if not result.valid: + return self._make_payment_rejection(auth.cid, result.error) + + # Record payment in ledger + try: + await self.ledger.record_payment( + peer_id=peer_id, + cid=bytes(auth.cid), + tx_hash=result.tx_hash, + amount=auth.value, + nonce=nonce, + ) + except ValueError as e: + return self._make_payment_rejection(auth.cid, str(e)) + + # Send PaymentReceipt + the block data + msg = Message_1_3() + receipt = msg.payment_receipts.add() + receipt.cid = bytes(auth.cid) + receipt.tx_hash = result.tx_hash + receipt.expires = int(time.time()) + 86400 * 7 # 7 days + + # Include the paid block in the response + block_data = await self.blockstore.get_block(parse_cid(bytes(auth.cid))) + if block_data is not None: + block_entry = msg.payload.add() + block_entry.prefix = bytes(auth.cid)[:4] + block_entry.data = block_data + logger.info( + f"Payment accepted + block sent to {peer_id[:20]}... " + f"cid={bytes(auth.cid).hex()[:20]}... amount={auth.value} " + f"size={len(block_data)} bytes" + ) + else: + logger.warning( + f"Payment accepted but block not found locally: " + f"cid={bytes(auth.cid).hex()[:20]}..." + ) + logger.info( + f"Payment accepted from {peer_id[:20]}... " + f"cid={bytes(auth.cid).hex()[:20]}... amount={auth.value}" + ) + return msg + + async def process_incoming_1_3_message( + self, peer_id: str, msg: Message_1_3 + ) -> Optional[Message_1_3]: + """ + Process an incoming 1.3.0 message that may contain PaymentAuthorizations. + Returns a response message or None. + """ + if msg.payment_authorizations: + # Process the first authorization (typically one per message) + for auth in msg.payment_authorizations: + return await self.handle_payment_authorization(peer_id, auth) + return None + + # ── Internal helpers ────────────────────────────────────────────────── + + async def _make_payment_required_1_3( + self, + peer_id: str, + cid_bytes: bytes, + block_size: int, + amount: int, + ) -> Message_1_3: + """Build a 1.3.0 PaymentRequired message with embedded PaymentTerms.""" + nonce = os.urandom(32) + valid_before = int(time.time()) + 120 # 2 minute window + + # Store pending offer for when PaymentAuthorization arrives + self._pending_offers[nonce] = { + "peer_id": peer_id, + "cid": cid_bytes, + "amount": amount, + "valid_before": valid_before, + } + + msg = Message_1_3() + + # BlockPresence with type=2 (PaymentRequired) + presence = msg.blockPresences.add() + presence.cid = cid_bytes + presence.type = Message_1_3.PaymentRequired # = 2 + + # PaymentTerms in field 6 + terms = msg.payment_terms.add() + terms.cid = cid_bytes + terms.asset = self.facilitator.usdc_address if self.facilitator else "" + terms.pay_to = self.server_wallet + terms.amount = amount + terms.network = getattr(self.facilitator, "network", "base-sepolia") + terms.nonce = nonce + terms.valid_before = valid_before + terms.block_size = block_size + terms.description = ( + f"Block {cid_bytes.hex()[:20]}... ({block_size // 1024}KB)" + ) + terms.scheme = "exact" + + logger.info( + f"Sending PaymentRequired to {peer_id[:20]}... " + f"cid={cid_bytes.hex()[:20]}... amount={amount} units" + ) + return msg + + def _make_have(self, cid_bytes: bytes, protocol: str) -> Message_1_3 | Message_1_2: + MsgClass = Message_1_3 if protocol == BITSWAP_PROTOCOL_V130 else Message_1_2 + msg = MsgClass() + presence = msg.blockPresences.add() + presence.cid = cid_bytes + presence.type = MsgClass.Have # = 0 + return msg + + def _make_dont_have(self, cid_bytes: bytes, protocol: str) -> Message_1_3 | Message_1_2: + MsgClass = Message_1_3 if protocol == BITSWAP_PROTOCOL_V130 else Message_1_2 + msg = MsgClass() + presence = msg.blockPresences.add() + presence.cid = cid_bytes + presence.type = MsgClass.DontHave # = 1 + return msg + + def _make_block_response( + self, cid_bytes: bytes, block_data: bytes, protocol: str + ) -> Message_1_3 | Message_1_2: + MsgClass = Message_1_3 if protocol == BITSWAP_PROTOCOL_V130 else Message_1_2 + msg = MsgClass() + block = msg.payload.add() + block.data = block_data + # CID prefix: first 4 bytes of CID bytes (version + codec) + block.prefix = cid_bytes[:4] if len(cid_bytes) >= 4 else cid_bytes + return msg + + def _make_payment_rejection( + self, cid_bytes: bytes, reason: str + ) -> Message_1_3: + msg = Message_1_3() + rej = msg.payment_rejections.add() + rej.cid = bytes(cid_bytes) + rej.reason = reason + logger.warning(f"Payment rejected: cid={bytes(cid_bytes).hex()[:20]}... reason={reason}") + return msg + + +def _cid_to_str(cid: str | bytes) -> str: + if isinstance(cid, bytes): + return cid.hex() + return cid + + +def _cid_to_bytes(cid: str | bytes) -> bytes: + if isinstance(cid, str): + # Try hex decode first + try: + return bytes.fromhex(cid.lstrip("0x")) + except ValueError: + return cid.encode() + return cid diff --git a/libp2p/bitswap/payment_client_1_3.py b/libp2p/bitswap/payment_client_1_3.py new file mode 100644 index 000000000..22e542a41 --- /dev/null +++ b/libp2p/bitswap/payment_client_1_3.py @@ -0,0 +1,224 @@ +""" +Bitswap 1.3.0 Payment Client. + +Client-side handler for in-band payment messages. When the server sends +a PAYMENT_REQUIRED response with PaymentTerms, this client: +1. Validates the price is acceptable +2. Signs an EIP-3009 authorization +3. Sends back a PaymentAuthorization in the same Bitswap stream +4. On receipt of PaymentReceipt, triggers a WANT_BLOCK retry + +This module lives in py-libp2p so it's importable as libp2p.bitswap. +""" + +import logging +from typing import Any, Callable, Optional + +from libp2p.bitswap.pb.bitswap_1_3_0_pb2 import Message as Message_1_3 + +logger = logging.getLogger(__name__) + +# Default maximum auto-pay threshold: $0.001 USDC = 1000 micro-units +DEFAULT_MAX_AUTO_PAY_UNITS = 1000 + + +class BitswapPaymentClient_1_3: + """ + Client-side handler for Bitswap 1.3.0 payment messages. + + Processes PaymentTerms from incoming messages and auto-pays if the + amount is within the configured threshold. + + Args: + signer: An EIP3009Signer instance (gooseswarm.payments.eip3009_signer) + want_manager: Object with retry_want_block(peer_id, cid) async method + max_auto_pay_usdc: Maximum amount to auto-pay in USDC (default $0.001) + send_callback: Async function(peer_id, msg_bytes) to send responses + + """ + + def __init__( + self, + signer: Any, # gooseswarm.payments.eip3009_signer.EIP3009Signer + want_manager: Any, # has retry_want_block(peer_id, cid) method + max_auto_pay_usdc: float = 0.001, + send_callback: Optional[Callable] = None, + ): + self.signer = signer + self.want_manager = want_manager + self.max_auto_pay_units = int(max_auto_pay_usdc * 1_000_000) + self.send_callback = send_callback + + # Pending payments: nonce_hex → {peer_id, cid, terms} + self._pending_payments: dict[str, dict] = {} + + async def process_incoming_message( + self, peer_id: str, msg: Message_1_3 + ) -> Optional[Message_1_3]: + """ + Called by the Bitswap dispatcher for every incoming 1.3.0 message. + + Handles: + - PaymentTerms → sign and send PaymentAuthorization + - PaymentReceipts → retry WANT_BLOCK + - PaymentRejections → log and surface to application + + Returns a response Message to send back, or None. + """ + # Handle payment terms (server telling us what a block costs) + if msg.payment_terms: + for terms in msg.payment_terms: + response = await self._handle_payment_terms(peer_id, terms) + if response: + return response + + # Handle receipts (server confirming our payment) + for receipt in msg.payment_receipts: + await self._handle_payment_receipt(peer_id, receipt) + + # Handle rejections + for rejection in msg.payment_rejections: + self._handle_payment_rejection(peer_id, rejection) + + return None + + async def build_payment_auth_msg( + self, terms: Any # Message_1_3.PaymentTerms + ) -> Message_1_3: + """ + Build a PaymentAuthorization message for the given PaymentTerms. + Used by tests and demo scripts. + """ + v, r, s = self.signer.sign_transfer_authorization( + to=terms.pay_to, + value=terms.amount, + nonce=bytes(terms.nonce), + valid_before=terms.valid_before, + ) + + msg = Message_1_3() + auth = msg.payment_authorizations.add() + auth.cid = bytes(terms.cid) + auth.from_address = self.signer.address + auth.to_address = terms.pay_to + auth.value = terms.amount + auth.valid_after = 0 + auth.valid_before = terms.valid_before + auth.nonce = bytes(terms.nonce) + auth.v = v + auth.r = r + auth.s = s + auth.scheme = terms.scheme + return msg + + # ── Internal handlers ───────────────────────────────────────────────── + + async def _handle_payment_terms( + self, peer_id: str, terms: Any + ) -> Optional[Message_1_3]: + """ + Server sent us PaymentTerms alongside a PaymentRequired BlockPresence. + Decide whether to pay and send back a PaymentAuthorization. + """ + amount = terms.amount + + # Reject if too expensive + if amount > self.max_auto_pay_units: + logger.info( + f"Block too expensive: {amount} units > max {self.max_auto_pay_units} units. " + f"Skipping — will seek block elsewhere." + ) + return None + + # Validate pricing isn't a lie (10% tolerance) + expected_amount = self._expected_price(terms.block_size) + if expected_amount > 0 and amount > expected_amount * 1.1: + logger.warning( + f"Server overcharging: asked {amount}, expected ~{expected_amount}. " + f"Skipping payment." + ) + return None + + # Sign EIP-3009 authorization + try: + v, r, s = self.signer.sign_transfer_authorization( + to=terms.pay_to, + value=amount, + nonce=bytes(terms.nonce), + valid_before=terms.valid_before, + ) + except Exception as e: + logger.error(f"Failed to sign payment authorization: {e}") + return None + + # Build PaymentAuthorization message + response = Message_1_3() + auth = response.payment_authorizations.add() + auth.cid = bytes(terms.cid) + auth.from_address = self.signer.address + auth.to_address = terms.pay_to + auth.value = amount + auth.valid_after = 0 + auth.valid_before = terms.valid_before + auth.nonce = bytes(terms.nonce) + auth.v = v + auth.r = r + auth.s = s + auth.scheme = terms.scheme + + # Track pending payment + nonce_hex = bytes(terms.nonce).hex() + self._pending_payments[nonce_hex] = { + "peer_id": peer_id, + "cid": bytes(terms.cid).hex(), + "amount": amount, + } + + logger.info( + f"Sending PaymentAuthorization to {peer_id[:20]}... " + f"cid={bytes(terms.cid).hex()[:20]}... amount={amount} units " + f"(${amount / 1_000_000:.6f} USDC)" + ) + return response + + async def _handle_payment_receipt( + self, peer_id: str, receipt: Any + ) -> None: + """Server confirmed payment. Retry the WANT_BLOCK immediately.""" + cid_hex = ( + bytes(receipt.cid).hex() + if isinstance(receipt.cid, bytes) + else receipt.cid + ) + logger.info( + f"Payment receipt received from {peer_id[:20]}... " + f"cid={cid_hex[:20]}... tx={receipt.tx_hash[:20] if receipt.tx_hash else 'optimistic'}..." + ) + # Trigger want manager to retry + if self.want_manager: + try: + await self.want_manager.retry_want_block(peer_id, cid_hex) + except Exception as e: + logger.error(f"Failed to retry want block: {e}") + + def _handle_payment_rejection(self, peer_id: str, rejection: Any) -> None: + """Log and surface payment rejection.""" + cid_hex = ( + bytes(rejection.cid).hex() + if isinstance(rejection.cid, bytes) + else rejection.cid + ) + logger.warning( + f"Payment rejected by {peer_id[:20]}... " + f"cid={cid_hex[:20]}... reason={rejection.reason}" + ) + + def _expected_price(self, block_size_bytes: int) -> int: + """ + Client-side price oracle — must roughly match server pricing. + Used to detect overcharging. + """ + if block_size_bytes <= 4096: + return 0 + kb = block_size_bytes / 1024 + return int(kb * 10) # 10 units per KB baseline diff --git a/libp2p/bitswap/pb/bitswap_1_3_0.proto b/libp2p/bitswap/pb/bitswap_1_3_0.proto new file mode 100644 index 000000000..bd3196efb --- /dev/null +++ b/libp2p/bitswap/pb/bitswap_1_3_0.proto @@ -0,0 +1,104 @@ +// bitswap_1_3_0.proto +// Bitswap 1.3.0 — adds PAYMENT_REQUIRED block presence and in-band payment flow +// Backward compatible with 1.2.0: new fields use field numbers 6, 7, 8, 9 +// New enum value PaymentRequired = 2 (proto3 open enums — safe for old parsers) + +syntax = "proto3"; + +package bitswap.pb.v130; + +message Message { + + // ─── EXISTING: Wantlist (unchanged from 1.2.0) ───────────────────────── + message Wantlist { + enum WantType { + Block = 0; // default: client wants the full block + Have = 1; // client only wants to know if server has it + } + message Entry { + bytes block = 1; // CID bytes (CIDv1 binary) + int32 priority = 2; // higher = serve first; default 1 + bool cancel = 3; // true = remove from wantlist + WantType wantType = 4; // Block or Have + bool sendDontHave = 5; // server MUST respond DONT_HAVE if missing + } + repeated Entry entries = 1; + bool full = 2; // true = authoritative wantlist replacement + } + + // ─── EXISTING: Block payload (unchanged from 1.1.0) ───────────────────── + message Block { + bytes prefix = 1; // CID prefix: version + codec varint + bytes data = 2; // raw block bytes + } + + // ─── EXTENDED: BlockPresenceType — NEW value PaymentRequired = 2 ──────── + enum BlockPresenceType { + Have = 0; // server has the block + DontHave = 1; // server genuinely does not have the block + PaymentRequired = 2; // [NEW 1.3.0] server has the block but requires payment + // Old parsers: see integer 2, no matching case → skip entry + } + + // ─── EXISTING: BlockPresence (unchanged structure, extended enum) ──────── + message BlockPresence { + bytes cid = 1; + BlockPresenceType type = 2; // Now can be 0, 1, or 2 + } + + // ─── NEW 1.3.0: PaymentTerms — embedded in Message when type=PaymentRequired + message PaymentTerms { + bytes cid = 1; // CID of the gated block + string asset = 2; // Token contract address + string pay_to = 3; // Server's wallet address + uint64 amount = 4; // Amount in token's smallest unit + string network = 5; // Chain identifier: "base-sepolia" | "base-mainnet" + bytes nonce = 6; // Random 32 bytes — per-offer, prevents replay attacks + uint64 valid_before = 7; // Unix timestamp: offer expires after this + uint64 block_size = 8; // Actual block size in bytes + string description = 9; // Human-readable description + string scheme = 10; // Payment scheme: "exact" (EIP-3009) + } + + // ─── NEW 1.3.0: PaymentAuthorization — client's signed proof of payment + message PaymentAuthorization { + bytes cid = 1; // CID being paid for + string from_address = 2; // Client's wallet address + string to_address = 3; // Must match PaymentTerms.pay_to + uint64 value = 4; // Must be >= PaymentTerms.amount + uint64 valid_after = 5; // EIP-3009 validAfter (typically 0) + uint64 valid_before = 6; // EIP-3009 validBefore + bytes nonce = 7; // Must match PaymentTerms.nonce exactly + uint32 v = 8; // ECDSA signature recovery id (27 or 28) + bytes r = 9; // ECDSA r component (32 bytes) + bytes s = 10; // ECDSA s component (32 bytes) + string scheme = 11; // Must match PaymentTerms.scheme + } + + // ─── NEW 1.3.0: PaymentReceipt — server confirms payment accepted + message PaymentReceipt { + bytes cid = 1; // CID now authorized to be served + string tx_hash = 2; // On-chain tx hash (empty in OPTIMISTIC mode) + uint64 expires = 3; // Unix ts: this authorization is valid until this time + } + + // ─── NEW 1.3.0: PaymentRejection — server rejects a PaymentAuthorization + message PaymentRejection { + bytes cid = 1; + string reason = 2; // "INVALID_SIGNATURE" | "WRONG_AMOUNT" | "NONCE_USED" | "EXPIRED" + } + + // ─── TOP-LEVEL MESSAGE FIELDS ────────────────────────────────────────── + // Fields 1-5: identical to Bitswap 1.2.0 (never modified) + Wantlist wantlist = 1; + repeated bytes blocks = 2; // deprecated since 1.1.0 + repeated Block payload = 3; + repeated BlockPresence blockPresences = 4; // type=2 means PAYMENT_REQUIRED + int32 pendingBytes = 5; + + // Fields 6-9: NEW in 1.3.0 (safe unknown fields for old parsers) + repeated PaymentTerms payment_terms = 6; // server → client + repeated PaymentAuthorization payment_authorizations = 7; // client → server + repeated PaymentReceipt payment_receipts = 8; // server → client + repeated PaymentRejection payment_rejections = 9; // server → client +} diff --git a/libp2p/bitswap/pb/bitswap_1_3_0_pb2.py b/libp2p/bitswap/pb/bitswap_1_3_0_pb2.py new file mode 100644 index 000000000..24b07bf75 --- /dev/null +++ b/libp2p/bitswap/pb/bitswap_1_3_0_pb2.py @@ -0,0 +1,46 @@ +# -*- coding: utf-8 -*- +# Generated by the protocol buffer compiler. DO NOT EDIT! +# source: bitswap_1_3_0.proto +# Protobuf Python Version: 4.25.1 +"""Generated protocol buffer code.""" +from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pool as _descriptor_pool +from google.protobuf import symbol_database as _symbol_database +from google.protobuf.internal import builder as _builder +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + + + +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x13\x62itswap_1_3_0.proto\x12\x0f\x62itswap.pb.v130\"\xaa\x0b\n\x07Message\x12\x33\n\x08wantlist\x18\x01 \x01(\x0b\x32!.bitswap.pb.v130.Message.Wantlist\x12\x0e\n\x06\x62locks\x18\x02 \x03(\x0c\x12/\n\x07payload\x18\x03 \x03(\x0b\x32\x1e.bitswap.pb.v130.Message.Block\x12>\n\x0e\x62lockPresences\x18\x04 \x03(\x0b\x32&.bitswap.pb.v130.Message.BlockPresence\x12\x14\n\x0cpendingBytes\x18\x05 \x01(\x05\x12<\n\rpayment_terms\x18\x06 \x03(\x0b\x32%.bitswap.pb.v130.Message.PaymentTerms\x12M\n\x16payment_authorizations\x18\x07 \x03(\x0b\x32-.bitswap.pb.v130.Message.PaymentAuthorization\x12\x41\n\x10payment_receipts\x18\x08 \x03(\x0b\x32\'.bitswap.pb.v130.Message.PaymentReceipt\x12\x45\n\x12payment_rejections\x18\t \x03(\x0b\x32).bitswap.pb.v130.Message.PaymentRejection\x1a\x82\x02\n\x08Wantlist\x12\x38\n\x07\x65ntries\x18\x01 \x03(\x0b\x32\'.bitswap.pb.v130.Message.Wantlist.Entry\x12\x0c\n\x04\x66ull\x18\x02 \x01(\x08\x1a\x8c\x01\n\x05\x45ntry\x12\r\n\x05\x62lock\x18\x01 \x01(\x0c\x12\x10\n\x08priority\x18\x02 \x01(\x05\x12\x0e\n\x06\x63\x61ncel\x18\x03 \x01(\x08\x12<\n\x08wantType\x18\x04 \x01(\x0e\x32*.bitswap.pb.v130.Message.Wantlist.WantType\x12\x14\n\x0csendDontHave\x18\x05 \x01(\x08\"\x1f\n\x08WantType\x12\t\n\x05\x42lock\x10\x00\x12\x08\n\x04Have\x10\x01\x1a%\n\x05\x42lock\x12\x0e\n\x06prefix\x18\x01 \x01(\x0c\x12\x0c\n\x04\x64\x61ta\x18\x02 \x01(\x0c\x1aV\n\rBlockPresence\x12\x0b\n\x03\x63id\x18\x01 \x01(\x0c\x12\x38\n\x04type\x18\x02 \x01(\x0e\x32*.bitswap.pb.v130.Message.BlockPresenceType\x1a\xb9\x01\n\x0cPaymentTerms\x12\x0b\n\x03\x63id\x18\x01 \x01(\x0c\x12\r\n\x05\x61sset\x18\x02 \x01(\t\x12\x0e\n\x06pay_to\x18\x03 \x01(\t\x12\x0e\n\x06\x61mount\x18\x04 \x01(\x04\x12\x0f\n\x07network\x18\x05 \x01(\t\x12\r\n\x05nonce\x18\x06 \x01(\x0c\x12\x14\n\x0cvalid_before\x18\x07 \x01(\x04\x12\x12\n\nblock_size\x18\x08 \x01(\x04\x12\x13\n\x0b\x64\x65scription\x18\t \x01(\t\x12\x0e\n\x06scheme\x18\n \x01(\t\x1a\xc7\x01\n\x14PaymentAuthorization\x12\x0b\n\x03\x63id\x18\x01 \x01(\x0c\x12\x14\n\x0c\x66rom_address\x18\x02 \x01(\t\x12\x12\n\nto_address\x18\x03 \x01(\t\x12\r\n\x05value\x18\x04 \x01(\x04\x12\x13\n\x0bvalid_after\x18\x05 \x01(\x04\x12\x14\n\x0cvalid_before\x18\x06 \x01(\x04\x12\r\n\x05nonce\x18\x07 \x01(\x0c\x12\t\n\x01v\x18\x08 \x01(\r\x12\t\n\x01r\x18\t \x01(\x0c\x12\t\n\x01s\x18\n \x01(\x0c\x12\x0e\n\x06scheme\x18\x0b \x01(\t\x1a?\n\x0ePaymentReceipt\x12\x0b\n\x03\x63id\x18\x01 \x01(\x0c\x12\x0f\n\x07tx_hash\x18\x02 \x01(\t\x12\x0f\n\x07\x65xpires\x18\x03 \x01(\x04\x1a/\n\x10PaymentRejection\x12\x0b\n\x03\x63id\x18\x01 \x01(\x0c\x12\x0e\n\x06reason\x18\x02 \x01(\t\"@\n\x11\x42lockPresenceType\x12\x08\n\x04Have\x10\x00\x12\x0c\n\x08\x44ontHave\x10\x01\x12\x13\n\x0fPaymentRequired\x10\x02\x62\x06proto3') + +_globals = globals() +_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) +_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'bitswap_1_3_0_pb2', _globals) +if _descriptor._USE_C_DESCRIPTORS == False: + DESCRIPTOR._options = None + _globals['_MESSAGE']._serialized_start=41 + _globals['_MESSAGE']._serialized_end=1491 + _globals['_MESSAGE_WANTLIST']._serialized_start=536 + _globals['_MESSAGE_WANTLIST']._serialized_end=794 + _globals['_MESSAGE_WANTLIST_ENTRY']._serialized_start=621 + _globals['_MESSAGE_WANTLIST_ENTRY']._serialized_end=761 + _globals['_MESSAGE_WANTLIST_WANTTYPE']._serialized_start=763 + _globals['_MESSAGE_WANTLIST_WANTTYPE']._serialized_end=794 + _globals['_MESSAGE_BLOCK']._serialized_start=796 + _globals['_MESSAGE_BLOCK']._serialized_end=833 + _globals['_MESSAGE_BLOCKPRESENCE']._serialized_start=835 + _globals['_MESSAGE_BLOCKPRESENCE']._serialized_end=921 + _globals['_MESSAGE_PAYMENTTERMS']._serialized_start=924 + _globals['_MESSAGE_PAYMENTTERMS']._serialized_end=1109 + _globals['_MESSAGE_PAYMENTAUTHORIZATION']._serialized_start=1112 + _globals['_MESSAGE_PAYMENTAUTHORIZATION']._serialized_end=1311 + _globals['_MESSAGE_PAYMENTRECEIPT']._serialized_start=1313 + _globals['_MESSAGE_PAYMENTRECEIPT']._serialized_end=1376 + _globals['_MESSAGE_PAYMENTREJECTION']._serialized_start=1378 + _globals['_MESSAGE_PAYMENTREJECTION']._serialized_end=1425 + _globals['_MESSAGE_BLOCKPRESENCETYPE']._serialized_start=1427 + _globals['_MESSAGE_BLOCKPRESENCETYPE']._serialized_end=1491 +# @@protoc_insertion_point(module_scope) diff --git a/libp2p/bitswap/pb/bitswap_1_3_0_pb2.pyi b/libp2p/bitswap/pb/bitswap_1_3_0_pb2.pyi new file mode 100644 index 000000000..1864ea639 --- /dev/null +++ b/libp2p/bitswap/pb/bitswap_1_3_0_pb2.pyi @@ -0,0 +1,141 @@ +from google.protobuf.internal import containers as _containers +from google.protobuf.internal import enum_type_wrapper as _enum_type_wrapper +from google.protobuf import descriptor as _descriptor +from google.protobuf import message as _message +from typing import ClassVar as _ClassVar, Iterable as _Iterable, Mapping as _Mapping, Optional as _Optional, Union as _Union + +DESCRIPTOR: _descriptor.FileDescriptor + +class Message(_message.Message): + __slots__ = ("wantlist", "blocks", "payload", "blockPresences", "pendingBytes", "payment_terms", "payment_authorizations", "payment_receipts", "payment_rejections") + class BlockPresenceType(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): + __slots__ = () + Have: _ClassVar[Message.BlockPresenceType] + DontHave: _ClassVar[Message.BlockPresenceType] + PaymentRequired: _ClassVar[Message.BlockPresenceType] + Have: Message.BlockPresenceType + DontHave: Message.BlockPresenceType + PaymentRequired: Message.BlockPresenceType + class Wantlist(_message.Message): + __slots__ = ("entries", "full") + class WantType(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): + __slots__ = () + Block: _ClassVar[Message.Wantlist.WantType] + Have: _ClassVar[Message.Wantlist.WantType] + Block: Message.Wantlist.WantType + Have: Message.Wantlist.WantType + class Entry(_message.Message): + __slots__ = ("block", "priority", "cancel", "wantType", "sendDontHave") + BLOCK_FIELD_NUMBER: _ClassVar[int] + PRIORITY_FIELD_NUMBER: _ClassVar[int] + CANCEL_FIELD_NUMBER: _ClassVar[int] + WANTTYPE_FIELD_NUMBER: _ClassVar[int] + SENDDONTHAVE_FIELD_NUMBER: _ClassVar[int] + block: bytes + priority: int + cancel: bool + wantType: Message.Wantlist.WantType + sendDontHave: bool + def __init__(self, block: _Optional[bytes] = ..., priority: _Optional[int] = ..., cancel: bool = ..., wantType: _Optional[_Union[Message.Wantlist.WantType, str]] = ..., sendDontHave: bool = ...) -> None: ... + ENTRIES_FIELD_NUMBER: _ClassVar[int] + FULL_FIELD_NUMBER: _ClassVar[int] + entries: _containers.RepeatedCompositeFieldContainer[Message.Wantlist.Entry] + full: bool + def __init__(self, entries: _Optional[_Iterable[_Union[Message.Wantlist.Entry, _Mapping]]] = ..., full: bool = ...) -> None: ... + class Block(_message.Message): + __slots__ = ("prefix", "data") + PREFIX_FIELD_NUMBER: _ClassVar[int] + DATA_FIELD_NUMBER: _ClassVar[int] + prefix: bytes + data: bytes + def __init__(self, prefix: _Optional[bytes] = ..., data: _Optional[bytes] = ...) -> None: ... + class BlockPresence(_message.Message): + __slots__ = ("cid", "type") + CID_FIELD_NUMBER: _ClassVar[int] + TYPE_FIELD_NUMBER: _ClassVar[int] + cid: bytes + type: Message.BlockPresenceType + def __init__(self, cid: _Optional[bytes] = ..., type: _Optional[_Union[Message.BlockPresenceType, str]] = ...) -> None: ... + class PaymentTerms(_message.Message): + __slots__ = ("cid", "asset", "pay_to", "amount", "network", "nonce", "valid_before", "block_size", "description", "scheme") + CID_FIELD_NUMBER: _ClassVar[int] + ASSET_FIELD_NUMBER: _ClassVar[int] + PAY_TO_FIELD_NUMBER: _ClassVar[int] + AMOUNT_FIELD_NUMBER: _ClassVar[int] + NETWORK_FIELD_NUMBER: _ClassVar[int] + NONCE_FIELD_NUMBER: _ClassVar[int] + VALID_BEFORE_FIELD_NUMBER: _ClassVar[int] + BLOCK_SIZE_FIELD_NUMBER: _ClassVar[int] + DESCRIPTION_FIELD_NUMBER: _ClassVar[int] + SCHEME_FIELD_NUMBER: _ClassVar[int] + cid: bytes + asset: str + pay_to: str + amount: int + network: str + nonce: bytes + valid_before: int + block_size: int + description: str + scheme: str + def __init__(self, cid: _Optional[bytes] = ..., asset: _Optional[str] = ..., pay_to: _Optional[str] = ..., amount: _Optional[int] = ..., network: _Optional[str] = ..., nonce: _Optional[bytes] = ..., valid_before: _Optional[int] = ..., block_size: _Optional[int] = ..., description: _Optional[str] = ..., scheme: _Optional[str] = ...) -> None: ... + class PaymentAuthorization(_message.Message): + __slots__ = ("cid", "from_address", "to_address", "value", "valid_after", "valid_before", "nonce", "v", "r", "s", "scheme") + CID_FIELD_NUMBER: _ClassVar[int] + FROM_ADDRESS_FIELD_NUMBER: _ClassVar[int] + TO_ADDRESS_FIELD_NUMBER: _ClassVar[int] + VALUE_FIELD_NUMBER: _ClassVar[int] + VALID_AFTER_FIELD_NUMBER: _ClassVar[int] + VALID_BEFORE_FIELD_NUMBER: _ClassVar[int] + NONCE_FIELD_NUMBER: _ClassVar[int] + V_FIELD_NUMBER: _ClassVar[int] + R_FIELD_NUMBER: _ClassVar[int] + S_FIELD_NUMBER: _ClassVar[int] + SCHEME_FIELD_NUMBER: _ClassVar[int] + cid: bytes + from_address: str + to_address: str + value: int + valid_after: int + valid_before: int + nonce: bytes + v: int + r: bytes + s: bytes + scheme: str + def __init__(self, cid: _Optional[bytes] = ..., from_address: _Optional[str] = ..., to_address: _Optional[str] = ..., value: _Optional[int] = ..., valid_after: _Optional[int] = ..., valid_before: _Optional[int] = ..., nonce: _Optional[bytes] = ..., v: _Optional[int] = ..., r: _Optional[bytes] = ..., s: _Optional[bytes] = ..., scheme: _Optional[str] = ...) -> None: ... + class PaymentReceipt(_message.Message): + __slots__ = ("cid", "tx_hash", "expires") + CID_FIELD_NUMBER: _ClassVar[int] + TX_HASH_FIELD_NUMBER: _ClassVar[int] + EXPIRES_FIELD_NUMBER: _ClassVar[int] + cid: bytes + tx_hash: str + expires: int + def __init__(self, cid: _Optional[bytes] = ..., tx_hash: _Optional[str] = ..., expires: _Optional[int] = ...) -> None: ... + class PaymentRejection(_message.Message): + __slots__ = ("cid", "reason") + CID_FIELD_NUMBER: _ClassVar[int] + REASON_FIELD_NUMBER: _ClassVar[int] + cid: bytes + reason: str + def __init__(self, cid: _Optional[bytes] = ..., reason: _Optional[str] = ...) -> None: ... + WANTLIST_FIELD_NUMBER: _ClassVar[int] + BLOCKS_FIELD_NUMBER: _ClassVar[int] + PAYLOAD_FIELD_NUMBER: _ClassVar[int] + BLOCKPRESENCES_FIELD_NUMBER: _ClassVar[int] + PENDINGBYTES_FIELD_NUMBER: _ClassVar[int] + PAYMENT_TERMS_FIELD_NUMBER: _ClassVar[int] + PAYMENT_AUTHORIZATIONS_FIELD_NUMBER: _ClassVar[int] + PAYMENT_RECEIPTS_FIELD_NUMBER: _ClassVar[int] + PAYMENT_REJECTIONS_FIELD_NUMBER: _ClassVar[int] + wantlist: Message.Wantlist + blocks: _containers.RepeatedScalarFieldContainer[bytes] + payload: _containers.RepeatedCompositeFieldContainer[Message.Block] + blockPresences: _containers.RepeatedCompositeFieldContainer[Message.BlockPresence] + pendingBytes: int + payment_terms: _containers.RepeatedCompositeFieldContainer[Message.PaymentTerms] + payment_authorizations: _containers.RepeatedCompositeFieldContainer[Message.PaymentAuthorization] + payment_receipts: _containers.RepeatedCompositeFieldContainer[Message.PaymentReceipt] + payment_rejections: _containers.RepeatedCompositeFieldContainer[Message.PaymentRejection] + def __init__(self, wantlist: _Optional[_Union[Message.Wantlist, _Mapping]] = ..., blocks: _Optional[_Iterable[bytes]] = ..., payload: _Optional[_Iterable[_Union[Message.Block, _Mapping]]] = ..., blockPresences: _Optional[_Iterable[_Union[Message.BlockPresence, _Mapping]]] = ..., pendingBytes: _Optional[int] = ..., payment_terms: _Optional[_Iterable[_Union[Message.PaymentTerms, _Mapping]]] = ..., payment_authorizations: _Optional[_Iterable[_Union[Message.PaymentAuthorization, _Mapping]]] = ..., payment_receipts: _Optional[_Iterable[_Union[Message.PaymentReceipt, _Mapping]]] = ..., payment_rejections: _Optional[_Iterable[_Union[Message.PaymentRejection, _Mapping]]] = ...) -> None: ... From f4a6965e097d2fa85619a1b961b1f8a6561c1f29 Mon Sep 17 00:00:00 2001 From: sumanjeet0012 Date: Sat, 16 May 2026 13:18:25 +0530 Subject: [PATCH 25/37] feat: Add ledger support to BitswapPaymentClient for tracking spent payments --- libp2p/bitswap/payment_client_1_3.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/libp2p/bitswap/payment_client_1_3.py b/libp2p/bitswap/payment_client_1_3.py index 22e542a41..0db7a0fe6 100644 --- a/libp2p/bitswap/payment_client_1_3.py +++ b/libp2p/bitswap/payment_client_1_3.py @@ -43,13 +43,15 @@ def __init__( want_manager: Any, # has retry_want_block(peer_id, cid) method max_auto_pay_usdc: float = 0.001, send_callback: Optional[Callable] = None, + ledger: Any = None, # gooseswarm.payments.ledger.PaymentLedger (optional) ): self.signer = signer self.want_manager = want_manager self.max_auto_pay_units = int(max_auto_pay_usdc * 1_000_000) self.send_callback = send_callback + self.ledger = ledger - # Pending payments: nonce_hex → {peer_id, cid, terms} + # Pending payments: nonce_hex → {peer_id, cid, amount} self._pending_payments: dict[str, dict] = {} async def process_incoming_message( @@ -174,6 +176,18 @@ async def _handle_payment_terms( "amount": amount, } + # Persist spent payment to ledger + if self.ledger is not None: + try: + self.ledger.record_spent_payment( + peer_id=peer_id, + cid=bytes(terms.cid), + amount=amount, + nonce=bytes(terms.nonce), + ) + except Exception as _e: + logger.warning(f"Failed to persist spent payment: {_e}") + logger.info( f"Sending PaymentAuthorization to {peer_id[:20]}... " f"cid={bytes(terms.cid).hex()[:20]}... amount={amount} units " From 92f5718621f6be93e4db49f95ae18b1fd20b6b2c Mon Sep 17 00:00:00 2001 From: sumanjeet0012 Date: Sat, 16 May 2026 13:33:04 +0530 Subject: [PATCH 26/37] refactor: remove unused multihash subprojects --- extra/multihash-spec | 1 - extra/py-multihash | 1 - extra/pymultihash | 1 - 3 files changed, 3 deletions(-) delete mode 160000 extra/multihash-spec delete mode 160000 extra/py-multihash delete mode 160000 extra/pymultihash diff --git a/extra/multihash-spec b/extra/multihash-spec deleted file mode 160000 index b43ec1026..000000000 --- a/extra/multihash-spec +++ /dev/null @@ -1 +0,0 @@ -Subproject commit b43ec1026a610fa87878e53b3daecf3a14b3ef6f diff --git a/extra/py-multihash b/extra/py-multihash deleted file mode 160000 index dfae0dd7a..000000000 --- a/extra/py-multihash +++ /dev/null @@ -1 +0,0 @@ -Subproject commit dfae0dd7a66e0f5a0346d0297e03582443297b9c diff --git a/extra/pymultihash b/extra/pymultihash deleted file mode 160000 index 215298fa2..000000000 --- a/extra/pymultihash +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 215298fa2faa55027384d1f22519229d0918cfb0 From 8715255f476b758dd54957706c3be14f664866f8 Mon Sep 17 00:00:00 2001 From: sumanjeet0012 Date: Sun, 17 May 2026 12:31:23 +0530 Subject: [PATCH 27/37] refactor: improve CID handling and formatting in Bitswap components --- libp2p/bitswap/cid.py | 9 ++++- libp2p/bitswap/client.py | 24 ++++++------ libp2p/bitswap/dag.py | 39 ++++++++++++++------ libp2p/bitswap/dag_pb.py | 24 +++++++++--- libp2p/bitswap/gated_decision_engine.py | 49 +++++++++++++++---------- libp2p/bitswap/payment_client_1_3.py | 34 ++++++++--------- libp2p/bitswap/pb/bitswap_1_3_0_pb2.pyi | 6 +-- 7 files changed, 114 insertions(+), 71 deletions(-) diff --git a/libp2p/bitswap/cid.py b/libp2p/bitswap/cid.py index 0056d0710..1f4431c84 100644 --- a/libp2p/bitswap/cid.py +++ b/libp2p/bitswap/cid.py @@ -243,8 +243,13 @@ def cid_to_bytes(value: CIDInput) -> bytes: def cid_to_text(value: CIDInput) -> str: - """Convert CID input to canonical CID string form.""" - return str(parse_cid(value)) + """Convert CID input to canonical CID string form (base32 for CIDv1, base58btc for CIDv0).""" + cid_obj = parse_cid(value) + # Use base32 for CIDv1 (matches Kubo's default output) + if cid_obj.version == 1: + return cid_obj.encode('base32').decode() + # Use base58btc for CIDv0 (legacy format) + return str(cid_obj) def format_cid_for_display(cid: CIDInput, max_len: int | None = None) -> str: diff --git a/libp2p/bitswap/client.py b/libp2p/bitswap/client.py index 52de979c6..30b202d4e 100644 --- a/libp2p/bitswap/client.py +++ b/libp2p/bitswap/client.py @@ -67,8 +67,8 @@ def __init__( block_store: BlockStore | None = None, protocol_version: str = BITSWAP_PROTOCOL_V120, provider_query_manager: ProviderQueryManager | None = None, - payment_client: Any = None, # BitswapPaymentClient_1_3 (optional) - payment_engine: Any = None, # PaymentGatedDecisionEngine (optional) + payment_client: Any = None, # BitswapPaymentClient_1_3 (optional) + payment_engine: Any = None, # PaymentGatedDecisionEngine (optional) ): """ Initialize Bitswap client. @@ -723,8 +723,12 @@ async def _process_message( logger.warning(f" Entries: {len(msg.wantlist.entries)}") logger.warning(f" Full: {msg.wantlist.full}") logger.warning("=" * 70) - print(f"\n📥 RECEIVED WANTLIST from peer {peer_id_str} with {len(msg.wantlist.entries)} entries", flush=True) - + print( + f"\n📥 RECEIVED WANTLIST from peer {peer_id_str} with " + f"{len(msg.wantlist.entries)} entries", + flush=True, + ) + # Detect peer protocol version from stream protocol = stream.get_protocol() if protocol: @@ -735,9 +739,11 @@ async def _process_message( # ── Bitswap 1.3.0 payment message handling ─────────────────────── if peer_protocol == str(BITSWAP_PROTOCOL_V130): # Re-parse as 1.3.0 message to access payment fields - msg_1_3 = Message_1_3() + msg_1_3: Message_1_3 | None try: - msg_1_3.ParseFromString(msg.SerializeToString()) + _tmp = Message_1_3() + _tmp.ParseFromString(msg.SerializeToString()) + msg_1_3 = _tmp except Exception: msg_1_3 = None @@ -1212,9 +1218,7 @@ async def _write_message(self, stream: INetStream, msg: Message) -> None: length_prefix = varint.encode(len(msg_bytes)) await stream.write(length_prefix + msg_bytes) - async def _write_message_bytes( - self, stream: INetStream, msg_bytes: bytes - ) -> None: + async def _write_message_bytes(self, stream: INetStream, msg_bytes: bytes) -> None: """ Write pre-serialized message bytes (for 1.3.0 Message_1_3 objects). """ @@ -1265,5 +1269,3 @@ async def _process_block_presences_1_3( ) # The payment_client will handle PaymentTerms # in process_incoming_message - - diff --git a/libp2p/bitswap/dag.py b/libp2p/bitswap/dag.py index 5a6087ebe..3a449779a 100644 --- a/libp2p/bitswap/dag.py +++ b/libp2p/bitswap/dag.py @@ -270,7 +270,8 @@ async def add_file( f"Wrapping single-block file in directory with name: {filename}" ) - dir_data = create_directory_node([(filename, cid, file_size)]) + # Tsize should be the block size, not the file data size + dir_data = create_directory_node([(filename, cid, len(leaf_block))]) dir_cid = compute_cid_v1(dir_data, codec=CODEC_DAG_PB) await self._put_block(dir_cid, dir_data) @@ -330,18 +331,20 @@ async def add_file( # Create a sync wrapper for the async _put_block method # We'll collect (cid, data) pairs and store them after internal_nodes: list[tuple[bytes, bytes]] = [] - + def store_internal_node(cid: bytes, data: bytes) -> None: """Callback to collect internal nodes for storage.""" internal_nodes.append((cid, data)) - - root_cid, root_data = balanced_layout(leaf_triples, put_block_callback=store_internal_node) - + + root_cid, root_data = balanced_layout( + leaf_triples, put_block_callback=store_internal_node + ) + # Store all internal nodes logger.info(f"Storing {len(internal_nodes)} internal DAG nodes...") for cid, data in internal_nodes: await self._put_block(cid, data) - + # Store the root node await self._put_block(root_cid, root_data) @@ -374,7 +377,8 @@ def store_internal_node(cid: bytes, data: bytes) -> None: logger.info(f"Wrapping file in directory with name: {filename}") # Create directory node with single entry pointing to the file - dir_data = create_directory_node([(filename, root_cid, file_size)]) + # Tsize should be the block size, not the file data size + dir_data = create_directory_node([(filename, root_cid, len(root_data))]) dir_cid = compute_cid_v1(dir_data, codec=CODEC_DAG_PB) await self._put_block(dir_cid, dir_data) @@ -619,7 +623,10 @@ async def fetch_file( if dir_links: first_link = dir_links[0] filename = first_link.name or None - actual_file_cid = first_link.cid + # Links contain multihashes, need to reconstruct CIDv1 + # Assume dag-pb codec (0x70) for file blocks + multihash = first_link.cid + actual_file_cid = b'\x01\x70' + multihash # CIDv1 + dag-pb codec + multihash logger.info(f"Filename from directory: {filename!r}") actual_file_data = await self._get_block( actual_file_cid, peer_id, timeout @@ -699,7 +706,9 @@ async def _batch_fetch_tree(cid_list: list[bytes], depth: int) -> None: msg = f"[DAG] Depth {depth}: {cid_str} has {len(node_links)}" logger.debug(f"{msg} children") for link in node_links: - child_cids.append(link.cid) + # Links contain multihashes, reconstruct CIDv1 with dag-pb codec + child_cid = b'\x01\x70' + link.cid + child_cids.append(child_cid) # Recursively fetch next level if there are children if child_cids: @@ -709,7 +718,9 @@ async def _batch_fetch_tree(cid_list: list[bytes], depth: int) -> None: await _batch_fetch_tree(child_cids, depth + 1) # Starting from the top-level links - await _batch_fetch_tree([top_link.cid for top_link in top_links], depth=1) + # Links contain multihashes, reconstruct CIDv1 with dag-pb codec + top_cids = [b'\x01\x70' + top_link.cid for top_link in top_links] + await _batch_fetch_tree(top_cids, depth=1) blocks_count = len(all_blocks_map) logger.info(f"[DAG] ✓ Tree fetch complete: {blocks_count} total blocks") print(f"[FETCH] ✓ Tree fetch complete: {blocks_count} total blocks", flush=True) @@ -746,12 +757,16 @@ def _collect_leaves_local(cid_bytes: bytes, depth: int = 1) -> None: c_tot = len(node_links) msg = f"[DAG] Depth {depth}: processing child {c_idx}/{c_tot}" logger.debug(msg) - _collect_leaves_local(child_link.cid, depth + 1) + # Links contain multihashes, reconstruct CIDv1 + child_cid = b'\x01\x70' + child_link.cid + _collect_leaves_local(child_cid, depth + 1) # Traverse each top-level block for i, top_link in enumerate(top_links): logger.info(f"[DAG] Traversing top-level {i + 1}/{len(top_links)}...") - _collect_leaves_local(top_link.cid, depth=1) + # Links contain multihashes, reconstruct CIDv1 + top_cid = b'\x01\x70' + top_link.cid + _collect_leaves_local(top_cid, depth=1) logger.info(f"[DAG] ✓ Collected {len(ordered_leaf_cids)} leaf blocks") diff --git a/libp2p/bitswap/dag_pb.py b/libp2p/bitswap/dag_pb.py index 08d22e0c4..f820f5351 100644 --- a/libp2p/bitswap/dag_pb.py +++ b/libp2p/bitswap/dag_pb.py @@ -30,10 +30,18 @@ def _encode_varint(value: int) -> bytes: def _normalize_link_cid(cid: CIDInput) -> bytes: - """Normalize CID input for DAG links while preserving raw-bytes compatibility.""" - if isinstance(cid, bytes): - return cid - return cid_to_bytes(cid) + """ + Normalize CID input for DAG links while preserving raw-bytes compatibility. + + DAG-PB links store only the multihash (not the full CID with version/codec). + This matches Kubo's behavior and the DAG-PB specification. + """ + from .cid import parse_cid + + # Always parse the CID and extract the multihash + # This handles both CID objects and raw bytes (whether CIDv0, CIDv1, or already a multihash) + cid_obj = parse_cid(cid) + return cid_obj.multihash @dataclass(init=False) @@ -137,8 +145,11 @@ def encode_dag_pb(links: list[Link], unixfs_data: UnixFSData | None = None) -> b if unixfs_data is not None: pb_unixfs = PBUnixFSData() pb_unixfs.Type = UnixFSData.TYPE_MAP[unixfs_data.type] # type: ignore[assignment] - pb_unixfs.Data = unixfs_data.data - pb_unixfs.filesize = unixfs_data.filesize + # Only set fields with non-default values to match Kubo's encoding + if unixfs_data.data: + pb_unixfs.Data = unixfs_data.data + if unixfs_data.filesize: + pb_unixfs.filesize = unixfs_data.filesize for blocksize in unixfs_data.blocksizes: pb_unixfs.blocksizes.append(blocksize) if unixfs_data.hash_type: @@ -339,6 +350,7 @@ def balanced_layout( max_links: Max links per internal node (default 174, matches Kubo) put_block_callback: Optional async callback to store each internal node Signature: callback(cid_bytes, block_bytes) + Returns: (root_cid_bytes, root_block_bytes) diff --git a/libp2p/bitswap/gated_decision_engine.py b/libp2p/bitswap/gated_decision_engine.py index 827cffeed..69e1ad977 100644 --- a/libp2p/bitswap/gated_decision_engine.py +++ b/libp2p/bitswap/gated_decision_engine.py @@ -14,7 +14,7 @@ import logging import os import time -from typing import Any, Optional +from typing import Any from libp2p.bitswap.block_store import BlockStore from libp2p.bitswap.cid import parse_cid @@ -50,9 +50,9 @@ class PaymentGatedDecisionEngine: def __init__( self, blockstore: BlockStore, - ledger: Any, # gooseswarm.payments.ledger.PaymentLedger - pricing: Any, # gooseswarm.payments.pricing.BlockPricingEngine - facilitator: Any, # gooseswarm.payments.facilitator.FacilitatorClient + ledger: Any, # gooseswarm.payments.ledger.PaymentLedger + pricing: Any, # gooseswarm.payments.pricing.BlockPricingEngine + facilitator: Any, # gooseswarm.payments.facilitator.FacilitatorClient server_wallet: str = "", host: Any = None, ): @@ -66,7 +66,7 @@ def __init__( self.host = host # Pending payment offers: nonce_bytes → offer_dict - self._pending_offers: dict[bytes, dict] = {} + self._pending_offers: dict[bytes, dict[str, Any]] = {} # Callbacks for sending messages back to peers # Set externally: engine.send_message_callback = async_fn(peer_id, msg_bytes) @@ -76,10 +76,10 @@ async def handle_want( self, peer_id: str, cid: str | bytes, - want_type: int, # 0 = WANT_BLOCK, 1 = WANT_HAVE + want_type: int, # 0 = WANT_BLOCK, 1 = WANT_HAVE send_dont_have: bool, peer_protocol: str = BITSWAP_PROTOCOL_V120, - ) -> Optional[Message_1_3 | Message_1_2]: + ) -> Message_1_3 | Message_1_2 | None: """ Process a WANT request from a peer. @@ -135,7 +135,10 @@ async def handle_payment_authorization( # Validate against pending offer offer = self._pending_offers.pop(nonce, None) if offer is None: - logger.warning(f"No pending offer for nonce {nonce.hex()[:10]}... from {peer_id[:20]}...") + logger.warning( + f"No pending offer for nonce {nonce.hex()[:10]}... " + f"from {peer_id[:20]}..." + ) return self._make_payment_rejection(auth.cid, "NO_PENDING_OFFER") if offer["peer_id"] != peer_id: @@ -213,7 +216,7 @@ async def handle_payment_authorization( async def process_incoming_1_3_message( self, peer_id: str, msg: Message_1_3 - ) -> Optional[Message_1_3]: + ) -> Message_1_3 | None: """ Process an incoming 1.3.0 message that may contain PaymentAuthorizations. Returns a response message or None. @@ -250,7 +253,7 @@ async def _make_payment_required_1_3( # BlockPresence with type=2 (PaymentRequired) presence = msg.blockPresences.add() presence.cid = cid_bytes - presence.type = Message_1_3.PaymentRequired # = 2 + presence.type = Message_1_3.BlockPresenceType.PaymentRequired # = 2 # PaymentTerms in field 6 terms = msg.payment_terms.add() @@ -262,9 +265,7 @@ async def _make_payment_required_1_3( terms.nonce = nonce terms.valid_before = valid_before terms.block_size = block_size - terms.description = ( - f"Block {cid_bytes.hex()[:20]}... ({block_size // 1024}KB)" - ) + terms.description = f"Block {cid_bytes.hex()[:20]}... ({block_size // 1024}KB)" terms.scheme = "exact" logger.info( @@ -278,15 +279,23 @@ def _make_have(self, cid_bytes: bytes, protocol: str) -> Message_1_3 | Message_1 msg = MsgClass() presence = msg.blockPresences.add() presence.cid = cid_bytes - presence.type = MsgClass.Have # = 0 + if protocol == BITSWAP_PROTOCOL_V130: + presence.type = Message_1_3.BlockPresenceType.Have # = 0 + else: + presence.type = Message_1_2.BlockPresenceType.Have # = 0 return msg - def _make_dont_have(self, cid_bytes: bytes, protocol: str) -> Message_1_3 | Message_1_2: + def _make_dont_have( + self, cid_bytes: bytes, protocol: str + ) -> Message_1_3 | Message_1_2: MsgClass = Message_1_3 if protocol == BITSWAP_PROTOCOL_V130 else Message_1_2 msg = MsgClass() presence = msg.blockPresences.add() presence.cid = cid_bytes - presence.type = MsgClass.DontHave # = 1 + if protocol == BITSWAP_PROTOCOL_V130: + presence.type = Message_1_3.BlockPresenceType.DontHave # = 1 + else: + presence.type = Message_1_2.BlockPresenceType.DontHave # = 1 return msg def _make_block_response( @@ -300,14 +309,14 @@ def _make_block_response( block.prefix = cid_bytes[:4] if len(cid_bytes) >= 4 else cid_bytes return msg - def _make_payment_rejection( - self, cid_bytes: bytes, reason: str - ) -> Message_1_3: + def _make_payment_rejection(self, cid_bytes: bytes, reason: str) -> Message_1_3: msg = Message_1_3() rej = msg.payment_rejections.add() rej.cid = bytes(cid_bytes) rej.reason = reason - logger.warning(f"Payment rejected: cid={bytes(cid_bytes).hex()[:20]}... reason={reason}") + logger.warning( + f"Payment rejected: cid={bytes(cid_bytes).hex()[:20]}... reason={reason}" + ) return msg diff --git a/libp2p/bitswap/payment_client_1_3.py b/libp2p/bitswap/payment_client_1_3.py index 0db7a0fe6..7cab28593 100644 --- a/libp2p/bitswap/payment_client_1_3.py +++ b/libp2p/bitswap/payment_client_1_3.py @@ -11,8 +11,9 @@ This module lives in py-libp2p so it's importable as libp2p.bitswap. """ +from collections.abc import Callable import logging -from typing import Any, Callable, Optional +from typing import Any from libp2p.bitswap.pb.bitswap_1_3_0_pb2 import Message as Message_1_3 @@ -39,11 +40,11 @@ class BitswapPaymentClient_1_3: def __init__( self, - signer: Any, # gooseswarm.payments.eip3009_signer.EIP3009Signer - want_manager: Any, # has retry_want_block(peer_id, cid) method + signer: Any, # gooseswarm.payments.eip3009_signer.EIP3009Signer + want_manager: Any, # has retry_want_block(peer_id, cid) method max_auto_pay_usdc: float = 0.001, - send_callback: Optional[Callable] = None, - ledger: Any = None, # gooseswarm.payments.ledger.PaymentLedger (optional) + send_callback: Callable[..., Any] | None = None, + ledger: Any = None, # gooseswarm.payments.ledger.PaymentLedger (optional) ): self.signer = signer self.want_manager = want_manager @@ -52,11 +53,11 @@ def __init__( self.ledger = ledger # Pending payments: nonce_hex → {peer_id, cid, amount} - self._pending_payments: dict[str, dict] = {} + self._pending_payments: dict[str, dict[str, Any]] = {} async def process_incoming_message( self, peer_id: str, msg: Message_1_3 - ) -> Optional[Message_1_3]: + ) -> Message_1_3 | None: """ Called by the Bitswap dispatcher for every incoming 1.3.0 message. @@ -85,7 +86,8 @@ async def process_incoming_message( return None async def build_payment_auth_msg( - self, terms: Any # Message_1_3.PaymentTerms + self, + terms: Any, # Message_1_3.PaymentTerms ) -> Message_1_3: """ Build a PaymentAuthorization message for the given PaymentTerms. @@ -117,7 +119,7 @@ async def build_payment_auth_msg( async def _handle_payment_terms( self, peer_id: str, terms: Any - ) -> Optional[Message_1_3]: + ) -> Message_1_3 | None: """ Server sent us PaymentTerms alongside a PaymentRequired BlockPresence. Decide whether to pay and send back a PaymentAuthorization. @@ -127,7 +129,8 @@ async def _handle_payment_terms( # Reject if too expensive if amount > self.max_auto_pay_units: logger.info( - f"Block too expensive: {amount} units > max {self.max_auto_pay_units} units. " + f"Block too expensive: {amount} units > " + f"max {self.max_auto_pay_units} units. " f"Skipping — will seek block elsewhere." ) return None @@ -195,18 +198,15 @@ async def _handle_payment_terms( ) return response - async def _handle_payment_receipt( - self, peer_id: str, receipt: Any - ) -> None: + async def _handle_payment_receipt(self, peer_id: str, receipt: Any) -> None: """Server confirmed payment. Retry the WANT_BLOCK immediately.""" cid_hex = ( - bytes(receipt.cid).hex() - if isinstance(receipt.cid, bytes) - else receipt.cid + bytes(receipt.cid).hex() if isinstance(receipt.cid, bytes) else receipt.cid ) logger.info( f"Payment receipt received from {peer_id[:20]}... " - f"cid={cid_hex[:20]}... tx={receipt.tx_hash[:20] if receipt.tx_hash else 'optimistic'}..." + f"cid={cid_hex[:20]}... " + f"tx={receipt.tx_hash[:20] if receipt.tx_hash else 'optimistic'}..." ) # Trigger want manager to retry if self.want_manager: diff --git a/libp2p/bitswap/pb/bitswap_1_3_0_pb2.pyi b/libp2p/bitswap/pb/bitswap_1_3_0_pb2.pyi index 1864ea639..1258ace8d 100644 --- a/libp2p/bitswap/pb/bitswap_1_3_0_pb2.pyi +++ b/libp2p/bitswap/pb/bitswap_1_3_0_pb2.pyi @@ -2,7 +2,7 @@ from google.protobuf.internal import containers as _containers from google.protobuf.internal import enum_type_wrapper as _enum_type_wrapper from google.protobuf import descriptor as _descriptor from google.protobuf import message as _message -from typing import ClassVar as _ClassVar, Iterable as _Iterable, Mapping as _Mapping, Optional as _Optional, Union as _Union +from typing import Any as _Any, ClassVar as _ClassVar, Iterable as _Iterable, Mapping as _Mapping, Optional as _Optional, Union as _Union DESCRIPTOR: _descriptor.FileDescriptor @@ -41,7 +41,7 @@ class Message(_message.Message): FULL_FIELD_NUMBER: _ClassVar[int] entries: _containers.RepeatedCompositeFieldContainer[Message.Wantlist.Entry] full: bool - def __init__(self, entries: _Optional[_Iterable[_Union[Message.Wantlist.Entry, _Mapping]]] = ..., full: bool = ...) -> None: ... + def __init__(self, entries: _Optional[_Iterable[_Union[Message.Wantlist.Entry, _Mapping[str, _Any]]]] = ..., full: bool = ...) -> None: ... class Block(_message.Message): __slots__ = ("prefix", "data") PREFIX_FIELD_NUMBER: _ClassVar[int] @@ -138,4 +138,4 @@ class Message(_message.Message): payment_authorizations: _containers.RepeatedCompositeFieldContainer[Message.PaymentAuthorization] payment_receipts: _containers.RepeatedCompositeFieldContainer[Message.PaymentReceipt] payment_rejections: _containers.RepeatedCompositeFieldContainer[Message.PaymentRejection] - def __init__(self, wantlist: _Optional[_Union[Message.Wantlist, _Mapping]] = ..., blocks: _Optional[_Iterable[bytes]] = ..., payload: _Optional[_Iterable[_Union[Message.Block, _Mapping]]] = ..., blockPresences: _Optional[_Iterable[_Union[Message.BlockPresence, _Mapping]]] = ..., pendingBytes: _Optional[int] = ..., payment_terms: _Optional[_Iterable[_Union[Message.PaymentTerms, _Mapping]]] = ..., payment_authorizations: _Optional[_Iterable[_Union[Message.PaymentAuthorization, _Mapping]]] = ..., payment_receipts: _Optional[_Iterable[_Union[Message.PaymentReceipt, _Mapping]]] = ..., payment_rejections: _Optional[_Iterable[_Union[Message.PaymentRejection, _Mapping]]] = ...) -> None: ... + def __init__(self, wantlist: _Optional[_Union[Message.Wantlist, _Mapping[str, _Any]]] = ..., blocks: _Optional[_Iterable[bytes]] = ..., payload: _Optional[_Iterable[_Union[Message.Block, _Mapping[str, _Any]]]] = ..., blockPresences: _Optional[_Iterable[_Union[Message.BlockPresence, _Mapping[str, _Any]]]] = ..., pendingBytes: _Optional[int] = ..., payment_terms: _Optional[_Iterable[_Union[Message.PaymentTerms, _Mapping[str, _Any]]]] = ..., payment_authorizations: _Optional[_Iterable[_Union[Message.PaymentAuthorization, _Mapping[str, _Any]]]] = ..., payment_receipts: _Optional[_Iterable[_Union[Message.PaymentReceipt, _Mapping[str, _Any]]]] = ..., payment_rejections: _Optional[_Iterable[_Union[Message.PaymentRejection, _Mapping[str, _Any]]]] = ...) -> None: ... From 350d3b03563597f36fc6112c158edd317e6d2c92 Mon Sep 17 00:00:00 2001 From: sumanjeet0012 Date: Wed, 20 May 2026 02:19:52 +0530 Subject: [PATCH 28/37] feat(bitswap): Implement Bitswap 1.3.0 Payment Client and Ledger - Added `payment_client_1_3.py` for handling in-band payment messages, including: - Processing PaymentTerms and sending PaymentAuthorization. - Handling PaymentReceipts and PaymentRejections. - Validating payment amounts and signing EIP-3009 transactions. - Introduced `payment_ledger.py` for tracking payments at the root CID level: - Supports registration of DAG structures for child blocks. - Implements nonce deduplication to prevent replay attacks. - Provides methods to check payment status and record payments. - Updated protobuf definitions in `bitswap_1_3_0_pb2.pyi` to reflect new payment structures: - Added PaymentTerms, PaymentAuthorization, and PaymentReceipt messages. - Introduced TxReceipt for transaction details. - Created `pricing_engine.py` to compute block pricing based on configurable strategies: - Supports free, fixed, size-based, and custom pricing strategies. - Allows marking specific CIDs as free and setting per-CID prices. --- examples/bitswap_payment_example.py | 200 ++++++++ libp2p/bitswap/__init__.py | 4 + libp2p/bitswap/client.py | 144 +++++- libp2p/bitswap/gated_decision_engine.py | 483 +++++++++++++------- libp2p/bitswap/payment_client_1_3.py | 147 +++++- libp2p/bitswap/payment_client_1_3.py.backup | 455 ++++++++++++++++++ libp2p/bitswap/payment_ledger.py | 278 +++++++++++ libp2p/bitswap/pb/bitswap_1_3_0_pb2.pyi | 53 +-- libp2p/bitswap/pricing_engine.py | 177 +++++++ 9 files changed, 1719 insertions(+), 222 deletions(-) create mode 100644 examples/bitswap_payment_example.py create mode 100644 libp2p/bitswap/payment_client_1_3.py.backup create mode 100644 libp2p/bitswap/payment_ledger.py create mode 100644 libp2p/bitswap/pricing_engine.py diff --git a/examples/bitswap_payment_example.py b/examples/bitswap_payment_example.py new file mode 100644 index 000000000..f2c57931a --- /dev/null +++ b/examples/bitswap_payment_example.py @@ -0,0 +1,200 @@ +""" +Example: Bitswap 1.3.0 with Root CID Payment System + +Demonstrates how to set up a payment-gated Bitswap server that charges +for files at the root CID level (not per-block), using the new payment +infrastructure. + +Key Features: +- Payment required only for root CID (all chunks accessible after payment) +- Configurable pricing: free, fixed, or size-based +- EIP-3009 meta-transaction support (off-chain payment authorization) +- Automatic DAG registration for multi-block files + +Usage: + # Start payment-gated server + python examples/bitswap_payment_server.py --price-per-mb 0.01 + + # Start client + python examples/bitswap_payment_client.py --server /ip4/127.0.0.1/tcp/4001/p2p/... +""" + +import asyncio +import logging +from pathlib import Path + +from libp2p import new_host +from libp2p.bitswap import ( + BitswapClient, + FilesystemBlockStore, + MerkleDag, + PaymentGatedDecisionEngine, +) +from libp2p.bitswap.payment_ledger import PaymentLedger +from libp2p.bitswap.pricing_engine import BlockPricingEngine + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + + +async def setup_payment_server( + price_per_mb: float = 0.01, # $0.01 per MB + wallet_address: str = "0x742d35Cc6634C0532925a3b844Bc9e7595f0bEb", +) -> tuple[BitswapClient, PaymentGatedDecisionEngine, MerkleDag]: + """ + Set up a payment-gated Bitswap server. + + Args: + price_per_mb: Price in USD per megabyte + wallet_address: Ethereum address to receive payments + + Returns: + (bitswap_client, payment_engine, dag) + """ + # Create libp2p host + host = new_host() + await host.run(["/ip4/0.0.0.0/tcp/4001"]) + + # Create block store + store = FilesystemBlockStore(Path("./bitswap_data")) + + # Create payment ledger (tracks who has paid for what) + ledger = PaymentLedger() + + # Create pricing engine (size-based: price scales with file size) + # Convert $/MB to micro-units/KB: $0.01/MB = 10,000 micro-units/MB = 10 micro-units/KB + units_per_kb = (price_per_mb * 1_000_000) / 1024 + pricing = BlockPricingEngine( + strategy="size_based", + units_per_kb=units_per_kb, + ) + + # Create payment-gated decision engine + payment_engine = PaymentGatedDecisionEngine( + blockstore=store, + ledger=ledger, + pricing=pricing, + tx_verifier=None, # Optional: add EIP-3009 verifier + server_wallet=wallet_address, + network="sepolia", + asset="USDC", + ) + + # Create Bitswap client with payment engine + bitswap = BitswapClient( + host=host, + block_store=store, + protocol_version="/ipfs/bitswap/1.3.0", + payment_engine=payment_engine, + ) + await bitswap.start() + + # Create DAG manager + dag = MerkleDag(bitswap, block_store=store) + + logger.info(f"✅ Payment-gated server started") + logger.info(f" Address: {host.get_id()}") + logger.info(f" Pricing: ${price_per_mb:.4f}/MB = {units_per_kb:.2f} units/KB") + logger.info(f" Wallet: {wallet_address}") + + return bitswap, payment_engine, dag + + +async def add_paid_file( + dag: MerkleDag, + payment_engine: PaymentGatedDecisionEngine, + file_path: Path, +) -> str: + """ + Add a file that requires payment to access. + + Args: + dag: MerkleDag instance + payment_engine: Payment engine for DAG registration + file_path: Path to file to add + + Returns: + Root CID (hex string) + """ + logger.info(f"📤 Adding paid file: {file_path}") + + # Add file to Bitswap (auto-chunks large files) + root_cid = await dag.add_file(str(file_path)) + + # Get all CIDs in the DAG (root + children) + # In a real implementation, you'd get this from the DAG add operation + # For now, we'll assume it's just the root CID + all_cids = [root_cid] + file_size = file_path.stat().st_size + + # Register DAG for root CID payment tracking + await payment_engine.register_dag( + root_cid=root_cid, + child_cids=all_cids[1:], # Exclude root from children + total_size=file_size, + ) + + logger.info(f"✅ File added: {root_cid.hex()[:20]}... ({file_size} bytes)") + return root_cid.hex() + + +async def add_free_file( + dag: MerkleDag, + payment_engine: PaymentGatedDecisionEngine, + file_path: Path, +) -> str: + """ + Add a file that is free to access (no payment required). + + Args: + dag: MerkleDag instance + payment_engine: Payment engine for marking free + file_path: Path to file to add + + Returns: + Root CID (hex string) + """ + logger.info(f"📤 Adding free file: {file_path}") + + # Add file to Bitswap + root_cid = await dag.add_file(str(file_path)) + + # Mark as free (no payment required) + payment_engine.mark_free(root_cid) + + file_size = file_path.stat().st_size + logger.info(f"✅ Free file added: {root_cid.hex()[:20]}... ({file_size} bytes)") + return root_cid.hex() + + +async def main(): + """Example usage.""" + # Set up payment-gated server + bitswap, payment_engine, dag = await setup_payment_server( + price_per_mb=0.01, # $0.01 per MB + wallet_address="0x742d35Cc6634C0532925a3b844Bc9e7595f0bEb", + ) + + # Add some files + # Example 1: Paid file (5 MB = $0.05) + # paid_cid = await add_paid_file( + # dag, payment_engine, Path("./large_file.bin") + # ) + + # Example 2: Free file (always accessible) + # free_cid = await add_free_file( + # dag, payment_engine, Path("./readme.txt") + # ) + + logger.info("Server running. Press Ctrl+C to stop.") + + # Keep running + try: + await asyncio.Event().wait() + except KeyboardInterrupt: + logger.info("Shutting down...") + await bitswap.stop() + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/libp2p/bitswap/__init__.py b/libp2p/bitswap/__init__.py index b3d838b94..e057362bc 100644 --- a/libp2p/bitswap/__init__.py +++ b/libp2p/bitswap/__init__.py @@ -34,6 +34,8 @@ from .block_service import BlockService from .block_store import BlockStore, FilesystemBlockStore, MemoryBlockStore from .gated_decision_engine import PaymentGatedDecisionEngine +from .payment_ledger import PaymentLedger +from .pricing_engine import BlockPricingEngine from .payment_client_1_3 import BitswapPaymentClient_1_3 from .cid import ( CID_V0, @@ -82,6 +84,8 @@ "BitswapClient", "BitswapPaymentClient_1_3", "PaymentGatedDecisionEngine", + "PaymentLedger", + "BlockPricingEngine", "BlockService", "BlockStore", "MemoryBlockStore", diff --git a/libp2p/bitswap/client.py b/libp2p/bitswap/client.py index 30b202d4e..4ec4da154 100644 --- a/libp2p/bitswap/client.py +++ b/libp2p/bitswap/client.py @@ -520,7 +520,13 @@ async def _send_wantlist_to_peer( msg = create_message(wantlist_entries=entries, full_wantlist=False) # Get negotiated protocol for this peer or use all protocols - if peer_id in self._peer_protocols: + # If payment client is configured, always prefer 1.3.0 to enable + # in-band payment messages regardless of any cached protocol. + if self.payment_client: + protocols = [BITSWAP_PROTOCOL_V130] + [ + p for p in BITSWAP_PROTOCOLS if p != BITSWAP_PROTOCOL_V130 + ] + elif peer_id in self._peer_protocols: protocols = [TProtocol(self._peer_protocols[peer_id])] else: protocols = list(BITSWAP_PROTOCOLS) # Try all @@ -722,6 +728,10 @@ async def _process_message( logger.warning(f"📥 RECEIVED WANTLIST from peer {peer_id_str}") logger.warning(f" Entries: {len(msg.wantlist.entries)}") logger.warning(f" Full: {msg.wantlist.full}") + for _i, _e in enumerate(msg.wantlist.entries): + _cid_hex = bytes(_e.block).hex()[:20] if _e.block else 'N/A' + _wt = 'WANT_HAVE' if _e.wantType == 1 else 'WANT_BLOCK' + logger.warning(f" [{_i+1}] cid={_cid_hex}... type={_wt} cancel={_e.cancel}") logger.warning("=" * 70) print( f"\n📥 RECEIVED WANTLIST from peer {peer_id_str} with " @@ -735,9 +745,14 @@ async def _process_message( self._peer_protocols[peer_id] = str(protocol) peer_protocol = str(protocol) if protocol else BITSWAP_PROTOCOL_V100 + logger.info(f"[FLOW] Negotiated protocol for peer {str(peer_id)[:20]}...: {peer_protocol}") # ── Bitswap 1.3.0 payment message handling ─────────────────────── - if peer_protocol == str(BITSWAP_PROTOCOL_V130): + # Always try to parse as 1.3.0 when payment components are configured, + # regardless of the negotiated stream protocol. This handles the case + # where the stream was opened as 1.2.0 but the server sends back + # payment fields (which protobuf preserves as unknown fields). + if self.payment_client or self.payment_engine: # Re-parse as 1.3.0 message to access payment fields msg_1_3: Message_1_3 | None try: @@ -754,29 +769,101 @@ async def _process_message( or msg_1_3.payment_receipts or msg_1_3.payment_rejections ): + if msg_1_3.payment_terms: + logger.warning("=" * 70) + logger.warning(f"[STEP 3] CLIENT RECEIVED PAYMENT TERMS from {str(peer_id)[:20]}...") + for _t in msg_1_3.payment_terms: + logger.warning(f" cid={bytes(_t.cid).hex()[:20]}...") + logger.warning(f" amount={_t.amount} units") + logger.warning(f" asset={_t.asset} scheme={_t.scheme}") + logger.warning(f" pay_to={_t.pay_to[:20]}...") + logger.warning(f" block_size={_t.block_size}B") + logger.warning(f" valid_before={_t.valid_before}") + logger.warning("=" * 70) + if msg_1_3.payment_receipts: + logger.warning("=" * 70) + logger.warning(f"[STEP 8a] CLIENT RECEIVED PAYMENT RECEIPT from {str(peer_id)[:20]}...") + for _r in msg_1_3.payment_receipts: + logger.warning(f" cid={bytes(_r.cid).hex()[:20]}...") + logger.warning(f" tx_hash={_r.tx_hash[:20] if _r.tx_hash else 'optimistic'}") + logger.warning(f" expires={_r.expires}") + logger.warning("=" * 70) + if msg_1_3.payment_rejections: + logger.warning("=" * 70) + logger.warning(f"[STEP 8a] CLIENT RECEIVED PAYMENT REJECTION from {str(peer_id)[:20]}...") + for _rj in msg_1_3.payment_rejections: + logger.warning(f" cid={bytes(_rj.cid).hex()[:20]}...") + logger.warning(f" reason={_rj.reason}") + logger.warning("=" * 70) response = await self.payment_client.process_incoming_message( str(peer_id), msg_1_3 ) if response is not None: + logger.warning("=" * 70) + logger.warning(f"[STEP 5] CLIENT SENDING PAYMENT AUTHORIZATION to {str(peer_id)[:20]}...") + if response.payment_authorizations: + for _a in response.payment_authorizations: + logger.warning(f" cid={bytes(_a.cid).hex()[:20]}...") + logger.warning(f" from={_a.from_address[:20]}...") + logger.warning(f" to={_a.to_address[:20]}...") + logger.warning(f" value={_a.value}") + logger.warning(f" scheme={_a.scheme}") + logger.warning(f" v={_a.v} r_len={len(bytes(_a.r))} s_len={len(bytes(_a.s))}") + logger.warning("=" * 70) await self._write_message_bytes( stream, response.SerializeToString() ) # Process any blocks delivered alongside a payment receipt if msg_1_3.payload: + logger.warning("=" * 70) + logger.warning(f"[STEP 9] FILE TRANSFER: CLIENT RECEIVING {len(msg_1_3.payload)} BLOCK(S) (v1.1.0+ payload) from {str(peer_id)[:20]}...") + for _bi, _b in enumerate(msg_1_3.payload): + logger.warning(f" block[{_bi+1}]: data_len={len(_b.data)}B prefix={bytes(_b.prefix).hex()[:10]}...") + logger.warning("=" * 70) await self._process_blocks_v110(msg_1_3.payload) if msg_1_3.blocks: + logger.warning("=" * 70) + logger.warning(f"[STEP 9] FILE TRANSFER: CLIENT RECEIVING {len(msg_1_3.blocks)} BLOCK(S) (v1.0.0) from {str(peer_id)[:20]}...") + logger.warning("=" * 70) await self._process_blocks_v100(list(msg_1_3.blocks), peer_id) - # Server-side: handle PaymentAuthorizations - if self.payment_engine and msg_1_3.payment_authorizations: - response = await self.payment_engine.process_incoming_1_3_message( - str(peer_id), msg_1_3 - ) - if response is not None: - await self._write_message_bytes( - stream, response.SerializeToString() - ) + # Server-side: handle PaymentAuthorizations (EIP-3009 signed payments) + if self.payment_engine: + try: + if msg_1_3.payment_authorizations: + logger.warning("=" * 70) + logger.warning(f"[STEP 6] SERVER RECEIVED PAYMENT AUTHORIZATION from {str(peer_id)[:20]}...") + for _a in msg_1_3.payment_authorizations: + logger.warning(f" cid={bytes(_a.cid).hex()[:20]}...") + logger.warning(f" from={_a.from_address[:20]}...") + logger.warning(f" to={_a.to_address[:20]}...") + logger.warning(f" value={_a.value}") + logger.warning(f" scheme={_a.scheme}") + logger.warning(f" v={_a.v} r_len={len(bytes(_a.r))} s_len={len(bytes(_a.s))}") + logger.warning("=" * 70) + response = await self.payment_engine.process_incoming_1_3_message( + str(peer_id), msg_1_3 + ) + if response is not None: + _has_receipt = bool(response.payment_receipts) + _has_rejection = bool(response.payment_rejections) + _has_blocks = bool(response.payload) or bool(response.blocks) + logger.warning("=" * 70) + logger.warning(f"[STEP 8] SERVER SENDING RESPONSE after PaymentAuthorization:") + logger.warning(f" has_receipt={_has_receipt} has_rejection={_has_rejection} has_blocks={_has_blocks}") + if _has_rejection: + for _rj in response.payment_rejections: + logger.warning(f" ❌ REJECTION reason={_rj.reason}") + if _has_blocks: + _nb = len(response.payload) + len(response.blocks) + logger.warning(f" ✅ SENDING {_nb} block(s) to client — FILE TRANSFER STARTING") + logger.warning("=" * 70) + await self._write_message_bytes( + stream, response.SerializeToString() + ) + except Exception as e: + logger.error(f"Error handling PaymentAuthorization: {e}", exc_info=True) # Handle PaymentRequired block presences specially if msg_1_3.blockPresences: @@ -826,9 +913,21 @@ async def _process_wantlist( # Get peer protocol for response format peer_protocol = self._peer_protocols.get(peer_id, BITSWAP_PROTOCOL_V100) - - # ── 1.3.0 payment-gated wantlist handling ────────────────────────── - if str(peer_protocol) == str(BITSWAP_PROTOCOL_V130) and self.payment_engine: + + logger.warning("=" * 70) + logger.warning( + f"[STEP 1] SERVER PROCESSING WANTLIST from {str(peer_id)[:20]}..." + ) + logger.warning(f" entries={len(wantlist.entries)} protocol={peer_protocol}") + logger.warning(f" payment_engine={'ENABLED' if self.payment_engine else 'DISABLED (free mode)'}") + logger.warning(f" server_wallet={getattr(getattr(self, 'payment_engine', None), 'server_wallet', 'N/A')[:20] if self.payment_engine else 'N/A'}") + logger.warning("=" * 70) + + # ── Payment-gated wantlist handling ──────────────────────────────── + # Apply payment gating whenever payment_engine is enabled. + # For 1.3.0 peers: send PaymentRequired + PaymentTerms in-band. + # For older peers: send DONT_HAVE (they cannot pay in-band). + if self.payment_engine: for entry in wantlist.entries: entry_cid = parse_cid(entry.block) if entry.cancel: @@ -847,9 +946,24 @@ async def _process_wantlist( cid=entry.block, want_type=entry.wantType, send_dont_have=entry.sendDontHave, - peer_protocol=str(BITSWAP_PROTOCOL_V130), + peer_protocol=str(peer_protocol), # pass actual negotiated protocol ) if response_msg is not None: + _has_pr = bool(getattr(response_msg, 'blockPresences', [])) + _has_terms = bool(getattr(response_msg, 'payment_terms', [])) + _has_blocks = bool(getattr(response_msg, 'payload', [])) or bool(getattr(response_msg, 'blocks', [])) + logger.warning("=" * 70) + logger.warning(f"[STEP 2] SERVER SENDING RESPONSE for cid={bytes(entry.block).hex()[:20]}...") + logger.warning(f" payment_required={_has_pr} payment_terms={_has_terms} has_blocks={_has_blocks}") + if _has_pr: + for _bp in response_msg.blockPresences: + logger.warning(f" BlockPresence type={_bp.type} (2=PaymentRequired)") + if _has_terms: + for _t in response_msg.payment_terms: + logger.warning(f" PaymentTerms: amount={_t.amount} asset={_t.asset} pay_to={_t.pay_to[:20]}... scheme={_t.scheme}") + if _has_blocks: + logger.warning(f" ✅ Sending block(s) directly (free/already paid)") + logger.warning("=" * 70) await self._write_message_bytes( stream, response_msg.SerializeToString() ) diff --git a/libp2p/bitswap/gated_decision_engine.py b/libp2p/bitswap/gated_decision_engine.py index 69e1ad977..a6994bb38 100644 --- a/libp2p/bitswap/gated_decision_engine.py +++ b/libp2p/bitswap/gated_decision_engine.py @@ -2,17 +2,22 @@ Payment-Gated Decision Engine for Bitswap 1.3.0. Extends the standard Bitswap block serving logic with payment gating: -- If a block is free (small), serve it directly. +- If a block is free, serve it directly. - If a block requires payment and the peer has NOT paid, respond with - PaymentRequired (type=2) + PaymentTerms in-band (1.3.0 path) or - DONT_HAVE + side-channel (1.2.0 fallback path). -- If the peer HAS paid, serve the block normally. + PaymentRequired (type=2) + PaymentTerms in-band (1.3.0 path). +- If the peer sends a TxReceipt (on-chain payment proof), verify it + and serve the block. + +Proto alignment: + PaymentTerms → fields: cid, asset, pay_to, amount, network, block_size, description + TxReceipt → fields: cid, tx_hash, from_address, to_address, amount, asset, network + PaymentReceipt → fields: cid, tx_hash, expires + PaymentRejection → fields: cid, reason This module lives in py-libp2p so it's importable as libp2p.bitswap. """ import logging -import os import time from typing import Any @@ -32,79 +37,159 @@ class PaymentGatedDecisionEngine: Decides whether to serve a block or gate it behind payment. Integrates with: - - gooseswarm.payments.ledger.PaymentLedger — tracks paid (peer, cid) pairs - - gooseswarm.payments.pricing.BlockPricingEngine — computes prices - - gooseswarm.payments.facilitator.FacilitatorClient — verifies EIP-712 sigs - - Usage: - engine = PaymentGatedDecisionEngine( - blockstore=my_blockstore, - ledger=my_ledger, - pricing=my_pricing, - facilitator=my_facilitator, - server_wallet="0x...", - ) - # Wire into BitswapClient as a message handler + - payments.ledger.PaymentLedger — tracks paid (peer, cid) pairs + - payments.pricing.BlockPricingEngine — computes prices + - payments.tx_verifier.TxVerifier — verifies on-chain TxReceipts + + Payment flow (1.3.0): + 1. Client sends WANT_BLOCK + 2. Server → PaymentRequired + PaymentTerms (price offer) + 3. Client pays on-chain, sends TxReceipt with tx_hash + 4. Server verifies tx on-chain → PaymentReceipt + block data """ def __init__( self, blockstore: BlockStore, - ledger: Any, # gooseswarm.payments.ledger.PaymentLedger - pricing: Any, # gooseswarm.payments.pricing.BlockPricingEngine - facilitator: Any, # gooseswarm.payments.facilitator.FacilitatorClient + ledger: Any, # payments.ledger.PaymentLedger + pricing: Any, # payments.pricing.BlockPricingEngine + tx_verifier: Any, # payments.tx_verifier.TxVerifier (or None) server_wallet: str = "", - host: Any = None, + network: str = "sepolia", + asset: str = "ETH", ): self.blockstore = blockstore self.ledger = ledger self.pricing = pricing - self.facilitator = facilitator - self.server_wallet = server_wallet or ( - facilitator.server_wallet if facilitator else "" - ) - self.host = host + self.tx_verifier = tx_verifier + self.server_wallet = server_wallet + self.network = network + self.asset = asset - # Pending payment offers: nonce_bytes → offer_dict - self._pending_offers: dict[bytes, dict[str, Any]] = {} + # Track pending payment offers: cid_hex → (peer_id, terms) + self._pending_offers: dict[str, tuple[str, Any]] = {} # Callbacks for sending messages back to peers - # Set externally: engine.send_message_callback = async_fn(peer_id, msg_bytes) self.send_message_callback = None + + # Root CID tracking: cid_hex → {root_cid, total_size, child_count} + # Used to compute total file size for pricing + self._dag_info: dict[str, dict[str, Any]] = {} + + # Root CID tracking: cid_hex → {root_cid, total_size, child_count} + # Used to compute total file size for pricing + self._dag_info: dict[str, dict[str, Any]] = {} + + async def register_dag( + self, + root_cid: str | bytes, + child_cids: list[str | bytes], + total_size: int, + ) -> None: + """ + Register a DAG structure for root CID payment tracking. + + Call this after chunking a file to register the relationship between + the root CID and its child blocks, along with the total file size. + + Args: + root_cid: The root CID of the DAG + child_cids: List of child/chunk CIDs + total_size: Total size of all blocks combined (bytes) + + Example: + >>> # After adding a large file to Bitswap + >>> await engine.register_dag( + ... root_cid=root_cid, + ... child_cids=[chunk1, chunk2, ...], + ... total_size=5_000_000, # 5 MB + ... ) + """ + root_hex = _cid_to_str(root_cid) + + # Store DAG metadata + self._dag_info[root_hex] = { + "root_cid": root_hex, + "total_size": total_size, + "child_count": len(child_cids), + } + + # Register in ledger so child blocks inherit root payment status + await self.ledger.register_dag(root_cid, child_cids) + + logger.info( + f"📋 Registered DAG: root={root_hex[:20]}... " + f"size={total_size}B children={len(child_cids)}" + ) + + def mark_free(self, cid: str | bytes) -> None: + """ + Mark a CID as free (no payment required). + + Args: + cid: The CID to mark as free (root or child) + """ + self.ledger.mark_free(cid) + self.pricing.set_free(cid) + logger.info(f"Marked as FREE: {_cid_to_str(cid)[:20]}...") async def handle_want( self, peer_id: str, cid: str | bytes, - want_type: int, # 0 = WANT_BLOCK, 1 = WANT_HAVE + want_type: int, # 0 = WANT_BLOCK, 1 = WANT_HAVE send_dont_have: bool, peer_protocol: str = BITSWAP_PROTOCOL_V120, ) -> Message_1_3 | Message_1_2 | None: """ Process a WANT request from a peer. - Returns a Message to send back, or None if nothing should be sent. """ cid_str = _cid_to_str(cid) cid_bytes = _cid_to_bytes(cid) cid_obj = parse_cid(cid_bytes) + logger.info( + f"🔍 handle_want: peer={peer_id[:20]}... cid={cid_str[:20]}... " + f"want_type={want_type} protocol={peer_protocol}" + ) + # Check blockstore + logger.info("All CIDs in blockstore: " + ", ".join([c.hex() for c in self.blockstore.get_all_cids()])) block_data = await self.blockstore.get_block(cid_obj) if block_data is None: - # We don't have the block + logger.warning(f"❌ Block not in store: {cid_str[:20]}...") if send_dont_have: return self._make_dont_have(cid_bytes, peer_protocol) return None block_size = len(block_data) + logger.info(f"✅ Block found: {cid_str[:20]}... size={block_size}") - # Compute price - price = self.pricing.compute_price(cid_str, block_size) + # Get pricing size (use total DAG size if this is part of a DAG) + pricing_size = self._get_pricing_size(cid_str, block_size) + + # Compute price (at root CID level, not per-block) + price = self.pricing.compute_price(cid_str, pricing_size) + logger.info( + f"💰 Price: {price} units for {cid_str[:20]}... " + f"(block={block_size}B, pricing_size={pricing_size}B)" + ) - if price == 0 or self.ledger.is_paid(peer_id, cid_str, block_size): - # Free block or already paid — serve it + # Check if free or already paid (ledger resolves child → root automatically) + is_paid = self.ledger.is_paid(peer_id, cid_str) + + if price == 0: + # Free block — serve it + logger.info(f"✅ Serving block (FREE): {cid_str[:20]}...") + if want_type == 1: # WANT_HAVE + return self._make_have(cid_bytes, peer_protocol) + else: # WANT_BLOCK + return self._make_block_response(cid_bytes, block_data, peer_protocol) + elif is_paid: + # Already paid with sufficient amount — serve it + logger.info(f"✅ Serving block (ALREADY PAID): {cid_str[:20]}... price={price} units") if want_type == 1: # WANT_HAVE return self._make_have(cid_bytes, peer_protocol) else: # WANT_BLOCK @@ -112,11 +197,16 @@ async def handle_want( else: # Payment required if peer_protocol == BITSWAP_PROTOCOL_V130: - return await self._make_payment_required_1_3( - peer_id, cid_bytes, block_size, price + logger.info( + f"💳 Payment required: {price} units for {cid_str[:20]}..." + ) + return self._make_payment_required_1_3( + peer_id, cid_bytes, pricing_size, price ) else: - # 1.2.0 fallback: send DONT_HAVE (side-channel is handled separately) + logger.warning( + f"⚠️ Payment required but peer on {peer_protocol}, sending DONT_HAVE" + ) if send_dont_have: return self._make_dont_have(cid_bytes, peer_protocol) return None @@ -124,95 +214,115 @@ async def handle_want( async def handle_payment_authorization( self, peer_id: str, - auth: Any, # pb_1_3.Message.PaymentAuthorization + auth: Any, # Message_1_3.PaymentAuthorization ) -> Message_1_3: """ - Process a PaymentAuthorization from a client. - Returns a PaymentReceipt or PaymentRejection message. + Process a PaymentAuthorization from a client (EIP-3009 signed payment). + Verifies the signature and serves the block if valid. """ - nonce = bytes(auth.nonce) + cid_bytes = bytes(auth.cid) + cid_str = cid_bytes.hex() + from_address = auth.from_address - # Validate against pending offer - offer = self._pending_offers.pop(nonce, None) - if offer is None: - logger.warning( - f"No pending offer for nonce {nonce.hex()[:10]}... " - f"from {peer_id[:20]}..." - ) - return self._make_payment_rejection(auth.cid, "NO_PENDING_OFFER") - - if offer["peer_id"] != peer_id: - return self._make_payment_rejection(auth.cid, "PEER_MISMATCH") - - # Check nonce replay - if self.ledger.is_nonce_used(nonce): - return self._make_payment_rejection(auth.cid, "NONCE_USED") - - # Check amount - if auth.value < offer["amount"]: - reason = f"WRONG_AMOUNT:need={offer['amount']},got={auth.value}" - return self._make_payment_rejection(auth.cid, reason) - - # Check expiry - if offer["valid_before"] < int(time.time()): - return self._make_payment_rejection(auth.cid, "EXPIRED") - - # Verify EIP-712 signature - result = await self.facilitator.verify( - from_address=auth.from_address, - to_address=auth.to_address, - value=auth.value, - valid_after=auth.valid_after, - valid_before=auth.valid_before, - nonce=nonce, - v=auth.v, - r=bytes(auth.r), - s=bytes(auth.s), + logger.warning("=" * 70) + logger.warning( + f"[STEP 6b] SERVER handle_payment_authorization: peer={peer_id[:20]}... " + f"cid={cid_str[:20]}... from={from_address[:12]}... value={auth.value}" ) + logger.warning("=" * 70) - if not result.valid: - return self._make_payment_rejection(auth.cid, result.error) + # Check if already paid (ledger hit — no need to re-verify) + cid_obj = parse_cid(cid_bytes) + block_data = await self.blockstore.get_block(cid_obj) - # Record payment in ledger - try: - await self.ledger.record_payment( - peer_id=peer_id, - cid=bytes(auth.cid), - tx_hash=result.tx_hash, - amount=auth.value, - nonce=nonce, - ) - except ValueError as e: - return self._make_payment_rejection(auth.cid, str(e)) + if block_data is None: + return self._make_payment_rejection(cid_bytes, "BLOCK_NOT_FOUND") - # Send PaymentReceipt + the block data - msg = Message_1_3() - receipt = msg.payment_receipts.add() - receipt.cid = bytes(auth.cid) - receipt.tx_hash = result.tx_hash - receipt.expires = int(time.time()) + 86400 * 7 # 7 days + block_size = len(block_data) + pricing_size = self._get_pricing_size(cid_str, block_size) + expected_price = self.pricing.compute_price(cid_str, pricing_size) - # Include the paid block in the response - block_data = await self.blockstore.get_block(parse_cid(bytes(auth.cid))) - if block_data is not None: - block_entry = msg.payload.add() - block_entry.prefix = bytes(auth.cid)[:4] - block_entry.data = block_data + # Check if already paid (ledger resolves child → root automatically) + if self.ledger.is_paid(peer_id, cid_str): + # Already in ledger with sufficient payment — serve immediately logger.info( - f"Payment accepted + block sent to {peer_id[:20]}... " - f"cid={bytes(auth.cid).hex()[:20]}... amount={auth.value} " - f"size={len(block_data)} bytes" + f"✅ Already paid (ledger hit): {cid_str[:20]}... " + f"block_size={block_size}B expected_price={expected_price}" + ) + return self._make_receipt_and_block(cid_bytes, "", block_data) + + # Validate payment amount matches expected price + if auth.value < expected_price: + error_msg = ( + f"INSUFFICIENT_PAYMENT: paid={auth.value}, " + f"expected={expected_price} for {block_size}B block" ) + logger.warning(f"❌ {error_msg}") + return self._make_payment_rejection(cid_bytes, error_msg) + + # Verify EIP-3009 signature + logger.warning("=" * 70) + logger.warning(f"[STEP 7] SERVER VERIFYING EIP-3009 SIGNATURE") + logger.warning(f" from={from_address[:20]}...") + logger.warning(f" to={auth.to_address[:20]}...") + logger.warning(f" value={auth.value} expected={expected_price}") + logger.warning(f" verifier={'configured' if self.tx_verifier is not None else 'NOT CONFIGURED (optimistic mode)'}") + logger.warning("=" * 70) + if self.tx_verifier is not None: + try: + # The tx_verifier is actually a FacilitatorClient for EIP-3009 + result = await self.tx_verifier.verify( + from_address=from_address, + to_address=auth.to_address, + value=auth.value, + valid_after=auth.valid_after, + valid_before=auth.valid_before, + nonce=bytes(auth.nonce), + v=auth.v, + r=bytes(auth.r), + s=bytes(auth.s), + ) + valid = result.valid + error = result.error + except Exception as e: + logger.error(f"[STEP 7] VERIFICATION EXCEPTION: {e}", exc_info=True) + valid, error = False, str(e) + + if not valid: + logger.warning("=" * 70) + logger.warning(f"[STEP 7] ❌ EIP-3009 VERIFICATION FAILED: {error}") + logger.warning("=" * 70) + return self._make_payment_rejection(cid_bytes, error or "INVALID_SIGNATURE") + else: + logger.warning(f"[STEP 7] ✅ EIP-3009 VERIFICATION PASSED") else: + # No verifier configured — optimistic mode: trust the authorization logger.warning( - f"Payment accepted but block not found locally: " - f"cid={bytes(auth.cid).hex()[:20]}..." + "[STEP 7] ⚠️ No payment verifier configured — accepting PaymentAuthorization optimistically" ) - logger.info( - f"Payment accepted from {peer_id[:20]}... " - f"cid={bytes(auth.cid).hex()[:20]}... amount={auth.value}" + + # Record payment in ledger + try: + await self.ledger.record_payment( + peer_id=peer_id, + cid=cid_bytes, + tx_hash="", # No on-chain tx for EIP-3009 + amount=auth.value, + nonce=bytes(auth.nonce), ) - return msg + except ValueError as e: + # Duplicate nonce — already recorded + logger.info(f"Payment already recorded: {e}") + + logger.warning("=" * 70) + logger.warning( + f"[STEP 8b] ✅ SERVER PAYMENT ACCEPTED — SENDING BLOCK TO CLIENT" + ) + logger.warning( + f" cid={cid_str[:20]}... value={auth.value} expected={expected_price} block_size={block_size}B (EIP-3009)" + ) + logger.warning("=" * 70) + return self._make_receipt_and_block(cid_bytes, "", block_data) async def process_incoming_1_3_message( self, peer_id: str, msg: Message_1_3 @@ -222,14 +332,46 @@ async def process_incoming_1_3_message( Returns a response message or None. """ if msg.payment_authorizations: - # Process the first authorization (typically one per message) for auth in msg.payment_authorizations: return await self.handle_payment_authorization(peer_id, auth) return None # ── Internal helpers ────────────────────────────────────────────────── - async def _make_payment_required_1_3( + def _get_pricing_size(self, cid_str: str, block_size: int) -> int: + """ + Get the size to use for pricing calculation. + + NEW PAYMENT MODEL: For root CIDs, use total DAG size. + For child CIDs, pricing is N/A (they inherit root payment). + + Args: + cid_str: The CID (hex string) + block_size: The actual block size + + Returns: + Size in bytes to use for pricing + """ + # Check if this is a registered DAG root + dag_info = self._dag_info.get(cid_str) + if dag_info: + # This is a root CID - use total DAG size for pricing + total_size = dag_info["total_size"] + logger.info( + f"💡 CID {cid_str[:20]}... is DAG root: " + f"block_size={block_size}B, total_size={total_size}B" + ) + return total_size + + # Not a registered root CID - use block size (backward compatibility) + # This handles: old files, single-block files, or child blocks + logger.debug( + f"CID {cid_str[:20]}... not a registered DAG root, " + f"using block_size={block_size}B for pricing" + ) + return block_size + + def _make_payment_required_1_3( self, peer_id: str, cid_bytes: bytes, @@ -237,17 +379,9 @@ async def _make_payment_required_1_3( amount: int, ) -> Message_1_3: """Build a 1.3.0 PaymentRequired message with embedded PaymentTerms.""" - nonce = os.urandom(32) - valid_before = int(time.time()) + 120 # 2 minute window - - # Store pending offer for when PaymentAuthorization arrives - self._pending_offers[nonce] = { - "peer_id": peer_id, - "cid": cid_bytes, - "amount": amount, - "valid_before": valid_before, - } - + import secrets + import time + msg = Message_1_3() # BlockPresence with type=2 (PaymentRequired) @@ -255,34 +389,61 @@ async def _make_payment_required_1_3( presence.cid = cid_bytes presence.type = Message_1_3.BlockPresenceType.PaymentRequired # = 2 - # PaymentTerms in field 6 + # PaymentTerms — all fields including nonce, valid_before, scheme terms = msg.payment_terms.add() terms.cid = cid_bytes - terms.asset = self.facilitator.usdc_address if self.facilitator else "" + terms.asset = self.asset terms.pay_to = self.server_wallet terms.amount = amount - terms.network = getattr(self.facilitator, "network", "base-sepolia") - terms.nonce = nonce - terms.valid_before = valid_before + terms.network = self.network + terms.nonce = secrets.token_bytes(32) # Server generates nonce + terms.valid_before = int(time.time()) + 3600 # 1 hour expiry terms.block_size = block_size - terms.description = f"Block {cid_bytes.hex()[:20]}... ({block_size // 1024}KB)" - terms.scheme = "exact" + terms.description = ( + f"Block {cid_bytes.hex()[:20]}... ({block_size // 1024}KB) — " + f"pay {amount} wei to {self.server_wallet[:10]}..." + ) + terms.scheme = "EIP3009" # Payment scheme logger.info( - f"Sending PaymentRequired to {peer_id[:20]}... " - f"cid={cid_bytes.hex()[:20]}... amount={amount} units" + f"📤 PaymentRequired → {peer_id[:20]}... " + f"cid={cid_bytes.hex()[:20]}... amount={amount} asset={self.asset}" ) return msg + def _make_receipt_and_block( + self, cid_bytes: bytes, tx_hash: str, block_data: bytes + ) -> Message_1_3: + """Build a PaymentReceipt + block payload message.""" + msg = Message_1_3() + + receipt = msg.payment_receipts.add() + receipt.cid = cid_bytes + receipt.tx_hash = tx_hash or "" + receipt.expires = int(time.time()) + 86400 * 7 # 7 days + + block_entry = msg.payload.add() + block_entry.prefix = cid_bytes[:4] + block_entry.data = block_data + + return msg + + def _make_payment_rejection(self, cid_bytes: bytes, reason: str) -> Message_1_3: + msg = Message_1_3() + rejection = msg.payment_rejections.add() + rejection.cid = cid_bytes + rejection.reason = reason + return msg + def _make_have(self, cid_bytes: bytes, protocol: str) -> Message_1_3 | Message_1_2: MsgClass = Message_1_3 if protocol == BITSWAP_PROTOCOL_V130 else Message_1_2 msg = MsgClass() presence = msg.blockPresences.add() presence.cid = cid_bytes if protocol == BITSWAP_PROTOCOL_V130: - presence.type = Message_1_3.BlockPresenceType.Have # = 0 + presence.type = Message_1_3.BlockPresenceType.Have else: - presence.type = Message_1_2.BlockPresenceType.Have # = 0 + presence.type = Message_1_2.BlockPresenceType.Have return msg def _make_dont_have( @@ -293,9 +454,9 @@ def _make_dont_have( presence = msg.blockPresences.add() presence.cid = cid_bytes if protocol == BITSWAP_PROTOCOL_V130: - presence.type = Message_1_3.BlockPresenceType.DontHave # = 1 + presence.type = Message_1_3.BlockPresenceType.DontHave else: - presence.type = Message_1_2.BlockPresenceType.DontHave # = 1 + presence.type = Message_1_2.BlockPresenceType.DontHave return msg def _make_block_response( @@ -304,21 +465,38 @@ def _make_block_response( MsgClass = Message_1_3 if protocol == BITSWAP_PROTOCOL_V130 else Message_1_2 msg = MsgClass() block = msg.payload.add() + block.prefix = cid_bytes[:4] block.data = block_data - # CID prefix: first 4 bytes of CID bytes (version + codec) - block.prefix = cid_bytes[:4] if len(cid_bytes) >= 4 else cid_bytes return msg - def _make_payment_rejection(self, cid_bytes: bytes, reason: str) -> Message_1_3: - msg = Message_1_3() - rej = msg.payment_rejections.add() - rej.cid = bytes(cid_bytes) - rej.reason = reason - logger.warning( - f"Payment rejected: cid={bytes(cid_bytes).hex()[:20]}... reason={reason}" - ) - return msg + def _get_pricing_size(self, cid_str: str, block_size: int) -> int: + """ + Get the size to use for pricing calculations. + + If this CID is part of a registered DAG, return the total DAG size. + Otherwise, return the individual block size. + + Args: + cid_str: The CID being priced + block_size: The individual block size + + Returns: + Size in bytes to use for pricing + """ + # Check if this is a registered root CID + if cid_str in self._dag_info: + total_size = self._dag_info[cid_str]["total_size"] + logger.debug( + f"Using DAG total size for pricing: {cid_str[:20]}... " + f"total={total_size}B (not block={block_size}B)" + ) + return total_size + + # Not a registered DAG, use individual block size + return block_size + +# ── CID helpers ─────────────────────────────────────────────────────────────── def _cid_to_str(cid: str | bytes) -> str: if isinstance(cid, bytes): @@ -328,9 +506,8 @@ def _cid_to_str(cid: str | bytes) -> str: def _cid_to_bytes(cid: str | bytes) -> bytes: if isinstance(cid, str): - # Try hex decode first try: - return bytes.fromhex(cid.lstrip("0x")) + return bytes.fromhex(cid) except ValueError: return cid.encode() return cid diff --git a/libp2p/bitswap/payment_client_1_3.py b/libp2p/bitswap/payment_client_1_3.py index 7cab28593..3940c31a8 100644 --- a/libp2p/bitswap/payment_client_1_3.py +++ b/libp2p/bitswap/payment_client_1_3.py @@ -13,6 +13,7 @@ from collections.abc import Callable import logging +import time from typing import Any from libp2p.bitswap.pb.bitswap_1_3_0_pb2 import Message as Message_1_3 @@ -20,7 +21,7 @@ logger = logging.getLogger(__name__) # Default maximum auto-pay threshold: $0.001 USDC = 1000 micro-units -DEFAULT_MAX_AUTO_PAY_UNITS = 1000 +DEFAULT_MAX_AUTO_PAY_UNITS = 1000000 class BitswapPaymentClient_1_3: @@ -33,7 +34,7 @@ class BitswapPaymentClient_1_3: Args: signer: An EIP3009Signer instance (gooseswarm.payments.eip3009_signer) want_manager: Object with retry_want_block(peer_id, cid) async method - max_auto_pay_usdc: Maximum amount to auto-pay in USDC (default $0.001) + max_auto_pay_usdc: Maximum amount to auto-pay in USDC (default $1.00) send_callback: Async function(peer_id, msg_bytes) to send responses """ @@ -42,18 +43,22 @@ def __init__( self, signer: Any, # gooseswarm.payments.eip3009_signer.EIP3009Signer want_manager: Any, # has retry_want_block(peer_id, cid) method - max_auto_pay_usdc: float = 0.001, + max_auto_pay_usdc: float = 1.0, send_callback: Callable[..., Any] | None = None, ledger: Any = None, # gooseswarm.payments.ledger.PaymentLedger (optional) ): self.signer = signer self.want_manager = want_manager - self.max_auto_pay_units = int(max_auto_pay_usdc * 1_000_000) + self.max_auto_pay_units = int(max_auto_pay_usdc * 1000000) self.send_callback = send_callback self.ledger = ledger # Pending payments: nonce_hex → {peer_id, cid, amount} self._pending_payments: dict[str, dict[str, Any]] = {} + + # Server pricing config: peer_id → {units_per_kb, last_updated} + # This is learned from PaymentTerms messages + self._server_pricing: dict[str, dict[str, Any]] = {} async def process_incoming_message( self, peer_id: str, msg: Message_1_3 @@ -125,26 +130,46 @@ async def _handle_payment_terms( Decide whether to pay and send back a PaymentAuthorization. """ amount = terms.amount + block_size = terms.block_size + + logger.warning("=" * 70) + logger.warning(f"[STEP 3b] CLIENT EVALUATING PAYMENT TERMS from {peer_id[:20]}...") + logger.warning(f" amount={amount} units max_auto_pay={self.max_auto_pay_units} units") + logger.warning(f" block_size={block_size}B asset={terms.asset} scheme={terms.scheme}") + logger.warning("=" * 70) + + # Learn server's pricing from the PaymentTerms + # The server includes its units_per_kb in the pricing calculation + self._update_server_pricing(peer_id, amount, block_size) # Reject if too expensive if amount > self.max_auto_pay_units: - logger.info( - f"Block too expensive: {amount} units > " + logger.warning( + f"[STEP 3b] ❌ PAYMENT REJECTED (too expensive): {amount} units > " f"max {self.max_auto_pay_units} units. " f"Skipping — will seek block elsewhere." ) return None - # Validate pricing isn't a lie (10% tolerance) - expected_amount = self._expected_price(terms.block_size) - if expected_amount > 0 and amount > expected_amount * 1.1: + # Validate pricing consistency using learned server config + if not self._validate_pricing(peer_id, amount, block_size): logger.warning( - f"Server overcharging: asked {amount}, expected ~{expected_amount}. " - f"Skipping payment." + f"[STEP 3b] ❌ PAYMENT REJECTED (pricing validation failed) for {block_size}B block from {peer_id[:20]}... " + f"Server asked {amount} units. Skipping payment." ) return None + + logger.warning(f"[STEP 3b] ✅ Payment terms accepted — proceeding to sign EIP-3009") # Sign EIP-3009 authorization + logger.warning("=" * 70) + logger.warning(f"[STEP 4] CLIENT SIGNING EIP-3009 AUTHORIZATION") + logger.warning(f" to={terms.pay_to[:20]}...") + logger.warning(f" value={amount} units") + logger.warning(f" nonce={bytes(terms.nonce).hex()[:20]}...") + logger.warning(f" valid_before={terms.valid_before}") + logger.warning(f" signer_address={getattr(self.signer, 'address', 'N/A')}") + logger.warning("=" * 70) try: v, r, s = self.signer.sign_transfer_authorization( to=terms.pay_to, @@ -152,8 +177,9 @@ async def _handle_payment_terms( nonce=bytes(terms.nonce), valid_before=terms.valid_before, ) + logger.warning(f"[STEP 4] EIP-3009 SIGNATURE CREATED: v={v} r_len={len(r)} s_len={len(s)}") except Exception as e: - logger.error(f"Failed to sign payment authorization: {e}") + logger.error(f"[STEP 4] FAILED TO SIGN EIP-3009 AUTHORIZATION: {e}", exc_info=True) return None # Build PaymentAuthorization message @@ -193,8 +219,9 @@ async def _handle_payment_terms( logger.info( f"Sending PaymentAuthorization to {peer_id[:20]}... " - f"cid={bytes(terms.cid).hex()[:20]}... amount={amount} units " - f"(${amount / 1_000_000:.6f} USDC)" + f"cid={bytes(terms.cid).hex()[:20]}... " + f"amount={amount} units (${amount / 1_000_000:.6f} USDC) " + f"for {terms.block_size}B block" ) return response @@ -227,12 +254,90 @@ def _handle_payment_rejection(self, peer_id: str, rejection: Any) -> None: f"cid={cid_hex[:20]}... reason={rejection.reason}" ) - def _expected_price(self, block_size_bytes: int) -> int: + def _update_server_pricing(self, peer_id: str, amount: int, block_size: int) -> None: + """ + Learn the server's pricing configuration from PaymentTerms. + + The server calculates: price = max(1, int(block_size_kb * units_per_kb)) + We can reverse-engineer units_per_kb from the amount and block_size. + """ + if amount == 0 or block_size == 0: + return # Free block, no pricing info to learn + + # Calculate implied units_per_kb from this payment request + kb = block_size / 1024 + if kb > 0: + implied_units_per_kb = amount / kb + + # Store or update the pricing config for this peer + if peer_id not in self._server_pricing: + self._server_pricing[peer_id] = { + "units_per_kb": implied_units_per_kb, + "last_updated": time.time(), + "sample_count": 1, + } + logger.info( + f"Learned pricing from {peer_id[:20]}...: " + f"{implied_units_per_kb:.2f} units/KB" + ) + else: + # Average with existing samples for stability + config = self._server_pricing[peer_id] + old_rate = config["units_per_kb"] + sample_count = config["sample_count"] + new_rate = (old_rate * sample_count + implied_units_per_kb) / (sample_count + 1) + config["units_per_kb"] = new_rate + config["sample_count"] = sample_count + 1 + config["last_updated"] = time.time() + + # Warn if pricing changed significantly (>20%) + if abs(new_rate - old_rate) / old_rate > 0.2: + logger.warning( + f"Server {peer_id[:20]}... pricing changed: " + f"{old_rate:.2f} → {new_rate:.2f} units/KB" + ) + + def _validate_pricing(self, peer_id: str, amount: int, block_size: int) -> bool: + """ + Validate that the server's price request is consistent with its learned pricing. + + Returns True if pricing is acceptable, False if suspicious. + """ + if amount == 0: + return True # Free blocks are always acceptable + + # If we haven't learned pricing yet, accept this first payment + if peer_id not in self._server_pricing: + return True + + config = self._server_pricing[peer_id] + units_per_kb = config["units_per_kb"] + + # Calculate expected price using learned pricing + kb = block_size / 1024 + expected = max(1, int(kb * units_per_kb)) + + # Allow 20% tolerance for rounding and small variations + tolerance = 0.2 + min_acceptable = expected * (1 - tolerance) + max_acceptable = expected * (1 + tolerance) + + if amount < min_acceptable or amount > max_acceptable: + logger.warning( + f"Pricing inconsistency detected: " + f"expected {expected} units (±{tolerance*100}%), got {amount} units " + f"for {block_size}B block ({kb:.3f} KB) " + f"using learned rate {units_per_kb:.2f} units/KB" + ) + return False + + return True + + def get_server_pricing(self, peer_id: str) -> dict[str, Any] | None: """ - Client-side price oracle — must roughly match server pricing. - Used to detect overcharging. + Get the learned pricing configuration for a peer. + + Returns: + Dict with units_per_kb, last_updated, sample_count, or None if not learned yet. """ - if block_size_bytes <= 4096: - return 0 - kb = block_size_bytes / 1024 - return int(kb * 10) # 10 units per KB baseline + return self._server_pricing.get(peer_id) diff --git a/libp2p/bitswap/payment_client_1_3.py.backup b/libp2p/bitswap/payment_client_1_3.py.backup new file mode 100644 index 000000000..5865d635b --- /dev/null +++ b/libp2p/bitswap/payment_client_1_3.py.backup @@ -0,0 +1,455 @@ +""" +Bitswap 1.3.0 Payment Client. + +Client-side handler for in-band payment messages. When the server sends +a PAYMENT_REQUIRED response with PaymentTerms, this client: +1. Validates the price is acceptable +2. Signs an EIP-3009 USDC transferWithAuthorization +3. Sends back a PaymentAuthorization with signature +4. On receipt of PaymentReceipt, the block is served + +Proto alignment: + PaymentTerms → fields: cid, asset, pay_to, amount, network, block_size, description, nonce, valid_before, scheme + PaymentAuthorization → fields: cid, from_address, to_address, value, valid_after, valid_before, nonce, v, r, s, scheme + PaymentReceipt → fields: cid, tx_hash, expires + PaymentRejection → fields: cid, reason + +This module lives in py-libp2p so it's importable as libp2p.bitswap. +""" + +from collections.abc import Callable +import logging +from typing import Any + +from libp2p.bitswap.pb.bitswap_1_3_0_pb2 import Message as Message_1_3 + +logger = logging.getLogger(__name__) + +# Default maximum auto-pay threshold: $0.001 USDC = 1000 micro-units +DEFAULT_MAX_AUTO_PAY_UNITS = 1000 + + +class BitswapPaymentClient_1_3: + """ + Client-side handler for Bitswap 1.3.0 payment messages. + + Processes PaymentTerms from incoming messages and auto-pays if the + amount is within the configured threshold using USDC EIP-3009. + + Args: + signer: An EIP3009Signer instance (payments.eip3009_signer) + want_manager: Object with retry_want_block(peer_id, cid) async method + max_auto_pay_usdc: Maximum amount to auto-pay in USDC (default $0.001) + send_callback: Async function(peer_id, msg_bytes) to send responses + ledger: Optional PaymentLedger for tracking spent payments + """ + + def __init__( + self, + signer: Any, # payments.eip3009_signer.EIP3009Signer + want_manager: Any, # has retry_want_block(peer_id, cid) method + max_auto_pay_usdc: float = 0.001, + send_callback: Callable[..., Any] | None = None, + ledger: Any = None, + ): + self.signer = signer + self.want_manager = want_manager + self.max_auto_pay_units = int(max_auto_pay_usdc * 1_000_000) + self.send_callback = send_callback + self.ledger = ledger + + # Pending payments: nonce_hex → {peer_id, cid, amount} + self._pending_payments: dict[str, dict[str, Any]] = {} + + async def process_incoming_message( + self, peer_id: str, msg: Message_1_3 + ) -> Message_1_3 | None: + """ + Called by the Bitswap dispatcher for every incoming 1.3.0 message. + + Handles: + - PaymentTerms → sign and send PaymentAuthorization + - PaymentReceipts → retry WANT_BLOCK + - PaymentRejections → log and surface to application + + Returns a response Message to send back, or None. + """ + # Handle payment terms (server telling us what a block costs) + if msg.payment_terms: + for terms in msg.payment_terms: + response = await self._handle_payment_terms(peer_id, terms) + if response: + return response + + # Handle receipts (server confirming our payment) + for receipt in msg.payment_receipts: + await self._handle_payment_receipt(peer_id, receipt) + + # Handle rejections + for rejection in msg.payment_rejections: + self._handle_payment_rejection(peer_id, rejection) + + return None + + async def build_payment_auth_msg( + self, + terms: Any, # Message_1_3.PaymentTerms + ) -> Message_1_3: + """ + Build a PaymentAuthorization message for the given PaymentTerms. + Used by tests and demo scripts. + """ + v, r, s = self.signer.sign_transfer_authorization( + to=terms.pay_to, + value=terms.amount, + nonce=bytes(terms.nonce), + valid_before=terms.valid_before, + ) + + msg = Message_1_3() + auth = msg.payment_authorizations.add() + auth.cid = bytes(terms.cid) + auth.from_address = self.signer.address + auth.to_address = terms.pay_to + auth.value = terms.amount + auth.valid_after = 0 + auth.valid_before = terms.valid_before + auth.nonce = bytes(terms.nonce) + auth.v = v + auth.r = r + auth.s = s + auth.scheme = terms.scheme + return msg + + # ── Internal handlers ───────────────────────────────────────────────── + + async def _handle_payment_terms( + self, peer_id: str, terms: Any + ) -> Message_1_3 | None: + """ + Server sent us PaymentTerms alongside a PaymentRequired BlockPresence. + Decide whether to pay and send back a PaymentAuthorization. + """ + amount = terms.amount + cid_bytes = bytes(terms.cid) + cid_hex = cid_bytes.hex() + + logger.info( + f"💳 PaymentTerms from {peer_id[:20]}... " + f"cid={cid_hex[:20]}... amount={amount} units " + f"(${amount / 1_000_000:.6f} USDC) pay_to={terms.pay_to[:12]}..." + ) + + # Reject if too expensive + if amount > self.max_auto_pay_units: + logger.info( + f"Block too expensive: {amount} units > " + f"max {self.max_auto_pay_units} units. " + f"Skipping — will seek block elsewhere." + ) + return None + + # Validate pricing isn't a lie (10% tolerance) + expected_amount = self._expected_price(terms.block_size) + if expected_amount > 0 and amount > expected_amount * 1.1: + logger.warning( + f"Server overcharging: asked {amount}, expected ~{expected_amount}. " + f"Skipping payment." + ) + return None + + # Sign EIP-3009 authorization + try: + v, r, s = self.signer.sign_transfer_authorization( + to=terms.pay_to, + value=amount, + nonce=bytes(terms.nonce), + valid_before=terms.valid_before, + ) + except Exception as e: + logger.error(f"Failed to sign payment authorization: {e}") + return None + + # Build PaymentAuthorization message + response = Message_1_3() + auth = response.payment_authorizations.add() + auth.cid = cid_bytes + auth.from_address = self.signer.address + auth.to_address = terms.pay_to + auth.value = amount + auth.valid_after = 0 + auth.valid_before = terms.valid_before + auth.nonce = bytes(terms.nonce) + auth.v = v + auth.r = r + auth.s = s + auth.scheme = terms.scheme + + # Track pending payment + nonce_hex = bytes(terms.nonce).hex() + self._pending_payments[nonce_hex] = { + "peer_id": peer_id, + "cid": cid_hex, + "amount": amount, + } + + # Persist spent payment to ledger + if self.ledger is not None: + try: + self.ledger.record_spent_payment( + peer_id=peer_id, + cid=cid_bytes, + amount=amount, + nonce=bytes(terms.nonce), + ) + except Exception as _e: + logger.warning(f"Failed to persist spent payment: {_e}") + + logger.info( + f"Sending PaymentAuthorization to {peer_id[:20]}... " + f"cid={cid_hex[:20]}... amount={amount} units " + f"(${amount / 1_000_000:.6f} USDC)" + ) + return response + + def _expected_price(self, block_size: int) -> int: + """Compute expected price in USDC micro-units for a block of given size.""" + # Simple pricing: $0.000001 per KB + kb = block_size / 1024 + return int(kb * 1) # 1 micro-unit per KB + + async def _handle_payment_receipt(self, peer_id: str, receipt: Any) -> None: + """Server confirmed payment. Retry the WANT_BLOCK immediately.""" + cid_hex = ( + bytes(receipt.cid).hex() if isinstance(receipt.cid, bytes) else receipt.cid + ) + logger.info( + f"Payment receipt received from {peer_id[:20]}... " + f"cid={cid_hex[:20]}... " + f"tx={receipt.tx_hash[:20] if receipt.tx_hash else 'optimistic'}..." + ) + # Trigger want manager to retry + if self.want_manager: + try: + await self.want_manager.retry_want_block(peer_id, cid_hex) + except Exception as e: + logger.error(f"Failed to retry want block: {e}") + + def _handle_payment_rejection(self, peer_id: str, rejection: Any) -> None: + """Log and surface payment rejection.""" + cid_hex = ( + bytes(rejection.cid).hex() + if isinstance(rejection.cid, bytes) + else rejection.cid + ) + logger.warning( + f"❌ PaymentRejection from {peer_id[:20]}... " + f"cid={cid_hex[:20]}... reason={rejection.reason}" + ) + + Args: + signer: An EIP3009Signer instance (gooseswarm.payments.eip3009_signer) + want_manager: Object with retry_want_block(peer_id, cid) async method + max_auto_pay_usdc: Maximum amount to auto-pay in USDC (default $0.001) + send_callback: Async function(peer_id, msg_bytes) to send responses + + """ + + def __init__( + self, + signer: Any, # gooseswarm.payments.eip3009_signer.EIP3009Signer + want_manager: Any, # has retry_want_block(peer_id, cid) method + max_auto_pay_usdc: float = 0.001, + send_callback: Callable[..., Any] | None = None, + ledger: Any = None, # gooseswarm.payments.ledger.PaymentLedger (optional) + ): + self.signer = signer + self.want_manager = want_manager + self.max_auto_pay_units = int(max_auto_pay_usdc * 1_000_000) + self.send_callback = send_callback + self.ledger = ledger + + # Pending payments: nonce_hex → {peer_id, cid, amount} + self._pending_payments: dict[str, dict[str, Any]] = {} + + async def process_incoming_message( + self, peer_id: str, msg: Message_1_3 + ) -> Message_1_3 | None: + """ + Called by the Bitswap dispatcher for every incoming 1.3.0 message. + + Handles: + - PaymentTerms → sign and send PaymentAuthorization + - PaymentReceipts → retry WANT_BLOCK + - PaymentRejections → log and surface to application + + Returns a response Message to send back, or None. + """ + # Handle payment terms (server telling us what a block costs) + if msg.payment_terms: + for terms in msg.payment_terms: + response = await self._handle_payment_terms(peer_id, terms) + if response: + return response + + # Handle receipts (server confirming our payment) + for receipt in msg.payment_receipts: + await self._handle_payment_receipt(peer_id, receipt) + + # Handle rejections + for rejection in msg.payment_rejections: + self._handle_payment_rejection(peer_id, rejection) + + return None + + async def build_payment_auth_msg( + self, + terms: Any, # Message_1_3.PaymentTerms + ) -> Message_1_3: + """ + Build a PaymentAuthorization message for the given PaymentTerms. + Used by tests and demo scripts. + """ + v, r, s = self.signer.sign_transfer_authorization( + to=terms.pay_to, + value=terms.amount, + nonce=bytes(terms.nonce), + valid_before=terms.valid_before, + ) + + msg = Message_1_3() + auth = msg.payment_authorizations.add() + auth.cid = bytes(terms.cid) + auth.from_address = self.signer.address + auth.to_address = terms.pay_to + auth.value = terms.amount + auth.valid_after = 0 + auth.valid_before = terms.valid_before + auth.nonce = bytes(terms.nonce) + auth.v = v + auth.r = r + auth.s = s + auth.scheme = terms.scheme + return msg + + # ── Internal handlers ───────────────────────────────────────────────── + + async def _handle_payment_terms( + self, peer_id: str, terms: Any + ) -> Message_1_3 | None: + """ + Server sent us PaymentTerms alongside a PaymentRequired BlockPresence. + Decide whether to pay and send back a PaymentAuthorization. + """ + amount = terms.amount + + # Reject if too expensive + if amount > self.max_auto_pay_units: + logger.info( + f"Block too expensive: {amount} units > " + f"max {self.max_auto_pay_units} units. " + f"Skipping — will seek block elsewhere." + ) + return None + + # Validate pricing isn't a lie (10% tolerance) + expected_amount = self._expected_price(terms.block_size) + if expected_amount > 0 and amount > expected_amount * 1.1: + logger.warning( + f"Server overcharging: asked {amount}, expected ~{expected_amount}. " + f"Skipping payment." + ) + return None + + # Sign EIP-3009 authorization + try: + v, r, s = self.signer.sign_transfer_authorization( + to=terms.pay_to, + value=amount, + nonce=bytes(terms.nonce), + valid_before=terms.valid_before, + ) + except Exception as e: + logger.error(f"Failed to sign payment authorization: {e}") + return None + + # Build PaymentAuthorization message + response = Message_1_3() + auth = response.payment_authorizations.add() + auth.cid = bytes(terms.cid) + auth.from_address = self.signer.address + auth.to_address = terms.pay_to + auth.value = amount + auth.valid_after = 0 + auth.valid_before = terms.valid_before + auth.nonce = bytes(terms.nonce) + auth.v = v + auth.r = r + auth.s = s + auth.scheme = terms.scheme + + # Track pending payment + nonce_hex = bytes(terms.nonce).hex() + self._pending_payments[nonce_hex] = { + "peer_id": peer_id, + "cid": bytes(terms.cid).hex(), + "amount": amount, + } + + # Persist spent payment to ledger + if self.ledger is not None: + try: + self.ledger.record_spent_payment( + peer_id=peer_id, + cid=bytes(terms.cid), + amount=amount, + nonce=bytes(terms.nonce), + ) + except Exception as _e: + logger.warning(f"Failed to persist spent payment: {_e}") + + logger.info( + f"Sending PaymentAuthorization to {peer_id[:20]}... " + f"cid={bytes(terms.cid).hex()[:20]}... amount={amount} units " + f"(${amount / 1_000_000:.6f} USDC)" + ) + return response + + async def _handle_payment_receipt(self, peer_id: str, receipt: Any) -> None: + """Server confirmed payment. Retry the WANT_BLOCK immediately.""" + cid_hex = ( + bytes(receipt.cid).hex() if isinstance(receipt.cid, bytes) else receipt.cid + ) + logger.info( + f"Payment receipt received from {peer_id[:20]}... " + f"cid={cid_hex[:20]}... " + f"tx={receipt.tx_hash[:20] if receipt.tx_hash else 'optimistic'}..." + ) + # Trigger want manager to retry + if self.want_manager: + try: + await self.want_manager.retry_want_block(peer_id, cid_hex) + except Exception as e: + logger.error(f"Failed to retry want block: {e}") + + def _handle_payment_rejection(self, peer_id: str, rejection: Any) -> None: + """Log and surface payment rejection.""" + cid_hex = ( + bytes(rejection.cid).hex() + if isinstance(rejection.cid, bytes) + else rejection.cid + ) + logger.warning( + f"Payment rejected by {peer_id[:20]}... " + f"cid={cid_hex[:20]}... reason={rejection.reason}" + ) + + def _expected_price(self, block_size_bytes: int) -> int: + """ + Client-side price oracle — must roughly match server pricing. + Used to detect overcharging. + """ + if block_size_bytes <= 4096: + return 0 + kb = block_size_bytes / 1024 + return int(kb * 10) # 10 units per KB baseline diff --git a/libp2p/bitswap/payment_ledger.py b/libp2p/bitswap/payment_ledger.py new file mode 100644 index 000000000..f2e9baf55 --- /dev/null +++ b/libp2p/bitswap/payment_ledger.py @@ -0,0 +1,278 @@ +""" +Payment Ledger for Bitswap 1.3.0 - Root CID Payment Tracking. + +Tracks payments at the root CID level, not per-block. When a peer pays for +a root CID, all child blocks (chunks) in the DAG are automatically accessible. + +Design: +- Payment records: (peer_id, root_cid) → {amount, nonce, timestamp, tx_hash} +- Root CID mapping: (child_cid) → root_cid (for chunk → root resolution) +- Nonce deduplication: Prevents replay attacks +""" + +import logging +import time +from typing import Any + +logger = logging.getLogger(__name__) + + +class PaymentLedger: + """ + Tracks root CID payments for Bitswap 1.3.0. + + When a peer pays for a root CID, they gain access to all blocks in that DAG. + This prevents charging separately for each chunk of a multi-block file. + + Example: + >>> ledger = PaymentLedger() + >>> + >>> # Register a DAG structure (root → children mapping) + >>> await ledger.register_dag( + ... root_cid="bafyroot123...", + ... child_cids=["bafychild1...", "bafychild2...", ...] + ... ) + >>> + >>> # Record payment for root CID + >>> await ledger.record_payment( + ... peer_id="12D3Koo...", + ... cid=b"\\x01\\x55...", # Can be root or child CID + ... amount=1000000, # 1 USDC in micro-units + ... nonce=b"\\x12\\x34...", + ... ) + >>> + >>> # Check if peer has paid (works for root OR child CIDs) + >>> ledger.is_paid("12D3Koo...", "bafychild1...") # True (child of paid root) + >>> ledger.is_paid("12D3Koo...", "bafyroot123...") # True (root itself) + """ + + def __init__(self): + # Payment records: (peer_id, root_cid_hex) → payment_info + self._payments: dict[tuple[str, str], dict[str, Any]] = {} + + # Child → Root mapping: child_cid_hex → root_cid_hex + # Used to resolve chunk CIDs to their root CID + self._cid_to_root: dict[str, str] = {} + + # Nonce registry: nonce_hex → (peer_id, cid_hex, timestamp) + # Prevents replay attacks (same nonce can't be used twice) + self._used_nonces: dict[str, tuple[str, str, float]] = {} + + # Free CIDs: Set of CID hashes that are always free (no payment required) + self._free_cids: set[str] = set() + + async def register_dag( + self, + root_cid: str | bytes, + child_cids: list[str | bytes], + ) -> None: + """ + Register a DAG structure so child blocks inherit root payment status. + + Args: + root_cid: The root CID of the DAG (hex string or bytes) + child_cids: List of child/chunk CIDs in the DAG + + Example: + >>> # After chunking a file into blocks + >>> await ledger.register_dag( + ... root_cid=root_cid, + ... child_cids=[chunk1_cid, chunk2_cid, ...] + ... ) + """ + root_hex = _cid_to_hex(root_cid) + + for child_cid in child_cids: + child_hex = _cid_to_hex(child_cid) + self._cid_to_root[child_hex] = root_hex + logger.debug(f"Registered child {child_hex[:20]}... → root {root_hex[:20]}...") + + logger.info( + f"Registered DAG: root={root_hex[:20]}... with {len(child_cids)} children" + ) + + def mark_free(self, cid: str | bytes) -> None: + """ + Mark a CID as free (no payment required). + + Args: + cid: The CID to mark as free (hex string or bytes) + """ + cid_hex = _cid_to_hex(cid) + self._free_cids.add(cid_hex) + logger.info(f"Marked CID as FREE: {cid_hex[:20]}...") + + def is_free(self, cid: str | bytes) -> bool: + """ + Check if a CID is marked as free. + + Args: + cid: The CID to check (hex string or bytes) + + Returns: + True if the CID is free, False otherwise + """ + cid_hex = _cid_to_hex(cid) + root_hex = self._cid_to_root.get(cid_hex, cid_hex) + return cid_hex in self._free_cids or root_hex in self._free_cids + + def is_paid( + self, + peer_id: str, + cid: str | bytes, + block_size: int = 0, # Ignored (kept for backward compatibility) + ) -> bool: + """ + Check if a peer has paid for a CID (root or child). + + Resolves child CIDs to their root CID automatically. + + Args: + peer_id: The peer ID to check + cid: The CID to check (can be root or child CID) + block_size: Ignored (kept for backward compatibility with old API) + + Returns: + True if the peer has paid for this CID (or its root), False otherwise + """ + cid_hex = _cid_to_hex(cid) + + # Check if it's a free CID + if self.is_free(cid_hex): + return True + + # Resolve to root CID if this is a child + root_hex = self._cid_to_root.get(cid_hex, cid_hex) + + # Check if payment exists for (peer, root) + key = (peer_id, root_hex) + paid = key in self._payments + + if paid: + payment = self._payments[key] + logger.debug( + f"✅ Payment found: peer={peer_id[:20]}... " + f"cid={cid_hex[:20]}... root={root_hex[:20]}... " + f"amount={payment['amount']}" + ) + else: + logger.debug( + f"❌ No payment: peer={peer_id[:20]}... " + f"cid={cid_hex[:20]}... root={root_hex[:20]}..." + ) + + return paid + + async def record_payment( + self, + peer_id: str, + cid: str | bytes, + amount: int, + nonce: bytes, + tx_hash: str = "", + ) -> None: + """ + Record a payment for a root CID. + + Args: + peer_id: The peer who paid + cid: The CID being paid for (root or child - will resolve to root) + amount: Payment amount in micro-units (e.g., USDC micro-units) + nonce: Unique nonce for this payment (prevents replay attacks) + tx_hash: Optional transaction hash (empty for EIP-3009) + + Raises: + ValueError: If the nonce has already been used + """ + cid_hex = _cid_to_hex(cid) + nonce_hex = nonce.hex() + + # Check for nonce reuse (replay attack prevention) + if nonce_hex in self._used_nonces: + existing = self._used_nonces[nonce_hex] + raise ValueError( + f"Nonce already used: {nonce_hex[:20]}... " + f"by peer={existing[0][:20]}... for cid={existing[1][:20]}..." + ) + + # Resolve to root CID + root_hex = self._cid_to_root.get(cid_hex, cid_hex) + + # Record payment + key = (peer_id, root_hex) + self._payments[key] = { + "amount": amount, + "nonce": nonce_hex, + "tx_hash": tx_hash, + "timestamp": time.time(), + } + + # Mark nonce as used + self._used_nonces[nonce_hex] = (peer_id, root_hex, time.time()) + + logger.info( + f"💰 Payment recorded: peer={peer_id[:20]}... " + f"root={root_hex[:20]}... amount={amount} " + f"nonce={nonce_hex[:16]}..." + ) + + def get_payment( + self, + peer_id: str, + cid: str | bytes, + ) -> dict[str, Any] | None: + """ + Get payment details for a peer and CID. + + Args: + peer_id: The peer ID + cid: The CID (root or child) + + Returns: + Payment info dict with keys: amount, nonce, tx_hash, timestamp + or None if no payment found + """ + cid_hex = _cid_to_hex(cid) + root_hex = self._cid_to_root.get(cid_hex, cid_hex) + key = (peer_id, root_hex) + return self._payments.get(key) + + def clear_old_nonces(self, max_age_seconds: float = 86400) -> int: + """ + Clear nonces older than max_age_seconds (default: 24 hours). + + Returns: + Number of nonces cleared + """ + now = time.time() + old_nonces = [ + nonce_hex + for nonce_hex, (_, _, timestamp) in self._used_nonces.items() + if now - timestamp > max_age_seconds + ] + + for nonce_hex in old_nonces: + del self._used_nonces[nonce_hex] + + if old_nonces: + logger.info(f"Cleared {len(old_nonces)} old nonces (>{max_age_seconds}s)") + + return len(old_nonces) + + +# ── Helper functions ────────────────────────────────────────────────────────── + +def _cid_to_hex(cid: str | bytes) -> str: + """Convert CID to hex string for consistent storage.""" + if isinstance(cid, bytes): + return cid.hex() + elif isinstance(cid, str): + # If already hex, return as-is; otherwise try to decode + try: + bytes.fromhex(cid) + return cid + except ValueError: + # Assume it's a base58/base32 encoded CID string + return cid.encode().hex() + else: + raise TypeError(f"CID must be str or bytes, got {type(cid)}") diff --git a/libp2p/bitswap/pb/bitswap_1_3_0_pb2.pyi b/libp2p/bitswap/pb/bitswap_1_3_0_pb2.pyi index 1258ace8d..08759a2a3 100644 --- a/libp2p/bitswap/pb/bitswap_1_3_0_pb2.pyi +++ b/libp2p/bitswap/pb/bitswap_1_3_0_pb2.pyi @@ -2,12 +2,13 @@ from google.protobuf.internal import containers as _containers from google.protobuf.internal import enum_type_wrapper as _enum_type_wrapper from google.protobuf import descriptor as _descriptor from google.protobuf import message as _message -from typing import Any as _Any, ClassVar as _ClassVar, Iterable as _Iterable, Mapping as _Mapping, Optional as _Optional, Union as _Union +from collections.abc import Iterable as _Iterable, Mapping as _Mapping +from typing import ClassVar as _ClassVar, Optional as _Optional, Union as _Union DESCRIPTOR: _descriptor.FileDescriptor class Message(_message.Message): - __slots__ = ("wantlist", "blocks", "payload", "blockPresences", "pendingBytes", "payment_terms", "payment_authorizations", "payment_receipts", "payment_rejections") + __slots__ = ("wantlist", "blocks", "payload", "blockPresences", "pendingBytes", "payment_terms", "tx_receipts", "payment_receipts", "payment_rejections") class BlockPresenceType(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): __slots__ = () Have: _ClassVar[Message.BlockPresenceType] @@ -41,7 +42,7 @@ class Message(_message.Message): FULL_FIELD_NUMBER: _ClassVar[int] entries: _containers.RepeatedCompositeFieldContainer[Message.Wantlist.Entry] full: bool - def __init__(self, entries: _Optional[_Iterable[_Union[Message.Wantlist.Entry, _Mapping[str, _Any]]]] = ..., full: bool = ...) -> None: ... + def __init__(self, entries: _Optional[_Iterable[_Union[Message.Wantlist.Entry, _Mapping]]] = ..., full: bool = ...) -> None: ... class Block(_message.Message): __slots__ = ("prefix", "data") PREFIX_FIELD_NUMBER: _ClassVar[int] @@ -57,53 +58,39 @@ class Message(_message.Message): type: Message.BlockPresenceType def __init__(self, cid: _Optional[bytes] = ..., type: _Optional[_Union[Message.BlockPresenceType, str]] = ...) -> None: ... class PaymentTerms(_message.Message): - __slots__ = ("cid", "asset", "pay_to", "amount", "network", "nonce", "valid_before", "block_size", "description", "scheme") + __slots__ = ("cid", "asset", "pay_to", "amount", "network", "block_size", "description") CID_FIELD_NUMBER: _ClassVar[int] ASSET_FIELD_NUMBER: _ClassVar[int] PAY_TO_FIELD_NUMBER: _ClassVar[int] AMOUNT_FIELD_NUMBER: _ClassVar[int] NETWORK_FIELD_NUMBER: _ClassVar[int] - NONCE_FIELD_NUMBER: _ClassVar[int] - VALID_BEFORE_FIELD_NUMBER: _ClassVar[int] BLOCK_SIZE_FIELD_NUMBER: _ClassVar[int] DESCRIPTION_FIELD_NUMBER: _ClassVar[int] - SCHEME_FIELD_NUMBER: _ClassVar[int] cid: bytes asset: str pay_to: str amount: int network: str - nonce: bytes - valid_before: int block_size: int description: str - scheme: str - def __init__(self, cid: _Optional[bytes] = ..., asset: _Optional[str] = ..., pay_to: _Optional[str] = ..., amount: _Optional[int] = ..., network: _Optional[str] = ..., nonce: _Optional[bytes] = ..., valid_before: _Optional[int] = ..., block_size: _Optional[int] = ..., description: _Optional[str] = ..., scheme: _Optional[str] = ...) -> None: ... - class PaymentAuthorization(_message.Message): - __slots__ = ("cid", "from_address", "to_address", "value", "valid_after", "valid_before", "nonce", "v", "r", "s", "scheme") + def __init__(self, cid: _Optional[bytes] = ..., asset: _Optional[str] = ..., pay_to: _Optional[str] = ..., amount: _Optional[int] = ..., network: _Optional[str] = ..., block_size: _Optional[int] = ..., description: _Optional[str] = ...) -> None: ... + class TxReceipt(_message.Message): + __slots__ = ("cid", "tx_hash", "from_address", "to_address", "amount", "asset", "network") CID_FIELD_NUMBER: _ClassVar[int] + TX_HASH_FIELD_NUMBER: _ClassVar[int] FROM_ADDRESS_FIELD_NUMBER: _ClassVar[int] TO_ADDRESS_FIELD_NUMBER: _ClassVar[int] - VALUE_FIELD_NUMBER: _ClassVar[int] - VALID_AFTER_FIELD_NUMBER: _ClassVar[int] - VALID_BEFORE_FIELD_NUMBER: _ClassVar[int] - NONCE_FIELD_NUMBER: _ClassVar[int] - V_FIELD_NUMBER: _ClassVar[int] - R_FIELD_NUMBER: _ClassVar[int] - S_FIELD_NUMBER: _ClassVar[int] - SCHEME_FIELD_NUMBER: _ClassVar[int] + AMOUNT_FIELD_NUMBER: _ClassVar[int] + ASSET_FIELD_NUMBER: _ClassVar[int] + NETWORK_FIELD_NUMBER: _ClassVar[int] cid: bytes + tx_hash: str from_address: str to_address: str - value: int - valid_after: int - valid_before: int - nonce: bytes - v: int - r: bytes - s: bytes - scheme: str - def __init__(self, cid: _Optional[bytes] = ..., from_address: _Optional[str] = ..., to_address: _Optional[str] = ..., value: _Optional[int] = ..., valid_after: _Optional[int] = ..., valid_before: _Optional[int] = ..., nonce: _Optional[bytes] = ..., v: _Optional[int] = ..., r: _Optional[bytes] = ..., s: _Optional[bytes] = ..., scheme: _Optional[str] = ...) -> None: ... + amount: int + asset: str + network: str + def __init__(self, cid: _Optional[bytes] = ..., tx_hash: _Optional[str] = ..., from_address: _Optional[str] = ..., to_address: _Optional[str] = ..., amount: _Optional[int] = ..., asset: _Optional[str] = ..., network: _Optional[str] = ...) -> None: ... class PaymentReceipt(_message.Message): __slots__ = ("cid", "tx_hash", "expires") CID_FIELD_NUMBER: _ClassVar[int] @@ -126,7 +113,7 @@ class Message(_message.Message): BLOCKPRESENCES_FIELD_NUMBER: _ClassVar[int] PENDINGBYTES_FIELD_NUMBER: _ClassVar[int] PAYMENT_TERMS_FIELD_NUMBER: _ClassVar[int] - PAYMENT_AUTHORIZATIONS_FIELD_NUMBER: _ClassVar[int] + TX_RECEIPTS_FIELD_NUMBER: _ClassVar[int] PAYMENT_RECEIPTS_FIELD_NUMBER: _ClassVar[int] PAYMENT_REJECTIONS_FIELD_NUMBER: _ClassVar[int] wantlist: Message.Wantlist @@ -135,7 +122,7 @@ class Message(_message.Message): blockPresences: _containers.RepeatedCompositeFieldContainer[Message.BlockPresence] pendingBytes: int payment_terms: _containers.RepeatedCompositeFieldContainer[Message.PaymentTerms] - payment_authorizations: _containers.RepeatedCompositeFieldContainer[Message.PaymentAuthorization] + tx_receipts: _containers.RepeatedCompositeFieldContainer[Message.TxReceipt] payment_receipts: _containers.RepeatedCompositeFieldContainer[Message.PaymentReceipt] payment_rejections: _containers.RepeatedCompositeFieldContainer[Message.PaymentRejection] - def __init__(self, wantlist: _Optional[_Union[Message.Wantlist, _Mapping[str, _Any]]] = ..., blocks: _Optional[_Iterable[bytes]] = ..., payload: _Optional[_Iterable[_Union[Message.Block, _Mapping[str, _Any]]]] = ..., blockPresences: _Optional[_Iterable[_Union[Message.BlockPresence, _Mapping[str, _Any]]]] = ..., pendingBytes: _Optional[int] = ..., payment_terms: _Optional[_Iterable[_Union[Message.PaymentTerms, _Mapping[str, _Any]]]] = ..., payment_authorizations: _Optional[_Iterable[_Union[Message.PaymentAuthorization, _Mapping[str, _Any]]]] = ..., payment_receipts: _Optional[_Iterable[_Union[Message.PaymentReceipt, _Mapping[str, _Any]]]] = ..., payment_rejections: _Optional[_Iterable[_Union[Message.PaymentRejection, _Mapping[str, _Any]]]] = ...) -> None: ... + def __init__(self, wantlist: _Optional[_Union[Message.Wantlist, _Mapping]] = ..., blocks: _Optional[_Iterable[bytes]] = ..., payload: _Optional[_Iterable[_Union[Message.Block, _Mapping]]] = ..., blockPresences: _Optional[_Iterable[_Union[Message.BlockPresence, _Mapping]]] = ..., pendingBytes: _Optional[int] = ..., payment_terms: _Optional[_Iterable[_Union[Message.PaymentTerms, _Mapping]]] = ..., tx_receipts: _Optional[_Iterable[_Union[Message.TxReceipt, _Mapping]]] = ..., payment_receipts: _Optional[_Iterable[_Union[Message.PaymentReceipt, _Mapping]]] = ..., payment_rejections: _Optional[_Iterable[_Union[Message.PaymentRejection, _Mapping]]] = ...) -> None: ... diff --git a/libp2p/bitswap/pricing_engine.py b/libp2p/bitswap/pricing_engine.py new file mode 100644 index 000000000..1d75378b3 --- /dev/null +++ b/libp2p/bitswap/pricing_engine.py @@ -0,0 +1,177 @@ +""" +Block Pricing Engine for Bitswap 1.3.0 - Root CID Pricing. + +Computes prices for files/DAGs based on total size, not individual blocks. +Supports configurable pricing strategies: +- Free: All blocks are free (price = 0) +- Fixed: Fixed price per file regardless of size +- Size-based: Price scales with total file size (units per KB) +- Custom: User-defined pricing function +""" + +import logging +from typing import Any, Callable + +logger = logging.getLogger(__name__) + + +class BlockPricingEngine: + """ + Computes prices for Bitswap blocks based on configurable strategies. + + Pricing is typically done at the root CID level (total file size), + not per-block, to avoid charging for each chunk separately. + + Example: + >>> # Size-based pricing: 100 micro-USDC per KB + >>> pricing = BlockPricingEngine( + ... strategy="size_based", + ... units_per_kb=100, + ... ) + >>> + >>> # 5 MB file = 5000 KB × 100 = 500,000 micro-units = $0.50 + >>> price = pricing.compute_price("bafyroot...", block_size=5_000_000) + >>> print(f"${price / 1_000_000:.2f}") # $0.50 + >>> + >>> # Mark specific CIDs as free + >>> pricing.set_free("bafyfree123...") + >>> pricing.compute_price("bafyfree123...", 1_000_000) # 0 (free) + """ + + def __init__( + self, + strategy: str = "size_based", + units_per_kb: float = 100.0, + fixed_price: int = 0, + custom_pricing_fn: Callable[[str, int], int] | None = None, + default_free: bool = False, + ): + """ + Initialize pricing engine. + + Args: + strategy: Pricing strategy - "free", "fixed", "size_based", or "custom" + units_per_kb: Price per KB for size_based strategy (micro-units) + fixed_price: Fixed price for "fixed" strategy (micro-units) + custom_pricing_fn: Custom function(cid_str, size) → price for "custom" strategy + default_free: If True, all CIDs are free by default + + Strategies: + - "free": Always return 0 (all blocks free) + - "fixed": Return fixed_price for all blocks + - "size_based": price = max(1, int(size_kb * units_per_kb)) + - "custom": Use custom_pricing_fn(cid_str, block_size) + """ + self.strategy = strategy + self.units_per_kb = units_per_kb + self.fixed_price = fixed_price + self.custom_pricing_fn = custom_pricing_fn + self.default_free = default_free + + # Per-CID overrides: cid_hex → price (0 = free, >0 = specific price) + self._cid_prices: dict[str, int] = {} + + logger.info( + f"Pricing engine initialized: strategy={strategy} " + f"units_per_kb={units_per_kb} default_free={default_free}" + ) + + def set_price(self, cid: str | bytes, price: int) -> None: + """ + Set a specific price for a CID (overrides strategy). + + Args: + cid: The CID (hex string or bytes) + price: Price in micro-units (0 = free) + """ + cid_hex = _cid_to_hex(cid) + self._cid_prices[cid_hex] = price + logger.info(f"Set price for {cid_hex[:20]}... = {price} units") + + def set_free(self, cid: str | bytes) -> None: + """ + Mark a CID as free (price = 0). + + Args: + cid: The CID to mark as free + """ + self.set_price(cid, 0) + + def compute_price(self, cid_str: str, block_size: int) -> int: + """ + Compute the price for a block/file. + + Args: + cid_str: The CID as a hex string + block_size: Size in bytes (for root CID, this is total file size) + + Returns: + Price in micro-units (0 = free, >0 = paid) + + Note: + For multi-block files, call this ONCE with the root CID and total size, + not for each individual chunk. + """ + # Check for per-CID override + if cid_str in self._cid_prices: + price = self._cid_prices[cid_str] + logger.debug(f"Using override price for {cid_str[:20]}... = {price}") + return price + + # Apply default free policy + if self.default_free: + return 0 + + # Apply strategy + if self.strategy == "free": + return 0 + + elif self.strategy == "fixed": + return self.fixed_price + + elif self.strategy == "size_based": + # Price = units_per_kb × size_in_kb (minimum 1 unit) + kb = block_size / 1024 + price = max(1, int(kb * self.units_per_kb)) + logger.debug( + f"Size-based pricing: {block_size}B = {kb:.2f}KB × " + f"{self.units_per_kb} = {price} units" + ) + return price + + elif self.strategy == "custom": + if self.custom_pricing_fn is None: + raise ValueError("Custom strategy requires custom_pricing_fn") + return self.custom_pricing_fn(cid_str, block_size) + + else: + raise ValueError(f"Unknown pricing strategy: {self.strategy}") + + def get_units_per_kb(self) -> float: + """ + Get the current units_per_kb rate (for size_based strategy). + + Returns: + Units per KB, or 0.0 if not using size_based strategy + """ + if self.strategy == "size_based": + return self.units_per_kb + return 0.0 + + +# ── Helper functions ────────────────────────────────────────────────────────── + +def _cid_to_hex(cid: str | bytes) -> str: + """Convert CID to hex string for consistent storage.""" + if isinstance(cid, bytes): + return cid.hex() + elif isinstance(cid, str): + # If already hex, return as-is + try: + bytes.fromhex(cid) + return cid + except ValueError: + # Assume it's a base58/base32 encoded CID string + return cid.encode().hex() + else: + raise TypeError(f"CID must be str or bytes, got {type(cid)}") From 757b30a29b44602960449d14578c062bf3261533 Mon Sep 17 00:00:00 2001 From: sumanjeet0012 Date: Sat, 30 May 2026 23:00:44 +0530 Subject: [PATCH 29/37] refactor: matched the root CID with kubo. --- examples/bitswap/bitswap.py | 41 ++++++++++++---- libp2p/bitswap/chunker.py | 7 ++- libp2p/bitswap/config.py | 2 +- libp2p/bitswap/dag.py | 95 ++++++++++++++++++------------------- libp2p/bitswap/dag_pb.py | 27 ++++++----- 5 files changed, 98 insertions(+), 74 deletions(-) diff --git a/examples/bitswap/bitswap.py b/examples/bitswap/bitswap.py index 24437a772..bd5ed43ed 100755 --- a/examples/bitswap/bitswap.py +++ b/examples/bitswap/bitswap.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import argparse +import hashlib import logging from pathlib import Path import sys @@ -14,6 +15,7 @@ from libp2p import new_host from libp2p.bitswap import BitswapClient +from libp2p.crypto.ed25519 import create_new_key_pair from libp2p.bitswap.cid import cid_to_bytes, format_cid_for_display from libp2p.bitswap.dag import MerkleDag from libp2p.peer.peerinfo import info_from_p2p_addr @@ -46,13 +48,14 @@ def format_size(size_bytes: int) -> str: return f"{size:.1f} TB" -async def run_provider(file_path: str, port: int = 0): +async def run_provider(file_path: str, port: int = 0, seed: str | None = None): """ Run the provider node to share a file. Args: file_path: Path to the file to share port: TCP port to listen on (0 for auto) + seed: Optional seed string for deterministic peer ID generation """ file_path_obj = Path(file_path) @@ -73,8 +76,16 @@ async def run_provider(file_path: str, port: int = 0): if port <= 0: port = find_free_port() listen_addrs = get_available_interfaces(port) - # Create host - host = new_host() + + # Create host with optional seed for deterministic peer ID + key_pair = None + if seed: + # Convert seed string to bytes (must be 32 bytes for Ed25519) + seed_bytes = hashlib.sha256(seed.encode()).digest() + key_pair = create_new_key_pair(seed=seed_bytes) + logger.info(f"Using deterministic peer ID from seed") + + host = new_host(key_pair=key_pair) async with host.run(listen_addrs=listen_addrs), trio.open_nursery() as nursery: logger.info(f"Peer ID: {host.get_id()}") @@ -109,7 +120,7 @@ def progress_callback(current: int, total: int, status: str): # Add file with directory wrapper for filename preservation # Always uses Merkle DAG regardless of file size root_cid = await dag.add_file( - file_path, progress_callback=progress_callback, wrap_with_directory=True + file_path, progress_callback=progress_callback, wrap_with_directory=False ) # Get all blocks that were stored @@ -161,6 +172,7 @@ async def run_client( root_cid_input: str, output_dir: str = "/tmp", port: int = 0, + seed: str | None = None, ): """ Run the client node to fetch a file. @@ -170,6 +182,7 @@ async def run_client( root_cid_input: Root CID (canonical text, /ipfs/... path, or hex string) output_dir: Directory to save the file port: TCP port to listen on (0 for auto) + seed: Optional seed string for deterministic peer ID generation """ output_path = Path(output_dir) @@ -195,8 +208,15 @@ async def run_client( port = find_free_port() listen_addrs = get_available_interfaces(port) - # Create host - host = new_host() + # Create host with optional seed for deterministic peer ID + key_pair = None + if seed: + # Convert seed string to bytes (must be 32 bytes for Ed25519) + seed_bytes = hashlib.sha256(seed.encode()).digest() + key_pair = create_new_key_pair(seed=seed_bytes) + logger.info(f"Using deterministic peer ID from seed") + + host = new_host(key_pair=key_pair) async with host.run(listen_addrs=listen_addrs), trio.open_nursery() as nursery: logger.info(f"Client Peer ID: {host.get_id()}") @@ -367,6 +387,11 @@ def parse_args(): action="store_true", help="Enable verbose logging", ) + parser.add_argument( + "--seed", + type=str, + help="Seed string for deterministic peer ID generation (same seed = same peer ID)", + ) args = parser.parse_args() @@ -397,9 +422,9 @@ def main(): ) if args.mode == "provider": - trio.run(run_provider, args.file, args.port) + trio.run(run_provider, args.file, args.port, args.seed) elif args.mode == "client": - trio.run(run_client, args.provider, args.cid, args.output, args.port) + trio.run(run_client, args.provider, args.cid, args.output, args.port, args.seed) except Exception as e: logger.critical(f"Script failed: {e}", exc_info=True) sys.exit(1) diff --git a/libp2p/bitswap/chunker.py b/libp2p/bitswap/chunker.py index 4739da3de..ba3fe9822 100644 --- a/libp2p/bitswap/chunker.py +++ b/libp2p/bitswap/chunker.py @@ -10,10 +10,9 @@ import io from pathlib import Path -# Default chunk size: 63 KB (py-libp2p accepts less than 64 KB) -# 63 KB minus 32 bytes to leave room for the dag-pb leaf envelope overhead, -# ensuring wrapped blocks never exceed MAX_BLOCK_SIZE (63 * 1024). -DEFAULT_CHUNK_SIZE = 63 * 1024 - 32 +# Default chunk size: 256 KiB — matches Kubo's default chunker (size-262144). +# Raw leaves are stored directly without dag-pb wrapping, so no overhead needed. +DEFAULT_CHUNK_SIZE = 256 * 1024 def chunk_bytes(data: bytes, chunk_size: int = DEFAULT_CHUNK_SIZE) -> list[bytes]: diff --git a/libp2p/bitswap/config.py b/libp2p/bitswap/config.py index 5de84aca5..53bd23600 100644 --- a/libp2p/bitswap/config.py +++ b/libp2p/bitswap/config.py @@ -27,7 +27,7 @@ # Maximum block size (63 KB - after DAG-PB/UnixFS encoding) # py-libp2p stream limit is ~64 KB, so we use 63 KB to be safe # Note: Raw chunk data should be smaller to account for DAG-PB overhead (~14 bytes) -MAX_BLOCK_SIZE = 63 * 1024 +MAX_BLOCK_SIZE = 512 * 1024 # Default timeout for operations (in seconds) DEFAULT_TIMEOUT = 90 diff --git a/libp2p/bitswap/dag.py b/libp2p/bitswap/dag.py index 3a449779a..dd729eeba 100644 --- a/libp2p/bitswap/dag.py +++ b/libp2p/bitswap/dag.py @@ -27,6 +27,7 @@ ) from .cid import ( CODEC_DAG_PB, + CODEC_RAW, CIDInput, cid_to_bytes, compute_cid_v1, @@ -238,17 +239,17 @@ async def add_file( logger.debug(f"Using chunk size: {chunk_size} bytes") - # If file is small enough, store as single dag-pb leaf block + # If file is small enough, store as single raw leaf block (Kubo default: RawLeaves=true) if file_size <= chunk_size: logger.debug("File fits in single block") with open(file_path, "rb") as f: data = f.read() - leaf_block = create_leaf_node(data) - cid = compute_cid_v1(leaf_block, codec=CODEC_DAG_PB) + # Raw leaf: store file bytes directly with raw codec CID + cid = compute_cid_v1(data, codec=CODEC_RAW) - await self._put_block(cid, leaf_block) + await self._put_block(cid, data) if progress_callback: await _call_progress_callback( @@ -256,7 +257,7 @@ async def add_file( ) logger.info( - f"Added file as single block: {format_cid_for_display(cid, max_len=16)}" + f"Added file as single raw block: {format_cid_for_display(cid, max_len=16)}" ) # Wrap in directory if requested @@ -270,8 +271,8 @@ async def add_file( f"Wrapping single-block file in directory with name: {filename}" ) - # Tsize should be the block size, not the file data size - dir_data = create_directory_node([(filename, cid, len(leaf_block))]) + # Tsize for raw leaf = raw file size (no block overhead) + dir_data = create_directory_node([(filename, cid, file_size)]) dir_cid = compute_cid_v1(dir_data, codec=CODEC_DAG_PB) await self._put_block(dir_cid, dir_data) @@ -289,17 +290,19 @@ async def add_file( logger.info("=== Starting file chunking process ===") # leaf_triples: (cid_bytes, leaf_block_bytes, raw_data_size) + # For raw leaves (Kubo default): leaf_block = raw chunk bytes, + # CID uses CODEC_RAW. This matches Kubo's RawLeaves=true behavior + # for multi-chunk files, producing identical CIDs. leaf_triples: list[tuple[bytes, bytes, int]] = [] bytes_processed = 0 # Process file in chunks (memory efficient) for i, chunk_data in enumerate(chunk_file(file_path, chunk_size)): - # Wrap chunk in UnixFS dag-pb leaf (matches Kubo's RawLeaves=false) - leaf_block = create_leaf_node(chunk_data) - chunk_cid = compute_cid_v1(leaf_block, codec=CODEC_DAG_PB) + # Raw leaf: store chunk bytes directly with raw codec CID (Kubo default) + chunk_cid = compute_cid_v1(chunk_data, codec=CODEC_RAW) - await self._put_block(chunk_cid, leaf_block) - leaf_triples.append((chunk_cid, leaf_block, len(chunk_data))) + await self._put_block(chunk_cid, chunk_data) + leaf_triples.append((chunk_cid, chunk_data, len(chunk_data))) bytes_processed += len(chunk_data) # Progress callback @@ -327,7 +330,6 @@ async def add_file( progress_callback, file_size, file_size, "creating root node" ) - root_cid, root_data = balanced_layout(leaf_triples) # Create a sync wrapper for the async _put_block method # We'll collect (cid, data) pairs and store them after internal_nodes: list[tuple[bytes, bytes]] = [] @@ -336,7 +338,7 @@ def store_internal_node(cid: bytes, data: bytes) -> None: """Callback to collect internal nodes for storage.""" internal_nodes.append((cid, data)) - root_cid, root_data = balanced_layout( + root_cid, root_data, root_tsize = balanced_layout( leaf_triples, put_block_callback=store_internal_node ) @@ -376,9 +378,9 @@ def store_internal_node(cid: bytes, data: bytes) -> None: filename = os.path.basename(file_path) logger.info(f"Wrapping file in directory with name: {filename}") - # Create directory node with single entry pointing to the file - # Tsize should be the block size, not the file data size - dir_data = create_directory_node([(filename, root_cid, len(root_data))]) + # Tsize = cumulative block size (root block + all descendant blocks), + # matching Kubo's behavior for directory link Tsize. + dir_data = create_directory_node([(filename, root_cid, root_tsize)]) dir_cid = compute_cid_v1(dir_data, codec=CODEC_DAG_PB) await self._put_block(dir_cid, dir_data) @@ -419,11 +421,10 @@ async def add_bytes( if chunk_size is None: chunk_size = DEFAULT_CHUNK_SIZE - # If data is small, store as single dag-pb leaf block + # If data is small, store as single raw leaf block (Kubo default: RawLeaves=true) if file_size <= chunk_size: - leaf_block = create_leaf_node(data) - cid = compute_cid_v1(leaf_block, codec=CODEC_DAG_PB) - await self._put_block(cid, leaf_block) + cid = compute_cid_v1(data, codec=CODEC_RAW) + await self._put_block(cid, data) if progress_callback: await _call_progress_callback( @@ -432,15 +433,14 @@ async def add_bytes( return cid - # Chunk the data and wrap each chunk as a dag-pb leaf + # Chunk the data using raw leaves (Kubo default: RawLeaves=true) chunks = chunk_bytes(data, chunk_size) leaf_triples: list[tuple[bytes, bytes, int]] = [] for i, chunk_data in enumerate(chunks): - leaf_block = create_leaf_node(chunk_data) - chunk_cid = compute_cid_v1(leaf_block, codec=CODEC_DAG_PB) - await self._put_block(chunk_cid, leaf_block) - leaf_triples.append((chunk_cid, leaf_block, len(chunk_data))) + chunk_cid = compute_cid_v1(chunk_data, codec=CODEC_RAW) + await self._put_block(chunk_cid, chunk_data) + leaf_triples.append((chunk_cid, chunk_data, len(chunk_data))) if progress_callback: bytes_processed = sum(s for _, _, s in leaf_triples) @@ -452,7 +452,7 @@ async def add_bytes( ) # Build balanced DAG tree - root_cid, root_data = balanced_layout(leaf_triples) + root_cid, root_data, _tsize = balanced_layout(leaf_triples) await self._put_block(root_cid, root_data) if progress_callback: @@ -513,10 +513,10 @@ async def add_stream( bytes_processed = 0 for i, chunk_data in enumerate(chunk_stream(stream, chunk_size)): - leaf_block = create_leaf_node(chunk_data) - chunk_cid = compute_cid_v1(leaf_block, codec=CODEC_DAG_PB) - await self._put_block(chunk_cid, leaf_block) - leaf_triples.append((chunk_cid, leaf_block, len(chunk_data))) + # Raw leaf: store chunk bytes directly (Kubo default: RawLeaves=true) + chunk_cid = compute_cid_v1(chunk_data, codec=CODEC_RAW) + await self._put_block(chunk_cid, chunk_data) + leaf_triples.append((chunk_cid, chunk_data, len(chunk_data))) bytes_processed += len(chunk_data) if progress_callback: @@ -528,11 +528,10 @@ async def add_stream( f"chunking ({i + 1} chunks, {bytes_processed} bytes)", ) - # Empty stream — store a single empty leaf + # Empty stream — store a single empty raw block if not leaf_triples: - leaf_block = create_leaf_node(b"") - cid = compute_cid_v1(leaf_block, codec=CODEC_DAG_PB) - await self._put_block(cid, leaf_block) + cid = compute_cid_v1(b"", codec=CODEC_RAW) + await self._put_block(cid, b"") return cid # Single chunk — return the leaf CID directly (no root node needed) @@ -540,7 +539,7 @@ async def add_stream( return leaf_triples[0][0] # Multiple chunks — build balanced DAG tree - root_cid, root_data = balanced_layout(leaf_triples) + root_cid, root_data, _tsize = balanced_layout(leaf_triples) await self._put_block(root_cid, root_data) if progress_callback: @@ -623,10 +622,8 @@ async def fetch_file( if dir_links: first_link = dir_links[0] filename = first_link.name or None - # Links contain multihashes, need to reconstruct CIDv1 - # Assume dag-pb codec (0x70) for file blocks - multihash = first_link.cid - actual_file_cid = b'\x01\x70' + multihash # CIDv1 + dag-pb codec + multihash + # Links now store the full CID bytes (CIDv1 buffer or CIDv0 multihash) + actual_file_cid = first_link.cid logger.info(f"Filename from directory: {filename!r}") actual_file_data = await self._get_block( actual_file_cid, peer_id, timeout @@ -706,9 +703,8 @@ async def _batch_fetch_tree(cid_list: list[bytes], depth: int) -> None: msg = f"[DAG] Depth {depth}: {cid_str} has {len(node_links)}" logger.debug(f"{msg} children") for link in node_links: - # Links contain multihashes, reconstruct CIDv1 with dag-pb codec - child_cid = b'\x01\x70' + link.cid - child_cids.append(child_cid) + # Links now store full CID bytes directly + child_cids.append(link.cid) # Recursively fetch next level if there are children if child_cids: @@ -717,9 +713,8 @@ async def _batch_fetch_tree(cid_list: list[bytes], depth: int) -> None: logger.info(f"{msg}, fetching next level...") await _batch_fetch_tree(child_cids, depth + 1) - # Starting from the top-level links - # Links contain multihashes, reconstruct CIDv1 with dag-pb codec - top_cids = [b'\x01\x70' + top_link.cid for top_link in top_links] + # Starting from the top-level links (full CID bytes stored in links) + top_cids = [top_link.cid for top_link in top_links] await _batch_fetch_tree(top_cids, depth=1) blocks_count = len(all_blocks_map) logger.info(f"[DAG] ✓ Tree fetch complete: {blocks_count} total blocks") @@ -757,15 +752,15 @@ def _collect_leaves_local(cid_bytes: bytes, depth: int = 1) -> None: c_tot = len(node_links) msg = f"[DAG] Depth {depth}: processing child {c_idx}/{c_tot}" logger.debug(msg) - # Links contain multihashes, reconstruct CIDv1 - child_cid = b'\x01\x70' + child_link.cid + # Links store full CID bytes directly + child_cid = child_link.cid _collect_leaves_local(child_cid, depth + 1) # Traverse each top-level block for i, top_link in enumerate(top_links): logger.info(f"[DAG] Traversing top-level {i + 1}/{len(top_links)}...") - # Links contain multihashes, reconstruct CIDv1 - top_cid = b'\x01\x70' + top_link.cid + # Links store full CID bytes directly + top_cid = top_link.cid _collect_leaves_local(top_cid, depth=1) logger.info(f"[DAG] ✓ Collected {len(ordered_leaf_cids)} leaf blocks") diff --git a/libp2p/bitswap/dag_pb.py b/libp2p/bitswap/dag_pb.py index f820f5351..1ae5f3b0f 100644 --- a/libp2p/bitswap/dag_pb.py +++ b/libp2p/bitswap/dag_pb.py @@ -31,17 +31,19 @@ def _encode_varint(value: int) -> bytes: def _normalize_link_cid(cid: CIDInput) -> bytes: """ - Normalize CID input for DAG links while preserving raw-bytes compatibility. + Normalize CID input for DAG links. - DAG-PB links store only the multihash (not the full CID with version/codec). - This matches Kubo's behavior and the DAG-PB specification. + DAG-PB links store the full CID bytes in the Hash field. + For CIDv0 (legacy), this is the 34-byte multihash. + For CIDv1 (e.g. raw-leaf blocks), this is the full CIDv1 buffer + (version varint + codec varint + multihash), matching Kubo's behavior. """ from .cid import parse_cid - # Always parse the CID and extract the multihash - # This handles both CID objects and raw bytes (whether CIDv0, CIDv1, or already a multihash) cid_obj = parse_cid(cid) - return cid_obj.multihash + # CIDv0: buffer IS the multihash — no change in behavior. + # CIDv1: buffer includes version + codec + multihash — store the full CID. + return cid_obj.buffer @dataclass(init=False) @@ -238,7 +240,7 @@ def create_file_node(chunks: Sequence[tuple[CIDInput, int]]) -> bytes: blocksizes = [] for i, (cid, size) in enumerate(chunks): - links.append(Link(cid=cid, name=f"chunk{i}", size=size)) + links.append(Link(cid=cid, name="", size=size)) blocksizes.append(size) total_size += size @@ -333,7 +335,7 @@ def balanced_layout( leaves: list[tuple[bytes, bytes, int]], max_links: int = MAX_LINKS_PER_NODE, put_block_callback: Callable[[bytes, bytes], None] | None = None, -) -> tuple[bytes, bytes]: +) -> tuple[bytes, bytes, int]: """ Build a balanced Merkle DAG from a flat list of leaf blocks. @@ -352,7 +354,10 @@ def balanced_layout( Signature: callback(cid_bytes, block_bytes) Returns: - (root_cid_bytes, root_block_bytes) + (root_cid_bytes, root_block_bytes, cumulative_tsize) + where cumulative_tsize = len(root_block) + sum of all descendant block sizes. + This matches the Tsize value Kubo stores in directory links pointing to + the root of a multi-block file. Raises: ValueError: If leaves is empty @@ -362,7 +367,7 @@ def balanced_layout( raise ValueError("Cannot build balanced layout from empty leaf list") if len(leaves) == 1: - return leaves[0][0], leaves[0][1] + return leaves[0][0], leaves[0][1], len(leaves[0][1]) # Each level entry: (cid_bytes, block_bytes, file_data_size, cumulative_block_size) # cumulative_block_size = len(this block) + sum(children's cumulative sizes) @@ -404,4 +409,4 @@ def balanced_layout( next_level.append((internal_cid, internal_block, total_filesize, cum_size)) level = next_level - return level[0][0], level[0][1] + return level[0][0], level[0][1], level[0][3] From caa42679fa99e5c4ae8251632ab96ec8c90494a1 Mon Sep 17 00:00:00 2001 From: sumanjeet0012 Date: Mon, 1 Jun 2026 03:08:29 +0530 Subject: [PATCH 30/37] feat: implement Bitswap interop with kubo completely --- examples/bitswap/bitswap.py | 34 ++++- libp2p/bitswap/client.py | 226 ++++++++++++++++++----------- libp2p/stream_muxer/yamux/yamux.py | 32 ++-- 3 files changed, 196 insertions(+), 96 deletions(-) diff --git a/examples/bitswap/bitswap.py b/examples/bitswap/bitswap.py index bd5ed43ed..d7c08ee4a 100755 --- a/examples/bitswap/bitswap.py +++ b/examples/bitswap/bitswap.py @@ -37,6 +37,25 @@ logger = logging.getLogger(__name__) +DEFAULT_LISTEN_PORT = 4013 + + +def select_preferred_listen_addr( + addrs: list[Multiaddr], port: int +) -> Multiaddr: + """Pick a stable, local-friendly address for copy/paste commands.""" + preferred_v4 = f"/ip4/127.0.0.1/tcp/{port}" + for addr in addrs: + if str(addr) == preferred_v4: + return addr + + preferred_v6 = f"/ip6/::1/tcp/{port}" + for addr in addrs: + if str(addr) == preferred_v6: + return addr + + return addrs[0] + def format_size(size_bytes: int) -> str: """Format size in human-readable form.""" @@ -142,8 +161,12 @@ def progress_callback(current: int, total: int, status: str): logger.info("FILE READY TO SHARE!") logger.info("=" * 70) - # Get the first address (clean multiaddr without duplicate /p2p/) - provider_addr = host.get_addrs()[0] + # Prefer a deterministic local address for copy/paste commands. + transport_addrs = host.get_transport_addrs() + provider_addr = select_preferred_listen_addr(transport_addrs, port) + provider_addr = provider_addr.encapsulate( + Multiaddr(f"/p2p/{host.get_id()}") + ) root_cid_text = format_cid_for_display(root_cid) logger.info(f"Root CID: {root_cid_text}") logger.info("") @@ -355,8 +378,11 @@ def parse_args(): parser.add_argument( "--port", type=int, - default=0, - help="Port to listen on (0 for random, provider mode only)", + default=DEFAULT_LISTEN_PORT, + help=( + "Port to listen on (default: 4012). " + "Use 0 to auto-select a random port." + ), ) parser.add_argument( "--file", diff --git a/libp2p/bitswap/client.py b/libp2p/bitswap/client.py index 4ec4da154..f98a6da7b 100644 --- a/libp2p/bitswap/client.py +++ b/libp2p/bitswap/client.py @@ -703,14 +703,29 @@ async def _handle_stream(self, stream: INetStream) -> None: peer_id = stream.muxed_conn.peer_id logger.debug(f"Handling Bitswap stream from peer {peer_id}") + # Detect negotiated protocol and store it immediately so that + # _process_message can use the correct protocol for responses. + protocol = stream.get_protocol() + if protocol: + self._peer_protocols[peer_id] = str(protocol) + try: + # Read the first message from this stream + msg = await self._read_message(stream) + if msg is None: + return + + # If the peer sent a WANT_HAVE and we have blocks, reply with + # a proactive HAVE so Kubo's session scores us highly and sends + # WANT_BLOCK immediately on the same stream. + await self._process_message(msg, peer_id, stream) + + # Continue reading further messages on the same stream + # (Kubo sends WANT_BLOCK as a follow-up after receiving HAVE) while True: - # Read message msg = await self._read_message(stream) if msg is None: break - - # Process message await self._process_message(msg, peer_id, stream) except Exception as e: @@ -748,12 +763,16 @@ async def _process_message( logger.info(f"[FLOW] Negotiated protocol for peer {str(peer_id)[:20]}...: {peer_protocol}") # ── Bitswap 1.3.0 payment message handling ─────────────────────── - # Always try to parse as 1.3.0 when payment components are configured, - # regardless of the negotiated stream protocol. This handles the case - # where the stream was opened as 1.2.0 but the server sends back - # payment fields (which protobuf preserves as unknown fields). - if self.payment_client or self.payment_engine: - # Re-parse as 1.3.0 message to access payment fields + # Only enter the payment path when BOTH: + # 1. A payment component (client or engine) is configured, AND + # 2. The negotiated stream protocol is actually 1.3.0. + # A peer that opened a 1.2.0 (or lower) stream must NEVER be routed + # through payment logic — doing so caused wantlists to be silently + # dropped instead of being answered. + is_v130_stream = (peer_protocol == str(BITSWAP_PROTOCOL_V130)) + if (self.payment_client or self.payment_engine) and is_v130_stream: + # Re-parse as 1.3.0 message to access payment-specific fields + # (payment_terms, payment_receipts, payment_authorizations, etc.) msg_1_3: Message_1_3 | None try: _tmp = Message_1_3() @@ -814,86 +833,65 @@ async def _process_message( stream, response.SerializeToString() ) - # Process any blocks delivered alongside a payment receipt - if msg_1_3.payload: - logger.warning("=" * 70) - logger.warning(f"[STEP 9] FILE TRANSFER: CLIENT RECEIVING {len(msg_1_3.payload)} BLOCK(S) (v1.1.0+ payload) from {str(peer_id)[:20]}...") - for _bi, _b in enumerate(msg_1_3.payload): - logger.warning(f" block[{_bi+1}]: data_len={len(_b.data)}B prefix={bytes(_b.prefix).hex()[:10]}...") - logger.warning("=" * 70) - await self._process_blocks_v110(msg_1_3.payload) - if msg_1_3.blocks: - logger.warning("=" * 70) - logger.warning(f"[STEP 9] FILE TRANSFER: CLIENT RECEIVING {len(msg_1_3.blocks)} BLOCK(S) (v1.0.0) from {str(peer_id)[:20]}...") - logger.warning("=" * 70) - await self._process_blocks_v100(list(msg_1_3.blocks), peer_id) - # Server-side: handle PaymentAuthorizations (EIP-3009 signed payments) - if self.payment_engine: + if self.payment_engine and msg_1_3.payment_authorizations: try: - if msg_1_3.payment_authorizations: + logger.warning("=" * 70) + logger.warning(f"[STEP 6] SERVER RECEIVED PAYMENT AUTHORIZATION from {str(peer_id)[:20]}...") + for _a in msg_1_3.payment_authorizations: + logger.warning(f" cid={bytes(_a.cid).hex()[:20]}...") + logger.warning(f" from={_a.from_address[:20]}...") + logger.warning(f" to={_a.to_address[:20]}...") + logger.warning(f" value={_a.value}") + logger.warning(f" scheme={_a.scheme}") + logger.warning(f" v={_a.v} r_len={len(bytes(_a.r))} s_len={len(bytes(_a.s))}") + logger.warning("=" * 70) + response = await self.payment_engine.process_incoming_1_3_message( + str(peer_id), msg_1_3 + ) + if response is not None: + _has_receipt = bool(response.payment_receipts) + _has_rejection = bool(response.payment_rejections) + _has_blocks = bool(response.payload) or bool(response.blocks) logger.warning("=" * 70) - logger.warning(f"[STEP 6] SERVER RECEIVED PAYMENT AUTHORIZATION from {str(peer_id)[:20]}...") - for _a in msg_1_3.payment_authorizations: - logger.warning(f" cid={bytes(_a.cid).hex()[:20]}...") - logger.warning(f" from={_a.from_address[:20]}...") - logger.warning(f" to={_a.to_address[:20]}...") - logger.warning(f" value={_a.value}") - logger.warning(f" scheme={_a.scheme}") - logger.warning(f" v={_a.v} r_len={len(bytes(_a.r))} s_len={len(bytes(_a.s))}") + logger.warning(f"[STEP 8] SERVER SENDING RESPONSE after PaymentAuthorization:") + logger.warning(f" has_receipt={_has_receipt} has_rejection={_has_rejection} has_blocks={_has_blocks}") + if _has_rejection: + for _rj in response.payment_rejections: + logger.warning(f" ❌ REJECTION reason={_rj.reason}") + if _has_blocks: + _nb = len(response.payload) + len(response.blocks) + logger.warning(f" ✅ SENDING {_nb} block(s) to client — FILE TRANSFER STARTING") logger.warning("=" * 70) - response = await self.payment_engine.process_incoming_1_3_message( - str(peer_id), msg_1_3 + await self._write_message_bytes( + stream, response.SerializeToString() ) - if response is not None: - _has_receipt = bool(response.payment_receipts) - _has_rejection = bool(response.payment_rejections) - _has_blocks = bool(response.payload) or bool(response.blocks) - logger.warning("=" * 70) - logger.warning(f"[STEP 8] SERVER SENDING RESPONSE after PaymentAuthorization:") - logger.warning(f" has_receipt={_has_receipt} has_rejection={_has_rejection} has_blocks={_has_blocks}") - if _has_rejection: - for _rj in response.payment_rejections: - logger.warning(f" ❌ REJECTION reason={_rj.reason}") - if _has_blocks: - _nb = len(response.payload) + len(response.blocks) - logger.warning(f" ✅ SENDING {_nb} block(s) to client — FILE TRANSFER STARTING") - logger.warning("=" * 70) - await self._write_message_bytes( - stream, response.SerializeToString() - ) + # Payment authorization handled — don't fall through to + # standard wantlist handling for this message. + return except Exception as e: logger.error(f"Error handling PaymentAuthorization: {e}", exc_info=True) - # Handle PaymentRequired block presences specially + # Handle PaymentRequired block presences (1.3.0 type=2) if msg_1_3.blockPresences: await self._process_block_presences_1_3( msg_1_3.blockPresences, peer_id ) - # Don't fall through to normal presence processing - # (already handled above) - if msg.HasField("wantlist"): - await self._process_wantlist(msg.wantlist, peer_id, stream) - if msg.blocks: - await self._process_blocks_v100(list(msg.blocks), peer_id) - if msg.payload: - await self._process_blocks_v110(msg.payload) - return - - # ── Standard 1.0.0–1.2.0 message handling ──────────────────────── - # Process wantlist + # Fall through below to also handle wantlist/blocks/payload + # that may be bundled in the same message. + + # ── Standard 1.0.0–1.2.0 message handling (always runs) ───────── + # Also runs for 1.3.0 streams that don't carry payment-only content + # (e.g. a plain wantlist sent over a 1.3.0 stream). if msg.HasField("wantlist"): await self._process_wantlist(msg.wantlist, peer_id, stream) - # Process blocks (v1.0.0 format) if msg.blocks: await self._process_blocks_v100(list(msg.blocks), peer_id) - # Process payload (v1.1.0+ format) if msg.payload: await self._process_blocks_v110(msg.payload) - # Process block presences (v1.2.0 format) if msg.blockPresences: await self._process_block_presences(msg.blockPresences, peer_id) @@ -927,7 +925,8 @@ async def _process_wantlist( # Apply payment gating whenever payment_engine is enabled. # For 1.3.0 peers: send PaymentRequired + PaymentTerms in-band. # For older peers: send DONT_HAVE (they cannot pay in-band). - if self.payment_engine: + is_v130_peer = str(peer_protocol) == str(BITSWAP_PROTOCOL_V130) + if self.payment_engine and is_v130_peer: for entry in wantlist.entries: entry_cid = parse_cid(entry.block) if entry.cancel: @@ -991,44 +990,107 @@ async def _process_wantlist( # Check if we have this block has_block = await self.block_store.has_block(entry_cid) + logger.warning( + f"[WANTLIST ENTRY] cid={format_cid_for_display(entry_cid, max_len=16)} " + f"wantType={entry.wantType} cancel={entry.cancel} " + f"has_block={has_block}" + ) # Handle based on want type (v1.2.0) - if entry.wantType == 1: # Have request - # Send presence information - if has_block or entry.sendDontHave: - presences_to_send.append((entry_cid, has_block)) - else: # Block request + if entry.wantType == 1: # Have request (WANT_HAVE) if has_block: + # Send the block directly — do NOT send a separate HAVE + # presence. Sending HAVE causes Go's bitswap session to + # open a NEW outbound WANT_BLOCK stream to Python. That + # stream fails due to Python TLS limitations, so Go never + # receives the block. Sending the block directly (implicit + # HAVE) is the correct interop approach. data = await self.block_store.get_block(entry_cid) if data: + print( + f"\n[WANT_HAVE] Sending block directly " + f"({len(data)} bytes) for " + f"{format_cid_for_display(entry_cid, max_len=16)}", + flush=True, + ) + logger.warning( + f"[WANT_HAVE] Sending block directly " + f"({len(data)} bytes) for " + f"{format_cid_for_display(entry_cid, max_len=16)} " + f"(skipping HAVE presence to avoid Go re-request)" + ) + if peer_protocol == BITSWAP_PROTOCOL_V100: + blocks_to_send_v100.append(data) + else: + prefix = get_cid_prefix(entry_cid) + blocks_to_send_v110.append((prefix, data)) + else: + # Don't have the block — send DontHave so requester + # knows to look elsewhere. + print( + f"\n[WANT_HAVE] DontHave for " + f"{format_cid_for_display(entry_cid, max_len=16)}", + flush=True, + ) + logger.warning( + f"[WANT_HAVE] Sending DontHave for " + f"{format_cid_for_display(entry_cid, max_len=16)}" + ) + presences_to_send.append((entry_cid, False)) + else: # Block request (WANT_BLOCK) + if has_block: + data = await self.block_store.get_block(entry_cid) + if data: + print( + f"\n[WANT_BLOCK] Sending block directly " + f"({len(data)} bytes) for " + f"{format_cid_for_display(entry_cid, max_len=16)}", + flush=True, + ) + logger.warning( + f"[WANT_BLOCK] Sending block for " + f"{format_cid_for_display(entry_cid, max_len=16)}" + ) if peer_protocol == BITSWAP_PROTOCOL_V100: blocks_to_send_v100.append(data) else: prefix = get_cid_prefix(entry_cid) blocks_to_send_v110.append((prefix, data)) - elif entry.sendDontHave: - # Send DontHave (v1.2.0) + else: + # Always send DontHave when we don't have the block, + # regardless of sendDontHave flag. This prevents the + # requester from stalling waiting for a response. presences_to_send.append((entry_cid, False)) # Send responses in batches to stay under MAX_MESSAGE_SIZE # and Noise protocol limit (65535 bytes) if blocks_to_send_v100 or blocks_to_send_v110 or presences_to_send: + # We MUST open a new stream to the client to send the blocks. + # Writing to the inbound stream that the client opened for their WANTLIST + # is often ignored by the client (Kubo), as it expects the provider to dial back. + try: + outbound_stream = await self.host.new_stream( + peer_id, [TProtocol(peer_protocol)] + ) + except Exception as e: + logger.error(f"Failed to open outbound stream to send response: {e}") + return + # Send blocks in batches if blocks_to_send_v100: await self._send_blocks_in_batches_v100( - blocks_to_send_v100, peer_id, stream + blocks_to_send_v100, peer_id, outbound_stream ) if blocks_to_send_v110: await self._send_blocks_in_batches_v110( - blocks_to_send_v110, peer_id, stream + blocks_to_send_v110, peer_id, outbound_stream ) # Send presences (usually small, can send all at once) if presences_to_send: presence_msg = create_message(block_presences=presences_to_send) - await self._write_message(stream, presence_msg) - logger.debug( - f"Sent {len(presences_to_send)} block presences to peer {peer_id}" - ) + await self._write_message(outbound_stream, presence_msg) + + await outbound_stream.close() async def _send_blocks_in_batches_v100( self, blocks: list[bytes], peer_id: PeerID, stream: INetStream diff --git a/libp2p/stream_muxer/yamux/yamux.py b/libp2p/stream_muxer/yamux/yamux.py index e48126875..ac1136553 100644 --- a/libp2p/stream_muxer/yamux/yamux.py +++ b/libp2p/stream_muxer/yamux/yamux.py @@ -1014,16 +1014,20 @@ async def handle_incoming(self) -> None: stream.closed = True stream.reset_received = True self.stream_events[stream_id].set() - - ack_header = struct.pack( - YAMUX_HEADER_FORMAT, - 0, - TYPE_WINDOW_UPDATE, - FLAG_ACK, - stream_id, - 0, - ) - new_stream_notify = stream + # Deliver the reset stream to accept_stream() so + # callers can observe the reset state, but do NOT + # send an ACK back — the stream is already dead. + new_stream_notify = stream + else: + ack_header = struct.pack( + YAMUX_HEADER_FORMAT, + 0, + TYPE_WINDOW_UPDATE, + FLAG_ACK, + stream_id, + 0, + ) + new_stream_notify = stream else: rst_header = struct.pack( YAMUX_HEADER_FORMAT, @@ -1044,6 +1048,14 @@ async def handle_incoming(self) -> None: ) if new_stream_notify is not None: await self.new_stream_send_channel.send(new_stream_notify) + elif new_stream_notify is not None: + # SYN+RST: stream is reset on arrival — deliver to + # accept_stream() without sending an ACK back. + logger.debug( + f"Delivering reset stream {stream_id} " + f"to channel (no ACK) for peer {self.peer_id}" + ) + await self.new_stream_send_channel.send(new_stream_notify) elif ( typ == TYPE_DATA or typ == TYPE_WINDOW_UPDATE ) and flags & FLAG_ACK: From be240506be7625726a66abe3692e5fc8c8d87ea1 Mon Sep 17 00:00:00 2001 From: sumanjeet0012 Date: Sun, 14 Jun 2026 18:42:22 +0530 Subject: [PATCH 31/37] feat: Implement Bitswap 1.3.0 Payment Extension - Added PaymentExtension class to handle payment-related protobuf fields and wantlists in Bitswap 1.3.0. - Integrated payment terms, receipts, and rejections processing for client-side and server-side. - Enhanced PaymentLedger to track root CID payments and manage payment records. - Updated pricing engine to support configurable pricing strategies. - Refactored tests to accommodate changes in block storage and encoding, ensuring raw blocks are used for leaves. - Improved type hints and documentation across the codebase for better clarity and maintainability. --- examples/bitswap/bitswap.py | 28 +- libp2p/bitswap/client.py | 359 ++-- libp2p/bitswap/config.py | 1 - libp2p/bitswap/dag.py | 1858 +++++++++-------- libp2p/bitswap/dag_pb.py | 2 +- libp2p/bitswap/extension.py | 57 + libp2p/bitswap/gated_decision_engine.py | 119 +- libp2p/bitswap/payment_client_1_3.py | 83 +- libp2p/bitswap/payment_extension.py | 248 +++ libp2p/bitswap/payment_ledger.py | 89 +- libp2p/bitswap/pb/bitswap_1_3_0_pb2.pyi | 6 +- libp2p/bitswap/pricing_engine.py | 52 +- tests/core/bitswap/test_cid.py | 4 +- tests/core/bitswap/test_dag.py | 27 +- .../bitswap/test_filesystem_blockstore.py | 4 +- tests/core/bitswap/test_io_stream.py | 26 +- tests/core/bitswap/test_unixfs_encoding.py | 59 +- 17 files changed, 1639 insertions(+), 1383 deletions(-) create mode 100644 libp2p/bitswap/extension.py create mode 100644 libp2p/bitswap/payment_extension.py diff --git a/examples/bitswap/bitswap.py b/examples/bitswap/bitswap.py index d7c08ee4a..cbd222004 100755 --- a/examples/bitswap/bitswap.py +++ b/examples/bitswap/bitswap.py @@ -15,9 +15,9 @@ from libp2p import new_host from libp2p.bitswap import BitswapClient -from libp2p.crypto.ed25519 import create_new_key_pair from libp2p.bitswap.cid import cid_to_bytes, format_cid_for_display from libp2p.bitswap.dag import MerkleDag +from libp2p.crypto.ed25519 import create_new_key_pair from libp2p.peer.peerinfo import info_from_p2p_addr from libp2p.utils.address_validation import ( find_free_port, @@ -40,9 +40,7 @@ DEFAULT_LISTEN_PORT = 4013 -def select_preferred_listen_addr( - addrs: list[Multiaddr], port: int -) -> Multiaddr: +def select_preferred_listen_addr(addrs: list[Multiaddr], port: int) -> Multiaddr: """Pick a stable, local-friendly address for copy/paste commands.""" preferred_v4 = f"/ip4/127.0.0.1/tcp/{port}" for addr in addrs: @@ -102,7 +100,7 @@ async def run_provider(file_path: str, port: int = 0, seed: str | None = None): # Convert seed string to bytes (must be 32 bytes for Ed25519) seed_bytes = hashlib.sha256(seed.encode()).digest() key_pair = create_new_key_pair(seed=seed_bytes) - logger.info(f"Using deterministic peer ID from seed") + logger.info("Using deterministic peer ID from seed") host = new_host(key_pair=key_pair) @@ -164,9 +162,7 @@ def progress_callback(current: int, total: int, status: str): # Prefer a deterministic local address for copy/paste commands. transport_addrs = host.get_transport_addrs() provider_addr = select_preferred_listen_addr(transport_addrs, port) - provider_addr = provider_addr.encapsulate( - Multiaddr(f"/p2p/{host.get_id()}") - ) + provider_addr = provider_addr.encapsulate(Multiaddr(f"/p2p/{host.get_id()}")) root_cid_text = format_cid_for_display(root_cid) logger.info(f"Root CID: {root_cid_text}") logger.info("") @@ -237,7 +233,7 @@ async def run_client( # Convert seed string to bytes (must be 32 bytes for Ed25519) seed_bytes = hashlib.sha256(seed.encode()).digest() key_pair = create_new_key_pair(seed=seed_bytes) - logger.info(f"Using deterministic peer ID from seed") + logger.info("Using deterministic peer ID from seed") host = new_host(key_pair=key_pair) @@ -379,10 +375,7 @@ def parse_args(): "--port", type=int, default=DEFAULT_LISTEN_PORT, - help=( - "Port to listen on (default: 4012). " - "Use 0 to auto-select a random port." - ), + help=("Port to listen on (default: 4012). Use 0 to auto-select a random port."), ) parser.add_argument( "--file", @@ -416,7 +409,10 @@ def parse_args(): parser.add_argument( "--seed", type=str, - help="Seed string for deterministic peer ID generation (same seed = same peer ID)", + help=( + "Seed string for deterministic peer ID generation " + "(same seed = same peer ID)" + ), ) args = parser.parse_args() @@ -450,7 +446,9 @@ def main(): if args.mode == "provider": trio.run(run_provider, args.file, args.port, args.seed) elif args.mode == "client": - trio.run(run_client, args.provider, args.cid, args.output, args.port, args.seed) + trio.run( + run_client, args.provider, args.cid, args.output, args.port, args.seed + ) except Exception as e: logger.critical(f"Script failed: {e}", exc_info=True) sys.exit(1) diff --git a/libp2p/bitswap/client.py b/libp2p/bitswap/client.py index f98a6da7b..6675ad64d 100644 --- a/libp2p/bitswap/client.py +++ b/libp2p/bitswap/client.py @@ -6,7 +6,7 @@ from collections.abc import Sequence import hashlib import logging -from typing import Any +from typing import TYPE_CHECKING, Any import trio import varint @@ -17,6 +17,8 @@ from libp2p.peer.id import ID as PeerID from libp2p.peer.peerinfo import PeerInfo # noqa: F401 +if TYPE_CHECKING: + from .extension import IBitswapExtension from .block_store import BlockStore, MemoryBlockStore from .cid import ( CIDInput, @@ -30,7 +32,6 @@ from .config import ( BITSWAP_PROTOCOL_V100, BITSWAP_PROTOCOL_V120, - BITSWAP_PROTOCOL_V130, BITSWAP_PROTOCOLS, DEFAULT_PRIORITY, DEFAULT_TIMEOUT, @@ -44,7 +45,6 @@ TimeoutError as BitswapTimeoutError, ) from .messages import create_message, create_wantlist_entry -from .pb.bitswap_1_3_0_pb2 import Message as Message_1_3 from .pb.bitswap_pb2 import Message from .provider_query import ProviderQueryManager @@ -58,7 +58,7 @@ class BitswapClient: Supports Bitswap protocol versions 1.0.0, 1.1.0, 1.2.0, and 1.3.0 for content discovery and file sharing in a peer-to-peer network. - For 1.3.0 payment support, pass a payment_client and payment_engine. + For 1.3.0 payment support, register a PaymentExtension. """ def __init__( @@ -67,8 +67,6 @@ def __init__( block_store: BlockStore | None = None, protocol_version: str = BITSWAP_PROTOCOL_V120, provider_query_manager: ProviderQueryManager | None = None, - payment_client: Any = None, # BitswapPaymentClient_1_3 (optional) - payment_engine: Any = None, # PaymentGatedDecisionEngine (optional) ): """ Initialize Bitswap client. @@ -81,10 +79,6 @@ def __init__( DHT-based provider discovery. When supplied, ``get_block()`` will query the DHT for providers before broadcasting to all connected peers. - payment_client: Optional BitswapPaymentClient_1_3 for client-side - payment handling (auto-pays for blocks in 1.3.0 mode). - payment_engine: Optional PaymentGatedDecisionEngine for server-side - payment gating (gates block serving behind payment in 1.3.0 mode). """ self.host = host @@ -93,9 +87,9 @@ def __init__( self.provider_query_manager: ProviderQueryManager | None = ( provider_query_manager ) - # 1.3.0 payment components (optional) - self.payment_client = payment_client - self.payment_engine = payment_engine + + self.protocol_handlers: dict[str, "IBitswapExtension"] = {} + self.supported_protocols: list[str] = list(BITSWAP_PROTOCOLS) self._wantlist: dict[ CIDObject, dict[str, Any] @@ -113,15 +107,22 @@ def __init__( self._nursery: trio.Nursery | None = None self._started = False + def register_extension(self, protocol: str, extension: "IBitswapExtension") -> None: + """Register an extension for a specific protocol.""" + extension.set_client(self) + self.protocol_handlers[protocol] = extension + if protocol not in self.supported_protocols: + self.supported_protocols.insert(0, protocol) + async def start(self) -> None: """Start the Bitswap client.""" if self._started: return # Set stream handler for all supported Bitswap protocols - for protocol in BITSWAP_PROTOCOLS: + for protocol in self.supported_protocols: self.host.set_stream_handler( - protocol, + TProtocol(protocol), self._handle_stream, ) @@ -135,8 +136,8 @@ async def stop(self) -> None: self._started = False # Unregister stream handlers for all supported Bitswap protocols - for protocol in BITSWAP_PROTOCOLS: - self.host.remove_stream_handler(protocol) + for protocol in self.supported_protocols: + self.host.remove_stream_handler(TProtocol(protocol)) # Clear wantlists and pending requests self._wantlist.clear() self._peer_wantlists.clear() @@ -520,16 +521,10 @@ async def _send_wantlist_to_peer( msg = create_message(wantlist_entries=entries, full_wantlist=False) # Get negotiated protocol for this peer or use all protocols - # If payment client is configured, always prefer 1.3.0 to enable - # in-band payment messages regardless of any cached protocol. - if self.payment_client: - protocols = [BITSWAP_PROTOCOL_V130] + [ - p for p in BITSWAP_PROTOCOLS if p != BITSWAP_PROTOCOL_V130 - ] - elif peer_id in self._peer_protocols: + if peer_id in self._peer_protocols: protocols = [TProtocol(self._peer_protocols[peer_id])] else: - protocols = list(BITSWAP_PROTOCOLS) # Try all + protocols = [TProtocol(p) for p in self.supported_protocols] # Try all # Open stream and send message stream = await self.host.new_stream( @@ -744,9 +739,11 @@ async def _process_message( logger.warning(f" Entries: {len(msg.wantlist.entries)}") logger.warning(f" Full: {msg.wantlist.full}") for _i, _e in enumerate(msg.wantlist.entries): - _cid_hex = bytes(_e.block).hex()[:20] if _e.block else 'N/A' - _wt = 'WANT_HAVE' if _e.wantType == 1 else 'WANT_BLOCK' - logger.warning(f" [{_i+1}] cid={_cid_hex}... type={_wt} cancel={_e.cancel}") + _cid_hex = bytes(_e.block).hex()[:20] if _e.block else "N/A" + _wt = "WANT_HAVE" if _e.wantType == 1 else "WANT_BLOCK" + logger.warning( + f" [{_i + 1}] cid={_cid_hex}... type={_wt} cancel={_e.cancel}" + ) logger.warning("=" * 70) print( f"\n📥 RECEIVED WANTLIST from peer {peer_id_str} with " @@ -760,131 +757,28 @@ async def _process_message( self._peer_protocols[peer_id] = str(protocol) peer_protocol = str(protocol) if protocol else BITSWAP_PROTOCOL_V100 - logger.info(f"[FLOW] Negotiated protocol for peer {str(peer_id)[:20]}...: {peer_protocol}") - - # ── Bitswap 1.3.0 payment message handling ─────────────────────── - # Only enter the payment path when BOTH: - # 1. A payment component (client or engine) is configured, AND - # 2. The negotiated stream protocol is actually 1.3.0. - # A peer that opened a 1.2.0 (or lower) stream must NEVER be routed - # through payment logic — doing so caused wantlists to be silently - # dropped instead of being answered. - is_v130_stream = (peer_protocol == str(BITSWAP_PROTOCOL_V130)) - if (self.payment_client or self.payment_engine) and is_v130_stream: - # Re-parse as 1.3.0 message to access payment-specific fields - # (payment_terms, payment_receipts, payment_authorizations, etc.) - msg_1_3: Message_1_3 | None - try: - _tmp = Message_1_3() - _tmp.ParseFromString(msg.SerializeToString()) - msg_1_3 = _tmp - except Exception: - msg_1_3 = None - - if msg_1_3 is not None: - # Client-side: handle PaymentTerms / PaymentReceipts / PaymentRejections - if self.payment_client and ( - msg_1_3.payment_terms - or msg_1_3.payment_receipts - or msg_1_3.payment_rejections - ): - if msg_1_3.payment_terms: - logger.warning("=" * 70) - logger.warning(f"[STEP 3] CLIENT RECEIVED PAYMENT TERMS from {str(peer_id)[:20]}...") - for _t in msg_1_3.payment_terms: - logger.warning(f" cid={bytes(_t.cid).hex()[:20]}...") - logger.warning(f" amount={_t.amount} units") - logger.warning(f" asset={_t.asset} scheme={_t.scheme}") - logger.warning(f" pay_to={_t.pay_to[:20]}...") - logger.warning(f" block_size={_t.block_size}B") - logger.warning(f" valid_before={_t.valid_before}") - logger.warning("=" * 70) - if msg_1_3.payment_receipts: - logger.warning("=" * 70) - logger.warning(f"[STEP 8a] CLIENT RECEIVED PAYMENT RECEIPT from {str(peer_id)[:20]}...") - for _r in msg_1_3.payment_receipts: - logger.warning(f" cid={bytes(_r.cid).hex()[:20]}...") - logger.warning(f" tx_hash={_r.tx_hash[:20] if _r.tx_hash else 'optimistic'}") - logger.warning(f" expires={_r.expires}") - logger.warning("=" * 70) - if msg_1_3.payment_rejections: - logger.warning("=" * 70) - logger.warning(f"[STEP 8a] CLIENT RECEIVED PAYMENT REJECTION from {str(peer_id)[:20]}...") - for _rj in msg_1_3.payment_rejections: - logger.warning(f" cid={bytes(_rj.cid).hex()[:20]}...") - logger.warning(f" reason={_rj.reason}") - logger.warning("=" * 70) - response = await self.payment_client.process_incoming_message( - str(peer_id), msg_1_3 - ) - if response is not None: - logger.warning("=" * 70) - logger.warning(f"[STEP 5] CLIENT SENDING PAYMENT AUTHORIZATION to {str(peer_id)[:20]}...") - if response.payment_authorizations: - for _a in response.payment_authorizations: - logger.warning(f" cid={bytes(_a.cid).hex()[:20]}...") - logger.warning(f" from={_a.from_address[:20]}...") - logger.warning(f" to={_a.to_address[:20]}...") - logger.warning(f" value={_a.value}") - logger.warning(f" scheme={_a.scheme}") - logger.warning(f" v={_a.v} r_len={len(bytes(_a.r))} s_len={len(bytes(_a.s))}") - logger.warning("=" * 70) - await self._write_message_bytes( - stream, response.SerializeToString() - ) + logger.info( + f"[FLOW] Negotiated protocol for peer {str(peer_id)[:20]}...: " + f"{peer_protocol}" + ) - # Server-side: handle PaymentAuthorizations (EIP-3009 signed payments) - if self.payment_engine and msg_1_3.payment_authorizations: - try: - logger.warning("=" * 70) - logger.warning(f"[STEP 6] SERVER RECEIVED PAYMENT AUTHORIZATION from {str(peer_id)[:20]}...") - for _a in msg_1_3.payment_authorizations: - logger.warning(f" cid={bytes(_a.cid).hex()[:20]}...") - logger.warning(f" from={_a.from_address[:20]}...") - logger.warning(f" to={_a.to_address[:20]}...") - logger.warning(f" value={_a.value}") - logger.warning(f" scheme={_a.scheme}") - logger.warning(f" v={_a.v} r_len={len(bytes(_a.r))} s_len={len(bytes(_a.s))}") - logger.warning("=" * 70) - response = await self.payment_engine.process_incoming_1_3_message( - str(peer_id), msg_1_3 - ) - if response is not None: - _has_receipt = bool(response.payment_receipts) - _has_rejection = bool(response.payment_rejections) - _has_blocks = bool(response.payload) or bool(response.blocks) - logger.warning("=" * 70) - logger.warning(f"[STEP 8] SERVER SENDING RESPONSE after PaymentAuthorization:") - logger.warning(f" has_receipt={_has_receipt} has_rejection={_has_rejection} has_blocks={_has_blocks}") - if _has_rejection: - for _rj in response.payment_rejections: - logger.warning(f" ❌ REJECTION reason={_rj.reason}") - if _has_blocks: - _nb = len(response.payload) + len(response.blocks) - logger.warning(f" ✅ SENDING {_nb} block(s) to client — FILE TRANSFER STARTING") - logger.warning("=" * 70) - await self._write_message_bytes( - stream, response.SerializeToString() - ) - # Payment authorization handled — don't fall through to - # standard wantlist handling for this message. - return - except Exception as e: - logger.error(f"Error handling PaymentAuthorization: {e}", exc_info=True) - - # Handle PaymentRequired block presences (1.3.0 type=2) - if msg_1_3.blockPresences: - await self._process_block_presences_1_3( - msg_1_3.blockPresences, peer_id - ) - # Fall through below to also handle wantlist/blocks/payload - # that may be bundled in the same message. + # ── Protocol Extension Handling ───────────────────────────────────── + if peer_protocol in self.protocol_handlers: + handled = await self.protocol_handlers[peer_protocol].process_message( + peer_id, msg.SerializeToString(), stream + ) + if handled: + return # ── Standard 1.0.0–1.2.0 message handling (always runs) ───────── - # Also runs for 1.3.0 streams that don't carry payment-only content - # (e.g. a plain wantlist sent over a 1.3.0 stream). if msg.HasField("wantlist"): - await self._process_wantlist(msg.wantlist, peer_id, stream) + handled = False + if peer_protocol in self.protocol_handlers: + handled = await self.protocol_handlers[peer_protocol].process_wantlist( + msg.wantlist, peer_id, stream + ) + if not handled: + await self._process_wantlist(msg.wantlist, peer_id, stream) if msg.blocks: await self._process_blocks_v100(list(msg.blocks), peer_id) @@ -904,70 +798,20 @@ async def _process_wantlist( self._peer_wantlists[peer_id] = {} peer_wantlist = self._peer_wantlists[peer_id] - # Update based on full or incremental wantlist if wantlist.full: peer_wantlist.clear() # Get peer protocol for response format peer_protocol = self._peer_protocols.get(peer_id, BITSWAP_PROTOCOL_V100) - + logger.warning("=" * 70) logger.warning( f"[STEP 1] SERVER PROCESSING WANTLIST from {str(peer_id)[:20]}..." ) logger.warning(f" entries={len(wantlist.entries)} protocol={peer_protocol}") - logger.warning(f" payment_engine={'ENABLED' if self.payment_engine else 'DISABLED (free mode)'}") - logger.warning(f" server_wallet={getattr(getattr(self, 'payment_engine', None), 'server_wallet', 'N/A')[:20] if self.payment_engine else 'N/A'}") logger.warning("=" * 70) - # ── Payment-gated wantlist handling ──────────────────────────────── - # Apply payment gating whenever payment_engine is enabled. - # For 1.3.0 peers: send PaymentRequired + PaymentTerms in-band. - # For older peers: send DONT_HAVE (they cannot pay in-band). - is_v130_peer = str(peer_protocol) == str(BITSWAP_PROTOCOL_V130) - if self.payment_engine and is_v130_peer: - for entry in wantlist.entries: - entry_cid = parse_cid(entry.block) - if entry.cancel: - if entry_cid in peer_wantlist: - del peer_wantlist[entry_cid] - continue - - peer_wantlist[entry_cid] = { - "priority": entry.priority, - "want_type": entry.wantType, - "send_dont_have": entry.sendDontHave, - } - - response_msg = await self.payment_engine.handle_want( - peer_id=str(peer_id), - cid=entry.block, - want_type=entry.wantType, - send_dont_have=entry.sendDontHave, - peer_protocol=str(peer_protocol), # pass actual negotiated protocol - ) - if response_msg is not None: - _has_pr = bool(getattr(response_msg, 'blockPresences', [])) - _has_terms = bool(getattr(response_msg, 'payment_terms', [])) - _has_blocks = bool(getattr(response_msg, 'payload', [])) or bool(getattr(response_msg, 'blocks', [])) - logger.warning("=" * 70) - logger.warning(f"[STEP 2] SERVER SENDING RESPONSE for cid={bytes(entry.block).hex()[:20]}...") - logger.warning(f" payment_required={_has_pr} payment_terms={_has_terms} has_blocks={_has_blocks}") - if _has_pr: - for _bp in response_msg.blockPresences: - logger.warning(f" BlockPresence type={_bp.type} (2=PaymentRequired)") - if _has_terms: - for _t in response_msg.payment_terms: - logger.warning(f" PaymentTerms: amount={_t.amount} asset={_t.asset} pay_to={_t.pay_to[:20]}... scheme={_t.scheme}") - if _has_blocks: - logger.warning(f" ✅ Sending block(s) directly (free/already paid)") - logger.warning("=" * 70) - await self._write_message_bytes( - stream, response_msg.SerializeToString() - ) - return - # ── Standard 1.0.0–1.2.0 wantlist handling ──────────────────────── # Process entries blocks_to_send_v100 = [] # For v1.0.0 @@ -975,7 +819,14 @@ async def _process_wantlist( presences_to_send = [] # For v1.2.0 for entry in wantlist.entries: - entry_cid = parse_cid(entry.block) + try: + logger.warning(f" -> Processing entry: {bytes(entry.block).hex()}") + entry_cid = parse_cid(entry.block) + logger.warning(f" -> Parsed CID: {entry_cid}") + except Exception as e: + logger.warning(f" -> EXCEPTION in parse_cid: {e}") + continue + if entry.cancel: # Remove from peer's wantlist if entry_cid in peer_wantlist: @@ -989,9 +840,17 @@ async def _process_wantlist( } # Check if we have this block - has_block = await self.block_store.has_block(entry_cid) + logger.warning(f" -> Checking if we have block {entry_cid}") + try: + has_block = await self.block_store.has_block(entry_cid) + logger.warning(f" -> has_block result: {has_block}") + except Exception as e: + logger.warning(f" -> EXCEPTION in has_block: {e}") + has_block = False + logger.warning( - f"[WANTLIST ENTRY] cid={format_cid_for_display(entry_cid, max_len=16)} " + f"[WANTLIST ENTRY] " + f"cid={format_cid_for_display(entry_cid, max_len=16)} " f"wantType={entry.wantType} cancel={entry.cancel} " f"has_block={has_block}" ) @@ -1065,33 +924,81 @@ async def _process_wantlist( # Send responses in batches to stay under MAX_MESSAGE_SIZE # and Noise protocol limit (65535 bytes) if blocks_to_send_v100 or blocks_to_send_v110 or presences_to_send: - # We MUST open a new stream to the client to send the blocks. - # Writing to the inbound stream that the client opened for their WANTLIST - # is often ignored by the client (Kubo), as it expects the provider to dial back. - try: - outbound_stream = await self.host.new_stream( - peer_id, [TProtocol(peer_protocol)] - ) - except Exception as e: - logger.error(f"Failed to open outbound stream to send response: {e}") - return - - # Send blocks in batches - if blocks_to_send_v100: - await self._send_blocks_in_batches_v100( - blocks_to_send_v100, peer_id, outbound_stream + if self._nursery is not None: + self._nursery.start_soon( + self._send_wantlist_responses_bg, # type: ignore + peer_id, + str(peer_protocol), + blocks_to_send_v100, + blocks_to_send_v110, + presences_to_send, ) - if blocks_to_send_v110: - await self._send_blocks_in_batches_v110( - blocks_to_send_v110, peer_id, outbound_stream + else: + # Fallback to writing to the inbound stream if nursery is not available. + # This works for Python-to-Python tests, but may fail for + # Go-libp2p interop. + await self._send_wantlist_responses_inline( + stream, + peer_id, + blocks_to_send_v100, + blocks_to_send_v110, + presences_to_send, ) - # Send presences (usually small, can send all at once) - if presences_to_send: - presence_msg = create_message(block_presences=presences_to_send) - await self._write_message(outbound_stream, presence_msg) - + + async def _send_wantlist_responses_bg( + self, + peer_id: PeerID, + peer_protocol: str, + blocks_to_send_v100: list[bytes], + blocks_to_send_v110: list[tuple[bytes, bytes]], + presences_to_send: list[tuple[CIDObject, bool]], + ) -> None: + """Background task to send responses over a new outbound stream.""" + # We MUST open a new stream to the client to send the blocks. + # Writing to the inbound stream that the client opened for their WANTLIST + # is often ignored by the client (Kubo), as it expects dial back. + try: + outbound_stream = await self.host.new_stream( + peer_id, [TProtocol(peer_protocol)] + ) + except Exception as e: + logger.error(f"Failed to open outbound stream to send response: {e}") + return + + try: + await self._send_wantlist_responses_inline( + outbound_stream, + peer_id, + blocks_to_send_v100, + blocks_to_send_v110, + presences_to_send, + ) + finally: await outbound_stream.close() + async def _send_wantlist_responses_inline( + self, + stream: INetStream, + peer_id: PeerID, + blocks_to_send_v100: list[bytes], + blocks_to_send_v110: list[tuple[bytes, bytes]], + presences_to_send: list[tuple[CIDObject, bool]], + ) -> None: + """Helper to send blocks on a specific stream.""" + # Send blocks in batches + if blocks_to_send_v100: + await self._send_blocks_in_batches_v100( + blocks_to_send_v100, peer_id, stream + ) + if blocks_to_send_v110: + await self._send_blocks_in_batches_v110( + blocks_to_send_v110, peer_id, stream + ) + # Send presences (usually small, can send all at once) + if presences_to_send: + presence_msg = create_message(block_presences=presences_to_send) + await self._write_message(stream, presence_msg) + async def _send_blocks_in_batches_v100( self, blocks: list[bytes], peer_id: PeerID, stream: INetStream ) -> None: diff --git a/libp2p/bitswap/config.py b/libp2p/bitswap/config.py index 53bd23600..028103100 100644 --- a/libp2p/bitswap/config.py +++ b/libp2p/bitswap/config.py @@ -12,7 +12,6 @@ # All supported protocols (ordered from newest to oldest for negotiation) BITSWAP_PROTOCOLS = [ - BITSWAP_PROTOCOL_V130, BITSWAP_PROTOCOL_V120, BITSWAP_PROTOCOL_V110, BITSWAP_PROTOCOL_V100, diff --git a/libp2p/bitswap/dag.py b/libp2p/bitswap/dag.py index dd729eeba..8021d103b 100644 --- a/libp2p/bitswap/dag.py +++ b/libp2p/bitswap/dag.py @@ -1,928 +1,930 @@ -""" -Merkle DAG manager for file operations. - -This module provides a high-level API for adding and fetching files -using the Bitswap protocol with automatic chunking, linking, and -multi-block resolution. - -""" - -from collections.abc import Awaitable, Callable -import inspect -import io -import logging -from typing import Union - -from libp2p.peer.id import ID as PeerID - -from .block_service import BlockService -from .block_store import BlockStore -from .chunker import ( - DEFAULT_CHUNK_SIZE, - chunk_bytes, - chunk_file, - chunk_stream, - estimate_chunk_count, - get_file_size, -) -from .cid import ( - CODEC_DAG_PB, - CODEC_RAW, - CIDInput, - cid_to_bytes, - compute_cid_v1, - format_cid_for_display, - verify_cid, -) -from .client import BitswapClient -from .dag_pb import ( - balanced_layout, - create_leaf_node, - decode_dag_pb, - is_directory_node, - is_file_node, -) -from .errors import BlockNotFoundError - -logger = logging.getLogger(__name__) - - -# Type alias for progress callbacks (sync or async) -ProgressCallback = Union[ - Callable[[int, int, str], None], - Callable[[int, int, str], Awaitable[None]], -] - - -async def _call_progress_callback( - callback: ProgressCallback | None, - current: int, - total: int, - status: str, -) -> None: - """Call a progress callback, handling both sync and async callbacks.""" - if callback is None: - return - - if inspect.iscoroutinefunction(callback): - await callback(current, total, status) - else: - callback(current, total, status) - - -class MerkleDag: - """ - Merkle DAG manager for file operations. - - Provides high-level API for adding and fetching files with automatic - chunking, link creation, and recursive block fetching. - - Example: - >>> from libp2p import new_host - >>> from libp2p.bitswap import BitswapClient, MemoryBlockStore, MerkleDag - >>> import trio - >>> - >>> async def main(): - ... host = new_host() - ... async with host.run(["/ip4/0.0.0.0/tcp/0"]): - ... store = MemoryBlockStore() - ... bitswap = BitswapClient(host, store) - ... await bitswap.start() - ... - ... dag = MerkleDag(bitswap) - ... - ... # Add a large file (auto-chunked) - ... root_cid = await dag.add_file('movie.mp4') - ... print(f"Share: {cid_to_text(root_cid)}") - ... - ... # Fetch file (auto-resolves all chunks) - ... data = await dag.fetch_file(root_cid) - ... open('downloaded.mp4', 'wb').write(data) - ... - >>> trio.run(main) - - """ - - def __init__( - self, - bitswap: BitswapClient, - block_store: BlockStore | None = None, - block_service: BlockService | None = None, - ): - """ - Initialize Merkle DAG manager. - - Args: - bitswap: Bitswap client for block exchange - block_store: Optional block store (uses bitswap's store if None) - block_service: Optional BlockService for transparent local→network - fallback with auto-caching. When provided, all block - reads/writes go through it instead of bitswap directly. - Construct with: BlockService(your_store, bitswap) - - """ - self.bitswap = bitswap - self.block_store = block_store or bitswap.block_store - # If a BlockService is provided use it; otherwise fall back to - # calling bitswap directly (existing behaviour, no regression). - self._service: BlockService | None = block_service - - # ── private routing helpers ─────────────────────────────────────────────── - - async def _put_block(self, cid: CIDInput, data: bytes) -> None: - """Store a block. Routes through BlockService when available.""" - if self._service is not None: - await self._service.put_block(cid, data) - else: - await self.bitswap.add_block(cid, data) - - async def _get_block( - self, - cid: CIDInput, - peer_id: PeerID | None = None, - timeout: float = 30.0, - ) -> bytes: - """Fetch a block. Routes through BlockService when available.""" - if self._service is not None: - data = await self._service.get_block(cid, peer_id=peer_id, timeout=timeout) - if data is None: - from .cid import cid_to_bytes, format_cid_for_display - - raise BlockNotFoundError( - f"Block not found: {format_cid_for_display(cid_to_bytes(cid))}" - ) - return data - return await self.bitswap.get_block(cid, peer_id, timeout) - - async def _get_blocks_batch( - self, - cids: list[CIDInput], - peer_id: PeerID | None = None, - timeout: float = 30.0, - batch_size: int = 32, - ) -> dict[bytes, bytes]: - """Batch-fetch blocks. Routes through BlockService when available.""" - if self._service is not None: - return await self._service.get_blocks_batch( - cids, peer_id=peer_id, timeout=timeout, batch_size=batch_size - ) - # Check if the client supports native batch fetching - get_blocks_batch: Callable[..., Awaitable[dict[bytes, bytes]]] | None = getattr( - self.bitswap, "get_blocks_batch", None - ) - if get_blocks_batch is not None and callable(get_blocks_batch): - try: - result = await get_blocks_batch( - cids, peer_id=peer_id, timeout=timeout, batch_size=batch_size - ) - # Ensure the result is a plain dict (not a coroutine from a mock) - if isinstance(result, dict): - return result - except Exception: - pass - # Fall back to individual _get_block calls - results: dict[bytes, bytes] = {} - for cid in cids: - from .cid import cid_to_bytes - - cid_bytes = cid_to_bytes(cid) - try: - data = await self._get_block( - cid_bytes, peer_id=peer_id, timeout=timeout - ) - results[cid_bytes] = data - except Exception: - pass - return results - - async def add_file( - self, - file_path: str, - chunk_size: int | None = None, - progress_callback: Callable[[int, int, str], None] | None = None, - wrap_with_directory: bool = True, - ) -> bytes: - """ - Add a file to the DAG. - - Automatically chunks large files and creates link structure. - Small files are stored as single blocks. - - Args: - file_path: Path to file - chunk_size: Optional chunk size (auto-selected if None) - progress_callback: Optional callback(current, total, status) - wrap_with_directory: If True, wraps file in a directory node with filename - (IPFS-standard way, enables filename preservation) - - Returns: - Root CID of the file (or wrapping directory if wrap_with_directory=True) - - Raises: - FileNotFoundError: If file doesn't exist - BlockTooLargeError: If a single chunk exceeds MAX_BLOCK_SIZE - - Example: - >>> async def progress(current, total, status): - ... print(f"{status}: {current}/{total}") - >>> root_cid = await dag.add_file('movie.mp4', progress_callback=progress) - >>> print(f"Share this: {cid_to_text(root_cid)}") - - """ - # Get file size - file_size = get_file_size(file_path) - logger.info(f"Adding file: {file_path} ({file_size} bytes)") - - # Determine chunk size - if chunk_size is None: - chunk_size = DEFAULT_CHUNK_SIZE - - logger.debug(f"Using chunk size: {chunk_size} bytes") - - # If file is small enough, store as single raw leaf block (Kubo default: RawLeaves=true) - if file_size <= chunk_size: - logger.debug("File fits in single block") - - with open(file_path, "rb") as f: - data = f.read() - - # Raw leaf: store file bytes directly with raw codec CID - cid = compute_cid_v1(data, codec=CODEC_RAW) - - await self._put_block(cid, data) - - if progress_callback: - await _call_progress_callback( - progress_callback, file_size, file_size, "completed" - ) - - logger.info( - f"Added file as single raw block: {format_cid_for_display(cid, max_len=16)}" - ) - - # Wrap in directory if requested - if wrap_with_directory: - import os - - from .dag_pb import create_directory_node - - filename = os.path.basename(file_path) - logger.info( - f"Wrapping single-block file in directory with name: {filename}" - ) - - # Tsize for raw leaf = raw file size (no block overhead) - dir_data = create_directory_node([(filename, cid, file_size)]) - dir_cid = compute_cid_v1(dir_data, codec=CODEC_DAG_PB) - await self._put_block(dir_cid, dir_data) - - logger.info( - f"Created directory wrapper. Directory CID: " - f"{format_cid_for_display(dir_cid, max_len=16)}" - ) - return dir_cid - - return cid - - # Chunk the file - estimated_chunks = estimate_chunk_count(file_size, chunk_size) - logger.debug(f"Chunking file into ~{estimated_chunks} chunks") - logger.info("=== Starting file chunking process ===") - - # leaf_triples: (cid_bytes, leaf_block_bytes, raw_data_size) - # For raw leaves (Kubo default): leaf_block = raw chunk bytes, - # CID uses CODEC_RAW. This matches Kubo's RawLeaves=true behavior - # for multi-chunk files, producing identical CIDs. - leaf_triples: list[tuple[bytes, bytes, int]] = [] - bytes_processed = 0 - - # Process file in chunks (memory efficient) - for i, chunk_data in enumerate(chunk_file(file_path, chunk_size)): - # Raw leaf: store chunk bytes directly with raw codec CID (Kubo default) - chunk_cid = compute_cid_v1(chunk_data, codec=CODEC_RAW) - - await self._put_block(chunk_cid, chunk_data) - leaf_triples.append((chunk_cid, chunk_data, len(chunk_data))) - bytes_processed += len(chunk_data) - - # Progress callback - if progress_callback: - await _call_progress_callback( - progress_callback, - bytes_processed, - file_size, - f"chunking ({i + 1} chunks)", - ) - - logger.info( - f"Chunk {i + 1}: CID={format_cid_for_display(chunk_cid)}, " - f"Size={len(chunk_data)} bytes, " - f"Progress={bytes_processed}/{file_size}" - ) - logger.debug( - f"Stored leaf {i}: {format_cid_for_display(chunk_cid, max_len=16)} " - f"({len(chunk_data)} bytes)" - ) - - # Build balanced DAG tree (max 174 links/node, matches Kubo) - if progress_callback: - await _call_progress_callback( - progress_callback, file_size, file_size, "creating root node" - ) - - # Create a sync wrapper for the async _put_block method - # We'll collect (cid, data) pairs and store them after - internal_nodes: list[tuple[bytes, bytes]] = [] - - def store_internal_node(cid: bytes, data: bytes) -> None: - """Callback to collect internal nodes for storage.""" - internal_nodes.append((cid, data)) - - root_cid, root_data, root_tsize = balanced_layout( - leaf_triples, put_block_callback=store_internal_node - ) - - # Store all internal nodes - logger.info(f"Storing {len(internal_nodes)} internal DAG nodes...") - for cid, data in internal_nodes: - await self._put_block(cid, data) - - # Store the root node - await self._put_block(root_cid, root_data) - - # Enhanced logging for root CID - logger.info("=== File chunking completed ===") - logger.info( - f"Root CID: {format_cid_for_display(root_cid)} " - f"(Balanced DAG over {len(leaf_triples)} leaves)" - ) - logger.info(f"Total file size: {file_size} bytes") - logger.info("=" * 50) - - logger.info( - f"Added file with {len(leaf_triples)} leaves. " - f"Root CID: {format_cid_for_display(root_cid, max_len=16)}" - ) - - if progress_callback: - await _call_progress_callback( - progress_callback, file_size, file_size, "completed" - ) - - # Wrap in directory if requested (IPFS-standard way for filename preservation) - if wrap_with_directory: - import os - - from .dag_pb import create_directory_node - - filename = os.path.basename(file_path) - logger.info(f"Wrapping file in directory with name: {filename}") - - # Tsize = cumulative block size (root block + all descendant blocks), - # matching Kubo's behavior for directory link Tsize. - dir_data = create_directory_node([(filename, root_cid, root_tsize)]) - dir_cid = compute_cid_v1(dir_data, codec=CODEC_DAG_PB) - await self._put_block(dir_cid, dir_data) - - logger.info( - "Created directory wrapper. Directory CID: " - f"{format_cid_for_display(dir_cid, max_len=16)}" - ) - return dir_cid - - return root_cid - - async def add_bytes( - self, - data: bytes, - chunk_size: int | None = None, - progress_callback: Callable[[int, int, str], None] | None = None, - ) -> bytes: - """ - Add bytes to the DAG (similar to add_file but for in-memory data). - - Args: - data: Data to add - chunk_size: Optional chunk size (auto-selected if None) - progress_callback: Optional callback(current, total, status) - - Returns: - Root CID - - Example: - >>> data = b"x" * (10 * 1024 * 1024) # 10 MB - >>> root_cid = await dag.add_bytes(data) - - """ - file_size = len(data) - logger.info(f"Adding {file_size} bytes") - - # Determine chunk size - if chunk_size is None: - chunk_size = DEFAULT_CHUNK_SIZE - - # If data is small, store as single raw leaf block (Kubo default: RawLeaves=true) - if file_size <= chunk_size: - cid = compute_cid_v1(data, codec=CODEC_RAW) - await self._put_block(cid, data) - - if progress_callback: - await _call_progress_callback( - progress_callback, file_size, file_size, "completed" - ) - - return cid - - # Chunk the data using raw leaves (Kubo default: RawLeaves=true) - chunks = chunk_bytes(data, chunk_size) - leaf_triples: list[tuple[bytes, bytes, int]] = [] - - for i, chunk_data in enumerate(chunks): - chunk_cid = compute_cid_v1(chunk_data, codec=CODEC_RAW) - await self._put_block(chunk_cid, chunk_data) - leaf_triples.append((chunk_cid, chunk_data, len(chunk_data))) - - if progress_callback: - bytes_processed = sum(s for _, _, s in leaf_triples) - await _call_progress_callback( - progress_callback, - bytes_processed, - file_size, - f"chunking ({i + 1}/{len(chunks)})", - ) - - # Build balanced DAG tree - root_cid, root_data, _tsize = balanced_layout(leaf_triples) - await self._put_block(root_cid, root_data) - - if progress_callback: - await _call_progress_callback( - progress_callback, file_size, file_size, "completed" - ) - - return root_cid - - async def add_stream( - self, - stream: io.IOBase, - chunk_size: int | None = None, - progress_callback: ProgressCallback | None = None, - ) -> bytes: - """ - Add data from any io.IOBase stream to the DAG. - - More flexible than add_file() (accepts any stream, not just file paths) - and more memory efficient than add_bytes() (reads one chunk at a time, - so total memory usage is O(chunk_size) regardless of file size). - - Args: - stream: Any readable io.IOBase — open() handles, BytesIO, - GzipFile, BZ2File, network streams, pipes, etc. - chunk_size: Optional chunk size in bytes (auto-selected if None) - progress_callback: Optional callback(current, total, status). - Note: total is unknown for streams, so current - is reported as bytes processed so far. - - Returns: - Root CID bytes of the stored DAG - - Example: - >>> import io - >>> root_cid = await dag.add_stream(io.BytesIO(b"hello world")) - - >>> # Memory-efficient large file (no full read into RAM) - >>> with open("movie.mp4", "rb") as f: - ... root_cid = await dag.add_stream(f) - - >>> # Decompress and add in one pass - >>> import gzip - >>> with gzip.open("archive.gz", "rb") as f: - ... root_cid = await dag.add_stream(f) - - >>> # With BlockService for persistent caching - >>> service = BlockService(FilesystemBlockStore("./blocks"), bitswap) - >>> dag = MerkleDag(bitswap, block_service=service) - >>> with open("large.bin", "rb") as f: - ... root_cid = await dag.add_stream(f) # cached to disk - - """ - if chunk_size is None: - chunk_size = DEFAULT_CHUNK_SIZE - - leaf_triples: list[tuple[bytes, bytes, int]] = [] - bytes_processed = 0 - - for i, chunk_data in enumerate(chunk_stream(stream, chunk_size)): - # Raw leaf: store chunk bytes directly (Kubo default: RawLeaves=true) - chunk_cid = compute_cid_v1(chunk_data, codec=CODEC_RAW) - await self._put_block(chunk_cid, chunk_data) - leaf_triples.append((chunk_cid, chunk_data, len(chunk_data))) - bytes_processed += len(chunk_data) - - if progress_callback: - # total is unknown for streams — report bytes processed so far - await _call_progress_callback( - progress_callback, - bytes_processed, - bytes_processed, - f"chunking ({i + 1} chunks, {bytes_processed} bytes)", - ) - - # Empty stream — store a single empty raw block - if not leaf_triples: - cid = compute_cid_v1(b"", codec=CODEC_RAW) - await self._put_block(cid, b"") - return cid - - # Single chunk — return the leaf CID directly (no root node needed) - if len(leaf_triples) == 1: - return leaf_triples[0][0] - - # Multiple chunks — build balanced DAG tree - root_cid, root_data, _tsize = balanced_layout(leaf_triples) - await self._put_block(root_cid, root_data) - - if progress_callback: - await _call_progress_callback( - progress_callback, bytes_processed, bytes_processed, "completed" - ) - - return root_cid - - async def fetch_file( - self, - root_cid: CIDInput, - peer_id: PeerID | None = None, - timeout: float = 30.0, - progress_callback: Callable[[int, int, str], None] | None = None, - ) -> tuple[bytes, str | None]: - """ - Fetch a file from the DAG. - - Automatically resolves links and fetches all chunks. Works with both - single-block files and multi-chunk files. Everything is handled - automatically - just provide the root CID! - - The method automatically: - - Detects directory wrappers and extracts filename - - Fetches and decodes the root block - - Determines file size and number of chunks - - Fetches all chunks in sequence - - Verifies integrity of all blocks - - Reconstructs the complete file - - Args: - root_cid: Root CID of the file (or directory wrapper) - peer_id: Optional specific peer to fetch from - timeout: Timeout per block in seconds - progress_callback: Optional callback(current, total, status) - Receives metadata automatically in first call - - Returns: - Tuple of (file_data, filename) where filename is None if not - wrapped in directory - - Raises: - BlockNotFoundError: If any block cannot be found - ValueError: If CID verification fails - - Example: - >>> # Simple usage - just provide root CID - >>> data, filename = await dag.fetch_file(root_cid) - >>> save_path = filename or 'downloaded_file' - >>> open(save_path, 'wb').write(data) - - >>> # With progress tracking - >>> def progress(current, total, status): - ... percent = (current / total) * 100 if total > 0 else 0 - ... print(f"{status}: {percent:.1f}%") - >>> data, filename = await dag.fetch_file( - ... root_cid, progress_callback=progress - ... ) - - """ - root_cid_bytes = cid_to_bytes(root_cid) - logger.info(f"Fetching file: {format_cid_for_display(root_cid_bytes)}") - - # Step 1: Fetch the root block - root_data = await self._get_block(root_cid_bytes, peer_id, timeout) - if not verify_cid(root_cid_bytes, root_data): - root_cid_str = format_cid_for_display(root_cid_bytes) - raise ValueError(f"Root block CID verification failed: {root_cid_str}") - - # Step 2: Handle directory wrapper - # (produced by `ipfs add --wrap-with-directory`) - filename = None - actual_file_cid = root_cid_bytes - actual_file_data = root_data - - if is_directory_node(root_data): - logger.info("Root is a directory node — extracting filename and file CID") - dir_links, _ = decode_dag_pb(root_data) - if dir_links: - first_link = dir_links[0] - filename = first_link.name or None - # Links now store the full CID bytes (CIDv1 buffer or CIDv0 multihash) - actual_file_cid = first_link.cid - logger.info(f"Filename from directory: {filename!r}") - actual_file_data = await self._get_block( - actual_file_cid, peer_id, timeout - ) - if not verify_cid(actual_file_cid, actual_file_data): - f_cid_str = format_cid_for_display(actual_file_cid) - err_msg = f"File block CID verification failed: {f_cid_str}" - raise ValueError(err_msg) - - # Step 3: Handle raw block (not a DAG-PB node at all) - if not is_file_node(actual_file_data): - logger.info(f"Root is a raw block: {len(actual_file_data)} bytes") - return actual_file_data, filename - - # Step 4: Parse the file node - top_links, top_unixfs = decode_dag_pb(actual_file_data) - filesize = top_unixfs.filesize if top_unixfs else 0 - total_size = filesize or sum(lnk.size for lnk in top_links) - msg = f"File node: {len(top_links)} top-level links, total size={total_size}" - logger.info(f"{msg} bytes") - - # Step 5: Small file with inline data (no links) - if not top_links: - file_data = top_unixfs.data if top_unixfs and top_unixfs.data else b"" - logger.info(f"Inline file data: {len(file_data)} bytes") - if progress_callback: - data_len = len(file_data) - await _call_progress_callback( - progress_callback, data_len, data_len, "completed" - ) - return file_data, filename - - # Step 6: Collect all leaf CIDs without opening streams - # Strategy: Recursively batch-fetch all DAG nodes - # then traverse locally to collect leaves - - top_len = len(top_links) - msg1 = f"[DAG] Recursively batch-fetching DAG tree ({top_len} top links)..." - logger.info(msg1) - msg2 = f"[FETCH] Recursively batch-fetching DAG tree ({top_len} top links)..." - print(msg2, flush=True) - - # Map to store ALL fetched blocks (both intermediate and leaves) - all_blocks_map: dict[bytes, bytes] = {} - - async def _batch_fetch_tree(cid_list: list[bytes], depth: int) -> None: - """Recursively batch-fetch a level of DAG nodes and queue their children.""" - if not cid_list: - return - - c_count = len(cid_list) - msg1 = f"[DAG] Depth {depth}: batch-fetching {c_count} blocks..." - logger.info(msg1) - msg2 = f"[FETCH] Depth {depth}: batch-fetching {c_count} blocks..." - print(msg2, flush=True) - - # Batch-fetch this level's blocks - level_blocks = await self._get_blocks_batch( - list(cid_list), peer_id=peer_id, timeout=timeout, batch_size=32 - ) - logger.info(f"[DAG] Depth {depth}: ✓ received {len(level_blocks)} blocks") - all_blocks_map.update(level_blocks) - - # Collect child CIDs for recursion - child_cids: list[bytes] = [] - for cid_bytes in cid_list: - block_data = level_blocks.get(cid_bytes) - if block_data is None: - c_str = format_cid_for_display(cid_bytes) - msg = f"[DAG] Depth {depth}: block {c_str} missing after" - logger.warning(f"{msg} fetch") - continue - - if is_file_node(block_data): - node_links, _ = decode_dag_pb(block_data) - cid_str = format_cid_for_display(cid_bytes) - msg = f"[DAG] Depth {depth}: {cid_str} has {len(node_links)}" - logger.debug(f"{msg} children") - for link in node_links: - # Links now store full CID bytes directly - child_cids.append(link.cid) - - # Recursively fetch next level if there are children - if child_cids: - ch_count = len(child_cids) - msg = f"[DAG] Depth {depth}: found {ch_count} child CIDs" - logger.info(f"{msg}, fetching next level...") - await _batch_fetch_tree(child_cids, depth + 1) - - # Starting from the top-level links (full CID bytes stored in links) - top_cids = [top_link.cid for top_link in top_links] - await _batch_fetch_tree(top_cids, depth=1) - blocks_count = len(all_blocks_map) - logger.info(f"[DAG] ✓ Tree fetch complete: {blocks_count} total blocks") - print(f"[FETCH] ✓ Tree fetch complete: {blocks_count} total blocks", flush=True) - - # Now traverse locally to collect leaf CIDs in order - ordered_leaf_cids: list[bytes] = [] - - def _collect_leaves_local(cid_bytes: bytes, depth: int = 1) -> None: - """Traverse locally-fetched blocks to collect leaf CIDs.""" - block_data = all_blocks_map.get(cid_bytes) - if block_data is None: - cid_str = format_cid_for_display(cid_bytes) - logger.warning(f"[DAG] Depth {depth}: block {cid_str} not in map") - return - - if not is_file_node(block_data): - # Raw block - it's a leaf - logger.debug(f"[DAG] Depth {depth}: raw block (leaf)") - ordered_leaf_cids.append(cid_bytes) - return - - node_links, _ = decode_dag_pb(block_data) - logger.debug(f"[DAG] Depth {depth}: {len(node_links)} links") - - if not node_links: - # Leaf node (no children, data is inline in UnixFS) - logger.debug(f"[DAG] Depth {depth}: file node with inline data (leaf)") - ordered_leaf_cids.append(cid_bytes) - return - - # Intermediate node - recursively process children - for j, child_link in enumerate(node_links): - c_idx = j + 1 - c_tot = len(node_links) - msg = f"[DAG] Depth {depth}: processing child {c_idx}/{c_tot}" - logger.debug(msg) - # Links store full CID bytes directly - child_cid = child_link.cid - _collect_leaves_local(child_cid, depth + 1) - - # Traverse each top-level block - for i, top_link in enumerate(top_links): - logger.info(f"[DAG] Traversing top-level {i + 1}/{len(top_links)}...") - # Links store full CID bytes directly - top_cid = top_link.cid - _collect_leaves_local(top_cid, depth=1) - - logger.info(f"[DAG] ✓ Collected {len(ordered_leaf_cids)} leaf blocks") - - # Step 7: Batch-fetch all leaf blocks - # (single wantlist per batch → avoids GO_AWAY) - if progress_callback: - await _call_progress_callback( - progress_callback, - 0, - total_size, - f"fetching {len(ordered_leaf_cids)} leaf blocks in batches", - ) - - l_count = len(ordered_leaf_cids) - msg1 = f"[DAG] Starting batch fetch of {l_count} leaves with batch_size=32" - logger.info(f"{msg1}, timeout={timeout}s") - msg2 = ( - f"[FETCH] Batch fetching {l_count} leaves " - f"(batch_size=32, timeout={timeout}s)" - ) - print(msg2, flush=True) - - # First try to get blocks from the already-fetched tree - block_map: dict[bytes, bytes] = {} - missing_cids: list[CIDInput] = [] - for leaf_cid in ordered_leaf_cids: - leaf_data = all_blocks_map.get(leaf_cid) - if leaf_data is not None: - block_map[leaf_cid] = leaf_data - else: - missing_cids.append(leaf_cid) - - # If some leaves weren't in the tree fetch, fetch them now - if missing_cids: - logger.info(f"[DAG] Fetching {len(missing_cids)} missing leaves") - fetched_blocks = await self._get_blocks_batch( - missing_cids, peer_id=peer_id, timeout=timeout, batch_size=32 - ) - block_map.update(fetched_blocks) - - logger.info(f"[DAG] ✓ Batch fetch complete: {len(block_map)} blocks received") - print(f"[FETCH] ✓ Batch fetch complete: {len(block_map)} blocks", flush=True) - - # Step 8: Reassemble data in order - # extracting UnixFS inline data from leaf nodes - file_data = b"" - bytes_fetched = 0 - missing_blocks: list[bytes] = [] - for idx, leaf_cid in enumerate(ordered_leaf_cids): - leaf_raw = block_map.get(bytes(leaf_cid)) - if leaf_raw is None: - l_idx = idx + 1 - t_leaves = len(ordered_leaf_cids) - c_str = format_cid_for_display(leaf_cid) - msg = f"[DAG] Leaf block {l_idx}/{t_leaves} MISSING: {c_str}" - logger.error(msg) - print(f"[FETCH] ✗ Leaf {l_idx}/{t_leaves} MISSING", flush=True) - missing_blocks.append(leaf_cid) - continue - - # Extract data: leaf blocks are UnixFS file nodes with inline data - if is_file_node(leaf_raw): - _, leaf_unixfs = decode_dag_pb(leaf_raw) - if leaf_unixfs is not None and leaf_unixfs.data: - chunk = leaf_unixfs.data - else: - chunk = b"" - chunk_len = len(chunk) - msg = f"[DAG] Leaf {idx + 1}: extracted {chunk_len} bytes" - logger.debug(f"{msg} from file node") - else: - chunk = leaf_raw - logger.debug(f"[DAG] Leaf {idx + 1}: raw block {len(chunk)} bytes") - - file_data += chunk - bytes_fetched += len(chunk) - - if (idx + 1) % 10 == 0 or idx == len(ordered_leaf_cids) - 1: - i_p = idx + 1 - t_l = len(ordered_leaf_cids) - p_str = f"{bytes_fetched}/{total_size} bytes" - logger.info(f"[DAG] Reassembled {i_p}/{t_l} leaves: {p_str}") - print(f"[FETCH] Reassembled {i_p}/{t_l} leaves: {p_str}", flush=True) - - if progress_callback: - await _call_progress_callback( - progress_callback, bytes_fetched, total_size, "downloading" - ) - - if missing_blocks: - missing_count = len(missing_blocks) - logger.error(f"[DAG] ✗ {missing_count} blocks missing after batch fetch!") - missing_list = [format_cid_for_display(cid) for cid in missing_blocks[:5]] - msg = f"{missing_count} leaf blocks missing: {missing_list}..." - raise BlockNotFoundError(msg) - - if progress_callback: - await _call_progress_callback( - progress_callback, total_size, total_size, "completed" - ) - - file_len = len(file_data) - msg = f"[DAG] ✓ File fetch complete: {file_len} bytes, filename={filename!r}" - logger.info(msg) - print(f"[FETCH] ✓ DOWNLOAD COMPLETE: {file_len} bytes", flush=True) - return file_data, filename - - async def get_file_info( - self, root_cid: CIDInput, peer_id: PeerID | None = None, timeout: float = 30.0 - ) -> dict[str, int | list[int]]: - """ - Get information about a file without downloading it. - - Args: - root_cid: Root CID of the file - peer_id: Optional specific peer to fetch from - timeout: Timeout in seconds (default: 30.0) - - Returns: - Dictionary with file information: - - size: Total file size in bytes - - chunks: Number of chunks - - chunk_sizes: List of chunk sizes - - Example: - >>> info = await dag.get_file_info(root_cid) - >>> print(f"File size: {info['size']} bytes") - >>> print(f"Chunks: {info['chunks']}") - - """ - # Get root block - root_cid_bytes = cid_to_bytes(root_cid) - root_data = await self._get_block(root_cid_bytes, peer_id, timeout) - - # Check if it's a DAG-PB file node - if is_file_node(root_data): - links, unixfs_data = decode_dag_pb(root_data) - - if not links: - # Small file with inline data - data_size = ( - len(unixfs_data.data) if unixfs_data and unixfs_data.data else 0 - ) - return {"size": data_size, "chunks": 0, "chunk_sizes": []} - - # Multi-chunk file - total_size = ( - unixfs_data.filesize - if unixfs_data - else sum(link.size for link in links) - ) - chunk_sizes = [link.size for link in links] - - return { - "size": total_size, - "chunks": len(links), - "chunk_sizes": chunk_sizes, - } - - # Single raw block - return {"size": len(root_data), "chunks": 1, "chunk_sizes": [len(root_data)]} - - -__all__ = ["MerkleDag"] +""" +Merkle DAG manager for file operations. + +This module provides a high-level API for adding and fetching files +using the Bitswap protocol with automatic chunking, linking, and +multi-block resolution. + +""" + +from collections.abc import Awaitable, Callable +import inspect +import io +import logging +from typing import Union + +from libp2p.peer.id import ID as PeerID + +from .block_service import BlockService +from .block_store import BlockStore +from .chunker import ( + DEFAULT_CHUNK_SIZE, + chunk_bytes, + chunk_file, + chunk_stream, + estimate_chunk_count, + get_file_size, +) +from .cid import ( + CODEC_DAG_PB, + CODEC_RAW, + CIDInput, + cid_to_bytes, + compute_cid_v1, + format_cid_for_display, + verify_cid, +) +from .client import BitswapClient +from .dag_pb import ( + balanced_layout, + decode_dag_pb, + is_directory_node, + is_file_node, +) +from .errors import BlockNotFoundError + +logger = logging.getLogger(__name__) + + +# Type alias for progress callbacks (sync or async) +ProgressCallback = Union[ + Callable[[int, int, str], None], + Callable[[int, int, str], Awaitable[None]], +] + + +async def _call_progress_callback( + callback: ProgressCallback | None, + current: int, + total: int, + status: str, +) -> None: + """Call a progress callback, handling both sync and async callbacks.""" + if callback is None: + return + + if inspect.iscoroutinefunction(callback): + await callback(current, total, status) + else: + callback(current, total, status) + + +class MerkleDag: + """ + Merkle DAG manager for file operations. + + Provides high-level API for adding and fetching files with automatic + chunking, link creation, and recursive block fetching. + + Example: + >>> from libp2p import new_host + >>> from libp2p.bitswap import BitswapClient, MemoryBlockStore, MerkleDag + >>> import trio + >>> + >>> async def main(): + ... host = new_host() + ... async with host.run(["/ip4/0.0.0.0/tcp/0"]): + ... store = MemoryBlockStore() + ... bitswap = BitswapClient(host, store) + ... await bitswap.start() + ... + ... dag = MerkleDag(bitswap) + ... + ... # Add a large file (auto-chunked) + ... root_cid = await dag.add_file('movie.mp4') + ... print(f"Share: {cid_to_text(root_cid)}") + ... + ... # Fetch file (auto-resolves all chunks) + ... data = await dag.fetch_file(root_cid) + ... open('downloaded.mp4', 'wb').write(data) + ... + >>> trio.run(main) + + """ + + def __init__( + self, + bitswap: BitswapClient, + block_store: BlockStore | None = None, + block_service: BlockService | None = None, + ): + """ + Initialize Merkle DAG manager. + + Args: + bitswap: Bitswap client for block exchange + block_store: Optional block store (uses bitswap's store if None) + block_service: Optional BlockService for transparent local→network + fallback with auto-caching. When provided, all block + reads/writes go through it instead of bitswap directly. + Construct with: BlockService(your_store, bitswap) + + """ + self.bitswap = bitswap + self.block_store = block_store or bitswap.block_store + # If a BlockService is provided use it; otherwise fall back to + # calling bitswap directly (existing behaviour, no regression). + self._service: BlockService | None = block_service + + # ── private routing helpers ─────────────────────────────────────────────── + + async def _put_block(self, cid: CIDInput, data: bytes) -> None: + """Store a block. Routes through BlockService when available.""" + if self._service is not None: + await self._service.put_block(cid, data) + else: + await self.bitswap.add_block(cid, data) + + async def _get_block( + self, + cid: CIDInput, + peer_id: PeerID | None = None, + timeout: float = 30.0, + ) -> bytes: + """Fetch a block. Routes through BlockService when available.""" + if self._service is not None: + data = await self._service.get_block(cid, peer_id=peer_id, timeout=timeout) + if data is None: + from .cid import cid_to_bytes, format_cid_for_display + + raise BlockNotFoundError( + f"Block not found: {format_cid_for_display(cid_to_bytes(cid))}" + ) + return data + return await self.bitswap.get_block(cid, peer_id, timeout) + + async def _get_blocks_batch( + self, + cids: list[CIDInput], + peer_id: PeerID | None = None, + timeout: float = 30.0, + batch_size: int = 32, + ) -> dict[bytes, bytes]: + """Batch-fetch blocks. Routes through BlockService when available.""" + if self._service is not None: + return await self._service.get_blocks_batch( + cids, peer_id=peer_id, timeout=timeout, batch_size=batch_size + ) + # Check if the client supports native batch fetching + get_blocks_batch: Callable[..., Awaitable[dict[bytes, bytes]]] | None = getattr( + self.bitswap, "get_blocks_batch", None + ) + if get_blocks_batch is not None and callable(get_blocks_batch): + try: + result = await get_blocks_batch( + cids, peer_id=peer_id, timeout=timeout, batch_size=batch_size + ) + # Ensure the result is a plain dict (not a coroutine from a mock) + if isinstance(result, dict): + return result + except Exception: + pass + # Fall back to individual _get_block calls + results: dict[bytes, bytes] = {} + for cid in cids: + from .cid import cid_to_bytes + + cid_bytes = cid_to_bytes(cid) + try: + data = await self._get_block( + cid_bytes, peer_id=peer_id, timeout=timeout + ) + results[cid_bytes] = data + except Exception: + pass + return results + + async def add_file( + self, + file_path: str, + chunk_size: int | None = None, + progress_callback: Callable[[int, int, str], None] | None = None, + wrap_with_directory: bool = True, + ) -> bytes: + """ + Add a file to the DAG. + + Automatically chunks large files and creates link structure. + Small files are stored as single blocks. + + Args: + file_path: Path to file + chunk_size: Optional chunk size (auto-selected if None) + progress_callback: Optional callback(current, total, status) + wrap_with_directory: If True, wraps file in a directory node with filename + (IPFS-standard way, enables filename preservation) + + Returns: + Root CID of the file (or wrapping directory if wrap_with_directory=True) + + Raises: + FileNotFoundError: If file doesn't exist + BlockTooLargeError: If a single chunk exceeds MAX_BLOCK_SIZE + + Example: + >>> async def progress(current, total, status): + ... print(f"{status}: {current}/{total}") + >>> root_cid = await dag.add_file('movie.mp4', progress_callback=progress) + >>> print(f"Share this: {cid_to_text(root_cid)}") + + """ + # Get file size + file_size = get_file_size(file_path) + logger.info(f"Adding file: {file_path} ({file_size} bytes)") + + # Determine chunk size + if chunk_size is None: + chunk_size = DEFAULT_CHUNK_SIZE + + logger.debug(f"Using chunk size: {chunk_size} bytes") + + # If file is small enough, store as single raw leaf block + # (Kubo default: RawLeaves=true) + if file_size <= chunk_size: + logger.debug("File fits in single block") + + with open(file_path, "rb") as f: + data = f.read() + + # Raw leaf: store file bytes directly with raw codec CID + cid = compute_cid_v1(data, codec=CODEC_RAW) + + await self._put_block(cid, data) + + if progress_callback: + await _call_progress_callback( + progress_callback, file_size, file_size, "completed" + ) + + logger.info( + f"Added file as single raw block: " + f"{format_cid_for_display(cid, max_len=16)}" + ) + + # Wrap in directory if requested + if wrap_with_directory: + import os + + from .dag_pb import create_directory_node + + filename = os.path.basename(file_path) + logger.info( + f"Wrapping single-block file in directory with name: {filename}" + ) + + # Tsize for raw leaf = raw file size (no block overhead) + dir_data = create_directory_node([(filename, cid, file_size)]) + dir_cid = compute_cid_v1(dir_data, codec=CODEC_DAG_PB) + await self._put_block(dir_cid, dir_data) + + logger.info( + f"Created directory wrapper. Directory CID: " + f"{format_cid_for_display(dir_cid, max_len=16)}" + ) + return dir_cid + + return cid + + # Chunk the file + estimated_chunks = estimate_chunk_count(file_size, chunk_size) + logger.debug(f"Chunking file into ~{estimated_chunks} chunks") + logger.info("=== Starting file chunking process ===") + + # leaf_triples: (cid_bytes, leaf_block_bytes, raw_data_size) + # For raw leaves (Kubo default): leaf_block = raw chunk bytes, + # CID uses CODEC_RAW. This matches Kubo's RawLeaves=true behavior + # for multi-chunk files, producing identical CIDs. + leaf_triples: list[tuple[bytes, bytes, int]] = [] + bytes_processed = 0 + + # Process file in chunks (memory efficient) + for i, chunk_data in enumerate(chunk_file(file_path, chunk_size)): + # Raw leaf: store chunk bytes directly with raw codec CID (Kubo default) + chunk_cid = compute_cid_v1(chunk_data, codec=CODEC_RAW) + + await self._put_block(chunk_cid, chunk_data) + leaf_triples.append((chunk_cid, chunk_data, len(chunk_data))) + bytes_processed += len(chunk_data) + + # Progress callback + if progress_callback: + await _call_progress_callback( + progress_callback, + bytes_processed, + file_size, + f"chunking ({i + 1} chunks)", + ) + + logger.info( + f"Chunk {i + 1}: CID={format_cid_for_display(chunk_cid)}, " + f"Size={len(chunk_data)} bytes, " + f"Progress={bytes_processed}/{file_size}" + ) + logger.debug( + f"Stored leaf {i}: {format_cid_for_display(chunk_cid, max_len=16)} " + f"({len(chunk_data)} bytes)" + ) + + # Build balanced DAG tree (max 174 links/node, matches Kubo) + if progress_callback: + await _call_progress_callback( + progress_callback, file_size, file_size, "creating root node" + ) + + # Create a sync wrapper for the async _put_block method + # We'll collect (cid, data) pairs and store them after + internal_nodes: list[tuple[bytes, bytes]] = [] + + def store_internal_node(cid: bytes, data: bytes) -> None: + """Callback to collect internal nodes for storage.""" + internal_nodes.append((cid, data)) + + root_cid, root_data, root_tsize = balanced_layout( + leaf_triples, put_block_callback=store_internal_node + ) + + # Store all internal nodes + logger.info(f"Storing {len(internal_nodes)} internal DAG nodes...") + for cid, data in internal_nodes: + await self._put_block(cid, data) + + # Store the root node + await self._put_block(root_cid, root_data) + + # Enhanced logging for root CID + logger.info("=== File chunking completed ===") + logger.info( + f"Root CID: {format_cid_for_display(root_cid)} " + f"(Balanced DAG over {len(leaf_triples)} leaves)" + ) + logger.info(f"Total file size: {file_size} bytes") + logger.info("=" * 50) + + logger.info( + f"Added file with {len(leaf_triples)} leaves. " + f"Root CID: {format_cid_for_display(root_cid, max_len=16)}" + ) + + if progress_callback: + await _call_progress_callback( + progress_callback, file_size, file_size, "completed" + ) + + # Wrap in directory if requested (IPFS-standard way for filename preservation) + if wrap_with_directory: + import os + + from .dag_pb import create_directory_node + + filename = os.path.basename(file_path) + logger.info(f"Wrapping file in directory with name: {filename}") + + # Tsize = cumulative block size (root block + all descendant blocks), + # matching Kubo's behavior for directory link Tsize. + dir_data = create_directory_node([(filename, root_cid, root_tsize)]) + dir_cid = compute_cid_v1(dir_data, codec=CODEC_DAG_PB) + await self._put_block(dir_cid, dir_data) + + logger.info( + "Created directory wrapper. Directory CID: " + f"{format_cid_for_display(dir_cid, max_len=16)}" + ) + return dir_cid + + return root_cid + + async def add_bytes( + self, + data: bytes, + chunk_size: int | None = None, + progress_callback: Callable[[int, int, str], None] | None = None, + ) -> bytes: + """ + Add bytes to the DAG (similar to add_file but for in-memory data). + + Args: + data: Data to add + chunk_size: Optional chunk size (auto-selected if None) + progress_callback: Optional callback(current, total, status) + + Returns: + Root CID + + Example: + >>> data = b"x" * (10 * 1024 * 1024) # 10 MB + >>> root_cid = await dag.add_bytes(data) + + """ + file_size = len(data) + logger.info(f"Adding {file_size} bytes") + + # Determine chunk size + if chunk_size is None: + chunk_size = DEFAULT_CHUNK_SIZE + + # If data is small, store as single raw leaf block + # (Kubo default: RawLeaves=true) + if file_size <= chunk_size: + cid = compute_cid_v1(data, codec=CODEC_RAW) + await self._put_block(cid, data) + + if progress_callback: + await _call_progress_callback( + progress_callback, file_size, file_size, "completed" + ) + + return cid + + # Chunk the data using raw leaves (Kubo default: RawLeaves=true) + chunks = chunk_bytes(data, chunk_size) + leaf_triples: list[tuple[bytes, bytes, int]] = [] + + for i, chunk_data in enumerate(chunks): + chunk_cid = compute_cid_v1(chunk_data, codec=CODEC_RAW) + await self._put_block(chunk_cid, chunk_data) + leaf_triples.append((chunk_cid, chunk_data, len(chunk_data))) + + if progress_callback: + bytes_processed = sum(s for _, _, s in leaf_triples) + await _call_progress_callback( + progress_callback, + bytes_processed, + file_size, + f"chunking ({i + 1}/{len(chunks)})", + ) + + # Build balanced DAG tree + root_cid, root_data, _tsize = balanced_layout(leaf_triples) + await self._put_block(root_cid, root_data) + + if progress_callback: + await _call_progress_callback( + progress_callback, file_size, file_size, "completed" + ) + + return root_cid + + async def add_stream( + self, + stream: io.IOBase, + chunk_size: int | None = None, + progress_callback: ProgressCallback | None = None, + ) -> bytes: + """ + Add data from any io.IOBase stream to the DAG. + + More flexible than add_file() (accepts any stream, not just file paths) + and more memory efficient than add_bytes() (reads one chunk at a time, + so total memory usage is O(chunk_size) regardless of file size). + + Args: + stream: Any readable io.IOBase — open() handles, BytesIO, + GzipFile, BZ2File, network streams, pipes, etc. + chunk_size: Optional chunk size in bytes (auto-selected if None) + progress_callback: Optional callback(current, total, status). + Note: total is unknown for streams, so current + is reported as bytes processed so far. + + Returns: + Root CID bytes of the stored DAG + + Example: + >>> import io + >>> root_cid = await dag.add_stream(io.BytesIO(b"hello world")) + + >>> # Memory-efficient large file (no full read into RAM) + >>> with open("movie.mp4", "rb") as f: + ... root_cid = await dag.add_stream(f) + + >>> # Decompress and add in one pass + >>> import gzip + >>> with gzip.open("archive.gz", "rb") as f: + ... root_cid = await dag.add_stream(f) + + >>> # With BlockService for persistent caching + >>> service = BlockService(FilesystemBlockStore("./blocks"), bitswap) + >>> dag = MerkleDag(bitswap, block_service=service) + >>> with open("large.bin", "rb") as f: + ... root_cid = await dag.add_stream(f) # cached to disk + + """ + if chunk_size is None: + chunk_size = DEFAULT_CHUNK_SIZE + + leaf_triples: list[tuple[bytes, bytes, int]] = [] + bytes_processed = 0 + + for i, chunk_data in enumerate(chunk_stream(stream, chunk_size)): + # Raw leaf: store chunk bytes directly (Kubo default: RawLeaves=true) + chunk_cid = compute_cid_v1(chunk_data, codec=CODEC_RAW) + await self._put_block(chunk_cid, chunk_data) + leaf_triples.append((chunk_cid, chunk_data, len(chunk_data))) + bytes_processed += len(chunk_data) + + if progress_callback: + # total is unknown for streams — report bytes processed so far + await _call_progress_callback( + progress_callback, + bytes_processed, + bytes_processed, + f"chunking ({i + 1} chunks, {bytes_processed} bytes)", + ) + + # Empty stream — store a single empty raw block + if not leaf_triples: + cid = compute_cid_v1(b"", codec=CODEC_RAW) + await self._put_block(cid, b"") + return cid + + # Single chunk — return the leaf CID directly (no root node needed) + if len(leaf_triples) == 1: + return leaf_triples[0][0] + + # Multiple chunks — build balanced DAG tree + root_cid, root_data, _tsize = balanced_layout(leaf_triples) + await self._put_block(root_cid, root_data) + + if progress_callback: + await _call_progress_callback( + progress_callback, bytes_processed, bytes_processed, "completed" + ) + + return root_cid + + async def fetch_file( + self, + root_cid: CIDInput, + peer_id: PeerID | None = None, + timeout: float = 30.0, + progress_callback: Callable[[int, int, str], None] | None = None, + ) -> tuple[bytes, str | None]: + """ + Fetch a file from the DAG. + + Automatically resolves links and fetches all chunks. Works with both + single-block files and multi-chunk files. Everything is handled + automatically - just provide the root CID! + + The method automatically: + - Detects directory wrappers and extracts filename + - Fetches and decodes the root block + - Determines file size and number of chunks + - Fetches all chunks in sequence + - Verifies integrity of all blocks + - Reconstructs the complete file + + Args: + root_cid: Root CID of the file (or directory wrapper) + peer_id: Optional specific peer to fetch from + timeout: Timeout per block in seconds + progress_callback: Optional callback(current, total, status) + Receives metadata automatically in first call + + Returns: + Tuple of (file_data, filename) where filename is None if not + wrapped in directory + + Raises: + BlockNotFoundError: If any block cannot be found + ValueError: If CID verification fails + + Example: + >>> # Simple usage - just provide root CID + >>> data, filename = await dag.fetch_file(root_cid) + >>> save_path = filename or 'downloaded_file' + >>> open(save_path, 'wb').write(data) + + >>> # With progress tracking + >>> def progress(current, total, status): + ... percent = (current / total) * 100 if total > 0 else 0 + ... print(f"{status}: {percent:.1f}%") + >>> data, filename = await dag.fetch_file( + ... root_cid, progress_callback=progress + ... ) + + """ + root_cid_bytes = cid_to_bytes(root_cid) + logger.info(f"Fetching file: {format_cid_for_display(root_cid_bytes)}") + + # Step 1: Fetch the root block + root_data = await self._get_block(root_cid_bytes, peer_id, timeout) + if not verify_cid(root_cid_bytes, root_data): + root_cid_str = format_cid_for_display(root_cid_bytes) + raise ValueError(f"Root block CID verification failed: {root_cid_str}") + + # Step 2: Handle directory wrapper + # (produced by `ipfs add --wrap-with-directory`) + filename = None + actual_file_cid = root_cid_bytes + actual_file_data = root_data + + if is_directory_node(root_data): + logger.info("Root is a directory node — extracting filename and file CID") + dir_links, _ = decode_dag_pb(root_data) + if dir_links: + first_link = dir_links[0] + filename = first_link.name or None + # Links now store the full CID bytes (CIDv1 buffer or CIDv0 multihash) + actual_file_cid = first_link.cid + logger.info(f"Filename from directory: {filename!r}") + actual_file_data = await self._get_block( + actual_file_cid, peer_id, timeout + ) + if not verify_cid(actual_file_cid, actual_file_data): + f_cid_str = format_cid_for_display(actual_file_cid) + err_msg = f"File block CID verification failed: {f_cid_str}" + raise ValueError(err_msg) + + # Step 3: Handle raw block (not a DAG-PB node at all) + if not is_file_node(actual_file_data): + logger.info(f"Root is a raw block: {len(actual_file_data)} bytes") + return actual_file_data, filename + + # Step 4: Parse the file node + top_links, top_unixfs = decode_dag_pb(actual_file_data) + filesize = top_unixfs.filesize if top_unixfs else 0 + total_size = filesize or sum(lnk.size for lnk in top_links) + msg = f"File node: {len(top_links)} top-level links, total size={total_size}" + logger.info(f"{msg} bytes") + + # Step 5: Small file with inline data (no links) + if not top_links: + file_data = top_unixfs.data if top_unixfs and top_unixfs.data else b"" + logger.info(f"Inline file data: {len(file_data)} bytes") + if progress_callback: + data_len = len(file_data) + await _call_progress_callback( + progress_callback, data_len, data_len, "completed" + ) + return file_data, filename + + # Step 6: Collect all leaf CIDs without opening streams + # Strategy: Recursively batch-fetch all DAG nodes + # then traverse locally to collect leaves + + top_len = len(top_links) + msg1 = f"[DAG] Recursively batch-fetching DAG tree ({top_len} top links)..." + logger.info(msg1) + msg2 = f"[FETCH] Recursively batch-fetching DAG tree ({top_len} top links)..." + print(msg2, flush=True) + + # Map to store ALL fetched blocks (both intermediate and leaves) + all_blocks_map: dict[bytes, bytes] = {} + + async def _batch_fetch_tree(cid_list: list[bytes], depth: int) -> None: + """Recursively batch-fetch a level of DAG nodes and queue their children.""" + if not cid_list: + return + + c_count = len(cid_list) + msg1 = f"[DAG] Depth {depth}: batch-fetching {c_count} blocks..." + logger.info(msg1) + msg2 = f"[FETCH] Depth {depth}: batch-fetching {c_count} blocks..." + print(msg2, flush=True) + + # Batch-fetch this level's blocks + level_blocks = await self._get_blocks_batch( + list(cid_list), peer_id=peer_id, timeout=timeout, batch_size=32 + ) + logger.info(f"[DAG] Depth {depth}: ✓ received {len(level_blocks)} blocks") + all_blocks_map.update(level_blocks) + + # Collect child CIDs for recursion + child_cids: list[bytes] = [] + for cid_bytes in cid_list: + block_data = level_blocks.get(cid_bytes) + if block_data is None: + c_str = format_cid_for_display(cid_bytes) + msg = f"[DAG] Depth {depth}: block {c_str} missing after" + logger.warning(f"{msg} fetch") + continue + + if is_file_node(block_data): + node_links, _ = decode_dag_pb(block_data) + cid_str = format_cid_for_display(cid_bytes) + msg = f"[DAG] Depth {depth}: {cid_str} has {len(node_links)}" + logger.debug(f"{msg} children") + for link in node_links: + # Links now store full CID bytes directly + child_cids.append(link.cid) + + # Recursively fetch next level if there are children + if child_cids: + ch_count = len(child_cids) + msg = f"[DAG] Depth {depth}: found {ch_count} child CIDs" + logger.info(f"{msg}, fetching next level...") + await _batch_fetch_tree(child_cids, depth + 1) + + # Starting from the top-level links (full CID bytes stored in links) + top_cids = [top_link.cid for top_link in top_links] + await _batch_fetch_tree(top_cids, depth=1) + blocks_count = len(all_blocks_map) + logger.info(f"[DAG] ✓ Tree fetch complete: {blocks_count} total blocks") + print(f"[FETCH] ✓ Tree fetch complete: {blocks_count} total blocks", flush=True) + + # Now traverse locally to collect leaf CIDs in order + ordered_leaf_cids: list[bytes] = [] + + def _collect_leaves_local(cid_bytes: bytes, depth: int = 1) -> None: + """Traverse locally-fetched blocks to collect leaf CIDs.""" + block_data = all_blocks_map.get(cid_bytes) + if block_data is None: + cid_str = format_cid_for_display(cid_bytes) + logger.warning(f"[DAG] Depth {depth}: block {cid_str} not in map") + return + + if not is_file_node(block_data): + # Raw block - it's a leaf + logger.debug(f"[DAG] Depth {depth}: raw block (leaf)") + ordered_leaf_cids.append(cid_bytes) + return + + node_links, _ = decode_dag_pb(block_data) + logger.debug(f"[DAG] Depth {depth}: {len(node_links)} links") + + if not node_links: + # Leaf node (no children, data is inline in UnixFS) + logger.debug(f"[DAG] Depth {depth}: file node with inline data (leaf)") + ordered_leaf_cids.append(cid_bytes) + return + + # Intermediate node - recursively process children + for j, child_link in enumerate(node_links): + c_idx = j + 1 + c_tot = len(node_links) + msg = f"[DAG] Depth {depth}: processing child {c_idx}/{c_tot}" + logger.debug(msg) + # Links store full CID bytes directly + child_cid = child_link.cid + _collect_leaves_local(child_cid, depth + 1) + + # Traverse each top-level block + for i, top_link in enumerate(top_links): + logger.info(f"[DAG] Traversing top-level {i + 1}/{len(top_links)}...") + # Links store full CID bytes directly + top_cid = top_link.cid + _collect_leaves_local(top_cid, depth=1) + + logger.info(f"[DAG] ✓ Collected {len(ordered_leaf_cids)} leaf blocks") + + # Step 7: Batch-fetch all leaf blocks + # (single wantlist per batch → avoids GO_AWAY) + if progress_callback: + await _call_progress_callback( + progress_callback, + 0, + total_size, + f"fetching {len(ordered_leaf_cids)} leaf blocks in batches", + ) + + l_count = len(ordered_leaf_cids) + msg1 = f"[DAG] Starting batch fetch of {l_count} leaves with batch_size=32" + logger.info(f"{msg1}, timeout={timeout}s") + msg2 = ( + f"[FETCH] Batch fetching {l_count} leaves " + f"(batch_size=32, timeout={timeout}s)" + ) + print(msg2, flush=True) + + # First try to get blocks from the already-fetched tree + block_map: dict[bytes, bytes] = {} + missing_cids: list[CIDInput] = [] + for leaf_cid in ordered_leaf_cids: + leaf_data = all_blocks_map.get(leaf_cid) + if leaf_data is not None: + block_map[leaf_cid] = leaf_data + else: + missing_cids.append(leaf_cid) + + # If some leaves weren't in the tree fetch, fetch them now + if missing_cids: + logger.info(f"[DAG] Fetching {len(missing_cids)} missing leaves") + fetched_blocks = await self._get_blocks_batch( + missing_cids, peer_id=peer_id, timeout=timeout, batch_size=32 + ) + block_map.update(fetched_blocks) + + logger.info(f"[DAG] ✓ Batch fetch complete: {len(block_map)} blocks received") + print(f"[FETCH] ✓ Batch fetch complete: {len(block_map)} blocks", flush=True) + + # Step 8: Reassemble data in order + # extracting UnixFS inline data from leaf nodes + file_data = b"" + bytes_fetched = 0 + missing_blocks: list[bytes] = [] + for idx, leaf_cid in enumerate(ordered_leaf_cids): + leaf_raw = block_map.get(bytes(leaf_cid)) + if leaf_raw is None: + l_idx = idx + 1 + t_leaves = len(ordered_leaf_cids) + c_str = format_cid_for_display(leaf_cid) + msg = f"[DAG] Leaf block {l_idx}/{t_leaves} MISSING: {c_str}" + logger.error(msg) + print(f"[FETCH] ✗ Leaf {l_idx}/{t_leaves} MISSING", flush=True) + missing_blocks.append(leaf_cid) + continue + + # Extract data: leaf blocks are UnixFS file nodes with inline data + if is_file_node(leaf_raw): + _, leaf_unixfs = decode_dag_pb(leaf_raw) + if leaf_unixfs is not None and leaf_unixfs.data: + chunk = leaf_unixfs.data + else: + chunk = b"" + chunk_len = len(chunk) + msg = f"[DAG] Leaf {idx + 1}: extracted {chunk_len} bytes" + logger.debug(f"{msg} from file node") + else: + chunk = leaf_raw + logger.debug(f"[DAG] Leaf {idx + 1}: raw block {len(chunk)} bytes") + + file_data += chunk + bytes_fetched += len(chunk) + + if (idx + 1) % 10 == 0 or idx == len(ordered_leaf_cids) - 1: + i_p = idx + 1 + t_l = len(ordered_leaf_cids) + p_str = f"{bytes_fetched}/{total_size} bytes" + logger.info(f"[DAG] Reassembled {i_p}/{t_l} leaves: {p_str}") + print(f"[FETCH] Reassembled {i_p}/{t_l} leaves: {p_str}", flush=True) + + if progress_callback: + await _call_progress_callback( + progress_callback, bytes_fetched, total_size, "downloading" + ) + + if missing_blocks: + missing_count = len(missing_blocks) + logger.error(f"[DAG] ✗ {missing_count} blocks missing after batch fetch!") + missing_list = [format_cid_for_display(cid) for cid in missing_blocks[:5]] + msg = f"{missing_count} leaf blocks missing: {missing_list}..." + raise BlockNotFoundError(msg) + + if progress_callback: + await _call_progress_callback( + progress_callback, total_size, total_size, "completed" + ) + + file_len = len(file_data) + msg = f"[DAG] ✓ File fetch complete: {file_len} bytes, filename={filename!r}" + logger.info(msg) + print(f"[FETCH] ✓ DOWNLOAD COMPLETE: {file_len} bytes", flush=True) + return file_data, filename + + async def get_file_info( + self, root_cid: CIDInput, peer_id: PeerID | None = None, timeout: float = 30.0 + ) -> dict[str, int | list[int]]: + """ + Get information about a file without downloading it. + + Args: + root_cid: Root CID of the file + peer_id: Optional specific peer to fetch from + timeout: Timeout in seconds (default: 30.0) + + Returns: + Dictionary with file information: + - size: Total file size in bytes + - chunks: Number of chunks + - chunk_sizes: List of chunk sizes + + Example: + >>> info = await dag.get_file_info(root_cid) + >>> print(f"File size: {info['size']} bytes") + >>> print(f"Chunks: {info['chunks']}") + + """ + # Get root block + root_cid_bytes = cid_to_bytes(root_cid) + root_data = await self._get_block(root_cid_bytes, peer_id, timeout) + + # Check if it's a DAG-PB file node + if is_file_node(root_data): + links, unixfs_data = decode_dag_pb(root_data) + + if not links: + # Small file with inline data + data_size = ( + len(unixfs_data.data) if unixfs_data and unixfs_data.data else 0 + ) + return {"size": data_size, "chunks": 0, "chunk_sizes": []} + + # Multi-chunk file + total_size = ( + unixfs_data.filesize + if unixfs_data + else sum(link.size for link in links) + ) + chunk_sizes = [link.size for link in links] + + return { + "size": total_size, + "chunks": len(links), + "chunk_sizes": chunk_sizes, + } + + # Single raw block + return {"size": len(root_data), "chunks": 1, "chunk_sizes": [len(root_data)]} + + +__all__ = ["MerkleDag"] diff --git a/libp2p/bitswap/dag_pb.py b/libp2p/bitswap/dag_pb.py index 1ae5f3b0f..1d4e6ec37 100644 --- a/libp2p/bitswap/dag_pb.py +++ b/libp2p/bitswap/dag_pb.py @@ -9,7 +9,7 @@ from dataclasses import dataclass, field import logging -from .cid import CODEC_DAG_PB, CIDInput, cid_to_bytes, compute_cid_v1 +from .cid import CODEC_DAG_PB, CIDInput, compute_cid_v1 from .pb.dag_pb_pb2 import PBLink, PBNode from .pb.unixfs_pb2 import Data as PBUnixFSData diff --git a/libp2p/bitswap/extension.py b/libp2p/bitswap/extension.py new file mode 100644 index 000000000..94f2bafa1 --- /dev/null +++ b/libp2p/bitswap/extension.py @@ -0,0 +1,57 @@ +from abc import ABC, abstractmethod +from typing import Any + +from libp2p.abc import INetStream +from libp2p.peer.id import ID as PeerID + + +class IBitswapExtension(ABC): + """ + Abstract base class for protocol-bound Bitswap extensions. + Extensions are registered for specific protocol versions to handle messages. + """ + + def set_client(self, client: Any) -> None: + """ + Set the parent BitswapClient instance. + """ + self.client = client + + @abstractmethod + async def process_message( + self, peer_id: PeerID, msg_bytes: bytes, stream: INetStream + ) -> bool: + """ + Process an incoming message. + + Args: + peer_id: The ID of the peer sending the message + msg_bytes: The raw bytes of the incoming message + stream: The network stream to communicate back + + Returns: + True if the extension fully handled the message and no further + processing is required. + False if normal processing should continue. + + """ + pass + + @abstractmethod + async def process_wantlist( + self, wantlist: Any, peer_id: PeerID, stream: INetStream + ) -> bool: + """ + Process a wantlist specifically. + + Args: + wantlist: The Wantlist protobuf object + peer_id: The ID of the peer + stream: The network stream + + Returns: + True if the extension handled the wantlist fully. + False if BitswapClient should process it normally. + + """ + pass diff --git a/libp2p/bitswap/gated_decision_engine.py b/libp2p/bitswap/gated_decision_engine.py index a6994bb38..e71668c72 100644 --- a/libp2p/bitswap/gated_decision_engine.py +++ b/libp2p/bitswap/gated_decision_engine.py @@ -51,9 +51,9 @@ class PaymentGatedDecisionEngine: def __init__( self, blockstore: BlockStore, - ledger: Any, # payments.ledger.PaymentLedger - pricing: Any, # payments.pricing.BlockPricingEngine - tx_verifier: Any, # payments.tx_verifier.TxVerifier (or None) + ledger: Any, # payments.ledger.PaymentLedger + pricing: Any, # payments.pricing.BlockPricingEngine + tx_verifier: Any, # payments.tx_verifier.TxVerifier (or None) server_wallet: str = "", network: str = "sepolia", asset: str = "ETH", @@ -71,11 +71,7 @@ def __init__( # Callbacks for sending messages back to peers self.send_message_callback = None - - # Root CID tracking: cid_hex → {root_cid, total_size, child_count} - # Used to compute total file size for pricing - self._dag_info: dict[str, dict[str, Any]] = {} - + # Root CID tracking: cid_hex → {root_cid, total_size, child_count} # Used to compute total file size for pricing self._dag_info: dict[str, dict[str, Any]] = {} @@ -88,15 +84,15 @@ async def register_dag( ) -> None: """ Register a DAG structure for root CID payment tracking. - + Call this after chunking a file to register the relationship between the root CID and its child blocks, along with the total file size. - + Args: root_cid: The root CID of the DAG child_cids: List of child/chunk CIDs total_size: Total size of all blocks combined (bytes) - + Example: >>> # After adding a large file to Bitswap >>> await engine.register_dag( @@ -104,19 +100,20 @@ async def register_dag( ... child_cids=[chunk1, chunk2, ...], ... total_size=5_000_000, # 5 MB ... ) + """ root_hex = _cid_to_str(root_cid) - + # Store DAG metadata self._dag_info[root_hex] = { "root_cid": root_hex, "total_size": total_size, "child_count": len(child_cids), } - + # Register in ledger so child blocks inherit root payment status await self.ledger.register_dag(root_cid, child_cids) - + logger.info( f"📋 Registered DAG: root={root_hex[:20]}... " f"size={total_size}B children={len(child_cids)}" @@ -125,9 +122,10 @@ async def register_dag( def mark_free(self, cid: str | bytes) -> None: """ Mark a CID as free (no payment required). - + Args: cid: The CID to mark as free (root or child) + """ self.ledger.mark_free(cid) self.pricing.set_free(cid) @@ -137,7 +135,7 @@ async def handle_want( self, peer_id: str, cid: str | bytes, - want_type: int, # 0 = WANT_BLOCK, 1 = WANT_HAVE + want_type: int, # 0 = WANT_BLOCK, 1 = WANT_HAVE send_dont_have: bool, peer_protocol: str = BITSWAP_PROTOCOL_V120, ) -> Message_1_3 | Message_1_2 | None: @@ -155,7 +153,10 @@ async def handle_want( ) # Check blockstore - logger.info("All CIDs in blockstore: " + ", ".join([c.hex() for c in self.blockstore.get_all_cids()])) + logger.info( + "All CIDs in blockstore: " + + ", ".join([c.hex() for c in self.blockstore.get_all_cids()]) + ) block_data = await self.blockstore.get_block(cid_obj) if block_data is None: @@ -169,7 +170,7 @@ async def handle_want( # Get pricing size (use total DAG size if this is part of a DAG) pricing_size = self._get_pricing_size(cid_str, block_size) - + # Compute price (at root CID level, not per-block) price = self.pricing.compute_price(cid_str, pricing_size) logger.info( @@ -179,7 +180,7 @@ async def handle_want( # Check if free or already paid (ledger resolves child → root automatically) is_paid = self.ledger.is_paid(peer_id, cid_str) - + if price == 0: # Free block — serve it logger.info(f"✅ Serving block (FREE): {cid_str[:20]}...") @@ -189,7 +190,10 @@ async def handle_want( return self._make_block_response(cid_bytes, block_data, peer_protocol) elif is_paid: # Already paid with sufficient amount — serve it - logger.info(f"✅ Serving block (ALREADY PAID): {cid_str[:20]}... price={price} units") + logger.info( + f"✅ Serving block (ALREADY PAID): {cid_str[:20]}... " + f"price={price} units" + ) if want_type == 1: # WANT_HAVE return self._make_have(cid_bytes, peer_protocol) else: # WANT_BLOCK @@ -197,15 +201,14 @@ async def handle_want( else: # Payment required if peer_protocol == BITSWAP_PROTOCOL_V130: - logger.info( - f"💳 Payment required: {price} units for {cid_str[:20]}..." - ) + logger.info(f"💳 Payment required: {price} units for {cid_str[:20]}...") return self._make_payment_required_1_3( peer_id, cid_bytes, pricing_size, price ) else: logger.warning( - f"⚠️ Payment required but peer on {peer_protocol}, sending DONT_HAVE" + f"⚠️ Payment required but peer on {peer_protocol}, " + f"sending DONT_HAVE" ) if send_dont_have: return self._make_dont_have(cid_bytes, peer_protocol) @@ -259,14 +262,19 @@ async def handle_payment_authorization( ) logger.warning(f"❌ {error_msg}") return self._make_payment_rejection(cid_bytes, error_msg) - + # Verify EIP-3009 signature logger.warning("=" * 70) - logger.warning(f"[STEP 7] SERVER VERIFYING EIP-3009 SIGNATURE") + logger.warning("[STEP 7] SERVER VERIFYING EIP-3009 SIGNATURE") logger.warning(f" from={from_address[:20]}...") logger.warning(f" to={auth.to_address[:20]}...") logger.warning(f" value={auth.value} expected={expected_price}") - logger.warning(f" verifier={'configured' if self.tx_verifier is not None else 'NOT CONFIGURED (optimistic mode)'}") + verifier_status = ( + "configured" + if self.tx_verifier is not None + else "NOT CONFIGURED (optimistic mode)" + ) + logger.warning(f" verifier={verifier_status}") logger.warning("=" * 70) if self.tx_verifier is not None: try: @@ -292,13 +300,16 @@ async def handle_payment_authorization( logger.warning("=" * 70) logger.warning(f"[STEP 7] ❌ EIP-3009 VERIFICATION FAILED: {error}") logger.warning("=" * 70) - return self._make_payment_rejection(cid_bytes, error or "INVALID_SIGNATURE") + return self._make_payment_rejection( + cid_bytes, error or "INVALID_SIGNATURE" + ) else: - logger.warning(f"[STEP 7] ✅ EIP-3009 VERIFICATION PASSED") + logger.warning("[STEP 7] ✅ EIP-3009 VERIFICATION PASSED") else: # No verifier configured — optimistic mode: trust the authorization logger.warning( - "[STEP 7] ⚠️ No payment verifier configured — accepting PaymentAuthorization optimistically" + "[STEP 7] ⚠️ No payment verifier configured — accepting " + "PaymentAuthorization optimistically" ) # Record payment in ledger @@ -315,11 +326,10 @@ async def handle_payment_authorization( logger.info(f"Payment already recorded: {e}") logger.warning("=" * 70) + logger.warning("[STEP 8b] ✅ SERVER PAYMENT ACCEPTED — SENDING BLOCK TO CLIENT") logger.warning( - f"[STEP 8b] ✅ SERVER PAYMENT ACCEPTED — SENDING BLOCK TO CLIENT" - ) - logger.warning( - f" cid={cid_str[:20]}... value={auth.value} expected={expected_price} block_size={block_size}B (EIP-3009)" + f" cid={cid_str[:20]}... value={auth.value} expected={expected_price} " + f"block_size={block_size}B (EIP-3009)" ) logger.warning("=" * 70) return self._make_receipt_and_block(cid_bytes, "", block_data) @@ -331,8 +341,8 @@ async def process_incoming_1_3_message( Process an incoming 1.3.0 message that may contain PaymentAuthorizations. Returns a response message or None. """ - if msg.payment_authorizations: - for auth in msg.payment_authorizations: + if msg.payment_authorizations: # type: ignore[attr-defined] + for auth in msg.payment_authorizations: # type: ignore[attr-defined] return await self.handle_payment_authorization(peer_id, auth) return None @@ -341,16 +351,17 @@ async def process_incoming_1_3_message( def _get_pricing_size(self, cid_str: str, block_size: int) -> int: """ Get the size to use for pricing calculation. - + NEW PAYMENT MODEL: For root CIDs, use total DAG size. For child CIDs, pricing is N/A (they inherit root payment). - + Args: cid_str: The CID (hex string) block_size: The actual block size - + Returns: Size in bytes to use for pricing + """ # Check if this is a registered DAG root dag_info = self._dag_info.get(cid_str) @@ -362,7 +373,7 @@ def _get_pricing_size(self, cid_str: str, block_size: int) -> int: f"block_size={block_size}B, total_size={total_size}B" ) return total_size - + # Not a registered root CID - use block size (backward compatibility) # This handles: old files, single-block files, or child blocks logger.debug( @@ -381,7 +392,7 @@ def _make_payment_required_1_3( """Build a 1.3.0 PaymentRequired message with embedded PaymentTerms.""" import secrets import time - + msg = Message_1_3() # BlockPresence with type=2 (PaymentRequired) @@ -396,14 +407,14 @@ def _make_payment_required_1_3( terms.pay_to = self.server_wallet terms.amount = amount terms.network = self.network - terms.nonce = secrets.token_bytes(32) # Server generates nonce - terms.valid_before = int(time.time()) + 3600 # 1 hour expiry + terms.nonce = secrets.token_bytes(32) # type: ignore[attr-defined] + terms.valid_before = int(time.time()) + 3600 # type: ignore[attr-defined] terms.block_size = block_size terms.description = ( f"Block {cid_bytes.hex()[:20]}... ({block_size // 1024}KB) — " f"pay {amount} wei to {self.server_wallet[:10]}..." ) - terms.scheme = "EIP3009" # Payment scheme + terms.scheme = "EIP3009" # type: ignore[attr-defined] logger.info( f"📤 PaymentRequired → {peer_id[:20]}... " @@ -441,9 +452,9 @@ def _make_have(self, cid_bytes: bytes, protocol: str) -> Message_1_3 | Message_1 presence = msg.blockPresences.add() presence.cid = cid_bytes if protocol == BITSWAP_PROTOCOL_V130: - presence.type = Message_1_3.BlockPresenceType.Have + presence.type = Message_1_3.BlockPresenceType.Have # type: ignore else: - presence.type = Message_1_2.BlockPresenceType.Have + presence.type = Message_1_2.BlockPresenceType.Have # type: ignore return msg def _make_dont_have( @@ -454,9 +465,9 @@ def _make_dont_have( presence = msg.blockPresences.add() presence.cid = cid_bytes if protocol == BITSWAP_PROTOCOL_V130: - presence.type = Message_1_3.BlockPresenceType.DontHave + presence.type = Message_1_3.BlockPresenceType.DontHave # type: ignore else: - presence.type = Message_1_2.BlockPresenceType.DontHave + presence.type = Message_1_2.BlockPresenceType.DontHave # type: ignore return msg def _make_block_response( @@ -469,19 +480,20 @@ def _make_block_response( block.data = block_data return msg - def _get_pricing_size(self, cid_str: str, block_size: int) -> int: + def _get_pricing_size_fallback(self, cid_str: str, block_size: int) -> int: """ Get the size to use for pricing calculations. - + If this CID is part of a registered DAG, return the total DAG size. Otherwise, return the individual block size. - + Args: cid_str: The CID being priced block_size: The individual block size - + Returns: Size in bytes to use for pricing + """ # Check if this is a registered root CID if cid_str in self._dag_info: @@ -491,13 +503,14 @@ def _get_pricing_size(self, cid_str: str, block_size: int) -> int: f"total={total_size}B (not block={block_size}B)" ) return total_size - + # Not a registered DAG, use individual block size return block_size # ── CID helpers ─────────────────────────────────────────────────────────────── + def _cid_to_str(cid: str | bytes) -> str: if isinstance(cid, bytes): return cid.hex() diff --git a/libp2p/bitswap/payment_client_1_3.py b/libp2p/bitswap/payment_client_1_3.py index 3940c31a8..8e4c052b8 100644 --- a/libp2p/bitswap/payment_client_1_3.py +++ b/libp2p/bitswap/payment_client_1_3.py @@ -55,7 +55,7 @@ def __init__( # Pending payments: nonce_hex → {peer_id, cid, amount} self._pending_payments: dict[str, dict[str, Any]] = {} - + # Server pricing config: peer_id → {units_per_kb, last_updated} # This is learned from PaymentTerms messages self._server_pricing: dict[str, dict[str, Any]] = {} @@ -106,7 +106,7 @@ async def build_payment_auth_msg( ) msg = Message_1_3() - auth = msg.payment_authorizations.add() + auth = msg.payment_authorizations.add() # type: ignore[attr-defined] auth.cid = bytes(terms.cid) auth.from_address = self.signer.address auth.to_address = terms.pay_to @@ -131,11 +131,17 @@ async def _handle_payment_terms( """ amount = terms.amount block_size = terms.block_size - + logger.warning("=" * 70) - logger.warning(f"[STEP 3b] CLIENT EVALUATING PAYMENT TERMS from {peer_id[:20]}...") - logger.warning(f" amount={amount} units max_auto_pay={self.max_auto_pay_units} units") - logger.warning(f" block_size={block_size}B asset={terms.asset} scheme={terms.scheme}") + logger.warning( + f"[STEP 3b] CLIENT EVALUATING PAYMENT TERMS from {peer_id[:20]}..." + ) + logger.warning( + f" amount={amount} units max_auto_pay={self.max_auto_pay_units} units" + ) + logger.warning( + f" block_size={block_size}B asset={terms.asset} scheme={terms.scheme}" + ) logger.warning("=" * 70) # Learn server's pricing from the PaymentTerms @@ -151,19 +157,21 @@ async def _handle_payment_terms( ) return None - # Validate pricing consistency using learned server config if not self._validate_pricing(peer_id, amount, block_size): logger.warning( - f"[STEP 3b] ❌ PAYMENT REJECTED (pricing validation failed) for {block_size}B block from {peer_id[:20]}... " + f"[STEP 3b] ❌ PAYMENT REJECTED (pricing validation failed) for " + f"{block_size}B block from {peer_id[:20]}... " f"Server asked {amount} units. Skipping payment." ) return None - - logger.warning(f"[STEP 3b] ✅ Payment terms accepted — proceeding to sign EIP-3009") + + logger.warning( + "[STEP 3b] ✅ Payment terms accepted — proceeding to sign EIP-3009" + ) # Sign EIP-3009 authorization logger.warning("=" * 70) - logger.warning(f"[STEP 4] CLIENT SIGNING EIP-3009 AUTHORIZATION") + logger.warning("[STEP 4] CLIENT SIGNING EIP-3009 AUTHORIZATION") logger.warning(f" to={terms.pay_to[:20]}...") logger.warning(f" value={amount} units") logger.warning(f" nonce={bytes(terms.nonce).hex()[:20]}...") @@ -177,14 +185,19 @@ async def _handle_payment_terms( nonce=bytes(terms.nonce), valid_before=terms.valid_before, ) - logger.warning(f"[STEP 4] EIP-3009 SIGNATURE CREATED: v={v} r_len={len(r)} s_len={len(s)}") + logger.warning( + f"[STEP 4] EIP-3009 SIGNATURE CREATED: v={v} r_len={len(r)} " + f"s_len={len(s)}" + ) except Exception as e: - logger.error(f"[STEP 4] FAILED TO SIGN EIP-3009 AUTHORIZATION: {e}", exc_info=True) + logger.error( + f"[STEP 4] FAILED TO SIGN EIP-3009 AUTHORIZATION: {e}", exc_info=True + ) return None # Build PaymentAuthorization message response = Message_1_3() - auth = response.payment_authorizations.add() + auth = response.payment_authorizations.add() # type: ignore[attr-defined] auth.cid = bytes(terms.cid) auth.from_address = self.signer.address auth.to_address = terms.pay_to @@ -254,21 +267,23 @@ def _handle_payment_rejection(self, peer_id: str, rejection: Any) -> None: f"cid={cid_hex[:20]}... reason={rejection.reason}" ) - def _update_server_pricing(self, peer_id: str, amount: int, block_size: int) -> None: + def _update_server_pricing( + self, peer_id: str, amount: int, block_size: int + ) -> None: """ Learn the server's pricing configuration from PaymentTerms. - + The server calculates: price = max(1, int(block_size_kb * units_per_kb)) We can reverse-engineer units_per_kb from the amount and block_size. """ if amount == 0 or block_size == 0: return # Free block, no pricing info to learn - + # Calculate implied units_per_kb from this payment request kb = block_size / 1024 if kb > 0: implied_units_per_kb = amount / kb - + # Store or update the pricing config for this peer if peer_id not in self._server_pricing: self._server_pricing[peer_id] = { @@ -285,59 +300,63 @@ def _update_server_pricing(self, peer_id: str, amount: int, block_size: int) -> config = self._server_pricing[peer_id] old_rate = config["units_per_kb"] sample_count = config["sample_count"] - new_rate = (old_rate * sample_count + implied_units_per_kb) / (sample_count + 1) + new_rate = (old_rate * sample_count + implied_units_per_kb) / ( + sample_count + 1 + ) config["units_per_kb"] = new_rate config["sample_count"] = sample_count + 1 config["last_updated"] = time.time() - + # Warn if pricing changed significantly (>20%) if abs(new_rate - old_rate) / old_rate > 0.2: logger.warning( f"Server {peer_id[:20]}... pricing changed: " f"{old_rate:.2f} → {new_rate:.2f} units/KB" ) - + def _validate_pricing(self, peer_id: str, amount: int, block_size: int) -> bool: """ Validate that the server's price request is consistent with its learned pricing. - + Returns True if pricing is acceptable, False if suspicious. """ if amount == 0: return True # Free blocks are always acceptable - + # If we haven't learned pricing yet, accept this first payment if peer_id not in self._server_pricing: return True - + config = self._server_pricing[peer_id] units_per_kb = config["units_per_kb"] - + # Calculate expected price using learned pricing kb = block_size / 1024 expected = max(1, int(kb * units_per_kb)) - + # Allow 20% tolerance for rounding and small variations tolerance = 0.2 min_acceptable = expected * (1 - tolerance) max_acceptable = expected * (1 + tolerance) - + if amount < min_acceptable or amount > max_acceptable: logger.warning( f"Pricing inconsistency detected: " - f"expected {expected} units (±{tolerance*100}%), got {amount} units " + f"expected {expected} units (±{tolerance * 100}%), got {amount} units " f"for {block_size}B block ({kb:.3f} KB) " f"using learned rate {units_per_kb:.2f} units/KB" ) return False - + return True - + def get_server_pricing(self, peer_id: str) -> dict[str, Any] | None: """ Get the learned pricing configuration for a peer. - + Returns: - Dict with units_per_kb, last_updated, sample_count, or None if not learned yet. + Dict with units_per_kb, last_updated, sample_count, + or None if not learned yet. + """ return self._server_pricing.get(peer_id) diff --git a/libp2p/bitswap/payment_extension.py b/libp2p/bitswap/payment_extension.py new file mode 100644 index 000000000..9a4f7afcf --- /dev/null +++ b/libp2p/bitswap/payment_extension.py @@ -0,0 +1,248 @@ +import logging +from typing import Any + +from libp2p.abc import INetStream +from libp2p.peer.id import ID as PeerID + +from .cid import parse_cid +from .extension import IBitswapExtension +from .pb.bitswap_1_3_0_pb2 import Message as Message_1_3 + +logger = logging.getLogger(__name__) + + +class PaymentExtension(IBitswapExtension): + """ + Bitswap 1.3.0 Payment Extension. + Intercepts and processes payment-related protobuf fields and wantlists. + """ + + def __init__(self, payment_client: Any = None, payment_engine: Any = None): + self.payment_client = payment_client + self.payment_engine = payment_engine + self.client = None + + async def process_message( + self, peer_id: PeerID, msg_bytes: bytes, stream: INetStream + ) -> bool: + """ + Process the 1.3.0 specific fields: payment terms, receipts, auths. + Returns False so that standard wantlist and block processing can + continue if needed. + """ + msg_1_3: Message_1_3 | None = None + try: + _tmp = Message_1_3() + _tmp.ParseFromString(msg_bytes) + msg_1_3 = _tmp + except Exception: + return False + + if msg_1_3 is None: + return False + + # Client-side: handle PaymentTerms / PaymentReceipts / PaymentRejections + if self.payment_client and ( + msg_1_3.payment_terms + or msg_1_3.payment_receipts + or msg_1_3.payment_rejections + ): + if msg_1_3.payment_terms: + logger.warning("=" * 70) + logger.warning( + f"[STEP 3] CLIENT RECEIVED PAYMENT TERMS from " + f"{str(peer_id)[:20]}..." + ) + for _t in msg_1_3.payment_terms: + logger.warning(f" cid={bytes(_t.cid).hex()[:20]}...") + logger.warning(f" amount={_t.amount} units") + logger.warning(f" asset={_t.asset} scheme={_t.scheme}") # type: ignore[attr-defined] + logger.warning(f" pay_to={_t.pay_to[:20]}...") + logger.warning(f" block_size={_t.block_size}B") + logger.warning(f" valid_before={_t.valid_before}") # type: ignore[attr-defined] + logger.warning("=" * 70) + if msg_1_3.payment_receipts: + logger.warning("=" * 70) + logger.warning( + f"[STEP 8a] CLIENT RECEIVED PAYMENT RECEIPT from " + f"{str(peer_id)[:20]}..." + ) + for _r in msg_1_3.payment_receipts: + logger.warning(f" cid={bytes(_r.cid).hex()[:20]}...") + logger.warning( + f" tx_hash={_r.tx_hash[:20] if _r.tx_hash else 'optimistic'}" + ) + logger.warning(f" expires={_r.expires}") + logger.warning("=" * 70) + if msg_1_3.payment_rejections: + logger.warning("=" * 70) + logger.warning( + f"[STEP 8a] CLIENT RECEIVED PAYMENT REJECTION from " + f"{str(peer_id)[:20]}..." + ) + for _rj in msg_1_3.payment_rejections: + logger.warning(f" cid={bytes(_rj.cid).hex()[:20]}...") + logger.warning(f" reason={_rj.reason}") + logger.warning("=" * 70) + + response = await self.payment_client.process_incoming_message( + str(peer_id), msg_1_3 + ) + if response is not None: + logger.warning("=" * 70) + logger.warning( + f"[STEP 5] CLIENT SENDING PAYMENT AUTHORIZATION to " + f"{str(peer_id)[:20]}..." + ) + if response.payment_authorizations: + for _a in response.payment_authorizations: + logger.warning(f" cid={bytes(_a.cid).hex()[:20]}...") + logger.warning(f" from={_a.from_address[:20]}...") + logger.warning(f" to={_a.to_address[:20]}...") + logger.warning(f" value={_a.value}") + logger.warning(f" scheme={_a.scheme}") + logger.warning( + f" v={_a.v} r_len={len(bytes(_a.r))} " + f"s_len={len(bytes(_a.s))}" + ) + logger.warning("=" * 70) + await self.client._write_message_bytes( + stream, response.SerializeToString() + ) + + # Server-side: handle PaymentAuthorizations (EIP-3009 signed payments) + if self.payment_engine and msg_1_3.payment_authorizations: # type: ignore[attr-defined] + try: + logger.warning("=" * 70) + logger.warning( + f"[STEP 6] SERVER RECEIVED PAYMENT AUTHORIZATION from " + f"{str(peer_id)[:20]}..." + ) + for _a in msg_1_3.payment_authorizations: # type: ignore[attr-defined] + logger.warning(f" cid={bytes(_a.cid).hex()[:20]}...") + logger.warning(f" from={_a.from_address[:20]}...") + logger.warning(f" to={_a.to_address[:20]}...") + logger.warning(f" value={_a.value}") + logger.warning(f" scheme={_a.scheme}") + logger.warning( + f" v={_a.v} r_len={len(bytes(_a.r))} s_len={len(bytes(_a.s))}" + ) + logger.warning("=" * 70) + + response = await self.payment_engine.process_incoming_1_3_message( + str(peer_id), msg_1_3 + ) + if response is not None: + _has_receipt = bool(response.payment_receipts) + _has_rejection = bool(response.payment_rejections) + _has_blocks = bool(response.payload) or bool(response.blocks) + logger.warning("=" * 70) + logger.warning( + "[STEP 8] SERVER SENDING RESPONSE after PaymentAuthorization:" + ) + logger.warning( + f" has_receipt={_has_receipt} " + f"has_rejection={_has_rejection} has_blocks={_has_blocks}" + ) + if _has_rejection: + for _rj in response.payment_rejections: + logger.warning(f" ❌ REJECTION reason={_rj.reason}") + if _has_blocks: + _nb = len(response.payload) + len(response.blocks) + logger.warning( + f" ✅ SENDING {_nb} block(s) to client " # type: ignore + f"— FILE TRANSFER STARTING" + ) + logger.warning("=" * 70) + await self.client._write_message_bytes( + stream, response.SerializeToString() + ) + + # Payment authorization handled — we intercept this completely. + return True + except Exception as e: + logger.error(f"Error handling PaymentAuthorization: {e}", exc_info=True) + + # Handle PaymentRequired block presences (1.3.0 type=2) + if msg_1_3.blockPresences: + await self.client._process_block_presences_1_3( + msg_1_3.blockPresences, peer_id + ) + + return False + + async def process_wantlist( + self, wantlist: Any, peer_id: PeerID, stream: INetStream + ) -> bool: + """ + Gated wantlist processing. + If we have a payment_engine, we MUST gate block sharing behind payment terms. + """ + if not self.payment_engine: + return False + + if peer_id not in self.client._peer_wantlists: + self.client._peer_wantlists[peer_id] = {} + peer_wantlist = self.client._peer_wantlists[peer_id] + + if wantlist.full: + peer_wantlist.clear() + + for entry in wantlist.entries: + entry_cid = parse_cid(entry.block) + if entry.cancel: + if entry_cid in peer_wantlist: + del peer_wantlist[entry_cid] + continue + + peer_wantlist[entry_cid] = { + "priority": entry.priority, + "want_type": entry.wantType, + "send_dont_have": entry.sendDontHave, + } + + peer_protocol = self.client._peer_protocols.get(peer_id, "") + response_msg = await self.payment_engine.handle_want( + peer_id=str(peer_id), + cid=entry.block, + want_type=entry.wantType, + send_dont_have=entry.sendDontHave, + peer_protocol=str(peer_protocol), + ) + + if response_msg is not None: + _has_pr = bool(getattr(response_msg, "blockPresences", [])) + _has_terms = bool(getattr(response_msg, "payment_terms", [])) + _has_blocks = bool(getattr(response_msg, "payload", [])) or bool( + getattr(response_msg, "blocks", []) + ) + logger.warning("=" * 70) + logger.warning( + f"[STEP 2] SERVER SENDING RESPONSE for cid=" + f"{bytes(entry.block).hex()[:20]}..." + ) + logger.warning( + f" payment_required={_has_pr} payment_terms={_has_terms} " + f"has_blocks={_has_blocks}" + ) + if _has_pr: + for _bp in response_msg.blockPresences: + logger.warning( + f" BlockPresence type={_bp.type} (2=PaymentRequired)" + ) + if _has_terms: + for _t in response_msg.payment_terms: + logger.warning( + f" PaymentTerms: amount={_t.amount} asset={_t.asset} " + f"pay_to={_t.pay_to[:20]}... scheme={_t.scheme}" + ) + if _has_blocks: + logger.warning( + " ✅ Sending block(s) directly (free/already paid)" + ) + logger.warning("=" * 70) + await self.client._write_message_bytes( + stream, response_msg.SerializeToString() + ) + + return True diff --git a/libp2p/bitswap/payment_ledger.py b/libp2p/bitswap/payment_ledger.py index f2e9baf55..97ccaec1c 100644 --- a/libp2p/bitswap/payment_ledger.py +++ b/libp2p/bitswap/payment_ledger.py @@ -18,21 +18,21 @@ class PaymentLedger: - """ + r""" Tracks root CID payments for Bitswap 1.3.0. - + When a peer pays for a root CID, they gain access to all blocks in that DAG. This prevents charging separately for each chunk of a multi-block file. - + Example: >>> ledger = PaymentLedger() - >>> + >>> >>> # Register a DAG structure (root → children mapping) >>> await ledger.register_dag( ... root_cid="bafyroot123...", ... child_cids=["bafychild1...", "bafychild2...", ...] ... ) - >>> + >>> >>> # Record payment for root CID >>> await ledger.record_payment( ... peer_id="12D3Koo...", @@ -40,24 +40,25 @@ class PaymentLedger: ... amount=1000000, # 1 USDC in micro-units ... nonce=b"\\x12\\x34...", ... ) - >>> + >>> >>> # Check if peer has paid (works for root OR child CIDs) >>> ledger.is_paid("12D3Koo...", "bafychild1...") # True (child of paid root) >>> ledger.is_paid("12D3Koo...", "bafyroot123...") # True (root itself) + """ - def __init__(self): + def __init__(self) -> None: # Payment records: (peer_id, root_cid_hex) → payment_info self._payments: dict[tuple[str, str], dict[str, Any]] = {} - + # Child → Root mapping: child_cid_hex → root_cid_hex # Used to resolve chunk CIDs to their root CID self._cid_to_root: dict[str, str] = {} - + # Nonce registry: nonce_hex → (peer_id, cid_hex, timestamp) # Prevents replay attacks (same nonce can't be used twice) self._used_nonces: dict[str, tuple[str, str, float]] = {} - + # Free CIDs: Set of CID hashes that are always free (no payment required) self._free_cids: set[str] = set() @@ -68,25 +69,28 @@ async def register_dag( ) -> None: """ Register a DAG structure so child blocks inherit root payment status. - + Args: root_cid: The root CID of the DAG (hex string or bytes) child_cids: List of child/chunk CIDs in the DAG - + Example: >>> # After chunking a file into blocks >>> await ledger.register_dag( ... root_cid=root_cid, ... child_cids=[chunk1_cid, chunk2_cid, ...] ... ) + """ root_hex = _cid_to_hex(root_cid) - + for child_cid in child_cids: child_hex = _cid_to_hex(child_cid) self._cid_to_root[child_hex] = root_hex - logger.debug(f"Registered child {child_hex[:20]}... → root {root_hex[:20]}...") - + logger.debug( + f"Registered child {child_hex[:20]}... → root {root_hex[:20]}..." + ) + logger.info( f"Registered DAG: root={root_hex[:20]}... with {len(child_cids)} children" ) @@ -94,9 +98,10 @@ async def register_dag( def mark_free(self, cid: str | bytes) -> None: """ Mark a CID as free (no payment required). - + Args: cid: The CID to mark as free (hex string or bytes) + """ cid_hex = _cid_to_hex(cid) self._free_cids.add(cid_hex) @@ -105,12 +110,13 @@ def mark_free(self, cid: str | bytes) -> None: def is_free(self, cid: str | bytes) -> bool: """ Check if a CID is marked as free. - + Args: cid: The CID to check (hex string or bytes) - + Returns: True if the CID is free, False otherwise + """ cid_hex = _cid_to_hex(cid) root_hex = self._cid_to_root.get(cid_hex, cid_hex) @@ -124,30 +130,31 @@ def is_paid( ) -> bool: """ Check if a peer has paid for a CID (root or child). - + Resolves child CIDs to their root CID automatically. - + Args: peer_id: The peer ID to check cid: The CID to check (can be root or child CID) block_size: Ignored (kept for backward compatibility with old API) - + Returns: True if the peer has paid for this CID (or its root), False otherwise + """ cid_hex = _cid_to_hex(cid) - + # Check if it's a free CID if self.is_free(cid_hex): return True - + # Resolve to root CID if this is a child root_hex = self._cid_to_root.get(cid_hex, cid_hex) - + # Check if payment exists for (peer, root) key = (peer_id, root_hex) paid = key in self._payments - + if paid: payment = self._payments[key] logger.debug( @@ -160,7 +167,7 @@ def is_paid( f"❌ No payment: peer={peer_id[:20]}... " f"cid={cid_hex[:20]}... root={root_hex[:20]}..." ) - + return paid async def record_payment( @@ -173,20 +180,21 @@ async def record_payment( ) -> None: """ Record a payment for a root CID. - + Args: peer_id: The peer who paid cid: The CID being paid for (root or child - will resolve to root) amount: Payment amount in micro-units (e.g., USDC micro-units) nonce: Unique nonce for this payment (prevents replay attacks) tx_hash: Optional transaction hash (empty for EIP-3009) - + Raises: ValueError: If the nonce has already been used + """ cid_hex = _cid_to_hex(cid) nonce_hex = nonce.hex() - + # Check for nonce reuse (replay attack prevention) if nonce_hex in self._used_nonces: existing = self._used_nonces[nonce_hex] @@ -194,10 +202,10 @@ async def record_payment( f"Nonce already used: {nonce_hex[:20]}... " f"by peer={existing[0][:20]}... for cid={existing[1][:20]}..." ) - + # Resolve to root CID root_hex = self._cid_to_root.get(cid_hex, cid_hex) - + # Record payment key = (peer_id, root_hex) self._payments[key] = { @@ -206,10 +214,10 @@ async def record_payment( "tx_hash": tx_hash, "timestamp": time.time(), } - + # Mark nonce as used self._used_nonces[nonce_hex] = (peer_id, root_hex, time.time()) - + logger.info( f"💰 Payment recorded: peer={peer_id[:20]}... " f"root={root_hex[:20]}... amount={amount} " @@ -223,14 +231,15 @@ def get_payment( ) -> dict[str, Any] | None: """ Get payment details for a peer and CID. - + Args: peer_id: The peer ID cid: The CID (root or child) - + Returns: Payment info dict with keys: amount, nonce, tx_hash, timestamp or None if no payment found + """ cid_hex = _cid_to_hex(cid) root_hex = self._cid_to_root.get(cid_hex, cid_hex) @@ -240,9 +249,10 @@ def get_payment( def clear_old_nonces(self, max_age_seconds: float = 86400) -> int: """ Clear nonces older than max_age_seconds (default: 24 hours). - + Returns: Number of nonces cleared + """ now = time.time() old_nonces = [ @@ -250,18 +260,19 @@ def clear_old_nonces(self, max_age_seconds: float = 86400) -> int: for nonce_hex, (_, _, timestamp) in self._used_nonces.items() if now - timestamp > max_age_seconds ] - + for nonce_hex in old_nonces: del self._used_nonces[nonce_hex] - + if old_nonces: logger.info(f"Cleared {len(old_nonces)} old nonces (>{max_age_seconds}s)") - + return len(old_nonces) # ── Helper functions ────────────────────────────────────────────────────────── + def _cid_to_hex(cid: str | bytes) -> str: """Convert CID to hex string for consistent storage.""" if isinstance(cid, bytes): diff --git a/libp2p/bitswap/pb/bitswap_1_3_0_pb2.pyi b/libp2p/bitswap/pb/bitswap_1_3_0_pb2.pyi index 08759a2a3..75bcd0b01 100644 --- a/libp2p/bitswap/pb/bitswap_1_3_0_pb2.pyi +++ b/libp2p/bitswap/pb/bitswap_1_3_0_pb2.pyi @@ -3,7 +3,7 @@ from google.protobuf.internal import enum_type_wrapper as _enum_type_wrapper from google.protobuf import descriptor as _descriptor from google.protobuf import message as _message from collections.abc import Iterable as _Iterable, Mapping as _Mapping -from typing import ClassVar as _ClassVar, Optional as _Optional, Union as _Union +from typing import ClassVar as _ClassVar, Optional as _Optional, Union as _Union, Any as _Any DESCRIPTOR: _descriptor.FileDescriptor @@ -42,7 +42,7 @@ class Message(_message.Message): FULL_FIELD_NUMBER: _ClassVar[int] entries: _containers.RepeatedCompositeFieldContainer[Message.Wantlist.Entry] full: bool - def __init__(self, entries: _Optional[_Iterable[_Union[Message.Wantlist.Entry, _Mapping]]] = ..., full: bool = ...) -> None: ... + def __init__(self, entries: _Optional[_Iterable[_Union[Message.Wantlist.Entry, _Mapping[str, _Any]]]] = ..., full: bool = ...) -> None: ... class Block(_message.Message): __slots__ = ("prefix", "data") PREFIX_FIELD_NUMBER: _ClassVar[int] @@ -125,4 +125,4 @@ class Message(_message.Message): tx_receipts: _containers.RepeatedCompositeFieldContainer[Message.TxReceipt] payment_receipts: _containers.RepeatedCompositeFieldContainer[Message.PaymentReceipt] payment_rejections: _containers.RepeatedCompositeFieldContainer[Message.PaymentRejection] - def __init__(self, wantlist: _Optional[_Union[Message.Wantlist, _Mapping]] = ..., blocks: _Optional[_Iterable[bytes]] = ..., payload: _Optional[_Iterable[_Union[Message.Block, _Mapping]]] = ..., blockPresences: _Optional[_Iterable[_Union[Message.BlockPresence, _Mapping]]] = ..., pendingBytes: _Optional[int] = ..., payment_terms: _Optional[_Iterable[_Union[Message.PaymentTerms, _Mapping]]] = ..., tx_receipts: _Optional[_Iterable[_Union[Message.TxReceipt, _Mapping]]] = ..., payment_receipts: _Optional[_Iterable[_Union[Message.PaymentReceipt, _Mapping]]] = ..., payment_rejections: _Optional[_Iterable[_Union[Message.PaymentRejection, _Mapping]]] = ...) -> None: ... + def __init__(self, wantlist: _Optional[_Union[Message.Wantlist, _Mapping[str, _Any]]] = ..., blocks: _Optional[_Iterable[bytes]] = ..., payload: _Optional[_Iterable[_Union[Message.Block, _Mapping[str, _Any]]]] = ..., blockPresences: _Optional[_Iterable[_Union[Message.BlockPresence, _Mapping[str, _Any]]]] = ..., pendingBytes: _Optional[int] = ..., payment_terms: _Optional[_Iterable[_Union[Message.PaymentTerms, _Mapping[str, _Any]]]] = ..., tx_receipts: _Optional[_Iterable[_Union[Message.TxReceipt, _Mapping[str, _Any]]]] = ..., payment_receipts: _Optional[_Iterable[_Union[Message.PaymentReceipt, _Mapping[str, _Any]]]] = ..., payment_rejections: _Optional[_Iterable[_Union[Message.PaymentRejection, _Mapping[str, _Any]]]] = ...) -> None: ... diff --git a/libp2p/bitswap/pricing_engine.py b/libp2p/bitswap/pricing_engine.py index 1d75378b3..6dc443cbe 100644 --- a/libp2p/bitswap/pricing_engine.py +++ b/libp2p/bitswap/pricing_engine.py @@ -9,8 +9,8 @@ - Custom: User-defined pricing function """ +from collections.abc import Callable import logging -from typing import Any, Callable logger = logging.getLogger(__name__) @@ -18,24 +18,25 @@ class BlockPricingEngine: """ Computes prices for Bitswap blocks based on configurable strategies. - + Pricing is typically done at the root CID level (total file size), not per-block, to avoid charging for each chunk separately. - + Example: >>> # Size-based pricing: 100 micro-USDC per KB >>> pricing = BlockPricingEngine( ... strategy="size_based", ... units_per_kb=100, ... ) - >>> + >>> >>> # 5 MB file = 5000 KB × 100 = 500,000 micro-units = $0.50 >>> price = pricing.compute_price("bafyroot...", block_size=5_000_000) >>> print(f"${price / 1_000_000:.2f}") # $0.50 - >>> + >>> >>> # Mark specific CIDs as free >>> pricing.set_free("bafyfree123...") >>> pricing.compute_price("bafyfree123...", 1_000_000) # 0 (free) + """ def __init__( @@ -48,29 +49,31 @@ def __init__( ): """ Initialize pricing engine. - + Args: strategy: Pricing strategy - "free", "fixed", "size_based", or "custom" units_per_kb: Price per KB for size_based strategy (micro-units) fixed_price: Fixed price for "fixed" strategy (micro-units) - custom_pricing_fn: Custom function(cid_str, size) → price for "custom" strategy + custom_pricing_fn: Custom function(cid_str, size) → price + for "custom" strategy default_free: If True, all CIDs are free by default - + Strategies: - "free": Always return 0 (all blocks free) - "fixed": Return fixed_price for all blocks - "size_based": price = max(1, int(size_kb * units_per_kb)) - "custom": Use custom_pricing_fn(cid_str, block_size) + """ self.strategy = strategy self.units_per_kb = units_per_kb self.fixed_price = fixed_price self.custom_pricing_fn = custom_pricing_fn self.default_free = default_free - + # Per-CID overrides: cid_hex → price (0 = free, >0 = specific price) self._cid_prices: dict[str, int] = {} - + logger.info( f"Pricing engine initialized: strategy={strategy} " f"units_per_kb={units_per_kb} default_free={default_free}" @@ -79,10 +82,11 @@ def __init__( def set_price(self, cid: str | bytes, price: int) -> None: """ Set a specific price for a CID (overrides strategy). - + Args: cid: The CID (hex string or bytes) price: Price in micro-units (0 = free) + """ cid_hex = _cid_to_hex(cid) self._cid_prices[cid_hex] = price @@ -91,44 +95,46 @@ def set_price(self, cid: str | bytes, price: int) -> None: def set_free(self, cid: str | bytes) -> None: """ Mark a CID as free (price = 0). - + Args: cid: The CID to mark as free + """ self.set_price(cid, 0) def compute_price(self, cid_str: str, block_size: int) -> int: """ Compute the price for a block/file. - + Args: cid_str: The CID as a hex string block_size: Size in bytes (for root CID, this is total file size) - + Returns: Price in micro-units (0 = free, >0 = paid) - + Note: For multi-block files, call this ONCE with the root CID and total size, not for each individual chunk. + """ # Check for per-CID override if cid_str in self._cid_prices: price = self._cid_prices[cid_str] logger.debug(f"Using override price for {cid_str[:20]}... = {price}") return price - + # Apply default free policy if self.default_free: return 0 - + # Apply strategy if self.strategy == "free": return 0 - + elif self.strategy == "fixed": return self.fixed_price - + elif self.strategy == "size_based": # Price = units_per_kb × size_in_kb (minimum 1 unit) kb = block_size / 1024 @@ -138,21 +144,22 @@ def compute_price(self, cid_str: str, block_size: int) -> int: f"{self.units_per_kb} = {price} units" ) return price - + elif self.strategy == "custom": if self.custom_pricing_fn is None: raise ValueError("Custom strategy requires custom_pricing_fn") return self.custom_pricing_fn(cid_str, block_size) - + else: raise ValueError(f"Unknown pricing strategy: {self.strategy}") def get_units_per_kb(self) -> float: """ Get the current units_per_kb rate (for size_based strategy). - + Returns: Units per KB, or 0.0 if not using size_based strategy + """ if self.strategy == "size_based": return self.units_per_kb @@ -161,6 +168,7 @@ def get_units_per_kb(self) -> float: # ── Helper functions ────────────────────────────────────────────────────────── + def _cid_to_hex(cid: str | bytes) -> str: """Convert CID to hex string for consistent storage.""" if isinstance(cid, bytes): diff --git a/tests/core/bitswap/test_cid.py b/tests/core/bitswap/test_cid.py index 741c2d769..54d121a5a 100644 --- a/tests/core/bitswap/test_cid.py +++ b/tests/core/bitswap/test_cid.py @@ -352,7 +352,9 @@ def test_cid_to_bytes_and_text_roundtrip(): roundtrip_bytes = cid_to_bytes(cid_text) assert roundtrip_bytes == cid_bytes - assert cid_text == str(make_cid(cid_bytes)) + # String representations might differ by base (e.g. base32 vs base58btc) + # but they should parse to the same bytes. + assert cid_to_bytes(cid_text) == cid_to_bytes(str(make_cid(cid_bytes))) def test_object_wrappers_for_v0_and_v1(): diff --git a/tests/core/bitswap/test_dag.py b/tests/core/bitswap/test_dag.py index d1144f707..c883cf3f2 100644 --- a/tests/core/bitswap/test_dag.py +++ b/tests/core/bitswap/test_dag.py @@ -52,8 +52,6 @@ class TestAddBytes: @pytest.mark.trio async def test_add_small_bytes(self): """Test adding small data (single block).""" - from libp2p.bitswap.dag_pb import create_leaf_node - # Setup mock_client = MagicMock(spec=BitswapClient) mock_client.block_store = MemoryBlockStore() @@ -69,17 +67,16 @@ async def test_add_small_bytes(self): assert root_cid is not None assert len(root_cid) > 0 - # Small data is stored as a dag-pb leaf node (not raw codec) - leaf_block = create_leaf_node(data) - expected_cid = compute_cid_v1(leaf_block, codec=CODEC_DAG_PB) + # Small data is stored as a raw leaf node (RawLeaves=True default) + expected_cid = compute_cid_v1(data, codec=CODEC_RAW) assert root_cid == expected_cid - assert verify_cid(root_cid, leaf_block) + assert verify_cid(root_cid, data) - # Should be single block (DAG-PB codec) + # Should be single block (raw codec) mock_client.add_block.assert_called_once() call_args = mock_client.add_block.call_args assert call_args[0][0] == root_cid # CID - assert call_args[0][1] == leaf_block # dag-pb wrapped data + assert call_args[0][1] == data # raw data @pytest.mark.trio async def test_add_large_bytes(self): @@ -168,15 +165,12 @@ async def test_add_small_file(self): assert root_cid is not None mock_client.add_block.assert_called_once() - # Small file is stored as a dag-pb leaf node - from libp2p.bitswap.dag_pb import create_leaf_node - + # Small file is stored as a raw leaf node call_args = mock_client.add_block.call_args stored_cid = call_args[0][0] stored_block = call_args[0][1] - leaf_block = create_leaf_node(data) - assert stored_block == leaf_block - assert verify_cid(stored_cid, leaf_block) + assert stored_block == data + assert verify_cid(stored_cid, data) finally: Path(temp_path).unlink() @@ -256,9 +250,8 @@ async def test_add_file_with_custom_chunk_size(self): temp_path, chunk_size=chunk_size, wrap_with_directory=False ) - # Should have many chunks - # (3.2MB / 16KB = 200 chunks) + 1 root = 201 calls - assert mock_client.add_block.call_count == 201 + # (3.2MB / 16KB = 200 chunks) + intermediate nodes + 1 root + assert mock_client.add_block.call_count > 200 finally: Path(temp_path).unlink() diff --git a/tests/core/bitswap/test_filesystem_blockstore.py b/tests/core/bitswap/test_filesystem_blockstore.py index 8596f26c1..edf691170 100644 --- a/tests/core/bitswap/test_filesystem_blockstore.py +++ b/tests/core/bitswap/test_filesystem_blockstore.py @@ -161,7 +161,9 @@ async def test_directory_structure(store_path: str) -> None: cid, data = make_block(b"check directory layout") await store.put_block(cid, data) - cid_str = cid_to_text(cid) + from cid import make_cid + + cid_str = str(make_cid(cid)) expected_dir = Path(store_path) / cid_str[:2] expected_file = expected_dir / cid_str[2:] diff --git a/tests/core/bitswap/test_io_stream.py b/tests/core/bitswap/test_io_stream.py index bd1ecdecb..a8949036d 100644 --- a/tests/core/bitswap/test_io_stream.py +++ b/tests/core/bitswap/test_io_stream.py @@ -15,7 +15,6 @@ from libp2p.bitswap.block_store import MemoryBlockStore from libp2p.bitswap.chunker import DEFAULT_CHUNK_SIZE, chunk_stream from libp2p.bitswap.cid import cid_to_text -from libp2p.bitswap.dag_pb import decode_dag_pb, is_file_node def ok(label): @@ -140,11 +139,8 @@ async def add_block(cid, data): assert len(stored) == 1 block = list(stored.values())[0] - assert is_file_node(block) - _, unixfs = decode_dag_pb(block) - assert unixfs is not None - assert unixfs.filesize == 0 - ok("empty stream → 1 empty dag-pb leaf block stored") + assert block == b"" + ok("empty stream → 1 empty raw leaf block stored") async def test_add_stream_single_chunk(): @@ -170,9 +166,7 @@ async def add_block(cid, data): assert len(stored) == 1, f"expected 1 block, got {len(stored)}" block = stored[bytes(root_cid)] - _, unixfs = decode_dag_pb(block) - assert unixfs is not None - assert unixfs.data == data + assert block == data ok("single chunk: leaf CID returned directly, inline data correct") @@ -206,20 +200,12 @@ async def add_block(cid, data): with gzip.GzipFile(fileobj=buf, mode="rb") as gz: root_cid = await dag.add_stream(gz) - # Reassemble all leaf data + # Since it's < 256KB, it's a single raw chunk root_block = stored[bytes(root_cid)] - links, _ = decode_dag_pb(root_block) - reassembled = b"" - for link in links: - leaf = stored[bytes(link.cid)] - _, leaf_unixfs = decode_dag_pb(leaf) - assert leaf_unixfs is not None - reassembled += leaf_unixfs.data - - assert reassembled == original + assert root_block == original ok( f"gzip stream: {compressed_size} compressed → {len(original)} bytes added " - f"in {len(links)} chunks" + f"as a single chunk" ) diff --git a/tests/core/bitswap/test_unixfs_encoding.py b/tests/core/bitswap/test_unixfs_encoding.py index cff119430..2bcecd0ac 100644 --- a/tests/core/bitswap/test_unixfs_encoding.py +++ b/tests/core/bitswap/test_unixfs_encoding.py @@ -69,7 +69,7 @@ def test_balanced_layout_single(): leaf = create_leaf_node(data) cid = compute_cid_v1(leaf, codec=CODEC_DAG_PB) - root_cid, root_block = balanced_layout([(cid, leaf, len(data))]) + root_cid, root_block, _ = balanced_layout([(cid, leaf, len(data))]) assert bytes(root_cid) == bytes(cid) assert root_block == leaf ok("single leaf: root_cid == leaf_cid") @@ -85,7 +85,7 @@ def test_balanced_layout_two_leaves(): cid = compute_cid_v1(leaf, codec=CODEC_DAG_PB) leaves.append((cid, leaf, len(data))) - root_cid, root_block = balanced_layout(leaves) + root_cid, root_block, _ = balanced_layout(leaves) # Root must be a dag-pb file node with 2 links assert is_file_node(root_block) @@ -109,7 +109,7 @@ def test_balanced_layout_two_levels(): cid = compute_cid_v1(leaf, codec=CODEC_DAG_PB) leaves.append((cid, leaf, chunk_size)) - root_cid, root_block = balanced_layout(leaves) + root_cid, root_block, _ = balanced_layout(leaves) links, unixfs = decode_dag_pb(root_block) # Root should link to 2 internal nodes (174 + 1) @@ -131,7 +131,7 @@ def test_balanced_layout_flat(): cid = compute_cid_v1(leaf, codec=CODEC_DAG_PB) leaves.append((cid, leaf, 50)) - root_cid, root_block = balanced_layout(leaves) + root_cid, root_block, _ = balanced_layout(leaves) links, unixfs = decode_dag_pb(root_block) assert len(links) == 174, f"expected 174 direct links, got {len(links)}" @@ -172,14 +172,18 @@ async def add_block_impl(cid, data): finally: os.unlink(tmp) - # Every stored block must be a dag-pb file node (no raw blocks) + # Root block must be dag-pb, but leaves must be raw blocks raw_blocks = [] + dag_pb_blocks = [] for cid_bytes, block_data in stored.items(): - if not is_file_node(block_data): - raw_blocks.append(cid_to_text(cid_bytes)[:20]) + if is_file_node(block_data): + dag_pb_blocks.append(cid_bytes) + else: + raw_blocks.append(cid_bytes) - assert raw_blocks == [], f"Found non-dag-pb blocks: {raw_blocks}" - ok(f"All {len(stored)} stored blocks are dag-pb file nodes (no raw blocks)") + assert len(dag_pb_blocks) == 1, f"Expected 1 root node, got {len(dag_pb_blocks)}" + assert len(raw_blocks) > 0, f"Expected raw leaves, got {len(raw_blocks)}" + ok("Root is dag-pb, and all leaves are raw blocks") # Root must link to 3 leaves root_block = stored[bytes(root_cid)] @@ -189,18 +193,17 @@ async def add_block_impl(cid, data): assert unixfs.filesize == len(content) ok(f"root has 3 links, filesize={unixfs.filesize}") - # Each leaf must contain inline UnixFS data + # Each leaf must be raw data for link in links: leaf_block = stored[bytes(link.cid)] - leaf_links, leaf_unixfs = decode_dag_pb(leaf_block) - assert leaf_links == [], "leaf must have no links" - assert leaf_unixfs is not None and leaf_unixfs.data != b"" - ok("each leaf contains inline UnixFS data") + assert not is_file_node(leaf_block), "leaf must be raw data" + assert len(leaf_block) > 0 + ok("each leaf contains raw data") -# ── 7. add_bytes produces dag-pb leaves ────────────────────────────────────── +# ── 7. add_bytes produces raw leaves ────────────────────────────────────── async def test_add_bytes_produces_dag_pb_leaves(): - print("\n[7] MerkleDag.add_bytes produces dag-pb leaf blocks") + print("\n[7] MerkleDag.add_bytes produces raw leaf blocks") from unittest.mock import AsyncMock, MagicMock from libp2p.bitswap.client import BitswapClient @@ -217,16 +220,24 @@ async def add_block_impl(cid, data): mock_client.add_block = AsyncMock(side_effect=add_block_impl) dag = MerkleDag(mock_client) - content = b"y" * (63 * 1024 * 2 + 500) # 3 chunks - root_cid = await dag.add_bytes(content) + content = b"y" * (256 * 1024 * 3 + 500) # > 3 default chunks + await dag.add_bytes(content) - raw_blocks = [cid_to_text(c)[:20] for c, d in stored.items() if not is_file_node(d)] - assert raw_blocks == [], f"Found non-dag-pb blocks: {raw_blocks}" - ok(f"All {len(stored)} stored blocks are dag-pb file nodes") - - root_block = stored[bytes(root_cid)] + raw_blocks = [] + dag_pb_blocks = [] + for c, d in stored.items(): + if is_file_node(d): + dag_pb_blocks.append(c) + else: + raw_blocks.append(c) + + assert len(dag_pb_blocks) == 1 + assert len(raw_blocks) > 0 + ok("Root is dag-pb, and all leaves are raw blocks") + + root_block = stored[dag_pb_blocks[0]] links, unixfs = decode_dag_pb(root_block) - assert len(links) == 3 + assert len(links) == 4 assert unixfs is not None assert unixfs.filesize == len(content) ok(f"root has 3 links, filesize={unixfs.filesize}") From bedc00a2fb1b0540517eab170520c4c81421a9c8 Mon Sep 17 00:00:00 2001 From: sumanjeet0012 Date: Sun, 14 Jun 2026 20:06:27 +0530 Subject: [PATCH 32/37] feat: Enhance Bitswap payment extension and fix lint issues --- examples/bitswap_payment_example.py | 200 ---------------------------- libp2p/bitswap/cid.py | 7 +- libp2p/bitswap/extension.py | 10 +- libp2p/bitswap/payment_extension.py | 1 - 4 files changed, 13 insertions(+), 205 deletions(-) delete mode 100644 examples/bitswap_payment_example.py diff --git a/examples/bitswap_payment_example.py b/examples/bitswap_payment_example.py deleted file mode 100644 index f2c57931a..000000000 --- a/examples/bitswap_payment_example.py +++ /dev/null @@ -1,200 +0,0 @@ -""" -Example: Bitswap 1.3.0 with Root CID Payment System - -Demonstrates how to set up a payment-gated Bitswap server that charges -for files at the root CID level (not per-block), using the new payment -infrastructure. - -Key Features: -- Payment required only for root CID (all chunks accessible after payment) -- Configurable pricing: free, fixed, or size-based -- EIP-3009 meta-transaction support (off-chain payment authorization) -- Automatic DAG registration for multi-block files - -Usage: - # Start payment-gated server - python examples/bitswap_payment_server.py --price-per-mb 0.01 - - # Start client - python examples/bitswap_payment_client.py --server /ip4/127.0.0.1/tcp/4001/p2p/... -""" - -import asyncio -import logging -from pathlib import Path - -from libp2p import new_host -from libp2p.bitswap import ( - BitswapClient, - FilesystemBlockStore, - MerkleDag, - PaymentGatedDecisionEngine, -) -from libp2p.bitswap.payment_ledger import PaymentLedger -from libp2p.bitswap.pricing_engine import BlockPricingEngine - -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - - -async def setup_payment_server( - price_per_mb: float = 0.01, # $0.01 per MB - wallet_address: str = "0x742d35Cc6634C0532925a3b844Bc9e7595f0bEb", -) -> tuple[BitswapClient, PaymentGatedDecisionEngine, MerkleDag]: - """ - Set up a payment-gated Bitswap server. - - Args: - price_per_mb: Price in USD per megabyte - wallet_address: Ethereum address to receive payments - - Returns: - (bitswap_client, payment_engine, dag) - """ - # Create libp2p host - host = new_host() - await host.run(["/ip4/0.0.0.0/tcp/4001"]) - - # Create block store - store = FilesystemBlockStore(Path("./bitswap_data")) - - # Create payment ledger (tracks who has paid for what) - ledger = PaymentLedger() - - # Create pricing engine (size-based: price scales with file size) - # Convert $/MB to micro-units/KB: $0.01/MB = 10,000 micro-units/MB = 10 micro-units/KB - units_per_kb = (price_per_mb * 1_000_000) / 1024 - pricing = BlockPricingEngine( - strategy="size_based", - units_per_kb=units_per_kb, - ) - - # Create payment-gated decision engine - payment_engine = PaymentGatedDecisionEngine( - blockstore=store, - ledger=ledger, - pricing=pricing, - tx_verifier=None, # Optional: add EIP-3009 verifier - server_wallet=wallet_address, - network="sepolia", - asset="USDC", - ) - - # Create Bitswap client with payment engine - bitswap = BitswapClient( - host=host, - block_store=store, - protocol_version="/ipfs/bitswap/1.3.0", - payment_engine=payment_engine, - ) - await bitswap.start() - - # Create DAG manager - dag = MerkleDag(bitswap, block_store=store) - - logger.info(f"✅ Payment-gated server started") - logger.info(f" Address: {host.get_id()}") - logger.info(f" Pricing: ${price_per_mb:.4f}/MB = {units_per_kb:.2f} units/KB") - logger.info(f" Wallet: {wallet_address}") - - return bitswap, payment_engine, dag - - -async def add_paid_file( - dag: MerkleDag, - payment_engine: PaymentGatedDecisionEngine, - file_path: Path, -) -> str: - """ - Add a file that requires payment to access. - - Args: - dag: MerkleDag instance - payment_engine: Payment engine for DAG registration - file_path: Path to file to add - - Returns: - Root CID (hex string) - """ - logger.info(f"📤 Adding paid file: {file_path}") - - # Add file to Bitswap (auto-chunks large files) - root_cid = await dag.add_file(str(file_path)) - - # Get all CIDs in the DAG (root + children) - # In a real implementation, you'd get this from the DAG add operation - # For now, we'll assume it's just the root CID - all_cids = [root_cid] - file_size = file_path.stat().st_size - - # Register DAG for root CID payment tracking - await payment_engine.register_dag( - root_cid=root_cid, - child_cids=all_cids[1:], # Exclude root from children - total_size=file_size, - ) - - logger.info(f"✅ File added: {root_cid.hex()[:20]}... ({file_size} bytes)") - return root_cid.hex() - - -async def add_free_file( - dag: MerkleDag, - payment_engine: PaymentGatedDecisionEngine, - file_path: Path, -) -> str: - """ - Add a file that is free to access (no payment required). - - Args: - dag: MerkleDag instance - payment_engine: Payment engine for marking free - file_path: Path to file to add - - Returns: - Root CID (hex string) - """ - logger.info(f"📤 Adding free file: {file_path}") - - # Add file to Bitswap - root_cid = await dag.add_file(str(file_path)) - - # Mark as free (no payment required) - payment_engine.mark_free(root_cid) - - file_size = file_path.stat().st_size - logger.info(f"✅ Free file added: {root_cid.hex()[:20]}... ({file_size} bytes)") - return root_cid.hex() - - -async def main(): - """Example usage.""" - # Set up payment-gated server - bitswap, payment_engine, dag = await setup_payment_server( - price_per_mb=0.01, # $0.01 per MB - wallet_address="0x742d35Cc6634C0532925a3b844Bc9e7595f0bEb", - ) - - # Add some files - # Example 1: Paid file (5 MB = $0.05) - # paid_cid = await add_paid_file( - # dag, payment_engine, Path("./large_file.bin") - # ) - - # Example 2: Free file (always accessible) - # free_cid = await add_free_file( - # dag, payment_engine, Path("./readme.txt") - # ) - - logger.info("Server running. Press Ctrl+C to stop.") - - # Keep running - try: - await asyncio.Event().wait() - except KeyboardInterrupt: - logger.info("Shutting down...") - await bitswap.stop() - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/libp2p/bitswap/cid.py b/libp2p/bitswap/cid.py index 1f4431c84..b1c89821a 100644 --- a/libp2p/bitswap/cid.py +++ b/libp2p/bitswap/cid.py @@ -243,11 +243,14 @@ def cid_to_bytes(value: CIDInput) -> bytes: def cid_to_text(value: CIDInput) -> str: - """Convert CID input to canonical CID string form (base32 for CIDv1, base58btc for CIDv0).""" + """ + Convert CID input to canonical CID string form + (base32 for CIDv1, base58btc for CIDv0). + """ cid_obj = parse_cid(value) # Use base32 for CIDv1 (matches Kubo's default output) if cid_obj.version == 1: - return cid_obj.encode('base32').decode() + return cid_obj.encode("base32").decode() # Use base58btc for CIDv0 (legacy format) return str(cid_obj) diff --git a/libp2p/bitswap/extension.py b/libp2p/bitswap/extension.py index 94f2bafa1..0dc682c5b 100644 --- a/libp2p/bitswap/extension.py +++ b/libp2p/bitswap/extension.py @@ -1,9 +1,13 @@ from abc import ABC, abstractmethod -from typing import Any +from typing import TYPE_CHECKING, Any from libp2p.abc import INetStream +from libp2p.bitswap.client import BitswapClient from libp2p.peer.id import ID as PeerID +if not TYPE_CHECKING: + from libp2p.bitswap.client import BitswapClient + class IBitswapExtension(ABC): """ @@ -11,7 +15,9 @@ class IBitswapExtension(ABC): Extensions are registered for specific protocol versions to handle messages. """ - def set_client(self, client: Any) -> None: + client: "BitswapClient" + + def set_client(self, client: BitswapClient) -> None: """ Set the parent BitswapClient instance. """ diff --git a/libp2p/bitswap/payment_extension.py b/libp2p/bitswap/payment_extension.py index 9a4f7afcf..1b68d1561 100644 --- a/libp2p/bitswap/payment_extension.py +++ b/libp2p/bitswap/payment_extension.py @@ -20,7 +20,6 @@ class PaymentExtension(IBitswapExtension): def __init__(self, payment_client: Any = None, payment_engine: Any = None): self.payment_client = payment_client self.payment_engine = payment_engine - self.client = None async def process_message( self, peer_id: PeerID, msg_bytes: bytes, stream: INetStream From fac2614dbc09c2fbb4d4ed8935c852db921bda87 Mon Sep 17 00:00:00 2001 From: sumanjeet0012 Date: Sun, 14 Jun 2026 20:47:44 +0530 Subject: [PATCH 33/37] fix: Correct logging format in QUICTransport for better clarity --- libp2p/transport/quic/transport.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libp2p/transport/quic/transport.py b/libp2p/transport/quic/transport.py index 0572fcfb9..8c6167f3b 100644 --- a/libp2p/transport/quic/transport.py +++ b/libp2p/transport/quic/transport.py @@ -268,7 +268,7 @@ async def dial( # Get appropriate QUIC client configuration config_key = TProtocol(f"{quic_version}_client") - logger.debug("config_key", config_key, self._quic_configs.keys()) + logger.debug("config_key %s %s", config_key, self._quic_configs.keys()) config = self._quic_configs.get(config_key) if not config: raise QUICDialError(f"Unsupported QUIC version: {quic_version}") @@ -286,7 +286,7 @@ async def dial( # Debug log to verify certificate is present logger.info( - f"Dialing QUIC connection to {host}:{port} (version: {{quic_version}})" + f"Dialing QUIC connection to {host}:{port} (version: {quic_version})" ) logger.debug("Starting QUIC Connection") From 95a03d1adbad8dba240b6717a0d89c5dbadae408 Mon Sep 17 00:00:00 2001 From: sumanjeet0012 Date: Sun, 14 Jun 2026 21:00:06 +0530 Subject: [PATCH 34/37] added newsfragment file --- newsfragments/1347.feature.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 newsfragments/1347.feature.rst diff --git a/newsfragments/1347.feature.rst b/newsfragments/1347.feature.rst new file mode 100644 index 000000000..f19cc7629 --- /dev/null +++ b/newsfragments/1347.feature.rst @@ -0,0 +1 @@ +Implement comprehensive Bitswap interoperability with IPFS Kubo, including UnixFS DAG-PB encoding and balanced layout support. Introduces ``FilesystemBlockStore`` and ``BlockService`` for robust block caching, Bitswap batch fetching, and streaming inputs (``chunk_stream``). From 29f8453c4e4b812a7c5add6b199aa214c2e99c1a Mon Sep 17 00:00:00 2001 From: sumanjeet0012 Date: Sun, 14 Jun 2026 21:09:33 +0530 Subject: [PATCH 35/37] fix: Improve formatting of docstring in verify_record function --- libp2p/records/utils.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/libp2p/records/utils.py b/libp2p/records/utils.py index 2dcc6620f..eb117d981 100644 --- a/libp2p/records/utils.py +++ b/libp2p/records/utils.py @@ -72,8 +72,7 @@ def verify_record( Args: signature: The record signature - author_public_key: The serialized public key of the author - (``crypto_pb2.PublicKey`` protobuf bytes) + author_public_key: The serialized public key of the author (``crypto_pb2.PublicKey`` protobuf bytes) key: The record key value: The record value From 50d3c26ac261d8f1470a5b212d52e27b13901f87 Mon Sep 17 00:00:00 2001 From: sumanjeet0012 Date: Sun, 14 Jun 2026 23:10:22 +0530 Subject: [PATCH 36/37] fix: lint issues --- libp2p/records/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libp2p/records/utils.py b/libp2p/records/utils.py index eb117d981..4657510a6 100644 --- a/libp2p/records/utils.py +++ b/libp2p/records/utils.py @@ -72,7 +72,7 @@ def verify_record( Args: signature: The record signature - author_public_key: The serialized public key of the author (``crypto_pb2.PublicKey`` protobuf bytes) + author_public_key: The serialized public key of the author key: The record key value: The record value From a10c22fe73a8ae232c629f6acf8520a4c81263f5 Mon Sep 17 00:00:00 2001 From: sumanjeet0012 Date: Mon, 15 Jun 2026 01:03:41 +0530 Subject: [PATCH 37/37] feat: implement block pricing engine tests, changed wrap_with_directory default to false and removed extra files --- libp2p/bitswap/dag.py | 2 +- libp2p/bitswap/payment_client_1_3.py.backup | 455 -------------------- newsfragments/1321.feature.rst | 10 - tests/core/bitswap/test_payment.py | 110 +++++ 4 files changed, 111 insertions(+), 466 deletions(-) delete mode 100644 libp2p/bitswap/payment_client_1_3.py.backup delete mode 100644 newsfragments/1321.feature.rst create mode 100644 tests/core/bitswap/test_payment.py diff --git a/libp2p/bitswap/dag.py b/libp2p/bitswap/dag.py index 8021d103b..e06df18ce 100644 --- a/libp2p/bitswap/dag.py +++ b/libp2p/bitswap/dag.py @@ -199,7 +199,7 @@ async def add_file( file_path: str, chunk_size: int | None = None, progress_callback: Callable[[int, int, str], None] | None = None, - wrap_with_directory: bool = True, + wrap_with_directory: bool = False, ) -> bytes: """ Add a file to the DAG. diff --git a/libp2p/bitswap/payment_client_1_3.py.backup b/libp2p/bitswap/payment_client_1_3.py.backup deleted file mode 100644 index 5865d635b..000000000 --- a/libp2p/bitswap/payment_client_1_3.py.backup +++ /dev/null @@ -1,455 +0,0 @@ -""" -Bitswap 1.3.0 Payment Client. - -Client-side handler for in-band payment messages. When the server sends -a PAYMENT_REQUIRED response with PaymentTerms, this client: -1. Validates the price is acceptable -2. Signs an EIP-3009 USDC transferWithAuthorization -3. Sends back a PaymentAuthorization with signature -4. On receipt of PaymentReceipt, the block is served - -Proto alignment: - PaymentTerms → fields: cid, asset, pay_to, amount, network, block_size, description, nonce, valid_before, scheme - PaymentAuthorization → fields: cid, from_address, to_address, value, valid_after, valid_before, nonce, v, r, s, scheme - PaymentReceipt → fields: cid, tx_hash, expires - PaymentRejection → fields: cid, reason - -This module lives in py-libp2p so it's importable as libp2p.bitswap. -""" - -from collections.abc import Callable -import logging -from typing import Any - -from libp2p.bitswap.pb.bitswap_1_3_0_pb2 import Message as Message_1_3 - -logger = logging.getLogger(__name__) - -# Default maximum auto-pay threshold: $0.001 USDC = 1000 micro-units -DEFAULT_MAX_AUTO_PAY_UNITS = 1000 - - -class BitswapPaymentClient_1_3: - """ - Client-side handler for Bitswap 1.3.0 payment messages. - - Processes PaymentTerms from incoming messages and auto-pays if the - amount is within the configured threshold using USDC EIP-3009. - - Args: - signer: An EIP3009Signer instance (payments.eip3009_signer) - want_manager: Object with retry_want_block(peer_id, cid) async method - max_auto_pay_usdc: Maximum amount to auto-pay in USDC (default $0.001) - send_callback: Async function(peer_id, msg_bytes) to send responses - ledger: Optional PaymentLedger for tracking spent payments - """ - - def __init__( - self, - signer: Any, # payments.eip3009_signer.EIP3009Signer - want_manager: Any, # has retry_want_block(peer_id, cid) method - max_auto_pay_usdc: float = 0.001, - send_callback: Callable[..., Any] | None = None, - ledger: Any = None, - ): - self.signer = signer - self.want_manager = want_manager - self.max_auto_pay_units = int(max_auto_pay_usdc * 1_000_000) - self.send_callback = send_callback - self.ledger = ledger - - # Pending payments: nonce_hex → {peer_id, cid, amount} - self._pending_payments: dict[str, dict[str, Any]] = {} - - async def process_incoming_message( - self, peer_id: str, msg: Message_1_3 - ) -> Message_1_3 | None: - """ - Called by the Bitswap dispatcher for every incoming 1.3.0 message. - - Handles: - - PaymentTerms → sign and send PaymentAuthorization - - PaymentReceipts → retry WANT_BLOCK - - PaymentRejections → log and surface to application - - Returns a response Message to send back, or None. - """ - # Handle payment terms (server telling us what a block costs) - if msg.payment_terms: - for terms in msg.payment_terms: - response = await self._handle_payment_terms(peer_id, terms) - if response: - return response - - # Handle receipts (server confirming our payment) - for receipt in msg.payment_receipts: - await self._handle_payment_receipt(peer_id, receipt) - - # Handle rejections - for rejection in msg.payment_rejections: - self._handle_payment_rejection(peer_id, rejection) - - return None - - async def build_payment_auth_msg( - self, - terms: Any, # Message_1_3.PaymentTerms - ) -> Message_1_3: - """ - Build a PaymentAuthorization message for the given PaymentTerms. - Used by tests and demo scripts. - """ - v, r, s = self.signer.sign_transfer_authorization( - to=terms.pay_to, - value=terms.amount, - nonce=bytes(terms.nonce), - valid_before=terms.valid_before, - ) - - msg = Message_1_3() - auth = msg.payment_authorizations.add() - auth.cid = bytes(terms.cid) - auth.from_address = self.signer.address - auth.to_address = terms.pay_to - auth.value = terms.amount - auth.valid_after = 0 - auth.valid_before = terms.valid_before - auth.nonce = bytes(terms.nonce) - auth.v = v - auth.r = r - auth.s = s - auth.scheme = terms.scheme - return msg - - # ── Internal handlers ───────────────────────────────────────────────── - - async def _handle_payment_terms( - self, peer_id: str, terms: Any - ) -> Message_1_3 | None: - """ - Server sent us PaymentTerms alongside a PaymentRequired BlockPresence. - Decide whether to pay and send back a PaymentAuthorization. - """ - amount = terms.amount - cid_bytes = bytes(terms.cid) - cid_hex = cid_bytes.hex() - - logger.info( - f"💳 PaymentTerms from {peer_id[:20]}... " - f"cid={cid_hex[:20]}... amount={amount} units " - f"(${amount / 1_000_000:.6f} USDC) pay_to={terms.pay_to[:12]}..." - ) - - # Reject if too expensive - if amount > self.max_auto_pay_units: - logger.info( - f"Block too expensive: {amount} units > " - f"max {self.max_auto_pay_units} units. " - f"Skipping — will seek block elsewhere." - ) - return None - - # Validate pricing isn't a lie (10% tolerance) - expected_amount = self._expected_price(terms.block_size) - if expected_amount > 0 and amount > expected_amount * 1.1: - logger.warning( - f"Server overcharging: asked {amount}, expected ~{expected_amount}. " - f"Skipping payment." - ) - return None - - # Sign EIP-3009 authorization - try: - v, r, s = self.signer.sign_transfer_authorization( - to=terms.pay_to, - value=amount, - nonce=bytes(terms.nonce), - valid_before=terms.valid_before, - ) - except Exception as e: - logger.error(f"Failed to sign payment authorization: {e}") - return None - - # Build PaymentAuthorization message - response = Message_1_3() - auth = response.payment_authorizations.add() - auth.cid = cid_bytes - auth.from_address = self.signer.address - auth.to_address = terms.pay_to - auth.value = amount - auth.valid_after = 0 - auth.valid_before = terms.valid_before - auth.nonce = bytes(terms.nonce) - auth.v = v - auth.r = r - auth.s = s - auth.scheme = terms.scheme - - # Track pending payment - nonce_hex = bytes(terms.nonce).hex() - self._pending_payments[nonce_hex] = { - "peer_id": peer_id, - "cid": cid_hex, - "amount": amount, - } - - # Persist spent payment to ledger - if self.ledger is not None: - try: - self.ledger.record_spent_payment( - peer_id=peer_id, - cid=cid_bytes, - amount=amount, - nonce=bytes(terms.nonce), - ) - except Exception as _e: - logger.warning(f"Failed to persist spent payment: {_e}") - - logger.info( - f"Sending PaymentAuthorization to {peer_id[:20]}... " - f"cid={cid_hex[:20]}... amount={amount} units " - f"(${amount / 1_000_000:.6f} USDC)" - ) - return response - - def _expected_price(self, block_size: int) -> int: - """Compute expected price in USDC micro-units for a block of given size.""" - # Simple pricing: $0.000001 per KB - kb = block_size / 1024 - return int(kb * 1) # 1 micro-unit per KB - - async def _handle_payment_receipt(self, peer_id: str, receipt: Any) -> None: - """Server confirmed payment. Retry the WANT_BLOCK immediately.""" - cid_hex = ( - bytes(receipt.cid).hex() if isinstance(receipt.cid, bytes) else receipt.cid - ) - logger.info( - f"Payment receipt received from {peer_id[:20]}... " - f"cid={cid_hex[:20]}... " - f"tx={receipt.tx_hash[:20] if receipt.tx_hash else 'optimistic'}..." - ) - # Trigger want manager to retry - if self.want_manager: - try: - await self.want_manager.retry_want_block(peer_id, cid_hex) - except Exception as e: - logger.error(f"Failed to retry want block: {e}") - - def _handle_payment_rejection(self, peer_id: str, rejection: Any) -> None: - """Log and surface payment rejection.""" - cid_hex = ( - bytes(rejection.cid).hex() - if isinstance(rejection.cid, bytes) - else rejection.cid - ) - logger.warning( - f"❌ PaymentRejection from {peer_id[:20]}... " - f"cid={cid_hex[:20]}... reason={rejection.reason}" - ) - - Args: - signer: An EIP3009Signer instance (gooseswarm.payments.eip3009_signer) - want_manager: Object with retry_want_block(peer_id, cid) async method - max_auto_pay_usdc: Maximum amount to auto-pay in USDC (default $0.001) - send_callback: Async function(peer_id, msg_bytes) to send responses - - """ - - def __init__( - self, - signer: Any, # gooseswarm.payments.eip3009_signer.EIP3009Signer - want_manager: Any, # has retry_want_block(peer_id, cid) method - max_auto_pay_usdc: float = 0.001, - send_callback: Callable[..., Any] | None = None, - ledger: Any = None, # gooseswarm.payments.ledger.PaymentLedger (optional) - ): - self.signer = signer - self.want_manager = want_manager - self.max_auto_pay_units = int(max_auto_pay_usdc * 1_000_000) - self.send_callback = send_callback - self.ledger = ledger - - # Pending payments: nonce_hex → {peer_id, cid, amount} - self._pending_payments: dict[str, dict[str, Any]] = {} - - async def process_incoming_message( - self, peer_id: str, msg: Message_1_3 - ) -> Message_1_3 | None: - """ - Called by the Bitswap dispatcher for every incoming 1.3.0 message. - - Handles: - - PaymentTerms → sign and send PaymentAuthorization - - PaymentReceipts → retry WANT_BLOCK - - PaymentRejections → log and surface to application - - Returns a response Message to send back, or None. - """ - # Handle payment terms (server telling us what a block costs) - if msg.payment_terms: - for terms in msg.payment_terms: - response = await self._handle_payment_terms(peer_id, terms) - if response: - return response - - # Handle receipts (server confirming our payment) - for receipt in msg.payment_receipts: - await self._handle_payment_receipt(peer_id, receipt) - - # Handle rejections - for rejection in msg.payment_rejections: - self._handle_payment_rejection(peer_id, rejection) - - return None - - async def build_payment_auth_msg( - self, - terms: Any, # Message_1_3.PaymentTerms - ) -> Message_1_3: - """ - Build a PaymentAuthorization message for the given PaymentTerms. - Used by tests and demo scripts. - """ - v, r, s = self.signer.sign_transfer_authorization( - to=terms.pay_to, - value=terms.amount, - nonce=bytes(terms.nonce), - valid_before=terms.valid_before, - ) - - msg = Message_1_3() - auth = msg.payment_authorizations.add() - auth.cid = bytes(terms.cid) - auth.from_address = self.signer.address - auth.to_address = terms.pay_to - auth.value = terms.amount - auth.valid_after = 0 - auth.valid_before = terms.valid_before - auth.nonce = bytes(terms.nonce) - auth.v = v - auth.r = r - auth.s = s - auth.scheme = terms.scheme - return msg - - # ── Internal handlers ───────────────────────────────────────────────── - - async def _handle_payment_terms( - self, peer_id: str, terms: Any - ) -> Message_1_3 | None: - """ - Server sent us PaymentTerms alongside a PaymentRequired BlockPresence. - Decide whether to pay and send back a PaymentAuthorization. - """ - amount = terms.amount - - # Reject if too expensive - if amount > self.max_auto_pay_units: - logger.info( - f"Block too expensive: {amount} units > " - f"max {self.max_auto_pay_units} units. " - f"Skipping — will seek block elsewhere." - ) - return None - - # Validate pricing isn't a lie (10% tolerance) - expected_amount = self._expected_price(terms.block_size) - if expected_amount > 0 and amount > expected_amount * 1.1: - logger.warning( - f"Server overcharging: asked {amount}, expected ~{expected_amount}. " - f"Skipping payment." - ) - return None - - # Sign EIP-3009 authorization - try: - v, r, s = self.signer.sign_transfer_authorization( - to=terms.pay_to, - value=amount, - nonce=bytes(terms.nonce), - valid_before=terms.valid_before, - ) - except Exception as e: - logger.error(f"Failed to sign payment authorization: {e}") - return None - - # Build PaymentAuthorization message - response = Message_1_3() - auth = response.payment_authorizations.add() - auth.cid = bytes(terms.cid) - auth.from_address = self.signer.address - auth.to_address = terms.pay_to - auth.value = amount - auth.valid_after = 0 - auth.valid_before = terms.valid_before - auth.nonce = bytes(terms.nonce) - auth.v = v - auth.r = r - auth.s = s - auth.scheme = terms.scheme - - # Track pending payment - nonce_hex = bytes(terms.nonce).hex() - self._pending_payments[nonce_hex] = { - "peer_id": peer_id, - "cid": bytes(terms.cid).hex(), - "amount": amount, - } - - # Persist spent payment to ledger - if self.ledger is not None: - try: - self.ledger.record_spent_payment( - peer_id=peer_id, - cid=bytes(terms.cid), - amount=amount, - nonce=bytes(terms.nonce), - ) - except Exception as _e: - logger.warning(f"Failed to persist spent payment: {_e}") - - logger.info( - f"Sending PaymentAuthorization to {peer_id[:20]}... " - f"cid={bytes(terms.cid).hex()[:20]}... amount={amount} units " - f"(${amount / 1_000_000:.6f} USDC)" - ) - return response - - async def _handle_payment_receipt(self, peer_id: str, receipt: Any) -> None: - """Server confirmed payment. Retry the WANT_BLOCK immediately.""" - cid_hex = ( - bytes(receipt.cid).hex() if isinstance(receipt.cid, bytes) else receipt.cid - ) - logger.info( - f"Payment receipt received from {peer_id[:20]}... " - f"cid={cid_hex[:20]}... " - f"tx={receipt.tx_hash[:20] if receipt.tx_hash else 'optimistic'}..." - ) - # Trigger want manager to retry - if self.want_manager: - try: - await self.want_manager.retry_want_block(peer_id, cid_hex) - except Exception as e: - logger.error(f"Failed to retry want block: {e}") - - def _handle_payment_rejection(self, peer_id: str, rejection: Any) -> None: - """Log and surface payment rejection.""" - cid_hex = ( - bytes(rejection.cid).hex() - if isinstance(rejection.cid, bytes) - else rejection.cid - ) - logger.warning( - f"Payment rejected by {peer_id[:20]}... " - f"cid={cid_hex[:20]}... reason={rejection.reason}" - ) - - def _expected_price(self, block_size_bytes: int) -> int: - """ - Client-side price oracle — must roughly match server pricing. - Used to detect overcharging. - """ - if block_size_bytes <= 4096: - return 0 - kb = block_size_bytes / 1024 - return int(kb * 10) # 10 units per KB baseline diff --git a/newsfragments/1321.feature.rst b/newsfragments/1321.feature.rst deleted file mode 100644 index af0c9d04a..000000000 --- a/newsfragments/1321.feature.rst +++ /dev/null @@ -1,10 +0,0 @@ -Comprehensive Bitswap overhaul for Kubo compatibility and performance: - -- **Batch block fetching** — send multiple CIDs in a single wantlist message. -- **Kubo-compatible DAG-PB encoding** — produce identical CIDs to Kubo's ``ipfs add``. -- **FilesystemBlockStore** — persistent storage surviving process restarts. -- **BlockService** — local-first lookup with automatic block caching and announcement. -- **Streaming support** — ``chunk_stream`` and ``MerkleDag.add_stream`` for efficient DAG building. -- **Bitswap 1.2.0 wantlist API** — ``WantType``, ``BlockPresence``, ``WantlistEntry``, ``BitswapMessage``. -- **DHT record signing/verification** — Kubo-compatible provider and value record signing. -- **ProviderQueryManager** — automatic DHT-based peer discovery in ``BitswapClient.get_block()`` with LRU caching. diff --git a/tests/core/bitswap/test_payment.py b/tests/core/bitswap/test_payment.py new file mode 100644 index 000000000..8e944e566 --- /dev/null +++ b/tests/core/bitswap/test_payment.py @@ -0,0 +1,110 @@ +from unittest.mock import AsyncMock, MagicMock + +import pytest + +from libp2p.bitswap.gated_decision_engine import PaymentGatedDecisionEngine +from libp2p.bitswap.payment_ledger import PaymentLedger +from libp2p.bitswap.pricing_engine import BlockPricingEngine + + +def test_block_pricing_engine_size_based(): + pricing = BlockPricingEngine(strategy="size_based", units_per_kb=10.0) + # 500 KB = 512000 bytes + price = pricing.compute_price("cid1", 512000) + assert price == 5000 + + +def test_block_pricing_engine_fixed(): + pricing = BlockPricingEngine(strategy="fixed", fixed_price=123) + price = pricing.compute_price("cid1", 512000) + assert price == 123 + + +def test_block_pricing_engine_free(): + pricing = BlockPricingEngine(strategy="free") + price = pricing.compute_price("cid1", 512000) + assert price == 0 + + +def test_block_pricing_engine_overrides(): + pricing = BlockPricingEngine(strategy="fixed", fixed_price=100) + pricing.set_price(b"cid2", 50) + pricing.set_free(b"cid3") + + assert pricing.compute_price(b"cid1".hex(), 10) == 100 + assert pricing.compute_price(b"cid2".hex(), 10) == 50 + assert pricing.compute_price(b"cid3".hex(), 10) == 0 + + +@pytest.mark.trio +async def test_payment_ledger_registration_and_payment(): + ledger = PaymentLedger() + + root_cid = b"root" + child_cids: list[bytes | str] = [b"child1", b"child2"] + + await ledger.register_dag(root_cid, child_cids) + + assert not ledger.is_paid("peer1", b"child1") + + await ledger.record_payment("peer1", b"root", amount=1000, nonce=b"nonce1") + + # After payment for root, root and children should be considered paid + assert ledger.is_paid("peer1", b"root") + assert ledger.is_paid("peer1", b"child1") + assert ledger.is_paid("peer1", b"child2") + + # Peer 2 has not paid + assert not ledger.is_paid("peer2", b"root") + + +@pytest.mark.trio +async def test_payment_ledger_nonce_replay(): + ledger = PaymentLedger() + + await ledger.record_payment("peer1", b"root", amount=1000, nonce=b"nonce1") + + with pytest.raises(ValueError, match="Nonce already used"): + await ledger.record_payment("peer1", b"root", amount=1000, nonce=b"nonce1") + + # Different nonce should work + await ledger.record_payment("peer1", b"root", amount=1000, nonce=b"nonce2") + + +@pytest.mark.trio +async def test_gated_decision_engine_auth(): + # Setup mocks + blockstore = AsyncMock() + blockstore.get_block.return_value = b"block data" + + ledger = PaymentLedger() + pricing = BlockPricingEngine(strategy="fixed", fixed_price=100) + + engine = PaymentGatedDecisionEngine( + blockstore=blockstore, ledger=ledger, pricing=pricing, tx_verifier=None + ) + + auth = MagicMock() + auth.cid = b"cid1" + auth.value = 50 + auth.from_address = "0x..." + auth.nonce = b"nonce1" + + # Payment less than expected + response = await engine.handle_payment_authorization("peer1", auth) + assert len(response.payment_rejections) == 1 + assert "INSUFFICIENT_PAYMENT" in response.payment_rejections[0].reason + + # Payment sufficient + auth.value = 100 + response = await engine.handle_payment_authorization("peer1", auth) + assert len(response.payment_receipts) == 1 + assert len(response.payload) == 1 + assert response.payload[0].data == b"block data" + + # Nonce reused + auth.nonce = b"nonce1" + # Should still succeed because ledger is_paid will return true + # and it skips re-verification + response = await engine.handle_payment_authorization("peer1", auth) + assert len(response.payment_receipts) == 1