From 0510a6ac180950d528c461fca0f546ce65bdadd8 Mon Sep 17 00:00:00 2001 From: James Piechota Date: Sun, 17 May 2026 09:32:02 -0400 Subject: [PATCH 1/7] test: add test to reproduce corruption issue --- src/preloaded/arweave/dev_bundler_task.erl | 234 +++++++++++++++------ 1 file changed, 164 insertions(+), 70 deletions(-) diff --git a/src/preloaded/arweave/dev_bundler_task.erl b/src/preloaded/arweave/dev_bundler_task.erl index cd00d989e..7a70905b9 100644 --- a/src/preloaded/arweave/dev_bundler_task.erl +++ b/src/preloaded/arweave/dev_bundler_task.erl @@ -33,10 +33,7 @@ execute_task(#task{type = post_tx, data = Items, opts = Opts} = Task) -> case build_signed_tx(Items, Opts) of {ok, SignedTX} -> Committed = hb_message:convert( - SignedTX, - #{ <<"device">> => <<"structured@1.0">>, <<"bundle">> => true }, - #{ <<"device">> => <<"tx@1.0">>, <<"bundle">> => true }, - Opts), + SignedTX, <<"structured@1.0">>, <<"tx@1.0">>, Opts), ?event(bundler_short, log_task(posting_tx, Task, [{tx, {explicit, hb_message:id(Committed, signed, Opts)}}] @@ -185,7 +182,7 @@ build_signed_tx(Items, Opts) -> data_items_to_tx(Items, Opts) -> List = lists:map( - fun(Item) -> + fun(Item) -> hb_message:convert( Item, #{ <<"device">> => <<"ans104@1.0">>, <<"bundle">> => true }, @@ -251,71 +248,6 @@ format_timestamp() -> Millisecs = (MegaSecs * 1000000 + Secs) * 1000 + (MicroSecs div 1000), calendar:system_time_to_rfc3339(Millisecs, [{unit, millisecond}, {offset, "Z"}]). 
-build_signed_tx_test() -> - Anchor = rand:bytes(32), - Price = 12345, - {ServerHandle, NodeOpts} = hb_mock_server:start_arweave_gateway(#{ - price => {200, integer_to_binary(Price)}, - tx_anchor => {200, hb_util:encode(Anchor)} - }), - TestOpts = NodeOpts#{ - <<"priv-wallet">> => ar_wallet:new(), - <<"store">> => hb_test_utils:test_store() - }, - try - Timestamp = 12344567, - ListValue = [<<"a">>, <<"b">>, <<"c">>], - StructuredItems = [ - #{ - <<"body">> => <<"body1">>, - <<"tag1">> => <<"value1">>, - <<"timestamp">> => Timestamp - }, - #{ - <<"body">> => <<"body3">>, - <<"tag3">> => <<"value3">>, - <<"list">> => ListValue - }, - #{ - <<"body">> => <<"body2">>, - <<"tag2">> => <<"value2">> - } - ], - Items = [ - hb_message:commit( - Item, - TestOpts, - #{ <<"device">> => <<"ans104@1.0">>, <<"bundle">> => true } - ) - || Item <- StructuredItems], - {ok, SignedTX} = build_signed_tx(Items, TestOpts), - ?assert(ar_tx:verify(SignedTX)), - ?assertEqual(Anchor, SignedTX#tx.anchor), - ?assertEqual(Price, SignedTX#tx.reward), - ?event(debug_test, {signed_tx, SignedTX}), - BundledTX = ar_bundles:deserialize(SignedTX), - ?event(debug_test, {bundled_tx, BundledTX}), - BundledItems = hb_util:numbered_keys_to_list(BundledTX#tx.data, #{}), - lists:foreach( - fun(Item) -> - ?assert(ar_bundles:verify_item(Item)) - end, - BundledItems - ), - BundledStructuredItems = [ - hb_message:convert( - Item, - <<"structured@1.0">>, - <<"ans104@1.0">>, - TestOpts - ) - || Item <- BundledItems], - ?assertEqual(lists:reverse(Items), BundledStructuredItems), - ok - after - hb_mock_server:stop(ServerHandle) - end. - build_signed_tx_on_arbundles_js_test() -> Anchor = rand:bytes(32), Price = 12345, @@ -388,3 +320,165 @@ build_signed_tx_on_arbundles_js_test() -> after hb_mock_server:stop(ServerHandle) end. + +%% Test that a nested dataitem is handled correctly by the bundler flow. 
+%% This test focuses in on the conversion that happens between building +%% the signed bundle TX and building the bundle proofs. +bundle_convert_real_data_test() -> + Item = inlined_broken_item(), + Anchor = rand:bytes(32), + Price = 12345, + {ServerHandle, NodeOpts} = hb_mock_server:start_arweave_gateway(#{ + price => {200, integer_to_binary(Price)}, + tx_anchor => {200, hb_util:encode(Anchor)} + }), + TestOpts = NodeOpts#{ + <<"priv-wallet">> => ar_wallet:new(), + <<"store">> => hb_test_utils:test_store() + }, + try + {ok, SignedTX} = build_signed_tx([Item], TestOpts), + ?assert(ar_tx:verify(SignedTX)), + Committed = hb_message:convert( + SignedTX, <<"structured@1.0">>, <<"tx@1.0">>, TestOpts), + %% This convert is exactly what build_proofs runs. + TX = hb_message:convert( + Committed, <<"tx@1.0">>, <<"structured@1.0">>, TestOpts), + SignedSize = byte_size(SignedTX#tx.data), + RecoveredSize = byte_size(TX#tx.data), + Delta = RecoveredSize - SignedSize, + Multiple = case Delta of + 0 -> 0; + _ when Delta rem 2500 =:= 0 -> Delta div 2500; + _ -> {non_clean_2500, Delta} + end, + ?assertEqual(0, Delta, { + inflation_detected_from_inlined_item, + #{signed_size => SignedSize, + recovered_size => RecoveredSize, + delta_bytes => Delta, + multiple_of_2500 => Multiple} + }), + ?assert(ar_tx:verify(TX)) + after + hb_mock_server:stop(ServerHandle) + end. 
+ +bundle_convert_minimal_test() -> + Anchor = rand:bytes(32), + Price = 12345, + {ServerHandle, NodeOpts} = hb_mock_server:start_arweave_gateway(#{ + price => {200, integer_to_binary(Price)}, + tx_anchor => {200, hb_util:encode(Anchor)} + }), + TestOpts = NodeOpts#{ + <<"priv-wallet">> => ar_wallet:new(), + <<"store">> => hb_test_utils:test_store() + }, + try + Item = hb_message:commit( + #{ <<"key">> => <<"value">>, + <<"body">> => #{ <<"a">> => <<"b">> } }, + TestOpts, #{<<"device">> => <<"ans104@1.0">>}), + {ok, SignedTX} = build_signed_tx([Item], TestOpts), + ?assert(ar_tx:verify(SignedTX)), + Committed = hb_message:convert( + SignedTX, <<"structured@1.0">>, <<"tx@1.0">>, TestOpts), + TX = hb_message:convert( + Committed, <<"tx@1.0">>, <<"structured@1.0">>, TestOpts), + ?event(debug_test, {signed_tx, SignedTX}), + ?event(debug_test, {committed, Committed}), + ?event(debug_test, {tx, TX}), + SignedSize = byte_size(SignedTX#tx.data), + RecoveredSize = byte_size(TX#tx.data), + Delta = RecoveredSize - SignedSize, + Multiple = case Delta of + 0 -> 0; + _ when Delta rem 2500 =:= 0 -> Delta div 2500; + _ -> {non_clean_2500, Delta} + end, + ?assertEqual(0, Delta, { + inflation_detected_from_minimal_item, + #{signed_size => SignedSize, + recovered_size => RecoveredSize, + delta_bytes => Delta, + multiple_of_2500 => Multiple} + }) + after + hb_mock_server:stop(ServerHandle) + end. + +%% Hardcoded item, structurally identical to one observed in a broken +%% production bundle (TXID -BTiilFCWd2kB3oOdCpPDJLGXhjeNxIeMH3kerPXKCM). +%% AO "Assignment" message with `body`, two commitments (HMAC + RSA-PSS), +%% per-event commitments inside the body. All public key / signature +%% bytes are real (from production) since the structured form encodes +%% them. 
+inlined_broken_item() -> + #{<<"base-hashpath">> => + <<"w_l6KLmO8OeEM6vmdwX1HwdCDmHiOlhUyAeNdjwpspU/p4CQHPCo629uDl8seMpWN5Z4EZpRK6bUNPbGAoOIkrs">>, + <<"block-hash">> => <<"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA">>, + <<"block-height">> => 0, + <<"block-timestamp">> => 0, + <<"body">> => + #{<<"commitments">> => + #{<<"HkUAI3fWd3uHltdfyHzLU5IreUtmIIqv45ZxsC12psI">> => + #{<<"commitment-device">> => <<"httpsig@1.0">>, + <<"committed">> => + [<<"event">>, <<"reference">>, <<"status-class">>], + <<"committer">> => + <<"mfj2T6f_3stKQk7fctrbpZKUfu4V6MQKCH-YHLtFnOY">>, + <<"keyid">> => + <<"publickey:kvgGBmHtEOxZTuJuJuHVqBe51aLpSDvZrzj5RjNEbw8NrYm3GB9+BEKdYD+fHZ0H775PJf8mosGapkP6pB8h8ZEEc6AuOo+lTJ9SKEnYit1Q6YG5Dg306EDfm0dpMU3zKe9pE4CIHf3ffCDqa1Xh4c1zdcFqKyofeT8PWIGQZCScA8rYG+aG2Z/y6QduyxBgzFfITdzeXbnJONmZEwPEA29LeDWmCxA7CSfE6W8+2aDW75qQjETRVXzxou0I0tsc3uXzd9E0/yU6NbDi93sIBiO8z2pNbGrMGIfpH+4dirH/YZNBW8PBgOLjnpe5yoPrT+cI8OoEX27u/al/rkPG4u0wBlxnollSr/lXc5HIn6AWvpSF7nuXcmdtG8Y8RK95h4YZ9d6CG3tSOTvSk7wK8IH97hScB16EpiuT6Xi/TPYh3PwVC/VDxLMox19v1eP1riHC/nkIroerCmaIwGfxI2XNUgQzaTcygjT0DFbbLZFakCZpJ+0+u2/S1I4EbdpdcChWqrA8psUlyR3sbhhPDpEP1ldNO+08OyW/PMfMwXEkVR+WHM2t5m9cyZwjpes6epCQQWjMIkwqeIRZWwo607iJKsXgd5n+73FWytVWNS1mOH1nDDkfDXOq4R60B6C+2k0As14b2Dv4eXZstbr8KbtVIHit/IytBohieLpMcF0=">>, + <<"signature">> => + 
<<"fZgshLexhVcpiQ1sBhwa_eoDn97vvc6IoJtwntc8VJTSDAokQ0RyThcjqhcgtF04kl4T986lZrVptAXkiKwog-gH1vnJX1T2yGAM-ZlTNTTmdLE7OIQhvs26-0_L3poPUSEjHsZ1vU2RpUUvKLIEQdCwlgTXGx54ZGB6feYXMn9e01tZPEdTVD0AcALa5G55aqyH3Lde5KXx4vOgdvWaCr772dXZ6C8249UG02SIHy3xvp1UdkLtzIbvSY9n5UzC1Bt-b5JftIijmVuIv3oI0_y9rRxGYLm2m7VusHwYjRdFAjN5X_NvYpWx25b62CNNLwprJfDqhllZsDz6PjnhRh9ZocOP3OLrarW0owFt0dfDRt3VBYaksUYTem-9YWtzS3Qa7kZSB754xtOW62wvu3kVH2sNB5C9SoXmheoPUjNLa4qXQv4-NJPF4wVdj8QxM0mYO0KQZfCUZtXhYYaqwRmS2aMyUrca1xjPOkD0nr7B1IS805O08fTkN6YcMluUH93myL4VbPPa2v1V2k-B-OlP4AzOn9F1uzk5ek--K_-2QdC63vgm4EKv8XqBoipUJ0Fe0jKUsE9iLZJddoMrYrsQCp8WMWX7iGaP6zJU2tbMpkAl-rr_Hc8xUkJ3eBd6pQcw-1MQ8EK7trPnjQD0EQZAG2HYj87HG-qCX3l9o8w">>, + <<"type">> => <<"rsa-pss-sha512">>}, + <<"asDiK4CqvjJf2d9FFf3r3-xCrs1jA8ee9tWUp43BuWk">> => + #{<<"commitment-device">> => <<"httpsig@1.0">>, + <<"committed">> => + [<<"event">>, <<"reference">>, <<"status-class">>], + <<"keyid">> => <<"constant:ao">>, + <<"signature">> => + <<"asDiK4CqvjJf2d9FFf3r3-xCrs1jA8ee9tWUp43BuWk">>, + <<"type">> => <<"hmac-sha256">>}}, + <<"event">> => <<"is_admissible">>, + <<"reference">> => + <<"HnbIWJdkG4CCwHCiycMKMmv2posdcTJ5xFcZ9lpTQQs">>, + <<"status-class">> => <<"success">>}, + <<"commitments">> => + #{<<"KT4ZXa_nhnWTfNJdVwOPDwHNN3eqYs_o3JoYv_odNvE">> => + #{<<"commitment-device">> => <<"httpsig@1.0">>, + <<"committed">> => + [<<"ao-types">>, <<"base-hashpath">>, <<"block-hash">>, + <<"block-height">>, <<"block-timestamp">>, <<"body">>, + <<"data-protocol">>, <<"epoch">>, <<"path">>, + <<"process">>, <<"slot">>, <<"timestamp">>, <<"type">>, + <<"variant">>], + <<"keyid">> => <<"constant:ao">>, + <<"signature">> => + <<"KT4ZXa_nhnWTfNJdVwOPDwHNN3eqYs_o3JoYv_odNvE">>, + <<"type">> => <<"hmac-sha256">>}, + <<"j1KSZD2tQOXpYvbPaqmLyRN6OOXxGa20bkgfeCj4a30">> => + #{<<"bundle">> => <<"false">>, + <<"commitment-device">> => <<"ans104@1.0">>, + <<"committed">> => + [<<"ao-types">>, <<"base-hashpath">>, <<"block-hash">>, + <<"block-height">>, 
<<"block-timestamp">>, <<"body">>, + <<"data-protocol">>, <<"epoch">>, <<"path">>, + <<"process">>, <<"slot">>, <<"timestamp">>, <<"type">>, + <<"variant">>], + <<"committer">> => + <<"n_XZJhUnmldNFo4dhajoPZWhBXuJk-OcQr5JQ49c4Zo">>, + <<"keyid">> => + <<"publickey:9BXuilimqVo7fpnoToPHZwqL7w_C0Qn4N3egeJRy05-nSpUv1vyp9xHbVLKVMPnJsie5Awt_xxob_jDvXSmE1fDsUpNnFurxG88UWN4zSNi87EfOorDQjHPRUqKPIYvg6xqPCpXPpOccJbFuack3ltQKtF5XLoaKWbsPdUtMquRXrbJgnGeOvXhQhbKa4xJKwGmjVC_LpY5FQ8j-cOlBOVVe_B7KF4eWG3sJf-z59MJQOaAozyU2iZpsuhslkTNVj8sM9CqkSfyD8EjEZdfF088IM_dJgk6ehIDHbx3FcGVnxpUHkXEnJFAlXRzdqmNb84QXsTNOHqwQPZ3q5wPRWS6iUaNxfeS_SsR6otIJgrYq04LYJLcpHuKGp53-b8tTeIvDFcmS2_kPijPqPINbf9c5uH0mxMNomB-8rVDIkIZ6Ojc_M0JnaQSk2rYPq8qRy2PuvAFyo1zeGM-2Bo4GNl9dMnfIr_Q6MlxRUwAwLHdOt0BJkxEBfOIw3MkB2d-SiVWtxG1Uqib7Iu_yn3j9DwzUOHjRQTse07giNDXRMsr1ml_sCK3bIetUFVnjjnoTNDEItDSck7lTFgvCdyXKkvXtSiNHkW8TCbdTDY0hBJzLVheKDb_cCyfmcTKo5ql2sWsZYCC7XybKdRMxU2HNNIUSpcDvhnTwv5-oq42Lmqc">>, + <<"signature">> => + <<"sE9TuQTsMCHhaSOmHF-Wqu8QBbSNMSeSztiE1b0tJfmSNOe1nKPmMcCZN1rHD8L9xQWJw4hSVUbChwt4QReTz2IoXFz1NT80F1qCY2x3uFMFxgUHb2abTQW_-VNjFGWFe-sguwYLAIZGYoJ9a2g1EJCRfksk9iOWXRt7j_yIBixKATq-QsEWdcwfBsEUYWq-IRI1RdPAr9ToZeQ13TtWWYxcRbKHwxJ1M58p2CuLCi1OXVmENLjacAawuhBjGV4oTQ1-QBap-JOjB6kRTXtWjNGnMTPF01edFJIxgRncnODrTO_ehz6qkFH6iMhI9oV4w5VcRCKnNM7fxTXKj6DeiuAb1KrirpzohzsTLautMqRhst8gSViBlftd4XoVCDVscawuz8yPDyJoDxhIIup7mO51QSmNVTM6JpSEsG-CbXa64aECBOq7_x-ld9xHyNvCCSHetSJ3EBiJDWHE8XCurePGJ6GLeggugQ85LxgsRaLDm9UIlbMhopkK4X-SyXz5_pGwUSegLa1QHWWxnIaS5zTm0f4yi_YiBmgmS27v28T-nTzOHuBGTl8yUWVG_CKAELjFVREm5I7h4UuDQuFoXlkkFW22-Gyx5tZh1eSxRpl1NOwhyGc9O-6TIR46t1BhlItitOoi6JEf26JjTmwJWF7kR8xyahCYWtHFEkzpob4">>, + <<"type">> => <<"rsa-pss-sha256">>}}, + <<"data-protocol">> => <<"ao">>, + <<"epoch">> => <<"0">>, + <<"path">> => <<"compute">>, + <<"process">> => <<"1V65_gzlifHH_surfFzL6HGfRlLJuEX_y0VbPHwIKec">>, + <<"slot">> => 180901, + <<"timestamp">> => 1778975170441, + <<"type">> => <<"Assignment">>, + <<"variant">> => <<"ao.N.1">>}. 
From 109233bb6052e319b431a4c7818ae6d4f8d9d257 Mon Sep 17 00:00:00 2001 From: James Piechota Date: Mon, 18 May 2026 20:50:59 -0400 Subject: [PATCH 2/7] test: make the upload script more flexible --- test/arbundles.js/upload-dataitem.js | 2 +- test/arbundles.js/upload-items.js | 17 ++++++++++++++--- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/test/arbundles.js/upload-dataitem.js b/test/arbundles.js/upload-dataitem.js index 7f153d334..1175a5faf 100644 --- a/test/arbundles.js/upload-dataitem.js +++ b/test/arbundles.js/upload-dataitem.js @@ -45,7 +45,7 @@ async function uploadDataItem(itemPath, gatewayUrl = "https://up.arweave.net") { // Upload to the gateway console.log("Uploading to gateway..."); - const uploadUrl = `${gatewayUrl}/tx`; + const uploadUrl = `${gatewayUrl}/~bundler@1.0/tx`; const response = await axios.post(uploadUrl, itemBuffer, { headers: { diff --git a/test/arbundles.js/upload-items.js b/test/arbundles.js/upload-items.js index 9bc63d475..67579202f 100644 --- a/test/arbundles.js/upload-items.js +++ b/test/arbundles.js/upload-items.js @@ -3,19 +3,20 @@ const path = require("path"); const { ArweaveSigner, createData } = require("@dha-team/arbundles"); // Configuration -const BUNDLER_URL = "http://localhost:8734"; +const BUNDLER_URL = process.env.BUNDLER_URL || "http://localhost:8734"; +const ENDPOINT_PATH = process.env.ENDPOINT_PATH || "/~bundler@1.0/item?codec-device=ans104@1.0"; const DEFAULT_WALLET = "../../hyperbeam-key.json"; const CONCURRENT_UPLOADS = 100; // Number of parallel uploads async function performanceTest(walletPath, itemCount, bytesPerItem = 0) { const wallet = require(path.resolve(walletPath)); const signer = new ArweaveSigner(wallet); - const endpoint = `${BUNDLER_URL}/~bundler@1.0/item?codec-device=ans104@1.0`; + const endpoint = `${BUNDLER_URL}${ENDPOINT_PATH}`; console.log("\n" + "=".repeat(70)); console.log("ANS-104 Bundle Upload Performance Test"); console.log("=".repeat(70)); - console.log(`Target: 
${BUNDLER_URL}`); + console.log(`Target: ${endpoint}`); console.log(`Items: ${itemCount}`); console.log(`Item Size: ${bytesPerItem > 0 ? `~${bytesPerItem} bytes` : 'default'}`); console.log(`Concurrent: ${CONCURRENT_UPLOADS}`); @@ -147,10 +148,20 @@ if (require.main === module) { console.error(" number_of_items - Number of data items to create and upload"); console.error(" bytes_per_item - Minimum size of each item in bytes (optional)"); console.error(""); + console.error("Environment variables:"); + console.error(" BUNDLER_URL - Gateway base URL (default: http://localhost:8734)"); + console.error(" ENDPOINT_PATH - Path appended to gateway (default: /~bundler@1.0/item?codec-device=ans104@1.0)"); + console.error(""); console.error("Examples:"); console.error(" node upload-items.js 100"); console.error(" node upload-items.js 100 1024"); console.error(" node upload-items.js /path/to/wallet.json 100 1024"); + console.error(" BUNDLER_URL=https://forward.computer node upload-items.js 100"); + console.error(" BUNDLER_URL=https://forward.computer ENDPOINT_PATH='/~bundler@1.0/tx?codec-device=ans104@1.0' node upload-items.js 1"); + console.error(""); + console.error("Note: when posting raw ANS-104 bytes, ENDPOINT_PATH must include"); + console.error(" ?codec-device=ans104@1.0 — otherwise the server will reject"); + console.error(" the body as 'unsigned-item' (no signers visible)."); process.exit(1); } From 4e2aa69e184dfa265cd57975e5018aaec626e5fb Mon Sep 17 00:00:00 2001 From: James Piechota Date: Wed, 20 May 2026 14:30:38 -0400 Subject: [PATCH 3/7] fix: preserve per-subtree bundle state through message conversions When converting from structured to a target codec, the target codec is provided as a hint to dev_structured and informs how dev_structured loads or unloads children as it recurses through dev_structured:from. This prevents bugs where a message could end up in a load state that does not match its commitments. 
This change also adds similar functionality to verify/id/commit. --- src/core/resolver/hb_message.erl | 66 +++++++-- src/preloaded/codec/dev_ans104.erl | 64 +++------ src/preloaded/codec/dev_structured.erl | 47 +++++-- src/preloaded/codec/dev_tx.erl | 56 ++++---- src/preloaded/codec/lib_arweave_common.erl | 151 ++++++++++----------- src/preloaded/message/dev_message.erl | 145 +++++++++++++------- 6 files changed, 303 insertions(+), 226 deletions(-) diff --git a/src/core/resolver/hb_message.erl b/src/core/resolver/hb_message.erl index 425eadbfd..eea24fad9 100644 --- a/src/core/resolver/hb_message.erl +++ b/src/core/resolver/hb_message.erl @@ -58,6 +58,7 @@ -module(hb_message). -export([id/1, id/2, id/3]). -export([convert/3, convert/4, uncommitted/1, uncommitted/2, committed/3]). +-export([add_bundle_hint/2, add_bundle_hint/3]). -export([with_only_committers/2, with_only_committers/3, commitment_devices/2]). -export([verify/1, verify/2, verify/3, paranoid_verify/2, paranoid_verify/3]). -export([commit/2, commit/3, signers/2, type/1, minimize/1]). @@ -102,6 +103,7 @@ convert(Msg, TargetFormat, SourceFormat, Opts) -> true -> hb_maps:without([<<"priv">>], Msg, Opts); false -> Msg end, + TargetFormat, SourceFormat, Opts ), @@ -110,8 +112,9 @@ convert(Msg, TargetFormat, SourceFormat, Opts) -> _ -> from_tabm(TABM, TargetFormat, OldPriv, Opts) end. -to_tabm(Msg, SourceFormat, Opts) -> - {SourceCodecMod, Params} = conversion_spec_to_req(SourceFormat, Opts), +to_tabm(Msg, TargetFormat, SourceFormat, Opts) -> + {SourceCodecMod, Params0} = conversion_spec_to_req(SourceFormat, Opts), + Params = add_bundle_hint(Params0, TargetFormat, Opts), % We use _from_ here because the codecs are labelled from the perspective % of their own format. `dev_codec_ans104:from/1' will convert _from_ % an ANS-104 message _into_ a TABM. @@ -121,6 +124,51 @@ to_tabm(Msg, SourceFormat, Opts) -> {ok, OtherTypeRes} -> OtherTypeRes end. +%% @doc Extract the device value from a conversion spec. 
+conversion_spec_device(Spec, _Default, _Opts) + when is_binary(Spec) orelse (Spec == tabm) -> + Spec; +conversion_spec_device(Spec, Default, Opts) when is_map(Spec) -> + hb_maps:get(<<"device">>, Spec, Default, Opts); +conversion_spec_device(_Spec, Default, _Opts) -> + Default. + +%% @doc Extend a structured->tabm source spec with the `bundle' flag and +%% `hint-device' implied by a hint spec, so the structured codec can decide +%% whether to load or offload children and can call the target codec's +%% `to_hint/3' callback at each node of the tree. +%% +%% `Spec' is the spec being extended (the source spec when converting). +%% `HintSpec' is the spec from which we should infer bundling +%% (target spec when converting). +add_bundle_hint(Spec, Opts) -> + add_bundle_hint(Spec, Spec, Opts). +add_bundle_hint(Spec, HintSpec, Opts) -> + WithBundle = + case maps:is_key(<<"bundle">>, Spec) of + true -> + Spec; + false -> + case + is_map(HintSpec) + andalso hb_maps:find(<<"bundle">>, HintSpec, Opts) + of + {ok, Bundle} -> Spec#{ <<"bundle">> => Bundle }; + _ -> Spec + end + end, + case maps:is_key(<<"hint-device">>, WithBundle) of + true -> + WithBundle; + false -> + case conversion_spec_device(HintSpec, undefined, Opts) of + HintDevice when is_binary(HintDevice) -> + WithBundle#{ <<"hint-device">> => HintDevice }; + _ -> + WithBundle + end + end. + from_tabm(Msg, TargetFormat, OldPriv, Opts) -> {TargetCodecMod, Params} = conversion_spec_to_req(TargetFormat, Opts), % We use the _to_ function here because each of the codecs we may call in @@ -147,17 +195,15 @@ restore_priv(Msg, OldPriv, Opts) -> %% Expects conversion spec to either be a binary codec name, or a map with a %% `device' key and other parameters. Additionally honors the `always_bundle' %% key in the node message if present. 
-conversion_spec_to_req(Spec, Opts) when is_binary(Spec) or (Spec == tabm) -> +conversion_spec_to_req(Spec, Opts) when is_binary(Spec) orelse (Spec == tabm) -> conversion_spec_to_req(#{ <<"device">> => Spec }, Opts); conversion_spec_to_req(Spec, Opts) -> try - Device = - hb_maps:get( - <<"device">>, - Spec, - no_codec_device_in_conversion_spec, - Opts - ), + Device = conversion_spec_device( + Spec, + no_codec_device_in_conversion_spec, + Opts + ), { case Device of tabm -> tabm; diff --git a/src/preloaded/codec/dev_ans104.erl b/src/preloaded/codec/dev_ans104.erl index 2b4b5facc..5e01ad6b2 100644 --- a/src/preloaded/codec/dev_ans104.erl +++ b/src/preloaded/codec/dev_ans104.erl @@ -2,7 +2,7 @@ %%% records to and from TABMs. -module(dev_ans104). -device_libraries([lib_arweave_common]). --export([to/3, from/3, commit/3, verify/3, content_type/1]). +-export([to/3, to_hint/3, from/3, commit/3, verify/3, content_type/1]). -export([serialize/3, deserialize/3]). -include("include/hb.hrl"). -include_lib("eunit/include/eunit.hrl"). @@ -128,6 +128,16 @@ do_from(RawTX, Req, Opts) -> ?event({from, {parsed_message, WithCommitments}}), {ok, WithCommitments}. +%% @doc Inspect a message's signed ans104 commitment and, if it carries an +%% explicit `bundle' field, mirror that value onto the request `Req'. +%% If there is no ans104 commitment, or the commitment does not carry +%% a `bundle' field, the `Req' is returned unchanged. +to_hint(Msg, Req, Opts) -> + case lib_arweave_common:bundle_hint(<<"ans104@1.0">>, Msg, Req, Opts) of + not_found -> {ok, Req}; + Hint -> Hint + end. + %% @doc Internal helper to translate a message to its #tx record representation, %% which can then be used by ar_bundles to serialize the message. We call the %% message's device in order to get the keys that we will be checkpointing. 
We @@ -144,55 +154,19 @@ to(Binary, _Req, _Opts) when is_binary(Binary) -> } }; to(TX, _Req, _Opts) when is_record(TX, tx) -> {ok, TX}; -to(RawTABM, Req, Opts) when is_map(RawTABM) -> +to(TABM, Req, Opts) when is_map(TABM) -> % Ensure that the TABM is fully loaded if the `bundle` key is set to true. - ?event(ans104_to, {to, {inbound, RawTABM}, {req, Req}}, + ?event(ans104_to, {to, {inbound, TABM}, {req, Req}}, #{debug_print_verify => false}), - MaybeCommitment = hb_message:commitment( - #{ <<"commitment-device">> => <<"ans104@1.0">> }, - RawTABM, + TX = lib_arweave_common:to( + <<"ans104@1.0">>, TABM, Req, + fun lib_arweave_common:fields_to_tx/4, + fun lib_arweave_common:excluded_tags/3, Opts ), - IsBundle = lib_arweave_common:is_bundle(MaybeCommitment, Req, Opts), - MaybeBundle = lib_arweave_common:maybe_load(RawTABM, IsBundle, Opts), - ?event(ans104_to, {to, {maybe_bundle, MaybeBundle}}, - #{debug_print_verify => false}), - - % Calculate and normalize the `data', if applicable. - Data = lib_arweave_common:data( - MaybeBundle, Req, fun dev_ans104:to/3, Opts), - ?event(ans104_to, {to, {calculated_data, Data}}, - #{debug_print_verify => false}), - TX0 = lib_arweave_common:siginfo( - MaybeBundle, MaybeCommitment, - fun lib_arweave_common:fields_to_tx/4, Opts - ), - ?event(ans104_to, {to, {found_siginfo, TX0}}, - #{debug_print_verify => false}), - TX1 = TX0#tx { data = Data }, - % Calculate the tags for the TX. 
- Tags = lib_arweave_common:tags( - TX1, MaybeCommitment, MaybeBundle, - lib_arweave_common:excluded_tags(TX1, MaybeBundle, Opts), Opts), - ?event(ans104_to, {to, {calculated_tags, Tags}}, - #{debug_print_verify => false}), - TX2 = TX1#tx { tags = Tags }, - Res = - try ar_tx:normalize(TX2) - catch - Type:Error:Stacktrace -> - ?event({ - {reset_ids_error, Error}, - {tx_without_data, {explicit, TX2}}}), - ?event({prepared_tx_before_ids, - {tags, {explicit, TX2#tx.tags}}, - {data, TX2#tx.data} - }), - erlang:raise(Type, Error, Stacktrace) - end, - ?event(ans104_to, {to, {result, Res}}, + ?event(ans104_to, {to, {result, TX}}, #{debug_print_verify => false}), - {ok, Res}; + {ok, TX}; to(Other, _Req, _Opts) -> throw({invalid_tx, Other}). diff --git a/src/preloaded/codec/dev_structured.erl b/src/preloaded/codec/dev_structured.erl index f93339031..1cdd6ad05 100644 --- a/src/preloaded/codec/dev_structured.erl +++ b/src/preloaded/codec/dev_structured.erl @@ -80,20 +80,26 @@ from(List, Req, Opts) when is_list(List) -> {ok, hb_util:numbered_keys_to_list(DecodedAsMap, Opts)} end; from(Msg, Req, Opts) when is_map(Msg) -> - % Normalize the message, offloading links to the cache. 
- NormLinks = hb_link:normalize(Msg, linkify_mode(Req, Opts), Opts), + HintedReq = apply_bundle_hint(Msg, Req, Opts), + NormLinks = hb_link:normalize(Msg, linkify_mode(HintedReq, Opts), Opts), NormKeysMap = hb_ao:normalize_keys(NormLinks, Opts), - EncodeTypes = find_encode_types(Req, Opts), + EncodeTypes = find_encode_types(HintedReq, Opts), {Types, Values} = lists:foldl( fun (Key, {Types, Values}) -> case hb_maps:find(Key, NormKeysMap, Opts) of {ok, Value} when is_binary(Value) -> {Types, [{Key, Value} | Values]}; - {ok, Nested} when is_map(Nested) or is_list(Nested) -> + {ok, Nested} when is_map(Nested) orelse is_list(Nested) -> ?event({from_recursing, {nested, Nested}}), - {Types, [{Key, hb_util:ok(from(Nested, Req, Opts))} | Values]}; + {Types, + [{ + Key, + hb_util:ok(from(Nested, HintedReq, Opts)) + } | Values]}; {ok, Value} when - is_atom(Value) or is_integer(Value) or is_float(Value) -> + is_atom(Value) + orelse is_integer(Value) + orelse is_float(Value) -> BinKey = hb_ao:normalize_key(Key), ?event({encode_value, Value}), case maybe_encode_value(Value, EncodeTypes) of @@ -136,7 +142,7 @@ from(Msg, Req, Opts) when is_map(Msg) -> % Encode the AoTypes as a structured dictionary % And include as a field on the produced TABM WithTypes = - hb_maps:from_list(case Types of + hb_maps:from_list(case Types of [] -> Values; T -> AoTypes = iolist_to_binary(hb_structured_fields:dictionary( @@ -174,16 +180,35 @@ type(Atom) when is_atom(Atom) -> <<"atom">>; type(List) when is_list(List) -> <<"list">>; type(Other) -> Other. +%% @doc If a `hint-device` key is present it indicates the desired +%% terminal format (after being converted via an intermediate `tabm` +%% format). In that case dev_structured defers to the target codec +%% to determine whether child messages should be loaded or unloaded. 
+apply_bundle_hint(Msg, Req, Opts) -> + case hb_maps:get(<<"hint-device">>, Req, undefined, Opts) of + undefined -> Req; + DeviceBin -> + TargetCodecMod = + hb_device:message_to_device( + #{ <<"device">> => DeviceBin }, + Opts + ), + % May add a `bundle` key to the request + try hb_util:ok(TargetCodecMod:to_hint(Msg, Req, Opts)) + catch _:_ -> Req + end + end. + %% @doc Discern the linkify mode from the request and the options. linkify_mode(Req, Opts) -> case hb_maps:get(<<"bundle">>, Req, not_found, Opts) of - not_found -> hb_opts:get(linkify_mode, offload, Opts); true -> % The request is asking for a bundle, so we should _not_ linkify. false; - false -> - % The request is asking for a flat message, so we should linkify. - true + _ -> + % The request is either asking for a flat message or has not + % specified. In both cases we should linkify. + hb_opts:get(linkify_mode, offload, Opts) end. %% @doc Convert a TABM into a native HyperBEAM message. diff --git a/src/preloaded/codec/dev_tx.erl b/src/preloaded/codec/dev_tx.erl index b11a0c9a3..300fa1bed 100644 --- a/src/preloaded/codec/dev_tx.erl +++ b/src/preloaded/codec/dev_tx.erl @@ -2,7 +2,7 @@ %%% records to and from TABMs. -module(dev_tx). -device_libraries([lib_arweave_common]). -export([from/3, to/3, commit/3, verify/3]). +-export([from/3, to/3, to_hint/3, commit/3, verify/3]). -include("include/hb.hrl"). -include_lib("eunit/include/eunit.hrl"). @@ -102,6 +102,20 @@ do_from(RawTX, Req, Opts) -> ?event({from, {parsed_message, hb_util:human_id(TX#tx.id)}}), {ok, WithCommitments}. +%% @doc Inspect a message's signed tx@1.0 commitment and, if the commitment +%% carries an explicit `bundle' field, mirror that value onto the request `Req'. +%% If no matching commitment exists, we might be dealing with a nested +%% message (i.e. tx@1.0 root, with one or more bundled ans104@1.0 children), +%% so delegate to ans104@1.0. 
+to_hint(Msg, Req, Opts) -> + case lib_arweave_common:bundle_hint(<<"tx@1.0">>, Msg, Req, Opts) of + not_found -> + case lib_arweave_common:bundle_hint(<<"ans104@1.0">>, Msg, Req, Opts) of + not_found -> {ok, Req}; + Hint -> Hint + end; + Hint -> Hint + end. %% @doc Internal helper to translate a message to its #tx record representation, %% which can then be used by ar_tx to serialize the message. We call the %% message's device in order to get the keys that we will be checkpointing. We @@ -119,39 +133,17 @@ to(Binary, _Req, _Opts) when is_binary(Binary) -> }) }; to(TX, _Req, _Opts) when is_record(TX, tx) -> {ok, TX}; -to(RawTABM, Req, Opts) when is_map(RawTABM) -> - % Ensure that the TABM is fully loaded if the `bundle` key is set to true. - ?event({to, {inbound, RawTABM}, {req, Req}}), - MaybeCommitment = hb_message:commitment( - #{ <<"commitment-device">> => <<"tx@1.0">> }, - RawTABM, +to(TABM, Req, Opts) when is_map(TABM) -> + ?event({to, {inbound, TABM}, {req, Req}}), + TX = lib_arweave_common:to( + <<"tx@1.0">>, TABM, Req, + fun dev_tx_to:fields_to_tx/4, + fun dev_tx_to:excluded_tags/3, Opts ), - IsBundle = lib_arweave_common:is_bundle(MaybeCommitment, Req, Opts), - MaybeBundle = lib_arweave_common:maybe_load(RawTABM, IsBundle, Opts), - ?event({to, {raw_tabm, RawTABM}, {is_bundle, IsBundle}, {maybe_bundle, MaybeBundle}, {req, Req}, {opts, Opts}}), - % Calculate and normalize the `data', if applicable. - Data = - lib_arweave_common:data( - MaybeBundle, Req, fun lib_arweave_common:to/3, Opts), - ?event({calculated_data, Data}), - TX0 = lib_arweave_common:siginfo( - MaybeBundle, MaybeCommitment, - fun dev_tx_to:fields_to_tx/4, Opts), - ?event({found_siginfo, TX0}), - TX1 = TX0#tx { data = Data }, - % Calculate the tags for the TX. 
- Tags = lib_arweave_common:tags( - TX1, MaybeCommitment, MaybeBundle, - dev_tx_to:excluded_tags(TX1, MaybeBundle, Opts), - Opts), - ?event({calculated_tags, Tags}), - TX2 = TX1#tx { tags = Tags }, - ?event({tx_before_id_gen, TX2}), - FinalTX = ar_tx:normalize(TX2), - enforce_valid_tx(FinalTX), - ?event({to_result, FinalTX}), - {ok, FinalTX}; + enforce_valid_tx(TX), + ?event({to_result, TX}), + {ok, TX}; to(Other, _Req, _Opts) -> throw({invalid_tx, Other}). diff --git a/src/preloaded/codec/lib_arweave_common.erl b/src/preloaded/codec/lib_arweave_common.erl index aa0951aea..e379f8dbb 100644 --- a/src/preloaded/codec/lib_arweave_common.erl +++ b/src/preloaded/codec/lib_arweave_common.erl @@ -1,10 +1,10 @@ %%% @doc Shared Arweave codec helpers. -module(lib_arweave_common). --export([from/3, to/3]). +-export([from/3]). -export([fields/3, tags/2, data/5, committed/6, base/5]). -export([with_commitments/8]). --export([is_bundle/3, maybe_load/3, data/4, tags/5, excluded_tags/3]). --export([siginfo/4, fields_to_tx/4]). +-export([bundle_hint/4, data/3, tags/5, excluded_tags/3]). +-export([to/3, to/6, siginfo/4, fields_to_tx/4]). -export([bundle_header/2, bundle_header/3]). -include("include/hb.hrl"). @@ -34,35 +34,63 @@ from_item(RawTX, Req, Opts) -> ) }. -%% @doc Convert a message into its ANS-104 item form. +%% @doc Recursively encode a nested message as an `ans104@1.0' #tx record. +%% Codecs that bundle nested ans104 data items (e.g. `tx@1.0') recurse +%% through this shared-library entry point rather than calling the +%% `ans104@1.0' device module directly, which is not reachable across +%% device boundaries. 
to(Binary, _Req, _Opts) when is_binary(Binary) -> {ok, #tx{ tags = [{<<"ao-type">>, <<"binary">>}], data = Binary }}; to(TX, _Req, _Opts) when is_record(TX, tx) -> {ok, TX}; -to(RawTABM, Req, Opts) when is_map(RawTABM) -> +to(TABM, Req, Opts) when is_map(TABM) -> + {ok, + to( + <<"ans104@1.0">>, TABM, Req, + fun ?MODULE:fields_to_tx/4, + fun ?MODULE:excluded_tags/3, + Opts + )}; +to(Other, _Req, _Opts) -> + throw({invalid_tx, Other}). + +%% @doc Prepare a TABM as a #tx record using codec-specific field +%% extraction and tag exclusion rules. Nested messages are always encoded +%% as `ans104@1.0' data items, via to/3. +to(Device, TABM, Req, FieldsFun, ExcludedTagsFun, Opts) -> MaybeCommitment = hb_message:commitment( - #{ <<"commitment-device">> => <<"ans104@1.0">> }, - RawTABM, + #{ <<"commitment-device">> => Device }, + TABM, Opts ), - IsBundle = is_bundle(MaybeCommitment, Req, Opts), - MaybeBundle = maybe_load(RawTABM, IsBundle, Opts), - Data = data(MaybeBundle, Req, fun lib_arweave_common:to/3, Opts), - TX0 = - siginfo( - MaybeBundle, MaybeCommitment, - fun lib_arweave_common:fields_to_tx/4, Opts - ), + Data = data(TABM, Req, Opts), + ?event({calculated_data, Data}), + TX0 = siginfo(TABM, MaybeCommitment, FieldsFun, Opts), + ?event({found_siginfo, TX0}), TX1 = TX0#tx{ data = Data }, - Tags = - tags( - TX1, MaybeCommitment, MaybeBundle, - excluded_tags(TX1, MaybeBundle, Opts), Opts - ), - {ok, ar_tx:normalize(TX1#tx{ tags = Tags })}; -to(Other, _Req, _Opts) -> - throw({invalid_tx, Other}). + Tags = tags( + TX1, + MaybeCommitment, + TABM, + ExcludedTagsFun(TX1, TABM, Opts), + Opts + ), + ?event({calculated_tags, Tags}), + TX = TX1#tx{ tags = Tags }, + ?event({tx_before_id_gen, TX}), + try ar_tx:normalize(TX) + catch + Type:Error:Stacktrace -> + ?event({ + {reset_ids_error, Error}, + {tx_without_data, {explicit, TX}}}), + ?event({prepared_tx_before_ids, + {tags, {explicit, TX#tx.tags}}, + {data, TX#tx.data} + }), + erlang:raise(Type, Error, Stacktrace) + end. 
%% @doc Return a TABM message containing the fields of the given decoded %% ANS-104 data item that should be included in the base message. @@ -429,65 +457,24 @@ deduplicating_from_list(Tags, Opts) -> %%% Encoding helpers. -is_bundle({ok, _, Commitment}, _Req, Opts) -> - hb_util:atom(hb_ao:get(<<"bundle">>, Commitment, false, Opts)); -is_bundle(_, Req, Opts) -> - case hb_maps:is_key(<<"bundle">>, Req, Opts) of - true -> hb_util:atom(hb_ao:get(<<"bundle">>, Req, false, Opts)); - false -> hb_util:atom(hb_ao:get(<<"bundle">>, Opts, false, Opts)) - end. - -%% @doc Determine if the message should be loaded from the cache and re-converted -%% to the TABM format. We do this if the `bundle' key is set to true. -maybe_load(RawTABM, true, Opts) -> - % Convert back to the fully loaded structured@1.0 message, then - % convert to TABM with bundling enabled. - Structured = hb_message:convert(RawTABM, <<"structured@1.0">>, Opts), - Loaded = hb_cache:ensure_all_loaded(Structured, Opts), - % Convert to TABM with bundling enabled. - LoadedTABM = - hb_message:convert( - Loaded, - tabm, +%% @doc Apply the `bundle' hint from a signed commitment for `Device'. +%% Returns `not_found' when no signed commitment for `Device' exists. +bundle_hint(Device, Msg, Req, Opts) -> + case hb_message:commitment( #{ - <<"device">> => <<"structured@1.0">>, - <<"bundle">> => true + <<"commitment-device">> => Device, + <<"committer">> => '_' }, - Opts - ), - % Ensure the commitments from the original message are the only - % ones in the fully loaded message, recursively for nested maps. - replace_commitments_recursive(LoadedTABM, RawTABM); -maybe_load(RawTABM, false, _Opts) -> - RawTABM. - -%% @doc Recursively replace commitments from RawTABM into LoadedTABM. 
-replace_commitments_recursive(LoadedTABM, RawTABM) - when is_map(LoadedTABM), is_map(RawTABM) -> - LoadedTABM2 = - case maps:find(<<"commitments">>, RawTABM) of - {ok, RawCommitments} -> - LoadedTABM#{ <<"commitments">> => RawCommitments }; - error -> - maps:remove(<<"commitments">>, LoadedTABM) - end, - maps:map( - fun(<<"commitments">>, Value) -> - Value; - (Key, Value) when is_map(Value) -> - case maps:get(Key, RawTABM, undefined) of - RawValue when is_map(RawValue) -> - replace_commitments_recursive(Value, RawValue); - _ -> - Value + Msg, + Opts) of + {ok, _, Commitment} -> + case hb_util:atom( + hb_maps:get(<<"bundle">>, Commitment, not_found, Opts)) of + not_found -> {ok, Req}; + Value -> {ok, Req#{ <<"bundle">> => Value }} end; - (_Key, Value) -> - Value - end, - LoadedTABM2 - ); -replace_commitments_recursive(LoadedTABM, _RawTABM) -> - LoadedTABM. + _ -> not_found + end. %% @doc Calculate the fields for a message, returning an initial TX record. siginfo(_Message, {ok, _, Commitment}, FieldsFun, Opts) -> @@ -568,13 +555,13 @@ fields_to_tx(TX, Prefix, Map, Opts) -> }. %% @doc Calculate the data field for a message. -data(TABM, Req, ToFun, Opts) -> +data(TABM, Req, Opts) -> DataKey = inline_key(TABM), UnencodedNestedMsgs = data_messages(TABM, Opts), NestedMsgs = hb_maps:map( fun(_, Msg) -> - hb_util:ok(ToFun(Msg, Req, Opts)) + hb_util:ok(to(Msg, Req, Opts)) end, UnencodedNestedMsgs, Opts @@ -587,7 +574,7 @@ data(TABM, Req, ToFun, Opts) -> {?DEFAULT_DATA, _} -> NestedMsgs; {DataVal, _} -> - NestedMsgs#{ DataKey => hb_util:ok(ToFun(DataVal, Req, Opts)) } + NestedMsgs#{ DataKey => hb_util:ok(to(DataVal, Req, Opts)) } end. %% @doc Calculate data messages for large tag values or nested messages. diff --git a/src/preloaded/message/dev_message.erl b/src/preloaded/message/dev_message.erl index 837e974a9..1b664f246 100644 --- a/src/preloaded/message/dev_message.erl +++ b/src/preloaded/message/dev_message.erl @@ -135,38 +135,57 @@ id(RawBase, Req, NodeOpts) -> end. 
calculate_id(RawBase, Req, NodeOpts) -> - % Find the ID device for the message. - Base = hb_message:convert(RawBase, tabm, NodeOpts), - ?event(debug_id, {calculate_ids, {base, Base}}), - IDMod = - case id_device(Base, NodeOpts) of - {ok, IDDev} -> IDDev; + % Resolve the ID device up-front so we can plumb it as `hint-device' into + % the structured->tabm conversion below. This keeps the children's load + % state consistent with what `commit/3' and `verify/3' would produce. + IDDev = + case id_device(RawBase, NodeOpts) of + {ok, Device} -> Device; {error, Error} -> throw({id, Error}) end, - ?event(debug_id, {generating_id, {idmod, IDMod}, {base, Base}}), - % If the ID device resolves to this device, use the default commitment - % device instead to avoid recursing through `message@1.0/commit'. - CommitDev = - case hb_device:message_to_device(#{ <<"device">> => IDMod }, NodeOpts) of - ?MODULE -> ?DEFAULT_ID_DEVICE; - _ -> IDMod - end, - ?event(debug_id, {called_id_device, CommitDev}, NodeOpts), - {ok, #{ <<"commitments">> := Comms} } = - hb_ao:raw( - CommitDev, - <<"commit">>, - Base, - Req#{ <<"type">> => <<"unsigned">> }, + SourceSpec = + hb_message:add_bundle_hint( + #{ <<"device">> => <<"structured@1.0">> }, + Req#{ <<"device">> => IDDev }, NodeOpts ), - ?event(debug_id, - {generated_id, - {type, unsigned}, - {commitments, maps:keys(Comms)} - } - ), - {ok, hd(maps:keys(Comms))}. + Base = hb_message:convert(RawBase, tabm, SourceSpec, NodeOpts), + ?event(debug_id, {calculate_ids, {base, Base}}), + ?event(debug_id, {generating_id, {id_device, IDDev}, {base, Base}}), + % Get the device module from the message, or use the default if it is not + % set. We can tell if the device is not set (or is the default) by checking + % whether the device module is the same as this module. 
+ DevMod = + case hb_device:message_to_device(#{ <<"device">> => IDDev }, NodeOpts) of + ?MODULE -> + hb_device:message_to_device( + #{ <<"device">> => ?DEFAULT_ID_DEVICE }, + NodeOpts + ); + Module -> Module + end, + % Apply the function's default `commit' function with the appropriate arguments. + % If it doesn't exist, error. + case hb_device:find_exported_function(Base, DevMod, commit, 3, NodeOpts) of + {ok, Fun} -> + ?event(debug_id, {called_id_device, IDDev}, NodeOpts), + {ok, #{ <<"commitments">> := Comms} } = + apply( + Fun, + hb_device:truncate_args( + Fun, + [Base, Req#{ <<"type">> => <<"unsigned">> }, NodeOpts] + ) + ), + ?event(debug_id, + {generated_id, + {type, unsigned}, + {commitments, maps:keys(Comms)} + } + ), + {ok, hd(maps:keys(Comms))}; + not_found -> throw({id, id_resolver_not_found_for_device, DevMod}) + end. %% @doc Locate the ID device of a message. The ID device is determined the %% `device' set in _all_ of the commitments. If no commitments are present, @@ -246,10 +265,32 @@ commit(Self, Req, Opts) -> _ -> Opts#{ <<"linkify-mode">> => offload } end, - % Encode to a TABM + AttMod = + hb_device:message_to_device( + #{ <<"device">> => AttDev }, + CommitOpts + ), + {ok, AttFun} = + hb_device:find_exported_function( + Base, + AttMod, + commit, + 3, + CommitOpts + ), + % Encode to a TABM. The `bundle' flag (when set on the request) is the + % caller's intent for the top-level commit and flows through on the + % source spec; `hint-device' lets the structured codec preserve any + % matching nested commitment's own bundle state per-node. 
+ SourceSpec = + hb_message:add_bundle_hint( + #{ <<"device">> => <<"structured@1.0">> }, + Req#{ <<"device">> => AttDev }, + CommitOpts + ), Loaded = ensure_commitments_loaded( - hb_message:convert(Base, tabm, CommitOpts), + hb_message:convert(Base, tabm, SourceSpec, CommitOpts), Opts ), {ok, Committed} = @@ -269,18 +310,9 @@ commit(Self, Req, Opts) -> verify(Self, Req, Opts) -> % Get the target message of the verification request. {ok, RawBase} = hb_message:find_target(Self, Req, Opts), - Base = - hb_message:convert( - ensure_commitments_loaded( - RawBase, - Opts - ), - tabm, - Opts - ), - ?event(verify, {verify, {base_found, Base}}), - Commitments = maps:get(<<"commitments">>, Base, #{}), - IDsToVerify = commitment_ids_from_request(Base, Req, Opts), + CommitmentBase = ensure_commitments_loaded(RawBase, Opts), + Commitments = maps:get(<<"commitments">>, CommitmentBase, #{}), + IDsToVerify = commitment_ids_from_request(CommitmentBase, Req, Opts), % Generate the new commitment request base messsage by removing the keys % used by this function (path, committers, commitments) and returning the % remaining keys. This message will then be merged with each commitment @@ -300,13 +332,34 @@ verify(Self, Req, Opts) -> Res = lists:all( fun(CommitmentID) -> + Commitment = maps:merge( + ReqBase, + maps:get(CommitmentID, Commitments) + ), + % Build the source spec exactly as commit/3 does: derive a + % `hint-device' from the commitment device so the structured + % codec verifies each subtree in the bundle state it was + % committed in, and mirror any `bundle' from the request. 
+ SourceSpec = + hb_message:add_bundle_hint( + #{ <<"device">> => <<"structured@1.0">> }, + Req#{ + <<"device">> => + maps:get( + <<"commitment-device">>, + Commitment, + undefined + ) + }, + Opts + ), + Base = hb_message:convert( + CommitmentBase, tabm, SourceSpec, Opts), + ?event(verify, {verify, {base_found, Base}}), {ok, Res} = verify_commitment( Base, - maps:merge( - ReqBase, - maps:get(CommitmentID, Commitments) - ), + Commitment, Opts ), ?event(verify, From 47c7c496b2b152bda1e7ae2d220b2e23e59679c5 Mon Sep 17 00:00:00 2001 From: James Piechota Date: Wed, 20 May 2026 14:30:57 -0400 Subject: [PATCH 4/7] test: cover per-subtree bundle state across the message@1.0 APIs Add dev_message_bundle_test_vectors: a battery exercising the `bundle'/`hint-device' machinery over a three-level signed message tree. For every 3x3x3 permutation of per-level bundle flags (true / false / none) it checks verify/3, id/3 and convert/4 -- the per-node path always verifies, a forced request bundle is harmless, and a converted tree's load state matches what was requested. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/dev_message_bundle_test_vectors.erl | 175 +++++++++++++++++++++ src/preloaded/arweave/dev_bundler_task.erl | 89 ++++++++++- src/preloaded/codec/dev_ans104.erl | 19 ++- src/preloaded/codec/dev_tx.erl | 20 +-- 4 files changed, 278 insertions(+), 25 deletions(-) create mode 100644 src/dev_message_bundle_test_vectors.erl diff --git a/src/dev_message_bundle_test_vectors.erl b/src/dev_message_bundle_test_vectors.erl new file mode 100644 index 000000000..15a617eed --- /dev/null +++ b/src/dev_message_bundle_test_vectors.erl @@ -0,0 +1,175 @@ +%%% @doc A battery of test vectors exercising the `bundle' / `hint-device' +%%% machinery of the `message@1.0' device across a three-level message tree. +%%% +%%% The tree is built bottom-up; each level is a committed (signed) message +%%% holding the level below it as a sub-message: +%%% +%%%
+%%%     L1 (root) -- l2 --> L2 (middle) -- l3 --> L3 (leaf) -- inner --> #{}
+%%% 
+%%% +%%% Each level is committed with its own `bundle' choice -- `true', `false' +%%% or `none' (committed with no `bundle' flag at all). The flag decides +%%% whether that level's sub-message is held inline (loaded) or as a link +%%% (offloaded) in the level's signed TABM form: +%%% +%%% - L1's flag controls `l2', L2's flag controls `l3', and L3's flag +%%% controls L3's plain sub-map `inner'. +%%% +%%% `none' is observably identical to `false': committing with no flag +%%% offloads children exactly as `false' does. +%%% +%%% For every 3x3x3 permutation of build flags the suite checks: +%%% +%%% - verify/3 with no forced bundle: the reliable path -- every level +%%% verifies in the state it was committed in. +%%% - verify/3 with a forced bundle (`true'|`false'): the edge case -- a +%%% `bundle' on the verify request is harmless. verify builds its +%%% source spec like commit/3 (mirroring the request `bundle' but also +%%% setting `hint-device'), so the per-node hints override the forced +%%% value and the tree still verifies. Tested for completeness. +%%% - id/3: the root's id equals its sole commitment's key. +%%% - convert/4: the tree round-trips through the `ans104@1.0' codec -- +%%% the standard structured<->codec path -- and still verifies at every +%%% level. A `bundle' on the request is per-node-overridden, so the +%%% committed shape survives the round-trip. +-module(dev_message_bundle_test_vectors). +-include_lib("eunit/include/eunit.hrl"). +-include("include/hb.hrl"). + +%% @doc Fresh, isolated options for a single vector: a new wallet and a new +%% in-memory store, so vectors cannot interfere with one another. +fresh_opts() -> + #{ + <<"priv-wallet">> => hb:wallet(), + <<"store">> => hb_test_utils:test_store() + }. + +%% @doc Commit a message with the `ans104@1.0' codec. `Bundle' is `true', +%% `false', or `none' to commit with no `bundle' flag at all. 
+commit(Msg, none, Opts) -> + hb_message:commit(Msg, Opts, #{ <<"device">> => <<"ans104@1.0">> }); +commit(Msg, Bundle, Opts) -> + hb_message:commit( + Msg, + Opts, + #{ <<"device">> => <<"ans104@1.0">>, <<"bundle">> => Bundle } + ). + +%% @doc Build a signed three-level tree with the given per-level flags. +build_tree(B1, B2, B3, Opts) -> + L3 = + commit( + #{ + <<"l3-tag">> => <<"l3-value">>, + <<"inner">> => #{ <<"deep">> => <<"deep-value">> } + }, + B3, + Opts + ), + L2 = commit(#{ <<"l2-tag">> => <<"l2-value">>, <<"l3">> => L3 }, B2, Opts), + commit(#{ <<"l1-tag">> => <<"l1-value">>, <<"l2">> => L2 }, B1, Opts). + +%%% Test vector generator. + +%% @doc The {API, RequestBundle} operations run against every tree shape. +operations() -> + [ + {verify, none}, + {verify, true}, + {verify, false}, + {id, none}, + {convert, none}, + {convert, true}, + {convert, false} + ]. + +%% @doc Generate the full grid: 3x3x3 tree shapes x the operation list. +bundle_vectors_test_() -> + {timeout, 240, + [ + { + test_label(B1, B2, B3, Api, ReqBundle), + fun() -> run(B1, B2, B3, Api, ReqBundle) end + } + || + B1 <- [true, false, none], + B2 <- [true, false, none], + B3 <- [true, false, none], + {Api, ReqBundle} <- operations() + ] + }. + +test_label(B1, B2, B3, Api, ReqBundle) -> + lists:flatten( + io_lib:format( + "L1=~p L2=~p L3=~p ~p req-bundle=~p", + [B1, B2, B3, Api, ReqBundle] + ) + ). + +%% @doc Build the tree and exercise the chosen API. +run(B1, B2, B3, Api, ReqBundle) -> + Opts = fresh_opts(), + Tree = build_tree(B1, B2, B3, Opts), + % Every freshly built tree must verify via the reliable per-node path, + % whatever per-level bundle permutation it was signed with. + ?assert(hb_message:verify(Tree, all, Opts)), + exercise(Api, ReqBundle, B1, B2, B3, Tree, Opts). + +%%% Per-API exercises. + +%% `verify': verification always uses the per-node path -- each subtree is +%% checked in the bundle state it was committed in. 
A `bundle' on the +%% request is mirrored as commit/3 does, but `hint-device' is set too, so +%% the per-node hints override it. A validly-built tree therefore always +%% verifies at every level, with or without a forced request bundle. +exercise(verify, ReqBundle, _B1, _B2, _B3, Tree, Opts) -> + Spec = verify_spec(ReqBundle), + ?assert(hb_message:verify(Tree, Spec, Opts)), + L2 = hb_maps:get(<<"l2">>, Tree, undefined, Opts), + ?assert(hb_message:verify(L2, Spec, Opts)), + L3 = hb_maps:get(<<"l3">>, L2, undefined, Opts), + ?assert(hb_message:verify(L3, Spec, Opts)); + +%% `id': the root was committed exactly once, so `id/3' with `all' +%% committers accumulates to that single commitment -- the id must equal +%% the key under which it is stored in the root's commitments map. +exercise(id, _ReqBundle, _B1, _B2, _B3, Tree, Opts) -> + Id = hb_message:id(Tree, all, Opts), + Commitments = hb_maps:get(<<"commitments">>, Tree, #{}, Opts), + ?assertEqual([Id], maps:keys(Commitments)); + +%% `convert': round-trip the tree through the `ans104@1.0' codec -- the +%% standard structured<->codec path. Each subtree converts in the state its +%% own commitment dictates (per-node), so a `bundle' flag on the request is +%% overridden and the committed shape is preserved. The round-tripped tree +%% must therefore still verify at every level. +exercise(convert, ReqBundle, _B1, _B2, _B3, Tree, Opts) -> + Encoded = hb_message:convert(Tree, convert_target(ReqBundle), Opts), + Restored = + hb_message:convert( + Encoded, + <<"structured@1.0">>, + <<"ans104@1.0">>, + Opts + ), + ?assert(hb_message:verify(Restored, all, Opts)), + L2 = hb_maps:get(<<"l2">>, Restored, undefined, Opts), + ?assert(hb_message:verify(L2, all, Opts)), + L3 = hb_maps:get(<<"l3">>, L2, undefined, Opts), + ?assert(hb_message:verify(L3, all, Opts)). + +%% @doc The verify spec for a request-bundle value: `all' committers, plus +%% the forced `bundle' flag when one is given. 
+verify_spec(none) -> + all; +verify_spec(ReqBundle) -> + #{ <<"committers">> => <<"all">>, <<"bundle">> => ReqBundle }. + +%% @doc The convert target for a request-bundle value: the bare `ans104@1.0' +%% codec, plus a forced `bundle' flag when one is given. +convert_target(none) -> + <<"ans104@1.0">>; +convert_target(ReqBundle) -> + #{ <<"device">> => <<"ans104@1.0">>, <<"bundle">> => ReqBundle }. diff --git a/src/preloaded/arweave/dev_bundler_task.erl b/src/preloaded/arweave/dev_bundler_task.erl index 7a70905b9..f6a27a472 100644 --- a/src/preloaded/arweave/dev_bundler_task.erl +++ b/src/preloaded/arweave/dev_bundler_task.erl @@ -283,14 +283,14 @@ build_signed_tx_on_arbundles_js_test() -> ?assert(ar_bundles:verify_item(BundledItem)), % Convert both dataitems to structured messages ItemStructured = hb_message:convert(Item, - #{ <<"device">> => <<"structured@1.0">>, <<"bundle">> => true }, - #{ <<"device">> => <<"ans104@1.0">>, <<"bundle">> => true }, + <<"structured@1.0">>, + <<"ans104@1.0">>, TestOpts), ?event(debug_test, {item_structured, ItemStructured}), ?assert(hb_message:verify(ItemStructured, all, TestOpts)), BundledItemStructured = hb_message:convert(BundledItem, - #{ <<"device">> => <<"structured@1.0">>, <<"bundle">> => true }, - #{ <<"device">> => <<"ans104@1.0">>, <<"bundle">> => true }, + <<"structured@1.0">>, + <<"ans104@1.0">>, TestOpts), ?event(debug_test, {bundled_item_structured, BundledItemStructured}), ?assert(hb_message:verify(BundledItemStructured, all, TestOpts)), @@ -303,15 +303,15 @@ build_signed_tx_on_arbundles_js_test() -> ?assert(ar_tx:verify(SignedTX)), % Convert the signed TX to a structured message StructuredTX = hb_message:convert(SignedTX, - #{ <<"device">> => <<"structured@1.0">>, <<"bundle">> => true }, - #{ <<"device">> => <<"tx@1.0">>, <<"bundle">> => true }, + <<"structured@1.0">>, + <<"tx@1.0">>, TestOpts), % ?event(debug_test, {structured_tx, StructuredTX}), ?assert(hb_message:verify(StructuredTX, all, TestOpts)), % Convert 
back to an L1 TX SignedTXRoundtrip = hb_message:convert(StructuredTX, - #{ <<"device">> => <<"tx@1.0">>, <<"bundle">> => true }, - #{ <<"device">> => <<"structured@1.0">>, <<"bundle">> => true }, + <<"tx@1.0">>, + #{ <<"device">> => <<"structured@1.0">>, <<"hint-device">> => <<"tx@1.0">> }, TestOpts), ?event(debug_test, {signed_tx_roundtrip, SignedTXRoundtrip}), ?assert(ar_tx:verify(SignedTXRoundtrip)), @@ -408,6 +408,79 @@ bundle_convert_minimal_test() -> hb_mock_server:stop(ServerHandle) end. +%% @doc Drive a nested tree of items signed in mixed bundle states through +%% the bundler flow: each child is signed with bundle=true OR bundle=false, +%% then we build the bundle TX, sign it, convert through structured@1.0 and +%% back to tx@1.0, and assert nothing was inflated and every commitment +%% still verifies. This exercises the full `hint-device' plumbing across a +%% mixed tree, mirroring the production scenario that motivated the fix. +bundle_convert_mixed_tree_verify_test() -> + Anchor = rand:bytes(32), + Price = 12345, + {ServerHandle, NodeOpts} = hb_mock_server:start_arweave_gateway(#{ + price => {200, integer_to_binary(Price)}, + tx_anchor => {200, hb_util:encode(Anchor)} + }), + TestOpts = NodeOpts#{ + <<"priv-wallet">> => ar_wallet:new(), + <<"store">> => hb_test_utils:test_store() + }, + try + %% Build three items. The first carries a child signed bundle=false, + %% the second a child signed bundle=true, the third has no nested + %% child at all. The L1 bundle TX therefore contains items that + %% would individually each round-trip with a different bundle state. 
+ InnerFalse = hb_message:commit( + #{ <<"leaf-tag">> => <<"leaf-false">>, + <<"leaf-list">> => [1, 2, 3] }, + TestOpts, + #{ <<"device">> => <<"ans104@1.0">>, <<"bundle">> => false }), + ?assert(hb_message:verify(InnerFalse, all, TestOpts)), + InnerTrue = hb_message:commit( + #{ <<"leaf-tag">> => <<"leaf-true">>, + <<"leaf-list">> => [4, 5, 6] }, + TestOpts, + #{ <<"device">> => <<"ans104@1.0">>, <<"bundle">> => true }), + ?assert(hb_message:verify(InnerTrue, all, TestOpts)), + ItemA = hb_message:commit( + #{ <<"item-tag">> => <<"a">>, <<"inner">> => InnerFalse }, + TestOpts, + #{ <<"device">> => <<"ans104@1.0">>, <<"bundle">> => true }), + ?assert(hb_message:verify(ItemA, all, TestOpts)), + ItemB = hb_message:commit( + #{ <<"item-tag">> => <<"b">>, <<"inner">> => InnerTrue }, + TestOpts, + #{ <<"device">> => <<"ans104@1.0">>, <<"bundle">> => false }), + ?assert(hb_message:verify(ItemB, all, TestOpts)), + ItemC = hb_message:commit( + #{ <<"item-tag">> => <<"c">> }, + TestOpts, + #{ <<"device">> => <<"ans104@1.0">>, <<"bundle">> => false }), + ?assert(hb_message:verify(ItemC, all, TestOpts)), + {ok, SignedTX} = build_signed_tx([ItemA, ItemB, ItemC], TestOpts), + ?assert(ar_tx:verify(SignedTX)), + Committed = hb_message:convert( + SignedTX, <<"structured@1.0">>, <<"tx@1.0">>, TestOpts), + ?event(debug_test, {committed, {explicit, Committed}}), + ?assert(hb_message:verify(Committed, all, TestOpts)), + %% Convert back to TX (same path build_proofs uses) and check that + %% the data did not inflate. + TX = hb_message:convert( + Committed, <<"tx@1.0">>, <<"structured@1.0">>, TestOpts), + ?assert(ar_tx:verify(TX)), + SignedSize = byte_size(SignedTX#tx.data), + RecoveredSize = byte_size(TX#tx.data), + Delta = RecoveredSize - SignedSize, + ?assertEqual(0, Delta, { + inflation_detected_on_mixed_tree, + #{signed_size => SignedSize, + recovered_size => RecoveredSize, + delta_bytes => Delta} + }) + after + hb_mock_server:stop(ServerHandle) + end. 
+ %% Hardcoded item, structurally identical to one observed in a broken %% production bundle (TXID -BTiilFCWd2kB3oOdCpPDJLGXhjeNxIeMH3kerPXKCM). %% AO "Assignment" message with `body`, two commitments (HMAC + RSA-PSS), diff --git a/src/preloaded/codec/dev_ans104.erl b/src/preloaded/codec/dev_ans104.erl index 5e01ad6b2..d393df915 100644 --- a/src/preloaded/codec/dev_ans104.erl +++ b/src/preloaded/codec/dev_ans104.erl @@ -814,17 +814,18 @@ test_bundle_commitment(Commit, Encode, Decode) -> hb_util:atom(hb_ao:get(<<"bundle">>, CommittedCommitment, false, Opts)), Label), - Encoded = hb_message:convert(Committed, + Encoded = hb_message:convert(Committed, #{ <<"device">> => <<"ans104@1.0">>, <<"bundle">> => ToBool(Encode) }, - <<"structured@1.0">>, Opts), + <<"structured@1.0">>, + Opts), ?event(debug_test, {encoded, Label, {explicit, Encoded}}), ?assert(ar_bundles:verify_item(Encoded), Label), %% IF the input message is unbundled, #tx.data should be empty. ?assertEqual(ToBool(Commit), Encoded#tx.data /= <<>>, Label), - Decoded = hb_message:convert(Encoded, + Decoded = hb_message:convert(Encoded, #{ <<"device">> => <<"structured@1.0">>, <<"bundle">> => ToBool(Decode) }, - #{ <<"device">> => <<"ans104@1.0">>, <<"bundle">> => ToBool(Encode) }, + <<"ans104@1.0">>, Opts), ?event(debug_test, {decoded, Label, {explicit, Decoded}}), ?assert(hb_message:verify(Decoded, all, Opts), Label), @@ -857,16 +858,17 @@ test_bundle_uncommitted(Encode, Decode) -> ToBool = fun(unbundled) -> false; (bundled) -> true end, Label = lists:flatten(io_lib:format("~p -> ~p", [Encode, Decode])), - Encoded = hb_message:convert(Structured, + Encoded = hb_message:convert(Structured, #{ <<"device">> => <<"ans104@1.0">>, <<"bundle">> => ToBool(Encode) }, - <<"structured@1.0">>, Opts), + <<"structured@1.0">>, + Opts), ?event(debug_test, {encoded, Label, {explicit, Encoded}}), %% IF the input message is unbundled, #tx.data should be empty. 
?assertEqual(ToBool(Encode), Encoded#tx.data /= <<>>, Label), - Decoded = hb_message:convert(Encoded, + Decoded = hb_message:convert(Encoded, #{ <<"device">> => <<"structured@1.0">>, <<"bundle">> => ToBool(Decode) }, - #{ <<"device">> => <<"ans104@1.0">>, <<"bundle">> => ToBool(Encode) }, + <<"ans104@1.0">>, Opts), ?event(debug_test, {decoded, Label, {explicit, Decoded}}), case Encode of @@ -876,3 +878,4 @@ test_bundle_uncommitted(Encode, Decode) -> ?assertEqual([1, 2, 3], maps:get(<<"list">>, Decoded, Opts), Label) end, ok. + diff --git a/src/preloaded/codec/dev_tx.erl b/src/preloaded/codec/dev_tx.erl index 300fa1bed..1e582ec9b 100644 --- a/src/preloaded/codec/dev_tx.erl +++ b/src/preloaded/codec/dev_tx.erl @@ -1444,17 +1444,18 @@ test_bundle_commitment(Commit, Encode, Decode) -> hb_util:atom(hb_ao:get(<<"bundle">>, CommittedCommitment, false, Opts)), Label), - Encoded = hb_message:convert(Committed, + Encoded = hb_message:convert(Committed, #{ <<"device">> => <<"tx@1.0">>, <<"bundle">> => ToBool(Encode) }, - <<"structured@1.0">>, Opts), + <<"structured@1.0">>, + Opts), ?event(debug_test, {encoded, Label, {explicit, Encoded}}), ?assert(ar_tx:verify(Encoded), Label), %% IF the input message is unbundled, #tx.data should be empty. 
?assertEqual(ToBool(Commit), Encoded#tx.data /= <<>>, Label), - Decoded = hb_message:convert(Encoded, + Decoded = hb_message:convert(Encoded, #{ <<"device">> => <<"structured@1.0">>, <<"bundle">> => ToBool(Decode) }, - #{ <<"device">> => <<"tx@1.0">>, <<"bundle">> => ToBool(Encode) }, + <<"tx@1.0">>, Opts), ?event(debug_test, {decoded, Label, {explicit, Decoded}}), ?assert(hb_message:verify(Decoded, all, Opts), Label), @@ -1486,16 +1487,17 @@ test_bundle_uncommitted(Encode, Decode) -> ToBool = fun(unbundled) -> false; (bundled) -> true end, Label = lists:flatten(io_lib:format("~p -> ~p", [Encode, Decode])), - Encoded = hb_message:convert(Structured, + Encoded = hb_message:convert(Structured, #{ <<"device">> => <<"tx@1.0">>, <<"bundle">> => ToBool(Encode) }, - <<"structured@1.0">>, Opts), + <<"structured@1.0">>, + Opts), ?event(debug_test, {encoded, Label, {explicit, Encoded}}), - %% IF the input message is unbundled, #tx.data should be empty. + %% If the input message is unbundled, #tx.data should be empty. 
?assertEqual(ToBool(Encode), Encoded#tx.data /= <<>>, Label), - Decoded = hb_message:convert(Encoded, + Decoded = hb_message:convert(Encoded, #{ <<"device">> => <<"structured@1.0">>, <<"bundle">> => ToBool(Decode) }, - #{ <<"device">> => <<"tx@1.0">>, <<"bundle">> => ToBool(Encode) }, + <<"tx@1.0">>, Opts), ?event(debug_test, {decoded, Label, {explicit, Decoded}}), case Encode of From 698e1ba5e1a12f9ffd5674ae90a945691e4d06a4 Mon Sep 17 00:00:00 2001 From: Sam Williams Date: Wed, 20 May 2026 22:56:24 -0400 Subject: [PATCH 5/7] fix: repair bundle hint device tests --- src/preloaded/codec/dev_structured.erl | 12 +++++------- src/preloaded/codec/dev_tx.erl | 6 +----- 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/src/preloaded/codec/dev_structured.erl b/src/preloaded/codec/dev_structured.erl index 1cdd6ad05..f04040955 100644 --- a/src/preloaded/codec/dev_structured.erl +++ b/src/preloaded/codec/dev_structured.erl @@ -188,14 +188,12 @@ apply_bundle_hint(Msg, Req, Opts) -> case hb_maps:get(<<"hint-device">>, Req, undefined, Opts) of undefined -> Req; DeviceBin -> - TargetCodecMod = - hb_device:message_to_device( - #{ <<"device">> => DeviceBin }, - Opts - ), % May add a `bundle` key to the request - try hb_util:ok(TargetCodecMod:to_hint(Msg, Req, Opts)) - catch _:_ -> Req + try hb_util:ok( + hb_ao:raw(DeviceBin, <<"to-hint">>, Msg, Req, Opts) + ) + catch _:_ -> + Req end end. diff --git a/src/preloaded/codec/dev_tx.erl b/src/preloaded/codec/dev_tx.erl index 1e582ec9b..8d7392205 100644 --- a/src/preloaded/codec/dev_tx.erl +++ b/src/preloaded/codec/dev_tx.erl @@ -109,11 +109,7 @@ do_from(RawTX, Req, Opts) -> %% so delegate to ans104@1.0. to_hint(Msg, Req, Opts) -> case lib_arweave_common:bundle_hint(<<"tx@1.0">>, Msg, Req, Opts) of - not_found -> - case lib_arweave_common:bundle_hint(<<"ans104@1.0">>, Msg, Req, Opts) of - not_found -> {ok, Req}; - Hint -> Hint - end; + not_found -> hb_ao:raw(<<"ans104@1.0">>, <<"to-hint">>, Msg, Req, Opts); Hint -> Hint end. 
%% @doc Internal helper to translate a message to its #tx record representation, From 8053889a095cca83eb33e083f2c8b6ad0e6d9ff9 Mon Sep 17 00:00:00 2001 From: James Piechota Date: Thu, 21 May 2026 20:44:45 -0400 Subject: [PATCH 6/7] fix: mostly limit bundle flag to root message --- src/preloaded/arweave/dev_bundler.erl | 137 ++++++++++++++++++++++++- src/preloaded/codec/dev_structured.erl | 8 +- src/preloaded/message/dev_message.erl | 109 ++++++++------------ 3 files changed, 181 insertions(+), 73 deletions(-) diff --git a/src/preloaded/arweave/dev_bundler.erl b/src/preloaded/arweave/dev_bundler.erl index 98112fdd9..25eae2bf1 100644 --- a/src/preloaded/arweave/dev_bundler.erl +++ b/src/preloaded/arweave/dev_bundler.erl @@ -659,6 +659,125 @@ nested_bundle_test_parallel() -> stop_test_servers(ServerHandle, NodeOpts) end. +%% @doc End-to-end bundler test for a nested item structured like the +%% broken production bundle that motivated the per-subtree `bundle' fix: +%% the parent is signed with `ans104@1.0' and `bundle' => false (so its +%% child is offloaded as a link in the committed form) and the child is +%% signed with `httpsig@1.0'. The child is posted on its own first, so it +%% is also a top-level bundle item and the parent's offloaded link resolves +%% to a separately-uploaded item. +nested_inlined_bundle_child_posted_test_parallel() -> + run_nested_inlined_bundle_test(child_posted). + +%% @doc As `nested_inlined_bundle_child_posted_test_parallel/0', but the +%% child is never posted on its own -- an unrelated plain data item fills +%% its bundle slot instead. The child therefore reaches the cache only via +%% the parent's inline body, exercising the offload path without a +%% separately-uploaded link target. +nested_inlined_bundle_child_not_posted_test_parallel() -> + run_nested_inlined_bundle_test(child_not_posted). + +%% @doc Shared body for the nested-inlined-bundle variants. 
Builds the +%% httpsig child and its `bundle' => false ans104 parent, posts three items +%% (the first chosen by `Variant'), then asserts the resulting bundle TX +%% verifies, carries three valid items, and round-trips through +%% `structured@1.0' without inflating. +run_nested_inlined_bundle_test(Variant) -> + Anchor = rand:bytes(32), + Price = 12345, + % NodeOpts redirects arweave gateway requests to the mock server. + {ServerHandle, NodeOpts} = hb_mock_server:start_arweave_gateway( + #{ + price => {200, integer_to_binary(Price)}, + tx_anchor => {200, hb_util:encode(Anchor)} + } + ), + try + ClientOpts = #{ <<"priv-wallet">> => ar_wallet:new() }, + NodeOpts2 = maps:merge(NodeOpts, #{ <<"bundler-max-items">> => 3 }), + Node = hb_http_server:start_node(NodeOpts2#{ + <<"priv-wallet">> => ar_wallet:new(), + <<"store">> => hb_test_utils:test_store() + }), + %% Child: an `httpsig@1.0'-signed message + Child = hb_message:commit( + #{ + <<"event">> => <<"is_admissible">>, + <<"reference">> => <<"ref-value">>, + <<"status-class">> => <<"success">> + }, + ClientOpts, + #{ <<"device">> => <<"httpsig@1.0">> } + ), + ?assert(hb_message:verify(Child, all, ClientOpts)), + %% Parent: signed with `ans104@1.0' and `bundle' => false, so the + %% child is offloaded as a link in the parent's committed form. + Parent = hb_message:commit( + #{ + <<"data-protocol">> => <<"ao">>, + <<"type">> => <<"Assignment">>, + <<"body">> => Child + }, + ClientOpts, + #{ <<"device">> => <<"ans104@1.0">>, <<"bundle">> => false } + ), + ?assert(hb_message:verify(Parent, all, ClientOpts)), + %% Post the first bundle slot (per `Variant'), then the nested + %% parent, then a plain data item. + ?assertMatch({ok, _}, post_first_item(Node, Variant, Child, ClientOpts)), + ?assertMatch({ok, _}, post_structured_item(Node, Parent, ClientOpts)), + ?assertMatch({ok, _}, + post_data_item(Node, new_data_item(2, 10), ClientOpts)), + %% The three items bundle into a single transaction. 
+ TXs = hb_mock_server:get_requests(tx, 1, ServerHandle), + ?assertEqual(1, length(TXs)), + Proofs = hb_mock_server:get_requests(chunk, 1, ServerHandle), + ?assert(length(Proofs) >= 1), + %% Reconstitute the bundle TX and verify it carries three valid items. + TX = reconstitute_tx(hd(TXs), Proofs), + ?event(debug_test, {tx, TX}), + ?assert(ar_tx:verify(TX)), + ?assertEqual(Anchor, TX#tx.anchor), + ?assertEqual(Price, TX#tx.reward), + Bundle = ar_bundles:deserialize(TX), + ?assertEqual(3, maps:size(Bundle#tx.data)), + %% Each bundled item must still verify once decoded back to + %% `structured@1.0'. + maps:foreach( + fun(_Key, BundledItem) -> + Structured = hb_message:convert( + BundledItem, + <<"structured@1.0">>, + <<"ans104@1.0">>, + ClientOpts + ), + ?assert(hb_message:verify(Structured, all, ClientOpts)) + end, + Bundle#tx.data + ), + %% The bundle TX must convert to `structured@1.0' and verify, then + %% round-trip back to `tx@1.0' without inflating its data. + TXStructured = hb_message:convert( + TX, <<"structured@1.0">>, <<"tx@1.0">>, ClientOpts), + ?assert(hb_message:verify(TXStructured, all, ClientOpts)), + TXRoundtrip = hb_message:convert( + TXStructured, <<"tx@1.0">>, <<"structured@1.0">>, ClientOpts), + ?assertEqual(byte_size(TX#tx.data), byte_size(TXRoundtrip#tx.data)), + ?assert(ar_tx:verify(TXRoundtrip)), + ok + after + %% Always cleanup, even if test fails + stop_test_servers(ServerHandle, NodeOpts) + end. + +%% @doc Post the first of the three bundled items. The `child_posted' +%% variant uploads the httpsig child on its own; `child_not_posted' fills +%% the slot with an unrelated plain data item. +post_first_item(Node, child_posted, Child, ClientOpts) -> + post_structured_item(Node, Child, ClientOpts); +post_first_item(Node, child_not_posted, _Child, ClientOpts) -> + post_data_item(Node, new_data_item(1, 10), ClientOpts). 
+ price_error_test_parallel() -> test_api_error(#{ price => {500, <<"error">>}, @@ -1522,6 +1641,10 @@ post_data_item(Node, Item, Opts) -> <<"ans104@1.0">>, Opts ), + post_structured_item(Node, StructuredItem, Opts). + +%% @doc Post an already-`structured@1.0' message to the bundler endpoint. +post_structured_item(Node, StructuredItem, Opts) -> hb_http:post( Node, #{ @@ -1532,8 +1655,11 @@ post_data_item(Node, Item, Opts) -> Opts ). -assert_bundle(Node, ExpectedItems, Anchor, Price, TXRequest, Proofs, ClientOpts) -> - %% Reconstitute the transaction with its data from the POSTed payloads. +%% @doc Reconstitute a bundle transaction from a captured `tx' request and +%% its `chunk' proof requests: decode the header, validate every chunk's +%% merkle path, then concatenate the chunks in offset order to recover the +%% transaction data. +reconstitute_tx(TXRequest, Proofs) -> TXBinary = maps:get(<<"body">>, TXRequest), TXJSON = hb_json:decode(TXBinary), TXHeader = ar_tx:json_struct_to_tx(TXJSON), @@ -1558,8 +1684,11 @@ assert_bundle(Node, ExpectedItems, Anchor, Price, TXRequest, Proofs, ClientOpts) ), SortedChunks = lists:sort(fun({O1, _}, {O2, _}) -> O1 =< O2 end, ChunksWithOffsets), Chunks = [Chunk || {_Offset, Chunk} <- SortedChunks], - DataBinary = iolist_to_binary(Chunks), - TX = TXHeader#tx{ data = DataBinary }, + TXHeader#tx{ data = iolist_to_binary(Chunks) }. + +assert_bundle(Node, ExpectedItems, Anchor, Price, TXRequest, Proofs, ClientOpts) -> + %% Reconstitute the transaction with its data from the POSTed payloads. 
+ TX = reconstitute_tx(TXRequest, Proofs), ?event(debug_test, {tx, TX}), ?assert(ar_tx:verify(TX)), ?assertEqual(Anchor, TX#tx.anchor), diff --git a/src/preloaded/codec/dev_structured.erl b/src/preloaded/codec/dev_structured.erl index f04040955..38ffa1bb0 100644 --- a/src/preloaded/codec/dev_structured.erl +++ b/src/preloaded/codec/dev_structured.erl @@ -91,10 +91,16 @@ from(Msg, Req, Opts) when is_map(Msg) -> {Types, [{Key, Value} | Values]}; {ok, Nested} when is_map(Nested) orelse is_list(Nested) -> ?event({from_recursing, {nested, Nested}}), + % Strip out the `bundle' flag on reqursive calls - bundle + % status will be redetermined by the hint device for each + % message. {Types, [{ Key, - hb_util:ok(from(Nested, HintedReq, Opts)) + hb_util:ok(from( + Nested, + hb_maps:without([<<"bundle">>], Req, Opts), + Opts)) } | Values]}; {ok, Value} when is_atom(Value) diff --git a/src/preloaded/message/dev_message.erl b/src/preloaded/message/dev_message.erl index 1b664f246..88f00a993 100644 --- a/src/preloaded/message/dev_message.erl +++ b/src/preloaded/message/dev_message.erl @@ -143,10 +143,14 @@ calculate_id(RawBase, Req, NodeOpts) -> {ok, Device} -> Device; {error, Error} -> throw({id, Error}) end, + % Encode to a TABM. The `bundle' flag (when set on the request) is the + % caller's intent for the top-level commit. We don't specify a + % `hint-device' - as we're building a commitment there is no + % existing commitment that we want to pull a `bundle' flag from. SourceSpec = hb_message:add_bundle_hint( #{ <<"device">> => <<"structured@1.0">> }, - Req#{ <<"device">> => IDDev }, + Req, NodeOpts ), Base = hb_message:convert(RawBase, tabm, SourceSpec, NodeOpts), @@ -164,28 +168,22 @@ calculate_id(RawBase, Req, NodeOpts) -> ); Module -> Module end, - % Apply the function's default `commit' function with the appropriate arguments. - % If it doesn't exist, error. 
- case hb_device:find_exported_function(Base, DevMod, commit, 3, NodeOpts) of - {ok, Fun} -> - ?event(debug_id, {called_id_device, IDDev}, NodeOpts), - {ok, #{ <<"commitments">> := Comms} } = - apply( - Fun, - hb_device:truncate_args( - Fun, - [Base, Req#{ <<"type">> => <<"unsigned">> }, NodeOpts] - ) - ), - ?event(debug_id, - {generated_id, - {type, unsigned}, - {commitments, maps:keys(Comms)} - } - ), - {ok, hd(maps:keys(Comms))}; - not_found -> throw({id, id_resolver_not_found_for_device, DevMod}) - end. + ?event(debug_id, {called_id_device, CommitDev}, NodeOpts), + {ok, #{ <<"commitments">> := Comms} } = + hb_ao:raw( + CommitDev, + <<"commit">>, + Base, + Req#{ <<"type">> => <<"unsigned">> }, + NodeOpts + ), + ?event(debug_id, + {generated_id, + {type, unsigned}, + {commitments, maps:keys(Comms)} + } + ), + {ok, hd(maps:keys(Comms))}. %% @doc Locate the ID device of a message. The ID device is determined the %% `device' set in _all_ of the commitments. If no commitments are present, @@ -265,27 +263,14 @@ commit(Self, Req, Opts) -> _ -> Opts#{ <<"linkify-mode">> => offload } end, - AttMod = - hb_device:message_to_device( - #{ <<"device">> => AttDev }, - CommitOpts - ), - {ok, AttFun} = - hb_device:find_exported_function( - Base, - AttMod, - commit, - 3, - CommitOpts - ), % Encode to a TABM. The `bundle' flag (when set on the request) is the - % caller's intent for the top-level commit and flows through on the - % source spec; `hint-device' lets the structured codec preserve any - % matching nested commitment's own bundle state per-node. + % caller's intent for the top-level commit. We don't specify a + % `hint-device' - as we're building a commitment there is no + % existing commitment that we want to pull a `bundle' flag from. 
SourceSpec = hb_message:add_bundle_hint( #{ <<"device">> => <<"structured@1.0">> }, - Req#{ <<"device">> => AttDev }, + Req, CommitOpts ), Loaded = @@ -310,9 +295,18 @@ commit(Self, Req, Opts) -> verify(Self, Req, Opts) -> % Get the target message of the verification request. {ok, RawBase} = hb_message:find_target(Self, Req, Opts), - CommitmentBase = ensure_commitments_loaded(RawBase, Opts), - Commitments = maps:get(<<"commitments">>, CommitmentBase, #{}), - IDsToVerify = commitment_ids_from_request(CommitmentBase, Req, Opts), + Base = + hb_message:convert( + ensure_commitments_loaded( + RawBase, + Opts + ), + tabm, + Opts + ), + ?event(verify, {verify, {base_found, Base}}), + Commitments = maps:get(<<"commitments">>, Base, #{}), + IDsToVerify = commitment_ids_from_request(Base, Req, Opts), % Generate the new commitment request base messsage by removing the keys % used by this function (path, committers, commitments) and returning the % remaining keys. This message will then be merged with each commitment @@ -332,34 +326,13 @@ verify(Self, Req, Opts) -> Res = lists:all( fun(CommitmentID) -> - Commitment = maps:merge( - ReqBase, - maps:get(CommitmentID, Commitments) - ), - % Build the source spec exactly as commit/3 does: derive a - % `hint-device' from the commitment device so the structured - % codec verifies each subtree in the bundle state it was - % committed in, and mirror any `bundle' from the request. 
- SourceSpec = - hb_message:add_bundle_hint( - #{ <<"device">> => <<"structured@1.0">> }, - Req#{ - <<"device">> => - maps:get( - <<"commitment-device">>, - Commitment, - undefined - ) - }, - Opts - ), - Base = hb_message:convert( - CommitmentBase, tabm, SourceSpec, Opts), - ?event(verify, {verify, {base_found, Base}}), {ok, Res} = verify_commitment( Base, - Commitment, + maps:merge( + ReqBase, + maps:get(CommitmentID, Commitments) + ), Opts ), ?event(verify, From 751e14829e9a062ed9648a37745f499e6069ddc0 Mon Sep 17 00:00:00 2001 From: James Piechota Date: Fri, 22 May 2026 13:40:50 -0400 Subject: [PATCH 7/7] fix: clean up bundle flag handling --- src/dev_message_bundle_test_vectors.erl | 150 +++++++++++---------- src/preloaded/arweave/dev_bundler.erl | 38 ++---- src/preloaded/arweave/dev_bundler_task.erl | 47 +------ src/preloaded/codec/dev_ans104.erl | 2 - src/preloaded/codec/dev_structured.erl | 14 +- src/preloaded/codec/dev_tx.erl | 3 - src/preloaded/codec/lib_arweave_common.erl | 7 - src/preloaded/message/dev_message.erl | 82 ++++++----- 8 files changed, 144 insertions(+), 199 deletions(-) diff --git a/src/dev_message_bundle_test_vectors.erl b/src/dev_message_bundle_test_vectors.erl index 15a617eed..30f7710a9 100644 --- a/src/dev_message_bundle_test_vectors.erl +++ b/src/dev_message_bundle_test_vectors.erl @@ -1,65 +1,65 @@ %%% @doc A battery of test vectors exercising the `bundle' / `hint-device' %%% machinery of the `message@1.0' device across a three-level message tree. %%% -%%% The tree is built bottom-up; each level is a committed (signed) message -%%% holding the level below it as a sub-message: +%%% The tree is built bottom-up; each level holds the level below it as a +%%% sub-message: %%% %%%
-%%%     L1 (root) -- l2 --> L2 (middle) -- l3 --> L3 (leaf) -- inner --> #{}
+%%%     L1 (root) --> L2 (middle) --> L3 (leaf) --> #{}
 %%% 
%%% -%%% Each level is committed with its own `bundle' choice -- `true', `false' -%%% or `none' (committed with no `bundle' flag at all). The flag decides -%%% whether that level's sub-message is held inline (loaded) or as a link -%%% (offloaded) in the level's signed TABM form: +%%% Each level is built with one of four choices: %%% -%%% - L1's flag controls `l2', L2's flag controls `l3', and L3's flag -%%% controls L3's plain sub-map `inner'. +%%% - `bundle_true' -- committed (`ans104@1.0') with `bundle' => true +%%% - `bundle_false' -- committed with `bundle' => false +%%% - `no_bundle' -- committed with no `bundle' flag +%%% - `uncommitted' -- not committed at all; a plain, unsigned map +%%% +%%% For every 4x4x4 permutation of build choices the suite checks: %%% -%%% `none' is observably identical to `false': committing with no flag -%%% offloads children exactly as `false' does. -%%% -%%% For every 3x3x3 permutation of build flags the suite checks: -%%% -%%% - verify/3 with no forced bundle: the reliable path -- every level -%%% verifies in the state it was committed in. -%%% - verify/3 with a forced bundle (`true'|`false'): the edge case -- a -%%% `bundle' on the verify request is harmless. verify builds its -%%% source spec like commit/3 (mirroring the request `bundle' but also -%%% setting `hint-device'), so the per-node hints override the forced -%%% value and the tree still verifies. Tested for completeness. -%%% - id/3: the root's id equals its sole commitment's key. -%%% - convert/4: the tree round-trips through the `ans104@1.0' codec -- -%%% the standard structured<->codec path -- and still verifies at every -%%% level. A `bundle' on the request is per-node-overridden, so the +%%% - verify/3: every level verifies in the state it was committed in. +%%% - id/3: the root's id equals its sole commitment's key (or, for an +%%% uncommitted root, the content-addressed unsigned id). 
+%%% - convert/4 with target `bundle' none/true/false: the tree +%%% round-trips through the `ans104@1.0' codec -- the standard +%%% structured<->codec path -- and still verifies at every level. A +%%% `bundle' on the conversion target applies only to the root; nested +%%% subtrees follow their own commitments via `hint-device', so the %%% committed shape survives the round-trip. -module(dev_message_bundle_test_vectors). -include_lib("eunit/include/eunit.hrl"). -include("include/hb.hrl"). -%% @doc Fresh, isolated options for a single vector: a new wallet and a new -%% in-memory store, so vectors cannot interfere with one another. fresh_opts() -> #{ <<"priv-wallet">> => hb:wallet(), <<"store">> => hb_test_utils:test_store() }. -%% @doc Commit a message with the `ans104@1.0' codec. `Bundle' is `true', -%% `false', or `none' to commit with no `bundle' flag at all. -commit(Msg, none, Opts) -> +%% @doc Build one tree level from its build choice: commit the message with +%% the `ans104@1.0' codec (with `bundle' => true, `bundle' => false, or no +%% `bundle' flag), or -- for `uncommitted' -- leave it as a plain map. +build_level(Msg, uncommitted, _Opts) -> + Msg; +build_level(Msg, no_bundle, Opts) -> hb_message:commit(Msg, Opts, #{ <<"device">> => <<"ans104@1.0">> }); -commit(Msg, Bundle, Opts) -> +build_level(Msg, bundle_true, Opts) -> + hb_message:commit( + Msg, + Opts, + #{ <<"device">> => <<"ans104@1.0">>, <<"bundle">> => true } + ); +build_level(Msg, bundle_false, Opts) -> hb_message:commit( Msg, Opts, - #{ <<"device">> => <<"ans104@1.0">>, <<"bundle">> => Bundle } + #{ <<"device">> => <<"ans104@1.0">>, <<"bundle">> => false } ). -%% @doc Build a signed three-level tree with the given per-level flags. +%% @doc Build a three-level tree with the given per-level build choices. 
build_tree(B1, B2, B3, Opts) -> L3 = - commit( + build_level( #{ <<"l3-tag">> => <<"l3-value">>, <<"inner">> => #{ <<"deep">> => <<"deep-value">> } @@ -67,8 +67,9 @@ build_tree(B1, B2, B3, Opts) -> B3, Opts ), - L2 = commit(#{ <<"l2-tag">> => <<"l2-value">>, <<"l3">> => L3 }, B2, Opts), - commit(#{ <<"l1-tag">> => <<"l1-value">>, <<"l2">> => L2 }, B1, Opts). + L2 = + build_level(#{ <<"l2-tag">> => <<"l2-value">>, <<"l3">> => L3 }, B2, Opts), + build_level(#{ <<"l1-tag">> => <<"l1-value">>, <<"l2">> => L2 }, B1, Opts). %%% Test vector generator. @@ -76,26 +77,28 @@ build_tree(B1, B2, B3, Opts) -> operations() -> [ {verify, none}, - {verify, true}, - {verify, false}, {id, none}, {convert, none}, {convert, true}, {convert, false} ]. -%% @doc Generate the full grid: 3x3x3 tree shapes x the operation list. +%% @doc The per-level build choices a tree level can take. +build_choices() -> + [bundle_true, bundle_false, no_bundle, uncommitted]. + +%% @doc Generate the full grid: 4x4x4 tree shapes x the operation list. bundle_vectors_test_() -> - {timeout, 240, + {timeout, 600, [ { test_label(B1, B2, B3, Api, ReqBundle), fun() -> run(B1, B2, B3, Api, ReqBundle) end } || - B1 <- [true, false, none], - B2 <- [true, false, none], - B3 <- [true, false, none], + B1 <- build_choices(), + B2 <- build_choices(), + B3 <- build_choices(), {Api, ReqBundle} <- operations() ] }. @@ -108,44 +111,52 @@ test_label(B1, B2, B3, Api, ReqBundle) -> ) ). -%% @doc Build the tree and exercise the chosen API. +%% @doc Build the tree and run_test the chosen API. run(B1, B2, B3, Api, ReqBundle) -> Opts = fresh_opts(), Tree = build_tree(B1, B2, B3, Opts), + ?event(debug_test, {tree, + {label, test_label(B1, B2, B3, Api, ReqBundle)}, + {built, Tree}}), % Every freshly built tree must verify via the reliable per-node path, % whatever per-level bundle permutation it was signed with. ?assert(hb_message:verify(Tree, all, Opts)), - exercise(Api, ReqBundle, B1, B2, B3, Tree, Opts). 
+ run_test(Api, ReqBundle, B1, B2, B3, Tree, Opts). -%%% Per-API exercises. +%%% Per-API run_tests. -%% `verify': verification always uses the per-node path -- each subtree is -%% checked in the bundle state it was committed in. A `bundle' on the -%% request is mirrored as commit/3 does, but `hint-device' is set too, so -%% the per-node hints override it. A validly-built tree therefore always -%% verifies at every level, with or without a forced request bundle. -exercise(verify, ReqBundle, _B1, _B2, _B3, Tree, Opts) -> - Spec = verify_spec(ReqBundle), - ?assert(hb_message:verify(Tree, Spec, Opts)), +%% `verify': every level of a validly-built tree verifies. The bundle +%% state each subtree was committed in is reproduced per-node via +%% `hint-device', so the verify request carries no `bundle'. (`run/5' +%% already verifies the root, so this only adds the nested levels.) +run_test(verify, _ReqBundle, _B1, _B2, _B3, Tree, Opts) -> L2 = hb_maps:get(<<"l2">>, Tree, undefined, Opts), - ?assert(hb_message:verify(L2, Spec, Opts)), + ?assert(hb_message:verify(L2, all, Opts)), L3 = hb_maps:get(<<"l3">>, L2, undefined, Opts), - ?assert(hb_message:verify(L3, Spec, Opts)); + ?assert(hb_message:verify(L3, all, Opts)); -%% `id': the root was committed exactly once, so `id/3' with `all' -%% committers accumulates to that single commitment -- the id must equal -%% the key under which it is stored in the root's commitments map. -exercise(id, _ReqBundle, _B1, _B2, _B3, Tree, Opts) -> +%% `id': +%% - committed root: `id/3' with `all' committers accumulates to the +%% single commitment -- the id must equal the key under which it is +%% stored in the root's commitments map.
+run_test(id, _ReqBundle, uncommitted, _B2, _B3, Tree, Opts) -> + ?assertEqual( + hb_message:id(Tree, none, Opts), + hb_message:id(Tree, all, Opts) + ); +run_test(id, _ReqBundle, _B1, _B2, _B3, Tree, Opts) -> Id = hb_message:id(Tree, all, Opts), Commitments = hb_maps:get(<<"commitments">>, Tree, #{}, Opts), ?assertEqual([Id], maps:keys(Commitments)); -%% `convert': round-trip the tree through the `ans104@1.0' codec -- the -%% standard structured<->codec path. Each subtree converts in the state its -%% own commitment dictates (per-node), so a `bundle' flag on the request is -%% overridden and the committed shape is preserved. The round-tripped tree -%% must therefore still verify at every level. -exercise(convert, ReqBundle, _B1, _B2, _B3, Tree, Opts) -> +%% `convert': round-trip the tree through the `ans104@1.0' codec. Each subtree +%% converts in the state its own commitment dictates (per-node) via +%% `hint-device', so the `bundle' on the conversion target applies only to the +%% root and the committed shape is preserved. +run_test(convert, ReqBundle, _B1, _B2, _B3, Tree, Opts) -> Encoded = hb_message:convert(Tree, convert_target(ReqBundle), Opts), Restored = hb_message:convert( @@ -160,13 +171,6 @@ exercise(convert, ReqBundle, _B1, _B2, _B3, Tree, Opts) -> L3 = hb_maps:get(<<"l3">>, L2, undefined, Opts), ?assert(hb_message:verify(L3, all, Opts)). -%% @doc The verify spec for a request-bundle value: `all' committers, plus -%% the forced `bundle' flag when one is given. -verify_spec(none) -> - all; -verify_spec(ReqBundle) -> - #{ <<"committers">> => <<"all">>, <<"bundle">> => ReqBundle }. - %% @doc The convert target for a request-bundle value: the bare `ans104@1.0' %% codec, plus a forced `bundle' flag when one is given. 
convert_target(none) -> diff --git a/src/preloaded/arweave/dev_bundler.erl b/src/preloaded/arweave/dev_bundler.erl index 25eae2bf1..126226df0 100644 --- a/src/preloaded/arweave/dev_bundler.erl +++ b/src/preloaded/arweave/dev_bundler.erl @@ -659,30 +659,17 @@ nested_bundle_test_parallel() -> stop_test_servers(ServerHandle, NodeOpts) end. -%% @doc End-to-end bundler test for a nested item structured like the -%% broken production bundle that motivated the per-subtree `bundle' fix: -%% the parent is signed with `ans104@1.0' and `bundle' => false (so its -%% child is offloaded as a link in the committed form) and the child is -%% signed with `httpsig@1.0'. The child is posted on its own first, so it -%% is also a top-level bundle item and the parent's offloaded link resolves -%% to a separately-uploaded item. -nested_inlined_bundle_child_posted_test_parallel() -> - run_nested_inlined_bundle_test(child_posted). - -%% @doc As `nested_inlined_bundle_child_posted_test_parallel/0', but the -%% child is never posted on its own -- an unrelated plain data item fills -%% its bundle slot instead. The child therefore reaches the cache only via -%% the parent's inline body, exercising the offload path without a -%% separately-uploaded link target. -nested_inlined_bundle_child_not_posted_test_parallel() -> - run_nested_inlined_bundle_test(child_not_posted). - -%% @doc Shared body for the nested-inlined-bundle variants. Builds the -%% httpsig child and its `bundle' => false ans104 parent, posts three items -%% (the first chosen by `Variant'), then asserts the resulting bundle TX -%% verifies, carries three valid items, and round-trips through -%% `structured@1.0' without inflating. -run_nested_inlined_bundle_test(Variant) -> +%% @doc End-to-end bundler test for a nested dataitem where the parent +%% has bundle=false. The child is posted on its own first. +nested_unbundled_bundle_child_posted_test_parallel() -> + run_nested_unbundled_bundle_test(child_posted).
+ +%% @doc Like `nested_unbundled_bundle_child_posted_test_parallel/0', but the +%% child is never posted on its own. +nested_unbundled_bundle_child_not_posted_test_parallel() -> + run_nested_unbundled_bundle_test(child_not_posted). + +run_nested_unbundled_bundle_test(Variant) -> Anchor = rand:bytes(32), Price = 12345, % NodeOpts redirects arweave gateway requests to the mock server. @@ -770,9 +757,6 @@ run_nested_inlined_bundle_test(Variant) -> stop_test_servers(ServerHandle, NodeOpts) end. -%% @doc Post the first of the three bundled items. The `child_posted' -%% variant uploads the httpsig child on its own; `child_not_posted' fills -%% the slot with an unrelated plain data item. post_first_item(Node, child_posted, Child, ClientOpts) -> post_structured_item(Node, Child, ClientOpts); post_first_item(Node, child_not_posted, _Child, ClientOpts) -> diff --git a/src/preloaded/arweave/dev_bundler_task.erl b/src/preloaded/arweave/dev_bundler_task.erl index f6a27a472..e7992ac9e 100644 --- a/src/preloaded/arweave/dev_bundler_task.erl +++ b/src/preloaded/arweave/dev_bundler_task.erl @@ -311,7 +311,7 @@ build_signed_tx_on_arbundles_js_test() -> % Convert back to an L1 TX SignedTXRoundtrip = hb_message:convert(StructuredTX, <<"tx@1.0">>, - #{ <<"device">> => <<"structured@1.0">>, <<"hint-device">> => <<"tx@1.0">> }, + <<"structured@1.0">>, TestOpts), ?event(debug_test, {signed_tx_roundtrip, SignedTXRoundtrip}), ?assert(ar_tx:verify(SignedTXRoundtrip)), @@ -344,21 +344,6 @@ bundle_convert_real_data_test() -> %% This convert is exactly what build_proofs runs.
TX = hb_message:convert( Committed, <<"tx@1.0">>, <<"structured@1.0">>, TestOpts), - SignedSize = byte_size(SignedTX#tx.data), - RecoveredSize = byte_size(TX#tx.data), - Delta = RecoveredSize - SignedSize, - Multiple = case Delta of - 0 -> 0; - _ when Delta rem 2500 =:= 0 -> Delta div 2500; - _ -> {non_clean_2500, Delta} - end, - ?assertEqual(0, Delta, { - inflation_detected_from_inlined_item, - #{signed_size => SignedSize, - recovered_size => RecoveredSize, - delta_bytes => Delta, - multiple_of_2500 => Multiple} - }), ?assert(ar_tx:verify(TX)) after hb_mock_server:stop(ServerHandle) @@ -386,24 +371,7 @@ bundle_convert_minimal_test() -> SignedTX, <<"structured@1.0">>, <<"tx@1.0">>, TestOpts), TX = hb_message:convert( Committed, <<"tx@1.0">>, <<"structured@1.0">>, TestOpts), - ?event(debug_test, {signed_tx, SignedTX}), - ?event(debug_test, {committed, Committed}), - ?event(debug_test, {tx, TX}), - SignedSize = byte_size(SignedTX#tx.data), - RecoveredSize = byte_size(TX#tx.data), - Delta = RecoveredSize - SignedSize, - Multiple = case Delta of - 0 -> 0; - _ when Delta rem 2500 =:= 0 -> Delta div 2500; - _ -> {non_clean_2500, Delta} - end, - ?assertEqual(0, Delta, { - inflation_detected_from_minimal_item, - #{signed_size => SignedSize, - recovered_size => RecoveredSize, - delta_bytes => Delta, - multiple_of_2500 => Multiple} - }) + ?assert(ar_tx:verify(TX)) after hb_mock_server:stop(ServerHandle) end. @@ -467,16 +435,7 @@ bundle_convert_mixed_tree_verify_test() -> %% the data did not inflate. TX = hb_message:convert( Committed, <<"tx@1.0">>, <<"structured@1.0">>, TestOpts), - ?assert(ar_tx:verify(TX)), - SignedSize = byte_size(SignedTX#tx.data), - RecoveredSize = byte_size(TX#tx.data), - Delta = RecoveredSize - SignedSize, - ?assertEqual(0, Delta, { - inflation_detected_on_mixed_tree, - #{signed_size => SignedSize, - recovered_size => RecoveredSize, - delta_bytes => Delta} - }) + ?assert(ar_tx:verify(TX)) after hb_mock_server:stop(ServerHandle) end. 
diff --git a/src/preloaded/codec/dev_ans104.erl b/src/preloaded/codec/dev_ans104.erl index d393df915..58584d345 100644 --- a/src/preloaded/codec/dev_ans104.erl +++ b/src/preloaded/codec/dev_ans104.erl @@ -130,8 +130,6 @@ do_from(RawTX, Req, Opts) -> %% @doc Inspect a message's signed ans104 commitment and, if it carries an %% explicit `bundle' field, mirror that value onto the request `Req'. -%% If there is no ans104 commitment, or the commitment does not carry -%% a `bundle' field, the `Req' is returned unchanged. to_hint(Msg, Req, Opts) -> case lib_arweave_common:bundle_hint(<<"ans104@1.0">>, Msg, Req, Opts) of not_found -> {ok, Req}; diff --git a/src/preloaded/codec/dev_structured.erl b/src/preloaded/codec/dev_structured.erl index 38ffa1bb0..8c20797bb 100644 --- a/src/preloaded/codec/dev_structured.erl +++ b/src/preloaded/codec/dev_structured.erl @@ -91,16 +91,14 @@ from(Msg, Req, Opts) when is_map(Msg) -> {Types, [{Key, Value} | Values]}; {ok, Nested} when is_map(Nested) orelse is_list(Nested) -> ?event({from_recursing, {nested, Nested}}), - % Strip out the `bundle' flag on reqursive calls - bundle - % status will be redetermined by the hint device for each - % message. + % We pass the HintedReq to the recursive call rather than + % Req so that this message's bundle status serves as the + % default for any children that don't explicitly set the + % `bundle' flag on the hinted commitment. {Types, [{ Key, - hb_util:ok(from( - Nested, - hb_maps:without([<<"bundle">>], Req, Opts), - Opts)) + hb_util:ok(from(Nested, HintedReq, Opts)) } | Values]}; {ok, Value} when is_atom(Value) @@ -187,7 +185,7 @@ type(List) when is_list(List) -> <<"list">>; type(Other) -> Other. %% @doc If a `hint-device` key is present it indicates the desired -%% terminal fomat (after being converted via an intermediate `tabm` +%% terminal format (after being converted via an intermediate `tabm` %% format). 
In that case dev_structured defers to the target codec %% to determine whether child messages should be loaded or unloaded. apply_bundle_hint(Msg, Req, Opts) -> diff --git a/src/preloaded/codec/dev_tx.erl b/src/preloaded/codec/dev_tx.erl index 8d7392205..7e9ab9d0f 100644 --- a/src/preloaded/codec/dev_tx.erl +++ b/src/preloaded/codec/dev_tx.erl @@ -104,9 +104,6 @@ do_from(RawTX, Req, Opts) -> %% @doc Inspect a message's signed tx@1.0 commitment and, if the commitment %% carries an explicit `bundle' field, mirror that value onto the request `Req'. -%% If no matching commitment exists, we might be dealing with a nested -%% message (i.e. tx@1.0 root, with one ore more bundled ans104@1.0 children), -%% so delegate to ans104@1.0. to_hint(Msg, Req, Opts) -> case lib_arweave_common:bundle_hint(<<"tx@1.0">>, Msg, Req, Opts) of not_found -> hb_ao:raw(<<"ans104@1.0">>, <<"to-hint">>, Msg, Req, Opts); diff --git a/src/preloaded/codec/lib_arweave_common.erl b/src/preloaded/codec/lib_arweave_common.erl index e379f8dbb..755b9492c 100644 --- a/src/preloaded/codec/lib_arweave_common.erl +++ b/src/preloaded/codec/lib_arweave_common.erl @@ -35,10 +35,6 @@ from_item(RawTX, Req, Opts) -> }. %% @doc Recursively encode a nested message as an `ans104@1.0' #tx record. -%% Codecs that bundle nested ans104 data items (e.g. `tx@1.0') recurse -%% through this shared-library entry point rather than calling the -%% `ans104@1.0' device module directly, which is not reachable across -%% device boundaries. to(Binary, _Req, _Opts) when is_binary(Binary) -> {ok, #tx{ tags = [{<<"ao-type">>, <<"binary">>}], data = Binary }}; to(TX, _Req, _Opts) when is_record(TX, tx) -> @@ -54,9 +50,6 @@ to(TABM, Req, Opts) when is_map(TABM) -> to(Other, _Req, _Opts) -> throw({invalid_tx, Other}). -%% @doc Prepare a TABM as a #tx record using codec-specific field -%% extraction and tag exclusion rules. Nested messages are always encoded -%% as `ans104@1.0' data items, via to/3. 
to(Device, TABM, Req, FieldsFun, ExcludedTagsFun, Opts) -> MaybeCommitment = hb_message:commitment( diff --git a/src/preloaded/message/dev_message.erl b/src/preloaded/message/dev_message.erl index 88f00a993..f158e9bb9 100644 --- a/src/preloaded/message/dev_message.erl +++ b/src/preloaded/message/dev_message.erl @@ -144,29 +144,27 @@ calculate_id(RawBase, Req, NodeOpts) -> {error, Error} -> throw({id, Error}) end, % Encode to a TABM. The `bundle' flag (when set on the request) is the - % caller's intent for the top-level commit. We don't specify a - % `hint-device' - as we're building a commitment there is no - % existing commitment that we want to pull a `bundle' flag from. + % caller's intent for the top-level message and applies only to the root; + % `hint-device' lets the structured codec reproduce each nested + % commitment's own bundle state per-node, so the id is computed over the + % same shape `commit/3' and `verify/3' would produce. SourceSpec = hb_message:add_bundle_hint( #{ <<"device">> => <<"structured@1.0">> }, - Req, + Req#{ <<"device">> => IDDev }, NodeOpts ), Base = hb_message:convert(RawBase, tabm, SourceSpec, NodeOpts), ?event(debug_id, {calculate_ids, {base, Base}}), ?event(debug_id, {generating_id, {id_device, IDDev}, {base, Base}}), - % Get the device module from the message, or use the default if it is not - % set. We can tell if the device is not set (or is the default) by checking - % whether the device module is the same as this module. - DevMod = + % Get the commitment device name from the message, or use the default if + % it is not set. We can tell if the device is not set (or is the default) + % by checking whether the resolved device module is this module itself. + % `hb_ao:raw/5' expects a device name, not a resolved module. 
+ CommitDev = case hb_device:message_to_device(#{ <<"device">> => IDDev }, NodeOpts) of - ?MODULE -> - hb_device:message_to_device( - #{ <<"device">> => ?DEFAULT_ID_DEVICE }, - NodeOpts - ); - Module -> Module + ?MODULE -> ?DEFAULT_ID_DEVICE; + _ -> IDDev end, ?event(debug_id, {called_id_device, CommitDev}, NodeOpts), {ok, #{ <<"commitments">> := Comms} } = @@ -264,13 +262,13 @@ commit(Self, Req, Opts) -> Opts#{ <<"linkify-mode">> => offload } end, % Encode to a TABM. The `bundle' flag (when set on the request) is the - % caller's intent for the top-level commit. We don't specify a - % `hint-device' - as we're building a commitment there is no - % existing commitment that we want to pull a `bundle' flag from. + % caller's intent for the top-level commit and applies only to the root + % message; `hint-device' lets the structured codec preserve each nested + % commitment's own bundle state per-node. SourceSpec = hb_message:add_bundle_hint( #{ <<"device">> => <<"structured@1.0">> }, - Req, + Req#{ <<"device">> => AttDev }, CommitOpts ), Loaded = @@ -295,22 +293,13 @@ commit(Self, Req, Opts) -> verify(Self, Req, Opts) -> % Get the target message of the verification request. {ok, RawBase} = hb_message:find_target(Self, Req, Opts), - Base = - hb_message:convert( - ensure_commitments_loaded( - RawBase, - Opts - ), - tabm, - Opts - ), - ?event(verify, {verify, {base_found, Base}}), - Commitments = maps:get(<<"commitments">>, Base, #{}), - IDsToVerify = commitment_ids_from_request(Base, Req, Opts), + CommitmentBase = ensure_commitments_loaded(RawBase, Opts), + Commitments = maps:get(<<"commitments">>, CommitmentBase, #{}), + IDsToVerify = commitment_ids_from_request(CommitmentBase, Req, Opts), % Generate the new commitment request base messsage by removing the keys % used by this function (path, committers, commitments) and returning the % remaining keys. 
This message will then be merged with each commitment - % message to generate the final request, allowing the caller to pass + % message to generate the final request, allowing the caller to pass % additional keys to the commitment device. ReqBase = maps:without( @@ -326,13 +315,36 @@ verify(Self, Req, Opts) -> Res = lists:all( fun(CommitmentID) -> + Commitment = maps:merge( + ReqBase, + maps:get(CommitmentID, Commitments) + ), + % Build the source spec from the commitment device alone: a + % `hint-device' lets the structured codec reproduce each + % subtree in the bundle state it was committed in. The verify + % request's `bundle' is deliberately *not* propagated -- a + % commitment is always verified in the state it was signed + % in, so any `bundle' passed by the caller is irrelevant. + SourceSpec = + hb_message:add_bundle_hint( + #{ <<"device">> => <<"structured@1.0">> }, + #{ + <<"device">> => + maps:get( + <<"commitment-device">>, + Commitment, + undefined + ) + }, + Opts + ), + Base = hb_message:convert( + CommitmentBase, tabm, SourceSpec, Opts), + ?event(verify, {verify, {base_found, Base}}), {ok, Res} = verify_commitment( Base, - maps:merge( - ReqBase, - maps:get(CommitmentID, Commitments) - ), + Commitment, Opts ), ?event(verify,