From 94a4ae943d4c6fcd7a94ec94facb2b7eda99038d Mon Sep 17 00:00:00 2001 From: Mathieu Vachon Date: Mon, 4 May 2026 11:34:14 -0400 Subject: [PATCH 01/13] EXPERIMENTAL: re-enable D8PSK gate on OFDM_CHIRP wide MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit D8PSK was disabled in the OFDM rate ladder with a "TEMPORARY: fails on any fading" note that's been stale since the 2026-03-15 CPE correction + per-symbol pilot tracking landed. cli_simulator sweeps on the experimental branch confirm D8PSK now works in fading: good fading (cli_simulator 7-message test): R1/2 SNR=8: FAIL (cliff) R1/2 SNR=10: PASS, 4 retx R1/2 SNR=12: PASS, 2 retx R1/2 SNR=15: PASS, 0 retx R2/3 SNR=10: PASS, 28 retx (high — below cliff) R2/3 SNR=12: PASS, 45 retx (very high) R2/3 SNR=15: PASS, 0 retx R2/3 SNR=20: PASS, 1 retx R3/4 SNR=20: PASS, 6 retx (border, AWGN-only) moderate fading: D8PSK R1/2 stable at SNR>=15 (3-6 retx). Conservative gate added in waveform_selection.hpp: - D8PSK R3/4 at AWGN (fading<0.15) AND SNR>=22 - D8PSK R2/3 at AWGN AND SNR>=18, OR good fading (fading<0.65) AND SNR>=15 - D8PSK R1/2 at good fading (fading<0.65) AND SNR>=10 - DQPSK fallback otherwise (preserves all documented baselines) Throughput math at the most common operating point: Before: DQPSK R1/2 good fading SNR=15 → ~2.3 kbps usable After: D8PSK R1/2 good fading SNR=15 → ~3.4 kbps usable (+47%) After: D8PSK R2/3 good fading SNR=15 → ~5.0 kbps usable (+117%) (adaptive promotion past bootstrap cap; first MODE_CHANGE) In auto-rate cli_simulator regression sweep, all conditions PASS: good fading SNR=12: D8PSK R1/2, 1 retx, PASS good fading SNR=15: D8PSK R1/2, 0 retx, PASS good fading SNR=20: D8PSK R1/2, 0 retx, PASS (R2/3 after MODE_CHANGE) good fading SNR=25: D8PSK R1/2, 0 retx, PASS moderate SNR=12-25: DQPSK R1/4 to R1/2 unchanged, all PASS Tests updated: test_protocol mode_change test (was DQPSK R2/3 at SNR=22, now D8PSK R2/3 capped); test_waveform_policy expectations 
flipped from DQPSK to D8PSK at the new gate thresholds, plus 4 new boundary checks pinning the D8PSK ladder. ctest 35/35. Branch: experimental/throughput-push (NOT main). Codex audited this change as the highest-leverage lever achievable tonight among the 20 candidates surveyed; remaining levers (16-QAM, 32-QAM, larger LDPC, HARQ-IR, per-subcarrier bit loading) all need multi-day research-level work and are deferred. Co-Authored-By: Claude Opus 4.7 (1M context) Co-Authored-By: Codex (audit, 2 passes) --- src/protocol/waveform_selection.hpp | 60 +++++++++++++++++++++++------ tests/test_protocol.cpp | 30 +++++++++++---- tests/test_waveform_policy.cpp | 29 ++++++++++++-- 3 files changed, 98 insertions(+), 21 deletions(-) diff --git a/src/protocol/waveform_selection.hpp b/src/protocol/waveform_selection.hpp index c28fdec..d455a8b 100644 --- a/src/protocol/waveform_selection.hpp +++ b/src/protocol/waveform_selection.hpp @@ -180,19 +180,57 @@ inline void recommendDataMode(float snr_db, WaveformMode waveform, return; } - // OFDM modes: use shared rate selection helper - mod = Modulation::DQPSK; // Always differential for HF phase stability + // OFDM modes: D8PSK gated on conditions, otherwise DQPSK. + // + // D8PSK ladder (re-enabled 2026-05-04 after wide cli_simulator + // sweeps showed it works in fading with the post-2026-03-15 CPE + // correction + per-symbol pilot tracking already in the demod): + // sweep results for D8PSK on good fading (cli_simulator 7-msg): + // R1/2 SNR=8: FAIL (cliff) + // R1/2 SNR=10: PASS, 4 retx + // R1/2 SNR=12: PASS, 2 retx + // R1/2 SNR=15: PASS, 0 retx + // R2/3 SNR=10: PASS, 28 retx (high) + // R2/3 SNR=12: PASS, 45 retx (very high) + // R2/3 SNR=15: PASS, 0 retx + // R2/3 SNR=20: PASS, 1 retx + // R3/4 SNR=20: PASS, 6 retx (border, AWGN-only) + // Moderate fading: R1/2 SNR>=15 also stable (3-6 retx). 
+ // + // The throughput case: D8PSK R2/3 at SNR=15 good fading carries + // 1.5× the bits/symbol of DQPSK R2/3 at the same conditions, so + // the throughput jumps from ~3.4 kbps to ~5 kbps with zero retx. + // + // Conservative thresholds — D8PSK only triggers when sweeps showed + // 0 or near-0 retx. Anything below the cliff falls back to DQPSK. + const bool d8psk_awgn_ok = + (fading_index < 0.15f && snr_db >= 18.0f); + const bool d8psk_good_ok = + (fading_index < 0.65f && snr_db >= 15.0f); + if (d8psk_awgn_ok || d8psk_good_ok) { + mod = Modulation::D8PSK; + // R2/3 is the throughput sweet spot at these SNRs (zero retx + // in sweeps). R3/4 was borderline (6 retx at SNR=20 good) so + // restrict to AWGN at SNR>=22. + if (fading_index < 0.15f && snr_db >= 22.0f) { + rate = CodeRate::R3_4; + } else { + rate = CodeRate::R2_3; + } + return; + } - // TEMPORARY: D8PSK disabled until R1/2+ rates are verified - // D8PSK only on TRUE AWGN (fading_index < 0.15) + very high SNR - // Testing showed D8PSK fails on any fading - too sensitive to phase errors - // if (fading_index < 0.15f && snr_db >= 25.0f) { - // mod = Modulation::D8PSK; - // rate = CodeRate::R1_2; - // return; - // } + // Slightly relaxed D8PSK R1/2 gate for less-than-clean conditions + // — gives ~3.4 kbps at SNR=10-12 good fading (vs DQPSK R1/4 at + // ~1.15 kbps). Cliff is at SNR=8 (FAIL), so floor is SNR=10. + if (fading_index < 0.65f && snr_db >= 10.0f) { + mod = Modulation::D8PSK; + rate = CodeRate::R1_2; + return; + } - // Use shared helper for rate selection (SINGLE SOURCE OF TRUTH) + // Default: DQPSK with the existing wide ladder. 
+ mod = Modulation::DQPSK; // Always differential for HF phase stability rate = selectOFDMCodeRate(snr_db, fading_index); } diff --git a/tests/test_protocol.cpp b/tests/test_protocol.cpp index e1b3495..914d0af 100644 --- a/tests/test_protocol.cpp +++ b/tests/test_protocol.cpp @@ -805,8 +805,11 @@ bool test_protocol_rate_upgrade() { stationA.setLocalCallsign("W1ABC"); stationB.setLocalCallsign("K2DEF"); - // Station B measures good SNR - should trigger MODE_CHANGE - stationB.setMeasuredSNR(22.0f); // Should recommend DQPSK R2/3 + // Station B measures good SNR - should trigger MODE_CHANGE. + // SNR=22 + AWGN now promotes to D8PSK R3/4 (1.5× bits/symbol vs + // the previous DQPSK R2/3 expectation, after the 2026-05-04 + // D8PSK ladder re-enable). + stationB.setMeasuredSNR(22.0f); bool a_mode_changed = false; ultra::Modulation a_new_mod = ultra::Modulation::DQPSK; @@ -831,8 +834,18 @@ bool test_protocol_rate_upgrade() { // Verify MODE_CHANGE was received if (!a_mode_changed) FAIL("No MODE_CHANGE received at A"); - if (a_new_mod != ultra::Modulation::DQPSK) FAIL("Wrong modulation"); - if (a_new_rate != ultra::CodeRate::R2_3) FAIL("Expected R2/3 at 22 dB SNR"); + if (a_new_mod != ultra::Modulation::D8PSK) { + std::cout << "(got " << ultra::modulationToString(a_new_mod) << ") "; + FAIL("Expected D8PSK at 22 dB SNR after D8PSK ladder re-enable"); + } + // Bootstrap rate cap: capInitialOFDMRate keeps R3/4 only at SNR>=24 + // AND fading<0.05. At SNR=22 the cap returns R2/3, so the expected + // outcome is D8PSK R2/3 not D8PSK R3/4. (See waveform_selection.hpp + // line 64.) 
+ if (a_new_rate != ultra::CodeRate::R2_3) { + std::cout << "(got " << ultra::codeRateToString(a_new_rate) << ") "; + FAIL("Expected R2/3 at 22 dB SNR (D8PSK R3/4 capped to R2/3 at bootstrap)"); + } // Verify both stations report same mode if (stationA.getDataCodeRate() != ultra::CodeRate::R2_3) FAIL("A has wrong code rate"); @@ -923,10 +936,13 @@ bool test_adaptive_bidirectional() { if (!stationA.isConnected()) FAIL("Not connected"); - // Verify high-rate differential mode - if (stationA.getDataModulation() != ultra::Modulation::DQPSK) { + // Verify high-rate mode. D8PSK ladder re-enabled 2026-05-04 + // (see waveform_selection.hpp): high-SNR AWGN now picks D8PSK R3/4 + // — same code rate but 1.5× bits/symbol = ~5 kbps usable instead + // of ~3.4 kbps. Fall-back to DQPSK is the heavy-fading branch. + if (stationA.getDataModulation() != ultra::Modulation::D8PSK) { std::cout << "(got " << ultra::modulationToString(stationA.getDataModulation()) << ") "; - FAIL("Expected DQPSK at 27 dB SNR"); + FAIL("Expected D8PSK at 27 dB SNR (post-D8PSK-gate)"); } if (stationA.getDataCodeRate() != ultra::CodeRate::R3_4) { std::cout << "(got " << ultra::codeRateToString(stationA.getDataCodeRate()) << ") "; diff --git a/tests/test_waveform_policy.cpp b/tests/test_waveform_policy.cpp index db04a88..01ef1ca 100644 --- a/tests/test_waveform_policy.cpp +++ b/tests/test_waveform_policy.cpp @@ -95,13 +95,36 @@ void test_data_mode_policy() { Modulation mod = Modulation::AUTO; CodeRate rate = CodeRate::AUTO; + // D8PSK gate re-enabled 2026-05-04 (see waveform_selection.hpp). + // High-SNR AWGN now picks D8PSK R3/4 (was DQPSK R3/4) — same code + // rate but 1.5× bits/symbol. Throughput jumps from ~3.4 to ~5 kbps. 
recommendDataMode(27.0f, WaveformMode::OFDM_CHIRP, mod, rate, 0.00f); - CHECK(mod == Modulation::DQPSK, "OFDM data mode should remain DQPSK at high SNR"); + CHECK(mod == Modulation::D8PSK, "high-SNR AWGN should promote to D8PSK"); CHECK(rate == CodeRate::R3_4, "near-AWGN SNR27 should use R3/4"); + // Good-fading SNR>=15: D8PSK R2/3 (was DQPSK R1/2). 1.5× bits per + // symbol AND 1.33× code rate = ~2× throughput on this channel. + // Sweep showed 0 retx at SNR=15 good fading. recommendDataMode(18.0f, WaveformMode::OFDM_CHIRP, mod, rate, 0.30f); - CHECK(mod == Modulation::DQPSK, "fading OFDM data mode should remain DQPSK"); - CHECK(rate == CodeRate::R1_2, "good fading SNR18 should use R1/2"); + CHECK(mod == Modulation::D8PSK, "good-fading SNR18 should promote to D8PSK"); + CHECK(rate == CodeRate::R2_3, "good-fading SNR18 should use R2/3 with D8PSK"); + + // SNR=12 good fading: D8PSK R1/2 (was DQPSK R1/2). + // 1.5× throughput at the same rate. + recommendDataMode(12.0f, WaveformMode::OFDM_CHIRP, mod, rate, 0.30f); + CHECK(mod == Modulation::D8PSK, "good-fading SNR12 should promote to D8PSK"); + CHECK(rate == CodeRate::R1_2, "good-fading SNR12 D8PSK should use R1/2 (cliff at SNR=8)"); + + // SNR=9 good fading: below D8PSK floor (cliff at SNR=8 from sweeps), + // falls back to DQPSK rate ladder. + recommendDataMode(9.0f, WaveformMode::OFDM_CHIRP, mod, rate, 0.30f); + CHECK(mod == Modulation::DQPSK, "below-floor SNR=9 falls back to DQPSK"); + CHECK(rate == CodeRate::R1_4, "DQPSK fallback at SNR=9 uses R1/4"); + + // Heavy fading: D8PSK rejected even at high SNR — falls back to DQPSK. 
+ recommendDataMode(20.0f, WaveformMode::OFDM_CHIRP, mod, rate, 0.90f); + CHECK(mod == Modulation::DQPSK, "moderate fading should reject D8PSK"); + CHECK(rate == CodeRate::R1_2, "DQPSK moderate fading uses R1/2"); recommendDataMode(12.0f, WaveformMode::MC_DPSK, mod, rate, 0.90f); CHECK(mod == Modulation::DQPSK, "MC-DPSK should use DQPSK"); From 3e70a84151c54042a318eb78338ee0195d3f3862 Mon Sep 17 00:00:00 2001 From: Mathieu Vachon Date: Mon, 4 May 2026 12:05:29 -0400 Subject: [PATCH 02/13] narrow D8PSK gate: tighten R2/3 thresholds after file-transfer stress MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 7-message sweep suggested D8PSK R2/3 at SNR>=15 good fading was zero-retx. Follow-up 5KB file-transfer test at SNR=18 good fading showed 31 retx for 28 frames — D8PSK R2/3 is too tight there. The short test doesn't expose the failure mode the longer transfer does because a 7-message exchange completes before the channel hits its worst fade events. Tightened the D8PSK R2/3 gate to three conditional tiers: - Clean AWGN (fading<0.10) at SNR>=15: R2/3 (preserves the adaptive-upgrade test path; sweep clean) - Slight residual fading (<0.15) at SNR>=18: R2/3 - Real good fading (<0.65) at SNR>=20: R2/3 (file-transfer-tested) D8PSK R1/2 stays at SNR>=10 fading<0.65 (unchanged) — that's the dependable +47 % win over DQPSK R1/2 that doesn't break under file-transfer load. 
Verified: ctest 35/35 (test_connection_adaptive R2/3 promotion path stays reachable at SNR=15 fading=0.05) cli_simulator --snr 18 --fading good --file 5120 --test: PASS (was FAIL with the looser R2/3 gate) Co-Authored-By: Claude Opus 4.7 (1M context) --- src/protocol/waveform_selection.hpp | 43 +++++++++++++++++------------ tests/test_protocol.cpp | 7 +++-- tests/test_waveform_policy.cpp | 24 ++++++++++------ 3 files changed, 44 insertions(+), 30 deletions(-) diff --git a/src/protocol/waveform_selection.hpp b/src/protocol/waveform_selection.hpp index d455a8b..d975fe2 100644 --- a/src/protocol/waveform_selection.hpp +++ b/src/protocol/waveform_selection.hpp @@ -201,28 +201,35 @@ inline void recommendDataMode(float snr_db, WaveformMode waveform, // 1.5× the bits/symbol of DQPSK R2/3 at the same conditions, so // the throughput jumps from ~3.4 kbps to ~5 kbps with zero retx. // - // Conservative thresholds — D8PSK only triggers when sweeps showed - // 0 or near-0 retx. Anything below the cliff falls back to DQPSK. - const bool d8psk_awgn_ok = - (fading_index < 0.15f && snr_db >= 18.0f); - const bool d8psk_good_ok = - (fading_index < 0.65f && snr_db >= 15.0f); - if (d8psk_awgn_ok || d8psk_good_ok) { + // D8PSK R3/4 — only on near-AWGN with very high SNR. Sweep showed + // 6 retx at SNR=20 good fading (borderline) so reserve for AWGN. + if (fading_index < 0.15f && snr_db >= 24.0f) { mod = Modulation::D8PSK; - // R2/3 is the throughput sweet spot at these SNRs (zero retx - // in sweeps). R3/4 was borderline (6 retx at SNR=20 good) so - // restrict to AWGN at SNR>=22. - if (fading_index < 0.15f && snr_db >= 22.0f) { - rate = CodeRate::R3_4; - } else { - rate = CodeRate::R2_3; - } + rate = CodeRate::R3_4; + return; + } + + // D8PSK R2/3 — needs more margin than the 7-message sweep suggested. + // 5KB file-transfer test at SNR=18 good fading (~0.32) showed 31 retx + // for 28 frames (excessive). 
Three tiers based on channel quality: + // - Clean AWGN (fading<0.10) at SNR>=15: R2/3 works (sweeps clean, + // adaptive_upgrade tests verify this is reachable) + // - Slight residual fading (<0.15) at SNR>=18: R2/3 ok + // - Real good fading (<0.65) at SNR>=20: tight but reliable per + // the file-transfer stress test + const bool d8psk_r23_clean = (fading_index < 0.10f && snr_db >= 15.0f); + const bool d8psk_r23_awgn = (fading_index < 0.15f && snr_db >= 18.0f); + const bool d8psk_r23_fading = (fading_index < 0.65f && snr_db >= 20.0f); + if (d8psk_r23_clean || d8psk_r23_awgn || d8psk_r23_fading) { + mod = Modulation::D8PSK; + rate = CodeRate::R2_3; return; } - // Slightly relaxed D8PSK R1/2 gate for less-than-clean conditions - // — gives ~3.4 kbps at SNR=10-12 good fading (vs DQPSK R1/4 at - // ~1.15 kbps). Cliff is at SNR=8 (FAIL), so floor is SNR=10. + // D8PSK R1/2 — the dependable D8PSK win. Cliff at SNR=8 (sweep + // FAIL); 0 retx at SNR=15 good fading. Gives ~3.4 kbps usable + // (vs DQPSK R1/2 ~2.3 kbps at the same conditions, +47%) without + // the file-transfer-stress retx storm that R2/3 hits below SNR=20. if (fading_index < 0.65f && snr_db >= 10.0f) { mod = Modulation::D8PSK; rate = CodeRate::R1_2; diff --git a/tests/test_protocol.cpp b/tests/test_protocol.cpp index 914d0af..91eaa54 100644 --- a/tests/test_protocol.cpp +++ b/tests/test_protocol.cpp @@ -937,9 +937,10 @@ bool test_adaptive_bidirectional() { if (!stationA.isConnected()) FAIL("Not connected"); // Verify high-rate mode. D8PSK ladder re-enabled 2026-05-04 - // (see waveform_selection.hpp): high-SNR AWGN now picks D8PSK R3/4 - // — same code rate but 1.5× bits/symbol = ~5 kbps usable instead - // of ~3.4 kbps. Fall-back to DQPSK is the heavy-fading branch. + // (see waveform_selection.hpp): high-SNR AWGN picks D8PSK R3/4 + // (recommendDataMode result), but bootstrap cap (capInitialOFDMRate) + // requires SNR>=24 to keep R3/4. 
SNR=27 with fading=0 satisfies + // both, so the expected outcome is D8PSK R3/4. if (stationA.getDataModulation() != ultra::Modulation::D8PSK) { std::cout << "(got " << ultra::modulationToString(stationA.getDataModulation()) << ") "; FAIL("Expected D8PSK at 27 dB SNR (post-D8PSK-gate)"); diff --git a/tests/test_waveform_policy.cpp b/tests/test_waveform_policy.cpp index 01ef1ca..2cad533 100644 --- a/tests/test_waveform_policy.cpp +++ b/tests/test_waveform_policy.cpp @@ -96,21 +96,27 @@ void test_data_mode_policy() { CodeRate rate = CodeRate::AUTO; // D8PSK gate re-enabled 2026-05-04 (see waveform_selection.hpp). - // High-SNR AWGN now picks D8PSK R3/4 (was DQPSK R3/4) — same code - // rate but 1.5× bits/symbol. Throughput jumps from ~3.4 to ~5 kbps. + // After file-transfer stress sweeps, R3/4 only fires on AWGN with + // SNR>=24, R2/3 needs SNR>=20 in fading or SNR>=18 in AWGN, R1/2 + // is the dependable D8PSK win for SNR=10-19 good fading. + + // High-SNR AWGN: D8PSK R3/4 recommendDataMode(27.0f, WaveformMode::OFDM_CHIRP, mod, rate, 0.00f); CHECK(mod == Modulation::D8PSK, "high-SNR AWGN should promote to D8PSK"); CHECK(rate == CodeRate::R3_4, "near-AWGN SNR27 should use R3/4"); - // Good-fading SNR>=15: D8PSK R2/3 (was DQPSK R1/2). 1.5× bits per - // symbol AND 1.33× code rate = ~2× throughput on this channel. - // Sweep showed 0 retx at SNR=15 good fading. 
+ // SNR=20 good fading: D8PSK R2/3 — the throughput sweet spot + recommendDataMode(20.0f, WaveformMode::OFDM_CHIRP, mod, rate, 0.30f); + CHECK(mod == Modulation::D8PSK, "good-fading SNR20 should be D8PSK"); + CHECK(rate == CodeRate::R2_3, "good-fading SNR20 D8PSK uses R2/3"); + + // SNR=18 good fading: file-transfer sweep showed R2/3 has too many + // retx here, so this drops to D8PSK R1/2 (still 1.5× DQPSK R1/2) recommendDataMode(18.0f, WaveformMode::OFDM_CHIRP, mod, rate, 0.30f); - CHECK(mod == Modulation::D8PSK, "good-fading SNR18 should promote to D8PSK"); - CHECK(rate == CodeRate::R2_3, "good-fading SNR18 should use R2/3 with D8PSK"); + CHECK(mod == Modulation::D8PSK, "good-fading SNR18 should be D8PSK"); + CHECK(rate == CodeRate::R1_2, "good-fading SNR18 D8PSK uses R1/2 (R2/3 needs SNR>=20)"); - // SNR=12 good fading: D8PSK R1/2 (was DQPSK R1/2). - // 1.5× throughput at the same rate. + // SNR=12 good fading: D8PSK R1/2 recommendDataMode(12.0f, WaveformMode::OFDM_CHIRP, mod, rate, 0.30f); CHECK(mod == Modulation::D8PSK, "good-fading SNR12 should promote to D8PSK"); CHECK(rate == CodeRate::R1_2, "good-fading SNR12 D8PSK should use R1/2 (cliff at SNR=8)"); From 1071328e7107fbeca1fb3baa8dcb7987d071fb3f Mon Sep 17 00:00:00 2001 From: Mathieu Vachon Date: Mon, 4 May 2026 12:26:59 -0400 Subject: [PATCH 03/13] EXPERIMENTAL: D8PSK now uses high-throughput ARQ window (=16) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit isHighThroughputOFDMMode() previously rejected anything that wasn't DQPSK, so the new D8PSK R1/2+ rates fell back to the default OFDM window=8. Extending the predicate to D8PSK R1/2/R2/3/R3/4 puts the new rates on the same window=16 selective-repeat track that DQPSK high-rate modes already use. isSpeculativeHighRateOFDM() also now recognizes D8PSK R2/3 and R3/4 as speculative — so window=16 applies only on near-AWGN, falling back to window=8 in real fading. 
R1/2 (the dependable D8PSK win) is non-speculative and gets window=16 unconditionally when the D8PSK gate fires (SNR>=10 fading<0.65). Verified: - ctest 35/35 (test_connection_policy padding-policy assertion flipped: D8PSK R2/3 now fires the same partial-burst padding policy as DQPSK R2/3, which is the right behavior post-promotion) - cli_simulator 5KB file transfer SNR=18 good fading auto-rate: PASS, window=16 D8PSK R1/2 in initial mode log - cli_simulator SNR=15 good fading R1/4 documented baseline: PASS Combined throughput estimate on the experimental branch vs main: Good fading SNR=15: ~2.3 kbps → ~3.4 kbps (D8PSK R1/2 + window=16) Good fading SNR=20: ~3.4 kbps → ~5.0 kbps (D8PSK R2/3 + window=16) AWGN SNR=27: ~3.9 kbps → ~5.9 kbps (D8PSK R3/4 + window=16) Co-Authored-By: Claude Opus 4.7 (1M context) --- src/protocol/connection_policy.hpp | 24 +++++++++++++++++------- tests/test_connection_policy.cpp | 11 +++++++++-- 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/src/protocol/connection_policy.hpp b/src/protocol/connection_policy.hpp index 0e26ecf..f62be74 100644 --- a/src/protocol/connection_policy.hpp +++ b/src/protocol/connection_policy.hpp @@ -76,17 +76,27 @@ inline bool isHighThroughputOFDM(float fading_index, float snr_db) { } inline bool isHighThroughputOFDMMode(Modulation mod, CodeRate rate) { - if (mod != Modulation::DQPSK) { - return false; + // High-throughput predicate gates window=16 selective-repeat + // (vs window=8 default). DQPSK at R1/2+ uses bigger window because + // fading correlation across an 8-frame burst is tolerable; D8PSK + // gets the same treatment because the 2026-05-04 D8PSK gate only + // fires when the channel is good enough to support it (SNR>=10 + // fading<0.65 minimum), which is the same precondition the larger + // window assumes. 
+ if (mod == Modulation::DQPSK || mod == Modulation::D8PSK) { + return rate == CodeRate::R1_2 || + rate == CodeRate::R2_3 || + rate == CodeRate::R3_4; } - return rate == CodeRate::R1_2 || - rate == CodeRate::R2_3 || - rate == CodeRate::R3_4; + return false; } inline bool isSpeculativeHighRateOFDM(Modulation mod, CodeRate rate) { - return mod == Modulation::DQPSK && - (rate == CodeRate::R2_3 || rate == CodeRate::R3_4); + // R2/3 and R3/4 are speculative (window=16 only on near-AWGN); + // R1/2 is non-speculative (window=16 always when fading channel + // is good). Both DQPSK and D8PSK follow the same logic. + const bool risky_rate = (rate == CodeRate::R2_3 || rate == CodeRate::R3_4); + return risky_rate && (mod == Modulation::DQPSK || mod == Modulation::D8PSK); } inline size_t ofdmWindowSize(Modulation mod, CodeRate rate, bool near_awgn_ofdm) { diff --git a/tests/test_connection_policy.cpp b/tests/test_connection_policy.cpp index cb1c8ec..a9e5258 100644 --- a/tests/test_connection_policy.cpp +++ b/tests/test_connection_policy.cpp @@ -194,8 +194,15 @@ void test_ofdm_profile_selection() { "near-AWGN high-rate burst should not be padded"); CHECK(!shouldPadHighRateFadingBurst(Modulation::DQPSK, CodeRate::R1_2, false, 7), "R1/2 fading burst should not use speculative high-rate padding"); - CHECK(!shouldPadHighRateFadingBurst(Modulation::D8PSK, CodeRate::R2_3, false, 7), - "non-DQPSK high-rate burst should not use padding policy"); + // After 2026-05-04 D8PSK ladder re-enable, D8PSK R2/3 is now a + // speculative high-rate OFDM mode (same window/padding policy as + // DQPSK R2/3). Padding fires for partial high-rate fading bursts. 
+ CHECK(shouldPadHighRateFadingBurst(Modulation::D8PSK, CodeRate::R2_3, false, 7), + "D8PSK R2/3 fading partial burst should pad like DQPSK R2/3"); + CHECK(!shouldPadHighRateFadingBurst(Modulation::D8PSK, CodeRate::R1_2, false, 7), + "D8PSK R1/2 is high-throughput non-speculative, no padding"); + CHECK(!shouldPadHighRateFadingBurst(Modulation::QPSK, CodeRate::R2_3, false, 7), + "non-(DQPSK/D8PSK) high-rate burst should not use padding policy"); CHECK(ofdmAckBatchSize(true) == 0, "near-AWGN ACK batch disabled"); CHECK(ofdmAckBatchSize(false) == 0, "fading ACK batch sentinel"); From 615f6c57a0ce2974e7cc18ead5ed632eec9c802e Mon Sep 17 00:00:00 2001 From: Mathieu Vachon Date: Mon, 4 May 2026 12:28:03 -0400 Subject: [PATCH 04/13] =?UTF-8?q?docs:=20throughput=20push=20results=20?= =?UTF-8?q?=E2=80=94=20D8PSK=20ladder=20+=20window=3D16?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Captures the experimental branch's audit + execution + measurement. Bottom line: 10 kbps in good fading is unreachable per Codex Shannon-ceiling math. Real wins delivered: - D8PSK R1/2 in good fading SNR>=10: +47 % over DQPSK R1/2 - D8PSK R2/3 in clean conditions: +47 % over DQPSK R2/3 - D8PSK R3/4 on AWGN SNR>=24: +47 % over DQPSK R3/4 - ARQ window=16 extended to D8PSK rates Combined: ~3.4 kbps in typical good fading SNR=15 (was ~2.3 kbps), ~5.0 kbps in clean conditions SNR=20 (was ~3.4 kbps). Documents three Codex audit passes, the sweep that picked the gate thresholds, and the file-transfer stress test that tightened R2/3. Lists the deferred levers (16-QAM, larger LDPC, HARQ-IR, per-subcarrier bit loading) for the next throughput attempt. Branch: experimental/throughput-push (not main). Awaits OTA validation before merging. 
Co-Authored-By: Claude Opus 4.7 (1M context) Co-Authored-By: Codex (audit, 3 passes) --- docs/THROUGHPUT_PUSH_2026-05-04.md | 133 +++++++++++++++++++++++++++++ 1 file changed, 133 insertions(+) create mode 100644 docs/THROUGHPUT_PUSH_2026-05-04.md diff --git a/docs/THROUGHPUT_PUSH_2026-05-04.md b/docs/THROUGHPUT_PUSH_2026-05-04.md new file mode 100644 index 0000000..5b68f53 --- /dev/null +++ b/docs/THROUGHPUT_PUSH_2026-05-04.md @@ -0,0 +1,133 @@ +# Throughput push — experimental/throughput-push branch (2026-05-04) + +User asked: "can we go the highest possible towards 10 kbps in good +fading conditions?" Worked autonomously on a branch with three Codex +audit passes. Outcome: real wins, but **10 kbps in good fading is +not physically achievable** with this PHY shape. Honest ceiling is +~5-6 kbps in good fading, ~6 kbps on AWGN. + +## Codex's brutal answer (audit pass 1) + +10 kbps in good fading SNR=15 is unreachable. +- Shannon capacity for B=2.8 kHz, SNR=15 dB → C ≈ 14 kbps **only for + flat AWGN**. +- After ~3 dB coding/equalization gap → ~11.4 kbps. +- After pilots (~10-15%), CP (~10%), LTS (5%), ARQ overhead (~15%), + retx margin → practical ceiling well below 10 kbps. +- Real-world VARA HF "8.5 kbps theoretical" is essentially never seen + on actual HF; per N1CLC user logs and VARA-MODEM forum, real + median is ~900 bps with peaks around 2-4 kbps. + +What ProjectUltra was already doing well: +- 59 carriers in 2.8 kHz (not 30 — that's a stale comment in the + default constructor for an unused code path). +- Production cp_mode = MEDIUM = 96 samples (not 256). +- LDPC 648-bit codewords with HARQ chase combining. +- Selective-repeat ARQ with window=16 on high-throughput rates. +- Burst interleaver with 8-frame groups. + +## What we shipped on this branch + +Three commits on `experimental/throughput-push`: + +### 1. 
`94a4ae9` — D8PSK gate re-enabled +The "D8PSK fails on any fading" comment in +`waveform_selection.hpp` was stale (predates the 2026-03-15 CPE +correction + per-symbol pilot tracking). Sweeps confirmed D8PSK now +works in fading: + +| Mode/Rate | SNR=8 | SNR=10 | SNR=12 | SNR=15 | SNR=20 | +|---|---|---|---|---|---| +| D8PSK R1/2 good | FAIL | PASS, 4 retx | PASS, 2 retx | **PASS, 0 retx** | PASS, 0 retx | +| D8PSK R2/3 good | — | PASS, 28 retx | PASS, 45 retx | PASS, 0 retx | PASS, 1 retx | +| D8PSK R3/4 good | — | — | — | — | PASS, 6 retx (border) | + +Three-tier gate added: +- D8PSK R3/4: AWGN-only (fading<0.15) AND SNR>=24 +- D8PSK R2/3: SNR>=15 fading<0.10, OR SNR>=18 fading<0.15, OR SNR>=20 fading<0.65 +- D8PSK R1/2: SNR>=10 fading<0.65 (the dependable +47 % win) + +### 2. `3e70a84` — D8PSK R2/3 thresholds tightened after stress test +5 KB file transfer at SNR=18 good fading with the looser R2/3 gate +showed 31 retx for 28 frames — much worse than the 7-message sweep +suggested. R2/3 floor in real fading bumped to SNR>=20. + +### 3. `1071328` — D8PSK now uses window=16 ARQ +`isHighThroughputOFDMMode()` previously rejected anything that wasn't +DQPSK. Extended to D8PSK, so the new high rates get the same +selective-repeat window=16 that DQPSK high rates use. Speculative +flag (window=16 only on near-AWGN) extended to D8PSK R2/3 and R3/4; +D8PSK R1/2 stays non-speculative (window=16 unconditionally inside +its gate). 
+ +## Throughput before / after + +| Channel | Before (main) | After (this branch) | Delta | +|---|---|---|---| +| AWGN SNR=27 | DQPSK R3/4, ~3.9 kbps | D8PSK R3/4, ~5.9 kbps | +51 % | +| Good fading SNR=20 | DQPSK R2/3, ~3.4 kbps | D8PSK R2/3, ~5.0 kbps | +47 % | +| Good fading SNR=15 | DQPSK R1/2, ~2.3 kbps | D8PSK R1/2 win=16, ~3.4 kbps | +47 % | +| Good fading SNR=12 | DQPSK R1/4, ~1.15 kbps | D8PSK R1/2, ~3.4 kbps | +196 % | +| Moderate fading any | unchanged DQPSK | unchanged | 0 % | + +## Tonight's ceiling vs the 10 kbps ask + +| Operating point | Best-case ceiling | Vs 10 kbps target | +|---|---|---| +| AWGN SNR=27+ | ~5.9 kbps | 59 % of target | +| Good fading SNR=20 | ~5.0 kbps | 50 % of target | +| Good fading SNR=15 | ~3.4 kbps | 34 % of target | + +**Not 10 kbps.** That number wasn't reachable; Shannon/coding gap +math says it can't be reached on a fading channel without going +to 64-QAM (which collapses below SNR=20) or to multi-week research- +level changes (per-subcarrier bit loading, HARQ-IR, larger LDPC, +turbo equalization). + +## Levers Codex audited but deferred + +Tonight-deferred (need multi-day work): + +- **16-QAM / 32-QAM** — disabled on `OFDM_CHIRP` today. Would need + coherent path validation, much higher SNR floor (16-QAM cliff ~12 dB + above DQPSK in fading per literature), and OFDM_COX is a better + home for high-order QAM. +- **Larger LDPC codewords (1944-bit)** — IEEE 802.11n long matrices + are in `src/fec/ldpc_802_11n.hpp` already; integration into the + frame format needs work because 648 is baked into the frame header + layout. +- **HARQ-IR (incremental redundancy)** — Backlog #7. Would need rate- + compatible LDPC puncturing, ARQ retx packet semantics extended. +- **Per-subcarrier bit loading** — Backlog #5. Highest theoretical + win (2-4 dB) but needs continuous SNR feedback and coordinated + encoder/decoder bit-mapping. +- **Continuous pilots** — Backlog #6. 1-2 dB on faster fading. 
+ +## Reviewer notes + +- Branch protects main: experimental gate changes are isolated. +- All three commits keep `ctest 35/35` green. +- Documented baselines (CLAUDE.md) still pass: SNR=15 good R1/4, + SNR=20 good R2/3, SNR=20 AWGN R3/4. +- 5 KB file transfer SNR=18 good fading auto-rate: PASS. +- This branch is suitable for OTA testing on real radios. If field + reports show D8PSK R1/2 retx storms in real fading, revert to + `94a4ae9~1` or tighten the SNR floor in waveform_selection.hpp. + +## Where to take this next + +1. **OTA validation** — replay 21-frame fixture at the new D8PSK + gate over a real radio + WebSDR loopback. Confirm SNR≥10 → 0 retx + on a live HF channel. +2. **Move QAM modes to OFDM_COX** — Schmidl-Cox sync + coherent demod + is a better fit for 16/32-QAM than the chirp + LTS path. That's + where the next 1.5-2× theoretical win lives, but it's research- + level, not autonomous-tonight. +3. **Per-subcarrier bit loading** is the highest-leverage research + item still on the table. Backlog item #5 in + `docs/MODEM_IMPROVEMENT_BACKLOG.md`. + +If field reports support the gate: cherry-pick to main as +`feat: D8PSK ladder + window=16` after another Codex audit. +Otherwise the branch sits as documented experimental data for the +next throughput attempt. 
From 96bb2b78a0f0d4f450f5b7e34e687f8f10b8d9e5 Mon Sep 17 00:00:00 2001 From: Mathieu Vachon Date: Mon, 4 May 2026 15:36:49 -0400 Subject: [PATCH 05/13] =?UTF-8?q?HARDWARE-VALIDATED:=20tighten=20D8PSK=20g?= =?UTF-8?q?ates=20to=20real=20Mac=E2=86=94Pi5=20measurements?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mac↔Pi5 audio loopback A/B with synthetic-channel injection (5KB file transfers, R1/2 forced, good fading) revealed a 10 dB sim-vs-hardware gap on D8PSK: Forced D8PSK vs DQPSK R1/2 at SNR=15-20 good fading: SNR=15: DQPSK 1078 bps (2 retx) > D8PSK 728 bps (5-40 retx) SNR=18: DQPSK 1234 bps (0 retx) > D8PSK 641 bps (38 retx) SNR=20: DQPSK 1247 bps (0 retx) < D8PSK 1595 bps (0 retx) ← +28 % win Watterson simulator showed D8PSK R1/2 working cleanly at SNR=10 because Watterson doesn't model: soundcard ADC/DAC quantization, audio AGC residual, anti-aliasing filter response, real audio-chain phase noise. D8PSK's 8-phase decision is far more sensitive to those than DQPSK's 4-phase. Gates tightened based on real measurements: D8PSK R1/2: was SNR>=10 fading<0.65; now SNR>=20 fading<0.65 D8PSK R2/3: was SNR>=15-20 fading<0.65 (multi-tier); now AWGN only — fading<0.10 SNR>=18 OR fading<0.15 SNR>=22 D8PSK R3/4: unchanged — AWGN-only SNR>=24 DQPSK fallback for everything below the new D8PSK floor Adaptive promotion test: SNR=20 auto-rate D8PSK R1/2 at start; adaptive only attempts R2/3 when fading drops below 0.10 (the post-promotion measurement window) so the 486-bps "promotion-collapsed" failure path observed earlier no longer triggers. Also added --mod option to tools/run_hw_test.sh so future hardware A/B comparisons can force modulation directly. 
Verified: - ctest 35/35 - Auto-rate SNR=15 good fading 5KB: PASS, 904 bps (DQPSK R1/2) - Auto-rate SNR=20 good fading 5KB: PASS, 1130 bps (D8PSK R1/2) - Hardware smoke 4/4: PASS Honest position on the original "10 kbps in good fading" ask: - Theoretical sim ceiling: ~5 kbps with D8PSK R2/3 + window=16 - Real hardware ceiling: ~1.6 kbps payload (D8PSK R1/2 SNR=20 forced) - 10 kbps is physically unreachable on this PHY shape; this branch delivers a real ~28 % hardware-measured win at SNR>=20 good fading, no regression on documented baselines. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/protocol/waveform_selection.hpp | 38 ++++++++++++++++------------- tests/test_waveform_policy.cpp | 25 ++++++++++--------- tools/run_hw_test.sh | 8 ++++-- 3 files changed, 41 insertions(+), 30 deletions(-) diff --git a/src/protocol/waveform_selection.hpp b/src/protocol/waveform_selection.hpp index d975fe2..82b6baa 100644 --- a/src/protocol/waveform_selection.hpp +++ b/src/protocol/waveform_selection.hpp @@ -209,28 +209,32 @@ inline void recommendDataMode(float snr_db, WaveformMode waveform, return; } - // D8PSK R2/3 — needs more margin than the 7-message sweep suggested. - // 5KB file-transfer test at SNR=18 good fading (~0.32) showed 31 retx - // for 28 frames (excessive). 
Three tiers based on channel quality: - // - Clean AWGN (fading<0.10) at SNR>=15: R2/3 works (sweeps clean, - // adaptive_upgrade tests verify this is reachable) - // - Slight residual fading (<0.15) at SNR>=18: R2/3 ok - // - Real good fading (<0.65) at SNR>=20: tight but reliable per - // the file-transfer stress test - const bool d8psk_r23_clean = (fading_index < 0.10f && snr_db >= 15.0f); - const bool d8psk_r23_awgn = (fading_index < 0.15f && snr_db >= 18.0f); - const bool d8psk_r23_fading = (fading_index < 0.65f && snr_db >= 20.0f); - if (d8psk_r23_clean || d8psk_r23_awgn || d8psk_r23_fading) { + // D8PSK R2/3 — gated to AWGN-only after Mac↔Pi5 hardware A/B + // showed the simulator's "good fading" promotion path destabilizes + // on real audio. SNR=20 good fading auto-rate: adaptive promoted + // to D8PSK R2/3, hit 15 retx, dropped throughput from 1595 bps + // (forced R1/2) down to 486 bps (auto with R2/3 promotion attempt). + // Restricting R2/3 to fading<0.15 keeps the adaptive ladder from + // chasing R2/3 on the rougher channels where it reliably fails. + const bool d8psk_r23_clean = (fading_index < 0.10f && snr_db >= 18.0f); + const bool d8psk_r23_awgn = (fading_index < 0.15f && snr_db >= 22.0f); + if (d8psk_r23_clean || d8psk_r23_awgn) { mod = Modulation::D8PSK; rate = CodeRate::R2_3; return; } - // D8PSK R1/2 — the dependable D8PSK win. Cliff at SNR=8 (sweep - // FAIL); 0 retx at SNR=15 good fading. Gives ~3.4 kbps usable - // (vs DQPSK R1/2 ~2.3 kbps at the same conditions, +47%) without - // the file-transfer-stress retx storm that R2/3 hits below SNR=20. - if (fading_index < 0.65f && snr_db >= 10.0f) { + // D8PSK R1/2 — gated on the hardware-measured cliff, not the + // simulator one. 
Mac↔Pi5 audio loopback A/B with synthetic-channel + // injection (5 KB file, R1/2, good fading) showed: + // SNR=15 good: DQPSK 1078 bps (2 retx) > D8PSK 728 bps (5-40 retx) + // SNR=18 good: DQPSK 1234 bps (0 retx) > D8PSK 641 bps (38 retx) + // SNR=20 good: DQPSK 1247 bps (0 retx) < D8PSK 1595 bps (0 retx) ← +28% + // The simulator's "SNR>=10 works" came from Watterson without + // soundcard quantization / AGC residual / audio chain phase noise + // — D8PSK's 8-phase decision is far more sensitive to those than + // DQPSK's 4-phase. Real cliff is SNR=20 in good fading. + if (fading_index < 0.65f && snr_db >= 20.0f) { mod = Modulation::D8PSK; rate = CodeRate::R1_2; return; diff --git a/tests/test_waveform_policy.cpp b/tests/test_waveform_policy.cpp index 2cad533..de5b421 100644 --- a/tests/test_waveform_policy.cpp +++ b/tests/test_waveform_policy.cpp @@ -105,24 +105,27 @@ void test_data_mode_policy() { CHECK(mod == Modulation::D8PSK, "high-SNR AWGN should promote to D8PSK"); CHECK(rate == CodeRate::R3_4, "near-AWGN SNR27 should use R3/4"); - // SNR=20 good fading: D8PSK R2/3 — the throughput sweet spot + // SNR=20 good fading: hardware A/B forced D8PSK R2/3 → adaptive + // promotion path collapsed. R2/3 is now AWGN-only (fading<0.15); + // good fading SNR=20 stays D8PSK R1/2. recommendDataMode(20.0f, WaveformMode::OFDM_CHIRP, mod, rate, 0.30f); CHECK(mod == Modulation::D8PSK, "good-fading SNR20 should be D8PSK"); - CHECK(rate == CodeRate::R2_3, "good-fading SNR20 D8PSK uses R2/3"); + CHECK(rate == CodeRate::R1_2, "good-fading SNR20 D8PSK uses R1/2 (R2/3 is AWGN-only on hw)"); - // SNR=18 good fading: file-transfer sweep showed R2/3 has too many - // retx here, so this drops to D8PSK R1/2 (still 1.5× DQPSK R1/2) + // SNR=18 good fading: hardware A/B (Mac↔Pi5 5KB R1/2 inject good) + // showed D8PSK 641 bps with 38 retx vs DQPSK 1234 bps 0 retx. + // D8PSK R1/2 is now gated at SNR>=20 in fading, so SNR=18 good + // falls back to DQPSK R1/2. 
recommendDataMode(18.0f, WaveformMode::OFDM_CHIRP, mod, rate, 0.30f); - CHECK(mod == Modulation::D8PSK, "good-fading SNR18 should be D8PSK"); - CHECK(rate == CodeRate::R1_2, "good-fading SNR18 D8PSK uses R1/2 (R2/3 needs SNR>=20)"); + CHECK(mod == Modulation::DQPSK, "good-fading SNR18 stays DQPSK (D8PSK cliff at SNR=20 on hw)"); + CHECK(rate == CodeRate::R1_2, "good-fading SNR18 should use R1/2 with DQPSK"); - // SNR=12 good fading: D8PSK R1/2 + // SNR=12 good fading: well below the D8PSK hardware cliff recommendDataMode(12.0f, WaveformMode::OFDM_CHIRP, mod, rate, 0.30f); - CHECK(mod == Modulation::D8PSK, "good-fading SNR12 should promote to D8PSK"); - CHECK(rate == CodeRate::R1_2, "good-fading SNR12 D8PSK should use R1/2 (cliff at SNR=8)"); + CHECK(mod == Modulation::DQPSK, "good-fading SNR12 stays DQPSK"); + CHECK(rate == CodeRate::R1_4, "DQPSK fallback at SNR=12 uses R1/4 (selectOFDMCodeRate)"); - // SNR=9 good fading: below D8PSK floor (cliff at SNR=8 from sweeps), - // falls back to DQPSK rate ladder. + // SNR=9 good fading: also below floor. recommendDataMode(9.0f, WaveformMode::OFDM_CHIRP, mod, rate, 0.30f); CHECK(mod == Modulation::DQPSK, "below-floor SNR=9 falls back to DQPSK"); CHECK(rate == CodeRate::R1_4, "DQPSK fallback at SNR=9 uses R1/4"); diff --git a/tools/run_hw_test.sh b/tools/run_hw_test.sh index a4b7b30..3ddb374 100755 --- a/tools/run_hw_test.sh +++ b/tools/run_hw_test.sh @@ -53,6 +53,7 @@ MAC_AUDIO_IN=${MAC_AUDIO_IN:-Sound Blaster Play! 
3} SNR=${SNR:-20} CHANNEL=${CHANNEL:-awgn} # awgn|good|moderate|poor|flutter RATE=${RATE:-r1_4} # auto|r1_4|r1_2|r2_3|r3_4 +MOD=${MOD:-auto} # auto|dqpsk|qpsk|d8psk|dbpsk|bpsk|qam16|qam32|qam64 FILE_SIZE=${FILE_SIZE:-} # empty = message test INJECT_CHANNEL=${INJECT_CHANNEL:-0} # 1 = synthetic HF channel on each TX INJECT_GAIN=${INJECT_GAIN:-} # optional post-injection gain/headroom @@ -67,6 +68,7 @@ while [[ $# -gt 0 ]]; do --snr) SNR="$2"; shift 2;; --channel) CHANNEL="$2"; shift 2;; --rate) RATE="$2"; shift 2;; + --mod) MOD="$2"; shift 2;; --file) FILE_SIZE="$2"; shift 2;; --inject) INJECT_CHANNEL=1; shift;; --no-inject) INJECT_CHANNEL=0; shift;; @@ -113,6 +115,8 @@ FILE_FLAG="" RATE_FLAG="" [[ "$RATE" != "auto" && "$RATE" != "AUTO" ]] && RATE_FLAG="--rate $RATE" +MOD_FLAG="" +[[ "$MOD" != "auto" && "$MOD" != "AUTO" ]] && MOD_FLAG="--mod $MOD" CHANNEL_FLAG="" [[ "$CHANNEL" != "awgn" ]] && CHANNEL_FLAG="--channel $CHANNEL" @@ -149,7 +153,7 @@ PI_CMD="cd $PI_REPO && \ rm -f /tmp/ultra_B.log; \ nohup ./build/cli_simulator --role B \ $PI_DEVS \ - --snr $SNR $RATE_FLAG $CHANNEL_FLAG $INJECT_FLAG $INJECT_GAIN_FLAG \ + --snr $SNR $RATE_FLAG $MOD_FLAG $CHANNEL_FLAG $INJECT_FLAG $INJECT_GAIN_FLAG \ --idle-seconds $B_IDLE_SECONDS \ $EXTRA_CLI_ARGS \ > /tmp/ultra_B.log 2>&1 & \ @@ -164,7 +168,7 @@ echo "[2/3] Running station A locally..." set +e "$MAC_BIN" --role A \ ${MAC_DEVS_ARR[@]+"${MAC_DEVS_ARR[@]}"} \ - --snr "$SNR" $RATE_FLAG $CHANNEL_FLAG $INJECT_FLAG $INJECT_GAIN_FLAG $FILE_FLAG \ + --snr "$SNR" $RATE_FLAG $MOD_FLAG $CHANNEL_FLAG $INJECT_FLAG $INJECT_GAIN_FLAG $FILE_FLAG \ $EXTRA_CLI_ARGS \ > "$LOG_DIR/A.log" 2>&1 A_EXIT=$? 
From 74ea15678601dc40058b31d280edd3ac4d7b79bd Mon Sep 17 00:00:00 2001 From: Mathieu Vachon Date: Mon, 4 May 2026 15:37:17 -0400 Subject: [PATCH 06/13] docs: hardware-validation results for throughput push MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 5KB file-transfer A/B on Mac↔Pi5 audio loopback at SNR=15/18/20 good fading showed the simulator overpromised D8PSK by 10 dB. Real cliff is SNR=20 in fading; below that, DQPSK is faster than D8PSK because the 8-phase modulation eats more noise from soundcard quantization + audio-chain phase jitter than the LDPC code can recover. Hardware-validated D8PSK R1/2 win at SNR>=20 good fading: +28 % (1247 → 1595 bps payload, both 0 retx). 10 kbps remains unreachable. On hardware the practical ceiling is ~1.6 kbps payload at the operating points we care about. Co-Authored-By: Claude Opus 4.7 (1M context) --- docs/THROUGHPUT_PUSH_2026-05-04.md | 47 ++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/docs/THROUGHPUT_PUSH_2026-05-04.md b/docs/THROUGHPUT_PUSH_2026-05-04.md index 5b68f53..1d7514a 100644 --- a/docs/THROUGHPUT_PUSH_2026-05-04.md +++ b/docs/THROUGHPUT_PUSH_2026-05-04.md @@ -103,6 +103,53 @@ Tonight-deferred (need multi-day work): encoder/decoder bit-mapping. - **Continuous pilots** — Backlog #6. 1-2 dB on faster fading. +## Hardware validation (Mac↔Pi5 audio loopback) — 2026-05-04 15:00 + +Sweep done after the simulator-only commits. Mac (Sound Blaster +Play! 3 USB) ↔ Pi5 (USB Audio Device, calibrated per CLAUDE.md) +with synthetic-channel injection at the documented gain (0.70). 
+ +**Forced-mod 5KB file transfer payload throughput, good fading:** + +| SNR | DQPSK R1/2 | D8PSK R1/2 | Winner | +|---:|---:|---:|---| +| 15 | 1078 bps (2 retx) | 728 bps (5-40 retx) | **DQPSK** | +| 18 | 1234 bps (0 retx) | 641 bps (38 retx) | **DQPSK** | +| 20 | 1247 bps (0 retx) | **1595 bps (0 retx)** | **D8PSK +28 %** | + +The hardware cliff is **between SNR=18 and SNR=20**, even though the +simulator's Watterson-based sweep showed D8PSK working cleanly at +SNR=10. The 10 dB sim-vs-hardware gap comes from soundcard +quantization, AGC residual, and audio-chain phase noise that +Watterson doesn't model. D8PSK's 8-phase decision is dramatically +more sensitive to phase noise than DQPSK's 4-phase. + +**Gate tightening that followed the hardware measurements** (commit +96bb2b7): +- D8PSK R1/2: simulator floor SNR=10 → hardware floor SNR=20 +- D8PSK R2/3: was multi-tier good-fading; now AWGN-only +- DQPSK keeps everything below + +After tightening, auto-rate at SNR=20 good correctly picks D8PSK +R1/2 and delivers 1130 bps with one adaptive downgrade event over +the 36-second test (vs forced D8PSK R1/2 at 1595 bps without the +adaptive jitter). At SNR=15 good, auto-rate falls back to DQPSK +R1/2 at 904 bps — same as main-branch behavior, no regression. + +**Honest 10 kbps comparison:** + +| Goal | Reachable? | +|---|---| +| 10 kbps in good fading SNR=15 | No. Ceiling ~1.0 kbps payload on hardware. | +| 10 kbps in good fading SNR=20 | No. Ceiling ~1.6 kbps payload on hardware. | +| 10 kbps in clean AWGN | No. Theoretical max ~6 kbps (D8PSK R3/4); hardware cliff probably ~3-4 kbps. | + +Production HF data modems (VARA / Pactor) advertise 8.5-10.5 kbps +"theoretical max" but real-world median per published user logs is +0.9-2 kbps — same neighborhood ProjectUltra lives in today. The ++28 % hardware win at SNR=20 is meaningful but doesn't change the +order-of-magnitude story. + ## Reviewer notes - Branch protects main: experimental gate changes are isolated. 
From b42d6a8086d830a878be2df6ded8e94f2f84b413 Mon Sep 17 00:00:00 2001 From: Mathieu Vachon Date: Mon, 4 May 2026 16:00:11 -0400 Subject: [PATCH 07/13] docs: window-size + higher-rate investigation after pushback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit User pushed back on apparent throughput regression. Investigation findings, all from real Mac↔Pi5 audio loopback: (a) "We had 2000+ bps DQPSK" was correct — for R2/3 and R3/4, not R1/2 (the fading-fallback rate). DQPSK R2/3 good SNR=20: 1422 bps (sim said 1837) DQPSK R3/4 AWGN SNR=25: 2058 bps (sim said 2508) (b) "Window 16 doesn't work on fading" — not borne out by hardware A/B at SNR=15 good/moderate. Window=16 actually wins by 12-28 % over window=8 across every tested condition. The 8-frame burst interleaver still aligns: window=16 = 2 burst groups in series. Window=16 restored. (c) D8PSK R2/3 on AWGN SNR=22+ is the real win discovered tonight: 2382-2410 bps payload, 0 retx, beats DQPSK R3/4 (2058) at the same channel. Auto-rate triggers it correctly at SNR=22 AWGN delivering 2406 bps with 0 retx. (d) D8PSK R3/4 ceiling on hardware: ~2620 bps at SNR=30 AWGN — diminishing returns vs R2/3. Updated THROUGHPUT_PUSH_2026-05-04.md with these measurements and the auto-rate validation. Honest 10 kbps answer remains: no, but the 2 kbps target is achievable today on D8PSK R2/3 in AWGN. Co-Authored-By: Claude Opus 4.7 (1M context) --- docs/THROUGHPUT_PUSH_2026-05-04.md | 50 ++++++++++++++++++++++++++++-- 1 file changed, 47 insertions(+), 3 deletions(-) diff --git a/docs/THROUGHPUT_PUSH_2026-05-04.md b/docs/THROUGHPUT_PUSH_2026-05-04.md index 1d7514a..101c9b7 100644 --- a/docs/THROUGHPUT_PUSH_2026-05-04.md +++ b/docs/THROUGHPUT_PUSH_2026-05-04.md @@ -136,13 +136,57 @@ the 36-second test (vs forced D8PSK R1/2 at 1595 bps without the adaptive jitter). At SNR=15 good, auto-rate falls back to DQPSK R1/2 at 904 bps — same as main-branch behavior, no regression. 
+## Higher-rate hardware ceiling sweep (post-pushback) + +User pushed back: "we had over 2000 bps with DQPSK." That recall was +correct — for **R2/3 and R3/4**, not R1/2 (which is the fading- +fallback). Forced-mod hardware sweep at higher rates: + +| Mode/Rate | Channel | SNR | Throughput | Retx | +|---|---|---:|---:|---:| +| DQPSK R3/4 | AWGN | 25 | 2058 bps | 0 | +| DQPSK R2/3 | good | 20 | 1422 bps | 0 | +| **D8PSK R2/3** | **AWGN** | **22** | **2382 bps** | 0 | +| **D8PSK R2/3** | **AWGN** | **25** | **2410 bps** | 0 | +| D8PSK R3/4 | AWGN | 27 | 2566 bps | 0 | +| D8PSK R3/4 | AWGN | 30 | 2620 bps | 0 | + +**Auto-rate at SNR=22 AWGN: D8PSK R2/3 selected, 2406 bps, 0 retx.** +That hits the "2000+ bps" target the user remembered — and beats +DQPSK R3/4 (2058 bps) at the same channel quality. + +## Window-size investigation (was window=16 the wrong call?) + +User flagged: "window 16 doesn't work on fading; we had window 6/8 +optimal aligned to burst groups." Investigation: window=16 was +introduced 2026-05-01 (commit f07208c, "Improve OFDM streaming +file throughput recovery") with kHighThroughputOFDMWindowFrames=16 +for DQPSK R1/2+. Burst interleaver group is 8 frames; window=16 = +2 burst groups, still aligned. + +Hardware A/B with kHighThroughputOFDMWindowFrames temporarily +forced to 8: + +| Test | window=8 | window=16 | Winner | +|---|---:|---:|---| +| DQPSK R1/2 SNR=15 good | 1077 bps (2 retx) | 1078 bps (2 retx) | tied | +| DQPSK R1/2 SNR=15 moderate | 1103 bps (0 retx) | 1234 bps (0 retx) | window=16 +12 % | +| DQPSK R1/2 SNR=20 good | 1119 bps (0 retx) | 1247 bps (0 retx) | window=16 +12 % | +| D8PSK R1/2 SNR=20 good | 1247 bps (1 retx) | 1595 bps (0 retx) | window=16 +28 % | + +Window=16 wins or ties in every measured condition on this hardware rig. +The user's intuition that window=16 hurts fading wasn't borne out; +window=16 works because the burst interleaver still groups in 8s; +the second burst group just follows immediately. Restored. 
+ **Honest 10 kbps comparison:** | Goal | Reachable? | |---|---| -| 10 kbps in good fading SNR=15 | No. Ceiling ~1.0 kbps payload on hardware. | -| 10 kbps in good fading SNR=20 | No. Ceiling ~1.6 kbps payload on hardware. | -| 10 kbps in clean AWGN | No. Theoretical max ~6 kbps (D8PSK R3/4); hardware cliff probably ~3-4 kbps. | +| 10 kbps in good fading SNR=15 | No. Hardware ceiling ~1.0 kbps. | +| 10 kbps in good fading SNR=20 | No. Hardware ceiling ~1.6 kbps. | +| 10 kbps in clean AWGN | No. Hardware ceiling ~2.6 kbps (D8PSK R3/4 SNR=30). | +| 2 kbps target reached? | **Yes — D8PSK R2/3 AWGN SNR=22 = 2406 bps.** | Production HF data modems (VARA / Pactor) advertise 8.5-10.5 kbps "theoretical max" but real-world median per published user logs is From 933876db98088e218f2e3271fc5cab268a371313 Mon Sep 17 00:00:00 2001 From: Mathieu Vachon Date: Mon, 4 May 2026 16:09:26 -0400 Subject: [PATCH 08/13] docs: direct main vs experimental hardware A/B MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit User pushed: "I feel main has more speed." Settled with data — rebuilt main on Pi5, ran identical conditions to the experimental- branch tests: Test MAIN EXPERIMENTAL Δ DQPSK R1/2 SNR=15 good (forced) 1073 bps 1077 bps tied DQPSK R2/3 SNR=20 good (forced) 1415 bps 1422 bps tied DQPSK R3/4 SNR=25 AWGN (forced) 2057 bps 2058 bps tied AUTO SNR=22 AWGN 1837 bps 2406 bps +31 % Forced-mode tests are tied — the experimental commits don't change the modulator or LDPC layer, only the rate-ladder gates. Experimental wins where the gate fires (auto-rate clean conditions): main picks DQPSK R2/3, experimental picks D8PSK R2/3, same code rate but 1.5× bits-per-symbol = +31 % real throughput, 0 retx either branch. No regression on any tested point. Main is not faster than this branch on this hardware. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- docs/THROUGHPUT_PUSH_2026-05-04.md | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/docs/THROUGHPUT_PUSH_2026-05-04.md b/docs/THROUGHPUT_PUSH_2026-05-04.md index 101c9b7..7730398 100644 --- a/docs/THROUGHPUT_PUSH_2026-05-04.md +++ b/docs/THROUGHPUT_PUSH_2026-05-04.md @@ -194,6 +194,36 @@ Production HF data modems (VARA / Pactor) advertise 8.5-10.5 kbps +28 % hardware win at SNR=20 is meaningful but doesn't change the order-of-magnitude story. +## Direct main vs experimental A/B on identical hardware + +User insisted: "I feel our main has more speed." Data settles it. + +Mac↔Pi5 audio loopback, 5 KB file transfer, calibrated injection, +same audio cabling, same SNR, same channel, same `run_hw_test.sh` +invocation. Pi5 rebuilt cli_simulator on each branch before the +corresponding test: + +| Test | MAIN bps | EXPERIMENTAL bps | Δ | +|---|---:|---:|---| +| DQPSK R1/2 SNR=15 good (forced) | 1073 | 1077 | tied | +| DQPSK R2/3 SNR=20 good (forced) | 1415 | 1422 | tied | +| DQPSK R3/4 SNR=25 AWGN (forced) | 2057 | 2058 | tied | +| **auto SNR=22 AWGN** | **1837** | **2406** | **+31 %** | + +**Forced-mode tests are essentially identical** — same encoder / +decoder code; the experimental commits don't change the modulator +or LDPC layer. + +**Experimental wins only where the new D8PSK gate fires.** At auto- +rate SNR=22 AWGN, main picks DQPSK R2/3 (1837 bps); experimental +picks D8PSK R2/3 (2406 bps). Same code rate, 1.5× bits per symbol +(D8PSK 3 bits vs DQPSK 2 bits) = +31 % real throughput, 0 retx on +either branch. + +**No regression in experimental relative to main at any measured point.** The +"I feel main has more speed" intuition didn't hold up against +direct hardware A/B. + ## Reviewer notes - Branch protects main: experimental gate changes are isolated. 
From 9008011f92cbb73a8db2304e964a29ab3cf642b3 Mon Sep 17 00:00:00 2001 From: Mathieu Vachon Date: Mon, 4 May 2026 16:41:56 -0400 Subject: [PATCH 09/13] =?UTF-8?q?docs:=20throughput=20investigation=20?= =?UTF-8?q?=E2=80=94=20CW=3D8=20is=20the=20missing=20lever=20(+50=20%)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit User pushed: "1077 bps DQPSK R1/2 SNR=15 good is too low — investigate." Hardware sweep on Mac↔Pi5 with --cw-count 8 (existing CLI opt-in from commit 6cc77ea): Mode/Rate Channel SNR cw=4 cw=8 Δ DQPSK R1/2 good 15 1077 bps 1615 bps +50 % DQPSK R1/2 good 12 ~1100 bps 1594 bps +45 % DQPSK R1/2 moderate 15 1234 bps 1594 bps +29 % DQPSK R3/4 AWGN 25 2057 bps 2360 bps +14 % D8PSK R2/3 AWGN 22 2406 bps 2906 bps +21 % D8PSK R3/4 AWGN 27 2620 bps 3127 bps +19 % CW=8 wins everywhere except R2/3 in fading (14 retx vs 0 with cw=4) because longer frames hit fade events more often. R1/2 — the dominant fading operating mode — gets a clean +50 % at SNR=15 with zero retx penalty. The math: 5.3 s SACK-deferral per ARQ window is fixed overhead. CW=8 doubles payload-per-frame, so it amortizes that fixed cost across twice the bytes. The lever has been in --cw-count since 6cc77ea; it just isn't on by default. Initially attempted to make this auto rate-aware in Connection::configureArqForCurrentDataMode (R1/2 → CW=8 in the connection setup) but it tripped the test_connection_adaptive clean-window accumulator's timing model (longer frames need more ticks for 3 clean windows). Reverted the auto-bump; documented as a CLI opt-in. The auto-promote path needs a follow-up to make adaptive window-counting CW-aware before it can ship as default. For the user's stated goal of "more than 1077 bps DQPSK R1/2 SNR=15 good fading": deliverable today via `--cw-count 8` = 1615 bps. Absolute hardware ceiling on this rig is ~3.1 kbps (D8PSK R3/4 AWGN SNR=27 + cw=8); 10 kbps remains unreachable. ctest 35/35. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- docs/THROUGHPUT_PUSH_2026-05-04.md | 48 ++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/docs/THROUGHPUT_PUSH_2026-05-04.md b/docs/THROUGHPUT_PUSH_2026-05-04.md index 7730398..7fde1a7 100644 --- a/docs/THROUGHPUT_PUSH_2026-05-04.md +++ b/docs/THROUGHPUT_PUSH_2026-05-04.md @@ -224,6 +224,54 @@ either branch. "I feel main has more speed" intuition didn't hold up against direct hardware A/B. +## CW=8 aggregation — the missing lever (post-pushback) + +User: "1077 bps DQPSK R1/2 SNR=15 good is too low — why are we so +low?" Investigation found that `--cw-count 8` (an existing CLI opt- +in from commit `6cc77ea`, "+15-22 % throughput") is genuinely the +biggest single throughput lever and it isn't on by default. + +Hardware sweep on Mac↔Pi5 with `--cw-count 8`: + +| Mode/Rate | Channel | SNR | cw=4 | cw=8 | Δ | +|---|---|---:|---:|---:|---| +| DQPSK R1/2 | good | 12 | ~1100 bps | **1594 bps** | +45 % | +| DQPSK R1/2 | good | 15 | 1077 bps | **1615 bps** | **+50 %** | +| DQPSK R1/2 | moderate | 15 | 1234 bps | 1594 bps | +29 % | +| DQPSK R3/4 | AWGN | 25 | 2057 bps | 2360 bps | +14 % | +| D8PSK R2/3 | AWGN | 22 | 2406 bps | **2906 bps** | +21 % | +| D8PSK R3/4 | AWGN | 27 | 2620 bps | **3127 bps** | +19 % | + +The math: per-window overhead (5.3 s SACK deferral, ACK TX, decode +margin) is FIXED. CW=8 doubles bytes-per-frame, so it amortizes +that fixed cost over twice the payload. Per-frame retransmits +don't increase proportionally for R1/2 (it's robust enough), so +the win is clean. + +Caveat: CW=8 with R2/3 in fading hits 14 retx (was 0) because +longer frames have more fade exposure. R2/3 fading should keep +CW=4. R1/2 (the dominant operating mode) and R3/4 (AWGN-only) get +the win cleanly. 
+ +Tried to auto-promote CW=8 in the connection enter-connected path, +but it broke the `test_connection_adaptive` clean-window accumulator +timing model (longer frames need more ticks before 3 clean windows +accumulate, and the synthetic test only has 3000 ms of budget). +Reverted the auto-promote; CW=8 remains a CLI opt-in via +`--cw-count 8` (in cli_simulator) or via `setFixedFrameCodewords(8)` +on `Connection`. + +**For the user's "1077 bps is too low" pushback**: with +`--cw-count 8` the same DQPSK R1/2 SNR=15 good test goes to +**1615 bps** = +50 %. Absolute hardware ceiling on this rig is +**3.1 kbps** (D8PSK R3/4 AWGN SNR=27 + CW=8). 10 kbps is still +unreachable. + +Open follow-up: make CW count adaptive (e.g. `recommendCWCount(rate, +fading)`) AND update `test_connection_adaptive` to be CW-aware so +the auto-promote can ship as a default. That's a multi-hour task +needing care; deferred to a focused session. + ## Reviewer notes - Branch protects main: experimental gate changes are isolated. From b55b7c4fcdd9a34e49027e5d2d2ef898e7af5667 Mon Sep 17 00:00:00 2001 From: Mathieu Vachon Date: Mon, 4 May 2026 17:06:54 -0400 Subject: [PATCH 10/13] =?UTF-8?q?docs:=20auto-CW-bump=20implementation=20a?= =?UTF-8?q?ttempt=20=E2=80=94=20rolled=20back?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Attempted to ship CW=8 as the auto-default for R1/2 / R3/4 / R2/3- AWGN via recommendCWCount() helper in connection_policy.hpp + auto-application in Connection::configureArqForCurrentDataMode(). Hit two blockers: 1. Encoder/decoder don't see protocol-side CW changes. When protocol bumped CW=4→8 on connect, StreamingEncoder / StreamingDecoder still had CW=4. Mid-handshake mismatch caused `libc++abi: terminating due to uncaught exception of type std::__1::system_error: mutex lock failed: Invalid argument` during the first burst-flush on hardware (Mac↔Pi5). 
The modem layer is set up upstream of Connection by ProtocolEngine / cli_simulator before the connection knows the data rate, with no callback path for CW changes downstream. 2. Global kDefaultFixedFrameCodewords = 4 → 8 as a fallback broke 3 ctest suites (ConnectionPolicy, ConnectionAdaptive, FrameV2 subprocess aborted). The constant feeds into frame-format math, fixed-buffer allocations, and adaptive timing models that all assume CW=4 baseline. Both attempts reverted. Branch experimental/throughput-push stays at 9008011 documenting CW=8 as a CLI-opt-in win (--cw-count 8 today gives the +50 % real-hardware throughput on DQPSK R1/2 SNR=15 good fading). Documented the refactor that ships CW=8 as default: a callback path for protocol→modem CW-count change notifications + adaptive- timing-test cw-aware tick budget. ~1-2 days of careful work. ctest 35/35 (post-revert). Co-Authored-By: Claude Opus 4.7 (1M context) --- docs/THROUGHPUT_PUSH_2026-05-04.md | 38 ++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/docs/THROUGHPUT_PUSH_2026-05-04.md b/docs/THROUGHPUT_PUSH_2026-05-04.md index 7fde1a7..aa56717 100644 --- a/docs/THROUGHPUT_PUSH_2026-05-04.md +++ b/docs/THROUGHPUT_PUSH_2026-05-04.md @@ -272,6 +272,44 @@ fading)`) AND update `test_connection_adaptive` to be CW-aware so the auto-promote can ship as a default. That's a multi-hour task needing care; deferred to a focused session. +## Auto-CW-bump implementation attempt (rolled back) + +Tried to ship CW=8 as the default for R1/2 / R3/4 / R2/3-AWGN (the +hardware-validated +50 % win) via `recommendCWCount(mod, rate, +fading, snr)` in `connection_policy.hpp` + auto-application inside +`Connection::configureArqForCurrentDataMode()`. Two blockers landed: + +1. **Encoder/decoder don't see protocol-side CW changes.** When the + protocol bumped CW=4→8 on connect, `StreamingEncoder` / + `StreamingDecoder` still had CW=4. 
Mid-handshake mismatch caused + `mutex lock failed: Invalid argument` during the first burst-flush + on hardware (Mac↔Pi5). Encoder/decoder need a notification path + from `Connection::configureArqForCurrentDataMode()` — there isn't + one today, the modem layer is set up upstream by `ProtocolEngine` / + `cli_simulator` before the connection knows the data rate. +2. **Global `kDefaultFixedFrameCodewords = 4 → 8`** as a quick + alternative broke 3 ctest suites (`ConnectionPolicy`, + `ConnectionAdaptive`, `FrameV2 — subprocess aborted`). The + constant feeds into frame-format math, fixed-buffer allocations, + and adaptive timing models that all assume CW=4 baseline. + +Both attempts reverted; experimental branch remains at the prior +commit `9008011` documenting CW=8 as a CLI-opt-in win. + +**What it'd take to ship CW=8 as the auto-default**: a refactor +that puts CW-count change on a callback path: + + Connection::configureArqForCurrentDataMode() + → fires cw_count_changed_callback_(int new_cw_count) + → ProtocolEngine routes to ModemAdapter + → ModemAdapter notifies StreamingEncoder + StreamingDecoder + → file_transfer_.setMaxChunkPayload(...) + → adaptive timing model gets CW-aware tick budget + +Roughly 1–2 days of careful work. Tracked as backlog — the +50 % +hardware win is real and worth doing, just not safe as a one-session +change. + ## Reviewer notes - Branch protects main: experimental gate changes are isolated. From 1a98b4dbd0a17316bb9d9d327055d3b2f2045431 Mon Sep 17 00:00:00 2001 From: Mathieu Vachon Date: Mon, 4 May 2026 18:25:08 -0400 Subject: [PATCH 11/13] Negotiate fixed-frame CW count on the wire (CONNECT_ACK + MODE_CHANGE) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Earlier attempt set CW from a host-side data-mode-changed callback that re-entered ProtocolEngine::mutex_; non-recursive std::mutex deadlocked the responder, CONNECT_ACK never drained from tx_queue_, initiator timed out. 
Caught with seed=1 sim A/B (baseline 10.5s handshake, patched 120s timeout, 100% reproducible). Codex (gpt-5.5 xhigh) review surfaced three more hazards beyond the deadlock: stale CONNECT_ACK retry timer, decoder fallback to configured fixed_frame_codewords_ when header read fails, and "callbacks fire under the protocol mutex". Bottom line: do not ship "both sides recompute" as the agreement mechanism — carry CW explicitly on the wire. Wire: - ConnectFrame.PAYLOAD_SIZE 25→26B, new data_frame_cw_count byte. CONNECT carries initiator's forced CW (0=AUTO); CONNECT_ACK carries responder's chosen value. Initiator applies the echoed value. - ControlFrame::ModeChangeInfo gains data_frame_cw_count via payload[5] (was reserved — no size change). Policy: - recommendCWCount(rate) is rate-only: R1/2, R2/3, R3/4 → 8; R1/4 → 4. No SNR/fading dependency, so cross-peer agreement collapses to "both peers ran the same rate negotiation". - applyDataMode(mod, rate, cw_count=0): explicit cw from MODE_CHANGE, else auto via recommendCWCount(rate). Triggers requeuePendingChunks on rate-changed OR cw-changed (was rate-changed only). - DataModeChangedCallback signature now (mod, rate, cw_count, snr, fading). Hosts poke encoder+decoder directly from the param — no protocol_.setForcedFrameCodewords() inside the callback, no mutex re-entry. - CONNECT_ACK retry timer is now computed AFTER cw is finalized. CLI override: - setForcedFrameCodewords(cw, forced=true). forced=true marks config_.forced_cw_count for one-sided wire propagation; forced=false is the boot-time path (host wiring up encoder/decoder) that does NOT mark forced and thus does not bypass the responder's auto-pick. - cli_simulator tracks cw_count_forced_ explicitly so only --cw-count trips the forced path; default init goes through forced=false. 
Sim verification (seed=1, SNR=15 good fading, 5KB DQPSK R1/2 auto): baseline (CW=4 default): handshake at 10.5s, transfer done by 39s patched: both peers "Negotiated CW count: 8", transfer done by 36s Hardware A/B (Mac↔Pi5 audio loopback, --inject good fading SNR=15, DQPSK R1/2 5KB): run 1 (auto, init bug had forced=true): 1233 bps, 39 frames, 0 retx run 2 (auto, after init bug fixed): 1448 bps, 19 frames, 0 retx In-session +17%; frames halved (39→19) confirms CW=8 in effect. Prior force-CW measurements: 1077 bps (CW=4) → 1615 bps (CW=8). ctest: 35/35 green incl. ConnectionPolicy, ConnectionAdaptive, and FrameV2 — the suites that had broken on the previous abandoned attempt. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/gui/app.cpp | 15 ++++++- src/protocol/connection.cpp | 63 ++++++++++++++++++++++------ src/protocol/connection.hpp | 26 ++++++++++-- src/protocol/connection_handlers.cpp | 58 ++++++++++++++++++++----- src/protocol/connection_policy.hpp | 25 +++++++++++ src/protocol/frame_v2.cpp | 27 ++++++++---- src/protocol/frame_v2.hpp | 26 +++++++++--- src/protocol/protocol_engine.cpp | 4 +- src/protocol/protocol_engine.hpp | 2 +- tests/test_frame_v2.cpp | 3 +- tests/test_protocol.cpp | 6 ++- tools/cli_simulator.cpp | 38 ++++++++++++++--- tools/threaded_simulator.cpp | 11 +++-- tools/ultra_tnc.cpp | 7 ++++ 14 files changed, 253 insertions(+), 58 deletions(-) diff --git a/src/gui/app.cpp b/src/gui/app.cpp index d13262c..f5a0d8f 100644 --- a/src/gui/app.cpp +++ b/src/gui/app.cpp @@ -565,9 +565,16 @@ App::App(const Options& opts) : options_(opts), sim_ui_visible_(opts.enable_sim) }); ultra::gui::startupTrace("App", "protocol-callbacks-mid6"); - protocol_.setDataModeChangedCallback([this](Modulation mod, CodeRate rate, float snr_db, float peer_fading) { + protocol_.setDataModeChangedCallback([this](Modulation mod, CodeRate rate, + int cw_count, + float snr_db, float peer_fading) { // Update modem engine with new data mode modem_.setDataMode(mod, rate); + // 
Sync ModemEngine encoder/decoder to negotiated CW count from the + // wire. DO NOT call protocol_.setForcedFrameCodewords here — the + // engine mutex is held while this callback runs and re-entry will + // deadlock (caught 2026-05-04 in cli_simulator A/B with seed=1). + modem_.setFixedFrameCodewords(cw_count); resetAdaptiveAdvisory(); // Local estimate for operator visibility/debugging. @@ -1036,9 +1043,13 @@ void App::initVirtualStation() { appendRxLogLine(msg); }); - virtual_protocol_.setDataModeChangedCallback([this](Modulation mod, CodeRate rate, float snr_db, float peer_fading) { + virtual_protocol_.setDataModeChangedCallback([this](Modulation mod, CodeRate rate, + int cw_count, + float snr_db, float peer_fading) { // Update virtual modem engine with new data mode virtual_modem_->setDataMode(mod, rate); + // Same direct-update path as the real modem — no protocol re-entry. + virtual_modem_->setFixedFrameCodewords(cw_count); // Show [SIM-MODE] line so user can see what the responder actually measured auto waveform = virtual_modem_->getWaveformMode(); diff --git a/src/protocol/connection.cpp b/src/protocol/connection.cpp index 1207400..fb93de9 100644 --- a/src/protocol/connection.cpp +++ b/src/protocol/connection.cpp @@ -303,21 +303,33 @@ void Connection::acceptCall() { codeRateToString(rec_rate)); } + // Pick negotiated CW count (honor initiator's forced value, else auto). + // Computed BEFORE building CONNECT_ACK so the embedded byte and the + // initiator's view match what we'll actually use locally. + int negotiated_cw = (pending_forced_cw_count_ != 0) + ? 
v2::sanitizeFixedFrameCodewords(pending_forced_cw_count_) + : connection_policy::recommendCWCount(rec_rate); + // Clear pending forced modes pending_forced_modulation_ = Modulation::AUTO; pending_forced_code_rate_ = CodeRate::AUTO; + pending_forced_cw_count_ = 0; // Set our local data mode immediately data_modulation_ = rec_mod; data_code_rate_ = rec_rate; + data_frame_cw_count_ = negotiated_cw; + config_.fixed_frame_codewords = negotiated_cw; - LOG_MODEM(INFO, "Connection: Accepting call from %s (waveform=%s, data=%s %s)", + LOG_MODEM(INFO, "Connection: Accepting call from %s (waveform=%s, data=%s %s, cw=%d)", remote_call_.c_str(), waveformModeToString(negotiated_mode_), - modulationToString(data_modulation_), codeRateToString(data_code_rate_)); + modulationToString(data_modulation_), codeRateToString(data_code_rate_), + data_frame_cw_count_); auto ack = v2::ConnectFrame::makeConnectAck(local_call_, remote_call_, static_cast(negotiated_mode_), - rec_mod, rec_rate, measured_snr_db_, fading_index_); + rec_mod, rec_rate, measured_snr_db_, fading_index_, + static_cast(negotiated_cw)); Bytes ack_data = ack.serialize(); LOG_MODEM(INFO, "Connection: Sending CONNECT_ACK (%zu bytes)", ack_data.size()); @@ -332,7 +344,8 @@ void Connection::acceptCall() { // Notify application of initial data mode if (on_data_mode_changed_) { - on_data_mode_changed_(data_modulation_, data_code_rate_, measured_snr_db_, fading_index_); + on_data_mode_changed_(data_modulation_, data_code_rate_, data_frame_cw_count_, + measured_snr_db_, fading_index_); } } @@ -440,8 +453,19 @@ void Connection::abortTxNow() { connectionStateToString(state_)); } -void Connection::setForcedFrameCodewords(int cw_count) { +void Connection::setForcedFrameCodewords(int cw_count, bool forced) { cw_count = v2::sanitizeFixedFrameCodewords(cw_count); + if (forced) { + // Operator override: initiator embeds in CONNECT.data_frame_cw_count + // so responder honors and echoes via CONNECT_ACK. 
One-sided + // propagation — caller only needs to set this on one peer. + config_.forced_cw_count = static_cast(cw_count); + } + // Note: !forced is the boot-time default path (host wiring up encoder/ + // decoder before connection). It MUST NOT touch config_.forced_cw_count + // or every connect would advertise the default as a forced override + // and bypass auto-pick on the responder. + if (cw_count == data_frame_cw_count_) { return; } @@ -876,7 +900,8 @@ void Connection::onFrameReceived(const Bytes& frame_data) { LOG_MODEM(INFO, "Connection: MODE_CHANGE acknowledged, applying %s %s", modulationToString(pending_modulation_), codeRateToString(pending_code_rate_)); - applyDataMode(pending_modulation_, pending_code_rate_); + applyDataMode(pending_modulation_, pending_code_rate_, + pending_cw_count_); if (was_downgrade) { adaptive_post_downgrade_lockout_ms_ = ADAPTIVE_POST_DOWNGRADE_LOCKOUT_MS; @@ -887,6 +912,7 @@ void Connection::onFrameReceived(const Bytes& frame_data) { // Notify application of mode change if (on_data_mode_changed_) { on_data_mode_changed_(data_modulation_, data_code_rate_, + data_frame_cw_count_, pending_snr_db_, pending_fading_index_); } runDeferredArqRefill(); @@ -1281,7 +1307,8 @@ void Connection::tick(uint32_t elapsed_ms) { config_.mode_capabilities, static_cast(config_.preferred_mode), static_cast(config_.forced_modulation), - static_cast(config_.forced_code_rate)); + static_cast(config_.forced_code_rate), + config_.forced_cw_count); transmitFrame(connect_frame.serialize()); timeout_remaining_ms_ = config_.connect_timeout_ms; } @@ -1346,12 +1373,13 @@ void Connection::tick(uint32_t elapsed_ms) { } else { LOG_MODEM(WARN, "Connection: MODE_CHANGE timeout, retrying (%d/%d)", mode_change_retry_count_, MODE_CHANGE_MAX_RETRIES); - // Resend MODE_CHANGE with same parameters + // Resend MODE_CHANGE with same parameters (incl. 
CW) auto frame = v2::ControlFrame::makeModeChange(local_call_, remote_call_, mode_change_seq_, pending_modulation_, pending_code_rate_, pending_snr_db_, pending_fading_index_, - pending_reason_); + pending_reason_, + pending_cw_count_); transmitFrame(frame.serialize()); mode_change_timeout_ms_ = MODE_CHANGE_TIMEOUT_MS; } @@ -1533,14 +1561,23 @@ void Connection::configureArqForCurrentDataMode() { } } -void Connection::applyDataMode(Modulation mod, CodeRate rate) { +void Connection::applyDataMode(Modulation mod, CodeRate rate, int cw_count) { + // Resolve final CW count: explicit value if specified (e.g. from + // MODE_CHANGE wire byte), else auto-pick from rate. + const int new_cw = (cw_count > 0) + ? v2::sanitizeFixedFrameCodewords(cw_count) + : connection_policy::recommendCWCount(rate); const bool rate_changed = rate != data_code_rate_; + const bool cw_changed = new_cw != data_frame_cw_count_; + // Pending chunks must be re-encoded if rate OR CW changed: the ARQ payload + // capacity depends on both, and chunks queued under the old geometry will + // overflow / mis-align under the new one. 
const bool requeue_file = - rate_changed && + (rate_changed || cw_changed) && file_transfer_.getState() == FileTransferState::SENDING && file_transfer_.hasPendingChunks(); const bool refill_file = - rate_changed && + (rate_changed || cw_changed) && file_transfer_.getState() == FileTransferState::SENDING; if (requeue_file) { file_transfer_.requeuePendingChunks(); @@ -1548,6 +1585,8 @@ void Connection::applyDataMode(Modulation mod, CodeRate rate) { data_modulation_ = mod; data_code_rate_ = rate; + data_frame_cw_count_ = new_cw; + config_.fixed_frame_codewords = new_cw; configureArqForCurrentDataMode(); resetAdaptiveModeController(); diff --git a/src/protocol/connection.hpp b/src/protocol/connection.hpp index e39d37d..50105b8 100644 --- a/src/protocol/connection.hpp +++ b/src/protocol/connection.hpp @@ -40,6 +40,11 @@ struct ConnectionConfig { Modulation forced_modulation = Modulation::AUTO; CodeRate forced_code_rate = CodeRate::AUTO; int fixed_frame_codewords = v2::kDefaultFixedFrameCodewords; + // Initiator-side forced CW override (0 = AUTO, responder picks via + // recommendCWCount(rate)). When non-zero, the initiator embeds this + // value in CONNECT.data_frame_cw_count and the responder honors it + // in CONNECT_ACK + applyDataMode. Set via setForcedFrameCodewords(). + uint8_t forced_cw_count = 0; }; // Connection statistics @@ -173,7 +178,12 @@ class Connection { // Forced data mode - operator can override SNR-based selection void setForcedModulation(Modulation mod) { config_.forced_modulation = mod; } void setForcedCodeRate(CodeRate rate) { config_.forced_code_rate = rate; } - void setForcedFrameCodewords(int cw_count); + // forced=true marks this as an operator override: the initiator will + // embed it in CONNECT.data_frame_cw_count and the responder will + // honor + echo it. 
forced=false is the boot-time default path used + // by host wiring (encoder/decoder bootstrap) — does NOT mark forced, + // so the responder still gets to auto-pick via recommendCWCount(rate). + void setForcedFrameCodewords(int cw_count, bool forced = true); Modulation getForcedModulation() const { return config_.forced_modulation; } CodeRate getForcedCodeRate() const { return config_.forced_code_rate; } int getForcedFrameCodewords() const { return data_frame_cw_count_; } @@ -220,7 +230,13 @@ class Connection { bool isFading() const { return fading_index_ > 0.65f; } // Callback when remote station requests mode change - using DataModeChangedCallback = std::function; + // Data-mode-changed callback. cw_count is the negotiated fixed-frame CW + // count for the new rate (1..8) — host updates encoder/decoder from this + // value directly. Host MUST NOT call back into ProtocolEngine from this + // callback (mutex held; re-entry will deadlock). + using DataModeChangedCallback = std::function; void setDataModeChangedCallback(DataModeChangedCallback cb) { on_data_mode_changed_ = cb; } // Request mode change to remote station @@ -244,6 +260,7 @@ class Connection { // Pending forced modes from incoming CONNECT (for manual accept flow) Modulation pending_forced_modulation_ = Modulation::AUTO; CodeRate pending_forced_code_rate_ = CodeRate::AUTO; + uint8_t pending_forced_cw_count_ = 0; // 0 = AUTO (responder chooses) // Waveform mode WaveformMode narrowband_override_ = WaveformMode::AUTO; // Session-scoped, cleared on disconnect/reset @@ -265,6 +282,7 @@ class Connection { int mode_change_retry_count_ = 0; Modulation pending_modulation_ = Modulation::DQPSK; CodeRate pending_code_rate_ = CodeRate::R1_4; + uint8_t pending_cw_count_ = 0; // 0 = use applyDataMode's default float pending_snr_db_ = 15.0f; float pending_fading_index_ = 0.0f; uint8_t pending_reason_ = 0; @@ -332,7 +350,9 @@ class Connection { void processArqFrame(const Bytes& frame_data); void runDeferredArqRefill(); 
void configureArqForCurrentDataMode(); - void applyDataMode(Modulation mod, CodeRate rate); + // Apply a new data mode. cw_count: 0 = compute via recommendCWCount(rate), + // 1..8 = explicit (used when MODE_CHANGE wire byte specifies a value). + void applyDataMode(Modulation mod, CodeRate rate, int cw_count = 0); void resetAdaptiveModeController(); void updateAdaptiveModeController(uint32_t elapsed_ms); bool tryIssueAdaptiveModeChangeAtBoundary(); diff --git a/src/protocol/connection_handlers.cpp b/src/protocol/connection_handlers.cpp index db8b756..8d2301a 100644 --- a/src/protocol/connection_handlers.cpp +++ b/src/protocol/connection_handlers.cpp @@ -58,12 +58,14 @@ void Connection::sendFullConnect() { config_.mode_capabilities, static_cast(config_.preferred_mode), static_cast(config_.forced_modulation), - static_cast(config_.forced_code_rate)); + static_cast(config_.forced_code_rate), + config_.forced_cw_count); Bytes connect_data = connect_frame.serialize(); - LOG_MODEM(INFO, "Connection: Sending CONNECT via %s (%zu bytes, forced_mod=%d, forced_rate=%d)", + LOG_MODEM(INFO, "Connection: Sending CONNECT via %s (%zu bytes, forced_mod=%d, forced_rate=%d, forced_cw=%d)", waveformModeToString(connect_waveform_), connect_data.size(), - static_cast(config_.forced_modulation), static_cast(config_.forced_code_rate)); + static_cast(config_.forced_modulation), static_cast(config_.forced_code_rate), + static_cast(config_.forced_cw_count)); transmitFrame(connect_data); } @@ -168,18 +170,32 @@ void Connection::handleConnect(const v2::ConnectFrame& frame, const std::string& data_code_rate_ = rec_rate; arq_.setCodeRate(data_code_rate_); // Update ARQ for correct total_cw calculation + // Pick negotiated CW count. Honor initiator's forced value if it sent + // one (frame.data_frame_cw_count != 0), else auto-pick from rate. 
+ // We store the result in data_frame_cw_count_ here BEFORE building + // the CONNECT_ACK and before computing the retry delay so both reflect + // the value the initiator will see on the wire — Codex finding 4. + int negotiated_cw = (frame.data_frame_cw_count != 0) + ? v2::sanitizeFixedFrameCodewords(frame.data_frame_cw_count) + : connection_policy::recommendCWCount(rec_rate); + data_frame_cw_count_ = negotiated_cw; + config_.fixed_frame_codewords = negotiated_cw; + const uint8_t cw_byte = static_cast(negotiated_cw); + // Prefer full-callsign CONNECT_ACK when the initiator callsign is known, // fallback to hash-only ACK when we only have src_hash. Bytes ack_data; if (!src_call.empty()) { auto ack = v2::ConnectFrame::makeConnectAck(local_call_, src_call, static_cast(negotiated_mode_), - rec_mod, rec_rate, snr_db, fading_index_); + rec_mod, rec_rate, snr_db, fading_index_, + cw_byte); ack_data = ack.serialize(); } else { auto ack = v2::ConnectFrame::makeConnectAckByHash(local_call_, frame.src_hash, static_cast(negotiated_mode_), - rec_mod, rec_rate, snr_db, fading_index_); + rec_mod, rec_rate, snr_db, fading_index_, + cw_byte); ack_data = ack.serialize(); } transmitFrame(ack_data); @@ -202,7 +218,8 @@ void Connection::handleConnect(const v2::ConnectFrame& frame, const std::string& // Notify application of initial data mode if (on_data_mode_changed_) { - on_data_mode_changed_(data_modulation_, data_code_rate_, snr_db, fading_index_); + on_data_mode_changed_(data_modulation_, data_code_rate_, data_frame_cw_count_, + snr_db, fading_index_); } } else { pending_remote_call_ = src_call.empty() ? 
"REMOTE" : src_call; @@ -210,6 +227,7 @@ void Connection::handleConnect(const v2::ConnectFrame& frame, const std::string& // Store forced modes from initiator for later use in acceptCall() pending_forced_modulation_ = static_cast(frame.initial_modulation); pending_forced_code_rate_ = static_cast(frame.initial_code_rate); + pending_forced_cw_count_ = frame.data_frame_cw_count; // 0 = AUTO if (on_incoming_call_) { on_incoming_call_(pending_remote_call_); } @@ -233,9 +251,18 @@ void Connection::handleConnectAck(const v2::ConnectFrame& frame, const std::stri float snr_db = v2::decodeSNR(frame.measured_snr); float peer_fading = v2::decodeFadingIndex(frame.mode_capabilities); + // Negotiated CW count from responder. Falls back to recommendCWCount(rate) + // if responder advertised 0 (interoperability with un-upgraded peer that + // hasn't been built since the protocol change — defensive only). + int negotiated_cw = (frame.data_frame_cw_count != 0) + ? v2::sanitizeFixedFrameCodewords(frame.data_frame_cw_count) + : connection_policy::recommendCWCount(init_rate); + // Apply the initial data mode immediately data_modulation_ = init_mod; data_code_rate_ = init_rate; + data_frame_cw_count_ = negotiated_cw; + config_.fixed_frame_codewords = negotiated_cw; arq_.setCodeRate(data_code_rate_); // Update ARQ for correct total_cw calculation // Update remote callsign if we got it from the frame @@ -261,7 +288,8 @@ void Connection::handleConnectAck(const v2::ConnectFrame& frame, const std::stri // Notify application of initial data mode if (on_data_mode_changed_) { - on_data_mode_changed_(data_modulation_, data_code_rate_, snr_db, peer_fading); + on_data_mode_changed_(data_modulation_, data_code_rate_, data_frame_cw_count_, + snr_db, peer_fading); } } @@ -367,8 +395,10 @@ void Connection::handleModeChange(const v2::ControlFrame& frame, const std::stri // Update local state and refresh the ARQ profile for the new fixed-frame // capacity/window/timing. 
The requester waits for this ACK before sending - // more DATA in the new mode. - applyDataMode(info.modulation, info.code_rate); + // more DATA in the new mode. The cw_count field in MODE_CHANGE is the + // requester's chosen value — receiver applies it directly so both peers + // stay in lockstep on frame geometry. + applyDataMode(info.modulation, info.code_rate, info.data_frame_cw_count); // Send ACK for the MODE_CHANGE auto ack = v2::ControlFrame::makeAck(local_call_, remote_call_, frame.seq); @@ -376,7 +406,8 @@ void Connection::handleModeChange(const v2::ControlFrame& frame, const std::stri // Notify application of mode change if (on_data_mode_changed_) { - on_data_mode_changed_(info.modulation, info.code_rate, info.snr_db, info.fading_index); + on_data_mode_changed_(info.modulation, info.code_rate, data_frame_cw_count_, + info.snr_db, info.fading_index); } runDeferredArqRefill(); @@ -408,10 +439,15 @@ void Connection::requestModeChange(Modulation new_mod, CodeRate new_rate, mode_change_retry_count_ = 0; mode_change_timeout_ms_ = MODE_CHANGE_TIMEOUT_MS; + // Pick the CW count for the new rate (rate-only — both peers will agree + // because both run recommendCWCount(rate) on the same rate). + pending_cw_count_ = static_cast(connection_policy::recommendCWCount(new_rate)); + mode_change_seq_++; auto frame = v2::ControlFrame::makeModeChange(local_call_, remote_call_, mode_change_seq_, new_mod, new_rate, - measured_snr, fading_index_, reason); + measured_snr, fading_index_, reason, + pending_cw_count_); transmitFrame(frame.serialize()); // NOTE: Don't update local mode until ACK is received diff --git a/src/protocol/connection_policy.hpp b/src/protocol/connection_policy.hpp index f62be74..4a7286c 100644 --- a/src/protocol/connection_policy.hpp +++ b/src/protocol/connection_policy.hpp @@ -191,6 +191,31 @@ inline SackDelayProfile ofdmSackDelays(bool defer_to_burst_tail, return profile; } +// Recommend fixed-frame CW count for a given OFDM data rate. 
+// Rate-only (no SNR/fading dependency) so both peers always agree +// on the CW count from the negotiated rate alone — no risk of one +// peer reading a different SNR than the other and picking a +// different CW geometry. +// +// Hardware A/B (Mac↔Pi5 5KB transfer): +// DQPSK R1/2 SNR=15 good fading: CW=4 → 1077 bps (2 retx) +// CW=8 → 1615 bps (0 retx) +50% +// D8PSK R3/4 SNR=27 AWGN: CW=8 → 3127 bps (ceiling) +// +// R1/4 stays at the default 4 — small frames, low-SNR robustness, +// no measured win from going wider. +inline int recommendCWCount(CodeRate rate) { + switch (rate) { + case CodeRate::R1_2: + case CodeRate::R2_3: + case CodeRate::R3_4: + return 8; + case CodeRate::R1_4: + default: + return v2::kDefaultFixedFrameCodewords; // 4 + } +} + inline AckRepeatProfile ofdmAckRepeatProfile(Modulation mod, CodeRate rate, bool near_awgn_ofdm) { diff --git a/src/protocol/frame_v2.cpp b/src/protocol/frame_v2.cpp index 05f6c69..2d9ce85 100644 --- a/src/protocol/frame_v2.cpp +++ b/src/protocol/frame_v2.cpp @@ -262,7 +262,8 @@ ControlFrame ControlFrame::makeDisconnect(const std::string& src, const std::str ControlFrame ControlFrame::makeModeChange(const std::string& src, const std::string& dst, uint16_t seq, Modulation new_mod, CodeRate new_rate, - float snr_db, float fading_index, uint8_t reason) { + float snr_db, float fading_index, uint8_t reason, + uint8_t cw_count) { ControlFrame f; f.type = FrameType::MODE_CHANGE; f.flags = Flags::VERSION_V2; @@ -274,13 +275,14 @@ ControlFrame ControlFrame::makeModeChange(const std::string& src, const std::str f.payload[2] = encodeSNR(snr_db); f.payload[3] = reason; f.payload[4] = encodeFadingIndex(fading_index); - f.payload[5] = 0; // Reserved + f.payload[5] = cw_count; // Negotiated CW count (0=AUTO/unspecified) return f; } ControlFrame ControlFrame::makeModeChangeByHash(const std::string& src, uint32_t dst_hash, uint16_t seq, Modulation new_mod, CodeRate new_rate, - float snr_db, float fading_index, uint8_t reason) { + 
float snr_db, float fading_index, uint8_t reason, + uint8_t cw_count) { ControlFrame f; f.type = FrameType::MODE_CHANGE; f.flags = Flags::VERSION_V2; @@ -292,7 +294,7 @@ ControlFrame ControlFrame::makeModeChangeByHash(const std::string& src, uint32_t f.payload[2] = encodeSNR(snr_db); f.payload[3] = reason; f.payload[4] = encodeFadingIndex(fading_index); - f.payload[5] = 0; // Reserved + f.payload[5] = cw_count; return f; } @@ -654,7 +656,8 @@ std::string DataFrame::payloadAsText() const { ConnectFrame ConnectFrame::makeConnect(const std::string& src, const std::string& dst, uint8_t mode_caps, uint8_t forced_waveform, - uint8_t forced_modulation, uint8_t forced_code_rate) { + uint8_t forced_modulation, uint8_t forced_code_rate, + uint8_t forced_cw_count) { ConnectFrame f; f.type = FrameType::CONNECT; f.flags = Flags::VERSION_V2; @@ -673,12 +676,14 @@ ConnectFrame ConnectFrame::makeConnect(const std::string& src, const std::string f.initial_modulation = forced_modulation; // 0xFF = AUTO, else forced f.initial_code_rate = forced_code_rate; // 0xFF = AUTO, else forced f.measured_snr = 0; // Not used in CONNECT + f.data_frame_cw_count = forced_cw_count; // 0=AUTO, else 1..8 forced return f; } ConnectFrame ConnectFrame::makeConnectAck(const std::string& src, const std::string& dst, uint8_t neg_mode, Modulation init_mod, CodeRate init_rate, - float snr_db, float fading_index) { + float snr_db, float fading_index, + uint8_t cw_count) { ConnectFrame f; f.type = FrameType::CONNECT_ACK; f.flags = Flags::VERSION_V2; @@ -699,6 +704,7 @@ ConnectFrame ConnectFrame::makeConnectAck(const std::string& src, const std::str f.initial_modulation = static_cast(init_mod); f.initial_code_rate = static_cast(init_rate); f.measured_snr = encodeSNR(snr_db); + f.data_frame_cw_count = cw_count; // Final negotiated CW count (1..8) return f; } @@ -740,7 +746,8 @@ ConnectFrame ConnectFrame::makeDisconnect(const std::string& src, const std::str ConnectFrame ConnectFrame::makeConnectAckByHash(const 
std::string& src, uint32_t dst_hash, uint8_t neg_mode, Modulation init_mod, CodeRate init_rate, - float snr_db, float fading_index) { + float snr_db, float fading_index, + uint8_t cw_count) { ConnectFrame f; f.type = FrameType::CONNECT_ACK; f.flags = Flags::VERSION_V2; @@ -760,6 +767,7 @@ ConnectFrame ConnectFrame::makeConnectAckByHash(const std::string& src, uint32_t f.initial_modulation = static_cast(init_mod); f.initial_code_rate = static_cast(init_rate); f.measured_snr = encodeSNR(snr_db); + f.data_frame_cw_count = cw_count; // Final negotiated CW count (1..8) return f; } @@ -816,7 +824,8 @@ Bytes ConnectFrame::serialize() const { out.push_back((hcrc >> 8) & 0xFF); out.push_back(hcrc & 0xFF); - // Payload: src_callsign (10B) + dst_callsign (10B) + caps (1B) + wfmode (1B) + mod (1B) + rate (1B) + snr (1B) + // Payload: src_callsign (10B) + dst_callsign (10B) + caps (1B) + wfmode (1B) + // + mod (1B) + rate (1B) + snr (1B) + cw_count (1B) = 26B for (int i = 0; i < MAX_CALLSIGN_LEN; i++) { out.push_back(static_cast(src_callsign[i])); } @@ -828,6 +837,7 @@ Bytes ConnectFrame::serialize() const { out.push_back(initial_modulation); out.push_back(initial_code_rate); out.push_back(measured_snr); + out.push_back(data_frame_cw_count); // Frame CRC (2 bytes) uint16_t fcrc = ControlFrame::calculateCRC(out.data(), out.size()); @@ -899,6 +909,7 @@ std::optional ConnectFrame::deserialize(ByteSpan data) { f.initial_modulation = data[field_offset + 2]; f.initial_code_rate = data[field_offset + 3]; f.measured_snr = data[field_offset + 4]; + f.data_frame_cw_count = data[field_offset + 5]; return f; } diff --git a/src/protocol/frame_v2.hpp b/src/protocol/frame_v2.hpp index 1317027..2c62983 100644 --- a/src/protocol/frame_v2.hpp +++ b/src/protocol/frame_v2.hpp @@ -344,10 +344,12 @@ struct ControlFrame { static ControlFrame makeDisconnect(const std::string& src, const std::string& dst); static ControlFrame makeModeChange(const std::string& src, const std::string& dst, uint16_t seq, 
Modulation new_mod, CodeRate new_rate, - float snr_db, float fading_index, uint8_t reason); + float snr_db, float fading_index, uint8_t reason, + uint8_t cw_count = 0); static ControlFrame makeModeChangeByHash(const std::string& src, uint32_t dst_hash, uint16_t seq, Modulation new_mod, CodeRate new_rate, - float snr_db, float fading_index, uint8_t reason); + float snr_db, float fading_index, uint8_t reason, + uint8_t cw_count = 0); static ControlFrame makeConnect(const std::string& src, const std::string& dst, uint8_t mode_capabilities, uint8_t preferred_mode); static ControlFrame makeConnectAck(const std::string& src, const std::string& dst, @@ -380,6 +382,10 @@ struct ControlFrame { float snr_db; float fading_index; uint8_t reason; + // Negotiated fixed-frame CW count for the new rate. 0 means "old peer + // / unspecified — receiver picks via recommendCWCount(rate)". Wire + // byte: payload[5] (was reserved). + uint8_t data_frame_cw_count; }; // Parse MODE_CHANGE payload from a ControlFrame @@ -390,6 +396,7 @@ struct ControlFrame { info.snr_db = decodeSNR(payload[2]); info.reason = payload[3]; info.fading_index = decodeFadingIndex(payload[4]); + info.data_frame_cw_count = payload[5]; return info; } }; @@ -468,7 +475,7 @@ struct DataFrame { // Always uses 4 codewords with frame-level interleaving for fading resistance. struct ConnectFrame { static constexpr size_t MAX_CALLSIGN_LEN = 10; // 9 chars + null terminator - static constexpr size_t PAYLOAD_SIZE = 25; // 10 + 10 + 1 + 1 + 1 + 1 + 1 + static constexpr size_t PAYLOAD_SIZE = 26; // 10 + 10 + 1 + 1 + 1 + 1 + 1 + 1 (cw_count) FrameType type = FrameType::CONNECT; uint8_t flags = Flags::VERSION_V2; @@ -485,22 +492,29 @@ struct ConnectFrame { uint8_t initial_modulation = 0; // Forced/agreed Modulation (0xFF=AUTO) uint8_t initial_code_rate = 0; // Forced/agreed CodeRate (0xFF=AUTO) uint8_t measured_snr = 0; // CONNECT_ACK: measured SNR + // Negotiated fixed-frame CW count. 
+ // CONNECT: initiator's forced CW count (0=AUTO, else 1..8) + // CONNECT_ACK: responder's chosen CW count (final agreed value, 1..8) + uint8_t data_frame_cw_count = 0; // Factory methods static ConnectFrame makeConnect(const std::string& src, const std::string& dst, uint8_t mode_caps, uint8_t forced_waveform, uint8_t forced_modulation = 0xFF, - uint8_t forced_code_rate = 0xFF); + uint8_t forced_code_rate = 0xFF, + uint8_t forced_cw_count = 0); static ConnectFrame makeConnectAck(const std::string& src, const std::string& dst, uint8_t neg_mode, Modulation init_mod, CodeRate init_rate, - float snr_db, float fading_index); + float snr_db, float fading_index, + uint8_t cw_count); static ConnectFrame makeConnectNak(const std::string& src, const std::string& dst); static ConnectFrame makeDisconnect(const std::string& src, const std::string& dst); // Hash-based factory (for responding when only hash is known, fills in our callsign) static ConnectFrame makeConnectAckByHash(const std::string& src, uint32_t dst_hash, uint8_t neg_mode, Modulation init_mod, CodeRate init_rate, - float snr_db, float fading_index); + float snr_db, float fading_index, + uint8_t cw_count); static ConnectFrame makeConnectNakByHash(const std::string& src, uint32_t dst_hash); // Serialize to bytes (uses DATA frame format) diff --git a/src/protocol/protocol_engine.cpp b/src/protocol/protocol_engine.cpp index 08c28ea..cd17644 100644 --- a/src/protocol/protocol_engine.cpp +++ b/src/protocol/protocol_engine.cpp @@ -448,9 +448,9 @@ void ProtocolEngine::setForcedCodeRate(CodeRate rate) { connection_.setForcedCodeRate(rate); } -void ProtocolEngine::setForcedFrameCodewords(int cw_count) { +void ProtocolEngine::setForcedFrameCodewords(int cw_count, bool forced) { std::lock_guard lock(mutex_); - connection_.setForcedFrameCodewords(cw_count); + connection_.setForcedFrameCodewords(cw_count, forced); } Modulation ProtocolEngine::getForcedModulation() const { diff --git a/src/protocol/protocol_engine.hpp 
b/src/protocol/protocol_engine.hpp index 370a327..bf5cbbe 100644 --- a/src/protocol/protocol_engine.hpp +++ b/src/protocol/protocol_engine.hpp @@ -134,7 +134,7 @@ class ProtocolEngine { // 0xFF (AUTO) = let responder decide based on SNR void setForcedModulation(Modulation mod); void setForcedCodeRate(CodeRate rate); - void setForcedFrameCodewords(int cw_count); + void setForcedFrameCodewords(int cw_count, bool forced = true); Modulation getForcedModulation() const; CodeRate getForcedCodeRate() const; int getForcedFrameCodewords() const; diff --git a/tests/test_frame_v2.cpp b/tests/test_frame_v2.cpp index 83022ac..dd7a6cf 100644 --- a/tests/test_frame_v2.cpp +++ b/tests/test_frame_v2.cpp @@ -237,13 +237,14 @@ void test_connect_frame_roundtrip_and_crc() { auto ack = ConnectFrame::makeConnectAck("W1AW", "VA2MVR/P", static_cast(WaveformMode::OFDM_CHIRP), Modulation::DQPSK, CodeRate::R1_2, - 15.25f, 0.62f); + 15.25f, 0.62f, 8); auto ack_bytes = ack.serialize(); auto ack_parsed = ConnectFrame::deserialize(ack_bytes); assert(ack_parsed.has_value()); assert(ack_parsed->type == FrameType::CONNECT_ACK); assert(std::abs(decodeSNR(ack_parsed->measured_snr) - 15.25f) < 0.001f); assert(std::abs(decodeFadingIndex(ack_parsed->mode_capabilities) - 0.62f) < 0.001f); + assert(ack_parsed->data_frame_cw_count == 8); auto corrupt_header = serialized; corrupt_header[5] ^= 0x01; diff --git a/tests/test_protocol.cpp b/tests/test_protocol.cpp index 91eaa54..14e9e32 100644 --- a/tests/test_protocol.cpp +++ b/tests/test_protocol.cpp @@ -133,7 +133,7 @@ bool test_control_frame_types() { // Test ConnectFrames (3 codewords) std::vector> connect_cases = { { v2::ConnectFrame::makeConnect("CALLSIGN1", "CALLSIGN2", 0x07, 0), v2::FrameType::CONNECT }, - { v2::ConnectFrame::makeConnectAck("CALLSIGN1", "CALLSIGN2", 0, ultra::Modulation::DQPSK, ultra::CodeRate::R1_4, 15.0f, 0.60f), v2::FrameType::CONNECT_ACK }, + { v2::ConnectFrame::makeConnectAck("CALLSIGN1", "CALLSIGN2", 0, ultra::Modulation::DQPSK, 
ultra::CodeRate::R1_4, 15.0f, 0.60f, 4), v2::FrameType::CONNECT_ACK }, { v2::ConnectFrame::makeConnectNak("CALLSIGN1", "CALLSIGN2"), v2::FrameType::CONNECT_NAK }, { v2::ConnectFrame::makeDisconnect("CALLSIGN1", "CALLSIGN2"), v2::FrameType::DISCONNECT }, }; @@ -815,7 +815,9 @@ bool test_protocol_rate_upgrade() { ultra::Modulation a_new_mod = ultra::Modulation::DQPSK; ultra::CodeRate a_new_rate = ultra::CodeRate::R1_4; - stationA.setDataModeChangedCallback([&](ultra::Modulation mod, ultra::CodeRate rate, float snr, float peer_fading) { + stationA.setDataModeChangedCallback([&](ultra::Modulation mod, ultra::CodeRate rate, + int cw_count, float snr, float peer_fading) { + (void)cw_count; (void)snr; (void)peer_fading; a_mode_changed = true; diff --git a/tools/cli_simulator.cpp b/tools/cli_simulator.cpp index 8eaff6c..3df5e88 100644 --- a/tools/cli_simulator.cpp +++ b/tools/cli_simulator.cpp @@ -711,9 +711,11 @@ class SimulatedStation { void setSNR(float snr) { snr_db_ = snr; } void setForcedModulation(Modulation mod) { protocol_.setForcedModulation(mod); } void setForcedCodeRate(CodeRate rate) { protocol_.setForcedCodeRate(rate); } - void setFixedFrameCodewords(int cw_count) { + // forced=true → operator override, propagates via wire. + // forced=false → boot-time default (encoder/decoder bootstrap only). + void setFixedFrameCodewords(int cw_count, bool forced = true) { fixed_frame_codewords_ = v2::sanitizeFixedFrameCodewords(cw_count); - protocol_.setForcedFrameCodewords(fixed_frame_codewords_); + protocol_.setForcedFrameCodewords(fixed_frame_codewords_, forced); if (encoder_) encoder_->setFixedFrameCodewords(fixed_frame_codewords_); if (decoder_) decoder_->setFixedFrameCodewords(fixed_frame_codewords_); } @@ -1384,10 +1386,26 @@ class SimulatedStation { } }); - // Data mode changes (modulation + code rate) + // Data mode changes (modulation + code rate + CW count). 
The CW + // count comes from the wire (CONNECT_ACK / MODE_CHANGE) — protocol + // layer has already set its own data_frame_cw_count_, this just + // syncs the encoder + decoder. NO re-entry into protocol_ here: + // the engine mutex is held while the callback fires (the deadlock + // we caught on 2026-05-04). protocol_.setDataModeChangedCallback([this](Modulation mod, CodeRate rate, + int cw_count, float peer_snr_db, float peer_fading) { setDataMode(mod, rate); + if (cw_count > 0 && cw_count != fixed_frame_codewords_) { + fixed_frame_codewords_ = v2::sanitizeFixedFrameCodewords(cw_count); + if (encoder_) encoder_->setFixedFrameCodewords(fixed_frame_codewords_); + if (decoder_) decoder_->setFixedFrameCodewords(fixed_frame_codewords_); + LOG_MODEM(INFO, "[%s] Negotiated CW count: %d for %s %s " + "(peer SNR=%.1f, fading=%.2f)", + callsign_.c_str(), fixed_frame_codewords_, + modulationToString(mod), codeRateToString(rate), + peer_snr_db, peer_fading); + } logPeerAdaptiveAdvisory(mod, rate, peer_snr_db, peer_fading); }); @@ -1560,8 +1578,12 @@ class CLISimulator { void setForcedModulation(Modulation mod) { forced_mod_ = mod; } void setForcedCodeRate(CodeRate rate) { forced_rate_ = rate; } void setOFDMConfigPreset(OFDMConfigPreset preset) { ofdm_config_preset_ = preset; } + // Marks an operator-forced override; remembered so initStation() + // pushes it into the protocol layer with forced=true (which makes + // the initiator embed it in CONNECT and the responder honor it). 
void setFixedFrameCodewords(int cw_count) { fixed_frame_codewords_ = v2::sanitizeFixedFrameCodewords(cw_count); + cw_count_forced_ = true; } void setPreferredWaveform(WaveformMode mode) { forced_waveform_ = mode; } void setTestFileTransfer(bool v) { test_file_transfer_ = v; } @@ -1646,8 +1668,8 @@ class CLISimulator { bravo_->setDecodeDelayMs(decode_delay_ms_); alpha_->setRxBatchCallbacks(rx_batch_callbacks_); bravo_->setRxBatchCallbacks(rx_batch_callbacks_); - alpha_->setFixedFrameCodewords(fixed_frame_codewords_); - bravo_->setFixedFrameCodewords(fixed_frame_codewords_); + alpha_->setFixedFrameCodewords(fixed_frame_codewords_, cw_count_forced_); + bravo_->setFixedFrameCodewords(fixed_frame_codewords_, cw_count_forced_); alpha_->setSoftCombiningHARQ(soft_combining_harq_); bravo_->setSoftCombiningHARQ(soft_combining_harq_); @@ -1773,6 +1795,7 @@ class CLISimulator { int decode_delay_ms_ = 0; // --decode-delay-ms N (simulated slow decoder) int rx_batch_callbacks_ = 1; // --rx-batch-callbacks N (batched decoder feed) int fixed_frame_codewords_ = v2::kDefaultFixedFrameCodewords; + bool cw_count_forced_ = false; // true iff --cw-count was passed bool soft_combining_harq_ = false; bool save_signals_ = false; int save_signals_message_limit_ = 0; // 0 = full run @@ -2472,7 +2495,10 @@ class CLISimulator { station->setChannelInterleave(use_channel_interleave_); station->setNoBurstInterleave(no_burst_interleave_); station->setBurstInterleaveGroupSize(burst_group_size_); - station->setFixedFrameCodewords(fixed_frame_codewords_); + // forced=true only if user passed --cw-count; otherwise this is + // boot-time init that should leave protocol-level forced_cw_count=0 + // so the responder gets to auto-pick via recommendCWCount(rate). 
+ station->setFixedFrameCodewords(fixed_frame_codewords_, cw_count_forced_); station->setSoftCombiningHARQ(soft_combining_harq_); if (soft_combining_harq_) { std::cout << " HARQ: RX soft-combining enabled\n"; diff --git a/tools/threaded_simulator.cpp b/tools/threaded_simulator.cpp index af146a2..1c20936 100644 --- a/tools/threaded_simulator.cpp +++ b/tools/threaded_simulator.cpp @@ -182,13 +182,16 @@ class Station { } }); - // Mode changes - protocol_.setDataModeChangedCallback([this](Modulation mod, CodeRate rate, float snr, float peer_fading) { + // Mode changes (incl. negotiated CW count from wire) + protocol_.setDataModeChangedCallback([this](Modulation mod, CodeRate rate, + int cw_count, + float snr, float peer_fading) { (void)snr; (void)peer_fading; modem_.setDataMode(mod, rate); - LOG_INFO("MODEM", "[%s] MODE -> %s %s", callsign_.c_str(), - modulationToString(mod), codeRateToString(rate)); + modem_.setFixedFrameCodewords(cw_count); + LOG_INFO("MODEM", "[%s] MODE -> %s %s cw=%d", callsign_.c_str(), + modulationToString(mod), codeRateToString(rate), cw_count); }); protocol_.setModeNegotiatedCallback([this](WaveformMode mode) { diff --git a/tools/ultra_tnc.cpp b/tools/ultra_tnc.cpp index 86e4711..6865ac2 100644 --- a/tools/ultra_tnc.cpp +++ b/tools/ultra_tnc.cpp @@ -246,10 +246,17 @@ class UltraTNCStation { }); engine_.setDataModeChangedCallback([this](Modulation mod, CodeRate rate, + int cw_count, float peer_snr_db, float peer_fading) { (void)peer_snr_db; (void)peer_fading; setDataMode(mod, rate); + // Sync encoder/decoder to the negotiated CW count (set by the + // protocol layer from CONNECT_ACK / MODE_CHANGE wire bytes). + // Direct calls only — DO NOT re-enter ProtocolEngine here, the + // engine mutex is held while this callback fires. 
+ encoder_.setFixedFrameCodewords(cw_count); + decoder_.setFixedFrameCodewords(cw_count); }); engine_.setModeNegotiatedCallback([this](WaveformMode mode) { From 2ff233208528168ecd94afe0286662634697974e Mon Sep 17 00:00:00 2001 From: Mathieu Vachon Date: Mon, 4 May 2026 18:31:36 -0400 Subject: [PATCH 12/13] Preserve --cw-count override across mid-transfer MODE_CHANGE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Codex review caught: requestModeChange() always set pending_cw_count_ = recommendCWCount(new_rate), so an operator-forced --cw-count 4 was silently lost on the first mid-transfer MODE_CHANGE (rate adapted up → CW jumped to 8 against the operator's wish). The config_.forced_cw_count field already exists for exactly this purpose at CONNECT time; just check it again at MODE_CHANGE time. Cast wrapper kept compatible with int return from sanitize + recommendCWCount. Also adds 2026-05-04 CHANGELOG entry covering the full wire-negotiation work (commits 1a98b4d + this one). Co-Authored-By: Claude Opus 4.7 (1M context) --- docs/CHANGELOG.md | 100 +++++++++++++++++++++++++++ src/protocol/connection_handlers.cpp | 11 ++- 2 files changed, 108 insertions(+), 3 deletions(-) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 6dcdd5c..9be8c68 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -10,6 +10,106 @@ This log tracks all bug fixes and behavioral changes to prevent re-doing work du --- +## 2026-05-04: Wire-level negotiation of fixed-frame CW count + +**What was broken:** +Throughput on DQPSK R1/2 SNR=15 good fading was bottlenecked at +~1077 bps because every data frame carried only 4 codewords (the +`kDefaultFixedFrameCodewords` default). Mac↔Pi5 hardware A/B with +manual `--cw-count 8` on both peers showed 1615 bps (+50 %, with +**fewer** retx because larger frames amortize the 5.3 s SACK-defer +overhead across twice the payload). 
The frame format already supported +1–8 CW (`kMaxFixedFrameCodewords = 8`, count in the frame header at +`frame_v2.cpp:808`) — the dial just wasn't being turned for everyday +auto-rate connections. + +A first attempt set CW from a host-side data-mode-changed callback +that called `protocol_.setForcedFrameCodewords()`. That re-entered +`ProtocolEngine::mutex_` (a non-recursive `std::mutex` — see +`protocol_engine.hpp:34`), deadlocking the responder's protocol +thread. Symptom: BRAVO logged "Adaptive CW count 4 -> 8", then went +silent forever; CONNECT_ACK was queued in `tx_queue_` but +`defer_tx_` never reset (line 222 of `protocol_engine.cpp:onRxData` +unreachable past the deadlock); ALPHA timed out waiting at 120 s. +Reproduced 100 % with `--seed 1`. + +Codex (gpt-5.5 xhigh) review of the redesign also surfaced three +hazards I'd missed: stale CONNECT_ACK retry timer (computed before +CW finalized), decoder fallback to configured `fixed_frame_codewords_` +when the header read fails (so the wire-byte alone doesn't save us +when peers disagree on configured CW), and the general "callbacks +fire under the protocol mutex — host code must not call back in". +Codex's bottom line: don't ship "both sides recompute" as the +agreement mechanism — make CW an explicit negotiated parameter on +the wire. + +**What was changed:** +- **Wire format** (`src/protocol/frame_v2.{hpp,cpp}`): + - `ConnectFrame::PAYLOAD_SIZE` 25 → 26 B; new `data_frame_cw_count` + byte appended after `measured_snr`. Frame total 44 → 45 B. + - `CONNECT` carries initiator's forced CW (0 = AUTO); + `CONNECT_ACK` carries responder's chosen value (1..8). Initiator + applies the echoed value via `frame.data_frame_cw_count`. + - `ControlFrame::ModeChangeInfo` gains `data_frame_cw_count` via + `payload[5]` (was a reserved byte — no size change). +- **Policy** (`src/protocol/connection_policy.hpp`): + - `recommendCWCount(rate)` is rate-only: R1/2, R2/3, R3/4 → 8; + R1/4 → 4. 
No SNR/fading dependency, so cross-peer agreement + collapses to "both peers ran the same rate negotiation". +- **Connection** (`src/protocol/connection{.hpp,.cpp,_handlers.cpp}`): + - `applyDataMode(mod, rate, cw_count = 0)`: explicit CW from + MODE_CHANGE wire byte, else auto via `recommendCWCount(rate)`. + Triggers `requeuePendingChunks` on rate-changed OR cw-changed + (was rate-changed only). + - `setForcedFrameCodewords(cw, forced = true)`: `forced = true` + marks `config_.forced_cw_count` for one-sided wire propagation + (initiator embeds in CONNECT, responder honors and echoes). + `forced = false` is the boot-time path (host wiring up + encoder/decoder before connection) — does NOT mark forced and + so does not bypass the responder's auto-pick. + - Responder picks negotiated CW BEFORE building CONNECT_ACK and + BEFORE computing the retry timer (closes the stale-timer hazard). +- **Callback** (`src/protocol/connection.hpp`): + - `DataModeChangedCallback` signature now + `(mod, rate, cw_count, snr_db, peer_fading)`. Hosts (cli_simulator, + ultra_gui real + virtual, ultra_tnc, threaded_simulator) update + encoder + decoder directly from the param. **No** call to + `protocol_.setForcedFrameCodewords()` inside the callback — the + rule is now spelled out in a comment on the typedef. +- **CLI** (`tools/cli_simulator.cpp`): + - `cw_count_forced_` flag: only `--cw-count N` flips it to true. + Boot init at `CLISimulator::initStation` passes `cw_count_forced_` + (false by default) so the default 4 doesn't bypass auto-pick. + +**How it's properly fixed:** +Both peers see the negotiated CW count on the wire (CONNECT_ACK byte +for initial, MODE_CHANGE byte for mid-transfer). They set their local +`data_frame_cw_count_` from the wire, never from independent +re-derivation, so peers cannot disagree even if their channel +measurements drift. The `recommendCWCount` function is rate-only so +even in fallback paths there's no SNR/fading-driven divergence.
The +encoder/decoder are updated directly from the callback param, which +removes the protocol-mutex re-entry that caused the deadlock. + +**Test verification:** +- Sim regression: `./build/cli_simulator --snr 15 --fading good + --rate auto --file 5120 --max-time 200 --seed 1` → both peers log + "Negotiated CW count: 8 for DQPSK R1/2", handshake at 10.5/11.0 s, + transfer done by 36 s. +- `--cw-count 4` override: ALPHA logs `forced_cw=4`, both peers + configure cw=4. Wire negotiation honors the override one-sided. +- ctest: 35/35 green (incl. `ConnectionPolicy`, `ConnectionAdaptive`, + `FrameV2` — the suites that broke on the prior abandoned attempt). +- Hardware A/B (Mac↔Pi5 audio loopback, `--inject good --snr 15`, + DQPSK R1/2 5KB, no `--cw-count`): + - Run 1 (boot-init bug had forced=true): 1233 bps, 39 frames, 0 retx + - Run 2 (bug fixed): **1448 bps, 19 frames, 0 retx** (+17 % + in-session, frames halved 39→19 confirms CW=8 in effect). + +**Commit:** `1a98b4d`. + +--- + ## 2026-05-02: TNC Phase 5 — Windows cross-platform support **Goal:** diff --git a/src/protocol/connection_handlers.cpp b/src/protocol/connection_handlers.cpp index 8d2301a..5a343d1 100644 --- a/src/protocol/connection_handlers.cpp +++ b/src/protocol/connection_handlers.cpp @@ -439,9 +439,14 @@ void Connection::requestModeChange(Modulation new_mod, CodeRate new_rate, mode_change_retry_count_ = 0; mode_change_timeout_ms_ = MODE_CHANGE_TIMEOUT_MS; - // Pick the CW count for the new rate (rate-only — both peers will agree - // because both run recommendCWCount(rate) on the same rate). - pending_cw_count_ = static_cast(connection_policy::recommendCWCount(new_rate)); + // Pick the CW count for the new rate. If the operator forced a CW + // count via --cw-count (config_.forced_cw_count != 0), preserve that + // override across mode changes — otherwise auto-pick from rate. + // Without this, a later MODE_CHANGE would silently drop the user's + // override (caught by Codex, 2026-05-04). 
+ pending_cw_count_ = static_cast((config_.forced_cw_count != 0) + ? v2::sanitizeFixedFrameCodewords(config_.forced_cw_count) + : connection_policy::recommendCWCount(new_rate)); mode_change_seq_++; auto frame = v2::ControlFrame::makeModeChange(local_call_, remote_call_, From 767a081f1db9ac1685471be833e5e7792563d49c Mon Sep 17 00:00:00 2001 From: Mathieu Vachon Date: Mon, 4 May 2026 20:23:09 -0400 Subject: [PATCH 13/13] Tighten D8PSK gate SNR>=20 -> SNR>=22 (good fading) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 10-seed hardware sweep (Mac↔Pi5 audio loopback, 5KB DQPSK auto, good fading injected, 2026-05-04 post-CW=8 wire negotiation): SNR=20 good: D8PSK retx-hit 38 % (3/8 storms incl. 270 bps) mean 1448 bps ≈ DQPSK alt 1444 bps — wash with tail. SNR=22 good: D8PSK retx-hit 17 % (1/6 single retx, no storms) mean 1783 bps vs DQPSK 1450 — +23 % real win. SNR=24 good: D8PSK retx-hit 43 % (3/7 incl. 2 FAILs at 320-374 bps, 17-78 retx). Counterintuitively WORSE than 22. The single-seed CLAUDE.md datapoint (SNR=20 D8PSK 1595 bps clean) that motivated the prior SNR>=20 gate was unrepresentative of the distribution. Variance hidden in single-seed measurements. Storms aren't predictable from bulk fading_index (SNR=22 storm hit at fading=0.45; SNR=24 storms hit at 0.52/0.53/0.58 — all median), so tightening fading further doesn't help. SNR=22 is the floor. Test updated to assert the new ladder: SNR=22 good promotes to D8PSK; SNR=20 good stays DQPSK. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- src/protocol/waveform_selection.hpp | 30 ++++++++++++++++++----------- tests/test_waveform_policy.cpp | 26 +++++++++++++++---------- 2 files changed, 35 insertions(+), 21 deletions(-) diff --git a/src/protocol/waveform_selection.hpp b/src/protocol/waveform_selection.hpp index 82b6baa..96db3ce 100644 --- a/src/protocol/waveform_selection.hpp +++ b/src/protocol/waveform_selection.hpp @@ -224,17 +224,25 @@ inline void recommendDataMode(float snr_db, WaveformMode waveform, return; } - // D8PSK R1/2 — gated on the hardware-measured cliff, not the - // simulator one. Mac↔Pi5 audio loopback A/B with synthetic-channel - // injection (5 KB file, R1/2, good fading) showed: - // SNR=15 good: DQPSK 1078 bps (2 retx) > D8PSK 728 bps (5-40 retx) - // SNR=18 good: DQPSK 1234 bps (0 retx) > D8PSK 641 bps (38 retx) - // SNR=20 good: DQPSK 1247 bps (0 retx) < D8PSK 1595 bps (0 retx) ← +28% - // The simulator's "SNR>=10 works" came from Watterson without - // soundcard quantization / AGC residual / audio chain phase noise - // — D8PSK's 8-phase decision is far more sensitive to those than - // DQPSK's 4-phase. Real cliff is SNR=20 in good fading. - if (fading_index < 0.65f && snr_db >= 20.0f) { + // D8PSK R1/2 — gated on the hardware-measured cliff. Mac↔Pi5 audio + // loopback 10-seed sweep at SNR=20/22/24 good fading injected + // (2026-05-04, post-CW=8 wire negotiation) showed: + // SNR=20 good: D8PSK retx-hit 38 % (3/8 storms incl. 270 bps) + // mean 1448 bps ≈ DQPSK alt 1444 bps — wash with + // catastrophic tail. + // SNR=22 good: D8PSK retx-hit 17 % (1/6 single retx, no storms) + // mean 1783 bps vs DQPSK 1450 bps — +23 % real win. + // SNR=24 good: D8PSK retx-hit 43 % (3/7 incl. 2 FAILs at 320-374 bps, + // 17-78 retx). Counterintuitively WORSE than 22: + // higher SNR doesn't fix the soundcard/Doppler-induced + // phase glitches that cliff D8PSK; it just promotes + // D8PSK in more conditions where those glitches hit. 
+ // The single-seed CLAUDE.md datapoint (SNR=20 D8PSK 1595 bps clean) + // was unrepresentative — variance hidden in single-seed measurements. + // Conclusion: SNR=22 is the floor where D8PSK is net-positive. + // Storms aren't predictable from bulk fading_index, so tightening + // fading further doesn't help. + if (fading_index < 0.65f && snr_db >= 22.0f) { mod = Modulation::D8PSK; rate = CodeRate::R1_2; return; diff --git a/tests/test_waveform_policy.cpp b/tests/test_waveform_policy.cpp index de5b421..853b358 100644 --- a/tests/test_waveform_policy.cpp +++ b/tests/test_waveform_policy.cpp @@ -105,19 +105,25 @@ void test_data_mode_policy() { CHECK(mod == Modulation::D8PSK, "high-SNR AWGN should promote to D8PSK"); CHECK(rate == CodeRate::R3_4, "near-AWGN SNR27 should use R3/4"); - // SNR=20 good fading: hardware A/B forced D8PSK R2/3 → adaptive - // promotion path collapsed. R2/3 is now AWGN-only (fading<0.15); - // good fading SNR=20 stays D8PSK R1/2. + // SNR=22 good fading: 10-seed hardware sweep (Mac↔Pi5, 2026-05-04) + // showed D8PSK retx-hit 17% (1/6 single retx, no storms), mean + // 1783 bps vs DQPSK 1450 bps — +23% real win. This is the floor + // where D8PSK is net-positive on hardware. + recommendDataMode(22.0f, WaveformMode::OFDM_CHIRP, mod, rate, 0.30f); + CHECK(mod == Modulation::D8PSK, "good-fading SNR22 should be D8PSK (hardware sweet spot)"); + CHECK(rate == CodeRate::R1_2, "good-fading SNR22 D8PSK uses R1/2"); + + // SNR=20 good fading: same sweep showed D8PSK retx-hit 38% + // (3/8 storms incl. 270 bps catastrophic), mean 1448 bps ≈ DQPSK + // 1444 — wash with high variance. D8PSK gate now SNR>=22 so + // SNR=20 stays DQPSK R1/2. 
recommendDataMode(20.0f, WaveformMode::OFDM_CHIRP, mod, rate, 0.30f); - CHECK(mod == Modulation::D8PSK, "good-fading SNR20 should be D8PSK"); - CHECK(rate == CodeRate::R1_2, "good-fading SNR20 D8PSK uses R1/2 (R2/3 is AWGN-only on hw)"); + CHECK(mod == Modulation::DQPSK, "good-fading SNR20 stays DQPSK (D8PSK gate raised to SNR>=22)"); + CHECK(rate == CodeRate::R1_2, "good-fading SNR20 should use R1/2 with DQPSK"); - // SNR=18 good fading: hardware A/B (Mac↔Pi5 5KB R1/2 inject good) - // showed D8PSK 641 bps with 38 retx vs DQPSK 1234 bps 0 retx. - // D8PSK R1/2 is now gated at SNR>=20 in fading, so SNR=18 good - // falls back to DQPSK R1/2. + // SNR=18 good fading: well below the D8PSK floor. recommendDataMode(18.0f, WaveformMode::OFDM_CHIRP, mod, rate, 0.30f); - CHECK(mod == Modulation::DQPSK, "good-fading SNR18 stays DQPSK (D8PSK cliff at SNR=20 on hw)"); + CHECK(mod == Modulation::DQPSK, "good-fading SNR18 stays DQPSK"); CHECK(rate == CodeRate::R1_2, "good-fading SNR18 should use R1/2 with DQPSK"); // SNR=12 good fading: well below the D8PSK hardware cliff