From 062e83059af29c7384de2b2a8df2a0f66f0b5ca4 Mon Sep 17 00:00:00 2001 From: Reuven Harrison Date: Fri, 22 May 2026 20:28:27 +0300 Subject: [PATCH] pr-comment: pipe POST body via stdin to avoid curl ARG_MAX failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Same class of bug as #118 (jq ARG_MAX), one layer down. After #118 fixed the jq invocation, the next line on the failure path is response=$(curl ... -d "$payload") The full JSON payload (~4 MB for the customer spec that surfaced the jq case) goes through curl's argv via `-d`, exceeds the OS argument- length limit (ARG_MAX, typically 2 MB on Linux), and the script aborts with: /entrypoint.sh: line 124: curl: Argument list too long right after the (now-successful) jq step, with the same downstream symptoms: free `::notice::` URL printed, no PR comment posted, no review-token link generated. Customer saw "same problem" after bumping their pin to @main with #118 applied — the jq step now works but the next layer hits the same trap. Fix: pipe the payload to curl via stdin and use `--data-binary @-` to consume from stdin. printf is a shell builtin so the variable never goes through execve. Same shape as the jq fix. Extends the test workflow with `pr_comment_curl_handles_large_payload`, a companion to the existing `pr_comment_handles_large_payload`. The previous test exercises the jq path with an empty oasdiff_token, which short-circuits past the curl step. The new test exercises the curl step itself by providing a non-empty token and stubbing both oasdiff (multi-MB changelog source) and curl (records the body byte count from stdin and emits a fake 200 response). Asserts the curl stub was invoked with >4 MB of body — proves the payload made it through stdin without hitting ARG_MAX on argv. Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/test.yaml | 92 +++++++++++++++++++++++++++++++++++++ pr-comment/entrypoint.sh | 11 ++++- 2 files changed, 101 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index d1eb398..82f3a98 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -658,3 +658,95 @@ jobs: exit 1 fi echo "PASS" + + pr_comment_curl_handles_large_payload: + runs-on: ubuntu-latest + name: Test pr-comment curl POST survives a multi-MB payload + # Companion to pr_comment_handles_large_payload. The previous test + # exercises the jq payload-construction path with an empty + # oasdiff_token, which short-circuits past the curl step. This one + # exercises the curl step itself by providing a non-empty token and + # stubbing both oasdiff (multi-MB changelog source) and curl (verifies + # it received the payload via stdin, not via argv). Catches the + # regression class where `curl -d "$payload"` puts a multi-MB body on + # argv and aborts with `curl: Argument list too long` after the jq + # invocation already succeeded — the same ARG_MAX trap one layer + # down. The fix pipes the payload through stdin via `--data-binary @-`. + steps: + - uses: actions/checkout@v6 + - name: Stub oasdiff + curl, run entrypoint with a fake token + run: | + set -euo pipefail + mkdir -p /tmp/stub /tmp/run + + # Same filler strategy as the jq test: dd | tr produces a + # ~2 MB string of 'a' characters and exits 0 cleanly under + # pipefail. + dd if=/dev/zero bs=1024 count=2000 status=none | tr '\0' 'a' > /tmp/filler + + cat > /tmp/stub/oasdiff <<'STUB' + #!/bin/sh + filler=$(cat /tmp/filler) + printf '[{"id":"big-1","text":"%s","level":3},{"id":"big-2","text":"%s","level":3}]' "$filler" "$filler" + STUB + chmod +x /tmp/stub/oasdiff + + # Stub curl: consume the POST body from stdin, record its + # byte count to a side file the assertions below can read, + # and emit the success-response shape the entrypoint expects + # from `-s -w "\n%{http_code}"` (body then a newline then the + # HTTP status code). + cat > /tmp/stub/curl <<'STUB' + #!/bin/sh + body=$(cat) + printf '%s' "$body" | wc -c > /tmp/run/curl-bytes + printf '{"review_token":"stub-token-uuid"}\n200\n' + STUB + chmod +x /tmp/stub/curl + + export GITHUB_REF=refs/pull/123/merge + export GITHUB_REPOSITORY=foo/bar + export GITHUB_SHA=deadbeef + export GITHUB_BASE_REF=main + export GITHUB_STEP_SUMMARY=/tmp/run/step-summary + cat > /tmp/run/event.json <&1) + rc=$? + set -e + echo "--- entrypoint output (truncated) ---" + echo "$out" | head -c 2000 + echo "--- exit code: $rc ---" + + if [ "$rc" -ne 0 ]; then + echo "FAIL: expected exit 0, got $rc" >&2 + if echo "$out" | grep -q "curl: Argument list too long"; then + echo "The regression has returned: \$payload is being passed via curl argv (-d) instead of stdin (--data-binary @-)." >&2 + fi + exit 1 + fi + if [ ! -f /tmp/run/curl-bytes ]; then + echo "FAIL: curl stub was never invoked; script aborted before reaching the POST" >&2 + exit 1 + fi + curl_bytes=$(cat /tmp/run/curl-bytes | tr -d ' ') + echo "curl received: ${curl_bytes} bytes" + if [ "$curl_bytes" -lt 4000000 ]; then + echo "FAIL: curl stub received only ${curl_bytes} bytes; expected >4 MB (proves payload made it through stdin)" >&2 + exit 1 + fi + if ! echo "$out" | grep -q "::notice::.*View API changes"; then + echo "FAIL: script aborted before emitting the review-page notice" >&2 + exit 1 + fi + echo "PASS" diff --git a/pr-comment/entrypoint.sh b/pr-comment/entrypoint.sh index 8b50d42..50f8893 100755 --- a/pr-comment/entrypoint.sh +++ b/pr-comment/entrypoint.sh @@ -121,10 +121,17 @@ if [ -z "$oasdiff_token" ]; then exit 0 fi -response=$(curl -s -w "\n%{http_code}" -X POST \ +# POST the payload via stdin (`--data-binary @-`) rather than as a +# `-d` argv value. For specs whose changelog runs into the multi-MB +# range the assembled payload is also multi-MB; passing it via argv +# would exceed ARG_MAX and surface as `curl: Argument list too long`, +# aborting the action exactly like the analogous jq case did at line +# 89 before the previous fix. `printf` is a shell builtin so the +# variable never goes through execve. +response=$(printf '%s' "$payload" | curl -s -w "\n%{http_code}" -X POST \ "${service_url}/tenants/${oasdiff_token}/pr-comment" \ -H "Content-Type: application/json" \ - -d "$payload") + --data-binary @-) http_code=$(echo "$response" | tail -1) body=$(echo "$response" | sed '$d')