Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion scripts/sw-pipeline-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -526,7 +526,16 @@ test_start_requires_goal_or_issue() {
}

# ──────────────────────────────────────────────────────────────────────────────
# 4. Intake with inline --goal creates branch + artifacts
# 4. Help includes cost approval bypass flag
# ──────────────────────────────────────────────────────────────────────────────
# Runs the pipeline with --help and checks that the usage text mentions the
# cost-approval bypass flag. Returns the status of the first failing assertion,
# or of the last assertion when both are evaluated.
test_help_includes_skip_cost_approval() {
    invoke_pipeline --help

    # Bail out with the assertion's own status if the exit code check fails.
    assert_exit_code 0 "help should succeed" || return

    assert_output_contains "skip-cost-approval" "help documents cost approval bypass"
}

# ──────────────────────────────────────────────────────────────────────────────
# 5. Intake with inline --goal creates branch + artifacts
# ──────────────────────────────────────────────────────────────────────────────
test_intake_inline() {
# Use intake-only template so pipeline stops after intake
Expand Down Expand Up @@ -1814,6 +1823,7 @@ main() {
"test_preflight_passes:Preflight passes with all mocks"
"test_preflight_fails_missing_loop:Preflight fails when sw-loop.sh missing"
"test_start_requires_goal_or_issue:Start requires --goal or --issue"
"test_help_includes_skip_cost_approval:CLI help includes --skip-cost-approval"
"test_intake_inline:Intake with --goal creates branch + artifacts"
"test_intake_issue:Intake with --issue fetches from GitHub"
"test_plan_generates_artifacts:Plan generates plan.md, dod.md, tasks"
Expand Down
129 changes: 129 additions & 0 deletions scripts/sw-pipeline.sh
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,122 @@ estimate_pipeline_cost() {
echo "{\"input_tokens\":${avg_input},\"output_tokens\":${avg_output}}"
}

# Pretty-print the forecast JSON into $ARTIFACTS_DIR/cost-forecast.json.
# Globals:   ARTIFACTS_DIR (read)
# Arguments: $1 - forecast JSON text
# Returns:   0 on success; non-zero if the directory cannot be created, jq
#            rejects the input, or the file cannot be written.
write_cost_forecast() {
    local forecast_json="$1"
    local target="$ARTIFACTS_DIR/cost-forecast.json"
    local rendered

    mkdir -p "$ARTIFACTS_DIR" || return

    # Format before opening the target: redirecting jq straight into the file
    # would truncate an existing forecast even when jq rejects the input.
    rendered=$(printf '%s\n' "$forecast_json" | jq '.') || return

    if [[ -n "$rendered" ]]; then
        printf '%s\n' "$rendered" > "$target"
    else
        # jq printed nothing (e.g. empty input): keep the artifact empty too.
        : > "$target"
    fi
}

# Build a cost/duration forecast for the enabled pipeline stages and print it
# on stdout as a single JSON object (fields listed in the final jq -n call).
# Globals read: PIPELINE_CONFIG, INTELLIGENCE_COMPLEXITY, MODEL,
#   COST_MODEL_RATES, EVENTS_FILE, COST_APPROVAL_THRESHOLD_USD.
# Calls: estimate_pipeline_cost and now_iso.
forecast_pipeline_cost() {
local stages_json
# Enabled stages only; an empty array is used if jq fails.
stages_json=$(jq '[.stages[] | select(.enabled == true)]' "$PIPELINE_CONFIG" 2>/dev/null || echo "[]")

local stage_count
stage_count=$(echo "$stages_json" | jq 'length' 2>/dev/null || echo "0")

local est input_tokens output_tokens
est=$(estimate_pipeline_cost "$stages_json")
input_tokens=$(echo "$est" | jq -r '.input_tokens // 0')
output_tokens=$(echo "$est" | jq -r '.output_tokens // 0')

local complexity_score
complexity_score="${INTELLIGENCE_COMPLEXITY:-5}"
# Anything that is not a plain non-negative integer is replaced by 5.
[[ ! "$complexity_score" =~ ^[0-9]+$ ]] && complexity_score=5

local model_plan model model_key input_rate output_rate
# NOTE(review): "// empty" sits inside the object constructor, so a stage with
# neither .config.model nor .model presumably yields no object at all and is
# missing from model_plan — confirm whether that is intended.
model_plan=$(jq -c '[.stages[] | select(.enabled==true) | {id: .id, model: (.config.model // .model // empty)}]' "$PIPELINE_CONFIG" 2>/dev/null || echo '[]')
# MODEL wins; otherwise .defaults.model from the config; otherwise "sonnet".
model="${MODEL:-$(jq -r '.defaults.model // "sonnet"' "$PIPELINE_CONFIG" 2>/dev/null || echo sonnet)}"
model_key=$(echo "$model" | tr '[:upper:]' '[:lower:]')
# Rates fall back to 3 (input) and 15 (output) when the lookup yields nothing.
# NOTE(review): model_key is spliced into the jq program text; a name that is
# not a plain identifier (e.g. containing "-") would presumably fail to parse
# and silently use the fallbacks — confirm.
input_rate=$(echo "$COST_MODEL_RATES" | jq -r ".${model_key}.input // 3" 2>/dev/null || echo "3")
output_rate=$(echo "$COST_MODEL_RATES" | jq -r ".${model_key}.output // 15" 2>/dev/null || echo "15")

local base_cost complexity_multiplier iteration_multiplier predicted_cost margin
# base = (input_tokens / 1e6) * input_rate + (output_tokens / 1e6) * output_rate
base_cost=$(awk -v it="$input_tokens" -v ot="$output_tokens" -v ir="$input_rate" -v or="$output_rate" 'BEGIN{printf "%.4f", ((it/1000000)*ir)+((ot/1000000)*or)}')
# 0.85 + 0.05 * complexity, i.e. 1.10 at the default complexity of 5.
complexity_multiplier=$(awk -v c="$complexity_score" 'BEGIN{printf "%.3f", 0.85 + (c/10)*0.5}')
Comment on lines +286 to +295
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Forecast cost calculation currently ignores per-stage model routing.

At Line 287 you build model_plan, but at Line 288-295 pricing is computed from a single model. For mixed-model stage configs, this can significantly skew predicted cost.

Proposed fix (routing-aware blended rates)
-    local model_plan model model_key input_rate output_rate
+    local model_plan model model_key input_rate output_rate
     model_plan=$(jq -c '[.stages[] | select(.enabled==true) | {id: .id, model: (.config.model // .model // empty)}]' "$PIPELINE_CONFIG" 2>/dev/null || echo '[]')
     model="${MODEL:-$(jq -r '.defaults.model // "sonnet"' "$PIPELINE_CONFIG" 2>/dev/null || echo sonnet)}"
     model_key=$(echo "$model" | tr '[:upper:]' '[:lower:]')
-    input_rate=$(echo "$COST_MODEL_RATES" | jq -r ".${model_key}.input // 3" 2>/dev/null || echo "3")
-    output_rate=$(echo "$COST_MODEL_RATES" | jq -r ".${model_key}.output // 15" 2>/dev/null || echo "15")
+    # Blend rates using per-stage model routing; fallback to selected/default model.
+    input_rate=$(jq -n \
+        --argjson mp "$model_plan" \
+        --argjson rates "$COST_MODEL_RATES" \
+        --arg fallback "$model_key" '
+        if ($mp|length) == 0 then
+          ($rates[$fallback].input // 3)
+        else
+          (($mp | map((.model // $fallback | ascii_downcase) as $m | ($rates[$m].input // 3)) | add) / ($mp|length))
+        end
+    ' 2>/dev/null || echo "3")
+    output_rate=$(jq -n \
+        --argjson mp "$model_plan" \
+        --argjson rates "$COST_MODEL_RATES" \
+        --arg fallback "$model_key" '
+        if ($mp|length) == 0 then
+          ($rates[$fallback].output // 15)
+        else
+          (($mp | map((.model // $fallback | ascii_downcase) as $m | ($rates[$m].output // 15)) | add) / ($mp|length))
+        end
+    ' 2>/dev/null || echo "15")

Also applies to: 324-346

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@scripts/sw-pipeline.sh` around lines 286 - 295, The cost forecast currently
ignores per-stage routing by using a single MODEL-derived rate; instead iterate
the already-built model_plan to compute blended input/output rates from
COST_MODEL_RATES (fallbacks kept) weighted by stage usage (e.g., count or
per-stage token proportions), then replace the single input_rate/output_rate
used in base_cost and the later block (the code computing base_cost,
complexity_multiplier and the duplicate logic around lines 324-346) with the
blended rates; ensure you reference model_plan, COST_MODEL_RATES, input_tokens,
output_tokens, base_cost, input_rate, output_rate and complexity_multiplier so
the script sums per-stage contributions and computes the final predicted_cost
consistently.


local max_iter avg_iter
# Max and mean of the stages' config.max_iterations; 10 when no stage sets it.
max_iter=$(echo "$stages_json" | jq '[.[] | .config.max_iterations // empty] | if length>0 then max else 10 end' 2>/dev/null || echo "10")
avg_iter=$(echo "$stages_json" | jq '[.[] | .config.max_iterations // empty] | if length>0 then (add/length) else 10 end' 2>/dev/null || echo "10")
# Shifts the estimate by 1% per iteration that the mean is above or below 10.
iteration_multiplier=$(awk -v a="$avg_iter" 'BEGIN{printf "%.3f", 1 + ((a-10)/100)}')

predicted_cost=$(awk -v b="$base_cost" -v cm="$complexity_multiplier" -v im="$iteration_multiplier" 'BEGIN{printf "%.2f", b*cm*im}')

local historical_avg historical_samples
# Mean total_cost of "pipeline.completed" events whose complexity lies within
# +/-1 of this run's score (events without a numeric complexity count as 5).
# Yields 0 when nothing matches or the events file cannot be read.
historical_avg=$(jq -s -r --arg cs "$complexity_score" '
map(select(.type=="pipeline.completed" and (.total_cost|tonumber?!=null)))
| map(select((.complexity|tonumber? // 5) >= (($cs|tonumber)-1) and (.complexity|tonumber? // 5) <= (($cs|tonumber)+1)))
| if length>0 then (map(.total_cost|tonumber) | add/length) else 0 end
' "${EVENTS_FILE:-$HOME/.shipwright/events.jsonl}" 2>/dev/null || echo "0")
# Same filter as above, counting the matching events instead of averaging.
historical_samples=$(jq -s -r --arg cs "$complexity_score" '
map(select(.type=="pipeline.completed" and (.total_cost|tonumber?!=null)))
| map(select((.complexity|tonumber? // 5) >= (($cs|tonumber)-1) and (.complexity|tonumber? // 5) <= (($cs|tonumber)+1)))
| length
' "${EVENTS_FILE:-$HOME/.shipwright/events.jsonl}" 2>/dev/null || echo "0")
# With history available, blend 60% computed estimate with 40% historical mean.
if [[ "${historical_samples:-0}" -gt 0 ]]; then
predicted_cost=$(awk -v p="$predicted_cost" -v h="$historical_avg" 'BEGIN{printf "%.2f", (p*0.6)+(h*0.4)}')
fi

# Margin is 27% of the prediction, never less than 0.50.
margin=$(awk -v p="$predicted_cost" 'BEGIN{m=p*0.27; if (m<0.50) m=0.50; printf "%.2f", m}')

local duration_minutes
# 9 per stage + 4 per complexity point + 1.5 per average iteration, minimum 10.
duration_minutes=$(awk -v sc="$stage_count" -v c="$complexity_score" -v ai="$avg_iter" 'BEGIN{d=(sc*9)+(c*4)+(ai*1.5); if(d<10)d=10; printf "%.0f", d}')

# Assemble the forecast object. approval_required is true only when the
# prediction is strictly greater than the threshold (default 10).
jq -n \
--arg ts "$(now_iso)" \
--argjson complexity_score "$complexity_score" \
--argjson historical_samples "${historical_samples:-0}" \
--argjson historical_avg_usd "${historical_avg:-0}" \
--argjson threshold_usd "${COST_APPROVAL_THRESHOLD_USD:-10}" \
--argjson predicted_cost_usd "$predicted_cost" \
--argjson margin_usd "$margin" \
--argjson duration_minutes "$duration_minutes" \
--argjson max_iterations "$max_iter" \
--argjson stage_count "$stage_count" \
--argjson model_plan "$model_plan" \
'{
ts: $ts,
predicted_cost_usd: $predicted_cost_usd,
confidence_margin_usd: $margin_usd,
estimated_duration_minutes: $duration_minutes,
complexity_score: $complexity_score,
historical_similar_issue_cost_avg_usd: $historical_avg_usd,
historical_similar_issue_samples: $historical_samples,
pipeline_stage_count: $stage_count,
model_routing_plan: $model_plan,
max_iterations: $max_iterations,
approval_threshold_usd: $threshold_usd,
approval_required: ($predicted_cost_usd > $threshold_usd)
}'
}

# Print the forecast summary and, when the forecast requires it, ask the user
# to confirm before the pipeline starts.
# Arguments: $1 - forecast JSON (fields read below, each with a fallback).
# Globals read: SKIP_COST_APPROVAL, SKIP_GATES, HEADLESS, CI_MODE,
#   ISSUE_NUMBER, BOLD, RESET, YELLOW.
# Calls: warn and emit_event.
# Returns: 0 to proceed; 1 when the user declines at the prompt.
require_cost_approval_if_needed() {
local forecast_json="$1"
local predicted margin duration threshold needs_approval
predicted=$(echo "$forecast_json" | jq -r '.predicted_cost_usd // 0')
margin=$(echo "$forecast_json" | jq -r '.confidence_margin_usd // 0')
duration=$(echo "$forecast_json" | jq -r '.estimated_duration_minutes // 0')
threshold=$(echo "$forecast_json" | jq -r '.approval_threshold_usd // 10')
needs_approval=$(echo "$forecast_json" | jq -r '.approval_required // false')

echo -e " ${BOLD}Cost Forecast:${RESET} \$$predicted (±\$$margin), Duration: ${duration}min"

# Any one of these flags bypasses the prompt; the summary above is still shown.
if [[ "$SKIP_COST_APPROVAL" == "true" || "$SKIP_GATES" == "true" || "$HEADLESS" == "true" || "$CI_MODE" == "true" ]]; then
return 0
fi

if [[ "$needs_approval" == "true" ]]; then
echo -e " ${YELLOW}Approval required:${RESET} forecast exceeds threshold (\$$threshold)"
local answer=""
# "|| true" ignores a non-zero status from read; an empty answer does not
# match the pattern below and is therefore treated as a decline.
read -rp " Proceed with pipeline start? [y/N] " answer || true
# Only "y" or "yes" (case-insensitive) approves.
if ! echo "$answer" | grep -qiE '^(y|yes)$'; then
warn "Pipeline start canceled by user (cost approval gate)"
emit_event "pipeline.cost_approval_blocked" "predicted_cost=${predicted}" "threshold=${threshold}" "issue=${ISSUE_NUMBER:-0}"
return 1
fi
fi
return 0
Comment on lines +367 to +377
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Emit an explicit “approved” event when the user accepts the cost gate.

At Line 373, blocked decisions are emitted, but accepted decisions are not. This leaves approval telemetry incomplete.

Proposed fix
     if [[ "$needs_approval" == "true" ]]; then
         echo -e "  ${YELLOW}Approval required:${RESET} forecast exceeds threshold (\$$threshold)"
         local answer=""
         read -rp "  Proceed with pipeline start? [y/N] " answer || true
         if ! echo "$answer" | grep -qiE '^(y|yes)$'; then
             warn "Pipeline start canceled by user (cost approval gate)"
             emit_event "pipeline.cost_approval_blocked" "predicted_cost=${predicted}" "threshold=${threshold}" "issue=${ISSUE_NUMBER:-0}"
             return 1
         fi
+        emit_event "pipeline.cost_approval_approved" "predicted_cost=${predicted}" "threshold=${threshold}" "issue=${ISSUE_NUMBER:-0}" "mode=manual"
     fi
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
if [[ "$needs_approval" == "true" ]]; then
echo -e " ${YELLOW}Approval required:${RESET} forecast exceeds threshold (\$$threshold)"
local answer=""
read -rp " Proceed with pipeline start? [y/N] " answer || true
if ! echo "$answer" | grep -qiE '^(y|yes)$'; then
warn "Pipeline start canceled by user (cost approval gate)"
emit_event "pipeline.cost_approval_blocked" "predicted_cost=${predicted}" "threshold=${threshold}" "issue=${ISSUE_NUMBER:-0}"
return 1
fi
fi
return 0
if [[ "$needs_approval" == "true" ]]; then
echo -e " ${YELLOW}Approval required:${RESET} forecast exceeds threshold (\$$threshold)"
local answer=""
read -rp " Proceed with pipeline start? [y/N] " answer || true
if ! echo "$answer" | grep -qiE '^(y|yes)$'; then
warn "Pipeline start canceled by user (cost approval gate)"
emit_event "pipeline.cost_approval_blocked" "predicted_cost=${predicted}" "threshold=${threshold}" "issue=${ISSUE_NUMBER:-0}"
return 1
fi
emit_event "pipeline.cost_approval_approved" "predicted_cost=${predicted}" "threshold=${threshold}" "issue=${ISSUE_NUMBER:-0}" "mode=manual"
fi
return 0
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@scripts/sw-pipeline.sh` around lines 367 - 377, When the cost gate is
accepted (the branch where read -rp sets answer and the grep check passes), emit
a complementary approval event—call emit_event with a new event name like
"pipeline.cost_approval_approved" and include the same metadata used for blocked
(predicted_cost=${predicted}, threshold=${threshold}, issue=${ISSUE_NUMBER:-0})
so telemetry is complete; add this emit_event call in the success path right
after the user confirms and before returning 0, referencing the existing
needs_approval check, the answer variable, and the emit_event function.

}

# ─── Defaults ───────────────────────────────────────────────────────────────
GOAL=""
ISSUE_NUMBER=""
Expand All @@ -283,6 +399,8 @@ NO_GITHUB_LABEL=false
CI_MODE=false
DRY_RUN=false
IGNORE_BUDGET=false
SKIP_COST_APPROVAL=false
COST_APPROVAL_THRESHOLD_USD=$(_config_get_int "pipeline.cost_approval_threshold_usd" 10 2>/dev/null || echo 10)
COMPLETED_STAGES=""
RESUME_FROM_CHECKPOINT=false
MAX_ITERATIONS_OVERRIDE=""
Expand Down Expand Up @@ -355,6 +473,7 @@ show_help() {
echo -e " ${DIM}--no-github-label${RESET} Don't modify issue labels"
echo -e " ${DIM}--ci${RESET} CI mode (skip gates, non-interactive)"
echo -e " ${DIM}--ignore-budget${RESET} Skip budget enforcement checks"
echo -e " ${DIM}--skip-cost-approval${RESET} Skip pre-start cost approval gate"
echo -e " ${DIM}--worktree [=name]${RESET} Run in isolated git worktree (parallel-safe)"
echo -e " ${DIM}--dry-run${RESET} Show what would happen without executing"
echo -e " ${DIM}--slack-webhook <url>${RESET} Send notifications to Slack"
Expand Down Expand Up @@ -441,6 +560,7 @@ parse_args() {
--no-github-label) NO_GITHUB_LABEL=true; shift ;;
--ci) CI_MODE=true; SKIP_GATES=true; shift ;;
--ignore-budget) IGNORE_BUDGET=true; shift ;;
--skip-cost-approval) SKIP_COST_APPROVAL=true; shift ;;
--max-iterations) MAX_ITERATIONS_OVERRIDE="$2"; shift 2 ;;
--completed-stages) COMPLETED_STAGES="$2"; shift 2 ;;
--resume) RESUME_FROM_CHECKPOINT=true; shift ;;
Expand Down Expand Up @@ -2479,6 +2599,15 @@ pipeline_start() {
fi
fi

# Forecast pipeline cost/duration and enforce optional approval gate
local forecast_json
forecast_json=$(forecast_pipeline_cost)
write_cost_forecast "$forecast_json"
PREDICTED_COST=$(echo "$forecast_json" | jq -r '.predicted_cost_usd // empty' 2>/dev/null || echo "")
export PREDICTED_COST
emit_event "pipeline.cost_forecast" "issue=${ISSUE_NUMBER:-0}" "predicted_cost=${PREDICTED_COST:-0}" "threshold=${COST_APPROVAL_THRESHOLD_USD:-10}"
require_cost_approval_if_needed "$forecast_json" || return 1

# Start background heartbeat writer
start_heartbeat

Expand Down