Gas benchmark gates: coverage, budget and 10% regression enforcement

Adds scripts/gas_benchmark_report.py (Markdown dashboard plus CI gates), wires
it into .github/workflows/gas-benchmarks.yml and run-benchmarks.sh, pins LF
line endings via .gitattributes, and seeds the coverage manifest and the
history file under stellar-lend/benchmarks/.

Review amendments folded into this patch:
  * workflow: the build and run steps pipe cargo through tee; `set -o pipefail`
    is now enabled there so a failing cargo command fails the step instead of
    being masked by tee's exit status (the default Actions shell is `bash -e`).
  * report script: exits with status 2 when the --results file does not exist
    instead of rendering an empty dashboard that could pass the gates.
The index lines of the two amended files still carry the original blob ids.

diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 00000000..02f5ab34
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,7 @@
+*.sh text eol=lf
+*.yml text eol=lf
+*.yaml text eol=lf
+*.py text eol=lf
+*.json text eol=lf
+*.jsonl text eol=lf
+*.md text eol=lf
diff --git a/.github/workflows/gas-benchmarks.yml b/.github/workflows/gas-benchmarks.yml
index c7f36755..12007fec 100644
--- a/.github/workflows/gas-benchmarks.yml
+++ b/.github/workflows/gas-benchmarks.yml
@@ -6,18 +6,19 @@ on:
     paths:
       - "stellar-lend/contracts/**"
       - "stellar-lend/benchmarks/**"
+      - "scripts/gas_benchmark_report.py"
       - ".github/workflows/gas-benchmarks.yml"
   pull_request:
     branches: ["main", "dev"]
     paths:
       - "stellar-lend/contracts/**"
       - "stellar-lend/benchmarks/**"
+      - "scripts/gas_benchmark_report.py"
       - ".github/workflows/gas-benchmarks.yml"
-  # Allow manual trigger for on-demand profiling
   workflow_dispatch:
     inputs:
       compare_baseline:
-        description: "Compare against baseline (true/false)"
+        description: "Compare against baseline and enforce the 10% regression gate"
         required: false
         default: "true"
 
@@ -26,14 +27,15 @@ concurrency:
   cancel-in-progress: true
 
 jobs:
-  # ─────────────────────────────────────────────────────────────────────────────
-  # Gas Benchmark Job
-  # ─────────────────────────────────────────────────────────────────────────────
   gas-benchmarks:
-    name: Gas Benchmarks — All Contracts
+    name: Gas Benchmarks
     runs-on: ubuntu-latest
+    timeout-minutes: 30
     env:
       CARGO_TERM_COLOR: always
+      BENCHMARK_RESULTS: stellar-lend/benchmark-results.json
+      BENCHMARK_DASHBOARD: stellar-lend/benchmark-dashboard.md
+      BENCHMARK_HISTORY: stellar-lend/benchmark-history.jsonl
 
     steps:
       - name: Checkout code
@@ -44,7 +46,7 @@ jobs:
         with:
           toolchain: stable
 
-      - name: Cache cargo registry & build
+      - name: Cache cargo registry and build artifacts
         uses: actions/cache@v4
         with:
           path: |
@@ -59,118 +61,52 @@ jobs:
       - name: Build benchmark suite
         run: |
           cd stellar-lend
-          cargo build --bin run_benchmarks --release 2>&1 | tee benchmark-build.log
-          echo "Build exit code: $?"
+          set -o pipefail  # otherwise tee's exit status hides a cargo failure
+          cargo build -p stellarlend-benchmarks --release 2>&1 | tee benchmark-build.log
 
       - name: Run gas benchmarks
-        id: run_benchmarks
         run: |
           cd stellar-lend
-          cargo run --bin run_benchmarks -- \
+          set -o pipefail  # otherwise tee's exit status hides a cargo failure
+          cargo run -p stellarlend-benchmarks --bin run_benchmarks --release -- \
             --output benchmark-results.json \
             2>&1 | tee benchmark-output.log
-          echo "Benchmark exit code: $?"
 
-      - name: Run benchmarks with baseline comparison
-        id: baseline_check
-        if: ${{ github.event.inputs.compare_baseline != 'false' }}
+      - name: Generate dashboard and enforce gates
+        env:
+          COMPARE_BASELINE: ${{ inputs.compare_baseline || 'true' }}
         run: |
-          cd stellar-lend
-          # Use baseline from repo if it exists and has results
-          BASELINE="benchmarks/baseline.json"
-          RESULTS=$(python3 -c "import json; d=json.load(open('$BASELINE')); print(len(d.get('results', [])))" 2>/dev/null || echo "0")
-
-          if [ "$RESULTS" -gt "0" ]; then
-            echo "Comparing against baseline ($RESULTS recorded operations)..."
-            cargo run --bin run_benchmarks -- \
-              --compare "$BASELINE" \
-              --output benchmark-results.json \
-              2>&1 | tee benchmark-comparison.log
-            EXIT_CODE=$?
-            if [ $EXIT_CODE -ne 0 ]; then
-              echo "::error::Gas regression detected! See benchmark-comparison.log for details."
-              exit $EXIT_CODE
-            fi
-            echo "All operations within gas budgets."
-          else
-            echo "No baseline results found — skipping regression check."
-            echo "Run benchmarks locally and commit baseline.json to enable regression detection."
+          EXTRA_ARGS=""
+          if [ "$COMPARE_BASELINE" = "true" ]; then
+            EXTRA_ARGS="--baseline stellar-lend/benchmarks/baseline.json --fail-on-regression"
           fi
 
-      - name: Generate benchmark summary
+          python3 scripts/gas_benchmark_report.py \
+            --results "$BENCHMARK_RESULTS" \
+            --coverage stellar-lend/benchmarks/public-functions.json \
+            --dashboard "$BENCHMARK_DASHBOARD" \
+            --history stellar-lend/benchmarks/history.jsonl \
+            --history-out "$BENCHMARK_HISTORY" \
+            --max-regression-pct 10 \
+            $EXTRA_ARGS
+
+      - name: Publish benchmark dashboard
         if: always()
         run: |
-          cd stellar-lend
-          if [ -f benchmark-results.json ]; then
-            echo "## Gas Benchmark Results" >> $GITHUB_STEP_SUMMARY
-            echo "" >> $GITHUB_STEP_SUMMARY
-            echo "| Contract | Operations | Max Instructions | Avg Instructions | Over Budget |" >> $GITHUB_STEP_SUMMARY
-            echo "|----------|-----------|-----------------|-----------------|-------------|" >> $GITHUB_STEP_SUMMARY
-          python3 - <<'EOF'
-          import json, os
-
-          with open("benchmark-results.json") as f:
-              report = json.load(f)
-
-          summary = report.get("summary_by_contract", {})
-          with open(os.environ["GITHUB_STEP_SUMMARY"], "a") as out:
-              for contract, s in sorted(summary.items()):
-                  over = s.get("over_budget_count", 0)
-                  status = "✗" if over > 0 else "✓"
-                  out.write(
-                      f"| {contract} | {s['total_operations']} | "
-                      f"{s['max_instructions']:,} | {s['avg_instructions']:,} | "
-                      f"{status} {over} |\n"
-                  )
-
-              total = report["total_benchmarks"]
-              passed = report["passed"]
-              failed = report["failed"]
-              out.write(f"\n**Total:** {total} | **Passed:** {passed} | **Failed:** {failed}\n")
-          EOF
+          if [ -f "$BENCHMARK_DASHBOARD" ]; then
+            cat "$BENCHMARK_DASHBOARD" >> "$GITHUB_STEP_SUMMARY"
           fi
 
-      - name: Upload benchmark results
+      - name: Upload benchmark artifacts
         if: always()
         uses: actions/upload-artifact@v4
         with:
           name: gas-benchmark-results-${{ github.sha }}
           path: |
             stellar-lend/benchmark-results.json
+            stellar-lend/benchmark-dashboard.md
+            stellar-lend/benchmark-history.jsonl
             stellar-lend/benchmark-output.log
-            stellar-lend/benchmark-comparison.log
+            stellar-lend/benchmark-build.log
+          if-no-files-found: warn
           retention-days: 90
-
-      - name: Upload build log
-        if: failure()
-        uses: actions/upload-artifact@v4
-        with:
-          name: benchmark-build-log
-          path: stellar-lend/benchmark-build.log
-
-      # ── Budget Alert Gate ──────────────────────────────────────────────────
-      - name: Enforce gas budget gate
-        if: always()
-        run: |
-          cd stellar-lend
-          if [ ! -f benchmark-results.json ]; then
-            echo "::error::benchmark-results.json not found — benchmarks may have failed to run."
-            exit 1
-          fi
-
-          FAILED=$(python3 -c "
-          import json
-          with open('benchmark-results.json') as f:
-              r = json.load(f)
-          over = [x for x in r['results'] if not x['within_budget'] and x['budget'] > 0]
-          for o in over:
-              print(f\" {o['operation']}: {o['instructions']:,} instructions (budget: {o['budget']:,})\")
-          print(len(over))
-          " | tail -1)
-
-          if [ "$FAILED" -gt "0" ]; then
-            echo "::error::$FAILED operation(s) exceeded gas budget. See benchmark-results.json for details."
-            exit 1
-          fi
-
-          echo "All operations within gas budgets."
diff --git a/run-benchmarks.sh b/run-benchmarks.sh
index 3b23da9b..7df6f12e 100755
--- a/run-benchmarks.sh
+++ b/run-benchmarks.sh
@@ -1,132 +1,90 @@
 #!/usr/bin/env bash
-# run-benchmarks.sh — Local gas benchmark runner for StellarLend
-#
-# Usage:
-# ./run-benchmarks.sh # Run all benchmarks
-# ./run-benchmarks.sh --compare # Compare against baseline
-# ./run-benchmarks.sh --update-baseline # Run and update baseline.json
-# ./run-benchmarks.sh --help # Show help
+# Local gas benchmark runner for StellarLend.
 
 set -euo pipefail
 
-# ── Colors ────────────────────────────────────────────────────────────────────
-RED='\033[0;31m'
-GREEN='\033[0;32m'
-YELLOW='\033[1;33m'
-BLUE='\033[0;34m'
-CYAN='\033[0;36m'
-NC='\033[0m'
-
 BENCH_DIR="stellar-lend"
-BASELINE="stellar-lend/benchmarks/baseline.json"
-OUTPUT="stellar-lend/benchmark-results.json"
+BASELINE="$BENCH_DIR/benchmarks/baseline.json"
+COVERAGE="$BENCH_DIR/benchmarks/public-functions.json"
+HISTORY="$BENCH_DIR/benchmarks/history.jsonl"
+OUTPUT="$BENCH_DIR/benchmark-results.json"
+DASHBOARD="$BENCH_DIR/benchmark-dashboard.md"
+HISTORY_OUT="$BENCH_DIR/benchmark-history.jsonl"
 
-# ── Argument parsing ──────────────────────────────────────────────────────────
 COMPARE=false
 UPDATE_BASELINE=false
 SHOW_HELP=false
 
 for arg in "$@"; do
-  case $arg in
-    --compare)         COMPARE=true ;;
+  case "$arg" in
+    --compare) COMPARE=true ;;
     --update-baseline) UPDATE_BASELINE=true ;;
-    --help|-h)         SHOW_HELP=true ;;
+    --help|-h) SHOW_HELP=true ;;
+    *)
+      echo "Unknown argument: $arg" >&2
+      exit 2
+      ;;
   esac
 done
 
 if $SHOW_HELP; then
-  echo ""
-  echo " StellarLend Gas Benchmark Runner"
-  echo ""
-  echo " Usage:"
-  echo " ./run-benchmarks.sh Run all benchmarks"
-  echo " ./run-benchmarks.sh --compare Compare against baseline (fail on regression)"
-  echo " ./run-benchmarks.sh --update-baseline Run and save results as new baseline"
-  echo ""
-  echo " Output:"
-  echo " benchmark-results.json Latest results (always written)"
-  echo " benchmarks/baseline.json Baseline for regression detection"
-  echo ""
+  cat <<'EOF'
+StellarLend Gas Benchmark Runner
+
+Usage:
+  ./run-benchmarks.sh Run all benchmarks and generate dashboard
+  ./run-benchmarks.sh --compare Fail on budget, coverage, or >10% baseline regression
+  ./run-benchmarks.sh --update-baseline Run and save current results as the new baseline
+
+Outputs:
+  stellar-lend/benchmark-results.json
+  stellar-lend/benchmark-dashboard.md
+  stellar-lend/benchmark-history.jsonl
+EOF
   exit 0
 fi
 
-# ── Prerequisites ─────────────────────────────────────────────────────────────
-echo -e "${BLUE}╔══════════════════════════════════════════════════════════╗${NC}"
-echo -e "${BLUE}║ StellarLend Gas Benchmark Suite ║${NC}"
-echo -e "${BLUE}╚══════════════════════════════════════════════════════════╝${NC}"
-echo ""
+if ! command -v cargo >/dev/null 2>&1; then
+  echo "Rust/Cargo not found. Install from https://rustup.rs" >&2
+  exit 1
+fi
 
-if ! command -v cargo &>/dev/null; then
-  echo -e "${RED}✗ Rust/Cargo not found. Install from https://rustup.rs${NC}"
+if ! command -v python3 >/dev/null 2>&1; then
+  echo "python3 not found. It is required for dashboard and regression gates." >&2
   exit 1
 fi
 
 if [ ! -d "$BENCH_DIR" ]; then
-  echo -e "${RED}✗ stellar-lend directory not found. Run from project root.${NC}"
+  echo "stellar-lend directory not found. Run from the project root." >&2
   exit 1
 fi
 
-# ── Build ─────────────────────────────────────────────────────────────────────
-echo -e "${YELLOW}▶ Building benchmark suite...${NC}"
-(cd "$BENCH_DIR" && cargo build --bin run_benchmarks --release 2>&1)
-echo -e "${GREEN}✓ Build complete${NC}"
-echo ""
+echo "Building benchmark suite..."
+(cd "$BENCH_DIR" && cargo build -p stellarlend-benchmarks --release)
 
-# ── Run benchmarks ────────────────────────────────────────────────────────────
-echo -e "${YELLOW}▶ Running gas benchmarks...${NC}"
-echo ""
+echo "Running gas benchmarks..."
+(cd "$BENCH_DIR" && cargo run -p stellarlend-benchmarks --bin run_benchmarks --release -- --output "../$OUTPUT")
+
+REPORT_ARGS=(
+  --results "$OUTPUT"
+  --coverage "$COVERAGE"
+  --dashboard "$DASHBOARD"
+  --history "$HISTORY"
+  --history-out "$HISTORY_OUT"
+  --max-regression-pct 10
+)
 
 if $COMPARE; then
-  RESULTS_COUNT=$(python3 -c "import json; d=json.load(open('$BASELINE')); print(len(d.get('results', [])))" 2>/dev/null || echo "0")
-  if [ "$RESULTS_COUNT" -gt "0" ]; then
-    echo -e "${CYAN} Comparing against baseline ($RESULTS_COUNT operations)...${NC}"
-    (cd "$BENCH_DIR" && cargo run --bin run_benchmarks --release -- \
-      --compare "../$BASELINE" \
-      --output "../$OUTPUT")
-    EXIT_CODE=$?
-    if [ $EXIT_CODE -ne 0 ]; then
-      echo ""
-      echo -e "${RED}✗ Gas regression detected! Review benchmark-results.json${NC}"
-      exit $EXIT_CODE
-    fi
-  else
-    echo -e "${YELLOW} No baseline results found — running without comparison.${NC}"
-    echo -e "${YELLOW} Run with --update-baseline to create a baseline.${NC}"
-    (cd "$BENCH_DIR" && cargo run --bin run_benchmarks --release -- --output "../$OUTPUT")
-  fi
-elif $UPDATE_BASELINE; then
-  echo -e "${CYAN} Running benchmarks and updating baseline...${NC}"
-  (cd "$BENCH_DIR" && cargo run --bin run_benchmarks --release -- --output "../$OUTPUT")
-  cp "$OUTPUT" "$BASELINE"
-  echo ""
-  echo -e "${GREEN}✓ Baseline updated: $BASELINE${NC}"
-  echo -e "${YELLOW} Commit this file to track gas usage over time.${NC}"
-else
-  (cd "$BENCH_DIR" && cargo run --bin run_benchmarks --release -- --output "../$OUTPUT")
+  REPORT_ARGS+=(--baseline "$BASELINE" --fail-on-regression)
 fi
 
-echo ""
-echo -e "${GREEN}✓ Benchmarks complete. Results: $OUTPUT${NC}"
-
-# ── Quick summary ─────────────────────────────────────────────────────────────
-if command -v python3 &>/dev/null && [ -f "$OUTPUT" ]; then
-  echo ""
-  python3 - <<'EOF'
-import json
+python3 scripts/gas_benchmark_report.py "${REPORT_ARGS[@]}"
 
-with open("stellar-lend/benchmark-results.json") as f:
-    report = json.load(f)
-
-total = report["total_benchmarks"]
-passed = report["passed"]
-failed = report["failed"]
-
-print(f" Summary: {total} benchmarks | {passed} passed | {failed} failed")
-
-if failed > 0:
-    print("\n Over-budget operations:")
-    for r in report["results"]:
-        if not r["within_budget"] and r["budget"] > 0:
-            print(f" ✗ {r['operation']}: {r['instructions']:,} (budget: {r['budget']:,})")
-EOF
+if $UPDATE_BASELINE; then
+  cp "$OUTPUT" "$BASELINE"
+  echo "Baseline updated: $BASELINE"
 fi
+
+echo "Benchmarks complete:"
+echo " Results: $OUTPUT"
+echo " Dashboard: $DASHBOARD"
diff --git a/scripts/gas_benchmark_report.py b/scripts/gas_benchmark_report.py
new file mode 100644
index 00000000..79548510
--- /dev/null
+++ b/scripts/gas_benchmark_report.py
@@ -0,0 +1,297 @@
+#!/usr/bin/env python3
+"""Validate gas benchmark results and generate a trend dashboard."""
+
+from __future__ import annotations
+
+import argparse
+import json
+import sys
+from collections import defaultdict
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any
+
+
+def load_json(path: Path, default: Any) -> Any:
+    if not path or not path.exists():
+        return default
+    with path.open("r", encoding="utf-8") as handle:
+        return json.load(handle)
+
+
+def write_text(path: Path, text: str) -> None:
+    path.parent.mkdir(parents=True, exist_ok=True)
+    path.write_text(text, encoding="utf-8")
+
+
+def normalize_operation(raw: dict[str, Any], default_contract: str = "") -> dict[str, Any]:
+    operation = str(raw.get("operation") or raw.get("name") or "")
+    contract = str(raw.get("contract") or default_contract or "")
+    scenario = str(raw.get("scenario") or raw.get("description") or "")
+
+    if "::" not in operation and contract:
+        contract_key = contract.replace("-", "_")
+        operation = f"{contract_key}::{operation}"
+
+    instructions = raw.get("instructions", raw.get("cpu_insns", raw.get("cpu", 0)))
+    memory = raw.get("memory_bytes", raw.get("mem_bytes", raw.get("memory", 0)))
+    budget = int(raw.get("budget") or 0)
+
+    return {
+        "operation": operation,
+        "contract": contract or operation.split("::", 1)[0],
+        "scenario": scenario,
+        "description": str(raw.get("description") or scenario or operation),
+        "instructions": int(instructions or 0),
+        "memory_bytes": int(memory or 0),
+        "storage_reads": int(raw.get("storage_reads") or raw.get("disk_read_entries") or 0),
+        "storage_writes": int(raw.get("storage_writes") or raw.get("write_entries") or 0),
+        "cold_storage": bool(raw.get("cold_storage", "cold" in scenario.lower())),
+        "budget": budget,
+        "within_budget": bool(raw.get("within_budget", budget == 0 or int(instructions or 0) <= budget)),
+        "tags": [str(tag) for tag in raw.get("tags", [])],
+    }
+
+
+def load_results(path: Path) -> tuple[dict[str, Any], list[dict[str, Any]]]:
+    payload = load_json(path, {})
+    if "results" in payload:
+        return payload, [normalize_operation(item) for item in payload.get("results", [])]
+    if "benchmarks" in payload:
+        contract = str(payload.get("contract") or "")
+        return payload, [normalize_operation(item, contract) for item in payload.get("benchmarks", [])]
+    return payload, []
+
+
+def load_baseline(path: Path) -> dict[str, dict[str, Any]]:
+    if not path:
+        return {}
+    _, baseline_results = load_results(path)
+    return {item["operation"]: item for item in baseline_results}
+
+
+def coverage_failures(results: list[dict[str, Any]], coverage_path: Path | None) -> list[str]:
+    if not coverage_path:
+        return []
+    coverage = load_json(coverage_path, {"required_operations": []})
+    required = [str(item) for item in coverage.get("required_operations", [])]
+    measured = {item["operation"] for item in results}
+    return [operation for operation in required if operation not in measured]
+
+
+def budget_failures(results: list[dict[str, Any]]) -> list[dict[str, Any]]:
+    return [item for item in results if item["budget"] > 0 and item["instructions"] > item["budget"]]
+
+
+def regression_failures(
+    results: list[dict[str, Any]],
+    baseline: dict[str, dict[str, Any]],
+    max_regression_pct: float,
+) -> list[dict[str, Any]]:
+    failures = []
+    for item in results:
+        prior = baseline.get(item["operation"])
+        if not prior:
+            continue
+        old = int(prior.get("instructions") or 0)
+        new = int(item.get("instructions") or 0)
+        if old <= 0:
+            continue
+        pct = ((new - old) / old) * 100
+        if pct > max_regression_pct:
+            failures.append({**item, "baseline": old, "increase_pct": pct})
+    return failures
+
+
+def storage_summary(results: list[dict[str, Any]]) -> dict[str, dict[str, int]]:
+    summary: dict[str, dict[str, int]] = defaultdict(lambda: defaultdict(int))
+    for item in results:
+        bucket = summary[item["contract"]]
+        bucket["operations"] += 1
+        bucket["reads"] += item["storage_reads"]
+        bucket["writes"] += item["storage_writes"]
+        bucket["cold"] += 1 if item["cold_storage"] else 0
+        bucket["max_instructions"] = max(bucket["max_instructions"], item["instructions"])
+    return summary
+
+
+def cross_contract_results(results: list[dict[str, Any]]) -> list[dict[str, Any]]:
+    markers = ("cross", "bridge", "amm", "flash_loan", "callback", "auto_swap")
+    selected = []
+    for item in results:
+        haystack = " ".join([item["operation"], item["description"], *item["tags"]]).lower()
+        if any(marker in haystack for marker in markers):
+            selected.append(item)
+    return selected
+
+
+def history_entry(payload: dict[str, Any], results: list[dict[str, Any]]) -> dict[str, Any]:
+    instructions = [item["instructions"] for item in results]
+    return {
+        "timestamp": payload.get("timestamp") or datetime.now(timezone.utc).isoformat(),
+        "source": "gas-benchmark-report",
+        "total_benchmarks": len(results),
+        "passed": len(results) - len(budget_failures(results)),
+        "failed": len(budget_failures(results)),
+        "max_instructions": max(instructions) if instructions else 0,
+        "avg_instructions": int(sum(instructions) / len(instructions)) if instructions else 0,
+    }
+
+
+def read_history(path: Path | None) -> list[dict[str, Any]]:
+    if not path or not path.exists():
+        return []
+    entries = []
+    for line in path.read_text(encoding="utf-8").splitlines():
+        if not line.strip():
+            continue
+        try:
+            entries.append(json.loads(line))
+        except json.JSONDecodeError:
+            continue
+    return entries
+
+
+def write_history(path: Path, entries: list[dict[str, Any]]) -> None:
+    path.parent.mkdir(parents=True, exist_ok=True)
+    lines = [json.dumps(entry, sort_keys=True, separators=(",", ":")) for entry in entries]
+    path.write_text("\n".join(lines) + "\n", encoding="utf-8")
+
+
+def render_dashboard(
+    payload: dict[str, Any],
+    results: list[dict[str, Any]],
+    missing: list[str],
+    over_budget: list[dict[str, Any]],
+    regressions: list[dict[str, Any]],
+    history: list[dict[str, Any]],
+) -> str:
+    total = len(results)
+    passed = total - len(over_budget)
+    failed = len(over_budget)
+    lines = [
+        "# Gas Benchmark Dashboard",
+        "",
+        f"- Timestamp: {payload.get('timestamp') or datetime.now(timezone.utc).isoformat()}",
+        f"- Total benchmarks: {total}",
+        f"- Passed budgets: {passed}",
+        f"- Failed budgets: {failed}",
+        f"- Missing required operations: {len(missing)}",
+        f"- Regression findings: {len(regressions)}",
+        "",
+        "## Contract Summary",
+        "",
+        "| Contract | Operations | Max instructions | Storage reads | Storage writes | Cold cases |",
+        "| --- | ---: | ---: | ---: | ---: | ---: |",
+    ]
+
+    for contract, item in sorted(storage_summary(results).items()):
+        lines.append(
+            f"| {contract} | {item['operations']} | {item['max_instructions']} | "
+            f"{item['reads']} | {item['writes']} | {item['cold']} |"
+        )
+
+    lines.extend(["", "## Cross-Contract And Integration Calls", ""])
+    cross_items = cross_contract_results(results)
+    if cross_items:
+        lines.extend(["| Operation | Instructions | Memory bytes |", "| --- | ---: | ---: |"])
+        for item in sorted(cross_items, key=lambda row: row["operation"]):
+            lines.append(f"| {item['operation']} | {item['instructions']} | {item['memory_bytes']} |")
+    else:
+        lines.append("No cross-contract tagged operations were found in this run.")
+
+    if missing:
+        lines.extend(["", "## Missing Coverage", ""])
+        lines.extend(f"- `{operation}`" for operation in missing)
+
+    if over_budget:
+        lines.extend(["", "## Over Budget", "", "| Operation | Actual | Budget |", "| --- | ---: | ---: |"])
+        for item in over_budget:
+            lines.append(f"| {item['operation']} | {item['instructions']} | {item['budget']} |")
+
+    if regressions:
+        lines.extend(
+            [
+                "",
+                "## Regressions Above Threshold",
+                "",
+                "| Operation | Current | Baseline | Increase |",
+                "| --- | ---: | ---: | ---: |",
+            ]
+        )
+        for item in regressions:
+            lines.append(
+                f"| {item['operation']} | {item['instructions']} | {item['baseline']} | "
+                f"{item['increase_pct']:.2f}% |"
+            )
+
+    lines.extend(["", "## Historical Trend", ""])
+    if history:
+        lines.extend(["| Timestamp | Total | Passed | Failed | Max instructions | Avg instructions |", "| --- | ---: | ---: | ---: | ---: | ---: |"])
+        for item in history[-10:]:
+            lines.append(
+                f"| {item.get('timestamp', '')} | {item.get('total_benchmarks', 0)} | "
+                f"{item.get('passed', 0)} | {item.get('failed', 0)} | "
+                f"{item.get('max_instructions', 0)} | {item.get('avg_instructions', 0)} |"
+            )
+    else:
+        lines.append("No historical entries are available yet.")
+
+    return "\n".join(lines) + "\n"
+
+
+def main() -> int:
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument("--results", type=Path, required=True)
+    parser.add_argument("--baseline", type=Path)
+    parser.add_argument("--coverage", type=Path)
+    parser.add_argument("--dashboard", type=Path, required=True)
+    parser.add_argument("--history", type=Path)
+    parser.add_argument("--history-out", type=Path)
+    parser.add_argument("--max-regression-pct", type=float, default=10.0)
+    parser.add_argument("--fail-on-regression", action="store_true")
+    args = parser.parse_args()
+
+    # A missing results file means the benchmark run itself failed. Stop here
+    # instead of rendering an empty dashboard that could pass the gates.
+    if not args.results.is_file():
+        print(f"error: benchmark results not found: {args.results}", file=sys.stderr)
+        return 2
+
+    payload, results = load_results(args.results)
+    baseline = load_baseline(args.baseline) if args.baseline else {}
+    missing = coverage_failures(results, args.coverage)
+    over_budget = budget_failures(results)
+    regressions = regression_failures(results, baseline, args.max_regression_pct)
+
+    history = read_history(args.history)
+    current_entry = history_entry(payload, results)
+    history_with_current = [*history, current_entry]
+    if args.history_out:
+        write_history(args.history_out, history_with_current)
+
+    write_text(
+        args.dashboard,
+        render_dashboard(payload, results, missing, over_budget, regressions, history_with_current),
+    )
+
+    errors = []
+    if missing:
+        errors.append(f"{len(missing)} required benchmark operation(s) missing")
+    if over_budget:
+        errors.append(f"{len(over_budget)} operation(s) over budget")
+    if args.fail_on_regression and regressions:
+        errors.append(f"{len(regressions)} operation(s) regressed by more than {args.max_regression_pct:.1f}%")
+
+    if errors:
+        for error in errors:
+            print(f"error: {error}", file=sys.stderr)
+        print(f"Dashboard written to {args.dashboard}", file=sys.stderr)
+        return 1
+
+    print(f"Dashboard written to {args.dashboard}")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/stellar-lend/benchmarks/README.md b/stellar-lend/benchmarks/README.md
index 3f7b5b06..865ad0af 100644
--- a/stellar-lend/benchmarks/README.md
+++ b/stellar-lend/benchmarks/README.md
@@ -6,11 +6,11 @@ Comprehensive gas benchmarks for all StellarLend protocol contracts. Tracks CPU
 
 | Contract | Operations Benchmarked |
 |----------|----------------------|
-| `lending` | 23 |
+| `lending` | 22 |
 | `hello-world` (core lending) | 28 |
 | `amm` | 15 |
 | `bridge` | 17 |
-| **Total** | **83** |
+| **Total** | **82** |
 
 ## How Gas is Measured
 
@@ -107,6 +107,38 @@ git add stellar-lend/benchmarks/baseline.json
 git commit -m "chore: update gas benchmark baseline after optimization"
 ```
 
+## Coverage And Regression Gates
+
+`public-functions.json` lists the benchmark operations that must be present in every complete report. The CI gate fails when:
+
+1. A required operation is missing from `benchmark-results.json`
+2. An operation exceeds its hard instruction budget
+3. An operation regresses by more than 10% compared with `baseline.json`
+
+The gate and dashboard are generated by:
+
+```bash
+python3 scripts/gas_benchmark_report.py \
+  --results stellar-lend/benchmark-results.json \
+  --baseline stellar-lend/benchmarks/baseline.json \
+  --coverage stellar-lend/benchmarks/public-functions.json \
+  --dashboard stellar-lend/benchmark-dashboard.md \
+  --history stellar-lend/benchmarks/history.jsonl \
+  --history-out stellar-lend/benchmark-history.jsonl \
+  --max-regression-pct 10 \
+  --fail-on-regression
+```
+
+## Historical Dashboard
+
+Every CI run uploads:
+
+- `benchmark-results.json` with raw per-operation measurements
+- `benchmark-dashboard.md` with contract summaries, missing coverage, regressions, storage usage, and cross-contract calls
+- `benchmark-history.jsonl` with the historical trend plus the latest run
+
+Commit `benchmarks/baseline.json` after intentional gas changes. Commit `benchmarks/history.jsonl` only when maintainers want a new long-lived trend point in the repository.
+
 ## CI Integration
 
 The `.github/workflows/gas-benchmarks.yml` workflow:
@@ -114,9 +146,9 @@ The `.github/workflows/gas-benchmarks.yml` workflow:
 - Runs on every PR touching `contracts/**` or `benchmarks/**`
 - Builds and runs the full benchmark suite
 - Compares against `baseline.json` if it has recorded results
-- Fails the PR if any operation exceeds its gas budget
-- Uploads `benchmark-results.json` as a CI artifact (retained 90 days)
-- Posts a summary table to the GitHub Actions step summary
+- Fails the PR if a required operation is missing, any operation exceeds its gas budget, or any operation regresses by more than 10%
+- Uploads `benchmark-results.json`, `benchmark-dashboard.md`, and `benchmark-history.jsonl` as CI artifacts
+- Posts the Markdown dashboard to the GitHub Actions step summary
 
 ## Edge Cases Covered
 
diff --git a/stellar-lend/benchmarks/history.jsonl b/stellar-lend/benchmarks/history.jsonl
new file mode 100644
index 00000000..d6fbad0c
--- /dev/null
+++ b/stellar-lend/benchmarks/history.jsonl
@@ -0,0 +1 @@
+{"timestamp":"2026-04-23T00:00:00Z","source":"initial-empty-baseline","total_benchmarks":0,"passed":0,"failed":0,"max_instructions":0,"avg_instructions":0}
diff --git a/stellar-lend/benchmarks/public-functions.json b/stellar-lend/benchmarks/public-functions.json
new file mode 100644
index 00000000..b561ca2d
--- /dev/null
+++ b/stellar-lend/benchmarks/public-functions.json
@@ -0,0 +1,88 @@
+{
+  "version": 1,
+  "description": "Operations that must be present in every complete gas benchmark report.",
+  "required_operations": [
+    "amm::initialize_amm_settings",
+    "amm::add_amm_protocol",
+    "amm::update_amm_settings",
+    "amm::execute_swap",
+    "amm::execute_swap_warm",
+    "amm::add_liquidity",
+    "amm::add_liquidity_warm",
+    "amm::remove_liquidity",
+    "amm::auto_swap_for_collateral",
+    "amm::validate_amm_callback",
+    "amm::get_amm_settings",
+    "amm::get_amm_protocols",
+    "amm::get_swap_history_empty",
+    "amm::get_swap_history_populated",
+    "amm::get_liquidity_history",
+    "bridge::init",
+    "bridge::register_bridge",
+    "bridge::register_bridge_warm",
+    "bridge::set_bridge_fee",
+    "bridge::set_bridge_active",
+    "bridge::bridge_deposit",
+    "bridge::bridge_deposit_warm",
+    "bridge::bridge_withdraw",
+    "bridge::bridge_withdraw_warm",
+    "bridge::transfer_admin",
+    "bridge::get_bridge_config",
+    "bridge::list_bridges_empty",
+    "bridge::list_bridges_populated",
+    "bridge::get_admin",
+    "bridge::compute_fee",
+    "bridge::compute_fee_zero_rate",
+    "bridge::bridge_deposit_multi_bridge_storage",
+    "hello_world::initialize",
+    "hello_world::deposit_collateral",
+    "hello_world::deposit_collateral_warm",
+    "hello_world::borrow_asset",
+    "hello_world::borrow_asset_warm",
+    "hello_world::repay_debt",
+    "hello_world::repay_debt_warm",
+    "hello_world::withdraw_collateral",
+    "hello_world::withdraw_collateral_warm",
+    "hello_world::liquidate",
+    "hello_world::can_be_liquidated",
+    "hello_world::get_max_liquidatable_amount",
+    "hello_world::get_liquidation_incentive_amount",
+    "hello_world::execute_flash_loan",
+    "hello_world::set_risk_params",
+    "hello_world::set_emergency_pause",
+    "hello_world::transfer_admin",
+    "hello_world::update_asset_config",
+    "hello_world::set_treasury",
+    "hello_world::get_treasury",
+    "hello_world::set_fee_config",
+    "hello_world::get_fee_config",
+    "hello_world::get_reserve_balance",
+    "hello_world::get_health_factor",
+    "hello_world::get_user_position",
+    "hello_world::get_user_asset_list",
+    "hello_world::get_user_total_collateral_value",
+    "hello_world::deposit_collateral_multi_asset_storage",
+    "lending::initialize",
+    "lending::initialize_deposit_settings",
+    "lending::deposit",
+    "lending::deposit_warm",
+    "lending::deposit_collateral",
+    "lending::deposit_collateral_warm",
+    "lending::borrow",
+    "lending::repay",
+    "lending::repay_warm",
+    "lending::withdraw",
+    "lending::withdraw_warm",
+    "lending::liquidate",
+    "lending::flash_loan",
+    "lending::get_health_factor",
+    "lending::get_user_position",
+    "lending::get_user_debt",
+    "lending::get_collateral_balance",
+    "lending::set_oracle",
+    "lending::set_pause",
+    "lending::set_flash_loan_fee_bps",
+    "lending::set_liquidation_threshold_bps",
+    "lending::deposit_multi_asset_storage"
+  ]
+}