# trace replay H200 llama fp8 1 hr v1.1 (#117)
name: Multi-Turn Benchmark Sweep
run-name: "${{ inputs.run_name || format('Multi-Turn Sweep - tp={0} users={1} offload={2}', inputs.tp_values, inputs.user_values, inputs.offload_values) }}"

on:
  # push:
  #   branches:
  #     - experimental/multi-turn-benchmark
  #   paths:
  #     - .github/workflows/multiturn-sweep.yml
  workflow_dispatch:
    inputs:
      run_name:
        description: 'Custom run name (optional)'
        required: false
        default: ''
        type: string
      tp_values:
        description: 'TP sizes (JSON array)'
        required: true
        default: '[1, 2, 4, 8]'
        type: string
      user_values:
        description: 'Concurrent user counts (JSON array). Ignored if config_file is set.'
        required: false
        default: '[8, 16, 32, 64, 128, 256, 512, 1024, 2048]'
        type: string
      offload_values:
        description: 'Offload modes (JSON array: on/off/noprefix). Ignored if config_file is set.'
        required: false
        default: '["on", "off", "noprefix"]'
        type: string
      config_file:
        description: 'YAML config with per-TP sweep settings (e.g. .github/configs/multiturn-agentic-trace.yaml). Overrides tp/user/offload values.'
        required: false
        default: ''
        type: string
      config_key:
        description: 'Top-level key in config_file to use (e.g. h200-fp8-llama70b, b200-fp4-dsr1). Required if config_file has multiple entries.'
        required: false
        default: ''
        type: string
      duration:
        description: 'Benchmark duration in seconds (optional, runs to completion if omitted)'
        required: false
        default: ''
        type: string
      request_rate:
        description: 'Request rate per client (Poisson, req/s). 0 = no delay.'
        required: false
        default: '0'
        type: string
      total_cpu_dram_gb:
        description: 'Total CPU DRAM for KV offload (GB)'
        required: true
        default: '100'
        type: string
      image:
        description: 'Container image'
        required: true
        default: 'vllm/vllm-openai:v0.18.0'
        type: string
      model:
        description: 'Model name'
        required: true
        default: 'nvidia/Llama-3.3-70B-Instruct-FP4'
        type: string
      precision:
        description: 'Model precision (fp4, fp8, etc.) — used to select benchmark script'
        required: false
        default: 'fp4'
        type: string
      script_suffix:
        description: 'Suffix for benchmark script (e.g. "_lmcache" → multiturn_fp4_b200_lmcache.sh)'
        required: false
        default: ''
        type: string
      runner:
        description: 'Runner label (e.g. b200, h200-dgxc-slurm)'
        required: false
        default: 'b200'
        type: string
      ep:
        description: 'Expert parallelism size (for MoE models, default 0 = disabled)'
        required: false
        default: '0'
        type: string
      ref:
        description: 'Git ref (branch/sha) to checkout'
        required: false
        type: string
jobs:
  # ---------------------------------------------------------------------------
  # Generate matrix from config file or CLI inputs
  # ---------------------------------------------------------------------------
  generate-matrix:
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.gen.outputs.matrix }}
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        if: ${{ inputs.config_file != '' }}
        with:
          token: ${{ secrets.REPO_PAT }}
          fetch-depth: 1
          ref: ${{ inputs.ref || github.ref }}
          sparse-checkout: ${{ inputs.config_file }}
      - id: gen
        # Pass inputs through env vars instead of interpolating ${{ }} directly
        # into the Python source: a value containing a quote would otherwise
        # break the script (or allow script injection, per GitHub's
        # security-hardening guidance for untrusted expression expansion).
        env:
          CONFIG_FILE: ${{ inputs.config_file }}
          CONFIG_KEY: ${{ inputs.config_key }}
          TP_VALUES: ${{ inputs.tp_values }}
          USER_VALUES: ${{ inputs.user_values }}
          OFFLOAD_VALUES: ${{ inputs.offload_values }}
        run: |
          pip install -q pyyaml
          python3 << 'PYEOF'
          import json, os, sys

          config_file = os.environ.get("CONFIG_FILE", "").strip()
          if config_file:
              import yaml
              with open(config_file) as f:
                  full_config = yaml.safe_load(f)
              config_key = os.environ.get("CONFIG_KEY", "").strip()
              # If config_key is specified, use that section; otherwise auto-detect.
              if config_key and config_key in full_config:
                  config = full_config[config_key]
              elif config_key:
                  print(f"ERROR: config_key '{config_key}' not found. Available: {list(full_config.keys())}")
                  sys.exit(1)
              elif len(full_config) == 1:
                  config = next(iter(full_config.values()))
              elif all(k.startswith("tp") for k in full_config):
                  # Top-level keys look like tp entries (tp2, tp4, ...) — treat
                  # the whole file as a single sweep config.
                  config = full_config
              else:
                  print(f"ERROR: Multiple entries in config, specify config_key. Available: {list(full_config.keys())}")
                  sys.exit(1)
              includes = []
              for key, settings in config.items():
                  # removeprefix (not replace): only strip the leading "tp";
                  # replace() would corrupt a key containing "tp" elsewhere.
                  tp = int(key.removeprefix("tp"))
                  users = settings.get("users", [])
                  offloads = settings.get("offload", ["on", "off"])
                  ep = settings.get("ep", 0)
                  for u in users:
                      for o in offloads:
                          entry = {"tp": tp, "users": u, "offload": o}
                          if ep > 0:
                              entry["ep"] = ep
                          includes.append(entry)
          else:
              # No config file: full cross-product of the three CLI inputs.
              tp_values = json.loads(os.environ["TP_VALUES"])
              user_values = json.loads(os.environ["USER_VALUES"])
              offload_values = json.loads(os.environ["OFFLOAD_VALUES"])
              includes = []
              for tp in tp_values:
                  for u in user_values:
                      for o in offload_values:
                          includes.append({"tp": tp, "users": u, "offload": o})
          matrix = {"include": includes}
          print(f"Generated {len(includes)} matrix entries")
          with open(os.environ["GITHUB_OUTPUT"], "a") as f:
              f.write(f"matrix={json.dumps(matrix)}\n")
          PYEOF
| # --------------------------------------------------------------------------- | |
| # Matrix benchmark jobs — each cell calls the multiturn template | |
| # --------------------------------------------------------------------------- | |
| sweep: | |
| needs: generate-matrix | |
| uses: ./.github/workflows/benchmark-multiturn-tmpl.yml | |
| name: sweep / | |
| strategy: | |
| fail-fast: false | |
| matrix: ${{ fromJson(needs.generate-matrix.outputs.matrix) }} | |
| secrets: inherit | |
| with: | |
| runner: ${{ inputs.runner }} | |
| image: ${{ inputs.image }} | |
| model: ${{ inputs.model }} | |
| precision: ${{ inputs.precision }} | |
| exp-name: "multiturn_tp${{ matrix.tp }}_users${{ matrix.users }}_offload${{ matrix.offload }}" | |
| tp: "${{ matrix.tp }}" | |
| users: "${{ matrix.users }}" | |
| offload-mode: ${{ matrix.offload }} | |
| duration: ${{ inputs.duration }} | |
| request-rate: ${{ inputs.request_rate }} | |
| total-cpu-dram-gb: ${{ inputs.total_cpu_dram_gb }} | |
| script-suffix: ${{ inputs.script_suffix }} | |
| ep: "${{ matrix.ep || inputs.ep }}" | |
| ref: ${{ inputs.ref }} | |
| # --------------------------------------------------------------------------- | |
| # Collect & aggregate results | |
| # --------------------------------------------------------------------------- | |
| collect: | |
| runs-on: ubuntu-latest | |
| needs: sweep | |
| if: always() | |
| name: Collect results | |
| steps: | |
| - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 | |
| with: | |
| token: ${{ secrets.REPO_PAT }} | |
| fetch-depth: 1 | |
| ref: ${{ inputs.ref || github.ref }} | |
| - uses: actions/setup-python@v5 | |
| with: | |
| python-version: '3.11' | |
| - name: Install dependencies | |
| run: pip install pandas matplotlib numpy | |
| - name: Download all artifacts | |
| uses: actions/download-artifact@v4 | |
| with: | |
| pattern: 'multiturn_*' | |
| path: results/ | |
| - name: Run aggregation | |
| run: | | |
| python experimental/multiturn/vllm_benchmark/scripts/collect_sweep_results.py results/ aggregated/ | |
| - name: Upload aggregated results | |
| uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 | |
| with: | |
| name: multiturn_aggregated | |
| path: aggregated/ |