diff --git a/.gitignore b/.gitignore index 8be933eb..f373ab6a 100644 --- a/.gitignore +++ b/.gitignore @@ -8,4 +8,6 @@ __pycache__/ *.dot .pyre *et_def.pb.cc -*et_def.pb.h \ No newline at end of file +*et_def.pb.h +/mlsys26/traces +chakra_env/ \ No newline at end of file diff --git a/mlsys26/README.md b/mlsys26/README.md new file mode 100644 index 00000000..2840fe6b --- /dev/null +++ b/mlsys26/README.md @@ -0,0 +1,139 @@ +# MLSys 2026 MLCommons Chakra Artifact Evaluation + +## Install/Set up Chakra + +### Create python virtual environment for Chakra +```bash +# Create a virtual environment in the path/to/chakra/ +$ python3 -m venv chakra_env + +# Activate the virtual environment +$ source chakra_env/bin/activate +``` + +## Install Chakra and Convert NeMo Traces to Chakra .et + +### Install Chakra +```bash +source chakra_env/bin/activate +pip install . +``` + +### Pin protobuf version +> **Critical:** The protobuf version used to **generate** the `.et` traces must match the +> version compiled into the ASTRA-sim Docker image. The Dockerfile builds **protobuf 6.33.0**. +> Pin your Chakra environment to the same version before converting traces. +```bash +pip install protobuf==6.33.0 +``` + +### Install PARAM (required by `chakra_trace_link`) +`chakra_trace_link` depends on `et_replay` from the [PARAM](https://github.com/facebookresearch/param) project. +```bash +git clone https://github.com/facebookresearch/param.git +cd param/et_replay +git checkout 7b19f586dd8b267333114992833a0d7e0d601630 +pip install . +cd ../.. 
+``` + +### Download traces +```bash +cd mlsys26 +bash download_nemo_chakra_traces.sh +``` + +### Convert traces (trace link + converter in one step) +```bash +bash convert_traces.sh +``` + +Outputs are written to: +- `mlsys26/traces/linked/` — linked JSON (host + device merged per rank) +- `mlsys26/traces/et/` — protobuf `.et` files ready for ASTRA-sim + +## Using ASTRA-sim for Chakra-Based Simulation of Diverse Networked Systems + +ASTRA-sim leverages Chakra’s ET feeder to replace its original custom workload format. This integration has enabled a range of co-design studies on emerging platforms, particularly for exploring and optimizing networking infrastructures. + +### ASTRA-sim Installation +> [!WARNING] +> Run the commands below inside the `${CHAKRA_REPO_ROOT}/mlsys26` directory, where `CHAKRA_REPO_ROOT` is the path to your Chakra checkout. + +```bash +# Clone ASTRA-sim. +git clone git@github.com:astra-sim/astra-sim.git + + +cd ./astra-sim +# Check out the ASTRA-sim revision validated for this artifact +git checkout changhai/chakra_main_paper +git submodule update --init --recursive +cd .. +``` + +> [!NOTE] +> Building the Docker image can take several minutes. 
+```bash +# Align the protobuf versions through the following patch +cd ${CHAKRA_REPO_ROOT}/mlsys26 +bash astra-sim-patch.sh ./astra-sim/Dockerfile + +# Remove any old container and image first, if any (full clean rebuild) +docker rm -f astra-sim-mlsys26 2>/dev/null || true +docker rmi -f astra-sim:mlsys26 2>/dev/null || true + +# Build Docker image +docker build -t astra-sim:mlsys26 -f ./astra-sim/Dockerfile ./astra-sim + +# Run container with bind mounts: +# /app/astra-sim <- astra-sim source + build output +# /app/astra-sim/mlsys26/plots <- run scripts and configs +# /traces <- .et trace files +docker run -it --name astra-sim-mlsys26 --shm-size=8g \ + -v "$(pwd)/astra-sim:/app/astra-sim" \ + -v "$(pwd)/plots:/app/astra-sim/mlsys26/plots" \ + -v "$(pwd)/traces/et:/traces" \ + astra-sim:mlsys26 bash +``` + +### Build ASTRA-sim inside the container +```bash +# Inside the container: +./build/astra_analytical/build.sh +``` + + +### Final step (with ASTRA-sim and Chakra in place): run the simulation +```bash +# Inside the container (after building): +bash /app/astra-sim/mlsys26/plots/m8x7/mixtral_8x7b.sh +``` + +### Draw the plots (Figs. 6, 7, 8, 12) +```bash +# Back in path/to/chakra/mlsys26, with chakra_env activated +# Go to plots directory +cd plots + +# Install the plotting dependencies (the plot scripts import matplotlib, numpy and pandas) +pip install matplotlib numpy pandas + +# Figure 6 +python chakra_kineto_reconstruct.py + +# Figure 7 +python plot_coll_ib.py + +# Figure 8 +bash run_plot_memory.sh + +# Figure 12 +cd ./m8x7/ +python plot_astra-sim_bw_analysis.py + +# Clean up the per-case result log directories generated by the simulation (optional; this path is the plots mount inside the container) +cd /app/astra-sim/mlsys26/plots/m8x7/ +find . -maxdepth 1 -type d ! -name . -exec rm -rf {} + +``` + diff --git a/mlsys26/astra-sim-patch.sh b/mlsys26/astra-sim-patch.sh new file mode 100644 index 00000000..c8b98891 --- /dev/null +++ b/mlsys26/astra-sim-patch.sh @@ -0,0 +1,67 @@ +#!/usr/bin/env bash +set -euo pipefail + +DOCKERFILE="${1:-Dockerfile}" + +if [[ ! 
-f "$DOCKERFILE" ]]; then + echo "Error: $DOCKERFILE not found" + exit 1 +fi + +# cp "$DOCKERFILE" "${DOCKERFILE}.bak" + +sed -i \ + -e 's/^ARG ABSL_VER=20240722\.0$/ARG ABSL_VER=20250814.1/' \ + -e 's/^## Download Abseil 20240722\.0.*/## Download Abseil 20250814.1 (Latest LTS as of 10\/31\/2024)/' \ + -e 's/^ARG PROTOBUF_VER=29\.0$/ARG PROTOBUF_VER=33.0/' \ + -e 's/^## Download Protobuf 29\.0.*/## Download Protobuf 33.0 (=v6.33.0, latest stable version as of Feb\/01\/2025)/' \ + -e 's/protobuf==5\.\${PROTOBUF_VER}/protobuf==6.${PROTOBUF_VER}/' \ + "$DOCKERFILE" + +python3 - "$DOCKERFILE" <<'PY' +from pathlib import Path +import re +import sys + +path = Path(sys.argv[1]) +text = path.read_text() + +# Update all C++ standard settings from 14 -> 17 +text = re.sub(r'(-DCMAKE_CXX_STANDARD=)14\b', r'\g<1>17', text) + +path.write_text(text) +PY + +# Patch the CMakeLists.txt that lives alongside the Dockerfile +CMAKEFILE="$(dirname "$DOCKERFILE")/CMakeLists.txt" + +if [[ ! -f "$CMAKEFILE" ]]; then + echo "Warning: $CMAKEFILE not found, skipping CMakeLists.txt patch" +else + python3 - "$CMAKEFILE" <<'PY' +from pathlib import Path +import re +import sys + +path = Path(sys.argv[1]) +text = path.read_text() + +# Remove hardcoded abseil .so linker lines that are baked into the repo +# but break builds when the abseil version changes. 
+cleaned, n = re.subn( + r'\ntarget_link_libraries\(AstraSim PRIVATE /usr/local/lib/libabsl_log_internal[^\n]+\)', + '', + text, +) + +if n == 0: + print(f"No abseil link libraries found in {sys.argv[1]}, nothing to remove") +else: + path.write_text(cleaned) + print(f"Removed {n} abseil link librar{'y' if n == 1 else 'ies'} from {sys.argv[1]}") +PY + echo "Patched $CMAKEFILE" +fi + +echo "Patched $DOCKERFILE" +# echo "Backup saved as ${DOCKERFILE}.bak" \ No newline at end of file diff --git a/mlsys26/convert_traces.sh b/mlsys26/convert_traces.sh new file mode 100755 index 00000000..4ace1a96 --- /dev/null +++ b/mlsys26/convert_traces.sh @@ -0,0 +1,80 @@ +#!/usr/bin/env bash +# convert_traces.sh +# Links Chakra host+device traces and converts them to protobuf (.et) format +# for all ranks in the Mixtral-8x7B NeMo trace set. +# +# Usage: +# source /bin/activate +# bash mlsys26/convert_traces.sh +# + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +TRACE_DIR="${SCRIPT_DIR}/traces/nemo-chakra-mixtral-8x7B-traces" +LINKED_DIR="${SCRIPT_DIR}/traces/linked" +ET_DIR="${SCRIPT_DIR}/traces/et" + +# --------------------------------------------------------------------------- +# Validate inputs +# --------------------------------------------------------------------------- +if [[ ! -d "${TRACE_DIR}" ]]; then + echo "[ERROR] Trace directory not found: ${TRACE_DIR}" + echo " Run download_nemo_chakra_traces.sh first." 
+ exit 1 +fi + +mkdir -p "${LINKED_DIR}" "${ET_DIR}" + +# Automatically detect number of ranks from host_*.json files +NUM_RANKS=$(ls "${TRACE_DIR}"/host_*.json 2>/dev/null | wc -l) +if [[ "${NUM_RANKS}" -eq 0 ]]; then + echo "[ERROR] No host_*.json files found in ${TRACE_DIR}" + exit 1 +fi +echo "[INFO] Found ${NUM_RANKS} rank(s) in ${TRACE_DIR}" + +# --------------------------------------------------------------------------- +# Step 1: chakra_trace_link (host + device → linked JSON) +# --------------------------------------------------------------------------- +echo "" +echo "=== Step 1: chakra_trace_link ===" +for ((rank=0; rank ${LINKED_OUT}" + chakra_trace_link \ + --chakra-host-trace "${HOST_TRACE}" \ + --chakra-device-trace "${DEVICE_TRACE}" \ + --rank "${rank}" \ + --output-file "${LINKED_OUT}" +done +echo "[INFO] All ranks linked." + +# --------------------------------------------------------------------------- +# Step 2: chakra_converter (linked JSON → protobuf .et) +# ASTRA-sim expects files named {prefix}.{npu_id}.et +# e.g. chakra_trace.0.et, chakra_trace.1.et, ... +# so we use --output /chakra_trace. → chakra_trace..et +# --------------------------------------------------------------------------- +echo "" +echo "=== Step 2: chakra_converter ===" +for ((rank=0; rank ${ET_OUT}" + chakra_converter PyTorch \ + --input "${LINKED_IN}" \ + --output "${ET_OUT}" +done +echo "[INFO] All ranks converted." + +echo "" +echo "=== Done ===" +echo "Linked JSON traces : ${LINKED_DIR}/" +echo "Protobuf .et traces: ${ET_DIR}/" +echo " Files: chakra_trace.0.et ... 
# Draws one group of two stacked bars per workload (left bar labelled "Kineto",
# right bar labelled "Chakra" in the variable names below) and saves the figure
# to runtime_idle_label.pdf.  All measurements are hard-coded in this file.
import matplotlib.pyplot as plt
import numpy as np

# ----------------------
# Input data (in microseconds)
# ----------------------
workloads = ["GPT3 5B", "GPT3 175B", "Llama3 8B", "Llama3 70B",
             "Mixtral8x7B", "Mixtral8x22B", "DeepSeek"]

# One entry per workload, in the same order as `workloads`.
Kineto_total = np.array([4756993, 122523790, 50121297, 169539857,
                         10617205, 15850134, 32575067], dtype=float)
Chakra_total = np.array([4614357, 118234851, 43391211, 165717189,
                         8495977, 15122716, 27726698], dtype=float)
astra_total = np.array([4876814, 118233705, 43386681, 165716010, 8495393, 14242611, 27726132], dtype=float)
astra_comm = np.array([10158, 113617612, 22726909, 151149875, 3315647, 12467803, 25816064], dtype=float)

# Every *_n array below except idle_n is divided by this base, i.e. by the
# Chakra total (not the Kineto total).
base = Chakra_total
# Computation is whatever remains of the astra total after its communication.
astra_comp = astra_total - astra_comm

# The compute part of each total is the astra compute time, capped at the
# total itself; the remainder of the total is attributed to communication.
Kineto_comp = np.minimum(astra_comp, Kineto_total)
Kineto_comm = Kineto_total - Kineto_comp
Chakra_comp = np.minimum(astra_comp, Chakra_total)
Chakra_comm = Chakra_total - Chakra_comp
# ----------------------
# Normalization: divide by `base` (Chakra total); zeros are replaced by 1.0
# so the divisions below cannot divide by zero.
# ----------------------
base = np.where(base == 0, 1.0, base)
# base = np.where(Kineto_total == 0, 1.0, Kineto_total)

# Compute and communication assumed identical for both systems
# comp_ratio = 0.8
# comm_ratio = 0.2

# comp_n = np.full_like(Kineto_total, comp_ratio, dtype=float)
# comm_n = np.full_like(Kineto_total, comm_ratio, dtype=float)

comp_n = Kineto_comp / base
comm_n = Kineto_comm / base
# NOTE(review): the four arrays below are computed but never passed to any
# ax.bar() call in this script — confirm whether the right-hand bars were
# meant to use replay_comp_n / replay_comm_n.
replay_comp_n = Chakra_comp / base
replay_comm_n = Chakra_comm / base
astra_comp_n = astra_comp / base
astra_comm_n = astra_comm / base

# Idle time = (Kineto - Chakra) / Kineto.  Note the denominator is the Kineto
# total here, whereas comp_n / comm_n above are divided by the Chakra total.
idle_n = (Kineto_total - Chakra_total) / Kineto_total
idle_n = np.clip(idle_n, 0, None)  # no negative idle

# ----------------------
# Layout settings
# ----------------------
bars_per_group = 2
bar_width = 0.25
intra_gap = 0.08
group_gap = 0.15

num_groups = len(workloads)
# Width of one group, then evenly spaced group centres along the x axis.
group_width = (bars_per_group - 1) * (bar_width + intra_gap) + bar_width
group_centers = np.arange(num_groups) * (group_width + group_gap)
# Horizontal shift of the left and right bar relative to the group centre.
offsets = np.array([-0.4 * (bar_width + intra_gap), +0.4 * (bar_width + intra_gap)])

# Color scheme
comp_color = "#d62728"  # red
comm_color = "#1f77b4"  # blue
idle_color = "#aaaaaa"  # gray

axis_label_size = 22
tick_size = 20

fig, ax = plt.subplots(figsize=(14, 6))

x_Kineto = group_centers + offsets[0]  # Left side
x_Chakra = group_centers + offsets[1]  # Right side


# Kineto bars (with idle): computation at the bottom, communication stacked on
# top of it, idle time stacked on top of both.
ax.bar(x_Kineto, comp_n, bar_width, color=comp_color, zorder=3)
ax.bar(x_Kineto, comm_n, bar_width, bottom=comp_n, color=comm_color, zorder=3)
for i in range(num_groups):
    ax.bar(x_Kineto[i], idle_n[i], bar_width,
           bottom=comp_n[i] + comm_n[i], color=idle_color, zorder=3)

# Chakra bars (baseline)
# NOTE(review): these use comp_n / comm_n, the same arrays as the Kineto bars
# above, so both bars in a group have identical red/blue segments and differ
# only by the gray idle segment — confirm this is intended.
ax.bar(x_Chakra, comp_n, bar_width, color=comp_color, zorder=3)
ax.bar(x_Chakra, comm_n, bar_width, bottom=comp_n, color=comm_color, zorder=3)

# ----------------------
# Axes and labels
# ----------------------
ax.set_ylabel("Normalized Execution Time", fontsize=axis_label_size)
ax.set_xticks(group_centers)
ax.set_xticklabels(workloads, fontsize=tick_size)
ax.tick_params(axis="y", labelsize=tick_size)

# Proxy rectangles so the legend shows one entry per colour instead of one
# entry per bar container (the bars above are drawn without labels).
comp_patch = plt.Rectangle((0, 0), 1, 1, color=comp_color)
comm_patch = plt.Rectangle((0, 0), 1, 1, color=comm_color)
idle_patch = plt.Rectangle((0, 0), 1, 1, color=idle_color)

ax.legend(
    [comp_patch, comm_patch, idle_patch],
    ["Computation", "Exposed Communication", "Idle Time"],
    ncol=3,
    fontsize=20,
    loc="upper left",
    handletextpad=0.4,  # spacing between legend marker and text
    columnspacing=0.8,  # horizontal space between columns
    handlelength=1.2,  # length of the color box
    borderpad=0.3,  # inner padding of the legend box
    labelspacing=0.4  # vertical space between entries (useful if multiple rows)
)

# ----------------------
# Grid and dividers
# ----------------------
ax.grid(axis="y", linestyle="--", alpha=0.7, zorder=0)
# Dotted vertical divider halfway between each pair of neighbouring groups.
boundaries = (group_centers[:-1] + group_centers[1:]) / 2.0
for b in boundaries:
    ax.axvline(b, color="#999999", linestyle=":", linewidth=1, zorder=1)


# sub_labels = ["Chakra", "Kineto"]
# sub_positions = [x_Chakra, x_Kineto]
# y_text = -0.07
# for pos, lab in zip(sub_positions, sub_labels):
#     for xi in pos:
#         ax.text(xi, y_text, lab, ha="center", va="top", fontsize=14,
#                 transform=ax.get_xaxis_transform())

# ----------------------
# Adjust x limits (the per-bar sub-labels above are currently disabled)
# ----------------------
all_x = np.concatenate([x_Kineto, x_Chakra])  # include both sides
xmin, xmax = all_x.min() - bar_width * 1.4, all_x.max() + bar_width * 1.2
ax.set_xlim(xmin, xmax)

# plt.subplots_adjust(bottom=0.15)  # make room for labels
# plt.tight_layout()
# tight_layout() is called after subplots_adjust(), so it has the final say on
# the subplot margins.
plt.subplots_adjust(left=0.10, bottom=0.15, right=0.98, top=0.95)
plt.tight_layout()
plt.savefig("runtime_idle_label.pdf", bbox_inches="tight")
# plt.show()
#!/bin/bash
set -e

## ******************************************************************************
## This source code is licensed under the MIT license found in the
## LICENSE file in the root directory of this source tree.
##
## Copyright (c) 2024 Georgia Institute of Technology
## ******************************************************************************

# Runs the Mixtral-8x7B network sweep: one ASTRA-sim (analytical,
# congestion-aware) run per network config <case>.yml next to this script,
# all launched in parallel.  Each case logs into ${SCRIPT_DIR}/<case>/.
#
# This script does NOT build ASTRA-sim; build it beforehand with
#   /app/astra-sim/build/astra_analytical/build.sh

# Absolute path to the directory holding this script and its configs.
SCRIPT_DIR=$(dirname "$(realpath "$0")")

# Inside the Docker container, ASTRA-sim is built at /app/astra-sim.
# The location is hard-coded instead of being derived from SCRIPT_DIR to
# avoid path resolution issues with the mlsys26 mount point.
PROJECT_DIR="/app/astra-sim"
EXAMPLE_DIR="${PROJECT_DIR:?}/examples"

# paths
ASTRA_SIM="${PROJECT_DIR:?}/build/astra_analytical/build/bin/AstraSim_Analytical_Congestion_Aware"
# Chakra .et traces are mounted at /traces inside the Docker container.
# ASTRA-sim reads /traces/chakra_trace.0.et ... /traces/chakra_trace.7.et
WORKLOAD="/traces/chakra_trace"
SYSTEM="${SCRIPT_DIR:?}/system.json"
REMOTE_MEMORY="${EXAMPLE_DIR:?}/remote_memory/analytical/no_memory_expansion.json"

# Temporary debug knobs (each can be overridden from the environment):
#   1) suppress unreleased-node teardown warnings
#   2) avoid early exit when event queue drains before all ranks finish
#   3) cap retries to avoid infinite loops
: "${ASTRA_SIM_SKIP_UNRELEASED_NODE_CHECK:=1}"
: "${ASTRA_SIM_DISABLE_EARLY_EXIT:=1}"
: "${ASTRA_SIM_EMPTY_QUEUE_STALL_LIMIT:=20000}"
export ASTRA_SIM_SKIP_UNRELEASED_NODE_CHECK
export ASTRA_SIM_DISABLE_EARLY_EXIT
export ASTRA_SIM_EMPTY_QUEUE_STALL_LIMIT

# Network configurations to simulate; each name maps to ${SCRIPT_DIR}/<name>.yml.
CASES=(
    fc75 fc150 fc300 fc600 fc900
    s75 s150 s300 s600 s900
    r75 r150 r300 r600 r900
)

# Fail fast with a clear message instead of launching 15 runs that would all
# fail silently (simulator output is discarded below).
if [[ ! -x "${ASTRA_SIM}" ]]; then
    echo "[ASTRA-sim] ERROR: simulator binary not found: ${ASTRA_SIM}" >&2
    echo "[ASTRA-sim]        Build it first: ${PROJECT_DIR}/build/astra_analytical/build.sh" >&2
    exit 1
fi
for required_file in "${SYSTEM}" "${REMOTE_MEMORY}" "${WORKLOAD}.0.et"; do
    if [[ ! -f "${required_file}" ]]; then
        echo "[ASTRA-sim] ERROR: required input not found: ${required_file}" >&2
        exit 1
    fi
done

# run_case <case_name>
#   Runs one simulation with network config ${SCRIPT_DIR}/<case_name>.yml and
#   logging folder ${SCRIPT_DIR}/<case_name>.  Returns the simulator's exit
#   status (or 1 when the network config is missing).
run_case() {
    local case_name="$1"
    local logging_dir="${SCRIPT_DIR:?}/${case_name}"
    local network="${SCRIPT_DIR:?}/${case_name}.yml"
    local status=0

    if [[ ! -f "${network}" ]]; then
        echo "[ASTRA-sim] ERROR: network config not found: ${network}" >&2
        return 1
    fi

    mkdir -p "${logging_dir:?}"
    echo "[ASTRA-sim] Running case: ${case_name}"

    # Simulator stdout/stderr is discarded, as before; only the exit status is
    # kept so that failures can be reported by the caller.
    "${ASTRA_SIM:?}" \
        --workload-configuration="${WORKLOAD}" \
        --system-configuration="${SYSTEM:?}" \
        --remote-memory-configuration="${REMOTE_MEMORY:?}" \
        --network-configuration="${network:?}" \
        --logging-folder="${logging_dir:?}" > /dev/null 2>&1 || status=$?

    return "${status}"
}

echo "[ASTRA-sim] Running ASTRA-sim Example with Analytical Network Backend..."
echo ""

# Launch every case in the background and remember which PID is which.
pids=()
names=()
for case_name in "${CASES[@]}"; do
    run_case "${case_name}" &
    pids+=("$!")
    names+=("${case_name}")
done

# Wait for each case individually so a failing one can be named.  A failure
# does not stop the remaining cases and, as before, does not fail the script.
failed=()
for i in "${!pids[@]}"; do
    if ! wait "${pids[$i]}"; then
        failed+=("${names[$i]}")
    fi
done

# finalize
echo ""
if (( ${#failed[@]} > 0 )); then
    echo "[ASTRA-sim] WARNING: ${#failed[@]} case(s) exited with a non-zero status: ${failed[*]}" >&2
fi
echo "[ASTRA-sim] Finished the execution."
"""Grouped bar chart of normalized communication time per topology and bandwidth.

Writes the figure to ``astra-sim-chakra-bw-analysis.pdf`` in the current
working directory.  All numbers are hard-coded below.
"""
import matplotlib.pyplot as plt
import numpy as np

# Tick labels (one group of bars each) and legend entries (one bar each).
topologies = ['Fully-Connected', 'Ring', 'Switch']
bandwidths = ['75GB/s', '150GB/s', '300GB/s', '600GB/s', '900GB/s']

# rows = topologies, cols = bandwidths.
# NOTE(review): row i is drawn under topologies[i], i.e. the rows are plotted
# as Fully-Connected, Ring, Switch.  The per-row annotations kept below name
# them in the opposite order — confirm against the simulation logs which of
# the two is correct.
raw = np.array([
    [7085495469, 4064450176, 2905072696, 1990520371, 1708311078],  # Switch
    [4331750922, 2520517119, 1801545391, 1454825514, 1342633550],  # Ring
    [2514776156, 1838174566, 1513367965, 1338718656, 1280430992],  # Fully-Connected
])

# Normalize by the smallest entry so the fastest configuration reads 1.0.
data = raw / raw.min()

n_topo = len(topologies)
n_bw = len(bandwidths)

x = np.arange(n_topo)   # one x position per topology group
width = 0.13            # width of a single bar

fig, ax = plt.subplots(figsize=(8, 2.5))

# One bar series per bandwidth, centred as a cluster on each group position.
for i, bw_label in enumerate(bandwidths):
    ax.bar(x + i*width - width*(n_bw-1)/2, data[:, i], width, label=bw_label)

ax.set_xticks(x)
ax.set_xticklabels(topologies)
ax.set_ylabel('Normalized Comm Time')
ax.legend(title='Bandwidth')
plt.tight_layout()

plt.savefig("astra-sim-chakra-bw-analysis.pdf")
+latency: [ 936.25 ] # ns diff --git a/mlsys26/plots/m8x7/r600.yml b/mlsys26/plots/m8x7/r600.yml new file mode 100644 index 00000000..3b4b62bb --- /dev/null +++ b/mlsys26/plots/m8x7/r600.yml @@ -0,0 +1,4 @@ +topology: [ Ring ] +npus_count: [ 8 ] +bandwidth: [ 300.0 ] # GB/s +latency: [ 936.25 ] # ns diff --git a/mlsys26/plots/m8x7/r75.yml b/mlsys26/plots/m8x7/r75.yml new file mode 100644 index 00000000..895cc897 --- /dev/null +++ b/mlsys26/plots/m8x7/r75.yml @@ -0,0 +1,4 @@ +topology: [ Ring ] +npus_count: [ 8 ] +bandwidth: [ 37.5 ] # GB/s +latency: [ 936.25 ] # ns diff --git a/mlsys26/plots/m8x7/r900.yml b/mlsys26/plots/m8x7/r900.yml new file mode 100644 index 00000000..288340bf --- /dev/null +++ b/mlsys26/plots/m8x7/r900.yml @@ -0,0 +1,4 @@ +topology: [ Ring ] +npus_count: [ 8 ] +bandwidth: [ 450.0 ] # GB/s +latency: [ 936.25 ] # ns diff --git a/mlsys26/plots/m8x7/s150.yml b/mlsys26/plots/m8x7/s150.yml new file mode 100644 index 00000000..428fb1ee --- /dev/null +++ b/mlsys26/plots/m8x7/s150.yml @@ -0,0 +1,4 @@ +topology: [ Switch ] +npus_count: [ 8 ] +bandwidth: [ 150.0 ] # GB/s +latency: [ 936.25 ] # ns diff --git a/mlsys26/plots/m8x7/s300.yml b/mlsys26/plots/m8x7/s300.yml new file mode 100644 index 00000000..af0f8fbd --- /dev/null +++ b/mlsys26/plots/m8x7/s300.yml @@ -0,0 +1,4 @@ +topology: [ Switch ] +npus_count: [ 8 ] +bandwidth: [ 300.0 ] # GB/s +latency: [ 936.25 ] # ns diff --git a/mlsys26/plots/m8x7/s600.yml b/mlsys26/plots/m8x7/s600.yml new file mode 100644 index 00000000..a6943a31 --- /dev/null +++ b/mlsys26/plots/m8x7/s600.yml @@ -0,0 +1,4 @@ +topology: [ Switch ] +npus_count: [ 8 ] +bandwidth: [ 600.0 ] # GB/s +latency: [ 936.25 ] # ns diff --git a/mlsys26/plots/m8x7/s75.yml b/mlsys26/plots/m8x7/s75.yml new file mode 100644 index 00000000..0028165f --- /dev/null +++ b/mlsys26/plots/m8x7/s75.yml @@ -0,0 +1,4 @@ +topology: [ Switch ] +npus_count: [ 8 ] +bandwidth: [ 75.0 ] # GB/s +latency: [ 936.25 ] # ns diff --git a/mlsys26/plots/m8x7/s900.yml 
b/mlsys26/plots/m8x7/s900.yml new file mode 100644 index 00000000..39244203 --- /dev/null +++ b/mlsys26/plots/m8x7/s900.yml @@ -0,0 +1,4 @@ +topology: [ Switch ] +npus_count: [ 8 ] +bandwidth: [ 900.0 ] # GB/s +latency: [ 936.25 ] # ns diff --git a/mlsys26/plots/m8x7/system.json b/mlsys26/plots/m8x7/system.json new file mode 100644 index 00000000..4ce315e3 --- /dev/null +++ b/mlsys26/plots/m8x7/system.json @@ -0,0 +1,21 @@ +{ + "scheduling-policy": "LIFO", + "endpoint-delay": 10, + "active-chunks-per-dimension": 2, + "preferred-dataset-splits": 4, + "all-reduce-implementation": [ + "ring" + ], + "all-gather-implementation": [ + "ring" + ], + "reduce-scatter-implementation": [ + "ring" + ], + "all-to-all-implementation": [ + "ring" + ], + "collective-optimization": "localBWAware", + "local-mem-bw": 3350, + "boost-mode": 0 +} diff --git a/mlsys26/plots/plot_coll_ib.py b/mlsys26/plots/plot_coll_ib.py new file mode 100644 index 00000000..25df1b0d --- /dev/null +++ b/mlsys26/plots/plot_coll_ib.py @@ -0,0 +1,51 @@ +import pandas as pd +import matplotlib.pyplot as plt +import numpy as np + +# --- Data for InfiniBand 400 Gb/s --- +data_400 = { + 'Subcategory': ['AllToAll', 'AllGather', 'ReduceScatter', 'AllReduce'], + 'Total Duration (us)': [2334290.06, 766347.27, 435756.7, 15609.41] +} + +# --- Data for InfiniBand 100 Gb/s --- +data_100 = { + 'Subcategory': ['AllToAll', 'AllGather', 'ReduceScatter', 'AllReduce'], + 'Total Duration (us)': [9640178.28, 3351602.3, 648624.11, 151577.8] +} + + +df_400 = pd.DataFrame(data_400) +df_100 = pd.DataFrame(data_100) + +df = df_400.merge(df_100, on='Subcategory', suffixes=('_400Gbps', '_100Gbps')) + +# Plot +x = np.arange(len(df['Subcategory'])) +width = 0.35 + +fig, ax = plt.subplots(figsize=(12, 4)) + + +bars_400 = ax.bar(x - width/2, df['Total Duration (us)_400Gbps'], width, label='400 Gb/s', color='#1f77b4', alpha=0.85) +bars_100 = ax.bar(x + width/2, df['Total Duration (us)_100Gbps'], width, label='100 Gb/s', color='#ff7f0e', 
def _legend_name(input_path):
    """Return the legend label for *input_path*.

    For a file name containing ``memory_`` and a dot, the label is the text
    between the last ``memory_`` and the first following dot; otherwise it is
    the file name without its extension.
    """
    base_name = os.path.basename(input_path)
    if "memory_" in base_name and "." in base_name:
        return base_name.split("memory_")[-1].split(".")[0]
    return os.path.splitext(base_name)[0]


def _load_series(input_path):
    """Load one memory trace and return ``(time_offsets_s, allocated_mb)``.

    Returns ``None`` (after printing the reason) when the file is missing,
    has an unsupported extension, lacks a required column, or has no rows.
    """
    if not os.path.exists(input_path):
        print(f"Warning: {input_path} not found, skipping.")
        return None

    # Read the file (supports CSV or JSON)
    if input_path.endswith(".csv"):
        try:
            df = pd.read_csv(input_path)
        except pd.errors.EmptyDataError:
            # A zero-byte CSV has no header to parse; treat it as "no rows".
            print(f"No data found in {input_path}, skipping.")
            return None
    elif input_path.endswith(".json"):
        df = pd.read_json(input_path)
    else:
        print(f"Unsupported file format: {input_path}")
        return None

    # Microsecond timestamps take precedence and are converted to seconds.
    if "Timestamp (us)" in df.columns:
        seconds = df["Timestamp (us)"] / 1_000_000
    elif "Timestamp (s)" in df.columns:
        seconds = df["Timestamp (s)"]
    else:
        print(f"No timestamp column found in {input_path}, skipping.")
        return None

    if "Total Allocated (MB)" not in df.columns:
        print(f"Column 'Total Allocated (MB)' not found in {input_path}, skipping.")
        return None

    # Without rows there is no first timestamp to offset against.
    if df.empty:
        print(f"No data found in {input_path}, skipping.")
        return None

    # Offset timestamps relative to the first timestamp
    return seconds - seconds.iloc[0], df["Total Allocated (MB)"]


def main():
    """Plot "Total Allocated (MB)" over time for every input trace into one PDF.

    Command line: ``plot_memory.py <input1> [<input2> ...] <output.pdf>``.
    Inputs that cannot be used are skipped with a message.  Exits with status
    1 when too few arguments are given or when no input could be plotted (in
    which case no file is written).
    """
    if len(sys.argv) < 3:
        print("Usage: python plot_memory.py <input1> [<input2> ...] <output.pdf>")
        sys.exit(1)

    # All arguments except the last are input files
    *input_files, output_pdf = sys.argv[1:]

    plt.figure(figsize=(10, 4))

    plotted = 0
    for input_path in input_files:
        series = _load_series(input_path)
        if series is None:
            continue
        offsets, allocated = series
        plt.plot(offsets, allocated, label=_legend_name(input_path))
        plotted += 1

    # Do not write a blank figure and report success when nothing was usable.
    if not plotted:
        print("Error: none of the input files could be plotted; no output written.")
        sys.exit(1)

    plt.xlabel("Timeline (s)", fontsize=18)
    plt.ylabel("Memory (MB)", fontsize=18)
    plt.xticks(fontsize=14)
    plt.yticks(fontsize=14)
    plt.legend(fontsize=14)
    plt.grid(True)
    plt.tight_layout()
    plt.savefig(output_pdf)
    print(f"Memory usage plot saved to {output_pdf}")