Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 71 additions & 0 deletions .github/workflows/docs.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# Build and deploy Sphinx documentation to GitHub Pages.
#
# Build runs on pushes/PRs targeting `master`; deployment only happens for
# pushes to `master` itself (PR refs never match the deploy condition).
name: Docs

on:
  push:
    branches: [master]
  pull_request:
    branches: [master]
  workflow_dispatch:

# Minimal permissions required by actions/deploy-pages (OIDC deployment).
permissions:
  contents: read
  pages: write
  id-token: write

# One Pages build per ref at a time; newer runs cancel in-flight ones.
concurrency:
  group: "pages-${{ github.ref }}"
  cancel-in-progress: true

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          submodules: false

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"

      - name: Install system dependencies
        # LaTeX packages for Sphinx math rendering; OSMesa/GL for headless
        # PyVista off-screen rendering.
        run: |
          sudo apt-get update
          sudo apt-get install -y texlive-latex-extra texlive-fonts-recommended texlive-science dvipng cm-super libosmesa6-dev libgl1-mesa-dev

      - name: Install uv
        uses: astral-sh/setup-uv@v4

      - name: Install dependencies
        run: uv sync

      - name: Fetch MLflow artifacts
        env:
          DATABRICKS_TOKEN: ${{ secrets.DATABRICKS_TOKEN }}
        run: uv run python main.py --fetch

      - name: Build Sphinx docs
        env:
          PYVISTA_OFF_SCREEN: "true"
          PYTHONPATH: "${{ github.workspace }}"
        run: uv run python main.py --docs

      - name: Upload artifact
        # BUG FIX: condition previously checked `refs/heads/main`, but this
        # workflow only triggers on `master` (see `on:` above), so the
        # artifact was never uploaded and deployment never ran. Aligned to
        # the trigger branch. NOTE(review): if the repo's default branch is
        # actually `main`, change the `on:` branches instead — confirm.
        if: github.ref == 'refs/heads/master'
        uses: actions/upload-pages-artifact@v3
        with:
          path: docs/build/html

  deploy:
    # Must match the branch condition of the upload step above
    # (was `refs/heads/main` — unreachable with a `master`-only trigger).
    if: github.ref == 'refs/heads/master'
    environment:
      name: github-pages
      url: ${{ steps.deployment.outputs.page_url }}
    runs-on: ubuntu-latest
    needs: build
    steps:
      - name: Deploy to GitHub Pages
        id: deployment
        uses: actions/deploy-pages@v4
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ docs/reports/TexReport/


# Generated data (keep README.md)
data/*
#data/*

# Uv stuff
uv.lock
Expand All @@ -18,6 +18,7 @@ docs/source/generated/
docs/source/example_gallery/
docs/source/_autosummary/
docs/source/gen_modules/backreferences/
sg_execution_times*

# macOS
.DS_Store
Expand Down
37 changes: 0 additions & 37 deletions CODE_REVIEW.md

This file was deleted.

56 changes: 41 additions & 15 deletions Experiments/01-kernels/compute_all.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
1. Convergence validation with analytical solution
2. Fixed iteration performance benchmark
"""

import numpy as np
import pandas as pd
from pathlib import Path
from dataclasses import asdict
from scipy.ndimage import laplace

Expand All @@ -29,10 +29,10 @@ def run_kernel(kernel, f, max_iter, track_algebraic=False):

if track_algebraic:
kernel.timeseries.physical_errors = []
h2 = kernel.parameters.h ** 2
h2 = kernel.parameters.h**2

for _ in range(max_iter):
residual = kernel.step(u_old, u, f)
kernel.step(u_old, u, f)

if track_algebraic:
Au = -laplace(u) / h2
Expand All @@ -47,10 +47,10 @@ def run_kernel(kernel, f, max_iter, track_algebraic=False):
def kernel_to_df(kernel, kernel_name, N, omega, **extra):
"""Convert kernel timeseries to DataFrame."""
df = pd.DataFrame(asdict(kernel.timeseries))
df['iteration'] = range(len(df))
df['kernel'] = kernel_name
df['N'] = N
df['omega'] = omega
df["iteration"] = range(len(df))
df["kernel"] = kernel_name
df["N"] = N
df["omega"] = omega
for k, v in extra.items():
df[k] = v
return df
Expand All @@ -67,14 +67,18 @@ def kernel_to_df(kernel, kernel_name, N, omega, **extra):
f = problems.sinusoidal_source_term(N)

numpy_kernel = NumPyKernel(N=N, omega=omega, tolerance=0.0, max_iter=max_iter)
numba_kernel = NumbaKernel(N=N, omega=omega, tolerance=0.0, max_iter=max_iter, numba_threads=4)
numba_kernel = NumbaKernel(
N=N, omega=omega, tolerance=0.0, max_iter=max_iter, numba_threads=4
)
numba_kernel.warmup()

for name, kernel in [('numpy', numpy_kernel), ('numba', numba_kernel)]:
for name, kernel in [("numpy", numpy_kernel), ("numba", numba_kernel)]:
run_kernel(kernel, f, max_iter, track_algebraic=True)
all_dfs.append(kernel_to_df(kernel, name, N, omega, tolerance=0.0))

pd.concat(all_dfs, ignore_index=True).to_parquet(data_dir / "kernel_convergence.parquet", index=False)
pd.concat(all_dfs, ignore_index=True).to_parquet(
data_dir / "kernel_convergence.parquet", index=False
)


# Experiment 2: Fixed Iteration Benchmark
Expand All @@ -90,16 +94,38 @@ def kernel_to_df(kernel, kernel_name, N, omega, **extra):
kernel = NumPyKernel(N=N, omega=omega, tolerance=0.0, max_iter=max_iter)
f = np.ones((N, N, N), dtype=np.float64)
run_kernel(kernel, f, max_iter)
all_dfs.append(kernel_to_df(kernel, 'numpy', N, omega, max_iter=max_iter, use_numba=False, num_threads=0))
all_dfs.append(
kernel_to_df(
kernel, "numpy", N, omega, max_iter=max_iter, use_numba=False, num_threads=0
)
)

# Numba with thread scaling
for num_threads in thread_counts:
for idx, N in enumerate(problem_sizes):
kernel = NumbaKernel(N=N, omega=omega, tolerance=0.0, max_iter=max_iter, numba_threads=num_threads)
kernel = NumbaKernel(
N=N,
omega=omega,
tolerance=0.0,
max_iter=max_iter,
numba_threads=num_threads,
)
if idx == 0:
kernel.warmup()
f = np.ones((N, N, N), dtype=np.float64)
run_kernel(kernel, f, max_iter)
all_dfs.append(kernel_to_df(kernel, 'numba', N, omega, max_iter=max_iter, use_numba=True, num_threads=num_threads))

pd.concat(all_dfs, ignore_index=True).to_parquet(data_dir / "kernel_benchmark.parquet", index=False)
all_dfs.append(
kernel_to_df(
kernel,
"numba",
N,
omega,
max_iter=max_iter,
use_numba=True,
num_threads=num_threads,
)
)

pd.concat(all_dfs, ignore_index=True).to_parquet(
data_dir / "kernel_benchmark.parquet", index=False
)
93 changes: 51 additions & 42 deletions Experiments/01-kernels/plot_kernels.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

Comprehensive analysis and visualization of NumPy vs Numba kernel benchmarks.
"""

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
Expand All @@ -23,8 +24,12 @@
fig_dir.mkdir(parents=True, exist_ok=True)

# Check if data exists
if not data_dir.exists():
raise FileNotFoundError(f"Data not found: {data_dir}. Run compute_kernels.py first.")
if not list(data_dir.glob("*.parquet")):
print(f"Data not found: {data_dir}. Run compute_kernels.py first.")
# Graceful exit for docs build
import sys

sys.exit(0)

# %%
# Plot 1: Convergence Validation
Expand All @@ -37,21 +42,21 @@
# Create faceted plot: one subplot per problem size
g = sns.relplot(
data=df_conv,
x='iteration',
y='physical_errors',
col='N',
hue='kernel',
x="iteration",
y="physical_errors",
col="N",
hue="kernel",
style="kernel",
kind='line',
kind="line",
dashes=True,
markers=False,
facet_kws={'sharey': True, 'sharex': False}
facet_kws={"sharey": True, "sharex": False},
)

g.set(xscale='log', yscale='log')
g.set_axis_labels('Iteration', r'Algebraic Residual $||Au - f||_\infty$')
g.set_titles(col_template='N={col_name}')
g.fig.suptitle(r'Kernel Convergence Validation', y=1.02)
g.set(xscale="log", yscale="log")
g.set_axis_labels("Iteration", r"Algebraic Residual $||Au - f||_\infty$")
g.set_titles(col_template="N={col_name}")
g.fig.suptitle(r"Kernel Convergence Validation", y=1.02)

# Save figure
g.savefig(fig_dir / "01_convergence_validation.pdf")
Expand All @@ -64,13 +69,14 @@
df = pd.read_parquet(benchmark_file)

# Convert to milliseconds
df['time_ms'] = df['compute_times'] * 1000
df["time_ms"] = df["compute_times"] * 1000

# Prepare configuration labels
df['config'] = df.apply(
lambda row: 'NumPy' if row['kernel'] == 'numpy'
df["config"] = df.apply(
lambda row: "NumPy"
if row["kernel"] == "numpy"
else f"Numba ({int(row['num_threads'])} threads)",
axis=1
axis=1,
)

# %%
Expand All @@ -81,21 +87,21 @@
fig, ax = plt.subplots()
sns.lineplot(
data=df,
x='N',
y='time_ms',
hue='config',
style='config',
x="N",
y="time_ms",
hue="config",
style="config",
markers=True,
dashes=False,
errorbar='ci', # Show confidence intervals
ax=ax
errorbar="ci", # Show confidence intervals
ax=ax,
)

ax.set_xscale('log')
ax.set_yscale('log')
ax.set_xlabel('Problem Size (N)')
ax.set_ylabel('Time per Iteration (ms)')
ax.set_title('Kernel Performance Comparison')
ax.set_xscale("log")
ax.set_yscale("log")
ax.set_xlabel("Problem Size (N)")
ax.set_ylabel("Time per Iteration (ms)")
ax.set_title("Kernel Performance Comparison")

fig.savefig(fig_dir / "02_performance.pdf")

Expand All @@ -104,30 +110,33 @@
# -------------------------

# Compute numpy baseline for each N and iteration
df_numpy = df[df['kernel'] == 'numpy'][['N', 'iteration', 'compute_times']].rename(
columns={'compute_times': 'numpy_time'}
df_numpy = df[df["kernel"] == "numpy"][["N", "iteration", "compute_times"]].rename(
columns={"compute_times": "numpy_time"}
)
df_speedup = df[df["kernel"] == "numba"].merge(
df_numpy, on=["N", "iteration"], how="left"
)
df_speedup["speedup"] = df_speedup["numpy_time"] / df_speedup["compute_times"]
df_speedup["thread_label"] = (
df_speedup["num_threads"].astype(int).astype(str) + " threads"
)
df_speedup = df[df['kernel'] == 'numba'].merge(df_numpy, on=['N', 'iteration'], how='left')
df_speedup['speedup'] = df_speedup['numpy_time'] / df_speedup['compute_times']
df_speedup['thread_label'] = df_speedup['num_threads'].astype(int).astype(str) + ' threads'

# Create speedup plot - seaborn will compute mean and error bars
fig, ax = plt.subplots()
sns.lineplot(
data=df_speedup,
x='N',
y='speedup',
hue='thread_label',
style='thread_label',
x="N",
y="speedup",
hue="thread_label",
style="thread_label",
markers=True,
dashes=False,
errorbar='ci',
ax=ax
errorbar="ci",
ax=ax,
)

ax.set_xlabel('Problem Size (N)')
ax.set_ylabel('Speedup vs NumPy')
ax.set_title('Fixed Iteration Speedup (200 iterations)')
ax.set_xlabel("Problem Size (N)")
ax.set_ylabel("Speedup vs NumPy")
ax.set_title("Fixed Iteration Speedup (200 iterations)")

fig.savefig(fig_dir / "03_speedup_fixed_iter.pdf")

Loading