Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
a778aae
Refactor CUDA backend to streamline buffer allocation, enhance memory…
Xayah-Hina Mar 28, 2026
674fe13
Refactor CUDA backend to remove unused LaunchGeometry struct, streaml…
Xayah-Hina Mar 28, 2026
d020c63
Refactor CUDA backend to remove DomainSide struct, streamline collide…
Xayah-Hina Mar 28, 2026
217db0c
Refactor CUDA backend to enhance scalar field sampling, streamline bo…
Xayah-Hina Mar 28, 2026
8f0e111
Refactor CUDA backend to replace fminf and fmaxf with cuda::std::clam…
Xayah-Hina Mar 28, 2026
1363475
Refactor CMakeLists.txt to enhance MSVC compile options with /Zc:prep…
Xayah-Hina Mar 28, 2026
96addc2
Refactor CUDA backend to introduce StableFluidsExportDesc structure, …
Xayah-Hina Mar 28, 2026
6d8ba76
Refactor CUDA backend to improve weighted field sampling, streamline …
Xayah-Hina Mar 28, 2026
3e84c45
Refactor CUDA backend to enhance weight calculation logic, improving …
Xayah-Hina Mar 28, 2026
a6a126c
Refactor CUDA backend to enhance collider handling, introduce new vel…
Xayah-Hina Mar 28, 2026
e1a69ae
Refactor CMakeLists.txt to disable Vulkan app build option and update…
Xayah-Hina Mar 28, 2026
554a3f9
Refactor CMakeLists.txt to enable Vulkan app build option, update err…
Xayah-Hina Mar 28, 2026
1e738ad
Refactor visualization logic to streamline field handling, update den…
Xayah-Hina Mar 28, 2026
3f00115
Refactor app.cpp and app.ixx to improve code formatting, enhance read…
Xayah-Hina Mar 28, 2026
791278c
Refactor CUDA backend to remove unused projection metrics state and a…
Xayah-Hina Mar 28, 2026
b8d716e
Refactor field rendering logic to simplify sample_field function, rem…
Xayah-Hina Mar 28, 2026
d4ffea1
Refactor app.cpp and app.ixx to add velocity plane visualization, enh…
Xayah-Hina Mar 28, 2026
75963b2
Refactor app.cpp and app.ixx to introduce scene_plume module, enhance…
Xayah-Hina Mar 28, 2026
f0c0da9
Refactor app.cpp and app.ixx to streamline visualization settings man…
Xayah-Hina Mar 28, 2026
e2c358e
Refactor app.cpp and app.ixx to enhance velocity plane settings, stre…
Xayah-Hina Mar 28, 2026
1af699b
Refactor app.cpp and app.ixx to update velocity plane settings, enhan…
Xayah-Hina Mar 28, 2026
ea8c1d9
Refactor field and scene modules to introduce background color settin…
Xayah-Hina Mar 28, 2026
3bd28e3
Refactor app.cpp and app.ixx to implement scene switching functionali…
Xayah-Hina Mar 28, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 11 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,8 @@ target_include_directories(stable-fluids PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
target_compile_definitions(stable-fluids PRIVATE STABLE_FLUIDS_BUILD_SHARED)
target_compile_options(stable-fluids PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:--extended-lambda --expt-relaxed-constexpr>)
if (MSVC)
target_compile_options(stable-fluids PRIVATE $<$<COMPILE_LANGUAGE:CXX>:/W4 /permissive->)
target_compile_options(stable-fluids PRIVATE $<$<COMPILE_LANGUAGE:CXX>:/W4 /permissive- /Zc:preprocessor>)
target_compile_options(stable-fluids PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=/Zc:preprocessor>)
else ()
target_compile_options(stable-fluids PRIVATE $<$<COMPILE_LANGUAGE:CXX>:-Wall -Wextra -Wpedantic>)
endif ()
Expand All @@ -57,7 +58,8 @@ if (STABLE_FLUIDS_BUILD_SAMPLES)
target_link_libraries(stable-fluids-sample PRIVATE stable-fluids CUDA::cudart)
target_compile_options(stable-fluids-sample PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:--extended-lambda --expt-relaxed-constexpr>)
if (MSVC)
target_compile_options(stable-fluids-sample PRIVATE $<$<COMPILE_LANGUAGE:CXX>:/W4 /permissive->)
target_compile_options(stable-fluids-sample PRIVATE $<$<COMPILE_LANGUAGE:CXX>:/W4 /permissive- /Zc:preprocessor>)
target_compile_options(stable-fluids-sample PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=/Zc:preprocessor>)
else ()
target_compile_options(stable-fluids-sample PRIVATE $<$<COMPILE_LANGUAGE:CXX>:-Wall -Wextra -Wpedantic>)
endif ()
Expand Down Expand Up @@ -129,6 +131,8 @@ VULKAN_HPP_DEFAULT_DISPATCH_LOADER_DYNAMIC_STORAGE

vulkan-app/main.cpp
vulkan-app/app.cpp
vulkan-app/scene_plume.cpp
vulkan-app/scene_cloud.cpp

PUBLIC FILE_SET cxx_modules TYPE CXX_MODULES FILES
vulkan-app/modules/vk.camera.ixx
Expand All @@ -142,11 +146,16 @@ VULKAN_HPP_DEFAULT_DISPATCH_LOADER_DYNAMIC_STORAGE
vulkan-app/modules/vk.swapchain.ixx
vulkan-app/modules/vk.texture.ixx
vulkan-app/app.ixx
vulkan-app/scene_plume.ixx
vulkan-app/scene_cloud.ixx
)
add_dependencies(stable-fluids-app stable-fluids-shaders)
target_compile_definitions(stable-fluids-app PRIVATE SMOKE_SIM_SHADER_DIR="${SMOKE_SIM_SHADER_DIR}" VULKAN_HPP_DISPATCH_LOADER_DYNAMIC=1 VULKAN_HPP_NO_STRUCT_CONSTRUCTORS=1)
target_link_libraries(stable-fluids-app PRIVATE Vulkan::Vulkan glfw stable-fluids CUDA::cudart)
target_include_directories(stable-fluids-app PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/vulkan-app/vendor/imgui)
set_property(TARGET stable-fluids-app PROPERTY CXX_MODULE_STD ON)
set_target_properties(stable-fluids-app PROPERTIES CUDA_SEPARABLE_COMPILATION ON CUDA_RESOLVE_DEVICE_SYMBOLS ON)
if (MSVC)
target_compile_options(stable-fluids-app PRIVATE $<$<COMPILE_LANGUAGE:CXX>:/W4 /permissive- /Zc:preprocessor>)
endif ()
endif ()
1,769 changes: 300 additions & 1,469 deletions backend-cuda.cu

Large diffs are not rendered by default.

260 changes: 115 additions & 145 deletions main.cu
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
#include "stable-fluids-3d.h"

#include <algorithm>
#include <array>
#include <chrono>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cuda_runtime.h>
#include <numeric>
#include <vector>
#include <array>

namespace {

bool cuda_ok(const cudaError_t status, const char* what) {
if (status == cudaSuccess) return true;
std::fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
Expand All @@ -22,194 +24,162 @@ namespace {
return false;
}

__host__ __device__ std::uint64_t index_3d(const int x, const int y, const int z, const int sx, const int sy) {
return static_cast<std::uint64_t>(z) * static_cast<std::uint64_t>(sx) * static_cast<std::uint64_t>(sy) + static_cast<std::uint64_t>(y) * static_cast<std::uint64_t>(sx) + static_cast<std::uint64_t>(x);
}

__global__ void fill_kernel(float* field, const float value, const int nx, const int ny, const int nz) {
const int x = static_cast<int>(blockIdx.x * blockDim.x + threadIdx.x);
const int y = static_cast<int>(blockIdx.y * blockDim.y + threadIdx.y);
const int z = static_cast<int>(blockIdx.z * blockDim.z + threadIdx.z);
if (x >= nx || y >= ny || z >= nz) return;
field[index_3d(x, y, z, nx, ny)] = value;
}

__global__ void add_blob_kernel(float* field, const float amplitude, const float center_x, const float center_y, const float center_z, const float radius, const int nx, const int ny, const int nz, const float h) {
const int x = static_cast<int>(blockIdx.x * blockDim.x + threadIdx.x);
const int y = static_cast<int>(blockIdx.y * blockDim.y + threadIdx.y);
const int z = static_cast<int>(blockIdx.z * blockDim.z + threadIdx.z);
if (x >= nx || y >= ny || z >= nz) return;
const float px = (static_cast<float>(x) + 0.5f) * h;
const float py = (static_cast<float>(y) + 0.5f) * h;
const float pz = (static_cast<float>(z) + 0.5f) * h;
const float dx = px - center_x;
const float dy = py - center_y;
const float dz = pz - center_z;
const float radius2 = radius * radius;
const float dist2 = dx * dx + dy * dy + dz * dz;
if (dist2 > radius2) return;
const float weight = radius2 > 0.0f ? 1.0f - dist2 / radius2 : 0.0f;
field[index_3d(x, y, z, nx, ny)] += amplitude * weight;
}

} // namespace

int main() {
constexpr int32_t nx = 100;
constexpr int32_t ny = 100;
constexpr int32_t nz = 100;
constexpr int frames = 24;
constexpr float cell_size = 0.01f;
constexpr float extent_x = static_cast<float>(nx) * cell_size;
constexpr float extent_y = static_cast<float>(ny) * cell_size;
constexpr float extent_z = static_cast<float>(nz) * cell_size;
constexpr float gravity_y = -9.81f;
constexpr float buoyancy_beta = 0.35f;
constexpr float buoyancy_weight = -gravity_y * buoyancy_beta;

StableFluidsSimulationConfig config{
constexpr int32_t nx = 96;
constexpr int32_t ny = 96;
constexpr int32_t nz = 96;
constexpr int frames = 32;
constexpr float cell_size = 0.01f;
constexpr float extent_x = static_cast<float>(nx) * cell_size;
constexpr float extent_y = static_cast<float>(ny) * cell_size;
constexpr float extent_z = static_cast<float>(nz) * cell_size;
constexpr float source_r = 0.055f;
constexpr float source_x = extent_x * 0.35f;
constexpr float source_y = extent_y * 0.16f;
constexpr float source_z = extent_z * 0.50f;

const StableFluidsSimulationConfig config{
.nx = nx,
.ny = ny,
.nz = nz,
.cell_size = cell_size,
.dt = 1.0f / 120.0f,
.viscosity = 0.00015f,
.dt = 1.0f / 90.0f,
.viscosity = 0.00012f,
.diffuse_iterations = 24,
.pressure_iterations = 96,
.uniform_force_x = 0.0f,
.uniform_force_y = 0.0f,
.uniform_force_z = 0.0f,
.domain_boundary = {
.x_min = { .type = static_cast<uint32_t>(STABLE_FLUIDS_VELOCITY_BOUNDARY_OUTFLOW), .velocity = 0.0f, },
.x_max = { .type = static_cast<uint32_t>(STABLE_FLUIDS_VELOCITY_BOUNDARY_OUTFLOW), .velocity = 0.0f, },
.y_min = { .type = static_cast<uint32_t>(STABLE_FLUIDS_VELOCITY_BOUNDARY_NO_SLIP), .velocity = 0.0f, },
.y_max = { .type = static_cast<uint32_t>(STABLE_FLUIDS_VELOCITY_BOUNDARY_OUTFLOW), .velocity = 0.0f, },
.z_min = { .type = static_cast<uint32_t>(STABLE_FLUIDS_VELOCITY_BOUNDARY_OUTFLOW), .velocity = 0.0f, },
.z_max = { .type = static_cast<uint32_t>(STABLE_FLUIDS_VELOCITY_BOUNDARY_OUTFLOW), .velocity = 0.0f, },
.boundary = {
.x = STABLE_FLUIDS_BOUNDARY_PERIODIC,
.y = STABLE_FLUIDS_BOUNDARY_FIXED,
.z = STABLE_FLUIDS_BOUNDARY_PERIODIC,
},
.block_x = 8,
.block_y = 8,
.block_z = 4,
};

std::array fields{
const std::array fields{
StableFluidsFieldCreateDesc{
.name = "density",
.component_count = 1,
.flags = STABLE_FLUIDS_FIELD_ADVECT | STABLE_FLUIDS_FIELD_DIFFUSE,
.diffusion = 0.00005f,
.extension_mode = static_cast<uint32_t>(STABLE_FLUIDS_FIELD_EXTENSION_STREAK),
.default_value_0 = 0.0f,
.default_value_1 = 0.0f,
.default_value_2 = 0.0f,
.default_value_3 = 0.0f,
},
StableFluidsFieldCreateDesc{
.name = "dye",
.component_count = 3,
.flags = STABLE_FLUIDS_FIELD_ADVECT | STABLE_FLUIDS_FIELD_DIFFUSE,
.diffusion = 0.00002f,
.extension_mode = static_cast<uint32_t>(STABLE_FLUIDS_FIELD_EXTENSION_STREAK),
.default_value_0 = 0.0f,
.default_value_1 = 0.0f,
.default_value_2 = 0.0f,
.default_value_3 = 0.0f,
.dissipation = 0.35f,
.initial_value = 0.0f,
},
};
std::array buoyancy_terms{
StableFluidsBuoyancyDesc{
.field_index = 0,
.weight = buoyancy_weight,
.ambient = 0.0f,
},
};
std::array<StableFluidsFieldHandle, 2> field_handles{};
std::array<StableFluidsFieldHandle, 1> field_handles{};

StableFluidsContextCreateDesc create_desc{
const StableFluidsContextCreateDesc create_desc{
.config = config,
.stream = nullptr,
.fields = fields.data(),
.field_count = static_cast<uint32_t>(fields.size()),
.buoyancy_terms = buoyancy_terms.data(),
.buoyancy_term_count = static_cast<uint32_t>(buoyancy_terms.size()),
};

StableFluidsContext context = nullptr;
if (!stable_ok(stable_fluids_create_context_cuda(&create_desc, &context, field_handles.data(), static_cast<uint32_t>(field_handles.size())), "stable_fluids_create_context_cuda")) return EXIT_FAILURE;

const StableFluidsColliderDesc collider{
.collider_type = static_cast<uint32_t>(STABLE_FLUIDS_COLLIDER_SPHERE),
.velocity_boundary_type = static_cast<uint32_t>(STABLE_FLUIDS_VELOCITY_BOUNDARY_NO_SLIP),
.center_x = extent_x * 0.5f,
.center_y = extent_y * 0.36f,
.center_z = extent_z * 0.5f,
.radius = 0.08f,
.half_extent_x = 0.0f,
.half_extent_y = 0.0f,
.half_extent_z = 0.0f,
.linear_velocity_x = 0.0f,
.linear_velocity_y = 0.0f,
.linear_velocity_z = 0.0f,
};
const StableFluidsSceneDesc scene_desc{
.colliders = &collider,
.collider_count = 1,
};
if (!stable_ok(stable_fluids_update_scene_cuda(context, &scene_desc), "stable_fluids_update_scene_cuda")) {
stable_fluids_destroy_context_cuda(context);
return EXIT_FAILURE;
}
const dim3 block(static_cast<unsigned>(config.block_x), static_cast<unsigned>(config.block_y), static_cast<unsigned>(config.block_z));
const dim3 cells(
static_cast<unsigned>((config.nx + config.block_x - 1) / config.block_x),
static_cast<unsigned>((config.ny + config.block_y - 1) / config.block_y),
static_cast<unsigned>((config.nz + config.block_z - 1) / config.block_z));

const auto cell_count = static_cast<std::size_t>(nx) * static_cast<std::size_t>(ny) * static_cast<std::size_t>(nz);
const auto scalar_size = cell_count * sizeof(float);

float* force_x = nullptr;
float* force_y = nullptr;
float* force_z = nullptr;
float* density_source = nullptr;
float* device_density = nullptr;

if (!cuda_ok(cudaMalloc(reinterpret_cast<void**>(&force_x), scalar_size), "cudaMalloc force_x")) return EXIT_FAILURE;
if (!cuda_ok(cudaMalloc(reinterpret_cast<void**>(&force_y), scalar_size), "cudaMalloc force_y")) return EXIT_FAILURE;
if (!cuda_ok(cudaMalloc(reinterpret_cast<void**>(&force_z), scalar_size), "cudaMalloc force_z")) return EXIT_FAILURE;
if (!cuda_ok(cudaMalloc(reinterpret_cast<void**>(&density_source), scalar_size), "cudaMalloc density_source")) return EXIT_FAILURE;
if (!cuda_ok(cudaMalloc(reinterpret_cast<void**>(&device_density), scalar_size), "cudaMalloc device_density")) return EXIT_FAILURE;

const auto begin = std::chrono::steady_clock::now();
for (int frame = 0; frame < frames; ++frame) {
const float center_x = extent_x * 0.18f;
const float center_y = extent_y * 0.14f;
const float center_z = extent_z * 0.28f + static_cast<float>(frame & 1) * 0.005f;
const StableFluidsVelocitySourceDesc velocity_source{
.center_x = center_x,
.center_y = center_y,
.center_z = center_z,
.radius = 0.045f,
.velocity_x = 0.18f,
.velocity_y = 0.42f,
.velocity_z = 0.12f,
};
const std::array field_sources{
StableFluidsFieldSourceDesc{
.field = field_handles[0],
.center_x = center_x,
.center_y = center_y,
.center_z = center_z,
.radius = 0.045f,
.value_0 = 0.55f,
.value_1 = 0.0f,
.value_2 = 0.0f,
.value_3 = 0.0f,
},
StableFluidsFieldSourceDesc{
.field = field_handles[1],
.center_x = center_x,
.center_y = center_y,
.center_z = center_z,
.radius = 0.045f,
.value_0 = 0.85f,
.value_1 = 0.22f,
.value_2 = 1.10f,
.value_3 = 0.0f,
},
fill_kernel<<<cells, block>>>(force_x, 0.0f, nx, ny, nz);
fill_kernel<<<cells, block>>>(force_y, 0.0f, nx, ny, nz);
fill_kernel<<<cells, block>>>(force_z, 0.0f, nx, ny, nz);
fill_kernel<<<cells, block>>>(density_source, 0.0f, nx, ny, nz);
if (!cuda_ok(cudaGetLastError(), "fill_kernel")) return EXIT_FAILURE;

const float lateral = std::sin(static_cast<float>(frame) * 0.35f);
const float swirl = std::cos(static_cast<float>(frame) * 0.27f);
add_blob_kernel<<<cells, block>>>(density_source, 32.0f, source_x, source_y, source_z, source_r, nx, ny, nz, cell_size);
add_blob_kernel<<<cells, block>>>(force_x, 2.2f * lateral, source_x, source_y, source_z, source_r, nx, ny, nz, cell_size);
add_blob_kernel<<<cells, block>>>(force_y, 7.5f, source_x, source_y, source_z, source_r, nx, ny, nz, cell_size);
add_blob_kernel<<<cells, block>>>(force_z, 1.8f * swirl, source_x, source_y, source_z, source_r, nx, ny, nz, cell_size);
if (!cuda_ok(cudaGetLastError(), "add_blob_kernel")) return EXIT_FAILURE;

const StableFluidsFieldSourceDesc field_source{
.field = field_handles[0],
.values = density_source,
};
const StableFluidsStepDesc step_desc{
.velocity_sources = &velocity_source,
.velocity_source_count = 1,
.field_sources = field_sources.data(),
.field_source_count = static_cast<uint32_t>(field_sources.size()),
.force_x = force_x,
.force_y = force_y,
.force_z = force_z,
.field_sources = &field_source,
.field_source_count = 1,
};
if (!stable_ok(stable_fluids_step_cuda(context, &step_desc), "stable_fluids_step_cuda")) {
stable_fluids_destroy_context_cuda(context);
return EXIT_FAILURE;
}
if (!stable_ok(stable_fluids_step_cuda(context, &step_desc), "stable_fluids_step_cuda")) return EXIT_FAILURE;
}

std::vector<float> density(static_cast<std::size_t>(nx) * static_cast<std::size_t>(ny) * static_cast<std::size_t>(nz), 0.0f);
float* device_density = nullptr;
const auto scalar_bytes = density.size() * sizeof(float);
if (!cuda_ok(cudaMalloc(reinterpret_cast<void**>(&device_density), scalar_bytes), "cudaMalloc export density")) {
stable_fluids_destroy_context_cuda(context);
return EXIT_FAILURE;
}
const StableFluidsExportDesc export_desc{
.kind = STABLE_FLUIDS_EXPORT_FIELD,
.field = field_handles[0],
};
if (!stable_ok(stable_fluids_export_cuda(context, &export_desc, device_density), "stable_fluids_export_cuda")) return EXIT_FAILURE;
if (!cuda_ok(cudaDeviceSynchronize(), "cudaDeviceSynchronize")) return EXIT_FAILURE;

if (!stable_ok(stable_fluids_export_field_components_cuda(context, field_handles[0], 0, 1, device_density), "stable_fluids_export_field_components_cuda")) {
cudaFree(device_density);
stable_fluids_destroy_context_cuda(context);
return EXIT_FAILURE;
}
if (!cuda_ok(cudaDeviceSynchronize(), "cudaDeviceSynchronize")) {
cudaFree(device_density);
stable_fluids_destroy_context_cuda(context);
return EXIT_FAILURE;
}
if (!cuda_ok(cudaMemcpy(density.data(), device_density, scalar_bytes, cudaMemcpyDeviceToHost), "cudaMemcpy density")) {
cudaFree(device_density);
stable_fluids_destroy_context_cuda(context);
return EXIT_FAILURE;
}
std::vector<float> density(cell_count, 0.0f);
if (!cuda_ok(cudaMemcpy(density.data(), device_density, scalar_size, cudaMemcpyDeviceToHost), "cudaMemcpy density")) return EXIT_FAILURE;

cudaFree(force_x);
cudaFree(force_y);
cudaFree(force_z);
cudaFree(density_source);
cudaFree(device_density);
stable_fluids_destroy_context_cuda(context);

const float total_density = std::accumulate(density.begin(), density.end(), 0.0f);
const float peak_density = density.empty() ? 0.0f : *std::max_element(density.begin(), density.end());
const auto elapsed_ms = std::chrono::duration<double, std::milli>(std::chrono::steady_clock::now() - begin).count();
const float peak_density = density.empty() ? 0.0f : *std::max_element(density.begin(), density.end());
const auto elapsed_ms = std::chrono::duration<double, std::milli>(std::chrono::steady_clock::now() - begin).count();
std::printf("frames=%d total_density=%.6f peak_density=%.6f elapsed_ms=%.3f\n", frames, total_density, peak_density, elapsed_ms);
return EXIT_SUCCESS;
}
Loading
Loading