diff --git a/CMakeLists.txt b/CMakeLists.txt index cc00665..5def745 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -43,7 +43,8 @@ target_include_directories(stable-fluids PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) target_compile_definitions(stable-fluids PRIVATE STABLE_FLUIDS_BUILD_SHARED) target_compile_options(stable-fluids PRIVATE $<$:--extended-lambda --expt-relaxed-constexpr>) if (MSVC) - target_compile_options(stable-fluids PRIVATE $<$:/W4 /permissive->) + target_compile_options(stable-fluids PRIVATE $<$:/W4 /permissive- /Zc:preprocessor>) + target_compile_options(stable-fluids PRIVATE $<$:-Xcompiler=/Zc:preprocessor>) else () target_compile_options(stable-fluids PRIVATE $<$:-Wall -Wextra -Wpedantic>) endif () @@ -57,7 +58,8 @@ if (STABLE_FLUIDS_BUILD_SAMPLES) target_link_libraries(stable-fluids-sample PRIVATE stable-fluids CUDA::cudart) target_compile_options(stable-fluids-sample PRIVATE $<$:--extended-lambda --expt-relaxed-constexpr>) if (MSVC) - target_compile_options(stable-fluids-sample PRIVATE $<$:/W4 /permissive->) + target_compile_options(stable-fluids-sample PRIVATE $<$:/W4 /permissive- /Zc:preprocessor>) + target_compile_options(stable-fluids-sample PRIVATE $<$:-Xcompiler=/Zc:preprocessor>) else () target_compile_options(stable-fluids-sample PRIVATE $<$:-Wall -Wextra -Wpedantic>) endif () @@ -129,6 +131,8 @@ VULKAN_HPP_DEFAULT_DISPATCH_LOADER_DYNAMIC_STORAGE vulkan-app/main.cpp vulkan-app/app.cpp + vulkan-app/scene_plume.cpp + vulkan-app/scene_cloud.cpp PUBLIC FILE_SET cxx_modules TYPE CXX_MODULES FILES vulkan-app/modules/vk.camera.ixx @@ -142,6 +146,8 @@ VULKAN_HPP_DEFAULT_DISPATCH_LOADER_DYNAMIC_STORAGE vulkan-app/modules/vk.swapchain.ixx vulkan-app/modules/vk.texture.ixx vulkan-app/app.ixx + vulkan-app/scene_plume.ixx + vulkan-app/scene_cloud.ixx ) add_dependencies(stable-fluids-app stable-fluids-shaders) target_compile_definitions(stable-fluids-app PRIVATE SMOKE_SIM_SHADER_DIR="${SMOKE_SIM_SHADER_DIR}" VULKAN_HPP_DISPATCH_LOADER_DYNAMIC=1 VULKAN_HPP_NO_STRUCT_CONSTRUCTORS=1) @@ -149,4 +155,7 @@ VULKAN_HPP_DEFAULT_DISPATCH_LOADER_DYNAMIC_STORAGE target_include_directories(stable-fluids-app PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/vulkan-app/vendor/imgui) set_property(TARGET stable-fluids-app PROPERTY CXX_MODULE_STD ON) set_target_properties(stable-fluids-app PROPERTIES CUDA_SEPARABLE_COMPILATION ON CUDA_RESOLVE_DEVICE_SYMBOLS ON) + if (MSVC) + target_compile_options(stable-fluids-app PRIVATE $<$:/W4 /permissive- /Zc:preprocessor>) + endif () endif () diff --git a/backend-cuda.cu b/backend-cuda.cu index d38a745..e058358 100644 --- a/backend-cuda.cu +++ b/backend-cuda.cu @@ -1,1436 +1,285 @@ #include "stable-fluids-3d.h" - #include -#include -#include +#include #include #include +#include #include -#include #include namespace stable_fluids { using Stream = cudaStream_t; - constexpr StableFluidsResult success = STABLE_FLUIDS_RESULT_OK; - constexpr StableFluidsResult invalid_argument = STABLE_FLUIDS_RESULT_INVALID_ARGUMENT; - constexpr StableFluidsResult invalid_context = STABLE_FLUIDS_RESULT_INVALID_CONTEXT; - constexpr StableFluidsResult invalid_config = STABLE_FLUIDS_RESULT_INVALID_CONFIG; - constexpr StableFluidsResult invalid_field = STABLE_FLUIDS_RESULT_INVALID_FIELD; - constexpr StableFluidsResult invalid_scene = STABLE_FLUIDS_RESULT_INVALID_SCENE; - constexpr StableFluidsResult invalid_export = STABLE_FLUIDS_RESULT_INVALID_EXPORT; - constexpr StableFluidsResult out_of_memory = STABLE_FLUIDS_RESULT_OUT_OF_MEMORY; - constexpr StableFluidsResult backend_failure = STABLE_FLUIDS_RESULT_BACKEND_FAILURE; - constexpr uint8_t cell_fluid = 0; - constexpr uint8_t cell_solid = 1; - constexpr uint8_t face_open = 0; - constexpr uint8_t face_fixed = 1; - constexpr uint8_t face_outflow = 2; - enum class Axis : int { - x = 0, - y = 1, - z = 2, - }; - - struct ProjectionMetricsState; - - struct LaunchGeometry { - dim3 block{}; - dim3 cells{}; - dim3 faces{}; - }; + constexpr StableFluidsResult success = STABLE_FLUIDS_RESULT_OK; + constexpr StableFluidsResult out_of_memory = STABLE_FLUIDS_RESULT_OUT_OF_MEMORY; + constexpr StableFluidsResult backend_failure = STABLE_FLUIDS_RESULT_BACKEND_FAILURE; struct DeviceBuffers { - float* velocity_x = nullptr; - float* velocity_y = nullptr; - float* velocity_z = nullptr; - float* temp_velocity_x = nullptr; - float* temp_velocity_y = nullptr; - float* temp_velocity_z = nullptr; - float* pressure = nullptr; - float* divergence = nullptr; - ProjectionMetricsState* projection_metrics = nullptr; - float* scalar_scratch = nullptr; - uint8_t* cell_flags = nullptr; - uint8_t* u_flags = nullptr; - uint8_t* v_flags = nullptr; - uint8_t* w_flags = nullptr; - float* u_target = nullptr; - float* v_target = nullptr; - float* w_target = nullptr; + float* velocity_x = nullptr; + float* velocity_y = nullptr; + float* velocity_z = nullptr; + float* temp_velocity_x = nullptr; + float* temp_velocity_y = nullptr; + float* temp_velocity_z = nullptr; + float* pressure = nullptr; + float* divergence = nullptr; }; struct FieldStorage { StableFluidsFieldCreateDesc desc{}; float* data = nullptr; - }; - - struct HostBoundaryAtlas { - std::vector cell_flags{}; - std::vector cell_owner{}; - std::vector u_flags{}; - std::vector v_flags{}; - std::vector w_flags{}; - std::vector u_target{}; - std::vector v_target{}; - std::vector w_target{}; + float* temp = nullptr; }; struct ContextStorage { StableFluidsSimulationConfig config{}; - std::vector colliders{}; std::vector fields{}; - std::vector buoyancy_terms{}; - HostBoundaryAtlas host_atlas{}; DeviceBuffers device{}; - Stream stream = nullptr; - bool owns_stream = false; - bool atlas_dirty = true; - uint32_t max_field_components = 0; - }; - - struct ProjectionMetricsState { - float max_abs_divergence = 0.0f; - float sum_sq_divergence = 0.0f; - uint32_t fluid_cell_count = 0; - uint32_t _padding = 0; + Stream stream = nullptr; + bool owns_stream = false; }; __host__ __device__ std::uint64_t index_3d(const int x, const int y, const int z, const int sx, const int sy) { return static_cast(z) * static_cast(sx) * static_cast(sy) + static_cast(y) * static_cast(sx) + static_cast(x); } - dim3 make_grid(const int nx, const int ny, const int nz, const dim3& block) { - return { - static_cast((nx + static_cast(block.x) - 1) / static_cast(block.x)), - static_cast((ny + static_cast(block.y) - 1) / static_cast(block.y)), - static_cast((nz + static_cast(block.z) - 1) / static_cast(block.z)), - }; - } - - LaunchGeometry make_launch_geometry(const StableFluidsSimulationConfig& config) { - const dim3 block( - static_cast(std::max(config.block_x, 1)), - static_cast(std::max(config.block_y, 1)), - static_cast(std::max(config.block_z, 1)) - ); - return { - .block = block, - .cells = make_grid(config.nx, config.ny, config.nz, block), - .faces = make_grid(config.nx + 1, config.ny + 1, config.nz + 1, block), - }; - } - - std::uint64_t scalar_count(const StableFluidsSimulationConfig& config) { - return static_cast(config.nx) * static_cast(config.ny) * static_cast(config.nz); - } - - std::uint64_t u_face_count(const StableFluidsSimulationConfig& config) { - return static_cast(config.nx + 1) * static_cast(config.ny) * static_cast(config.nz); - } - - std::uint64_t v_face_count(const StableFluidsSimulationConfig& config) { - return static_cast(config.nx) * static_cast(config.ny + 1) * static_cast(config.nz); - } - - std::uint64_t w_face_count(const StableFluidsSimulationConfig& config) { - return static_cast(config.nx) * static_cast(config.ny) * static_cast(config.nz + 1); - } - - std::uint64_t field_value_count(const StableFluidsSimulationConfig& config, const uint32_t components) { - return scalar_count(config) * static_cast(components); - } - - FieldStorage* find_field(ContextStorage& context, const StableFluidsFieldHandle handle) { - if (handle == 0) return nullptr; - const auto index = static_cast(handle - 1u); - if (index >= context.fields.size()) return nullptr; - return &context.fields[index]; - } - - const FieldStorage* find_field(const ContextStorage& context, const StableFluidsFieldHandle handle) { - if (handle == 0) return nullptr; - const auto index = static_cast(handle - 1u); - if (index >= context.fields.size()) return nullptr; - return &context.fields[index]; - } - - bool point_inside_collider(const StableFluidsColliderDesc& collider, const float x, const float y, const float z) { - const float dx = x - collider.center_x; - const float dy = y - collider.center_y; - const float dz = z - collider.center_z; - if (collider.collider_type == STABLE_FLUIDS_COLLIDER_SPHERE) { - return dx * dx + dy * dy + dz * dz <= collider.radius * collider.radius; - } - return std::abs(dx) <= collider.half_extent_x && std::abs(dy) <= collider.half_extent_y && std::abs(dz) <= collider.half_extent_z; - } - - float collider_signed_distance(const StableFluidsColliderDesc& collider, const float x, const float y, const float z) { - const float dx = x - collider.center_x; - const float dy = y - collider.center_y; - const float dz = z - collider.center_z; - if (collider.collider_type == STABLE_FLUIDS_COLLIDER_SPHERE) { - return std::sqrt(dx * dx + dy * dy + dz * dz) - collider.radius; - } - - const float qx = std::abs(dx) - collider.half_extent_x; - const float qy = std::abs(dy) - collider.half_extent_y; - const float qz = std::abs(dz) - collider.half_extent_z; - const float outside = std::sqrt(std::max(qx, 0.0f) * std::max(qx, 0.0f) + std::max(qy, 0.0f) * std::max(qy, 0.0f) + std::max(qz, 0.0f) * std::max(qz, 0.0f)); - const float inside = std::min(std::max(qx, std::max(qy, qz)), 0.0f); - return outside + inside; - } - - void resize_host_atlas(ContextStorage& context) { - context.host_atlas.cell_flags.resize(static_cast(scalar_count(context.config)), cell_fluid); - context.host_atlas.cell_owner.resize(static_cast(scalar_count(context.config)), -1); - context.host_atlas.u_flags.resize(static_cast(u_face_count(context.config)), face_open); - context.host_atlas.v_flags.resize(static_cast(v_face_count(context.config)), face_open); - context.host_atlas.w_flags.resize(static_cast(w_face_count(context.config)), face_open); - context.host_atlas.u_target.resize(static_cast(u_face_count(context.config)), 0.0f); - context.host_atlas.v_target.resize(static_cast(v_face_count(context.config)), 0.0f); - context.host_atlas.w_target.resize(static_cast(w_face_count(context.config)), 0.0f); - } - - StableFluidsResult upload_boundary_atlas(ContextStorage& context) { - const auto cell_bytes = scalar_count(context.config) * sizeof(uint8_t); - const auto u_flag_bytes = u_face_count(context.config) * sizeof(uint8_t); - const auto v_flag_bytes = v_face_count(context.config) * sizeof(uint8_t); - const auto w_flag_bytes = w_face_count(context.config) * sizeof(uint8_t); - const auto u_val_bytes = u_face_count(context.config) * sizeof(float); - const auto v_val_bytes = v_face_count(context.config) * sizeof(float); - const auto w_val_bytes = w_face_count(context.config) * sizeof(float); - - if (cudaMemcpyAsync(context.device.cell_flags, context.host_atlas.cell_flags.data(), cell_bytes, cudaMemcpyHostToDevice, context.stream) != cudaSuccess) return backend_failure; - if (cudaMemcpyAsync(context.device.u_flags, context.host_atlas.u_flags.data(), u_flag_bytes, cudaMemcpyHostToDevice, context.stream) != cudaSuccess) return backend_failure; - if (cudaMemcpyAsync(context.device.v_flags, context.host_atlas.v_flags.data(), v_flag_bytes, cudaMemcpyHostToDevice, context.stream) != cudaSuccess) return backend_failure; - if (cudaMemcpyAsync(context.device.w_flags, context.host_atlas.w_flags.data(), w_flag_bytes, cudaMemcpyHostToDevice, context.stream) != cudaSuccess) return backend_failure; - if (cudaMemcpyAsync(context.device.u_target, context.host_atlas.u_target.data(), u_val_bytes, cudaMemcpyHostToDevice, context.stream) != cudaSuccess) return backend_failure; - if (cudaMemcpyAsync(context.device.v_target, context.host_atlas.v_target.data(), v_val_bytes, cudaMemcpyHostToDevice, context.stream) != cudaSuccess) return backend_failure; - if (cudaMemcpyAsync(context.device.w_target, context.host_atlas.w_target.data(), w_val_bytes, cudaMemcpyHostToDevice, context.stream) != cudaSuccess) return backend_failure; - return success; - } - - StableFluidsResult build_boundary_atlas(ContextStorage& context) { - const int nx = context.config.nx; - const int ny = context.config.ny; - const int nz = context.config.nz; - const float h = context.config.cell_size; - constexpr float collider_touch_radius = 0.75f; - constexpr float huge_distance = 1.0e30f; - - struct DomainSide { - Axis axis; - bool max_side; - StableFluidsBoundaryFaceDesc face; - }; - - const std::array domain_sides{ - DomainSide{ .axis = Axis::x, .max_side = false, .face = context.config.domain_boundary.x_min, }, - DomainSide{ .axis = Axis::x, .max_side = true, .face = context.config.domain_boundary.x_max, }, - DomainSide{ .axis = Axis::y, .max_side = false, .face = context.config.domain_boundary.y_min, }, - DomainSide{ .axis = Axis::y, .max_side = true, .face = context.config.domain_boundary.y_max, }, - DomainSide{ .axis = Axis::z, .max_side = false, .face = context.config.domain_boundary.z_min, }, - DomainSide{ .axis = Axis::z, .max_side = true, .face = context.config.domain_boundary.z_max, }, - }; - - resize_host_atlas(context); - std::ranges::fill(context.host_atlas.cell_flags, cell_fluid); - std::ranges::fill(context.host_atlas.cell_owner.begin(), context.host_atlas.cell_owner.end(), -1); - std::ranges::fill(context.host_atlas.u_flags.begin(), context.host_atlas.u_flags.end(), face_open); - std::ranges::fill(context.host_atlas.v_flags.begin(), context.host_atlas.v_flags.end(), face_open); - std::ranges::fill(context.host_atlas.w_flags.begin(), context.host_atlas.w_flags.end(), face_open); - std::ranges::fill(context.host_atlas.u_target.begin(), context.host_atlas.u_target.end(), 0.0f); - std::ranges::fill(context.host_atlas.v_target.begin(), context.host_atlas.v_target.end(), 0.0f); - std::ranges::fill(context.host_atlas.w_target.begin(), context.host_atlas.w_target.end(), 0.0f); - - auto set_face = [&](std::vector& flags, std::vector& values, const int x, const int y, const int z, const int sx, const int sy, const uint8_t type, const float target) { - const auto index = static_cast(index_3d(x, y, z, sx, sy)); - flags[index] = type; - values[index] = target; - }; - auto cell_owner = [&](const int x, const int y, const int z) { - if (x < 0 || y < 0 || z < 0 || x >= nx || y >= ny || z >= nz) return -1; - return context.host_atlas.cell_owner[static_cast(index_3d(x, y, z, nx, ny))]; - }; - auto collider_axis_velocity = [&](const StableFluidsColliderDesc& collider, const Axis axis) { - if (axis == Axis::x) return collider.linear_velocity_x; - if (axis == Axis::y) return collider.linear_velocity_y; - return collider.linear_velocity_z; - }; - auto find_best_collider = [&](const float px, const float py, const float pz, const bool require_inside, const bool require_no_slip, const float max_distance) { - float best_distance = huge_distance; - int best_owner = -1; - for (std::size_t collider_index = 0; collider_index < context.colliders.size(); ++collider_index) { - const auto& collider = context.colliders[collider_index]; - if (require_inside && !point_inside_collider(collider, px, py, pz)) continue; - if (require_no_slip && collider.velocity_boundary_type != STABLE_FLUIDS_VELOCITY_BOUNDARY_NO_SLIP) continue; - const float distance = collider_signed_distance(collider, px, py, pz); - if (distance > max_distance || distance >= best_distance) continue; - best_distance = distance; - best_owner = static_cast(collider_index); - } - return best_owner; - }; - auto boundary_face_type = [&](const StableFluidsBoundaryFaceDesc& face) { - return face.type == STABLE_FLUIDS_VELOCITY_BOUNDARY_OUTFLOW ? face_outflow : face_fixed; - }; - auto boundary_face_target = [&](const StableFluidsBoundaryFaceDesc& face) { - return face.type == STABLE_FLUIDS_VELOCITY_BOUNDARY_INFLOW ? face.velocity : 0.0f; - }; - auto pick_face_owner = [&](const int owner_a, const int owner_b, const float px, const float py, const float pz) { - if (owner_a >= 0 && owner_b < 0) return owner_a; - if (owner_b >= 0 && owner_a < 0) return owner_b; - if (owner_a >= 0 && owner_b >= 0) { - const float distance_a = collider_signed_distance(context.colliders[static_cast(owner_a)], px, py, pz); - const float distance_b = collider_signed_distance(context.colliders[static_cast(owner_b)], px, py, pz); - return distance_a <= distance_b ? owner_a : owner_b; - } - return find_best_collider(px, py, pz, false, false, collider_touch_radius); - }; - auto set_interface_face = [&](std::vector& flags, std::vector& values, const int face_index_x, const int face_index_y, const int face_index_z, const int sx, const int sy, const float px, const float py, const float pz, const Axis axis, const int owner_a, const int owner_b) { - const auto face_index = static_cast(index_3d(face_index_x, face_index_y, face_index_z, sx, sy)); - if (flags[face_index] != face_open) return; - const int owner = pick_face_owner(owner_a, owner_b, px, py, pz); - if (owner < 0) return; - flags[face_index] = face_fixed; - values[face_index] = collider_axis_velocity(context.colliders[static_cast(owner)], axis); - }; - auto set_no_slip_touch_face = [&](std::vector& flags, std::vector& values, const int face_index_x, const int face_index_y, const int face_index_z, const int sx, const int sy, const float px, const float py, const float pz, const Axis axis, const int owner) { - if (owner < 0) return; - const auto face_index = static_cast(index_3d(face_index_x, face_index_y, face_index_z, sx, sy)); - if (flags[face_index] != face_open) return; - if (context.colliders[static_cast(owner)].velocity_boundary_type != STABLE_FLUIDS_VELOCITY_BOUNDARY_NO_SLIP) return; - flags[face_index] = face_fixed; - values[face_index] = collider_axis_velocity(context.colliders[static_cast(owner)], axis); - }; - auto pick_touch_owner = [&](const float px, const float py, const float pz, const int x_begin, const int x_end, const int y_begin, const int y_end, const int z_begin, const int z_end) { - float best_distance = huge_distance; - int best_owner = -1; - for (int cell_z = z_begin; cell_z <= z_end; ++cell_z) { - for (int cell_y = y_begin; cell_y <= y_end; ++cell_y) { - for (int cell_x = x_begin; cell_x <= x_end; ++cell_x) { - const int owner = cell_owner(cell_x, cell_y, cell_z); - if (owner < 0) continue; - const auto& collider = context.colliders[static_cast(owner)]; - if (collider.velocity_boundary_type != STABLE_FLUIDS_VELOCITY_BOUNDARY_NO_SLIP) continue; - const float distance = collider_signed_distance(collider, px, py, pz); - if (distance >= best_distance) continue; - best_distance = distance; - best_owner = owner; - } - } - } - return best_owner; - }; - auto apply_domain_side = [&](const DomainSide& side) { - const uint8_t type = boundary_face_type(side.face); - const float target = boundary_face_target(side.face); - switch (side.axis) { - case Axis::x: { - const int normal_x = side.max_side ? nx : 0; - for (int z = 0; z < nz; ++z) { - for (int y = 0; y < ny; ++y) set_face(context.host_atlas.u_flags, context.host_atlas.u_target, normal_x, y, z, nx + 1, ny, type, target); - } - if (side.face.type != STABLE_FLUIDS_VELOCITY_BOUNDARY_NO_SLIP) break; - const int tangent_x = side.max_side ? nx - 1 : 0; - for (int z = 0; z < nz; ++z) { - for (int y = 0; y <= ny; ++y) set_face(context.host_atlas.v_flags, context.host_atlas.v_target, tangent_x, y, z, nx, ny + 1, face_fixed, 0.0f); - for (int y = 0; y < ny; ++y) set_face(context.host_atlas.w_flags, context.host_atlas.w_target, tangent_x, y, z, nx, ny, face_fixed, 0.0f); - } - break; - } - case Axis::y: { - const int normal_y = side.max_side ? ny : 0; - for (int z = 0; z < nz; ++z) { - for (int x = 0; x < nx; ++x) set_face(context.host_atlas.v_flags, context.host_atlas.v_target, x, normal_y, z, nx, ny + 1, type, target); - } - if (side.face.type != STABLE_FLUIDS_VELOCITY_BOUNDARY_NO_SLIP) break; - const int tangent_y = side.max_side ? ny - 1 : 0; - for (int z = 0; z < nz; ++z) { - for (int x = 0; x <= nx; ++x) set_face(context.host_atlas.u_flags, context.host_atlas.u_target, x, tangent_y, z, nx + 1, ny, face_fixed, 0.0f); - for (int x = 0; x < nx; ++x) set_face(context.host_atlas.w_flags, context.host_atlas.w_target, x, tangent_y, z, nx, ny, face_fixed, 0.0f); - } - break; - } - case Axis::z: { - const int normal_z = side.max_side ? nz : 0; - for (int y = 0; y < ny; ++y) { - for (int x = 0; x < nx; ++x) set_face(context.host_atlas.w_flags, context.host_atlas.w_target, x, y, normal_z, nx, ny, type, target); - } - if (side.face.type != STABLE_FLUIDS_VELOCITY_BOUNDARY_NO_SLIP) break; - const int tangent_z = side.max_side ? nz - 1 : 0; - for (int y = 0; y < ny; ++y) { - for (int x = 0; x <= nx; ++x) set_face(context.host_atlas.u_flags, context.host_atlas.u_target, x, y, tangent_z, nx + 1, ny, face_fixed, 0.0f); - for (int x = 0; x < nx; ++x) set_face(context.host_atlas.v_flags, context.host_atlas.v_target, x, y, tangent_z, nx, ny + 1, face_fixed, 0.0f); - } - break; - } - } - }; - - for (int z = 0; z < nz; ++z) { - for (int y = 0; y < ny; ++y) { - for (int x = 0; x < nx; ++x) { - const float px = (static_cast(x) + 0.5f) * h; - const float py = (static_cast(y) + 0.5f) * h; - const float pz = (static_cast(z) + 0.5f) * h; - const auto cell_index = static_cast(index_3d(x, y, z, nx, ny)); - const int owner = find_best_collider(px, py, pz, true, false, huge_distance); - if (owner < 0) continue; - context.host_atlas.cell_flags[cell_index] = cell_solid; - context.host_atlas.cell_owner[cell_index] = owner; - } - } - } - - for (const auto& side : domain_sides) apply_domain_side(side); - - for (int z = 0; z < nz; ++z) { - for (int y = 0; y < ny; ++y) { - for (int x = 1; x < nx; ++x) { - const int left_owner = cell_owner(x - 1, y, z); - const int right_owner = cell_owner(x, y, z); - if (left_owner < 0 && right_owner < 0) continue; - const float px = static_cast(x) * h; - const float py = (static_cast(y) + 0.5f) * h; - const float pz = (static_cast(z) + 0.5f) * h; - set_interface_face(context.host_atlas.u_flags, context.host_atlas.u_target, x, y, z, nx + 1, ny, px, py, pz, Axis::x, left_owner, right_owner); - } - } - } - - for (int z = 0; z < nz; ++z) { - for (int y = 1; y < ny; ++y) { - for (int x = 0; x < nx; ++x) { - const int below_owner = cell_owner(x, y - 1, z); - const int above_owner = cell_owner(x, y, z); - if (below_owner < 0 && above_owner < 0) continue; - const float px = (static_cast(x) + 0.5f) * h; - const float py = static_cast(y) * h; - const float pz = (static_cast(z) + 0.5f) * h; - set_interface_face(context.host_atlas.v_flags, context.host_atlas.v_target, x, y, z, nx, ny + 1, px, py, pz, Axis::y, below_owner, above_owner); - } - } - } - - for (int z = 1; z < nz; ++z) { - for (int y = 0; y < ny; ++y) { - for (int x = 0; x < nx; ++x) { - const int back_owner = cell_owner(x, y, z - 1); - const int front_owner = cell_owner(x, y, z); - if (back_owner < 0 && front_owner < 0) continue; - const float px = (static_cast(x) + 0.5f) * h; - const float py = (static_cast(y) + 0.5f) * h; - const float pz = static_cast(z) * h; - set_interface_face(context.host_atlas.w_flags, context.host_atlas.w_target, x, y, z, nx, ny, px, py, pz, Axis::z, back_owner, front_owner); - } - } - } - - for (int z = 0; z < nz; ++z) { - for (int y = 0; y < ny; ++y) { - for (int x = 0; x <= nx; ++x) { - const float px = static_cast(x) * h; - const float py = (static_cast(y) + 0.5f) * h; - const float pz = (static_cast(z) + 0.5f) * h; - const int owner = pick_touch_owner(px, py, pz, x - 1, x, y - 1, y, z - 1, z); - set_no_slip_touch_face(context.host_atlas.u_flags, context.host_atlas.u_target, x, y, z, nx + 1, ny, px, py, pz, Axis::x, owner); - } - } + __device__ float load(const float* field, int x, int y, int z, const int nx, const int ny, const int nz, const StableFluidsBoundaryConfig boundary) { + if (x < 0 || x >= nx) { + if (boundary.x != STABLE_FLUIDS_BOUNDARY_PERIODIC || nx <= 0) return 0.0f; + x %= nx; + if (x < 0) x += nx; } - - for (int z = 0; z < nz; ++z) { - for (int y = 0; y <= ny; ++y) { - for (int x = 0; x < nx; ++x) { - const float px = (static_cast(x) + 0.5f) * h; - const float py = static_cast(y) * h; - const float pz = (static_cast(z) + 0.5f) * h; - const int owner = pick_touch_owner(px, py, pz, x - 1, x, y - 1, y, z - 1, z); - set_no_slip_touch_face(context.host_atlas.v_flags, context.host_atlas.v_target, x, y, z, nx, ny + 1, px, py, pz, Axis::y, owner); - } - } - } - - for (int z = 0; z <= nz; ++z) { - for (int y = 0; y < ny; ++y) { - for (int x = 0; x < nx; ++x) { - const float px = (static_cast(x) + 0.5f) * h; - const float py = (static_cast(y) + 0.5f) * h; - const float pz = static_cast(z) * h; - const int owner = pick_touch_owner(px, py, pz, x - 1, x, y - 1, y, z - 1, z); - set_no_slip_touch_face(context.host_atlas.w_flags, context.host_atlas.w_target, x, y, z, nx, ny, px, py, pz, Axis::z, owner); - } - } - } - return success; - } - - template - StableFluidsResult allocate_device_array(T*& ptr, const std::uint64_t count) { - if (count == 0) return success; - if (cudaMalloc(reinterpret_cast(&ptr), count * sizeof(T)) != cudaSuccess) return out_of_memory; - return success; - } - - template - void release_device_array(T*& ptr) { - if (ptr != nullptr) cudaFree(ptr); - ptr = nullptr; - } - - void destroy_buffers(ContextStorage& context) { - release_device_array(context.device.velocity_x); - release_device_array(context.device.velocity_y); - release_device_array(context.device.velocity_z); - release_device_array(context.device.temp_velocity_x); - release_device_array(context.device.temp_velocity_y); - release_device_array(context.device.temp_velocity_z); - release_device_array(context.device.pressure); - release_device_array(context.device.divergence); - release_device_array(context.device.projection_metrics); - release_device_array(context.device.scalar_scratch); - release_device_array(context.device.cell_flags); - release_device_array(context.device.u_flags); - release_device_array(context.device.v_flags); - release_device_array(context.device.w_flags); - release_device_array(context.device.u_target); - release_device_array(context.device.v_target); - release_device_array(context.device.w_target); - for (auto& field : context.fields) release_device_array(field.data); - } - - StableFluidsResult allocate_buffers(ContextStorage& context) { - if (allocate_device_array(context.device.velocity_x, u_face_count(context.config)) != success) return out_of_memory; - if (allocate_device_array(context.device.velocity_y, v_face_count(context.config)) != success) return out_of_memory; - if (allocate_device_array(context.device.velocity_z, w_face_count(context.config)) != success) return out_of_memory; - if (allocate_device_array(context.device.temp_velocity_x, u_face_count(context.config)) != success) return out_of_memory; - if (allocate_device_array(context.device.temp_velocity_y, v_face_count(context.config)) != success) return out_of_memory; - if (allocate_device_array(context.device.temp_velocity_z, w_face_count(context.config)) != success) return out_of_memory; - if (allocate_device_array(context.device.pressure, scalar_count(context.config)) != success) return out_of_memory; - if (allocate_device_array(context.device.divergence, scalar_count(context.config)) != success) return out_of_memory; - if (allocate_device_array(context.device.projection_metrics, 1) != success) return out_of_memory; - if (allocate_device_array(context.device.scalar_scratch, field_value_count(context.config, (std::max)(context.max_field_components, 1u))) != success) return out_of_memory; - if (allocate_device_array(context.device.cell_flags, scalar_count(context.config)) != success) return out_of_memory; - if (allocate_device_array(context.device.u_flags, u_face_count(context.config)) != success) return out_of_memory; - if (allocate_device_array(context.device.v_flags, v_face_count(context.config)) != success) return out_of_memory; - if (allocate_device_array(context.device.w_flags, w_face_count(context.config)) != success) return out_of_memory; - if (allocate_device_array(context.device.u_target, u_face_count(context.config)) != success) return out_of_memory; - if (allocate_device_array(context.device.v_target, v_face_count(context.config)) != success) return out_of_memory; - if (allocate_device_array(context.device.w_target, w_face_count(context.config)) != success) return out_of_memory; - for (auto& field : context.fields) { - if (allocate_device_array(field.data, field_value_count(context.config, field.desc.component_count)) != success) return out_of_memory; + if (y < 0 || y >= ny) { + if (boundary.y != STABLE_FLUIDS_BOUNDARY_PERIODIC || ny <= 0) return 0.0f; + y %= ny; + if (y < 0) y += ny; } - return success; - } - - __device__ float clamp_world(const float value, const float max_value) { - return fminf(fmaxf(value, 0.0f), max_value); - } - - __device__ bool point_is_solid(const uint8_t* cell_flags, const float x, const float y, const float z, const int nx, const int ny, const int nz, const float h) { - const int ix = min(max(static_cast(floorf(x / h)), 0), nx - 1); - const int iy = min(max(static_cast(floorf(y / h)), 0), ny - 1); - const int iz = min(max(static_cast(floorf(z / h)), 0), nz - 1); - return cell_flags[index_3d(ix, iy, iz, nx, ny)] == cell_solid; - } - - __device__ bool point_inside_domain(const float3 point, const int nx, const int ny, const int nz, const float h) { - return point.x >= 0.0f && point.x <= static_cast(nx) * h && point.y >= 0.0f && point.y <= static_cast(ny) * h && point.z >= 0.0f && point.z <= static_cast(nz) * h; - } - - __device__ float3 clip_backtrace_to_fluid(const uint8_t* cell_flags, const float3 origin, const float3 target, const int nx, const int ny, const int nz, const float h) { - float3 lo = origin; - float3 hi = target; - if (!point_is_solid(cell_flags, hi.x, hi.y, hi.z, nx, ny, nz, h)) return hi; - for (int i = 0; i < 8; ++i) { - const float3 mid = make_float3(0.5f * (lo.x + hi.x), 0.5f * (lo.y + hi.y), 0.5f * (lo.z + hi.z)); - if (point_is_solid(cell_flags, mid.x, mid.y, mid.z, nx, ny, nz, h)) hi = mid; - else lo = mid; + if (z < 0 || z >= nz) { + if (boundary.z != STABLE_FLUIDS_BOUNDARY_PERIODIC || nz <= 0) return 0.0f; + z %= nz; + if (z < 0) z += nz; } - return lo; - } - - __device__ int wrap_index(const int value, const int size) { - const int mod = value % size; - return mod < 0 ? mod + size : mod; + return field[index_3d(x, y, z, nx, ny)]; } - struct ScalarAxisSample { - int i0; - int i1; - float t; - }; - - __device__ ScalarAxisSample resolve_scalar_axis(const float g, const int size, const uint32_t extension_mode) { - if (size <= 1) return { .i0 = 0, .i1 = 0, .t = 0.0f, }; - if (extension_mode == STABLE_FLUIDS_FIELD_EXTENSION_REPEAT) { - const int i0 = static_cast(floorf(g)); - const int i1 = i0 + 1; - return { - .i0 = wrap_index(i0, size), - .i1 = wrap_index(i1, size), - .t = g - static_cast(i0), - }; + __device__ float sample_linear(const float* field, float x, float y, float z, const int nx, const int ny, const int nz, const float h, const StableFluidsBoundaryConfig boundary) { + const float extent_x = static_cast(nx) * h; + const float extent_y = static_cast(ny) * h; + const float extent_z = static_cast(nz) * h; + if (boundary.x == STABLE_FLUIDS_BOUNDARY_PERIODIC) { + x = extent_x <= 0.0f ? 0.0f : fmodf(x, extent_x); + if (x < 0.0f) x += extent_x; } - if (extension_mode == STABLE_FLUIDS_FIELD_EXTENSION_STREAK) { - const float clamped = fminf(fmaxf(g, 0.0f), static_cast(size - 1)); - const int i0 = static_cast(floorf(clamped)); - const int i1 = min(i0 + 1, size - 1); - return { - .i0 = i0, - .i1 = i1, - .t = clamped - static_cast(i0), - }; + if (boundary.y == STABLE_FLUIDS_BOUNDARY_PERIODIC) { + y = extent_y <= 0.0f ? 0.0f : fmodf(y, extent_y); + if (y < 0.0f) y += extent_y; } - if (extension_mode == STABLE_FLUIDS_FIELD_EXTENSION_EXTRAPOLATE) { - if (g <= 0.0f) return { .i0 = 0, .i1 = 1, .t = g, }; - if (g >= static_cast(size - 1)) return { .i0 = size - 2, .i1 = size - 1, .t = g - static_cast(size - 2), }; - const int i0 = static_cast(floorf(g)); - return { - .i0 = i0, - .i1 = i0 + 1, - .t = g - static_cast(i0), - }; + if (boundary.z == STABLE_FLUIDS_BOUNDARY_PERIODIC) { + z = extent_z <= 0.0f ? 0.0f : fmodf(z, extent_z); + if (z < 0.0f) z += extent_z; } - const int i0 = static_cast(floorf(g)); - const int i1 = i0 + 1; - return { - .i0 = i0, - .i1 = i1, - .t = g - static_cast(i0), - }; - } - __device__ float load_scalar_sample(const float* field, const uint8_t* cell_flags, int ix, int iy, int iz, const int nx, const int ny, const int nz, const uint32_t extension_mode, const float constant_value) { - if (extension_mode == STABLE_FLUIDS_FIELD_EXTENSION_REPEAT) { - ix = wrap_index(ix, nx); - iy = wrap_index(iy, ny); - iz = wrap_index(iz, nz); - } else if (ix < 0 || ix >= nx || iy < 0 || iy >= ny || iz < 0 || iz >= nz) { - if (extension_mode == STABLE_FLUIDS_FIELD_EXTENSION_CONSTANT) return constant_value; - ix = min(max(ix, 0), nx - 1); - iy = min(max(iy, 0), ny - 1); - iz = min(max(iz, 0), nz - 1); - } - if (cell_flags[index_3d(ix, iy, iz, nx, ny)] == cell_solid) return constant_value; - return field[index_3d(ix, iy, iz, nx, ny)]; - } - - __device__ float sample_scalar_field(const float* field, const uint8_t* cell_flags, const float x, const float y, const float z, const int nx, const int ny, const int nz, const float h, const uint32_t extension_mode, const float constant_value) { const float gx = x / h - 0.5f; const float gy = y / h - 0.5f; const float gz = z / h - 0.5f; - const ScalarAxisSample xs = resolve_scalar_axis(gx, nx, extension_mode); - const ScalarAxisSample ys = resolve_scalar_axis(gy, ny, extension_mode); - const ScalarAxisSample zs = resolve_scalar_axis(gz, nz, extension_mode); - const float c000 = load_scalar_sample(field, cell_flags, xs.i0, ys.i0, zs.i0, nx, ny, nz, extension_mode, constant_value); - const float c100 = load_scalar_sample(field, cell_flags, xs.i1, ys.i0, zs.i0, nx, ny, nz, extension_mode, constant_value); - const float c010 = load_scalar_sample(field, cell_flags, xs.i0, ys.i1, zs.i0, nx, ny, nz, extension_mode, constant_value); - const float c110 = load_scalar_sample(field, cell_flags, xs.i1, ys.i1, zs.i0, nx, ny, nz, extension_mode, constant_value); - const float c001 = load_scalar_sample(field, cell_flags, xs.i0, ys.i0, zs.i1, nx, ny, nz, extension_mode, constant_value); - const float c101 = load_scalar_sample(field, cell_flags, xs.i1, ys.i0, zs.i1, nx, ny, nz, extension_mode, constant_value); - const float c011 = load_scalar_sample(field, cell_flags, xs.i0, ys.i1, zs.i1, nx, ny, nz, extension_mode, constant_value); - const float c111 = load_scalar_sample(field, cell_flags, xs.i1, ys.i1, zs.i1, nx, ny, nz, extension_mode, constant_value); - const float c00 = c000 + (c100 - c000) * xs.t; - const float c10 = c010 + (c110 - c010) * xs.t; - const float c01 = c001 + (c101 - c001) * xs.t; - const float c11 = c011 + (c111 - c011) * xs.t; - const float c0 = c00 + (c10 - c00) * ys.t; - const float c1 = c01 + (c11 - c01) * ys.t; - return c0 + (c1 - c0) * zs.t; - } - - __device__ float load_u(const float* field, const uint8_t* flags, const float* target, const int x, const int y, const int z, const int nx, const int ny, const int nz) { - const int ix = min(max(x, 0), nx); - const int iy = min(max(y, 0), ny - 1); - const int iz = min(max(z, 0), nz - 1); - const auto index = index_3d(ix, iy, iz, nx + 1, ny); - return flags[index] == face_fixed ? target[index] : field[index]; - } - - __device__ float load_v(const float* field, const uint8_t* flags, const float* target, const int x, const int y, const int z, const int nx, const int ny, const int nz) { - const int ix = min(max(x, 0), nx - 1); - const int iy = min(max(y, 0), ny); - const int iz = min(max(z, 0), nz - 1); - const auto index = index_3d(ix, iy, iz, nx, ny + 1); - return flags[index] == face_fixed ? target[index] : field[index]; - } - - __device__ float load_w(const float* field, const uint8_t* flags, const float* target, const int x, const int y, const int z, const int nx, const int ny, const int nz) { - const int ix = min(max(x, 0), nx - 1); - const int iy = min(max(y, 0), ny - 1); - const int iz = min(max(z, 0), nz); - const auto index = index_3d(ix, iy, iz, nx, ny); - return flags[index] == face_fixed ? target[index] : field[index]; - } - __device__ float sample_u_field(const float* field, const uint8_t* flags, const float* target, const float x, const float y, const float z, const int nx, const int ny, const int nz, const float h) { - const float gx = clamp_world(x / h, static_cast(nx)); - const float gy = clamp_world(y / h - 0.5f, static_cast(ny - 1)); - const float gz = clamp_world(z / h - 0.5f, static_cast(nz - 1)); const int x0 = static_cast(floorf(gx)); const int y0 = static_cast(floorf(gy)); const int z0 = static_cast(floorf(gz)); - const int x1 = min(x0 + 1, nx); - const int y1 = min(y0 + 1, ny - 1); - const int z1 = min(z0 + 1, nz - 1); - const float tx = gx - static_cast(x0); - const float ty = gy - static_cast(y0); - const float tz = gz - static_cast(z0); - const float c000 = load_u(field, flags, target, x0, y0, z0, nx, ny, nz); - const float c100 = load_u(field, flags, target, x1, y0, z0, nx, ny, nz); - const float c010 = load_u(field, flags, target, x0, y1, z0, nx, ny, nz); - const float c110 = load_u(field, flags, target, x1, y1, z0, nx, ny, nz); - const float c001 = load_u(field, flags, target, x0, y0, z1, nx, ny, nz); - const float c101 = load_u(field, flags, target, x1, y0, z1, nx, ny, nz); - const float c011 = load_u(field, flags, target, x0, y1, z1, nx, ny, nz); - const float c111 = load_u(field, flags, target, x1, y1, z1, nx, ny, nz); - const float c00 = c000 + (c100 - c000) * tx; - const float c10 = c010 + (c110 - c010) * tx; - const float c01 = c001 + (c101 - c001) * tx; - const float c11 = c011 + (c111 - c011) * tx; - const float c0 = c00 + (c10 - c00) * ty; - const float c1 = c01 + (c11 - c01) * ty; - return c0 + (c1 - c0) * tz; - } + const int x1 = x0 + 1; + const int y1 = y0 + 1; + const int z1 = z0 + 1; - __device__ float sample_v_field(const float* field, const uint8_t* flags, const float* target, const float x, const float y, const float z, const int nx, const int ny, const int nz, const float h) { - const float gx = clamp_world(x / h - 0.5f, static_cast(nx - 1)); - const float gy = clamp_world(y / h, static_cast(ny)); - const float gz = clamp_world(z / h - 0.5f, static_cast(nz - 1)); - const int x0 = static_cast(floorf(gx)); - const int y0 = static_cast(floorf(gy)); - const int z0 = static_cast(floorf(gz)); - const int x1 = min(x0 + 1, nx - 1); - const int y1 = min(y0 + 1, ny); - const int z1 = min(z0 + 1, nz - 1); const float tx = gx - static_cast(x0); const float ty = gy - static_cast(y0); const float tz = gz - static_cast(z0); - const float c000 = load_v(field, flags, target, x0, y0, z0, nx, ny, nz); - const float c100 = load_v(field, flags, target, x1, y0, z0, nx, ny, nz); - const float c010 = load_v(field, flags, target, x0, y1, z0, nx, ny, nz); - const float c110 = load_v(field, flags, target, x1, y1, z0, nx, ny, nz); - const float c001 = load_v(field, flags, target, x0, y0, z1, nx, ny, nz); - const float c101 = load_v(field, flags, target, x1, y0, z1, nx, ny, nz); - const float c011 = load_v(field, flags, target, x0, y1, z1, nx, ny, nz); - const float c111 = load_v(field, flags, target, x1, y1, z1, nx, ny, nz); - const float c00 = c000 + (c100 - c000) * tx; - const float c10 = c010 + (c110 - c010) * tx; - const float c01 = c001 + (c101 - c001) * tx; - const float c11 = c011 + (c111 - c011) * tx; - const float c0 = c00 + (c10 - c00) * ty; - const float c1 = c01 + (c11 - c01) * ty; - return c0 + (c1 - c0) * tz; - } - __device__ float sample_w_field(const float* field, const uint8_t* flags, const float* target, const float x, const float y, const float z, const int nx, const int ny, const int nz, const float h) { - const float gx = clamp_world(x / h - 0.5f, static_cast(nx - 1)); - const float gy = clamp_world(y / h - 0.5f, static_cast(ny - 1)); - const float gz = clamp_world(z / h, static_cast(nz)); - const int x0 = static_cast(floorf(gx)); - const int y0 = static_cast(floorf(gy)); - const int z0 = static_cast(floorf(gz)); - const int x1 = min(x0 + 1, nx - 1); - const int y1 = min(y0 + 1, ny - 1); - const int z1 = min(z0 + 1, nz); - const float tx = gx - static_cast(x0); - const float ty = gy - static_cast(y0); - const float tz = gz - static_cast(z0); - const float c000 = load_w(field, flags, target, x0, y0, z0, nx, ny, nz); - const float c100 = load_w(field, flags, target, x1, y0, z0, nx, ny, nz); - const float c010 = load_w(field, flags, target, x0, y1, z0, nx, ny, nz); - const float c110 = load_w(field, flags, target, x1, y1, z0, nx, ny, nz); - const float c001 = load_w(field, flags, target, x0, y0, z1, nx, ny, nz); - const float c101 = load_w(field, flags, target, x1, y0, z1, nx, ny, nz); - const float c011 = load_w(field, flags, target, x0, y1, z1, nx, ny, nz); - const float c111 = load_w(field, flags, target, x1, y1, z1, nx, ny, nz); + const float c000 = load(field, x0, y0, z0, nx, ny, nz, boundary); + const float c100 = load(field, x1, y0, z0, nx, ny, nz, boundary); + const float c010 = load(field, x0, y1, z0, nx, ny, nz, boundary); + const float c110 = load(field, x1, y1, z0, nx, ny, nz, boundary); + const float c001 = load(field, x0, y0, z1, nx, ny, nz, boundary); + const float c101 = load(field, x1, y0, z1, nx, ny, nz, boundary); + const float c011 = load(field, x0, y1, z1, nx, ny, nz, boundary); + const float c111 = load(field, x1, y1, z1, nx, ny, nz, boundary); + const float c00 = c000 + (c100 - c000) * tx; const float c10 = c010 + (c110 - c010) * tx; const float c01 = c001 + (c101 - c001) * tx; const float c11 = c011 + (c111 - c011) * tx; - const float c0 = c00 + (c10 - c00) * ty; - const float c1 = c01 + (c11 - c01) * ty; + const float c0 = c00 + (c10 - c00) * ty; + const float c1 = c01 + (c11 - c01) * ty; return c0 + (c1 - c0) * tz; } - __device__ float3 sample_velocity_field(const float* u, const float* v, const float* w, const uint8_t* u_flags, const uint8_t* v_flags, const uint8_t* w_flags, const float* u_target, const float* v_target, const float* w_target, const float3 pos, const int nx, const int ny, const int nz, const float h) { - return make_float3( - sample_u_field(u, u_flags, u_target, pos.x, pos.y, pos.z, nx, ny, nz, h), - sample_v_field(v, v_flags, v_target, pos.x, pos.y, pos.z, nx, ny, nz, h), - sample_w_field(w, w_flags, w_target, pos.x, pos.y, pos.z, nx, ny, nz, h) - ); - } - - __device__ float clamp_u_outflow(const float value, const int x, const int nx) { - if (x == 0) return fminf(value, 0.0f); - if (x == nx) return fmaxf(value, 0.0f); - return value; - } - - __device__ float clamp_v_outflow(const float value, const int y, const int ny) { - if (y == 0) return fminf(value, 0.0f); - if (y == ny) return fmaxf(value, 0.0f); - return value; - } - - __device__ float clamp_w_outflow(const float value, const int z, const int nz) { - if (z == 0) return fminf(value, 0.0f); - if (z == nz) return fmaxf(value, 0.0f); - return value; - } - - __global__ void clear_field_kernel(float* field, const int components, const float value_0, const float value_1, const float value_2, const float value_3, const int nx, const int ny, const int nz) { - const int x = static_cast(blockIdx.x * blockDim.x + threadIdx.x); - const int y = static_cast(blockIdx.y * blockDim.y + threadIdx.y); - const int z = static_cast(blockIdx.z * blockDim.z + threadIdx.z); - if (x >= nx || y >= ny || z >= nz) return; - const auto cell_index = index_3d(x, y, z, nx, ny); - const auto cell_count_value = static_cast(nx) * static_cast(ny) * static_cast(nz); - if (components > 0) field[cell_index] = value_0; - if (components > 1) field[cell_count_value + cell_index] = value_1; - if (components > 2) field[cell_count_value * 2u + cell_index] = value_2; - if (components > 3) field[cell_count_value * 3u + cell_index] = value_3; - } - - __global__ void clear_velocity_fields_kernel(float* u, float* v, float* w, const int nx, const int ny, const int nz) { - const int x = static_cast(blockIdx.x * blockDim.x + threadIdx.x); - const int y = static_cast(blockIdx.y * blockDim.y + threadIdx.y); - const int z = static_cast(blockIdx.z * blockDim.z + threadIdx.z); - if (x <= nx && y < ny && z < nz) u[index_3d(x, y, z, nx + 1, ny)] = 0.0f; - if (x < nx && y <= ny && z < nz) v[index_3d(x, y, z, nx, ny + 1)] = 0.0f; - if (x < nx && y < ny && z <= nz) w[index_3d(x, y, z, nx, ny)] = 0.0f; + __device__ float3 sample_velocity(const float* velocity_x, const float* velocity_y, const float* velocity_z, const float x, const float y, const float z, const int nx, const int ny, const int nz, const float h, const StableFluidsBoundaryConfig boundary) { + return make_float3(sample_linear(velocity_x, x, y, z, nx, ny, nz, h, boundary), sample_linear(velocity_y, x, y, z, nx, ny, nz, h, boundary), sample_linear(velocity_z, x, y, z, nx, ny, nz, h, boundary)); } - __global__ void apply_face_constraints_kernel(float* u, float* v, float* w, const uint8_t* u_flags, const uint8_t* v_flags, const uint8_t* w_flags, const float* u_target, const float* v_target, const float* w_target, const int nx, const int ny, const int nz) { - const int x = static_cast(blockIdx.x * blockDim.x + threadIdx.x); - const int y = static_cast(blockIdx.y * blockDim.y + threadIdx.y); - const int z = static_cast(blockIdx.z * blockDim.z + threadIdx.z); - if (x <= nx && y < ny && z < nz) { - const auto index = index_3d(x, y, z, nx + 1, ny); - if (u_flags[index] == face_fixed) u[index] = u_target[index]; - else if (u_flags[index] == face_outflow) u[index] = clamp_u_outflow(u[index], x, nx); - } - if (x < nx && y <= ny && z < nz) { - const auto index = index_3d(x, y, z, nx, ny + 1); - if (v_flags[index] == face_fixed) v[index] = v_target[index]; - else if (v_flags[index] == face_outflow) v[index] = clamp_v_outflow(v[index], y, ny); - } - if (x < nx && y < ny && z <= nz) { - const auto index = index_3d(x, y, z, nx, ny); - if (w_flags[index] == face_fixed) w[index] = w_target[index]; - else if (w_flags[index] == face_outflow) w[index] = clamp_w_outflow(w[index], z, nz); - } + __device__ float3 trace_particle_rk2(const float x, const float y, const float z, const float* velocity_x, const float* velocity_y, const float* velocity_z, const float dt, const int nx, const int ny, const int nz, const float h, const StableFluidsBoundaryConfig boundary) { + const float3 velocity_0 = sample_velocity(velocity_x, velocity_y, velocity_z, x, y, z, nx, ny, nz, h, boundary); + const float3 mid = make_float3(x - 0.5f * dt * velocity_0.x, y - 0.5f * dt * velocity_0.y, z - 0.5f * dt * velocity_0.z); + const float3 velocity_1 = sample_velocity(velocity_x, velocity_y, velocity_z, mid.x, mid.y, mid.z, nx, ny, nz, h, boundary); + return make_float3(x - dt * velocity_1.x, y - dt * velocity_1.y, z - dt * velocity_1.z); } - __global__ void clear_solid_cells_kernel(float* field, const int components, const uint8_t* cell_flags, const int nx, const int ny, const int nz) { + __global__ void fill_kernel(float* field, const float value, const int nx, const int ny, const int nz) { const int x = static_cast(blockIdx.x * blockDim.x + threadIdx.x); const int y = static_cast(blockIdx.y * blockDim.y + threadIdx.y); const int z = static_cast(blockIdx.z * blockDim.z + threadIdx.z); if (x >= nx || y >= ny || z >= nz) return; - const auto cell_index = index_3d(x, y, z, nx, ny); - if (cell_flags[cell_index] != cell_solid) return; - const auto cell_count_value = static_cast(nx) * static_cast(ny) * static_cast(nz); - for (int component = 0; component < components; ++component) field[static_cast(component) * cell_count_value + cell_index] = 0.0f; - } - - __global__ void add_velocity_source_kernel(float* u, float* v, float* w, const uint8_t* u_flags, const uint8_t* v_flags, const uint8_t* w_flags, const float center_x, const float center_y, const float center_z, const float radius, const float velocity_x, const float velocity_y, const float velocity_z, const int nx, const int ny, const int nz, const float h) { - const int x = static_cast(blockIdx.x * blockDim.x + threadIdx.x); - const int y = static_cast(blockIdx.y * blockDim.y + threadIdx.y); - const int z = static_cast(blockIdx.z * blockDim.z + threadIdx.z); - const float radius2 = radius * radius; - if (x <= nx && y < ny && z < nz && u_flags[index_3d(x, y, z, nx + 1, ny)] != face_fixed) { - const float px = static_cast(x) * h; - const float py = (static_cast(y) + 0.5f) * h; - const float pz = (static_cast(z) + 0.5f) * h; - const float dx = px - center_x; - const float dy = py - center_y; - const float dz = pz - center_z; - const float distance2 = dx * dx + dy * dy + dz * dz; - if (distance2 <= radius2) { - const float weight = fmaxf(0.0f, 1.0f - distance2 / radius2); - const auto index = index_3d(x, y, z, nx + 1, ny); - u[index] = u[index] * (1.0f - weight) + velocity_x * weight; - } - } - - if (x < nx && y <= ny && z < nz && v_flags[index_3d(x, y, z, nx, ny + 1)] != face_fixed) { - const float px = (static_cast(x) + 0.5f) * h; - const float py = static_cast(y) * h; - const float pz = (static_cast(z) + 0.5f) * h; - const float dx = px - center_x; - const float dy = py - center_y; - const float dz = pz - center_z; - const float distance2 = dx * dx + dy * dy + dz * dz; - if (distance2 <= radius2) { - const float weight = fmaxf(0.0f, 1.0f - distance2 / radius2); - const auto index = index_3d(x, y, z, nx, ny + 1); - v[index] = v[index] * (1.0f - weight) + velocity_y * weight; - } - } - - if (x < nx && y < ny && z <= nz && w_flags[index_3d(x, y, z, nx, ny)] != face_fixed) { - const float px = (static_cast(x) + 0.5f) * h; - const float py = (static_cast(y) + 0.5f) * h; - const float pz = static_cast(z) * h; - const float dx = px - center_x; - const float dy = py - center_y; - const float dz = pz - center_z; - const float distance2 = dx * dx + dy * dy + dz * dz; - if (distance2 <= radius2) { - const float weight = fmaxf(0.0f, 1.0f - distance2 / radius2); - const auto index = index_3d(x, y, z, nx, ny); - w[index] = w[index] * (1.0f - weight) + velocity_z * weight; - } - } + field[index_3d(x, y, z, nx, ny)] = value; } - __global__ void add_field_source_kernel(float* field, const int components, const uint8_t* cell_flags, const float center_x, const float center_y, const float center_z, const float radius, const float value_0, const float value_1, const float value_2, const float value_3, const int nx, const int ny, const int nz, const float h) { + __global__ void add_force_kernel(float* velocity_x, float* velocity_y, float* velocity_z, const float* force_x, const float* force_y, const float* force_z, const float dt, const int nx, const int ny, const int nz) { const int x = static_cast(blockIdx.x * blockDim.x + threadIdx.x); const int y = static_cast(blockIdx.y * blockDim.y + threadIdx.y); const int z = static_cast(blockIdx.z * blockDim.z + threadIdx.z); if (x >= nx || y >= ny || z >= nz) return; const auto index = index_3d(x, y, z, nx, ny); - if (cell_flags[index] == cell_solid) return; - const float px = (static_cast(x) + 0.5f) * h; - const float py = (static_cast(y) + 0.5f) * h; - const float pz = (static_cast(z) + 0.5f) * h; - const float dx = px - center_x; - const float dy = py - center_y; - const float dz = pz - center_z; - const float distance2 = dx * dx + dy * dy + dz * dz; - const float radius2 = radius * radius; - if (distance2 > radius2) return; - const float weight = fmaxf(0.0f, 1.0f - distance2 / radius2); - const auto cell_count_value = static_cast(nx) * static_cast(ny) * static_cast(nz); - if (components > 0) field[index] = field[index] * (1.0f - weight) + value_0 * weight; - if (components > 1) field[cell_count_value + index] = field[cell_count_value + index] * (1.0f - weight) + value_1 * weight; - if (components > 2) field[cell_count_value * 2u + index] = field[cell_count_value * 2u + index] * (1.0f - weight) + value_2 * weight; - if (components > 3) field[cell_count_value * 3u + index] = field[cell_count_value * 3u + index] * (1.0f - weight) + value_3 * weight; - } - - __global__ void add_uniform_forces_kernel(float* u, float* v, float* w, const uint8_t* u_flags, const uint8_t* v_flags, const uint8_t* w_flags, const int nx, const int ny, const int nz, const float dt, const float uniform_force_x, const float uniform_force_y, const float uniform_force_z) { - const int x = static_cast(blockIdx.x * blockDim.x + threadIdx.x); - const int y = static_cast(blockIdx.y * blockDim.y + threadIdx.y); - const int z = static_cast(blockIdx.z * blockDim.z + threadIdx.z); - - if (x > 0 && x < nx && y < ny && z < nz) { - const auto face_index = index_3d(x, y, z, nx + 1, ny); - if (u_flags[face_index] != face_fixed) u[face_index] += dt * uniform_force_x; - } - - if (x < nx && y > 0 && y < ny && z < nz) { - const auto face_index = index_3d(x, y, z, nx, ny + 1); - if (v_flags[face_index] != face_fixed) v[face_index] += dt * uniform_force_y; - } - - if (x < nx && y < ny && z > 0 && z < nz) { - const auto face_index = index_3d(x, y, z, nx, ny); - if (w_flags[face_index] != face_fixed) w[face_index] += dt * uniform_force_z; - } - } - - __global__ void add_buoyancy_kernel(float* v, const float* field, const uint8_t* v_flags, const uint8_t* cell_flags, const int nx, const int ny, const int nz, const float dt, const float weight, const float ambient) { - const int x = static_cast(blockIdx.x * blockDim.x + threadIdx.x); - const int y = static_cast(blockIdx.y * blockDim.y + threadIdx.y); - const int z = static_cast(blockIdx.z * blockDim.z + threadIdx.z); - if (x >= nx || y <= 0 || y >= ny || z >= nz) return; - const auto face_index = index_3d(x, y, z, nx, ny + 1); - if (v_flags[face_index] == face_fixed) return; - const auto below = index_3d(x, y - 1, z, nx, ny); - const auto above = index_3d(x, y, z, nx, ny); - if (cell_flags[below] == cell_solid || cell_flags[above] == cell_solid) return; - const float averaged = 0.5f * (field[below] + field[above]); - v[face_index] += dt * weight * (averaged - ambient); - } - - __global__ void advect_velocity_kernel(float* u_dst, float* v_dst, float* w_dst, const float* u_src, const float* v_src, const float* w_src, const uint8_t* u_flags, const uint8_t* v_flags, const uint8_t* w_flags, const float* u_target, const float* v_target, const float* w_target, const uint8_t* cell_flags, const int nx, const int ny, const int nz, const float h, const float dt) { - const int x = static_cast(blockIdx.x * blockDim.x + threadIdx.x); - const int y = static_cast(blockIdx.y * blockDim.y + threadIdx.y); - const int z = static_cast(blockIdx.z * blockDim.z + threadIdx.z); - - if (x <= nx && y < ny && z < nz) { - const auto face_index = index_3d(x, y, z, nx + 1, ny); - if (u_flags[face_index] == face_fixed) u_dst[face_index] = u_target[face_index]; - else { - const float3 pos = make_float3(static_cast(x) * h, (static_cast(y) + 0.5f) * h, (static_cast(z) + 0.5f) * h); - const float3 vel = sample_velocity_field(u_src, v_src, w_src, u_flags, v_flags, w_flags, u_target, v_target, w_target, pos, nx, ny, nz, h); - float3 back = make_float3(clamp_world(pos.x - dt * vel.x, static_cast(nx) * h), clamp_world(pos.y - dt * vel.y, static_cast(ny) * h), clamp_world(pos.z - dt * vel.z, static_cast(nz) * h)); - back = clip_backtrace_to_fluid(cell_flags, pos, back, nx, ny, nz, h); - u_dst[face_index] = sample_u_field(u_src, u_flags, u_target, back.x, back.y, back.z, nx, ny, nz, h); - } - } - - if (x < nx && y <= ny && z < nz) { - const auto face_index = index_3d(x, y, z, nx, ny + 1); - if (v_flags[face_index] == face_fixed) v_dst[face_index] = v_target[face_index]; - else { - const float3 pos = make_float3((static_cast(x) + 0.5f) * h, static_cast(y) * h, (static_cast(z) + 0.5f) * h); - const float3 vel = sample_velocity_field(u_src, v_src, w_src, u_flags, v_flags, w_flags, u_target, v_target, w_target, pos, nx, ny, nz, h); - float3 back = make_float3(clamp_world(pos.x - dt * vel.x, static_cast(nx) * h), clamp_world(pos.y - dt * vel.y, static_cast(ny) * h), clamp_world(pos.z - dt * vel.z, static_cast(nz) * h)); - back = clip_backtrace_to_fluid(cell_flags, pos, back, nx, ny, nz, h); - v_dst[face_index] = sample_v_field(v_src, v_flags, v_target, back.x, back.y, back.z, nx, ny, nz, h); - } - } - - if (x < nx && y < ny && z <= nz) { - const auto face_index = index_3d(x, y, z, nx, ny); - if (w_flags[face_index] == face_fixed) w_dst[face_index] = w_target[face_index]; - else { - const float3 pos = make_float3((static_cast(x) + 0.5f) * h, (static_cast(y) + 0.5f) * h, static_cast(z) * h); - const float3 vel = sample_velocity_field(u_src, v_src, w_src, u_flags, v_flags, w_flags, u_target, v_target, w_target, pos, nx, ny, nz, h); - float3 back = make_float3(clamp_world(pos.x - dt * vel.x, static_cast(nx) * h), clamp_world(pos.y - dt * vel.y, static_cast(ny) * h), clamp_world(pos.z - dt * vel.z, static_cast(nz) * h)); - back = clip_backtrace_to_fluid(cell_flags, pos, back, nx, ny, nz, h); - w_dst[face_index] = sample_w_field(w_src, w_flags, w_target, back.x, back.y, back.z, nx, ny, nz, h); - } - } + if (force_x != nullptr) velocity_x[index] += dt * force_x[index]; + if (force_y != nullptr) velocity_y[index] += dt * force_y[index]; + if (force_z != nullptr) velocity_z[index] += dt * force_z[index]; } - __global__ void advect_scalar_kernel(float* dst, const float* src, const float* u, const float* v, const float* w, const uint8_t* u_flags, const uint8_t* v_flags, const uint8_t* w_flags, const float* u_target, const float* v_target, const float* w_target, const uint8_t* cell_flags, const int nx, const int ny, const int nz, const float h, const float dt, const uint32_t extension_mode, const float constant_value) { + __global__ void add_field_source_kernel(float* field, const float* source, const float dt, const int nx, const int ny, const int nz) { const int x = static_cast(blockIdx.x * blockDim.x + threadIdx.x); const int y = static_cast(blockIdx.y * blockDim.y + threadIdx.y); const int z = static_cast(blockIdx.z * blockDim.z + threadIdx.z); if (x >= nx || y >= ny || z >= nz) return; const auto index = index_3d(x, y, z, nx, ny); - if (cell_flags[index] == cell_solid) { - dst[index] = 0.0f; - return; - } - const float3 pos = make_float3((static_cast(x) + 0.5f) * h, (static_cast(y) + 0.5f) * h, (static_cast(z) + 0.5f) * h); - const float3 vel = sample_velocity_field(u, v, w, u_flags, v_flags, w_flags, u_target, v_target, w_target, pos, nx, ny, nz, h); - const float3 raw_back = make_float3(pos.x - dt * vel.x, pos.y - dt * vel.y, pos.z - dt * vel.z); - float3 back = raw_back; - if (extension_mode == STABLE_FLUIDS_FIELD_EXTENSION_CONSTANT || extension_mode == STABLE_FLUIDS_FIELD_EXTENSION_STREAK) { - back = make_float3(clamp_world(back.x, static_cast(nx) * h), clamp_world(back.y, static_cast(ny) * h), clamp_world(back.z, static_cast(nz) * h)); - } - if (point_inside_domain(back, nx, ny, nz, h)) back = clip_backtrace_to_fluid(cell_flags, pos, back, nx, ny, nz, h); - dst[index] = sample_scalar_field(src, cell_flags, back.x, back.y, back.z, nx, ny, nz, h, extension_mode, constant_value); - } - - __global__ void diffuse_scalar_rbgs_kernel(float* dst, const float* src, const uint8_t* cell_flags, const int nx, const int ny, const int nz, const float alpha, const uint32_t extension_mode, const float constant_value, const int parity) { - const int x = static_cast(blockIdx.x * blockDim.x + threadIdx.x); - const int y = static_cast(blockIdx.y * blockDim.y + threadIdx.y); - const int z = static_cast(blockIdx.z * blockDim.z + threadIdx.z); - if (x >= nx || y >= ny || z >= nz || ((x + y + z) & 1) != parity) return; - const auto index = index_3d(x, y, z, nx, ny); - if (cell_flags[index] == cell_solid) { - dst[index] = 0.0f; - return; - } - const float left = load_scalar_sample(dst, cell_flags, x - 1, y, z, nx, ny, nz, extension_mode, constant_value); - const float right = load_scalar_sample(dst, cell_flags, x + 1, y, z, nx, ny, nz, extension_mode, constant_value); - const float down = load_scalar_sample(dst, cell_flags, x, y - 1, z, nx, ny, nz, extension_mode, constant_value); - const float up = load_scalar_sample(dst, cell_flags, x, y + 1, z, nx, ny, nz, extension_mode, constant_value); - const float back = load_scalar_sample(dst, cell_flags, x, y, z - 1, nx, ny, nz, extension_mode, constant_value); - const float front = load_scalar_sample(dst, cell_flags, x, y, z + 1, nx, ny, nz, extension_mode, constant_value); - dst[index] = (src[index] + alpha * (left + right + down + up + back + front)) / (1.0f + 6.0f * alpha); - } - - __global__ void diffuse_velocity_rbgs_kernel(float* dst, const float* src, const uint8_t* flags, const float* target, const int sx, const int sy, const int sz, const float alpha, const int parity) { - const int x = static_cast(blockIdx.x * blockDim.x + threadIdx.x); - const int y = static_cast(blockIdx.y * blockDim.y + threadIdx.y); - const int z = static_cast(blockIdx.z * blockDim.z + threadIdx.z); - if (x >= sx || y >= sy || z >= sz || ((x + y + z) & 1) != parity) return; - const auto index = index_3d(x, y, z, sx, sy); - if (flags[index] == face_fixed) { - dst[index] = target[index]; - return; - } - - const float center = dst[index]; - const float left = x > 0 ? (flags[index_3d(x - 1, y, z, sx, sy)] == face_fixed ? target[index_3d(x - 1, y, z, sx, sy)] : dst[index_3d(x - 1, y, z, sx, sy)]) : center; - const float right = x + 1 < sx ? (flags[index_3d(x + 1, y, z, sx, sy)] == face_fixed ? target[index_3d(x + 1, y, z, sx, sy)] : dst[index_3d(x + 1, y, z, sx, sy)]) : center; - const float down = y > 0 ? (flags[index_3d(x, y - 1, z, sx, sy)] == face_fixed ? target[index_3d(x, y - 1, z, sx, sy)] : dst[index_3d(x, y - 1, z, sx, sy)]) : center; - const float up = y + 1 < sy ? (flags[index_3d(x, y + 1, z, sx, sy)] == face_fixed ? target[index_3d(x, y + 1, z, sx, sy)] : dst[index_3d(x, y + 1, z, sx, sy)]) : center; - const float back = z > 0 ? (flags[index_3d(x, y, z - 1, sx, sy)] == face_fixed ? target[index_3d(x, y, z - 1, sx, sy)] : dst[index_3d(x, y, z - 1, sx, sy)]) : center; - const float front = z + 1 < sz ? (flags[index_3d(x, y, z + 1, sx, sy)] == face_fixed ? target[index_3d(x, y, z + 1, sx, sy)] : dst[index_3d(x, y, z + 1, sx, sy)]) : center; - dst[index] = (src[index] + alpha * (left + right + down + up + back + front)) / (1.0f + 6.0f * alpha); + field[index] += dt * source[index]; } - __global__ void compute_divergence_kernel(float* divergence, const float* u, const float* v, const float* w, const uint8_t* cell_flags, const int nx, const int ny, const int nz, const float inv_h) { + __global__ void advect_component_kernel(float* destination, const float* source, const float* velocity_x, const float* velocity_y, const float* velocity_z, const float dt, const int nx, const int ny, const int nz, const float h, const StableFluidsBoundaryConfig boundary) { const int x = static_cast(blockIdx.x * blockDim.x + threadIdx.x); const int y = static_cast(blockIdx.y * blockDim.y + threadIdx.y); const int z = static_cast(blockIdx.z * blockDim.z + threadIdx.z); if (x >= nx || y >= ny || z >= nz) return; - const auto index = index_3d(x, y, z, nx, ny); - if (cell_flags[index] == cell_solid) { - divergence[index] = 0.0f; - return; - } - divergence[index] = (u[index_3d(x + 1, y, z, nx + 1, ny)] - u[index_3d(x, y, z, nx + 1, ny)] + v[index_3d(x, y + 1, z, nx, ny + 1)] - v[index_3d(x, y, z, nx, ny + 1)] + w[index_3d(x, y, z + 1, nx, ny)] - w[index_3d(x, y, z, nx, ny)]) * inv_h; + const float px = (static_cast(x) + 0.5f) * h; + const float py = (static_cast(y) + 0.5f) * h; + const float pz = (static_cast(z) + 0.5f) * h; + const float3 traced = trace_particle_rk2(px, py, pz, velocity_x, velocity_y, velocity_z, dt, nx, ny, nz, h, boundary); + destination[index_3d(x, y, z, nx, ny)] = sample_linear(source, traced.x, traced.y, traced.z, nx, ny, nz, h, boundary); } - __global__ void accumulate_projection_metrics_kernel(ProjectionMetricsState* metrics, const float* u, const float* v, const float* w, const uint8_t* cell_flags, const int nx, const int ny, const int nz, const float inv_h) { + __global__ void diffuse_rbgs_kernel(float* destination, const float* source, const float alpha, const int parity, const int nx, const int ny, const int nz, const StableFluidsBoundaryConfig boundary) { const int x = static_cast(blockIdx.x * blockDim.x + threadIdx.x); const int y = static_cast(blockIdx.y * blockDim.y + threadIdx.y); const int z = static_cast(blockIdx.z * blockDim.z + threadIdx.z); if (x >= nx || y >= ny || z >= nz) return; - const auto index = index_3d(x, y, z, nx, ny); - if (cell_flags[index] == cell_solid) return; - const float value = fabsf((u[index_3d(x + 1, y, z, nx + 1, ny)] - u[index_3d(x, y, z, nx + 1, ny)] + v[index_3d(x, y + 1, z, nx, ny + 1)] - v[index_3d(x, y, z, nx, ny + 1)] + w[index_3d(x, y, z + 1, nx, ny)] - w[index_3d(x, y, z, nx, ny)]) * inv_h); - atomicMax(reinterpret_cast(&metrics->max_abs_divergence), __float_as_uint(value)); - atomicAdd(&metrics->sum_sq_divergence, value * value); - atomicAdd(&metrics->fluid_cell_count, 1u); - } - - __global__ void pressure_rbgs_kernel(float* pressure, const float* divergence, const uint8_t* cell_flags, const uint8_t* u_flags, const uint8_t* v_flags, const uint8_t* w_flags, const int nx, const int ny, const int nz, const float h2, const int parity) { - const int x = static_cast(blockIdx.x * blockDim.x + threadIdx.x); - const int y = static_cast(blockIdx.y * blockDim.y + threadIdx.y); - const int z = static_cast(blockIdx.z * blockDim.z + threadIdx.z); - if (x >= nx || y >= ny || z >= nz || ((x + y + z) & 1) != parity) return; - const auto index = index_3d(x, y, z, nx, ny); - if (cell_flags[index] == cell_solid) { - pressure[index] = 0.0f; - return; - } - - float sum = 0.0f; - int diag = 0; - - if (u_flags[index_3d(x, y, z, nx + 1, ny)] == face_open) { - ++diag; - if (x > 0 && cell_flags[index_3d(x - 1, y, z, nx, ny)] != cell_solid) sum += pressure[index_3d(x - 1, y, z, nx, ny)]; - } else if (u_flags[index_3d(x, y, z, nx + 1, ny)] == face_outflow) { - ++diag; - } - if (u_flags[index_3d(x + 1, y, z, nx + 1, ny)] == face_open) { - ++diag; - if (x + 1 < nx && cell_flags[index_3d(x + 1, y, z, nx, ny)] != cell_solid) sum += pressure[index_3d(x + 1, y, z, nx, ny)]; - } else if (u_flags[index_3d(x + 1, y, z, nx + 1, ny)] == face_outflow) { - ++diag; - } - if (v_flags[index_3d(x, y, z, nx, ny + 1)] == face_open) { - ++diag; - if (y > 0 && cell_flags[index_3d(x, y - 1, z, nx, ny)] != cell_solid) sum += pressure[index_3d(x, y - 1, z, nx, ny)]; - } else if (v_flags[index_3d(x, y, z, nx, ny + 1)] == face_outflow) { - ++diag; - } - if (v_flags[index_3d(x, y + 1, z, nx, ny + 1)] == face_open) { - ++diag; - if (y + 1 < ny && cell_flags[index_3d(x, y + 1, z, nx, ny)] != cell_solid) sum += pressure[index_3d(x, y + 1, z, nx, ny)]; - } else if (v_flags[index_3d(x, y + 1, z, nx, ny + 1)] == face_outflow) { - ++diag; - } - if (w_flags[index_3d(x, y, z, nx, ny)] == face_open) { - ++diag; - if (z > 0 && cell_flags[index_3d(x, y, z - 1, nx, ny)] != cell_solid) sum += pressure[index_3d(x, y, z - 1, nx, ny)]; - } else if (w_flags[index_3d(x, y, z, nx, ny)] == face_outflow) { - ++diag; - } - if (w_flags[index_3d(x, y, z + 1, nx, ny)] == face_open) { - ++diag; - if (z + 1 < nz && cell_flags[index_3d(x, y, z + 1, nx, ny)] != cell_solid) sum += pressure[index_3d(x, y, z + 1, nx, ny)]; - } else if (w_flags[index_3d(x, y, z + 1, nx, ny)] == face_outflow) { - ++diag; - } - - pressure[index] = diag > 0 ? (sum - divergence[index] * h2) / static_cast(diag) : 0.0f; + if (((x + y + z) & 1) != parity) return; + const float neighbors = load(destination, x - 1, y, z, nx, ny, nz, boundary) + load(destination, x + 1, y, z, nx, ny, nz, boundary) + load(destination, x, y - 1, z, nx, ny, nz, boundary) + load(destination, x, y + 1, z, nx, ny, nz, boundary) + load(destination, x, y, z - 1, nx, ny, nz, boundary) + load(destination, x, y, z + 1, nx, ny, nz, boundary); + const auto index = index_3d(x, y, z, nx, ny); + destination[index] = (source[index] + alpha * neighbors) / (1.0f + 6.0f * alpha); } - __global__ void project_velocity_kernel(float* u, float* v, float* w, const float* pressure, const uint8_t* cell_flags, const uint8_t* u_flags, const uint8_t* v_flags, const uint8_t* w_flags, const float* u_target, const float* v_target, const float* w_target, const int nx, const int ny, const int nz, const float inv_h) { + __global__ void dissipate_kernel(float* destination, const float* source, const float factor, const int nx, const int ny, const int nz) { const int x = static_cast(blockIdx.x * blockDim.x + threadIdx.x); const int y = static_cast(blockIdx.y * blockDim.y + threadIdx.y); const int z = static_cast(blockIdx.z * blockDim.z + threadIdx.z); - - if (x <= nx && y < ny && z < nz) { - const auto face_index = index_3d(x, y, z, nx + 1, ny); - if (u_flags[face_index] == face_fixed) u[face_index] = u_target[face_index]; - else if (u_flags[face_index] == face_outflow) { - if (x == 0 && cell_flags[index_3d(0, y, z, nx, ny)] != cell_solid) u[face_index] -= pressure[index_3d(0, y, z, nx, ny)] * inv_h; - else if (x == nx && cell_flags[index_3d(nx - 1, y, z, nx, ny)] != cell_solid) u[face_index] += pressure[index_3d(nx - 1, y, z, nx, ny)] * inv_h; - } - else if (u_flags[face_index] == face_open && x > 0 && x < nx && cell_flags[index_3d(x - 1, y, z, nx, ny)] != cell_solid && cell_flags[index_3d(x, y, z, nx, ny)] != cell_solid) u[face_index] -= (pressure[index_3d(x, y, z, nx, ny)] - pressure[index_3d(x - 1, y, z, nx, ny)]) * inv_h; - } - - if (x < nx && y <= ny && z < nz) { - const auto face_index = index_3d(x, y, z, nx, ny + 1); - if (v_flags[face_index] == face_fixed) v[face_index] = v_target[face_index]; - else if (v_flags[face_index] == face_outflow) { - if (y == 0 && cell_flags[index_3d(x, 0, z, nx, ny)] != cell_solid) v[face_index] -= pressure[index_3d(x, 0, z, nx, ny)] * inv_h; - else if (y == ny && cell_flags[index_3d(x, ny - 1, z, nx, ny)] != cell_solid) v[face_index] += pressure[index_3d(x, ny - 1, z, nx, ny)] * inv_h; - } - else if (v_flags[face_index] == face_open && y > 0 && y < ny && cell_flags[index_3d(x, y - 1, z, nx, ny)] != cell_solid && cell_flags[index_3d(x, y, z, nx, ny)] != cell_solid) v[face_index] -= (pressure[index_3d(x, y, z, nx, ny)] - pressure[index_3d(x, y - 1, z, nx, ny)]) * inv_h; - } - - if (x < nx && y < ny && z <= nz) { - const auto face_index = index_3d(x, y, z, nx, ny); - if (w_flags[face_index] == face_fixed) w[face_index] = w_target[face_index]; - else if (w_flags[face_index] == face_outflow) { - if (z == 0 && cell_flags[index_3d(x, y, 0, nx, ny)] != cell_solid) w[face_index] -= pressure[index_3d(x, y, 0, nx, ny)] * inv_h; - else if (z == nz && cell_flags[index_3d(x, y, nz - 1, nx, ny)] != cell_solid) w[face_index] += pressure[index_3d(x, y, nz - 1, nx, ny)] * inv_h; - } - else if (w_flags[face_index] == face_open && z > 0 && z < nz && cell_flags[index_3d(x, y, z - 1, nx, ny)] != cell_solid && cell_flags[index_3d(x, y, z, nx, ny)] != cell_solid) w[face_index] -= (pressure[index_3d(x, y, z, nx, ny)] - pressure[index_3d(x, y, z - 1, nx, ny)]) * inv_h; - } + if (x >= nx || y >= ny || z >= nz) return; + const auto index = index_3d(x, y, z, nx, ny); + destination[index] = source[index] * factor; } - __global__ void compute_velocity_magnitude_kernel(float* destination, const float* u, const float* v, const float* w, const uint8_t* cell_flags, const int nx, const int ny, const int nz) { + __global__ void compute_divergence_kernel(float* divergence, const float* velocity_x, const float* velocity_y, const float* velocity_z, const int nx, const int ny, const int nz, const float h, const StableFluidsBoundaryConfig boundary) { const int x = static_cast(blockIdx.x * blockDim.x + threadIdx.x); const int y = static_cast(blockIdx.y * blockDim.y + threadIdx.y); const int z = static_cast(blockIdx.z * blockDim.z + threadIdx.z); if (x >= nx || y >= ny || z >= nz) return; - const auto index = index_3d(x, y, z, nx, ny); - if (cell_flags[index] == cell_solid) { - destination[index] = 0.0f; - return; - } - const float ux = 0.5f * (u[index_3d(x, y, z, nx + 1, ny)] + u[index_3d(x + 1, y, z, nx + 1, ny)]); - const float vy = 0.5f * (v[index_3d(x, y, z, nx, ny + 1)] + v[index_3d(x, y + 1, z, nx, ny + 1)]); - const float wz = 0.5f * (w[index_3d(x, y, z, nx, ny)] + w[index_3d(x, y, z + 1, nx, ny)]); - destination[index] = sqrtf(ux * ux + vy * vy + wz * wz); + const float inv_2h = 0.5f / h; + const float ddx = (load(velocity_x, x + 1, y, z, nx, ny, nz, boundary) - load(velocity_x, x - 1, y, z, nx, ny, nz, boundary)) * inv_2h; + const float ddy = (load(velocity_y, x, y + 1, z, nx, ny, nz, boundary) - load(velocity_y, x, y - 1, z, nx, ny, nz, boundary)) * inv_2h; + const float ddz = (load(velocity_z, x, y, z + 1, nx, ny, nz, boundary) - load(velocity_z, x, y, z - 1, nx, ny, nz, boundary)) * inv_2h; + divergence[index_3d(x, y, z, nx, ny)] = ddx + ddy + ddz; } - __global__ void export_velocity_kernel(float* destination, const float* u, const float* v, const float* w, const uint8_t* cell_flags, const int nx, const int ny, const int nz) { + __global__ void pressure_rbgs_kernel(float* pressure, const float* divergence, const int parity, const int nx, const int ny, const int nz, const float h2, const StableFluidsBoundaryConfig boundary) { const int x = static_cast(blockIdx.x * blockDim.x + threadIdx.x); const int y = static_cast(blockIdx.y * blockDim.y + threadIdx.y); const int z = static_cast(blockIdx.z * blockDim.z + threadIdx.z); if (x >= nx || y >= ny || z >= nz) return; - const auto index = index_3d(x, y, z, nx, ny); - const auto base = index * 3ull; - if (cell_flags[index] == cell_solid) { - destination[base + 0] = 0.0f; - destination[base + 1] = 0.0f; - destination[base + 2] = 0.0f; - return; - } - destination[base + 0] = 0.5f * (u[index_3d(x, y, z, nx + 1, ny)] + u[index_3d(x + 1, y, z, nx + 1, ny)]); - destination[base + 1] = 0.5f * (v[index_3d(x, y, z, nx, ny + 1)] + v[index_3d(x, y + 1, z, nx, ny + 1)]); - destination[base + 2] = 0.5f * (w[index_3d(x, y, z, nx, ny)] + w[index_3d(x, y, z + 1, nx, ny)]); + if (((x + y + z) & 1) != parity) return; + const float neighbors = load(pressure, x - 1, y, z, nx, ny, nz, boundary) + load(pressure, x + 1, y, z, nx, ny, nz, boundary) + load(pressure, x, y - 1, z, nx, ny, nz, boundary) + load(pressure, x, y + 1, z, nx, ny, nz, boundary) + load(pressure, x, y, z - 1, nx, ny, nz, boundary) + load(pressure, x, y, z + 1, nx, ny, nz, boundary); + const auto index = index_3d(x, y, z, nx, ny); + pressure[index] = (neighbors - h2 * divergence[index]) / 6.0f; } - __global__ void export_solid_mask_kernel(float* destination, const uint8_t* cell_flags, const int nx, const int ny, const int nz) { + __global__ void project_velocity_kernel(float* destination_x, float* destination_y, float* destination_z, const float* source_x, const float* source_y, const float* source_z, const float* pressure, const int nx, const int ny, const int nz, const float h, const StableFluidsBoundaryConfig boundary) { const int x = static_cast(blockIdx.x * blockDim.x + threadIdx.x); const int y = static_cast(blockIdx.y * blockDim.y + threadIdx.y); const int z = static_cast(blockIdx.z * blockDim.z + threadIdx.z); if (x >= nx || y >= ny || z >= nz) return; - const auto index = index_3d(x, y, z, nx, ny); - destination[index] = cell_flags[index] == cell_solid ? 1.0f : 0.0f; + const float inv_2h = 0.5f / h; + const auto index = index_3d(x, y, z, nx, ny); + const float grad_x = (load(pressure, x + 1, y, z, nx, ny, nz, boundary) - load(pressure, x - 1, y, z, nx, ny, nz, boundary)) * inv_2h; + const float grad_y = (load(pressure, x, y + 1, z, nx, ny, nz, boundary) - load(pressure, x, y - 1, z, nx, ny, nz, boundary)) * inv_2h; + const float grad_z = (load(pressure, x, y, z + 1, nx, ny, nz, boundary) - load(pressure, x, y, z - 1, nx, ny, nz, boundary)) * inv_2h; + destination_x[index] = source_x[index] - grad_x; + destination_y[index] = source_y[index] - grad_y; + destination_z[index] = source_z[index] - grad_z; } - __global__ void export_field_components_kernel(float* destination, const float* field, const uint8_t* cell_flags, const int nx, const int ny, const int nz, const int total_components, const int component_offset, const int export_components) { + __global__ void pack_velocity_kernel(float* destination, const float* velocity_x, const float* velocity_y, const float* velocity_z, const int nx, const int ny, const int nz) { const int x = static_cast(blockIdx.x * blockDim.x + threadIdx.x); const int y = static_cast(blockIdx.y * blockDim.y + threadIdx.y); const int z = static_cast(blockIdx.z * blockDim.z + threadIdx.z); if (x >= nx || y >= ny || z >= nz) return; - const auto index = index_3d(x, y, z, nx, ny); - if (cell_flags[index] == cell_solid) { - for (int component = 0; component < export_components; ++component) destination[index * static_cast(export_components) + static_cast(component)] = 0.0f; - return; - } - const auto cell_count_value = static_cast(nx) * static_cast(ny) * static_cast(nz); - for (int component = 0; component < export_components; ++component) { - const auto src_component = static_cast(component_offset + component); - destination[index * static_cast(export_components) + static_cast(component)] = src_component < static_cast(total_components) ? field[src_component * cell_count_value + index] : 0.0f; - } + const auto index = index_3d(x, y, z, nx, ny); + const auto cell_count = static_cast(nx) * static_cast(ny) * static_cast(nz); + destination[index] = velocity_x[index]; + destination[cell_count + index] = velocity_y[index]; + destination[cell_count * 2u + index] = velocity_z[index]; } - __global__ void pack_alpha_rgb_rgba_kernel(float* destination, const float* alpha_field, const float* rgb_field, const uint8_t* cell_flags, const int nx, const int ny, const int nz, const int rgb_components) { + __global__ void velocity_magnitude_kernel(float* destination, const float* velocity_x, const float* velocity_y, const float* velocity_z, const int nx, const int ny, const int nz) { const int x = static_cast(blockIdx.x * blockDim.x + threadIdx.x); const int y = static_cast(blockIdx.y * blockDim.y + threadIdx.y); const int z = static_cast(blockIdx.z * blockDim.z + threadIdx.z); if (x >= nx || y >= ny || z >= nz) return; - const auto index = index_3d(x, y, z, nx, ny); - const auto base = index * 4ull; - if (cell_flags[index] == cell_solid) { - destination[base + 0] = 0.0f; - destination[base + 1] = 0.0f; - destination[base + 2] = 0.0f; - destination[base + 3] = 0.0f; - return; + const auto index = index_3d(x, y, z, nx, ny); + const float vx = velocity_x[index]; + const float vy = velocity_y[index]; + const float vz = velocity_z[index]; + destination[index] = sqrtf(vx * vx + vy * vy + vz * vz); + } + + void destroy_context_buffers(ContextStorage& context) { + if (context.device.velocity_x != nullptr) cudaFree(context.device.velocity_x); + if (context.device.velocity_y != nullptr) cudaFree(context.device.velocity_y); + if (context.device.velocity_z != nullptr) cudaFree(context.device.velocity_z); + if (context.device.temp_velocity_x != nullptr) cudaFree(context.device.temp_velocity_x); + if (context.device.temp_velocity_y != nullptr) cudaFree(context.device.temp_velocity_y); + if (context.device.temp_velocity_z != nullptr) cudaFree(context.device.temp_velocity_z); + if (context.device.pressure != nullptr) cudaFree(context.device.pressure); + if (context.device.divergence != nullptr) cudaFree(context.device.divergence); + context.device.velocity_x = nullptr; + context.device.velocity_y = nullptr; + context.device.velocity_z = nullptr; + context.device.temp_velocity_x = nullptr; + context.device.temp_velocity_y = nullptr; + context.device.temp_velocity_z = nullptr; + context.device.pressure = nullptr; + context.device.divergence = nullptr; + for (auto& field : context.fields) { + if (field.data != nullptr) cudaFree(field.data); + if (field.temp != nullptr) cudaFree(field.temp); + field.data = nullptr; + field.temp = nullptr; } - const auto cell_count_value = static_cast(nx) * static_cast(ny) * static_cast(nz); - destination[base + 0] = alpha_field[index]; - destination[base + 1] = rgb_components > 0 ? rgb_field[index] : 0.0f; - destination[base + 2] = rgb_components > 1 ? rgb_field[cell_count_value + index] : 0.0f; - destination[base + 3] = rgb_components > 2 ? rgb_field[cell_count_value * 2u + index] : 0.0f; } } // namespace stable_fluids -struct StableFluidsContext_t : stable_fluids::ContextStorage { -}; - -namespace { - - stable_fluids::ContextStorage* as_storage(StableFluidsContext context) { - return static_cast(context); - } - - StableFluidsResult clear_projection_state(stable_fluids::ContextStorage& context) { - if (cudaMemsetAsync(context.device.pressure, 0, stable_fluids::scalar_count(context.config) * sizeof(float), context.stream) != cudaSuccess) return stable_fluids::backend_failure; - if (cudaMemsetAsync(context.device.divergence, 0, stable_fluids::scalar_count(context.config) * sizeof(float), context.stream) != cudaSuccess) return stable_fluids::backend_failure; - if (cudaMemsetAsync(context.device.projection_metrics, 0, sizeof(stable_fluids::ProjectionMetricsState), context.stream) != cudaSuccess) return stable_fluids::backend_failure; - return stable_fluids::success; - } - - StableFluidsResult apply_velocity_constraints(stable_fluids::ContextStorage& context, const stable_fluids::LaunchGeometry& launch) { - stable_fluids::apply_face_constraints_kernel<<>>( - context.device.velocity_x, - context.device.velocity_y, - context.device.velocity_z, - context.device.u_flags, - context.device.v_flags, - context.device.w_flags, - context.device.u_target, - context.device.v_target, - context.device.w_target, - context.config.nx, - context.config.ny, - context.config.nz - ); - return cudaGetLastError() == cudaSuccess ? stable_fluids::success : stable_fluids::backend_failure; - } - - StableFluidsResult copy_velocity_fields( - stable_fluids::ContextStorage& context, - float* dst_x, - float* dst_y, - float* dst_z, - const float* src_x, - const float* src_y, - const float* src_z - ) { - const auto u_bytes = stable_fluids::u_face_count(context.config) * sizeof(float); - const auto v_bytes = stable_fluids::v_face_count(context.config) * sizeof(float); - const auto w_bytes = stable_fluids::w_face_count(context.config) * sizeof(float); - if (cudaMemcpyAsync(dst_x, src_x, u_bytes, cudaMemcpyDeviceToDevice, context.stream) != cudaSuccess) return stable_fluids::backend_failure; - if (cudaMemcpyAsync(dst_y, src_y, v_bytes, cudaMemcpyDeviceToDevice, context.stream) != cudaSuccess) return stable_fluids::backend_failure; - if (cudaMemcpyAsync(dst_z, src_z, w_bytes, cudaMemcpyDeviceToDevice, context.stream) != cudaSuccess) return stable_fluids::backend_failure; - return stable_fluids::success; - } - - StableFluidsResult rebuild_atlas_if_needed(stable_fluids::ContextStorage& context) { - if (!context.atlas_dirty) return stable_fluids::success; - const StableFluidsResult build_code = stable_fluids::build_boundary_atlas(context); - if (build_code != stable_fluids::success) return build_code; - const StableFluidsResult code = stable_fluids::upload_boundary_atlas(context); - if (code != stable_fluids::success) return code; - context.atlas_dirty = false; - return stable_fluids::success; - } - - StableFluidsResult reset_fields(stable_fluids::ContextStorage& context) { - const stable_fluids::LaunchGeometry launch = stable_fluids::make_launch_geometry(context.config); - if (const StableFluidsResult code = clear_projection_state(context); code != stable_fluids::success) return code; - stable_fluids::clear_velocity_fields_kernel<<>>(context.device.velocity_x, context.device.velocity_y, context.device.velocity_z, context.config.nx, context.config.ny, context.config.nz); - if (cudaGetLastError() != cudaSuccess) return stable_fluids::backend_failure; - if (const StableFluidsResult code = rebuild_atlas_if_needed(context); code != stable_fluids::success) return code; - if (const StableFluidsResult code = apply_velocity_constraints(context, launch); code != stable_fluids::success) return code; - for (const auto& field : context.fields) { - stable_fluids::clear_field_kernel<<>>(field.data, static_cast(field.desc.component_count), field.desc.default_value_0, field.desc.default_value_1, field.desc.default_value_2, field.desc.default_value_3, context.config.nx, context.config.ny, context.config.nz); - stable_fluids::clear_solid_cells_kernel<<>>(field.data, static_cast(field.desc.component_count), context.device.cell_flags, context.config.nx, context.config.ny, context.config.nz); - } - return cudaGetLastError() == cudaSuccess ? stable_fluids::success : stable_fluids::backend_failure; - } - - StableFluidsResult diffuse_velocity(stable_fluids::ContextStorage& context, const stable_fluids::LaunchGeometry& launch) { - const float alpha = context.config.dt * context.config.viscosity / (context.config.cell_size * context.config.cell_size); - if (alpha <= 0.0f) return stable_fluids::success; - - if (const StableFluidsResult code = copy_velocity_fields(context, context.device.temp_velocity_x, context.device.temp_velocity_y, context.device.temp_velocity_z, context.device.velocity_x, context.device.velocity_y, context.device.velocity_z); code != stable_fluids::success) return code; - - for (int iteration = 0; iteration < context.config.diffuse_iterations; ++iteration) { - stable_fluids::diffuse_velocity_rbgs_kernel<<>>(context.device.velocity_x, context.device.temp_velocity_x, context.device.u_flags, context.device.u_target, context.config.nx + 1, context.config.ny, context.config.nz, alpha, 0); - stable_fluids::diffuse_velocity_rbgs_kernel<<>>(context.device.velocity_x, context.device.temp_velocity_x, context.device.u_flags, context.device.u_target, context.config.nx + 1, context.config.ny, context.config.nz, alpha, 1); - stable_fluids::diffuse_velocity_rbgs_kernel<<>>(context.device.velocity_y, context.device.temp_velocity_y, context.device.v_flags, context.device.v_target, context.config.nx, context.config.ny + 1, context.config.nz, alpha, 0); - stable_fluids::diffuse_velocity_rbgs_kernel<<>>(context.device.velocity_y, context.device.temp_velocity_y, context.device.v_flags, context.device.v_target, context.config.nx, context.config.ny + 1, context.config.nz, alpha, 1); - stable_fluids::diffuse_velocity_rbgs_kernel<<>>(context.device.velocity_z, context.device.temp_velocity_z, context.device.w_flags, context.device.w_target, context.config.nx, context.config.ny, context.config.nz + 1, alpha, 0); - stable_fluids::diffuse_velocity_rbgs_kernel<<>>(context.device.velocity_z, context.device.temp_velocity_z, context.device.w_flags, context.device.w_target, context.config.nx, context.config.ny, context.config.nz + 1, alpha, 1); - } - - return cudaGetLastError() == cudaSuccess ? stable_fluids::success : stable_fluids::backend_failure; - } - - StableFluidsResult project_velocity(stable_fluids::ContextStorage& context, const stable_fluids::LaunchGeometry& launch) { - if (const StableFluidsResult code = apply_velocity_constraints(context, launch); code != stable_fluids::success) return code; - const float inv_h = 1.0f / context.config.cell_size; - const float h2 = context.config.cell_size * context.config.cell_size; - if (cudaMemsetAsync(context.device.pressure, 0, stable_fluids::scalar_count(context.config) * sizeof(float), context.stream) != cudaSuccess) return stable_fluids::backend_failure; - stable_fluids::compute_divergence_kernel<<>>(context.device.divergence, context.device.velocity_x, context.device.velocity_y, context.device.velocity_z, context.device.cell_flags, context.config.nx, context.config.ny, context.config.nz, inv_h); - if (cudaGetLastError() != cudaSuccess) return stable_fluids::backend_failure; - for (int iteration = 0; iteration < context.config.pressure_iterations; ++iteration) { - stable_fluids::pressure_rbgs_kernel<<>>(context.device.pressure, context.device.divergence, context.device.cell_flags, context.device.u_flags, context.device.v_flags, context.device.w_flags, context.config.nx, context.config.ny, context.config.nz, h2, 0); - stable_fluids::pressure_rbgs_kernel<<>>(context.device.pressure, context.device.divergence, context.device.cell_flags, context.device.u_flags, context.device.v_flags, context.device.w_flags, context.config.nx, context.config.ny, context.config.nz, h2, 1); - } - stable_fluids::project_velocity_kernel<<>>(context.device.velocity_x, context.device.velocity_y, context.device.velocity_z, context.device.pressure, context.device.cell_flags, context.device.u_flags, context.device.v_flags, context.device.w_flags, context.device.u_target, context.device.v_target, context.device.w_target, context.config.nx, context.config.ny, context.config.nz, inv_h); - if (cudaGetLastError() != cudaSuccess) return stable_fluids::backend_failure; - if (const StableFluidsResult code = apply_velocity_constraints(context, launch); code != stable_fluids::success) return code; - if (cudaMemsetAsync(context.device.projection_metrics, 0, sizeof(stable_fluids::ProjectionMetricsState), context.stream) != cudaSuccess) return stable_fluids::backend_failure; - stable_fluids::accumulate_projection_metrics_kernel<<>>(context.device.projection_metrics, context.device.velocity_x, context.device.velocity_y, context.device.velocity_z, context.device.cell_flags, context.config.nx, context.config.ny, context.config.nz, inv_h); - return cudaGetLastError() == cudaSuccess ? stable_fluids::success : stable_fluids::backend_failure; - } - - StableFluidsResult advect_and_diffuse_scalar(stable_fluids::ContextStorage& context, const stable_fluids::FieldStorage& field, const dim3& block, const dim3& cells) { - const auto cell_count_value = stable_fluids::scalar_count(context.config); - const auto bytes = cell_count_value * sizeof(float); - for (uint32_t component = 0; component < field.desc.component_count; ++component) { - float* field_component = field.data + static_cast(component) * cell_count_value; - float* temp_component = context.device.scalar_scratch + static_cast(component) * cell_count_value; - const float constant_value = component == 0 ? field.desc.default_value_0 : (component == 1 ? field.desc.default_value_1 : (component == 2 ? field.desc.default_value_2 : field.desc.default_value_3)); - if ((field.desc.flags & STABLE_FLUIDS_FIELD_ADVECT) != 0u) { - stable_fluids::advect_scalar_kernel<<>>(temp_component, field_component, context.device.velocity_x, context.device.velocity_y, context.device.velocity_z, context.device.u_flags, context.device.v_flags, context.device.w_flags, context.device.u_target, context.device.v_target, context.device.w_target, context.device.cell_flags, context.config.nx, context.config.ny, context.config.nz, context.config.cell_size, context.config.dt, field.desc.extension_mode, constant_value); - if (cudaGetLastError() != cudaSuccess) return stable_fluids::backend_failure; - if (cudaMemcpyAsync(field_component, temp_component, bytes, cudaMemcpyDeviceToDevice, context.stream) != cudaSuccess) return stable_fluids::backend_failure; - } - if ((field.desc.flags & STABLE_FLUIDS_FIELD_DIFFUSE) != 0u) { - const float alpha = context.config.dt * field.desc.diffusion / (context.config.cell_size * context.config.cell_size); - if (alpha > 0.0f) { - if (cudaMemcpyAsync(temp_component, field_component, bytes, cudaMemcpyDeviceToDevice, context.stream) != cudaSuccess) return stable_fluids::backend_failure; - for (int iteration = 0; iteration < context.config.diffuse_iterations; ++iteration) { - stable_fluids::diffuse_scalar_rbgs_kernel<<>>(field_component, temp_component, context.device.cell_flags, context.config.nx, context.config.ny, context.config.nz, alpha, field.desc.extension_mode, constant_value, 0); - stable_fluids::diffuse_scalar_rbgs_kernel<<>>(field_component, temp_component, context.device.cell_flags, context.config.nx, context.config.ny, context.config.nz, alpha, field.desc.extension_mode, constant_value, 1); - } - if (cudaGetLastError() != cudaSuccess) return stable_fluids::backend_failure; - } - } - } - stable_fluids::clear_solid_cells_kernel<<>>(field.data, static_cast(field.desc.component_count), context.device.cell_flags, context.config.nx, context.config.ny, context.config.nz); - return cudaGetLastError() == cudaSuccess ? stable_fluids::success : stable_fluids::backend_failure; - } - -} // namespace +struct StableFluidsContext_t : stable_fluids::ContextStorage {}; extern "C" { StableFluidsResult stable_fluids_create_context_cuda(const StableFluidsContextCreateDesc* desc, StableFluidsContext* out_context, StableFluidsFieldHandle* out_field_handles, const uint32_t out_field_handle_capacity) { - if (out_context == nullptr) return stable_fluids::invalid_argument; *out_context = nullptr; - if (desc == nullptr) return stable_fluids::invalid_argument; - if (desc->config.nx <= 0 || desc->config.ny <= 0 || desc->config.nz <= 0) return stable_fluids::invalid_config; - if (desc->config.cell_size <= 0.0f) return stable_fluids::invalid_config; - if (desc->config.dt <= 0.0f) return stable_fluids::invalid_config; - if (desc->config.diffuse_iterations <= 0 || desc->config.pressure_iterations <= 0) return stable_fluids::invalid_config; - const auto validate_boundary_face = [](const StableFluidsBoundaryFaceDesc& face) { - return face.type <= STABLE_FLUIDS_VELOCITY_BOUNDARY_OUTFLOW; - }; - const std::array boundary_faces{ - desc->config.domain_boundary.x_min, - desc->config.domain_boundary.x_max, - desc->config.domain_boundary.y_min, - desc->config.domain_boundary.y_max, - desc->config.domain_boundary.z_min, - desc->config.domain_boundary.z_max, - }; - for (const auto& boundary_face : boundary_faces) { - if (!validate_boundary_face(boundary_face)) return stable_fluids::invalid_config; - } - if (desc->field_count > 0 && desc->fields == nullptr) return stable_fluids::invalid_argument; - if (desc->field_count > 0 && (out_field_handles == nullptr || out_field_handle_capacity < desc->field_count)) return stable_fluids::invalid_argument; - if (desc->buoyancy_term_count > 0 && desc->buoyancy_terms == nullptr) return stable_fluids::invalid_argument; - for (uint32_t index = 0; index < desc->field_count; ++index) { - const auto& field = desc->fields[index]; - if (field.component_count == 0 || field.component_count > 4) return stable_fluids::invalid_field; - if ((field.flags & ~(STABLE_FLUIDS_FIELD_ADVECT | STABLE_FLUIDS_FIELD_DIFFUSE)) != 0u) return stable_fluids::invalid_field; - if (field.extension_mode > STABLE_FLUIDS_FIELD_EXTENSION_EXTRAPOLATE) return stable_fluids::invalid_field; - if (field.diffusion < 0.0f) return stable_fluids::invalid_field; - } - for (uint32_t index = 0; index < desc->buoyancy_term_count; ++index) { - const auto& term = desc->buoyancy_terms[index]; - if (term.field_index >= desc->field_count) return stable_fluids::invalid_field; - } - std::unique_ptr context{new (std::nothrow) StableFluidsContext_t{}}; if (!context) return stable_fluids::out_of_memory; context->config = desc->config; @@ -1439,218 +288,200 @@ StableFluidsResult stable_fluids_create_context_cuda(const StableFluidsContextCr if (cudaStreamCreateWithFlags(&context->stream, cudaStreamNonBlocking) != cudaSuccess) return stable_fluids::backend_failure; context->owns_stream = true; } + context->fields.reserve(desc->field_count); for (uint32_t index = 0; index < desc->field_count; ++index) { context->fields.push_back(stable_fluids::FieldStorage{ .desc = desc->fields[index], }); - context->max_field_components = (std::max)(context->max_field_components, desc->fields[index].component_count); - if (out_field_handles != nullptr) out_field_handles[index] = index + 1u; - } - if (desc->buoyancy_term_count > 0) context->buoyancy_terms.assign(desc->buoyancy_terms, desc->buoyancy_terms + desc->buoyancy_term_count); - else context->buoyancy_terms.clear(); - for (const auto& term : context->buoyancy_terms) { - const auto* field = stable_fluids::find_field(*context, term.field_index + 1u); - if (field == nullptr || field->desc.component_count == 0) { - if (context->owns_stream) cudaStreamDestroy(context->stream); - return stable_fluids::invalid_field; - } + if (index < out_field_handle_capacity) out_field_handles[index] = index + 1u; } - if (const StableFluidsResult code = stable_fluids::allocate_buffers(*context); code != stable_fluids::success) { - stable_fluids::destroy_buffers(*context); + + const auto cell_count = static_cast(context->config.nx) * static_cast(context->config.ny) * static_cast(context->config.nz); + const auto bytes = cell_count * sizeof(float); + auto fail = [&](const StableFluidsResult code) { + stable_fluids::destroy_context_buffers(*context); if (context->owns_stream) cudaStreamDestroy(context->stream); return code; + }; + + if (cell_count > 0 && cudaMalloc(reinterpret_cast(&context->device.velocity_x), bytes) != cudaSuccess) { + return fail(stable_fluids::out_of_memory); } - if (const StableFluidsResult code = reset_fields(*context); code != stable_fluids::success) { - stable_fluids::destroy_buffers(*context); - if (context->owns_stream) cudaStreamDestroy(context->stream); - return code; + if (cell_count > 0 && cudaMalloc(reinterpret_cast(&context->device.velocity_y), bytes) != cudaSuccess) { + return fail(stable_fluids::out_of_memory); + } + if (cell_count > 0 && cudaMalloc(reinterpret_cast(&context->device.velocity_z), bytes) != cudaSuccess) { + return fail(stable_fluids::out_of_memory); + } + if (cell_count > 0 && cudaMalloc(reinterpret_cast(&context->device.temp_velocity_x), bytes) != cudaSuccess) { + return fail(stable_fluids::out_of_memory); + } + if (cell_count > 0 && cudaMalloc(reinterpret_cast(&context->device.temp_velocity_y), bytes) != cudaSuccess) { + return fail(stable_fluids::out_of_memory); + } + if (cell_count > 0 && cudaMalloc(reinterpret_cast(&context->device.temp_velocity_z), bytes) != cudaSuccess) { + return fail(stable_fluids::out_of_memory); + } + if (cell_count > 0 && cudaMalloc(reinterpret_cast(&context->device.pressure), bytes) != cudaSuccess) { + return fail(stable_fluids::out_of_memory); + } + if (cell_count > 0 && cudaMalloc(reinterpret_cast(&context->device.divergence), bytes) != cudaSuccess) { + return fail(stable_fluids::out_of_memory); + } + for (auto& field : context->fields) { + if (cell_count > 0 && cudaMalloc(reinterpret_cast(&field.data), bytes) != cudaSuccess) { + return fail(stable_fluids::out_of_memory); + } + if (cell_count > 0 && cudaMalloc(reinterpret_cast(&field.temp), bytes) != cudaSuccess) { + return fail(stable_fluids::out_of_memory); + } + } + + const dim3 block(static_cast((std::max) (context->config.block_x, 1)), static_cast((std::max) (context->config.block_y, 1)), static_cast((std::max) (context->config.block_z, 1))); + const dim3 cells(static_cast((context->config.nx + static_cast(block.x) - 1) / static_cast(block.x)), static_cast((context->config.ny + static_cast(block.y) - 1) / static_cast(block.y)), static_cast((context->config.nz + static_cast(block.z) - 1) / static_cast(block.z))); + if (bytes > 0 && cudaMemsetAsync(context->device.velocity_x, 0, bytes, context->stream) != cudaSuccess) { + return fail(stable_fluids::backend_failure); + } + if (bytes > 0 && cudaMemsetAsync(context->device.velocity_y, 0, bytes, context->stream) != cudaSuccess) { + return fail(stable_fluids::backend_failure); + } + if (bytes > 0 && cudaMemsetAsync(context->device.velocity_z, 0, bytes, context->stream) != cudaSuccess) { + return fail(stable_fluids::backend_failure); + } + if (bytes > 0 && cudaMemsetAsync(context->device.pressure, 0, bytes, context->stream) != cudaSuccess) { + return fail(stable_fluids::backend_failure); + } + if (bytes > 0 && cudaMemsetAsync(context->device.divergence, 0, bytes, context->stream) != cudaSuccess) { + return fail(stable_fluids::backend_failure); + } + for (auto& field : context->fields) { + stable_fluids::fill_kernel<<stream>>>(field.data, field.desc.initial_value, context->config.nx, context->config.ny, context->config.nz); + if (cudaGetLastError() != cudaSuccess) { + return fail(stable_fluids::backend_failure); + } + } + + if (cudaGetLastError() != cudaSuccess) { + return fail(stable_fluids::backend_failure); } + *out_context = context.release(); return stable_fluids::success; } StableFluidsResult stable_fluids_destroy_context_cuda(StableFluidsContext context) { - if (context == nullptr) return stable_fluids::success; - auto* storage = as_storage(context); + auto* storage = static_cast(context); cudaStreamSynchronize(storage->stream); - stable_fluids::destroy_buffers(*storage); + stable_fluids::destroy_context_buffers(*storage); if (storage->owns_stream && storage->stream != nullptr) cudaStreamDestroy(storage->stream); delete context; return stable_fluids::success; } -StableFluidsResult stable_fluids_reset_context_cuda(StableFluidsContext context) { - if (context == nullptr) return stable_fluids::invalid_context; - return reset_fields(*as_storage(context)); -} - -StableFluidsResult stable_fluids_update_scene_cuda(StableFluidsContext context, const StableFluidsSceneDesc* desc) { - if (context == nullptr) return stable_fluids::invalid_context; - if (desc == nullptr) return stable_fluids::invalid_argument; - if (desc->collider_count > 0 && desc->colliders == nullptr) return stable_fluids::invalid_scene; - for (uint32_t index = 0; index < desc->collider_count; ++index) { - const auto& collider = desc->colliders[index]; - if (collider.collider_type > STABLE_FLUIDS_COLLIDER_BOX) return stable_fluids::invalid_scene; - if (collider.velocity_boundary_type > STABLE_FLUIDS_VELOCITY_BOUNDARY_FREE_SLIP) return stable_fluids::invalid_scene; - if (collider.collider_type == STABLE_FLUIDS_COLLIDER_SPHERE && collider.radius <= 0.0f) return stable_fluids::invalid_scene; - if (collider.collider_type == STABLE_FLUIDS_COLLIDER_BOX && (collider.half_extent_x <= 0.0f || collider.half_extent_y <= 0.0f || collider.half_extent_z <= 0.0f)) return stable_fluids::invalid_scene; - } - auto* storage = as_storage(context); - if (desc->collider_count > 0) storage->colliders.assign(desc->colliders, desc->colliders + desc->collider_count); - else storage->colliders.clear(); - storage->atlas_dirty = true; - return rebuild_atlas_if_needed(*storage); -} - StableFluidsResult stable_fluids_step_cuda(StableFluidsContext context, const StableFluidsStepDesc* desc) { - if (context == nullptr) return stable_fluids::invalid_context; - if (desc == nullptr) return stable_fluids::invalid_argument; - auto& storage = *as_storage(context); - if (desc->velocity_source_count > 0 && desc->velocity_sources == nullptr) return stable_fluids::invalid_argument; - if (desc->field_source_count > 0 && desc->field_sources == nullptr) return stable_fluids::invalid_argument; - for (uint32_t index = 0; index < desc->velocity_source_count; ++index) { - if (desc->velocity_sources[index].radius <= 0.0f) return stable_fluids::invalid_argument; - } - for (uint32_t index = 0; index < desc->field_source_count; ++index) { - const auto& source = desc->field_sources[index]; - if (source.radius <= 0.0f) return stable_fluids::invalid_argument; - if (stable_fluids::find_field(storage, source.field) == nullptr) return stable_fluids::invalid_field; - } - if (const StableFluidsResult code = rebuild_atlas_if_needed(storage); code != stable_fluids::success) return code; - - const stable_fluids::LaunchGeometry launch = stable_fluids::make_launch_geometry(storage.config); + auto& storage = *static_cast(context); + const dim3 block(static_cast((std::max) (storage.config.block_x, 1)), static_cast((std::max) (storage.config.block_y, 1)), static_cast((std::max) (storage.config.block_z, 1))); + const dim3 cells(static_cast((storage.config.nx + static_cast(block.x) - 1) / static_cast(block.x)), static_cast((storage.config.ny + static_cast(block.y) - 1) / static_cast(block.y)), static_cast((storage.config.nz + static_cast(block.z) - 1) / static_cast(block.z))); + const auto cell_count = static_cast(storage.config.nx) * static_cast(storage.config.ny) * static_cast(storage.config.nz); + const auto bytes = cell_count * sizeof(float); - nvtx3::scoped_range range("stable.step.context"); + nvtx3::scoped_range range("stable.step"); - for (uint32_t index = 0; index < desc->velocity_source_count; ++index) { - const auto& source = desc->velocity_sources[index]; - stable_fluids::add_velocity_source_kernel<<>>(storage.device.velocity_x, storage.device.velocity_y, storage.device.velocity_z, storage.device.u_flags, storage.device.v_flags, storage.device.w_flags, source.center_x, source.center_y, source.center_z, source.radius, source.velocity_x, source.velocity_y, source.velocity_z, storage.config.nx, storage.config.ny, storage.config.nz, storage.config.cell_size); - if (cudaGetLastError() != cudaSuccess) return stable_fluids::backend_failure; - } - - for (uint32_t index = 0; index < desc->field_source_count; ++index) { - const auto& source = desc->field_sources[index]; - const auto* field = stable_fluids::find_field(storage, source.field); - if (field == nullptr) return stable_fluids::invalid_field; - stable_fluids::add_field_source_kernel<<>>(field->data, static_cast(field->desc.component_count), storage.device.cell_flags, source.center_x, source.center_y, source.center_z, source.radius, source.value_0, source.value_1, source.value_2, source.value_3, storage.config.nx, storage.config.ny, storage.config.nz, storage.config.cell_size); - if (cudaGetLastError() != cudaSuccess) return stable_fluids::backend_failure; - } - - if (const StableFluidsResult code = apply_velocity_constraints(storage, launch); code != stable_fluids::success) return code; - - stable_fluids::add_uniform_forces_kernel<<>>(storage.device.velocity_x, storage.device.velocity_y, storage.device.velocity_z, storage.device.u_flags, storage.device.v_flags, storage.device.w_flags, storage.config.nx, storage.config.ny, storage.config.nz, storage.config.dt, storage.config.uniform_force_x, storage.config.uniform_force_y, storage.config.uniform_force_z); + stable_fluids::add_force_kernel<<>>(storage.device.velocity_x, storage.device.velocity_y, storage.device.velocity_z, desc->force_x, desc->force_y, desc->force_z, storage.config.dt, storage.config.nx, storage.config.ny, storage.config.nz); if (cudaGetLastError() != cudaSuccess) return stable_fluids::backend_failure; - for (const auto& term : storage.buoyancy_terms) { - const auto* field = stable_fluids::find_field(storage, term.field_index + 1u); - if (field == nullptr) return stable_fluids::invalid_field; - stable_fluids::add_buoyancy_kernel<<>>(storage.device.velocity_y, field->data, storage.device.v_flags, storage.device.cell_flags, storage.config.nx, storage.config.ny, storage.config.nz, storage.config.dt, term.weight, term.ambient); - if (cudaGetLastError() != cudaSuccess) return stable_fluids::backend_failure; - } - - if (const StableFluidsResult code = diffuse_velocity(storage, launch); code != stable_fluids::success) return code; - if (const StableFluidsResult code = project_velocity(storage, launch); code != stable_fluids::success) return code; - stable_fluids::advect_velocity_kernel<<>>(storage.device.temp_velocity_x, storage.device.temp_velocity_y, storage.device.temp_velocity_z, storage.device.velocity_x, storage.device.velocity_y, storage.device.velocity_z, storage.device.u_flags, storage.device.v_flags, storage.device.w_flags, storage.device.u_target, storage.device.v_target, storage.device.w_target, storage.device.cell_flags, storage.config.nx, storage.config.ny, storage.config.nz, storage.config.cell_size, storage.config.dt); + stable_fluids::advect_component_kernel<<>>(storage.device.temp_velocity_x, storage.device.velocity_x, storage.device.velocity_x, storage.device.velocity_y, storage.device.velocity_z, storage.config.dt, storage.config.nx, storage.config.ny, storage.config.nz, storage.config.cell_size, storage.config.boundary); + stable_fluids::advect_component_kernel<<>>(storage.device.temp_velocity_y, storage.device.velocity_y, storage.device.velocity_x, storage.device.velocity_y, storage.device.velocity_z, storage.config.dt, storage.config.nx, storage.config.ny, storage.config.nz, storage.config.cell_size, storage.config.boundary); + stable_fluids::advect_component_kernel<<>>(storage.device.temp_velocity_z, storage.device.velocity_z, storage.device.velocity_x, storage.device.velocity_y, storage.device.velocity_z, storage.config.dt, storage.config.nx, storage.config.ny, storage.config.nz, storage.config.cell_size, storage.config.boundary); if (cudaGetLastError() != cudaSuccess) return stable_fluids::backend_failure; - if (const StableFluidsResult code = copy_velocity_fields(storage, storage.device.velocity_x, storage.device.velocity_y, storage.device.velocity_z, storage.device.temp_velocity_x, storage.device.temp_velocity_y, storage.device.temp_velocity_z); code != stable_fluids::success) return code; + auto diffuse_component = [&](float* destination, const float* source, const float diffusion) { + const float alpha = storage.config.dt * diffusion / (storage.config.cell_size * storage.config.cell_size); + if (alpha <= 0.0f) { + if (cell_count == 0) return stable_fluids::success; + return cudaMemcpyAsync(destination, source, bytes, cudaMemcpyDeviceToDevice, storage.stream) == cudaSuccess ? stable_fluids::success : stable_fluids::backend_failure; + } + if (cudaMemcpyAsync(destination, source, bytes, cudaMemcpyDeviceToDevice, storage.stream) != cudaSuccess) return stable_fluids::backend_failure; + for (int iteration = 0; iteration < storage.config.diffuse_iterations; ++iteration) { + stable_fluids::diffuse_rbgs_kernel<<>>(destination, source, alpha, 0, storage.config.nx, storage.config.ny, storage.config.nz, storage.config.boundary); + stable_fluids::diffuse_rbgs_kernel<<>>(destination, source, alpha, 1, storage.config.nx, storage.config.ny, storage.config.nz, storage.config.boundary); + } + return cudaGetLastError() == cudaSuccess ? stable_fluids::success : stable_fluids::backend_failure; + }; - if (const StableFluidsResult code = project_velocity(storage, launch); code != stable_fluids::success) return code; - for (const auto& field : storage.fields) { - if (const StableFluidsResult code = advect_and_diffuse_scalar(storage, field, launch.block, launch.cells); code != stable_fluids::success) return code; - } + if (const StableFluidsResult code = diffuse_component(storage.device.velocity_x, storage.device.temp_velocity_x, storage.config.viscosity); code != stable_fluids::success) return code; + if (const StableFluidsResult code = diffuse_component(storage.device.velocity_y, storage.device.temp_velocity_y, storage.config.viscosity); code != stable_fluids::success) return code; + if (const StableFluidsResult code = diffuse_component(storage.device.velocity_z, storage.device.temp_velocity_z, storage.config.viscosity); code != stable_fluids::success) return code; - return stable_fluids::success; -} + stable_fluids::compute_divergence_kernel<<>>(storage.device.divergence, storage.device.velocity_x, storage.device.velocity_y, storage.device.velocity_z, storage.config.nx, storage.config.ny, storage.config.nz, storage.config.cell_size, storage.config.boundary); + if (cudaGetLastError() != cudaSuccess) return stable_fluids::backend_failure; + if (bytes > 0 && cudaMemsetAsync(storage.device.pressure, 0, bytes, storage.stream) != cudaSuccess) return stable_fluids::backend_failure; + const float h2 = storage.config.cell_size * storage.config.cell_size; + for (int iteration = 0; iteration < storage.config.pressure_iterations; ++iteration) { + stable_fluids::pressure_rbgs_kernel<<>>(storage.device.pressure, storage.device.divergence, 0, storage.config.nx, storage.config.ny, storage.config.nz, h2, storage.config.boundary); + stable_fluids::pressure_rbgs_kernel<<>>(storage.device.pressure, storage.device.divergence, 1, storage.config.nx, storage.config.ny, storage.config.nz, h2, storage.config.boundary); + } + stable_fluids::project_velocity_kernel<<>>( + storage.device.temp_velocity_x, storage.device.temp_velocity_y, storage.device.temp_velocity_z, storage.device.velocity_x, storage.device.velocity_y, storage.device.velocity_z, storage.device.pressure, storage.config.nx, storage.config.ny, storage.config.nz, storage.config.cell_size, storage.config.boundary); + if (cudaGetLastError() != cudaSuccess) return stable_fluids::backend_failure; + std::swap(storage.device.velocity_x, storage.device.temp_velocity_x); + std::swap(storage.device.velocity_y, storage.device.temp_velocity_y); + std::swap(storage.device.velocity_z, storage.device.temp_velocity_z); + for (std::size_t field_index = 0; field_index < storage.fields.size(); ++field_index) { + auto& field = storage.fields[field_index]; + const float* source = nullptr; + for (uint32_t source_index = 0; source_index < desc->field_source_count; ++source_index) { + if (desc->field_sources[source_index].field != static_cast(field_index + 1u)) continue; + source = desc->field_sources[source_index].values; + break; + } -StableFluidsResult stable_fluids_export_field_components_cuda(StableFluidsContext context, const StableFluidsFieldHandle field_handle, const uint32_t component_offset, const uint32_t component_count, void* destination) { - if (context == nullptr) return stable_fluids::invalid_context; - if (destination == nullptr) return stable_fluids::invalid_export; - auto& storage = *as_storage(context); - const stable_fluids::LaunchGeometry launch = stable_fluids::make_launch_geometry(storage.config); - const auto* field = stable_fluids::find_field(storage, field_handle); - if (field == nullptr || component_count == 0 || component_count > 4 || component_offset + component_count > field->desc.component_count) return stable_fluids::invalid_export; - stable_fluids::export_field_components_kernel<<>>(static_cast(destination), field->data, storage.device.cell_flags, storage.config.nx, storage.config.ny, storage.config.nz, static_cast(field->desc.component_count), static_cast(component_offset), static_cast(component_count)); - return cudaGetLastError() == cudaSuccess ? stable_fluids::success : stable_fluids::backend_failure; -} + if (source != nullptr) { + stable_fluids::add_field_source_kernel<<>>(field.data, source, storage.config.dt, storage.config.nx, storage.config.ny, storage.config.nz); + if (cudaGetLastError() != cudaSuccess) return stable_fluids::backend_failure; + } -StableFluidsResult stable_fluids_export_alpha_rgb_rgba_cuda(StableFluidsContext context, const StableFluidsFieldHandle alpha_field_handle, const StableFluidsFieldHandle rgb_field_handle, void* destination) { - if (context == nullptr) return stable_fluids::invalid_context; - if (destination == nullptr) return stable_fluids::invalid_export; - auto& storage = *as_storage(context); - const auto* alpha_field = stable_fluids::find_field(storage, alpha_field_handle); - const auto* rgb_field = stable_fluids::find_field(storage, rgb_field_handle); - if (alpha_field == nullptr || rgb_field == nullptr || alpha_field->desc.component_count < 1 || rgb_field->desc.component_count < 3) return stable_fluids::invalid_export; - const stable_fluids::LaunchGeometry launch = stable_fluids::make_launch_geometry(storage.config); - stable_fluids::pack_alpha_rgb_rgba_kernel<<>>(static_cast(destination), alpha_field->data, rgb_field->data, storage.device.cell_flags, storage.config.nx, storage.config.ny, storage.config.nz, static_cast(rgb_field->desc.component_count)); - return cudaGetLastError() == cudaSuccess ? stable_fluids::success : stable_fluids::backend_failure; -} + stable_fluids::advect_component_kernel<<>>(field.temp, field.data, storage.device.velocity_x, storage.device.velocity_y, storage.device.velocity_z, storage.config.dt, storage.config.nx, storage.config.ny, storage.config.nz, storage.config.cell_size, storage.config.boundary); + if (cudaGetLastError() != cudaSuccess) return stable_fluids::backend_failure; -StableFluidsResult stable_fluids_export_velocity_cuda(StableFluidsContext context, void* destination) { - if (context == nullptr) return stable_fluids::invalid_context; - if (destination == nullptr) return stable_fluids::invalid_export; - auto& storage = *as_storage(context); - const stable_fluids::LaunchGeometry launch = stable_fluids::make_launch_geometry(storage.config); - stable_fluids::export_velocity_kernel<<>>(static_cast(destination), storage.device.velocity_x, storage.device.velocity_y, storage.device.velocity_z, storage.device.cell_flags, storage.config.nx, storage.config.ny, storage.config.nz); - return cudaGetLastError() == cudaSuccess ? stable_fluids::success : stable_fluids::backend_failure; -} + if (const StableFluidsResult code = diffuse_component(field.data, field.temp, field.desc.diffusion); code != stable_fluids::success) return code; -StableFluidsResult stable_fluids_export_velocity_magnitude_cuda(StableFluidsContext context, void* destination) { - if (context == nullptr) return stable_fluids::invalid_context; - if (destination == nullptr) return stable_fluids::invalid_export; - auto& storage = *as_storage(context); - const stable_fluids::LaunchGeometry launch = stable_fluids::make_launch_geometry(storage.config); - stable_fluids::compute_velocity_magnitude_kernel<<>>(static_cast(destination), storage.device.velocity_x, storage.device.velocity_y, storage.device.velocity_z, storage.device.cell_flags, storage.config.nx, storage.config.ny, storage.config.nz); - return cudaGetLastError() == cudaSuccess ? stable_fluids::success : stable_fluids::backend_failure; -} + if (field.desc.dissipation > 0.0f) { + const float factor = 1.0f / (1.0f + storage.config.dt * field.desc.dissipation); + stable_fluids::dissipate_kernel<<>>(field.temp, field.data, factor, storage.config.nx, storage.config.ny, storage.config.nz); + if (cudaGetLastError() != cudaSuccess) return stable_fluids::backend_failure; + std::swap(field.data, field.temp); + } + } -StableFluidsResult stable_fluids_export_solid_mask_cuda(StableFluidsContext context, void* destination) { - if (context == nullptr) return stable_fluids::invalid_context; - if (destination == nullptr) return stable_fluids::invalid_export; - auto& storage = *as_storage(context); - const stable_fluids::LaunchGeometry launch = stable_fluids::make_launch_geometry(storage.config); - stable_fluids::export_solid_mask_kernel<<>>(static_cast(destination), storage.device.cell_flags, storage.config.nx, storage.config.ny, storage.config.nz); + stable_fluids::compute_divergence_kernel<<>>(storage.device.divergence, storage.device.velocity_x, storage.device.velocity_y, storage.device.velocity_z, storage.config.nx, storage.config.ny, storage.config.nz, storage.config.cell_size, storage.config.boundary); return cudaGetLastError() == cudaSuccess ? stable_fluids::success : stable_fluids::backend_failure; } -StableFluidsResult stable_fluids_export_pressure_cuda(StableFluidsContext context, void* destination) { - if (context == nullptr) return stable_fluids::invalid_context; - if (destination == nullptr) return stable_fluids::invalid_export; - auto& storage = *as_storage(context); - if (cudaMemcpyAsync(destination, storage.device.pressure, stable_fluids::scalar_count(storage.config) * sizeof(float), cudaMemcpyDeviceToDevice, storage.stream) != cudaSuccess) return stable_fluids::backend_failure; - return stable_fluids::success; -} - -StableFluidsResult stable_fluids_export_divergence_cuda(StableFluidsContext context, void* destination) { - if (context == nullptr) return stable_fluids::invalid_context; - if (destination == nullptr) return stable_fluids::invalid_export; - auto& storage = *as_storage(context); - if (cudaMemcpyAsync(destination, storage.device.divergence, stable_fluids::scalar_count(storage.config) * sizeof(float), cudaMemcpyDeviceToDevice, storage.stream) != cudaSuccess) return stable_fluids::backend_failure; - return stable_fluids::success; -} - -StableFluidsResult stable_fluids_get_projection_metrics_cuda(StableFluidsContext context, StableFluidsProjectionMetrics* out_metrics) { - if (context == nullptr) return stable_fluids::invalid_context; - if (out_metrics == nullptr) return stable_fluids::invalid_argument; - auto& storage = *as_storage(context); - stable_fluids::ProjectionMetricsState state{}; - if (cudaStreamSynchronize(storage.stream) != cudaSuccess) return stable_fluids::backend_failure; - if (cudaMemcpy(&state, storage.device.projection_metrics, sizeof(state), cudaMemcpyDeviceToHost) != cudaSuccess) return stable_fluids::backend_failure; - out_metrics->max_abs_divergence = state.max_abs_divergence; - out_metrics->rms_divergence = state.fluid_cell_count > 0 ? std::sqrt(state.sum_sq_divergence / static_cast(state.fluid_cell_count)) : 0.0f; - return stable_fluids::success; -} - -StableFluidsResult stable_fluids_get_grid_desc_cuda(StableFluidsContext context, StableFluidsGridDesc* out_desc) { - if (context == nullptr) return stable_fluids::invalid_context; - if (out_desc == nullptr) return stable_fluids::invalid_argument; - const auto& storage = *as_storage(context); - out_desc->nx = storage.config.nx; - out_desc->ny = storage.config.ny; - out_desc->nz = storage.config.nz; - out_desc->cell_size = storage.config.cell_size; - return stable_fluids::success; +StableFluidsResult stable_fluids_export_cuda(StableFluidsContext context, const StableFluidsExportDesc* desc, void* destination) { + auto& storage = *static_cast(context); + const dim3 block(static_cast((std::max) (storage.config.block_x, 1)), static_cast((std::max) (storage.config.block_y, 1)), static_cast((std::max) (storage.config.block_z, 1))); + const dim3 cells(static_cast((storage.config.nx + static_cast(block.x) - 1) / static_cast(block.x)), static_cast((storage.config.ny + static_cast(block.y) - 1) / static_cast(block.y)), static_cast((storage.config.nz + static_cast(block.z) - 1) / static_cast(block.z))); + const auto cell_count = static_cast(storage.config.nx) * static_cast(storage.config.ny) * static_cast(storage.config.nz); + const auto scalar_bytes = cell_count * sizeof(float); + + switch (desc->kind) { + case STABLE_FLUIDS_EXPORT_FIELD: + { + const auto* field = &storage.fields[static_cast(desc->field - 1u)]; + return cudaMemcpyAsync(destination, field->data, scalar_bytes, cudaMemcpyDeviceToDevice, storage.stream) == cudaSuccess ? stable_fluids::success : stable_fluids::backend_failure; + } + case STABLE_FLUIDS_EXPORT_VELOCITY: + stable_fluids::pack_velocity_kernel<<>>(static_cast(destination), storage.device.velocity_x, storage.device.velocity_y, storage.device.velocity_z, storage.config.nx, storage.config.ny, storage.config.nz); + return cudaGetLastError() == cudaSuccess ? stable_fluids::success : stable_fluids::backend_failure; + case STABLE_FLUIDS_EXPORT_VELOCITY_MAGNITUDE: + stable_fluids::velocity_magnitude_kernel<<>>(static_cast(destination), storage.device.velocity_x, storage.device.velocity_y, storage.device.velocity_z, storage.config.nx, storage.config.ny, storage.config.nz); + return cudaGetLastError() == cudaSuccess ? stable_fluids::success : stable_fluids::backend_failure; + case STABLE_FLUIDS_EXPORT_PRESSURE: return cudaMemcpyAsync(destination, storage.device.pressure, scalar_bytes, cudaMemcpyDeviceToDevice, storage.stream) == cudaSuccess ? stable_fluids::success : stable_fluids::backend_failure; + case STABLE_FLUIDS_EXPORT_DIVERGENCE: return cudaMemcpyAsync(destination, storage.device.divergence, scalar_bytes, cudaMemcpyDeviceToDevice, storage.stream) == cudaSuccess ? stable_fluids::success : stable_fluids::backend_failure; + default: return stable_fluids::backend_failure; + } } } // extern "C" diff --git a/main.cu b/main.cu index ce2da42..4c693fb 100644 --- a/main.cu +++ b/main.cu @@ -1,15 +1,17 @@ #include "stable-fluids-3d.h" #include +#include #include +#include #include #include #include #include #include -#include namespace { + bool cuda_ok(const cudaError_t status, const char* what) { if (status == cudaSuccess) return true; std::fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status)); @@ -22,194 +24,162 @@ namespace { return false; } + __host__ __device__ std::uint64_t index_3d(const int x, const int y, const int z, const int sx, const int sy) { + return static_cast(z) * static_cast(sx) * static_cast(sy) + static_cast(y) * static_cast(sx) + static_cast(x); + } + + __global__ void fill_kernel(float* field, const float value, const int nx, const int ny, const int nz) { + const int x = static_cast(blockIdx.x * blockDim.x + threadIdx.x); + const int y = static_cast(blockIdx.y * blockDim.y + threadIdx.y); + const int z = static_cast(blockIdx.z * blockDim.z + threadIdx.z); + if (x >= nx || y >= ny || z >= nz) return; + field[index_3d(x, y, z, nx, ny)] = value; + } + + __global__ void add_blob_kernel(float* field, const float amplitude, const float center_x, const float center_y, const float center_z, const float radius, const int nx, const int ny, const int nz, const float h) { + const int x = static_cast(blockIdx.x * blockDim.x + threadIdx.x); + const int y = static_cast(blockIdx.y * blockDim.y + threadIdx.y); + const int z = static_cast(blockIdx.z * blockDim.z + threadIdx.z); + if (x >= nx || y >= ny || z >= nz) return; + const float px = (static_cast(x) + 0.5f) * h; + const float py = (static_cast(y) + 0.5f) * h; + const float pz = (static_cast(z) + 0.5f) * h; + const float dx = px - center_x; + const float dy = py - center_y; + const float dz = pz - center_z; + const float radius2 = radius * radius; + const float dist2 = dx * dx + dy * dy + dz * dz; + if (dist2 > radius2) return; + const float weight = radius2 > 0.0f ? 1.0f - dist2 / radius2 : 0.0f; + field[index_3d(x, y, z, nx, ny)] += amplitude * weight; + } + } // namespace int main() { - constexpr int32_t nx = 100; - constexpr int32_t ny = 100; - constexpr int32_t nz = 100; - constexpr int frames = 24; - constexpr float cell_size = 0.01f; - constexpr float extent_x = static_cast(nx) * cell_size; - constexpr float extent_y = static_cast(ny) * cell_size; - constexpr float extent_z = static_cast(nz) * cell_size; - constexpr float gravity_y = -9.81f; - constexpr float buoyancy_beta = 0.35f; - constexpr float buoyancy_weight = -gravity_y * buoyancy_beta; - - StableFluidsSimulationConfig config{ + constexpr int32_t nx = 96; + constexpr int32_t ny = 96; + constexpr int32_t nz = 96; + constexpr int frames = 32; + constexpr float cell_size = 0.01f; + constexpr float extent_x = static_cast(nx) * cell_size; + constexpr float extent_y = static_cast(ny) * cell_size; + constexpr float extent_z = static_cast(nz) * cell_size; + constexpr float source_r = 0.055f; + constexpr float source_x = extent_x * 0.35f; + constexpr float source_y = extent_y * 0.16f; + constexpr float source_z = extent_z * 0.50f; + + const StableFluidsSimulationConfig config{ .nx = nx, .ny = ny, .nz = nz, .cell_size = cell_size, - .dt = 1.0f / 120.0f, - .viscosity = 0.00015f, + .dt = 1.0f / 90.0f, + .viscosity = 0.00012f, .diffuse_iterations = 24, .pressure_iterations = 96, - .uniform_force_x = 0.0f, - .uniform_force_y = 0.0f, - .uniform_force_z = 0.0f, - .domain_boundary = { - .x_min = { .type = static_cast(STABLE_FLUIDS_VELOCITY_BOUNDARY_OUTFLOW), .velocity = 0.0f, }, - .x_max = { .type = static_cast(STABLE_FLUIDS_VELOCITY_BOUNDARY_OUTFLOW), .velocity = 0.0f, }, - .y_min = { .type = static_cast(STABLE_FLUIDS_VELOCITY_BOUNDARY_NO_SLIP), .velocity = 0.0f, }, - .y_max = { .type = static_cast(STABLE_FLUIDS_VELOCITY_BOUNDARY_OUTFLOW), .velocity = 0.0f, }, - .z_min = { .type = static_cast(STABLE_FLUIDS_VELOCITY_BOUNDARY_OUTFLOW), .velocity = 0.0f, }, - .z_max = { .type = static_cast(STABLE_FLUIDS_VELOCITY_BOUNDARY_OUTFLOW), .velocity = 0.0f, }, + .boundary = { + .x = STABLE_FLUIDS_BOUNDARY_PERIODIC, + .y = STABLE_FLUIDS_BOUNDARY_FIXED, + .z = STABLE_FLUIDS_BOUNDARY_PERIODIC, }, .block_x = 8, .block_y = 8, .block_z = 4, }; - std::array fields{ + const std::array fields{ StableFluidsFieldCreateDesc{ .name = "density", - .component_count = 1, - .flags = STABLE_FLUIDS_FIELD_ADVECT | STABLE_FLUIDS_FIELD_DIFFUSE, .diffusion = 0.00005f, - .extension_mode = static_cast(STABLE_FLUIDS_FIELD_EXTENSION_STREAK), - .default_value_0 = 0.0f, - .default_value_1 = 0.0f, - .default_value_2 = 0.0f, - .default_value_3 = 0.0f, - }, - StableFluidsFieldCreateDesc{ - .name = "dye", - .component_count = 3, - .flags = STABLE_FLUIDS_FIELD_ADVECT | STABLE_FLUIDS_FIELD_DIFFUSE, - .diffusion = 0.00002f, - .extension_mode = static_cast(STABLE_FLUIDS_FIELD_EXTENSION_STREAK), - .default_value_0 = 0.0f, - .default_value_1 = 0.0f, - .default_value_2 = 0.0f, - .default_value_3 = 0.0f, + .dissipation = 0.35f, + .initial_value = 0.0f, }, }; - std::array buoyancy_terms{ - StableFluidsBuoyancyDesc{ - .field_index = 0, - .weight = buoyancy_weight, - .ambient = 0.0f, - }, - }; - std::array field_handles{}; + std::array field_handles{}; - StableFluidsContextCreateDesc create_desc{ + const StableFluidsContextCreateDesc create_desc{ .config = config, .stream = nullptr, .fields = fields.data(), .field_count = static_cast(fields.size()), - .buoyancy_terms = buoyancy_terms.data(), - .buoyancy_term_count = static_cast(buoyancy_terms.size()), }; StableFluidsContext context = nullptr; if (!stable_ok(stable_fluids_create_context_cuda(&create_desc, &context, field_handles.data(), static_cast(field_handles.size())), "stable_fluids_create_context_cuda")) return EXIT_FAILURE; - const StableFluidsColliderDesc collider{ - .collider_type = static_cast(STABLE_FLUIDS_COLLIDER_SPHERE), - .velocity_boundary_type = static_cast(STABLE_FLUIDS_VELOCITY_BOUNDARY_NO_SLIP), - .center_x = extent_x * 0.5f, - .center_y = extent_y * 0.36f, - .center_z = extent_z * 0.5f, - .radius = 0.08f, - .half_extent_x = 0.0f, - .half_extent_y = 0.0f, - .half_extent_z = 0.0f, - .linear_velocity_x = 0.0f, - .linear_velocity_y = 0.0f, - .linear_velocity_z = 0.0f, - }; - const StableFluidsSceneDesc scene_desc{ - .colliders = &collider, - .collider_count = 1, - }; - if (!stable_ok(stable_fluids_update_scene_cuda(context, &scene_desc), "stable_fluids_update_scene_cuda")) { - stable_fluids_destroy_context_cuda(context); - return EXIT_FAILURE; - } + const dim3 block(static_cast(config.block_x), static_cast(config.block_y), static_cast(config.block_z)); + const dim3 cells( + static_cast((config.nx + config.block_x - 1) / config.block_x), + static_cast((config.ny + config.block_y - 1) / config.block_y), + static_cast((config.nz + config.block_z - 1) / config.block_z)); + + const auto cell_count = static_cast(nx) * static_cast(ny) * static_cast(nz); + const auto scalar_size = cell_count * sizeof(float); + + float* force_x = nullptr; + float* force_y = nullptr; + float* force_z = nullptr; + float* density_source = nullptr; + float* device_density = nullptr; + + if (!cuda_ok(cudaMalloc(reinterpret_cast(&force_x), scalar_size), "cudaMalloc force_x")) return EXIT_FAILURE; + if (!cuda_ok(cudaMalloc(reinterpret_cast(&force_y), scalar_size), "cudaMalloc force_y")) return EXIT_FAILURE; + if (!cuda_ok(cudaMalloc(reinterpret_cast(&force_z), scalar_size), "cudaMalloc force_z")) return EXIT_FAILURE; + if (!cuda_ok(cudaMalloc(reinterpret_cast(&density_source), scalar_size), "cudaMalloc density_source")) return EXIT_FAILURE; + if (!cuda_ok(cudaMalloc(reinterpret_cast(&device_density), scalar_size), "cudaMalloc device_density")) return EXIT_FAILURE; const auto begin = std::chrono::steady_clock::now(); for (int frame = 0; frame < frames; ++frame) { - const float center_x = extent_x * 0.18f; - const float center_y = extent_y * 0.14f; - const float center_z = extent_z * 0.28f + static_cast(frame & 1) * 0.005f; - const StableFluidsVelocitySourceDesc velocity_source{ - .center_x = center_x, - .center_y = center_y, - .center_z = center_z, - .radius = 0.045f, - .velocity_x = 0.18f, - .velocity_y = 0.42f, - .velocity_z = 0.12f, - }; - const std::array field_sources{ - StableFluidsFieldSourceDesc{ - .field = field_handles[0], - .center_x = center_x, - .center_y = center_y, - .center_z = center_z, - .radius = 0.045f, - .value_0 = 0.55f, - .value_1 = 0.0f, - .value_2 = 0.0f, - .value_3 = 0.0f, - }, - StableFluidsFieldSourceDesc{ - .field = field_handles[1], - .center_x = center_x, - .center_y = center_y, - .center_z = center_z, - .radius = 0.045f, - .value_0 = 0.85f, - .value_1 = 0.22f, - .value_2 = 1.10f, - .value_3 = 0.0f, - }, + fill_kernel<<>>(force_x, 0.0f, nx, ny, nz); + fill_kernel<<>>(force_y, 0.0f, nx, ny, nz); + fill_kernel<<>>(force_z, 0.0f, nx, ny, nz); + fill_kernel<<>>(density_source, 0.0f, nx, ny, nz); + if (!cuda_ok(cudaGetLastError(), "fill_kernel")) return EXIT_FAILURE; + + const float lateral = std::sin(static_cast(frame) * 0.35f); + const float swirl = std::cos(static_cast(frame) * 0.27f); + add_blob_kernel<<>>(density_source, 32.0f, source_x, source_y, source_z, source_r, nx, ny, nz, cell_size); + add_blob_kernel<<>>(force_x, 2.2f * lateral, source_x, source_y, source_z, source_r, nx, ny, nz, cell_size); + add_blob_kernel<<>>(force_y, 7.5f, source_x, source_y, source_z, source_r, nx, ny, nz, cell_size); + add_blob_kernel<<>>(force_z, 1.8f * swirl, source_x, source_y, source_z, source_r, nx, ny, nz, cell_size); + if (!cuda_ok(cudaGetLastError(), "add_blob_kernel")) return EXIT_FAILURE; + + const StableFluidsFieldSourceDesc field_source{ + .field = field_handles[0], + .values = density_source, }; const StableFluidsStepDesc step_desc{ - .velocity_sources = &velocity_source, - .velocity_source_count = 1, - .field_sources = field_sources.data(), - .field_source_count = static_cast(field_sources.size()), + .force_x = force_x, + .force_y = force_y, + .force_z = force_z, + .field_sources = &field_source, + .field_source_count = 1, }; - if (!stable_ok(stable_fluids_step_cuda(context, &step_desc), "stable_fluids_step_cuda")) { - stable_fluids_destroy_context_cuda(context); - return EXIT_FAILURE; - } + if (!stable_ok(stable_fluids_step_cuda(context, &step_desc), "stable_fluids_step_cuda")) return EXIT_FAILURE; } - std::vector density(static_cast(nx) * static_cast(ny) * static_cast(nz), 0.0f); - float* device_density = nullptr; - const auto scalar_bytes = density.size() * sizeof(float); - if (!cuda_ok(cudaMalloc(reinterpret_cast(&device_density), scalar_bytes), "cudaMalloc export density")) { - stable_fluids_destroy_context_cuda(context); - return EXIT_FAILURE; - } + const StableFluidsExportDesc export_desc{ + .kind = STABLE_FLUIDS_EXPORT_FIELD, + .field = field_handles[0], + }; + if (!stable_ok(stable_fluids_export_cuda(context, &export_desc, device_density), "stable_fluids_export_cuda")) return EXIT_FAILURE; + if (!cuda_ok(cudaDeviceSynchronize(), "cudaDeviceSynchronize")) return EXIT_FAILURE; - if (!stable_ok(stable_fluids_export_field_components_cuda(context, field_handles[0], 0, 1, device_density), "stable_fluids_export_field_components_cuda")) { - cudaFree(device_density); - stable_fluids_destroy_context_cuda(context); - return EXIT_FAILURE; - } - if (!cuda_ok(cudaDeviceSynchronize(), "cudaDeviceSynchronize")) { - cudaFree(device_density); - stable_fluids_destroy_context_cuda(context); - return EXIT_FAILURE; - } - if (!cuda_ok(cudaMemcpy(density.data(), device_density, scalar_bytes, cudaMemcpyDeviceToHost), "cudaMemcpy density")) { - cudaFree(device_density); - stable_fluids_destroy_context_cuda(context); - return EXIT_FAILURE; - } + std::vector density(cell_count, 0.0f); + if (!cuda_ok(cudaMemcpy(density.data(), device_density, scalar_size, cudaMemcpyDeviceToHost), "cudaMemcpy density")) return EXIT_FAILURE; + cudaFree(force_x); + cudaFree(force_y); + cudaFree(force_z); + cudaFree(density_source); cudaFree(device_density); stable_fluids_destroy_context_cuda(context); const float total_density = std::accumulate(density.begin(), density.end(), 0.0f); - const float peak_density = density.empty() ? 0.0f : *std::max_element(density.begin(), density.end()); - const auto elapsed_ms = std::chrono::duration(std::chrono::steady_clock::now() - begin).count(); + const float peak_density = density.empty() ? 0.0f : *std::max_element(density.begin(), density.end()); + const auto elapsed_ms = std::chrono::duration(std::chrono::steady_clock::now() - begin).count(); std::printf("frames=%d total_density=%.6f peak_density=%.6f elapsed_ms=%.3f\n", frames, total_density, peak_density, elapsed_ms); return EXIT_SUCCESS; } diff --git a/stable-fluids-3d.h b/stable-fluids-3d.h index be51e00..0535666 100644 --- a/stable-fluids-3d.h +++ b/stable-fluids-3d.h @@ -20,77 +20,31 @@ extern "C" { #endif typedef enum StableFluidsResult { - STABLE_FLUIDS_RESULT_OK = 0, - STABLE_FLUIDS_RESULT_INVALID_ARGUMENT = 1, - STABLE_FLUIDS_RESULT_INVALID_CONTEXT = 2, - STABLE_FLUIDS_RESULT_INVALID_CONFIG = 3, - STABLE_FLUIDS_RESULT_INVALID_FIELD = 4, - STABLE_FLUIDS_RESULT_INVALID_SCENE = 5, - STABLE_FLUIDS_RESULT_INVALID_EXPORT = 6, - STABLE_FLUIDS_RESULT_OUT_OF_MEMORY = 7, - STABLE_FLUIDS_RESULT_BACKEND_FAILURE = 8, + STABLE_FLUIDS_RESULT_OK = 0, + STABLE_FLUIDS_RESULT_OUT_OF_MEMORY = 1, + STABLE_FLUIDS_RESULT_BACKEND_FAILURE = 2, } StableFluidsResult; -typedef enum StableFluidsVelocityBoundaryType { - STABLE_FLUIDS_VELOCITY_BOUNDARY_NO_SLIP = 0, - STABLE_FLUIDS_VELOCITY_BOUNDARY_FREE_SLIP = 1, - STABLE_FLUIDS_VELOCITY_BOUNDARY_INFLOW = 2, - STABLE_FLUIDS_VELOCITY_BOUNDARY_OUTFLOW = 3, -} StableFluidsVelocityBoundaryType; - -typedef enum StableFluidsColliderType { - STABLE_FLUIDS_COLLIDER_SPHERE = 0, - STABLE_FLUIDS_COLLIDER_BOX = 1, -} StableFluidsColliderType; - -typedef enum StableFluidsFieldExtensionMode { - STABLE_FLUIDS_FIELD_EXTENSION_CONSTANT = 0, - STABLE_FLUIDS_FIELD_EXTENSION_STREAK = 1, - STABLE_FLUIDS_FIELD_EXTENSION_REPEAT = 2, - STABLE_FLUIDS_FIELD_EXTENSION_EXTRAPOLATE = 3, -} StableFluidsFieldExtensionMode; - -typedef enum StableFluidsFieldFlags { - STABLE_FLUIDS_FIELD_ADVECT = 1u << 0, - STABLE_FLUIDS_FIELD_DIFFUSE = 1u << 1, -} StableFluidsFieldFlags; +typedef enum StableFluidsBoundaryMode { + STABLE_FLUIDS_BOUNDARY_FIXED = 0, + STABLE_FLUIDS_BOUNDARY_PERIODIC = 1, +} StableFluidsBoundaryMode; typedef uint32_t StableFluidsFieldHandle; -typedef struct StableFluidsBoundaryFaceDesc { - uint32_t type; - float velocity; -} StableFluidsBoundaryFaceDesc; +typedef struct StableFluidsBoundaryConfig { + uint32_t x; + uint32_t y; + uint32_t z; +} StableFluidsBoundaryConfig; typedef struct StableFluidsFieldCreateDesc { const char* name; - uint32_t component_count; - uint32_t flags; float diffusion; - uint32_t extension_mode; - float default_value_0; - float default_value_1; - float default_value_2; - float default_value_3; + float dissipation; + float initial_value; } StableFluidsFieldCreateDesc; -typedef struct StableFluidsBuoyancyDesc { - uint32_t field_index; - float weight; - float ambient; -} StableFluidsBuoyancyDesc; - -typedef struct StableFluidsContext_t* StableFluidsContext; - -typedef struct StableFluidsDomainBoundaryDesc { - StableFluidsBoundaryFaceDesc x_min; - StableFluidsBoundaryFaceDesc x_max; - StableFluidsBoundaryFaceDesc y_min; - StableFluidsBoundaryFaceDesc y_max; - StableFluidsBoundaryFaceDesc z_min; - StableFluidsBoundaryFaceDesc z_max; -} StableFluidsDomainBoundaryDesc; - typedef struct StableFluidsSimulationConfig { int32_t nx; int32_t ny; @@ -100,115 +54,51 @@ typedef struct StableFluidsSimulationConfig { float viscosity; int32_t diffuse_iterations; int32_t pressure_iterations; - float uniform_force_x; - float uniform_force_y; - float uniform_force_z; - StableFluidsDomainBoundaryDesc domain_boundary; + StableFluidsBoundaryConfig boundary; int32_t block_x; int32_t block_y; int32_t block_z; } StableFluidsSimulationConfig; +typedef struct StableFluidsContext_t* StableFluidsContext; + typedef struct StableFluidsContextCreateDesc { StableFluidsSimulationConfig config; void* stream; const StableFluidsFieldCreateDesc* fields; uint32_t field_count; - const StableFluidsBuoyancyDesc* buoyancy_terms; - uint32_t buoyancy_term_count; } StableFluidsContextCreateDesc; -typedef struct StableFluidsColliderDesc { - uint32_t collider_type; - uint32_t velocity_boundary_type; - float center_x; - float center_y; - float center_z; - float radius; - float half_extent_x; - float half_extent_y; - float half_extent_z; - float linear_velocity_x; - float linear_velocity_y; - float linear_velocity_z; -} StableFluidsColliderDesc; - -typedef struct StableFluidsSceneDesc { - const StableFluidsColliderDesc* colliders; - uint32_t collider_count; -} StableFluidsSceneDesc; - -typedef struct StableFluidsVelocitySourceDesc { - float center_x; - float center_y; - float center_z; - float radius; - float velocity_x; - float velocity_y; - float velocity_z; -} StableFluidsVelocitySourceDesc; - typedef struct StableFluidsFieldSourceDesc { StableFluidsFieldHandle field; - float center_x; - float center_y; - float center_z; - float radius; - float value_0; - float value_1; - float value_2; - float value_3; + const float* values; } StableFluidsFieldSourceDesc; typedef struct StableFluidsStepDesc { - const StableFluidsVelocitySourceDesc* velocity_sources; - uint32_t velocity_source_count; + const float* force_x; + const float* force_y; + const float* force_z; const StableFluidsFieldSourceDesc* field_sources; uint32_t field_source_count; } StableFluidsStepDesc; -typedef struct StableFluidsGridDesc { - int32_t nx; - int32_t ny; - int32_t nz; - float cell_size; -} StableFluidsGridDesc; - -typedef struct StableFluidsProjectionMetrics { - float max_abs_divergence; - float rms_divergence; -} StableFluidsProjectionMetrics; - -STABLE_FLUIDS_API StableFluidsResult stable_fluids_create_context_cuda( - const StableFluidsContextCreateDesc* desc, - StableFluidsContext* out_context, - StableFluidsFieldHandle* out_field_handles, - uint32_t out_field_handle_capacity -); +typedef enum StableFluidsExportKind { + STABLE_FLUIDS_EXPORT_FIELD = 0, + STABLE_FLUIDS_EXPORT_VELOCITY = 1, + STABLE_FLUIDS_EXPORT_VELOCITY_MAGNITUDE = 2, + STABLE_FLUIDS_EXPORT_PRESSURE = 3, + STABLE_FLUIDS_EXPORT_DIVERGENCE = 4, +} StableFluidsExportKind; + +typedef struct StableFluidsExportDesc { + uint32_t kind; + StableFluidsFieldHandle field; +} StableFluidsExportDesc; + +STABLE_FLUIDS_API StableFluidsResult stable_fluids_create_context_cuda(const StableFluidsContextCreateDesc* desc, StableFluidsContext* out_context, StableFluidsFieldHandle* out_field_handles, uint32_t out_field_handle_capacity); STABLE_FLUIDS_API StableFluidsResult stable_fluids_destroy_context_cuda(StableFluidsContext context); -STABLE_FLUIDS_API StableFluidsResult stable_fluids_reset_context_cuda(StableFluidsContext context); -STABLE_FLUIDS_API StableFluidsResult stable_fluids_update_scene_cuda(StableFluidsContext context, const StableFluidsSceneDesc* desc); STABLE_FLUIDS_API StableFluidsResult stable_fluids_step_cuda(StableFluidsContext context, const StableFluidsStepDesc* desc); -STABLE_FLUIDS_API StableFluidsResult stable_fluids_export_field_components_cuda( - StableFluidsContext context, - StableFluidsFieldHandle field_handle, - uint32_t component_offset, - uint32_t component_count, - void* destination -); -STABLE_FLUIDS_API StableFluidsResult stable_fluids_export_alpha_rgb_rgba_cuda( - StableFluidsContext context, - StableFluidsFieldHandle alpha_field, - StableFluidsFieldHandle rgb_field, - void* destination -); -STABLE_FLUIDS_API StableFluidsResult stable_fluids_export_velocity_cuda(StableFluidsContext context, void* destination); -STABLE_FLUIDS_API StableFluidsResult stable_fluids_export_velocity_magnitude_cuda(StableFluidsContext context, void* destination); -STABLE_FLUIDS_API StableFluidsResult stable_fluids_export_solid_mask_cuda(StableFluidsContext context, void* destination); -STABLE_FLUIDS_API StableFluidsResult stable_fluids_export_pressure_cuda(StableFluidsContext context, void* destination); -STABLE_FLUIDS_API StableFluidsResult stable_fluids_export_divergence_cuda(StableFluidsContext context, void* destination); -STABLE_FLUIDS_API StableFluidsResult stable_fluids_get_projection_metrics_cuda(StableFluidsContext context, StableFluidsProjectionMetrics* out_metrics); -STABLE_FLUIDS_API StableFluidsResult stable_fluids_get_grid_desc_cuda(StableFluidsContext context, StableFluidsGridDesc* out_desc); +STABLE_FLUIDS_API StableFluidsResult stable_fluids_export_cuda(StableFluidsContext context, const StableFluidsExportDesc* desc, void* destination); #ifdef __cplusplus } diff --git a/vulkan-app/app.cpp b/vulkan-app/app.cpp index 7822aba..40e3926 100644 --- a/vulkan-app/app.cpp +++ b/vulkan-app/app.cpp @@ -8,8 +8,8 @@ module; #include #endif -#include #include +#include #include #include @@ -60,26 +60,26 @@ namespace app { camera_.home(); DescriptorSetLayoutBinding field_binding{ - .binding = 0, - .descriptorType = DescriptorType::eStorageBuffer, + .binding = 0, + .descriptorType = DescriptorType::eStorageBuffer, .descriptorCount = 1, - .stageFlags = ShaderStageFlagBits::eFragment, + .stageFlags = ShaderStageFlagBits::eFragment, }; DescriptorSetLayoutCreateInfo field_layout_ci{ .bindingCount = 1, - .pBindings = &field_binding, + .pBindings = &field_binding, }; field_set_layout_ = raii::DescriptorSetLayout{vkctx_.device, field_layout_ci}; DescriptorPoolSize field_pool_size{ - .type = DescriptorType::eStorageBuffer, + .type = DescriptorType::eStorageBuffer, .descriptorCount = 128, }; DescriptorPoolCreateInfo field_pool_ci{ - .flags = DescriptorPoolCreateFlagBits::eFreeDescriptorSet, - .maxSets = 128, + .flags = DescriptorPoolCreateFlagBits::eFreeDescriptorSet, + .maxSets = 128, .poolSizeCount = 1, - .pPoolSizes = &field_pool_size, + .pPoolSizes = &field_pool_size, }; field_descriptor_pool_ = raii::DescriptorPool{vkctx_.device, field_pool_ci}; @@ -91,14 +91,14 @@ namespace app { std::array pipeline_set_layouts{*field_set_layout_}; pipeline::GraphicsPipelineDesc pipeline_desc{ - .color_format = sc_.format, - .use_depth = false, - .use_blend = false, - .topology = PrimitiveTopology::eTriangleList, - .cull = CullModeFlagBits::eNone, - .push_constant_bytes = sizeof(FieldPushConstants), + .color_format = sc_.format, + .use_depth = false, + .use_blend = false, + .topology = PrimitiveTopology::eTriangleList, + .cull = CullModeFlagBits::eNone, + .push_constant_bytes = sizeof(FieldPushConstants), .push_constant_stages = ShaderStageFlagBits::eVertex | ShaderStageFlagBits::eFragment, - .set_layouts = pipeline_set_layouts, + .set_layouts = pipeline_set_layouts, }; pipeline::VertexInput empty_vertex_input{}; @@ -127,95 +127,131 @@ namespace app { const auto now = std::chrono::steady_clock::now(); const float dt_seconds = std::chrono::duration(now - last_frame_time_).count(); last_frame_time_ = now; - if (dt_seconds > 0.0f) { - const float instantaneous_fps = 1.0f / dt_seconds; - render_fps_ = render_fps_ > 0.0f ? std::lerp(render_fps_, instantaneous_fps, 0.1f) : instantaneous_fps; - } if (sctx_.resize_requested) recreate_swapchain(); vk::imgui::begin_frame(); - collect_camera_input(dt_seconds); - } - void VisualizationApp::draw_visualization_ui(VisualizationSettings& settings, const std::optional& snapshot) { - auto smoke_capable = [&](const VisualizationSnapshotView& view) { - return view.field.semantic == FieldSemantic::DyeColor && view.field.component_count == 4; - }; + double mouse_x = 0.0; + double mouse_y = 0.0; + glfwGetCursorPos(window_, &mouse_x, &mouse_y); - bool reframe_requested = false; - if (snapshot && !smoke_capable(*snapshot) && settings.render_mode == RenderMode::Smoke) settings.render_mode = RenderMode::Scalar; + float mouse_dx = 0.0f; + float mouse_dy = 0.0f; + if (window_state_.first_mouse) { + window_state_.first_mouse = false; + } else { + mouse_dx = static_cast(mouse_x - window_state_.last_x); + mouse_dy = static_cast(mouse_y - window_state_.last_y); + } + window_state_.last_x = mouse_x; + window_state_.last_y = mouse_y; - ImGui::Begin("Visualization"); - if (snapshot) { - ImGui::Text("Field: %.*s", static_cast(snapshot->field.label.size()), snapshot->field.label.data()); - ImGui::Text("Grid: %u x %u x %u", snapshot->grid.nx, snapshot->grid.ny, snapshot->grid.nz); - ImGui::Text("Generation: %llu", static_cast(snapshot->field.ready_generation)); - - int view_mode = static_cast(settings.view_mode); - const char* labels[] = {"Plane", "Volume"}; - if (ImGui::Combo("View", &view_mode, labels, 2)) { - settings.view_mode = static_cast(view_mode); - reframe_requested = true; - } + auto& io = ImGui::GetIO(); + vk::camera::CameraInput camera_input{}; + if (!io.WantCaptureMouse) { + camera_input.lmb = glfwGetMouseButton(window_, GLFW_MOUSE_BUTTON_LEFT) == GLFW_PRESS; + camera_input.mmb = glfwGetMouseButton(window_, GLFW_MOUSE_BUTTON_MIDDLE) == GLFW_PRESS; + camera_input.rmb = glfwGetMouseButton(window_, GLFW_MOUSE_BUTTON_RIGHT) == GLFW_PRESS; + camera_input.mouse_dx = mouse_dx; + camera_input.mouse_dy = mouse_dy; + camera_input.scroll = window_state_.scroll; + } + if (!io.WantCaptureKeyboard) { + camera_input.forward = glfwGetKey(window_, GLFW_KEY_W) == GLFW_PRESS; + camera_input.backward = glfwGetKey(window_, GLFW_KEY_S) == GLFW_PRESS; + camera_input.left = glfwGetKey(window_, GLFW_KEY_A) == GLFW_PRESS; + camera_input.right = glfwGetKey(window_, GLFW_KEY_D) == GLFW_PRESS; + camera_input.up = glfwGetKey(window_, GLFW_KEY_E) == GLFW_PRESS; + camera_input.down = glfwGetKey(window_, GLFW_KEY_Q) == GLFW_PRESS; + camera_input.shift = glfwGetKey(window_, GLFW_KEY_LEFT_SHIFT) == GLFW_PRESS || glfwGetKey(window_, GLFW_KEY_RIGHT_SHIFT) == GLFW_PRESS; + camera_input.ctrl = glfwGetKey(window_, GLFW_KEY_LEFT_CONTROL) == GLFW_PRESS || glfwGetKey(window_, GLFW_KEY_RIGHT_CONTROL) == GLFW_PRESS; + camera_input.alt = glfwGetKey(window_, GLFW_KEY_LEFT_ALT) == GLFW_PRESS || glfwGetKey(window_, GLFW_KEY_RIGHT_ALT) == GLFW_PRESS; + camera_input.space = glfwGetKey(window_, GLFW_KEY_SPACE) == GLFW_PRESS; + } + window_state_.scroll = 0.0f; + camera_.update(dt_seconds, sc_.extent.width, sc_.extent.height, camera_input); + } - const auto resolved_view = settings.view_mode; - if (resolved_view == ViewMode::Plane) { - int plane_axis = static_cast(settings.plane_axis); - const char* axis_labels[] = {"XY", "XZ", "YZ"}; - if (ImGui::Combo("Plane Axis", &plane_axis, axis_labels, 3)) { - settings.plane_axis = static_cast(plane_axis); - reframe_requested = true; + void VisualizationApp::draw_visualization_ui(AppState& state, const SceneInfo& scene, const std::span fields, const std::span scene_labels, bool& reset_requested, bool& field_changed, bool& scene_changed, const std::optional& snapshot) { + bool reframe_requested = false; + auto& settings = state.render; + state.selected_scene = std::clamp(state.selected_scene, 0, static_cast(scene_labels.empty() ? 0 : scene_labels.size() - 1)); + if (fields.empty()) throw std::runtime_error("scene must expose at least one field"); + state.selected_field = std::clamp(state.selected_field, 0, static_cast(fields.size()) - 1); + const auto& field = fields[static_cast(state.selected_field)]; + + ImGui::Begin("Stable Fluids"); + if (scene_labels.size() > 1 && ImGui::BeginCombo("Scene", scene_labels[static_cast(state.selected_scene)].data())) { + for (int i = 0; i < static_cast(scene_labels.size()); ++i) { + const bool is_selected = state.selected_scene == i; + if (ImGui::Selectable(scene_labels[static_cast(i)].data(), is_selected)) { + state.selected_scene = i; + state.selected_field = 0; + scene_changed = true; + reframe_requested = true; } - ImGui::SliderFloat("Slice", &settings.slice_position, 0.0f, 1.0f, "%.3f"); + if (is_selected) ImGui::SetItemDefaultFocus(); } - - if (smoke_capable(*snapshot)) { - int render_mode = static_cast(settings.render_mode); - const char* labels[] = {"Smoke", "Scalar"}; - if (ImGui::Combo("Field Mode", &render_mode, labels, 2)) settings.render_mode = static_cast(render_mode); - } else { - ImGui::TextUnformatted("Field Mode: Scalar"); + ImGui::EndCombo(); + } + if (ImGui::BeginCombo("Field", field.label.data())) { + for (int i = 0; i < static_cast(fields.size()); ++i) { + const bool is_selected = state.selected_field == i; + if (ImGui::Selectable(fields[static_cast(i)].label.data(), is_selected)) { + state.selected_field = i; + apply_field_preset(settings, fields[static_cast(i)].preset); + field_changed = true; + } + if (is_selected) ImGui::SetItemDefaultFocus(); } + ImGui::EndCombo(); + } - ImGui::SliderFloat("Density Scale", &settings.density_scale, 0.05f, 8.0f, "%.2f"); - if (resolved_view == ViewMode::Volume) ImGui::SliderInt("March Steps", &settings.march_steps, 24, 224); - if (settings.render_mode == RenderMode::Smoke) { - ImGui::SliderFloat("Absorption", &settings.absorption, 0.05f, 8.0f, "%.2f"); - } else { - ImGui::SliderFloat("Value Min", &settings.scalar_min, -200.0f, 200.0f, "%.3f"); - ImGui::SliderFloat("Value Max", &settings.scalar_max, -200.0f, 200.0f, "%.3f"); - ImGui::SliderFloat("Opacity", &settings.scalar_opacity, 0.05f, 8.0f, "%.2f"); - ImGui::ColorEdit3("Low Color", &settings.scalar_low_r); - ImGui::ColorEdit3("High Color", &settings.scalar_high_r); - } + ImGui::Checkbox("Pause", &state.paused); + ImGui::SameLine(); + if (ImGui::Button("Reset")) reset_requested = true; - ImGui::Separator(); - ImGui::Checkbox("Show Bounds", &settings.show_bounds); - ImGui::Checkbox("Show Collider", &settings.show_collider); - ImGui::Checkbox("Show Velocity Plane", &settings.show_velocity_plane); - if (settings.show_velocity_plane) { - ImGui::SliderInt("Vector Grid", &settings.velocity_grid, 4, 48); - ImGui::SliderInt("Vector Steps", &settings.velocity_steps, 4, 96); - ImGui::SliderFloat("Vector Step", &settings.velocity_step, 0.10f, 3.0f, "%.2f"); - ImGui::SliderFloat("Min Speed", &settings.velocity_min_speed, 0.01f, 4.0f, "%.3f"); - ImGui::SliderFloat("Line Width", &settings.velocity_thickness, 0.5f, 4.0f, "%.2f"); + int view_mode = static_cast(settings.view_mode); + constexpr std::array view_labels{ + "Plane", + "Volume", + }; + if (ImGui::Combo("View", &view_mode, view_labels.data(), static_cast(view_labels.size()))) { + settings.view_mode = static_cast(view_mode); + reframe_requested = true; + } + + if (settings.view_mode == ViewMode::Plane || settings.show_velocity_plane) { + int plane_axis = static_cast(settings.plane_axis); + constexpr std::array plane_labels{ + "XY", + "XZ", + "YZ", + }; + if (ImGui::Combo("Axis", &plane_axis, plane_labels.data(), static_cast(plane_labels.size()))) { + settings.plane_axis = static_cast(plane_axis); + reframe_requested = true; } - } else { - ImGui::TextUnformatted("Field: None"); + ImGui::SliderFloat("Slice", &settings.slice_position, 0.0f, 1.0f, "%.3f"); } + if (ImGui::Checkbox("Velocity Plane", &settings.show_velocity_plane)) field_changed = true; + if (settings.show_velocity_plane) { + ImGui::SliderInt("Arrow Seeds", &settings.velocity_plane_seed_count, 4, 48); + ImGui::SliderFloat("Arrow Scale", &settings.velocity_plane_arrow_cells, 0.1f, 2.0f, "%.2f"); + ImGui::SliderFloat("Min Speed", &settings.velocity_plane_min_speed, 0.001f, 0.5f, "%.3f"); + ImGui::SliderFloat("Arrow Width", &settings.velocity_plane_thickness, 0.5f, 4.0f, "%.2f"); + } + + ImGui::Separator(); + ImGui::Text("Grid: %u x %u x %u", scene.grid.nx, scene.grid.ny, scene.grid.nz); + ImGui::Text("dt: %.5f h: %.4f", scene.dt, scene.grid.cell_size); + ImGui::Text("Field: %.*s", static_cast(field.label.size()), field.label.data()); + ImGui::Text("Steps: %llu", static_cast(scene.step_count)); + ImGui::Text("Step Call: %.3f ms", scene.last_step_call_ms); ImGui::End(); if (reframe_requested && snapshot) frame_content(settings, *snapshot); - - if (const ImGuiViewport* viewport = ImGui::GetMainViewport()) { - ImGui::SetNextWindowPos(ImVec2(viewport->Pos.x + 12.0f, viewport->Pos.y + 12.0f), ImGuiCond_Always); - ImGui::SetNextWindowBgAlpha(0.35f); - ImGuiWindowFlags overlay_flags = ImGuiWindowFlags_NoDecoration | ImGuiWindowFlags_AlwaysAutoResize | ImGuiWindowFlags_NoSavedSettings | ImGuiWindowFlags_NoFocusOnAppearing | ImGuiWindowFlags_NoNav | ImGuiWindowFlags_NoMove | ImGuiWindowFlags_NoInputs; - ImGui::Begin("Render Stats Overlay", nullptr, overlay_flags); - ImGui::Text("Render: %.1f FPS", render_fps_); - ImGui::End(); - } } bool VisualizationApp::render_frame(const VisualizationSettings& settings, const std::optional& snapshot) { @@ -235,35 +271,35 @@ namespace app { const uint32_t image_index = acquire_result.image_index; const ImageLayout previous_layout = frames_.swapchain_image_layout[image_index]; const ImageMemoryBarrier2 to_color_barrier{ - .srcStageMask = previous_layout == ImageLayout::eUndefined ? PipelineStageFlagBits2::eNone : PipelineStageFlagBits2::eAllCommands, - .srcAccessMask = previous_layout == ImageLayout::eUndefined ? AccessFlags2{} : (AccessFlagBits2::eMemoryRead | AccessFlagBits2::eMemoryWrite), - .dstStageMask = PipelineStageFlagBits2::eColorAttachmentOutput, - .dstAccessMask = AccessFlagBits2::eColorAttachmentWrite, - .oldLayout = previous_layout, - .newLayout = ImageLayout::eColorAttachmentOptimal, - .image = sc_.images[image_index], + .srcStageMask = previous_layout == ImageLayout::eUndefined ? PipelineStageFlagBits2::eNone : PipelineStageFlagBits2::eAllCommands, + .srcAccessMask = previous_layout == ImageLayout::eUndefined ? AccessFlags2{} : (AccessFlagBits2::eMemoryRead | AccessFlagBits2::eMemoryWrite), + .dstStageMask = PipelineStageFlagBits2::eColorAttachmentOutput, + .dstAccessMask = AccessFlagBits2::eColorAttachmentWrite, + .oldLayout = previous_layout, + .newLayout = ImageLayout::eColorAttachmentOptimal, + .image = sc_.images[image_index], .subresourceRange = ImageSubresourceRange{ImageAspectFlagBits::eColor, 0, 1, 0, 1}, }; cmd.pipelineBarrier2(DependencyInfo{ .imageMemoryBarrierCount = 1, - .pImageMemoryBarriers = &to_color_barrier, + .pImageMemoryBarriers = &to_color_barrier, }); frames_.swapchain_image_layout[image_index] = ImageLayout::eColorAttachmentOptimal; ClearValue clear_value{}; - clear_value.color = ClearColorValue{std::array{0.035f, 0.04f, 0.05f, 1.0f}}; + clear_value.color = ClearColorValue{std::array{settings.background_bottom_r, settings.background_bottom_g, settings.background_bottom_b, 1.0f}}; RenderingAttachmentInfo color_attachment{ - .imageView = *sc_.image_views[image_index], + .imageView = *sc_.image_views[image_index], .imageLayout = ImageLayout::eColorAttachmentOptimal, - .loadOp = AttachmentLoadOp::eClear, - .storeOp = AttachmentStoreOp::eStore, - .clearValue = clear_value, + .loadOp = AttachmentLoadOp::eClear, + .storeOp = AttachmentStoreOp::eStore, + .clearValue = clear_value, }; RenderingInfo rendering_info{ - .renderArea = Rect2D{Offset2D{0, 0}, sc_.extent}, - .layerCount = 1, + .renderArea = Rect2D{Offset2D{0, 0}, sc_.extent}, + .layerCount = 1, .colorAttachmentCount = 1, - .pColorAttachments = &color_attachment, + .pColorAttachments = &color_attachment, }; cmd.beginRendering(rendering_info); @@ -277,15 +313,11 @@ namespace app { }); cmd.setScissor(0, Rect2D{{0, 0}, sc_.extent}); - ViewMode resolved_view = ViewMode::Plane; - PlaneAxis resolved_plane = settings.plane_axis; if (snapshot) { - resolved_view = settings.view_mode; - const auto& matrices = camera_.matrices(); + const auto& matrices = camera_.matrices(); const auto& camera_config = camera_.config(); - const float aspect = static_cast(sc_.extent.width) / static_cast((std::max)(sc_.extent.height, 1u)); - const float half_fov_tan = std::tan(camera_config.fov_y_rad * 0.5f); - const float slice_position = std::clamp(settings.slice_position, 0.0f, 1.0f); + const float aspect = static_cast(sc_.extent.width) / static_cast((std::max) (sc_.extent.height, 1u)); + const float half_fov_tan = std::tan(camera_config.fov_y_rad * 0.5f); FieldPushConstants push{}; push.eye = {matrices.eye.x, matrices.eye.y, matrices.eye.z, 1.0f}; push.right = {matrices.right.x, matrices.right.y, matrices.right.z, 0.0f}; @@ -293,13 +325,15 @@ namespace app { push.forward = {matrices.forward.x, matrices.forward.y, matrices.forward.z, 0.0f}; push.volume_min = {0.0f, 0.0f, 0.0f, 0.0f}; push.volume_max = {snapshot->grid.extent_x(), snapshot->grid.extent_y(), snapshot->grid.extent_z(), 0.0f}; - push.color_a = settings.render_mode == RenderMode::Smoke ? vk::math::vec4{} : vk::math::vec4{settings.scalar_low_r, settings.scalar_low_g, settings.scalar_low_b, 1.0f}; - push.color_b = settings.render_mode == RenderMode::Smoke ? vk::math::vec4{} : vk::math::vec4{settings.scalar_high_r, settings.scalar_high_g, settings.scalar_high_b, 1.0f}; + push.background_bottom = {settings.background_bottom_r, settings.background_bottom_g, settings.background_bottom_b, 1.0f}; + push.background_top = {settings.background_top_r, settings.background_top_g, settings.background_top_b, 1.0f}; + push.color_a = {settings.scalar_low_r, settings.scalar_low_g, settings.scalar_low_b, 1.0f}; + push.color_b = {settings.scalar_high_r, settings.scalar_high_g, settings.scalar_high_b, 1.0f}; push.params0 = { aspect, half_fov_tan, settings.density_scale, - settings.render_mode == RenderMode::Smoke ? settings.absorption : settings.scalar_opacity, + settings.scalar_opacity, }; push.params1 = { snapshot->grid.nx, @@ -308,19 +342,19 @@ namespace app { static_cast(settings.march_steps), }; push.params2 = { - static_cast(settings.render_mode), - snapshot->field.component_count, - static_cast(resolved_plane), static_cast(camera_config.projection), + static_cast(settings.plane_axis), + settings.shaded_volume ? 1u : 0u, + 0u, }; push.params3 = { settings.scalar_min, settings.scalar_max, - slice_position, + settings.slice_position, camera_config.ortho_height, }; - const auto& pipeline = resolved_view == ViewMode::Volume ? volume_pipeline_ : plane_pipeline_; + const auto& pipeline = settings.view_mode == ViewMode::Volume ? volume_pipeline_ : plane_pipeline_; cmd.bindPipeline(PipelineBindPoint::eGraphics, *pipeline.pipeline); cmd.bindDescriptorSets(PipelineBindPoint::eGraphics, *pipeline.layout, 0, {snapshot->field.descriptor_set}, {}); const ArrayProxy push_block(1, &push); @@ -329,7 +363,7 @@ namespace app { } cmd.endRendering(); - if (snapshot && (settings.show_bounds || (settings.show_collider && snapshot->collider.enabled) || (settings.show_velocity_plane && snapshot->velocity.data != nullptr))) { + if (snapshot && settings.show_velocity_plane && snapshot->velocity != nullptr) { if (ImGuiViewport* viewport = ImGui::GetMainViewport()) { ImDrawList* draw_list = ImGui::GetForegroundDrawList(viewport); const auto& view_proj = camera_.matrices().view_proj; @@ -345,25 +379,18 @@ namespace app { out.y = viewport->Pos.y + (1.0f - (ndc_y * 0.5f + 0.5f)) * viewport->Size.y; return true; }; - auto draw_segment = [&](const vk::math::vec3& a, const vk::math::vec3& b, const ImU32 color, const float thickness) { + auto draw_segment = [&](const vk::math::vec3& a, const vk::math::vec3& b, const ImU32 color) { ImVec2 screen_a{}; ImVec2 screen_b{}; if (!project_point(a, screen_a)) return; if (!project_point(b, screen_b)) return; - draw_list->AddLine(screen_a, screen_b, color, thickness); - }; - auto draw_box = [&](const std::array& corners, const ImU32 color, const float thickness) { - constexpr std::array, 12> edges{{ - {0, 1}, {1, 2}, {2, 3}, {3, 0}, - {4, 5}, {5, 6}, {6, 7}, {7, 4}, - {0, 4}, {1, 5}, {2, 6}, {3, 7}, - }}; - for (const auto& edge : edges) draw_segment(corners[static_cast(edge[0])], corners[static_cast(edge[1])], color, thickness); + draw_list->AddLine(screen_a, screen_b, color, settings.velocity_plane_thickness); }; auto sample_velocity = [&](const float px, const float py, const float pz) { const auto nx = static_cast(snapshot->grid.nx); const auto ny = static_cast(snapshot->grid.ny); const auto nz = static_cast(snapshot->grid.nz); + const auto cell_count = static_cast(nx) * static_cast(ny) * static_cast(nz); const float gx = std::clamp(px / snapshot->grid.cell_size - 0.5f, 0.0f, static_cast(nx - 1)); const float gy = std::clamp(py / snapshot->grid.cell_size - 0.5f, 0.0f, static_cast(ny - 1)); const float gz = std::clamp(pz / snapshot->grid.cell_size - 0.5f, 0.0f, static_cast(nz - 1)); @@ -376,16 +403,16 @@ namespace app { const float tx = gx - static_cast(x0); const float ty = gy - static_cast(y0); const float tz = gz - static_cast(z0); - const auto load = [&](const int x, const int y, const int z) { + auto load = [&](const int x, const int y, const int z) { const auto index = static_cast(x) + static_cast(nx) * (static_cast(y) + static_cast(ny) * static_cast(z)); return vk::math::vec3{ - snapshot->velocity.data[index * 3u + 0u], - snapshot->velocity.data[index * 3u + 1u], - snapshot->velocity.data[index * 3u + 2u], + snapshot->velocity[index], + snapshot->velocity[cell_count + index], + snapshot->velocity[cell_count * 2u + index], 0.0f, }; }; - const auto lerp3 = [&](const vk::math::vec3& a, const vk::math::vec3& b, const float t) { + auto lerp3 = [&](const vk::math::vec3& a, const vk::math::vec3& b, const float t) { return vk::math::vec3{ std::lerp(a.x, b.x, t), std::lerp(a.y, b.y, t), @@ -400,151 +427,116 @@ namespace app { return lerp3(lerp3(c00, c10, ty), lerp3(c01, c11, ty), tz); }; + const PlaneAxis plane_axis = settings.plane_axis; const float max_x = snapshot->grid.extent_x(); const float max_y = snapshot->grid.extent_y(); const float max_z = snapshot->grid.extent_z(); - if (settings.show_bounds) { - const std::array bounds_corners{ - vk::math::vec3{0.0f, 0.0f, 0.0f, 0.0f}, - vk::math::vec3{max_x, 0.0f, 0.0f, 0.0f}, - vk::math::vec3{max_x, max_y, 0.0f, 0.0f}, - vk::math::vec3{0.0f, max_y, 0.0f, 0.0f}, - vk::math::vec3{0.0f, 0.0f, max_z, 0.0f}, - vk::math::vec3{max_x, 0.0f, max_z, 0.0f}, - vk::math::vec3{max_x, max_y, max_z, 0.0f}, - vk::math::vec3{0.0f, max_y, max_z, 0.0f}, + const float slice_position = std::clamp(settings.slice_position, 0.0f, 1.0f); + const int seed_count = (std::max)(settings.velocity_plane_seed_count, 2); + const float min_speed = (std::max)(settings.velocity_plane_min_speed, 1.0e-5f); + const float arrow_scale = settings.velocity_plane_arrow_cells * snapshot->grid.cell_size; + std::array plane_corners{}; + if (plane_axis == PlaneAxis::XY) { + const float z = slice_position * max_z; + plane_corners = { + vk::math::vec3{0.0f, 0.0f, z, 0.0f}, + vk::math::vec3{max_x, 0.0f, z, 0.0f}, + vk::math::vec3{max_x, max_y, z, 0.0f}, + vk::math::vec3{0.0f, max_y, z, 0.0f}, }; - draw_box(bounds_corners, IM_COL32(236, 238, 244, 196), 1.6f); } - - if (settings.show_collider && snapshot->collider.enabled) { - if (snapshot->collider.type == 0u) { - constexpr int ring_segments = 48; - constexpr float tau = 6.28318530718f; - const vk::math::vec3 center{snapshot->collider.center_x, snapshot->collider.center_y, snapshot->collider.center_z, 0.0f}; - auto draw_ring = [&](const int plane) { - for (int i = 0; i < ring_segments; ++i) { - const float angle_a = tau * static_cast(i) / static_cast(ring_segments); - const float angle_b = tau * static_cast(i + 1) / static_cast(ring_segments); - const float cos_a = std::cos(angle_a); - const float sin_a = std::sin(angle_a); - const float cos_b = std::cos(angle_b); - const float sin_b = std::sin(angle_b); - vk::math::vec3 point_a{}; - vk::math::vec3 point_b{}; - if (plane == 0) { - point_a = {center.x + cos_a * snapshot->collider.radius, center.y + sin_a * snapshot->collider.radius, center.z, 0.0f}; - point_b = {center.x + cos_b * snapshot->collider.radius, center.y + sin_b * snapshot->collider.radius, center.z, 0.0f}; - } else if (plane == 1) { - point_a = {center.x + cos_a * snapshot->collider.radius, center.y, center.z + sin_a * snapshot->collider.radius, 0.0f}; - point_b = {center.x + cos_b * snapshot->collider.radius, center.y, center.z + sin_b * snapshot->collider.radius, 0.0f}; - } else { - point_a = {center.x, center.y + cos_a * snapshot->collider.radius, center.z + sin_a * snapshot->collider.radius, 0.0f}; - point_b = {center.x, center.y + cos_b * snapshot->collider.radius, center.z + sin_b * snapshot->collider.radius, 0.0f}; - } - draw_segment(point_a, point_b, IM_COL32(255, 176, 92, 224), 2.0f); - } + if (plane_axis == PlaneAxis::XZ) { + const float y = slice_position * max_y; + plane_corners = { + vk::math::vec3{0.0f, y, 0.0f, 0.0f}, + vk::math::vec3{max_x, y, 0.0f, 0.0f}, + vk::math::vec3{max_x, y, max_z, 0.0f}, + vk::math::vec3{0.0f, y, max_z, 0.0f}, + }; + } + if (plane_axis == PlaneAxis::YZ) { + const float x = slice_position * max_x; + plane_corners = { + vk::math::vec3{x, 0.0f, 0.0f, 0.0f}, + vk::math::vec3{x, max_y, 0.0f, 0.0f}, + vk::math::vec3{x, max_y, max_z, 0.0f}, + vk::math::vec3{x, 0.0f, max_z, 0.0f}, + }; + } + draw_segment(plane_corners[0], plane_corners[1], IM_COL32(112, 220, 255, 120)); + draw_segment(plane_corners[1], plane_corners[2], IM_COL32(112, 220, 255, 120)); + draw_segment(plane_corners[2], plane_corners[3], IM_COL32(112, 220, 255, 120)); + draw_segment(plane_corners[3], plane_corners[0], IM_COL32(112, 220, 255, 120)); + + for (int j = 0; j < seed_count; ++j) { + for (int i = 0; i < seed_count; ++i) { + const float u = (static_cast(i) + 0.5f) / static_cast(seed_count); + const float v = (static_cast(j) + 0.5f) / static_cast(seed_count); + vk::math::vec3 pos{}; + if (plane_axis == PlaneAxis::XY) pos = {u * max_x, v * max_y, slice_position * max_z, 0.0f}; + if (plane_axis == PlaneAxis::XZ) pos = {u * max_x, slice_position * max_y, v * max_z, 0.0f}; + if (plane_axis == PlaneAxis::YZ) pos = {slice_position * max_x, u * max_y, v * max_z, 0.0f}; + const auto velocity = sample_velocity(pos.x, pos.y, pos.z); + vk::math::vec3 plane_velocity{}; + if (plane_axis == PlaneAxis::XY) plane_velocity = {velocity.x, velocity.y, 0.0f, 0.0f}; + if (plane_axis == PlaneAxis::XZ) plane_velocity = {velocity.x, 0.0f, velocity.z, 0.0f}; + if (plane_axis == PlaneAxis::YZ) plane_velocity = {0.0f, velocity.y, velocity.z, 0.0f}; + const float speed = std::sqrt(plane_velocity.x * plane_velocity.x + plane_velocity.y * plane_velocity.y + plane_velocity.z * plane_velocity.z); + if (speed < min_speed) continue; + const float inv_speed = 1.0f / speed; + const vk::math::vec3 direction{ + plane_velocity.x * inv_speed, + plane_velocity.y * inv_speed, + plane_velocity.z * inv_speed, + 0.0f, }; - draw_ring(0); - draw_ring(1); - draw_ring(2); - } else { - const float min_x = snapshot->collider.center_x - snapshot->collider.half_x; - const float min_y = snapshot->collider.center_y - snapshot->collider.half_y; - const float min_z = snapshot->collider.center_z - snapshot->collider.half_z; - const float max_cx = snapshot->collider.center_x + snapshot->collider.half_x; - const float max_cy = snapshot->collider.center_y + snapshot->collider.half_y; - const float max_cz = snapshot->collider.center_z + snapshot->collider.half_z; - const std::array collider_corners{ - vk::math::vec3{min_x, min_y, min_z, 0.0f}, - vk::math::vec3{max_cx, min_y, min_z, 0.0f}, - vk::math::vec3{max_cx, max_cy, min_z, 0.0f}, - vk::math::vec3{min_x, max_cy, min_z, 0.0f}, - vk::math::vec3{min_x, min_y, max_cz, 0.0f}, - vk::math::vec3{max_cx, min_y, max_cz, 0.0f}, - vk::math::vec3{max_cx, max_cy, max_cz, 0.0f}, - vk::math::vec3{min_x, max_cy, max_cz, 0.0f}, + vk::math::vec3 side{}; + if (plane_axis == PlaneAxis::XY) side = {-direction.y, direction.x, 0.0f, 0.0f}; + if (plane_axis == PlaneAxis::XZ) side = {-direction.z, 0.0f, direction.x, 0.0f}; + if (plane_axis == PlaneAxis::YZ) side = {0.0f, -direction.z, direction.y, 0.0f}; + const float speed_t = std::clamp(speed / (min_speed * 8.0f), 0.0f, 1.0f); + const float glyph_length = arrow_scale * std::lerp(0.35f, 1.0f, speed_t); + const float head_length = glyph_length * 0.34f; + const float wing_span = head_length * 0.55f; + vk::math::vec3 tip{ + pos.x + direction.x * glyph_length, + pos.y + direction.y * glyph_length, + pos.z + direction.z * glyph_length, + 0.0f, }; - draw_box(collider_corners, IM_COL32(255, 176, 92, 224), 2.0f); - } - } - - if (settings.show_velocity_plane && snapshot->velocity.data != nullptr) { - const float slice_position = std::clamp(settings.slice_position, 0.0f, 1.0f); - const int seed_count = (std::max)(settings.velocity_grid, 2); - const int step_count = (std::max)(settings.velocity_steps, 1); - const float step_scale = settings.velocity_step * snapshot->grid.cell_size; - std::array plane_corners{}; - if (resolved_plane == PlaneAxis::XY) { - const float z = slice_position * max_z; - plane_corners = { - vk::math::vec3{0.0f, 0.0f, z, 0.0f}, - vk::math::vec3{max_x, 0.0f, z, 0.0f}, - vk::math::vec3{max_x, max_y, z, 0.0f}, - vk::math::vec3{0.0f, max_y, z, 0.0f}, + tip.x = std::clamp(tip.x, 0.0f, max_x); + tip.y = std::clamp(tip.y, 0.0f, max_y); + tip.z = std::clamp(tip.z, 0.0f, max_z); + if (plane_axis == PlaneAxis::XY) tip.z = pos.z; + if (plane_axis == PlaneAxis::XZ) tip.y = pos.y; + if (plane_axis == PlaneAxis::YZ) tip.x = pos.x; + const vk::math::vec3 head_base{ + tip.x - direction.x * head_length, + tip.y - direction.y * head_length, + tip.z - direction.z * head_length, + 0.0f, }; - } else if (resolved_plane == PlaneAxis::XZ) { - const float y = slice_position * max_y; - plane_corners = { - vk::math::vec3{0.0f, y, 0.0f, 0.0f}, - vk::math::vec3{max_x, y, 0.0f, 0.0f}, - vk::math::vec3{max_x, y, max_z, 0.0f}, - vk::math::vec3{0.0f, y, max_z, 0.0f}, + const vk::math::vec3 head_left{ + head_base.x + side.x * wing_span, + head_base.y + side.y * wing_span, + head_base.z + side.z * wing_span, + 0.0f, }; - } else { - const float x = slice_position * max_x; - plane_corners = { - vk::math::vec3{x, 0.0f, 0.0f, 0.0f}, - vk::math::vec3{x, max_y, 0.0f, 0.0f}, - vk::math::vec3{x, max_y, max_z, 0.0f}, - vk::math::vec3{x, 0.0f, max_z, 0.0f}, + const vk::math::vec3 head_right{ + head_base.x - side.x * wing_span, + head_base.y - side.y * wing_span, + head_base.z - side.z * wing_span, + 0.0f, }; - } - draw_segment(plane_corners[0], plane_corners[1], IM_COL32(112, 220, 255, 120), 1.0f); - draw_segment(plane_corners[1], plane_corners[2], IM_COL32(112, 220, 255, 120), 1.0f); - draw_segment(plane_corners[2], plane_corners[3], IM_COL32(112, 220, 255, 120), 1.0f); - draw_segment(plane_corners[3], plane_corners[0], IM_COL32(112, 220, 255, 120), 1.0f); - - for (int j = 0; j < seed_count; ++j) { - for (int i = 0; i < seed_count; ++i) { - const float u = (static_cast(i) + 0.5f) / static_cast(seed_count); - const float v = (static_cast(j) + 0.5f) / static_cast(seed_count); - vk::math::vec3 pos{}; - if (resolved_plane == PlaneAxis::XY) pos = {u * max_x, v * max_y, slice_position * max_z, 0.0f}; - if (resolved_plane == PlaneAxis::XZ) pos = {u * max_x, slice_position * max_y, v * max_z, 0.0f}; - if (resolved_plane == PlaneAxis::YZ) pos = {slice_position * max_x, u * max_y, v * max_z, 0.0f}; - for (int step = 0; step < step_count; ++step) { - const auto velocity = sample_velocity(pos.x, pos.y, pos.z); - vk::math::vec3 plane_velocity{}; - if (resolved_plane == PlaneAxis::XY) plane_velocity = {velocity.x, velocity.y, 0.0f, 0.0f}; - if (resolved_plane == PlaneAxis::XZ) plane_velocity = {velocity.x, 0.0f, velocity.z, 0.0f}; - if (resolved_plane == PlaneAxis::YZ) plane_velocity = {0.0f, velocity.y, velocity.z, 0.0f}; - const float speed = std::sqrt(plane_velocity.x * plane_velocity.x + plane_velocity.y * plane_velocity.y + plane_velocity.z * plane_velocity.z); - if (speed < settings.velocity_min_speed) break; - const float inv_speed = 1.0f / speed; - vk::math::vec3 next{ - pos.x + plane_velocity.x * inv_speed * step_scale, - pos.y + plane_velocity.y * inv_speed * step_scale, - pos.z + plane_velocity.z * inv_speed * step_scale, - 0.0f, - }; - next.x = std::clamp(next.x, 0.0f, max_x); - next.y = std::clamp(next.y, 0.0f, max_y); - next.z = std::clamp(next.z, 0.0f, max_z); - if (resolved_plane == PlaneAxis::XY) next.z = pos.z; - if (resolved_plane == PlaneAxis::XZ) next.y = pos.y; - if (resolved_plane == PlaneAxis::YZ) next.x = pos.x; - const float speed_t = std::clamp(speed / ((std::max)(settings.velocity_min_speed, 1.0e-4f) * 8.0f), 0.0f, 1.0f); - const ImU32 color = IM_COL32( - static_cast(std::lerp(72.0f, 255.0f, speed_t)), - static_cast(std::lerp(196.0f, 212.0f, speed_t)), - static_cast(std::lerp(255.0f, 96.0f, speed_t)), - static_cast(std::lerp(112.0f, 224.0f, speed_t)) - ); - draw_segment(pos, next, color, settings.velocity_thickness); - pos = next; - } - } + const ImU32 color = IM_COL32( + static_cast(std::lerp(72.0f, 255.0f, speed_t)), + static_cast(std::lerp(196.0f, 212.0f, speed_t)), + static_cast(std::lerp(255.0f, 96.0f, speed_t)), + static_cast(std::lerp(112.0f, 224.0f, speed_t)) + ); + draw_segment(pos, tip, color); + draw_segment(head_left, tip, color); + draw_segment(head_right, tip, color); } } } @@ -559,17 +551,17 @@ namespace app { imgui::render(imgui_sys_, cmd, sc_.extent, *sc_.image_views[image_index], ImageLayout::eColorAttachmentOptimal); const ImageMemoryBarrier2 to_present_barrier{ - .srcStageMask = PipelineStageFlagBits2::eColorAttachmentOutput, - .srcAccessMask = AccessFlagBits2::eColorAttachmentWrite, - .dstStageMask = PipelineStageFlagBits2::eBottomOfPipe, - .oldLayout = ImageLayout::eColorAttachmentOptimal, - .newLayout = ImageLayout::ePresentSrcKHR, - .image = sc_.images[image_index], + .srcStageMask = PipelineStageFlagBits2::eColorAttachmentOutput, + .srcAccessMask = AccessFlagBits2::eColorAttachmentWrite, + .dstStageMask = PipelineStageFlagBits2::eBottomOfPipe, + .oldLayout = ImageLayout::eColorAttachmentOptimal, + .newLayout = ImageLayout::ePresentSrcKHR, + .image = sc_.images[image_index], .subresourceRange = ImageSubresourceRange{ImageAspectFlagBits::eColor, 0, 1, 0, 1}, }; cmd.pipelineBarrier2(DependencyInfo{ .imageMemoryBarrierCount = 1, - .pImageMemoryBarriers = &to_present_barrier, + .pImageMemoryBarriers = &to_present_barrier, }); frames_.swapchain_image_layout[image_index] = ImageLayout::ePresentSrcKHR; @@ -578,7 +570,7 @@ namespace app { if (snapshot && snapshot->field.timeline_semaphore) { volume_waits[0] = SemaphoreSubmitInfo{ .semaphore = snapshot->field.timeline_semaphore, - .value = snapshot->field.ready_generation, + .value = snapshot->field.ready_generation, .stageMask = PipelineStageFlagBits2::eFragmentShader, }; extra_waits = std::span(volume_waits.data(), volume_waits.size()); @@ -594,38 +586,38 @@ namespace app { void VisualizationApp::frame_content(const VisualizationSettings& settings, const VisualizationSnapshotView& snapshot) { auto update_camera_config = [&](const vk::camera::Projection projection, const float ortho_height) { - auto camera_config = camera_.config(); - camera_config.projection = projection; + auto camera_config = camera_.config(); + camera_config.projection = projection; camera_config.ortho_height = ortho_height; camera_.set_config(camera_config); }; - const auto resolved_view = settings.view_mode; - const float center_x = snapshot.grid.extent_x() * 0.5f; - const float center_y = snapshot.grid.extent_y() * 0.5f; - const float center_z = snapshot.grid.extent_z() * 0.5f; + const float center_x = snapshot.grid.extent_x() * 0.5f; + const float center_y = snapshot.grid.extent_y() * 0.5f; + const float center_z = snapshot.grid.extent_z() * 0.5f; vk::camera::CameraState camera_state = camera_.state(); - camera_state.mode = vk::camera::Mode::Orbit; - camera_state.orbit.target = {center_x, center_y, center_z, 0.0f}; - camera_state.orbit.distance = snapshot.grid.max_extent() * 1.5f; + camera_state.mode = vk::camera::Mode::Orbit; + camera_state.orbit.target = {center_x, center_y, center_z, 0.0f}; + camera_state.orbit.distance = snapshot.grid.max_extent() * 1.35f; - if (resolved_view == ViewMode::Plane) { + if (settings.view_mode == ViewMode::Plane) { update_camera_config(vk::camera::Projection::Orthographic, snapshot.grid.max_extent() * 1.1f); - const PlaneAxis plane_axis = settings.plane_axis; - if (plane_axis == PlaneAxis::XY) { - camera_state.orbit.yaw_rad = 0.0f; + if (settings.plane_axis == PlaneAxis::XY) { + camera_state.orbit.yaw_rad = 0.0f; camera_state.orbit.pitch_rad = 0.0f; - } else if (plane_axis == PlaneAxis::XZ) { - camera_state.orbit.yaw_rad = 0.0f; + } + if (settings.plane_axis == PlaneAxis::XZ) { + camera_state.orbit.yaw_rad = 0.0f; camera_state.orbit.pitch_rad = -1.55334303427f; - } else { - camera_state.orbit.yaw_rad = 1.57079632679f; + } + if (settings.plane_axis == PlaneAxis::YZ) { + camera_state.orbit.yaw_rad = 1.57079632679f; camera_state.orbit.pitch_rad = 0.0f; } } else { update_camera_config(vk::camera::Projection::Perspective, snapshot.grid.max_extent()); - camera_state.orbit.distance = snapshot.grid.max_extent() * 1.15f; - camera_state.orbit.yaw_rad = 0.0f; + camera_state.orbit.distance = snapshot.grid.max_extent() * 1.10f; + camera_state.orbit.yaw_rad = 0.0f; camera_state.orbit.pitch_rad = 0.0f; } camera_.set_state(camera_state); @@ -642,9 +634,9 @@ namespace app { std::vector VisualizationApp::allocate_field_descriptor_sets(const uint32_t count) { std::vector field_layouts(count, *field_set_layout_); vk::DescriptorSetAllocateInfo field_alloc_info{ - .descriptorPool = *field_descriptor_pool_, + .descriptorPool = *field_descriptor_pool_, .descriptorSetCount = count, - .pSetLayouts = field_layouts.data(), + .pSetLayouts = field_layouts.data(), }; return vkctx_.device.allocateDescriptorSets(field_alloc_info); } @@ -662,515 +654,28 @@ namespace app { sctx_.resize_requested = false; } - void VisualizationApp::collect_camera_input(const float dt_seconds) { - double mouse_x = 0.0; - double mouse_y = 0.0; - glfwGetCursorPos(window_, &mouse_x, &mouse_y); - - float mouse_dx = 0.0f; - float mouse_dy = 0.0f; - if (window_state_.first_mouse) { - window_state_.first_mouse = false; - } else { - mouse_dx = static_cast(mouse_x - window_state_.last_x); - mouse_dy = static_cast(mouse_y - window_state_.last_y); - } - window_state_.last_x = mouse_x; - window_state_.last_y = mouse_y; - - auto& io = ImGui::GetIO(); - vk::camera::CameraInput camera_input{}; - if (!io.WantCaptureMouse) { - camera_input.lmb = glfwGetMouseButton(window_, GLFW_MOUSE_BUTTON_LEFT) == GLFW_PRESS; - camera_input.mmb = glfwGetMouseButton(window_, GLFW_MOUSE_BUTTON_MIDDLE) == GLFW_PRESS; - camera_input.rmb = glfwGetMouseButton(window_, GLFW_MOUSE_BUTTON_RIGHT) == GLFW_PRESS; - camera_input.mouse_dx = mouse_dx; - camera_input.mouse_dy = mouse_dy; - camera_input.scroll = window_state_.scroll; - } - if (!io.WantCaptureKeyboard) { - camera_input.forward = glfwGetKey(window_, GLFW_KEY_W) == GLFW_PRESS; - camera_input.backward = glfwGetKey(window_, GLFW_KEY_S) == GLFW_PRESS; - camera_input.left = glfwGetKey(window_, GLFW_KEY_A) == GLFW_PRESS; - camera_input.right = glfwGetKey(window_, GLFW_KEY_D) == GLFW_PRESS; - camera_input.up = glfwGetKey(window_, GLFW_KEY_E) == GLFW_PRESS; - camera_input.down = glfwGetKey(window_, GLFW_KEY_Q) == GLFW_PRESS; - camera_input.shift = glfwGetKey(window_, GLFW_KEY_LEFT_SHIFT) == GLFW_PRESS || glfwGetKey(window_, GLFW_KEY_RIGHT_SHIFT) == GLFW_PRESS; - camera_input.ctrl = glfwGetKey(window_, GLFW_KEY_LEFT_CONTROL) == GLFW_PRESS || glfwGetKey(window_, GLFW_KEY_RIGHT_CONTROL) == GLFW_PRESS; - camera_input.alt = glfwGetKey(window_, GLFW_KEY_LEFT_ALT) == GLFW_PRESS || glfwGetKey(window_, GLFW_KEY_RIGHT_ALT) == GLFW_PRESS; - camera_input.space = glfwGetKey(window_, GLFW_KEY_SPACE) == GLFW_PRESS; - } - window_state_.scroll = 0.0f; - - camera_.update(dt_seconds, sc_.extent.width, sc_.extent.height, camera_input); - } - namespace { constexpr uint32_t snapshot_slot_count = 4; - constexpr std::array boundary_labels{ - "No-slip", - "Free-slip", - "Inflow", - "Outflow", - }; - - constexpr std::array collider_type_labels{ - "Sphere", - "Box", - }; - - constexpr std::array scene_preset_labels{ - "Dual Jet Collider", - "Smoke Plume", - "Custom", - }; - - constexpr std::array field_catalog_storage{ - FieldInfo{ - .id = FieldId::SmokeColor, - .label = "Smoke Color", - .component_count = 4, - .semantic = FieldSemantic::DyeColor, - .preset = { - .display_mode = FieldDisplayMode::Smoke, - .density_scale = 10.0f, - .absorption = 3.0f, - .scalar_min = 0.0f, - .scalar_max = 1.0f, - .scalar_opacity = 2.0f, - .scalar_low_r = 0.08f, - .scalar_low_g = 0.18f, - .scalar_low_b = 0.46f, - .scalar_high_r = 0.98f, - .scalar_high_g = 0.82f, - .scalar_high_b = 0.24f, - }, - }, - FieldInfo{ - .id = FieldId::Density, - .label = "Density", - .component_count = 1, - .semantic = FieldSemantic::Density, - .preset = { - .display_mode = FieldDisplayMode::Scalar, - .density_scale = 10.0f, - .absorption = 3.0f, - .scalar_min = 0.0f, - .scalar_max = 1.40f, - .scalar_opacity = 3.00f, - .scalar_low_r = 0.10f, - .scalar_low_g = 0.08f, - .scalar_low_b = 0.30f, - .scalar_high_r = 1.00f, - .scalar_high_g = 0.24f, - .scalar_high_b = 0.74f, - }, - }, - FieldInfo{ - .id = FieldId::VelocityMagnitude, - .label = "Velocity Magnitude", - .component_count = 1, - .semantic = FieldSemantic::VelocityMagnitude, - .preset = { - .display_mode = FieldDisplayMode::Scalar, - .density_scale = 10.0f, - .absorption = 3.0f, - .scalar_min = 0.0f, - .scalar_max = 1.20f, - .scalar_opacity = 1.80f, - .scalar_low_r = 0.06f, - .scalar_low_g = 0.10f, - .scalar_low_b = 0.42f, - .scalar_high_r = 0.18f, - .scalar_high_g = 0.88f, - .scalar_high_b = 1.00f, - }, - }, - FieldInfo{ - .id = FieldId::SolidMask, - .label = "Solid Mask", - .component_count = 1, - .semantic = FieldSemantic::GenericScalar, - .preset = { - .display_mode = FieldDisplayMode::Scalar, - .density_scale = 1.0f, - .absorption = 1.20f, - .scalar_min = 0.0f, - .scalar_max = 1.0f, - .scalar_opacity = 3.2f, - .scalar_low_r = 0.05f, - .scalar_low_g = 0.06f, - .scalar_low_b = 0.07f, - .scalar_high_r = 0.94f, - .scalar_high_g = 0.92f, - .scalar_high_b = 0.88f, - }, - }, - FieldInfo{ - .id = FieldId::Pressure, - .label = "Pressure", - .component_count = 1, - .semantic = FieldSemantic::GenericScalar, - .preset = { - .display_mode = FieldDisplayMode::Scalar, - .density_scale = 1.0f, - .absorption = 1.20f, - .scalar_min = -0.12f, - .scalar_max = 0.12f, - .scalar_opacity = 2.20f, - .scalar_low_r = 0.06f, - .scalar_low_g = 0.10f, - .scalar_low_b = 0.42f, - .scalar_high_r = 0.18f, - .scalar_high_g = 0.88f, - .scalar_high_b = 1.00f, - }, - }, - FieldInfo{ - .id = FieldId::Divergence, - .label = "Divergence", - .component_count = 1, - .semantic = FieldSemantic::GenericScalar, - .preset = { - .display_mode = FieldDisplayMode::Scalar, - .density_scale = 1.0f, - .absorption = 1.20f, - .scalar_min = -120.0f, - .scalar_max = 120.0f, - .scalar_opacity = 1.80f, - .scalar_low_r = 0.06f, - .scalar_low_g = 0.10f, - .scalar_low_b = 0.42f, - .scalar_high_r = 0.18f, - .scalar_high_g = 0.88f, - .scalar_high_b = 1.00f, - }, - }, - }; - - struct CaptureRequest { - GridShape grid{}; - uint32_t field_component_count = 1; - FieldSemantic semantic = FieldSemantic::GenericScalar; - std::string_view label{}; - bool export_velocity_host = false; - }; - - struct CaptureResources { - void* field_cuda_ptr = nullptr; - void* velocity_cuda_ptr = nullptr; - float* velocity_host_ptr = nullptr; - cudaExternalSemaphore_t external_semaphore = nullptr; - uint64_t ready_generation = 0; - }; - - void check_cuda(const cudaError_t status, const std::string_view what) { - if (status == cudaSuccess) return; - throw std::runtime_error(std::string(what) + ": " + cudaGetErrorString(status)); - } - - void check_stable(const StableFluidsResult code, const std::string_view what) { - if (code == STABLE_FLUIDS_RESULT_OK) return; - throw std::runtime_error(std::string(what) + " failed (" + std::to_string(static_cast(code)) + ")"); - } - - uint64_t field_bytes_for(const CaptureRequest& request) { - const uint64_t nx = request.grid.nx; - const uint64_t ny = request.grid.ny; - const uint64_t nz = static_cast((std::max)(request.grid.nz, 1u)); - return nx * ny * nz * static_cast((std::max)(request.field_component_count, 1u)) * sizeof(float); - } - - uint64_t velocity_bytes_for(const CaptureRequest& request) { - if (!request.export_velocity_host) return 0; - const uint64_t nx = request.grid.nx; - const uint64_t ny = request.grid.ny; - const uint64_t nz = static_cast((std::max)(request.grid.nz, 1u)); - return nx * ny * nz * 3ull * sizeof(float); - } - - StableFluidsGridDesc physics_grid_desc(const AppData& data) { - StableFluidsGridDesc desc{}; - check_stable(stable_fluids_get_grid_desc_cuda(data.physics.context, &desc), "stable_fluids_get_grid_desc_cuda"); - return desc; - } - - ColliderOverlay collider_overlay(const AppState& state) { - return ColliderOverlay{ - .enabled = state.physics.scene.collider.enabled, - .type = static_cast(state.physics.scene.collider.type), - .center_x = state.physics.scene.collider.center_x, - .center_y = state.physics.scene.collider.center_y, - .center_z = state.physics.scene.collider.center_z, - .radius = state.physics.scene.collider.radius, - .half_x = state.physics.scene.collider.half_extent_x, - .half_y = state.physics.scene.collider.half_extent_y, - .half_z = state.physics.scene.collider.half_extent_z, - }; - } - - void export_field(const AppState& state, const AppData& data, const FieldId field, void* destination) { - if (field == FieldId::SmokeColor) { - check_stable(stable_fluids_export_alpha_rgb_rgba_cuda(data.physics.context, data.physics.density_field, data.physics.dye_field, destination), "stable_fluids_export_alpha_rgb_rgba_cuda"); - return; - } - if (field == FieldId::Density) { - check_stable(stable_fluids_export_field_components_cuda(data.physics.context, data.physics.density_field, 0, 1, destination), "stable_fluids_export_field_components_cuda"); - return; - } - if (field == FieldId::VelocityMagnitude) { - check_stable(stable_fluids_export_velocity_magnitude_cuda(data.physics.context, destination), "stable_fluids_export_velocity_magnitude_cuda"); - return; - } - if (field == FieldId::SolidMask) { - check_stable(stable_fluids_export_solid_mask_cuda(data.physics.context, destination), "stable_fluids_export_solid_mask_cuda"); - return; - } - if (field == FieldId::Pressure) { - check_stable(stable_fluids_export_pressure_cuda(data.physics.context, destination), "stable_fluids_export_pressure_cuda"); - return; - } - check_stable(stable_fluids_export_divergence_cuda(data.physics.context, destination), "stable_fluids_export_divergence_cuda"); - } - - void export_velocity(const AppData& data, void* destination) { - check_stable(stable_fluids_export_velocity_cuda(data.physics.context, destination), "stable_fluids_export_velocity_cuda"); - } - - CaptureRequest make_capture_request(AppState& state, const AppData& data) { - const auto grid = physics_grid_desc(data); - const auto& field = current_field_info(state); - return CaptureRequest{ - .grid = { - .nx = static_cast(grid.nx), - .ny = static_cast(grid.ny), - .nz = static_cast((std::max)(grid.nz, 1)), - .cell_size = grid.cell_size, - }, - .field_component_count = field.component_count, - .semantic = field.semantic, - .label = field.label, - .export_velocity_host = state.ui.render.show_velocity_plane, - }; - } - - bool capture_matches_request(const AppData& data, const CaptureRequest& request) { - return !data.capture.slots.empty() - && data.capture.request_grid.nx == request.grid.nx - && data.capture.request_grid.ny == request.grid.ny - && data.capture.request_grid.nz == request.grid.nz - && data.capture.request_grid.cell_size == request.grid.cell_size - && data.capture.request_field_component_count == request.field_component_count - && data.capture.request_export_velocity_host == request.export_velocity_host; - } - - int find_available_capture_slot(const AppData& data, const uint32_t frames_in_flight) { - for (uint32_t slot_index = 0; slot_index < data.capture.slots.size(); ++slot_index) { - const auto& slot = data.capture.slots[slot_index]; - if (static_cast(slot_index) == data.capture.active_slot) continue; - if (slot.ready_generation != 0 && data.capture.submit_serial < slot.last_used_submit_serial + frames_in_flight + 1) continue; - return static_cast(slot_index); - } - return -1; - } - - CaptureResources begin_capture(AppData& data, const int slot_index) { - auto& slot = data.capture.slots.at(static_cast(slot_index)); - return CaptureResources{ - .field_cuda_ptr = slot.field_cuda_ptr, - .velocity_cuda_ptr = slot.velocity_cuda_ptr, - .velocity_host_ptr = slot.velocity_host.empty() ? nullptr : slot.velocity_host.data(), - .external_semaphore = slot.external_semaphore, - .ready_generation = data.capture.generation + 1, - }; - } - - void complete_capture(AppData& data, const int slot_index, const CaptureRequest& request, const double capture_ms) { - auto& slot = data.capture.slots.at(static_cast(slot_index)); - slot.ready_generation = data.capture.generation + 1; - slot.grid = request.grid; - slot.field_component_count = request.field_component_count; - slot.semantic = request.semantic; - slot.label = request.label; - slot.has_velocity_host = request.export_velocity_host; - data.capture.generation = slot.ready_generation; - data.capture.active_slot = slot_index; - data.capture.steps_since_snapshot = 0; - data.capture.stats.last_snapshot_ms = capture_ms; - ++data.capture.stats.snapshot_count; - data.capture.stats.average_snapshot_ms += (capture_ms - data.capture.stats.average_snapshot_ms) / static_cast(data.capture.stats.snapshot_count); - } - - void destroy_capture_storage(AppData& data) { - for (auto& slot : data.capture.slots) { - if (slot.field_cuda_ptr != nullptr) cudaFree(slot.field_cuda_ptr); - if (slot.velocity_cuda_ptr != nullptr) cudaFree(slot.velocity_cuda_ptr); - if (slot.external_semaphore != nullptr) cudaDestroyExternalSemaphore(slot.external_semaphore); - if (slot.external_memory != nullptr) cudaDestroyExternalMemory(slot.external_memory); - slot = {}; - } - data.capture = {}; - } - - void upload_scene(const AppState& state, AppData& data) { - StableFluidsSceneDesc scene_desc{ - .colliders = nullptr, - .collider_count = 0, - }; - StableFluidsColliderDesc collider{ - .collider_type = static_cast(state.physics.scene.collider.type == ColliderType::Sphere ? STABLE_FLUIDS_COLLIDER_SPHERE : STABLE_FLUIDS_COLLIDER_BOX), - .velocity_boundary_type = state.physics.scene.collider.boundary, - .center_x = state.physics.scene.collider.center_x, - .center_y = state.physics.scene.collider.center_y, - .center_z = state.physics.scene.collider.center_z, - .radius = state.physics.scene.collider.radius, - .half_extent_x = state.physics.scene.collider.half_extent_x, - .half_extent_y = state.physics.scene.collider.half_extent_y, - .half_extent_z = state.physics.scene.collider.half_extent_z, - .linear_velocity_x = state.physics.scene.collider.velocity_x, - .linear_velocity_y = state.physics.scene.collider.velocity_y, - .linear_velocity_z = state.physics.scene.collider.velocity_z, - }; - if (state.physics.scene.collider.enabled) { - scene_desc.colliders = &collider; - scene_desc.collider_count = 1; - } - check_stable(stable_fluids_update_scene_cuda(data.physics.context, &scene_desc), "stable_fluids_update_scene_cuda"); - } - } // namespace - std::span field_catalog() { - return field_catalog_storage; - } - - const FieldInfo& current_field_info(AppState& state) { - auto& selected = state.physics.selected_field; - selected = std::clamp(selected, 0, static_cast(field_catalog_storage.size()) - 1); - return field_catalog_storage[static_cast(selected)]; - } - - void apply_scene_preset(AppState& state, const ScenePreset preset) { - const int selected_field = state.physics.selected_field; - state.physics = {}; - state.physics.preset = preset; - state.physics.selected_field = selected_field; - state.physics.scene.collider = ColliderSettings{ - .enabled = true, - .type = ColliderType::Sphere, - .center_x = 0.50f, - .center_y = 0.40f, - .center_z = 0.50f, - .radius = 0.15f, - .half_extent_x = 0.10f, - .half_extent_y = 0.08f, - .half_extent_z = 0.10f, - .velocity_x = 0.0f, - .velocity_y = 0.0f, - .velocity_z = 0.0f, - .boundary = static_cast(STABLE_FLUIDS_VELOCITY_BOUNDARY_NO_SLIP), - }; - - if (preset == ScenePreset::SmokePlume) { - state.physics.solver.backend.pressure_iterations = 120; - state.physics.solver.backend.domain_boundary = { - .x_min = { .type = static_cast(STABLE_FLUIDS_VELOCITY_BOUNDARY_FREE_SLIP), .velocity = 0.0f, }, - .x_max = { .type = static_cast(STABLE_FLUIDS_VELOCITY_BOUNDARY_FREE_SLIP), .velocity = 0.0f, }, - .y_min = { .type = static_cast(STABLE_FLUIDS_VELOCITY_BOUNDARY_NO_SLIP), .velocity = 0.0f, }, - .y_max = { .type = static_cast(STABLE_FLUIDS_VELOCITY_BOUNDARY_OUTFLOW), .velocity = 0.0f, }, - .z_min = { .type = static_cast(STABLE_FLUIDS_VELOCITY_BOUNDARY_FREE_SLIP), .velocity = 0.0f, }, - .z_max = { .type = static_cast(STABLE_FLUIDS_VELOCITY_BOUNDARY_FREE_SLIP), .velocity = 0.0f, }, - }; - state.physics.solver.density_diffusion = 0.00001f; - state.physics.solver.dye_diffusion = 0.000008f; - state.physics.solver.buoyancy_beta = 0.65f; - state.physics.scene.collider.enabled = false; - state.physics.emitters.a = SourceEmitterSettings{ - .enabled = true, - .center_x = 0.50f, - .center_y = 0.04f, - .center_z = 0.50f, - .speed = 0.03f, - .radius = 0.035f, - .density_amount = 1.40f, - .dye_amount = 1.10f, - .color_r = 0.95f, - .color_g = 0.92f, - .color_b = 0.86f, - }; - state.physics.emitters.b = {}; - return; - } - - state.physics.solver.density_diffusion = 0.00003f; - state.physics.solver.dye_diffusion = 0.000015f; - state.physics.solver.buoyancy_beta = 0.12f; - state.physics.emitters.a = SourceEmitterSettings{ - .enabled = true, - .center_x = 0.1f, - .center_y = 0.1f, - .center_z = 0.5f, - .direction_x = 1.0f, - .direction_y = 0.0f, - .direction_z = 0.0f, - .speed = 0.62f, - .radius = 0.045f, - .density_amount = 0.80f, - .dye_amount = 0.95f, - .color_r = 1.00f, - .color_g = 0.20f, - .color_b = 0.72f, - }; - state.physics.emitters.b = SourceEmitterSettings{ - .enabled = true, - .center_x = 0.9f, - .center_y = 0.1f, - .center_z = 0.5f, - .direction_x = -1.0f, - .direction_y = 0.0f, - .direction_z = 0.0f, - .speed = 0.62f, - .radius = 0.045f, - .density_amount = 0.80f, - .dye_amount = 0.95f, - .color_r = 0.12f, - .color_g = 0.38f, - .color_b = 1.00f, - }; - } - - void apply_field_visual_preset(AppState& state) { - const auto& preset = current_field_info(state).preset; - auto& render = state.ui.render; - render.render_mode = preset.display_mode == FieldDisplayMode::Smoke ? RenderMode::Smoke : RenderMode::Scalar; - render.density_scale = preset.density_scale; - render.absorption = preset.absorption; - render.scalar_min = preset.scalar_min; - render.scalar_max = preset.scalar_max; - render.scalar_opacity = preset.scalar_opacity; - render.scalar_low_r = preset.scalar_low_r; - render.scalar_low_g = preset.scalar_low_g; - render.scalar_low_b = preset.scalar_low_b; - render.scalar_high_r = preset.scalar_high_r; - render.scalar_high_g = preset.scalar_high_g; - render.scalar_high_b = preset.scalar_high_b; - } - - void create_runtime_data(AppData& data) { - destroy_runtime_data(data); - check_cuda(cudaStreamCreateWithFlags(&data.physics.stream, cudaStreamNonBlocking), "cudaStreamCreateWithFlags"); - } - void destroy_runtime_data(AppData& data) { - destroy_capture_storage(data); - if (data.physics.context != nullptr) stable_fluids_destroy_context_cuda(data.physics.context); - if (data.physics.stream != nullptr) cudaStreamDestroy(data.physics.stream); - data.physics = {}; + for (auto& slot : data.capture.slots) { + if (slot.field_cuda_ptr != nullptr) cudaFree(slot.field_cuda_ptr); + if (slot.velocity_cuda_ptr != nullptr) cudaFree(slot.velocity_cuda_ptr); + if (slot.external_semaphore != nullptr) cudaDestroyExternalSemaphore(slot.external_semaphore); + if (slot.external_memory != nullptr) cudaDestroyExternalMemory(slot.external_memory); + slot = {}; + } + data.capture = {}; } void check_interop_support(const VisualizationApp& renderer) { + auto check_cuda = [](const cudaError_t status, const std::string_view what) { + if (status == cudaSuccess) return; + throw std::runtime_error(std::string(what) + ": " + cudaGetErrorString(status)); + }; const auto timeline_features = renderer.vk_context().physical_device.getFeatures2(); if (!timeline_features.get().timelineSemaphore) throw std::runtime_error("stable-fluids visualizer requires Vulkan timeline semaphore support"); int cuda_device_index = 0; @@ -1180,170 +685,52 @@ namespace app { if (timeline_supported == 0) throw std::runtime_error("CUDA timeline semaphore interop is required"); } - void rebuild_physics(AppState& state, AppData& data) { - const float extent_x = static_cast(state.physics.solver.backend.nx) * state.physics.solver.backend.cell_size; - const float extent_y = static_cast(state.physics.solver.backend.ny) * state.physics.solver.backend.cell_size; - const float extent_z = static_cast((std::max)(state.physics.solver.backend.nz, 1)) * state.physics.solver.backend.cell_size; - const float min_extent = (std::min)({extent_x, extent_y, extent_z}); - - auto clamp_emitter = [&](SourceEmitterSettings& emitter) { - emitter.center_x = std::clamp(emitter.center_x, 0.0f, extent_x); - emitter.center_y = std::clamp(emitter.center_y, 0.0f, extent_y); - emitter.center_z = std::clamp(emitter.center_z, 0.0f, extent_z); - emitter.radius = std::clamp(emitter.radius, state.physics.solver.backend.cell_size, min_extent * 0.25f); - emitter.speed = std::max(emitter.speed, 0.0f); - }; - clamp_emitter(state.physics.emitters.a); - clamp_emitter(state.physics.emitters.b); - auto& collider = state.physics.scene.collider; - collider.center_x = std::clamp(collider.center_x, 0.0f, extent_x); - collider.center_y = std::clamp(collider.center_y, 0.0f, extent_y); - collider.center_z = std::clamp(collider.center_z, 0.0f, extent_z); - collider.radius = std::clamp(collider.radius, state.physics.solver.backend.cell_size, min_extent * 0.45f); - collider.half_extent_x = std::clamp(collider.half_extent_x, state.physics.solver.backend.cell_size, extent_x * 0.45f); - collider.half_extent_y = std::clamp(collider.half_extent_y, state.physics.solver.backend.cell_size, extent_y * 0.45f); - collider.half_extent_z = std::clamp(collider.half_extent_z, state.physics.solver.backend.cell_size, extent_z * 0.45f); - - if (data.physics.context != nullptr) { - check_stable(stable_fluids_destroy_context_cuda(data.physics.context), "stable_fluids_destroy_context_cuda"); - data.physics.context = nullptr; - } - data.physics.density_field = 0; - data.physics.dye_field = 0; - - std::array fields{ - StableFluidsFieldCreateDesc{ - .name = "density", - .component_count = 1, - .flags = STABLE_FLUIDS_FIELD_ADVECT | STABLE_FLUIDS_FIELD_DIFFUSE, - .diffusion = state.physics.solver.density_diffusion, - .extension_mode = static_cast(STABLE_FLUIDS_FIELD_EXTENSION_STREAK), - .default_value_0 = 0.0f, - .default_value_1 = 0.0f, - .default_value_2 = 0.0f, - .default_value_3 = 0.0f, - }, - StableFluidsFieldCreateDesc{ - .name = "dye", - .component_count = 3, - .flags = STABLE_FLUIDS_FIELD_ADVECT | STABLE_FLUIDS_FIELD_DIFFUSE, - .diffusion = state.physics.solver.dye_diffusion, - .extension_mode = static_cast(STABLE_FLUIDS_FIELD_EXTENSION_STREAK), - .default_value_0 = 0.0f, - .default_value_1 = 0.0f, - .default_value_2 = 0.0f, - .default_value_3 = 0.0f, - }, - }; - const float buoyancy_weight = -state.physics.solver.gravity_y * state.physics.solver.buoyancy_beta; - std::array buoyancy_terms{ - StableFluidsBuoyancyDesc{ - .field_index = 0, - .weight = buoyancy_weight, - .ambient = state.physics.solver.ambient_density, - }, - }; - const uint32_t buoyancy_term_count = std::abs(buoyancy_weight) > 1.0e-6f ? static_cast(buoyancy_terms.size()) : 0u; - std::array field_handles{}; - StableFluidsContextCreateDesc create_desc{ - .config = state.physics.solver.backend, - .stream = data.physics.stream, - .fields = fields.data(), - .field_count = static_cast(fields.size()), - .buoyancy_terms = buoyancy_term_count > 0 ? buoyancy_terms.data() : nullptr, - .buoyancy_term_count = buoyancy_term_count, - }; - check_stable(stable_fluids_create_context_cuda(&create_desc, &data.physics.context, field_handles.data(), static_cast(field_handles.size())), "stable_fluids_create_context_cuda"); - data.physics.density_field = field_handles[0]; - data.physics.dye_field = field_handles[1]; - data.physics.stats = {}; - - upload_scene(state, data); + void apply_field_preset(VisualizationSettings& settings, const FieldVisualPreset& preset) { + settings.density_scale = preset.density_scale; + settings.scalar_min = preset.scalar_min; + settings.scalar_max = preset.scalar_max; + settings.scalar_opacity = preset.scalar_opacity; + settings.scalar_low_r = preset.scalar_low_r; + settings.scalar_low_g = preset.scalar_low_g; + settings.scalar_low_b = preset.scalar_low_b; + settings.scalar_high_r = preset.scalar_high_r; + settings.scalar_high_g = preset.scalar_high_g; + settings.scalar_high_b = preset.scalar_high_b; + settings.shaded_volume = preset.shaded_volume; } - void step_physics(const AppState& state, AppData& data, const int sim_steps) { - std::array velocity_sources{}; - std::array field_sources{}; - uint32_t velocity_source_count = 0; - uint32_t field_source_count = 0; - const auto append_emitter = [&](const SourceEmitterSettings& emitter) { - if (!emitter.enabled) return; - const float dir_len = std::sqrt(emitter.direction_x * emitter.direction_x + emitter.direction_y * emitter.direction_y + emitter.direction_z * emitter.direction_z); - const float inv_len = dir_len > 1.0e-5f ? 1.0f / dir_len : 1.0f; - const float dir_x = dir_len > 1.0e-5f ? emitter.direction_x * inv_len : 0.0f; - const float dir_y = dir_len > 1.0e-5f ? emitter.direction_y * inv_len : 1.0f; - const float dir_z = dir_len > 1.0e-5f ? emitter.direction_z * inv_len : 0.0f; - velocity_sources[velocity_source_count++] = { - .center_x = emitter.center_x, - .center_y = emitter.center_y, - .center_z = emitter.center_z, - .radius = emitter.radius, - .velocity_x = dir_x * emitter.speed, - .velocity_y = dir_y * emitter.speed, - .velocity_z = dir_z * emitter.speed, - }; - field_sources[field_source_count++] = { - .field = data.physics.density_field, - .center_x = emitter.center_x, - .center_y = emitter.center_y, - .center_z = emitter.center_z, - .radius = emitter.radius, - .value_0 = emitter.density_amount, - .value_1 = 0.0f, - .value_2 = 0.0f, - .value_3 = 0.0f, - }; - field_sources[field_source_count++] = { - .field = data.physics.dye_field, - .center_x = emitter.center_x, - .center_y = emitter.center_y, - .center_z = emitter.center_z, - .radius = emitter.radius, - .value_0 = emitter.dye_amount * emitter.color_r, - .value_1 = emitter.dye_amount * emitter.color_g, - .value_2 = emitter.dye_amount * emitter.color_b, - .value_3 = 0.0f, - }; + bool sync_capture_storage(AppData& data, VisualizationApp& renderer, const GridShape& grid, const bool with_velocity_plane) { + auto check_cuda = [](const cudaError_t status, const std::string_view what) { + if (status == cudaSuccess) return; + throw std::runtime_error(std::string(what) + ": " + cudaGetErrorString(status)); }; - append_emitter(state.physics.emitters.a); - append_emitter(state.physics.emitters.b); - - for (int step_index = 0; step_index < sim_steps; ++step_index) { - StableFluidsStepDesc step_desc{ - .velocity_sources = state.physics.emit_source ? velocity_sources.data() : nullptr, - .velocity_source_count = state.physics.emit_source ? velocity_source_count : 0u, - .field_sources = state.physics.emit_source ? field_sources.data() : nullptr, - .field_source_count = state.physics.emit_source ? field_source_count : 0u, - }; - const auto begin = std::chrono::steady_clock::now(); - check_stable(stable_fluids_step_cuda(data.physics.context, &step_desc), "stable_fluids_step_cuda"); - const auto elapsed_ms = std::chrono::duration(std::chrono::steady_clock::now() - begin).count(); - data.physics.stats.last_step_call_ms = elapsed_ms; - ++data.physics.stats.step_count; - data.physics.stats.average_step_call_ms += (elapsed_ms - data.physics.stats.average_step_call_ms) / static_cast(data.physics.stats.step_count); - } - if (sim_steps <= 0) return; - StableFluidsProjectionMetrics metrics{}; - check_stable(stable_fluids_get_projection_metrics_cuda(data.physics.context, &metrics), "stable_fluids_get_projection_metrics_cuda"); - data.physics.stats.projection_max_abs_divergence = metrics.max_abs_divergence; - data.physics.stats.projection_rms_divergence = metrics.rms_divergence; - } + const GridShape request_grid{ + .nx = grid.nx, + .ny = grid.ny, + .nz = grid.nz, + .cell_size = grid.cell_size, + }; + const bool matches = !data.capture.slots.empty() && data.capture.request_grid.nx == request_grid.nx && data.capture.request_grid.ny == request_grid.ny && data.capture.request_grid.nz == request_grid.nz && data.capture.request_grid.cell_size == request_grid.cell_size && data.capture.has_velocity_storage == with_velocity_plane; + if (matches) return false; - bool sync_capture_storage(AppState& state, AppData& data, VisualizationApp& renderer) { - const auto request = make_capture_request(state, data); - if (capture_matches_request(data, request)) return false; renderer.vk_context().device.waitIdle(); - destroy_capture_storage(data); - data.capture.request_grid = request.grid; - data.capture.request_field_component_count = request.field_component_count; - data.capture.request_export_velocity_host = request.export_velocity_host; - data.capture.field_bytes = field_bytes_for(request); - data.capture.velocity_bytes = velocity_bytes_for(request); + for (auto& slot : data.capture.slots) { + if (slot.field_cuda_ptr != nullptr) cudaFree(slot.field_cuda_ptr); + if (slot.velocity_cuda_ptr != nullptr) cudaFree(slot.velocity_cuda_ptr); + if (slot.external_semaphore != nullptr) cudaDestroyExternalSemaphore(slot.external_semaphore); + if (slot.external_memory != nullptr) cudaDestroyExternalMemory(slot.external_memory); + slot = {}; + } + data.capture = {}; + data.capture.request_grid = request_grid; + data.capture.has_velocity_storage = with_velocity_plane; + const auto field_bytes = static_cast(request_grid.nx) * static_cast(request_grid.ny) * static_cast((std::max) (request_grid.nz, 1u)) * sizeof(float); + const auto velocity_bytes = field_bytes * 3u; auto descriptor_sets = renderer.allocate_field_descriptor_sets(snapshot_slot_count); data.capture.slots.reserve(descriptor_sets.size()); for (size_t slot_index = 0; slot_index < descriptor_sets.size(); ++slot_index) { - auto& slot = data.capture.slots.emplace_back(); + auto& slot = data.capture.slots.emplace_back(); slot.descriptor_set = std::move(descriptor_sets[slot_index]); #if defined(_WIN32) constexpr auto memory_handle_type = vk::ExternalMemoryHandleTypeFlagBits::eOpaqueWin32; @@ -1354,10 +741,10 @@ namespace app { #endif vk::SemaphoreTypeCreateInfo timeline_semaphore_ci{ .semaphoreType = vk::SemaphoreType::eTimeline, - .initialValue = 0, + .initialValue = 0, }; vk::ExportSemaphoreCreateInfo export_semaphore_ci{ - .pNext = &timeline_semaphore_ci, + .pNext = &timeline_semaphore_ci, .handleTypes = semaphore_handle_type, }; vk::SemaphoreCreateInfo semaphore_ci{ @@ -1369,19 +756,19 @@ namespace app { .handleTypes = memory_handle_type, }; vk::BufferCreateInfo buffer_ci{ - .pNext = &external_buffer_ci, - .size = data.capture.field_bytes, - .usage = vk::BufferUsageFlagBits::eStorageBuffer | vk::BufferUsageFlagBits::eTransferDst | vk::BufferUsageFlagBits::eTransferSrc, + .pNext = &external_buffer_ci, + .size = field_bytes, + .usage = vk::BufferUsageFlagBits::eStorageBuffer | vk::BufferUsageFlagBits::eTransferDst | vk::BufferUsageFlagBits::eTransferSrc, .sharingMode = vk::SharingMode::eExclusive, }; - slot.buffer = vk::raii::Buffer{renderer.vk_context().device, buffer_ci}; + slot.buffer = vk::raii::Buffer{renderer.vk_context().device, buffer_ci}; const vk::MemoryRequirements requirements = slot.buffer.getMemoryRequirements(); vk::ExportMemoryAllocateInfo export_memory_ci{ .handleTypes = memory_handle_type, }; vk::MemoryAllocateInfo alloc_ci{ - .pNext = &export_memory_ci, - .allocationSize = requirements.size, + .pNext = &export_memory_ci, + .allocationSize = requirements.size, .memoryTypeIndex = vk::memory::find_memory_type(renderer.vk_context().physical_device, requirements.memoryTypeBits, vk::MemoryPropertyFlagBits::eDeviceLocal), }; slot.memory = vk::raii::DeviceMemory{renderer.vk_context().device, alloc_ci}; @@ -1389,118 +776,111 @@ namespace app { #if defined(_WIN32) vk::MemoryGetWin32HandleInfoKHR memory_handle_info{ - .memory = *slot.memory, + .memory = *slot.memory, .handleType = memory_handle_type, }; HANDLE memory_handle = renderer.vk_context().device.getMemoryWin32HandleKHR(memory_handle_info); cudaExternalMemoryHandleDesc external_memory_desc{ .type = cudaExternalMemoryHandleTypeOpaqueWin32, - .handle = { .win32 = { .handle = memory_handle, }, }, + .handle = + { + .win32 = + { + .handle = memory_handle, + }, + }, .size = requirements.size, }; check_cuda(cudaImportExternalMemory(&slot.external_memory, &external_memory_desc), "cudaImportExternalMemory"); CloseHandle(memory_handle); vk::SemaphoreGetWin32HandleInfoKHR semaphore_handle_info{ - .semaphore = *slot.timeline_semaphore, + .semaphore = *slot.timeline_semaphore, .handleType = semaphore_handle_type, }; HANDLE semaphore_handle = renderer.vk_context().device.getSemaphoreWin32HandleKHR(semaphore_handle_info); cudaExternalSemaphoreHandleDesc external_semaphore_desc{ .type = cudaExternalSemaphoreHandleTypeTimelineSemaphoreWin32, - .handle = { .win32 = { .handle = semaphore_handle, }, }, + .handle = + { + .win32 = + { + .handle = semaphore_handle, + }, + }, }; check_cuda(cudaImportExternalSemaphore(&slot.external_semaphore, &external_semaphore_desc), "cudaImportExternalSemaphore"); CloseHandle(semaphore_handle); #else vk::MemoryGetFdInfoKHR memory_handle_info{ - .memory = *slot.memory, + .memory = *slot.memory, .handleType = memory_handle_type, }; const int memory_fd = renderer.vk_context().device.getMemoryFdKHR(memory_handle_info); cudaExternalMemoryHandleDesc external_memory_desc{ .type = cudaExternalMemoryHandleTypeOpaqueFd, - .handle = { .fd = memory_fd, }, + .handle = + { + .fd = memory_fd, + }, .size = requirements.size, }; check_cuda(cudaImportExternalMemory(&slot.external_memory, &external_memory_desc), "cudaImportExternalMemory"); vk::SemaphoreGetFdInfoKHR semaphore_handle_info{ - .semaphore = *slot.timeline_semaphore, + .semaphore = *slot.timeline_semaphore, .handleType = semaphore_handle_type, }; const int semaphore_fd = renderer.vk_context().device.getSemaphoreFdKHR(semaphore_handle_info); cudaExternalSemaphoreHandleDesc external_semaphore_desc{ .type = cudaExternalSemaphoreHandleTypeTimelineSemaphoreFd, - .handle = { .fd = semaphore_fd, }, + .handle = + { + .fd = semaphore_fd, + }, }; check_cuda(cudaImportExternalSemaphore(&slot.external_semaphore, &external_semaphore_desc), "cudaImportExternalSemaphore"); #endif cudaExternalMemoryBufferDesc buffer_desc{ .offset = 0, - .size = data.capture.field_bytes, + .size = field_bytes, }; check_cuda(cudaExternalMemoryGetMappedBuffer(&slot.field_cuda_ptr, slot.external_memory, &buffer_desc), "cudaExternalMemoryGetMappedBuffer"); - if (data.capture.velocity_bytes != 0) { - check_cuda(cudaMalloc(&slot.velocity_cuda_ptr, data.capture.velocity_bytes), "cudaMalloc velocity snapshot"); - slot.velocity_host.resize(static_cast(data.capture.velocity_bytes / sizeof(float))); + if (with_velocity_plane) { + check_cuda(cudaMalloc(&slot.velocity_cuda_ptr, velocity_bytes), "cudaMalloc velocity snapshot"); + slot.velocity_host.resize(static_cast(velocity_bytes / sizeof(float))); } vk::DescriptorBufferInfo field_info{ .buffer = *slot.buffer, .offset = 0, - .range = data.capture.field_bytes, + .range = field_bytes, }; vk::WriteDescriptorSet field_write{ - .dstSet = *slot.descriptor_set, - .dstBinding = 0, + .dstSet = *slot.descriptor_set, + .dstBinding = 0, .descriptorCount = 1, - .descriptorType = vk::DescriptorType::eStorageBuffer, - .pBufferInfo = &field_info, + .descriptorType = vk::DescriptorType::eStorageBuffer, + .pBufferInfo = &field_info, }; renderer.vk_context().device.updateDescriptorSets(field_write, {}); } return true; } - bool capture_snapshot(AppState& state, AppData& data, VisualizationApp& renderer, const char* tag) { - const int slot_index = find_available_capture_slot(data, renderer.frames_in_flight()); - if (slot_index < 0) return false; - nvtx3::scoped_range range{tag}; - const auto request = make_capture_request(state, data); - const auto begin = std::chrono::steady_clock::now(); - const auto capture = begin_capture(data, slot_index); - export_field(state, data, current_field_info(state).id, capture.field_cuda_ptr); - if (request.export_velocity_host) { - export_velocity(data, capture.velocity_cuda_ptr); - check_cuda(cudaMemcpyAsync(capture.velocity_host_ptr, capture.velocity_cuda_ptr, data.capture.velocity_bytes, cudaMemcpyDeviceToHost, data.physics.stream), "cudaMemcpyAsync velocity snapshot"); - } - cudaExternalSemaphoreSignalParams signal_params{}; - signal_params.params.fence.value = capture.ready_generation; - check_cuda(cudaSignalExternalSemaphoresAsync(&capture.external_semaphore, &signal_params, 1, data.physics.stream), "cudaSignalExternalSemaphoresAsync"); - check_cuda(cudaStreamSynchronize(data.physics.stream), "cudaStreamSynchronize"); - complete_capture(data, slot_index, request, std::chrono::duration(std::chrono::steady_clock::now() - begin).count()); - return true; - } - - std::optional active_snapshot(const AppState& state, const AppData& data) { + std::optional active_snapshot(const AppData& data) { if (data.capture.active_slot < 0) return std::nullopt; const auto& slot = data.capture.slots.at(static_cast(data.capture.active_slot)); return VisualizationSnapshotView{ - .grid = slot.grid, - .field = { - .descriptor_set = *slot.descriptor_set, - .timeline_semaphore = slot.external_semaphore != nullptr ? *slot.timeline_semaphore : vk::Semaphore{}, - .ready_generation = slot.ready_generation, - .component_count = slot.field_component_count, - .semantic = slot.semantic, - .label = slot.label, - }, - .collider = collider_overlay(state), - .velocity = { - .data = slot.has_velocity_host ? slot.velocity_host.data() : nullptr, - }, + .grid = data.capture.request_grid, + .field = + { + .descriptor_set = *slot.descriptor_set, + .timeline_semaphore = slot.external_semaphore != nullptr ? *slot.timeline_semaphore : vk::Semaphore{}, + .ready_generation = slot.ready_generation, + }, + .velocity = slot.has_velocity_host && !slot.velocity_host.empty() ? slot.velocity_host.data() : nullptr, }; } @@ -1510,185 +890,76 @@ namespace app { data.capture.submit_serial = next_submit_serial; } - void draw_simulation_controls(AppState& state, const AppData& data, bool& reset_requested, bool& field_changed) { - ImGui::Begin("Simulation"); - auto& physics = state.physics; - auto& solver = physics.solver; - auto& playback = state.ui.playback; - const float extent_x = static_cast(solver.backend.nx) * solver.backend.cell_size; - const float extent_y = static_cast(solver.backend.ny) * solver.backend.cell_size; - const float extent_z = static_cast(solver.backend.nz) * solver.backend.cell_size; - const float min_extent = (std::min)({extent_x, extent_y, extent_z}); - auto request_scene_reset = [&]() { - if (physics.preset != ScenePreset::Custom) physics.preset = ScenePreset::Custom; - reset_requested = true; - }; - - if (ImGui::BeginCombo("Field", current_field_info(state).label.data())) { - for (int i = 0; i < static_cast(field_catalog_storage.size()); ++i) { - const bool is_selected = physics.selected_field == i; - if (ImGui::Selectable(field_catalog_storage[static_cast(i)].label.data(), is_selected)) { - physics.selected_field = i; - field_changed = true; - } - if (is_selected) ImGui::SetItemDefaultFocus(); - } - ImGui::EndCombo(); - } - - int scene_preset = std::clamp(static_cast(physics.preset), 0, static_cast(scene_preset_labels.size()) - 1); - if (ImGui::BeginCombo("Scene Preset", scene_preset_labels[static_cast(scene_preset)])) { - for (int i = 0; i < static_cast(scene_preset_labels.size()); ++i) { - const bool is_selected = scene_preset == i; - if (ImGui::Selectable(scene_preset_labels[static_cast(i)], is_selected)) { - if (i < static_cast(ScenePreset::Custom)) { - apply_scene_preset(state, static_cast(i)); - reset_requested = true; - } else { - physics.preset = ScenePreset::Custom; - } + int run_scene_switcher(const std::span scenes) { + AppState state{}; + AppData data{}; + std::unique_ptr renderer{}; + try { + if (scenes.empty()) throw std::runtime_error("run_scene_switcher requires at least one scene"); + renderer = std::make_unique(); + check_interop_support(*renderer); + + std::vector scene_labels{}; + scene_labels.reserve(scenes.size()); + for (const auto& scene : scenes) scene_labels.push_back(scene.label); + + renderer->vk_context().device.waitIdle(); + for (auto& scene : scenes) scene.rebuild(); + + auto activate_scene = [&](const bool reframe) { + auto& scene = scenes[static_cast(state.selected_scene)]; + state.selected_field = 0; + state.render = scene.default_visualization(); + sync_capture_storage(data, *renderer, scene.info().grid, state.render.show_velocity_plane); + capture_snapshot(state, data, scene, *renderer); + if (!reframe) return; + if (const auto snapshot = active_snapshot(data)) renderer->frame_content(state.render, *snapshot); + }; + + activate_scene(true); + while (!renderer->should_close()) { + renderer->begin_frame(); + + bool reset_requested = false; + bool field_changed = false; + bool scene_changed = false; + auto& scene = scenes[static_cast(state.selected_scene)]; + auto snapshot = active_snapshot(data); + renderer->draw_visualization_ui(state, scene.info(), scene.fields(), scene_labels, reset_requested, field_changed, scene_changed, snapshot); + + if (scene_changed) { + renderer->vk_context().device.waitIdle(); + activate_scene(true); + } else if (reset_requested) { + renderer->vk_context().device.waitIdle(); + scene.rebuild(); + sync_capture_storage(data, *renderer, scene.info().grid, state.render.show_velocity_plane); + capture_snapshot(state, data, scene, *renderer); + snapshot = active_snapshot(data); + if (snapshot) renderer->frame_content(state.render, *snapshot); + } else { + sync_capture_storage(data, *renderer, scene.info().grid, state.render.show_velocity_plane); + if (!state.paused) scene.step(1); + if (!state.paused || field_changed || !snapshot) capture_snapshot(state, data, scene, *renderer); } - if (is_selected) ImGui::SetItemDefaultFocus(); - } - ImGui::EndCombo(); - } - - ImGui::Checkbox("Pause Simulation", &playback.paused); - if (ImGui::Button("Single Step")) playback.step_once = true; - ImGui::SameLine(); - if (ImGui::Button("Reset Backend")) reset_requested = true; - ImGui::SliderInt("Sim Steps / Frame", &playback.sim_steps_per_frame, 1, 8); - ImGui::SliderInt("Snapshot Interval", &playback.snapshot_interval, 1, 8); - ImGui::Separator(); - ImGui::TextUnformatted("Solver"); - ImGui::Text("Step calls: %llu", static_cast(data.physics.stats.step_count)); - ImGui::Text("Last step call: %.3f ms", data.physics.stats.last_step_call_ms); - ImGui::Text("Avg step call: %.3f ms", data.physics.stats.average_step_call_ms); - ImGui::Text("Projection max |div|: %.6g", data.physics.stats.projection_max_abs_divergence); - ImGui::Text("Projection RMS div: %.6g", data.physics.stats.projection_rms_divergence); - ImGui::Text("Snapshot commits: %llu", static_cast(data.capture.stats.snapshot_count)); - ImGui::Text("Last snapshot: %.3f ms", data.capture.stats.last_snapshot_ms); - ImGui::Text("Avg snapshot: %.3f ms", data.capture.stats.average_snapshot_ms); - - ImGui::Separator(); - ImGui::TextUnformatted("Grid / Time"); - ImGui::Text("Domain Size: %.3f m x %.3f m x %.3f m", extent_x, extent_y, extent_z); - if (ImGui::SliderInt("Grid X (cells)", &solver.backend.nx, 16, 512)) request_scene_reset(); - if (ImGui::SliderInt("Grid Y (cells)", &solver.backend.ny, 16, 512)) request_scene_reset(); - if (ImGui::SliderInt("Grid Z (cells)", &solver.backend.nz, 16, 512)) request_scene_reset(); - if (ImGui::SliderFloat("Dt (s)", &solver.backend.dt, 1.0f / 480.0f, 1.0f / 24.0f, "%.5f")) request_scene_reset(); - if (ImGui::SliderFloat("Cell Size (m)", &solver.backend.cell_size, 0.0025f, 0.05f, "%.4f")) request_scene_reset(); - if (ImGui::SliderFloat("Viscosity (m^2/s)", &solver.backend.viscosity, 0.0f, 0.002f, "%.5f")) request_scene_reset(); - if (ImGui::SliderFloat("Density Diffusion (m^2/s)", &solver.density_diffusion, 0.0f, 0.002f, "%.5f")) request_scene_reset(); - if (ImGui::SliderFloat("Dye Diffusion (m^2/s)", &solver.dye_diffusion, 0.0f, 0.002f, "%.5f")) request_scene_reset(); - if (ImGui::SliderInt("Diffuse Iterations", &solver.backend.diffuse_iterations, 1, 64)) request_scene_reset(); - if (ImGui::SliderInt("Pressure Iterations", &solver.backend.pressure_iterations, 4, 192)) request_scene_reset(); - - auto draw_boundary_combo = [&](const char* label, StableFluidsBoundaryFaceDesc& face) { - int boundary = std::clamp(static_cast(face.type), 0, static_cast(boundary_labels.size()) - 1); - if (ImGui::BeginCombo(label, boundary_labels[static_cast(boundary)])) { - for (int i = 0; i < static_cast(boundary_labels.size()); ++i) { - const bool is_selected = boundary == i; - if (ImGui::Selectable(boundary_labels[static_cast(i)], is_selected)) { - boundary = i; - face.type = static_cast(i); - request_scene_reset(); - } - if (is_selected) ImGui::SetItemDefaultFocus(); - } - ImGui::EndCombo(); + snapshot = active_snapshot(data); + const bool submitted = renderer->render_frame(state.render, snapshot); + if (submitted) mark_snapshot_submitted(data); } - }; - - ImGui::Separator(); - ImGui::TextUnformatted("Domain Boundary"); - draw_boundary_combo("Boundary X-", solver.backend.domain_boundary.x_min); - draw_boundary_combo("Boundary X+", solver.backend.domain_boundary.x_max); - draw_boundary_combo("Boundary Y-", solver.backend.domain_boundary.y_min); - draw_boundary_combo("Boundary Y+", solver.backend.domain_boundary.y_max); - draw_boundary_combo("Boundary Z-", solver.backend.domain_boundary.z_min); - draw_boundary_combo("Boundary Z+", solver.backend.domain_boundary.z_max); - if (ImGui::SliderFloat("Inflow Vel X- (m/s)", &solver.backend.domain_boundary.x_min.velocity, -4.0f, 4.0f, "%.2f")) request_scene_reset(); - if (ImGui::SliderFloat("Inflow Vel X+ (m/s)", &solver.backend.domain_boundary.x_max.velocity, -4.0f, 4.0f, "%.2f")) request_scene_reset(); - if (ImGui::SliderFloat("Inflow Vel Y- (m/s)", &solver.backend.domain_boundary.y_min.velocity, -4.0f, 4.0f, "%.2f")) request_scene_reset(); - if (ImGui::SliderFloat("Inflow Vel Y+ (m/s)", &solver.backend.domain_boundary.y_max.velocity, -4.0f, 4.0f, "%.2f")) request_scene_reset(); - if (ImGui::SliderFloat("Inflow Vel Z- (m/s)", &solver.backend.domain_boundary.z_min.velocity, -4.0f, 4.0f, "%.2f")) request_scene_reset(); - if (ImGui::SliderFloat("Inflow Vel Z+ (m/s)", &solver.backend.domain_boundary.z_max.velocity, -4.0f, 4.0f, "%.2f")) request_scene_reset(); - ImGui::Separator(); - ImGui::TextUnformatted("Forces"); - if (ImGui::SliderFloat("Gravity Y (m/s^2)", &solver.gravity_y, -20.0f, 20.0f, "%.3f")) request_scene_reset(); - if (ImGui::SliderFloat("Buoyancy Beta", &solver.buoyancy_beta, 0.0f, 2.0f, "%.3f")) request_scene_reset(); - if (ImGui::SliderFloat("Ambient Density", &solver.ambient_density, 0.0f, 2.0f, "%.3f")) request_scene_reset(); - ImGui::Text("Buoyancy accel / density: %.3f m/s^2", -solver.gravity_y * solver.buoyancy_beta); - if (ImGui::SliderFloat("Uniform Force X (m/s^2)", &solver.backend.uniform_force_x, -20.0f, 20.0f, "%.3f")) request_scene_reset(); - if (ImGui::SliderFloat("Uniform Force Y (m/s^2)", &solver.backend.uniform_force_y, -20.0f, 20.0f, "%.3f")) request_scene_reset(); - if (ImGui::SliderFloat("Uniform Force Z (m/s^2)", &solver.backend.uniform_force_z, -20.0f, 20.0f, "%.3f")) request_scene_reset(); - - ImGui::Separator(); - ImGui::TextUnformatted("Sources"); - if (ImGui::Checkbox("Emit Source", &physics.emit_source)) request_scene_reset(); - auto draw_emitter_controls = [&](const char* label, SourceEmitterSettings& emitter) { - if (!ImGui::TreeNode(label)) return; - if (ImGui::Checkbox((std::string("Enabled##") + label).c_str(), &emitter.enabled)) request_scene_reset(); - if (ImGui::SliderFloat((std::string("Center X (m)##") + label).c_str(), &emitter.center_x, 0.0f, extent_x, "%.3f")) request_scene_reset(); - if (ImGui::SliderFloat((std::string("Center Y (m)##") + label).c_str(), &emitter.center_y, 0.0f, extent_y, "%.3f")) request_scene_reset(); - if (ImGui::SliderFloat((std::string("Center Z (m)##") + label).c_str(), &emitter.center_z, 0.0f, extent_z, "%.3f")) request_scene_reset(); - if (ImGui::SliderFloat((std::string("Dir X##") + label).c_str(), &emitter.direction_x, -1.0f, 1.0f, "%.2f")) request_scene_reset(); - if (ImGui::SliderFloat((std::string("Dir Y##") + label).c_str(), &emitter.direction_y, -1.0f, 1.0f, "%.2f")) request_scene_reset(); - if (ImGui::SliderFloat((std::string("Dir Z##") + label).c_str(), &emitter.direction_z, -1.0f, 1.0f, "%.2f")) request_scene_reset(); - if (ImGui::SliderFloat((std::string("Speed (m/s)##") + label).c_str(), &emitter.speed, 0.0f, 5.0f, "%.3f")) request_scene_reset(); - if (ImGui::SliderFloat((std::string("Radius (m)##") + label).c_str(), &emitter.radius, solver.backend.cell_size, min_extent * 0.25f, "%.3f")) request_scene_reset(); - if (ImGui::SliderFloat((std::string("Density##") + label).c_str(), &emitter.density_amount, 0.0f, 3.0f, "%.2f")) request_scene_reset(); - if (ImGui::SliderFloat((std::string("Dye##") + label).c_str(), &emitter.dye_amount, 0.0f, 3.0f, "%.2f")) request_scene_reset(); - if (ImGui::ColorEdit3((std::string("Color##") + label).c_str(), &emitter.color_r)) request_scene_reset(); - ImGui::TreePop(); - }; - draw_emitter_controls("Emitter A", physics.emitters.a); - draw_emitter_controls("Emitter B", physics.emitters.b); - - ImGui::Separator(); - ImGui::TextUnformatted("Collider"); - if (ImGui::Checkbox("Enable Collider", &physics.scene.collider.enabled)) request_scene_reset(); - int collider_type = std::clamp(static_cast(physics.scene.collider.type), 0, static_cast(collider_type_labels.size()) - 1); - if (ImGui::BeginCombo("Collider Type", collider_type_labels[static_cast(collider_type)])) { - for (int i = 0; i < static_cast(collider_type_labels.size()); ++i) { - const bool is_selected = collider_type == i; - if (ImGui::Selectable(collider_type_labels[static_cast(i)], is_selected)) { - physics.scene.collider.type = static_cast(i); - request_scene_reset(); - } - if (is_selected) ImGui::SetItemDefaultFocus(); - } - ImGui::EndCombo(); - } - int collider_boundary = std::clamp(static_cast(physics.scene.collider.boundary), 0, 1); - if (ImGui::BeginCombo("Collider Boundary", boundary_labels[static_cast(collider_boundary)])) { - for (int i = 0; i < 2; ++i) { - const bool is_selected = collider_boundary == i; - if (ImGui::Selectable(boundary_labels[static_cast(i)], is_selected)) { - physics.scene.collider.boundary = static_cast(i); - request_scene_reset(); - } - if (is_selected) ImGui::SetItemDefaultFocus(); + renderer->vk_context().device.waitIdle(); + destroy_runtime_data(data); + return 0; + } catch (const std::exception& e) { + try { + if (renderer) renderer->vk_context().device.waitIdle(); + } catch (...) { } - ImGui::EndCombo(); - } - if (ImGui::SliderFloat("Collider X (m)", &physics.scene.collider.center_x, 0.0f, extent_x, "%.3f")) request_scene_reset(); - if (ImGui::SliderFloat("Collider Y (m)", &physics.scene.collider.center_y, 0.0f, extent_y, "%.3f")) request_scene_reset(); - if (ImGui::SliderFloat("Collider Z (m)", &physics.scene.collider.center_z, 0.0f, extent_z, "%.3f")) request_scene_reset(); - if (physics.scene.collider.type == ColliderType::Sphere) { - if (ImGui::SliderFloat("Collider Radius (m)", &physics.scene.collider.radius, solver.backend.cell_size, min_extent * 0.45f, "%.3f")) request_scene_reset(); - } else { - if (ImGui::SliderFloat("Half Extent X (m)", &physics.scene.collider.half_extent_x, solver.backend.cell_size, extent_x * 0.45f, "%.3f")) request_scene_reset(); - if (ImGui::SliderFloat("Half Extent Y (m)", &physics.scene.collider.half_extent_y, solver.backend.cell_size, extent_y * 0.45f, "%.3f")) request_scene_reset(); - if (ImGui::SliderFloat("Half Extent Z (m)", &physics.scene.collider.half_extent_z, solver.backend.cell_size, extent_z * 0.45f, "%.3f")) request_scene_reset(); + destroy_runtime_data(data); + std::fprintf(stderr, "%s\n", e.what()); + return 1; } - if (ImGui::SliderFloat("Collider Vel X (m/s)", &physics.scene.collider.velocity_x, -3.0f, 3.0f, "%.3f")) request_scene_reset(); - if (ImGui::SliderFloat("Collider Vel Y (m/s)", &physics.scene.collider.velocity_y, -3.0f, 3.0f, "%.3f")) request_scene_reset(); - if (ImGui::SliderFloat("Collider Vel Z (m/s)", &physics.scene.collider.velocity_z, -3.0f, 3.0f, "%.3f")) request_scene_reset(); - ImGui::End(); } } // namespace app diff --git a/vulkan-app/app.ixx b/vulkan-app/app.ixx index 6dd4376..7ae0a1c 100644 --- a/vulkan-app/app.ixx +++ b/vulkan-app/app.ixx @@ -1,9 +1,8 @@ module; #include "stable-fluids-3d.h" - -#include #include +#include #include @@ -20,23 +19,11 @@ import vk.swapchain; export namespace app { - enum class FieldSemantic : uint32_t { - Density = 0, - VelocityMagnitude = 1, - DyeColor = 2, - GenericScalar = 3, - }; - enum class ViewMode : uint32_t { Plane = 0, Volume = 1, }; - enum class RenderMode : uint32_t { - Smoke = 0, - Scalar = 1, - }; - enum class PlaneAxis : uint32_t { XY = 0, XZ = 1, @@ -51,9 +38,9 @@ export namespace app { }; struct GridShape { - uint32_t nx = 0; - uint32_t ny = 0; - uint32_t nz = 1; + uint32_t nx = 0; + uint32_t ny = 0; + uint32_t nz = 1; float cell_size = 1.0f; [[nodiscard]] float extent_x() const { @@ -65,11 +52,11 @@ export namespace app { } [[nodiscard]] float extent_z() const { - return static_cast((std::max)(nz, 1u)) * cell_size; + return static_cast((std::max) (nz, 1u)) * cell_size; } [[nodiscard]] float max_extent() const { - return (std::max)({extent_x(), extent_y(), extent_z()}); + return (std::max) ({extent_x(), extent_y(), extent_z()}); } }; @@ -77,32 +64,12 @@ export namespace app { vk::DescriptorSet descriptor_set{nullptr}; vk::Semaphore timeline_semaphore{nullptr}; uint64_t ready_generation = 0; - uint32_t component_count = 1; - FieldSemantic semantic = FieldSemantic::GenericScalar; - std::string_view label{}; - }; - - struct ColliderOverlay { - bool enabled = false; - uint32_t type = 0; - float center_x = 0.0f; - float center_y = 0.0f; - float center_z = 0.0f; - float radius = 0.0f; - float half_x = 0.0f; - float half_y = 0.0f; - float half_z = 0.0f; - }; - - struct VectorFieldOverlay { - const float* data = nullptr; }; struct VisualizationSnapshotView { GridShape grid{}; FieldResourceView field{}; - ColliderOverlay collider{}; - VectorFieldOverlay velocity{}; + const float* velocity = nullptr; }; struct alignas(16) FieldPushConstants { @@ -112,6 +79,8 @@ export namespace app { vk::math::vec4 forward{}; vk::math::vec4 volume_min{}; vk::math::vec4 volume_max{}; + vk::math::vec4 background_bottom{}; + vk::math::vec4 background_top{}; vk::math::vec4 color_a{}; vk::math::vec4 color_b{}; vk::math::vec4 params0{}; @@ -121,186 +90,65 @@ export namespace app { }; struct VisualizationSettings { - ViewMode view_mode = ViewMode::Volume; - RenderMode render_mode = RenderMode::Smoke; - PlaneAxis plane_axis = PlaneAxis::XY; - int march_steps = 96; - float slice_position = 0.5f; - float density_scale = 0.95f; - float absorption = 1.20f; - float scalar_min = 0.0f; - float scalar_max = 1.0f; - float scalar_opacity = 2.0f; - float scalar_low_r = 0.08f; - float scalar_low_g = 0.18f; - float scalar_low_b = 0.46f; - float scalar_high_r = 0.98f; - float scalar_high_g = 0.82f; - float scalar_high_b = 0.24f; - bool show_bounds = false; - bool show_collider = true; - bool show_velocity_plane = false; - int velocity_grid = 24; - int velocity_steps = 64; - float velocity_step = 0.24f; - float velocity_min_speed = 0.02f; - float velocity_thickness = 1.6f; - }; - - enum class ScenePreset : uint32_t { - DualJetCollider = 0, - SmokePlume = 1, - Custom = 2, - }; - - enum class ColliderType : uint32_t { - Sphere = 0, - Box = 1, - }; - - enum class FieldId : uint32_t { - SmokeColor = 0, - Density = 1, - VelocityMagnitude = 2, - SolidMask = 3, - Pressure = 4, - Divergence = 5, - }; - - enum class FieldDisplayMode : uint32_t { - Scalar = 0, - Smoke = 1, + ViewMode view_mode = ViewMode::Volume; + PlaneAxis plane_axis = PlaneAxis::XY; + int march_steps = 112; + float slice_position = 0.42f; + bool show_velocity_plane = false; + int velocity_plane_seed_count = 40; + float velocity_plane_arrow_cells = 0.6f; + float velocity_plane_min_speed = 0.015f; + float velocity_plane_thickness = 1.4f; + float density_scale = 1.35f; + float scalar_min = 0.0f; + float scalar_max = 3.5f; + float scalar_opacity = 5.4f; + float scalar_low_r = 0.03f; + float scalar_low_g = 0.04f; + float scalar_low_b = 0.07f; + float scalar_high_r = 0.94f; + float scalar_high_g = 0.90f; + float scalar_high_b = 0.84f; + bool shaded_volume = false; + float background_bottom_r = 0.035f; + float background_bottom_g = 0.04f; + float background_bottom_b = 0.05f; + float background_top_r = 0.05f; + float background_top_g = 0.06f; + float background_top_b = 0.08f; }; struct FieldVisualPreset { - FieldDisplayMode display_mode = FieldDisplayMode::Scalar; - float density_scale = 0.95f; - float absorption = 1.20f; - float scalar_min = 0.0f; - float scalar_max = 1.0f; - float scalar_opacity = 2.0f; - float scalar_low_r = 0.08f; - float scalar_low_g = 0.18f; - float scalar_low_b = 0.46f; - float scalar_high_r = 0.98f; - float scalar_high_g = 0.82f; - float scalar_high_b = 0.24f; + float density_scale = 1.0f; + float scalar_min = 0.0f; + float scalar_max = 1.0f; + float scalar_opacity = 3.0f; + float scalar_low_r = 0.06f; + float scalar_low_g = 0.08f; + float scalar_low_b = 0.12f; + float scalar_high_r = 0.95f; + float scalar_high_g = 0.86f; + float scalar_high_b = 0.72f; + bool shaded_volume = false; }; struct FieldInfo { - FieldId id{}; std::string_view label{}; - uint32_t component_count = 1; - FieldSemantic semantic = FieldSemantic::GenericScalar; FieldVisualPreset preset{}; }; - struct SolverStats { - double last_step_call_ms = 0.0; - double average_step_call_ms = 0.0; - uint64_t step_count = 0; - float projection_max_abs_divergence = 0.0f; - float projection_rms_divergence = 0.0f; - }; - - struct CaptureStats { - double last_snapshot_ms = 0.0; - double average_snapshot_ms = 0.0; - uint64_t snapshot_count = 0; - }; - - struct ColliderSettings { - bool enabled = false; - ColliderType type = ColliderType::Sphere; - float center_x = 0.50f; - float center_y = 0.50f; - float center_z = 0.50f; - float radius = 0.20f; - float half_extent_x = 0.10f; - float half_extent_y = 0.08f; - float half_extent_z = 0.10f; - float velocity_x = 0.0f; - float velocity_y = 0.0f; - float velocity_z = 0.0f; - uint32_t boundary = static_cast(STABLE_FLUIDS_VELOCITY_BOUNDARY_NO_SLIP); - }; - - struct SourceEmitterSettings { - bool enabled = false; - float center_x = 0.50f; - float center_y = 0.10f; - float center_z = 0.50f; - float direction_x = 0.0f; - float direction_y = 1.0f; - float direction_z = 0.0f; - float speed = 0.0f; - float radius = 0.03f; - float density_amount = 0.0f; - float dye_amount = 0.0f; - float color_r = 1.00f; - float color_g = 1.00f; - float color_b = 1.00f; - }; - - struct PlaybackSettings { - bool paused = false; - bool step_once = false; - int sim_steps_per_frame = 1; - int snapshot_interval = 2; + struct SceneInfo { + GridShape grid{}; + float dt = 0.0f; + uint64_t step_count = 0; + double last_step_call_ms = 0.0; }; struct AppState { - struct { - ScenePreset preset = ScenePreset::DualJetCollider; - int selected_field = 0; - bool emit_source = true; - - struct { - StableFluidsSimulationConfig backend{ - .nx = 100, - .ny = 100, - .nz = 100, - .cell_size = 0.01f, - .dt = 1.0f / 120.0f, - .viscosity = 0.0f, - .diffuse_iterations = 24, - .pressure_iterations = 96, - .uniform_force_x = 0.0f, - .uniform_force_y = 0.0f, - .uniform_force_z = 0.0f, - .domain_boundary = { - .x_min = { .type = static_cast(STABLE_FLUIDS_VELOCITY_BOUNDARY_OUTFLOW), .velocity = 0.0f, }, - .x_max = { .type = static_cast(STABLE_FLUIDS_VELOCITY_BOUNDARY_OUTFLOW), .velocity = 0.0f, }, - .y_min = { .type = static_cast(STABLE_FLUIDS_VELOCITY_BOUNDARY_NO_SLIP), .velocity = 0.0f, }, - .y_max = { .type = static_cast(STABLE_FLUIDS_VELOCITY_BOUNDARY_OUTFLOW), .velocity = 0.0f, }, - .z_min = { .type = static_cast(STABLE_FLUIDS_VELOCITY_BOUNDARY_OUTFLOW), .velocity = 0.0f, }, - .z_max = { .type = static_cast(STABLE_FLUIDS_VELOCITY_BOUNDARY_OUTFLOW), .velocity = 0.0f, }, - }, - .block_x = 8, - .block_y = 8, - .block_z = 4, - }; - float density_diffusion = 0.00005f; - float dye_diffusion = 0.00003f; - float gravity_y = -9.81f; - float buoyancy_beta = 0.10f; - float ambient_density = 0.0f; - } solver{}; - - struct { - SourceEmitterSettings a{}; - SourceEmitterSettings b{}; - } emitters{}; - - struct { - ColliderSettings collider{}; - } scene{}; - } physics{}; - - struct { - VisualizationSettings render{}; - PlaybackSettings playback{}; - } ui{}; + int selected_scene = 0; + int selected_field = 0; + bool paused = false; + VisualizationSettings render{}; }; struct CaptureSlot { @@ -315,33 +163,16 @@ export namespace app { std::vector velocity_host{}; uint64_t ready_generation = 0; uint64_t last_used_submit_serial = 0; - GridShape grid{}; - uint32_t field_component_count = 1; - FieldSemantic semantic = FieldSemantic::GenericScalar; - std::string_view label{}; bool has_velocity_host = false; }; struct AppData { struct { - cudaStream_t stream = nullptr; - StableFluidsContext context = nullptr; - StableFluidsFieldHandle density_field = 0; - StableFluidsFieldHandle dye_field = 0; - SolverStats stats{}; - } physics{}; - - struct { - CaptureStats stats{}; - uint64_t field_bytes = 0; - uint64_t velocity_bytes = 0; - uint64_t generation = 0; - uint64_t submit_serial = 0; - uint32_t steps_since_snapshot = 0; - int active_slot = -1; + uint64_t generation = 0; + uint64_t submit_serial = 0; + int active_slot = -1; + bool has_velocity_storage = false; GridShape request_grid{}; - uint32_t request_field_component_count = 1; - bool request_export_velocity_host = false; std::vector slots{}; } capture{}; }; @@ -358,7 +189,7 @@ export namespace app { [[nodiscard]] bool should_close() const; void begin_frame(); - void draw_visualization_ui(VisualizationSettings& settings, const std::optional& snapshot); + void draw_visualization_ui(AppState& state, const SceneInfo& scene, std::span fields, std::span scene_labels, bool& reset_requested, bool& field_changed, bool& scene_changed, const std::optional& snapshot); bool render_frame(const VisualizationSettings& settings, const std::optional& snapshot); void frame_content(const VisualizationSettings& settings, const VisualizationSnapshotView& snapshot); @@ -368,7 +199,6 @@ export namespace app { private: void recreate_swapchain(); - void collect_camera_input(float dt_seconds); static constexpr uint32_t frames_in_flight_value_ = 2; @@ -394,24 +224,177 @@ export namespace app { vk::raii::ShaderModule volume_shader_module_{nullptr}; vk::pipeline::GraphicsPipeline plane_pipeline_{}; vk::pipeline::GraphicsPipeline volume_pipeline_{}; - float render_fps_ = 0.0f; uint32_t frame_index_ = 0; - std::chrono::steady_clock::time_point last_frame_time_ = std::chrono::steady_clock::now(); + std::chrono::steady_clock::time_point last_frame_time_{}; // default-initialized + }; + + template + concept SceneSample = requires(TScene scene, const TScene const_scene, const uint32_t field_index, const int sim_steps, void* device_destination, float* host_destination) { + { const_scene.fields() } -> std::convertible_to>; + { const_scene.default_visualization() } -> std::same_as; + { const_scene.info() } -> std::same_as; + { scene.rebuild() } -> std::same_as; + { scene.step(sim_steps) } -> std::same_as; + { const_scene.export_field(field_index, device_destination) } -> std::same_as; + { const_scene.export_velocity(device_destination, host_destination) } -> std::same_as; + { const_scene.stream() } -> std::same_as; + }; + + struct SceneEntry { + std::string_view label{}; + void* storage = nullptr; + void (*destroy_fn)(void*) = nullptr; + std::span (*fields_fn)(const void*) = nullptr; + VisualizationSettings (*default_visualization_fn)(const void*) = nullptr; + SceneInfo (*info_fn)(const void*) = nullptr; + void (*rebuild_fn)(void*) = nullptr; + void (*step_fn)(void*, int) = nullptr; + void (*export_field_fn)(const void*, uint32_t, void*) = nullptr; + void (*export_velocity_fn)(const void*, void*, float*) = nullptr; + cudaStream_t (*stream_fn)(const void*) = nullptr; + + SceneEntry() = default; + ~SceneEntry() { + if (storage != nullptr && destroy_fn != nullptr) destroy_fn(storage); + } + + SceneEntry(const SceneEntry&) = delete; + SceneEntry& operator=(const SceneEntry&) = delete; + + SceneEntry(SceneEntry&& other) noexcept { + *this = std::move(other); + } + + SceneEntry& operator=(SceneEntry&& other) noexcept { + if (this == &other) return *this; + if (storage != nullptr && destroy_fn != nullptr) destroy_fn(storage); + label = other.label; + storage = other.storage; + destroy_fn = other.destroy_fn; + fields_fn = other.fields_fn; + default_visualization_fn = other.default_visualization_fn; + info_fn = other.info_fn; + rebuild_fn = other.rebuild_fn; + step_fn = other.step_fn; + export_field_fn = other.export_field_fn; + export_velocity_fn = other.export_velocity_fn; + stream_fn = other.stream_fn; + other.label = {}; + other.storage = nullptr; + other.destroy_fn = nullptr; + other.fields_fn = nullptr; + other.default_visualization_fn = nullptr; + other.info_fn = nullptr; + other.rebuild_fn = nullptr; + other.step_fn = nullptr; + other.export_field_fn = nullptr; + other.export_velocity_fn = nullptr; + other.stream_fn = nullptr; + return *this; + } + + [[nodiscard]] std::span fields() const { + return fields_fn(storage); + } + + [[nodiscard]] VisualizationSettings default_visualization() const { + return default_visualization_fn(storage); + } + + [[nodiscard]] SceneInfo info() const { + return info_fn(storage); + } + + void rebuild() { + rebuild_fn(storage); + } + + void step(const int sim_steps) { + step_fn(storage, sim_steps); + } + + void export_field(const uint32_t field_index, void* const device_destination) const { + export_field_fn(storage, field_index, device_destination); + } + + void export_velocity(void* const device_destination, float* const host_destination) const { + export_velocity_fn(storage, device_destination, host_destination); + } + + [[nodiscard]] cudaStream_t stream() const { + return stream_fn(storage); + } }; - [[nodiscard]] std::span field_catalog(); - [[nodiscard]] const FieldInfo& current_field_info(AppState& state); - void apply_scene_preset(AppState& state, ScenePreset preset); - void apply_field_visual_preset(AppState& state); - void create_runtime_data(AppData& data); + template + [[nodiscard]] SceneEntry make_scene_entry(const std::string_view label) { + SceneEntry entry{}; + entry.label = label; + entry.storage = new TScene{}; + entry.destroy_fn = [](void* const storage) { delete static_cast(storage); }; + entry.fields_fn = [](const void* const storage) { return static_cast(storage)->fields(); }; + entry.default_visualization_fn = [](const void* const storage) { return static_cast(storage)->default_visualization(); }; + entry.info_fn = [](const void* const storage) { return static_cast(storage)->info(); }; + entry.rebuild_fn = [](void* const storage) { static_cast(storage)->rebuild(); }; + entry.step_fn = [](void* const storage, const int sim_steps) { static_cast(storage)->step(sim_steps); }; + entry.export_field_fn = [](const void* const storage, const uint32_t field_index, void* const device_destination) { static_cast(storage)->export_field(field_index, device_destination); }; + entry.export_velocity_fn = [](const void* const storage, void* const device_destination, float* const host_destination) { static_cast(storage)->export_velocity(device_destination, host_destination); }; + entry.stream_fn = [](const void* const storage) { return static_cast(storage)->stream(); }; + return entry; + } + void destroy_runtime_data(AppData& data); void check_interop_support(const VisualizationApp& renderer); - void rebuild_physics(AppState& state, AppData& data); - void step_physics(const AppState& state, AppData& data, int sim_steps); - bool sync_capture_storage(AppState& state, AppData& data, VisualizationApp& renderer); - bool capture_snapshot(AppState& state, AppData& data, VisualizationApp& renderer, const char* tag); - [[nodiscard]] std::optional active_snapshot(const AppState& state, const AppData& data); + void apply_field_preset(VisualizationSettings& settings, const FieldVisualPreset& preset); + bool sync_capture_storage(AppData& data, VisualizationApp& renderer, const GridShape& grid, bool with_velocity_plane); + + template + bool capture_snapshot(AppState& state, AppData& data, TScene& scene, VisualizationApp& renderer) { + auto check_cuda = [](const cudaError_t status, const std::string_view what) { + if (status == cudaSuccess) return; + throw std::runtime_error(std::string(what) + ": " + cudaGetErrorString(status)); + }; + int slot_index = -1; + for (uint32_t i = 0; i < data.capture.slots.size(); ++i) { + const auto& slot = data.capture.slots[i]; + if (static_cast(i) == data.capture.active_slot) continue; + if (slot.ready_generation != 0 && data.capture.submit_serial < slot.last_used_submit_serial + renderer.frames_in_flight() + 1) continue; + slot_index = static_cast(i); + break; + } + if (slot_index < 0) return false; + + const auto& const_scene = scene; + const auto fields = const_scene.fields(); + if (fields.empty()) throw std::runtime_error("scene must expose at least one field"); + state.selected_field = std::clamp(state.selected_field, 0, static_cast(fields.size()) - 1); + + auto& slot = data.capture.slots.at(static_cast(slot_index)); + const auto field_index = static_cast(state.selected_field); + scene.export_field(field_index, slot.field_cuda_ptr); + if (state.render.show_velocity_plane && slot.velocity_cuda_ptr != nullptr && !slot.velocity_host.empty()) scene.export_velocity(slot.velocity_cuda_ptr, slot.velocity_host.data()); + cudaExternalSemaphoreSignalParams signal_params{}; + signal_params.params.fence.value = data.capture.generation + 1; + check_cuda(cudaSignalExternalSemaphoresAsync(&slot.external_semaphore, &signal_params, 1, scene.stream()), "cudaSignalExternalSemaphoresAsync"); + check_cuda(cudaStreamSynchronize(scene.stream()), "cudaStreamSynchronize"); + slot.ready_generation = data.capture.generation + 1; + slot.has_velocity_host = state.render.show_velocity_plane && slot.velocity_cuda_ptr != nullptr && !slot.velocity_host.empty(); + data.capture.generation = slot.ready_generation; + data.capture.active_slot = slot_index; + return true; + } + + [[nodiscard]] std::optional active_snapshot(const AppData& data); void mark_snapshot_submitted(AppData& data); - void draw_simulation_controls(AppState& state, const AppData& data, bool& reset_requested, bool& field_changed); + + int run_scene_switcher(std::span scenes); + + template + int run_scene() { + auto scenes = std::array{ + make_scene_entry("Scene"), + }; + return run_scene_switcher(scenes); + } } // namespace app diff --git a/vulkan-app/main.cpp b/vulkan-app/main.cpp index e3d82eb..a995ed4 100644 --- a/vulkan-app/main.cpp +++ b/vulkan-app/main.cpp @@ -1,84 +1,12 @@ -#include -#include - import app; +import scene_cloud; +import scene_plume; import std; int main() { - app::AppState state{}; - app::AppData data{}; - std::unique_ptr renderer{}; - - try { - renderer = std::make_unique(); - - app::apply_scene_preset(state, state.physics.preset); - app::create_runtime_data(data); - app::check_interop_support(*renderer); - app::apply_field_visual_preset(state); - - renderer->vk_context().device.waitIdle(); - app::rebuild_physics(state, data); - if (state.physics.emit_source) app::step_physics(state, data, 1); - app::sync_capture_storage(state, data, *renderer); - app::capture_snapshot(state, data, *renderer, "stable_fluids.initial_snapshot"); - if (const auto snapshot = app::active_snapshot(state, data)) renderer->frame_content(state.ui.render, *snapshot); - - while (!renderer->should_close()) { - renderer->begin_frame(); - - bool reset_requested = false; - bool field_changed = false; - app::draw_simulation_controls(state, data, reset_requested, field_changed); - if (field_changed) app::apply_field_visual_preset(state); - - if (reset_requested) { - renderer->vk_context().device.waitIdle(); - app::rebuild_physics(state, data); - if (state.physics.emit_source) app::step_physics(state, data, 1); - app::sync_capture_storage(state, data, *renderer); - app::capture_snapshot(state, data, *renderer, "stable_fluids.initial_snapshot"); - if (const auto snapshot = app::active_snapshot(state, data)) renderer->frame_content(state.ui.render, *snapshot); - } else if (field_changed && app::sync_capture_storage(state, data, *renderer)) { - app::capture_snapshot(state, data, *renderer, "stable_fluids.field_change_realloc"); - if (const auto snapshot = app::active_snapshot(state, data)) renderer->frame_content(state.ui.render, *snapshot); - } - - auto snapshot = app::active_snapshot(state, data); - renderer->draw_visualization_ui(state.ui.render, snapshot); - if (app::sync_capture_storage(state, data, *renderer)) { - app::capture_snapshot(state, data, *renderer, "stable_fluids.visual_storage_reset"); - snapshot = app::active_snapshot(state, data); - if (snapshot) renderer->frame_content(state.ui.render, *snapshot); - } - - if (!reset_requested) { - const bool run_simulation = !state.ui.playback.paused || state.ui.playback.step_once; - if (run_simulation) { - app::step_physics(state, data, state.ui.playback.sim_steps_per_frame); - if (data.capture.steps_since_snapshot < static_cast(state.ui.playback.snapshot_interval)) ++data.capture.steps_since_snapshot; - if (data.capture.steps_since_snapshot >= static_cast(state.ui.playback.snapshot_interval)) app::capture_snapshot(state, data, *renderer, "stable_fluids.simulation_snapshot"); - } - } - - state.ui.playback.step_once = false; - if ((field_changed || !snapshot) && !reset_requested) app::capture_snapshot(state, data, *renderer, "stable_fluids.refresh_snapshot"); - - snapshot = app::active_snapshot(state, data); - const bool submitted = renderer->render_frame(state.ui.render, snapshot); - if (submitted) app::mark_snapshot_submitted(data); - } - - renderer->vk_context().device.waitIdle(); - app::destroy_runtime_data(data); - return 0; - } catch (const std::exception& e) { - try { - if (renderer) renderer->vk_context().device.waitIdle(); - } catch (...) { - } - app::destroy_runtime_data(data); - std::fprintf(stderr, "%s\n", e.what()); - return 1; - } + auto scenes = std::array{ + app::make_scene_entry("Cloud"), + app::make_scene_entry("Plume"), + }; + return app::run_scene_switcher(scenes); } diff --git a/vulkan-app/scene_cloud.cpp b/vulkan-app/scene_cloud.cpp new file mode 100644 index 0000000..71362cc --- /dev/null +++ b/vulkan-app/scene_cloud.cpp @@ -0,0 +1,400 @@ +module; + +#include "stable-fluids-3d.h" + +#include + +module scene_cloud; + +import app; +import std; + +namespace scene_cloud { + + namespace { + + struct CloudFieldInfo { + app::FieldInfo view{}; + uint32_t export_kind = STABLE_FLUIDS_EXPORT_FIELD; + bool use_density_field = false; + }; + + struct CloudSeed { + float x; + float y; + float z; + float rx; + float ry; + float rz; + float amplitude; + }; + + constexpr std::array field_catalog_storage{ + CloudFieldInfo{ + .view = + { + .label = "Cloud Density", + .preset = + { + .density_scale = 10.08f, + .scalar_min = 0.0f, + .scalar_max = 0.66f, + .scalar_opacity = 5.7f, + .scalar_low_r = 0.82f, + .scalar_low_g = 0.87f, + .scalar_low_b = 0.93f, + .scalar_high_r = 0.99f, + .scalar_high_g = 0.99f, + .scalar_high_b = 0.98f, + .shaded_volume = true, + }, + }, + .export_kind = STABLE_FLUIDS_EXPORT_FIELD, + .use_density_field = true, + }, + CloudFieldInfo{ + .view = + { + .label = "Velocity Magnitude", + .preset = + { + .density_scale = 1.0f, + .scalar_min = 0.0f, + .scalar_max = 0.65f, + .scalar_opacity = 2.0f, + .scalar_low_r = 0.17f, + .scalar_low_g = 0.31f, + .scalar_low_b = 0.52f, + .scalar_high_r = 0.98f, + .scalar_high_g = 0.95f, + .scalar_high_b = 0.76f, + }, + }, + .export_kind = STABLE_FLUIDS_EXPORT_VELOCITY_MAGNITUDE, + }, + CloudFieldInfo{ + .view = + { + .label = "Pressure", + .preset = + { + .density_scale = 1.0f, + .scalar_min = -0.10f, + .scalar_max = 0.10f, + .scalar_opacity = 1.9f, + .scalar_low_r = 0.11f, + .scalar_low_g = 0.30f, + .scalar_low_b = 0.72f, + .scalar_high_r = 0.95f, + .scalar_high_g = 0.66f, + .scalar_high_b = 0.24f, + }, + }, + .export_kind = STABLE_FLUIDS_EXPORT_PRESSURE, + }, + CloudFieldInfo{ + .view = + { + .label = "Divergence", + .preset = + { + .density_scale = 1.0f, + .scalar_min = -12.0f, + .scalar_max = 12.0f, + .scalar_opacity = 1.8f, + .scalar_low_r = 0.10f, + .scalar_low_g = 0.26f, + .scalar_low_b = 0.65f, + .scalar_high_r = 0.96f, + .scalar_high_g = 0.42f, + .scalar_high_b = 0.24f, + }, + }, + .export_kind = STABLE_FLUIDS_EXPORT_DIVERGENCE, + }, + }; + constexpr auto field_views = [] { + std::array result{}; + for (size_t i = 0; i < result.size(); ++i) result[i] = field_catalog_storage[i].view; + return result; + }(); + + } // namespace + + Scene::Scene() { + auto check_cuda = [](const cudaError_t status, const std::string_view what) { + if (status == cudaSuccess) return; + throw std::runtime_error(std::string(what) + ": " + cudaGetErrorString(status)); + }; + check_cuda(cudaStreamCreateWithFlags(&stream_, cudaStreamNonBlocking), "cudaStreamCreateWithFlags"); + } + + Scene::~Scene() { + if (context_ != nullptr) stable_fluids_destroy_context_cuda(context_); + if (force_x_device_ != nullptr) cudaFree(force_x_device_); + if (force_y_device_ != nullptr) cudaFree(force_y_device_); + if (force_z_device_ != nullptr) cudaFree(force_z_device_); + if (density_source_device_ != nullptr) cudaFree(density_source_device_); + if (stream_ != nullptr) cudaStreamDestroy(stream_); + } + + std::span Scene::fields() const { + return std::span{field_views}; + } + + app::VisualizationSettings Scene::default_visualization() const { + app::VisualizationSettings settings{ + .view_mode = app::ViewMode::Volume, + .plane_axis = app::PlaneAxis::XY, + .march_steps = 128, + .slice_position = 0.76f, + .show_velocity_plane = false, + .background_bottom_r = 0.82f, + .background_bottom_g = 0.91f, + .background_bottom_b = 1.00f, + .background_top_r = 0.33f, + .background_top_g = 0.58f, + .background_top_b = 0.96f, + }; + app::apply_field_preset(settings, field_catalog_storage[0].view.preset); + return settings; + } + + app::SceneInfo Scene::info() const { + return info_; + } + + cudaStream_t Scene::stream() const { + return stream_; + } + + void Scene::rebuild() { + auto check_cuda = [](const cudaError_t status, const std::string_view what) { + if (status == cudaSuccess) return; + throw std::runtime_error(std::string(what) + ": " + cudaGetErrorString(status)); + }; + auto check_stable = [](const StableFluidsResult code, const std::string_view what) { + if (code == STABLE_FLUIDS_RESULT_OK) return; + throw std::runtime_error(std::string(what) + " failed (" + std::to_string(static_cast(code)) + ")"); + }; + if (context_ != nullptr) check_stable(stable_fluids_destroy_context_cuda(context_), "stable_fluids_destroy_context_cuda"); + if (force_x_device_ != nullptr) cudaFree(force_x_device_); + if (force_y_device_ != nullptr) cudaFree(force_y_device_); + if (force_z_device_ != nullptr) cudaFree(force_z_device_); + if (density_source_device_ != nullptr) cudaFree(density_source_device_); + context_ = nullptr; + density_field_ = 0; + force_x_device_ = nullptr; + force_y_device_ = nullptr; + force_z_device_ = nullptr; + density_source_device_ = nullptr; + force_x_host_.clear(); + force_z_host_.clear(); + wind_mask_.clear(); + shear_mask_.clear(); + curl_x_mask_.clear(); + curl_z_mask_.clear(); + pulse_mask_.clear(); + + const std::array fields{ + StableFluidsFieldCreateDesc{ + .name = "cloud_density", + .diffusion = 0.00002f, + .dissipation = 0.040f, + .initial_value = 0.0f, + }, + }; + std::array field_handles{}; + const StableFluidsContextCreateDesc create_desc{ + .config = config_, + .stream = stream_, + .fields = fields.data(), + .field_count = static_cast(fields.size()), + }; + check_stable(stable_fluids_create_context_cuda(&create_desc, &context_, field_handles.data(), static_cast(field_handles.size())), "stable_fluids_create_context_cuda"); + density_field_ = field_handles[0]; + + const auto nx = config_.nx; + const auto ny = config_.ny; + const auto nz = config_.nz; + const auto cell_count = static_cast(nx) * static_cast(ny) * static_cast(nz); + const auto scalar_bytes = cell_count * sizeof(float); + const float h = config_.cell_size; + const float extent_x = static_cast(nx) * h; + const float extent_y = static_cast(ny) * h; + const float extent_z = static_cast(nz) * h; + grid_ = { + .nx = static_cast(nx), + .ny = static_cast(ny), + .nz = static_cast(nz), + .cell_size = h, + }; + + force_x_host_.assign(cell_count, 0.0f); + force_z_host_.assign(cell_count, 0.0f); + wind_mask_.assign(cell_count, 0.0f); + shear_mask_.assign(cell_count, 0.0f); + curl_x_mask_.assign(cell_count, 0.0f); + curl_z_mask_.assign(cell_count, 0.0f); + pulse_mask_.assign(cell_count, 0.0f); + + std::vector force_y_host(cell_count, 0.0f); + std::vector density_source_host(cell_count, 0.0f); + constexpr std::array cloud_seeds{ + CloudSeed{0.09f, 0.77f, 0.18f, 0.10f, 0.045f, 0.10f, 0.82f}, + CloudSeed{0.18f, 0.79f, 0.24f, 0.08f, 0.040f, 0.08f, 0.58f}, + CloudSeed{0.31f, 0.75f, 0.58f, 0.12f, 0.050f, 0.12f, 0.88f}, + CloudSeed{0.40f, 0.78f, 0.67f, 0.09f, 0.040f, 0.09f, 0.56f}, + CloudSeed{0.53f, 0.72f, 0.34f, 0.13f, 0.055f, 0.11f, 0.84f}, + CloudSeed{0.63f, 0.74f, 0.42f, 0.08f, 0.040f, 0.07f, 0.50f}, + CloudSeed{0.74f, 0.80f, 0.76f, 0.12f, 0.050f, 0.11f, 0.90f}, + CloudSeed{0.83f, 0.77f, 0.67f, 0.08f, 0.040f, 0.08f, 0.54f}, + CloudSeed{0.91f, 0.75f, 0.27f, 0.10f, 0.045f, 0.09f, 0.76f}, + }; + + for (int z = 0; z < nz; ++z) { + for (int y = 0; y < ny; ++y) { + for (int x = 0; x < nx; ++x) { + const auto index = static_cast(x) + static_cast(nx) * (static_cast(y) + static_cast(ny) * static_cast(z)); + const float px = (static_cast(x) + 0.5f) * h; + const float py = (static_cast(y) + 0.5f) * h; + const float pz = (static_cast(z) + 0.5f) * h; + const float fx = px / extent_x; + const float fy = py / extent_y; + const float fz = pz / extent_z; + + float layer = 0.0f; + const float layer_center = 0.76f; + const float layer_half = 0.11f; + const float layer_y = std::abs(fy - layer_center); + if (layer_y < layer_half) layer = 1.0f - layer_y / layer_half; + + float cover = 0.0f; + for (const auto& seed : cloud_seeds) { + float dx = fx - seed.x; + float dz = fz - seed.z; + if (dx > 0.5f) dx -= 1.0f; + if (dx < -0.5f) dx += 1.0f; + if (dz > 0.5f) dz -= 1.0f; + if (dz < -0.5f) dz += 1.0f; + const float dy = fy - seed.y; + const float norm = + (dx * dx) / (seed.rx * seed.rx) + + (dy * dy) / (seed.ry * seed.ry) + + (dz * dz) / (seed.rz * seed.rz); + if (norm >= 1.0f) continue; + cover += seed.amplitude * (1.0f - norm); + } + cover = std::clamp(cover, 0.0f, 1.35f); + const float band = layer * cover; + const float wave = 0.5f + 0.5f * std::sin(fx * 11.0f + fz * 7.5f); + const float warp = 0.5f + 0.5f * std::cos(fx * 6.0f - fz * 9.0f); + const float puff = std::clamp(band * band * (1.22f + 0.28f * wave + 0.20f * warp), 0.0f, 1.38f); + const float dx_center = fx - 0.5f; + const float dz_center = fz - 0.5f; + const float radial = std::sqrt(dx_center * dx_center + dz_center * dz_center); + const float inv_radial = radial > 1.0e-5f ? 1.0f / radial : 0.0f; + wind_mask_[index] = puff * (0.72f + 0.28f * wave); + shear_mask_[index] = puff * std::sin((fy - 0.68f) * 17.0f); + curl_x_mask_[index] = -dz_center * inv_radial * puff * (0.35f + 0.65f * warp); + curl_z_mask_[index] = dx_center * inv_radial * puff * (0.35f + 0.65f * wave); + pulse_mask_[index] = puff * (0.65f + 0.35f * std::sin(fx * 13.0f + fz * 4.0f)); + density_source_host[index] = 1.95f * puff; + force_y_host[index] = 0.055f * puff; + } + } + } + + check_cuda(cudaMalloc(reinterpret_cast(&force_x_device_), scalar_bytes), "cudaMalloc force_x_device"); + check_cuda(cudaMalloc(reinterpret_cast(&force_y_device_), scalar_bytes), "cudaMalloc force_y_device"); + check_cuda(cudaMalloc(reinterpret_cast(&force_z_device_), scalar_bytes), "cudaMalloc force_z_device"); + check_cuda(cudaMalloc(reinterpret_cast(&density_source_device_), scalar_bytes), "cudaMalloc density_source_device"); + check_cuda(cudaMemsetAsync(force_x_device_, 0, scalar_bytes, stream_), "cudaMemsetAsync force_x_device"); + check_cuda(cudaMemsetAsync(force_z_device_, 0, scalar_bytes, stream_), "cudaMemsetAsync force_z_device"); + check_cuda(cudaMemcpyAsync(force_y_device_, force_y_host.data(), scalar_bytes, cudaMemcpyHostToDevice, stream_), "cudaMemcpyAsync force_y_device"); + check_cuda(cudaMemcpyAsync(density_source_device_, density_source_host.data(), scalar_bytes, cudaMemcpyHostToDevice, stream_), "cudaMemcpyAsync density_source_device"); + animation_step_ = 0; + info_ = { + .grid = grid_, + .dt = config_.dt, + .step_count = 0, + .last_step_call_ms = 0.0, + }; + } + + void Scene::step(const int sim_steps) { + auto check_cuda = [](const cudaError_t status, const std::string_view what) { + if (status == cudaSuccess) return; + throw std::runtime_error(std::string(what) + ": " + cudaGetErrorString(status)); + }; + auto check_stable = [](const StableFluidsResult code, const std::string_view what) { + if (code == STABLE_FLUIDS_RESULT_OK) return; + throw std::runtime_error(std::string(what) + " failed (" + std::to_string(static_cast(code)) + ")"); + }; + if (sim_steps <= 0) return; + const auto scalar_bytes = force_x_host_.size() * sizeof(float); + const StableFluidsFieldSourceDesc field_source{ + .field = density_field_, + .values = density_source_device_, + }; + + for (int step_index = 0; step_index < sim_steps; ++step_index) { + const float phase = static_cast(animation_step_) * 0.018f; + const float wind = 0.14f + 0.02f * std::sin(phase * 0.37f); + const float shear = 0.025f * std::sin(phase * 0.63f); + const float drift = 0.035f * std::cos(phase * 0.29f); + const float curl = 0.055f * std::sin(phase * 0.51f); + for (size_t i = 0; i < force_x_host_.size(); ++i) { + force_x_host_[i] = wind * wind_mask_[i] + shear * shear_mask_[i] + curl * curl_x_mask_[i]; + force_z_host_[i] = drift * pulse_mask_[i] + curl * curl_z_mask_[i]; + } + + const auto begin = std::chrono::steady_clock::now(); + check_cuda(cudaMemcpyAsync(force_x_device_, force_x_host_.data(), scalar_bytes, cudaMemcpyHostToDevice, stream_), "cudaMemcpyAsync force_x_device"); + check_cuda(cudaMemcpyAsync(force_z_device_, force_z_host_.data(), scalar_bytes, cudaMemcpyHostToDevice, stream_), "cudaMemcpyAsync force_z_device"); + const StableFluidsStepDesc step_desc{ + .force_x = force_x_device_, + .force_y = force_y_device_, + .force_z = force_z_device_, + .field_sources = &field_source, + .field_source_count = 1, + }; + check_stable(stable_fluids_step_cuda(context_, &step_desc), "stable_fluids_step_cuda"); + info_.last_step_call_ms = std::chrono::duration(std::chrono::steady_clock::now() - begin).count(); + ++info_.step_count; + ++animation_step_; + } + } + + void Scene::export_field(const uint32_t field_index, void* const device_destination) const { + auto check_stable = [](const StableFluidsResult code, const std::string_view what) { + if (code == STABLE_FLUIDS_RESULT_OK) return; + throw std::runtime_error(std::string(what) + " failed (" + std::to_string(static_cast(code)) + ")"); + }; + const auto& field = field_catalog_storage[(std::min)(static_cast(field_index), field_catalog_storage.size() - 1)]; + const StableFluidsExportDesc export_desc{ + .kind = field.export_kind, + .field = field.use_density_field ? density_field_ : 0u, + }; + check_stable(stable_fluids_export_cuda(context_, &export_desc, device_destination), "stable_fluids_export_cuda"); + } + + void Scene::export_velocity(void* const device_destination, float* const host_destination) const { + auto check_cuda = [](const cudaError_t status, const std::string_view what) { + if (status == cudaSuccess) return; + throw std::runtime_error(std::string(what) + ": " + cudaGetErrorString(status)); + }; + auto check_stable = [](const StableFluidsResult code, const std::string_view what) { + if (code == STABLE_FLUIDS_RESULT_OK) return; + throw std::runtime_error(std::string(what) + " failed (" + std::to_string(static_cast(code)) + ")"); + }; + const StableFluidsExportDesc export_desc{ + .kind = STABLE_FLUIDS_EXPORT_VELOCITY, + }; + check_stable(stable_fluids_export_cuda(context_, &export_desc, device_destination), "stable_fluids_export_cuda"); + if (host_destination == nullptr) return; + const auto velocity_bytes = static_cast(grid_.nx) * static_cast(grid_.ny) * static_cast(grid_.nz) * 3u * sizeof(float); + check_cuda(cudaMemcpyAsync(host_destination, device_destination, velocity_bytes, cudaMemcpyDeviceToHost, stream_), "cudaMemcpyAsync velocity snapshot"); + } + +} // namespace scene_cloud diff --git a/vulkan-app/scene_cloud.ixx b/vulkan-app/scene_cloud.ixx new file mode 100644 index 0000000..956e988 --- /dev/null +++ b/vulkan-app/scene_cloud.ixx @@ -0,0 +1,72 @@ +module; + +#include "stable-fluids-3d.h" +#include + +export module scene_cloud; + +import app; +import std; + +export namespace scene_cloud { + + class Scene { + public: + Scene(); + ~Scene(); + + Scene(const Scene&) = delete; + Scene& operator=(const Scene&) = delete; + Scene(Scene&&) noexcept = delete; + Scene& operator=(Scene&&) noexcept = delete; + + [[nodiscard]] std::span fields() const; + [[nodiscard]] app::VisualizationSettings default_visualization() const; + [[nodiscard]] app::SceneInfo info() const; + [[nodiscard]] cudaStream_t stream() const; + + void rebuild(); + void step(int sim_steps); + void export_field(uint32_t field_index, void* device_destination) const; + void export_velocity(void* device_destination, float* host_destination) const; + + private: + StableFluidsSimulationConfig config_{ + .nx = 160, + .ny = 96, + .nz = 96, + .cell_size = 0.0125f, + .dt = 1.0f / 72.0f, + .viscosity = 0.00008f, + .diffuse_iterations = 20, + .pressure_iterations = 88, + .boundary = + { + .x = STABLE_FLUIDS_BOUNDARY_PERIODIC, + .y = STABLE_FLUIDS_BOUNDARY_FIXED, + .z = STABLE_FLUIDS_BOUNDARY_PERIODIC, + }, + .block_x = 8, + .block_y = 8, + .block_z = 4, + }; + cudaStream_t stream_ = nullptr; + StableFluidsContext context_ = nullptr; + StableFluidsFieldHandle density_field_ = 0; + app::GridShape grid_{}; + float* force_x_device_ = nullptr; + float* force_y_device_ = nullptr; + float* force_z_device_ = nullptr; + float* density_source_device_ = nullptr; + std::vector force_x_host_{}; + std::vector force_z_host_{}; + std::vector wind_mask_{}; + std::vector shear_mask_{}; + std::vector curl_x_mask_{}; + std::vector curl_z_mask_{}; + std::vector pulse_mask_{}; + uint64_t animation_step_ = 0; + app::SceneInfo info_{}; + }; + +} // namespace scene_cloud diff --git a/vulkan-app/scene_plume.cpp b/vulkan-app/scene_plume.cpp new file mode 100644 index 0000000..bb90697 --- /dev/null +++ b/vulkan-app/scene_plume.cpp @@ -0,0 +1,355 @@ +module; + +#include "stable-fluids-3d.h" + +#include + +module scene_plume; + +import app; +import std; + +namespace scene_plume { + + namespace { + + struct PlumeFieldInfo { + app::FieldInfo view{}; + uint32_t export_kind = STABLE_FLUIDS_EXPORT_FIELD; + bool use_density_field = false; + }; + + constexpr std::array field_catalog_storage{ + PlumeFieldInfo{ + .view = + { + .label = "Density", + .preset = + { + .density_scale = 1.35f, + .scalar_min = 0.0f, + .scalar_max = 3.5f, + .scalar_opacity = 5.4f, + .scalar_low_r = 0.03f, + .scalar_low_g = 0.04f, + .scalar_low_b = 0.07f, + .scalar_high_r = 0.94f, + .scalar_high_g = 0.90f, + .scalar_high_b = 0.84f, + }, + }, + .export_kind = STABLE_FLUIDS_EXPORT_FIELD, + .use_density_field = true, + }, + PlumeFieldInfo{ + .view = + { + .label = "Velocity Magnitude", + .preset = + { + .density_scale = 1.0f, + .scalar_min = 0.0f, + .scalar_max = 1.3f, + .scalar_opacity = 2.2f, + .scalar_low_r = 0.06f, + .scalar_low_g = 0.10f, + .scalar_low_b = 0.24f, + .scalar_high_r = 0.24f, + .scalar_high_g = 0.88f, + .scalar_high_b = 1.00f, + }, + }, + .export_kind = STABLE_FLUIDS_EXPORT_VELOCITY_MAGNITUDE, + }, + PlumeFieldInfo{ + .view = + { + .label = "Pressure", + .preset = + { + .density_scale = 1.0f, + .scalar_min = -0.18f, + .scalar_max = 0.18f, + .scalar_opacity = 2.3f, + .scalar_low_r = 0.08f, + .scalar_low_g = 0.22f, + .scalar_low_b = 0.62f, + .scalar_high_r = 0.96f, + .scalar_high_g = 0.58f, + .scalar_high_b = 0.18f, + }, + }, + .export_kind = STABLE_FLUIDS_EXPORT_PRESSURE, + }, + PlumeFieldInfo{ + .view = + { + .label = "Divergence", + .preset = + { + .density_scale = 1.0f, + .scalar_min = -24.0f, + .scalar_max = 24.0f, + .scalar_opacity = 2.3f, + .scalar_low_r = 0.05f, + .scalar_low_g = 0.14f, + .scalar_low_b = 0.50f, + .scalar_high_r = 0.94f, + .scalar_high_g = 0.28f, + .scalar_high_b = 0.22f, + }, + }, + .export_kind = STABLE_FLUIDS_EXPORT_DIVERGENCE, + }, + }; + constexpr auto field_views = [] { + std::array result{}; + for (size_t i = 0; i < result.size(); ++i) result[i] = field_catalog_storage[i].view; + return result; + }(); + + } // namespace + + Scene::Scene() { + auto check_cuda = [](const cudaError_t status, const std::string_view what) { + if (status == cudaSuccess) return; + throw std::runtime_error(std::string(what) + ": " + cudaGetErrorString(status)); + }; + check_cuda(cudaStreamCreateWithFlags(&stream_, cudaStreamNonBlocking), "cudaStreamCreateWithFlags"); + } + + Scene::~Scene() { + if (context_ != nullptr) stable_fluids_destroy_context_cuda(context_); + if (force_x_device_ != nullptr) cudaFree(force_x_device_); + if (force_y_device_ != nullptr) cudaFree(force_y_device_); + if (force_z_device_ != nullptr) cudaFree(force_z_device_); + if (density_source_device_ != nullptr) cudaFree(density_source_device_); + if (stream_ != nullptr) cudaStreamDestroy(stream_); + } + + std::span Scene::fields() const { + return std::span{field_views}; + } + + app::VisualizationSettings Scene::default_visualization() const { + app::VisualizationSettings settings{ + .view_mode = app::ViewMode::Volume, + .plane_axis = app::PlaneAxis::XY, + .march_steps = 112, + .slice_position = 0.42f, + .show_velocity_plane = false, + }; + app::apply_field_preset(settings, field_catalog_storage[0].view.preset); + return settings; + } + + app::SceneInfo Scene::info() const { + return info_; + } + + cudaStream_t Scene::stream() const { + return stream_; + } + + void Scene::rebuild() { + auto check_cuda = [](const cudaError_t status, const std::string_view what) { + if (status == cudaSuccess) return; + throw std::runtime_error(std::string(what) + ": " + cudaGetErrorString(status)); + }; + auto check_stable = [](const StableFluidsResult code, const std::string_view what) { + if (code == STABLE_FLUIDS_RESULT_OK) return; + throw std::runtime_error(std::string(what) + " failed (" + std::to_string(static_cast(code)) + ")"); + }; + if (context_ != nullptr) check_stable(stable_fluids_destroy_context_cuda(context_), "stable_fluids_destroy_context_cuda"); + if (force_x_device_ != nullptr) cudaFree(force_x_device_); + if (force_y_device_ != nullptr) cudaFree(force_y_device_); + if (force_z_device_ != nullptr) cudaFree(force_z_device_); + if (density_source_device_ != nullptr) cudaFree(density_source_device_); + context_ = nullptr; + density_field_ = 0; + force_x_device_ = nullptr; + force_y_device_ = nullptr; + force_z_device_ = nullptr; + density_source_device_ = nullptr; + force_x_host_.clear(); + force_z_host_.clear(); + source_mask_.clear(); + swirl_x_mask_.clear(); + swirl_z_mask_.clear(); + drift_mask_.clear(); + + const std::array fields{ + StableFluidsFieldCreateDesc{ + .name = "density", + .diffusion = 0.00005f, + .dissipation = 0.35f, + .initial_value = 0.0f, + }, + }; + std::array field_handles{}; + const StableFluidsContextCreateDesc create_desc{ + .config = config_, + .stream = stream_, + .fields = fields.data(), + .field_count = static_cast(fields.size()), + }; + check_stable(stable_fluids_create_context_cuda(&create_desc, &context_, field_handles.data(), static_cast(field_handles.size())), "stable_fluids_create_context_cuda"); + density_field_ = field_handles[0]; + + const auto nx = config_.nx; + const auto ny = config_.ny; + const auto nz = config_.nz; + const auto cell_count = static_cast(nx) * static_cast(ny) * static_cast(nz); + const auto scalar_bytes = cell_count * sizeof(float); + const float h = config_.cell_size; + const float extent_x = static_cast(nx) * h; + const float extent_y = static_cast(ny) * h; + const float extent_z = static_cast(nz) * h; + grid_ = { + .nx = static_cast(nx), + .ny = static_cast(ny), + .nz = static_cast(nz), + .cell_size = h, + }; + const float source_x = extent_x * 0.50f; + const float source_y = extent_y * 0.13f; + const float source_z = extent_z * 0.50f; + const float source_r = h * 6.0f; + const float swirl_y = source_y + source_r * 0.90f; + const float drift_y = source_y + source_r * 1.35f; + + force_x_host_.assign(cell_count, 0.0f); + force_z_host_.assign(cell_count, 0.0f); + source_mask_.assign(cell_count, 0.0f); + swirl_x_mask_.assign(cell_count, 0.0f); + swirl_z_mask_.assign(cell_count, 0.0f); + drift_mask_.assign(cell_count, 0.0f); + + std::vector force_y_host(cell_count, 0.0f); + std::vector density_source_host(cell_count, 0.0f); + auto radial_weight = [](const float px, const float py, const float pz, const float cx, const float cy, const float cz, const float radius) { + const float dx = px - cx; + const float dy = py - cy; + const float dz = pz - cz; + const float radius2 = radius * radius; + const float dist2 = dx * dx + dy * dy + dz * dz; + if (dist2 >= radius2 || radius2 <= 0.0f) return 0.0f; + return 1.0f - dist2 / radius2; + }; + + for (int z = 0; z < nz; ++z) { + for (int y = 0; y < ny; ++y) { + for (int x = 0; x < nx; ++x) { + const auto index = static_cast(x) + static_cast(nx) * (static_cast(y) + static_cast(ny) * static_cast(z)); + const float px = (static_cast(x) + 0.5f) * h; + const float py = (static_cast(y) + 0.5f) * h; + const float pz = (static_cast(z) + 0.5f) * h; + const float source_weight = radial_weight(px, py, pz, source_x, source_y, source_z, source_r); + const float swirl_weight = radial_weight(px, py, pz, source_x, swirl_y, source_z, source_r * 1.65f); + const float drift_weight = radial_weight(px, py, pz, source_x, drift_y, source_z, source_r * 2.10f); + const float dx = px - source_x; + const float dz = pz - source_z; + const float radial = std::sqrt(dx * dx + dz * dz); + const float inv_radial = radial > 1.0e-5f ? 1.0f / radial : 0.0f; + source_mask_[index] = source_weight; + swirl_x_mask_[index] = -dz * inv_radial * swirl_weight; + swirl_z_mask_[index] = dx * inv_radial * swirl_weight; + drift_mask_[index] = drift_weight; + density_source_host[index] = 32.0f * source_weight; + force_y_host[index] = 7.6f * source_weight; + } + } + } + + check_cuda(cudaMalloc(reinterpret_cast(&force_x_device_), scalar_bytes), "cudaMalloc force_x_device"); + check_cuda(cudaMalloc(reinterpret_cast(&force_y_device_), scalar_bytes), "cudaMalloc force_y_device"); + check_cuda(cudaMalloc(reinterpret_cast(&force_z_device_), scalar_bytes), "cudaMalloc force_z_device"); + check_cuda(cudaMalloc(reinterpret_cast(&density_source_device_), scalar_bytes), "cudaMalloc density_source_device"); + check_cuda(cudaMemsetAsync(force_x_device_, 0, scalar_bytes, stream_), "cudaMemsetAsync force_x_device"); + check_cuda(cudaMemsetAsync(force_z_device_, 0, scalar_bytes, stream_), "cudaMemsetAsync force_z_device"); + check_cuda(cudaMemcpyAsync(force_y_device_, force_y_host.data(), scalar_bytes, cudaMemcpyHostToDevice, stream_), "cudaMemcpyAsync force_y_device"); + check_cuda(cudaMemcpyAsync(density_source_device_, density_source_host.data(), scalar_bytes, cudaMemcpyHostToDevice, stream_), "cudaMemcpyAsync density_source_device"); + animation_step_ = 0; + info_ = { + .grid = grid_, + .dt = config_.dt, + .step_count = 0, + .last_step_call_ms = 0.0, + }; + } + + void Scene::step(const int sim_steps) { + auto check_cuda = [](const cudaError_t status, const std::string_view what) { + if (status == cudaSuccess) return; + throw std::runtime_error(std::string(what) + ": " + cudaGetErrorString(status)); + }; + auto check_stable = [](const StableFluidsResult code, const std::string_view what) { + if (code == STABLE_FLUIDS_RESULT_OK) return; + throw std::runtime_error(std::string(what) + " failed (" + std::to_string(static_cast(code)) + ")"); + }; + if (sim_steps <= 0) return; + const auto scalar_bytes = force_x_host_.size() * sizeof(float); + const StableFluidsFieldSourceDesc field_source{ + .field = density_field_, + .values = density_source_device_, + }; + + for (int step_index = 0; step_index < sim_steps; ++step_index) { + const float phase = static_cast(animation_step_) * 0.045f; + const float drift_x = 1.15f * std::sin(phase); + const float drift_z = 0.85f * std::cos(phase * 0.71f); + const float swirl = 1.65f * std::cos(phase * 0.53f); + for (size_t i = 0; i < force_x_host_.size(); ++i) { + force_x_host_[i] = drift_x * drift_mask_[i] + swirl * swirl_x_mask_[i]; + force_z_host_[i] = drift_z * drift_mask_[i] + swirl * swirl_z_mask_[i]; + } + + const auto begin = std::chrono::steady_clock::now(); + check_cuda(cudaMemcpyAsync(force_x_device_, force_x_host_.data(), scalar_bytes, cudaMemcpyHostToDevice, stream_), "cudaMemcpyAsync force_x_device"); + check_cuda(cudaMemcpyAsync(force_z_device_, force_z_host_.data(), scalar_bytes, cudaMemcpyHostToDevice, stream_), "cudaMemcpyAsync force_z_device"); + const StableFluidsStepDesc step_desc{ + .force_x = force_x_device_, + .force_y = force_y_device_, + .force_z = force_z_device_, + .field_sources = &field_source, + .field_source_count = 1, + }; + check_stable(stable_fluids_step_cuda(context_, &step_desc), "stable_fluids_step_cuda"); + info_.last_step_call_ms = std::chrono::duration(std::chrono::steady_clock::now() - begin).count(); + ++info_.step_count; + ++animation_step_; + } + } + + void Scene::export_field(const uint32_t field_index, void* const device_destination) const { + auto check_stable = [](const StableFluidsResult code, const std::string_view what) { + if (code == STABLE_FLUIDS_RESULT_OK) return; + throw std::runtime_error(std::string(what) + " failed (" + std::to_string(static_cast(code)) + ")"); + }; + const auto& field = field_catalog_storage[(std::min) (static_cast(field_index), field_catalog_storage.size() - 1)]; + const StableFluidsExportDesc export_desc{ + .kind = field.export_kind, + .field = field.use_density_field ? density_field_ : 0u, + }; + check_stable(stable_fluids_export_cuda(context_, &export_desc, device_destination), "stable_fluids_export_cuda"); + } + + void Scene::export_velocity(void* const device_destination, float* const host_destination) const { + auto check_cuda = [](const cudaError_t status, const std::string_view what) { + if (status == cudaSuccess) return; + throw std::runtime_error(std::string(what) + ": " + cudaGetErrorString(status)); + }; + auto check_stable = [](const StableFluidsResult code, const std::string_view what) { + if (code == STABLE_FLUIDS_RESULT_OK) return; + throw std::runtime_error(std::string(what) + " failed (" + std::to_string(static_cast(code)) + ")"); + }; + const StableFluidsExportDesc export_desc{ + .kind = STABLE_FLUIDS_EXPORT_VELOCITY, + }; + check_stable(stable_fluids_export_cuda(context_, &export_desc, device_destination), "stable_fluids_export_cuda"); + if (host_destination == nullptr) return; + const auto velocity_bytes = static_cast(grid_.nx) * static_cast(grid_.ny) * static_cast(grid_.nz) * 3u * sizeof(float); + check_cuda(cudaMemcpyAsync(host_destination, device_destination, velocity_bytes, cudaMemcpyDeviceToHost, stream_), "cudaMemcpyAsync velocity snapshot"); + } + +} // namespace scene_plume diff --git a/vulkan-app/scene_plume.ixx b/vulkan-app/scene_plume.ixx new file mode 100644 index 0000000..52dc64d --- /dev/null +++ b/vulkan-app/scene_plume.ixx @@ -0,0 +1,71 @@ +module; + +#include "stable-fluids-3d.h" +#include + +export module scene_plume; + +import app; +import std; + +export namespace scene_plume { + + class Scene { + public: + Scene(); + ~Scene(); + + Scene(const Scene&) = delete; + Scene& operator=(const Scene&) = delete; + Scene(Scene&&) noexcept = delete; + Scene& operator=(Scene&&) noexcept = delete; + + [[nodiscard]] std::span fields() const; + [[nodiscard]] app::VisualizationSettings default_visualization() const; + [[nodiscard]] app::SceneInfo info() const; + [[nodiscard]] cudaStream_t stream() const; + + void rebuild(); + void step(int sim_steps); + void export_field(uint32_t field_index, void* device_destination) const; + void export_velocity(void* device_destination, float* host_destination) const; + + private: + StableFluidsSimulationConfig config_{ + .nx = 96, + .ny = 128, + .nz = 96, + .cell_size = 0.01f, + .dt = 1.0f / 90.0f, + .viscosity = 0.00012f, + .diffuse_iterations = 24, + .pressure_iterations = 96, + .boundary = + { + .x = STABLE_FLUIDS_BOUNDARY_PERIODIC, + .y = STABLE_FLUIDS_BOUNDARY_FIXED, + .z = STABLE_FLUIDS_BOUNDARY_PERIODIC, + }, + .block_x = 8, + .block_y = 8, + .block_z = 4, + }; + cudaStream_t stream_ = nullptr; + StableFluidsContext context_ = nullptr; + StableFluidsFieldHandle density_field_ = 0; + app::GridShape grid_{}; + float* force_x_device_ = nullptr; + float* force_y_device_ = nullptr; + float* force_z_device_ = nullptr; + float* density_source_device_ = nullptr; + std::vector force_x_host_{}; + std::vector force_z_host_{}; + std::vector source_mask_{}; + std::vector swirl_x_mask_{}; + std::vector swirl_z_mask_{}; + std::vector drift_mask_{}; + uint64_t animation_step_ = 0; + app::SceneInfo info_{}; + }; + +} // namespace scene_plume diff --git a/vulkan-app/shaders/field_plane.slang b/vulkan-app/shaders/field_plane.slang index 3cd57b5..b0b467a 100644 --- a/vulkan-app/shaders/field_plane.slang +++ b/vulkan-app/shaders/field_plane.slang @@ -6,6 +6,8 @@ struct PushConstants float4 forward; float4 volumeMin; float4 volumeMax; + float4 backgroundBottom; + float4 backgroundTop; float4 colorA; float4 colorB; float4 params0; @@ -26,8 +28,6 @@ ConstantBuffer pc; [[vk::binding(0, 0)]] StructuredBuffer fieldData; -static const uint RENDER_MODE_SMOKE = 0; -static const uint RENDER_MODE_SCALAR = 1; static const uint PROJECTION_PERSPECTIVE = 0; static const uint PROJECTION_ORTHOGRAPHIC = 1; static const uint PLANE_XY = 0; @@ -41,10 +41,10 @@ uint field_index(uint x, uint y, uint z) uint field_offset(uint x, uint y, uint z) { - return field_index(x, y, z) * max(pc.params2.y, 1u); + return field_index(x, y, z); } -float4 sample_field(float3 uvw) +float sample_field(float3 uvw) { uvw = clamp(uvw, 0.0.xxx, 1.0.xxx); @@ -53,41 +53,27 @@ float4 sample_field(float3 uvw) uint3 hi = min(lo + 1, pc.params1.xyz - 1); float3 t = frac(p); - float4 c000 = 0.0.xxxx; - float4 c100 = 0.0.xxxx; - float4 c010 = 0.0.xxxx; - float4 c110 = 0.0.xxxx; - float4 c001 = 0.0.xxxx; - float4 c101 = 0.0.xxxx; - float4 c011 = 0.0.xxxx; - float4 c111 = 0.0.xxxx; - - [unroll] - for (uint component = 0; component < 4; ++component) - { - if (component >= pc.params2.y) break; - c000[component] = fieldData[field_offset(lo.x, lo.y, lo.z) + component]; - c100[component] = fieldData[field_offset(hi.x, lo.y, lo.z) + component]; - c010[component] = fieldData[field_offset(lo.x, hi.y, lo.z) + component]; - c110[component] = fieldData[field_offset(hi.x, hi.y, lo.z) + component]; - c001[component] = fieldData[field_offset(lo.x, lo.y, hi.z) + component]; - c101[component] = fieldData[field_offset(hi.x, lo.y, hi.z) + component]; - c011[component] = fieldData[field_offset(lo.x, hi.y, hi.z) + component]; - c111[component] = fieldData[field_offset(hi.x, hi.y, hi.z) + component]; - } - - float4 c00 = lerp(c000, c100, t.x); - float4 c10 = lerp(c010, c110, t.x); - float4 c01 = lerp(c001, c101, t.x); - float4 c11 = lerp(c011, c111, t.x); - float4 c0 = lerp(c00, c10, t.y); - float4 c1 = lerp(c01, c11, t.y); + float c000 = fieldData[field_offset(lo.x, lo.y, lo.z)]; + float c100 = fieldData[field_offset(hi.x, lo.y, lo.z)]; + float c010 = fieldData[field_offset(lo.x, hi.y, lo.z)]; + float c110 = fieldData[field_offset(hi.x, hi.y, lo.z)]; + float c001 = fieldData[field_offset(lo.x, lo.y, hi.z)]; + float c101 = fieldData[field_offset(hi.x, lo.y, hi.z)]; + float c011 = fieldData[field_offset(lo.x, hi.y, hi.z)]; + float c111 = fieldData[field_offset(hi.x, hi.y, hi.z)]; + + float c00 = lerp(c000, c100, t.x); + float c10 = lerp(c010, c110, t.x); + float c01 = lerp(c001, c101, t.x); + float c11 = lerp(c011, c111, t.x); + float c0 = lerp(c00, c10, t.y); + float c1 = lerp(c01, c11, t.y); return lerp(c0, c1, t.z); } void make_ray(float2 uv, out float3 rayOrigin, out float3 rayDir) { - if (pc.params2.w == PROJECTION_ORTHOGRAPHIC) + if (pc.params2.x == PROJECTION_ORTHOGRAPHIC) { float hh = pc.params3.w * 0.5; float hw = hh * pc.params0.x; @@ -108,7 +94,7 @@ bool intersect_plane(float3 rayOrigin, float3 rayDir, out float3 hitPoint) hitPoint = 0.0.xxx; float denom = 0.0; float planeCoord = pc.params3.z; - if (pc.params2.z == PLANE_XY) + if (pc.params2.y == PLANE_XY) { float z = lerp(pc.volumeMin.z, pc.volumeMax.z, planeCoord); denom = rayDir.z; @@ -118,7 +104,7 @@ bool intersect_plane(float3 rayOrigin, float3 rayDir, out float3 hitPoint) hitPoint = rayOrigin + rayDir * t; return hitPoint.x >= pc.volumeMin.x && hitPoint.x <= pc.volumeMax.x && hitPoint.y >= pc.volumeMin.y && hitPoint.y <= pc.volumeMax.y; } - if (pc.params2.z == PLANE_XZ) + if (pc.params2.y == PLANE_XZ) { float y = lerp(pc.volumeMin.y, pc.volumeMax.y, planeCoord); denom = rayDir.y; @@ -161,31 +147,35 @@ float4 fs_main(VSOut input) : SV_Target float3 rayOrigin; float3 rayDir; make_ray(uv, rayOrigin, rayDir); + float skyT = saturate(0.5 + 0.5 * rayDir.y); + float3 background = lerp(pc.backgroundBottom.rgb, pc.backgroundTop.rgb, skyT); float3 worldPos; if (!intersect_plane(rayOrigin, rayDir, worldPos)) { - return float4(0.0.xxx, 1.0); + return float4(background, 1.0); } float3 uvw = (worldPos - pc.volumeMin.xyz) / max(pc.volumeMax.xyz - pc.volumeMin.xyz, 1.0e-5.xxx); - float4 sampleData = sample_field(uvw); - if (pc.params2.x == RENDER_MODE_SMOKE && pc.params2.y == 4) - { - float density = max(sampleData.x * pc.params0.z, 0.0); - float3 dyeRgb = max(sampleData.yzw, 0.0.xxx); - float3 sampleColor = saturate(dyeRgb / max(sampleData.x, 1.0e-6)); - float alpha = 1.0 - exp(-density * pc.params0.w); - return float4(sampleColor * alpha, 1.0); - } - - float value = sampleData.x; - if (pc.params2.y > 1 && pc.params2.y < 4) value = length(sampleData.xyz); - value *= pc.params0.z; + float value = sample_field(uvw) * pc.params0.z; float valueMin = pc.params3.x; float valueMax = max(pc.params3.y, valueMin + 1.0e-5); - float normalized = saturate((value - valueMin) / (valueMax - valueMin)); - float alpha = 1.0 - exp(-normalized * pc.params0.w); - float3 color = lerp(pc.colorA.rgb, pc.colorB.rgb, normalized); - return float4(color * alpha, 1.0); + float alpha = 0.0; + float3 color = 0.0.xxx; + if (valueMin < 0.0 && valueMax > 0.0) + { + float valueAbsMax = max(abs(valueMin), abs(valueMax)); + float magnitude = valueAbsMax > 1.0e-5 ? saturate(abs(value) / valueAbsMax) : 0.0; + if (magnitude < 1.0e-4) return float4(background, 1.0); + alpha = 1.0 - exp(-magnitude * pc.params0.w); + color = value < 0.0 ? pc.colorA.rgb : pc.colorB.rgb; + } + else + { + float normalized = saturate((value - valueMin) / (valueMax - valueMin)); + if (normalized < 1.0e-4) return float4(background, 1.0); + alpha = 1.0 - exp(-normalized * pc.params0.w); + color = lerp(pc.colorA.rgb, pc.colorB.rgb, normalized); + } + return float4(lerp(background, color, alpha), 1.0); } diff --git a/vulkan-app/shaders/field_volume.slang b/vulkan-app/shaders/field_volume.slang index ceb6e76..71b7f17 100644 --- a/vulkan-app/shaders/field_volume.slang +++ b/vulkan-app/shaders/field_volume.slang @@ -6,6 +6,8 @@ struct PushConstants float4 forward; float4 volumeMin; float4 volumeMax; + float4 backgroundBottom; + float4 backgroundTop; float4 colorA; float4 colorB; float4 params0; @@ -26,7 +28,6 @@ ConstantBuffer pc; [[vk::binding(0, 0)]] StructuredBuffer fieldData; -static const uint RENDER_MODE_SMOKE = 0; static const uint PROJECTION_PERSPECTIVE = 0; uint field_index(uint x, uint y, uint z) @@ -36,10 +37,10 @@ uint field_index(uint x, uint y, uint z) uint field_offset(uint x, uint y, uint z) { - return field_index(x, y, z) * max(pc.params2.y, 1u); + return field_index(x, y, z); } -float4 sample_field(float3 uvw) +float sample_field(float3 uvw) { uvw = clamp(uvw, 0.0.xxx, 1.0.xxx); @@ -48,41 +49,27 @@ float4 sample_field(float3 uvw) uint3 hi = min(lo + 1, pc.params1.xyz - 1); float3 t = frac(p); - float4 c000 = 0.0.xxxx; - float4 c100 = 0.0.xxxx; - float4 c010 = 0.0.xxxx; - float4 c110 = 0.0.xxxx; - float4 c001 = 0.0.xxxx; - float4 c101 = 0.0.xxxx; - float4 c011 = 0.0.xxxx; - float4 c111 = 0.0.xxxx; - - [unroll] - for (uint component = 0; component < 4; ++component) - { - if (component >= pc.params2.y) break; - c000[component] = fieldData[field_offset(lo.x, lo.y, lo.z) + component]; - c100[component] = fieldData[field_offset(hi.x, lo.y, lo.z) + component]; - c010[component] = fieldData[field_offset(lo.x, hi.y, lo.z) + component]; - c110[component] = fieldData[field_offset(hi.x, hi.y, lo.z) + component]; - c001[component] = fieldData[field_offset(lo.x, lo.y, hi.z) + component]; - c101[component] = fieldData[field_offset(hi.x, lo.y, hi.z) + component]; - c011[component] = fieldData[field_offset(lo.x, hi.y, hi.z) + component]; - c111[component] = fieldData[field_offset(hi.x, hi.y, hi.z) + component]; - } - - float4 c00 = lerp(c000, c100, t.x); - float4 c10 = lerp(c010, c110, t.x); - float4 c01 = lerp(c001, c101, t.x); - float4 c11 = lerp(c011, c111, t.x); - float4 c0 = lerp(c00, c10, t.y); - float4 c1 = lerp(c01, c11, t.y); + float c000 = fieldData[field_offset(lo.x, lo.y, lo.z)]; + float c100 = fieldData[field_offset(hi.x, lo.y, lo.z)]; + float c010 = fieldData[field_offset(lo.x, hi.y, lo.z)]; + float c110 = fieldData[field_offset(hi.x, hi.y, lo.z)]; + float c001 = fieldData[field_offset(lo.x, lo.y, hi.z)]; + float c101 = fieldData[field_offset(hi.x, lo.y, hi.z)]; + float c011 = fieldData[field_offset(lo.x, hi.y, hi.z)]; + float c111 = fieldData[field_offset(hi.x, hi.y, hi.z)]; + + float c00 = lerp(c000, c100, t.x); + float c10 = lerp(c010, c110, t.x); + float c01 = lerp(c001, c101, t.x); + float c11 = lerp(c011, c111, t.x); + float c0 = lerp(c00, c10, t.y); + float c1 = lerp(c01, c11, t.y); return lerp(c0, c1, t.z); } void make_ray(float2 uv, out float3 rayOrigin, out float3 rayDir) { - if (pc.params2.w != PROJECTION_PERSPECTIVE) + if (pc.params2.x != PROJECTION_PERSPECTIVE) { float hh = pc.params3.w * 0.5; float hw = hh * pc.params0.x; @@ -138,13 +125,15 @@ float4 fs_main(VSOut input) : SV_Target float3 rayOrigin; float3 rayDir; make_ray(uv, rayOrigin, rayDir); + float skyT = saturate(0.5 + 0.5 * rayDir.y); + float3 background = lerp(pc.backgroundBottom.rgb, pc.backgroundTop.rgb, skyT); float2 hit = intersect_aabb(rayOrigin, rayDir, pc.volumeMin.xyz, pc.volumeMax.xyz); float t0 = max(hit.x, 0.0); float t1 = hit.y; if (t1 <= t0) { - return float4(0.0.xxx, 1.0); + return float4(background, 1.0); } float distanceInside = t1 - t0; @@ -154,6 +143,8 @@ float4 fs_main(VSOut input) : SV_Target float3 color = 0.0.xxx; float valueMin = pc.params3.x; float valueMax = max(pc.params3.y, valueMin + 1.0e-5); + float3 voxel = 1.0.xxx / max(float3(pc.params1.xyz - 1), 1.0.xxx); + const float3 lightDir = normalize(float3(-0.45, 0.82, -0.34)); [loop] for (uint i = 0; i < steps; ++i) @@ -161,28 +152,37 @@ float4 fs_main(VSOut input) : SV_Target float travel = t0 + (i + 0.5) * stepLength; float3 worldPos = rayOrigin + travel * rayDir; float3 uvw = (worldPos - pc.volumeMin.xyz) / max(pc.volumeMax.xyz - pc.volumeMin.xyz, 1.0e-5.xxx); - float4 sampleData = sample_field(uvw); - - if (pc.params2.x == RENDER_MODE_SMOKE && pc.params2.y == 4) + float value = sample_field(uvw) * pc.params0.z; + if (valueMin < 0.0 && valueMax > 0.0) { - float density = max(sampleData.x * pc.params0.z, 0.0); - if (density < 1.0e-4) continue; - - float3 dyeRgb = max(sampleData.yzw, 0.0.xxx); - float3 sampleColor = saturate(dyeRgb / max(sampleData.x, 1.0e-6)); - float alpha = 1.0 - exp(-density * pc.params0.w * stepLength); + float valueAbsMax = max(abs(valueMin), abs(valueMax)); + float magnitude = valueAbsMax > 1.0e-5 ? saturate(abs(value) / valueAbsMax) : 0.0; + if (magnitude < 1.0e-4) continue; + float alpha = 1.0 - exp(-magnitude * pc.params0.w * stepLength); + float3 sampleColor = value < 0.0 ? pc.colorA.rgb : pc.colorB.rgb; color += transmittance * alpha * sampleColor; transmittance *= 1.0 - alpha; } else { - float value = sampleData.x; - if (pc.params2.y > 1 && pc.params2.y < 4) value = length(sampleData.xyz); - value *= pc.params0.z; float normalized = saturate((value - valueMin) / (valueMax - valueMin)); if (normalized < 1.0e-4) continue; float alpha = 1.0 - exp(-normalized * pc.params0.w * stepLength); float3 sampleColor = lerp(pc.colorA.rgb, pc.colorB.rgb, normalized); + if (pc.params2.z != 0) + { + float dx = sample_field(clamp(uvw + float3(voxel.x, 0.0, 0.0), 0.0.xxx, 1.0.xxx)) * pc.params0.z - value; + float dy = sample_field(clamp(uvw + float3(0.0, voxel.y, 0.0), 0.0.xxx, 1.0.xxx)) * pc.params0.z - value; + float dz = sample_field(clamp(uvw + float3(0.0, 0.0, voxel.z), 0.0.xxx, 1.0.xxx)) * pc.params0.z - value; + float3 normal = normalize(float3(-dx, -dy, -dz) + 1.0e-5.xxx); + float diffuse = saturate(dot(normal, lightDir)); + float probe = sample_field(clamp(uvw + lightDir * 0.035, 0.0.xxx, 1.0.xxx)) * pc.params0.z; + float shadow = exp(-probe * 1.4); + float lighting = 0.42 + 0.48 * diffuse + 0.22 * shadow; + float silver = pow(saturate(1.0 - dot(normal, -rayDir)), 3.0); + sampleColor *= lighting; + sampleColor += 0.18 * silver.xxx; + } color += transmittance * alpha * sampleColor; transmittance *= 1.0 - alpha; } @@ -190,5 +190,5 @@ float4 fs_main(VSOut input) : SV_Target if (transmittance < 0.01) break; } - return float4(color, 1.0); + return float4(background * transmittance + color, 1.0); }