diff --git a/.gitignore b/.gitignore index 1532f34..3a9522e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,6 @@ *.mod +*.txt +*.case *.o *.smod *.f0* @@ -10,3 +12,8 @@ fst_spectrum.dat *.chkp sphere.dat sphere.csv +FST/example/fort.0 +FST/example/neko +FST/example/usr_driver.f90 +tripping/neko +FST/src/generate_FST diff --git a/FST/README.md b/FST/README.md index 868ad1f..5b02b93 100644 --- a/FST/README.md +++ b/FST/README.md @@ -15,28 +15,79 @@ The initialization, generation, application of the FST is driven by `07_fst_bc_d - `fst_bc_driver_apply()` - `fst_bc_driver_finalize()` -An example of usage is given in the user file `example.f90`. Note that to apply the boundary condition we use the `field_dirichlet_update` function which -requires the use of the `user_velocity` boundary condition on the desired boundary (see `example.case`). +An example of usage is given in the user file `example/user.f90`. Note that to apply the boundary condition we use the `field_dirichlet_update` function which +requires the use of the `user_velocity` boundary condition on the desired boundary (see `example/run.case`). ## Case file -The driver module uses some parameters that should be given in the case file. Below is the JSON object taken from `example.case` that shows which parameters to use: +The driver module uses some parameters that should be given in the case file. Below is the JSON object taken from `example/run.case` that shows which parameters to use: +With file regeneration: ```.json "FST": { "enabled": true, // default is true "t_start": 0.0001, // Time at which to start applying FST "t_ramp": 0.001, // Length of the linear ramp in time "alpha": 0.2, // see below for full explanation of what this is - "ystart": -0.01, // Lower bound for the fringe function - "yend": 0.01, // High bound for the fringe function - "periodic_z": true // Self-explanatory. If periodic in y add "periodic_y": true + "ystart": -0.01, // Lower bound for the fringe function (Also exists for z, if y is periodic) + "yend": 0.01, // High bound for the fringe function (Also exists for z, if y is periodic) + "periodic_z": true, // Self-explanatory. If periodic in y add "periodic_y": true + "regen_files": true, // Set to true to generate wavenumbers etc. See below for further explanation + "Uinf": 1.0, // Free-stream velocity. Only read if "regen_files" is false. "fst_path": "src" // Path where the fst files should be written. } ``` +Without file regeneration (reuse previously written files) +```.json +"FST": { + "enabled": true, // default is true + "t_start": 0.0001, // Time at which to start applying FST + "t_ramp": 0.001, // Length of the linear ramp in time + "alpha": 0.2, // see below for full explanation of what this is + "ystart": -0.01, // Lower bound for the fringe function (Also exists for z, if y is periodic) + "yend": 0.01, // High bound for the fringe function (Also exists for z, if y is periodic) + "periodic_z": true, // Self-explanatory. If periodic in y add "periodic_y": true + "regen_files": false, // Set to true to generate wavenumbers etc. See below for further explanation + "Uinf": 1.0, // Free-stream velocity. Only read if "regen_files" is false. + "fst_path": "src" // Path to the fst files from which to read. +} +``` + +@note That `fst_path` is interpreted differently based on the value of +`regen_files`. + +### FST generation + +In the original implementation, FST wavenumbers and amplitudes are generated +on-the-fly at the beginning of the simulation. This is the default behavior in +the present implementation. Generating wavenumbers/amplitudes on-the-fly will +create three separate files: +- `bb.txt`, which contains the random phases, +- `fst_spectrum.csv`, which contains wavenumbers, amplitudes and the random, + unitary, divergence-free vectors, and +- `sphere.dat`, which contains information about # shells, points per shell, + and wavenumber discretization parameters. + +You also have the possibility to reuse previously generated files to keep +the same FST parameters across two simulations. To do that, use the parameter +`FST.regen_files` and set it to `false`. Be careful that the 3 files mentioned +above must be present in the same folder as your executable. + +The format of the fst_spectrum.csv file is a bit special and differs +from previous implementations. This version of the code requires it to have 8 +columns, which are as follows: + +```.csv +ShellNo,kx,ky,kz,amp,u_hat_pn1,u_hat_pn2,u_hat_pn3 +``` + +If `regen_files` is `false`, you must also specify the free-stream velocity +since nothing from `01_global_params.f90` will be used. This can be done via +the parameters `FST.Uinf`. + ### Spatial fringe parameters -A smooth fringe function is applied on the 2D inlet plane, which at the moment is assumed to be `(y,z)`. +A smoothing function in space is applied on the 2D inlet boundary. The shape of this fringe is the one used in SIMSON and by lots of other people: $$ @@ -51,9 +102,20 @@ $$ Note that $\lambda_u = 1$ if the direction `u` is set to be periodic. -`_start` and `_end` parameters need to be set by the user, which represent geometrical coordinates. -By default, and only if the direction is not periodic, `_start` will be set to the minimum coordinate on the boundary (in that direction). -The same goes for `_end`, it will be by default set to the maximum value. + +Below is an example of fringe function in 1 dimension. +![An example of fringe function with different alphas](fringe.png "Example of fringe function") + +And here is an example of how it looks like on an inlet boundary. In this case, we are showing +the z-component of velocity where the baseflow has 0 velocity. The z-direction is set to be +periodic and therefore there is no smoothing along the z-direction. The y-direction is not +periodic. We have exaggerated the extents of the fringe function to make it obvious, but +this will very much be case dependent. +![A practical example of an inlet boundary](fst.png "Example of z-component of FST on an inlet boundary") + +The `_start` and `_end` parameters can be set by the user. By default, and only if the direction is not +periodic, `_start`/`_end` will be set to the minimum/maximum coordinate on the boundary (in that direction). + The quantities $\delta_{u,*}$ are computed as a percentage $\alpha$ of the total boundary length in the direction `u`: $\delta_{u,rise} = \delta_{u,fall} = \alpha * L_u$, where $L_u$ is the total domain length at the inlet in the direction u. diff --git a/FST/example/makeneko_cpu b/FST/example/makeneko_cpu new file mode 100755 index 0000000..f518025 --- /dev/null +++ b/FST/example/makeneko_cpu @@ -0,0 +1,29 @@ +#!/bin/bash + +usage() { + + echo "Usage" + echo "./makeneko_cpu " + echo "" + echo "Example" + echo "./makeneko_cpu swept-wing.f90 /cfs/klemming/scratch/y/ycl/Neko/neko ./src" + +} + +# Check if exactly 4 arguments are given +if [ "$#" -ne 3 ]; then + echo "Error: Exactly 4 arguments are required." + usage + exit +fi + +NEKO_INSTALL_PATH=${2} +src_path=${3} + +backend="${src_path}/bcknd/cpu/opr_fst_cpu.f90 ${src_path}/bcknd/device/opr_fst_device.F90" +interface="${src_path}/fst_operator.f90" +fst_core="${src_path}/FST-core/0*" +driver="${src_path}/drivers/0*" + +echo ${NEKO_INSTALL_PATH}/bin/makeneko $backend $interface $fst_core $driver $1 +${NEKO_INSTALL_PATH}/bin/makeneko $backend $interface $fst_core $driver $1 diff --git a/FST/example/makeneko_gpu b/FST/example/makeneko_gpu new file mode 100755 index 0000000..5d46d35 --- /dev/null +++ b/FST/example/makeneko_gpu @@ -0,0 +1,79 @@ +#!/bin/bash + +usage() { + + echo "Usage" + echo "./makeneko_gpu " + echo "" + echo "Example" + echo "./makeneko_gpu swept-wing.f90 /cfs/klemming/scratch/y/ycl/Neko/neko hip ./src" + +} + +NEKO_INSTALL_PATH=${2} +which_gpu=${3} +src_path=${4} + +# Check if exactly 4 arguments are given +if [ "$#" -ne 4 ]; then + echo "Error: Exactly 4 arguments are required." + usage + exit +fi + +# Check if $3 is 'cuda' or 'hip' +if [ "$3" != "cuda" ] && [ "$3" != "hip" ]; then + echo "Error: Argument 3 must be 'cuda' or 'hip'." + usage +fi + +echo "[INFO] Neko install path: $NEKO_INSTALL_PATH" +echo "[INFO] GPU backend : $which_gpu" +echo "[INFO] User source path : $src_path" +echo + +# First, copy makeneko to a dummy one +cp "${NEKO_INSTALL_PATH}/bin/makeneko" dummy_makeneko + +# Add relevant flag depending on CUDA or HIP +if [ "${which_gpu}" == "cuda" ]; then + HAVE_CUDA=$(grep "HAVE_CUDA_BCKND=" dummy_makeneko | cut -d'=' -f2) + ADD_CUSTOM_DEF="-DHAVE_CUDA=${HAVE_CUDA}" +else + HAVE_HIP=$(grep "HAVE_HIP_BCKND=" dummy_makeneko | cut -d'=' -f2) + ADD_CUSTOM_DEF="-DHAVE_HIP=${HAVE_HIP}" +fi + +# Modify the dummy makeneko and add the relevant variables HAVE_CUDA +# and HAVE_HIP to the FCFLAGS +FCFLAGS_CURRENT=$(grep "FCFLAGS=" dummy_makeneko | cut -d"'" -f2) +FCFLAGS_NEW="${FCFLAGS_CURRENT} ${ADD_CUSTOM_DEF}" + +sed -i "s|^FCFLAGS=.*|FCFLAGS='${FCFLAGS_NEW}'|g" dummy_makeneko + +# Set the correct files to compile +backend_path_gpu="${src_path}/bcknd/device" +backend_path_cpu="${src_path}/bcknd/cpu" + +backend_cpu="$backend_path_cpu/opr_fst_cpu.f90" + +if [ "${which_gpu}" == "cuda" ]; then + backend_device="$backend_path_gpu/cuda/opr_fst.cu" +else + backend_device="$backend_path_gpu/hip/opr_fst.hip" +fi + +backend_interface="$backend_path_gpu/opr_fst_device.F90" + +backend="$backend_device $backend_cpu $backend_interface" +interface="${src_path}/fst_operator.f90" +fst_core="${src_path}/FST-core/0*" +driver="${src_path}/drivers/0*" + +# Put here any extra files to compile +extras="${src_path}/extras/0*" + +echo $NEKO_INSTALL_PATH/bin/makeneko $backend $interface $fst_core $driver $extras $1 +./dummy_makeneko $backend $interface $fst_core $driver $extras $1 + +rm dummy_makeneko diff --git a/FST/example/run.case b/FST/example/run.case index b398f7e..d8bd7fe 100644 --- a/FST/example/run.case +++ b/FST/example/run.case @@ -13,9 +13,8 @@ "enabled": false }, "time": { - "end_time": 1.5, - "variable_timestep": true, - "target_cfl": 0.5, + "end_time": 0.3, + "variable_timestep": false, "timestep": 1e-3, "max_timestep": 0.25e-2 }, @@ -27,10 +26,14 @@ "FST": { "enabled": true, // !!! FST IS DISABLED !! - "t_start": 0.3, + "t_start": 0.05, "t_ramp": 0.001, - "alpha": 0.3, // 15% distance to the y boundaries - "periodic_z": true + "alpha": 0.15, // 15% distance to the y boundaries + "periodic_z": true, + "y_start": -0.1, + "y_end": 0.1, + "regen_files": false, + "Uinf": 1.0 }, "fluid": { diff --git a/FST/example/user.f90 b/FST/example/user.f90 index f5937c8..1617614 100644 --- a/FST/example/user.f90 +++ b/FST/example/user.f90 @@ -5,7 +5,7 @@ module user contains - ! Register user defined functions (see user_intf.f90) +! Register user defined functions (see user_intf.f90) subroutine user_setup(u) type(user_t), intent(inout) :: u u%initialize => initialize ! Initialize parameters @@ -28,17 +28,16 @@ subroutine initialize(time) p => neko_registry%get_field("p") coef => neko_user_access%case%fluid%c_Xh - - call fst_bc_driver_initialize(time%t,u,v,w,p,coef,neko_user_access%case%params) - end subroutine initialize + call fst_bc_driver_initialize(time%t,u,v,w,p,coef,neko_user_access%case%params) - !> Set boundary conditions + end subroutine initialize + +!> Set boundary conditions subroutine user_bc(fields, bc, time) type(field_list_t), intent(inout) :: fields type(field_dirichlet_t), intent(in) :: bc type(time_state_t), intent(in) :: time - type(field_t), pointer :: ui,vi,wi,pi type(coef_t) , pointer :: coef @@ -61,8 +60,8 @@ subroutine user_bc(fields, bc, time) ! At the first time step, apply velocity BC by copying ! what has been applied in the initial condition ! - if (time%tstep .eq. 1) then - + if (time%tstep .eq. 1) then + ! Get solution fields (u,v,w) which contain the initial condition ! if we are at the first timestep ui => neko_registry%get_field("u") @@ -81,12 +80,14 @@ subroutine user_bc(fields, bc, time) end if - ! ! Apply FST ! coef => neko_user_access%case%fluid%c_Xh - call fst_bc_driver_apply(u, v, w, bc, coef, time%t, time%tstep, 0.0_rp, .false.) + call fst_bc_driver_apply(u, v, w, bc, coef, time%t, time%tstep, & + 0.0_xp, & ! Angle in the x-y direction + .false.) ! Wether to do FST on CPU + end associate ! @@ -95,10 +96,10 @@ subroutine user_bc(fields, bc, time) else if (trim(fields%items(1)%ptr%name) .eq. "p") then associate(p => fields%items(1)%ptr) - - if (time%tstep .eq. 1) then + + if (time%tstep .eq. 1) then pi => neko_registry%get_field("p") - + ! Pressure can be handled directly on GPU if (neko_bcknd_device .eq. 1) then call device_masked_copy_0(p%x_d, pi%x_d, bc%msk_d, p%dof%size(), bc%msk(0)) diff --git a/FST/fringe.png b/FST/fringe.png new file mode 100644 index 0000000..d0d2fbe Binary files /dev/null and b/FST/fringe.png differ diff --git a/FST/fst.png b/FST/fst.png new file mode 100644 index 0000000..26b9d21 Binary files /dev/null and b/FST/fst.png differ diff --git a/FST/src/FST-core/01_global_params.f90 b/FST/src/FST-core/01_global_params.f90 index 02b317e..04cc753 100644 --- a/FST/src/FST-core/01_global_params.f90 +++ b/FST/src/FST-core/01_global_params.f90 @@ -25,7 +25,7 @@ module global_params integer :: k_length integer :: shell(fst_modes) - integer :: seed + !integer :: seed real(kind=rp) :: k_num(fst_modes, 3) real(kind=rp) :: k_num_all(fst_modes, 3) @@ -44,7 +44,7 @@ subroutine print_param(name, value) real(kind=rp), intent(in) :: value character(len=LOG_SIZE) :: log_buf - write(log_buf, *) name, ": ", value + write(log_buf, *) "[FST] ", name, ": ", value call neko_log%message(log_buf) end subroutine print_param @@ -57,7 +57,7 @@ subroutine print_int(name, value) integer, intent(in) :: value character(len=LOG_SIZE) :: log_buf - write(log_buf, *) name, ": ", value + write(log_buf, *) "[FST] ", name, ": ", value call neko_log%message(log_buf) end subroutine print_int diff --git a/FST/src/FST-core/02_sphere.f90 b/FST/src/FST-core/02_sphere.f90 index 2e1d522..71be5cb 100644 --- a/FST/src/FST-core/02_sphere.f90 +++ b/FST/src/FST-core/02_sphere.f90 @@ -1,7 +1,6 @@ module sphere use neko use global_params - ! use global_params implicit none contains diff --git a/FST/src/FST-core/04_spec.f90 b/FST/src/FST-core/04_spec.f90 index 99ef4a8..f2d42a4 100644 --- a/FST/src/FST-core/04_spec.f90 +++ b/FST/src/FST-core/04_spec.f90 @@ -14,9 +14,12 @@ module spec contains - subroutine spec_s(dlx, dly, dlz, periodic_x, periodic_y, periodic_z) + subroutine spec_s(dlx, dly, dlz, periodic_x, periodic_y, periodic_z, & + seed, write_file_path) real(kind=rp), intent(out) :: dlx, dly, dlz logical, intent(in) :: periodic_x, periodic_y, periodic_z + integer, intent(inout) :: seed + character(len=*), intent(in) :: write_file_path character(len=LOG_SIZE) :: log_buf @@ -87,7 +90,7 @@ subroutine spec_s(dlx, dly, dlz, periodic_x, periodic_y, periodic_z) tke_tot1 = tke_tot1 + ek(kstart + i*dkint, fst_il, 1._rp) end do tke_tot1 = tke_tot1*dkint - call print_param('FST - integrated energy in spectrum ',tke_tot1) + call print_param('integrated energy in spectrum ',tke_tot1) ! ------------------------------------------------------------------------ ! ----- integrate the energy spectrum with nshells points ---------------- @@ -97,13 +100,13 @@ subroutine spec_s(dlx, dly, dlz, periodic_x, periodic_y, periodic_z) tke_tot = tke_tot + ek(kstart + (i-1)*dkint,fst_il,1._rp) end do tke_tot = tke_tot*dkint - write (log_buf, *) 'FST - discretized on ', nshells, ' shells :' , tke_tot + write (log_buf, *) '[FST] discretized on ', nshells, ' shells :' , tke_tot call neko_log%message(log_buf) ! ------------------------------------------------------------------------- ! Write wavenumbers to ffst_ile if (write_files) then - open(file='sphere.dat', unit=10) + open(file=trim(write_file_path) // '/sphere.dat', unit=10) write(10,*) 'energy shell parameters' write(10,'(a20,i18)') 'Nshells',nshells @@ -138,7 +141,7 @@ subroutine spec_s(dlx, dly, dlz, periodic_x, periodic_y, periodic_z) kk(i),Np,seed) call periodicity_chk(co(1,i,1),co(1,i,2),co(1,i,3), & - Np,kk(i),dlx,dly,dlz, periodic_x, periodic_y, periodic_z) + Np,kk(i),dlx,dly,dlz, periodic_x, periodic_y, periodic_z, seed) !write (*,*) "COCO", co(1,i,1) ! add second dodecaeder mirrored at (x)-axis @@ -223,9 +226,9 @@ subroutine spec_s(dlx, dly, dlz, periodic_x, periodic_y, periodic_z) end do ! j=1,2*Np end do ! i=1,nshells ! write(6,*) 'FST - (0,0,0) wavenumber removed' - call neko_log%message('FST - (0,0,0) wavenumber removed') + call neko_log%message(' [FST] (0,0,0) wavenumber removed') - write(log_buf, *) 'Saved ',z1,' of ',z1+z2, ' fst modes.' + write(log_buf, *) '[FST] Saved ',z1,' of ',z1+z2, ' fst modes.' call neko_log%message(log_buf) ! close(12) @@ -293,16 +296,16 @@ subroutine spec_s(dlx, dly, dlz, periodic_x, periodic_y, periodic_z) ! close(11) - write (log_buf, *) 'FST - ',k_length,'wavenumbers generated' + write (log_buf, *) '[FST]', k_length, 'wavenumbers generated' call neko_log%message(log_buf) ! 2012 format(A7,1x,i5,1x,A21) - call print_param('FST - Largest wavelength in x', 2.0*pi/kxmin) - call print_param('FST - Smallest wavelength in x', 2.0*pi/kxmax) - call print_param('FST - Largest wavelength in y', 2.0*pi/kymin) - call print_param('FST - Smallest wavelength in y', 2.0*pi/kymax) - call print_param('FST - Largest wavelength in z', 2.0*pi/kzmin) - call print_param('FST - Smallest wavelength in z', 2.0*pi/kzmax) + call print_param('Largest wavelength in x', 2.0*pi/kxmin) + call print_param('Smallest wavelength in x', 2.0*pi/kxmax) + call print_param('Largest wavelength in y', 2.0*pi/kymin) + call print_param('Smallest wavelength in y', 2.0*pi/kymax) + call print_param('Largest wavelength in z', 2.0*pi/kzmin) + call print_param('Smallest wavelength in z', 2.0*pi/kzmax) ! write(6,2015) 'FST - Largest wavelength in x', 2.0*pi/kxmin! ,kxmin ! write(6,2016) 'FST - Smallest wavelength in x', 2.0*pi/kxmax! ,kxmax @@ -318,7 +321,7 @@ end subroutine spec_s !---------------------------------------------------------------------- - subroutine periodicity_chk(kx,ky,kz,np,kk,dlx,dly,dlz, ifxp, ifyp, ifzp) + subroutine periodicity_chk(kx,ky,kz,np,kk,dlx,dly,dlz, ifxp, ifyp, ifzp, seed) ! implicit none @@ -326,7 +329,7 @@ subroutine periodicity_chk(kx,ky,kz,np,kk,dlx,dly,dlz, ifxp, ifyp, ifzp) integer :: nmax,nmin,kn logical :: ifxp,ifyp,ifzp - !integer :: seed + integer :: seed integer :: np real(kind=rp) :: flip diff --git a/FST/src/FST-core/05_turbu.f90 b/FST/src/FST-core/05_turbu.f90 index 296e104..7b44557 100644 --- a/FST/src/FST-core/05_turbu.f90 +++ b/FST/src/FST-core/05_turbu.f90 @@ -9,12 +9,18 @@ module turbu !---------------------------------------------------------------------- - subroutine make_turbu(coef, periodic_x, periodic_y, periodic_z) - type(coef_t), intent(in) :: coef + subroutine make_turbu(periodic_x, periodic_y, periodic_z, seed, & + write_file_path, dx, dy, dz, gdim, coef) logical, intent(in) :: periodic_x, periodic_y, periodic_z + integer, intent(inout) :: seed + character(len=*), intent(in) :: write_file_path + real(kind=rp), intent(in), optional :: dx, dy, dz + integer, intent(in), optional :: gdim + type(coef_t), intent(in), optional :: coef integer :: k,i,j integer :: shellno + integer :: gdim_ = 3 !integer :: seed real(kind=rp) :: ue,ve,we real(kind=rp) :: uamp,vamp,wamp @@ -22,23 +28,62 @@ subroutine make_turbu(coef, periodic_x, periodic_y, periodic_z) real(kind=rp) :: u_hat(fst_modes, 3), u_hat_p(fst_modes, 3) character(len=LOG_SIZE) :: log_buf - dlx = glmax(coef%dof%x, coef%Xh%lx * coef%Xh%ly * coef%Xh%lz * coef%msh%nelv) - & - glmin(coef%dof%x, coef%Xh%lx * coef%Xh%ly * coef%Xh%lz * coef%msh%nelv) + if (present(gdim)) gdim_ = gdim - dly = glmax(coef%dof%y, coef%Xh%lx * coef%Xh%ly * coef%Xh%lz * coef%msh%nelv) - & - glmin(coef%dof%y, coef%Xh%lx * coef%Xh%ly * coef%Xh%lz * coef%msh%nelv) + call print_param("nshells", real(nshells, kind=rp)) + call print_param("Npmax", real(Npmax, kind=rp)) + call print_param("fst_ti", fst_ti) + call print_param("fst_il", fst_il) + call print_param("kstart", kstart) + call print_param("kend", kend) + + if (.not. present(coef)) then + if (.not. (present(dx) .and. present(dy) .and. present(dz))) then + call neko_error("Please specify either coef or dx,dy,dz") + end if + end if + + if (present(dx)) then + dlx = dx + else + dlx = glmax(coef%dof%x, coef%Xh%lx * coef%Xh%ly * coef%Xh%lz * coef%msh%nelv) - & + glmin(coef%dof%x, coef%Xh%lx * coef%Xh%ly * coef%Xh%lz * coef%msh%nelv) + end if + + if (present(dy)) then + dly = dy + else + dly = glmax(coef%dof%y, coef%Xh%lx * coef%Xh%ly * coef%Xh%lz * coef%msh%nelv) - & + glmin(coef%dof%y, coef%Xh%lx * coef%Xh%ly * coef%Xh%lz * coef%msh%nelv) + end if + + if (present(dz)) then + dlz = dz + else + dlz = glmax(coef%dof%z, coef%Xh%lx * coef%Xh%ly * coef%Xh%lz * coef%msh%nelv) - & + glmin(coef%dof%z, coef%Xh%lx * coef%Xh%ly * coef%Xh%lz * coef%msh%nelv) + end if + + write (log_buf, *) "[FST] Length in x: ", dlx + call neko_log%message(log_buf) + write (log_buf, *) "[FST] Length in y: ", dly + call neko_log%message(log_buf) + write (log_buf, *) "[FST] Length in z: ", dlz + call neko_log%message(log_buf) - dlz = glmax(coef%dof%z, coef%Xh%lx * coef%Xh%ly * coef%Xh%lz * coef%msh%nelv) - & - glmin(coef%dof%z, coef%Xh%lx * coef%Xh%ly * coef%Xh%lz * coef%msh%nelv) if ( pe_rank.eq.0 ) then - - seed = -143 + + ! Hardcoded seed for random # generation.This way the FST generated is always the same. + !seed = -143 + + if (write_files) open(unit=137,form='formatted', & + file=trim(write_file_path) // '/bb.txt') - if (write_files) open(unit=137,form='formatted',file='bb.txt') - call spec_s(dlx, dly, dlz, periodic_x, periodic_y, periodic_z) ! get isotropically distributed wavenumbers in spheres + call spec_s(dlx, dly, dlz, periodic_x, periodic_y, periodic_z, seed, & + write_file_path) ! get isotropically distributed wavenumbers in spheres - do k=1,coef%msh%gdim + do k=1,gdim_ do i=1,fst_modes bb(i,k) = ran2(seed)*2.0*pi ! random phase shift @@ -51,15 +96,15 @@ subroutine make_turbu(coef, periodic_x, periodic_y, periodic_z) if (write_files) close(137) ! write(6,*) 'FST - Random amplitude generated' - call neko_log%message("FST - Random amplitude generated") + call neko_log%message("[FST] Random amplitude generated") ! make sure that continuity is enforced by ensuring u_vec.k_vec=(0 0 0) do i=1,k_length - do j= 1,coef%msh%gdim + do j= 1,gdim_ u_hat(i,j)=bb1(i,j) enddo - do j=1,coef%msh%gdim + do j=1,gdim_ u_hat_p(i,j) = u_hat(i,j) & - (u_hat(i,1)*k_num_all(i,1) & + u_hat(i,2)*k_num_all(i,2) & @@ -70,7 +115,7 @@ subroutine make_turbu(coef, periodic_x, periodic_y, periodic_z) + k_num_all(i,3)**2) enddo - do j=1,coef%msh%gdim + do j=1,gdim_ u_hat_pn(i,j) = u_hat_p(i,j) & / sqrt(u_hat_p(i,1)**2 & + u_hat_p(i,2)**2 & @@ -78,16 +123,16 @@ subroutine make_turbu(coef, periodic_x, periodic_y, periodic_z) enddo enddo - call neko_log%message('FST - Amplitudes projection done') + call neko_log%message('[FST] Amplitudes projection done') ! Check energy in individual modes ue=0. ve=0. we=0. ! Also write the modes - if (write_files) open(file='fst_spectrum.csv',unit=13) - if (write_files) write(13,'(9(A, ","),A)') 'ShellNo','kx','ky','kz', & - 'u_amp','v_amp','w_amp','u_hat_pn1','u_hat_pn2', 'u_hat_pn3' + if (write_files) open(file=trim(write_file_path) // '/fst_spectrum.csv', unit=13) + if (write_files) write(13,'(7(A, ","),A)') 'ShellNo','kx','ky','kz', & + 'amp','u_hat_pn1','u_hat_pn2', 'u_hat_pn3' do i=1,k_length shellno = shell(i) amp = shell_amp(shellno) @@ -98,8 +143,8 @@ subroutine make_turbu(coef, periodic_x, periodic_y, periodic_z) vamp = u_hat_pn(i,2)*amp wamp = u_hat_pn(i,3)*amp - if (write_files) write(13,'(9(g0, ","), g0)') shellno,k_num_all(i,1),k_num_all(i,2), & - k_num_all(i,3),uamp,vamp,wamp, u_hat_pn(i,1), u_hat_pn(i,2), u_hat_pn(i,3) + if (write_files) write(13,'(7(g0, ","), g0)') shellno,k_num_all(i,1),k_num_all(i,2), & + k_num_all(i,3),amp, u_hat_pn(i,1), u_hat_pn(i,2), u_hat_pn(i,3) ue = ue + ((uamp)**2)/2. ve = ve + ((vamp)**2)/2. @@ -108,16 +153,16 @@ subroutine make_turbu(coef, periodic_x, periodic_y, periodic_z) if (write_files) close(13) - write(log_buf,'(A18,10x,E12.5E2)') 'FST - Energy in u',ue + write(log_buf,'(A18,10x,E12.5E2)') '[FST] Energy in u',ue call neko_log%message(log_buf) - write(log_buf,'(A18,10x,E12.5E2)') 'FST - Energy in v',ve + write(log_buf,'(A18,10x,E12.5E2)') '[FST] Energy in v',ve call neko_log%message(log_buf) - write(log_buf,'(A18,10x,E12.5E2)') 'FST - Energy in w',we + write(log_buf,'(A18,10x,E12.5E2)') '[FST] Energy in w',we call neko_log%message(log_buf) - write(log_buf,'(A20,8x,E12.5E2)') 'FST - Estimated tke', & + write(log_buf,'(A20,8x,E12.5E2)') '[FST] Estimated tke', & (ue+ve+we)/2. call neko_log%message(log_buf) - write(log_buf,'(A24,9x,E12.5E2)') 'FST - Estimated Tu*U_inf', & + write(log_buf,'(A24,9x,E12.5E2)') '[FST] Estimated Tu*U_inf', & sqrt((ue+ve+we)/3.) call neko_log%message(log_buf) diff --git a/FST/src/FST-core/06_FST.f90 b/FST/src/FST-core/06_FST.f90 index 92278e5..552fda8 100644 --- a/FST/src/FST-core/06_FST.f90 +++ b/FST/src/FST-core/06_FST.f90 @@ -6,21 +6,29 @@ module FST - use, intrinsic :: iso_c_binding, only : c_ptr, C_NULL_PTR - use global_params + !use global_params use fst_operator, only: fst_bc_compute - use turbu + use turbu, only: make_turbu, glb_uinf use utils, only: neko_error + use field, only: field_t + use coefs, only: coef_t + use logger, only: neko_log, LOG_SIZE use point_zone, only: point_zone_t - use comm, only: pe_rank use math, only: masked_gather_copy_0 use device_math, only: device_masked_gather_copy_0 - use device, only: device_map, device_memcpy, HOST_TO_DEVICE + use num_types, only: rp, xp + use comm, only: pe_rank, MPI_EXTRA_PRECISION, NEKO_COMM + use neko_config, only: NEKO_BCKND_DEVICE + use mpi_f08 + use device, only: device_map, device_memcpy, HOST_TO_DEVICE, device_get_ptr + use, intrinsic :: iso_c_binding, only : c_ptr, C_NULL_PTR implicit none type, public :: FST_t + real(kind=xp) :: Uinf + ! periodic directions logical :: periodic_x logical :: periodic_y @@ -29,7 +37,7 @@ module FST ! x fringe real(kind=rp) :: xmin real(kind=rp) :: xmax - real(kind=rp) :: xstart + real(kind=rp) :: xstart real(kind=rp) :: xend real(kind=rp) :: x_delta_rise real(kind=rp) :: x_delta_fall @@ -43,17 +51,20 @@ module FST real(kind=rp) :: y_delta_fall !> Total fringe amplitude - real(kind=rp) :: fringe_max + real(kind=xp) :: fringe_max !> Final ramp time - real(kind=rp) :: t_end - real(kind=rp) :: t_start + real(kind=xp) :: t_end + real(kind=xp) :: t_start logical :: is_forcing logical :: is_bc + integer :: nshells + integer :: n_modes_total ! = k_length + !> Fringe in space - real(kind=rp), allocatable :: fringe_space(:) + real(kind=xp), allocatable :: fringe_space(:) type(c_ptr) :: fringe_space_d = C_NULL_PTR !> Baseflows, if applying on a non-uniform inflow @@ -65,22 +76,33 @@ module FST type(c_ptr) :: w_baseflow_d = C_NULL_PTR !> Variable that is precomputed to save some time - real(kind=rp), allocatable :: phi_0(:,:) + real(kind=xp), allocatable :: phi_0(:,:) type(c_ptr) :: phi_0_d = C_NULL_PTR - real(kind=rp), allocatable :: random_vectors(:,:) ! u_hat_pn but reshaped + real(kind=xp), allocatable :: random_vectors(:,:) ! u_hat_pn but reshaped type(c_ptr) :: random_vectors_d = C_NULL_PTR - type(c_ptr) :: u_hat_pn_d = C_NULL_PTR + integer, allocatable :: shell(:) type(c_ptr) :: shell_d = C_NULL_PTR + real(kind=xp), allocatable :: shell_amp(:) type(c_ptr) :: shell_amp_d = C_NULL_PTR - real(kind=rp), allocatable :: k_x(:) + real(kind=xp), allocatable :: k_y(:) + real(kind=xp), allocatable :: k_z(:) + real(kind=xp), allocatable :: k_x(:) type(c_ptr) :: k_x_d = C_NULL_PTR + real(kind=xp), allocatable :: phase_shifts(:) + + !> Indicates whether the FST has been generated + logical :: is_generated = .false. + + integer :: seed !< Seed for random number generation + contains ! ======== Init/Free procedures procedure, pass(this) :: init_common => FST_init_common procedure, pass(this) :: init_bc => FST_init_bc + procedure, pass(this) :: read_from_files => FST_read_from_files procedure, pass(this) :: init_forcing => FST_init_forcing procedure, pass(this) :: free => FST_free_params ! ========================================================================= @@ -107,20 +129,23 @@ subroutine FST_init_common(this, & ymin, ymax, ystart, yend, y_delta_rise, y_delta_fall, & fringe_max, & t_start, t_end, & - periodic_x, periodic_y, periodic_z) + periodic_x, periodic_y, periodic_z, seed) class(FST_t), intent(inout) :: this - real(kind=rp), intent(in) :: xmin, xmax, xstart - real(kind=rp), intent(in) :: xend - real(kind=rp), intent(in) :: ymin, ymax, ystart - real(kind=rp), intent(in) :: yend + real(kind=rp), intent(in) :: xstart, xend, xmin, xmax + real(kind=rp), intent(in) :: ystart, yend, ymin, ymax real(kind=rp), intent(in) :: x_delta_rise real(kind=rp), intent(in) :: x_delta_fall real(kind=rp), intent(in) :: y_delta_rise real(kind=rp), intent(in) :: y_delta_fall - real(kind=rp), intent(in) :: fringe_max - real(kind=rp), intent(in) :: t_start - real(kind=rp), intent(in) :: t_end + real(kind=xp), intent(in) :: fringe_max + real(kind=xp), intent(in) :: t_start + real(kind=xp), intent(in) :: t_end logical, intent(in) :: periodic_x, periodic_y, periodic_z + integer, intent(inout), optional :: seed + + integer :: seed_ = -143 + if (present(seed)) seed_ = seed + this%seed = seed_ this%periodic_x = periodic_x this%periodic_y = periodic_y @@ -144,6 +169,8 @@ subroutine FST_init_common(this, & this%t_end = t_end this%t_start = t_start + call this%print() ! show parameters + end subroutine FST_init_common @@ -164,19 +191,19 @@ subroutine FST_init_forcing(this, & real(kind=rp), intent(in) :: x_delta_fall real(kind=rp), intent(in) :: y_delta_rise real(kind=rp), intent(in) :: y_delta_fall - real(kind=rp), intent(in) :: fringe_max - real(kind=rp), intent(in) :: t_start - real(kind=rp), intent(in) :: t_end + real(kind=xp), intent(in) :: fringe_max + real(kind=xp), intent(in) :: t_start + real(kind=xp), intent(in) :: t_end logical, intent(in) :: periodic_x, periodic_y, periodic_z - call neko_log%section('Initializing FST') + call neko_log%section(' [FST] Initializing forcing') - call this%init_common(xstart, xend,xstart,xend,x_delta_rise, x_delta_fall, ystart, & - yend, ystart, yend, y_delta_rise, y_delta_fall, fringe_max, t_start, t_end, & + call this%init_common(xstart, xend, xstart, xend, x_delta_rise, & + x_delta_fall, ystart, yend, ystart, yend, y_delta_rise, & + y_delta_fall, fringe_max, t_start, t_end, & periodic_x, periodic_y, periodic_z) - call this%print() ! show parameters - call neko_log%end_section('Done --> Intializing FST') + call neko_log%end_section('') this%is_forcing = .true. this%is_bc = .false. @@ -197,24 +224,203 @@ subroutine FST_init_bc(this, & real(kind=rp), intent(in) :: x_delta_fall real(kind=rp), intent(in) :: y_delta_rise real(kind=rp), intent(in) :: y_delta_fall - real(kind=rp), intent(in) :: t_start - real(kind=rp), intent(in) :: t_end + real(kind=xp), intent(in) :: t_start + real(kind=xp), intent(in) :: t_end logical, intent(in) :: periodic_x, periodic_y, periodic_z - call neko_log%section('Initializing FST') + call neko_log%section(' [FST] Initializing BC') call this%init_common(xmin, xmax, xstart, xend, x_delta_rise, & x_delta_fall, ymin, ymax, ystart, yend, y_delta_rise, & - y_delta_fall, 1.0_rp, t_start, t_end, & + y_delta_fall, 1.0_xp, t_start, t_end, & periodic_x, periodic_y, periodic_z) - call this%print() ! show parameters - call neko_log%end_section('Done --> Intializing FST') + call neko_log%end_section('') this%is_forcing = .false. this%is_bc = .true. end subroutine FST_init_bc + + !> Initialize all the stuff from files generated by the FST code: + !! - fst_spectrum.csv + !! - sphere.dat + !! - bb.txt + !! + !! NOTE: The structure of fst_spectrum.csv should be: + !! shellno, kx, ky, kz, amp, u_hat_pn(1), u_hat_pn(2), u_hat_pn(3) + subroutine FST_read_from_files(this, path) + class(FST_t), intent(inout) :: this + character(len=*), intent(in) :: path + + integer :: unit, ios, num_columns, num_lines, n_modes_total, i, np_eff, & + ierr, prev_shell, idx_shell_amp + character(len=1) :: delimiter + character(len=1024) :: line + character(len=2048) :: fpath + character(len=20) :: keyword + real(kind=xp) :: tmp + character(len=LOG_SIZE) :: log_buf + + call neko_log%section(' [FST] Reading files') + + delimiter = ',' + + if (pe_rank .eq. 0) then + + ! + ! Read sphere.dat to get number of spheres + ! + fpath = trim(path) // "/sphere.dat" + call neko_log%message("[FST] Reading " // trim(fpath)) + open(file = trim(fpath), unit = unit, status = "old", & + action="read", iostat=ios) + + if (ios /= 0) then + call neko_error("[FST] Error opening " // trim(fpath)) + end if + + read(unit,*) line + read(unit,*) keyword, this%nshells + close(unit) + + write (log_buf, '(A ,I4.4)') "[FST] # of shells:", this%nshells + call neko_log%message(log_buf) + + ! + ! Read FST spectrum, count # of lines to allocate all the arrays + ! + fpath = trim(path) // "/fst_spectrum.csv" + open(file=trim(fpath), unit=unit, status="old", action="read", & + iostat=ios) + call neko_log%message("[FST] Reading " // trim(fpath)) + if (ios /= 0) then + call neko_error("[FST] Error opening " // trim(fpath)) + end if + + num_columns = 1 + num_lines = 0 + + ! Read the file line by line + do + read(unit, '(A)', iostat=ios) line + if (ios /= 0) exit + + ! If it's the first line, count the columns + if (num_columns .eq. 1) then + + ! Count the number of delimiters in the line + do i = 1, len_trim(line) + if (line(i:i) == delimiter) then + num_columns = num_columns + 1 + end if + end do + + end if ! if num_columns .eq. 1 + + num_lines = num_lines + 1 + end do + close(unit) + + ! NOTE: this requirement on the number of columns is a bit hardcoded.. + ! AND different from the original implementation. Hence the + ! extra verbose error message. + if (num_columns .ne. 8) then + call neko_log%message("[FST] ***ERROR READING fst_spectrum.csv***") + call neko_log%message("[FST] fst_spectrum should have 8 cols.") + call neko_log%message("[FST] shell,kx,ky,kz,amp,u_hat_pn1,u_hat_pn2,u_hat_pn3") + call neko_error("[FST] fst_spectrum.csv should have 8 columns") + end if + + ! NOTE: The total number of modes in the file can sometimes be + ! different from the theoretical value 2*N_shells*N_points_per_shell. + ! This is because some modes are removed in the process. + this%n_modes_total = num_lines - 1 ! Remove the header + np_eff = this%n_modes_total / this%nshells + write (log_buf, '(A ,I5.5)') "[FST] Total # of modes:", this%n_modes_total + call neko_log%message(log_buf) + write (log_buf, '(A ,I5.5)') "[FST] Eff. # of points:", np_eff + call neko_log%message(log_buf) + + end if ! pe_rank .eq. 0 + + call MPI_Bcast(this%n_modes_total, 1, MPI_INTEGER, 0, NEKO_COMM, ierr) + call MPI_Bcast(this%nshells, 1, MPI_INTEGER, 0, NEKO_COMM, ierr) + + call MPI_Barrier(NEKO_COMM, ierr) + + ! + ! Allocate all the relevant arrays: + ! shell, kx, ky, kz, amplitudes, u_hat_pn, v_hat_pn, w_hat_pn + ! + allocate(this%shell(this%n_modes_total)) + allocate(this%k_x(this%n_modes_total)) + allocate(this%k_y(this%n_modes_total)) + allocate(this%k_z(this%n_modes_total)) + allocate(this%shell_amp(this%nshells)) + allocate(this%random_vectors(this%n_modes_total,3)) + allocate(this%phase_shifts(this%n_modes_total)) + + if (pe_rank .eq. 0) then + + ! + ! Now read fst_spectrum again and populate all the arrays + ! + open(file=trim(fpath), unit=unit, status="old", action="read", & + iostat=ios) + if (ios /= 0) then + call neko_error("[FST] Error opening " // trim(fpath)) + end if + + read(unit,*) line! read the header + + ! Read the file line by line + do i = 1, this%n_modes_total + + !if (i//this%nshells .eq. 0) + read(unit,*) this%shell(i), this%k_x(i), & + this%k_y(i), this%k_z(i), this%shell_amp((i-1)/np_eff+1), & + this%random_vectors(i,1), this%random_vectors(i,2), & + this%random_vectors(i,3) + !print *, "<<0>>",this%shell(i), this%k_x(i), this%k_y(i), this%k_z(i), this%shell_amp((i-1)/np_eff+1), & + ! this%random_vectors(i,1), this%random_vectors(i,2), this%random_vectors(i,3) + end do + close(unit) + + ! + ! Read the phase shifts in bb.txt + ! + fpath = trim(path) // "/bb.txt" + open(file=trim(fpath), unit=unit, status="old", action="read", iostat=ios) + if (ios /= 0) then + call neko_error("[FST] Error opening " // trim(fpath)) + end if + + do i = 1, this%n_modes_total + read(unit,*) this%phase_shifts(i), tmp + end do + + end if ! pe_rank .eq. 0 + + call MPI_Bcast(this%k_x, this%n_modes_total, MPI_EXTRA_PRECISION, 0, & + NEKO_COMM, ierr) + call MPI_Bcast(this%k_y, this%n_modes_total, MPI_EXTRA_PRECISION, 0, & + NEKO_COMM, ierr) + call MPI_Bcast(this%k_z, this%n_modes_total, MPI_EXTRA_PRECISION, 0, & + NEKO_COMM, ierr) + call MPI_Bcast(this%shell, this%n_modes_total, MPI_INTEGER, 0, NEKO_COMM, & + ierr) + call MPI_Bcast(this%shell_amp, this%nshells, MPI_EXTRA_PRECISION, 0, & + NEKO_COMM, ierr) + call MPI_Bcast(this%random_vectors, this%n_modes_total*3, & + MPI_EXTRA_PRECISION, 0, NEKO_COMM, ierr) + call MPI_Bcast(this%phase_shifts, this%n_modes_total, MPI_EXTRA_PRECISION, & + 0, NEKO_COMM, ierr) + call MPI_Barrier(NEKO_COMM, ierr) + + call neko_log%end_section('') + + end subroutine FST_read_from_files !! Free parameters in global params subroutine FST_free_params(this) @@ -222,6 +428,12 @@ subroutine FST_free_params(this) if(allocated(this%fringe_space)) deallocate(this%fringe_space) if(allocated(this%phi_0)) deallocate(this%phi_0) + if(allocated(this%k_x)) deallocate(this%k_x) + if(allocated(this%k_y)) deallocate(this%k_y) + if(allocated(this%k_z)) deallocate(this%k_z) + if(allocated(this%shell)) deallocate(this%shell) + if(allocated(this%shell_amp)) deallocate(this%shell_amp) + if(allocated(this%phi_0)) deallocate(this%phi_0) if (allocated(this%u_baseflow)) deallocate(this%u_baseflow) if (allocated(this%v_baseflow)) deallocate(this%v_baseflow) @@ -232,28 +444,41 @@ end subroutine FST_free_params subroutine FST_print_params(this) class(FST_t) :: this - call print_param("nshells", real(nshells, kind=rp)) - call print_param("Npmax", real(Npmax, kind=rp)) - call print_param("fst_ti", fst_ti) - call print_param("fst_il", fst_il) - call print_param("kstart", kstart) - call print_param("kend", kend) - call print_param("xmin", this%xmin) - call print_param("xmax", this%xmax) - call print_param("xstart", this%xstart) - call print_param("xend", this%xend) - call print_param("ymin", this%ymin) - call print_param("ymax", this%ymax) - call print_param("ystart", this%ystart) - call print_param("yend", this%yend) - call print_param("fringe_max", this%fringe_max) - call print_param("x_delta_rise", this%x_delta_rise) - call print_param("x_delta_fall", this%x_delta_fall) - call print_param("y_delta_rise", this%y_delta_rise) - call print_param("y_delta_fall", this%y_delta_fall) - call print_param("t_start", this%t_start) - call print_param("t_end", this%t_end) - + character(len=LOG_SIZE) :: log_buf + + if (pe_rank .ne. 0) return + write(log_buf, '(A ,F15.7)') "[FST] xstart ", this%xstart + call neko_log%message(log_buf) + write(log_buf, '(A ,F15.7)') "[FST] xend ", this%xend + call neko_log%message(log_buf) + write(log_buf, '(A ,F15.7)') "[FST] xmin ", this%xmin + call neko_log%message(log_buf) + write(log_buf, '(A ,F15.7)') "[FST] xmax ", this%xmax + call neko_log%message(log_buf) + write(log_buf, '(A ,F15.7)') "[FST] ystart ", this%ystart + call neko_log%message(log_buf) + write(log_buf, '(A ,F15.7)') "[FST] yend ", this%yend + call neko_log%message(log_buf) + write(log_buf, '(A ,F15.7)') "[FST] ymin ", this%ymin + call neko_log%message(log_buf) + write(log_buf, '(A ,F15.7)') "[FST] ymax ", this%ymax + call neko_log%message(log_buf) + write(log_buf, '(A ,F15.7)') "[FST] fringe_max ", this%fringe_max + call neko_log%message(log_buf) + write(log_buf, '(A ,F15.7)') "[FST] x_delta_rise", this%x_delta_rise + call neko_log%message(log_buf) + write(log_buf, '(A ,F15.7)') "[FST] x_delta_fall", this%x_delta_fall + call neko_log%message(log_buf) + write(log_buf, '(A ,F15.7)') "[FST] y_delta_rise", this%y_delta_rise + call neko_log%message(log_buf) + write(log_buf, '(A ,F15.7)') "[FST] y_delta_fall", this%y_delta_fall + call neko_log%message(log_buf) + write(log_buf, '(A ,F15.7)') "[FST] t_start ", this%t_start + call neko_log%message(log_buf) + write(log_buf, '(A ,F15.7)') "[FST] t_end ", this%t_end + call neko_log%message(log_buf) + write(log_buf, '(A ,I0)') "[FST] seed ", this%seed + call neko_log%message(log_buf) end subroutine FST_print_params !> Apply a specific baseflow in our region, from a boundary mask. @@ -331,89 +556,109 @@ subroutine FST_apply_baseflow(this, mask, n, u, v, w) end subroutine FST_apply_baseflow !> Common function for generation - subroutine FST_generate_common(this, coef) + subroutine FST_generate_common(this, coef, path) class(FST_t), intent(inout) :: this type(coef_t), intent(in) :: coef + character(len=*), intent(in) :: path integer :: ierr - call neko_log%section ('Generating FST') - call make_turbu(coef, this%periodic_x, this%periodic_y, this%periodic_z) - - call MPI_Bcast(k_length , 1 , & - MPI_INTEGER , 0, NEKO_COMM, ierr) - call MPI_Bcast(k_num_all, fst_modes*coef%msh%gdim, & - MPI_REAL_PRECISION, 0, NEKO_COMM, ierr) - call MPI_Bcast(k_num , fst_modes*coef%msh%gdim, & - MPI_REAL_PRECISION, 0, NEKO_COMM, ierr) - call MPI_Bcast(u_hat_pn , fst_modes*coef%msh%gdim, & - MPI_REAL_PRECISION, 0, NEKO_COMM, ierr) - call MPI_Bcast(bb , fst_modes*coef%msh%gdim, & - MPI_REAL_PRECISION, 0, NEKO_COMM, ierr) - call MPI_Bcast(shell , fst_modes , & - MPI_INTEGER , 0, NEKO_COMM, ierr) - call MPI_Bcast(shell_amp, nshells , & - MPI_REAL_PRECISION, 0, NEKO_COMM, ierr) - - call neko_log%end_section('Done --> Generating FST') + ! + ! First, generate everything as usual + ! + call neko_log%section (' [FST] Generating') + call make_turbu(this%periodic_x, this%periodic_y, this%periodic_z, & + this%seed, path, coef=coef) + call neko_log%end_section('') + + ! + ! Next, use the arrays defined end subroutine FST_generate_common - !> Generate FST for forcing + !> Generate FST for forcing (empty) subroutine FST_generate_forcing(this, coef, zone, u, v, w) class(FST_t), intent(inout) :: this type(coef_t), intent(in) :: coef class(point_zone_t), intent(in) :: zone type(field_t), intent(in) :: u, v, w - real(kind=rp) :: x, y, z + real(kind=xp) :: x, y, z integer :: ierr, i, idx - integer, pointer :: mask(:) + !! Do the general generation + !call this%generate_common(coef) - ! Do the general generation - call this%generate_common(coef) + !! + !! Copy the baseflow in the zone + !! + !call this%apply_baseflow(zone%mask, zone%size, u, v, w) - ! - ! Copy the baseflow in the zone - ! - call this%apply_baseflow(mask, zone%size, u, v, w) - - ! Generate the fringe in space - allocate(this%fringe_space(zone%size)) + !! Generate the fringe in space + !allocate(this%fringe_space(zone%size)) - ! Initialize the fringe in space - do idx = 1, zone%size - i = mask(idx) - x = coef%dof%x(i,1,1,1) - y = coef%dof%y(i,1,1,1) - z = coef%dof%z(i,1,1,1) + !! Initialize the fringe in space + !do idx = 1, zone%size + ! i = zone%mask(idx) + ! x = coef%dof%x(i,1,1,1) + ! y = coef%dof%y(i,1,1,1) + ! z = coef%dof%z(i,1,1,1) - if ( x .lt. this%xmin .or. x .gt. this%xmax .or. & - y .lt. this%ymin .or. y .gt. this%ymax ) then - this%fringe_space(idx) = 0.0_rp - else - this%fringe_space(idx) = fringe(x, y, this) - end if + ! if ( x .lt. this%xmin .or. x .gt. this%xmax .or. & + ! y .lt. this%ymin .or. y .gt. this%ymax ) then + ! this%fringe_space(idx) = 0.0_xp + ! else + ! this%fringe_space(idx) = fringe(x, y, this) + ! end if - end do + !end do end subroutine FST_generate_forcing !> Do the generation for BC. - subroutine FST_generate_bc(this, coef, bc_mask, n, u, v, w) + subroutine FST_generate_bc(this, coef, bc_mask, n, u, v, w, regen, Uinf, path) class(FST_t), intent(inout) :: this type(coef_t), intent(in) :: coef integer, intent(in) :: bc_mask(0:n) integer, intent(in) :: n type(field_t), intent(in) :: u, v, w + logical, intent(in) :: regen + character(len=*), intent(in) :: path + real(kind=xp), intent(in), optional :: Uinf character(len=LOG_SIZE) :: log_buf real(kind=rp) :: x, y, z integer :: ierr, i, idx, m, j + + if (regen) then + + ! Generate everything from scratch. This will create the files + ! bb.txt, sphere.dat and fst_spectrum.csv + call this%generate_common(coef, path) + this%Uinf = glb_uinf + + else + + if (.not. present(Uinf)) then + call neko_error("[FST] Provide a value for Uinf if you init from file!") + else + this%Uinf = Uinf + end if + + end if + + ! + ! Read data from files (will not use anything from global_params!) + ! + call this%read_from_files(path) + + ! + ! Print some diagnostics just in case + ! + write (log_buf, '(A,F15.7)') "[FST] Uinf set to ", this%Uinf + call neko_log%message(log_buf) + - ! Do the general generation - call this%generate_common(coef) ! ! Apply baseflow in the bc zone @@ -433,7 +678,7 @@ subroutine FST_generate_bc(this, coef, bc_mask, n, u, v, w) if ( z .lt. this%xmin .or. z .gt. this%xmax .or. & y .lt. this%ymin .or. y .gt. this%ymax ) then - this%fringe_space(idx) = 0.0_rp + this%fringe_space(idx) = 0.0_xp else this%fringe_space(idx) = fringe(z, y, this) end if @@ -443,64 +688,68 @@ subroutine FST_generate_bc(this, coef, bc_mask, n, u, v, w) ! ! Precompute time-independent term ! - allocate(this%phi_0(k_length, n)) + allocate(this%phi_0(this%n_modes_total, bc_mask(0))) do j = 1, n x = coef%dof%x(bc_mask(j), 1,1,1) y = coef%dof%y(bc_mask(j), 1,1,1) z = coef%dof%z(bc_mask(j), 1,1,1) - do m = 1, k_length - this%phi_0(m,j) = k_num_all(m,1)*x + k_num_all(m,2)*y + k_num_all(m,3)*z + bb(m,1) ! bb is phase_shift - end do + do m = 1, this%n_modes_total + this%phi_0(m,j) = this%k_x(m)*x + this%k_y(m)*y + this%k_z(m)*z + & + this%phase_shifts(m) + end do end do ! ! Copy the wavenumbers in x direction ! - allocate(this%k_x(fst_modes)) - - do m = 1, fst_modes - this%k_x(m) = k_num_all(m,1) - end do - +!!$ allocate(this%k_x(fst_modes)) +!!$ +!!$ do m = 1, fst_modes +!!$ this%k_x(m) = k_num_all(m,1) +!!$ end do +!!$ ! ! Copy a proper version of u_hat_pn with correct size ! - allocate(this%random_vectors(k_length, 3)) - - do m = 1, k_length - do j = 1, 3 - this%random_vectors(m,j) = u_hat_pn(m,j) - end do - end do - +!!$ allocate(this%random_vectors(this%n_modes_total, 3)) +!!$ +!!$ do m = 1, this%n_modes_total +!!$ do j = 1, 3 +!!$ this%random_vectors(m,j) = u_hat_pn(m,j) +!!$ end do +!!$ end do +!!$ ! Copy everything to device and map with relevant device array pointers if (NEKO_BCKND_DEVICE .eq. 1) then call device_map(this%fringe_space, this%fringe_space_d, n) call device_memcpy(this%fringe_space, this%fringe_space_d, n, & HOST_TO_DEVICE, .false.) - call device_map(this%phi_0, this%phi_0_d, k_length*n) - call device_memcpy(this%phi_0, this%phi_0_d, k_length*n, & - HOST_TO_DEVICE, .false.) + call device_map(this%phi_0, this%phi_0_d, this%n_modes_total*bc_mask(0)) + call device_memcpy(this%phi_0, this%phi_0_d, & + this%n_modes_total*bc_mask(0), HOST_TO_DEVICE, .false.) - call device_map(this%random_vectors, this%random_vectors_d, k_length*3) - call device_memcpy(this%random_vectors, this%random_vectors_d, k_length*3, & - HOST_TO_DEVICE, .false.) + call device_map(this%random_vectors, this%random_vectors_d, & + this%n_modes_total*3) + call device_memcpy(this%random_vectors, this%random_vectors_d, & + this%n_modes_total*3, HOST_TO_DEVICE, .false.) - call device_map(shell, this%shell_d, fst_modes) - call device_memcpy(shell, this%shell_d, fst_modes, & + call device_map(this%shell, this%shell_d, this%n_modes_total) + call device_memcpy(this%shell, this%shell_d, this%n_modes_total, & HOST_TO_DEVICE, .false.) - call device_map(shell_amp, this%shell_amp_d, nshells) - call device_memcpy(shell_amp, this%shell_amp_d, nshells, & + call device_map(this%shell_amp, this%shell_amp_d, this%nshells) + call device_memcpy(this%shell_amp, this%shell_amp_d, this%nshells, & HOST_TO_DEVICE, .false.) - call device_map(this%k_x, this%k_x_d, fst_modes) - call device_memcpy(this%k_x, this%k_x_d, fst_modes, & + call device_map(this%k_x, this%k_x_d, this%n_modes_total) + call device_memcpy(this%k_x, this%k_x_d, this%n_modes_total, & HOST_TO_DEVICE, .true.) end if + this%is_generated = .true. + end subroutine FST_generate_bc ! Forcing to be performed on entire domain, on a local element ix @@ -523,128 +772,72 @@ subroutine FST_forcing_zone(this, zone, & real(kind=rp) :: rand_vec(gdim) real(kind=rp) :: fringe_time - integer, pointer :: mask(:) - mask => zone%mask%get() - - fringe_time = time_ramp(t, this%t_end, this%t_start) - - ! Loop on all points in the point zone - do idx = 1, zone%size - i = mask(idx) - - !> This vector will contain the sum of all Fourier modes - rand_vec = 0.0_rp - - ! Sum all sin modes for each gll point - do m = 1, k_length - phase_shft = bb(m,1) - - ! kx(x - U*t) + ky*y + kz*z - random_phase[-pi, pi] - ! = kx*x + ky*y + kz*z - kx*U*t - random_phase - phi = k_num_all(m,1) * (x(i,1,1,1) - glb_uinf*t) + & - k_num_all(m,2) * y(i,1,1,1) + & - k_num_all(m,3) * z(i,1,1,1) + & - phase_shft - - shellno = shell(m) - pert = shell_amp(shellno)*sin(phi) - - rand_vec(1) = rand_vec(1) + u_hat_pn(m,1)*pert - rand_vec(2) = rand_vec(2) + u_hat_pn(m,2)*pert - rand_vec(3) = rand_vec(3) + u_hat_pn(m,3)*pert - enddo - - fu(i,1,1,1) = fringe_time*this%fringe_space(idx)*( & - this%u_baseflow(idx) + rand_vec(1) - u(i,1,1,1)) - - fv(i,1,1,1) = fringe_time*this%fringe_space(idx)*( & - this%v_baseflow(idx) + rand_vec(2) - v(i,1,1,1)) - - fw(i,1,1,1) = fringe_time*this%fringe_space(idx)*( & - this%w_baseflow(idx) + rand_vec(3) - w(i,1,1,1)) - end do +!!$ fringe_time = time_ramp(t, this%t_end, this%t_start) +!!$ +!!$ ! Loop on all points in the point zone +!!$ do idx = 1, zone%size +!!$ i = zone%mask(idx) +!!$ +!!$ !> This vector will contain the sum of all Fourier modes +!!$ rand_vec = 0.0_rp +!!$ +!!$ ! Sum all sin modes for each gll point +!!$ do m = 1, this%n_modes_total +!!$ phase_shft = bb(m,1) +!!$ +!!$ ! k_x(x - U*t) + ky*y + kz*z - random_phase[-pi, pi] +!!$ ! = k_x*x + ky*y + kz*z - k_x*U*t - random_phase +!!$ phi = k_num_all(m,1) * (x(i,1,1,1) - glb_uinf*t) + & +!!$ k_num_all(m,2) * y(i,1,1,1) + & +!!$ k_num_all(m,3) * z(i,1,1,1) + & +!!$ phase_shft +!!$ +!!$ shellno = shell(m) +!!$ pert = shell_amp(shellno)*sin(phi) +!!$ +!!$ rand_vec(1) = rand_vec(1) + u_hat_pn(m,1)*pert +!!$ rand_vec(2) = rand_vec(2) + u_hat_pn(m,2)*pert +!!$ rand_vec(3) = rand_vec(3) + u_hat_pn(m,3)*pert +!!$ enddo +!!$ +!!$ fu(i,1,1,1) = fringe_time*this%fringe_space(idx)*( & +!!$ this%u_baseflow(idx) + rand_vec(1) - u(i,1,1,1)) +!!$ +!!$ fv(i,1,1,1) = fringe_time*this%fringe_space(idx)*( & +!!$ this%v_baseflow(idx) + rand_vec(2) - v(i,1,1,1)) +!!$ +!!$ fw(i,1,1,1) = fringe_time*this%fringe_space(idx)*( & +!!$ this%w_baseflow(idx) + rand_vec(3) - w(i,1,1,1)) +!!$ end do end subroutine FST_forcing_zone ! Apply FST as a boundary condition based on the bc mask ! Assumes that u,v,w have the same bc masks subroutine FST_apply_BC(this, bc_mask, n, & - x, y, z, t, & + t, & u_bc, v_bc, w_bc, angleXY, on_host) class(FST_t), intent(in) :: this integer, intent(in) :: n ! size of the bc mask integer, intent(in) :: bc_mask(0:n) - real(kind=rp), intent(in), dimension(:,:,:,:) :: x, y, z real(kind=rp), intent(in) :: t - real(kind=rp), intent(inout), dimension(:,:,:,:) :: u_bc, v_bc, w_bc - real(kind=rp), intent(in) :: angleXY + !real(kind=rp), intent(inout), dimension(:,:,:,:) :: u_bc, v_bc, w_bc + type(field_t), intent(inout) :: u_bc, v_bc, w_bc + real(kind=xp), intent(in) :: angleXY logical, intent(in) :: on_host -!!$ integer :: idx, l, m, i, shellno -!!$ integer, parameter :: gdim = 3 -!!$ real(kind=rp) :: phase_shft, phi, amp, pert, urand, vrand, wrand -!!$ real(kind=rp) :: rand_vec(gdim), vel_mag, phi_t - - real(kind=rp) :: fringe_time, cosa, sina + real(kind=xp) :: fringe_time, cosa, sina fringe_time = time_ramp(t, this%t_end, this%t_start) cosa = cos(angleXY) sina = sin(angleXY) - call fst_bc_compute(t, glb_uinf,u_bc, v_bc, w_bc, bc_mask, n, & + call fst_bc_compute(t, this%Uinf, u_bc%x, v_bc%x, w_bc%x, bc_mask, n, & this%u_baseflow, this%v_baseflow, this%w_baseflow, & - this%k_x, k_length, this%phi_0, shell, shell_amp, & + this%k_x, this%n_modes_total, this%phi_0, this%shell, this%shell_amp, & this%random_vectors, angleXY, fringe_time, this%fringe_space, on_host) -!!$ phi_t = glb_uinf*t -!!$ ! Loop on all points in the point zone -!!$ do idx = 1, bc_mask(0) -!!$ -!!$ i = bc_mask(idx) -!!$ -!!$ !> This vector will contain the sum of all Fourier modes -!!$ rand_vec = 0.0_rp -!!$ -!!$ ! Sum all sin modes for each gll point -!!$ do m = 1, k_length -!!$ -!!$ ! Random phase shifts -!!$ !phase_shft = bb(m,1) -!!$ -!!$ ! kx(x - U*t) + ky*y + kz*z + phi -!!$ ! = kx*x + ky*y + kz*z - kx*U*t + phi -!!$ !phi = k_num_all(m,1) * (x(i,1,1,1) - glb_uinf*t) + & -!!$ ! k_num_all(m,2) * y(i,1,1,1) + & -!!$ ! k_num_all(m,3) * z(i,1,1,1) + & -!!$ ! phase_shft -!!$ -!!$ phi = this%phi_0(m,idx) - this%k_x(m)*phi_t -!!$ -!!$ shellno = shell(m) -!!$ -!!$ pert = shell_amp(shellno)*sin(phi) -!!$ -!!$ rand_vec(1) = rand_vec(1) + u_hat_pn(m,1)*pert -!!$ rand_vec(2) = rand_vec(2) + u_hat_pn(m,2)*pert -!!$ rand_vec(3) = rand_vec(3) + u_hat_pn(m,3)*pert -!!$ -!!$ enddo -!!$ -!!$ ! Project the rand_vec if there is a rotation -!!$ urand = rand_vec(1)*cosa - rand_vec(2)*sina -!!$ vrand = rand_vec(1)*sina + rand_vec(2)*cosa -!!$ wrand = rand_vec(3) -!!$ -!!$ u_bc(i,1,1,1) = this%u_baseflow(idx) + & -!!$ fringe_time*this%fringe_space(idx)*urand -!!$ v_bc(i,1,1,1) = this%v_baseflow(idx) + & -!!$ fringe_time*this%fringe_space(idx)*vrand -!!$ w_bc(i,1,1,1) = this%w_baseflow(idx) + & -!!$ fringe_time*this%fringe_space(idx)*wrand -!!$ -!!$ end do - end subroutine FST_apply_BC ! ! Fringe function as defined in original FST. @@ -660,7 +853,7 @@ end subroutine FST_apply_BC ! else if (x.ge.f%xend) then ! S=1 ! else - ! S=1/(1+erp(1/(x-f%xend)+1/(x-f%xstart))) + ! S=1/(1+exp(1/(x-f%xend)+1/(x-f%xstart))) ! endif ! y = f%fringe_max * (S*(x-f%xstart)/(f%delta_rise)-S*((x-f%xend)/f%delta_fall+1)) @@ -671,17 +864,17 @@ end subroutine FST_apply_BC ! Linear ramp in time function time_ramp(t, t_end, t_start) result(ramp) real(kind=rp), intent(in) :: t - real(kind=rp), intent(in) :: t_end - real(kind=rp), intent(in) :: t_start + real(kind=xp), intent(in) :: t_end + real(kind=xp), intent(in) :: t_start - real(kind=rp) :: ramp + real(kind=xp) :: ramp if (t .le. t_start) then - ramp = 0.0_rp + ramp = 0.0_xp else if (t .lt. t_end) then ramp = (t - t_start)/(t_end - t_start) else - ramp = 1.0_rp + ramp = 1.0_xp end if end function time_ramp @@ -690,7 +883,7 @@ end function time_ramp ! Fringe function as described in Schlatter (2001), extended to take y bounds into account too ! ! Here is what the fringe looks like, except the ramp-up is not linear - ! but erponential (see function S below) + ! but exponential (see function S below) ! ! fringe_max ________ ! / \ @@ -707,7 +900,7 @@ function fringe(x, y, f) result(fr) real(kind=rp), intent(in) :: x real(kind=rp), intent(in), optional :: y type(FST_t), intent(in) :: f - real(kind=rp) :: fr + real(kind=xp) :: fr integer :: i character :: a @@ -719,17 +912,17 @@ function fringe(x, y, f) result(fr) end function fringe - ! Smooth step function, 0 if x <= 0, 1 if x >= 1, 1/erp(1/(x-1) + 1/x) between 0 and 1 + ! Smooth step function, 0 if x <= 0, 1 if x >= 1, 1/exp(1/(x-1) + 1/x) between 0 and 1 function S(x) result(y) real(kind=rp), intent(in) :: x real(kind=rp) :: y - if ( x.le.0._rp ) then - y = 0._rp - else if ( x.ge.1._rp ) then - y = 1._rp + if ( x.le.0._xp ) then + y = 0._xp + else if ( x.ge.1._xp ) then + y = 1._xp else - y = 1._rp / (1._rp + exp( 1._rp/(x-1._rp) + 1._rp/x)) + y = 1._xp / (1._xp + exp( 1._xp/(x-1._xp) + 1._xp/x)) end if end function S diff --git a/FST/src/bcknd/cpu/opr_fst_cpu.f90 b/FST/src/bcknd/cpu/opr_fst_cpu.f90 index 7c241c2..12256d6 100644 --- a/FST/src/bcknd/cpu/opr_fst_cpu.f90 +++ b/FST/src/bcknd/cpu/opr_fst_cpu.f90 @@ -47,13 +47,14 @@ subroutine opr_fst_cpu_fst(t, Uinf, u,v,w, mask,n_mask, & random_vectors, cosa, sina, fringe_time, fringe_space) real(kind=rp), intent(inout), dimension(:,:,:,:) :: u, v, w + real(kind=rp), intent(in) :: t real(kind=rp), intent(in) :: u_baseflow(:), v_baseflow(:), w_baseflow(:) integer, intent(in) :: n_mask, n_total_modes integer, intent(in) :: mask(0:n_mask), shell(:) real(kind=xp), intent(in), dimension(:) :: wavenumbers_x, shell_amplitudes, & fringe_space real(kind=xp), intent(in), dimension(:,:) :: phi_0, random_vectors - real(kind=xp), intent(in) :: t, Uinf, fringe_time, sina, cosa + real(kind=xp), intent(in) :: Uinf, fringe_time, sina, cosa real(kind=xp) :: phi_t, f, rand_vec(3), pert, urand, vrand, wrand, phi @@ -65,7 +66,7 @@ subroutine opr_fst_cpu_fst(t, Uinf, u,v,w, mask,n_mask, & do idx = 1, mask(0) !> This vector will contain the sum of all Fourier modes - rand_vec = 0.0_rp + rand_vec = 0.0_xp ! Sum all sin modes for each gll point do m = 1, n_total_modes diff --git a/FST/src/bcknd/device/cuda/opr_fst.cu b/FST/src/bcknd/device/cuda/opr_fst.cu index a37e5ff..6e81e76 100644 --- a/FST/src/bcknd/device/cuda/opr_fst.cu +++ b/FST/src/bcknd/device/cuda/opr_fst.cu @@ -37,7 +37,7 @@ #include #include #include -#include +#include #include #include @@ -46,7 +46,7 @@ */ template< typename T, typename U> __global__ void fst_kernel( - const U t, + const T t, const U Uinf, T * __restrict__ u, T * __restrict__ v, @@ -110,27 +110,27 @@ __global__ void fst_kernel( extern "C" { -void cuda_fst(real *t, real *Uinf, +void cuda_fst(real *t, real_xp *Uinf, void *u_d, void *v_d, void *w_d, int *mask_d, int *n_mask, void *ubf_d, void *vbf_d, void *wbf_d, void *k_x_d, int *n_total_modes, void *phi_0_d, int *shell_d, - void *shell_amp_d, void *randvec_d, real *cosa, real *sina, - real *fringe_time, void *fs_d) { + void *shell_amp_d, void *randvec_d, real_xp *cosa, real_xp *sina, + real_xp *fringe_time, void *fs_d) { const dim3 nthrds(1024, 1, 1); const dim3 nblcks(*n_mask, 1, 1); const cudaStream_t stream = (cudaStream_t) glb_cmd_queue; - fst_kernel + fst_kernel <<>>( *t, *Uinf, (real *) u_d, (real *) v_d, (real *) w_d, (int *) mask_d, *n_mask, (real *) ubf_d, (real *) vbf_d, (real *) wbf_d, - (real *) k_x_d, *n_total_modes, - (real *) phi_0_d, (int *) shell_d, (real *) shell_amp_d, - (real *) randvec_d, - *cosa, *sina, *fringe_time, (real *) fs_d + (real_xp *) k_x_d, *n_total_modes, + (real_xp *) phi_0_d, (int *) shell_d, (real_xp *) shell_amp_d, + (real_xp *) randvec_d, + *cosa, *sina, *fringe_time, (real_xp *) fs_d ); CUDA_CHECK(cudaGetLastError()); diff --git a/FST/src/bcknd/device/hip/opr_fst.hip b/FST/src/bcknd/device/hip/opr_fst.hip index f46f8c3..a5cfd16 100644 --- a/FST/src/bcknd/device/hip/opr_fst.hip +++ b/FST/src/bcknd/device/hip/opr_fst.hip @@ -43,7 +43,7 @@ */ template< typename T, typename U> __global__ void fst_kernel( - const U t, + const T t, const U Uinf, T * __restrict__ u, T * __restrict__ v, @@ -104,26 +104,26 @@ __global__ void fst_kernel( extern "C" { -void hip_fst(real *t, real *Uinf, +void hip_fst(real *t, real_xp *Uinf, void *u_d, void *v_d, void *w_d, int *mask_d, int *n_mask, void *ubf_d, void *vbf_d, void *wbf_d, void *k_x_d, int *n_total_modes, void *phi_0_d, int *shell_d, - void *shell_amp_d, void *randvec_d, real *cosa, real *sina, - real *fringe_time, void *fs_d) { + void *shell_amp_d, void *randvec_d, real_xp *cosa, real_xp *sina, + real_xp *fringe_time, void *fs_d) { const dim3 nthrds(1024, 1, 1); const dim3 nblcks(*n_mask, 1, 1); - hipLaunchKernelGGL(HIP_KERNEL_NAME(fst_kernel), + hipLaunchKernelGGL(HIP_KERNEL_NAME(fst_kernel), nblcks, nthrds, 0, (hipStream_t) glb_cmd_queue, *t, *Uinf, (real *) u_d, (real *) v_d, (real *) w_d, (int *) mask_d, *n_mask, (real *) ubf_d, (real *) vbf_d, (real *) wbf_d, - (real *) k_x_d, *n_total_modes, - (real *) phi_0_d, (int *) shell_d, (real *) shell_amp_d, - (real *) randvec_d, - *cosa, *sina, *fringe_time, (real *) fs_d + (real_xp *) k_x_d, *n_total_modes, + (real_xp *) phi_0_d, (int *) shell_d, (real_xp *) shell_amp_d, + (real_xp *) randvec_d, + *cosa, *sina, *fringe_time, (real_xp *) fs_d ); HIP_CHECK(hipGetLastError()); diff --git a/FST/src/bcknd/device/opr_fst_device.F90 b/FST/src/bcknd/device/opr_fst_device.F90 index 96e851d..9fff88b 100644 --- a/FST/src/bcknd/device/opr_fst_device.F90 +++ b/FST/src/bcknd/device/opr_fst_device.F90 @@ -32,7 +32,7 @@ ! !> Operators accelerator backends module opr_fst_device - use num_types, only : rp, c_rp + use num_types, only : rp, xp, c_rp, c_xp use device, only : device_get_ptr use utils, only : neko_error use comm @@ -49,10 +49,11 @@ subroutine cuda_fst(t, Uinf, u_d,v_d,w_d, mask_d,n_mask, & shell_amp_d, randvec_d, cosa, sina, fringe_time, fs_d) & bind(c, name = 'cuda_fst') use, intrinsic :: iso_c_binding - import c_rp + import c_rp, c_xp type(c_ptr), value :: u_d,v_d,w_d,mask_d,ubf_d,vbf_d,wbf_d,& k_x_d, phi_0_d, shell_d, shell_amp_d, randvec_d, fs_d - real(kind=c_rp) :: t, Uinf, cosa, sina, fringe_time + real(kind=c_rp) :: t + real(kind=c_xp) :: Uinf, cosa, sina, fringe_time integer(c_int) :: n_mask, n_total_modes end subroutine cuda_fst end interface @@ -63,10 +64,11 @@ subroutine hip_fst(t, Uinf, u_d,v_d,w_d, mask_d,n_mask, & shell_amp_d, randvec_d, cosa, sina, fringe_time, fs_d) & bind(c, name = 'hip_fst') use, intrinsic :: iso_c_binding - import c_rp + import c_rp, c_xp type(c_ptr), value :: u_d,v_d,w_d,mask_d,ubf_d,vbf_d,wbf_d,& k_x_d, phi_0_d, shell_d, shell_amp_d, randvec_d, fs_d - real(c_rp) :: t, Uinf, cosa, sina, fringe_time + real(kind=c_rp) :: t + real(kind=c_xp) :: Uinf, cosa, sina, fringe_time integer(c_int) :: n_mask, n_total_modes end subroutine hip_fst end interface @@ -77,7 +79,8 @@ end subroutine hip_fst subroutine opr_fst_device_fst(t, Uinf,u_d,v_d,w_d, mask_d,n_mask, & ubf_d, vbf_d, wbf_d, k_x_d, n_total_modes, phi_0_d, shell_d, & shell_amp_d, randvec_d, cosa, sina, fringe_time, fs_d) - real(kind=rp) :: t, Uinf, cosa, sina, fringe_time + real(kind=c_rp) :: t + real(kind=c_xp) :: Uinf, cosa, sina, fringe_time integer :: n_mask, n_total_modes type(c_ptr) :: fs_d, ubf_d, vbf_d, wbf_d, phi_0_d, randvec_d, shell_d, & shell_amp_d, k_x_d, u_d, v_d, w_d, mask_d diff --git a/FST/src/drivers/01_fst_bc_driver.f90 b/FST/src/drivers/01_fst_bc_driver.f90 index 8b28708..4706412 100644 --- a/FST/src/drivers/01_fst_bc_driver.f90 +++ b/FST/src/drivers/01_fst_bc_driver.f90 @@ -18,8 +18,16 @@ module fst_bc_driver character(len=LOG_SIZE) :: LOG_BUF ! ============================================================================ + ! Whether or not FST is enabled logical :: ENABLED + ! Whether or not to generate the FST wavenumbers etc from scratch or to + ! read from the files bb.txt, sphere.dat, fst_spectrum.csv + logical :: REGEN + + ! Free-stream velocity (this is only populated if REGEN is true + real(kind=xp) :: UINF + ! ! For outputting the forcing as a field file ! @@ -33,6 +41,9 @@ module fst_bc_driver !! to use `masked_copy` (see later for clarifications). integer, allocatable :: STUPID_MASK(:) + !> Path to the fst files. + character(len=:), allocatable :: PATH + ! ============================================================================ public :: fst_bc_driver_initialize, fst_bc_driver_finalize, & @@ -55,8 +66,8 @@ subroutine fst_bc_driver_initialize(t, u, v, w, p, coef, params) type(field_t), pointer :: fu,fv,fw character(len=:), allocatable :: read_str, fname logical :: px, py, pz - real(kind=xp) :: x, ymin, ymax, zmin, zmax, delta_y, delta_z, Ly, Lz - real(kind=xp) :: ystart, yend, zstart, zend + real(kind=rp) :: x, ymin, ymax, zmin, zmax, delta_y, delta_z, Ly, Lz + real(kind=rp) :: ystart, yend, zstart, zend integer :: i, idx, ierr, n real(kind=xp) :: alpha, beta, t_ramp, t_start, amp @@ -114,8 +125,8 @@ subroutine fst_bc_driver_initialize(t, u, v, w, p, coef, params) ystart = ymin + delta_y yend = ymax - delta_y else - call json_get_or_default(params, "case.FST.ystart", ystart, ymin) - call json_get_or_default(params, "case.FST.yend", yend, ymax) + call json_get_or_default(params, "case.FST.y_start", ystart, ymin) + call json_get_or_default(params, "case.FST.y_end", yend, ymax) delta_y = alpha*Ly end if @@ -127,14 +138,20 @@ subroutine fst_bc_driver_initialize(t, u, v, w, p, coef, params) zstart = zmin + delta_z zend = zmax - delta_z else - call json_get_or_default(params, "case.FST.zstart", zstart, zmin) - call json_get_or_default(params, "case.FST.zend", zend, zmax) + call json_get_or_default(params, "case.FST.z_start", zstart, zmin) + call json_get_or_default(params, "case.FST.z_end", zend, zmax) delta_z = alpha * Lz end if ! Read parameters for the FST fringe in time call json_get(params, "case.FST.t_ramp", t_ramp) - call json_get_or_default(params, "case.FST.t_start", t_start, 0.0_rp) + call json_get_or_default(params, "case.FST.t_start", t_start, 0.0_xp) + + ! Read options for generating FST from scratch or not + call json_get(params, "case.FST.regen_files", REGEN) + if (.not. REGEN) call json_get(params, "case.FST.Uinf", UINF) + + call json_get_or_default(params, 'case.FST.fst_path', PATH, ".") ! Initialize the fst parameters call FST_OBJ%init_bc(zmin, zmax, zstart, zend, & @@ -154,8 +171,8 @@ subroutine fst_bc_driver_apply(u, v, w, bc, coef, t, tstep, angle, on_cpu) type(coef_t), intent(inout) :: coef real(kind=rp), intent(in) :: t integer, intent(in) :: tstep - real(kind=rp), intent(in) :: angle - logical, intent(in), optional :: on_cpu + real(kind=xp), intent(in) :: angle + logical, intent(in) :: on_cpu integer :: i, idx @@ -165,8 +182,9 @@ subroutine fst_bc_driver_apply(u, v, w, bc, coef, t, tstep, angle, on_cpu) ! At the first timestep we generate the FST based ! on the boundry mask! ! - if (tstep .eq. 1) then - call FST_obj%generate_bc(coef, bc%msk, bc%msk(0), u=u, v=v, w=w) + if (.not. FST_obj%is_generated) then + call FST_obj%generate_bc(coef, bc%msk, bc%msk(0), u, v, w, REGEN, & + UINF, PATH) end if ! Then, apply the free stream turbulence that will add on @@ -174,7 +192,7 @@ subroutine fst_bc_driver_apply(u, v, w, bc, coef, t, tstep, angle, on_cpu) ! NOTE: if on_cpu is true, memory is not copied to the GPU (you need ! to do it yourself) call FST_obj%apply_BC(bc%msk, bc%msk(0), & - u%dof%x, u%dof%y, u%dof%z, t, u%x, v%x, w%x, angle, on_cpu) + t, u, v, w, angle, on_cpu) ! If we compute on cpu, copy memory. This is slower! if (NEKO_BCKND_DEVICE .eq. 1 .and. on_cpu) then diff --git a/FST/src/fst_operator.f90 b/FST/src/fst_operator.f90 index 9e0cc88..a880739 100644 --- a/FST/src/fst_operator.f90 +++ b/FST/src/fst_operator.f90 @@ -54,17 +54,18 @@ subroutine fst_bc_compute(t, Uinf, u, v, w, mask, n_mask, & real(kind=rp), intent(in) :: u_baseflow(:), v_baseflow(:), w_baseflow(:) integer, intent(in) :: n_mask, n_total_modes integer, intent(in) :: mask(0:n_mask), shell(:) - real(kind=rp), intent(in), dimension(:) :: wavenumbers_x, shell_amplitudes, & + real(kind=xp), intent(in), dimension(:) :: wavenumbers_x, shell_amplitudes, & fringe_space - real(kind=rp), intent(in), dimension(:,:) :: phi_0, random_vectors - real(kind=rp), intent(in) :: t, Uinf, fringe_time, angleXY + real(kind=xp), intent(in), dimension(:,:) :: phi_0, random_vectors + real(kind=rp), intent(in) :: t + real(kind=xp), intent(in) :: Uinf, fringe_time, angleXY logical, intent(in) :: on_host type(c_ptr) :: fs_d, ubf_d, vbf_d, wbf_d, phi_0_d, randvec_d, shell_d, & shell_amp_d, k_x_d, u_d, v_d, w_d, mask_d integer :: i - real(kind=rp) :: sina, cosa + real(kind=xp) :: sina, cosa cosa = cos(angleXY) sina = sin(angleXY) diff --git a/FST/src/generate_FST.f90 b/FST/src/generate_FST.f90 new file mode 100644 index 0000000..f044f8a --- /dev/null +++ b/FST/src/generate_FST.f90 @@ -0,0 +1,62 @@ +program generate_FST + use num_types, only: rp + use turbu, only: make_turbu + use global_params, only: kstart, kend, Npmax, Nshells + use utils, only: NEKO_FNAME_LEN, neko_error + use neko, only: neko_init, neko_finalize + implicit none + + integer :: seed, argc, stat + character(len=NEKO_FNAME_LEN) :: inputchar, output_dir + real(kind=rp) :: dx, dy, dz + + call neko_init + + ! + ! Read arguments + ! + argc = command_argument_count() + + if ((argc .lt. 2) .or. (argc .gt. 2)) then + write(*,*) 'Usage: ./generate_FST seed output_dir' + write(*,*) 'Example command: ./generate_FST -143 my/output/dir' + stop + end if + + call get_command_argument(1, inputchar) + read(inputchar, *) seed + call get_command_argument(2, output_dir) + !read(inputchar, *) output_dir + + ! + ! Build output directory + ! + + ! First stage: seed + ! write(output_dir,'(A)') "generated_fst_files/seed_" + ! if (seed < 0) write (output_dir, '(A,A)') trim(output_dir), 'm' + ! write (output_dir, '(A,I0)') trim(output_dir), abs(seed) + + ! ! Second stage: parameters + ! write(output_dir, '(A,A,g0,A,g0,A,I0,A,I0)') trim(output_dir), "_ks_", & + ! kstart, "_ke_", kend, "_Ns_", Nshells, "_Np_", Npmax + + write (*,*) "Output directory: ", trim(output_dir) + + ! Create output directory + call execute_command_line('mkdir -p ' // trim(output_dir), exitstat=stat) + if (stat /= 0) call neko_error("Failed to create directory" // & + trim(output_dir)) + + ! + ! Generate the FST + ! + dx = 0.79994602501392365_rp + dy = 0.40000000596046448_rp + dz = 0.15000000596046453_rp + call make_turbu(.false., .false., .true., seed, trim(output_dir), dx=dx, & + dy=dy, dz=dz) + + call neko_finalize + +end program generate_FST diff --git a/FST/src/makeneko_cpu b/FST/src/makeneko_cpu new file mode 100755 index 0000000..85f52f6 --- /dev/null +++ b/FST/src/makeneko_cpu @@ -0,0 +1,32 @@ +#!/bin/bash + +usage() { + + echo "Usage" + echo "./makeneko_cpu " + echo "" + echo "Example" + echo "./makeneko_cpu swept-wing.f90 /cfs/klemming/scratch/y/ycl/Neko/neko ./src" + +} + +# Check if exactly 4 arguments are given +if [ "$#" -ne 3 ]; then + echo "Error: Exactly 4 arguments are required." + usage + exit +fi + +NEKO_INSTALL_PATH=${2} +src_path=${3} + +backend="${src_path}/bcknd/cpu/opr_fst_cpu.f90 ${src_path}/bcknd/device/opr_fst_device.F90" +interface="${src_path}/fst_operator.f90" +fst_core="${src_path}/FST-core/0*" +driver="${src_path}/drivers/0*" + +#$MAKENEKO_PATH=$NEKO_INSTALL_PATH/bin +MAKENEKO_PATH="." + +echo $MAKENEKO_PATH/makeneko $backend $interface $fst_core $driver $1 +$MAKENEKO_PATH/makeneko $backend $interface $fst_core $driver $1 diff --git a/FST/src/test.py b/FST/src/test.py new file mode 100644 index 0000000..5cf05c5 --- /dev/null +++ b/FST/src/test.py @@ -0,0 +1,54 @@ +import numpy as np + +def S(x): + if (x <= 0.0): + return 0.0 + elif (x >= 1.0): + return 1.0 + else: + return 1.0 / ( 1.0 + np.exp( 1.0/(x-1.0) + 1.0/x )) + +def fringe(x, xs, xe, xmin, xmax, dr, df): + if (x < xmin): return 0.0 + elif (x > xmax): return 0.0 + else: + return S((x - xs)/dr) - S((x-xe)/df + 1.0) + +xmin =-0.1 +xmax = 0.1 +alpha= 0.15 + +#xs =-0.075 +xe = xmax +xs =xmin +#xe = 0.075 +dr = (xmax-xmin)*alpha +df = dr + +x = np.linspace(xmin, xmax, 501) +y = np.zeros_like(x) + +for i in range(len(x)): + y[i] = fringe(x[i], xs, xe, xmin, xmax, dr, df) + +import matplotlib.pyplot as plt + +def line(x): + plt.plot([x,x], [0.0, 1.2], "k--", alpha=0.5) + +plt.plot(x,y) +plt.xlim(xmin, xmax) +plt.ylim(0.0,1.1) +plt.xlabel("u") +plt.ylabel(r"$\lambda_u$") +plt.xticks([xmin, xs, xs+dr, xe-df, xe, xmax], [r"$u_{min}$", r"$u_{start}$", r"$u_{start}+\delta_{rise}$", r"$u_{end} - \delta_{fall}$", r"$u_{end}$", r"$u_{max}$"]) +line(xmin) +line(xmax) +line(xs) +line(xs+dr) +line(xe) +line(xe-df) +line(xmax) +plt.tight_layout() +plt.savefig("fringe.png") +plt.show()