From dad92d89747b808502a0ca70a5fb20a7da296c8b Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Thu, 26 Mar 2026 15:37:02 -0400 Subject: [PATCH] Solid particle implementation for E-L solver --- docs/documentation/case.md | 52 +- docs/documentation/equations.md | 40 + docs/module_categories.json | 2 + src/common/m_boundary_common.fpp | 529 ++++- src/common/m_constants.fpp | 12 +- src/common/m_derived_types.fpp | 141 +- src/common/m_helper.fpp | 111 +- src/common/m_helper_basic.fpp | 42 +- src/common/m_mpi_common.fpp | 518 +++- src/post_process/m_global_parameters.fpp | 136 +- src/post_process/m_mpi_proxy.fpp | 53 +- src/post_process/m_start_up.fpp | 98 +- src/pre_process/m_global_parameters.fpp | 207 +- src/pre_process/m_mpi_proxy.fpp | 33 +- src/pre_process/m_start_up.fpp | 152 +- src/simulation/m_bubbles.fpp | 300 ++- src/simulation/m_bubbles_EE.fpp | 37 +- src/simulation/m_bubbles_EL.fpp | 1347 +++++++---- src/simulation/m_bubbles_EL_kernels.fpp | 623 +++-- src/simulation/m_compute_levelset.fpp | 87 +- src/simulation/m_data_output.fpp | 264 +- src/simulation/m_global_parameters.fpp | 229 +- src/simulation/m_ib_patches.fpp | 218 +- src/simulation/m_ibm.fpp | 140 +- src/simulation/m_mpi_proxy.fpp | 1231 +++++++++- src/simulation/m_particles_EL.fpp | 2645 +++++++++++++++++++++ src/simulation/m_particles_EL_kernels.fpp | 904 +++++++ src/simulation/m_rhs.fpp | 165 +- src/simulation/m_sim_helpers.fpp | 109 +- src/simulation/m_start_up.fpp | 140 +- src/simulation/m_time_steppers.fpp | 74 +- toolchain/mfc/params/definitions.py | 35 +- toolchain/mfc/params/descriptions.py | 8 + 33 files changed, 9195 insertions(+), 1487 deletions(-) create mode 100644 src/simulation/m_particles_EL.fpp create mode 100644 src/simulation/m_particles_EL_kernels.fpp diff --git a/docs/documentation/case.md b/docs/documentation/case.md index ecdeb786d4..1ae3851b33 100644 --- a/docs/documentation/case.md +++ b/docs/documentation/case.md @@ -266,6 +266,16 @@ Setup: Only requires specifying 
`init_dir` and filename pattern via `zeros_defau Implementation: All variables and file handling are managed in `src/common/include/ExtrusionHardcodedIC.fpp` with no manual grid configuration needed. Usage: Ideal for initializing simulations from lower-dimensional solutions, enabling users to add perturbations or modifications to the base extruded fields for flow instability studies. +The following parameters support hardcoded initial conditions that read interface data from files: + +| Parameter | Type | Description | +| ---: | :---: | :--- | +| `interface_file` | String | Path to interface geometry data file | +| `normFac` | Real | Interface normalization factor | +| `normMag` | Real | Interface normal magnitude | +| `g0_ic` | Real | Initial gas volume fraction for interfacial IC | +| `p0_ic` | Real | Initial pressure for interfacial IC | + #### Parameter Descriptions - `num_patches` defines the total number of patches defined in the domain. @@ -788,7 +798,7 @@ Details of the transducer acoustic source model can be found in \cite Maeda17. 
| ---: | :----: | :--- | | `bubbles_euler` | Logical | Ensemble-averaged bubble modeling | | `bubbles_lagrange` | Logical | Volume-averaged bubble modeling | -| `bubble_model` | Integer | [1] Gilmore; [2] Keller--Miksis; [3] Rayleigh-Plesset | +| `bubble_model` | Integer | [0] Particle; [1] Gilmore; [2] Keller--Miksis; [3] Rayleigh-Plesset | | `Ca` | Real | Cavitation number | | `Web` | Real | Weber number | | `Re_inv` | Real | Inverse Reynolds number | @@ -892,6 +902,13 @@ When ``polytropic = 'F'``, the gas compression is modeled as non-polytropic due | `epsilonb` | Real | Standard deviation scaling for the gaussian function | | `charwidth` | Real | Domain virtual depth (z direction, for 2D simulations) | | `valmaxvoid` | Real | Maximum void fraction permitted | +| `drag_model` | Integer | Drag model for bubble dynamics | +| `vel_model` | Integer | Velocity model for bubble interface | +| `charNz` | Integer | Characteristic size parameter | +| `input_path` | String | Path to bubble input file (default: `./input`) | +| `pressure_force` | Logical | Enable pressure gradient force | +| `gravity_force` | Logical | Enable gravitational force | +| `write_void_evol` | Logical | Write void fraction evolution data | - `nBubs_glb` Total number of bubbles. Their initial conditions need to be specified in the ./input/lag_bubbles.dat file. See the example cases for additional information. @@ -905,6 +922,39 @@ When ``polytropic = 'F'``, the gas compression is modeled as non-polytropic due - `massTransfer_model` Activates the mass transfer model at the bubble's interface based on (\cite Preston07). +#### 9.3 Lagrangian Solid Particle Model + +| Parameter | Type | Description | +| ---: | :---: | :--- | +| `particles_lagrange` | Logical | Lagrangian solid particle model switch | +| `nParticles_glb` | Integer | Global number of particles | +| `solver_approach` | Integer | 1: One-way coupling, 2: Two-way coupling | +| `smooth_type` | Integer | Smoothing function. 
1: Gaussian, 2: Delta 3x3 | +| `stokes_drag` | Integer | Stokes drag model flag | +| `qs_drag_model` | Integer | Quasi-steady drag model (0: off, 1: Parmar, 2: Modified Parmar, 3: Osnes, 4: Gidaspow) | +| `added_mass_model` | Integer | Added mass model (0: off, >0: active) | +| `interpolation_order` | Integer | Polynomial order for barycentric field interpolation | +| `collision_force` | Logical | Enable soft-sphere DEM particle-particle collisions | +| `pressure_force` | Logical | Enable pressure gradient force on particles | +| `gravity_force` | Logical | Enable gravitational force on particles | +| `write_void_evol` | Logical | Write void fraction evolution data | +| `epsilonb` | Real | Standard deviation scaling for the Gaussian kernel | +| `valmaxvoid` | Real | Maximum void fraction permitted | +| `particle_pp%%rho0ref_particle` | Real | Reference particle material density | +| `particle_pp%%cp_particle` | Real | Particle specific heat capacity | + +- `particles_lagrange` activates the Euler-Lagrange solid particle solver. Particle initial conditions are read from `./input/lag_particles.dat`. The solver tracks non-deformable spherical particles in a compressible carrier flow using volume-averaged source terms (\cite Maeda18). + +- `nParticles_glb` specifies the total number of particles across all MPI ranks. Their initial positions, velocities, and radii must be specified in the input file. + +- `solver_approach` specifies the coupling method: [1] one-way coupling where particles are advected by the flow but do not influence it, [2] two-way coupling where particle forces are projected back onto the Eulerian grid as source terms. + +- `qs_drag_model` selects the quasi-steady drag correlation: [1] Parmar et al. (2010) with Sangani volume fraction correction, [2] Modified Parmar with Osnes et al. (2023) volume fraction correction, [3] Osnes et al. (2023) full correlation with Loth et al. 
(2021) rarefied regime, [4] Gidaspow (1994) correlation for dense particle suspensions. + +- `collision_force` activates soft-sphere DEM collisions using a spring-dashpot contact model with Hertzian stiffness. Collision forces between particles on different MPI ranks are communicated via non-blocking point-to-point messaging. + +- `interpolation_order` sets the order of the barycentric Lagrange polynomial used to interpolate Eulerian field quantities (pressure, velocity, density) to particle positions. Must be even; the interpolation stencil uses `N/2` points in each direction. + ### 10. Velocity Field Setup {#sec-velocity-field-setup} | Parameter | Type | Description | diff --git a/docs/documentation/equations.md b/docs/documentation/equations.md index 6bb17aa9de..b69a29adf2 100644 --- a/docs/documentation/equations.md +++ b/docs/documentation/equations.md @@ -514,6 +514,46 @@ with \f$\sigma = \varepsilon_b \max(\Delta x^{1/3}_\text{cell},\;R_\text{bubble} Each bubble is tracked individually with Keller-Miksis dynamics and 4th-order adaptive Runge-Kutta time integration. +### 6.3 Euler-Lagrange Solid Particles (`particles_lagrange = .true.`) + +**Source:** `src/simulation/m_particles_EL.fpp`, `src/simulation/m_particles_EL_kernels.fpp` + +The Euler-Lagrange particle solver tracks non-deformable solid particles in a compressible carrier flow. +The volume-averaged carrier flow equations use the same source term framework as the bubble model (Section 6.2), +with the particle volume fraction \f$\alpha_p\f$ replacing the bubble void fraction \f$\alpha\f$. + +**Particle volume fraction via Gaussian kernel:** + +\f[\alpha_p(\mathbf{x}) = \sum_n V_{p,n}\,\delta_\sigma(\mathbf{x} - \mathbf{x}_n)\f] + +where \f$V_{p,n} = \frac{4}{3}\pi R_n^3\f$ is the volume of particle \f$n\f$ and \f$\delta_\sigma\f$ is the Gaussian regularization kernel from Section 6.2. 
+ +**Particle equation of motion:** + +\f[m_p \frac{d\mathbf{u}_p}{dt} = \mathbf{F}_\text{drag} + \mathbf{F}_\text{pressure} + \mathbf{F}_\text{AM} + \mathbf{F}_\text{gravity} + \mathbf{F}_\text{collision}\f] + +**Quasi-steady drag force:** + +\f[\mathbf{F}_\text{drag} = \beta\,(\mathbf{u}_f - \mathbf{u}_p)\f] + +where \f$\beta\f$ is a drag coefficient computed from one of several correlations selected via `qs_drag_model`: +- Parmar et al. (2010): Re and Ma corrections with Sangani et al. (1991) volume fraction correction +- Modified Parmar: Re and Ma corrections with Osnes et al. (2023) volume fraction correction +- Osnes et al. (2023): comprehensive correlation for compressible flow through random particle suspensions +- Gidaspow (1994): Ergun/Wen-Yu correlation for dense suspensions + +**Pressure gradient force** (`pressure_force = .true.`): + +\f[\mathbf{F}_\text{pressure} = -V_p\,\nabla p\f] + +**Added mass force** (`added_mass_model > 0`): + +\f[\mathbf{F}_\text{AM} = C_\text{AM}\,\rho_f\,V_p\left(\frac{D\mathbf{u}_f}{Dt} - \frac{d\mathbf{u}_p}{dt}\right)\f] + +**Collision force** (`collision_force = .true.`): + +Soft-sphere DEM model with Hertzian contact stiffness and viscous damping. + --- ## 7. 
Fluid-Structure Interaction diff --git a/docs/module_categories.json b/docs/module_categories.json index d1da0ed5ed..6d64a0e42f 100644 --- a/docs/module_categories.json +++ b/docs/module_categories.json @@ -19,6 +19,8 @@ "m_bubbles_EE", "m_bubbles_EL", "m_bubbles_EL_kernels", + "m_particles_EL", + "m_particles_EL_kernels", "m_qbmm", "m_hyperelastic", "m_hypoelastic", diff --git a/src/common/m_boundary_common.fpp b/src/common/m_boundary_common.fpp index 918b0daea5..eb3b32a342 100644 --- a/src/common/m_boundary_common.fpp +++ b/src/common/m_boundary_common.fpp @@ -20,13 +20,16 @@ module m_boundary_common type(scalar_field), dimension(:,:), allocatable :: bc_buffers $:GPU_DECLARE(create='[bc_buffers]') + type(int_bounds_info), dimension(3) :: beta_bc_bounds + $:GPU_DECLARE(create='[beta_bc_bounds]') + #ifdef MFC_MPI integer, dimension(1:3,1:2) :: MPI_BC_TYPE_TYPE integer, dimension(1:3,1:2) :: MPI_BC_BUFFER_TYPE #endif - private; public :: s_initialize_boundary_common_module, s_populate_variables_buffers, s_create_mpi_types, & - & s_populate_capillary_buffers, s_populate_F_igr_buffers, s_write_serial_boundary_condition_files, & + private; public :: s_initialize_boundary_common_module, s_populate_variables_buffers, s_populate_beta_buffers, & + & s_create_mpi_types, s_populate_capillary_buffers, s_populate_F_igr_buffers, s_write_serial_boundary_condition_files, & & s_write_parallel_boundary_condition_files, s_read_serial_boundary_condition_files, & & s_read_parallel_boundary_condition_files, s_assign_default_bc_type, s_populate_grid_variables_buffers, & & s_finalize_boundary_common_module @@ -39,7 +42,7 @@ module m_boundary_common contains - !> Allocate and set up boundary condition buffer arrays for all coordinate directions. + !> @brief Allocates and sets up boundary condition buffer arrays for all coordinate directions. impure subroutine s_initialize_boundary_common_module() integer :: i, j @@ -68,9 +71,26 @@ contains end do end if + if (bubbles_lagrange .or. 
particles_lagrange) then + beta_bc_bounds(1)%beg = -mapcells - 1 + beta_bc_bounds(1)%end = m + mapcells + 1 + ! n > 0 always for bubbles_lagrange + beta_bc_bounds(2)%beg = -mapcells - 1 + beta_bc_bounds(2)%end = n + mapcells + 1 + if (p == 0) then + beta_bc_bounds(3)%beg = 0 + beta_bc_bounds(3)%end = 0 + else + beta_bc_bounds(3)%beg = -mapcells - 1 + beta_bc_bounds(3)%end = p + mapcells + 1 + end if + end if + $:GPU_UPDATE(device='[beta_bc_bounds]') + end subroutine s_initialize_boundary_common_module - !> Populate the buffers of the primitive variables based on the selected boundary conditions. + !> The purpose of this procedure is to populate the buffers of the primitive variables, depending on the selected boundary + !! conditions. impure subroutine s_populate_variables_buffers(bc_type, q_prim_vf, pb_in, mv_in) type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf @@ -78,7 +98,7 @@ contains type(integer_field), dimension(1:num_dims,1:2), intent(in) :: bc_type integer :: k, l - ! BC type codes defined in m_constants.fpp; non-negative values are MPI boundaries + ! Population of Buffers in x-direction if (bc_x%beg >= 0) then call s_mpi_sendrecv_variables_buffers(q_prim_vf, 1, -1, sys_size, pb_in, mv_in) @@ -87,7 +107,7 @@ contains do l = 0, p do k = 0, n select case (int(bc_type(1, 1)%sf(0, k, l))) - case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) + case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) ! Ghost-cell extrap. BC at end call s_ghost_cell_extrapolation(q_prim_vf, 1, -1, k, l) case (BC_REFLECTIVE) call s_symmetry(q_prim_vf, 1, -1, k, l, pb_in, mv_in) @@ -268,10 +288,11 @@ contains $:END_GPU_PARALLEL_LOOP() end if #:endif + ! END: Population of Buffers in z-direction end subroutine s_populate_variables_buffers - !> Fill ghost cells by copying the nearest boundary cell value along the specified direction. + !> @brief Fills ghost cells by copying the nearest boundary cell value along the specified direction. 
subroutine s_ghost_cell_extrapolation(q_prim_vf, bc_dir, bc_loc, k, l) $:GPU_ROUTINE(function_name='s_ghost_cell_extrapolation', parallelism='[seq]', cray_inline=True) @@ -281,13 +302,13 @@ contains integer :: j, i if (bc_dir == 1) then !< x-direction - if (bc_loc == -1) then ! bc_x%beg + if (bc_loc == -1) then !< bc_z%beg do i = 1, sys_size do j = 1, buff_size q_prim_vf(i)%sf(-j, k, l) = q_prim_vf(i)%sf(0, k, l) end do end do - else !< bc_x%end + else do i = 1, sys_size do j = 1, buff_size q_prim_vf(i)%sf(m + j, k, l) = q_prim_vf(i)%sf(m, k, l) @@ -295,13 +316,13 @@ contains end do end if else if (bc_dir == 2) then !< y-direction - if (bc_loc == -1) then !< bc_y%beg + if (bc_loc == -1) then !< bc_z%beg do i = 1, sys_size do j = 1, buff_size q_prim_vf(i)%sf(k, -j, l) = q_prim_vf(i)%sf(k, 0, l) end do end do - else !< bc_y%end + else do i = 1, sys_size do j = 1, buff_size q_prim_vf(i)%sf(k, n + j, l) = q_prim_vf(i)%sf(k, n, l) @@ -315,7 +336,7 @@ contains q_prim_vf(i)%sf(k, l, -j) = q_prim_vf(i)%sf(k, l, 0) end do end do - else !< bc_z%end + else do i = 1, sys_size do j = 1, buff_size q_prim_vf(i)%sf(k, l, p + j) = q_prim_vf(i)%sf(k, l, p) @@ -326,7 +347,8 @@ contains end subroutine s_ghost_cell_extrapolation - !> Apply reflective (symmetry) boundary conditions by mirroring primitive variables and flipping the normal velocity component. + !> @brief Applies reflective (symmetry) boundary conditions by mirroring primitive variables and flipping the normal velocity + !! component. 
subroutine s_symmetry(q_prim_vf, bc_dir, bc_loc, k, l, pb_in, mv_in) $:GPU_ROUTINE(parallelism='[seq]') @@ -337,7 +359,7 @@ contains integer :: j, q, i if (bc_dir == 1) then !< x-direction - if (bc_loc == -1) then !< bc_x%beg + if (bc_loc == -1) then !< bc_z%beg do j = 1, buff_size do i = 1, contxe q_prim_vf(i)%sf(-j, k, l) = q_prim_vf(i)%sf(j - 1, k, l) @@ -371,7 +393,7 @@ contains end do end do end if - else !< bc_x%end + else do j = 1, buff_size do i = 1, contxe q_prim_vf(i)%sf(m + j, k, l) = q_prim_vf(i)%sf(m - (j - 1), k, l) @@ -406,7 +428,7 @@ contains end if end if else if (bc_dir == 2) then !< y-direction - if (bc_loc == -1) then !< bc_y%beg + if (bc_loc == -1) then !< bc_z%beg do j = 1, buff_size do i = 1, momxb q_prim_vf(i)%sf(k, -j, l) = q_prim_vf(i)%sf(k, j - 1, l) @@ -440,7 +462,7 @@ contains end do end do end if - else !< bc_y%end + else do j = 1, buff_size do i = 1, momxb q_prim_vf(i)%sf(k, n + j, l) = q_prim_vf(i)%sf(k, n - (j - 1), l) @@ -510,7 +532,7 @@ contains end do end do end if - else !< bc_z%end + else do j = 1, buff_size do i = 1, momxb + 1 q_prim_vf(i)%sf(k, l, p + j) = q_prim_vf(i)%sf(k, l, p - (j - 1)) @@ -549,7 +571,7 @@ contains end subroutine s_symmetry - !> Apply periodic boundary conditions by copying values from the opposite domain boundary. + !> @brief Applies periodic boundary conditions by copying values from the opposite domain boundary. 
subroutine s_periodic(q_prim_vf, bc_dir, bc_loc, k, l, pb_in, mv_in) $:GPU_ROUTINE(parallelism='[seq]') @@ -560,7 +582,7 @@ contains integer :: j, q, i if (bc_dir == 1) then !< x-direction - if (bc_loc == -1) then !< bc_x%beg + if (bc_loc == -1) then !< bc_z%beg do i = 1, sys_size do j = 1, buff_size q_prim_vf(i)%sf(-j, k, l) = q_prim_vf(i)%sf(m - (j - 1), k, l) @@ -577,7 +599,7 @@ contains end do end do end if - else !< bc_x%end + else do i = 1, sys_size do j = 1, buff_size q_prim_vf(i)%sf(m + j, k, l) = q_prim_vf(i)%sf(j - 1, k, l) @@ -596,7 +618,7 @@ contains end if end if else if (bc_dir == 2) then !< y-direction - if (bc_loc == -1) then !< bc_y%beg + if (bc_loc == -1) then !< bc_z%beg do i = 1, sys_size do j = 1, buff_size q_prim_vf(i)%sf(k, -j, l) = q_prim_vf(i)%sf(k, n - (j - 1), l) @@ -613,7 +635,7 @@ contains end do end do end if - else !< bc_y%end + else do i = 1, sys_size do j = 1, buff_size q_prim_vf(i)%sf(k, n + j, l) = q_prim_vf(i)%sf(k, j - 1, l) @@ -649,7 +671,7 @@ contains end do end do end if - else !< bc_z%end + else do i = 1, sys_size do j = 1, buff_size q_prim_vf(i)%sf(k, l, p + j) = q_prim_vf(i)%sf(k, l, j - 1) @@ -671,7 +693,8 @@ contains end subroutine s_periodic - !> Apply axis boundary conditions for cylindrical coordinates by reflecting values across the axis with azimuthal phase shift. + !> @brief Applies axis boundary conditions for cylindrical coordinates by reflecting values across the axis with azimuthal phase + !! shift. subroutine s_axis(q_prim_vf, pb_in, mv_in, k, l) $:GPU_ROUTINE(parallelism='[seq]') @@ -721,7 +744,7 @@ contains end subroutine s_axis - !> Apply slip wall boundary conditions by extrapolating scalars and reflecting the wall-normal velocity component. + !> @brief Applies slip wall boundary conditions by extrapolating scalars and reflecting the wall-normal velocity component. 
subroutine s_slip_wall(q_prim_vf, bc_dir, bc_loc, k, l) $:GPU_ROUTINE(function_name='s_slip_wall',parallelism='[seq]', cray_inline=True) @@ -731,7 +754,7 @@ contains integer :: j, i if (bc_dir == 1) then !< x-direction - if (bc_loc == -1) then !< bc_x%beg + if (bc_loc == -1) then !< bc_z%beg do i = 1, sys_size do j = 1, buff_size if (i == momxb) then @@ -741,7 +764,7 @@ contains end if end do end do - else !< bc_x%end + else do i = 1, sys_size do j = 1, buff_size if (i == momxb) then @@ -753,7 +776,7 @@ contains end do end if else if (bc_dir == 2) then !< y-direction - if (bc_loc == -1) then !< bc_y%beg + if (bc_loc == -1) then !< bc_z%beg do i = 1, sys_size do j = 1, buff_size if (i == momxb + 1) then @@ -763,7 +786,7 @@ contains end if end do end do - else !< bc_y%end + else do i = 1, sys_size do j = 1, buff_size if (i == momxb + 1) then @@ -785,7 +808,7 @@ contains end if end do end do - else !< bc_z%end + else do i = 1, sys_size do j = 1, buff_size if (i == momxe) then @@ -800,7 +823,7 @@ contains end subroutine s_slip_wall - !> Apply no-slip wall boundary conditions by reflecting and negating all velocity components at the wall. + !> @brief Applies no-slip wall boundary conditions by reflecting and negating all velocity components at the wall. 
subroutine s_no_slip_wall(q_prim_vf, bc_dir, bc_loc, k, l) $:GPU_ROUTINE(function_name='s_no_slip_wall',parallelism='[seq]', cray_inline=True) @@ -811,7 +834,7 @@ contains integer :: j, i if (bc_dir == 1) then !< x-direction - if (bc_loc == -1) then !< bc_x%beg + if (bc_loc == -1) then !< bc_z%beg do i = 1, sys_size do j = 1, buff_size if (i == momxb) then @@ -825,7 +848,7 @@ contains end if end do end do - else !< bc_x%end + else do i = 1, sys_size do j = 1, buff_size if (i == momxb) then @@ -841,7 +864,7 @@ contains end do end if else if (bc_dir == 2) then !< y-direction - if (bc_loc == -1) then !< bc_y%beg + if (bc_loc == -1) then !< bc_z%beg do i = 1, sys_size do j = 1, buff_size if (i == momxb) then @@ -855,7 +878,7 @@ contains end if end do end do - else !< bc_y%end + else do i = 1, sys_size do j = 1, buff_size if (i == momxb) then @@ -885,7 +908,7 @@ contains end if end do end do - else !< bc_z%end + else do i = 1, sys_size do j = 1, buff_size if (i == momxb) then @@ -904,7 +927,7 @@ contains end subroutine s_no_slip_wall - !> Apply Dirichlet boundary conditions by prescribing ghost cell values from stored boundary buffers. + !> @brief Applies Dirichlet boundary conditions by prescribing ghost cell values from stored boundary buffers. subroutine s_dirichlet(q_prim_vf, bc_dir, bc_loc, k, l) $:GPU_ROUTINE(function_name='s_dirichlet',parallelism='[seq]', cray_inline=True) @@ -915,13 +938,13 @@ contains #ifdef MFC_SIMULATION if (bc_dir == 1) then !< x-direction - if (bc_loc == -1) then ! 
bc_x%beg + if (bc_loc == -1) then !< bc_z%beg do i = 1, sys_size do j = 1, buff_size q_prim_vf(i)%sf(-j, k, l) = bc_buffers(1, 1)%sf(i, k, l) end do end do - else !< bc_x%end + else do i = 1, sys_size do j = 1, buff_size q_prim_vf(i)%sf(m + j, k, l) = bc_buffers(1, 2)%sf(i, k, l) @@ -930,13 +953,13 @@ contains end if else if (bc_dir == 2) then !< y-direction #:if not MFC_CASE_OPTIMIZATION or num_dims > 1 - if (bc_loc == -1) then !< bc_y%beg + if (bc_loc == -1) then !< bc_z%beg do i = 1, sys_size do j = 1, buff_size q_prim_vf(i)%sf(k, -j, l) = bc_buffers(2, 1)%sf(k, i, l) end do end do - else !< bc_y%end + else !< bc_z%end do i = 1, sys_size do j = 1, buff_size q_prim_vf(i)%sf(k, n + j, l) = bc_buffers(2, 2)%sf(k, i, l) @@ -967,7 +990,7 @@ contains end subroutine s_dirichlet - !> Extrapolate QBMM bubble pressure and mass-vapor variables into ghost cells by copying boundary values. + !> @brief Extrapolates QBMM bubble pressure and mass-vapor variables into ghost cells by copying boundary values. subroutine s_qbmm_extrapolation(bc_dir, bc_loc, k, l, pb_in, mv_in) $:GPU_ROUTINE(parallelism='[seq]') @@ -977,7 +1000,7 @@ contains integer :: j, q, i if (bc_dir == 1) then !< x-direction - if (bc_loc == -1) then ! 
bc_x%beg + if (bc_loc == -1) then !< bc_z%beg do i = 1, nb do q = 1, nnode do j = 1, buff_size @@ -986,7 +1009,7 @@ contains end do end do end do - else !< bc_x%end + else do i = 1, nb do q = 1, nnode do j = 1, buff_size @@ -997,7 +1020,7 @@ contains end do end if else if (bc_dir == 2) then !< y-direction - if (bc_loc == -1) then !< bc_y%beg + if (bc_loc == -1) then !< bc_z%beg do i = 1, nb do q = 1, nnode do j = 1, buff_size @@ -1006,7 +1029,7 @@ contains end do end do end do - else !< bc_y%end + else do i = 1, nb do q = 1, nnode do j = 1, buff_size @@ -1026,7 +1049,7 @@ contains end do end do end do - else !< bc_z%end + else do i = 1, nb do q = 1, nnode do j = 1, buff_size @@ -1040,7 +1063,329 @@ contains end subroutine s_qbmm_extrapolation - !> Populate ghost cell buffers for the color function and its divergence used in capillary surface tension. + impure subroutine s_populate_beta_buffers(q_beta, bc_type, nvar) + + type(scalar_field), dimension(:), intent(inout) :: q_beta + type(integer_field), dimension(1:num_dims,1:2), intent(in) :: bc_type + integer, intent(in) :: nvar + integer :: k, l + + !> x-direction + + if (bc_x%beg >= 0) then + call s_mpi_reduce_beta_variables_buffers(q_beta, 1, -1, nvar) + else + $:GPU_PARALLEL_LOOP(private='[l, k]', collapse=2) + do l = beta_bc_bounds(3)%beg, beta_bc_bounds(3)%end + do k = beta_bc_bounds(2)%beg, beta_bc_bounds(2)%end + select case (bc_x%beg) + case (BC_PERIODIC) + call s_beta_periodic(q_beta, 1, -1, k, l, nvar) + case (BC_REFLECTIVE) + call s_beta_reflective(q_beta, 1, -1, k, l, nvar) + case default + end select + end do + end do + $:END_GPU_PARALLEL_LOOP() + end if + + if (bc_x%end >= 0) then + call s_mpi_reduce_beta_variables_buffers(q_beta, 1, 1, nvar) + else + $:GPU_PARALLEL_LOOP(private='[l, k]', collapse=2) + do l = beta_bc_bounds(3)%beg, beta_bc_bounds(3)%end + do k = beta_bc_bounds(2)%beg, beta_bc_bounds(2)%end + select case (bc_x%end) + case (BC_PERIODIC) + call s_beta_periodic(q_beta, 1, 1, k, l, nvar) + 
case (BC_REFLECTIVE) + call s_beta_reflective(q_beta, 1, 1, k, l, nvar) + case default + end select + end do + end do + $:END_GPU_PARALLEL_LOOP() + end if + + !> y-direction + if (bc_y%beg >= 0) then + call s_mpi_reduce_beta_variables_buffers(q_beta, 2, -1, nvar) + else + $:GPU_PARALLEL_LOOP(private='[l, k]', collapse=2) + do l = beta_bc_bounds(3)%beg, beta_bc_bounds(3)%end + do k = beta_bc_bounds(1)%beg, beta_bc_bounds(1)%end + select case (bc_y%beg) + case (BC_PERIODIC) + call s_beta_periodic(q_beta, 2, -1, k, l, nvar) + case (BC_REFLECTIVE) + call s_beta_reflective(q_beta, 2, -1, k, l, nvar) + case default + end select + end do + end do + $:END_GPU_PARALLEL_LOOP() + end if + + if (bc_y%end >= 0) then + call s_mpi_reduce_beta_variables_buffers(q_beta, 2, 1, nvar) + else + $:GPU_PARALLEL_LOOP(private='[l, k]', collapse=2) + do l = beta_bc_bounds(3)%beg, beta_bc_bounds(3)%end + do k = beta_bc_bounds(1)%beg, beta_bc_bounds(1)%end + select case (bc_y%end) + case (BC_PERIODIC) + call s_beta_periodic(q_beta, 2, 1, k, l, nvar) + case (BC_REFLECTIVE) + call s_beta_reflective(q_beta, 2, 1, k, l, nvar) + case default + end select + end do + end do + $:END_GPU_PARALLEL_LOOP() + end if + + if (num_dims == 2) return + + #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 + !> z-direction + if (bc_z%beg >= 0) then + call s_mpi_reduce_beta_variables_buffers(q_beta, 3, -1, nvar) + else + $:GPU_PARALLEL_LOOP(private='[l, k]', collapse=2) + do l = beta_bc_bounds(2)%beg, beta_bc_bounds(2)%end + do k = beta_bc_bounds(1)%beg, beta_bc_bounds(1)%end + select case (bc_type(3, 1)%sf(k, l, 0)) + case (BC_PERIODIC) + call s_beta_periodic(q_beta, 3, -1, k, l, nvar) + case (BC_REFLECTIVE) + call s_beta_reflective(q_beta, 3, -1, k, l, nvar) + case default + end select + end do + end do + $:END_GPU_PARALLEL_LOOP() + end if + + if (bc_z%end >= 0) then + call s_mpi_reduce_beta_variables_buffers(q_beta, 3, 1, nvar) + else + $:GPU_PARALLEL_LOOP(private='[l, k]', collapse=2) + do l = 
beta_bc_bounds(2)%beg, beta_bc_bounds(2)%end + do k = beta_bc_bounds(1)%beg, beta_bc_bounds(1)%end + select case (bc_type(3, 2)%sf(k, l, 0)) + case (BC_PERIODIC) + call s_beta_periodic(q_beta, 3, 1, k, l, nvar) + case (BC_REFLECTIVE) + call s_beta_reflective(q_beta, 3, 1, k, l, nvar) + case default + end select + end do + end do + $:END_GPU_PARALLEL_LOOP() + end if + #:endif + + end subroutine s_populate_beta_buffers + + subroutine s_beta_periodic(q_beta, bc_dir, bc_loc, k, l, nvar) + + $:GPU_ROUTINE(function_name='s_beta_periodic', parallelism='[seq]', cray_inline=True) + type(scalar_field), dimension(num_dims + 1), intent(inout) :: q_beta + integer, intent(in) :: bc_dir, bc_loc + integer, intent(in) :: k, l + integer, intent(in) :: nvar + integer :: j, i + + if (bc_dir == 1) then !< x-direction + if (bc_loc == -1) then !< bc_z%beg + do i = 1, nvar + do j = -mapCells - 1, mapCells + q_beta(beta_vars(i))%sf(j, k, l) = q_beta(beta_vars(i))%sf(j, k, l) + q_beta(beta_vars(i))%sf(m + j + 1, & + & k, l) + end do + end do + else + do i = 1, nvar + do j = -mapcells, mapcells + 1 + q_beta(beta_vars(i))%sf(m + j, k, l) = q_beta(beta_vars(i))%sf(j - 1, k, l) + end do + end do + end if + else if (bc_dir == 2) then !< y-direction + if (bc_loc == -1) then !< bc_z%beg + do i = 1, nvar + do j = -mapcells - 1, mapcells + q_beta(beta_vars(i))%sf(k, j, l) = q_beta(beta_vars(i))%sf(k, j, l) + q_beta(beta_vars(i))%sf(k, & + & n + j + 1, l) + end do + end do + else + do i = 1, nvar + do j = -mapcells, mapcells + 1 + q_beta(beta_vars(i))%sf(k, n + j, l) = q_beta(beta_vars(i))%sf(k, j - 1, l) + end do + end do + end if + else if (bc_dir == 3) then !< z-direction + if (bc_loc == -1) then !< bc_z%beg + do i = 1, nvar + do j = -mapcells - 1, mapcells + q_beta(beta_vars(i))%sf(k, l, j) = q_beta(beta_vars(i))%sf(k, l, j) + q_beta(beta_vars(i))%sf(k, l, & + & p + j + 1) + end do + end do + else + do i = 1, nvar + do j = -mapcells, mapcells + 1 + q_beta(beta_vars(i))%sf(k, l, p + j) = 
q_beta(beta_vars(i))%sf(k, l, j - 1) + end do + end do + end if + end if + + end subroutine s_beta_periodic + + subroutine s_beta_extrapolation(q_beta, bc_dir, bc_loc, k, l, nvar) + + $:GPU_ROUTINE(function_name='s_beta_extrapolation', parallelism='[seq]', cray_inline=True) + type(scalar_field), dimension(num_dims + 1), intent(inout) :: q_beta + integer, intent(in) :: bc_dir, bc_loc + integer, intent(in) :: k, l + integer, intent(in) :: nvar + integer :: j, i + + ! Set beta in buffer regions equal to zero + + if (bc_dir == 1) then !< x-direction + if (bc_loc == -1) then !< bc_z%beg + do i = 1, nvar + do j = 1, buff_size + q_beta(beta_vars(i))%sf(-j, k, l) = 0._wp + end do + end do + else + do i = 1, nvar + do j = 1, buff_size + q_beta(beta_vars(i))%sf(m + j, k, l) = 0._wp + end do + end do + end if + else if (bc_dir == 2) then !< y-direction + if (bc_loc == -1) then !< bc_z%beg + do i = 1, nvar + do j = 1, buff_size + q_beta(beta_vars(i))%sf(k, -j, l) = 0._wp + end do + end do + else + do i = 1, nvar + do j = 1, buff_size + q_beta(beta_vars(i))%sf(k, n + j, l) = 0._wp + end do + end do + end if + else if (bc_dir == 3) then !< z-direction + if (bc_loc == -1) then !< bc_z%beg + do i = 1, nvar + do j = 1, buff_size + q_beta(beta_vars(i))%sf(k, l, -j) = 0._wp + end do + end do + else + do i = 1, nvar + do j = 1, buff_size + q_beta(beta_vars(i))%sf(k, l, p + j) = 0._wp + end do + end do + end if + end if + + end subroutine s_beta_extrapolation + + subroutine s_beta_reflective(q_beta, bc_dir, bc_loc, k, l, nvar) + + $:GPU_ROUTINE(function_name='s_beta_reflective', parallelism='[seq]', cray_inline=True) + type(scalar_field), dimension(num_dims + 1), intent(inout) :: q_beta + integer, intent(in) :: bc_dir, bc_loc + integer, intent(in) :: k, l + integer, intent(in) :: nvar + integer :: j, i + + ! Reflective BC for void fraction: 1) Fold ghost-cell contributions back onto their mirror interior cells 2) Set ghost cells + ! 
= mirror of (now-folded) interior values + + if (bc_dir == 1) then !< x-direction + if (bc_loc == -1) then !< bc_z%beg + do i = 1, nvar + do j = 1, mapCells + 1 + q_beta(beta_vars(i))%sf(j - 1, k, l) = q_beta(beta_vars(i))%sf(j - 1, k, l) + q_beta(beta_vars(i))%sf(-j, & + & k, l) + end do + do j = 1, mapCells + 1 + q_beta(beta_vars(i))%sf(-j, k, l) = q_beta(beta_vars(i))%sf(j - 1, k, l) + end do + end do + else + do i = 1, nvar + do j = 1, mapCells + 1 + q_beta(beta_vars(i))%sf(m - (j - 1), k, l) = q_beta(beta_vars(i))%sf(m - (j - 1), k, & + & l) + q_beta(beta_vars(i))%sf(m + j, k, l) + end do + do j = 1, mapCells + 1 + q_beta(beta_vars(i))%sf(m + j, k, l) = q_beta(beta_vars(i))%sf(m - (j - 1), k, l) + end do + end do + end if + else if (bc_dir == 2) then !< y-direction + if (bc_loc == -1) then !< bc_z%beg + do i = 1, nvar + do j = 1, mapCells + 1 + q_beta(beta_vars(i))%sf(k, j - 1, l) = q_beta(beta_vars(i))%sf(k, j - 1, l) + q_beta(beta_vars(i))%sf(k, & + & -j, l) + end do + do j = 1, mapCells + 1 + q_beta(beta_vars(i))%sf(k, -j, l) = q_beta(beta_vars(i))%sf(k, j - 1, l) + end do + end do + else + do i = 1, nvar + do j = 1, mapCells + 1 + q_beta(beta_vars(i))%sf(k, n - (j - 1), l) = q_beta(beta_vars(i))%sf(k, n - (j - 1), & + & l) + q_beta(beta_vars(i))%sf(k, n + j, l) + end do + do j = 1, mapCells + 1 + q_beta(beta_vars(i))%sf(k, n + j, l) = q_beta(beta_vars(i))%sf(k, n - (j - 1), l) + end do + end do + end if + else if (bc_dir == 3) then !< z-direction + if (bc_loc == -1) then !< bc_z%beg + do i = 1, nvar + do j = 1, mapCells + 1 + q_beta(beta_vars(i))%sf(k, l, j - 1) = q_beta(beta_vars(i))%sf(k, l, j - 1) + q_beta(beta_vars(i))%sf(k, & + & l, -j) + end do + do j = 1, mapCells + 1 + q_beta(beta_vars(i))%sf(k, l, -j) = q_beta(beta_vars(i))%sf(k, l, j - 1) + end do + end do + else + do i = 1, nvar + do j = 1, mapCells + 1 + q_beta(beta_vars(i))%sf(k, l, p - (j - 1)) = q_beta(beta_vars(i))%sf(k, l, & + & p - (j - 1)) + q_beta(beta_vars(i))%sf(k, l, p + j) + end do 
+ do j = 1, mapCells + 1 + q_beta(beta_vars(i))%sf(k, l, p + j) = q_beta(beta_vars(i))%sf(k, l, p - (j - 1)) + end do + end do + end if + end if + + end subroutine s_beta_reflective + + !> @brief Populates ghost cell buffers for the color function and its divergence used in capillary surface tension. impure subroutine s_populate_capillary_buffers(c_divs, bc_type) type(scalar_field), dimension(num_dims + 1), intent(inout) :: c_divs @@ -1175,7 +1520,7 @@ contains end subroutine s_populate_capillary_buffers - !> Apply periodic boundary conditions to the color function and its divergence fields. + !> @brief Applies periodic boundary conditions to the color function and its divergence fields. subroutine s_color_function_periodic(c_divs, bc_dir, bc_loc, k, l) $:GPU_ROUTINE(function_name='s_color_function_periodic', parallelism='[seq]', cray_inline=True) @@ -1185,13 +1530,13 @@ contains integer :: j, i if (bc_dir == 1) then !< x-direction - if (bc_loc == -1) then ! bc_x%beg + if (bc_loc == -1) then !< bc_z%beg do i = 1, num_dims + 1 do j = 1, buff_size c_divs(i)%sf(-j, k, l) = c_divs(i)%sf(m - (j - 1), k, l) end do end do - else !< bc_x%end + else do i = 1, num_dims + 1 do j = 1, buff_size c_divs(i)%sf(m + j, k, l) = c_divs(i)%sf(j - 1, k, l) @@ -1199,13 +1544,13 @@ contains end do end if else if (bc_dir == 2) then !< y-direction - if (bc_loc == -1) then !< bc_y%beg + if (bc_loc == -1) then !< bc_z%beg do i = 1, num_dims + 1 do j = 1, buff_size c_divs(i)%sf(k, -j, l) = c_divs(i)%sf(k, n - (j - 1), l) end do end do - else !< bc_y%end + else do i = 1, num_dims + 1 do j = 1, buff_size c_divs(i)%sf(k, n + j, l) = c_divs(i)%sf(k, j - 1, l) @@ -1219,7 +1564,7 @@ contains c_divs(i)%sf(k, l, -j) = c_divs(i)%sf(k, l, p - (j - 1)) end do end do - else !< bc_z%end + else do i = 1, num_dims + 1 do j = 1, buff_size c_divs(i)%sf(k, l, p + j) = c_divs(i)%sf(k, l, j - 1) @@ -1230,7 +1575,7 @@ contains end subroutine s_color_function_periodic - !> Apply reflective boundary conditions to 
the color function and its divergence fields. + !> @brief Applies reflective boundary conditions to the color function and its divergence fields. subroutine s_color_function_reflective(c_divs, bc_dir, bc_loc, k, l) $:GPU_ROUTINE(function_name='s_color_function_reflective', parallelism='[seq]', cray_inline=True) @@ -1240,7 +1585,7 @@ contains integer :: j, i if (bc_dir == 1) then !< x-direction - if (bc_loc == -1) then ! bc_x%beg + if (bc_loc == -1) then !< bc_z%beg do i = 1, num_dims + 1 do j = 1, buff_size if (i == bc_dir) then @@ -1250,7 +1595,7 @@ contains end if end do end do - else !< bc_x%end + else do i = 1, num_dims + 1 do j = 1, buff_size if (i == bc_dir) then @@ -1262,7 +1607,7 @@ contains end do end if else if (bc_dir == 2) then !< y-direction - if (bc_loc == -1) then !< bc_y%beg + if (bc_loc == -1) then !< bc_z%beg do i = 1, num_dims + 1 do j = 1, buff_size if (i == bc_dir) then @@ -1272,7 +1617,7 @@ contains end if end do end do - else !< bc_y%end + else do i = 1, num_dims + 1 do j = 1, buff_size if (i == bc_dir) then @@ -1294,7 +1639,7 @@ contains end if end do end do - else !< bc_z%end + else do i = 1, num_dims + 1 do j = 1, buff_size if (i == bc_dir) then @@ -1309,7 +1654,7 @@ contains end subroutine s_color_function_reflective - !> Extrapolate the color function and its divergence into ghost cells by copying boundary values. + !> @brief Extrapolates the color function and its divergence into ghost cells by copying boundary values. subroutine s_color_function_ghost_cell_extrapolation(c_divs, bc_dir, bc_loc, k, l) $:GPU_ROUTINE(function_name='s_color_function_ghost_cell_extrapolation', parallelism='[seq]', cray_inline=True) @@ -1319,13 +1664,13 @@ contains integer :: j, i if (bc_dir == 1) then !< x-direction - if (bc_loc == -1) then ! 
bc_x%beg + if (bc_loc == -1) then !< bc_z%beg do i = 1, num_dims + 1 do j = 1, buff_size c_divs(i)%sf(-j, k, l) = c_divs(i)%sf(0, k, l) end do end do - else !< bc_x%end + else do i = 1, num_dims + 1 do j = 1, buff_size c_divs(i)%sf(m + j, k, l) = c_divs(i)%sf(m, k, l) @@ -1333,13 +1678,13 @@ contains end do end if else if (bc_dir == 2) then !< y-direction - if (bc_loc == -1) then !< bc_y%beg + if (bc_loc == -1) then !< bc_z%beg do i = 1, num_dims + 1 do j = 1, buff_size c_divs(i)%sf(k, -j, l) = c_divs(i)%sf(k, 0, l) end do end do - else !< bc_y%end + else do i = 1, num_dims + 1 do j = 1, buff_size c_divs(i)%sf(k, n + j, l) = c_divs(i)%sf(k, n, l) @@ -1353,7 +1698,7 @@ contains c_divs(i)%sf(k, l, -j) = c_divs(i)%sf(k, l, 0) end do end do - else !< bc_z%end + else do i = 1, num_dims + 1 do j = 1, buff_size c_divs(i)%sf(k, l, p + j) = c_divs(i)%sf(k, l, p) @@ -1364,7 +1709,7 @@ contains end subroutine s_color_function_ghost_cell_extrapolation - !> Populate ghost cell buffers for the Jacobian scalar field used in the IGR elliptic solver. + !> @brief Populates ghost cell buffers for the Jacobian scalar field used in the IGR elliptic solver. impure subroutine s_populate_F_igr_buffers(bc_type, jac_sf) type(integer_field), dimension(1:num_dims,1:2), intent(in) :: bc_type @@ -1531,7 +1876,7 @@ contains end subroutine s_populate_F_igr_buffers - !> Create MPI derived datatypes for boundary condition type arrays and buffer arrays used in parallel I/O. + !> @brief Creates MPI derived datatypes for boundary condition type arrays and buffer arrays used in parallel I/O. impure subroutine s_create_mpi_types(bc_type) type(integer_field), dimension(1:num_dims,1:2), intent(in) :: bc_type @@ -1566,7 +1911,7 @@ contains end subroutine s_create_mpi_types - !> Write boundary condition type and buffer data to serial (unformatted) restart files. + !> @brief Writes boundary condition type and buffer data to serial (unformatted) restart files. 
subroutine s_write_serial_boundary_condition_files(q_prim_vf, bc_type, step_dirpath, old_grid_in) type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf @@ -1605,7 +1950,7 @@ contains end subroutine s_write_serial_boundary_condition_files - !> Write boundary condition type and buffer data to per-rank parallel files using MPI I/O. + !> @brief Writes boundary condition type and buffer data to per-rank parallel files using MPI I/O. subroutine s_write_parallel_boundary_condition_files(q_prim_vf, bc_type) type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf @@ -1670,7 +2015,7 @@ contains end subroutine s_write_parallel_boundary_condition_files - !> Read boundary condition type and buffer data from serial (unformatted) restart files. + !> @brief Reads boundary condition type and buffer data from serial (unformatted) restart files. subroutine s_read_serial_boundary_condition_files(step_dirpath, bc_type) character(LEN=*), intent(in) :: step_dirpath @@ -1715,7 +2060,7 @@ contains end subroutine s_read_serial_boundary_condition_files - !> Read boundary condition type and buffer data from per-rank parallel files using MPI I/O. + !> @brief Reads boundary condition type and buffer data from per-rank parallel files using MPI I/O. subroutine s_read_parallel_boundary_condition_files(bc_type) type(integer_field), dimension(1:num_dims,1:2), intent(inout) :: bc_type @@ -1780,7 +2125,7 @@ contains end subroutine s_read_parallel_boundary_condition_files - !> Pack primitive variable boundary slices into bc_buffers arrays for serialization. + !> @brief Packs primitive variable boundary slices into bc_buffers arrays for serialization. subroutine s_pack_boundary_condition_buffers(q_prim_vf) type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf @@ -1823,7 +2168,7 @@ contains end subroutine s_pack_boundary_condition_buffers - !> Initialize the per-cell boundary condition type arrays with the global default BC values. 
+ !> @brief Initializes the per-cell boundary condition type arrays with the global default BC values. subroutine s_assign_default_bc_type(bc_type) type(integer_field), dimension(1:num_dims,1:2), intent(in) :: bc_type @@ -1849,8 +2194,8 @@ contains end subroutine s_assign_default_bc_type - !> Populate the buffers of the grid variables, which are constituted of the cell-boundary locations and cell-width - !! distributions, based on the boundary conditions. + !> The purpose of this subroutine is to populate the buffers of the grid variables, which are constituted of the cell- boundary + !! locations and cell-width distributions, based on the boundary conditions. subroutine s_populate_grid_variables_buffers integer :: i @@ -1858,10 +2203,33 @@ contains #ifdef MFC_SIMULATION ! Required for compatibility between codes type(int_bounds_info) :: offset_x, offset_y, offset_z - offset_x%beg = buff_size; offset_x%end = buff_size offset_y%beg = buff_size; offset_y%end = buff_size offset_z%beg = buff_size; offset_z%end = buff_size + +#ifdef MFC_MPI + ! Populate global domain boundaries with stretched grids + call s_mpi_allreduce_min(x_cb(-1), glb_bounds(1)%beg) + call s_mpi_allreduce_max(x_cb(m), glb_bounds(1)%end) + + if (n > 0) then + call s_mpi_allreduce_min(y_cb(-1), glb_bounds(2)%beg) + call s_mpi_allreduce_max(y_cb(n), glb_bounds(2)%end) + if (p > 0) then + call s_mpi_allreduce_min(z_cb(-1), glb_bounds(3)%beg) + call s_mpi_allreduce_max(z_cb(p), glb_bounds(3)%end) + end if + end if +#else + glb_bounds(1)%beg = x_cb(-1); glb_bounds(1)%end = x_cb(m) + if (n > 0) then + glb_bounds(2)%beg = y_cb(-1); glb_bounds(2)%end = y_cb(n) + if (p > 0) then + glb_bounds(3)%beg = z_cb(-1); glb_bounds(3)%end = z_cb(p) + end if + end if +#endif + $:GPU_UPDATE(device='[glb_bounds]') #endif #ifndef MFC_PRE_PROCESS @@ -1918,6 +2286,7 @@ contains do i = 1, buff_size x_cc(m + i) = x_cc(m + (i - 1)) + (dx(m + (i - 1)) + dx(m + i))/2._wp end do + ! END: Population of Buffers in x-direction ! 
Population of Buffers in y-direction @@ -1974,6 +2343,7 @@ contains do i = 1, buff_size y_cc(n + i) = y_cc(n + (i - 1)) + (dy(n + (i - 1)) + dy(n + i))/2._wp end do + ! END: Population of Buffers in y-direction ! Population of Buffers in z-direction @@ -2030,11 +2400,12 @@ contains do i = 1, buff_size z_cc(p + i) = z_cc(p + (i - 1)) + (dz(p + (i - 1)) + dz(p + i))/2._wp end do + ! END: Population of Buffers in z-direction #endif end subroutine s_populate_grid_variables_buffers - !> Deallocate boundary condition buffer arrays allocated during module initialization. + !> @brief Deallocates boundary condition buffer arrays allocated during module initialization. subroutine s_finalize_boundary_common_module() if (bc_io) then diff --git a/src/common/m_constants.fpp b/src/common/m_constants.fpp index 8e01059691..3f9d136dce 100644 --- a/src/common/m_constants.fpp +++ b/src/common/m_constants.fpp @@ -42,13 +42,16 @@ module m_constants real(wp), parameter :: broadband_spectral_level_constant = 20._wp !> The spectral level constant to correct the magnitude at each frequency to ensure the source is overall broadband real(wp), parameter :: broadband_spectral_level_growth_rate = 10._wp + ! Reconstruction Types integer, parameter :: WENO_TYPE = 1 !< Using WENO for reconstruction type integer, parameter :: MUSCL_TYPE = 2 !< Using MUSCL for reconstruction type + ! Interface Compression real(wp), parameter :: dflt_ic_eps = 1e-4_wp !< Ensure compression is only applied to surface cells in THINC real(wp), parameter :: dflt_ic_beta = 1.6_wp !< Sharpness parameter's default value used in THINC real(wp), parameter :: moncon_cutoff = 1e-8_wp !< Monotonicity constraint's limiter to prevent extremas in THINC + ! 
Chemistry real(wp), parameter :: dflt_T_guess = 1200._wp !< Default guess for temperature (when a previous value is not available) @@ -58,14 +61,16 @@ module m_constants real(wp), parameter :: threshold_vector_zero = 1.e-10_wp !< Threshold to treat the component of a vector to be zero real(wp), parameter :: threshold_edge_zero = 1.e-10_wp !< Threshold to treat two edges to be overlapped real(wp), parameter :: initial_distance_buffer = 1.e12_wp !< Initialized levelset distance for the shortest path pair algorithm + ! Lagrange bubbles constants - integer, parameter :: mapCells = 3 !< Number of cells around the bubble where the smoothening function will have effect + integer, parameter :: mapCells = 3 !< Number of cells around the bubble/particle where the smoothening function will have effect real(wp), parameter :: R_uni = 8314._wp !< Universal gas constant - J/kmol/K - integer, parameter :: lag_io_vars = 21 !< Number of variables per particle for MPI_IO + integer, parameter :: lag_io_vars = 21 !< Number of variables per particle for MPI_IO ! Strang Splitting constants real(wp), parameter :: dflt_adap_dt_tol = 1.e-4_wp !< Default tolerance for adaptive step size integer, parameter :: dflt_adap_dt_max_iters = 100 !< Default max iteration for adaptive step size + ! Constants of the algorithm described by Heirer, E. Hairer, S. P.Norsett, G. Wanner, Solving Ordinary Differential Equations I, ! Chapter II.4 to choose the initial time step size for the adaptive time stepping routine real(wp), parameter :: threshold_first_guess = 1.e-5_wp !< Threshold for initial step size estimate @@ -75,10 +80,9 @@ module m_constants real(wp), parameter :: small_guess = 1.e-6_wp !< Minimum initial step size ! Relativity - !> Max Newton-Raphson iterations for relativistic primitive recovery integer, parameter :: relativity_cons_to_prim_max_iter = 100 - ! Linear congruential pseudo-random number generator parameters + ! 
Pseudo-random number generator integer, parameter :: modulus = 2**30 - 1 !< PRNG modulus integer, parameter :: multiplier = 1664525 !< PRNG multiplier integer, parameter :: increment = 1013904223 !< PRNG increment diff --git a/src/common/m_derived_types.fpp b/src/common/m_derived_types.fpp index 0c00de90d7..d7dae2840e 100644 --- a/src/common/m_derived_types.fpp +++ b/src/common/m_derived_types.fpp @@ -187,9 +187,15 @@ module m_derived_types type ic_patch_parameters integer :: geometry !< Type of geometry for the patch + + !> Location of the geometric center, i.e. the centroid, of the patch. It is specified through its x-, y- and z-coordinates, + !! respectively. real(wp) :: x_centroid, y_centroid, z_centroid !< Geometric center coordinates of the patch - real(wp) :: length_x, length_y, length_z !< Dimensions of the patch. x,y,z Lengths. - real(wp) :: radius !< Dimensions of the patch. radius. + real(wp) :: length_x, length_y, length_z !< Dimensions of the patch. x,y,z Lengths. + real(wp) :: radius !< Dimensions of the patch. radius. + + !> Vector indicating the various radii for the elliptical and ellipsoidal patch geometries. It is specified through its x-, + !! y-, and z-components respectively. real(wp), dimension(3) :: radii !< Elliptical/ellipsoidal patch radii in x, y, z real(wp) :: epsilon, beta !< The isentropic vortex parameters for the amplitude of the disturbance and domain of influence. real(wp), dimension(2:9) :: a !< Used by hardcoded IC and as temporary variables. @@ -200,53 +206,68 @@ module m_derived_types logical :: modal_clip_r_to_min !< When true, clip boundary radius: R(theta) = max(R(theta), modal_r_min) (Non-exp form only) real(wp) :: modal_r_min !< Minimum boundary radius when modal_clip_r_to_min is true (Non-exp form only) logical :: modal_use_exp_form !< When true, boundary = radius*exp(Fourier series) + ! 
Geometry 14 (3D spherical harmonic): sph_har_coeff(l,m) for real Y_lm real(wp), dimension(0:max_sph_harm_degree,-max_sph_harm_degree:max_sph_harm_degree) :: sph_har_coeff + + !> Normal vector indicating the orientation of the patch. It is specified through its x-, y- and z-components, respectively. real(wp), dimension(3) :: normal !< Patch orientation normal vector (x, y, z) + + !> List of permissions that indicate to the current patch which preceding patches it is allowed to overwrite when it is in + !! process of being laid out in the domain logical, dimension(0:num_patches_max - 1) :: alter_patch !< Overwrite permissions for preceding patches - logical :: smoothen !< Whether patch boundaries are smoothed across cells + + !> Permission indicating to the current patch whether its boundaries will be smoothed out across a few cells or whether they + !! are to remain sharp + logical :: smoothen !< Whether patch boundaries are smoothed across cells integer :: smooth_patch_id !< Identity (id) of the patch with which current patch is to get smoothed - real(wp) :: smooth_coeff !< Smoothing stencil size coefficient + + !> Smoothing coefficient (coeff) for the size of the stencil of cells across which boundaries of the current patch will be + !! 
smeared out + real(wp) :: smooth_coeff real(wp), dimension(num_fluids_max) :: alpha_rho - real(wp) :: rho - real(wp), dimension(3) :: vel - real(wp) :: pres + real(wp) :: rho + real(wp), dimension(3) :: vel + real(wp) :: pres real(wp), dimension(num_fluids_max) :: alpha - real(wp) :: gamma - real(wp) :: pi_inf - real(wp) :: cv - real(wp) :: qv - real(wp) :: qvp !< Reference entropy per unit mass (SGEOS) - real(wp) :: Bx, By, Bz !< Magnetic field components; B%x is not used for 1D - real(wp), dimension(6) :: tau_e !< Elastic stresses added to primitive variables if hypoelasticity = True - real(wp) :: R0 !< Bubble size - real(wp) :: V0 !< Bubble velocity - real(wp) :: p0 !< Bubble size - real(wp) :: m0 !< Bubble velocity - integer :: hcid !< Hardcoded initial condition ID - real(wp) :: cf_val !< Color function value - real(wp) :: Y(1:num_species) !< Species mass fractions + real(wp) :: gamma + real(wp) :: pi_inf + real(wp) :: cv + real(wp) :: qv + !> Primitive variables associated with the patch. In order, these include the partial densities, density, velocity, + !! pressure, volume fractions, specific heat ratio function and the liquid stiffness function. + real(wp) :: qvp !< reference entropy per unit mass for SGEOS, q' (see Le Metayer (2004)) + real(wp) :: Bx, By, Bz !< Magnetic field components; B%x is not used for 1D + real(wp), dimension(6) :: tau_e !< Elastic stresses added to primitive variables if hypoelasticity = True + real(wp) :: R0 !< Bubble size + real(wp) :: V0 !< Bubble velocity + real(wp) :: p0 !< Bubble size + real(wp) :: m0 !< Bubble velocity + integer :: hcid + !! id for hard coded initial condition - ! STL or OBJ model input parameter - character(LEN=pathlen_max) :: model_filepath !< Path the STL file relative to case_dir. - real(wp), dimension(1:3) :: model_translate !< Translation of the STL object. - real(wp), dimension(1:3) :: model_scale !< Scale factor for the STL object. 
- !> Angle to rotate the STL object along each cartesian coordinate axis, in radians. + real(wp) :: cf_val !< Color function value + real(wp) :: Y(1:num_species) !< Species mass fractions STL or OBJ model input parameter + character(LEN=pathlen_max) :: model_filepath !< Path the STL file relative to case_dir. + real(wp), dimension(1:3) :: model_translate !< Translation of the STL object. + real(wp), dimension(1:3) :: model_scale !< Scale factor for the STL object. real(wp), dimension(1:3) :: model_rotate - integer :: model_spc !< Number of samples per cell to use when discretizing the STL object. - real(wp) :: model_threshold !< Threshold to turn on smoothen STL patch. + integer :: model_spc !< Number of samples per cell to use when discretizing the STL object. + real(wp) :: model_threshold !< Threshold to turn on smoothen STL patch. Patch conditions for moving imersed boundaries end type ic_patch_parameters type ib_patch_parameters - integer :: geometry !< Type of geometry for the patch + integer :: geometry !< Type of geometry for the patch + + !> Location of the geometric center, i.e. the centroid, of the patch. It is specified through its x-, y- and z-coordinates, + !! respectively. real(wp) :: x_centroid, y_centroid, z_centroid !< Geometric center coordinates of the patch !> Centroid locations of intermediate steps in the time_stepper module - real(wp) :: step_x_centroid, step_y_centroid, step_z_centroid - real(wp), dimension(1:3) :: centroid_offset !< offset of center of mass from computed cell center for odd-shaped IBs - real(wp), dimension(1:3) :: angles - real(wp), dimension(1:3) :: step_angles - !> matrix that converts from IB reference frame to fluid reference frame + real(wp) :: step_x_centroid, step_y_centroid, step_z_centroid + real(wp), dimension(1:3) :: centroid_offset ! 
offset of center of mass from computed cell center for odd-shaped IBs + real(wp), dimension(1:3) :: angles + real(wp), dimension(1:3) :: step_angles real(wp), dimension(1:3,1:3) :: rotation_matrix !> matrix that converts from fluid reference frame to IB reference frame real(wp), dimension(1:3,1:3) :: rotation_matrix_inverse @@ -256,11 +277,10 @@ module m_derived_types real(wp) :: theta logical :: slip - ! STL or OBJ model input parameter - character(LEN=pathlen_max) :: model_filepath !< Path the STL file relative to case_dir. - real(wp), dimension(1:3) :: model_translate !< Translation of the STL object. - real(wp), dimension(1:3) :: model_scale !< Scale factor for the STL object. - !> Angle to rotate the STL object along each cartesian coordinate axis, in radians. + !! STL or OBJ model input parameter + character(LEN=pathlen_max) :: model_filepath !< Path the STL file relative to case_dir. + real(wp), dimension(1:3) :: model_translate !< Translation of the STL object. + real(wp), dimension(1:3) :: model_scale !< Scale factor for the STL object. real(wp), dimension(1:3) :: model_rotate integer :: model_spc !< Number of samples per cell to use when discretizing the STL object. real(wp) :: model_threshold !< Threshold to turn on smoothen STL patch. 
Patch conditions for moving imersed boundaries @@ -309,6 +329,12 @@ module m_derived_types real(wp) :: R_g !< gas constant of gas (bubble) end type subgrid_bubble_physical_parameters + !> Derived type annexing the physical parameters required for sub-grid particle models + type subgrid_particle_physical_parameters + real(wp) :: rho0ref_particle !< reference density + real(wp) :: cp_particle !< solid particle specific heat + end type subgrid_particle_physical_parameters + type mpi_io_airfoil_ib_var integer, dimension(2) :: view type(vec3_dt), allocatable, dimension(:) :: var @@ -396,18 +422,31 @@ module m_derived_types !> Lagrangian bubble parameters type bubbles_lagrange_parameters - integer :: solver_approach !< 1: One-way coupling, 2: two-way coupling - integer :: cluster_type !< Cluster model to find p_inf - logical :: pressure_corrector !< Cell pressure correction term - integer :: smooth_type !< Smoothing function. 1: Gaussian, 2:Delta 3x3 - logical :: heatTransfer_model !< Activate HEAT transfer model at the bubble-liquid interface - logical :: massTransfer_model !< Activate MASS transfer model at the bubble-liquid interface - logical :: write_bubbles !< Write files to track the bubble evolution each time step - logical :: write_bubbles_stats !< Write the maximum and minimum radius of each bubble - integer :: nBubs_glb !< Global number of bubbles - real(wp) :: epsilonb !< Standard deviation scaling for the gaussian function - real(wp) :: charwidth !< Domain virtual depth (z direction, for 2D simulations) - real(wp) :: valmaxvoid !< Maximum void fraction permitted + integer :: solver_approach !< 1: One-way coupling, 2: two-way coupling + integer :: cluster_type !< Cluster model to find p_inf + logical :: pressure_corrector !< Cell pressure correction term + integer :: smooth_type !< Smoothing function. 
1: Gaussian, 2:Delta 3x3 + logical :: heatTransfer_model !< Activate HEAT transfer model at the bubble-liquid interface + logical :: massTransfer_model !< Activate MASS transfer model at the bubble-liquid interface + logical :: write_void_evol !< Write files to track evolution of void fraction at each time step + logical :: write_bubbles !< Write files to track the bubble evolution each time step + logical :: write_bubbles_stats !< Write the maximum and minimum radius of each bubble + integer :: nBubs_glb !< Global number of bubbles + integer :: nParticles_glb !< Global number of particles + integer :: vel_model !< Particle velocity model + integer :: drag_model !< Particle drag model + logical :: pressure_force !< Include pressure force translational motion + logical :: gravity_force !< Include gravity force in translational motion + integer :: qs_drag_model !< Particle QS drag model + integer :: stokes_drag !< Particle stokes drag + integer :: added_mass_model !< Particle added mass model + integer :: interpolation_order !< Fluid-to-Particle barycentric interpolation order + logical :: collision_force !< Include collision forces + character(LEN=pathlen_max) :: input_path !< Path to lag_bubbles.dat + real(wp) :: epsilonb !< Standard deviation scaling for the gaussian function + real(wp) :: charwidth !< Domain virtual depth (z direction, for 2D simulations) + integer :: charNz !< Number of grid cells in characteristic depth + real(wp) :: valmaxvoid !< Maximum void fraction permitted end type bubbles_lagrange_parameters !> Max and min number of cells in a direction of each combination of x-,y-, and z- diff --git a/src/common/m_helper.fpp b/src/common/m_helper.fpp index 4074530a8f..7741b9bc19 100644 --- a/src/common/m_helper.fpp +++ b/src/common/m_helper.fpp @@ -18,11 +18,15 @@ module m_helper public :: s_comp_n_from_prim, s_comp_n_from_cons, s_initialize_bubbles_model, s_initialize_nonpoly, s_simpson, s_transcoeff, & & s_int_to_str, s_transform_vec, 
s_transform_triangle, s_transform_model, s_swap, f_cross, f_create_transform_matrix, & & f_create_bbox, s_print_2D_array, f_xor, f_logical_to_int, associated_legendre, real_ylm, double_factorial, factorial, & - & f_cut_on, f_cut_off, s_downsample_data, s_upsample_data + & f_cut_on, f_cut_off, s_downsample_data, s_upsample_data, s_initialize_particles_model contains !> Computes the bubble number density n from the primitive variables + !! @param vftmp is the void fraction + !! @param Rtmp is the bubble radii + !! @param ntmp is the output number bubble density + !! @param weights is the quadrature weights subroutine s_comp_n_from_prim(vftmp, Rtmp, ntmp, weights) $:GPU_ROUTINE(parallelism='[seq]') @@ -37,7 +41,7 @@ contains end subroutine s_comp_n_from_prim - !> Compute the bubble number density from the conservative void fraction and weighted bubble radii. + !> @brief Computes the bubble number density from the conservative void fraction and weighted bubble radii. subroutine s_comp_n_from_cons(vftmp, nRtmp, ntmp, weights) $:GPU_ROUTINE(parallelism='[seq]') @@ -52,7 +56,7 @@ contains end subroutine s_comp_n_from_cons - !> Print a 2D real array to standard output, optionally dividing each element by a given scalar. + !> @brief Prints a 2D real array to standard output, optionally dividing each element by a given scalar. impure subroutine s_print_2D_array(A, div) real(wp), dimension(:,:), intent(in) :: A @@ -82,7 +86,14 @@ contains end subroutine s_print_2D_array - !> Initialize bubble model arrays for Euler or Lagrangian bubbles with polytropic or non-polytropic gas. + impure subroutine s_initialize_particles_model() + + rho0ref_particle = particle_pp%rho0ref_particle + cp_particle = particle_pp%cp_particle + + end subroutine s_initialize_particles_model + + !> bubbles_euler + polytropic bubbles_euler + non-polytropic bubbles_lagrange + non-polytropic impure subroutine s_initialize_bubbles_model() ! 
Allocate memory @@ -112,7 +123,7 @@ contains end subroutine s_initialize_bubbles_model - !> Set bubble physical parameters and nondimensional numbers from the input configuration. + !> impure subroutine s_initialize_bubble_vars() R0ref = bub_pp%R0ref; p0ref = bub_pp%p0ref @@ -128,6 +139,7 @@ contains R_v = bub_pp%R_v; R_g = bub_pp%R_g Tw = bub_pp%T0ref end if + if (bubbles_lagrange) then cp_v = bub_pp%cp_v; cp_g = bub_pp%cp_g k_vl = bub_pp%k_v; k_gl = bub_pp%k_g @@ -171,18 +183,19 @@ contains integer :: ir real(wp), dimension(nb) :: chi_vw0, cp_m0, k_m0, rho_m0, x_vw, omegaN, rhol0 real(wp), parameter :: k_poly = 1._wp !< polytropic index used to compute isothermal natural frequency - ! Chapman-Enskog transport coefficients for vapor-gas mixture, Ando JAS (2010) + + ! phi_vg & phi_gv (phi_gg = phi_vv = 1) (Eq. 2.22 in Ando 2010) phi_vg = (1._wp + sqrt(mu_v/mu_g)*(M_g/M_v)**(0.25_wp))**2/(sqrt(8._wp)*sqrt(1._wp + M_v/M_g)) phi_gv = (1._wp + sqrt(mu_g/mu_v)*(M_v/M_g)**(0.25_wp))**2/(sqrt(8._wp)*sqrt(1._wp + M_g/M_v)) - ! Initial internal bubble pressure (Euler number + Laplace pressure) + ! internal bubble pressure pb0 = Eu + 2._wp/Web/R0 - ! Vapor mass fraction at bubble wall, Ando JAS (2010) + ! mass fraction of vapor (Eq. 2.19 in Ando 2010) chi_vw0 = 1._wp/(1._wp + R_v/R_g*(pb0/pv - 1._wp)) - ! Mixture specific heat from mass-weighted vapor/gas contributions + ! specific heat for gas/vapor mixture cp_m0 = chi_vw0*R_v*gam_v/(gam_v - 1._wp) + (1._wp - chi_vw0)*R_g*gam_g/(gam_g - 1._wp) ! mole fraction of vapor (Eq. 2.23 in Ando 2010) @@ -203,7 +216,7 @@ contains ! Peclet numbers Pe_T(:) = rho_m0*cp_m0(:)/k_m0(:) - ! Bubble natural frequency, Ando JAS (2010) + ! natural frequencies (Eq. 
B.1) omegaN(:) = sqrt(3._wp*k_poly*Ca + 2._wp*(3._wp*k_poly - 1._wp)/(Web*R0))/R0/sqrt(rho0ref) do ir = 1, nb call s_transcoeff(omegaN(ir)*R0(ir), Pe_T(ir)*R0(ir), Re_trans_T(ir), Im_trans_T(ir)) @@ -214,6 +227,10 @@ contains end subroutine s_initialize_nonpoly !> Computes the transfer coefficient for the non-polytropic bubble compression process + !! @param omega natural frequencies + !! @param peclet Peclet number + !! @param Re_trans Real part of the transport coefficients + !! @param Im_trans Imaginary part of the transport coefficients elemental subroutine s_transcoeff(omega, peclet, Re_trans, Im_trans) real(wp), intent(in) :: omega, peclet @@ -232,7 +249,7 @@ contains end subroutine s_transcoeff - !> Convert an integer to its trimmed string representation. + !> @brief Converts an integer to its trimmed string representation. elemental subroutine s_int_to_str(i, res) integer, intent(in) :: i @@ -283,7 +300,10 @@ contains end subroutine s_simpson - !> Compute the cross product of two vectors. + !> This procedure computes the cross product of two vectors. + !! @param a First vector. + !! @param b Second vector. + !! @return The cross product of the two vectors. pure function f_cross(a, b) result(c) $:GPU_ROUTINE(parallelism='[seq]') @@ -297,7 +317,9 @@ contains end function f_cross - !> Swap two real numbers. + !> This procedure swaps two real numbers. + !! @param lhs Left-hand side. + !! @param rhs Right-hand side. elemental subroutine s_swap(lhs, rhs) real(wp), intent(inout) :: lhs, rhs @@ -309,7 +331,10 @@ contains end subroutine s_swap - !> Create a transformation matrix. + !> This procedure creates a transformation matrix. + !! @param param Parameters for the transformation. + !! @param center Optional center point for the transformation. + !! @return Transformation matrix. 
function f_create_transform_matrix(param, center) result(out_matrix) type(ic_model_parameters), intent(in) :: param @@ -347,7 +372,9 @@ contains end function f_create_transform_matrix - !> Transform a vector by a matrix. + !> This procedure transforms a vector by a matrix. + !! @param vec Vector to transform. + !! @param matrix Transformation matrix. subroutine s_transform_vec(vec, matrix) real(wp), dimension(1:3), intent(inout) :: vec @@ -359,7 +386,10 @@ contains end subroutine s_transform_vec - !> Transform a triangle by a matrix, one vertex at a time. + !> This procedure transforms a triangle by a matrix, one vertex at a time. + !! @param triangle Triangle to transform. + !! @param matrix Transformation matrix. + !! @param matrix_n Normal transformation matrix. subroutine s_transform_triangle(triangle, matrix, matrix_n) type(t_triangle), intent(inout) :: triangle @@ -374,7 +404,10 @@ contains end subroutine s_transform_triangle - !> Transform a model by a matrix, one triangle at a time. + !> This procedure transforms a model by a matrix, one triangle at a time. + !! @param model Model to transform. + !! @param matrix Transformation matrix. + !! @param matrix_n Normal transformation matrix. subroutine s_transform_model(model, matrix, matrix_n) type(t_model), intent(inout) :: model @@ -387,7 +420,9 @@ contains end subroutine s_transform_model - !> Create a bounding box for a model. + !> This procedure creates a bounding box for a model. + !! @param model Model to create bounding box for. + !! @return Bounding box. function f_create_bbox(model) result(bbox) type(t_model), intent(in) :: model @@ -412,7 +447,10 @@ contains end function f_create_bbox - !> Perform XOR on lhs and rhs. + !> This procedure performs xor on lhs and rhs. + !! @param lhs logical input. + !! @param rhs other logical input. + !! @return xored result. 
elemental function f_xor(lhs, rhs) result(res) logical, intent(in) :: lhs, rhs @@ -422,7 +460,9 @@ contains end function f_xor - !> Convert a logical to 1 or 0. + !> This procedure converts logical to 1 or 0. + !! @param predicate A Logical argument. + !! @return 1 if .true., 0 if .false.. elemental function f_logical_to_int(predicate) result(int) logical, intent(in) :: predicate @@ -468,6 +508,7 @@ contains !! @param x argument (typically cos(theta)), should be in [-1,1] !! @param l degree (>= 0) !! @param m_order order (0 <= m_order <= l) + !! @return result_P P_l^m(x) recursive function associated_legendre(x, l, m_order) result(result_P) integer, intent(in) :: l, m_order @@ -502,7 +543,9 @@ contains end function associated_legendre - !> Calculate the double factorial of an integer + !> This function calculates the double factorial value of an integer + !! @param n_in is the input integer + !! @return R is the double factorial value of n elemental function double_factorial(n_in) result(R_result) integer, intent(in) :: n_in @@ -514,7 +557,9 @@ contains end function double_factorial - !> Calculate the factorial of an integer + !> The following function calculates the factorial value of an integer + !! @param n_in is the input integer + !! @return R is the factorial value of n elemental function factorial(n_in) result(R_result) integer, intent(in) :: n_in @@ -526,8 +571,11 @@ contains end function factorial - !> Calculate a smooth cut-on function that is zero for x values smaller than zero and goes to one, for generating smooth initial - !! conditions + !> This function calculates a smooth cut-on function that is zero for x values smaller than zero and goes to one. It can be used + !! for generating smooth initial conditions + !! @param x is the input value + !! @param eps is the smoothing parameter + !! 
@return fx is the cut-on function evaluated at x function f_cut_on(x, eps) result(fx) real(wp), intent(in) :: x, eps @@ -537,8 +585,11 @@ contains end function f_cut_on - !> Calculate a smooth cut-off function that is one for x values smaller than zero and goes to zero, for generating smooth - !! initial conditions + !> This function calculates a smooth cut-off function that is one for x values smaller than zero and goes to zero. It can be + !! used for generating smooth initial conditions + !! @param x is the input value + !! @param eps is the smoothing parameter + !! @return fx is the cut-ff function evaluated at x function f_cut_off(x, eps) result(fx) real(wp), intent(in) :: x, eps @@ -548,7 +599,9 @@ contains end function f_cut_off - !> Helper function for f_cut_on and f_cut_off + !> This function is a helper function for the functions f_cut_on and f_cut_off + !! @param x is the input value + !! @return gx is the result function f_gx(x) result(gx) real(wp), intent(in) :: x @@ -562,7 +615,7 @@ contains end function f_gx - !> Downsample conservative variable fields by a factor of 3 in each direction using volume averaging. + !> @brief Downsamples conservative variable fields by a factor of 3 in each direction using volume averaging. subroutine s_downsample_data(q_cons_vf, q_cons_temp, m_ds, n_ds, p_ds, m_glb_ds, n_glb_ds, p_glb_ds) type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf, q_cons_temp @@ -604,7 +657,7 @@ contains end subroutine s_downsample_data - !> Upsample conservative variable fields from a coarsened grid back to the original resolution using interpolation. + !> @brief Upsamples conservative variable fields from a coarsened grid back to the original resolution using interpolation. 
subroutine s_upsample_data(q_cons_vf, q_cons_temp) type(scalar_field), intent(inout), dimension(sys_size) :: q_cons_vf, q_cons_temp diff --git a/src/common/m_helper_basic.fpp b/src/common/m_helper_basic.fpp index 7208a451da..0138a5da35 100644 --- a/src/common/m_helper_basic.fpp +++ b/src/common/m_helper_basic.fpp @@ -17,8 +17,11 @@ module m_helper_basic contains - !> Check if two floating point numbers of wp are within tolerance. + !> This procedure checks if two floating point numbers of wp are within tolerance. + !! @param a First number. + !! @param b Second number. !! @param tol_input Relative error (default = 1.e-10_wp). + !! @return Result of the comparison. logical elemental function f_approx_equal(a, b, tol_input) result(res) $:GPU_ROUTINE(parallelism='[seq]') @@ -42,8 +45,11 @@ contains end function f_approx_equal - !> Check if a wp value approximately matches any element of an array within tolerance. + !> This procedure checks if the point numbers of wp belongs to another array are within tolerance. + !! @param a First number. + !! @param b Array that contains several point numbers. !! @param tol_input Relative error (default = 1e-10_wp). + !! @return Result of the comparison. logical function f_approx_in_array(a, b, tol_input) result(res) $:GPU_ROUTINE(parallelism='[seq]') @@ -71,6 +77,7 @@ contains end function f_approx_in_array !> Checks if a real(wp) variable is of default value. + !! @param var Variable to check. logical elemental function f_is_default(var) result(res) $:GPU_ROUTINE(parallelism='[seq]') @@ -81,15 +88,23 @@ contains end function f_is_default !> Checks if ALL elements of a real(wp) array are of default value. + !! @param var_array Array to check. logical function f_all_default(var_array) result(res) real(wp), intent(in) :: var_array(:) res = all(f_is_default(var_array)) + ! logical :: res_array(size(var_array)) integer :: i + + ! do i = 1, size(var_array) res_array(i) = f_is_default(var_array(i)) end do + + ! 
res = all(res_array) + end function f_all_default !> Checks if a real(wp) variable is an integer. + !! @param var Variable to check. logical elemental function f_is_integer(var) result(res) $:GPU_ROUTINE(parallelism='[seq]') @@ -99,20 +114,19 @@ contains end function f_is_integer - !> Compute ghost-cell buffer size and set interior/buffered coordinate index bounds. subroutine s_configure_coordinate_bounds(recon_type, weno_polyn, muscl_polyn, igr_order, buff_size, idwint, idwbuff, viscous, & - - & bubbles_lagrange, m, n, p, num_dims, igr, ib) + & bubbles_lagrange, particles_lagrange, m, n, p, num_dims, igr, ib, fd_number) integer, intent(in) :: recon_type, weno_polyn, muscl_polyn - integer, intent(in) :: m, n, p, num_dims, igr_order + integer, intent(in) :: m, n, p, num_dims, igr_order, fd_number integer, intent(inout) :: buff_size type(int_bounds_info), dimension(3), intent(inout) :: idwint, idwbuff - logical, intent(in) :: viscous, bubbles_lagrange + logical, intent(in) :: viscous, bubbles_lagrange, particles_lagrange logical, intent(in) :: igr logical, intent(in) :: ib - ! Determine ghost cell buffer size for boundary conditions + ! Determining the number of cells that are needed in order to store sufficient boundary conditions data as to iterate the + ! solution in the physical computational domain from one time-step iteration to the next one if (igr) then buff_size = (igr_order - 1)/2 + 2 @@ -128,7 +142,12 @@ contains ! Correction for smearing function in the lagrangian subgrid bubble model if (bubbles_lagrange) then - buff_size = max(buff_size, 6) + buff_size = max(buff_size + fd_number, mapCells + 1 + fd_number) + end if + + ! Correction for smearing function in the lagrangian subgrid particle model + if (particles_lagrange) then + buff_size = max(buff_size + fd_number, mapCells + 1 + fd_number) end if if (ib) then @@ -150,7 +169,10 @@ contains end subroutine s_configure_coordinate_bounds !> Updates the min and max number of cells in each set of axes - !! 
@param bounds Min and max values to update + !! @param bounds Min ans max values to update + !! @param m Number of cells in x-axis + !! @param n Number of cells in y-axis + !! @param p Number of cells in z-axis elemental subroutine s_update_cell_bounds(bounds, m, n, p) type(cell_num_bounds), intent(out) :: bounds diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index 8a719f5758..643646c400 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -22,9 +22,20 @@ module m_mpi_common integer, private :: v_size $:GPU_DECLARE(create='[v_size]') + !! Generic flags used to identify and report MPI errors + !> This variable is utilized to pack and send the buffer of the cell-average primitive variables, for a single computational + !! domain boundary at the time, to the relevant neighboring processor. real(wp), private, allocatable, dimension(:) :: buff_send !< Primitive variable send buffer for halo exchange + + !> buff_recv is utilized to receive and unpack the buffer of the cell- average primitive variables, for a single computational + !! domain boundary at the time, from the relevant neighboring processor. real(wp), private, allocatable, dimension(:) :: buff_recv !< Primitive variable receive buffer for halo exchange + type(int_bounds_info) :: comm_coords(3) + integer :: comm_size(3) + $:GPU_DECLARE(create='[comm_coords, comm_size]') + !! Variables for EL bubbles communication + #ifndef __NVCOMPILER_GPU_UNIFIED_MEM $:GPU_DECLARE(create='[buff_send, buff_recv]') #endif @@ -34,9 +45,12 @@ module m_mpi_common contains - !> Initialize the module. + !> The computation of parameters, the allocation of memory, the association of pointers and/or the execution of any other + !! procedures that are necessary to setup the module. impure subroutine s_initialize_mpi_common_module + integer :: beta_v_size, beta_comm_size_1, beta_comm_size_2, beta_comm_size_3, beta_halo_size + #ifdef MFC_MPI ! Allocating buff_send/recv and. 
Please note that for the sake of simplicity, both variables are provided sufficient storage ! to hold the largest buffer in the computational domain. @@ -51,13 +65,31 @@ contains if (p > 0) then halo_size = nint(-1._wp + 1._wp*buff_size*(v_size)*(m + 2*buff_size + 1)*(n + 2*buff_size + 1)*(p + 2*buff_size & & + 1)/(cells_bounds%mnp_min + 2*buff_size + 1)) - else + else ! PBC at the end halo_size = -1 + buff_size*(v_size)*(cells_bounds%mn_max + 2*buff_size + 1) end if else halo_size = -1 + buff_size*(v_size) end if + if (bubbles_lagrange .or. particles_lagrange) then + beta_v_size = size(beta_vars) + beta_comm_size_1 = m + 2*mapCells + 3 + beta_comm_size_2 = merge(n + 2*mapCells + 3, 1, n > 0) + beta_comm_size_3 = merge(p + 2*mapCells + 3, 1, p > 0) + if (n > 0) then + if (p > 0) then + beta_halo_size = 2*(mapCells + 1)*beta_v_size*max(beta_comm_size_2*beta_comm_size_3, & + & beta_comm_size_1*beta_comm_size_3, beta_comm_size_1*beta_comm_size_2) - 1 + else ! PBC at the end only + beta_halo_size = 2*(mapCells + 1)*beta_v_size*max(beta_comm_size_2, beta_comm_size_1) - 1 + end if + else ! PBC at the end + beta_halo_size = 2*(mapCells + 1)*beta_v_size - 1 + end if + halo_size = max(halo_size, beta_halo_size) + end if + $:GPU_UPDATE(device='[halo_size, v_size]') #ifndef __NVCOMPILER_GPU_UNIFIED_MEM @@ -71,30 +103,39 @@ contains end subroutine s_initialize_mpi_common_module - !> Initialize the MPI execution environment and query the number of processors and local rank. + !> The subroutine initializes the MPI execution environment and queries both the number of processors which will be available + !! for the job and the local processor rank. impure subroutine s_mpi_initialize #ifdef MFC_MPI integer :: ierr !< Generic flag used to identify and report MPI errors + ! Initializing the MPI environment call MPI_INIT(ierr) + ! Checking whether the MPI environment has been properly initialized if (ierr /= MPI_SUCCESS) then print '(A)', 'Unable to initialize MPI environment. 
Exiting.' call MPI_ABORT(MPI_COMM_WORLD, 1, ierr) end if + ! Querying the number of processors available for the job call MPI_COMM_SIZE(MPI_COMM_WORLD, num_procs, ierr) + ! Querying the rank of the local processor call MPI_COMM_RANK(MPI_COMM_WORLD, proc_rank, ierr) #else + ! Serial run only has 1 processor num_procs = 1 + ! Local processor rank is 0 proc_rank = 0 #endif end subroutine s_mpi_initialize - !> Set up MPI I/O data views and variable pointers for parallel file output. + !! @param q_cons_vf Conservative variables + !! @param ib_markers track if a cell is within the immersed boundary + !! @param beta Eulerian void fraction from lagrangian bubbles impure subroutine s_initialize_mpi_data(q_cons_vf, ib_markers, beta) type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf @@ -104,8 +145,11 @@ contains integer, dimension(1) :: airfoil_glb, airfoil_loc, airfoil_start #ifdef MFC_MPI + ! Generic loop iterator integer :: i, j integer :: ierr !< Generic flag used to identify and report MPI errors + + ! Altered system size for the lagrangian subgrid bubble model integer :: alt_sys if (present(beta)) then @@ -176,7 +220,7 @@ contains end subroutine s_initialize_mpi_data - !> Set up MPI I/O data views for downsampled (coarsened) parallel file output. + !! @param q_cons_vf Conservative variables subroutine s_initialize_mpi_data_ds(q_cons_vf) type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf @@ -184,6 +228,7 @@ contains integer, dimension(3) :: sf_start_idx #ifdef MFC_MPI + ! Generic loop iterator integer :: i, j, q, k, l, m_ds, n_ds, p_ds, ierr sf_start_idx = (/0, 0, 0/) @@ -222,7 +267,7 @@ contains end subroutine s_initialize_mpi_data_ds - !> Gather variable-length real vectors from all MPI ranks onto the root process. + !> @brief Gathers variable-length real vectors from all MPI ranks onto the root process. 
impure subroutine s_mpi_gather_data(my_vector, counts, gathered_vector, root) integer, intent(in) :: counts !< Array of vector lengths for each process @@ -252,7 +297,7 @@ contains end subroutine s_mpi_gather_data - !> Gather per-rank time step wall-clock times onto rank 0 for performance reporting. + !> @brief Gathers per-rank time step wall-clock times onto rank 0 for performance reporting. impure subroutine mpi_bcast_time_step_values(proc_time, time_avg) real(wp), dimension(0:num_procs - 1), intent(inout) :: proc_time @@ -266,7 +311,7 @@ contains end subroutine mpi_bcast_time_step_values - !> Print a case file error with the prohibited condition and message, then abort execution. + !> @brief Prints a case file error with the prohibited condition and message, then aborts execution. impure subroutine s_prohibit_abort(condition, message) character(len=*), intent(in) :: condition, message @@ -286,40 +331,82 @@ contains !! performed by sifting through the local extrema of each stability criterion. Note that each of the local extrema is from a !! single process, within its assigned section of the computational domain. Finally, note that the global extrema values are !! only bookkeept on the rank 0 processor. - impure subroutine s_mpi_reduce_stability_criteria_extrema(icfl_max_loc, vcfl_max_loc, Rc_min_loc, icfl_max_glb, vcfl_max_glb, & - - & Rc_min_glb) + !! @param icfl_max_loc Local maximum ICFL stability criterion + !! @param vcfl_max_loc Local maximum VCFL stability criterion + !! @param Rc_min_loc Local minimum Rc stability criterion + !! @param icfl_max_glb Global maximum ICFL stability criterion + !! @param vcfl_max_glb Global maximum VCFL stability criterion + !! 
@param Rc_min_glb Global minimum Rc stability criterion + impure subroutine s_mpi_reduce_stability_criteria_extrema(icfl_max_loc, vcfl_max_loc, Rc_min_loc, bubs_loc, icfl_max_glb, & + & vcfl_max_glb, Rc_min_glb, bubs_glb) real(wp), intent(in) :: icfl_max_loc real(wp), intent(in) :: vcfl_max_loc real(wp), intent(in) :: Rc_min_loc + integer, intent(in) :: bubs_loc real(wp), intent(out) :: icfl_max_glb real(wp), intent(out) :: vcfl_max_glb real(wp), intent(out) :: Rc_min_glb + integer, intent(out) :: bubs_glb #ifdef MFC_SIMULATION #ifdef MFC_MPI integer :: ierr !< Generic flag used to identify and report MPI errors + bubs_glb = 0 + + ! Reducing local extrema of ICFL, VCFL, CCFL and Rc numbers to their global extrema and bookkeeping the results on the rank + ! 0 processor call MPI_REDUCE(icfl_max_loc, icfl_max_glb, 1, mpi_p, MPI_MAX, 0, MPI_COMM_WORLD, ierr) if (viscous) then call MPI_REDUCE(vcfl_max_loc, vcfl_max_glb, 1, mpi_p, MPI_MAX, 0, MPI_COMM_WORLD, ierr) call MPI_REDUCE(Rc_min_loc, Rc_min_glb, 1, mpi_p, MPI_MIN, 0, MPI_COMM_WORLD, ierr) end if + + if (bubbles_lagrange) then + call MPI_REDUCE(bubs_loc, bubs_glb, 1, MPI_INTEGER, MPI_SUM, 0, MPI_COMM_WORLD, ierr) + end if #else icfl_max_glb = icfl_max_loc + bubs_glb = 0 if (viscous) then vcfl_max_glb = vcfl_max_loc Rc_min_glb = Rc_min_loc end if + + if (bubbles_lagrange) bubs_glb = bubs_loc #endif #endif end subroutine s_mpi_reduce_stability_criteria_extrema - !> Reduce a local real value to its global sum across all MPI ranks. + !> The following subroutine takes the inputted variable and determines its sum on the entire computational domain. + ! ! @param var_loc holds the local value to be reduced among all the processors in communicator. On output, the variable holds + ! the sum, reduced amongst all of the local values. 
+ subroutine s_mpi_reduce_int_sum(var_loc, sum) + + integer, intent(in) :: var_loc + integer, intent(out) :: sum + +#ifdef MFC_MPI + integer :: ierr !< Generic flag used to identify and report MPI errors + + ! Performing reduction procedure and eventually storing its result into the variable that was initially inputted into the + ! subroutine + call MPI_REDUCE(var_loc, sum, 1, MPI_INTEGER, MPI_SUM, 0, MPI_COMM_WORLD, ierr) +#else + sum = var_loc +#endif + + end subroutine s_mpi_reduce_int_sum + + !> The following subroutine takes the input local variable from all processors and reduces to the sum of all values. The reduced + !! variable is recorded back onto the original local variable on each processor. + ! ! @param var_loc Some variable containing the local value which should be reduced amongst all the processors in the + ! communicator. + !! @param var_glb The globally reduced value impure subroutine s_mpi_allreduce_sum(var_loc, var_glb) real(wp), intent(in) :: var_loc @@ -328,12 +415,14 @@ contains #ifdef MFC_MPI integer :: ierr !< Generic flag used to identify and report MPI errors + ! Performing the reduction procedure call MPI_ALLREDUCE(var_loc, var_glb, 1, mpi_p, MPI_SUM, MPI_COMM_WORLD, ierr) #endif end subroutine s_mpi_allreduce_sum - !> Reduce an array of vectors to their global sums across all MPI ranks. + !> This subroutine follows the behavior of the s_mpi_allreduce_sum subroutine + !> with the additional feature that it reduces an array of vectors. impure subroutine s_mpi_allreduce_vectors_sum(var_loc, var_glb, num_vectors, vector_length) integer, intent(in) :: num_vectors, vector_length @@ -343,6 +432,7 @@ contains #ifdef MFC_MPI integer :: ierr !< Generic flag used to identify and report MPI errors + ! 
Performing the reduction procedure if (loc(var_loc) == loc(var_glb)) then call MPI_Allreduce(MPI_IN_PLACE, var_glb, num_vectors*vector_length, mpi_p, MPI_SUM, MPI_COMM_WORLD, ierr) else @@ -354,7 +444,11 @@ contains end subroutine s_mpi_allreduce_vectors_sum - !> Reduce a local integer value to its global sum across all MPI ranks. + !> The following subroutine takes the input local variable from all processors and reduces to the sum of all values. The reduced + !! variable is recorded back onto the original local variable on each processor. + ! ! @param var_loc Some variable containing the local value which should be reduced amongst all the processors in the + ! communicator. + !! @param var_glb The globally reduced value impure subroutine s_mpi_allreduce_integer_sum(var_loc, var_glb) integer, intent(in) :: var_loc @@ -363,6 +457,7 @@ contains #ifdef MFC_MPI integer :: ierr !< Generic flag used to identify and report MPI errors + ! Performing the reduction procedure call MPI_ALLREDUCE(var_loc, var_glb, 1, MPI_INTEGER, MPI_SUM, MPI_COMM_WORLD, ierr) #else var_glb = var_loc @@ -370,7 +465,11 @@ contains end subroutine s_mpi_allreduce_integer_sum - !> Reduce a local real value to its global minimum across all MPI ranks. + !> The following subroutine takes the input local variable from all processors and reduces to the minimum of all values. The + !! reduced variable is recorded back onto the original local variable on each processor. + ! ! @param var_loc Some variable containing the local value which should be reduced amongst all the processors in the + ! communicator. + !! @param var_glb The globally reduced value impure subroutine s_mpi_allreduce_min(var_loc, var_glb) real(wp), intent(in) :: var_loc @@ -379,12 +478,17 @@ contains #ifdef MFC_MPI integer :: ierr !< Generic flag used to identify and report MPI errors + ! 
Performing the reduction procedure call MPI_ALLREDUCE(var_loc, var_glb, 1, mpi_p, MPI_MIN, MPI_COMM_WORLD, ierr) #endif end subroutine s_mpi_allreduce_min - !> Reduce a local real value to its global maximum across all MPI ranks. + !> The following subroutine takes the input local variable from all processors and reduces to the maximum of all values. The + !! reduced variable is recorded back onto the original local variable on each processor. + ! ! @param var_loc Some variable containing the local value which should be reduced amongst all the processors in the + ! communicator. + !! @param var_glb The globally reduced value impure subroutine s_mpi_allreduce_max(var_loc, var_glb) real(wp), intent(in) :: var_loc @@ -393,20 +497,28 @@ contains #ifdef MFC_MPI integer :: ierr !< Generic flag used to identify and report MPI errors + ! Performing the reduction procedure call MPI_ALLREDUCE(var_loc, var_glb, 1, mpi_p, MPI_MAX, MPI_COMM_WORLD, ierr) #endif end subroutine s_mpi_allreduce_max - !> Reduce a local real value to its global minimum across all ranks + !> The following subroutine takes the inputted variable and determines its minimum value on the entire computational domain. The + !! result is stored back into inputted variable. + ! ! @param var_loc holds the local value to be reduced among all the processors in communicator. On output, the variable holds + ! the minimum value, reduced amongst all of the local values. impure subroutine s_mpi_reduce_min(var_loc) real(wp), intent(inout) :: var_loc #ifdef MFC_MPI - integer :: ierr !< Generic flag used to identify and report MPI errors + integer :: ierr !< Generic flag used to identify and report MPI errors + + ! Temporary storage variable that holds the reduced minimum value real(wp) :: var_glb + ! Performing reduction procedure and eventually storing its result into the variable that was initially inputted into the + ! 
subroutine call MPI_REDUCE(var_loc, var_glb, 1, mpi_p, MPI_MIN, 0, MPI_COMM_WORLD, ierr) call MPI_BCAST(var_glb, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) @@ -416,15 +528,25 @@ contains end subroutine s_mpi_reduce_min - !> Reduce a 2-element variable to its global maximum value with the owning processor rank (MPI_MAXLOC). - !> Reduce a local value to its global maximum with location (rank) across all ranks + !> The following subroutine takes the first element of the 2-element inputted variable and determines its maximum value on the + !! entire computational domain. The result is stored back into the first element of the variable while the rank of the processor + !! that is in charge of the sub- domain containing the maximum is stored into the second element of the variable. + ! ! @param var_loc On input, this variable holds the local value and processor rank, which are to be reduced among all the + ! processors in communicator. On output, this variable holds the maximum value, reduced amongst all of the local values, and the + ! process rank to which the value belongs. impure subroutine s_mpi_reduce_maxloc(var_loc) real(wp), dimension(2), intent(inout) :: var_loc #ifdef MFC_MPI - integer :: ierr !< Generic flag used to identify and report MPI errors + integer :: ierr !< Generic flag used to identify and report MPI errors + + !> Temporary storage variable that holds the reduced maximum value and the rank of the processor with which the value is + !! associated real(wp), dimension(2) :: var_glb !< Reduced (max value, rank) pair + + ! Performing reduction procedure and eventually storing its result into the variable that was initially inputted into the + ! subroutine call MPI_REDUCE(var_loc, var_glb, 1, mpi_2p, MPI_MAXLOC, 0, MPI_COMM_WORLD, ierr) call MPI_BCAST(var_glb, 1, mpi_2p, 0, MPI_COMM_WORLD, ierr) @@ -435,6 +557,8 @@ contains end subroutine s_mpi_reduce_maxloc !> The subroutine terminates the MPI execution environment. + !! 
@param prnt error message to be printed + !! @param code optional exit code impure subroutine s_mpi_abort(prnt, code) character(len=*), intent(in), optional :: prnt @@ -456,6 +580,7 @@ contains stop 1 end if #else + ! Terminating the MPI environment if (present(code)) then call MPI_ABORT(MPI_COMM_WORLD, code, ierr) else @@ -471,6 +596,7 @@ contains #ifdef MFC_MPI integer :: ierr !< Generic flag used to identify and report MPI errors + ! Calling MPI_BARRIER call MPI_BARRIER(MPI_COMM_WORLD, ierr) #endif @@ -482,6 +608,7 @@ contains #ifdef MFC_MPI integer :: ierr !< Generic flag used to identify and report MPI errors + ! Finalizing the MPI environment call MPI_FINALIZE(ierr) #endif @@ -489,6 +616,12 @@ contains !> The goal of this procedure is to populate the buffers of the cell-average conservative variables by communicating with the !! neighboring processors. + !! @param q_comm Cell-average conservative variables + !! @param mpi_dir MPI communication coordinate direction + !! @param pbc_loc Processor boundary condition (PBC) location + !! @param nVar Number of variables to communicate + !! @param pb_in Optional internal bubble pressure + !! @param mv_in Optional bubble mass velocity subroutine s_mpi_sendrecv_variables_buffers(q_comm, mpi_dir, pbc_loc, nVar, pb_in, mv_in) type(scalar_field), dimension(1:), intent(inout) :: q_comm @@ -695,7 +828,7 @@ contains #:endif end if #:endfor - call nvtxEndRange ! Packbuf + call nvtxEndRange ! Send/Recv #ifdef MFC_SIMULATION @@ -720,7 +853,7 @@ contains call MPI_SENDRECV(buff_send, buffer_count, mpi_p, dst_proc, send_tag, buff_recv, buffer_count, mpi_p, & & src_proc, recv_tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - call nvtxEndRange ! RHS-MPI-SENDRECV-(NO)-RDMA + call nvtxEndRange call nvtxStartRange("RHS-COMM-HOST2DEV") $:GPU_UPDATE(device='[buff_recv]') @@ -842,6 +975,7 @@ contains $:END_GPU_PARALLEL_LOOP() end if #:else + ! 
Unpacking buffer from bc_z%beg $:GPU_PARALLEL_LOOP(collapse=4,private='[r]') do i = 1, nVar do l = -buff_size, -1 @@ -903,19 +1037,265 @@ contains end subroutine s_mpi_sendrecv_variables_buffers - !> Decompose the computational domain among processors by balancing cells per rank in each coordinate direction. + !> The goal of this procedure is to populate the buffers of the cell-average conservative variables by communicating with the + !! neighboring processors. + !! @param q_cons_vf Cell-average conservative variables + !! @param mpi_dir MPI communication coordinate direction + !! @param pbc_loc Processor boundary condition (PBC) location + subroutine s_mpi_reduce_beta_variables_buffers(q_comm, mpi_dir, pbc_loc, nVar) + + type(scalar_field), dimension(1:), intent(inout) :: q_comm + integer, intent(in) :: mpi_dir, pbc_loc, nVar + integer :: i, j, k, l, r, q !< Generic loop iterators + integer :: lb_size + integer :: buffer_counts(1:3), buffer_count + type(int_bounds_info) :: boundary_conditions(1:3) + integer :: beg_end(1:2), grid_dims(1:3) + integer :: dst_proc, src_proc, recv_tag, send_tag + logical :: beg_end_geq_0, qbmm_comm, replace_buff + integer :: pack_offset, unpack_offset + +#ifdef MFC_MPI + integer :: ierr !< Generic flag used to identify and report MPI errors + + call nvtxStartRange("BETA-COMM-PACKBUF") + + ! Set bounds for each dimension Always include the full buffer range for each existing dimension. The Gaussian smearing + ! kernel writes to buffer cells even at physical boundaries, and these contributions must be communicated to neighbors in + ! other directions via ADD operations. + comm_coords(1)%beg = -mapcells - 1 + comm_coords(1)%end = m + mapcells + 1 + comm_coords(2)%beg = merge(-mapcells - 1, 0, n > 0) + comm_coords(2)%end = merge(n + mapcells + 1, n, n > 0) + comm_coords(3)%beg = merge(-mapcells - 1, 0, p > 0) + comm_coords(3)%end = merge(p + mapcells + 1, p, p > 0) + + ! 
Compute sizes + comm_size(1) = comm_coords(1)%end - comm_coords(1)%beg + 1 + comm_size(2) = comm_coords(2)%end - comm_coords(2)%beg + 1 + comm_size(3) = comm_coords(3)%end - comm_coords(3)%beg + 1 + + ! Buffer counts using the conditional sizes + v_size = nVar + lb_size = 2*(mapcells + 1) ! Size of the buffer region for beta variables (-mapcells - 1, mapcells) + buffer_counts = (/lb_size*v_size*comm_size(2)*comm_size(3), lb_size*v_size*comm_size(1)*comm_size(3), & + & lb_size*v_size*comm_size(1)*comm_size(2)/) + + $:GPU_UPDATE(device='[v_size, comm_coords, comm_size]') + + buffer_count = buffer_counts(mpi_dir) + boundary_conditions = (/bc_x, bc_y, bc_z/) + beg_end = (/boundary_conditions(mpi_dir)%beg, boundary_conditions(mpi_dir)%end/) + beg_end_geq_0 = beg_end(max(pbc_loc, 0) - pbc_loc + 1) >= 0 + + ! Implements: pbc_loc bc_x >= 0 -> [send/recv]_tag [dst/src]_proc -1 (=0) 0 -> [1,0] [0,0] | 0 0 [1,0] [beg,beg] -1 (=0) 1 + ! -> [0,0] [1,0] | 0 1 [0,0] [end,beg] +1 (=1) 0 -> [0,1] [1,1] | 1 0 [0,1] [end,end] +1 (=1) 1 -> [1,1] [0,1] | 1 1 [1,1] + ! [beg,end] + + send_tag = f_logical_to_int(.not. f_xor(beg_end_geq_0, pbc_loc == 1)) + recv_tag = f_logical_to_int(pbc_loc == 1) + + dst_proc = beg_end(1 + f_logical_to_int(f_xor(pbc_loc == 1, beg_end_geq_0))) + src_proc = beg_end(1 + f_logical_to_int(pbc_loc == 1)) + + grid_dims = (/m, n, p/) + + pack_offset = 0 + if (f_xor(pbc_loc == 1, beg_end_geq_0)) then + pack_offset = grid_dims(mpi_dir) + 1 + end if + + unpack_offset = 0 + if (pbc_loc == 1) then + unpack_offset = grid_dims(mpi_dir) + 1 + end if + + replace_buff = .false. + if (pbc_loc == 1 .and. beg_end_geq_0) replace_buff = .true. + + ! 
Pack Buffer to Send + #:for mpi_dir in [1, 2, 3] + if (mpi_dir == ${mpi_dir}$) then + #:if mpi_dir == 1 + $:GPU_PARALLEL_LOOP(collapse=4,private='[r]') + do l = comm_coords(3)%beg, comm_coords(3)%end + do k = comm_coords(2)%beg, comm_coords(2)%end + do j = -mapcells - 1, mapcells + do i = 1, v_size + r = (i - 1) + v_size*((j + mapcells + 1) + lb_size*((k - comm_coords(2)%beg) + comm_size(2) & + & *(l - comm_coords(3)%beg))) + buff_send(r) = real(q_comm(beta_vars(i))%sf(j + pack_offset, k, l), kind=wp) + end do + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + #:elif mpi_dir == 2 + $:GPU_PARALLEL_LOOP(collapse=4,private='[r]') + do i = 1, v_size + do l = comm_coords(3)%beg, comm_coords(3)%end + do k = -mapcells - 1, mapcells + do j = comm_coords(1)%beg, comm_coords(1)%end + r = (i - 1) + v_size*((j - comm_coords(1)%beg) + comm_size(1)*((k + mapcells + 1) & + & + lb_size*(l - comm_coords(3)%beg))) + buff_send(r) = real(q_comm(beta_vars(i))%sf(j, k + pack_offset, l), kind=wp) + end do + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + #:else + $:GPU_PARALLEL_LOOP(collapse=4,private='[r]') + do i = 1, v_size + do l = -mapcells - 1, mapcells + do k = comm_coords(2)%beg, comm_coords(2)%end + do j = comm_coords(1)%beg, comm_coords(1)%end + r = (i - 1) + v_size*((j - comm_coords(1)%beg) + comm_size(1)*((k - comm_coords(2)%beg) & + & + comm_size(2)*(l + mapcells + 1))) + buff_send(r) = real(q_comm(beta_vars(i))%sf(j, k, l + pack_offset), kind=wp) + end do + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + #:endif + end if + #:endfor + call nvtxEndRange + + ! Send/Recv +#ifdef MFC_SIMULATION + #:for rdma_mpi in [False, True] + if (rdma_mpi .eqv. ${'.true.' 
if rdma_mpi else '.false.'}$) then + #:if rdma_mpi + #:call GPU_HOST_DATA(use_device_addr='[buff_send, buff_recv]') + call nvtxStartRange("BETA-COMM-SENDRECV-RDMA") + + call MPI_SENDRECV(buff_send, buffer_count, mpi_p, dst_proc, send_tag, buff_recv, buffer_count, mpi_p, & + & src_proc, recv_tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + call nvtxEndRange ! RHS-MPI-SENDRECV-(NO)-RDMA + #:endcall GPU_HOST_DATA + $:GPU_WAIT() + #:else + call nvtxStartRange("BETA-COMM-DEV2HOST") + $:GPU_UPDATE(host='[buff_send]') + call nvtxEndRange + call nvtxStartRange("BETA-COMM-SENDRECV-NO-RDMA") + + call MPI_SENDRECV(buff_send, buffer_count, mpi_p, dst_proc, send_tag, buff_recv, buffer_count, mpi_p, & + & src_proc, recv_tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + call nvtxEndRange + + call nvtxStartRange("BETA-COMM-HOST2DEV") + $:GPU_UPDATE(device='[buff_recv]') + call nvtxEndRange + #:endif + end if + #:endfor +#else + call MPI_SENDRECV(buff_send, buffer_count, mpi_p, dst_proc, send_tag, buff_recv, buffer_count, mpi_p, src_proc, recv_tag, & + & MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) +#endif + + ! 
Unpack Received Buffer + call nvtxStartRange("BETA-COMM-UNPACKBUF") + #:for mpi_dir in [1, 2, 3] + if (mpi_dir == ${mpi_dir}$) then + #:if mpi_dir == 1 + $:GPU_PARALLEL_LOOP(collapse=4,private='[r]',copyin='[replace_buff]') + do l = comm_coords(3)%beg, comm_coords(3)%end + do k = comm_coords(2)%beg, comm_coords(2)%end + do j = -mapcells - 1, mapcells + do i = 1, v_size + r = (i - 1) + v_size*((j + mapcells + 1) + lb_size*((k - comm_coords(2)%beg) + comm_size(2) & + & *(l - comm_coords(3)%beg))) + if (replace_buff) then + q_comm(beta_vars(i))%sf(j + unpack_offset, k, l) = real(buff_recv(r), kind=stp) + else + q_comm(beta_vars(i))%sf(j + unpack_offset, k, & + & l) = q_comm(beta_vars(i))%sf(j + unpack_offset, k, l) + real(buff_recv(r), & + & kind=stp) + end if + end do + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + #:elif mpi_dir == 2 + $:GPU_PARALLEL_LOOP(collapse=4,private='[r]',copyin='[replace_buff]') + do i = 1, v_size + do l = comm_coords(3)%beg, comm_coords(3)%end + do k = -mapcells - 1, mapcells + do j = comm_coords(1)%beg, comm_coords(1)%end + r = (i - 1) + v_size*((j - comm_coords(1)%beg) + comm_size(1)*((k + mapcells + 1) & + & + lb_size*(l - comm_coords(3)%beg))) + if (replace_buff) then + q_comm(beta_vars(i))%sf(j, k + unpack_offset, l) = real(buff_recv(r), kind=stp) + else + q_comm(beta_vars(i))%sf(j, k + unpack_offset, l) = q_comm(beta_vars(i))%sf(j, & + & k + unpack_offset, l) + real(buff_recv(r), kind=stp) + end if + end do + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + #:else + $:GPU_PARALLEL_LOOP(collapse=4,private='[r]',copyin='[replace_buff]') + do i = 1, v_size + do l = -mapcells - 1, mapcells + do k = comm_coords(2)%beg, comm_coords(2)%end + do j = comm_coords(1)%beg, comm_coords(1)%end + r = (i - 1) + v_size*((j - comm_coords(1)%beg) + comm_size(1)*((k - comm_coords(2)%beg) & + & + comm_size(2)*(l + mapcells + 1))) + if (replace_buff) then + q_comm(beta_vars(i))%sf(j, k, l + unpack_offset) = real(buff_recv(r), kind=stp) + 
else + q_comm(beta_vars(i))%sf(j, k, l + unpack_offset) = q_comm(beta_vars(i))%sf(j, k, & + & l + unpack_offset) + real(buff_recv(r), kind=stp) + end if + end do + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + #:endif + end if + #:endfor + call nvtxEndRange +#endif + + end subroutine s_mpi_reduce_beta_variables_buffers + + !> The purpose of this procedure is to optimally decompose the computational domain among the available processors. This is + !! performed by attempting to award each processor, in each of the coordinate directions, approximately the same number of + !! cells, and then recomputing the affected global parameters. subroutine s_mpi_decompose_computational_domain #ifdef MFC_MPI integer :: num_procs_x, num_procs_y, num_procs_z !< Optimal number of processors in the x-, y- and z-directions + !> Non-optimal number of processors in the x-, y- and z-directions real(wp) :: tmp_num_procs_x, tmp_num_procs_y, tmp_num_procs_z real(wp) :: fct_min !< Processor factorization (fct) minimization parameter integer :: MPI_COMM_CART !< Cartesian processor topology communicator - integer :: rem_cells !< Remaining cells after distribution among processors - integer :: recon_order !< WENO or MUSCL reconstruction order - integer :: i, j !< Generic loop iterators - integer :: ierr !< Generic flag used to identify and report MPI errors + + !> Remaining number of cells, in a particular coordinate direction, after the majority is divided up among the available + !! processors + integer :: rem_cells + integer :: recon_order !< WENO or MUSCL reconstruction order + integer :: i, j, k !< Generic loop iterators + integer :: ierr !< Generic flag used to identify and report MPI errors + + ! temp array to store neighbor rank coordinates + integer, dimension(1:num_dims) :: neighbor_coords + + ! 
Zeroing out communication needs for moving EL bubbles/particles + nidx(1)%beg = 0; nidx(1)%end = 0 + nidx(2)%beg = 0; nidx(2)%end = 0 + nidx(3)%beg = 0; nidx(3)%end = 0 if (recon_type == WENO_TYPE) then recon_order = weno_order @@ -964,9 +1344,11 @@ contains end if end if end do - else + else ! PBC at the end only if (cyl_coord .and. p > 0) then - ! Pencil blocking for cylindrical coordinates (Fourier filter near axis) + ! Implement pencil processor blocking if using cylindrical coordinates so that all cells in azimuthal + ! direction are stored on a single processor. This is necessary for efficient application of Fourier filter + ! near axis. ! Initial values of the processor factorization optimization num_procs_x = 1 @@ -1048,6 +1430,7 @@ contains ! Finding the Cartesian coordinates of the local process call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 3, proc_coords, ierr) + ! END: 3D Cartesian Processor Topology ! Global Parameters for z-direction @@ -1069,6 +1452,7 @@ contains proc_coords(3) = proc_coords(3) - 1 call MPI_CART_RANK(MPI_COMM_CART, proc_coords, bc_z%beg, ierr) proc_coords(3) = proc_coords(3) + 1 + nidx(3)%beg = -1 end if ! Boundary condition at the end @@ -1076,20 +1460,21 @@ contains proc_coords(3) = proc_coords(3) + 1 call MPI_CART_RANK(MPI_COMM_CART, proc_coords, bc_z%end, ierr) proc_coords(3) = proc_coords(3) - 1 + nidx(3)%end = 1 end if #ifdef MFC_POST_PROCESS ! Ghost zone at the beginning if (proc_coords(3) > 0 .and. format == 1) then offset_z%beg = 2 - else + else ! PBC at the end only offset_z%beg = 0 end if ! Ghost zone at the end if (proc_coords(3) < num_procs_z - 1 .and. format == 1) then offset_z%end = 2 - else + else ! PBC at the end only offset_z%end = 0 end if #endif @@ -1101,7 +1486,7 @@ contains else start_idx(3) = (p + 1)*proc_coords(3) + rem_cells end if - else + else ! PBC at the end only #ifdef MFC_PRE_PROCESS if (old_grid .neqv. .true.) 
then dz = (z_domain%end - z_domain%beg)/real(p_glb + 1, wp) @@ -1119,7 +1504,7 @@ contains end if ! 2D Cartesian Processor Topology - else + else ! PBC at the end ! Initial estimate of optimal processor topology num_procs_x = 1 num_procs_y = num_procs @@ -1160,6 +1545,7 @@ contains ! Finding the Cartesian coordinates of the local process call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 2, proc_coords, ierr) end if + ! END: 2D Cartesian Processor Topology ! Global Parameters for y-direction @@ -1181,6 +1567,7 @@ contains proc_coords(2) = proc_coords(2) - 1 call MPI_CART_RANK(MPI_COMM_CART, proc_coords, bc_y%beg, ierr) proc_coords(2) = proc_coords(2) + 1 + nidx(2)%beg = -1 end if ! Boundary condition at the end @@ -1188,20 +1575,21 @@ contains proc_coords(2) = proc_coords(2) + 1 call MPI_CART_RANK(MPI_COMM_CART, proc_coords, bc_y%end, ierr) proc_coords(2) = proc_coords(2) - 1 + nidx(2)%end = 1 end if #ifdef MFC_POST_PROCESS ! Ghost zone at the beginning if (proc_coords(2) > 0 .and. format == 1) then offset_y%beg = 2 - else + else ! PBC at the end offset_y%beg = 0 end if ! Ghost zone at the end if (proc_coords(2) < num_procs_y - 1 .and. format == 1) then offset_y%end = 2 - else + else ! PBC at the end offset_y%end = 0 end if #endif @@ -1210,10 +1598,10 @@ contains if (parallel_io) then if (proc_coords(2) < rem_cells) then start_idx(2) = (n + 1)*proc_coords(2) - else + else ! PBC at the end only start_idx(2) = (n + 1)*proc_coords(2) + rem_cells end if - else + else ! PBC at the end #ifdef MFC_PRE_PROCESS if (old_grid .neqv. .true.) then dy = (y_domain%end - y_domain%beg)/real(n_glb + 1, wp) @@ -1264,6 +1652,7 @@ contains proc_coords(1) = proc_coords(1) - 1 call MPI_CART_RANK(MPI_COMM_CART, proc_coords, bc_x%beg, ierr) proc_coords(1) = proc_coords(1) + 1 + nidx(1)%beg = -1 end if ! 
Boundary condition at the end @@ -1271,6 +1660,7 @@ contains proc_coords(1) = proc_coords(1) + 1 call MPI_CART_RANK(MPI_COMM_CART, proc_coords, bc_x%end, ierr) proc_coords(1) = proc_coords(1) - 1 + nidx(1)%end = 1 end if #ifdef MFC_POST_PROCESS @@ -1293,7 +1683,7 @@ contains if (parallel_io) then if (proc_coords(1) < rem_cells) then start_idx(1) = (m + 1)*proc_coords(1) - else + else ! PBC at the end start_idx(1) = (m + 1)*proc_coords(1) + rem_cells end if else @@ -1304,13 +1694,27 @@ contains if (proc_coords(1) < rem_cells) then x_domain%beg = x_domain%beg + dx*real((m + 1)*proc_coords(1)) x_domain%end = x_domain%end - dx*real((m + 1)*(num_procs_x - proc_coords(1) - 1) - (num_procs_x - rem_cells)) - else + else ! PBC at the end only x_domain%beg = x_domain%beg + dx*real((m + 1)*proc_coords(1) + rem_cells) x_domain%end = x_domain%end - dx*real((m + 1)*(num_procs_x - proc_coords(1) - 1)) end if end if #endif end if + + @:ALLOCATE(neighbor_ranks(nidx(1)%beg:nidx(1)%end, nidx(2)%beg:nidx(2)%end, nidx(3)%beg:nidx(3)%end)) + do k = nidx(3)%beg, nidx(3)%end + do j = nidx(2)%beg, nidx(2)%end + do i = nidx(1)%beg, nidx(1)%end + if (abs(i) + abs(j) + abs(k) > 0) then + neighbor_coords(1) = proc_coords(1) + i + if (num_dims > 1) neighbor_coords(2) = proc_coords(2) + j + if (num_dims > 2) neighbor_coords(3) = proc_coords(3) + k + call MPI_CART_RANK(MPI_COMM_CART, neighbor_coords, neighbor_ranks(i, j, k), ierr) + end if + end do + end do + end do #endif end subroutine s_mpi_decompose_computational_domain @@ -1318,6 +1722,8 @@ contains !> The goal of this procedure is to populate the buffers of the grid variables by communicating with the neighboring processors. !! Note that only the buffers of the cell-width distributions are handled in such a way. This is because the buffers of !! cell-boundary locations may be calculated directly from those of the cell-width distributions. + !! @param mpi_dir MPI communication coordinate direction + !! 
@param pbc_loc Processor boundary condition (PBC) location #ifndef MFC_PRE_PROCESS subroutine s_mpi_sendrecv_grid_variables_buffers(mpi_dir, pbc_loc) @@ -1327,64 +1733,90 @@ contains #ifdef MFC_MPI integer :: ierr !< Generic flag used to identify and report MPI errors + ! MPI Communication in x-direction if (mpi_dir == 1) then if (pbc_loc == -1) then ! PBC at the beginning if (bc_x%end >= 0) then ! PBC at the beginning and end + + ! Send/receive buffer to/from bc_x%end/bc_x%beg call MPI_SENDRECV(dx(m - buff_size + 1), buff_size, mpi_p, bc_x%end, 0, dx(-buff_size), buff_size, mpi_p, & & bc_x%beg, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - else ! PBC at the beginning only + else ! PBC at the end only + ! Send/receive buffer to/from bc_x%beg/bc_x%beg call MPI_SENDRECV(dx(0), buff_size, mpi_p, bc_x%beg, 1, dx(-buff_size), buff_size, mpi_p, bc_x%beg, 0, & & MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) end if else ! PBC at the end if (bc_x%beg >= 0) then ! PBC at the end and beginning + + ! Send/receive buffer to/from bc_x%beg/bc_x%end call MPI_SENDRECV(dx(0), buff_size, mpi_p, bc_x%beg, 1, dx(m + 1), buff_size, mpi_p, bc_x%end, 1, & & MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) else ! PBC at the end only + ! Send/receive buffer to/from bc_x%end/bc_x%end call MPI_SENDRECV(dx(m - buff_size + 1), buff_size, mpi_p, bc_x%end, 0, dx(m + 1), buff_size, mpi_p, & & bc_x%end, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) end if end if + ! END: MPI Communication in x-direction + + ! MPI Communication in y-direction else if (mpi_dir == 2) then if (pbc_loc == -1) then ! PBC at the beginning if (bc_y%end >= 0) then ! PBC at the beginning and end + + ! Send/receive buffer to/from bc_y%end/bc_y%beg call MPI_SENDRECV(dy(n - buff_size + 1), buff_size, mpi_p, bc_y%end, 0, dy(-buff_size), buff_size, mpi_p, & & bc_y%beg, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - else ! PBC at the beginning only + else ! PBC at the end only + ! 
Send/receive buffer to/from bc_y%beg/bc_y%beg call MPI_SENDRECV(dy(0), buff_size, mpi_p, bc_y%beg, 1, dy(-buff_size), buff_size, mpi_p, bc_y%beg, 0, & & MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) end if else ! PBC at the end if (bc_y%beg >= 0) then ! PBC at the end and beginning + + ! Send/receive buffer to/from bc_y%beg/bc_y%end call MPI_SENDRECV(dy(0), buff_size, mpi_p, bc_y%beg, 1, dy(n + 1), buff_size, mpi_p, bc_y%end, 1, & & MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) else ! PBC at the end only + ! Send/receive buffer to/from bc_y%end/bc_y%end call MPI_SENDRECV(dy(n - buff_size + 1), buff_size, mpi_p, bc_y%end, 0, dy(n + 1), buff_size, mpi_p, & & bc_y%end, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) end if end if + ! END: MPI Communication in y-direction + + ! MPI Communication in z-direction else if (pbc_loc == -1) then ! PBC at the beginning if (bc_z%end >= 0) then ! PBC at the beginning and end + + ! Send/receive buffer to/from bc_z%end/bc_z%beg call MPI_SENDRECV(dz(p - buff_size + 1), buff_size, mpi_p, bc_z%end, 0, dz(-buff_size), buff_size, mpi_p, & & bc_z%beg, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - else ! PBC at the beginning only + else ! PBC at the end only + ! Send/receive buffer to/from bc_z%beg/bc_z%beg call MPI_SENDRECV(dz(0), buff_size, mpi_p, bc_z%beg, 1, dz(-buff_size), buff_size, mpi_p, bc_z%beg, 0, & & MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) end if else ! PBC at the end if (bc_z%beg >= 0) then ! PBC at the end and beginning + + ! Send/receive buffer to/from bc_z%beg/bc_z%end call MPI_SENDRECV(dz(0), buff_size, mpi_p, bc_z%beg, 1, dz(p + 1), buff_size, mpi_p, bc_z%end, 1, & & MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) else ! PBC at the end only + ! Send/receive buffer to/from bc_z%end/bc_z%end call MPI_SENDRECV(dz(p - buff_size + 1), buff_size, mpi_p, bc_z%end, 0, dz(p + 1), buff_size, mpi_p, & & bc_z%end, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) end if end if end if + ! 
END: MPI Communication in z-direction #endif end subroutine s_mpi_sendrecv_grid_variables_buffers diff --git a/src/post_process/m_global_parameters.fpp b/src/post_process/m_global_parameters.fpp index 0530c31081..b25fb651d5 100644 --- a/src/post_process/m_global_parameters.fpp +++ b/src/post_process/m_global_parameters.fpp @@ -26,6 +26,7 @@ module m_global_parameters ! Computational Domain Parameters integer :: proc_rank !< Rank of the local processor + !> @name Number of cells in the x-, y- and z-coordinate directions !> @{ integer :: m, m_root @@ -50,6 +51,7 @@ module m_global_parameters integer :: num_dims !< Number of spatial dimensions integer :: num_vels !< Number of velocity components (different from num_dims for mhd) + !> @name Cell-boundary locations in the x-, y- and z-coordinate directions !> @{ real(wp), allocatable, dimension(:) :: x_cb, x_root_cb, y_cb, z_cb @@ -66,10 +68,14 @@ module m_global_parameters real(wp), allocatable, dimension(:) :: dx, dy, dz !> @} - integer :: buff_size !< Number of ghost cells for boundary condition storage - integer :: t_step_start !< First time-step directory - integer :: t_step_stop !< Last time-step directory - integer :: t_step_save !< Interval between consecutive time-step directory + !> Number of cells in buffer region. For the variables which feature a buffer region, this region is used to store information + !! outside the computational domain based on the boundary conditions. + integer :: buff_size + integer, allocatable :: beta_vars(:) !< Indices of variables to communicate for bubble/particle coupling + integer :: t_step_start !< First time-step directory + integer :: t_step_stop !< Last time-step directory + integer :: t_step_save !< Interval between consecutive time-step directory + !> @name IO options for adaptive time-stepping !> @{ logical :: cfl_adap_dt, cfl_const_dt, cfl_dt @@ -80,7 +86,8 @@ module m_global_parameters integer :: n_start !> @} - ! 
NOTE: m_root, x_root_cb, x_root_cc = defragmented grid (1D only; equals m, x_cb, x_cc in serial) + ! NOTE: The variables m_root, x_root_cb and x_root_cc contain the grid data of the defragmented computational domain. They are + ! only used in 1D. For serial simulations, they are equal to m, x_cb and x_cc, respectively. !> @name Simulation Algorithm Parameters !> @{ @@ -110,6 +117,7 @@ module m_global_parameters !> @} integer :: avg_state !< Average state evaluation method + !> @name Annotations of the structure, i.e. the organization, of the state vectors !> @{ type(int_bounds_info) :: cont_idx !< Indexes of first & last continuity eqns. @@ -135,7 +143,8 @@ module m_global_parameters ! Cell Indices for the (local) interior points (O-m, O-n, 0-p). Stands for "InDices With BUFFer". type(int_bounds_info) :: idwint(1:3) - ! Cell indices (InDices With BUFFer): includes buffer in simulation only + ! Cell Indices for the entire (local) domain. In simulation, this includes the buffer region. idwbuff and idwint are the same + ! otherwise. Stands for "InDices With BUFFer". type(int_bounds_info) :: idwbuff(1:3) integer :: num_bc_patches logical :: bc_io @@ -144,16 +153,23 @@ module m_global_parameters type(int_bounds_info) :: bc_x, bc_y, bc_z !> @} - integer :: shear_num !< Number of shear stress components - integer, dimension(3) :: shear_indices !< Indices of the stress components that represent shear stress - integer :: shear_BC_flip_num !< Number of shear stress components to reflect for boundary conditions - integer, dimension(3, 2) :: shear_BC_flip_indices !< Shear stress BC reflection indices (1:3, 1:shear_BC_flip_num) - logical :: parallel_io !< Format of the data files - logical :: sim_data - logical :: file_per_process !< output format - integer, allocatable, dimension(:) :: proc_coords !< Processor coordinates in MPI_CART_COMM + integer :: shear_num !! 
Number of shear stress components + integer, dimension(3) :: shear_indices !< Indices of the stress components that represent shear stress + integer :: shear_BC_flip_num !< Number of shear stress components to reflect for boundary conditions + !> Indices of shear stress components to reflect for boundary conditions. Size: (1:3, 1:shear_BC_flip_num) for (x/y/z, + !! [indices]) + integer, dimension(3, 2) :: shear_BC_flip_indices + logical :: parallel_io !< Format of the data files + logical :: sim_data + logical :: file_per_process !< output format + integer, allocatable, dimension(:) :: proc_coords !< Processor coordinates in MPI_CART_COMM + type(int_bounds_info), dimension(3) :: nidx + integer, allocatable, dimension(:,:,:) :: neighbor_ranks + !! Neighbor processor ranks + integer, allocatable, dimension(:) :: start_idx !< Starting cell-center index of local processor in global grid - integer :: num_ibs !< Number of immersed boundaries + integer :: num_ibs !< Number of immersed boundaries + #ifdef MFC_MPI type(mpi_io_var), public :: MPI_IO_DATA type(mpi_io_ib_var), public :: MPI_IO_IB_DATA @@ -168,10 +184,17 @@ module m_global_parameters integer :: mpi_info_int !> @} + !> Database of the physical parameters of each of the fluids that is present in the flow. These include the stiffened gas + !! equation of state parameters, and the Reynolds numbers. type(physical_parameters), dimension(num_fluids_max) :: fluid_pp !< Stiffened gas EOS parameters and Reynolds numbers per fluid + ! Subgrid Bubble Parameters type(subgrid_bubble_physical_parameters) :: bub_pp - real(wp), allocatable, dimension(:) :: adv !< Advection variables + + ! Subgrid Particle Parameters + type(subgrid_particle_physical_parameters) :: particle_pp + real(wp), allocatable, dimension(:) :: adv !< Advection variables + ! 
Formatted Database File(s) Structure Parameters integer :: format !< Format of the database file(s) @@ -180,6 +203,7 @@ module m_global_parameters logical :: output_partial_domain !< Specify portion of domain to output for post-processing type(bounds_info) :: x_output, y_output, z_output !< Portion of domain to output for post-processing type(int_bounds_info) :: x_output_idx, y_output_idx, z_output_idx !< Indices of domain to output for post-processing + !> @name Size of the ghost zone layer in the x-, y- and z-coordinate directions. The definition of the ghost zone layers is only !! necessary when using the Silo database file format in multidimensions. These zones provide VisIt with the subdomain !! connectivity information that it requires in order to produce smooth plots. @@ -201,6 +225,8 @@ module m_global_parameters logical :: E_wrt logical, dimension(num_fluids_max) :: alpha_rho_e_wrt logical :: fft_wrt + !> AMDFlang workaround: keep a dummy logical to avoid a compiler case-optimization bug when a parameter+GPU-kernel conditional + !! is false logical :: dummy !< AMDFlang workaround for case-optimization + GPU-kernel bug logical :: pres_wrt logical, dimension(num_fluids_max) :: alpha_wrt @@ -240,9 +266,19 @@ module m_global_parameters logical :: lag_betaC_wrt !> @} + !> Amplitude coefficients of the numerical Schlieren function that are used to adjust the intensity of numerical Schlieren + !! renderings for individual fluids. This enables waves and interfaces of varying strengths and in all of the fluids to be made + !! simultaneously visible on a single plot. 
real(wp), dimension(num_fluids_max) :: schlieren_alpha !< Per-fluid Schlieren intensity amplitude coefficients - integer :: fd_order !< Finite-difference order for vorticity and Schlieren derivatives - integer :: fd_number !< Finite-difference half-stencil size: MAX(1, fd_order/2) + + !> The order of the finite-difference (fd) approximations of the first-order derivatives that need to be evaluated when + !! vorticity and/or the numerical Schlieren function are to be outputted to the formatted database file(s). + integer :: fd_order + + !> The finite-difference number is given by MAX(1, fd_order/2). Essentially, it is a measure of the half-size of the + !! finite-difference stencil for the selected order of accuracy. + integer :: fd_number + !> @name Reference parameters for Tait EOS !> @{ real(wp) :: rhoref, pref @@ -271,6 +307,11 @@ module m_global_parameters integer :: nmom !> @} + !> @name Particle modeling variables and parameters + !> @{ + real(wp) :: cp_particle, rho0ref_particle + !> @} + !> @name surface tension coefficient !> @{ real(wp) :: sigma @@ -292,6 +333,7 @@ module m_global_parameters !> @name Lagrangian bubbles !> @{ logical :: bubbles_lagrange + logical :: particles_lagrange !> @} real(wp) :: Bx0 !< Constant magnetic field in the x-direction (1D) @@ -303,7 +345,8 @@ contains !! parameters once they are read from the input file. impure subroutine s_assign_default_values_to_user_inputs - integer :: i !< Generic loop iterator + integer :: i + ! Logistics case_dir = '.' @@ -398,6 +441,10 @@ contains bub_pp%R_v = dflt_real; R_v = dflt_real bub_pp%R_g = dflt_real; R_g = dflt_real + ! Subgrid particle parameters + particle_pp%rho0ref_particle = dflt_real + particle_pp%cp_particle = dflt_real + ! Formatted database file(s) structure parameters format = dflt_int @@ -477,6 +524,7 @@ contains ! Lagrangian bubbles modeling bubbles_lagrange = .false. + particles_lagrange = .false. ! IBM num_ibs = dflt_int @@ -511,11 +559,11 @@ contains ! 
Annotating structure of the state and flux vectors belonging to the system of equations defined by the selected number ! of spatial dimensions and the gamma/pi_inf model - cont_idx%beg = 1 + cont_idx%beg = 1 ! one continuity equation cont_idx%end = cont_idx%beg - mom_idx%beg = cont_idx%end + 1 + mom_idx%beg = cont_idx%end + 1 ! one momentum equation in each mom_idx%end = cont_idx%end + num_vels - E_idx = mom_idx%end + 1 + E_idx = mom_idx%end + 1 ! one energy equation adv_idx%beg = E_idx + 1 adv_idx%end = adv_idx%beg + 1 gamma_idx = adv_idx%beg @@ -526,17 +574,17 @@ contains else if (model_eqns == 2) then ! Annotating structure of the state and flux vectors belonging to the system of equations defined by the selected number ! of spatial dimensions and the volume fraction model - cont_idx%beg = 1 + cont_idx%beg = 1 ! one continuity equation cont_idx%end = num_fluids - mom_idx%beg = cont_idx%end + 1 + mom_idx%beg = cont_idx%end + 1 ! one momentum equation in each mom_idx%end = cont_idx%end + num_vels - E_idx = mom_idx%end + 1 + E_idx = mom_idx%end + 1 ! one energy equation if (igr) then ! Volume fractions are stored in the indices immediately following the energy equation. IGR tracks a total of (N-1) ! volume fractions for N fluids, hence the "-1" in adv_idx%end. If num_fluids = 1 then adv_idx%end < adv_idx%beg, ! which skips all loops over the volume fractions since there is no volume fraction to track - adv_idx%beg = E_idx + 1 ! Alpha for fluid 1 + adv_idx%beg = E_idx + 1 adv_idx%end = E_idx + num_fluids - 1 else ! Volume fractions are stored in the indices immediately following the energy equation. WENO/MUSCL + Riemann tracks @@ -606,10 +654,9 @@ contains end if end if - if (bubbles_lagrange) then - beta_idx = sys_size + 1 - sys_size = beta_idx - end if + ! if (bubbles_lagrange) then beta_idx = sys_size + 1 sys_size = beta_idx end if + + ! 
if (particles_lagrange) then beta_idx = sys_size + 1 sys_size = beta_idx end if if (mhd) then B_idx%beg = sys_size + 1 @@ -625,11 +672,11 @@ contains else if (model_eqns == 3) then ! Annotating structure of the state and flux vectors belonging to the system of equations defined by the selected number ! of spatial dimensions and the volume fraction model - cont_idx%beg = 1 + cont_idx%beg = 1 ! one continuity equation cont_idx%end = num_fluids - mom_idx%beg = cont_idx%end + 1 + mom_idx%beg = cont_idx%end + 1 ! one momentum equation in each mom_idx%end = cont_idx%end + num_vels - E_idx = mom_idx%end + 1 + E_idx = mom_idx%end + 1 ! one energy equation adv_idx%beg = E_idx + 1 adv_idx%end = E_idx + num_fluids internalEnergies_idx%beg = adv_idx%end + 1 @@ -733,6 +780,16 @@ contains sys_size = c_idx end if + if (bubbles_lagrange) then + beta_idx = sys_size + 1 + sys_size = beta_idx + end if + + if (particles_lagrange) then + beta_idx = sys_size + 1 + sys_size = beta_idx + end if + if (cont_damage) then damage_idx = sys_size + 1 sys_size = damage_idx @@ -748,6 +805,14 @@ contains end if end if + if (bubbles_lagrange) then + allocate (beta_vars(1:3)) + beta_vars(1:3) = [1, 2, 5] + else if (particles_lagrange) then + allocate (beta_vars(1:8)) + beta_vars(1:8) = [1, 2, 3, 4, 5, 6, 7, 8] + end if + if (chemistry) then species_idx%beg = sys_size + 1 species_idx%end = sys_size + num_species @@ -889,7 +954,7 @@ contains grid_geometry = 1 else if (cyl_coord .and. p == 0) then ! Axisymmetric cylindrical grid grid_geometry = 2 - else ! 
Fully 3D cylindrical grid + else grid_geometry = 3 end if @@ -968,6 +1033,9 @@ contains if (ib) MPI_IO_IB_DATA%var%sf => null() #endif + if (allocated(neighbor_ranks)) deallocate (neighbor_ranks) + if (allocated(beta_vars)) deallocate (beta_vars) + end subroutine s_finalize_global_parameters_module end module m_global_parameters diff --git a/src/post_process/m_mpi_proxy.fpp b/src/post_process/m_mpi_proxy.fpp index c91f140d39..a4d37f464c 100644 --- a/src/post_process/m_mpi_proxy.fpp +++ b/src/post_process/m_mpi_proxy.fpp @@ -31,10 +31,10 @@ contains #ifdef MFC_MPI integer :: i !< Generic loop iterator integer :: ierr !< Generic flag used to identify and report MPI errors + ! Allocating and configuring the receive counts and the displacement vector variables used in variable-gather communication ! procedures. Note that these are only needed for either multidimensional runs that utilize the Silo database file format or ! for 1D simulations. - if ((format == 1 .and. n > 0) .or. n == 0) then allocate (recvcounts(0:num_procs - 1)) allocate (displs(0:num_procs - 1)) @@ -64,8 +64,8 @@ contains #ifdef MFC_MPI integer :: i !< Generic loop iterator integer :: ierr !< Generic flag used to identify and report MPI errors - ! Logistics + ! 
Logistics call MPI_BCAST(case_dir, len(case_dir), MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr) #:for VAR in [ 'm', 'n', 'p', 'm_glb', 'n_glb', 'p_glb', & @@ -87,11 +87,22 @@ contains & 'adv_n', 'ib', 'cfl_adap_dt', 'cfl_const_dt', 'cfl_dt', & & 'surface_tension', 'hyperelasticity', 'bubbles_lagrange', & & 'output_partial_domain', 'relativity', 'cont_damage', 'bc_io', & - & 'down_sample','fft_wrt', 'hyper_cleaning', 'ib_state_wrt'] + & 'down_sample','fft_wrt', 'hyper_cleaning', 'ib_state_wrt', & + & 'particles_lagrange' ] call MPI_BCAST(${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) #:endfor if (bubbles_lagrange) then + #:for VAR in ['lag_header', 'lag_txt_wrt', 'lag_db_wrt', 'lag_id_wrt', & + & 'lag_pos_wrt', 'lag_pos_prev_wrt', 'lag_vel_wrt', 'lag_rad_wrt', & + & 'lag_rvel_wrt', 'lag_r0_wrt', 'lag_rmax_wrt', 'lag_rmin_wrt', & + & 'lag_dphidt_wrt', 'lag_pres_wrt', 'lag_mv_wrt', 'lag_mg_wrt', & + & 'lag_betaT_wrt', 'lag_betaC_wrt'] + call MPI_BCAST(${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + #:endfor + end if + + if (particles_lagrange) then #:for VAR in ['lag_header', 'lag_txt_wrt', 'lag_db_wrt', 'lag_id_wrt', & & 'lag_pos_wrt', 'lag_pos_prev_wrt', 'lag_vel_wrt', 'lag_rad_wrt', & & 'lag_rvel_wrt', 'lag_r0_wrt', 'lag_rmax_wrt', 'lag_rmin_wrt', & @@ -127,6 +138,13 @@ contains #:endfor end if + ! Subgrid particle parameters + if (particles_lagrange) then + #:for VAR in ['rho0ref_particle','cp_particle'] + call MPI_BCAST(particle_pp%${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + #:endfor + end if + #:for VAR in [ 'pref', 'rhoref', 'R0ref', 'poly_sigma', 'Web', 'Ca', & & 'Re_inv', 'Bx0', 'sigma', 't_save', 't_stop', & & 'x_output%beg', 'x_output%end', 'y_output%beg', & @@ -138,7 +156,10 @@ contains end subroutine s_mpi_bcast_user_inputs - !> Gather spatial extents from all ranks for Silo database metadata + !> This subroutine gathers the Silo database metadata for the spatial extents in order to boost the performance of the + !! multidimensional visualization. + ! 
! @param spatial_extents Spatial extents for each processor's sub-domain. First dimension corresponds to the minimum and + ! maximum values, respectively, while the second dimension corresponds to the processor rank. impure subroutine s_mpi_gather_spatial_extents(spatial_extents) real(wp), dimension(1:,0:), intent(inout) :: spatial_extents @@ -148,7 +169,6 @@ contains real(wp) :: ext_temp(0:num_procs - 1) ! Simulation is 3D - if (p > 0) then if (grid_geometry == 3) then ! Minimum spatial extent in the r-direction @@ -229,14 +249,14 @@ contains end subroutine s_mpi_gather_spatial_extents - !> Collect the sub-domain cell-boundary or cell-center location data from all processors and put back together the grid of the - !! entire computational domain on the rank 0 processor. This is only done for 1D simulations. + !> This subroutine collects the sub-domain cell-boundary or cell-center locations data from all of the processors and puts back + !! together the grid of the entire computational domain on the rank 0 processor. This is only done for 1D simulations. impure subroutine s_mpi_defragment_1d_grid_variable #ifdef MFC_MPI integer :: ierr !< Generic flag used to identify and report MPI errors - ! Silo-HDF5 database format + ! Silo-HDF5 database format if (format == 1) then call MPI_GATHERV(x_cc(0), m + 1, mpi_p, x_root_cc(0), recvcounts, displs, mpi_p, 0, MPI_COMM_WORLD, ierr) @@ -250,8 +270,11 @@ contains end subroutine s_mpi_defragment_1d_grid_variable - !> Gather the Silo database metadata for the flow variable's extents to boost performance of the multidimensional visualization. - !! @param q_sf Flow variable on a single computational sub-domain + !> This subroutine gathers the Silo database metadata for the flow variable's extents as to boost performance of the + !! multidimensional visualization. + !! @param q_sf Flow variable defined on a single computational sub-domain + ! ! @param data_extents The flow variable extents on each of the processor's sub-domain. 
First dimension of array corresponds to + ! the former's minimum and maximum values, respectively, while second dimension corresponds to each processor's rank. impure subroutine s_mpi_gather_data_extents(q_sf, data_extents) real(wp), dimension(:,:,:), intent(in) :: q_sf @@ -283,10 +306,10 @@ contains end subroutine s_mpi_gather_data_extents - !> Gather the sub-domain flow variable data from all processors and reassemble it for the entire computational domain on the - !! rank 0 processor. This is only done for 1D simulations. - !! @param q_sf Flow variable on a single computational sub-domain - !! @param q_root_sf Flow variable on the entire computational domain + !> This subroutine gathers the sub-domain flow variable data from all of the processors and puts it back together for the entire + !! computational domain on the rank 0 processor. This is only done for 1D simulations. + !! @param q_sf Flow variable defined on a single computational sub-domain + !! @param q_root_sf Flow variable defined on the entire computational domain impure subroutine s_mpi_defragment_1d_flow_variable(q_sf, q_root_sf) real(wp), dimension(0:m), intent(in) :: q_sf @@ -294,9 +317,9 @@ contains #ifdef MFC_MPI integer :: ierr !< Generic flag used to identify and report MPI errors + ! Gathering the sub-domain flow variable data from all the processes and putting it back together for the entire ! computational domain on the process with rank 0 - call MPI_GATHERV(q_sf(0), m + 1, mpi_p, q_root_sf(0), recvcounts, displs, mpi_p, 0, MPI_COMM_WORLD, ierr) #endif diff --git a/src/post_process/m_start_up.fpp b/src/post_process/m_start_up.fpp index c65e4cf7cf..7efb841780 100644 --- a/src/post_process/m_start_up.fpp +++ b/src/post_process/m_start_up.fpp @@ -8,6 +8,8 @@ module m_start_up + ! 
Dependencies + use, intrinsic :: iso_c_binding use m_derived_types @@ -55,9 +57,15 @@ contains impure subroutine s_read_input_file character(LEN=name_len) :: file_loc - logical :: file_check - integer :: iostatus - character(len=1000) :: line + + !> Generic logical used for the purpose of asserting whether a file is or is not present in the designated location + logical :: file_check + integer :: iostatus + !! Integer to check iostat of file read + + character(len=1000) :: line + + ! Namelist for all of the parameters to be inputted by the user namelist /user_inputs/ case_dir, m, n, p, t_step_start, t_step_stop, t_step_save, model_eqns, num_fluids, mpp_lim, & & weno_order, bc_x, bc_y, bc_z, fluid_pp, bub_pp, format, precision, output_partial_domain, x_output, y_output, & @@ -70,11 +78,14 @@ contains & bubbles_lagrange, sim_data, hyperelasticity, Bx0, relativity, cont_damage, hyper_cleaning, num_bc_patches, igr, & & igr_order, down_sample, recon_type, muscl_order, lag_header, lag_txt_wrt, lag_db_wrt, lag_id_wrt, lag_pos_wrt, & & lag_pos_prev_wrt, lag_vel_wrt, lag_rad_wrt, lag_rvel_wrt, lag_r0_wrt, lag_rmax_wrt, lag_rmin_wrt, lag_dphidt_wrt, & - & lag_pres_wrt, lag_mv_wrt, lag_mg_wrt, lag_betaT_wrt, lag_betaC_wrt, alpha_rho_e_wrt, ib_state_wrt + & lag_pres_wrt, lag_mv_wrt, lag_mg_wrt, lag_betaT_wrt, lag_betaC_wrt, alpha_rho_e_wrt, ib_state_wrt, & + & particles_lagrange, particle_pp + ! Inquiring the status of the post_process.inp file file_loc = 'post_process.inp' inquire (FILE=trim(file_loc), EXIST=file_check) + ! Checking whether the input file is there. If it is, the input file is read. If not, the program is terminated. if (file_check) then open (1, FILE=trim(file_loc), form='formatted', STATUS='old', ACTION='read') read (1, NML=user_inputs, iostat=iostatus) @@ -96,6 +107,7 @@ contains p = int((p + 1)/3) - 1 end if + ! 
Store m,n,p into global m,n,p m_glb = m n_glb = n p_glb = p @@ -120,12 +132,15 @@ contains character(LEN=len_trim(case_dir)) :: file_loc logical :: dir_check + ! Checking the existence of the case folder + case_dir = adjustl(case_dir) file_loc = trim(case_dir) // '/.' call my_inquire(file_loc, dir_check) + ! Constraint on the location of the case directory if (dir_check .neqv. .true.) then call s_mpi_abort('Unsupported choice for the value of ' // 'case_dir. Exiting.') end if @@ -135,7 +150,7 @@ contains end subroutine s_check_input_file - !> Load grid and conservative data for a time step, fill ghost-cell buffers, and convert to primitive variables. + !> @brief Load grid and conservative data for a time step, fill ghost-cell buffers, and convert to primitive variables. impure subroutine s_perform_time_step(t_step) integer, intent(inout) :: t_step @@ -152,20 +167,24 @@ contains end if end if + ! Populating the grid and conservative variables call s_read_data_files(t_step) + ! Populating the buffer regions of the grid and conservative variables if (buff_size > 0) then call s_populate_grid_variables_buffers() call s_populate_variables_buffers(bc_type, q_cons_vf) end if + ! Initialize the Temperature cache. if (chemistry) call s_compute_q_T_sf(q_T_sf, q_cons_vf, idwbuff) + ! Converting the conservative variables to the primitive ones call s_convert_conservative_to_primitive_variables(q_cons_vf, q_T_sf, q_prim_vf, idwbuff) end subroutine s_perform_time_step - !> Derive requested flow quantities from primitive variables and write them to the formatted database files. + !> @brief Derive requested flow quantities from primitive variables and write them to the formatted database files. impure subroutine s_save_data(t_step, varname, pres, c, H) integer, intent(inout) :: t_step @@ -200,6 +219,7 @@ contains z_end = offset_z%end + p end if + ! Opening a new formatted database file call s_open_formatted_database_file(t_step) if (sim_data .and. 
proc_rank == 0) then @@ -212,20 +232,25 @@ contains call s_write_energy_data_file(q_prim_vf, q_cons_vf) end if + ! Adding the grid to the formatted database file call s_write_grid_to_formatted_database_file(t_step) + ! Computing centered finite-difference coefficients in x-direction if (omega_wrt(2) .or. omega_wrt(3) .or. qm_wrt .or. liutex_wrt .or. schlieren_wrt) then call s_compute_finite_difference_coefficients(m, x_cc, fd_coeff_x, buff_size, fd_number, fd_order, offset_x) end if + ! Computing centered finite-difference coefficients in y-direction if (omega_wrt(1) .or. omega_wrt(3) .or. qm_wrt .or. liutex_wrt .or. (n > 0 .and. schlieren_wrt)) then call s_compute_finite_difference_coefficients(n, y_cc, fd_coeff_y, buff_size, fd_number, fd_order, offset_y) end if + ! Computing centered finite-difference coefficients in z-direction if (omega_wrt(1) .or. omega_wrt(2) .or. qm_wrt .or. liutex_wrt .or. (p > 0 .and. schlieren_wrt)) then call s_compute_finite_difference_coefficients(p, z_cc, fd_coeff_z, buff_size, fd_number, fd_order, offset_z) end if + ! Adding the partial densities to the formatted database file if ((model_eqns == 2) .or. (model_eqns == 3) .or. (model_eqns == 4)) then do i = 1, num_fluids if (alpha_rho_wrt(i) .or. (cons_vars_wrt .or. prim_vars_wrt)) then @@ -242,6 +267,7 @@ contains end do end if + ! Adding the density to the formatted database file if ((rho_wrt .or. (model_eqns == 1 .and. (cons_vars_wrt .or. prim_vars_wrt))) .and. (.not. relativity)) then q_sf(:,:,:) = rho_sf(x_beg:x_end,y_beg:y_end,z_beg:z_end) write (varname, '(A)') 'rho' @@ -267,6 +293,7 @@ contains varname(:) = ' ' end if + ! Adding the momentum to the formatted database file do i = 1, E_idx - mom_idx%beg if (mom_wrt(i) .or. cons_vars_wrt) then q_sf(:,:,:) = q_cons_vf(i + cont_idx%end)%sf(x_beg:x_end,y_beg:y_end,z_beg:z_end) @@ -277,6 +304,7 @@ contains end if end do + ! Adding the velocity to the formatted database file do i = 1, E_idx - mom_idx%beg if (vel_wrt(i) .or. 
prim_vars_wrt) then q_sf(:,:,:) = q_prim_vf(i + cont_idx%end)%sf(x_beg:x_end,y_beg:y_end,z_beg:z_end) @@ -287,6 +315,7 @@ contains end if end do + ! Adding the species' concentrations to the formatted database file if (chemistry) then do i = 1, num_species if (chem_wrt_Y(i) .or. prim_vars_wrt) then @@ -307,6 +336,7 @@ contains end if end if + ! Adding the flux limiter function to the formatted database file do i = 1, E_idx - mom_idx%beg if (flux_wrt(i)) then call s_derive_flux_limiter(i, q_prim_vf, q_sf) @@ -318,6 +348,7 @@ contains end if end do + ! Adding the energy to the formatted database file if (E_wrt .or. cons_vars_wrt) then q_sf(:,:,:) = q_cons_vf(E_idx)%sf(x_beg:x_end,y_beg:y_end,z_beg:z_end) write (varname, '(A)') 'E' @@ -326,6 +357,7 @@ contains varname(:) = ' ' end if + ! Adding the individual energies to the formatted database file if (model_eqns == 3) then do i = 1, num_fluids if (alpha_rho_e_wrt(i) .or. cons_vars_wrt) then @@ -338,6 +370,7 @@ contains end do end if + ! Adding Energy cascade FFT if (fft_wrt) then do l = 0, p do k = 0, n @@ -443,6 +476,7 @@ contains end do end if + ! Adding the magnetic field to the formatted database file if (mhd .and. prim_vars_wrt) then do i = B_idx%beg, B_idx%end q_sf(:,:,:) = q_prim_vf(i)%sf(x_beg:x_end,y_beg:y_end,z_beg:z_end) @@ -470,6 +504,7 @@ contains end do end if + ! Adding the elastic shear stresses to the formatted database file if (elasticity) then do i = 1, stress_idx%end - stress_idx%beg + 1 if (prim_vars_wrt) then @@ -508,6 +543,7 @@ contains varname(:) = ' ' end if + ! Adding the pressure to the formatted database file if (pres_wrt .or. prim_vars_wrt) then q_sf(:,:,:) = q_prim_vf(E_idx)%sf(x_beg:x_end,y_beg:y_end,z_beg:z_end) write (varname, '(A)') 'pres' @@ -516,6 +552,7 @@ contains varname(:) = ' ' end if + ! Adding the volume fraction(s) to the formatted database file if (((model_eqns == 2) .and. (bubbles_euler .neqv. .true.)) .or. 
(model_eqns == 3)) then do i = 1, num_fluids - 1 if (alpha_wrt(i) .or. (cons_vars_wrt .or. prim_vars_wrt)) then @@ -549,6 +586,7 @@ contains end if end if + ! Adding specific heat ratio function to formatted database file if (gamma_wrt .or. (model_eqns == 1 .and. (cons_vars_wrt .or. prim_vars_wrt))) then q_sf(:,:,:) = gamma_sf(x_beg:x_end,y_beg:y_end,z_beg:z_end) write (varname, '(A)') 'gamma' @@ -557,6 +595,7 @@ contains varname(:) = ' ' end if + ! Adding the specific heat ratio to the formatted database file if (heat_ratio_wrt) then call s_derive_specific_heat_ratio(q_sf) @@ -566,6 +605,7 @@ contains varname(:) = ' ' end if + ! Adding liquid stiffness function to formatted database file if (pi_inf_wrt .or. (model_eqns == 1 .and. (cons_vars_wrt .or. prim_vars_wrt))) then q_sf(:,:,:) = pi_inf_sf(x_beg:x_end,y_beg:y_end,z_beg:z_end) write (varname, '(A)') 'pi_inf' @@ -574,6 +614,7 @@ contains varname(:) = ' ' end if + ! Adding the liquid stiffness to the formatted database file if (pres_inf_wrt) then call s_derive_liquid_stiffness(q_sf) @@ -583,6 +624,7 @@ contains varname(:) = ' ' end if + ! Adding the sound speed to the formatted database file if (c_wrt) then do k = -offset_z%beg, p + offset_z%end do j = -offset_y%beg, n + offset_y%end @@ -609,6 +651,7 @@ contains varname(:) = ' ' end if + ! Adding the vorticity to the formatted database file do i = 1, 3 if (omega_wrt(i)) then call s_derive_vorticity_component(i, q_prim_vf, q_sf) @@ -627,6 +670,7 @@ contains call s_write_variable_to_formatted_database_file(varname, t_step) end if + ! Adding Q_M to the formatted database file if (p > 0 .and. qm_wrt) then call s_derive_qm(q_prim_vf, q_sf) @@ -636,9 +680,12 @@ contains varname(:) = ' ' end if + ! Adding Liutex magnitude to the formatted database file if (liutex_wrt) then + ! Compute Liutex vector and its magnitude call s_derive_liutex(q_prim_vf, liutex_mag, liutex_axis) + ! 
Liutex magnitude q_sf = liutex_mag write (varname, '(A)') 'liutex_mag' @@ -646,6 +693,7 @@ contains varname(:) = ' ' + ! Liutex axis do i = 1, 3 q_sf = liutex_axis(:,:,:,i) @@ -656,6 +704,7 @@ contains end do end if + ! Adding numerical Schlieren function to formatted database file if (schlieren_wrt) then call s_derive_numerical_schlieren_function(q_cons_vf, q_sf) @@ -665,6 +714,7 @@ contains varname(:) = ' ' end if + ! Adding the color function to formatted database file if (cf_wrt) then q_sf(:,:,:) = q_cons_vf(c_idx)%sf(x_beg:x_end,y_beg:y_end,z_beg:z_end) write (varname, '(A,I0)') 'color_function' @@ -672,6 +722,7 @@ contains varname(:) = ' ' end if + ! Adding the volume fraction(s) to the formatted database file if (bubbles_euler) then do i = adv_idx%beg, adv_idx%end q_sf(:,:,:) = q_cons_vf(i)%sf(x_beg:x_end,y_beg:y_end,z_beg:z_end) @@ -681,6 +732,7 @@ contains end do end if + ! Adding the bubble variables to the formatted database file if (bubbles_euler) then ! nR do i = 1, nb @@ -724,8 +776,9 @@ contains end if end if - if (bubbles_lagrange) then - ! Void fraction field + ! Adding the lagrangian subgrid variables to the formatted database file + if (bubbles_lagrange .or. particles_lagrange) then + !! Void fraction field q_sf(:,:,:) = 1._wp - q_cons_vf(beta_idx)%sf(-offset_x%beg:m + offset_x%end,-offset_y%beg:n + offset_y%end, & & -offset_z%beg:p + offset_z%end) write (varname, '(A)') 'voidFraction' @@ -741,11 +794,12 @@ contains call s_close_energy_data_file() end if + ! Closing the formatted database file call s_close_formatted_database_file() end subroutine s_save_data - !> Transpose 3-D complex data from x-pencil to y-pencil layout via MPI_Alltoall. + !> @brief Transpose 3-D complex data from x-pencil to y-pencil layout via MPI_Alltoall. 
subroutine s_mpi_transpose_x2y complex(c_double_complex), allocatable :: sendbuf(:), recvbuf(:) @@ -787,7 +841,7 @@ contains end subroutine s_mpi_transpose_x2y - !> Transpose 3-D complex data from y-pencil to z-pencil layout via MPI_Alltoall. + !> @brief Transpose 3-D complex data from y-pencil to z-pencil layout via MPI_Alltoall. subroutine s_mpi_transpose_y2z complex(c_double_complex), allocatable :: sendbuf(:), recvbuf(:) @@ -830,15 +884,19 @@ contains end subroutine s_mpi_transpose_y2z - !> Initialize all post-process sub-modules, set up I/O pointers, and prepare FFTW plans and MPI communicators. + !> @brief Initialize all post-process sub-modules, set up I/O pointers, and prepare FFTW plans and MPI communicators. impure subroutine s_initialize_modules + ! Computation of parameters, allocation procedures, and/or any other tasks needed to properly setup the modules integer :: size_n(1), inembed(1), onembed(1) call s_initialize_global_parameters_module() if (bubbles_euler .or. bubbles_lagrange) then call s_initialize_bubbles_model() end if + if (particles_lagrange) then + call s_initialize_particles_model() + end if if (num_procs > 1) then call s_initialize_mpi_proxy_module() call s_initialize_mpi_common_module() @@ -849,6 +907,7 @@ contains call s_initialize_derived_variables_module() call s_initialize_data_output_module() + ! Associate pointers for serial or parallel I/O if (parallel_io .neqv. .true.) then s_read_data_files => s_read_serial_data_files else @@ -919,7 +978,7 @@ contains end subroutine s_initialize_modules - !> Perform a distributed forward 3-D FFT using pencil decomposition with FFTW and MPI transposes. + !> @brief Perform a distributed forward 3-D FFT using pencil decomposition with FFTW and MPI transposes. subroutine s_mpi_FFT_fwd integer :: j, k, l @@ -986,13 +1045,17 @@ contains end subroutine s_mpi_FFT_fwd - !> Set up the MPI environment, read and broadcast user inputs, and decompose the computational domain. 
+ !> @brief Set up the MPI environment, read and broadcast user inputs, and decompose the computational domain. impure subroutine s_initialize_mpi_domain num_dims = 1 + min(1, n) + min(1, p) + ! Initialization of the MPI environment call s_mpi_initialize() + ! Processor with rank 0 assigns default user input values prior to reading those in from the input file. Next, the user + ! inputs are read in and their consistency is checked. The detection of any inconsistencies automatically leads to the + ! termination of the post-process. if (proc_rank == 0) then call s_assign_default_values_to_user_inputs() call s_read_input_file() @@ -1001,6 +1064,8 @@ contains print '(" Post-processing a ", I0, "x", I0, "x", I0, " case on ", I0, " rank(s)")', m, n, p, num_procs end if + ! Broadcasting the user inputs to all of the processors and performing the parallel computational domain decomposition. + ! Neither procedure has to be carried out if the post-process is in fact not truly executed in parallel. call s_mpi_bcast_user_inputs() call s_initialize_parallel_io() call s_mpi_decompose_computational_domain() @@ -1008,11 +1073,14 @@ contains end subroutine s_initialize_mpi_domain - !> Destroy FFTW plans, free MPI communicators, and finalize all post-process sub-modules. + !> @brief Destroy FFTW plans, free MPI communicators, and finalize all post-process sub-modules. impure subroutine s_finalize_modules + ! Disassociate pointers for serial and parallel I/O s_read_data_files => null() + ! if (sim_data .and. proc_rank == 0) then call s_close_intf_data_file() call s_close_energy_data_file() end if + if (fft_wrt) then if (c_associated(fwd_plan_x)) call fftw_destroy_plan(fwd_plan_x) if (c_associated(fwd_plan_y)) call fftw_destroy_plan(fwd_plan_y) @@ -1035,6 +1103,7 @@ contains end if #endif + ! 
Deallocation procedures for the modules call s_finalize_data_output_module() call s_finalize_derived_variables_module() call s_finalize_data_input_module() @@ -1045,6 +1114,7 @@ contains end if call s_finalize_global_parameters_module() + ! Finalizing the MPI environment call s_mpi_finalize() end subroutine s_finalize_modules diff --git a/src/pre_process/m_global_parameters.fpp b/src/pre_process/m_global_parameters.fpp index 077262aece..ddfd3e7724 100644 --- a/src/pre_process/m_global_parameters.fpp +++ b/src/pre_process/m_global_parameters.fpp @@ -41,14 +41,19 @@ module m_global_parameters integer :: num_vels !< Number of velocity components (different from num_dims for mhd) logical :: cyl_coord integer :: grid_geometry !< Cylindrical coordinates (either axisymmetric or full 3D) + !> Locations of cell-centers (cc) in x-, y- and z-directions, respectively real(wp), allocatable, dimension(:) :: x_cc, y_cc, z_cc + !> Locations of cell-boundaries (cb) in x-, y- and z-directions, respectively real(wp), allocatable, dimension(:) :: x_cb, y_cb, z_cb real(wp) :: dx, dy, dz !< Minimum cell-widths in the x-, y- and z-coordinate directions type(bounds_info) :: x_domain, y_domain, z_domain !< Locations of the domain bounds in the x-, y- and z-coordinate directions logical :: stretch_x, stretch_y, stretch_z !< Grid stretching flags for the x-, y- and z-coordinate directions - ! Grid stretching: a_x/a_y/a_z = rate, x_a/y_a/z_a = location + + ! Parameters of the grid stretching function for the x-, y- and z-coordinate directions. The "a" parameters are a measure of the + ! rate at which the grid is stretched while the remaining parameters are indicative of the location on the grid at which the + ! stretching begins. 
real(wp) :: a_x, a_y, a_z integer :: loops_x, loops_y, loops_z real(wp) :: x_a, y_a, z_a @@ -81,6 +86,7 @@ module m_global_parameters logical :: igr !< Use information geometric regularization integer :: igr_order !< IGR reconstruction order logical, parameter :: chemistry = .${chemistry}$. !< Chemistry modeling + ! Annotations of the structure, i.e. the organization, of the state vectors type(int_bounds_info) :: cont_idx !< Indexes of first & last continuity eqns. type(int_bounds_info) :: mom_idx !< Indexes of first & last momentum eqns. @@ -99,42 +105,68 @@ module m_global_parameters type(int_bounds_info) :: species_idx !< Indexes of first & last concentration eqns. integer :: damage_idx !< Index of damage state variable (D) for continuum damage model integer :: psi_idx !< Index of hyperbolic cleaning state variable for MHD + ! Cell Indices for the (local) interior points (O-m, O-n, 0-p). Stands for "InDices With BUFFer". type(int_bounds_info) :: idwint(1:3) - ! Cell indices (InDices With BUFFer): includes buffer except in pre_process - type(int_bounds_info) :: idwbuff(1:3) - type(int_bounds_info) :: bc_x, bc_y, bc_z !< Boundary conditions in the x-, y- and z-coordinate directions - integer :: shear_num !< Number of shear stress components - integer, dimension(3) :: shear_indices !< Indices of the stress components that represent shear stress - integer :: shear_BC_flip_num !< Number of shear stress components to reflect for boundary conditions - integer, dimension(3, 2) :: shear_BC_flip_indices !< Shear stress BC reflection indices (1:3, 1:shear_BC_flip_num) - logical :: parallel_io !< Format of the data files - logical :: file_per_process !< type of data output - integer :: precision !< Precision of output files - logical :: down_sample !< Down-sample the output data - logical :: mixlayer_vel_profile !< Set hyperbolic tangent streamwise velocity profile - real(wp) :: mixlayer_vel_coef !< Coefficient for the hyperbolic tangent streamwise velocity profile - 
logical :: mixlayer_perturb !< Superimpose instability waves to surrounding fluid flow - integer :: mixlayer_perturb_nk !< Number of Fourier modes for perturbation with mixlayer_perturb flag - real(wp) :: mixlayer_perturb_k0 !< Peak wavenumber for mixlayer perturbation (default: most unstable mode) + ! Cell Indices for the entire (local) domain. In simulation and post_process, this includes the buffer region. idwbuff and + ! idwint are the same otherwise. Stands for "InDices With BUFFer". + type(int_bounds_info) :: idwbuff(1:3) + + !> The order of the finite-difference (fd) approximations of the first-order derivatives that need to be evaluated when the CoM + !! or flow probe data files are to be written at each time step + integer :: fd_order + + !> The finite-difference number is given by MAX(1, fd_order/2). Essentially, it is a measure of the half-size of the + !! finite-difference stencil for the selected order of accuracy. + integer :: fd_number + + !> @name lagrangian subgrid bubble parameters + !> @{! + type(bubbles_lagrange_parameters) :: lag_params !< Lagrange bubbles' parameters + !> @} + + type(int_bounds_info) :: bc_x, bc_y, bc_z !< Boundary conditions in the x-, y- and z-coordinate directions + integer :: shear_num !! Number of shear stress components + integer, dimension(3) :: shear_indices !< Indices of the stress components that represent shear stress + integer :: shear_BC_flip_num !< Number of shear stress components to reflect for boundary conditions + !> Indices of shear stress components to reflect for boundary conditions. Size: (1:3, 1:shear_BC_flip_num) for (x/y/z, + !! 
[indices]) + integer, dimension(3, 2) :: shear_BC_flip_indices + logical :: parallel_io !< Format of the data files + logical :: file_per_process !< type of data output + integer :: precision !< Precision of output files + logical :: down_sample !< Down-sample the output data + logical :: mixlayer_vel_profile !< Set hyperbolic tangent streamwise velocity profile + real(wp) :: mixlayer_vel_coef !< Coefficient for the hyperbolic tangent streamwise velocity profile + logical :: mixlayer_perturb !< Superimpose instability waves to surrounding fluid flow + integer :: mixlayer_perturb_nk !< Number of Fourier modes for perturbation with mixlayer_perturb flag + !> Peak wavenumber of prescribed energy spectra with mixlayer_perturb flag Default value (k0 = 0.4446) is most unstable mode + !! obtained from linear stability analysis See Michalke (1964, JFM) for details + real(wp) :: mixlayer_perturb_k0 !< Peak wavenumber for mixlayer perturbation (default: most unstable mode) logical :: simplex_perturb type(simplex_noise_params) :: simplex_params - real(wp) :: pi_fac !< Factor for artificial pi_inf + real(wp) :: pi_fac !< Factor for artificial pi_inf logical :: viscous logical :: bubbles_lagrange + logical :: particles_lagrange ! 
Perturb density of surrounding air so as to break symmetry of grid - logical :: perturb_flow - integer :: perturb_flow_fluid !< Fluid to be perturbed with perturb_flow flag - real(wp) :: perturb_flow_mag !< Magnitude of perturbation with perturb_flow flag - logical :: perturb_sph - integer :: perturb_sph_fluid !< Fluid to be perturbed with perturb_sph flag - real(wp), dimension(num_fluids_max) :: fluid_rho - logical :: elliptic_smoothing - integer :: elliptic_smoothing_iters - integer, allocatable, dimension(:) :: proc_coords !< Processor coordinates in MPI_CART_COMM - integer, allocatable, dimension(:) :: start_idx !< Starting cell-center index of local processor in global grid + logical :: perturb_flow + integer :: perturb_flow_fluid !< Fluid to be perturbed with perturb_flow flag + real(wp) :: perturb_flow_mag !< Magnitude of perturbation with perturb_flow flag + logical :: perturb_sph + integer :: perturb_sph_fluid !< Fluid to be perturbed with perturb_sph flag + real(wp), dimension(num_fluids_max) :: fluid_rho + logical :: elliptic_smoothing + integer :: elliptic_smoothing_iters + integer, allocatable, dimension(:) :: proc_coords !< Processor coordinates in MPI_CART_COMM + type(int_bounds_info), dimension(3) :: nidx + integer, allocatable, dimension(:,:,:) :: neighbor_ranks + !! Neighbor ranks + + integer, allocatable, dimension(:) :: start_idx !< Starting cell-center index of local processor in global grid + #ifdef MFC_MPI type(mpi_io_var), public :: MPI_IO_DATA character(LEN=name_len) :: mpiiofs @@ -142,18 +174,28 @@ module m_global_parameters #endif ! 
Initial Condition Parameters - integer :: num_patches !< Number of patches composing initial condition - type(ic_patch_parameters), dimension(num_patches_max) :: patch_icpp !< IC patch parameters (max: num_patches_max) - integer :: num_bc_patches !< Number of boundary condition patches - logical :: bc_io !< whether or not to save BC data - type(bc_patch_parameters), dimension(num_bc_patches_max) :: patch_bc !< Boundary condition patch parameters + integer :: num_patches !< Number of patches composing initial condition + + !> Database of the initial condition patch parameters (icpp) for each of the patches employed in the configuration of the + !! initial condition. Note that the maximum allowable number of patches, num_patches_max, may be changed in the module + !! m_derived_types.f90. + type(ic_patch_parameters), dimension(num_patches_max) :: patch_icpp !< IC patch parameters (max: num_patches_max) + integer :: num_bc_patches !< Number of boundary condition patches + logical :: bc_io !< whether or not to save BC data + !> Boundary condition patch parameters Database of the boundary condition patch parameters for each of the patches employed in + !! the configuration of the boundary conditions + type(bc_patch_parameters), dimension(num_bc_patches_max) :: patch_bc ! Fluids Physical Parameters + !> Database of the physical parameters of each of the fluids that is present in the flow. These include the stiffened gas + !! equation of state parameters, and the Reynolds numbers. type(physical_parameters), dimension(num_fluids_max) :: fluid_pp !< Stiffened gas EOS parameters and Reynolds numbers per fluid + ! 
Subgrid Bubble Parameters - type(subgrid_bubble_physical_parameters) :: bub_pp - real(wp) :: rhoref, pref !< Reference parameters for Tait EOS - type(chemistry_parameters) :: chem_params + type(subgrid_bubble_physical_parameters) :: bub_pp + type(subgrid_particle_physical_parameters) :: particle_pp + real(wp) :: rhoref, pref !< Reference parameters for Tait EOS + type(chemistry_parameters) :: chem_params !> @name Bubble modeling !> @{ integer :: nb @@ -173,6 +215,9 @@ module m_global_parameters integer :: Np type(ib_patch_parameters), dimension(num_patches_max) :: patch_ib !< Immersed boundary patch parameters type(vec3_dt), allocatable, dimension(:) :: airfoil_grid_u, airfoil_grid_l + !! Database of the immersed boundary patch parameters for each of the patches employed in the configuration of the initial + !! condition. Note that the maximum allowable number of patches, num_patches_max, may be changed in the module + !! m_derived_types.f90. !> @} !> @name Non-polytropic bubble gas compression @@ -187,6 +232,9 @@ module m_global_parameters real(wp), dimension(:), allocatable :: pb0, mass_g0, mass_v0, Pe_T, k_v, k_g real(wp), dimension(:), allocatable :: Re_trans_T, Re_trans_c, Im_trans_T, Im_trans_c, omegaN real(wp) :: R0ref, p0ref, rho0ref, T0ref, ss, pv, vd, mu_l, mu_v, mu_g, gam_v, gam_g, M_v, M_g, cp_v, cp_g, R_v, R_g + + ! 
Solid particle physical parameters + real(wp) :: cp_particle, rho0ref_particle !> @} !> @name Surface Tension Modeling @@ -210,10 +258,20 @@ module m_global_parameters integer, allocatable, dimension(:,:,:) :: logic_grid type(pres_field) :: pb type(pres_field) :: mv - real(wp) :: Bx0 !< Constant magnetic field in the x-direction (1D) - integer :: buff_size !< Number of ghost cells for boundary condition storage - logical :: fft_wrt - logical :: dummy !< AMDFlang workaround for case-optimization + GPU-kernel bug + real(wp) :: Bx0 !< Constant magnetic field in the x-direction (1D) + + !> The number of cells that are necessary to be able to store enough boundary conditions data to march the solution in the + !! physical computational domain to the next time-step. + integer :: buff_size + integer, allocatable :: beta_vars(:) !< Indices of variables to communicate for bubble/particle coupling + logical :: fft_wrt + !> AMDFlang workaround: keep a dummy logical to avoid a compiler case-optimization bug when a parameter+GPU-kernel conditional + !! is false + logical :: dummy + + ! Variables for hardcoded initial conditions that are read from input files + character(LEN=2*path_len) :: interface_file + real(wp) :: normFac, normMag, g0_ic, p0_ic contains @@ -221,7 +279,8 @@ contains !! parameters once they are read from the input file. impure subroutine s_assign_default_values_to_user_inputs - integer :: i !< Generic loop operator + integer :: i + ! Logistics case_dir = '.' @@ -322,6 +381,8 @@ contains elliptic_smoothing_iters = dflt_int elliptic_smoothing = .false. + particles_lagrange = .false. + fft_wrt = .false. dummy = .false. @@ -338,6 +399,27 @@ contains ! Initial condition parameters num_patches = dflt_int + fd_order = dflt_int + lag_params%cluster_type = dflt_int + lag_params%pressure_corrector = .false. + lag_params%smooth_type = dflt_int + lag_params%heatTransfer_model = .false. + lag_params%massTransfer_model = .false. + lag_params%write_bubbles = .false. 
+ lag_params%write_bubbles_stats = .false. + lag_params%nBubs_glb = dflt_int + lag_params%vel_model = dflt_int + lag_params%drag_model = dflt_int + lag_params%epsilonb = 1._wp + lag_params%charwidth = dflt_real + lag_params%nParticles_glb = dflt_int + lag_params%qs_drag_model = dflt_int + lag_params%stokes_drag = dflt_int + lag_params%added_mass_model = dflt_int + lag_params%interpolation_order = dflt_int + lag_params%charNz = dflt_int + lag_params%valmaxvoid = dflt_real + do i = 1, num_patches_max patch_icpp(i)%geometry = dflt_int patch_icpp(i)%model_scale(:) = 1._wp @@ -540,6 +622,10 @@ contains bub_pp%R_v = dflt_real; R_v = dflt_real bub_pp%R_g = dflt_real; R_g = dflt_real + ! Subgrid particle parameters + particle_pp%rho0ref_particle = dflt_real + particle_pp%cp_particle = dflt_real + end subroutine s_assign_default_values_to_user_inputs !> Computation of parameters, allocation procedures, and/or any other tasks needed to properly setup the module @@ -563,11 +649,11 @@ contains ! Annotating structure of the state and flux vectors belonging to the system of equations defined by the selected number ! of spatial dimensions and the gamma/pi_inf model - cont_idx%beg = 1 + cont_idx%beg = 1 ! one continuity equation cont_idx%end = cont_idx%beg - mom_idx%beg = cont_idx%end + 1 + mom_idx%beg = cont_idx%end + 1 ! one momentum equation in each direction mom_idx%end = cont_idx%end + num_vels - E_idx = mom_idx%end + 1 + E_idx = mom_idx%end + 1 ! one energy equation adv_idx%beg = E_idx + 1 adv_idx%end = adv_idx%beg + 1 gamma_idx = adv_idx%beg @@ -578,11 +664,11 @@ contains else if (model_eqns == 2) then ! Annotating structure of the state and flux vectors belonging to the system of equations defined by the selected number ! of spatial dimensions and the volume fraction model - cont_idx%beg = 1 + cont_idx%beg = 1 ! one continuity equation cont_idx%end = num_fluids - mom_idx%beg = cont_idx%end + 1 + mom_idx%beg = cont_idx%end + 1 ! 
one momentum equation in each direction mom_idx%end = cont_idx%end + num_vels - E_idx = mom_idx%end + 1 + E_idx = mom_idx%end + 1 ! one energy equation if (igr) then ! Volume fractions are stored in the indices immediately following the energy equation. IGR tracks a total of (N-1) @@ -678,11 +764,11 @@ contains else if (model_eqns == 3) then ! Annotating structure of the state and flux vectors belonging to the system of equations defined by the selected number ! of spatial dimensions and the volume fraction model - cont_idx%beg = 1 + cont_idx%beg = 1 ! one continuity equation cont_idx%end = num_fluids - mom_idx%beg = cont_idx%end + 1 + mom_idx%beg = cont_idx%end + 1 ! one momentum equation in each direction mom_idx%end = cont_idx%end + num_vels - E_idx = mom_idx%end + 1 + E_idx = mom_idx%end + 1 ! one energy equation adv_idx%beg = E_idx + 1 adv_idx%end = E_idx + num_fluids internalEnergies_idx%beg = adv_idx%end + 1 @@ -797,6 +883,14 @@ contains end if end if + if (bubbles_lagrange) then + allocate (beta_vars(1:3)) + beta_vars(1:3) = [1, 2, 5] + else if (particles_lagrange) then + allocate (beta_vars(1:8)) + beta_vars(1:8) = [1, 2, 3, 4, 5, 6, 7, 8] + end if + if (chemistry) then species_idx%beg = sys_size + 1 species_idx%end = sys_size + num_species @@ -820,8 +914,10 @@ contains chemxb = species_idx%beg chemxe = species_idx%end + if (bubbles_lagrange .or. particles_lagrange) fd_number = max(1, fd_order/2) + call s_configure_coordinate_bounds(recon_type, weno_polyn, muscl_polyn, igr_order, buff_size, idwint, idwbuff, viscous, & - & bubbles_lagrange, m, n, p, num_dims, igr, ib) + & bubbles_lagrange, particles_lagrange, m, n, p, num_dims, igr, ib, fd_number) #ifdef MFC_MPI if (qbmm .and. .not. polytropic) then @@ -860,7 +956,7 @@ contains grid_geometry = 1 else if (cyl_coord .and. p == 0) then ! Axisymmetric cylindrical grid grid_geometry = 2 - else ! 
Fully 3D cylindrical grid + else grid_geometry = 3 end if @@ -870,7 +966,7 @@ contains end subroutine s_initialize_global_parameters_module - !> Configure MPI parallel I/O settings and allocate processor coordinate arrays. + !> @brief Configures MPI parallel I/O settings and allocates processor coordinate arrays. impure subroutine s_initialize_parallel_io #ifdef MFC_MPI @@ -904,7 +1000,7 @@ contains end subroutine s_initialize_parallel_io - !> Deallocate all global grid, index, and equation-of-state parameter arrays. + !> @brief Deallocates all global grid, index, and equation-of-state parameter arrays. impure subroutine s_finalize_global_parameters_module integer :: i @@ -934,6 +1030,9 @@ contains end if #endif + if (allocated(neighbor_ranks)) deallocate (neighbor_ranks) + if (allocated(beta_vars)) deallocate (beta_vars) + end subroutine s_finalize_global_parameters_module end module m_global_parameters diff --git a/src/pre_process/m_mpi_proxy.fpp b/src/pre_process/m_mpi_proxy.fpp index e9545ce865..55546aaa9b 100644 --- a/src/pre_process/m_mpi_proxy.fpp +++ b/src/pre_process/m_mpi_proxy.fpp @@ -22,9 +22,12 @@ contains impure subroutine s_mpi_bcast_user_inputs #ifdef MFC_MPI + ! Generic loop iterator integer :: i, j + ! Generic flag used to identify and report MPI errors integer :: ierr + ! 
Logistics call MPI_BCAST(case_dir, len(case_dir), MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr) #:for VAR in ['t_step_old', 't_step_start', 'm', 'n', 'p', 'm_glb', 'n_glb', 'p_glb', & @@ -33,7 +36,7 @@ contains & 'perturb_sph_fluid', 'num_patches', 'thermal', 'nb', 'dist_type',& & 'relax_model', 'num_ibs', 'n_start', 'elliptic_smoothing_iters', & & 'num_bc_patches', 'mixlayer_perturb_nk', 'recon_type', & - & 'muscl_order', 'igr_order' ] + & 'muscl_order', 'igr_order', 'fd_order'] call MPI_BCAST(${VAR}$, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) #:endfor @@ -45,7 +48,8 @@ contains & 'cfl_const_dt', 'cfl_dt', 'surface_tension', & & 'hyperelasticity', 'pre_stress', 'elliptic_smoothing', 'viscous',& & 'bubbles_lagrange', 'bc_io', 'mhd', 'relativity', 'cont_damage', & - & 'igr', 'down_sample', 'simplex_perturb','fft_wrt', 'hyper_cleaning' ] + & 'igr', 'down_sample', 'simplex_perturb','fft_wrt', 'hyper_cleaning',& + & 'particles_lagrange' ] call MPI_BCAST(${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) #:endfor call MPI_BCAST(fluid_rho(1), num_fluids_max, mpi_p, 0, MPI_COMM_WORLD, ierr) @@ -133,6 +137,22 @@ contains #:endfor end do + ! Variables from input files for hardcoded patches + call MPI_BCAST(interface_file, len(interface_file), MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(normFac, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(normMag, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(g0_ic, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(p0_ic, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + + ! Subgrid bubble parameters + if (bubbles_euler .or. bubbles_lagrange) then + #:for VAR in [ 'R0ref','p0ref','rho0ref','T0ref', & + 'ss','pv','vd','mu_l','mu_v','mu_g','gam_v','gam_g', & + 'M_v','M_g','k_v','k_g','cp_v','cp_g','R_v','R_g'] + call MPI_BCAST(bub_pp%${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + #:endfor + end if + ! 
Simplex noise and fluid physical parameters do i = 1, num_fluids_max #:for VAR in [ 'gamma','pi_inf', 'G', 'cv', 'qv', 'qvp' ] @@ -148,15 +168,6 @@ contains end do end do - ! Subgrid bubble parameters - if (bubbles_euler .or. bubbles_lagrange) then - #:for VAR in [ 'R0ref','p0ref','rho0ref','T0ref', & - 'ss','pv','vd','mu_l','mu_v','mu_g','gam_v','gam_g', & - 'M_v','M_g','k_v','k_g','cp_v','cp_g','R_v','R_g'] - call MPI_BCAST(bub_pp%${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) - #:endfor - end if - do i = 1, 3 call MPI_BCAST(simplex_params%perturb_vel(i), 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) call MPI_BCAST(simplex_params%perturb_vel_freq(i), 1, mpi_p, 0, MPI_COMM_WORLD, ierr) diff --git a/src/pre_process/m_start_up.fpp b/src/pre_process/m_start_up.fpp index a3a7c51270..73ca46a7bc 100644 --- a/src/pre_process/m_start_up.fpp +++ b/src/pre_process/m_start_up.fpp @@ -43,12 +43,13 @@ module m_start_up abstract interface - !> Abstract interface for reading grid data files in serial or parallel. + !> @brief Abstract interface for reading grid data files in serial or parallel. impure subroutine s_read_abstract_grid_data_files end subroutine s_read_abstract_grid_data_files - !> Abstract interface for reading initial condition data files in serial or parallel. + !> @brief Abstract interface for reading initial condition data files in serial or parallel. + !! @param q_cons_vf Conservative variables impure subroutine s_read_abstract_ic_data_files(q_cons_vf_in) import :: scalar_field, integer_field, sys_size, pres_field @@ -59,7 +60,11 @@ module m_start_up end interface character(LEN=path_len + name_len) :: proc_rank_dir !< Location of the folder associated with the rank of the local processor + + !> Possible location of time-step folder containing preexisting grid and/or conservative variables data to be used as starting + !! 
point for pre-process character(LEN=path_len + 2*name_len), private :: t_step_dir !< Path to preexisting time-step folder for restart + procedure(s_read_abstract_grid_data_files), pointer :: s_read_grid_data_files => null() procedure(s_read_abstract_ic_data_files), pointer :: s_read_ic_data_files => null() @@ -70,9 +75,15 @@ contains impure subroutine s_read_input_file character(LEN=name_len) :: file_loc - logical :: file_check - integer :: iostatus - character(len=1000) :: line + + !> Generic logical used for the purpose of asserting whether a file is or is not present in the designated location + logical :: file_check + integer :: iostatus + !! Integer to check iostat of file read + + character(len=1000) :: line + + ! Namelist for all of the parameters to be inputted by the user namelist /user_inputs/ case_dir, old_grid, old_ic, t_step_old, t_step_start, m, n, p, x_domain, y_domain, z_domain, & & stretch_x, stretch_y, stretch_z, a_x, a_y, a_z, x_a, y_a, z_a, x_b, y_b, z_b, model_eqns, num_fluids, mpp_lim, & @@ -84,11 +95,14 @@ contains & ptgalpha_eps, ib, num_ibs, patch_ib, sigma, adv_n, cfl_adap_dt, cfl_const_dt, n_start, n_start_old, & & surface_tension, hyperelasticity, pre_stress, elliptic_smoothing, elliptic_smoothing_iters, viscous, & & bubbles_lagrange, num_bc_patches, patch_bc, Bx0, relativity, cont_damage, igr, igr_order, down_sample, recon_type, & - & muscl_order, hyper_cleaning, simplex_perturb, simplex_params, fft_wrt + & muscl_order, fft_wrt, fd_order, lag_params, simplex_perturb, simplex_params, interface_file, normFac, normMag, & + & g0_ic, p0_ic, hyper_cleaning, particles_lagrange, particle_pp + ! Inquiring the status of the pre_process.inp file file_loc = 'pre_process.inp' inquire (FILE=trim(file_loc), EXIST=file_check) + ! Checking whether the input file is there. If it is, the input file is read. If not, the program is terminated. 
if (file_check) then open (1, FILE=trim(file_loc), form='formatted', STATUS='old', ACTION='read') read (1, NML=user_inputs, iostat=iostatus) @@ -102,6 +116,7 @@ contains call s_update_cell_bounds(cells_bounds, m, n, p) + ! Store m,n,p into global m,n,p m_glb = m n_glb = n p_glb = p @@ -126,6 +141,8 @@ contains character(LEN=len_trim(case_dir)) :: file_loc logical :: dir_check + ! Checking the existence of the case folder + case_dir = adjustl(case_dir) file_loc = trim(case_dir) // '/.' @@ -141,6 +158,7 @@ contains call s_check_inputs_common() call s_check_inputs() + ! Check all the patch properties call s_check_patches() if (ib) call s_check_ib_patches() @@ -151,9 +169,16 @@ contains !! necessary global computational domain parameters. impure subroutine s_read_serial_grid_data_files + ! Generic string used to store the address of a particular file character(LEN=len_trim(case_dir) + 3*name_len) :: file_loc - logical :: dir_check - logical :: file_check + + ! Logical variable used to test the existence of folders + logical :: dir_check + + ! Generic logical used for the purpose of asserting whether a file is or is not present in the designated location + logical :: file_check + + ! Setting address of the local processor rank and time-step directory write (proc_rank_dir, '(A,I0)') '/p_all/p', proc_rank proc_rank_dir = trim(case_dir) // trim(proc_rank_dir) @@ -161,16 +186,22 @@ contains write (t_step_dir, '(A,I0)') '/', t_step_start t_step_dir = trim(proc_rank_dir) // trim(t_step_dir) + ! Inquiring as to the existence of the time-step directory file_loc = trim(t_step_dir) // '/.' call my_inquire(file_loc, dir_check) + ! If the time-step directory is missing, the pre-process exits if (dir_check .neqv. .true.) then call s_mpi_abort('Time-step folder ' // trim(t_step_dir) // ' is missing. Exiting.') end if + ! Reading the Grid Data File for the x-direction + + ! 
Checking whether x_cb.dat exists file_loc = trim(t_step_dir) // '/x_cb.dat' inquire (FILE=trim(file_loc), EXIST=file_check) + ! If it exists, x_cb.dat is read if (file_check) then open (1, FILE=trim(file_loc), form='unformatted', STATUS='old', ACTION='read') read (1) x_cb(-1:m) @@ -179,18 +210,25 @@ contains call s_mpi_abort('File x_cb.dat is missing in ' // trim(t_step_dir) // '. Exiting.') end if + ! Computing cell-center locations x_cc(0:m) = (x_cb(0:m) + x_cb(-1:(m - 1)))/2._wp + ! Computing minimum cell-width dx = minval(x_cb(0:m) - x_cb(-1:m - 1)) if (num_procs > 1) call s_mpi_reduce_min(dx) + ! Setting locations of domain bounds x_domain%beg = x_cb(-1) x_domain%end = x_cb(m) + ! Reading the Grid Data File for the y-direction + if (n > 0) then + ! Checking whether y_cb.dat exists file_loc = trim(t_step_dir) // '/y_cb.dat' inquire (FILE=trim(file_loc), EXIST=file_check) + ! If it exists, y_cb.dat is read if (file_check) then open (1, FILE=trim(file_loc), form='unformatted', STATUS='old', ACTION='read') read (1) y_cb(-1:n) @@ -199,18 +237,24 @@ contains call s_mpi_abort('File y_cb.dat is missing in ' // trim(t_step_dir) // '. Exiting.') end if + ! Computing cell-center locations y_cc(0:n) = (y_cb(0:n) + y_cb(-1:(n - 1)))/2._wp + ! Computing minimum cell-width dy = minval(y_cb(0:n) - y_cb(-1:n - 1)) if (num_procs > 1) call s_mpi_reduce_min(dy) + ! Setting locations of domain bounds y_domain%beg = y_cb(-1) y_domain%end = y_cb(n) + ! Reading the Grid Data File for the z-direction if (p > 0) then + ! Checking whether z_cb.dat exists file_loc = trim(t_step_dir) // '/z_cb.dat' inquire (FILE=trim(file_loc), EXIST=file_check) + ! If it exists, z_cb.dat is read if (file_check) then open (1, FILE=trim(file_loc), form='unformatted', STATUS='old', ACTION='read') read (1) z_cb(-1:p) @@ -219,17 +263,23 @@ contains call s_mpi_abort('File z_cb.dat is missing in ' // trim(t_step_dir) // '. Exiting.') end if + ! 
Computing cell-center locations z_cc(0:p) = (z_cb(0:p) + z_cb(-1:(p - 1)))/2._wp + ! Computing minimum cell-width dz = minval(z_cb(0:p) - z_cb(-1:p - 1)) if (num_procs > 1) call s_mpi_reduce_min(dz) + ! Setting locations of domain bounds z_domain%beg = z_cb(-1) z_domain%end = z_cb(p) end if end if - ! Clean processor dir and create time-step dir (unless reading preexisting IC) + ! If only the preexisting grid data files are read in and there will not be any preexisting initial condition data files + ! imported, then the directory associated with the rank of the local processor may be cleaned to make room for the new + ! pre-process data. In addition, the time-step directory that will contain the new grid and initial condition data are also + ! generated. if (old_ic .neqv. .true.) then call s_delete_directory(trim(proc_rank_dir)) call s_create_directory(trim(proc_rank_dir) // '/0') @@ -241,15 +291,21 @@ contains !! coordinate directions and making sure that all of the cell-widths are positively valued impure subroutine s_check_grid_data_files + ! Cell-boundary Data Consistency Check in x-direction + if (any(x_cb(0:m) - x_cb(-1:m - 1) <= 0._wp)) then call s_mpi_abort('x_cb.dat in ' // trim(t_step_dir) // ' contains non-positive cell-spacings. Exiting.') end if + ! Cell-boundary Data Consistency Check in y-direction + if (n > 0) then if (any(y_cb(0:n) - y_cb(-1:n - 1) <= 0._wp)) then call s_mpi_abort('y_cb.dat in ' // trim(t_step_dir) // ' contains non-positive cell-spacings. ' // 'Exiting.') end if + ! Cell-boundary Data Consistency Check in z-direction + if (p > 0) then if (any(z_cb(0:p) - z_cb(-1:p - 1) <= 0._wp)) then call s_mpi_abort('z_cb.dat in ' // trim(t_step_dir) // ' contains non-positive cell-spacings' // ' .Exiting.') @@ -261,19 +317,29 @@ contains !> The goal of this subroutine is to read in any preexisting initial condition data files so that they may be used by the !! pre-process as a starting point in the creation of an all new initial condition. + !! 
@param q_cons_vf_in Conservative variables impure subroutine s_read_serial_ic_data_files(q_cons_vf_in) - type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf_in - character(LEN=len_trim(case_dir) + 3*name_len) :: file_loc + type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf_in + character(LEN=len_trim(case_dir) + 3*name_len) :: file_loc + ! Generic string used to store the address of a particular file + + !> Used to store the variable position, in character form, of the currently manipulated conservative variable file character(LEN=int(floor(log10(real(sys_size, wp)))) + 1) :: file_num - logical :: file_check - integer :: i, r + + !> Generic logical used for the purpose of asserting whether a file is or is not present in the designated location + logical :: file_check + integer :: i, r !< Generic loop iterator + + ! Reading the Conservative Variables Data Files do i = 1, sys_size + ! Checking whether data file associated with variable position of the currently manipulated conservative variable exists write (file_num, '(I0)') i file_loc = trim(t_step_dir) // '/q_cons_vf' // trim(file_num) // '.dat' inquire (FILE=trim(file_loc), EXIST=file_check) + ! If it exists, the data file is read if (file_check) then open (1, FILE=trim(file_loc), form='unformatted', STATUS='old', ACTION='read') read (1) q_cons_vf_in(i)%sf @@ -283,13 +349,17 @@ contains end if end do + ! Read bubble variables pb and mv for non-polytropic qbmm if (qbmm .and. .not. polytropic) then do i = 1, nb do r = 1, nnode + ! Checking whether data file associated with variable position of the currently manipulated bubble variable + ! exists write (file_num, '(I0)') sys_size + r + (i - 1)*nnode file_loc = trim(t_step_dir) // '/pb' // trim(file_num) // '.dat' inquire (FILE=trim(file_loc), EXIST=file_check) + ! 
If it exists, the data file is read if (file_check) then open (1, FILE=trim(file_loc), form='unformatted', STATUS='old', ACTION='read') read (1) pb%sf(:,:,:,r, i) @@ -302,10 +372,13 @@ contains do i = 1, nb do r = 1, nnode + ! Checking whether data file associated with variable position of the currently manipulated bubble variable + ! exists write (file_num, '(I0)') sys_size + r + (i - 1)*nnode file_loc = trim(t_step_dir) // '/mv' // trim(file_num) // '.dat' inquire (FILE=trim(file_loc), EXIST=file_check) + ! If it exists, the data file is read if (file_check) then open (1, FILE=trim(file_loc), form='unformatted', STATUS='old', ACTION='read') read (1) mv%sf(:,:,:,r, i) @@ -340,6 +413,7 @@ contains allocate (y_cb_glb(-1:n_glb)) allocate (z_cb_glb(-1:p_glb)) + ! Read in cell boundary locations in x-direction file_loc = trim(case_dir) // '/restart_data' // trim(mpiiofs) // 'x_cb.dat' inquire (FILE=trim(file_loc), EXIST=file_exist) @@ -352,14 +426,19 @@ contains call s_mpi_abort('File ' // trim(file_loc) // ' is missing. Exiting. ') end if + ! Assigning local cell boundary locations x_cb(-1:m) = x_cb_glb((start_idx(1) - 1):(start_idx(1) + m)) + ! Computing cell center locations x_cc(0:m) = (x_cb(0:m) + x_cb(-1:(m - 1)))/2._wp + ! Computing minimum cell width dx = minval(x_cb(0:m) - x_cb(-1:(m - 1))) if (num_procs > 1) call s_mpi_reduce_min(dx) + ! Setting locations of domain bounds x_domain%beg = x_cb(-1) x_domain%end = x_cb(m) if (n > 0) then + ! Read in cell boundary locations in y-direction file_loc = trim(case_dir) // '/restart_data' // trim(mpiiofs) // 'y_cb.dat' inquire (FILE=trim(file_loc), EXIST=file_exist) @@ -372,14 +451,19 @@ contains call s_mpi_abort('File ' // trim(file_loc) // ' is missing. Exiting. ') end if + ! Assigning local cell boundary locations y_cb(-1:n) = y_cb_glb((start_idx(2) - 1):(start_idx(2) + n)) + ! Computing cell center locations y_cc(0:n) = (y_cb(0:n) + y_cb(-1:(n - 1)))/2._wp + ! 
Computing minimum cell width dy = minval(y_cb(0:n) - y_cb(-1:(n - 1))) if (num_procs > 1) call s_mpi_reduce_min(dy) + ! Setting locations of domain bounds y_domain%beg = y_cb(-1) y_domain%end = y_cb(n) if (p > 0) then + ! Read in cell boundary locations in z-direction file_loc = trim(case_dir) // '/restart_data' // trim(mpiiofs) // 'z_cb.dat' inquire (FILE=trim(file_loc), EXIST=file_exist) @@ -392,10 +476,14 @@ contains call s_mpi_abort('File ' // trim(file_loc) // ' is missing. Exiting. ') end if + ! Assigning local cell boundary locations z_cb(-1:p) = z_cb_glb((start_idx(3) - 1):(start_idx(3) + p)) + ! Computing cell center locations z_cc(0:p) = (z_cb(0:p) + z_cb(-1:(p - 1)))/2._wp + ! Computing minimum cell width dz = minval(z_cb(0:p) - z_cb(-1:(p - 1))) if (num_procs > 1) call s_mpi_reduce_min(dz) + ! Setting locations of domain bounds z_domain%beg = z_cb(-1) z_domain%end = z_cb(p) end if @@ -408,6 +496,7 @@ contains !> The goal of this subroutine is to read in any preexisting initial condition data files so that they may be used by the !! pre-process as a starting point in the creation of an all new initial condition. + !! @param q_cons_vf_in Conservative variables impure subroutine s_read_parallel_ic_data_files(q_cons_vf_in) type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf_in @@ -424,6 +513,7 @@ contains logical :: file_exist integer :: i + ! Open the file to read if (cfl_adap_dt) then write (file_loc, '(I0,A)') n_start, '.dat' else @@ -437,6 +527,7 @@ contains call s_initialize_mpi_data(q_cons_vf_in) + ! Size of local arrays data_size = (m + 1)*(n + 1)*(p + 1) ! Resize some integers so MPI can read even the biggest files @@ -448,6 +539,7 @@ contains str_MOK = int(name_len, MPI_OFFSET_KIND) NVARS_MOK = int(sys_size, MPI_OFFSET_KIND) + ! 
Read the data for each variable do i = 1, sys_size var_MOK = int(i, MPI_OFFSET_KIND) @@ -482,13 +574,17 @@ contains end subroutine s_read_parallel_ic_data_files - !> Initialize all pre-process modules, allocate data structures, and set I/O procedure pointers. + !> @brief Initializes all pre-process modules, allocates data structures, and sets I/O procedure pointers. impure subroutine s_initialize_modules + ! Computation of parameters, allocation procedures, and/or any other tasks needed to properly setup the modules call s_initialize_global_parameters_module() if (bubbles_euler .or. bubbles_lagrange) then call s_initialize_bubbles_model() end if + if (particles_lagrange) then + call s_initialize_particles_model() + end if call s_initialize_mpi_common_module() call s_initialize_data_output_module() call s_initialize_variables_conversion_module() @@ -502,6 +598,7 @@ contains ! Create the D directory if it doesn't exit, to store the serial data files call s_create_directory('D') + ! Associate pointers for serial or parallel I/O if (parallel_io .neqv. .true.) then s_generate_grid => s_generate_serial_grid s_read_grid_data_files => s_read_serial_grid_data_files @@ -516,7 +613,7 @@ contains end subroutine s_initialize_modules - !> Read an existing grid from data files or generate a new grid from user inputs. + !> @brief Reads an existing grid from data files or generates a new grid from user inputs. impure subroutine s_read_grid() if (old_grid) then @@ -535,13 +632,21 @@ contains end subroutine s_read_grid - !> Generate or read the initial condition, apply relaxation if needed, and write output data files. + !> @brief Generates or reads the initial condition, applies relaxation if needed, and writes output data files. impure subroutine s_apply_initial_condition(start, finish) real(wp), intent(inout) :: start, finish integer :: j, k, l real(wp) :: r2 + ! Setting up the grid and the initial condition. 
If the grid is read in from preexisting grid data files, it is checked for + ! consistency. If the grid is not read in, it is generated from scratch according to the inputs provided by the user. The + ! initial condition may also be read in. It in turn is not checked for consistency since it WILL further be edited by the + ! pre-process and also because it may be incomplete at the time it is read in. Finally, when the grid and initial condition + ! are completely setup, they are written to their respective data files. + + ! Setting up grid and initial condition + call cpu_time(start) if (old_ic) call s_read_ic_data_files(q_cons_vf) @@ -578,7 +683,7 @@ contains end subroutine s_apply_initial_condition - !> Gather processor timing data and write elapsed wall-clock time to a summary file. + !> @brief Gathers processor timing data and writes elapsed wall-clock time to a summary file. impure subroutine s_save_data(proc_time, time_avg, time_final, file_exists) real(wp), dimension(:), intent(inout) :: proc_time @@ -614,11 +719,17 @@ contains end subroutine s_save_data - !> Initialize MPI, read and validate user inputs on rank 0, and decompose the computational domain. + !> @brief Initializes MPI, reads and validates user inputs on rank 0, and decomposes the computational domain. impure subroutine s_initialize_mpi_domain + ! Initialization of the MPI environment + call s_mpi_initialize() + ! Rank 0 processor assigns default values to user inputs prior to reading those in from the input file. Next, the user + ! inputs are read in and their consistency is checked. The detection of any inconsistencies automatically leads to the + ! termination of the pre-process. + if (proc_rank == 0) then call s_assign_default_values_to_user_inputs() call s_read_input_file() @@ -635,14 +746,16 @@ contains end subroutine s_initialize_mpi_domain - !> Finalize all pre-process modules, deallocate resources, and shut down MPI. 
+ !> @brief Finalizes all pre-process modules, deallocates resources, and shuts down MPI. impure subroutine s_finalize_modules + ! Disassociate pointers for serial and parallel I/O s_generate_grid => null() s_read_grid_data_files => null() s_read_ic_data_files => null() s_write_data_files => null() + ! Deallocation procedures for the modules call s_finalize_mpi_common_module() call s_finalize_grid_module() call s_finalize_variables_conversion_module() @@ -653,6 +766,7 @@ contains call s_finalize_boundary_common_module() if (relax) call s_finalize_relaxation_solver_module() call s_finalize_initial_condition_module() + ! Finalization of the MPI environment call s_mpi_finalize() end subroutine s_finalize_modules diff --git a/src/simulation/m_bubbles.fpp b/src/simulation/m_bubbles.fpp index 2ff5952827..60a7ed8f4b 100644 --- a/src/simulation/m_bubbles.fpp +++ b/src/simulation/m_bubbles.fpp @@ -13,6 +13,7 @@ module m_bubbles use m_mpi_proxy use m_variables_conversion use m_helper_basic + use m_bubbles_EL_kernels implicit none @@ -23,8 +24,21 @@ module m_bubbles contains - !> Compute the bubble radial acceleration based on the selected bubble model - elemental function f_rddot(fRho, fP, fR, fV, fR0, fpb, fpbdot, alf, fntait, fBtait, f_bub_adv_src, f_divu, fCson) + !> Function that computes the bubble radial acceleration based on bubble models + !! @param fRho Current density + !! @param fP Current driving pressure + !! @param fR Current bubble radius + !! @param fV Current bubble velocity + !! @param fR0 Equilibrium bubble radius + !! @param fpb Internal bubble pressure + !! @param fpbdot Time-derivative of internal bubble pressure + !! @param alf bubble volume fraction + !! @param fntait Tait EOS parameter + !! @param fBtait Tait EOS parameter + !! @param f_bub_adv_src Source for bubble volume fraction + !! @param f_divu Divergence of velocity + !! 
@param fCson Speed of sound from fP (EL) + function f_rddot(fRho, fP, fR, fV, fR0, fpb, fpbdot, alf, fntait, fBtait, f_bub_adv_src, f_divu, fCson) $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: fRho, fP, fR, fV, fR0, fpb, fpbdot, alf @@ -63,8 +77,12 @@ contains end function f_rddot - !> Bubble wall pressure: stiffened gas with Laplace pressure and viscous stress - elemental function f_cpbw(fR0, fR, fV, fpb) + !> Function that computes the bubble wall pressure for Gilmore bubbles + !! @param fR0 Equilibrium bubble radius + !! @param fR Current bubble radius + !! @param fV Current bubble velocity + !! @param fpb Internal bubble pressure + function f_cpbw(fR0, fR, fV, fpb) $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: fR0, fR, fV, fpb @@ -78,8 +96,12 @@ contains end function f_cpbw - !> Compute the bubble enthalpy - elemental function f_H(fCpbw, fCpinf, fntait, fBtait) + !> Function that computes the bubble enthalpy + !! @param fCpbw Bubble wall pressure + !! @param fCpinf Driving bubble pressure + !! @param fntait Tait EOS parameter + !! @param fBtait Tait EOS parameter + function f_H(fCpbw, fCpinf, fntait, fBtait) $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: fCpbw, fCpinf, fntait, fBtait @@ -94,8 +116,12 @@ contains end function f_H - !> Compute the sound speed for the bubble - elemental function f_cgas(fCpinf, fntait, fBtait, fH) + !> Function that computes the sound speed for the bubble + !! @param fCpinf Driving bubble pressure + !! @param fntait Tait EOS parameter + !! @param fBtait Tait EOS parameter + !! @param fH Bubble enthalpy + function f_cgas(fCpinf, fntait, fBtait, fH) $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: fCpinf, fntait, fBtait, fH @@ -110,8 +136,15 @@ contains end function f_cgas - !> Compute the time derivative of the driving pressure - elemental function f_cpinfdot(fRho, fP, falf, fntait, fBtait, advsrc, divu) + !> Function that computes the time derivative of the driving pressure + !! 
@param fRho Local liquid density + !! @param fP Local pressure + !! @param falf Local void fraction + !! @param fntait Tait EOS parameter + !! @param fBtait Tait EOS parameter + !! @param advsrc Advection equation source term + !! @param divu Divergence of velocity + function f_cpinfdot(fRho, fP, falf, fntait, fBtait, advsrc, divu) $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: fRho, fP, falf, fntait, fBtait, advsrc, divu @@ -130,8 +163,17 @@ contains end function f_cpinfdot - !> Enthalpy derivative for Gilmore bubble model, Gilmore (1952) - elemental function f_Hdot(fCpbw, fCpinf, fCpinf_dot, fntait, fBtait, fR, fV, fR0, fpbdot) + !> Function that computes the time derivative of the enthalpy + !! @param fCpbw Bubble wall pressure + !! @param fCpinf Driving bubble pressure + !! @param fCpinf_dot Time derivative of the driving pressure + !! @param fntait Tait EOS parameter + !! @param fBtait Tait EOS parameter + !! @param fR Current bubble radius + !! @param fV Current bubble velocity + !! @param fR0 Equilibrium bubble radius + !! @param fpbdot Time derivative of the internal bubble pressure + function f_Hdot(fCpbw, fCpinf, fCpinf_dot, fntait, fBtait, fR, fV, fR0, fpbdot) $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: fCpbw, fCpinf, fCpinf_dot, fntait, fBtait @@ -150,21 +192,43 @@ contains f_Hdot = (fCpbw/(1._wp + fBtait) + 1._wp)**(-1._wp/fntait)*(tmp1 + tmp2) - (fCpinf/(1._wp + fBtait) + 1._wp) & & **(-1._wp/fntait)*fCpinf_dot + ! Hdot = (Cpbw/(1+B) + 1)^(-1/n_tait)*(-3 gam)*(R0/R)^(3gam) V/R f_Hdot = + ! ((fCpbw/(1._wp+fBtait)+1._wp)**(-1._wp/fntait))*(-3._wp)*gam * & ( (fR0/fR)**(3._wp*gam ))*(fV/fR) + + ! Hdot = Hdot - (Cpinf/(1+B) + 1)^(-1/n_tait) Cpinfdot f_Hdot = f_Hdot - + ! 
((fCpinf/(1._wp+fBtait)+1._wp)**(-1._wp/fntait))*fCpinf_dot + end function f_Hdot - !> Rayleigh-Plesset bubble radial acceleration - elemental function f_rddot_RP(fCp, fRho, fR, fV, fCpbw) + !> Function that computes the bubble radial acceleration for Rayleigh-Plesset bubbles + !! @param fCp Driving pressure + !! @param fRho Current density + !! @param fR Current bubble radius + !! @param fV Current bubble velocity + !! @param fCpbw Boundary wall pressure + function f_rddot_RP(fCp, fRho, fR, fV, fCpbw) $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: fCp, fRho, fR, fV, fCpbw real(wp) :: f_rddot_RP + !! rddot = (1/r) ( -3/2 rdot^2 + ((r0/r)^3\gamma - Cp)/rho ) rddot = (1/r) ( -3/2 rdot^2 + (tmp1 - Cp)/rho ) rddot = (1/r) ( + !! tmp2 ) + f_rddot_RP = (-1.5_wp*(fV**2._wp) + (fCpbw - fCp)/fRho)/fR end function f_rddot_RP - !> Compute the Gilmore bubble radial acceleration - elemental function f_rddot_G(fCpbw, fR, fV, fH, fHdot, fcgas, fntait, fBtait) + !> Function that computes the bubble radial acceleration + !! @param fCpbw Bubble wall pressure + !! @param fR Current bubble radius + !! @param fV Current bubble velocity + !! @param fH Current enthalpy + !! @param fHdot Current time derivative of the enthalpy + !! @param fcgas Current gas sound speed + !! @param fntait Tait EOS parameter + !! @param fBtait Tait EOS parameter + function f_rddot_G(fCpbw, fR, fV, fH, fHdot, fcgas, fntait, fBtait) $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: fCpbw, fR, fV, fH, fHdot @@ -180,8 +244,12 @@ contains end function f_rddot_G - !> Keller-Miksis bubble wall pressure - elemental function f_cpbw_KM(fR0, fR, fV, fpb) + !> Function that computes the bubble wall pressure for Keller--Miksis bubbles + !! @param fR0 Equilibrium bubble radius + !! @param fR Current bubble radius + !! @param fV Current bubble velocity + !! 
@param fpb Internal bubble pressure + function f_cpbw_KM(fR0, fR, fV, fpb) $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: fR0, fR, fV, fpb @@ -199,8 +267,16 @@ contains end function f_cpbw_KM - !> Keller-Miksis bubble radial acceleration - elemental function f_rddot_KM(fpbdot, fCp, fCpbw, fRho, fR, fV, fR0, fC) + !> Function that computes the bubble radial acceleration for Keller--Miksis bubbles + !! @param fpbdot Time-derivative of internal bubble pressure + !! @param fCp Driving pressure + !! @param fCpbw Bubble wall pressure + !! @param fRho Current density + !! @param fR Current bubble radius + !! @param fV Current bubble velocity + !! @param fR0 Equilibrium bubble radius + !! @param fC Current sound speed + function f_rddot_KM(fpbdot, fCp, fCpbw, fRho, fR, fV, fR0, fC) $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: fpbdot, fCp, fCpbw @@ -228,8 +304,10 @@ contains end function f_rddot_KM - !> Compute bubble wall properties for vapor bubbles - elemental subroutine s_bwproperty(pb_in, iR0, chi_vw_out, k_mw_out, rho_mw_out) + !> Subroutine that computes bubble wall properties for vapor bubbles + !! @param pb_in Internal bubble pressure + !! @param iR0 Current bubble size index + subroutine s_bwproperty(pb_in, iR0, chi_vw_out, k_mw_out, rho_mw_out) $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: pb_in @@ -249,8 +327,18 @@ contains end subroutine s_bwproperty - !> Compute the vapour flux - elemental subroutine s_vflux(fR, fV, fpb, fmass_v, iR0, vflux, fmass_g, fbeta_c, fR_m, fgamma_m) + !> Function that computes the vapour flux + !! @param fR Current bubble radius + !! @param fV Current bubble velocity + !! @param fpb + !! @param fmass_v Current mass of vapour + !! @param iR0 Bubble size index (EE) or bubble identifier (EL) + !! @param vflux Computed vapour flux + !! @param fmass_g Current gas mass (EL) + !! @param fbeta_c Mass transfer coefficient (EL) + !! @param fR_m Mixture gas constant (EL) + !! 
@param fgamma_m Mixture gamma (EL) + subroutine s_vflux(fR, fV, fpb, fmass_v, iR0, vflux, fmass_g, fbeta_c, fR_m, fgamma_m) $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: fR @@ -266,7 +354,7 @@ contains real(wp) :: grad_chi real(wp) :: conc_v - if (thermal == 3) then ! transfer + if (thermal == 3) then ! constant transfer model if (bubbles_lagrange) then ! Mixture properties (gas+vapor) in the bubble @@ -297,9 +385,19 @@ contains end subroutine s_vflux - !> Compute the time derivative of the internal bubble pressure - elemental function f_bpres_dot(fvflux, fR, fV, fpb, fmass_v, iR0, fbeta_t, fR_m, fgamma_m) - + !> Function that computes the time derivative of the internal bubble pressure + !! @param fvflux Vapour flux + !! @param fR Current bubble radius + !! @param fV Current bubble velocity + !! @param fpb Current internal bubble pressure + !! @param fmass_v Current mass of vapour + !! @param iR0 Bubble size index (EE) or bubble identifier (EL) + !! @param fbeta_t Mass transfer coefficient (EL) + !! @param fR_m Mixture gas constant (EL) + !! @param fgamma_m Mixture gamma (EL) + function f_bpres_dot(fvflux, fR, fV, fpb, fmass_v, iR0, fbeta_t, fR_m, fgamma_m) + + !$DIR INLINENEVER f_bpres_dot $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: fvflux real(wp), intent(in) :: fR @@ -332,25 +430,48 @@ contains !> Adaptive time stepping routine for subgrid bubbles (See Heirer, E. Hairer S.P.Norsett G. Wanner, Solving Ordinary !! Differential Equations I, Chapter II.4) - subroutine s_advance_step(fRho, fP, fR, fV, fR0, fpb, fpbdot, alf, fntait, fBtait, f_bub_adv_src, f_divu, bub_id, fmass_v, & - - & fmass_g, fbeta_c, fbeta_t, fCson, adap_dt_stop) - $:GPU_ROUTINE(function_name='s_advance_step',parallelism='[seq]', cray_inline=True) + !! @param fRho Current density + !! @param fP Current driving pressure + !! @param fR Current bubble radius + !! @param fV Current bubble radial velocity + !! @param fR0 Equilibrium bubble radius + !! 
@param fpb Internal bubble pressure
+    !! @param fpbdot Time-derivative of internal bubble pressure
+    !! @param alf bubble volume fraction
+    !! @param fntait Tait EOS parameter
+    !! @param fBtait Tait EOS parameter
+    !! @param f_bub_adv_src Source for bubble volume fraction
+    !! @param f_divu Divergence of velocity
+    !! @param bub_id Bubble identifier (EL)
+    !! @param fmass_v Current mass of vapour (EL)
+    !! @param fmass_g Current mass of gas (EL)
+    !! @param fbeta_c Mass transfer coefficient (EL)
+    !! @param fbeta_t Heat transfer coefficient (EL)
+    !! @param fCson Speed of sound (EL)
+    !! @return adap_dt_stop Fail-safe exit if max iteration count reached
+    function f_advance_step(fRho, fP, fR, fV, fR0, fpb, fpbdot, alf, fntait, fBtait, f_bub_adv_src, f_divu, bub_id, fmass_v, &
+        & fmass_g, fbeta_c, fbeta_t, fCson, fRe, fPos, fVel, cell, q_prim_vf) result(adap_dt_stop)
+        $:GPU_ROUTINE(parallelism='[seq]')
-        real(wp), intent(inout) :: fR, fV, fpb, fmass_v
-        real(wp), intent(in) :: fRho, fP, fR0, fpbdot, alf
-        real(wp), intent(in) :: fntait, fBtait, f_bub_adv_src, f_divu
-        integer, intent(in) :: bub_id
-        real(wp), intent(in) :: fmass_g, fbeta_c, fbeta_t, fCson
-        integer, intent(inout) :: adap_dt_stop
-        real(wp), dimension(5) :: err !< Error estimates for adaptive time stepping
-        real(wp) :: t_new !< Updated time step size
-        real(wp) :: h0, h !< Time step size
+        real(wp), intent(inout) :: fR, fV, fpb, fmass_v
+        real(wp), intent(in) :: fRho, fP, fR0, fpbdot, alf
+        real(wp), intent(in) :: fntait, fBtait, f_bub_adv_src, f_divu
+        integer, intent(in) :: bub_id
+        real(wp), intent(in) :: fmass_g, fbeta_c, fbeta_t, fCson
+        real(wp), intent(inout), dimension(3), optional :: fPos, fVel
+        real(wp), intent(in), optional :: fRe
+        integer, intent(in), dimension(3), optional :: cell
+        type(scalar_field), intent(in), dimension(sys_size), optional :: q_prim_vf
+        real(wp), dimension(5) :: err !< Error estimates for adaptive time stepping
+        real(wp) :: t_new !< Updated time step size
+        
real(wp) :: h0, h !< Time step size !> Bubble radius, radial velocity, and radial acceleration for the inner loop real(wp), dimension(4) :: myR_tmp1, myV_tmp1, myR_tmp2, myV_tmp2 real(wp), dimension(4) :: myPb_tmp1, myMv_tmp1, myPb_tmp2, myMv_tmp2 !< Gas pressure and vapor mass for the inner loop (EL) - real(wp) :: fR2, fV2, fpb2, fmass_v2 - integer :: iter_count + real(wp) :: fR2, fV2, fpb2, fmass_v2, f_bTemp + real(wp), dimension(3) :: vTemp, aTemp + integer :: adap_dt_stop + integer :: l, iter_count call s_initial_substep_h(fRho, fP, fR, fV, fR0, fpb, fpbdot, alf, fntait, fBtait, f_bub_adv_src, f_divu, fCson, h0) h = h0 @@ -416,6 +537,42 @@ contains ! Update pb and mass_v fpb = myPb_tmp1(4) fmass_v = myMv_tmp1(4) + + select case (lag_vel_model) + case (1) + do l = 1, num_dims + vTemp(l) = f_interpolate_velocity(fR, cell, l, q_prim_vf) + end do + do l = 1, num_dims + fVel(l) = vTemp(l) + fPos(l) = fPos(l) + h*vTemp(l) + end do + case (2) + do l = 1, num_dims + f_bTemp = f_get_bubble_force(fPos(l), fR, fV, fVel(l), fmass_g, fmass_v, fRe, fRho, cell, l, & + & q_prim_vf) + aTemp(l) = f_bTemp/(fmass_g + fmass_v) + end do + do l = 1, num_dims + fVel(l) = fVel(l) + h*aTemp(l) + fPos(l) = fPos(l) + h*fVel(l) + end do + case (3) + do l = 1, num_dims + f_bTemp = f_get_bubble_force(fPos(l), fR, fV, fVel(l), fmass_g, fmass_v, fRe, fRho, cell, l, & + & q_prim_vf) + aTemp(l) = 2._wp*f_bTemp/(fmass_g + fmass_v) - 3._wp*fV*fVel(l)/fR + end do + do l = 1, num_dims + fVel(l) = fVel(l) + h*aTemp(l) + fPos(l) = fPos(l) + h*fVel(l) + end do + case default + do l = 1, num_dims + fVel(l) = fVel(l) + fPos(l) = fPos(l) + end do + end select end if ! Update step size for the next sub-step @@ -438,10 +595,24 @@ contains if (iter_count >= adap_dt_max_iters) adap_dt_stop = 1 - end subroutine s_advance_step + end function f_advance_step !> Choose the initial time step size for the adaptive time stepping routine (See Heirer, E. Hairer S.P.Norsett G. Wanner, !! 
Solving Ordinary Differential Equations I, Chapter II.4) + !! @param fRho Current density + !! @param fP Current driving pressure + !! @param fR Current bubble radius + !! @param fV Current bubble velocity + !! @param fR0 Equilibrium bubble radius + !! @param fpb Internal bubble pressure + !! @param fpbdot Time-derivative of internal bubble pressure + !! @param alf bubble volume fraction + !! @param fntait Tait EOS parameter + !! @param fBtait Tait EOS parameter + !! @param f_bub_adv_src Source for bubble volume fraction + !! @param f_divu Divergence of velocity + !! @param fCson Speed of sound (EL) + !! @param h Time step size subroutine s_initial_substep_h(fRho, fP, fR, fV, fR0, fpb, fpbdot, alf, fntait, fBtait, f_bub_adv_src, f_divu, fCson, h) $:GPU_ROUTINE(function_name='s_initial_substep_h',parallelism='[seq]', cray_inline=True) @@ -453,6 +624,7 @@ contains real(wp), dimension(2) :: h_size !< Time step size (h0, h1) real(wp), dimension(3) :: d_norms !< norms (d_0, d_1, d_2) real(wp), dimension(2) :: myR_tmp, myV_tmp, myA_tmp !< Bubble radius, radial velocity, and radial acceleration + ! Determine the starting time step Evaluate f(x0,y0) myR_tmp(1) = fR myV_tmp(1) = fV @@ -487,9 +659,32 @@ contains end subroutine s_initial_substep_h !> Integrate bubble variables over the given time step size, h, using a third-order accurate embedded Runge-Kutta scheme. + !! @param err Estimated error + !! @param fRho Current density + !! @param fP Current driving pressure + !! @param fR Current bubble radius + !! @param fV Current bubble velocity + !! @param fR0 Equilibrium bubble radius + !! @param fpb Internal bubble pressure + !! @param fpbdot Time-derivative of internal bubble pressure + !! @param alf bubble volume fraction + !! @param fntait Tait EOS parameter + !! @param fBtait Tait EOS parameter + !! @param f_bub_adv_src Source for bubble volume fraction + !! @param f_divu Divergence of velocity + !! @param bub_id Bubble identifier (EL) + !! 
@param fmass_v Current mass of vapour (EL) + !! @param fmass_g Current mass of gas (EL) + !! @param fbeta_c Mass transfer coefficient (EL) + !! @param fbeta_t Heat transfer coefficient (EL) + !! @param fCson Speed of sound (EL) + !! @param h Time step size + !! @param myR_tmp Bubble radius at each stage + !! @param myV_tmp Bubble radial velocity at each stage + !! @param myPb_tmp Internal bubble pressure at each stage (EL) + !! @param myMv_tmp Mass of vapor in the bubble at each stage (EL) subroutine s_advance_substep(err, fRho, fP, fR, fV, fR0, fpb, fpbdot, alf, fntait, fBtait, f_bub_adv_src, f_divu, bub_id, & - - & fmass_v, fmass_g, fbeta_c, fbeta_t, fCson, h, myR_tmp, myV_tmp, myPb_tmp, myMv_tmp) + & fmass_v, fmass_g, fbeta_c, fbeta_t, fCson, h, myR_tmp, myV_tmp, myPb_tmp, myMv_tmp) $:GPU_ROUTINE(function_name='s_advance_substep',parallelism='[seq]', cray_inline=True) real(wp), intent(out) :: err @@ -577,7 +772,18 @@ contains end subroutine s_advance_substep !> Changes of pressure and vapor mass in the lagrange bubbles. - elemental subroutine s_advance_EL(fR_tmp, fV_tmp, fPb_tmp, fMv_tmp, bub_id, fmass_g, fbeta_c, fbeta_t, fdPbdt_tmp, advance_EL) + !! @param fR_tmp Bubble radius + !! @param fV_tmp Bubble radial velocity + !! @param fPb_tmp Internal bubble pressure + !! @param fMv_tmp Mass of vapor in the bubble + !! @param bub_id Bubble identifier + !! @param fmass_g Current mass of gas + !! @param fbeta_c Mass transfer coefficient + !! @param fbeta_t Heat transfer coefficient + !! @param fdPbdt_tmp Rate of change of the internal bubble pressure + !! @param fdMvdt_tmp Rate of change of the mass of vapor in the bubble + !! 
@param advance_EL Rate of change of the mass of vapor in the bubble + subroutine s_advance_EL(fR_tmp, fV_tmp, fPb_tmp, fMv_tmp, bub_id, fmass_g, fbeta_c, fbeta_t, fdPbdt_tmp, advance_EL) $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: fR_tmp, fV_tmp, fPb_tmp, fMv_tmp diff --git a/src/simulation/m_bubbles_EE.fpp b/src/simulation/m_bubbles_EE.fpp index b2c5f1fd97..94f6605dd5 100644 --- a/src/simulation/m_bubbles_EE.fpp +++ b/src/simulation/m_bubbles_EE.fpp @@ -27,7 +27,7 @@ module m_bubbles_EE contains - !> Initialize the Euler-Euler bubble module + !> @brief Allocates and initializes arrays for the Euler-Euler bubble model. impure subroutine s_initialize_bubbles_EE_module integer :: l @@ -65,7 +65,8 @@ contains end subroutine s_initialize_bubbles_EE_module - !> Compute the bubble volume fraction alpha from the bubble number density + !> @brief Computes the bubble volume fraction alpha from the bubble number density. + !! @param q_cons_vf is the conservative variable subroutine s_comp_alpha_from_n(q_cons_vf) type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf @@ -90,6 +91,8 @@ contains end subroutine s_comp_alpha_from_n !> Compute the right-hand side for Euler-Euler bubble transport + !! @param idir Direction index + !! @param q_prim_vf Primitive variables subroutine s_compute_bubbles_EE_rhs(idir, q_prim_vf, divu_in) integer, intent(in) :: idir @@ -137,7 +140,10 @@ contains end subroutine s_compute_bubbles_EE_rhs - !> Compute the Euler-Euler bubble source terms + !> The purpose of this procedure is to compute the source terms that are needed for the bubble modeling + !! @param q_prim_vf Primitive variables + !! @param q_cons_vf Conservative variables + !! 
@param rhs_vf Right-hand side variables impure subroutine s_compute_bubble_EE_source(q_cons_vf, q_prim_vf, rhs_vf, divu_in) type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf @@ -159,7 +165,7 @@ contains real(wp) :: nbub !< Bubble number density real(wp) :: my_divu integer :: i, j, k, l, q, ii !< Loop variables - integer :: adap_dt_stop_max, adap_dt_stop !< Fail-safe exit if max iteration count reached + integer :: adap_dt_stop_sum, adap_dt_stop !< Fail-safe exit if max iteration count reached integer :: dmBub_id !< Dummy variables for unified subgrid bubble subroutines real(wp) :: dmMass_v, dmMass_n, dmBeta_c, dmBeta_t, dmCson @@ -181,10 +187,10 @@ contains end do $:END_GPU_PARALLEL_LOOP() - adap_dt_stop_max = 0 + adap_dt_stop_sum = 0 $:GPU_PARALLEL_LOOP(private='[j, k, l, Rtmp, Vtmp, myalpha_rho, myalpha, myR, myV, alf, myP, myRho, R2Vav, R3, nbub, & - & pb_local, mv_local, vflux, pbdot, rddot, n_tait, B_tait, my_divu]', collapse=3, & - & reduction = '[[adap_dt_stop_max]]', reductionOp = '[MAX]', copy = '[adap_dt_stop_max]') + & pb_local, mv_local, vflux, pbdot, rddot, n_tait, B_tait, & + & my_divu]', collapse=3, copy='[adap_dt_stop_sum]') do l = 0, p do k = 0, n do j = 0, m @@ -272,24 +278,25 @@ contains pb_local = 0._wp; mv_local = 0._wp; vflux = 0._wp; pbdot = 0._wp end if + adap_dt_stop = 0 + ! 
Adaptive time stepping if (adap_dt) then - adap_dt_stop = 0 - - call s_advance_step(myRho, myP, myR, myV, R0(q), pb_local, pbdot, alf, n_tait, B_tait, & - & bub_adv_src(j, k, l), divu_in%sf(j, k, l), dmBub_id, dmMass_v, dmMass_n, & - & dmBeta_c, dmBeta_t, dmCson, adap_dt_stop) + adap_dt_stop = f_advance_step(myRho, myP, myR, myV, R0(q), pb_local, pbdot, alf, n_tait, B_tait, & + & bub_adv_src(j, k, l), divu_in%sf(j, k, l), dmBub_id, dmMass_v, & + & dmMass_n, dmBeta_c, dmBeta_t, dmCson) q_cons_vf(rs(q))%sf(j, k, l) = nbub*myR q_cons_vf(vs(q))%sf(j, k, l) = nbub*myV - - adap_dt_stop_max = max(adap_dt_stop_max, adap_dt_stop) else rddot = f_rddot(myRho, myP, myR, myV, R0(q), pb_local, pbdot, alf, n_tait, B_tait, bub_adv_src(j, & & k, l), divu_in%sf(j, k, l), dmCson) bub_v_src(j, k, l, q) = nbub*rddot bub_r_src(j, k, l, q) = q_cons_vf(vs(q))%sf(j, k, l) end if + + $:GPU_ATOMIC(atomic='update') + adap_dt_stop_sum = adap_dt_stop_sum + adap_dt_stop end if end do end do @@ -297,7 +304,7 @@ contains end do $:END_GPU_PARALLEL_LOOP() - if (adap_dt .and. adap_dt_stop_max > 0) call s_mpi_abort("Adaptive time stepping failed to converge.") + if (adap_dt .and. adap_dt_stop_sum > 0) call s_mpi_abort("Adaptive time stepping failed to converge.") if (.not. 
adap_dt) then $:GPU_PARALLEL_LOOP(private='[i, k, l, q]', collapse=3) diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index 437dbe0db2..05a0600f0f 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -17,6 +17,9 @@ module m_bubbles_EL use m_helper_basic use m_sim_helpers use m_helper + use m_mpi_common + use m_ibm + use m_finite_differences implicit none @@ -55,22 +58,34 @@ module m_bubbles_EL $:GPU_DECLARE(create='[intfc_draddt, intfc_dveldt, gas_dpdt, gas_dmvdt, mtn_dposdt, mtn_dveldt]') integer, private :: lag_num_ts !< Number of time stages in the time-stepping scheme + $:GPU_DECLARE(create='[lag_num_ts]') - integer :: nBubs !< Number of bubbles in the local domain real(wp) :: Rmax_glb, Rmin_glb !< Maximum and minimum bubbe size in the local domain !> Projection of the lagrangian particles in the Eulerian framework type(scalar_field), dimension(:), allocatable :: q_beta + type(scalar_field), dimension(:), allocatable :: kahan_comp !< Kahan compensation for q_beta accumulation integer :: q_beta_idx !< Size of the q_beta vector field - $:GPU_DECLARE(create='[nBubs, Rmax_glb, Rmin_glb, q_beta, q_beta_idx]') + + $:GPU_DECLARE(create='[Rmax_glb, Rmin_glb, q_beta, kahan_comp, q_beta_idx]') + + integer, parameter :: LAG_EVOL_ID = 11 ! File id for lag_bubbles_evol_*.dat + integer, parameter :: LAG_STATS_ID = 12 ! File id for stats_lag_bubbles_*.dat + integer, parameter :: LAG_VOID_ID = 13 ! File id for voidfraction.dat + integer, allocatable, dimension(:) :: keep_bubble + integer, allocatable, dimension(:,:) :: wrap_bubble_loc, wrap_bubble_dir + $:GPU_DECLARE(create='[keep_bubble]') + $:GPU_DECLARE(create='[wrap_bubble_loc, wrap_bubble_dir]') contains !> Initializes the lagrangian subgrid bubble solver - impure subroutine s_initialize_bubbles_EL_module(q_cons_vf) + !! 
@param q_cons_vf Initial conservative variables + impure subroutine s_initialize_bubbles_EL_module(q_cons_vf, bc_type) - type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf - integer :: nBubs_glb, i + type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf + type(integer_field), dimension(1:num_dims,1:2), intent(in) :: bc_type + integer :: nBubs_glb, i ! Setting number of time-stages for selected time-stepping scheme @@ -83,7 +98,7 @@ contains else if (lag_params%solver_approach == 2) then ! Two-way coupling q_beta_idx = 4 - if (p == 0) then + if (p == 0) then ! 2D ! Subgrid noise model for 2D approximation q_beta_idx = 6 end if @@ -91,22 +106,36 @@ contains call s_mpi_abort('Please check the lag_params%solver_approach input') end if + pcomm_coords(1)%beg = x_cb(-1) + pcomm_coords(1)%end = x_cb(m) + $:GPU_UPDATE(device='[pcomm_coords(1)]') + if (n > 0) then + pcomm_coords(2)%beg = y_cb(-1) + pcomm_coords(2)%end = y_cb(n) + $:GPU_UPDATE(device='[pcomm_coords(2)]') + if (p > 0) then + pcomm_coords(3)%beg = z_cb(-1) + pcomm_coords(3)%end = z_cb(p) + $:GPU_UPDATE(device='[pcomm_coords(3)]') + end if + end if + $:GPU_UPDATE(device='[lag_num_ts, q_beta_idx]') @:ALLOCATE(q_beta(1:q_beta_idx)) + @:ALLOCATE(kahan_comp(1:q_beta_idx)) do i = 1, q_beta_idx @:ALLOCATE(q_beta(i)%sf(idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, idwbuff(3)%beg:idwbuff(3)%end)) - end do - - do i = 1, q_beta_idx @:ACC_SETUP_SFs(q_beta(i)) + @:ALLOCATE(kahan_comp(i)%sf(idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, & + & idwbuff(3)%beg:idwbuff(3)%end)) + @:ACC_SETUP_SFs(kahan_comp(i)) end do ! 
Allocating space for lagrangian variables nBubs_glb = lag_params%nBubs_glb - @:ALLOCATE(lag_id(1:nBubs_glb, 1:2)) @:ALLOCATE(bub_R0(1:nBubs_glb)) @:ALLOCATE(Rmax_stats(1:nBubs_glb)) @:ALLOCATE(Rmin_stats(1:nBubs_glb)) @@ -114,6 +143,7 @@ contains @:ALLOCATE(gas_betaT(1:nBubs_glb)) @:ALLOCATE(gas_betaC(1:nBubs_glb)) @:ALLOCATE(bub_dphidt(1:nBubs_glb)) + @:ALLOCATE(lag_id(1:nBubs_glb, 1:2)) @:ALLOCATE(gas_p(1:nBubs_glb, 1:2)) @:ALLOCATE(gas_mv(1:nBubs_glb, 1:2)) @:ALLOCATE(intfc_rad(1:nBubs_glb, 1:2)) @@ -129,23 +159,70 @@ contains @:ALLOCATE(mtn_dposdt(1:nBubs_glb, 1:3, 1:lag_num_ts)) @:ALLOCATE(mtn_dveldt(1:nBubs_glb, 1:3, 1:lag_num_ts)) + @:ALLOCATE(keep_bubble(1:nBubs_glb)) + @:ALLOCATE(wrap_bubble_loc(1:nBubs_glb, 1:num_dims), wrap_bubble_dir(1:nBubs_glb, 1:num_dims)) + if (adap_dt .and. f_is_default(adap_dt_tol)) adap_dt_tol = dflt_adap_dt_tol + if (num_procs > 1) call s_initialize_particles_mpi(lag_num_ts) + ! Starting bubbles - call s_read_input_bubbles(q_cons_vf) + if (lag_params%write_void_evol) call s_open_void_evol + if (lag_params%write_bubbles) call s_open_lag_bubble_evol() + if (lag_params%write_bubbles_stats) call s_open_lag_bubble_stats() + + if (lag_params%vel_model > 0) then + moving_lag_bubbles = .true. + lag_pressure_force = lag_params%pressure_force + lag_gravity_force = lag_params%gravity_force + lag_vel_model = lag_params%vel_model + lag_drag_model = lag_params%drag_model + end if + $:GPU_UPDATE(device='[moving_lag_bubbles, lag_pressure_force, lag_gravity_force, lag_vel_model, lag_drag_model]') + + ! Allocate cell-centered pressure gradient arrays and FD coefficients + if (lag_params%vel_model > 0 .and. 
lag_params%pressure_force) then + @:ALLOCATE(grad_p_x(0:m, 0:n, 0:p)) + @:ALLOCATE(fd_coeff_x_pgrad(-fd_number:fd_number, 0:m)) + call s_compute_finite_difference_coefficients(m, x_cc, fd_coeff_x_pgrad, buff_size, fd_number, fd_order) + $:GPU_UPDATE(device='[fd_coeff_x_pgrad]') + if (n > 0) then + @:ALLOCATE(grad_p_y(0:m, 0:n, 0:p)) + @:ALLOCATE(fd_coeff_y_pgrad(-fd_number:fd_number, 0:n)) + call s_compute_finite_difference_coefficients(n, y_cc, fd_coeff_y_pgrad, buff_size, fd_number, fd_order) + $:GPU_UPDATE(device='[fd_coeff_y_pgrad]') + end if + if (p > 0) then + @:ALLOCATE(grad_p_z(0:m, 0:n, 0:p)) + @:ALLOCATE(fd_coeff_z_pgrad(-fd_number:fd_number, 0:p)) + call s_compute_finite_difference_coefficients(p, z_cc, fd_coeff_z_pgrad, buff_size, fd_number, fd_order) + $:GPU_UPDATE(device='[fd_coeff_z_pgrad]') + end if + end if + + ! Allocate cell list arrays for atomic-free Gaussian smearing + @:ALLOCATE(cell_list_start(0:m, 0:n, 0:p)) + @:ALLOCATE(cell_list_count(0:m, 0:n, 0:p)) + @:ALLOCATE(cell_list_idx(1:lag_params%nBubs_glb)) + + call s_read_input_bubbles(q_cons_vf, bc_type) end subroutine s_initialize_bubbles_EL_module - !> Read initial bubble data from input files - impure subroutine s_read_input_bubbles(q_cons_vf) + !> The purpose of this procedure is to obtain the initial bubbles' information + !! 
@param q_cons_vf Conservative variables + impure subroutine s_read_input_bubbles(q_cons_vf, bc_type) + + type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf + type(integer_field), dimension(1:num_dims,1:2), intent(in) :: bc_type + real(wp), dimension(8) :: inputBubble + real(wp) :: qtime + integer :: id, bub_id, save_count + integer :: i, ios + logical :: file_exist, indomain + integer, dimension(3) :: cell + character(LEN=path_len + 2*name_len) :: path_D_dir - type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf - real(wp), dimension(8) :: inputBubble - real(wp) :: qtime - integer :: id, bub_id, save_count - integer :: i, ios - logical :: file_exist, indomain - character(LEN=path_len + 2*name_len) :: path_D_dir ! Initialize number of particles bub_id = 0 @@ -162,9 +239,9 @@ contains if (save_count == 0) then if (proc_rank == 0) print *, 'Reading lagrange bubbles input file.' - inquire (file='input/lag_bubbles.dat', exist=file_exist) + call my_inquire(trim(lag_params%input_path), file_exist) if (file_exist) then - open (94, file='input/lag_bubbles.dat', form='formatted', iostat=ios) + open (94, file=trim(lag_params%input_path), form='formatted', iostat=ios) do while (ios == 0) read (94, *, iostat=ios) (inputBubble(i), i=1, 8) if (ios /= 0) cycle @@ -178,12 +255,12 @@ contains call s_add_bubbles(inputBubble, q_cons_vf, bub_id) lag_id(bub_id, 1) = id ! global ID lag_id(bub_id, 2) = bub_id ! local ID - nBubs = bub_id ! local number of bubbles + n_el_bubs_loc = bub_id ! 
local number of bubbles end if end do close (94) else - call s_mpi_abort("Initialize the lagrange bubbles in input/lag_bubbles.dat") + call s_mpi_abort("Initialize the lagrange bubbles in " // trim(lag_params%input_path)) end if else if (proc_rank == 0) print *, 'Restarting lagrange bubbles at save_count: ', save_count @@ -192,11 +269,21 @@ contains print *, " Lagrange bubbles running, in proc", proc_rank, "number:", bub_id, "/", id + if (num_procs > 1) then + call s_mpi_reduce_int_sum(n_el_bubs_loc, n_el_bubs_glb) + else + n_el_bubs_glb = n_el_bubs_loc + end if + + if (proc_rank == 0) then + if (n_el_bubs_glb == 0) call s_mpi_abort('No bubbles in the domain. Check ' // trim(lag_params%input_path)) + end if + $:GPU_UPDATE(device='[bubbles_lagrange, lag_params]') $:GPU_UPDATE(device='[lag_id, bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, gas_betaC, bub_dphidt, gas_p, gas_mv, & & intfc_rad, intfc_vel, mtn_pos, mtn_posPrev, mtn_vel, mtn_s, intfc_draddt, intfc_dveldt, gas_dpdt, & - & gas_dmvdt, mtn_dposdt, mtn_dveldt, nBubs]') + & gas_dmvdt, mtn_dposdt, mtn_dveldt, n_el_bubs_loc]') Rmax_glb = min(dflt_real, -dflt_real) Rmin_glb = max(dflt_real, -dflt_real) @@ -206,22 +293,28 @@ contains ! Populate temporal variables call s_transfer_data_to_tmp() - call s_smear_voidfraction() - - if (lag_params%write_bubbles) call s_write_lag_particles(qtime) + call s_smear_voidfraction(bc_type) if (save_count == 0) then ! Create ./D directory - write (path_D_dir, '(A,I0,A,I0)') trim(case_dir) // '/D' - call my_inquire(path_D_dir, file_exist) - if (.not. file_exist) call s_create_directory(trim(path_D_dir)) + if (proc_rank == 0) then + write (path_D_dir, '(A,I0,A,I0)') trim(case_dir) // '/D' + call my_inquire(trim(path_D_dir), file_exist) + if (.not. file_exist) call s_create_directory(trim(path_D_dir)) + end if + call s_mpi_barrier() call s_write_restart_lag_bubbles(save_count) ! 
Needed for post_processing - call s_write_void_evol(qtime) + if (lag_params%write_void_evol) call s_write_void_evol(qtime) end if + if (lag_params%write_bubbles) call s_write_lag_bubble_evol(qtime) + end subroutine s_read_input_bubbles - !> Add a new bubble from input data for a fresh start + !> The purpose of this procedure is to obtain the information of the bubbles when starting fresh + !! @param inputBubble Bubble information + !! @param q_cons_vf Conservative variables + !! @param bub_id Local id of the bubble impure subroutine s_add_bubbles(inputBubble, q_cons_vf, bub_id) type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf @@ -256,7 +349,7 @@ contains mtn_posPrev(bub_id,1:3,1) = mtn_pos(bub_id,1:3,1) end if - cell = -buff_size + cell = fd_number - buff_size call s_locate_cell(mtn_pos(bub_id,1:3,1), cell, mtn_s(bub_id,1:3,1)) ! Check if the bubble is located in the ghost cell of a symmetric, or wall boundary @@ -329,7 +422,9 @@ contains end subroutine s_add_bubbles - !> Restore bubble data from a restart file + !> The purpose of this procedure is to obtain the information of the bubbles from a restart point. + !! @param bub_id Local ID of the particle + !! @param save_count File identifier impure subroutine s_restart_bubbles(bub_id, save_count) integer, intent(inout) :: bub_id, save_count @@ -349,7 +444,6 @@ contains integer :: i integer, dimension(:), allocatable :: proc_bubble_counts real(wp), dimension(1:1,1:lag_io_vars) :: dummy - dummy = 0._wp ! 
Construct file path @@ -431,7 +525,7 @@ contains call MPI_FILE_CLOSE(ifile, ierr) call MPI_TYPE_FREE(view, ierr) - nBubs = bub_id + n_el_bubs_loc = bub_id do i = 1, bub_id lag_id(i, 1) = int(MPI_IO_DATA_lag_bubbles(i, 1)) @@ -455,7 +549,7 @@ contains deallocate (MPI_IO_DATA_lag_bubbles) else - nBubs = 0 + n_el_bubs_loc = 0 call MPI_TYPE_CONTIGUOUS(0, mpi_p, view, ierr) call MPI_TYPE_COMMIT(view, ierr) @@ -484,16 +578,20 @@ contains end subroutine s_restart_bubbles !> Contains the bubble dynamics subroutines. - subroutine s_compute_bubble_EL_dynamics(q_prim_vf, stage) - - type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf - integer, intent(in) :: stage - real(wp) :: myVapFlux - real(wp) :: preterm1, term2, paux, pint, Romega, term1_fac - real(wp) :: myR_m, mygamma_m, myPb, myMass_n, myMass_v - real(wp) :: myR, myV, myBeta_c, myBeta_t, myR0, myPbdot, myMvdot - real(wp) :: myPinf, aux1, aux2, myCson, myRho - real(wp) :: gamma, pi_inf, qv + !! @param q_prim_vf Primitive variables + !! 
@param stage Current stage in the time-stepper algorithm + subroutine s_compute_bubble_EL_dynamics(q_prim_vf, bc_type, stage) + + type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf + type(integer_field), dimension(1:num_dims,1:2), intent(in) :: bc_type + integer, intent(in) :: stage + real(wp) :: myVapFlux + real(wp) :: preterm1, term2, paux, pint, Romega, term1_fac + real(wp) :: myR_m, mygamma_m, myPb, myMass_n, myMass_v + real(wp) :: myR, myV, myBeta_c, myBeta_t, myR0, myPbdot, myMvdot + real(wp) :: myPinf, aux1, aux2, myCson, myRho + real(wp), dimension(3) :: myPos, myVel + real(wp) :: gamma, pi_inf, qv, f_b, myRe #:if not MFC_CASE_OPTIMIZATION and USING_AMD real(wp), dimension(3) :: myalpha_rho, myalpha @@ -502,17 +600,16 @@ contains #:endif real(wp), dimension(2) :: Re integer, dimension(3) :: cell - integer :: adap_dt_stop_max, adap_dt_stop !< Fail-safe exit if max iteration count reached + integer :: adap_dt_stop_sum, adap_dt_stop !< Fail-safe exit if max iteration count reached real(wp) :: dmalf, dmntait, dmBtait, dm_bub_adv_src, dm_divu !< Dummy variables for unified subgrid bubble subroutines - integer :: i, k, l - - call nvtxStartRange("LAGRANGE-BUBBLE-DYNAMICS") + integer :: k, l ! Subgrid p_inf model based on Maeda and Colonius (2018). if (lag_params%pressure_corrector) then + call nvtxStartRange("LAGRANGE-BUBBLE-PINF-CORRECTION") ! Calculate velocity potentials (valid for one bubble per cell) $:GPU_PARALLEL_LOOP(private='[k, cell, paux, preterm1, term2, Romega, myR0, myR, myV, myPb, pint, term1_fac]') - do k = 1, nBubs + do k = 1, n_el_bubs_loc call s_get_pinf(k, q_prim_vf, 2, paux, cell, preterm1, term2, Romega) myR0 = bub_R0(k) myR = intfc_rad(k, 2) @@ -529,16 +626,24 @@ contains end if end do $:END_GPU_PARALLEL_LOOP() + call nvtxEndRange() end if + ! Precompute cell-centered pressure gradients for translational motion + if (moving_lag_bubbles .and. 
lag_pressure_force) then + call nvtxStartRange("LAGRANGE-BUBBLE-PRESSURE-GRADIENT") + call s_compute_pressure_gradients(q_prim_vf) + call nvtxEndRange() + end if + + call nvtxStartRange("LAGRANGE-BUBBLE-DYNAMICS") ! Radial motion model - adap_dt_stop_max = 0 - $:GPU_PARALLEL_LOOP(private='[k, i, myalpha_rho, myalpha, Re, cell, myVapFlux, preterm1, term2, paux, pint, Romega, & + adap_dt_stop_sum = 0 + $:GPU_PARALLEL_LOOP(private='[k, myalpha_rho, myalpha, Re, cell, myVapFlux, preterm1, term2, paux, pint, Romega, & & term1_fac, myR_m, mygamma_m, myPb, myMass_n, myMass_v, myR, myV, myBeta_c, myBeta_t, myR0, myPbdot, & & myMvdot, myPinf, aux1, aux2, myCson, myRho, gamma, pi_inf, qv, dmalf, dmntait, dmBtait, & - & dm_bub_adv_src, dm_divu, adap_dt_stop]', reduction='[[adap_dt_stop_max]]',reductionOp='[MAX]', & - & copy = '[adap_dt_stop_max]', copyin = '[stage]') - do k = 1, nBubs + & dm_bub_adv_src, dm_divu, adap_dt_stop, myPos, myVel]', copy='[adap_dt_stop_sum]',copyin='[stage]') + do k = 1, n_el_bubs_loc ! Keller-Miksis model ! Current bubble state @@ -550,6 +655,8 @@ contains myBeta_c = gas_betaC(k) myBeta_t = gas_betaT(k) myR0 = bub_R0(k) + myPos = mtn_pos(k,:,2) + myVel = mtn_vel(k,:,2) ! Vapor and heat fluxes call s_vflux(myR, myV, myPb, myMass_v, k, myVapFlux, myMass_n, myBeta_c, myR_m, mygamma_m) @@ -568,44 +675,74 @@ contains adap_dt_stop = 0 if (adap_dt) then - call s_advance_step(myRho, myPinf, myR, myV, myR0, myPb, myPbdot, dmalf, dmntait, dmBtait, dm_bub_adv_src, & - & dm_divu, k, myMass_v, myMass_n, myBeta_c, myBeta_t, myCson, adap_dt_stop) + mtn_posPrev(k,:,1) = myPos + + myRe = Re(1) + adap_dt_stop = f_advance_step(myRho, myPinf, myR, myV, myR0, myPb, myPbdot, dmalf, dmntait, dmBtait, & + & dm_bub_adv_src, dm_divu, k, myMass_v, myMass_n, myBeta_c, myBeta_t, myCson, myRe, & + & myPos, myVel, cell, q_prim_vf) ! 
Update bubble state intfc_rad(k, 1) = myR intfc_vel(k, 1) = myV gas_p(k, 1) = myPb gas_mv(k, 1) = myMass_v + mtn_pos(k,:,1) = myPos + mtn_vel(k,:,1) = myVel else ! Radial acceleration from bubble models intfc_dveldt(k, stage) = f_rddot(myRho, myPinf, myR, myV, myR0, myPb, myPbdot, dmalf, dmntait, dmBtait, & & dm_bub_adv_src, dm_divu, myCson) + intfc_draddt(k, stage) = myV gas_dmvdt(k, stage) = myMvdot gas_dpdt(k, stage) = myPbdot + + if (moving_lag_bubbles) then + do l = 1, num_dims + select case (lag_vel_model) + case (1) + mtn_dposdt(k, l, stage) = f_interpolate_velocity(myPos(l), cell, l, q_prim_vf) + mtn_dveldt(k, l, stage) = 0._wp + case (2) + mtn_dposdt(k, l, stage) = myVel(l) + f_b = f_get_bubble_force(myPos(l), myR, myV, myVel(l), myMass_n, myMass_v, Re(1), myRho, cell, l, & + & q_prim_vf) + mtn_dveldt(k, l, stage) = f_b/(myMass_n + myMass_v) + case (3) + mtn_dposdt(k, l, stage) = myVel(l) + f_b = f_get_bubble_force(myPos(l), myR, myV, myVel(l), myMass_n, myMass_v, Re(1), myRho, cell, l, & + & q_prim_vf) + mtn_dveldt(k, l, stage) = 2._wp*f_b/(myMass_n + myMass_v) - 3._wp*myV*myVel(l)/myR + case default + mtn_dposdt(k, l, stage) = 0._wp + mtn_dveldt(k, l, stage) = 0._wp + end select + end do + end if end if - adap_dt_stop_max = max(adap_dt_stop_max, adap_dt_stop) + $:GPU_ATOMIC(atomic='update') + adap_dt_stop_sum = adap_dt_stop_sum + adap_dt_stop end do $:END_GPU_PARALLEL_LOOP() + call nvtxEndRange - if (adap_dt .and. adap_dt_stop_max > 0) call s_mpi_abort("Adaptive time stepping failed to converge.") - - ! Bubbles remain in a fixed position - $:GPU_PARALLEL_LOOP(collapse=2, private='[k, l]', copyin='[stage]') - do k = 1, nBubs - do l = 1, 3 - mtn_dposdt(k, l, stage) = 0._wp - mtn_dveldt(k, l, stage) = 0._wp - end do - end do - $:END_GPU_PARALLEL_LOOP() + if (adap_dt .and. 
adap_dt_stop_sum > 0) call s_mpi_abort("Adaptive time stepping failed to converge.")
+        if (adap_dt) then
+            call s_transfer_data_to_tmp()
+            if (moving_lag_bubbles) call s_enforce_EL_bubbles_boundary_conditions(q_prim_vf)
+            call s_smear_voidfraction(bc_type)
+        end if
 
     end subroutine s_compute_bubble_EL_dynamics
 
-    !> Compute the Lagrangian bubble source terms and add them to the RHS
+    !> The purpose of this subroutine is to obtain the bubble source terms based on Maeda and Colonius (2018) and add them to the
+    !! RHS scalar field.
+    !! @param q_cons_vf Conservative variables
+    !! @param q_prim_vf Primitive variables
+    !! @param rhs_vf Time derivative of the conservative variables
     subroutine s_compute_bubbles_EL_source(q_cons_vf, q_prim_vf, rhs_vf)
 
         type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf
@@ -613,91 +750,96 @@ contains
         type(scalar_field), dimension(sys_size), intent(inout) :: rhs_vf
         integer :: i, j, k, l
 
-        if (.not. adap_dt) call s_smear_voidfraction()
-
-        if (lag_params%solver_approach == 2) then
-            ! (q / (1 - beta)) * d(beta)/dt source
-            if (p == 0) then
-                $:GPU_PARALLEL_LOOP(private='[i, j, k, l]', collapse=4)
-                do k = 0, p
-                    do j = 0, n
-                        do i = 0, m
-                            do l = 1, E_idx
-                                if (q_beta(1)%sf(i, j, k) > (1._wp - lag_params%valmaxvoid)) then
-                                    rhs_vf(l)%sf(i, j, k) = rhs_vf(l)%sf(i, j, k) + q_cons_vf(l)%sf(i, j, k)*(q_beta(2)%sf(i, j, &
-                                        & k) + q_beta(5)%sf(i, j, k))
-                                end if
-                            end do
-                        end do
-                    end do
-                end do
-                $:END_GPU_PARALLEL_LOOP()
-            else
-                $:GPU_PARALLEL_LOOP(private='[i, j, k, l]', collapse=4)
-                do k = 0, p
-                    do j = 0, n
-                        do i = 0, m
-                            do l = 1, E_idx
-                                if (q_beta(1)%sf(i, j, k) > (1._wp - lag_params%valmaxvoid)) then
-                                    rhs_vf(l)%sf(i, j, k) = rhs_vf(l)%sf(i, j, k) + q_cons_vf(l)%sf(i, j, &
-                                        & k)/q_beta(1)%sf(i, j, k)*q_beta(2)%sf(i, j, k)
-                                end if
-                            end do
-                        end do
-                    end do
-                end do
-                $:END_GPU_PARALLEL_LOOP()
-            end if
+        call nvtxStartRange("LAGRANGE-BUBBLE-EL-SOURCE")
+        ! 
(q / (1 - beta)) * d(beta)/dt source + if (lag_params%cluster_type >= 4) then + $:GPU_PARALLEL_LOOP(private='[i, j, k, l]', collapse=4) + do k = idwint(3)%beg, idwint(3)%end + do j = idwint(2)%beg, idwint(2)%end + do i = idwint(1)%beg, idwint(1)%end + do l = 1, E_idx + if (q_beta(1)%sf(i, j, k) > (1._wp - lag_params%valmaxvoid)) then + rhs_vf(l)%sf(i, j, k) = rhs_vf(l)%sf(i, j, k) + q_cons_vf(l)%sf(i, j, k)*(q_beta(2)%sf(i, j, & + & k) + q_beta(5)%sf(i, j, k)) + end if end do end do end do - $:END_GPU_PARALLEL_LOOP() - else - $:GPU_PARALLEL_LOOP(private='[i, j, k, l]', collapse=4) - do k = 0, p - do j = 0, n - do i = 0, m - do l = 1, E_idx - if (q_beta(1)%sf(i, j, k) > (1._wp - lag_params%valmaxvoid)) then - rhs_vf(l)%sf(i, j, k) = rhs_vf(l)%sf(i, j, k) + q_cons_vf(l)%sf(i, j, k)/q_beta(1)%sf(i, j, & - & k)*q_beta(2)%sf(i, j, k) - end if - end do + end do + $:END_GPU_PARALLEL_LOOP() + else + $:GPU_PARALLEL_LOOP(private='[i, j, k, l]', collapse=4) + do k = idwint(3)%beg, idwint(3)%end + do j = idwint(2)%beg, idwint(2)%end + do i = idwint(1)%beg, idwint(1)%end + do l = 1, E_idx + if (q_beta(1)%sf(i, j, k) > (1._wp - lag_params%valmaxvoid)) then + rhs_vf(l)%sf(i, j, k) = rhs_vf(l)%sf(i, j, k) + (q_cons_vf(l)%sf(i, j, k)/q_beta(1)%sf(i, j, & + & k))*q_beta(2)%sf(i, j, k) + end if end do end do end do - $:END_GPU_PARALLEL_LOOP() - end if + end do + $:END_GPU_PARALLEL_LOOP() + end if - do l = 1, num_dims - call s_gradient_dir(q_prim_vf(E_idx)%sf, q_beta(3)%sf, l) + do l = 1, num_dims + call s_gradient_dir(q_prim_vf(E_idx)%sf, q_beta(3)%sf, l) - ! (q / (1 - beta)) * d(beta)/dt source - $:GPU_PARALLEL_LOOP(private='[i, j, k]', collapse=3) - do k = 0, p - do j = 0, n - do i = 0, m - if (q_beta(1)%sf(i, j, k) > (1._wp - lag_params%valmaxvoid)) then - rhs_vf(contxe + l)%sf(i, j, k) = rhs_vf(contxe + l)%sf(i, j, k) - (1._wp - q_beta(1)%sf(i, j, & - & k))/q_beta(1)%sf(i, j, k)*q_beta(3)%sf(i, j, k) - end if - end do + ! 
(q / (1 - beta)) * d(beta)/dt source + $:GPU_PARALLEL_LOOP(private='[i, j, k]', collapse=3) + do k = idwint(3)%beg, idwint(3)%end + do j = idwint(2)%beg, idwint(2)%end + do i = idwint(1)%beg, idwint(1)%end + if (q_beta(1)%sf(i, j, k) > (1._wp - lag_params%valmaxvoid)) then + rhs_vf(contxe + l)%sf(i, j, k) = rhs_vf(contxe + l)%sf(i, j, k) - (1._wp - q_beta(1)%sf(i, j, & + & k))/q_beta(1)%sf(i, j, k)*q_beta(3)%sf(i, j, k) + end if end do end do - $:END_GPU_PARALLEL_LOOP() + end do + $:END_GPU_PARALLEL_LOOP() - ! source in energy - $:GPU_PARALLEL_LOOP(private='[i, j, k]', collapse=3) - do k = idwbuff(3)%beg, idwbuff(3)%end - do j = idwbuff(2)%beg, idwbuff(2)%end - do i = idwbuff(1)%beg, idwbuff(1)%end - q_beta(3)%sf(i, j, k) = q_prim_vf(E_idx)%sf(i, j, k)*q_prim_vf(contxe + l)%sf(i, j, k) - end do + ! source in energy + $:GPU_PARALLEL_LOOP(private='[i, j, k]', collapse=3) + do k = idwbuff(3)%beg, idwbuff(3)%end + do j = idwbuff(2)%beg, idwbuff(2)%end + do i = idwbuff(1)%beg, idwbuff(1)%end + q_beta(3)%sf(i, j, k) = q_prim_vf(E_idx)%sf(i, j, k)*q_prim_vf(contxe + l)%sf(i, j, k) end do end do - $:END_GPU_PARALLEL_LOOP() + end do + $:END_GPU_PARALLEL_LOOP() - call s_gradient_dir(q_beta(3)%sf, q_beta(4)%sf, l) + call s_gradient_dir(q_beta(3)%sf, q_beta(4)%sf, l) - ! (beta / (1 - beta)) * d(Pu)/dl source - $:GPU_PARALLEL_LOOP(private='[i, j, k]', collapse=3) - do k = 0, p - do j = 0, n - do i = 0, m - if (q_beta(1)%sf(i, j, k) > (1._wp - lag_params%valmaxvoid)) then - rhs_vf(E_idx)%sf(i, j, k) = rhs_vf(E_idx)%sf(i, j, k) - q_beta(4)%sf(i, j, & - & k)*(1._wp - q_beta(1)%sf(i, j, k))/q_beta(1)%sf(i, j, k) - end if - end do + ! 
(beta / (1 - beta)) * d(Pu)/dl source + $:GPU_PARALLEL_LOOP(private='[i, j, k]', collapse=3) + do k = idwint(3)%beg, idwint(3)%end + do j = idwint(2)%beg, idwint(2)%end + do i = idwint(1)%beg, idwint(1)%end + if (q_beta(1)%sf(i, j, k) > (1._wp - lag_params%valmaxvoid)) then + rhs_vf(E_idx)%sf(i, j, k) = rhs_vf(E_idx)%sf(i, j, k) - q_beta(4)%sf(i, j, & + & k)*(1._wp - q_beta(1)%sf(i, j, k))/q_beta(1)%sf(i, j, k) + end if end do end do - $:END_GPU_PARALLEL_LOOP() end do - end if + $:END_GPU_PARALLEL_LOOP() + end do + call nvtxEndRange end subroutine s_compute_bubbles_EL_source - !> Compute the speed of sound from a given driving pressure + !> This procedure computes the speed of sound from a given driving pressure + !! @param q_prim_vf Primitive variables + !! @param pinf Driving pressure + !! @param cell Bubble cell + !! @param rhol Liquid density + !! @param gamma Liquid specific heat ratio + !! @param pi_inf Liquid stiffness + !! @param cson Calculated speed of sound subroutine s_compute_cson_from_pinf(q_prim_vf, pinf, cell, rhol, gamma, pi_inf, cson) $:GPU_ROUTINE(function_name='s_compute_cson_from_pinf', parallelism='[seq]', cray_inline=True) @@ -725,26 +867,38 @@ contains end subroutine s_compute_cson_from_pinf - !> Smear the bubble effects onto the Eulerian grid - subroutine s_smear_voidfraction() - - integer :: i, j, k, l + !> The purpose of this subroutine is to smear the effect of the bubbles in the Eulerian framework + subroutine s_smear_voidfraction(bc_type) - call nvtxStartRange("BUBBLES-LAGRANGE-KERNELS") + type(integer_field), dimension(1:num_dims,1:2), intent(in) :: bc_type + integer :: i, j, k, l + call nvtxStartRange("BUBBLES-LAGRANGE-SMEARING") $:GPU_PARALLEL_LOOP(private='[i, j, k, l]', collapse=4) do i = 1, q_beta_idx do l = idwbuff(3)%beg, idwbuff(3)%end do k = idwbuff(2)%beg, idwbuff(2)%end do j = idwbuff(1)%beg, idwbuff(1)%end q_beta(i)%sf(j, k, l) = 0._wp + kahan_comp(i)%sf(j, k, l) = 0._wp end do end do end do end do 
$:END_GPU_PARALLEL_LOOP() - call s_smoothfunction(nBubs, intfc_rad, intfc_vel, mtn_s, mtn_pos, q_beta) + ! Build cell list before smearing (CPU-side counting sort) + call s_build_cell_list(n_el_bubs_loc, mtn_s) + + call s_smoothfunction(n_el_bubs_loc, intfc_rad, intfc_vel, mtn_s, mtn_pos, q_beta, kahan_comp) + + call nvtxStartRange("BUBBLES-LAGRANGE-BETA-COMM") + if (lag_params%cluster_type >= 4) then + call s_populate_beta_buffers(q_beta, bc_type, 3) + else + call s_populate_beta_buffers(q_beta, bc_type, 2) + end if + call nvtxEndRange ! Store 1-beta $:GPU_PARALLEL_LOOP(private='[j, k, l]', collapse=3) @@ -758,12 +912,19 @@ contains end do end do $:END_GPU_PARALLEL_LOOP() - call nvtxEndRange end subroutine s_smear_voidfraction - !> Compute the bubble driving pressure p_inf + !> The purpose of this procedure is obtain the bubble driving pressure p_inf + !! @param bub_id Particle identifier + !! @param q_prim_vf Primitive variables + !! @param ptype 1: p at infinity, 2: averaged P at the bubble location + !! @param f_pinfl Driving pressure + !! @param cell Bubble cell + !! @param preterm1 Pre-computed term 1 + !! @param term2 Computed term 2 + !! 
@param Romega Control volume radius subroutine s_get_pinf(bub_id, q_prim_vf, ptype, f_pinfl, cell, preterm1, term2, Romega) $:GPU_ROUTINE(function_name='s_get_pinf',parallelism='[seq]', cray_inline=True) @@ -773,91 +934,170 @@ contains real(wp), intent(out) :: f_pinfl integer, dimension(3), intent(out) :: cell real(wp), intent(out), optional :: preterm1, term2, Romega - real(wp), dimension(3) :: scoord, psi + real(wp), dimension(3) :: scoord, psi_pos, psi_x, psi_y, psi_z + real(wp) :: xi, eta, zeta real(wp) :: dc, vol, aux real(wp) :: volgas, term1, Rbeq, denom real(wp) :: charvol, charpres, charvol2, charpres2 integer, dimension(3) :: cellaux integer :: i, j, k integer :: smearGrid, smearGridz - logical :: celloutside - scoord = mtn_s(bub_id,1:3,2) f_pinfl = 0._wp - !> Find current bubble cell - cell(:) = int(scoord(:)) - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - if (scoord(i) < 0._wp) cell(i) = cell(i) - 1 - end do + if (moving_lag_bubbles) then + cell = fd_number - buff_size + call s_locate_cell(mtn_pos(bub_id,1:3,2), cell, mtn_s(bub_id,1:3,2)) + scoord = mtn_s(bub_id,1:3,2) + else + scoord = mtn_s(bub_id,1:3,2) + cell(:) = int(scoord(:)) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + if (scoord(i) < 0._wp) cell(i) = cell(i) - 1 + end do + end if if ((lag_params%cluster_type == 1)) then !> Getting p_cell in terms of only the current cell by interpolation - !> Getting the cell volulme as Omega - if (p > 0) then - vol = dx(cell(1))*dy(cell(2))*dz(cell(3)) - else - if (cyl_coord) then - vol = dx(cell(1))*dy(cell(2))*y_cc(cell(2))*2._wp*pi + + if (fd_order == 2) then ! 
Bilinear interpolation + + if (p > 0) then + vol = dx(cell(1))*dy(cell(2))*dz(cell(3)) else - vol = dx(cell(1))*dy(cell(2))*lag_params%charwidth + if (cyl_coord) then + vol = dx(cell(1))*dy(cell(2))*y_cc(cell(2))*2._wp*pi + else + vol = dx(cell(1))*dy(cell(2))*lag_params%charwidth + end if end if - end if - !> Obtain bilinear interpolation coefficients, based on the current location of the bubble. - psi(1) = (scoord(1) - real(cell(1)))*dx(cell(1)) + x_cb(cell(1) - 1) - if (cell(1) == (m + buff_size)) then - cell(1) = cell(1) - 1 - psi(1) = 1._wp - else if (cell(1) == (-buff_size)) then - psi(1) = 0._wp - else - if (psi(1) < x_cc(cell(1))) cell(1) = cell(1) - 1 - psi(1) = abs((psi(1) - x_cc(cell(1)))/(x_cc(cell(1) + 1) - x_cc(cell(1)))) - end if + !> Obtain bilinear interpolation coefficients, based on the current location of the bubble. + psi_pos(1) = (scoord(1) - real(cell(1)))*dx(cell(1)) + x_cb(cell(1) - 1) + psi_pos(1) = abs((psi_pos(1) - x_cc(cell(1)))/(x_cc(cell(1) + 1) - x_cc(cell(1)))) - psi(2) = (scoord(2) - real(cell(2)))*dy(cell(2)) + y_cb(cell(2) - 1) - if (cell(2) == (n + buff_size)) then - cell(2) = cell(2) - 1 - psi(2) = 1._wp - else if (cell(2) == (-buff_size)) then - psi(2) = 0._wp - else - if (psi(2) < y_cc(cell(2))) cell(2) = cell(2) - 1 - psi(2) = abs((psi(2) - y_cc(cell(2)))/(y_cc(cell(2) + 1) - y_cc(cell(2)))) - end if + psi_pos(2) = (scoord(2) - real(cell(2)))*dy(cell(2)) + y_cb(cell(2) - 1) + psi_pos(2) = abs((psi_pos(2) - y_cc(cell(2)))/(y_cc(cell(2) + 1) - y_cc(cell(2)))) - if (p > 0) then - psi(3) = (scoord(3) - real(cell(3)))*dz(cell(3)) + z_cb(cell(3) - 1) - if (cell(3) == (p + buff_size)) then - cell(3) = cell(3) - 1 - psi(3) = 1._wp - else if (cell(3) == (-buff_size)) then - psi(3) = 0._wp + if (p > 0) then + psi_pos(3) = (scoord(3) - real(cell(3)))*dz(cell(3)) + z_cb(cell(3) - 1) + psi_pos(3) = abs((psi_pos(3) - z_cc(cell(3)))/(z_cc(cell(3) + 1) - z_cc(cell(3)))) else - if (psi(3) < z_cc(cell(3))) cell(3) = cell(3) - 1 - psi(3) = 
abs((psi(3) - z_cc(cell(3)))/(z_cc(cell(3) + 1) - z_cc(cell(3)))) + psi_pos(3) = 0._wp end if - else - psi(3) = 0._wp - end if - !> Perform bilinear interpolation - if (p == 0) then ! 2D - f_pinfl = q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3))*(1._wp - psi(1))*(1._wp - psi(2)) - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + 1, cell(2), cell(3))*psi(1)*(1._wp - psi(2)) - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + 1, cell(2) + 1, cell(3))*psi(1)*psi(2) - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1), cell(2) + 1, cell(3))*(1._wp - psi(1))*psi(2) - else ! 3D - f_pinfl = q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3))*(1._wp - psi(1))*(1._wp - psi(2))*(1._wp - psi(3)) - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + 1, cell(2), cell(3))*psi(1)*(1._wp - psi(2))*(1._wp - psi(3)) - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + 1, cell(2) + 1, cell(3))*psi(1)*psi(2)*(1._wp - psi(3)) - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1), cell(2) + 1, cell(3))*(1._wp - psi(1))*psi(2)*(1._wp - psi(3)) - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3) + 1)*(1._wp - psi(1))*(1._wp - psi(2))*psi(3) - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + 1, cell(2), cell(3) + 1)*psi(1)*(1._wp - psi(2))*psi(3) - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + 1, cell(2) + 1, cell(3) + 1)*psi(1)*psi(2)*psi(3) - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1), cell(2) + 1, cell(3) + 1)*(1._wp - psi(1))*psi(2)*psi(3) + ! Calculate bilinear basis functions for each direction For normalized coordinate xi in [0, 1], the two basis + ! functions are: phi_0(xi) = 1 - xi, phi_1(xi) = xi + + ! X-direction basis functions + psi_x(1) = 1._wp - psi_pos(1) ! Left basis function + psi_x(2) = psi_pos(1) ! Right basis function + + ! Y-direction basis functions + psi_y(1) = 1._wp - psi_pos(2) ! Left basis function + psi_y(2) = psi_pos(2) ! Right basis function + + if (p > 0) then + ! Z-direction basis functions + psi_z(1) = 1._wp - psi_pos(3) ! 
Left basis function + psi_z(2) = psi_pos(3) ! Right basis function + else + psi_z(1) = 1._wp + psi_z(2) = 0._wp + end if + + !> Perform bilinear interpolation + f_pinfl = 0._wp + + if (p == 0) then ! 2D - 4 point interpolation (2x2) + do j = 1, 2 + do i = 1, 2 + f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + i - 1, cell(2) + j - 1, cell(3))*psi_x(i)*psi_y(j) + end do + end do + else + do k = 1, 2 + do j = 1, 2 + do i = 1, 2 + f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + i - 1, cell(2) + j - 1, & + & cell(3) + k - 1)*psi_x(i)*psi_y(j)*psi_z(k) + end do + end do + end do + end if + else if (fd_order == 4) then ! Biquadratic interpolation + if (p > 0) then + vol = dx(cell(1))*dy(cell(2))*dz(cell(3)) + else + if (cyl_coord) then + vol = dx(cell(1))*dy(cell(2))*y_cc(cell(2))*2._wp*pi + else + vol = dx(cell(1))*dy(cell(2))*lag_params%charwidth + end if + end if + + !> Obtain biquadratic interpolation coefficients, based on the current location of the bubble. + ! For biquadratic interpolation, we need coefficients for 3 points in each direction + psi_pos(1) = (scoord(1) - real(cell(1)))*dx(cell(1)) + x_cb(cell(1) - 1) + psi_pos(1) = (psi_pos(1) - x_cc(cell(1)))/(x_cc(cell(1) + 1) - x_cc(cell(1))) + + psi_pos(2) = (scoord(2) - real(cell(2)))*dy(cell(2)) + y_cb(cell(2) - 1) + psi_pos(2) = (psi_pos(2) - y_cc(cell(2)))/(y_cc(cell(2) + 1) - y_cc(cell(2))) + + if (p > 0) then + psi_pos(3) = (scoord(3) - real(cell(3)))*dz(cell(3)) + z_cb(cell(3) - 1) + psi_pos(3) = (psi_pos(3) - z_cc(cell(3)))/(z_cc(cell(3) + 1) - z_cc(cell(3))) + else + psi_pos(3) = 0._wp + end if + + ! Calculate biquadratic basis functions for each direction For normalized coordinate xi in [-1, 1], the three basis + ! functions are: phi_0(xi) = xi*(xi-1)/2, phi_1(xi) = (1-xi)*(1+xi), phi_2(xi) = xi*(xi+1)/2 + + ! X-direction basis functions + xi = 2._wp*psi_pos(1) - 1._wp ! Convert to [-1, 1] range + psi_x(1) = xi*(xi - 1._wp)/2._wp ! Left basis function + psi_x(2) = (1._wp - xi)*(1._wp + xi) ! 
Center basis function + psi_x(3) = xi*(xi + 1._wp)/2._wp ! Right basis function + + ! Y-direction basis functions + eta = 2._wp*psi_pos(2) - 1._wp ! Convert to [-1, 1] range + psi_y(1) = eta*(eta - 1._wp)/2._wp ! Left basis function + psi_y(2) = (1._wp - eta)*(1._wp + eta) ! Center basis function + psi_y(3) = eta*(eta + 1._wp)/2._wp ! Right basis function + + if (p > 0) then + ! Z-direction basis functions + zeta = 2._wp*psi_pos(3) - 1._wp ! Convert to [-1, 1] range + psi_z(1) = zeta*(zeta - 1._wp)/2._wp ! Left basis function + psi_z(2) = (1._wp - zeta)*(1._wp + zeta) ! Center basis function + psi_z(3) = zeta*(zeta + 1._wp)/2._wp ! Right basis function + else + psi_z(1) = 0._wp + psi_z(2) = 1._wp + psi_z(3) = 0._wp + end if + + !> Perform biquadratic interpolation + f_pinfl = 0._wp + + if (p == 0) then ! 2D - 9 point interpolation (3x3) + do j = 1, 3 + do i = 1, 3 + f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + i - 2, cell(2) + j - 2, cell(3))*psi_x(i)*psi_y(j) + end do + end do + else + do k = 1, 3 + do j = 1, 3 + do i = 1, 3 + f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + i - 2, cell(2) + j - 2, & + & cell(3) + k - 2)*psi_x(i)*psi_y(j)*psi_z(k) + end do + end do + end do + end if end if ! R_Omega @@ -887,56 +1127,25 @@ contains cellaux(3) = cell(3) + k - (mapCells + 1) if (p == 0) cellaux(3) = 0 - !> check if the current cell is outside the computational domain or not (including ghost cells) - celloutside = .false. - if (num_dims == 2) then - if ((cellaux(1) < -buff_size) .or. (cellaux(2) < -buff_size)) then - celloutside = .true. - end if - if (cyl_coord .and. y_cc(cellaux(2)) < 0._wp) then - celloutside = .true. - end if - if ((cellaux(2) > n + buff_size) .or. (cellaux(1) > m + buff_size)) then - celloutside = .true. - end if + !> Obtaining the cell volume + if (p > 0) then + vol = dx(cellaux(1))*dy(cellaux(2))*dz(cellaux(3)) else - if ((cellaux(3) < -buff_size) .or. (cellaux(1) < -buff_size) .or. (cellaux(2) < -buff_size)) then - celloutside = .true. 
- end if - - if ((cellaux(3) > p + buff_size) .or. (cellaux(2) > n + buff_size) .or. (cellaux(1) > m + buff_size)) & - & then - celloutside = .true. - end if - end if - if (.not. celloutside) then - if (cyl_coord .and. (p == 0) .and. (y_cc(cellaux(2)) < 0._wp)) then - celloutside = .true. - end if - end if - - if (.not. celloutside) then - !> Obtaining the cell volulme - if (p > 0) then - vol = dx(cellaux(1))*dy(cellaux(2))*dz(cellaux(3)) + if (cyl_coord) then + vol = dx(cellaux(1))*dy(cellaux(2))*y_cc(cellaux(2))*2._wp*pi else - if (cyl_coord) then - vol = dx(cellaux(1))*dy(cellaux(2))*y_cc(cellaux(2))*2._wp*pi - else - vol = dx(cellaux(1))*dy(cellaux(2))*lag_params%charwidth - end if + vol = dx(cellaux(1))*dy(cellaux(2))*lag_params%charwidth end if - !> Update values - charvol = charvol + vol - charpres = charpres + q_prim_vf(E_idx)%sf(cellaux(1), cellaux(2), cellaux(3))*vol - charvol2 = charvol2 + vol*q_beta(1)%sf(cellaux(1), cellaux(2), cellaux(3)) - charpres2 = charpres2 + q_prim_vf(E_idx)%sf(cellaux(1), cellaux(2), & - & cellaux(3))*vol*q_beta(1)%sf(cellaux(1), cellaux(2), cellaux(3)) end if + !> Update values + charvol = charvol + vol + charpres = charpres + q_prim_vf(E_idx)%sf(cellaux(1), cellaux(2), cellaux(3))*vol + charvol2 = charvol2 + vol*q_beta(1)%sf(cellaux(1), cellaux(2), cellaux(3)) + charpres2 = charpres2 + q_prim_vf(E_idx)%sf(cellaux(1), cellaux(2), & + & cellaux(3))*vol*q_beta(1)%sf(cellaux(1), cellaux(2), cellaux(3)) end do end do end do - f_pinfl = charpres2/charvol2 vol = charvol dc = (3._wp*abs(vol)/(4._wp*pi))**(1._wp/3._wp) @@ -966,126 +1175,342 @@ contains end subroutine s_get_pinf - !> Update Lagrangian bubble variables using TVD Runge-Kutta time stepping - impure subroutine s_update_lagrange_tdv_rk(stage) + !> This subroutine updates the Lagrange variables using the tvd RK time steppers. The time derivative of the bubble variables + !! must be stored at every stage to avoid precision errors. + !! 
@param stage Current tvd RK stage + impure subroutine s_update_lagrange_tdv_rk(q_prim_vf, bc_type, stage) - integer, intent(in) :: stage - integer :: k + type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf + type(integer_field), dimension(1:num_dims,1:2), intent(in) :: bc_type + integer, intent(in) :: stage + integer :: k if (time_stepper == 1) then ! 1st order TVD RK + $:GPU_PARALLEL_LOOP(private='[k]') - do k = 1, nBubs + do k = 1, n_el_bubs_loc ! u{1} = u{n} + dt * RHS{n} intfc_rad(k, 1) = intfc_rad(k, 1) + dt*intfc_draddt(k, 1) intfc_vel(k, 1) = intfc_vel(k, 1) + dt*intfc_dveldt(k, 1) - mtn_pos(k,1:3,1) = mtn_pos(k,1:3,1) + dt*mtn_dposdt(k,1:3,1) - mtn_vel(k,1:3,1) = mtn_vel(k,1:3,1) + dt*mtn_dveldt(k,1:3,1) gas_p(k, 1) = gas_p(k, 1) + dt*gas_dpdt(k, 1) gas_mv(k, 1) = gas_mv(k, 1) + dt*gas_dmvdt(k, 1) + if (moving_lag_bubbles) then + mtn_posPrev(k,1:3,1) = mtn_pos(k,1:3,1) + mtn_pos(k,1:3,1) = mtn_pos(k,1:3,1) + dt*mtn_dposdt(k,1:3,1) + mtn_vel(k,1:3,1) = mtn_vel(k,1:3,1) + dt*mtn_dveldt(k,1:3,1) + end if end do $:END_GPU_PARALLEL_LOOP() call s_transfer_data_to_tmp() - call s_write_void_evol(mytime) + if (moving_lag_bubbles) call s_enforce_EL_bubbles_boundary_conditions(q_prim_vf) + call s_smear_voidfraction(bc_type) + if (lag_params%write_void_evol) call s_write_void_evol(mytime) if (lag_params%write_bubbles_stats) call s_calculate_lag_bubble_stats() - if (lag_params%write_bubbles) then $:GPU_UPDATE(host='[gas_p, gas_mv, intfc_rad, intfc_vel]') - call s_write_lag_particles(mytime) + call s_write_lag_bubble_evol(mytime) end if else if (time_stepper == 2) then ! 2nd order TVD RK if (stage == 1) then $:GPU_PARALLEL_LOOP(private='[k]') - do k = 1, nBubs + do k = 1, n_el_bubs_loc ! 
u{1} = u{n} + dt * RHS{n} intfc_rad(k, 2) = intfc_rad(k, 1) + dt*intfc_draddt(k, 1) intfc_vel(k, 2) = intfc_vel(k, 1) + dt*intfc_dveldt(k, 1) - mtn_pos(k,1:3,2) = mtn_pos(k,1:3,1) + dt*mtn_dposdt(k,1:3,1) - mtn_vel(k,1:3,2) = mtn_vel(k,1:3,1) + dt*mtn_dveldt(k,1:3,1) gas_p(k, 2) = gas_p(k, 1) + dt*gas_dpdt(k, 1) gas_mv(k, 2) = gas_mv(k, 1) + dt*gas_dmvdt(k, 1) + if (moving_lag_bubbles) then + mtn_posPrev(k,1:3,2) = mtn_pos(k,1:3,1) + mtn_pos(k,1:3,2) = mtn_pos(k,1:3,1) + dt*mtn_dposdt(k,1:3,1) + mtn_vel(k,1:3,2) = mtn_vel(k,1:3,1) + dt*mtn_dveldt(k,1:3,1) + end if end do $:END_GPU_PARALLEL_LOOP() + + if (moving_lag_bubbles) call s_enforce_EL_bubbles_boundary_conditions(q_prim_vf) + call s_smear_voidfraction(bc_type) else if (stage == 2) then $:GPU_PARALLEL_LOOP(private='[k]') - do k = 1, nBubs + do k = 1, n_el_bubs_loc ! u{1} = u{n} + (1/2) * dt * (RHS{n} + RHS{1}) intfc_rad(k, 1) = intfc_rad(k, 1) + dt*(intfc_draddt(k, 1) + intfc_draddt(k, 2))/2._wp intfc_vel(k, 1) = intfc_vel(k, 1) + dt*(intfc_dveldt(k, 1) + intfc_dveldt(k, 2))/2._wp - mtn_pos(k,1:3,1) = mtn_pos(k,1:3,1) + dt*(mtn_dposdt(k,1:3,1) + mtn_dposdt(k,1:3,2))/2._wp - mtn_vel(k,1:3,1) = mtn_vel(k,1:3,1) + dt*(mtn_dveldt(k,1:3,1) + mtn_dveldt(k,1:3,2))/2._wp gas_p(k, 1) = gas_p(k, 1) + dt*(gas_dpdt(k, 1) + gas_dpdt(k, 2))/2._wp gas_mv(k, 1) = gas_mv(k, 1) + dt*(gas_dmvdt(k, 1) + gas_dmvdt(k, 2))/2._wp + if (moving_lag_bubbles) then + mtn_posPrev(k,1:3,1) = mtn_pos(k,1:3,2) + mtn_pos(k,1:3,1) = mtn_pos(k,1:3,1) + dt*(mtn_dposdt(k,1:3,1) + mtn_dposdt(k,1:3,2))/2._wp + mtn_vel(k,1:3,1) = mtn_vel(k,1:3,1) + dt*(mtn_dveldt(k,1:3,1) + mtn_dveldt(k,1:3,2))/2._wp + end if end do $:END_GPU_PARALLEL_LOOP() call s_transfer_data_to_tmp() - call s_write_void_evol(mytime) + if (moving_lag_bubbles) call s_enforce_EL_bubbles_boundary_conditions(q_prim_vf) + call s_smear_voidfraction(bc_type) + if (lag_params%write_void_evol) call s_write_void_evol(mytime) if (lag_params%write_bubbles_stats) call 
s_calculate_lag_bubble_stats() - if (lag_params%write_bubbles) then $:GPU_UPDATE(host='[gas_p, gas_mv, intfc_rad, intfc_vel]') - call s_write_lag_particles(mytime) + call s_write_lag_bubble_evol(mytime) end if end if else if (time_stepper == 3) then ! 3rd order TVD RK if (stage == 1) then $:GPU_PARALLEL_LOOP(private='[k]') - do k = 1, nBubs + do k = 1, n_el_bubs_loc ! u{1} = u{n} + dt * RHS{n} intfc_rad(k, 2) = intfc_rad(k, 1) + dt*intfc_draddt(k, 1) intfc_vel(k, 2) = intfc_vel(k, 1) + dt*intfc_dveldt(k, 1) - mtn_pos(k,1:3,2) = mtn_pos(k,1:3,1) + dt*mtn_dposdt(k,1:3,1) - mtn_vel(k,1:3,2) = mtn_vel(k,1:3,1) + dt*mtn_dveldt(k,1:3,1) gas_p(k, 2) = gas_p(k, 1) + dt*gas_dpdt(k, 1) gas_mv(k, 2) = gas_mv(k, 1) + dt*gas_dmvdt(k, 1) + if (moving_lag_bubbles) then + mtn_posPrev(k,1:3,2) = mtn_pos(k,1:3,1) + mtn_pos(k,1:3,2) = mtn_pos(k,1:3,1) + dt*mtn_dposdt(k,1:3,1) + mtn_vel(k,1:3,2) = mtn_vel(k,1:3,1) + dt*mtn_dveldt(k,1:3,1) + end if end do $:END_GPU_PARALLEL_LOOP() + + if (moving_lag_bubbles) call s_enforce_EL_bubbles_boundary_conditions(q_prim_vf) + call s_smear_voidfraction(bc_type) else if (stage == 2) then $:GPU_PARALLEL_LOOP(private='[k]') - do k = 1, nBubs + do k = 1, n_el_bubs_loc ! 
u{2} = u{n} + (1/4) * dt * [RHS{n} + RHS{1}] intfc_rad(k, 2) = intfc_rad(k, 1) + dt*(intfc_draddt(k, 1) + intfc_draddt(k, 2))/4._wp intfc_vel(k, 2) = intfc_vel(k, 1) + dt*(intfc_dveldt(k, 1) + intfc_dveldt(k, 2))/4._wp - mtn_pos(k,1:3,2) = mtn_pos(k,1:3,1) + dt*(mtn_dposdt(k,1:3,1) + mtn_dposdt(k,1:3,2))/4._wp - mtn_vel(k,1:3,2) = mtn_vel(k,1:3,1) + dt*(mtn_dveldt(k,1:3,1) + mtn_dveldt(k,1:3,2))/4._wp gas_p(k, 2) = gas_p(k, 1) + dt*(gas_dpdt(k, 1) + gas_dpdt(k, 2))/4._wp gas_mv(k, 2) = gas_mv(k, 1) + dt*(gas_dmvdt(k, 1) + gas_dmvdt(k, 2))/4._wp + if (moving_lag_bubbles) then + mtn_posPrev(k,1:3,2) = mtn_pos(k,1:3,2) + mtn_pos(k,1:3,2) = mtn_pos(k,1:3,1) + dt*(mtn_dposdt(k,1:3,1) + mtn_dposdt(k,1:3,2))/4._wp + mtn_vel(k,1:3,2) = mtn_vel(k,1:3,1) + dt*(mtn_dveldt(k,1:3,1) + mtn_dveldt(k,1:3,2))/4._wp + end if end do $:END_GPU_PARALLEL_LOOP() + + if (moving_lag_bubbles) call s_enforce_EL_bubbles_boundary_conditions(q_prim_vf) + call s_smear_voidfraction(bc_type) else if (stage == 3) then $:GPU_PARALLEL_LOOP(private='[k]') - do k = 1, nBubs + do k = 1, n_el_bubs_loc ! 
u{n+1} = u{n} + (2/3) * dt * [(1/4)* RHS{n} + (1/4)* RHS{1} + RHS{2}] intfc_rad(k, 1) = intfc_rad(k, 1) + (2._wp/3._wp)*dt*(intfc_draddt(k, 1)/4._wp + intfc_draddt(k, & & 2)/4._wp + intfc_draddt(k, 3)) intfc_vel(k, 1) = intfc_vel(k, 1) + (2._wp/3._wp)*dt*(intfc_dveldt(k, 1)/4._wp + intfc_dveldt(k, & & 2)/4._wp + intfc_dveldt(k, 3)) - mtn_pos(k,1:3,1) = mtn_pos(k,1:3,1) + (2._wp/3._wp)*dt*(mtn_dposdt(k,1:3,1)/4._wp + mtn_dposdt(k,1:3, & - & 2)/4._wp + mtn_dposdt(k,1:3,3)) - mtn_vel(k,1:3,1) = mtn_vel(k,1:3,1) + (2._wp/3._wp)*dt*(mtn_dveldt(k,1:3,1)/4._wp + mtn_dveldt(k,1:3, & - & 2)/4._wp + mtn_dveldt(k,1:3,3)) gas_p(k, 1) = gas_p(k, 1) + (2._wp/3._wp)*dt*(gas_dpdt(k, 1)/4._wp + gas_dpdt(k, 2)/4._wp + gas_dpdt(k, 3)) gas_mv(k, 1) = gas_mv(k, 1) + (2._wp/3._wp)*dt*(gas_dmvdt(k, 1)/4._wp + gas_dmvdt(k, 2)/4._wp + gas_dmvdt(k, 3)) + if (moving_lag_bubbles) then + mtn_posPrev(k,1:3,1) = mtn_pos(k,1:3,2) + mtn_pos(k,1:3,1) = mtn_pos(k,1:3,1) + (2._wp/3._wp)*dt*(mtn_dposdt(k,1:3,1)/4._wp + mtn_dposdt(k,1:3, & + & 2)/4._wp + mtn_dposdt(k,1:3,3)) + mtn_vel(k,1:3,1) = mtn_vel(k,1:3,1) + (2._wp/3._wp)*dt*(mtn_dveldt(k,1:3,1)/4._wp + mtn_dveldt(k,1:3, & + & 2)/4._wp + mtn_dveldt(k,1:3,3)) + end if end do $:END_GPU_PARALLEL_LOOP() call s_transfer_data_to_tmp() - call s_write_void_evol(mytime) + if (moving_lag_bubbles) call s_enforce_EL_bubbles_boundary_conditions(q_prim_vf) + call s_smear_voidfraction(bc_type) + if (lag_params%write_void_evol) call s_write_void_evol(mytime) if (lag_params%write_bubbles_stats) call s_calculate_lag_bubble_stats() - if (lag_params%write_bubbles) then $:GPU_UPDATE(host='[gas_p, gas_mv, gas_mg, intfc_rad, intfc_vel]') - call s_write_lag_particles(mytime) + call s_write_lag_bubble_evol(mytime) end if end if end if end subroutine s_update_lagrange_tdv_rk - !> Locate the cell index for a given physical position + !> This subroutine enforces reflective and wall boundary conditions for EL bubbles + !! 
@param dest Destination for the bubble position update + impure subroutine s_enforce_EL_bubbles_boundary_conditions(q_prim_vf) + + type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf + integer :: k, i, q + integer :: patch_id, newBubs, new_idx + real(wp) :: offset + integer, dimension(3) :: cell + + call nvtxStartRange("LAG-BC") + call nvtxStartRange("LAG-BC-DEV2HOST") + $:GPU_UPDATE(host='[bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, & + & intfc_rad, intfc_vel, mtn_pos, mtn_posPrev, mtn_vel, mtn_s, intfc_draddt, intfc_dveldt, gas_dpdt, & + & gas_dmvdt, mtn_dposdt, mtn_dveldt, keep_bubble, n_el_bubs_loc, wrap_bubble_dir, wrap_bubble_loc]') + call nvtxEndRange + + ! Handle MPI transfer of bubbles going to another processor's local domain + if (num_procs > 1) then + call nvtxStartRange("LAG-BC-TRANSFER-LIST") + call s_add_particles_to_transfer_list(n_el_bubs_loc, mtn_pos(:,:,2), mtn_posPrev(:,:,2)) + call nvtxEndRange + + call nvtxStartRange("LAG-BC-SENDRECV") + call s_mpi_sendrecv_particles(bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, gas_betaC, bub_dphidt, lag_id, & + & gas_p, gas_mv, intfc_rad, intfc_vel, mtn_pos, mtn_posPrev, mtn_vel, mtn_s, & + & intfc_draddt, intfc_dveldt, gas_dpdt, gas_dmvdt, mtn_dposdt, mtn_dveldt, lag_num_ts, & + & n_el_bubs_loc, 2) + call nvtxEndRange + end if + + call nvtxStartRange("LAG-BC-HOST2DEV") + $:GPU_UPDATE(device='[bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, & + & intfc_rad, intfc_vel, mtn_pos, mtn_posPrev, mtn_vel, mtn_s, intfc_draddt, intfc_dveldt, gas_dpdt, & + & gas_dmvdt, mtn_dposdt, mtn_dveldt, n_el_bubs_loc]') + call nvtxEndRange + + $:GPU_PARALLEL_LOOP(private='[k, cell]') + do k = 1, n_el_bubs_loc + keep_bubble(k) = 1 + wrap_bubble_loc(k,:) = 0 + wrap_bubble_dir(k,:) = 0 + + ! 
Relocate bubbles at solid boundaries and delete bubbles that leave buffer regions + if (any(bc_x%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. mtn_pos(k, 1, & + & 2) < x_cb(-1) + intfc_rad(k, 2)) then + mtn_pos(k, 1, 2) = x_cb(-1) + intfc_rad(k, 2) + else if (any(bc_x%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. mtn_pos(k, 1, & + & 2) > x_cb(m) - intfc_rad(k, 2)) then + mtn_pos(k, 1, 2) = x_cb(m) - intfc_rad(k, 2) + else if (bc_x%beg == BC_PERIODIC .and. mtn_pos(k, 1, 2) < pcomm_coords(1)%beg .and. mtn_posPrev(k, 1, & + & 2) >= pcomm_coords(1)%beg) then + wrap_bubble_dir(k, 1) = 1 + wrap_bubble_loc(k, 1) = -1 + else if (bc_x%end == BC_PERIODIC .and. mtn_pos(k, 1, 2) > pcomm_coords(1)%end .and. mtn_posPrev(k, 1, & + & 2) <= pcomm_coords(1)%end) then + wrap_bubble_dir(k, 1) = 1 + wrap_bubble_loc(k, 1) = 1 + else if (mtn_pos(k, 1, 2) >= x_cb(m)) then + keep_bubble(k) = 0 + else if (mtn_pos(k, 1, 2) < x_cb(-1)) then + keep_bubble(k) = 0 + end if + + if (any(bc_y%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. mtn_pos(k, 2, & + & 2) < y_cb(-1) + intfc_rad(k, 2)) then + mtn_pos(k, 2, 2) = y_cb(-1) + intfc_rad(k, 2) + else if (any(bc_y%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. mtn_pos(k, 2, & + & 2) > y_cb(n) - intfc_rad(k, 2)) then + mtn_pos(k, 2, 2) = y_cb(n) - intfc_rad(k, 2) + else if (bc_y%beg == BC_PERIODIC .and. mtn_pos(k, 2, 2) < pcomm_coords(2)%beg .and. mtn_posPrev(k, 2, & + & 2) >= pcomm_coords(2)%beg) then + wrap_bubble_dir(k, 2) = 1 + wrap_bubble_loc(k, 2) = -1 + else if (bc_y%end == BC_PERIODIC .and. mtn_pos(k, 2, 2) > pcomm_coords(2)%end .and. 
mtn_posPrev(k, 2, & + & 2) <= pcomm_coords(2)%end) then + wrap_bubble_dir(k, 2) = 1 + wrap_bubble_loc(k, 2) = 1 + else if (mtn_pos(k, 2, 2) >= y_cb(n)) then + keep_bubble(k) = 0 + else if (mtn_pos(k, 2, 2) < y_cb(-1)) then + keep_bubble(k) = 0 + end if + + if (p > 0) then + if (any(bc_z%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. mtn_pos(k, 3, & + & 2) < z_cb(-1) + intfc_rad(k, 2)) then + mtn_pos(k, 3, 2) = z_cb(-1) + intfc_rad(k, 2) + else if (any(bc_z%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. mtn_pos(k, 3, & + & 2) > z_cb(p) - intfc_rad(k, 2)) then + mtn_pos(k, 3, 2) = z_cb(p) - intfc_rad(k, 2) + else if (bc_z%beg == BC_PERIODIC .and. mtn_pos(k, 3, 2) < pcomm_coords(3)%beg .and. mtn_posPrev(k, 3, & + & 2) >= pcomm_coords(3)%beg) then + wrap_bubble_dir(k, 3) = 1 + wrap_bubble_loc(k, 3) = -1 + else if (bc_z%end == BC_PERIODIC .and. mtn_pos(k, 3, 2) > pcomm_coords(3)%end .and. mtn_posPrev(k, 3, & + & 2) <= pcomm_coords(3)%end) then + wrap_bubble_dir(k, 3) = 1 + wrap_bubble_loc(k, 3) = 1 + else if (mtn_pos(k, 3, 2) >= z_cb(p)) then + keep_bubble(k) = 0 + else if (mtn_pos(k, 3, 2) < z_cb(-1)) then + keep_bubble(k) = 0 + end if + end if + + if (keep_bubble(k) == 1) then + ! 
Remove bubbles that are no longer in a liquid + cell = fd_number - buff_size + call s_locate_cell(mtn_pos(k,1:3,2), cell, mtn_s(k,1:3,2)) + + if (q_prim_vf(advxb)%sf(cell(1), cell(2), cell(3)) < (1._wp - lag_params%valmaxvoid)) then + keep_bubble(k) = 0 + end if + end if + end do + $:END_GPU_PARALLEL_LOOP() + + if (n_el_bubs_loc > 0) then + call nvtxStartRange("LAG-BC-DEV2HOST") + $:GPU_UPDATE(host='[bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, & + & intfc_rad, intfc_vel, mtn_pos, mtn_posPrev, mtn_vel, mtn_s, intfc_draddt, intfc_dveldt, gas_dpdt, & + & gas_dmvdt, mtn_dposdt, mtn_dveldt, keep_bubble, n_el_bubs_loc, wrap_bubble_dir, wrap_bubble_loc]') + call nvtxEndRange + + newBubs = 0 + do k = 1, n_el_bubs_loc + if (keep_bubble(k) == 1) then + newBubs = newBubs + 1 + if (newBubs /= k) then + call s_copy_lag_bubble(newBubs, k) + wrap_bubble_dir(newBubs,:) = wrap_bubble_dir(k,:) + wrap_bubble_loc(newBubs,:) = wrap_bubble_loc(k,:) + end if + end if + end do + n_el_bubs_loc = newBubs + + ! 
Handle periodic wrapping of bubbles on same processor + do k = 1, n_el_bubs_loc + if (any(wrap_bubble_dir(k,:) == 1)) then + do i = 1, num_dims + if (wrap_bubble_dir(k, i) == 1) then + offset = glb_bounds(i)%end - glb_bounds(i)%beg + if (wrap_bubble_loc(k, i) == 1) then + do q = 1, 2 + mtn_pos(k, i, q) = mtn_pos(k, i, q) - offset + mtn_posPrev(k, i, q) = mtn_posPrev(k, i, q) - offset + end do + else if (wrap_bubble_loc(k, i) == -1) then + do q = 1, 2 + mtn_pos(k, i, q) = mtn_pos(k, i, q) + offset + mtn_posPrev(k, i, q) = mtn_posPrev(k, i, q) + offset + end do + end if + end if + end do + end if + end do + call nvtxStartRange("LAG-BC-HOST2DEV") + $:GPU_UPDATE(device='[bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, gas_betaC, bub_dphidt, lag_id, gas_p, & + & gas_mv, intfc_rad, intfc_vel, mtn_pos, mtn_posPrev, mtn_vel, mtn_s, intfc_draddt, intfc_dveldt, & + & gas_dpdt, gas_dmvdt, mtn_dposdt, mtn_dveldt, n_el_bubs_loc]') + call nvtxEndRange + end if + + $:GPU_PARALLEL_LOOP(private='[cell]') + do k = 1, n_el_bubs_loc + cell = fd_number - buff_size + call s_locate_cell(mtn_pos(k,1:3,2), cell, mtn_s(k,1:3,2)) + end do + + call nvtxEndRange + + end subroutine s_enforce_EL_bubbles_boundary_conditions + + !> This subroutine returns the computational coordinate of the cell for the given position. + !! @param pos Input coordinates + !! @param cell Computational coordinate of the cell + !! 
@param scoord Calculated particle coordinates subroutine s_locate_cell(pos, cell, scoord) + $:GPU_ROUTINE(function_name='s_locate_cell',parallelism='[seq]', cray_inline=True) + real(wp), dimension(3), intent(in) :: pos real(wp), dimension(3), intent(out) :: scoord integer, dimension(3), intent(inout) :: cell @@ -1095,7 +1520,7 @@ contains cell(1) = cell(1) - 1 end do - do while (pos(1) > x_cb(cell(1))) + do while (pos(1) >= x_cb(cell(1))) cell(1) = cell(1) + 1 end do @@ -1103,7 +1528,7 @@ contains cell(2) = cell(2) - 1 end do - do while (pos(2) > y_cb(cell(2))) + do while (pos(2) >= y_cb(cell(2))) cell(2) = cell(2) + 1 end do @@ -1111,7 +1536,7 @@ contains do while (pos(3) < z_cb(cell(3) - 1)) cell(3) = cell(3) - 1 end do - do while (pos(3) > z_cb(cell(3))) + do while (pos(3) >= z_cb(cell(3))) cell(3) = cell(3) + 1 end do end if @@ -1132,13 +1557,13 @@ contains end subroutine s_locate_cell - !> Transfer data into the temporal variables + !> This subroutine transfer data into the temporal variables. impure subroutine s_transfer_data_to_tmp() integer :: k $:GPU_PARALLEL_LOOP(private='[k]') - do k = 1, nBubs + do k = 1, n_el_bubs_loc gas_p(k, 2) = gas_p(k, 1) gas_mv(k, 2) = gas_mv(k, 1) intfc_rad(k, 2) = intfc_rad(k, 1) @@ -1152,7 +1577,9 @@ contains end subroutine s_transfer_data_to_tmp - !> Determine if a bubble position lies within the current MPI subdomain including ghost cells + !> The purpose of this procedure is to determine if the global coordinates of the bubbles are present in the current MPI + !! processor (including ghost cells). + !! @param pos_part Spatial coordinates of the bubble function particle_in_domain(pos_part) logical :: particle_in_domain @@ -1162,22 +1589,23 @@ contains if (p == 0 .and. cyl_coord .neqv. .true.) then ! Defining a virtual z-axis that has the same dimensions as y-axis defined in the input file - particle_in_domain = ((pos_part(1) < x_cb(m + buff_size)) .and. (pos_part(1) >= x_cb(-buff_size - 1)) & - & .and. 
(pos_part(2) < y_cb(n + buff_size)) .and. (pos_part(2) >= y_cb(-buff_size - 1)) & - & .and. (pos_part(3) < lag_params%charwidth/2._wp) .and. (pos_part(3) >= & - & -lag_params%charwidth/2._wp)) + particle_in_domain = ((pos_part(1) < x_cb(m + buff_size - fd_number)) .and. (pos_part(1) >= x_cb(fd_number & + & - buff_size - 1)) .and. (pos_part(2) < y_cb(n + buff_size - fd_number)) .and. (pos_part(2) & + & >= y_cb(fd_number - buff_size - 1)) .and. (pos_part(3) < lag_params%charwidth/2._wp) & + & .and. (pos_part(3) > -lag_params%charwidth/2._wp)) else ! cyl_coord - particle_in_domain = ((pos_part(1) < x_cb(m + buff_size)) .and. (pos_part(1) >= x_cb(-buff_size - 1)) & - & .and. (abs(pos_part(2)) < y_cb(n + buff_size)) .and. (abs(pos_part(2)) >= max(y_cb(-buff_size & - & - 1), 0._wp))) + particle_in_domain = ((pos_part(1) < x_cb(m + buff_size - fd_number)) .and. (pos_part(1) >= x_cb(fd_number & + & - buff_size - 1)) .and. (abs(pos_part(2)) < y_cb(n + buff_size - fd_number)) & + & .and. (abs(pos_part(2)) >= max(y_cb(fd_number - buff_size - 1), 0._wp))) end if ! 3D if (p > 0) then - particle_in_domain = ((pos_part(1) < x_cb(m + buff_size)) .and. (pos_part(1) >= x_cb(-buff_size - 1)) & - & .and. (pos_part(2) < y_cb(n + buff_size)) .and. (pos_part(2) >= y_cb(-buff_size - 1)) & - & .and. (pos_part(3) < z_cb(p + buff_size)) .and. (pos_part(3) >= z_cb(-buff_size - 1))) + particle_in_domain = ((pos_part(1) < x_cb(m + buff_size - fd_number)) .and. (pos_part(1) >= x_cb(fd_number & + & - buff_size - 1)) .and. (pos_part(2) < y_cb(n + buff_size - fd_number)) .and. (pos_part(2) & + & >= y_cb(fd_number - buff_size - 1)) .and. (pos_part(3) < z_cb(p + buff_size - fd_number)) & + & .and. (pos_part(3) >= z_cb(fd_number - buff_size - 1))) end if ! 
For symmetric and wall boundary condition @@ -1204,7 +1632,9 @@ contains end function particle_in_domain - !> Determine if a Lagrangian bubble is within the physical domain excluding ghost cells + !> The purpose of this procedure is to determine if the lagrangian bubble is located in the physical domain. The ghost cells are + !! not part of the physical domain. + !! @param pos_part Spatial coordinates of the bubble function particle_in_domain_physical(pos_part) logical :: particle_in_domain_physical @@ -1220,7 +1650,11 @@ contains end function particle_in_domain_physical - !> Compute the gradient of a scalar field using second-order central differences on a non-uniform grid + !> The purpose of this procedure is to calculate the gradient of a scalar field along the x, y and z directions following a + !! second-order central difference considering uneven widths + !! @param q Input scalar field + !! @param dq Output gradient of q + !! @param dir Gradient spatial direction subroutine s_gradient_dir(q, dq, dir) real(stp), dimension(idwbuff(1)%beg:,idwbuff(2)%beg:,idwbuff(3)%beg:), intent(inout) :: q, dq @@ -1270,7 +1704,8 @@ contains end subroutine s_gradient_dir - !> Write Lagrangian bubble state data at each time step + !> Subroutine that writes on each time step the changes of the lagrangian bubbles. + !! @param qtime Current time impure subroutine s_write_lag_particles(qtime) real(wp), intent(in) :: qtime @@ -1281,7 +1716,34 @@ contains write (file_loc, '(A,I0,A)') 'lag_bubble_evol_', proc_rank, '.dat' file_loc = trim(case_dir) // '/D/' // trim(file_loc) - inquire (FILE=trim(file_loc), EXIST=file_exist) + call my_inquire(trim(file_loc), file_exist) + + if (precision == 1) then + FMT = "(A16,A14,8A16)" + else + FMT = "(A24,A14,8A24)" + end if + + if (.not. 
file_exist) then + open (LAG_EVOL_ID, FILE=trim(file_loc), form='formatted', position='rewind') + write (LAG_EVOL_ID, FMT) 'currentTime', 'particleID', 'x', 'y', 'z', 'coreVaporMass', 'coreVaporConcentration', & + & 'radius', 'interfaceVelocity', 'corePressure' + else + open (LAG_EVOL_ID, FILE=trim(file_loc), form='formatted', position='append') + end if + + end subroutine s_write_lag_particles + + !> @Brief Subroutine that opens the file to write the evolution of the lagrangian bubbles on each time step. + impure subroutine s_open_lag_bubble_evol() + + character(LEN=path_len + 2*name_len) :: file_loc + logical :: file_exist + character(LEN=25) :: FMT + + write (file_loc, '(A,I0,A)') 'lag_bubble_evol_', proc_rank, '.dat' + file_loc = trim(case_dir) // '/D/' // trim(file_loc) + call my_inquire(trim(file_loc), file_exist) if (precision == 1) then FMT = "(A16,A14,8A16)" @@ -1290,13 +1752,25 @@ contains end if if (.not. file_exist) then - open (11, FILE=trim(file_loc), form='formatted', position='rewind') - write (11, FMT) 'currentTime', 'particleID', 'x', 'y', 'z', 'coreVaporMass', 'coreVaporConcentration', 'radius', & - & 'interfaceVelocity', 'corePressure' + open (LAG_EVOL_ID, FILE=trim(file_loc), form='formatted', position='rewind') + write (LAG_EVOL_ID, FMT) 'currentTime', 'particleID', 'x', 'y', 'z', 'coreVaporMass', 'coreVaporConcentration', & + & 'radius', 'interfaceVelocity', 'corePressure' else - open (11, FILE=trim(file_loc), form='formatted', position='append') + open (LAG_EVOL_ID, FILE=trim(file_loc), form='formatted', position='append') end if + end subroutine s_open_lag_bubble_evol + + !> Subroutine that writes on each time step the changes of the lagrangian bubbles. + !! 
@param q_time Current time + impure subroutine s_write_lag_bubble_evol(qtime) + + real(wp), intent(in) :: qtime + integer :: k, ios + character(LEN=25) :: FMT + character(LEN=path_len + 2*name_len) :: file_loc, path + logical :: file_exist + if (precision == 1) then FMT = "(F16.8,I14,8F16.8)" else @@ -1304,37 +1778,53 @@ contains end if ! Cycle through list - do k = 1, nBubs - write (11, FMT) qtime, lag_id(k, 1), mtn_pos(k, 1, 1), mtn_pos(k, 2, 1), mtn_pos(k, 3, 1), gas_mv(k, 1), gas_mv(k, & - & 1)/(gas_mv(k, 1) + gas_mg(k)), intfc_rad(k, 1), intfc_vel(k, 1), gas_p(k, 1) + do k = 1, n_el_bubs_loc + write (LAG_EVOL_ID, FMT) qtime, lag_id(k, 1), mtn_pos(k, 1, 1), mtn_pos(k, 2, 1), mtn_pos(k, 3, 1), gas_mv(k, 1), & + & gas_mv(k, 1)/(gas_mv(k, 1) + gas_mg(k)), intfc_rad(k, 1), intfc_vel(k, 1), gas_p(k, 1) end do - close (11) + end subroutine s_write_lag_bubble_evol - end subroutine s_write_lag_particles + impure subroutine s_close_lag_bubble_evol - !> Write void fraction statistics at each time step - impure subroutine s_write_void_evol(qtime) + close (LAG_EVOL_ID) + + end subroutine s_close_lag_bubble_evol + + subroutine s_open_void_evol - real(wp), intent(in) :: qtime - real(wp) :: volcell, voltot - real(wp) :: lag_void_max, lag_void_avg, lag_vol - real(wp) :: void_max_glb, void_avg_glb, vol_glb - integer :: i, j, k character(LEN=path_len + 2*name_len) :: file_loc logical :: file_exist if (proc_rank == 0) then write (file_loc, '(A)') 'voidfraction.dat' file_loc = trim(case_dir) // '/D/' // trim(file_loc) - inquire (FILE=trim(file_loc), EXIST=file_exist) + call my_inquire(trim(file_loc), file_exist) if (.not. file_exist) then - open (12, FILE=trim(file_loc), form='formatted', position='rewind') + open (LAG_VOID_ID, FILE=trim(file_loc), form='formatted', position='rewind') + ! write (12, *) 'currentTime, averageVoidFraction, ', & 'maximumVoidFraction, totalParticlesVolume' write (12, *) + ! 
'The averageVoidFraction value does ', & 'not reflect the real void fraction in the cloud since the ', & 'cells + ! which do not have bubbles are not accounted' else - open (12, FILE=trim(file_loc), form='formatted', position='append') + open (LAG_VOID_ID, FILE=trim(file_loc), form='formatted', position='append') end if end if + end subroutine s_open_void_evol + + !> Subroutine that writes some useful statistics related to the volume fraction of the particles (void fraction) in the + !! computational domain on each time step. + !! @param qtime Current time + impure subroutine s_write_void_evol(qtime) + + real(wp), intent(in) :: qtime + real(wp) :: volcell, voltot + real(wp) :: lag_void_max, lag_void_avg, lag_vol + real(wp) :: void_max_glb, void_avg_glb, vol_glb + integer :: i, j, k + character(LEN=path_len + 2*name_len) :: file_loc + logical :: file_exist + lag_void_max = 0._wp lag_void_avg = 0._wp lag_vol = 0._wp @@ -1370,13 +1860,19 @@ contains if (lag_vol > 0._wp) lag_void_avg = lag_void_avg/lag_vol if (proc_rank == 0) then - write (12, '(6X,4e24.8)') qtime, lag_void_avg, lag_void_max, voltot - close (12) + write (LAG_VOID_ID, '(6X,4e24.8)') qtime, lag_void_avg, lag_void_max, voltot end if end subroutine s_write_void_evol - !> Write restart files for the Lagrangian bubble solver + subroutine s_close_void_evol + + if (proc_rank == 0) close (LAG_VOID_ID) + + end subroutine s_close_void_evol + + !> Subroutine that writes the restarting files for the particles in the lagrangian solver. + !! @param t_step Current time step impure subroutine s_write_restart_lag_bubbles(t_step) ! 
Generic string used to store the address of a particular file @@ -1393,14 +1889,14 @@ contains integer(KIND=MPI_OFFSET_KIND) :: disp integer :: view integer, dimension(2) :: gsizes, lsizes, start_idx_part - integer, allocatable :: proc_bubble_counts(:) + integer, dimension(num_procs) :: part_order, part_ord_mpi + integer, dimension(num_procs) :: proc_bubble_counts real(wp), dimension(1:1,1:lag_io_vars) :: dummy - dummy = 0._wp bub_id = 0._wp - if (nBubs /= 0) then - do k = 1, nBubs + if (n_el_bubs_loc /= 0) then + do k = 1, n_el_bubs_loc if (particle_in_domain_physical(mtn_pos(k,1:3,1))) then bub_id = bub_id + 1 end if @@ -1409,8 +1905,6 @@ contains if (.not. parallel_io) return - allocate (proc_bubble_counts(num_procs)) - lsizes(1) = bub_id lsizes(2) = lag_io_vars @@ -1458,26 +1952,22 @@ contains if (bub_id > 0) then allocate (MPI_IO_DATA_lag_bubbles(max(1, bub_id),1:lag_io_vars)) - i = 1 - do k = 1, nBubs - if (particle_in_domain_physical(mtn_pos(k,1:3,1))) then - MPI_IO_DATA_lag_bubbles(i, 1) = real(lag_id(k, 1)) - MPI_IO_DATA_lag_bubbles(i,2:4) = mtn_pos(k,1:3,1) - MPI_IO_DATA_lag_bubbles(i,5:7) = mtn_posPrev(k,1:3,1) - MPI_IO_DATA_lag_bubbles(i,8:10) = mtn_vel(k,1:3,1) - MPI_IO_DATA_lag_bubbles(i, 11) = intfc_rad(k, 1) - MPI_IO_DATA_lag_bubbles(i, 12) = intfc_vel(k, 1) - MPI_IO_DATA_lag_bubbles(i, 13) = bub_R0(k) - MPI_IO_DATA_lag_bubbles(i, 14) = Rmax_stats(k) - MPI_IO_DATA_lag_bubbles(i, 15) = Rmin_stats(k) - MPI_IO_DATA_lag_bubbles(i, 16) = bub_dphidt(k) - MPI_IO_DATA_lag_bubbles(i, 17) = gas_p(k, 1) - MPI_IO_DATA_lag_bubbles(i, 18) = gas_mv(k, 1) - MPI_IO_DATA_lag_bubbles(i, 19) = gas_mg(k) - MPI_IO_DATA_lag_bubbles(i, 20) = gas_betaT(k) - MPI_IO_DATA_lag_bubbles(i, 21) = gas_betaC(k) - i = i + 1 - end if + do k = 1, n_el_bubs_loc + MPI_IO_DATA_lag_bubbles(k, 1) = real(lag_id(k, 1)) + MPI_IO_DATA_lag_bubbles(k,2:4) = mtn_pos(k,1:3,1) + MPI_IO_DATA_lag_bubbles(k,5:7) = mtn_posPrev(k,1:3,1) + MPI_IO_DATA_lag_bubbles(k,8:10) = mtn_vel(k,1:3,1) + 
MPI_IO_DATA_lag_bubbles(k, 11) = intfc_rad(k, 1) + MPI_IO_DATA_lag_bubbles(k, 12) = intfc_vel(k, 1) + MPI_IO_DATA_lag_bubbles(k, 13) = bub_R0(k) + MPI_IO_DATA_lag_bubbles(k, 14) = Rmax_stats(k) + MPI_IO_DATA_lag_bubbles(k, 15) = Rmin_stats(k) + MPI_IO_DATA_lag_bubbles(k, 16) = bub_dphidt(k) + MPI_IO_DATA_lag_bubbles(k, 17) = gas_p(k, 1) + MPI_IO_DATA_lag_bubbles(k, 18) = gas_mv(k, 1) + MPI_IO_DATA_lag_bubbles(k, 19) = gas_mg(k) + MPI_IO_DATA_lag_bubbles(k, 20) = gas_betaT(k) + MPI_IO_DATA_lag_bubbles(k, 21) = gas_betaC(k) end do call MPI_TYPE_CREATE_SUBARRAY(2, gsizes, lsizes, start_idx_part, MPI_ORDER_FORTRAN, mpi_p, view, ierr) @@ -1510,20 +2000,18 @@ contains call MPI_FILE_CLOSE(ifile, ierr) end if - - deallocate (proc_bubble_counts) #endif end subroutine s_write_restart_lag_bubbles - !> Compute the maximum and minimum radius of each bubble + !> This procedure calculates the maximum and minimum radius of each bubble. subroutine s_calculate_lag_bubble_stats() integer :: k $:GPU_PARALLEL_LOOP(private='[k]', reduction='[[Rmax_glb], [Rmin_glb]]', reductionOp='[MAX, MIN]', & & copy='[Rmax_glb, Rmin_glb]') - do k = 1, nBubs + do k = 1, n_el_bubs_loc Rmax_glb = max(Rmax_glb, intfc_rad(k, 1)/bub_R0(k)) Rmin_glb = min(Rmin_glb, intfc_rad(k, 1)/bub_R0(k)) Rmax_stats(k) = max(Rmax_stats(k), intfc_rad(k, 1)/bub_R0(k)) @@ -1533,17 +2021,15 @@ contains end subroutine s_calculate_lag_bubble_stats - !> Write the maximum and minimum radius statistics for each bubble - impure subroutine s_write_lag_bubble_stats() + impure subroutine s_open_lag_bubble_stats() - integer :: k character(LEN=path_len + 2*name_len) :: file_loc - character(len=20) :: FMT + character(LEN=20) :: FMT + logical :: file_exist write (file_loc, '(A,I0,A)') 'stats_lag_bubbles_', proc_rank, '.dat' file_loc = trim(case_dir) // '/D/' // trim(file_loc) - - $:GPU_UPDATE(host='[Rmax_glb, Rmin_glb]') + call my_inquire(trim(file_loc), file_exist) if (precision == 1) then FMT = "(A10,A14,5A16)" @@ -1551,68 +2037,89 @@ 
contains FMT = "(A10,A14,5A24)" end if - open (13, FILE=trim(file_loc), form='formatted', position='rewind') - write (13, FMT) 'proc_rank', 'particleID', 'x', 'y', 'z', 'Rmax_glb', 'Rmin_glb' - - if (precision == 1) then - FMT = "(I10,I14,5F16.8)" + if (.not. file_exist) then + open (LAG_STATS_ID, FILE=trim(file_loc), form='formatted', position='rewind') + write (LAG_STATS_ID, *) 'proc_rank, particleID, x, y, z, Rmax_glb, Rmin_glb' else - FMT = "(I10,I14,5F24.16)" + open (LAG_STATS_ID, FILE=trim(file_loc), form='formatted', position='append') end if - do k = 1, nBubs - write (13, FMT) proc_rank, lag_id(k, 1), mtn_pos(k, 1, 1), mtn_pos(k, 2, 1), mtn_pos(k, 3, 1), Rmax_stats(k), & - & Rmin_stats(k) - end do + end subroutine s_open_lag_bubble_stats - close (13) + !> Subroutine that writes the maximum and minimum radius of each bubble. + impure subroutine s_write_lag_bubble_stats() - end subroutine s_write_lag_bubble_stats + integer :: k + character(LEN=path_len + 2*name_len) :: file_loc + character(LEN=20) :: FMT - !> Remove a specific Lagrangian bubble when dt becomes too small - impure subroutine s_remove_lag_bubble(bub_id) + $:GPU_UPDATE(host='[Rmax_glb, Rmin_glb]') - integer, intent(in) :: bub_id - integer :: i + if (precision == 1) then + FMT = "(I10,I14,5F16.8)" + else + FMT = "(I10,I14,5F24.16)" + end if - $:GPU_LOOP(parallelism='[seq]') - do i = bub_id, nBubs - 1 - lag_id(i, 1) = lag_id(i + 1, 1) - bub_R0(i) = bub_R0(i + 1) - Rmax_stats(i) = Rmax_stats(i + 1) - Rmin_stats(i) = Rmin_stats(i + 1) - gas_mg(i) = gas_mg(i + 1) - gas_betaT(i) = gas_betaT(i + 1) - gas_betaC(i) = gas_betaC(i + 1) - bub_dphidt(i) = bub_dphidt(i + 1) - gas_p(i,1:2) = gas_p(i + 1,1:2) - gas_mv(i,1:2) = gas_mv(i + 1,1:2) - intfc_rad(i,1:2) = intfc_rad(i + 1,1:2) - intfc_vel(i,1:2) = intfc_vel(i + 1,1:2) - mtn_pos(i,1:3,1:2) = mtn_pos(i + 1,1:3,1:2) - mtn_posPrev(i,1:3,1:2) = mtn_posPrev(i + 1,1:3,1:2) - mtn_vel(i,1:3,1:2) = mtn_vel(i + 1,1:3,1:2) - mtn_s(i,1:3,1:2) = mtn_s(i + 1,1:3,1:2) 
- intfc_draddt(i,1:lag_num_ts) = intfc_draddt(i + 1,1:lag_num_ts) - intfc_dveldt(i,1:lag_num_ts) = intfc_dveldt(i + 1,1:lag_num_ts) - gas_dpdt(i,1:lag_num_ts) = gas_dpdt(i + 1,1:lag_num_ts) - gas_dmvdt(i,1:lag_num_ts) = gas_dmvdt(i + 1,1:lag_num_ts) + do k = 1, n_el_bubs_loc + write (LAG_STATS_ID, FMT) proc_rank, lag_id(k, 1), mtn_pos(k, 1, 1), mtn_pos(k, 2, 1), mtn_pos(k, 3, 1), & + & Rmax_stats(k), Rmin_stats(k) end do - nBubs = nBubs - 1 - $:GPU_UPDATE(device='[nBubs]') - - end subroutine s_remove_lag_bubble + end subroutine s_write_lag_bubble_stats - !> Finalize the Lagrangian bubble solver + subroutine s_close_lag_bubble_stats + + close (LAG_STATS_ID) + + end subroutine s_close_lag_bubble_stats + + !> The purpose of this subroutine is to remove one specific particle if dt is too small. + !! @param bub_id Particle id + impure subroutine s_copy_lag_bubble(dest, src) + + integer, intent(in) :: src, dest + + bub_R0(dest) = bub_R0(src) + Rmax_stats(dest) = Rmax_stats(src) + Rmin_stats(dest) = Rmin_stats(src) + gas_mg(dest) = gas_mg(src) + gas_betaT(dest) = gas_betaT(src) + gas_betaC(dest) = gas_betaC(src) + bub_dphidt(dest) = bub_dphidt(src) + lag_id(dest, 1) = lag_id(src, 1) + gas_p(dest,1:2) = gas_p(src,1:2) + gas_mv(dest,1:2) = gas_mv(src,1:2) + intfc_rad(dest,1:2) = intfc_rad(src,1:2) + intfc_vel(dest,1:2) = intfc_vel(src,1:2) + mtn_vel(dest,1:3,1:2) = mtn_vel(src,1:3,1:2) + mtn_s(dest,1:3,1:2) = mtn_s(src,1:3,1:2) + mtn_pos(dest,1:3,1:2) = mtn_pos(src,1:3,1:2) + mtn_posPrev(dest,1:3,1:2) = mtn_posPrev(src,1:3,1:2) + intfc_draddt(dest,1:lag_num_ts) = intfc_draddt(src,1:lag_num_ts) + intfc_dveldt(dest,1:lag_num_ts) = intfc_dveldt(src,1:lag_num_ts) + gas_dpdt(dest,1:lag_num_ts) = gas_dpdt(src,1:lag_num_ts) + gas_dmvdt(dest,1:lag_num_ts) = gas_dmvdt(src,1:lag_num_ts) + mtn_dposdt(dest,1:3,1:lag_num_ts) = mtn_dposdt(src,1:3,1:lag_num_ts) + mtn_dveldt(dest,1:3,1:lag_num_ts) = mtn_dveldt(src,1:3,1:lag_num_ts) + + end subroutine s_copy_lag_bubble + + !> The purpose of 
this subroutine is to deallocate variables impure subroutine s_finalize_lagrangian_solver() integer :: i + if (lag_params%write_void_evol) call s_close_void_evol + if (lag_params%write_bubbles) call s_close_lag_bubble_evol() + if (lag_params%write_bubbles_stats) call s_close_lag_bubble_stats() + do i = 1, q_beta_idx @:DEALLOCATE(q_beta(i)%sf) + @:DEALLOCATE(kahan_comp(i)%sf) end do @:DEALLOCATE(q_beta) + @:DEALLOCATE(kahan_comp) ! Deallocating space @:DEALLOCATE(lag_id) @@ -1638,6 +2145,28 @@ contains @:DEALLOCATE(mtn_dposdt) @:DEALLOCATE(mtn_dveldt) + @:DEALLOCATE(keep_bubble) + @:DEALLOCATE(wrap_bubble_loc, wrap_bubble_dir) + + ! Deallocate pressure gradient arrays and FD coefficients + if (lag_params%vel_model > 0 .and. lag_params%pressure_force) then + @:DEALLOCATE(grad_p_x) + @:DEALLOCATE(fd_coeff_x_pgrad) + if (n > 0) then + @:DEALLOCATE(grad_p_y) + @:DEALLOCATE(fd_coeff_y_pgrad) + if (p > 0) then + @:DEALLOCATE(grad_p_z) + @:DEALLOCATE(fd_coeff_z_pgrad) + end if + end if + end if + + ! Deallocate cell list arrays + @:DEALLOCATE(cell_list_start) + @:DEALLOCATE(cell_list_count) + @:DEALLOCATE(cell_list_idx) + end subroutine s_finalize_lagrangian_solver end module m_bubbles_EL diff --git a/src/simulation/m_bubbles_EL_kernels.fpp b/src/simulation/m_bubbles_EL_kernels.fpp index f1f80980b0..23bfc81b31 100644 --- a/src/simulation/m_bubbles_EL_kernels.fpp +++ b/src/simulation/m_bubbles_EL_kernels.fpp @@ -11,166 +11,272 @@ module m_bubbles_EL_kernels implicit none + ! Cell-centered pressure gradients (precomputed for translational motion) + real(wp), allocatable, dimension(:,:,:) :: grad_p_x, grad_p_y, grad_p_z + $:GPU_DECLARE(create='[grad_p_x, grad_p_y, grad_p_z]') + + ! 
Finite-difference coefficients for pressure gradient computation + real(wp), allocatable, dimension(:,:) :: fd_coeff_x_pgrad + real(wp), allocatable, dimension(:,:) :: fd_coeff_y_pgrad + real(wp), allocatable, dimension(:,:) :: fd_coeff_z_pgrad + $:GPU_DECLARE(create='[fd_coeff_x_pgrad, fd_coeff_y_pgrad, fd_coeff_z_pgrad]') + + ! Cell list for bubble-to-cell mapping (rebuilt each RK stage before smearing) + integer, allocatable, dimension(:,:,:) :: cell_list_start ! (0:m, 0:n, 0:p) + integer, allocatable, dimension(:,:,:) :: cell_list_count ! (0:m, 0:n, 0:p) + integer, allocatable, dimension(:) :: cell_list_idx ! (1:nBubs_glb) sorted bubble indices + $:GPU_DECLARE(create='[cell_list_start, cell_list_count, cell_list_idx]') + contains - !> Smear the Lagrangian bubble effects onto the Eulerian grid using the selected kernel - subroutine s_smoothfunction(nBubs, lbk_rad, lbk_vel, lbk_s, lbk_pos, updatedvar) + !> The purpose of this subroutine is to smear the strength of the lagrangian bubbles into the Eulerian framework using different + !! approaches. + !! @param nBubs Number of lagrangian bubbles in the current domain + !! @param lbk_rad Radius of the bubbles + !! @param lbk_vel Interface velocity of the bubbles + !! @param lbk_s Computational coordinates of the bubbles + !! @param lbk_pos Spatial coordinates of the bubbles + !! 
@param updatedvar Eulerian variable to be updated + subroutine s_smoothfunction(nBubs, lbk_rad, lbk_vel, lbk_s, lbk_pos, updatedvar, kcomp) integer, intent(in) :: nBubs real(wp), dimension(1:lag_params%nBubs_glb,1:3,1:2), intent(in) :: lbk_s, lbk_pos real(wp), dimension(1:lag_params%nBubs_glb,1:2), intent(in) :: lbk_rad, lbk_vel type(scalar_field), dimension(:), intent(inout) :: updatedvar + type(scalar_field), dimension(:), intent(inout) :: kcomp smoothfunc:select case(lag_params%smooth_type) case (1) - call s_gaussian(nBubs, lbk_rad, lbk_vel, lbk_s, lbk_pos, updatedvar) + call s_gaussian(nBubs, lbk_rad, lbk_vel, lbk_s, lbk_pos, updatedvar, kcomp) case (2) - call s_deltafunc(nBubs, lbk_rad, lbk_vel, lbk_s, updatedvar) + call s_deltafunc(nBubs, lbk_rad, lbk_vel, lbk_s, updatedvar, kcomp) end select smoothfunc end subroutine s_smoothfunction - !> Apply the delta kernel function to map bubble effects onto the containing cell - subroutine s_deltafunc(nBubs, lbk_rad, lbk_vel, lbk_s, updatedvar) + !> Builds a sorted cell list mapping each interior cell (0:m,0:n,0:p) to its resident bubbles. Uses a counting-sort on the host + !! (O(nBubs + N_cells)). Must be called before s_gaussian each RK stage. + !! @param nBubs Number of lagrangian bubbles in the current domain + !! @param lbk_s Computational coordinates of the bubbles + subroutine s_build_cell_list(nBubs, lbk_s) integer, intent(in) :: nBubs real(wp), dimension(1:lag_params%nBubs_glb,1:3,1:2), intent(in) :: lbk_s - real(wp), dimension(1:lag_params%nBubs_glb,1:2), intent(in) :: lbk_rad, lbk_vel - type(scalar_field), dimension(:), intent(inout) :: updatedvar - integer, dimension(3) :: cell - real(wp) :: strength_vel, strength_vol - real(wp) :: addFun1, addFun2, addFun3 - real(wp) :: volpart, Vol + integer :: l, ci, cj, ck, idx real(wp), dimension(3) :: s_coord - integer :: l - $:GPU_PARALLEL_LOOP(private='[l, s_coord, cell]') + ! Bring current bubble positions to host + + $:GPU_UPDATE(host='[lbk_s]') + + ! 
Pass 1: zero counts and count bubbles per cell + cell_list_count = 0 do l = 1, nBubs - volpart = 4._wp/3._wp*pi*lbk_rad(l, 2)**3._wp s_coord(1:3) = lbk_s(l,1:3,2) - call s_get_cell(s_coord, cell) - - strength_vol = volpart - strength_vel = 4._wp*pi*lbk_rad(l, 2)**2._wp*lbk_vel(l, 2) + ci = int(s_coord(1)) + cj = int(s_coord(2)) + ck = int(s_coord(3)) + ! Clamp to interior (bubbles should already be in [0:m,0:n,0:p]) + ci = max(0, min(ci, m)) + cj = max(0, min(cj, n)) + ck = max(0, min(ck, p)) + cell_list_count(ci, cj, ck) = cell_list_count(ci, cj, ck) + 1 + end do - if (num_dims == 2) then - Vol = dx(cell(1))*dy(cell(2))*lag_params%charwidth - if (cyl_coord) Vol = dx(cell(1))*dy(cell(2))*y_cc(cell(2))*2._wp*pi - else - Vol = dx(cell(1))*dy(cell(2))*dz(cell(3)) - end if + ! Prefix sum to compute start indices (1-based into cell_list_idx) + idx = 1 + do ck = 0, p + do cj = 0, n + do ci = 0, m + cell_list_start(ci, cj, ck) = idx + idx = idx + cell_list_count(ci, cj, ck) + end do + end do + end do - ! Update void fraction field - addFun1 = strength_vol/Vol - $:GPU_ATOMIC(atomic='update') - updatedvar(1)%sf(cell(1), cell(2), cell(3)) = updatedvar(1)%sf(cell(1), cell(2), cell(3)) + real(addFun1, kind=stp) - - ! Update time derivative of void fraction - addFun2 = strength_vel/Vol - $:GPU_ATOMIC(atomic='update') - updatedvar(2)%sf(cell(1), cell(2), cell(3)) = updatedvar(2)%sf(cell(1), cell(2), cell(3)) + real(addFun2, kind=stp) - - ! Product of two smeared functions Update void fraction * time derivative of void fraction - if (lag_params%cluster_type >= 4) then - addFun3 = (strength_vol*strength_vel)/Vol - $:GPU_ATOMIC(atomic='update') - updatedvar(5)%sf(cell(1), cell(2), cell(3)) = updatedvar(5)%sf(cell(1), cell(2), cell(3)) + real(addFun3, kind=stp) - end if + ! 
Pass 2: place bubble indices into cell_list_idx Temporarily reuse cell_list_count as a running offset + cell_list_count = 0 + do l = 1, nBubs + s_coord(1:3) = lbk_s(l,1:3,2) + ci = int(s_coord(1)) + cj = int(s_coord(2)) + ck = int(s_coord(3)) + ci = max(0, min(ci, m)) + cj = max(0, min(cj, n)) + ck = max(0, min(ck, p)) + cell_list_idx(cell_list_start(ci, cj, ck) + cell_list_count(ci, cj, ck)) = l + cell_list_count(ci, cj, ck) = cell_list_count(ci, cj, ck) + 1 end do - $:END_GPU_PARALLEL_LOOP() - end subroutine s_deltafunc + ! Send cell list arrays to GPU + $:GPU_UPDATE(device='[cell_list_start, cell_list_count, cell_list_idx]') - !> Apply the Gaussian kernel function to smear bubble effects onto surrounding cells - subroutine s_gaussian(nBubs, lbk_rad, lbk_vel, lbk_s, lbk_pos, updatedvar) + end subroutine s_build_cell_list + + !> Cell-centric delta-function smearing using the cell list (no GPU atomics). Each bubble only affects the cell it resides in. + !! The outer GPU loop iterates over interior cells and sums contributions from resident bubbles. + subroutine s_deltafunc(nBubs, lbk_rad, lbk_vel, lbk_s, updatedvar, kcomp) integer, intent(in) :: nBubs - real(wp), dimension(1:lag_params%nBubs_glb,1:3,1:2), intent(in) :: lbk_s, lbk_pos + real(wp), dimension(1:lag_params%nBubs_glb,1:3,1:2), intent(in) :: lbk_s real(wp), dimension(1:lag_params%nBubs_glb,1:2), intent(in) :: lbk_rad, lbk_vel type(scalar_field), dimension(:), intent(inout) :: updatedvar - real(wp), dimension(3) :: center - integer, dimension(3) :: cell - real(wp) :: stddsv + type(scalar_field), dimension(:), intent(inout) :: kcomp real(wp) :: strength_vel, strength_vol - real(wp), dimension(3) :: nodecoord - real(wp) :: addFun1, addFun2, addFun3 - real(wp) :: func, func2, volpart - integer, dimension(3) :: cellaux - real(wp), dimension(3) :: s_coord - integer :: l, i, j, k - logical :: celloutside - integer :: smearGrid, smearGridz - - smearGrid = mapCells - (-mapCells) + 1 ! 
Include the cell that contains the bubble (3+1+3) - smearGridz = smearGrid - if (p == 0) smearGridz = 1 - - $:GPU_PARALLEL_LOOP(private='[nodecoord, l, s_coord, cell, center]', copyin='[smearGrid, smearGridz]') - do l = 1, nBubs - nodecoord(1:3) = 0 - center(1:3) = 0._wp - volpart = 4._wp/3._wp*pi*lbk_rad(l, 2)**3._wp - s_coord(1:3) = lbk_s(l,1:3,2) - center(1:2) = lbk_pos(l,1:2,2) - if (p > 0) center(3) = lbk_pos(l, 3, 2) - call s_get_cell(s_coord, cell) - call s_compute_stddsv(cell, volpart, stddsv) - - strength_vol = volpart - strength_vel = 4._wp*pi*lbk_rad(l, 2)**2._wp*lbk_vel(l, 2) - - $:GPU_LOOP(collapse=3,private='[cellaux, nodecoord]') - do i = 1, smearGrid - do j = 1, smearGrid - do k = 1, smearGridz - cellaux(1) = cell(1) + i - (mapCells + 1) - cellaux(2) = cell(2) + j - (mapCells + 1) - cellaux(3) = cell(3) + k - (mapCells + 1) - if (p == 0) cellaux(3) = 0 - - ! Check if the cells intended to smear the bubbles in are in the computational domain and redefine the cells - ! for symmetric boundary - call s_check_celloutside(cellaux, celloutside) - - if (.not. celloutside) then - nodecoord(1) = x_cc(cellaux(1)) - nodecoord(2) = y_cc(cellaux(2)) - if (p > 0) nodecoord(3) = z_cc(cellaux(3)) - call s_applygaussian(center, cellaux, nodecoord, stddsv, 0._wp, func) - if (lag_params%cluster_type >= 4) call s_applygaussian(center, cellaux, nodecoord, stddsv, 1._wp, func2) - - ! 
Relocate cells for bubbles intersecting symmetric boundaries - if (any((/bc_x%beg, bc_x%end, bc_y%beg, bc_y%end, bc_z%beg, bc_z%end/) == BC_REFLECTIVE)) then - call s_shift_cell_symmetric_bc(cellaux, cell) - end if - else - func = 0._wp - func2 = 0._wp - cellaux(1) = cell(1) - cellaux(2) = cell(2) - cellaux(3) = cell(3) - if (p == 0) cellaux(3) = 0 + real(wp) :: volpart, Vol + real(wp) :: y_kahan, t_kahan + integer :: i, j, k, lb, bub_idx + + $:GPU_PARALLEL_LOOP(collapse=3, & + & private='[i, j, k, lb, bub_idx, volpart, Vol, strength_vel, strength_vol, y_kahan, t_kahan]') + do k = 0, p + do j = 0, n + do i = 0, m + ! Cell volume + if (num_dims == 2) then + Vol = dx(i)*dy(j)*lag_params%charwidth + if (cyl_coord) Vol = dx(i)*dy(j)*y_cc(j)*2._wp*pi + else + Vol = dx(i)*dy(j)*dz(k) + end if + + ! Loop over bubbles in this cell + $:GPU_LOOP(parallelism='[seq]') + do lb = cell_list_start(i, j, k), cell_list_start(i, j, k) + cell_list_count(i, j, k) - 1 + bub_idx = cell_list_idx(lb) + + volpart = 4._wp/3._wp*pi*lbk_rad(bub_idx, 2)**3._wp + strength_vol = volpart + strength_vel = 4._wp*pi*lbk_rad(bub_idx, 2)**2._wp*lbk_vel(bub_idx, 2) + + ! Kahan summation for void fraction + y_kahan = real(strength_vol/Vol, kind=wp) - kcomp(1)%sf(i, j, k) + t_kahan = updatedvar(1)%sf(i, j, k) + y_kahan + kcomp(1)%sf(i, j, k) = (t_kahan - updatedvar(1)%sf(i, j, k)) - y_kahan + updatedvar(1)%sf(i, j, k) = t_kahan + + ! Kahan summation for time derivative of void fraction + y_kahan = real(strength_vel/Vol, kind=wp) - kcomp(2)%sf(i, j, k) + t_kahan = updatedvar(2)%sf(i, j, k) + y_kahan + kcomp(2)%sf(i, j, k) = (t_kahan - updatedvar(2)%sf(i, j, k)) - y_kahan + updatedvar(2)%sf(i, j, k) = t_kahan + + ! 
Product of two smeared functions + if (lag_params%cluster_type >= 4) then + y_kahan = real((strength_vol*strength_vel)/Vol, kind=wp) - kcomp(5)%sf(i, j, k) + t_kahan = updatedvar(5)%sf(i, j, k) + y_kahan + kcomp(5)%sf(i, j, k) = (t_kahan - updatedvar(5)%sf(i, j, k)) - y_kahan + updatedvar(5)%sf(i, j, k) = t_kahan end if + end do + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() - ! Update void fraction field - addFun1 = func*strength_vol - $:GPU_ATOMIC(atomic='update') - updatedvar(1)%sf(cellaux(1), cellaux(2), cellaux(3)) = updatedvar(1)%sf(cellaux(1), cellaux(2), & - & cellaux(3)) + real(addFun1, kind=stp) + end subroutine s_deltafunc - ! Update time derivative of void fraction - addFun2 = func*strength_vel - $:GPU_ATOMIC(atomic='update') - updatedvar(2)%sf(cellaux(1), cellaux(2), cellaux(3)) = updatedvar(2)%sf(cellaux(1), cellaux(2), & - & cellaux(3)) + real(addFun2, kind=stp) + !> Cell-centric gaussian smearing using the cell list (no GPU atomics). Each grid cell accumulates contributions from nearby + !! bubbles looked up via cell_list_start/count/idx. + subroutine s_gaussian(nBubs, lbk_rad, lbk_vel, lbk_s, lbk_pos, updatedvar, kcomp) - ! 
Product of two smeared functions Update void fraction * time derivative of void fraction - if (lag_params%cluster_type >= 4) then - addFun3 = func2*strength_vol*strength_vel - $:GPU_ATOMIC(atomic='update') - updatedvar(5)%sf(cellaux(1), cellaux(2), cellaux(3)) = updatedvar(5)%sf(cellaux(1), cellaux(2), & - & cellaux(3)) + real(addFun3, kind=stp) - end if + integer, intent(in) :: nBubs + real(wp), dimension(1:lag_params%nBubs_glb,1:3,1:2), intent(in) :: lbk_s, lbk_pos + real(wp), dimension(1:lag_params%nBubs_glb,1:2), intent(in) :: lbk_rad, lbk_vel + type(scalar_field), dimension(:), intent(inout) :: updatedvar + type(scalar_field), dimension(:), intent(inout) :: kcomp + real(wp), dimension(3) :: center, nodecoord, s_coord + integer, dimension(3) :: cell, cellijk + real(wp) :: stddsv, volpart + real(wp) :: strength_vel, strength_vol + real(wp) :: func, func2 + real(wp) :: y_kahan, t_kahan + integer :: i, j, k, di, dj, dk, lb, bub_idx + integer :: di_beg, di_end, dj_beg, dj_end, dk_beg, dk_end + integer :: smear_x_beg, smear_x_end + integer :: smear_y_beg, smear_y_end + integer :: smear_z_beg, smear_z_end + + ! 
Extended grid range for smearing (includes buffer cells for MPI communication) + + smear_x_beg = -mapCells - 1 + smear_x_end = m + mapCells + 1 + smear_y_beg = merge(-mapCells - 1, 0, n > 0) + smear_y_end = merge(n + mapCells + 1, n, n > 0) + smear_z_beg = merge(-mapCells - 1, 0, p > 0) + smear_z_end = merge(p + mapCells + 1, p, p > 0) + + $:GPU_PARALLEL_LOOP(collapse=3, private='[i, j, k, di, dj, dk, lb, bub_idx, center, nodecoord, s_coord, cell, cellijk, & + & stddsv, volpart, strength_vel, strength_vol, func, func2, y_kahan, t_kahan, di_beg, di_end, dj_beg, & + & dj_end, dk_beg, dk_end]', copyin='[smear_x_beg, smear_x_end, smear_y_beg, smear_y_end, smear_z_beg, & + & smear_z_end]') + do k = smear_z_beg, smear_z_end + do j = smear_y_beg, smear_y_end + do i = smear_x_beg, smear_x_end + cellijk(1) = i + cellijk(2) = j + cellijk(3) = k + + nodecoord(1) = x_cc(i) + nodecoord(2) = y_cc(j) + nodecoord(3) = 0._wp + if (p > 0) nodecoord(3) = z_cc(k) + + ! Neighbor cell range clamped to interior [0:m, 0:n, 0:p] + di_beg = max(i - mapCells, 0) + di_end = min(i + mapCells, m) + dj_beg = max(j - mapCells, 0) + dj_end = min(j + mapCells, n) + dk_beg = max(k - mapCells, 0) + dk_end = min(k + mapCells, p) + + $:GPU_LOOP(parallelism='[seq]') + do dk = dk_beg, dk_end + $:GPU_LOOP(parallelism='[seq]') + do dj = dj_beg, dj_end + $:GPU_LOOP(parallelism='[seq]') + do di = di_beg, di_end + $:GPU_LOOP(parallelism='[seq]') + do lb = cell_list_start(di, dj, dk), cell_list_start(di, dj, dk) + cell_list_count(di, dj, dk) - 1 + bub_idx = cell_list_idx(lb) + + ! 
Bubble properties + volpart = 4._wp/3._wp*pi*lbk_rad(bub_idx, 2)**3._wp + s_coord(1:3) = lbk_s(bub_idx,1:3,2) + call s_get_cell(s_coord, cell) + call s_compute_stddsv(cell, volpart, stddsv) + + strength_vol = volpart + strength_vel = 4._wp*pi*lbk_rad(bub_idx, 2)**2._wp*lbk_vel(bub_idx, 2) + + center(1:2) = lbk_pos(bub_idx,1:2,2) + center(3) = 0._wp + if (p > 0) center(3) = lbk_pos(bub_idx, 3, 2) + + call s_applygaussian(center, cellijk, nodecoord, stddsv, 0._wp, func) + + ! Kahan summation for void fraction + y_kahan = real(func*strength_vol, kind=wp) - kcomp(1)%sf(i, j, k) + t_kahan = updatedvar(1)%sf(i, j, k) + y_kahan + kcomp(1)%sf(i, j, k) = (t_kahan - updatedvar(1)%sf(i, j, k)) - y_kahan + updatedvar(1)%sf(i, j, k) = t_kahan + + ! Kahan summation for time derivative of void fraction + y_kahan = real(func*strength_vel, kind=wp) - kcomp(2)%sf(i, j, k) + t_kahan = updatedvar(2)%sf(i, j, k) + y_kahan + kcomp(2)%sf(i, j, k) = (t_kahan - updatedvar(2)%sf(i, j, k)) - y_kahan + updatedvar(2)%sf(i, j, k) = t_kahan + + if (lag_params%cluster_type >= 4) then + call s_applygaussian(center, cellijk, nodecoord, stddsv, 1._wp, func2) + y_kahan = real(func2*strength_vol*strength_vel, kind=wp) - kcomp(5)%sf(i, j, k) + t_kahan = updatedvar(5)%sf(i, j, k) + y_kahan + kcomp(5)%sf(i, j, k) = (t_kahan - updatedvar(5)%sf(i, j, k)) - y_kahan + updatedvar(5)%sf(i, j, k) = t_kahan + end if + end do + end do + end do end do end do end do @@ -179,7 +285,7 @@ contains end subroutine s_gaussian - !> Evaluate the Gaussian kernel at a grid node for a given bubble center + !> The purpose of this subroutine is to apply the gaussian kernel function for each bubble (Maeda and Colonius, 2018)). 
subroutine s_applygaussian(center, cellaux, nodecoord, stddsv, strength_idx, func) $:GPU_ROUTINE(function_name='s_applygaussian',parallelism='[seq]', cray_inline=True) @@ -190,8 +296,9 @@ contains real(wp), intent(in) :: stddsv real(wp), intent(in) :: strength_idx real(wp), intent(out) :: func + integer :: i real(wp) :: distance - real(wp) :: theta, dtheta, L2, dzp, Lz2 + real(wp) :: theta, dtheta, L2, dzp, Lz2, zc real(wp) :: Nr, Nr_count distance = sqrt((center(1) - nodecoord(1))**2._wp + (center(2) - nodecoord(2))**2._wp + (center(3) - nodecoord(3))**2._wp) @@ -222,28 +329,26 @@ contains & **(3._wp*(strength_idx + 1._wp)) end do else - !> 2D cartesian function: - ! We smear particles considering a virtual depth (lag_params%charwidth) - theta = 0._wp - Nr = ceiling(lag_params%charwidth/(y_cb(cellaux(2)) - y_cb(cellaux(2) - 1))) - Nr_count = 1._wp - mapCells*1._wp - dzp = y_cb(cellaux(2) + 1) - y_cb(cellaux(2)) - Lz2 = (center(3) - (dzp*(0.5_wp + Nr_count) - lag_params%charwidth/2._wp))**2._wp - distance = sqrt((center(1) - nodecoord(1))**2._wp + (center(2) - nodecoord(2))**2._wp + Lz2) - func = dzp/lag_params%charwidth*exp(-0.5_wp*(distance/stddsv)**2._wp)/(sqrt(2._wp*pi)*stddsv)**3._wp - do while (Nr_count < Nr - 1._wp + ((mapCells - 1)*1._wp)) - Nr_count = Nr_count + 1._wp - Lz2 = (center(3) - (dzp*(0.5_wp + Nr_count) - lag_params%charwidth/2._wp))**2._wp + !> 2D cartesian function: Equation (48) from Madea and Colonius 2018 + ! We smear particles considering a virtual depth (lag_params%charwidth) with lag_params%charNz cells + dzp = (lag_params%charwidth/(lag_params%charNz + 1._wp)) + + func = 0._wp + do i = 0, lag_params%charNz + zc = (-lag_params%charwidth/2._wp + dzp*(0.5_wp + i)) ! 
Center of virtual cell i in z-direction + Lz2 = (center(3) - zc)**2._wp distance = sqrt((center(1) - nodecoord(1))**2._wp + (center(2) - nodecoord(2))**2._wp + Lz2) - func = func + dzp/lag_params%charwidth*exp(-0.5_wp*(distance/stddsv)**2._wp)/(sqrt(2._wp*pi)*stddsv) & - & **(3._wp*(strength_idx + 1._wp)) + func = func + dzp/lag_params%charwidth*exp(-0.5_wp*(distance/stddsv)**2._wp)/(sqrt(2._wp*pi)*stddsv)**3._wp end do end if end if end subroutine s_applygaussian - !> Check if the current cell is outside the computational domain including ghost cells + !> The purpose of this subroutine is to check if the current cell is outside the computational domain or not (including ghost + !! cells). + !! @param cellaux Tested cell to smear the bubble effect in. + !! @param celloutside If true, then cellaux is outside the computational domain. subroutine s_check_celloutside(cellaux, celloutside) $:GPU_ROUTINE(function_name='s_check_celloutside',parallelism='[seq]', cray_inline=True) @@ -275,7 +380,9 @@ contains end subroutine s_check_celloutside - !> Relocate cells that intersect a symmetric boundary + !> This subroutine relocates the current cell, if it intersects a symmetric boundary. + !! @param cell Cell of the current bubble + !! @param cellaux Cell to map the bubble effect in. subroutine s_shift_cell_symmetric_bc(cellaux, cell) $:GPU_ROUTINE(function_name='s_shift_cell_symmetric_bc', parallelism='[seq]', cray_inline=True) @@ -312,6 +419,9 @@ contains end subroutine s_shift_cell_symmetric_bc !> Calculates the standard deviation of the bubble being smeared in the Eulerian framework. + !! @param cell Cell where the bubble is located + !! @param volpart Volume of the bubble + !! @param stddsv Standard deviaton subroutine s_compute_stddsv(cell, volpart, stddsv) $:GPU_ROUTINE(function_name='s_compute_stddsv',parallelism='[seq]', cray_inline=True) @@ -338,7 +448,7 @@ contains end if !> Compute Standard deviaton - if (((volpart/charvol) > 0.5_wp*lag_params%valmaxvoid) .or. 
(lag_params%smooth_type == 1)) then + if ((volpart/charvol) > 0.5_wp*lag_params%valmaxvoid .or. (lag_params%smooth_type == 1)) then rad = (3._wp*volpart/(4._wp*pi))**(1._wp/3._wp) stddsv = 1._wp*lag_params%epsilonb*max(chardist, rad) else @@ -347,7 +457,11 @@ contains end subroutine s_compute_stddsv - !> Compute the characteristic cell volume + !> The purpose of this procedure is to calculate the characteristic cell volume + !! @param cellx x-direction cell index + !! @param celly y-direction cell index + !! @param cellz z-direction cell index + !! @param Charvol Characteristic volume subroutine s_get_char_vol(cellx, celly, cellz, Charvol) $:GPU_ROUTINE(function_name='s_get_char_vol',parallelism='[seq]', cray_inline=True) @@ -367,7 +481,9 @@ contains end subroutine s_get_char_vol - !> Convert bubble computational coordinates from real to integer cell indices + !> This subroutine transforms the computational coordinates of the bubble from real type into integer. + !! @param s_cell Computational coordinates of the bubble, real type + !! @param get_cell Computational coordinates of the bubble, integer type subroutine s_get_cell(s_cell, get_cell) $:GPU_ROUTINE(function_name='s_get_cell',parallelism='[seq]', cray_inline=True) @@ -383,4 +499,221 @@ contains end subroutine s_get_cell + !> Precomputes cell-centered pressure gradients (dp/dx, dp/dy, dp/dz) at all cell centers using finite-difference coefficients + !! of the specified order. This avoids scattered memory accesses to the pressure field when computing translational bubble + !! forces. + !! @param q_prim_vf Primitive variables (pressure is at index E_idx) + subroutine s_compute_pressure_gradients(q_prim_vf) + + type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf + integer :: i, j, k, r + + ! 
dp/dx at all cell centers + + $:GPU_PARALLEL_LOOP(private='[i, j, k, r]', collapse=3) + do k = 0, p + do j = 0, n + do i = 0, m + grad_p_x(i, j, k) = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do r = -fd_number, fd_number + grad_p_x(i, j, k) = grad_p_x(i, j, k) + q_prim_vf(E_idx)%sf(i + r, j, k)*fd_coeff_x_pgrad(r, i) + end do + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + + ! dp/dy at all cell centers + if (n > 0) then + $:GPU_PARALLEL_LOOP(private='[i, j, k, r]', collapse=3) + do k = 0, p + do j = 0, n + do i = 0, m + grad_p_y(i, j, k) = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do r = -fd_number, fd_number + grad_p_y(i, j, k) = grad_p_y(i, j, k) + q_prim_vf(E_idx)%sf(i, j + r, k)*fd_coeff_y_pgrad(r, j) + end do + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + end if + + ! dp/dz at all cell centers + if (p > 0) then + $:GPU_PARALLEL_LOOP(private='[i, j, k, r]', collapse=3) + do k = 0, p + do j = 0, n + do i = 0, m + grad_p_z(i, j, k) = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do r = -fd_number, fd_number + grad_p_z(i, j, k) = grad_p_z(i, j, k) + q_prim_vf(E_idx)%sf(i, j, k + r)*fd_coeff_z_pgrad(r, k) + end do + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + end if + + end subroutine s_compute_pressure_gradients + + !! This function interpolates the velocity of Eulerian field at the position of the bubble. + !! @param pos Position of the bubble in directiion i + !! @param cell Computational coordinates of the bubble + !! @param i Direction of the velocity (1: x, 2: y, 3: z) + !! @param q_prim_vf Eulerian field with primitive variables + !! 
@return v Interpolated velocity at the position of the bubble + function f_interpolate_velocity(pos, cell, i, q_prim_vf) result(v) + + $:GPU_ROUTINE(parallelism='[seq]') + real(wp), intent(in) :: pos + integer, dimension(3), intent(in) :: cell + integer, intent(in) :: i + type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf + real(wp) :: v + real(wp), dimension(fd_order + 1) :: xi, eta, L + + if (fd_order == 2) then + if (i == 1) then + xi(1) = x_cc(cell(1) - 1) + eta(1) = q_prim_vf(momxb)%sf(cell(1) - 1, cell(2), cell(3)) + xi(2) = x_cc(cell(1)) + eta(2) = q_prim_vf(momxb)%sf(cell(1), cell(2), cell(3)) + xi(3) = x_cc(cell(1) + 1) + eta(3) = q_prim_vf(momxb)%sf(cell(1) + 1, cell(2), cell(3)) + else if (i == 2) then + xi(1) = y_cc(cell(2) - 1) + eta(1) = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) - 1, cell(3)) + xi(2) = y_cc(cell(2)) + eta(2) = q_prim_vf(momxb + 1)%sf(cell(1), cell(2), cell(3)) + xi(3) = y_cc(cell(2) + 1) + eta(3) = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) + 1, cell(3)) + else if (i == 3) then + xi(1) = z_cc(cell(3) - 1) + eta(1) = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) - 1) + xi(2) = z_cc(cell(3)) + eta(2) = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3)) + xi(3) = z_cc(cell(3) + 1) + eta(3) = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) + 1) + end if + + L(1) = ((pos - xi(2))*(pos - xi(3)))/((xi(1) - xi(2))*(xi(1) - xi(3))) + L(2) = ((pos - xi(1))*(pos - xi(3)))/((xi(2) - xi(1))*(xi(2) - xi(3))) + L(3) = ((pos - xi(1))*(pos - xi(2)))/((xi(3) - xi(1))*(xi(3) - xi(2))) + + v = L(1)*eta(1) + L(2)*eta(2) + L(3)*eta(3) + else if (fd_order == 4) then + if (i == 1) then + xi(1) = x_cc(cell(1) - 2) + eta(1) = q_prim_vf(momxb)%sf(cell(1) - 2, cell(2), cell(3)) + xi(2) = x_cc(cell(1) - 1) + eta(2) = q_prim_vf(momxb)%sf(cell(1) - 1, cell(2), cell(3)) + xi(3) = x_cc(cell(1)) + eta(3) = q_prim_vf(momxb)%sf(cell(1), cell(2), cell(3)) + xi(4) = x_cc(cell(1) + 1) + eta(4) = q_prim_vf(momxb)%sf(cell(1) + 1, cell(2), cell(3)) + xi(5) = x_cc(cell(1) 
+ 2)
+ eta(5) = q_prim_vf(momxb)%sf(cell(1) + 2, cell(2), cell(3))
+ else if (i == 2) then
+ xi(1) = y_cc(cell(2) - 2)
+ eta(1) = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) - 2, cell(3))
+ xi(2) = y_cc(cell(2) - 1)
+ eta(2) = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) - 1, cell(3))
+ xi(3) = y_cc(cell(2))
+ eta(3) = q_prim_vf(momxb + 1)%sf(cell(1), cell(2), cell(3))
+ xi(4) = y_cc(cell(2) + 1)
+ eta(4) = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) + 1, cell(3))
+ xi(5) = y_cc(cell(2) + 2)
+ eta(5) = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) + 2, cell(3))
+ else if (i == 3) then
+ xi(1) = z_cc(cell(3) - 2)
+ eta(1) = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) - 2)
+ xi(2) = z_cc(cell(3) - 1)
+ eta(2) = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) - 1)
+ xi(3) = z_cc(cell(3))
+ eta(3) = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3))
+ xi(4) = z_cc(cell(3) + 1)
+ eta(4) = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) + 1)
+ xi(5) = z_cc(cell(3) + 2)
+ eta(5) = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) + 2)
+ end if
+
+ L(1) = ((pos - xi(2))*(pos - xi(3))*(pos - xi(4))*(pos - xi(5)))/((xi(1) - xi(2))*(xi(1) - xi(3))*(xi(1) - xi(4)) &
+ & *(xi(1) - xi(5)))
+ L(2) = ((pos - xi(1))*(pos - xi(3))*(pos - xi(4))*(pos - xi(5)))/((xi(2) - xi(1))*(xi(2) - xi(3))*(xi(2) - xi(4)) &
+ & *(xi(2) - xi(5)))
+ L(3) = ((pos - xi(1))*(pos - xi(2))*(pos - xi(4))*(pos - xi(5)))/((xi(3) - xi(1))*(xi(3) - xi(2))*(xi(3) - xi(4)) &
+ & *(xi(3) - xi(5)))
+ L(4) = ((pos - xi(1))*(pos - xi(2))*(pos - xi(3))*(pos - xi(5)))/((xi(4) - xi(1))*(xi(4) - xi(2))*(xi(4) - xi(3)) &
+ & *(xi(4) - xi(5)))
+ L(5) = ((pos - xi(1))*(pos - xi(2))*(pos - xi(3))*(pos - xi(4)))/((xi(5) - xi(1))*(xi(5) - xi(2))*(xi(5) - xi(3)) &
+ & *(xi(5) - xi(4)))
+
+ v = L(1)*eta(1) + L(2)*eta(2) + L(3)*eta(3) + L(4)*eta(4) + L(5)*eta(5)
+ end if
+
+ end function f_interpolate_velocity
+
+ !! This function calculates the force on a bubble based on the pressure gradient, velocity, and drag model.
+ !! 
@param pos Position of the bubble in direction i + !! @param rad Radius of the bubble + !! @param rdot Radial velocity of the bubble + !! @param vel Velocity of the bubble + !! @param mg Mass of the gas in the bubble + !! @param mv Mass of the liquid in the bubble + !! @param Re Reynolds number + !! @param rho Density of the fluid + !! @param cell Computational coordinates of the bubble + !! @param i Direction of the velocity (1: x, 2: y, 3: z) + !! @param q_prim_vf Eulerian field with primitive variables + !! @return a Acceleration of the bubble in direction i + function f_get_bubble_force(pos, rad, rdot, vel, mg, mv, Re, rho, cell, i, q_prim_vf) result(force) + + $:GPU_ROUTINE(parallelism='[seq]') + real(wp), intent(in) :: pos, rad, rdot, mg, mv, Re, rho, vel + integer, dimension(3), intent(in) :: cell + integer, intent(in) :: i + type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf + real(wp) :: dp, vol, force + real(wp) :: v_rel + + if (fd_order > 1) then + v_rel = vel - f_interpolate_velocity(pos, cell, i, q_prim_vf) + else + v_rel = vel - q_prim_vf(momxb + i - 1)%sf(cell(1), cell(2), cell(3)) + end if + + force = 0._wp + + if (lag_params%drag_model == 1) then ! Free slip Stokes drag + force = force - (4._wp*pi*rad*v_rel)/Re + else if (lag_params%drag_model == 2) then ! No slip Stokes drag + force = force - (6._wp*pi*rad*v_rel)/Re + else if (lag_params%drag_model == 3) then ! Levich drag + force = force - (12._wp*pi*rad*v_rel)/Re + end if + + if (lag_pressure_force) then + ! 
Use precomputed cell-centered pressure gradients + if (i == 1) then + dp = grad_p_x(cell(1), cell(2), cell(3)) + else if (i == 2) then + dp = grad_p_y(cell(1), cell(2), cell(3)) + else if (i == 3) then + dp = grad_p_z(cell(1), cell(2), cell(3)) + end if + + vol = (4._wp/3._wp)*pi*(rad**3._wp) + force = force - vol*dp + end if + + if (lag_params%gravity_force) then + force = force + (mg + mv)*accel_bf(i) + end if + + end function f_get_bubble_force + end module m_bubbles_EL_kernels diff --git a/src/simulation/m_compute_levelset.fpp b/src/simulation/m_compute_levelset.fpp index da1dcd9109..88153df8ee 100644 --- a/src/simulation/m_compute_levelset.fpp +++ b/src/simulation/m_compute_levelset.fpp @@ -20,7 +20,7 @@ module m_compute_levelset contains - !> Dispatch level-set distance and normal computations for all ghost points based on patch geometry type + !> @brief Dispatches level-set distance and normal computations for all ghost points based on their patch geometry type. impure subroutine s_apply_levelset(gps, num_gps) type(ghost_point), dimension(:), intent(inout) :: gps @@ -73,24 +73,27 @@ contains end subroutine s_apply_levelset - !> Compute the signed distance and outward normal from a ghost point to a circular immersed boundary + !> @brief Computes the signed distance and outward normal from a ghost point to a circular immersed boundary. 
subroutine s_circle_levelset(gp) $:GPU_ROUTINE(parallelism='[seq]') type(ghost_point), intent(inout) :: gp real(wp) :: radius, dist - real(wp), dimension(2) :: center + real(wp), dimension(2) :: center !< x and y coordinates in local IB frame real(wp), dimension(3) :: dist_vec integer :: i, j, ib_patch_id !< Loop index variables + ib_patch_id = gp%ib_patch_id i = gp%loc(1) j = gp%loc(2) radius = patch_ib(ib_patch_id)%radius - dist_vec(1) = x_cc(i) - patch_ib(ib_patch_id)%x_centroid - real(gp%x_periodicity, wp)*(x_domain%end - x_domain%beg) - dist_vec(2) = y_cc(j) - patch_ib(ib_patch_id)%y_centroid - real(gp%y_periodicity, wp)*(y_domain%end - y_domain%beg) + dist_vec(1) = x_cc(i) - patch_ib(ib_patch_id)%x_centroid - real(gp%x_periodicity, & + & wp)*(glb_bounds(1)%end - glb_bounds(1)%beg) + dist_vec(2) = y_cc(j) - patch_ib(ib_patch_id)%y_centroid - real(gp%y_periodicity, & + & wp)*(glb_bounds(2)%end - glb_bounds(2)%beg) dist_vec(3) = 0._wp dist = sqrt(sum(dist_vec**2)) @@ -103,7 +106,7 @@ contains end subroutine s_circle_levelset - !> Compute the signed distance and outward normal from a ghost point to a 2D NACA airfoil surface + !> @brief Computes the signed distance and outward normal from a ghost point to a 2D NACA airfoil surface. 
subroutine s_airfoil_levelset(gp) $:GPU_ROUTINE(parallelism='[seq]') @@ -116,18 +119,19 @@ contains real(wp), dimension(1:2) :: center real(wp), dimension(1:3,1:3) :: rotation, inverse_rotation integer :: i, j, k, ib_patch_id !< Loop index variables + ib_patch_id = gp%ib_patch_id i = gp%loc(1) j = gp%loc(2) - center(1) = patch_ib(ib_patch_id)%x_centroid + real(gp%x_periodicity, wp)*(x_domain%end - x_domain%beg) - center(2) = patch_ib(ib_patch_id)%y_centroid + real(gp%y_periodicity, wp)*(y_domain%end - y_domain%beg) + center(1) = patch_ib(ib_patch_id)%x_centroid + real(gp%x_periodicity, wp)*(glb_bounds(1)%end - glb_bounds(1)%beg) + center(2) = patch_ib(ib_patch_id)%y_centroid + real(gp%y_periodicity, wp)*(glb_bounds(2)%end - glb_bounds(2)%beg) inverse_rotation(:,:) = patch_ib(ib_patch_id)%rotation_matrix_inverse(:,:) rotation(:,:) = patch_ib(ib_patch_id)%rotation_matrix(:,:) offset(:) = patch_ib(ib_patch_id)%centroid_offset(:) - xy_local = [x_cc(i) - center(1), y_cc(j) - center(2), 0._wp] ! get coordinate frame centered on IB - xy_local = matmul(inverse_rotation, xy_local) ! rotate the frame into the IB's coordinate + xy_local = [x_cc(i) - center(1), y_cc(j) - center(2), 0._wp] + xy_local = matmul(inverse_rotation, xy_local) xy_local = xy_local - offset ! airfoils are a patch that require a centroid offset if (xy_local(2) >= 0._wp) then @@ -182,7 +186,8 @@ contains end subroutine s_airfoil_levelset - !> Compute the signed distance and outward normal from a ghost point to a 3D extruded airfoil surface + !> @brief Computes the signed distance and outward normal from a ghost point to a 3D extruded airfoil surface including spanwise + !! end caps. 
subroutine s_3d_airfoil_levelset(gp) $:GPU_ROUTINE(parallelism='[seq]') @@ -196,14 +201,15 @@ contains real(wp), dimension(1:3,1:3) :: rotation, inverse_rotation real(wp) :: length_z integer :: i, j, k, l, ib_patch_id !< Loop index variables + ib_patch_id = gp%ib_patch_id i = gp%loc(1) j = gp%loc(2) l = gp%loc(3) - center(1) = patch_ib(ib_patch_id)%x_centroid + real(gp%x_periodicity, wp)*(x_domain%end - x_domain%beg) - center(2) = patch_ib(ib_patch_id)%y_centroid + real(gp%y_periodicity, wp)*(y_domain%end - y_domain%beg) - center(3) = patch_ib(ib_patch_id)%z_centroid + real(gp%z_periodicity, wp)*(z_domain%end - z_domain%beg) + center(1) = patch_ib(ib_patch_id)%x_centroid + real(gp%x_periodicity, wp)*(glb_bounds(1)%end - glb_bounds(1)%beg) + center(2) = patch_ib(ib_patch_id)%y_centroid + real(gp%y_periodicity, wp)*(glb_bounds(2)%end - glb_bounds(2)%beg) + center(3) = patch_ib(ib_patch_id)%z_centroid + real(gp%z_periodicity, wp)*(glb_bounds(3)%end - glb_bounds(3)%beg) lz = patch_ib(ib_patch_id)%length_z inverse_rotation(:,:) = patch_ib(ib_patch_id)%rotation_matrix_inverse(:,:) rotation(:,:) = patch_ib(ib_patch_id)%rotation_matrix(:,:) @@ -213,7 +219,7 @@ contains z_min = -lz/2 xyz_local = [x_cc(i), y_cc(j), z_cc(l)] - center - xyz_local = matmul(inverse_rotation, xyz_local) ! rotate the frame into the IB's coordinates + xyz_local = matmul(inverse_rotation, xyz_local) xyz_local = xyz_local - offset ! 
airfoils are a patch that require a centroid offset if (xyz_local(2) >= 0._wp) then @@ -296,14 +302,15 @@ contains integer :: i, j, k !< Loop index variables integer :: idx !< Shortest path direction indicator integer :: ib_patch_id !< patch ID + ib_patch_id = gp%ib_patch_id i = gp%loc(1) j = gp%loc(2) length_x = patch_ib(ib_patch_id)%length_x length_y = patch_ib(ib_patch_id)%length_y - center(1) = patch_ib(ib_patch_id)%x_centroid + real(gp%x_periodicity, wp)*(x_domain%end - x_domain%beg) - center(2) = patch_ib(ib_patch_id)%y_centroid + real(gp%y_periodicity, wp)*(y_domain%end - y_domain%beg) + center(1) = patch_ib(ib_patch_id)%x_centroid + real(gp%x_periodicity, wp)*(glb_bounds(1)%end - glb_bounds(1)%beg) + center(2) = patch_ib(ib_patch_id)%y_centroid + real(gp%y_periodicity, wp)*(glb_bounds(2)%end - glb_bounds(2)%beg) inverse_rotation(:,:) = patch_ib(ib_patch_id)%rotation_matrix_inverse(:,:) rotation(:,:) = patch_ib(ib_patch_id)%rotation_matrix(:,:) @@ -348,7 +355,8 @@ contains end subroutine s_rectangle_levelset - !> Compute the signed distance and outward normal from a ghost point to an elliptical immersed boundary + !> @brief Computes the signed distance and outward normal from a ghost point to an elliptical immersed boundary via a quadratic + !! projection. 
subroutine s_ellipse_levelset(gp) $:GPU_ROUTINE(parallelism='[seq]') @@ -363,14 +371,15 @@ contains integer :: i, j, k !< Loop index variables integer :: idx !< Shortest path direction indicator integer :: ib_patch_id !< patch ID + ib_patch_id = gp%ib_patch_id i = gp%loc(1) j = gp%loc(2) length_x = patch_ib(ib_patch_id)%length_x length_y = patch_ib(ib_patch_id)%length_y - center(1) = patch_ib(ib_patch_id)%x_centroid + real(gp%x_periodicity, wp)*(x_domain%end - x_domain%beg) - center(2) = patch_ib(ib_patch_id)%y_centroid + real(gp%y_periodicity, wp)*(y_domain%end - y_domain%beg) + center(1) = patch_ib(ib_patch_id)%x_centroid + real(gp%x_periodicity, wp)*(glb_bounds(1)%end - glb_bounds(1)%beg) + center(2) = patch_ib(ib_patch_id)%y_centroid + real(gp%y_periodicity, wp)*(glb_bounds(2)%end - glb_bounds(2)%beg) inverse_rotation(:,:) = patch_ib(ib_patch_id)%rotation_matrix_inverse(:,:) rotation(:,:) = patch_ib(ib_patch_id)%rotation_matrix(:,:) @@ -398,7 +407,7 @@ contains end subroutine s_ellipse_levelset - !> Compute the signed distance and outward normal from a ghost point to a cuboid immersed boundary + !> @brief Computes the signed distance and outward normal from a ghost point to the nearest face of a cuboid immersed boundary. 
subroutine s_cuboid_levelset(gp) $:GPU_ROUTINE(parallelism='[seq]') @@ -413,6 +422,7 @@ contains real(wp), dimension(1:3,1:3) :: rotation, inverse_rotation integer :: i, j, k !< Loop index variables integer :: ib_patch_id !< patch ID + ib_patch_id = gp%ib_patch_id i = gp%loc(1) j = gp%loc(2) @@ -422,9 +432,9 @@ contains length_y = patch_ib(ib_patch_id)%length_y length_z = patch_ib(ib_patch_id)%length_z - center(1) = patch_ib(ib_patch_id)%x_centroid + real(gp%x_periodicity, wp)*(x_domain%end - x_domain%beg) - center(2) = patch_ib(ib_patch_id)%y_centroid + real(gp%y_periodicity, wp)*(y_domain%end - y_domain%beg) - center(3) = patch_ib(ib_patch_id)%z_centroid + real(gp%z_periodicity, wp)*(z_domain%end - z_domain%beg) + center(1) = patch_ib(ib_patch_id)%x_centroid + real(gp%x_periodicity, wp)*(glb_bounds(1)%end - glb_bounds(1)%beg) + center(2) = patch_ib(ib_patch_id)%y_centroid + real(gp%y_periodicity, wp)*(glb_bounds(2)%end - glb_bounds(2)%beg) + center(3) = patch_ib(ib_patch_id)%z_centroid + real(gp%z_periodicity, wp)*(glb_bounds(3)%end - glb_bounds(3)%beg) inverse_rotation(:,:) = patch_ib(ib_patch_id)%rotation_matrix_inverse(:,:) rotation(:,:) = patch_ib(ib_patch_id)%rotation_matrix(:,:) @@ -437,7 +447,7 @@ contains Back = -length_z/2 xyz_local = [x_cc(i), y_cc(j), z_cc(k)] - center ! get coordinate frame centered on IB - xyz_local = matmul(inverse_rotation, xyz_local) ! rotate the frame into the IB's coordinate + xyz_local = matmul(inverse_rotation, xyz_local) dist_left = Left - xyz_local(1) dist_right = xyz_local(1) - Right @@ -485,7 +495,7 @@ contains end subroutine s_cuboid_levelset - !> Compute the signed distance and outward normal from a ghost point to a spherical immersed boundary + !> @brief Computes the signed distance and outward normal from a ghost point to a spherical immersed boundary. 
subroutine s_sphere_levelset(gp) $:GPU_ROUTINE(parallelism='[seq]') @@ -494,15 +504,16 @@ contains real(wp) :: radius, dist real(wp), dimension(3) :: dist_vec, center, periodicity integer :: i, j, k, ib_patch_id !< Loop index variables + ib_patch_id = gp%ib_patch_id i = gp%loc(1) j = gp%loc(2) k = gp%loc(3) radius = patch_ib(ib_patch_id)%radius - periodicity(1) = real(gp%x_periodicity, wp)*(x_domain%end - x_domain%beg) - periodicity(2) = real(gp%y_periodicity, wp)*(y_domain%end - y_domain%beg) - periodicity(3) = real(gp%z_periodicity, wp)*(z_domain%end - z_domain%beg) + periodicity(1) = real(gp%x_periodicity, wp)*(glb_bounds(1)%end - glb_bounds(1)%beg) + periodicity(2) = real(gp%y_periodicity, wp)*(glb_bounds(2)%end - glb_bounds(2)%beg) + periodicity(3) = real(gp%z_periodicity, wp)*(glb_bounds(3)%end - glb_bounds(3)%beg) center(1) = patch_ib(ib_patch_id)%x_centroid center(2) = patch_ib(ib_patch_id)%y_centroid center(3) = patch_ib(ib_patch_id)%z_centroid @@ -521,7 +532,8 @@ contains end subroutine s_sphere_levelset - !> Compute the signed distance and outward normal from a ghost point to a cylindrical immersed boundary + !> @brief Computes the signed distance and outward normal from a ghost point to a cylindrical immersed boundary surface and end + !! caps. 
subroutine s_cylinder_levelset(gp) $:GPU_ROUTINE(parallelism='[seq]') @@ -542,9 +554,9 @@ contains k = gp%loc(3) radius = patch_ib(ib_patch_id)%radius - center(1) = patch_ib(ib_patch_id)%x_centroid + real(gp%x_periodicity, wp)*(x_domain%end - x_domain%beg) - center(2) = patch_ib(ib_patch_id)%y_centroid + real(gp%y_periodicity, wp)*(y_domain%end - y_domain%beg) - center(3) = patch_ib(ib_patch_id)%z_centroid + real(gp%z_periodicity, wp)*(z_domain%end - z_domain%beg) + center(1) = patch_ib(ib_patch_id)%x_centroid + real(gp%x_periodicity, wp)*(glb_bounds(1)%end - glb_bounds(1)%beg) + center(2) = patch_ib(ib_patch_id)%y_centroid + real(gp%y_periodicity, wp)*(glb_bounds(2)%end - glb_bounds(2)%beg) + center(3) = patch_ib(ib_patch_id)%z_centroid + real(gp%z_periodicity, wp)*(glb_bounds(3)%end - glb_bounds(3)%beg) length(1) = patch_ib(ib_patch_id)%length_x length(2) = patch_ib(ib_patch_id)%length_y length(3) = patch_ib(ib_patch_id)%length_z @@ -570,7 +582,7 @@ contains end if xyz_local = [x_cc(i), y_cc(j), z_cc(k)] - center ! get coordinate frame centered on IB - xyz_local = matmul(inverse_rotation, xyz_local) ! rotate the frame into the IB's coordinates + xyz_local = matmul(inverse_rotation, xyz_local) ! get distance to flat edge of cylinder side_pos = dot_product(xyz_local, dist_sides_vec) @@ -596,6 +608,7 @@ contains end subroutine s_cylinder_levelset !> The STL patch is a 2/3D geometry that is imported from an STL file. + !! @param gp Ghost point to compute levelset for subroutine s_model_levelset(gp) $:GPU_ROUTINE(parallelism='[seq]') @@ -618,12 +631,12 @@ contains center = 0._wp if (.not. f_is_default(patch_ib(patch_id)%x_centroid)) center(1) = patch_ib(patch_id)%x_centroid + real(gp%x_periodicity, & - & wp)*(x_domain%end - x_domain%beg) + & wp)*(glb_bounds(1)%end - glb_bounds(1)%beg) if (.not. 
f_is_default(patch_ib(patch_id)%y_centroid)) center(2) = patch_ib(patch_id)%y_centroid + real(gp%y_periodicity, & - & wp)*(y_domain%end - y_domain%beg) + & wp)*(glb_bounds(2)%end - glb_bounds(2)%beg) if (p > 0) then if (.not. f_is_default(patch_ib(patch_id)%z_centroid)) center(3) = patch_ib(patch_id)%z_centroid & - & + real(gp%z_periodicity, wp)*(z_domain%end - z_domain%beg) + & + real(gp%z_periodicity, wp)*(glb_bounds(3)%end - glb_bounds(3)%beg) end if inverse_rotation(:,:) = patch_ib(patch_id)%rotation_matrix_inverse(:,:) diff --git a/src/simulation/m_data_output.fpp b/src/simulation/m_data_output.fpp index 086a218289..5161b244dc 100644 --- a/src/simulation/m_data_output.fpp +++ b/src/simulation/m_data_output.fpp @@ -30,19 +30,8 @@ module m_data_output & s_finalize_data_output_module integer :: ib_state_unit = -1 !< I/O unit for IB state binary file - real(wp), allocatable, dimension(:,:,:) :: icfl_sf !< ICFL stability criterion - real(wp), allocatable, dimension(:,:,:) :: vcfl_sf !< VCFL stability criterion - real(wp), allocatable, dimension(:,:,:) :: ccfl_sf !< CCFL stability criterion - real(wp), allocatable, dimension(:,:,:) :: Rc_sf !< Rc stability criterion real(wp), public, allocatable, dimension(:,:) :: c_mass - $:GPU_DECLARE(create='[icfl_sf, vcfl_sf, ccfl_sf, Rc_sf, c_mass]') - - real(wp) :: icfl_max_loc, icfl_max_glb !< ICFL stability extrema on local and global grids - real(wp) :: vcfl_max_loc, vcfl_max_glb !< VCFL stability extrema on local and global grids - real(wp) :: ccfl_max_loc, ccfl_max_glb !< CCFL stability extrema on local and global grids - real(wp) :: Rc_min_loc, Rc_min_glb !< Rc stability extrema on local and global grids - $:GPU_DECLARE(create='[icfl_max_loc, icfl_max_glb, vcfl_max_loc, vcfl_max_glb]') - $:GPU_DECLARE(create='[ccfl_max_loc, ccfl_max_glb, Rc_min_loc, Rc_min_glb]') + $:GPU_DECLARE(create='[c_mass]') !> @name ICFL, VCFL, CCFL and Rc stability criteria extrema over all the time-steps !> @{ @@ -57,6 +46,12 @@ module 
m_data_output contains !> Write data files. Dispatch subroutine that replaces procedure pointer. + !! @param q_cons_vf Conservative variables + !! @param q_T_sf Temperature scalar field + !! @param q_prim_vf Primitive variables + !! @param t_step Current time step + !! @param bc_type Boundary condition type + !! @param beta Eulerian void fraction from lagrangian bubbles impure subroutine s_write_data_files(q_cons_vf, q_T_sf, q_prim_vf, t_step, bc_type, beta) type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf @@ -74,13 +69,17 @@ contains end subroutine s_write_data_files - !> Open the run-time information file and write the stability criteria table header + !> The purpose of this subroutine is to open a new or pre- existing run-time information file and append to it the basic header + !! information relevant to current simulation. In general, this requires generating a table header for those stability criteria + !! which will be written at every time-step. impure subroutine s_open_run_time_information_file character(LEN=name_len), parameter :: file_name = 'run_time.inf' !< Name of the run-time information file character(LEN=path_len + name_len) :: file_path !< Relative path to a file in the case directory character(LEN=8) :: file_date !< Creation date of the run-time information file + ! Opening the run-time information file + file_path = trim(case_dir) // '/' // trim(file_name) open (3, FILE=trim(file_path), form='formatted', STATUS='replace') @@ -98,25 +97,32 @@ contains write (3, '(A)') ''; write (3, '(A)') '' + ! Generating table header for the stability criteria to be outputted write (3, '(13X,A9,13X,A10,13X,A10,13X,A10)', advance="no") trim('Time-step'), trim('dt'), trim('Time'), trim('ICFL Max') if (viscous) then write (3, '(13X,A10,13X,A16)', advance="no") trim('VCFL Max'), trim('Rc Min') end if + if (bubbles_lagrange) then + write (3, '(13X,A10)', advance="no") trim('N Bubbles') + end if + write (3, *) ! 
new line end subroutine s_open_run_time_information_file - !> Open center-of-mass data files for writing + !> This opens a formatted data file where the root processor can write out the CoM information impure subroutine s_open_com_files() character(len=path_len + 3*name_len) :: file_path !< Relative path to the CoM file in the case directory integer :: i !< Generic loop iterator do i = 1, num_fluids + ! Generating the relative path to the CoM data file write (file_path, '(A,I0,A)') '/fluid', i, '_com.dat' file_path = trim(case_dir) // trim(file_path) + ! Creating the formatted data file and setting up its structure open (i + 120, file=trim(file_path), form='formatted', position='append', status='unknown') if (n == 0) then write (i + 120, '(A)') ' Non-Dimensional Time ' // ' Total Mass ' // ' x-loc ' // ' Total Volume ' @@ -133,17 +139,19 @@ contains end subroutine s_open_com_files - !> Open flow probe data files for writing + !> This opens a formatted data file where the root processor can write out flow probe information impure subroutine s_open_probe_files - character(LEN=path_len + 3*name_len) :: file_path !< Relative path to the probe data file in the case directory + character(LEN=path_len + 3*name_len) :: file_path !< Relative path to the grid and conservative variables data files integer :: i !< Generic loop iterator logical :: file_exist do i = 1, num_probes + ! Generating the relative path to the data file write (file_path, '(A,I0,A)') '/D/probe', i, '_prim.dat' file_path = trim(case_dir) // trim(file_path) + ! 
Creating the formatted data file and setting up its structure inquire (file=trim(file_path), exist=file_exist) if (file_exist) then @@ -164,7 +172,6 @@ contains end subroutine s_open_probe_files - !> Open the immersed boundary state file for binary output impure subroutine s_open_ib_state_file character(len=path_len + 2*name_len) :: file_loc @@ -177,7 +184,11 @@ contains end subroutine s_open_ib_state_file - !> Write stability criteria extrema to the run-time information file at the given time step + !> The goal of the procedure is to output to the run-time information file the stability criteria extrema in the entire + !! computational domain and at the given time-step. Moreover, the subroutine is also in charge of tracking these stability + !! criteria extrema over all time-steps. + !! @param q_prim_vf Cell-average primitive variables + !! @param t_step Current time step impure subroutine s_write_run_time_information(q_prim_vf, t_step) type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf @@ -191,19 +202,27 @@ contains real(wp), dimension(num_fluids) :: alpha !< Cell-avg. volume fraction real(wp), dimension(num_vels) :: vel !< Cell-avg. velocity #:endif - real(wp) :: vel_sum !< Cell-avg. velocity sum - real(wp) :: pres !< Cell-avg. pressure - real(wp) :: gamma !< Cell-avg. sp. heat ratio - real(wp) :: pi_inf !< Cell-avg. liquid stiffness function - real(wp) :: qv !< Cell-avg. internal energy reference value - real(wp) :: c !< Cell-avg. sound speed - real(wp) :: H !< Cell-avg. enthalpy - real(wp), dimension(2) :: Re !< Cell-avg. Reynolds numbers + real(wp) :: vel_sum !< Cell-avg. velocity sum + real(wp) :: pres !< Cell-avg. pressure + real(wp) :: gamma !< Cell-avg. sp. heat ratio + real(wp) :: pi_inf !< Cell-avg. liquid stiffness function + real(wp) :: qv !< Cell-avg. internal energy reference value + real(wp) :: c !< Cell-avg. sound speed + real(wp) :: H !< Cell-avg. enthalpy + real(wp), dimension(2) :: Re !< Cell-avg. 
Reynolds numbers integer :: j, k, l - + real(wp) :: icfl_max_loc, icfl_max_glb !< ICFL stability extrema on local and global grids + real(wp) :: vcfl_max_loc, vcfl_max_glb !< VCFL stability extrema on local and global grids + real(wp) :: ccfl_max_loc, ccfl_max_glb !< CCFL stability extrema on local and global grids + real(wp) :: Rc_min_loc, Rc_min_glb !< Rc stability extrema on local and global grids + real(wp) :: icfl, vcfl, Rc + + icfl_max_loc = 0._wp + vcfl_max_loc = 0._wp + Rc_min_loc = huge(1.0_wp) ! Computing Stability Criteria at Current Time-step - - $:GPU_PARALLEL_LOOP(collapse=3, private='[j, k, l, vel, alpha, Re, rho, vel_sum, pres, gamma, pi_inf, c, H, qv]') + $:GPU_PARALLEL_LOOP(collapse=3, private='[j, k, l, vel, alpha, Re, rho, vel_sum, pres, gamma, pi_inf, c, H, qv, icfl, & + & vcfl, Rc]', reduction='[[icfl_max_loc, vcfl_max_loc], [Rc_min_loc]]', reductionOp='[max, min]') do l = 0, p do k = 0, n do j = 0, m @@ -212,49 +231,32 @@ contains call s_compute_speed_of_sound(pres, rho, gamma, pi_inf, H, alpha, vel_sum, 0._wp, c, qv) if (viscous) then - call s_compute_stability_from_dt(vel, c, rho, Re, j, k, l, icfl_sf, vcfl_sf, Rc_sf) + call s_compute_stability_from_dt(vel, c, rho, Re, j, k, l, icfl, vcfl, Rc) else - call s_compute_stability_from_dt(vel, c, rho, Re, j, k, l, icfl_sf) + call s_compute_stability_from_dt(vel, c, rho, Re, j, k, l, icfl) end if + + icfl_max_loc = max(icfl_max_loc, icfl) + vcfl_max_loc = max(vcfl_max_loc, merge(vcfl, 0.0_wp, viscous)) + Rc_min_loc = min(Rc_min_loc, merge(Rc, huge(1.0_wp), viscous)) end do end do end do $:END_GPU_PARALLEL_LOOP() + ! 
end: Computing Stability Criteria at Current Time-step -#ifdef _CRAYFTN - $:GPU_UPDATE(host='[icfl_sf]') - - if (viscous) then - $:GPU_UPDATE(host='[vcfl_sf, Rc_sf]') - end if - - icfl_max_loc = maxval(icfl_sf) - - if (viscous) then - vcfl_max_loc = maxval(vcfl_sf) - Rc_min_loc = minval(Rc_sf) - end if -#else - #:call GPU_PARALLEL(copyout='[icfl_max_loc]', copyin='[icfl_sf]') - icfl_max_loc = maxval(icfl_sf) - #:endcall GPU_PARALLEL - if (viscous .or. dummy) then - #:call GPU_PARALLEL(copyout='[vcfl_max_loc, Rc_min_loc]', copyin='[vcfl_sf,Rc_sf]') - vcfl_max_loc = maxval(vcfl_sf) - Rc_min_loc = minval(Rc_sf) - #:endcall GPU_PARALLEL - end if -#endif - + ! Determining global stability criteria extrema at current time-step if (num_procs > 1) then - call s_mpi_reduce_stability_criteria_extrema(icfl_max_loc, vcfl_max_loc, Rc_min_loc, icfl_max_glb, vcfl_max_glb, & - & Rc_min_glb) + call s_mpi_reduce_stability_criteria_extrema(icfl_max_loc, vcfl_max_loc, Rc_min_loc, n_el_bubs_loc, icfl_max_glb, & + & vcfl_max_glb, Rc_min_glb, n_el_bubs_glb) else icfl_max_glb = icfl_max_loc if (viscous) vcfl_max_glb = vcfl_max_loc if (viscous) Rc_min_glb = Rc_min_loc + if (bubbles_lagrange) n_el_bubs_glb = n_el_bubs_loc end if + ! Determining the stability criteria extrema over all the time-steps if (icfl_max_glb > icfl_max) icfl_max = icfl_max_glb if (viscous) then @@ -262,6 +264,7 @@ contains if (Rc_min_glb < Rc_min) Rc_min = Rc_min_glb end if + ! Outputting global stability criteria extrema at current time-step if (proc_rank == 0) then write (3, '(13X,I9,13X,F10.6,13X,F10.6,13X,F10.6)', advance="no") t_step, dt, mytime, icfl_max_glb @@ -269,6 +272,10 @@ contains write (3, '(13X,F10.6,13X,ES16.6)', advance="no") vcfl_max_glb, Rc_min_glb end if + if (bubbles_lagrange) then + write (3, '(13X,I10)', advance="no") n_el_bubs_glb + end if + write (3, *) ! new line if (.not. 
f_approx_equal(icfl_max_glb, icfl_max_glb)) then @@ -286,13 +293,25 @@ contains call s_mpi_abort('VCFL is greater than 1.0. Exiting.') end if end if + + if (bubbles_lagrange) then + if (n_el_bubs_glb == 0) then + call s_mpi_abort('No Lagrangian bubbles remain in the domain. Exiting.') + end if + end if end if call s_mpi_barrier() end subroutine s_write_run_time_information - !> Write grid and conservative variable data files in serial format + !> The goal of this subroutine is to output the grid and conservative variables data files for given time-step. + !! @param q_cons_vf Cell-average conservative variables + !! @param q_T_sf Temperature scalar field + !! @param q_prim_vf Cell-average primitive variables + !! @param t_step Current time-step + !! @param bc_type Boundary condition type + !! @param beta Eulerian void fraction from lagrangian bubbles impure subroutine s_write_serial_data_files(q_cons_vf, q_T_sf, q_prim_vf, t_step, bc_type, beta) type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf @@ -301,14 +320,18 @@ contains integer, intent(in) :: t_step type(scalar_field), intent(inout), optional :: beta type(integer_field), dimension(1:num_dims,-1:1), intent(in) :: bc_type - character(LEN=path_len + 2*name_len) :: t_step_dir !< Relative path to the current time-step directory - character(LEN=path_len + 3*name_len) :: file_path !< Relative path to the grid and conservative variables data files - logical :: file_exist !< Logical used to check existence of current time-step directory + character(LEN=path_len + 2*name_len) :: t_step_dir + character(LEN=path_len + 3*name_len) :: file_path !< Relative path to the grid and conservative variables data files + logical :: file_exist !< Logical used to check existence of current time-step directory character(LEN=15) :: FMT integer :: i, j, k, l, r - real(wp) :: gamma, lit_gamma, pi_inf, qv !< Temporary EOS params + real(wp) :: gamma, lit_gamma, pi_inf, qv !< Temporary EOS params + + ! 
Creating or overwriting the time-step root directory write (t_step_dir, '(A,I0,A,I0)') trim(case_dir) // '/p_all' + + ! Creating or overwriting the current time-step directory write (t_step_dir, '(a,i0,a,i0)') trim(case_dir) // '/p_all/p', proc_rank, '/', t_step file_path = trim(t_step_dir) // '/.' @@ -316,11 +339,13 @@ contains if (file_exist) call s_delete_directory(trim(t_step_dir)) call s_create_directory(trim(t_step_dir)) + ! Writing the grid data file in the x-direction file_path = trim(t_step_dir) // '/x_cb.dat' open (2, FILE=trim(file_path), form='unformatted', STATUS='new') write (2) x_cb(-1:m); close (2) + ! Writing the grid data files in the y- and z-directions if (n > 0) then file_path = trim(t_step_dir) // '/y_cb.dat' @@ -335,6 +360,7 @@ contains end if end if + ! Writing the conservative variables data files do i = 1, sys_size write (file_path, '(A,I0,A)') trim(t_step_dir) // '/q_cons_vf', i, '.dat' @@ -378,6 +404,11 @@ contains ! Writing the IB markers if (ib) then call s_write_serial_ib_data(t_step) + ! write (file_path, '(A,I0,A)') trim(t_step_dir)//'/ib.dat' + + ! open (2, FILE=trim(file_path), & FORM='unformatted', & STATUS='new') + + ! write (2) ib_markers%sf(0:m, 0:n, 0:p); close (2) end if gamma = gammas(1) @@ -391,6 +422,7 @@ contains FMT = "(2F40.14)" end if + ! writing an output directory write (t_step_dir, '(A,I0,A,I0)') trim(case_dir) // '/D' file_path = trim(t_step_dir) // '/.' @@ -409,9 +441,10 @@ contains end if end if + ! 1D if (n == 0 .and. p == 0) then if (model_eqns == 2 .and. (.not. igr)) then - do i = 1, sys_size + do i = 1, sys_size ! TODO: check if sys_size is correct write (file_path, '(A,I0,A,I2.2,A,I6.6,A)') trim(t_step_dir) // '/prim.', i, '.', proc_rank, '.', t_step, '.dat' open (2, FILE=trim(file_path)) @@ -471,6 +504,7 @@ contains FMT = "(3F40.14)" end if + ! 2D if ((n > 0) .and. 
(p == 0)) then do i = 1, sys_size write (file_path, '(A,I0,A,I2.2,A,I6.6,A)') trim(t_step_dir) // '/cons.', i, '.', proc_rank, '.', t_step, '.dat' @@ -528,7 +562,7 @@ contains end if if (prim_vars_wrt .and. (.not. igr)) then - do i = 1, sys_size + do i = 1, sys_size ! TODO: check if sys_size is correct write (file_path, '(A,I0,A,I2.2,A,I6.6,A)') trim(t_step_dir) // '/prim.', i, '.', proc_rank, '.', t_step, '.dat' open (2, FILE=trim(file_path)) @@ -555,6 +589,7 @@ contains FMT = "(4F40.14)" end if + ! 3D if (p > 0) then do i = 1, sys_size write (file_path, '(A,I0,A,I2.2,A,I6.6,A)') trim(t_step_dir) // '/cons.', i, '.', proc_rank, '.', t_step, '.dat' @@ -622,7 +657,7 @@ contains end if if (prim_vars_wrt .and. (.not. igr)) then - do i = 1, sys_size + do i = 1, sys_size ! TODO: check if sys_size is correct write (file_path, '(A,I0,A,I2.2,A,I6.6,A)') trim(t_step_dir) // '/prim.', i, '.', proc_rank, '.', t_step, '.dat' open (2, FILE=trim(file_path)) @@ -648,7 +683,11 @@ contains end subroutine s_write_serial_data_files - !> Write grid and conservative variable data files in parallel via MPI I/O + !> The goal of this subroutine is to output the grid and conservative variables data files for given time-step. + !! @param q_cons_vf Cell-average conservative variables + !! @param t_step Current time-step + !! @param bc_type Boundary condition type + !! @param beta Eulerian void fraction from lagrangian bubbles impure subroutine s_write_parallel_data_files(q_cons_vf, t_step, bc_type, beta) type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf @@ -669,6 +708,7 @@ contains character(len=10) :: t_step_string integer :: i !< Generic loop iterator integer :: alt_sys !< Altered system size for the lagrangian subgrid bubble model + ! Down sampling variables integer :: m_ds, n_ds, p_ds integer :: m_glb_ds, n_glb_ds, p_glb_ds @@ -687,6 +727,7 @@ contains if (file_per_process) then call s_int_to_str(t_step, t_step_string) + ! 
Initialize MPI data I/O if (down_sample) then call s_initialize_mpi_data_ds(q_cons_temp_ds) else @@ -708,8 +749,10 @@ contains call s_mpi_barrier() call DelayFileAccess(proc_rank) + ! Initialize MPI data I/O call s_initialize_mpi_data(q_cons_vf) + ! Open the file to write all flow variables write (file_loc, '(I0,A,i7.7,A)') t_step, '_', proc_rank, '.dat' file_loc = trim(case_dir) // '/restart_data/lustre_' // trim(t_step_string) // trim(mpiiofs) // trim(file_loc) inquire (FILE=trim(file_loc), EXIST=file_exist) @@ -719,17 +762,20 @@ contains call MPI_FILE_OPEN(MPI_COMM_SELF, file_loc, ior(MPI_MODE_WRONLY, MPI_MODE_CREATE), mpi_info_int, ifile, ierr) if (down_sample) then + ! Size of local arrays data_size = (m_ds + 3)*(n_ds + 3)*(p_ds + 3) m_glb_save = m_glb_ds + 1 n_glb_save = n_glb_ds + 1 p_glb_save = p_glb_ds + 1 else + ! Size of local arrays data_size = (m + 1)*(n + 1)*(p + 1) m_glb_save = m_glb + 1 n_glb_save = n_glb + 1 p_glb_save = p_glb + 1 end if + ! Resize some integers so MPI can write even the biggest files m_MOK = int(m_glb_save + 1, MPI_OFFSET_KIND) n_MOK = int(n_glb_save + 1, MPI_OFFSET_KIND) p_MOK = int(p_glb_save + 1, MPI_OFFSET_KIND) @@ -739,11 +785,13 @@ contains NVARS_MOK = int(sys_size, MPI_OFFSET_KIND) if (bubbles_euler) then - do i = 1, sys_size + ! Write the data for each variable + do i = 1, sys_size ! TODO: check if sys_size is correct var_MOK = int(i, MPI_OFFSET_KIND) call MPI_FILE_WRITE_ALL(ifile, MPI_IO_DATA%var(i)%sf, data_size*mpi_io_type, mpi_io_p, status, ierr) end do + ! Write pb and mv for non-polytropic qbmm if (qbmm .and. .not. polytropic) then do i = sys_size + 1, sys_size + 2*nb*nnode var_MOK = int(i, MPI_OFFSET_KIND) @@ -769,6 +817,8 @@ contains call MPI_FILE_CLOSE(ifile, ierr) else + ! 
Initialize MPI data I/O + if (ib) then call s_initialize_mpi_data(q_cons_vf, ib_markers) else if (present(beta)) then @@ -785,8 +835,10 @@ contains end if call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, ior(MPI_MODE_WRONLY, MPI_MODE_CREATE), mpi_info_int, ifile, ierr) + ! Size of local arrays data_size = (m + 1)*(n + 1)*(p + 1) + ! Resize some integers so MPI can write even the biggest files m_MOK = int(m_glb + 1, MPI_OFFSET_KIND) n_MOK = int(n_glb + 1, MPI_OFFSET_KIND) p_MOK = int(p_glb + 1, MPI_OFFSET_KIND) @@ -796,18 +848,22 @@ contains NVARS_MOK = int(alt_sys, MPI_OFFSET_KIND) if (bubbles_euler) then - do i = 1, sys_size + ! Write the data for each variable + do i = 1, sys_size ! TODO: check if sys_size is correct var_MOK = int(i, MPI_OFFSET_KIND) + ! Initial displacement to skip at beginning of file disp = m_MOK*max(MOK, n_MOK)*max(MOK, p_MOK)*WP_MOK*(var_MOK - 1) call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, MPI_IO_DATA%view(i), 'native', mpi_info_int, ierr) call MPI_FILE_WRITE_ALL(ifile, MPI_IO_DATA%var(i)%sf, data_size*mpi_io_type, mpi_io_p, status, ierr) end do + ! Write pb and mv for non-polytropic qbmm if (qbmm .and. .not. polytropic) then do i = sys_size + 1, sys_size + 2*nb*nnode var_MOK = int(i, MPI_OFFSET_KIND) + ! Initial displacement to skip at beginning of file disp = m_MOK*max(MOK, n_MOK)*max(MOK, p_MOK)*WP_MOK*(var_MOK - 1) call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, MPI_IO_DATA%view(i), 'native', mpi_info_int, ierr) @@ -818,6 +874,7 @@ contains do i = 1, sys_size ! TODO: check if sys_size is correct var_MOK = int(i, MPI_OFFSET_KIND) + ! Initial displacement to skip at beginning of file disp = m_MOK*max(MOK, n_MOK)*max(MOK, p_MOK)*WP_MOK*(var_MOK - 1) call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, MPI_IO_DATA%view(i), 'native', mpi_info_int, ierr) @@ -825,9 +882,11 @@ contains end do end if + ! Correction for the lagrangian subgrid bubble model if (present(beta)) then var_MOK = int(sys_size + 1, MPI_OFFSET_KIND) + ! 
Initial displacement to skip at beginning of file disp = m_MOK*max(MOK, n_MOK)*max(MOK, p_MOK)*WP_MOK*(var_MOK - 1) call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, MPI_IO_DATA%view(sys_size + 1), 'native', mpi_info_int, ierr) @@ -836,21 +895,33 @@ contains call MPI_FILE_CLOSE(ifile, ierr) + ! Write ib data if (ib) then call s_write_parallel_ib_data(t_step) + ! write (file_loc, '(A)') 'ib.dat' file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) call + ! MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, ior(MPI_MODE_WRONLY, MPI_MODE_CREATE), & mpi_info_int, ifile, ierr) + + ! var_MOK = int(sys_size + 1, MPI_OFFSET_KIND) disp = m_MOK*max(MOK, n_MOK)*max(MOK, p_MOK)*WP_MOK*(var_MOK - 1 + + ! int(t_step/t_step_save)) + + ! call MPI_FILE_SET_VIEW(ifile, disp, MPI_INTEGER, MPI_IO_IB_DATA%view, & 'native', mpi_info_int, ierr) call + ! MPI_FILE_WRITE_ALL(ifile, MPI_IO_IB_DATA%var%sf, data_size, & MPI_INTEGER, status, ierr) call + ! MPI_FILE_CLOSE(ifile, ierr) end if end if #endif end subroutine s_write_parallel_data_files - !> Write immersed boundary marker data to a serial (per-processor) unformatted file + !> @brief Writes immersed boundary marker data to a serial (per-processor) unformatted file. subroutine s_write_serial_ib_data(time_step) integer, intent(in) :: time_step character(LEN=path_len + 2*name_len) :: file_path character(LEN=path_len + 2*name_len) :: t_step_dir + ! Creating or overwriting the time-step root directory + write (t_step_dir, '(A,I0,A,I0)') trim(case_dir) // '/p_all' write (t_step_dir, '(a,i0,a,i0)') trim(case_dir) // '/p_all/p', proc_rank, '/', time_step write (file_path, '(A,I0,A)') trim(t_step_dir) // '/ib_data.dat' @@ -862,7 +933,7 @@ contains end subroutine s_write_serial_ib_data - !> Write immersed boundary marker data in parallel using MPI I/O + !> @brief Writes immersed boundary marker data in parallel using MPI I/O. 
subroutine s_write_parallel_ib_data(time_step) integer, intent(in) :: time_step @@ -877,6 +948,7 @@ contains $:GPU_UPDATE(host='[ib_markers%sf]') + ! Size of local arrays data_size = (m + 1)*(n + 1)*(p + 1) m_MOK = int(m_glb + 1, MPI_OFFSET_KIND) n_MOK = int(n_glb + 1, MPI_OFFSET_KIND) @@ -899,7 +971,7 @@ contains end subroutine s_write_parallel_ib_data - !> Dispatch immersed boundary data output to the serial or parallel writer + !> @brief Dispatches immersed boundary data output to the serial or parallel writer. subroutine s_write_ib_data_file(time_step) integer, intent(in) :: time_step @@ -912,7 +984,7 @@ contains end subroutine s_write_ib_data_file - !> Write IB state records to D/ib_state.dat (rank 0 only) + !> @brief Writes IB state records to D/ib_state.dat. Must be called only on rank 0. impure subroutine s_write_ib_state_file() integer :: i @@ -924,7 +996,9 @@ contains end subroutine s_write_ib_state_file - !> Write center-of-mass data at the current time step + !> This writes a formatted data file where the root processor can write out the CoM information + !! @param t_step Current time-step + !! @param c_mass_in Center of mass information impure subroutine s_write_com_files(t_step, c_mass_in) integer, intent(in) :: t_step @@ -932,6 +1006,8 @@ contains integer :: i !< Generic loop iterator real(wp) :: nondim_time !< Non-dimensional time + ! Non-dimensional time calculation + if (t_step_old /= dflt_int) then nondim_time = real(t_step + t_step_old, wp)*dt else @@ -957,7 +1033,10 @@ contains end subroutine s_write_com_files - !> Write flow probe data at the current time step + !> This writes a formatted data file for the flow probe information + !! @param t_step Current time-step + !! @param q_cons_vf Conservative variables + !! @param accel_mag Acceleration magnitude information impure subroutine s_write_probe_files(t_step, q_cons_vf, accel_mag) integer, intent(in) :: t_step @@ -1004,6 +1083,7 @@ contains T = dflt_T_guess + ! 
Non-dimensional time calculation if (time_stepper == 23) then nondim_time = mytime else @@ -1015,6 +1095,7 @@ contains end if do i = 1, num_probes + ! Zeroing out flow variables for all processors rho = 0._wp do s = 1, num_vels vel(s) = 0._wp @@ -1041,6 +1122,7 @@ contains end do damage_state = 0._wp + ! Find probe location in terms of indices on a specific processor if (n == 0) then if ((probe(i)%x >= x_cb(-1)) .and. (probe(i)%x <= x_cb(m))) then do s = -1, m @@ -1494,17 +1576,21 @@ contains end subroutine s_write_probe_files - !> Write footer with stability criteria extrema and run-time to the information file, then close it + !> The goal of this subroutine is to write to the run-time information file basic footer information applicable to the current + !! computation and to close the file when done. The footer contains the stability criteria extrema over all of the time-steps + !! and the simulation run-time. impure subroutine s_close_run_time_information_file real(wp) :: run_time !< Run-time of the simulation + ! 
Writing the footer of and closing the run-time information file + write (3, '(A)') ' ' write (3, '(A)') '' write (3, '(A,F9.6)') 'ICFL Max: ', icfl_max if (viscous) write (3, '(A,F9.6)') 'VCFL Max: ', vcfl_max - if (viscous) write (3, '(A,F10.6)') 'Rc Min: ', Rc_min + if (viscous) write (3, '(A,ES16.6)') 'Rc Min: ', Rc_min call cpu_time(run_time) @@ -1518,7 +1604,7 @@ contains !> Closes communication files impure subroutine s_close_com_files() - integer :: i !< Generic loop iterator + integer :: i do i = 1, num_fluids close (i + 120) @@ -1529,7 +1615,7 @@ contains !> Closes probe files impure subroutine s_close_probe_files - integer :: i !< Generic loop iterator + integer :: i do i = 1, num_probes close (i + 30) @@ -1537,28 +1623,25 @@ contains end subroutine s_close_probe_files - !> Close the immersed boundary state file impure subroutine s_close_ib_state_file close (ib_state_unit) end subroutine s_close_ib_state_file - !> Initialize the data output module + !> The computation of parameters, the allocation of memory, the association of pointers and/or the execution of any other + !! procedures that are necessary to setup the module. impure subroutine s_initialize_data_output_module integer :: i, m_ds, n_ds, p_ds + ! 
Allocating/initializing ICFL, VCFL, CCFL and Rc stability criteria + if (run_time_info) then - @:ALLOCATE(icfl_sf(0:m, 0:n, 0:p)) icfl_max = 0._wp - if (viscous) then - @:ALLOCATE(vcfl_sf(0:m, 0:n, 0:p)) - @:ALLOCATE(Rc_sf (0:m, 0:n, 0:p)) - vcfl_max = 0._wp - Rc_min = 1.e3_wp + Rc_min = 1.e12_wp end if end if @@ -1588,13 +1671,6 @@ contains @:DEALLOCATE(c_mass) end if - if (run_time_info) then - @:DEALLOCATE(icfl_sf) - if (viscous) then - @:DEALLOCATE(vcfl_sf, Rc_sf) - end if - end if - if (down_sample) then do i = 1, sys_size deallocate (q_cons_temp_ds(i)%sf) diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index 079d36ebc2..c0d3d9c2ae 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -14,6 +14,7 @@ module m_global_parameters use m_derived_types use m_helper_basic + ! $:USE_GPU_MODULE() implicit none @@ -26,8 +27,10 @@ module m_global_parameters character(LEN=path_len) :: case_dir !< Case folder location logical :: run_time_info !< Run-time output flag integer :: t_step_old !< Existing IC/grid folder + ! Computational Domain Parameters integer :: proc_rank !< Rank of the local processor + !> @name Number of cells in the x-, y- and z-directions, respectively !> @{ integer :: m, n, p @@ -51,20 +54,22 @@ module m_global_parameters !> @name Cell-boundary (CB) locations in the x-, y- and z-directions, respectively !> @{ real(wp), target, allocatable, dimension(:) :: x_cb, y_cb, z_cb + type(bounds_info), dimension(3) :: glb_bounds !> @} !> @name Cell-center (CC) locations in the x-, y- and z-directions, respectively !> @{ real(wp), target, allocatable, dimension(:) :: x_cc, y_cc, z_cc !> @} - ! 
type(bounds_info) :: x_domain, y_domain, z_domain !< Locations of the domain bounds in the x-, y- and z-coordinate directions + !> @name Cell-width distributions in the x-, y- and z-directions, respectively !> @{ real(wp), target, allocatable, dimension(:) :: dx, dy, dz !> @} real(wp) :: dt !< Size of the time-step - $:GPU_DECLARE(create='[x_cb, y_cb, z_cb, x_cc, y_cc, z_cc, dx, dy, dz, dt, m, n, p]') + + $:GPU_DECLARE(create='[x_cb, y_cb, z_cb, x_cc, y_cc, z_cc, dx, dy, dz, dt, m, n, p, glb_bounds]') !> @name Starting time-step iteration, stopping time-step iteration and the number of time-step iterations between successive !! solution backups, respectively @@ -81,6 +86,7 @@ module m_global_parameters logical :: cfl_adap_dt, cfl_const_dt, cfl_dt integer :: t_step_print !< Number of time-steps between printouts + ! Simulation Algorithm Parameters integer :: model_eqns !< Multicomponent flow model #:if MFC_CASE_OPTIMIZATION @@ -206,10 +212,12 @@ module m_global_parameters integer :: relax_model !< Relaxation model real(wp) :: palpha_eps !< trigger parameter for the p relaxation procedure, phase change model real(wp) :: ptgalpha_eps !< trigger parameter for the pTg relaxation procedure, phase change model + $:GPU_DECLARE(create='[relax, relax_model, palpha_eps, ptgalpha_eps]') - integer :: num_bc_patches - logical :: bc_io + integer :: num_bc_patches + logical :: bc_io + logical, dimension(3) :: periodic_bc !> @name Boundary conditions (BC) in the x-, y- and z-directions, respectively !> @{ type(int_bounds_info) :: bc_x, bc_y, bc_z @@ -221,18 +229,27 @@ module m_global_parameters #elif defined(MFC_OpenMP) $:GPU_DECLARE(create='[bc_x, bc_y, bc_z]') #endif - type(bounds_info) :: x_domain, y_domain, z_domain - $:GPU_DECLARE(create='[x_domain, y_domain, z_domain]') - real(wp) :: x_a, y_a, z_a - real(wp) :: x_b, y_b, z_b - logical :: parallel_io !< Format of the data files - logical :: file_per_process !< shared file or not when using parallel io - integer :: precision !< 
Precision of output files - logical :: down_sample !< down sample the output files + + logical :: parallel_io !< Format of the data files + logical :: file_per_process !< shared file or not when using parallel io + integer :: precision !< Precision of output files + logical :: down_sample !< down sample the output files $:GPU_DECLARE(create='[down_sample]') - integer, allocatable, dimension(:) :: proc_coords !< Processor coordinates in MPI_CART_COMM - integer, allocatable, dimension(:) :: start_idx !< Starting cell-center index of local processor in global grid + integer, allocatable, dimension(:) :: proc_coords !< Processor coordinates in MPI_CART_COMM + type(bounds_info), allocatable, dimension(:) :: pcomm_coords + $:GPU_DECLARE(create='[pcomm_coords]') + !! Coordinates for EL particle transfer + + type(bounds_info), allocatable, dimension(:) :: pcomm_coords_ghost + $:GPU_DECLARE(create='[pcomm_coords_ghost]') + !! Coordinates for EL particle transfer + + type(int_bounds_info), dimension(3) :: nidx !< Indices for neighboring processors + integer, allocatable, dimension(:,:,:) :: neighbor_ranks + !! Neighbor ranks + + integer, allocatable, dimension(:) :: start_idx !< Starting cell-center index of local processor in global grid type(mpi_io_var), public :: MPI_IO_DATA type(mpi_io_ib_var), public :: MPI_IO_IB_DATA type(mpi_io_airfoil_ib_var), public :: MPI_IO_airfoil_IB_DATA @@ -293,7 +310,9 @@ module m_global_parameters $:GPU_DECLARE(create='[Re_size, Re_size_max, Re_idx]') - ! WENO averaging flag: use arithmetic mean or unaltered WENO-reconstructed cell-boundary values + ! The WENO average (WA) flag regulates whether the calculation of any cell- average spatial derivatives is carried out in each + ! cell by utilizing the arithmetic mean of the left and right, WENO-reconstructed, cell-boundary values or simply, the unaltered + ! left and right, WENO-reconstructed, cell- boundary values. 
!> @{ real(wp) :: wa_flg !> @} @@ -311,24 +330,46 @@ module m_global_parameters $:GPU_DECLARE(create='[dir_idx, dir_flg, dir_idx_tau]') + !> The number of cells that are necessary to be able to store enough boundary conditions data to march the solution in the + !! physical computational domain to the next time-step. integer :: buff_size !< Number of ghost cells for boundary condition storage + $:GPU_DECLARE(create='[buff_size]') - integer :: shear_num !< Number of shear stress components - integer, dimension(3) :: shear_indices !< Indices of the stress components that represent shear stress - integer :: shear_BC_flip_num !< Number of shear stress components to reflect for boundary conditions + integer, allocatable :: beta_vars(:) !< Indices of variables to communicate for bubble/particle coupling + + $:GPU_DECLARE(create='[beta_vars]') + + integer :: shear_num !! Number of shear stress components + integer, dimension(3) :: shear_indices !< Indices of the stress components that represent shear stress + integer :: shear_BC_flip_num !< Number of shear stress components to reflect for boundary conditions + !> Indices of shear stress components to reflect for boundary conditions. Size: (1:3, 1:shear_BC_flip_num) for (x/y/z, + !! [indices]) integer, dimension(3, 2) :: shear_BC_flip_indices !< Shear stress BC reflection indices (1:3, 1:shear_BC_flip_num) + $:GPU_DECLARE(create='[shear_num, shear_indices, shear_BC_flip_num, shear_BC_flip_indices]') ! END: Simulation Algorithm Parameters ! Fluids Physical Parameters + !> Database of the physical parameters of each of the fluids that is present in the flow. These include the stiffened gas + !! equation of state parameters, and the Reynolds numbers. type(physical_parameters), dimension(num_fluids_max) :: fluid_pp !< Stiffened gas EOS parameters and Reynolds numbers per fluid + ! 
Subgrid Bubble Parameters type(subgrid_bubble_physical_parameters) :: bub_pp - integer :: fd_order !< Finite-difference order for CoM and flow probe derivatives - integer :: fd_number !< Finite-difference half-stencil size: MAX(1, fd_order/2) + + ! Subgrid Particle Parameters + type(subgrid_particle_physical_parameters) :: particle_pp + + !> The order of the finite-difference (fd) approximations of the first-order derivatives that need to be evaluated when the CoM + !! or flow probe data files are to be written at each time step + integer :: fd_order + + !> The finite-difference number is given by MAX(1, fd_order/2). Essentially, it is a measure of the half-size of the + !! finite-difference stencil for the selected order of accuracy. + integer :: fd_number $:GPU_DECLARE(create='[fd_order, fd_number]') logical :: probe_wrt @@ -352,6 +393,9 @@ module m_global_parameters type(ib_patch_parameters), dimension(num_patches_max) :: patch_ib !< Immersed boundary patch parameters type(vec3_dt), allocatable, dimension(:) :: airfoil_grid_u, airfoil_grid_l integer :: Np + !! Database of the immersed boundary patch parameters for each of the patches employed in the configuration of the initial + !! condition. Note that the maximum allowable number of patches, num_patches_max, may be changed in the module + !! m_derived_types.f90. $:GPU_DECLARE(create='[ib, num_ibs, patch_ib, Np, airfoil_grid_u, airfoil_grid_l]') !> @} @@ -429,6 +473,12 @@ module m_global_parameters $:GPU_DECLARE(create='[R0ref, p0ref, rho0ref, T0ref, ss, pv, vd, mu_l, mu_v, mu_g, gam_v, gam_g, M_v, M_g, cp_v, cp_g, R_v, R_g]') !> @} + !> @name Solid particle physical parameters + !> @{ + real(wp) :: cp_particle, rho0ref_particle + $:GPU_DECLARE(create='[rho0ref_particle, cp_particle]') + !> @} + !> @name Acoustic acoustic_source parameters !> @{ logical :: acoustic_source !< Acoustic source switch @@ -470,16 +520,31 @@ module m_global_parameters !> @name lagrangian subgrid bubble parameters !> @{! 
- logical :: bubbles_lagrange !< Lagrangian subgrid bubble model switch - type(bubbles_lagrange_parameters) :: lag_params !< Lagrange bubbles' parameters - $:GPU_DECLARE(create='[bubbles_lagrange, lag_params]') + logical :: bubbles_lagrange !< Lagrangian subgrid bubble model switch + logical :: particles_lagrange !< Lagrangian subgrid particle model switch + type(bubbles_lagrange_parameters) :: lag_params !< Lagrange bubbles' parameters + integer :: n_el_bubs_loc, n_el_bubs_glb !< Number of Lagrangian bubbles (local and global) + integer :: n_el_particles_loc, n_el_particles_glb !< Number of Lagrangian bubbles (local and global) + logical :: moving_lag_bubbles + logical :: moving_lag_particles + logical :: lag_pressure_force + logical :: lag_gravity_force + integer :: lag_vel_model, lag_drag_model + $:GPU_DECLARE(create='[bubbles_lagrange, lag_params, n_el_bubs_loc, n_el_bubs_glb]') + $:GPU_DECLARE(create='[particles_lagrange, n_el_particles_loc, n_el_particles_glb]') + $:GPU_DECLARE(create='[moving_lag_particles]') + $:GPU_DECLARE(create='[moving_lag_bubbles, lag_vel_model, lag_drag_model]') + $:GPU_DECLARE(create='[lag_pressure_force, lag_gravity_force]') !> @} real(wp) :: Bx0 !< Constant magnetic field in the x-direction (1D) $:GPU_DECLARE(create='[Bx0]') logical :: fft_wrt + !> AMDFlang workaround: keep a dummy logical to avoid a compiler case-optimization bug when a parameter+GPU-kernel conditional + !! is false logical :: dummy !< AMDFlang workaround for case-optimization + GPU-kernel bug + !> @name Continuum damage model parameters !> @{! real(wp) :: tau_star !< Stress threshold for continuum damage modeling @@ -502,6 +567,7 @@ contains impure subroutine s_assign_default_values_to_user_inputs integer :: i, j !< Generic loop iterator + ! Logistics case_dir = '.' @@ -596,6 +662,7 @@ contains num_bc_patches = 0 bc_io = .false. + periodic_bc = .false. 
bc_x%beg = dflt_int; bc_x%end = dflt_int bc_y%beg = dflt_int; bc_y%end = dflt_int @@ -608,9 +675,9 @@ contains #:endfor #:endfor - x_domain%beg = dflt_real; x_domain%end = dflt_real - y_domain%beg = dflt_real; y_domain%end = dflt_real - z_domain%beg = dflt_real; z_domain%end = dflt_real + glb_bounds(1)%beg = dflt_real; glb_bounds(1)%end = dflt_real + glb_bounds(2)%beg = dflt_real; glb_bounds(2)%end = dflt_real + glb_bounds(3)%beg = dflt_real; glb_bounds(3)%end = dflt_real ! Fluids physical parameters do i = 1, num_fluids_max @@ -645,6 +712,10 @@ contains bub_pp%R_v = dflt_real; R_v = dflt_real bub_pp%R_g = dflt_real; R_g = dflt_real + ! Subgrid particle parameters + particle_pp%rho0ref_particle = dflt_real + particle_pp%cp_particle = dflt_real + ! Tait EOS rhoref = dflt_real pref = dflt_real @@ -774,10 +845,29 @@ contains lag_params%massTransfer_model = .false. lag_params%write_bubbles = .false. lag_params%write_bubbles_stats = .false. + lag_params%write_void_evol = .false. lag_params%nBubs_glb = dflt_int + lag_params%vel_model = dflt_int + lag_params%drag_model = dflt_int + lag_params%pressure_force = .true. + lag_params%gravity_force = .false. lag_params%epsilonb = 1._wp lag_params%charwidth = dflt_real + lag_params%charNz = dflt_int lag_params%valmaxvoid = dflt_real + lag_params%input_path = 'input/lag_bubbles.dat' + lag_params%nParticles_glb = dflt_int + lag_params%qs_drag_model = dflt_int + lag_params%stokes_drag = dflt_int + lag_params%added_mass_model = dflt_int + lag_params%interpolation_order = dflt_int + lag_params%collision_force = .false. + + moving_lag_bubbles = .false. + lag_vel_model = dflt_int + + particles_lagrange = .false. + moving_lag_particles = .false. ! 
Continuum damage model tau_star = dflt_real @@ -837,7 +927,8 @@ contains end subroutine s_assign_default_values_to_user_inputs - !> Initialize the global parameters module + !> The computation of parameters, the allocation of memory, the association of pointers and/or the execution of any other + !! procedures that are necessary to setup the module. impure subroutine s_initialize_global_parameters_module integer :: i, j, k @@ -845,7 +936,6 @@ contains #:if not MFC_CASE_OPTIMIZATION ! Determining the degree of the WENO polynomials - if (recon_type == WENO_TYPE) then weno_polyn = (weno_order - 1)/2 if (teno) then @@ -863,7 +953,9 @@ contains $:GPU_UPDATE(device='[igr, igr_order, igr_iter_solver]') #:endif - ! Initialize counts: viscous fluids, surface-tension interfaces, curvature interfaces + ! Initializing the number of fluids for which viscous effects will be non-negligible, the number of distinctive material + ! interfaces for which surface tension will be important and also, the number of fluids for which the physical and geometric + ! curvatures of the interfaces will be computed Re_size = 0 Re_size_max = 0 @@ -883,19 +975,21 @@ contains sys_size = adv_idx%end ! Volume Fraction Model - else + else ! Fully 3D cylindrical grid ! Annotating structure of the state and flux vectors belonging to the system of equations defined by the selected number ! of spatial dimensions and the volume fraction model if (model_eqns == 2) then - cont_idx%beg = 1 + cont_idx%beg = 1 ! one continuity equation cont_idx%end = num_fluids - mom_idx%beg = cont_idx%end + 1 + mom_idx%beg = cont_idx%end + 1 ! one momentum equation in each direction mom_idx%end = cont_idx%end + num_vels - E_idx = mom_idx%end + 1 + E_idx = mom_idx%end + 1 ! one energy equation if (igr) then - ! IGR: volume fractions after energy (N-1 for N fluids; skipped when num_fluids=1) - adv_idx%beg = E_idx + 1 ! Alpha for fluid 1 + ! Volume fractions are stored in the indices immediately following the energy equation. 
IGR tracks a total of + ! (N-1) volume fractions for N fluids, hence the "-1" in adv_idx%end. If num_fluids = 1 then adv_idx%end < + ! adv_idx%beg, which skips all loops over the volume fractions since there is no volume fraction to track + adv_idx%beg = E_idx + 1 adv_idx%end = E_idx + num_fluids - 1 else ! Volume fractions are stored in the indices immediately following the energy equation. WENO/MUSCL + Riemann @@ -978,11 +1072,11 @@ contains sys_size = B_idx%end end if else if (model_eqns == 3) then - cont_idx%beg = 1 + cont_idx%beg = 1 ! one continuity equation cont_idx%end = num_fluids - mom_idx%beg = cont_idx%end + 1 + mom_idx%beg = cont_idx%end + 1 ! one momentum equation in each direction mom_idx%end = cont_idx%end + num_vels - E_idx = mom_idx%end + 1 + E_idx = mom_idx%end + 1 ! one energy equation adv_idx%beg = E_idx + 1 adv_idx%end = E_idx + num_fluids alf_idx = adv_idx%end @@ -1029,7 +1123,8 @@ contains end if end if - ! Count fluids with non-negligible viscous effects (Re > 0) + ! Determining the number of fluids for which the shear and the volume Reynolds numbers, e.g. viscous effects, are + ! important do i = 1, num_fluids if (fluid_pp(i)%Re(1) > 0) Re_size(1) = Re_size(1) + 1 if (fluid_pp(i)%Re(2) > 0) Re_size(2) = Re_size(2) + 1 @@ -1122,6 +1217,16 @@ contains ! END: Volume Fraction Model + if (bubbles_lagrange) then + @:ALLOCATE(beta_vars(1:3)) + beta_vars(1:3) = [1, 2, 5] + $:GPU_UPDATE(device='[beta_vars]') + else if (particles_lagrange) then + @:ALLOCATE(beta_vars(1:8)) + beta_vars(1:8) = [1, 2, 3, 4, 5, 6, 7, 8] + $:GPU_UPDATE(device='[beta_vars]') + end if + if (chemistry) then species_idx%beg = sys_size + 1 species_idx%end = sys_size + num_species @@ -1134,7 +1239,10 @@ contains else if (bubbles_lagrange) then allocate (MPI_IO_DATA%view(1:sys_size + 1)) allocate (MPI_IO_DATA%var(1:sys_size + 1)) - else + else if (particles_lagrange) then + allocate (MPI_IO_DATA%view(1:sys_size + 1)) + allocate (MPI_IO_DATA%var(1:sys_size + 1)) + else ! 
Fully 3D cylindrical grid allocate (MPI_IO_DATA%view(1:sys_size)) allocate (MPI_IO_DATA%var(1:sys_size)) end if @@ -1155,9 +1263,16 @@ contains allocate (MPI_IO_DATA%var(i)%sf(0:m,0:n,0:p)) MPI_IO_DATA%var(i)%sf => null() end do + else if (particles_lagrange) then + do i = 1, sys_size + 1 + allocate (MPI_IO_DATA%var(i)%sf(0:m,0:n,0:p)) + MPI_IO_DATA%var(i)%sf => null() + end do end if - ! Configure WENO averaging flag (arithmetic mean vs. unaltered values) + ! Configuring the WENO average flag that will be used to regulate whether any spatial derivatives are to computed in each + ! cell by using the arithmetic mean of left and right, WENO-reconstructed, cell-boundary values or otherwise, the unaltered + ! left and right, WENO-reconstructed, cell-boundary values wa_flg = 0._wp; if (weno_avg) wa_flg = 1._wp $:GPU_UPDATE(device='[wa_flg]') @@ -1169,20 +1284,16 @@ contains if (ib) allocate (MPI_IO_IB_DATA%var%sf(0:m,0:n,0:p)) Np = 0 - if (elasticity) then - fd_number = max(1, fd_order/2) - end if - + if (elasticity) fd_number = max(1, fd_order/2) if (mhd) then ! TODO merge with above; waiting for hyperelasticity PR fd_number = max(1, fd_order/2) end if - - if (probe_wrt) then - fd_number = max(1, fd_order/2) - end if + if (probe_wrt) fd_number = max(1, fd_order/2) + if (bubbles_lagrange) fd_number = max(1, fd_order/2) + if (particles_lagrange) fd_number = max(1, fd_order/2) call s_configure_coordinate_bounds(recon_type, weno_polyn, muscl_polyn, igr_order, buff_size, idwint, idwbuff, viscous, & - & bubbles_lagrange, m, n, p, num_dims, igr, ib) + & bubbles_lagrange, particles_lagrange, m, n, p, num_dims, igr, ib, fd_number) $:GPU_UPDATE(device='[idwint, idwbuff]') ! Configuring Coordinate Direction Indexes @@ -1295,6 +1406,8 @@ contains #:endif allocate (proc_coords(1:num_dims)) + @:ALLOCATE(pcomm_coords(1:num_dims)) + @:ALLOCATE(pcomm_coords_ghost(1:num_dims)) if (parallel_io .neqv. .true.) 
return @@ -1327,6 +1440,14 @@ contains end if deallocate (proc_coords) + + @:DEALLOCATE(pcomm_coords) + @:DEALLOCATE(pcomm_coords_ghost) + + if (bubbles_lagrange .or. particles_lagrange) then + @:DEALLOCATE(beta_vars) + end if + if (parallel_io) then deallocate (start_idx) @@ -1334,6 +1455,10 @@ contains do i = 1, sys_size + 1 MPI_IO_DATA%var(i)%sf => null() end do + else if (particles_lagrange) then + do i = 1, sys_size + 1 + MPI_IO_DATA%var(i)%sf => null() + end do else do i = 1, sys_size MPI_IO_DATA%var(i)%sf => null() @@ -1355,6 +1480,10 @@ contains if (p == 0) return @:DEALLOCATE(z_cb, z_cc, dz) + if (allocated(neighbor_ranks)) then + @:DEALLOCATE(neighbor_ranks) + end if + end subroutine s_finalize_global_parameters_module end module m_global_parameters diff --git a/src/simulation/m_ib_patches.fpp b/src/simulation/m_ib_patches.fpp index a7d8e45eee..b394e25e58 100644 --- a/src/simulation/m_ib_patches.fpp +++ b/src/simulation/m_ib_patches.fpp @@ -32,26 +32,26 @@ module m_ib_patches integer :: smooth_patch_id real(wp) :: smooth_coeff $:GPU_DECLARE(create='[smooth_patch_id, smooth_coeff]') - ! These variables are analogous in both meaning and use to the similarly named components in the ic_patch_parameters type (see - ! m_derived_types.f90 for additional details). They are employed as a means to more concisely perform the actions necessary to - ! lay out a particular patch on the grid. + !! These variables are analogous in both meaning and use to the similarly named components in the ic_patch_parameters type (see + !! m_derived_types.f90 for additional details). They are employed as a means to more concisely perform the actions necessary to + !! lay out a particular patch on the grid. real(wp) :: cart_x, cart_y, cart_z real(wp) :: sph_phi $:GPU_DECLARE(create='[cart_x, cart_y, cart_z, sph_phi]') - ! Variables to be used to hold cell locations in Cartesian coordinates if 3D simulation is using cylindrical coordinates + !! 
Variables to be used to hold cell locations in Cartesian coordinates if 3D simulation is using cylindrical coordinates type(bounds_info) :: x_boundary, y_boundary, z_boundary $:GPU_DECLARE(create='[x_boundary, y_boundary, z_boundary]') - ! These variables combine the centroid and length parameters associated with a particular patch to yield the locations of the - ! patch boundaries in the x-, y- and z-coordinate directions. They are used as a means to concisely perform the actions - ! necessary to lay out a particular patch on the grid. + !! These variables combine the centroid and length parameters associated with a particular patch to yield the locations of the + !! patch boundaries in the x-, y- and z-coordinate directions. They are used as a means to concisely perform the actions + !! necessary to lay out a particular patch on the grid. character(len=5) :: istr !< string to store int to string result for error checking contains - !> Apply all immersed boundary patch geometries to mark interior cells in the IB marker array + !> @brief Applies all immersed boundary patch geometries to mark interior cells in the IB marker array. impure subroutine s_apply_ib_patches(ib_markers) type(integer_field), intent(inout) :: ib_markers @@ -112,21 +112,26 @@ contains end subroutine s_apply_ib_patches - !> Mark cells inside a circular immersed boundary + !> The circular patch is a 2D geometry that may be used, for example, in creating a bubble or a droplet. The geometry of the + !! patch is well-defined when its centroid and radius are provided. Note that the circular patch DOES allow for the smoothing of + !! its boundary. + !! @param patch_id is the patch identifier + !! @param ib_markers Array to track patch ids + !! 
@param ib True if this patch is an immersed boundary subroutine s_ib_circle(patch_id, ib_markers, xp, yp) integer, intent(in) :: patch_id integer, intent(in) :: xp, yp !< integers containing the periodicity projection information type(integer_field), intent(inout) :: ib_markers - real(wp), dimension(1:2) :: center + real(wp), dimension(1:2) :: center !< x and y coordinates in local IB frame real(wp) :: radius integer :: i, j, il, ir, jl, jr !< Generic loop iterators integer :: encoded_patch_id ! Transferring the circular patch's radius, centroid, smearing patch identity and smearing coefficient information - center(1) = patch_ib(patch_id)%x_centroid + real(xp, wp)*(x_domain%end - x_domain%beg) - center(2) = patch_ib(patch_id)%y_centroid + real(yp, wp)*(y_domain%end - y_domain%beg) + center(1) = patch_ib(patch_id)%x_centroid + real(xp, wp)*(glb_bounds(1)%end - glb_bounds(1)%beg) + center(2) = patch_ib(patch_id)%y_centroid + real(yp, wp)*(glb_bounds(2)%end - glb_bounds(2)%beg) radius = patch_ib(patch_id)%radius ! encode the periodicity information into the patch_id @@ -140,7 +145,8 @@ contains call get_bounding_indices(center(1) - radius, center(1) + radius, x_cc, il, ir) call get_bounding_indices(center(2) - radius, center(2) + radius, y_cc, jl, jr) - ! Assign primitive variables if circle covers cell and patch has write permission + ! Checking whether the circle covers a particular cell in the domain and verifying whether the current patch has permission + ! to write to that cell. If both queries check out, the primitive variables of the current patch are assigned to this cell. $:GPU_PARALLEL_LOOP(private='[i, j]', copyin='[encoded_patch_id, center, radius]', collapse=2) do j = jl, jr @@ -154,23 +160,25 @@ contains end subroutine s_ib_circle - !> Mark cells inside a 2D NACA 4-digit airfoil immersed boundary + !> @brief Marks cells inside a 2D NACA 4-digit airfoil immersed boundary using upper and lower surface grids. + !! 
@param patch_id is the patch identifier + !! @param ib_markers Array to track patch ids subroutine s_ib_airfoil(patch_id, ib_markers, xp, yp) integer, intent(in) :: patch_id type(integer_field), intent(inout) :: ib_markers - integer, intent(in) :: xp, yp !< integers containing the periodicity projection information + integer, intent(in) :: xp, yp !< integers containing the periodicity projection information real(wp) :: f, ca_in, pa, ma, ta real(wp) :: xa, yt, xu, yu, xl, yl, xc, yc, dycdxc, sin_c, cos_c - integer :: i, j, k, il, ir, jl, jr + integer :: i, j, k, il, ir, jl, jr !< Generic loop iterators integer :: Np1, Np2 integer :: encoded_patch_id - real(wp), dimension(1:3) :: xy_local, offset !< x and y coordinates in local IB frame - real(wp), dimension(1:2) :: center !< x and y coordinates in local IB frame + real(wp), dimension(1:3) :: xy_local, offset !< x and y coordinates in local IB frame + real(wp), dimension(1:2) :: center !< x and y coordinates in local IB frame real(wp), dimension(1:3,1:3) :: inverse_rotation - center(1) = patch_ib(patch_id)%x_centroid + real(xp, wp)*(x_domain%end - x_domain%beg) - center(2) = patch_ib(patch_id)%y_centroid + real(yp, wp)*(y_domain%end - y_domain%beg) + center(1) = patch_ib(patch_id)%x_centroid + real(xp, wp)*(glb_bounds(1)%end - glb_bounds(1)%beg) + center(2) = patch_ib(patch_id)%y_centroid + real(yp, wp)*(glb_bounds(2)%end - glb_bounds(2)%beg) ca_in = patch_ib(patch_id)%c pa = patch_ib(patch_id)%p ma = patch_ib(patch_id)%m @@ -187,7 +195,7 @@ contains @:ALLOCATE(airfoil_grid_u(1:Np)) @:ALLOCATE(airfoil_grid_l(1:Np)) - ! TODO :: The below instantiations are already handled by the loop below + ! TODO :: The below instantiations are already handles by the loop below airfoil_grid_u(1)%x = 0._wp airfoil_grid_u(1)%y = 0._wp @@ -195,7 +203,7 @@ contains airfoil_grid_l(1)%y = 0._wp do i = 1, Np1 + Np2 - 1 - ! TODO :: This allocates the upper and lower airfoil arrays, and does not need to be performed each time the IB + ! 
TODO :: This allocated the upper and lower airfoil arrays, and does not need to be performed each time the IB ! markers are updated. Place this as a separate subroutine. if (i <= Np1) then xc = i*(pa*ca_in/Np1) @@ -257,9 +265,9 @@ contains & ca_in, airfoil_grid_u, airfoil_grid_l]', collapse=2) do j = jl, jr do i = il, ir - xy_local = [x_cc(i) - center(1), y_cc(j) - center(2), 0._wp] ! get coordinate frame centered on IB - xy_local = matmul(inverse_rotation, xy_local) ! rotate the frame into the IB's coordinates - xy_local = xy_local - offset ! airfoils are a patch that require a centroid offset + xy_local = [x_cc(i) - center(1), y_cc(j) - center(2), 0._wp] + xy_local = matmul(inverse_rotation, xy_local) + xy_local = xy_local - offset if (xy_local(1) >= 0._wp .and. xy_local(1) <= ca_in) then xa = xy_local(1)/ca_in @@ -277,11 +285,13 @@ contains end do if (f_approx_equal(airfoil_grid_u(k)%x, xy_local(1))) then if (xy_local(2) <= airfoil_grid_u(k)%y) then + !!IB ib_markers%sf(i, j, 0) = encoded_patch_id end if else f = (airfoil_grid_u(k)%x - xy_local(1))/(airfoil_grid_u(k)%x - airfoil_grid_u(k - 1)%x) if (xy_local(2) <= ((1._wp - f)*airfoil_grid_u(k)%y + f*airfoil_grid_u(k - 1)%y)) then + !!IB ib_markers%sf(i, j, 0) = encoded_patch_id end if end if @@ -292,12 +302,14 @@ contains end do if (f_approx_equal(airfoil_grid_l(k)%x, xy_local(1))) then if (xy_local(2) >= airfoil_grid_l(k)%y) then + !!IB ib_markers%sf(i, j, 0) = encoded_patch_id end if else f = (airfoil_grid_l(k)%x - xy_local(1))/(airfoil_grid_l(k)%x - airfoil_grid_l(k - 1)%x) if (xy_local(2) >= ((1._wp - f)*airfoil_grid_l(k)%y + f*airfoil_grid_l(k - 1)%y)) then + !!IB ib_markers%sf(i, j, 0) = encoded_patch_id end if end if @@ -309,7 +321,10 @@ contains end subroutine s_ib_airfoil - !> Mark cells inside a 3D extruded NACA 4-digit airfoil immersed boundary with finite span + !> @brief Marks cells inside a 3D extruded NACA 4-digit airfoil immersed boundary with finite span. + !! 
@param patch_id is the patch identifier + !! @param ib_markers Array to track patch ids + !! @param ib True if this patch is an immersed boundary subroutine s_ib_3D_airfoil(patch_id, ib_markers, xp, yp, zp) integer, intent(in) :: patch_id @@ -322,9 +337,9 @@ contains real(wp), dimension(1:3) :: xyz_local, center, offset !< x, y, z coordinates in local IB frame real(wp), dimension(1:3,1:3) :: inverse_rotation - center(1) = patch_ib(patch_id)%x_centroid + real(xp, wp)*(x_domain%end - x_domain%beg) - center(2) = patch_ib(patch_id)%y_centroid + real(yp, wp)*(y_domain%end - y_domain%beg) - center(3) = patch_ib(patch_id)%z_centroid + real(zp, wp)*(z_domain%end - z_domain%beg) + center(1) = patch_ib(patch_id)%x_centroid + real(xp, wp)*(glb_bounds(1)%end - glb_bounds(1)%beg) + center(2) = patch_ib(patch_id)%y_centroid + real(yp, wp)*(glb_bounds(2)%end - glb_bounds(2)%beg) + center(3) = patch_ib(patch_id)%z_centroid + real(zp, wp)*(glb_bounds(3)%end - glb_bounds(3)%beg) lz = patch_ib(patch_id)%length_z ca_in = patch_ib(patch_id)%c pa = patch_ib(patch_id)%p @@ -418,8 +433,8 @@ contains do i = il, ir xyz_local = [x_cc(i) - center(1), y_cc(j) - center(2), & & z_cc(l) - center(3)] ! get coordinate frame centered on IB - xyz_local = matmul(inverse_rotation, xyz_local) ! rotate the frame into the IB's coordinates - xyz_local = xyz_local - offset ! airfoils are a patch that require a centroid offset + xyz_local = matmul(inverse_rotation, xyz_local) + xyz_local = xyz_local - offset if (xyz_local(3) >= z_min .and. xyz_local(3) <= z_max) then if (xyz_local(1) >= 0._wp .and. 
xyz_local(1) <= ca_in) then @@ -436,6 +451,7 @@ contains else f = (airfoil_grid_u(k)%x - xyz_local(1))/(airfoil_grid_u(k)%x - airfoil_grid_u(k - 1)%x) if (xyz_local(2) <= ((1._wp - f)*airfoil_grid_u(k)%y + f*airfoil_grid_u(k - 1)%y)) then + !!IB ib_markers%sf(i, j, l) = encoded_patch_id end if end if @@ -446,12 +462,14 @@ contains end do if (f_approx_equal(airfoil_grid_l(k)%x, xyz_local(1))) then if (xyz_local(2) >= airfoil_grid_l(k)%y) then + !!IB ib_markers%sf(i, j, l) = encoded_patch_id end if else f = (airfoil_grid_l(k)%x - xyz_local(1))/(airfoil_grid_l(k)%x - airfoil_grid_l(k - 1)%x) if (xyz_local(2) >= ((1._wp - f)*airfoil_grid_l(k)%y + f*airfoil_grid_l(k - 1)%y)) then + !!IB ib_markers%sf(i, j, l) = encoded_patch_id end if end if @@ -465,23 +483,29 @@ contains end subroutine s_ib_3D_airfoil - !> Mark cells inside a rectangular immersed boundary + !> The rectangular patch is a 2D geometry that may be used, for example, in creating a solid boundary, or pre-/post- shock + !! region, in alignment with the axes of the Cartesian coordinate system. The geometry of such a patch is well- defined when its + !! centroid and lengths in the x- and y- coordinate directions are provided. Please note that the rectangular patch DOES NOT + !! allow for the smoothing of its boundaries. + !! @param patch_id is the patch identifier + !! @param ib_markers Array to track patch ids + !! 
@param ib True if this patch is an immersed boundary subroutine s_ib_rectangle(patch_id, ib_markers, xp, yp) integer, intent(in) :: patch_id type(integer_field), intent(inout) :: ib_markers integer, intent(in) :: xp, yp !< integers containing the periodicity projection information - integer :: i, j, il, ir, jl, jr !< generic loop iterators + integer :: i, j, il, ir, jl, jr !< Generic loop iterators integer :: encoded_patch_id - real(wp) :: corner_distance !< Equation of state parameters + real(wp) :: corner_distance real(wp), dimension(1:3) :: xy_local !< x and y coordinates in local IB frame real(wp), dimension(1:2) :: length, center !< x and y coordinates in local IB frame real(wp), dimension(1:3,1:3) :: inverse_rotation ! Transferring the rectangle's centroid and length information - center(1) = patch_ib(patch_id)%x_centroid + real(xp, wp)*(x_domain%end - x_domain%beg) - center(2) = patch_ib(patch_id)%y_centroid + real(yp, wp)*(y_domain%end - y_domain%beg) + center(1) = patch_ib(patch_id)%x_centroid + real(xp, wp)*(glb_bounds(1)%end - glb_bounds(1)%beg) + center(2) = patch_ib(patch_id)%y_centroid + real(yp, wp)*(glb_bounds(2)%end - glb_bounds(2)%beg) length(1) = patch_ib(patch_id)%length_x length(2) = patch_ib(patch_id)%length_y inverse_rotation(:,:) = patch_ib(patch_id)%rotation_matrix_inverse(:,:) @@ -498,7 +522,9 @@ contains call get_bounding_indices(center(1) - corner_distance, center(1) + corner_distance, x_cc, il, ir) call get_bounding_indices(center(2) - corner_distance, center(2) + corner_distance, y_cc, jl, jr) - ! Assign primitive variables if rectangle covers cell and patch has write permission + ! Checking whether the rectangle covers a particular cell in the domain and verifying whether the current patch has the + ! permission to write to that cell. If both queries check out, the primitive variables of the current patch are assigned to + ! this cell. 
$:GPU_PARALLEL_LOOP(private='[i, j, xy_local]', copyin='[encoded_patch_id, center, length, inverse_rotation, x_cc, & & y_cc]', collapse=2) do j = jl, jr @@ -518,12 +544,18 @@ contains end subroutine s_ib_rectangle - !> Mark cells inside a spherical immersed boundary + !> The spherical patch is a 3D geometry that may be used, for example, in creating a bubble or a droplet. The patch geometry is + !! well-defined when its centroid and radius are provided. Please note that the spherical patch DOES allow for the smoothing of + !! its boundary. + !! @param patch_id is the patch identifier + !! @param ib_markers Array to track patch ids + !! @param ib True if this patch is an immersed boundary subroutine s_ib_sphere(patch_id, ib_markers, xp, yp, zp) integer, intent(in) :: patch_id type(integer_field), intent(inout) :: ib_markers integer, intent(in) :: xp, yp, zp !< integers containing the periodicity projection information + ! Generic loop iterators integer :: i, j, k integer :: il, ir, jl, jr, kl, kr @@ -531,13 +563,14 @@ contains real(wp) :: radius real(wp), dimension(1:3) :: center - ! Variables to initialize the pressure field that corresponds to the bubble-collapse test case found in Tiwari et al. (2013) + !! Variables to initialize the pressure field that corresponds to the bubble-collapse test case found in Tiwari et al. + !! (2013) ! 
Transferring spherical patch's radius, centroid, smoothing patch identity and smoothing coefficient information - center(1) = patch_ib(patch_id)%x_centroid + real(xp, wp)*(x_domain%end - x_domain%beg) - center(2) = patch_ib(patch_id)%y_centroid + real(yp, wp)*(y_domain%end - y_domain%beg) - center(3) = patch_ib(patch_id)%z_centroid + real(zp, wp)*(z_domain%end - z_domain%beg) + center(1) = patch_ib(patch_id)%x_centroid + real(xp, wp)*(glb_bounds(1)%end - glb_bounds(1)%beg) + center(2) = patch_ib(patch_id)%y_centroid + real(yp, wp)*(glb_bounds(2)%end - glb_bounds(2)%beg) + center(3) = patch_ib(patch_id)%z_centroid + real(zp, wp)*(glb_bounds(3)%end - glb_bounds(3)%beg) radius = patch_ib(patch_id)%radius ! encode the periodicity information into the patch_id @@ -560,6 +593,7 @@ contains do k = kl, kr do j = jl, jr do i = il, ir + ! do i = -gp_layers, m+gp_layers if (grid_geometry == 3) then call s_convert_cylindrical_to_cartesian_coord(y_cc(j), z_cc(k)) else @@ -577,7 +611,12 @@ contains end subroutine s_ib_sphere - !> Mark cells inside a cuboidal immersed boundary + !> The cuboidal patch is a 3D geometry that may be used, for example, in creating a solid boundary, or pre-/post-shock region, + !! which is aligned with the axes of the Cartesian coordinate system. The geometry of such a patch is well- defined when its + !! centroid and lengths in the x-, y- and z-coordinate directions are provided. Please notice that the cuboidal patch DOES NOT + !! allow for the smearing of its boundaries. + !! @param patch_id is the patch identifier + !! @param ib_markers Array to track patch ids subroutine s_ib_cuboid(patch_id, ib_markers, xp, yp, zp) integer, intent(in) :: patch_id @@ -591,9 +630,9 @@ contains ! 
Transferring the cuboid's centroid and length information - center(1) = patch_ib(patch_id)%x_centroid + real(xp, wp)*(x_domain%end - x_domain%beg) - center(2) = patch_ib(patch_id)%y_centroid + real(yp, wp)*(y_domain%end - y_domain%beg) - center(3) = patch_ib(patch_id)%z_centroid + real(zp, wp)*(z_domain%end - z_domain%beg) + center(1) = patch_ib(patch_id)%x_centroid + real(xp, wp)*(glb_bounds(1)%end - glb_bounds(1)%beg) + center(2) = patch_ib(patch_id)%y_centroid + real(yp, wp)*(glb_bounds(2)%end - glb_bounds(2)%beg) + center(3) = patch_ib(patch_id)%z_centroid + real(zp, wp)*(glb_bounds(3)%end - glb_bounds(3)%beg) length(1) = patch_ib(patch_id)%length_x length(2) = patch_ib(patch_id)%length_y length(3) = patch_ib(patch_id)%length_z @@ -630,7 +669,7 @@ contains cart_z = z_cc(k) end if xyz_local = [x_cc(i), cart_y, cart_z] - center ! get coordinate frame centered on IB - xyz_local = matmul(inverse_rotation, xyz_local) ! rotate the frame into the IB's coordinates + xyz_local = matmul(inverse_rotation, xyz_local) if (-0.5*length(1) <= xyz_local(1) .and. 0.5*length(1) >= xyz_local(1) .and. -0.5*length(2) <= xyz_local(2) & & .and. 0.5*length(2) >= xyz_local(2) .and. -0.5*length(3) <= xyz_local(3) .and. 0.5*length(3) & @@ -645,7 +684,13 @@ contains end subroutine s_ib_cuboid - !> Mark cells inside a cylindrical immersed boundary + !> The cylindrical patch is a 3D geometry that may be used, for example, in setting up a cylindrical solid boundary confinement, + !! like a blood vessel. The geometry of this patch is well-defined when the centroid, the radius and the length along the + !! cylinder's axis, parallel to the x-, y- or z-coordinate direction, are provided. Please note that the cylindrical patch DOES + !! allow for the smoothing of its lateral boundary. + !! @param patch_id is the patch identifier + !! @param ib_markers Array to track patch ids + !! 
@param ib True if this patch is an immersed boundary subroutine s_ib_cylinder(patch_id, ib_markers, xp, yp, zp) integer, intent(in) :: patch_id @@ -660,9 +705,9 @@ contains ! Transferring the cylindrical patch's centroid, length, radius, - center(1) = patch_ib(patch_id)%x_centroid + real(xp, wp)*(x_domain%end - x_domain%beg) - center(2) = patch_ib(patch_id)%y_centroid + real(yp, wp)*(y_domain%end - y_domain%beg) - center(3) = patch_ib(patch_id)%z_centroid + real(zp, wp)*(z_domain%end - z_domain%beg) + center(1) = patch_ib(patch_id)%x_centroid + real(xp, wp)*(glb_bounds(1)%end - glb_bounds(1)%beg) + center(2) = patch_ib(patch_id)%y_centroid + real(yp, wp)*(glb_bounds(2)%end - glb_bounds(2)%beg) + center(3) = patch_ib(patch_id)%z_centroid + real(zp, wp)*(glb_bounds(3)%end - glb_bounds(3)%beg) length(1) = patch_ib(patch_id)%length_x length(2) = patch_ib(patch_id)%length_y length(3) = patch_ib(patch_id)%length_z @@ -698,7 +743,7 @@ contains cart_z = z_cc(k) end if xyz_local = [x_cc(i), cart_y, cart_z] - center ! get coordinate frame centered on IB - xyz_local = matmul(inverse_rotation, xyz_local) ! rotate the frame into the IB's coordinates + xyz_local = matmul(inverse_rotation, xyz_local) if (((.not. f_is_default(length(1)) .and. xyz_local(2)**2 + xyz_local(3)**2 <= radius**2 .and. & & -0.5_wp*length(1) <= xyz_local(1) .and. 0.5_wp*length(1) >= xyz_local(1)) & @@ -716,7 +761,7 @@ contains end subroutine s_ib_cylinder - !> Mark cells inside a 2D elliptical immersed boundary + !> @brief Marks cells inside a 2D elliptical immersed boundary defined by semi-axis lengths and rotation. subroutine s_ib_ellipse(patch_id, ib_markers, xp, yp) integer, intent(in) :: patch_id @@ -731,8 +776,8 @@ contains ! 
Transferring the ellipse's centroid and length information - center(1) = patch_ib(patch_id)%x_centroid + real(xp, wp)*(x_domain%end - x_domain%beg) - center(2) = patch_ib(patch_id)%y_centroid + real(yp, wp)*(y_domain%end - y_domain%beg) + center(1) = patch_ib(patch_id)%x_centroid + real(xp, wp)*(glb_bounds(1)%end - glb_bounds(1)%beg) + center(2) = patch_ib(patch_id)%y_centroid + real(yp, wp)*(glb_bounds(2)%end - glb_bounds(2)%beg) ellipse_coeffs(1) = 0.5_wp*patch_ib(patch_id)%length_x ellipse_coeffs(2) = 0.5_wp*patch_ib(patch_id)%length_y inverse_rotation(:,:) = patch_ib(patch_id)%rotation_matrix_inverse(:,:) @@ -769,6 +814,8 @@ contains end subroutine s_ib_ellipse !> The STL patch is a 2D geometry that is imported from an STL file. + !! @param patch_id is the patch identifier + !! @param ib_markers Array to track patch ids subroutine s_ib_model(patch_id, ib_markers, xp, yp) integer, intent(in) :: patch_id @@ -786,8 +833,8 @@ contains real(wp), dimension(1:3,1:3) :: inverse_rotation, rotation center = 0._wp - center(1) = patch_ib(patch_id)%x_centroid + real(xp, wp)*(x_domain%end - x_domain%beg) - center(2) = patch_ib(patch_id)%y_centroid + real(yp, wp)*(y_domain%end - y_domain%beg) + center(1) = patch_ib(patch_id)%x_centroid + real(xp, wp)*(glb_bounds(1)%end - glb_bounds(1)%beg) + center(2) = patch_ib(patch_id)%y_centroid + real(yp, wp)*(glb_bounds(2)%end - glb_bounds(2)%beg) inverse_rotation(:,:) = patch_ib(patch_id)%rotation_matrix_inverse(:,:) rotation(:,:) = patch_ib(patch_id)%rotation_matrix(:,:) offset(:) = patch_ib(patch_id)%centroid_offset(:) @@ -846,6 +893,8 @@ contains end subroutine s_ib_model !> The STL patch is a 3D geometry that is imported from an STL file. + !! @param patch_id is the patch identifier + !! 
@param ib_markers Array to track patch ids subroutine s_ib_3d_model(patch_id, ib_markers, xp, yp, zp) integer, intent(in) :: patch_id @@ -862,9 +911,9 @@ contains real(wp), dimension(1:3) :: bbox_min, bbox_max, local_corner, world_corner center = 0._wp - center(1) = patch_ib(patch_id)%x_centroid + real(xp, wp)*(x_domain%end - x_domain%beg) - center(2) = patch_ib(patch_id)%y_centroid + real(yp, wp)*(y_domain%end - y_domain%beg) - center(3) = patch_ib(patch_id)%z_centroid + real(zp, wp)*(z_domain%end - z_domain%beg) + center(1) = patch_ib(patch_id)%x_centroid + real(xp, wp)*(glb_bounds(1)%end - glb_bounds(1)%beg) + center(2) = patch_ib(patch_id)%y_centroid + real(yp, wp)*(glb_bounds(2)%end - glb_bounds(2)%beg) + center(3) = patch_ib(patch_id)%z_centroid + real(zp, wp)*(glb_bounds(3)%end - glb_bounds(3)%beg) inverse_rotation(:,:) = patch_ib(patch_id)%rotation_matrix_inverse(:,:) offset(:) = patch_ib(patch_id)%centroid_offset(:) spc = patch_ib(patch_id)%model_spc @@ -932,7 +981,7 @@ contains end subroutine s_ib_3d_model - !> Compute a rotation matrix for converting to the rotating frame of the boundary + !> Subroutine that computes a rotation matrix for converting to the rotating frame of the boundary subroutine s_update_ib_rotation_matrix(patch_id) integer, intent(in) :: patch_id @@ -978,7 +1027,7 @@ contains end subroutine s_update_ib_rotation_matrix - !> Convert cylindrical (r, theta) coordinates to Cartesian (y, z) + !> @brief Converts cylindrical (r, theta) coordinates to Cartesian (y, z) and stores in module variables. subroutine s_convert_cylindrical_to_cartesian_coord(cyl_y, cyl_z) $:GPU_ROUTINE(parallelism='[seq]') @@ -990,7 +1039,7 @@ contains end subroutine s_convert_cylindrical_to_cartesian_coord - !> Convert a 3D cylindrical coordinate vector (x, r, theta) to Cartesian (x, y, z) + !> @brief Converts a 3D cylindrical coordinate vector (x, r, theta) to Cartesian (x, y, z). 
pure function f_convert_cyl_to_cart(cyl) result(cart) $:GPU_ROUTINE(parallelism='[seq]') @@ -1002,7 +1051,7 @@ contains end function f_convert_cyl_to_cart - !> Convert cylindrical coordinates (x, r) to the spherical azimuthal angle phi + !> @brief Converts cylindrical coordinates (x, r) to the spherical azimuthal angle phi and stores in a module variable. subroutine s_convert_cylindrical_to_spherical_coord(cyl_x, cyl_y) $:GPU_ROUTINE(parallelism='[seq]') @@ -1052,7 +1101,7 @@ contains end subroutine get_bounding_indices - !> Encode the patch ID with a unique offset containing periodicity information + !> @brief encodes the patch id with a unique offset that contains information on how the IB marker wraps periodically subroutine s_encode_patch_periodicity(patch_id, x_periodicity, y_periodicity, z_periodicity, encoded_patch_id) integer, intent(in) :: patch_id, x_periodicity, y_periodicity, z_periodicity @@ -1070,7 +1119,7 @@ contains end subroutine s_encode_patch_periodicity - !> Decode the encoded ID to recover the original patch ID and periodicity + !> @brief decodes the encoded id to get out the original id and the way in which it is periodic subroutine s_decode_patch_periodicity(encoded_patch_id, patch_id, x_periodicity, y_periodicity, z_periodicity) $:GPU_ROUTINE(parallelism='[seq]') @@ -1096,7 +1145,7 @@ contains end subroutine s_decode_patch_periodicity - !> Determine the periodic wrapping bounds in each direction + !> @brief Determines if we should wrap periodically subroutine s_get_periodicities(xp_lower, xp_upper, yp_lower, yp_upper, zp_lower, zp_upper) integer, intent(out) :: xp_lower, xp_upper, yp_lower, yp_upper @@ -1104,33 +1153,26 @@ contains ! check domain wraps in x, y - #:for X in [('x'), ('y')] - ! check for periodicity - - if (bc_${X}$%beg == BC_PERIODIC) then - ${X}$p_lower = -1 - ${X}$p_upper = 1 - else - ! 
if it is not periodic, then both elements are 0 - ${X}$p_lower = 0 - ${X}$p_upper = 0 + #:for X, ID in [('x', 1), ('y', 2), ('z', 3)] + if (num_dims >= ${ID}$) then + ! check for periodicity + if (bc_${X}$%beg == BC_PERIODIC) then + ${X}$p_lower = -1 + ${X}$p_upper = 1 + else + ! if it is not periodic, then both elements are 0 + ${X}$p_lower = 0 + ${X}$p_upper = 0 + end if end if #:endfor - ! z only if 3D - if (present(zp_lower) .and. p /= 0) then - if (bc_z%beg == BC_PERIODIC) then - zp_lower = -1 - zp_upper = 1 - else - zp_lower = 0 - zp_upper = 0 - end if - end if - end subroutine s_get_periodicities !> Archimedes spiral function + !! @param myth Angle + !! @param offset Thickness + !! @param a Starting position pure elemental function f_r(myth, offset, a) $:GPU_ROUTINE(parallelism='[seq]') diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index 785bf2dec3..eb241db76a 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -47,7 +47,7 @@ contains if (p > 0) then @:ALLOCATE(ib_markers%sf(-buff_size:m+buff_size, -buff_size:n+buff_size, -buff_size:p+buff_size)) - else + else ! we do not have an analytic moment of inertia calculation and need to approximate it directly via a sum @:ALLOCATE(ib_markers%sf(-buff_size:m+buff_size, -buff_size:n+buff_size, 0:0)) end if @@ -79,10 +79,11 @@ contains $:GPU_UPDATE(device='[patch_ib(1:num_ibs)]') ! GPU routines require updated cell centers - $:GPU_UPDATE(device='[num_ibs, x_cc, y_cc, dx, dy, x_domain, y_domain]') + $:GPU_UPDATE(device='[num_ibs, x_cc, y_cc, dx, dy]') if (p /= 0) then - $:GPU_UPDATE(device='[z_cc, dz, z_domain]') + $:GPU_UPDATE(device='[z_cc, dz]') end if + $:GPU_UPDATE(device='[glb_bounds]') ! allocate STL models call s_instantiate_STL_models() @@ -102,7 +103,7 @@ contains if (moving_immersed_boundary_flag) then call s_mpi_allreduce_integer_sum(num_gps, max_num_gps) max_num_gps = min(max_num_gps*2, (m + 1)*(n + 1)*(p + 1)) - else + else ! 
we do not have an analytic moment of inertia calculation and need to approximate it directly via a sum max_num_gps = num_gps end if @@ -111,7 +112,6 @@ contains @:ALLOCATE(ghost_points(1:max_num_gps)) $:GPU_ENTER_DATA(copyin='[ghost_points]') - ! Ghost-cell IBM, Tseng & Ferziger JCP (2003), Mittal & Iaccarino ARFM (2005) call s_find_ghost_points(ghost_points) call s_apply_levelset(ghost_points, num_gps) @@ -122,14 +122,16 @@ contains end subroutine s_ibm_setup - !> Update the conservative variables at the ghost points + !> Subroutine that updates the conservative variables at the ghost points + !! @param pb_in Internal bubble pressure + !! @param mv_in Mass of vapor in bubble subroutine s_ibm_correct_state(q_cons_vf, q_prim_vf, pb_in, mv_in) type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf !< Primitive Variables type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf !< Primitive Variables real(stp), dimension(idwbuff(1)%beg:,idwbuff(2)%beg:,idwbuff(3)%beg:,1:,1:), optional, intent(inout) :: pb_in, mv_in integer :: i, j, k, l, q, r !< Iterator variables - integer :: patch_id !< Patch ID of ghost point + integer :: patch_id real(wp) :: rho, gamma, pi_inf, dyn_pres !< Mixture variables real(wp), dimension(2) :: Re_K real(wp) :: G_K @@ -151,7 +153,7 @@ contains real(wp), dimension(nb*nmom) :: nmom_IP real(wp), dimension(nb*nnode) :: presb_IP, massv_IP #:endif - ! Primitive variables at the image point associated with a ghost point, interpolated from surrounding fluid cells. + !! Primitive variables at the image point associated with a ghost point, interpolated from surrounding fluid cells. real(wp), dimension(3) :: norm !< Normal vector from GP to IP real(wp), dimension(3) :: physical_loc !< Physical loc of GP @@ -164,7 +166,6 @@ contains type(ghost_point) :: innerp ! 
set the Moving IBM interior conservative variables - $:GPU_PARALLEL_LOOP(private='[i, j, k, patch_id, rho]', copyin='[E_idx, momxb]', collapse=3) do l = 0, p do k = 0, n @@ -240,10 +241,11 @@ contains q_prim_vf(E_idx)%sf(j, k, l) = 0._wp $:GPU_LOOP(parallelism='[seq]') do q = 1, num_fluids - ! Pressure correction for moving IB: accounts for acceleration of IB surface + ! Se the pressure inside a moving immersed boundary based upon the pressure of the image point. + ! acceleration, and normal vector direction q_prim_vf(E_idx)%sf(j, k, l) = q_prim_vf(E_idx)%sf(j, k, & & l) + pres_IP/(1._wp - 2._wp*abs(gp%levelset*alpha_rho_IP(q)/pres_IP) & - & *dot_product(patch_ib(patch_id) %force/patch_ib(patch_id)%mass, gp%levelset_norm)) + & *dot_product(patch_ib(patch_id)%force/patch_ib(patch_id)%mass, gp%levelset_norm)) end do end if @@ -371,7 +373,8 @@ contains end subroutine s_ibm_correct_state - !> Compute the image points for each ghost point + !> Function that computes the image points for each ghost point + !! 
@param ghost_points_in Ghost Points impure subroutine s_compute_image_points(ghost_points_in) type(ghost_point), dimension(num_gps), intent(inout) :: ghost_points_in @@ -384,7 +387,7 @@ contains type(ghost_point) :: gp integer :: q, dim !< Iterator variables integer :: i, j, k, l !< Location indexes - integer :: patch_id !< IB Patch ID + integer :: patch_id integer :: dir integer :: index logical :: bounds_error @@ -450,12 +453,12 @@ contains print *, [x_cc(i), y_cc(j), z_cc(k)] end if print *, "We are searching in dimension ", dim, " for image point at ", ghost_points_in(q)%ip_loc(:) - print *, "Domain size: ", [x_cc(-buff_size), y_cc(-buff_size), z_cc(-buff_size)] + print *, "Domain size: " print *, "x: ", x_cc(-buff_size), " to: ", x_cc(m + buff_size - 1) print *, "y: ", y_cc(-buff_size), " to: ", y_cc(n + buff_size - 1) if (p /= 0) print *, "z: ", z_cc(-buff_size), " to: ", z_cc(p + buff_size - 1) print *, "Image point is located approximately ", & - & (ghost_points_in(q)%loc(dim) - ghost_points_in(q) %ip_loc(dim))/(s_cc(1) - s_cc(0)), & + & (ghost_points_in(q)%loc(dim) - ghost_points_in(q)%ip_loc(dim))/(s_cc(1) - s_cc(0)), & & " grid cells away" print *, "Levelset ", dist, " and Norm: ", norm(:) print *, & @@ -480,7 +483,7 @@ contains end subroutine s_compute_image_points - !> Count the number of ghost points for memory allocation + !> Subroutine that finds the number of ghost points, used for allocating memory. 
subroutine s_find_num_ghost_points(num_gps_out) integer, intent(out) :: num_gps_out @@ -525,7 +528,7 @@ contains end subroutine s_find_num_ghost_points - !> Locate all ghost points in the domain + !> Function that finds the ghost points subroutine s_find_ghost_points(ghost_points_in) type(ghost_point), dimension(num_gps), intent(inout) :: ghost_points_in @@ -541,8 +544,7 @@ contains if (p == 0) gp_layers_z = 0 $:GPU_PARALLEL_LOOP(private='[i, j, k, ii, jj, kk, is_gp, local_idx, patch_id, encoded_patch_id, xp, yp, zp]', & - & copyin='[count, count_i, x_domain, y_domain, z_domain]', firstprivate='[gp_layers, gp_layers_z]', & - & collapse=3) + & copyin='[count, count_i, glb_bounds]', firstprivate='[gp_layers, gp_layers_z]', collapse=3) do i = 0, m do j = 0, n do k = 0, p @@ -562,7 +564,7 @@ contains if (is_gp) then $:GPU_ATOMIC(atomic='capture') - count = count + 1 + count = count + 1 ! increment the count of total cells in the boundary local_idx = count $:END_GPU_ATOMIC_CAPTURE() @@ -576,26 +578,26 @@ contains ghost_points_in(local_idx)%z_periodicity = zp ghost_points_in(local_idx)%slip = patch_ib(patch_id)%slip - if ((x_cc(i) - dx(i)) < x_domain%beg) then + if ((x_cc(i) - dx(i)) < glb_bounds(1)%beg) then ghost_points_in(local_idx)%DB(1) = -1 - else if ((x_cc(i) + dx(i)) > x_domain%end) then + else if ((x_cc(i) + dx(i)) > glb_bounds(1)%end) then ghost_points_in(local_idx)%DB(1) = 1 else ghost_points_in(local_idx)%DB(1) = 0 end if - if ((y_cc(j) - dy(j)) < y_domain%beg) then + if ((y_cc(j) - dy(j)) < glb_bounds(2)%beg) then ghost_points_in(local_idx)%DB(2) = -1 - else if ((y_cc(j) + dy(j)) > y_domain%end) then + else if ((y_cc(j) + dy(j)) > glb_bounds(2)%end) then ghost_points_in(local_idx)%DB(2) = 1 else ghost_points_in(local_idx)%DB(2) = 0 end if if (p /= 0) then - if ((z_cc(k) - dz(k)) < z_domain%beg) then + if ((z_cc(k) - dz(k)) < glb_bounds(3)%beg) then ghost_points_in(local_idx)%DB(3) = -1 - else if ((z_cc(k) + dz(k)) > z_domain%end) then + else if ((z_cc(k) + 
dz(k)) > glb_bounds(3)%end) then ghost_points_in(local_idx)%DB(3) = 1 else ghost_points_in(local_idx)%DB(3) = 0 @@ -610,7 +612,7 @@ contains end subroutine s_find_ghost_points - !> Compute the interpolation coefficients for image points + !> Function that computes the interpolation coefficients of image points subroutine s_compute_interpolation_coeffs(ghost_points_in) type(ghost_point), dimension(num_gps), intent(inout) :: ghost_points_in @@ -715,10 +717,27 @@ contains end subroutine s_compute_interpolation_coeffs - !> Interpolate primitive variables to a ghost point's image point using bilinear or trilinear interpolation + !> Function that uses the interpolation coefficients and the current state at the cell centers in order to estimate the state at + !! the image point + !! @param gp Ghost point data structure + !> @brief Interpolates primitive variables from the fluid domain to a ghost point's image point using bilinear or trilinear + !! interpolation. + !! @param alpha_rho_IP Partial density at image point + !! @param alpha_IP Volume fraction at image point + !! @param pres_IP Pressure at image point + !! @param vel_IP Velocity at image point + !! @param c_IP Speed of sound at image point + !! @param r_IP Bubble radius at image point + !! @param v_IP Bubble radial velocity at image point + !! @param pb_IP Bubble pressure at image point + !! @param mv_IP Bubble vapor mass at image point + !! @param nmom_IP Bubble moment at image point + !! @param pb_in Internal bubble pressure array + !! @param mv_in Mass of vapor in bubble array + !! @param presb_IP Bubble node pressure at image point + !! 
@param massv_IP Bubble node vapor mass at image point subroutine s_interpolate_image_point(q_prim_vf, gp, alpha_rho_IP, alpha_IP, pres_IP, vel_IP, c_IP, r_IP, v_IP, pb_IP, mv_IP, & - - & nmom_IP, pb_in, mv_in, presb_IP, massv_IP) + & nmom_IP, pb_in, mv_in, presb_IP, massv_IP) $:GPU_ROUTINE(parallelism='[seq]') type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf !< Primitive Variables real(stp), optional, dimension(idwbuff(1)%beg:,idwbuff(2)%beg:,idwbuff(3)%beg:,1:,1:), intent(in) :: pb_in, mv_in @@ -879,9 +898,11 @@ contains end subroutine s_update_mib - !> Compute pressure and viscous forces and torques on immersed bodies via volume integration + !> @brief Computes pressure and viscous forces and torques on immersed bodies via a volume integration method. subroutine s_compute_ib_forces(q_prim_vf, fluid_pp) + ! real(wp), dimension(idwbuff(1)%beg:idwbuff(1)%end, & idwbuff(2)%beg:idwbuff(2)%end, & idwbuff(3)%beg:idwbuff(3)%end), + ! intent(in) :: pressure type(scalar_field), dimension(1:sys_size), intent(in) :: q_prim_vf type(physical_parameters), dimension(1:num_fluids), intent(in) :: fluid_pp integer :: gp_id, i, j, k, l, q, ib_idx, fluid_idx @@ -967,7 +988,7 @@ contains viscous_stress_div(1,1:3) = (viscous_stress_div_2(1,1:3) - viscous_stress_div_1(1, & & 1:3))/(2._wp*dx) ! get x derivative of the first-row of viscous stress tensor local_force_contribution(1:3) = local_force_contribution(1:3) + viscous_stress_div(1, & - & 1:3) ! add the x components of the divergence to the force + & 1:3) ! 
add the y components of the divergence to the force call s_compute_viscous_stress_tensor(viscous_stress_div_1, q_prim_vf, dynamic_viscosity, i, j - 1, k) call s_compute_viscous_stress_tensor(viscous_stress_div_2, q_prim_vf, dynamic_viscosity, i, j + 1, k) @@ -1031,7 +1052,7 @@ contains end subroutine s_compute_ib_forces - !> Finalize the IBM module + !> Subroutine to deallocate memory reserved for the IBM module impure subroutine s_finalize_ibm_module() @:DEALLOCATE(ib_markers%sf) @@ -1093,13 +1114,14 @@ contains ! rotate the centroid offset back into the local coords of the IB patch_ib(ib_marker)%centroid_offset = matmul(patch_ib(ib_marker)%rotation_matrix_inverse, & & patch_ib(ib_marker)%centroid_offset) - else + else ! we do not have an analytic moment of inertia calculation and need to approximate it directly via a sum patch_ib(ib_marker)%centroid_offset(:) = [0._wp, 0._wp, 0._wp] end if end subroutine s_compute_centroid_offset !> Computes the moment of inertia for an immersed boundary + !! @param ib_marker Immersed boundary marker index subroutine s_compute_moment_of_inertia(ib_marker, axis) real(wp), dimension(3), intent(in) :: axis !< the axis about which we compute the moment. Only required in 3D. @@ -1114,7 +1136,7 @@ contains ! if the object is not actually rotating at this time, return a dummy value and exit patch_ib(ib_marker)%moment = 1._wp return - else + else ! we do not have an analytic moment of inertia calculation and need to approximate it directly via a sum normal_axis = axis/sqrt(sum(axis)) end if @@ -1132,8 +1154,8 @@ contains else ! we do not have an analytic moment of inertia calculation and need to approximate it directly via a sum count = 0 moment = 0._wp - cell_volume = (x_cc(1) - x_cc(0))*(y_cc(1) - y_cc(0)) - ! computed without grid stretching. Update in the loop to perform with stretching + cell_volume = (x_cc(1) - x_cc(0))*(y_cc(1) - y_cc(0)) & + & ! computed without grid stretching. 
Update in the loop to perform with stretching if (p /= 0) then cell_volume = cell_volume*(z_cc(1) - z_cc(0)) end if @@ -1177,47 +1199,35 @@ contains end subroutine s_compute_moment_of_inertia - !> Wrap immersed boundary positions across periodic domain boundaries + !> @brief Checks for periodic boundary conditions in all directions, and if so, moves patch location if it left the domain subroutine s_wrap_periodic_ibs() integer :: patch_id do patch_id = 1, num_ibs ! check domain wraps in x, y, - #:for X in [('x'), ('y')] - ! check for periodicity - if (bc_${X}$%beg == BC_PERIODIC) then - ! check if the boundary has left the domain, and then correct - if (patch_ib(patch_id)%${X}$_centroid < ${X}$_domain%beg) then - ! if the boundary exited "left", wrap it back around to the "right" - patch_ib(patch_id)%${X}$_centroid = patch_ib(patch_id)%${X}$_centroid + (${X}$_domain%end & - & - ${X}$_domain%beg) - else if (patch_ib(patch_id)%${X}$_centroid > ${X}$_domain%end) then - ! if the boundary exited "right", wrap it back around to the "left" - patch_ib(patch_id)%${X}$_centroid = patch_ib(patch_id)%${X}$_centroid - (${X}$_domain%end & - & - ${X}$_domain%beg) + #:for X, ID in [('x', 1), ('y', 2), ('z',3)] + if (num_dims >= ${ID}$) then + ! check for periodicity + if (bc_${X}$%beg == BC_PERIODIC) then + ! check if the boundary has left the domain, and then correct + if (patch_ib(patch_id)%${X}$_centroid < glb_bounds(${ID}$)%beg) then + ! if the boundary exited "left", wrap it back around to the "right" + patch_ib(patch_id)%${X}$_centroid = patch_ib(patch_id)%${X}$_centroid + (glb_bounds(${ID}$)%end & + & - glb_bounds(${ID}$)%beg) + else if (patch_ib(patch_id)%${X}$_centroid > glb_bounds(${ID}$)%end) then + ! if the boundary exited "right", wrap it back around to the "left" + patch_ib(patch_id)%${X}$_centroid = patch_ib(patch_id)%${X}$_centroid - (glb_bounds(${ID}$)%end & + & - glb_bounds(${ID}$)%beg) + end if end if end if #:endfor - - if (p /= 0) then - ! 
check for periodicity - if (bc_z%beg == BC_PERIODIC) then - ! check if the boundary has left the domain, and then correct - if (patch_ib(patch_id)%z_centroid < z_domain%beg) then - ! if the boundary exited "left", wrap it back around to the "right" - patch_ib(patch_id)%z_centroid = patch_ib(patch_id)%z_centroid + (z_domain%end - z_domain%beg) - else if (patch_ib(patch_id)%z_centroid > z_domain%end) then - ! if the boundary exited "right", wrap it back around to the "left" - patch_ib(patch_id)%z_centroid = patch_ib(patch_id)%z_centroid - (z_domain%end - z_domain%beg) - end if - end if - end if end do end subroutine s_wrap_periodic_ibs - !> Compute the cross product c = a x b of two 3D vectors + !> @brief Computes the cross product c = a x b of two 3D vectors. subroutine s_cross_product(a, b, c) $:GPU_ROUTINE(parallelism='[seq]') diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index a2b6c84326..880b596ca6 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -22,14 +22,37 @@ module m_mpi_proxy implicit none + !> This variable is utilized to pack and send the buffer of the immersed boundary markers, for a single computational domain + !! boundary at the time, to the relevant neighboring processor. integer, private, allocatable, dimension(:) :: ib_buff_send !< IB marker send buffer for halo exchange + + !> q_cons_buff_recv is utilized to receive and unpack the buffer of the immersed boundary markers, for a single computational + !! domain boundary at the time, from the relevant neighboring processor. integer, private, allocatable, dimension(:) :: ib_buff_recv !< IB marker receive buffer for halo exchange integer :: i_halo_size $:GPU_DECLARE(create='[i_halo_size]') + integer, dimension(-1:1,-1:1,-1:1) :: p_send_counts, p_recv_counts + integer, dimension(:,:,:,:), allocatable :: p_send_ids + character(len=1), dimension(:), allocatable :: p_send_buff, p_recv_buff + integer :: p_buff_size, p_var_size + !! 
EL Bubbles communication variables + integer, parameter :: MAX_NEIGHBORS = 27 + integer :: send_requests(MAX_NEIGHBORS), recv_requests(MAX_NEIGHBORS) + integer :: recv_offsets(MAX_NEIGHBORS) + integer :: neighbor_list(MAX_NEIGHBORS, 3) + integer :: n_neighbors + $:GPU_DECLARE(create='[p_send_counts]') + integer, allocatable :: force_send_counts(:), force_recv_counts(:) + integer, allocatable :: force_send_ids(:,:) + integer, allocatable :: flat_send_ids(:) + real(wp), allocatable :: force_send_vals(:,:,:) + real(wp), allocatable :: flat_send_vals(:) + $:GPU_DECLARE(create='[force_send_counts, force_send_ids, force_send_vals]') + contains - !> Initialize the MPI proxy module + !> @brief Allocates immersed boundary communication buffers for MPI halo exchanges. subroutine s_initialize_mpi_proxy_module() #ifdef MFC_MPI @@ -52,6 +75,81 @@ contains end subroutine s_initialize_mpi_proxy_module + !! This subroutine initializes the MPI buffers and variables required for the particle communication. + !! @param lag_num_ts Number of stages in time-stepping scheme + subroutine s_initialize_particles_mpi(lag_num_ts) + + integer :: i, j, k + integer :: real_size, int_size, nReal, lag_num_ts + integer :: ierr !< Generic flag used to identify and report MPI errors + +#ifdef MFC_MPI + call MPI_Pack_size(1, mpi_p, MPI_COMM_WORLD, real_size, ierr) + call MPI_Pack_size(1, MPI_INTEGER, MPI_COMM_WORLD, int_size, ierr) + nReal = 7 + 16*2 + 10*lag_num_ts + p_var_size = nReal*real_size + int_size + p_buff_size = lag_params%nBubs_glb*p_var_size + @:ALLOCATE(p_send_buff(0:p_buff_size), p_recv_buff(0:p_buff_size)) + @:ALLOCATE(p_send_ids(nidx(1)%beg:nidx(1)%end, nidx(2)%beg:nidx(2)%end, nidx(3)%beg:nidx(3)%end, 0:lag_params%nBubs_glb)) + ! 
First, collect all neighbor information + n_neighbors = 0 + do k = nidx(3)%beg, nidx(3)%end + do j = nidx(2)%beg, nidx(2)%end + do i = nidx(1)%beg, nidx(1)%end + if (abs(i) + abs(j) + abs(k) /= 0) then + n_neighbors = n_neighbors + 1 + neighbor_list(n_neighbors, 1) = i + neighbor_list(n_neighbors, 2) = j + neighbor_list(n_neighbors, 3) = k + end if + end do + end do + end do +#endif + + end subroutine s_initialize_particles_mpi + + !! This subroutine initializes the MPI buffers and variables required for the particle communication. + !! @param lag_num_ts Number of stages in time-stepping scheme + subroutine s_initialize_solid_particles_mpi(lag_num_ts) + + integer :: i, j, k + integer :: real_size, int_size, nReal, lag_num_ts + integer :: ierr !< Generic flag used to identify and report MPI errors + +#ifdef MFC_MPI + call MPI_Pack_size(1, mpi_p, MPI_COMM_WORLD, real_size, ierr) + call MPI_Pack_size(1, MPI_INTEGER, MPI_COMM_WORLD, int_size, ierr) + nReal = 7 + 13*2 + 7*lag_num_ts + p_var_size = (nReal*real_size + int_size) + p_buff_size = lag_params%nParticles_glb*p_var_size + @:ALLOCATE(p_send_buff(0:p_buff_size), p_recv_buff(0:p_buff_size)) + @:ALLOCATE(p_send_ids(nidx(1)%beg:nidx(1)%end, nidx(2)%beg:nidx(2)%end, nidx(3)%beg:nidx(3)%end, & + & 0:lag_params%nParticles_glb)) + ! 
First, collect all neighbor information + n_neighbors = 0 + do k = nidx(3)%beg, nidx(3)%end + do j = nidx(2)%beg, nidx(2)%end + do i = nidx(1)%beg, nidx(1)%end + if (abs(i) + abs(j) + abs(k) /= 0) then + n_neighbors = n_neighbors + 1 + neighbor_list(n_neighbors, 1) = i + neighbor_list(n_neighbors, 2) = j + neighbor_list(n_neighbors, 3) = k + end if + end do + end do + end do + @:ALLOCATE(force_send_counts(0:num_procs-1)) + @:ALLOCATE(force_recv_counts(0:num_procs-1)) + @:ALLOCATE(force_send_ids(0:num_procs-1, 1:lag_params%nParticles_glb)) + @:ALLOCATE(force_send_vals(0:num_procs-1, 1:lag_params%nParticles_glb, 1:3)) + @:ALLOCATE(flat_send_ids(1:lag_params%nParticles_glb)) + @:ALLOCATE(flat_send_vals(1:3*lag_params%nParticles_glb)) +#endif + + end subroutine s_initialize_solid_particles_mpi + !> Since only the processor with rank 0 reads and verifies the consistency of user inputs, these are initially not available to !! the other processors. Then, the purpose of this subroutine is to distribute the user inputs to the remaining processors in !! the communicator. 
@@ -92,8 +190,8 @@ contains & 'bc_z%grcbc_in', 'bc_z%grcbc_out', 'bc_z%grcbc_vel_out', & & 'cfl_adap_dt', 'cfl_const_dt', 'cfl_dt', 'surface_tension', & & 'shear_stress', 'bulk_stress', 'bubbles_lagrange', & - & 'hyperelasticity', 'down_sample', 'int_comp','fft_wrt', & - & 'hyper_cleaning', 'ib_state_wrt'] + & 'hyperelasticity', 'down_sample', 'int_comp','fft_wrt', & + & 'hyper_cleaning', 'ib_state_wrt', 'particles_lagrange' ] call MPI_BCAST(${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) #:endfor @@ -109,17 +207,40 @@ contains if (bubbles_lagrange) then #:for VAR in [ 'heatTransfer_model', 'massTransfer_model', 'pressure_corrector', & - & 'write_bubbles', 'write_bubbles_stats'] + & 'write_bubbles', 'write_bubbles_stats', 'write_void_evol', 'pressure_force', & + & 'gravity_force'] + call MPI_BCAST(lag_params%${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + #:endfor + + #:for VAR in ['solver_approach', 'cluster_type', 'smooth_type', 'nBubs_glb', 'vel_model', & + & 'drag_model', 'charNz'] + call MPI_BCAST(lag_params%${VAR}$, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + #:endfor + + #:for VAR in ['epsilonb','charwidth','valmaxvoid'] + call MPI_BCAST(lag_params%${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + #:endfor + + call MPI_BCAST(lag_params%input_path, len(lag_params%input_path), MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr) + end if + + if (particles_lagrange) then + #:for VAR in [ 'heatTransfer_model', 'massTransfer_model', 'pressure_corrector', & + & 'write_bubbles', 'write_bubbles_stats', 'write_void_evol', 'pressure_force', & + & 'gravity_force', 'collision_force'] call MPI_BCAST(lag_params%${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) #:endfor - #:for VAR in ['solver_approach', 'cluster_type', 'smooth_type', 'nBubs_glb'] + #:for VAR in ['solver_approach', 'cluster_type', 'smooth_type', 'nParticles_glb', 'vel_model', & + & 'drag_model', 'qs_drag_model', 'stokes_drag', 'added_mass_model', 'interpolation_order'] call MPI_BCAST(lag_params%${VAR}$, 1, 
MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) #:endfor #:for VAR in ['epsilonb','charwidth','valmaxvoid'] call MPI_BCAST(lag_params%${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) #:endfor + + call MPI_BCAST(lag_params%input_path, len(lag_params%input_path), MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr) end if #:for VAR in [ 'dt','weno_eps','teno_CT','pref','rhoref','R0ref','Web','Ca', 'sigma', & @@ -128,9 +249,7 @@ contains & 'bc_y%vb1','bc_y%vb2','bc_y%vb3','bc_y%ve1','bc_y%ve2','bc_y%ve3', & & 'bc_z%vb1','bc_z%vb2','bc_z%vb3','bc_z%ve1','bc_z%ve2','bc_z%ve3', & & 'bc_x%pres_in','bc_x%pres_out','bc_y%pres_in','bc_y%pres_out', 'bc_z%pres_in','bc_z%pres_out', & - & 'x_domain%beg', 'x_domain%end', 'y_domain%beg', 'y_domain%end', & - & 'z_domain%beg', 'z_domain%end', 'x_a', 'x_b', 'y_a', 'y_b', 'z_a', & - & 'z_b', 't_stop', 't_save', 'cfl_target', 'Bx0', 'alf_factor', & + & 't_stop', 't_save', 'cfl_target', 'Bx0', 'alf_factor', & & 'tau_star', 'cont_damage_s', 'alpha_bar', 'adap_dt_tol', & & 'ic_eps', 'ic_beta', 'hyper_cleaning_speed', & & 'hyper_cleaning_tau' ] @@ -179,6 +298,12 @@ contains #:endfor end if + if (particles_lagrange) then + #:for VAR in [ 'rho0ref_particle','cp_particle'] + call MPI_BCAST(particle_pp%${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + #:endfor + end if + do i = 1, num_fluids_max #:for VAR in ['bc_x%alpha_rho_in','bc_x%alpha_in','bc_y%alpha_rho_in','bc_y%alpha_in','bc_z%alpha_rho_in', & & 'bc_z%alpha_in'] @@ -233,11 +358,1096 @@ contains call MPI_BCAST(nv_uvm_out_of_core, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) call MPI_BCAST(nv_uvm_igr_temps_on_gpu, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) call MPI_BCAST(nv_uvm_pref_gpu, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + + ! 
Extra BC Variable + call MPI_BCAST(periodic_bc, 3, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) #endif end subroutine s_mpi_bcast_user_inputs - !> Broadcast random phase numbers from rank 0 to all MPI processes + !> @brief Packs, exchanges, and unpacks immersed boundary marker buffers between neighboring MPI ranks. + subroutine s_mpi_sendrecv_ib_buffers(ib_markers, mpi_dir, pbc_loc) + + type(integer_field), intent(inout) :: ib_markers + integer, intent(in) :: mpi_dir, pbc_loc + integer :: i, j, k, l, r, q !< Generic loop iterators + integer :: buffer_counts(1:3), buffer_count + type(int_bounds_info) :: boundary_conditions(1:3) + integer :: beg_end(1:2), grid_dims(1:3) + integer :: dst_proc, src_proc, recv_tag, send_tag + logical :: beg_end_geq_0, qbmm_comm + integer :: pack_offset, unpack_offset + +#ifdef MFC_MPI + integer :: ierr !< Generic flag used to identify and report MPI errors + + call nvtxStartRange("IB-MARKER-COMM-PACKBUF") + + buffer_counts = (/buff_size*(n + 1)*(p + 1), buff_size*(m + 2*buff_size + 1)*(p + 1), & + & buff_size*(m + 2*buff_size + 1)*(n + 2*buff_size + 1)/) + + buffer_count = buffer_counts(mpi_dir) + boundary_conditions = (/bc_x, bc_y, bc_z/) + beg_end = (/boundary_conditions(mpi_dir)%beg, boundary_conditions(mpi_dir)%end/) + beg_end_geq_0 = beg_end(max(pbc_loc, 0) - pbc_loc + 1) >= 0 + + ! Implements: pbc_loc bc_x >= 0 -> [send/recv]_tag [dst/src]_proc -1 (=0) 0 -> [1,0] [0,0] | 0 0 [1,0] [beg,beg] -1 (=0) 1 + ! -> [0,0] [1,0] | 0 1 [0,0] [end,beg] +1 (=1) 0 -> [0,1] [1,1] | 1 0 [0,1] [end,end] +1 (=1) 1 -> [1,1] [0,1] | 1 1 [1,1] + ! [beg,end] + + send_tag = f_logical_to_int(.not. 
f_xor(beg_end_geq_0, pbc_loc == 1)) + recv_tag = f_logical_to_int(pbc_loc == 1) + + dst_proc = beg_end(1 + f_logical_to_int(f_xor(pbc_loc == 1, beg_end_geq_0))) + src_proc = beg_end(1 + f_logical_to_int(pbc_loc == 1)) + + grid_dims = (/m, n, p/) + + pack_offset = 0 + if (f_xor(pbc_loc == 1, beg_end_geq_0)) then + pack_offset = grid_dims(mpi_dir) - buff_size + 1 + end if + + unpack_offset = 0 + if (pbc_loc == 1) then + unpack_offset = grid_dims(mpi_dir) + buff_size + 1 + end if + + ! Pack Buffer to Send + #:for mpi_dir in [1, 2, 3] + if (mpi_dir == ${mpi_dir}$) then + #:if mpi_dir == 1 + $:GPU_PARALLEL_LOOP(collapse=3,private='[j, k, l, r]') + do l = 0, p + do k = 0, n + do j = 0, buff_size - 1 + r = (j + buff_size*(k + (n + 1)*l)) + ib_buff_send(r) = ib_markers%sf(j + pack_offset, k, l) + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + #:elif mpi_dir == 2 + $:GPU_PARALLEL_LOOP(collapse=3,private='[j, k, l, r]') + do l = 0, p + do k = 0, buff_size - 1 + do j = -buff_size, m + buff_size + r = ((j + buff_size) + (m + 2*buff_size + 1)*(k + buff_size*l)) + ib_buff_send(r) = ib_markers%sf(j, k + pack_offset, l) + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + #:else + $:GPU_PARALLEL_LOOP(collapse=3,private='[j, k, l, r]') + do l = 0, buff_size - 1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + r = ((j + buff_size) + (m + 2*buff_size + 1)*((k + buff_size) + (n + 2*buff_size + 1)*l)) + ib_buff_send(r) = ib_markers%sf(j, k, l + pack_offset) + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + #:endif + end if + #:endfor + call nvtxEndRange ! Packbuf + + #:for rdma_mpi in [False, True] + if (rdma_mpi .eqv. ${'.true.' 
if rdma_mpi else '.false.'}$) then + #:if rdma_mpi + #:call GPU_HOST_DATA(use_device_addr='[ib_buff_send, ib_buff_recv]') + call nvtxStartRange("IB-MARKER-SENDRECV-RDMA") + call MPI_SENDRECV(ib_buff_send, buffer_count, MPI_INTEGER, dst_proc, send_tag, ib_buff_recv, & + & buffer_count, MPI_INTEGER, src_proc, recv_tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + call nvtxEndRange + #:endcall GPU_HOST_DATA + $:GPU_WAIT() + #:else + call nvtxStartRange("IB-MARKER-DEV2HOST") + $:GPU_UPDATE(host='[ib_buff_send]') + call nvtxEndRange + + call nvtxStartRange("IB-MARKER-SENDRECV-NO-RMDA") + call MPI_SENDRECV(ib_buff_send, buffer_count, MPI_INTEGER, dst_proc, send_tag, ib_buff_recv, buffer_count, & + & MPI_INTEGER, src_proc, recv_tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + call nvtxEndRange + + call nvtxStartRange("IB-MARKER-HOST2DEV") + $:GPU_UPDATE(device='[ib_buff_recv]') + call nvtxEndRange + #:endif + end if + #:endfor + + ! Unpack Received Buffer + call nvtxStartRange("IB-MARKER-COMM-UNPACKBUF") + #:for mpi_dir in [1, 2, 3] + if (mpi_dir == ${mpi_dir}$) then + #:if mpi_dir == 1 + $:GPU_PARALLEL_LOOP(collapse=3,private='[j, k, l, r]') + do l = 0, p + do k = 0, n + do j = -buff_size, -1 + r = (j + buff_size*((k + 1) + (n + 1)*l)) + ib_markers%sf(j + unpack_offset, k, l) = ib_buff_recv(r) + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + #:elif mpi_dir == 2 + $:GPU_PARALLEL_LOOP(collapse=3,private='[j, k, l, r]') + do l = 0, p + do k = -buff_size, -1 + do j = -buff_size, m + buff_size + r = ((j + buff_size) + (m + 2*buff_size + 1)*((k + buff_size) + buff_size*l)) + ib_markers%sf(j, k + unpack_offset, l) = ib_buff_recv(r) + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + #:else + ! 
Unpacking buffer from bc_z%beg + $:GPU_PARALLEL_LOOP(collapse=3,private='[j, k, l, r]') + do l = -buff_size, -1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + r = ((j + buff_size) + (m + 2*buff_size + 1)*((k + buff_size) + (n + 2*buff_size + 1)*(l & + & + buff_size))) + ib_markers%sf(j, k, l + unpack_offset) = ib_buff_recv(r) + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + #:endif + end if + #:endfor + call nvtxEndRange +#endif + + end subroutine s_mpi_sendrecv_ib_buffers + + !> This subroutine adds particles to the transfer list for the MPI communication. + !! @param nPart Current LOCAL number of particles + !! @param pos Current position of each particle + !! @param posPrev Previous position of each particle (optional, not used + !! for communication of initial condition) + impure subroutine s_add_particles_to_transfer_list(nBub, pos, posPrev, include_ghost) + + real(wp), dimension(:,:) :: pos, posPrev + integer :: bubID, nbub + integer :: i, j, k + logical, optional, intent(in) :: include_ghost + + do k = nidx(3)%beg, nidx(3)%end + do j = nidx(2)%beg, nidx(2)%end + do i = nidx(1)%beg, nidx(1)%end + p_send_counts(i, j, k) = 0 + end do + end do + end do + + do k = 1, nbub + if (f_crosses_boundary(k, 1, -1, pos, posPrev, include_ghost)) then + call s_add_particle_to_direction(k, -1, 0, 0) + if (n > 0) then + if (f_crosses_boundary(k, 2, -1, pos, posPrev, include_ghost)) then + call s_add_particle_to_direction(k, -1, -1, 0) + call s_add_particle_to_direction(k, 0, -1, 0) + if (p > 0) then + if (f_crosses_boundary(k, 3, -1, pos, posPrev, include_ghost)) then + call s_add_particle_to_direction(k, -1, -1, -1) + call s_add_particle_to_direction(k, 0, -1, -1) + call s_add_particle_to_direction(k, -1, 0, -1) + call s_add_particle_to_direction(k, 0, 0, -1) + else if (f_crosses_boundary(k, 3, 1, pos, posPrev, include_ghost)) then + call s_add_particle_to_direction(k, -1, -1, 1) + call s_add_particle_to_direction(k, 0, -1, 1) + call 
s_add_particle_to_direction(k, -1, 0, 1) + call s_add_particle_to_direction(k, 0, 0, 1) + end if + end if + else if (f_crosses_boundary(k, 2, 1, pos, posPrev, include_ghost)) then + call s_add_particle_to_direction(k, -1, 1, 0) + call s_add_particle_to_direction(k, 0, 1, 0) + if (p > 0) then + if (f_crosses_boundary(k, 3, -1, pos, posPrev, include_ghost)) then + call s_add_particle_to_direction(k, -1, 1, -1) + call s_add_particle_to_direction(k, 0, 1, -1) + call s_add_particle_to_direction(k, -1, 0, -1) + call s_add_particle_to_direction(k, 0, 0, -1) + else if (f_crosses_boundary(k, 3, 1, pos, posPrev, include_ghost)) then + call s_add_particle_to_direction(k, -1, 1, 1) + call s_add_particle_to_direction(k, 0, 1, 1) + call s_add_particle_to_direction(k, -1, 0, 1) + call s_add_particle_to_direction(k, 0, 0, 1) + end if + end if + else + if (p > 0) then + if (f_crosses_boundary(k, 3, -1, pos, posPrev, include_ghost)) then + call s_add_particle_to_direction(k, -1, 0, -1) + call s_add_particle_to_direction(k, 0, 0, -1) + else if (f_crosses_boundary(k, 3, 1, pos, posPrev, include_ghost)) then + call s_add_particle_to_direction(k, -1, 0, 1) + call s_add_particle_to_direction(k, 0, 0, 1) + end if + end if + end if + end if + else if (f_crosses_boundary(k, 1, 1, pos, posPrev, include_ghost)) then + call s_add_particle_to_direction(k, 1, 0, 0) + if (n > 0) then + if (f_crosses_boundary(k, 2, -1, pos, posPrev, include_ghost)) then + call s_add_particle_to_direction(k, 1, -1, 0) + call s_add_particle_to_direction(k, 0, -1, 0) + if (p > 0) then + if (f_crosses_boundary(k, 3, -1, pos, posPrev, include_ghost)) then + call s_add_particle_to_direction(k, 1, -1, -1) + call s_add_particle_to_direction(k, 0, -1, -1) + call s_add_particle_to_direction(k, 1, 0, -1) + call s_add_particle_to_direction(k, 0, 0, -1) + else if (f_crosses_boundary(k, 3, 1, pos, posPrev, include_ghost)) then + call s_add_particle_to_direction(k, 1, -1, 1) + call s_add_particle_to_direction(k, 0, -1, 1) + call 
s_add_particle_to_direction(k, 1, 0, 1) + call s_add_particle_to_direction(k, 0, 0, 1) + end if + end if + else if (f_crosses_boundary(k, 2, 1, pos, posPrev, include_ghost)) then + call s_add_particle_to_direction(k, 1, 1, 0) + call s_add_particle_to_direction(k, 0, 1, 0) + if (p > 0) then + if (f_crosses_boundary(k, 3, -1, pos, posPrev, include_ghost)) then + call s_add_particle_to_direction(k, 1, 1, -1) + call s_add_particle_to_direction(k, 0, 1, -1) + call s_add_particle_to_direction(k, 1, 0, -1) + call s_add_particle_to_direction(k, 0, 0, -1) + else if (f_crosses_boundary(k, 3, 1, pos, posPrev, include_ghost)) then + call s_add_particle_to_direction(k, 1, 1, 1) + call s_add_particle_to_direction(k, 0, 1, 1) + call s_add_particle_to_direction(k, 1, 0, 1) + call s_add_particle_to_direction(k, 0, 0, 1) + end if + end if + else + if (p > 0) then + if (f_crosses_boundary(k, 3, -1, pos, posPrev, include_ghost)) then + call s_add_particle_to_direction(k, 1, 0, -1) + call s_add_particle_to_direction(k, 0, 0, -1) + else if (f_crosses_boundary(k, 3, 1, pos, posPrev, include_ghost)) then + call s_add_particle_to_direction(k, 1, 0, 1) + call s_add_particle_to_direction(k, 0, 0, 1) + end if + end if + end if + end if + else if (f_crosses_boundary(k, 2, -1, pos, posPrev, include_ghost)) then + call s_add_particle_to_direction(k, 0, -1, 0) + if (p > 0) then + if (f_crosses_boundary(k, 3, -1, pos, posPrev, include_ghost)) then + call s_add_particle_to_direction(k, 0, -1, -1) + call s_add_particle_to_direction(k, 0, 0, -1) + else if (f_crosses_boundary(k, 3, 1, pos, posPrev, include_ghost)) then + call s_add_particle_to_direction(k, 0, -1, 1) + call s_add_particle_to_direction(k, 0, 0, 1) + end if + end if + else if (f_crosses_boundary(k, 2, 1, pos, posPrev, include_ghost)) then + call s_add_particle_to_direction(k, 0, 1, 0) + if (p > 0) then + if (f_crosses_boundary(k, 3, -1, pos, posPrev, include_ghost)) then + call s_add_particle_to_direction(k, 0, 1, -1) + call 
s_add_particle_to_direction(k, 0, 0, -1) + else if (f_crosses_boundary(k, 3, 1, pos, posPrev, include_ghost)) then + call s_add_particle_to_direction(k, 0, 1, 1) + call s_add_particle_to_direction(k, 0, 0, 1) + end if + end if + else if (p > 0) then + if (f_crosses_boundary(k, 3, -1, pos, posPrev, include_ghost)) then + call s_add_particle_to_direction(k, 0, 0, -1) + else if (f_crosses_boundary(k, 3, 1, pos, posPrev, include_ghost)) then + call s_add_particle_to_direction(k, 0, 0, 1) + end if + end if + end do + + contains ! f_crosses_boundary(k, 2, -1, pos, posPrev, include_ghost + + logical function f_crosses_boundary(particle_id, dir, loc, pos, posPrev, include_ghost) + + integer, intent(in) :: particle_id, dir, loc + real(wp), dimension(:,:), intent(in) :: pos + real(wp), dimension(:,:), optional, intent(in) :: posPrev + logical, optional, intent(in) :: include_ghost + + if (present(include_ghost) .and. include_ghost) then + if (loc == -1) then ! Beginning of the domain + if (nidx(dir)%beg == 0) then + f_crosses_boundary = .false. + return + end if + + f_crosses_boundary = pos(particle_id, dir) < pcomm_coords_ghost(dir)%beg + else if (loc == 1) then ! End of the domain + if (nidx(dir)%end == 0) then + f_crosses_boundary = .false. + return + end if + + f_crosses_boundary = pos(particle_id, dir) > pcomm_coords_ghost(dir)%end + end if + else + if (loc == -1) then ! Beginning of the domain + if (nidx(dir)%beg == 0) then + f_crosses_boundary = .false. + return + end if + + f_crosses_boundary = (posPrev(particle_id, dir) >= pcomm_coords(dir)%beg .and. pos(particle_id, & + & dir) < pcomm_coords(dir)%beg) + else if (loc == 1) then ! End of the domain + if (nidx(dir)%end == 0) then + f_crosses_boundary = .false. + return + end if + + f_crosses_boundary = (posPrev(particle_id, dir) <= pcomm_coords(dir)%end .and. 
pos(particle_id, & + & dir) > pcomm_coords(dir)%end) + end if + end if + + end function f_crosses_boundary + + subroutine s_add_particle_to_direction(particle_id, dir_x, dir_y, dir_z) + + integer, intent(in) :: particle_id, dir_x, dir_y, dir_z + + p_send_ids(dir_x, dir_y, dir_z, p_send_counts(dir_x, dir_y, dir_z)) = particle_id + p_send_counts(dir_x, dir_y, dir_z) = p_send_counts(dir_x, dir_y, dir_z) + 1 + + end subroutine s_add_particle_to_direction + + end subroutine s_add_particles_to_transfer_list + + !> This subroutine performs the MPI communication for lagrangian particles/ bubbles. + !! @param bub_R0 Initial radius of each bubble + !! @param Rmax_stats Maximum radius of each bubble + !! @param Rmin_stats Minimum radius of each bubble + !! @param gas_mg Mass of gas in each bubble + !! @param gas_betaT Heat flux model coefficient for each bubble + !! @param gas_betaC mass flux model coefficient for each bubble + !! @param bub_dphidt Subgrid velocity potential for each bubble + !! @param lag_id Global and local ID of each bubble + !! @param gas_p Pressure of the gas in each bubble + !! @param gas_mv Mass of vapor in each bubble + !! @param rad Radius of each bubble + !! @param rvel Radial velocity of each bubble + !! @param pos Position of each bubble + !! @param posPrev Previous position of each bubble + !! @param vel Velocity of each bubble + !! @param scoord Cell index in real format of each bubble + !! @param drad Radial velocity of each bubble + !! @param drvel Radial acceleration of each bubble + !! @param dgasp Time derivative of gas pressure in each bubble + !! @param dgasmv Time derivative of vapor mass in each bubble + !! @param dpos Time derivative of position of each bubble + !! @param dvel Time derivative of velocity of each bubble + !! @param lag_num_ts Number of stages in time-stepping scheme + !! 
@param nBubs Local number of bubbles + impure subroutine s_mpi_sendrecv_particles(bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, gas_betaC, bub_dphidt, lag_id, & + & gas_p, gas_mv, rad, rvel, pos, posPrev, vel, scoord, drad, drvel, dgasp, dgasmv, dpos, dvel, lag_num_ts, nbubs, dest) + + real(wp), dimension(:) :: bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, gas_betaC, bub_dphidt + integer, dimension(:,:) :: lag_id + real(wp), dimension(:,:) :: gas_p, gas_mv, rad, rvel, drad, drvel, dgasp, dgasmv + real(wp), dimension(:,:,:) :: pos, posPrev, vel, scoord, dpos, dvel + integer :: position, bub_id, lag_num_ts, tag, partner, send_tag, recv_tag, nbubs, p_recv_size, dest + integer :: i, j, k, l, q, r + integer :: req_send, req_recv, ierr !< Generic flag used to identify and report MPI errors + integer :: send_count, send_offset, recv_count, recv_offset + +#ifdef MFC_MPI + ! Phase 1: Exchange particle counts using non-blocking communication + send_count = 0 + recv_count = 0 + + ! Post all receives first + do l = 1, n_neighbors + i = neighbor_list(l, 1) + j = neighbor_list(l, 2) + k = neighbor_list(l, 3) + partner = neighbor_ranks(i, j, k) + recv_tag = neighbor_tag(i, j, k) + + recv_count = recv_count + 1 + call MPI_Irecv(p_recv_counts(i, j, k), 1, MPI_INTEGER, partner, recv_tag, MPI_COMM_WORLD, recv_requests(recv_count), & + & ierr) + end do + + ! Post all sends + do l = 1, n_neighbors + i = neighbor_list(l, 1) + j = neighbor_list(l, 2) + k = neighbor_list(l, 3) + partner = neighbor_ranks(i, j, k) + send_tag = neighbor_tag(-i, -j, -k) + + send_count = send_count + 1 + call MPI_Isend(p_send_counts(i, j, k), 1, MPI_INTEGER, partner, send_tag, MPI_COMM_WORLD, send_requests(send_count), & + & ierr) + end do + + ! 
Wait for all count exchanges to complete + if (recv_count > 0) then + call MPI_Waitall(recv_count, recv_requests(1:recv_count), MPI_STATUSES_IGNORE, ierr) + end if + if (send_count > 0) then + call MPI_Waitall(send_count, send_requests(1:send_count), MPI_STATUSES_IGNORE, ierr) + end if + + ! Phase 2: Exchange particle data using non-blocking communication + send_count = 0 + recv_count = 0 + + ! Post all receives for particle data first + recv_offset = 1 + do l = 1, n_neighbors + i = neighbor_list(l, 1) + j = neighbor_list(l, 2) + k = neighbor_list(l, 3) + + if (p_recv_counts(i, j, k) > 0) then + partner = neighbor_ranks(i, j, k) + p_recv_size = p_recv_counts(i, j, k)*p_var_size + recv_tag = neighbor_tag(i, j, k) + + recv_count = recv_count + 1 + call MPI_Irecv(p_recv_buff(recv_offset), p_recv_size, MPI_PACKED, partner, recv_tag, MPI_COMM_WORLD, & + & recv_requests(recv_count), ierr) + recv_offsets(l) = recv_offset + recv_offset = recv_offset + p_recv_size + end if + end do + + ! Pack and send particle data + send_offset = 0 + do l = 1, n_neighbors + i = neighbor_list(l, 1) + j = neighbor_list(l, 2) + k = neighbor_list(l, 3) + + if (p_send_counts(i, j, k) > 0 .and. abs(i) + abs(j) + abs(k) /= 0 .and. abs(i) + abs(j) + abs(k) /= 0) then + partner = neighbor_ranks(i, j, k) + send_tag = neighbor_tag(-i, -j, -k) + + ! 
Pack data for sending + position = 0 + do q = 0, p_send_counts(i, j, k) - 1 + bub_id = p_send_ids(i, j, k, q) + + call MPI_Pack(lag_id(bub_id, 1), 1, MPI_INTEGER, p_send_buff(send_offset), p_buff_size, position, & + & MPI_COMM_WORLD, ierr) + call MPI_Pack(bub_R0(bub_id), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(Rmax_stats(bub_id), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, & + & ierr) + call MPI_Pack(Rmin_stats(bub_id), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, & + & ierr) + call MPI_Pack(gas_mg(bub_id), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(gas_betaT(bub_id), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, & + & ierr) + call MPI_Pack(gas_betaC(bub_id), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, & + & ierr) + call MPI_Pack(bub_dphidt(bub_id), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, & + & ierr) + do r = 1, 2 + call MPI_Pack(gas_p(bub_id, r), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, & + & MPI_COMM_WORLD, ierr) + call MPI_Pack(gas_mv(bub_id, r), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, & + & MPI_COMM_WORLD, ierr) + call MPI_Pack(rad(bub_id, r), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, & + & ierr) + call MPI_Pack(rvel(bub_id, r), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, & + & ierr) + call MPI_Pack(pos(bub_id,:,r), 3, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, & + & ierr) + call MPI_Pack(posPrev(bub_id,:,r), 3, mpi_p, p_send_buff(send_offset), p_buff_size, position, & + & MPI_COMM_WORLD, ierr) + call MPI_Pack(vel(bub_id,:,r), 3, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, & + & ierr) + call MPI_Pack(scoord(bub_id,:,r), 3, mpi_p, 
p_send_buff(send_offset), p_buff_size, position, & + & MPI_COMM_WORLD, ierr) + end do + do r = 1, lag_num_ts + call MPI_Pack(drad(bub_id, r), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, & + & ierr) + call MPI_Pack(drvel(bub_id, r), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, & + & MPI_COMM_WORLD, ierr) + call MPI_Pack(dgasp(bub_id, r), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, & + & MPI_COMM_WORLD, ierr) + call MPI_Pack(dgasmv(bub_id, r), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, & + & MPI_COMM_WORLD, ierr) + call MPI_Pack(dpos(bub_id,:,r), 3, mpi_p, p_send_buff(send_offset), p_buff_size, position, & + & MPI_COMM_WORLD, ierr) + call MPI_Pack(dvel(bub_id,:,r), 3, mpi_p, p_send_buff(send_offset), p_buff_size, position, & + & MPI_COMM_WORLD, ierr) + end do + end do + + send_count = send_count + 1 + call MPI_Isend(p_send_buff(send_offset), position, MPI_PACKED, partner, send_tag, MPI_COMM_WORLD, & + & send_requests(send_count), ierr) + send_offset = send_offset + position + end if + end do + + ! Wait for all recvs for contiguous data to complete + call MPI_Waitall(recv_count, recv_requests(1:recv_count), MPI_STATUSES_IGNORE, ierr) + + ! Process received data as it arrives + do l = 1, n_neighbors + i = neighbor_list(l, 1) + j = neighbor_list(l, 2) + k = neighbor_list(l, 3) + + if (p_recv_counts(i, j, k) > 0 .and. abs(i) + abs(j) + abs(k) /= 0) then + p_recv_size = p_recv_counts(i, j, k)*p_var_size + recv_offset = recv_offsets(l) + + position = 0 + ! 
Unpack received data + do q = 0, p_recv_counts(i, j, k) - 1 + nbubs = nbubs + 1 + bub_id = nbubs + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, lag_id(bub_id, 1), 1, MPI_INTEGER, & + & MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, bub_R0(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, Rmax_stats(bub_id), 1, mpi_p, & + & MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, Rmin_stats(bub_id), 1, mpi_p, & + & MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, gas_mg(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, gas_betaT(bub_id), 1, mpi_p, MPI_COMM_WORLD, & + & ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, gas_betaC(bub_id), 1, mpi_p, MPI_COMM_WORLD, & + & ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, bub_dphidt(bub_id), 1, mpi_p, & + & MPI_COMM_WORLD, ierr) + do r = 1, 2 + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, gas_p(bub_id, r), 1, mpi_p, & + & MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, gas_mv(bub_id, r), 1, mpi_p, & + & MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, rad(bub_id, r), 1, mpi_p, & + & MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, rvel(bub_id, r), 1, mpi_p, & + & MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, pos(bub_id,:,r), 3, mpi_p, & + & MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, posPrev(bub_id,:,r), 3, mpi_p, & + & MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, vel(bub_id,:,r), 3, mpi_p, & + & MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), 
p_recv_size, position, scoord(bub_id,:,r), 3, mpi_p, & + & MPI_COMM_WORLD, ierr) + end do + do r = 1, lag_num_ts + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, drad(bub_id, r), 1, mpi_p, & + & MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, drvel(bub_id, r), 1, mpi_p, & + & MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, dgasp(bub_id, r), 1, mpi_p, & + & MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, dgasmv(bub_id, r), 1, mpi_p, & + & MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, dpos(bub_id,:,r), 3, mpi_p, & + & MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, dvel(bub_id,:,r), 3, mpi_p, & + & MPI_COMM_WORLD, ierr) + end do + lag_id(bub_id, 2) = bub_id + end do + recv_offset = recv_offset + p_recv_size + end if + end do + + ! Wait for all sends to complete + if (send_count > 0) then + call MPI_Waitall(send_count, send_requests(1:send_count), MPI_STATUSES_IGNORE, ierr) + end if +#endif + + if (any(periodic_bc)) then + call s_wrap_particle_positions(pos, posPrev, nbubs, dest) + end if + + end subroutine s_mpi_sendrecv_particles + + !> This subroutine performs the MPI communication for lagrangian particles/ particles. + !! @param particle_R0 Initial radius of each particle + !! @param Rmax_stats Maximum radius of each particle + !! @param Rmin_stats Minimum radius of each particle + !! @param particle_mass Mass of each particle + !! @param f_p Force on each particle + !! @param lag_id Global and local ID of each particle + !! @param rad Radius of each particle + !! @param pos Position of each particle + !! @param posPrev Previous position of each particle + !! @param vel Velocity of each particle + !! @param scoord Cell index in real format of each particle + !! @param drad Time derivative of particle's radius + !! 
@param dpos Time derivative of position of each particle + !! @param dvel Time derivative of velocity of each particle + !! @param lag_num_ts Number of stages in time-stepping scheme + !! @param nParticles Local number of particles + impure subroutine s_mpi_sendrecv_solid_particles(p_owner_rank, particle_R0, Rmax_stats, Rmin_stats, particle_mass, f_p, & + & lag_id, rad, pos, posPrev, vel, scoord, drad, dpos, dvel, lag_num_ts, nParticles, dest) + + integer, dimension(:) :: p_owner_rank + real(wp), dimension(:) :: particle_R0, Rmax_stats, Rmin_stats, particle_mass + real(wp), dimension(:,:) :: f_p + integer, dimension(:,:) :: lag_id + real(wp), dimension(:,:) :: rad, drad + real(wp), dimension(:,:,:) :: pos, posPrev, vel, scoord, dpos, dvel + integer :: position, particle_id, lag_num_ts, tag, partner, send_tag, recv_tag, nParticles, p_recv_size, dest + integer :: i, j, k, l, q, r + integer :: req_send, req_recv, ierr !< Generic flag used to identify and report MPI errors + integer :: send_count, send_offset, recv_count, recv_offset + +#ifdef MFC_MPI + ! Phase 1: Exchange particle counts using non-blocking communication + send_count = 0 + recv_count = 0 + + ! Post all receives first + do l = 1, n_neighbors + i = neighbor_list(l, 1) + j = neighbor_list(l, 2) + k = neighbor_list(l, 3) + partner = neighbor_ranks(i, j, k) + recv_tag = neighbor_tag(i, j, k) + + recv_count = recv_count + 1 + call MPI_Irecv(p_recv_counts(i, j, k), 1, MPI_INTEGER, partner, recv_tag, MPI_COMM_WORLD, recv_requests(recv_count), & + & ierr) + end do + + ! Post all sends + do l = 1, n_neighbors + i = neighbor_list(l, 1) + j = neighbor_list(l, 2) + k = neighbor_list(l, 3) + partner = neighbor_ranks(i, j, k) + send_tag = neighbor_tag(-i, -j, -k) + + send_count = send_count + 1 + call MPI_Isend(p_send_counts(i, j, k), 1, MPI_INTEGER, partner, send_tag, MPI_COMM_WORLD, send_requests(send_count), & + & ierr) + end do + + ! 
Wait for all count exchanges to complete + if (recv_count > 0) then + call MPI_Waitall(recv_count, recv_requests(1:recv_count), MPI_STATUSES_IGNORE, ierr) + end if + if (send_count > 0) then + call MPI_Waitall(send_count, send_requests(1:send_count), MPI_STATUSES_IGNORE, ierr) + end if + + ! Phase 2: Exchange particle data using non-blocking communication + send_count = 0 + recv_count = 0 + + ! Post all receives for particle data first + recv_offset = 1 + do l = 1, n_neighbors + i = neighbor_list(l, 1) + j = neighbor_list(l, 2) + k = neighbor_list(l, 3) + + if (p_recv_counts(i, j, k) > 0) then + partner = neighbor_ranks(i, j, k) + p_recv_size = p_recv_counts(i, j, k)*p_var_size + recv_tag = neighbor_tag(i, j, k) + + recv_count = recv_count + 1 + call MPI_Irecv(p_recv_buff(recv_offset), p_recv_size, MPI_PACKED, partner, recv_tag, MPI_COMM_WORLD, & + & recv_requests(recv_count), ierr) + recv_offsets(l) = recv_offset + recv_offset = recv_offset + p_recv_size + end if + end do + + ! Pack and send particle data + send_offset = 0 + do l = 1, n_neighbors + i = neighbor_list(l, 1) + j = neighbor_list(l, 2) + k = neighbor_list(l, 3) + + if (p_send_counts(i, j, k) > 0 .and. abs(i) + abs(j) + abs(k) /= 0 .and. abs(i) + abs(j) + abs(k) /= 0) then + partner = neighbor_ranks(i, j, k) + send_tag = neighbor_tag(-i, -j, -k) + + ! 
Pack data for sending + position = 0 + do q = 0, p_send_counts(i, j, k) - 1 + particle_id = p_send_ids(i, j, k, q) + + call MPI_Pack(lag_id(particle_id, 1), 1, MPI_INTEGER, p_send_buff(send_offset), p_buff_size, position, & + & MPI_COMM_WORLD, ierr) + call MPI_Pack(particle_R0(particle_id), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, & + & MPI_COMM_WORLD, ierr) + call MPI_Pack(Rmax_stats(particle_id), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, & + & MPI_COMM_WORLD, ierr) + call MPI_Pack(Rmin_stats(particle_id), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, & + & MPI_COMM_WORLD, ierr) + call MPI_Pack(particle_mass(particle_id), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, & + & MPI_COMM_WORLD, ierr) + call MPI_Pack(f_p(particle_id,:), 3, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, & + & ierr) + ! call MPI_Pack(gas_betaT(particle_id), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, + ! MPI_COMM_WORLD, ierr) call MPI_Pack(gas_betaC(particle_id), 1, mpi_p, p_send_buff(send_offset), p_buff_size, + ! position, MPI_COMM_WORLD, ierr) call MPI_Pack(bub_dphidt(particle_id), 1, mpi_p, p_send_buff(send_offset), + ! p_buff_size, position, MPI_COMM_WORLD, ierr) + do r = 1, 2 + ! call MPI_Pack(gas_p(particle_id, r), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, + ! MPI_COMM_WORLD, ierr) call MPI_Pack(gas_mv(particle_id, r), 1, mpi_p, p_send_buff(send_offset), + ! p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(rad(particle_id, r), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, & + & MPI_COMM_WORLD, ierr) + ! call MPI_Pack(rvel(particle_id, r), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, + ! 
MPI_COMM_WORLD, ierr) + call MPI_Pack(pos(particle_id,:,r), 3, mpi_p, p_send_buff(send_offset), p_buff_size, position, & + & MPI_COMM_WORLD, ierr) + call MPI_Pack(posPrev(particle_id,:,r), 3, mpi_p, p_send_buff(send_offset), p_buff_size, position, & + & MPI_COMM_WORLD, ierr) + call MPI_Pack(vel(particle_id,:,r), 3, mpi_p, p_send_buff(send_offset), p_buff_size, position, & + & MPI_COMM_WORLD, ierr) + call MPI_Pack(scoord(particle_id,:,r), 3, mpi_p, p_send_buff(send_offset), p_buff_size, position, & + & MPI_COMM_WORLD, ierr) + end do + do r = 1, lag_num_ts + call MPI_Pack(drad(particle_id, r), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, & + & MPI_COMM_WORLD, ierr) + ! call MPI_Pack(drvel(particle_id, r), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, + ! MPI_COMM_WORLD, ierr) call MPI_Pack(dgasp(particle_id, r), 1, mpi_p, p_send_buff(send_offset), + ! p_buff_size, position, MPI_COMM_WORLD, ierr) call MPI_Pack(dgasmv(particle_id, r), 1, mpi_p, + ! p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(dpos(particle_id,:,r), 3, mpi_p, p_send_buff(send_offset), p_buff_size, position, & + & MPI_COMM_WORLD, ierr) + call MPI_Pack(dvel(particle_id,:,r), 3, mpi_p, p_send_buff(send_offset), p_buff_size, position, & + & MPI_COMM_WORLD, ierr) + end do + end do + + send_count = send_count + 1 + call MPI_Isend(p_send_buff(send_offset), position, MPI_PACKED, partner, send_tag, MPI_COMM_WORLD, & + & send_requests(send_count), ierr) + send_offset = send_offset + position + end if + end do + + ! Wait for all recvs for contiguous data to complete + call MPI_Waitall(recv_count, recv_requests(1:recv_count), MPI_STATUSES_IGNORE, ierr) + + ! Process received data as it arrives + do l = 1, n_neighbors + i = neighbor_list(l, 1) + j = neighbor_list(l, 2) + k = neighbor_list(l, 3) + + if (p_recv_counts(i, j, k) > 0 .and. 
abs(i) + abs(j) + abs(k) /= 0) then + p_recv_size = p_recv_counts(i, j, k)*p_var_size + recv_offset = recv_offsets(l) + + position = 0 + ! Unpack received data + do q = 0, p_recv_counts(i, j, k) - 1 + nParticles = nParticles + 1 + particle_id = nParticles + + p_owner_rank(particle_id) = neighbor_ranks(i, j, k) + + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, lag_id(particle_id, 1), 1, MPI_INTEGER, & + & MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, particle_R0(particle_id), 1, mpi_p, & + & MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, Rmax_stats(particle_id), 1, mpi_p, & + & MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, Rmin_stats(particle_id), 1, mpi_p, & + & MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, particle_mass(particle_id), 1, mpi_p, & + & MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, f_p(particle_id,:), 3, mpi_p, & + & MPI_COMM_WORLD, ierr) + ! call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, gas_betaT(particle_id), 1, mpi_p, + ! MPI_COMM_WORLD, ierr) call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, gas_betaC(particle_id), + ! 1, mpi_p, MPI_COMM_WORLD, ierr) call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, + ! bub_dphidt(particle_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + do r = 1, 2 + ! call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, gas_p(particle_id, r), 1, mpi_p, + ! MPI_COMM_WORLD, ierr) call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, gas_mv(particle_id, + ! r), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, rad(particle_id, r), 1, mpi_p, & + & MPI_COMM_WORLD, ierr) + ! call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, rvel(particle_id, r), 1, mpi_p, + ! 
MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, pos(particle_id,:,r), 3, mpi_p, & + & MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, posPrev(particle_id,:,r), 3, mpi_p, & + & MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, vel(particle_id,:,r), 3, mpi_p, & + & MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, scoord(particle_id,:,r), 3, mpi_p, & + & MPI_COMM_WORLD, ierr) + end do + do r = 1, lag_num_ts + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, drad(particle_id, r), 1, mpi_p, & + & MPI_COMM_WORLD, ierr) + ! call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, drvel(particle_id, r), 1, mpi_p, + ! MPI_COMM_WORLD, ierr) call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, dgasp(particle_id, + ! r), 1, mpi_p, MPI_COMM_WORLD, ierr) call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, + ! dgasmv(particle_id, r), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, dpos(particle_id,:,r), 3, mpi_p, & + & MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, dvel(particle_id,:,r), 3, mpi_p, & + & MPI_COMM_WORLD, ierr) + end do + lag_id(particle_id, 2) = particle_id + end do + recv_offset = recv_offset + p_recv_size + end if + end do + + ! 
Wait for all sends to complete + if (send_count > 0) then + call MPI_Waitall(send_count, send_requests(1:send_count), MPI_STATUSES_IGNORE, ierr) + end if +#endif + + if (any(periodic_bc)) then + call s_wrap_particle_positions(pos, posPrev, nParticles, dest) + end if + + end subroutine s_mpi_sendrecv_solid_particles + + !> This resets the collision force buffers + impure subroutine s_reset_force_buffers() + + force_send_counts = 0 + force_recv_counts = 0 + force_send_ids = 0 + force_send_vals = 0._wp + + $:GPU_UPDATE(device='[force_send_counts, force_send_ids, force_send_vals]') + + end subroutine s_reset_force_buffers + + !> This adds the forces to the buffer arrays for mpi transfer + impure subroutine s_add_force_to_send_buffer(dest_rank, gid, force) + + $:GPU_ROUTINE(function_name='s_add_force_to_send_buffer', parallelism='[seq]') + + integer, intent(in) :: dest_rank, gid + real(wp), intent(in), dimension(3) :: force + integer :: idx + + $:GPU_ATOMIC(atomic='capture') + force_send_counts(dest_rank) = force_send_counts(dest_rank) + 1 + idx = force_send_counts(dest_rank) + $:END_GPU_ATOMIC_CAPTURE() + + force_send_ids(dest_rank, idx) = gid + force_send_vals(dest_rank, idx, 1) = force(1) + force_send_vals(dest_rank, idx, 2) = force(2) + force_send_vals(dest_rank, idx, 3) = force(3) + + end subroutine s_add_force_to_send_buffer + + !> This communicates the collision forces across neighbor mpi ranks + impure subroutine s_transfer_collision_forces(total_recv, force_recv_ids, force_recv_vals) + + integer, intent(inout) :: total_recv + integer, intent(inout) :: force_recv_ids(:) + real(wp), intent(inout) :: force_recv_vals(:) + +#ifdef MFC_MPI + integer :: ierr !< Generic flag used to identify and report MPI errors + integer :: i, j, k, l, idx, total_send, recv_tag, send_tag, partner, recv_count, send_count + integer :: send_displs(0:num_procs - 1), recv_displs(0:num_procs - 1) + integer :: sendcounts_vals(0:num_procs - 1), recvcounts_vals(0:num_procs - 1) + integer :: 
senddispls_vals(0:num_procs - 1), recvdispls_vals(0:num_procs - 1) + ! Local request arrays sized for 2 requests per neighbor (IDs + values) + integer :: coll_send_requests(2*MAX_NEIGHBORS), coll_recv_requests(2*MAX_NEIGHBORS) + + $:GPU_UPDATE(host='[force_send_counts, force_send_ids, force_send_vals]') + + ! Phase 1: Exchange force counts with neighbors only + send_count = 0 + recv_count = 0 + + do l = 1, n_neighbors + i = neighbor_list(l, 1) + j = neighbor_list(l, 2) + k = neighbor_list(l, 3) + partner = neighbor_ranks(i, j, k) + recv_tag = neighbor_tag(i, j, k) + send_tag = neighbor_tag(-i, -j, -k) + + recv_count = recv_count + 1 + call MPI_Irecv(force_recv_counts(partner), 1, MPI_INTEGER, partner, recv_tag, MPI_COMM_WORLD, & + & recv_requests(recv_count), ierr) + + send_count = send_count + 1 + call MPI_Isend(force_send_counts(partner), 1, MPI_INTEGER, partner, send_tag, MPI_COMM_WORLD, & + & send_requests(send_count), ierr) + end do + + call MPI_Waitall(recv_count, recv_requests(1:recv_count), MPI_STATUSES_IGNORE, ierr) + call MPI_Waitall(send_count, send_requests(1:send_count), MPI_STATUSES_IGNORE, ierr) + + ! Compute displacements + send_displs(0) = 0 + recv_displs(0) = 0 + do i = 1, num_procs - 1 + send_displs(i) = send_displs(i - 1) + force_send_counts(i - 1) + recv_displs(i) = recv_displs(i - 1) + force_recv_counts(i - 1) + end do + + do i = 0, num_procs - 1 + sendcounts_vals(i) = 3*force_send_counts(i) + recvcounts_vals(i) = 3*force_recv_counts(i) + senddispls_vals(i) = 3*send_displs(i) + recvdispls_vals(i) = 3*recv_displs(i) + end do + + total_send = sum(force_send_counts) + total_recv = sum(force_recv_counts) + + ! 
Flatten send buffers + idx = 1 + do i = 0, num_procs - 1 + do j = 1, force_send_counts(i) + flat_send_ids(idx) = force_send_ids(i, j) + flat_send_vals(3*(idx - 1) + 1) = force_send_vals(i, j, 1) + flat_send_vals(3*(idx - 1) + 2) = force_send_vals(i, j, 2) + flat_send_vals(3*(idx - 1) + 3) = force_send_vals(i, j, 3) + idx = idx + 1 + end do + end do + + ! Phase 2: Exchange force data with neighbors only + send_count = 0 + recv_count = 0 + + do l = 1, n_neighbors + i = neighbor_list(l, 1) + j = neighbor_list(l, 2) + k = neighbor_list(l, 3) + partner = neighbor_ranks(i, j, k) + recv_tag = neighbor_tag(i, j, k) + send_tag = neighbor_tag(-i, -j, -k) + + if (force_recv_counts(partner) > 0) then + recv_count = recv_count + 1 + call MPI_Irecv(force_recv_ids(recv_displs(partner) + 1), force_recv_counts(partner), MPI_INTEGER, partner, & + & recv_tag, MPI_COMM_WORLD, coll_recv_requests(recv_count), ierr) + recv_count = recv_count + 1 + call MPI_Irecv(force_recv_vals(recvdispls_vals(partner) + 1), recvcounts_vals(partner), mpi_p, partner, & + & recv_tag + 1, MPI_COMM_WORLD, coll_recv_requests(recv_count), ierr) + end if + + if (force_send_counts(partner) > 0) then + send_count = send_count + 1 + call MPI_Isend(flat_send_ids(send_displs(partner) + 1), force_send_counts(partner), MPI_INTEGER, partner, & + & send_tag, MPI_COMM_WORLD, coll_send_requests(send_count), ierr) + send_count = send_count + 1 + call MPI_Isend(flat_send_vals(senddispls_vals(partner) + 1), sendcounts_vals(partner), mpi_p, partner, & + & send_tag + 1, MPI_COMM_WORLD, coll_send_requests(send_count), ierr) + end if + end do + + call MPI_Waitall(recv_count, coll_recv_requests(1:recv_count), MPI_STATUSES_IGNORE, ierr) + call MPI_Waitall(send_count, coll_send_requests(1:send_count), MPI_STATUSES_IGNORE, ierr) +#else + total_recv = 0 +#endif + + end subroutine s_transfer_collision_forces + + !! This function returns a unique tag for each neighbor based on its position relative to the current process. + !! 
@param i, j, k Indices of the neighbor in the range [-1, 1] + !! @return tag Unique integer tag for the neighbor + integer function neighbor_tag(i, j, k) result(tag) + + integer, intent(in) :: i, j, k + + tag = (k + 1)*9 + (j + 1)*3 + (i + 1) + + end function neighbor_tag + + subroutine s_wrap_particle_positions(pos, posPrev, nbubs, dest) + + real(wp), dimension(:,:,:) :: pos, posPrev + integer :: nbubs, dest + integer :: i, q + real(wp) :: offset + + do i = 1, nbubs + if (periodic_bc(1)) then + offset = glb_bounds(1)%end - glb_bounds(1)%beg + if (pos(i, 1, dest) > x_cb(m + buff_size)) then + do q = 1, 2 + pos(i, 1, q) = pos(i, 1, q) - offset + posPrev(i, 1, q) = posPrev(i, 1, q) - offset + end do + end if + if (pos(i, 1, dest) < x_cb(-1 - buff_size)) then + do q = 1, 2 + pos(i, 1, q) = pos(i, 1, q) + offset + posPrev(i, 1, q) = posPrev(i, 1, q) + offset + end do + end if + end if + + if (periodic_bc(2)) then + offset = glb_bounds(2)%end - glb_bounds(2)%beg + if (pos(i, 2, dest) > y_cb(n + buff_size)) then + do q = 1, 2 + pos(i, 2, q) = pos(i, 2, q) - offset + posPrev(i, 2, q) = posPrev(i, 2, q) - offset + end do + end if + if (pos(i, 2, dest) < y_cb(-buff_size - 1)) then + do q = 1, 2 + pos(i, 2, q) = pos(i, 2, q) + offset + posPrev(i, 2, q) = posPrev(i, 2, q) + offset + end do + end if + end if + + if (periodic_bc(3)) then + offset = glb_bounds(3)%end - glb_bounds(3)%beg + if (pos(i, 3, dest) > z_cb(p + buff_size)) then + do q = 1, 2 + pos(i, 3, q) = pos(i, 3, q) - offset + posPrev(i, 3, q) = posPrev(i, 3, q) - offset + end do + end if + if (pos(i, 3, dest) < z_cb(-1 - buff_size)) then + do q = 1, 2 + pos(i, 3, q) = pos(i, 3, q) + offset + posPrev(i, 3, q) = posPrev(i, 3, q) + offset + end do + end if + end if + end do + + end subroutine s_wrap_particle_positions + + !> @brief Broadcasts random phase numbers from rank 0 to all MPI processes. 
impure subroutine s_mpi_send_random_number(phi_rn, num_freq) integer, intent(in) :: num_freq @@ -245,13 +1455,12 @@ contains #ifdef MFC_MPI integer :: ierr !< Generic flag used to identify and report MPI errors - call MPI_BCAST(phi_rn, num_freq, mpi_p, 0, MPI_COMM_WORLD, ierr) #endif end subroutine s_mpi_send_random_number - !> Finalize the MPI proxy module + !> @brief Deallocates immersed boundary MPI communication buffers. subroutine s_finalize_mpi_proxy_module() #ifdef MFC_MPI diff --git a/src/simulation/m_particles_EL.fpp b/src/simulation/m_particles_EL.fpp new file mode 100644 index 0000000000..1674f07c6f --- /dev/null +++ b/src/simulation/m_particles_EL.fpp @@ -0,0 +1,2645 @@ +!> +!! @file m_particles_EL.fpp +!! @brief Contains module m_particles_EL + +#:include 'macros.fpp' + +!> @brief Euler-Lagrange solid particle solver with two-way coupling. +!! +!! Tracks non-deformable solid particles in compressible flow using Gaussian volume-averaging (Maeda & Colonius, J. Computational +!! Physics, 361, 2018). Supports multiple drag correlations, soft-sphere DEM collisions, pressure gradient and added mass forces. +!! Derived from the m_bubbles_EL module. Kernel functions are in m_particles_EL_kernels. 
+module m_particles_EL + + use m_global_parameters !< Definitions of the global parameters + use m_mpi_proxy !< Message passing interface (MPI) module proxy + use m_particles_EL_kernels !< Definitions of the kernel functions + use m_variables_conversion !< State variables type conversion procedures + use m_compile_specific + use m_boundary_common + use m_helper_basic !< Functions to compare floating point numbers + use m_sim_helpers + use m_helper + use m_mpi_common + use m_ibm + + implicit none + + real(wp) :: next_write_time + integer, allocatable, dimension(:,:) :: lag_part_id !< Global and local IDs + integer, allocatable, dimension(:) :: gid_to_local + real(wp), allocatable, dimension(:) :: particle_R0 !< Initial particle radius + real(wp), allocatable, dimension(:) :: Rmax_stats_part !< Maximum radius + real(wp), allocatable, dimension(:) :: Rmin_stats_part !< Minimum radius + $:GPU_DECLARE(create='[lag_part_id, gid_to_local, particle_R0, Rmax_stats_part, Rmin_stats_part]') + + real(wp), allocatable, dimension(:) :: particle_mass !< Particle Mass + $:GPU_DECLARE(create='[particle_mass]') + real(wp), allocatable, dimension(:) :: p_AM !< Particle Added Mass + $:GPU_DECLARE(create='[p_AM]') + + integer, allocatable, dimension(:) :: p_owner_rank !< MPI rank that owns this particle + $:GPU_DECLARE(create='[p_owner_rank]') + + integer, allocatable, dimension(:) :: linked_list !< particle cell linked list + $:GPU_DECLARE(create='[linked_list]') + + integer, allocatable, dimension(:,:,:) :: particle_head !< particle heads at each cell + $:GPU_DECLARE(create='[particle_head]') + + ! Particle state arrays use dimensions (nParticles_glb, component, stage): component: 1=x, 2=y, 3=z for position/velocity stage: + ! 1=committed state at current time level, 2=intermediate RK stage value + + ! (nPart, 1 -> actual val or 2 -> temp val) + real(wp), allocatable, dimension(:,:) :: particle_rad !< Particle radius + $:GPU_DECLARE(create='[particle_rad]') + + ! 
(nPart, 1-> x or 2->y or 3 ->z, 1 -> actual or 2 -> temporal val) + real(wp), allocatable, dimension(:,:,:) :: particle_pos !< Particle's position + real(wp), allocatable, dimension(:,:,:) :: particle_posPrev !< Particle's previous position + real(wp), allocatable, dimension(:,:,:) :: particle_vel !< Particle's velocity + real(wp), allocatable, dimension(:,:,:) :: particle_s !< Particle's computational cell position in real format + $:GPU_DECLARE(create='[particle_pos, particle_posPrev, particle_vel, particle_s]') + ! (nPart, 1-> x or 2->y or 3 ->z, time-stage) + real(wp), allocatable, dimension(:,:) :: particle_draddt !< Time derivative of particle's radius + real(wp), allocatable, dimension(:,:,:) :: particle_dposdt !< Time derivative of the particle's position + real(wp), allocatable, dimension(:,:,:) :: particle_dveldt !< Time derivative of the particle's velocity + $:GPU_DECLARE(create='[particle_draddt, particle_dposdt, particle_dveldt]') + + integer, private :: lag_num_ts !< Number of time stages in the time-stepping scheme + $:GPU_DECLARE(create='[lag_num_ts]') + + real(wp) :: Rmax_glb, Rmin_glb !< Global maximum and minimum R/R0 ratio across all particles + !> Eulerian projection of particle data (volume fraction, momentum, sources) + type(scalar_field), dimension(:), allocatable :: q_particles + integer :: q_particles_idx !< Size of the q vector field for particle cell (q)uantities + integer, parameter :: alphaf_id = 1 + integer, parameter :: alphaupx_id = 2 !< x particle momentum index + integer, parameter :: alphaupy_id = 3 !< y particle momentum index + integer, parameter :: alphaupz_id = 4 !< z particle momentum index + integer, parameter :: Smx_id = 5 + integer, parameter :: Smy_id = 6 + integer, parameter :: Smz_id = 7 + integer, parameter :: SE_id = 8 + + !> Interpolated Eulerian field gradients at particle locations + type(scalar_field), dimension(:), allocatable :: field_vars !< For cell quantities (field gradients, etc.) 
+ integer, parameter :: dPx_id = 1 !< Spatial pressure gradient in x, y, and z + integer, parameter :: dPy_id = 2 + integer, parameter :: dPz_id = 3 + integer, parameter :: drhox_id = 4 !< Spatial density gradient in x, y, and z + integer, parameter :: drhoy_id = 5 + integer, parameter :: drhoz_id = 6 + integer, parameter :: dufx_id = 7 !< Spatial velocity gradient in x, y, and z + integer, parameter :: dufy_id = 8 + integer, parameter :: dufz_id = 9 + integer, parameter :: dalphafx_id = 10 !< Spatial fluid volume fraction gradient in x, y, and z + integer, parameter :: dalphafy_id = 11 + integer, parameter :: dalphafz_id = 12 + integer, parameter :: dalphap_upx_id = 13 !< Spatial particle momentum gradient in x, y, and z + integer, parameter :: dalphap_upy_id = 14 + integer, parameter :: dalphap_upz_id = 15 + integer, parameter :: nField_vars = 15 + type(scalar_field), dimension(:), allocatable :: weights_x_interp !< For precomputing weights + type(scalar_field), dimension(:), allocatable :: weights_y_interp !< For precomputing weights + type(scalar_field), dimension(:), allocatable :: weights_z_interp !< For precomputing weights + integer :: nWeights_interp + type(scalar_field), dimension(:), allocatable :: weights_x_grad !< For precomputing weights + type(scalar_field), dimension(:), allocatable :: weights_y_grad !< For precomputing weights + type(scalar_field), dimension(:), allocatable :: weights_z_grad !< For precomputing weights + integer :: nWeights_grad + + $:GPU_DECLARE(create='[Rmax_glb, Rmin_glb, q_particles, q_particles_idx, field_vars]') + $:GPU_DECLARE(create='[weights_x_interp, weights_y_interp, weights_z_interp, nWeights_interp]') + $:GPU_DECLARE(create='[weights_x_grad, weights_y_grad, weights_z_grad, nWeights_grad]') + + ! 
Particle Source terms for fluid coupling + real(wp), allocatable, dimension(:,:) :: f_p !< force on each particle + $:GPU_DECLARE(create='[f_p]') + + real(wp), allocatable, dimension(:) :: gSum !< gaussian sum for each particle + $:GPU_DECLARE(create='[gSum]') + + integer, allocatable :: force_recv_ids(:) !< ids of collision forces received from other ranks + real(wp), allocatable :: force_recv_vals(:) !< collision forces received from other ranks + $:GPU_DECLARE(create='[force_recv_ids, force_recv_vals]') + + integer, parameter :: LAG_EVOL_ID = 11 ! File id for lag_bubbles_evol_*.dat + integer, parameter :: LAG_STATS_ID = 12 ! File id for stats_lag_bubbles_*.dat + integer, parameter :: LAG_VOID_ID = 13 ! File id for voidfraction.dat + integer, allocatable, dimension(:) :: keep_bubble + integer, allocatable, dimension(:,:) :: wrap_bubble_loc, wrap_bubble_dir + $:GPU_DECLARE(create='[keep_bubble]') + $:GPU_DECLARE(create='[wrap_bubble_loc, wrap_bubble_dir]') + + integer :: error_flag ! Error flag for collisions + $:GPU_DECLARE(create='[error_flag]') + + integer, parameter :: ncc = 1 !< Number of collisions cells at boundaries + real(wp) :: eps_overlap = 1.e-12 + +contains + + !> Initializes the lagrangian subgrid particle solver + !! @param q_cons_vf Initial conservative variables + impure subroutine s_initialize_particles_EL_module(q_cons_vf, bc_type) + + type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf + type(integer_field), dimension(1:num_dims,1:2), intent(in) :: bc_type + integer :: nParticles_glb, i, j, k, nf, l, npts + + ! PRIM TO CONS VARIABLES + real(wp) :: dyn_pres, pi_inf, qv, gamma, pres, T + real(wp) :: rhou, alpharhou, rho_f, alpharho + real(wp), dimension(3) :: fluid_vel + real(wp) :: rhoYks(1:num_species) + integer :: save_count + real(wp) :: qtime + real(wp) :: myR, func_sum + real(wp), dimension(3) :: myPos, myVel, myForce + integer, dimension(3) :: cell + logical :: only_beta + + only_beta = .true. 
+ + next_write_time = 0._wp + + if (cfl_dt) then + save_count = n_start + qtime = n_start*t_save + else + save_count = t_step_start + qtime = t_step_start*dt + end if + + pi_inf = 0._wp + qv = 0._wp + gamma = gammas(1) + + ! Setting number of time-stages for selected time-stepping scheme + lag_num_ts = time_stepper + + ! Allocate space for the Eulerian fields needed to map the effect of the particles + if (lag_params%solver_approach == 1) then + ! One-way coupling + q_particles_idx = 1 ! For tracking volume fraction + else if (lag_params%solver_approach == 2) then + ! Two-way coupling + q_particles_idx = 8 ! For tracking volume fraction(1), x-mom(2), y-mom(3), z-mom(4), and energy(5) sources, and alpha_p u_p (x(6),y(7),z(8)) + else + call s_mpi_abort('Please check the lag_params%solver_approach input') + end if + + nWeights_interp = lag_params%interpolation_order + 1 + nWeights_grad = fd_order + 1 + + pcomm_coords(1)%beg = x_cb(-1) + pcomm_coords(1)%end = x_cb(m) + $:GPU_UPDATE(device='[pcomm_coords(1)]') + if (n > 0) then + pcomm_coords(2)%beg = y_cb(-1) + pcomm_coords(2)%end = y_cb(n) + $:GPU_UPDATE(device='[pcomm_coords(2)]') + if (p > 0) then + pcomm_coords(3)%beg = z_cb(-1) + pcomm_coords(3)%end = z_cb(p) + $:GPU_UPDATE(device='[pcomm_coords(3)]') + end if + end if + + pcomm_coords_ghost(1)%beg = x_cb(-1 + ncc) + pcomm_coords_ghost(1)%end = x_cb(m - ncc) + $:GPU_UPDATE(device='[pcomm_coords_ghost(1)]') + if (n > 0) then + pcomm_coords_ghost(2)%beg = y_cb(-1 + ncc) + pcomm_coords_ghost(2)%end = y_cb(n - ncc) + $:GPU_UPDATE(device='[pcomm_coords_ghost(2)]') + if (p > 0) then + pcomm_coords_ghost(3)%beg = z_cb(-1 + ncc) + pcomm_coords_ghost(3)%end = z_cb(p - ncc) + $:GPU_UPDATE(device='[pcomm_coords_ghost(3)]') + end if + end if + + $:GPU_UPDATE(device='[lag_num_ts, q_particles_idx]') + + @:ALLOCATE(q_particles(1:q_particles_idx)) + do i = 1, q_particles_idx + @:ALLOCATE(q_particles(i)%sf(idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, & + & 
idwbuff(3)%beg:idwbuff(3)%end)) + @:ACC_SETUP_SFs(q_particles(i)) + end do + + @:ALLOCATE(field_vars(1:nField_vars)) + do i = 1, nField_vars + @:ALLOCATE(field_vars(i)%sf(idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, & + & idwbuff(3)%beg:idwbuff(3)%end)) + @:ACC_SETUP_SFs(field_vars(i)) + end do + + @:ALLOCATE(weights_x_interp(1:nWeights_interp)) + do i = 1, nWeights_interp + @:ALLOCATE(weights_x_interp(i)%sf(idwbuff(1)%beg:idwbuff(1)%end,1:1,1:1)) + @:ACC_SETUP_SFs(weights_x_interp(i)) + end do + + @:ALLOCATE(weights_y_interp(1:nWeights_interp)) + do i = 1, nWeights_interp + @:ALLOCATE(weights_y_interp(i)%sf(idwbuff(2)%beg:idwbuff(2)%end,1:1,1:1)) + @:ACC_SETUP_SFs(weights_y_interp(i)) + end do + + @:ALLOCATE(weights_z_interp(1:nWeights_interp)) + do i = 1, nWeights_interp + @:ALLOCATE(weights_z_interp(i)%sf(idwbuff(3)%beg:idwbuff(3)%end,1:1,1:1)) + @:ACC_SETUP_SFs(weights_z_interp(i)) + end do + + @:ALLOCATE(weights_x_grad(1:nWeights_grad)) + do i = 1, nWeights_grad + @:ALLOCATE(weights_x_grad(i)%sf(idwbuff(1)%beg:idwbuff(1)%end,1:1,1:1)) + @:ACC_SETUP_SFs(weights_x_grad(i)) + end do + + @:ALLOCATE(weights_y_grad(1:nWeights_grad)) + do i = 1, nWeights_grad + @:ALLOCATE(weights_y_grad(i)%sf(idwbuff(2)%beg:idwbuff(2)%end,1:1,1:1)) + @:ACC_SETUP_SFs(weights_y_grad(i)) + end do + + @:ALLOCATE(weights_z_grad(1:nWeights_grad)) + do i = 1, nWeights_grad + @:ALLOCATE(weights_z_grad(i)%sf(idwbuff(3)%beg:idwbuff(3)%end,1:1,1:1)) + @:ACC_SETUP_SFs(weights_z_grad(i)) + end do + + ! 
Allocating space for lagrangian variables + nParticles_glb = lag_params%nParticles_glb + + @:ALLOCATE(lag_part_id(1:nParticles_glb, 1:2)) + @:ALLOCATE(gid_to_local(1:nParticles_glb)) + @:ALLOCATE(particle_R0(1:nParticles_glb)) + @:ALLOCATE(Rmax_stats_part(1:nParticles_glb)) + @:ALLOCATE(Rmin_stats_part(1:nParticles_glb)) + @:ALLOCATE(particle_mass(1:nParticles_glb)) + @:ALLOCATE(p_AM(1:nParticles_glb)) + @:ALLOCATE(p_owner_rank(1:nParticles_glb)) + @:ALLOCATE(particle_rad(1:nParticles_glb, 1:2)) + @:ALLOCATE(particle_pos(1:nParticles_glb, 1:3, 1:2)) + @:ALLOCATE(particle_posPrev(1:nParticles_glb, 1:3, 1:2)) + @:ALLOCATE(particle_vel(1:nParticles_glb, 1:3, 1:2)) + @:ALLOCATE(particle_s(1:nParticles_glb, 1:3, 1:2)) + @:ALLOCATE(particle_draddt(1:nParticles_glb, 1:lag_num_ts)) + @:ALLOCATE(particle_dposdt(1:nParticles_glb, 1:3, 1:lag_num_ts)) + @:ALLOCATE(particle_dveldt(1:nParticles_glb, 1:3, 1:lag_num_ts)) + @:ALLOCATE(f_p(1:nParticles_glb, 1:3)) + @:ALLOCATE(gSum(1:nParticles_glb)) + + @:ALLOCATE(linked_list(1:nParticles_glb)) + + @:ALLOCATE(particle_head(idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, idwbuff(3)%beg:idwbuff(3)%end)) + + @:ALLOCATE(force_recv_ids(1:lag_params%nParticles_glb)) + @:ALLOCATE(force_recv_vals(1:3*lag_params%nParticles_glb)) + + @:ALLOCATE(keep_bubble(1:nParticles_glb)) + @:ALLOCATE(wrap_bubble_loc(1:nParticles_glb, 1:num_dims), wrap_bubble_dir(1:nParticles_glb, 1:num_dims)) + + if (adap_dt .and. f_is_default(adap_dt_tol)) adap_dt_tol = dflt_adap_dt_tol + + if (num_procs > 1) call s_initialize_solid_particles_mpi(lag_num_ts) + + ! Starting particles + if (lag_params%write_void_evol) call s_open_void_evol + if (lag_params%write_bubbles) call s_open_lag_bubble_evol() + if (lag_params%write_bubbles_stats) call s_open_lag_particle_stats() + + if (lag_params%vel_model > 0) then + moving_lag_particles = .true. 
+ lag_pressure_force = lag_params%pressure_force + lag_gravity_force = lag_params%gravity_force + lag_vel_model = lag_params%vel_model + lag_drag_model = lag_params%drag_model + end if + + $:GPU_UPDATE(device='[moving_lag_particles, lag_pressure_force, lag_gravity_force, lag_vel_model, lag_drag_model]') + + ! Allocate cell list arrays for atomic-free Gaussian smearing + @:ALLOCATE(cell_list_start(0:m, 0:n, 0:p)) + @:ALLOCATE(cell_list_count(0:m, 0:n, 0:p)) + @:ALLOCATE(cell_list_idx(1:lag_params%nParticles_glb)) + + call s_read_input_particles(q_cons_vf, bc_type) + + call s_reset_cell_vars() + + $:GPU_PARALLEL_LOOP(private='[k, cell, myR, myPos, myVel, myForce, func_sum]',copyin='[only_beta]') + do k = 1, n_el_particles_loc + cell = fd_number - buff_size + call s_locate_cell(particle_pos(k,1:3,1), cell, particle_s(k,1:3,1)) + + myR = particle_R0(k) + myPos = particle_pos(k,1:3,1) + myVel = particle_vel(k,1:3,1) + myForce = f_p(k,:) + ! Compute the total gaussian contribution for each particle for normalization + call s_compute_gaussian_contribution(myR, myPos, cell, func_sum) + gSum(k) = func_sum + + call s_gaussian_atomic(myR, myVel, myPos, myForce, func_sum, cell, q_particles, only_beta) + end do + $:END_GPU_PARALLEL_LOOP() + + call s_finalize_beta_field(bc_type, only_beta) + + npts = (nWeights_interp - 1)/2 + call s_compute_barycentric_weights(npts) ! For interpolation + + npts = (nWeights_grad - 1)/2 + call s_compute_fornberg_fd_weights(npts) ! For finite differences + + if (lag_params%solver_approach == 2) then + if (save_count == 0) then + !> Correcting initial conditions so they account for particles + $:GPU_PARALLEL_LOOP(private='[i, j, k, dyn_pres, fluid_vel, rho_f, alpharho, rhou, alpharhou]', collapse=3, & + & copyin = '[pi_inf, qv, gamma, rhoYks]') + do k = idwint(3)%beg, idwint(3)%end + do j = idwint(2)%beg, idwint(2)%end + do i = idwint(1)%beg, idwint(1)%end + !!!!!!!!! Mass + do l = 1, num_fluids ! 
num_fluid is just 1 right now + rho_f = q_cons_vf(l)%sf(i, j, k) + alpharho = q_particles(alphaf_id)%sf(i, j, k)*rho_f + q_cons_vf(l)%sf(i, j, k) = alpharho + end do + + !!!!!!!!! Momentum + dyn_pres = 0._wp + do l = momxb, momxe + fluid_vel(l - momxb + 1) = q_cons_vf(l)%sf(i, j, k)/rho_f + rhou = q_cons_vf(l)%sf(i, j, k) + alpharhou = q_particles(alphaf_id)%sf(i, j, k)*rhou + q_cons_vf(l)%sf(i, j, k) = alpharhou + dyn_pres = dyn_pres + q_cons_vf(l)%sf(i, j, k)*fluid_vel(l - momxb + 1)/2._wp + end do + + !!!!!!!!!Energy + call s_compute_pressure(q_cons_vf(E_idx)%sf(i, j, k), q_cons_vf(alf_idx)%sf(i, j, k), dyn_pres, & + & pi_inf, gamma, alpharho, qv, rhoYks, pres, T) + + q_cons_vf(E_idx)%sf(i, j, k) = gamma*pres + dyn_pres + pi_inf + qv ! Updating energy in cons + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + end if + end if + + end subroutine s_initialize_particles_EL_module + + !> Initialize particle data from input file or generate initial conditions + !! @param q_cons_vf Conservative variables + impure subroutine s_read_input_particles(q_cons_vf, bc_type) + + type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf + type(integer_field), dimension(1:num_dims,1:2), intent(in) :: bc_type + real(wp), dimension(8) :: inputParticle + real(wp) :: qtime + integer :: id, particle_id, save_count + integer :: i, ios + logical :: file_exist, indomain + integer, dimension(3) :: cell + character(LEN=path_len + 2*name_len) :: path_D_dir + + ! Initialize number of particles + + particle_id = 0 + id = 0 + + ! Read the input lag_bubble file or restart point + if (cfl_dt) then + save_count = n_start + qtime = n_start*t_save + else + save_count = t_step_start + qtime = t_step_start*dt + end if + + if (save_count == 0) then + if (proc_rank == 0) print *, 'Reading lagrange particles input file.' 
+ call my_inquire(trim(lag_params%input_path), file_exist) + if (file_exist) then + open (94, file=trim(lag_params%input_path), form='formatted', iostat=ios) + do while (ios == 0) + read (94, *, iostat=ios) (inputParticle(i), i=1, 8) + if (ios /= 0) cycle + indomain = particle_in_domain_physical(inputParticle(1:3)) + id = id + 1 + if (id > lag_params%nParticles_glb .and. proc_rank == 0) then + call s_mpi_abort("Current number of particles is larger than nParticles_glb") + end if + if (indomain) then + particle_id = particle_id + 1 + call s_add_particles(inputParticle, q_cons_vf, particle_id) + lag_part_id(particle_id, 1) = id ! global ID + lag_part_id(particle_id, 2) = particle_id ! local ID + n_el_particles_loc = particle_id ! local number of particles + end if + end do + close (94) + else + call s_mpi_abort("Initialize the lagrange particles in " // trim(lag_params%input_path)) + end if + else + if (proc_rank == 0) print *, 'Restarting lagrange particles at save_count: ', save_count + call s_restart_bubbles(particle_id, save_count) + end if + + print *, " Lagrange parrticles running, in proc", proc_rank, "number:", particle_id, "/", id + + if (num_procs > 1) then + call s_mpi_reduce_int_sum(n_el_particles_loc, n_el_particles_glb) + else + n_el_particles_glb = n_el_particles_loc + end if + + if (proc_rank == 0) then + if (n_el_particles_glb == 0) call s_mpi_abort('No particles in the domain. 
Check ' // trim(lag_params%input_path)) + end if + + $:GPU_UPDATE(device='[particles_lagrange, lag_params]') + + $:GPU_UPDATE(device='[lag_part_id, particle_R0, Rmax_stats_part, Rmin_stats_part, particle_mass, f_p, p_AM, p_owner_rank, & + & gid_to_local, particle_rad, particle_pos, particle_posPrev, particle_vel, particle_s, particle_draddt, & + & particle_dposdt, particle_dveldt, n_el_particles_loc]') + + Rmax_glb = min(dflt_real, -dflt_real) + Rmin_glb = max(dflt_real, -dflt_real) + $:GPU_UPDATE(device='[Rmax_glb, Rmin_glb]') + + $:GPU_UPDATE(device='[dx, dy, dz, x_cb, x_cc, y_cb, y_cc, z_cb, z_cc]') + + ! Populate temporal variables + call s_transfer_data_to_tmp_particles() + + if (save_count == 0) then + ! Create ./D directory + if (proc_rank == 0) then + write (path_D_dir, '(A,I0,A,I0)') trim(case_dir) // '/D' + call my_inquire(trim(path_D_dir), file_exist) + if (.not. file_exist) call s_create_directory(trim(path_D_dir)) + end if + call s_mpi_barrier() + call s_write_restart_lag_particles(save_count) ! Needed for post_processing + if (lag_params%write_void_evol) call s_write_void_evol_particles(qtime) + end if + + if (lag_params%write_bubbles) call s_write_lag_particle_evol(qtime) + + end subroutine s_read_input_particles + + !> The purpose of this procedure is to obtain the information of the particles when starting fresh + !! @param inputPart Particle information + !! @param q_cons_vf Conservative variables + !! 
@param part_id Local id of the particle + impure subroutine s_add_particles(inputPart, q_cons_vf, part_id) + + type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf + real(wp), dimension(8), intent(in) :: inputPart + integer, intent(in) :: part_id + integer :: i + real(wp) :: pliq, volparticle, concvap, totalmass, kparticle, cpparticle + real(wp) :: omegaN_local, PeG, PeT, rhol, pcrit, qv, gamma, pi_inf, dynP + integer, dimension(3) :: cell + real(wp), dimension(2) :: Re + real(wp) :: massflag, heatflag, Re_trans, Im_trans, myR, func_sum + real(wp), dimension(3) :: myPos, myVel + + massflag = 0._wp + heatflag = 0._wp + if (lag_params%massTransfer_model) massflag = 1._wp + if (lag_params%heatTransfer_model) heatflag = 1._wp + + particle_R0(part_id) = inputPart(7) + Rmax_stats_part(part_id) = min(dflt_real, -dflt_real) + Rmin_stats_part(part_id) = max(dflt_real, -dflt_real) + particle_rad(part_id, 1) = inputPart(7) + particle_pos(part_id,1:3,1) = inputPart(1:3) + particle_posPrev(part_id,1:3,1) = particle_pos(part_id,1:3,1) + particle_vel(part_id,1:3,1) = inputPart(4:6) + + ! Initialize Particle Sources + f_p(part_id,1:3) = 0._wp + p_AM(part_id) = 0._wp + p_owner_rank(part_id) = proc_rank + gid_to_local(part_id) = -1 + + if (cyl_coord .and. p == 0) then + particle_pos(part_id, 2, 1) = sqrt(particle_pos(part_id, 2, 1)**2._wp + particle_pos(part_id, 3, 1)**2._wp) + ! Storing azimuthal angle (-Pi to Pi)) into the third coordinate variable + particle_pos(part_id, 3, 1) = atan2(inputPart(3), inputPart(2)) + particle_posPrev(part_id,1:3,1) = particle_pos(part_id,1:3,1) + end if + + cell = fd_number - buff_size + call s_locate_cell(particle_pos(part_id,1:3,1), cell, particle_s(part_id,1:3,1)) + + ! Check if the particle is located in the ghost cell of a symmetric, or wall boundary + if ((any(bc_x%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, & + & BC_NO_SLIP_WALL/)) .and. cell(1) < 0) .or. 
(any(bc_x%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, & + & BC_NO_SLIP_WALL/)) .and. cell(1) > m) .or. (any(bc_y%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, & + & BC_NO_SLIP_WALL/)) .and. cell(2) < 0) .or. (any(bc_y%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, & + & BC_NO_SLIP_WALL/)) .and. cell(2) > n)) then + call s_mpi_abort("Lagrange particle is in the ghost cells of a symmetric or wall boundary.") + end if + + if (p > 0) then + if ((any(bc_z%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, & + & BC_NO_SLIP_WALL/)) .and. cell(3) < 0) .or. (any(bc_z%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, & + & BC_NO_SLIP_WALL/)) .and. cell(3) > p)) then + call s_mpi_abort("Lagrange particle is in the ghost cells of a symmetric or wall boundary.") + end if + end if + + ! Initial particle mass + volparticle = 4._wp/3._wp*pi*particle_R0(part_id)**3 ! volume + particle_mass(part_id) = volparticle*rho0ref_particle ! mass + if (particle_mass(part_id) <= 0._wp) then + call s_mpi_abort("The initial particle mass is negative or zero. Check the particle file.") + end if + + end subroutine s_add_particles + + !> Read particle data from a restart checkpoint + !! @param part_id Local ID of the particle + !! 
@param save_count File identifier + impure subroutine s_restart_bubbles(part_id, save_count) + + integer, intent(inout) :: part_id, save_count + character(LEN=path_len + 2*name_len) :: file_loc + real(wp) :: file_time, file_dt + integer :: file_num_procs, file_tot_part, tot_part + +#ifndef MFC_MPI + @:PROHIBIT(.true., "Lagrangian particle restart requires MPI (--mpi)") +#else + real(wp), dimension(20) :: inputvals + integer, dimension(MPI_STATUS_SIZE) :: status + integer(kind=MPI_OFFSET_KIND) :: disp + integer :: view + integer, dimension(3) :: cell + logical :: indomain, particle_file, file_exist + integer, dimension(2) :: gsizes, lsizes, start_idx_part + integer :: ifile, ierr, tot_data, id + integer :: i + integer, dimension(:), allocatable :: proc_particle_counts + real(wp), dimension(1:1,1:lag_io_vars) :: dummy + dummy = 0._wp + + ! Construct file path + write (file_loc, '(A,I0,A)') 'lag_bubbles_', save_count, '.dat' + file_loc = trim(case_dir) // '/restart_data' // trim(mpiiofs) // trim(file_loc) + + ! Check if file exists + inquire (FILE=trim(file_loc), EXIST=file_exist) + if (.not. file_exist) then + call s_mpi_abort('Restart file ' // trim(file_loc) // ' does not exist!') + end if + + if (.not. 
parallel_io) return + + if (proc_rank == 0) then + call MPI_FILE_OPEN(MPI_COMM_SELF, file_loc, MPI_MODE_RDONLY, mpi_info_int, ifile, ierr) + + call MPI_FILE_READ(ifile, file_tot_part, 1, MPI_INTEGER, status, ierr) + call MPI_FILE_READ(ifile, file_time, 1, mpi_p, status, ierr) + call MPI_FILE_READ(ifile, file_dt, 1, mpi_p, status, ierr) + call MPI_FILE_READ(ifile, file_num_procs, 1, MPI_INTEGER, status, ierr) + + call MPI_FILE_CLOSE(ifile, ierr) + end if + + call MPI_BCAST(file_tot_part, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(file_time, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(file_dt, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(file_num_procs, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + + allocate (proc_particle_counts(file_num_procs)) + + if (proc_rank == 0) then + call MPI_FILE_OPEN(MPI_COMM_SELF, file_loc, MPI_MODE_RDONLY, mpi_info_int, ifile, ierr) + + ! Skip to processor counts position + disp = int(sizeof(file_tot_part) + 2*sizeof(file_time) + sizeof(file_num_procs), MPI_OFFSET_KIND) + call MPI_FILE_SEEK(ifile, disp, MPI_SEEK_SET, ierr) + call MPI_FILE_READ(ifile, proc_particle_counts, file_num_procs, MPI_INTEGER, status, ierr) + + call MPI_FILE_CLOSE(ifile, ierr) + end if + + call MPI_BCAST(proc_particle_counts, file_num_procs, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + + ! 
Set time variables from file + mytime = file_time + dt = file_dt + + part_id = proc_particle_counts(proc_rank + 1) + + start_idx_part(1) = 0 + do i = 1, proc_rank + start_idx_part(1) = start_idx_part(1) + proc_particle_counts(i) + end do + + start_idx_part(2) = 0 + lsizes(1) = part_id + lsizes(2) = lag_io_vars + + gsizes(1) = file_tot_part + gsizes(2) = lag_io_vars + + if (part_id > 0) then + allocate (MPI_IO_DATA_lag_bubbles(part_id,1:lag_io_vars)) + + call MPI_TYPE_CREATE_SUBARRAY(2, gsizes, lsizes, start_idx_part, MPI_ORDER_FORTRAN, mpi_p, view, ierr) + call MPI_TYPE_COMMIT(view, ierr) + + call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, MPI_MODE_RDONLY, mpi_info_int, ifile, ierr) + + ! Skip extended header + disp = int(sizeof(file_tot_part) + 2*sizeof(file_time) + sizeof(file_num_procs) & + & + file_num_procs*sizeof(proc_particle_counts(1)), MPI_OFFSET_KIND) + call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, 'native', mpi_info_int, ierr) + + call MPI_FILE_READ_ALL(ifile, MPI_IO_DATA_lag_bubbles, lag_io_vars*part_id, mpi_p, status, ierr) + + call MPI_FILE_CLOSE(ifile, ierr) + call MPI_TYPE_FREE(view, ierr) + + n_el_particles_loc = part_id + + do i = 1, part_id + lag_part_id(i, 1) = int(MPI_IO_DATA_lag_bubbles(i, 1)) + particle_pos(i,1:3,1) = MPI_IO_DATA_lag_bubbles(i,2:4) + particle_posPrev(i,1:3,1) = MPI_IO_DATA_lag_bubbles(i,5:7) + particle_vel(i,1:3,1) = MPI_IO_DATA_lag_bubbles(i,8:10) + particle_rad(i, 1) = MPI_IO_DATA_lag_bubbles(i, 11) + particle_R0(i) = MPI_IO_DATA_lag_bubbles(i, 13) + Rmax_stats_part(i) = MPI_IO_DATA_lag_bubbles(i, 14) + Rmin_stats_part(i) = MPI_IO_DATA_lag_bubbles(i, 15) + particle_mass(i) = MPI_IO_DATA_lag_bubbles(i, 19) + cell = -buff_size + call s_locate_cell(particle_pos(i,1:3,1), cell, particle_s(i,1:3,1)) + end do + + deallocate (MPI_IO_DATA_lag_bubbles) + else + n_el_particles_loc = 0 + + call MPI_TYPE_CONTIGUOUS(0, mpi_p, view, ierr) + call MPI_TYPE_COMMIT(view, ierr) + + call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, MPI_MODE_RDONLY, 
mpi_info_int, ifile, ierr) + + ! Skip extended header + disp = int(sizeof(file_tot_part) + 2*sizeof(file_time) + sizeof(file_num_procs) & + & + file_num_procs*sizeof(proc_particle_counts(1)), MPI_OFFSET_KIND) + call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, 'native', mpi_info_int, ierr) + + call MPI_FILE_READ_ALL(ifile, dummy, 0, mpi_p, status, ierr) + + call MPI_FILE_CLOSE(ifile, ierr) + call MPI_TYPE_FREE(view, ierr) + end if + + if (proc_rank == 0) then + write (*, '(A,I0,A,I0)') 'Read ', file_tot_part, ' particles from restart file at t_step = ', save_count + write (*, '(A,E15.7,A,E15.7)') 'Restart time = ', mytime, ', dt = ', dt + end if + + deallocate (proc_particle_counts) +#endif + + end subroutine s_restart_bubbles + + !> Contains the particle dynamics subroutines. + !! @param q_prim_vf Primitive variables + !! @param bc_type Boundary condition type markers for each direction and side + !! @param stage Current stage in the time-stepper algorithm + !! @param vL_x, vL_y, vL_z Left-biased work arrays passed to the gradient routines + !! @param vR_x, vR_y, vR_z Right-biased work arrays passed to the gradient routines + !! @param rhs_vf Time derivative of the conservative variables (read for drho/dt) + subroutine s_compute_particle_EL_dynamics(q_prim_vf, bc_type, stage, vL_x, vL_y, vL_z, vR_x, vR_y, vR_z, rhs_vf) + + type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf + type(integer_field), dimension(1:num_dims,1:2), intent(in) :: bc_type + type(scalar_field), dimension(sys_size), intent(in) :: rhs_vf + integer, intent(in) :: stage + real(wp), dimension(idwbuff(1)%beg:,idwbuff(2)%beg:,idwbuff(3)%beg:,1:), intent(inout) :: vL_x, vL_y, vL_z + real(wp), dimension(idwbuff(1)%beg:,idwbuff(2)%beg:,idwbuff(3)%beg:,1:), intent(inout) :: vR_x, vR_y, vR_z + integer, dimension(3) :: cell, cellijk + real(wp) :: myMass, myR, myBeta_c, myBeta_t, myR0, myRe, mydrhodt, myVolumeFrac, myGamma, rmass_add, func_sum + real(wp), dimension(3) :: myVel, myPos, force_vec, s_cell + logical :: only_beta + integer :: k, l, i, j + + only_beta = .false. + + if (lag_params%pressure_force .or. 
lag_params%added_mass_model > 0) then + do l = 1, num_dims + if (l == 1) then + call s_gradient_field(vL_x, vR_x, field_vars(dPx_id)%sf, l, E_idx) + else if (l == 2) then + call s_gradient_field(vL_y, vR_y, field_vars(dPy_id)%sf, l, E_idx) + else if (l == 3) then + call s_gradient_field(vL_z, vR_z, field_vars(dPz_id)%sf, l, E_idx) + end if + end do + end if + + if (lag_params%added_mass_model > 0) then + do l = 1, num_dims + if (l == 1) then + call s_gradient_field(vL_x, vR_x, field_vars(drhox_id)%sf, l, 1) + else if (l == 2) then + call s_gradient_field(vL_y, vR_y, field_vars(drhoy_id)%sf, l, 1) + else if (l == 3) then + call s_gradient_field(vL_z, vR_z, field_vars(drhoz_id)%sf, l, 1) + end if + end do + end if + + myGamma = (1._wp/fluid_pp(1)%gamma) + 1._wp + if (viscous) then + myRe = 1._wp/fluid_pp(1)%Re(1) + else + ! TODO: Wire to a fluid parameter for non-air flows + myRe = 1.845e-5_wp + end if + + call nvtxStartRange("LAGRANGE-PARTICLE-DYNAMICS") + + !> Compute Fluid-Particle Forces (drag/pressure/added mass) and convert to particle acceleration + $:GPU_PARALLEL_LOOP(private='[i, k, l, cell, s_cell, myMass, myR, myR0, myPos, myVel, myVolumeFrac, force_vec, rmass_add, & + & func_sum, mydrhodt]', copyin='[stage, myGamma, myRe, only_beta]') + do k = 1, n_el_particles_loc + f_p(k,:) = 0._wp + p_owner_rank(k) = proc_rank + + s_cell = particle_s(k,1:3,2) + cell = int(s_cell(:)) + do i = 1, num_dims + if (s_cell(i) < 0._wp) cell(i) = cell(i) - 1 + end do + + ! 
Current particle state + myMass = particle_mass(k) + myR = particle_rad(k, 2) + myR0 = particle_R0(k) + myPos = particle_pos(k,:,2) + myVel = particle_vel(k,:,2) + myVolumeFrac = 1._wp - q_particles(alphaf_id)%sf(cell(1), cell(2), cell(3)) + mydrhodt = rhs_vf(1)%sf(cell(1), cell(2), cell(3)) + + particle_dposdt(k,:,stage) = 0._wp + particle_dveldt(k,:,stage) = 0._wp + particle_draddt(k, stage) = 0._wp + + call s_get_particle_force(myPos, myR, myVel, myMass, myRe, myGamma, myVolumeFrac, mydrhodt, cell, q_prim_vf, & + & field_vars, weights_x_interp, weights_y_interp, weights_z_interp, force_vec, rmass_add) + + p_AM(k) = rMass_add + f_p(k,:) = f_p(k,:) + force_vec(:) + + if (.not. lag_params%collision_force) then + myMass = particle_mass(k) + p_AM(k) + myVel = particle_vel(k,:,2) + do l = 1, num_dims + particle_dposdt(k, l, stage) = myVel(l) + particle_dveldt(k, l, stage) = f_p(k, l)/myMass + particle_draddt(k, stage) = 0._wp + end do + end if + + if (lag_params%solver_approach == 2) then + func_sum = gSum(k) + call s_gaussian_atomic(myR, myVel, myPos, force_vec, func_sum, cell, q_particles, only_beta) + end if + end do + $:END_GPU_PARALLEL_LOOP() + + if (lag_params%solver_approach == 2) then + call s_finalize_beta_field(bc_type, only_beta) + end if + + call nvtxStartRange("LAGRANGE-PARTICLE-COLLISIONS") + if (lag_params%collision_force) then + !> Compute Particle-Particle collision forces + call s_compute_particle_EL_collisions(stage, bc_type) + + $:GPU_PARALLEL_LOOP(private='[k, l, myMass, myVel]') + do k = 1, n_el_particles_loc + myMass = particle_mass(k) + p_AM(k) + myVel = particle_vel(k,:,2) + do l = 1, num_dims + particle_dposdt(k, l, stage) = myVel(l) + particle_dveldt(k, l, stage) = f_p(k, l)/myMass + particle_draddt(k, stage) = 0._wp + end do + end do + $:END_GPU_PARALLEL_LOOP() + end if + call nvtxEndRange + + call nvtxEndRange + + end subroutine s_compute_particle_EL_dynamics + + !> Compute inter-particle collision forces using a soft-sphere DEM model. 
Uses a cell-based linked list for O(N) neighbor + !! search. The contact force model is a spring-dashpot (Hertzian stiffness with viscous damping). Forces on particles owned by + !! other MPI ranks are buffered for later transfer. + subroutine s_compute_particle_EL_collisions(stage, bc_type) + + integer, intent(in) :: stage + type(integer_field), dimension(1:num_dims,1:2), intent(in) :: bc_type + integer, dimension(3) :: cell + real(wp), dimension(3) :: s_cell + integer, dimension(3) :: cellaux + integer :: i, k, l, q, ip, jp, kp, ii, jj, kk + logical :: celloutside + real(wp) :: pidtksp2, ksp, nu1, nu2, Rp1, Rp2, E1, E2, Estar, cor, rmag, Rstar, dij, eta_n, kappa_n, mp1, mp2, dt_loc + real(wp), dimension(3) :: xp1, xp2, vp1, vp2, v_rel, rpij, nij, vnij, Fnpp_ij, force_vec + integer :: kpz + integer :: total_recv + integer :: glb_id, count + integer :: n_el_particles_loc_before_ghost + + if (num_procs > 1) then + n_el_particles_loc_before_ghost = n_el_particles_loc + call s_reset_force_buffers() + call s_add_ghost_particles() + end if + + kpz = 0 + if (num_dims == 3) kpz = 1 + + ksp = 10._wp ! Spring stiffness multiplier + nu1 = 0.35_wp ! Poisson's ratio, particle 1 (glass/steel-like) + nu2 = 0.35_wp ! Poisson's ratio, particle 2 + E1 = 1.e9_wp ! Young's modulus [Pa], particle 1 + E2 = 1.e9_wp ! Young's modulus [Pa], particle 2 + cor = 0.7_wp ! 
Coefficient of restitution + + pidtksp2 = (pi**2)/((dt*ksp)**2) + + Estar = 1._wp/(((1._wp - nu1**2)/E1) + ((1._wp - nu2**2)/E2)) + Estar = (4._wp/3._wp)*Estar + + call s_reset_linked_list() + + call nvtxStartRange("LAGRANGE-PARTICLE-COLLISIONS") + error_flag = 0 + $:GPU_UPDATE(device='[error_flag]') + + $:GPU_PARALLEL_LOOP(private='[i, k, cell, ip, jp, kp, Rp1, xp1, mp1, vp1, kk, jj, ii, cellaux, q, Rp2, xp2, mp2, vp2, & + & v_rel, Rstar, rpij, rmag, nij, vnij, dij, kappa_n, eta_n, Fnpp_ij, force_vec, s_cell, celloutside, & + & count]', copyin='[ksp, nu1, nu2, E1, E2, cor, pidtksp2, Estar, kpz]') + do k = 1, n_el_particles_loc + if (.not. particle_in_domain_physical(particle_pos(k,1:3,2))) then + cycle + end if + + s_cell = particle_s(k,1:3,2) + cell = int(s_cell(:)) + do i = 1, num_dims + if (s_cell(i) < 0._wp) cell(i) = cell(i) - 1 + end do + + ip = cell(1) + jp = cell(2) + kp = cell(3) + + Rp1 = particle_rad(k, 2) + xp1 = particle_pos(k,:,2) + mp1 = particle_mass(k) + vp1 = particle_vel(k,:,2) + + do kk = kp - kpz, kp + kpz + do jj = jp - 1, jp + 1 + do ii = ip - 1, ip + 1 + cellaux(1) = ii + cellaux(2) = jj + cellaux(3) = kk + + call s_check_celloutside_wbuff(cellaux, celloutside) + + if (.not. celloutside) then + q = particle_head(ii, jj, kk) + ! 
Traverse linked list in that cell + + count = 0 + do while (q /= -1) + count = count + 1 + if (count > n_el_particles_loc) then + $:GPU_ATOMIC(atomic='write') + error_flag = 1 + exit + end if + + if (lag_part_id(q, 1) > lag_part_id(k, 1)) then + Rp2 = particle_rad(q, 2) + xp2 = particle_pos(q,:,2) + mp2 = particle_mass(q) + vp2 = particle_vel(q,:,2) + v_rel = vp2 - vp1 + + Rstar = (Rp1*Rp2)/(Rp1 + Rp2) + rpij = xp2 - xp1 + rmag = sqrt(rpij(1)**2 + rpij(2)**2 + rpij(3)**2) + rmag = max(rmag, eps_overlap) + nij = rpij/rmag + vnij = dot_product(v_rel, nij)*nij + dij = (Rp1 + Rp2) - rmag + + if (dij > 0._wp) then + kappa_n = min((pidtksp2*mp1), (pidtksp2*mp2), (Estar*sqrt(Rstar)*sqrt(abs(dij)))) + + eta_n = ((-2._wp*sqrt(kappa_n)*log(cor))/sqrt((log(cor))**2 + pi**2)) & + & *(1._wp/sqrt((1._wp/mp1) + (1._wp/mp2))) + + Fnpp_ij = -kappa_n*dij*nij - eta_n*vnij + + f_p(k,:) = f_p(k,:) + Fnpp_ij + + if (p_owner_rank(q) == proc_rank) then + ! f_p(q, :) = f_p(q, :) - Fnpp_ij + + $:GPU_ATOMIC(atomic='update') + f_p(q, 1) = f_p(q, 1) - Fnpp_ij(1) + + $:GPU_ATOMIC(atomic='update') + f_p(q, 2) = f_p(q, 2) - Fnpp_ij(2) + + $:GPU_ATOMIC(atomic='update') + f_p(q, 3) = f_p(q, 3) - Fnpp_ij(3) + else + call s_add_force_to_send_buffer(p_owner_rank(q), lag_part_id(q, 1), -Fnpp_ij) + end if + end if + end if + + q = linked_list(q) + end do + end if + end do + end do + end do + + !> Check each local particle for wall collisions + + call s_compute_wall_collisions(xp1, vp1, Rp1, mp1, Estar, pidtksp2, cor, force_vec) + f_p(k,:) = f_p(k,:) + force_vec + end do + $:END_GPU_PARALLEL_LOOP() + + call nvtxEndRange + + $:GPU_UPDATE(host='[error_flag]') + if (error_flag == 1) then + call s_mpi_abort("Linked list infinite loop detected") + end if + + if (num_procs > 1) then + n_el_particles_loc = n_el_particles_loc_before_ghost + $:GPU_UPDATE(device='[n_el_particles_loc]') + + total_recv = 0 + force_recv_ids = 0 + force_recv_vals = 0._wp + + call s_transfer_collision_forces(total_recv, force_recv_ids, 
force_recv_vals) + + $:GPU_UPDATE(device = '[force_recv_ids, force_recv_vals]') + + $:GPU_PARALLEL_LOOP(private='[i, k]',copyin = '[total_recv]') + do i = 1, total_recv + k = gid_to_local(force_recv_ids(i)) + if (k > 0) then + $:GPU_ATOMIC(atomic='update') + f_p(k, 1) = f_p(k, 1) + force_recv_vals(3*(i - 1) + 1) + + $:GPU_ATOMIC(atomic='update') + f_p(k, 2) = f_p(k, 2) + force_recv_vals(3*(i - 1) + 2) + + $:GPU_ATOMIC(atomic='update') + f_p(k, 3) = f_p(k, 3) + force_recv_vals(3*(i - 1) + 3) + end if + end do + $:END_GPU_PARALLEL_LOOP() + end if + + end subroutine s_compute_particle_EL_collisions + + !> This subroutine checks for particles at solid walls to compute a collision force + subroutine s_compute_wall_collisions(pos, vel, rad, mass, Es, pidtksp, core, wcol_force) + + $:GPU_ROUTINE(function_name='s_compute_wall_collisions',parallelism='[seq]', cray_inline=True) + + real(wp), dimension(3), intent(in) :: pos, vel + real(wp), intent(in) :: rad, mass, Es, pidtksp, core + real(wp), dimension(3), intent(inout) :: wcol_force + real(wp) :: dij + + wcol_force = 0._wp + + ! 
Check for particles at solid boundaries + if (any(bc_x%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/))) then + dij = rad - (pos(1) - x_cb(-1)) + + if (dij > 0._wp) then + call s_compute_wall_collision_force(dij, vel, rad, mass, Es, pidtksp, core, 1, 1._wp, wcol_force) + end if + end if + + if (any(bc_x%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/))) then + dij = rad - (x_cb(m) - pos(1)) + + if (dij > 0._wp) then + call s_compute_wall_collision_force(dij, vel, rad, mass, Es, pidtksp, core, 1, -1._wp, wcol_force) + end if + end if + + if (any(bc_y%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/))) then + dij = rad - (pos(2) - y_cb(-1)) + + if (dij > 0._wp) then + call s_compute_wall_collision_force(dij, vel, rad, mass, Es, pidtksp, core, 2, 1._wp, wcol_force) + end if + end if + + if (any(bc_y%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/))) then + dij = rad - (y_cb(n) - pos(2)) + + if (dij > 0._wp) then + call s_compute_wall_collision_force(dij, vel, rad, mass, Es, pidtksp, core, 2, -1._wp, wcol_force) + end if + end if + + if (p > 0) then + if (any(bc_z%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/))) then + dij = rad - (pos(3) - z_cb(-1)) + + if (dij > 0._wp) then + call s_compute_wall_collision_force(dij, vel, rad, mass, Es, pidtksp, core, 3, 1._wp, wcol_force) + end if + end if + + if (any(bc_z%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/))) then + dij = rad - (z_cb(p) - pos(3)) + + if (dij > 0._wp) then + call s_compute_wall_collision_force(dij, vel, rad, mass, Es, pidtksp, core, 3, -1._wp, wcol_force) + end if + end if + end if + + end subroutine s_compute_wall_collisions + + !> This subroutine computes the collision force with a solid wall + subroutine s_compute_wall_collision_force(dij, vel, rad, mass, Es, pidtksp, core, dir, normal, wcol_force) + + 
$:GPU_ROUTINE(function_name='s_compute_wall_collision_force',parallelism='[seq]', cray_inline=True) + + real(wp), dimension(3), intent(in) :: vel + real(wp), intent(in) :: dij, rad, mass, Es, pidtksp, core, normal + integer, intent(in) :: dir + real(wp), dimension(3), intent(inout) :: wcol_force + real(wp), dimension(3) :: nij, v_rel, vnij + real(wp) :: kappa_n, eta_n + + ! Normal points away from wall (into domain) + nij = 0._wp + nij(dir) = normal + + ! Relative velocity (wall has zero velocity) + v_rel = vel + vnij = dot_product(v_rel, nij)*nij + + ! Wall has infinite mass so use mp1 only + kappa_n = min((pidtksp*mass), (Es*sqrt(rad)*sqrt(abs(dij)))) + + eta_n = ((-2._wp*sqrt(kappa_n)*log(core))/sqrt((log(core))**2 + pi**2))*(1._wp/sqrt(1._wp/mass)) + + wcol_force = wcol_force + (kappa_n*dij*nij - eta_n*vnij) + + end subroutine s_compute_wall_collision_force + + !> This subroutine adds temporary ghost particles for collision purposes + subroutine s_add_ghost_particles() + + integer :: k, i, q + integer :: patch_id, newBubs + integer, dimension(3) :: cell + logical :: inc_ghost + + inc_ghost = .true. + + call nvtxStartRange("LAG-GHOSTADD") + call nvtxStartRange("LAG-GHOSTADD-DEV2HOST") + $:GPU_UPDATE(host='[p_owner_rank, particle_R0, Rmax_stats_part, Rmin_stats_part, particle_mass, f_p, lag_part_id, & + & particle_rad, particle_pos, particle_posPrev, particle_vel, particle_s, particle_draddt, particle_dposdt, & + & particle_dveldt, n_el_particles_loc, wrap_bubble_dir, wrap_bubble_loc]') + call nvtxEndRange + + ! 
Handle MPI transfer of particles going to another processor's local domain + if (num_procs > 1) then + call nvtxStartRange("LAG-GHOSTADD-TRANSFER-LIST") + call s_add_particles_to_transfer_list(n_el_particles_loc, particle_pos(:,:,2), particle_posPrev(:,:,2), inc_ghost) + call nvtxEndRange + + call nvtxStartRange("LAG-GHOSTADD-SENDRECV") + call s_mpi_sendrecv_solid_particles(p_owner_rank, particle_R0, Rmax_stats_part, Rmin_stats_part, particle_mass, f_p, & + & lag_part_id, particle_rad, particle_pos, particle_posPrev, particle_vel, & + & particle_s, particle_draddt, particle_dposdt, particle_dveldt, lag_num_ts, & + & n_el_particles_loc, 2) + call nvtxEndRange + end if + + call nvtxStartRange("LAG-GHOSTADD-HOST2DEV") + $:GPU_UPDATE(device='[p_owner_rank, particle_R0, Rmax_stats_part, Rmin_stats_part, particle_mass, f_p, lag_part_id, & + & particle_rad, particle_pos, particle_posPrev, particle_vel, particle_s, particle_draddt, particle_dposdt, & + & particle_dveldt, n_el_particles_loc]') + call nvtxEndRange + + call nvtxEndRange ! LAG-GHOSTADD + + $:GPU_PARALLEL_LOOP(private='[k, cell]') + do k = 1, n_el_particles_loc + cell = fd_number - buff_size + call s_locate_cell(particle_pos(k,1:3,2), cell, particle_s(k,1:3,2)) + end do + $:END_GPU_PARALLEL_LOOP() + + end subroutine s_add_ghost_particles + + !> The purpose of this subroutine is to check if the current cell is outside the computational domain or not (including ghost + !! cells). + !! @param cellaux Tested cell to smear the particle effect in. + !! @param celloutside If true, then cellaux is outside the computational domain. + subroutine s_check_celloutside_wbuff(cellaux, celloutside) + + $:GPU_ROUTINE(function_name='s_check_celloutside_wbuff',parallelism='[seq]', cray_inline=True) + + integer, dimension(3), intent(inout) :: cellaux + logical, intent(out) :: celloutside + + celloutside = .false. + + if (num_dims == 2) then + if ((cellaux(1) < -buff_size) .or. (cellaux(2) < -buff_size)) then + celloutside = .true. 
+ end if + + if ((cellaux(1) > m + buff_size) .or. (cellaux(2) > n + buff_size)) then + celloutside = .true. + end if + else + if ((cellaux(1) < -buff_size) .or. (cellaux(2) < -buff_size) .or. (cellaux(3) < -buff_size)) then + celloutside = .true. + end if + + if ((cellaux(1) > m + buff_size) .or. (cellaux(2) > n + buff_size) .or. (cellaux(3) > p + buff_size)) then + celloutside = .true. + end if + end if + + end subroutine s_check_celloutside_wbuff + + !> Compute particle source terms for two-way Euler-Lagrange coupling (Maeda & Colonius, 2018) + !! @param q_cons_vf Conservative variables + !! @param q_prim_vf Primitive variables + !! @param rhs_vf Time derivative of the conservative variables + subroutine s_compute_particles_EL_source(q_cons_vf, q_prim_vf, rhs_vf, stage) + + type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf + type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf + type(scalar_field), dimension(sys_size), intent(inout) :: rhs_vf + integer, intent(in) :: stage + integer :: i, j, k, l, nf + real(wp) :: dalphapdt, alpha_f, udot_gradalpha + + ! Spatial derivative of the fluid volume fraction and eulerian particle momentum fields. 
+ + do l = 1, num_dims + call s_gradient_dir_fornberg(q_particles(alphaf_id)%sf, field_vars(dalphafx_id + l - 1)%sf, l) + call s_gradient_dir_fornberg(q_particles(alphaupx_id + l - 1)%sf, field_vars(dalphap_upx_id + l - 1)%sf, l) + end do + + !> Apply particle sources to the Eulerian RHS + $:GPU_PARALLEL_LOOP(private='[i, j, k, alpha_f, dalphapdt, udot_gradalpha]', collapse=3) + do k = idwint(3)%beg, idwint(3)%end + do j = idwint(2)%beg, idwint(2)%end + do i = idwint(1)%beg, idwint(1)%end + if (q_particles(alphaf_id)%sf(i, j, k) > (1._wp - lag_params%valmaxvoid)) then + alpha_f = q_particles(alphaf_id)%sf(i, j, k) + + dalphapdt = 0._wp + udot_gradalpha = 0._wp + do l = 1, num_dims + dalphapdt = dalphapdt + field_vars(dalphap_upx_id + l - 1)%sf(i, j, k) + udot_gradalpha = udot_gradalpha + q_prim_vf(momxb + l - 1)%sf(i, j, & + & k)*field_vars(dalphafx_id + l - 1)%sf(i, j, k) + end do + dalphapdt = -dalphapdt + ! Add any contribution to dalphapdt from particles growing or shrinking + + !> Step 1: Source terms for volume fraction corrections + ! cons_var/alpha_f * (dalpha_p/dt - u dot grad(alpha_f)) + do l = 1, E_idx + rhs_vf(l)%sf(i, j, k) = rhs_vf(l)%sf(i, j, k) + (q_cons_vf(l)%sf(i, j, & + & k)/alpha_f)*(dalphapdt - udot_gradalpha) + end do + + ! momentum term -1/alpha_f * (p*grad(alpha_f) - Tau^v dot grad(alpha_f)) !Viscous term not implemented + do l = 1, num_dims + rhs_vf(momxb + l - 1)%sf(i, j, k) = rhs_vf(momxb + l - 1)%sf(i, j, & + & k) - ((1._wp/alpha_f)*(q_prim_vf(E_idx)%sf(i, j, k)*field_vars(dalphafx_id + l - 1)%sf(i, j, & + & k))) + end do + + ! energy term -1/alpha_f * (p*u dot grad(alpha_f) - (Tau^v dot u) dot grad(alpha_f)) !Viscous term not + ! 
implemented + rhs_vf(E_idx)%sf(i, j, k) = rhs_vf(E_idx)%sf(i, j, k) - ((1._wp/alpha_f)*(q_prim_vf(E_idx)%sf(i, j, & + & k)*udot_gradalpha)) + + !> Step 2: Add the drag/pressure/added mass forces to the fluid + rhs_vf(momxb)%sf(i, j, k) = rhs_vf(momxb)%sf(i, j, k) + q_particles(Smx_id)%sf(i, j, k)*(1._wp/alpha_f) + rhs_vf(momxb + 1)%sf(i, j, k) = rhs_vf(momxb + 1)%sf(i, j, k) + q_particles(Smy_id)%sf(i, j, & + & k)*(1._wp/alpha_f) + + ! Energy source + rhs_vf(E_idx)%sf(i, j, k) = rhs_vf(E_idx)%sf(i, j, k) + (q_particles(Smx_id)%sf(i, j, & + & k)*q_prim_vf(momxb)%sf(i, j, k) + q_particles(Smy_id)%sf(i, j, k)*q_prim_vf(momxb + 1)%sf(i, j, & + & k) + q_particles(SE_id)%sf(i, j, k))*(1._wp/alpha_f) + + if (num_dims == 3) then + rhs_vf(momxb + 2)%sf(i, j, k) = rhs_vf(momxb + 2)%sf(i, j, k) + q_particles(Smz_id)%sf(i, j, & + & k)*(1._wp/alpha_f) + ! Energy source + rhs_vf(E_idx)%sf(i, j, k) = rhs_vf(E_idx)%sf(i, j, k) + (q_particles(Smz_id)%sf(i, j, & + & k)*q_prim_vf(momxb + 2)%sf(i, j, k))*(1._wp/alpha_f) + end if + end if + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + + end subroutine s_compute_particles_EL_source + + !> Reset and rebuild the cell-based linked list for particle-to-cell mapping + subroutine s_reset_linked_list() + + integer :: j, k, l + + $:GPU_PARALLEL_LOOP(private='[j, k, l]', collapse=3) + do l = idwbuff(3)%beg, idwbuff(3)%end + do k = idwbuff(2)%beg, idwbuff(2)%end + do j = idwbuff(1)%beg, idwbuff(1)%end + particle_head(j, k, l) = -1 + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + + $:GPU_PARALLEL_LOOP(private='[k]') + do k = 1, n_el_particles_loc + linked_list(k) = -1 + end do + $:END_GPU_PARALLEL_LOOP() + + call s_build_linked_list() + + end subroutine s_reset_linked_list + + !> Zero all Eulerian field variables and particle projection arrays + subroutine s_reset_cell_vars() + + integer :: i, j, k, l + + $:GPU_PARALLEL_LOOP(private='[i, j, k, l]', collapse=4) + do i = 1, max(nField_vars, q_particles_idx) ! 
outermost is largest of the i-like dims + do l = idwbuff(3)%beg, idwbuff(3)%end + do k = idwbuff(2)%beg, idwbuff(2)%end + do j = idwbuff(1)%beg, idwbuff(1)%end + ! Zero field_vars if i <= nField_vars + if (i <= nField_vars) field_vars(i)%sf(j, k, l) = 0._wp + ! Zero q_particles if i <= q_particles_idx + if (i <= q_particles_idx) then + q_particles(i)%sf(j, k, l) = 0._wp + end if + end do + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + + end subroutine s_reset_cell_vars + + !> Finalize the particle volume fraction field after Gaussian smearing. Applies boundary conditions to the smeared field, then + !! converts accumulated particle volume alpha_p to fluid volume fraction alpha_f = 1 - alpha_p. + subroutine s_finalize_beta_field(bc_type, onlyBeta) + + type(integer_field), dimension(1:num_dims,1:2), intent(in) :: bc_type + integer :: j, k, l + logical, intent(in) :: onlyBeta + + call nvtxStartRange("PARTICLES-LAGRANGE-BETA-COMM") + if (onlyBeta) then + call s_populate_beta_buffers(q_particles, bc_type, 1) + else + call s_populate_beta_buffers(q_particles, bc_type, q_particles_idx) + end if + call nvtxEndRange + + ! Store 1-q_particles(1) + $:GPU_PARALLEL_LOOP(private='[j, k, l]', collapse=3) + do l = idwbuff(3)%beg, idwbuff(3)%end + do k = idwbuff(2)%beg, idwbuff(2)%end + do j = idwbuff(1)%beg, idwbuff(1)%end + q_particles(alphaf_id)%sf(j, k, l) = 1._wp - q_particles(alphaf_id)%sf(j, k, l) + ! Limiting void fraction given max value + q_particles(alphaf_id)%sf(j, k, l) = max(q_particles(alphaf_id)%sf(j, k, l), 1._wp - lag_params%valmaxvoid) + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + + end subroutine s_finalize_beta_field + + !> Build a cell-based linked list for particle-to-cell mapping. particle_head(i,j,k) points to the first particle in cell + !! (i,j,k). linked_list(k) points to the next particle in the same cell (-1 = end). 
+ subroutine s_build_linked_list() + + integer :: k, glb_id, i + integer, dimension(3) :: cell + real(wp), dimension(3) :: s_cell + logical :: celloutside + + $:GPU_PARALLEL_LOOP(private='[i, k, cell, s_cell, glb_id, celloutside]') + do k = 1, n_el_particles_loc + glb_id = lag_part_id(k, 1) + gid_to_local(glb_id) = k + + s_cell = particle_s(k,1:3,2) + cell = int(s_cell(:)) + do i = 1, num_dims + if (s_cell(i) < 0._wp) cell(i) = cell(i) - 1 + end do + + call s_check_celloutside_wbuff(cell, celloutside) + + if (.not. celloutside) then + !!!!! Particle linked list building + $:GPU_ATOMIC(atomic='capture') + linked_list(k) = particle_head(cell(1), cell(2), cell(3)) + particle_head(cell(1), cell(2), cell(3)) = k + $:END_GPU_ATOMIC_CAPTURE() + end if + end do + $:END_GPU_PARALLEL_LOOP() + + end subroutine s_build_linked_list + + !> This subroutine updates the Lagrange variables using the tvd RK time steppers. The time derivative of the particle variables + !! must be stored at every stage to avoid precision errors. + !! @param stage Current tvd RK stage + impure subroutine s_update_lagrange_particles_tdv_rk(q_prim_vf, bc_type, stage) + + type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf + type(integer_field), dimension(1:num_dims,1:2), intent(in) :: bc_type + integer, intent(in) :: stage + integer :: k + + if (time_stepper == 1) then ! 1st order TVD RK + + $:GPU_PARALLEL_LOOP(private='[k]') + do k = 1, n_el_particles_loc + ! 
u{1} = u{n} + dt * RHS{n} + particle_rad(k, 1) = particle_rad(k, 1) + dt*particle_draddt(k, 1) + if (moving_lag_particles) then + particle_posPrev(k,1:3,1) = particle_pos(k,1:3,1) + particle_pos(k,1:3,1) = particle_pos(k,1:3,1) + dt*particle_dposdt(k,1:3,1) + particle_vel(k,1:3,1) = particle_vel(k,1:3,1) + dt*particle_dveldt(k,1:3,1) + end if + end do + $:END_GPU_PARALLEL_LOOP() + + call s_transfer_data_to_tmp_particles() + if (moving_lag_particles) call s_enforce_EL_particles_boundary_conditions(q_prim_vf, stage, bc_type) + if (lag_params%write_void_evol) call s_write_void_evol_particles(mytime) + if (lag_params%write_bubbles_stats) call s_calculate_lag_particle_stats() + if (lag_params%write_bubbles) then + ! $:GPU_UPDATE(host='[gas_p,gas_mv,particle_rad,intfc_vel]') + $:GPU_UPDATE(host='[particle_rad]') + call s_write_lag_particle_evol(mytime) + end if + else if (time_stepper == 2) then ! 2nd order TVD RK + if (stage == 1) then + $:GPU_PARALLEL_LOOP(private='[k]') + do k = 1, n_el_particles_loc + ! u{1} = u{n} + dt * RHS{n} + particle_rad(k, 2) = particle_rad(k, 1) + dt*particle_draddt(k, 1) + if (moving_lag_particles) then + particle_posPrev(k,1:3,2) = particle_pos(k,1:3,1) + particle_pos(k,1:3,2) = particle_pos(k,1:3,1) + dt*particle_dposdt(k,1:3,1) + particle_vel(k,1:3,2) = particle_vel(k,1:3,1) + dt*particle_dveldt(k,1:3,1) + end if + end do + $:END_GPU_PARALLEL_LOOP() + + if (moving_lag_particles) call s_enforce_EL_particles_boundary_conditions(q_prim_vf, stage, bc_type) + else if (stage == 2) then + $:GPU_PARALLEL_LOOP(private='[k]') + do k = 1, n_el_particles_loc + ! 
u{1} = u{n} + (1/2) * dt * (RHS{n} + RHS{1}) + particle_rad(k, 1) = particle_rad(k, 1) + dt*(particle_draddt(k, 1) + particle_draddt(k, 2))/2._wp + if (moving_lag_particles) then + particle_posPrev(k,1:3,1) = particle_pos(k,1:3,2) + particle_pos(k,1:3,1) = particle_pos(k,1:3,1) + dt*(particle_dposdt(k,1:3,1) + particle_dposdt(k,1:3, & + & 2))/2._wp + particle_vel(k,1:3,1) = particle_vel(k,1:3,1) + dt*(particle_dveldt(k,1:3,1) + particle_dveldt(k,1:3, & + & 2))/2._wp + end if + end do + $:END_GPU_PARALLEL_LOOP() + + call s_transfer_data_to_tmp_particles() + if (moving_lag_particles) call s_enforce_EL_particles_boundary_conditions(q_prim_vf, stage, bc_type) + if (lag_params%write_void_evol) call s_write_void_evol_particles(mytime) + if (lag_params%write_bubbles_stats) call s_calculate_lag_particle_stats() + if (lag_params%write_bubbles) then + $:GPU_UPDATE(host='[particle_rad]') + call s_write_lag_particle_evol(mytime) + end if + end if + else if (time_stepper == 3) then ! 3rd order TVD RK + if (stage == 1) then + $:GPU_PARALLEL_LOOP(private='[k]') + do k = 1, n_el_particles_loc + ! u{1} = u{n} + dt * RHS{n} + particle_rad(k, 2) = particle_rad(k, 1) + dt*particle_draddt(k, 1) + if (moving_lag_particles) then + particle_posPrev(k,1:3,2) = particle_pos(k,1:3,1) + particle_pos(k,1:3,2) = particle_pos(k,1:3,1) + dt*particle_dposdt(k,1:3,1) + particle_vel(k,1:3,2) = particle_vel(k,1:3,1) + dt*particle_dveldt(k,1:3,1) + end if + end do + $:END_GPU_PARALLEL_LOOP() + + if (moving_lag_particles) call s_enforce_EL_particles_boundary_conditions(q_prim_vf, stage, bc_type) + else if (stage == 2) then + $:GPU_PARALLEL_LOOP(private='[k]') + do k = 1, n_el_particles_loc + ! 
u{2} = u{n} + (1/4) * dt * [RHS{n} + RHS{1}] + particle_rad(k, 2) = particle_rad(k, 1) + dt*(particle_draddt(k, 1) + particle_draddt(k, 2))/4._wp + if (moving_lag_particles) then + particle_posPrev(k,1:3,2) = particle_pos(k,1:3,2) + particle_pos(k,1:3,2) = particle_pos(k,1:3,1) + dt*(particle_dposdt(k,1:3,1) + particle_dposdt(k,1:3, & + & 2))/4._wp + particle_vel(k,1:3,2) = particle_vel(k,1:3,1) + dt*(particle_dveldt(k,1:3,1) + particle_dveldt(k,1:3, & + & 2))/4._wp + end if + end do + $:END_GPU_PARALLEL_LOOP() + + if (moving_lag_particles) call s_enforce_EL_particles_boundary_conditions(q_prim_vf, stage, bc_type) + else if (stage == 3) then + $:GPU_PARALLEL_LOOP(private='[k]') + do k = 1, n_el_particles_loc + ! u{n+1} = u{n} + (2/3) * dt * [(1/4)* RHS{n} + (1/4)* RHS{1} + RHS{2}] + particle_rad(k, 1) = particle_rad(k, 1) + (2._wp/3._wp)*dt*(particle_draddt(k, 1)/4._wp + particle_draddt(k, & + & 2)/4._wp + particle_draddt(k, 3)) + if (moving_lag_particles) then + particle_posPrev(k,1:3,1) = particle_pos(k,1:3,2) + particle_pos(k,1:3,1) = particle_pos(k,1:3,1) + (2._wp/3._wp)*dt*(particle_dposdt(k,1:3, & + & 1)/4._wp + particle_dposdt(k,1:3,2)/4._wp + particle_dposdt(k,1:3,3)) + particle_vel(k,1:3,1) = particle_vel(k,1:3,1) + (2._wp/3._wp)*dt*(particle_dveldt(k,1:3, & + & 1)/4._wp + particle_dveldt(k,1:3,2)/4._wp + particle_dveldt(k,1:3,3)) + end if + end do + $:END_GPU_PARALLEL_LOOP() + + call s_transfer_data_to_tmp_particles() + if (moving_lag_particles) call s_enforce_EL_particles_boundary_conditions(q_prim_vf, stage, bc_type) + if (lag_params%write_void_evol) call s_write_void_evol_particles(mytime) + if (lag_params%write_bubbles_stats) call s_calculate_lag_particle_stats() + if (lag_params%write_bubbles .and. 
                    mytime >= next_write_time) then
                    $:GPU_UPDATE(host='[particle_mass, particle_rad]')
                    call s_write_lag_particle_evol(mytime)
                    next_write_time = next_write_time + t_save
                end if
            end if
        end if

    end subroutine s_update_lagrange_particles_tdv_rk

    !> Enforce boundary conditions on Lagrangian particles. Phases: (1) GPU->host transfer, (2) MPI particle exchange with
    !! neighbors, (3) host->GPU transfer, (4) per-particle BC (periodic wrap / reflect / remove), (5) compaction to remove deleted
    !! particles, (6) re-smear onto Eulerian grid.
    !! @param q_prim_vf Primitive variables; the liquid volume fraction (advxb) is read to drop particles that left the liquid
    !! @param nstage Current RK stage; on the final stage (nstage == lag_num_ts) wall-corrected positions are copied to slot 1
    !! @param bc_type Boundary-condition type table, forwarded to s_finalize_beta_field
    impure subroutine s_enforce_EL_particles_boundary_conditions(q_prim_vf, nstage, bc_type)

        type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf
        type(integer_field), dimension(1:num_dims, 1:2), intent(in) :: bc_type
        integer, intent(in) :: nstage
        real(wp) :: offset
        integer :: k, i, q
        integer :: patch_id, newBubs, new_idx
        integer, dimension(3) :: cell
        logical :: inc_ghost
        real(wp) :: myR, func_sum
        real(wp), dimension(3) :: myPos, myVel, myForce
        logical :: only_beta

        inc_ghost = .false.
        only_beta = .true.

        ! Phase (1): bring the full particle state to the host for the MPI exchange
        call nvtxStartRange("LAG-BC")
        call nvtxStartRange("LAG-BC-DEV2HOST")
        $:GPU_UPDATE(host='[p_owner_rank, particle_R0, Rmax_stats_part, Rmin_stats_part, particle_mass, f_p, lag_part_id, &
            & particle_rad, particle_pos, particle_posPrev, particle_vel, particle_s, particle_draddt, particle_dposdt, &
            & particle_dveldt, keep_bubble, n_el_particles_loc, wrap_bubble_dir, wrap_bubble_loc]')
        call nvtxEndRange

        ! Phase (2): handle MPI transfer of particles going to another processor's local domain
        if (num_procs > 1) then
            call nvtxStartRange("LAG-BC-TRANSFER-LIST")
            call s_add_particles_to_transfer_list(n_el_particles_loc, particle_pos(:, :, 2), particle_posPrev(:, :, 2), inc_ghost)
            call nvtxEndRange

            call nvtxStartRange("LAG-BC-SENDRECV")
            call s_mpi_sendrecv_solid_particles(p_owner_rank, particle_R0, Rmax_stats_part, Rmin_stats_part, particle_mass, f_p, &
                & lag_part_id, particle_rad, particle_pos, particle_posPrev, particle_vel, &
                & particle_s, particle_draddt, particle_dposdt, particle_dveldt, lag_num_ts, &
                & n_el_particles_loc, 2)
            call nvtxEndRange
        end if

        ! Phase (3): push the (possibly re-distributed) particle state back to the device
        call nvtxStartRange("LAG-BC-HOST2DEV")
        $:GPU_UPDATE(device='[p_owner_rank, particle_R0, Rmax_stats_part, Rmin_stats_part, particle_mass, f_p, lag_part_id, &
            & particle_rad, particle_pos, particle_posPrev, particle_vel, particle_s, particle_draddt, particle_dposdt, &
            & particle_dveldt, n_el_particles_loc]')
        call nvtxEndRange

        ! Phase (4): per-particle boundary treatment; works on slot 2 (temporary state)
        $:GPU_PARALLEL_LOOP(private='[k, cell]',copyin='[nstage]')
        do k = 1, n_el_particles_loc
            keep_bubble(k) = 1
            wrap_bubble_loc(k, :) = 0
            wrap_bubble_dir(k, :) = 0

            ! Relocate particles at solid boundaries and delete particles that leave buffer regions.
            ! x-direction: wall-type BCs clamp to eps_overlap*radius off the face; periodic BCs
            ! flag the particle for wrapping; otherwise particles past the face are removed.
            if (any(bc_x%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. particle_pos(k, 1, &
                & 2) < x_cb(-1) + eps_overlap*particle_rad(k, 2)) then
                particle_pos(k, 1, 2) = x_cb(-1) + eps_overlap*particle_rad(k, 2)
                if (nstage == lag_num_ts) then
                    particle_pos(k, 1, 1) = particle_pos(k, 1, 2)
                end if
            else if (any(bc_x%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. particle_pos(k, &
                & 1, 2) > x_cb(m) - eps_overlap*particle_rad(k, 2)) then
                particle_pos(k, 1, 2) = x_cb(m) - eps_overlap*particle_rad(k, 2)
                if (nstage == lag_num_ts) then
                    particle_pos(k, 1, 1) = particle_pos(k, 1, 2)
                end if
            else if (bc_x%beg == BC_PERIODIC .and. particle_pos(k, 1, 2) < pcomm_coords(1)%beg .and. particle_posPrev(k, 1, &
                & 2) >= pcomm_coords(1)%beg) then
                wrap_bubble_dir(k, 1) = 1
                wrap_bubble_loc(k, 1) = -1
            else if (bc_x%end == BC_PERIODIC .and. particle_pos(k, 1, 2) > pcomm_coords(1)%end .and. particle_posPrev(k, 1, &
                & 2) <= pcomm_coords(1)%end) then
                wrap_bubble_dir(k, 1) = 1
                wrap_bubble_loc(k, 1) = 1
            else if (particle_pos(k, 1, 2) >= x_cb(m)) then
                keep_bubble(k) = 0
            else if (particle_pos(k, 1, 2) < x_cb(-1)) then
                keep_bubble(k) = 0
            end if

            ! y-direction: same treatment as x
            if (any(bc_y%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. particle_pos(k, 2, &
                & 2) < y_cb(-1) + eps_overlap*particle_rad(k, 2)) then
                particle_pos(k, 2, 2) = y_cb(-1) + eps_overlap*particle_rad(k, 2)
                if (nstage == lag_num_ts) then
                    particle_pos(k, 2, 1) = particle_pos(k, 2, 2)
                end if
            else if (any(bc_y%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. particle_pos(k, &
                & 2, 2) > y_cb(n) - eps_overlap*particle_rad(k, 2)) then
                particle_pos(k, 2, 2) = y_cb(n) - eps_overlap*particle_rad(k, 2)
                if (nstage == lag_num_ts) then
                    particle_pos(k, 2, 1) = particle_pos(k, 2, 2)
                end if
            else if (bc_y%beg == BC_PERIODIC .and. particle_pos(k, 2, 2) < pcomm_coords(2)%beg .and. particle_posPrev(k, 2, &
                & 2) >= pcomm_coords(2)%beg) then
                wrap_bubble_dir(k, 2) = 1
                wrap_bubble_loc(k, 2) = -1
            else if (bc_y%end == BC_PERIODIC .and. particle_pos(k, 2, 2) > pcomm_coords(2)%end .and. particle_posPrev(k, 2, &
                & 2) <= pcomm_coords(2)%end) then
                wrap_bubble_dir(k, 2) = 1
                wrap_bubble_loc(k, 2) = 1
            else if (particle_pos(k, 2, 2) >= y_cb(n)) then
                keep_bubble(k) = 0
            else if (particle_pos(k, 2, 2) < y_cb(-1)) then
                keep_bubble(k) = 0
            end if

            ! z-direction: only in 3D
            if (p > 0) then
                if (any(bc_z%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. particle_pos(k, 3, &
                    & 2) < z_cb(-1) + eps_overlap*particle_rad(k, 2)) then
                    particle_pos(k, 3, 2) = z_cb(-1) + eps_overlap*particle_rad(k, 2)
                    if (nstage == lag_num_ts) then
                        particle_pos(k, 3, 1) = particle_pos(k, 3, 2)
                    end if
                else if (any(bc_z%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, &
                    & BC_NO_SLIP_WALL/)) .and. particle_pos(k, 3, 2) > z_cb(p) - eps_overlap*particle_rad(k, 2)) then
                    particle_pos(k, 3, 2) = z_cb(p) - eps_overlap*particle_rad(k, 2)
                    if (nstage == lag_num_ts) then
                        particle_pos(k, 3, 1) = particle_pos(k, 3, 2)
                    end if
                else if (bc_z%beg == BC_PERIODIC .and. particle_pos(k, 3, 2) < pcomm_coords(3)%beg .and. particle_posPrev(k, 3, &
                    & 2) >= pcomm_coords(3)%beg) then
                    wrap_bubble_dir(k, 3) = 1
                    wrap_bubble_loc(k, 3) = -1
                else if (bc_z%end == BC_PERIODIC .and. particle_pos(k, 3, 2) > pcomm_coords(3)%end .and. particle_posPrev(k, 3, &
                    & 2) <= pcomm_coords(3)%end) then
                    wrap_bubble_dir(k, 3) = 1
                    wrap_bubble_loc(k, 3) = 1
                else if (particle_pos(k, 3, 2) >= z_cb(p)) then
                    keep_bubble(k) = 0
                else if (particle_pos(k, 3, 2) < z_cb(-1)) then
                    keep_bubble(k) = 0
                end if
            end if

            if (keep_bubble(k) == 1) then
                ! Remove bubbles that are no longer in a liquid
                cell = fd_number - buff_size
                call s_locate_cell(particle_pos(k, 1:3, 2), cell, particle_s(k, 1:3, 2))

                if (q_prim_vf(advxb)%sf(cell(1), cell(2), cell(3)) < (1._wp - lag_params%valmaxvoid)) then
                    keep_bubble(k) = 0
                end if
            end if
        end do
        $:END_GPU_PARALLEL_LOOP()

        ! Phase (5): compaction and same-rank periodic wrapping are done on the host
        if (n_el_particles_loc > 0) then
            call nvtxStartRange("LAG-BC")
            call nvtxStartRange("LAG-BC-DEV2HOST")
            $:GPU_UPDATE(host='[particle_R0, Rmax_stats_part, Rmin_stats_part, particle_mass, f_p, lag_part_id, particle_rad, &
                & particle_pos, particle_posPrev, particle_vel, particle_s, particle_draddt, particle_dposdt, &
                & particle_dveldt, keep_bubble, n_el_particles_loc, wrap_bubble_dir, wrap_bubble_loc]')
            call nvtxEndRange

            ! Stable compaction: surviving particles are moved down over deleted slots
            newBubs = 0
            do k = 1, n_el_particles_loc
                if (keep_bubble(k) == 1) then
                    newBubs = newBubs + 1
                    if (newBubs /= k) then
                        call s_copy_lag_particle(newBubs, k)
                        wrap_bubble_dir(newBubs, :) = wrap_bubble_dir(k, :)
                        wrap_bubble_loc(newBubs, :) = wrap_bubble_loc(k, :)
                    end if
                end if
            end do

            n_el_particles_loc = newBubs

            ! Handle periodic wrapping of bubbles on same processor
            ! NOTE(review): wrapped duplicates are written to indices beyond n_el_particles_loc,
            ! but the count itself is not increased here — confirm these are ghost images that
            ! are accounted for elsewhere.
            newBubs = 0
            do k = 1, n_el_particles_loc
                if (any(wrap_bubble_dir(k, :) == 1)) then
                    newBubs = newBubs + 1
                    new_idx = n_el_particles_loc + newBubs
                    call s_copy_lag_particle(new_idx, k)
                    do i = 1, num_dims
                        if (wrap_bubble_dir(k, i) == 1) then
                            ! Shift by the global domain extent in the wrapped direction
                            offset = glb_bounds(i)%end - glb_bounds(i)%beg
                            if (wrap_bubble_loc(k, i) == 1) then
                                do q = 1, 2
                                    particle_pos(new_idx, i, q) = particle_pos(new_idx, i, q) - offset
                                    particle_posPrev(new_idx, i, q) = particle_posPrev(new_idx, i, q) - offset
                                end do
                            else if (wrap_bubble_loc(k, i) == -1) then
                                do q = 1, 2
                                    particle_pos(new_idx, i, q) = particle_pos(new_idx, i, q) + offset
                                    particle_posPrev(new_idx, i, q) = particle_posPrev(new_idx, i, q) + offset
                                end do
                            end if
                        end if
                    end do
                end if
            end do
            call nvtxStartRange("LAG-BC-HOST2DEV")
            $:GPU_UPDATE(device='[particle_R0, Rmax_stats_part, Rmin_stats_part, particle_mass, f_p, lag_part_id, particle_rad, &
                & particle_pos, particle_posPrev, particle_vel, particle_s, particle_draddt, particle_dposdt, &
                & particle_dveldt, n_el_particles_loc]')
            call nvtxEndRange
        end if

        ! Phase (6): re-smear the surviving particles onto the Eulerian grid
        call s_reset_cell_vars()

        $:GPU_PARALLEL_LOOP(private='[cell, myR, myPos, myVel, myForce, func_sum]',copyin='[only_beta]')
        do k = 1, n_el_particles_loc
            myR = particle_rad(k, 2)
            myPos = particle_pos(k, 1:3, 2)
            myVel = particle_vel(k, 1:3, 2)
            myForce = f_p(k, :)

            cell = fd_number - buff_size
            call s_locate_cell(particle_pos(k, 1:3, 2), cell, particle_s(k, 1:3, 2))

            ! Compute the total gaussian contribution for each particle for normalization
            call s_compute_gaussian_contribution(myR, myPos, cell, func_sum)
            gSum(k) = func_sum

            call s_gaussian_atomic(myR, myVel, myPos, myForce, func_sum, cell, q_particles, only_beta)
        end do
        $:END_GPU_PARALLEL_LOOP()

        ! Update void fraction and communicate buffers
        call s_finalize_beta_field(bc_type, only_beta)

        call nvtxEndRange ! LAG-BC

    end subroutine s_enforce_EL_particles_boundary_conditions

    !> This subroutine returns the computational coordinate of the cell for the given position.
    !! The search walks cell faces from the supplied starting guess, so `cell` on entry is a hint.
    !! @param pos Input coordinates
    !! @param cell Computational coordinate of the cell (in: search start, out: containing cell)
    !! @param scoord Calculated particle coordinates (continuous computational-space coordinate)
    subroutine s_locate_cell(pos, cell, scoord)

        $:GPU_ROUTINE(function_name='s_locate_cell',parallelism='[seq]', cray_inline=True)

        real(wp), dimension(3), intent(in) :: pos
        real(wp), dimension(3), intent(out) :: scoord
        integer, dimension(3), intent(inout) :: cell
        integer :: i

        ! Walk faces in x until x_cb(cell(1)-1) <= pos(1) < x_cb(cell(1)), clipped to the buffer
        do while (pos(1) < x_cb(cell(1) - 1) .and. cell(1) > -buff_size)
            cell(1) = cell(1) - 1
        end do

        do while (pos(1) >= x_cb(cell(1)) .and. cell(1) < m + buff_size)
            cell(1) = cell(1) + 1
        end do

        do while (pos(2) < y_cb(cell(2) - 1) .and. cell(2) > -buff_size)
            cell(2) = cell(2) - 1
        end do

        do while (pos(2) >= y_cb(cell(2)) .and. cell(2) < n + buff_size)
            cell(2) = cell(2) + 1
        end do

        if (p > 0) then
            do while (pos(3) < z_cb(cell(3) - 1) .and. cell(3) > -buff_size)
                cell(3) = cell(3) - 1
            end do
            do while (pos(3) >= z_cb(cell(3)) .and. cell(3) < p + buff_size)
                cell(3) = cell(3) + 1
            end do
        end if

        ! The numbering of the cell of which left boundary is the domain boundary is 0. If the
        ! computational coordinate of the pos is s, the real coordinate of s is (the coordinate of
        ! the left boundary of the Floor(s)-th cell) + (s - int(s))*(cell-width). In other words,
        ! the coordinate of the center of the cell is x_cc(cell).

        ! coordinates in computational space
        scoord(1) = cell(1) + (pos(1) - x_cb(cell(1) - 1))/dx(cell(1))
        scoord(2) = cell(2) + (pos(2) - y_cb(cell(2) - 1))/dy(cell(2))
        scoord(3) = 0._wp
        if (p > 0) scoord(3) = cell(3) + (pos(3) - z_cb(cell(3) - 1))/dz(cell(3))
        ! int() truncates toward zero, so shift down for negative scoord to get floor()
        cell(:) = int(scoord(:))
        do i = 1, num_dims
            if (scoord(i) < 0._wp) cell(i) = cell(i) - 1
        end do

    end subroutine s_locate_cell

    !> This subroutine transfers the stage-1 particle state into the temporary (slot-2) variables.
    impure subroutine s_transfer_data_to_tmp_particles()

        integer :: k

        $:GPU_PARALLEL_LOOP(private='[k]')
        do k = 1, n_el_particles_loc
            particle_rad(k, 2) = particle_rad(k, 1)
            particle_pos(k, 1:3, 2) = particle_pos(k, 1:3, 1)
            particle_posPrev(k, 1:3, 2) = particle_posPrev(k, 1:3, 1)
            particle_vel(k, 1:3, 2) = particle_vel(k, 1:3, 1)
            particle_s(k, 1:3, 2) = particle_s(k, 1:3, 1)
        end do
        $:END_GPU_PARALLEL_LOOP()

    end subroutine s_transfer_data_to_tmp_particles

    !> The purpose of this procedure is to determine if the global coordinates of the bubbles are present in the current MPI
    !! processor (including ghost cells).
    !! @param pos_part Spatial coordinates of the bubble
    function particle_in_domain(pos_part)

        logical :: particle_in_domain
        real(wp), dimension(3), intent(in) :: pos_part

        ! 2D (planar): a virtual z-axis of width lag_params%charwidth stands in for the third dimension
        if (p == 0 .and. cyl_coord .neqv. .true.) then
            ! Defining a virtual z-axis that has the same dimensions as y-axis defined in the input file
            particle_in_domain = ((pos_part(1) < x_cb(m + buff_size - fd_number)) .and. (pos_part(1) >= x_cb(fd_number &
                & - buff_size - 1)) .and. (pos_part(2) < y_cb(n + buff_size - fd_number)) .and. (pos_part(2) &
                & >= y_cb(fd_number - buff_size - 1)) .and. (pos_part(3) < lag_params%charwidth/2._wp) &
                & .and. (pos_part(3) > -lag_params%charwidth/2._wp))
        else
            ! cyl_coord: radial coordinate is compared by magnitude
            particle_in_domain = ((pos_part(1) < x_cb(m + buff_size - fd_number)) .and. (pos_part(1) >= x_cb(fd_number &
                & - buff_size - 1)) .and. (abs(pos_part(2)) < y_cb(n + buff_size - fd_number)) &
                & .and. (abs(pos_part(2)) >= max(y_cb(fd_number - buff_size - 1), 0._wp)))
        end if

        ! 3D: overrides the 2D result above
        if (p > 0) then
            particle_in_domain = ((pos_part(1) < x_cb(m + buff_size - fd_number)) .and. (pos_part(1) >= x_cb(fd_number &
                & - buff_size - 1)) .and. (pos_part(2) < y_cb(n + buff_size - fd_number)) .and. (pos_part(2) &
                & >= y_cb(fd_number - buff_size - 1)) .and. (pos_part(3) < z_cb(p + buff_size - fd_number)) &
                & .and. (pos_part(3) >= z_cb(fd_number - buff_size - 1)))
        end if

        ! For symmetric and wall boundary condition: clip to the physical faces
        if (any(bc_x%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/))) then
            particle_in_domain = (particle_in_domain .and. (pos_part(1) >= x_cb(-1)))
        end if
        if (any(bc_x%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/))) then
            particle_in_domain = (particle_in_domain .and. (pos_part(1) < x_cb(m)))
        end if
        if (any(bc_y%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. (.not. cyl_coord)) then
            particle_in_domain = (particle_in_domain .and. (pos_part(2) >= y_cb(-1)))
        end if
        if (any(bc_y%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. (.not. cyl_coord)) then
            particle_in_domain = (particle_in_domain .and. (pos_part(2) < y_cb(n)))
        end if
        if (p > 0) then
            if (any(bc_z%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/))) then
                particle_in_domain = (particle_in_domain .and. (pos_part(3) >= z_cb(-1)))
            end if
            if (any(bc_z%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/))) then
                particle_in_domain = (particle_in_domain .and. (pos_part(3) < z_cb(p)))
            end if
        end if

    end function particle_in_domain

    !> Determine if a particle is located within the physical domain (excluding ghost cells)
    !! @param pos_part Spatial coordinates of the particle
    function particle_in_domain_physical(pos_part)

        $:GPU_ROUTINE(parallelism='[seq]')

        logical :: particle_in_domain_physical
        real(wp), dimension(3), intent(in) :: pos_part

        ! In-bounds test against the physical cell faces: x_cb(-1)..x_cb(m), y_cb(-1)..y_cb(n)
        particle_in_domain_physical = ((pos_part(1) < x_cb(m)) .and. (pos_part(1) >= x_cb(-1)) .and. (pos_part(2) < y_cb(n)) &
            & .and. (pos_part(2) >= y_cb(-1)))

        ! 3D adds the z faces z_cb(-1)..z_cb(p)
        if (p > 0) then
            particle_in_domain_physical = (particle_in_domain_physical .and. (pos_part(3) < z_cb(p)) .and. (pos_part(3) &
                & >= z_cb(-1)))
        end if

    end function particle_in_domain_physical

    !> The purpose of this procedure is to calculate the gradient from reconstructed states along the x, y and z
    !! directions as (right state - left state)/cell width.
    !! @param vL_field left edge reconstructed values
    !! @param vR_field right edge reconstructed values
    !! @param dq Output gradient of q (note: index order of dq follows dir — see below)
    !! @param dir Gradient spatial direction (1=x, 2=y, 3=z)
    !! @param field_var variable index for reconstructed states
    subroutine s_gradient_field(vL_field, vR_field, dq, dir, field_var)

        real(stp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:), intent(out) :: dq
        real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:), intent(in) :: vL_field
        real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:), intent(in) :: vR_field
        integer, intent(in) :: dir, field_var
        integer :: i, j, k
        real(wp) :: mydx

        if (dir == 1) then
            $:GPU_PARALLEL_LOOP(private='[i, j, k, mydx]', collapse=3,copyin='[dir, field_var]')
            do k = idwbuff(3)%beg, idwbuff(3)%end
                do j = idwbuff(2)%beg, idwbuff(2)%end
                    do i = idwbuff(1)%beg, idwbuff(1)%end
                        mydx = dx(i)
                        dq(i, j, k) = (vR_field(i, j, k, field_var) - vL_field(i, j, k, field_var))/mydx
                    end do
                end do
            end do
            $:END_GPU_PARALLEL_LOOP()
        else if (dir == 2) then
            ! Here i sweeps the y-extent; dq is written as dq(j, i, k), i.e. in (x, y, z) order
            $:GPU_PARALLEL_LOOP(private='[i, j, k, mydx]', collapse=3,copyin='[dir, field_var]')
            do k = idwbuff(3)%beg, idwbuff(3)%end
                do j = idwbuff(1)%beg, idwbuff(1)%end
                    do i = idwbuff(2)%beg, idwbuff(2)%end
                        mydx = dy(i)
                        dq(j, i, k) = (vR_field(i, j, k, field_var) - vL_field(i, j, k, field_var))/mydx
                    end do
                end do
            end do
            $:END_GPU_PARALLEL_LOOP()
        else if (dir == 3) then
            ! Here i sweeps the z-extent; dq is written as dq(k, j, i), i.e. in (x, y, z) order
            $:GPU_PARALLEL_LOOP(private='[i, j, k, mydx]', collapse=3,copyin='[dir, field_var]')
            do k = idwbuff(1)%beg, idwbuff(1)%end
                do j = idwbuff(2)%beg, idwbuff(2)%end
                    do i = idwbuff(3)%beg, idwbuff(3)%end
                        mydx = dz(i)
                        dq(k, j, i) = (vR_field(i, j, k, field_var) - vL_field(i, j, k, field_var))/mydx
                    end do
                end do
            end do
            $:END_GPU_PARALLEL_LOOP()
        end if

    end subroutine s_gradient_field

    !> The purpose of this procedure is to calculate the gradient of a scalar field along the x, y and z
    !! directions using Fornberg's method (precomputed weights, stencil of nWeights_grad points).
    !! The two outermost buffer layers in the sweep direction are not filled.
    !! @param q Input scalar field
    !! @param dq Output gradient of q
    !! @param dir Gradient spatial direction (1=x, 2=y, 3=z)
    subroutine s_gradient_dir_fornberg(q, dq, dir)

        real(stp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:), intent(in) :: q
        real(stp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:), intent(out) :: dq
        integer, intent(in) :: dir
        integer :: i, j, k, a, npts, s_idx

        npts = (nWeights_grad - 1)/2

        if (dir == 1) then
            $:GPU_PARALLEL_LOOP(private='[i, j, k, s_idx, a]', collapse=3,copyin='[npts]')
            do k = idwbuff(3)%beg, idwbuff(3)%end
                do j = idwbuff(2)%beg, idwbuff(2)%end
                    do i = idwbuff(1)%beg + 2, idwbuff(1)%end - 2
                        dq(i, j, k) = 0._wp
                        do a = -npts, npts
                            s_idx = a + npts + 1
                            dq(i, j, k) = dq(i, j, k) + weights_x_grad(s_idx)%sf(i, 1, 1)*q(i + a, j, k)
                        end do
                    end do
                end do
            end do
            $:END_GPU_PARALLEL_LOOP()
        else if (dir == 2) then
            $:GPU_PARALLEL_LOOP(private='[i, j, k, s_idx, a]', collapse=3,copyin='[npts]')
            do k = idwbuff(3)%beg, idwbuff(3)%end
                do j = idwbuff(2)%beg + 2, idwbuff(2)%end - 2
                    do i = idwbuff(1)%beg, idwbuff(1)%end
                        dq(i, j, k) = 0._wp
                        do a = -npts, npts
                            s_idx = a + npts + 1
                            dq(i, j, k) = dq(i, j, k) + weights_y_grad(s_idx)%sf(j, 1, 1)*q(i, j + a, k)
                        end do
                    end do
                end do
            end do
            $:END_GPU_PARALLEL_LOOP()
        else if (dir == 3) then
            $:GPU_PARALLEL_LOOP(private='[i, j, k, s_idx, a]', collapse=3,copyin='[npts]')
            do k = idwbuff(3)%beg + 2, idwbuff(3)%end - 2
                do j = idwbuff(2)%beg, idwbuff(2)%end
                    do i = idwbuff(1)%beg, idwbuff(1)%end
                        dq(i, j, k) = 0._wp
                        do a = -npts, npts
                            s_idx = a + npts + 1
                            dq(i, j, k) = dq(i, j, k) + weights_z_grad(s_idx)%sf(k, 1, 1)*q(i, j, k + a)
                        end do
                    end do
                end do
            end do
            $:END_GPU_PARALLEL_LOOP()
        end if

    end subroutine s_gradient_dir_fornberg

    !> The purpose of this procedure is to compute the Fornberg finite difference weights for derivatives (only done once at start
    !! time)
    !! @param npts Stencil half-width; the full stencil has nWeights_grad = 2*npts + 1 points
    impure subroutine s_compute_fornberg_fd_weights(npts)

        integer, intent(in) :: npts
        integer :: i, j, k, a, m_order
        integer :: s_idx
        real(wp) :: x0, y0, z0
        real(wp) :: x_stencil(nWeights_grad)
        real(wp) :: c(nWeights_grad, 0:1)

        m_order = 1 ! first derivative

        $:GPU_PARALLEL_LOOP(private='[i, a, x_stencil, c, s_idx, x0]', copyin='[npts, m_order]')
        do i = idwbuff(1)%beg + npts, idwbuff(1)%end - npts
            do a = -npts, npts
                s_idx = a + npts + 1
                x_stencil(s_idx) = x_cc(i + a)
            end do
            x0 = x_cc(i)

            call s_fornberg_weights(x0, x_stencil, nWeights_grad, m_order, c)

            do a = -npts, npts
                s_idx = a + npts + 1
                weights_x_grad(s_idx)%sf(i, 1, 1) = c(s_idx, 1)
            end do
        end do
        $:END_GPU_PARALLEL_LOOP()

        $:GPU_PARALLEL_LOOP(private='[j, a, x_stencil, c, s_idx, y0]', copyin='[npts, m_order]')
        do j = idwbuff(2)%beg + npts, idwbuff(2)%end - npts
            do a = -npts, npts
                s_idx = a + npts + 1
                x_stencil(s_idx) = y_cc(j + a)
            end do
            y0 = y_cc(j)

            call s_fornberg_weights(y0, x_stencil, nWeights_grad, m_order, c)

            do a = -npts, npts
                s_idx = a + npts + 1
                weights_y_grad(s_idx)%sf(j, 1, 1) = c(s_idx, 1)
            end do
        end do
        $:END_GPU_PARALLEL_LOOP()

        if (num_dims == 3) then
            $:GPU_PARALLEL_LOOP(private='[k, a, x_stencil, c, s_idx, z0]', copyin='[npts, m_order]')
copyin='[npts, m_order]') + do k = idwbuff(3)%beg + npts, idwbuff(3)%end - npts + do a = -npts, npts + s_idx = a + npts + 1 + x_stencil(s_idx) = z_cc(k + a) + end do + z0 = z_cc(k) + + call s_fornberg_weights(z0, x_stencil, nWeights_grad, m_order, c) + + do a = -npts, npts + s_idx = a + npts + 1 + weights_z_grad(s_idx)%sf(k, 1, 1) = c(s_idx, 1) + end do + end do + $:END_GPU_PARALLEL_LOOP() + end if + + end subroutine s_compute_fornberg_fd_weights + + !> The purpose of this procedure is to compute the Fornberg finite difference weights on a local stencil + subroutine s_fornberg_weights(x0, stencil, npts, m_order, coeffs) + + $:GPU_ROUTINE(parallelism='[seq]') + + integer, intent(in) :: npts ! number of stencil points + integer, intent(in) :: m_order ! highest derivative order + real(wp), intent(in) :: x0 ! evaluation point + real(wp), intent(in) :: stencil(npts) ! stencil coordinates + real(wp), intent(out) :: coeffs(npts,0:m_order) + integer :: i, j, k, mn + real(wp) :: c1, c2, c3, c4, c5 + + coeffs = 0.0_wp + c1 = 1.0_wp + c4 = stencil(1) - x0 + coeffs(1, 0) = 1.0_wp + + do i = 2, npts + mn = min(i - 1, m_order) + c2 = 1.0_wp + c5 = c4 + c4 = stencil(i) - x0 + + do j = 1, i - 1 + c3 = stencil(i) - stencil(j) + c2 = c2*c3 + + if (j == i - 1) then + do k = mn, 1, -1 + coeffs(i, k) = c1*(k*coeffs(i - 1, k - 1) - c5*coeffs(i - 1, k))/c2 + end do + coeffs(i, 0) = -c1*c5*coeffs(i - 1, 0)/c2 + end if + + do k = mn, 1, -1 + coeffs(j, k) = (c4*coeffs(j, k) - k*coeffs(j, k - 1))/c3 + end do + coeffs(j, 0) = c4*coeffs(j, 0)/c3 + end do + + c1 = c2 + end do + + end subroutine s_fornberg_weights + + !> The purpose of this procedure is to compute the barycentric weights for interpolation (only done once at start time) + impure subroutine s_compute_barycentric_weights(npts) + + integer, intent(in) :: npts + integer :: i, j, k, l, a, b + real(wp) :: prod_x, prod_y, prod_z, dx_loc, dy_loc, dz_loc + + $:GPU_PARALLEL_LOOP(private='[i, a, b, prod_x, dx_loc]', copyin = '[npts]') + do 
i = idwbuff(1)%beg + npts, idwbuff(1)%end - npts + do a = -npts, npts + prod_x = 1._wp + do b = -npts, npts + if (a /= b) then + dx_loc = x_cc(i + a) - x_cc(i + b) + prod_x = prod_x*dx_loc + end if + end do + weights_x_interp(a + npts + 1)%sf(i, 1, 1) = 1._wp/prod_x + end do + end do + $:END_GPU_PARALLEL_LOOP() + + $:GPU_PARALLEL_LOOP(private='[j, a, b, prod_y, dy_loc]', copyin = '[npts]') + do j = idwbuff(2)%beg + npts, idwbuff(2)%end - npts + do a = -npts, npts + prod_y = 1._wp + do b = -npts, npts + if (a /= b) then + dy_loc = y_cc(j + a) - y_cc(j + b) + prod_y = prod_y*dy_loc + end if + end do + weights_y_interp(a + npts + 1)%sf(j, 1, 1) = 1._wp/prod_y + end do + end do + $:END_GPU_PARALLEL_LOOP() + + if (num_dims == 3) then + $:GPU_PARALLEL_LOOP(private='[k, a, b, prod_z, dz_loc]', copyin = '[npts]') + do k = idwbuff(3)%beg + npts, idwbuff(3)%end - npts + do a = -npts, npts + prod_z = 1._wp + do b = -npts, npts + if (a /= b) then + dz_loc = z_cc(k + a) - z_cc(k + b) + prod_z = prod_z*dz_loc + end if + end do + weights_z_interp(a + npts + 1)%sf(k, 1, 1) = 1._wp/prod_z + end do + end do + end if + + end subroutine s_compute_barycentric_weights + + impure subroutine s_open_lag_bubble_evol + + character(LEN=path_len + 2*name_len) :: file_loc + logical :: file_exist + character(LEN=25) :: FMT + + write (file_loc, '(A,I0,A)') 'lag_bubble_evol_', proc_rank, '.dat' + file_loc = trim(case_dir) // '/D/' // trim(file_loc) + call my_inquire(trim(file_loc), file_exist) + + if (precision == 1) then + FMT = "(A16,A14,8A16)" + else + FMT = "(A24,A14,8A24)" + end if + + if (.not. 
            open (LAG_EVOL_ID, FILE=trim(file_loc), form='formatted', position='rewind')
            ! 12 header labels: time, id, position(3), velocity(3), force(3), radius
            write (LAG_EVOL_ID, FMT) 'currentTime', 'particleID', 'x', 'y', 'z', 'Vx', 'Vy', 'Vz', 'Fp_x', 'Fp_y', 'Fp_z', 'radius'
        else
            open (LAG_EVOL_ID, FILE=trim(file_loc), form='formatted', position='append')
        end if

    end subroutine s_open_lag_bubble_evol

    !> Write particle evolution data at each output time step. One row per local particle:
    !! time, id, position(3), velocity(3), force(3), radius — all taken from stage slot 1.
    !! @param qtime Current time
    impure subroutine s_write_lag_particle_evol(qtime)

        real(wp), intent(in) :: qtime
        integer :: k, ios
        character(LEN=25) :: FMT
        character(LEN=path_len + 2*name_len) :: file_loc, path
        logical :: file_exist

        ! 12 fields per row: one leading real (time), one integer (id), then 10 reals
        if (precision == 1) then
            FMT = "(F16.8,I14,10F16.8)"
        else
            FMT = "(F24.16,I14,10F24.16)"
        end if

        ! Cycle through list
        do k = 1, n_el_particles_loc
            write (LAG_EVOL_ID, FMT) qtime, lag_part_id(k, 1), particle_pos(k, 1, 1), particle_pos(k, 2, 1), particle_pos(k, 3, &
                & 1), particle_vel(k, 1, 1), particle_vel(k, 2, 1), particle_vel(k, 3, 1), f_p(k, 1), f_p(k, 2), f_p(k, 3), &
                & particle_rad(k, 1)
        end do

    end subroutine s_write_lag_particle_evol

    !> Close the per-rank particle evolution file.
    impure subroutine s_close_lag_particle_evol

        close (LAG_EVOL_ID)

    end subroutine s_close_lag_particle_evol

    !> Open (or append to) the global void-fraction history file; rank 0 only.
    subroutine s_open_void_evol

        character(LEN=path_len + 2*name_len) :: file_loc
        logical :: file_exist

        if (proc_rank == 0) then
            write (file_loc, '(A)') 'voidfraction.dat'
            file_loc = trim(case_dir) // '/D/' // trim(file_loc)
            call my_inquire(trim(file_loc), file_exist)
            if (.not. file_exist) then
                open (LAG_VOID_ID, FILE=trim(file_loc), form='formatted', position='rewind')
                ! write (12, *) 'currentTime, averageVoidFraction, ', &
                !     'maximumVoidFraction, totalParticlesVolume'
                ! write (12, *) 'The averageVoidFraction value does ', &
                !     'not reflect the real void fraction in the cloud since the ', &
                !     'cells which do not have bubbles are not accounted'
which do not have bubbles are not accounted' + else + open (LAG_VOID_ID, FILE=trim(file_loc), form='formatted', position='append') + end if + end if + + end subroutine s_open_void_evol + + !> Subroutine that writes some useful statistics related to the volume fraction of the particles (void fraction) in the + !! computatioational domain on each time step. + !! @param q_time Current time + impure subroutine s_write_void_evol_particles(qtime) + + real(wp), intent(in) :: qtime + real(wp) :: volcell, voltot + real(wp) :: lag_void_max, lag_void_avg, lag_vol + real(wp) :: void_max_glb, void_avg_glb, vol_glb + integer :: i, j, k + character(LEN=path_len + 2*name_len) :: file_loc + logical :: file_exist + + lag_void_max = 0._wp + lag_void_avg = 0._wp + lag_vol = 0._wp + $:GPU_PARALLEL_LOOP(private='[volcell]', collapse=3, reduction='[[lag_vol, lag_void_avg], [lag_void_max]]', & + & reductionOp='[+, MAX]', copy='[lag_vol, lag_void_avg, lag_void_max]') + do k = 0, p + do j = 0, n + do i = 0, m + lag_void_max = max(lag_void_max, 1._wp - q_particles(alphaf_id)%sf(i, j, k)) + call s_get_char_vol(i, j, k, volcell) + if ((1._wp - q_particles(alphaf_id)%sf(i, j, k)) > 5.0e-11_wp) then + lag_void_avg = lag_void_avg + (1._wp - q_particles(alphaf_id)%sf(i, j, k))*volcell + lag_vol = lag_vol + volcell + end if + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + +#ifdef MFC_MPI + if (num_procs > 1) then + call s_mpi_allreduce_max(lag_void_max, void_max_glb) + lag_void_max = void_max_glb + call s_mpi_allreduce_sum(lag_vol, vol_glb) + lag_vol = vol_glb + call s_mpi_allreduce_sum(lag_void_avg, void_avg_glb) + lag_void_avg = void_avg_glb + end if +#endif + voltot = lag_void_avg + ! This voidavg value does not reflect the real void fraction in the cloud since the cell which does not have bubbles are not + ! 
accounted + if (lag_vol > 0._wp) lag_void_avg = lag_void_avg/lag_vol + + if (proc_rank == 0) then + write (LAG_VOID_ID, '(6X,4e24.8)') qtime, lag_void_avg, lag_void_max, voltot + end if + + end subroutine s_write_void_evol_particles + + subroutine s_close_void_evol + + if (proc_rank == 0) close (LAG_VOID_ID) + + end subroutine s_close_void_evol + + !> Subroutine that writes the restarting files for the particles in the lagrangian solver. + !! @param t_step Current time step + impure subroutine s_write_restart_lag_particles(t_step) + + ! Generic string used to store the address of a particular file + integer, intent(in) :: t_step + character(LEN=path_len + 2*name_len) :: file_loc + logical :: file_exist + integer :: part_id, tot_part + integer :: i, k + +#ifdef MFC_MPI + ! For Parallel I/O + integer :: ifile, ierr + integer, dimension(MPI_STATUS_SIZE) :: status + integer(KIND=MPI_OFFSET_KIND) :: disp + integer :: view + integer, dimension(2) :: gsizes, lsizes, start_idx_part + integer, dimension(num_procs) :: part_order, part_ord_mpi + integer, dimension(num_procs) :: proc_particle_counts + real(wp), dimension(1:1,1:lag_io_vars) :: dummy + dummy = 0._wp + + part_id = 0 + if (n_el_particles_loc /= 0) then + do k = 1, n_el_particles_loc + if (particle_in_domain_physical(particle_pos(k,1:3,1))) then + part_id = part_id + 1 + end if + end do + end if + + if (.not. parallel_io) return + + lsizes(1) = part_id + lsizes(2) = lag_io_vars + + ! Total number of particles + call MPI_ALLREDUCE(part_id, tot_part, 1, MPI_integer, MPI_SUM, MPI_COMM_WORLD, ierr) + + call MPI_ALLGATHER(part_id, 1, MPI_INTEGER, proc_particle_counts, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) + + ! 
Calculate starting index for this processor's particles + call MPI_EXSCAN(lsizes(1), start_idx_part(1), 1, MPI_INTEGER, MPI_SUM, MPI_COMM_WORLD, ierr) + if (proc_rank == 0) start_idx_part(1) = 0 + start_idx_part(2) = 0 + + gsizes(1) = tot_part + gsizes(2) = lag_io_vars + + write (file_loc, '(A,I0,A)') 'lag_bubbles_', t_step, '.dat' + file_loc = trim(case_dir) // '/restart_data' // trim(mpiiofs) // trim(file_loc) + + ! Clean up existing file + if (proc_rank == 0) then + inquire (FILE=trim(file_loc), EXIST=file_exist) + if (file_exist) then + call MPI_FILE_DELETE(file_loc, mpi_info_int, ierr) + end if + end if + + call MPI_BARRIER(MPI_COMM_WORLD, ierr) + + if (proc_rank == 0) then + call MPI_FILE_OPEN(MPI_COMM_SELF, file_loc, ior(MPI_MODE_WRONLY, MPI_MODE_CREATE), mpi_info_int, ifile, ierr) + + ! Write header using MPI I/O for consistency + call MPI_FILE_WRITE(ifile, tot_part, 1, MPI_INTEGER, status, ierr) + call MPI_FILE_WRITE(ifile, mytime, 1, mpi_p, status, ierr) + call MPI_FILE_WRITE(ifile, dt, 1, mpi_p, status, ierr) + call MPI_FILE_WRITE(ifile, num_procs, 1, MPI_INTEGER, status, ierr) + call MPI_FILE_WRITE(ifile, proc_particle_counts, num_procs, MPI_INTEGER, status, ierr) + + call MPI_FILE_CLOSE(ifile, ierr) + end if + + call MPI_BARRIER(MPI_COMM_WORLD, ierr) + + if (part_id > 0) then + allocate (MPI_IO_DATA_lag_bubbles(max(1, part_id),1:lag_io_vars)) + + i = 1 + do k = 1, n_el_particles_loc + if (particle_in_domain_physical(particle_pos(k,1:3,1))) then + MPI_IO_DATA_lag_bubbles(i, 1) = real(lag_part_id(k, 1)) + MPI_IO_DATA_lag_bubbles(i,2:4) = particle_pos(k,1:3,1) + MPI_IO_DATA_lag_bubbles(i,5:7) = particle_posPrev(k,1:3,1) + MPI_IO_DATA_lag_bubbles(i,8:10) = particle_vel(k,1:3,1) + MPI_IO_DATA_lag_bubbles(i, 11) = particle_rad(k, 1) + MPI_IO_DATA_lag_bubbles(i, 13) = particle_R0(k) + MPI_IO_DATA_lag_bubbles(i, 14) = Rmax_stats_part(k) + MPI_IO_DATA_lag_bubbles(i, 15) = Rmin_stats_part(k) + MPI_IO_DATA_lag_bubbles(i, 19) = particle_mass(k) + i = i + 1 + end 
if + end do + + call MPI_TYPE_CREATE_SUBARRAY(2, gsizes, lsizes, start_idx_part, MPI_ORDER_FORTRAN, mpi_p, view, ierr) + call MPI_TYPE_COMMIT(view, ierr) + + call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, ior(MPI_MODE_WRONLY, MPI_MODE_CREATE), mpi_info_int, ifile, ierr) + + ! Skip header (written by rank 0) + disp = int(sizeof(tot_part) + 2*sizeof(mytime) + sizeof(num_procs) + num_procs*sizeof(proc_particle_counts(1)), & + & MPI_OFFSET_KIND) + call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, 'native', mpi_info_int, ierr) + + call MPI_FILE_WRITE_ALL(ifile, MPI_IO_DATA_lag_bubbles, lag_io_vars*part_id, mpi_p, status, ierr) + + call MPI_FILE_CLOSE(ifile, ierr) + + deallocate (MPI_IO_DATA_lag_bubbles) + else + call MPI_TYPE_CONTIGUOUS(0, mpi_p, view, ierr) + call MPI_TYPE_COMMIT(view, ierr) + + call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, ior(MPI_MODE_WRONLY, MPI_MODE_CREATE), mpi_info_int, ifile, ierr) + + ! Skip header (written by rank 0) + disp = int(sizeof(tot_part) + 2*sizeof(mytime) + sizeof(num_procs) + num_procs*sizeof(proc_particle_counts(1)), & + & MPI_OFFSET_KIND) + call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, 'native', mpi_info_int, ierr) + + call MPI_FILE_WRITE_ALL(ifile, dummy, 0, mpi_p, status, ierr) + + call MPI_FILE_CLOSE(ifile, ierr) + end if +#endif + + end subroutine s_write_restart_lag_particles + + !> Calculate global maximum and minimum R/R0 ratio across all particles. 
subroutine s_calculate_lag_particle_stats()

    !! Update running radius-ratio (R/R0) statistics for every local particle:
    !! the rank-wide extrema Rmax_glb/Rmin_glb (GPU max/min reduction) and the
    !! per-particle lifetime extrema Rmax_stats_part/Rmin_stats_part.

    integer :: k

    $:GPU_PARALLEL_LOOP(private='[k]', reduction='[[Rmax_glb], [Rmin_glb]]', reductionOp='[MAX, MIN]', &
        & copy='[Rmax_glb, Rmin_glb]')
    do k = 1, n_el_particles_loc
        Rmax_glb = max(Rmax_glb, particle_rad(k, 1)/particle_R0(k))
        Rmin_glb = min(Rmin_glb, particle_rad(k, 1)/particle_R0(k))
        ! Per-particle extrema are independent element-wise writes; only the
        ! two globals participate in the reduction.
        Rmax_stats_part(k) = max(Rmax_stats_part(k), particle_rad(k, 1)/particle_R0(k))
        Rmin_stats_part(k) = min(Rmin_stats_part(k), particle_rad(k, 1)/particle_R0(k))
    end do
    $:END_GPU_PARALLEL_LOOP()

end subroutine s_calculate_lag_particle_stats

!> Open (or reopen in append mode) this rank's bubble-statistics file
!! D/stats_lag_bubbles_<proc_rank>.dat, writing the column header only when
!! the file does not already exist.
impure subroutine s_open_lag_particle_stats()

    character(LEN=path_len + 2*name_len) :: file_loc
    character(LEN=20) :: FMT
    logical :: file_exist

    write (file_loc, '(A,I0,A)') 'stats_lag_bubbles_', proc_rank, '.dat'
    file_loc = trim(case_dir) // '/D/' // trim(file_loc)
    call my_inquire(trim(file_loc), file_exist)

    ! NOTE(review): FMT is assigned here but not used in this routine;
    ! s_write_lag_particle_stats builds its own format string. Confirm whether
    ! this assignment can be removed.
    if (precision == 1) then
        FMT = "(A10,A14,5A16)"
    else
        FMT = "(A10,A14,5A24)"
    end if

    if (.not. file_exist) then
        open (LAG_STATS_ID, FILE=trim(file_loc), form='formatted', position='rewind')
        write (LAG_STATS_ID, *) 'proc_rank, particleID, x, y, z, Rmax_glb, Rmin_glb'
    else
        open (LAG_STATS_ID, FILE=trim(file_loc), form='formatted', position='append')
    end if

end subroutine s_open_lag_particle_stats

!> Write particle radius statistics to file.
+ impure subroutine s_write_lag_particle_stats() + + integer :: k + character(LEN=path_len + 2*name_len) :: file_loc + character(LEN=20) :: FMT + + $:GPU_UPDATE(host='[Rmax_glb, Rmin_glb]') + + if (precision == 1) then + FMT = "(I10,I14,5F16.8)" + else + FMT = "(I10,I14,5F24.16)" + end if + + do k = 1, n_el_particles_loc + write (LAG_STATS_ID, FMT) proc_rank, lag_part_id(k, 1), particle_pos(k, 1, 1), particle_pos(k, 2, 1), particle_pos(k, & + & 3, 1), Rmax_stats_part(k), Rmin_stats_part(k) + end do + + end subroutine s_write_lag_particle_stats + + subroutine s_close_lag_particle_stats + + close (LAG_STATS_ID) + + end subroutine s_close_lag_particle_stats + + !> The purpose of this subroutine is to remove one specific particle if dt is too small. + !! @param part_id Particle id + impure subroutine s_copy_lag_particle(dest, src) + + integer, intent(in) :: src, dest + + particle_R0(dest) = particle_R0(src) + Rmax_stats_part(dest) = Rmax_stats_part(src) + Rmin_stats_part(dest) = Rmin_stats_part(src) + particle_mass(dest) = particle_mass(src) + lag_part_id(dest, 1) = lag_part_id(src, 1) + particle_rad(dest,1:2) = particle_rad(src,1:2) + particle_vel(dest,1:3,1:2) = particle_vel(src,1:3,1:2) + particle_s(dest,1:3,1:2) = particle_s(src,1:3,1:2) + particle_pos(dest,1:3,1:2) = particle_pos(src,1:3,1:2) + particle_posPrev(dest,1:3,1:2) = particle_posPrev(src,1:3,1:2) + particle_draddt(dest,1:lag_num_ts) = particle_draddt(src,1:lag_num_ts) + f_p(dest,1:3) = f_p(src,1:3) + particle_dposdt(dest,1:3,1:lag_num_ts) = particle_dposdt(src,1:3,1:lag_num_ts) + particle_dveldt(dest,1:3,1:lag_num_ts) = particle_dveldt(src,1:3,1:lag_num_ts) + + end subroutine s_copy_lag_particle + + !> The purpose of this subroutine is to deallocate variables + impure subroutine s_finalize_particle_lagrangian_solver() + + integer :: i + + if (lag_params%write_void_evol) call s_close_void_evol + if (lag_params%write_bubbles) call s_close_lag_particle_evol() + if (lag_params%write_bubbles_stats) call 
s_close_lag_particle_stats() + + do i = 1, q_particles_idx + @:DEALLOCATE(q_particles(i)%sf) + end do + @:DEALLOCATE(q_particles) + + do i = 1, nField_vars + @:DEALLOCATE(field_vars(i)%sf) + end do + @:DEALLOCATE(field_vars) + + do i = 1, nWeights_interp + @:DEALLOCATE(weights_x_interp(i)%sf) + end do + @:DEALLOCATE(weights_x_interp) + + do i = 1, nWeights_interp + @:DEALLOCATE(weights_y_interp(i)%sf) + end do + @:DEALLOCATE(weights_y_interp) + + do i = 1, nWeights_interp + @:DEALLOCATE(weights_z_interp(i)%sf) + end do + @:DEALLOCATE(weights_z_interp) + + do i = 1, nWeights_grad + @:DEALLOCATE(weights_x_grad(i)%sf) + end do + @:DEALLOCATE(weights_x_grad) + + do i = 1, nWeights_grad + @:DEALLOCATE(weights_y_grad(i)%sf) + end do + @:DEALLOCATE(weights_y_grad) + + do i = 1, nWeights_grad + @:DEALLOCATE(weights_z_grad(i)%sf) + end do + @:DEALLOCATE(weights_z_grad) + + ! Deallocating space + @:DEALLOCATE(lag_part_id) + @:DEALLOCATE(gid_to_local) + @:DEALLOCATE(particle_R0) + @:DEALLOCATE(Rmax_stats_part) + @:DEALLOCATE(Rmin_stats_part) + @:DEALLOCATE(particle_mass) + @:DEALLOCATE(p_AM) + @:DEALLOCATE(p_owner_rank) + @:DEALLOCATE(particle_rad) + @:DEALLOCATE(particle_pos) + @:DEALLOCATE(particle_posPrev) + @:DEALLOCATE(particle_vel) + @:DEALLOCATE(particle_s) + @:DEALLOCATE(particle_draddt) + @:DEALLOCATE(particle_dposdt) + @:DEALLOCATE(particle_dveldt) + @:DEALLOCATE(f_p) + @:DEALLOCATE(gSum) + + @:DEALLOCATE(force_recv_ids) + @:DEALLOCATE(force_recv_vals) + + @:DEALLOCATE(keep_bubble) + @:DEALLOCATE(wrap_bubble_loc, wrap_bubble_dir) + + @:DEALLOCATE(linked_list) + @:DEALLOCATE(particle_head) + + ! 
Deallocate cell list arrays + @:DEALLOCATE(cell_list_start) + @:DEALLOCATE(cell_list_count) + @:DEALLOCATE(cell_list_idx) + + end subroutine s_finalize_particle_lagrangian_solver + +end module m_particles_EL diff --git a/src/simulation/m_particles_EL_kernels.fpp b/src/simulation/m_particles_EL_kernels.fpp new file mode 100644 index 0000000000..08d2c22c8c --- /dev/null +++ b/src/simulation/m_particles_EL_kernels.fpp @@ -0,0 +1,904 @@ +!> +!! @file m_particles_EL_kernels.fpp +!! @brief Contains module m_particles_EL_kernels + +#:include 'macros.fpp' + +!> @brief This module contains kernel functions used to map the effect of the lagrangian particles in the Eulerian framework. +module m_particles_EL_kernels + + use m_mpi_proxy !< Message passing interface (MPI) module proxy + use ieee_arithmetic !< For checking NaN + + implicit none + + ! Cell list for particle-to-cell mapping (rebuilt each RK stage before smearing) + integer, allocatable, dimension(:,:,:) :: cell_list_start ! (0:m, 0:n, 0:p) + integer, allocatable, dimension(:,:,:) :: cell_list_count ! (0:m, 0:n, 0:p) + integer, allocatable, dimension(:) :: cell_list_idx ! (1:nParticles_glb) sorted particle indices + $:GPU_DECLARE(create='[cell_list_start, cell_list_count, cell_list_idx]') + +contains + + ! 
!> Compute a particle's total discrete Gaussian-kernel contribution over its
!! smearing stencil so that the smeared quantities can be properly normalized
!! (the per-cell weights later divide by this sum in s_gaussian_atomic).
!! @param rad Particle radius
!! @param pos Particle position (x, y, z)
!! @param cell Computational cell containing the particle
!! @param func_s Sum of kernel value times cell volume over the stencil
subroutine s_compute_gaussian_contribution(rad, pos, cell, func_s)

    $:GPU_ROUTINE(function_name='s_compute_gaussian_contribution',parallelism='[seq]', cray_inline=True)

    real(wp), intent(in) :: rad
    real(wp), intent(in), dimension(3) :: pos
    integer, intent(in), dimension(3) :: cell
    real(wp), intent(out) :: func_s
    real(wp) :: volpart, stddsv, Vol_loc, func
    real(wp), dimension(3) :: nodecoord, center
    integer :: ip, jp, kp, di, dj, dk, di_beg, di_end, dj_beg, dj_end, dk_beg, dk_end, mapCells_loc
    integer, dimension(3) :: cellijk

    ! Stencil half-width in cells (1 => 3x3 neighborhood in 2D, 3x3x3 in 3D).
    ! Must match the stencil used when the contribution is applied.
    mapCells_loc = 1

    ! Spherical particle volume.
    volpart = (4._wp/3._wp)*pi*rad**3

    call s_compute_stddsv(cell, volpart, stddsv)

    ip = cell(1)
    jp = cell(2)
    kp = cell(3)

    di_beg = ip - mapCells_loc
    di_end = ip + mapCells_loc
    dj_beg = jp - mapCells_loc
    dj_end = jp + mapCells_loc
    ! In 2D the k-extent collapses to the particle's own plane.
    dk_beg = kp
    dk_end = kp

    if (num_dims == 3) then
        dk_beg = kp - mapCells_loc
        dk_end = kp + mapCells_loc
    end if

    ! Riemann sum of the kernel over the stencil cells.
    func_s = 0._wp
    do dk = dk_beg, dk_end
        do dj = dj_beg, dj_end
            do di = di_beg, di_end
                nodecoord(1) = x_cc(di)
                nodecoord(2) = y_cc(dj)
                nodecoord(3) = 0._wp
                if (p > 0) nodecoord(3) = z_cc(dk)

                cellijk(1) = di
                cellijk(2) = dj
                cellijk(3) = dk

                center(1:2) = pos(1:2)
                center(3) = 0._wp
                if (p > 0) center(3) = pos(3)

                Vol_loc = dx(cellijk(1))*dy(cellijk(2))
                if (num_dims == 3) Vol_loc = dx(cellijk(1))*dy(cellijk(2))*dz(cellijk(3))

                call s_applygaussian(center, cellijk, nodecoord, stddsv, 0._wp, func)

                func_s = func_s + func*Vol_loc
            end do
        end do
    end do

end subroutine s_compute_gaussian_contribution

!> The purpose of this subroutine is to compute the gaussian smearing of particle volume fraction and source terms with atomic
!!
cell updates (each Eulerian cell write is a GPU atomic, so particles may be smeared concurrently).
!! @param rad        Particle radius
!! @param vel        Particle velocity
!! @param pos        Particle position
!! @param force_p    Force on the particle (applied to the fluid with reversed sign)
!! @param gauSum     Kernel normalization from s_compute_gaussian_contribution
!! @param cell       Computational cell containing the particle
!! @param updatedvar Eulerian fields receiving the smeared contributions
!! @param onlyBeta   If true, smear only the particle volume fraction
subroutine s_gaussian_atomic(rad, vel, pos, force_p, gauSum, cell, updatedvar, onlyBeta)

    $:GPU_ROUTINE(function_name='s_gaussian_atomic',parallelism='[seq]', cray_inline=True)

    real(wp), intent(in) :: rad, gauSum
    real(wp), intent(in), dimension(3) :: pos, vel, force_p
    integer, intent(in), dimension(3) :: cell
    type(scalar_field), dimension(:), intent(inout) :: updatedvar
    real(wp) :: volpart, stddsv, Vol_loc, func, weight
    real(wp) :: fp_x, fp_y, fp_z, vp_x, vp_y, vp_z
    real(wp) :: addFun_alphap, addFun_alphap_vp_x, addFun_alphap_vp_y, addFun_alphap_vp_z
    real(wp) :: addFun2_x, addFun2_y, addFun2_z, addFun_E
    real(wp), dimension(3) :: nodecoord, center
    integer :: ip, jp, kp, di, dj, dk, di_beg, di_end, dj_beg, dj_end, dk_beg, dk_end, mapCells_loc
    integer, dimension(3) :: cellijk
    logical, intent(in) :: onlyBeta

    ! Stencil half-width in cells; must match s_compute_gaussian_contribution
    ! so the precomputed gauSum normalizes the same set of cells.
    mapCells_loc = 1

    volpart = (4._wp/3._wp)*pi*rad**3

    call s_compute_stddsv(cell, volpart, stddsv)

    ip = cell(1)
    jp = cell(2)
    kp = cell(3)

    di_beg = ip - mapCells_loc
    di_end = ip + mapCells_loc
    dj_beg = jp - mapCells_loc
    dj_end = jp + mapCells_loc
    ! In 2D the k-extent collapses to the particle's own plane.
    dk_beg = kp
    dk_end = kp

    if (num_dims == 3) then
        dk_beg = kp - mapCells_loc
        dk_end = kp + mapCells_loc
    end if

    ! Fluid source force is the reaction to the particle force.
    fp_x = -force_p(1)
    fp_y = -force_p(2)
    fp_z = -force_p(3)

    vp_x = vel(1)
    vp_y = vel(2)
    vp_z = vel(3)

    center(1:2) = pos(1:2)
    center(3) = 0._wp
    if (p > 0) center(3) = pos(3)

    do dk = dk_beg, dk_end
        do dj = dj_beg, dj_end
            do di = di_beg, di_end
                nodecoord(1) = x_cc(di)
                nodecoord(2) = y_cc(dj)
                nodecoord(3) = 0._wp
                if (p > 0) nodecoord(3) = z_cc(dk)

                cellijk(1) = di
                cellijk(2) = dj
                cellijk(3) = dk

                Vol_loc = dx(cellijk(1))*dy(cellijk(2))
                if (num_dims == 3) Vol_loc = Vol_loc*dz(cellijk(3))

                call s_applygaussian(center, cellijk, nodecoord, stddsv, 0._wp, func)

                ! NOTE(review): gauSum is loop-invariant, so this guard could
                ! be hoisted above the loops. As written, behavior is the same:
                ! it fires on the first iteration before any atomic update.
                if (gauSum <= 0._wp) return
                weight = func/gauSum

                ! Smeared particle volume fraction contribution.
                addFun_alphap = weight*volpart
                $:GPU_ATOMIC(atomic='update')
                updatedvar(1)%sf(cellijk(1), cellijk(2), cellijk(3)) = updatedvar(1)%sf(cellijk(1), cellijk(2), &
                    & cellijk(3)) + real(addFun_alphap, kind=stp)

                if (lag_params%solver_approach == 2 .and. .not. onlyBeta) then
                    ! Update particle momentum field(x)
                    addFun_alphap_vp_x = weight*volpart*vp_x
                    $:GPU_ATOMIC(atomic='update')
                    updatedvar(2)%sf(cellijk(1), cellijk(2), cellijk(3)) = updatedvar(2)%sf(cellijk(1), cellijk(2), &
                        & cellijk(3)) + real(addFun_alphap_vp_x, kind=stp)

                    ! Update particle momentum field(y)
                    addFun_alphap_vp_y = weight*volpart*vp_y
                    $:GPU_ATOMIC(atomic='update')
                    updatedvar(3)%sf(cellijk(1), cellijk(2), cellijk(3)) = updatedvar(3)%sf(cellijk(1), cellijk(2), &
                        & cellijk(3)) + real(addFun_alphap_vp_y, kind=stp)

                    if (num_dims == 3) then
                        ! Update particle momentum field(z)
                        addFun_alphap_vp_z = weight*volpart*vp_z
                        $:GPU_ATOMIC(atomic='update')
                        updatedvar(4)%sf(cellijk(1), cellijk(2), cellijk(3)) = updatedvar(4)%sf(cellijk(1), cellijk(2), &
                            & cellijk(3)) + real(addFun_alphap_vp_z, kind=stp)
                    end if

                    ! Update x-momentum source term
                    addFun2_x = weight*fp_x
                    $:GPU_ATOMIC(atomic='update')
                    updatedvar(5)%sf(cellijk(1), cellijk(2), cellijk(3)) = updatedvar(5)%sf(cellijk(1), cellijk(2), &
                        & cellijk(3)) + real(addFun2_x, kind=stp)

                    ! Update y-momentum source term
                    addFun2_y = weight*fp_y
                    $:GPU_ATOMIC(atomic='update')
                    updatedvar(6)%sf(cellijk(1), cellijk(2), cellijk(3)) = updatedvar(6)%sf(cellijk(1), cellijk(2), &
                        & cellijk(3)) + real(addFun2_y, kind=stp)

                    if (num_dims == 3) then
                        ! Update z-momentum source term
                        addFun2_z = weight*fp_z
                        $:GPU_ATOMIC(atomic='update')
                        updatedvar(7)%sf(cellijk(1), cellijk(2), cellijk(3)) = updatedvar(7)%sf(cellijk(1), cellijk(2), &
                            & cellijk(3)) + real(addFun2_z, kind=stp)
                    end if

                    ! Update energy source term.
                    ! NOTE(review): addFun_E is always zero here, so this
                    ! atomic add is a no-op placeholder for a future energy
                    ! coupling term — confirm it is intentional.
                    addFun_E = 0._wp
                    $:GPU_ATOMIC(atomic='update')
                    updatedvar(8)%sf(cellijk(1), cellijk(2), cellijk(3)) = updatedvar(8)%sf(cellijk(1), cellijk(2), &
                        & cellijk(3)) + real(addFun_E, kind=stp)
                end if
            end do
        end do
    end do

end subroutine s_gaussian_atomic

!> The purpose of this subroutine is to apply the gaussian kernel function for each particle (Maeda and Colonius, 2018).
!! @param center       Particle (kernel) center
!! @param cellaux      Cell whose node receives the kernel value
!! @param nodecoord    Coordinates of the receiving node
!! @param stddsv       Standard deviation of the Gaussian kernel
!! @param strength_idx Kernel strength exponent offset (0 everywhere in this module)
!! @param func         Resulting kernel value at the node
subroutine s_applygaussian(center, cellaux, nodecoord, stddsv, strength_idx, func)

    $:GPU_ROUTINE(function_name='s_applygaussian',parallelism='[seq]', cray_inline=True)

    real(wp), dimension(3), intent(in) :: center
    integer, dimension(3), intent(in) :: cellaux
    real(wp), dimension(3), intent(in) :: nodecoord
    real(wp), intent(in) :: stddsv
    real(wp), intent(in) :: strength_idx
    real(wp), intent(out) :: func
    integer :: i
    real(wp) :: distance
    real(wp) :: theta, dtheta, L2, dzp, Lz2, zc
    real(wp) :: Nr, Nr_count

    distance = sqrt((center(1) - nodecoord(1))**2._wp + (center(2) - nodecoord(2))**2._wp + (center(3) - nodecoord(3))**2._wp)

    if (num_dims == 3) then
        !> 3D gaussian function
        func = exp(-0.5_wp*(distance/stddsv)**2._wp)/(sqrt(2._wp*pi)*stddsv)**3._wp
    else
        if (cyl_coord) then
            !> 2D cylindrical function:
            ! We smear particles in the azimuthal direction for given r
            theta = 0._wp
            ! Number of azimuthal samples sized so each subtends ~one cell width.
            Nr = ceiling(2._wp*pi*nodecoord(2)/(y_cb(cellaux(2)) - y_cb(cellaux(2) - 1)))
            dtheta = 2._wp*pi/Nr
            ! Law of cosines: squared in-plane distance at azimuthal angle theta.
            L2 = center(2)**2._wp + nodecoord(2)**2._wp - 2._wp*center(2)*nodecoord(2)*cos(theta)
            distance = sqrt((center(1) - nodecoord(1))**2._wp + L2)
            ! Factor 2._wp is for symmetry (upper half of the 2D field (+r) is considered)
            func = dtheta/2._wp/pi*exp(-0.5_wp*(distance/stddsv)**2._wp)/(sqrt(2._wp*pi)*stddsv)**3._wp
            Nr_count = 0._wp
            do while (Nr_count < Nr - 1._wp)
                Nr_count = Nr_count + 1._wp
                theta = Nr_count*dtheta
                ! trigonometric relation
                L2 = center(2)**2._wp + nodecoord(2)**2._wp - 2._wp*center(2)*nodecoord(2)*cos(theta)
                distance = sqrt((center(1) - nodecoord(1))**2._wp + L2)
                ! nodecoord(2)*dtheta is the azimuthal width of the cell
                ! NOTE(review): the theta=0 term above uses exponent 3 while
                ! these terms use 3*(strength_idx + 1). The two agree for
                ! strength_idx = 0, the only value passed in this module —
                ! confirm intent before calling with nonzero strength_idx.
                func = func + dtheta/2._wp/pi*exp(-0.5_wp*(distance/stddsv)**2._wp)/(sqrt(2._wp*pi)*stddsv) &
                    & **(3._wp*(strength_idx + 1._wp))
            end do
        else
            !> 2D cartesian function: Equation (48) from Maeda and Colonius 2018
            ! We smear particles considering a virtual depth (lag_params%charwidth) with lag_params%charNz cells
            dzp = (lag_params%charwidth/(lag_params%charNz + 1._wp))

            func = 0._wp
            do i = 0, lag_params%charNz
                zc = (-lag_params%charwidth/2._wp + dzp*(0.5_wp + i)) ! Center of virtual cell i in z-direction
                Lz2 = (center(3) - zc)**2._wp
                distance = sqrt((center(1) - nodecoord(1))**2._wp + (center(2) - nodecoord(2))**2._wp + Lz2)
                func = func + dzp/lag_params%charwidth*exp(-0.5_wp*(distance/stddsv)**2._wp)/(sqrt(2._wp*pi)*stddsv)**3._wp
            end do
        end if
    end if

end subroutine s_applygaussian

!> Calculates the standard deviation of the particle being smeared in the Eulerian framework.
!! @param cell Cell where the particle is located
!! @param volpart Volume of the particle
!!
@param stddsv Standard deviation of the smearing kernel
subroutine s_compute_stddsv(cell, volpart, stddsv)

    $:GPU_ROUTINE(function_name='s_compute_stddsv',parallelism='[seq]', cray_inline=True)

    integer, dimension(3), intent(in) :: cell
    real(wp), intent(in) :: volpart
    real(wp), intent(out) :: stddsv
    real(wp) :: chardist, charvol
    real(wp) :: rad

    !> Compute characteristic distance (geometric mean of the local cell widths)
    chardist = sqrt(dx(cell(1))*dy(cell(2)))
    if (p > 0) chardist = (dx(cell(1))*dy(cell(2))*dz(cell(3)))**(1._wp/3._wp)

    !> Compute characteristic volume
    if (p > 0) then
        charvol = dx(cell(1))*dy(cell(2))*dz(cell(3))
    else
        if (cyl_coord) then
            ! Axisymmetric: revolve the 2D cell about the axis.
            charvol = dx(cell(1))*dy(cell(2))*y_cc(cell(2))*2._wp*pi
        else
            ! 2D Cartesian: use the virtual out-of-plane depth.
            charvol = dx(cell(1))*dy(cell(2))*lag_params%charwidth
        end if
    end if
    ! NOTE(review): charvol is computed but never used below — presumably kept
    ! for a volume-based kernel width; confirm or remove upstream.

    ! Equivalent spherical radius of the particle volume.
    rad = (3._wp*volpart/(4._wp*pi))**(1._wp/3._wp)
    ! Kernel width scales with the larger of cell size and particle radius.
    stddsv = lag_params%epsilonb*max(chardist, rad)

end subroutine s_compute_stddsv

!> The purpose of this subroutine is to check if the current cell is outside the computational domain or not (including ghost
!! cells).
!! @param cellaux Tested cell to smear the particle effect in.
!! @param celloutside If true, then cellaux is outside the computational domain.
subroutine s_check_celloutside(cellaux, celloutside)

    $:GPU_ROUTINE(function_name='s_check_celloutside',parallelism='[seq]', cray_inline=True)

    integer, dimension(3), intent(inout) :: cellaux
    logical, intent(out) :: celloutside

    celloutside = .false.

    ! Ghost cells (width buff_size) on either side of [0, m]x[0, n](x[0, p])
    ! still count as inside.
    if (num_dims == 2) then
        if ((cellaux(1) < -buff_size) .or. (cellaux(2) < -buff_size)) then
            celloutside = .true.
        end if

        if ((cellaux(1) > m + buff_size) .or. (cellaux(2) > n + buff_size)) then
            celloutside = .true.
        end if
    else
        if ((cellaux(1) < -buff_size) .or. (cellaux(2) < -buff_size) .or. (cellaux(3) < -buff_size)) then
            celloutside = .true.
        end if

        if ((cellaux(1) > m + buff_size) .or. (cellaux(2) > n + buff_size) .or. (cellaux(3) > p + buff_size)) then
            celloutside = .true.
        end if
    end if

end subroutine s_check_celloutside

!> This subroutine transforms the computational coordinates of the particle from real type into integer.
!! @param s_cell Computational coordinates of the particle, real type
!! @param get_cell Computational coordinates of the particle, integer type
subroutine s_get_cell(s_cell, get_cell)

    $:GPU_ROUTINE(function_name='s_get_cell',parallelism='[seq]', cray_inline=True)

    real(wp), dimension(3), intent(in) :: s_cell
    integer, dimension(3), intent(out) :: get_cell
    integer :: i

    ! int() truncates toward zero; decrement for negative coordinates so the
    ! result is a true floor in every direction.
    get_cell(:) = int(s_cell(:))
    do i = 1, num_dims
        if (s_cell(i) < 0._wp) get_cell(i) = get_cell(i) - 1
    end do

end subroutine s_get_cell

!> The purpose of this procedure is to calculate the characteristic cell volume
!! @param cellx Computational x-coordinate of the cell
!! @param celly Computational y-coordinate of the cell
!! @param cellz Computational z-coordinate of the cell
!! @param Charvol Characteristic volume
subroutine s_get_char_vol(cellx, celly, cellz, Charvol)

    $:GPU_ROUTINE(function_name='s_get_char_vol',parallelism='[seq]', cray_inline=True)

    integer, intent(in) :: cellx, celly, cellz
    real(wp), intent(out) :: Charvol

    if (p > 0) then
        Charvol = dx(cellx)*dy(celly)*dz(cellz)
    else
        if (cyl_coord) then
            ! Axisymmetric: revolve the 2D cell about the axis.
            Charvol = dx(cellx)*dy(celly)*y_cc(celly)*2._wp*pi
        else
            ! 2D Cartesian: use the virtual out-of-plane depth.
            Charvol = dx(cellx)*dy(celly)*lag_params%charwidth
        end if
    end if

end subroutine s_get_char_vol

!! This function calculates the force on a particle based on the pressure gradient, velocity, and drag model.
!! @param pos Position of the particle
!! @param rad Radius of the particle
!! @param vel_p Velocity of the particle
!! @param mass_p Particle mass
!! @param Re Fluid dynamic viscosity (used as gas_mu below — TODO confirm naming)
!! @param rho Density of the fluid
!! @param vol_frac Particle Volume Fraction
!! @param cell Computational coordinates of the particle
!! @param q_prim_vf Eulerian field with primitive variables
!!
@return a Acceleration of the particle in direction i + subroutine s_get_particle_force(pos, rad, vel_p, mass_p, Re, gamm, vol_frac, drhodt, cell, q_prim_vf, fieldvars, wx, wy, wz, & + & force, rmass_add) + $:GPU_ROUTINE(parallelism='[seq]') + real(wp), intent(in) :: rad, mass_p, Re, gamm, vol_frac, drhodt + real(wp), dimension(3), intent(in) :: pos + integer, dimension(3), intent(in) :: cell + real(wp), dimension(3), intent(in) :: vel_p + type(scalar_field), dimension(:), intent(in) :: fieldvars + type(scalar_field), dimension(:), intent(in) :: wx, wy, wz + type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf + real(wp), dimension(3), intent(out) :: force + real(wp), intent(out) :: rmass_add + real(wp) :: a, vol, rho_fluid, pressure_fluid + real(wp), dimension(3) :: v_rel, dp + real(wp), dimension(fd_order) :: xi, eta, L + real(wp) :: particle_diam, gas_mu, vmag, cson + real(wp) :: slip_velocity_x, slip_velocity_y, slip_velocity_z, beta + real(wp), dimension(3) :: fluid_vel + integer :: dir + + ! Added pass params + real(wp) :: mach, Cam, flux_f, flux_b, div_u, SDrho, vpgradrho + real(wp), dimension(3) :: rhoDuDt, grad_rho, fam + integer, dimension(3) :: p1 + + force = 0._wp + dp = 0._wp + grad_rho = 0._wp + fam = 0._wp + fluid_vel = 0._wp + v_rel = 0._wp + rhoDuDt = 0._wp + SDrho = 0._wp + ! div_u = 0._wp + + !!Interpolation - either even ordered barycentric or 0th order + if (lag_params%interpolation_order > 1) then + rho_fluid = f_interp_barycentric(pos, cell, q_prim_vf, 1, wx, wy, wz) + pressure_fluid = f_interp_barycentric(pos, cell, q_prim_vf, E_idx, wx, wy, wz) + do dir = 1, num_dims + if (lag_params%pressure_force .or. lag_params%added_mass_model > 0) then + dp(dir) = f_interp_barycentric(pos, cell, fieldvars, dir, wx, wy, wz) + end if + if (lag_params%added_mass_model > 0) then + grad_rho(dir) = f_interp_barycentric(pos, cell, fieldvars, 3 + dir, wx, wy, wz) + ! 
div_u = div_u + f_interp_barycentric(pos, cell, fieldvars, 6 + dir, wx, wy, wz) + end if + fluid_vel(dir) = f_interp_barycentric(pos, cell, q_prim_vf, momxb + dir - 1, wx, wy, wz) + end do + else + rho_fluid = q_prim_vf(1)%sf(cell(1), cell(2), cell(3)) + pressure_fluid = q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3)) + do dir = 1, num_dims + if (lag_params%pressure_force .or. lag_params%added_mass_model > 0) then + dp(dir) = fieldvars(dir)%sf(cell(1), cell(2), cell(3)) + end if + if (lag_params%added_mass_model > 0) then + grad_rho(dir) = fieldvars(3 + dir)%sf(cell(1), cell(2), cell(3)) + ! div_u = div_u + fieldvars(6 + dir)%sf(cell(1), cell(2), cell(3)) + end if + fluid_vel(dir) = q_prim_vf(momxb + dir - 1)%sf(cell(1), cell(2), cell(3)) + end do + end if + + v_rel = vel_p - fluid_vel + + if (lag_params%qs_drag_model > 0 .or. lag_params%added_mass_model > 0) then + ! Quasi-steady Drag Force Parameters + slip_velocity_x = fluid_vel(1) - vel_p(1) + slip_velocity_y = fluid_vel(2) - vel_p(2) + if (num_dims == 3) then + slip_velocity_z = fluid_vel(3) - vel_p(3) + vmag = sqrt(slip_velocity_x*slip_velocity_x + slip_velocity_y*slip_velocity_y + slip_velocity_z*slip_velocity_z) + else if (num_dims == 2) then + vmag = sqrt(slip_velocity_x*slip_velocity_x + slip_velocity_y*slip_velocity_y) + end if + particle_diam = rad*2._wp + if (rho_fluid > 0._wp) then + cson = sqrt((gamm*pressure_fluid)/rho_fluid) + else + cson = 1._wp + end if + gas_mu = Re + end if + + if (lag_params%added_mass_model > 0) then + rhoDuDt = -dp + vpgradrho = dot_product(vel_p, grad_rho) + SDrho = drhodt + fluid_vel(1)*grad_rho(1) + fluid_vel(2)*grad_rho(2) + fluid_vel(3)*grad_rho(3) + mach = vmag/cson + end if + + ! 
Step 1: Force component quasi-steady + if (lag_params%qs_drag_model == 1) then + beta = QS_Parmar(rho_fluid, cson, gas_mu, gamm, vmag, particle_diam, vol_frac) + force = force - beta*v_rel + else if (lag_params%qs_drag_model == 2) then + beta = QS_Osnes(rho_fluid, cson, gas_mu, gamm, vmag, particle_diam, vol_frac) + force = force - beta*v_rel + else if (lag_params%qs_drag_model == 3) then + beta = QS_ModifiedParmar(rho_fluid, cson, gas_mu, gamm, vmag, particle_diam, vol_frac) + force = force - beta*v_rel + else if (lag_params%qs_drag_model == 4) then + beta = QS_Gidaspow(rho_fluid, cson, gas_mu, gamm, vmag, particle_diam, vol_frac) + force = force - beta*v_rel + else + ! No Quasi-Steady drag + end if + + ! Step 1.1: Stokes drag + if (lag_params%stokes_drag == 1) then ! Free slip Stokes drag + force = force - 4._wp*pi*gas_mu*rad*v_rel + else if (lag_params%stokes_drag == 2) then ! No slip Stokes drag + force = force - 6._wp*pi*gas_mu*rad*v_rel + else + ! No stokes drag + end if + + ! Step 2: Pressure Gradient Force + if (lag_params%pressure_force) then + vol = (4._wp/3._wp)*pi*(rad**3._wp) + force = force - vol*dp + end if + + ! Step 3: Gravitational Force + if (lag_params%gravity_force) then + force = force + (mass_p)*accel_bf + end if + + ! Step 4: Added Mass Force + if (lag_params%added_mass_model == 1) then + vol = (4._wp/3._wp)*pi*(rad**3._wp) + if (mach > 0.6_wp) then + Cam = 1._wp + 1.8_wp*(0.6_wp**2) + 7.6_wp*(0.6_wp**4) + else + Cam = 1._wp + 1.8_wp*mach**2 + 7.6_wp*mach**4 + end if + + Cam = 0.5_wp*Cam*(1._wp + 0.68_wp*vol_frac**2) + rmass_add = rho_fluid*vol*Cam ! (1._wp-vol_frac)*rho_fluid*vol*Cam + + fam = Cam*vol*(vel_p*SDrho + rhoDuDt + fluid_vel*(vpgradrho)) + + do dir = 1, num_dims + if (.not. ieee_is_finite(fam(dir))) then + fam(dir) = 0._wp + rmass_add = 0._wp + end if + end do + force = force + fam + else + rmass_add = 0._wp + end if + + do dir = 1, num_dims + if (.not. 
ieee_is_finite(force(dir))) then + force(dir) = 0._wp + end if + end do + + end subroutine s_get_particle_force + + !> Interpolate an Eulerian field to a particle position using barycentric Lagrange interpolation with precomputed weights. Falls + !! back to nearest-cell value if the interpolant is non-finite. + !! @param pos Particle position + !! @param cell Grid cell containing the particle + !! @param field_vf Eulerian field to interpolate + !! @param field_index Component index in field_vf + function f_interp_barycentric(pos, cell, field_vf, field_index, wx, wy, wz) result(val) + + $:GPU_ROUTINE(parallelism='[seq]') + + real(wp), dimension(3), intent(in) :: pos + integer, dimension(3), intent(in) :: cell + type(scalar_field), dimension(:), intent(in) :: field_vf + type(scalar_field), dimension(:), intent(in) :: wx, wy, wz + integer, intent(in) :: field_index + integer :: i, j, k, ix, jy, kz, npts, npts_z, N, a, b + integer :: ix_count, jy_count, kz_count + real(wp) :: weight, numerator, denominator, xBar, eps + real(wp) :: val, local_min, local_max, prod_x, prod_y, prod_z + + i = cell(1) + j = cell(2) + k = cell(3) + + N = lag_params%interpolation_order + npts = N/2 + npts_z = npts + if (num_dims == 2) npts_z = 0 + eps = 1.e-12_wp + numerator = 0._wp + denominator = 0._wp + + ! if (abs(pos(1) - x_cc(i)) <= eps .and. & abs(pos(2) - y_cc(j)) <= eps .and. & abs(pos(3) - z_cc(k)) <= eps) then val = + ! 
field_vf(field_index)%sf(i, j, k) return end if + + ix_count = 0 + do ix = i - npts, i + npts + ix_count = ix_count + 1 + jy_count = 0 + do jy = j - npts, j + npts + jy_count = jy_count + 1 + kz_count = 0 + do kz = k - npts_z, k + npts_z + kz_count = kz_count + 1 + if (num_dims == 3) then + xBar = (pos(1) - x_cc(ix))*(pos(2) - y_cc(jy))*(pos(3) - z_cc(kz)) + weight = wx(ix_count)%sf(i, 1, 1)*wy(jy_count)%sf(j, 1, 1)*wz(kz_count)%sf(k, 1, 1) + else + xBar = (pos(1) - x_cc(ix))*(pos(2) - y_cc(jy)) + weight = wx(ix_count)%sf(i, 1, 1)*wy(jy_count)%sf(j, 1, 1) + end if + weight = weight/xBar + numerator = numerator + weight*field_vf(field_index)%sf(ix, jy, kz) + denominator = denominator + weight + end do + end do + end do + + val = numerator/denominator + + if (.not. ieee_is_finite(val)) then + val = field_vf(field_index)%sf(i, j, k) + else if (abs(val) <= eps) then + val = 0._wp + end if + + end function f_interp_barycentric + + ! Quasi-steady force (Re_p and Ma_p corrections): Improved Drag Correlation for Spheres and Application to Shock-Tube + ! Experiments - Parmar et al. (2010) - AIAA Journal + ! + ! Quasi-steady force (phi corrections): The Added Mass, Basset, and Viscous Drag Coefficients in Nondilute Bubbly Liquids + ! Undergoing Small-Amplitude Oscillatory Motion - Sangani et al. (1991) - Phys. Fluids A + function QS_Parmar(rho, cson, mu_fluid, gamma, vmag, dp, volume_fraction) result(beta) + + $:GPU_ROUTINE(parallelism='[seq]') + real(wp), intent(in) :: rho, cson, mu_fluid, gamma, vmag, dp, volume_fraction + real(wp) :: rcd1, rmacr, rcd_mcr, rcd_std, rmach_rat, rcd_M1 + real(wp) :: rcd_M2, C1, C2, C3, f1M, f2M, f3M, lrep, factor, cd, phi_corr + real(wp) :: beta + real(wp) :: rmachp, mp, phi, rep, re + + rmachp = vmag/cson + mp = max(rmachp, 0.01_wp) + phi = max(volume_fraction, 0.0001_wp) + rep = vmag*dp*rho/mu_fluid + re = max(rep, 0.1_wp) + + if (re < 1.e-14_wp) then + rcd1 = 1.0_wp + else + rmacr = 0.6_wp ! 
Critical rmachp no + rcd_mcr = (1._wp + 0.15_wp*re**(0.684_wp)) + (re/24.0_wp)*(0.513_wp/(1._wp + 483._wp/re**(0.669_wp))) + if (mp <= rmacr) then + rcd_std = (1._wp + 0.15_wp*re**(0.687_wp)) + (re/24.0_wp)*(0.42_wp/(1._wp + 42500._wp/re**(1.16_wp))) + rmach_rat = mp/rmacr + rcd1 = rcd_std + (rcd_mcr - rcd_std)*rmach_rat + else if (mp <= 1.0_wp) then + rcd_M1 = (1.0_wp + 0.118_wp*re**0.813_wp) + (re/24.0_wp)*0.69_wp/(1.0_wp + 3550.0_wp/re**0.793_wp) + C1 = 6.48_wp + C2 = 9.28_wp + C3 = 12.21_wp + f1M = -1.884_wp + 8.422_wp*mp - 13.70_wp*mp**2 + 8.162_wp*mp**3 + f2M = -2.228_wp + 10.35_wp*mp - 16.96_wp*mp**2 + 9.840_wp*mp**3 + f3M = 4.362_wp - 16.91_wp*mp + 19.84_wp*mp**2 - 6.296_wp*mp**3 + lrep = log(re) + factor = f1M*(lrep - C2)*(lrep - C3)/((C1 - C2)*(C1 - C3)) + f2M*(lrep - C1)*(lrep - C3)/((C2 - C1)*(C2 - C3)) & + & + f3M*(lrep - C1)*(lrep - C2)/((C3 - C1)*(C3 - C2)) + rcd1 = rcd_mcr + (rcd_M1 - rcd_mcr)*factor + else if (mp < 1.75_wp) then + rcd_M1 = (1.0_wp + 0.118_wp*re**0.813_wp) + (re/24.0_wp)*0.69_wp/(1.0_wp + 3550.0_wp/re**0.793_wp) + rcd_M2 = (1.0_wp + 0.107_wp*re**0.867_wp) + (re/24.0_wp)*0.646_wp/(1.0_wp + 861.0_wp/re**0.634_wp) + C1 = 6.48_wp + C2 = 8.93_wp + C3 = 12.21_wp + f1M = -2.963_wp + 4.392_wp*mp - 1.169_wp*mp**2 - 0.027_wp*mp**3 - 0.233_wp*exp((1.0_wp - mp)/0.011_wp) + f2M = -6.617_wp + 12.11_wp*mp - 6.501_wp*mp**2 + 1.182_wp*mp**3 - 0.174_wp*exp((1.0_wp - mp)/0.010_wp) + f3M = -5.866_wp + 11.57_wp*mp - 6.665_wp*mp**2 + 1.312_wp*mp**3 - 0.350_wp*exp((1.0_wp - mp)/0.012_wp) + lrep = log(re) + factor = f1M*(lrep - C2)*(lrep - C3)/((C1 - C2)*(C1 - C3)) + f2M*(lrep - C1)*(lrep - C3)/((C2 - C1)*(C2 - C3)) & + & + f3M*(lrep - C1)*(lrep - C2)/((C3 - C1)*(C3 - C2)) + rcd1 = rcd_M1 + (rcd_M2 - rcd_M1)*factor + else + rcd1 = (1.0_wp + 0.107_wp*re**0.867_wp) + (re/24.0_wp)*0.646_wp/(1.0_wp + 861.0_wp/re**0.634_wp) + end if ! mp + end if ! re + + ! 
Sangani's volume fraction correction for dilute random arrays Capping volume fraction at 0.5 + phi_corr = (1.0_wp + 5.94_wp*min(phi, 0.5_wp)) + + cd = (24.0_wp/re)*rcd1*phi_corr + + beta = rcd1*3.0_wp*pi*mu_fluid*dp + + beta = beta*phi_corr + + end function QS_Parmar + + ! Quasi-steady force (Re_p and Ma_p corrections): Improved Drag Correlation for Spheres and Application to Shock-Tube + ! Experiments - Parmar et al. (2010) - AIAA Journal + ! + ! Quasi-steady force (phi corrections): Sangani et al. (1991) volume fraction correction overshoots the drag coefficient. + ! + ! We adopt instead Osnes et al. (2023) volume fraction correction based on Tenneti et al. with one extra term. + ! + ! At Mach=0, the drag coefficient from this subroutine matches very well with the one calculated using the Osnes subroutine, for + ! various Reynolds numbers and volume fractions. + function QS_ModifiedParmar(rho, cson, mu_fluid, gamma, vmag, dp, volume_fraction) result(beta) + + $:GPU_ROUTINE(parallelism='[seq]') + real(wp), intent(in) :: rho, cson, mu_fluid, gamma, vmag, dp, volume_fraction + real(wp) :: rcd1, rmacr, rcd_mcr, rcd_std, rmach_rat, rcd_M1 + real(wp) :: rcd_M2, C1, C2, C3, f1M, f2M, f3M, lrep, factor, cd, phi_corr + real(wp) :: b1, b2, b3 + real(wp) :: beta + real(wp) :: rmachp, mp, phi, rep, re + + rmachp = vmag/cson + mp = max(rmachp, 0.01_wp) + phi = max(volume_fraction, 0.0001_wp) + rep = vmag*dp*rho/mu_fluid + re = max(rep, 0.1_wp) + + if (re < 1e-14_wp) then + rcd1 = 1.0_wp + else + rmacr = 0.6_wp ! Critical rmachp no. 
+ rcd_mcr = (1._wp + 0.15_wp*re**(0.684_wp)) + (re/24.0_wp)*(0.513_wp/(1._wp + 483._wp/re**(0.669_wp))) + if (mp <= rmacr) then + rcd_std = (1._wp + 0.15_wp*re**(0.687_wp)) + (re/24.0_wp)*(0.42_wp/(1._wp + 42500._wp/re**(1.16_wp))) + rmach_rat = mp/rmacr + rcd1 = rcd_std + (rcd_mcr - rcd_std)*rmach_rat + else if (mp <= 1.0_wp) then + rcd_M1 = (1.0_wp + 0.118_wp*re**0.813_wp) + (re/24.0_wp)*0.69_wp/(1.0_wp + 3550.0_wp/re**0.793_wp) + C1 = 6.48_wp + C2 = 9.28_wp + C3 = 12.21_wp + f1M = -1.884_wp + 8.422_wp*mp - 13.70_wp*mp**2 + 8.162_wp*mp**3 + f2M = -2.228_wp + 10.35_wp*mp - 16.96_wp*mp**2 + 9.840_wp*mp**3 + f3M = 4.362_wp - 16.91_wp*mp + 19.84_wp*mp**2 - 6.296_wp*mp**3 + lrep = log(re) + factor = f1M*(lrep - C2)*(lrep - C3)/((C1 - C2)*(C1 - C3)) + f2M*(lrep - C1)*(lrep - C3)/((C2 - C1)*(C2 - C3)) & + & + f3M*(lrep - C1)*(lrep - C2)/((C3 - C1)*(C3 - C2)) + rcd1 = rcd_mcr + (rcd_M1 - rcd_mcr)*factor + else if (mp < 1.75_wp) then + rcd_M1 = (1.0_wp + 0.118_wp*re**0.813_wp) + (re/24.0_wp)*0.69_wp/(1.0_wp + 3550.0_wp/re**0.793_wp) + rcd_M2 = (1.0_wp + 0.107_wp*re**0.867_wp) + (re/24.0_wp)*0.646_wp/(1.0_wp + 861.0_wp/re**0.634_wp) + C1 = 6.48_wp + C2 = 8.93_wp + C3 = 12.21_wp + f1M = -2.963_wp + 4.392_wp*mp - 1.169_wp*mp**2 - 0.027_wp*mp**3 - 0.233_wp*exp((1.0_wp - mp)/0.011_wp) + f2M = -6.617_wp + 12.11_wp*mp - 6.501_wp*mp**2 + 1.182_wp*mp**3 - 0.174_wp*exp((1.0_wp - mp)/0.010_wp) + f3M = -5.866_wp + 11.57_wp*mp - 6.665_wp*mp**2 + 1.312_wp*mp**3 - 0.350_wp*exp((1.0_wp - mp)/0.012_wp) + lrep = log(re) + factor = f1M*(lrep - C2)*(lrep - C3)/((C1 - C2)*(C1 - C3)) + f2M*(lrep - C1)*(lrep - C3)/((C2 - C1)*(C2 - C3)) & + & + f3M*(lrep - C1)*(lrep - C2)/((C3 - C1)*(C3 - C2)) + rcd1 = rcd_M1 + (rcd_M2 - rcd_M1)*factor + else + rcd1 = (1.0_wp + 0.107_wp*re**0.867_wp) + (re/24.0_wp)*0.646_wp/(1.0_wp + 861.0_wp/re**0.634_wp) + end if ! mp + end if ! re + + ! 
Osnes's volume fraction correction + b1 = 5.81_wp*phi/((1.0_wp - phi)**2) + 0.48_wp*(phi**(1._wp/3._wp))/((1.0_wp - phi)**3) + + b2 = ((1.0_wp - phi)**2)*(phi**3)*re*(0.95_wp + 0.61_wp*(phi**3)/((1.0_wp - phi)**2)) + + b3 = min(sqrt(20.0_wp*mp), & + & 1.0_wp)*(5.65_wp*phi - 22.0_wp*(phi**2) + 23.4_wp*(phi**3))*(1._wp + tanh((mp - (0.65_wp - 0.24_wp*phi)) & + & /0.35_wp)) + + cd = (24.0_wp/re)*rcd1 + + cd = cd/(1.0_wp - phi) + b3 + (24.0_wp/re)*(1.0_wp - phi)*(b1 + b2) + + beta = 3.0_wp*pi*mu_fluid*dp*(re/24.0_wp)*cd + + end function QS_ModifiedParmar + + ! QS Force calculated as a function of Re, Ma and phi + ! + ! Use Osnes etal (2023) correlations A.N. Osnes, M. Vartdal, M. Khalloufi, J. Capecelatro, and S. Balachandar. Comprehensive + ! quasi-steady force correlations for compressible flow through random particle suspensions. International Journal of Multiphase + ! Flow, Vol. 165, 104485, (2023). doi: https://doi.org/10.1016/j.imultiphaseflow.2023.104485. + ! + ! E. Loth, J.T. Daspit, M. Jeong, T. Nagata, and T. Nonomura. Supersonic and hypersonic drag coefficients for a sphere. AIAA + ! Journal, Vol. 59(8), pp. 3261-3274, (2021). doi: https://doi.org/10.2514/1.J060153. + ! + ! NOTE: Re<45 Rarefied formula of Loth et al has been redefined by Balachandar to avoid singularity as Ma -> 0. + function QS_Osnes(rho, cson, mu_fluid, gamma, vmag, dp, volume_fraction) result(beta) + + $:GPU_ROUTINE(parallelism='[seq]') + real(wp), intent(in) :: rho, cson, mu_fluid, gamma, vmag, dp, volume_fraction + real(wp) :: rmachp, mp, phi, rep, re + real(wp) :: Knp, fKn, CD1, s, JM, CD2, cd_loth, CM, GM, HM, b1, b2, b3, cd, sgby2, JMt + real(wp) :: beta + + rmachp = vmag/cson + mp = max(rmachp, 0.01_wp) + phi = max(volume_fraction, 0.0001_wp) + rep = vmag*dp*rho/mu_fluid + re = max(rep, 0.1_wp) + + ! Loth's correlation + if (re <= 45.0_wp) then + ! 
Rarefied-dominated regime + Knp = sqrt(0.5_wp*pi*gamma)*mp/re + if (Knp > 0.01_wp) then + fKn = 1.0_wp/(1.0_wp + Knp*(2.514_wp + 0.8_wp*exp(-0.55_wp/Knp))) + else + fKn = 1.0_wp/(1.0_wp + Knp*(2.514_wp + 0.8_wp*exp(-0.55_wp/0.01_wp))) + end if + CD1 = (24.0_wp/re)*(1.0_wp + 0.15_wp*re**(0.687_wp))*fKn + s = mp*sqrt(0.5_wp*gamma) + sgby2 = sqrt(0.5_wp*gamma) + if (mp <= 1._wp) then + ! JMt = 2.26_wp*(mp**4) - 0.1_wp*(mp**3) + 0.14_wp*mp + JMt = 2.26_wp*(mp**4) + 0.14_wp*mp + else + JMt = 1.6_wp*(mp**4) + 0.25_wp*(mp**3) + 0.11_wp*(mp**2) + 0.44_wp*mp + end if + ! + ! Reformulated version of Loth et al. to avoid singularity at mp = 0 + ! + CD2 = (1.0_wp + 2.0_wp*(s**2))*exp(-s**2)*mp/((sgby2**3)*sqrt(pi)) + (4.0_wp*(s**4) + 4.0_wp*(s**2) - 1.0_wp)*erf(s) & + & /(2.0_wp*(sgby2**4)) + (2.0_wp*(mp**3)/(3.0_wp*sgby2))*sqrt(pi) + + CD2 = CD2/(1.0_wp + (((CD2/JMt) - 1.0_wp)*sqrt(re/45.0_wp))) + cd_loth = CD1/(1.0_wp + (mp**4)) + CD2/(1.0_wp + (mp**4)) + else + ! Compression-dominated regime TLJ: coefficients tweaked to get continuous values on the two branches at the critical + ! points + if (mp < 1.5_wp) then + CM = 1.65_wp + 0.65_wp*tanh(4._wp*mp - 3.4_wp) + else + ! CM = 2.18_wp - 0.13_wp*tanh(0.9_wp*mp - 2.7_wp) + CM = 2.18_wp - 0.12913149918318745_wp*tanh(0.9_wp*mp - 2.7_wp) + end if + if (mp < 0.8_wp) then + GM = 166.0_wp*(mp**3) + 3.29_wp*(mp**2) - 10.9_wp*mp + 20._wp + else + ! GM = 5.0_wp + 40._wp*(mp**(-3)) + GM = 5.0_wp + 47.809331200000017_wp*(mp**(-3)) + end if + if (mp < 1._wp) then + HM = 0.0239_wp*(mp**3) + 0.212_wp*(mp**2) - 0.074_wp*mp + 1._wp + else + ! 
HM = 0.93_wp + 1.0_wp / (3.5_wp + (mp**5)) + HM = 0.93967777777777772_wp + 1.0_wp/(3.5_wp + (mp**5)) + end if + + cd_loth = (24.0_wp/re)*(1._wp + 0.15_wp*(re**(0.687_wp)))*HM + 0.42_wp*CM/(1._wp + 42500._wp/re**(1.16_wp*CM) & + & + GM/sqrt(re)) + end if + + b1 = 5.81_wp*phi/((1.0_wp - phi)**2) + 0.48_wp*(phi**(1._wp/3._wp))/((1.0_wp - phi)**3) + + b2 = ((1.0_wp - phi)**2)*(phi**3)*re*(0.95_wp + 0.61_wp*(phi**3)/((1.0_wp - phi)**2)) + + b3 = min(sqrt(20.0_wp*mp), & + & 1.0_wp)*(5.65_wp*phi - 22.0_wp*(phi**2) + 23.4_wp*(phi**3))*(1._wp + tanh((mp - (0.65_wp - 0.24_wp*phi)) & + & /0.35_wp)) + + cd = cd_loth/(1.0_wp - phi) + b3 + (24.0_wp/re)*(1.0_wp - phi)*(b1 + b2) + + beta = 3.0_wp*pi*mu_fluid*dp*(re/24.0_wp)*cd + + end function QS_Osnes + + ! Subroutine for Quasi-Steady Drag Model of Gidaspow + ! + ! D. Gidaspow, Multiphase Flow and Fluidization (Academic Press, 1994) + ! + ! Note: Model is provided per cell volume. We convert that to per particle using the particle volume fraction and volume + function QS_Gidaspow(rho, cson, mu_fluid, gamma, vmag, dp, volume_fraction) result(beta) + + $:GPU_ROUTINE(parallelism='[seq]') + real(wp), intent(in) :: rho, cson, mu_fluid, gamma, vmag, dp, volume_fraction + real(wp) :: cd, phifRep, phif + real(wp) :: phi, rep, re + real(wp) :: beta + + rep = vmag*dp*rho/mu_fluid + phi = max(volume_fraction, 0.0001_wp) + phif = max(1._wp - volume_fraction, 0.0001_wp) + re = max(rep, 0.1_wp) + + phifRep = phif*re + + if (phifRep < 1000.0_wp) then + cd = 24.0_wp/phifRep*(1.0_wp + 0.15_wp*(phifRep)**0.687_wp) + else + cd = 0.44_wp + end if + + if (phif < 0.8_wp) then + beta = 150.0_wp*((phi**2)*mu_fluid)/(phif*dp**2) + 1.75_wp*(rho*phi*vmag/dp) + else + beta = 0.75_wp*cd*phi*rho*vmag/(dp*phif**1.65_wp) + end if + + beta = beta*(pi*dp**3)/(6.0_wp*phi) + + end function QS_Gidaspow + +end module m_particles_EL_kernels diff --git a/src/simulation/m_rhs.fpp b/src/simulation/m_rhs.fpp index 1f347a0289..c343d33cf6 100644 --- 
a/src/simulation/m_rhs.fpp +++ b/src/simulation/m_rhs.fpp @@ -19,6 +19,7 @@ module m_rhs use m_cbc use m_bubbles_EE use m_bubbles_EL + use m_particles_EL use m_qbmm use m_hypoelastic use m_hyperelastic @@ -38,9 +39,13 @@ module m_rhs private; public :: s_initialize_rhs_module, s_compute_rhs, s_finalize_rhs_module + !! This variable contains the WENO-reconstructed values of the cell-average conservative variables, which are located in + !! q_cons_vf, at cell-interior Gaussian quadrature points (QP). type(vector_field) :: q_cons_qp !< WENO-reconstructed cell-average conservative variables at quadrature points $:GPU_DECLARE(create='[q_cons_qp]') + !! The primitive variables at cell-interior Gaussian quadrature points. These are calculated from the conservative variables and + !! gradient magnitude (GM) of the volume fractions, q_cons_qp and gm_alpha_qp, respectively. type(vector_field) :: q_prim_qp !< Primitive variables at cell-interior quadrature points $:GPU_DECLARE(create='[q_prim_qp]') @@ -67,7 +72,10 @@ module m_rhs type(scalar_field), allocatable, dimension(:) :: tau_Re_vf $:GPU_DECLARE(create='[tau_Re_vf]') + !> The gradient magnitude of the volume fractions at cell-interior Gaussian quadrature points. gm_alpha_qp is calculated from + !! individual first-order spatial derivatives located in dq_prim_ds_qp. type(vector_field) :: gm_alpha_qp !< Volume fraction gradient magnitudes at cell-interior quadrature points + $:GPU_DECLARE(create='[gm_alpha_qp]') !> @name The left and right WENO-reconstructed cell-boundary values of the cell- average gradient magnitude of volume fractions, @@ -127,7 +135,8 @@ module m_rhs contains - !> Initialize the RHS module + !> The computation of parameters, the allocation of memory, the association of pointers and/or the execution of any other + !! procedures that are necessary to setup the module. 
impure subroutine s_initialize_rhs_module integer :: i, j, k, l, id !< Generic loop iterators @@ -197,6 +206,7 @@ contains $:GPU_ENTER_DATA(attach='[q_prim_qp%vf(psi_idx)%sf]') end if + ! Allocation/Association of flux_n, flux_src_n, and flux_gsrc_n if (.not. igr) then @:ALLOCATE(flux_n(1:num_dims)) @:ALLOCATE(flux_src_n(1:num_dims)) @@ -209,42 +219,42 @@ contains if (i == 1) then do l = 1, sys_size - @:ALLOCATE(flux_n(i)%vf(l)%sf(idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, & + @:ALLOCATE(flux_n(i)%vf(l)%sf( idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, & & idwbuff(3)%beg:idwbuff(3)%end)) - @:ALLOCATE(flux_gsrc_n(i)%vf(l)%sf(idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, & + @:ALLOCATE(flux_gsrc_n(i)%vf(l)%sf( idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, & & idwbuff(3)%beg:idwbuff(3)%end)) end do if (viscous .or. surface_tension) then do l = mom_idx%beg, E_idx - @:ALLOCATE(flux_src_n(i)%vf(l)%sf(idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, & + @:ALLOCATE(flux_src_n(i)%vf(l)%sf( idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, & & idwbuff(3)%beg:idwbuff(3)%end)) end do end if - @:ALLOCATE(flux_src_n(i)%vf(adv_idx%beg)%sf(idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, & + @:ALLOCATE(flux_src_n(i)%vf(adv_idx%beg)%sf( idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, & & idwbuff(3)%beg:idwbuff(3)%end)) if (riemann_solver == 1 .or. 
riemann_solver == 4) then do l = adv_idx%beg + 1, adv_idx%end - @:ALLOCATE(flux_src_n(i)%vf(l)%sf(idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, & + @:ALLOCATE(flux_src_n(i)%vf(l)%sf( idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, & & idwbuff(3)%beg:idwbuff(3)%end)) end do end if if (chemistry) then do l = chemxb, chemxe - @:ALLOCATE(flux_src_n(i)%vf(l)%sf(idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, & + @:ALLOCATE(flux_src_n(i)%vf(l)%sf( idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, & & idwbuff(3)%beg:idwbuff(3)%end)) end do if (chem_params%diffusion .and. .not. viscous) then - @:ALLOCATE(flux_src_n(i)%vf(E_idx)%sf(idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, & + @:ALLOCATE(flux_src_n(i)%vf(E_idx)%sf( idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, & & idwbuff(3)%beg:idwbuff(3)%end)) end if end if else do l = 1, sys_size - @:ALLOCATE(flux_gsrc_n(i)%vf(l)%sf(idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, & + @:ALLOCATE(flux_gsrc_n(i)%vf(l)%sf( idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, & & idwbuff(3)%beg:idwbuff(3)%end)) end do end if @@ -268,9 +278,11 @@ contains end do end if end do + ! END: Allocation/Association of flux_n, flux_src_n, and flux_gsrc_n end if if ((.not. igr) .or. dummy) then + ! Allocation of dq_prim_ds_qp @:ALLOCATE(dq_prim_dx_qp(1:1)) @:ALLOCATE(dq_prim_dy_qp(1:1)) @:ALLOCATE(dq_prim_dz_qp(1:1)) @@ -278,6 +290,7 @@ contains @:ALLOCATE(qL_prim(1:num_dims)) @:ALLOCATE(qR_prim(1:num_dims)) + ! 
Allocation/Association of dqK_prim_ds_n @:ALLOCATE(dqL_prim_dx_n(1:num_dims)) @:ALLOCATE(dqL_prim_dy_n(1:num_dims)) @:ALLOCATE(dqL_prim_dz_n(1:num_dims)) @@ -364,7 +377,7 @@ contains @:ALLOCATE(dq_prim_dz_qp(1)%vf(1:sys_size)) do l = mom_idx%beg, mom_idx%end - @:ALLOCATE(dq_prim_dx_qp(1)%vf(l)%sf(idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, & + @:ALLOCATE(dq_prim_dx_qp(1)%vf(l)%sf( idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, & & idwbuff(3)%beg:idwbuff(3)%end)) end do @@ -372,7 +385,7 @@ contains if (n > 0) then do l = mom_idx%beg, mom_idx%end - @:ALLOCATE(dq_prim_dy_qp(1)%vf(l)%sf(idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, & + @:ALLOCATE(dq_prim_dy_qp(1)%vf(l)%sf( idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, & & idwbuff(3)%beg:idwbuff(3)%end)) end do @@ -380,7 +393,7 @@ contains if (p > 0) then do l = mom_idx%beg, mom_idx%end - @:ALLOCATE(dq_prim_dz_qp(1)%vf(l)%sf(idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, & + @:ALLOCATE(dq_prim_dz_qp(1)%vf(l)%sf( idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, & & idwbuff(3)%beg:idwbuff(3)%end)) end do @:ACC_SETUP_VFs(dq_prim_dz_qp(1)) @@ -398,26 +411,26 @@ contains do i = 1, num_dims do l = mom_idx%beg, mom_idx%end - @:ALLOCATE(dqL_prim_dx_n(i)%vf(l)%sf(idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, & + @:ALLOCATE(dqL_prim_dx_n(i)%vf(l)%sf( idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, & & idwbuff(3)%beg:idwbuff(3)%end)) - @:ALLOCATE(dqR_prim_dx_n(i)%vf(l)%sf(idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, & + @:ALLOCATE(dqR_prim_dx_n(i)%vf(l)%sf( idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, & & idwbuff(3)%beg:idwbuff(3)%end)) end do if (n > 0) then do l = mom_idx%beg, mom_idx%end - @:ALLOCATE(dqL_prim_dy_n(i)%vf(l)%sf(idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, & + @:ALLOCATE(dqL_prim_dy_n(i)%vf(l)%sf( idwbuff(1)%beg:idwbuff(1)%end, 
idwbuff(2)%beg:idwbuff(2)%end, & & idwbuff(3)%beg:idwbuff(3)%end)) - @:ALLOCATE(dqR_prim_dy_n(i)%vf(l)%sf(idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, & + @:ALLOCATE(dqR_prim_dy_n(i)%vf(l)%sf( idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, & & idwbuff(3)%beg:idwbuff(3)%end)) end do end if if (p > 0) then do l = mom_idx%beg, mom_idx%end - @:ALLOCATE(dqL_prim_dz_n(i)%vf(l)%sf(idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, & + @:ALLOCATE(dqL_prim_dz_n(i)%vf(l)%sf( idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, & & idwbuff(3)%beg:idwbuff(3)%end)) - @:ALLOCATE(dqR_prim_dz_n(i)%vf(l)%sf(idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, & + @:ALLOCATE(dqR_prim_dz_n(i)%vf(l)%sf( idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, & & idwbuff(3)%beg:idwbuff(3)%end)) end do end if @@ -455,7 +468,7 @@ contains @:ALLOCATE(dqR_rsz_vf(idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, & & idwbuff(3)%beg:idwbuff(3)%end, mom_idx%beg:mom_idx%end)) end if - end if ! end allocation for weno_Re_flux + end if else @:ALLOCATE(dq_prim_dx_qp(1)%vf(1:sys_size)) @:ALLOCATE(dq_prim_dy_qp(1)%vf(1:sys_size)) @@ -473,7 +486,7 @@ contains end if end if end do - end if ! end allocation of viscous variables + end if $:GPU_PARALLEL_LOOP(private='[i, j, k, l, id]', collapse=4) do id = 1, num_dims @@ -488,7 +501,7 @@ contains end do end do $:END_GPU_PARALLEL_LOOP() - end if ! end allocation for .not. 
igr + end if if (qbmm) then @:ALLOCATE(mom_sp(1:nmomsp), mom_3d(0:2, 0:2, nb)) @@ -496,7 +509,7 @@ contains do i = 0, 2 do j = 0, 2 do k = 1, nb - @:ALLOCATE(mom_3d(i, j, k)%sf(idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, & + @:ALLOCATE(mom_3d(i, j, k)%sf( idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, & & idwbuff(3)%beg:idwbuff(3)%end)) @:ACC_SETUP_SFs(mom_3d(i, j, k)) end do @@ -504,7 +517,7 @@ contains end do do i = 1, nmomsp - @:ALLOCATE(mom_sp(i)%sf(idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, & + @:ALLOCATE(mom_sp(i)%sf( idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, & & idwbuff(3)%beg:idwbuff(3)%end)) @:ACC_SETUP_SFs(mom_sp(i)) end do @@ -513,6 +526,9 @@ contains if (mpp_lim .and. bubbles_euler) then @:ALLOCATE(alf_sum%sf(idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, idwbuff(3)%beg:idwbuff(3)%end)) end if + ! END: Allocation/Association of qK_cons_n and qK_prim_n + + ! Allocation of gm_alphaK_n if (.not. igr) then @:ALLOCATE(gm_alphaL_n(1:num_dims)) @:ALLOCATE(gm_alphaR_n(1:num_dims)) @@ -531,10 +547,9 @@ contains end subroutine s_initialize_rhs_module - !> Compute the right-hand side of the semi-discrete governing equations for a single time stage + !> @brief Computes the right-hand side of the semi-discrete governing equations for a single time stage. impure subroutine s_compute_rhs(q_cons_vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb_in, rhs_pb, mv_in, rhs_mv, t_step, & - - & time_avg, stage) + & time_avg, stage) type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf type(scalar_field), intent(inout) :: q_T_sf @@ -555,8 +570,6 @@ contains integer :: id integer(kind=8) :: i, j, k, l, q !< Generic loop iterators - ! RHS: halo exchange -> reconstruct -> Riemann solve -> flux difference -> source terms - call nvtxStartRange("COMPUTE-RHS") call cpu_time(t_start) @@ -641,7 +654,7 @@ contains call nvtxEndRange end if - ! 
Loop over coordinate directions for dimensional splitting + ! Dimensional Splitting Loop do id = 1, num_dims if (igr .or. dummy) then if (id == 1) then @@ -740,7 +753,7 @@ contains end if end if - call nvtxEndRange ! WENO + call nvtxEndRange ! Configuring Coordinate Direction Indexes if (id == 1) then @@ -751,6 +764,9 @@ contains irx%beg = 0; iry%beg = 0; irz%beg = -1 end if irx%end = m; iry%end = n; irz%end = p + ! $:GPU_UPDATE(host='[qL_rsx_vf,qR_rsx_vf]') print *, "L", qL_rsx_vf(100:300, 0, 0, 1) print *, "R", + ! qR_rsx_vf(100:300, 0, 0, 1) + ! Computing Riemann Solver Flux and Source Flux call nvtxStartRange("RHS-RIEMANN-SOLVER") call s_riemann_solver(qR_rsx_vf, qR_rsy_vf, qR_rsz_vf, dqR_prim_dx_n(id)%vf, dqR_prim_dy_n(id)%vf, & @@ -759,6 +775,9 @@ contains & q_prim_qp%vf, flux_n(id)%vf, flux_src_n(id)%vf, flux_gsrc_n(id)%vf, id, irx, iry, irz) call nvtxEndRange + !$:GPU_UPDATE(host='[flux_n(1)%vf(1)%sf]') + ! print *, "FLUX", flux_n(1)%vf(1)%sf(100:300, 0, 0) + ! Additional physics and source terms RHS addition for advection source call nvtxStartRange("RHS-ADVECTION-SRC") call s_compute_advection_source_term(id, rhs_vf, q_cons_qp, q_prim_qp, flux_src_n(id)) @@ -776,7 +795,7 @@ contains call nvtxEndRange end if - ! Viscous stress contribution to RHS + ! RHS additions for viscosity if (viscous .or. surface_tension .or. chem_params%diffusion) then call nvtxStartRange("RHS-ADD-PHYSICS") call s_compute_additional_physics_rhs(id, q_prim_qp%vf, rhs_vf, flux_src_n(id)%vf, dq_prim_dx_qp(1)%vf, & @@ -784,7 +803,7 @@ contains call nvtxEndRange end if - ! Bubble dynamics source terms + ! RHS additions for sub-grid bubbles_euler if (bubbles_euler) then call nvtxStartRange("RHS-BUBBLES-COMPUTE") call s_compute_bubbles_EE_rhs(id, q_prim_qp%vf, divu) @@ -848,14 +867,32 @@ contains end if if (bubbles_lagrange) then - ! 
RHS additions for sub-grid bubbles_lagrange - call nvtxStartRange("RHS-EL-BUBBLES-SRC") - call s_compute_bubbles_EL_source(q_cons_qp%vf(1:sys_size), q_prim_qp%vf(1:sys_size), rhs_vf) - call nvtxEndRange ! Compute bubble dynamics if (.not. adap_dt) then call nvtxStartRange("RHS-EL-BUBBLES-DYN") - call s_compute_bubble_EL_dynamics(q_prim_qp%vf(1:sys_size), stage) + call s_compute_bubble_EL_dynamics(q_prim_qp%vf(1:sys_size), bc_type, stage) + call nvtxEndRange + end if + + ! RHS additions for sub-grid bubbles_lagrange + if (lag_params%solver_approach == 2) then + call nvtxStartRange("RHS-EL-BUBBLES-SRC") + call s_compute_bubbles_EL_source(q_cons_qp%vf(1:sys_size), q_prim_qp%vf(1:sys_size), rhs_vf) + call nvtxEndRange + end if + end if + + if (particles_lagrange) then + ! Compute particle dynamics, forces, dvdt + call nvtxStartRange("RHS-EL-PARTICLES-DYN") + call s_compute_particle_EL_dynamics(q_prim_qp%vf(1:sys_size), bc_type, stage, qL_rsx_vf, qL_rsy_vf, qL_rsz_vf, & + & qR_rsx_vf, qR_rsy_vf, qR_rsz_vf, rhs_vf) + call nvtxEndRange + + ! RHS additions for sub-grid particles_lagrange + if (lag_params%solver_approach == 2) then + call nvtxStartRange("RHS-EL-PARTICLES-SRC") + call s_compute_particles_EL_source(q_cons_qp%vf(1:sys_size), q_prim_qp%vf(1:sys_size), rhs_vf, stage) call nvtxEndRange end if end if @@ -870,7 +907,7 @@ contains ! END: Additional physics and source terms - if (run_time_info .or. probe_wrt .or. ib .or. bubbles_lagrange) then + if (run_time_info .or. probe_wrt .or. ib .or. bubbles_lagrange .or. particles_lagrange) then if (.not. igr .or. dummy) then $:GPU_PARALLEL_LOOP(private='[i, j, k, l]', collapse=4) do i = 1, sys_size @@ -898,7 +935,7 @@ contains end subroutine s_compute_rhs - !> Accumulate advection source contributions from a given coordinate direction into the RHS + !> @brief Accumulates advection source contributions from a given coordinate direction into the RHS. 
subroutine s_compute_advection_source_term(idir, rhs_vf, q_cons_vf, q_prim_vf, flux_src_n_vf) integer, intent(in) :: idir @@ -1010,7 +1047,7 @@ contains $:END_GPU_PARALLEL_LOOP() if (model_eqns == 3) then - $:GPU_PARALLEL_LOOP(collapse=4, private='[i_fluid_loop, k, l, q, inv_ds, advected_qty_val, pressure_val, & + $:GPU_PARALLEL_LOOP(collapse=4,private='[i_fluid_loop, k, l, q, inv_ds, advected_qty_val, pressure_val, & & flux_face1, flux_face2]') do l = 0, p do k = 0, n @@ -1059,7 +1096,7 @@ contains call s_cbc(q_prim_vf%vf, flux_n(idir)%vf, flux_src_n_vf%vf, idir, 1, irx, iry, irz) end if - if (grid_geometry == 3) then ! Cylindrical Coordinates + if (grid_geometry == 3) then $:GPU_PARALLEL_LOOP(collapse=4,private='[j, k, l, q, inv_ds, velocity_val, flux_face1, flux_face2]') do j = 1, sys_size do k = 0, p @@ -1088,7 +1125,7 @@ contains end do end do $:END_GPU_PARALLEL_LOOP() - else ! Cartesian Coordinates + else $:GPU_PARALLEL_LOOP(collapse=4,private='[j, k, l, q, inv_ds, flux_face1, flux_face2]') do j = 1, sys_size do k = 0, p @@ -1106,7 +1143,7 @@ contains end if if (model_eqns == 3) then - $:GPU_PARALLEL_LOOP(collapse=4, private='[i_fluid_loop, k, l, q, inv_ds, advected_qty_val, pressure_val, & + $:GPU_PARALLEL_LOOP(collapse=4,private='[i_fluid_loop, k, l, q, inv_ds, advected_qty_val, pressure_val, & & flux_face1, flux_face2]') do k = 0, p do q = 0, n @@ -1131,9 +1168,8 @@ contains contains - !> Add the advection source flux-difference terms for a single coordinate direction to the RHS + !> @brief Adds the advection source flux-difference terms for a single coordinate direction to the RHS. subroutine s_add_directional_advection_source_terms(current_idir, rhs_vf_arg, q_cons_vf_arg, q_prim_vf_arg, & - & flux_src_n_vf_arg, Kterm_arg) integer, intent(in) :: current_idir type(scalar_field), dimension(sys_size), intent(inout) :: rhs_vf_arg @@ -1168,7 +1204,7 @@ contains end do end do $:END_GPU_PARALLEL_LOOP() - else ! 
Other Riemann solvers + else if (alt_soundspeed) then if (bubbles_euler .neqv. .true.) then $:GPU_PARALLEL_LOOP(collapse=3, private='[k_idx, l_idx, q_idx, local_inv_ds, local_q_cons_val, & @@ -1176,7 +1212,7 @@ contains do q_idx = 0, p; do l_idx = 0, n; do k_idx = 0, m local_inv_ds = 1._wp/dx(k_idx) local_q_cons_val = q_cons_vf_arg%vf(advxe)%sf(k_idx, l_idx, q_idx) - local_k_term_val = Kterm_arg(k_idx, l_idx, q_idx) ! Access is safe due to outer alt_soundspeed check + local_k_term_val = Kterm_arg(k_idx, l_idx, q_idx) ! Access is safe local_term_coeff = local_q_cons_val - local_k_term_val local_flux1 = flux_src_n_vf_arg%vf(advxe)%sf(k_idx, l_idx, q_idx) local_flux2 = flux_src_n_vf_arg%vf(advxe)%sf(k_idx - 1, l_idx, q_idx) @@ -1199,7 +1235,7 @@ contains end do; end do; end do $:END_GPU_PARALLEL_LOOP() end if - else ! NOT alt_soundspeed + else $:GPU_PARALLEL_LOOP(collapse=4,private='[j_adv, k_idx, l_idx, q_idx, local_inv_ds, local_term_coeff, & & local_flux1, local_flux2]') do j_adv = advxb, advxe @@ -1215,8 +1251,8 @@ contains $:END_GPU_PARALLEL_LOOP() end if end if - case (2) - ! y-direction: loops q_idx (x), k_idx (y), l_idx (z); sf(q_idx, k_idx, l_idx); dy(k_idx); Kterm(q_idx,k_idx,l_idx) + case (2) & + & ! y-direction: loops q_idx (x), k_idx (y), l_idx (z); sf(q_idx, k_idx, l_idx); dy(k_idx); Kterm(q_idx,k_idx,l_idx) use_standard_riemann = (riemann_solver == 1 .or. riemann_solver == 4) if (use_standard_riemann) then $:GPU_PARALLEL_LOOP(collapse=4,private='[j_adv, k_idx, l_idx, q_idx, local_inv_ds, local_term_coeff, & @@ -1236,7 +1272,7 @@ contains end do end do $:END_GPU_PARALLEL_LOOP() - else ! Other Riemann solvers + else if (alt_soundspeed) then if (bubbles_euler .neqv. .true.) then $:GPU_PARALLEL_LOOP(collapse=3, private='[k_idx, l_idx, q_idx, local_inv_ds, local_q_cons_val, & @@ -1275,7 +1311,7 @@ contains end do; end do; end do $:END_GPU_PARALLEL_LOOP() end if - else ! 
NOT alt_soundspeed + else $:GPU_PARALLEL_LOOP(collapse=4,private='[j_adv, k_idx, l_idx, q_idx, local_inv_ds, local_term_coeff, & & local_flux1, local_flux2]') do j_adv = advxb, advxe @@ -1291,8 +1327,8 @@ contains $:END_GPU_PARALLEL_LOOP() end if end if - case (3) - ! z-direction: loops l_idx (x), q_idx (y), k_idx (z); sf(l_idx, q_idx, k_idx); dz(k_idx); Kterm(l_idx,q_idx,k_idx) + case (3) & + & ! z-direction: loops l_idx (x), q_idx (y), k_idx (z); sf(l_idx, q_idx, k_idx); dz(k_idx); Kterm(l_idx,q_idx,k_idx) if (grid_geometry == 3) then use_standard_riemann = (riemann_solver == 1) else @@ -1317,7 +1353,7 @@ contains end do end do $:END_GPU_PARALLEL_LOOP() - else ! Other Riemann solvers + else if (alt_soundspeed) then if (bubbles_euler .neqv. .true.) then $:GPU_PARALLEL_LOOP(collapse=3, private='[k_idx, l_idx, q_idx, local_inv_ds, local_q_cons_val, & @@ -1348,7 +1384,7 @@ contains end do; end do; end do $:END_GPU_PARALLEL_LOOP() end if - else ! NOT alt_soundspeed + else $:GPU_PARALLEL_LOOP(collapse=4, private='[j_adv, k_idx, l_idx, q_idx, local_inv_ds, local_term_coeff, & & local_flux1, local_flux2]') do j_adv = advxb, advxe @@ -1370,7 +1406,7 @@ contains end subroutine s_compute_advection_source_term - !> Add viscous, surface-tension, and species-diffusion source flux contributions to the RHS for a given direction + !> @brief Adds viscous, surface-tension, and species-diffusion source flux contributions to the RHS for a given direction. subroutine s_compute_additional_physics_rhs(idir, q_prim_vf, rhs_vf, flux_src_n_in, dq_prim_dx_vf, dq_prim_dy_vf, dq_prim_dz_vf) integer, intent(in) :: idir @@ -1620,14 +1656,23 @@ contains end subroutine s_compute_additional_physics_rhs - !> Reconstruct left and right cell-boundary values from cell-averaged variables + !> The purpose of this subroutine is to WENO-reconstruct the left and the right cell-boundary values, including values at the + !! Gaussian quadrature points, from the cell-averaged variables. + !! 
@param v_vf Cell-average variables + !! @param vL_x Left reconstructed cell-boundary values in x + !! @param vL_y Left reconstructed cell-boundary values in y + !! @param vL_z Left reconstructed cell-boundary values in z + !! @param vR_x Right reconstructed cell-boundary values in x + !! @param vR_y Right reconstructed cell-boundary values in y + !! @param vR_z Right reconstructed cell-boundary values in z + !! @param norm_dir Splitting coordinate direction subroutine s_reconstruct_cell_boundary_values(v_vf, vL_x, vL_y, vL_z, vR_x, vR_y, vR_z, norm_dir) type(scalar_field), dimension(iv%beg:iv%end), intent(in) :: v_vf real(wp), dimension(idwbuff(1)%beg:,idwbuff(2)%beg:,idwbuff(3)%beg:,1:), intent(inout) :: vL_x, vL_y, vL_z real(wp), dimension(idwbuff(1)%beg:,idwbuff(2)%beg:,idwbuff(3)%beg:,1:), intent(inout) :: vR_x, vR_y, vR_z integer, intent(in) :: norm_dir - integer :: recon_dir !< Coordinate direction of the reconstruction + integer :: recon_dir !< Coordinate direction of the WENO reconstruction integer :: i, j, k, l #:for SCHEME, TYPE in [('weno','WENO_TYPE'), ('muscl','MUSCL_TYPE')] @@ -1666,7 +1711,7 @@ contains end subroutine s_reconstruct_cell_boundary_values - !> Perform first-order (piecewise constant) reconstruction of left and right cell-boundary values + !> @brief Performs first-order (piecewise constant) reconstruction of left and right cell-boundary values. subroutine s_reconstruct_cell_boundary_values_first_order(v_vf, vL_x, vL_y, vL_z, vR_x, vR_y, vR_z, norm_dir) type(scalar_field), dimension(iv%beg:iv%end), intent(in) :: v_vf diff --git a/src/simulation/m_sim_helpers.fpp b/src/simulation/m_sim_helpers.fpp index e20ec982fe..b9a5b10cf8 100644 --- a/src/simulation/m_sim_helpers.fpp +++ b/src/simulation/m_sim_helpers.fpp @@ -19,6 +19,9 @@ module m_sim_helpers contains !> Computes the modified dtheta for Fourier filtering in azimuthal direction + !! @param k y coordinate index + !! @param l z coordinate index + !! 
@return fltr_dtheta Modified dtheta value for cylindrical coordinates function f_compute_filtered_dtheta(k, l) result(fltr_dtheta) $:GPU_ROUTINE(parallelism='[seq]') @@ -42,6 +45,12 @@ contains end function f_compute_filtered_dtheta !> Computes inviscid CFL terms for multi-dimensional cases (2D/3D only) + !! @param vel directional velocities + !! @param c mixture speed of sound + !! @param j x coordinate index + !! @param k y coordinate index + !! @param l z coordinate index + !! @return cfl_terms computed CFL terms for 2D/3D cases function f_compute_multidim_cfl_terms(vel, c, j, k, l) result(cfl_terms) $:GPU_ROUTINE(parallelism='[seq]') @@ -70,6 +79,20 @@ contains end function f_compute_multidim_cfl_terms !> Computes enthalpy + !! @param q_prim_vf cell centered primitive variables + !! @param pres mixture pressure + !! @param rho mixture density + !! @param gamma mixture gamma + !! @param pi_inf mixture pi_inf + !! @param Re mixture Reynolds number + !! @param H mixture enthalpy + !! @param alpha component alphas + !! @param vel directional velocities + !! @param vel_sum squared sum of velocity components + !! @param qv Fluid reference energy + !! @param j x index + !! @param k y index + !! @param l z index subroutine s_compute_enthalpy(q_prim_vf, pres, rho, gamma, pi_inf, Re, H, alpha, vel, vel_sum, qv, j, k, l) $:GPU_ROUTINE(function_name='s_compute_enthalpy',parallelism='[seq]', cray_inline=True) @@ -138,80 +161,88 @@ contains end subroutine s_compute_enthalpy !> Computes stability criterion for a specified dt - subroutine s_compute_stability_from_dt(vel, c, rho, Re_l, j, k, l, icfl_sf, vcfl_sf, Rc_sf) + !! @param vel directional velocities + !! @param c mixture speed of sound + !! @param rho Density + !! @param Re_l mixture Reynolds number + !! @param j x index + !! @param k y index + !! @param l z index + !! @param icfl cell-centered inviscid cfl number + !! @param vcfl (optional) cell-centered viscous CFL number + !! 
@param Rc (optional) cell centered Rc + subroutine s_compute_stability_from_dt(vel, c, rho, Re_l, j, k, l, icfl, vcfl, Rc) $:GPU_ROUTINE(parallelism='[seq]') - real(wp), intent(in), dimension(num_vels) :: vel - real(wp), intent(in) :: c, rho - real(wp), dimension(0:m,0:n,0:p), intent(inout) :: icfl_sf - real(wp), dimension(0:m,0:n,0:p), intent(inout), optional :: vcfl_sf, Rc_sf - real(wp), dimension(2), intent(in) :: Re_l - integer, intent(in) :: j, k, l - real(wp) :: fltr_dtheta + real(wp), intent(in), dimension(num_vels) :: vel + real(wp), intent(in) :: c, rho + real(wp), intent(inout) :: icfl + real(wp), intent(inout), optional :: vcfl, Rc + real(wp), dimension(2), intent(in) :: Re_l + integer, intent(in) :: j, k, l + real(wp) :: fltr_dtheta ! Inviscid CFL calculation if (p > 0 .or. n > 0) then - ! 2D/3D - icfl_sf(j, k, l) = dt/f_compute_multidim_cfl_terms(vel, c, j, k, l) + icfl = dt/f_compute_multidim_cfl_terms(vel, c, j, k, l) else - ! 1D - icfl_sf(j, k, l) = (dt/dx(j))*(abs(vel(1)) + c) + icfl = (dt/dx(j))*(abs(vel(1)) + c) end if ! Viscous calculations if (viscous) then if (p > 0) then #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 - ! 
3D if (grid_geometry == 3) then fltr_dtheta = f_compute_filtered_dtheta(k, l) - vcfl_sf(j, k, l) = maxval(dt/Re_l/rho)/min(dx(j), dy(k), fltr_dtheta)**2._wp - Rc_sf(j, k, l) = min(dx(j)*(abs(vel(1)) + c), dy(k)*(abs(vel(2)) + c), & - & fltr_dtheta*(abs(vel(3)) + c))/maxval(1._wp/Re_l) + vcfl = maxval(dt/Re_l/rho)/min(dx(j), dy(k), fltr_dtheta)**2._wp + Rc = min(dx(j)*(abs(vel(1)) + c), dy(k)*(abs(vel(2)) + c), fltr_dtheta*(abs(vel(3)) + c))/maxval(1._wp/Re_l) else - vcfl_sf(j, k, l) = maxval(dt/Re_l/rho)/min(dx(j), dy(k), dz(l))**2._wp - Rc_sf(j, k, l) = min(dx(j)*(abs(vel(1)) + c), dy(k)*(abs(vel(2)) + c), & - & dz(l)*(abs(vel(3)) + c))/maxval(1._wp/Re_l) + vcfl = maxval(dt/Re_l/rho)/min(dx(j), dy(k), dz(l))**2._wp + Rc = min(dx(j)*(abs(vel(1)) + c), dy(k)*(abs(vel(2)) + c), dz(l)*(abs(vel(3)) + c))/maxval(1._wp/Re_l) end if #:endif else if (n > 0) then - ! 2D - vcfl_sf(j, k, l) = maxval(dt/Re_l/rho)/min(dx(j), dy(k))**2._wp - Rc_sf(j, k, l) = min(dx(j)*(abs(vel(1)) + c), dy(k)*(abs(vel(2)) + c))/maxval(1._wp/Re_l) + vcfl = maxval(dt/Re_l/rho)/min(dx(j), dy(k))**2._wp + Rc = min(dx(j)*(abs(vel(1)) + c), dy(k)*(abs(vel(2)) + c))/maxval(1._wp/Re_l) else - ! 1D - vcfl_sf(j, k, l) = maxval(dt/Re_l/rho)/dx(j)**2._wp - Rc_sf(j, k, l) = dx(j)*(abs(vel(1)) + c)/maxval(1._wp/Re_l) + vcfl = maxval(dt/Re_l/rho)/dx(j)**2._wp + Rc = dx(j)*(abs(vel(1)) + c)/maxval(1._wp/Re_l) end if end if end subroutine s_compute_stability_from_dt !> Computes dt for a specified CFL number + !! @param vel directional velocities + !! @param c Speed of sound + !! @param max_dt cell centered maximum dt + !! @param rho cell centered density + !! @param Re_l cell centered Reynolds number + !! @param j x coordinate + !! @param k y coordinate + !! 
@param l z coordinate subroutine s_compute_dt_from_cfl(vel, c, max_dt, rho, Re_l, j, k, l) $:GPU_ROUTINE(parallelism='[seq]') - real(wp), dimension(num_vels), intent(in) :: vel - real(wp), intent(in) :: c, rho - real(wp), dimension(0:m,0:n,0:p), intent(inout) :: max_dt - real(wp), dimension(2), intent(in) :: Re_l - integer, intent(in) :: j, k, l - real(wp) :: icfl_dt, vcfl_dt - real(wp) :: fltr_dtheta + real(wp), dimension(num_vels), intent(in) :: vel + real(wp), intent(in) :: c, rho + real(wp), intent(inout) :: max_dt + real(wp), dimension(2), intent(in) :: Re_l + integer, intent(in) :: j, k, l + real(wp) :: icfl_dt, vcfl_dt + real(wp) :: fltr_dtheta ! Inviscid CFL calculation if (p > 0 .or. n > 0) then - ! 2D/3D cases icfl_dt = cfl_target*f_compute_multidim_cfl_terms(vel, c, j, k, l) else - ! 1D case icfl_dt = cfl_target*(dx(j)/(abs(vel(1)) + c)) end if ! Viscous calculations if (viscous) then if (p > 0) then - ! 3D if (grid_geometry == 3) then fltr_dtheta = f_compute_filtered_dtheta(k, l) vcfl_dt = cfl_target*(min(dx(j), dy(k), fltr_dtheta)**2._wp)/maxval(1/(rho*Re_l)) @@ -219,18 +250,16 @@ contains vcfl_dt = cfl_target*(min(dx(j), dy(k), dz(l))**2._wp)/maxval(1/(rho*Re_l)) end if else if (n > 0) then - ! 2D vcfl_dt = cfl_target*(min(dx(j), dy(k))**2._wp)/maxval((1/Re_l)/rho) else - ! 
1D vcfl_dt = cfl_target*(dx(j)**2._wp)/maxval(1/(rho*Re_l)) end if end if - if (any(Re_size > 0)) then - max_dt(j, k, l) = min(icfl_dt, vcfl_dt) + if (viscous) then + max_dt = min(icfl_dt, vcfl_dt) else - max_dt(j, k, l) = icfl_dt + max_dt = icfl_dt end if end subroutine s_compute_dt_from_cfl diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index c3f38ef500..c19eb94846 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -31,6 +31,7 @@ module m_start_up use m_viscous use m_bubbles_EE use m_bubbles_EL + use m_particles_EL !< Lagrange particle dynamics routines use ieee_arithmetic use m_helper_basic use m_helper @@ -59,6 +60,7 @@ module m_start_up contains !> Read data files. Dispatch subroutine that replaces procedure pointer. + !! @param q_cons_vf Conservative variables impure subroutine s_read_data_files(q_cons_vf) type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf @@ -71,24 +73,26 @@ contains end subroutine s_read_data_files - !> Verify the input file exists and read it + !> The purpose of this procedure is to first verify that an input file has been made available by the user. Provided that this + !! is so, the input file is then read in. impure subroutine s_read_input_file + ! Relative path to the input file provided by the user character(LEN=name_len), parameter :: file_path = './simulation.inp' logical :: file_exist !< Logical used to check the existence of the input file integer :: iostatus - ! Integer to check iostat of file read + !! Integer to check iostat of file read character(len=1000) :: line + ! 
Namelist of the global parameters which may be specified by user + namelist /user_inputs/ case_dir, run_time_info, m, n, p, dt, & t_step_start, t_step_stop, t_step_save, t_step_print, & model_eqns, mpp_lim, time_stepper, weno_eps, & rdma_mpi, teno_CT, mp_weno, weno_avg, & riemann_solver, low_Mach, wave_speeds, avg_state, & bc_x, bc_y, bc_z, & - x_a, y_a, z_a, x_b, y_b, z_b, & - x_domain, y_domain, z_domain, & hypoelasticity, & ib, num_ibs, patch_ib, & ib_state_wrt, & @@ -110,8 +114,11 @@ contains & g_y, g_z, n_start, t_save, t_stop, cfl_adap_dt, cfl_const_dt, cfl_target, surface_tension, bubbles_lagrange, & & lag_params, hyperelasticity, R0ref, num_bc_patches, Bx0, cont_damage, tau_star, cont_damage_s, alpha_bar, & & hyper_cleaning, hyper_cleaning_speed, hyper_cleaning_tau, alf_factor, num_igr_iters, num_igr_warm_start_iters, & - & int_comp, ic_eps, ic_beta, nv_uvm_out_of_core, nv_uvm_igr_temps_on_gpu, nv_uvm_pref_gpu, down_sample, fft_wrt + & int_comp, ic_eps, ic_beta, nv_uvm_out_of_core, nv_uvm_igr_temps_on_gpu, nv_uvm_pref_gpu, down_sample, fft_wrt, & + & particles_lagrange, particle_pp + ! Checking that an input file has been provided by the user. If it has, then the input file is read in, otherwise, + ! simulation exits. inquire (FILE=trim(file_path), EXIST=file_exist) if (file_exist) then @@ -131,6 +138,7 @@ contains bodyForces = .true. end if + ! Store m,n,p into global m,n,p m_glb = m n_glb = n p_glb = p @@ -139,20 +147,30 @@ contains if (cfl_adap_dt .or. cfl_const_dt) cfl_dt = .true. - if (any((/bc_x%beg, bc_x%end, bc_y%beg, bc_y%end, bc_z%beg, bc_z%end/) == -17) .or. num_bc_patches > 0) then + if (any((/bc_x%beg, bc_x%end, bc_y%beg, bc_y%end, bc_z%beg, bc_z%end/) == BC_DIRICHLET) .or. num_bc_patches > 0) then bc_io = .true. end if + + if (bc_x%beg == BC_PERIODIC .and. bc_x%end == BC_PERIODIC) periodic_bc(1) = .true. + if (bc_y%beg == BC_PERIODIC .and. bc_y%end == BC_PERIODIC) periodic_bc(2) = .true. + if (bc_z%beg == BC_PERIODIC .and. 
bc_z%end == BC_PERIODIC) periodic_bc(3) = .true. else call s_mpi_abort(trim(file_path) // ' is missing. Exiting.') end if end subroutine s_read_input_file - !> Validate that all user-provided inputs form a consistent simulation configuration + !> The goal of this procedure is to verify that each of the user provided inputs is valid and that their combination constitutes + !! a meaningful configuration for the simulation. impure subroutine s_check_input_file + ! Relative path to the current directory file in the case directory character(LEN=path_len) :: file_path - logical :: file_exist + + ! Logical used to check the existence of the current directory file + logical :: file_exist + + ! Logistics file_path = trim(case_dir) // '/.' @@ -167,15 +185,21 @@ contains end subroutine s_check_input_file - !> Read serial initial condition and grid data files and compute cell-width distributions + !> @brief Reads serial initial condition and grid data files and computes cell-width distributions. + !! @param q_cons_vf Cell-averaged conservative variables impure subroutine s_read_serial_data_files(q_cons_vf) type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf character(LEN=path_len + 2*name_len) :: t_step_dir !< Relative path to the starting time-step directory character(LEN=path_len + 3*name_len) :: file_path !< Relative path to the grid and conservative variables data files logical :: file_exist + ! Logical used to check the existence of the data files + integer :: i, r + ! Confirming that the directory from which the initial condition and the grid data files are to be read in exists and + ! exiting otherwise + if (cfl_dt) then write (t_step_dir, '(A,I0,A,I0)') trim(case_dir) // '/p_all/p', proc_rank, '/', n_start else @@ -195,6 +219,7 @@ contains call s_assign_default_bc_type(bc_type) end if + ! 
Cell-boundary Locations in x-direction file_path = trim(t_step_dir) // '/x_cb.dat' inquire (FILE=trim(file_path), EXIST=file_exist) @@ -218,6 +243,7 @@ contains end do end if + ! Cell-boundary Locations in y-direction if (n > 0) then file_path = trim(t_step_dir) // '/y_cb.dat' @@ -234,6 +260,7 @@ contains y_cc(0:n) = y_cb(-1:n - 1) + dy(0:n)/2._wp end if + ! Cell-boundary Locations in z-direction if (p > 0) then file_path = trim(t_step_dir) // '/z_cb.dat' @@ -293,7 +320,8 @@ contains end subroutine s_read_serial_data_files - !> Read parallel initial condition and grid data files via MPI I/O + !> @brief Reads parallel initial condition and grid data files via MPI I/O. + !! @param q_cons_vf Conservative variables impure subroutine s_read_parallel_data_files(q_cons_vf) type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf @@ -321,6 +349,7 @@ contains allocate (y_cb_glb(-1:n_glb)) allocate (z_cb_glb(-1:p_glb)) + ! Read in cell boundary locations in x-direction file_loc = trim(case_dir) // '/restart_data' // trim(mpiiofs) // 'x_cb.dat' inquire (FILE=trim(file_loc), EXIST=file_exist) @@ -343,8 +372,11 @@ contains call s_mpi_abort('File ' // trim(file_loc) // ' is missing. Exiting.') end if + ! Assigning local cell boundary locations x_cb(-1:m) = x_cb_glb((start_idx(1) - 1):(start_idx(1) + m)) + ! Computing the cell width distribution dx(0:m) = x_cb(0:m) - x_cb(-1:m - 1) + ! Computing the cell center locations x_cc(0:m) = x_cb(-1:m - 1) + dx(0:m)/2._wp if (ib) then @@ -358,6 +390,7 @@ contains end if if (n > 0) then + ! Read in cell boundary locations in y-direction file_loc = trim(case_dir) // '/restart_data' // trim(mpiiofs) // 'y_cb.dat' inquire (FILE=trim(file_loc), EXIST=file_exist) @@ -370,11 +403,15 @@ contains call s_mpi_abort('File ' // trim(file_loc) // ' is missing. Exiting.') end if + ! Assigning local cell boundary locations y_cb(-1:n) = y_cb_glb((start_idx(2) - 1):(start_idx(2) + n)) + ! 
Computing the cell width distribution dy(0:n) = y_cb(0:n) - y_cb(-1:n - 1) + ! Computing the cell center locations y_cc(0:n) = y_cb(-1:n - 1) + dy(0:n)/2._wp if (p > 0) then + ! Read in cell boundary locations in z-direction file_loc = trim(case_dir) // '/restart_data' // trim(mpiiofs) // 'z_cb.dat' inquire (FILE=trim(file_loc), EXIST=file_exist) @@ -387,8 +424,11 @@ contains call s_mpi_abort('File ' // trim(file_loc) // 'is missing. Exiting.') end if + ! Assigning local cell boundary locations z_cb(-1:p) = z_cb_glb((start_idx(3) - 1):(start_idx(3) + p)) + ! Computing the cell width distribution dz(0:p) = z_cb(0:p) - z_cb(-1:p - 1) + ! Computing the cell center locations z_cc(0:p) = z_cb(-1:p - 1) + dz(0:p)/2._wp end if end if @@ -407,6 +447,7 @@ contains if (file_exist) then call MPI_FILE_OPEN(MPI_COMM_SELF, file_loc, MPI_MODE_RDONLY, mpi_info_int, ifile, ierr) + ! Initialize MPI data I/O if (down_sample) then call s_initialize_mpi_data_ds(q_cons_vf) else @@ -418,17 +459,20 @@ contains end if if (down_sample) then + ! Size of local arrays data_size = (m_ds + 3)*(n_ds + 3)*(p_ds + 3) m_glb_read = m_glb_ds + 1 n_glb_read = n_glb_ds + 1 p_glb_read = p_glb_ds + 1 else + ! Size of local arrays data_size = (m + 1)*(n + 1)*(p + 1) m_glb_read = m_glb + 1 n_glb_read = n_glb + 1 p_glb_read = p_glb + 1 end if + ! Resize some integers so MPI can read even the biggest file m_MOK = int(m_glb_read + 1, MPI_OFFSET_KIND) n_MOK = int(m_glb_read + 1, MPI_OFFSET_KIND) p_MOK = int(m_glb_read + 1, MPI_OFFSET_KIND) @@ -437,8 +481,9 @@ contains str_MOK = int(name_len, MPI_OFFSET_KIND) NVARS_MOK = int(sys_size, MPI_OFFSET_KIND) + ! Read the data for each variable if (bubbles_euler .or. elasticity) then - do i = 1, sys_size ! adv_idx%end + do i = 1, sys_size var_MOK = int(i, MPI_OFFSET_KIND) call MPI_FILE_READ(ifile, MPI_IO_DATA%var(i)%sf, data_size*mpi_io_type, mpi_io_p, status, ierr) @@ -474,6 +519,7 @@ contains call s_mpi_abort('File ' // trim(file_loc) // ' is missing. 
Exiting.') end if else + ! Open the file to read conservative variables if (cfl_dt) then write (file_loc, '(I0,A)') n_start, '.dat' else @@ -485,14 +531,18 @@ contains if (file_exist) then call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, MPI_MODE_RDONLY, mpi_info_int, ifile, ierr) + ! Initialize MPI data I/O + if (ib) then call s_initialize_mpi_data(q_cons_vf, ib_markers) else call s_initialize_mpi_data(q_cons_vf) end if + ! Size of local arrays data_size = (m + 1)*(n + 1)*(p + 1) + ! Resize some integers so MPI can read even the biggest file m_MOK = int(m_glb + 1, MPI_OFFSET_KIND) n_MOK = int(n_glb + 1, MPI_OFFSET_KIND) p_MOK = int(p_glb + 1, MPI_OFFSET_KIND) @@ -501,9 +551,11 @@ contains str_MOK = int(name_len, MPI_OFFSET_KIND) NVARS_MOK = int(sys_size, MPI_OFFSET_KIND) + ! Read the data for each variable if (bubbles_euler .or. elasticity) then - do i = 1, sys_size ! adv_idx%end + do i = 1, sys_size var_MOK = int(i, MPI_OFFSET_KIND) + ! Initial displacement to skip at beginning of file disp = m_MOK*max(MOK, n_MOK)*max(MOK, p_MOK)*WP_MOK*(var_MOK - 1) call MPI_FILE_SET_VIEW(ifile, disp, mpi_io_p, MPI_IO_DATA%view(i), 'native', mpi_info_int, ierr) @@ -513,6 +565,7 @@ contains if (qbmm .and. .not. polytropic) then do i = sys_size + 1, sys_size + 2*nb*nnode var_MOK = int(i, MPI_OFFSET_KIND) + ! Initial displacement to skip at beginning of file disp = m_MOK*max(MOK, n_MOK)*max(MOK, p_MOK)*WP_MOK*(var_MOK - 1) call MPI_FILE_SET_VIEW(ifile, disp, mpi_io_p, MPI_IO_DATA%view(i), 'native', mpi_info_int, ierr) @@ -523,6 +576,7 @@ contains do i = 1, sys_size var_MOK = int(i, MPI_OFFSET_KIND) + ! 
Initial displacement to skip at beginning of file disp = m_MOK*max(MOK, n_MOK)*max(MOK, p_MOK)*WP_MOK*(var_MOK - 1) call MPI_FILE_SET_VIEW(ifile, disp, mpi_io_p, MPI_IO_DATA%view(i), 'native', mpi_info_int, ierr) @@ -549,7 +603,9 @@ contains end subroutine s_read_parallel_data_files - !> Initialize internal-energy equations from phase mass, mixture momentum, and total energy + !> The purpose of this procedure is to initialize the values of the internal-energy equations of each phase from the mass of + !! each phase, the mixture momentum and mixture-total-energy equations. + !! @param v_vf conservative variables subroutine s_initialize_internal_energy_equations(v_vf) type(scalar_field), dimension(sys_size), intent(inout) :: v_vf @@ -606,7 +662,7 @@ contains end subroutine s_initialize_internal_energy_equations - !> Advance the simulation by one time step, handling CFL-based dt and time-stepper dispatch + !> @brief Advances the simulation by one time step, handling CFL-based dt and time-stepper dispatch. impure subroutine s_perform_time_step(t_step, time_avg) integer, intent(inout) :: t_step @@ -672,9 +728,8 @@ contains end subroutine s_perform_time_step - !> Collect per-process wall-clock times and write aggregate performance metrics to file + !> @brief Collects per-process wall-clock times and writes aggregate performance metrics to file. impure subroutine s_save_performance_metrics(time_avg, time_final, io_time_avg, io_time_final, proc_time, io_proc_time, & - & file_exists) real(wp), intent(inout) :: time_avg, time_final @@ -733,7 +788,7 @@ contains end subroutine s_save_performance_metrics - !> Save conservative variable data to disk at the current time step + !> @brief Saves conservative variable data to disk at the current time step. 
impure subroutine s_save_data(t_step, start, finish, io_time_avg, nt) integer, intent(inout) :: t_step @@ -796,7 +851,7 @@ contains if (bubbles_lagrange) then $:GPU_UPDATE(host='[lag_id, mtn_pos, mtn_posPrev, mtn_vel, intfc_rad, intfc_vel, bub_R0, Rmax_stats, Rmin_stats, & & bub_dphidt, gas_p, gas_mv, gas_mg, gas_betaT, gas_betaC]') - do i = 1, nBubs + do i = 1, n_el_bubs_loc if (ieee_is_nan(intfc_rad(i, 1)) .or. intfc_rad(i, 1) <= 0._wp) then call s_mpi_abort("Bubble radius is negative or NaN, please reduce dt.") end if @@ -807,6 +862,20 @@ contains $:GPU_UPDATE(host='[Rmax_stats, Rmin_stats, gas_p, gas_mv, intfc_vel]') call s_write_restart_lag_bubbles(save_count) ! parallel if (lag_params%write_bubbles_stats) call s_write_lag_bubble_stats() + else if (particles_lagrange) then + $:GPU_UPDATE(host='[lag_part_id, particle_pos, particle_posPrev, particle_vel, particle_rad, particle_R0, & + & Rmax_stats_part, Rmin_stats_part, particle_mass]') + do i = 1, n_el_particles_loc + if (ieee_is_nan(particle_rad(i, 1)) .or. particle_rad(i, 1) <= 0._wp) then + call s_mpi_abort("Particle radius is negative or NaN, please reduce dt.") + end if + end do + + $:GPU_UPDATE(host='[q_particles(1)%sf]') + call s_write_data_files(q_cons_ts(stor)%vf, q_T_sf, q_prim_vf, save_count, bc_type, q_particles(1)) + $:GPU_UPDATE(host='[Rmax_stats_part, Rmin_stats_part]') + call s_write_restart_lag_particles(save_count) ! parallel + if (lag_params%write_bubbles_stats) call s_write_lag_particle_stats() else call s_write_data_files(q_cons_ts(stor)%vf, q_T_sf, q_prim_vf, save_count, bc_type) end if @@ -827,7 +896,7 @@ contains end subroutine s_save_data - !> Initialize all simulation sub-modules in the required dependency order + !> @brief Initializes all simulation sub-modules in the required dependency order. impure subroutine s_initialize_modules integer :: m_ds, n_ds, p_ds @@ -848,6 +917,11 @@ contains if (bubbles_euler .or. 
bubbles_lagrange) then call s_initialize_bubbles_model() end if + + if (particles_lagrange) then + call s_initialize_particles_model() + end if + call s_initialize_mpi_common_module() call s_initialize_mpi_proxy_module() call s_initialize_variables_conversion_module() @@ -888,6 +962,7 @@ contains end do end if + ! Reading in the user provided initial condition and grid data if (down_sample) then call s_read_data_files(q_cons_temp) call s_upsample_data(q_cons_ts(1)%vf, q_cons_temp) @@ -902,6 +977,7 @@ contains call s_read_data_files(q_cons_ts(1)%vf) end if + ! Populating the buffers of the grid variables using the boundary conditions call s_populate_grid_variables_buffers() if (model_eqns == 3) call s_initialize_internal_energy_equations(q_cons_ts(1)%vf) @@ -931,14 +1007,16 @@ contains end if call s_initialize_derived_variables() - if (bubbles_lagrange) call s_initialize_bubbles_EL_module(q_cons_ts(1)%vf) + + if (bubbles_lagrange) call s_initialize_bubbles_EL_module(q_cons_ts(1)%vf, bc_type) + if (particles_lagrange) call s_initialize_particles_EL_module(q_cons_ts(1)%vf, bc_type) if (hypoelasticity) call s_initialize_hypoelastic_module() if (hyperelasticity) call s_initialize_hyperelastic_module() end subroutine s_initialize_modules - !> Set up the MPI execution environment, bind GPUs, and decompose the computational domain + !> @brief Sets up the MPI execution environment, binds GPUs, and decomposes the computational domain. impure subroutine s_initialize_mpi_domain integer :: ierr @@ -955,8 +1033,11 @@ contains #endif #endif + ! Initializing MPI execution environment + call s_mpi_initialize() + ! Bind GPUs if OpenACC is enabled #ifdef MFC_GPU #ifndef MFC_MPI local_size = 1 @@ -979,6 +1060,9 @@ contains #endif #endif + ! The rank 0 processor assigns default values to the user inputs prior to reading them in from the input file. Next, the + ! user inputs are read and their consistency is checked. The identification of any inconsistencies will result in the + ! 
termination of the simulation. if (proc_rank == 0) then call s_assign_default_values_to_user_inputs() call s_read_input_file() @@ -1000,6 +1084,9 @@ contains #endif end if + ! Broadcasting the user inputs to all of the processors and performing the parallel computational domain decomposition. + ! Neither procedure has to be carried out if the simulation is in fact not truly executed in parallel. + call s_mpi_bcast_user_inputs() call s_initialize_parallel_io() @@ -1008,10 +1095,11 @@ contains end subroutine s_initialize_mpi_domain - !> Transfer initial conservative variable and model parameter data to the GPU device + !> @brief Transfers initial conservative variable and model parameter data to the GPU device. subroutine s_initialize_gpu_vars integer :: i + ! Update GPU DATA if (.not. down_sample) then do i = 1, sys_size @@ -1052,6 +1140,10 @@ contains $:GPU_UPDATE(device='[bc_y%vb1, bc_y%vb2, bc_y%vb3, bc_y%ve1, bc_y%ve2, bc_y%ve3]') $:GPU_UPDATE(device='[bc_z%vb1, bc_z%vb2, bc_z%vb3, bc_z%ve1, bc_z%ve2, bc_z%ve3]') + $:GPU_UPDATE(device='[bc_x%beg, bc_x%end]') + $:GPU_UPDATE(device='[bc_y%beg, bc_y%end]') + $:GPU_UPDATE(device='[bc_z%beg, bc_z%end]') + $:GPU_UPDATE(device='[bc_x%grcbc_in, bc_x%grcbc_out, bc_x%grcbc_vel_out]') $:GPU_UPDATE(device='[bc_y%grcbc_in, bc_y%grcbc_out, bc_y%grcbc_vel_out]') $:GPU_UPDATE(device='[bc_z%grcbc_in, bc_z%grcbc_out, bc_z%grcbc_vel_out]') @@ -1078,7 +1170,7 @@ contains end subroutine s_initialize_gpu_vars - !> Finalize and deallocate all simulation sub-modules in reverse initialization order + !> @brief Finalizes and deallocates all simulation sub-modules in reverse initialization order. impure subroutine s_finalize_modules call s_finalize_time_steppers_module() @@ -1105,6 +1197,7 @@ contains call s_finalize_boundary_common_module() if (relax) call s_finalize_relaxation_solver_module() if (bubbles_lagrange) call s_finalize_lagrangian_solver() + if (particles_lagrange) call s_finalize_particle_lagrangian_solver() if (viscous .and. 
(.not. igr)) then call s_finalize_viscous_module() end if @@ -1113,6 +1206,7 @@ contains if (surface_tension) call s_finalize_surface_tension_module() if (bodyForces) call s_finalize_body_forces_module() + ! Terminating MPI execution environment call s_mpi_finalize() end subroutine s_finalize_modules diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index 4a741a4068..852728ae6f 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -15,6 +15,7 @@ module m_time_steppers use m_data_output use m_bubbles_EE use m_bubbles_EL + use m_particles_EL !< Lagrange particle dynamics routines use m_ibm use m_hyperelastic use m_mpi_proxy @@ -33,18 +34,18 @@ module m_time_steppers type(scalar_field), allocatable, dimension(:) :: q_prim_vf !< Cell-average primitive variables at the current time-stage type(scalar_field), allocatable, dimension(:) :: rhs_vf !< Cell-average RHS variables at the current time-stage type(integer_field), allocatable, dimension(:,:) :: bc_type !< Boundary condition identifiers + !> Cell-average primitive variables at consecutive TIMESTEPS type(vector_field), allocatable, dimension(:) :: q_prim_ts1, q_prim_ts2 real(wp), allocatable, dimension(:,:,:,:,:) :: rhs_pb type(scalar_field) :: q_T_sf !< Cell-average temperature variables at the current time-stage real(wp), allocatable, dimension(:,:,:,:,:) :: rhs_mv - real(wp), allocatable, dimension(:,:,:) :: max_dt integer, private :: num_ts !< Number of time stages in the time-stepping scheme integer :: stor !< storage index real(wp), allocatable, dimension(:,:) :: rk_coef integer, private :: num_probe_ts - $:GPU_DECLARE(create='[q_cons_ts, q_prim_vf, q_T_sf, rhs_vf, q_prim_ts1, q_prim_ts2, rhs_mv, rhs_pb, max_dt, rk_coef, stor, bc_type]') + $:GPU_DECLARE(create='[q_cons_ts, q_prim_vf, q_T_sf, rhs_vf, q_prim_ts1, q_prim_ts2, rhs_mv, rhs_pb, rk_coef, stor, bc_type]') !> @cond #if defined(__NVCOMPILER_GPU_UNIFIED_MEM) @@ -59,7 +60,8 @@ module 
m_time_steppers contains - !> Initialize the time steppers module + !> The computation of parameters, the allocation of memory, the association of pointers and/or the execution of any other + !! procedures that are necessary to setup the module. impure subroutine s_initialize_time_steppers_module #ifdef FRONTIER_UNIFIED @@ -71,8 +73,8 @@ contains #endif #endif integer :: i, j !< Generic loop iterators - ! Setting number of time-stages for selected time-stepping scheme + ! Setting number of time-stages for selected time-stepping scheme if (time_stepper == 1) then num_ts = 1 else if (any(time_stepper == (/2, 3/))) then @@ -328,7 +330,7 @@ contains @:ALLOCATE(rhs_pb(idwbuff(1)%beg:idwbuff(1)%beg + 1, idwbuff(2)%beg:idwbuff(2)%beg + 1, & & idwbuff(3)%beg:idwbuff(3)%beg + 1, 1:nnode, 1:nb)) - else + else ! All other timesteps @:ALLOCATE(pb_ts(1)%sf(0,0,0,0,0)) @:ACC_SETUP_SFs(pb_ts(1)) @@ -362,7 +364,7 @@ contains @:ALLOCATE(rhs_mv(idwbuff(1)%beg:idwbuff(1)%beg + 1, idwbuff(2)%beg:idwbuff(2)%beg + 1, & & idwbuff(3)%beg:idwbuff(3)%beg + 1, 1:nnode, 1:nb)) - else + else ! All other timesteps @:ALLOCATE(mv_ts(1)%sf(0,0,0,0,0)) @:ACC_SETUP_SFs(mv_ts(1)) @@ -382,7 +384,7 @@ contains @:ACC_SETUP_SFs(rhs_vf(i)) @:PREFER_GPU(rhs_vf(i)%sf) end do - else + else ! All other timesteps do i = 1, sys_size @:ALLOCATE(rhs_vf(i)%sf(0:m, 0:n, 0:p)) @:ACC_SETUP_SFs(rhs_vf(i)) @@ -399,10 +401,6 @@ contains call s_open_ib_state_file() end if - if (cfl_dt) then - @:ALLOCATE(max_dt(0:m, 0:n, 0:p)) - end if - ! Allocating arrays to store the bc types @:ALLOCATE(bc_type(1:num_dims,1:2)) @@ -452,7 +450,7 @@ contains end subroutine s_initialize_time_steppers_module - !> Advance the solution one full step using a TVD Runge-Kutta time integrator + !> @brief Advances the solution one full step using a TVD Runge-Kutta time integrator. impure subroutine s_tvd_rk(t_step, time_avg, nstage) #ifdef _CRAYFTN @@ -497,7 +495,12 @@ contains end if end if - if (bubbles_lagrange .and. .not. 
adap_dt) call s_update_lagrange_tdv_rk(stage=s) + if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(q_prim_vf, bc_type, stage=s) + + if (particles_lagrange) then + call s_update_lagrange_particles_tdv_rk(q_prim_vf, bc_type, stage=s) + end if + $:GPU_PARALLEL_LOOP(collapse=4) do i = 1, sys_size do l = 0, p @@ -520,6 +523,7 @@ contains end do end do $:END_GPU_PARALLEL_LOOP() + ! Evolve pb and mv for non-polytropic qbmm if (qbmm .and. (.not. polytropic)) then $:GPU_PARALLEL_LOOP(collapse=5) @@ -589,41 +593,41 @@ contains if (t_step >= 2) then wall_time_avg = (wall_time + (t_step - 2)*wall_time_avg)/(t_step - 1) - else + else ! All other timesteps wall_time_avg = 0._wp end if end subroutine s_tvd_rk !> Bubble source part in Strang operator splitting scheme + !! @param stage Current time-stage impure subroutine s_adaptive_dt_bubble(stage) integer, intent(in) :: stage type(vector_field) :: gm_alpha_qp - call s_convert_conservative_to_primitive_variables(q_cons_ts(1)%vf, q_T_sf, q_prim_vf, idwint) + call s_convert_conservative_to_primitive_variables(q_cons_ts(1)%vf, q_T_sf, q_prim_vf, idwbuff) if (bubbles_euler) then call s_compute_bubble_EE_source(q_cons_ts(1)%vf, q_prim_vf, rhs_vf, divu) call s_comp_alpha_from_n(q_cons_ts(1)%vf) else if (bubbles_lagrange) then call s_populate_variables_buffers(bc_type, q_prim_vf, pb_ts(1)%sf, mv_ts(1)%sf) - call s_compute_bubble_EL_dynamics(q_prim_vf, stage) - call s_transfer_data_to_tmp() - call s_smear_voidfraction() + call s_compute_bubble_EL_dynamics(q_prim_vf, bc_type, stage) + if (stage == 3) then if (lag_params%write_bubbles_stats) call s_calculate_lag_bubble_stats() if (lag_params%write_bubbles) then $:GPU_UPDATE(host='[gas_p, gas_mv, intfc_rad, intfc_vel]') - call s_write_lag_particles(mytime) + call s_write_lag_bubble_evol(mytime) end if - call s_write_void_evol(mytime) + if (lag_params%write_void_evol) call s_write_void_evol(mytime) end if end if end subroutine s_adaptive_dt_bubble - !> Compute the 
global time step size from CFL stability constraints across all cells
+    !> @brief Computes the global time step size from CFL stability constraints across all cells.
     impure subroutine s_compute_dt()
 
         real(wp) :: rho !< Cell-avg. density
@@ -644,6 +648,7 @@ contains
         real(wp) :: H !< Cell-avg. enthalpy
         real(wp), dimension(2) :: Re !< Cell-avg. Reynolds numbers
         type(vector_field) :: gm_alpha_qp
+        real(wp) :: max_dt
         real(wp) :: dt_local
 
         integer :: j, k, l !< Generic loop iterators
@@ -651,7 +656,9 @@ contains
             call s_convert_conservative_to_primitive_variables(q_cons_ts(1)%vf, q_T_sf, q_prim_vf, idwint)
         end if
 
-        $:GPU_PARALLEL_LOOP(collapse=3, private='[vel, alpha, Re, rho, vel_sum, pres, gamma, pi_inf, c, H, qv]')
+        dt_local = huge(1.0_wp)
+        $:GPU_PARALLEL_LOOP(collapse=3, private='[vel, alpha, Re, rho, vel_sum, pres, gamma, pi_inf, c, H, qv]', &
+            & reduction='[[dt_local]]', reductionOp='[min]')
         do l = 0, p
             do k = 0, n
                 do j = 0, m
@@ -665,18 +672,15 @@ contains
                     call s_compute_speed_of_sound(pres, rho, gamma, pi_inf, H, alpha, vel_sum, 0._wp, c, qv)
 
                     call s_compute_dt_from_cfl(vel, c, max_dt, rho, Re, j, k, l)
+                    dt_local = min(dt_local, max_dt)
                 end do
             end do
         end do
         $:END_GPU_PARALLEL_LOOP()
 
-        #:call GPU_PARALLEL(copyout='[dt_local]', copyin='[max_dt]')
-            dt_local = minval(max_dt)
-        #:endcall GPU_PARALLEL
-
         if (num_procs == 1) then
             dt = dt_local
-        else
+        else ! multiple ranks: take the global minimum dt across processes
            call s_mpi_allreduce_min(dt_local, dt)
         end if
@@ -684,7 +688,10 @@ contains
 
     end subroutine s_compute_dt
 
-    !> Apply the body forces source term at each Runge-Kutta stage
+    !> This subroutine applies the body forces source term at each Runge-Kutta stage
+    !! @param q_cons_vf Conservative variables
+    !! @param q_prim_vf_in Primitive variables
+    !! 
@param rhs_vf_in Right-hand side variables
subroutine s_apply_bodyforces(q_cons_vf, q_prim_vf_in, rhs_vf_in, ldt)

type(scalar_field), dimension(1:sys_size), intent(inout) :: q_cons_vf
@@ -712,7 +719,7 @@ contains

end subroutine s_apply_bodyforces

- !> Update immersed boundary positions and velocities at the current Runge-Kutta stage
+ !> @brief Updates immersed boundary positions and velocities at the current Runge-Kutta stage.
subroutine s_propagate_immersed_boundaries(s)

integer, intent(in) :: s
@@ -754,8 +761,9 @@ contains
! update the angular velocity with the torque value
patch_ib(i)%angular_vel = (patch_ib(i)%angular_vel*patch_ib(i)%moment) + (rk_coef(s, &
& 3)*dt*patch_ib(i)%torque/rk_coef(s, 4)) ! add the torque to the angular momentum
- call s_compute_moment_of_inertia(i, patch_ib(i)%angular_vel)
- ! update the moment of inertia to be based on the direction of the angular momentum
+ call s_compute_moment_of_inertia(i, &
+ & patch_ib(i)%angular_vel)
+ ! update the moment of inertia to be based on the direction of the angular momentum
patch_ib(i)%angular_vel = patch_ib(i)%angular_vel/patch_ib(i) &
& %moment ! convert back to angular velocity with the new moment of inertia
end if
@@ -780,7 +788,8 @@ contains

end subroutine s_propagate_immersed_boundaries

- !> Save the temporary q_prim_vf vector into q_prim_ts for use in p_main
+ !> This subroutine saves the temporary q_prim_vf vector into the q_prim_ts vector that is then used in p_main
+ !! @param t_step current time-step
subroutine s_time_step_cycling(t_step)

integer, intent(in) :: t_step
@@ -862,6 +871,7 @@ contains
use hipfort_check
#endif
integer :: i, j !< Generic loop iterators
+ ! 
Deallocating the cell-average conservative variables #if defined(__NVCOMPILER_GPU_UNIFIED_MEM) do j = 1, sys_size diff --git a/toolchain/mfc/params/definitions.py b/toolchain/mfc/params/definitions.py index 2f3631c228..a193d9df49 100644 --- a/toolchain/mfc/params/definitions.py +++ b/toolchain/mfc/params/definitions.py @@ -627,8 +627,8 @@ def get_value_label(param_name: str, value: int) -> str: }, # Bubbles "bubble_model": { - "choices": [1, 2, 3], - "value_labels": {1: "Gilmore", 2: "Keller-Miksis", 3: "Rayleigh-Plesset"}, + "choices": [0, 1, 2, 3], + "value_labels": {0: "Particle", 1: "Gilmore", 2: "Keller-Miksis", 3: "Rayleigh-Plesset"}, }, # Output "format": { @@ -880,6 +880,10 @@ def _load(): for n in ["polytropic", "bubbles_euler", "polydisperse", "qbmm", "bubbles_lagrange"]: _r(n, LOG, {"bubbles"}) + # Particles + for n in ["particles_lagrange"]: + _r(n, LOG, {"particles"}) + # Viscosity _r("viscous", LOG, {"viscosity"}) @@ -1053,6 +1057,13 @@ def _load(): _r(f"p_{d}", REAL, math=r"\f$\phi_" + d + r"\f$") _r(f"bf_{d}", LOG) + # Interfacial flow inputs + _r("normMag", REAL) + _r("p0_ic", REAL) + _r("g0_ic", REAL) + _r("normFac", REAL) + _r("interface_file", STR) + # INDEXED PARAMETERS # patch_icpp (10 patches) @@ -1149,10 +1160,12 @@ def _load(): ]: _r(f"bub_pp%{a}", REAL, {"bubbles"}, math=sym) + # particle_pp (particle properties) + for a in ["rho0ref_particle", "cp_particle"]: + _r(f"particle_pp%{a}", REAL, {"particles"}) + # patch_ib (immersed boundaries) — registered as indexed family for O(1) lookup. - # max_index is None so the parameter registry stays compact (no enumeration). - # The Fortran-side upper bound (num_patches_max in m_constants.fpp) is parsed - # and enforced by the case_validator, not by max_index here. + # max_index is sourced from Fortran's num_patches_max in m_constants.fpp. 
_ib_tags = {"ib"} _ib_attrs: Dict[str, tuple] = {} for a in ["geometry", "moving_ibm"]: @@ -1260,12 +1273,20 @@ def _load(): _r(f"simplex_params%perturb_vel_offset({d},{j})", REAL) # lag_params (Lagrangian bubbles) - for a in ["heatTransfer_model", "massTransfer_model", "pressure_corrector", "write_bubbles", "write_bubbles_stats"]: + for a in ["heatTransfer_model", "massTransfer_model", "pressure_corrector", "write_bubbles", "write_bubbles_stats", "pressure_force", "gravity_force", "write_void_evol"]: _r(f"lag_params%{a}", LOG, {"bubbles"}) - for a in ["solver_approach", "cluster_type", "smooth_type", "nBubs_glb"]: + for a in ["solver_approach", "cluster_type", "smooth_type", "nBubs_glb", "drag_model", "vel_model", "charNz"]: _r(f"lag_params%{a}", INT, {"bubbles"}) for a in ["epsilonb", "valmaxvoid", "charwidth", "c0", "rho0", "T0", "x0", "Thost"]: _r(f"lag_params%{a}", REAL, {"bubbles"}) + _r("lag_params%input_path", STR, {"bubbles"}) + + # --- lag_params (Lagrangian particles) --- + for a in ["nParticles_glb", "stokes_drag", "qs_drag_model", "added_mass_model", "interpolation_order"]: + _r(f"lag_params%{a}", INT, {"particles"}) + + for a in ["collision_force"]: + _r(f"lag_params%{a}", LOG, {"particles"}) # chem_params for a in ["diffusion", "reactions"]: diff --git a/toolchain/mfc/params/descriptions.py b/toolchain/mfc/params/descriptions.py index 7906b6d38d..2a15f8ed37 100644 --- a/toolchain/mfc/params/descriptions.py +++ b/toolchain/mfc/params/descriptions.py @@ -250,6 +250,14 @@ "lag_mg_wrt": "Write bubble gas mass", "lag_betaT_wrt": "Write bubble heat transfer coefficient", "lag_betaC_wrt": "Write bubble mass transfer coefficient", + # Interfacial flow parameters + "interface_file": "Path to interface geometry data file", + "normFac": "Interface normalization factor", + "normMag": "Interface normal magnitude", + "g0_ic": "Initial gas volume fraction for interfacial IC", + "p0_ic": "Initial pressure for interfacial IC", + # Particle parameters + 
"particles_lagrange": "Enable Lagrangian solid particle solver", } # Patterns for auto-generating descriptions of indexed parameters