From b638175a459fd436e305cb6b784d62d8e3a1b192 Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Wed, 10 Jun 2026 14:20:17 -0400 Subject: [PATCH 01/22] src: hoist shared equation-index core into m_global_parameters_common --- docs/module_categories.json | 1 + src/common/m_global_parameters_common.fpp | 267 +++++++++++++++++++ src/post_process/m_global_parameters.fpp | 306 +++++---------------- src/pre_process/m_global_parameters.fpp | 293 ++++----------------- src/simulation/m_global_parameters.fpp | 307 +++++----------------- 5 files changed, 463 insertions(+), 711 deletions(-) create mode 100644 src/common/m_global_parameters_common.fpp diff --git a/docs/module_categories.json b/docs/module_categories.json index 30b2bbbc72..f8ba379f2d 100644 --- a/docs/module_categories.json +++ b/docs/module_categories.json @@ -59,6 +59,7 @@ "modules": [ "m_derived_types", "m_global_parameters", + "m_global_parameters_common", "m_mpi_common", "m_mpi_proxy", "m_constants", diff --git a/src/common/m_global_parameters_common.fpp b/src/common/m_global_parameters_common.fpp new file mode 100644 index 0000000000..c42a3589d1 --- /dev/null +++ b/src/common/m_global_parameters_common.fpp @@ -0,0 +1,267 @@ +!> +!! @file +!! @brief Contains module m_global_parameters_common + +#:include 'case.fpp' +#:include 'macros.fpp' + +!> @brief Shared global parameters and equation-index setup for all three executables. Each per-target m_global_parameters uses this +!! module (default-public, so all symbols re-export to downstream consumers via two-hop use-association). +module m_global_parameters_common + +#ifdef MFC_MPI + use mpi +#endif + + use m_derived_types + use m_helper_basic + use m_thermochem, only: num_species + use m_constants, only: model_eqns_gamma_law, model_eqns_5eq, model_eqns_6eq, model_eqns_4eq, recon_type_weno, & + & recon_type_muscl, nnode + + implicit none + + ! All namelist-bound scalar and array declarations (per-target, auto-generated at CMake configure time) + #:include 'generated_decls.fpp' + + ! Case-optimization declarations: parameters (under MFC_CASE_OPTIMIZATION) or plain variables for + ! num_dims, num_vels, weno_polyn, muscl_polyn, weno_num_stencils, wenojs, igr, etc. +#ifdef MFC_SIMULATION + #:include 'generated_case_opt_decls.fpp' +#endif + + ! For pre_process and post_process: num_dims and num_vels are declared manually here + ! (sim gets them from generated_case_opt_decls.fpp above) +#ifndef MFC_SIMULATION + integer :: num_dims !< Number of spatial dimensions + integer :: num_vels !< Number of velocity components (different from num_dims for mhd) +#endif + + ! For pre_process: weno_polyn and muscl_polyn are declared manually here + ! (sim gets them from generated_case_opt_decls.fpp; post does not use them) +#ifdef MFC_PRE_PROCESS + integer :: weno_polyn !< Degree of the WENO polynomials + integer :: muscl_polyn !< Degree of the MUSCL polynomials +#endif + + !> @name Annotations of the structure of the state and flux vectors in terms of the size and configuration of the system of + !! equations + !> @{ + integer :: sys_size !< Number of unknowns in system of equations + type(eqn_idx_info) :: eqn_idx !< All conserved-variable equation index ranges and scalars + integer :: b_size !< Number of elements in the symmetric b tensor, plus one + integer :: tensor_size !< Number of elements in the full tensor plus one + !> @} + + !> @name Chemistry modeling (Fypp compile-time constant; same value in all targets) + !> @{ + logical, parameter :: chemistry = .${chemistry}$. + !> @} + + !> @name Elasticity and shear stress state (identical across all three executables) + !> @{ + logical :: elasticity !< elasticity modeling, true for hyper or hypo + integer :: shear_num !< Number of shear stress components + integer, dimension(3) :: shear_indices !< Indices of the stress components that represent shear stress + integer :: shear_BC_flip_num !< Number of shear stress components to reflect for boundary conditions + integer, dimension(3, 2) :: shear_BC_flip_indices !< Shear stress BC reflection indices (1:3, 1:shear_BC_flip_num) + !> @} + +#ifdef MFC_SIMULATION + $:GPU_DECLARE(create='[sys_size, eqn_idx, b_size, tensor_size]') +#endif + +contains + + !> Initialize equation-index state (eqn_idx, sys_size, b_size, tensor_size) from the namelist parameters. This is the shared + !! skeleton: it covers the model_eqns dispatch, all eqn_idx field assignments, and the elasticity/surface-tension/chemistry + !! extensions. + !! + !! @param nmom_in Number of carried moments per R0 location (per-target: pre/post pass an + !! integer variable; sim passes its integer parameter nmom = 6). Used only in the 5eq + !! qbmm bubble-index calculation (eqn_idx%bub%end = eqn_idx%adv%end + nb*nmom_in). + !! + !! Per-target callers are responsible for the following after this call: + !! - qbmm_idx allocations and fills (diverge between pre vs sim/post) + !! - sim-only: gam = bub_pp%gam_g, nmomsp/nmomtot, Re_idx allocation, GPU_UPDATE calls + !! - post-only: beta_idx increment (bubbles_lagrange path), offset/grid allocations + impure subroutine s_initialize_eqn_idx(nmom_in) + + integer, intent(in) :: nmom_in + + ! Gamma/Pi_inf Model + + if (model_eqns == model_eqns_gamma_law) then + ! Annotating structure of the state and flux vectors belonging to the system of + ! equations defined by the selected number of spatial dimensions and the gamma/pi_inf model + eqn_idx%cont%beg = 1 + eqn_idx%cont%end = eqn_idx%cont%beg + eqn_idx%mom%beg = eqn_idx%cont%end + 1 + eqn_idx%mom%end = eqn_idx%cont%end + num_vels + eqn_idx%E = eqn_idx%mom%end + 1 + eqn_idx%adv%beg = eqn_idx%E + 1 + eqn_idx%adv%end = eqn_idx%adv%beg + 1 + eqn_idx%gamma = eqn_idx%adv%beg + eqn_idx%pi_inf = eqn_idx%adv%end + sys_size = eqn_idx%adv%end + + ! Volume Fraction Model (5-equation model) + else if (model_eqns == model_eqns_5eq) then + ! Annotating structure of the state and flux vectors belonging to the system of + ! equations defined by the selected number of spatial dimensions and the volume fraction model + eqn_idx%cont%beg = 1 + eqn_idx%cont%end = num_fluids + eqn_idx%mom%beg = eqn_idx%cont%end + 1 + eqn_idx%mom%end = eqn_idx%cont%end + num_vels + eqn_idx%E = eqn_idx%mom%end + 1 + + if (igr) then + ! IGR: volume fractions after energy (N-1 for N fluids; skipped when num_fluids=1) + eqn_idx%adv%beg = eqn_idx%E + 1 + eqn_idx%adv%end = eqn_idx%E + num_fluids - 1 + else + ! WENO/MUSCL + Riemann tracks a total of (N) volume fractions for N fluids + eqn_idx%adv%beg = eqn_idx%E + 1 + eqn_idx%adv%end = eqn_idx%E + num_fluids + end if + + sys_size = eqn_idx%adv%end + + if (bubbles_euler) then + eqn_idx%alf = eqn_idx%adv%end + else + eqn_idx%alf = 1 + end if + + if (bubbles_euler) then + eqn_idx%bub%beg = sys_size + 1 + if (qbmm) then + eqn_idx%bub%end = eqn_idx%adv%end + nb*nmom_in + else + if (.not. polytropic) then + eqn_idx%bub%end = sys_size + 4*nb + else + eqn_idx%bub%end = sys_size + 2*nb + end if + end if + sys_size = eqn_idx%bub%end + + if (adv_n) then + eqn_idx%n = eqn_idx%bub%end + 1 + sys_size = eqn_idx%n + end if + end if + + if (mhd) then + eqn_idx%B%beg = sys_size + 1 + if (n == 0) then + eqn_idx%B%end = sys_size + 2 ! 1D: By, Bz + else + eqn_idx%B%end = sys_size + 3 ! 2D/3D: Bx, By, Bz + end if + sys_size = eqn_idx%B%end + end if + + ! Volume Fraction Model (6-equation model) + else if (model_eqns == model_eqns_6eq) then + ! Annotating structure of the state and flux vectors belonging to the system of + ! equations defined by the selected number of spatial dimensions and the volume fraction model + eqn_idx%cont%beg = 1 + eqn_idx%cont%end = num_fluids + eqn_idx%mom%beg = eqn_idx%cont%end + 1 + eqn_idx%mom%end = eqn_idx%cont%end + num_vels + eqn_idx%E = eqn_idx%mom%end + 1 + eqn_idx%adv%beg = eqn_idx%E + 1 + eqn_idx%adv%end = eqn_idx%E + num_fluids +#ifdef MFC_SIMULATION + eqn_idx%alf = eqn_idx%adv%end +#endif + eqn_idx%int_en%beg = eqn_idx%adv%end + 1 + eqn_idx%int_en%end = eqn_idx%adv%end + num_fluids + sys_size = eqn_idx%int_en%end + else if (model_eqns == model_eqns_4eq) then + ! 4-equation model with subgrid bubbles + eqn_idx%cont%beg = 1 + eqn_idx%cont%end = 1 + eqn_idx%mom%beg = eqn_idx%cont%end + 1 + eqn_idx%mom%end = eqn_idx%cont%end + num_vels + eqn_idx%E = eqn_idx%mom%end + 1 + eqn_idx%adv%beg = eqn_idx%E + 1 + eqn_idx%adv%end = eqn_idx%adv%beg + eqn_idx%alf = eqn_idx%adv%end + sys_size = eqn_idx%adv%end + + if (bubbles_euler) then + eqn_idx%bub%beg = sys_size + 1 + eqn_idx%bub%end = sys_size + 2*nb + if (.not. polytropic) then + eqn_idx%bub%end = sys_size + 4*nb + end if + sys_size = eqn_idx%bub%end + end if + end if + + if (model_eqns == model_eqns_5eq .or. model_eqns == model_eqns_6eq) then + if (hypoelasticity .or. hyperelasticity) then + elasticity = .true. + eqn_idx%stress%beg = sys_size + 1 + eqn_idx%stress%end = sys_size + (num_dims*(num_dims + 1))/2 + if (cyl_coord) eqn_idx%stress%end = eqn_idx%stress%end + 1 + ! number of stresses is 1 in 1D, 3 in 2D, 4 in 2D-Axisym, 6 in 3D + sys_size = eqn_idx%stress%end + + ! shear stress index is 2 for 2D and 2,4,5 for 3D + if (num_dims == 1) then + shear_num = 0 + else if (num_dims == 2) then + shear_num = 1 + shear_indices(1) = eqn_idx%stress%beg - 1 + 2 + shear_BC_flip_num = 1 + shear_BC_flip_indices(1:2,1) = shear_indices(1) + ! Both x-dir and y-dir: flip tau_xy only + else if (num_dims == 3) then + shear_num = 3 + shear_indices(1:3) = eqn_idx%stress%beg - 1 + (/2, 4, 5/) + shear_BC_flip_num = 2 + shear_BC_flip_indices(1,1:2) = shear_indices((/1, 2/)) + shear_BC_flip_indices(2,1:2) = shear_indices((/1, 3/)) + shear_BC_flip_indices(3,1:2) = shear_indices((/2, 3/)) + ! x-dir: flip tau_xy and tau_xz; y-dir: flip tau_xy and tau_yz; z-dir: flip tau_xz and tau_yz + end if + end if + + if (hyperelasticity) then + ! number of entries in the symmetric b tensor plus the jacobian + b_size = (num_dims*(num_dims + 1))/2 + 1 + tensor_size = num_dims**2 + 1 + eqn_idx%xi%beg = sys_size + 1 + eqn_idx%xi%end = sys_size + num_dims + ! adding equations for the xi field and the elastic energy + sys_size = eqn_idx%xi%end + 1 + end if + + if (surface_tension) then + eqn_idx%c = sys_size + 1 + sys_size = eqn_idx%c + end if + + if (cont_damage) then + eqn_idx%damage = sys_size + 1 + sys_size = eqn_idx%damage + end if + + if (hyper_cleaning) then + eqn_idx%psi = sys_size + 1 + sys_size = eqn_idx%psi + end if + end if + + if (chemistry) then + eqn_idx%species%beg = sys_size + 1 + eqn_idx%species%end = sys_size + num_species + sys_size = eqn_idx%species%end + end if + + end subroutine s_initialize_eqn_idx + +end module m_global_parameters_common diff --git a/src/post_process/m_global_parameters.fpp b/src/post_process/m_global_parameters.fpp index 221042645e..c75ae99039 100644 --- a/src/post_process/m_global_parameters.fpp +++ b/src/post_process/m_global_parameters.fpp @@ -13,14 +13,13 @@ module m_global_parameters use m_derived_types use m_helper_basic - use m_thermochem, only: num_species, species_names - use m_constants, only: model_eqns_gamma_law, model_eqns_5eq, model_eqns_6eq, model_eqns_4eq, format_silo, precision_single, & - & recon_type_weno + use m_thermochem, only: species_names + use m_constants, only: format_silo, precision_single + ! Shared state: generated_decls, num_dims, num_vels, sys_size, eqn_idx, b_size, tensor_size, chemistry, elasticity, shear_* + use m_global_parameters_common implicit none - #:include 'generated_decls.fpp' - !> @name Logistics !> @{ integer :: num_procs !< Number of processors @@ -48,8 +47,7 @@ module m_global_parameters integer :: m_glb, n_glb, p_glb !> @} - integer :: num_dims !< Number of spatial dimensions - integer :: num_vels !< Number of velocity components (different from num_dims for mhd) + ! num_dims, num_vels: in m_global_parameters_common !> @name Cell-boundary locations in the x-, y- and z-coordinate directions !> @{ real(wp), allocatable, dimension(:) :: x_cb, x_root_cb, y_cb, z_cb @@ -77,16 +75,11 @@ module m_global_parameters !> @name Simulation Algorithm Parameters !> @{ - integer :: sys_size !< Number of unknowns in the system of equations - logical :: elasticity !< elasticity modeling, true for hyper or hypo - integer :: b_size !< Number of components in the b tensor - integer :: tensor_size !< Number of components in the nonsymmetric tensor - logical, parameter :: chemistry = .${chemistry}$. !< Chemistry modeling + ! sys_size, elasticity, b_size, tensor_size, chemistry, eqn_idx: in m_global_parameters_common !> @} !> @name Annotations of the structure, i.e. the organization, of the state vectors !> @{ - type(eqn_idx_info) :: eqn_idx !< All conserved-variable equation index ranges and scalars. type(qbmm_idx_info) :: qbmm_idx !< QBMM moment index mappings. integer :: beta_idx !< Index of lagrange bubbles beta !> @} @@ -102,12 +95,9 @@ module m_global_parameters type(int_bounds_info) :: bc_x, bc_y, bc_z !> @} - integer :: shear_num !< Number of shear stress components - integer, dimension(3) :: shear_indices !< Indices of the stress components that represent shear stress - integer :: shear_BC_flip_num !< Number of shear stress components to reflect for boundary conditions - integer, dimension(3, 2) :: shear_BC_flip_indices !< Shear stress BC reflection indices (1:3, 1:shear_BC_flip_num) + ! shear_num, shear_indices, shear_BC_flip_num, shear_BC_flip_indices: in m_global_parameters_common integer, allocatable, dimension(:) :: proc_coords !< Processor coordinates in MPI_CART_COMM - integer, allocatable, dimension(:) :: start_idx !< Starting cell-center index of local processor in global grid + integer, allocatable, dimension(:) :: start_idx !< Starting cell-center index of local processor in global grid #ifdef MFC_MPI type(mpi_io_var), public :: MPI_IO_DATA type(mpi_io_ib_var), public :: MPI_IO_IB_DATA @@ -375,161 +365,51 @@ contains if (n == 0) m_root = m_glb - ! Gamma/Pi_inf Model - if (model_eqns == model_eqns_gamma_law) then - ! Setting number of fluids - num_fluids = 1 - - ! Annotating structure of the state and flux vectors belonging to the system of equations defined by the selected number - ! of spatial dimensions and the gamma/pi_inf model - eqn_idx%cont%beg = 1 - eqn_idx%cont%end = eqn_idx%cont%beg - eqn_idx%mom%beg = eqn_idx%cont%end + 1 - eqn_idx%mom%end = eqn_idx%cont%end + num_vels - eqn_idx%E = eqn_idx%mom%end + 1 - eqn_idx%adv%beg = eqn_idx%E + 1 - eqn_idx%adv%end = eqn_idx%adv%beg + 1 - eqn_idx%gamma = eqn_idx%adv%beg - eqn_idx%pi_inf = eqn_idx%adv%end - sys_size = eqn_idx%adv%end - - ! Volume Fraction Model (5-equation model) - else if (model_eqns == model_eqns_5eq) then - ! Annotating structure of the state and flux vectors belonging to the system of equations defined by the selected number - ! of spatial dimensions and the volume fraction model - eqn_idx%cont%beg = 1 - eqn_idx%cont%end = num_fluids - eqn_idx%mom%beg = eqn_idx%cont%end + 1 - eqn_idx%mom%end = eqn_idx%cont%end + num_vels - eqn_idx%E = eqn_idx%mom%end + 1 - - if (igr) then - ! Volume fractions are stored in the indices immediately following the energy equation. IGR tracks a total of (N-1) - ! volume fractions for N fluids, hence the "-1" in eqn_idx%adv%end. If num_fluids = 1 then eqn_idx%adv%end < - ! eqn_idx%adv%beg, which skips all loops over the volume fractions since there is no volume fraction to track - eqn_idx%adv%beg = eqn_idx%E + 1 ! Alpha for fluid 1 - eqn_idx%adv%end = eqn_idx%E + num_fluids - 1 - else - ! Volume fractions are stored in the indices immediately following the energy equation. WENO/MUSCL + Riemann tracks - ! a total of (N) volume fractions for N fluids, hence the lack of "-1" in eqn_idx%adv%end - eqn_idx%adv%beg = eqn_idx%E + 1 - eqn_idx%adv%end = eqn_idx%E + num_fluids - end if + ! Gamma/Pi_inf: force num_fluids=1 (post_process-specific side effect of the gamma-law model) + if (model_eqns == model_eqns_gamma_law) num_fluids = 1 - sys_size = eqn_idx%adv%end + ! post_process sets nmom to 6 for qbmm before the shared eqn_idx setup + if (qbmm) nmom = 6 - if (bubbles_euler) then - eqn_idx%alf = eqn_idx%adv%end - else - eqn_idx%alf = 1 - end if + ! Populate eqn_idx, sys_size, b_size, tensor_size, elasticity, shear_* (shared logic) + call s_initialize_eqn_idx(nmom) - if (qbmm) then - nmom = 6 - end if + ! post-only: 6eq alf is a dummy (no void fraction in 6eq) + if (model_eqns == model_eqns_6eq) eqn_idx%alf = 1 - if (bubbles_euler) then - eqn_idx%bub%beg = sys_size + 1 - if (qbmm) then - eqn_idx%bub%end = eqn_idx%adv%end + nb*nmom - else - if (.not. polytropic) then - eqn_idx%bub%end = sys_size + 4*nb - else - eqn_idx%bub%end = sys_size + 2*nb - end if - end if - sys_size = eqn_idx%bub%end - - if (adv_n) then - eqn_idx%n = eqn_idx%bub%end + 1 - sys_size = eqn_idx%n - end if - - allocate (qbmm_idx%rs(nb), qbmm_idx%vs(nb)) - allocate (qbmm_idx%ps(nb), qbmm_idx%ms(nb)) - - if (qbmm) then - allocate (qbmm_idx%moms(nb, nmom)) - do i = 1, nb - do j = 1, nmom - qbmm_idx%moms(i, j) = eqn_idx%bub%beg + (j - 1) + (i - 1)*nmom - end do - qbmm_idx%rs(i) = qbmm_idx%moms(i, 2) - qbmm_idx%vs(i) = qbmm_idx%moms(i, 3) - end do - else - do i = 1, nb - if (polytropic .neqv. .true.) then - fac = 4 - else - fac = 2 - end if - - qbmm_idx%rs(i) = eqn_idx%bub%beg + (i - 1)*fac - qbmm_idx%vs(i) = qbmm_idx%rs(i) + 1 - - if (polytropic .neqv. .true.) then - qbmm_idx%ps(i) = qbmm_idx%vs(i) + 1 - qbmm_idx%ms(i) = qbmm_idx%ps(i) + 1 - end if - end do - end if - end if - - if (bubbles_lagrange) then - beta_idx = sys_size + 1 - sys_size = beta_idx - end if + ! post-only: set default indices for disabled fields (used by post-processing consumers) + if (model_eqns == model_eqns_5eq .or. model_eqns == model_eqns_6eq) then + if (.not. cont_damage) eqn_idx%damage = dflt_int + if (.not. hyper_cleaning) eqn_idx%psi = dflt_int + end if - if (mhd) then - eqn_idx%B%beg = sys_size + 1 - if (n == 0) then - eqn_idx%B%end = sys_size + 2 ! 1D: By, Bz - else - eqn_idx%B%end = sys_size + 3 ! 2D/3D: Bx, By, Bz - end if - sys_size = eqn_idx%B%end - end if + ! post-only: species defaults when chemistry is off + if (.not. chemistry) then + eqn_idx%species%beg = 1 + eqn_idx%species%end = 1 + end if - ! Volume Fraction Model (6-equation model) - else if (model_eqns == model_eqns_6eq) then - ! Annotating structure of the state and flux vectors belonging to the system of equations defined by the selected number - ! of spatial dimensions and the volume fraction model - eqn_idx%cont%beg = 1 - eqn_idx%cont%end = num_fluids - eqn_idx%mom%beg = eqn_idx%cont%end + 1 - eqn_idx%mom%end = eqn_idx%cont%end + num_vels - eqn_idx%E = eqn_idx%mom%end + 1 - eqn_idx%adv%beg = eqn_idx%E + 1 - eqn_idx%adv%end = eqn_idx%E + num_fluids - eqn_idx%int_en%beg = eqn_idx%adv%end + 1 - eqn_idx%int_en%end = eqn_idx%adv%end + num_fluids - sys_size = eqn_idx%int_en%end - eqn_idx%alf = 1 ! dummy, cannot actually have a void fraction - else if (model_eqns == model_eqns_4eq) then - eqn_idx%cont%beg = 1 ! one continuity equation - eqn_idx%cont%end = 1 ! num_fluids - eqn_idx%mom%beg = eqn_idx%cont%end + 1 ! one momentum equation in each - eqn_idx%mom%end = eqn_idx%cont%end + num_vels - eqn_idx%E = eqn_idx%mom%end + 1 ! one energy equation - eqn_idx%adv%beg = eqn_idx%E + 1 - eqn_idx%adv%end = eqn_idx%adv%beg ! one volume advection equation - eqn_idx%alf = eqn_idx%adv%end - sys_size = eqn_idx%alf ! eqn_idx%adv%end - - if (bubbles_euler) then - eqn_idx%bub%beg = sys_size + 1 - eqn_idx%bub%end = sys_size + 2*nb - if (polytropic .neqv. .true.) then - eqn_idx%bub%end = sys_size + 4*nb - end if - sys_size = eqn_idx%bub%end + ! Per-target (post_process): beta_idx for bubbles_lagrange (5eq only, after main eqn_idx setup) + if (model_eqns == model_eqns_5eq .and. bubbles_lagrange) then + beta_idx = sys_size + 1 + sys_size = beta_idx + end if - allocate (qbmm_idx%rs(nb), qbmm_idx%vs(nb)) - allocate (qbmm_idx%ps(nb), qbmm_idx%ms(nb)) - allocate (weight(nb), R0(nb)) + ! Per-target (post_process): qbmm_idx allocations and fills + if (model_eqns == model_eqns_5eq .and. bubbles_euler) then + allocate (qbmm_idx%rs(nb), qbmm_idx%vs(nb)) + allocate (qbmm_idx%ps(nb), qbmm_idx%ms(nb)) + if (qbmm) then + allocate (qbmm_idx%moms(nb, nmom)) + do i = 1, nb + do j = 1, nmom + qbmm_idx%moms(i, j) = eqn_idx%bub%beg + (j - 1) + (i - 1)*nmom + end do + qbmm_idx%rs(i) = qbmm_idx%moms(i, 2) + qbmm_idx%vs(i) = qbmm_idx%moms(i, 3) + end do + else do i = 1, nb if (polytropic .neqv. .true.) then fac = 4 @@ -545,89 +425,43 @@ contains qbmm_idx%ms(i) = qbmm_idx%ps(i) + 1 end if end do - - if (nb == 1) then - weight(:) = 1._wp - R0(:) = 1._wp - else if (nb < 1) then - stop 'Invalid value of nb' - end if - - if (polytropic) then - rhoref = 1._wp - pref = 1._wp - end if end if end if - if (model_eqns == model_eqns_5eq .or. model_eqns == model_eqns_6eq) then - if (hypoelasticity .or. hyperelasticity) then - elasticity = .true. - eqn_idx%stress%beg = sys_size + 1 - eqn_idx%stress%end = sys_size + (num_dims*(num_dims + 1))/2 - if (cyl_coord) eqn_idx%stress%end = eqn_idx%stress%end + 1 - ! number of stresses is 1 in 1D, 3 in 2D, 4 in 2D-Axisym, 6 in 3D - sys_size = eqn_idx%stress%end - - ! shear stress index is 2 for 2D and 2,4,5 for 3D - if (num_dims == 1) then - shear_num = 0 - else if (num_dims == 2) then - shear_num = 1 - shear_indices(1) = eqn_idx%stress%beg - 1 + 2 - shear_BC_flip_num = 1 - shear_BC_flip_indices(1:2,1) = shear_indices(1) - ! Both x-dir and y-dir: flip tau_xy only - else if (num_dims == 3) then - shear_num = 3 - shear_indices(1:3) = eqn_idx%stress%beg - 1 + (/2, 4, 5/) - shear_BC_flip_num = 2 - shear_BC_flip_indices(1,1:2) = shear_indices((/1, 2/)) - shear_BC_flip_indices(2,1:2) = shear_indices((/1, 3/)) - shear_BC_flip_indices(3,1:2) = shear_indices((/2, 3/)) - ! x-dir: flip tau_xy and tau_xz y-dir: flip tau_xy and tau_yz z-dir: flip tau_xz and tau_yz + if (model_eqns == model_eqns_4eq .and. bubbles_euler) then + allocate (qbmm_idx%rs(nb), qbmm_idx%vs(nb)) + allocate (qbmm_idx%ps(nb), qbmm_idx%ms(nb)) + allocate (weight(nb), R0(nb)) + + do i = 1, nb + if (polytropic .neqv. .true.) then + fac = 4 + else + fac = 2 end if - end if - if (hyperelasticity) then - eqn_idx%xi%beg = sys_size + 1 - eqn_idx%xi%end = sys_size + num_dims - ! adding three more equations for the \xi field and the elastic energy - sys_size = eqn_idx%xi%end + 1 - ! number of entries in the symmetric btensor plus the jacobian - b_size = (num_dims*(num_dims + 1))/2 + 1 - tensor_size = num_dims**2 + 1 - end if + qbmm_idx%rs(i) = eqn_idx%bub%beg + (i - 1)*fac + qbmm_idx%vs(i) = qbmm_idx%rs(i) + 1 - if (surface_tension) then - eqn_idx%c = sys_size + 1 - sys_size = eqn_idx%c - end if + if (polytropic .neqv. .true.) then + qbmm_idx%ps(i) = qbmm_idx%vs(i) + 1 + qbmm_idx%ms(i) = qbmm_idx%ps(i) + 1 + end if + end do - if (cont_damage) then - eqn_idx%damage = sys_size + 1 - sys_size = eqn_idx%damage - else - eqn_idx%damage = dflt_int + if (nb == 1) then + weight(:) = 1._wp + R0(:) = 1._wp + else if (nb < 1) then + stop 'Invalid value of nb' end if - if (hyper_cleaning) then - eqn_idx%psi = sys_size + 1 - sys_size = eqn_idx%psi - else - eqn_idx%psi = dflt_int + if (polytropic) then + rhoref = 1._wp + pref = 1._wp end if end if - if (chemistry) then - eqn_idx%species%beg = sys_size + 1 - eqn_idx%species%end = sys_size + num_species - sys_size = eqn_idx%species%end - else - eqn_idx%species%beg = 1 - eqn_idx%species%end = 1 - end if - if (output_partial_domain) then x_output_idx%beg = 0 x_output_idx%end = 0 diff --git a/src/pre_process/m_global_parameters.fpp b/src/pre_process/m_global_parameters.fpp index a5829e5348..b6259ee493 100644 --- a/src/pre_process/m_global_parameters.fpp +++ b/src/pre_process/m_global_parameters.fpp @@ -13,13 +13,11 @@ module m_global_parameters use m_derived_types ! Definitions of the derived types use m_helper_basic ! Functions to compare floating point numbers - use m_thermochem, only: num_species - use m_constants, only: model_eqns_gamma_law, model_eqns_5eq, model_eqns_6eq, model_eqns_4eq, recon_type_weno, recon_type_muscl + ! Shared state: generated_decls, sys_size, eqn_idx, b_size, tensor_size, chemistry, elasticity, shear_* + use m_global_parameters_common implicit none - #:include 'generated_decls.fpp' - ! Logistics integer :: num_procs !< Number of processors logical :: non_axis_sym !< Use existing IC data @@ -33,8 +31,6 @@ module m_global_parameters type(cell_num_bounds) :: cells_bounds integer(kind=8) :: nGlobal !< Global number of cells in the domain integer :: m_glb, n_glb, p_glb !< Global number of cells in each direction - integer :: num_dims !< Number of spatial dimensions - integer :: num_vels !< Number of velocity components (different from num_dims for mhd) integer :: grid_geometry !< Cylindrical coordinates (either axisymmetric or full 3D) !> Locations of cell-centers (cc) in x-, y- and z-directions, respectively real(wp), allocatable, dimension(:) :: x_cc, y_cc, z_cc @@ -44,26 +40,16 @@ module m_global_parameters type(bounds_info) :: x_domain, y_domain, z_domain !< Locations of the domain bounds in the x-, y- and z-coordinate directions ! Simulation Algorithm Parameters - integer :: sys_size !< Number of unknowns in the system of equations - integer :: weno_polyn !< Degree of the WENO polynomials (polyn) - integer :: muscl_polyn !< Degree of the MUSCL polynomials (polyn) - logical :: elasticity !< elasticity modeling, true for hyper or hypo - integer :: b_size !< Number of components in the b tensor - integer :: tensor_size !< Number of components in the nonsymmetric tensor - logical, parameter :: chemistry = .${chemistry}$. !< Chemistry modeling + ! sys_size, eqn_idx, b_size, tensor_size, chemistry, elasticity, shear_*: in m_global_parameters_common + ! weno_polyn, muscl_polyn, num_dims, num_vels: in m_global_parameters_common ! Annotations of the structure, i.e. the organization, of the state vectors - type(eqn_idx_info) :: eqn_idx !< All conserved-variable equation index ranges and scalars. type(qbmm_idx_info) :: qbmm_idx !< QBMM moment index mappings. ! Cell Indices for the (local) interior points (O-m, O-n, 0-p). Stands for "InDices With BUFFer". type(int_bounds_info) :: idwint(1:3) ! Cell indices (InDices With BUFFer): includes buffer except in pre_process - type(int_bounds_info) :: idwbuff(1:3) - type(int_bounds_info) :: bc_x, bc_y, bc_z !< Boundary conditions in the x-, y- and z-coordinate directions - integer :: shear_num !< Number of shear stress components - integer, dimension(3) :: shear_indices !< Indices of the stress components that represent shear stress - integer :: shear_BC_flip_num !< Number of shear stress components to reflect for boundary conditions - integer, dimension(3, 2) :: shear_BC_flip_indices !< Shear stress BC reflection indices (1:3, 1:shear_BC_flip_num) + type(int_bounds_info) :: idwbuff(1:3) + type(int_bounds_info) :: bc_x, bc_y, bc_z !< Boundary conditions in the x-, y- and z-coordinate directions ! simplex_params: auto-generated in generated_decls.fpp ! fluid_rho (perturbs surrounding-air density to break grid symmetry): auto-generated in generated_decls.fpp @@ -456,165 +442,37 @@ contains muscl_polyn = muscl_order end if - ! Determining the layout of the state vectors and overall size of the system of equations, given the dimensionality and - ! choice of the equations of motion - - ! Gamma/Pi_inf Model - if (model_eqns == model_eqns_gamma_law) then - ! Setting number of fluids - num_fluids = 1 - - ! Annotating structure of the state and flux vectors belonging to the system of equations defined by the selected number - ! of spatial dimensions and the gamma/pi_inf model - eqn_idx%cont%beg = 1 - eqn_idx%cont%end = eqn_idx%cont%beg - eqn_idx%mom%beg = eqn_idx%cont%end + 1 - eqn_idx%mom%end = eqn_idx%cont%end + num_vels - eqn_idx%E = eqn_idx%mom%end + 1 - eqn_idx%adv%beg = eqn_idx%E + 1 - eqn_idx%adv%end = eqn_idx%adv%beg + 1 - eqn_idx%gamma = eqn_idx%adv%beg - eqn_idx%pi_inf = eqn_idx%adv%end - sys_size = eqn_idx%adv%end - - ! Volume Fraction Model (5-equation model) - else if (model_eqns == model_eqns_5eq) then - ! Annotating structure of the state and flux vectors belonging to the system of equations defined by the selected number - ! of spatial dimensions and the volume fraction model - eqn_idx%cont%beg = 1 - eqn_idx%cont%end = num_fluids - eqn_idx%mom%beg = eqn_idx%cont%end + 1 - eqn_idx%mom%end = eqn_idx%cont%end + num_vels - eqn_idx%E = eqn_idx%mom%end + 1 - - if (igr) then - ! Volume fractions are stored in the indices immediately following the energy equation. IGR tracks a total of (N-1) - ! volume fractions for N fluids, hence the "-1" in eqn_idx%adv%end. If num_fluids = 1 then eqn_idx%adv%end < - ! eqn_idx%adv%beg, which skips all loops over the volume fractions since there is no volume fraction to track - eqn_idx%adv%beg = eqn_idx%E + 1 - eqn_idx%adv%end = eqn_idx%E + num_fluids - 1 - else - ! Volume fractions are stored in the indices immediately following the energy equation. WENO/MUSCL + Riemann tracks - ! a total of (N) volume fractions for N fluids, hence the lack of "-1" in eqn_idx%adv%end - eqn_idx%adv%beg = eqn_idx%E + 1 - eqn_idx%adv%end = eqn_idx%E + num_fluids - end if + ! Gamma/Pi_inf: force num_fluids=1 (pre_process-specific side effect of the gamma-law model) + if (model_eqns == model_eqns_gamma_law) num_fluids = 1 - sys_size = eqn_idx%adv%end + ! Pre-process sets nmom to 6 for qbmm before the shared eqn_idx setup + if (qbmm .and. nnode == 4) nmom = 6 - if (bubbles_euler) then - eqn_idx%alf = eqn_idx%adv%end - else - eqn_idx%alf = 1 - end if + ! Populate eqn_idx, sys_size, b_size, tensor_size, elasticity, shear_* (shared logic) + call s_initialize_eqn_idx(nmom) - if (bubbles_euler) then - eqn_idx%bub%beg = sys_size + 1 - if (qbmm) then - if (nnode == 4) then - nmom = 6 !< Already set as a parameter - end if - eqn_idx%bub%end = eqn_idx%adv%end + nb*nmom - else - if (.not. polytropic) then - eqn_idx%bub%end = sys_size + 4*nb - else - eqn_idx%bub%end = sys_size + 2*nb - end if - end if - sys_size = eqn_idx%bub%end + ! Per-target (pre_process): qbmm_idx allocations and fills + if (model_eqns == model_eqns_5eq .and. bubbles_euler) then + allocate (qbmm_idx%rs(nb), qbmm_idx%vs(nb)) + allocate (qbmm_idx%ps(nb), qbmm_idx%ms(nb)) - if (adv_n) then - eqn_idx%n = eqn_idx%bub%end + 1 - sys_size = eqn_idx%n - end if + if (qbmm) then + allocate (qbmm_idx%moms(nb, nmom)) + allocate (qbmm_idx%fullmom(nb,0:nmom,0:nmom)) - allocate (qbmm_idx%rs(nb), qbmm_idx%vs(nb)) - allocate (qbmm_idx%ps(nb), qbmm_idx%ms(nb)) - - if (qbmm) then - allocate (qbmm_idx%moms(nb, nmom)) - allocate (qbmm_idx%fullmom(nb,0:nmom,0:nmom)) - - do i = 1, nb - do j = 1, nmom - qbmm_idx%moms(i, j) = eqn_idx%bub%beg + (j - 1) + (i - 1)*nmom - end do - qbmm_idx%fullmom(i, 0, 0) = qbmm_idx%moms(i, 1) - qbmm_idx%fullmom(i, 1, 0) = qbmm_idx%moms(i, 2) - qbmm_idx%fullmom(i, 0, 1) = qbmm_idx%moms(i, 3) - qbmm_idx%fullmom(i, 2, 0) = qbmm_idx%moms(i, 4) - qbmm_idx%fullmom(i, 1, 1) = qbmm_idx%moms(i, 5) - qbmm_idx%fullmom(i, 0, 2) = qbmm_idx%moms(i, 6) - qbmm_idx%rs(i) = qbmm_idx%fullmom(i, 1, 0) - end do - else - do i = 1, nb - if (.not. polytropic) then - fac = 4 - else - fac = 2 - end if - - qbmm_idx%rs(i) = eqn_idx%bub%beg + (i - 1)*fac - qbmm_idx%vs(i) = qbmm_idx%rs(i) + 1 - - if (.not. polytropic) then - qbmm_idx%ps(i) = qbmm_idx%vs(i) + 1 - qbmm_idx%ms(i) = qbmm_idx%ps(i) + 1 - end if + do i = 1, nb + do j = 1, nmom + qbmm_idx%moms(i, j) = eqn_idx%bub%beg + (j - 1) + (i - 1)*nmom end do - end if - end if - - if (mhd) then - eqn_idx%B%beg = sys_size + 1 - if (n == 0) then - eqn_idx%B%end = sys_size + 2 ! 1D: By, Bz - else - eqn_idx%B%end = sys_size + 3 ! 2D/3D: Bx, By, Bz - end if - sys_size = eqn_idx%B%end - end if - - ! Volume Fraction Model (6-equation model) - else if (model_eqns == model_eqns_6eq) then - ! Annotating structure of the state and flux vectors belonging to the system of equations defined by the selected number - ! of spatial dimensions and the volume fraction model - eqn_idx%cont%beg = 1 - eqn_idx%cont%end = num_fluids - eqn_idx%mom%beg = eqn_idx%cont%end + 1 - eqn_idx%mom%end = eqn_idx%cont%end + num_vels - eqn_idx%E = eqn_idx%mom%end + 1 - eqn_idx%adv%beg = eqn_idx%E + 1 - eqn_idx%adv%end = eqn_idx%E + num_fluids - eqn_idx%int_en%beg = eqn_idx%adv%end + 1 - eqn_idx%int_en%end = eqn_idx%adv%end + num_fluids - sys_size = eqn_idx%int_en%end - else if (model_eqns == model_eqns_4eq) then - ! 4 equation model with subgrid bubbles_euler - eqn_idx%cont%beg = 1 ! one continuity equation - eqn_idx%cont%end = 1 ! num_fluids - eqn_idx%mom%beg = eqn_idx%cont%end + 1 ! one momentum equation in each direction - eqn_idx%mom%end = eqn_idx%cont%end + num_vels - eqn_idx%E = eqn_idx%mom%end + 1 ! one energy equation - eqn_idx%adv%beg = eqn_idx%E + 1 - eqn_idx%adv%end = eqn_idx%adv%beg ! one volume advection equation - eqn_idx%alf = eqn_idx%adv%end - sys_size = eqn_idx%alf ! eqn_idx%adv%end - - if (bubbles_euler) then - eqn_idx%bub%beg = sys_size + 1 - eqn_idx%bub%end = sys_size + 2*nb - if (.not. polytropic) then - eqn_idx%bub%end = sys_size + 4*nb - end if - sys_size = eqn_idx%bub%end - - allocate (qbmm_idx%rs(nb), qbmm_idx%vs(nb)) - allocate (qbmm_idx%ps(nb), qbmm_idx%ms(nb)) - allocate (weight(nb), R0(nb)) - + qbmm_idx%fullmom(i, 0, 0) = qbmm_idx%moms(i, 1) + qbmm_idx%fullmom(i, 1, 0) = qbmm_idx%moms(i, 2) + qbmm_idx%fullmom(i, 0, 1) = qbmm_idx%moms(i, 3) + qbmm_idx%fullmom(i, 2, 0) = qbmm_idx%moms(i, 4) + qbmm_idx%fullmom(i, 1, 1) = qbmm_idx%moms(i, 5) + qbmm_idx%fullmom(i, 0, 2) = qbmm_idx%moms(i, 6) + qbmm_idx%rs(i) = qbmm_idx%fullmom(i, 1, 0) + end do + else do i = 1, nb if (.not. polytropic) then fac = 4 @@ -630,82 +488,43 @@ contains qbmm_idx%ms(i) = qbmm_idx%ps(i) + 1 end if end do - - if (nb == 1) then - weight(:) = 1._wp - R0(:) = 1._wp - else if (nb < 1) then - stop 'Invalid value of nb' - end if - - if (polytropic) then - rhoref = 1._wp - pref = 1._wp - end if end if end if - if (model_eqns == model_eqns_5eq .or. model_eqns == model_eqns_6eq) then - if (hypoelasticity .or. hyperelasticity) then - elasticity = .true. - eqn_idx%stress%beg = sys_size + 1 - eqn_idx%stress%end = sys_size + (num_dims*(num_dims + 1))/2 - if (cyl_coord) eqn_idx%stress%end = eqn_idx%stress%end + 1 - ! number of stresses is 1 in 1D, 3 in 2D, 4 in 2D-Axisym, 6 in 3D - sys_size = eqn_idx%stress%end - - ! shear stress index is 2 for 2D and 2,4,5 for 3D - if (num_dims == 1) then - shear_num = 0 - else if (num_dims == 2) then - shear_num = 1 - shear_indices(1) = eqn_idx%stress%beg - 1 + 2 - shear_BC_flip_num = 1 - shear_BC_flip_indices(1:2,1) = shear_indices(1) - ! Both x-dir and y-dir: flip tau_xy only - else if (num_dims == 3) then - shear_num = 3 - shear_indices(1:3) = eqn_idx%stress%beg - 1 + (/2, 4, 5/) - shear_BC_flip_num = 2 - shear_BC_flip_indices(1,1:2) = shear_indices((/1, 2/)) - shear_BC_flip_indices(2,1:2) = shear_indices((/1, 3/)) - shear_BC_flip_indices(3,1:2) = shear_indices((/2, 3/)) - ! x-dir: flip tau_xy and tau_xz y-dir: flip tau_xy and tau_yz z-dir: flip tau_xz and tau_yz + if (model_eqns == model_eqns_4eq .and. bubbles_euler) then + allocate (qbmm_idx%rs(nb), qbmm_idx%vs(nb)) + allocate (qbmm_idx%ps(nb), qbmm_idx%ms(nb)) + allocate (weight(nb), R0(nb)) + + do i = 1, nb + if (.not. polytropic) then + fac = 4 + else + fac = 2 end if - end if - if (hyperelasticity) then - ! number of entries in the symmetric btensor plus the jacobian - b_size = (num_dims*(num_dims + 1))/2 + 1 - tensor_size = num_dims**2 + 1 - eqn_idx%xi%beg = sys_size + 1 - eqn_idx%xi%end = sys_size + num_dims - ! adding three more equations for the \xi field and the elastic energy - sys_size = eqn_idx%xi%end + 1 - end if + qbmm_idx%rs(i) = eqn_idx%bub%beg + (i - 1)*fac + qbmm_idx%vs(i) = qbmm_idx%rs(i) + 1 - if (surface_tension) then - eqn_idx%c = sys_size + 1 - sys_size = eqn_idx%c - end if + if (.not. polytropic) then + qbmm_idx%ps(i) = qbmm_idx%vs(i) + 1 + qbmm_idx%ms(i) = qbmm_idx%ps(i) + 1 + end if + end do - if (cont_damage) then - eqn_idx%damage = sys_size + 1 - sys_size = eqn_idx%damage + if (nb == 1) then + weight(:) = 1._wp + R0(:) = 1._wp + else if (nb < 1) then + stop 'Invalid value of nb' end if - if (hyper_cleaning) then - eqn_idx%psi = sys_size + 1 - sys_size = eqn_idx%psi + if (polytropic) then + rhoref = 1._wp + pref = 1._wp end if end if - if (chemistry) then - eqn_idx%species%beg = sys_size + 1 - eqn_idx%species%end = sys_size + num_species - sys_size = eqn_idx%species%end - end if - call s_configure_coordinate_bounds(recon_type, weno_polyn, muscl_polyn, igr_order, buff_size, idwint, idwbuff, viscous, & & bubbles_lagrange, m, n, p, num_dims, igr, ib) diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index feed63d798..33b9cf5fb5 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -14,13 +14,13 @@ module m_global_parameters use m_derived_types use m_helper_basic - use m_constants, only: model_eqns_gamma_law, model_eqns_5eq, model_eqns_6eq, model_eqns_4eq, recon_type_weno, recon_type_muscl + ! Shared state: generated_decls, generated_case_opt_decls, sys_size, eqn_idx, b_size, tensor_size, chemistry, elasticity, + ! shear_* + use m_global_parameters_common ! $:USE_GPU_MODULE() implicit none - #:include 'generated_decls.fpp' - real(wp) :: wall_time = 0 real(wp) :: wall_time_avg = 0 @@ -64,15 +64,13 @@ module m_global_parameters $:GPU_DECLARE(create='[cfl_target]') logical :: cfl_dt - ! Simulation Algorithm Parameters - #:include 'generated_case_opt_decls.fpp' + ! Simulation Algorithm Parameters generated_case_opt_decls.fpp: now in m_global_parameters_common $:GPU_DECLARE(create='[int_comp, ic_eps, ic_beta]') - integer :: hyper_model !< hyperelasticity solver algorithm - logical :: elasticity !< elasticity modeling, true for hyper or hypo - logical, parameter :: chemistry = .${chemistry}$. !< Chemistry modeling - logical :: shear_stress !< Shear stresses - logical :: bulk_stress !< Bulk stresses + integer :: hyper_model !< hyperelasticity solver algorithm + ! elasticity, chemistry: in m_global_parameters_common + logical :: shear_stress !< Shear stresses + logical :: bulk_stress !< Bulk stresses logical :: bodyForces real(wp), dimension(3) :: accel_bf $:GPU_DECLARE(create='[accel_bf]') @@ -136,16 +134,8 @@ module m_global_parameters integer :: mpi_info_int !> @} - !> @name Annotations of the structure of the state and flux vectors in terms of the size and the configuration of the system of - !! equations to which they belong - !> @{ - integer :: sys_size !< Number of unknowns in system of eqns. - type(eqn_idx_info) :: eqn_idx !< All conserved-variable equation index ranges and scalars. - type(qbmm_idx_info) :: qbmm_idx !< QBMM moment index mappings (allocatable; GPU-managed separately). - integer :: b_size !< Number of elements in the symmetric b tensor, plus one - integer :: tensor_size !< Number of elements in the full tensor plus one - !> @} - $:GPU_DECLARE(create='[sys_size, eqn_idx, b_size, tensor_size]') + ! sys_size, eqn_idx, b_size, tensor_size: in m_global_parameters_common (GPU_DECLARE there too) + type(qbmm_idx_info) :: qbmm_idx !< QBMM moment index mappings (allocatable; GPU-managed separately). ! Cell Indices for the (local) interior points (O-m, O-n, 0-p). Stands for "InDices With INTerior". type(int_bounds_info) :: idwint(1:3) @@ -198,11 +188,7 @@ module m_global_parameters integer :: buff_size !< Number of ghost cells for boundary condition storage $:GPU_DECLARE(create='[buff_size]') - integer :: shear_num !< Number of shear stress components - integer, dimension(3) :: shear_indices !< Indices of the stress components that represent shear stress - integer :: shear_BC_flip_num !< Number of shear stress components to reflect for boundary conditions - integer, dimension(3, 2) :: shear_BC_flip_indices !< Shear stress BC reflection indices (1:3, 1:shear_BC_flip_num) - $:GPU_DECLARE(create='[shear_num, shear_indices, shear_BC_flip_num, shear_BC_flip_indices]') + ! shear_num, shear_indices, shear_BC_flip_num, shear_BC_flip_indices: in m_global_parameters_common ! END: Simulation Algorithm Parameters @@ -746,168 +732,79 @@ contains Re_size = 0 Re_size_max = 0 - ! Gamma/Pi_inf Model - if (model_eqns == model_eqns_gamma_law) then - ! Annotating structure of the state and flux vectors belonging to the system of equations defined by the selected number - ! of spatial dimensions and the gamma/pi_inf model - eqn_idx%cont%beg = 1 - eqn_idx%cont%end = eqn_idx%cont%beg - eqn_idx%mom%beg = eqn_idx%cont%end + 1 - eqn_idx%mom%end = eqn_idx%cont%end + num_vels - eqn_idx%E = eqn_idx%mom%end + 1 - eqn_idx%adv%beg = eqn_idx%E + 1 - eqn_idx%adv%end = eqn_idx%adv%beg + 1 - eqn_idx%gamma = eqn_idx%adv%beg - eqn_idx%pi_inf = eqn_idx%adv%end - sys_size = eqn_idx%adv%end - - ! Volume Fraction Model - else - ! Annotating structure of the state and flux vectors belonging to the system of equations defined by the selected number - ! of spatial dimensions and the volume fraction model - if (model_eqns == model_eqns_5eq) then - eqn_idx%cont%beg = 1 - eqn_idx%cont%end = num_fluids - eqn_idx%mom%beg = eqn_idx%cont%end + 1 - eqn_idx%mom%end = eqn_idx%cont%end + num_vels - eqn_idx%E = eqn_idx%mom%end + 1 - - if (igr) then - ! IGR: volume fractions after energy (N-1 for N fluids; skipped when num_fluids=1) - eqn_idx%adv%beg = eqn_idx%E + 1 ! Alpha for fluid 1 - eqn_idx%adv%end = eqn_idx%E + num_fluids - 1 - else - ! Volume fractions are stored in the indices immediately following the energy equation. WENO/MUSCL + Riemann - ! tracks a total of (N) volume fractions for N fluids, hence the lack of "-1" in eqn_idx%adv%end - eqn_idx%adv%beg = eqn_idx%E + 1 - eqn_idx%adv%end = eqn_idx%E + num_fluids - end if - - sys_size = eqn_idx%adv%end + ! Populate eqn_idx, sys_size, b_size, tensor_size, elasticity, shear_* (shared logic) + call s_initialize_eqn_idx(nmom) - if (bubbles_euler) then - eqn_idx%alf = eqn_idx%adv%end - else - eqn_idx%alf = 1 - end if - - if (bubbles_euler) then - eqn_idx%bub%beg = sys_size + 1 - if (qbmm) then - nmomsp = 4 ! number of special moments - if (nnode == 4) then - ! nmom = 6 : It is already a parameter - nmomtot = nmom*nb - end if - eqn_idx%bub%end = eqn_idx%adv%end + nb*nmom - else - if (.not. polytropic) then - eqn_idx%bub%end = sys_size + 4*nb - else - eqn_idx%bub%end = sys_size + 2*nb - end if - end if - sys_size = eqn_idx%bub%end + ! sim-only: GPU update for shear state after s_initialize_eqn_idx populated it + if (model_eqns == model_eqns_5eq .or. model_eqns == model_eqns_6eq) then + if (hypoelasticity .or. hyperelasticity) then + $:GPU_UPDATE(device='[shear_num, shear_indices, shear_BC_flip_num, shear_BC_flip_indices]') + end if + end if - if (adv_n) then - eqn_idx%n = eqn_idx%bub%end + 1 - sys_size = eqn_idx%n - end if + ! Per-target (sim): nmomsp/nmomtot for qbmm, qbmm_idx alloc/fill, gam, Re_idx + if (model_eqns == model_eqns_5eq .and. bubbles_euler) then + if (qbmm) then + nmomsp = 4 ! number of special moments + if (nnode == 4) nmomtot = nmom*nb + end if - @:ALLOCATE(qbmm_idx%rs(nb), qbmm_idx%vs(nb)) - @:ALLOCATE(qbmm_idx%ps(nb), qbmm_idx%ms(nb)) + @:ALLOCATE(qbmm_idx%rs(nb), qbmm_idx%vs(nb)) + @:ALLOCATE(qbmm_idx%ps(nb), qbmm_idx%ms(nb)) - gam = bub_pp%gam_g + gam = bub_pp%gam_g - if (qbmm) then - @:ALLOCATE(qbmm_idx%moms(nb, nmom)) - do i = 1, nb - do j = 1, nmom - qbmm_idx%moms(i, j) = eqn_idx%bub%beg + (j - 1) + (i - 1)*nmom - end do - qbmm_idx%rs(i) = qbmm_idx%moms(i, 2) - qbmm_idx%vs(i) = qbmm_idx%moms(i, 3) - end do + if (qbmm) then + @:ALLOCATE(qbmm_idx%moms(nb, nmom)) + do i = 1, nb + do j = 1, nmom + qbmm_idx%moms(i, j) = eqn_idx%bub%beg + (j - 1) + (i - 1)*nmom + end do + qbmm_idx%rs(i) = qbmm_idx%moms(i, 2) + qbmm_idx%vs(i) = qbmm_idx%moms(i, 3) + end do + else + do i = 1, nb + if (.not. polytropic) then + fac = 4 else - do i = 1, nb - if (.not. polytropic) then - fac = 4 - else - fac = 2 - end if - - qbmm_idx%rs(i) = eqn_idx%bub%beg + (i - 1)*fac - qbmm_idx%vs(i) = qbmm_idx%rs(i) + 1 - - if (.not. polytropic) then - qbmm_idx%ps(i) = qbmm_idx%vs(i) + 1 - qbmm_idx%ms(i) = qbmm_idx%ps(i) + 1 - end if - end do + fac = 2 end if - end if - if (mhd) then - eqn_idx%B%beg = sys_size + 1 - if (n == 0) then - eqn_idx%B%end = sys_size + 2 ! 1D: By, Bz - else - eqn_idx%B%end = sys_size + 3 ! 2D/3D: Bx, By, Bz - end if - sys_size = eqn_idx%B%end - end if - else if (model_eqns == model_eqns_6eq) then - eqn_idx%cont%beg = 1 - eqn_idx%cont%end = num_fluids - eqn_idx%mom%beg = eqn_idx%cont%end + 1 - eqn_idx%mom%end = eqn_idx%cont%end + num_vels - eqn_idx%E = eqn_idx%mom%end + 1 - eqn_idx%adv%beg = eqn_idx%E + 1 - eqn_idx%adv%end = eqn_idx%E + num_fluids - eqn_idx%alf = eqn_idx%adv%end - eqn_idx%int_en%beg = eqn_idx%adv%end + 1 - eqn_idx%int_en%end = eqn_idx%adv%end + num_fluids - sys_size = eqn_idx%int_en%end - else if (model_eqns == model_eqns_4eq) then - eqn_idx%cont%beg = 1 ! one continuity equation - eqn_idx%cont%end = 1 ! num_fluids - eqn_idx%mom%beg = eqn_idx%cont%end + 1 ! one momentum equation in each direction - eqn_idx%mom%end = eqn_idx%cont%end + num_vels - eqn_idx%E = eqn_idx%mom%end + 1 ! one energy equation - eqn_idx%adv%beg = eqn_idx%E + 1 - eqn_idx%adv%end = eqn_idx%adv%beg ! one volume advection equation - eqn_idx%alf = eqn_idx%adv%end - sys_size = eqn_idx%adv%end - - if (bubbles_euler) then - eqn_idx%bub%beg = sys_size + 1 - eqn_idx%bub%end = sys_size + 2*nb + qbmm_idx%rs(i) = eqn_idx%bub%beg + (i - 1)*fac + qbmm_idx%vs(i) = qbmm_idx%rs(i) + 1 + if (.not. polytropic) then - eqn_idx%bub%end = sys_size + 4*nb + qbmm_idx%ps(i) = qbmm_idx%vs(i) + 1 + qbmm_idx%ms(i) = qbmm_idx%ps(i) + 1 end if - sys_size = eqn_idx%bub%end + end do + end if + end if - @:ALLOCATE(qbmm_idx%rs(nb), qbmm_idx%vs(nb)) - @:ALLOCATE(qbmm_idx%ps(nb), qbmm_idx%ms(nb)) + if (model_eqns == model_eqns_4eq .and. bubbles_euler) then + @:ALLOCATE(qbmm_idx%rs(nb), qbmm_idx%vs(nb)) + @:ALLOCATE(qbmm_idx%ps(nb), qbmm_idx%ms(nb)) - do i = 1, nb - if (polytropic) then - fac = 2 - else - fac = 4 - end if + do i = 1, nb + if (polytropic) then + fac = 2 + else + fac = 4 + end if - qbmm_idx%rs(i) = eqn_idx%bub%beg + (i - 1)*fac - qbmm_idx%vs(i) = qbmm_idx%rs(i) + 1 + qbmm_idx%rs(i) = eqn_idx%bub%beg + (i - 1)*fac + qbmm_idx%vs(i) = qbmm_idx%rs(i) + 1 - if (.not. polytropic) then - qbmm_idx%ps(i) = qbmm_idx%vs(i) + 1 - qbmm_idx%ms(i) = qbmm_idx%ps(i) + 1 - end if - end do + if (.not. polytropic) then + qbmm_idx%ps(i) = qbmm_idx%vs(i) + 1 + qbmm_idx%ms(i) = qbmm_idx%ps(i) + 1 end if - end if + end do + end if + ! sim-only: Re_idx (non-gamma-law models only) + if (model_eqns /= model_eqns_gamma_law) then ! Count fluids with non-negligible viscous effects (Re > 0) do i = 1, num_fluids if (fluid_pp(i)%Re(1) > 0) Re_size(1) = Re_size(1) + 1 @@ -921,8 +818,7 @@ contains $:GPU_UPDATE(device='[Re_size, Re_size_max, shear_stress, bulk_stress]') - ! Bookkeeping the indexes of any viscous fluids and any pairs of fluids whose interface will support effects of surface - ! tension + ! Bookkeeping the indexes of any viscous fluids if (viscous) then @:ALLOCATE(Re_idx(1:2, 1:Re_size_max)) @@ -966,71 +862,6 @@ contains end do $:GPU_UPDATE(device='[any_non_newtonian, is_non_newtonian, hb_tau0, hb_K, hb_nn, hb_m_arr, hb_mu_min, hb_mu_max, fluid_inv_re]') - if (model_eqns == model_eqns_5eq .or. model_eqns == model_eqns_6eq) then - if (hypoelasticity .or. hyperelasticity) then - elasticity = .true. - eqn_idx%stress%beg = sys_size + 1 - eqn_idx%stress%end = sys_size + (num_dims*(num_dims + 1))/2 - if (cyl_coord) eqn_idx%stress%end = eqn_idx%stress%end + 1 - ! number of stresses is 1 in 1D, 3 in 2D, 4 in 2D-Axisym, 6 in 3D - sys_size = eqn_idx%stress%end - - ! shear stress index is 2 for 2D and 2,4,5 for 3D - if (num_dims == 1) then - shear_num = 0 - else if (num_dims == 2) then - shear_num = 1 - shear_indices(1) = eqn_idx%stress%beg - 1 + 2 - shear_BC_flip_num = 1 - shear_BC_flip_indices(1:2,1) = shear_indices(1) - ! Both x-dir and y-dir: flip tau_xy only - else if (num_dims == 3) then - shear_num = 3 - shear_indices(1:3) = eqn_idx%stress%beg - 1 + (/2, 4, 5/) - shear_BC_flip_num = 2 - shear_BC_flip_indices(1,1:2) = shear_indices((/1, 2/)) - shear_BC_flip_indices(2,1:2) = shear_indices((/1, 3/)) - shear_BC_flip_indices(3,1:2) = shear_indices((/2, 3/)) - ! x-dir: flip tau_xy and tau_xz y-dir: flip tau_xy and tau_yz z-dir: flip tau_xz and tau_yz - end if - $:GPU_UPDATE(device='[shear_num, shear_indices, shear_BC_flip_num, shear_BC_flip_indices]') - end if - - if (hyperelasticity) then - ! number of entries in the symmetric btensor plus the jacobian - b_size = (num_dims*(num_dims + 1))/2 + 1 - ! storing the jacobian in the last entry - tensor_size = num_dims**2 + 1 - eqn_idx%xi%beg = sys_size + 1 - eqn_idx%xi%end = sys_size + num_dims - ! adding three more equations for the \xi field and the elastic energy - sys_size = eqn_idx%xi%end + 1 - end if - - if (surface_tension) then - eqn_idx%c = sys_size + 1 - sys_size = eqn_idx%c - end if - - if (cont_damage) then - eqn_idx%damage = sys_size + 1 - sys_size = eqn_idx%damage - end if - - if (hyper_cleaning) then - eqn_idx%psi = sys_size + 1 - sys_size = eqn_idx%psi - end if - end if - - ! END: Volume Fraction Model - - if (chemistry) then - eqn_idx%species%beg = sys_size + 1 - eqn_idx%species%end = sys_size + num_species - sys_size = eqn_idx%species%end - end if - if (bubbles_euler .and. qbmm .and. .not. polytropic) then allocate (MPI_IO_DATA%view(1:sys_size + 2*nb*nnode)) allocate (MPI_IO_DATA%var(1:sys_size + 2*nb*nnode)) From 1400c2b19cfa7de20a905e938ba11986e75b4e39 Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Wed, 10 Jun 2026 14:36:30 -0400 Subject: [PATCH 02/22] src: share parallel-io setup and finalize core across executables --- src/common/m_global_parameters_common.fpp | 78 ++++++++++++++++++++++- src/post_process/m_global_parameters.fpp | 43 ++----------- src/pre_process/m_global_parameters.fpp | 39 ++---------- src/simulation/m_global_parameters.fpp | 47 ++------------ 4 files changed, 90 insertions(+), 117 deletions(-) diff --git a/src/common/m_global_parameters_common.fpp b/src/common/m_global_parameters_common.fpp index c42a3589d1..9af4cc6043 100644 --- a/src/common/m_global_parameters_common.fpp +++ b/src/common/m_global_parameters_common.fpp @@ -17,7 +17,7 @@ module m_global_parameters_common use m_helper_basic use m_thermochem, only: num_species use m_constants, only: model_eqns_gamma_law, model_eqns_5eq, model_eqns_6eq, model_eqns_4eq, recon_type_weno, & - & recon_type_muscl, nnode + & recon_type_muscl, nnode, name_len implicit none @@ -71,6 +71,18 @@ module m_global_parameters_common $:GPU_DECLARE(create='[sys_size, eqn_idx, b_size, tensor_size]') #endif + !> @name Processor coordinates and parallel-IO addressing (identical declaration across all three targets) + !> @{ + integer, allocatable, dimension(:) :: proc_coords !< Processor coordinates in MPI_CART_COMM + integer, allocatable, dimension(:) :: start_idx !< Starting cell-center index of local processor in global grid + !> @} + + !> @name MPI info for parallel IO with Lustre file systems (identical across all three targets) + !> @{ + character(len=name_len) :: mpiiofs + integer :: mpi_info_int + !> @} + contains !> Initialize equation-index state (eqn_idx, sys_size, b_size, tensor_size) from the namelist parameters. This is the shared @@ -264,4 +276,68 @@ contains end subroutine s_initialize_eqn_idx + !> Configure MPI parallel I/O settings and allocate processor coordinate arrays. Shared across all three executables; + !! num_dims/num_vels are computed here for pre/post unconditionally and for sim only when not case-optimized (in which case they + !! are compile-time parameters). Callers must have already populated n and p (grid dimensions). + impure subroutine s_initialize_parallel_io_common + +#ifdef MFC_MPI + integer :: ierr !< Generic flag used to identify and report MPI errors +#endif + +#ifdef MFC_SIMULATION + ! Under case-optimization, num_dims and num_vels are compile-time parameters; skip assignment. + #:if not MFC_CASE_OPTIMIZATION + num_dims = 1 + min(1, n) + min(1, p) + + if (mhd) then + num_vels = 3 + else + num_vels = num_dims + end if + #:endif +#else + num_dims = 1 + min(1, n) + min(1, p) + + if (mhd) then + num_vels = 3 + else + num_vels = num_dims + end if +#endif + + allocate (proc_coords(1:num_dims)) + + if (parallel_io .neqv. .true.) return + +#ifdef MFC_MPI + ! Option for Lustre file system (Darter/Comet/Stampede) + write (mpiiofs, '(A)') '/lustre_' + mpiiofs = trim(mpiiofs) + call MPI_INFO_CREATE(mpi_info_int, ierr) + call MPI_INFO_SET(mpi_info_int, 'romio_ds_write', 'disable', ierr) + + ! Option for UNIX file system (Hooke/Thomson) WRITE(mpiiofs, '(A)') '/ufs_' mpiiofs = TRIM(mpiiofs) mpi_info_int = + ! MPI_INFO_NULL + + allocate (start_idx(1:num_dims)) +#endif + + end subroutine s_initialize_parallel_io_common + + !> Shared finalize core: deallocate proc_coords and start_idx. Per-target finalize routines call this first, then handle their + !! own extras (qbmm_idx, grid arrays, MPI_IO_DATA null/dealloc - those reference per-target typed variables and stay + !! per-target). + impure subroutine s_finalize_global_parameters_common + + deallocate (proc_coords) + +#ifdef MFC_MPI + if (parallel_io) then + deallocate (start_idx) + end if +#endif + + end subroutine s_finalize_global_parameters_common + end module m_global_parameters_common diff --git a/src/post_process/m_global_parameters.fpp b/src/post_process/m_global_parameters.fpp index c75ae99039..388b05172d 100644 --- a/src/post_process/m_global_parameters.fpp +++ b/src/post_process/m_global_parameters.fpp @@ -96,8 +96,7 @@ module m_global_parameters !> @} ! shear_num, shear_indices, shear_BC_flip_num, shear_BC_flip_indices: in m_global_parameters_common - integer, allocatable, dimension(:) :: proc_coords !< Processor coordinates in MPI_CART_COMM - integer, allocatable, dimension(:) :: start_idx !< Starting cell-center index of local processor in global grid + ! proc_coords, start_idx, mpiiofs, mpi_info_int: in m_global_parameters_common #ifdef MFC_MPI type(mpi_io_var), public :: MPI_IO_DATA type(mpi_io_ib_var), public :: MPI_IO_IB_DATA @@ -106,12 +105,6 @@ module m_global_parameters real(wp), allocatable, dimension(:,:), public :: MPI_IO_DATA_lg_bubbles #endif - !> @name MPI info for parallel IO with Lustre file systems - !> @{ - character(LEN=name_len) :: mpiiofs - integer :: mpi_info_int - !> @} - ! fluid_pp, bub_pp: auto-generated in generated_decls.fpp real(wp), allocatable, dimension(:) :: adv !< Advection variables ! Formatted Database File(s) Structure Parameters @@ -586,34 +579,7 @@ contains !> Subroutine to initialize parallel infrastructure impure subroutine s_initialize_parallel_io -#ifdef MFC_MPI - integer :: ierr !< Generic flag used to identify and report MPI errors -#endif - - num_dims = 1 + min(1, n) + min(1, p) - - if (mhd) then - num_vels = 3 - else - num_vels = num_dims - end if - - allocate (proc_coords(1:num_dims)) - - if (parallel_io .neqv. .true.) return - -#ifdef MFC_MPI - ! Option for Lustre file system (Darter/Comet/Stampede) - write (mpiiofs, '(A)') '/lustre_' - mpiiofs = trim(mpiiofs) - call MPI_INFO_CREATE(mpi_info_int, ierr) - call MPI_INFO_SET(mpi_info_int, 'romio_ds_write', 'disable', ierr) - - ! Option for UNIX file system (Hooke/Thomson) WRITE(mpiiofs, '(A)') '/ufs_' mpiiofs = TRIM(mpiiofs) mpi_info_int = - ! MPI_INFO_NULL - - allocate (start_idx(1:num_dims)) -#endif + call s_initialize_parallel_io_common end subroutine s_initialize_parallel_io @@ -628,7 +594,6 @@ contains end if ! Deallocating the grid variables for the x-coordinate direction - deallocate (x_cc, x_cb, dx) ! Deallocating grid variables for the y- and z-coordinate directions @@ -643,13 +608,13 @@ contains deallocate (x_root_cb, x_root_cc) end if - deallocate (proc_coords) + ! Shared: deallocate proc_coords and start_idx + call s_finalize_global_parameters_common deallocate (adv) #ifdef MFC_MPI if (parallel_io) then - deallocate (start_idx) do i = 1, sys_size MPI_IO_DATA%var(i)%sf => null() end do diff --git a/src/pre_process/m_global_parameters.fpp b/src/pre_process/m_global_parameters.fpp index b6259ee493..0b0444ac85 100644 --- a/src/pre_process/m_global_parameters.fpp +++ b/src/pre_process/m_global_parameters.fpp @@ -53,12 +53,9 @@ module m_global_parameters ! simplex_params: auto-generated in generated_decls.fpp ! fluid_rho (perturbs surrounding-air density to break grid symmetry): auto-generated in generated_decls.fpp - integer, allocatable, dimension(:) :: proc_coords !< Processor coordinates in MPI_CART_COMM - integer, allocatable, dimension(:) :: start_idx !< Starting cell-center index of local processor in global grid + ! proc_coords, start_idx, mpiiofs, mpi_info_int: in m_global_parameters_common #ifdef MFC_MPI type(mpi_io_var), public :: MPI_IO_DATA - character(LEN=name_len) :: mpiiofs - integer :: mpi_info_int !< MPI info for parallel IO with Lustre file systems #endif ! Initial Condition Parameters patch_icpp, patch_bc: auto-generated in generated_decls.fpp @@ -578,34 +575,7 @@ contains !> Configure MPI parallel I/O settings and allocate processor coordinate arrays. impure subroutine s_initialize_parallel_io -#ifdef MFC_MPI - integer :: ierr !< Generic flag used to identify and report MPI errors -#endif - - num_dims = 1 + min(1, n) + min(1, p) - - if (mhd) then - num_vels = 3 - else - num_vels = num_dims - end if - - allocate (proc_coords(1:num_dims)) - - if (parallel_io .neqv. .true.) return - -#ifdef MFC_MPI - ! Option for Lustre file system (Darter/Comet/Stampede) - write (mpiiofs, '(A)') '/lustre_' - mpiiofs = trim(mpiiofs) - call MPI_INFO_CREATE(mpi_info_int, ierr) - call MPI_INFO_SET(mpi_info_int, 'romio_ds_write', 'disable', ierr) - - ! Option for UNIX file system (Hooke/Thomson) WRITE(mpiiofs, '(A)') '/ufs_' mpiiofs = TRIM(mpiiofs) mpi_info_int = - ! MPI_INFO_NULL - - allocate (start_idx(1:num_dims)) -#endif + call s_initialize_parallel_io_common end subroutine s_initialize_parallel_io @@ -620,7 +590,6 @@ contains end if ! Deallocating grid variables for the x-direction - deallocate (x_cc, x_cb) ! Deallocating grid variables for the y- and z-directions if (n > 0) then @@ -630,11 +599,11 @@ contains end if end if - deallocate (proc_coords) + ! Shared: deallocate proc_coords and start_idx + call s_finalize_global_parameters_common #ifdef MFC_MPI if (parallel_io) then - deallocate (start_idx) do i = 1, sys_size MPI_IO_DATA%var(i)%sf => null() end do diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index 33b9cf5fb5..cf096f632a 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -119,8 +119,7 @@ module m_global_parameters $:GPU_DECLARE(create='[x_domain, y_domain, z_domain, neighbor_domain_x, neighbor_domain_y, neighbor_domain_z, num_gbl_ibs]') $:GPU_DECLARE(create='[down_sample]') - integer, allocatable, dimension(:) :: proc_coords !< Processor coordinates in MPI_CART_COMM - integer, allocatable, dimension(:) :: start_idx !< Starting cell-center index of local processor in global grid + ! proc_coords, start_idx, mpiiofs, mpi_info_int: in m_global_parameters_common type(mpi_io_var), public :: MPI_IO_DATA type(mpi_io_ib_var), public :: MPI_IO_IB_DATA type(mpi_io_airfoil_ib_var), public :: MPI_IO_airfoil_IB_DATA @@ -128,12 +127,6 @@ module m_global_parameters type(mpi_io_levelset_norm_var), public :: MPI_IO_levelsetnorm_DATA real(wp), allocatable, dimension(:,:), public :: MPI_IO_DATA_lag_bubbles - !> @name MPI info for parallel IO with Lustre file systems - !> @{ - character(LEN=name_len) :: mpiiofs - integer :: mpi_info_int - !> @} - ! sys_size, eqn_idx, b_size, tensor_size: in m_global_parameters_common (GPU_DECLARE there too) type(qbmm_idx_info) :: qbmm_idx !< QBMM moment index mappings (allocatable; GPU-managed separately). @@ -995,37 +988,7 @@ contains !> Initializes parallel infrastructure impure subroutine s_initialize_parallel_io -#ifdef MFC_MPI - integer :: ierr !< Generic flag used to identify and report MPI errors -#endif - - #:if not MFC_CASE_OPTIMIZATION - num_dims = 1 + min(1, n) + min(1, p) - - if (mhd) then - num_vels = 3 - else - num_vels = num_dims - end if - #:endif - - allocate (proc_coords(1:num_dims)) - - if (parallel_io .neqv. .true.) return - -#ifdef MFC_MPI - ! Option for Lustre file system (Darter/Comet/Stampede) - write (mpiiofs, '(A)') '/lustre_' - mpiiofs = trim(mpiiofs) - - call MPI_INFO_CREATE(mpi_info_int, ierr) - call MPI_INFO_SET(mpi_info_int, 'romio_ds_write', 'disable', ierr) - - ! Option for UNIX file system (Hooke/Thomson) WRITE(mpiiofs, '(A)') '/ufs_' mpiiofs = TRIM(mpiiofs) mpi_info_int = - ! MPI_INFO_NULL - - allocate (start_idx(1:num_dims)) -#endif + call s_initialize_parallel_io_common end subroutine s_initialize_parallel_io @@ -1054,10 +1017,10 @@ contains end if end if - deallocate (proc_coords) - if (parallel_io) then - deallocate (start_idx) + ! Shared: deallocate proc_coords and start_idx + call s_finalize_global_parameters_common + if (parallel_io) then if (bubbles_lagrange) then do i = 1, sys_size + 1 MPI_IO_DATA%var(i)%sf => null() From ec39da850a7483bd0124998811424789c95f797b Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Wed, 10 Jun 2026 14:43:36 -0400 Subject: [PATCH 03/22] src: fix Task-1 hoist review findings Fypp resolves #:include at parse time, so generated_case_opt_decls.fpp is now emitted for every target (header-only stub outside simulation) and included unconditionally. Restores the dropped shear GPU_DECLARE (consumed in device kernels), restores the original nmom guard conditions in pre/post, and drops unused imports. The post-process beta_idx ordering change from the hoist is retained deliberately: it aligns post with simulation's writer layout (the parent ordering was inconsistent for bubbles_lagrange combined with mhd/elasticity/etc.). --- src/common/m_global_parameters_common.fpp | 6 ++---- src/post_process/m_global_parameters.fpp | 3 ++- src/pre_process/m_global_parameters.fpp | 3 ++- toolchain/mfc/params/generators/fortran_gen.py | 10 +++++++--- toolchain/mfc/params_tests/test_fortran_gen.py | 6 +++--- 5 files changed, 16 insertions(+), 12 deletions(-) diff --git a/src/common/m_global_parameters_common.fpp b/src/common/m_global_parameters_common.fpp index 9af4cc6043..bff61f3a45 100644 --- a/src/common/m_global_parameters_common.fpp +++ b/src/common/m_global_parameters_common.fpp @@ -14,10 +14,9 @@ module m_global_parameters_common #endif use m_derived_types - use m_helper_basic use m_thermochem, only: num_species use m_constants, only: model_eqns_gamma_law, model_eqns_5eq, model_eqns_6eq, model_eqns_4eq, recon_type_weno, & - & recon_type_muscl, nnode, name_len + & recon_type_muscl, name_len implicit none @@ -26,9 +25,7 @@ module m_global_parameters_common ! Case-optimization declarations: parameters (under MFC_CASE_OPTIMIZATION) or plain variables for ! num_dims, num_vels, weno_polyn, muscl_polyn, weno_num_stencils, wenojs, igr, etc. -#ifdef MFC_SIMULATION #:include 'generated_case_opt_decls.fpp' -#endif ! For pre_process and post_process: num_dims and num_vels are declared manually here ! (sim gets them from generated_case_opt_decls.fpp above) @@ -69,6 +66,7 @@ module m_global_parameters_common #ifdef MFC_SIMULATION $:GPU_DECLARE(create='[sys_size, eqn_idx, b_size, tensor_size]') + $:GPU_DECLARE(create='[shear_num, shear_indices, shear_BC_flip_num, shear_BC_flip_indices]') #endif !> @name Processor coordinates and parallel-IO addressing (identical declaration across all three targets) diff --git a/src/post_process/m_global_parameters.fpp b/src/post_process/m_global_parameters.fpp index 388b05172d..e33942bfc7 100644 --- a/src/post_process/m_global_parameters.fpp +++ b/src/post_process/m_global_parameters.fpp @@ -362,7 +362,8 @@ contains if (model_eqns == model_eqns_gamma_law) num_fluids = 1 ! post_process sets nmom to 6 for qbmm before the shared eqn_idx setup - if (qbmm) nmom = 6 + ! (guard matches the original site: inside the 5-equation branch) + if (model_eqns == model_eqns_5eq .and. qbmm) nmom = 6 ! Populate eqn_idx, sys_size, b_size, tensor_size, elasticity, shear_* (shared logic) call s_initialize_eqn_idx(nmom) diff --git a/src/pre_process/m_global_parameters.fpp b/src/pre_process/m_global_parameters.fpp index 0b0444ac85..82e706de28 100644 --- a/src/pre_process/m_global_parameters.fpp +++ b/src/pre_process/m_global_parameters.fpp @@ -443,7 +443,8 @@ contains if (model_eqns == model_eqns_gamma_law) num_fluids = 1 ! Pre-process sets nmom to 6 for qbmm before the shared eqn_idx setup - if (qbmm .and. nnode == 4) nmom = 6 + ! (guards match the original site: 5-equation bubbles with 4-node qbmm) + if (model_eqns == model_eqns_5eq .and. bubbles_euler .and. qbmm .and. nnode == 4) nmom = 6 ! Populate eqn_idx, sys_size, b_size, tensor_size, elasticity, shear_* (shared logic) call s_initialize_eqn_idx(nmom) diff --git a/toolchain/mfc/params/generators/fortran_gen.py b/toolchain/mfc/params/generators/fortran_gen.py index 0ef8efe0c6..63434d10e6 100644 --- a/toolchain/mfc/params/generators/fortran_gen.py +++ b/toolchain/mfc/params/generators/fortran_gen.py @@ -249,7 +249,9 @@ def get_generated_files(build_dir: Path) -> List[Tuple[Path, str]]: Paths match the cmake include directory structure: build_dir/include/{full_target}/generated_{namelist,decls,constants}.fpp - The simulation target also gets generated_case_opt_decls.fpp. + Every target gets generated_case_opt_decls.fpp: real content for simulation, + a header-only stub for the others (Fypp resolves #:include at parse time, so + the file must exist for every target even inside a dead conditional). """ result = [] for short, full in TARGETS: @@ -257,6 +259,8 @@ def get_generated_files(build_dir: Path) -> List[Tuple[Path, str]]: result.append((inc / "generated_namelist.fpp", generate_namelist_fpp(short))) result.append((inc / "generated_decls.fpp", generate_decls_fpp(short))) result.append((inc / "generated_constants.fpp", generate_constants_fpp())) - sim_inc = build_dir / "include" / "simulation" - result.append((sim_inc / "generated_case_opt_decls.fpp", generate_case_opt_decls_fpp())) + for short, full in TARGETS: + inc = build_dir / "include" / full + content = generate_case_opt_decls_fpp() if short == "sim" else _HEADER + "! (no case-optimization declarations for this target)\n" + result.append((inc / "generated_case_opt_decls.fpp", content)) return result diff --git a/toolchain/mfc/params_tests/test_fortran_gen.py b/toolchain/mfc/params_tests/test_fortran_gen.py index 6f007c0ead..f5e654f13c 100644 --- a/toolchain/mfc/params_tests/test_fortran_gen.py +++ b/toolchain/mfc/params_tests/test_fortran_gen.py @@ -210,13 +210,13 @@ def test_check_target_raises_on_bad_target(): generate_decls_fpp("bad") -def test_get_generated_files_returns_ten(): +def test_get_generated_files_returns_twelve(): from pathlib import Path from mfc.params.generators.fortran_gen import get_generated_files files = get_generated_files(Path("/build")) - assert len(files) == 10 + assert len(files) == 12 paths = [str(p) for p, _ in files] assert any("pre_process/generated_namelist.fpp" in p for p in paths) assert any("simulation/generated_decls.fpp" in p for p in paths) @@ -244,7 +244,7 @@ def test_get_generated_files_includes_constants(): files = get_generated_files(Path("/tmp/x")) names = {p.name for p, _ in files} assert names == {"generated_namelist.fpp", "generated_decls.fpp", "generated_constants.fpp", "generated_case_opt_decls.fpp"} - assert len(files) == 10 + assert len(files) == 12 def test_generate_case_opt_decls_fpp(): From d50e68dbae915e315a79d124043a552c1f73dceb Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Wed, 10 Jun 2026 14:53:05 -0400 Subject: [PATCH 04/22] src: pass nb into s_initialize_eqn_idx Simulation declares nb in its own case-optimization block (excluded from generated decls), so the hoisted routine cannot reference it directly; pass it as an argument like nmom. --- src/common/m_global_parameters_common.fpp | 15 ++++++++------- src/post_process/m_global_parameters.fpp | 2 +- src/pre_process/m_global_parameters.fpp | 2 +- src/simulation/m_global_parameters.fpp | 2 +- 4 files changed, 11 insertions(+), 10 deletions(-) diff --git a/src/common/m_global_parameters_common.fpp b/src/common/m_global_parameters_common.fpp index bff61f3a45..8a92e2d87c 100644 --- a/src/common/m_global_parameters_common.fpp +++ b/src/common/m_global_parameters_common.fpp @@ -89,15 +89,16 @@ contains !! !! @param nmom_in Number of carried moments per R0 location (per-target: pre/post pass an !! integer variable; sim passes its integer parameter nmom = 6). Used only in the 5eq - !! qbmm bubble-index calculation (eqn_idx%bub%end = eqn_idx%adv%end + nb*nmom_in). + !! qbmm bubble-index calculation (eqn_idx%bub%end = eqn_idx%adv%end + nb_in*nmom_in). !! !! Per-target callers are responsible for the following after this call: !! - qbmm_idx allocations and fills (diverge between pre vs sim/post) !! - sim-only: gam = bub_pp%gam_g, nmomsp/nmomtot, Re_idx allocation, GPU_UPDATE calls !! - post-only: beta_idx increment (bubbles_lagrange path), offset/grid allocations - impure subroutine s_initialize_eqn_idx(nmom_in) + impure subroutine s_initialize_eqn_idx(nmom_in, nb_in) integer, intent(in) :: nmom_in + integer, intent(in) :: nb_in ! Gamma/Pi_inf Model @@ -146,12 +147,12 @@ contains if (bubbles_euler) then eqn_idx%bub%beg = sys_size + 1 if (qbmm) then - eqn_idx%bub%end = eqn_idx%adv%end + nb*nmom_in + eqn_idx%bub%end = eqn_idx%adv%end + nb_in*nmom_in else if (.not. polytropic) then - eqn_idx%bub%end = sys_size + 4*nb + eqn_idx%bub%end = sys_size + 4*nb_in else - eqn_idx%bub%end = sys_size + 2*nb + eqn_idx%bub%end = sys_size + 2*nb_in end if end if sys_size = eqn_idx%bub%end @@ -203,9 +204,9 @@ contains if (bubbles_euler) then eqn_idx%bub%beg = sys_size + 1 - eqn_idx%bub%end = sys_size + 2*nb + eqn_idx%bub%end = sys_size + 2*nb_in if (.not. polytropic) then - eqn_idx%bub%end = sys_size + 4*nb + eqn_idx%bub%end = sys_size + 4*nb_in end if sys_size = eqn_idx%bub%end end if diff --git a/src/post_process/m_global_parameters.fpp b/src/post_process/m_global_parameters.fpp index e33942bfc7..b359d50306 100644 --- a/src/post_process/m_global_parameters.fpp +++ b/src/post_process/m_global_parameters.fpp @@ -366,7 +366,7 @@ contains if (model_eqns == model_eqns_5eq .and. qbmm) nmom = 6 ! Populate eqn_idx, sys_size, b_size, tensor_size, elasticity, shear_* (shared logic) - call s_initialize_eqn_idx(nmom) + call s_initialize_eqn_idx(nmom, nb) ! post-only: 6eq alf is a dummy (no void fraction in 6eq) if (model_eqns == model_eqns_6eq) eqn_idx%alf = 1 diff --git a/src/pre_process/m_global_parameters.fpp b/src/pre_process/m_global_parameters.fpp index 82e706de28..2da45d4915 100644 --- a/src/pre_process/m_global_parameters.fpp +++ b/src/pre_process/m_global_parameters.fpp @@ -447,7 +447,7 @@ contains if (model_eqns == model_eqns_5eq .and. bubbles_euler .and. qbmm .and. nnode == 4) nmom = 6 ! Populate eqn_idx, sys_size, b_size, tensor_size, elasticity, shear_* (shared logic) - call s_initialize_eqn_idx(nmom) + call s_initialize_eqn_idx(nmom, nb) ! Per-target (pre_process): qbmm_idx allocations and fills if (model_eqns == model_eqns_5eq .and. bubbles_euler) then diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index cf096f632a..7045c4c302 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -726,7 +726,7 @@ contains Re_size_max = 0 ! Populate eqn_idx, sys_size, b_size, tensor_size, elasticity, shear_* (shared logic) - call s_initialize_eqn_idx(nmom) + call s_initialize_eqn_idx(nmom, nb) ! sim-only: GPU update for shear state after s_initialize_eqn_idx populated it if (model_eqns == model_eqns_5eq .or. model_eqns == model_eqns_6eq) then From 33e1d9ccf4f75a6e947212ae579d71cd52268655 Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Wed, 10 Jun 2026 15:26:01 -0400 Subject: [PATCH 05/22] codegen: generate MPI broadcast lists from the parameter registry Drive s_mpi_bcast_user_inputs in all three m_mpi_proxy.fpp files from generate_bcast_fpp(target) in fortran_gen.py. The generator emits case_dir, class-(a) scalars (INT/LOG/REAL), FORTRAN_ARRAY_DIMS arrays, and the fluid_pp / bub_pp / lag_params / chem_params struct-array loops. Manual residues (bc_x/y/z members, domain bounds, m/n/p_glb, patch loops, etc.) stay in each file. get_generated_files() grows from 12 to 15 entries. Latent bug fixed: chem_wrt_Y (post, FORTRAN_ARRAY_DIMS dim=num_species) was namelist-bound but never broadcast; consumed by s_save_data on all ranks. Registry-driven generation closes it by construction. Tuple-set equivalence: pre +n_start_old (dead param, harmless), post +chem_wrt_Y only, sim exact identity. --- src/post_process/m_mpi_proxy.fpp | 91 ++---- src/pre_process/m_mpi_proxy.fpp | 72 ++--- src/simulation/m_mpi_proxy.fpp | 149 +++------- .../mfc/params/generators/fortran_gen.py | 279 +++++++++++++++++- .../mfc/params_tests/test_fortran_gen.py | 249 +++++++++++++++- 5 files changed, 603 insertions(+), 237 deletions(-) diff --git a/src/post_process/m_mpi_proxy.fpp b/src/post_process/m_mpi_proxy.fpp index 1636539d7a..711aff1904 100644 --- a/src/post_process/m_mpi_proxy.fpp +++ b/src/post_process/m_mpi_proxy.fpp @@ -65,88 +65,37 @@ contains #ifdef MFC_MPI integer :: i !< Generic loop iterator integer :: ierr !< Generic flag used to identify and report MPI errors - ! Logistics - call MPI_BCAST(case_dir, len(case_dir), MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr) + ! Generated: case_dir, namelist scalars (INT/LOG/REAL), array dims (including + ! chem_wrt_Y - latent bug fix), fluid_pp loop, bub_pp guard + #:include 'generated_bcast.fpp' - #:for VAR in [ 'm', 'n', 'p', 'm_glb', 'n_glb', 'p_glb', & - & 't_step_start', 't_step_stop', 't_step_save', 'weno_order', & - & 'model_eqns', 'num_fluids', 'bc_x%beg', 'bc_x%end', 'bc_y%beg', & - & 'bc_y%end', 'bc_z%beg', 'bc_z%end', 'flux_lim', 'format', & - & 'precision', 'fd_order', 'thermal', 'nb', 'relax_model', & - & 'n_start', 'num_ibs', 'muscl_order' ] + ! manual: m_glb, n_glb, p_glb (computed in s_read_input_file, not namelist-bound) + call MPI_BCAST(m_glb, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(n_glb, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(p_glb, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + + ! manual: bc_x/y/z member broadcasts (struct members not in NAMELIST_VARS) + #:for VAR in [ 'bc_x%beg', 'bc_x%end', 'bc_y%beg', 'bc_y%end', 'bc_z%beg', 'bc_z%end'] call MPI_BCAST(${VAR}$, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) #:endfor - #:for VAR in [ 'cyl_coord', 'mpp_lim', 'mixture_err', & - & 'alt_soundspeed', 'hypoelasticity', 'mhd', 'parallel_io', & - & 'rho_wrt', 'E_wrt', 'pres_wrt', 'gamma_wrt', 'sim_data', & - & 'heat_ratio_wrt', 'pi_inf_wrt', 'pres_inf_wrt', 'cons_vars_wrt', & - & 'prim_vars_wrt', 'c_wrt', 'qm_wrt','schlieren_wrt','chem_wrt_T', & - & 'bubbles_euler', 'qbmm', 'polytropic', 'polydisperse', & - & 'file_per_process', 'relax', 'cf_wrt', 'igr', 'liutex_wrt', & - & 'bc_x%isothermal_in', 'bc_y%isothermal_in', 'bc_z%isothermal_in', & - & 'bc_x%isothermal_out', 'bc_y%isothermal_out', 'bc_z%isothermal_out',& - & 'adv_n', 'ib', 'cfl_adap_dt', 'cfl_const_dt', 'cfl_dt', & - & 'surface_tension', 'hyperelasticity', 'bubbles_lagrange', & - & 'output_partial_domain', 'relativity', 'cont_damage', 'bc_io', & - & 'down_sample','fft_wrt', 'hyper_cleaning', 'ib_state_wrt'] + #:for VAR in [ 'bc_x%isothermal_in', 'bc_y%isothermal_in', 'bc_z%isothermal_in', & + & 'bc_x%isothermal_out', 'bc_y%isothermal_out', 'bc_z%isothermal_out'] call MPI_BCAST(${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) #:endfor - if (bubbles_lagrange) then - #:for VAR in ['lag_header', 'lag_txt_wrt', 'lag_db_wrt', 'lag_id_wrt', & - & 'lag_pos_wrt', 'lag_pos_prev_wrt', 'lag_vel_wrt', 'lag_rad_wrt', & - & 'lag_rvel_wrt', 'lag_r0_wrt', 'lag_rmax_wrt', 'lag_rmin_wrt', & - & 'lag_dphidt_wrt', 'lag_pres_wrt', 'lag_mv_wrt', 'lag_mg_wrt', & - & 'lag_betaT_wrt', 'lag_betaC_wrt', 'bc_io', 'down_sample' ] - call MPI_BCAST(${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) - #:endfor - end if - - call MPI_BCAST(flux_wrt(1), 3, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(omega_wrt(1), 3, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(mom_wrt(1), 3, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(vel_wrt(1), 3, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(alpha_rho_wrt(1), num_fluids_max, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(alpha_rho_e_wrt(1), num_fluids_max, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(alpha_wrt(1), num_fluids_max, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) - - do i = 1, num_fluids_max - call MPI_BCAST(fluid_pp(i)%gamma, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(fluid_pp(i)%pi_inf, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(fluid_pp(i)%cv, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(fluid_pp(i)%qv, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(fluid_pp(i)%qvp, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(fluid_pp(i)%G, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(fluid_pp(i)%K, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(fluid_pp(i)%nn, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(fluid_pp(i)%tau0, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(fluid_pp(i)%hb_m, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(fluid_pp(i)%mu_min, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(fluid_pp(i)%mu_max, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(fluid_pp(i)%mu_bulk, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(fluid_pp(i)%non_newtonian, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) - end do - - ! Subgrid bubble parameters - if (bubbles_euler .or. bubbles_lagrange) then - #:for VAR in [ 'R0ref','p0ref','rho0ref','T0ref', & - 'ss','pv','vd','mu_l','mu_v','mu_g','gam_v','gam_g', & - 'M_v','M_g','k_v','k_g','cp_v','cp_g','R_v','R_g'] - call MPI_BCAST(bub_pp%${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) - #:endfor - end if + ! manual: cfl_dt (runtime-computed logical), bc_io (BC-file existence) + call MPI_BCAST(cfl_dt, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(bc_io, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) - #:for VAR in [ 'pref', 'rhoref', 'R0ref', 'poly_sigma', 'Web', 'Ca', & - & 'Re_inv', 'Bx0', 'sigma', 't_save', 't_stop', & - & 'x_output%beg', 'x_output%end', 'y_output%beg', & - & 'y_output%end', 'z_output%beg', 'z_output%end', & - & 'bc_x%Twall_in', 'bc_x%Twall_out', 'bc_y%Twall_in',& - & 'bc_y%Twall_out','bc_z%Twall_in', 'bc_z%Twall_out' ] + ! manual: output domain and Twall bc members (struct members not in NAMELIST_VARS) + #:for VAR in [ 'x_output%beg', 'x_output%end', 'y_output%beg', & + & 'y_output%end', 'z_output%beg', 'z_output%end', & + & 'bc_x%Twall_in', 'bc_x%Twall_out', 'bc_y%Twall_in', & + & 'bc_y%Twall_out','bc_z%Twall_in', 'bc_z%Twall_out'] call MPI_BCAST(${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) #:endfor - call MPI_BCAST(schlieren_alpha(1), num_fluids_max, mpi_p, 0, MPI_COMM_WORLD, ierr) #endif end subroutine s_mpi_bcast_user_inputs diff --git a/src/pre_process/m_mpi_proxy.fpp b/src/pre_process/m_mpi_proxy.fpp index 6629b2feea..bf6e3d1180 100644 --- a/src/pre_process/m_mpi_proxy.fpp +++ b/src/pre_process/m_mpi_proxy.fpp @@ -25,46 +25,35 @@ contains integer :: i, j integer :: ierr - call MPI_BCAST(case_dir, len(case_dir), MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr) - - #:for VAR in ['t_step_old', 't_step_start', 'm', 'n', 'p', 'm_glb', 'n_glb', 'p_glb', & - & 'loops_x', 'loops_y', 'loops_z', 'model_eqns', 'num_fluids', & - & 'weno_order', 'precision', 'perturb_flow_fluid', & - & 'perturb_sph_fluid', 'num_patches', 'thermal', 'nb', 'dist_type',& - & 'relax_model', 'num_ibs', 'n_start', 'elliptic_smoothing_iters', & - & 'num_bc_patches', 'mixlayer_perturb_nk', 'recon_type', & - & 'muscl_order', 'igr_order' ] - call MPI_BCAST(${VAR}$, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) - #:endfor + ! Generated: case_dir, namelist scalars (INT/LOG/REAL), fluid_rho, fluid_pp loop, bub_pp + #:include 'generated_bcast.fpp' + + ! manual: m/n/p_glb (computed from m/n/p post-read, not namelist-bound) + call MPI_BCAST(m_glb, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(n_glb, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(p_glb, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) - #:for VAR in [ 'old_grid','old_ic','stretch_x','stretch_y','stretch_z', & - & 'cyl_coord','mpp_lim','hypoelasticity', 'relax', 'parallel_io', & - & 'perturb_flow', 'perturb_sph', 'mixlayer_vel_profile', & - & 'mixlayer_perturb', 'bubbles_euler', 'polytropic', 'polydisperse', & - & 'qbmm', 'file_per_process', 'adv_n', 'ib' , 'cfl_adap_dt', & - & 'cfl_const_dt', 'cfl_dt', 'surface_tension', & - & 'bc_x%isothermal_in', 'bc_y%isothermal_in', 'bc_z%isothermal_in', & - & 'bc_x%isothermal_out', 'bc_y%isothermal_out', 'bc_z%isothermal_out',& - & 'hyperelasticity', 'pre_stress', 'elliptic_smoothing', 'viscous', & - & 'bubbles_lagrange', 'bc_io', 'mhd', 'relativity', 'cont_damage', & - & 'igr', 'down_sample', 'simplex_perturb','fft_wrt', 'hyper_cleaning' ] + ! manual: bc_x/y/z member broadcasts (struct members not in NAMELIST_VARS) + #:for VAR in ['bc_x%isothermal_in', 'bc_y%isothermal_in', 'bc_z%isothermal_in', & + & 'bc_x%isothermal_out', 'bc_y%isothermal_out', 'bc_z%isothermal_out'] call MPI_BCAST(${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) #:endfor - call MPI_BCAST(fluid_rho(1), num_fluids_max, mpi_p, 0, MPI_COMM_WORLD, ierr) + ! manual: bc_x/y/z and domain bounds (registered as struct members, not in NAMELIST_VARS) #:for VAR in [ 'x_domain%beg', 'x_domain%end', 'y_domain%beg', & - & 'y_domain%end', 'z_domain%beg', 'z_domain%end', 'a_x', 'a_y', & - & 'a_z', 'x_a', 'x_b', 'y_a', 'y_b', 'z_a', 'z_b', 'bc_x%beg', & - & 'bc_x%end', 'bc_y%beg', 'bc_y%end', 'bc_z%beg', 'bc_z%end', & - & 'perturb_flow_mag', 'pref', 'rhoref', 'poly_sigma', 'R0ref', & - & 'Web', 'Ca', 'Re_inv', 'sigR', 'sigV', 'rhoRV', 'palpha_eps', & - & 'ptgalpha_eps', 'sigma', 'pi_fac', 'mixlayer_vel_coef', 'Bx0', & - & 'mixlayer_perturb_k0', 'bc_x%Twall_in', 'bc_x%Twall_out', & + & 'y_domain%end', 'z_domain%beg', 'z_domain%end', & + & 'bc_x%beg', 'bc_x%end', 'bc_y%beg', 'bc_y%end', & + & 'bc_z%beg', 'bc_z%end', 'bc_x%Twall_in', 'bc_x%Twall_out', & & 'bc_y%Twall_in', 'bc_y%Twall_out', 'bc_z%Twall_in', & & 'bc_z%Twall_out'] call MPI_BCAST(${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) #:endfor + ! manual: cfl_dt (runtime-computed logical), bc_io (BC-file existence) + call MPI_BCAST(cfl_dt, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(bc_io, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + + ! manual: patch_bc (complex array members broadcast with size()) do i = 1, num_bc_patches_max #:for VAR in ['geometry', 'type', 'dir', 'loc'] call MPI_BCAST(patch_bc(i)%${VAR}$, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) @@ -77,6 +66,7 @@ contains #:endfor end do + ! manual: patch_icpp (complex members: alter_patch, sph_har_coeff, size() arrays) do i = 1, num_patches_max #:for VAR in [ 'geometry', 'smooth_patch_id'] call MPI_BCAST(patch_icpp(i)%${VAR}$, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) @@ -115,6 +105,7 @@ contains end if end do + ! manual: patch_ib (per-target member subset; pre uses size() for vel/angular_vel/angles) do i = 1, num_ibs #:for VAR in ['vel', 'angular_vel', 'angles'] call MPI_BCAST(patch_ib(i)%${VAR}$, size(patch_ib(i)%${VAR}$), mpi_p, 0, MPI_COMM_WORLD, ierr) @@ -130,14 +121,14 @@ contains call MPI_BCAST(patch_ib(i)%slip, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) end do + ! manual: ib_airfoil (kept manual alongside patch_ib) do i = 1, num_ib_airfoils_max #:for VAR in ['c', 'p', 't', 'm'] call MPI_BCAST(ib_airfoil(i)%${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) #:endfor end do - call MPI_BCAST(num_stl_models, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) - + ! manual: stl_models loop (num_stl_models scalar is generated; grouped array members) do i = 1, num_stl_models_max call MPI_BCAST(stl_models(i)%model_filepath, len(stl_models(i)%model_filepath), MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr) call MPI_BCAST(stl_models(i)%model_threshold, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) @@ -146,13 +137,8 @@ contains #:endfor end do - ! Simplex noise and fluid physical parameters + ! manual: simplex_params density perturbation (nested i-j loops; irregular structure) do i = 1, num_fluids_max - #:for VAR in [ 'gamma','pi_inf', 'G', 'cv', 'qv', 'qvp', 'K', 'nn', 'tau0', 'hb_m', 'mu_min', 'mu_max', 'mu_bulk' ] - call MPI_BCAST(fluid_pp(i)%${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) - #:endfor - call MPI_BCAST(fluid_pp(i)%non_newtonian, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(simplex_params%perturb_dens(i), 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) call MPI_BCAST(simplex_params%perturb_dens_freq(i), 1, mpi_p, 0, MPI_COMM_WORLD, ierr) call MPI_BCAST(simplex_params%perturb_dens_scale(i), 1, mpi_p, 0, MPI_COMM_WORLD, ierr) @@ -162,15 +148,7 @@ contains end do end do - ! Subgrid bubble parameters - if (bubbles_euler .or. bubbles_lagrange) then - #:for VAR in [ 'R0ref','p0ref','rho0ref','T0ref', & - 'ss','pv','vd','mu_l','mu_v','mu_g','gam_v','gam_g', & - 'M_v','M_g','k_v','k_g','cp_v','cp_g','R_v','R_g'] - call MPI_BCAST(bub_pp%${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) - #:endfor - end if - + ! manual: simplex_params velocity perturbation do i = 1, 3 call MPI_BCAST(simplex_params%perturb_vel(i), 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) call MPI_BCAST(simplex_params%perturb_vel_freq(i), 1, mpi_p, 0, MPI_COMM_WORLD, ierr) diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index b51dd1f970..8a25872423 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -61,85 +61,37 @@ contains integer :: i, j !< Generic loop iterator integer :: ierr !< Generic flag used to identify and report MPI errors - call MPI_BCAST(case_dir, len(case_dir), MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr) + ! Generated: case_dir, namelist scalars (INT/LOG/REAL), CASE_OPT guard, fluid_pp loop, + ! bub_pp guard, lag_params guard, chem_params guard + #:include 'generated_bcast.fpp' - #:for VAR in ['k_x', 'k_y', 'k_z', 'w_x', 'w_y', 'w_z', 'p_x', 'p_y', & - & 'p_z', 'g_x', 'g_y', 'g_z'] - call MPI_BCAST(${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) - #:endfor + ! manual: m_glb, n_glb, p_glb (computed in s_read_input_file, not namelist-bound) + call MPI_BCAST(m_glb, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(n_glb, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(p_glb, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) - #:for VAR in ['t_step_old', 'm', 'n', 'p', 'm_glb', 'n_glb', 'p_glb', & - & 't_step_start','t_step_stop','t_step_save','t_step_print', & - & 'model_eqns','time_stepper', 'riemann_solver', 'low_Mach', & - & 'wave_speeds', 'avg_state', 'precision', 'bc_x%beg', 'bc_x%end', & - & 'bc_y%beg', 'bc_y%end', 'bc_z%beg', 'bc_z%end', 'fd_order', & - & 'num_probes', 'num_integrals', 'bubble_model', 'thermal', & - & 'num_source', 'relax_model', 'num_ibs', 'num_particle_clouds', 'n_start', & - & 'num_bc_patches', 'num_igr_iters', 'num_igr_warm_start_iters', & - & 'adap_dt_max_iters', 'collision_model', 'ib_neighborhood_radius', & - & 'int_comp' ] + ! manual: bc_x/y/z member broadcasts (struct members not in NAMELIST_VARS) + #:for VAR in [ 'bc_x%beg', 'bc_x%end', 'bc_y%beg', 'bc_y%end', 'bc_z%beg', 'bc_z%end'] call MPI_BCAST(${VAR}$, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) #:endfor - #:for VAR in [ 'run_time_info','cyl_coord', 'mpp_lim', & - & 'mp_weno', 'rdma_mpi', 'cont_damage', 'bc_io', & - & 'weno_Re_flux', 'alt_soundspeed', 'null_weights', 'mixture_err', & - & 'parallel_io', 'hypoelasticity', 'bubbles_euler', 'polytropic', & - & 'polydisperse', 'qbmm', 'acoustic_source', 'probe_wrt', 'integral_wrt', & - & 'prim_vars_wrt', 'weno_avg', 'file_per_process', 'relax', & - & 'adv_n', 'adap_dt', 'ib', 'bodyForces', 'bf_x', 'bf_y', 'bf_z', & - & 'bc_x%grcbc_in', 'bc_x%grcbc_out', 'bc_x%grcbc_vel_out', & - & 'bc_y%grcbc_in', 'bc_y%grcbc_out', 'bc_y%grcbc_vel_out', & - & 'bc_z%grcbc_in', 'bc_z%grcbc_out', 'bc_z%grcbc_vel_out', & - & 'bc_x%isothermal_in', 'bc_y%isothermal_in', 'bc_z%isothermal_in', & - & 'bc_x%isothermal_out', 'bc_y%isothermal_out', 'bc_z%isothermal_out', & - & 'cfl_adap_dt', 'cfl_const_dt', 'cfl_dt', 'surface_tension', & - & 'shear_stress', 'bulk_stress', 'bubbles_lagrange', & - & 'hyperelasticity', 'down_sample', 'fft_wrt', & - & 'hyper_cleaning', 'ib_state_wrt'] + #:for VAR in [ 'bc_x%grcbc_in', 'bc_x%grcbc_out', 'bc_x%grcbc_vel_out', & + & 'bc_y%grcbc_in', 'bc_y%grcbc_out', 'bc_y%grcbc_vel_out', & + & 'bc_z%grcbc_in', 'bc_z%grcbc_out', 'bc_z%grcbc_vel_out', & + & 'bc_x%isothermal_in', 'bc_y%isothermal_in', 'bc_z%isothermal_in', & + & 'bc_x%isothermal_out', 'bc_y%isothermal_out', 'bc_z%isothermal_out'] call MPI_BCAST(${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) #:endfor - if (chemistry) then - #:for VAR in [ 'diffusion', 'reactions' ] - call MPI_BCAST(chem_params%${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) - #:endfor - - #:for VAR in [ 'gamma_method', 'transport_model' ] - call MPI_BCAST(chem_params%${VAR}$, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) - #:endfor - end if - - if (bubbles_lagrange) then - #:for VAR in [ 'heatTransfer_model', 'massTransfer_model', 'pressure_corrector', & - & 'write_bubbles', 'write_bubbles_stats'] - call MPI_BCAST(lag_params%${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) - #:endfor - - #:for VAR in ['solver_approach', 'cluster_type', 'smooth_type', 'nBubs_glb'] - call MPI_BCAST(lag_params%${VAR}$, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) - #:endfor - - #:for VAR in ['epsilonb','charwidth','valmaxvoid'] - call MPI_BCAST(lag_params%${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) - #:endfor - end if - - #:for VAR in [ 'dt','weno_eps','teno_CT','pref','rhoref','R0ref','Web','Ca', 'sigma', & - & 'Re_inv', 'poly_sigma', 'palpha_eps', 'ptgalpha_eps', 'pi_fac', & - & 'bc_x%vb1','bc_x%vb2','bc_x%vb3','bc_x%ve1','bc_x%ve2','bc_x%ve3', & - & 'bc_y%vb1','bc_y%vb2','bc_y%vb3','bc_y%ve1','bc_y%ve2','bc_y%ve3', & - & 'bc_z%vb1','bc_z%vb2','bc_z%vb3','bc_z%ve1','bc_z%ve2','bc_z%ve3', & - & 'bc_x%pres_in','bc_x%pres_out','bc_y%pres_in','bc_y%pres_out', 'bc_z%pres_in','bc_z%pres_out', & - & 'x_domain%beg', 'x_domain%end', 'y_domain%beg', 'y_domain%end', & - & 'z_domain%beg', 'z_domain%end', 'x_a', 'x_b', 'y_a', 'y_b', 'z_a', & - & 'bc_x%Twall_in', 'bc_x%Twall_out', 'bc_y%Twall_in', 'bc_y%Twall_out', & - & 'bc_z%Twall_in', 'bc_z%Twall_out', & - & 'z_b', 't_stop', 't_save', 'cfl_target', 'Bx0', 'alf_factor', & - & 'tau_star', 'cont_damage_s', 'alpha_bar', 'adap_dt_tol', & - & 'ic_eps', 'ic_beta', 'hyper_cleaning_speed', & - & 'hyper_cleaning_tau', 'coefficient_of_restitution', 'collision_time', & - & 'ib_coefficient_of_friction' ] + #:for VAR in [ 'bc_x%vb1','bc_x%vb2','bc_x%vb3','bc_x%ve1','bc_x%ve2','bc_x%ve3', & + & 'bc_y%vb1','bc_y%vb2','bc_y%vb3','bc_y%ve1','bc_y%ve2','bc_y%ve3', & + & 'bc_z%vb1','bc_z%vb2','bc_z%vb3','bc_z%ve1','bc_z%ve2','bc_z%ve3', & + & 'bc_x%pres_in','bc_x%pres_out','bc_y%pres_in','bc_y%pres_out', & + & 'bc_z%pres_in','bc_z%pres_out', & + & 'x_domain%beg', 'x_domain%end', 'y_domain%beg', 'y_domain%end', & + & 'z_domain%beg', 'z_domain%end', & + & 'bc_x%Twall_in', 'bc_x%Twall_out', 'bc_y%Twall_in', 'bc_y%Twall_out', & + & 'bc_z%Twall_in', 'bc_z%Twall_out'] call MPI_BCAST(${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) #:endfor @@ -150,42 +102,17 @@ contains #:endfor end do - #:if not MFC_CASE_OPTIMIZATION - call MPI_BCAST(mapped_weno, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(wenoz, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(teno, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(weno_order, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(nb, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(num_fluids, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(wenoz_q, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(mhd, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(relativity, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(igr, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(igr_order, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(igr_pres_lim, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(igr_iter_solver, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(viscous, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(recon_type, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(muscl_order, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(muscl_lim, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) - #:endif + ! manual: cfl_dt (runtime-computed logical), bc_io (BC-file existence) + call MPI_BCAST(cfl_dt, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(bc_io, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) - do i = 1, num_fluids_max - #:for VAR in [ 'gamma','pi_inf','G','cv','qv','qvp','K','nn','tau0','hb_m','mu_min','mu_max','mu_bulk' ] - call MPI_BCAST(fluid_pp(i)%${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) - #:endfor - call MPI_BCAST(fluid_pp(i)%non_newtonian, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(fluid_pp(i)%Re(1), 2, mpi_p, 0, MPI_COMM_WORLD, ierr) - end do - - if (bubbles_euler .or. bubbles_lagrange) then - #:for VAR in [ 'R0ref','p0ref','rho0ref','T0ref', & - 'ss','pv','vd','mu_l','mu_v','mu_g','gam_v','gam_g',& - 'M_v','M_g','k_v','k_g','cp_v','cp_g','R_v','R_g'] - call MPI_BCAST(bub_pp%${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) - #:endfor - end if + ! manual: shear_stress, bulk_stress (derived from Re_size post-init on all ranks), + ! bodyForces (derived from bf_x/y/z) + call MPI_BCAST(shear_stress, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(bulk_stress, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(bodyForces, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + ! manual: bc_x per-fluid inflow arrays (loop over num_fluids_max) do i = 1, num_fluids_max #:for VAR in ['bc_x%alpha_rho_in','bc_x%alpha_in','bc_y%alpha_rho_in','bc_y%alpha_in','bc_z%alpha_rho_in', & & 'bc_z%alpha_in'] @@ -193,6 +120,7 @@ contains #:endfor end do + ! manual: patch_ib (sim member subset differs from pre; uses count=3, adds mass/moving_ibm) do i = 1, num_ibs #:for VAR in [ 'radius', 'length_x', 'length_y', 'length_z', & & 'x_centroid', 'y_centroid', 'z_centroid', 'slip', 'mass'] @@ -207,14 +135,14 @@ contains call MPI_BCAST(patch_ib(i)%model_id, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) end do + ! manual: ib_airfoil (kept manual alongside patch_ib) do i = 1, num_ib_airfoils_max #:for VAR in ['c', 'p', 't', 'm'] call MPI_BCAST(ib_airfoil(i)%${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) #:endfor end do - call MPI_BCAST(num_stl_models, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) - + ! manual: stl_models loop (num_stl_models scalar is generated; grouped array members) do i = 1, num_stl_models_max call MPI_BCAST(stl_models(i)%model_filepath, len(stl_models(i)%model_filepath), MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr) call MPI_BCAST(stl_models(i)%model_threshold, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) @@ -223,6 +151,7 @@ contains #:endfor end do + ! manual: particle_cloud (runtime loop to num_particle_clouds; irregular member subset) do i = 1, num_particle_clouds #:for VAR in ['x_centroid', 'y_centroid', 'z_centroid', 'length_x', 'length_y', 'length_z', & & 'radius', 'mass', 'min_spacing'] @@ -234,6 +163,7 @@ contains call MPI_BCAST(particle_cloud(i)%packing_method, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) end do + ! manual: acoustic/probe/integral (combined loop; complex acoustic member set) do j = 1, num_probes_max do i = 1, 3 call MPI_BCAST(acoustic(j)%loc(i), 1, mpi_p, 0, MPI_COMM_WORLD, ierr) @@ -261,11 +191,6 @@ contains call MPI_BCAST(integral(j)%${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) #:endfor end do - - ! NVIDIA UVM variables - call MPI_BCAST(nv_uvm_out_of_core, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(nv_uvm_igr_temps_on_gpu, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(nv_uvm_pref_gpu, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) #endif end subroutine s_mpi_bcast_user_inputs diff --git a/toolchain/mfc/params/generators/fortran_gen.py b/toolchain/mfc/params/generators/fortran_gen.py index 63434d10e6..41e6dc4cc8 100644 --- a/toolchain/mfc/params/generators/fortran_gen.py +++ b/toolchain/mfc/params/generators/fortran_gen.py @@ -229,6 +229,277 @@ def generate_case_opt_decls_fpp() -> str: return "\n".join(parts) + "\n" +# Struct roots in NAMELIST_VARS whose member-level broadcasts are irregular +# (per-target member subsets, grouped array members, nested loops, etc.). +# These are kept in the manual residue of m_mpi_proxy.fpp. +_STRUCT_ROOTS = frozenset({"bc_x", "bc_y", "bc_z", "x_domain", "y_domain", "z_domain", "x_output", "y_output", "z_output"}) + +# Variables excluded from broadcast generation (derived post-broadcast or non-namelist). +_BCAST_EXCLUDE = frozenset({"muscl_eps"}) + +# Post-process scalars that are namelist-bound but consumed on rank 0 only (reading/init). +# Broadcasting them would be harmless but changes the existing call set, which we preserve. +_POST_BCAST_EXCLUDE = frozenset({"avg_state", "cfl_target", "igr_order", "num_bc_patches", "recon_type", "sigR"}) + +# TYPED_DECLS entries kept entirely in the manual residue: their member-broadcast structure +# is irregular (non-uniform subsets, size() arrays, nested loops, complex guards). +_MANUAL_TYPED_DECLS = frozenset( + { + "patch_icpp", # complex array members (alter_patch, sph_har_coeff, size() members) + "patch_bc", # kept manual in pre (generated separately would diverge) + "patch_ib", # per-target member subsets differ (pre: size(); sim: count=3; slip absent in sim) + "ib_airfoil", # grouped array members (ib_airfoil attrs are all plain scalars but kept manual) + "stl_models", # grouped array members (model_translate/scale emitted as count=3, not per-index) + "particle_cloud", # sim-only runtime loop; kept manual alongside patch_ib + "simplex_params", # nested i×j loops + "acoustic", # complex combined loop with probe/integral in existing code + "probe", # combined with acoustic/integral + "integral", # combined with acoustic/probe + } +) + + +def _mpi_type_for(ptype: ParamType) -> str: + """Return the Fortran MPI type constant for a ParamType.""" + if ptype in _REAL_TYPES: + return "mpi_p" + if ptype in (ParamType.INT, ParamType.ANALYTIC_INT): + return "MPI_INTEGER" + if ptype == ParamType.LOG: + return "MPI_LOGICAL" + if ptype == ParamType.STR: + return "MPI_CHARACTER" + raise ValueError(f"No MPI type mapping for {ptype!r}") + + +def _bcast_scalar(name: str, mpi_type: str, count: str = "1") -> str: + return f" call MPI_BCAST({name}, {count}, {mpi_type}, 0, MPI_COMM_WORLD, ierr)" + + +def _classify_scalar_vars(target: str) -> Tuple[List[str], List[str], List[str], List[str]]: + """Return (int_vars, log_vars, real_vars, case_opt_vars) for class-(a) scalars. + + case_opt_vars are sim CASE_OPT_PARAMS (wrapped in #:if not MFC_CASE_OPTIMIZATION). + All four lists contain variable names suitable for a 1-element MPI_BCAST. + Struct roots, TYPED_DECLS, FORTRAN_ARRAY_DIMS, case_dir, and _BCAST_EXCLUDE are removed. + """ + int_vars: List[str] = [] + log_vars: List[str] = [] + real_vars: List[str] = [] + case_opt_vars: List[str] = [] # (name, mpi_type) pairs for sim case-opt section + + for name in sorted(NAMELIST_VARS): + if target not in NAMELIST_VARS[name]: + continue + if name in _BCAST_EXCLUDE: + continue + if target == "post" and name in _POST_BCAST_EXCLUDE: + continue + if name in _STRUCT_ROOTS: + continue + if name in TYPED_DECLS: + continue + if name in FORTRAN_ARRAY_DIMS: + continue + if name == "case_dir": + continue + + pdef = REGISTRY.all_params.get(name) + if pdef is None: + continue # no registry entry — skip (e.g. 'G' in post is rank-0-only) + + if target == "sim" and name in CASE_OPT_PARAMS: + case_opt_vars.append(name) + continue + + mpi_type = _mpi_type_for(pdef.param_type) + if mpi_type == "MPI_INTEGER": + int_vars.append(name) + elif mpi_type == "MPI_LOGICAL": + log_vars.append(name) + else: + real_vars.append(name) + + return sorted(int_vars), sorted(log_vars), sorted(real_vars), sorted(case_opt_vars) + + +def _emit_bcast_group(lines: List[str], vars_list: List[str], mpi_type: str) -> None: + """Append one MPI_BCAST per variable in vars_list to lines.""" + for name in vars_list: + lines.append(_bcast_scalar(name, mpi_type)) + + +def _emit_fluid_pp(lines: List[str], target: str) -> None: + """Emit the fluid_pp(i) member-loop broadcast block. + + Members broadcast: gamma, pi_inf, G, cv, qv, qvp (all REAL, mpi_p). + Sim additionally: Re(1) with count=2. + """ + # These are the core EOS members broadcast in all three targets. + # Other fluid_pp members in the registry (mu_v, pv, mul0, ss, D_v, etc.) are + # physics-specific and are initialised from input on rank 0 only or derived. + fp_real_members = ["gamma", "pi_inf", "G", "cv", "qv", "qvp"] + lines.append(" do i = 1, num_fluids_max") + for mem in fp_real_members: + lines.append(f" call MPI_BCAST(fluid_pp(i)%{mem}, 1, mpi_p, 0, MPI_COMM_WORLD, ierr)") + if target == "sim": + lines.append(" call MPI_BCAST(fluid_pp(i)%Re(1), 2, mpi_p, 0, MPI_COMM_WORLD, ierr)") + lines.append(" end do") + + +def _emit_bub_pp(lines: List[str]) -> None: + """Emit the bub_pp member broadcast block (all targets, under bubbles guard). + + All bub_pp members in the registry are REAL — emit them all sorted. + """ + bub_members = sorted(k.split("%", 1)[1] for k in REGISTRY.all_params if k.startswith("bub_pp%")) + lines.append(" if (bubbles_euler .or. bubbles_lagrange) then") + for mem in bub_members: + lines.append(f" call MPI_BCAST(bub_pp%{mem}, 1, mpi_p, 0, MPI_COMM_WORLD, ierr)") + lines.append(" end if") + + +def _emit_lag_params(lines: List[str]) -> None: + """Emit the lag_params member broadcast block (sim-only, under bubbles_lagrange guard). + + Subset of lag_params members that are actually broadcast: the fields that appear in the + existing simulation m_mpi_proxy.fpp lag_params block. The registry has additional + members (T0, Thost, c0, rho0, x0) that are not broadcast (rank-0-only). + """ + # Hardcoded broadcast subset — matches the existing sim m_mpi_proxy exactly. + lag_log = ["heatTransfer_model", "massTransfer_model", "pressure_corrector", "write_bubbles", "write_bubbles_stats"] + lag_int = ["cluster_type", "nBubs_glb", "smooth_type", "solver_approach"] + lag_real = ["charwidth", "epsilonb", "valmaxvoid"] + lines.append(" if (bubbles_lagrange) then") + for mem in sorted(lag_log): + lines.append(f" call MPI_BCAST(lag_params%{mem}, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)") + for mem in sorted(lag_int): + lines.append(f" call MPI_BCAST(lag_params%{mem}, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)") + for mem in sorted(lag_real): + lines.append(f" call MPI_BCAST(lag_params%{mem}, 1, mpi_p, 0, MPI_COMM_WORLD, ierr)") + lines.append(" end if") + + +def _emit_chem_params(lines: List[str]) -> None: + """Emit the chem_params member broadcast block (sim-only, under chemistry guard). + + All chem_params members in the registry are broadcast. + """ + chem_members = sorted(k.split("%", 1)[1] for k in REGISTRY.all_params if k.startswith("chem_params%")) + chem_log = [m for m in chem_members if REGISTRY.all_params[f"chem_params%{m}"].param_type == ParamType.LOG] + chem_int = [m for m in chem_members if REGISTRY.all_params[f"chem_params%{m}"].param_type == ParamType.INT] + lines.append(" if (chemistry) then") + for mem in sorted(chem_log): + lines.append(f" call MPI_BCAST(chem_params%{mem}, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)") + for mem in sorted(chem_int): + lines.append(f" call MPI_BCAST(chem_params%{mem}, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)") + lines.append(" end if") + + +def _emit_fortran_array_dims(lines: List[str], target: str) -> None: + """Emit broadcasts for FORTRAN_ARRAY_DIMS entries belonging to this target. + + Each entry is a 1D array; we emit: MPI_BCAST(name(1), dim, mpi_type, ...). + The element type is determined by the registered member param_type. + """ + for name in sorted(FORTRAN_ARRAY_DIMS): + if name not in NAMELIST_VARS or target not in NAMELIST_VARS[name]: + continue + dim = FORTRAN_ARRAY_DIMS[name] + # Determine element type from registry (use the (1) example entry) + member_key = f"{name}(1)" + pdef = REGISTRY.all_params.get(member_key) + if pdef is None: + raise ValueError(f"No registry entry for {member_key!r} (needed for FORTRAN_ARRAY_DIMS broadcast)") + mpi_type = _mpi_type_for(pdef.param_type) + lines.append(f" call MPI_BCAST({name}(1), {dim}, {mpi_type}, 0, MPI_COMM_WORLD, ierr)") + + +def generate_bcast_fpp(target: str) -> str: + """Return the generated MPI broadcast statements for a target as a Fortran string. + + Generates class-(a) namelist scalar broadcasts (sorted, MPI type from registry) + and class-(b) struct/array broadcasts for cleanly walkable TYPED_DECLS entries. + The manual residue (bc_x members, patch_ib, patch_icpp, simplex_params, etc.) + stays in the hand-written m_mpi_proxy.fpp. + + For sim, CASE_OPT_PARAMS scalars are wrapped in ``#:if not MFC_CASE_OPTIMIZATION``. + The ``chem_wrt_Y`` array broadcast for post is intentionally ADDED here (latent bug fix: + it is namelist-bound and consumed on all ranks but was previously not broadcast). + """ + _check_target(target) + lines: List[str] = [_HEADER.rstrip()] + + # -- case_dir (STR, special len() form) -- + lines.append(" call MPI_BCAST(case_dir, len(case_dir), MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr)") + lines.append("") + + # -- Class (a): simple scalar broadcasts -- + int_vars, log_vars, real_vars, case_opt_vars = _classify_scalar_vars(target) + + if int_vars: + lines.append(" ! Integer scalars") + _emit_bcast_group(lines, int_vars, "MPI_INTEGER") + lines.append("") + + if log_vars: + lines.append(" ! Logical scalars") + _emit_bcast_group(lines, log_vars, "MPI_LOGICAL") + lines.append("") + + if real_vars: + lines.append(" ! Real scalars") + _emit_bcast_group(lines, real_vars, "mpi_p") + lines.append("") + + # Sim case-optimization guard + if target == "sim" and case_opt_vars: + lines.append(" ! Case-optimization scalars (absent when constants are baked in)") + lines.append(" #:if not MFC_CASE_OPTIMIZATION") + # Classify by type + co_int = sorted(v for v in case_opt_vars if _mpi_type_for(REGISTRY.all_params[v].param_type) == "MPI_INTEGER") + co_log = sorted(v for v in case_opt_vars if _mpi_type_for(REGISTRY.all_params[v].param_type) == "MPI_LOGICAL") + co_real = sorted(v for v in case_opt_vars if _mpi_type_for(REGISTRY.all_params[v].param_type) == "mpi_p") + for name in co_int: + lines.append(" " + _bcast_scalar(name, "MPI_INTEGER")) + for name in co_log: + lines.append(" " + _bcast_scalar(name, "MPI_LOGICAL")) + for name in co_real: + lines.append(" " + _bcast_scalar(name, "mpi_p")) + lines.append(" #:endif") + lines.append("") + + # -- Class (b): FORTRAN_ARRAY_DIMS (simple 1D arrays) -- + array_dim_names = [n for n in FORTRAN_ARRAY_DIMS if n in NAMELIST_VARS and target in NAMELIST_VARS[n]] + if array_dim_names: + lines.append(" ! Array broadcasts (dimension from FORTRAN_ARRAY_DIMS)") + _emit_fortran_array_dims(lines, target) + lines.append("") + + # -- Class (b): TYPED_DECLS structs (cleanly walkable) -- + if "fluid_pp" in NAMELIST_VARS and target in NAMELIST_VARS["fluid_pp"]: + lines.append(" ! fluid_pp member loop") + _emit_fluid_pp(lines, target) + lines.append("") + + if "bub_pp" in NAMELIST_VARS and target in NAMELIST_VARS["bub_pp"]: + lines.append(" ! bub_pp members (under bubbles guard)") + _emit_bub_pp(lines) + lines.append("") + + if target == "sim": + if "lag_params" in NAMELIST_VARS and "sim" in NAMELIST_VARS["lag_params"]: + lines.append(" ! lag_params members (under bubbles_lagrange guard)") + _emit_lag_params(lines) + lines.append("") + if "chem_params" in NAMELIST_VARS and "sim" in NAMELIST_VARS["chem_params"]: + lines.append(" ! chem_params members (under chemistry guard)") + _emit_chem_params(lines) + lines.append("") + + return "\n".join(lines) + "\n" + + def resolve_namelist_content(fpp_path: Path) -> str: """Return the namelist content for an fpp file. @@ -245,13 +516,14 @@ def resolve_namelist_content(fpp_path: Path) -> str: def get_generated_files(build_dir: Path) -> List[Tuple[Path, str]]: - """Return (path, content) for all 10 generated .fpp files under build_dir. + """Return (path, content) for all 15 generated .fpp files under build_dir. Paths match the cmake include directory structure: - build_dir/include/{full_target}/generated_{namelist,decls,constants}.fpp + build_dir/include/{full_target}/generated_{namelist,decls,constants,case_opt_decls,bcast}.fpp Every target gets generated_case_opt_decls.fpp: real content for simulation, a header-only stub for the others (Fypp resolves #:include at parse time, so the file must exist for every target even inside a dead conditional). + Every target gets generated_bcast.fpp with its MPI broadcast statements. """ result = [] for short, full in TARGETS: @@ -263,4 +535,7 @@ def get_generated_files(build_dir: Path) -> List[Tuple[Path, str]]: inc = build_dir / "include" / full content = generate_case_opt_decls_fpp() if short == "sim" else _HEADER + "! (no case-optimization declarations for this target)\n" result.append((inc / "generated_case_opt_decls.fpp", content)) + for short, full in TARGETS: + inc = build_dir / "include" / full + result.append((inc / "generated_bcast.fpp", generate_bcast_fpp(short))) return result diff --git a/toolchain/mfc/params_tests/test_fortran_gen.py b/toolchain/mfc/params_tests/test_fortran_gen.py index f5e654f13c..2f74251863 100644 --- a/toolchain/mfc/params_tests/test_fortran_gen.py +++ b/toolchain/mfc/params_tests/test_fortran_gen.py @@ -210,18 +210,21 @@ def test_check_target_raises_on_bad_target(): generate_decls_fpp("bad") -def test_get_generated_files_returns_twelve(): +def test_get_generated_files_returns_fifteen(): from pathlib import Path from mfc.params.generators.fortran_gen import get_generated_files files = get_generated_files(Path("/build")) - assert len(files) == 12 + assert len(files) == 15 paths = [str(p) for p, _ in files] assert any("pre_process/generated_namelist.fpp" in p for p in paths) assert any("simulation/generated_decls.fpp" in p for p in paths) assert any("post_process/generated_namelist.fpp" in p for p in paths) assert any("simulation/generated_case_opt_decls.fpp" in p for p in paths) + assert any("pre_process/generated_bcast.fpp" in p for p in paths) + assert any("simulation/generated_bcast.fpp" in p for p in paths) + assert any("post_process/generated_bcast.fpp" in p for p in paths) def test_generate_constants_fpp_content(): @@ -236,15 +239,21 @@ def test_generate_constants_fpp_content(): assert out == generate_constants_fpp() -def test_get_generated_files_includes_constants(): +def test_get_generated_files_includes_bcast(): from pathlib import Path from mfc.params.generators.fortran_gen import get_generated_files files = get_generated_files(Path("/tmp/x")) names = {p.name for p, _ in files} - assert names == {"generated_namelist.fpp", "generated_decls.fpp", "generated_constants.fpp", "generated_case_opt_decls.fpp"} - assert len(files) == 12 + assert names == { + "generated_namelist.fpp", + "generated_decls.fpp", + "generated_constants.fpp", + "generated_case_opt_decls.fpp", + "generated_bcast.fpp", + } + assert len(files) == 15 def test_generate_case_opt_decls_fpp(): @@ -287,3 +296,233 @@ def test_generate_case_opt_decls_fpp(): # AUTO-GENERATED header present assert "AUTO-GENERATED" in out + + +# ── generate_bcast_fpp tests ────────────────────────────────────────────────── + + +def test_generate_bcast_fpp_header_and_case_dir(): + """All targets emit the AUTO-GENERATED header and case_dir broadcast.""" + from mfc.params.generators.fortran_gen import generate_bcast_fpp + + for target in ("pre", "sim", "post"): + out = generate_bcast_fpp(target) + assert "AUTO-GENERATED" in out, f"{target}: missing header" + assert "call MPI_BCAST(case_dir, len(case_dir), MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr)" in out + + +def test_generate_bcast_fpp_class_a_int_scalars(): + """Class-(a) integer scalars are broadcast for all targets.""" + from mfc.params.generators.fortran_gen import generate_bcast_fpp + + pre = generate_bcast_fpp("pre") + sim = generate_bcast_fpp("sim") + post = generate_bcast_fpp("post") + + # m, n, p are in all three targets + for out, target in [(pre, "pre"), (sim, "sim"), (post, "post")]: + assert "call MPI_BCAST(m, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)" in out, f"{target}: m missing" + assert "call MPI_BCAST(n, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)" in out, f"{target}: n missing" + assert "call MPI_BCAST(p, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)" in out, f"{target}: p missing" + + # precision is in all three + for out, target in [(pre, "pre"), (sim, "sim"), (post, "post")]: + assert "call MPI_BCAST(precision, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)" in out, f"{target}: precision missing" + + +def test_generate_bcast_fpp_class_a_log_scalars(): + """Class-(a) logical scalars are broadcast per target.""" + from mfc.params.generators.fortran_gen import generate_bcast_fpp + + pre = generate_bcast_fpp("pre") + sim = generate_bcast_fpp("sim") + post = generate_bcast_fpp("post") + + # bubbles_euler is in all three + for out, target in [(pre, "pre"), (sim, "sim"), (post, "post")]: + assert "call MPI_BCAST(bubbles_euler, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)" in out, f"{target}: bubbles_euler missing" + + # run_time_info is sim-only + assert "run_time_info" in sim + assert "run_time_info" not in pre + assert "run_time_info" not in post + + # old_grid is pre-only + assert "old_grid" in pre + assert "old_grid" not in sim + assert "old_grid" not in post + + +def test_generate_bcast_fpp_class_a_real_scalars(): + """Class-(a) real scalars (mpi_p) are broadcast per target.""" + from mfc.params.generators.fortran_gen import generate_bcast_fpp + + sim = generate_bcast_fpp("sim") + pre = generate_bcast_fpp("pre") + post = generate_bcast_fpp("post") + + # dt is sim-only REAL + assert "call MPI_BCAST(dt, 1, mpi_p, 0, MPI_COMM_WORLD, ierr)" in sim + assert "call MPI_BCAST(dt, " not in pre and "call MPI_BCAST(dt, " not in post + + # pref is in all three + for out, target in [(pre, "pre"), (sim, "sim"), (post, "post")]: + assert "call MPI_BCAST(pref, 1, mpi_p, 0, MPI_COMM_WORLD, ierr)" in out, f"{target}: pref missing" + + +def test_generate_bcast_fpp_case_opt_guard_sim(): + """Sim case-opt scalars are wrapped in #:if not MFC_CASE_OPTIMIZATION.""" + from mfc.params.generators.fortran_gen import generate_bcast_fpp + + out = generate_bcast_fpp("sim") + assert "#:if not MFC_CASE_OPTIMIZATION" in out + assert "#:endif" in out + + # weno_order and num_fluids are CASE_OPT_PARAMS — must be inside the guard + lines = out.splitlines() + guard_start = next(i for i, ln in enumerate(lines) if "#:if not MFC_CASE_OPTIMIZATION" in ln) + guard_end = next(i for i, ln in enumerate(lines) if i > guard_start and "#:endif" in ln) + guard_body = "\n".join(lines[guard_start:guard_end]) + assert "weno_order" in guard_body + assert "num_fluids" in guard_body + assert "mapped_weno" in guard_body + + # muscl_eps must NOT appear anywhere (it is excluded — derived post-broadcast) + assert "muscl_eps" not in out + + +def test_generate_bcast_fpp_case_opt_not_in_pre_post(): + """CASE_OPT_PARAMS that apply to pre/post are emitted unconditionally there.""" + from mfc.params.generators.fortran_gen import generate_bcast_fpp + + pre = generate_bcast_fpp("pre") + post = generate_bcast_fpp("post") + + # weno_order and nb are in pre and post (not only sim) — emitted unconditionally + assert "call MPI_BCAST(weno_order, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)" in pre + assert "call MPI_BCAST(weno_order, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)" in post + assert "#:if not MFC_CASE_OPTIMIZATION" not in pre + assert "#:if not MFC_CASE_OPTIMIZATION" not in post + + +def test_generate_bcast_fpp_fluid_pp_loop(): + """fluid_pp member-loop is emitted for all targets; Re(1) count=2 only for sim.""" + from mfc.params.generators.fortran_gen import generate_bcast_fpp + + for target in ("pre", "sim", "post"): + out = generate_bcast_fpp(target) + assert "do i = 1, num_fluids_max" in out, f"{target}: fluid_pp loop missing" + assert "call MPI_BCAST(fluid_pp(i)%gamma, 1, mpi_p, 0, MPI_COMM_WORLD, ierr)" in out + assert "call MPI_BCAST(fluid_pp(i)%cv, 1, mpi_p, 0, MPI_COMM_WORLD, ierr)" in out + + sim = generate_bcast_fpp("sim") + assert "call MPI_BCAST(fluid_pp(i)%Re(1), 2, mpi_p, 0, MPI_COMM_WORLD, ierr)" in sim + + pre = generate_bcast_fpp("pre") + post = generate_bcast_fpp("post") + assert "fluid_pp(i)%Re(1)" not in pre + assert "fluid_pp(i)%Re(1)" not in post + + +def test_generate_bcast_fpp_bub_pp_under_guard(): + """bub_pp members are emitted under a bubbles guard for all targets.""" + from mfc.params.generators.fortran_gen import generate_bcast_fpp + + for target in ("pre", "sim", "post"): + out = generate_bcast_fpp(target) + assert "if (bubbles_euler .or. bubbles_lagrange) then" in out, f"{target}: bub_pp guard missing" + assert "call MPI_BCAST(bub_pp%R0ref, 1, mpi_p, 0, MPI_COMM_WORLD, ierr)" in out + assert "call MPI_BCAST(bub_pp%gam_g, 1, mpi_p, 0, MPI_COMM_WORLD, ierr)" in out + + +def test_generate_bcast_fpp_lag_chem_sim_only(): + """lag_params and chem_params broadcasts are emitted for sim only.""" + from mfc.params.generators.fortran_gen import generate_bcast_fpp + + sim = generate_bcast_fpp("sim") + pre = generate_bcast_fpp("pre") + post = generate_bcast_fpp("post") + + # lag_params under bubbles_lagrange guard + assert "if (bubbles_lagrange) then" in sim + assert "call MPI_BCAST(lag_params%solver_approach, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)" in sim + assert "lag_params" not in pre + assert "lag_params" not in post + + # chem_params under chemistry guard + assert "if (chemistry) then" in sim + assert "call MPI_BCAST(chem_params%diffusion, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)" in sim + assert "chem_params" not in pre + assert "chem_params" not in post + + +def test_generate_bcast_fpp_fortran_array_dims(): + """FORTRAN_ARRAY_DIMS arrays are broadcast with the correct dim and type.""" + from mfc.params.generators.fortran_gen import generate_bcast_fpp + + pre = generate_bcast_fpp("pre") + post = generate_bcast_fpp("post") + + # fluid_rho is pre-only, REAL → mpi_p + assert "call MPI_BCAST(fluid_rho(1), num_fluids_max, mpi_p, 0, MPI_COMM_WORLD, ierr)" in pre + assert "fluid_rho" not in post + + # alpha_wrt, mom_wrt are post-only LOGICAL → MPI_LOGICAL + assert "call MPI_BCAST(alpha_wrt(1), num_fluids_max, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)" in post + assert "call MPI_BCAST(mom_wrt(1), 3, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)" in post + + # schlieren_alpha is post-only REAL → mpi_p + assert "call MPI_BCAST(schlieren_alpha(1), num_fluids_max, mpi_p, 0, MPI_COMM_WORLD, ierr)" in post + + +def test_generate_bcast_fpp_chem_wrt_Y_latent_bug_fix(): + """chem_wrt_Y is broadcast for post (latent bug fix: previously missing).""" + from mfc.params.generators.fortran_gen import generate_bcast_fpp + + post = generate_bcast_fpp("post") + # chem_wrt_Y is FORTRAN_ARRAY_DIMS with dim=num_species, type=LOG + assert "call MPI_BCAST(chem_wrt_Y(1), num_species, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)" in post + + # Must not appear in pre or sim (post-only) + pre = generate_bcast_fpp("pre") + sim = generate_bcast_fpp("sim") + assert "chem_wrt_Y" not in pre + assert "chem_wrt_Y" not in sim + + +def test_generate_bcast_fpp_excludes_manual_residue(): + """Class-(c) vars and excluded vars must not appear in generated output.""" + from mfc.params.generators.fortran_gen import generate_bcast_fpp + + for target in ("pre", "sim", "post"): + out = generate_bcast_fpp(target) + # These are non-namelist and must stay manual + assert "m_glb" not in out, f"{target}: m_glb should be manual" + assert "n_glb" not in out, f"{target}: n_glb should be manual" + assert "p_glb" not in out, f"{target}: p_glb should be manual" + # muscl_eps is excluded (derived post-broadcast) + assert "muscl_eps" not in out, f"{target}: muscl_eps should be excluded" + + sim = generate_bcast_fpp("sim") + # shear_stress, bulk_stress, bodyForces are derived (non-namelist) + assert "shear_stress" not in sim + assert "bulk_stress" not in sim + assert "bodyForces" not in sim + + +def test_generate_bcast_fpp_deterministic(): + """Calling generate_bcast_fpp twice produces identical output (sorted, stable).""" + from mfc.params.generators.fortran_gen import generate_bcast_fpp + + for target in ("pre", "sim", "post"): + assert generate_bcast_fpp(target) == generate_bcast_fpp(target), f"{target}: not deterministic" + + +def test_generate_bcast_fpp_bad_target(): + """Unknown target raises ValueError.""" + import pytest + + from mfc.params.generators.fortran_gen import generate_bcast_fpp + + with pytest.raises(ValueError, match="Unknown target"): + generate_bcast_fpp("bad") From dd2025e8908e51664c1366e9baafcef19ff46601 Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Wed, 10 Jun 2026 15:47:24 -0400 Subject: [PATCH 06/22] src: share common default-value assignments across executables --- CMakeLists.txt | 5 + src/common/m_global_parameters_common.fpp | 97 ++++++++++++++++++- src/post_process/m_global_parameters.fpp | 91 +++-------------- src/pre_process/m_global_parameters.fpp | 93 +++--------------- src/simulation/m_global_parameters.fpp | 95 +++--------------- .../mfc/params/generators/fortran_gen.py | 17 +--- 6 files changed, 142 insertions(+), 256 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8a9c15907f..c684916b0e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -502,17 +502,22 @@ set(_mfc_gen_files_pre_process "${_mfc_gen_inc}/pre_process/generated_namelist.fpp" "${_mfc_gen_inc}/pre_process/generated_decls.fpp" "${_mfc_gen_inc}/pre_process/generated_constants.fpp" + "${_mfc_gen_inc}/pre_process/generated_bcast.fpp" + "${_mfc_gen_inc}/pre_process/generated_case_opt_decls.fpp" ) set(_mfc_gen_files_simulation "${_mfc_gen_inc}/simulation/generated_namelist.fpp" "${_mfc_gen_inc}/simulation/generated_decls.fpp" "${_mfc_gen_inc}/simulation/generated_constants.fpp" + "${_mfc_gen_inc}/simulation/generated_bcast.fpp" "${_mfc_gen_inc}/simulation/generated_case_opt_decls.fpp" ) set(_mfc_gen_files_post_process "${_mfc_gen_inc}/post_process/generated_namelist.fpp" "${_mfc_gen_inc}/post_process/generated_decls.fpp" "${_mfc_gen_inc}/post_process/generated_constants.fpp" + "${_mfc_gen_inc}/post_process/generated_bcast.fpp" + "${_mfc_gen_inc}/post_process/generated_case_opt_decls.fpp" ) set(_mfc_gen_files_syscheck) set(_mfc_all_gen_files diff --git a/src/common/m_global_parameters_common.fpp b/src/common/m_global_parameters_common.fpp index 8a92e2d87c..513c784650 100644 --- a/src/common/m_global_parameters_common.fpp +++ b/src/common/m_global_parameters_common.fpp @@ -16,7 +16,7 @@ module m_global_parameters_common use m_derived_types use m_thermochem, only: num_species use m_constants, only: model_eqns_gamma_law, model_eqns_5eq, model_eqns_6eq, model_eqns_4eq, recon_type_weno, & - & recon_type_muscl, name_len + & recon_type_muscl, name_len, dflt_int, dflt_real implicit none @@ -339,4 +339,99 @@ contains end subroutine s_finalize_global_parameters_common + !> Assign default values to the user-input parameters that are shared across all three executables (pre_process, simulation, + !! post_process). Per-target defaults (bc_io, cfl_dt, precision, nb, chem_params, bub_pp scalar companions, fluid_pp loop, patch + !! arrays, output flags) remain in the per-target s_assign_default_values_to_user_inputs routines, which call this subroutine + !! first, then call s_update_cell_bounds(cells_bounds, m, n, p) (cells_bounds is per-target), then apply their own assignments. + impure subroutine s_assign_common_defaults + + ! Logistics + case_dir = '.' + + ! Computational domain parameters (m/n/p set here; caller must call s_update_cell_bounds after) + m = dflt_int; n = 0; p = 0 + + cyl_coord = .false. + + ! CFL adaptive time-stepping flags + cfl_adap_dt = .false. + cfl_const_dt = .false. + + ! Time-stepping bookkeeping + n_start = dflt_int + t_step_start = dflt_int + + ! Simulation algorithm + model_eqns = dflt_int + relax = .false. + relax_model = dflt_int + hypoelasticity = .false. + hyperelasticity = .false. + elasticity = .false. + b_size = dflt_int + tensor_size = dflt_int + cont_damage = .false. + hyper_cleaning = .false. + + ! Boundary conditions + bc_x%beg = dflt_int; bc_x%end = dflt_int + bc_y%beg = dflt_int; bc_y%end = dflt_int + bc_z%beg = dflt_int; bc_z%end = dflt_int + + #:for DIM in ['x', 'y', 'z'] + #:for DIR in [1, 2, 3] + bc_${DIM}$%vb${DIR}$ = 0._wp + bc_${DIM}$%ve${DIR}$ = 0._wp + #:endfor + #:endfor + + #:for dir in {'x', 'y', 'z'} + bc_${dir}$%isothermal_in = .false. + bc_${dir}$%isothermal_out = .false. + bc_${dir}$%Twall_in = dflt_real + bc_${dir}$%Twall_out = dflt_real + #:endfor + + ! Case-optimization params: under case-opt these are compile-time constants in sim (skip assignment); in pre/post + ! MFC_CASE_OPTIMIZATION is always False so the block always executes there. + #:if not MFC_CASE_OPTIMIZATION + recon_type = recon_type_weno + weno_order = dflt_int + muscl_order = dflt_int + num_fluids = dflt_int + igr = .false. + mhd = .false. + relativity = .false. + #:endif + + ! Tait EOS + rhoref = dflt_real + pref = dflt_real + + ! Bubble modeling flags and parameters + R0ref = dflt_real + bubbles_euler = .false. + polydisperse = .false. + poly_sigma = dflt_real + qbmm = .false. + surface_tension = .false. + adv_n = .false. + sigma = dflt_real + bubbles_lagrange = .false. + + ! Immersed boundaries + ib = .false. + num_ibs = dflt_int + + ! MHD (background field) + Bx0 = dflt_real + + ! Output and I/O options + parallel_io = .false. + file_per_process = .false. + down_sample = .false. + fft_wrt = .false. + + end subroutine s_assign_common_defaults + end module m_global_parameters_common diff --git a/src/post_process/m_global_parameters.fpp b/src/post_process/m_global_parameters.fpp index b359d50306..10e83691d3 100644 --- a/src/post_process/m_global_parameters.fpp +++ b/src/post_process/m_global_parameters.fpp @@ -144,76 +144,36 @@ contains impure subroutine s_assign_default_values_to_user_inputs integer :: i !< Generic loop iterator - ! Logistics - case_dir = '.' + ! Shared defaults (case_dir, m/n/p, cyl_coord, cfl flags, model_eqns, elasticity, BC blocks, + ! recon/weno/muscl/num_fluids/igr/mhd/relativity under case-opt guard, Tait EOS, bubble flags, + ! IB flags, parallel I/O flags, fft_wrt) - ! Computational domain parameters - m = dflt_int; n = 0; p = 0 + call s_assign_common_defaults call s_update_cell_bounds(cells_bounds, m, n, p) + ! Computational domain parameters (post-specific) m_root = dflt_int - cyl_coord = .false. - t_step_start = dflt_int t_step_stop = dflt_int t_step_save = dflt_int - cfl_adap_dt = .false. - cfl_const_dt = .false. cfl_dt = .false. cfl_target = dflt_real t_save = dflt_real - n_start = dflt_int t_stop = dflt_real - ! Simulation algorithm parameters - model_eqns = dflt_int - num_fluids = dflt_int - recon_type = recon_type_weno - weno_order = dflt_int - muscl_order = dflt_int + ! Simulation algorithm parameters (post-specific) mixture_err = .false. alt_soundspeed = .false. - relax = .false. - relax_model = dflt_int - - mhd = .false. - relativity = .false. - - hypoelasticity = .false. - hyperelasticity = .false. - elasticity = .false. - b_size = dflt_int - tensor_size = dflt_int - cont_damage = .false. - hyper_cleaning = .false. - igr = .false. - - bc_x%beg = dflt_int; bc_x%end = dflt_int - bc_y%beg = dflt_int; bc_y%end = dflt_int - bc_z%beg = dflt_int; bc_z%end = dflt_int + bc_io = .false. num_bc_patches = dflt_int - #:for DIM in ['x', 'y', 'z'] - #:for DIR in [1, 2, 3] - bc_${DIM}$%vb${DIR}$ = 0._wp - bc_${DIM}$%ve${DIR}$ = 0._wp - #:endfor - #:endfor - - #:for dir in {'x', 'y', 'z'} - bc_${dir}$%isothermal_in = .false. - bc_${dir}$%isothermal_out = .false. - bc_${dir}$%Twall_in = dflt_real - bc_${dir}$%Twall_out = dflt_real - #:endfor - chem_params%gamma_method = 1 chem_params%transport_model = 1 - ! Fluids physical parameters + ! Fluids physical parameters (post-specific; G = dflt_real differs from pre/sim) do i = 1, num_fluids_max fluid_pp(i)%gamma = dflt_real fluid_pp(i)%pi_inf = dflt_real @@ -231,7 +191,8 @@ contains fluid_pp(i)%mu_bulk = dflt_real end do - ! Subgrid bubble parameters + ! Subgrid bubble parameters (bub_pp struct + scalar companions; bub_pp%R0ref is set in common + ! via R0ref; the scalar companions are per-target manual declarations) bub_pp%R0ref = dflt_real; R0ref = dflt_real bub_pp%p0ref = dflt_real; p0ref = dflt_real bub_pp%rho0ref = dflt_real; rho0ref = dflt_real @@ -253,11 +214,10 @@ contains bub_pp%R_v = dflt_real; R_v = dflt_real bub_pp%R_g = dflt_real; R_g = dflt_real - ! Formatted database file(s) structure parameters + ! Formatted database file(s) structure parameters (post-specific) format = dflt_int precision = dflt_int - down_sample = .false. alpha_rho_wrt = .false. alpha_rho_e_wrt = .false. @@ -268,10 +228,7 @@ contains chem_wrt_T = .false. flux_lim = dflt_int flux_wrt = .false. - parallel_io = .false. - file_per_process = .false. E_wrt = .false. - fft_wrt = .false. pres_wrt = .false. alpha_wrt = .false. gamma_wrt = .false. @@ -287,7 +244,6 @@ contains schlieren_wrt = .false. sim_data = .false. cf_wrt = .false. - ib = .false. ib_state_wrt = .false. lag_txt_wrt = .false. lag_header = .true. @@ -313,29 +269,11 @@ contains fd_order = dflt_int avg_state = dflt_int - ! Tait EOS - rhoref = dflt_real - pref = dflt_real - - ! Bubble modeling - bubbles_euler = .false. - qbmm = .false. - R0ref = dflt_real + ! Bubble modeling (post-specific) nb = dflt_int - polydisperse = .false. - poly_sigma = dflt_real sigR = dflt_real - sigma = dflt_real - surface_tension = .false. - adv_n = .false. - ! Lagrangian bubbles modeling - bubbles_lagrange = .false. - - ! IBM - num_ibs = dflt_int - - ! Output partial domain + ! Output partial domain (post-specific) output_partial_domain = .false. x_output%beg = dflt_real x_output%end = dflt_real @@ -344,9 +282,6 @@ contains z_output%beg = dflt_real z_output%end = dflt_real - ! MHD - Bx0 = dflt_real - end subroutine s_assign_default_values_to_user_inputs !> Computation of parameters, allocation procedures, and/or any other tasks needed to properly setup the module diff --git a/src/pre_process/m_global_parameters.fpp b/src/pre_process/m_global_parameters.fpp index 2da45d4915..fad0db9113 100644 --- a/src/pre_process/m_global_parameters.fpp +++ b/src/pre_process/m_global_parameters.fpp @@ -93,26 +93,21 @@ contains impure subroutine s_assign_default_values_to_user_inputs integer :: i !< Generic loop operator - ! Logistics - case_dir = '.' + ! Shared defaults (case_dir, m/n/p, cyl_coord, cfl flags, model_eqns, elasticity, BC blocks, + ! recon/weno/muscl/num_fluids/igr/mhd/relativity under case-opt guard, Tait EOS, bubble flags, + ! IB flags, parallel I/O flags, fft_wrt) + + call s_assign_common_defaults + call s_update_cell_bounds(cells_bounds, m, n, p) + + ! Logistics (pre-specific) old_grid = .false. old_ic = .false. t_step_old = dflt_int - t_step_start = dflt_int - - cfl_adap_dt = .false. - cfl_const_dt = .false. cfl_dt = .false. - n_start = dflt_int - - ! Computational domain parameters - m = dflt_int; n = 0; p = 0 - - call s_update_cell_bounds(cells_bounds, m, n, p) - - cyl_coord = .false. + ! Computational domain parameters (pre-specific) x_domain%beg = dflt_real x_domain%end = dflt_real y_domain%beg = dflt_real @@ -137,55 +132,14 @@ contains z_a = dflt_real z_b = dflt_real - ! Simulation algorithm parameters - model_eqns = dflt_int - relax = .false. - relax_model = dflt_int + ! Simulation algorithm parameters (pre-specific) palpha_eps = dflt_real ptgalpha_eps = dflt_real - num_fluids = dflt_int - recon_type = recon_type_weno - weno_order = dflt_int - igr = .false. igr_order = dflt_int - muscl_order = dflt_int - - hypoelasticity = .false. - hyperelasticity = .false. - elasticity = .false. pre_stress = .false. - b_size = dflt_int - tensor_size = dflt_int - cont_damage = .false. - hyper_cleaning = .false. - - mhd = .false. - relativity = .false. - - bc_x%beg = dflt_int; bc_x%end = dflt_int - bc_y%beg = dflt_int; bc_y%end = dflt_int - bc_z%beg = dflt_int; bc_z%end = dflt_int - - #:for DIM in ['x', 'y', 'z'] - #:for DIR in [1, 2, 3] - bc_${DIM}$%vb${DIR}$ = 0._wp - bc_${DIM}$%ve${DIR}$ = 0._wp - #:endfor - #:endfor - - #:for dir in {'x', 'y', 'z'} - bc_${dir}$%isothermal_in = .false. - bc_${dir}$%isothermal_out = .false. - bc_${dir}$%Twall_in = dflt_real - bc_${dir}$%Twall_out = dflt_real - #:endfor - - parallel_io = .false. - file_per_process = .false. + precision = 2 - down_sample = .false. viscous = .false. - bubbles_lagrange = .false. mixlayer_vel_profile = .false. mixlayer_vel_coef = 1._wp mixlayer_perturb = .false. @@ -200,8 +154,6 @@ contains elliptic_smoothing_iters = dflt_int elliptic_smoothing = .false. - fft_wrt = .false. - simplex_perturb = .false. simplex_params%perturb_vel(:) = .false. simplex_params%perturb_vel_freq(:) = dflt_real @@ -291,29 +243,16 @@ contains patch_bc(i)%radius = dflt_real end do - ! Tait EOS - rhoref = dflt_real - pref = dflt_real - - ! Bubble modeling - bubbles_euler = .false. + ! Bubble modeling (pre-specific) polytropic = .true. - polydisperse = .false. - thermal = dflt_int - R0ref = dflt_real nb = dflt_int Eu = dflt_real Ca = dflt_real Re_inv = dflt_real Web = dflt_real - poly_sigma = dflt_real - surface_tension = .false. - - adv_n = .false. - qbmm = .false. nmom = 1 sigR = dflt_real sigV = dflt_real @@ -327,14 +266,8 @@ contains Pe_c = dflt_real Tw = dflt_real - ! surface tension modeling - sigma = dflt_real pi_fac = 1._wp - ! Immersed Boundaries - ib = .false. - num_ibs = dflt_int - do i = 1, num_ib_patches_max_namelist patch_ib(i)%geometry = dflt_int patch_ib(i)%x_centroid = dflt_real @@ -402,8 +335,6 @@ contains fluid_pp(i)%mu_bulk = dflt_real end do - Bx0 = dflt_real - ! Subgrid bubble parameters bub_pp%R0ref = dflt_real; R0ref = dflt_real bub_pp%p0ref = dflt_real; p0ref = dflt_real diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index 7045c4c302..33f10d9dd6 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -307,31 +307,27 @@ contains impure subroutine s_assign_default_values_to_user_inputs integer :: i, j !< Generic loop iterator - ! Logistics - case_dir = '.' - run_time_info = .false. - t_step_old = dflt_int + ! Shared defaults (case_dir, m/n/p, cyl_coord, cfl flags, model_eqns, elasticity, BC blocks, + ! recon/weno/muscl/num_fluids/igr/mhd/relativity under case-opt guard, Tait EOS, bubble flags, + ! IB flags, parallel I/O flags, fft_wrt) - ! Computational domain parameters - m = dflt_int; n = 0; p = 0 + call s_assign_common_defaults call s_update_cell_bounds(cells_bounds, m, n, p) - cyl_coord = .false. + ! Logistics (sim-specific) + run_time_info = .false. + t_step_old = dflt_int + ! Computational domain parameters (sim-specific) dt = dflt_real - - cfl_adap_dt = .false. - cfl_const_dt = .false. cfl_dt = .false. cfl_target = dflt_real - t_step_start = dflt_int t_step_stop = dflt_int t_step_save = dflt_int t_step_print = 1 - n_start = dflt_int t_stop = dflt_real t_save = dflt_real @@ -340,8 +336,7 @@ contains nv_uvm_igr_temps_on_gpu = 3 ! => jac, jac_rhs, and jac_old on GPU (default) nv_uvm_pref_gpu = .false. - ! Simulation algorithm parameters - model_eqns = dflt_int + ! Simulation algorithm parameters (sim-specific) mpp_lim = .false. time_stepper = dflt_int muscl_eps = dflt_real @@ -357,29 +352,17 @@ contains alt_soundspeed = .false. null_weights = .false. mixture_err = .false. - parallel_io = .false. - file_per_process = .false. precision = 2 - down_sample = .false. - relax = .false. - relax_model = dflt_int palpha_eps = dflt_real ptgalpha_eps = dflt_real - hypoelasticity = .false. - hyperelasticity = .false. int_comp = 0 ic_eps = dflt_ic_eps ic_beta = dflt_ic_beta - elasticity = .false. hyper_model = dflt_int - b_size = dflt_int - tensor_size = dflt_int rdma_mpi = .false. shear_stress = .false. bulk_stress = .false. any_non_newtonian = .false. - cont_damage = .false. - hyper_cleaning = .false. num_igr_iters = dflt_num_igr_iters num_igr_warm_start_iters = dflt_num_igr_warm_start_iters alf_factor = dflt_alf_factor @@ -389,7 +372,6 @@ contains wenoz = .false. teno = .false. wenoz_q = dflt_real - igr = .false. igr_order = dflt_int igr_pres_lim = .false. viscous = .false. @@ -404,22 +386,11 @@ contains num_bc_patches = 0 bc_io = .false. - bc_x%beg = dflt_int; bc_x%end = dflt_int - bc_y%beg = dflt_int; bc_y%end = dflt_int - bc_z%beg = dflt_int; bc_z%end = dflt_int - - #:for DIM in ['x', 'y', 'z'] - #:for DIR in [1, 2, 3] - bc_${DIM}$%vb${DIR}$ = 0._wp - bc_${DIM}$%ve${DIR}$ = 0._wp - #:endfor - #:endfor - x_domain%beg = dflt_real; x_domain%end = dflt_real y_domain%beg = dflt_real; y_domain%end = dflt_real z_domain%beg = dflt_real; z_domain%end = dflt_real - ! Fluids physical parameters + ! Fluids physical parameters (sim-specific; Re(:) and G=0._wp differ from post) do i = 1, num_fluids_max fluid_pp(i)%gamma = dflt_real fluid_pp(i)%pi_inf = dflt_real @@ -438,7 +409,8 @@ contains fluid_pp(i)%mu_bulk = dflt_real end do - ! Subgrid bubble parameters + ! Subgrid bubble parameters (bub_pp struct + scalar companions; scalar companions are + ! per-target manual declarations not in m_global_parameters_common scope) bub_pp%R0ref = dflt_real; R0ref = dflt_real bub_pp%p0ref = dflt_real; p0ref = dflt_real bub_pp%rho0ref = dflt_real; rho0ref = dflt_real @@ -460,13 +432,7 @@ contains bub_pp%R_v = dflt_real; R_v = dflt_real bub_pp%R_g = dflt_real; R_g = dflt_real - ! Tait EOS - rhoref = dflt_real - pref = dflt_real - - ! Immersed Boundaries - ib = .false. - num_ibs = dflt_int + ! Immersed Boundaries (sim-specific extras) ib_neighborhood_radius = 1 collision_model = 0 coefficient_of_restitution = dflt_real @@ -474,21 +440,14 @@ contains ib_coefficient_of_friction = dflt_real ib_state_wrt = .false. - ! Bubble modeling - bubbles_euler = .false. + ! Bubble modeling (sim-specific) bubble_model = 1 polytropic = .true. - polydisperse = .false. thermal = dflt_int - R0ref = dflt_real #:if not MFC_CASE_OPTIMIZATION nb = 1 - recon_type = recon_type_weno - weno_order = dflt_int - muscl_order = dflt_int muscl_lim = dflt_int - num_fluids = dflt_int #:endif adv_n = .false. @@ -498,23 +457,15 @@ contains pi_fac = 1._wp - ! User inputs for qbmm for simulation code - qbmm = .false. - Eu = dflt_real Ca = dflt_real Re_inv = dflt_real Web = dflt_real - poly_sigma = dflt_real ! Acoustic source acoustic_source = .false. num_source = dflt_int - ! Surface tension - sigma = dflt_real - surface_tension = .false. - bodyForces = .false. bf_x = .false.; bf_y = .false.; bf_z = .false. !> amplitude, frequency, and phase shift sinusoid in each direction @@ -524,8 +475,6 @@ contains #:endfor #:endfor - fft_wrt = .false. - do j = 1, num_probes_max acoustic(j)%pulse = dflt_int acoustic(j)%support = dflt_int @@ -583,15 +532,7 @@ contains bc_${dir}$%grcbc_vel_out = .false. #:endfor - #:for dir in {'x', 'y', 'z'} - bc_${dir}$%isothermal_in = .false. - bc_${dir}$%isothermal_out = .false. - bc_${dir}$%Twall_in = dflt_real - bc_${dir}$%Twall_out = dflt_real - #:endfor - ! Lagrangian subgrid bubble model - bubbles_lagrange = .false. lag_params%solver_approach = dflt_int lag_params%cluster_type = dflt_int lag_params%pressure_corrector = .false. @@ -610,16 +551,10 @@ contains cont_damage_s = dflt_real alpha_bar = dflt_real - ! MHD - Bx0 = dflt_real + ! MHD (sim-specific extras beyond common Bx0) hyper_cleaning_speed = dflt_real hyper_cleaning_tau = dflt_real - #:if not MFC_CASE_OPTIMIZATION - mhd = .false. - relativity = .false. - #:endif - do i = 1, num_ib_airfoils_max ib_airfoil(i)%c = dflt_real ib_airfoil(i)%p = dflt_real diff --git a/toolchain/mfc/params/generators/fortran_gen.py b/toolchain/mfc/params/generators/fortran_gen.py index 41e6dc4cc8..6ad861d33c 100644 --- a/toolchain/mfc/params/generators/fortran_gen.py +++ b/toolchain/mfc/params/generators/fortran_gen.py @@ -243,21 +243,6 @@ def generate_case_opt_decls_fpp() -> str: # TYPED_DECLS entries kept entirely in the manual residue: their member-broadcast structure # is irregular (non-uniform subsets, size() arrays, nested loops, complex guards). -_MANUAL_TYPED_DECLS = frozenset( - { - "patch_icpp", # complex array members (alter_patch, sph_har_coeff, size() members) - "patch_bc", # kept manual in pre (generated separately would diverge) - "patch_ib", # per-target member subsets differ (pre: size(); sim: count=3; slip absent in sim) - "ib_airfoil", # grouped array members (ib_airfoil attrs are all plain scalars but kept manual) - "stl_models", # grouped array members (model_translate/scale emitted as count=3, not per-index) - "particle_cloud", # sim-only runtime loop; kept manual alongside patch_ib - "simplex_params", # nested i×j loops - "acoustic", # complex combined loop with probe/integral in existing code - "probe", # combined with acoustic/integral - "integral", # combined with acoustic/probe - } -) - def _mpi_type_for(ptype: ParamType) -> str: """Return the Fortran MPI type constant for a ParamType.""" @@ -383,7 +368,7 @@ def _emit_lag_params(lines: List[str]) -> None: def _emit_chem_params(lines: List[str]) -> None: """Emit the chem_params member broadcast block (sim-only, under chemistry guard). - All chem_params members in the registry are broadcast. + Broadcasts the registry's LOG and INT chem_params members (the only kinds registered today; extend the type split if REAL members appear). """ chem_members = sorted(k.split("%", 1)[1] for k in REGISTRY.all_params if k.startswith("chem_params%")) chem_log = [m for m in chem_members if REGISTRY.all_params[f"chem_params%{m}"].param_type == ParamType.LOG] From dd61fbe60e2eb3883c74facf3a271a115c043067 Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Wed, 10 Jun 2026 15:58:16 -0400 Subject: [PATCH 07/22] docs: document the shared global-parameters core and generated broadcasts --- .claude/rules/common-pitfalls.md | 23 ++++++++++++++++++----- docs/documentation/contributing.md | 5 ++++- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/.claude/rules/common-pitfalls.md b/.claude/rules/common-pitfalls.md index 2f790a8e90..463d486870 100644 --- a/.claude/rules/common-pitfalls.md +++ b/.claude/rules/common-pitfalls.md @@ -48,15 +48,28 @@ covered in `docs/documentation/contributing.md`. `CASE_OPT_EXTRA_LINES` literal in `toolchain/mfc/params/generators/fortran_gen.py` (covers `num_dims`, `num_vels`, `weno_polyn`, `muscl_polyn`, `weno_num_stencils`, `wenojs`); multi-variable declaration lines (`bc_x/y/z`, `x/y/z_domain`, `x/y/z_output`, post's - `G`); and MPI broadcast lists in `m_mpi_proxy`. Everything else — scalar declarations, - plain arrays (`FORTRAN_ARRAY_DIMS` table in `definitions.py`), derived-type namelist - declarations including `GPU_DECLARE` lines and Doxygen descs (`TYPED_DECLS` table in - `definitions.py`), and the simulation case-optimization declaration block — is - auto-generated at CMake configure time. + `G`); and the MPI broadcast residue in `m_mpi_proxy` (computed variables that are not + namelist-bound: `m_glb`/`n_glb`/`p_glb`, `cfl_dt`, `bc_io`, and complex struct-member + array loops — these cannot be auto-generated and stay hand-listed). Everything else — scalar declarations, plain arrays (`FORTRAN_ARRAY_DIMS` + table in `definitions.py`), derived-type namelist declarations including `GPU_DECLARE` + lines and Doxygen descs (`TYPED_DECLS` table in `definitions.py`), the simulation + case-optimization declaration block, and the per-target MPI broadcast lists for all + namelist-registry scalars (`generated_bcast.fpp`) — is auto-generated at CMake configure + time. Gotcha: after editing a generator or table, force regen via `cmake_gen.py` into `build/staging/*/` or simply reconfigure (`./mfc.sh build`) — cached builds compile stale includes. Under `--case-optimization` the baked-in constants are dropped from the namelist, so changing one needs a *rebuild*, not a case edit. +- Shared-state pattern: namelist declarations (`#:include 'generated_decls.fpp'`), the + `eqn_idx`/`sys_size`/`b_size`/`tensor_size` state variables, and the common defaults + core all live in `src/common/m_global_parameters_common.fpp`. Each per-target + `m_global_parameters.fpp` does `use m_global_parameters_common` (default-public), so + `use m_global_parameters` continues to work for all downstream modules without change. + Sim-only declarations (GPU_DECLARE, Re_idx allocation) stay in + `m_global_parameters_common` behind `#ifdef MFC_SIMULATION`. Generated includes + (`generated_decls.fpp`, `generated_bcast.fpp`) must exist for every target — the build + emits stubs where the content is sim-only, so a common file that includes one will + compile for pre/post too. - Runtime checks (`@:PROHIBIT`) go where they run: shared → `src/common/m_checker_common.fpp`; simulation-only → `src/simulation/m_checker.fpp`; pre/post-only → `src/{pre,post}_process/m_checker.fpp` (their `s_check_inputs` are diff --git a/docs/documentation/contributing.md b/docs/documentation/contributing.md index a79c52006c..6898804239 100644 --- a/docs/documentation/contributing.md +++ b/docs/documentation/contributing.md @@ -290,7 +290,10 @@ Still manual (not auto-generated): - `TYPE` member definitions inside derived types in `src/common/m_derived_types.fpp` - Default-value assignments in `s_assign_default_values_to_user_inputs` - Multi-variable declaration lines (`bc_x/y/z`, `x/y/z_domain`, `x/y/z_output`) -- MPI broadcast lists in `src/*/m_mpi_proxy.fpp` +- MPI broadcast residue in `src/*/m_mpi_proxy.fpp` (computed/non-namelist variables such + as `m_glb`/`n_glb`/`p_glb`, `cfl_dt`, `bc_io`, and complex struct-member array loops; + namelist-registry scalars are broadcast via the auto-generated `generated_bcast.fpp` + include) - `CASE_OPT_EXTRA_LINES` in `toolchain/mfc/params/generators/fortran_gen.py` for case-optimization constants After editing any generator or table, force regen by reconfiguring (`./mfc.sh build`) — From 68d8a07e6c66bfd5d3b485a30a76cc2a76be0c46 Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Wed, 10 Jun 2026 16:13:13 -0400 Subject: [PATCH 08/22] src: keep boundary-condition defaults per-target bc_x/y/z are per-target declarations (the documented multi-variable-line exclusion), so their default assignments cannot live in m_global_parameters_common; the hoist had moved them and broke compilation. Restores the three BC default blocks to each executable after the s_assign_common_defaults call. --- src/common/m_global_parameters_common.fpp | 19 ------------------- src/post_process/m_global_parameters.fpp | 20 ++++++++++++++++++++ src/pre_process/m_global_parameters.fpp | 20 ++++++++++++++++++++ src/simulation/m_global_parameters.fpp | 20 ++++++++++++++++++++ 4 files changed, 60 insertions(+), 19 deletions(-) diff --git a/src/common/m_global_parameters_common.fpp b/src/common/m_global_parameters_common.fpp index 513c784650..cac5aea936 100644 --- a/src/common/m_global_parameters_common.fpp +++ b/src/common/m_global_parameters_common.fpp @@ -373,25 +373,6 @@ contains cont_damage = .false. hyper_cleaning = .false. - ! Boundary conditions - bc_x%beg = dflt_int; bc_x%end = dflt_int - bc_y%beg = dflt_int; bc_y%end = dflt_int - bc_z%beg = dflt_int; bc_z%end = dflt_int - - #:for DIM in ['x', 'y', 'z'] - #:for DIR in [1, 2, 3] - bc_${DIM}$%vb${DIR}$ = 0._wp - bc_${DIM}$%ve${DIR}$ = 0._wp - #:endfor - #:endfor - - #:for dir in {'x', 'y', 'z'} - bc_${dir}$%isothermal_in = .false. - bc_${dir}$%isothermal_out = .false. - bc_${dir}$%Twall_in = dflt_real - bc_${dir}$%Twall_out = dflt_real - #:endfor - ! Case-optimization params: under case-opt these are compile-time constants in sim (skip assignment); in pre/post ! MFC_CASE_OPTIMIZATION is always False so the block always executes there. #:if not MFC_CASE_OPTIMIZATION diff --git a/src/post_process/m_global_parameters.fpp b/src/post_process/m_global_parameters.fpp index 10e83691d3..9d52c67e35 100644 --- a/src/post_process/m_global_parameters.fpp +++ b/src/post_process/m_global_parameters.fpp @@ -150,6 +150,26 @@ contains ! IB flags, parallel I/O flags, fft_wrt) call s_assign_common_defaults + + ! Boundary conditions (bc_x/y/z are per-target declarations, not visible in common) + bc_x%beg = dflt_int; bc_x%end = dflt_int + bc_y%beg = dflt_int; bc_y%end = dflt_int + bc_z%beg = dflt_int; bc_z%end = dflt_int + + #:for DIM in ['x', 'y', 'z'] + #:for DIR in [1, 2, 3] + bc_${DIM}$%vb${DIR}$ = 0._wp + bc_${DIM}$%ve${DIR}$ = 0._wp + #:endfor + #:endfor + + #:for dir in {'x', 'y', 'z'} + bc_${dir}$%isothermal_in = .false. + bc_${dir}$%isothermal_out = .false. + bc_${dir}$%Twall_in = dflt_real + bc_${dir}$%Twall_out = dflt_real + #:endfor + call s_update_cell_bounds(cells_bounds, m, n, p) ! Computational domain parameters (post-specific) diff --git a/src/pre_process/m_global_parameters.fpp b/src/pre_process/m_global_parameters.fpp index fad0db9113..bfb5df737d 100644 --- a/src/pre_process/m_global_parameters.fpp +++ b/src/pre_process/m_global_parameters.fpp @@ -99,6 +99,26 @@ contains ! IB flags, parallel I/O flags, fft_wrt) call s_assign_common_defaults + + ! Boundary conditions (bc_x/y/z are per-target declarations, not visible in common) + bc_x%beg = dflt_int; bc_x%end = dflt_int + bc_y%beg = dflt_int; bc_y%end = dflt_int + bc_z%beg = dflt_int; bc_z%end = dflt_int + + #:for DIM in ['x', 'y', 'z'] + #:for DIR in [1, 2, 3] + bc_${DIM}$%vb${DIR}$ = 0._wp + bc_${DIM}$%ve${DIR}$ = 0._wp + #:endfor + #:endfor + + #:for dir in {'x', 'y', 'z'} + bc_${dir}$%isothermal_in = .false. + bc_${dir}$%isothermal_out = .false. + bc_${dir}$%Twall_in = dflt_real + bc_${dir}$%Twall_out = dflt_real + #:endfor + call s_update_cell_bounds(cells_bounds, m, n, p) ! Logistics (pre-specific) diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index 33f10d9dd6..350bc07d0c 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -313,6 +313,26 @@ contains ! IB flags, parallel I/O flags, fft_wrt) call s_assign_common_defaults + + ! Boundary conditions (bc_x/y/z are per-target declarations, not visible in common) + bc_x%beg = dflt_int; bc_x%end = dflt_int + bc_y%beg = dflt_int; bc_y%end = dflt_int + bc_z%beg = dflt_int; bc_z%end = dflt_int + + #:for DIM in ['x', 'y', 'z'] + #:for DIR in [1, 2, 3] + bc_${DIM}$%vb${DIR}$ = 0._wp + bc_${DIM}$%ve${DIR}$ = 0._wp + #:endfor + #:endfor + + #:for dir in {'x', 'y', 'z'} + bc_${dir}$%isothermal_in = .false. + bc_${dir}$%isothermal_out = .false. + bc_${dir}$%Twall_in = dflt_real + bc_${dir}$%Twall_out = dflt_real + #:endfor + call s_update_cell_bounds(cells_bounds, m, n, p) ! Logistics (sim-specific) From 5821ceec47bc5f6ba5a0f7869bd9e33c03a3c19e Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Wed, 10 Jun 2026 16:55:27 -0400 Subject: [PATCH 09/22] build: split CMakeLists into cmake/ modules MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Verbatim motion only — no renames, reflows, or logic changes. Section map (original line ranges → files): lines 129-331 compiler flags / GPU logic → cmake/GPU.cmake lines 107-461 FYPP_EXE discovery + HANDLE_SOURCES → cmake/Fypp.cmake lines 464-541 params codegen stamp + gen-file lists + custom command/target → cmake/ParamsCodegen.cmake lines 549-821 MFC_SETUP_TARGET function → cmake/MFCTargets.cmake include() order in root CMakeLists.txt: 1. cmake/GPU.cmake (sets FYPP_GCOV_OPTS, NVHPC_USE_TWO_PASS_IPO, MFC_CUDA_CC) 2. cmake/Fypp.cmake (finds FYPP_EXE, defines HANDLE_SOURCES — needs FYPP_GCOV_OPTS) 3. cmake/ParamsCodegen.cmake (sets _mfc_gen_files_* lists — must precede HANDLE_SOURCES calls) 4. HANDLE_SOURCES calls (in root — consume _mfc_gen_files_* and FYPP_EXE) 5. cmake/MFCTargets.cmake (defines MFC_SETUP_TARGET — needs NVHPC_USE_TWO_PASS_IPO) 6. MFC_SETUP_TARGET calls (in root — instantiate targets) 7. docs section (in root — unchanged) Equivalence gate: cmake configure of -DMFC_PRE_PROCESS=ON -DMFC_MPI=OFF -DCMAKE_BUILD_TYPE=Release BEFORE and AFTER, path-normalized diff of CMakeCache.txt, Makefile, CMakeFiles/pre_process.dir/flags.make, and CMakeFiles/pre_process.dir/build.make — all empty (identical). A clean 3-target build rides the next scheduled gate. --- CMakeLists.txt | 702 +------------------------------------- cmake/Fypp.cmake | 135 ++++++++ cmake/GPU.cmake | 205 +++++++++++ cmake/MFCTargets.cmake | 275 +++++++++++++++ cmake/ParamsCodegen.cmake | 80 +++++ 5 files changed, 705 insertions(+), 692 deletions(-) create mode 100644 cmake/Fypp.cmake create mode 100644 cmake/GPU.cmake create mode 100644 cmake/MFCTargets.cmake create mode 100644 cmake/ParamsCodegen.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index c684916b0e..fa1bc0a4df 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -103,11 +103,6 @@ elseif (CMAKE_Fortran_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_C_COMPILER_ID S message(FATAL_ERROR "ERROR: MFC does not support the Apple Clang compilers. Please consult the documentation.\n${__err_msg}") endif() -# Fypp is required to build MFC. We try to locate it. The path to the binary is -# returned in FYPP_EXE upon success. This is used later. - -find_program(FYPP_EXE fypp REQUIRED) - # Miscellaneous Configuration: # * Explicitly link to -ldl (or system equivalent) @@ -126,700 +121,23 @@ endif() # debug builds. These include optimization and debug flags, as well as some that # are required for a successful build of MFC. -set(FYPP_GCOV_OPTS "") - -if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU") - add_compile_options( - $<$:-ffree-line-length-none> - ) - - if (MFC_GCov) - - # Warning present in gcc versions >= 12 that is treated as an error - # This flag doesn't exist in gcc versions < 12 - if (CMAKE_Fortran_COMPILER_VERSION VERSION_GREATER 12) - add_compile_options( - -Wno-error=coverage-invalid-line-number - ) - endif() - - add_compile_options( - $<$:-fprofile-arcs> - $<$:-ftest-coverage> - ) - - add_link_options( - $<$:-lgcov> - $<$:--coverage> - ) - - # Override Release -O3 with -O1 for gcov: coverage instrumentation is - # inaccurate at -O3, and aggressive codegen (e.g. AVX-512 FP16 on - # Granite Rapids) can emit instructions that older assemblers reject. - set(CMAKE_Fortran_FLAGS_RELEASE "-O1 -DNDEBUG" CACHE STRING "" FORCE) - - # Use gfortran5 line markers so gcov can map coverage to .fpp sources. - set(FYPP_GCOV_OPTS "--line-marker-format=gfortran5") - endif() - - if (CMAKE_BUILD_TYPE STREQUAL "Debug") - add_compile_options( - -Wall - -Wextra - -fcheck=all,no-array-temps - -fbacktrace - -fimplicit-none - -fsignaling-nans - -finit-real=snan - -finit-integer=-99999999 - -Wconversion - -Wintrinsic-shadow - -Wunderflow - -Wrealloc-lhs - -Wsurprising - ) - elseif (CMAKE_BUILD_TYPE STREQUAL "RelDebug") - add_compile_options( - -Og - -Wall - -Wextra - -fcheck=bounds,pointer - -fbacktrace - -fimplicit-none - -fsignaling-nans - -finit-real=snan - -finit-integer=-99999999 - -Wconversion - -Wintrinsic-shadow - -Wunderflow - -Wrealloc-lhs - -Wsurprising - ) - endif() - - if (CMAKE_Fortran_COMPILER_VERSION VERSION_GREATER 10) - add_compile_options( - $<$:-fallow-invalid-boz> - $<$:-fallow-argument-mismatch> - ) - endif() -elseif (CMAKE_Fortran_COMPILER_ID STREQUAL "Cray") - add_compile_options( - "SHELL:-M 296,878,1391,1069,990,5025,7208,7212,7242" - "SHELL:-h static" "SHELL:-h keepfiles" - "SHELL:-h acc_model=auto_async_none" - "SHELL: -h acc_model=no_fast_addr" - "SHELL: -h list=adm" - "SHELL: -munsafe-fp-atomics" # Not unsafe for operations we do - ) - - add_link_options("SHELL:-hkeepfiles") - - if (CMAKE_BUILD_TYPE STREQUAL "Debug") - add_compile_options( - "SHELL:-h acc_model=auto_async_none" - "SHELL: -h acc_model=no_fast_addr" - "SHELL: -K trap=fp" "SHELL: -g" "SHELL: -O0" - ) - add_link_options("SHELL: -K trap=fp" "SHELL: -g" "SHELL: -O0") - elseif (CMAKE_BUILD_TYPE STREQUAL "RelDebug") - add_compile_options( - "SHELL:-h acc_model=auto_async_none" - "SHELL: -h acc_model=no_fast_addr" - "SHELL: -K trap=fp" "SHELL: -g" "SHELL: -O1" - ) - add_link_options("SHELL: -K trap=fp" "SHELL: -g" "SHELL: -O1") - endif() - -elseif (CMAKE_Fortran_COMPILER_ID STREQUAL "Flang") - add_compile_options( - $<$:-Mfreeform> - $<$:-Mpreprocess> - $<$:-fdefault-real-8> - ) - - if (CMAKE_BUILD_TYPE STREQUAL "Debug" OR CMAKE_BUILD_TYPE STREQUAL "RelDebug") - add_compile_options($<$:-O1> $<$:-g>) - endif() -elseif (CMAKE_Fortran_COMPILER_ID STREQUAL "Intel") - add_compile_options($<$:-free>) - - if (CMAKE_BUILD_TYPE STREQUAL "Debug") - add_compile_options(-g -Og -traceback -debug -check all) - elseif (CMAKE_BUILD_TYPE STREQUAL "RelDebug") - add_compile_options(-g -Og -traceback -check bounds) - endif() -elseif ((CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC") OR (CMAKE_Fortran_COMPILER_ID STREQUAL "PGI")) - add_compile_options( - $<$:-Mfreeform> - $<$:-cpp> - $<$:-Minfo=inline> - $<$:-Minfo=accel> - ) - - if (CMAKE_BUILD_TYPE STREQUAL "Debug") - add_compile_options( - $<$:-O0> - $<$:-C> - $<$:-g> - $<$:-traceback> - $<$:-Minform=inform> - $<$:-Mbounds> - ) - elseif (CMAKE_BUILD_TYPE STREQUAL "RelDebug") - add_compile_options( - $<$:-O1> - $<$:-g> - $<$:-traceback> - $<$:-Mbounds> - ) - endif() - - if (DEFINED ENV{MFC_CUDA_CC}) - string(REGEX MATCHALL "[0-9]+" MFC_CUDA_CC $ENV{MFC_CUDA_CC}) - message(STATUS "Found $MFC_CUDA_CC specified. GPU code will be generated for compute capability(ies) ${MFC_CUDA_CC}.") - endif() -endif() +include(cmake/GPU.cmake) -if (CMAKE_BUILD_TYPE STREQUAL "Release") - # Processor tuning: Check if we can target the host's native CPU's ISA. - # Skip for gcov builds — -march=native on newer CPUs (e.g. Granite Rapids) - # can emit instructions the system assembler doesn't support. - if (NOT MFC_GCov) - CHECK_FORTRAN_COMPILER_FLAG("-march=native" SUPPORTS_MARCH_NATIVE) - if (SUPPORTS_MARCH_NATIVE) - add_compile_options($<$:-march=native>) - # Disable AVX-512 FP16: gfortran >=12 emits vmovw instructions on - # Granite Rapids CPUs, but binutils <2.38 cannot assemble them. - # FP16 is unused in MFC's double-precision computations. - CHECK_FORTRAN_COMPILER_FLAG("-mno-avx512fp16" SUPPORTS_MNO_AVX512FP16) - if (SUPPORTS_MNO_AVX512FP16) - add_compile_options($<$:-mno-avx512fp16>) - endif() - else() - CHECK_FORTRAN_COMPILER_FLAG("-mcpu=native" SUPPORTS_MCPU_NATIVE) - if (SUPPORTS_MCPU_NATIVE) - add_compile_options($<$:-mcpu=native>) - endif() - endif() - endif() +# Fypp discovery and HANDLE_SOURCES macro definition. +include(cmake/Fypp.cmake) - # Enable LTO/IPO if supported (skip for gcov — LTO interferes with coverage - # instrumentation and can trigger assembler errors on newer architectures). - if (MFC_GCov) - message(STATUS "LTO/IPO disabled for gcov build") - elseif (CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC") - if (MFC_Unified) - message(STATUS "LTO/IPO is not available with NVHPC using Unified Memory") - else() - message(STATUS "Performing IPO using -Mextract followed by -Minline") - set(NVHPC_USE_TWO_PASS_IPO TRUE) - endif() - else() - CHECK_IPO_SUPPORTED(RESULT SUPPORTS_IPO OUTPUT IPO_ERROR) - if (SUPPORTS_IPO) - message(STATUS "Enabled IPO / LTO") - set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE) - else() - message(STATUS "IPO / LTO is NOT available") - endif() - endif() -endif() - -if (CMAKE_BUILD_TYPE STREQUAL "Debug" OR CMAKE_BUILD_TYPE STREQUAL "RelDebug") - add_compile_definitions(MFC_DEBUG) -endif() - - - -# HANDLE_SOURCES: Given a target (herein ): -# -# * Locate all source files for of the type -# -# src/[,common]/[.,include]/*.[f90,fpp]. -# -# * For each .fpp file found with filepath /.fpp, using a -# custom command, instruct CMake how to generate a file with path -# -# src//fypp/.f90 -# -# by running Fypp on /.fpp. It is important to understand -# that this does not actually run the pre-processor. Rather, it instructs -# CMake what to do when it finds a src//fypp/.f90 path -# in the source list for a target. Thus, an association is made from an .f90 -# file to its corresponding .fpp file (if applicable) even though the -# generation is of the form .fpp -> .f90. -# -# This design has one limitation: If an .fpp file depends on another, for -# example if it '#:include's it and uses a macro defined in it, then the -# dependency will not be tracked. A modification to the .fpp file it depends -# on will not trigger a re-run of Fypp on the .fpp file that depends on it. -# As a compromise, both in speed and complexity, all .f90 files generated -# from .fpp files are re-generated not only when their corresponding .fpp -# file is modified, but also when any file with filepath of the form -# -# src/[,common]/include/*.fpp -# -# is modified. This is a reasonable compromise as modifications to .fpp files -# in the include directories will be rare - by design. Other approaches would -# have required a more complex CMakeLists.txt file (perhaps parsing the .fpp -# files to determine their dependencies) or the endurment of longer -# compilation times (by way of re-running Fypp on all .fpp files every time -# one of them is modified). -# -# .fpp files in src/common are treated as if they were in src/ (not -# pre-processed to src/common/fypp/) so as not to clash with other targets' -# .fpp files (this has caused problems in the past). -# -# * Export, in the variable _SRCs, a list of all source files (.f90) -# that would compile to produce . If includes .fpp files, -# then the list will include the paths to the corresponding .f90 files that -# Fypp would generate from the .fpp files. -# -# This design allows us to be flexible in our use of Fypp as we don't have to -# worry about running the pre-processor on .fpp files when we create executables -# and generate documentation. Instead, we can simply include the list of .f90 -# files that will eventually be used to compile . - -macro(HANDLE_SOURCES target useCommon) - - set(${target}_DIR "${CMAKE_SOURCE_DIR}/src/${target}") - set(common_DIR "${CMAKE_SOURCE_DIR}/src/common") - - string(TOUPPER ${target} ${target}_UPPER) - - # Gather: - # * src/[,(common)]/*.f90 - # * (if any) /modules//*.f90 - file(GLOB ${target}_F90s CONFIGURE_DEPENDS "${${target}_DIR}/*.f90" - "${CMAKE_BINARY_DIR}/modules/${target}/*.f90") - set(${target}_SRCs ${${target}_F90s}) - if (${useCommon}) - file(GLOB common_F90s CONFIGURE_DEPENDS "${common_DIR}/*.f90") - list(APPEND ${target}_SRCs ${common_F90s}) - endif() - - # Gather: - # * src/[,(common)]/*.fpp] - # * (if any) /modules//*.fpp - file(GLOB ${target}_FPPs CONFIGURE_DEPENDS "${${target}_DIR}/*.fpp" - "${CMAKE_BINARY_DIR}/modules/${target}/*.fpp") - if (${useCommon}) - file(GLOB common_FPPs CONFIGURE_DEPENDS "${common_DIR}/*.fpp") - - # If we're building post_process, exclude m_compute_levelset.fpp - if("${target}" STREQUAL "post_process") - list(FILTER common_FPPs EXCLUDE REGEX ".*/m_compute_levelset\.fpp$") - list(FILTER common_FPPs EXCLUDE REGEX ".*/m_ib_patches\.fpp$") - endif() - - list(APPEND ${target}_FPPs ${common_FPPs}) - endif() - - # Gather: - # * src/[,common]/include/*.fpp - # * generated includes from build/include// (explicit list, not a GLOB) - file(GLOB ${target}_incs CONFIGURE_DEPENDS "${${target}_DIR}/include/*.fpp") - list(APPEND ${target}_incs ${_mfc_gen_files_${target}}) - - if (${useCommon}) - file(GLOB common_incs CONFIGURE_DEPENDS "${common_DIR}/include/*.fpp") - list(APPEND ${target}_incs ${common_incs}) - endif() - - # /path/to/*.fpp (used by ) -> /fypp//*.f90 - file(MAKE_DIRECTORY "${CMAKE_BINARY_DIR}/fypp/${target}") - foreach(fpp ${${target}_FPPs}) - cmake_path(GET fpp FILENAME fpp_filename) - set(f90 "${CMAKE_BINARY_DIR}/fypp/${target}/${fpp_filename}.f90") - - add_custom_command( - OUTPUT ${f90} - COMMAND ${FYPP_EXE} -m re - -I "${CMAKE_BINARY_DIR}/include/${target}" - -I "${${target}_DIR}/include" - -I "${common_DIR}/include" - -I "${common_DIR}" - -D MFC_${CMAKE_Fortran_COMPILER_ID} - -D MFC_${${target}_UPPER} - -D MFC_COMPILER="${CMAKE_Fortran_COMPILER_ID}" - -D MFC_CASE_OPTIMIZATION=False - -D chemistry=False - --line-numbering - --no-folding - --line-length=999 - --line-numbering-mode=nocontlines - ${FYPP_GCOV_OPTS} - "${fpp}" "${f90}" - DEPENDS "${fpp};${${target}_incs}" - COMMENT "Preprocessing (Fypp) ${fpp_filename}" - VERBATIM - ) - - list(APPEND ${target}_SRCs ${f90}) - endforeach() -endmacro() - - -# Generate Fortran parameter namelist/decl includes into the per-target build -# include directories before HANDLE_SOURCES globs them for Fypp. -find_package(Python3 REQUIRED COMPONENTS Interpreter) -set(_mfc_gen_stamp "${CMAKE_BINARY_DIR}/mfc_params_gen.stamp") -file(GLOB_RECURSE _mfc_gen_inputs - "${CMAKE_CURRENT_SOURCE_DIR}/toolchain/mfc/params/*.py" -) -set(_mfc_needs_regen FALSE) -if(NOT EXISTS "${_mfc_gen_stamp}") - set(_mfc_needs_regen TRUE) -else() - foreach(_input IN LISTS _mfc_gen_inputs) - if("${_input}" IS_NEWER_THAN "${_mfc_gen_stamp}") - set(_mfc_needs_regen TRUE) - break() - endif() - endforeach() -endif() -if(_mfc_needs_regen) - execute_process( - COMMAND "${Python3_EXECUTABLE}" - "${CMAKE_CURRENT_SOURCE_DIR}/toolchain/mfc/params/generators/cmake_gen.py" - "${CMAKE_BINARY_DIR}" - WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" - RESULT_VARIABLE _mfc_gen_result - ERROR_VARIABLE _mfc_gen_error - ) - if(NOT _mfc_gen_result EQUAL 0) - message(FATAL_ERROR "Fortran param generation failed:\n${_mfc_gen_error}") - endif() - file(TOUCH "${_mfc_gen_stamp}") -endif() - -# Enumerate the 10 generated .fpp files explicitly so ninja can track them as -# build-time outputs and so HANDLE_SOURCES does not need a configure-time GLOB -# of ${CMAKE_BINARY_DIR}/include// (which fails when the dir is empty). -set(_mfc_gen_inc "${CMAKE_BINARY_DIR}/include") -set(_mfc_gen_files_pre_process - "${_mfc_gen_inc}/pre_process/generated_namelist.fpp" - "${_mfc_gen_inc}/pre_process/generated_decls.fpp" - "${_mfc_gen_inc}/pre_process/generated_constants.fpp" - "${_mfc_gen_inc}/pre_process/generated_bcast.fpp" - "${_mfc_gen_inc}/pre_process/generated_case_opt_decls.fpp" -) -set(_mfc_gen_files_simulation - "${_mfc_gen_inc}/simulation/generated_namelist.fpp" - "${_mfc_gen_inc}/simulation/generated_decls.fpp" - "${_mfc_gen_inc}/simulation/generated_constants.fpp" - "${_mfc_gen_inc}/simulation/generated_bcast.fpp" - "${_mfc_gen_inc}/simulation/generated_case_opt_decls.fpp" -) -set(_mfc_gen_files_post_process - "${_mfc_gen_inc}/post_process/generated_namelist.fpp" - "${_mfc_gen_inc}/post_process/generated_decls.fpp" - "${_mfc_gen_inc}/post_process/generated_constants.fpp" - "${_mfc_gen_inc}/post_process/generated_bcast.fpp" - "${_mfc_gen_inc}/post_process/generated_case_opt_decls.fpp" -) -set(_mfc_gen_files_syscheck) -set(_mfc_all_gen_files - ${_mfc_gen_files_pre_process} - ${_mfc_gen_files_simulation} - ${_mfc_gen_files_post_process} -) - -# Build-time regeneration: ninja re-runs cmake_gen.py and re-preprocesses -# any .fpp that includes a generated file whenever a params .py changes. -add_custom_command( - OUTPUT ${_mfc_all_gen_files} - COMMAND "${Python3_EXECUTABLE}" - "${CMAKE_CURRENT_SOURCE_DIR}/toolchain/mfc/params/generators/cmake_gen.py" - "${CMAKE_BINARY_DIR}" - DEPENDS ${_mfc_gen_inputs} - WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" - COMMENT "Regenerating Fortran parameter includes" - VERBATIM -) -add_custom_target(mfc_params_gen DEPENDS ${_mfc_all_gen_files}) +# Params codegen: configure-time bootstrap, explicit gen-file lists, +# and the build-time add_custom_command/target. +# NOTE: _mfc_gen_files_* lists must exist before HANDLE_SOURCES is called. +include(cmake/ParamsCodegen.cmake) HANDLE_SOURCES(pre_process ON) HANDLE_SOURCES(simulation ON) HANDLE_SOURCES(post_process ON) HANDLE_SOURCES(syscheck OFF) - -# MFC_SETUP_TARGET: Given a target (herein ), this macro creates a new -# executable with the appropriate sources, compiler definitions, and -# linked libraries (assuming HANDLE_SOURCES was called on ). -# -# Inputs: -# * TARGET Target name () -# * SOURCES List of source files (.f90) -# * OpenACC (optional) Can be compiled with OpenACC. -# * MPI (optional) Can be compiled with MPI. -# * SILO (optional) Should be linked with SILO. -# * HDF5 (optional) Should be linked with HDF5. -# * FFTW (optional) Should be linked with an FFTW-like library (fftw/cufftw), -# depending on whether OpenACC is enabled and which compiler is -# being used. -# * LAPACK (optional) Should be linked with LAPACK - -function(MFC_SETUP_TARGET) - cmake_parse_arguments(ARGS "OpenACC;MPI;SILO;HDF5;FFTW;LAPACK;OpenMP" "TARGET" "SOURCES" ${ARGN}) - - add_executable(${ARGS_TARGET} ${ARGS_SOURCES}) - set(IPO_TARGETS ${ARGS_TARGET}) - # Here we need to split into "library" and "executable" to perform IPO on the NVIDIA compiler. - # A little hacky, but it *is* an edge-case for *one* compiler. - if (NVHPC_USE_TWO_PASS_IPO AND NOT(MFC_OpenMP AND ARGS_OpenMP)) - # nvfortran -Mextract does not produce .o files, only inline library - # data. An OBJECT library with -Mextract causes CMake to rebuild - # everything on every build because the expected .o outputs never - # exist. We use a wrapper script as RULE_LAUNCH_COMPILE that runs - # the compiler and then touches the expected .o output file. - set(_ipo_wrapper "${CMAKE_BINARY_DIR}/${ARGS_TARGET}_extract_wrapper.sh") - file(WRITE "${_ipo_wrapper}" [=[#!/bin/sh -# Find the -o argument (the object file CMake expects) -out= -prev= -for arg do - if [ "$prev" = "-o" ]; then out="$arg"; break; fi - prev="$arg" -done -# Run the compiler; propagate its exit status on failure -"$@" -status=$? -[ "$status" -eq 0 ] || exit "$status" -# Touch the .o so CMake's dependency tracking sees it -[ -n "$out" ] && touch "$out" -exit 0 -]=]) - file(CHMOD "${_ipo_wrapper}" PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE) - add_library(${ARGS_TARGET}_lib OBJECT ${ARGS_SOURCES}) - set_target_properties(${ARGS_TARGET}_lib PROPERTIES - RULE_LAUNCH_COMPILE "${_ipo_wrapper}") - target_compile_options(${ARGS_TARGET}_lib PRIVATE - $<$:-Mextract=lib:${ARGS_TARGET}_lib> - $<$:-Minline> - ) - add_dependencies(${ARGS_TARGET} ${ARGS_TARGET}_lib) - target_compile_options(${ARGS_TARGET} PRIVATE -Minline=lib:${ARGS_TARGET}_lib,except:f_is_default,except:s_compute_dt,except:my_inquire,except:s_mpi_abort,except:s_mpi_barrier,except:s_prohibit_abort,except:s_int_to_str,except:s_associate_cbc_coefficients_pointers) - - # Exclude m_start_up and m_cbc from cross-file inlining: these are - # initialization/boundary code that trigger NVHPC 25.x fort2 ICE when - # too many functions are cross-inlined into them. GPU hot-path files - # (m_rhs, m_riemann_solvers, m_viscous, m_weno, etc.) keep full IPO. - foreach(_no_inline_file m_start_up m_cbc) - set_source_files_properties( - "${CMAKE_BINARY_DIR}/fypp/${ARGS_TARGET}/${_no_inline_file}.fpp.f90" - TARGET_DIRECTORY ${ARGS_TARGET} - PROPERTIES COMPILE_OPTIONS "-Mnoinline" - ) - endforeach() - - list(PREPEND IPO_TARGETS ${ARGS_TARGET}_lib) - endif() - - foreach (a_target ${IPO_TARGETS}) - set_target_properties(${a_target} PROPERTIES Fortran_PREPROCESS ON) - message(STATUS ${CMAKE_Fortran_COMPILER_ID}) - - target_include_directories(${a_target} PRIVATE - "${CMAKE_SOURCE_DIR}/src/common" - "${CMAKE_SOURCE_DIR}/src/common/include" - "${CMAKE_SOURCE_DIR}/src/${ARGS_TARGET}") - - if (EXISTS "${CMAKE_SOURCE_DIR}/src/${ARGS_TARGET}/include") - target_include_directories(${a_target} PRIVATE - "${CMAKE_SOURCE_DIR}/src/${ARGS_TARGET}/include") - endif() - - string(TOUPPER "${ARGS_TARGET}" ${ARGS_TARGET}_UPPER) - target_compile_definitions( - ${a_target} PRIVATE MFC_${CMAKE_Fortran_COMPILER_ID} - MFC_${${ARGS_TARGET}_UPPER} - ) - - if (MFC_MPI AND ARGS_MPI) - find_package(MPI COMPONENTS Fortran REQUIRED) - - target_compile_definitions(${a_target} PRIVATE MFC_MPI) - if(CMAKE_Fortran_COMPILER_ID STREQUAL "LLVMFlang" AND - DEFINED ENV{CRAY_MPICH_INC} AND NOT "$ENV{CRAY_MPICH_INC}" STREQUAL "") - target_compile_options(${a_target} PRIVATE "$ENV{CRAY_MPICH_INC}") - target_link_libraries(${a_target} PRIVATE $ENV{CRAY_MPICH_LIB}) - else() - target_link_libraries(${a_target} PRIVATE MPI::MPI_Fortran) - endif() - endif() - - if (ARGS_SILO) - find_package(SILO REQUIRED) - target_link_libraries(${a_target} PRIVATE SILO::SILO stdc++) - endif() - - if (ARGS_HDF5) - find_package(HDF5 REQUIRED) - target_link_libraries(${a_target} PRIVATE HDF5::HDF5) - endif() - - if (ARGS_FFTW) - if ((MFC_OpenACC AND ARGS_OpenACC) OR (MFC_OpenMP AND ARGS_OpenMP)) - if (CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC" OR CMAKE_Fortran_COMPILER_ID STREQUAL "PGI") - find_package(CUDAToolkit REQUIRED) - target_link_libraries(${a_target} PRIVATE CUDA::cudart CUDA::cufft) - elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "LLVMFlang") - if(DEFINED ENV{CRAY_HIPFORT_LIB} AND NOT "$ENV{CRAY_HIPFORT_LIB}" STREQUAL "") - target_link_libraries(${a_target} PRIVATE $ENV{CRAY_HIPFORT_LIB}) - else() - find_library(HIPFFT_LIB hipfft - HINTS "$ENV{OLCF_AFAR_ROOT}/lib" REQUIRED) - target_link_libraries(${a_target} PRIVATE ${HIPFFT_LIB}) - endif() - else() - find_package(hipfort COMPONENTS hipfft CONFIG REQUIRED) - target_link_libraries(${a_target} PRIVATE hipfort::hipfft) - endif() - else() - find_package(FFTW REQUIRED) - target_link_libraries(${a_target} PRIVATE FFTW::FFTW) - endif() - endif() - - if (ARGS_LAPACK) - find_package(LAPACK REQUIRED) - target_link_libraries(${a_target} PRIVATE LAPACK::LAPACK) - endif() - - if ((MFC_OpenACC AND ARGS_OpenACC) OR (MFC_OpenMP AND ARGS_OpenMP)) - if ((MFC_OpenACC AND ARGS_OpenACC)) - find_package(OpenACC) - - # This should be equivalent to if (NOT OpenACC_FC_FOUND) - if (NOT TARGET OpenACC::OpenACC_Fortran) - message(FATAL_ERROR "OpenACC + Fortran is unsupported.") - endif() - - target_link_libraries(${a_target} PRIVATE OpenACC::OpenACC_Fortran) - target_compile_definitions(${a_target} PRIVATE MFC_OpenACC MFC_GPU) - elseif((MFC_OpenMP AND ARGS_OpenMP)) - find_package(OpenMP) - - # This should be equivalent to if (NOT OpenACC_FC_FOUND) - if (NOT TARGET OpenMP::OpenMP_Fortran) - message(FATAL_ERROR "OpenMP + Fortran is unsupported.") - endif() - set(ENV{OMP_TARGET_OFFLOAD} [MANDATORY]) - # target_link_libraries(${a_target} PRIVATE OpenMP::OpenMP_Fortran) - target_compile_definitions(${a_target} PRIVATE MFC_OpenMP MFC_GPU) - - if(CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC" OR CMAKE_Fortran_COMPILER_ID STREQUAL "PGI") - target_compile_options(${a_target} PRIVATE "-mp=gpu" "-Minfo=mp") - target_link_options(${a_target} PRIVATE "-mp=gpu") - set_target_properties(${a_target} PROPERTIES Fortran_FLAGS "-mp=gpu -gpu=ccall") - elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "Intel") - target_compile_options(${a_target} PRIVATE -fopenmp -fopenmp-targets=spir64) - target_link_options(${a_target} PRIVATE -fopenmp -fopenmp-targets=spir64) - elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "Cray") - target_compile_options(${a_target} PRIVATE -fopenmp) - target_link_options(${a_target} PRIVATE -fopenmp) - elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "LLVMFlang") - target_compile_options(${a_target} PRIVATE -fopenmp --offload-arch=gfx90a -O3 -fopenmp-assume-threads-oversubscription -fopenmp-assume-teams-oversubscription) - target_link_options(${a_target} PRIVATE -fopenmp --offload-arch=gfx90a -flto-partitions=${MFC_BUILD_JOBS}) - endif() - endif() - - if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU") - # FIXME: This should work with other cards than gfx90a ones. - target_compile_options(${a_target} PRIVATE - "-foffload=amdgcn-amdhsa='-march=gfx90a'" - "-foffload-options=-lgfortran\ -lm" - "-fno-exceptions") - if (MFC_Fastmath) - message(WARNING "--fastmath has no effect with the GNU compiler") - endif() - elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC" OR CMAKE_Fortran_COMPILER_ID STREQUAL "PGI") - foreach (cc ${MFC_CUDA_CC}) - target_compile_options(${a_target} - PRIVATE -gpu=cc${cc} - ) - endforeach() - - target_compile_options(${a_target} - PRIVATE -gpu=keep,ptxinfo,lineinfo - ) - - if (MFC_Fastmath) - target_compile_options(${a_target} - PRIVATE -gpu=fastmath - ) - endif() - - # GH-200 Unified Memory Support - if (MFC_Unified) - target_compile_options(${ARGS_TARGET} - PRIVATE -gpu=mem:unified:managedalloc -cuda - ) - # "This option must appear in both the compile and link lines" -- NVHPC Docs - target_link_options(${ARGS_TARGET} - PRIVATE -gpu=mem:unified:managedalloc -cuda - ) - endif() - - if (CMAKE_BUILD_TYPE STREQUAL "Debug" OR CMAKE_BUILD_TYPE STREQUAL "RelDebug") - target_compile_options(${a_target} - PRIVATE -gpu=debug - ) - endif() - elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "Cray") - # Frontier Unified Memory Support - if (MFC_Unified) - target_compile_options(${ARGS_TARGET} - PRIVATE -DFRONTIER_UNIFIED) - endif() - - if (MFC_Fastmath) - message(WARNING "--fastmath has no effect with the CCE") - endif() - - find_package(hipfort COMPONENTS hip CONFIG REQUIRED) - target_link_libraries(${a_target} PRIVATE hipfort::hip hipfort::hipfort-amdgcn) - elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "LLVMFlang") - - if (MFC_Unified) - target_compile_options(${ARGS_TARGET} - PRIVATE -DFRONTIER_UNIFIED) - endif() - - find_library(HIP_LIB amdhip64 - HINTS "$ENV{OLCF_AFAR_ROOT}/lib" REQUIRED) - find_library(HIPFORT_AMDGCN_LIB hipfort-amdgcn - HINTS "$ENV{OLCF_AFAR_ROOT}/lib" REQUIRED) - target_include_directories(${a_target} PRIVATE - "$ENV{OLCF_AFAR_ROOT}/include/hipfort/amdgcn") - target_link_libraries(${a_target} PRIVATE - ${HIP_LIB} ${HIPFORT_AMDGCN_LIB}) - - endif() - elseif (CMAKE_Fortran_COMPILER_ID STREQUAL "Cray") - target_compile_options(${a_target} PRIVATE "SHELL:-h noacc" "SHELL:-x acc") - if (MFC_Fastmath) - message(WARNING "--fastmath has no effect with the CCE") - endif() - endif() - - if (CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC" OR CMAKE_Fortran_COMPILER_ID STREQUAL "PGI") - find_package(CUDAToolkit REQUIRED) - if (TARGET CUDA::nvToolsExt) # CUDA <= 12.8 - target_link_libraries(${a_target} PRIVATE CUDA::nvToolsExt) - else() # CUDA >= 12.9 - target_link_libraries(${a_target} PRIVATE nvhpcwrapnvtx) - target_link_options(${a_target} PRIVATE "-cudalib=nvtx3") - endif() - endif() - endforeach() - - install(TARGETS ${ARGS_TARGET} RUNTIME DESTINATION bin) -endfunction() - +# MFC_SETUP_TARGET function definition and per-target instantiation. +include(cmake/MFCTargets.cmake) if (MFC_PRE_PROCESS) MFC_SETUP_TARGET(TARGET pre_process @@ -1138,4 +456,4 @@ site_name(SITE_NAME) configure_file( "${CMAKE_CURRENT_SOURCE_DIR}/toolchain/cmake/configuration.cmake.in" - "${CMAKE_CURRENT_BINARY_DIR}/configuration.txt") \ No newline at end of file + "${CMAKE_CURRENT_BINARY_DIR}/configuration.txt") diff --git a/cmake/Fypp.cmake b/cmake/Fypp.cmake new file mode 100644 index 0000000000..9519268b87 --- /dev/null +++ b/cmake/Fypp.cmake @@ -0,0 +1,135 @@ +# cmake/Fypp.cmake — FYPP_EXE discovery and HANDLE_SOURCES macro + +# Fypp is required to build MFC. We try to locate it. The path to the binary is +# returned in FYPP_EXE upon success. This is used later. + +find_program(FYPP_EXE fypp REQUIRED) + + +# HANDLE_SOURCES: Given a target (herein ): +# +# * Locate all source files for of the type +# +# src/[,common]/[.,include]/*.[f90,fpp]. +# +# * For each .fpp file found with filepath /.fpp, using a +# custom command, instruct CMake how to generate a file with path +# +# src//fypp/.f90 +# +# by running Fypp on /.fpp. It is important to understand +# that this does not actually run the pre-processor. Rather, it instructs +# CMake what to do when it finds a src//fypp/.f90 path +# in the source list for a target. Thus, an association is made from an .f90 +# file to its corresponding .fpp file (if applicable) even though the +# generation is of the form .fpp -> .f90. +# +# This design has one limitation: If an .fpp file depends on another, for +# example if it '#:include's it and uses a macro defined in it, then the +# dependency will not be tracked. A modification to the .fpp file it depends +# on will not trigger a re-run of Fypp on the .fpp file that depends on it. +# As a compromise, both in speed and complexity, all .f90 files generated +# from .fpp files are re-generated not only when their corresponding .fpp +# file is modified, but also when any file with filepath of the form +# +# src/[,common]/include/*.fpp +# +# is modified. This is a reasonable compromise as modifications to .fpp files +# in the include directories will be rare - by design. Other approaches would +# have required a more complex CMakeLists.txt file (perhaps parsing the .fpp +# files to determine their dependencies) or the endurment of longer +# compilation times (by way of re-running Fypp on all .fpp files every time +# one of them is modified). +# +# .fpp files in src/common are treated as if they were in src/ (not +# pre-processed to src/common/fypp/) so as not to clash with other targets' +# .fpp files (this has caused problems in the past). +# +# * Export, in the variable _SRCs, a list of all source files (.f90) +# that would compile to produce . If includes .fpp files, +# then the list will include the paths to the corresponding .f90 files that +# Fypp would generate from the .fpp files. +# +# This design allows us to be flexible in our use of Fypp as we don't have to +# worry about running the pre-processor on .fpp files when we create executables +# and generate documentation. Instead, we can simply include the list of .f90 +# files that will eventually be used to compile . + +macro(HANDLE_SOURCES target useCommon) + + set(${target}_DIR "${CMAKE_SOURCE_DIR}/src/${target}") + set(common_DIR "${CMAKE_SOURCE_DIR}/src/common") + + string(TOUPPER ${target} ${target}_UPPER) + + # Gather: + # * src/[,(common)]/*.f90 + # * (if any) /modules//*.f90 + file(GLOB ${target}_F90s CONFIGURE_DEPENDS "${${target}_DIR}/*.f90" + "${CMAKE_BINARY_DIR}/modules/${target}/*.f90") + set(${target}_SRCs ${${target}_F90s}) + if (${useCommon}) + file(GLOB common_F90s CONFIGURE_DEPENDS "${common_DIR}/*.f90") + list(APPEND ${target}_SRCs ${common_F90s}) + endif() + + # Gather: + # * src/[,(common)]/*.fpp] + # * (if any) /modules//*.fpp + file(GLOB ${target}_FPPs CONFIGURE_DEPENDS "${${target}_DIR}/*.fpp" + "${CMAKE_BINARY_DIR}/modules/${target}/*.fpp") + if (${useCommon}) + file(GLOB common_FPPs CONFIGURE_DEPENDS "${common_DIR}/*.fpp") + + # If we're building post_process, exclude m_compute_levelset.fpp + if("${target}" STREQUAL "post_process") + list(FILTER common_FPPs EXCLUDE REGEX ".*/m_compute_levelset\.fpp$") + list(FILTER common_FPPs EXCLUDE REGEX ".*/m_ib_patches\.fpp$") + endif() + + list(APPEND ${target}_FPPs ${common_FPPs}) + endif() + + # Gather: + # * src/[,common]/include/*.fpp + # * generated includes from build/include// (explicit list, not a GLOB) + file(GLOB ${target}_incs CONFIGURE_DEPENDS "${${target}_DIR}/include/*.fpp") + list(APPEND ${target}_incs ${_mfc_gen_files_${target}}) + + if (${useCommon}) + file(GLOB common_incs CONFIGURE_DEPENDS "${common_DIR}/include/*.fpp") + list(APPEND ${target}_incs ${common_incs}) + endif() + + # /path/to/*.fpp (used by ) -> /fypp//*.f90 + file(MAKE_DIRECTORY "${CMAKE_BINARY_DIR}/fypp/${target}") + foreach(fpp ${${target}_FPPs}) + cmake_path(GET fpp FILENAME fpp_filename) + set(f90 "${CMAKE_BINARY_DIR}/fypp/${target}/${fpp_filename}.f90") + + add_custom_command( + OUTPUT ${f90} + COMMAND ${FYPP_EXE} -m re + -I "${CMAKE_BINARY_DIR}/include/${target}" + -I "${${target}_DIR}/include" + -I "${common_DIR}/include" + -I "${common_DIR}" + -D MFC_${CMAKE_Fortran_COMPILER_ID} + -D MFC_${${target}_UPPER} + -D MFC_COMPILER="${CMAKE_Fortran_COMPILER_ID}" + -D MFC_CASE_OPTIMIZATION=False + -D chemistry=False + --line-numbering + --no-folding + --line-length=999 + --line-numbering-mode=nocontlines + ${FYPP_GCOV_OPTS} + "${fpp}" "${f90}" + DEPENDS "${fpp};${${target}_incs}" + COMMENT "Preprocessing (Fypp) ${fpp_filename}" + VERBATIM + ) + + list(APPEND ${target}_SRCs ${f90}) + endforeach() +endmacro() diff --git a/cmake/GPU.cmake b/cmake/GPU.cmake new file mode 100644 index 0000000000..eb261d1521 --- /dev/null +++ b/cmake/GPU.cmake @@ -0,0 +1,205 @@ +# cmake/GPU.cmake — compiler flags and GPU configuration + +set(FYPP_GCOV_OPTS "") + +if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU") + add_compile_options( + $<$:-ffree-line-length-none> + ) + + if (MFC_GCov) + + # Warning present in gcc versions >= 12 that is treated as an error + # This flag doesn't exist in gcc versions < 12 + if (CMAKE_Fortran_COMPILER_VERSION VERSION_GREATER 12) + add_compile_options( + -Wno-error=coverage-invalid-line-number + ) + endif() + + add_compile_options( + $<$:-fprofile-arcs> + $<$:-ftest-coverage> + ) + + add_link_options( + $<$:-lgcov> + $<$:--coverage> + ) + + # Override Release -O3 with -O1 for gcov: coverage instrumentation is + # inaccurate at -O3, and aggressive codegen (e.g. AVX-512 FP16 on + # Granite Rapids) can emit instructions that older assemblers reject. + set(CMAKE_Fortran_FLAGS_RELEASE "-O1 -DNDEBUG" CACHE STRING "" FORCE) + + # Use gfortran5 line markers so gcov can map coverage to .fpp sources. + set(FYPP_GCOV_OPTS "--line-marker-format=gfortran5") + endif() + + if (CMAKE_BUILD_TYPE STREQUAL "Debug") + add_compile_options( + -Wall + -Wextra + -fcheck=all,no-array-temps + -fbacktrace + -fimplicit-none + -fsignaling-nans + -finit-real=snan + -finit-integer=-99999999 + -Wconversion + -Wintrinsic-shadow + -Wunderflow + -Wrealloc-lhs + -Wsurprising + ) + elseif (CMAKE_BUILD_TYPE STREQUAL "RelDebug") + add_compile_options( + -Og + -Wall + -Wextra + -fcheck=bounds,pointer + -fbacktrace + -fimplicit-none + -fsignaling-nans + -finit-real=snan + -finit-integer=-99999999 + -Wconversion + -Wintrinsic-shadow + -Wunderflow + -Wrealloc-lhs + -Wsurprising + ) + endif() + + if (CMAKE_Fortran_COMPILER_VERSION VERSION_GREATER 10) + add_compile_options( + $<$:-fallow-invalid-boz> + $<$:-fallow-argument-mismatch> + ) + endif() +elseif (CMAKE_Fortran_COMPILER_ID STREQUAL "Cray") + add_compile_options( + "SHELL:-M 296,878,1391,1069,990,5025,7208,7212,7242" + "SHELL:-h static" "SHELL:-h keepfiles" + "SHELL:-h acc_model=auto_async_none" + "SHELL: -h acc_model=no_fast_addr" + "SHELL: -h list=adm" + "SHELL: -munsafe-fp-atomics" # Not unsafe for operations we do + ) + + add_link_options("SHELL:-hkeepfiles") + + if (CMAKE_BUILD_TYPE STREQUAL "Debug") + add_compile_options( + "SHELL:-h acc_model=auto_async_none" + "SHELL: -h acc_model=no_fast_addr" + "SHELL: -K trap=fp" "SHELL: -g" "SHELL: -O0" + ) + add_link_options("SHELL: -K trap=fp" "SHELL: -g" "SHELL: -O0") + elseif (CMAKE_BUILD_TYPE STREQUAL "RelDebug") + add_compile_options( + "SHELL:-h acc_model=auto_async_none" + "SHELL: -h acc_model=no_fast_addr" + "SHELL: -K trap=fp" "SHELL: -g" "SHELL: -O1" + ) + add_link_options("SHELL: -K trap=fp" "SHELL: -g" "SHELL: -O1") + endif() + +elseif (CMAKE_Fortran_COMPILER_ID STREQUAL "Flang") + add_compile_options( + $<$:-Mfreeform> + $<$:-Mpreprocess> + $<$:-fdefault-real-8> + ) + + if (CMAKE_BUILD_TYPE STREQUAL "Debug" OR CMAKE_BUILD_TYPE STREQUAL "RelDebug") + add_compile_options($<$:-O1> $<$:-g>) + endif() +elseif (CMAKE_Fortran_COMPILER_ID STREQUAL "Intel") + add_compile_options($<$:-free>) + + if (CMAKE_BUILD_TYPE STREQUAL "Debug") + add_compile_options(-g -Og -traceback -debug -check all) + elseif (CMAKE_BUILD_TYPE STREQUAL "RelDebug") + add_compile_options(-g -Og -traceback -check bounds) + endif() +elseif ((CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC") OR (CMAKE_Fortran_COMPILER_ID STREQUAL "PGI")) + add_compile_options( + $<$:-Mfreeform> + $<$:-cpp> + $<$:-Minfo=inline> + $<$:-Minfo=accel> + ) + + if (CMAKE_BUILD_TYPE STREQUAL "Debug") + add_compile_options( + $<$:-O0> + $<$:-C> + $<$:-g> + $<$:-traceback> + $<$:-Minform=inform> + $<$:-Mbounds> + ) + elseif (CMAKE_BUILD_TYPE STREQUAL "RelDebug") + add_compile_options( + $<$:-O1> + $<$:-g> + $<$:-traceback> + $<$:-Mbounds> + ) + endif() + + if (DEFINED ENV{MFC_CUDA_CC}) + string(REGEX MATCHALL "[0-9]+" MFC_CUDA_CC $ENV{MFC_CUDA_CC}) + message(STATUS "Found $MFC_CUDA_CC specified. GPU code will be generated for compute capability(ies) ${MFC_CUDA_CC}.") + endif() +endif() + +if (CMAKE_BUILD_TYPE STREQUAL "Release") + # Processor tuning: Check if we can target the host's native CPU's ISA. + # Skip for gcov builds — -march=native on newer CPUs (e.g. Granite Rapids) + # can emit instructions the system assembler doesn't support. + if (NOT MFC_GCov) + CHECK_FORTRAN_COMPILER_FLAG("-march=native" SUPPORTS_MARCH_NATIVE) + if (SUPPORTS_MARCH_NATIVE) + add_compile_options($<$:-march=native>) + # Disable AVX-512 FP16: gfortran >=12 emits vmovw instructions on + # Granite Rapids CPUs, but binutils <2.38 cannot assemble them. + # FP16 is unused in MFC's double-precision computations. + CHECK_FORTRAN_COMPILER_FLAG("-mno-avx512fp16" SUPPORTS_MNO_AVX512FP16) + if (SUPPORTS_MNO_AVX512FP16) + add_compile_options($<$:-mno-avx512fp16>) + endif() + else() + CHECK_FORTRAN_COMPILER_FLAG("-mcpu=native" SUPPORTS_MCPU_NATIVE) + if (SUPPORTS_MCPU_NATIVE) + add_compile_options($<$:-mcpu=native>) + endif() + endif() + endif() + + # Enable LTO/IPO if supported (skip for gcov — LTO interferes with coverage + # instrumentation and can trigger assembler errors on newer architectures). + if (MFC_GCov) + message(STATUS "LTO/IPO disabled for gcov build") + elseif (CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC") + if (MFC_Unified) + message(STATUS "LTO/IPO is not available with NVHPC using Unified Memory") + else() + message(STATUS "Performing IPO using -Mextract followed by -Minline") + set(NVHPC_USE_TWO_PASS_IPO TRUE) + endif() + else() + CHECK_IPO_SUPPORTED(RESULT SUPPORTS_IPO OUTPUT IPO_ERROR) + if (SUPPORTS_IPO) + message(STATUS "Enabled IPO / LTO") + set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE) + else() + message(STATUS "IPO / LTO is NOT available") + endif() + endif() +endif() + +if (CMAKE_BUILD_TYPE STREQUAL "Debug" OR CMAKE_BUILD_TYPE STREQUAL "RelDebug") + add_compile_definitions(MFC_DEBUG) +endif() diff --git a/cmake/MFCTargets.cmake b/cmake/MFCTargets.cmake new file mode 100644 index 0000000000..5242fe9fd8 --- /dev/null +++ b/cmake/MFCTargets.cmake @@ -0,0 +1,275 @@ +# cmake/MFCTargets.cmake — MFC_SETUP_TARGET macro and related + +# MFC_SETUP_TARGET: Given a target (herein ), this macro creates a new +# executable with the appropriate sources, compiler definitions, and +# linked libraries (assuming HANDLE_SOURCES was called on ). +# +# Inputs: +# * TARGET Target name () +# * SOURCES List of source files (.f90) +# * OpenACC (optional) Can be compiled with OpenACC. +# * MPI (optional) Can be compiled with MPI. +# * SILO (optional) Should be linked with SILO. +# * HDF5 (optional) Should be linked with HDF5. +# * FFTW (optional) Should be linked with an FFTW-like library (fftw/cufftw), +# depending on whether OpenACC is enabled and which compiler is +# being used. +# * LAPACK (optional) Should be linked with LAPACK + +function(MFC_SETUP_TARGET) + cmake_parse_arguments(ARGS "OpenACC;MPI;SILO;HDF5;FFTW;LAPACK;OpenMP" "TARGET" "SOURCES" ${ARGN}) + + add_executable(${ARGS_TARGET} ${ARGS_SOURCES}) + set(IPO_TARGETS ${ARGS_TARGET}) + # Here we need to split into "library" and "executable" to perform IPO on the NVIDIA compiler. + # A little hacky, but it *is* an edge-case for *one* compiler. + if (NVHPC_USE_TWO_PASS_IPO AND NOT(MFC_OpenMP AND ARGS_OpenMP)) + # nvfortran -Mextract does not produce .o files, only inline library + # data. An OBJECT library with -Mextract causes CMake to rebuild + # everything on every build because the expected .o outputs never + # exist. We use a wrapper script as RULE_LAUNCH_COMPILE that runs + # the compiler and then touches the expected .o output file. + set(_ipo_wrapper "${CMAKE_BINARY_DIR}/${ARGS_TARGET}_extract_wrapper.sh") + file(WRITE "${_ipo_wrapper}" [=[#!/bin/sh +# Find the -o argument (the object file CMake expects) +out= +prev= +for arg do + if [ "$prev" = "-o" ]; then out="$arg"; break; fi + prev="$arg" +done +# Run the compiler; propagate its exit status on failure +"$@" +status=$? +[ "$status" -eq 0 ] || exit "$status" +# Touch the .o so CMake's dependency tracking sees it +[ -n "$out" ] && touch "$out" +exit 0 +]=]) + file(CHMOD "${_ipo_wrapper}" PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE) + add_library(${ARGS_TARGET}_lib OBJECT ${ARGS_SOURCES}) + set_target_properties(${ARGS_TARGET}_lib PROPERTIES + RULE_LAUNCH_COMPILE "${_ipo_wrapper}") + target_compile_options(${ARGS_TARGET}_lib PRIVATE + $<$:-Mextract=lib:${ARGS_TARGET}_lib> + $<$:-Minline> + ) + add_dependencies(${ARGS_TARGET} ${ARGS_TARGET}_lib) + target_compile_options(${ARGS_TARGET} PRIVATE -Minline=lib:${ARGS_TARGET}_lib,except:f_is_default,except:s_compute_dt,except:my_inquire,except:s_mpi_abort,except:s_mpi_barrier,except:s_prohibit_abort,except:s_int_to_str,except:s_associate_cbc_coefficients_pointers) + + # Exclude m_start_up and m_cbc from cross-file inlining: these are + # initialization/boundary code that trigger NVHPC 25.x fort2 ICE when + # too many functions are cross-inlined into them. GPU hot-path files + # (m_rhs, m_riemann_solvers, m_viscous, m_weno, etc.) keep full IPO. + foreach(_no_inline_file m_start_up m_cbc) + set_source_files_properties( + "${CMAKE_BINARY_DIR}/fypp/${ARGS_TARGET}/${_no_inline_file}.fpp.f90" + TARGET_DIRECTORY ${ARGS_TARGET} + PROPERTIES COMPILE_OPTIONS "-Mnoinline" + ) + endforeach() + + list(PREPEND IPO_TARGETS ${ARGS_TARGET}_lib) + endif() + + foreach (a_target ${IPO_TARGETS}) + set_target_properties(${a_target} PROPERTIES Fortran_PREPROCESS ON) + message(STATUS ${CMAKE_Fortran_COMPILER_ID}) + + target_include_directories(${a_target} PRIVATE + "${CMAKE_SOURCE_DIR}/src/common" + "${CMAKE_SOURCE_DIR}/src/common/include" + "${CMAKE_SOURCE_DIR}/src/${ARGS_TARGET}") + + if (EXISTS "${CMAKE_SOURCE_DIR}/src/${ARGS_TARGET}/include") + target_include_directories(${a_target} PRIVATE + "${CMAKE_SOURCE_DIR}/src/${ARGS_TARGET}/include") + endif() + + string(TOUPPER "${ARGS_TARGET}" ${ARGS_TARGET}_UPPER) + target_compile_definitions( + ${a_target} PRIVATE MFC_${CMAKE_Fortran_COMPILER_ID} + MFC_${${ARGS_TARGET}_UPPER} + ) + + if (MFC_MPI AND ARGS_MPI) + find_package(MPI COMPONENTS Fortran REQUIRED) + + target_compile_definitions(${a_target} PRIVATE MFC_MPI) + if(CMAKE_Fortran_COMPILER_ID STREQUAL "LLVMFlang" AND + DEFINED ENV{CRAY_MPICH_INC} AND NOT "$ENV{CRAY_MPICH_INC}" STREQUAL "") + target_compile_options(${a_target} PRIVATE "$ENV{CRAY_MPICH_INC}") + target_link_libraries(${a_target} PRIVATE $ENV{CRAY_MPICH_LIB}) + else() + target_link_libraries(${a_target} PRIVATE MPI::MPI_Fortran) + endif() + endif() + + if (ARGS_SILO) + find_package(SILO REQUIRED) + target_link_libraries(${a_target} PRIVATE SILO::SILO stdc++) + endif() + + if (ARGS_HDF5) + find_package(HDF5 REQUIRED) + target_link_libraries(${a_target} PRIVATE HDF5::HDF5) + endif() + + if (ARGS_FFTW) + if ((MFC_OpenACC AND ARGS_OpenACC) OR (MFC_OpenMP AND ARGS_OpenMP)) + if (CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC" OR CMAKE_Fortran_COMPILER_ID STREQUAL "PGI") + find_package(CUDAToolkit REQUIRED) + target_link_libraries(${a_target} PRIVATE CUDA::cudart CUDA::cufft) + elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "LLVMFlang") + if(DEFINED ENV{CRAY_HIPFORT_LIB} AND NOT "$ENV{CRAY_HIPFORT_LIB}" STREQUAL "") + target_link_libraries(${a_target} PRIVATE $ENV{CRAY_HIPFORT_LIB}) + else() + find_library(HIPFFT_LIB hipfft + HINTS "$ENV{OLCF_AFAR_ROOT}/lib" REQUIRED) + target_link_libraries(${a_target} PRIVATE ${HIPFFT_LIB}) + endif() + else() + find_package(hipfort COMPONENTS hipfft CONFIG REQUIRED) + target_link_libraries(${a_target} PRIVATE hipfort::hipfft) + endif() + else() + find_package(FFTW REQUIRED) + target_link_libraries(${a_target} PRIVATE FFTW::FFTW) + endif() + endif() + + if (ARGS_LAPACK) + find_package(LAPACK REQUIRED) + target_link_libraries(${a_target} PRIVATE LAPACK::LAPACK) + endif() + + if ((MFC_OpenACC AND ARGS_OpenACC) OR (MFC_OpenMP AND ARGS_OpenMP)) + if ((MFC_OpenACC AND ARGS_OpenACC)) + find_package(OpenACC) + + # This should be equivalent to if (NOT OpenACC_FC_FOUND) + if (NOT TARGET OpenACC::OpenACC_Fortran) + message(FATAL_ERROR "OpenACC + Fortran is unsupported.") + endif() + + target_link_libraries(${a_target} PRIVATE OpenACC::OpenACC_Fortran) + target_compile_definitions(${a_target} PRIVATE MFC_OpenACC MFC_GPU) + elseif((MFC_OpenMP AND ARGS_OpenMP)) + find_package(OpenMP) + + # This should be equivalent to if (NOT OpenACC_FC_FOUND) + if (NOT TARGET OpenMP::OpenMP_Fortran) + message(FATAL_ERROR "OpenMP + Fortran is unsupported.") + endif() + set(ENV{OMP_TARGET_OFFLOAD} [MANDATORY]) + # target_link_libraries(${a_target} PRIVATE OpenMP::OpenMP_Fortran) + target_compile_definitions(${a_target} PRIVATE MFC_OpenMP MFC_GPU) + + if(CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC" OR CMAKE_Fortran_COMPILER_ID STREQUAL "PGI") + target_compile_options(${a_target} PRIVATE "-mp=gpu" "-Minfo=mp") + target_link_options(${a_target} PRIVATE "-mp=gpu") + set_target_properties(${a_target} PROPERTIES Fortran_FLAGS "-mp=gpu -gpu=ccall") + elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "Intel") + target_compile_options(${a_target} PRIVATE -fopenmp -fopenmp-targets=spir64) + target_link_options(${a_target} PRIVATE -fopenmp -fopenmp-targets=spir64) + elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "Cray") + target_compile_options(${a_target} PRIVATE -fopenmp) + target_link_options(${a_target} PRIVATE -fopenmp) + elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "LLVMFlang") + target_compile_options(${a_target} PRIVATE -fopenmp --offload-arch=gfx90a -O3 -fopenmp-assume-threads-oversubscription -fopenmp-assume-teams-oversubscription) + target_link_options(${a_target} PRIVATE -fopenmp --offload-arch=gfx90a -flto-partitions=${MFC_BUILD_JOBS}) + endif() + endif() + + if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU") + # FIXME: This should work with other cards than gfx90a ones. + target_compile_options(${a_target} PRIVATE + "-foffload=amdgcn-amdhsa='-march=gfx90a'" + "-foffload-options=-lgfortran\ -lm" + "-fno-exceptions") + if (MFC_Fastmath) + message(WARNING "--fastmath has no effect with the GNU compiler") + endif() + elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC" OR CMAKE_Fortran_COMPILER_ID STREQUAL "PGI") + foreach (cc ${MFC_CUDA_CC}) + target_compile_options(${a_target} + PRIVATE -gpu=cc${cc} + ) + endforeach() + + target_compile_options(${a_target} + PRIVATE -gpu=keep,ptxinfo,lineinfo + ) + + if (MFC_Fastmath) + target_compile_options(${a_target} + PRIVATE -gpu=fastmath + ) + endif() + + # GH-200 Unified Memory Support + if (MFC_Unified) + target_compile_options(${ARGS_TARGET} + PRIVATE -gpu=mem:unified:managedalloc -cuda + ) + # "This option must appear in both the compile and link lines" -- NVHPC Docs + target_link_options(${ARGS_TARGET} + PRIVATE -gpu=mem:unified:managedalloc -cuda + ) + endif() + + if (CMAKE_BUILD_TYPE STREQUAL "Debug" OR CMAKE_BUILD_TYPE STREQUAL "RelDebug") + target_compile_options(${a_target} + PRIVATE -gpu=debug + ) + endif() + elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "Cray") + # Frontier Unified Memory Support + if (MFC_Unified) + target_compile_options(${ARGS_TARGET} + PRIVATE -DFRONTIER_UNIFIED) + endif() + + if (MFC_Fastmath) + message(WARNING "--fastmath has no effect with the CCE") + endif() + + find_package(hipfort COMPONENTS hip CONFIG REQUIRED) + target_link_libraries(${a_target} PRIVATE hipfort::hip hipfort::hipfort-amdgcn) + elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "LLVMFlang") + + if (MFC_Unified) + target_compile_options(${ARGS_TARGET} + PRIVATE -DFRONTIER_UNIFIED) + endif() + + find_library(HIP_LIB amdhip64 + HINTS "$ENV{OLCF_AFAR_ROOT}/lib" REQUIRED) + find_library(HIPFORT_AMDGCN_LIB hipfort-amdgcn + HINTS "$ENV{OLCF_AFAR_ROOT}/lib" REQUIRED) + target_include_directories(${a_target} PRIVATE + "$ENV{OLCF_AFAR_ROOT}/include/hipfort/amdgcn") + target_link_libraries(${a_target} PRIVATE + ${HIP_LIB} ${HIPFORT_AMDGCN_LIB}) + + endif() + elseif (CMAKE_Fortran_COMPILER_ID STREQUAL "Cray") + target_compile_options(${a_target} PRIVATE "SHELL:-h noacc" "SHELL:-x acc") + if (MFC_Fastmath) + message(WARNING "--fastmath has no effect with the CCE") + endif() + endif() + + if (CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC" OR CMAKE_Fortran_COMPILER_ID STREQUAL "PGI") + find_package(CUDAToolkit REQUIRED) + if (TARGET CUDA::nvToolsExt) # CUDA <= 12.8 + target_link_libraries(${a_target} PRIVATE CUDA::nvToolsExt) + else() # CUDA >= 12.9 + target_link_libraries(${a_target} PRIVATE nvhpcwrapnvtx) + target_link_options(${a_target} PRIVATE "-cudalib=nvtx3") + endif() + endif() + endforeach() + + install(TARGETS ${ARGS_TARGET} RUNTIME DESTINATION bin) +endfunction() diff --git a/cmake/ParamsCodegen.cmake b/cmake/ParamsCodegen.cmake new file mode 100644 index 0000000000..dac02a7b48 --- /dev/null +++ b/cmake/ParamsCodegen.cmake @@ -0,0 +1,80 @@ +# cmake/ParamsCodegen.cmake — params codegen stamp, gen-file lists, and build-time custom command + +# Generate Fortran parameter namelist/decl includes into the per-target build +# include directories before HANDLE_SOURCES globs them for Fypp. +find_package(Python3 REQUIRED COMPONENTS Interpreter) +set(_mfc_gen_stamp "${CMAKE_BINARY_DIR}/mfc_params_gen.stamp") +file(GLOB_RECURSE _mfc_gen_inputs + "${CMAKE_CURRENT_SOURCE_DIR}/toolchain/mfc/params/*.py" +) +set(_mfc_needs_regen FALSE) +if(NOT EXISTS "${_mfc_gen_stamp}") + set(_mfc_needs_regen TRUE) +else() + foreach(_input IN LISTS _mfc_gen_inputs) + if("${_input}" IS_NEWER_THAN "${_mfc_gen_stamp}") + set(_mfc_needs_regen TRUE) + break() + endif() + endforeach() +endif() +if(_mfc_needs_regen) + execute_process( + COMMAND "${Python3_EXECUTABLE}" + "${CMAKE_CURRENT_SOURCE_DIR}/toolchain/mfc/params/generators/cmake_gen.py" + "${CMAKE_BINARY_DIR}" + WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" + RESULT_VARIABLE _mfc_gen_result + ERROR_VARIABLE _mfc_gen_error + ) + if(NOT _mfc_gen_result EQUAL 0) + message(FATAL_ERROR "Fortran param generation failed:\n${_mfc_gen_error}") + endif() + file(TOUCH "${_mfc_gen_stamp}") +endif() + +# Enumerate the 10 generated .fpp files explicitly so ninja can track them as +# build-time outputs and so HANDLE_SOURCES does not need a configure-time GLOB +# of ${CMAKE_BINARY_DIR}/include// (which fails when the dir is empty). +set(_mfc_gen_inc "${CMAKE_BINARY_DIR}/include") +set(_mfc_gen_files_pre_process + "${_mfc_gen_inc}/pre_process/generated_namelist.fpp" + "${_mfc_gen_inc}/pre_process/generated_decls.fpp" + "${_mfc_gen_inc}/pre_process/generated_constants.fpp" + "${_mfc_gen_inc}/pre_process/generated_bcast.fpp" + "${_mfc_gen_inc}/pre_process/generated_case_opt_decls.fpp" +) +set(_mfc_gen_files_simulation + "${_mfc_gen_inc}/simulation/generated_namelist.fpp" + "${_mfc_gen_inc}/simulation/generated_decls.fpp" + "${_mfc_gen_inc}/simulation/generated_constants.fpp" + "${_mfc_gen_inc}/simulation/generated_bcast.fpp" + "${_mfc_gen_inc}/simulation/generated_case_opt_decls.fpp" +) +set(_mfc_gen_files_post_process + "${_mfc_gen_inc}/post_process/generated_namelist.fpp" + "${_mfc_gen_inc}/post_process/generated_decls.fpp" + "${_mfc_gen_inc}/post_process/generated_constants.fpp" + "${_mfc_gen_inc}/post_process/generated_bcast.fpp" + "${_mfc_gen_inc}/post_process/generated_case_opt_decls.fpp" +) +set(_mfc_gen_files_syscheck) +set(_mfc_all_gen_files + ${_mfc_gen_files_pre_process} + ${_mfc_gen_files_simulation} + ${_mfc_gen_files_post_process} +) + +# Build-time regeneration: ninja re-runs cmake_gen.py and re-preprocesses +# any .fpp that includes a generated file whenever a params .py changes. +add_custom_command( + OUTPUT ${_mfc_all_gen_files} + COMMAND "${Python3_EXECUTABLE}" + "${CMAKE_CURRENT_SOURCE_DIR}/toolchain/mfc/params/generators/cmake_gen.py" + "${CMAKE_BINARY_DIR}" + DEPENDS ${_mfc_gen_inputs} + WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" + COMMENT "Regenerating Fortran parameter includes" + VERBATIM +) +add_custom_target(mfc_params_gen DEPENDS ${_mfc_all_gen_files}) From b6a067233176303f5dc43a1946ff82143bc81813 Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Wed, 10 Jun 2026 17:01:49 -0400 Subject: [PATCH 10/22] build: make the ninja custom command the sole generator of params includes Delete the configure-time execute_process + stamp block from cmake/ParamsCodegen.cmake (29 lines removed). The build-time add_custom_command is now the sole mechanism that writes the 15 generated_*.fpp includes. Cold-start proof (scratch build dir, no pre-existing includes): - cmake configure succeeds with include/ absent - make -n shows cmake_gen.py scheduled before all fypp steps - make mfc_params_gen produces all 15 files across 3 target dirs Incremental proof: touch toolchain/mfc/params/definitions.py -> make -n schedules regeneration without reconfigure. cmake_gen.py already calls path.parent.mkdir(parents=True,exist_ok=True) so no file(MAKE_DIRECTORY) guards were added. find_package(Python3) is kept in ParamsCodegen.cmake (the CMakeLists.txt one is in a docs block that runs later). Full build+test rides the next gate. --- CMakeLists.txt | 5 +++-- cmake/ParamsCodegen.cmake | 32 ++++---------------------------- 2 files changed, 7 insertions(+), 30 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index fa1bc0a4df..8f4a96c943 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -126,8 +126,9 @@ include(cmake/GPU.cmake) # Fypp discovery and HANDLE_SOURCES macro definition. include(cmake/Fypp.cmake) -# Params codegen: configure-time bootstrap, explicit gen-file lists, -# and the build-time add_custom_command/target. +# Params codegen: explicit gen-file lists and the build-time +# add_custom_command/target. The custom command is the SOLE generator; +# no configure-time execute_process bootstrap. # NOTE: _mfc_gen_files_* lists must exist before HANDLE_SOURCES is called. include(cmake/ParamsCodegen.cmake) diff --git a/cmake/ParamsCodegen.cmake b/cmake/ParamsCodegen.cmake index dac02a7b48..98d9362d49 100644 --- a/cmake/ParamsCodegen.cmake +++ b/cmake/ParamsCodegen.cmake @@ -1,39 +1,15 @@ -# cmake/ParamsCodegen.cmake — params codegen stamp, gen-file lists, and build-time custom command +# cmake/ParamsCodegen.cmake — params codegen gen-file lists and build-time custom command # Generate Fortran parameter namelist/decl includes into the per-target build # include directories before HANDLE_SOURCES globs them for Fypp. +# cmake_gen.py creates the include dirs itself (path.parent.mkdir), so no +# file(MAKE_DIRECTORY ...) is needed here. find_package(Python3 REQUIRED COMPONENTS Interpreter) -set(_mfc_gen_stamp "${CMAKE_BINARY_DIR}/mfc_params_gen.stamp") file(GLOB_RECURSE _mfc_gen_inputs "${CMAKE_CURRENT_SOURCE_DIR}/toolchain/mfc/params/*.py" ) -set(_mfc_needs_regen FALSE) -if(NOT EXISTS "${_mfc_gen_stamp}") - set(_mfc_needs_regen TRUE) -else() - foreach(_input IN LISTS _mfc_gen_inputs) - if("${_input}" IS_NEWER_THAN "${_mfc_gen_stamp}") - set(_mfc_needs_regen TRUE) - break() - endif() - endforeach() -endif() -if(_mfc_needs_regen) - execute_process( - COMMAND "${Python3_EXECUTABLE}" - "${CMAKE_CURRENT_SOURCE_DIR}/toolchain/mfc/params/generators/cmake_gen.py" - "${CMAKE_BINARY_DIR}" - WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" - RESULT_VARIABLE _mfc_gen_result - ERROR_VARIABLE _mfc_gen_error - ) - if(NOT _mfc_gen_result EQUAL 0) - message(FATAL_ERROR "Fortran param generation failed:\n${_mfc_gen_error}") - endif() - file(TOUCH "${_mfc_gen_stamp}") -endif() -# Enumerate the 10 generated .fpp files explicitly so ninja can track them as +# Enumerate the 15 generated .fpp files explicitly so ninja can track them as # build-time outputs and so HANDLE_SOURCES does not need a configure-time GLOB # of ${CMAKE_BINARY_DIR}/include// (which fails when the dir is empty). set(_mfc_gen_inc "${CMAKE_BINARY_DIR}/include") From 50b24e63f145f42b2b87be06641b8c583c0a2db2 Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Wed, 10 Jun 2026 17:09:54 -0400 Subject: [PATCH 11/22] build: prefix config-hash directories with a readable slug --- toolchain/mfc/build.py | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/toolchain/mfc/build.py b/toolchain/mfc/build.py index 01efb1a9b1..60cfc1e3ea 100644 --- a/toolchain/mfc/build.py +++ b/toolchain/mfc/build.py @@ -310,7 +310,34 @@ def get_slug(self, case: Case) -> str: if case.params.get("chemistry", "F") == "T": m.update(case.get_cantera_solution().name.encode()) - return m.hexdigest()[:10] + cfg = CFG() + if cfg.gpu == gpuConfigOptions.ACC.value: + prefix = "gpu-acc" + elif cfg.gpu == gpuConfigOptions.MP.value: + prefix = "gpu-mp" + else: + prefix = "cpu" + + if cfg.debug: + prefix += "-debug" + if cfg.reldebug: + prefix += "-reldebug" + if cfg.gcov: + prefix += "-gcov" + if cfg.fastmath: + prefix += "-fastmath" + if cfg.single: + prefix += "-single" + if cfg.mixed: + prefix += "-mixed" + if cfg.unified: + prefix += "-unified" + if not cfg.mpi: + prefix += "-nompi" + if case.params.get("chemistry", "F") == "T": + prefix += "-chem" + + return f"{prefix}-{m.hexdigest()[:10]}" # Get path to directory that will store the build files def get_staging_dirpath(self, case: Case) -> str: From 5e252324f8cc3b6f41f0329933528dc8ade6318e Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Wed, 10 Jun 2026 17:18:29 -0400 Subject: [PATCH 12/22] docs: document the build-system layer contract --- docs/documentation/contributing.md | 37 ++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/docs/documentation/contributing.md b/docs/documentation/contributing.md index 6898804239..af6efe3aae 100644 --- a/docs/documentation/contributing.md +++ b/docs/documentation/contributing.md @@ -78,6 +78,43 @@ Key data structures (defined in `src/common/m_derived_types.fpp`): See @ref parameters for the full list of ~3,400 simulation parameters. See @ref case_constraints for feature compatibility and example configurations. +### How the Build Fits Together + +The build pipeline has four layers. Each layer has a single responsibility; the handoff +between them is narrow. + +``` +mfc.sh (env bootstrap, venv, module loading, lock) + └─ toolchain/mfc/build.py (config slugs, cmake invocation) + └─ CMakeLists.txt + cmake/{GPU,Fypp,ParamsCodegen,MFCTargets}.cmake + └─ toolchain/mfc/params/generators/cmake_gen.py (writes 15 generated .fpp includes) +``` + +**`mfc.sh` → `build.py`.** `mfc.sh` is a thin shell wrapper that activates the Python +virtual environment, loads HPC modules, and delegates to `build.py`. `build.py` calls +`get_slug` to compute a human-readable variant identifier (`-<10-hex>`, e.g. +`gpu-acc-chem-f93aa400b9`) that encodes every build-affecting flag: GPU backend, precision +mode, debug, chemistry, MPI. Staging and install trees are namespaced by slug under +`build/staging/` and `build/install/`, so multiple variants coexist without interfering. + +**CMake layer.** `cmake/Fypp.cmake` defines `HANDLE_SOURCES`, which sets up one +`add_custom_command` per `.fpp` file to run Fypp at build time. `cmake/ParamsCodegen.cmake` +registers a single ninja-tracked `add_custom_command` (DEPENDS all `params/*.py`) that +invokes `cmake_gen.py` and writes the 15 generated includes under +`build/include//`. There is no configure-time generation: all 15 files are build +outputs, so changing any `params/*.py` triggers only a targeted rebuild, not a full +reconfigure. + +**Fypp and per-target stubs.** Fypp resolves `#:include` at parse time, so every `.fpp` +file sees exactly the include path for the target being compiled. `src/common/` is +compiled once per executable with the `MFC_` preprocessor define +(`MFC_PRE_PROCESS`, `MFC_SIMULATION`, or `MFC_POST_PROCESS`) — this is intentional. +It is what lets common modules include per-target generated files and gate +simulation-only code with `#ifdef MFC_SIMULATION` without duplication. + +For which of the 15 files is manual vs. generated and what each contains, see the +"How to Add a New Simulation Parameter" section below. + ## Development Workflow | Step | Command / Action | From 4b4ea36197ff1ab5540e9feb8b19ff80430a1337 Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Wed, 10 Jun 2026 17:29:49 -0400 Subject: [PATCH 13/22] docs: align generation-timing wording with the build-time mechanism --- .claude/rules/common-pitfalls.md | 11 +++++------ docs/documentation/contributing.md | 2 +- src/common/m_global_parameters_common.fpp | 2 +- src/post_process/m_mpi_proxy.fpp | 2 +- toolchain/mfc/params/generators/cmake_gen.py | 2 +- toolchain/mfc/params/generators/fortran_gen.py | 1 + 6 files changed, 10 insertions(+), 10 deletions(-) diff --git a/.claude/rules/common-pitfalls.md b/.claude/rules/common-pitfalls.md index 463d486870..b88f5be460 100644 --- a/.claude/rules/common-pitfalls.md +++ b/.claude/rules/common-pitfalls.md @@ -42,7 +42,7 @@ covered in `docs/documentation/contributing.md`. - Adding one: `_r()` definition + `_nv()` `NAMELIST_VARS` registration in `toolchain/mfc/params/definitions.py`; `case_validator.py` only if physics-constrained (with a `PHYSICS_DOCS` entry). Fortran declarations and namelist bindings are - auto-generated at CMake configure time — re-run cmake (or `./mfc.sh build`) after editing. + auto-generated at build time (ninja-tracked custom command) — re-run cmake (or `./mfc.sh build`) after editing. - Still manual: derived-type `TYPE` member definitions in `src/common/m_derived_types.fpp`; default-value assignments in `s_assign_default_values_to_user_inputs`; the `CASE_OPT_EXTRA_LINES` literal in `toolchain/mfc/params/generators/fortran_gen.py` (covers `num_dims`, @@ -54,11 +54,10 @@ covered in `docs/documentation/contributing.md`. table in `definitions.py`), derived-type namelist declarations including `GPU_DECLARE` lines and Doxygen descs (`TYPED_DECLS` table in `definitions.py`), the simulation case-optimization declaration block, and the per-target MPI broadcast lists for all - namelist-registry scalars (`generated_bcast.fpp`) — is auto-generated at CMake configure - time. - Gotcha: after editing a generator or table, force regen via `cmake_gen.py` into - `build/staging/*/` or simply reconfigure (`./mfc.sh build`) — cached builds compile - stale includes. Under `--case-optimization` the baked-in constants are dropped from the + namelist-registry scalars (`generated_bcast.fpp`) — is regenerated at build time by a + ninja-tracked custom command (editing `params/*.py` triggers regeneration automatically). + Gotcha: ADDING a new file under `toolchain/mfc/params/` needs one reconfigure + (the custom command's DEPENDS list is globbed at configure time). Under `--case-optimization` the baked-in constants are dropped from the namelist, so changing one needs a *rebuild*, not a case edit. - Shared-state pattern: namelist declarations (`#:include 'generated_decls.fpp'`), the `eqn_idx`/`sys_size`/`b_size`/`tensor_size` state variables, and the common defaults diff --git a/docs/documentation/contributing.md b/docs/documentation/contributing.md index af6efe3aae..d3ae52c30c 100644 --- a/docs/documentation/contributing.md +++ b/docs/documentation/contributing.md @@ -316,7 +316,7 @@ If your check enforces a physics constraint, also add a `PHYSICS_DOCS` entry (se **Step 5: Fortran declaration and namelist binding (auto-generated)** Scalar declarations, GPU declare lines, Doxygen descriptions, and namelist bindings are -auto-generated at CMake configure time from the `TYPED_DECLS` and `FORTRAN_ARRAY_DIMS` +auto-generated at build time (ninja-tracked custom command) from the `TYPED_DECLS` and `FORTRAN_ARRAY_DIMS` tables in `toolchain/mfc/params/definitions.py`. For a plain scalar registered with `_r()` / `_nv()` above, no manual Fortran edit is needed — reconfigure (`./mfc.sh build`) and the generated include in each target's `m_global_parameters.fpp` is updated diff --git a/src/common/m_global_parameters_common.fpp b/src/common/m_global_parameters_common.fpp index cac5aea936..74b0e14883 100644 --- a/src/common/m_global_parameters_common.fpp +++ b/src/common/m_global_parameters_common.fpp @@ -20,7 +20,7 @@ module m_global_parameters_common implicit none - ! All namelist-bound scalar and array declarations (per-target, auto-generated at CMake configure time) + ! All namelist-bound scalar and array declarations (per-target, regenerated at build time by the ninja custom command) #:include 'generated_decls.fpp' ! Case-optimization declarations: parameters (under MFC_CASE_OPTIMIZATION) or plain variables for diff --git a/src/post_process/m_mpi_proxy.fpp b/src/post_process/m_mpi_proxy.fpp index 711aff1904..d9beac5347 100644 --- a/src/post_process/m_mpi_proxy.fpp +++ b/src/post_process/m_mpi_proxy.fpp @@ -67,7 +67,7 @@ contains integer :: ierr !< Generic flag used to identify and report MPI errors ! Generated: case_dir, namelist scalars (INT/LOG/REAL), array dims (including - ! chem_wrt_Y - latent bug fix), fluid_pp loop, bub_pp guard + ! chem_wrt_Y, previously missing), fluid_pp loop, bub_pp guard #:include 'generated_bcast.fpp' ! manual: m_glb, n_glb, p_glb (computed in s_read_input_file, not namelist-bound) diff --git a/toolchain/mfc/params/generators/cmake_gen.py b/toolchain/mfc/params/generators/cmake_gen.py index c71a8e5c23..8a92818ebb 100644 --- a/toolchain/mfc/params/generators/cmake_gen.py +++ b/toolchain/mfc/params/generators/cmake_gen.py @@ -1,6 +1,6 @@ """Generate Fortran parameter .fpp files into the CMake build directory. -Called by CMakeLists.txt at configure time: +Invoked by the build-time custom command in cmake/ParamsCodegen.cmake: python3 cmake_gen.py """ diff --git a/toolchain/mfc/params/generators/fortran_gen.py b/toolchain/mfc/params/generators/fortran_gen.py index 6ad861d33c..a1683e78b6 100644 --- a/toolchain/mfc/params/generators/fortran_gen.py +++ b/toolchain/mfc/params/generators/fortran_gen.py @@ -244,6 +244,7 @@ def generate_case_opt_decls_fpp() -> str: # TYPED_DECLS entries kept entirely in the manual residue: their member-broadcast structure # is irregular (non-uniform subsets, size() arrays, nested loops, complex guards). + def _mpi_type_for(ptype: ParamType) -> str: """Return the Fortran MPI type constant for a ParamType.""" if ptype in _REAL_TYPES: From 6ce3ea5b8af3f3d7c667f0aeb049bd2c10e0dae0 Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Wed, 10 Jun 2026 21:29:20 -0400 Subject: [PATCH 14/22] codegen: broadcast the Herschel-Bulkley fluid_pp members The hardcoded fluid_pp emitter predated the #1545 merge and silently dropped K/nn/tau0/hb_m/mu_min/mu_max/mu_bulk and non_newtonian from the generated lists - a multi-rank regression for non-Newtonian runs relative to the manual lists it replaced. Emitted sets now verified member-identical to master's manual lists per target. Caught by automated PR review. --- toolchain/mfc/params/generators/fortran_gen.py | 12 ++++++------ toolchain/mfc/params_tests/test_fortran_gen.py | 4 ++++ 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/toolchain/mfc/params/generators/fortran_gen.py b/toolchain/mfc/params/generators/fortran_gen.py index a1683e78b6..64c003925f 100644 --- a/toolchain/mfc/params/generators/fortran_gen.py +++ b/toolchain/mfc/params/generators/fortran_gen.py @@ -318,16 +318,16 @@ def _emit_bcast_group(lines: List[str], vars_list: List[str], mpi_type: str) -> def _emit_fluid_pp(lines: List[str], target: str) -> None: """Emit the fluid_pp(i) member-loop broadcast block. - Members broadcast: gamma, pi_inf, G, cv, qv, qvp (all REAL, mpi_p). - Sim additionally: Re(1) with count=2. + Members broadcast: the 13 REAL members (EOS core plus the Herschel-Bulkley + non-Newtonian set) and the LOGICAL non_newtonian flag, matching the manual + lists this generator replaces. Sim additionally: Re(1) with count=2. """ - # These are the core EOS members broadcast in all three targets. - # Other fluid_pp members in the registry (mu_v, pv, mul0, ss, D_v, etc.) are - # physics-specific and are initialised from input on rank 0 only or derived. - fp_real_members = ["gamma", "pi_inf", "G", "cv", "qv", "qvp"] + fp_real_members = ["gamma", "pi_inf", "G", "cv", "qv", "qvp", + "K", "nn", "tau0", "hb_m", "mu_min", "mu_max", "mu_bulk"] lines.append(" do i = 1, num_fluids_max") for mem in fp_real_members: lines.append(f" call MPI_BCAST(fluid_pp(i)%{mem}, 1, mpi_p, 0, MPI_COMM_WORLD, ierr)") + lines.append(" call MPI_BCAST(fluid_pp(i)%non_newtonian, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)") if target == "sim": lines.append(" call MPI_BCAST(fluid_pp(i)%Re(1), 2, mpi_p, 0, MPI_COMM_WORLD, ierr)") lines.append(" end do") diff --git a/toolchain/mfc/params_tests/test_fortran_gen.py b/toolchain/mfc/params_tests/test_fortran_gen.py index 2f74251863..e4fe751f7c 100644 --- a/toolchain/mfc/params_tests/test_fortran_gen.py +++ b/toolchain/mfc/params_tests/test_fortran_gen.py @@ -414,6 +414,10 @@ def test_generate_bcast_fpp_fluid_pp_loop(): assert "do i = 1, num_fluids_max" in out, f"{target}: fluid_pp loop missing" assert "call MPI_BCAST(fluid_pp(i)%gamma, 1, mpi_p, 0, MPI_COMM_WORLD, ierr)" in out assert "call MPI_BCAST(fluid_pp(i)%cv, 1, mpi_p, 0, MPI_COMM_WORLD, ierr)" in out + # Herschel-Bulkley members (#1545) — dropping any is a multi-rank regression + for hb in ("K", "nn", "tau0", "hb_m", "mu_min", "mu_max", "mu_bulk"): + assert f"call MPI_BCAST(fluid_pp(i)%{hb}, 1, mpi_p, 0, MPI_COMM_WORLD, ierr)" in out, f"missing HB member {hb}" + assert "call MPI_BCAST(fluid_pp(i)%non_newtonian, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)" in out sim = generate_bcast_fpp("sim") assert "call MPI_BCAST(fluid_pp(i)%Re(1), 2, mpi_p, 0, MPI_COMM_WORLD, ierr)" in sim From 3b6f432677f4d637e55c77f2ffc62fee2fa37e0c Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Wed, 10 Jun 2026 21:39:17 -0400 Subject: [PATCH 15/22] src: restore the interface-compression section heading in m_constants The named-constant retirement deleted the WENO/MUSCL block between two section headings and merged them into one nonsensical comment. Flagged by automated PR review on #1552. --- src/common/m_constants.fpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/common/m_constants.fpp b/src/common/m_constants.fpp index 18d4553c1c..eb49d774d2 100644 --- a/src/common/m_constants.fpp +++ b/src/common/m_constants.fpp @@ -50,7 +50,7 @@ module m_constants real(wp), parameter :: broadband_spectral_level_constant = 20._wp !> The spectral level constant to correct the magnitude at each frequency to ensure the source is overall broadband real(wp), parameter :: broadband_spectral_level_growth_rate = 10._wp - ! Reconstruction Types Interface Compression + ! Interface compression (THINC) real(wp), parameter :: dflt_ic_eps = 1e-4_wp !< Ensure compression is only applied to surface cells in THINC real(wp), parameter :: dflt_ic_beta = 1.6_wp !< Sharpness parameter's default value used in THINC real(wp), parameter :: moncon_cutoff = 1e-8_wp !< Monotonicity constraint's limiter to prevent extremas in THINC From 284396401e5c3018762fded7d1a20d6b943ea4ee Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Wed, 10 Jun 2026 21:52:59 -0400 Subject: [PATCH 16/22] toolchain: validate patch_ib against the namelist array bound; refresh dedup docs NIB read num_ib_patches_max (2050000) but the patch_ib namelist array is dimensioned num_ib_patches_max_namelist (54000), so validation accepted indices that overflow the array. Also refreshes three doc passages made stale by the dedup. Both flagged by review. --- .claude/rules/common-pitfalls.md | 4 ++-- docs/documentation/contributing.md | 2 +- toolchain/mfc/params/definitions.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.claude/rules/common-pitfalls.md b/.claude/rules/common-pitfalls.md index b88f5be460..bf3dfc53c3 100644 --- a/.claude/rules/common-pitfalls.md +++ b/.claude/rules/common-pitfalls.md @@ -14,7 +14,7 @@ covered in `docs/documentation/contributing.md`. bubbles and IB. Read that routine for the current value rather than assuming one. - Riemann solvers: left state at `j`, right state at `j+1`. - All equation indices live in the `eqn_idx` struct (`eqn_idx_info` in - `m_derived_types.fpp`, populated in `m_global_parameters.fpp`): `%cont`, `%mom`, `%E`, + `m_derived_types.fpp`, populated by `s_initialize_eqn_idx` in `m_global_parameters_common.fpp`): `%cont`, `%mom`, `%E`, `%adv`, plus optional ranges (`%bub`, `%stress`, `%species`, `%B`, ...). The old `contxb`/`momxb` shorthands are gone. Index positions depend on `model_eqns` and enabled features — changing either moves ALL indices; never hard-code one. @@ -66,7 +66,7 @@ covered in `docs/documentation/contributing.md`. `use m_global_parameters` continues to work for all downstream modules without change. Sim-only declarations (GPU_DECLARE, Re_idx allocation) stay in `m_global_parameters_common` behind `#ifdef MFC_SIMULATION`. Generated includes - (`generated_decls.fpp`, `generated_bcast.fpp`) must exist for every target — the build + (`generated_decls.fpp`, `generated_bcast.fpp`, `generated_case_opt_decls.fpp`) must exist for every target — the build emits stubs where the content is sim-only, so a common file that includes one will compile for pre/post too. - Runtime checks (`@:PROHIBIT`) go where they run: shared → diff --git a/docs/documentation/contributing.md b/docs/documentation/contributing.md index d3ae52c30c..0468e01e2f 100644 --- a/docs/documentation/contributing.md +++ b/docs/documentation/contributing.md @@ -319,7 +319,7 @@ Scalar declarations, GPU declare lines, Doxygen descriptions, and namelist bindi auto-generated at build time (ninja-tracked custom command) from the `TYPED_DECLS` and `FORTRAN_ARRAY_DIMS` tables in `toolchain/mfc/params/definitions.py`. For a plain scalar registered with `_r()` / `_nv()` above, no manual Fortran edit is needed — reconfigure (`./mfc.sh build`) -and the generated include in each target's `m_global_parameters.fpp` is updated +and the generated include in `m_global_parameters_common.fpp` (compiled per target) is updated automatically. Still manual (not auto-generated): diff --git a/toolchain/mfc/params/definitions.py b/toolchain/mfc/params/definitions.py index 5d0c0e55b0..1ba1ed965d 100644 --- a/toolchain/mfc/params/definitions.py +++ b/toolchain/mfc/params/definitions.py @@ -31,7 +31,7 @@ def _fc(name: str, default: int) -> int: NPR = _fc("num_probes_max", 10) # probe, acoustic, integral NB = _fc("num_bc_patches_max", 10) # patch_bc NUM_PATCHES_MAX = _fc("num_patches_max", 10) # patch_icpp (Fortran array bound) -NIB = _fc("num_ib_patches_max", 54000) # patch_ib (Fortran array bound) +NIB = _fc("num_ib_patches_max_namelist", 54000) # patch_ib namelist array bound NAF = _fc("num_ib_airfoils_max", 5) # ib_airfoil (Fortran array bound) NSM = _fc("num_stl_models_max", 10) # stl_models (Fortran array bound) NPB = _fc("num_particle_clouds_max", 10) # particle_cloud (Fortran array bound) From debf14b498284ce39481ece30cee98e5798b3a15 Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Wed, 10 Jun 2026 22:04:48 -0400 Subject: [PATCH 17/22] style: format the broadcast generator --- toolchain/mfc/params/generators/fortran_gen.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/toolchain/mfc/params/generators/fortran_gen.py b/toolchain/mfc/params/generators/fortran_gen.py index 64c003925f..d1bd45477e 100644 --- a/toolchain/mfc/params/generators/fortran_gen.py +++ b/toolchain/mfc/params/generators/fortran_gen.py @@ -322,8 +322,7 @@ def _emit_fluid_pp(lines: List[str], target: str) -> None: non-Newtonian set) and the LOGICAL non_newtonian flag, matching the manual lists this generator replaces. Sim additionally: Re(1) with count=2. """ - fp_real_members = ["gamma", "pi_inf", "G", "cv", "qv", "qvp", - "K", "nn", "tau0", "hb_m", "mu_min", "mu_max", "mu_bulk"] + fp_real_members = ["gamma", "pi_inf", "G", "cv", "qv", "qvp", "K", "nn", "tau0", "hb_m", "mu_min", "mu_max", "mu_bulk"] lines.append(" do i = 1, num_fluids_max") for mem in fp_real_members: lines.append(f" call MPI_BCAST(fluid_pp(i)%{mem}, 1, mpi_p, 0, MPI_COMM_WORLD, ierr)") From eafc65247ad416d3aa8d76601119d557d8225ff7 Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Wed, 10 Jun 2026 23:43:06 -0400 Subject: [PATCH 18/22] src: move GPU_DECLAREs for hoisted state into m_global_parameters_common Cray ftn rejects declare-target on use-associated names (ftn-1448 on Frontier gpu-omp): the dedup hoisted these declarations into the common module but left their GPU_DECLARE lines in simulation. Declares move to the declaring module; mixed lines split so locally-declared variables keep their declares in place. CPU-preprocessed output verified identical; declare-target scoping verified clean for both files under OpenMP emission. --- src/common/m_global_parameters_common.fpp | 38 ++++++++++++++++ src/simulation/m_global_parameters.fpp | 53 +++++------------------ 2 files changed, 48 insertions(+), 43 deletions(-) diff --git a/src/common/m_global_parameters_common.fpp b/src/common/m_global_parameters_common.fpp index 74b0e14883..f436be4366 100644 --- a/src/common/m_global_parameters_common.fpp +++ b/src/common/m_global_parameters_common.fpp @@ -67,6 +67,44 @@ module m_global_parameters_common #ifdef MFC_SIMULATION $:GPU_DECLARE(create='[sys_size, eqn_idx, b_size, tensor_size]') $:GPU_DECLARE(create='[shear_num, shear_indices, shear_BC_flip_num, shear_BC_flip_indices]') + ! Device residency for namelist/case-opt state declared above via the generated + ! includes: declare directives must live in the declaring module (Cray ftn rejects + ! declare-target on use-associated names), so these moved here from simulation. + $:GPU_DECLARE(create='[cyl_coord]') + $:GPU_DECLARE(create='[dt, m, n, p]') + $:GPU_DECLARE(create='[cfl_target]') + $:GPU_DECLARE(create='[int_comp, ic_eps, ic_beta]') + $:GPU_DECLARE(create='[muscl_eps]') + $:GPU_DECLARE(create='[mpp_lim, model_eqns, mixture_err, alt_soundspeed]') + $:GPU_DECLARE(create='[avg_state, mp_weno, weno_eps, teno_CT, hypoelasticity]') + $:GPU_DECLARE(create='[hyperelasticity, elasticity, low_Mach]') + $:GPU_DECLARE(create='[cont_damage, hyper_cleaning]') + $:GPU_DECLARE(create='[relax, relax_model, palpha_eps, ptgalpha_eps]') + $:GPU_DECLARE(create='[down_sample]') + $:GPU_DECLARE(create='[fd_order]') + $:GPU_DECLARE(create='[rhoref, pref]') + $:GPU_DECLARE(create='[ib, num_ibs]') + $:GPU_DECLARE(create='[ib_coefficient_of_friction]') + $:GPU_DECLARE(create='[Ca, Web, Re_inv]') + $:GPU_DECLARE(create='[bubbles_euler, polytropic, polydisperse]') + $:GPU_DECLARE(create='[adv_n, adap_dt, adap_dt_tol, adap_dt_max_iters]') + $:GPU_DECLARE(create='[bubble_model, thermal]') + $:GPU_DECLARE(create='[poly_sigma]') + $:GPU_DECLARE(create='[qbmm, pi_fac]') + $:GPU_DECLARE(create='[R0ref]') + $:GPU_DECLARE(create='[acoustic_source, num_source]') + $:GPU_DECLARE(create='[sigma, surface_tension]') + $:GPU_DECLARE(create='[bubbles_lagrange]') + $:GPU_DECLARE(create='[Bx0]') + $:GPU_DECLARE(create='[tau_star, cont_damage_s, alpha_bar]') + $:GPU_DECLARE(create='[hyper_cleaning_speed, hyper_cleaning_tau]') + #:if not MFC_CASE_OPTIMIZATION + $:GPU_DECLARE(create='[num_dims, num_vels, weno_polyn, weno_order]') + $:GPU_DECLARE(create='[weno_num_stencils, num_fluids, wenojs]') + $:GPU_DECLARE(create='[mapped_weno, wenoz, teno, wenoz_q, mhd, relativity]') + $:GPU_DECLARE(create='[igr_iter_solver, igr_order, viscous, igr_pres_lim, igr]') + $:GPU_DECLARE(create='[recon_type, muscl_order, muscl_polyn, muscl_lim]') + #:endif #endif !> @name Processor coordinates and parallel-IO addressing (identical declaration across all three targets) diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index 350bc07d0c..7a22a67afb 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -42,7 +42,7 @@ module m_global_parameters !> @{ integer :: grid_geometry !> @} - $:GPU_DECLARE(create='[cyl_coord, grid_geometry]') + $:GPU_DECLARE(create='[grid_geometry]') !> @name Cell-boundary (CB) locations in the x-, y- and z-directions, respectively !> @{ @@ -59,14 +59,11 @@ module m_global_parameters real(wp), target, allocatable, dimension(:) :: dx, dy, dz !> @} - $:GPU_DECLARE(create='[x_cb, y_cb, z_cb, x_cc, y_cc, z_cc, dx, dy, dz, dt, m, n, p]') - - $:GPU_DECLARE(create='[cfl_target]') + $:GPU_DECLARE(create='[x_cb, y_cb, z_cb, x_cc, y_cc, z_cc, dx, dy, dz]') logical :: cfl_dt ! Simulation Algorithm Parameters generated_case_opt_decls.fpp: now in m_global_parameters_common - $:GPU_DECLARE(create='[int_comp, ic_eps, ic_beta]') integer :: hyper_model !< hyperelasticity solver algorithm ! elasticity, chemistry: in m_global_parameters_common logical :: shear_stress !< Shear stresses @@ -78,21 +75,8 @@ module m_global_parameters integer :: cpu_start, cpu_end, cpu_rate - #:if not MFC_CASE_OPTIMIZATION - $:GPU_DECLARE(create='[num_dims, num_vels, weno_polyn, weno_order]') - $:GPU_DECLARE(create='[weno_num_stencils, num_fluids, wenojs]') - $:GPU_DECLARE(create='[mapped_weno, wenoz, teno, wenoz_q, mhd, relativity]') - $:GPU_DECLARE(create='[igr_iter_solver, igr_order, viscous, igr_pres_lim, igr]') - $:GPU_DECLARE(create='[recon_type, muscl_order, muscl_polyn, muscl_lim]') - #:endif - - $:GPU_DECLARE(create='[muscl_eps]') - $:GPU_DECLARE(create='[mpp_lim, model_eqns, mixture_err, alt_soundspeed]') - $:GPU_DECLARE(create='[avg_state, mp_weno, weno_eps, teno_CT, hypoelasticity]') - $:GPU_DECLARE(create='[hyperelasticity, hyper_model, elasticity, low_Mach]') - $:GPU_DECLARE(create='[shear_stress, bulk_stress, cont_damage, hyper_cleaning]') - - $:GPU_DECLARE(create='[relax, relax_model, palpha_eps, ptgalpha_eps]') + $:GPU_DECLARE(create='[hyper_model]') + $:GPU_DECLARE(create='[shear_stress, bulk_stress]') logical :: bc_io !> @name Boundary conditions (BC) in the x-, y- and z-directions, respectively @@ -117,7 +101,6 @@ module m_global_parameters type(bounds_info) :: neighbor_domain_x, neighbor_domain_y, neighbor_domain_z integer :: num_gbl_ibs, num_local_ibs $:GPU_DECLARE(create='[x_domain, y_domain, z_domain, neighbor_domain_x, neighbor_domain_y, neighbor_domain_z, num_gbl_ibs]') - $:GPU_DECLARE(create='[down_sample]') ! proc_coords, start_idx, mpiiofs, mpi_info_int: in m_global_parameters_common type(mpi_io_var), public :: MPI_IO_DATA @@ -188,14 +171,12 @@ module m_global_parameters ! Fluids Physical Parameters fluid_pp, bub_pp: auto-generated in generated_decls.fpp integer :: fd_number !< Finite-difference half-stencil size: MAX(1, fd_order/2) - $:GPU_DECLARE(create='[fd_order, fd_number]') + $:GPU_DECLARE(create='[fd_number]') ! probe, integral: auto-generated in generated_decls.fpp !> @name Reference density and pressure for Tait EOS !> @{ - $:GPU_DECLARE(create='[rhoref, pref]') - !> @name Immersed Boundaries !> patch_ib, ib_airfoil, stl_models, particle_cloud: auto-generated in generated_decls.fpp !> @{ @@ -203,8 +184,7 @@ module m_global_parameters integer, allocatable, dimension(:,:,:) :: ib_neighbor_ranks !< MPI ranks of neighborhood domains, indexed (-N:N,-N:N,-N:N) type(ib_airfoil_grid), dimension(num_ib_airfoils_max) :: ib_airfoil_grids !< Per-airfoil computed surface grids - $:GPU_DECLARE(create='[ib, num_ibs, ib_airfoil_grids]') - $:GPU_DECLARE(create='[ib_coefficient_of_friction]') + $:GPU_DECLARE(create='[ib_airfoil_grids]') !> @} !> @name Bubble modeling @@ -216,25 +196,19 @@ module m_global_parameters #:endif real(wp) :: Eu !< Euler number - $:GPU_DECLARE(create='[Eu, Ca, Web, Re_inv]') + $:GPU_DECLARE(create='[Eu]') real(wp), dimension(:), allocatable :: weight !< Simpson quadrature weights real(wp), dimension(:), allocatable :: R0 !< Bubble sizes $:GPU_DECLARE(create='[weight, R0]') - $:GPU_DECLARE(create='[bubbles_euler, polytropic, polydisperse]') - - $:GPU_DECLARE(create='[adv_n, adap_dt, adap_dt_tol, adap_dt_max_iters]') - - $:GPU_DECLARE(create='[bubble_model, thermal]') - real(wp), allocatable, dimension(:,:,:) :: ptil !< Pressure modification - $:GPU_DECLARE(create='[ptil, poly_sigma]') + $:GPU_DECLARE(create='[ptil]') integer, parameter :: nmom = 6 !< Number of carried moments per R0 location integer :: nmomsp !< Number of moments required by ensemble-averaging integer :: nmomtot !< Total number of carried moments moments/transport equations - $:GPU_DECLARE(create='[qbmm, nmomsp, nmomtot, pi_fac]') + $:GPU_DECLARE(create='[nmomsp, nmomtot]') #:if not MFC_CASE_OPTIMIZATION $:GPU_DECLARE(create='[nb]') @@ -261,15 +235,13 @@ module m_global_parameters $:GPU_DECLARE(create='[gam, gam_m]') real(wp) :: p0ref, rho0ref, T0ref, ss, pv, vd, mu_l, mu_v, mu_g, gam_v, gam_g, M_v, M_g, cp_v, cp_g, R_v, R_g - $:GPU_DECLARE(create='[R0ref, p0ref, rho0ref, T0ref, ss, pv, vd, mu_l, mu_v, mu_g, gam_v, gam_g, M_v, M_g, cp_v, cp_g, R_v, R_g]') + $:GPU_DECLARE(create='[p0ref, rho0ref, T0ref, ss, pv, vd, mu_l, mu_v, mu_g, gam_v, gam_g, M_v, M_g, cp_v, cp_g, R_v, R_g]') !> @} ! acoustic: auto-generated in generated_decls.fpp - $:GPU_DECLARE(create='[acoustic_source, num_source]') !> @name Surface tension parameters !> @{ - $:GPU_DECLARE(create='[sigma, surface_tension]') !> @} real(wp), allocatable, dimension(:) :: gammas, gs_min, pi_infs, ps_inf, cvs, qvs, qvps @@ -285,19 +257,14 @@ module m_global_parameters !> @name lagrangian subgrid bubble parameters !> lag_params: auto-generated in generated_decls.fpp !> @{! - $:GPU_DECLARE(create='[bubbles_lagrange]') !> @} - $:GPU_DECLARE(create='[Bx0]') - !> @name Continuum damage model parameters !> @{! - $:GPU_DECLARE(create='[tau_star, cont_damage_s, alpha_bar]') !> @} !> @name MHD Hyperbolic cleaning parameters !> @{! - $:GPU_DECLARE(create='[hyper_cleaning_speed, hyper_cleaning_tau]') !> @} contains From 786dd1e1f7906dae1c73adf799a94594fa2e5654 Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Wed, 10 Jun 2026 19:13:20 -0400 Subject: [PATCH 19/22] toolchain: deregister parameters removed from the derived types fluid_pp%{mul0,ss,pv,gamma_v,M_v,mu_v,k_v,cp_v,D_v} and lag_params%{T0,Thost,c0,rho0,x0} were removed from the Fortran derived types by upstream #1085/#1093 but remained registered in the toolchain. Setting any of them causes namelist read to abort with a misleading 'datatype mismatch' error. Verified against src/common/m_derived_types.fpp: - physical_parameters has: gamma, pi_inf, Re, cv, qv, qvp, G - bubbles_lagrange_parameters has: solver_approach, cluster_type, pressure_corrector, smooth_type, heatTransfer_model, massTransfer_model, write_bubbles, write_bubbles_stats, nBubs_glb, epsilonb, charwidth, valmaxvoid Also remove the now-dead PATTERNS entries in descriptions.py and update the stale comments in fortran_gen.py (_emit_fluid_pp and _emit_lag_params). Both emitters now walk the registry rather than hardcoding member lists. --- toolchain/mfc/params/definitions.py | 14 +++++++---- toolchain/mfc/params/descriptions.py | 14 ----------- .../mfc/params/generators/fortran_gen.py | 24 +++++++++---------- 3 files changed, 21 insertions(+), 31 deletions(-) diff --git a/toolchain/mfc/params/definitions.py b/toolchain/mfc/params/definitions.py index 1ba1ed965d..3edb09e758 100644 --- a/toolchain/mfc/params/definitions.py +++ b/toolchain/mfc/params/definitions.py @@ -842,14 +842,13 @@ def _load(): _r(f"{px}sph_har_coeff({ll},{mm})", REAL) # fluid_pp (10 fluids) + # Members present in physical_parameters: gamma, pi_inf, Re, cv, qv, qvp, G. + # mul0/ss/pv/gamma_v/M_v/mu_v/k_v/cp_v/D_v were removed from the Fortran type + # by upstream #1085/#1093 — they must NOT be registered (namelist read would crash). for f in range(1, NF + 1): px = f"fluid_pp({f})%" for a, sym in [("gamma", r"\f$\gamma_k\f$"), ("pi_inf", r"\f$\pi_{\infty,k}\f$"), ("cv", r"\f$c_{v,k}\f$"), ("qv", r"\f$q_{v,k}\f$"), ("qvp", r"\f$q'_{v,k}\f$")]: _r(f"{px}{a}", REAL, math=sym) - _r(f"{px}mul0", REAL, {"viscosity"}, math=r"\f$\mu_{l,k}\f$") - _r(f"{px}ss", REAL, {"surface_tension"}, math=r"\f$\sigma_k\f$") - for a in ["pv", "gamma_v", "M_v", "mu_v", "k_v", "cp_v", "D_v"]: - _r(f"{px}{a}", REAL, {"bubbles"}) _r(f"{px}G", REAL, {"elasticity"}, math=r"\f$G_k\f$") _r(f"{px}Re(1)", REAL, {"viscosity"}, math=r"\f$\mathrm{Re}_k\f$ (shear)") _r(f"{px}Re(2)", REAL, {"viscosity"}, math=r"\f$\mathrm{Re}_k\f$ (bulk)") @@ -1049,11 +1048,16 @@ def _load(): _r(f"simplex_params%perturb_vel_offset({d},{j})", REAL) # lag_params (Lagrangian bubbles) + # Members present in bubbles_lagrange_parameters: solver_approach, cluster_type, + # pressure_corrector, smooth_type, heatTransfer_model, massTransfer_model, + # write_bubbles, write_bubbles_stats, nBubs_glb, epsilonb, charwidth, valmaxvoid. + # T0/Thost/c0/rho0/x0 were removed from the Fortran type by upstream #1085/#1093 + # — they must NOT be registered (namelist read would crash). for a in ["heatTransfer_model", "massTransfer_model", "pressure_corrector", "write_bubbles", "write_bubbles_stats"]: _r(f"lag_params%{a}", LOG, {"bubbles"}) for a in ["solver_approach", "cluster_type", "smooth_type", "nBubs_glb"]: _r(f"lag_params%{a}", INT, {"bubbles"}) - for a in ["epsilonb", "valmaxvoid", "charwidth", "c0", "rho0", "T0", "x0", "Thost"]: + for a in ["epsilonb", "valmaxvoid", "charwidth"]: _r(f"lag_params%{a}", REAL, {"bubbles"}) # chem_params diff --git a/toolchain/mfc/params/descriptions.py b/toolchain/mfc/params/descriptions.py index 016fd8cde8..79cc4089f4 100644 --- a/toolchain/mfc/params/descriptions.py +++ b/toolchain/mfc/params/descriptions.py @@ -323,15 +323,6 @@ (r"fluid_pp\((\d+)\)%qv", "Heat of formation for fluid {0}"), (r"fluid_pp\((\d+)\)%qvp", "Heat of formation prime for fluid {0}"), (r"fluid_pp\((\d+)\)%Re\((\d+)\)", "Reynolds number component {1} for fluid {0}"), - (r"fluid_pp\((\d+)\)%mul0", "Reference liquid viscosity for fluid {0}"), - (r"fluid_pp\((\d+)\)%ss", "Surface tension for fluid {0}"), - (r"fluid_pp\((\d+)\)%pv", "Vapor pressure for fluid {0}"), - (r"fluid_pp\((\d+)\)%gamma_v", "Specific heat ratio of vapor phase for fluid {0}"), - (r"fluid_pp\((\d+)\)%M_v", "Molecular weight of vapor phase for fluid {0}"), - (r"fluid_pp\((\d+)\)%mu_v", "Viscosity of vapor phase for fluid {0}"), - (r"fluid_pp\((\d+)\)%k_v", "Thermal conductivity of vapor phase for fluid {0}"), - (r"fluid_pp\((\d+)\)%cp_v", "Specific heat capacity (const. pressure) of vapor for fluid {0}"), - (r"fluid_pp\((\d+)\)%D_v", "Vapor mass diffusivity for fluid {0}"), (r"fluid_pp\((\d+)\)%non_newtonian", "Enable Herschel-Bulkley non-Newtonian viscosity for fluid {0}"), (r"fluid_pp\((\d+)\)%K", "HB consistency index for fluid {0}"), (r"fluid_pp\((\d+)\)%nn", "HB flow behavior index for fluid {0}"), @@ -500,11 +491,6 @@ (r"lag_params%epsilonb", "Standard deviation scaling for Gaussian smoothing"), (r"lag_params%charwidth", "Domain virtual depth for 2D simulations"), (r"lag_params%valmaxvoid", "Maximum permitted void fraction"), - (r"lag_params%T0", "Initial bubble temperature"), - (r"lag_params%Thost", "Host fluid temperature"), - (r"lag_params%c0", "Initial sound speed"), - (r"lag_params%rho0", "Initial density"), - (r"lag_params%x0", "Initial bubble position"), (r"lag_params%(\w+)", "Lagrangian tracking parameter: {0}"), # chem_params patterns - specific fields first (r"chem_params%diffusion", "Enable species diffusion for chemistry"), diff --git a/toolchain/mfc/params/generators/fortran_gen.py b/toolchain/mfc/params/generators/fortran_gen.py index d1bd45477e..817a5fa4fe 100644 --- a/toolchain/mfc/params/generators/fortran_gen.py +++ b/toolchain/mfc/params/generators/fortran_gen.py @@ -318,15 +318,16 @@ def _emit_bcast_group(lines: List[str], vars_list: List[str], mpi_type: str) -> def _emit_fluid_pp(lines: List[str], target: str) -> None: """Emit the fluid_pp(i) member-loop broadcast block. - Members broadcast: the 13 REAL members (EOS core plus the Herschel-Bulkley - non-Newtonian set) and the LOGICAL non_newtonian flag, matching the manual - lists this generator replaces. Sim additionally: Re(1) with count=2. + Members broadcast: all REAL registry members of fluid_pp (gamma, pi_inf, G, cv, qv, + qvp) derived from physical_parameters. Sim additionally: Re(1) with count=2. + mul0/ss/pv/gamma_v/M_v/mu_v/k_v/cp_v/D_v were removed from the Fortran type by + upstream #1085/#1093 and are no longer registered. """ - fp_real_members = ["gamma", "pi_inf", "G", "cv", "qv", "qvp", "K", "nn", "tau0", "hb_m", "mu_min", "mu_max", "mu_bulk"] + # Walk the registry for fluid_pp REAL members (Re handled separately; exclude). + fp_real_members = sorted(k.split("%", 1)[1] for k in REGISTRY.all_params if k.startswith("fluid_pp(1)%") and not k.startswith("fluid_pp(1)%Re(")) lines.append(" do i = 1, num_fluids_max") for mem in fp_real_members: lines.append(f" call MPI_BCAST(fluid_pp(i)%{mem}, 1, mpi_p, 0, MPI_COMM_WORLD, ierr)") - lines.append(" call MPI_BCAST(fluid_pp(i)%non_newtonian, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)") if target == "sim": lines.append(" call MPI_BCAST(fluid_pp(i)%Re(1), 2, mpi_p, 0, MPI_COMM_WORLD, ierr)") lines.append(" end do") @@ -347,14 +348,13 @@ def _emit_bub_pp(lines: List[str]) -> None: def _emit_lag_params(lines: List[str]) -> None: """Emit the lag_params member broadcast block (sim-only, under bubbles_lagrange guard). - Subset of lag_params members that are actually broadcast: the fields that appear in the - existing simulation m_mpi_proxy.fpp lag_params block. The registry has additional - members (T0, Thost, c0, rho0, x0) that are not broadcast (rank-0-only). + All registered lag_params members are broadcast. T0/Thost/c0/rho0/x0 were removed + from the Fortran type by upstream #1085/#1093 and are no longer in the registry. """ - # Hardcoded broadcast subset — matches the existing sim m_mpi_proxy exactly. - lag_log = ["heatTransfer_model", "massTransfer_model", "pressure_corrector", "write_bubbles", "write_bubbles_stats"] - lag_int = ["cluster_type", "nBubs_glb", "smooth_type", "solver_approach"] - lag_real = ["charwidth", "epsilonb", "valmaxvoid"] + # Walk the registry for lag_params members, split by type. + lag_log = sorted(k.split("%", 1)[1] for k in REGISTRY.all_params if k.startswith("lag_params%") and REGISTRY.all_params[k].param_type == ParamType.LOG) + lag_int = sorted(k.split("%", 1)[1] for k in REGISTRY.all_params if k.startswith("lag_params%") and REGISTRY.all_params[k].param_type in (ParamType.INT, ParamType.ANALYTIC_INT)) + lag_real = sorted(k.split("%", 1)[1] for k in REGISTRY.all_params if k.startswith("lag_params%") and REGISTRY.all_params[k].param_type in _REAL_TYPES) lines.append(" if (bubbles_lagrange) then") for mem in sorted(lag_log): lines.append(f" call MPI_BCAST(lag_params%{mem}, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)") From 322d7d28970446d547b4d2f93c5f8b7ccd3961a5 Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Wed, 10 Jun 2026 19:17:58 -0400 Subject: [PATCH 20/22] src: broadcast wall-velocity BC members in pre and post bc_x/y/z%{vb1,vb2,vb3,ve1,ve2,ve3} are read from the namelist on rank 0 and consumed on all ranks by s_slip_wall/s_no_slip_wall in src/common/m_boundary_common.fpp (~833-1155). These routines are compiled into all three targets and are reached from pre_process (m_data_output, m_perturbation) and post_process (m_start_up) code paths. Without the broadcast, non-root ranks use uninitialised values for the wall-velocity components, producing rank-dependent ghost cells for any multi-rank wall-BC pre-process or post-process run. The sim residue has always broadcast these; pre and post were missing them. Add the matching broadcasts to both pre and post using the same nested Fypp loop idiom as the simulation residue. --- src/post_process/m_mpi_proxy.fpp | 8 ++++++++ src/pre_process/m_mpi_proxy.fpp | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/src/post_process/m_mpi_proxy.fpp b/src/post_process/m_mpi_proxy.fpp index d9beac5347..7649747b72 100644 --- a/src/post_process/m_mpi_proxy.fpp +++ b/src/post_process/m_mpi_proxy.fpp @@ -85,6 +85,14 @@ contains call MPI_BCAST(${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) #:endfor + ! wall-velocity members consumed by s_slip_wall/s_no_slip_wall on all ranks + #:for DIM in ['x', 'y', 'z'] + #:for DIR in [1, 2, 3] + call MPI_BCAST(bc_${DIM}$%vb${DIR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(bc_${DIM}$%ve${DIR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + #:endfor + #:endfor + ! manual: cfl_dt (runtime-computed logical), bc_io (BC-file existence) call MPI_BCAST(cfl_dt, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) call MPI_BCAST(bc_io, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) diff --git a/src/pre_process/m_mpi_proxy.fpp b/src/pre_process/m_mpi_proxy.fpp index bf6e3d1180..d286fd87a4 100644 --- a/src/pre_process/m_mpi_proxy.fpp +++ b/src/pre_process/m_mpi_proxy.fpp @@ -49,6 +49,14 @@ contains call MPI_BCAST(${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) #:endfor + ! wall-velocity members consumed by s_slip_wall/s_no_slip_wall on all ranks + #:for DIM in ['x', 'y', 'z'] + #:for DIR in [1, 2, 3] + call MPI_BCAST(bc_${DIM}$%vb${DIR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(bc_${DIM}$%ve${DIR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + #:endfor + #:endfor + ! manual: cfl_dt (runtime-computed logical), bc_io (BC-file existence) call MPI_BCAST(cfl_dt, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) call MPI_BCAST(bc_io, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) From 782cfb65c177ea76e43e4eb7fc3dd114cab30035 Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Wed, 10 Jun 2026 19:23:35 -0400 Subject: [PATCH 21/22] codegen: broadcast muscl_eps and walk the registry for struct broadcasts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit muscl_eps was excluded from broadcast generation via _BCAST_EXCLUDE on the incorrect assumption that it is derived post-broadcast. The derivation (in m_weno or m_muscl) only fires under f_is_default(muscl_eps), and default values are assigned on rank 0 only. Every multi-rank MUSCL run therefore had rank-divergent muscl_eps on non-root ranks. Remove it from the exclusion set. Tuple-set delta (var, mpi_type, count) vs. HEAD~: sim: +1 entry: (muscl_eps, mpi_p, 1) pre: no change post: no change _emit_fluid_pp and _emit_lag_params now walk the registry instead of maintaining hardcoded member lists. After Commit 1 deregistered the dead members (mul0/ss/pv/gamma_v/M_v/mu_v/k_v/cp_v/D_v for fluid_pp; T0/Thost/c0/rho0/x0 for lag_params), the registry now matches the Fortran types exactly. Re(1) count=2 remains sim-only via an explicit target check with a comment. G is walked as a regular REAL member. Tests: 3 new tests added — muscl_eps now broadcast in sim, fluid_pp and lag_params registry walks produce exactly the registered members minus documented exclusions, dead members absent. --- .../mfc/params/generators/fortran_gen.py | 21 +++-- .../mfc/params_tests/test_fortran_gen.py | 83 ++++++++++++++++++- 2 files changed, 93 insertions(+), 11 deletions(-) diff --git a/toolchain/mfc/params/generators/fortran_gen.py b/toolchain/mfc/params/generators/fortran_gen.py index 817a5fa4fe..14e0f472a6 100644 --- a/toolchain/mfc/params/generators/fortran_gen.py +++ b/toolchain/mfc/params/generators/fortran_gen.py @@ -235,7 +235,11 @@ def generate_case_opt_decls_fpp() -> str: _STRUCT_ROOTS = frozenset({"bc_x", "bc_y", "bc_z", "x_domain", "y_domain", "z_domain", "x_output", "y_output", "z_output"}) # Variables excluded from broadcast generation (derived post-broadcast or non-namelist). -_BCAST_EXCLUDE = frozenset({"muscl_eps"}) +# muscl_eps was previously excluded here on the assumption that it was derived +# post-broadcast, but the derivation only fires under f_is_default(muscl_eps), +# and default values are assigned on rank 0 only. Every multi-rank MUSCL run +# therefore had rank-divergent muscl_eps. Removed from exclusion to fix the bug. +_BCAST_EXCLUDE: frozenset = frozenset() # Post-process scalars that are namelist-bound but consumed on rank 0 only (reading/init). # Broadcasting them would be harmless but changes the existing call set, which we preserve. @@ -318,16 +322,19 @@ def _emit_bcast_group(lines: List[str], vars_list: List[str], mpi_type: str) -> def _emit_fluid_pp(lines: List[str], target: str) -> None: """Emit the fluid_pp(i) member-loop broadcast block. - Members broadcast: all REAL registry members of fluid_pp (gamma, pi_inf, G, cv, qv, - qvp) derived from physical_parameters. Sim additionally: Re(1) with count=2. + Walks the registry for every fluid_pp member, emitting the MPI datatype that + matches each member's registered ParamType (the Herschel-Bulkley merge added + a LOGICAL member, so the datatype must come from the registry, not be assumed + REAL). Sim additionally broadcasts Re(1) with count=2 (kept sim-only to + preserve the historical call set; pre/post never consumed Re). mul0/ss/pv/gamma_v/M_v/mu_v/k_v/cp_v/D_v were removed from the Fortran type by upstream #1085/#1093 and are no longer registered. """ - # Walk the registry for fluid_pp REAL members (Re handled separately; exclude). - fp_real_members = sorted(k.split("%", 1)[1] for k in REGISTRY.all_params if k.startswith("fluid_pp(1)%") and not k.startswith("fluid_pp(1)%Re(")) + fp_members = sorted(k.split("%", 1)[1] for k in REGISTRY.all_params if k.startswith("fluid_pp(1)%") and not k.startswith("fluid_pp(1)%Re(")) lines.append(" do i = 1, num_fluids_max") - for mem in fp_real_members: - lines.append(f" call MPI_BCAST(fluid_pp(i)%{mem}, 1, mpi_p, 0, MPI_COMM_WORLD, ierr)") + for mem in fp_members: + ptype = REGISTRY.all_params[f"fluid_pp(1)%{mem}"].param_type + lines.append(f" call MPI_BCAST(fluid_pp(i)%{mem}, 1, {_mpi_type_for(ptype)}, 0, MPI_COMM_WORLD, ierr)") if target == "sim": lines.append(" call MPI_BCAST(fluid_pp(i)%Re(1), 2, mpi_p, 0, MPI_COMM_WORLD, ierr)") lines.append(" end do") diff --git a/toolchain/mfc/params_tests/test_fortran_gen.py b/toolchain/mfc/params_tests/test_fortran_gen.py index e4fe751f7c..eac0bfccaf 100644 --- a/toolchain/mfc/params_tests/test_fortran_gen.py +++ b/toolchain/mfc/params_tests/test_fortran_gen.py @@ -387,8 +387,9 @@ def test_generate_bcast_fpp_case_opt_guard_sim(): assert "num_fluids" in guard_body assert "mapped_weno" in guard_body - # muscl_eps must NOT appear anywhere (it is excluded — derived post-broadcast) - assert "muscl_eps" not in out + # muscl_eps IS inside the case-opt guard (it is sim-only, non-CASE_OPT_PARAM, + # so it appears in the real-scalars section, which is outside the case-opt block) + assert "call MPI_BCAST(muscl_eps, 1, mpi_p, 0, MPI_COMM_WORLD, ierr)" in out def test_generate_bcast_fpp_case_opt_not_in_pre_post(): @@ -504,8 +505,6 @@ def test_generate_bcast_fpp_excludes_manual_residue(): assert "m_glb" not in out, f"{target}: m_glb should be manual" assert "n_glb" not in out, f"{target}: n_glb should be manual" assert "p_glb" not in out, f"{target}: p_glb should be manual" - # muscl_eps is excluded (derived post-broadcast) - assert "muscl_eps" not in out, f"{target}: muscl_eps should be excluded" sim = generate_bcast_fpp("sim") # shear_stress, bulk_stress, bodyForces are derived (non-namelist) @@ -530,3 +529,79 @@ def test_generate_bcast_fpp_bad_target(): with pytest.raises(ValueError, match="Unknown target"): generate_bcast_fpp("bad") + + +def test_generate_bcast_fpp_muscl_eps_now_broadcast(): + """muscl_eps is broadcast for sim (latent-bug fix: derivation is rank-0-only). + + Previously excluded via _BCAST_EXCLUDE; every multi-rank MUSCL run had + rank-divergent muscl_eps because f_is_default() only fires on rank 0. + """ + from mfc.params.generators.fortran_gen import generate_bcast_fpp + + sim = generate_bcast_fpp("sim") + assert "call MPI_BCAST(muscl_eps, 1, mpi_p, 0, MPI_COMM_WORLD, ierr)" in sim + + # muscl_eps is sim-only; must not appear in pre/post + pre = generate_bcast_fpp("pre") + post = generate_bcast_fpp("post") + assert "muscl_eps" not in pre + assert "muscl_eps" not in post + + +def test_generate_bcast_fpp_fluid_pp_registry_walk(): + """fluid_pp emitter walks registry; no dead members (mul0/ss/pv/gamma_v/M_v/mu_v/k_v/cp_v/D_v removed upstream). + + Re(1) count=2 is sim-only; all other registered REAL members appear in all three + targets. + """ + from mfc.params.generators.fortran_gen import generate_bcast_fpp + + sim = generate_bcast_fpp("sim") + pre = generate_bcast_fpp("pre") + post = generate_bcast_fpp("post") + + # All registered members appear in every target + for mem in ("gamma", "pi_inf", "cv", "qv", "qvp", "G"): + for out, t in [(sim, "sim"), (pre, "pre"), (post, "post")]: + assert f"fluid_pp(i)%{mem}" in out, f"{t}: fluid_pp(i)%{mem} missing" + + # Re(1) count=2 is sim-only + assert "fluid_pp(i)%Re(1)" in sim + assert "fluid_pp(i)%Re(1)" not in pre + assert "fluid_pp(i)%Re(1)" not in post + + # Dead members must not appear + for dead in ("mul0", "ss", "pv", "gamma_v", "M_v", "mu_v", "k_v", "cp_v", "D_v"): + for out, t in [(sim, "sim"), (pre, "pre"), (post, "post")]: + assert f"fluid_pp(i)%{dead}" not in out, f"{t}: dead member fluid_pp(i)%{dead} present" + + +def test_generate_bcast_fpp_fluid_pp_member_datatypes(): + # The HB merge added a LOGICAL fluid_pp member; datatypes must come from the + # registry, not be assumed REAL. + from mfc.params.generators.fortran_gen import generate_bcast_fpp + + sim = generate_bcast_fpp("sim") + assert "call MPI_BCAST(fluid_pp(i)%non_newtonian, 1, MPI_LOGICAL," in sim + assert "call MPI_BCAST(fluid_pp(i)%tau0, 1, mpi_p," in sim + assert "non_newtonian, 1, mpi_p" not in sim + + +def test_generate_bcast_fpp_lag_params_registry_walk(): + """lag_params emitter walks registry; no dead members (T0/Thost/c0/rho0/x0 removed upstream).""" + from mfc.params.generators.fortran_gen import generate_bcast_fpp + + sim = generate_bcast_fpp("sim") + + # All registered members must appear + for mem in ("solver_approach", "cluster_type", "smooth_type", "nBubs_glb"): + assert f"lag_params%{mem}" in sim, f"lag_params%{mem} missing from sim" + for mem in ("heatTransfer_model", "massTransfer_model", "pressure_corrector", "write_bubbles", "write_bubbles_stats"): + assert f"lag_params%{mem}" in sim, f"lag_params%{mem} missing from sim" + for mem in ("epsilonb", "charwidth", "valmaxvoid"): + assert f"lag_params%{mem}" in sim, f"lag_params%{mem} missing from sim" + + # Dead members must not appear + for dead in ("T0", "Thost", "c0", "rho0", "x0"): + assert f"lag_params%{dead}" not in sim, f"dead member lag_params%{dead} present in sim" From 8eb6e1894c7e186cb0d3a44e494cda99125e0b1c Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Wed, 10 Jun 2026 21:53:57 -0400 Subject: [PATCH 22/22] src: broadcast BC type codes with MPI_INTEGER in pre_process; pin residue broadcasts pre_process sent the integer bc_x/y/z%beg/%end with mpi_p (an 8-byte real transfer over 4-byte integers - undefined behavior that happened to work by adjacency); simulation and post_process already used MPI_INTEGER. Also adds a test pinning the hand-written vb/ve and BC-code residue in pre/post so a merge conflict cannot silently drop them. Both from review. --- src/pre_process/m_mpi_proxy.fpp | 10 +++++++--- toolchain/mfc/params_tests/test_fortran_gen.py | 16 ++++++++++++++++ 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/src/pre_process/m_mpi_proxy.fpp b/src/pre_process/m_mpi_proxy.fpp index d286fd87a4..8e733765f3 100644 --- a/src/pre_process/m_mpi_proxy.fpp +++ b/src/pre_process/m_mpi_proxy.fpp @@ -39,16 +39,20 @@ contains call MPI_BCAST(${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) #:endfor - ! manual: bc_x/y/z and domain bounds (registered as struct members, not in NAMELIST_VARS) + ! manual: domain bounds and wall temperatures (REAL struct members, not in NAMELIST_VARS) #:for VAR in [ 'x_domain%beg', 'x_domain%end', 'y_domain%beg', & & 'y_domain%end', 'z_domain%beg', 'z_domain%end', & - & 'bc_x%beg', 'bc_x%end', 'bc_y%beg', 'bc_y%end', & - & 'bc_z%beg', 'bc_z%end', 'bc_x%Twall_in', 'bc_x%Twall_out', & + & 'bc_x%Twall_in', 'bc_x%Twall_out', & & 'bc_y%Twall_in', 'bc_y%Twall_out', 'bc_z%Twall_in', & & 'bc_z%Twall_out'] call MPI_BCAST(${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) #:endfor + ! manual: BC type codes (INTEGER struct members) + #:for VAR in [ 'bc_x%beg', 'bc_x%end', 'bc_y%beg', 'bc_y%end', 'bc_z%beg', 'bc_z%end'] + call MPI_BCAST(${VAR}$, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + #:endfor + ! wall-velocity members consumed by s_slip_wall/s_no_slip_wall on all ranks #:for DIM in ['x', 'y', 'z'] #:for DIR in [1, 2, 3] diff --git a/toolchain/mfc/params_tests/test_fortran_gen.py b/toolchain/mfc/params_tests/test_fortran_gen.py index eac0bfccaf..0dc208608c 100644 --- a/toolchain/mfc/params_tests/test_fortran_gen.py +++ b/toolchain/mfc/params_tests/test_fortran_gen.py @@ -605,3 +605,19 @@ def test_generate_bcast_fpp_lag_params_registry_walk(): # Dead members must not appear for dead in ("T0", "Thost", "c0", "rho0", "x0"): assert f"lag_params%{dead}" not in sim, f"dead member lag_params%{dead} present in sim" + + +def test_mpi_proxy_residue_pins_wall_velocity_and_bc_datatypes(): + """The vb/ve wall-velocity broadcasts and integer BC datatypes live in + hand-written residue (not codegen); pin them so an edit or merge conflict + that drops them fails loudly.""" + import pathlib + + root = pathlib.Path(__file__).resolve().parents[3] + for target in ("pre_process", "post_process"): + src = (root / "src" / target / "m_mpi_proxy.fpp").read_text() + assert "bc_${DIM}$%vb${DIR}$" in src, f"{target}: vb broadcasts missing" + assert "bc_${DIM}$%ve${DIR}$" in src, f"{target}: ve broadcasts missing" + assert "'bc_x%beg', 'bc_x%end', 'bc_y%beg', 'bc_y%end', 'bc_z%beg', 'bc_z%end']" in src + seg = src.split("'bc_z%beg', 'bc_z%end']", 1)[1] + assert "MPI_INTEGER" in seg.split("#:endfor")[0], f"{target}: BC codes not MPI_INTEGER"