diff --git a/examples/3D_mibm_sphere_head_on_collision/case.py b/examples/3D_mibm_sphere_head_on_collision/case.py index 981aacb310..ec4b908ecf 100644 --- a/examples/3D_mibm_sphere_head_on_collision/case.py +++ b/examples/3D_mibm_sphere_head_on_collision/case.py @@ -89,6 +89,7 @@ "prim_vars_wrt": "T", "E_wrt": "T", "parallel_io": "T", + "ib_state_wrt": "T", # Patch: Constant Tube filled with air # Specify the cylindrical air tube grid geometry "patch_icpp(1)%geometry": 9, diff --git a/src/simulation/m_collisions.fpp b/src/simulation/m_collisions.fpp index ac0ab630e4..85b42880eb 100644 --- a/src/simulation/m_collisions.fpp +++ b/src/simulation/m_collisions.fpp @@ -48,12 +48,12 @@ contains end subroutine s_initialize_collisions_module - subroutine s_apply_collision_forces(ghost_points, num_gps, ib_markers, forces, torques) + subroutine s_apply_collision_forces(ghost_points, num_gps, ib_markers, ib_ft) type(ghost_point), dimension(:), intent(in) :: ghost_points integer, intent(in) :: num_gps type(integer_field), intent(in) :: ib_markers - real(wp), dimension(num_ibs, 3), intent(inout) :: forces, torques + real(wp), dimension(6, num_ibs), intent(inout) :: ib_ft integer :: num_considered_collisions ! return if no collisions @@ -67,17 +67,17 @@ contains select case (collision_model) case (1) ! soft sphere model - call s_apply_wall_collision_forces_soft_sphere(forces, torques) - call s_apply_ib_collision_forces_soft_sphere(num_considered_collisions, forces, torques) + call s_apply_wall_collision_forces_soft_sphere(ib_ft) + call s_apply_ib_collision_forces_soft_sphere(num_considered_collisions, ib_ft) end select end subroutine s_apply_collision_forces !> @brief applies collision forces to IBs assuming a soft-sphere collision model (all IBs are circles or spheres) - subroutine s_apply_ib_collision_forces_soft_sphere(num_considered_collisions, forces, torques) + subroutine s_apply_ib_collision_forces_soft_sphere(num_considered_collisions, ib_ft) integer, intent(in) :: num_considered_collisions - real(wp), dimension(num_ibs, 3), intent(inout) :: forces, torques + real(wp), dimension(6, num_ibs), intent(inout) :: ib_ft integer :: i, encoded_pid1, encoded_pid2, xp1, xp2, yp1, yp2, zp1, zp2, pid1, pid2, l ! iterators and patch IDs real(wp) :: overlap_distance real(wp), dimension(3) :: normal_vector, centroid_1, centroid_2 @@ -93,7 +93,7 @@ contains $:GPU_PARALLEL_LOOP(private='[i, l, encoded_pid1, encoded_pid2, xp1, xp2, yp1, yp2, zp1, zp2, pid1, pid2, centroid_1, & & centroid_2, normal_vector, overlap_distance, effective_mass, k, eta, normal_velocity, & & tangental_vector, normal_force, tangental_force, torque, radial_vector, rotation_velocity, vel1, & - & vel2]', copy='[forces, torques]') + & vel2]', copy='[ib_ft]') do i = 1, num_considered_collisions encoded_pid1 = collision_lookup(i, 3) encoded_pid2 = collision_lookup(i, 4) @@ -144,15 +144,15 @@ contains do l = 1, num_dims ! update the first IB $:GPU_ATOMIC(atomic='update') - forces(pid1, l) = forces(pid1, l) + (normal_force(l) + tangental_force(l)) + ib_ft(l, pid1) = ib_ft(l, pid1) + (normal_force(l) + tangental_force(l)) $:GPU_ATOMIC(atomic='update') - torques(pid1, l) = torques(pid1, l) + torque(l) + ib_ft(l + 3, pid1) = ib_ft(l + 3, pid1) + torque(l) ! apply equal and opposite force/torque to second IB $:GPU_ATOMIC(atomic='update') - forces(pid2, l) = forces(pid2, l) - (normal_force(l) + tangental_force(l)) + ib_ft(l, pid2) = ib_ft(l, pid2) - (normal_force(l) + tangental_force(l)) $:GPU_ATOMIC(atomic='update') - torques(pid2, l) = torques(pid2, l) + torque(l)*patch_ib(pid2)%radius/patch_ib(pid1)%radius + ib_ft(l + 3, pid2) = ib_ft(l + 3, pid2) + torque(l)*patch_ib(pid2)%radius/patch_ib(pid1)%radius end do end if end if @@ -162,9 +162,9 @@ contains end subroutine s_apply_ib_collision_forces_soft_sphere !> @brief applies collision forces to IBs assuming a soft-sphere collision model (all IBs are circles or spheres) - subroutine s_apply_wall_collision_forces_soft_sphere(forces, torques) + subroutine s_apply_wall_collision_forces_soft_sphere(ib_ft) - real(wp), dimension(num_ibs, 3), intent(inout) :: forces, torques + real(wp), dimension(6, num_ibs), intent(inout) :: ib_ft integer :: patch_id, i, l real(wp), dimension(3) :: normal_force, tangental_force, normal_vector, normal_velocity, tangental_vector, & & collision_location, torque, radial_vector, rotation_velocity, velocity @@ -172,7 +172,7 @@ contains $:GPU_PARALLEL_LOOP(private='[patch_id, i, l, collision_location, normal_vector, k, eta, normal_velocity, & & tangental_vector, normal_force, tangental_force, torque, radial_vector, rotation_velocity, & - & velocity]', copy='[forces, torques]', collapse=2) + & velocity]', copy='[ib_ft]', collapse=2) do patch_id = 1, num_ibs do i = 1, num_dims*2 ! only compute force contributions if there was an overlap @@ -217,9 +217,9 @@ contains do l = 1, num_dims $:GPU_ATOMIC(atomic='update') - forces(patch_id, l) = forces(patch_id, l) + (normal_force(l) + tangental_force(l)) + ib_ft(l, patch_id) = ib_ft(l, patch_id) + (normal_force(l) + tangental_force(l)) $:GPU_ATOMIC(atomic='update') - torques(patch_id, l) = torques(patch_id, l) + torque(l) + ib_ft(l + 3, patch_id) = ib_ft(l + 3, patch_id) + torque(l) end do end if end do diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index 46dbe2d6eb..2332ae91d5 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -47,8 +47,7 @@ module m_ibm ! IB MPI buffers integer, allocatable :: send_ids(:), recv_ids(:) - real(wp), allocatable :: send_ft(:,:), recv_ft(:,:) - real(wp), allocatable :: recv_forces_snap(:,:), recv_torques_snap(:,:) + real(wp), allocatable :: recv_ft(:,:), recv_ft_snap(:,:) contains @@ -98,9 +97,8 @@ contains ! allocate some arrays for MPI communication, if required by this simulation #ifdef MFC_MPI if (num_procs > 1) then - @:ALLOCATE(send_ids(size(patch_ib)), send_ft(6, size(patch_ib))) - allocate (recv_forces_snap(size(patch_ib), 3), recv_torques_snap(size(patch_ib), 3), recv_ids(size(patch_ib)), & - & recv_ft(6, size(patch_ib))) + @:ALLOCATE(send_ids(size(patch_ib))) + allocate (recv_ft_snap(6, size(patch_ib)), recv_ids(size(patch_ib)), recv_ft(6, size(patch_ib))) end if #endif @@ -908,7 +906,7 @@ contains type(scalar_field), dimension(1:sys_size), intent(in) :: q_prim_vf type(physical_parameters), dimension(1:num_fluids), intent(in) :: fluid_pp integer :: i, j, k, l, encoded_ib_idx, ib_idx, ib_idx_temp, fluid_idx - real(wp), dimension(num_ibs, 3) :: forces, torques + real(wp), dimension(6, num_ibs) :: ib_ft ! viscous stress tensor with temp vectors to hold divergence calculations real(wp), dimension(1:3,1:3) :: viscous_stress_div, viscous_stress_div_1, viscous_stress_div_2 real(wp), dimension(1:3) :: local_force_contribution, radial_vector, local_torque_contribution @@ -922,8 +920,7 @@ contains call nvtxStartRange("COMPUTE-IB-FORCES") - forces = 0._wp - torques = 0._wp + ib_ft = 0._wp if (viscous) then do fluid_idx = 1, num_fluids @@ -937,8 +934,8 @@ contains $:GPU_PARALLEL_LOOP(private='[i, j, k, l, ib_idx, ib_idx_temp, encoded_ib_idx, fluid_idx, radial_vector, & & local_force_contribution, cell_volume, local_torque_contribution, dynamic_viscosity, & - & viscous_stress_div, viscous_stress_div_1, viscous_stress_div_2, dx, dy, dz]', copy='[forces, & - & torques]', copyin='[dynamic_viscosities]', collapse=3) + & viscous_stress_div, viscous_stress_div_1, viscous_stress_div_2, dx, dy, dz]', copy='[ib_ft]', & + & copyin='[dynamic_viscosities]', collapse=3) do i = 0, m do j = 0, n do k = 0, p @@ -1025,9 +1022,9 @@ contains ! Update the force and torque values atomically to prevent race conditions do l = 1, 3 $:GPU_ATOMIC(atomic='update') - forces(ib_idx, l) = forces(ib_idx, l) + (local_force_contribution(l)*cell_volume) + ib_ft(l, ib_idx) = ib_ft(l, ib_idx) + (local_force_contribution(l)*cell_volume) $:GPU_ATOMIC(atomic='update') - torques(ib_idx, l) = torques(ib_idx, l) + local_torque_contribution(l)*cell_volume + ib_ft(l + 3, ib_idx) = ib_ft(l + 3, ib_idx) + local_torque_contribution(l)*cell_volume end do end if ! ib_idx > 0 end if @@ -1036,29 +1033,29 @@ contains end do $:END_GPU_PARALLEL_LOOP() - call s_apply_collision_forces(ghost_points, num_gps, ib_markers, forces, torques) + call s_apply_collision_forces(ghost_points, num_gps, ib_markers, ib_ft) ! reduce the forces across local neighborhood ranks - call s_communicate_ib_forces(forces, torques) + call s_communicate_ib_forces(ib_ft) ! consider body forces after reducing to avoid double counting do i = 1, num_ibs if (bf_x) then - forces(i, 1) = forces(i, 1) + accel_bf(1)*patch_ib(i)%mass + ib_ft(1, i) = ib_ft(1, i) + accel_bf(1)*patch_ib(i)%mass end if if (bf_y) then - forces(i, 2) = forces(i, 2) + accel_bf(2)*patch_ib(i)%mass + ib_ft(2, i) = ib_ft(2, i) + accel_bf(2)*patch_ib(i)%mass end if if (bf_z) then - forces(i, 3) = forces(i, 3) + accel_bf(3)*patch_ib(i)%mass + ib_ft(3, i) = ib_ft(3, i) + accel_bf(3)*patch_ib(i)%mass end if end do ! apply the summed forces - $:GPU_PARALLEL_LOOP(private='[i]', copyin='[forces, torques]') + $:GPU_PARALLEL_LOOP(private='[i]', copyin='[ib_ft]') do i = 1, num_ibs - patch_ib(i)%force(:) = forces(i,:) - patch_ib(i)%torque(:) = torques(i,:) + patch_ib(i)%force(:) = ib_ft(1:3,i) + patch_ib(i)%torque(:) = ib_ft(4:6,i) end do $:END_GPU_PARALLEL_LOOP() @@ -1247,9 +1244,9 @@ contains !! 2: send current (post-pass-1) values; add received; subtract recv_snap to remove double-counting of the direct contribution !! already added in pass 1. Back-propagation phase: 2 passes per dimension receiving from the high-index (+X) neighbor, each !! overwriting local forces with the neighbor's accumulated total. - subroutine s_communicate_ib_forces(forces, torques) + subroutine s_communicate_ib_forces(ib_ft) - real(wp), dimension(num_ibs, 3), intent(inout) :: forces, torques + real(wp), dimension(6, num_ibs), intent(inout) :: ib_ft #ifdef MFC_MPI integer :: i, j, k, pack_pos, unpack_pos, buf_size, ierr @@ -1267,24 +1264,21 @@ contains send_neighbor = merge(bc_${X}$%end, MPI_PROC_NULL, bc_${X}$%end >= 0) recv_neighbor = merge(bc_${X}$%beg, MPI_PROC_NULL, bc_${X}$%beg >= 0) - recv_forces_snap = 0._wp - recv_torques_snap = 0._wp + recv_ft_snap = 0._wp tag = 300 do k = 1, 2*ib_neighborhood_radius ! send forces to +${X}$ neighbor; receive from -${X}$ neighbor. Add received values then pack_pos = 0 - $:GPU_PARALLEL_LOOP(private='[i]', copyin='[forces, torques]') + $:GPU_PARALLEL_LOOP(private='[i]') do i = 1, num_ibs send_ids(i) = patch_ib(i)%gbl_patch_id - send_ft(1:3,i) = forces(i,:) - send_ft(4:6,i) = torques(i,:) end do $:END_GPU_PARALLEL_LOOP() - $:GPU_UPDATE(host='[send_ids, send_ft]') + $:GPU_UPDATE(host='[send_ids]') call MPI_PACK(num_ibs, 1, MPI_INTEGER, ib_force_send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) call MPI_PACK(send_ids, num_ibs, MPI_INTEGER, ib_force_send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) - call MPI_PACK(send_ft, 6*num_ibs, mpi_p, ib_force_send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) + call MPI_PACK(ib_ft, 6*num_ibs, mpi_p, ib_force_send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) call MPI_SENDRECV(ib_force_send_buf, pack_pos, MPI_PACKED, send_neighbor, tag, ib_force_recv_buf, buf_size, & & MPI_PACKED, recv_neighbor, tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) @@ -1294,16 +1288,13 @@ contains call MPI_UNPACK(ib_force_recv_buf, buf_size, unpack_pos, recv_ids, recv_count, MPI_INTEGER, & & MPI_COMM_WORLD, ierr) call MPI_UNPACK(ib_force_recv_buf, buf_size, unpack_pos, recv_ft, 6*recv_count, mpi_p, MPI_COMM_WORLD, ierr) - $:GPU_PARALLEL_LOOP(private='[i, j]', copyin='[recv_ft, recv_ids]', copy='[forces, torques, & - & recv_forces_snap, recv_torques_snap]') + $:GPU_PARALLEL_LOOP(private='[i, j]', copyin='[recv_ft, recv_ids]', copy='[ib_ft, recv_ft_snap]') do i = 1, recv_count call s_get_neighborhood_idx(recv_ids(i), j) if (j > 0) then ! add forces and subtract recv_snap prevent double-counting - forces(j,:) = forces(j,:) + recv_ft(1:3,i) - recv_forces_snap(j,:) - torques(j,:) = torques(j,:) + recv_ft(4:6,i) - recv_torques_snap(j,:) - recv_forces_snap(j,:) = recv_ft(1:3,i) - recv_torques_snap(j,:) = recv_ft(4:6,i) + ib_ft(:,j) = ib_ft(:,j) + recv_ft(:,i) - recv_ft_snap(:,j) + recv_ft_snap(:,j) = recv_ft(:,i) end if end do $:END_GPU_PARALLEL_LOOP() @@ -1321,17 +1312,15 @@ contains do k = 1, 2*ib_neighborhood_radius pack_pos = 0 - $:GPU_PARALLEL_LOOP(private='[i]', copyin='[forces, torques]') + $:GPU_PARALLEL_LOOP(private='[i]') do i = 1, num_ibs send_ids(i) = patch_ib(i)%gbl_patch_id - send_ft(1:3,i) = forces(i,:) - send_ft(4:6,i) = torques(i,:) end do $:END_GPU_PARALLEL_LOOP() - $:GPU_UPDATE(host='[send_ids, send_ft]') + $:GPU_UPDATE(host='[send_ids]') call MPI_PACK(num_ibs, 1, MPI_INTEGER, ib_force_send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) call MPI_PACK(send_ids, num_ibs, MPI_INTEGER, ib_force_send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) - call MPI_PACK(send_ft, 6*num_ibs, mpi_p, ib_force_send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) + call MPI_PACK(ib_ft, 6*num_ibs, mpi_p, ib_force_send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) call MPI_SENDRECV(ib_force_send_buf, pack_pos, MPI_PACKED, send_neighbor, tag, ib_force_recv_buf, buf_size, & & MPI_PACKED, recv_neighbor, tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) if (recv_neighbor /= MPI_PROC_NULL) then @@ -1340,12 +1329,11 @@ contains call MPI_UNPACK(ib_force_recv_buf, buf_size, unpack_pos, recv_ids, recv_count, MPI_INTEGER, & & MPI_COMM_WORLD, ierr) call MPI_UNPACK(ib_force_recv_buf, buf_size, unpack_pos, recv_ft, 6*recv_count, mpi_p, MPI_COMM_WORLD, ierr) - $:GPU_PARALLEL_LOOP(private='[i, j]', copyin='[recv_ft, recv_ids]', copy='[forces, torques]') + $:GPU_PARALLEL_LOOP(private='[i, j]', copyin='[recv_ft, recv_ids]', copy='[ib_ft]') do i = 1, recv_count call s_get_neighborhood_idx(recv_ids(i), j) if (j > 0) then - forces(j,:) = recv_ft(1:3,i) - torques(j,:) = recv_ft(4:6,i) + ib_ft(:,j) = recv_ft(:,i) end if end do $:END_GPU_PARALLEL_LOOP() @@ -1557,8 +1545,8 @@ contains #ifdef MFC_MPI if (num_procs > 1) then - @:DEALLOCATE(send_ids, send_ft) - deallocate (recv_forces_snap, recv_torques_snap, recv_ids, recv_ft) + @:DEALLOCATE(send_ids) + deallocate (recv_ft_snap, recv_ids, recv_ft) end if #endif diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index 7420404ef2..e8a877a8ae 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -566,6 +566,7 @@ contains if (ib) then if (moving_immersed_boundary_flag) then + $:GPU_UPDATE(host='[patch_ib(1:num_ibs)]') call s_wrap_periodic_ibs() ! wraps the positions of IBs to the local proc call s_handoff_ib_ownership() ! recomputes which ranks own which IBs and communicate to neighbors else if (ib_state_wrt) then