-
Notifications
You must be signed in to change notification settings - Fork 150
Force reshape #1616
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Force reshape #1616
Changes from all commits
e0a7193
db7f4a6
050d7cf
e149a40
28f668a
66750b8
8eccf24
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -47,8 +47,7 @@ module m_ibm | |
|
|
||
| ! IB MPI buffers | ||
| integer, allocatable :: send_ids(:), recv_ids(:) | ||
| real(wp), allocatable :: send_ft(:,:), recv_ft(:,:) | ||
| real(wp), allocatable :: recv_forces_snap(:,:), recv_torques_snap(:,:) | ||
| real(wp), allocatable :: recv_ft(:,:), recv_ft_snap(:,:) | ||
|
|
||
| contains | ||
|
|
||
|
|
@@ -98,9 +97,8 @@ contains | |
| ! allocate some arrays for MPI communication, if required by this simulation | ||
| #ifdef MFC_MPI | ||
| if (num_procs > 1) then | ||
| @:ALLOCATE(send_ids(size(patch_ib)), send_ft(6, size(patch_ib))) | ||
| allocate (recv_forces_snap(size(patch_ib), 3), recv_torques_snap(size(patch_ib), 3), recv_ids(size(patch_ib)), & | ||
| & recv_ft(6, size(patch_ib))) | ||
| @:ALLOCATE(send_ids(size(patch_ib))) | ||
| allocate (recv_ft_snap(6, size(patch_ib)), recv_ids(size(patch_ib)), recv_ft(6, size(patch_ib))) | ||
| end if | ||
| #endif | ||
|
|
||
|
|
@@ -908,7 +906,7 @@ contains | |
| type(scalar_field), dimension(1:sys_size), intent(in) :: q_prim_vf | ||
| type(physical_parameters), dimension(1:num_fluids), intent(in) :: fluid_pp | ||
| integer :: i, j, k, l, encoded_ib_idx, ib_idx, ib_idx_temp, fluid_idx | ||
| real(wp), dimension(num_ibs, 3) :: forces, torques | ||
| real(wp), dimension(6, num_ibs) :: ib_ft | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This would be a good place to put a comment about the contents of this array. Like I said, I already think it would be best renamed. But it is not clear immediately to a reader how memory is written here. A comment briefly describing that it is f_x, f_y, f_x, tau_x, tau_y, tau_z contiguously would make it easier for anyone reading the code to understand how they are supposed to fill in the array. Although the array bounds help clarify this a little for you. |
||
| ! viscous stress tensor with temp vectors to hold divergence calculations | ||
| real(wp), dimension(1:3,1:3) :: viscous_stress_div, viscous_stress_div_1, viscous_stress_div_2 | ||
| real(wp), dimension(1:3) :: local_force_contribution, radial_vector, local_torque_contribution | ||
|
|
@@ -922,8 +920,7 @@ contains | |
|
|
||
| call nvtxStartRange("COMPUTE-IB-FORCES") | ||
|
|
||
| forces = 0._wp | ||
| torques = 0._wp | ||
| ib_ft = 0._wp | ||
|
|
||
| if (viscous) then | ||
| do fluid_idx = 1, num_fluids | ||
|
|
@@ -937,8 +934,8 @@ contains | |
|
|
||
| $:GPU_PARALLEL_LOOP(private='[i, j, k, l, ib_idx, ib_idx_temp, encoded_ib_idx, fluid_idx, radial_vector, & | ||
| & local_force_contribution, cell_volume, local_torque_contribution, dynamic_viscosity, & | ||
| & viscous_stress_div, viscous_stress_div_1, viscous_stress_div_2, dx, dy, dz]', copy='[forces, & | ||
| & torques]', copyin='[dynamic_viscosities]', collapse=3) | ||
| & viscous_stress_div, viscous_stress_div_1, viscous_stress_div_2, dx, dy, dz]', copy='[ib_ft]', & | ||
| & copyin='[dynamic_viscosities]', collapse=3) | ||
| do i = 0, m | ||
| do j = 0, n | ||
| do k = 0, p | ||
|
|
@@ -1025,9 +1022,9 @@ contains | |
| ! Update the force and torque values atomically to prevent race conditions | ||
| do l = 1, 3 | ||
| $:GPU_ATOMIC(atomic='update') | ||
| forces(ib_idx, l) = forces(ib_idx, l) + (local_force_contribution(l)*cell_volume) | ||
| ib_ft(l, ib_idx) = ib_ft(l, ib_idx) + (local_force_contribution(l)*cell_volume) | ||
| $:GPU_ATOMIC(atomic='update') | ||
| torques(ib_idx, l) = torques(ib_idx, l) + local_torque_contribution(l)*cell_volume | ||
| ib_ft(l + 3, ib_idx) = ib_ft(l + 3, ib_idx) + local_torque_contribution(l)*cell_volume | ||
| end do | ||
| end if ! ib_idx > 0 | ||
| end if | ||
|
|
@@ -1036,29 +1033,29 @@ contains | |
| end do | ||
| $:END_GPU_PARALLEL_LOOP() | ||
|
|
||
| call s_apply_collision_forces(ghost_points, num_gps, ib_markers, forces, torques) | ||
| call s_apply_collision_forces(ghost_points, num_gps, ib_markers, ib_ft) | ||
|
|
||
| ! reduce the forces across local neighborhood ranks | ||
| call s_communicate_ib_forces(forces, torques) | ||
| call s_communicate_ib_forces(ib_ft) | ||
|
|
||
| ! consider body forces after reducing to avoid double counting | ||
| do i = 1, num_ibs | ||
| if (bf_x) then | ||
| forces(i, 1) = forces(i, 1) + accel_bf(1)*patch_ib(i)%mass | ||
| ib_ft(1, i) = ib_ft(1, i) + accel_bf(1)*patch_ib(i)%mass | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't know if this was intentional, but you did a good job of respecting fortran's column-major ordering for memory layout. Something I forgot to consider in my original implementation. |
||
| end if | ||
| if (bf_y) then | ||
| forces(i, 2) = forces(i, 2) + accel_bf(2)*patch_ib(i)%mass | ||
| ib_ft(2, i) = ib_ft(2, i) + accel_bf(2)*patch_ib(i)%mass | ||
| end if | ||
| if (bf_z) then | ||
| forces(i, 3) = forces(i, 3) + accel_bf(3)*patch_ib(i)%mass | ||
| ib_ft(3, i) = ib_ft(3, i) + accel_bf(3)*patch_ib(i)%mass | ||
| end if | ||
| end do | ||
|
|
||
| ! apply the summed forces | ||
| $:GPU_PARALLEL_LOOP(private='[i]', copyin='[forces, torques]') | ||
| $:GPU_PARALLEL_LOOP(private='[i]', copyin='[ib_ft]') | ||
| do i = 1, num_ibs | ||
| patch_ib(i)%force(:) = forces(i,:) | ||
| patch_ib(i)%torque(:) = torques(i,:) | ||
| patch_ib(i)%force(:) = ib_ft(1:3,i) | ||
| patch_ib(i)%torque(:) = ib_ft(4:6,i) | ||
| end do | ||
| $:END_GPU_PARALLEL_LOOP() | ||
|
|
||
|
|
@@ -1247,9 +1244,9 @@ contains | |
| !! 2: send current (post-pass-1) values; add received; subtract recv_snap to remove double-counting of the direct contribution | ||
| !! already added in pass 1. Back-propagation phase: 2 passes per dimension receiving from the high-index (+X) neighbor, each | ||
| !! overwriting local forces with the neighbor's accumulated total. | ||
| subroutine s_communicate_ib_forces(forces, torques) | ||
| subroutine s_communicate_ib_forces(ib_ft) | ||
|
|
||
| real(wp), dimension(num_ibs, 3), intent(inout) :: forces, torques | ||
| real(wp), dimension(6, num_ibs), intent(inout) :: ib_ft | ||
|
|
||
| #ifdef MFC_MPI | ||
| integer :: i, j, k, pack_pos, unpack_pos, buf_size, ierr | ||
|
|
@@ -1267,24 +1264,21 @@ contains | |
| send_neighbor = merge(bc_${X}$%end, MPI_PROC_NULL, bc_${X}$%end >= 0) | ||
| recv_neighbor = merge(bc_${X}$%beg, MPI_PROC_NULL, bc_${X}$%beg >= 0) | ||
|
|
||
| recv_forces_snap = 0._wp | ||
| recv_torques_snap = 0._wp | ||
| recv_ft_snap = 0._wp | ||
| tag = 300 | ||
|
|
||
| do k = 1, 2*ib_neighborhood_radius | ||
| ! send forces to +${X}$ neighbor; receive from -${X}$ neighbor. Add received values then | ||
| pack_pos = 0 | ||
| $:GPU_PARALLEL_LOOP(private='[i]', copyin='[forces, torques]') | ||
| $:GPU_PARALLEL_LOOP(private='[i]') | ||
| do i = 1, num_ibs | ||
| send_ids(i) = patch_ib(i)%gbl_patch_id | ||
| send_ft(1:3,i) = forces(i,:) | ||
| send_ft(4:6,i) = torques(i,:) | ||
| end do | ||
| $:END_GPU_PARALLEL_LOOP() | ||
| $:GPU_UPDATE(host='[send_ids, send_ft]') | ||
| $:GPU_UPDATE(host='[send_ids]') | ||
| call MPI_PACK(num_ibs, 1, MPI_INTEGER, ib_force_send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) | ||
| call MPI_PACK(send_ids, num_ibs, MPI_INTEGER, ib_force_send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) | ||
| call MPI_PACK(send_ft, 6*num_ibs, mpi_p, ib_force_send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) | ||
| call MPI_PACK(ib_ft, 6*num_ibs, mpi_p, ib_force_send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) | ||
| call MPI_SENDRECV(ib_force_send_buf, pack_pos, MPI_PACKED, send_neighbor, tag, ib_force_recv_buf, buf_size, & | ||
| & MPI_PACKED, recv_neighbor, tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) | ||
|
|
||
|
|
@@ -1294,16 +1288,13 @@ contains | |
| call MPI_UNPACK(ib_force_recv_buf, buf_size, unpack_pos, recv_ids, recv_count, MPI_INTEGER, & | ||
| & MPI_COMM_WORLD, ierr) | ||
| call MPI_UNPACK(ib_force_recv_buf, buf_size, unpack_pos, recv_ft, 6*recv_count, mpi_p, MPI_COMM_WORLD, ierr) | ||
| $:GPU_PARALLEL_LOOP(private='[i, j]', copyin='[recv_ft, recv_ids]', copy='[forces, torques, & | ||
| & recv_forces_snap, recv_torques_snap]') | ||
| $:GPU_PARALLEL_LOOP(private='[i, j]', copyin='[recv_ft, recv_ids]', copy='[ib_ft, recv_ft_snap]') | ||
| do i = 1, recv_count | ||
| call s_get_neighborhood_idx(recv_ids(i), j) | ||
| if (j > 0) then | ||
| ! add forces and subtract recv_snap prevent double-counting | ||
| forces(j,:) = forces(j,:) + recv_ft(1:3,i) - recv_forces_snap(j,:) | ||
| torques(j,:) = torques(j,:) + recv_ft(4:6,i) - recv_torques_snap(j,:) | ||
| recv_forces_snap(j,:) = recv_ft(1:3,i) | ||
| recv_torques_snap(j,:) = recv_ft(4:6,i) | ||
| ib_ft(:,j) = ib_ft(:,j) + recv_ft(:,i) - recv_ft_snap(:,j) | ||
| recv_ft_snap(:,j) = recv_ft(:,i) | ||
| end if | ||
| end do | ||
| $:END_GPU_PARALLEL_LOOP() | ||
|
|
@@ -1321,17 +1312,15 @@ contains | |
|
|
||
| do k = 1, 2*ib_neighborhood_radius | ||
| pack_pos = 0 | ||
| $:GPU_PARALLEL_LOOP(private='[i]', copyin='[forces, torques]') | ||
| $:GPU_PARALLEL_LOOP(private='[i]') | ||
| do i = 1, num_ibs | ||
| send_ids(i) = patch_ib(i)%gbl_patch_id | ||
| send_ft(1:3,i) = forces(i,:) | ||
| send_ft(4:6,i) = torques(i,:) | ||
| end do | ||
| $:END_GPU_PARALLEL_LOOP() | ||
| $:GPU_UPDATE(host='[send_ids, send_ft]') | ||
| $:GPU_UPDATE(host='[send_ids]') | ||
| call MPI_PACK(num_ibs, 1, MPI_INTEGER, ib_force_send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) | ||
| call MPI_PACK(send_ids, num_ibs, MPI_INTEGER, ib_force_send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) | ||
| call MPI_PACK(send_ft, 6*num_ibs, mpi_p, ib_force_send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) | ||
| call MPI_PACK(ib_ft, 6*num_ibs, mpi_p, ib_force_send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) | ||
| call MPI_SENDRECV(ib_force_send_buf, pack_pos, MPI_PACKED, send_neighbor, tag, ib_force_recv_buf, buf_size, & | ||
| & MPI_PACKED, recv_neighbor, tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) | ||
| if (recv_neighbor /= MPI_PROC_NULL) then | ||
|
|
@@ -1340,12 +1329,11 @@ contains | |
| call MPI_UNPACK(ib_force_recv_buf, buf_size, unpack_pos, recv_ids, recv_count, MPI_INTEGER, & | ||
| & MPI_COMM_WORLD, ierr) | ||
| call MPI_UNPACK(ib_force_recv_buf, buf_size, unpack_pos, recv_ft, 6*recv_count, mpi_p, MPI_COMM_WORLD, ierr) | ||
| $:GPU_PARALLEL_LOOP(private='[i, j]', copyin='[recv_ft, recv_ids]', copy='[forces, torques]') | ||
| $:GPU_PARALLEL_LOOP(private='[i, j]', copyin='[recv_ft, recv_ids]', copy='[ib_ft]') | ||
| do i = 1, recv_count | ||
| call s_get_neighborhood_idx(recv_ids(i), j) | ||
| if (j > 0) then | ||
| forces(j,:) = recv_ft(1:3,i) | ||
| torques(j,:) = recv_ft(4:6,i) | ||
| ib_ft(:,j) = recv_ft(:,i) | ||
| end if | ||
| end do | ||
| $:END_GPU_PARALLEL_LOOP() | ||
|
|
@@ -1557,8 +1545,8 @@ contains | |
|
|
||
| #ifdef MFC_MPI | ||
| if (num_procs > 1) then | ||
| @:DEALLOCATE(send_ids, send_ft) | ||
| deallocate (recv_forces_snap, recv_torques_snap, recv_ids, recv_ft) | ||
| @:DEALLOCATE(send_ids) | ||
| deallocate (recv_ft_snap, recv_ids, recv_ft) | ||
| end if | ||
| #endif | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is a point of some disagreement in coding philosophy, but I very-much like when variables are named extremely explicity. Physicists/mathemeticians/engineers are notorious for naming variiables things like
mu_0instead ofpermeability_of_free_space. The former saves much space when it is used in several equations, but the latter is extremely explicity about what the value is, even to non-technical users.it is clear that
ib_ftis "immersed boundary force and torque", but it could just as easily be something likeforces_and_torqueswithout causing too much clutter. But a name like this makes the code read more like a book. No one needs to go up a layer of abstraction to see what is being passed into this subroutine and understand what the array has in it. It is obvious from the name. This is a good thought exercises to participate in when naming variables. How to maximize clarity without making your equations bloated. And if you have to choose one or the other, which is more important?