Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions examples/3D_mibm_sphere_head_on_collision/case.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@
"prim_vars_wrt": "T",
"E_wrt": "T",
"parallel_io": "T",
"ib_state_wrt": "T",
# Patch: Constant Tube filled with air
# Specify the cylindrical air tube grid geometry
"patch_icpp(1)%geometry": 9,
Expand Down
32 changes: 16 additions & 16 deletions src/simulation/m_collisions.fpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,12 @@ contains

end subroutine s_initialize_collisions_module

subroutine s_apply_collision_forces(ghost_points, num_gps, ib_markers, forces, torques)
subroutine s_apply_collision_forces(ghost_points, num_gps, ib_markers, ib_ft)

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a point of some disagreement in coding philosophy, but I very-much like when variables are named extremely explicity. Physicists/mathemeticians/engineers are notorious for naming variiables things like mu_0 instead of permeability_of_free_space. The former saves much space when it is used in several equations, but the latter is extremely explicity about what the value is, even to non-technical users.

it is clear that ib_ft is "immersed boundary force and torque", but it could just as easily be something like forces_and_torques without causing too much clutter. But a name like this makes the code read more like a book. No one needs to go up a layer of abstraction to see what is being passed into this subroutine and understand what the array has in it. It is obvious from the name. This is a good thought exercises to participate in when naming variables. How to maximize clarity without making your equations bloated. And if you have to choose one or the other, which is more important?

@joshgillis joshgillis Jun 23, 2026

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@danieljvickers I see, I definitely fall into the pattern of saving space, but I also agree that sometimes gets messy when it comes to legibility. What about ib_forces_torques for the name? I wanted to preserve the "ib" part of the name just so it doesn't get confused with other force/torque arrays in m_collisions if they are added later on, unless if you don't see that being necessary.

Edit: Do you also want this to extend to recv_ft and recv_ft_snap?


type(ghost_point), dimension(:), intent(in) :: ghost_points
integer, intent(in) :: num_gps
type(integer_field), intent(in) :: ib_markers
real(wp), dimension(num_ibs, 3), intent(inout) :: forces, torques
real(wp), dimension(6, num_ibs), intent(inout) :: ib_ft
integer :: num_considered_collisions

! return if no collisions
Expand All @@ -67,17 +67,17 @@ contains

select case (collision_model)
case (1) ! soft sphere model
call s_apply_wall_collision_forces_soft_sphere(forces, torques)
call s_apply_ib_collision_forces_soft_sphere(num_considered_collisions, forces, torques)
call s_apply_wall_collision_forces_soft_sphere(ib_ft)
call s_apply_ib_collision_forces_soft_sphere(num_considered_collisions, ib_ft)
end select

end subroutine s_apply_collision_forces

!> @brief applies collision forces to IBs assuming a soft-sphere collision model (all IBs are circles or spheres)
subroutine s_apply_ib_collision_forces_soft_sphere(num_considered_collisions, forces, torques)
subroutine s_apply_ib_collision_forces_soft_sphere(num_considered_collisions, ib_ft)

integer, intent(in) :: num_considered_collisions
real(wp), dimension(num_ibs, 3), intent(inout) :: forces, torques
real(wp), dimension(6, num_ibs), intent(inout) :: ib_ft
integer :: i, encoded_pid1, encoded_pid2, xp1, xp2, yp1, yp2, zp1, zp2, pid1, pid2, l ! iterators and patch IDs
real(wp) :: overlap_distance
real(wp), dimension(3) :: normal_vector, centroid_1, centroid_2
Expand All @@ -93,7 +93,7 @@ contains
$:GPU_PARALLEL_LOOP(private='[i, l, encoded_pid1, encoded_pid2, xp1, xp2, yp1, yp2, zp1, zp2, pid1, pid2, centroid_1, &
& centroid_2, normal_vector, overlap_distance, effective_mass, k, eta, normal_velocity, &
& tangental_vector, normal_force, tangental_force, torque, radial_vector, rotation_velocity, vel1, &
& vel2]', copy='[forces, torques]')
& vel2]', copy='[ib_ft]')
do i = 1, num_considered_collisions
encoded_pid1 = collision_lookup(i, 3)
encoded_pid2 = collision_lookup(i, 4)
Expand Down Expand Up @@ -144,15 +144,15 @@ contains
do l = 1, num_dims
! update the first IB
$:GPU_ATOMIC(atomic='update')
forces(pid1, l) = forces(pid1, l) + (normal_force(l) + tangental_force(l))
ib_ft(l, pid1) = ib_ft(l, pid1) + (normal_force(l) + tangental_force(l))
$:GPU_ATOMIC(atomic='update')
torques(pid1, l) = torques(pid1, l) + torque(l)
ib_ft(l + 3, pid1) = ib_ft(l + 3, pid1) + torque(l)

! apply equal and opposite force/torque to second IB
$:GPU_ATOMIC(atomic='update')
forces(pid2, l) = forces(pid2, l) - (normal_force(l) + tangental_force(l))
ib_ft(l, pid2) = ib_ft(l, pid2) - (normal_force(l) + tangental_force(l))
$:GPU_ATOMIC(atomic='update')
torques(pid2, l) = torques(pid2, l) + torque(l)*patch_ib(pid2)%radius/patch_ib(pid1)%radius
ib_ft(l + 3, pid2) = ib_ft(l + 3, pid2) + torque(l)*patch_ib(pid2)%radius/patch_ib(pid1)%radius
end do
end if
end if
Expand All @@ -162,17 +162,17 @@ contains
end subroutine s_apply_ib_collision_forces_soft_sphere

!> @brief applies collision forces to IBs assuming a soft-sphere collision model (all IBs are circles or spheres)
subroutine s_apply_wall_collision_forces_soft_sphere(forces, torques)
subroutine s_apply_wall_collision_forces_soft_sphere(ib_ft)

real(wp), dimension(num_ibs, 3), intent(inout) :: forces, torques
real(wp), dimension(6, num_ibs), intent(inout) :: ib_ft
integer :: patch_id, i, l
real(wp), dimension(3) :: normal_force, tangental_force, normal_vector, normal_velocity, tangental_vector, &
& collision_location, torque, radial_vector, rotation_velocity, velocity
real(wp) :: k, eta ! the spring stiffness and damping coefficient for a specific IB

$:GPU_PARALLEL_LOOP(private='[patch_id, i, l, collision_location, normal_vector, k, eta, normal_velocity, &
& tangental_vector, normal_force, tangental_force, torque, radial_vector, rotation_velocity, &
& velocity]', copy='[forces, torques]', collapse=2)
& velocity]', copy='[ib_ft]', collapse=2)
do patch_id = 1, num_ibs
do i = 1, num_dims*2
! only compute force contributions if there was an overlap
Expand Down Expand Up @@ -217,9 +217,9 @@ contains

do l = 1, num_dims
$:GPU_ATOMIC(atomic='update')
forces(patch_id, l) = forces(patch_id, l) + (normal_force(l) + tangental_force(l))
ib_ft(l, patch_id) = ib_ft(l, patch_id) + (normal_force(l) + tangental_force(l))
$:GPU_ATOMIC(atomic='update')
torques(patch_id, l) = torques(patch_id, l) + torque(l)
ib_ft(l + 3, patch_id) = ib_ft(l + 3, patch_id) + torque(l)
end do
end if
end do
Expand Down
78 changes: 33 additions & 45 deletions src/simulation/m_ibm.fpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,7 @@ module m_ibm

! IB MPI buffers
integer, allocatable :: send_ids(:), recv_ids(:)
real(wp), allocatable :: send_ft(:,:), recv_ft(:,:)
real(wp), allocatable :: recv_forces_snap(:,:), recv_torques_snap(:,:)
real(wp), allocatable :: recv_ft(:,:), recv_ft_snap(:,:)

contains

Expand Down Expand Up @@ -98,9 +97,8 @@ contains
! allocate some arrays for MPI communication, if required by this simulation
#ifdef MFC_MPI
if (num_procs > 1) then
@:ALLOCATE(send_ids(size(patch_ib)), send_ft(6, size(patch_ib)))
allocate (recv_forces_snap(size(patch_ib), 3), recv_torques_snap(size(patch_ib), 3), recv_ids(size(patch_ib)), &
& recv_ft(6, size(patch_ib)))
@:ALLOCATE(send_ids(size(patch_ib)))
allocate (recv_ft_snap(6, size(patch_ib)), recv_ids(size(patch_ib)), recv_ft(6, size(patch_ib)))
end if
#endif

Expand Down Expand Up @@ -908,7 +906,7 @@ contains
type(scalar_field), dimension(1:sys_size), intent(in) :: q_prim_vf
type(physical_parameters), dimension(1:num_fluids), intent(in) :: fluid_pp
integer :: i, j, k, l, encoded_ib_idx, ib_idx, ib_idx_temp, fluid_idx
real(wp), dimension(num_ibs, 3) :: forces, torques
real(wp), dimension(6, num_ibs) :: ib_ft

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This would be a good place to put a comment about the contents of this array. Like I said, I already think it would be best renamed. But it is not clear immediately to a reader how memory is written here. A comment briefly describing that it is f_x, f_y, f_x, tau_x, tau_y, tau_z contiguously would make it easier for anyone reading the code to understand how they are supposed to fill in the array. Although the array bounds help clarify this a little for you.

! viscous stress tensor with temp vectors to hold divergence calculations
real(wp), dimension(1:3,1:3) :: viscous_stress_div, viscous_stress_div_1, viscous_stress_div_2
real(wp), dimension(1:3) :: local_force_contribution, radial_vector, local_torque_contribution
Expand All @@ -922,8 +920,7 @@ contains

call nvtxStartRange("COMPUTE-IB-FORCES")

forces = 0._wp
torques = 0._wp
ib_ft = 0._wp

if (viscous) then
do fluid_idx = 1, num_fluids
Expand All @@ -937,8 +934,8 @@ contains

$:GPU_PARALLEL_LOOP(private='[i, j, k, l, ib_idx, ib_idx_temp, encoded_ib_idx, fluid_idx, radial_vector, &
& local_force_contribution, cell_volume, local_torque_contribution, dynamic_viscosity, &
& viscous_stress_div, viscous_stress_div_1, viscous_stress_div_2, dx, dy, dz]', copy='[forces, &
& torques]', copyin='[dynamic_viscosities]', collapse=3)
& viscous_stress_div, viscous_stress_div_1, viscous_stress_div_2, dx, dy, dz]', copy='[ib_ft]', &
& copyin='[dynamic_viscosities]', collapse=3)
do i = 0, m
do j = 0, n
do k = 0, p
Expand Down Expand Up @@ -1025,9 +1022,9 @@ contains
! Update the force and torque values atomically to prevent race conditions
do l = 1, 3
$:GPU_ATOMIC(atomic='update')
forces(ib_idx, l) = forces(ib_idx, l) + (local_force_contribution(l)*cell_volume)
ib_ft(l, ib_idx) = ib_ft(l, ib_idx) + (local_force_contribution(l)*cell_volume)
$:GPU_ATOMIC(atomic='update')
torques(ib_idx, l) = torques(ib_idx, l) + local_torque_contribution(l)*cell_volume
ib_ft(l + 3, ib_idx) = ib_ft(l + 3, ib_idx) + local_torque_contribution(l)*cell_volume
end do
end if ! ib_idx > 0
end if
Expand All @@ -1036,29 +1033,29 @@ contains
end do
$:END_GPU_PARALLEL_LOOP()

call s_apply_collision_forces(ghost_points, num_gps, ib_markers, forces, torques)
call s_apply_collision_forces(ghost_points, num_gps, ib_markers, ib_ft)

! reduce the forces across local neighborhood ranks
call s_communicate_ib_forces(forces, torques)
call s_communicate_ib_forces(ib_ft)

! consider body forces after reducing to avoid double counting
do i = 1, num_ibs
if (bf_x) then
forces(i, 1) = forces(i, 1) + accel_bf(1)*patch_ib(i)%mass
ib_ft(1, i) = ib_ft(1, i) + accel_bf(1)*patch_ib(i)%mass

@danieljvickers danieljvickers Jun 23, 2026

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know if this was intentional, but you did a good job of respecting fortran's column-major ordering for memory layout. Something I forgot to consider in my original implementation.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Funny, I was looking into array shaping and saw the column-major preference Fortran has and thought that is what you had been implying with "no particular reason for their original shape" in your issue. Glad I could make an improvement.

end if
if (bf_y) then
forces(i, 2) = forces(i, 2) + accel_bf(2)*patch_ib(i)%mass
ib_ft(2, i) = ib_ft(2, i) + accel_bf(2)*patch_ib(i)%mass
end if
if (bf_z) then
forces(i, 3) = forces(i, 3) + accel_bf(3)*patch_ib(i)%mass
ib_ft(3, i) = ib_ft(3, i) + accel_bf(3)*patch_ib(i)%mass
end if
end do

! apply the summed forces
$:GPU_PARALLEL_LOOP(private='[i]', copyin='[forces, torques]')
$:GPU_PARALLEL_LOOP(private='[i]', copyin='[ib_ft]')
do i = 1, num_ibs
patch_ib(i)%force(:) = forces(i,:)
patch_ib(i)%torque(:) = torques(i,:)
patch_ib(i)%force(:) = ib_ft(1:3,i)
patch_ib(i)%torque(:) = ib_ft(4:6,i)
end do
$:END_GPU_PARALLEL_LOOP()

Expand Down Expand Up @@ -1247,9 +1244,9 @@ contains
!! 2: send current (post-pass-1) values; add received; subtract recv_snap to remove double-counting of the direct contribution
!! already added in pass 1. Back-propagation phase: 2 passes per dimension receiving from the high-index (+X) neighbor, each
!! overwriting local forces with the neighbor's accumulated total.
subroutine s_communicate_ib_forces(forces, torques)
subroutine s_communicate_ib_forces(ib_ft)

real(wp), dimension(num_ibs, 3), intent(inout) :: forces, torques
real(wp), dimension(6, num_ibs), intent(inout) :: ib_ft

#ifdef MFC_MPI
integer :: i, j, k, pack_pos, unpack_pos, buf_size, ierr
Expand All @@ -1267,24 +1264,21 @@ contains
send_neighbor = merge(bc_${X}$%end, MPI_PROC_NULL, bc_${X}$%end >= 0)
recv_neighbor = merge(bc_${X}$%beg, MPI_PROC_NULL, bc_${X}$%beg >= 0)

recv_forces_snap = 0._wp
recv_torques_snap = 0._wp
recv_ft_snap = 0._wp
tag = 300

do k = 1, 2*ib_neighborhood_radius
! send forces to +${X}$ neighbor; receive from -${X}$ neighbor. Add received values then
pack_pos = 0
$:GPU_PARALLEL_LOOP(private='[i]', copyin='[forces, torques]')
$:GPU_PARALLEL_LOOP(private='[i]')
do i = 1, num_ibs
send_ids(i) = patch_ib(i)%gbl_patch_id
send_ft(1:3,i) = forces(i,:)
send_ft(4:6,i) = torques(i,:)
end do
$:END_GPU_PARALLEL_LOOP()
$:GPU_UPDATE(host='[send_ids, send_ft]')
$:GPU_UPDATE(host='[send_ids]')
call MPI_PACK(num_ibs, 1, MPI_INTEGER, ib_force_send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr)
call MPI_PACK(send_ids, num_ibs, MPI_INTEGER, ib_force_send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr)
call MPI_PACK(send_ft, 6*num_ibs, mpi_p, ib_force_send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr)
call MPI_PACK(ib_ft, 6*num_ibs, mpi_p, ib_force_send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr)
call MPI_SENDRECV(ib_force_send_buf, pack_pos, MPI_PACKED, send_neighbor, tag, ib_force_recv_buf, buf_size, &
& MPI_PACKED, recv_neighbor, tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)

Expand All @@ -1294,16 +1288,13 @@ contains
call MPI_UNPACK(ib_force_recv_buf, buf_size, unpack_pos, recv_ids, recv_count, MPI_INTEGER, &
& MPI_COMM_WORLD, ierr)
call MPI_UNPACK(ib_force_recv_buf, buf_size, unpack_pos, recv_ft, 6*recv_count, mpi_p, MPI_COMM_WORLD, ierr)
$:GPU_PARALLEL_LOOP(private='[i, j]', copyin='[recv_ft, recv_ids]', copy='[forces, torques, &
& recv_forces_snap, recv_torques_snap]')
$:GPU_PARALLEL_LOOP(private='[i, j]', copyin='[recv_ft, recv_ids]', copy='[ib_ft, recv_ft_snap]')
do i = 1, recv_count
call s_get_neighborhood_idx(recv_ids(i), j)
if (j > 0) then
! add forces and subtract recv_snap prevent double-counting
forces(j,:) = forces(j,:) + recv_ft(1:3,i) - recv_forces_snap(j,:)
torques(j,:) = torques(j,:) + recv_ft(4:6,i) - recv_torques_snap(j,:)
recv_forces_snap(j,:) = recv_ft(1:3,i)
recv_torques_snap(j,:) = recv_ft(4:6,i)
ib_ft(:,j) = ib_ft(:,j) + recv_ft(:,i) - recv_ft_snap(:,j)
recv_ft_snap(:,j) = recv_ft(:,i)
end if
end do
$:END_GPU_PARALLEL_LOOP()
Expand All @@ -1321,17 +1312,15 @@ contains

do k = 1, 2*ib_neighborhood_radius
pack_pos = 0
$:GPU_PARALLEL_LOOP(private='[i]', copyin='[forces, torques]')
$:GPU_PARALLEL_LOOP(private='[i]')
do i = 1, num_ibs
send_ids(i) = patch_ib(i)%gbl_patch_id
send_ft(1:3,i) = forces(i,:)
send_ft(4:6,i) = torques(i,:)
end do
$:END_GPU_PARALLEL_LOOP()
$:GPU_UPDATE(host='[send_ids, send_ft]')
$:GPU_UPDATE(host='[send_ids]')
call MPI_PACK(num_ibs, 1, MPI_INTEGER, ib_force_send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr)
call MPI_PACK(send_ids, num_ibs, MPI_INTEGER, ib_force_send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr)
call MPI_PACK(send_ft, 6*num_ibs, mpi_p, ib_force_send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr)
call MPI_PACK(ib_ft, 6*num_ibs, mpi_p, ib_force_send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr)
call MPI_SENDRECV(ib_force_send_buf, pack_pos, MPI_PACKED, send_neighbor, tag, ib_force_recv_buf, buf_size, &
& MPI_PACKED, recv_neighbor, tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
if (recv_neighbor /= MPI_PROC_NULL) then
Expand All @@ -1340,12 +1329,11 @@ contains
call MPI_UNPACK(ib_force_recv_buf, buf_size, unpack_pos, recv_ids, recv_count, MPI_INTEGER, &
& MPI_COMM_WORLD, ierr)
call MPI_UNPACK(ib_force_recv_buf, buf_size, unpack_pos, recv_ft, 6*recv_count, mpi_p, MPI_COMM_WORLD, ierr)
$:GPU_PARALLEL_LOOP(private='[i, j]', copyin='[recv_ft, recv_ids]', copy='[forces, torques]')
$:GPU_PARALLEL_LOOP(private='[i, j]', copyin='[recv_ft, recv_ids]', copy='[ib_ft]')
do i = 1, recv_count
call s_get_neighborhood_idx(recv_ids(i), j)
if (j > 0) then
forces(j,:) = recv_ft(1:3,i)
torques(j,:) = recv_ft(4:6,i)
ib_ft(:,j) = recv_ft(:,i)
end if
end do
$:END_GPU_PARALLEL_LOOP()
Expand Down Expand Up @@ -1557,8 +1545,8 @@ contains

#ifdef MFC_MPI
if (num_procs > 1) then
@:DEALLOCATE(send_ids, send_ft)
deallocate (recv_forces_snap, recv_torques_snap, recv_ids, recv_ft)
@:DEALLOCATE(send_ids)
deallocate (recv_ft_snap, recv_ids, recv_ft)
end if
#endif

Expand Down
1 change: 1 addition & 0 deletions src/simulation/m_time_steppers.fpp
Original file line number Diff line number Diff line change
Expand Up @@ -566,6 +566,7 @@ contains

if (ib) then
if (moving_immersed_boundary_flag) then
$:GPU_UPDATE(host='[patch_ib(1:num_ibs)]')
call s_wrap_periodic_ibs() ! wraps the positions of IBs to the local proc
call s_handoff_ib_ownership() ! recomputes which ranks own which IBs and communicate to neighbors
else if (ib_state_wrt) then
Expand Down
Loading