diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 227fbde862..0558f32e07 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -93,6 +93,7 @@ module atm_time_integration integer, dimension(:), pointer :: bdyMaskEdge ! regional_MPAS addition logical :: config_apply_lbcs + logical :: do_halo_exchange ! Used in compute_solve_diagnostics real (kind=RKIND), allocatable, dimension(:,:) :: ke_vertex @@ -304,6 +305,8 @@ subroutine mpas_atm_dynamics_init(domain) real (kind=RKIND), dimension(:,:), pointer :: deformation_coef_s #endif + do_halo_exchange = domain % dminfo % nProcs > 1 + #ifdef MPAS_CAM_DYCORE nullify(tend_physics) call mpas_pool_get_subpool(domain % blocklist % structs, 'tend_physics', tend_physics) @@ -2101,9 +2104,9 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) call mpas_pool_get_array(state, 'scalars', scalars_1, 1) call mpas_pool_get_array(diag, 'pressure_p', pressure_p) call mpas_pool_get_array(diag, 'rtheta_p', rtheta_p) - !$acc update self(theta_m,scalars_1,pressure_p,rtheta_p) if (.not. config_gpu_aware_mpi) + !$acc update self(theta_m,scalars_1,pressure_p,rtheta_p) if (.not. config_gpu_aware_mpi .and. do_halo_exchange) call exchange_halo_group(domain, 'dynamics:theta_m,scalars,pressure_p,rtheta_p', config_gpu_aware_mpi) - !$acc update device(theta_m,scalars_1,pressure_p,rtheta_p) if (.not. config_gpu_aware_mpi) + !$acc update device(theta_m,scalars_1,pressure_p,rtheta_p) if (.not. config_gpu_aware_mpi .and. do_halo_exchange) MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') call mpas_timer_start('atm_rk_integration_setup') @@ -2195,9 +2198,9 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') call mpas_pool_get_array(diag, 'exner', exner) - !$acc update self(exner) if (.not. config_gpu_aware_mpi) + !$acc update self(exner) if (.not. config_gpu_aware_mpi .and. do_halo_exchange) call exchange_halo_group(domain, 'dynamics:exner', config_gpu_aware_mpi) - !$acc update device(exner) if (.not. config_gpu_aware_mpi) + !$acc update device(exner) if (.not. config_gpu_aware_mpi .and. do_halo_exchange) MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! @@ -2279,9 +2282,9 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) ! tend_u MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') call mpas_pool_get_array(tend, 'u', tend_u) - !$acc update self(tend_u) if (.not. config_gpu_aware_mpi) + !$acc update self(tend_u) if (.not. config_gpu_aware_mpi .and. do_halo_exchange) call exchange_halo_group(domain, 'dynamics:tend_u', config_gpu_aware_mpi) - !$acc update device(tend_u) if (.not. config_gpu_aware_mpi) + !$acc update device(tend_u) if (.not. config_gpu_aware_mpi .and. do_halo_exchange) MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') call mpas_timer_start('small_step_prep') @@ -2360,9 +2363,9 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') call mpas_pool_get_array(diag, 'rho_pp', rho_pp) - !$acc update self(rho_pp) if (.not. config_gpu_aware_mpi) + !$acc update self(rho_pp) if (.not. config_gpu_aware_mpi .and. do_halo_exchange) call exchange_halo_group(domain, 'dynamics:rho_pp', config_gpu_aware_mpi) - !$acc update device(rho_pp) if (.not. config_gpu_aware_mpi) + !$acc update device(rho_pp) if (.not. config_gpu_aware_mpi .and. do_halo_exchange) MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') call mpas_timer_start('atm_advance_acoustic_step') @@ -2387,9 +2390,9 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) ! This is the only communications needed during the acoustic steps because we solve for u on all edges of owned cells MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') call mpas_pool_get_array(diag, 'rtheta_pp', rtheta_pp) - !$acc update self(rtheta_pp) if (.not. config_gpu_aware_mpi) + !$acc update self(rtheta_pp) if (.not. config_gpu_aware_mpi .and. do_halo_exchange) call exchange_halo_group(domain, 'dynamics:rtheta_pp', config_gpu_aware_mpi) - !$acc update device(rtheta_pp) if (.not. config_gpu_aware_mpi) + !$acc update device(rtheta_pp) if (.not. config_gpu_aware_mpi .and. do_halo_exchange) MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') ! complete update of horizontal momentum by including 3d divergence damping at the end of the acoustic step @@ -2415,9 +2418,9 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) call mpas_pool_get_array(diag, 'rw_p', rw_p) call mpas_pool_get_array(diag, 'rho_pp', rho_pp) call mpas_pool_get_array(diag, 'rtheta_pp', rtheta_pp) - !$acc update self(rw_p,ru_p,rho_pp,rtheta_pp) if (.not. config_gpu_aware_mpi) + !$acc update self(rw_p,ru_p,rho_pp,rtheta_pp) if (.not. config_gpu_aware_mpi .and. do_halo_exchange) call exchange_halo_group(domain, 'dynamics:rw_p,ru_p,rho_pp,rtheta_pp', config_gpu_aware_mpi) - !$acc update device(rw_p,ru_p,rho_pp,rtheta_pp) if (.not. config_gpu_aware_mpi) + !$acc update device(rw_p,ru_p,rho_pp,rtheta_pp) if (.not. config_gpu_aware_mpi .and. do_halo_exchange) MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') call mpas_timer_start('atm_recover_large_step_variables') @@ -2488,14 +2491,14 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') call mpas_pool_get_array(state, 'u', u, 2) - !$acc update self(u) if (.not. config_gpu_aware_mpi) + !$acc update self(u) if (.not. config_gpu_aware_mpi .and. do_halo_exchange) ! u if (config_apply_lbcs) then call exchange_halo_group(domain, 'dynamics:u_123', config_gpu_aware_mpi) else call exchange_halo_group(domain, 'dynamics:u_3', config_gpu_aware_mpi) end if - !$acc update device(u) if (.not. config_gpu_aware_mpi) + !$acc update device(u) if (.not. config_gpu_aware_mpi .and. do_halo_exchange) MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') ! scalar advection: RK3 scheme of Skamarock and Gassmann (2011). @@ -2510,9 +2513,9 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') call mpas_pool_get_array(state, 'scalars', scalars_2, 2) - !$acc update self(scalars_2) if (.not. config_gpu_aware_mpi) + !$acc update self(scalars_2) if (.not. config_gpu_aware_mpi .and. do_halo_exchange) call exchange_halo_group(domain, 'dynamics:scalars', config_gpu_aware_mpi) - !$acc update device(scalars_2) if (.not. config_gpu_aware_mpi) + !$acc update device(scalars_2) if (.not. config_gpu_aware_mpi .and. do_halo_exchange) MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') allocate(scalars_driving(num_scalars,nVertLevels,nCells+1)) @@ -2569,22 +2572,22 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) call mpas_pool_get_array(state, 'w', w, 2) call mpas_pool_get_array(diag, 'pv_edge', pv_edge) call mpas_pool_get_array(diag, 'rho_edge', rho_edge) - !$acc update self(w,pv_edge,rho_edge) if (.not. config_gpu_aware_mpi) + !$acc update self(w,pv_edge,rho_edge) if (.not. config_gpu_aware_mpi .and. do_halo_exchange) if (config_scalar_advection .and. (.not. config_split_dynamics_transport) ) then ! ! Communicate halos for w[1,2], pv_edge[1,2], rho_edge[1,2], scalars[1,2] ! call mpas_pool_get_array(state, 'scalars', scalars_2, 2) - !$acc update self(scalars_2) if (.not. config_gpu_aware_mpi) + !$acc update self(scalars_2) if (.not. config_gpu_aware_mpi .and. do_halo_exchange) call exchange_halo_group(domain, 'dynamics:w,pv_edge,rho_edge,scalars', config_gpu_aware_mpi) - !$acc update device(scalars_2) if (.not. config_gpu_aware_mpi) + !$acc update device(scalars_2) if (.not. config_gpu_aware_mpi .and. do_halo_exchange) else ! ! Communicate halos for w[1,2], pv_edge[1,2], rho_edge[1,2] ! call exchange_halo_group(domain, 'dynamics:w,pv_edge,rho_edge', config_gpu_aware_mpi) end if - !$acc update device(w,pv_edge,rho_edge) if (.not. config_gpu_aware_mpi) + !$acc update device(w,pv_edge,rho_edge) if (.not. config_gpu_aware_mpi .and. do_halo_exchange) MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') ! set the zero-gradient condition on w for regional_MPAS @@ -2601,9 +2604,9 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') ! w halo values needs resetting after regional boundary update call mpas_pool_get_array(state, 'w', w, 2) - !$acc update self(w) if (.not. config_gpu_aware_mpi) + !$acc update self(w) if (.not. config_gpu_aware_mpi .and. do_halo_exchange) call exchange_halo_group(domain, 'dynamics:w', config_gpu_aware_mpi) - !$acc update device(w) if (.not. config_gpu_aware_mpi) + !$acc update device(w) if (.not. config_gpu_aware_mpi .and. do_halo_exchange) MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') end if ! end of regional_MPAS addition @@ -2619,9 +2622,9 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) call mpas_pool_get_array(state, 'theta_m', theta_m, 2) call mpas_pool_get_array(diag, 'pressure_p', pressure_p) call mpas_pool_get_array(diag, 'rtheta_p', rtheta_p) - !$acc update self(theta_m,pressure_p,rtheta_p) if (.not. config_gpu_aware_mpi) + !$acc update self(theta_m,pressure_p,rtheta_p) if (.not. config_gpu_aware_mpi .and. do_halo_exchange) call exchange_halo_group(domain, 'dynamics:theta_m,pressure_p,rtheta_p', config_gpu_aware_mpi) - !$acc update device(theta_m,pressure_p,rtheta_p) if (.not. config_gpu_aware_mpi) + !$acc update device(theta_m,pressure_p,rtheta_p) if (.not. config_gpu_aware_mpi .and. do_halo_exchange) MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') ! @@ -2690,9 +2693,9 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') ! need to fill halo for horizontal filter call mpas_pool_get_array(state, 'scalars', scalars_2, 2) - !$acc update self(scalars_2) if (.not. config_gpu_aware_mpi) + !$acc update self(scalars_2) if (.not. config_gpu_aware_mpi .and. do_halo_exchange) call exchange_halo_group(domain, 'dynamics:scalars', config_gpu_aware_mpi) - !$acc update device(scalars_2) if (.not. config_gpu_aware_mpi) + !$acc update device(scalars_2) if (.not. config_gpu_aware_mpi .and. do_halo_exchange) MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') allocate(scalars_driving(num_scalars,nVertLevels,nCells+1)) @@ -2721,9 +2724,9 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) if (rk_step < 3) then MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') call mpas_pool_get_array(state, 'scalars', scalars_2, 2) - !$acc update self(scalars_2) if (.not. config_gpu_aware_mpi) + !$acc update self(scalars_2) if (.not. config_gpu_aware_mpi .and. do_halo_exchange) call exchange_halo_group(domain, 'dynamics:scalars', config_gpu_aware_mpi) - !$acc update device(scalars_2) if (.not. config_gpu_aware_mpi) + !$acc update device(scalars_2) if (.not. config_gpu_aware_mpi .and. do_halo_exchange) MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') end if @@ -2853,9 +2856,9 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') call mpas_pool_get_array(state, 'scalars', scalars_2, 2) - !$acc update self(scalars_2) if (.not. config_gpu_aware_mpi) + !$acc update self(scalars_2) if (.not. config_gpu_aware_mpi .and. do_halo_exchange) call exchange_halo_group(domain, 'dynamics:scalars', config_gpu_aware_mpi) - !$acc update device(scalars_2) if (.not. config_gpu_aware_mpi) + !$acc update device(scalars_2) if (.not. config_gpu_aware_mpi .and. do_halo_exchange) MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') allocate(scalars_driving(num_scalars,nVertLevels,nCells+1)) @@ -5215,7 +5218,7 @@ subroutine atm_advance_scalars_mono_work(field_name, block, state, nCells, nEdge !$acc end parallel MPAS_ACC_TIMER_START('atm_advance_scalars_mono [ACC_data_xfer]') - !$acc update self(scalars_old) if (.not. config_gpu_aware_mpi) + !$acc update self(scalars_old) if (.not. config_gpu_aware_mpi .and. do_halo_exchange) MPAS_ACC_TIMER_STOP('atm_advance_scalars_mono [ACC_data_xfer]') !$OMP BARRIER @@ -5225,7 +5228,7 @@ subroutine atm_advance_scalars_mono_work(field_name, block, state, nCells, nEdge !$OMP BARRIER MPAS_ACC_TIMER_START('atm_advance_scalars_mono [ACC_data_xfer]') - !$acc update device(scalars_old) if (.not. config_gpu_aware_mpi) + !$acc update device(scalars_old) if (.not. config_gpu_aware_mpi .and. do_halo_exchange) MPAS_ACC_TIMER_STOP('atm_advance_scalars_mono [ACC_data_xfer]') ! @@ -5622,7 +5625,7 @@ subroutine atm_advance_scalars_mono_work(field_name, block, state, nCells, nEdge ! MPAS_ACC_TIMER_START('atm_advance_scalars_mono [ACC_data_xfer]') - !$acc update self(scale_arr) if (.not. config_gpu_aware_mpi) + !$acc update self(scale_arr) if (.not. config_gpu_aware_mpi .and. do_halo_exchange) MPAS_ACC_TIMER_STOP('atm_advance_scalars_mono [ACC_data_xfer]') !$OMP BARRIER @@ -5632,7 +5635,7 @@ subroutine atm_advance_scalars_mono_work(field_name, block, state, nCells, nEdge !$OMP BARRIER MPAS_ACC_TIMER_START('atm_advance_scalars_mono [ACC_data_xfer]') - !$acc update device(scale_arr) if (.not. config_gpu_aware_mpi) + !$acc update device(scale_arr) if (.not. config_gpu_aware_mpi .and. do_halo_exchange) MPAS_ACC_TIMER_STOP('atm_advance_scalars_mono [ACC_data_xfer]') !$acc parallel