flow_global_dots_owned Subroutine

public subroutine flow_global_dots_owned(flow, n_dots, a, b, results)

Computes multiple global dot products in a single MPI_Allreduce.

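This batched version reduces MPI latency by combining what would otherwise be n_dots separate reductions into one collective call. On each rank only the cells from flow%first_cell to flow%last_cell contribute to the local partial sums.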

Arguments

Type Intent Optional Attributes Name
type(flow_mpi_t), intent(in) :: flow
integer, intent(in) :: n_dots
real(kind=rk), intent(in) :: a(:,:)
real(kind=rk), intent(in) :: b(:,:)
real(kind=rk), intent(out) :: results(:)

Source Code

   subroutine flow_global_dots_owned(flow, n_dots, a, b, results)
      ! Evaluate n_dots global dot products using one MPI_Allreduce.
      !
      ! For every column i = 1..n_dots, the products a(c,i)*b(c,i) are summed
      ! over the cells c = flow%first_cell .. flow%last_cell of this rank. The
      ! n_dots partial sums are then added across flow%comm (MPI_SUM) in a
      ! single collective call, and the totals are written to results.
      !
      ! Arguments:
      !   flow    - supplies first_cell, last_cell and the communicator comm
      !   n_dots  - number of dot products (columns of a and b) to evaluate
      !   a, b    - input fields, one column per dot product
      !   results - receives the n_dots reduced values
      !
      ! NOTE(review): the reduction datatype is MPI_DOUBLE_PRECISION, which
      ! presumes rk is the double-precision kind -- confirm against rk.
      ! NOTE(review): no size checks are made here; callers presumably ensure
      ! a and b have at least n_dots columns and results has at least n_dots
      ! elements -- verify at the call sites.
      use mod_profiling, only : profiler_start, profiler_stop
      type(flow_mpi_t), intent(in) :: flow
      integer, intent(in) :: n_dots
      real(rk), intent(in) :: a(:,:)        ! shape (ncells, n_dots)
      real(rk), intent(in) :: b(:,:)        ! shape (ncells, n_dots)
      real(rk), intent(out) :: results(:)   ! shape (n_dots)
      real(rk) :: partial(n_dots)           ! this rank's contribution per column
      real(rk) :: acc
      integer :: icell, idot, mpi_err

      ! Rank-local partial sums, accumulated cell by cell in ascending order.
      do idot = 1, n_dots
         acc = zero
         do icell = flow%first_cell, flow%last_cell
            acc = acc + a(icell, idot) * b(icell, idot)
         end do
         partial(idot) = acc
      end do

      ! One collective for all columns; only the communication is profiled.
      call profiler_start('MPI_Communication')
      call MPI_Allreduce(partial, results, n_dots, MPI_DOUBLE_PRECISION, &
                         MPI_SUM, flow%comm, mpi_err)
      call check_mpi(mpi_err, 'MPI_Allreduce batched dots')
      call profiler_stop('MPI_Communication')
   end subroutine flow_global_dots_owned