group_global_coordinator_impl Subroutine

subroutine group_global_coordinator_impl(ctx)

Uses

  • proc~~group_global_coordinator_impl~~UsesGraph proc~group_global_coordinator_impl group_global_coordinator_impl module~mqc_many_body_expansion mqc_many_body_expansion proc~group_global_coordinator_impl->module~mqc_many_body_expansion module~mqc_config_adapter mqc_config_adapter module~mqc_many_body_expansion->module~mqc_config_adapter module~mqc_json_output_types mqc_json_output_types module~mqc_many_body_expansion->module~mqc_json_output_types module~mqc_method_config mqc_method_config module~mqc_many_body_expansion->module~mqc_method_config module~mqc_physical_fragment mqc_physical_fragment module~mqc_many_body_expansion->module~mqc_physical_fragment module~mqc_resources mqc_resources module~mqc_many_body_expansion->module~mqc_resources pic_types pic_types module~mqc_many_body_expansion->pic_types module~mqc_config_adapter->module~mqc_method_config module~mqc_config_adapter->module~mqc_physical_fragment module~mqc_config_adapter->pic_types module~mqc_calculation_keywords mqc_calculation_keywords module~mqc_config_adapter->module~mqc_calculation_keywords module~mqc_config_parser mqc_config_parser module~mqc_config_adapter->module~mqc_config_parser module~mqc_elements mqc_elements module~mqc_config_adapter->module~mqc_elements module~mqc_error mqc_error module~mqc_config_adapter->module~mqc_error pic_logger pic_logger module~mqc_config_adapter->pic_logger module~mqc_json_output_types->pic_types module~mqc_thermochemistry mqc_thermochemistry module~mqc_json_output_types->module~mqc_thermochemistry module~mqc_method_config->pic_types module~mqc_method_types mqc_method_types module~mqc_method_config->module~mqc_method_types module~mqc_physical_fragment->pic_types module~mqc_cgto mqc_cgto module~mqc_physical_fragment->module~mqc_cgto module~mqc_physical_fragment->module~mqc_elements module~mqc_physical_fragment->module~mqc_error module~mqc_geometry mqc_geometry module~mqc_physical_fragment->module~mqc_geometry module~mqc_physical_constants mqc_physical_constants 
module~mqc_physical_fragment->module~mqc_physical_constants module~mqc_xyz_reader mqc_xyz_reader module~mqc_physical_fragment->module~mqc_xyz_reader module~mqc_mpi_comms mqc_mpi_comms module~mqc_resources->module~mqc_mpi_comms module~mqc_calculation_keywords->pic_types module~mqc_calculation_defaults mqc_calculation_defaults module~mqc_calculation_keywords->module~mqc_calculation_defaults module~mqc_cgto->pic_types module~mqc_config_parser->module~mqc_physical_fragment module~mqc_config_parser->pic_types module~mqc_config_parser->module~mqc_error module~mqc_config_parser->module~mqc_geometry module~mqc_config_parser->module~mqc_method_types module~mqc_calc_types mqc_calc_types module~mqc_config_parser->module~mqc_calc_types module~mqc_config_parser->module~mqc_calculation_defaults module~mqc_elements->pic_types pic_ascii pic_ascii module~mqc_elements->pic_ascii module~mqc_geometry->pic_types module~mqc_method_types->pic_types pic_mpi_lib pic_mpi_lib module~mqc_mpi_comms->pic_mpi_lib module~mqc_physical_constants->pic_types module~mqc_thermochemistry->pic_types module~mqc_thermochemistry->module~mqc_elements module~mqc_thermochemistry->module~mqc_physical_constants module~mqc_thermochemistry->pic_logger pic_io pic_io module~mqc_thermochemistry->pic_io pic_lapack_interfaces pic_lapack_interfaces module~mqc_thermochemistry->pic_lapack_interfaces module~mqc_xyz_reader->pic_types module~mqc_xyz_reader->module~mqc_error module~mqc_xyz_reader->module~mqc_geometry module~mqc_calc_types->pic_types module~mqc_calculation_defaults->pic_types

Group-global coordinator for distributing a fragment shard to node coordinators.

Arguments

Type Intent Optional Attributes Name
class(many_body_expansion_t), intent(in) :: ctx

Calls

proc~~group_global_coordinator_impl~~CallsGraph proc~group_global_coordinator_impl group_global_coordinator_impl abort_comm abort_comm proc~group_global_coordinator_impl->abort_comm error error proc~group_global_coordinator_impl->error isend isend proc~group_global_coordinator_impl->isend proc~flush_group_results flush_group_results proc~group_global_coordinator_impl->proc~flush_group_results proc~get_group_leader_rank get_group_leader_rank proc~group_global_coordinator_impl->proc~get_group_leader_rank proc~handle_group_node_requests handle_group_node_requests proc~group_global_coordinator_impl->proc~handle_group_node_requests proc~handle_local_worker_requests_group handle_local_worker_requests_group proc~group_global_coordinator_impl->proc~handle_local_worker_requests_group proc~handle_local_worker_results_to_batch handle_local_worker_results_to_batch proc~group_global_coordinator_impl->proc~handle_local_worker_results_to_batch proc~handle_node_results_to_batch handle_node_results_to_batch proc~group_global_coordinator_impl->proc~handle_node_results_to_batch proc~queue_destroy queue_destroy proc~group_global_coordinator_impl->proc~queue_destroy proc~queue_init_from_list queue_init_from_list proc~group_global_coordinator_impl->proc~queue_init_from_list proc~queue_is_empty queue_is_empty proc~group_global_coordinator_impl->proc~queue_is_empty proc~receive_group_assignment_matrix receive_group_assignment_matrix proc~group_global_coordinator_impl->proc~receive_group_assignment_matrix proc~flush_group_results->isend proc~result_destroy calculation_result_t%result_destroy proc~flush_group_results->proc~result_destroy proc~result_isend result_isend proc~flush_group_results->proc~result_isend proc~handle_group_node_requests->isend iprobe iprobe proc~handle_group_node_requests->iprobe irecv irecv proc~handle_group_node_requests->irecv proc~queue_pop queue_pop proc~handle_group_node_requests->proc~queue_pop proc~send_fragment_payload_from_row send_fragment_payload_from_row 
proc~handle_group_node_requests->proc~send_fragment_payload_from_row proc~handle_local_worker_requests_group->isend proc~handle_local_worker_requests_group->iprobe proc~handle_local_worker_requests_group->irecv proc~handle_local_worker_requests_group->proc~queue_pop proc~handle_local_worker_requests_group->proc~send_fragment_payload_from_row proc~handle_local_worker_results_to_batch->abort_comm proc~handle_local_worker_results_to_batch->error proc~handle_local_worker_results_to_batch->proc~flush_group_results proc~handle_local_worker_results_to_batch->iprobe proc~append_result_to_batch append_result_to_batch proc~handle_local_worker_results_to_batch->proc~append_result_to_batch proc~error_get_message error_t%error_get_message proc~handle_local_worker_results_to_batch->proc~error_get_message proc~handle_local_worker_results_to_batch->proc~result_destroy proc~result_irecv result_irecv proc~handle_local_worker_results_to_batch->proc~result_irecv to_char to_char proc~handle_local_worker_results_to_batch->to_char proc~handle_node_results_to_batch->abort_comm proc~handle_node_results_to_batch->error proc~handle_node_results_to_batch->proc~flush_group_results proc~handle_node_results_to_batch->iprobe proc~handle_node_results_to_batch->irecv proc~handle_node_results_to_batch->proc~append_result_to_batch proc~handle_node_results_to_batch->proc~error_get_message proc~handle_node_results_to_batch->proc~result_destroy proc~handle_node_results_to_batch->proc~result_irecv proc~handle_node_results_to_batch->to_char proc~receive_group_assignment_matrix->irecv recv recv proc~receive_group_assignment_matrix->recv proc~result_reset calculation_result_t%result_reset proc~result_destroy->proc~result_reset proc~result_irecv->irecv proc~result_irecv->recv proc~result_isend->isend send send proc~result_isend->send proc~send_fragment_payload_from_row->isend proc~build_fragment_payload_from_row build_fragment_payload_from_row 
proc~send_fragment_payload_from_row->proc~build_fragment_payload_from_row proc~energy_reset energy_t%energy_reset proc~result_reset->proc~energy_reset proc~error_clear error_t%error_clear proc~result_reset->proc~error_clear proc~mp2_reset mp2_energy_t%mp2_reset proc~energy_reset->proc~mp2_reset

Called by

proc~~group_global_coordinator_impl~~CalledByGraph proc~group_global_coordinator_impl group_global_coordinator_impl proc~node_coordinator_impl node_coordinator_impl proc~node_coordinator_impl->proc~group_global_coordinator_impl proc~node_coordinator node_coordinator proc~node_coordinator->proc~node_coordinator_impl interface~node_coordinator node_coordinator interface~node_coordinator->proc~node_coordinator proc~gmbe_run_distributed gmbe_context_t%gmbe_run_distributed proc~gmbe_run_distributed->interface~node_coordinator proc~mbe_run_distributed mbe_context_t%mbe_run_distributed proc~mbe_run_distributed->interface~node_coordinator

Variables

Type Visibility Attributes Name Initial
integer(kind=int32), private :: batch_count
integer(kind=int64), private, allocatable :: batch_ids(:)
type(calculation_result_t), private, allocatable :: batch_results(:)
integer, private :: finished_nodes
integer(kind=int64), private, allocatable :: group_fragment_ids(:)
integer, private :: group_id
integer, private :: group_leader_rank
integer, private :: group_node_count
integer, private, allocatable :: group_polymers(:,:)
type(queue_t), private :: group_queue
integer(kind=int64), private :: idx
integer, private :: local_finished_workers
integer, private :: local_node_done
type(request_t), private :: req
integer(kind=int64), private :: results_received
integer(kind=int64), private, allocatable :: temp_ids(:)
integer(kind=int64), private :: total_group_fragments
integer(kind=int64), private :: worker_fragment_map(ctx%resources%mpi_comms%node_comm%size())

Source Code

   subroutine group_global_coordinator_impl(ctx)
      !! Group-global coordinator for distributing a fragment shard to node coordinators.
      !!
      !! Runs only on this group's leader rank (aborts otherwise). It receives the
      !! group's slice of the fragment assignment matrix, serves work items to remote
      !! node coordinators and to workers on its own node, collects results into a
      !! fixed-size batch, flushes full batches upstream, and finally signals
      !! completion with a TAG_GROUP_DONE message.
      use mqc_many_body_expansion, only: many_body_expansion_t
      class(many_body_expansion_t), intent(in) :: ctx

      ! Fragment ids assigned to this group and their polymer composition rows,
      ! both filled by receive_group_assignment_matrix below.
      integer(int64), allocatable :: group_fragment_ids(:)
      integer, allocatable :: group_polymers(:, :)
      ! Work queue of LOCAL indices (1..N) into group_fragment_ids/group_polymers.
      type(queue_t) :: group_queue
      integer(int64), allocatable :: temp_ids(:)
      integer(int64) :: idx
      ! Result batching state: batch_count entries of batch_ids/batch_results are
      ! valid; flushed whenever batch_count reaches GROUP_RESULT_BATCH_SIZE.
      integer(int32) :: batch_count
      integer(int64), allocatable :: batch_ids(:)
      type(calculation_result_t), allocatable :: batch_results(:)
      integer(int64) :: results_received
      integer(int64) :: total_group_fragments
      ! Termination bookkeeping: the loop ends once every node in the group has
      ! finished AND every expected result has arrived.
      integer :: finished_nodes
      integer :: local_finished_workers
      integer :: group_node_count
      integer :: group_leader_rank, group_id
      integer :: local_node_done
      ! One slot per rank on this node; maps an in-flight worker to its fragment.
      integer(int64) :: worker_fragment_map(ctx%resources%mpi_comms%node_comm%size())
      type(request_t) :: req

      ! Sanity check: this routine must execute on the group's leader rank only.
      call get_group_leader_rank(ctx, ctx%resources%mpi_comms%world_comm%rank(), group_leader_rank, group_id)
      if (group_leader_rank /= ctx%resources%mpi_comms%world_comm%rank()) then
         call logger%error("group_global_coordinator_impl called on non-group leader rank")
         call abort_comm(ctx%resources%mpi_comms%world_comm, 1)
      end if
      ! Number of nodes belonging to this group (includes the leader's own node).
      group_node_count = count(ctx%group_ids == group_id)

      ! Blocking receive of this group's shard of the assignment matrix,
      ! presumably sent by the global coordinator -- confirm against sender.
      call receive_group_assignment_matrix(ctx%resources%mpi_comms%world_comm, group_fragment_ids, group_polymers)

      if (size(group_fragment_ids) > 0) then
         ! Queue stores local indices (1..N) into group_fragment_ids/group_polymers.
         allocate (temp_ids(size(group_fragment_ids)))
         do idx = 1_int64, size(group_fragment_ids, kind=int64)
            temp_ids(idx) = idx
         end do
         call queue_init_from_list(group_queue, temp_ids)
         deallocate (temp_ids)
      else
         ! Empty shard: set up a valid empty queue directly instead of calling
         ! queue_init_from_list with a zero-size list.
         group_queue%count = 0_int64
         group_queue%head = 1_int64
      end if

      batch_count = 0
      allocate (batch_ids(GROUP_RESULT_BATCH_SIZE))
      allocate (batch_results(GROUP_RESULT_BATCH_SIZE))
      results_received = 0_int64
      total_group_fragments = int(size(group_fragment_ids, kind=int64), int64)
      finished_nodes = 0
      local_finished_workers = 0
      local_node_done = 0
      worker_fragment_map = 0

      ! Main event loop: poll each message channel in turn until every node has
      ! reported finished and all expected fragment results have been received.
      ! NOTE(review): the ordering of the handler calls below is part of the
      ! messaging protocol -- do not reorder without checking the sibling handlers.
      do while (finished_nodes < group_node_count .or. results_received < total_group_fragments)

         ! Results from workers on the leader's own node (via node_comm).
         call handle_local_worker_results_to_batch(ctx%resources%mpi_comms%node_comm, &
                                                   ctx%resources%mpi_comms%world_comm, &
                                                   worker_fragment_map, batch_count, batch_ids, batch_results, &
                                                   results_received)

         ! Results forwarded by other node coordinators in the group (via world_comm).
         call handle_node_results_to_batch(ctx%resources%mpi_comms%world_comm, batch_count, batch_ids, batch_results, &
                                           results_received)

         ! Work requests from other node coordinators; increments finished_nodes
         ! as remote nodes drain and report done.
         call handle_group_node_requests(ctx, group_queue, group_fragment_ids, group_polymers, finished_nodes)

         ! Work requests from local workers -- only while some are still active
         ! (node_comm size 1 means the leader has no co-located workers).
         if (ctx%resources%mpi_comms%node_comm%size() > 1 .and. &
             local_finished_workers < ctx%resources%mpi_comms%node_comm%size() - 1) then
            call handle_local_worker_requests_group(ctx, group_queue, group_fragment_ids, group_polymers, &
                                                    worker_fragment_map, local_finished_workers)
         end if

         ! The leader's own node counts as finished once the queue is drained and
         ! all of its local workers (if any) have reported done. Guarded so it is
         ! only counted once.
         if (local_node_done == 0) then
            if (queue_is_empty(group_queue) .and. &
                (ctx%resources%mpi_comms%node_comm%size() == 1 .or. &
                 local_finished_workers >= ctx%resources%mpi_comms%node_comm%size() - 1)) then
               local_node_done = 1
               finished_nodes = finished_nodes + 1
            end if
         end if

         ! Ship a full batch upstream; flush_group_results resets batch_count.
         if (batch_count >= GROUP_RESULT_BATCH_SIZE) then
            call flush_group_results(ctx%resources%mpi_comms%world_comm, batch_count, batch_ids, batch_results)
         end if
      end do

      ! Flush any remaining partial batch after the loop terminates.
      call flush_group_results(ctx%resources%mpi_comms%world_comm, batch_count, batch_ids, batch_results)

      ! Tell rank 0 (presumably the global coordinator -- TODO confirm) that this
      ! group is done; wait so the request completes before buffers go away.
      call isend(ctx%resources%mpi_comms%world_comm, 0, 0, TAG_GROUP_DONE, req)
      call wait(req)

      ! Explicit cleanup (allocatables would also auto-deallocate on return).
      call queue_destroy(group_queue)
      deallocate (group_fragment_ids)
      deallocate (group_polymers)
      deallocate (batch_ids)
      deallocate (batch_results)
   end subroutine group_global_coordinator_impl