Report Typos and Errors    
Semi-Lagrangian Library
Modular library for kinetic and gyrokinetic simulations of plasmas in fusion energy devices.
sll_m_decomposition_advanced.F90
Go to the documentation of this file.
1 !**************************************************************
2 !
3 ! Domain decomposition module for Selalib (1D, ..., 6d)
4 !
5 ! Extension of the base module to support overlapping communication.
6 !
7 !**************************************************************
13 
14 ! The hierarchy of the classes in this module is as follows:
15 !
16 ! sll_t_decomposition, contains a single element of:
17 ! sll_t_decomposition__local, contains an array of:
18 ! sll_t_decomposition__dimension, contains an array of:
19 ! sll_t_decomposition__block, contains an array of:
20 ! sll_t_decomposition__buffer, specializes to:
21 ! sll_t_decomposition__buffer_3d
22 ! sll_t_decomposition__buffer_6d
23 !
24 ! This complex setup is necessary to implement multiple dimensions,
25 ! blocks per dimension, buffers per block.
26 
27 ! Preprocessor macro: Use single precision for the halo exchange, ie.
28 ! basically a poor man's lossy compression to speed up MPI communication.
29 ! NOTE: The define should be passed by the build system, please see
30 ! the build script <compile_mpcdf.sh> for an example.
31 #ifdef USE_HALO_REAL32
32 #define HALO_DTYPE sll_real32
33 #else
34 #define HALO_DTYPE sll_real64
35 #endif
36 
37 
39 !+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
40 #include "sll_assert.h"
41 #include "sll_errors.h"
42 #include "sll_memory.h"
43 #include "sll_working_precision.h"
44 
45  use mpi, only: &
46  mpi_double_precision, &
47  mpi_real, &
48  mpi_sendrecv, &
49  mpi_status_ignore, &
50  mpi_success
51 
52  ! use sll_m_decomposition
53  use sll_m_decomposition, only: &
57 
58 #ifdef _OPENMP
59  use omp_lib
60 #define OMP_COLLAPSE collapse(2)
61 #define OMP_SCHEDULE schedule(static)
62 !#define OMP_SCHEDULE schedule(dynamic)
63 #endif
64 
65 ! FMEMPOOL is currently not compatible with OpenMP nested parallelism!
66 #undef USE_FMEMPOOL
67 
68 #ifdef USE_FMEMPOOL
69  use fmempool
70 #endif
71 
73 
74  use sll_m_utilities, only: &
76 
77  implicit none
78 
79  public :: &
85  ! sll_t_decomposition__buffer_3d, &
86  ! sll_t_decomposition__buffer_6d, &
87 
88  public :: &
99 
100 
101  private
102 
103 
104  ! Temporary note: Only the decomposition stuff is re-implemented here,
105  ! the topology stuff from sll_m_decomposition can be reused for the moment.
106 
107 
108  ! --- nested hierarchy of decomposition object types, supporting 3d and 6d decompositions ---
109  ! names were shortened, nesting is indicated by the double underscore __
111  character(len=64) :: label
112  sll_int32, allocatable :: mn(:) ! min index specific to the buffer
113  sll_int32, allocatable :: mx(:) ! max index specific to the buffer
114  sll_int32, allocatable :: nw(:) ! net width of the buffer
115  logical :: valid
116  ! --- buffers for MPI communication ---
117  halo_dtype, pointer :: sendbuf(:) => null()
118  ! entries only used by blocking MPI_Sendrecv()
119  sll_int32 :: nel
120  sll_int32 :: mpi_source
121  sll_int32 :: mpi_dest
122  sll_int32 :: mpi_tag
123  sll_int32 :: mpi_precision
124  ! entry only used by non-blocking communication (isend,irecv)
125  sll_int32 :: request(2)
126  ! support for compressed mpi messages
127  logical :: use_compression = .false.
128  logical :: compression_verbose = .false.
129  type(sll_t_compressed_buffer) :: comp_sendbuf, comp_recvbuf
130  ! temporarily placed 6d array here because of 'select type' compiler bug with intel/17.0.4
131  halo_dtype, pointer :: mem(:,:,:,:,:,:) => null()
133 
134  ! type, extends(sll_t_decomposition__buffer) :: sll_t_decomposition__buffer_3d
135  ! HALO_DTYPE, pointer :: mem(:,:,:)
136  ! end type sll_t_decomposition__buffer_3d
137  !
138  ! type, extends(sll_t_decomposition__buffer) :: sll_t_decomposition__buffer_6d
139  ! HALO_DTYPE, pointer :: mem(:,:,:,:,:,:)
140  ! end type sll_t_decomposition__buffer_6d
141 
143  sll_int32, allocatable :: mn(:) ! min index specific to the block, w/o halo
144  sll_int32, allocatable :: mx(:) ! max index specific to the block, w/o halo
145  sll_int32, allocatable :: nw(:) ! net width of the block, w/o halo
146  class(sll_t_decomposition__buffer), pointer :: buffer(:) => null()
148 
150  sll_int32 :: block_dim ! dimension in which the blocking takes place
151  type(sll_t_decomposition__block), pointer :: block(:) => null()
153 
155  sll_int32, allocatable :: mn(:) ! min index, w/o halo
156  sll_int32, allocatable :: mx(:) ! max index, w/o halo
157  sll_int32, allocatable :: nw(:) ! net width of the array, w/o halo
158  type(sll_t_decomposition__dimension), pointer :: dimension(:) => null()
160 
162  sll_int32, allocatable :: global(:)
164  end type sll_t_decomposition
165 
166  ! --- maximum number of buffers per block, increase if necessary
167  sll_int32, parameter :: max_buffers_per_block = 8
168 
169 !#define VERBOSE(name) write(*,*) name // " called"
170 #define VERBOSE(name)
171 
172  sll_int32, parameter :: left_buffer = 1
173  sll_int32, parameter :: right_buffer = 0
174 
175 contains
176 
178  class(sll_t_decomposition__buffer) :: self
179  sll_int32 :: ierr
180  verbose("sll_s_destruct_decomposition__buffer")
181  if (self%valid) then
182 ! select type(ptr => self)
183 ! class is (sll_t_decomposition__buffer_3d)
184 ! #ifdef USE_FMEMPOOL
185 ! call mp_release(ptr%mem)
186 ! #else
187 ! SLL_DEALLOCATE(ptr%mem, ierr)
188 ! #endif
189 ! class is (sll_t_decomposition__buffer_6d)
190 #ifdef USE_FMEMPOOL
191  call mp_release(self%mem)
192 #else
193  sll_deallocate(self%mem, ierr)
194 #endif
195  ! class default
196  ! SLL_ERROR("sll_s_destruct_decomposition__buffer", "unknown buffer data structure")
197  ! end select
198  sll_deallocate_array(self%mn, ierr)
199  sll_deallocate_array(self%mx, ierr)
200  sll_deallocate_array(self%nw, ierr)
201  self%valid = .false.
202  endif
204 
206  type(sll_t_decomposition__block) :: self
207  sll_int32, intent(in) :: nd
208  sll_int32 :: i, ierr
209  character(len=16) :: str
210  verbose("sll_s_construct_decomposition__block")
211  ! select case (nd)
212  ! case (3)
213  ! allocate(sll_t_decomposition__buffer_3d::self%buffer(max_buffers_per_block))
214  ! case (6)
215  ! allocate(sll_t_decomposition__buffer_6d::self%buffer(max_buffers_per_block))
216  ! case default
217  ! write (str, *) nd
218  ! SLL_ERROR("sll_s_construct_decomposition__block", "nd = " // trim(str) // " is not supported.")
219  ! end select
220  allocate(self%buffer(max_buffers_per_block))
221  do i=1,max_buffers_per_block
222  self%buffer(i)%valid = .false.
223  enddo
224  sll_allocate(self%mn(nd), ierr)
225  sll_allocate(self%mx(nd), ierr)
226  sll_allocate(self%nw(nd), ierr)
228 
230  type(sll_t_decomposition__block) :: self
231  sll_int32 :: i, ierr
232  verbose("sll_s_destruct_decomposition__block")
233  if (associated(self%buffer)) then
234  do i=1,size(self%buffer)
235  call sll_s_destruct_decomposition__buffer(self%buffer(i))
236  enddo
237  sll_deallocate(self%buffer, ierr)
238  endif
239  if (allocated(self%mn)) then
240  sll_deallocate_array(self%mn, ierr)
241  endif
242  if (allocated(self%mx)) then
243  sll_deallocate_array(self%mx, ierr)
244  endif
245  if (allocated(self%nw)) then
246  sll_deallocate_array(self%nw, ierr)
247  endif
249 
250 
252  type(sll_t_decomposition__dimension) :: self
253  sll_int32, intent(in) :: nd
254  sll_int32 :: i, ierr
255  sll_int32, parameter :: n_default_blocks = 0 ! disabled
256  verbose("sll_s_construct_decomposition__dimension")
257  self%block_dim = 0
259 
261  type(sll_t_decomposition__dimension) :: self
262  sll_int32 :: i, ierr
263  verbose("sll_s_destruct_decomposition__dimension")
264  if (associated(self%block)) then
265  do i=1,size(self%block)
266  call sll_s_destruct_decomposition__block(self%block(i))
267  enddo
268  sll_deallocate(self%block, ierr)
269  endif
270  self%block_dim = 0
272 
273 
274  subroutine sll_s_construct_decomposition__local(self, topology, grid_size, nd)
275  type(sll_t_decomposition__local) :: self
276  type(sll_t_cartesian_topology_6d), pointer, intent(in) :: topology ! TODO: support generic nd topology
277  sll_int32, intent(in) :: nd
278  sll_int32, intent(in) :: grid_size(nd)
279  sll_int32 :: i, ierr
280  sll_int32 :: lp, l0, l1
281  verbose("sll_s_construct_decomposition__local")
282  ! --- initialize index values
283  sll_allocate(self%mn(nd), ierr)
284  sll_allocate(self%mx(nd), ierr)
285  sll_allocate(self%nw(nd), ierr)
286  ! loop over dimensions and compute index values for each dimension
287  do i=1,nd
288  ! compute the local number of grid points
289  lp = grid_size(i) / topology%procs(i)
290  ! compute the lower local index bound (the coords array starts at zero)
291  l0 = 1 + topology%coords(i) * lp
292  ! compute the upper local index bound (the coords array starts at zero)
293  l1 = (topology%coords(i) + 1) * lp
294  ! ---
295  self%mn(i) = l0
296  self%mx(i) = l1
297  self%nw(i) = lp
298  end do
299  sll_allocate(self%dimension(nd), ierr)
300  do i=1,nd
301  call sll_s_construct_decomposition__dimension(self%dimension(i), nd)
302  end do
304 
306  type(sll_t_decomposition__local) :: self
307  sll_int32 :: i, ierr
308  verbose("sll_s_destruct_decomposition__local")
309  if (associated(self%dimension)) then
310  do i=1,size(self%dimension)
311  call sll_s_destruct_decomposition__dimension(self%dimension(i))
312  end do
313  sll_deallocate(self%dimension, ierr)
314  endif
315  if (allocated(self%mn)) then
316  sll_deallocate_array(self%mn, ierr)
317  endif
318  if (allocated(self%mx)) then
319  sll_deallocate_array(self%mx, ierr)
320  endif
321  if (allocated(self%nw)) then
322  sll_deallocate_array(self%nw, ierr)
323  endif
325 
326 
327  ! --- public functions below ---
328 
329  ! Recursively build up a domain decomposition object (including nested objects).
330  function sll_f_new_cartesian_domain_decomposition(topology, grid_size, nd)
332  sll_int32, intent(in) :: nd
333  type(sll_t_cartesian_topology_6d), pointer, intent(in) :: topology ! TODO: support generic nd topology
334  sll_int32, intent(in) :: grid_size(nd)
335  sll_int32 :: i, ierr
336  type(sll_t_decomposition), pointer :: self ! short convenience alias
337  verbose("sll_f_new_cartesian_domain_decomposition")
338  ! --- initial consistency checks
339  sll_assert_always(size(topology%procs) == nd)
340  do i=1,nd
341  sll_assert(mod(grid_size(i), topology%procs(i)) == 0)
342  end do
343  ! --- initialize index values
344  sll_allocate(self, ierr)
345  sll_allocate(self%global(nd), ierr)
346  self%global(:) = grid_size(:)
347  ! --- recursively initialize decomposition object
348  call sll_s_construct_decomposition__local(self%local, topology, grid_size, nd)
350  nullify(self)
352 
353 
354  ! Recursively tear down a domain decomposition object (including nested objects and memory).
356  type(sll_t_decomposition), pointer :: decomposition
357  sll_int32 :: ierr
358  verbose("sll_s_deallocate_cartesian_domain_decomposition")
359  ! --- recursively deallocate/invalidata decomposition object
360  call sll_s_destruct_decomposition__local(decomposition%local)
361  sll_deallocate_array(decomposition%global, ierr)
362  sll_deallocate(decomposition, ierr)
363  nullify(decomposition)
365 
366 
368  subroutine sll_s_dd_define_blocks(decomposition, id, n_blocks, block_dim)
369  type(sll_t_decomposition), pointer, intent(in) :: decomposition
370  sll_int32, intent(in) :: id
371  sll_int32, intent(in) :: n_blocks
372  sll_int32, intent(in) :: block_dim
373  sll_int32 :: i, nd, ierr, nw, mn
374  nd = size(decomposition%global)
375  sll_assert_always( id <= nd )
376  sll_assert_always( block_dim <= nd )
377  sll_assert_always( associated(decomposition%local%dimension) )
378  ! clean up first, if blocks exist already
379  if (associated(decomposition%local%dimension(id)%block)) then
380  do i=1,size(decomposition%local%dimension(id)%block)
381  call sll_s_destruct_decomposition__block(decomposition%local%dimension(id)%block(i))
382  enddo
383  sll_deallocate(decomposition%local%dimension(id)%block, ierr)
384  endif
385  sll_assert_always(decomposition%local%nw(block_dim) >= n_blocks)
386  sll_assert_always(mod(decomposition%local%nw(block_dim), n_blocks) == 0)
387  sll_allocate(decomposition%local%dimension(id)%block(n_blocks), ierr)
388  do i=1,n_blocks
389  ! allocate index arrays, the buffer array, but not the memory fields inside
390  call sll_s_construct_decomposition__block(decomposition%local%dimension(id)%block(i), nd)
391  enddo
392  ! copy index arrays
393  do i=1,n_blocks
394  decomposition%local%dimension(id)%block(i)%mn = decomposition%local%mn
395  decomposition%local%dimension(id)%block(i)%mx = decomposition%local%mx
396  decomposition%local%dimension(id)%block(i)%nw = decomposition%local%nw
397  enddo
398  ! modify index arrays, sub-divide the blocks along block_dim
399  nw = decomposition%local%nw(block_dim) / n_blocks
400  mn = decomposition%local%mn(block_dim)
401  do i=1,n_blocks
402  decomposition%local%dimension(id)%block(i)%nw(block_dim) = nw
403  decomposition%local%dimension(id)%block(i)%mn(block_dim) = mn + (i-1) * nw
404  decomposition%local%dimension(id)%block(i)%mx(block_dim) = mn + i * nw - 1
405  end do
406  decomposition%local%dimension(id)%block_dim = block_dim
407  end subroutine sll_s_dd_define_blocks
408 
410  subroutine sll_s_dd_allocate_buffer(decomposition, id, i_block, i_buffer, width, label)
411  type(sll_t_decomposition), pointer :: decomposition
412  sll_int32, intent(in) :: id
413  sll_int32, intent(in) :: i_block
414  sll_int32, intent(in) :: i_buffer
415  sll_int32, intent(in) :: width
416  character(len=*), optional, intent(in) :: label
417  sll_int32 :: nd, ierr
418  class(sll_t_decomposition__buffer), pointer :: bufalias
419 
420  nd = size(decomposition%global)
421  sll_assert_always( width >= 0 )
422  sll_assert_always( id <= nd )
423  sll_assert_always( i_buffer <= max_buffers_per_block )
424  sll_assert_always( associated(decomposition%local%dimension(id)%block) )
425  sll_assert_always( associated(decomposition%local%dimension(id)%block(i_block)%buffer) )
426 
427  bufalias => decomposition%local%dimension(id)%block(i_block)%buffer(i_buffer)
428 
429  ! select type(ptr => BUFALIAS)
430  ! class is (sll_t_decomposition__buffer_3d)
431  ! SLL_ASSERT_ALWAYS(nd == 3)
432  ! class is (sll_t_decomposition__buffer_6d)
433  ! SLL_ASSERT_ALWAYS(nd == 6)
434  ! class is (sll_t_decomposition__buffer)
435  ! SLL_ERROR("sll_s_dd_allocate_buffer", "buffer is of generic type")
436  ! class default
437  ! SLL_ERROR("sll_s_dd_allocate_buffer", "unknown buffer data structure")
438  ! end select
439 
440  ! initialize index arrays
441  sll_allocate(bufalias%mn(nd), ierr)
442  sll_allocate(bufalias%mx(nd), ierr)
443  sll_allocate(bufalias%nw(nd), ierr)
444  bufalias%mn = decomposition%local%dimension(id)%block(i_block)%mn
445  bufalias%mx = decomposition%local%dimension(id)%block(i_block)%mx
446  bufalias%nw = decomposition%local%dimension(id)%block(i_block)%nw
447  if (get_buffer_side(i_buffer) == left_buffer) then
448  ! --- odd indices (1,3,5, ...) shall designate "left" buffers
449  bufalias%mx(id) = bufalias%mn(id) - 1
450  bufalias%mn(id) = bufalias%mx(id) - width + 1
451  bufalias%nw(id) = width
452  else ! RIGHT_BUFFER
453  ! --- even indices (2,4,6, ...) shall designate "right" buffers
454  bufalias%mn(id) = bufalias%mx(id) + 1
455  bufalias%mx(id) = bufalias%mn(id) + width - 1
456  bufalias%nw(id) = width
457  endif
458 
459 ! select type(ptr => BUFALIAS)
460 ! class is (sll_t_decomposition__buffer_3d)
461 ! #ifdef USE_FMEMPOOL
462 ! call mp_acquire(ptr%mem, ptr%mn, ptr%mx)
463 ! #else
464 ! allocate(ptr%mem(ptr%mn(1):ptr%mx(1), &
465 ! ptr%mn(2):ptr%mx(2), &
466 ! ptr%mn(3):ptr%mx(3)))
467 ! #endif
468 ! class is (sll_t_decomposition__buffer_6d)
469 ! #ifdef USE_FMEMPOOL
470 ! call mp_acquire(ptr%mem, ptr%mn, ptr%mx)
471 ! #else
472 ! allocate(ptr%mem(ptr%mn(1):ptr%mx(1), &
473 ! ptr%mn(2):ptr%mx(2), &
474 ! ptr%mn(3):ptr%mx(3), &
475 ! ptr%mn(4):ptr%mx(4), &
476 ! ptr%mn(5):ptr%mx(5), &
477 ! ptr%mn(6):ptr%mx(6)))
478 ! #endif
479 ! class default
480 ! SLL_ERROR("sll_s_dd_allocate_buffer", "unknown buffer data structure")
481 ! end select
482 
483 #ifdef USE_FMEMPOOL
484  call mp_acquire(bufalias%mem, bufalias%mn, bufalias%mx)
485 #else
486  allocate(bufalias%mem(bufalias%mn(1):bufalias%mx(1), &
487  bufalias%mn(2):bufalias%mx(2), &
488  bufalias%mn(3):bufalias%mx(3), &
489  bufalias%mn(4):bufalias%mx(4), &
490  bufalias%mn(5):bufalias%mx(5), &
491  bufalias%mn(6):bufalias%mx(6)))
492 #endif
493 
494  bufalias%valid = .true.
495 
496  nullify(bufalias)
497  end subroutine sll_s_dd_allocate_buffer
498 
499 
500  subroutine sll_s_dd_deallocate_buffer(decomposition, id, i_block, i_buffer)
501  type(sll_t_decomposition), pointer :: decomposition
502  sll_int32, intent(in) :: id, i_block, i_buffer
503  sll_int32 :: nd
504  class(sll_t_decomposition__buffer), pointer :: buffer ! convenience shortcut
505 
506  nd = size(decomposition%global)
507  sll_assert_always( id <= nd )
508  sll_assert_always( i_buffer <= max_buffers_per_block )
509  sll_assert_always( associated(decomposition%local%dimension(id)%block) )
510  buffer => decomposition%local%dimension(id)%block(i_block)%buffer(i_buffer)
512  nullify(buffer)
513  end subroutine sll_s_dd_deallocate_buffer
514 
515 
516  subroutine sll_s_post_halo_exchange_real64(topology, decomposition, f6d, id, i_block, i_buffer, n_threads, post_halo_exchange)
517  integer, parameter :: nd = 6
518  type(sll_t_cartesian_topology_6d), pointer, intent(in) :: topology
519  type(sll_t_decomposition), pointer, intent(inout) :: decomposition
520  sll_real64, dimension(:,:,:,:,:,:), intent(inout) :: &
521  f6d(decomposition%local%mn(1):decomposition%local%mx(1), &
522  decomposition%local%mn(2):decomposition%local%mx(2), &
523  decomposition%local%mn(3):decomposition%local%mx(3), &
524  decomposition%local%mn(4):decomposition%local%mx(4), &
525  decomposition%local%mn(5):decomposition%local%mx(5), &
526  decomposition%local%mn(6):decomposition%local%mx(6))
527  sll_int32, intent(in) :: id, i_block, i_buffer
528  sll_int32, intent(in), optional :: n_threads
529  logical, intent(in), optional :: post_halo_exchange
530 
531  class(sll_t_decomposition__buffer), pointer :: buffer ! convenience shortcut
532  halo_dtype, pointer :: mem(:,:,:,:,:,:) ! convenience shortcut
533  integer :: jd, i, j, k, l, m, n
534  integer :: ierr, n_omp_threads
535  logical, save :: first_call = .true.
536  logical :: do_post
537 
538  ! --- MPI communication-related variables and buffers
539  sll_int64 :: nxc ! total number of elements to be exchanged
540  sll_int32 :: nel ! number of elements to be exchanged at a single MPI call
541  integer, dimension(:,:) :: r_rx(nd,2) ! index ranges for copy operations
542  integer, dimension(:,:) :: r_tx(nd,2)
543  sll_int32, parameter :: nxc_max = 2147483647
544 #ifdef USE_HALO_REAL32
545  integer, parameter :: mpi_precision = mpi_real
546  integer, parameter :: word_size = 4
547 #else
548  integer, parameter :: mpi_precision = mpi_double_precision
549  integer, parameter :: word_size = 8
550 #endif
551  integer :: mpi_tag, mpi_dest, mpi_source
552 
553  logical, save :: use_compression
554  integer, save :: prec
555  logical, save :: compression_verbose
556 
557 #ifdef _OPENMP
558  if (present(n_threads)) then
559  n_omp_threads = n_threads
560  else
561  n_omp_threads = omp_get_max_threads()
562  endif
563 #else
564  n_omp_threads = 1
565 #endif
566 
567  if (present(post_halo_exchange)) then
568  do_post = post_halo_exchange
569  else
570  do_post = .true.
571  endif
572 
573  if (first_call) then
574  first_call = .false.
575  compression_verbose = .false.
576 #ifdef USE_HALO_REAL32
577  if (topology%rank == 0) then
578  write(*,*) "sll_m_decomposition::post_halo_exchange() uses single precision messages"
579  endif
580  use_compression = .false.
581 #else
582 #ifdef USE_ZFP
583  use_compression = sll_f_query_environment("SLL_USE_COMPRESSION", .false.)
584  if (use_compression) then
585  prec = sll_f_query_environment("SLL_ZFP_PRECISION", 32)
586  call set_compression_precision(prec)
587  if (topology%rank == 0) then
588  write(*,*) "sll_m_decomposition::post_halo_exchange() uses message compression"
589  compression_verbose = sll_f_query_environment("SLL_COMPRESSION_VERBOSE", .false.)
590  endif
591  endif
592 #else
593  use_compression = .false.
594 #endif
595 #endif
596  endif
597 
598  sll_assert_always( size(decomposition%global) == nd )
599  sll_assert_always( id <= nd )
600  sll_assert_always( i_buffer <= max_buffers_per_block )
601  sll_assert_always( associated(decomposition%local%dimension(id)%block) )
602  buffer => decomposition%local%dimension(id)%block(i_block)%buffer(i_buffer)
603 
604  if (buffer%nw(id) <= 0) then
605  return
606  endif
607 
608  if (get_buffer_side(i_buffer) == left_buffer) then
609  ! --- fill the left buffer, equivalent to copying from f6d to the right neighbor
610  mpi_dest = topology%neighbors(2*id)
611  mpi_source = topology%neighbors(2*id-1)
612  ! --- calculate rx and tx index ranges
613  ! index ranges on the computation array to be sent
614  r_tx(:,1) = decomposition%local%dimension(id)%block(i_block)%mn(:)
615  r_tx(:,2) = decomposition%local%dimension(id)%block(i_block)%mx(:)
616  r_tx(id,1) = decomposition%local%dimension(id)%block(i_block)%mx(id) - buffer%nw(id) + 1
617  r_tx(id,2) = decomposition%local%dimension(id)%block(i_block)%mx(id)
618  else ! RIGHT_BUFFER
619  ! --- fill the right buffer, equivalent to copying from f6d to the left neighbor
620  mpi_dest = topology%neighbors(2*id-1)
621  mpi_source = topology%neighbors(2*id)
622  ! --- calculate rx and tx index ranges
623  ! index ranges on the computation array to be sent
624  r_tx(:,1) = decomposition%local%dimension(id)%block(i_block)%mn(:)
625  r_tx(:,2) = decomposition%local%dimension(id)%block(i_block)%mx(:)
626  r_tx(id,1) = decomposition%local%dimension(id)%block(i_block)%mn(id)
627  r_tx(id,2) = decomposition%local%dimension(id)%block(i_block)%mn(id) + buffer%nw(id) - 1
628  endif
629  ! index ranges on the buffer are just its extents
630  r_rx(:,1) = decomposition%local%dimension(id)%block(i_block)%buffer(i_buffer)%mn(:)
631  r_rx(:,2) = decomposition%local%dimension(id)%block(i_block)%buffer(i_buffer)%mx(:)
632 
633  mem => sll_f_get_mem_6d_from_buffer_obj(buffer)
634 
635  if (topology%procs(id) == 1) then
636  ! --- we copy the ghost cells directly, assuming periodic BCs for the moment
637 ! buffer%mem(r_rx(1,1):r_rx(1,2), r_rx(2,1):r_rx(2,2), &
638 ! r_rx(3,1):r_rx(3,2), r_rx(4,1):r_rx(4,2), &
639 ! r_rx(5,1):r_rx(5,2), r_rx(6,1):r_rx(6,2)) = &
640 ! f6d(r_tx(1,1):r_tx(1,2), r_tx(2,1):r_tx(2,2), r_tx(3,1):r_tx(3,2), &
641 ! r_tx(4,1):r_tx(4,2), r_tx(5,1):r_tx(5,2), r_tx(6,1):r_tx(6,2))
642 !$omp parallel num_threads(n_omp_threads) default(shared) private(i,j,k,l,m,n)
643 !$omp do OMP_SCHEDULE OMP_COLLAPSE
644  do n = 0, r_rx(6,2)-r_rx(6,1)
645  do m = 0, r_rx(5,2)-r_rx(5,1)
646  do l = 0, r_rx(4,2)-r_rx(4,1)
647  do k = 0, r_rx(3,2)-r_rx(3,1)
648  do j = 0, r_rx(2,2)-r_rx(2,1)
649  do i = 0, r_rx(1,2)-r_rx(1,1)
650  mem(r_rx(1,1)+i, r_rx(2,1)+j, r_rx(3,1)+k, &
651  r_rx(4,1)+l, r_rx(5,1)+m, r_rx(6,1)+n) = &
652  f6d(r_tx(1,1)+i, r_tx(2,1)+j, r_tx(3,1)+k, &
653  r_tx(4,1)+l, r_tx(5,1)+m, r_tx(6,1)+n)
654  enddo
655  enddo
656  enddo
657  enddo
658  enddo
659  enddo
660 !$omp end do
661 !$omp end parallel
662  else
663  ! calculate the total number of elements to be exchanged
664  nxc = int(buffer%nw(id), i64)
665  do jd=1,nd
666  if (jd == id) then
667  cycle
668  else
669  nxc = nxc * buffer%nw(jd)
670  endif
671  end do
672  sll_allocate(buffer%sendbuf(nxc), ierr)
673 
675  decomposition%local%mn, &
676  decomposition%local%mx, &
677  buffer%sendbuf, r_tx, &
678  n_omp_threads)
679 
680  if (use_compression) then
681 #ifndef USE_HALO_REAL32
682  if (modulo(nxc, int(zfp_blocksize, i64)) /= 0) then
683  if (topology%rank == 0) then
684  write(*,*) "mpi_sendrecv_compressed() : disabled due to blocksize mismatch"
685  endif
686  buffer%use_compression = .false.
687  else
688  buffer%use_compression = .true.
689  buffer%compression_verbose = compression_verbose
690  i = int(nxc, i32) ! i64 -> i32
691  call deflate_buffer_real64(buffer%sendbuf, buffer%comp_sendbuf, n_doubles=i, &
692  n_threads=n_omp_threads)
693  sll_deallocate(buffer%sendbuf, ierr)
694  endif
695 #endif
696  endif
697 
698  sll_assert_always(nxc <= nxc_max) ! check if message size is within the allowed limit
699  nel = int(nxc, i32) ! 64 bit -> 32 bit
700  mpi_tag = 1000*id + 10*i_block + i_buffer
701 
702 ! call MPI_Sendrecv(buffer%sendbuf, nel, mpi_precision, mpi_dest, mpi_tag, &
703 ! mem, nel, mpi_precision, mpi_source, mpi_tag, &
704 ! topology%comm, MPI_STATUS_IGNORE, ierr)
705 ! SLL_ASSERT_ALWAYS(ierr == MPI_SUCCESS)
706 
707  if (do_post) then
708  call mpi_isend(buffer%sendbuf, nel, mpi_precision, mpi_dest, mpi_tag, topology%comm, buffer%request(1), ierr)
709  sll_assert_always(ierr == mpi_success)
710  call mpi_irecv(mem, nel, mpi_precision, mpi_source, mpi_tag, topology%comm, buffer%request(2), ierr)
711  sll_assert_always(ierr == mpi_success)
712  else
713  buffer%nel = nel
714  buffer%mpi_source = mpi_source
715  buffer%mpi_dest = mpi_dest
716  buffer%mpi_tag = mpi_tag
717  buffer%mpi_precision = mpi_precision
718  endif
719 
720  endif
721  end subroutine sll_s_post_halo_exchange_real64
722 
723 
724  subroutine sll_s_wait_halo_exchange(topology, decomposition, id, i_block, i_buffer)
725  integer, parameter :: nd = 6
726  type(sll_t_cartesian_topology_6d), pointer, intent(in) :: topology
727  type(sll_t_decomposition), pointer, intent(inout) :: decomposition
728  sll_int32, intent(in) :: id, i_block, i_buffer
729  class(sll_t_decomposition__buffer), pointer :: buffer ! convenience shortcut
730  integer :: ierr
731  sll_assert_always( size(decomposition%global) == nd )
732  sll_assert_always( id <= nd )
733  sll_assert_always( i_buffer <= max_buffers_per_block )
734  sll_assert_always( associated(decomposition%local%dimension(id)%block) )
735  buffer => decomposition%local%dimension(id)%block(i_block)%buffer(i_buffer)
736  if (buffer%nw(id) <= 0) then
737  return
738  endif
739  if (topology%procs(id) == 1) then
740  ! --- do nothing ---
741  else
742  call mpi_waitall(2, buffer%request, mpi_status_ignore, ierr)
743  sll_assert_always(ierr == mpi_success)
744  sll_deallocate(buffer%sendbuf, ierr)
745  endif
746  end subroutine sll_s_wait_halo_exchange
747 
748 
749  subroutine sll_s_blocking_halo_exchange(topology, decomposition, id, i_block, i_buffer)
750  integer, parameter :: nd = 6
751  type(sll_t_cartesian_topology_6d), pointer, intent(in) :: topology
752  type(sll_t_decomposition), pointer, intent(inout) :: decomposition
753  sll_int32, intent(in) :: id, i_block, i_buffer
754  class(sll_t_decomposition__buffer), pointer :: buffer ! convenience shortcut
755  halo_dtype, pointer :: mem(:,:,:,:,:,:)
756  integer :: ierr
757  sll_assert_always( size(decomposition%global) == nd )
758  sll_assert_always( id <= nd )
759  sll_assert_always( i_buffer <= max_buffers_per_block )
760  sll_assert_always( associated(decomposition%local%dimension(id)%block) )
761  buffer => decomposition%local%dimension(id)%block(i_block)%buffer(i_buffer)
762  if (buffer%nw(id) <= 0) then
763  return
764  else
765  if (topology%procs(id) == 1) then
766  ! --- do nothing ---
767  else
768  if (buffer%use_compression) then
769  call sll_s_mpi_sendrecv_compressed_core(buffer%comp_sendbuf, buffer%comp_recvbuf,&
770  buffer%mpi_dest, buffer%mpi_source,&
771  topology%comm, buffer%compression_verbose,&
772  buffer%mpi_tag)
773  call deallocate_compressed_buffer_obj(buffer%comp_sendbuf)
774  else
775  sll_assert_always(associated(buffer%sendbuf))
776  mem => sll_f_get_mem_6d_from_buffer_obj(buffer)
777  call mpi_sendrecv(buffer%sendbuf, buffer%nel, buffer%mpi_precision, buffer%mpi_dest, buffer%mpi_tag, &
778  mem, buffer%nel, buffer%mpi_precision, buffer%mpi_source, buffer%mpi_tag, &
779  topology%comm, mpi_status_ignore, ierr)
780  sll_assert_always(ierr == mpi_success)
781  sll_deallocate(buffer%sendbuf, ierr)
782  endif
783  endif
784  endif
785  end subroutine sll_s_blocking_halo_exchange
786 
787 
788  function get_buffer_side(i_buffer)
789  sll_int32 :: get_buffer_side, i_buffer
790  get_buffer_side = mod(i_buffer, 2)
791  end function get_buffer_side
792 
793 
795  halo_dtype, pointer :: sll_f_get_mem_6d_from_buffer_obj(:,:,:,:,:,:)
796  class(sll_t_decomposition__buffer) :: self
797  verbose("sll_f_get_mem_6d_from_buffer_obj")
799  if (self%valid) then
800  ! select type(ptr => self)
801  ! class is (sll_t_decomposition__buffer_6d)
803  ! class default
804  ! SLL_ERROR("sll_s_get_ptr_6d_from_buffer", "buffer does not contain 6d array")
805  ! end select
806  else
807  sll_error("sll_s_get_ptr_6d_from_buffer", "attempt to access invalid buffer")
808  endif
810 
811 
812  function sll_f_dd_get_n_blocks(decomposition, id)
813  sll_int32 :: sll_f_dd_get_n_blocks
814  type(sll_t_decomposition), pointer :: decomposition
815  sll_int32, intent(in) :: id
816  sll_int32 :: nd, ierr
817  nd = size(decomposition%global)
818  sll_assert_always( id <= nd )
819  sll_assert_always( associated(decomposition%local%dimension) )
820  if (associated(decomposition%local%dimension(id)%block)) then
821  sll_f_dd_get_n_blocks = size(decomposition%local%dimension(id)%block)
822  else
824  endif
825  end function sll_f_dd_get_n_blocks
826 
827 
Plain Fortran implementation of a memory pool.
Definition: fmempool.F90:5
logical, save verbose
Definition: fmempool.F90:58
Module providing an F90 interface to the ZFP compression library: http://computation....
subroutine set_compression_precision(prec)
subroutine deallocate_compressed_buffer_obj(comp)
subroutine deflate_buffer_real64(buf, comp, n_doubles, n_threads)
compress buffer
integer, parameter zfp_blocksize
data structure to support threaded ZFP compression and decompression
Module providing data structures and tools to implement domain decompositions.
subroutine sll_s_construct_decomposition__block(self, nd)
subroutine sll_s_construct_decomposition__dimension(self, nd)
subroutine sll_s_construct_decomposition__local(self, topology, grid_size, nd)
subroutine, public sll_s_dd_deallocate_buffer(decomposition, id, i_block, i_buffer)
type(sll_t_decomposition) function, pointer, public sll_f_new_cartesian_domain_decomposition(topology, grid_size, nd)
dimension(:,:,:,:,:,:), pointer, public sll_f_get_mem_6d_from_buffer_obj(self)
subroutine, public sll_s_dd_define_blocks(decomposition, id, n_blocks, block_dim)
Initialize blocking of halo cells for dimension id.
subroutine, public sll_s_blocking_halo_exchange(topology, decomposition, id, i_block, i_buffer)
subroutine, public sll_s_wait_halo_exchange(topology, decomposition, id, i_block, i_buffer)
function, public sll_f_dd_get_n_blocks(decomposition, id)
subroutine, public sll_s_dd_allocate_buffer(decomposition, id, i_block, i_buffer, width, label)
Allocate the i_block th halo buffers for dimension id.
subroutine, public sll_s_post_halo_exchange_real64(topology, decomposition, f6d, id, i_block, i_buffer, n_threads, post_halo_exchange)
subroutine, public sll_s_deallocate_cartesian_domain_decomposition(decomposition)
Module providing data structures and tools to implement domain decompositions.
subroutine, public sll_s_copy_array_to_buffer_6d_real64(arr, arr_lo, arr_hi, buf, ranges, n_threads)
subroutine, public sll_s_mpi_sendrecv_compressed_core(comp_send, comp_recv, rank_send, rank_recv, mpi_comm, verbose, mpi_tag)
MPI sendrecv functionality, wrapped for a compressed buffer.
Some common numerical utilities.
logical function, public sll_f_query_environment(env_variable, default_value)
Query an environment variable for the values on,off,1,0,true,false and return the result as a logical...
Information on the 6D cartesian process topology.
    Report Typos and Errors