#ifdef USE_HALO_REAL32
#define HALO_DTYPE sll_real32
#else
#define HALO_DTYPE sll_real64
#endif
#include "sll_assert.h"
#include "sll_errors.h"
#include "sll_memory.h"
#include "sll_working_precision.h"

      mpi_double_precision, &

#define OMP_COLLAPSE collapse(2)
#define OMP_SCHEDULE schedule(static)
   character(len=64) :: label
   sll_int32, allocatable :: mn(:)
   sll_int32, allocatable :: mx(:)
   sll_int32, allocatable :: nw(:)
   HALO_DTYPE, pointer :: sendbuf(:) => null()
   sll_int32 :: mpi_source
   sll_int32 :: mpi_dest
   sll_int32 :: mpi_precision
   sll_int32 :: request(2)
   logical :: use_compression = .false.
   logical :: compression_verbose = .false.
   HALO_DTYPE, pointer :: mem(:,:,:,:,:,:) => null()
   sll_int32, allocatable :: mn(:)
   sll_int32, allocatable :: mx(:)
   sll_int32, allocatable :: nw(:)
   sll_int32 :: block_dim
   sll_int32, allocatable :: mn(:)
   sll_int32, allocatable :: mx(:)
   sll_int32, allocatable :: nw(:)
   sll_int32, allocatable :: global(:)
   sll_int32, parameter :: max_buffers_per_block = 8
#define VERBOSE(name)
   sll_int32, parameter :: left_buffer = 1
   sll_int32, parameter :: right_buffer = 0
      VERBOSE("sll_s_destruct_decomposition__buffer")
      sll_deallocate(self%mem, ierr)
      sll_deallocate_array(self%mn, ierr)
      sll_deallocate_array(self%mx, ierr)
      sll_deallocate_array(self%nw, ierr)
      sll_int32, intent(in) :: nd
      character(len=16) :: str
      VERBOSE("sll_s_construct_decomposition__block")
      allocate(self%buffer(max_buffers_per_block))
      do i=1,max_buffers_per_block
         self%buffer(i)%valid = .false.
      sll_allocate(self%mn(nd), ierr)
      sll_allocate(self%mx(nd), ierr)
      sll_allocate(self%nw(nd), ierr)
      VERBOSE("sll_s_destruct_decomposition__block")
      if (associated(self%buffer)) then
         do i=1,size(self%buffer)
         sll_deallocate(self%buffer, ierr)
      if (allocated(self%mn)) then
         sll_deallocate_array(self%mn, ierr)
      if (allocated(self%mx)) then
         sll_deallocate_array(self%mx, ierr)
      if (allocated(self%nw)) then
         sll_deallocate_array(self%nw, ierr)
      sll_int32, intent(in) :: nd
      sll_int32, parameter :: n_default_blocks = 0
      VERBOSE("sll_s_construct_decomposition__dimension")
      VERBOSE("sll_s_destruct_decomposition__dimension")
      if (associated(self%block)) then
         do i=1,size(self%block)
         sll_deallocate(self%block, ierr)
      sll_int32, intent(in) :: nd
      sll_int32, intent(in) :: grid_size(nd)
      sll_int32 :: lp, l0, l1
      VERBOSE("sll_s_construct_decomposition__local")
      sll_allocate(self%mn(nd), ierr)
      sll_allocate(self%mx(nd), ierr)
      sll_allocate(self%nw(nd), ierr)
      lp = grid_size(i) / topology%procs(i)
      l0 = 1 + topology%coords(i) * lp
      l1 = (topology%coords(i) + 1) * lp
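      ! Worked example of the index arithmetic above (illustrative numbers only):
      ! grid_size(i)=64 distributed over topology%procs(i)=4 processes gives
      ! lp=16 points per process; the process with coords(i)=2 owns the local
      ! index range l0=33 .. l1=48.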
      sll_allocate(self%dimension(nd), ierr)
      VERBOSE("sll_s_destruct_decomposition__local")
      if (associated(self%dimension)) then
         do i=1,size(self%dimension)
         sll_deallocate(self%dimension, ierr)
      if (allocated(self%mn)) then
         sll_deallocate_array(self%mn, ierr)
      if (allocated(self%mx)) then
         sll_deallocate_array(self%mx, ierr)
      if (allocated(self%nw)) then
         sll_deallocate_array(self%nw, ierr)
      sll_int32, intent(in) :: nd
      sll_int32, intent(in) :: grid_size(nd)
      VERBOSE("sll_f_new_cartesian_domain_decomposition")
      sll_assert_always( size(topology%procs) == nd )
      sll_assert(mod(grid_size(i), topology%procs(i)) == 0)
      sll_allocate(self, ierr)
      sll_allocate(self%global(nd), ierr)
      self%global(:) = grid_size(:)
      VERBOSE("sll_s_deallocate_cartesian_domain_decomposition")
      sll_deallocate_array(decomposition%global, ierr)
      sll_deallocate(decomposition, ierr)
      nullify(decomposition)
      sll_int32, intent(in) :: id
      sll_int32, intent(in) :: n_blocks
      sll_int32, intent(in) :: block_dim
      sll_int32 :: i, nd, ierr, nw, mn
      nd = size(decomposition%global)
      sll_assert_always( id <= nd )
      sll_assert_always( block_dim <= nd )
      sll_assert_always( associated(decomposition%local%dimension) )
      if (associated(decomposition%local%dimension(id)%block)) then
         do i=1,size(decomposition%local%dimension(id)%block)
         sll_deallocate(decomposition%local%dimension(id)%block, ierr)
      sll_assert_always(decomposition%local%nw(block_dim) >= n_blocks)
      sll_assert_always(mod(decomposition%local%nw(block_dim), n_blocks) == 0)
      sll_allocate(decomposition%local%dimension(id)%block(n_blocks), ierr)
         decomposition%local%dimension(id)%block(i)%mn = decomposition%local%mn
         decomposition%local%dimension(id)%block(i)%mx = decomposition%local%mx
         decomposition%local%dimension(id)%block(i)%nw = decomposition%local%nw
      nw = decomposition%local%nw(block_dim) / n_blocks
      mn = decomposition%local%mn(block_dim)
         decomposition%local%dimension(id)%block(i)%nw(block_dim) = nw
         decomposition%local%dimension(id)%block(i)%mn(block_dim) = mn + (i-1) * nw
         decomposition%local%dimension(id)%block(i)%mx(block_dim) = mn + i * nw - 1
      decomposition%local%dimension(id)%block_dim = block_dim
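      ! Example of the blocking set up above (illustrative numbers only): for a
      ! local extent 1..64 along block_dim and n_blocks=4, each block keeps the
      ! full local extent in all other dimensions and covers nw=16 points along
      ! block_dim, i.e. the blocks span 1..16, 17..32, 33..48 and 49..64.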
      sll_int32, intent(in) :: id
      sll_int32, intent(in) :: i_block
      sll_int32, intent(in) :: i_buffer
      sll_int32, intent(in) :: width
      character(len=*), optional, intent(in) :: label
      sll_int32 :: nd, ierr
      nd = size(decomposition%global)
      sll_assert_always( width >= 0 )
      sll_assert_always( id <= nd )
      sll_assert_always( i_buffer <= max_buffers_per_block )
      sll_assert_always( associated(decomposition%local%dimension(id)%block) )
      sll_assert_always( associated(decomposition%local%dimension(id)%block(i_block)%buffer) )
      bufalias => decomposition%local%dimension(id)%block(i_block)%buffer(i_buffer)
      sll_allocate(bufalias%mn(nd), ierr)
      sll_allocate(bufalias%mx(nd), ierr)
      sll_allocate(bufalias%nw(nd), ierr)
      bufalias%mn = decomposition%local%dimension(id)%block(i_block)%mn
      bufalias%mx = decomposition%local%dimension(id)%block(i_block)%mx
      bufalias%nw = decomposition%local%dimension(id)%block(i_block)%nw
         bufalias%mx(id) = bufalias%mn(id) - 1
         bufalias%mn(id) = bufalias%mx(id) - width + 1
         bufalias%nw(id) = width
         bufalias%mn(id) = bufalias%mx(id) + 1
         bufalias%mx(id) = bufalias%mn(id) + width - 1
         bufalias%nw(id) = width
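      ! Halo extents along dimension id produced by the two branches above: the
      ! first places the buffer just below the block, covering
      ! [mn(id)-width, mn(id)-1]; the second places it just above, covering
      ! [mx(id)+1, mx(id)+width].  In both cases the buffer is width cells deep
      ! and spans the full block extent in every other dimension.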
      call mp_acquire(bufalias%mem, bufalias%mn, bufalias%mx)
      allocate(bufalias%mem(bufalias%mn(1):bufalias%mx(1), &
                            bufalias%mn(2):bufalias%mx(2), &
                            bufalias%mn(3):bufalias%mx(3), &
                            bufalias%mn(4):bufalias%mx(4), &
                            bufalias%mn(5):bufalias%mx(5), &
                            bufalias%mn(6):bufalias%mx(6)))
      bufalias%valid = .true.
      sll_int32, intent(in) :: id, i_block, i_buffer
      nd = size(decomposition%global)
      sll_assert_always( id <= nd )
      sll_assert_always( i_buffer <= max_buffers_per_block )
      sll_assert_always( associated(decomposition%local%dimension(id)%block) )
      buffer => decomposition%local%dimension(id)%block(i_block)%buffer(i_buffer)
      integer, parameter :: nd = 6
      sll_real64, dimension(:,:,:,:,:,:), intent(inout) :: &
         f6d(decomposition%local%mn(1):decomposition%local%mx(1), &
             decomposition%local%mn(2):decomposition%local%mx(2), &
             decomposition%local%mn(3):decomposition%local%mx(3), &
             decomposition%local%mn(4):decomposition%local%mx(4), &
             decomposition%local%mn(5):decomposition%local%mx(5), &
             decomposition%local%mn(6):decomposition%local%mx(6))
      sll_int32, intent(in) :: id, i_block, i_buffer
      sll_int32, intent(in), optional :: n_threads
      logical, intent(in), optional :: post_halo_exchange
      HALO_DTYPE, pointer :: mem(:,:,:,:,:,:)
      integer :: jd, i, j, k, l, m, n
      integer :: ierr, n_omp_threads
      logical, save :: first_call = .true.
      integer, dimension(:,:) :: r_rx(nd,2)
      integer, dimension(:,:) :: r_tx(nd,2)
      sll_int32, parameter :: nxc_max = 2147483647
#ifdef USE_HALO_REAL32
      integer, parameter :: mpi_precision = mpi_real
      integer, parameter :: word_size = 4
#else
      integer, parameter :: mpi_precision = mpi_double_precision
      integer, parameter :: word_size = 8
#endif
      integer :: mpi_tag, mpi_dest, mpi_source
      logical, save :: use_compression
      integer, save :: prec
      logical, save :: compression_verbose
      if (present(n_threads)) then
         n_omp_threads = n_threads
         n_omp_threads = omp_get_max_threads()
      if (present(post_halo_exchange)) then
         do_post = post_halo_exchange
      compression_verbose = .false.
#ifdef USE_HALO_REAL32
      if (topology%rank == 0) then
         write(*,*) "sll_m_decomposition::post_halo_exchange() uses single precision messages"
      use_compression = .false.
      if (use_compression) then
         if (topology%rank == 0) then
            write(*,*) "sll_m_decomposition::post_halo_exchange() uses message compression"
         use_compression = .false.
      sll_assert_always( size(decomposition%global) == nd )
      sll_assert_always( id <= nd )
      sll_assert_always( i_buffer <= max_buffers_per_block )
      sll_assert_always( associated(decomposition%local%dimension(id)%block) )
      buffer => decomposition%local%dimension(id)%block(i_block)%buffer(i_buffer)
      if (buffer%nw(id) <= 0) then
         mpi_dest = topology%neighbors(2*id)
         mpi_source = topology%neighbors(2*id-1)
         r_tx(:,1) = decomposition%local%dimension(id)%block(i_block)%mn(:)
         r_tx(:,2) = decomposition%local%dimension(id)%block(i_block)%mx(:)
         r_tx(id,1) = decomposition%local%dimension(id)%block(i_block)%mx(id) - buffer%nw(id) + 1
         r_tx(id,2) = decomposition%local%dimension(id)%block(i_block)%mx(id)
         mpi_dest = topology%neighbors(2*id-1)
         mpi_source = topology%neighbors(2*id)
         r_tx(:,1) = decomposition%local%dimension(id)%block(i_block)%mn(:)
         r_tx(:,2) = decomposition%local%dimension(id)%block(i_block)%mx(:)
         r_tx(id,1) = decomposition%local%dimension(id)%block(i_block)%mn(id)
         r_tx(id,2) = decomposition%local%dimension(id)%block(i_block)%mn(id) + buffer%nw(id) - 1
      r_rx(:,1) = decomposition%local%dimension(id)%block(i_block)%buffer(i_buffer)%mn(:)
      r_rx(:,2) = decomposition%local%dimension(id)%block(i_block)%buffer(i_buffer)%mx(:)
      if (topology%procs(id) == 1) then
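         ! Only one MPI process along dimension id: no message is needed, so the
         ! source range r_tx of f6d is copied straight into the halo buffer mem
         ! over the receive range r_rx by the loop nest below.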
         do n = 0, r_rx(6,2)-r_rx(6,1)
            do m = 0, r_rx(5,2)-r_rx(5,1)
               do l = 0, r_rx(4,2)-r_rx(4,1)
                  do k = 0, r_rx(3,2)-r_rx(3,1)
                     do j = 0, r_rx(2,2)-r_rx(2,1)
                        do i = 0, r_rx(1,2)-r_rx(1,1)
                           mem(r_rx(1,1)+i, r_rx(2,1)+j, r_rx(3,1)+k, &
                               r_rx(4,1)+l, r_rx(5,1)+m, r_rx(6,1)+n) = &
                              f6d(r_tx(1,1)+i, r_tx(2,1)+j, r_tx(3,1)+k, &
                                  r_tx(4,1)+l, r_tx(5,1)+m, r_tx(6,1)+n)
         nxc = int(buffer%nw(id), i64)
            nxc = nxc * buffer%nw(jd)
         sll_allocate(buffer%sendbuf(nxc), ierr)
                                                  decomposition%local%mn, &
                                                  decomposition%local%mx, &
                                                  buffer%sendbuf, r_tx, &
         if (use_compression) then
#ifndef USE_HALO_REAL32
            if (topology%rank == 0) then
               write(*,*) "mpi_sendrecv_compressed() : disabled due to blocksize mismatch"
            buffer%use_compression = .false.
            buffer%use_compression = .true.
            buffer%compression_verbose = compression_verbose
                         n_threads=n_omp_threads)
            sll_deallocate(buffer%sendbuf, ierr)
         sll_assert_always(nxc <= nxc_max)
         mpi_tag = 1000*id + 10*i_block + i_buffer
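         ! Non-blocking exchange: the tag encodes (id, i_block, i_buffer) so that
         ! several outstanding exchanges can be told apart (unique as long as
         ! i_block stays below 100; i_buffer is asserted to be at most 8).  The
         ! isend/irecv pair posted below is completed later by the mpi_waitall in
         ! the wait routine (sll_s_wait_halo_exchange).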
         call mpi_isend(buffer%sendbuf, nel, mpi_precision, mpi_dest, mpi_tag, topology%comm, buffer%request(1), ierr)
         sll_assert_always(ierr == mpi_success)
         call mpi_irecv(mem, nel, mpi_precision, mpi_source, mpi_tag, topology%comm, buffer%request(2), ierr)
         sll_assert_always(ierr == mpi_success)
         buffer%mpi_source = mpi_source
         buffer%mpi_dest = mpi_dest
         buffer%mpi_tag = mpi_tag
         buffer%mpi_precision = mpi_precision
      integer, parameter :: nd = 6
      sll_int32, intent(in) :: id, i_block, i_buffer
      sll_assert_always( size(decomposition%global) == nd )
      sll_assert_always( id <= nd )
      sll_assert_always( i_buffer <= max_buffers_per_block )
      sll_assert_always( associated(decomposition%local%dimension(id)%block) )
      buffer => decomposition%local%dimension(id)%block(i_block)%buffer(i_buffer)
      if (buffer%nw(id) <= 0) then
      if (topology%procs(id) == 1) then
      call mpi_waitall(2, buffer%request, mpi_statuses_ignore, ierr)
      sll_assert_always(ierr == mpi_success)
      sll_deallocate(buffer%sendbuf, ierr)
      integer, parameter :: nd = 6
      sll_int32, intent(in) :: id, i_block, i_buffer
      HALO_DTYPE, pointer :: mem(:,:,:,:,:,:)
      sll_assert_always( size(decomposition%global) == nd )
      sll_assert_always( id <= nd )
      sll_assert_always( i_buffer <= max_buffers_per_block )
      sll_assert_always( associated(decomposition%local%dimension(id)%block) )
      buffer => decomposition%local%dimension(id)%block(i_block)%buffer(i_buffer)
      if (buffer%nw(id) <= 0) then
      if (topology%procs(id) == 1) then
      if (buffer%use_compression) then
                                            buffer%mpi_dest, buffer%mpi_source, &
                                            topology%comm, buffer%compression_verbose, &
         sll_assert_always( associated(buffer%sendbuf) )
         call mpi_sendrecv(buffer%sendbuf, buffer%nel, buffer%mpi_precision, buffer%mpi_dest, buffer%mpi_tag, &
                           mem, buffer%nel, buffer%mpi_precision, buffer%mpi_source, buffer%mpi_tag, &
                           topology%comm, mpi_status_ignore, ierr)
         sll_assert_always(ierr == mpi_success)
         sll_deallocate(buffer%sendbuf, ierr)
      VERBOSE("sll_f_get_mem_6d_from_buffer_obj")
      sll_error("sll_s_get_ptr_6d_from_buffer", "attempt to access invalid buffer")
      sll_int32, intent(in) :: id
      sll_int32 :: nd, ierr
      nd = size(decomposition%global)
      sll_assert_always( id <= nd )
      sll_assert_always( associated(decomposition%local%dimension) )
      if (associated(decomposition%local%dimension(id)%block)) then