28 #ifdef USE_HALO_REAL32
29 #define HALO_DTYPE sll_real32
31 #define HALO_DTYPE sll_real64
37 #include "sll_assert.h"
38 #include "sll_memory.h"
39 #include "sll_working_precision.h"
49 mpi_double_precision, &
65 mpi_max_processor_name, &
66 mpi_get_processor_name, &
76 #define OMP_COLLAPSE collapse(2)
77 #define OMP_SCHEDULE schedule(static)
132 sll_int32 :: info = -1
134 sll_int32 :: procs(6)
136 logical :: periodic(6)
138 sll_int32 :: coords(6)
140 sll_int32 :: neighbors(12)
152 sll_int32 :: info = -1
154 sll_int32 :: procs(3)
156 logical :: periodic(3)
158 sll_int32 :: coords(3)
160 sll_int32 :: neighbors(6)
166 sll_int32,
dimension(:) :: mn(6)
167 sll_int32,
dimension(:) :: mx(6)
168 sll_int32,
dimension(:) :: hw(6)
170 sll_int32,
dimension(:) :: lo(6)
171 sll_int32,
dimension(:) :: hi(6)
173 sll_int32,
dimension(:) :: nw(6)
174 sll_int32,
dimension(:) :: gw(6)
176 sll_int32,
dimension(:) :: tx_lolo(6)
177 sll_int32,
dimension(:) :: tx_lohi(6)
178 sll_int32,
dimension(:) :: tx_hilo(6)
179 sll_int32,
dimension(:) :: tx_hihi(6)
180 sll_int32,
dimension(:) :: rx_lolo(6)
181 sll_int32,
dimension(:) :: rx_lohi(6)
182 sll_int32,
dimension(:) :: rx_hilo(6)
183 sll_int32,
dimension(:) :: rx_hihi(6)
189 sll_int32,
dimension(:) :: mn(6)
190 sll_int32,
dimension(:) :: mx(6)
191 sll_int32,
dimension(:) :: nw(6)
193 halo_dtype,
pointer :: buf(:,:,:,:,:,:) => null()
195 halo_dtype,
dimension(:,:,:,:,:,:),
allocatable :: buf
201 sll_int32,
dimension(:) :: mn(3)
202 sll_int32,
dimension(:) :: mx(3)
203 sll_int32,
dimension(:) :: nw(3)
204 halo_dtype,
dimension(:,:,:),
allocatable :: buf
211 sll_int32,
dimension(:) :: mn(6)
212 sll_int32,
dimension(:) :: mx(6)
213 sll_int32,
dimension(:) :: nw(6)
219 halo_dtype,
allocatable :: bc_left_send(:,:,:,:,:)
220 halo_dtype,
allocatable :: bc_right_send(:,:,:,:,:)
221 halo_dtype,
allocatable :: bc_left(:,:,:,:,:)
222 halo_dtype,
allocatable :: bc_right(:,:,:,:,:)
224 sll_int32,
dimension(:) :: mn_cell(6)
225 sll_int32,
dimension(:) :: mx_cell(6)
226 sll_int32,
dimension(:) :: n_cells(6)
233 sll_int32,
dimension(:) :: mn(3)
234 sll_int32,
dimension(:) :: mx(3)
235 sll_int32,
dimension(:) :: nw(3)
241 sll_int32,
dimension(:) :: mn_cell(6)
242 sll_int32,
dimension(:) :: mx_cell(6)
243 sll_int32,
dimension(:) :: n_cells(6)
251 sll_int32,
dimension(:) :: mn(3)
252 sll_int32,
dimension(:) :: mx(3)
253 sll_int32,
dimension(:) :: hw(3)
255 sll_int32,
dimension(:) :: lo(3)
256 sll_int32,
dimension(:) :: hi(3)
258 sll_int32,
dimension(:) :: nw(3)
259 sll_int32,
dimension(:) :: gw(3)
261 sll_int32,
dimension(:) :: tx_lolo(3)
262 sll_int32,
dimension(:) :: tx_lohi(3)
263 sll_int32,
dimension(:) :: tx_hilo(3)
264 sll_int32,
dimension(:) :: tx_hihi(3)
265 sll_int32,
dimension(:) :: rx_lolo(3)
266 sll_int32,
dimension(:) :: rx_lohi(3)
267 sll_int32,
dimension(:) :: rx_hilo(3)
268 sll_int32,
dimension(:) :: rx_hihi(3)
273 sll_int32,
dimension(:) :: global(6)
280 sll_int32,
dimension(:) :: global(6)
281 sll_int32,
dimension(:) :: n_cells(6)
288 sll_int32,
dimension(:) :: global(3)
295 sll_int32,
dimension(:) :: global(3)
326 integer,
parameter :: nd=6
327 sll_int32,
intent(in) :: procs_per_dimension(nd)
328 sll_int32,
intent(inout) :: rank_map(0:)
329 integer :: i,j,k,l,m,n,rk
330 integer,
allocatable :: mpi_grid(:,:,:,:,:,:)
332 allocate(mpi_grid(0:procs_per_dimension(1)-1,&
333 0:procs_per_dimension(2)-1,&
334 0:procs_per_dimension(3)-1,&
335 0:procs_per_dimension(4)-1,&
336 0:procs_per_dimension(5)-1,&
337 0:procs_per_dimension(6)-1))
342 do i=0,procs_per_dimension(1)-1
343 do j=0,procs_per_dimension(2)-1
344 do k=0,procs_per_dimension(3)-1
345 do l=0,procs_per_dimension(4)-1
346 do m=0,procs_per_dimension(5)-1
347 do n=0,procs_per_dimension(6)-1
348 mpi_grid(i,j,k,l,m,n) = rk
359 do n=0,procs_per_dimension(6)-1
360 do m=0,procs_per_dimension(5)-1
361 do l=0,procs_per_dimension(4)-1
362 do k=0,procs_per_dimension(3)-1
363 do j=0,procs_per_dimension(2)-1
364 do i=0,procs_per_dimension(1)-1
365 rank_map(rk) = mpi_grid(i,j,k,l,m,n)
382 integer,
parameter :: nd=6
383 sll_int32,
intent(in) :: procs_per_dimension(nd)
384 logical,
intent(in) :: periodic(nd)
388 integer :: name_len, my_rank, num_ranks, fd, in1, in2, new_rank, comm_temp
389 character(len=MPI_MAX_PROCESSOR_NAME) :: hostname
390 character(len=MPI_MAX_PROCESSOR_NAME),
allocatable :: hostnames(:)
391 integer,
allocatable :: ranks_reordered(:)
392 logical :: q_block, q_block_6d_py, q_transpose
394 my_rank = top_collective%rank
395 num_ranks = top_collective%size
401 inquire(file=
"./block6d.py", exist=q_block_6d_py)
404 if (((q_transpose).or.(q_block)).and.(q_reorder))
then
407 if ((q_transpose).and.(q_block))
then
408 q_transpose = .false.
411 if ((q_block).and.(.not.(q_block_6d_py)))
then
412 if (my_rank == 0)
then
413 write(*,*)
" MPI topology: disabled blocking due to missing <block6d.py>"
417 if ((q_block).and.(top_collective%size < 64))
then
418 if (my_rank == 0)
then
419 write(*,*)
" MPI topology: disabled blocking due to small number of processes"
429 if ((q_transpose).or.(q_block))
then
431 allocate(ranks_reordered(0:num_ranks-1))
437 call mpi_get_processor_name(hostname, name_len, ierr)
438 if (my_rank == 0)
then
439 write(*,*)
" MPI topology: blocked process topology"
441 allocate(hostnames(0:num_ranks-1))
444 hostnames(0) = hostname
446 call mpi_recv(hostnames(i), mpi_max_processor_name, mpi_character, &
447 i, 0, top_collective%comm, mpi_status_ignore, ierr)
452 open(unit=fd, file=
'rank_hostnames.dat', status=
'replace', action=
'write')
453 write(fd,*) num_ranks
459 write(fd,
'(I6,A,A)') i,
" ", trim(hostnames(i))
462 deallocate(hostnames)
468 call execute_command_line(
"python ./block6d.py")
471 open(unit=fd, file=
'rank_reordered.dat', status=
'old', action=
'read')
474 ranks_reordered(i) = in2
478 call mpi_send(hostname, mpi_max_processor_name, mpi_character, &
479 0, 0, top_collective%comm, ierr)
483 if (my_rank == 0)
then
484 write(*,*)
" MPI topology: transposed process topology"
490 call mpi_scatter(ranks_reordered, 1, mpi_integer, &
491 new_rank, 1, mpi_integer, &
492 0, top_collective%comm, ierr)
493 if (my_rank == 0)
then
494 deallocate(ranks_reordered)
498 call mpi_comm_split(top_collective%comm, 0, new_rank, comm_temp, ierr)
500 call mpi_cart_create(comm_temp, nd, &
506 call mpi_comm_free(comm_temp, ierr)
509 if ((my_rank == 0).and.(q_reorder))
then
510 write(*,*)
" MPI topology: MPI_Cart_create() _may_ reorder processes."
512 call mpi_cart_create(top_collective%comm, nd,&
518 sll_assert(ierr == mpi_success)
524 sll_assert(ierr == mpi_success)
529 sll_assert(ierr == mpi_success)
532 if ((q_transpose).or.(q_block))
then
543 sll_assert(ierr == mpi_success)
552 sll_assert(ierr == mpi_success)
564 integer,
parameter :: nd=3
565 sll_int32,
intent(in) :: procs_per_dimension(nd)
566 logical,
intent(in) :: periodic(nd)
568 logical,
parameter :: reorder = .false.
577 call mpi_cart_create(top_collective%comm, nd,&
583 sll_assert(ierr == mpi_success)
589 sll_assert(ierr == mpi_success)
594 sll_assert(ierr == mpi_success)
604 sll_assert(ierr == mpi_success)
613 sll_assert(ierr == mpi_success)
626 logical,
dimension(6),
intent(in) :: keep_dim
627 sll_int32,
parameter :: nd = 3
628 sll_int32 :: i, j, ierr
633 if (keep_dim(i) .eqv. .true.) j = j + 1
643 call mpi_cart_sub(t6d%comm, keep_dim, &
645 sll_assert(ierr == mpi_success)
647 call mpi_comm_rank(t3d%comm,&
650 sll_assert(ierr == mpi_success)
652 call mpi_comm_size(t3d%comm,&
655 sll_assert(ierr == mpi_success)
659 call mpi_allreduce(i, t3d%info, 1,&
660 mpi_integer, mpi_sum,&
662 sll_assert(ierr == mpi_success)
667 t3d%periodic = .false.
668 call mpi_cart_get(t3d%comm, nd,&
673 sll_assert(ierr == mpi_success)
678 call mpi_cart_shift(t3d%comm, i-1, 1, &
679 t3d%neighbors(2*i-1), &
680 t3d%neighbors(2*i), &
682 sll_assert(ierr == mpi_success)
693 sll_int32 :: i, j, n3d, ierr
694 sll_int32,
allocatable :: topo_3d_rank_table(:)
695 sll_int32,
allocatable :: topo_6d_rank_grouped_by_3d_rank(:)
696 sll_int32 :: group_6d, group_3d
699 allocate(topo_3d_rank_table(0:topo_6d%nprocs-1))
700 topo_3d_rank_table(:) = -1
701 call mpi_allgather(topo_3d%rank, 1, mpi_integer,&
702 topo_3d_rank_table, 1, mpi_integer,&
704 sll_assert(ierr == mpi_success)
709 do i=0, topo_6d%nprocs-1
710 if (topo_3d%rank == topo_3d_rank_table(i))
then
716 allocate(topo_6d_rank_grouped_by_3d_rank(n3d))
717 topo_6d_rank_grouped_by_3d_rank(:) = -1
719 do i=0, topo_6d%nprocs-1
720 if (topo_3d%rank == topo_3d_rank_table(i))
then
721 topo_6d_rank_grouped_by_3d_rank(j) = i
729 call mpi_comm_group(topo_6d%comm, group_6d, ierr)
730 sll_assert(ierr == mpi_success)
736 call mpi_group_incl(group_6d, n3d, topo_6d_rank_grouped_by_3d_rank, group_3d, ierr)
737 sll_assert(ierr == mpi_success)
740 sll_allocate(topo_3d_o, ierr)
741 sll_assert(ierr == mpi_success)
742 call mpi_comm_create(topo_6d%comm, group_3d, topo_3d_o%comm, ierr)
743 sll_assert(ierr == mpi_success)
745 call mpi_comm_rank(topo_3d_o%comm,&
748 sll_assert(ierr == mpi_success)
750 call mpi_comm_size(topo_3d_o%comm,&
753 sll_assert(ierr == mpi_success)
755 deallocate(topo_3d_rank_table)
756 deallocate(topo_6d_rank_grouped_by_3d_rank)
758 call mpi_group_free(group_3d, ierr)
759 sll_assert(ierr == mpi_success)
761 call mpi_group_free(group_6d, ierr)
762 sll_assert(ierr == mpi_success)
771 integer,
parameter :: nd=6
772 sll_int32,
intent(in) :: grid_size(nd)
773 sll_int32,
intent(in) :: halo_width(nd)
775 sll_int32 :: lp, l0, l1
779 sll_assert( halo_width(i) >= 0 )
783 sll_assert( mod(grid_size(i), topology%procs(i)) == 0 )
794 lp = grid_size(i) / topology%procs(i)
796 l0 = 1 + topology%coords(i) * lp
798 l1 = (topology%coords(i) + 1) * lp
800 sll_assert( lp/2 >= halo_width(i) )
838 integer,
parameter :: nd=6
839 sll_int32,
intent(in) :: grid_size(nd)
841 sll_int32 :: lp, l0, l1
846 sll_assert( mod(grid_size(i), topology%procs(i)) == 0 )
855 lp = grid_size(i) / topology%procs(i)
857 l0 = 1 + topology%coords(i) * lp
859 l1 = (topology%coords(i) + 1) * lp
883 integer,
parameter :: nd=3
884 sll_int32,
intent(in) :: grid_size(nd)
886 sll_int32 :: lp, l0, l1
891 sll_assert( mod(grid_size(i), topology%procs(i)) == 0 )
900 lp = grid_size(i) / topology%procs(i)
902 l0 = 1 + topology%coords(i) * lp
904 l1 = (topology%coords(i) + 1) * lp
924 integer,
parameter :: nd=6
925 sll_int32,
intent(in) :: n_cells(nd)
926 sll_int32,
intent(in) :: degree(nd)
929 sll_int32 :: lp, l0, l1
934 sll_assert( mod(n_cells(i), topology%procs(i)) == 0 )
944 lp = (n_cells(i) / topology%procs(i)) * degree(i)
946 l0 = 1 + topology%coords(i) * lp
948 l1 = (topology%coords(i) + 1) * lp
959 lp = n_cells(i) / topology%procs(i)
961 l0 = 1 + topology%coords(i) * lp
963 l1 = (topology%coords(i) + 1) * lp
989 integer,
parameter :: nd=3
990 sll_int32,
intent(in) :: grid_size(nd)
991 sll_int32,
intent(in) :: halo_width(nd)
993 sll_int32 :: lp, l0, l1
997 sll_assert( halo_width(i) >= 0 )
1001 sll_assert( mod(grid_size(i), topology%procs(i)) == 0 )
1012 lp = grid_size(i) / topology%procs(i)
1014 l0 = 1 + topology%coords(i) * lp
1016 l1 = (topology%coords(i) + 1) * lp
1018 sll_assert( lp/2 >= halo_width(i) )
1052 sll_int32,
dimension(6),
intent(in) :: arr_lo
1053 sll_int32,
dimension(6),
intent(in) :: arr_hi
1054 sll_real64,
dimension(arr_lo(1):arr_hi(1), arr_lo(2):arr_hi(2), arr_lo(3):arr_hi(3), &
arr_lo(4):arr_hi(4), arr_lo(5):arr_hi(5), arr_lo(6):arr_hi(6)), &
1056 halo_dtype,
dimension(:),
intent(out) :: buf
1057 sll_int32,
dimension(6,2),
intent(in) :: ranges
1058 sll_int32,
intent(in),
optional :: n_threads
1059 sll_int32 :: i,j,k,l,m,n
1060 sll_int32 :: ii,ij,ik,il,im,in
1061 sll_int32 :: wi,wj,wk,wl,wm,wn
1062 sll_int32 :: oj,ok,ol,om,on
1063 sll_int32 :: idx, n_omp_threads
1066 if (
present(n_threads))
then
1067 n_omp_threads = n_threads
1069 n_omp_threads = omp_get_max_threads()
1075 wi = ranges(1,2) - ranges(1,1) + 1
1076 wj = ranges(2,2) - ranges(2,1) + 1
1077 wk = ranges(3,2) - ranges(3,1) + 1
1078 wl = ranges(4,2) - ranges(4,1) + 1
1079 wm = ranges(5,2) - ranges(5,1) + 1
1080 wn = ranges(6,2) - ranges(6,1) + 1
1086 in = n + ranges(6,1)
1087 on = n * wi * wj * wk * wl * wm
1089 im = m + ranges(5,1)
1090 om = m * wi * wj * wk * wl
1092 il = l + ranges(4,1)
1093 ol = l * wi * wj * wk
1095 ik = k + ranges(3,1)
1098 ij = j + ranges(2,1)
1101 ii = i + ranges(1,1)
1103 idx = 1 + i + oj + ok + ol + om + on
1104 buf(idx) = arr(ii,ij,ik,il,im,in)
1133 sll_int32,
dimension(3),
intent(in) :: arr_lo
1134 sll_int32,
dimension(3),
intent(in) :: arr_hi
1135 sll_real64,
dimension(arr_lo(1):arr_hi(1), arr_lo(2):arr_hi(2), arr_lo(3):arr_hi(3)),
intent(in) :: arr
1136 halo_dtype,
dimension(:),
intent(out) :: buf
1137 sll_int32,
dimension(3,2),
intent(in) :: ranges
1138 sll_int32,
intent(in),
optional :: n_threads
1141 sll_int32 :: ii,ij,ik
1142 sll_int32 :: wi,wj,wk
1144 sll_int32 :: idx, n_omp_threads
1147 if (
present(n_threads))
then
1148 n_omp_threads = n_threads
1150 n_omp_threads = omp_get_max_threads()
1156 wi = ranges(1,2) - ranges(1,1) + 1
1157 wj = ranges(2,2) - ranges(2,1) + 1
1158 wk = ranges(3,2) - ranges(3,1) + 1
1163 ik = k + ranges(3,1)
1165 ij = j + ranges(2,1)
1168 ii = i + ranges(1,1)
1169 idx = 1 + i + oj + ok
1170 buf(idx) = arr(ii,ij,ik)
1181 halo_dtype,
dimension(:),
intent(in) :: buf
1182 sll_int32,
dimension(6),
intent(in) :: arr_lo
1183 sll_int32,
dimension(6),
intent(in) :: arr_hi
1184 sll_real64,
dimension(arr_lo(1):arr_hi(1), arr_lo(2):arr_hi(2), arr_lo(3):arr_hi(3), &
arr_lo(4):arr_hi(4), arr_lo(5):arr_hi(5), arr_lo(6):arr_hi(6)),&
1186 sll_int32,
dimension(6,2),
intent(in) :: ranges
1187 sll_int32,
intent(in),
optional :: n_threads
1188 sll_int32 :: i,j,k,l,m,n
1189 sll_int32 :: ii,ij,ik,il,im,in
1190 sll_int32 :: wi,wj,wk,wl,wm,wn
1191 sll_int32 :: oj,ok,ol,om,on
1192 sll_int32 :: idx, n_omp_threads
1195 if (
present(n_threads))
then
1196 n_omp_threads = n_threads
1198 n_omp_threads = omp_get_max_threads()
1204 wi = ranges(1,2) - ranges(1,1) + 1
1205 wj = ranges(2,2) - ranges(2,1) + 1
1206 wk = ranges(3,2) - ranges(3,1) + 1
1207 wl = ranges(4,2) - ranges(4,1) + 1
1208 wm = ranges(5,2) - ranges(5,1) + 1
1209 wn = ranges(6,2) - ranges(6,1) + 1
1215 in = n + ranges(6,1)
1216 on = n * wi * wj * wk * wl * wm
1218 im = m + ranges(5,1)
1219 om = m * wi * wj * wk * wl
1221 il = l + ranges(4,1)
1222 ol = l * wi * wj * wk
1224 ik = k + ranges(3,1)
1227 ij = j + ranges(2,1)
1230 ii = i + ranges(1,1)
1232 idx = 1 + i + oj + ok + ol + om + on
1233 arr(ii,ij,ik,il,im,in) = buf(idx)
1262 integer,
parameter :: nd = 6
1266 sll_real64,
dimension(:,:,:,:,:,:),
intent(inout) :: arr(decomp%local%lo(1):decomp%local%hi(1), &
1267 decomp%local%lo(2):decomp%local%hi(2), &
1268 decomp%local%lo(3):decomp%local%hi(3), &
1269 decomp%local%lo(4):decomp%local%hi(4), &
1270 decomp%local%lo(5):decomp%local%hi(5), &
1271 decomp%local%lo(6):decomp%local%hi(6))
1272 logical,
dimension(:),
intent(in),
optional :: dim_mask_in(nd)
1274 sll_int64,
save :: bufsize = 0
1279 integer,
dimension(:,:) :: r_rx(nd,2)
1280 integer,
dimension(:,:) :: r_tx(nd,2)
1281 #ifdef USE_HALO_REAL32
1282 sll_int32,
parameter :: nxc_max = 128000000
1283 integer,
parameter :: mpi_precision = mpi_real
1284 integer,
parameter :: word_size = 4
1286 sll_int32,
parameter :: nxc_max = 64000000
1287 integer,
parameter :: mpi_precision = mpi_double_precision
1288 integer,
parameter :: word_size = 8
1290 halo_dtype,
dimension(:),
allocatable,
target,
save :: sendbuf, recvbuf
1300 logical,
save :: first_call = .true.
1301 logical,
save :: sll_use_mpi_sendrecv = .true.
1302 integer :: i,j,k,l,m,n
1304 integer,
save :: dump_buffer_invocation_count = 0
1305 integer,
save :: dump_f6d_invocation_count = 0
1306 integer,
save :: dump_dd_info_invocation_count = 0
1308 logical,
dimension(:) :: dim_mask(nd)
1311 if (
present(dim_mask_in)) dim_mask = dim_mask_in
1316 if (first_call)
then
1317 #ifdef USE_HALO_REAL32
1318 if (topo%rank == 0)
then
1319 write(*,*)
"sll_m_decomposition::apply_halo_exchange() uses single precision messages"
1323 if (topo%rank == 0)
then
1324 if (sll_use_mpi_sendrecv)
then
1325 write(*,*)
"sll_m_decomposition::apply_halo_exchange() uses MPI_SENDRECV()"
1327 write(*,*)
"sll_m_decomposition::apply_halo_exchange() uses MPI_SEND() and MPI_RECV()"
1330 first_call = .false.
1335 if (.not. dim_mask(id)) cycle
1337 if (topo%procs(id) == 1)
then
1340 r_tx(:,1) = decomp%local%mn(:)
1341 r_tx(:,2) = decomp%local%mx(:)
1342 r_tx(id,1) = decomp%local%tx_lolo(id)
1343 r_tx(id,2) = decomp%local%tx_lohi(id)
1344 r_rx(:,1) = decomp%local%mn(:)
1345 r_rx(:,2) = decomp%local%mx(:)
1346 r_rx(id,1) = decomp%local%rx_hilo(id)
1347 r_rx(id,2) = decomp%local%rx_hihi(id)
1350 do n = 0, r_rx(6,2)-r_rx(6,1)
1351 do m = 0, r_rx(5,2)-r_rx(5,1)
1352 do l = 0, r_rx(4,2)-r_rx(4,1)
1353 do k = 0, r_rx(3,2)-r_rx(3,1)
1354 do j = 0, r_rx(2,2)-r_rx(2,1)
1355 do i = 0, r_rx(1,2)-r_rx(1,1)
1356 arr(r_rx(1,1)+i, r_rx(2,1)+j, r_rx(3,1)+k, r_rx(4,1)+l, r_rx(5,1)+m, r_rx(6,1)+n) = &
1357 arr(r_tx(1,1)+i, r_tx(2,1)+j, r_tx(3,1)+k, r_tx(4,1)+l, r_tx(5,1)+m, r_tx(6,1)+n)
1367 r_tx(:,1) = decomp%local%mn(:)
1368 r_tx(:,2) = decomp%local%mx(:)
1369 r_tx(id,1) = decomp%local%tx_hilo(id)
1370 r_tx(id,2) = decomp%local%tx_hihi(id)
1371 r_rx(:,1) = decomp%local%mn(:)
1372 r_rx(:,2) = decomp%local%mx(:)
1373 r_rx(id,1) = decomp%local%rx_lolo(id)
1374 r_rx(id,2) = decomp%local%rx_lohi(id)
1377 do n = 0, r_rx(6,2)-r_rx(6,1)
1378 do m = 0, r_rx(5,2)-r_rx(5,1)
1379 do l = 0, r_rx(4,2)-r_rx(4,1)
1380 do k = 0, r_rx(3,2)-r_rx(3,1)
1381 do j = 0, r_rx(2,2)-r_rx(2,1)
1382 do i = 0, r_rx(1,2)-r_rx(1,1)
1383 arr(r_rx(1,1)+i, r_rx(2,1)+j, r_rx(3,1)+k, r_rx(4,1)+l, r_rx(5,1)+m, r_rx(6,1)+n) = &
1384 arr(r_tx(1,1)+i, r_tx(2,1)+j, r_tx(3,1)+k, r_tx(4,1)+l, r_tx(5,1)+m, r_tx(6,1)+n)
1395 nxc = int(decomp%local%hw(id), i64)
1400 nxc = nxc * decomp%local%nw(jd)
1404 if (nxc > bufsize)
then
1405 if (
allocated(sendbuf))
deallocate(sendbuf)
1406 if (
allocated(recvbuf))
deallocate(recvbuf)
1407 allocate(sendbuf(nxc))
1408 allocate(recvbuf(nxc))
1413 r_tx(:,1) = decomp%local%mn(:)
1414 r_tx(:,2) = decomp%local%mx(:)
1415 r_tx(id,1) = decomp%local%tx_lolo(id)
1416 r_tx(id,2) = decomp%local%tx_lohi(id)
1423 if (rem > nxc_max)
then
1428 if (sll_use_mpi_sendrecv)
then
1429 call mpi_sendrecv(sendbuf(off), nel, mpi_precision, topo%neighbors(2*id-1), mpi_tag,&
1430 recvbuf(off), nel, mpi_precision, topo%neighbors(2*id), mpi_tag,&
1431 topo%comm, mpi_status_ignore, ierr)
1432 sll_assert(ierr == mpi_success)
1435 if (mod(topo%coords(id), 2) > 0)
then
1436 call mpi_send(sendbuf(off), nel, mpi_precision, topo%neighbors(2*id-1), mpi_tag, topo%comm, ierr)
1437 sll_assert(ierr == mpi_success)
1438 call mpi_recv(recvbuf(off), nel, mpi_precision, topo%neighbors(2*id), mpi_tag, topo%comm, mpi_status_ignore, ierr)
1439 sll_assert(ierr == mpi_success)
1442 call mpi_recv(recvbuf(off), nel, mpi_precision, topo%neighbors(2*id), mpi_tag, topo%comm, mpi_status_ignore, ierr)
1443 sll_assert(ierr == mpi_success)
1444 call mpi_send(sendbuf(off), nel, mpi_precision, topo%neighbors(2*id-1), mpi_tag, topo%comm, ierr)
1445 sll_assert(ierr == mpi_success)
1452 r_rx(:,1) = decomp%local%mn(:)
1453 r_rx(:,2) = decomp%local%mx(:)
1454 r_rx(id,1) = decomp%local%rx_hilo(id)
1455 r_rx(id,2) = decomp%local%rx_hihi(id)
1460 r_tx(:,1) = decomp%local%mn(:)
1461 r_tx(:,2) = decomp%local%mx(:)
1462 r_tx(id,1) = decomp%local%tx_hilo(id)
1463 r_tx(id,2) = decomp%local%tx_hihi(id)
1468 if (rem > nxc_max)
then
1473 if (sll_use_mpi_sendrecv)
then
1474 call mpi_sendrecv(sendbuf(off), nel, mpi_precision, topo%neighbors(2*id), 1,&
1475 recvbuf(off), nel, mpi_precision, topo%neighbors(2*id-1), 1,&
1476 topo%comm, mpi_status_ignore, ierr)
1477 sll_assert(ierr == mpi_success)
1480 if (mod(topo%coords(id), 2) > 0)
then
1481 call mpi_send(sendbuf(off), nel, mpi_precision, topo%neighbors(2*id), &
1482 mpi_tag, topo%comm, ierr)
1483 sll_assert(ierr == mpi_success)
1484 call mpi_recv(recvbuf(off), nel, mpi_precision, topo%neighbors(2*id-1), &
1485 mpi_tag, topo%comm, mpi_status_ignore, ierr)
1486 sll_assert(ierr == mpi_success)
1489 call mpi_recv(recvbuf(off), nel, mpi_precision, topo%neighbors(2*id-1), &
1490 mpi_tag, topo%comm, mpi_status_ignore, ierr)
1491 sll_assert(ierr == mpi_success)
1492 call mpi_send(sendbuf(off), nel, mpi_precision, topo%neighbors(2*id), &
1493 mpi_tag, topo%comm, ierr)
1494 sll_assert(ierr == mpi_success)
1501 r_rx(:,1) = decomp%local%mn(:)
1502 r_rx(:,2) = decomp%local%mx(:)
1503 r_rx(id,1) = decomp%local%rx_lolo(id)
1504 r_rx(id,2) = decomp%local%rx_lohi(id)
1517 rank_recv, mpi_comm, verbose, mpi_tag)
1519 halo_dtype,
pointer :: sendbuf(:)
1520 halo_dtype,
pointer :: recvbuf(:,:,:,:,:,:), recv_view_1d(:)
1521 sll_int32 :: nel, rank_send, rank_recv, mpi_comm
1522 logical,
intent(in),
optional :: verbose
1523 integer,
intent(in),
optional :: mpi_tag
1527 if (
present(verbose))
then
1533 if (
present(mpi_tag))
then
1539 call c_f_pointer(c_loc(recvbuf), recv_view_1d, [nel])
1541 nullify(recv_view_1d)
1545 mpi_comm, verbose, mpi_tag)
1546 halo_dtype,
pointer :: sendbuf(:)
1547 halo_dtype,
pointer :: recvbuf(:)
1548 sll_int32 :: nel, rank_send, rank_recv, mpi_comm
1549 logical,
intent(in),
optional :: verbose
1550 integer,
intent(in),
optional :: mpi_tag
1552 #ifdef USE_HALO_REAL32
1553 integer,
parameter :: mpi_precision = mpi_real
1555 integer,
parameter :: mpi_precision = mpi_double_precision
1557 logical :: do_compress
1563 if (
present(verbose))
then
1569 if (
present(mpi_tag))
then
1576 omp_size = omp_get_max_threads()
1581 #ifdef USE_HALO_REAL32
1582 do_compress = .false.
1585 write(*,*)
"mpi_sendrecv_compressed() : disabled due to blocksize mismatch"
1586 do_compress = .false.
1591 do_compress = .true.
1595 if (do_compress)
then
1596 #ifndef USE_HALO_REAL32
1633 call mpi_sendrecv(sendbuf, nel, mpi_precision, rank_send, tag,&
1634 recvbuf, nel, mpi_precision, rank_recv, tag,&
1635 mpi_comm, mpi_status_ignore, ierr)
1636 sll_assert_always(ierr == mpi_success)
1644 sll_int32 :: rank_send, rank_recv, mpi_comm
1645 logical,
intent(in),
optional ::
verbose
1646 integer,
intent(in),
optional :: mpi_tag
1648 integer :: ierr, n_idx
1649 integer,
pointer :: comp_idx_send(:), comp_idx_recv(:)
1659 if (
present(mpi_tag))
then
1670 allocate(comp_idx_recv(0:n_idx-1))
1671 call mpi_sendrecv(comp_idx_send, n_idx, mpi_integer, rank_send, mpi_tag,&
1672 comp_idx_recv, n_idx, mpi_integer, rank_recv, mpi_tag,&
1673 mpi_comm, mpi_status_ignore, ierr)
1674 sll_assert_always(ierr == mpi_success)
1676 deallocate(comp_idx_send);
nullify(comp_idx_send)
1677 deallocate(comp_idx_recv);
nullify(comp_idx_recv)
1684 allocate(comp_recv%buffer(0:comp_recv%n_bytes_deflated_total-1))
1685 call mpi_sendrecv(comp_send%buffer, comp_send%n_bytes_deflated_total, mpi_byte, rank_send, tag,&
1686 comp_recv%buffer, comp_recv%n_bytes_deflated_total, mpi_byte, rank_recv, tag,&
1687 mpi_comm, mpi_status_ignore, ierr)
1688 sll_assert_always(ierr == mpi_success)
1694 integer,
parameter :: nd = 6
1698 sll_real64,
dimension(:,:,:,:,:,:),
intent(inout) :: arr(decomp%local%mn(1):decomp%local%mx(1), &
1699 decomp%local%mn(2):decomp%local%mx(2), &
1700 decomp%local%mn(3):decomp%local%mx(3), &
1701 decomp%local%mn(4):decomp%local%mx(4), &
1702 decomp%local%mn(5):decomp%local%mx(5), &
1703 decomp%local%mn(6):decomp%local%mx(6))
1704 sll_int32,
intent(in) :: id, hw_left, hw_right
1705 sll_int32 :: halo_block(6,2)
1706 halo_block(:,1) = decomp%local%mn
1707 halo_block(:,2) = decomp%local%mx
1714 integer,
parameter :: nd = 6
1718 sll_real64,
intent(inout) :: arr(decomp%local%mn(1):decomp%local%mx(1), &
1719 decomp%local%mn(2):decomp%local%mx(2), &
1720 decomp%local%mn(3):decomp%local%mx(3), &
1721 decomp%local%mn(4):decomp%local%mx(4), &
1722 decomp%local%mn(5):decomp%local%mx(5), &
1723 decomp%local%mn(6):decomp%local%mx(6))
1724 sll_int32,
intent(in) :: id, hw_left, hw_right
1725 sll_int32,
intent(in) :: halo_block(6,2)
1726 integer :: jd, i, j, k, l, m, n
1728 logical,
save :: first_call = .true.
1731 sll_int64,
save :: bufsize = 0
1734 integer,
dimension(:,:) :: r_rx(nd,2)
1735 integer,
dimension(:,:) :: r_tx(nd,2)
1736 #ifdef USE_HALO_REAL32
1737 sll_int32,
parameter :: nxc_max = 2147483647
1738 integer,
parameter :: mpi_precision = mpi_real
1739 integer,
parameter :: word_size = 4
1741 sll_int32,
parameter :: nxc_max = 2147483647
1742 integer,
parameter :: mpi_precision = mpi_double_precision
1743 integer,
parameter :: word_size = 8
1745 halo_dtype,
pointer,
save :: sendbuf(:)
1746 halo_dtype,
pointer :: recvbuf(:,:,:,:,:,:)
1748 logical,
save :: use_compression
1749 logical,
save :: compression_verbose
1750 integer,
save :: prec
1752 logical,
parameter :: sendbuf_dump = .false.
1753 integer,
save :: dump_counter = 0
1754 character(len=32) :: dump_filename
1758 if (first_call)
then
1759 first_call = .false.
1760 compression_verbose = .false.
1761 #ifdef USE_HALO_REAL32
1762 if (topo%rank == 0)
then
1763 write(*,*)
"sll_m_decomposition::apply_halo_exchange() uses single precision messages"
1765 use_compression = .false.
1769 if (use_compression)
then
1772 if (topo%rank == 0)
then
1773 write(*,*)
"sll_m_decomposition::apply_halo_exchange() uses message compression"
1778 use_compression = .false.
1796 decomp%local%id = id
1797 decomp%local%halo_right%mn(:) = halo_block(:,1)
1798 decomp%local%halo_right%mx(:) = halo_block(:,2)
1799 decomp%local%halo_right%nw(:) = decomp%local%halo_right%mx(:)-decomp%local%halo_right%mn(:)+1
1801 decomp%local%halo_right%mn(id) = decomp%local%mx(id) + 1
1802 decomp%local%halo_right%mx(id) = decomp%local%mx(id) + hw_right
1803 decomp%local%halo_right%nw(id) = hw_right
1805 if (hw_right > 0)
then
1808 if (
associated(decomp%local%halo_right%buf))
then
1811 call mp_acquire(decomp%local%halo_right%buf, decomp%local%halo_right%mn, decomp%local%halo_right%mx)
1813 if (
allocated(decomp%local%halo_right%buf)) &
1814 deallocate(decomp%local%halo_right%buf)
1815 allocate(decomp%local%halo_right%buf( &
1816 decomp%local%halo_right%mn(1):decomp%local%halo_right%mx(1), &
1817 decomp%local%halo_right%mn(2):decomp%local%halo_right%mx(2), &
1818 decomp%local%halo_right%mn(3):decomp%local%halo_right%mx(3), &
1819 decomp%local%halo_right%mn(4):decomp%local%halo_right%mx(4), &
1820 decomp%local%halo_right%mn(5):decomp%local%halo_right%mx(5), &
1821 decomp%local%halo_right%mn(6):decomp%local%halo_right%mx(6) ))
1825 recvbuf => decomp%local%halo_right%buf
1830 r_tx(:,1) = decomp%local%halo_right%mn(:)
1831 r_tx(:,2) = decomp%local%halo_right%mx(:)
1832 r_tx(id,1) = decomp%local%mn(id)
1833 r_tx(id,2) = decomp%local%mn(id) + hw_right - 1
1835 r_rx(:,1) = decomp%local%halo_right%mn(:)
1836 r_rx(:,2) = decomp%local%halo_right%mx(:)
1838 if (topo%procs(id) == 1)
then
1842 do n = 0, r_rx(6,2)-r_rx(6,1)
1843 do m = 0, r_rx(5,2)-r_rx(5,1)
1844 do l = 0, r_rx(4,2)-r_rx(4,1)
1845 do k = 0, r_rx(3,2)-r_rx(3,1)
1846 do j = 0, r_rx(2,2)-r_rx(2,1)
1847 do i = 0, r_rx(1,2)-r_rx(1,1)
1848 recvbuf(r_rx(1,1)+i, r_rx(2,1)+j, r_rx(3,1)+k, &
1849 r_rx(4,1)+l, r_rx(5,1)+m, r_rx(6,1)+n) = &
1850 arr(r_tx(1,1)+i, r_tx(2,1)+j, r_tx(3,1)+k, &
1851 r_tx(4,1)+l, r_tx(5,1)+m, r_tx(6,1)+n)
1862 nxc = int(hw_right, i64)
1867 nxc = nxc * decomp%local%halo_right%nw(jd)
1870 if (nxc > bufsize)
then
1871 if (
associated(sendbuf))
deallocate(sendbuf)
1872 allocate(sendbuf(nxc))
1878 sll_assert_always(nxc <= nxc_max)
1881 if (use_compression)
then
1882 if (sendbuf_dump)
then
1883 write(dump_filename,
'(a,i1.1,a,i4.4,a)')
"L", id,
"_", dump_counter,
".txt"
1884 write(*,*) trim(dump_filename)
1885 open(unit=88, file=trim(dump_filename), status=
'replace')
1886 write(88,
'(E24.16)') (sendbuf(i), i=1,nel)
1890 topo%neighbors(2*id-1), topo%neighbors(2*id), topo%comm, &
1891 compression_verbose)
1893 call mpi_sendrecv(sendbuf, nel, mpi_precision, topo%neighbors(2*id-1), mpi_tag,&
1894 recvbuf, nel, mpi_precision, topo%neighbors(2*id), mpi_tag,&
1895 topo%comm, mpi_status_ignore, ierr)
1896 sll_assert_always(ierr == mpi_success)
1916 decomp%local%id = id
1917 decomp%local%halo_left%mn(:) = halo_block(:,1)
1918 decomp%local%halo_left%mx(:) = halo_block(:,2)
1919 decomp%local%halo_left%nw(:) = decomp%local%halo_left%mx(:)-decomp%local%halo_left%mn(:)+1
1921 decomp%local%halo_left%mx(id) = decomp%local%mn(id) - 1
1922 decomp%local%halo_left%mn(id) = decomp%local%mn(id) - hw_left
1923 decomp%local%halo_left%nw(id) = hw_left
1925 if (hw_left > 0)
then
1928 if (
associated(decomp%local%halo_left%buf))
then
1931 call mp_acquire(decomp%local%halo_left%buf, decomp%local%halo_left%mn, decomp%local%halo_left%mx)
1933 if (
allocated(decomp%local%halo_left%buf)) &
1934 deallocate(decomp%local%halo_left%buf)
1935 allocate(decomp%local%halo_left%buf( &
1936 decomp%local%halo_left%mn(1):decomp%local%halo_left%mx(1), &
1937 decomp%local%halo_left%mn(2):decomp%local%halo_left%mx(2), &
1938 decomp%local%halo_left%mn(3):decomp%local%halo_left%mx(3), &
1939 decomp%local%halo_left%mn(4):decomp%local%halo_left%mx(4), &
1940 decomp%local%halo_left%mn(5):decomp%local%halo_left%mx(5), &
1941 decomp%local%halo_left%mn(6):decomp%local%halo_left%mx(6) ))
1944 recvbuf => decomp%local%halo_left%buf
1948 r_tx(:,1) = decomp%local%halo_left%mn(:)
1949 r_tx(:,2) = decomp%local%halo_left%mx(:)
1950 r_tx(id,1) = decomp%local%mx(id) - hw_left + 1
1951 r_tx(id,2) = decomp%local%mx(id)
1953 r_rx(:,1) = decomp%local%halo_left%mn(:)
1954 r_rx(:,2) = decomp%local%halo_left%mx(:)
1956 if (topo%procs(id) == 1)
then
1960 do n = 0, r_rx(6,2)-r_rx(6,1)
1961 do m = 0, r_rx(5,2)-r_rx(5,1)
1962 do l = 0, r_rx(4,2)-r_rx(4,1)
1963 do k = 0, r_rx(3,2)-r_rx(3,1)
1964 do j = 0, r_rx(2,2)-r_rx(2,1)
1965 do i = 0, r_rx(1,2)-r_rx(1,1)
1966 recvbuf(r_rx(1,1)+i, r_rx(2,1)+j, r_rx(3,1)+k, &
1967 r_rx(4,1)+l, r_rx(5,1)+m, r_rx(6,1)+n) = &
1968 arr(r_tx(1,1)+i, r_tx(2,1)+j, r_tx(3,1)+k, &
1969 r_tx(4,1)+l, r_tx(5,1)+m, r_tx(6,1)+n)
1980 nxc = int(hw_left, i64)
1985 nxc = nxc * decomp%local%halo_left%nw(jd)
1988 if (nxc > bufsize)
then
1989 if (
associated(sendbuf))
deallocate(sendbuf)
1990 allocate(sendbuf(nxc))
1996 sll_assert_always(nxc <= nxc_max)
1999 if (use_compression)
then
2000 if (sendbuf_dump)
then
2001 write(dump_filename,
'(a,i1.1,a,i4.4,a)')
"R", id,
"_", dump_counter,
".txt"
2002 write(*,*) trim(dump_filename)
2003 open(unit=88, file=dump_filename, status=
'replace')
2004 write(88,
'(E24.16)') (sendbuf(i), i=1,nel)
2008 topo%neighbors(2*id), topo%neighbors(2*id-1), topo%comm, &
2009 compression_verbose)
2011 call mpi_sendrecv(sendbuf, nel, mpi_precision, topo%neighbors(2*id), mpi_tag,&
2012 recvbuf, nel, mpi_precision, topo%neighbors(2*id-1), mpi_tag,&
2013 topo%comm, mpi_status_ignore, ierr)
2014 sll_assert_always(ierr == mpi_success)
2023 dump_counter = dump_counter + 1
2038 integer,
parameter :: nd = 3
2041 sll_real64,
dimension(:,:,:),
intent(inout) :: arr(decomp%local%mn(1):decomp%local%mx(1), &
2042 decomp%local%mn(2):decomp%local%mx(2), &
2043 decomp%local%mn(3):decomp%local%mx(3))
2044 sll_int32,
intent(in) :: id, hw_left, hw_right
2045 sll_int32 :: halo_block(3,2)
2046 halo_block(:,1) = decomp%local%mn
2047 halo_block(:,2) = decomp%local%mx
2054 integer,
parameter :: nd = 3
2057 sll_real64,
intent(inout) :: arr(decomp%local%mn(1):decomp%local%mx(1), &
2058 decomp%local%mn(2):decomp%local%mx(2), &
2059 decomp%local%mn(3):decomp%local%mx(3))
2060 sll_int32,
intent(in) :: id, hw_left, hw_right
2061 sll_int32,
intent(in) :: halo_block(3,2)
2062 integer :: jd, i, j, k
2064 logical,
save :: first_call = .true.
2067 sll_int64,
save :: bufsize = 0
2070 integer,
dimension(:,:) :: r_rx(nd,2)
2071 integer,
dimension(:,:) :: r_tx(nd,2)
2072 #ifdef USE_HALO_REAL32
2073 sll_int32,
parameter :: nxc_max = 2147483647
2074 integer,
parameter :: mpi_precision = mpi_real
2075 integer,
parameter :: word_size = 4
2077 sll_int32,
parameter :: nxc_max = 2147483647
2078 integer,
parameter :: mpi_precision = mpi_double_precision
2079 integer,
parameter :: word_size = 8
2082 halo_dtype,
pointer,
save :: sendbuf(:)
2083 halo_dtype,
pointer :: recvbuf(:,:,:)
2091 if (first_call)
then
2092 #ifdef USE_HALO_REAL32
2093 if (topo%rank == 0)
then
2094 write(*,*)
"sll_m_decomposition::sll_s_apply_halo_exchange_slim_3d_real64() uses single precision messages"
2097 first_call = .false.
2102 decomp%local%id = id
2103 decomp%local%halo_right%mn(:) = halo_block(:,1)
2104 decomp%local%halo_right%mx(:) = halo_block(:,2)
2105 decomp%local%halo_right%nw(:) = decomp%local%halo_right%mx(:)-decomp%local%halo_right%mn(:)+1
2107 decomp%local%halo_right%mn(id) = decomp%local%mx(id) + 1
2108 decomp%local%halo_right%mx(id) = decomp%local%mx(id) + hw_right
2109 decomp%local%halo_right%nw(id) = hw_right
2111 if (hw_right > 0)
then
2113 if (
allocated(decomp%local%halo_right%buf)) &
2114 deallocate(decomp%local%halo_right%buf)
2116 allocate(decomp%local%halo_right%buf( &
2117 decomp%local%halo_right%mn(1):decomp%local%halo_right%mx(1), &
2118 decomp%local%halo_right%mn(2):decomp%local%halo_right%mx(2), &
2119 decomp%local%halo_right%mn(3):decomp%local%halo_right%mx(3)))
2121 recvbuf => decomp%local%halo_right%buf
2125 r_tx(:,1) = decomp%local%halo_right%mn(:)
2126 r_tx(:,2) = decomp%local%halo_right%mx(:)
2127 r_tx(id,1) = decomp%local%mn(id)
2128 r_tx(id,2) = decomp%local%mn(id) + hw_right - 1
2130 r_rx(:,1) = decomp%local%halo_right%mn(:)
2131 r_rx(:,2) = decomp%local%halo_right%mx(:)
2133 if (topo%procs(id) == 1)
then
2137 do k = 0, r_rx(3,2)-r_rx(3,1)
2138 do j = 0, r_rx(2,2)-r_rx(2,1)
2139 do i = 0, r_rx(1,2)-r_rx(1,1)
2140 recvbuf(r_rx(1,1)+i, r_rx(2,1)+j, r_rx(3,1)+k) = &
2141 arr(r_tx(1,1)+i, r_tx(2,1)+j, r_tx(3,1)+k)
2149 nxc = int(hw_right, i64)
2154 nxc = nxc * decomp%local%halo_right%nw(jd)
2157 if (nxc > bufsize)
then
2158 if (
associated(sendbuf))
deallocate(sendbuf)
2159 allocate(sendbuf(nxc))
2165 sll_assert_always(nxc <= nxc_max)
2167 call mpi_sendrecv(sendbuf, nel, mpi_precision, topo%neighbors(2*id-1), mpi_tag,&
2168 decomp%local%halo_right%buf, nel, mpi_precision, topo%neighbors(2*id), mpi_tag,&
2169 topo%comm, mpi_status_ignore, ierr)
2170 sll_assert_always(ierr == mpi_success)
2181 decomp%local%id = id
2182 decomp%local%halo_left%mn(:) = halo_block(:,1)
2183 decomp%local%halo_left%mx(:) = halo_block(:,2)
2184 decomp%local%halo_left%nw(:) = decomp%local%halo_left%mx(:)-decomp%local%halo_left%mn(:)+1
2186 decomp%local%halo_left%mx(id) = decomp%local%mn(id) - 1
2187 decomp%local%halo_left%mn(id) = decomp%local%mn(id) - hw_left
2188 decomp%local%halo_left%nw(id) = hw_left
2190 if (hw_left > 0)
then
2192 if (
allocated(decomp%local%halo_left%buf)) &
2193 deallocate(decomp%local%halo_left%buf)
2194 allocate(decomp%local%halo_left%buf( &
2195 decomp%local%halo_left%mn(1):decomp%local%halo_left%mx(1), &
2196 decomp%local%halo_left%mn(2):decomp%local%halo_left%mx(2), &
2197 decomp%local%halo_left%mn(3):decomp%local%halo_left%mx(3)))
2199 recvbuf => decomp%local%halo_left%buf
2203 r_tx(:,1) = decomp%local%halo_left%mn(:)
2204 r_tx(:,2) = decomp%local%halo_left%mx(:)
2205 r_tx(id,1) = decomp%local%mx(id) - hw_left + 1
2206 r_tx(id,2) = decomp%local%mx(id)
2208 r_rx(:,1) = decomp%local%halo_left%mn(:)
2209 r_rx(:,2) = decomp%local%halo_left%mx(:)
2211 if (topo%procs(id) == 1)
then
2215 do k = 0, r_rx(3,2)-r_rx(3,1)
2216 do j = 0, r_rx(2,2)-r_rx(2,1)
2217 do i = 0, r_rx(1,2)-r_rx(1,1)
2218 recvbuf(r_rx(1,1)+i, r_rx(2,1)+j, r_rx(3,1)+k) = &
2219 arr(r_tx(1,1)+i, r_tx(2,1)+j, r_tx(3,1)+k)
2227 nxc = int(hw_left, i64)
2232 nxc = nxc * decomp%local%halo_left%nw(jd)
2235 if (nxc > bufsize)
then
2236 if (
associated(sendbuf))
deallocate(sendbuf)
2237 allocate(sendbuf(nxc))
2243 sll_assert_always(nxc <= nxc_max)
2245 call mpi_sendrecv(sendbuf, nel, mpi_precision, topo%neighbors(2*id), mpi_tag,&
2246 decomp%local%halo_left%buf, nel, mpi_precision, topo%neighbors(2*id-1), mpi_tag,&
2247 topo%comm, mpi_status_ignore, ierr)
2248 sll_assert_always(ierr == mpi_success)
2261 sll_int32,
intent(in) :: id
2265 #ifdef USE_HALO_REAL32
2266 integer,
parameter :: mpi_precision = mpi_real
2268 integer,
parameter :: mpi_precision = mpi_double_precision
2270 integer,
parameter :: mpi_tag=1024
2272 if (
allocated(decomp%local%bc_left_send) .and.
allocated(decomp%local%bc_right))
then
2273 nel =
size(decomp%local%bc_left_send)
2274 call mpi_sendrecv(decomp%local%bc_left_send, nel, mpi_precision, topo%neighbors(2*id), mpi_tag,&
2275 decomp%local%bc_right, nel, mpi_precision, topo%neighbors(2*id-1), mpi_tag,&
2276 topo%comm, mpi_status_ignore, ierr)
2277 sll_assert_always(ierr == mpi_success)
2280 if (
allocated(decomp%local%bc_right_send) .and.
allocated(decomp%local%bc_left))
then
2281 nel =
size(decomp%local%bc_right_send)
2282 call mpi_sendrecv(decomp%local%bc_right_send, nel, mpi_precision, topo%neighbors(2*id-1), mpi_tag,&
2283 decomp%local%bc_left, nel, mpi_precision, topo%neighbors(2*id), mpi_tag,&
2284 topo%comm, mpi_status_ignore, ierr)
2285 sll_assert_always(ierr == mpi_success)
2292 sll_int32,
optional :: id
2294 if (
present(id))
then
2305 sll_int32,
intent(in) :: id
2307 sll_int32 :: idx_mn(5), idx_mx(5)
2313 idx_mn(j) = decomp%local%mn(i)
2314 idx_mx(j) = decomp%local%mx(i)
2324 sll_int32,
intent(in) :: id
2326 sll_int32,
intent(in) :: idx_mn(5), idx_mx(5)
2339 allocate(decomp%local%bc_right_send(idx_mn(1):idx_mx(1), &
2340 idx_mn(2):idx_mx(2), &
2341 idx_mn(3):idx_mx(3), &
2342 idx_mn(4):idx_mx(4), &
2343 idx_mn(5):idx_mx(5)))
2344 allocate(decomp%local%bc_left_send( idx_mn(1):idx_mx(1), &
2345 idx_mn(2):idx_mx(2), &
2346 idx_mn(3):idx_mx(3), &
2347 idx_mn(4):idx_mx(4), &
2348 idx_mn(5):idx_mx(5)))
2349 allocate(decomp%local%bc_right( idx_mn(1):idx_mx(1), &
2350 idx_mn(2):idx_mx(2), &
2351 idx_mn(3):idx_mx(3), &
2352 idx_mn(4):idx_mx(4), &
2353 idx_mn(5):idx_mx(5)))
2354 allocate(decomp%local%bc_left( idx_mn(1):idx_mx(1), &
2355 idx_mn(2):idx_mx(2), &
2356 idx_mn(3):idx_mx(3), &
2357 idx_mn(4):idx_mx(4), &
2358 idx_mn(5):idx_mx(5)))
2366 if (
allocated(decomp%local%bc_right_send))
deallocate(decomp%local%bc_right_send)
2367 if (
allocated(decomp%local%bc_left_send))
deallocate(decomp%local%bc_left_send)
2368 if (
allocated(decomp%local%bc_right))
deallocate(decomp%local%bc_right)
2369 if (
allocated(decomp%local%bc_left))
deallocate(decomp%local%bc_left)
2375 character(len=*),
intent(in) :: file
2376 #ifdef USE_HALO_REAL32
2377 sll_real32,
dimension(:),
intent(in) :: arr
2379 sll_real64,
dimension(:),
intent(in) :: arr
2381 integer,
parameter :: fd = 67
2383 open(unit=fd, file=file)
2393 character(len=*),
intent(in) :: filename
2394 sll_real64,
intent(in) :: array(:)
2395 integer,
parameter :: iunit = 67
2396 open(iunit, file=trim(filename), status=
'replace', form=
'unformatted')
2404 character(len=*),
intent(in) :: file
2406 sll_real64,
dimension(:,:,:,:,:,:),
intent(inout) :: arr(decomp%local%lo(1):decomp%local%hi(1), &
2407 decomp%local%lo(2):decomp%local%hi(2), &
2408 decomp%local%lo(3):decomp%local%hi(3), &
2409 decomp%local%lo(4):decomp%local%hi(4), &
2410 decomp%local%lo(5):decomp%local%hi(5), &
2411 decomp%local%lo(6):decomp%local%hi(6))
2412 integer,
parameter :: fd = 67
2413 sll_int32 :: i,j,k,l,m,n
2414 open(unit=fd, file=file)
2415 do n=decomp%local%mn(6),decomp%local%mx(6)
2416 do m=decomp%local%mn(5),decomp%local%mx(5)
2417 do l=decomp%local%mn(4),decomp%local%mx(4)
2418 do k=decomp%local%mn(3),decomp%local%mx(3)
2419 do j=decomp%local%mn(2),decomp%local%mx(2)
2420 do i=decomp%local%mn(1),decomp%local%mx(1)
2421 write(fd,
'(I3,I3,I3,I3,I3,I3,F20.16)') i, j, k, l, m, n, arr(i,j,k,l,m,n)
2433 character(len=*),
intent(in) :: file
2436 integer,
parameter :: fd = 67
2437 open(unit=fd, file=file)
2438 write(fd,
'(A,1I4)')
"topo%rank : ", topo%rank
2439 write(fd,
'(A,1I4)')
"topo%nprocs : ", topo%nprocs
2440 write(fd,
'(A,6I4)')
"topo%procs : ", topo%procs
2441 write(fd,
'(A,6L4)')
"topo%periodic : ", topo%periodic
2442 write(fd,
'(A,6I4)')
"topo%coords : ", topo%coords
2443 write(fd,
'(A,12I4)')
"topo%neighbors : ", topo%neighbors
2444 write(fd,
'(A,6I4)')
"decomp%global : ", decomp%global
2445 write(fd,
'(A,6I4)')
"decomp%local%mn : ", decomp%local%mn
2446 write(fd,
'(A,6I4)')
"decomp%local%mx : ", decomp%local%mx
2447 write(fd,
'(A,6I4)')
"decomp%local%hw : ", decomp%local%hw
2448 write(fd,
'(A,6I4)')
"decomp%local%lo : ", decomp%local%lo
2449 write(fd,
'(A,6I4)')
"decomp%local%hi : ", decomp%local%hi
2450 write(fd,
'(A,6I4)')
"decomp%local%nw : ", decomp%local%nw
2451 write(fd,
'(A,6I4)')
"decomp%local%gw : ", decomp%local%gw
2452 write(fd,
'(A,6I4)')
"decomp%local%tx_lolo : ", decomp%local%tx_lolo
2453 write(fd,
'(A,6I4)')
"decomp%local%tx_lohi : ", decomp%local%tx_lohi
2454 write(fd,
'(A,6I4)')
"decomp%local%tx_hilo : ", decomp%local%tx_hilo
2455 write(fd,
'(A,6I4)')
"decomp%local%tx_hihi : ", decomp%local%tx_hihi
2456 write(fd,
'(A,6I4)')
"decomp%local%rx_lolo : ", decomp%local%rx_lolo
2457 write(fd,
'(A,6I4)')
"decomp%local%rx_lohi : ", decomp%local%rx_lohi
2458 write(fd,
'(A,6I4)')
"decomp%local%rx_hilo : ", decomp%local%rx_hilo
2459 write(fd,
'(A,6I4)')
"decomp%local%rx_hihi : ", decomp%local%rx_hihi
2472 integer :: process_grid(6), i, j
2473 integer,
intent(in) :: mpi_world_size
2474 integer,
intent(in),
optional :: process_grid_par(6)
2475 logical :: swap_process_grid
2479 if ((
present(process_grid_par)) .and. (product(process_grid_par) == mpi_world_size))
then
2481 process_grid = process_grid_par
2488 select case(mpi_world_size)
2490 process_grid = [1,1,1,1,1,1]
2492 process_grid = [1,1,1,1,1,2]
2494 process_grid = [1,1,1,1,2,2]
2496 process_grid = [1,1,1,2,2,2]
2498 process_grid = [1,1,2,2,2,2]
2500 process_grid = [1,2,2,2,2,2]
2502 process_grid = [2,2,2,2,2,2]
2504 process_grid = [2,2,2,2,2,3]
2506 process_grid = [2,2,2,2,2,4]
2508 process_grid = [2,2,2,2,4,4]
2510 process_grid = [2,2,2,4,4,4]
2512 process_grid = [2,2,4,4,4,4]
2514 process_grid = [2,4,4,4,4,4]
2516 process_grid = [4,4,4,4,4,4]
2518 process_grid = [4,4,4,4,4,8]
2520 process_grid = [4,4,4,4,8,8]
2522 process_grid = [4,4,4,8,8,8]
2524 process_grid = [4,4,8,8,8,8]
2526 process_grid = [4,8,8,8,8,8]
2528 process_grid = [8,8,8,8,8,8]
2530 write(*,*)
"Error: No process topology implemented for ", mpi_world_size,
" processes. STOP."
2533 if (swap_process_grid)
then
2535 j = process_grid(7-i)
2536 process_grid(7-i) = process_grid(i)
2544
Plain Fortran implementation of a memory pool.
subroutine, public mp_statistics()
Module providing an F90 interface to the ZFP compression library: http://computation....
subroutine print_compression_information(comp, verbose)
integer function concatenate_index_arrays(comp, array)
allocate array, copy indices from comp into array, return
subroutine set_compression_precision(prec)
subroutine deallocate_compressed_buffer_obj(comp)
subroutine decatenate_index_arrays(comp, array)
subroutine deflate_buffer_real64(buf, comp, n_doubles, n_threads)
compress buffer
subroutine inflate_buffer_real64(buf, comp, n_threads)
decompress buffer
integer, parameter zfp_blocksize
data structure to support threaded ZFP compression and decompression
Module providing data structures and tools to implement domain decompositions.
type(sll_t_decomposition_3d) function, pointer, public sll_f_new_cartesian_domain_decomposition_3d(topology, grid_size, halo_width)
type(sll_t_cartesian_topology_3d) function, pointer, public sll_f_new_cartesian_topology_3d_from_6d(t6d, keep_dim)
6D-->3D topology mapper, creates a 3D sub-topology from a 6D topology.
subroutine dump_binary(filename, array)
subroutine sll_s_deallocate_cartesian_topology_6d()
6D Cartesian topology destructor
subroutine sll_s_deallocate_cartesian_domain_decomposition_slim_6d()
6D Cartesian slim domain decomposition destructor
subroutine sll_s_deallocate_cartesian_topology_3d()
3D Cartesian topology destructor
type(sll_t_decomposition_slim_6d) function, pointer, public sll_f_new_cartesian_cell_domain_decomposition_slim_6d(topology, n_cells, degree)
type(sll_t_decomposition_6d) function, pointer, public sll_f_new_cartesian_domain_decomposition_6d(topology, grid_size, halo_width)
subroutine sll_f_apply_halo_exchange_6d_real64(topo, decomp, arr, dim_mask_in)
type(sll_t_decomposition_slim_3d) function, pointer, public sll_f_new_cartesian_domain_decomposition_slim_3d(topology, grid_size)
type(sll_t_cartesian_topology_3d) function, pointer, public sll_f_new_cartesian_topology_3d(top_collective, procs_per_dimension, periodic)
3D Cartesian topology constructor function
subroutine sll_s_copy_array_to_buffer_3d_real64(arr, arr_lo, arr_hi, buf, ranges, n_threads)
logical function, dimension(6), public sll_f_select_dim(id)
subroutine sll_s_deallocate_cartesian_domain_decomposition_slim_3d()
3D Cartesian slim domain decomposition destructor
subroutine get_transposed_process_map(procs_per_dimension, rank_map)
Returns a mpi rank table with the processes transposed.
subroutine copy_buffer_to_array_6d_real64(buf, arr, arr_lo, arr_hi, ranges, n_threads)
subroutine, public sll_f_apply_halo_exchange_slim_3d_real64(topo, decomp, arr, id, hw_left, hw_right)
subroutine, public sll_s_apply_bc_exchange_slim_6d_real64(topo, decomp, id)
integer function, dimension(6), public sll_f_set_process_grid(mpi_world_size, process_grid_par)
subroutine mpi_sendrecv_compressed(sendbuf, recvbuf, nel, rank_send, rank_recv, mpi_comm, verbose, mpi_tag)
subroutine dump_dd_information(file, topo, decomp)
subroutine sll_s_deallocate_cartesian_domain_decomposition_6d()
6D Cartesian domain decomposition destructor
subroutine, public sll_s_deallocate_bc_buffers(decomp)
subroutine sll_s_apply_halo_exchange_slim_3d_real64(topo, decomp, arr, id, hw_left, hw_right, halo_block)
subroutine, public sll_s_allocate_bc_buffers_6d(decomp, id)
subroutine dump_ascii(file, arr)
subroutine, public sll_f_apply_halo_exchange_slim_6d_real64(topo, decomp, arr, id, hw_left, hw_right)
subroutine, public sll_s_apply_halo_exchange_slim_6d_real64(topo, decomp, arr, id, hw_left, hw_right, halo_block)
subroutine, public sll_s_allocate_bc_buffers_6d_part(decomp, id, idx_mn, idx_mx)
subroutine, public sll_s_copy_array_to_buffer_6d_real64(arr, arr_lo, arr_hi, buf, ranges, n_threads)
type(sll_t_cartesian_topology_3d) function, pointer, public sll_f_new_cartesian_topology_3d_orthogonal(topo_6d, topo_3d)
type(sll_t_decomposition_slim_6d) function, pointer, public sll_f_new_cartesian_domain_decomposition_slim_6d(topology, grid_size)
subroutine sll_s_deallocate_cartesian_domain_decomposition_3d()
3D Cartesian domain decomposition destructor
subroutine sll_s_deallocate_cartesian_cell_domain_decomposition_slim_6d()
6D Cartesian slim domain decomposition destructor
subroutine dump_ascii_6d(file, decomp, arr)
subroutine, public sll_s_mpi_sendrecv_compressed_core(comp_send, comp_recv, rank_send, rank_recv, mpi_comm, verbose, mpi_tag)
MPI sendrecv functionality, wrapped for a compressed buffer.
subroutine dummy_mempool()
subroutine mpi_sendrecv_compressed_6d_real64(sendbuf, recvbuf, nel, rank_send, rank_recv, mpi_comm, verbose, mpi_tag)
type(sll_t_cartesian_topology_6d) function, pointer, public sll_f_new_cartesian_topology_6d(top_collective, procs_per_dimension, periodic)
6D Cartesian topology constructor function
3D decomposition, index limits local to an MPI process.
6D decomposition, index limits local to an MPI process.
Some common numerical utilities.
logical function, public sll_f_query_environment(env_variable, default_value)
Query an environment variable for the values on,off,1,0,true,false and return the result as a logical...
Wrapper around the communicator.
6D decomposition, "slim" redesign with dynamic halo cells
Information on the 3D cartesian process topology.
Information on the 6D cartesian process topology.
3D decomposition, global array size information and local information.
6D decomposition, global array size information and local information.
6D decomposition, slim redesign, global array size information and local information.