11 #ifdef USE_HALO_REAL32
12 #define HALO_DTYPE sll_real32
14 #define HALO_DTYPE sll_real64
26 #include "sll_assert.h"
27 #include "sll_errors.h"
28 #include "sll_working_precision.h"
59 #define OMP_COLLAPSE collapse(2)
60 #define OMP_SCHEDULE schedule(static)
84 sll_real64,
allocatable :: displacement_eta1(:)
85 sll_real64,
allocatable :: displacement_eta2(:)
86 sll_real64,
allocatable :: displacement_eta3(:)
88 sll_int32 ,
allocatable :: halo_blocks_eta1(:,:,:)
89 sll_int32 ,
allocatable :: halo_blocks_eta2(:,:,:)
90 sll_int32 ,
allocatable :: halo_blocks_eta3(:,:,:)
92 sll_int32 ,
allocatable :: halo_blocks5d_eta2(:,:,:)
93 sll_int32 ,
allocatable :: halo_blocks5d_eta3(:,:,:)
95 sll_int32 ,
allocatable :: halo_width_eta1(:,:)
96 sll_int32 ,
allocatable :: halo_width_eta2(:,:)
97 sll_int32 ,
allocatable :: halo_width_eta3(:,:)
99 sll_int32,
allocatable :: idisplacement_eta1(:)
100 sll_int32,
allocatable :: idisplacement_eta2(:)
101 sll_int32,
allocatable :: idisplacement_eta3(:)
103 sll_int32 :: n_halo_blocks(3)
112 if (
allocated(self%displacement_eta1))
deallocate(self%displacement_eta1)
113 if (
allocated(self%displacement_eta2))
deallocate(self%displacement_eta2)
114 if (
allocated(self%displacement_eta3))
deallocate(self%displacement_eta3)
116 if (
allocated(self%halo_blocks_eta1))
deallocate(self%halo_blocks_eta1)
117 if (
allocated(self%halo_blocks_eta2))
deallocate(self%halo_blocks_eta2)
118 if (
allocated(self%halo_blocks_eta3))
deallocate(self%halo_blocks_eta3)
120 if (
allocated(self%halo_blocks5d_eta2))
deallocate(self%halo_blocks5d_eta2)
121 if (
allocated(self%halo_blocks5d_eta3))
deallocate(self%halo_blocks5d_eta3)
123 if (
allocated(self%halo_width_eta1))
deallocate(self%halo_width_eta1)
124 if (
allocated(self%halo_width_eta2))
deallocate(self%halo_width_eta2)
125 if (
allocated(self%halo_width_eta3))
deallocate(self%halo_width_eta3)
127 if (
allocated(self%idisplacement_eta1))
deallocate(self%idisplacement_eta1)
128 if (
allocated(self%idisplacement_eta2))
deallocate(self%idisplacement_eta2)
129 if (
allocated(self%idisplacement_eta3))
deallocate(self%idisplacement_eta3)
134 displacement_eta2, displacement_eta3)
138 sll_real64,
intent(in) :: displacement_eta1(:)
139 sll_real64,
intent(in) :: displacement_eta2(:)
140 sll_real64,
intent(in) :: displacement_eta3(:)
142 allocate(self%displacement_eta1(decomposition%local%mn(4):decomposition%local%mx(4)))
143 self%displacement_eta1 = displacement_eta1
144 allocate(self%displacement_eta2(decomposition%local%mn(5):decomposition%local%mx(5)))
145 self%displacement_eta2 = displacement_eta2
146 allocate(self%displacement_eta3(decomposition%local%mn(6):decomposition%local%mx(6)))
147 self%displacement_eta3 = displacement_eta3
150 self%displacement_eta1, self%idisplacement_eta1, self%halo_blocks_eta1, &
151 self%halo_width_eta1, self%n_halo_blocks(1) )
153 self%displacement_eta2, self%idisplacement_eta2, self%halo_blocks_eta2, &
154 self%halo_width_eta2, self%n_halo_blocks(2) )
156 self%displacement_eta3, self%idisplacement_eta3, self%halo_blocks_eta3, &
157 self%halo_width_eta3, self%n_halo_blocks(3) )
159 allocate(self%halo_blocks5d_eta2(5, 2, self%n_halo_blocks(2)))
160 allocate(self%halo_blocks5d_eta3(5, 2, self%n_halo_blocks(3)))
162 self%halo_blocks5d_eta2(1,:,:) = self%halo_blocks_eta2(1,:,:)
163 self%halo_blocks5d_eta2(2:5,:,:) = self%halo_blocks_eta2(3:6,:,:)
164 self%halo_blocks5d_eta3(1:2,:,:) = self%halo_blocks_eta3(1:2,:,:)
165 self%halo_blocks5d_eta3(3:5,:,:) = self%halo_blocks_eta3(4:6,:,:)
166 #ifdef DISABLE_CACHE_BLOCKING
167 write(*,*)
"sll_m_advection_6d_spline_dd_slim :: cache blocking disabled"
175 sll_int32,
intent(in),
optional :: id_in
176 sll_int32 ::
get_wx, wx, id
177 #ifndef DISABLE_CACHE_BLOCKING
179 if (
present(id_in))
then
193 wx = decomposition%local%nw(id)
202 subroutine make_blocks_spline( ind, decomposition, disp, disp_int, halo_blocks , halo_width, n_halo_blocks)
203 sll_int32,
intent( in ) :: ind
205 sll_real64,
intent( inout ) :: disp(decomposition%local%mn(ind):decomposition%local%mx(ind))
206 sll_int32,
intent( out ),
allocatable :: disp_int(:)
207 sll_int32,
intent( out ),
allocatable :: halo_blocks(:,:,:)
208 sll_int32,
intent( out ),
allocatable :: halo_width(:,:)
209 sll_int32,
intent( out ) :: n_halo_blocks
211 sll_int32 :: index_range(2)
212 sll_int32 :: blocks, j, bl
213 sll_int32 :: box1, box2
216 index_range = [ decomposition%local%mn(ind), decomposition%local%mx(ind) ]
221 if ( abs(disp(bl) ) == 0.0_f64) bl = bl+1
222 box1 = floor( disp(bl) )
224 if ( abs(disp(bl) ) == 0.0_f64) bl = bl-1
225 box2 = floor( disp(bl) )
228 blocks = abs(box2-box1)+1
231 allocate( halo_blocks(6, 2, blocks) )
232 allocate( halo_width(2, blocks) )
233 allocate( disp_int( blocks ) )
235 halo_blocks(:, 1, j) = decomposition%local%mn
236 halo_blocks(:, 2, j) = decomposition%local%mx
240 if (box1 > box2 )
then
243 if ( abs(disp(j)) == 0.0_f64 ) j = j+1
245 halo_width(1, bl ) = -si
246 halo_width(2, bl ) = si+1
248 halo_blocks(ind, 1, bl ) = j
249 do while( disp(j) > box1-bl+1 )
251 if ( j > index_range(2))
exit
253 if ( abs(disp(j-1)) == 0.0_f64 )
then
254 halo_blocks(ind, 2, bl ) = j-2
256 halo_blocks(ind, 2, bl ) = j-1
262 if ( abs(disp(j)) == 0.0_f64 ) j = j+1
263 halo_width(1, bl-box1+1) = -bl
264 halo_width(2, bl-box1+1) = bl+1
265 disp_int( bl-box1+1 ) = bl
266 halo_blocks(ind, 1, bl-box1+1) = j
267 do while ( (disp(j) < bl+1) )
269 if ( j > index_range(2) )
exit
271 if ( abs(disp(j-1)) == 0.0_f64 )
then
272 halo_blocks(ind, 2, bl-box1+1) = j-2
274 halo_blocks(ind, 2, bl-box1+1) = j-1
279 n_halo_blocks = blocks
282 do j= index_range(1), index_range(2)
283 disp(j) = disp(j) - real( floor( disp(j)), f64)
284 sll_assert( disp(j) >= 0.0_f64 )
285 sll_assert( disp(j) <= 1.0_f64 )
295 sll_real64,
intent(inout) :: f6d(&
296 decomposition%local%mn(1):decomposition%local%mx(1), &
297 decomposition%local%mn(2):decomposition%local%mx(2), &
298 decomposition%local%mn(3):decomposition%local%mx(3), &
299 decomposition%local%mn(4):decomposition%local%mx(4), &
300 decomposition%local%mn(5):decomposition%local%mx(5), &
301 decomposition%local%mn(6):decomposition%local%mx(6))
303 sll_int32,
parameter :: id = 1
304 sll_int32 :: i,j,k,l,m,n,w,wx,bl
305 sll_int32 :: num_points
306 sll_real64,
allocatable :: buf_i(:,:)
307 sll_real64,
allocatable :: buf_o(:,:)
308 sll_int32 :: l_mn, l_mx, c_mn, c_mx, r_mn, r_mx
309 halo_dtype,
pointer :: l_halo(:,:,:,:,:,:)
310 halo_dtype,
pointer :: r_halo(:,:,:,:,:,:)
311 halo_dtype,
allocatable :: d0(:), c_np2(:)
312 sll_int32,
pointer :: loop_mn(:), loop_mx(:)
314 c_mn = decomposition%local%mn(id)
315 c_mx = decomposition%local%mx(id)
316 num_points = decomposition%local%nw(id)
317 loop_mn => decomposition%local%mn
318 loop_mx => decomposition%local%mx
321 wx =
get_wx(decomposition, 2)
323 do bl=1, self%n_halo_blocks(id)
325 l_mn = c_mn-self%halo_width_eta1(1, bl)
328 r_mx = c_mx+self%halo_width_eta1(2, bl)
332 self%halo_blocks_eta1(2:6,1,bl), self%halo_blocks_eta1(2:6,2,bl))
335 allocate(c_np2(0:wx-1))
338 do n=loop_mn(6), loop_mx(6)
339 do m=loop_mn(5), loop_mx(5)
340 do l=self%halo_blocks_eta1(id+3, 1, bl), self%halo_blocks_eta1(id+3, 2, bl)
341 do k=loop_mn(3), loop_mx(3)
342 #ifndef DISABLE_CACHE_BLOCKING
343 do j=loop_mn(2), loop_mx(2), wx
347 f6d(:,j+w,k,l,m,n), self%idisplacement_eta1(bl), &
348 num_points, d0(w), c_np2(w))
351 decomposition%local%bc_left_send(j+w,k,l,m,n) = c_np2(w)
354 decomposition%local%bc_right_send(j+w,k,l,m,n) = d0(w)
358 do j=loop_mn(2), loop_mx(2)
361 f6d(:,j,k,l,m,n), self%idisplacement_eta1(bl), &
362 num_points, d0(0), c_np2(0) )
363 decomposition%local%bc_left_send(j,k,l,m,n) = c_np2(0)
364 decomposition%local%bc_right_send(j,k,l,m,n) = d0(0)
382 self%halo_width_eta1(1, bl), &
383 self%halo_width_eta1(2, bl), &
384 self%halo_blocks_eta1(:,:,bl))
385 l_halo => decomposition%local%halo_left%buf
386 r_halo => decomposition%local%halo_right%buf
388 allocate(buf_i(l_mn-1:r_mx+1,0:wx-1))
389 allocate(buf_o(l_mn-1:r_mx,0:wx-1))
391 do n=loop_mn(6), loop_mx(6)
392 do m=loop_mn(5), loop_mx(5)
393 do l=self%halo_blocks_eta1(id+3, 1, bl), self%halo_blocks_eta1(id+3, 2, bl)
394 do k=loop_mn(3), loop_mx(3)
395 #ifndef DISABLE_CACHE_BLOCKING
396 do j=loop_mn(2), loop_mx(2), wx
400 buf_i(i,w) = f6d(i,j+w,k,l,m,n)
407 buf_i(c_mn:c_mx,w), &
408 self%idisplacement_eta1(bl), num_points, &
409 decomposition%local%bc_left(j+w,k,l,m,n), &
410 decomposition%local%bc_right(j+w,k,l,m,n))
415 buf_i(l_mn-1,w) = decomposition%local%bc_left(j+w,k,l,m,n)
420 if ( l_mn <= l_mx )
then
424 buf_i(i,w) = l_halo(i,j+w,k,l,m,n)
431 buf_i(i,w) = r_halo(i,j+w,k,l,m,n)
436 buf_i(r_mx+1,w) = decomposition%local%bc_right(j+w,k,l,m,n)
443 buf_i(:,w), num_points, buf_o(:,w), buf_i(:,w) )
450 buf_i(:,w), self%displacement_eta1(l), num_points, buf_o(:,w))
457 f6d(c_mn+i,j+w,k,l,m,n) = buf_o(l_mn-1+i,w)
462 do j=loop_mn(2), loop_mx(2)
465 buf_i(i,0) = f6d(i,j,k,l,m,n)
470 buf_i(c_mn:c_mx,0), &
471 self%idisplacement_eta1( bl ), num_points, &
472 decomposition%local%bc_left(j,k,l,m,n), &
473 decomposition%local%bc_right(j,k,l,m,n))
475 buf_i(l_mn-1,0) = decomposition%local%bc_left(j,k,l,m,n)
477 if ( l_mn <= l_mx )
then
480 buf_i(i,0) = l_halo(i,j,k,l,m,n)
485 buf_i(i,0) = r_halo(i,j,k,l,m,n)
488 buf_i(r_mx+1,0) = decomposition%local%bc_right(j,k,l,m,n)
492 buf_i(:,0), num_points, buf_o(:,0), buf_i(:,0) )
496 buf_i(:,0), self%displacement_eta1(l), num_points, buf_o(:,0))
500 f6d(c_mn+i,j,k,l,m,n) = buf_o(l_mn-1+i,0)
523 sll_real64,
intent(inout) :: f6d(&
524 decomposition%local%mn(1):decomposition%local%mx(1), &
525 decomposition%local%mn(2):decomposition%local%mx(2), &
526 decomposition%local%mn(3):decomposition%local%mx(3), &
527 decomposition%local%mn(4):decomposition%local%mx(4), &
528 decomposition%local%mn(5):decomposition%local%mx(5), &
529 decomposition%local%mn(6):decomposition%local%mx(6))
531 sll_int32,
parameter :: id = 2
532 sll_int32 :: i,j,k,l,m,n,bl,w
533 sll_int32 :: num_points
534 sll_real64,
allocatable :: buf_i(:,:)
535 sll_real64,
allocatable :: buf_o(:,:)
536 sll_int32 :: l_mn, l_mx, c_mn, c_mx, r_mn, r_mx
537 halo_dtype,
pointer :: l_halo(:,:,:,:,:,:)
538 halo_dtype,
pointer :: r_halo(:,:,:,:,:,:)
540 halo_dtype,
allocatable :: d0(:), c_np2(:)
541 sll_int32,
pointer :: loop_mn(:), loop_mx(:)
543 c_mn = decomposition%local%mn(id)
544 c_mx = decomposition%local%mx(id)
545 num_points = decomposition%local%nw(id)
546 loop_mn => decomposition%local%mn
547 loop_mx => decomposition%local%mx
548 wx =
get_wx(decomposition)
550 do bl=1, self%n_halo_blocks(id)
553 self%halo_blocks5d_eta2(:,1,bl), self%halo_blocks5d_eta2(:,2,bl))
554 l_mn = c_mn-self%halo_width_eta2(1, bl)
557 r_mx = c_mx+self%halo_width_eta2(2, bl)
560 allocate(buf_i(l_mn-1:r_mx+1,0:wx-1))
561 allocate(buf_o(l_mn-1:r_mx,0:wx-1))
563 allocate(c_np2(0:wx-1))
566 do n=loop_mn(6), loop_mx(6)
567 do m=self%halo_blocks_eta2(id+3, 1, bl), self%halo_blocks_eta2(id+3, 2, bl)
568 do l=loop_mn(4), loop_mx(4)
569 do k=loop_mn(3), loop_mx(3)
570 #ifndef DISABLE_CACHE_BLOCKING
571 do i=loop_mn(1), loop_mx(1), wx
575 buf_i(j,w) = f6d(i+w,j,k,l,m,n)
581 buf_i(c_mn:c_mx,w), self%idisplacement_eta2(bl), &
582 num_points, d0(w), c_np2(w) )
585 decomposition%local%bc_left_send(i+w,k,l,m,n) = c_np2(w)
588 decomposition%local%bc_right_send(i+w,k,l,m,n) = d0(w)
592 do i=loop_mn(1), loop_mx(1)
595 buf_i(j,0) = f6d(i,j,k,l,m,n)
599 buf_i(c_mn:c_mx,0), self%idisplacement_eta2( bl), &
600 num_points, d0(0), c_np2(0))
601 decomposition%local%bc_left_send(i,k,l,m,n) = c_np2(0)
602 decomposition%local%bc_right_send(i,k,l,m,n) = d0(0)
620 self%halo_width_eta2(1, bl), &
621 self%halo_width_eta2(2, bl), &
622 self%halo_blocks_eta2(:,:,bl))
623 l_halo => decomposition%local%halo_left%buf
624 r_halo => decomposition%local%halo_right%buf
627 do n=loop_mn(6), loop_mx(6)
628 do m=self%halo_blocks_eta2(id+3, 1, bl), self%halo_blocks_eta2(id+3, 2, bl)
629 do l=loop_mn(4), loop_mx(4)
630 do k=loop_mn(3), loop_mx(3)
631 #ifndef DISABLE_CACHE_BLOCKING
632 do i=loop_mn(1), loop_mx(1), wx
636 buf_i(j,w) = f6d(i+w,j,k,l,m,n)
643 buf_i(c_mn:c_mx,w), self%idisplacement_eta2( bl ), num_points, &
644 decomposition%local%bc_left(i+w,k,l,m,n), &
645 decomposition%local%bc_right(i+w,k,l,m,n) )
649 buf_i(l_mn-1,w) = decomposition%local%bc_left(i+w,k,l,m,n)
652 if ( l_mn <= l_mx )
then
656 buf_i(j,w) = l_halo(i+w,j,k,l,m,n)
663 buf_i(j,w) = r_halo(i+w,j,k,l,m,n)
668 buf_i(r_mx+1,w) = decomposition%local%bc_right(i+w,k,l,m,n)
674 buf_i(:,w), num_points, buf_o(:,w), buf_i(:,w) )
680 buf_i(:,w), self%displacement_eta2(m), num_points, buf_o(:,w))
686 f6d(i+w,c_mn+j,k,l,m,n) = buf_o(l_mn-1+j,w)
691 do i=loop_mn(1), loop_mx(1)
694 buf_i(j,0) = f6d(i,j,k,l,m,n)
699 buf_i(c_mn:c_mx,0), self%idisplacement_eta2( bl ), num_points, &
700 decomposition%local%bc_left(i,k,l,m,n), &
701 decomposition%local%bc_right(i,k,l,m,n) )
703 buf_i(l_mn-1,0) = decomposition%local%bc_left(i,k,l,m,n)
705 if ( l_mn <= l_mx )
then
708 buf_i(j,0) = l_halo(i,j,k,l,m,n)
713 buf_i(j,0) = r_halo(i,j,k,l,m,n)
716 buf_i(r_mx+1,0) = decomposition%local%bc_right(i,k,l,m,n)
720 buf_i(:,0), num_points, buf_o(:,0), buf_i(:,0) )
724 buf_i(:,0), self%displacement_eta2(m), num_points, buf_o(:,0))
728 f6d(i,c_mn+j,k,l,m,n) = buf_o(l_mn-1+j,0)
750 sll_real64,
intent(inout) :: f6d(&
751 decomposition%local%mn(1):decomposition%local%mx(1), &
752 decomposition%local%mn(2):decomposition%local%mx(2), &
753 decomposition%local%mn(3):decomposition%local%mx(3), &
754 decomposition%local%mn(4):decomposition%local%mx(4), &
755 decomposition%local%mn(5):decomposition%local%mx(5), &
756 decomposition%local%mn(6):decomposition%local%mx(6))
758 sll_int32,
parameter :: id = 3
759 sll_int32 :: i,j,k,l,m,n,bl,w
760 sll_int32 :: num_points
761 sll_real64,
allocatable :: buf_i(:,:)
762 sll_real64,
allocatable :: buf_o(:,:)
763 sll_int32 :: l_mn, l_mx, c_mn, c_mx, r_mn, r_mx
764 halo_dtype,
pointer :: l_halo(:,:,:,:,:,:)
765 halo_dtype,
pointer :: r_halo(:,:,:,:,:,:)
767 halo_dtype,
allocatable :: d0(:), c_np2(:)
768 sll_int32,
pointer :: loop_mn(:), loop_mx(:)
770 c_mn = decomposition%local%mn(id)
771 c_mx = decomposition%local%mx(id)
772 num_points = decomposition%local%nw(id)
773 loop_mn => decomposition%local%mn
774 loop_mx => decomposition%local%mx
775 wx =
get_wx(decomposition)
777 do bl=1, self%n_halo_blocks(id)
780 self%halo_blocks5d_eta3(:,1,bl), self%halo_blocks5d_eta3(:,2,bl))
781 l_mn = c_mn-self%halo_width_eta3(1, bl)
784 r_mx = c_mx+self%halo_width_eta3(2, bl)
787 allocate(buf_i(l_mn-1:r_mx+1,0:wx-1))
788 allocate(buf_o(l_mn-1:r_mx,0:wx-1))
790 allocate(c_np2(0:wx-1))
793 do n=self%halo_blocks_eta3(id+3, 1, bl), self%halo_blocks_eta3(id+3, 2, bl)
794 do m=loop_mn(5), loop_mx(5)
795 do l=loop_mn(4), loop_mx(4)
796 do j=loop_mn(2), loop_mx(2)
797 #ifndef DISABLE_CACHE_BLOCKING
798 do i=loop_mn(1), loop_mx(1), wx
802 buf_i(k,w) = f6d(i+w,j,k,l,m,n)
808 buf_i(c_mn:c_mx,w), self%idisplacement_eta3(bl), &
809 num_points, d0(w), c_np2(w))
812 decomposition%local%bc_left_send(i+w,j,l,m,n) = c_np2(w)
815 decomposition%local%bc_right_send(i+w,j,l,m,n) = d0(w)
819 do i=loop_mn(1), loop_mx(1)
822 buf_i(k,0) = f6d(i,j,k,l,m,n)
826 buf_i(c_mn:c_mx,0), self%idisplacement_eta3(bl), &
827 num_points, d0(0), c_np2(0) )
828 decomposition%local%bc_left_send(i,j,l,m,n) = c_np2(0)
829 decomposition%local%bc_right_send(i,j,l,m,n) = d0(0)
847 self%halo_width_eta3(1, bl), &
848 self%halo_width_eta3(2, bl), &
849 self%halo_blocks_eta3(:,:,bl))
850 l_halo => decomposition%local%halo_left%buf
851 r_halo => decomposition%local%halo_right%buf
854 do n=self%halo_blocks_eta3(id+3, 1, bl), self%halo_blocks_eta3(id+3, 2, bl)
855 do m=loop_mn(5), loop_mx(5)
856 do l=loop_mn(4), loop_mx(4)
857 do j=loop_mn(2), loop_mx(2)
858 #ifndef DISABLE_CACHE_BLOCKING
859 do i=loop_mn(1), loop_mx(1), wx
863 buf_i(k,w) = f6d(i+w,j,k,l,m,n)
870 buf_i(c_mn:c_mx,w), self%idisplacement_eta3( bl ), num_points, &
871 decomposition%local%bc_left(i+w,j,l,m,n), &
872 decomposition%local%bc_right(i+w,j,l,m,n) )
876 buf_i(l_mn-1,w) = decomposition%local%bc_left(i+w,j,l,m,n)
879 if ( l_mn <= l_mx )
then
883 buf_i(k,w) = l_halo(i+w,j,k,l,m,n)
890 buf_i(k,w) = r_halo(i+w,j,k,l,m,n)
895 buf_i(r_mx+1,w) = decomposition%local%bc_right(i+w,j,l,m,n)
901 buf_i(:,w), num_points, buf_o(:,w), buf_i(:,w))
908 buf_i(:,w), self%displacement_eta3(n), num_points, buf_o(:,w))
914 f6d(i+w,j,c_mn+k,l,m,n) = buf_o(l_mn-1+k,w)
919 do i=loop_mn(1), loop_mx(1)
922 buf_i(k,0) = f6d(i,j,k,l,m,n)
927 buf_i(c_mn:c_mx,0), self%idisplacement_eta3( bl ), num_points, &
928 decomposition%local%bc_left(i,j,l,m,n), &
929 decomposition%local%bc_right(i,j,l,m,n) )
931 buf_i(l_mn-1,0) = decomposition%local%bc_left(i,j,l,m,n)
933 if ( l_mn <= l_mx )
then
936 buf_i(k,0) = l_halo(i,j,k,l,m,n)
941 buf_i(k,0) = r_halo(i,j,k,l,m,n)
944 buf_i(r_mx+1,0) = decomposition%local%bc_right(i,j,l,m,n)
948 buf_i(:,0), num_points, buf_o(:,0), buf_i(:,0) )
953 buf_i(:,0), self%displacement_eta3(n), num_points, buf_o(:,0))
958 f6d(i,j,c_mn+k,l,m,n) = buf_o(l_mn-1+k,0)
980 sll_real64,
intent(in) :: displacement(&
981 decomposition%local%mn(1):decomposition%local%mx(1), &
982 decomposition%local%mn(2):decomposition%local%mx(2), &
983 decomposition%local%mn(3):decomposition%local%mx(3))
984 sll_real64,
intent(inout) :: f6d(&
985 decomposition%local%mn(1):decomposition%local%mx(1), &
986 decomposition%local%mn(2):decomposition%local%mx(2), &
987 decomposition%local%mn(3):decomposition%local%mx(3), &
988 decomposition%local%mn(4):decomposition%local%mx(4), &
989 decomposition%local%mn(5):decomposition%local%mx(5), &
990 decomposition%local%mn(6):decomposition%local%mx(6))
992 sll_int32,
parameter :: id = 4
993 sll_int32 :: i,j,k,l,m,n,w
995 sll_real64,
allocatable :: buf_i(:,:)
996 sll_real64,
allocatable :: buf_o(:,:)
997 sll_int32 :: n_loc, ind_max
998 halo_dtype,
pointer :: l_halo(:,:,:,:,:,:)
999 halo_dtype,
pointer :: r_halo(:,:,:,:,:,:)
1000 sll_int32,
allocatable :: si(:)
1001 sll_int32,
allocatable :: indm(:)
1002 sll_real64,
allocatable :: alpha(:)
1004 halo_dtype,
allocatable :: d0(:), c_np2(:)
1005 sll_int32,
pointer :: loop_mn(:), loop_mx(:)
1010 c_mn = decomposition%local%mn(id)
1011 n_loc = decomposition%local%nw(id)
1012 loop_mn => decomposition%local%mn
1013 loop_mx => decomposition%local%mx
1015 wx =
get_wx(decomposition)
1020 allocate(buf_i(1:ind_max, 0:wx-1))
1021 allocate(buf_o(1:ind_max-1, 0:wx-1))
1022 allocate(si(0:wx-1))
1023 allocate(indm(0:wx-1))
1024 allocate(alpha(0:wx-1))
1025 allocate(d0(0:wx-1))
1026 allocate(c_np2(0:wx-1))
1029 do n=loop_mn(6), loop_mx(6)
1030 do m=loop_mn(5), loop_mx(5)
1031 do k=loop_mn(3), loop_mx(3)
1032 do j=loop_mn(2), loop_mx(2)
1033 #ifndef DISABLE_CACHE_BLOCKING
1034 do i=loop_mn(1), loop_mx(1), wx
1038 buf_i(1+l,w) = f6d(i+w,j,k,c_mn+l,m,n)
1042 si(w) = floor(displacement(i+w,j,k))
1047 buf_i(1:n_loc,w), si(w), n_loc, d0(w), c_np2(w) )
1050 decomposition%local%bc_left_send(i+w,j,k,m,n) = c_np2(w)
1053 decomposition%local%bc_right_send(i+w,j,k,m,n) = d0(w)
1057 do i=loop_mn(1), loop_mx(1)
1058 si(0) = floor( displacement(i,j,k) )
1061 buf_i(1+l,0) = f6d(i,j,k,c_mn+l,m,n)
1065 buf_i(1:n_loc,0), si(0), n_loc, d0(0), c_np2(0) )
1066 decomposition%local%bc_left_send(i,j,k,m,n) = c_np2(0)
1067 decomposition%local%bc_right_send(i,j,k,m,n) = d0(0)
1088 l_halo => decomposition%local%halo_left%buf
1089 r_halo => decomposition%local%halo_right%buf
1092 do n=loop_mn(6), loop_mx(6)
1093 do m=loop_mn(5), loop_mx(5)
1094 do k=loop_mn(3), loop_mx(3)
1095 do j=loop_mn(2), loop_mx(2)
1096 #ifndef DISABLE_CACHE_BLOCKING
1097 do i=loop_mn(1), loop_mx(1), wx
1099 si(w) = floor( displacement(i+w,j,k) )
1102 alpha(w) = displacement(i+w,j,k) - real(si(w), f64)
1105 if ( si(w) == 0 )
then
1107 buf_i(n_loc+2,w) = r_halo(i+w,j,k,loop_mx(4)+1,m,n)
1110 buf_i(indm(w),w) = l_halo(i+w,j,k,c_mn-1,m,n)
1116 buf_i(indm(w)+1+l,w) = f6d(i+w,j,k,c_mn+l,m,n)
1123 buf_i(indm(w)+1:n_loc+indm(w),w), si(w), n_loc, &
1124 decomposition%local%bc_left(i+w,j,k,m,n), &
1125 decomposition%local%bc_right(i+w,j,k,m,n) )
1128 buf_i(1, w) = decomposition%local%bc_left(i+w,j,k,m,n)
1131 buf_i(ind_max, w) = decomposition%local%bc_right(i+w,j,k,m,n)
1137 buf_i(:,w), n_loc, buf_o(:,w), buf_i(:,w) )
1143 buf_i(:,w), alpha(w), n_loc, buf_o(:,w))
1149 f6d(i+w,j,k,c_mn+l,m,n) = buf_o(1+l,w)
1154 do i=loop_mn(1), loop_mx(1)
1155 si(0) = floor( displacement(i,j,k) )
1156 alpha(0) = displacement(i,j,k) - real(si(0), f64)
1157 if ( si(0) == 0 )
then
1159 buf_i(n_loc+2,0) = r_halo(i,j,k,loop_mx(4)+1,m,n)
1162 buf_i(indm(0),0) = l_halo(i,j,k,c_mn-1,m,n)
1166 buf_i(indm(0)+1+l,0) = f6d(i,j,k,c_mn+l,m,n)
1171 buf_i(indm(0)+1:n_loc+indm(0),0), si(0), n_loc, &
1172 decomposition%local%bc_left(i,j,k,m,n), &
1173 decomposition%local%bc_right(i,j,k,m,n) )
1174 buf_i( 1, 0 ) = decomposition%local%bc_left(i,j,k,m,n)
1175 buf_i( ind_max, 0 ) = decomposition%local%bc_right(i,j,k,m,n)
1179 buf_i(:,0), n_loc, buf_o(:,0), buf_i(:,0) )
1183 buf_i(:,0), alpha(0), n_loc, buf_o(:,0))
1187 f6d(i,j,k,c_mn+l,m,n) = buf_o(1+l,0)
1211 sll_real64,
intent(in) :: displacement(&
1212 decomposition%local%mn(1):decomposition%local%mx(1), &
1213 decomposition%local%mn(2):decomposition%local%mx(2), &
1214 decomposition%local%mn(3):decomposition%local%mx(3))
1215 sll_real64,
intent(inout) :: f6d(&
1216 decomposition%local%mn(1):decomposition%local%mx(1), &
1217 decomposition%local%mn(2):decomposition%local%mx(2), &
1218 decomposition%local%mn(3):decomposition%local%mx(3), &
1219 decomposition%local%mn(4):decomposition%local%mx(4), &
1220 decomposition%local%mn(5):decomposition%local%mx(5), &
1221 decomposition%local%mn(6):decomposition%local%mx(6))
1223 sll_int32,
parameter :: id = 5
1224 sll_int32 :: i,j,k,l,m,n,w
1225 sll_real64,
allocatable :: buf_i(:,:)
1226 sll_real64,
allocatable :: buf_o(:,:)
1227 sll_int32 :: n_loc, ind_max
1228 halo_dtype,
pointer :: l_halo(:,:,:,:,:,:)
1229 halo_dtype,
pointer :: r_halo(:,:,:,:,:,:)
1230 sll_int32,
allocatable :: si(:)
1231 sll_int32,
allocatable :: indm(:)
1232 sll_real64,
allocatable :: alpha(:)
1235 halo_dtype,
allocatable :: d0(:), c_np2(:)
1236 sll_int32,
pointer :: loop_mn(:), loop_mx(:)
1241 c_mn = decomposition%local%mn(id)
1242 n_loc = decomposition%local%nw(id)
1243 loop_mn => decomposition%local%mn
1244 loop_mx => decomposition%local%mx
1246 wx =
get_wx(decomposition)
1249 allocate(buf_i(1:ind_max, 0:wx-1))
1250 allocate(buf_o(1:ind_max-1, 0:wx-1))
1251 allocate(si(0:wx-1))
1252 allocate(indm(0:wx-1))
1253 allocate(alpha(0:wx-1))
1254 allocate(d0(0:wx-1))
1255 allocate(c_np2(0:wx-1))
1258 do n=loop_mn(6), loop_mx(6)
1259 do l=loop_mn(4), loop_mx(4)
1260 do k=loop_mn(3), loop_mx(3)
1261 do j=loop_mn(2), loop_mx(2)
1262 #ifndef DISABLE_CACHE_BLOCKING
1263 do i=loop_mn(1), loop_mx(1), wx
1265 si(w) = floor( displacement(i+w,j,k) )
1270 buf_i(1+m,w) = f6d(i+w,j,k,l,c_mn+m,n)
1276 buf_i(1:n_loc,w), si(w), n_loc, d0(w), c_np2(w) )
1279 decomposition%local%bc_left_send(i+w,j,k,l,n) = c_np2(w)
1282 decomposition%local%bc_right_send(i+w,j,k,l,n) = d0(w)
1286 do i=loop_mn(1), loop_mx(1)
1287 si(0) = floor( displacement(i,j,k) )
1290 buf_i(1+m,0) = f6d(i,j,k,l,c_mn+m,n)
1294 buf_i(1:n_loc,0), si(0), n_loc, d0(0), c_np2(0) )
1295 decomposition%local%bc_left_send(i,j,k,l,n) = c_np2(0)
1296 decomposition%local%bc_right_send(i,j,k,l,n) = d0(0)
1317 l_halo => decomposition%local%halo_left%buf
1318 r_halo => decomposition%local%halo_right%buf
1321 do n=loop_mn(6), loop_mx(6)
1322 do l=loop_mn(4), loop_mx(4)
1323 do k=loop_mn(3), loop_mx(3)
1324 do j=loop_mn(2), loop_mx(2)
1325 #ifndef DISABLE_CACHE_BLOCKING
1326 do i=loop_mn(1), loop_mx(1), wx
1328 si(w) = floor( displacement(i+w,j,k) )
1331 alpha(w) = displacement(i+w,j,k) - real(si(w), f64)
1334 if ( si(w) == 0 )
then
1336 buf_i(n_loc+2,w) = r_halo(i+w,j,k,l,loop_mx(5)+1,n)
1339 buf_i(indm(w),w) = l_halo(i+w,j,k,l,c_mn-1,n)
1345 buf_i(indm(w)+1+m,w) = f6d(i+w,j,k,l,c_mn+m,n)
1353 buf_i(indm(w)+1:n_loc+indm(w),w), si(w), n_loc, &
1354 decomposition%local%bc_left(i+w,j,k,l,n), &
1355 decomposition%local%bc_right(i+w,j,k,l,n) )
1358 buf_i(1,w) = decomposition%local%bc_left(i+w,j,k,l,n)
1361 buf_i(ind_max,w) = decomposition%local%bc_right(i+w,j,k,l,n)
1367 buf_i(:,w), n_loc, buf_o(:,w), buf_i(:,w) )
1373 buf_i(:,w), alpha(w), n_loc, buf_o(:,w))
1379 f6d(i+w,j,k,l,c_mn+m,n) = buf_o(1+m,w)
1384 do i=loop_mn(1), loop_mx(1)
1385 si(0) = floor( displacement(i,j,k) )
1386 alpha(0) = displacement(i,j,k) - real(si(0), f64)
1387 if ( si(0) == 0 )
then
1389 buf_i(n_loc+2,0) = r_halo(i,j,k,l,loop_mx(5)+1,n)
1392 buf_i(indm(0),0) = l_halo(i,j,k,l,c_mn-1,n)
1397 buf_i(indm(0)+1+m,0) = f6d(i,j,k,l,c_mn+m,n)
1403 buf_i(indm(0)+1:n_loc+indm(0),0), si(0), n_loc, &
1404 decomposition%local%bc_left(i,j,k,l,n), &
1405 decomposition%local%bc_right(i,j,k,l,n) )
1406 buf_i(1,0) = decomposition%local%bc_left(i,j,k,l,n)
1407 buf_i(ind_max,0) = decomposition%local%bc_right(i,j,k,l,n)
1411 buf_i(:,0), n_loc, buf_o(:,0), buf_i(:,0) )
1415 buf_i(:,0), alpha(0), n_loc, buf_o(:,0))
1419 f6d(i,j,k,l,c_mn+m,n) = buf_o(1+m,0)
1443 sll_real64,
intent(in) :: displacement(&
1444 decomposition%local%mn(1):decomposition%local%mx(1), &
1445 decomposition%local%mn(2):decomposition%local%mx(2), &
1446 decomposition%local%mn(3):decomposition%local%mx(3))
1447 sll_real64,
intent(inout) :: f6d(&
1448 decomposition%local%mn(1):decomposition%local%mx(1), &
1449 decomposition%local%mn(2):decomposition%local%mx(2), &
1450 decomposition%local%mn(3):decomposition%local%mx(3), &
1451 decomposition%local%mn(4):decomposition%local%mx(4), &
1452 decomposition%local%mn(5):decomposition%local%mx(5), &
1453 decomposition%local%mn(6):decomposition%local%mx(6))
1455 sll_int32,
parameter :: id = 6
1456 sll_int32 :: i,j,k,l,m,n,w
1457 sll_real64,
allocatable :: buf_i(:,:)
1458 sll_real64,
allocatable :: buf_o(:,:)
1459 sll_int32 :: n_loc, ind_max
1460 halo_dtype,
pointer :: l_halo(:,:,:,:,:,:)
1461 halo_dtype,
pointer :: r_halo(:,:,:,:,:,:)
1462 sll_int32,
allocatable :: si(:)
1463 sll_int32,
allocatable :: indm(:)
1464 sll_real64,
allocatable :: alpha(:)
1467 halo_dtype,
allocatable :: d0(:), c_np2(:)
1468 sll_int32,
pointer :: loop_mn(:), loop_mx(:)
1473 c_mn = decomposition%local%mn(id)
1474 n_loc = decomposition%local%nw(id)
1475 loop_mn => decomposition%local%mn
1476 loop_mx => decomposition%local%mx
1478 wx =
get_wx(decomposition)
1481 allocate(buf_i(1:ind_max, 0:wx-1))
1482 allocate(buf_o(1:ind_max-1, 0:wx-1))
1483 allocate(si(0:wx-1))
1484 allocate(indm(0:wx-1))
1485 allocate(alpha(0:wx-1))
1486 allocate(d0(0:wx-1))
1487 allocate(c_np2(0:wx-1))
1490 do m=loop_mn(5), loop_mx(5)
1491 do l=loop_mn(4), loop_mx(4)
1492 do k=loop_mn(3), loop_mx(3)
1493 do j=loop_mn(2), loop_mx(2)
1494 #ifndef DISABLE_CACHE_BLOCKING
1495 do i=loop_mn(1), loop_mx(1), wx
1497 si(w) = floor(displacement(i+w,j,k))
1502 buf_i(1+n,w) = f6d(i+w,j,k,l,m,c_mn+n)
1508 buf_i(1:n_loc,w), si(w), n_loc, d0(w), c_np2(w) )
1511 decomposition%local%bc_left_send(i+w,j,k,l,m) = c_np2(w)
1514 decomposition%local%bc_right_send(i+w,j,k,l,m) = d0(w)
1518 do i=loop_mn(1), loop_mx(1)
1519 si(0) = floor( displacement(i,j,k) )
1522 buf_i(1+n,0) = f6d(i,j,k,l,m,c_mn+n)
1526 buf_i(1:n_loc,0), si(0), n_loc, d0(0), c_np2(0) )
1527 decomposition%local%bc_left_send(i,j,k,l,m) = c_np2(0)
1528 decomposition%local%bc_right_send(i,j,k,l,m) = d0(0)
1549 l_halo => decomposition%local%halo_left%buf
1550 r_halo => decomposition%local%halo_right%buf
1553 do m=loop_mn(5), loop_mx(5)
1554 do l=loop_mn(4), loop_mx(4)
1555 do k=loop_mn(3), loop_mx(3)
1556 do j=loop_mn(2), loop_mx(2)
1557 #ifndef DISABLE_CACHE_BLOCKING
1558 do i=loop_mn(1), loop_mx(1), wx
1560 si(w) = floor( displacement(i+w,j,k) )
1563 alpha(w) = displacement(i+w,j,k) - real(si(w), f64)
1566 if ( si(w) == 0 )
then
1568 buf_i(n_loc+2,w) = r_halo(i+w,j,k,l,m,loop_mx(6)+1)
1571 buf_i(indm(w),w) = l_halo(i+w,j,k,l,m,c_mn-1)
1578 buf_i(indm(w)+1+n,w) = f6d(i+w,j,k,l,m,c_mn+n)
1586 buf_i(indm(w)+1:n_loc+indm(w),w), si(w), n_loc, &
1587 decomposition%local%bc_left(i+w,j,k,l,m), &
1588 decomposition%local%bc_right(i+w,j,k,l,m) )
1591 buf_i(1,w) = decomposition%local%bc_left(i+w,j,k,l,m)
1594 buf_i(ind_max,w) = decomposition%local%bc_right(i+w,j,k,l,m)
1601 buf_i(:,w), n_loc, buf_o(:,w), buf_i(:,w))
1608 buf_i(:,w), alpha(w), n_loc, buf_o(:,w))
1615 f6d(i+w,j,k,l,m,c_mn+n) = buf_o(1+n,w)
1620 do i=loop_mn(1), loop_mx(1)
1621 si(0) = floor( displacement(i,j,k) )
1622 alpha(0) = displacement(i,j,k) - real(si(0), f64)
1623 if ( si(0) == 0 )
then
1625 buf_i(n_loc+2,0) = r_halo(i,j,k,l,m,loop_mx(6)+1)
1628 buf_i(indm(0),0) = l_halo(i,j,k,l,m,c_mn-1)
1633 buf_i(indm(0)+1+n,0) = f6d(i,j,k,l,m,c_mn+n)
1639 buf_i(indm(0)+1:n_loc+indm(0),0), si(0), n_loc, &
1640 decomposition%local%bc_left(i,j,k,l,m), &
1641 decomposition%local%bc_right(i,j,k,l,m) )
1643 buf_i(1,0) = decomposition%local%bc_left(i,j,k,l,m)
1644 buf_i(ind_max,0) = decomposition%local%bc_right(i,j,k,l,m)
1649 buf_i(:,0), n_loc, buf_o(:,0), buf_i(:,0))
1654 buf_i(:,0), alpha(0), n_loc, buf_o(:,0))
1659 f6d(i,j,k,l,m,c_mn+n) = buf_o(1+n,0)
1684 sll_real64,
intent(in) :: displacement(&
1685 decomposition%local%mn(4):decomposition%local%mx(4), &
1686 decomposition%local%mn(5):decomposition%local%mx(5))
1687 sll_real64,
intent(inout) :: f6d(&
1688 decomposition%local%mn(1):decomposition%local%mx(1), &
1689 decomposition%local%mn(2):decomposition%local%mx(2), &
1690 decomposition%local%mn(3):decomposition%local%mx(3), &
1691 decomposition%local%mn(4):decomposition%local%mx(4), &
1692 decomposition%local%mn(5):decomposition%local%mx(5), &
1693 decomposition%local%mn(6):decomposition%local%mx(6))
1695 sll_int32,
parameter :: id = 2
1696 sll_int32 :: i,j,k,l,m,n,w
1698 sll_real64,
allocatable :: buf_i(:,:)
1699 sll_real64,
allocatable :: buf_o(:,:)
1700 sll_int32 :: n_loc, ind_max
1701 halo_dtype,
pointer :: l_halo(:,:,:,:,:,:)
1702 halo_dtype,
pointer :: r_halo(:,:,:,:,:,:)
1707 halo_dtype,
allocatable :: d0(:), c_np2(:)
1708 sll_int32,
pointer :: loop_mn(:), loop_mx(:)
1713 c_mn = decomposition%local%mn(id)
1714 n_loc = decomposition%local%nw(id)
1715 loop_mn => decomposition%local%mn
1716 loop_mx => decomposition%local%mx
1718 wx =
get_wx(decomposition)
1723 allocate(buf_i(1:ind_max, 0:wx-1))
1724 allocate(buf_o(1:ind_max-1, 0:wx-1))
1725 allocate(d0(0:wx-1))
1726 allocate(c_np2(0:wx-1))
1729 do n=loop_mn(6), loop_mx(6)
1730 do m=loop_mn(5), loop_mx(5)
1731 do l=loop_mn(4), loop_mx(4)
1732 do k=loop_mn(3), loop_mx(3)
1734 #ifndef DISABLE_CACHE_BLOCKING
1735 do i=loop_mn(1), loop_mx(1), wx
1739 buf_i(1+j,w) = f6d(i+w,c_mn+j,k,l,m,n)
1742 si = floor(displacement(l,m))
1746 buf_i(1:n_loc,w), si, n_loc, d0(w), c_np2(w) )
1749 decomposition%local%bc_left_send(i+w,k,l,m,n) = c_np2(w)
1752 decomposition%local%bc_right_send(i+w,k,l,m,n) = d0(w)
1756 do i=loop_mn(1), loop_mx(1)
1757 si = floor( displacement(l,m) )
1760 buf_i(1+j,0) = f6d(i,c_mn+j,k,l,m,n)
1764 buf_i(1:n_loc,0), si, n_loc, d0(0), c_np2(0) )
1765 decomposition%local%bc_left_send(i,k,l,m,n) = c_np2(0)
1766 decomposition%local%bc_right_send(i,k,l,m,n) = d0(0)
1787 l_halo => decomposition%local%halo_left%buf
1788 r_halo => decomposition%local%halo_right%buf
1791 do n=loop_mn(6), loop_mx(6)
1792 do m=loop_mn(5), loop_mx(5)
1793 do l=loop_mn(4), loop_mx(4)
1794 do k=loop_mn(3), loop_mx(3)
1796 #ifndef DISABLE_CACHE_BLOCKING
1797 do i=loop_mn(1), loop_mx(1), wx
1798 si = floor( displacement(l,m) )
1804 if ( si> 0 ) print*,
'######## si', si
1805 if (si<-1) print*,
'####### si', si
1806 alpha = displacement(l,m) - real(si, f64)
1809 buf_i(n_loc+2,w) = r_halo(i+w,loop_mx(2)+1,k,l,m,n)
1811 buf_i(indm,w) = l_halo(i+w,c_mn-1,k,l,m,n)
1817 buf_i(indm+1+j,w) = f6d(i+w,c_mn+j,k,l,m,n)
1824 buf_i(indm+1:n_loc+indm,w), si, n_loc, &
1825 decomposition%local%bc_left(i+w,k,l,m,n), &
1826 decomposition%local%bc_right(i+w,k,l,m,n) )
1829 buf_i(1, w) = decomposition%local%bc_left(i+w,k,l,m,n)
1832 buf_i(ind_max, w) = decomposition%local%bc_right(i+w,k,l,m,n)
1840 buf_i(:,w), n_loc, buf_o(:,w), buf_i(:,w) )
1848 buf_i(:,w), alpha, n_loc, buf_o(:,w))
1854 f6d(i+w,c_mn+j,k,l,m,n) = buf_o(1+j,w)
1859 do i=loop_mn(1), loop_mx(1)
1861 si = floor( displacement(l,m) )
1867 alpha = displacement(l,m) - real(si, f64)
1869 buf_i(n_loc+2,0) = r_halo(i,loop_mx(2)+1,k,l,m,n)
1871 buf_i(indm,0) = l_halo(i,c_mn-1,k,l,m,n)
1875 buf_i(indm+1+j,0) = f6d(i,c_mn+j,k,l,m,n)
1880 buf_i(indm+1:n_loc+indm,0), si, n_loc, &
1881 decomposition%local%bc_left(i,k,l,m,n), &
1882 decomposition%local%bc_right(i,k,l,m,n) )
1883 buf_i( 1, 0 ) = decomposition%local%bc_left(i,k,l,m,n)
1884 buf_i( ind_max, 0 ) = decomposition%local%bc_right(i,k,l,m,n)
1888 buf_i(:,0), n_loc, buf_o(:,0), buf_i(:,0) )
1892 buf_i(:,0), alpha, n_loc, buf_o(:,0))
1896 f6d(i,c_mn+j,k,l,m,n) = buf_o(1+j,0)
1917 sll_real64,
intent(in) :: displacement(&
1918 decomposition%local%mn(4):decomposition%local%mx(4), &
1919 decomposition%local%mn(5):decomposition%local%mx(5))
1920 sll_real64,
intent(inout) :: f6d(&
1921 decomposition%local%mn(1):decomposition%local%mx(1), &
1922 decomposition%local%mn(2):decomposition%local%mx(2), &
1923 decomposition%local%mn(3):decomposition%local%mx(3), &
1924 decomposition%local%mn(4):decomposition%local%mx(4), &
1925 decomposition%local%mn(5):decomposition%local%mx(5), &
1926 decomposition%local%mn(6):decomposition%local%mx(6))
1928 sll_int32,
parameter :: id = 1
1929 sll_int32 :: i,j,k,l,m,n,w
1931 sll_real64,
allocatable :: buf_i(:,:)
1932 sll_real64,
allocatable :: buf_o(:,:)
1933 sll_int32 :: n_loc, ind_max
1934 halo_dtype,
pointer :: l_halo(:,:,:,:,:,:)
1935 halo_dtype,
pointer :: r_halo(:,:,:,:,:,:)
1940 halo_dtype,
allocatable :: d0(:), c_np2(:)
1941 sll_int32,
pointer :: loop_mn(:), loop_mx(:)
1946 c_mn = decomposition%local%mn(id)
1947 n_loc = decomposition%local%nw(id)
1948 loop_mn => decomposition%local%mn
1949 loop_mx => decomposition%local%mx
1951 wx =
get_wx(decomposition)
1956 allocate(buf_i(1:ind_max, 0:wx-1))
1957 allocate(buf_o(1:ind_max-1, 0:wx-1))
1958 allocate(d0(0:wx-1))
1959 allocate(c_np2(0:wx-1))
1962 do n=loop_mn(6), loop_mx(6)
1963 do m=loop_mn(5), loop_mx(5)
1964 do l=loop_mn(4), loop_mx(4)
1965 do k=loop_mn(3), loop_mx(3)
1966 #ifndef DISABLE_CACHE_BLOCKING
1967 do j=loop_mn(2), loop_mx(2), wx
1974 si = floor(displacement(l,m))
1975 if ( si> 0 ) print*,
'######## si', si
1976 if (si<-1) print*,
'####### si', si
1980 f6d(:,j+w,k,l,m,n), si, n_loc, d0(w), c_np2(w) )
1984 decomposition%local%bc_left_send(j+w,k,l,m,n) = c_np2(w)
1987 decomposition%local%bc_right_send(j+w,k,l,m,n) = d0(w)
1991 do j=loop_mn(2), loop_mx(2)
1992 si = floor( displacement(l,m) )
1993 if ( si> 0 ) print*,
'######## si', si
1994 if (si<-1) print*,
'####### si', si
2001 f6d(:,j,k,l,m,n), si, n_loc, d0(0), c_np2(0) )
2003 decomposition%local%bc_left_send(j,k,l,m,n) = c_np2(0)
2004 decomposition%local%bc_right_send(j,k,l,m,n) = d0(0)
2025 l_halo => decomposition%local%halo_left%buf
2026 r_halo => decomposition%local%halo_right%buf
2029 do n=loop_mn(6), loop_mx(6)
2030 do m=loop_mn(5), loop_mx(5)
2031 do l=loop_mn(4), loop_mx(4)
2032 do k=loop_mn(3), loop_mx(3)
2034 #ifndef DISABLE_CACHE_BLOCKING
2035 do j=loop_mn(2), loop_mx(2), wx
2036 si = floor( displacement(l,m) )
2042 alpha = displacement(l,m) - real(si, f64)
2045 buf_i(n_loc+2,w) = r_halo(loop_mx(1)+1,j+w,k,l,m,n)
2047 buf_i(indm,w) = l_halo(c_mn-1,j+w,k,l,m,n)
2053 buf_i(indm+1+i,w) = f6d(c_mn+i,j+w,k,l,m,n)
2060 buf_i(indm+1:n_loc+indm,w), si, n_loc, &
2061 decomposition%local%bc_left(j+w,k,l,m,n), &
2062 decomposition%local%bc_right(j+w,k,l,m,n) )
2065 buf_i(1, w) = decomposition%local%bc_left(j+w,k,l,m,n)
2068 buf_i(ind_max, w) = decomposition%local%bc_right(j+w,k,l,m,n)
2074 buf_i(:,w), n_loc, buf_o(:,w), buf_i(:,w) )
2080 buf_i(:,w), alpha, n_loc, buf_o(:,w))
2086 f6d(c_mn+i,j+w,k,l,m,n) = buf_o(1+i,w)
2091 do j=loop_mn(2), loop_mx(2)
2092 si = floor( displacement(l,m) )
2098 alpha = displacement(l,m) - real(si, f64)
2100 buf_i(n_loc+2,0) = r_halo(loop_mx(1)+1,j,k,l,m,n)
2102 buf_i(indm,0) = l_halo(c_mn-1,j,k,l,m,n)
2106 buf_i(indm+1+i,0) = f6d(c_mn+i,j,k,l,m,n)
2111 buf_i(indm+1:n_loc+indm,0), si, n_loc, &
2112 decomposition%local%bc_left(j,k,l,m,n), &
2113 decomposition%local%bc_right(j,k,l,m,n) )
2114 buf_i( 1, 0 ) = decomposition%local%bc_left(j,k,l,m,n)
2115 buf_i( ind_max, 0 ) = decomposition%local%bc_right(j,k,l,m,n)
2119 buf_i(:,0), n_loc, buf_o(:,0), buf_i(:,0) )
2123 buf_i(:,0), alpha, n_loc, buf_o(:,0))
2127 f6d(c_cm+i,j,k,l,m,n) = buf_o(1+i,0)
Module implementing spline advection for the setting of a domain decomposition in 6d with extra buffe...
subroutine, public sll_s_advection_6d_spline_dd_slim_advect_eta5(self, topology, decomposition, displacement, f6d)
Advection along eta4 with displacement dependent on eta1-eta3.
subroutine, public sll_s_advection_6d_spline_dd_slim_fadvect_eta3(self, topology, decomposition, f6d)
Advection along eta1 with displacement dependent on eta4.
subroutine, public sll_s_advection_6d_spline_dd_slim_advect_eta6(self, topology, decomposition, displacement, f6d)
Advection along eta5 with displacement dependent on eta1-eta3.
subroutine make_blocks_spline(ind, decomposition, disp, disp_int, halo_blocks, halo_width, n_halo_blocks)
Helper function to calculate the communication blocks for the spline interpolation.
subroutine, public sll_s_advection_6d_spline_dd_slim_free(self)
subroutine, public sll_s_advection_6d_spline_dd_slim_advect_eta2_dispeta45(self, topology, decomposition, displacement, f6d)
Advection along eta2 with displacement dependent on eta4 and eta5.
subroutine, public sll_s_advection_6d_spline_dd_slim_advect_eta1_dispeta45(self, topology, decomposition, displacement, f6d)
Advection along eta1 with displacement dependent on eta4 and eta5.
integer(kind=i32) function get_wx(decomposition, id_in)
subroutine, public sll_s_advection_6d_spline_dd_slim_fadvect_eta2(self, topology, decomposition, f6d)
Advection along eta1 with displacement dependent on eta4.
subroutine, public sll_s_advection_6d_spline_dd_slim_advect_eta4(self, topology, decomposition, displacement, f6d)
Advection along eta1 with displacement dependent on eta4.
subroutine, public sll_s_advection_6d_spline_dd_slim_init(self, decomposition, displacement_eta1, displacement_eta2, displacement_eta3)
subroutine, public sll_s_advection_6d_spline_dd_slim_fadvect_eta1(self, topology, decomposition, f6d)
Advection along eta1 with displacement dependent on eta4.
Fortran module where set some physical and mathematical constants.
real(kind=f64), parameter, public sll_p_twopi
Interpolator 1d using cubic splines on regular mesh with halo cells.
subroutine, public sll_s_cubic_spline_halo_1d_compute_interpolant(f, num_points, d, coeffs)
Compute the coefficients of the local interpolating spline (after d(0) and c(num_points+2) have been ...
subroutine, public sll_s_cubic_spline_halo_1d_finish_boundary_conditions(fdata, si, num_points, d_0, c_np2)
Complete d(0) and c(num_points+2) with local data after their values have been received from the neig...
subroutine, public sll_s_cubic_spline_halo_1d_prepare_exchange(fdata, si, num_points, d_0, c_np2)
Compute the part of d(0) and c(num_points+2) that need to be send to neighboring processors.
subroutine, public sll_s_cubic_spline_halo_1d_eval_disp(coeffs, alpha, num_points, fout)
This function corresponds to the interpolate_array_disp function of the interpolators but the displac...
Module providing data structures and tools to implement domain decompositions.
subroutine, public sll_s_apply_bc_exchange_slim_6d_real64(topo, decomp, id)
subroutine, public sll_s_deallocate_bc_buffers(decomp)
subroutine, public sll_s_allocate_bc_buffers_6d(decomp, id)
subroutine, public sll_f_apply_halo_exchange_slim_6d_real64(topo, decomp, arr, id, hw_left, hw_right)
subroutine, public sll_s_apply_halo_exchange_slim_6d_real64(topo, decomp, arr, id, hw_left, hw_right, halo_block)
subroutine, public sll_s_allocate_bc_buffers_6d_part(decomp, id, idx_mn, idx_mx)
Module for 1D interpolation and reconstruction.
Interpolator class and methods of Lagrange 1D interpolator.
integer(kind=i32), parameter, public sll_p_lagrange_fixed
Flag to specify Lagrange interpolation on a fixed interval centered around the point that is displace...
integer(kind=i32), parameter, public sll_p_lagrange_centered
Flag to specify Lagrange interpolation centered around the interpolation point.
Information on the 6D cartesian process topology.
6D decomposition, slim redesign, global array size information and local information.
Abstract class for 1D interpolation and reconstruction.