Report Typos and Errors    
Semi-Lagrangian Library
Modular library for kinetic and gyrokinetic simulations of plasmas in fusion energy devices.
sll_m_advection_6d_spline_dd_slim.F90
Go to the documentation of this file.
1 
7 
8 ! Note: This module was (partly) rewritten such that Fortran range operators ":"
9 ! are avoided wherever possible. These operators slow down the code a lot.
10 
11 #ifdef USE_HALO_REAL32
12 #define HALO_DTYPE sll_real32
13 #else
14 #define HALO_DTYPE sll_real64
15 #endif
16 
17 ! ---
18 ! Cache blocking greatly improves access to the large 6D arrays but heavily complicates the code.%
19 ! For the interpolators below, it gives a performance boost of at least 2x.
20 ! Performance needs to be measured with all the cores busy on a machine (MPI/OMP/hybrid),
21 ! not only with a single thread!
22 ! ---
23 
25 !+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
26 #include "sll_assert.h"
27 #include "sll_errors.h"
28 #include "sll_working_precision.h"
29 
30  use sll_m_constants, only : &
32 
33  use sll_m_interpolators_1d_base, only: &
35 
39 
40  use sll_m_decomposition, only: &
49 
50  use sll_m_cubic_spline_halo_1d, only : &
55 
56 #ifdef _OPENMP
57  use omp_lib
58 ! TODO : confirm safety of collapse(2)
59 #define OMP_COLLAPSE collapse(2)
60 #define OMP_SCHEDULE schedule(static)
61 #endif
62 
63  implicit none
64 
65  ! --- spline routines ---
66  public :: &
78 
79  private
80 
81 !+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
82 
84  sll_real64, allocatable :: displacement_eta1(:)
85  sll_real64, allocatable :: displacement_eta2(:)
86  sll_real64, allocatable :: displacement_eta3(:)
87 
88  sll_int32 , allocatable :: halo_blocks_eta1(:,:,:)
89  sll_int32 , allocatable :: halo_blocks_eta2(:,:,:)
90  sll_int32 , allocatable :: halo_blocks_eta3(:,:,:)
91 
92  sll_int32 , allocatable :: halo_blocks5d_eta2(:,:,:)
93  sll_int32 , allocatable :: halo_blocks5d_eta3(:,:,:)
94 
95  sll_int32 , allocatable :: halo_width_eta1(:,:)
96  sll_int32 , allocatable :: halo_width_eta2(:,:)
97  sll_int32 , allocatable :: halo_width_eta3(:,:)
98 
99  sll_int32, allocatable :: idisplacement_eta1(:)
100  sll_int32, allocatable :: idisplacement_eta2(:)
101  sll_int32, allocatable :: idisplacement_eta3(:)
102 
103  sll_int32 :: n_halo_blocks(3)
105 
106 contains
107 
108 
110  class(sll_t_advection_6d_spline_dd_slim), intent(inout) :: self
111 
112  if (allocated(self%displacement_eta1)) deallocate(self%displacement_eta1)
113  if (allocated(self%displacement_eta2)) deallocate(self%displacement_eta2)
114  if (allocated(self%displacement_eta3)) deallocate(self%displacement_eta3)
115 
116  if (allocated(self%halo_blocks_eta1)) deallocate(self%halo_blocks_eta1)
117  if (allocated(self%halo_blocks_eta2)) deallocate(self%halo_blocks_eta2)
118  if (allocated(self%halo_blocks_eta3)) deallocate(self%halo_blocks_eta3)
119 
120  if (allocated(self%halo_blocks5d_eta2)) deallocate(self%halo_blocks5d_eta2)
121  if (allocated(self%halo_blocks5d_eta3)) deallocate(self%halo_blocks5d_eta3)
122 
123  if (allocated(self%halo_width_eta1)) deallocate(self%halo_width_eta1)
124  if (allocated(self%halo_width_eta2)) deallocate(self%halo_width_eta2)
125  if (allocated(self%halo_width_eta3)) deallocate(self%halo_width_eta3)
126 
127  if (allocated(self%idisplacement_eta1)) deallocate(self%idisplacement_eta1)
128  if (allocated(self%idisplacement_eta2)) deallocate(self%idisplacement_eta2)
129  if (allocated(self%idisplacement_eta3)) deallocate(self%idisplacement_eta3)
131 
132 
133  subroutine sll_s_advection_6d_spline_dd_slim_init(self, decomposition, displacement_eta1, &
134  displacement_eta2, displacement_eta3)
135  type(sll_t_advection_6d_spline_dd_slim), intent(inout) :: self
136  type(sll_t_decomposition_slim_6d), intent(in) :: decomposition
137 
138  sll_real64, intent(in) :: displacement_eta1(:)
139  sll_real64, intent(in) :: displacement_eta2(:)
140  sll_real64, intent(in) :: displacement_eta3(:)
141 
142  allocate(self%displacement_eta1(decomposition%local%mn(4):decomposition%local%mx(4)))
143  self%displacement_eta1 = displacement_eta1
144  allocate(self%displacement_eta2(decomposition%local%mn(5):decomposition%local%mx(5)))
145  self%displacement_eta2 = displacement_eta2
146  allocate(self%displacement_eta3(decomposition%local%mn(6):decomposition%local%mx(6)))
147  self%displacement_eta3 = displacement_eta3
148 
149  call make_blocks_spline(4, decomposition, &
150  self%displacement_eta1, self%idisplacement_eta1, self%halo_blocks_eta1, &
151  self%halo_width_eta1, self%n_halo_blocks(1) )
152  call make_blocks_spline(5, decomposition, &
153  self%displacement_eta2, self%idisplacement_eta2, self%halo_blocks_eta2, &
154  self%halo_width_eta2, self%n_halo_blocks(2) )
155  call make_blocks_spline(6, decomposition, &
156  self%displacement_eta3, self%idisplacement_eta3, self%halo_blocks_eta3, &
157  self%halo_width_eta3, self%n_halo_blocks(3) )
158 
159  allocate(self%halo_blocks5d_eta2(5, 2, self%n_halo_blocks(2)))
160  allocate(self%halo_blocks5d_eta3(5, 2, self%n_halo_blocks(3)))
161 
162  self%halo_blocks5d_eta2(1,:,:) = self%halo_blocks_eta2(1,:,:)
163  self%halo_blocks5d_eta2(2:5,:,:) = self%halo_blocks_eta2(3:6,:,:)
164  self%halo_blocks5d_eta3(1:2,:,:) = self%halo_blocks_eta3(1:2,:,:)
165  self%halo_blocks5d_eta3(3:5,:,:) = self%halo_blocks_eta3(4:6,:,:)
166 #ifdef DISABLE_CACHE_BLOCKING
167  write(*,*) "sll_m_advection_6d_spline_dd_slim :: cache blocking disabled"
168 #endif
170 
171 
172  ! --- calculate depth of the buffers used for cache blocking ---
173  function get_wx(decomposition, id_in)
174  type(sll_t_decomposition_slim_6d), intent(in) :: decomposition
175  sll_int32, intent(in), optional :: id_in
176  sll_int32 :: get_wx, wx, id
177 #ifndef DISABLE_CACHE_BLOCKING
178  ! --- by default we want to block in the first dimension
179  if (present(id_in)) then
180  id = id_in
181  else
182  id = 1
183  endif
184 ! ! --- calculate width of prefetch cache buffer
185 ! wx = 8 ! SandyBridge cache line is 64 Bytes long
186 ! do while(wx > 1)
187 ! if (mod(decomposition%local%nw(id),wx) == 0) then
188 ! exit
189 ! else
190 ! wx = wx / 2
191 ! endif
192 ! enddo
193  wx = decomposition%local%nw(id)
194 #else
195  wx = 1
196 #endif
197  get_wx = wx
198  end function get_wx
199 
200 
202  subroutine make_blocks_spline( ind, decomposition, disp, disp_int, halo_blocks , halo_width, n_halo_blocks)
203  sll_int32, intent( in ) :: ind
204  type(sll_t_decomposition_slim_6d), intent( in ) :: decomposition
205  sll_real64, intent( inout ) :: disp(decomposition%local%mn(ind):decomposition%local%mx(ind))
206  sll_int32, intent( out ), allocatable :: disp_int(:)
207  sll_int32, intent( out ), allocatable :: halo_blocks(:,:,:)
208  sll_int32, intent( out ), allocatable :: halo_width(:,:)
209  sll_int32, intent( out ) :: n_halo_blocks
210 
211  sll_int32 :: index_range(2)
212  sll_int32 :: blocks, j, bl
213  sll_int32 :: box1, box2
214  sll_int32 :: si
215 
216  index_range = [ decomposition%local%mn(ind), decomposition%local%mx(ind) ]
217 
218  ! However, not all blocks might be present, so let us check first how many blocks there are or rather the interval of boxes that are there
219  ! Since we assume monotonic displacement, we only need to identify the box of the first and last value
220  bl = index_range(1)
221  if ( abs(disp(bl) ) == 0.0_f64) bl = bl+1
222  box1 = floor( disp(bl) )
223  bl = index_range(2)
224  if ( abs(disp(bl) ) == 0.0_f64) bl = bl-1
225  box2 = floor( disp(bl) )
226 
227  ! Compute number of blocks
228  blocks = abs(box2-box1)+1
229 
230  ! Now that we know the number of blocks, we can allocate the array holding the block information
231  allocate( halo_blocks(6, 2, blocks) )
232  allocate( halo_width(2, blocks) )
233  allocate( disp_int( blocks ) )
234  do j=1,blocks
235  halo_blocks(:, 1, j) = decomposition%local%mn
236  halo_blocks(:, 2, j) = decomposition%local%mx
237  end do
238 
239  ! We have to distinguish increasing and decreasing displacement
240  if (box1 > box2 ) then
241  j = index_range(1)
242  do bl = 1,blocks!box1, box2
243  if ( abs(disp(j)) == 0.0_f64 ) j = j+1
244  si = box1-bl+1
245  halo_width(1, bl ) = -si!max( - si, 0 )!stencil/2 - (box1-bl+1)-1
246  halo_width(2, bl ) = si+1!max( si+1, 0 )!stencil/2 + (box1-bl+1)
247  disp_int( bl ) = si
248  halo_blocks(ind, 1, bl ) = j
249  do while( disp(j) > box1-bl+1 )
250  j = j+1
251  if ( j > index_range(2)) exit
252  end do
253  if ( abs(disp(j-1)) == 0.0_f64 ) then
254  halo_blocks(ind, 2, bl ) = j-2
255  else
256  halo_blocks(ind, 2, bl ) = j-1
257  end if
258  end do
259  else
260  j = index_range(1)
261  do bl=box1, box2
262  if ( abs(disp(j)) == 0.0_f64 ) j = j+1
263  halo_width(1, bl-box1+1) = -bl!max( - bl, 0 )!stencil/2 - bl-1
264  halo_width(2, bl-box1+1) = bl+1!max( bl+1, 0 )!stencil/2 + bl
265  disp_int( bl-box1+1 ) = bl
266  halo_blocks(ind, 1, bl-box1+1) = j
267  do while ( (disp(j) < bl+1) )
268  j = j+1
269  if ( j > index_range(2) ) exit
270  end do
271  if ( abs(disp(j-1)) == 0.0_f64 ) then
272  halo_blocks(ind, 2, bl-box1+1) = j-2
273  else
274  halo_blocks(ind, 2, bl-box1+1) = j-1
275  end if
276  end do
277  end if
278 
279  n_halo_blocks = blocks
280 
281  ! Compute normalized displacement
282  do j= index_range(1), index_range(2)
283  disp(j) = disp(j) - real( floor( disp(j)), f64)
284  sll_assert( disp(j) >= 0.0_f64 )
285  sll_assert( disp(j) <= 1.0_f64 )
286  end do
287  end subroutine make_blocks_spline
288 
289 
291  subroutine sll_s_advection_6d_spline_dd_slim_fadvect_eta1(self, topology, decomposition, f6d)
292  type(sll_t_advection_6d_spline_dd_slim), intent(inout) :: self
293  type(sll_t_cartesian_topology_6d), intent( in ) :: topology
294  type(sll_t_decomposition_slim_6d), target, intent(inout) :: decomposition
295  sll_real64, intent(inout) :: f6d(&
296  decomposition%local%mn(1):decomposition%local%mx(1), &
297  decomposition%local%mn(2):decomposition%local%mx(2), &
298  decomposition%local%mn(3):decomposition%local%mx(3), &
299  decomposition%local%mn(4):decomposition%local%mx(4), &
300  decomposition%local%mn(5):decomposition%local%mx(5), &
301  decomposition%local%mn(6):decomposition%local%mx(6))
302 
303  sll_int32, parameter :: id = 1 ! eta1
304  sll_int32 :: i,j,k,l,m,n,w,wx,bl
305  sll_int32 :: num_points
306  sll_real64, allocatable :: buf_i(:,:)
307  sll_real64, allocatable :: buf_o(:,:)
308  sll_int32 :: l_mn, l_mx, c_mn, c_mx, r_mn, r_mx
309  halo_dtype, pointer :: l_halo(:,:,:,:,:,:)
310  halo_dtype, pointer :: r_halo(:,:,:,:,:,:)
311  halo_dtype, allocatable :: d0(:), c_np2(:)
312  sll_int32, pointer :: loop_mn(:), loop_mx(:)
313 
314  c_mn = decomposition%local%mn(id)
315  c_mx = decomposition%local%mx(id)
316  num_points = decomposition%local%nw(id)
317  loop_mn => decomposition%local%mn
318  loop_mx => decomposition%local%mx
319  ! NOTE: Buffering is done here for the second dim "j".
320  ! This implementation uses 0-based indexing in w!
321  wx = get_wx(decomposition, 2)
322 
323  do bl=1, self%n_halo_blocks(id)
324  ! cache values to avoid multiple dereferencing inside the loops
325  l_mn = c_mn-self%halo_width_eta1(1, bl)
326  l_mx = c_mn-1
327  r_mn = c_mx+1
328  r_mx = c_mx+self%halo_width_eta1(2, bl)
329 
330  !call sll_s_allocate_bc_buffers_6d(decomposition, id)
331  call sll_s_allocate_bc_buffers_6d_part(decomposition, id, &
332  self%halo_blocks_eta1(2:6,1,bl), self%halo_blocks_eta1(2:6,2,bl))
333 !$omp parallel default(shared) private(i,j,k,l,m,n,w,buf_i,buf_o,d0,c_np2)
334  allocate(d0(0:wx-1))
335  allocate(c_np2(0:wx-1))
336 !$omp do OMP_COLLAPSE OMP_SCHEDULE
337  ! Compute the remote part of the boundary conditions
338  do n=loop_mn(6), loop_mx(6)
339  do m=loop_mn(5), loop_mx(5)
340  do l=self%halo_blocks_eta1(id+3, 1, bl), self%halo_blocks_eta1(id+3, 2, bl)
341  do k=loop_mn(3), loop_mx(3)
342 #ifndef DISABLE_CACHE_BLOCKING
343  do j=loop_mn(2), loop_mx(2), wx
344  do w=0,wx-1
345 !DIR$ FORCEINLINE
347  f6d(:,j+w,k,l,m,n), self%idisplacement_eta1(bl), &
348  num_points, d0(w), c_np2(w))
349  enddo
350  do w=0,wx-1
351  decomposition%local%bc_left_send(j+w,k,l,m,n) = c_np2(w)
352  enddo
353  do w=0,wx-1
354  decomposition%local%bc_right_send(j+w,k,l,m,n) = d0(w)
355  enddo
356  end do
357 #else
358  do j=loop_mn(2), loop_mx(2)
359 !DIR$ FORCEINLINE
361  f6d(:,j,k,l,m,n), self%idisplacement_eta1(bl), &
362  num_points, d0(0), c_np2(0) )
363  decomposition%local%bc_left_send(j,k,l,m,n) = c_np2(0)
364  decomposition%local%bc_right_send(j,k,l,m,n) = d0(0)
365  end do
366 #endif
367  end do
368  end do
369  end do
370  end do
371 !$omp end do
372  deallocate(c_np2)
373  deallocate(d0)
374 !$omp single
375  ! Exchange boundary conditions
376  call sll_s_apply_bc_exchange_slim_6d_real64(topology, decomposition, id)
377  ! Exchange data for the neighboring cells
379  decomposition, &
380  f6d, &
381  id, &
382  self%halo_width_eta1(1, bl), &
383  self%halo_width_eta1(2, bl), &
384  self%halo_blocks_eta1(:,:,bl))
385  l_halo => decomposition%local%halo_left%buf
386  r_halo => decomposition%local%halo_right%buf
387 !$omp end single
388  allocate(buf_i(l_mn-1:r_mx+1,0:wx-1))
389  allocate(buf_o(l_mn-1:r_mx,0:wx-1))
390 !$omp do OMP_COLLAPSE OMP_SCHEDULE
391  do n=loop_mn(6), loop_mx(6)
392  do m=loop_mn(5), loop_mx(5)
393  do l=self%halo_blocks_eta1(id+3, 1, bl), self%halo_blocks_eta1(id+3, 2, bl)
394  do k=loop_mn(3), loop_mx(3)
395 #ifndef DISABLE_CACHE_BLOCKING
396  do j=loop_mn(2), loop_mx(2), wx
397 ! buf_i(c_mn:c_mx) = f6d(c_mn:c_mx,j,k,l,m,n)
398  do w=0,wx-1
399  do i=c_mn,c_mx
400  buf_i(i,w) = f6d(i,j+w,k,l,m,n)
401  enddo
402  enddo
403  ! add local contributions to boundary conditions for the spline
404  do w=0,wx-1
405 !DIR$ FORCEINLINE
407  buf_i(c_mn:c_mx,w), &
408  self%idisplacement_eta1(bl), num_points, &
409  decomposition%local%bc_left(j+w,k,l,m,n), &
410  decomposition%local%bc_right(j+w,k,l,m,n))
411  enddo
412 
413  ! fill input buffer piecewise
414  do w=0,wx-1
415  buf_i(l_mn-1,w) = decomposition%local%bc_left(j+w,k,l,m,n)!d_0
416  enddo
417 
418  ! For splines, we always have halos and precisely one side
419 
420  if ( l_mn <= l_mx ) then
421 ! buf_i(l_mn:l_mx) = l_halo(:,j,k,l,m,n)
422  do w=0,wx-1
423  do i=l_mn,l_mx
424  buf_i(i,w) = l_halo(i,j+w,k,l,m,n)
425  enddo
426  enddo
427  else
428 ! buf_i(r_mn:r_mx) = r_halo(:,j,k,l,m,n)
429  do w=0,wx-1
430  do i=r_mn,r_mx
431  buf_i(i,w) = r_halo(i,j+w,k,l,m,n)
432  enddo
433  enddo
434  end if
435  do w=0,wx-1
436  buf_i(r_mx+1,w) = decomposition%local%bc_right(j+w,k,l,m,n)!c_np2
437  enddo
438 
439  ! compute interpolant
440  do w=0,wx-1
441 !DIR$ FORCEINLINE
443  buf_i(:,w), num_points, buf_o(:,w), buf_i(:,w) )
444  enddo
445 
446  ! perform interpolation
447  do w=0,wx-1
448 !DIR$ FORCEINLINE
450  buf_i(:,w), self%displacement_eta1(l), num_points, buf_o(:,w))
451  enddo
452 
453  ! copy-back interpolated values
454 ! f6d(:,j,k,l,m,n) = buf_o(l_mn-1:r_mx-2)!(c_mn:c_mx) !TODO: First until last -3
455  do w=0,wx-1
456  do i=0,num_points-1
457  f6d(c_mn+i,j+w,k,l,m,n) = buf_o(l_mn-1+i,w)
458  enddo
459  enddo
460  end do
461 #else
462  do j=loop_mn(2), loop_mx(2)
463 ! buf_i(c_mn:c_mx) = f6d(c_mn:c_mx,j,k,l,m,n)
464  do i=c_mn,c_mx
465  buf_i(i,0) = f6d(i,j,k,l,m,n)
466  enddo
467  ! add local contributions to boundary conditions for the spline
468 !DIR$ FORCEINLINE
470  buf_i(c_mn:c_mx,0), &
471  self%idisplacement_eta1( bl ), num_points, &
472  decomposition%local%bc_left(j,k,l,m,n), &
473  decomposition%local%bc_right(j,k,l,m,n))
474  ! fill input buffer piecewise
475  buf_i(l_mn-1,0) = decomposition%local%bc_left(j,k,l,m,n)!d_0
476  ! For splines, we always have halos and precisely one side
477  if ( l_mn <= l_mx ) then
478 ! buf_i(l_mn:l_mx) = l_halo(:,j,k,l,m,n)
479  do i=l_mn,l_mx
480  buf_i(i,0) = l_halo(i,j,k,l,m,n)
481  enddo
482  else
483 ! buf_i(r_mn:r_mx) = r_halo(:,j,k,l,m,n)
484  do i=r_mn,r_mx
485  buf_i(i,0) = r_halo(i,j,k,l,m,n)
486  enddo
487  end if
488  buf_i(r_mx+1,0) = decomposition%local%bc_right(j,k,l,m,n)!c_np2
489  ! compute interpolant
490 !DIR$ FORCEINLINE
492  buf_i(:,0), num_points, buf_o(:,0), buf_i(:,0) )
493  ! perform interpolation
494 !DIR$ FORCEINLINE
496  buf_i(:,0), self%displacement_eta1(l), num_points, buf_o(:,0))
497  ! copy-back interpolated values
498 ! f6d(:,j,k,l,m,n) = buf_o(l_mn-1:r_mx-2)!(c_mn:c_mx) !TODO: First until last -3
499  do i=0,num_points-1
500  f6d(c_mn+i,j,k,l,m,n) = buf_o(l_mn-1+i,0)
501  enddo
502  end do
503 #endif
504  end do
505  end do
506  end do
507  end do
508 !$omp end do
509  deallocate(buf_i)
510  deallocate(buf_o)
511 !$omp end parallel
512 
513  call sll_s_deallocate_bc_buffers(decomposition)
514  end do
516 
517 
519  subroutine sll_s_advection_6d_spline_dd_slim_fadvect_eta2(self, topology, decomposition, f6d)
520  type(sll_t_advection_6d_spline_dd_slim), intent(inout) :: self
521  type(sll_t_cartesian_topology_6d), intent( in ) :: topology
522  type(sll_t_decomposition_slim_6d), target, intent(inout) :: decomposition
523  sll_real64, intent(inout) :: f6d(&
524  decomposition%local%mn(1):decomposition%local%mx(1), &
525  decomposition%local%mn(2):decomposition%local%mx(2), &
526  decomposition%local%mn(3):decomposition%local%mx(3), &
527  decomposition%local%mn(4):decomposition%local%mx(4), &
528  decomposition%local%mn(5):decomposition%local%mx(5), &
529  decomposition%local%mn(6):decomposition%local%mx(6))
530 
531  sll_int32, parameter :: id = 2 ! eta1
532  sll_int32 :: i,j,k,l,m,n,bl,w
533  sll_int32 :: num_points
534  sll_real64, allocatable :: buf_i(:,:)
535  sll_real64, allocatable :: buf_o(:,:)
536  sll_int32 :: l_mn, l_mx, c_mn, c_mx, r_mn, r_mx
537  halo_dtype, pointer :: l_halo(:,:,:,:,:,:)
538  halo_dtype, pointer :: r_halo(:,:,:,:,:,:)
539  sll_int32 :: wx
540  halo_dtype, allocatable :: d0(:), c_np2(:)
541  sll_int32, pointer :: loop_mn(:), loop_mx(:)
542 
543  c_mn = decomposition%local%mn(id)
544  c_mx = decomposition%local%mx(id)
545  num_points = decomposition%local%nw(id)
546  loop_mn => decomposition%local%mn
547  loop_mx => decomposition%local%mx
548  wx = get_wx(decomposition)
549 
550  do bl=1, self%n_halo_blocks(id)
551  !call sll_s_allocate_bc_buffers_6d(decomposition, id)
552  call sll_s_allocate_bc_buffers_6d_part(decomposition, id, &
553  self%halo_blocks5d_eta2(:,1,bl), self%halo_blocks5d_eta2(:,2,bl))
554  l_mn = c_mn-self%halo_width_eta2(1, bl)
555  l_mx = c_mn-1
556  r_mn = c_mx+1
557  r_mx = c_mx+self%halo_width_eta2(2, bl)
558 
559 !$omp parallel default(shared) private(i,j,k,l,m,n,w,buf_i,buf_o,d0,c_np2)
560  allocate(buf_i(l_mn-1:r_mx+1,0:wx-1))
561  allocate(buf_o(l_mn-1:r_mx,0:wx-1))
562  allocate(d0(0:wx-1))
563  allocate(c_np2(0:wx-1))
564 !$omp do OMP_COLLAPSE OMP_SCHEDULE
565  ! Compute the remote part of the boundary conditions
566  do n=loop_mn(6), loop_mx(6)
567  do m=self%halo_blocks_eta2(id+3, 1, bl), self%halo_blocks_eta2(id+3, 2, bl)
568  do l=loop_mn(4), loop_mx(4)
569  do k=loop_mn(3), loop_mx(3)
570 #ifndef DISABLE_CACHE_BLOCKING
571  do i=loop_mn(1), loop_mx(1), wx
572 ! buf_i(c_mn:c_mx) = f6d(i,:,k,l,m,n)
573  do j=c_mn,c_mx
574  do w=0,wx-1
575  buf_i(j,w) = f6d(i+w,j,k,l,m,n)
576  enddo
577  enddo
578  do w=0,wx-1
579 !DIR$ FORCEINLINE
581  buf_i(c_mn:c_mx,w), self%idisplacement_eta2(bl), &
582  num_points, d0(w), c_np2(w) )
583  enddo
584  do w=0,wx-1
585  decomposition%local%bc_left_send(i+w,k,l,m,n) = c_np2(w)
586  enddo
587  do w=0,wx-1
588  decomposition%local%bc_right_send(i+w,k,l,m,n) = d0(w)
589  enddo
590  end do
591 #else
592  do i=loop_mn(1), loop_mx(1)
593 ! buf_i(c_mn:c_mx) = f6d(i,:,k,l,m,n)
594  do j=c_mn,c_mx
595  buf_i(j,0) = f6d(i,j,k,l,m,n)
596  enddo
597 !DIR$ FORCEINLINE
599  buf_i(c_mn:c_mx,0), self%idisplacement_eta2( bl), &
600  num_points, d0(0), c_np2(0))
601  decomposition%local%bc_left_send(i,k,l,m,n) = c_np2(0)
602  decomposition%local%bc_right_send(i,k,l,m,n) = d0(0)
603  end do
604 #endif
605  end do
606  end do
607  end do
608  end do
609 !$omp end do
610  deallocate(c_np2)
611  deallocate(d0)
612 !$omp single
613  ! Exchange boundary conditions
614  call sll_s_apply_bc_exchange_slim_6d_real64(topology, decomposition, id)
615  ! Exchange data for the neighboring cells
617  decomposition, &
618  f6d, &
619  id, &
620  self%halo_width_eta2(1, bl), &
621  self%halo_width_eta2(2, bl), &
622  self%halo_blocks_eta2(:,:,bl))
623  l_halo => decomposition%local%halo_left%buf
624  r_halo => decomposition%local%halo_right%buf
625 !$omp end single
626 !$omp do OMP_COLLAPSE OMP_SCHEDULE
627  do n=loop_mn(6), loop_mx(6)
628  do m=self%halo_blocks_eta2(id+3, 1, bl), self%halo_blocks_eta2(id+3, 2, bl)
629  do l=loop_mn(4), loop_mx(4)
630  do k=loop_mn(3), loop_mx(3)
631 #ifndef DISABLE_CACHE_BLOCKING
632  do i=loop_mn(1), loop_mx(1), wx
633 ! buf_i(c_mn:c_mx) = f6d(i,:,k,l,m,n)
634  do j=c_mn,c_mx
635  do w=0,wx-1
636  buf_i(j,w) = f6d(i+w,j,k,l,m,n)
637  enddo
638  enddo
639  ! Add local contributions to boundary conditions for the spline
640  do w=0,wx-1
641 !DIR$ FORCEINLINE
643  buf_i(c_mn:c_mx,w), self%idisplacement_eta2( bl ), num_points, &
644  decomposition%local%bc_left(i+w,k,l,m,n), &
645  decomposition%local%bc_right(i+w,k,l,m,n) )
646  enddo
647  ! fill input buffer piecewise
648  do w=0,wx-1
649  buf_i(l_mn-1,w) = decomposition%local%bc_left(i+w,k,l,m,n)!d_0
650  enddo
651  ! For splines, we always have halos and precisely one side
652  if ( l_mn <= l_mx ) then
653 ! buf_i(l_mn:l_mx) = l_halo(i,:,k,l,m,n)
654  do j=l_mn,l_mx
655  do w=0,wx-1
656  buf_i(j,w) = l_halo(i+w,j,k,l,m,n)
657  enddo
658  enddo
659  else
660 ! buf_i(r_mn:r_mx) = r_halo(i,:,k,l,m,n)
661  do j=r_mn,r_mx
662  do w=0,wx-1
663  buf_i(j,w) = r_halo(i+w,j,k,l,m,n)
664  enddo
665  enddo
666  end if
667  do w=0,wx-1
668  buf_i(r_mx+1,w) = decomposition%local%bc_right(i+w,k,l,m,n)!c_np2
669  enddo
670  ! compute interpolant
671  do w=0,wx-1
672 !DIR$ FORCEINLINE
674  buf_i(:,w), num_points, buf_o(:,w), buf_i(:,w) )
675  enddo
676  ! perform interpolation
677  do w=0,wx-1
678 !DIR$ FORCEINLINE
680  buf_i(:,w), self%displacement_eta2(m), num_points, buf_o(:,w))
681  enddo
682  ! copy-back interpolated values
683 ! f6d(i,:,k,l,m,n) = buf_o(l_mn-1:r_mx-2)!(c_mn:c_mx) !TODO: First until last -3
684  do j=0,num_points-1
685  do w=0,wx-1
686  f6d(i+w,c_mn+j,k,l,m,n) = buf_o(l_mn-1+j,w)
687  enddo
688  enddo
689  end do
690 #else
691  do i=loop_mn(1), loop_mx(1)
692 ! buf_i(c_mn:c_mx) = f6d(i,:,k,l,m,n)
693  do j=c_mn,c_mx
694  buf_i(j,0) = f6d(i,j,k,l,m,n)
695  enddo
696  ! Add local contributions to boundary conditions for the spline
697 !DIR$ FORCEINLINE
699  buf_i(c_mn:c_mx,0), self%idisplacement_eta2( bl ), num_points, &
700  decomposition%local%bc_left(i,k,l,m,n), &
701  decomposition%local%bc_right(i,k,l,m,n) )
702  ! fill input buffer piecewise
703  buf_i(l_mn-1,0) = decomposition%local%bc_left(i,k,l,m,n)!d_0
704  ! For splines, we always have halos and precisely one side
705  if ( l_mn <= l_mx ) then
706 ! buf_i(l_mn:l_mx) = l_halo(i,:,k,l,m,n)
707  do j=l_mn,l_mx
708  buf_i(j,0) = l_halo(i,j,k,l,m,n)
709  enddo
710  else
711 ! buf_i(r_mn:r_mx) = r_halo(i,:,k,l,m,n)
712  do j=r_mn,r_mx
713  buf_i(j,0) = r_halo(i,j,k,l,m,n)
714  enddo
715  end if
716  buf_i(r_mx+1,0) = decomposition%local%bc_right(i,k,l,m,n)!c_np2
717  ! compute interpolant
718 !DIR$ FORCEINLINE
720  buf_i(:,0), num_points, buf_o(:,0), buf_i(:,0) )
721  ! perform interpolation
722 !DIR$ FORCEINLINE
724  buf_i(:,0), self%displacement_eta2(m), num_points, buf_o(:,0))
725  ! copy-back interpolated values
726 ! f6d(i,:,k,l,m,n) = buf_o(l_mn-1:r_mx-2)!(c_mn:c_mx) !TODO: First until last -3
727  do j=0,num_points-1
728  f6d(i,c_mn+j,k,l,m,n) = buf_o(l_mn-1+j,0)
729  enddo
730  end do
731 #endif
732  end do
733  end do
734  end do
735  end do
736 !$omp end do
737  deallocate(buf_i)
738  deallocate(buf_o)
739 !$omp end parallel
740  call sll_s_deallocate_bc_buffers(decomposition)
741  end do
743 
744 
746  subroutine sll_s_advection_6d_spline_dd_slim_fadvect_eta3(self, topology, decomposition, f6d)
747  type(sll_t_advection_6d_spline_dd_slim), intent(inout) :: self
748  type(sll_t_cartesian_topology_6d), intent( in ) :: topology
749  type(sll_t_decomposition_slim_6d), target, intent(inout) :: decomposition
750  sll_real64, intent(inout) :: f6d(&
751  decomposition%local%mn(1):decomposition%local%mx(1), &
752  decomposition%local%mn(2):decomposition%local%mx(2), &
753  decomposition%local%mn(3):decomposition%local%mx(3), &
754  decomposition%local%mn(4):decomposition%local%mx(4), &
755  decomposition%local%mn(5):decomposition%local%mx(5), &
756  decomposition%local%mn(6):decomposition%local%mx(6))
757 
758  sll_int32, parameter :: id = 3 ! eta1
759  sll_int32 :: i,j,k,l,m,n,bl,w
760  sll_int32 :: num_points
761  sll_real64, allocatable :: buf_i(:,:)
762  sll_real64, allocatable :: buf_o(:,:)
763  sll_int32 :: l_mn, l_mx, c_mn, c_mx, r_mn, r_mx
764  halo_dtype, pointer :: l_halo(:,:,:,:,:,:)
765  halo_dtype, pointer :: r_halo(:,:,:,:,:,:)
766  sll_int32 :: wx
767  halo_dtype, allocatable :: d0(:), c_np2(:)
768  sll_int32, pointer :: loop_mn(:), loop_mx(:)
769 
770  c_mn = decomposition%local%mn(id)
771  c_mx = decomposition%local%mx(id)
772  num_points = decomposition%local%nw(id)
773  loop_mn => decomposition%local%mn
774  loop_mx => decomposition%local%mx
775  wx = get_wx(decomposition)
776 
777  do bl=1, self%n_halo_blocks(id)
778  !call sll_s_allocate_bc_buffers_6d(decomposition, id)
779  call sll_s_allocate_bc_buffers_6d_part(decomposition, id, &
780  self%halo_blocks5d_eta3(:,1,bl), self%halo_blocks5d_eta3(:,2,bl))
781  l_mn = c_mn-self%halo_width_eta3(1, bl)
782  l_mx = c_mn-1
783  r_mn = c_mx+1
784  r_mx = c_mx+self%halo_width_eta3(2, bl)
785 
786 !$omp parallel default(shared) private(i,j,k,l,m,n,w,buf_i,buf_o,d0,c_np2)
787  allocate(buf_i(l_mn-1:r_mx+1,0:wx-1))
788  allocate(buf_o(l_mn-1:r_mx,0:wx-1))
789  allocate(d0(0:wx-1))
790  allocate(c_np2(0:wx-1))
791 !$omp do OMP_COLLAPSE OMP_SCHEDULE
792  ! Compute the remote part of the boundary conditions
793  do n=self%halo_blocks_eta3(id+3, 1, bl), self%halo_blocks_eta3(id+3, 2, bl)
794  do m=loop_mn(5), loop_mx(5)
795  do l=loop_mn(4), loop_mx(4)
796  do j=loop_mn(2), loop_mx(2)
797 #ifndef DISABLE_CACHE_BLOCKING
798  do i=loop_mn(1), loop_mx(1), wx
799 ! buf_i(c_mn:c_mx) = f6d(i,j,:,l,m,n)
800  do k=c_mn,c_mx
801  do w=0,wx-1
802  buf_i(k,w) = f6d(i+w,j,k,l,m,n)
803  enddo
804  enddo
805  do w=0,wx-1
806 !DIR$ FORCEINLINE
808  buf_i(c_mn:c_mx,w), self%idisplacement_eta3(bl), &
809  num_points, d0(w), c_np2(w))
810  enddo
811  do w=0,wx-1
812  decomposition%local%bc_left_send(i+w,j,l,m,n) = c_np2(w)
813  enddo
814  do w=0,wx-1
815  decomposition%local%bc_right_send(i+w,j,l,m,n) = d0(w)
816  enddo
817  end do
818 #else
819  do i=loop_mn(1), loop_mx(1)
820 ! buf_i(c_mn:c_mx) = f6d(i,j,:,l,m,n)
821  do k=c_mn,c_mx
822  buf_i(k,0) = f6d(i,j,k,l,m,n)
823  enddo
824 !DIR$ FORCEINLINE
826  buf_i(c_mn:c_mx,0), self%idisplacement_eta3(bl), &
827  num_points, d0(0), c_np2(0) )
828  decomposition%local%bc_left_send(i,j,l,m,n) = c_np2(0)
829  decomposition%local%bc_right_send(i,j,l,m,n) = d0(0)
830  end do
831 #endif
832  end do
833  end do
834  end do
835  end do
836 !$omp end do
837  deallocate(c_np2)
838  deallocate(d0)
839 !$omp single
840  ! Exchange boundary conditions
841  call sll_s_apply_bc_exchange_slim_6d_real64(topology, decomposition, id)
842  ! Exchange data for the neighboring cells
844  decomposition, &
845  f6d, &
846  id, &
847  self%halo_width_eta3(1, bl), &
848  self%halo_width_eta3(2, bl), &
849  self%halo_blocks_eta3(:,:,bl))
850  l_halo => decomposition%local%halo_left%buf
851  r_halo => decomposition%local%halo_right%buf
852 !$omp end single
853 !$omp do OMP_COLLAPSE OMP_SCHEDULE
854  do n=self%halo_blocks_eta3(id+3, 1, bl), self%halo_blocks_eta3(id+3, 2, bl)
855  do m=loop_mn(5), loop_mx(5)
856  do l=loop_mn(4), loop_mx(4)
857  do j=loop_mn(2), loop_mx(2)
858 #ifndef DISABLE_CACHE_BLOCKING
859  do i=loop_mn(1), loop_mx(1), wx
860 ! buf_i(c_mn:c_mx) = f6d(i,j,:,l,m,n)
861  do k=c_mn,c_mx
862  do w=0,wx-1
863  buf_i(k,w) = f6d(i+w,j,k,l,m,n)
864  enddo
865  enddo
866  ! Add local contributions to boundary conditions for the spline
867  do w=0,wx-1
868 !DIR$ FORCEINLINE
870  buf_i(c_mn:c_mx,w), self%idisplacement_eta3( bl ), num_points, &
871  decomposition%local%bc_left(i+w,j,l,m,n), &
872  decomposition%local%bc_right(i+w,j,l,m,n) )
873  enddo
874  ! fill input buffer piecewise
875  do w=0,wx-1
876  buf_i(l_mn-1,w) = decomposition%local%bc_left(i+w,j,l,m,n)!d_0
877  enddo
878  ! For splines, we always have halos and precisely one side
879  if ( l_mn <= l_mx ) then
880 ! buf_i(l_mn:l_mx) = l_halo(i,j,:,l,m,n)
881  do k=l_mn,l_mx
882  do w=0,wx-1
883  buf_i(k,w) = l_halo(i+w,j,k,l,m,n)
884  enddo
885  enddo
886  else
887 ! buf_i(r_mn:r_mx) = r_halo(i,j,:,l,m,n)
888  do k=r_mn,r_mx
889  do w=0,wx-1
890  buf_i(k,w) = r_halo(i+w,j,k,l,m,n)
891  enddo
892  enddo
893  end if
894  do w=0,wx-1
895  buf_i(r_mx+1,w) = decomposition%local%bc_right(i+w,j,l,m,n)!c_np2
896  enddo
897  ! compute interpolant
898  do w=0,wx-1
899 !DIR$ FORCEINLINE
901  buf_i(:,w), num_points, buf_o(:,w), buf_i(:,w))
902  enddo
903 
904  ! perform interpolation
905  do w=0,wx-1
906 !DIR$ FORCEINLINE
908  buf_i(:,w), self%displacement_eta3(n), num_points, buf_o(:,w))
909  enddo
910  ! copy-back interpolated values
911 ! f6d(i,j,:,l,m,n) = buf_o(l_mn-1:r_mx-2)!(c_mn:c_mx) !TODO: First until last -3
912  do k=0,num_points-1
913  do w=0,wx-1
914  f6d(i+w,j,c_mn+k,l,m,n) = buf_o(l_mn-1+k,w)
915  enddo
916  enddo
917  end do
918 #else
919  do i=loop_mn(1), loop_mx(1)
920 ! buf_i(c_mn:c_mx) = f6d(i,j,:,l,m,n)
921  do k=c_mn,c_mx
922  buf_i(k,0) = f6d(i,j,k,l,m,n)
923  enddo
924  ! Add local contributions to boundary conditions for the spline
925 !DIR$ FORCEINLINE
927  buf_i(c_mn:c_mx,0), self%idisplacement_eta3( bl ), num_points, &
928  decomposition%local%bc_left(i,j,l,m,n), &
929  decomposition%local%bc_right(i,j,l,m,n) )
930  ! fill input buffer piecewise
931  buf_i(l_mn-1,0) = decomposition%local%bc_left(i,j,l,m,n)!d_0
932  ! For splines, we always have halos and precisely one side
933  if ( l_mn <= l_mx ) then
934 ! buf_i(l_mn:l_mx) = l_halo(i,j,:,l,m,n)
935  do k=l_mn,l_mx
936  buf_i(k,0) = l_halo(i,j,k,l,m,n)
937  enddo
938  else
939 ! buf_i(r_mn:r_mx) = r_halo(i,j,:,l,m,n)
940  do k=r_mn,r_mx
941  buf_i(k,0) = r_halo(i,j,k,l,m,n)
942  enddo
943  end if
944  buf_i(r_mx+1,0) = decomposition%local%bc_right(i,j,l,m,n)!c_np2
945  ! compute interpolant
946 !DIR$ FORCEINLINE
948  buf_i(:,0), num_points, buf_o(:,0), buf_i(:,0) )
949 
950  ! perform interpolation
951 !DIR$ FORCEINLINE
953  buf_i(:,0), self%displacement_eta3(n), num_points, buf_o(:,0))
954 
955  ! copy-back interpolated values
956 ! f6d(i,j,:,l,m,n) = buf_o(l_mn-1:r_mx-2)!(c_mn:c_mx) !TODO: First until last -3
957  do k=0,num_points-1
958  f6d(i,j,c_mn+k,l,m,n) = buf_o(l_mn-1+k,0)
959  enddo
960  end do
961 #endif
962  end do
963  end do
964  end do
965  end do
966 !$omp end do
967  deallocate(buf_i)
968  deallocate(buf_o)
969 !$omp end parallel
970  call sll_s_deallocate_bc_buffers(decomposition)
971  end do
973 
974 
976  subroutine sll_s_advection_6d_spline_dd_slim_advect_eta4(self, topology, decomposition, displacement, f6d)
977  type(sll_t_advection_6d_spline_dd_slim), intent(inout) :: self
978  type(sll_t_cartesian_topology_6d), intent( in ) :: topology
979  type(sll_t_decomposition_slim_6d), target, intent(inout) :: decomposition
980  sll_real64, intent(in) :: displacement(&
981  decomposition%local%mn(1):decomposition%local%mx(1), &
982  decomposition%local%mn(2):decomposition%local%mx(2), &
983  decomposition%local%mn(3):decomposition%local%mx(3))
984  sll_real64, intent(inout) :: f6d(&
985  decomposition%local%mn(1):decomposition%local%mx(1), &
986  decomposition%local%mn(2):decomposition%local%mx(2), &
987  decomposition%local%mn(3):decomposition%local%mx(3), &
988  decomposition%local%mn(4):decomposition%local%mx(4), &
989  decomposition%local%mn(5):decomposition%local%mx(5), &
990  decomposition%local%mn(6):decomposition%local%mx(6))
991 
992  sll_int32, parameter :: id = 4 ! eta1
993  sll_int32 :: i,j,k,l,m,n,w
994  sll_int32 :: c_mn
995  sll_real64, allocatable :: buf_i(:,:)
996  sll_real64, allocatable :: buf_o(:,:)
997  sll_int32 :: n_loc, ind_max
998  halo_dtype, pointer :: l_halo(:,:,:,:,:,:)
999  halo_dtype, pointer :: r_halo(:,:,:,:,:,:)
1000  sll_int32, allocatable :: si(:)
1001  sll_int32, allocatable :: indm(:)
1002  sll_real64, allocatable :: alpha(:)
1003  sll_int32 :: wx
1004  halo_dtype, allocatable :: d0(:), c_np2(:)
1005  sll_int32, pointer :: loop_mn(:), loop_mx(:)
1006 
1007  call sll_s_allocate_bc_buffers_6d(decomposition, id)
1008 
1009  ! cache values to avoid multiple dereferencing inside the loops
1010  c_mn = decomposition%local%mn(id)
1011  n_loc = decomposition%local%nw(id)
1012  loop_mn => decomposition%local%mn
1013  loop_mx => decomposition%local%mx
1014  ind_max = n_loc+3
1015  wx = get_wx(decomposition)
1016 
1017 !$omp parallel default(shared) private(i,j,k,l,m,n,w,buf_i,buf_o,d0,c_np2,si,alpha,indm)
1018  ! WARNING: We use zero based indexing for the cache blocking to avoid numerous "+1"
1019  ! in indexing operations, different from the other advector routines!!!
1020  allocate(buf_i(1:ind_max, 0:wx-1))
1021  allocate(buf_o(1:ind_max-1, 0:wx-1))
1022  allocate(si(0:wx-1))
1023  allocate(indm(0:wx-1))
1024  allocate(alpha(0:wx-1))
1025  allocate(d0(0:wx-1))
1026  allocate(c_np2(0:wx-1))
1027 !$omp do OMP_COLLAPSE OMP_SCHEDULE
1028  ! Compute the remote part of the boundary conditions
1029  do n=loop_mn(6), loop_mx(6)
1030  do m=loop_mn(5), loop_mx(5)
1031  do k=loop_mn(3), loop_mx(3)
1032  do j=loop_mn(2), loop_mx(2)
1033 #ifndef DISABLE_CACHE_BLOCKING
1034  do i=loop_mn(1), loop_mx(1), wx
1035 ! buf_i(1:n_loc) = f6d(i,j,k,:,m,n)
1036  do l=0,n_loc-1
1037  do w=0,wx-1
1038  buf_i(1+l,w) = f6d(i+w,j,k,c_mn+l,m,n)
1039  enddo
1040  enddo
1041  do w=0,wx-1
1042  si(w) = floor(displacement(i+w,j,k))
1043  enddo
1044  do w=0,wx-1
1045 !DIR$ FORCEINLINE
1047  buf_i(1:n_loc,w), si(w), n_loc, d0(w), c_np2(w) )
1048  enddo
1049  do w=0,wx-1
1050  decomposition%local%bc_left_send(i+w,j,k,m,n) = c_np2(w)
1051  enddo
1052  do w=0,wx-1
1053  decomposition%local%bc_right_send(i+w,j,k,m,n) = d0(w)
1054  enddo
1055  end do
1056 #else
1057  do i=loop_mn(1), loop_mx(1)
1058  si(0) = floor( displacement(i,j,k) )
1059 ! buf_i(1:n_loc) = f6d(i,j,k,:,m,n)
1060  do l=0,n_loc-1
1061  buf_i(1+l,0) = f6d(i,j,k,c_mn+l,m,n)
1062  enddo
1063 !DIR$ FORCEINLINE
1065  buf_i(1:n_loc,0), si(0), n_loc, d0(0), c_np2(0) )
1066  decomposition%local%bc_left_send(i,j,k,m,n) = c_np2(0)
1067  decomposition%local%bc_right_send(i,j,k,m,n) = d0(0)
1068  end do
1069 #endif
1070  end do
1071  end do
1072  end do
1073  end do
1074 !$omp end do
1075  deallocate(c_np2)
1076  deallocate(d0)
1077 !$omp single
1078  ! Exchange boundary conditions
1079  ! TODO: restrict exchange to the actual range needed by the blocks
1080  call sll_s_apply_bc_exchange_slim_6d_real64(topology, decomposition, id)
1081  ! Exchange data for the neighboring cells
1083  decomposition, &
1084  f6d, &
1085  id, &
1086  1, &
1087  1)
1088  l_halo => decomposition%local%halo_left%buf
1089  r_halo => decomposition%local%halo_right%buf
1090 !$omp end single
1091 !$omp do OMP_COLLAPSE OMP_SCHEDULE
1092  do n=loop_mn(6), loop_mx(6)
1093  do m=loop_mn(5), loop_mx(5)
1094  do k=loop_mn(3), loop_mx(3)
1095  do j=loop_mn(2), loop_mx(2)
1096 #ifndef DISABLE_CACHE_BLOCKING
1097  do i=loop_mn(1), loop_mx(1), wx
1098  do w=0,wx-1
1099  si(w) = floor( displacement(i+w,j,k) )
1100  enddo
1101  do w=0,wx-1
1102  alpha(w) = displacement(i+w,j,k) - real(si(w), f64)
1103  enddo
1104  do w=0,wx-1
1105  if ( si(w) == 0 ) then
1106  indm(w) = 1
1107  buf_i(n_loc+2,w) = r_halo(i+w,j,k,loop_mx(4)+1,m,n)
1108  else ! si = -1
1109  indm(w) = 2
1110  buf_i(indm(w),w) = l_halo(i+w,j,k,c_mn-1,m,n)
1111  end if
1112  enddo
1113 ! buf_i(indm+1:n_loc+indm) = f6d(i,j,k,:,m,n)
1114  do l=0,n_loc-1
1115  do w=0,wx-1
1116  buf_i(indm(w)+1+l,w) = f6d(i+w,j,k,c_mn+l,m,n)
1117  enddo
1118  enddo
1119  ! add local contributions to boundary conditions for the spline
1120  do w=0,wx-1
1121 !DIR$ FORCEINLINE
1123  buf_i(indm(w)+1:n_loc+indm(w),w), si(w), n_loc, &
1124  decomposition%local%bc_left(i+w,j,k,m,n), &
1125  decomposition%local%bc_right(i+w,j,k,m,n) )
1126  enddo
1127  do w=0,wx-1
1128  buf_i(1, w) = decomposition%local%bc_left(i+w,j,k,m,n)
1129  enddo
1130  do w=0,wx-1
1131  buf_i(ind_max, w) = decomposition%local%bc_right(i+w,j,k,m,n)
1132  enddo
1133  ! compute interpolant
1134  do w=0,wx-1
1135 !DIR$ FORCEINLINE
1137  buf_i(:,w), n_loc, buf_o(:,w), buf_i(:,w) )
1138  enddo
1139  ! perform interpolation
1140  do w=0,wx-1
1141 !DIR$ FORCEINLINE
1143  buf_i(:,w), alpha(w), n_loc, buf_o(:,w))
1144  enddo
1145  ! copy-back interpolated values
1146 ! f6d(i,j,k,:,m,n) = buf_o(1:n_loc)!First until last -3
1147  do l=0,n_loc-1
1148  do w=0,wx-1
1149  f6d(i+w,j,k,c_mn+l,m,n) = buf_o(1+l,w)
1150  enddo
1151  enddo
1152  end do
1153 #else
1154  do i=loop_mn(1), loop_mx(1)
1155  si(0) = floor( displacement(i,j,k) )
1156  alpha(0) = displacement(i,j,k) - real(si(0), f64)
1157  if ( si(0) == 0 ) then
1158  indm(0) = 1
1159  buf_i(n_loc+2,0) = r_halo(i,j,k,loop_mx(4)+1,m,n)
1160  else ! si = -1
1161  indm(0) = 2
1162  buf_i(indm(0),0) = l_halo(i,j,k,c_mn-1,m,n)
1163  end if
1164 ! buf_i(indm+1:n_loc+indm) = f6d(i,j,k,:,m,n)
1165  do l=0,n_loc-1
1166  buf_i(indm(0)+1+l,0) = f6d(i,j,k,c_mn+l,m,n)
1167  enddo
1168  ! add local contributions to boundary conditions for the spline
1169 !DIR$ FORCEINLINE
1171  buf_i(indm(0)+1:n_loc+indm(0),0), si(0), n_loc, &
1172  decomposition%local%bc_left(i,j,k,m,n), &
1173  decomposition%local%bc_right(i,j,k,m,n) )
1174  buf_i( 1, 0 ) = decomposition%local%bc_left(i,j,k,m,n)
1175  buf_i( ind_max, 0 ) = decomposition%local%bc_right(i,j,k,m,n)
1176  ! compute interpolant
1177 !DIR$ FORCEINLINE
1179  buf_i(:,0), n_loc, buf_o(:,0), buf_i(:,0) )
1180  ! perform interpolation
1181 !DIR$ FORCEINLINE
1183  buf_i(:,0), alpha(0), n_loc, buf_o(:,0))
1184  ! copy-back interpolated values
1185 ! f6d(i,j,k,:,m,n) = buf_o(1:n_loc)!First until last -3
1186  do l=0,n_loc-1
1187  f6d(i,j,k,c_mn+l,m,n) = buf_o(1+l,0)
1188  enddo
1189  end do
1190 #endif
1191  end do
1192  end do
1193  end do
1194  end do
1195 !$omp end do
1196  deallocate(buf_i)
1197  deallocate(buf_o)
1198  deallocate(si)
1199  deallocate(indm)
1200  deallocate(alpha)
1201 !$omp end parallel
1202  call sll_s_deallocate_bc_buffers(decomposition)
1204 
1205 
1207  subroutine sll_s_advection_6d_spline_dd_slim_advect_eta5(self, topology, decomposition, displacement, f6d)
1208  type(sll_t_advection_6d_spline_dd_slim), intent(inout) :: self
1209  type(sll_t_cartesian_topology_6d), intent( in ) :: topology
1210  type(sll_t_decomposition_slim_6d), target, intent(inout) :: decomposition
1211  sll_real64, intent(in) :: displacement(&
1212  decomposition%local%mn(1):decomposition%local%mx(1), &
1213  decomposition%local%mn(2):decomposition%local%mx(2), &
1214  decomposition%local%mn(3):decomposition%local%mx(3))
1215  sll_real64, intent(inout) :: f6d(&
1216  decomposition%local%mn(1):decomposition%local%mx(1), &
1217  decomposition%local%mn(2):decomposition%local%mx(2), &
1218  decomposition%local%mn(3):decomposition%local%mx(3), &
1219  decomposition%local%mn(4):decomposition%local%mx(4), &
1220  decomposition%local%mn(5):decomposition%local%mx(5), &
1221  decomposition%local%mn(6):decomposition%local%mx(6))
1222 
1223  sll_int32, parameter :: id = 5 ! eta1
1224  sll_int32 :: i,j,k,l,m,n,w
1225  sll_real64, allocatable :: buf_i(:,:)
1226  sll_real64, allocatable :: buf_o(:,:)
1227  sll_int32 :: n_loc, ind_max
1228  halo_dtype, pointer :: l_halo(:,:,:,:,:,:)
1229  halo_dtype, pointer :: r_halo(:,:,:,:,:,:)
1230  sll_int32, allocatable :: si(:)
1231  sll_int32, allocatable :: indm(:)
1232  sll_real64, allocatable :: alpha(:)
1233  sll_int32 :: c_mn
1234  sll_int32 :: wx
1235  halo_dtype, allocatable :: d0(:), c_np2(:)
1236  sll_int32, pointer :: loop_mn(:), loop_mx(:)
1237 
1238  call sll_s_allocate_bc_buffers_6d(decomposition, id)
1239 
1240  ! cache values to avoid multiple dereferencing inside the loops
1241  c_mn = decomposition%local%mn(id)
1242  n_loc = decomposition%local%nw(id)
1243  loop_mn => decomposition%local%mn
1244  loop_mx => decomposition%local%mx
1245  ind_max = n_loc+3
1246  wx = get_wx(decomposition)
1247 
1248 !$omp parallel default(shared) private(i,j,k,l,m,n,w,buf_i,buf_o,d0,c_np2,si,alpha,indm)
1249  allocate(buf_i(1:ind_max, 0:wx-1))
1250  allocate(buf_o(1:ind_max-1, 0:wx-1))
1251  allocate(si(0:wx-1))
1252  allocate(indm(0:wx-1))
1253  allocate(alpha(0:wx-1))
1254  allocate(d0(0:wx-1))
1255  allocate(c_np2(0:wx-1))
1256 !$omp do OMP_COLLAPSE OMP_SCHEDULE
1257  ! Compute the remote part of the boundary conditions
1258  do n=loop_mn(6), loop_mx(6)
1259  do l=loop_mn(4), loop_mx(4)
1260  do k=loop_mn(3), loop_mx(3)
1261  do j=loop_mn(2), loop_mx(2)
1262 #ifndef DISABLE_CACHE_BLOCKING
1263  do i=loop_mn(1), loop_mx(1), wx
1264  do w=0,wx-1
1265  si(w) = floor( displacement(i+w,j,k) )
1266  enddo
1267 ! buf_i(1:n_loc) = f6d(i,j,k,l,:,n)
1268  do m=0,n_loc-1
1269  do w=0,wx-1
1270  buf_i(1+m,w) = f6d(i+w,j,k,l,c_mn+m,n)
1271  enddo
1272  enddo
1273  do w=0,wx-1
1274 !DIR$ FORCEINLINE
1276  buf_i(1:n_loc,w), si(w), n_loc, d0(w), c_np2(w) )
1277  enddo
1278  do w=0,wx-1
1279  decomposition%local%bc_left_send(i+w,j,k,l,n) = c_np2(w)
1280  enddo
1281  do w=0,wx-1
1282  decomposition%local%bc_right_send(i+w,j,k,l,n) = d0(w)
1283  enddo
1284  end do
1285 #else
1286  do i=loop_mn(1), loop_mx(1)
1287  si(0) = floor( displacement(i,j,k) )
1288 ! buf_i(1:n_loc) = f6d(i,j,k,l,:,n)
1289  do m=0,n_loc-1
1290  buf_i(1+m,0) = f6d(i,j,k,l,c_mn+m,n)
1291  enddo
1292 !DIR$ FORCEINLINE
1294  buf_i(1:n_loc,0), si(0), n_loc, d0(0), c_np2(0) )
1295  decomposition%local%bc_left_send(i,j,k,l,n) = c_np2(0)
1296  decomposition%local%bc_right_send(i,j,k,l,n) = d0(0)
1297  end do
1298 #endif
1299  end do
1300  end do
1301  end do
1302  end do
1303 !$omp end do
1304  deallocate(c_np2)
1305  deallocate(d0)
1306 !$omp single
1307  ! Exchange boundary conditions
1308  ! TODO: restrict exchange to the actual range needed by the blocks
1309  call sll_s_apply_bc_exchange_slim_6d_real64(topology, decomposition, id)
1310  ! Exchange data for the neighboring cells
1312  decomposition, &
1313  f6d, &
1314  id, &
1315  1, &
1316  1)
1317  l_halo => decomposition%local%halo_left%buf
1318  r_halo => decomposition%local%halo_right%buf
1319 !$omp end single
1320 !$omp do OMP_COLLAPSE OMP_SCHEDULE
1321  do n=loop_mn(6), loop_mx(6)
1322  do l=loop_mn(4), loop_mx(4)
1323  do k=loop_mn(3), loop_mx(3)
1324  do j=loop_mn(2), loop_mx(2)
1325 #ifndef DISABLE_CACHE_BLOCKING
1326  do i=loop_mn(1), loop_mx(1), wx
1327  do w=0,wx-1
1328  si(w) = floor( displacement(i+w,j,k) )
1329  enddo
1330  do w=0,wx-1
1331  alpha(w) = displacement(i+w,j,k) - real(si(w), f64)
1332  enddo
1333  do w=0,wx-1
1334  if ( si(w) == 0 ) then
1335  indm(w) = 1
1336  buf_i(n_loc+2,w) = r_halo(i+w,j,k,l,loop_mx(5)+1,n)
1337  else ! si = -1
1338  indm(w) = 2
1339  buf_i(indm(w),w) = l_halo(i+w,j,k,l,c_mn-1,n)
1340  end if
1341  enddo
1342 ! buf_i(indm+1:n_loc+indm) = f6d(i,j,k,l,:,n)
1343  do m=0,n_loc-1
1344  do w=0,wx-1
1345  buf_i(indm(w)+1+m,w) = f6d(i+w,j,k,l,c_mn+m,n)
1346  enddo
1347  enddo
1348 
1349  ! add local contributions to boundary conditions for the spline
1350  do w=0,wx-1
1351 !DIR$ FORCEINLINE
1353  buf_i(indm(w)+1:n_loc+indm(w),w), si(w), n_loc, &
1354  decomposition%local%bc_left(i+w,j,k,l,n), &
1355  decomposition%local%bc_right(i+w,j,k,l,n) )
1356  enddo
1357  do w=0,wx-1
1358  buf_i(1,w) = decomposition%local%bc_left(i+w,j,k,l,n)
1359  enddo
1360  do w=0,wx-1
1361  buf_i(ind_max,w) = decomposition%local%bc_right(i+w,j,k,l,n)
1362  enddo
1363  ! compute interpolant
1364  do w=0,wx-1
1365 !DIR$ FORCEINLINE
1367  buf_i(:,w), n_loc, buf_o(:,w), buf_i(:,w) )
1368  enddo
1369  ! perform interpolation
1370  do w=0,wx-1
1371 !DIR$ FORCEINLINE
1373  buf_i(:,w), alpha(w), n_loc, buf_o(:,w))
1374  enddo
1375  ! copy-back interpolated values
1376 ! f6d(i,j,k,l,:,n) = buf_o(1:n_loc)!First until last -3
1377  do m=0,n_loc-1
1378  do w=0,wx-1
1379  f6d(i+w,j,k,l,c_mn+m,n) = buf_o(1+m,w)
1380  enddo
1381  enddo
1382  end do
1383 #else
1384  do i=loop_mn(1), loop_mx(1)
1385  si(0) = floor( displacement(i,j,k) )
1386  alpha(0) = displacement(i,j,k) - real(si(0), f64)
1387  if ( si(0) == 0 ) then
1388  indm(0) = 1
1389  buf_i(n_loc+2,0) = r_halo(i,j,k,l,loop_mx(5)+1,n)
1390  else ! si = -1
1391  indm(0) = 2
1392  buf_i(indm(0),0) = l_halo(i,j,k,l,c_mn-1,n)
1393  end if
1394 
1395 ! buf_i(indm+1:n_loc+indm) = f6d(i,j,k,l,:,n)
1396  do m=0,n_loc-1
1397  buf_i(indm(0)+1+m,0) = f6d(i,j,k,l,c_mn+m,n)
1398  enddo
1399 
1400  ! add local contributions to boundary conditions for the spline
1401 !DIR$ FORCEINLINE
1403  buf_i(indm(0)+1:n_loc+indm(0),0), si(0), n_loc, &
1404  decomposition%local%bc_left(i,j,k,l,n), &
1405  decomposition%local%bc_right(i,j,k,l,n) )
1406  buf_i(1,0) = decomposition%local%bc_left(i,j,k,l,n)
1407  buf_i(ind_max,0) = decomposition%local%bc_right(i,j,k,l,n)
1408  ! compute interpolant
1409 !DIR$ FORCEINLINE
1411  buf_i(:,0), n_loc, buf_o(:,0), buf_i(:,0) )
1412  ! perform interpolation
1413 !DIR$ FORCEINLINE
1415  buf_i(:,0), alpha(0), n_loc, buf_o(:,0))
1416  ! copy-back interpolated values
1417 ! f6d(i,j,k,l,:,n) = buf_o(1:n_loc)!First until last -3
1418  do m=0,n_loc-1
1419  f6d(i,j,k,l,c_mn+m,n) = buf_o(1+m,0)
1420  enddo
1421  end do
1422 #endif
1423  end do
1424  end do
1425  end do
1426  end do
1427 !$omp end do
1428  deallocate(buf_i)
1429  deallocate(buf_o)
1430  deallocate(si)
1431  deallocate(indm)
1432  deallocate(alpha)
1433 !$omp end parallel
1434  call sll_s_deallocate_bc_buffers(decomposition)
1436 
1437 
1439  subroutine sll_s_advection_6d_spline_dd_slim_advect_eta6(self, topology, decomposition, displacement, f6d)
1440  type(sll_t_advection_6d_spline_dd_slim), intent(inout) :: self
1441  type(sll_t_cartesian_topology_6d), intent( in ) :: topology
1442  type(sll_t_decomposition_slim_6d), target, intent(inout) :: decomposition
1443  sll_real64, intent(in) :: displacement(&
1444  decomposition%local%mn(1):decomposition%local%mx(1), &
1445  decomposition%local%mn(2):decomposition%local%mx(2), &
1446  decomposition%local%mn(3):decomposition%local%mx(3))
1447  sll_real64, intent(inout) :: f6d(&
1448  decomposition%local%mn(1):decomposition%local%mx(1), &
1449  decomposition%local%mn(2):decomposition%local%mx(2), &
1450  decomposition%local%mn(3):decomposition%local%mx(3), &
1451  decomposition%local%mn(4):decomposition%local%mx(4), &
1452  decomposition%local%mn(5):decomposition%local%mx(5), &
1453  decomposition%local%mn(6):decomposition%local%mx(6))
1454 
1455  sll_int32, parameter :: id = 6 ! eta6
1456  sll_int32 :: i,j,k,l,m,n,w
1457  sll_real64, allocatable :: buf_i(:,:)
1458  sll_real64, allocatable :: buf_o(:,:)
1459  sll_int32 :: n_loc, ind_max
1460  halo_dtype, pointer :: l_halo(:,:,:,:,:,:)
1461  halo_dtype, pointer :: r_halo(:,:,:,:,:,:)
1462  sll_int32, allocatable :: si(:)
1463  sll_int32, allocatable :: indm(:)
1464  sll_real64, allocatable :: alpha(:)
1465  sll_int32 :: c_mn
1466  sll_int32 :: wx
1467  halo_dtype, allocatable :: d0(:), c_np2(:)
1468  sll_int32, pointer :: loop_mn(:), loop_mx(:)
1469 
1470  call sll_s_allocate_bc_buffers_6d(decomposition, id)
1471 
1472  ! cache values to avoid multiple dereferencing inside the loops
1473  c_mn = decomposition%local%mn(id)
1474  n_loc = decomposition%local%nw(id)
1475  loop_mn => decomposition%local%mn
1476  loop_mx => decomposition%local%mx
1477  ind_max = n_loc+3
1478  wx = get_wx(decomposition)
1479 
1480 !$omp parallel default(shared) private(i,j,k,l,m,n,w,buf_i,buf_o,d0,c_np2,si,alpha,indm)
1481  allocate(buf_i(1:ind_max, 0:wx-1))
1482  allocate(buf_o(1:ind_max-1, 0:wx-1))
1483  allocate(si(0:wx-1))
1484  allocate(indm(0:wx-1))
1485  allocate(alpha(0:wx-1))
1486  allocate(d0(0:wx-1))
1487  allocate(c_np2(0:wx-1))
1488 !$omp do OMP_COLLAPSE OMP_SCHEDULE
1489  ! Compute the remote part of the boundary conditions
1490  do m=loop_mn(5), loop_mx(5)
1491  do l=loop_mn(4), loop_mx(4)
1492  do k=loop_mn(3), loop_mx(3)
1493  do j=loop_mn(2), loop_mx(2)
1494 #ifndef DISABLE_CACHE_BLOCKING
1495  do i=loop_mn(1), loop_mx(1), wx
1496  do w=0,wx-1
1497  si(w) = floor(displacement(i+w,j,k))
1498  enddo
1499 ! buf_i(1:n_loc) = f6d(i,j,k,l,m,:)
1500  do n=0,n_loc-1
1501  do w=0,wx-1
1502  buf_i(1+n,w) = f6d(i+w,j,k,l,m,c_mn+n)
1503  enddo
1504  enddo
1505  do w=0,wx-1
1506 !DIR$ FORCEINLINE
1508  buf_i(1:n_loc,w), si(w), n_loc, d0(w), c_np2(w) )
1509  enddo
1510  do w=0,wx-1
1511  decomposition%local%bc_left_send(i+w,j,k,l,m) = c_np2(w)
1512  enddo
1513  do w=0,wx-1
1514  decomposition%local%bc_right_send(i+w,j,k,l,m) = d0(w)
1515  enddo
1516  end do
1517 #else
1518  do i=loop_mn(1), loop_mx(1)
1519  si(0) = floor( displacement(i,j,k) )
1520 ! buf_i(1:n_loc) = f6d(i,j,k,l,m,:)
1521  do n=0,n_loc-1
1522  buf_i(1+n,0) = f6d(i,j,k,l,m,c_mn+n)
1523  enddo
1524 !DIR$ FORCEINLINE
1526  buf_i(1:n_loc,0), si(0), n_loc, d0(0), c_np2(0) )
1527  decomposition%local%bc_left_send(i,j,k,l,m) = c_np2(0)
1528  decomposition%local%bc_right_send(i,j,k,l,m) = d0(0)
1529  end do
1530 #endif
1531  end do
1532  end do
1533  end do
1534  end do
1535 !$omp end do
1536  deallocate(c_np2)
1537  deallocate(d0)
1538 !$omp single
1539  ! Exchange boundary conditions
1540  ! TODO: restrict exchange to the actual range needed by the blocks
1541  call sll_s_apply_bc_exchange_slim_6d_real64(topology, decomposition, id)
1542  ! Exchange data for the neighboring cells
1544  decomposition, &
1545  f6d, &
1546  id, &
1547  1, &
1548  1)
1549  l_halo => decomposition%local%halo_left%buf
1550  r_halo => decomposition%local%halo_right%buf
1551 !$omp end single
1552 !$omp do OMP_COLLAPSE OMP_SCHEDULE
1553  do m=loop_mn(5), loop_mx(5)
1554  do l=loop_mn(4), loop_mx(4)
1555  do k=loop_mn(3), loop_mx(3)
1556  do j=loop_mn(2), loop_mx(2)
1557 #ifndef DISABLE_CACHE_BLOCKING
1558  do i=loop_mn(1), loop_mx(1), wx
1559  do w=0,wx-1
1560  si(w) = floor( displacement(i+w,j,k) )
1561  enddo
1562  do w=0,wx-1
1563  alpha(w) = displacement(i+w,j,k) - real(si(w), f64)
1564  enddo
1565  do w=0,wx-1
1566  if ( si(w) == 0 ) then
1567  indm(w) = 1
1568  buf_i(n_loc+2,w) = r_halo(i+w,j,k,l,m,loop_mx(6)+1)
1569  else ! si = -1
1570  indm(w) = 2
1571  buf_i(indm(w),w) = l_halo(i+w,j,k,l,m,c_mn-1)
1572  end if
1573  enddo
1574 
1575 ! buf_i(indm+1:n_loc+indm) = f6d(i,j,k,l,m,:)
1576  do n=0,n_loc-1
1577  do w=0,wx-1
1578  buf_i(indm(w)+1+n,w) = f6d(i+w,j,k,l,m,c_mn+n)
1579  enddo
1580  enddo
1581 
1582  ! add local contributions to boundary conditions for the spline
1583  do w=0,wx-1
1584 !DIR$ FORCEINLINE
1586  buf_i(indm(w)+1:n_loc+indm(w),w), si(w), n_loc, &
1587  decomposition%local%bc_left(i+w,j,k,l,m), &
1588  decomposition%local%bc_right(i+w,j,k,l,m) )
1589  enddo
1590  do w=0,wx-1
1591  buf_i(1,w) = decomposition%local%bc_left(i+w,j,k,l,m)
1592  enddo
1593  do w=0,wx-1
1594  buf_i(ind_max,w) = decomposition%local%bc_right(i+w,j,k,l,m)
1595  enddo
1596 
1597  ! compute interpolant
1598  do w=0,wx-1
1599 !DIR$ FORCEINLINE
1601  buf_i(:,w), n_loc, buf_o(:,w), buf_i(:,w))
1602  enddo
1603 
1604  ! perform interpolation
1605  do w=0,wx-1
1606 !DIR$ FORCEINLINE
1608  buf_i(:,w), alpha(w), n_loc, buf_o(:,w))
1609  enddo
1610 
1611  ! copy-back interpolated values
1612 ! f6d(i,j,k,l,m,:) = buf_o(1:n_loc)!First until last -3
1613  do n=0,n_loc-1
1614  do w=0,wx-1
1615  f6d(i+w,j,k,l,m,c_mn+n) = buf_o(1+n,w)
1616  enddo
1617  enddo
1618  end do
1619 #else
1620  do i=loop_mn(1), loop_mx(1)
1621  si(0) = floor( displacement(i,j,k) )
1622  alpha(0) = displacement(i,j,k) - real(si(0), f64)
1623  if ( si(0) == 0 ) then
1624  indm(0) = 1
1625  buf_i(n_loc+2,0) = r_halo(i,j,k,l,m,loop_mx(6)+1)
1626  else ! si = -1
1627  indm(0) = 2
1628  buf_i(indm(0),0) = l_halo(i,j,k,l,m,c_mn-1)
1629  end if
1630 
1631 ! buf_i(indm+1:n_loc+indm) = f6d(i,j,k,l,m,:)
1632  do n=0,n_loc-1
1633  buf_i(indm(0)+1+n,0) = f6d(i,j,k,l,m,c_mn+n)
1634  enddo
1635 
1636  ! add local contributions to boundary conditions for the spline
1637 !DIR$ FORCEINLINE
1639  buf_i(indm(0)+1:n_loc+indm(0),0), si(0), n_loc, &
1640  decomposition%local%bc_left(i,j,k,l,m), &
1641  decomposition%local%bc_right(i,j,k,l,m) )
1642 
1643  buf_i(1,0) = decomposition%local%bc_left(i,j,k,l,m)
1644  buf_i(ind_max,0) = decomposition%local%bc_right(i,j,k,l,m)
1645 
1646  ! compute interpolant
1647 !DIR$ FORCEINLINE
1649  buf_i(:,0), n_loc, buf_o(:,0), buf_i(:,0))
1650 
1651  ! perform interpolation
1652 !DIR$ FORCEINLINE
1654  buf_i(:,0), alpha(0), n_loc, buf_o(:,0))
1655 
1656  ! copy-back interpolated values
1657 ! f6d(i,j,k,l,m,:) = buf_o(1:n_loc)!First until last -3
1658  do n=0,n_loc-1
1659  f6d(i,j,k,l,m,c_mn+n) = buf_o(1+n,0)
1660  enddo
1661  end do
1662 #endif
1663  end do
1664  end do
1665  end do
1666  end do
1667 !$omp end do
1668  deallocate(buf_i)
1669  deallocate(buf_o)
1670  deallocate(si)
1671  deallocate(indm)
1672  deallocate(alpha)
1673 !$omp end parallel
1674  call sll_s_deallocate_bc_buffers(decomposition)
1676 
1677 
1678 
1680  subroutine sll_s_advection_6d_spline_dd_slim_advect_eta2_dispeta45(self, topology, decomposition, displacement, f6d)
1681  type(sll_t_advection_6d_spline_dd_slim), intent(inout) :: self
1682  type(sll_t_cartesian_topology_6d), intent( in ) :: topology
1683  type(sll_t_decomposition_slim_6d), target, intent(inout) :: decomposition
1684  sll_real64, intent(in) :: displacement(&
1685  decomposition%local%mn(4):decomposition%local%mx(4), &
1686  decomposition%local%mn(5):decomposition%local%mx(5))
1687  sll_real64, intent(inout) :: f6d(&
1688  decomposition%local%mn(1):decomposition%local%mx(1), &
1689  decomposition%local%mn(2):decomposition%local%mx(2), &
1690  decomposition%local%mn(3):decomposition%local%mx(3), &
1691  decomposition%local%mn(4):decomposition%local%mx(4), &
1692  decomposition%local%mn(5):decomposition%local%mx(5), &
1693  decomposition%local%mn(6):decomposition%local%mx(6))
1694 
1695  sll_int32, parameter :: id = 2 ! eta2
1696  sll_int32 :: i,j,k,l,m,n,w
1697  sll_int32 :: c_mn
1698  sll_real64, allocatable :: buf_i(:,:)
1699  sll_real64, allocatable :: buf_o(:,:)
1700  sll_int32 :: n_loc, ind_max
1701  halo_dtype, pointer :: l_halo(:,:,:,:,:,:)
1702  halo_dtype, pointer :: r_halo(:,:,:,:,:,:)
1703  sll_int32 :: si
1704  sll_int32 :: indm
1705  sll_real64 :: alpha
1706  sll_int32 :: wx
1707  halo_dtype, allocatable :: d0(:), c_np2(:)
1708  sll_int32, pointer :: loop_mn(:), loop_mx(:)
1709 
1710  call sll_s_allocate_bc_buffers_6d(decomposition, id)
1711 
1712  ! cache values to avoid multiple dereferencing inside the loops
1713  c_mn = decomposition%local%mn(id)
1714  n_loc = decomposition%local%nw(id)
1715  loop_mn => decomposition%local%mn
1716  loop_mx => decomposition%local%mx
1717  ind_max = n_loc+3
1718  wx = get_wx(decomposition)
1719 
1720 !$omp parallel default(shared) private(i,j,k,l,m,n,w,buf_i,buf_o,d0,c_np2,si,alpha,indm)
1721  ! WARNING: We use zero based indexing for the cache blocking to avoid numerous "+1"
1722  ! in indexing operations, different from the other advector routines!!!
1723  allocate(buf_i(1:ind_max, 0:wx-1))
1724  allocate(buf_o(1:ind_max-1, 0:wx-1))
1725  allocate(d0(0:wx-1))
1726  allocate(c_np2(0:wx-1))
1727 !$omp do OMP_COLLAPSE OMP_SCHEDULE
1728  ! Compute the remote part of the boundary conditions
1729  do n=loop_mn(6), loop_mx(6)
1730  do m=loop_mn(5), loop_mx(5)
1731  do l=loop_mn(4), loop_mx(4)
1732  do k=loop_mn(3), loop_mx(3)
1733  !do j=loop_mn(2), loop_mx(2)
1734 #ifndef DISABLE_CACHE_BLOCKING
1735  do i=loop_mn(1), loop_mx(1), wx
1736 ! buf_i(1:n_loc) = f6d(i,j,k,:,m,n)
1737  do j=0,n_loc-1
1738  do w=0,wx-1
1739  buf_i(1+j,w) = f6d(i+w,c_mn+j,k,l,m,n)
1740  enddo
1741  enddo
1742  si = floor(displacement(l,m))
1743  do w=0,wx-1
1744 !DIR$ FORCEINLINE
1746  buf_i(1:n_loc,w), si, n_loc, d0(w), c_np2(w) )
1747  enddo
1748  do w=0,wx-1
1749  decomposition%local%bc_left_send(i+w,k,l,m,n) = c_np2(w)
1750  enddo
1751  do w=0,wx-1
1752  decomposition%local%bc_right_send(i+w,k,l,m,n) = d0(w)
1753  enddo
1754  end do
1755 #else
1756  do i=loop_mn(1), loop_mx(1)
1757  si = floor( displacement(l,m) )
1758 ! buf_i(1:n_loc) = f6d(i,j,k,:,m,n)
1759  do j=0,n_loc-1
1760  buf_i(1+j,0) = f6d(i,c_mn+j,k,l,m,n)
1761  enddo
1762 !DIR$ FORCEINLINE
1764  buf_i(1:n_loc,0), si, n_loc, d0(0), c_np2(0) )
1765  decomposition%local%bc_left_send(i,k,l,m,n) = c_np2(0)
1766  decomposition%local%bc_right_send(i,k,l,m,n) = d0(0)
1767  end do
1768 #endif
1769  end do
1770  end do
1771  end do
1772  end do
1773 !$omp end do
1774  deallocate(c_np2)
1775  deallocate(d0)
1776 !$omp single
1777  ! Exchange boundary conditions
1778  ! TODO: restrict exchange to the actual range needed by the blocks
1779  call sll_s_apply_bc_exchange_slim_6d_real64(topology, decomposition, id)
1780  ! Exchange data for the neighboring cells
1782  decomposition, &
1783  f6d, &
1784  id, &
1785  1, &
1786  1)
1787  l_halo => decomposition%local%halo_left%buf
1788  r_halo => decomposition%local%halo_right%buf
1789 !$omp end single
1790 !$omp do OMP_COLLAPSE OMP_SCHEDULE
1791  do n=loop_mn(6), loop_mx(6)
1792  do m=loop_mn(5), loop_mx(5)
1793  do l=loop_mn(4), loop_mx(4)
1794  do k=loop_mn(3), loop_mx(3)
1795  !do j=loop_mn(2), loop_mx(2)
1796 #ifndef DISABLE_CACHE_BLOCKING
1797  do i=loop_mn(1), loop_mx(1), wx
1798  si = floor( displacement(l,m) )
1799  if ( si == 0 ) then
1800  indm = 1
1801  else ! si = -1
1802  indm = 2
1803  end if
1804  if ( si> 0 ) print*, '######## si', si
1805  if (si<-1) print*, '####### si', si
1806  alpha = displacement(l,m) - real(si, f64)
1807  do w=0,wx-1
1808  if ( si == 0 ) then
1809  buf_i(n_loc+2,w) = r_halo(i+w,loop_mx(2)+1,k,l,m,n)
1810  else ! si = -1
1811  buf_i(indm,w) = l_halo(i+w,c_mn-1,k,l,m,n)
1812  end if
1813  enddo
1814 ! buf_i(indm+1:n_loc+indm) = f6d(i,j,k,:,m,n)
1815  do j=0,n_loc-1
1816  do w=0,wx-1
1817  buf_i(indm+1+j,w) = f6d(i+w,c_mn+j,k,l,m,n)
1818  enddo
1819  enddo
1820  ! add local contributions to boundary conditions for the spline
1821  do w=0,wx-1
1822 !DIR$ FORCEINLINE
1824  buf_i(indm+1:n_loc+indm,w), si, n_loc, &
1825  decomposition%local%bc_left(i+w,k,l,m,n), &
1826  decomposition%local%bc_right(i+w,k,l,m,n) )
1827  enddo
1828  do w=0,wx-1
1829  buf_i(1, w) = decomposition%local%bc_left(i+w,k,l,m,n)
1830  enddo
1831  do w=0,wx-1
1832  buf_i(ind_max, w) = decomposition%local%bc_right(i+w,k,l,m,n)
1833  enddo
1834  ! compute interpolant
1835  do w=0,wx-1
1836  ! print*, i,k,l,m,n
1837  ! write(25,*) buf_i(:,w)
1838 !DIR$ FORCEINLINE
1840  buf_i(:,w), n_loc, buf_o(:,w), buf_i(:,w) )
1841  !write(26,*) buf_i(:,w)
1842  !stop
1843  enddo
1844  ! perform interpolation
1845  do w=0,wx-1
1846 !DIR$ FORCEINLINE
1848  buf_i(:,w), alpha, n_loc, buf_o(:,w))
1849  enddo
1850  ! copy-back interpolated values
1851 ! f6d(i,j,k,:,m,n) = buf_o(1:n_loc)!First until last -3
1852  do j=0,n_loc-1
1853  do w=0,wx-1
1854  f6d(i+w,c_mn+j,k,l,m,n) = buf_o(1+j,w)
1855  enddo
1856  enddo
1857  end do
1858 #else
1859  do i=loop_mn(1), loop_mx(1)
1860 
1861  si = floor( displacement(l,m) )
1862  if ( si == 0 ) then
1863  indm = 1
1864  else ! si = -1
1865  indm = 2
1866  end if
1867  alpha = displacement(l,m) - real(si, f64)
1868  if ( si == 0 ) then
1869  buf_i(n_loc+2,0) = r_halo(i,loop_mx(2)+1,k,l,m,n)
1870  else ! si = -1
1871  buf_i(indm,0) = l_halo(i,c_mn-1,k,l,m,n)
1872  end if
1873 ! buf_i(indm+1:n_loc+indm) = f6d(i,j,k,:,m,n)
1874  do j=0,n_loc-1
1875  buf_i(indm+1+j,0) = f6d(i,c_mn+j,k,l,m,n)
1876  enddo
1877  ! add local contributions to boundary conditions for the spline
1878 !DIR$ FORCEINLINE
1880  buf_i(indm+1:n_loc+indm,0), si, n_loc, &
1881  decomposition%local%bc_left(i,k,l,m,n), &
1882  decomposition%local%bc_right(i,k,l,m,n) )
1883  buf_i( 1, 0 ) = decomposition%local%bc_left(i,k,l,m,n)
1884  buf_i( ind_max, 0 ) = decomposition%local%bc_right(i,k,l,m,n)
1885  ! compute interpolant
1886 !DIR$ FORCEINLINE
1888  buf_i(:,0), n_loc, buf_o(:,0), buf_i(:,0) )
1889  ! perform interpolation
1890 !DIR$ FORCEINLINE
1892  buf_i(:,0), alpha, n_loc, buf_o(:,0))
1893  ! copy-back interpolated values
1894 ! f6d(i,j,k,:,m,n) = buf_o(1:n_loc)!First until last -3
1895  do j=0,n_loc-1
1896  f6d(i,c_mn+j,k,l,m,n) = buf_o(1+j,0)
1897  enddo
1898  end do
1899 #endif
1900  end do
1901  end do
1902  end do
1903  end do
1904 !$omp end do
1905  deallocate(buf_i)
1906  deallocate(buf_o)
1907 !$omp end parallel
1908  call sll_s_deallocate_bc_buffers(decomposition)
1910 
1911 
1913  subroutine sll_s_advection_6d_spline_dd_slim_advect_eta1_dispeta45(self, topology, decomposition, displacement, f6d)
1914  type(sll_t_advection_6d_spline_dd_slim), intent(inout) :: self
1915  type(sll_t_cartesian_topology_6d), intent( in ) :: topology
1916  type(sll_t_decomposition_slim_6d), target, intent(inout) :: decomposition
1917  sll_real64, intent(in) :: displacement(&
1918  decomposition%local%mn(4):decomposition%local%mx(4), &
1919  decomposition%local%mn(5):decomposition%local%mx(5))
1920  sll_real64, intent(inout) :: f6d(&
1921  decomposition%local%mn(1):decomposition%local%mx(1), &
1922  decomposition%local%mn(2):decomposition%local%mx(2), &
1923  decomposition%local%mn(3):decomposition%local%mx(3), &
1924  decomposition%local%mn(4):decomposition%local%mx(4), &
1925  decomposition%local%mn(5):decomposition%local%mx(5), &
1926  decomposition%local%mn(6):decomposition%local%mx(6))
1927 
1928  sll_int32, parameter :: id = 1 ! eta2
1929  sll_int32 :: i,j,k,l,m,n,w
1930  sll_int32 :: c_mn
1931  sll_real64, allocatable :: buf_i(:,:)
1932  sll_real64, allocatable :: buf_o(:,:)
1933  sll_int32 :: n_loc, ind_max
1934  halo_dtype, pointer :: l_halo(:,:,:,:,:,:)
1935  halo_dtype, pointer :: r_halo(:,:,:,:,:,:)
1936  sll_int32 :: si
1937  sll_int32 :: indm
1938  sll_real64 :: alpha
1939  sll_int32 :: wx
1940  halo_dtype, allocatable :: d0(:), c_np2(:)
1941  sll_int32, pointer :: loop_mn(:), loop_mx(:)
1942 
1943  call sll_s_allocate_bc_buffers_6d(decomposition, id)
1944 
1945  ! cache values to avoid multiple dereferencing inside the loops
1946  c_mn = decomposition%local%mn(id)
1947  n_loc = decomposition%local%nw(id)
1948  loop_mn => decomposition%local%mn
1949  loop_mx => decomposition%local%mx
1950  ind_max = n_loc+3
1951  wx = get_wx(decomposition)
1952 
1953 !$omp parallel default(shared) private(i,j,k,l,m,n,w,buf_i,buf_o,d0,c_np2,si,alpha,indm)
1954  ! WARNING: We use zero based indexing for the cache blocking to avoid numerous "+1"
1955  ! in indexing operations, different from the other advector routines!!!
1956  allocate(buf_i(1:ind_max, 0:wx-1))
1957  allocate(buf_o(1:ind_max-1, 0:wx-1))
1958  allocate(d0(0:wx-1))
1959  allocate(c_np2(0:wx-1))
1960 !$omp do OMP_COLLAPSE OMP_SCHEDULE
1961  ! Compute the remote part of the boundary conditions
1962  do n=loop_mn(6), loop_mx(6)
1963  do m=loop_mn(5), loop_mx(5)
1964  do l=loop_mn(4), loop_mx(4)
1965  do k=loop_mn(3), loop_mx(3)
1966 #ifndef DISABLE_CACHE_BLOCKING
1967  do j=loop_mn(2), loop_mx(2), wx
1968 ! buf_i(1:n_loc) = f6d(i,j,k,:,m,n)
1969  ! do i=0,n_loc-1
1970  ! do w=0,wx-1
1971  ! buf_i(1+i,w) = f6d(c_mn+i,j+w,k,l,m,n)
1972  ! enddo
1973  ! enddo
1974  si = floor(displacement(l,m))
1975  if ( si> 0 ) print*, '######## si', si
1976  if (si<-1) print*, '####### si', si
1977  do w=0,wx-1
1978 !DIR$ FORCEINLINE
1980  f6d(:,j+w,k,l,m,n), si, n_loc, d0(w), c_np2(w) )
1981  !buf_i(1:n_loc,w), si, n_loc, d0(w), c_np2(w) )
1982  enddo
1983  do w=0,wx-1
1984  decomposition%local%bc_left_send(j+w,k,l,m,n) = c_np2(w)
1985  enddo
1986  do w=0,wx-1
1987  decomposition%local%bc_right_send(j+w,k,l,m,n) = d0(w)
1988  enddo
1989  end do
1990 #else
1991  do j=loop_mn(2), loop_mx(2)
1992  si = floor( displacement(l,m) )
1993  if ( si> 0 ) print*, '######## si', si
1994  if (si<-1) print*, '####### si', si
1995 ! buf_i(1:n_loc) = f6d(i,j,k,:,m,n)
1996  ! do i=0,n_loc-1
1997  ! buf_i(1+i,0) = f6d(c_mn+i,j,k,l,m,n)
1998  ! enddo
1999 !DIR$ FORCEINLINE
2001  f6d(:,j,k,l,m,n), si, n_loc, d0(0), c_np2(0) )
2002  ! buf_i(1:n_loc,0), si, n_loc, d0(0), c_np2(0) )
2003  decomposition%local%bc_left_send(j,k,l,m,n) = c_np2(0)
2004  decomposition%local%bc_right_send(j,k,l,m,n) = d0(0)
2005  end do
2006 #endif
2007  end do
2008  end do
2009  end do
2010  end do
2011 !$omp end do
2012  deallocate(c_np2)
2013  deallocate(d0)
2014 !$omp single
2015  ! Exchange boundary conditions
2016  ! TODO: restrict exchange to the actual range needed by the blocks
2017  call sll_s_apply_bc_exchange_slim_6d_real64(topology, decomposition, id)
2018  ! Exchange data for the neighboring cells
2020  decomposition, &
2021  f6d, &
2022  id, &
2023  1, &
2024  1)
2025  l_halo => decomposition%local%halo_left%buf
2026  r_halo => decomposition%local%halo_right%buf
2027 !$omp end single
2028 !$omp do OMP_COLLAPSE OMP_SCHEDULE
2029  do n=loop_mn(6), loop_mx(6)
2030  do m=loop_mn(5), loop_mx(5)
2031  do l=loop_mn(4), loop_mx(4)
2032  do k=loop_mn(3), loop_mx(3)
2033  !do j=loop_mn(2), loop_mx(2)
2034 #ifndef DISABLE_CACHE_BLOCKING
2035  do j=loop_mn(2), loop_mx(2), wx
2036  si = floor( displacement(l,m) )
2037  if ( si == 0 ) then
2038  indm = 1
2039  else ! si = -1
2040  indm = 2
2041  end if
2042  alpha = displacement(l,m) - real(si, f64)
2043  do w=0,wx-1
2044  if ( si == 0 ) then
2045  buf_i(n_loc+2,w) = r_halo(loop_mx(1)+1,j+w,k,l,m,n)
2046  else ! si = -1
2047  buf_i(indm,w) = l_halo(c_mn-1,j+w,k,l,m,n)
2048  end if
2049  enddo
2050 ! buf_i(indm+1:n_loc+indm) = f6d(i,j,k,:,m,n)
2051  do w=0,wx-1
2052  do i=0,n_loc-1
2053  buf_i(indm+1+i,w) = f6d(c_mn+i,j+w,k,l,m,n)
2054  enddo
2055  enddo
2056  ! add local contributions to boundary conditions for the spline
2057  do w=0,wx-1
2058 !DIR$ FORCEINLINE
2060  buf_i(indm+1:n_loc+indm,w), si, n_loc, &
2061  decomposition%local%bc_left(j+w,k,l,m,n), &
2062  decomposition%local%bc_right(j+w,k,l,m,n) )
2063  enddo
2064  do w=0,wx-1
2065  buf_i(1, w) = decomposition%local%bc_left(j+w,k,l,m,n)
2066  enddo
2067  do w=0,wx-1
2068  buf_i(ind_max, w) = decomposition%local%bc_right(j+w,k,l,m,n)
2069  enddo
2070  ! compute interpolant
2071  do w=0,wx-1
2072 !DIR$ FORCEINLINE
2074  buf_i(:,w), n_loc, buf_o(:,w), buf_i(:,w) )
2075  enddo
2076  ! perform interpolation
2077  do w=0,wx-1
2078 !DIR$ FORCEINLINE
2080  buf_i(:,w), alpha, n_loc, buf_o(:,w))
2081  enddo
2082  ! copy-back interpolated values
2083 ! f6d(i,j,k,:,m,n) = buf_o(1:n_loc)!First until last -3
2084  do w=0,wx-1
2085  do i=0,n_loc-1
2086  f6d(c_mn+i,j+w,k,l,m,n) = buf_o(1+i,w)
2087  enddo
2088  enddo
2089  end do
2090 #else
2091  do j=loop_mn(2), loop_mx(2)
2092  si = floor( displacement(l,m) )
2093  if ( si == 0 ) then
2094  indm = 1
2095  else ! si = -1
2096  indm = 2
2097  end if
2098  alpha = displacement(l,m) - real(si, f64)
2099  if ( si == 0 ) then
2100  buf_i(n_loc+2,0) = r_halo(loop_mx(1)+1,j,k,l,m,n)
2101  else ! si = -1
2102  buf_i(indm,0) = l_halo(c_mn-1,j,k,l,m,n)
2103  end if
2104 ! buf_i(indm+1:n_loc+indm) = f6d(i,j,k,:,m,n)
2105  do i=0,n_loc-1
2106  buf_i(indm+1+i,0) = f6d(c_mn+i,j,k,l,m,n)
2107  enddo
2108  ! add local contributions to boundary conditions for the spline
2109 !DIR$ FORCEINLINE
2111  buf_i(indm+1:n_loc+indm,0), si, n_loc, &
2112  decomposition%local%bc_left(j,k,l,m,n), &
2113  decomposition%local%bc_right(j,k,l,m,n) )
2114  buf_i( 1, 0 ) = decomposition%local%bc_left(j,k,l,m,n)
2115  buf_i( ind_max, 0 ) = decomposition%local%bc_right(j,k,l,m,n)
2116  ! compute interpolant
2117 !DIR$ FORCEINLINE
2119  buf_i(:,0), n_loc, buf_o(:,0), buf_i(:,0) )
2120  ! perform interpolation
2121 !DIR$ FORCEINLINE
2123  buf_i(:,0), alpha, n_loc, buf_o(:,0))
2124  ! copy-back interpolated values
2125 ! f6d(i,j,k,:,m,n) = buf_o(1:n_loc)!First until last -3
2126  do i=0,n_loc-1
2127  f6d(c_cm+i,j,k,l,m,n) = buf_o(1+i,0)
2128  enddo
2129  end do
2130 #endif
2131  end do
2132  end do
2133  end do
2134  end do
2135 !$omp end do
2136  deallocate(buf_i)
2137  deallocate(buf_o)
2138 !$omp end parallel
2139  call sll_s_deallocate_bc_buffers(decomposition)
2141 
2142 
Module implementing spline advection for the setting of a domain decomposition in 6d with extra buffe...
subroutine, public sll_s_advection_6d_spline_dd_slim_advect_eta5(self, topology, decomposition, displacement, f6d)
Advection along eta4 with displacement dependent on eta1-eta3.
subroutine, public sll_s_advection_6d_spline_dd_slim_fadvect_eta3(self, topology, decomposition, f6d)
Advection along eta1 with displacement dependent on eta4.
subroutine, public sll_s_advection_6d_spline_dd_slim_advect_eta6(self, topology, decomposition, displacement, f6d)
Advection along eta5 with displacement dependent on eta1-eta3.
subroutine make_blocks_spline(ind, decomposition, disp, disp_int, halo_blocks, halo_width, n_halo_blocks)
Helper function to calculate the communication blocks for the spline interpolation.
subroutine, public sll_s_advection_6d_spline_dd_slim_advect_eta2_dispeta45(self, topology, decomposition, displacement, f6d)
Advection along eta2 with displacement dependent on eta4 and eta5.
subroutine, public sll_s_advection_6d_spline_dd_slim_advect_eta1_dispeta45(self, topology, decomposition, displacement, f6d)
Advection along eta1 with displacement dependent on eta4 and eta5.
integer(kind=i32) function get_wx(decomposition, id_in)
subroutine, public sll_s_advection_6d_spline_dd_slim_fadvect_eta2(self, topology, decomposition, f6d)
Advection along eta1 with displacement dependent on eta4.
subroutine, public sll_s_advection_6d_spline_dd_slim_advect_eta4(self, topology, decomposition, displacement, f6d)
Advection along eta1 with displacement dependent on eta4.
subroutine, public sll_s_advection_6d_spline_dd_slim_init(self, decomposition, displacement_eta1, displacement_eta2, displacement_eta3)
subroutine, public sll_s_advection_6d_spline_dd_slim_fadvect_eta1(self, topology, decomposition, f6d)
Advection along eta1 with displacement dependent on eta4.
Fortran module where set some physical and mathematical constants.
real(kind=f64), parameter, public sll_p_twopi
Interpolator 1d using cubic splines on regular mesh with halo cells.
subroutine, public sll_s_cubic_spline_halo_1d_compute_interpolant(f, num_points, d, coeffs)
Compute the coefficients of the local interpolating spline (after d(0) and c(num_points+2) have been ...
subroutine, public sll_s_cubic_spline_halo_1d_finish_boundary_conditions(fdata, si, num_points, d_0, c_np2)
Complete d(0) and c(num_points+2) with local data after their values have been received from the neig...
subroutine, public sll_s_cubic_spline_halo_1d_prepare_exchange(fdata, si, num_points, d_0, c_np2)
Compute the part of d(0) and c(num_points+2) that need to be send to neighboring processors.
subroutine, public sll_s_cubic_spline_halo_1d_eval_disp(coeffs, alpha, num_points, fout)
This function corresponds to the interpolate_array_disp function of the interpolators but the displac...
Module providing data structures and tools to implement domain decompositions.
subroutine, public sll_s_apply_bc_exchange_slim_6d_real64(topo, decomp, id)
subroutine, public sll_s_deallocate_bc_buffers(decomp)
subroutine, public sll_s_allocate_bc_buffers_6d(decomp, id)
subroutine, public sll_f_apply_halo_exchange_slim_6d_real64(topo, decomp, arr, id, hw_left, hw_right)
subroutine, public sll_s_apply_halo_exchange_slim_6d_real64(topo, decomp, arr, id, hw_left, hw_right, halo_block)
subroutine, public sll_s_allocate_bc_buffers_6d_part(decomp, id, idx_mn, idx_mx)
Module for 1D interpolation and reconstruction.
Interpolator class and methods of Lagrange 1D interpolator.
integer(kind=i32), parameter, public sll_p_lagrange_fixed
Flag to specify Lagrange interpolation on a fixed interval centered around the point that is displace...
integer(kind=i32), parameter, public sll_p_lagrange_centered
Flag to specify Lagrange interpolation centered around the interpolation point.
Information on the 6D cartesian process topology.
6D decomposition, slim redesign, global array size information and local information.
Abstract class for 1D interpolation and reconstruction.
    Report Typos and Errors