Report Typos and Errors    
Semi-Lagrangian Library
Modular library for kinetic and gyrokinetic simulations of plasmas in fusion energy devices.
sll_m_poisson_3d_periodic_par.F90
Go to the documentation of this file.
1 
7 
8 !+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
9 #include "sll_assert.h"
10 #include "sll_memory.h"
11 #include "sll_working_precision.h"
12 
13  use sll_m_collective, only: &
16 
17  use sll_m_constants, only: &
18  sll_p_pi, &
20 
21  use sll_m_fft, only: &
27  sll_t_fft
28 
29  use sll_m_remapper, only: &
30  sll_o_apply_remap_3d, &
31  sll_o_compute_local_sizes, &
32  sll_o_get_layout_collective, &
33  sll_o_initialize_layout_with_distributed_array, &
34  sll_t_layout_3d, &
35  sll_o_local_to_global, &
36  sll_f_new_layout_3d, &
37  sll_o_new_remap_plan, &
38  sll_t_remap_plan_3d_comp64, &
39  sll_t_remap_plan_3d_real64, &
40  sll_o_delete, &
41  sll_s_factorize_in_two_powers_of_two
42 
43  use sll_m_utilities, only: &
44  sll_f_is_even, &
46 
47 #ifdef _OPENMP
48  use omp_lib
49 #define OMP_COLLAPSE collapse(2)
50 #define OMP_SCHEDULE schedule(static)
51 #endif
52 
53  implicit none
54 
55  public :: &
62 
63  private
64 !+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
65 
71  sll_int32 :: ncx
72  sll_int32 :: ncy
73  sll_int32 :: ncz
74  sll_real64 :: lx
75  sll_real64 :: ly
76  sll_real64 :: lz
77  sll_real64 :: dx
78  sll_real64 :: dy
79  sll_real64 :: dz
80  type(sll_t_fft) :: px
81  type(sll_t_fft) :: py
82  type(sll_t_fft) :: pz
83  type(sll_t_fft) :: px_inv
84  type(sll_t_fft) :: py_inv
85  type(sll_t_fft) :: pz_inv
86  type(sll_t_layout_3d), pointer :: layout_split
87  type(sll_t_layout_3d), pointer :: layout_x
88  type(sll_t_layout_3d), pointer :: layout_y
89  type(sll_t_layout_3d), pointer :: layout_z
90 
91  sll_int32, dimension(3, 3) :: loc_sizes
92  sll_comp64, dimension(:, :, :), pointer :: array_x
93  sll_comp64, dimension(:, :, :), pointer :: array_y
94  sll_comp64, dimension(:, :, :), pointer :: array_z
95  sll_comp64, dimension(:, :, :), pointer :: array_z1
96  sll_comp64, dimension(:, :, :), pointer :: array_z2
97  sll_comp64, allocatable :: array1d_x(:)
98  sll_comp64, allocatable :: array1d_y(:)
99  sll_comp64, allocatable :: array1d_z(:)
100  sll_real64, allocatable :: phi_x1(:, :, :)
101  sll_real64, allocatable :: phi_x2(:, :, :)
102  sll_real64, allocatable :: phi_x3(:, :, :)
103  sll_real64, allocatable :: ex_x1(:, :, :)
104  sll_real64, allocatable :: ey_x2(:, :, :)
105  sll_real64, allocatable :: ez_x3(:, :, :)
106 
107  type(sll_t_remap_plan_3d_comp64), pointer :: rmp3_xy
108  type(sll_t_remap_plan_3d_comp64), pointer :: rmp3_yz
109  type(sll_t_remap_plan_3d_comp64), pointer :: rmp3_zy
110  type(sll_t_remap_plan_3d_comp64), pointer :: rmp3_yx
111  type(sll_t_remap_plan_3d_real64), pointer :: rmp_x1_to_split
112  type(sll_t_remap_plan_3d_real64), pointer :: rmp_split_to_x1
113  type(sll_t_remap_plan_3d_real64), pointer :: rmp_x2_to_split
114  type(sll_t_remap_plan_3d_real64), pointer :: rmp_x3_to_split
115  type(sll_t_remap_plan_3d_real64), pointer :: rmp_x1_to_x2
116  type(sll_t_remap_plan_3d_real64), pointer :: rmp_x2_to_x3
117  type(sll_t_remap_plan_3d_real64), pointer :: rmp_x3_to_x1
119 
120  ! Toggle thread parallelism of the routines
121  ! sll_s_poisson_3d_periodic_par_solve() and
122  ! sll_s_compute_e{x,y,z}_from_phi()
123  !(temporary switch introduced for code development work, may be removed again)
124  logical, parameter :: use_openmp_threading = .true.
125 
126 contains
127 
131  start_layout, &
132  ncx, &
133  ncy, &
134  ncz, &
135  Lx, &
136  Ly, &
137  Lz, &
138  plan)
139 
140  type(sll_t_layout_3d), pointer, intent(in) :: start_layout
141  sll_int32, intent(in) :: ncx
142  sll_int32, intent(in) :: ncy
143  sll_int32, intent(in) :: ncz
144  sll_real64, intent(in) :: lx
145  sll_real64, intent(in) :: ly
146  sll_real64, intent(in) :: lz
147  type(sll_t_poisson_3d_periodic_par), intent(out) :: plan
148 
149  sll_comp64 :: x(ncx)
150  sll_comp64 :: y(ncy)
151  sll_comp64 :: z(ncz)
152  sll_int64 :: colsz ! collective size
153  type(sll_t_collective_t), pointer :: collective
154  ! npx, npy, npz are the numbers of processors in directions x, y, z
155  sll_int32 :: npx, npy, npz
156  sll_int32 :: ierr
157  sll_int32, dimension(3, 3) :: loc_sizes
158  sll_real64, allocatable :: tmp(:, :, :)
159 
160  collective => sll_o_get_layout_collective(start_layout)
161  colsz = int(sll_f_get_collective_size(collective), i64)
162 
163  if (int(colsz, i32) > min(ncx*ncy, ncy*ncz, ncz*ncx)) then
164  print *, 'This test needs to run in a number of processes which', &
165  ' is less than or equal', min(ncx*ncy, ncy*ncz, ncz*ncx), ' in order to ', &
166  'be able properly split the arrays.'
167  print *, 'Exiting...'
168  stop
169  end if
170 
171  plan%ncx = ncx
172  plan%ncy = ncy
173  plan%ncz = ncz
174  plan%Lx = lx
175  plan%Ly = ly
176  plan%Lz = lz
177 
178  plan%dx = lx/real(ncx, f64)
179  plan%dy = ly/real(ncy, f64)
180  plan%dz = lz/real(ncz, f64)
181 
182  ! For FFTs (in each direction)
183  call sll_s_fft_init_c2c_1d(plan%px, ncx, x, x, sll_p_fft_forward)
184  call sll_s_fft_init_c2c_1d(plan%py, ncy, y, y, sll_p_fft_forward)
185  call sll_s_fft_init_c2c_1d(plan%pz, ncz, z, z, sll_p_fft_forward)
186 
187  ! For inverse FFTs (in each direction)
188  call sll_s_fft_init_c2c_1d(plan%px_inv, ncx, x, x, sll_p_fft_backward)
189  call sll_s_fft_init_c2c_1d(plan%py_inv, ncy, y, y, sll_p_fft_backward)
190  call sll_s_fft_init_c2c_1d(plan%pz_inv, ncz, z, z, sll_p_fft_backward)
191 
192  ! Layout and local sizes for FFTs in x-direction
193  plan%layout_split => start_layout
194 
195  ! Layout and local sizes for FFTs in x-direction
196  plan%layout_x => sll_f_new_layout_3d(collective)
197  npx = 1
198  call sll_s_factorize_in_two_powers_of_two(int(colsz, i32), npy, npz)
199 
200  call sll_o_initialize_layout_with_distributed_array( &
201  ncx, &
202  ncy, &
203  ncz, &
204  npx, &
205  npy, &
206  npz, &
207  plan%layout_x)
208  call sll_o_compute_local_sizes( &
209  plan%layout_x, &
210  loc_sizes(1, 1), &
211  loc_sizes(1, 2), &
212  loc_sizes(1, 3))
213 
214  ! Layout and local sizes for FFTs in y-direction
215  plan%layout_y => sll_f_new_layout_3d(collective)
216  npx = npy
217  npy = 1
218  call sll_o_initialize_layout_with_distributed_array( &
219  ncx, &
220  ncy, &
221  ncz, &
222  npx, &
223  npy, &
224  npz, &
225  plan%layout_y)
226 
227  call sll_o_compute_local_sizes( &
228  plan%layout_y, &
229  loc_sizes(2, 1), &
230  loc_sizes(2, 2), &
231  loc_sizes(2, 3))
232 
233  ! Layout and local sizes for FFTs in z-direction
234  plan%layout_z => sll_f_new_layout_3d(collective)
235  ! npx remains the same. Exchange npy and npz.
236  npy = npz
237  npz = 1
238  call sll_o_initialize_layout_with_distributed_array( &
239  ncx, &
240  ncy, &
241  ncz, &
242  npx, &
243  npy, &
244  npz, &
245  plan%layout_z)
246 
247  call sll_o_compute_local_sizes( &
248  plan%layout_z, &
249  loc_sizes(3, 1), &
250  loc_sizes(3, 2), &
251  loc_sizes(3, 3))
252 
253  plan%loc_sizes = loc_sizes
254 
255  sll_allocate(plan%array_x(loc_sizes(1, 1), loc_sizes(1, 2), loc_sizes(1, 3)), ierr)
256  sll_allocate(plan%array_y(loc_sizes(2, 1), loc_sizes(2, 2), loc_sizes(2, 3)), ierr)
257  sll_allocate(plan%array_z(loc_sizes(3, 1), loc_sizes(3, 2), loc_sizes(3, 3)), ierr)
258  sll_allocate(plan%array_z1(loc_sizes(3, 1), loc_sizes(3, 2), loc_sizes(3, 3)), ierr)
259  sll_allocate(plan%array_z2(loc_sizes(3, 1), loc_sizes(3, 2), loc_sizes(3, 3)), ierr)
260 
261  allocate (plan%array1d_x(loc_sizes(1, 1)))
262  allocate (plan%array1d_y(loc_sizes(2, 2)))
263  allocate (plan%array1d_z(loc_sizes(3, 3)))
264 
265  plan%rmp3_xy => sll_o_new_remap_plan(plan%layout_x, plan%layout_y, plan%array_x)
266  plan%rmp3_yz => sll_o_new_remap_plan(plan%layout_y, plan%layout_z, plan%array_y)
267  plan%rmp3_zy => sll_o_new_remap_plan(plan%layout_z, plan%layout_y, plan%array_z)
268  plan%rmp3_yx => sll_o_new_remap_plan(plan%layout_y, plan%layout_x, plan%array_y)
269 
270  sll_allocate(plan%phi_x1(loc_sizes(1, 1), loc_sizes(1, 2), loc_sizes(1, 3)), ierr)
271  sll_allocate(plan%phi_x2(loc_sizes(2, 1), loc_sizes(2, 2), loc_sizes(2, 3)), ierr)
272  sll_allocate(plan%phi_x3(loc_sizes(3, 1), loc_sizes(3, 2), loc_sizes(3, 3)), ierr)
273  sll_allocate(plan%ex_x1(loc_sizes(1, 1), loc_sizes(1, 2), loc_sizes(1, 3)), ierr)
274  sll_allocate(plan%ey_x2(loc_sizes(2, 1), loc_sizes(2, 2), loc_sizes(2, 3)), ierr)
275  sll_allocate(plan%ez_x3(loc_sizes(3, 1), loc_sizes(3, 2), loc_sizes(3, 3)), ierr)
276 
277  plan%rmp_x1_to_split => sll_o_new_remap_plan(plan%layout_x, plan%layout_split, plan%ex_x1)
278  plan%rmp_x2_to_split => sll_o_new_remap_plan(plan%layout_y, plan%layout_split, plan%ey_x2)
279  plan%rmp_x3_to_split => sll_o_new_remap_plan(plan%layout_z, plan%layout_split, plan%ez_x3)
280  plan%rmp_x1_to_x2 => sll_o_new_remap_plan(plan%layout_x, plan%layout_y, plan%ex_x1)
281  plan%rmp_x2_to_x3 => sll_o_new_remap_plan(plan%layout_y, plan%layout_z, plan%ey_x2)
282  plan%rmp_x3_to_x1 => sll_o_new_remap_plan(plan%layout_z, plan%layout_x, plan%ez_x3)
283 
284  call sll_o_compute_local_sizes( &
285  plan%layout_split, &
286  loc_sizes(1, 1), &
287  loc_sizes(1, 2), &
288  loc_sizes(1, 3))
289 
290  sll_allocate(tmp(loc_sizes(1, 1), loc_sizes(1, 2), loc_sizes(1, 3)), ierr)
291  plan%rmp_split_to_x1 => sll_o_new_remap_plan(plan%layout_split, plan%layout_x, tmp)
292 
294 
297  subroutine sll_s_poisson_3d_periodic_par_solve(plan, rho, phi)
298  type(sll_t_poisson_3d_periodic_par) :: plan
299  sll_real64, dimension(:, :, :) :: rho
300  sll_real64, dimension(:, :, :) :: phi
301  sll_int32 :: nx, ny, nz
302  ! nx, ny, nz are the numbers of points - 1 in directions x, y ,z
303  sll_int32 :: nx_loc, ny_loc, nz_loc
304  sll_int32 :: i, j, k, ijk(3)
305  sll_real64 :: lx, ly, lz
306  sll_real64 :: kx0, ky0, kz0
307  sll_real64 :: kx, ky, kz
308  sll_real64 :: ind_x, ind_y, ind_z
309  sll_real64 :: nxyz_inv
310  type(sll_t_layout_3d), pointer :: layout_x, layout_y, layout_z
311  sll_int32, dimension(1:3) :: global
312  sll_int32 :: gi, gj, gk
313  sll_comp64, allocatable :: stripe(:)
314 
315  ! Get geometry information
316  nx = plan%ncx
317  ny = plan%ncy
318  nz = plan%ncz
319  lx = plan%Lx
320  ly = plan%Ly
321  lz = plan%Lz
322  kx0 = sll_p_twopi/lx
323  ky0 = sll_p_twopi/ly
324  kz0 = sll_p_twopi/lz
325 
326  nxyz_inv = 1.0_f64/real(nx*ny*nz, f64)
327 
328  ! Get layouts to compute FFTs (in each direction)
329  layout_x => plan%layout_x
330  layout_y => plan%layout_y
331  layout_z => plan%layout_z
332 
333  if (use_openmp_threading) then
334 ! if (.false.) then
335 
336  ! FFTs in x-direction
337  nx_loc = plan%loc_sizes(1, 1)
338  ny_loc = plan%loc_sizes(1, 2)
339  nz_loc = plan%loc_sizes(1, 3)
340  call sll_o_apply_remap_3d(plan%rmp_split_to_x1, rho, plan%ex_x1)
341 
342 !$omp parallel default(none) shared(plan, nx_loc, ny_loc, nz_loc) private(stripe,j,k)
343  allocate (stripe(nx_loc))
344 !$omp do OMP_COLLAPSE OMP_SCHEDULE
345  do k = 1, nz_loc
346  do j = 1, ny_loc
347  stripe = cmplx(plan%ex_x1(:, j, k), 0_f64, kind=f64)
348  call sll_s_fft_exec_c2c_1d(plan%px, stripe, stripe)
349  plan%array_x(:, j, k) = stripe
350  end do
351  end do
352 !$omp end do
353  deallocate (stripe)
354 !$omp end parallel
355 
356  ! FFTs in y-direction
357  nx_loc = plan%loc_sizes(2, 1)
358  ny_loc = plan%loc_sizes(2, 2)
359  nz_loc = plan%loc_sizes(2, 3)
360  call sll_o_apply_remap_3d(plan%rmp3_xy, plan%array_x, plan%array_y)
361 
362 !$omp parallel default(none) shared(plan, nx_loc, ny_loc, nz_loc) private(stripe,i,k)
363  allocate (stripe(ny_loc))
364 !$omp do OMP_COLLAPSE OMP_SCHEDULE
365  do k = 1, nz_loc
366  do i = 1, nx_loc
367  stripe = plan%array_y(i, :, k)
368  call sll_s_fft_exec_c2c_1d(plan%py, stripe, stripe)
369  plan%array_y(i, :, k) = stripe
370  end do
371  end do
372 !$omp end do
373  deallocate (stripe)
374 !$omp end parallel
375 
376  ! FFTs in z-direction
377  nx_loc = plan%loc_sizes(3, 1)
378  ny_loc = plan%loc_sizes(3, 2)
379  nz_loc = plan%loc_sizes(3, 3)
380  call sll_o_apply_remap_3d(plan%rmp3_yz, plan%array_y, plan%array_z)
381 
382 !$omp parallel default(none) shared(plan, nx_loc, ny_loc, nz_loc, nxyz_inv) private(stripe,i,j)
383  allocate (stripe(nz_loc))
384 !$omp do OMP_COLLAPSE OMP_SCHEDULE
385  do j = 1, ny_loc
386  do i = 1, nx_loc
387  stripe = plan%array_z(i, j, :)
388  call sll_s_fft_exec_c2c_1d(plan%pz, stripe, stripe)
389  plan%array_z(i, j, :) = stripe*cmplx(nxyz_inv, 0.0_f64, kind=f64)
390  end do
391  end do
392 !$omp end do
393  deallocate (stripe)
394 !$omp end parallel
395 
396  ! move this normalization elsewhere where the modes are modified
397  ! update: was moved into the loop above
398 ! plan%array_z = plan%array_z/(nx*ny*nz)
399 
400  ! Compute hat_phi, phi = inv_fft(hat_phi)
401 !$omp parallel default(none) &
402 !$omp shared(plan, nx, ny, nz, kx0, ky0, kz0, layout_z, nx_loc, ny_loc, nz_loc) &
403 !$omp private(i, j, k, ijk, global, gi, gj, gk, kx, ky, kz, ind_x, ind_y, ind_z)
404 !$omp do OMP_COLLAPSE OMP_SCHEDULE
405  do k = 1, nz_loc
406  do j = 1, ny_loc
407  do i = 1, nx_loc
408  ijk(1) = i
409  ijk(2) = j
410  ijk(3) = k
411  global = sll_o_local_to_global(layout_z, ijk)
412  gi = global(1)
413  gj = global(2)
414  gk = global(3)
415  if ((gi == 1) .and. (gj == 1) .and. (gk == 1)) then
416  plan%array_z(1, 1, 1) = (0.0_f64, 0.0_f64)
417  else
418  if (gi <= nx/2) then
419  ind_x = real(gi - 1, f64)
420  else
421  ind_x = real(nx - (gi - 1), f64)
422  end if
423  if (gj <= ny/2) then
424  ind_y = real(gj - 1, f64)
425  else
426  ind_y = real(ny - (gj - 1), f64)
427  end if
428  if (gk <= nz/2) then
429  ind_z = real(gk - 1, f64)
430  else
431  ind_z = real(nz - (gk - 1), f64)
432  end if
433  kx = kx0*ind_x
434  ky = ky0*ind_y
435  kz = kz0*ind_z
436  plan%array_z(i, j, k) = plan%array_z(i, j, k)/ &
437  cmplx(kx**2 + ky**2 + kz**2, 0.0_f64, kind=f64)
438  end if
439  end do
440  end do
441  end do
442 !$omp end do
443 !$omp end parallel
444 
445  ! Inverse FFTs in z-direction
446 !$omp parallel default(none) shared(plan, nx_loc, ny_loc, nz_loc) private(stripe,i,j)
447  allocate (stripe(nz_loc))
448 !$omp do OMP_COLLAPSE OMP_SCHEDULE
449  do j = 1, ny_loc
450  do i = 1, nx_loc
451  stripe = plan%array_z(i, j, :)
452  call sll_s_fft_exec_c2c_1d(plan%pz_inv, stripe, stripe)
453  plan%array_z(i, j, :) = stripe
454  end do
455  end do
456 !$omp end do
457  deallocate (stripe)
458 !$omp end parallel
459 
460  ! Inverse FFTs in y-direction
461  nx_loc = plan%loc_sizes(2, 1)
462  ny_loc = plan%loc_sizes(2, 2)
463  nz_loc = plan%loc_sizes(2, 3)
464  call sll_o_apply_remap_3d(plan%rmp3_zy, plan%array_z, plan%array_y)
465 
466 !$omp parallel default(none) shared(plan, nx_loc, ny_loc, nz_loc) private(stripe,i,k)
467  allocate (stripe(ny_loc))
468 !$omp do OMP_COLLAPSE OMP_SCHEDULE
469  do k = 1, nz_loc
470  do i = 1, nx_loc
471  stripe = plan%array_y(i, :, k)
472  call sll_s_fft_exec_c2c_1d(plan%py_inv, stripe, stripe)
473  plan%array_y(i, :, k) = stripe
474  end do
475  end do
476 !$omp end do
477  deallocate (stripe)
478 !$omp end parallel
479 
480  ! Inverse FFTs in x-direction
481  nx_loc = plan%loc_sizes(1, 1)
482  ny_loc = plan%loc_sizes(1, 2)
483  nz_loc = plan%loc_sizes(1, 3)
484  call sll_o_apply_remap_3d(plan%rmp3_yx, plan%array_y, plan%array_x)
485 
486 !$omp parallel default(none) shared(plan, nx_loc, ny_loc, nz_loc, phi) private(stripe,j,k)
487  allocate (stripe(nx_loc))
488 !$omp do OMP_COLLAPSE OMP_SCHEDULE
489  do k = 1, nz_loc
490  do j = 1, ny_loc
491  stripe = plan%array_x(:, j, k)
492  call sll_s_fft_exec_c2c_1d(plan%px_inv, stripe, stripe)
493  phi(:, j, k) = real(stripe, f64)
494  end do
495  end do
496 !$omp end do
497  deallocate (stripe)
498 !$omp end parallel
499 
500 ! --- threaded version below uses a single parallel region but does not work (yet to be understood) ---
501 ! !$omp parallel default(none) &
502 ! !$omp shared(plan, nx, ny, nz, Lx, Ly, Lz, kx0, ky0, kz0) &
503 ! !$omp shared(layout_x, layout_y, layout_z, nxyz_inv, rho, phi) &
504 ! !$omp shared(nx_loc, ny_loc, nz_loc) &
505 ! !$omp private(i, j, k, ijk, global, gi, gj, gk, kx, ky, kz, ind_x, ind_y, ind_z) &
506 ! !$omp private(stripe)
507 !
508 ! ! FFTs in x-direction
509 ! !$omp master
510 ! nx_loc = plan%loc_sizes(1,1)
511 ! ny_loc = plan%loc_sizes(1,2)
512 ! nz_loc = plan%loc_sizes(1,3)
513 ! call sll_o_apply_remap_3d(plan%rmp_split_to_x1, rho, plan%ex_x1)
514 ! !$omp end master
515 ! !$omp barrier
516 !
517 ! allocate(stripe(nx_loc))
518 ! !$omp do OMP_COLLAPSE OMP_SCHEDULE
519 ! do k=1,nz_loc
520 ! do j=1,ny_loc
521 ! stripe = cmplx(plan%ex_x1(:,j,k), 0_f64, kind=f64)
522 ! call sll_s_fft_exec_c2c_1d(plan%px, stripe, stripe)
523 ! plan%array_x(:,j,k) = stripe
524 ! enddo
525 ! enddo
526 ! !$omp end do
527 ! deallocate(stripe)
528 !
529 ! ! FFTs in y-direction
530 ! !$omp barrier
531 ! !$omp master
532 ! nx_loc = plan%loc_sizes(2,1)
533 ! ny_loc = plan%loc_sizes(2,2)
534 ! nz_loc = plan%loc_sizes(2,3)
535 ! call sll_o_apply_remap_3d(plan%rmp3_xy, plan%array_x, plan%array_y)
536 ! !$omp end master
537 ! !$omp barrier
538 !
539 ! allocate(stripe(ny_loc))
540 ! !$omp do OMP_COLLAPSE OMP_SCHEDULE
541 ! do k=1,nz_loc
542 ! do i=1,nx_loc
543 ! stripe = plan%array_y(i,:,k)
544 ! call sll_s_fft_exec_c2c_1d(plan%py, stripe, stripe)
545 ! plan%array_y(i,:,k) = stripe
546 ! enddo
547 ! enddo
548 ! !$omp end do
549 ! deallocate(stripe)
550 !
551 ! ! FFTs in z-direction
552 ! !$omp barrier
553 ! !$omp master
554 ! nx_loc = plan%loc_sizes(3,1)
555 ! ny_loc = plan%loc_sizes(3,2)
556 ! nz_loc = plan%loc_sizes(3,3)
557 ! call sll_o_apply_remap_3d(plan%rmp3_yz, plan%array_y, plan%array_z)
558 ! !$omp end master
559 !
560 ! allocate(stripe(nz_loc))
561 ! !$omp do OMP_COLLAPSE OMP_SCHEDULE
562 ! do j=1,ny_loc
563 ! do i=1,nx_loc
564 ! stripe = plan%array_z(i,j,:)
565 ! call sll_s_fft_exec_c2c_1d(plan%pz, stripe, stripe)
566 ! plan%array_z(i,j,:) = stripe * nxyz_inv
567 ! enddo
568 ! enddo
569 ! !$omp end do
570 ! ! deallocate(stripe) is done below
571 !
572 ! ! move this normalization elsewhere where the modes are modified
573 ! ! update: was moved into the loop above
574 ! ! plan%array_z = plan%array_z/(nx*ny*nz)
575 !
576 ! ! Compute hat_phi, phi = inv_fft(hat_phi)
577 ! !$omp do OMP_COLLAPSE OMP_SCHEDULE
578 ! do k=1,nz_loc
579 ! do j=1,ny_loc
580 ! do i=1,nx_loc
581 ! ijk(1) = i
582 ! ijk(2) = j
583 ! ijk(3) = k
584 ! global = sll_o_local_to_global(layout_z, ijk)
585 ! gi = global(1)
586 ! gj = global(2)
587 ! gk = global(3)
588 ! if( (gi==1) .and. (gj==1) .and. (gk==1) ) then
589 ! plan%array_z(1,1,1) = (0.0_f64,0.0_f64)
590 ! else
591 ! if (gi<=nx/2) then
592 ! ind_x = real(gi-1,f64)
593 ! else
594 ! ind_x = real(nx-(gi-1),f64)
595 ! endif
596 ! if (gj<=ny/2) then
597 ! ind_y = real(gj-1,f64)
598 ! else
599 ! ind_y = real(ny-(gj-1),f64)
600 ! endif
601 ! if (gk<=nz/2) then
602 ! ind_z = real(gk-1,f64)
603 ! else
604 ! ind_z = real(nz-(gk-1),f64)
605 ! endif
606 ! kx = kx0*ind_x
607 ! ky = ky0*ind_y
608 ! kz = kz0*ind_z
609 ! plan%array_z(i,j,k) = plan%array_z(i,j,k)/(kx**2+ky**2+kz**2)
610 ! endif
611 ! enddo
612 ! enddo
613 ! enddo
614 ! !$omp end do
615 !
616 ! ! Inverse FFTs in z-direction
617 ! !$omp do OMP_COLLAPSE OMP_SCHEDULE
618 ! do j=1,ny_loc
619 ! do i=1,nx_loc
620 ! stripe = plan%array_z(i,j,:)
621 ! call sll_s_fft_exec_c2c_1d(plan%pz_inv, stripe, stripe)
622 ! plan%array_z(i,j,:) = stripe
623 ! enddo
624 ! enddo
625 ! !$omp end do
626 ! deallocate(stripe)
627 !
628 ! ! Inverse FFTs in y-direction
629 ! !$omp barrier
630 ! !$omp master
631 ! nx_loc = plan%loc_sizes(2,1)
632 ! ny_loc = plan%loc_sizes(2,2)
633 ! nz_loc = plan%loc_sizes(2,3)
634 ! call sll_o_apply_remap_3d( plan%rmp3_zy, plan%array_z, plan%array_y )
635 ! !$omp end master
636 ! !$omp barrier
637 !
638 ! allocate(stripe(ny_loc))
639 ! !$omp do OMP_COLLAPSE OMP_SCHEDULE
640 ! do k=1,nz_loc
641 ! do i=1,nx_loc
642 ! stripe = plan%array_y(i,:,k)
643 ! call sll_s_fft_exec_c2c_1d(plan%py_inv, stripe, stripe)
644 ! plan%array_y(i,:,k) = stripe
645 ! enddo
646 ! enddo
647 ! !$omp end do
648 ! deallocate(stripe)
649 !
650 ! ! Inverse FFTs in x-direction
651 ! !$omp barrier
652 ! !$omp master
653 ! nx_loc = plan%loc_sizes(1,1)
654 ! ny_loc = plan%loc_sizes(1,2)
655 ! nz_loc = plan%loc_sizes(1,3)
656 ! call sll_o_apply_remap_3d( plan%rmp3_yx, plan%array_y, plan%array_x )
657 ! !$omp end master
658 ! !$omp barrier
659 !
660 ! allocate(stripe(nx_loc))
661 ! !$omp do OMP_COLLAPSE OMP_SCHEDULE
662 ! do k=1,nz_loc
663 ! do j=1,ny_loc
664 ! stripe = plan%array_x(:,j,k)
665 ! call sll_s_fft_exec_c2c_1d(plan%px_inv, stripe, stripe)
666 ! ! plan%array_x(:,j,k) = stripe
667 ! phi(:,j,k) = real(stripe, f64)
668 ! enddo
669 ! enddo
670 ! !$omp end do
671 ! deallocate(stripe)
672 ! ! phi = real(plan%array_x, f64)
673 ! !$omp end parallel
674 
675  else
676  ! --- unthreaded version below ---
677 
678  ! FFTs in x-direction
679  nx_loc = plan%loc_sizes(1, 1)
680  ny_loc = plan%loc_sizes(1, 2)
681  nz_loc = plan%loc_sizes(1, 3)
682  call sll_o_apply_remap_3d(plan%rmp_split_to_x1, rho, plan%ex_x1)
683  do k = 1, nz_loc
684  do j = 1, ny_loc
685  plan%array1d_x = cmplx(plan%ex_x1(:, j, k), 0_f64, kind=f64)
686  call sll_s_fft_exec_c2c_1d(plan%px, plan%array1d_x, plan%array1d_x)
687  plan%array_x(:, j, k) = plan%array1d_x
688  end do
689  end do
690 
691  ! FFTs in y-direction
692  nx_loc = plan%loc_sizes(2, 1)
693  ny_loc = plan%loc_sizes(2, 2)
694  nz_loc = plan%loc_sizes(2, 3)
695  call sll_o_apply_remap_3d(plan%rmp3_xy, plan%array_x, plan%array_y)
696  do k = 1, nz_loc
697  do i = 1, nx_loc
698  plan%array1d_y = plan%array_y(i, :, k)
699  call sll_s_fft_exec_c2c_1d(plan%py, plan%array1d_y, plan%array1d_y)
700  plan%array_y(i, :, k) = plan%array1d_y
701  end do
702  end do
703 
704  ! FFTs in z-direction
705  nx_loc = plan%loc_sizes(3, 1)
706  ny_loc = plan%loc_sizes(3, 2)
707  nz_loc = plan%loc_sizes(3, 3)
708  call sll_o_apply_remap_3d(plan%rmp3_yz, plan%array_y, plan%array_z)
709  do j = 1, ny_loc
710  do i = 1, nx_loc
711  plan%array1d_z = plan%array_z(i, j, :)
712  call sll_s_fft_exec_c2c_1d(plan%pz, plan%array1d_z, plan%array1d_z)
713  plan%array_z(i, j, :) = plan%array1d_z
714  end do
715  end do
716 
717  ! move this normalization elsewhere where the modes are modified
718  plan%array_z = plan%array_z/cmplx(nx*ny*nz, 0.0_f64, f64)
719 
720  ! Compute hat_phi, phi = inv_fft(hat_phi)
721  do k = 1, nz_loc
722  do j = 1, ny_loc
723  do i = 1, nx_loc
724  ijk(1) = i
725  ijk(2) = j
726  ijk(3) = k
727  global = sll_o_local_to_global(layout_z, ijk)
728  global = sll_o_local_to_global(layout_z, (/i, j, k/))
729  gi = global(1)
730  gj = global(2)
731  gk = global(3)
732  if ((gi == 1) .and. (gj == 1) .and. (gk == 1)) then
733  plan%array_z(1, 1, 1) = (0.0_f64, 0.0_f64)
734  else
735  if (gi <= nx/2) then
736  ind_x = real(gi - 1, f64)
737  else
738  ind_x = real(nx - (gi - 1), f64)
739  end if
740  if (gj <= ny/2) then
741  ind_y = real(gj - 1, f64)
742  else
743  ind_y = real(ny - (gj - 1), f64)
744  end if
745  if (gk <= nz/2) then
746  ind_z = real(gk - 1, f64)
747  else
748  ind_z = real(nz - (gk - 1), f64)
749  end if
750  kx = kx0*ind_x
751  ky = ky0*ind_y
752  kz = kz0*ind_z
753  plan%array_z(i, j, k) = plan%array_z(i, j, k)/ &
754  cmplx(kx**2 + ky**2 + kz**2, 0.0_f64, kind=f64)
755  end if
756  end do
757  end do
758  end do
759 
760  ! Inverse FFTs in z-direction
761  do j = 1, ny_loc
762  do i = 1, nx_loc
763  plan%array1d_z = plan%array_z(i, j, :)
764  call sll_s_fft_exec_c2c_1d(plan%pz_inv, plan%array1d_z, plan%array1d_z)
765  plan%array_z(i, j, :) = plan%array1d_z
766  end do
767  end do
768 
769  ! Inverse FFTs in y-direction
770  nx_loc = plan%loc_sizes(2, 1)
771  ny_loc = plan%loc_sizes(2, 2)
772  nz_loc = plan%loc_sizes(2, 3)
773  call sll_o_apply_remap_3d(plan%rmp3_zy, plan%array_z, plan%array_y)
774  do k = 1, nz_loc
775  do i = 1, nx_loc
776  plan%array1d_y = plan%array_y(i, :, k)
777  call sll_s_fft_exec_c2c_1d(plan%py_inv, plan%array1d_y, plan%array1d_y)
778  plan%array_y(i, :, k) = plan%array1d_y
779  end do
780  end do
781 
782  ! Inverse FFTs in x-direction
783  nx_loc = plan%loc_sizes(1, 1)
784  ny_loc = plan%loc_sizes(1, 2)
785  nz_loc = plan%loc_sizes(1, 3)
786  call sll_o_apply_remap_3d(plan%rmp3_yx, plan%array_y, plan%array_x)
787  do k = 1, nz_loc
788  do j = 1, ny_loc
789  plan%array1d_x = plan%array_x(:, j, k)
790  call sll_s_fft_exec_c2c_1d(plan%px_inv, plan%array1d_x, plan%array1d_x)
791  plan%array_x(:, j, k) = plan%array1d_x
792  end do
793  end do
794  phi = real(plan%array_x, f64)
795  end if
797 
801  subroutine sll_s_poisson_3d_periodic_par_solve_e(plan, rho, ex, ey, ez)
802  type(sll_t_poisson_3d_periodic_par) :: plan
803  sll_real64, dimension(:, :, :) :: rho
804  sll_real64, dimension(:, :, :) :: ex
805  sll_real64, dimension(:, :, :) :: ey
806  sll_real64, dimension(:, :, :) :: ez
807  sll_int32 :: nx, ny, nz
808  ! nx, ny, nz are the numbers of points - 1 in directions x, y ,z
809  sll_int32 :: nx_loc, ny_loc, nz_loc
810  sll_int32 :: i, j, k
811  sll_real64 :: lx, ly, lz
812  sll_real64 :: kx0, ky0, kz0
813  sll_real64 :: kx, ky, kz
814  sll_real64 :: ind_x, ind_y, ind_z
815  type(sll_t_layout_3d), pointer :: layout_x
816  type(sll_t_layout_3d), pointer :: layout_y
817  type(sll_t_layout_3d), pointer :: layout_z
818  sll_int32, dimension(1:3) :: global
819  sll_int32 :: gi, gj, gk
820  sll_comp64 :: kxi
821 
822  ! Get geometry information
823  nx = plan%ncx
824  ny = plan%ncy
825  nz = plan%ncz
826  lx = plan%Lx
827  ly = plan%Ly
828  lz = plan%Lz
829  kx0 = sll_p_twopi/lx
830  ky0 = sll_p_twopi/ly
831  kz0 = sll_p_twopi/lz
832 
833  ! Get layouts to compute FFTs (in each direction)
834  layout_x => plan%layout_x
835  layout_y => plan%layout_y
836  layout_z => plan%layout_z
837 
838  call sll_o_apply_remap_3d(plan%rmp_split_to_x1, rho, plan%ex_x1)
839 
840  !call verify_argument_sizes_par(layout_x, rho, phi)
841 
842  ! FFTs in x-direction
843  nx_loc = plan%loc_sizes(1, 1)
844  ny_loc = plan%loc_sizes(1, 2)
845  nz_loc = plan%loc_sizes(1, 3)
846  !plan%array_x = cmplx(plan%ex_x1, 0_f64, kind=f64)
847  do k = 1, nz_loc
848  do j = 1, ny_loc
849  plan%array1d_x = cmplx(plan%ex_x1(:, j, k), 0_f64, kind=f64)
850  call sll_s_fft_exec_c2c_1d(plan%px, plan%array1d_x, plan%array1d_x)
851  !call sll_s_fft_apply_plan_c2c_1d(plan%px, plan%array_x(:,j,k), plan%array_x(:,j,k))
852  plan%array_x(:, j, k) = plan%array1d_x
853  end do
854  end do
855 
856  ! FFTs in y-direction
857  nx_loc = plan%loc_sizes(2, 1)
858  ny_loc = plan%loc_sizes(2, 2)
859  nz_loc = plan%loc_sizes(2, 3)
860  call sll_o_apply_remap_3d(plan%rmp3_xy, plan%array_x, plan%array_y)
861  do k = 1, nz_loc
862  do i = 1, nx_loc
863  plan%array1d_y = plan%array_y(i, :, k)
864  call sll_s_fft_exec_c2c_1d(plan%py, plan%array1d_y, plan%array1d_y)
865  plan%array_y(i, :, k) = plan%array1d_y
866  end do
867  end do
868 
869  ! FFTs in z-direction
870  nx_loc = plan%loc_sizes(3, 1)
871  ny_loc = plan%loc_sizes(3, 2)
872  nz_loc = plan%loc_sizes(3, 3)
873  call sll_o_apply_remap_3d(plan%rmp3_yz, plan%array_y, plan%array_z)
874  do j = 1, ny_loc
875  do i = 1, nx_loc
876  plan%array1d_z = plan%array_z(i, j, :)
877  call sll_s_fft_exec_c2c_1d(plan%pz, plan%array1d_z, plan%array1d_z)
878  plan%array_z(i, j, :) = plan%array1d_z
879  end do
880  end do
881 
882  ! move this normalization elsewhere where the modes are modified
883  plan%array_z = plan%array_z/cmplx(nx*ny*nz, 0.0_f64, f64)
884 
885  ! Compute hat_phi, phi = inv_fft(hat_phi)
886  do k = 1, nz_loc
887  do j = 1, ny_loc
888  do i = 1, nx_loc
889  global = sll_o_local_to_global(layout_z, (/i, j, k/))
890  gi = global(1)
891  gj = global(2)
892  gk = global(3)
893  if ((gi == 1) .and. (gj == 1) .and. (gk == 1)) then
894  plan%array_z(1, 1, 1) = (0.0_f64, 0.0_f64)
895  plan%array_z1(1, 1, 1) = (0.0_f64, 0.0_f64)
896  plan%array_z2(1, 1, 1) = (0.0_f64, 0.0_f64)
897  else
898  if (gi <= nx/2) then
899  ind_x = real(gi - 1, f64)
900  else
901  ind_x = real(nx - (gi - 1), f64)
902  end if
903  if (gj <= ny/2) then
904  ind_y = real(gj - 1, f64)
905  else
906  ind_y = real(ny - (gj - 1), f64)
907  end if
908  if (gk <= nz/2) then
909  ind_z = real(gk - 1, f64)
910  else
911  ind_z = real(nz - (gk - 1), f64)
912  end if
913  kx = kx0*ind_x
914  ky = ky0*ind_y
915  kz = kz0*ind_z
916  plan%array_z(i, j, k) = plan%array_z(i, j, k)/cmplx(kx**2 + ky**2 + kz**2, 0.0_f64, kind=f64)
917  !kxi = cmplx(0.0_f64, kx , kind=f64)
918  !plan%array_z1(i,j,k) = kxi * plan%array_z(i,j,k)
919  !kxi = cmplx(0.0_f64, ky , kind=f64)
920  !plan%array_z2(i,j,k) = kxi * plan%array_z(i,j,k)
921  kxi = cmplx(0.0_f64, -kz0*real(i - 1, f64), kind=f64)
922  plan%array_z(i, j, k) = kxi*plan%array_z(i, j, k)/ &
923  cmplx(nz*(nz/2 - 1), 0.0_f64, kind=f64)
924  !plan%array_z(i,j,k) = cmplx( kz * aimag(plan%array_z(i,j,k)), -kz * real(plan%array_z(i,j,k)), kind=f64)
925 
926  end if
927  end do
928  end do
929  end do
930 
931  ! Inverse FFTs in z-direction
932  do j = 1, ny_loc
933  do i = 1, nx_loc
934  plan%array1d_z = plan%array_z(i, j, :)
935  call sll_s_fft_exec_c2c_1d( &
936  plan%pz_inv, &
937  plan%array1d_z, &
938  plan%array1d_z)
939  plan%array_z(i, j, :) = plan%array1d_z
940  end do
941  end do
942 
943  ! Inverse FFTs in y-direction
944  nx_loc = plan%loc_sizes(2, 1)
945  ny_loc = plan%loc_sizes(2, 2)
946  nz_loc = plan%loc_sizes(2, 3)
947  call sll_o_apply_remap_3d(plan%rmp3_zy, plan%array_z, plan%array_y)
948  do k = 1, nz_loc
949  do i = 1, nx_loc
950  plan%array1d_y = plan%array_y(i, :, k)
951  call sll_s_fft_exec_c2c_1d( &
952  plan%py_inv, &
953  plan%array1d_y, &
954  plan%array1d_y)
955  plan%array_y(i, :, k) = plan%array1d_y
956  end do
957  end do
958 
959  ! Inverse FFTs in x-direction
960  nx_loc = plan%loc_sizes(1, 1)
961  ny_loc = plan%loc_sizes(1, 2)
962  nz_loc = plan%loc_sizes(1, 3)
963  call sll_o_apply_remap_3d(plan%rmp3_yx, plan%array_y, plan%array_x)
964  do k = 1, nz_loc
965  do j = 1, ny_loc
966  plan%array1d_x = plan%array_x(:, j, k)
967  call sll_s_fft_exec_c2c_1d( &
968  plan%px_inv, &
969  plan%array1d_x, &
970  plan%array1d_x)
971  plan%array_x(:, j, k) = plan%array1d_x
972  end do
973  end do
974 
975  !phi = real(plan%array_x, f64)
976 
977  call sll_o_apply_remap_3d(plan%rmp_x1_to_split, real(plan%array_x, f64), ez)
978 
980 
981  subroutine sll_s_poisson_3d_periodic_par_compute_e_from_phi(plan, phi, ex, ey, ez)
982  type(sll_t_poisson_3d_periodic_par), intent(inout) :: plan
983  sll_real64, intent(in) :: phi(:, :, :)
984  sll_real64, intent(out) :: ex(:, :, :)
985  sll_real64, intent(out) :: ey(:, :, :)
986  sll_real64, intent(out) :: ez(:, :, :)
987 
988  ! compute the values of the electric field. phi is configured for
989  ! sequential operations in x1, thus we start by computing the E_x
990  ! component.
991  call sll_s_compute_ex_from_phi(plan, phi, plan%ex_x1)
992  call sll_o_apply_remap_3d(plan%rmp_x1_to_split, plan%ex_x1, ex)
993 
994  call sll_o_apply_remap_3d(plan%rmp_x1_to_x2, phi, plan%phi_x2)
995  call sll_s_compute_ey_from_phi(plan, plan%phi_x2, plan%ey_x2)
996  call sll_o_apply_remap_3d(plan%rmp_x2_to_split, plan%ey_x2, ey)
997 
998  call sll_o_apply_remap_3d(plan%rmp_x2_to_x3, plan%phi_x2, plan%phi_x3)
999  call sll_s_compute_ez_from_phi(plan, plan%phi_x3, plan%ez_x3)
1000  call sll_o_apply_remap_3d(plan%rmp_x3_to_split, plan%ez_x3, ez)
1002 
1005  type(sll_t_poisson_3d_periodic_par), intent(inout) :: plan
1006  sll_real64, intent(in) :: phi(:, :, :)
1007  sll_real64, intent(out) :: ex(:, :, :)
1008  sll_real64, intent(out) :: ey(:, :, :)
1009  sll_real64, intent(out) :: ez(:, :, :)
1010 
1011  ! compute the values of the electric field. rho is configured for
1012  ! sequential operations in x3, thus we start by computing the E_x
1013  ! component.
1014  call sll_s_compute_ez_from_phi(plan, phi, plan%ez_x3)
1015  call sll_o_apply_remap_3d(plan%rmp_x3_to_split, plan%ez_x3, ez)
1016 
1017  call sll_o_apply_remap_3d(plan%rmp_x3_to_x1, phi, plan%phi_x1)
1018  call sll_s_compute_ex_from_phi(plan, plan%phi_x1, plan%ex_x1)
1019  call sll_o_apply_remap_3d(plan%rmp_x1_to_split, plan%ex_x1, ex)
1020 
1021  call sll_o_apply_remap_3d(plan%rmp_x1_to_x2, plan%phi_x1, plan%phi_x2)
1022  call sll_s_compute_ey_from_phi(plan, plan%phi_x2, plan%ey_x2)
1023  call sll_o_apply_remap_3d(plan%rmp_x2_to_split, plan%ey_x2, ey)
1025 
1028  type(sll_t_poisson_3d_periodic_par) :: plan
1029  sll_int32 :: ierr
1030 
1031  call sll_s_fft_free(plan%px)
1032  call sll_s_fft_free(plan%py)
1033  call sll_s_fft_free(plan%pz)
1034 
1035  call sll_s_fft_free(plan%px_inv)
1036  call sll_s_fft_free(plan%py_inv)
1037  call sll_s_fft_free(plan%pz_inv)
1038 
1039  call sll_o_delete(plan%layout_x)
1040  call sll_o_delete(plan%layout_y)
1041  call sll_o_delete(plan%layout_z)
1042 
1043  sll_deallocate_array(plan%array_x, ierr)
1044  sll_deallocate_array(plan%array_y, ierr)
1045  sll_deallocate_array(plan%array_z, ierr)
1046  end subroutine sll_s_poisson_3d_periodic_par_free
1047 
1049  subroutine verify_argument_sizes_par(layout, rho, phi)
1050  type(sll_t_layout_3d), pointer :: layout
1051  sll_real64, dimension(:, :, :) :: rho
1052  sll_real64, dimension(:, :, :) :: phi
1053  sll_int32, dimension(3) :: n ! nx_loc, ny_loc, nz_loc
1054  sll_int32 :: i
1055 
1056  call sll_o_compute_local_sizes(layout, n(1), n(2), n(3))
1057 
1058  do i = 1, 3
1059  if ((n(i) /= size(rho, i)) .or. (n(i) /= size(phi, i))) then
1060  print *, 'Input sizes passed to sll_s_poisson_3d_periodic_par_solve do not match'
1061  if (i == 1) then
1062  print *, 'Input sizes passed to "sll_s_poisson_3d_periodic_par_solve" ', &
1063  'do not match in direction x'
1064  elseif (i == 2) then
1065  print *, 'Input sizes passed to "sll_s_poisson_3d_periodic_par_solve" ', &
1066  'do not match in direction y'
1067  else
1068  print *, 'Input sizes passed to "sll_s_poisson_3d_periodic_par_solve" ', &
1069  'do not match in direction z'
1070  end if
1071  print *, 'Exiting...'
1072  stop
1073  end if
1074  end do
1075  end subroutine verify_argument_sizes_par
1076 
1079  subroutine sll_s_compute_ex_from_phi(plan, phi, ex)
1080  type(sll_t_poisson_3d_periodic_par), intent(inout) :: plan
1081  sll_real64, dimension(:, :, :), intent(in) :: phi
1082  sll_real64, dimension(:, :, :), intent(out) :: ex
1083  sll_int32 :: nx
1084  ! nx, ny, nz are the numbers of points - 1 in directions x, y ,z
1085  sll_int32 :: nx_loc, ny_loc, nz_loc
1086  sll_int32 :: i, j, k
1087  sll_real64 :: kx0
1088  sll_comp64 :: norm_fac
1089  sll_comp64, allocatable :: stripe(:)
1090 
1091  ! Get geometry information
1092  nx = plan%ncx
1093  kx0 = sll_p_twopi/plan%Lx
1094  norm_fac = cmplx(1.0_f64/real(nx, f64), 0.0_f64, f64)
1095 
1096  nx_loc = plan%loc_sizes(1, 1)
1097  ny_loc = plan%loc_sizes(1, 2)
1098  nz_loc = plan%loc_sizes(1, 3)
1099 
1100  if (use_openmp_threading) then
1101  ! Threaded implementation, using thread-local temporary arrays.
1102 !$omp parallel default(none) &
1103 !$omp shared(plan, kx0, nx, nx_loc, ny_loc, nz_loc, norm_fac, ex, phi) &
1104 !$omp private(i,j,k,stripe)
1105  allocate (stripe(nx_loc))
1106 !$omp do OMP_COLLAPSE OMP_SCHEDULE
1107  do k = 1, nz_loc
1108  do j = 1, ny_loc
1109  stripe = cmplx(phi(:, j, k), 0_f64, kind=f64)
1110  call sll_s_fft_exec_c2c_1d(plan%px, stripe, stripe)
1111  do i = 1, nx_loc/2
1112  stripe(i) = stripe(i)*cmplx(0_f64, -kx0*real(i - 1, f64), kind=f64) &
1113  *norm_fac
1114  end do
1115  do i = nx_loc/2 + 1, nx_loc
1116  stripe(i) = stripe(i)*cmplx(0_f64, kx0*real(nx - i + 1, f64), kind=f64) &
1117  *norm_fac
1118  end do
1119  call sll_s_fft_exec_c2c_1d(plan%px_inv, stripe, stripe)
1120  ex(:, j, k) = real(stripe, f64)
1121  end do
1122  end do
1123 !$omp end do
1124  deallocate (stripe)
1125 !$omp end parallel
1126  else
1127  ! --- previous unthreaded implementation, using internal temporary arrays ---
1128  do k = 1, nz_loc
1129  do j = 1, ny_loc
1130  plan%array1d_x = cmplx(phi(:, j, k), 0_f64, kind=f64)
1131  call sll_s_fft_exec_c2c_1d(plan%px, plan%array1d_x, plan%array1d_x)
1132  do i = 1, nx_loc/2
1133  plan%array_x(i, j, k) = plan%array1d_x(i)* &
1134  cmplx(0_f64, -kx0*real(i - 1, f64), kind=f64) &
1135  *norm_fac
1136  end do
1137  do i = nx_loc/2 + 1, nx_loc
1138  plan%array_x(i, j, k) = plan%array1d_x(i)* &
1139  cmplx(0_f64, kx0*real(nx - i + 1, f64), kind=f64) &
1140  *norm_fac
1141  end do
1142  end do
1143  end do
1144  do k = 1, nz_loc
1145  do j = 1, ny_loc
1146  plan%array1d_x = plan%array_x(:, j, k)
1147  call sll_s_fft_exec_c2c_1d( &
1148  plan%px_inv, &
1149  plan%array1d_x, &
1150  plan%array1d_x)
1151  plan%array_x(:, j, k) = plan%array1d_x
1152  end do
1153  end do
1154  ex = real(plan%array_x, f64)
1155  end if
1156  end subroutine sll_s_compute_ex_from_phi
1157 
1160  subroutine sll_s_compute_ey_from_phi(plan, phi, ey)
1161  type(sll_t_poisson_3d_periodic_par), intent(inout) :: plan
1162  sll_real64, dimension(:, :, :), intent(in) :: phi
1163  sll_real64, dimension(:, :, :), intent(out) :: ey
1164  sll_int32 :: ny
1165  ! nx, ny, nz are the numbers of points - 1 in directions x, y ,z
1166  sll_int32 :: nx_loc, ny_loc, nz_loc
1167  sll_int32 :: i, j, k
1168  sll_real64 :: ky0
1169  sll_comp64 :: norm_fac
1170  sll_comp64, allocatable :: stripe(:)
1171 
1172  ! Get geometry information
1173  ny = plan%ncy
1174  ky0 = sll_p_twopi/plan%Ly
1175  norm_fac = cmplx(1.0_f64/real(ny, f64), 0.0_f64, f64)
1176 
1177  nx_loc = plan%loc_sizes(2, 1)
1178  ny_loc = plan%loc_sizes(2, 2)
1179  nz_loc = plan%loc_sizes(2, 3)
1180 
1181  if (use_openmp_threading) then
1182  ! Threaded implementation, using thread-local temporary arrays.
1183  ! TODO: Implement a simple iteration count condition when to use threads.
1184 !$omp parallel default(none) &
1185 !$omp shared(plan, ky0, ny, nx_loc, ny_loc, nz_loc, norm_fac, ey, phi) &
1186 !$omp private(i,j,k,stripe)
1187  allocate (stripe(ny_loc))
1188 !$omp do OMP_COLLAPSE OMP_SCHEDULE
1189  do k = 1, nz_loc
1190  do i = 1, nx_loc
1191  stripe = cmplx(phi(i, :, k), 0_f64, kind=f64)
1192  call sll_s_fft_exec_c2c_1d(plan%py, stripe, stripe)
1193  do j = 1, ny_loc/2
1194  stripe(j) = stripe(j)*cmplx(0_f64, -ky0*real(j - 1, f64), kind=f64) &
1195  *norm_fac
1196  end do
1197  do j = ny_loc/2 + 1, ny_loc
1198  stripe(j) = stripe(j)*cmplx(0_f64, ky0*real(ny - j + 1, f64), kind=f64) &
1199  *norm_fac
1200  end do
1201  call sll_s_fft_exec_c2c_1d(plan%py_inv, stripe, stripe)
1202  ey(i, :, k) = real(stripe, f64)
1203  end do
1204  end do
1205 !$omp end do
1206  deallocate (stripe)
1207 !$omp end parallel
1208  else
1209  ! --- previous unthreaded implementation, using internal temporary arrays ---
1210  do k = 1, nz_loc
1211  do i = 1, nx_loc
1212  plan%array1d_y = cmplx(phi(i, :, k), 0_f64, kind=f64)
1213  call sll_s_fft_exec_c2c_1d(plan%py, plan%array1d_y, plan%array1d_y)
1214  do j = 1, ny_loc/2
1215  plan%array_y(i, j, k) = plan%array1d_y(j) &
1216  *cmplx(0_f64, -ky0*real(j - 1, f64), kind=f64) &
1217  *norm_fac
1218  end do
1219  do j = ny_loc/2 + 1, ny_loc
1220  plan%array_y(i, j, k) = plan%array1d_y(j) &
1221  *cmplx(0_f64, ky0*real(ny - j + 1, f64), kind=f64) &
1222  *norm_fac
1223  end do
1224  end do
1225  end do
1226  do k = 1, nz_loc
1227  do i = 1, nx_loc
1228  plan%array1d_y = plan%array_y(i, :, k)
1229  call sll_s_fft_exec_c2c_1d( &
1230  plan%py_inv, &
1231  plan%array1d_y, &
1232  plan%array1d_y)
1233  plan%array_y(i, :, k) = plan%array1d_y
1234  end do
1235  end do
1236  ey = real(plan%array_y, f64)
1237  end if
1238  end subroutine sll_s_compute_ey_from_phi
1239 
1242  subroutine sll_s_compute_ez_from_phi(plan, phi, ez)
1243  type(sll_t_poisson_3d_periodic_par), intent(inout) :: plan
1244  sll_real64, dimension(:, :, :), intent(in) :: phi
1245  sll_real64, dimension(:, :, :), intent(out) :: ez
1246  sll_int32 :: nz
1247  ! nx, ny, nz are the numbers of points - 1 in directions x, y ,z
1248  sll_int32 :: nx_loc, ny_loc, nz_loc
1249  sll_int32 :: i, j, k
1250  sll_real64 :: kz0
1251  sll_comp64 :: norm_fac
1252  sll_comp64, allocatable :: stripe(:)
1253 
1254  ! Get geometry information
1255  nz = plan%ncz
1256  kz0 = sll_p_twopi/plan%Lz
1257  norm_fac = cmplx(1.0_f64/real(nz, f64), 0.0_f64, f64)
1258 
1259  nx_loc = plan%loc_sizes(3, 1)
1260  ny_loc = plan%loc_sizes(3, 2)
1261  nz_loc = plan%loc_sizes(3, 3)
1262 
1263  if (use_openmp_threading) then
1264  ! Threaded implementation, using thread-local temporary arrays.
1265  ! TODO: Implement a simple iteration count condition when to use threads.
1266 !$omp parallel default(none) &
1267 !$omp shared(plan, kz0, nz, nx_loc, ny_loc, nz_loc, norm_fac, ez, phi) &
1268 !$omp private(i,j,k,stripe)
1269  allocate (stripe(nz_loc))
1270 !$omp do OMP_COLLAPSE OMP_SCHEDULE
1271  do j = 1, ny_loc
1272  do i = 1, nx_loc
1273  stripe = cmplx(phi(i, j, :), 0_f64, kind=f64)
1274  call sll_s_fft_exec_c2c_1d(plan%pz, stripe, stripe)
1275  do k = 1, nz_loc/2
1276  stripe(k) = stripe(k) &
1277  *cmplx(0_f64, -kz0*real(k - 1, f64), kind=f64)*norm_fac
1278  end do
1279  do k = nz_loc/2 + 1, nz_loc
1280  stripe(k) = stripe(k) &
1281  *cmplx(0_f64, kz0*real(nz - k + 1, f64), kind=f64)*norm_fac
1282  end do
1283  call sll_s_fft_exec_c2c_1d(plan%pz_inv, stripe, stripe)
1284  ez(i, j, :) = real(stripe, f64)
1285  end do
1286  end do
1287 !$omp end do
1288  deallocate (stripe)
1289 !$omp end parallel
1290  else
1291  ! --- previous unthreaded implementation, using internal temporary arrays ---
1292  do j = 1, ny_loc
1293  do i = 1, nx_loc
1294  plan%array1d_z = cmplx(phi(i, j, :), 0_f64, kind=f64)
1295  call sll_s_fft_exec_c2c_1d(plan%pz, plan%array1d_z, plan%array1d_z)
1296  do k = 1, nz_loc/2
1297  plan%array_z(i, j, k) = plan%array1d_z(k) &
1298  *cmplx(0_f64, -kz0*real(k - 1, f64), kind=f64) &
1299  *norm_fac
1300  end do
1301  do k = nz_loc/2 + 1, nz_loc
1302  plan%array_z(i, j, k) = plan%array1d_z(k) &
1303  *cmplx(0_f64, kz0*real(nz - k + 1, f64), kind=f64) &
1304  *norm_fac
1305  end do
1306  end do
1307  end do
1308  do j = 1, ny_loc
1309  do i = 1, nx_loc
1310  plan%array1d_z = plan%array_z(i, j, :)
1311  call sll_s_fft_exec_c2c_1d( &
1312  plan%pz_inv, &
1313  plan%array1d_z, &
1314  plan%array1d_z)
1315  plan%array_z(i, j, :) = plan%array1d_z
1316  end do
1317  end do
1318  ez = real(plan%array_z, f64)
1319  end if
1320  end subroutine sll_s_compute_ez_from_phi
1321 
1322  !------------------------------------------------------------------------------!
1323  ! From here alternative function based on finite differences.
1324  ! This function only sets the Ex component of the electric field.
1326  plan, &
1327  phi_x1, &
1328  efield_x1)
1329  type(sll_t_poisson_3d_periodic_par), intent(inout) :: plan
1330  sll_real64, intent(in) :: phi_x1(:, :, :)
1331  sll_real64, intent(out) :: efield_x1(:, :, :)
1332 
1333  ! local variables
1334  sll_int32 :: num_pts_x1
1335  sll_int32 :: i
1336  sll_int32 :: j
1337  sll_int32 :: k
1338  sll_real64 :: r_delta
1339  sll_real64 :: ex
1340  ! FIXME: arg checking
1341 
1342  num_pts_x1 = plan%loc_sizes(1, 1)
1343 
1344  r_delta = 1.0_f64/plan%dx
1345 
1346  ! Compute the electric field values on the left and right edges.
1347  do k = 1, plan%loc_sizes(1, 3)
1348  do j = 1, plan%loc_sizes(1, 2)
1349  ! i=1 plane:
1350  ex = r_delta*(-1.5_f64*phi_x1(1, j, k) + &
1351  2.0_f64*phi_x1(2, j, k) - &
1352  0.5_f64*phi_x1(3, j, k))
1353  efield_x1(1, j, k) = -ex
1354  ! i=num_pts_x1 plane:
1355  ex = r_delta*(0.5_f64*phi_x1(num_pts_x1 - 2, j, k) - &
1356  2.0_f64*phi_x1(num_pts_x1 - 1, j, k) + &
1357  1.5_f64*phi_x1(num_pts_x1, j, k))
1358  efield_x1(num_pts_x1, j, k) = -ex
1359  end do
1360  end do
1361 
1362  ! Electric field in interior points
1363  do k = 1, plan%loc_sizes(1, 3)
1364  do j = 1, plan%loc_sizes(1, 2)
1365  do i = 2, num_pts_x1 - 1
1366  ex = r_delta*0.5_f64*(phi_x1(i + 1, j, k) - phi_x1(i - 1, j, k))
1367  efield_x1(i, j, k) = -ex
1368  end do
1369  end do
1370  end do
1371 
1372  end subroutine compute_electric_field_x1_3d
1373 
1374  ! This function only sets the Ey component of the electric field.
1376  plan, &
1377  phi_x2, &
1378  efield_x2)
1379  type(sll_t_poisson_3d_periodic_par), intent(inout) :: plan
1380  sll_real64, intent(in) :: phi_x2(:, :, :)
1381  sll_real64, intent(out) :: efield_x2(:, :, :)
1382 
1383  ! local variables
1384  sll_int32 :: num_pts_x2
1385  sll_int32 :: i
1386  sll_int32 :: j
1387  sll_int32 :: k
1388  sll_real64 :: r_delta
1389  sll_real64 :: ey
1390 
1391  ! FIXME: arg checking
1392  num_pts_x2 = plan%loc_sizes(2, 2)
1393  r_delta = 1.0_f64/plan%dy
1394 
1395  ! Compute the electric field values on the bottom and top edges.
1396  do k = 1, plan%loc_sizes(2, 3)
1397  do i = 1, plan%loc_sizes(2, 1)
1398  ! bottom:
1399  ey = r_delta*(-1.5_f64*phi_x2(i, 1, k) + 2.0_f64*phi_x2(i, 2, k) - &
1400  0.5_f64*phi_x2(i, 3, k))
1401  efield_x2(i, 1, k) = -ey
1402  ! top:
1403  ey = r_delta*(0.5_f64*phi_x2(i, num_pts_x2 - 2, k) - &
1404  2.0_f64*phi_x2(i, num_pts_x2 - 1, k) + &
1405  1.5_f64*phi_x2(i, num_pts_x2, k))
1406  efield_x2(i, num_pts_x2, k) = -ey
1407  end do
1408  end do
1409 
1410  ! Electric field in interior points
1411  do k = 1, plan%loc_sizes(2, 3)
1412  do j = 2, num_pts_x2 - 1
1413  do i = 1, plan%loc_sizes(2, 1)
1414  ey = r_delta*0.5_f64*(phi_x2(i, j + 1, k) - phi_x2(i, j - 1, k))
1415  efield_x2(i, j, k) = -ey
1416  end do
1417  end do
1418  end do
1419 
1420  end subroutine compute_electric_field_x2_3d
1421 
1422  ! This function only sets the Ey component of the electric field.
1424  plan, &
1425  phi_x3, &
1426  efield_x3)
1427  type(sll_t_poisson_3d_periodic_par), intent(inout) :: plan
1428  sll_real64, intent(in) :: phi_x3(:, :, :)
1429  sll_real64, intent(out) :: efield_x3(:, :, :)
1430 
1431  ! local variables
1432  sll_int32 :: num_pts_x1
1433  sll_int32 :: num_pts_x2
1434  sll_int32 :: num_pts_x3
1435  sll_int32 :: i
1436  sll_int32 :: j
1437  sll_int32 :: k
1438  sll_real64 :: r_delta
1439  sll_real64 :: ez
1440 
1441  ! FIXME: arg checking
1442  num_pts_x1 = plan%loc_sizes(3, 1)
1443  num_pts_x2 = plan%loc_sizes(3, 2)
1444  num_pts_x3 = plan%loc_sizes(3, 3)
1445 
1446  r_delta = 1.0_f64/plan%dz
1447 
1448  ! Compute the electric field values on the end faces.
1449  do j = 1, num_pts_x2
1450  do i = 1, num_pts_x1
1451  ez = r_delta*(-1.5_f64*phi_x3(i, j, 1) + 2.0_f64*phi_x3(i, j, 2) - &
1452  0.5_f64*phi_x3(i, j, 3))
1453  efield_x3(i, j, 1) = -ez
1454  ! top:
1455  ez = r_delta*(0.5_f64*phi_x3(i, j, num_pts_x3 - 2) - &
1456  2.0_f64*phi_x3(i, j, num_pts_x3 - 1) + &
1457  1.5_f64*phi_x3(i, j, num_pts_x3))
1458  efield_x3(i, j, num_pts_x3) = -ez
1459  end do
1460  end do
1461 
1462  ! Electric field in interior points
1463  do k = 2, num_pts_x3 - 1
1464  do j = 1, num_pts_x2
1465  do i = 1, num_pts_x1
1466  ez = r_delta*0.5_f64*(phi_x3(i, j, k + 1) - phi_x3(i, j, k - 1))
1467  efield_x3(i, j, k) = -ez
1468  end do
1469  end do
1470  end do
1471 
1472  end subroutine compute_electric_field_x3_3d
1473 
Parallelizing facility.
integer(kind=i32) function, public sll_f_get_collective_size(col)
Determines the size of the group associated with a communicator.
Fortran module where set some physical and mathematical constants.
real(kind=f64), parameter, public sll_p_pi
real(kind=f64), parameter, public sll_p_twopi
FFT interface for FFTW.
subroutine, public sll_s_fft_free(plan)
Delete a plan.
integer, parameter, public sll_p_fft_forward
Flag to specify forward FFT (negative sign) [value -1].
subroutine, public sll_s_fft_exec_c2c_1d(plan, array_in, array_out)
Compute fast Fourier transform in complex to complex mode.
subroutine, public sll_s_fft_init_c2c_1d(plan, nx, array_in, array_out, direction, normalized, aligned, optimization)
Create new 1d complex to complex plan.
integer, parameter, public sll_p_fft_backward
Flag to specify backward FFT (positive sign) [value 1].
Periodic 3D poisson solver (parallel version)
subroutine, public sll_s_poisson_3d_periodic_par_solve(plan, rho, phi)
Compute the 3d potential from the Poisson equation with periodic boundary conditions.
subroutine sll_s_compute_ez_from_phi(plan, phi, ez)
Compute the 3d potential from the Poisson equation with periodic boundary conditions.
subroutine, public sll_s_poisson_3d_periodic_par_free(plan)
Delete the solver structure.
subroutine, public sll_s_poisson_3d_periodic_par_compute_e_from_phi_layoutseq3(plan, phi, ex, ey, ez)
Compute the electric fields from the potential (phi sequential along x3)
subroutine sll_s_compute_ey_from_phi(plan, phi, ey)
Compute the 3d potential from the Poisson equation with periodic boundary conditions.
subroutine sll_s_compute_ex_from_phi(plan, phi, ex)
Compute the 3d potential from the Poisson equation with periodic boundary conditions.
subroutine, public sll_s_poisson_3d_periodic_par_init(start_layout, ncx, ncy, ncz, Lx, Ly, Lz, plan)
Allocate the structure for the 3d parallel Poisson solver.
subroutine compute_electric_field_x3_3d(plan, phi_x3, efield_x3)
subroutine sll_s_poisson_3d_periodic_par_solve_e(plan, rho, ex, ey, ez)
Compute the electric fields from the potential (phi sequential along x1) Compute the 3d potential fro...
subroutine compute_electric_field_x1_3d(plan, phi_x1, efield_x1)
subroutine, public sll_s_poisson_3d_periodic_par_compute_e_from_phi(plan, phi, ex, ey, ez)
subroutine verify_argument_sizes_par(layout, rho, phi)
Check sizes of arrays in input.
subroutine compute_electric_field_x2_3d(plan, phi_x2, efield_x2)
Some common numerical utilities.
logical function, public sll_f_is_even(n)
Check if an integer is even.
logical function, public sll_f_is_power_of_two(n)
Check if an integer is equal to.
Wrapper around the communicator.
Type for FFT plan in SeLaLib.
Structure to solve Poisson equation on 3d mesh with periodic boundary conditions. Solver is parallel ...
    Report Typos and Errors