Report Typos and Errors    
Semi-Lagrangian Library
Modular library for kinetic and gyrokinetic simulations of plasmas in fusion energy devices.
sll_m_fft_sllfft.F90
Go to the documentation of this file.
1 #ifndef DOXYGEN_SHOULD_SKIP_THIS
2 !**************************************************************
3 ! Copyright INRIA
4 ! Authors :
5 ! CALVI project team
6 !
7 ! This code SeLaLib (for Semi-Lagrangian-Library)
8 ! is a parallel library for simulating the plasma turbulence
9 ! in a tokamak.
10 !
11 ! This software is governed by the CeCILL-B license
12 ! under French law and abiding by the rules of distribution
13 ! of free software. You can use, modify and redistribute
14 ! the software under the terms of the CeCILL-B license as
15 ! circulated by CEA, CNRS and INRIA at the following URL
16 ! "http://www.cecill.info".
17 !**************************************************************
18 
26 module sll_m_fft
27 !+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
28 #include "sll_working_precision.h"
29 #include "sll_assert.h"
30 #include "sll_memory.h"
31 #include "sll_errors.h"
32 #include "sll_fftw.h"
33 
34  use sll_m_constants, only: &
35  sll_p_pi
36 
37  use sll_m_utilities, only: &
39 
40  implicit none
41 
42  public :: &
43  sll_t_fft, &
76 
77  private
78 !+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
79 
81  type :: sll_t_fft
82  ! twiddle factors complex case
83  sll_comp64, allocatable, private :: t(:)
84  ! twiddles factors real case
85  sll_real64, allocatable, private :: twiddles(:)
86  ! twiddles factors real case
87  sll_real64, allocatable, private :: twiddles_n(:)
88 
89  logical :: normalized
90  sll_int32 :: direction
91  sll_int32 :: problem_rank
92  sll_int32, allocatable :: problem_shape(:)
93  sll_int32, allocatable, private :: scramble_index(:)
94  sll_int32, private :: transform_type
95  end type sll_t_fft
96 
98  integer, parameter :: sll_p_fft_forward = -1
100  integer, parameter :: sll_p_fft_backward = 1
101 
102  ! Flags for initialization of the plan: These options are only used in FFTW interface and all set to -1 here
103  integer, parameter :: sll_p_fft_measure = -1
104  integer, parameter :: sll_p_fft_patient = -1
105  integer, parameter :: sll_p_fft_estimate = -1
106  integer, parameter :: sll_p_fft_exhaustive = -1
107  integer, parameter :: sll_p_fft_wisdom_only = -1 ! 2097152 !< FFTW planning-rigor flag FFTW_WISDOM_ONLY (planner only initialized if wisdom is available) [value 2097152]
108 
109  ! Flags for the various types of transform (to make sure same type of init and execute functions are used)
110  integer, parameter :: p_fftw_c2c_1d = 0
111  integer, parameter :: p_fftw_r2r_1d = 1
112  integer, parameter :: p_fftw_r2c_1d = 2
113  integer, parameter :: p_fftw_c2r_1d = 3
114  integer, parameter :: p_fftw_c2c_2d = 4
115  integer, parameter :: p_fftw_r2r_2d = 5
116  integer, parameter :: p_fftw_r2c_2d = 6
117  integer, parameter :: p_fftw_c2r_2d = 7
118 
119 contains
120 
122  subroutine sll_s_fft_print_defaultlib()
123  print *, 'The library used is SLLFFT'
124  end subroutine
125 
127  function sll_f_fft_allocate_aligned_complex(n) result(data)!, ptr_data, data)
128  sll_int32, intent(in) :: n
129  complex(f64), pointer :: data(:)
130 
131  allocate (data(n))
132  sll_warning('sll_f_fft_allocate_aligned_complex', 'Aligned allocation not implemented for SLLFFT. Usual allocation.')
133 
135 
137  function sll_f_fft_allocate_aligned_real(n) result(data)!, ptr_data, data)
138  sll_int32, intent(in) :: n
139  real(f64), pointer :: data(:)
140 
141  allocate (data(n))
142  sll_warning('sll_f_fft_allocate_aligned_real', 'Aligned allocation not implemented for SLLFFT. Usual allocation.')
143 
145 
146  !-----------------------------------------------------------------------------
148  !-----------------------------------------------------------------------------
149  subroutine sll_s_fft_deallocate_aligned_complex(data)
150  complex(f64), pointer :: data(:)
151 
152  deallocate (data)
153  sll_warning('sll_s_fft_deallocate_aligned_complex', 'Aligned deallocation not implemented for SLLFFT. Usual deallocation.')
154 
156 
157  !-----------------------------------------------------------------------------
159  !-----------------------------------------------------------------------------
160  subroutine sll_s_fft_deallocate_aligned_real(data)
161  real(f64), pointer :: data(:)
162 
163  deallocate (data)
164  sll_warning('sll_s_fft_deallocate_aligned_real', 'Aligned deallocation not implemented for SLLFFT. Usual deallocation.')
165 
166  end subroutine sll_s_fft_deallocate_aligned_real
167 
168  !-----------------------------------------------------------------------------
171  !-----------------------------------------------------------------------------
172  subroutine sll_s_fft_get_k_list_c2c_1d(plan, k_list)
173  type(sll_t_fft), intent(in) :: plan
174  sll_int32, intent(out) :: k_list(0:)
175 
176  sll_int32 :: k ! single mode
177  sll_int32 :: n ! problem size
178  n = plan%problem_shape(1)
179 
180  if (size(k_list) /= n) then
181  sll_error("sll_s_fft_get_k_list_c2c_1d", "k_list must have n elements")
182  end if
183 
184  k_list(0:n/2) = [(k, k=0, n/2)]
185  k_list(n/2 + 1:n - 1) = [(k - n, k=n/2 + 1, n - 1)]
186 
187  end subroutine sll_s_fft_get_k_list_c2c_1d
188 
189  !-----------------------------------------------------------------------------
192  !-----------------------------------------------------------------------------
193  subroutine sll_s_fft_get_k_list_r2r_1d(plan, k_list)
194  type(sll_t_fft), intent(in) :: plan
195  sll_int32, intent(out) :: k_list(0:)
196 
197  sll_int32 :: k ! single mode
198  sll_int32 :: n ! problem size
199  n = plan%problem_shape(1)
200 
201  if (size(k_list) /= n) then
202  sll_error("sll_s_fft_get_k_list_r2r_1d", "k_list must have n elements")
203  end if
204 
205  k_list(0:n/2) = [(k, k=0, n/2)]
206  k_list(n/2 + 1:n - 1) = [(n - k, k=n/2 + 1, n - 1)]
207 
208  end subroutine sll_s_fft_get_k_list_r2r_1d
209 
210  !-----------------------------------------------------------------------------
213  !-----------------------------------------------------------------------------
214  subroutine sll_s_fft_get_k_list_r2c_1d(plan, k_list)
215  type(sll_t_fft), intent(in) :: plan
216  sll_int32, intent(out) :: k_list(0:)
217 
218  sll_int32 :: k ! single mode
219  sll_int32 :: n_2 ! problem size / 2 (rounded down)
220  n_2 = plan%problem_shape(1)/2
221 
222  if (size(k_list) /= n_2 + 1) then
223  sll_error("sll_s_fft_get_k_list_r2c_1d", "k_list must have n/2+1 elements")
224  end if
225 
226  k_list(:) = [(k, k=0, n_2)]
227 
228  end subroutine sll_s_fft_get_k_list_r2c_1d
229 
230  !-----------------------------------------------------------------------------
232  !-----------------------------------------------------------------------------
233  function sll_f_fft_get_mode_r2c_1d(plan, data, k) result(ck)
234  type(sll_t_fft), intent(in) :: plan
235  sll_real64, intent(in) :: data(0:)
236  sll_int32, intent(in) :: k
237  sll_comp64 :: ck
238 
239  sll_int32 :: n ! problem size
240  n = plan%problem_shape(1)
241 
242  if (k == 0) then; ck = cmplx(data(k), 0.0_f64, kind=f64)
243  else if (k <= (n + 1)/2 - 1) then; ck = cmplx(data(k), data(n - k), kind=f64)
244  else if (k == n/2) then; ck = cmplx(data(k), 0.0_f64, kind=f64)
245  else if (k <= n - 1) then; ck = cmplx(data(n - k), -data(k), kind=f64)
246  else
247  sll_error("sll_f_fft_get_mode_r2c_1d", "k must be between 0 and n-1")
248  end if
249 
250  end function sll_f_fft_get_mode_r2c_1d
251 
252  !-----------------------------------------------------------------------------
254  !-----------------------------------------------------------------------------
255  subroutine sll_s_fft_set_mode_c2r_1d(plan, data, ck, k)
256  type(sll_t_fft), intent(in) :: plan
257  sll_real64, intent(inout) :: data(0:)
258  sll_comp64, intent(in) :: ck
259  sll_int32, intent(in) :: k
260 
261  sll_int32 :: n ! problem size
262  n = plan%problem_shape(1)
263 
264  if (k == 0) then; data(0) = real(ck) ! imaginary part ignored
265  else if (k <= (n + 1)/2 - 1) then; data(k) = real(ck); data(n - k) = aimag(ck)
266  else if (k == n/2) then; data(k) = real(ck) ! imaginary part ignored
267  else if (k <= n - 1) then; data(n - k) = real(ck); data(k) = -aimag(ck)
268  else
269  sll_error("sll_s_fft_set_mode_c2r_1d", "k must be between 0 and n-1")
270  end if
271 
272  end subroutine sll_s_fft_set_mode_c2r_1d
273 
274  !-----------------------------------------------------------------------------
275 
276 ! THIS IS THE ORDERING THAT SLLFFT R2R PRODUCES WITHOUT THE REORDERING TO FFTW
277 !!$
278 !!$ function sll_f_fft_get_mode_r2c_1d(plan,data,k) result(mode)
279 !!$ type(sll_t_fft), pointer :: plan
280 !!$ sll_real64, dimension(0:) :: data
281 !!$ sll_int32 :: k, n_2, n
282 !!$ sll_comp64 :: mode
283 !!$
284 !!$ n = plan%problem_shape(1)
285 !!$ n_2 = n/2 !ishft(n,-1)
286 !!$
287 !!$ if( k .eq. 0 ) then
288 !!$ mode = cmplx(data(0),0.0_f64,kind=f64)
289 !!$ else if( k .eq. n_2 ) then
290 !!$ mode = cmplx(data(1),0.0_f64,kind=f64)
291 !!$ else if( k .gt. n_2 ) then
292 !!$ mode = cmplx( data(2*(n-k)) , -data(2*(n-k)+1),kind=f64 )
293 !!$ else
294 !!$ mode = cmplx( data(2*k) , data(2*k+1) ,kind=f64)
295 !!$ endif
296 !!$ end function
297 !!$
298 !!$
299 !!$ subroutine sll_s_fft_set_mode_c2r_1d(plan,data,new_value,k)
300 !!$ type(sll_t_fft), pointer :: plan
301 !!$ sll_real64, dimension(0:) :: data
302 !!$ sll_int32 :: k, n_2, n
303 !!$ sll_comp64 :: new_value
304 !!$
305 !!$ n = plan%problem_shape(1)
306 !!$ n_2 = n/2 !ishft(n,-1)
307 !!$
308 !!$ if( k .eq. 0 ) then
309 !!$ data(0) = real(new_value,kind=f64)
310 !!$ else if( k .eq. n_2 ) then
311 !!$ data(1) = real(new_value,kind=f64)
312 !!$ else if( k .gt. n_2 ) then
313 !!$ data(2*(n-k)) = real(new_value,kind=f64)
314 !!$ data(2*(n-k)+1) = -aimag(new_value)
315 !!$ else
316 !!$ data(2*k) = real(new_value,kind=f64)
317 !!$ data(2*k+1) = aimag(new_value)
318 !!$ endif
319 !!$ end subroutine
320 
321 ! COMPLEX
322 ! ------
323 ! - 1D -
324 ! ------
326  subroutine sll_s_fft_init_c2c_1d(plan, nx, array_in, array_out, direction, normalized, aligned, optimization)
327  type(sll_t_fft), intent(out) :: plan
328  sll_int32, intent(in) :: nx
329  sll_comp64, intent(inout) :: array_in(:)
330  sll_comp64, intent(inout) :: array_out(:)
331  sll_int32, intent(in) :: direction
332  logical, optional, intent(in) :: normalized
333  logical, optional, intent(in) :: aligned
334  sll_int32, optional, intent(in) :: optimization
335 
336  sll_int32 :: ierr, i
337 
338  sll_assert(nx > 2) ! For nx=2, there is a bug in SLLFFT
339  sll_assert(size(array_in) .ge. nx)
340  sll_assert(size(array_out) .ge. nx)
341  plan%transform_type = p_fftw_c2c_1d
342  plan%direction = direction
343  if (present(normalized)) then
344  plan%normalized = normalized
345  else
346  plan%normalized = .false.
347  end if
348 
349  plan%problem_rank = 1
350  sll_allocate(plan%problem_shape(1), ierr)
351  plan%problem_shape = (/nx/)
352 
353  sll_allocate(plan%scramble_index(0:nx - 1), ierr)
354  ! For the moment the mode are stored in the natural order
355  ! 0,1,2,...,n-1
356  do i = 0, nx - 1
357  plan%scramble_index(i) = i
358  end do
359  sll_allocate(plan%t(1:nx/2), ierr)
360  call compute_twiddles(nx, plan%t)
361  if (direction == sll_p_fft_forward) then
362  plan%t = conjg(plan%t)
363  end if
364  call bit_reverse_complex(nx/2, plan%t)
365  end subroutine sll_s_fft_init_c2c_1d
366 
368  subroutine sll_s_fft_exec_c2c_1d(plan, array_in, array_out)
369  type(sll_t_fft), intent(in) :: plan
370  sll_comp64, intent(inout) :: array_in(:)
371  sll_comp64, intent(inout) :: array_out(:)
372 
373  sll_real64 :: factor
374 
375  sll_assert(plan%transform_type == p_fftw_c2c_1d)
376 
377  if (loc(array_in(1)) .ne. loc(array_out(1))) then ! out-place transform
378  array_out = array_in
379  end if
380 
381  call fft_dit_nr(array_out, plan%problem_shape(1), plan%t, plan%direction)
382  call bit_reverse_complex(plan%problem_shape(1), array_out)
383 
384  if (plan%normalized .EQV. .true.) then
385  factor = 1.0_f64/real(plan%problem_shape(1), kind=f64)
386  array_out = factor*array_out
387  end if
388 
389  end subroutine
390 
391  ! --------------------
392  ! - 2D -
393  ! --------------------
395  subroutine sll_s_fft_init_c2c_2d(plan, nx, ny, array_in, array_out, direction, normalized, aligned, optimization)
396  type(sll_t_fft), intent(out) :: plan
397  sll_int32, intent(in) :: nx
398  sll_int32, intent(in) :: ny
399  sll_comp64, intent(inout) :: array_in(0:, 0:)
400  sll_comp64, intent(inout) :: array_out(0:, 0:)
401  sll_int32, intent(in) :: direction
402  logical, optional, intent(in) :: normalized
403  logical, optional, intent(in) :: aligned
404  sll_int32, optional, intent(in) :: optimization
405  sll_int32 :: ierr
406 
407  ! This does not look good.
408  ! 1. Error checking like this should be permanent, not with assertions.
409  sll_assert(size(array_in, dim=1) .ge. nx)
410  sll_assert(size(array_in, dim=2) .ge. ny)
411  sll_assert(size(array_out, dim=1) .ge. nx)
412  sll_assert(size(array_out, dim=2) .ge. ny)
413  sll_assert(nx > 2) ! For nx=2, there is a bug in SLLFFT
414  sll_assert(ny > 2)
415 
416  plan%transform_type = p_fftw_c2c_2d
417  plan%direction = direction
418  if (present(normalized)) then
419  plan%normalized = normalized
420  else
421  plan%normalized = .false.
422  end if
423 
424  plan%problem_rank = 2
425  sll_allocate(plan%problem_shape(2), ierr)
426  plan%problem_shape = (/nx, ny/)
427 
428  sll_allocate(plan%t(1:nx/2 + ny/2), ierr)
429 
430  call compute_twiddles(nx, plan%t(1:nx/2))
431  if (direction == sll_p_fft_forward) then
432  plan%t(1:nx/2) = conjg(plan%t(1:nx/2))
433  end if
434  call bit_reverse_complex(nx/2, plan%t(1:nx/2))
435 
436  call compute_twiddles(ny, plan%t(nx/2 + 1:nx/2 + ny/2))
437  if (direction == sll_p_fft_forward) then
438  plan%t(nx/2 + 1:nx/2 + ny/2) = conjg(plan%t(nx/2 + 1:nx/2 + ny/2))
439  end if
440  call bit_reverse_complex(ny/2, plan%t(nx/2 + 1:nx/2 + ny/2))
441 
442  end subroutine sll_s_fft_init_c2c_2d
443 
445  subroutine sll_s_fft_exec_c2c_2d(plan, array_in, array_out)
446  type(sll_t_fft), intent(in) :: plan
447  sll_comp64, intent(inout) :: array_in(0:, 0:)
448  sll_comp64, intent(inout) :: array_out(0:, 0:)
449 
450  sll_int32 :: i, nx, ny
451  sll_int32, dimension(2) :: fft_shape
452  sll_real64 :: factor
453 
454  sll_assert(plan%transform_type == p_fftw_c2c_2d)
455 
456  if (loc(array_in(1, 1)) .ne. loc(array_out(1, 1))) then ! out-place transform
457  array_out = array_in ! copy source
458  end if
459  fft_shape(1:2) = plan%problem_shape(1:2)
460  nx = fft_shape(1)
461  ny = fft_shape(2)
462 
463  do i = 0, ny - 1
464  call fft_dit_nr(array_out(0:nx - 1, i), nx, plan%t(1:nx/2), plan%direction)
465  call bit_reverse_complex(nx, array_out(0:nx - 1, i))
466  end do
467 
468  do i = 0, nx - 1
469  call fft_dit_nr( &
470  array_out(i, 0:ny - 1), &
471  ny, &
472  plan%t(nx/2 + 1:nx/2 + ny/2), &
473  plan%direction)
474  call bit_reverse_complex(ny, array_out(i, 0:ny - 1))
475  end do
476 
477  if (plan%normalized .EQV. .true.) then
478  factor = 1.0_f64/real(nx*ny, kind=f64)
479  array_out = factor*array_out
480  end if
481 
482  end subroutine
483 
484 ! REAL
486  subroutine sll_s_fft_init_r2r_1d(plan, nx, array_in, array_out, direction, normalized, aligned, optimization)
487  type(sll_t_fft), intent(out) :: plan
488  sll_int32, intent(in) :: nx
489  sll_real64, intent(inout) :: array_in(:)
490  sll_real64, intent(inout) :: array_out(:)
491  sll_int32, intent(in) :: direction
492  logical, optional, intent(in) :: normalized
493  logical, optional, intent(in) :: aligned
494  sll_int32, optional, intent(in) :: optimization
495 
496  sll_int32 :: ierr, i
497 
498  sll_assert(size(array_in) .eq. nx)
499  sll_assert(size(array_out) .eq. nx)
500  plan%transform_type = p_fftw_r2r_1d
501  plan%direction = direction
502  if (present(normalized)) then
503  plan%normalized = normalized
504  else
505  plan%normalized = .false.
506  end if
507 
508  plan%problem_rank = 1
509  sll_allocate(plan%problem_shape(1), ierr)
510  plan%problem_shape = (/nx/)
511 
512  sll_allocate(plan%scramble_index(0:nx/2), ierr)
513  ! The mode are stored in the order 0,n/2,1,2,3,...,n/2-1
514  ! with the representation r_0,r_n/2,r_1,i_1,...,
515  ! The modes 0 and n/2 are purely real.
516  plan%scramble_index(0) = 0
517  plan%scramble_index(1) = nx/2
518  do i = 1, nx/2 - 1
519  plan%scramble_index(i + 1) = i
520  end do
521 
522  sll_allocate(plan%twiddles(0:nx/2 - 1), ierr)
523  sll_allocate(plan%twiddles_n(0:nx - 1), ierr)
524  call compute_twiddles_real_array(nx, plan%twiddles_n)
525  call compute_twiddles_real_array(nx/2, plan%twiddles(0:nx/2 - 1))
526  if (direction .eq. sll_p_fft_forward) then
527  call conjg_in_pairs(nx/2, plan%twiddles)
528  end if
529  call bit_reverse_in_pairs(nx/4, plan%twiddles(0:nx/2 - 1))
530  end subroutine sll_s_fft_init_r2r_1d
531 
533  subroutine sll_s_fft_exec_r2r_1d(plan, array_in, array_out)
534  type(sll_t_fft), intent(in) :: plan
535  sll_real64, intent(inout) :: array_in(:)
536  sll_real64, intent(inout) :: array_out(:)
537 
538  sll_int32 :: nx, k
539  sll_real64 :: factor
540  sll_real64 :: tmp(plan%problem_shape(1))
541 
542  sll_assert(plan%transform_type == p_fftw_r2r_1d)
543 
544  nx = plan%problem_shape(1)
545 
546  if (loc(array_in(1)) .ne. loc(array_out(1))) then ! out-place transform
547  array_out = array_in
548  end if
549 
550  ! Change from FFTW ordering back to SLLFFT ordering
551  if (plan%direction .EQ. sll_p_fft_backward) then
552  tmp = array_out
553  do k = 1, nx/2 - 1
554  array_out(2*k + 1) = tmp(k + 1)
555  array_out(2*k + 2) = tmp(nx - k + 1)
556  end do
557  array_out(1) = tmp(1)
558  array_out(2) = tmp(nx/2 + 1)
559  end if
560 
561  call real_data_fft_dit(array_out, nx, plan%twiddles, plan%twiddles_n, plan%direction)
562 
563  ! Change to FFTW ordering
564  if (plan%direction .EQ. sll_p_fft_forward) then
565  tmp = array_out
566  do k = 1, nx/2 - 1
567  array_out(k + 1) = tmp(2*k + 1)
568  array_out(nx - k + 1) = tmp(2*k + 1 + 1)
569  end do
570  array_out(nx/2 + 1) = tmp(2)
571  end if
572 
573  if (plan%normalized .EQV. .true.) then
574  factor = 1.0_f64/real(plan%problem_shape(1), kind=f64)
575  array_out = factor*array_out
576  end if
577 
578  end subroutine
579 ! END REAL
580 
581 ! REAL TO COMPLEX
583  subroutine sll_s_fft_init_r2c_1d(plan, nx, array_in, array_out, normalized, aligned, optimization)
584  type(sll_t_fft), intent(out) :: plan
585  sll_int32, intent(in) :: nx
586  sll_real64, intent(inout) :: array_in(:)
587  sll_comp64, intent(out) :: array_out(:)
588  logical, optional, intent(in) :: normalized
589  logical, optional, intent(in) :: aligned
590  sll_int32, optional, intent(in) :: optimization
591 
592  sll_int32 :: ierr
593 
594  sll_assert(size(array_in) .eq. nx)
595  sll_assert(size(array_out) .eq. nx/2 + 1)
596  plan%transform_type = p_fftw_r2c_1d
597  plan%direction = sll_p_fft_forward
598  if (present(normalized)) then
599  plan%normalized = normalized
600  else
601  plan%normalized = .false.
602  end if
603  plan%problem_rank = 1
604  sll_allocate(plan%problem_shape(1), ierr)
605  plan%problem_shape = (/nx/)
606 
607  sll_allocate(plan%twiddles(0:nx/2 - 1), ierr)
608  sll_allocate(plan%twiddles_n(0:nx - 1), ierr)
609  call compute_twiddles_real_array(nx, plan%twiddles_n)
610  call compute_twiddles_real_array(nx/2, plan%twiddles(0:nx/2 - 1))
611  call conjg_in_pairs(nx/2, plan%twiddles)
612  call bit_reverse_in_pairs(nx/4, plan%twiddles(0:nx/2 - 1))
613  end subroutine sll_s_fft_init_r2c_1d
614 
616  subroutine sll_s_fft_exec_r2c_1d(plan, array_in, array_out)
617  type(sll_t_fft), intent(in) :: plan
618  sll_real64, intent(inout) :: array_in(0:)
619  sll_comp64, intent(out) :: array_out(0:)
620 
621  sll_real64 :: factor
622  sll_int32 :: i, nx
623 
624  sll_assert(plan%transform_type == p_fftw_r2c_1d)
625 
626  nx = plan%problem_shape(1)
627 
628  call real_data_fft_dit(array_in, nx, plan%twiddles, plan%twiddles_n, plan%direction)
629  !mode k=0
630  array_out(0) = cmplx(array_in(0), 0.0_f64, kind=f64)
631  !mode k=n/2
632  array_out(nx/2) = cmplx(array_in(1), 0.0_f64, kind=f64)
633  !mode k=1 to k= n-2
634  do i = 1, nx/2 - 1
635  array_out(i) = cmplx(array_in(2*i), array_in(2*i + 1), kind=f64)
636  !array_out(plan%N-i) = cmplx(array_in(2*i),-array_in(2*i+1),kind=f64)
637  end do
638 
639  if (plan%normalized .EQV. .true.) then
640  factor = 1.0_f64/real(plan%problem_shape(1), kind=f64)
641  array_out = factor*array_out
642  end if
643  end subroutine
644 ! END REAL TO COMPLEX
645 
646 ! COMPLEX TO REAL
648  subroutine sll_s_fft_init_c2r_1d(plan, nx, array_in, array_out, normalized, aligned, optimization)
649  type(sll_t_fft), intent(out) :: plan
650  sll_int32, intent(in) :: nx
651  sll_comp64, intent(in) :: array_in(:)
652  sll_real64, intent(out) :: array_out(:)
653  logical, optional, intent(in) :: normalized
654  logical, optional, intent(in) :: aligned
655  sll_int32, optional, intent(in) :: optimization
656 
657  sll_int32 :: ierr
658 
659  sll_assert(size(array_in) .eq. nx/2 + 1)
660  sll_assert(size(array_out) .eq. nx)
661  plan%transform_type = p_fftw_c2r_1d
662  plan%direction = sll_p_fft_backward
663  if (present(normalized)) then
664  plan%normalized = normalized
665  else
666  plan%normalized = .false.
667  end if
668  plan%problem_rank = 1
669  sll_allocate(plan%problem_shape(1), ierr)
670  plan%problem_shape = (/nx/)
671 
672  sll_allocate(plan%twiddles(0:nx/2 - 1), ierr)
673  sll_allocate(plan%twiddles_n(0:nx - 1), ierr)
674  call compute_twiddles_real_array(nx, plan%twiddles_n)
675  call compute_twiddles_real_array(nx/2, plan%twiddles(0:nx/2 - 1))
676  call bit_reverse_in_pairs(nx/4, plan%twiddles(0:nx/2 - 1))
677  end subroutine sll_s_fft_init_c2r_1d
678 
680  subroutine sll_s_fft_exec_c2r_1d(plan, array_in, array_out)
681  type(sll_t_fft), intent(in) :: plan
682  sll_comp64, intent(inout) :: array_in(0:)
683  sll_real64, intent(inout) :: array_out(0:)
684 
685  sll_int32 :: nx, i
686  sll_real64 :: factor
687 
688  sll_assert(plan%transform_type == p_fftw_c2r_1d)
689 
690  nx = plan%problem_shape(1)
691 
692  !mode k=0
693  array_out(0) = real(array_in(0), kind=f64)
694  !mode k=n/2
695  array_out(1) = real(array_in(nx/2), kind=f64)
696  !mode k=1 to k= n-2
697  do i = 1, nx/2 - 1
698  array_out(2*i) = real(array_in(i), kind=f64)
699  array_out(2*i + 1) = aimag(array_in(i))
700  end do
701  call real_data_fft_dit(array_out, nx, plan%twiddles, plan%twiddles_n, plan%direction)
702 
703  if (plan%normalized .EQV. .true.) then
704  factor = 1.0_f64/real(plan%problem_shape(1), kind=f64)
705  array_out = factor*array_out
706  end if
707 
708  end subroutine
709 ! END COMPLEX TO REAL
710 
711 ! REAL TO COMPLEX 2D
713  subroutine sll_s_fft_init_r2c_2d(plan, nx, ny, array_in, array_out, normalized, aligned, optimization)
714  type(sll_t_fft), intent(out) :: plan
715  sll_int32, intent(in) :: nx
716  sll_int32, intent(in) :: ny
717  sll_real64, intent(inout) :: array_in(:, :)
718  sll_comp64, intent(out) :: array_out(:, :)
719  logical, optional, intent(in) :: normalized
720  logical, optional, intent(in) :: aligned
721  sll_int32, optional, intent(in) :: optimization
722 
723  sll_int32 :: ierr
724 
725  sll_assert(size(array_in, dim=1) .eq. nx)
726  sll_assert(size(array_in, dim=2) .eq. ny)
727  sll_assert(size(array_out, dim=1) .eq. nx/2 + 1)
728  sll_assert(size(array_out, dim=2) .eq. ny)
729  plan%transform_type = p_fftw_r2c_2d
730  plan%direction = sll_p_fft_forward
731  if (present(normalized)) then
732  plan%normalized = normalized
733  else
734  plan%normalized = .false.
735  end if
736  plan%problem_rank = 2
737  sll_allocate(plan%problem_shape(2), ierr)
738  plan%problem_shape = (/nx, ny/)
739 
740  sll_allocate(plan%t(1:ny/2), ierr)
741  call compute_twiddles(ny, plan%t)
742  plan%t = conjg(plan%t)
743  call bit_reverse_complex(ny/2, plan%t)
744 
745  sll_allocate(plan%twiddles(0:nx/2 - 1), ierr)
746  sll_allocate(plan%twiddles_n(0:nx - 1), ierr)
747  call compute_twiddles_real_array(nx, plan%twiddles_n)
748  call compute_twiddles_real_array(nx/2, plan%twiddles(0:nx/2 - 1))
749  call conjg_in_pairs(nx/2, plan%twiddles)
750  call bit_reverse_in_pairs(nx/4, plan%twiddles(0:nx/2 - 1))
751  end subroutine sll_s_fft_init_r2c_2d
752 
754  subroutine sll_s_fft_exec_r2c_2d(plan, array_in, array_out)
755  type(sll_t_fft), intent(in) :: plan
756  sll_real64, intent(inout) :: array_in(0:, 0:)
757  sll_comp64, intent(out) :: array_out(0:, 0:)
758 
759  sll_int32 :: nx, i, ny, k
760  sll_real64 :: factor
761 
762  sll_assert(plan%transform_type == p_fftw_r2c_2d)
763 
764  nx = plan%problem_shape(1)
765  ny = plan%problem_shape(2)
766 
767  do k = 0, ny - 1
768  call real_data_fft_dit(array_in(0:nx - 1, k), nx, plan%twiddles, plan%twiddles_n, plan%direction)
769  array_out(0, k) = cmplx(array_in(0, k), 0.0_f64, kind=f64)
770  array_out(nx/2, k) = cmplx(array_in(1, k), 0.0_f64, kind=f64)
771  do i = 1, nx/2 - 1
772  array_out(i, k) = cmplx(array_in(2*i, k), array_in(2*i + 1, k), kind=f64)
773  end do
774  end do
775  do k = 0, nx/2
776  call fft_dit_nr(array_out(k, 0:ny - 1), ny, plan%t, plan%direction)
777  call bit_reverse_complex(ny, array_out(k, 0:ny - 1))
778  end do
779 
780  if (plan%normalized .EQV. .true.) then
781  factor = 1.0_f64/real(nx*ny, kind=f64)
782  array_out = factor*array_out
783  end if
784  end subroutine
785 ! END REAL TO COMPLEX 2D
786 
787 ! COMPLEX TO REAL 2D
789  subroutine sll_s_fft_init_c2r_2d(plan, nx, ny, array_in, array_out, normalized, aligned, optimization)
790  type(sll_t_fft), intent(out) :: plan
791  sll_int32, intent(in) :: nx
792  sll_int32, intent(in) :: ny
793  sll_comp64, intent(in) :: array_in(:, :)
794  sll_real64, intent(out) :: array_out(:, :)
795  logical, optional, intent(in) :: normalized
796  logical, optional, intent(in) :: aligned
797  sll_int32, optional, intent(in) :: optimization
798 
799  sll_int32 :: ierr
800 
801  sll_assert(size(array_in, dim=1) .eq. nx/2 + 1)
802  sll_assert(size(array_in, dim=2) .eq. ny)
803  sll_assert(size(array_out, dim=1) .eq. nx)
804  sll_assert(size(array_out, dim=2) .eq. ny)
805 
806  plan%transform_type = p_fftw_c2r_2d
807  plan%direction = sll_p_fft_backward
808  if (present(normalized)) then
809  plan%normalized = normalized
810  else
811  plan%normalized = .false.
812  end if
813  plan%problem_rank = 2
814  sll_allocate(plan%problem_shape(2), ierr)
815  plan%problem_shape = (/nx, ny/)
816 
817  sll_allocate(plan%t(1:ny/2), ierr)
818  call compute_twiddles(ny, plan%t)
819  call bit_reverse_complex(ny/2, plan%t)
820 
821  sll_allocate(plan%twiddles(0:nx/2 - 1), ierr)
822  sll_allocate(plan%twiddles_n(0:nx - 1), ierr)
823  call compute_twiddles_real_array(nx, plan%twiddles_n)
824  call compute_twiddles_real_array(nx/2, plan%twiddles(0:nx/2 - 1))
825  call bit_reverse_in_pairs(nx/4, plan%twiddles(0:nx/2 - 1))
826  end subroutine sll_s_fft_init_c2r_2d
827 
829  subroutine sll_s_fft_exec_c2r_2d(plan, array_in, array_out)
830  type(sll_t_fft), intent(in) :: plan
831  sll_comp64, intent(inout) :: array_in(0:, 0:)
832  sll_real64, intent(inout) :: array_out(0:, 0:)
833 
834  sll_int32 :: nx, i, ny, k, j
835  sll_real64 :: factor
836 
837  sll_assert(plan%transform_type == p_fftw_c2r_2d)
838 
839  nx = plan%problem_shape(1)
840  ny = plan%problem_shape(2)
841 
842  do j = 0, nx/2
843  call fft_dit_nr(array_in(j, 0:ny - 1), ny, plan%t, plan%direction)
844  call bit_reverse_complex(ny, array_in(j, 0:ny - 1))
845  end do
846  do i = 0, ny - 1
847  array_out(0, i) = real(array_in(0, i), kind=f64)
848  array_out(1, i) = real(array_in(nx/2, i), kind=f64)
849  do k = 1, nx/2 - 1
850  array_out(2*k, i) = real(array_in(k, i), kind=f64)
851  array_out(2*k + 1, i) = aimag(array_in(k, i))
852  end do
853  call real_data_fft_dit(array_out(0:nx - 1, i), nx, plan%twiddles, plan%twiddles_n, plan%direction)
854  end do
855 
856  if (plan%normalized .EQV. .true.) then
857  factor = 1.0_f64/real(nx*ny, kind=f64)
858  array_out = factor*array_out
859  end if
860  end subroutine
861 ! END COMPLEX TO REAL 2D
862 
863 ! SELALIB SELALIB SELALIB SELALIB SELALIB SELALIB SELALIB SELALIB SELALIB SELALIB
864 ! SELALIB SELALIB SELALIB SELALIB SELALIB SELALIB SELALIB SELALIB SELALIB SELALIB
865 ! SELALIB SELALIB SELALIB SELALIB SELALIB SELALIB SELALIB SELALIB SELALIB SELALIB
866 
868  subroutine sll_s_fft_free(plan)
869  type(sll_t_fft), intent(inout) :: plan
870 
871  if (allocated(plan%t)) then
872  deallocate (plan%t)
873  end if
874  if (allocated(plan%twiddles)) then
875  deallocate (plan%twiddles)
876  end if
877  if (allocated(plan%twiddles_n)) then
878  deallocate (plan%twiddles_n)
879  end if
880  if (allocated(plan%problem_shape)) then
881  deallocate (plan%problem_shape)
882  end if
883  end subroutine
884 
885 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
886 !! The following is only concerning the sll implementation of the fft !!
887 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
888 
889  ! We choose the convention in which the direction of the FFT is determined
890  ! by the conjugation of the twiddle factors. If
891  !
892  ! omega_N = -j2*pi/N
893  !
894  ! then we call this the FORWARD transform.
895 
896  ! Compute the twiddle factors by Singleton's method. N is a factor of 2.
897  ! N represents the full size of the transform for which the twiddles are
898  ! needed, however, because of redundancies in the values of the twiddle
899  ! factors, only half of them need to be stored.
900  subroutine compute_twiddles(n, t)
901  intrinsic :: exp, real
902  sll_int32 :: n ! size of data for FFT
903  sll_comp64, dimension(1:n/2), intent(out) :: t
904  sll_int32 :: k
905  sll_real64 :: theta
906  if (n .eq. 0 .or. n .eq. 1) then
907  print *, "ERROR: Zero array size passed to compute_twiddle_factors()."
908  return
909  else if (size(t) .lt. n/2) then
910  print *, "ERROR: compute_twiddles(), array is of insufficient size."
911  return
912  else
913  theta = 2.0_f64*sll_p_pi/real(n, kind=f64) ! basic angular interval
914  ! By whatever means we use to compute the twiddles, some sanity
915  ! checks are in order:
916  ! t(1) = (1,0)
917  ! t(n/8+1) = (sqrt(2)/2, sqrt(2)/2)
918  ! t(n/4+1) = (0,1)
919  ! t(n/2+1) = (0,-1) ... but this one is not stored
920  do k = 0, n/2 - 1
921  t(k + 1) = exp((0.0_f64, 1.0_f64)*theta*cmplx(k, 0.0, kind=f64))
922  end do
923  ! might as well fix this by hand since the result isn't exact otherwise:
924  t(n/4 + 1) = (0.0_f64, 1.0_f64)
925  end if
926  end subroutine compute_twiddles
927 
928  ! Need the following macros to access a real array as if it were made of
929  ! complex numbers. Please notice an important detail:
930  ! The validity of these macros depends on the type of indexing of the array.
931  ! For example, for a zero-indexed array, the following pair should be used:
932 #define CREAL0(array, i) array(2*(i))
933 #define CIMAG0(array, i) array(2*(i)+1)
934  ! For a one-indexed array, the following pair is appropriate
935 #define CREAL1(array, i) array(2*(i)-1)
936 #define CIMAG1(array, i) array(2*(i))
937  ! In this module we use both types of indexings, so one should be aware
938  ! of the macro pair to use.
939 
940  subroutine compute_twiddles_real_array(n, t)
941  intrinsic :: cos, sin, real
942  sll_int32 :: n ! size of data for FFT
943  sll_real64, dimension(0:n - 1), intent(out) :: t
944  sll_int32 :: k
945  sll_real64 :: theta
946  sll_assert(sll_f_is_power_of_two(int(n, i64)))
947  theta = 2.0_f64*sll_p_pi/real(n, kind=f64) ! basic angular interval
948  ! By whatever means we use to compute the twiddles, some sanity
949  ! checks are in order:
950  ! t(0) = 1, t(1) = 0
951  ! t(n/8) = (sqrt(2)/2, sqrt(2)/2)
952  ! t(n/4) = (0,1)
953  ! t(n/2) = (0,-1) ... but this one is not stored
954  do k = 0, n/2 - 1
955  creal0(t, k) = cos(real(k, kind=f64)*theta)
956  cimag0(t, k) = sin(real(k, kind=f64)*theta)
957  end do
958  ! might as well fix this by hand since the result isn't exact otherwise:
959  creal0(t, 0) = 1.0_f64
960  cimag0(t, 0) = 0.0_f64
961 
962  if (n >= 4) then
963  creal0(t, n/4) = 0.0_f64
964  cimag0(t, n/4) = 1.0_f64
965  end if
966 
967  if (n >= 8) then
968  creal0(t, n/8) = sqrt(2.0_f64)/2.0_f64
969  cimag0(t, n/8) = sqrt(2.0_f64)/2.0_f64
970  end if
971  end subroutine compute_twiddles_real_array
972 
973  !***************************************************************************
974  ! The following function uses Meyer's algorithm (also known as the
975  ! Gold-Rader algorithm according to "Rodriguez. J. IEEE Transactions on
976  ! Acoustics, Speech and Signal Porcessing. Vol. 37. No. 8. August 1989.
977  ! p. 1298.") to yield the in-place, bit-reversed ordering of the array 'a'.
978  ! Use with n = power of 2.
979  !
980  !***************************************************************************
981 #define MAKE_BIT_REVERSE_FUNCTION( function_name, data_type ) \
982  subroutine function_name(n, a); \
983  intrinsic ishft, iand; \
984  data_type, dimension(:), intent(inout) :: a; \
985  integer, intent(in) :: n; \
986  integer :: i; \
987  integer :: j; \
988  integer :: k; \
989  data_type :: tmp; \
990  sll_assert(sll_f_is_power_of_two(int(n, i64))); \
991  j = 0; \
992  k = n; \
993  do i = 0, n - 2; \
994  if (i < j) then; \
995  tmp = a(j + 1); \
996  a(j + 1) = a(i + 1); \
997  a(i + 1) = tmp; \
998  end if; \
999  k = ishft(n, -1); \
1000  do while (k <= j); \
1001  j = j - k; \
1002  k = ishft(k, -1); \
1003  end do; \
1004  j = j + k; \
1005  end do; \
1006  end subroutine function_name
1007 
1008  make_bit_reverse_function(bit_reverse_complex, sll_comp64)
1009 ! /* PN DEFINED BUT NOT USED */
1010 ! /* MAKE_BIT_REVERSE_FUNCTION( bit_reverse_integer32, sll_int32 ) */
1011 ! /* MAKE_BIT_REVERSE_FUNCTION( bit_reverse_integer64, sll_int64 ) */
1012 ! /* MAKE_BIT_REVERSE_FUNCTION( bit_reverse_real, sll_real64 ) */
1013 
1014  ! ugly special case to bit-reverse a complex array that is represented
1015  ! by an array of reals. This is truly awful...
1016  subroutine bit_reverse_in_pairs(num_pairs, a)
1017  intrinsic ishft, iand
1018  integer, intent(in) :: num_pairs
1019  sll_real64, dimension(0:2*num_pairs - 1), intent(inout) :: a
1020  integer :: i
1021  integer :: j
1022  integer :: k
1023  sll_real64, dimension(1:2) :: tmp
1024  sll_assert(sll_f_is_power_of_two(int(num_pairs, i64)))
1025  j = 0
1026  k = num_pairs
1027 #define ARRAY(i) a(2*(i):2*(i)+1)
1028  do i = 0, num_pairs - 2
1029  if (i < j) then
1030 ! write (*,'(a,i4,a,i4)') 'will swap ',i,' and ',j
1031  tmp = array(j)
1032  array(j) = array(i)
1033  array(i) = tmp
1034  end if
1035  k = ishft(num_pairs, -1)
1036  do while (k <= j)
1037  j = j - k
1038  k = ishft(k, -1)
1039  end do
1040  j = j + k
1041  end do
1042  end subroutine bit_reverse_in_pairs
1043 #undef ARRAY
1044 
1045  !Conjugate complex array that is represented by an array of reals.
1046  subroutine conjg_in_pairs(n, array)
1047  sll_real64, dimension(:) :: array
1048  sll_int32 :: n, i
1049 
1050  sll_assert(size(array) .eq. n)
1051 
1052  do i = 2, n, 2
1053  array(i) = -array(i)
1054  end do
1055  end subroutine
1056 
1057  ! *************************************************************************
1058  ! Decimation in time FFT, natural order input, bit-reversed output (=NR).
1059  ! Size of the data must be a power of 2. Twiddle array must be in
1060  ! bit-reversed order. This implementation is 'cache-oblivious'. This is
1061  ! only a placeholder for an FFT really, it is not very efficient since it:
1062  ! - is just a simple radix-2
1063  ! - is not parallelized
1064  ! - no hardware acceleration
1065  ! But it is good to test our intarfaces.
1066  !
1067  ! Implementation notes:
1068  !
1069  ! The following function implements the Cooley-Tukey algorithm:
1070  !
1071  ! Y_r^(n-1) ---------------> Y_r^(n)=Y_r^(n-1) + omega_N^r*Z_r^(n-1) = X_r
1072  ! * *
1073  ! * *
1074  ! *
1075  ! * *
1076  ! * *
1077  ! Z_r^(n-1) ---------------> Y_(r+N/2)^(n-1) - omega_N^r*Z_r^(n-1)=X_(r+N/2)
1078  !
1079  ! In terms of the twiddles used at each level, the size two problems use only
1080  ! the omega_2^1 twiddle, the size-4 problems use omega_4^1 and omega_4^2,
1081  ! the size-8 problems use omega_8^1, omega_8^2, omega_8^3 and omega_8^4. This
1082  ! is the expected progression of the twiddle indices as we move deeper into
1083  ! the recursions.
1084  !
1085  ! *************************************************************************
1086 
1087  subroutine fft_dit_nr(data, n, twiddles, sign)
1088  sll_comp64, dimension(:), intent(inout) :: data
1089  sll_int32, intent(in) :: sign
1090  sll_comp64, dimension(:), intent(in) :: twiddles
1091  sll_int32, intent(in) :: n
1092  sll_assert(sll_f_is_power_of_two(int(n, i64)))
1093  sll_assert(size(data) .ge. n)
1094  call fft_dit_nr_aux(data, n, twiddles, 0, sign)
1095  end subroutine fft_dit_nr
1096 
1097  ! Decimation-in-time, natural-order input, bit-reversed order output:
1098  recursive subroutine fft_dit_nr_aux(dat, size, twiddles, &
1099  twiddle_index, sign)
1100  intrinsic ishft, conjg
1101  integer, intent(in) :: size
1102  !sll_comp64, dimension(0:size-1), intent(inout) :: dat
1103  sll_comp64, dimension(0:), intent(inout) :: dat
1104  ! It is more convenient when the twiddles are 0-indexed
1105  sll_comp64, dimension(0:), intent(in) :: twiddles
1106  integer, intent(in) :: twiddle_index
1107  integer, optional, intent(in) :: sign
1108  integer :: half
1109  integer :: j
1110  integer :: t_index_new
1111  sll_comp64 :: omega
1112  sll_comp64 :: tmp
1113 
1114  half = ishft(size, -1) ! any evidence this is faster?
1115  !if ( sign == sll_p_fft_backward ) then
1116  ! omega = twiddles(twiddle_index)
1117  !else if ( sign == sll_p_fft_forward ) then
1118  ! omega = conjg(twiddles(twiddle_index))
1119  !else
1120  ! stop 'ERROR in =fft_dit_nr_aux= argument sign invalid'
1121  !end if
1122  omega = twiddles(twiddle_index)
1123  ! Do the butterflies for this stage
1124  do j = 0, half - 1
1125  tmp = dat(j + half)*omega
1126  dat(j + half) = dat(j) - tmp
1127  dat(j) = dat(j) + tmp
1128  end do
1129  ! Spawn two recursive calls for the two new problems that have been created:
1130  ! the upper and lower halves of the data
1131  if (half > 1) then
1132  t_index_new = ishft(twiddle_index, 1)
1133  call fft_dit_nr_aux(dat(0:half - 1), &
1134  half, &
1135  twiddles, &
1136  t_index_new, &
1137  sign)
1138  call fft_dit_nr_aux(dat(half:2*half - 1), &
1139  half, &
1140  twiddles, &
1141  t_index_new + 1, &
1142  sign)
1143  else
1144  return
1145  end if
1146  end subroutine fft_dit_nr_aux
1147 
1148 !PN /*
1149 !PN DEFINED BUT NOT USED
1150 !PN subroutine fft_dit_rn( data, sign )
1151 !PN sll_comp64, dimension(:), intent(inout) :: data
1152 !PN sll_int32, intent(in) :: sign
1153 !PN sll_comp64, dimension(:), pointer :: twiddles
1154 !PN integer :: n
1155 !PN sll_int32 :: ierr
1156 !PN n = size(data) ! bad
1157 !PN SLL_ASSERT(sll_f_is_power_of_two(int(n,i64)))
1158 !PN SLL_ALLOCATE(twiddles(n/2),ierr)
1159 !PN call compute_twiddles(n,twiddles)
1160 !PN ! This algorithm uses the twiddles in natural order. The '1'
1161 !PN ! argument is because fft_dit_rn_aux internally 1-indexes its
1162 !PN ! arrays, so we are just indicating the first twiddle factor.
1163 !PN call fft_dit_rn_aux(data, n, twiddles, 1, sign)
1164 !PN end subroutine fft_dit_rn
1165 !PN */
1166 
1167  recursive subroutine fft_dit_rn_aux(data, &
1168  data_size, &
1169  twiddles, &
1170  twiddle_stride, &
1171  sign)
1172  intrinsic ishft, conjg
1173  sll_int32, intent(in) :: data_size
1174  ! looks like the most natural indexing for this algorithm is
1175  ! the 1-based indexing...
1176  sll_comp64, dimension(1:data_size), intent(inout) :: data
1177  sll_comp64, dimension(1:data_size/2), intent(in) :: twiddles
1178  integer, intent(in) :: twiddle_stride
1179  sll_int32 :: new_stride
1180  sll_int32 :: jtwiddle
1181  integer, intent(in) :: sign
1182  integer :: half
1183  integer :: j
1184  sll_comp64 :: omega
1185  sll_comp64 :: tmp
1186  jtwiddle = 1
1187  half = ishft(data_size, -1) ! is this be faster than a division in Fortran?
1188  if (half > 1) then
1189  new_stride = twiddle_stride*2
1190  call fft_dit_rn_aux(data(1:half), half, twiddles, new_stride, sign)
1191  call fft_dit_rn_aux(data(half + 1:data_size), half, twiddles, new_stride, sign)
1192  end if
1193  ! Do the butterflies for this stage
1194  do j = 1, half
1195  !if ( sign == sll_p_fft_forward ) then
1196  ! omega = conjg(twiddles(jtwiddle))
1197  !else if ( sign == sll_p_fft_backward ) then
1198  ! omega = twiddles(jtwiddle)
1199  !else
1200  ! stop 'ERROR in =fft_dit_rn_aux= argument sign invalid'
1201  !end if
1202  omega = twiddles(jtwiddle)
1203  tmp = data(j + half)*omega
1204  data(j + half) = data(j) - tmp
1205  data(j) = data(j) + tmp
1206  jtwiddle = jtwiddle + twiddle_stride
1207  end do
1208  end subroutine fft_dit_rn_aux
1209 
1210  ! notes:
1211  ! - the real data is size n
1212  ! - but we apply a complex fft, treating the n-sized array as n/2
1213  ! complex numbers. This means that the twiddle array should be
1214  ! n/4-size (number of complex nums), but n/2 if seen as an array
1215  ! of reals.
1216  ! - this is badly named, it is not that the this is an FFT for real
1217  ! values, but that the argument itself is real and the FFT interprets
1218  ! it as complex. Change this confusing function name.
1219  ! Proposed interface for this version of the FFT function:
1220  ! - Treat it as a function for FFT of complex data, even though the
1221  ! argument is a real array. This implies:
1222  ! - The size of the problem is the number of complex numbers, not the
1223  ! number of elements in the real array. Thus, if the real array is
1224  ! length 'n', this corresponds to a problem size 'n/2', as this is the
1225  ! number of complex numbers that are represented.
1226  recursive subroutine fft_dit_nr_real_array_aux(samples, &
1227  num_complex, &
1228  twiddles, &
1229  twiddle_index, &
1230  sign)
1231  intrinsic ishft, conjg
1232  ! num_complex is the number of complex numbers represented in the array.
1233  integer, intent(in) :: num_complex
1234  sll_real64, dimension(0:2*num_complex - 1), intent(inout):: samples
1235  ! It is more convenient when the twiddles are 0-indexed
1236  ! Declaring the size of the twiddles as in the following line, gives
1237  ! a runtime error as the array is accessed out of bounds.
1238  ! sll_real64, dimension(0:num_complex-1), intent(in) :: twiddles
1239  sll_real64, dimension(0:), intent(in) :: twiddles
1240  integer, intent(in) :: twiddle_index
1241  integer, intent(in) :: sign
1242  ! half represents half of the complex problem, not half of the array
1243  ! size.
1244  integer :: half
1245  integer :: j
1246  integer :: t_index_new
1247  sll_real64 :: omega_re
1248  sll_real64 :: omega_im
1249  sll_real64 :: tmp_re
1250  sll_real64 :: tmp_im
1251  sll_assert(num_complex .le. size(samples))
1252  half = ishft(num_complex, -1) ! would this be faster than a division
1253  ! in Fortran?
1254  ! select the value of the twiddle factor for this stage depending on
1255  ! the direction of the transform
1256  !if ( sign == sll_p_fft_forward ) then
1257  ! omega_re = CREAL0(twiddles, twiddle_index)
1258  ! omega_im = -CIMAG0(twiddles, twiddle_index)
1259  !else if ( sign == sll_p_fft_backward ) then
1260  ! omega_re = CREAL0(twiddles, twiddle_index)
1261  ! omega_im = CIMAG0(twiddles, twiddle_index)
1262  !else
1263  ! stop 'ERROR in =fft_dit_nr_real_array_aux= argument sign invalid'
1264  !end if
1265  omega_re = creal0(twiddles, twiddle_index)
1266  omega_im = cimag0(twiddles, twiddle_index)
1267  ! Do the butterflies for this stage
1268  do j = 0, half - 1
1269  ! multiply samples(j+half) and omega, store in tmp
1270  tmp_re = &
1271  creal0(samples, j + half)*omega_re - cimag0(samples, j + half)*omega_im
1272  tmp_im = &
1273  creal0(samples, j + half)*omega_im + cimag0(samples, j + half)*omega_re
1274  ! subtract tmp from samples(j), store in samples(j+half)
1275  creal0(samples, j + half) = creal0(samples, j) - tmp_re
1276  cimag0(samples, j + half) = cimag0(samples, j) - tmp_im
1277  ! add samples(j) and tmp, store in samples(j)
1278  creal0(samples, j) = creal0(samples, j) + tmp_re
1279  cimag0(samples, j) = cimag0(samples, j) + tmp_im
1280  end do
1281  ! Spawn two recursive calls for the two new problems that have been created:
1282  ! the upper and lower halves of the samples.
1283  if (half > 1) then
1284  t_index_new = ishft(twiddle_index, 1)
1285  call fft_dit_nr_real_array_aux(samples(0:2*half - 1), &
1286  half, &
1287  twiddles, &
1288  t_index_new, &
1289  sign)
1290  call fft_dit_nr_real_array_aux(samples(2*half:4*half - 1), &
1291  half, &
1292  twiddles, &
1293  t_index_new + 1, &
1294  sign)
1295  else
1296  return
1297  end if
1298  end subroutine fft_dit_nr_real_array_aux
1299 
1300  recursive subroutine fft_dit_rn_real_array_aux(data, &
1301  num_complex, &
1302  twiddles, &
1303  twiddle_stride, &
1304  sign)
1305  intrinsic ishft, conjg
1306  ! num_complex is the number of complex numbers represented in the array.
1307  integer, intent(in) :: num_complex
1308  sll_real64, dimension(1:2*num_complex), intent(inout) :: data
1309  ! For this reverse-to-natural order algorithm, it is more convenient
1310  ! to use the 1-based indexing. Note that this is a num_complex-sized FFT,
1311  ! which means that we need num_complex/2 twiddles, but since they are
1312  ! stored as reals, we need that the twiddle array be of size num_complex.
1313  sll_real64, dimension(1:num_complex), intent(in) :: twiddles
1314  sll_int32, intent(in) :: twiddle_stride
1315  sll_int32 :: new_stride
1316  sll_int32 :: jtwiddle
1317  integer, intent(in) :: sign
1318  integer :: half
1319  integer :: j
1320  sll_real64 :: omega_re
1321  sll_real64 :: omega_im
1322  sll_real64 :: tmp_re
1323  sll_real64 :: tmp_im
1324  sll_assert(num_complex .le. size(data))
1325  jtwiddle = 1
1326  ! note that half represents half the complex problem size, not half
1327  ! the array size.
1328  half = ishft(num_complex, -1) ! would this be faster than a division
1329  ! in Fortran?
1330  if (half > 1) then
1331  new_stride = twiddle_stride*2
1332  call fft_dit_rn_real_array_aux(data(1:num_complex), &
1333  half, &
1334  twiddles, &
1335  new_stride, &
1336  sign)
1337  call fft_dit_rn_real_array_aux(data(num_complex + 1:2*num_complex), &
1338  half, &
1339  twiddles, &
1340  new_stride, &
1341  sign)
1342  end if
1343  ! Do the butterflies for this stage
1344  do j = 1, half
1345  ! select the value of the twiddle factor for this stage depending on
1346  ! the direction of the transform
1347  !if( sign == sll_p_fft_forward ) then
1348  ! omega_re = CREAL1(twiddles, jtwiddle)
1349  ! omega_im = -CIMAG1(twiddles, jtwiddle)
1350  !else if ( sign == sll_p_fft_backward ) then
1351  ! omega_re = CREAL1(twiddles, jtwiddle)
1352  ! omega_im = CIMAG1(twiddles, jtwiddle)
1353  !else
1354  ! stop 'ERROR in =fft_dit_rn_real_array_aux= argument sign invalid'
1355  !end if
1356  omega_re = creal1(twiddles, jtwiddle)
1357  omega_im = cimag1(twiddles, jtwiddle)
1358  ! multiply data(j+half) and omega, store in tmp
1359  tmp_re = creal1(data, j + half)*omega_re - cimag1(data, j + half)*omega_im
1360  tmp_im = creal1(data, j + half)*omega_im + cimag1(data, j + half)*omega_re
1361  ! subtract tmp from data(j), store in data(j+half)
1362  creal1(data, j + half) = creal1(data, j) - tmp_re
1363  cimag1(data, j + half) = cimag1(data, j) - tmp_im
1364  ! add data(j) and tmp, store in data(j)
1365  creal1(data, j) = creal1(data, j) + tmp_re
1366  cimag1(data, j) = cimag1(data, j) + tmp_im
1367  jtwiddle = jtwiddle + twiddle_stride
1368  end do
1369  end subroutine fft_dit_rn_real_array_aux
1370 
1371  ! Important stuff that should be checked by the function or preferably
1372  ! at the plan-level:
1373  ! - twiddles is needed by the call to the complex FFT that operates on
1374  ! real-formatted data. Therefore, if the real data to be processed is
1375  ! size 'n', then, in the first step this data will be interpreted as
1376  ! a complex array of size 'n/2'. This means that the twiddles array is
1377  ! composed of 'n/4' complex numbers or 'n/2' reals
1378  ! - This function should be allowed to operate in an 'out-of-place' way.
1379  ! In case that an 'in-place' transform is required, the data array
1380  ! should have at least size n+2 real elements. The reason for this is
1381  ! that the FFT of real data has the symmetry property:
1382  !
1383  ! X_r = X_(N-r)*
1384  !
1385  ! which means that it requires storage for only N/2+1 independent modes.
1386  ! Two of these modes (0 and N/2) are real-valued. The remaining N/2-1 are
1387  ! complex valued. In terms of required memory:
1388  !
1389  ! 2 real modes => 2 reals
1390  ! N/2-1 complex modes => N-2 reals
1391  ! +___________
1392  ! N reals
1393  !
1394  ! Thus, in principle we could store all the information in the original
1395  ! real array. This has several inconveniences. Firstly, it would require
1396  ! packing the data in 'special' ways (the Numerical Recipes approach),
1397  ! like putting the real modes share a single complex-valued field. This
1398  ! requires special packing/unpacking and 'if' statements when it comes
1399  ! down to reading the modes. Another way could be to require the input
1400  ! data to be padded to allow storage for the extra modes. This is also
1401  ! very problematic, as it requires, from the moment of the allocation,
1402  ! some foresight that a given array will be the subject of an FFT
1403  ! operation. This may not be so bad... In any case, it seems that
1404  ! special logic will be required to deliver a requested Fourier mode,
1405  ! since in some cases one could read a value from an array but in other
1406  ! cases one needs the conjugate... For now, the present implementation
1407  ! chooses to pack the data in the minimum space possible, to the 'weird'
1408  ! packing and unpacking and use special logic to access the Fourier modes.
1409  subroutine real_data_fft_dit(data, n, twiddles, twiddles_n, sign)
1410  sll_int32, intent(in) :: n
1411  sll_real64, dimension(0:), intent(inout) :: data
1412  sll_real64, dimension(0:n/2 - 1), intent(in) :: twiddles
1413  sll_real64, dimension(0:n - 1), intent(in) :: twiddles_n
1414  sll_int32, intent(in) :: sign
1415  sll_int32 :: n_2
1416  sll_int32 :: i
1417  sll_real64 :: hi_re
1418  sll_real64 :: hi_im
1419  sll_real64 :: hn_2mi_re
1420  sll_real64 :: hn_2mi_im
1421  sll_real64 :: tmp_re
1422  sll_real64 :: tmp_im
1423  sll_real64 :: tmp2_re
1424  sll_real64 :: tmp2_im
1425  sll_real64 :: tmp3_re
1426  sll_real64 :: tmp3_im
1427  sll_real64 :: omega_re
1428  sll_real64 :: omega_im
1429  sll_real64 :: s
1430  sll_assert(sll_f_is_power_of_two(int(n, i64)))
1431  sll_assert(size(data) .ge. n)
1432  sll_assert(size(twiddles) .eq. n/2)
1433  n_2 = n/2
1434  ! First step in computing the FFT of the real array is to launch
1435  ! a complex FFT on the data, interpreting the real-array data as
1436  ! complex numbers. In other words, if the data is:
1437  !
1438  ! [d1, d2, d3, d4, ..., dn]
1439  !
1440  ! We reinterpret each array element as the real or imaginary part
1441  ! of a sequence of complex numbers totalling in number to half
1442  ! the number of elements in the original array.
1443  !
1444  ! [re1, im1, re2, im2, ..., ren/2, imn/2]
1445  !
1446  ! The next part of the algorithm is to construct the modes of the
1447  ! real FFT from the results of this complex transform. The inverse
1448  ! transform is computed by essentially inverting the order of this
1449  ! process and changing some signs...
1450  if (sign .eq. sll_p_fft_forward) then
1451  ! we use the following as the 'switch' to flip signs between
1452  ! FORWARD and INVERSE transforms.
1453  s = -1.0_f64
1454  ! The following call has to change to refer to its natural wrapper...
1455  call fft_dit_nr_real_array_aux(data(0:n - 1), &
1456  n_2, &
1457  twiddles, &
1458  0, &
1460  ! but our _nr_ algorithm bit reverses the result, so, until we have
1461  ! some way to index the data correctly we have to do this:
1462  call bit_reverse_in_pairs(n_2, data(0:n - 1))
1463  else if (sign .eq. sll_p_fft_backward) then
1464  s = 1.0_f64
1465  else
1466  stop 'ERROR IN =REAL_DATA_FFT_DIT= invalid argument sign'
1467  end if
1468  do i = 1, n_2/2 ! the index 'i' corresponds to indexing H
1469  ! sll_p_fft_forward case: We intend to mix the odd/even components that we
1470  ! have computed into complex numbers H_n. These Complex numbers will
1471  ! give the modes as in:
1472  !
1473  ! F_i = 1/2*(H_i + H_(N/2-i)^*) - i/2*(H_i-H_(N/2-i)^*)*exp(-j*2*pi*i/N)
1474  !
1475  ! which is the answer we are after.
1476  !
1477  ! sll_p_fft_backward case: The process of decomposing the H_n's into the
1478  ! corresponding even and odd terms:
1479  !
1480  ! (even terms:) F_n^e = (F_n + F_(N/2-n)^*)
1481  !
1482  ! (odd terms: ) F_n^o = (F_n - F_(N/2-n)^*)exp(j*2*pi*i/N)
1483  !
1484  ! followed by a complex transform to calculate
1485  !
1486  ! H_n = F_n^(1) + i*F_n^(2)
1487  hi_re = creal0(data, i)
1488  hi_im = cimag0(data, i)
1489  hn_2mi_re = creal0(data, n_2 - i)
1490  hn_2mi_im = -cimag0(data, n_2 - i)
1491  omega_re = creal0(twiddles_n, i)
1492  omega_im = s*cimag0(twiddles_n, i) ! conjugated for FORWARD
1493  ! Compute tmp = 1/2*(H_i + H_(N/2-i)^*)
1494  tmp_re = 0.5_f64*(hi_re + hn_2mi_re)
1495  tmp_im = 0.5_f64*(hi_im + hn_2mi_im)
1496  ! Compute tmp2 = i/2*(H_n - H_(N/2-n)^*), the sign depends on the
1497  ! direction of the FFT.
1498  tmp2_re = s*0.5_f64*(hi_im - hn_2mi_im)
1499  tmp2_im = -s*0.5_f64*(hi_re - hn_2mi_re)
1500  ! Multiply tmp2 by the twiddle factor and add to tmp.
1501  tmp3_re = tmp2_re*omega_re - tmp2_im*omega_im
1502  tmp3_im = tmp2_re*omega_im + tmp2_im*omega_re
1503  creal0(data, i) = tmp_re - tmp3_re
1504  cimag0(data, i) = tmp_im - tmp3_im
1505  creal0(data, n_2 - i) = tmp_re + tmp3_re
1506  cimag0(data, n_2 - i) = -tmp_im - tmp3_im
1507  end do
1508  if (sign .eq. sll_p_fft_forward) then
1509  ! Set the first and N/2 values independently and pack them in the
1510  ! memory space provided by the original data.
1511  tmp_re = data(0)
1512  data(0) = tmp_re + data(1) ! mode 0 is real
1513  data(1) = tmp_re - data(1) ! mode N/2 is real
1514  else if (sign .eq. sll_p_fft_backward) then
1515  ! Unpack the modes.
1516  tmp_re = data(0)
1517  data(0) = 0.5_f64*(tmp_re + data(1))
1518  data(1) = 0.5_f64*(tmp_re - data(1))
1519  ! The following call has to change to refere to its natural wrapper...
1520  call fft_dit_nr_real_array_aux(data(0:n - 1), &
1521  n_2, &
1522  twiddles, &
1523  0, &
1525  ! but our _nr_ algorithm bit reverses the result, so, until we have
1526  ! some way to index the data correctly we have to do this:
1527  call bit_reverse_in_pairs(n_2, data(0:n - 1))
1528  data = data*2.0_f64
1529  end if
1530  end subroutine real_data_fft_dit
1531 
1532 #undef CREAL0
1533 #undef CIMAG0
1534 #undef CREAL1
1535 #undef CIMAG1
1536 
1537 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
1538 end module sll_m_fft
Fortran module where set some physical and mathematical constants.
real(kind=f64), parameter, public sll_p_pi
FFT interface for FFTW.
subroutine, public sll_s_fft_get_k_list_r2r_1d(plan, k_list)
Get a list of the k modes in an r2r FFT k_list = [0, 1, 2, ..., n/2, (n+1)/2-1, .....
subroutine, public sll_s_fft_exec_r2c_1d(plan, array_in, array_out)
Compute fast Fourier transform in real to complex mode.
subroutine, public sll_s_fft_free(plan)
Delete a plan.
integer(c_int), parameter, public sll_p_fft_wisdom_only
subroutine, public sll_s_fft_init_c2c_2d(plan, nx, ny, array_in, array_out, direction, normalized, aligned, optimization)
Create new 2d complex to complex plan.
subroutine, public sll_s_fft_init_r2c_1d(plan, nx, array_in, array_out, normalized, aligned, optimization)
Create new 1d real to complex plan for forward FFT.
subroutine, public sll_s_fft_print_defaultlib()
Function to print the FFT library behind the interface.
integer(c_int), parameter, public sll_p_fft_exhaustive
FFTW planning-rigor flag FFTW_EXHAUSTIVE (more optimization than PATIENT) NOTE: planner overwrites th...
subroutine, public sll_s_fft_exec_c2c_2d(plan, array_in, array_out)
Compute fast Fourier transform in complex to complex mode.
subroutine, public sll_s_fft_deallocate_aligned_complex(data)
Function to deallocate an aligned complex array.
subroutine, public sll_s_fft_exec_c2r_1d(plan, array_in, array_out)
Compute fast Fourier transform in complex to real mode.
integer(c_int), parameter, public sll_p_fft_estimate
FFTW planning-rigor flag FFTW_ESTIMATE (simple heuristic for planer) [value 64]. This is our default ...
integer, parameter, public sll_p_fft_forward
Flag to specify forward FFT (negative sign) [value -1].
subroutine, public sll_s_fft_exec_c2r_2d(plan, array_in, array_out)
Compute fast Fourier transform in complex to real mode.
subroutine, public sll_s_fft_init_c2r_1d(plan, nx, array_in, array_out, normalized, aligned, optimization)
Create new 1d complex to real plan for backward FFT.
integer(c_int), parameter, public sll_p_fft_patient
FFTW planning-rigor flag FFTW_PATIENT (more optimizaton than MEASURE) NOTE: planner overwrites the in...
subroutine, public sll_s_fft_set_mode_c2r_1d(plan, data, ck, k)
Subroutine to set a complex mode to the real representation of r2r.
real(c_double) function, dimension(:), pointer, public sll_f_fft_allocate_aligned_real(n)
Function to allocate an aligned real array.
subroutine, public sll_s_fft_get_k_list_r2c_1d(plan, k_list)
Get a list of the k modes in an r2c FFT k_list = [0, 1, 2, ..., n/2].
subroutine, public sll_s_fft_get_k_list_c2c_1d(plan, k_list)
Get a list of the k modes in a c2c FFT k_list = [0, 1, 2, ..., n/2, (n/2+1)-n, ......
subroutine, public sll_s_fft_init_c2r_2d(plan, nx, ny, array_in, array_out, normalized, aligned, optimization)
Create new 2d real to complex plan for backward FFT.
subroutine, public sll_s_fft_init_r2r_1d(plan, nx, array_in, array_out, direction, normalized, aligned, optimization)
Create new 1d real to real plan.
subroutine, public sll_s_fft_exec_c2c_1d(plan, array_in, array_out)
Compute fast Fourier transform in complex to complex mode.
subroutine, public sll_s_fft_exec_r2c_2d(plan, array_in, array_out)
Compute fast Fourier transform in real to complex mode.
subroutine, public sll_s_fft_deallocate_aligned_real(data)
Function to deallocate an aligned real array.
subroutine, public sll_s_fft_init_c2c_1d(plan, nx, array_in, array_out, direction, normalized, aligned, optimization)
Create new 1d complex to complex plan.
subroutine, public sll_s_fft_init_r2c_2d(plan, nx, ny, array_in, array_out, normalized, aligned, optimization)
Create new 2d complex to real plan for forward FFT.
integer, parameter, public sll_p_fft_backward
Flag to specify backward FFT (positive sign) [value 1].
subroutine, public sll_s_fft_exec_r2r_1d(plan, array_in, array_out)
Compute fast Fourier transform in real to real mode.
complex(kind=f64) function, public sll_f_fft_get_mode_r2c_1d(plan, data, k)
Function to reconstruct a complex FFT mode from the data of a r2r transform.
integer(c_int), parameter, public sll_p_fft_measure
FFTW planning-rigor flag FFTW_MEASURE (optimized plan) NOTE: planner overwrites the input array durin...
complex(c_double_complex) function, dimension(:), pointer, public sll_f_fft_allocate_aligned_complex(n)
Function to allocate an aligned complex array.
Some common numerical utilities.
logical function, public sll_f_is_power_of_two(n)
Check if an integer is equal to.
Type for FFT plan in SeLaLib.
    Report Typos and Errors