Report Typos and Errors    
Semi-Lagrangian Library
Modular library for kinetic and gyrokinetic simulations of plasmas in fusion energy devices.
sll_m_compression.F90
Go to the documentation of this file.
1 !**************************************************************
9 !**************************************************************
10 
11 
13 #include "sll_working_precision.h"
14 
15  use iso_c_binding
16 
17 #ifdef _OPENMP
18  use omp_lib
19 #endif
20 
21  implicit none
22 
23 #ifdef USE_ZFP
24  interface
25  integer (c_int) function sll_f_compressbound_zfp(buf_in, n_doubles_in, prec) &
26  bind(c, name='zfp_maximum_size')
27  use iso_c_binding
28  implicit none
29  type (c_ptr), value :: buf_in
30  integer (c_int), value :: n_doubles_in
31  integer (c_int), value :: prec
32  end function sll_f_compressbound_zfp
33 
34  integer (c_int) function sll_f_deflate_zfp(buf_in, buf_out, n_doubles_in, n_bytes_out_max, prec) &
35  bind(c, name='zfp_compress_safe')
36  use iso_c_binding
37  implicit none
38  type (c_ptr), value :: buf_in
39  type (c_ptr), value :: buf_out
40  integer (c_int), value :: n_doubles_in
41  integer (c_int), value :: n_bytes_out_max
42  integer (c_int), value :: prec
43  end function sll_f_deflate_zfp
44 
45  integer (c_int) function sll_f_inflate_zfp(buf_in, buf_out, n_bytes_in, n_doubles_out_max, prec) &
46  bind(c, name='zfp_decompress_safe')
47  use iso_c_binding
48  implicit none
49  type (c_ptr), value :: buf_in
50  type (c_ptr), value :: buf_out
51  integer (c_int), value :: n_bytes_in
52  integer (c_int), value :: n_doubles_out_max
53  integer (c_int), value :: prec
54  end function sll_f_inflate_zfp
55  end interface
56 #else
57  ! add stub code here
58 #endif
59 
60  ! ZFP compresses in blocks of 64 double precision numbers
61  integer, parameter :: zfp_blocksize = 64
62  integer :: zfp_precision = 32
63 
66  character, dimension(:), pointer :: buffer => null() ! concatenated slices of compressed data
67  ! NOTE -- integers below need to be taken care of during MPI communication (concatenate/decatenate)
68  integer :: n_slices ! number of data slices (== threads used during compression)
69  integer :: n_bytes_deflated_total = 0 ! total size of compressed data
70  integer :: n_bytes_inflated_total = 0 ! total size of decompressed data
71  integer, dimension(:), pointer :: n_bytes_deflated => null() ! length of the i-th slice of compressed data
72  integer, dimension(:), pointer :: offset_deflated => null() ! offset for the i-th slice of compressed data
73  integer, dimension(:), pointer :: n_bytes_inflated => null() ! length of the i-th slice of uncompressed data
74  integer, dimension(:), pointer :: offset_inflated => null() ! offset for the i-th slice of uncompressed data
76 
77 ! public :: sll_f_compressbound_zfp, &
78 ! sll_f_deflate_zfp, &
79 ! sll_f_inflate_zfp, &
80 ! zfp_blocksize, &
81 ! zfp_accuracy
82 
83 
84 contains
85 
86 
88  subroutine deflate_buffer_real64(buf, comp, n_doubles, n_threads)
89  sll_real64, intent(in), target :: buf(0:) ! buffer to be compressed
90  type(sll_t_compressed_buffer), intent(inout) :: comp ! data structure containing compressed data and offsets
91  integer, intent(in), optional :: n_doubles, n_threads ! number of doubles to be compressed, number of threads to be used
92 #ifdef USE_ZFP
93  integer :: n_omp_threads, omp_size, omp_rank
94  integer :: n_total, n_per_thread ! total and per-thread number of elements in 'buf'
95  integer :: n_offset_thread ! offset to indicate where a thread shall start compressing
96  integer :: n_bytes_comp_thread_max ! maximum possible size for compressed data (needed for allocation)
97  integer :: i, off
98  character, allocatable, target :: thread_buf(:)
99  integer, parameter :: word_size = 8 ! double precision
100 
101  if (present(n_doubles)) then
102  n_total = n_doubles
103  else
104  n_total = size(buf)
105  endif
106 
107 #ifdef _OPENMP
108  if (present(n_threads)) then
109  n_omp_threads = n_threads
110  else
111  n_omp_threads = omp_get_max_threads()
112  endif
113  ! n_omp_threads = 1
114 #else
115  n_omp_threads = 1
116 #endif
117 
119  call allocate_compressed_buffer_index_arrays(comp, n_omp_threads)
120 
121 !$omp parallel num_threads(n_omp_threads) default(shared) &
122 !$omp& private(n_per_thread, n_offset_thread, n_bytes_comp_thread_max, thread_buf) &
123 !$omp& private(omp_size, omp_rank, i, off)
124 #ifdef _OPENMP
125  omp_size = omp_get_num_threads()
126  omp_rank = omp_get_thread_num()
127 #else
128  omp_size = 1
129  omp_rank = 0
130 #endif
131 
132  ! distribute the input data among the threads, taking care not to violate the ZFP blocksize
133  n_per_thread = n_total / zfp_blocksize / omp_size
134  n_offset_thread = n_per_thread * omp_rank
135  if (omp_rank == omp_size-1) then
136  n_per_thread = (n_total / zfp_blocksize) - n_per_thread * omp_rank
137  endif
138  n_per_thread = n_per_thread * zfp_blocksize
139  n_offset_thread = n_offset_thread * zfp_blocksize
140 
141  ! add size and offset information to 'comp' data structure
142  comp%n_bytes_inflated(omp_rank) = word_size * n_per_thread
143  comp%offset_inflated(omp_rank) = word_size * n_offset_thread
144 
145  n_bytes_comp_thread_max = sll_f_compressbound_zfp(c_loc(buf(n_offset_thread)), n_per_thread, zfp_precision)
146  allocate(thread_buf(0:n_bytes_comp_thread_max-1))
147 
148  comp%n_bytes_deflated(omp_rank) = sll_f_deflate_zfp(c_loc(buf(n_offset_thread)), c_loc(thread_buf), &
149  n_per_thread, n_bytes_comp_thread_max, zfp_precision)
150 
151 !$omp barrier
152 !$omp master
153  comp%n_bytes_deflated_total = sum(comp%n_bytes_deflated)
154  allocate(comp%buffer(0:comp%n_bytes_deflated_total-1))
155  comp%offset_deflated(0) = 0
156  do i=1, omp_size-1
157  comp%offset_deflated(i) = comp%offset_deflated(i-1) + comp%n_bytes_deflated(i-1)
158  enddo
159 !$omp end master
160 !$omp barrier
161 
162  ! cache thread's offset for the use in the subsequent loop
163  off = comp%offset_deflated(omp_rank)
164  ! copy the thread buffers in parallel into the buffer
165  do i=0, comp%n_bytes_deflated(omp_rank)-1
166  comp%buffer(off + i) = thread_buf(i)
167  enddo
168 
169  deallocate(thread_buf)
170 !$omp end parallel
171 
172  comp%n_bytes_inflated_total = word_size * n_total
173 #else
174  ! add stub code here
175 #endif
176  end subroutine deflate_buffer_real64
177 
178 
180  subroutine inflate_buffer_real64(buf, comp, n_threads)
181  sll_real64, intent(inout), target :: buf(0:) ! 1d view on buffer to be decompressed into
182  type(sll_t_compressed_buffer), intent(inout) :: comp ! data structure containing compressed data and offsets
183  integer, intent(in), optional :: n_threads
184 #ifdef USE_ZFP
185  integer :: n_omp_threads, omp_size, omp_rank
186  integer :: i, off, n_el, ierr
187  integer, parameter :: word_size = 8 ! double precision
188 
189 #ifdef _OPENMP
190  if (present(n_threads)) then
191  n_omp_threads = n_threads
192  else
193  n_omp_threads = omp_get_max_threads()
194  endif
195  ! --- DEBUG ---
196  ! use as many threads as were used during compression
197  ! n_omp_threads = comp%n_slices
198  ! n_omp_threads = 1
199 #else
200  n_omp_threads = 1
201 #endif
202 
203 !$omp parallel num_threads(n_omp_threads) default(shared) &
204 !$omp& private(omp_size, omp_rank, i, ierr, off, n_el)
205 #ifdef _OPENMP
206  omp_size = omp_get_num_threads()
207  omp_rank = omp_get_thread_num()
208 #else
209  omp_size = 1
210  omp_rank = 0
211 #endif
212 
213 !$omp do schedule(static,1)
214  do i=0,comp%n_slices-1
215  ! offset and element count must be in units of double precision numbers (not raw bytes)
216  off = comp%offset_inflated(i)/word_size
217  n_el = comp%n_bytes_inflated(i)/word_size
218  ierr = sll_f_inflate_zfp(c_loc(comp%buffer(comp%offset_deflated(i))), c_loc(buf(off)), &
219  comp%n_bytes_deflated(i), n_el, zfp_precision)
220  enddo
221 !$omp end do
222 
223 !$omp end parallel
224 
225 #else
226  ! add stub code here
227 #endif
228  end subroutine inflate_buffer_real64
229 
230 
232  type(sll_t_compressed_buffer), intent(inout) :: comp ! data structure containing compressed data and offsets
233  if (associated(comp%buffer)) then
234  deallocate(comp%buffer); nullify(comp%buffer)
235  endif
236  if (associated(comp%n_bytes_deflated)) then
237  deallocate(comp%n_bytes_deflated); nullify(comp%n_bytes_deflated)
238  endif
239  if (associated(comp%offset_deflated)) then
240  deallocate(comp%offset_deflated); nullify(comp%offset_deflated)
241  endif
242  if (associated(comp%n_bytes_inflated)) then
243  deallocate(comp%n_bytes_inflated); nullify(comp%n_bytes_inflated)
244  endif
245  if (associated(comp%offset_inflated)) then
246  deallocate(comp%offset_inflated); nullify(comp%offset_inflated)
247  endif
248  comp%n_slices = 0
249  comp%n_bytes_inflated_total = 0
250  comp%n_bytes_deflated_total = 0
251  end subroutine deallocate_compressed_buffer_obj
252 
253 
254  subroutine allocate_compressed_buffer_index_arrays(comp, n_slices)
255  type(sll_t_compressed_buffer), intent(inout) :: comp
256  integer, intent(in) :: n_slices
257  allocate(comp%n_bytes_deflated(0:n_slices-1))
258  allocate(comp%offset_deflated(0:n_slices-1))
259  allocate(comp%n_bytes_inflated(0:n_slices-1))
260  allocate(comp%offset_inflated(0:n_slices-1))
261  comp%n_slices = n_slices
263 
264 
265  subroutine print_compression_information(comp, verbose)
266  type(sll_t_compressed_buffer), intent(in) :: comp
267  logical, intent(in), optional :: verbose
268 
269  write(*,*) "--- sll_m_compression ---------------------"
270  if ((present(verbose)).and.(verbose)) then
271  write(*,*) "n_slices =", comp%n_slices
272  !write(*,*) "n_bytes_inflated_total =", comp%n_bytes_inflated_total
273  write(*,*) "n_bytes_inflated =", comp%n_bytes_inflated
274  write(*,*) "offset_inflated =", comp%offset_inflated
275  write(*,*) "n_bytes_deflated_total =", comp%n_bytes_deflated_total
276  write(*,*) "n_bytes_deflated =", comp%n_bytes_deflated
277  write(*,*) "offset_deflated =", comp%offset_deflated
278  write(*,*) "zfp_precision =", zfp_precision
279  endif
280  if (comp%n_bytes_deflated_total > 0) then
281  write(*,*) "n_bytes_inflated_total =", comp%n_bytes_inflated_total
282  write(*,*) "ratio =", &
283  real(comp%n_bytes_inflated_total)/real(comp%n_bytes_deflated_total)
284  endif
285  write(*,*) "-------------------------------------------"
286  end subroutine print_compression_information
287 
288 
289  subroutine set_compression_precision(prec)
290  integer, intent(in) :: prec
291  zfp_precision = prec
292  end subroutine set_compression_precision
293 
294 
296  function concatenate_index_arrays(comp, array) result(n_el)
297  type(sll_t_compressed_buffer) :: comp
298  integer, pointer :: array(:)
299  integer :: i, n_el, idx
300 
301  n_el = 3 + 4 * comp%n_slices
302  allocate(array(0:n_el-1))
303 
304  idx = 0
305  array(idx) = comp%n_slices
306  idx = idx + 1
307  array(idx) = comp%n_bytes_deflated_total
308  idx = idx + 1
309  array(idx) = comp%n_bytes_inflated_total
310  idx = idx + 1
311  do i=0,comp%n_slices-1
312  array(idx) = comp%n_bytes_deflated(i)
313  idx = idx + 1
314  enddo
315  do i=0,comp%n_slices-1
316  array(idx) = comp%offset_deflated(i)
317  idx = idx + 1
318  enddo
319  do i=0,comp%n_slices-1
320  array(idx) = comp%n_bytes_inflated(i)
321  idx = idx + 1
322  enddo
323  do i=0,comp%n_slices-1
324  array(idx) = comp%offset_inflated(i)
325  idx = idx + 1
326  enddo
327  end function concatenate_index_arrays
328 
329 
330  subroutine decatenate_index_arrays(comp, array)
331  type(sll_t_compressed_buffer) :: comp
332  integer, pointer :: array(:)
333  integer :: i, n_el, idx
334  idx = 0
335  comp%n_slices = array(idx)
336  idx = idx + 1
337  comp%n_bytes_deflated_total = array(idx)
338  idx = idx + 1
339  comp%n_bytes_inflated_total = array(idx)
340  idx = idx + 1
341  call allocate_compressed_buffer_index_arrays(comp, comp%n_slices)
342  do i=0,comp%n_slices-1
343  comp%n_bytes_deflated(i) = array(idx)
344  idx = idx + 1
345  enddo
346  do i=0,comp%n_slices-1
347  comp%offset_deflated(i) = array(idx)
348  idx = idx + 1
349  enddo
350  do i=0,comp%n_slices-1
351  comp%n_bytes_inflated(i) = array(idx)
352  idx = idx + 1
353  enddo
354  do i=0,comp%n_slices-1
355  comp%offset_inflated(i) = array(idx)
356  idx = idx + 1
357  enddo
358  end subroutine decatenate_index_arrays
359 
360 
361  ! simple time function for the unit test code to be independent from the rest of selalib
362  function get_time()
363  double precision :: get_time
364 #ifdef _OPENMP
365  get_time = omp_get_wtime()
366 #else
367  get_time = 0.0
368 #endif
369  end function
370 
371 end module sll_m_compression
Module providing an F90 interface to the ZFP compression library: http://computation....
subroutine allocate_compressed_buffer_index_arrays(comp, n_slices)
subroutine print_compression_information(comp, verbose)
integer function concatenate_index_arrays(comp, array)
allocate array, copy indices from comp into array, return
subroutine set_compression_precision(prec)
subroutine deallocate_compressed_buffer_obj(comp)
double precision function get_time()
subroutine decatenate_index_arrays(comp, array)
subroutine deflate_buffer_real64(buf, comp, n_doubles, n_threads)
compress buffer
subroutine inflate_buffer_real64(buf, comp, n_threads)
decompress buffer
integer, parameter zfp_blocksize
data structure to support threaded ZFP compression and decompression
    Report Typos and Errors