!----------------------------------------------------------------------- !Main program !----------------------------------------------------------------------- program openmp05 use omp_lib implicit none integer, parameter :: isp = selected_int_kind(9) integer, parameter :: idp = selected_int_kind(18) integer, parameter :: rsp = kind(1.0) integer, parameter :: rdp = selected_real_kind(2*precision(1.0_rsp)) integer, parameter :: csp = kind((1.0,1.0)) integer, parameter :: cdp = kind((1.0_rdp,1.0_rdp)) integer, parameter :: FFTW_R2HC = 0 integer, parameter :: FFTW_HC2R = 1 integer, parameter :: FFTW_DHT = 2 integer, parameter :: FFTW_REDFT00 = 3 integer, parameter :: FFTW_REDFT01 = 4 integer, parameter :: FFTW_REDFT10 = 5 integer, parameter :: FFTW_REDFT11 = 6 integer, parameter :: FFTW_RODFT00 = 7 integer, parameter :: FFTW_RODFT01 = 8 integer, parameter :: FFTW_RODFT10 = 9 integer, parameter :: FFTW_RODFT11 = 10 integer, parameter :: FFTW_FORWARD = -1 integer, parameter :: FFTW_BACKWARD = + 1 integer, parameter :: FFTW_MEASURE = 0 integer, parameter :: FFTW_DESTROY_INPUT = 1 integer, parameter :: FFTW_UNALIGNED = 2 integer, parameter :: FFTW_CONSERVE_MEMORY = 4 integer, parameter :: FFTW_EXHAUSTIVE = 8 integer, parameter :: FFTW_PRESERVE_INPUT = 16 integer, parameter :: FFTW_PATIENT = 32 integer, parameter :: FFTW_ESTIMATE = 64 integer, parameter :: FFTW_ESTIMATE_PATIENT = 128 integer, parameter :: FFTW_BELIEVE_PCOST = 256 integer, parameter :: FFTW_DFT_R2HC_ICKY = 512 integer, parameter :: FFTW_NONTHREADED_ICKY = 1024 integer, parameter :: FFTW_NO_BUFFERING = 2048 integer, parameter :: FFTW_NO_INDIRECT_OP = 4096 integer, parameter :: FFTW_ALLOW_LARGE_GENERIC = 8192 integer, parameter :: FFTW_NO_RANK_SPLITS = 16384 integer, parameter :: FFTW_NO_VRANK_SPLITS = 32768 integer, parameter :: FFTW_NO_VRECURSE = 65536 integer, parameter :: FFTW_NO_SIMD = 131072 integer, parameter :: n=256 integer, parameter :: m=256 integer, parameter :: o=128 complex(cdp), allocatable, dimension(:,:,:) :: a,b real(rdp), allocatable, dimension(:,:,:) :: ar,br complex(cdp), allocatable, dimension(:) :: ad real(rdp), allocatable, dimension(:) :: adr,bdr integer :: i, j, k, l,nproc,iret real (rdp) :: cputime, wcputime real(rdp), dimension(1:10) :: cputime_a, wcputime_a integer, parameter :: CURRENT_PLAN_FLAG=ior(FFTW_ESTIMATE,FFTW_UNALIGNED) !quick start-up but slower plan integer (idp) :: plan_1,plan_2 integer (idp) :: plan3d_1 integer (idp), dimension(m,n) :: plan_1a allocate(a(1:m,1:n,1:o)) allocate (b(1:m,1:n,1:o)) allocate(ar(1:m,1:n,1:o)) allocate (br(1:m,1:n,1:o)) allocate(ad(1:o)) allocate(adr(1:o)) allocate(bdr(1:o)) nproc=2 call omp_set_num_threads(nproc) call dfftw_init_threads(iret) if(iret==0) stop "stop" call random_number(ar) call random_number(adr) call random_number(br) call random_number(bdr) write(*,*) "debug 00a" a=cmplx(ar,br,kind=cdp) b=a ad=cmplx(adr,bdr,kind=cdp) write(*,*) "debug 00b" call dfftw_plan_with_nthreads(1) ! call dfftw_plan_with_nthreads(nproc) call dfftw_plan_dft_1d(plan_1,o,ad,ad,FFTW_FORWARD,CURRENT_PLAN_FLAG) do j = 1, n do i = 1, m call dfftw_plan_dft_1d(plan_1a(i,j),o,ad,ad,FFTW_FORWARD,CURRENT_PLAN_FLAG) enddo enddo call dfftw_plan_with_nthreads(nproc) call dfftw_plan_dft_3d(plan3d_1,m,n,o,b,b,FFTW_FORWARD,CURRENT_PLAN_FLAG) ! cputimes call cpu_time(cputime_a(1)) wcputime_a(1) = omp_get_wtime() write(*,*) "debug 01" !$omp parallel do private(i,j,ad) shared(a,plan_1a), default(none) do j = 1, n do i = 1, m ad = a(i,j,:) call dfftw_execute_dft(plan_1a(i,j),ad,ad) !HOTSPOT a(i,j,:) = ad end do end do write(*,*) "debug 02" call cpu_time(cputime_a(2)) wcputime_a(2) = omp_get_wtime() call dfftw_execute_dft(plan3d_1,b,b) write(*,*) "debug 03" call cpu_time(cputime_a(3)) wcputime_a(3) = omp_get_wtime() cputime = (cputime_a(2) - cputime_a(1))/nproc wcputime = wcputime_a(2) - wcputime_a(1) write(*,'(2(A,F16.3))') 'a CPU Time:',cputime,' OMP wtime:',wcputime cputime = (cputime_a(3) - cputime_a(2))/nproc wcputime = wcputime_a(3) - wcputime_a(2) write(*,'(2(A,F16.3))') 'b CPU Time:',cputime,' OMP wtime:',wcputime call omp_set_num_threads(1) call dfftw_destroy_plan(plan3d_1) do j = 1, n do i = 1, m call dfftw_destroy_plan(plan_1a(i,j)) enddo enddo call dfftw_cleanup_threads() end program openmp05 |