From 195319ab299d523f66f06945c8eede42b9cf4186 Mon Sep 17 00:00:00 2001 From: "J.A. de Jong - ASCEE" Date: Sun, 29 Dec 2019 22:07:27 +0100 Subject: [PATCH] Added the possibility to shift to different fft backend. Now set to fftw. Seems to work properly --- CMakeLists.txt | 106 +++++++++++++++++++++---------------- lasp/c/CMakeLists.txt | 2 +- lasp/c/lasp_fft.c | 117 ++++++++++++++++++++++++++++++----------- lasp/c/lasp_mat.h | 1 + lasp/c/lasp_math_raw.h | 2 + lasp/c/lasp_python.h | 9 ++-- test/fft_test.py | 2 +- 7 files changed, 156 insertions(+), 83 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0738550..5690093 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -23,17 +23,32 @@ add_definitions(-DLASP_MAX_NFFT=33554432) # 2**25 add_definitions(-D_REENTRANT) +# ############### Choose an fft backend here +#set(LASP_FFT_BACKEND fftpack) +set(LASP_FFT_BACKEND "fftw") + +if(LASP_FFT_BACKEND STREQUAL "fftw") + find_library(FFTW_LIBRARY NAMES fftw3 fftw) + set(FFTW_LIBRARIES "${FFTW_LIBRARY}") + set(LASP_FFT_LIBRARY "${FFTW_LIBRARIES}") + add_definitions(-DLASP_FFT_BACKEND_FFTW) +elseif(LASP_FFT_BACKEND STREQUAL "fftpack") + add_definitions(-DLASP_FFT_BACKEND_FFTPACK) + set(LASP_FFT_LIBRARY "fftpack") +endif() + if(LASP_FLOAT STREQUAL "double") - add_definitions(-DLASP_FLOAT=64) - add_definitions(-DLASP_DOUBLE_PRECISION) + add_definitions(-DLASP_FLOAT=64) + add_definitions(-DLASP_DOUBLE_PRECISION) else() - add_definitions(-DLASP_FLOAT=32) + add_definitions(-DLASP_FLOAT=32) + add_definitions(-DLASP_SINGLE_PRECISION) endif(LASP_FLOAT STREQUAL "double") if(NOT DEFINED LASP_DEBUG) - message(FATAL_ERROR "LASP_DEBUG flag not defined. Please set -DLASP_DEBUG=TRUE - or -DLASP_DEBUG=FALSE") + message(FATAL_ERROR "LASP_DEBUG flag not defined. Please set -DLASP_DEBUG=TRUE + or -DLASP_DEBUG=FALSE") endif(NOT DEFINED LASP_DEBUG) # ##################### END Cmake variables converted to a macro @@ -41,63 +56,62 @@ set(Python_ADDITIONAL_VERSIONS "3") # #################### Setting definitions and debug-specific compilation flags if(LASP_DEBUG) - set(TRACERNAME LASPTracer) - set(LASP_DEBUG_CYTHON=True) - set(CMAKE_BUILD_TYPE Debug) - message("Building debug code") - set(CMAKE_BUILD_TYPE Debug) - add_definitions(-DLASP_DEBUG=1) - add_definitions(-DTRACERNAME=${TRACERNAME}) - add_definitions(-DDEBUG) - add_definitions(-DTRACER=1) - # This will produce html files - set(CYTHON_ANNOTATE ON) - # Add the __FILENAME__ macro - # set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__FILENAME__='\"$(subst ${CMAKE_SOURCE_DIR}/,,$(abspath $<))\"'") + set(TRACERNAME LASPTracer) + set(LASP_DEBUG_CYTHON=True) + set(CMAKE_BUILD_TYPE Debug) + message("Building debug code") + set(CMAKE_BUILD_TYPE Debug) + add_definitions(-DLASP_DEBUG=1) + add_definitions(-DTRACERNAME=${TRACERNAME}) + add_definitions(-DDEBUG) + add_definitions(-DTRACER=1) + # This will produce html files + set(CYTHON_ANNOTATE ON) + # Add the __FILENAME__ macro + # set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__FILENAME__='\"$(subst ${CMAKE_SOURCE_DIR}/,,$(abspath $<))\"'") else() - message("Building LASP for release") - set(CMAKE_BUILD_TYPE Release) - set(LASP_DEBUG_CYTHON=False) - set(CYTHON_ANNOTATE OFF) - set(CYTHON_NO_DOCSTRINGS ON) - # Strip unnecessary symbols - # set(CMAKE_SHARED_LINKER_FLAGS "-Wl,--gc-sections") - # set(CMAKE_MODULE_LINKER_FLAGS "-Wl,--gc-sections") + message("Building LASP for release") + set(CMAKE_BUILD_TYPE Release) + set(LASP_DEBUG_CYTHON=False) + set(CYTHON_ANNOTATE OFF) + set(CYTHON_NO_DOCSTRINGS ON) + # Strip unnecessary symbols + # set(CMAKE_SHARED_LINKER_FLAGS "-Wl,--gc-sections") + # set(CMAKE_MODULE_LINKER_FLAGS "-Wl,--gc-sections") - add_definitions(-DTRACER=0 -DNDEBUG) + add_definitions(-DTRACER=0 -DNDEBUG) endif(LASP_DEBUG) # The last argument here takes care of calling SIGABRT when an integer overflow # occures. ############################## General compilation flags (independent of debug mode, windows or linux) -set(CYTHON_EXTRA_C_FLAGS "-Wno-sign-compare -Wno-cpp -Wno-implicit-fallthrough\ - -Wno-incompatible-pointer-types -Wno-strict-aliasing") +set(CYTHON_EXTRA_C_FLAGS "-Wno-sign-compare -Wno-cpp -Wno-implicit-fallthrough -Wno-incompatible-pointer-types -Wno-strict-aliasing") # General make flags set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC -std=c11 -Wall -Wextra -Wno-type-limits \ - -Werror=implicit-function-declaration -Werror=incompatible-pointer-types \ +-Werror=implicit-function-declaration -Werror=incompatible-pointer-types \ -Werror=return-type") # Debug make flags set(CMAKE_C_FLAGS_DEBUG "-g" ) set(CMAKE_C_FLAGS_RELEASE "-O2 -mfpmath=sse -march=x86-64 -mtune=native \ - -fdata-sections -ffunction-sections -fomit-frame-pointer -finline-functions") +-fdata-sections -ffunction-sections -fomit-frame-pointer -finline-functions") # set(CMAKE_C_FLAGS_RELEASE "-O2 -march=native -mtune=native -fomit-frame-pointer") if(LASP_USE_BLAS) - add_definitions(-DLASP_USE_BLAS=1) + add_definitions(-DLASP_USE_BLAS=1) else() - add_definitions(-DLASP_USE_BLAS=0) + add_definitions(-DLASP_USE_BLAS=0) endif(LASP_USE_BLAS) # ############################# End compilation flags if(CMAKE_SYSTEM_NAME STREQUAL "Windows") - set(win32 true) + set(win32 true) else() - set(win32 false) + set(win32 false) endif(CMAKE_SYSTEM_NAME STREQUAL "Windows") find_package(PythonLibs REQUIRED ) @@ -105,9 +119,9 @@ find_package(PythonInterp REQUIRED) add_subdirectory(fftpack) include_directories( - fftpack - lasp/c - ) + fftpack + lasp/c + ) add_subdirectory(lasp) add_subdirectory(test) @@ -116,18 +130,18 @@ add_subdirectory(test) set(SETUP_PY "${CMAKE_CURRENT_BINARY_DIR}/setup.py") set(DEPS "${CMAKE_CURRENT_SOURCE_DIR}/*.py" - "${CMAKE_CURRENT_SOURCE_DIR}/lasp/*.py" - "wrappers" - "lasp_rtaudio") + "${CMAKE_CURRENT_SOURCE_DIR}/lasp/*.py" + "wrappers" + "lasp_rtaudio") - # ) +# ) set(OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/build/timestamp") # configure_file(${SETUP_PY_IN} ${SETUP_PY}) add_custom_command(OUTPUT ${OUTPUT} - COMMAND ${PYTHON_EXECUTABLE} ${SETUP_PY} build - COMMAND ${CMAKE_COMMAND} -E touch ${OUTPUT} - DEPENDS ${DEPS}) + COMMAND ${PYTHON_EXECUTABLE} ${SETUP_PY} build + COMMAND ${CMAKE_COMMAND} -E touch ${OUTPUT} + DEPENDS ${DEPS}) add_custom_target(target ALL DEPENDS ${OUTPUT}) @@ -135,7 +149,7 @@ if(DEFINED INSTALL_DEBUG) set(EXTRA_SETUP_ARG --user -e) else() set(EXTRA_SETUP_ARG "") - endif() +endif() install(CODE "execute_process(COMMAND ${PYTHON_EXECUTABLE} -m pip install ${EXTRA_SETUP_ARG} .)") diff --git a/lasp/c/CMakeLists.txt b/lasp/c/CMakeLists.txt index 3551e6b..43a38ec 100644 --- a/lasp/c/CMakeLists.txt +++ b/lasp/c/CMakeLists.txt @@ -25,4 +25,4 @@ add_library(lasp_lib -target_link_libraries(lasp_lib fftpack openblas) +target_link_libraries(lasp_lib ${LASP_FFT_LIBRARY} openblas) diff --git a/lasp/c/lasp_fft.c b/lasp/c/lasp_fft.c index f92374a..4a05be7 100644 --- a/lasp/c/lasp_fft.c +++ b/lasp/c/lasp_fft.c @@ -9,12 +9,24 @@ #include "lasp_tracer.h" #include "lasp_fft.h" #include "lasp_types.h" -#include "fftpack.h" +#ifdef LASP_FFT_BACKEND_FFTPACK +#include "fftpack.h" typedef struct Fft_s { us nfft; - vd fft_work; -} Fft; + vd fft_work; // Storage memory for fftpack +}; +#elif defined LASP_FFT_BACKEND_FFTW +#include +typedef struct Fft_s { + us nfft; + fftw_plan forward_plan; + fftw_plan reverse_plan; + c* complex_storage; + d* real_storage; +}; + +#endif Fft* Fft_create(const us nfft) { fsTRACE(15); @@ -24,72 +36,103 @@ Fft* Fft_create(const us nfft) { } Fft* fft = a_malloc(sizeof(Fft)); - if(fft==NULL) { - WARN("Fft allocation failed"); - return NULL; - } fft->nfft = nfft; +#ifdef LASP_FFT_BACKEND_FFTPACK /* Initialize foreign fft lib */ fft->fft_work = vd_alloc(2*nfft+15); - npy_rffti(nfft,getvdval(&fft->fft_work,0)); check_overflow_vx(fft->fft_work); +#elif defined LASP_FFT_BACKEND_FFTW + fft->complex_storage = fftw_malloc(sizeof(c) * (nfft/2 + 1)); + fft->real_storage = fftw_malloc(sizeof(d) * nfft); + + fft->forward_plan = fftw_plan_dft_r2c_1d(nfft, + fft->real_storage, + fft->complex_storage, + FFTW_MEASURE); + fft->reverse_plan = fftw_plan_dft_c2r_1d(nfft, + fft->complex_storage, + fft->real_storage, + FFTW_MEASURE); + +#endif /* print_vd(&fft->fft_work); */ - + feTRACE(15); return fft; } void Fft_free(Fft* fft) { fsTRACE(15); dbgassert(fft,NULLPTRDEREF); +#ifdef LASP_FFT_BACKEND_FFTPACK vd_free(&fft->fft_work); +#elif defined LASP_FFT_BACKEND_FFTW + fftw_free(fft->complex_storage); + fftw_free(fft->real_storage); + fftw_destroy_plan(fft->forward_plan); + fftw_destroy_plan(fft->reverse_plan); +#endif a_free(fft); feTRACE(15); } + us Fft_nfft(const Fft* fft) {return fft->nfft;} + void Fft_ifft_single(const Fft* fft,const vc* freqdata,vd* result) { fsTRACE(15); dbgassert(fft && freqdata && result,NULLPTRDEREF); const us nfft = fft->nfft; dbgassert(result->n_rows == nfft, - "Invalid size for time data rows." - " Should be equal to nfft"); + "Invalid size for time data rows." + " Should be equal to nfft"); dbgassert(freqdata->n_rows == (nfft/2+1),"Invalid number of rows in" - " result array"); + " result array"); - d* freqdata_ptr = (d*) getvcval(freqdata,0); d* result_ptr = getvdval(result,0); +#ifdef LASP_FFT_BACKEND_FFTPACK + d* freqdata_ptr = (d*) getvcval(freqdata,0); /* Copy freqdata, to fft_result. */ d_copy(&result_ptr[1],&freqdata_ptr[2],nfft-1,1,1); result_ptr[0] = freqdata_ptr[0]; /* Perform inplace backward transform */ npy_rfftb(nfft, - result_ptr, - getvdval(&fft->fft_work,0)); + result_ptr, + getvdval(&fft->fft_work,0)); + + +#elif defined LASP_FFT_BACKEND_FFTW + c* freqdata_ptr = (c*) getvcval(freqdata,0); + + c_copy(fft->complex_storage, freqdata_ptr,nfft/2+1); + + fftw_execute(fft->reverse_plan); + + d_copy(result_ptr, fft->real_storage, nfft, 1, 1); + +#endif + check_overflow_vx(*result); /* Scale by dividing by nfft. Checked with numpy implementation * that this indeed needs to be done for FFTpack. */ d_scale(result_ptr,1/((d) nfft),nfft); - - check_overflow_vx(*result); feTRACE(15); } void Fft_ifft(const Fft* fft,const cmat* freqdata,dmat* timedata) { fsTRACE(15); - + dbgassert(fft && timedata && freqdata,NULLPTRDEREF); const us nchannels = timedata->n_cols; dbgassert(timedata->n_cols == freqdata->n_cols, - "Number of columns in timedata and result" - " should be equal."); + "Number of columns in timedata and result" + " should be equal."); for(us col=0;coln_rows == nfft, - "Invalid size for time data rows." - " Should be equal to nfft"); + "Invalid size for time data rows." + " Should be equal to nfft"); dbgassert(result->n_rows == (nfft/2+1),"Invalid number of rows in" - " result array"); + " result array"); + +#ifdef LASP_FFT_BACKEND_FFTPACK d* result_ptr = (d*) getvcval(result,0); /* Fftpack stores the data a bit strange, the resulting array @@ -136,35 +181,45 @@ void Fft_fft_single(const Fft* fft,const vd* timedata,vc* result) { /* Perform fft */ npy_rfftf(nfft,&result_ptr[1], - getvdval(&fft->fft_work,0)); + getvdval(&fft->fft_work,0)); /* Set real part of DC component to first index of the rfft * routine */ result_ptr[0] = result_ptr[1]; result_ptr[1] = 0; /* Set imaginary part of DC component - * to zero */ + * to zero */ /* For an even fft, the imaginary part of the Nyquist frequency * bin equals zero.*/ if(likely(nfft%2 == 0)) { result_ptr[nfft+1] = 0; } - - check_overflow_vx(*result); check_overflow_vx(fft->fft_work); +#elif defined LASP_FFT_BACKEND_FFTW + + d* timedata_ptr = getvdval(timedata,0); + c* result_ptr = getvcval(result,0); + d_copy(fft->real_storage,timedata_ptr, nfft, 1, 1); + + fftw_execute(fft->forward_plan); + + c_copy(result_ptr, fft->complex_storage, nfft/2+1); + +#endif + check_overflow_vx(*result); feTRACE(15); } void Fft_fft(const Fft* fft,const dmat* timedata,cmat* result) { fsTRACE(15); - + dbgassert(fft && timedata && result,NULLPTRDEREF); const us nchannels = timedata->n_cols; dbgassert(timedata->n_cols == result->n_cols, - "Number of columns in timedata and result" - " should be equal."); + "Number of columns in timedata and result" + " should be equal."); for(us col=0;col