From 195319ab299d523f66f06945c8eede42b9cf4186 Mon Sep 17 00:00:00 2001
From: "J.A. de Jong - ASCEE" <j.a.dejong@ascee.nl>
Date: Sun, 29 Dec 2019 22:07:27 +0100
Subject: [PATCH] Added the possibility to shift to different fft backend. Now
 set to fftw. Seems to work properly

---
 CMakeLists.txt         | 106 +++++++++++++++++++++----------------
 lasp/c/CMakeLists.txt  |   2 +-
 lasp/c/lasp_fft.c      | 117 ++++++++++++++++++++++++++++++-----------
 lasp/c/lasp_mat.h      |   1 +
 lasp/c/lasp_math_raw.h |   2 +
 lasp/c/lasp_python.h   |   9 ++--
 test/fft_test.py       |   2 +-
 7 files changed, 156 insertions(+), 83 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0738550..5690093 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -23,17 +23,32 @@ add_definitions(-DLASP_MAX_NFFT=33554432) # 2**25
 
 add_definitions(-D_REENTRANT)
 
+# ############### Choose an fft backend here
+#set(LASP_FFT_BACKEND fftpack)
+set(LASP_FFT_BACKEND "fftw")
+
+if(LASP_FFT_BACKEND STREQUAL "fftw")
+    find_library(FFTW_LIBRARY NAMES fftw3 fftw)
+    set(FFTW_LIBRARIES "${FFTW_LIBRARY}")
+    set(LASP_FFT_LIBRARY "${FFTW_LIBRARIES}")
+    add_definitions(-DLASP_FFT_BACKEND_FFTW)
+elseif(LASP_FFT_BACKEND STREQUAL "fftpack")
+    add_definitions(-DLASP_FFT_BACKEND_FFTPACK)
+    set(LASP_FFT_LIBRARY "fftpack")
+endif()
+
 
 if(LASP_FLOAT STREQUAL "double")
-  add_definitions(-DLASP_FLOAT=64)
-  add_definitions(-DLASP_DOUBLE_PRECISION)
+    add_definitions(-DLASP_FLOAT=64)
+    add_definitions(-DLASP_DOUBLE_PRECISION)
 else()
-  add_definitions(-DLASP_FLOAT=32)
+    add_definitions(-DLASP_FLOAT=32)
+    add_definitions(-DLASP_SINGLE_PRECISION)
 endif(LASP_FLOAT STREQUAL "double")
 
 if(NOT DEFINED LASP_DEBUG)
-  message(FATAL_ERROR "LASP_DEBUG flag not defined. Please set -DLASP_DEBUG=TRUE
-   or -DLASP_DEBUG=FALSE")
+    message(FATAL_ERROR "LASP_DEBUG flag not defined. Please set -DLASP_DEBUG=TRUE
+    or -DLASP_DEBUG=FALSE")
 endif(NOT DEFINED LASP_DEBUG)
 
 # ##################### END Cmake variables converted to a macro
@@ -41,63 +56,62 @@ set(Python_ADDITIONAL_VERSIONS "3")
 # #################### Setting definitions and debug-specific compilation flags
 
 if(LASP_DEBUG)
-  set(TRACERNAME LASPTracer)
-  set(LASP_DEBUG_CYTHON=True)
-  set(CMAKE_BUILD_TYPE Debug)
-  message("Building debug code")
-  set(CMAKE_BUILD_TYPE Debug)
-  add_definitions(-DLASP_DEBUG=1)
-  add_definitions(-DTRACERNAME=${TRACERNAME})
-  add_definitions(-DDEBUG)
-  add_definitions(-DTRACER=1)
-  # This will produce html files
-  set(CYTHON_ANNOTATE ON)
-  # Add the __FILENAME__ macro
-  # set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__FILENAME__='\"$(subst ${CMAKE_SOURCE_DIR}/,,$(abspath $<))\"'")
+    set(TRACERNAME LASPTracer)
+    set(LASP_DEBUG_CYTHON=True)
+    set(CMAKE_BUILD_TYPE Debug)
+    message("Building debug code")
+    set(CMAKE_BUILD_TYPE Debug)
+    add_definitions(-DLASP_DEBUG=1)
+    add_definitions(-DTRACERNAME=${TRACERNAME})
+    add_definitions(-DDEBUG)
+    add_definitions(-DTRACER=1)
+    # This will produce html files
+    set(CYTHON_ANNOTATE ON)
+    # Add the __FILENAME__ macro
+    # set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__FILENAME__='\"$(subst ${CMAKE_SOURCE_DIR}/,,$(abspath $<))\"'")
 else()
-  message("Building LASP for release")
-  set(CMAKE_BUILD_TYPE Release)
-  set(LASP_DEBUG_CYTHON=False)
-  set(CYTHON_ANNOTATE OFF)
-  set(CYTHON_NO_DOCSTRINGS ON)
-  # Strip unnecessary symbols
-  # set(CMAKE_SHARED_LINKER_FLAGS "-Wl,--gc-sections")
-  # set(CMAKE_MODULE_LINKER_FLAGS "-Wl,--gc-sections")
+    message("Building LASP for release")
+    set(CMAKE_BUILD_TYPE Release)
+    set(LASP_DEBUG_CYTHON=False)
+    set(CYTHON_ANNOTATE OFF)
+    set(CYTHON_NO_DOCSTRINGS ON)
+    # Strip unnecessary symbols
+    # set(CMAKE_SHARED_LINKER_FLAGS "-Wl,--gc-sections")
+    # set(CMAKE_MODULE_LINKER_FLAGS "-Wl,--gc-sections")
 
-  add_definitions(-DTRACER=0 -DNDEBUG)
+    add_definitions(-DTRACER=0 -DNDEBUG)
 endif(LASP_DEBUG)
 
 # The last argument here takes care of calling SIGABRT when an integer overflow
 # occures.
 ############################## General compilation flags (independent of debug mode, windows or linux)
-set(CYTHON_EXTRA_C_FLAGS "-Wno-sign-compare -Wno-cpp -Wno-implicit-fallthrough\
- -Wno-incompatible-pointer-types -Wno-strict-aliasing")
+set(CYTHON_EXTRA_C_FLAGS "-Wno-sign-compare -Wno-cpp -Wno-implicit-fallthrough -Wno-incompatible-pointer-types -Wno-strict-aliasing")
 
 # General make flags
 set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC -std=c11  -Wall -Wextra -Wno-type-limits \
- -Werror=implicit-function-declaration -Werror=incompatible-pointer-types \
+-Werror=implicit-function-declaration -Werror=incompatible-pointer-types \
 -Werror=return-type")
 
 # Debug make flags
 set(CMAKE_C_FLAGS_DEBUG "-g" )
 
 set(CMAKE_C_FLAGS_RELEASE  "-O2 -mfpmath=sse -march=x86-64 -mtune=native \
-  -fdata-sections -ffunction-sections -fomit-frame-pointer -finline-functions")
+-fdata-sections -ffunction-sections -fomit-frame-pointer -finline-functions")
 
 # set(CMAKE_C_FLAGS_RELEASE  "-O2 -march=native -mtune=native -fomit-frame-pointer")
 
 if(LASP_USE_BLAS)
-  add_definitions(-DLASP_USE_BLAS=1)
+    add_definitions(-DLASP_USE_BLAS=1)
 else()
-  add_definitions(-DLASP_USE_BLAS=0)
+    add_definitions(-DLASP_USE_BLAS=0)
 endif(LASP_USE_BLAS)
 
 # ############################# End compilation flags
 
 if(CMAKE_SYSTEM_NAME STREQUAL "Windows")
-  set(win32 true)
+    set(win32 true)
 else()
-  set(win32 false)
+    set(win32 false)
 endif(CMAKE_SYSTEM_NAME STREQUAL "Windows")
 
 find_package(PythonLibs REQUIRED )
@@ -105,9 +119,9 @@ find_package(PythonInterp REQUIRED)
 
 add_subdirectory(fftpack)
 include_directories(
-  fftpack
-  lasp/c
-  )
+    fftpack
+    lasp/c
+    )
 
 add_subdirectory(lasp)
 add_subdirectory(test)
@@ -116,18 +130,18 @@ add_subdirectory(test)
 set(SETUP_PY    "${CMAKE_CURRENT_BINARY_DIR}/setup.py")
 
 set(DEPS  "${CMAKE_CURRENT_SOURCE_DIR}/*.py"
-          "${CMAKE_CURRENT_SOURCE_DIR}/lasp/*.py"
-          "wrappers"
-          "lasp_rtaudio")
+    "${CMAKE_CURRENT_SOURCE_DIR}/lasp/*.py"
+    "wrappers"
+    "lasp_rtaudio")
 
-             # )
+# )
 set(OUTPUT      "${CMAKE_CURRENT_BINARY_DIR}/build/timestamp")
 
 # configure_file(${SETUP_PY_IN} ${SETUP_PY})
 add_custom_command(OUTPUT ${OUTPUT}
-                   COMMAND ${PYTHON_EXECUTABLE} ${SETUP_PY} build
-                   COMMAND ${CMAKE_COMMAND} -E touch ${OUTPUT}
-                   DEPENDS ${DEPS})
+    COMMAND ${PYTHON_EXECUTABLE} ${SETUP_PY} build
+    COMMAND ${CMAKE_COMMAND} -E touch ${OUTPUT}
+    DEPENDS ${DEPS})
 
 add_custom_target(target ALL DEPENDS ${OUTPUT})
 
@@ -135,7 +149,7 @@ if(DEFINED INSTALL_DEBUG)
     set(EXTRA_SETUP_ARG --user -e)
 else()
     set(EXTRA_SETUP_ARG "")
- endif()
+endif()
 
 
 install(CODE "execute_process(COMMAND ${PYTHON_EXECUTABLE} -m pip install ${EXTRA_SETUP_ARG} .)")
diff --git a/lasp/c/CMakeLists.txt b/lasp/c/CMakeLists.txt
index 3551e6b..43a38ec 100644
--- a/lasp/c/CMakeLists.txt
+++ b/lasp/c/CMakeLists.txt
@@ -25,4 +25,4 @@ add_library(lasp_lib
 
 
 
-target_link_libraries(lasp_lib fftpack openblas)
+target_link_libraries(lasp_lib ${LASP_FFT_LIBRARY} openblas)
diff --git a/lasp/c/lasp_fft.c b/lasp/c/lasp_fft.c
index f92374a..4a05be7 100644
--- a/lasp/c/lasp_fft.c
+++ b/lasp/c/lasp_fft.c
@@ -9,12 +9,24 @@
 #include "lasp_tracer.h"
 #include "lasp_fft.h"
 #include "lasp_types.h"
-#include "fftpack.h"
 
+#ifdef LASP_FFT_BACKEND_FFTPACK
+#include "fftpack.h"
 typedef struct Fft_s {
     us nfft;
-    vd fft_work;
-} Fft;
+    vd fft_work; // Storage memory for fftpack
+};
+#elif defined LASP_FFT_BACKEND_FFTW
+#include <fftw3.h>
+typedef struct Fft_s {
+    us nfft;
+    fftw_plan forward_plan;
+    fftw_plan reverse_plan;
+    c* complex_storage;
+    d* real_storage;
+};
+
+#endif
 
 Fft* Fft_create(const us nfft) {
     fsTRACE(15);
@@ -24,72 +36,103 @@ Fft* Fft_create(const us nfft) {
     }
 
     Fft* fft = a_malloc(sizeof(Fft));
-    if(fft==NULL) {
-        WARN("Fft allocation failed");
-        return NULL;
-    }
 
     fft->nfft = nfft;
 
+#ifdef LASP_FFT_BACKEND_FFTPACK
     /* Initialize foreign fft lib */
     fft->fft_work = vd_alloc(2*nfft+15);
-
     npy_rffti(nfft,getvdval(&fft->fft_work,0));
     check_overflow_vx(fft->fft_work);
+#elif defined LASP_FFT_BACKEND_FFTW
+    fft->complex_storage = fftw_malloc(sizeof(c) * (nfft/2 + 1));
+    fft->real_storage = fftw_malloc(sizeof(d) * nfft);
+
+    fft->forward_plan = fftw_plan_dft_r2c_1d(nfft,
+            fft->real_storage,
+            fft->complex_storage,
+            FFTW_MEASURE);
+    fft->reverse_plan = fftw_plan_dft_c2r_1d(nfft,
+            fft->complex_storage,
+            fft->real_storage,
+            FFTW_MEASURE);
+
+#endif
 
     /* print_vd(&fft->fft_work); */
-    
+
     feTRACE(15); 
     return fft;
 }
 void Fft_free(Fft* fft) {
     fsTRACE(15);
     dbgassert(fft,NULLPTRDEREF);
+#ifdef LASP_FFT_BACKEND_FFTPACK
     vd_free(&fft->fft_work);
+#elif defined LASP_FFT_BACKEND_FFTW
+    fftw_free(fft->complex_storage);
+    fftw_free(fft->real_storage);
+    fftw_destroy_plan(fft->forward_plan);
+    fftw_destroy_plan(fft->reverse_plan);
+#endif
     a_free(fft);
     feTRACE(15);
 }
+
 us Fft_nfft(const Fft* fft) {return fft->nfft;}
+
 void Fft_ifft_single(const Fft* fft,const vc* freqdata,vd* result) {
     fsTRACE(15);
     dbgassert(fft && freqdata && result,NULLPTRDEREF);
     const us nfft = fft->nfft;
     dbgassert(result->n_rows == nfft,
-              "Invalid size for time data rows."
-              " Should be equal to nfft");
+            "Invalid size for time data rows."
+            " Should be equal to nfft");
 
     dbgassert(freqdata->n_rows == (nfft/2+1),"Invalid number of rows in"
-              " result array");
+            " result array");
 
 
-    d* freqdata_ptr = (d*) getvcval(freqdata,0);
     d* result_ptr = getvdval(result,0);
 
+#ifdef LASP_FFT_BACKEND_FFTPACK
+    d* freqdata_ptr = (d*) getvcval(freqdata,0);
     /* Copy freqdata, to fft_result. */
     d_copy(&result_ptr[1],&freqdata_ptr[2],nfft-1,1,1);
     result_ptr[0] = freqdata_ptr[0];
 
     /* Perform inplace backward transform */
     npy_rfftb(nfft,
-              result_ptr,
-              getvdval(&fft->fft_work,0));
+            result_ptr,
+            getvdval(&fft->fft_work,0));
+
+
+#elif defined LASP_FFT_BACKEND_FFTW
+    c* freqdata_ptr = (c*) getvcval(freqdata,0);
+
+    c_copy(fft->complex_storage, freqdata_ptr,nfft/2+1);
+
+    fftw_execute(fft->reverse_plan);
+    
+    d_copy(result_ptr, fft->real_storage, nfft, 1, 1);
+    
+#endif
+    check_overflow_vx(*result);
 
     /* Scale by dividing by nfft. Checked with numpy implementation
      * that this indeed needs to be done for FFTpack. */
     d_scale(result_ptr,1/((d) nfft),nfft);
-    
-    check_overflow_vx(*result);
     feTRACE(15);
 }
 void Fft_ifft(const Fft* fft,const cmat* freqdata,dmat* timedata) {
     fsTRACE(15);
-    
+
     dbgassert(fft && timedata && freqdata,NULLPTRDEREF);
 
     const us nchannels = timedata->n_cols;
     dbgassert(timedata->n_cols == freqdata->n_cols,
-              "Number of columns in timedata and result"
-              " should be equal.");
+            "Number of columns in timedata and result"
+            " should be equal.");
 
     for(us col=0;col<nchannels;col++) {
 
@@ -108,7 +151,7 @@ void Fft_ifft(const Fft* fft,const cmat* freqdata,dmat* timedata) {
 }
 
 void Fft_fft_single(const Fft* fft,const vd* timedata,vc* result) {
-    
+
     fsTRACE(15);
     dbgassert(fft && timedata && result,NULLPTRDEREF);
 
@@ -116,12 +159,14 @@ void Fft_fft_single(const Fft* fft,const vd* timedata,vc* result) {
     assert_vx(timedata);
     assert_vx(result);
     dbgassert(timedata->n_rows == nfft,
-              "Invalid size for time data rows."
-              " Should be equal to nfft");
+            "Invalid size for time data rows."
+            " Should be equal to nfft");
 
     dbgassert(result->n_rows == (nfft/2+1),"Invalid number of rows in"
-              " result array");
+            " result array");
 
+
+#ifdef LASP_FFT_BACKEND_FFTPACK
     d* result_ptr = (d*) getvcval(result,0);
 
     /* Fftpack stores the data a bit strange, the resulting array
@@ -136,35 +181,45 @@ void Fft_fft_single(const Fft* fft,const vd* timedata,vc* result) {
 
     /* Perform fft */
     npy_rfftf(nfft,&result_ptr[1],
-              getvdval(&fft->fft_work,0));
+            getvdval(&fft->fft_work,0));
 
     /* Set real part of DC component to first index of the rfft
      * routine */
     result_ptr[0] = result_ptr[1];
 
     result_ptr[1] = 0;        /* Set imaginary part of DC component
-                                 * to zero */
+                               * to zero */
 
     /* For an even fft, the imaginary part of the Nyquist frequency
      * bin equals zero.*/
     if(likely(nfft%2 == 0)) {
         result_ptr[nfft+1] = 0;
     }
-
-    check_overflow_vx(*result);
     check_overflow_vx(fft->fft_work);
+#elif defined LASP_FFT_BACKEND_FFTW
+
+    d* timedata_ptr = getvdval(timedata,0);
+    c* result_ptr = getvcval(result,0);
+    d_copy(fft->real_storage,timedata_ptr, nfft, 1, 1);
+
+    fftw_execute(fft->forward_plan);
+    
+    c_copy(result_ptr, fft->complex_storage, nfft/2+1);
+    
+#endif
+    check_overflow_vx(*result);
     feTRACE(15);
 
 }
 void Fft_fft(const Fft* fft,const dmat* timedata,cmat* result) {
     fsTRACE(15);
-    
+
     dbgassert(fft && timedata && result,NULLPTRDEREF);
 
     const us nchannels = timedata->n_cols;
     dbgassert(timedata->n_cols == result->n_cols,
-              "Number of columns in timedata and result"
-              " should be equal.");
+            "Number of columns in timedata and result"
+            " should be equal.");
 
     for(us col=0;col<nchannels;col++) {
 
diff --git a/lasp/c/lasp_mat.h b/lasp/c/lasp_mat.h
index 1a78459..50473cd 100644
--- a/lasp/c/lasp_mat.h
+++ b/lasp/c/lasp_mat.h
@@ -21,6 +21,7 @@ typedef struct {
     us stride;
     d* _data;
 } dmat;
+
 /// Dense matrix of complex floating point values
 typedef struct {
     us n_rows;
diff --git a/lasp/c/lasp_math_raw.h b/lasp/c/lasp_math_raw.h
index 3767b94..06cf60d 100644
--- a/lasp/c/lasp_math_raw.h
+++ b/lasp/c/lasp_math_raw.h
@@ -173,6 +173,8 @@ static inline d d_dot(const d a[],const d b[],const us size){
  * @param to : Array to write to
  * @param from : Array to read from
  * @param size : Size of arrays
+ * @param to_inc : Mostly equal to 1, the stride of the array to copy to
+ * @param from_inc : Mostly equal to 1, the stride of the array to copy from
  */
 static inline void d_copy(d to[],
                           const d from[],
diff --git a/lasp/c/lasp_python.h b/lasp/c/lasp_python.h
index 8a351e0..1970718 100644
--- a/lasp/c/lasp_python.h
+++ b/lasp/c/lasp_python.h
@@ -20,10 +20,11 @@
 /** 
  * Create a numpy array from an existing dmat.
  *
- * @param mat 
- * @param transfer_ownership 
+ * @param mat dmat struccture containing array data and metadata.
+ * @param transfer_ownership If set to true, Numpy array will be responsible
+ * for freeing the data.
  *
- * @return 
+ * @return Numpy array
  */
 static inline PyObject* dmat_to_ndarray(dmat* mat,bool transfer_ownership) {
     fsTRACE(15);
@@ -50,7 +51,7 @@ static inline PyObject* dmat_to_ndarray(dmat* mat,bool transfer_ownership) {
     }
 
     // Transpose the array
-    PyObject* arr = PyArray_Transpose(arr_t,NULL);
+    PyObject* arr = PyArray_Transpose((PyArrayObject*) arr_t,NULL);
     if(!arr) {
         WARN("Array transpose failure");
         feTRACE(15);
diff --git a/test/fft_test.py b/test/fft_test.py
index 8d915ab..58b6e25 100644
--- a/test/fft_test.py
+++ b/test/fft_test.py
@@ -6,7 +6,7 @@ Created on Mon Jan 15 19:45:33 2018
 @author: anne
 """
 import numpy as np
-from lasp import Fft
+from lasp.wrappers import Fft
 
 nfft=9
 print('nfft:',nfft)