diff --git a/.gitignore b/.gitignore
index a540170..06ce893 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,7 +10,7 @@ build
 __pycache__
 cython_debug
 beamforming/config.pxi
-beamforming/fft.c
+beamforming/wrappers.c
 *.so
 test/test_bf
 test/test_fft
diff --git a/CMakeLists.txt b/CMakeLists.txt
index d07c5d6..5982828 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -2,8 +2,8 @@ cmake_minimum_required (VERSION 3.0)
 project(beamforming)
 
 # Whether we want to use blas yes or no
-set(ASCEE_USE_BLAS 1)
-add_definitions(-DASCEE_USE_BLAS=1)
+set(ASCEE_USE_BLAS TRUE)
+# set(ASCEE_USE_BLAS FALSE)
 
 set(ASCEE_FLOAT double)
 # set(ASCEE_FLOAT float)
@@ -68,10 +68,12 @@ endif(ASCEE_DEBUG)
 # The last argument here takes care of calling SIGABRT when an integer overflow
 # occures.
 ############################## General compilation flags (independent of debug mode, windows or linux)
+set(CYTHON_EXTRA_C_FLAGS "-Wno-sign-compare -Wno-cpp -Wno-implicit-fallthrough\
+ -Wno-incompatible-pointer-types -Wno-strict-aliasing")
 
 # General make flags
-set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC -std=c11  -Wall -Wextra \
-  -Wimplicit-function-declaration -ftrapv -Wno-type-limits")
+set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC -std=c11  -Wall -Wextra -Wno-type-limits \
+ -Werror=implicit-function-declaration -Werror=incompatible-pointer-types")
 
 # Debug make flags
 set(CMAKE_C_FLAGS_DEBUG "-g" )
diff --git a/beamforming/CMakeLists.txt b/beamforming/CMakeLists.txt
index 2311f17..9133f5a 100644
--- a/beamforming/CMakeLists.txt
+++ b/beamforming/CMakeLists.txt
@@ -13,7 +13,6 @@ include_directories(
   )
 
 
-set_source_files_properties(fft.c PROPERTIES COMPILE_FLAGS
-  "-Wno-cpp -Wno-sign-compare -Wno-incompatible-pointer-types -Wno-strict-aliasing -Wno-implicit-fallthrough")
-cython_add_module(fft fft.pyx)
-target_link_libraries(fft beamforming_lib )
+set_source_files_properties(wrappers.c PROPERTIES COMPILE_FLAGS "${CMAKE_C_FLAGS} ${CYTHON_EXTRA_C_FLAGS}")  
+cython_add_module(wrappers wrappers.pyx)
+target_link_libraries(wrappers beamforming_lib )
diff --git a/beamforming/__init__.py b/beamforming/__init__.py
index e69de29..fff7b7d 100644
--- a/beamforming/__init__.py
+++ b/beamforming/__init__.py
@@ -0,0 +1 @@
+from .wrappers import *
diff --git a/beamforming/c/aps.c b/beamforming/c/aps.c
index 899a8f7..c7c7d5a 100644
--- a/beamforming/c/aps.c
+++ b/beamforming/c/aps.c
@@ -49,26 +49,31 @@ AvPowerSpectra* AvPowerSpectra_alloc(const us nfft,
     /* Check nfft */
     if(nfft % 2 != 0 || nfft > ASCEE_MAX_NFFT) {
         WARN("nfft should be even");
+        feTRACE(15);
         return NULL;
     }
 
     /* Check overlap percentage */
     if(overlap_percentage >= 100) {
         WARN("Overlap percentage >= 100!");
+        feTRACE(15);
         return NULL;
     }
     if(overlap_percentage < 0) {
         WARN("Overlap percentage should be positive!");
+        feTRACE(15);
         return NULL;
     }
     
     /* Compute and check overlap offset */
-    us oo = nfft - (us) (overlap_percentage*nfft/100);
+    us oo = (us) (((d) nfft)-overlap_percentage*((d) nfft)/100);
+    iVARTRACE(15,oo);
     if(oo == 0) {oo++;}
         
     PowerSpectra* ps = PowerSpectra_alloc(nfft,nchannels,wt);
     if(!ps) {
         WARN(ALLOCFAILED "ps");
+        feTRACE(15);
         return NULL;
     }
 
@@ -76,8 +81,10 @@ AvPowerSpectra* AvPowerSpectra_alloc(const us nfft,
     if(!aps) {
         WARN("Allocation of AvPowerSpectra memory failed");
         PowerSpectra_free(ps);
+        feTRACE(15);        
         return NULL;
     }
+    
     aps->nchannels = nchannels;
     aps->nfft = nfft;
     aps->ps = ps;
@@ -91,7 +98,7 @@ AvPowerSpectra* AvPowerSpectra_alloc(const us nfft,
     aps->ps_single = cmat_alloc(nfft/2+1,nchannels*nchannels);
 
     cmat_set(&aps->ps_storage,0);
-    
+    feTRACE(15);
     return aps;
 }
 
@@ -113,26 +120,30 @@ static void AvPowerSpectra_addBlock(AvPowerSpectra* aps,
     dbgassert(aps && block,NULLPTRDEREF);
     dbgassert(block->n_rows == aps->nfft,"Invalid block n_rows");
     dbgassert(block->n_cols == aps->nchannels,"Invalid block n_cols");
-
-    cmat ps_single = aps->ps_single;
-    cmat ps_storage = aps->ps_storage;
+    const us nchannels = aps->nchannels;
+    const us nfft = aps->nfft;
+    iVARTRACE(15,nfft);
+    
+    cmat* ps_single = &aps->ps_single;
+    cmat* ps_storage = &aps->ps_storage;
     
     c naverages = (++aps->naverages);
-    
+    cVARTRACE(15,naverages);
+
     /* Scale previous result */
-    cmat_scale(&ps_storage,
+    cmat_scale(ps_storage,
                (naverages-1)/naverages);
 
-
+    uVARTRACE(15,(us) aps->ps);
+    
     PowerSpectra_compute(aps->ps,
                          block,
-                         &ps_single);
+                         ps_single);
 
 
     /* Add new result, scaled properly */
-    cmat_add_cmat(&ps_storage,
-                  &ps_single,1/naverages);
-
+    cmat_add_cmat(ps_storage,
+                  ps_single,1/naverages);
     
     feTRACE(15);
 }
@@ -153,60 +164,71 @@ cmat* AvPowerSpectra_addTimeData(AvPowerSpectra* aps,
 
     const us oo = aps->oo;
     us* os = &aps->os;
+    iVARTRACE(15,*os);
     us os_timedata = 0;
 
     dmat buffer = aps->buffer;
 
     /* Retrieve the buffer and use it to make the first time block. */
     if(*os < oo) {
-
+        TRACE(15,"Using saved data from previous run");
+        dbgassert(false,"not tested")
         dmat tmp = dmat_alloc(nfft,nchannels);
+        dbgassert(0 <= *os,"BUG");
+        dbgassert(*os <= nfft,"BUG"); 
 
-        copy_dmat_rows(&tmp,
-                       &buffer,
-                       *os,       /* Startrow_from */
-                       0,         /* Startrow to */
-                       nfft - *os /* nrows */
-            );
+        /* copy_dmat_rows(&tmp, */
+        /*                &buffer, */
+        /*                *os,       /\* Startrow_from *\/ */
+        /*                0,         /\* Startrow to *\/ */
+        /*                nfft - *os /\* nrows *\/ */
+        /*     ); */
 
-        copy_dmat_rows(&tmp,
-                       timedata,
-                       0,
-                       nfft - *os,
-                       *os
-            );
+        /* copy_dmat_rows(&tmp, */
+        /*                timedata, */
+        /*                0, */
+        /*                nfft - *os, */
+        /*                *os */
+        /*     ); */
 
 
         AvPowerSpectra_addBlock(aps,&tmp);
 
         os_timedata = oo + *os - nfft;
-        
+        dbgassert(os_timedata < nfft,"BUG");
         dmat_free(&tmp);
     }
 
     /* Run until we cannot go any further */
-    while (os_timedata + nfft <= timedata->n_rows) {
-        dmat tmp = dmat_submat(timedata,
-                               os_timedata,
-                               0,nfft,nchannels);
+    while ((os_timedata + nfft) <= timedata->n_rows) {
 
+        dmat tmp = dmat_submat(timedata,
+                               os_timedata, /* Startrow */
+                               0,           /* Start column */
+                               nfft,        /* Number of rows */
+                               nchannels);  /* Number of columns */
+
+        /* Process the block of time data */
         AvPowerSpectra_addBlock(aps,&tmp);
-        
+
+        iVARTRACE(15,os_timedata);
         os_timedata += oo;
 
         dmat_free(&tmp);
+        iVARTRACE(15,os_timedata);
     }
 
     /* We copy the last piece of samples from the timedata to the
      * buffer */
     copy_dmat_rows(&buffer,
                    timedata,
-                   timedata->n_rows-nfft,
-                   0,
-                   nfft);
+                   timedata->n_rows-nfft, /* startrow_from */
+                   0,           /* startrow_to */
+                   nfft);       /* Number of rows */
+    
     *os = os_timedata+nfft-timedata->n_rows;
     
-    dbgassert(*os < nfft,"BUG");
+    dbgassert(*os <= nfft,"BUG");
 
     feTRACE(15);
     return &aps->ps_storage;
diff --git a/beamforming/c/ascee_alg.c b/beamforming/c/ascee_alg.c
index a01e1c4..8d36d68 100644
--- a/beamforming/c/ascee_alg.c
+++ b/beamforming/c/ascee_alg.c
@@ -5,7 +5,6 @@
 // Description:
 // (Linear) algebra routine implementations
 //////////////////////////////////////////////////////////////////////
-
 #include "ascee_alg.h"
 
 
@@ -13,11 +12,13 @@
 
 void cmv_dot(const cmat* A,const vc* restrict x,vc* restrict b){
 
-    assert(A->n_rows == b->size);
-    assert(A->n_cols == x->size);
+    dbgassert(A->n_rows == b->size,SIZEINEQUAL);
+    dbgassert(A->n_cols == x->size,SIZEINEQUAL);
 	
     #if ASCEE_USE_BLAS == 1
 
+    dbgassert(A->_data,"Matrix-vector product only works for allocated data matrices");
+    
     /* typedef enum CBLAS_ORDER     {CblasRowMajor=101, CblasColMajor=102} CBLAS_ORDER; */
     /* typedef enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113, CblasConjNoTrans=114} CBLAS_TRANSPOSE; */
     /* 
@@ -41,12 +42,12 @@ void cmv_dot(const cmat* A,const vc* restrict x,vc* restrict b){
                 A->n_rows,
                 A->n_cols,
                 (d*) &alpha,			/* alpha */
-                (d*) A->data,		/* A */
+                (d*) A->_data,		/* A */
                 A->n_rows,		/* lda */
-                (d*) x->data,		/*  */
+                (d*) x->ptr,		/*  */
                 1,
                 (d*) &beta,			/* beta */
-                (d*) b->data,
+                (d*) b->ptr,
                 1);
 				
 				
diff --git a/beamforming/c/ascee_alg.h b/beamforming/c/ascee_alg.h
index 186cde0..cf68cc9 100644
--- a/beamforming/c/ascee_alg.h
+++ b/beamforming/c/ascee_alg.h
@@ -86,16 +86,17 @@ static inline void dmat_add_dmat(dmat* x,dmat* y,d fac) {
  * @param[in] fac 
  */
 static inline void cmat_add_cmat(cmat* x,cmat* y,c fac) {
-    dbgassert(x && y,NULLPTRDEREF);
-    dbgassert(x->n_cols == y->n_cols,SIZEINEQUAL);
-    dbgassert(x->n_rows == y->n_rows,SIZEINEQUAL);
-    if(likely(x->data && y->data)) {
-        c_add_to(x->data,y->data,fac,x->n_cols*x->n_rows);
-    }
-    else {
-        for(us col=0;col<y->n_cols;col++) {
-            c_add_to(x->col_ptrs[col],y->col_ptrs[col],fac,x->n_rows);
-        }
+    // dbgassert(x && y,NULLPTRDEREF);
+    // dbgassert(x->n_cols == y->n_cols,SIZEINEQUAL);
+    // dbgassert(x->n_rows == y->n_rows,SIZEINEQUAL);
+    // if(likely(x->data && y->data)) {
+    //     TRACE(15,"Scale whole");
+    //     c_add_to(x->data,y->data,fac,x->n_cols*x->n_rows);
+    // }
+    // else {
+    for(us col=0;col<y->n_cols;col++) {
+        TRACE(15,"Scale columns");
+        c_add_to(x->col_ptrs[col],y->col_ptrs[col],fac,x->n_rows);
     }
 }
 
@@ -107,11 +108,30 @@ static inline void cmat_add_cmat(cmat* x,cmat* y,c fac) {
  * @param[in] a 
  * @param[in] b 
  */
-static inline void vc_elem_prod(vc* result,vc* a,vc* b) {
+static inline void vd_elem_prod(vd* result,const vd* a,const vd* b) {
     dbgassert(result  && a && b,NULLPTRDEREF);
     dbgassert(result->size==a->size,SIZEINEQUAL);
     dbgassert(b->size==a->size,SIZEINEQUAL);
-    c_elem_prod_c(result->ptr,a->ptr,b->ptr,a->size);
+    d_elem_prod_d(result->ptr,a->ptr,b->ptr,a->size);
+}
+/** 
+ * Compute the element-wise (Hadamard) product of a and b, and store
+ * in result
+ *
+ * @param[out] result 
+ * @param[in] a 
+ * @param[in] b 
+ */
+static inline void vc_hadamard(vc* result,const vc* a,const vc* b) {
+    fsTRACE(15);
+    dbgassert(result  && a && b,NULLPTRDEREF);
+    dbgassert(result->size==a->size,SIZEINEQUAL);
+    dbgassert(b->size==a->size,SIZEINEQUAL);
+    c_hadamard(result->ptr,a->ptr,b->ptr,a->size);
+    check_overflow_vx(*result);
+    check_overflow_vx(*a);
+    check_overflow_vx(*b);    
+    feTRACE(15);
 }
 /** 
  * Compute the matrix vector product for complex-valued types: b = A*x.
diff --git a/beamforming/c/ascee_assert.h b/beamforming/c/ascee_assert.h
index 46e134c..82be4d1 100644
--- a/beamforming/c/ascee_assert.h
+++ b/beamforming/c/ascee_assert.h
@@ -34,6 +34,8 @@ void DBG_AssertFailedExtImplementation(const char* file,
 
 #define dbgassert(assertion, assert_string)
 
+#define assertvalidptr(ptr) dbgassert(ptr,NULLPTRDEREF)
+
 #endif  // ASCEE_DEBUG
 
 #endif // ASCEE_ASSERT_H
diff --git a/beamforming/c/ascee_math.c b/beamforming/c/ascee_math.c
index e67b3ca..5cdecfa 100644
--- a/beamforming/c/ascee_math.c
+++ b/beamforming/c/ascee_math.c
@@ -12,13 +12,14 @@
 #include "ascee_math.h"
 #include "ascee_tracer.h"
 
-
 #include <math.h>
 
 #ifdef ASCEE_DEBUG
 void print_dmat(const dmat* m) {
+    feTRACE(50);
     size_t row,col;
     for(row=0;row<m->n_rows;row++){
+        indent_trace();
         for(col=0;col<m->n_cols;col++){
             d val = *getdmatval(m,row,col);
             printf("%c%2.2e ", val<0?'-':' ' ,d_abs(val));
@@ -27,10 +28,13 @@ void print_dmat(const dmat* m) {
         printf("\n");
 
     }
+    fsTRACE(50);
 }
 void print_cmat(const cmat* m) {
+    feTRACE(50);
     size_t row,col;
     for(row=0;row<m->n_rows;row++){
+        indent_trace();
         for(col=0;col<m->n_cols;col++){
             c val = *getcmatval(m,row,col);
 
@@ -44,32 +48,36 @@ void print_cmat(const cmat* m) {
         printf("\n");
 
     }
+    feTRACE(50);
 }
 void print_vc(const vc* m) {
-    TRACE(20,"print_vc");
+    fsTRACE(50);
     size_t row;
 
     for(row=0;row<m->size;row++){
-	
-        d rval = creal(m->data[row]);
-        d ival = cimag(m->data[row]);
+        c val = *getvcval(m,row);
 
+        d rval = creal(val);
+        d ival = cimag(val);
+        indent_trace();
         printf("%c%2.2e%c%2.2ei ",rval< 0 ?'-': ' ', d_abs(rval),ival<0 ? '-' : '+',d_abs(ival) ) ;
         printf("\n");
 
     }
+    feTRACE(50);    
 }
 void print_vd(const vd* m) {
-    TRACE(20,"print_vd");
+    fsTRACE(50);
     size_t row;
     iVARTRACE(20,m->size);
     for(row=0;row<m->size;row++){
 	
-        d rval = m->data[row];
-
+        d rval = m->ptr[row];
+        indent_trace();
         printf("%c%2.2e ",rval< 0 ? '\r': ' ',rval);
         printf("\n");
     }
+    feTRACE(50);
 }
 
 #endif
diff --git a/beamforming/c/ascee_math.h b/beamforming/c/ascee_math.h
index baab8d4..9f71e86 100644
--- a/beamforming/c/ascee_math.h
+++ b/beamforming/c/ascee_math.h
@@ -18,7 +18,7 @@ typedef struct {
     us size;
     d* ptr;                     /**< This pointer points to the data
                                    of this vector */
-    d* data;                    /**< Pointer set if data storage is
+    d* _data;                    /**< Pointer set if data storage is
                                    intern. If this is set to zero, the
                                    vector is a sub-vector. */
 } vd;
@@ -27,7 +27,7 @@ typedef struct {
     us size;
     c* ptr;                     /**< This pointer points to the data
                                    of this vector */
-    c* data;                    /**< Pointer set if data storage is
+    c* _data;                    /**< Pointer set if data storage is
                                    intern. If this is set to zero, the
                                    vector is a sub-vector. */
 } vc;
@@ -36,16 +36,39 @@ typedef struct {
     us n_rows; 
     us n_cols;
     d** col_ptrs;
-    d* data;
+    d* _data;
 } dmat; 
 /// Dense matrix of complex floating point values
 typedef struct { 
     us n_rows; 
     us n_cols;
     c** col_ptrs;
-    c* data; 
+    c* _data; 
 } cmat; 
 
+#ifdef ASCEE_DEBUG
+#define OVERFLOW_MAGIC_NUMBER (-10e-45)
+
+#define check_overflow_vx(vx)                   \
+    TRACE(15,"Checking overflow " #vx);          \
+    if((vx)._data) {                                            \
+        dbgassert((vx)._data[(vx).size] == OVERFLOW_MAGIC_NUMBER,       \
+                  "Buffer overflow detected on" #vx );          \
+              }
+#define check_overflow_xmat(xmat)                               \
+    TRACE(15,"Checking overflow " #xmat);                        \
+    if((xmat)._data) {                                            \
+        for(us _overflow=0;_overflow<(xmat).n_cols;_overflow++)   \
+            dbgassert((xmat)._data[(xmat).n_rows*(xmat).n_cols+   \
+                                   _overflow] == OVERFLOW_MAGIC_NUMBER, \
+                  "Buffer overflow detected on" #xmat );          \
+    }
+
+#else
+#define check_overflow_vx(vx)
+#define check_overflow_xmat(xmat)
+#endif
+
 /** 
  * Sets all values in a vector to the value
  *
@@ -74,14 +97,8 @@ static inline void vc_set(vc* vec,const c value){
  */
 static inline void dmat_set(dmat* mat,const d value){
     dbgassert(mat,NULLPTRDEREF);
-    us size = mat->n_cols*mat->n_rows;
-    if(likely(mat->data)){
-        d_set(mat->data,value,size);
-    }
-    else {
-        for(us col=0;col<mat->n_cols;col++) {
-            d_set(mat->col_ptrs[col],value,mat->n_rows);
-        }
+    for(us col=0;col<mat->n_cols;col++) {
+        d_set(mat->col_ptrs[col],value,mat->n_rows);
     }
 }
 
@@ -94,14 +111,8 @@ static inline void dmat_set(dmat* mat,const d value){
  */
 static inline void cmat_set(cmat* mat,const c value){
     dbgassert(mat,NULLPTRDEREF);
-    us size = mat->n_cols*mat->n_rows;
-    if(likely(mat->data)){
-        c_set(mat->data,value,size);
-    }
-    else {
-        for(us col=0;col<mat->n_cols;col++) {
-            c_set(mat->col_ptrs[col],value,mat->n_rows);
-        }
+    for(us col=0;col<mat->n_cols;col++) {
+        c_set(mat->col_ptrs[col],value,mat->n_rows);
     }
 }
 
@@ -114,9 +125,15 @@ static inline void cmat_set(cmat* mat,const c value){
  */
 static inline vd vd_alloc(us size) {
     vd result = { size, NULL,NULL};
-    result.data = (d*) a_malloc(size*sizeof(d));
-    result.ptr = result.data;
-    dbgassert(result.data,ALLOCFAILED);
+    #ifdef ASCEE_DEBUG
+    result._data = (d*) a_malloc((size+1)*sizeof(d));    
+    result._data[size] = OVERFLOW_MAGIC_NUMBER;
+    #else
+    result._data = (d*) a_malloc(size*sizeof(d));    
+    #endif //  ASCEE_DEBUG
+
+    result.ptr = result._data;
+    dbgassert(result._data,ALLOCFAILED);
     #ifdef ASCEE_DEBUG
     vd_set(&result,NAN);
     #endif // ASCEE_DEBUG    
@@ -131,8 +148,14 @@ static inline vd vd_alloc(us size) {
  */
 static inline vc vc_alloc(us size) {
     vc result = { size, NULL, NULL};
-    result.data = (c*) a_malloc(size*sizeof(c));
-    result.ptr = result.data;
+    #ifdef ASCEE_DEBUG
+    result._data = (c*) a_malloc((size+1)*sizeof(c));    
+    result._data[size] = OVERFLOW_MAGIC_NUMBER;
+    #else
+    result._data = (c*) a_malloc(size*sizeof(c));    
+    #endif //  ASCEE_DEBUG
+    dbgassert(result._data,ALLOCFAILED);
+    result.ptr = result._data;
     #ifdef ASCEE_DEBUG
     vc_set(&result,NAN+I*NAN);
     #endif // ASCEE_DEBUG    
@@ -153,16 +176,21 @@ static inline dmat dmat_alloc(us n_rows,
     
     /**
      * Here storage is allocated for both the data, as well as the
-     * column pointers. The column pointer data is stored at the end
-     * of the allocated block.
+     * column pointers. In debug mode, extra memory is allocated to
+     * check for possible buffer overflows.
      */
-    result.data = (d*) a_malloc(n_rows*n_cols*sizeof(d)
-                                +sizeof(d*)*n_cols);
+    #ifdef ASCEE_DEBUG
+    result._data = (d*) a_malloc((n_rows*n_cols+n_cols)*sizeof(d));
+    for(us i=0;i<n_cols;i++)
+        result._data[n_rows*n_cols+i] = OVERFLOW_MAGIC_NUMBER;
+    #else
+    result._data = (d*) a_malloc((n_rows*n_cols+1)*sizeof(d));    
+    #endif //  ASCEE_DEBUG
 
-    dbgassert(result.data,ALLOCFAILED);
-    result.col_ptrs = (d**) &result.data[n_rows*n_cols];
+    dbgassert(result._data,ALLOCFAILED);
+    result.col_ptrs = a_malloc(n_cols*sizeof(d*));
     for(us col=0;col<n_cols;col++) {
-        result.col_ptrs[col] = &result.data[n_rows*col];
+        result.col_ptrs[col] = &result._data[n_rows*col];
     }
     #ifdef ASCEE_DEBUG
     dmat_set(&result,NAN);
@@ -186,16 +214,19 @@ static inline cmat cmat_alloc(const us n_rows,
     cmat result = { n_rows, n_cols, NULL, NULL};
     /**
      * Here storage is allocated for both the data, as well as the
-     * column pointers. The column pointer data is stored at the end
-     * of the allocated block.
+     * column pointers. In debug mode, extra memory is allocated to
+     * check for possible buffer overflows.
      */
-    result.data = (c*) a_malloc(n_rows*n_cols*sizeof(c)
-                                +sizeof(c*)*n_cols);
-
-    dbgassert(result.data,ALLOCFAILED);
-    result.col_ptrs = (c**) &result.data[n_rows*n_cols];
+    #ifdef ASCEE_DEBUG
+    result._data = (c*) a_malloc((n_rows*n_cols+n_cols)*sizeof(c));
+    for(us i=0;i<n_cols;i++)
+        result._data[n_rows*n_cols+i] = OVERFLOW_MAGIC_NUMBER;
+    #else
+    result._data = (c*) a_malloc((n_rows*n_cols+1)*sizeof(c));    
+    #endif //  ASCEE_DEBUG
+    result.col_ptrs = a_malloc(n_cols*sizeof(c*));
     for(us col=0;col<n_cols;col++) {
-        result.col_ptrs[col] = &result.data[n_rows*col];
+        result.col_ptrs[col] = &result._data[n_rows*col];
     }
 
     #ifdef ASCEE_DEBUG
@@ -222,6 +253,7 @@ static inline dmat dmat_foreign(const us n_rows,
     dbgassert(data,NULLPTRDEREF);
     dmat result = {n_rows,n_cols,NULL,NULL};
     d** colptrs = malloc(sizeof(d*)*n_cols);
+    
     dbgassert(colptrs,ALLOCFAILED);
     result.col_ptrs = colptrs;
     for(us i=0;i<n_cols;i++) {
@@ -263,7 +295,7 @@ static inline cmat cmat_foreign(const us n_rows,
  */
 static inline void vd_free(vd* f) {
     dbgassert(f,NULLPTRDEREF);
-    if(likely(f->data)) a_free(f->data);
+    if(likely(f->_data)) a_free(f->_data);
 }
 /** 
  * Free's data of a vector. Is safe to run on sub-vecs as well, to
@@ -273,7 +305,7 @@ static inline void vd_free(vd* f) {
  */
 static inline void vc_free(vc* f) {
     dbgassert(f,NULLPTRDEREF);
-    if(likely(f->data)) a_free(f->data);
+    if(likely(f->_data)) a_free(f->_data);
 }
 /** 
  * Free's data of dmat. Safe to run on sub-matrices as well.
@@ -281,14 +313,9 @@ static inline void vc_free(vc* f) {
  * @param m Matrix to free
  */
 static inline void dmat_free(dmat* m) {
-    if(likely(m->data)) {
-        a_free(m->data);
-    }
-    else {
-        // Only column pointers allocated. This was a submat
-        dbgassert(m->col_ptrs,NULLPTRDEREF);
-        a_free(m->col_ptrs);
-    }
+    if(m->_data) a_free(m->_data);
+    dbgassert(m->col_ptrs,NULLPTRDEREF);
+    a_free(m->col_ptrs);
 }
 /** 
  * Free's data of dmat. Safe to run on sub-matrices as well.
@@ -296,19 +323,14 @@ static inline void dmat_free(dmat* m) {
  * @param m Matrix to free
  */
 static inline void cmat_free(cmat* m) {
-    if(likely(m->data)) {
-        a_free(m->data);
-    }
-    else {
-        // Only column pointers allocated. This was a submat
-        dbgassert(m->col_ptrs,NULLPTRDEREF);
-        a_free(m->col_ptrs);
-    }
+    if(m->_data) a_free(m->_data);
+    dbgassert(m->col_ptrs,NULLPTRDEREF);
+    a_free(m->col_ptrs);
 }
 
 #define setvecval(vec,index,val)                              \
     dbgassert((((us) index) <= (vec)->size),OUTOFBOUNDSVEC);  \
-    (vec)->data[index] = val;
+    (vec)->ptr[index] = val;
 
     
 #define setmatval(mat,row,col,val)                              \
@@ -418,7 +440,8 @@ static inline dmat dmat_submat(const dmat* parent,
                                    startcol+col);
 
     }
-    dmat result = { n_rows,n_cols,col_ptrs,NULL};
+    dmat result = { n_rows,n_cols,NULL,NULL};
+    result.col_ptrs = col_ptrs;
     return result;
 }
 /** 
@@ -437,7 +460,7 @@ static inline cmat cmat_submat(cmat* parent,
                                const us startcol,
                                const us n_rows,
                                const us n_cols) {
-
+    dbgassert(false,"untested");
     dbgassert(parent,NULLPTRDEREF);
     dbgassert(n_rows+startrow <= parent->n_rows,OUTOFBOUNDSMATR);
     dbgassert(n_cols+startcol <= parent->n_cols,OUTOFBOUNDSMATC);
@@ -515,7 +538,10 @@ static inline void cmat_copy(cmat* to,const cmat* from) {
  * @return vector with reference to column
  */
 static inline vd dmat_column(dmat* x,us col) {
-    vd res = { x->n_rows, getdmatval(x,0,col),NULL};
+    vd res;
+    res.size = x->n_rows;
+    res.ptr = getdmatval(x,0,col);
+    res._data = NULL;
     return res;
 }
 
@@ -528,7 +554,10 @@ static inline vd dmat_column(dmat* x,us col) {
  * @return vector with reference to column
  */
 static inline vc cmat_column(cmat* x,us col) {
-    vc res = { x->n_rows, getcmatval(x,0,col),NULL};
+    vc res;
+    res.size = x->n_rows;
+    res.ptr = getcmatval(x,0,col);
+    res._data = NULL;
     return res;
 }
 
@@ -538,10 +567,12 @@ static inline vc cmat_column(cmat* x,us col) {
  * @param a 
  * @param b 
  */
-static inline void vc_conj_vc(vc* a,const vc* b) {
+static inline void vc_conj(vc* a,const vc* b) {
+    fsTRACE(15);
     dbgassert(a && b,NULLPTRDEREF);
     dbgassert(a->size == b->size,SIZEINEQUAL);
-    c_conj_c(a->ptr,b->ptr,a->size);
+    carray_conj(a->ptr,b->ptr,a->size);
+    feTRACE(15);
 }
 
 /** 
@@ -551,13 +582,8 @@ static inline void vc_conj_vc(vc* a,const vc* b) {
  */
 static inline void cmat_conj(cmat* x) {
     dbgassert(x,NULLPTRDEREF);
-    if(likely(x->data)) {
-        c_conj_inplace(x->data,x->n_cols*x->n_rows);
-    }
-    else {
-        for(us col=0;col<x->n_cols;col++) {
-            c_conj_inplace(x->col_ptrs[col],x->n_rows);
-        }
+    for(us col=0;col<x->n_cols;col++) {
+        c_conj_inplace(x->col_ptrs[col],x->n_rows);
     }
 }
 
diff --git a/beamforming/c/ascee_math_raw.c b/beamforming/c/ascee_math_raw.c
index 3f4efdf..bcd4704 100644
--- a/beamforming/c/ascee_math_raw.c
+++ b/beamforming/c/ascee_math_raw.c
@@ -15,7 +15,7 @@ void d_elem_prod_d(d res[],
                    const d arr2[],
                    const us size) {
 
-    #if ASCEE_USE_BLAS
+    #if ASCEE_USE_BLAS == 1
 
     #if ASCEE_DEBUG
 
@@ -77,15 +77,16 @@ void d_elem_prod_d(d res[],
     #endif
 }
 
-void c_elem_prod_c(c res[],
-                   const c arr1[],
-                   const c arr2[],
-                   const us size) {
+void c_hadamard(c res[],
+                const c arr1[],
+                const c arr2[],
+                const us size) {
 
-    TRACE(15,"c_elem_prod_c");
+    fsTRACE(15);
     uVARTRACE(15,size);
+    dbgassert(arr1 && arr2 && res,NULLPTRDEREF);
     
-    #if ASCEE_USE_BLAS
+    #if ASCEE_USE_BLAS == 1
 
     #if ASCEE_DEBUG
 
@@ -105,38 +106,36 @@ void c_elem_prod_c(c res[],
     #define elem_prod_fun cblas_cgbmv
     #endif
 
-    /* These parameters do not matter for this specific case */
-    const CBLAS_ORDER  mat_order= CblasColMajor;
-    const CBLAS_TRANSPOSE tr = CblasNoTrans;
+    c alpha = 1.0;
+    c beta = 0.0;
 
-    const c alpha = 1.0;
-    const c beta = 0.0;
     TRACE(15,"Calling " annestr(elem_prod_fun));
-    
-    elem_prod_fun(mat_order,
-                  tr,
+    uVARTRACE(15,size);
+    elem_prod_fun(CblasColMajor,
+                  CblasNoTrans,
                   (blasint) size, /* M: Number of rows */
                   (blasint) size, /* B: Number of columns */
                   0,              /* KL: Number of sub-diagonals */
                   0,              /* KU: Number of super-diagonals */
-                  (d*) &alpha,        /* Multiplication factor */
-                  (d*) arr2,          /* A */
-                  1,            /* LDA */
-                  (d*) arr1,    /* x */
-                  1, /* incX = 1 */
+                  (d*) &alpha,    /* Multiplication factor */
+                  (d*) arr2,      /* A */
+                  1,              /* LDA */
+                  (d*) arr1,      /* x */
+                  1,              /* incX = 1 */
                   (d*) &beta,
                   (d*) res,    /* The Y matrix to write to */
-                  1); /* incY */
+                  1);          /* incY (increment in res) */
 
     #undef elem_prod_fun
 
     #else  /* No blas routines, routine is very simple, but here we
             * go! */
-    DBGWARN("Performing slow non-blas vector-vector multiplication");
+    DBGWARN("Performing slower non-blas vector-vector multiplication");
     for(us i=0;i<size;i++) {
         res[i] = arr1[i]*arr2[i];
     }
     #endif
+    feTRACE(15);
 }
 
 
diff --git a/beamforming/c/ascee_math_raw.h b/beamforming/c/ascee_math_raw.h
index 9385cb4..004089b 100644
--- a/beamforming/c/ascee_math_raw.h
+++ b/beamforming/c/ascee_math_raw.h
@@ -15,6 +15,9 @@
 
 #if ASCEE_USE_BLAS == 1
 #include <cblas.h>
+#elif ASCEE_USE_BLAS == 0
+#else
+#error "ASCEE_USE_BLAS should be set to either 0 or 1"
 #endif
 
 #ifdef ASCEE_DOUBLE_PRECISION
@@ -249,6 +252,7 @@ static inline void d_add_to(d x[],const d y[],d fac,us size){
  * @param size Size of the arrays
  */
 static inline void c_add_to(c x[],const c y[],c fac,us size){
+    fsTRACE(15);
     #if ASCEE_USE_BLAS == 1
     #if ASCEE_DOUBLE_PRECISION
     cblas_zaxpy(size,(d*) &fac,(d*) y,1,(d*) x,1);
@@ -260,6 +264,7 @@ static inline void c_add_to(c x[],const c y[],c fac,us size){
     for(i=0;i<size;i++)
         x[i]+=fac*y[i];
     #endif
+    feTRACE(15);
 }
 
 /** 
@@ -298,12 +303,11 @@ static inline void c_scale(c a[],const c scale_fac,us size){
     // part. Fortunately the (c) type stores the two values in this
     // order. To be portable and absolutely sure anything goes well,
     // we convert it explicitly here.
-    d scale_fac_d [] = {creal(scale_fac),cimag(scale_fac)};
 
     #if ASCEE_DOUBLE_PRECISION
-    cblas_zscal(size,scale_fac_d,(d*) a,1);
+    cblas_zscal(size,(d*) &scale_fac,(d*) a,1);
     #else
-    cblas_cscal(size,scale_fac_d,(d*) a,1);
+    cblas_cscal(size,(d*) &scale_fac,(d*) a,1);
     #endif
     #else
     us i;
@@ -411,10 +415,10 @@ void d_elem_prod_d(d res[],
  * @param vec2 Array 2
  * @param size: Size of the arrays
  */
-void c_elem_prod_c(c res[],
-                   const c arr1[],
-                   const c arr2[],
-                   const us size);
+void c_hadamard(c res[],
+                const c arr1[],
+                const c arr2[],
+                const us size);
 
 /** 
  * Compute the complex conjugate of a complex vector and store the
@@ -424,15 +428,16 @@ void c_elem_prod_c(c res[],
  * @param in Input vector
  * @param size Size of the vector
  */
-static inline void c_conj_c(c res[],const c in[],const us size) {
+static inline void carray_conj(c res[],const c in[],const us size) {
     // First set the result vector to zero
+    fsTRACE(15);
     c_set(res,0,size);
-    #ifdef ASCEE_USE_BLAS
+    #if ASCEE_USE_BLAS == 1
     #if ASCEE_DOUBLE_PRECISION
     // Cast as a float, scale all odd elements with minus one to find
     // the complex conjugate.
-    cblas_daxpy(size ,1,(d*) in,2,(d*) res,2);
-    cblas_daxpy(size,-1,&((d*) in)[1],2,&((d*) res)[1],2);
+    cblas_daxpy(size ,1.0,(d*) in,2,(d*) res,2);
+    cblas_daxpy(size,-1.0,&((d*) in)[1],2,&((d*) res)[1],2);
     #else
     cblas_faxpy(size ,1,(d*) in,2,(d*) res,2);
     cblas_faxpy(size,-1,&((d*) in)[1],2,&((d*) res)[1],2);
@@ -442,6 +447,7 @@ static inline void c_conj_c(c res[],const c in[],const us size) {
         res[i] = c_conj(in[i]);
     }
     #endif  // ASCEE_USE_BLAS
+    feTRACE(15);
 }
 /** 
  * In place complex conjugation
@@ -450,7 +456,7 @@ static inline void c_conj_c(c res[],const c in[],const us size) {
  * @param size Size of the vector
  */
 static inline void c_conj_inplace(c res[],us size) {
-    #ifdef ASCEE_USE_BLAS
+    #if ASCEE_USE_BLAS
     #if ASCEE_DOUBLE_PRECISION
     // Cast as a float, scale all odd elements with minus one to find
     // the complex conjugate.
diff --git a/beamforming/c/ascee_tracer.c b/beamforming/c/ascee_tracer.c
index d18dca8..0202590 100644
--- a/beamforming/c/ascee_tracer.c
+++ b/beamforming/c/ascee_tracer.c
@@ -12,8 +12,10 @@
 
 #ifdef _REENTRANT
 #include <stdatomic.h>
+static _Thread_local us ASCEE_FN_LEVEL = 0;
+
+static _Atomic(int) TRACERNAME = ATOMIC_VAR_INIT(DEFAULTTRACERLEVEL);
 
-_Atomic(int) TRACERNAME = ATOMIC_VAR_INIT(DEFAULTTRACERLEVEL);
 
 void setTracerLevel(int level) {
     atomic_store(&TRACERNAME,level);
@@ -21,35 +23,52 @@ void setTracerLevel(int level) {
 int getTracerLevel() {
     return atomic_load(&TRACERNAME);
 }
+
 #else
 
 int TRACERNAME;
-
+static us ASCEE_FN_LEVEL = 0;
 /* setTracerLevel and getTracerLevel are defined as macros in
  * tracer.h */
 #endif
 
+void indent_trace() {
+    for(us i=0;i<ASCEE_FN_LEVEL;i++) {
+        printf("--");
+    }
+    printf("* ");
+}
+
 
 
 void trace_impl(const char* file,int pos, const char * string){
+    indent_trace();
     printf(annestr(TRACERNAME) ":%s:%i: %s\n",file,pos,string);
 }
 void fstrace_impl(const char* file,int pos,const char* fn){
+    ASCEE_FN_LEVEL++;
+    indent_trace();
     printf(annestr(TRACERNAME) ":%s:%i: start function: %s()\n",file,pos,fn);
 }
 void fetrace_impl(const char* file,int pos,const char* fn){
+    indent_trace();
     printf(annestr(TRACERNAME) ":%s:%i: end   function: %s()\n",file,pos,fn);
+    ASCEE_FN_LEVEL--;
 }
 void ivartrace_impl(const char* pos,int line,const char* varname, int var){
+    indent_trace();
     printf(annestr(TRACERNAME) ":%s:%i: %s = %i\n",pos,line,varname,var);
 }
 void uvartrace_impl(const char* pos,int line,const char* varname,size_t var){
+    indent_trace();
     printf(annestr(TRACERNAME) ":%s:%i: %s = %zu\n",pos,line,varname,var);
 }
 void dvartrace_impl(const char* pos,int line,const char* varname, d var){
+    indent_trace();
     printf(annestr(TRACERNAME) ":%s:%i: %s = %0.5e\n",pos,line,varname,var);
 }
 void cvartrace_impl(const char* pos,int line,const char* varname, c var){
+    indent_trace();
     printf(annestr(TRACERNAME) ":%s:%i: %s = %0.5e+%0.5ei\n",pos,line,varname,creal(var),cimag(var));
 }
 #endif
diff --git a/beamforming/c/ascee_tracer.h b/beamforming/c/ascee_tracer.h
index da9bcd8..c0725d7 100644
--- a/beamforming/c/ascee_tracer.h
+++ b/beamforming/c/ascee_tracer.h
@@ -8,6 +8,17 @@
 #pragma once
 #ifndef ASCEE_TRACER_H
 #define ASCEE_TRACER_H
+#include <string.h>
+#include <stdio.h>
+
+static inline void clearScreen() {
+    printf("\033c\n");
+}
+
+/** 
+ * Indent the rule for tracing visibility.
+ */
+void indent_trace();
 
 // Some console colors
 #define RESET   "\033[0m"
@@ -28,9 +39,6 @@
 #define BOLDCYAN    "\033[1m\033[36m"      /* Bold Cyan */
 #define BOLDWHITE   "\033[1m\033[37m"      /* Bold White */
 
-#include <assert.h>
-#include <string.h>
-
 // Not so interesting part
 #define rawstr(x) #x
 #define namestr(x) rawstr(x)
diff --git a/beamforming/c/fft.c b/beamforming/c/fft.c
index 14f7f80..1dde2f6 100644
--- a/beamforming/c/fft.c
+++ b/beamforming/c/fft.c
@@ -14,7 +14,7 @@
 typedef struct Fft_s {
     us nfft;
     us nchannels;
-    Fftr* fftr;
+    vd fft_work;
 } Fft;
 
 Fft* Fft_alloc(const us nfft,const us nchannels) {
@@ -35,19 +35,20 @@ Fft* Fft_alloc(const us nfft,const us nchannels) {
     fft->nfft = nfft;
     fft->nchannels = nchannels;
 
-    fft->fftr = Fftr_alloc(nfft);
-    if(!fft->fftr) {
-        a_free(fft);
-        WARN(ALLOCFAILED "fftr");
-        return NULL;
-    }
+    /* Initialize foreign fft lib */
+    fft->fft_work = vd_alloc(2*nfft+15);
+    npy_rffti(nfft,fft->fft_work.ptr);
+    check_overflow_vx(fft->fft_work);
 
+    /* print_vd(&fft->fft_work); */
+    
     feTRACE(15); 
     return fft;
 }
 void Fft_free(Fft* fft) {
     fsTRACE(15);
-    Fftr_free(fft->fftr);
+    dbgassert(fft,NULLPTRDEREF);
+    vd_free(&fft->fft_work);
     a_free(fft);
     feTRACE(15);
 }
@@ -55,20 +56,51 @@ us Fft_nchannels(const Fft* fft) {return fft->nchannels;}
 us Fft_nfft(const Fft* fft) {return fft->nfft;}
 void Fft_fft(const Fft* fft,const dmat* timedata,cmat* result) {
     fsTRACE(15);
-    dbgassert(timedata && result,NULLPTRDEREF);
+    
+    dbgassert(fft && timedata && result,NULLPTRDEREF);
     dbgassert(timedata->n_rows == fft->nfft,"Invalid size for time data rows."
         " Should be equal to nfft");
     dbgassert(timedata->n_cols == fft->nchannels,"Invalid size for time data cols."
         " Should be equal to nchannels");
-    
+
+    const us nfft = fft->nfft;
+
+    vd fft_result = vd_alloc(fft->nfft);
     for(us col=0;col<fft->nchannels;col++) {
 
-        Fftr_fftr(fft->fftr,
-             getdmatval(timedata,0,col),
-             getcmatval(result,0,col));
-        
+        vd timedata_col = dmat_column((dmat*) timedata,col);
+        vd_copy(&fft_result,&timedata_col);
+        vd_free(&timedata_col);
+
+        npy_rfftf(fft->nfft,fft_result.ptr,fft->fft_work.ptr);
+
+        /* Fftpack stores the data a bit strange, the resulting array
+         * has the DC value at 0,the first cosine at 1, the first sine
+         * at 2 etc. This needs to be shifted properly in the
+         * resulting matrix, as for the complex data, the imaginary
+         * part of the DC component equals zero. */
+
+        check_overflow_vx(fft_result);
+        check_overflow_vx(fft->fft_work);
+
+        vc resultcol = cmat_column(result,col);
+        *getvcval(&resultcol,0) = *getvdval(&fft_result,0);
+
+        memcpy((void*) getvcval(&resultcol,1),
+               (void*) getvdval(&fft_result,1),
+               (nfft-1)*sizeof(d));
+
+        /* Set imaginary part of Nyquist frequency to zero */
+        ((d*) getvcval(&resultcol,nfft/2))[1] = 0;
+        check_overflow_xmat(*timedata);
+
+        /* Free up storage of the result column */
+        vc_free(&resultcol);
     }
 
+    /* print_vd(&fft->fft_work); */
+    
+    vd_free(&fft_result);
     feTRACE(15);
 }
 
diff --git a/beamforming/c/mq.c b/beamforming/c/mq.c
index ef4f53b..ed121aa 100644
--- a/beamforming/c/mq.c
+++ b/beamforming/c/mq.c
@@ -14,11 +14,12 @@
 #include "mq.h"
 #include <pthread.h>
 
-#ifdef __linux__
+/* #ifdef linux */
+#define _GNU_SOURCE
 #include <sys/types.h>
 #include <unistd.h>
 #include <sys/syscall.h>
-#endif
+/* #endif */
 
 typedef struct {
     void* job_ptr;
@@ -252,13 +253,9 @@ void* JobQueue_assign(JobQueue* jq) {
 
     TRACE(16,"JobQueue_assign: found ready job. Assigned to:");
     #ifdef ASCEE_DEBUG
-    #ifdef __linux__
-    
-    pid_t tid = syscall(SYS_gettid);    
-    iVARTRACE(16,tid);    
-    #endif // __linux__
-    #endif //  ASCEE_DEBUG
-
+    pthread_t thisthread = pthread_self();
+    iVARTRACE(16,thisthread);    
+    #endif
 
     /* print_job_queue(jq); */
     /* Find a job from the queue, assign it and return it */
diff --git a/beamforming/c/ps.c b/beamforming/c/ps.c
index 080f161..edefcdd 100644
--- a/beamforming/c/ps.c
+++ b/beamforming/c/ps.c
@@ -15,12 +15,8 @@
 typedef struct PowerSpectra_s {
 
     vd window;
-
     d win_pow;                  /**< The power of the window */
     Fft* fft;                    /**< Fft routines storage */
-    cmat fft_work;              /**< Work area for FFt's */
-    dmat timedata_work;         /**< Work area for timedata */
-    vc j_vec_conj;              /**< Work area for conjugate of j */
 } PowerSpectra;
 
 PowerSpectra* PowerSpectra_alloc(const us nfft,
@@ -53,11 +49,9 @@ PowerSpectra* PowerSpectra_alloc(const us nfft,
 
     /* Allocate vectors and matrices */
     ps->window = vd_alloc(nfft);
-    ps->fft_work = cmat_alloc(nfft/2+1,nchannels);
-    ps->timedata_work= dmat_alloc(nfft,nchannels);
-    ps->j_vec_conj = vc_alloc(nfft/2+1);
     
     rv = window_create(wt,&ps->window,&ps->win_pow);
+    check_overflow_vx(ps->window);
     if(rv!=0) {
         WARN("Error creating window function, continuing anyway");
     }
@@ -67,14 +61,9 @@ PowerSpectra* PowerSpectra_alloc(const us nfft,
 
 void PowerSpectra_free(PowerSpectra* ps) {
     fsTRACE(15);
-
     Fft_free(ps->fft);
     vd_free(&ps->window);
-    cmat_free(&ps->fft_work);
-    dmat_free(&ps->timedata_work);
-    vc_free(&ps->j_vec_conj);
     a_free(ps);
-
     feTRACE(15);
 }
 
@@ -93,8 +82,6 @@ void PowerSpectra_compute(const PowerSpectra* ps,
     const d win_pow = ps->win_pow;
     dVARTRACE(15,win_pow);
 
-    us i,j;
-
     /* Sanity checks for the matrices */
     dbgassert(timedata->n_cols == nchannels,"timedata n_cols "
               "should be equal to nchannels");
@@ -111,66 +98,81 @@ void PowerSpectra_compute(const PowerSpectra* ps,
 
     /* Multiply time data with the window and store result in
      * timedata_work. */
-    dmat timedata_work = ps->timedata_work;
+    dmat timedata_work = dmat_alloc(nfft,nchannels);
+    for(us i=0;i<nchannels;i++) {
+        vd column = dmat_column((dmat*) timedata,i);
+        vd column_work = dmat_column(&timedata_work,i);
 
-    for(i=0;i<nchannels;i++) {
-
-        d_elem_prod_d(getdmatval(&timedata_work,0,i), /* Result */
-                      getdmatval(timedata,0,i),
-                      ps->window.data,
-                      nfft);
-                                 
+        vd_elem_prod(&column_work,&column,&ps->window);
+        
+        vd_free(&column);
+        vd_free(&column_work);
     }
-
+    check_overflow_xmat(timedata_work);
     /* print_dmat(&timedata_work); */
     
     /* Compute fft of the time data */
-    cmat fft_work = ps->fft_work;
+    cmat fft_work = cmat_alloc(nfft/2+1,nchannels);
     Fft_fft(ps->fft,
             &timedata_work,
             &fft_work);
 
+    dmat_free(&timedata_work);
+    
     TRACE(15,"fft done");
     
     /* Scale fft such that power is easily comxputed */
     const c scale_fac = d_sqrt(2/win_pow)/nfft;
     cmat_scale(&fft_work,scale_fac);
     TRACE(15,"scale done");
-    
-    for(i=0;i< nchannels;i++) {
+
+    for(us i=0;i< nchannels;i++) {
         /* Multiply DC term with 1/sqrt(2) */
         *getcmatval(&fft_work,0,i) *= 1/d_sqrt(2.)+0*I;
 
         /* Multiply Nyquist term with 1/sqrt(2) */
         *getcmatval(&fft_work,nfft/2,i) *= 1/d_sqrt(2.)+0*I;
     }
+    check_overflow_xmat(fft_work);
 
     /* print_cmat(&fft_work); */
     TRACE(15,"Nyquist and DC correction done");
 
-    vc j_vec_conj = ps->j_vec_conj;
+    vc j_vec_conj = vc_alloc(nfft/2+1);
 
     /* Compute Cross-power spectra and store result */
-    for(i =0; i<nchannels;i++) {
-        for(j=0;j<nchannels;j++) {
-            iVARTRACE(15,i);
-            iVARTRACE(15,j);
+    for(us i =0; i<nchannels;i++) {
+        for(us j=0;j<nchannels;j++) {
+
             /* The indices here are important. This is also how it
              * is documented */
             vc res = cmat_column(result,i+j*nchannels);
-            TRACE(15,"SFSG");
+
+            check_overflow_vx(res);
             vc i_vec = cmat_column(&fft_work,i);
             vc j_vec = cmat_column(&fft_work,j);
-            TRACE(15,"SFSG");
+
+            check_overflow_xmat(fft_work);
+
             /* Compute the conjugate of spectra j */
-            vc_conj_vc(&j_vec_conj,&j_vec);
-            TRACE(15,"SFSG");
+            vc_conj(&j_vec_conj,&j_vec);
+
+            check_overflow_xmat(fft_work);
+
             /* Compute the element-wise product of the two vectors and
              * store the result as the result */
-            vc_elem_prod(&res,&i_vec,&j_vec_conj);
-            TRACE(15,"SFSG");
+            vc_hadamard(&res,&i_vec,&j_vec_conj);
+
+            vc_free(&i_vec);
+            vc_free(&j_vec);
+            vc_free(&res);
+
         }
     }
+    check_overflow_xmat(*result);
+    check_overflow_xmat(*timedata);
+    cmat_free(&fft_work);
+    vc_free(&j_vec_conj);
     feTRACE(15);
 }
 
diff --git a/beamforming/c/types.h b/beamforming/c/types.h
index 7244009..7fbdd43 100644
--- a/beamforming/c/types.h
+++ b/beamforming/c/types.h
@@ -9,15 +9,18 @@
 #pragma once
 #ifndef TYPES_H
 #define TYPES_H
-
-// Branch prediction performance improvement
-#if !defined(likely) && defined(__GNUC__) && !defined(ASCEE_DEBUG)
+#include <stddef.h>
+// // Branch prediction performance improvement
+#if !defined(likely)
+#if defined(__GNUC__) && !defined(ASCEE_DEBUG) 
 #define likely(x)       __builtin_expect(!!(x), 1)
 #define unlikely(x)     __builtin_expect(!!(x), 0)
 #else
 #define likely(x)       (x)
 #define unlikely(x)     (x)
-#endif
+#endif  // if defined(__GNUC__) && !defined(ASCEE_DEBUG)
+
+#endif  // !defined(likely)
 
 /// We often use boolean values
 #include <stdbool.h>            // true, false
diff --git a/beamforming/c/window.c b/beamforming/c/window.c
index 3e2db16..570e9cd 100644
--- a/beamforming/c/window.c
+++ b/beamforming/c/window.c
@@ -8,7 +8,7 @@
 
 #include "window.h"
 #include "signals.h"
-
+#include <stdlib.h>
 /** 
  * Compute the Hann window
  *
@@ -53,9 +53,12 @@ static d rectangle(us i,us N) {
     dbgassert(i<N,"Invalid index for window function Hann");
     return 1.0;
 }
-
+static d bartlett(us n,us N) {
+    dbgassert(n<N,"Invalid index for window function Bartlett");
+    return 1 - d_abs(2*(n - (N-1)/2.)/(N-1));
+}
 int window_create(const WindowType wintype,vd* result,d* win_pow) {
-
+    fsTRACE(15);
     us nfft = result->size;
     d (*win_fun)(us,us);
     switch (wintype) {
@@ -67,21 +70,26 @@ int window_create(const WindowType wintype,vd* result,d* win_pow) {
         win_fun = hamming;
         break;
     }
-    case Blackman: {
-        win_fun = blackman;
-        break;
-    }
     case Rectangular: {
         win_fun = rectangle;
         break;
     }
+    case Bartlett: {
+        win_fun = bartlett;
+        break;
+    }
+    case Blackman: {
+        win_fun = blackman;
+        break;
+    }
     default:
-        WARN("Unkown window function");
-        return FAILURE;
+        DBGWARN("BUG: Unknown window function");
+        abort();
         break;
     }
     us index;
     for(index=0;index<nfft;index++) {
+
         /* Compute the window function value */
         d val = win_fun(index,nfft);
 
@@ -89,10 +97,10 @@ int window_create(const WindowType wintype,vd* result,d* win_pow) {
         setvecval(result,index,val);
     }
 
+    /* Store window power in result */
     *win_pow = signal_power(result);
 
-    /* Store window power in result */
-    
+    feTRACE(15);
     return SUCCESS;
 }
 
diff --git a/beamforming/c/window.h b/beamforming/c/window.h
index 5eea788..3360604 100644
--- a/beamforming/c/window.h
+++ b/beamforming/c/window.h
@@ -13,8 +13,10 @@
 typedef enum {
     Hann = 0,
     Hamming = 1,
-    Blackman = 2,
-    Rectangular = 3,
+    Rectangular = 2,    
+    Bartlett = 3,
+    Blackman = 4,
+
 } WindowType;
 
 /** 
diff --git a/beamforming/config.pxi.in b/beamforming/config.pxi.in
index 711fe09..40aa728 100644
--- a/beamforming/config.pxi.in
+++ b/beamforming/config.pxi.in
@@ -38,6 +38,12 @@ cdef extern from "ascee_math.h":
                       us n_cols,
                       c* data)
 
+    cmat cmat_alloc(us n_rows,us n_cols)
+    dmat dmat_alloc(us n_rows,us n_cols)
+    
     void dmat_free(dmat*)
     void cmat_free(cmat*)
     void cmat_copy(cmat* to,cmat* from_)
+
+cdef extern from "ascee_tracer.h":
+    void clearScreen()
diff --git a/beamforming/fft.pyx b/beamforming/wrappers.pyx
similarity index 83%
rename from beamforming/fft.pyx
rename to beamforming/wrappers.pyx
index fe94876..d8fe8ed 100644
--- a/beamforming/fft.pyx
+++ b/beamforming/wrappers.pyx
@@ -1,6 +1,19 @@
 include "config.pxi"
 
-# setTracerLevel(0)
+setTracerLevel(-5)
+cdef extern from "cblas.h":
+    int openblas_get_num_threads()
+    void openblas_set_num_threads(int)
+
+# If we touch this variable: we get segfaults when running from
+# Spyder!
+#openblas_set_num_threads(8)
+# print("Number of threads: ",
+# openblas_get_num_threads())
+
+def cls():
+    clearScreen()
+cls()
 
 cdef extern from "fft.h":
     ctypedef struct c_Fft "Fft"
@@ -30,14 +43,16 @@ cdef class Fft:
         assert timedata.shape[0] ==nfft
         assert timedata.shape[1] == nchannels
         
-        result = np.empty((nfft//2+1,
-                           nchannels),
-                           dtype=NUMPY_COMPLEX_TYPE,order='F')
+        result = np.empty((nfft//2+1,nchannels),
+                          dtype=NUMPY_COMPLEX_TYPE,
+                          order='F')
+
         # result[:,:] = np.nan+1j*np.nan
         cdef c[::1,:] result_view = result
         cdef cmat r = cmat_foreign(result.shape[0],
                                    result.shape[1],
                                    &result_view[0,0])
+        
         cdef dmat t = dmat_foreign(timedata.shape[0],
                                    timedata.shape[1],
                                    &timedata[0,0])
@@ -54,8 +69,14 @@ cdef extern from "window.h":
     ctypedef enum WindowType:
         Hann = 0
         Hamming = 1
-        Blackman = 2
-        Rectangular = 3
+        Rectangular = 2
+        Bartlett = 3
+        Blackman = 4
+hann = 0
+hamming = 1
+rectangular = 2
+bartlett = 3
+blackman = 4
 
 cdef extern from "ps.h":
     ctypedef struct c_PowerSpectra "PowerSpectra"
@@ -73,8 +94,8 @@ cdef class PowerSpectra:
     cdef:
         c_PowerSpectra* _ps
 
-    def __cinit__(self, us nfft,us nchannels):
-        self._ps = PowerSpectra_alloc(nfft,nchannels,Rectangular)
+    def __cinit__(self, us nfft,us nchannels,us window=rectangular):
+        self._ps = PowerSpectra_alloc(nfft,nchannels,<WindowType> window)
         if self._ps == NULL:
             raise RuntimeError('PowerSpectra allocation failed')
 
@@ -85,6 +106,8 @@ cdef class PowerSpectra:
             int rv
             dmat td
             cmat result_mat
+
+        
         td = dmat_foreign(nfft,
                           nchannels,
                           &timedata[0,0])
@@ -99,12 +122,15 @@ cdef class PowerSpectra:
         result = np.empty((nfft//2+1,nchannels,nchannels),
                           dtype = NUMPY_COMPLEX_TYPE,
                           order='F')
+        
         cdef c[::1,:,:] result_view = result
 
         result_mat = cmat_foreign(nfft//2+1,
                                   nchannels*nchannels,
                                   &result_view[0,0,0])
 
+
+        
         PowerSpectra_compute(self._ps,&td,&result_mat)
 
         dmat_free(&td)
@@ -137,11 +163,14 @@ cdef class AvPowerSpectra:
         c_AvPowerSpectra* aps
         us nfft, nchannels
 
-    def __cinit__(self,us nfft,us nchannels,d overlap_percentage):
+    def __cinit__(self,us nfft,
+                  us nchannels,
+                  d overlap_percentage,
+                  us window=rectangular):
         self.aps = AvPowerSpectra_alloc(nfft,
                                         nchannels,
                                         overlap_percentage,
-                                        Rectangular)
+                                        <WindowType> window)
         self.nchannels = nchannels
         self.nfft = nfft
 
@@ -196,3 +225,8 @@ cdef class AvPowerSpectra:
 
 
         return result
+    
+    def getFreq(self, d fs):
+        cdef d df = fs/self.nfft  # frequency resolution 
+        cdef us K = self.nfft//2+1      # number of frequency bins
+        return np.linspace(0, (K-1)*df, K)
diff --git a/fftpack/fftpack.h b/fftpack/fftpack.h
index f3d569e..7458aa1 100644
--- a/fftpack/fftpack.h
+++ b/fftpack/fftpack.h
@@ -1,111 +1,27 @@
-// fftpack.h
-//
-// Author: J.A. de Jong - ASCEE
-//
-// Description:
-// Header file for FFT routines of fftpack
-//////////////////////////////////////////////////////////////////////
-#pragma once
-#ifndef FFTPACK_H
-#define FFTPACK_H
-#include "ascee_tracer.h"
-#include "ascee_alloc.h"
-#include "npy_fftpack.h"
-
 /*
- * Fortran has two kind of routines: functions and subroutines. A
- Fortran function is a C function that returns a single value.
-
- A * subroutine called from C is in fact a C function
- which returns void.
+ * This file is part of tela the Tensor Language.
+ * Copyright (c) 1994-1995 Pekka Janhunen
  */
-#if ASCEE_DOUBLE_PRECISION
-// These are all subroutines
-extern void dffti_ (int *nfft,d* wsave);
-extern void dfftf_ (int *nfft,d* r,d* wsave);
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+#define NPY_VISIBILITY_HIDDEN
+
+#ifdef ASCEE_DOUBLE_PRECISION
+#define Treal double
 #else
-extern void rffti_ (int *nfft,d* wsave);
-extern void rfftf_ (int *nfft,d* r,d* wsave);
-#endif // ASCEE_DOUBLE_PRECISION
+#define Treal float
+#endif
 
-typedef struct Fftr_s {
-    d *work_area;
-    d* wsave_ptr;
-    int nfft;
-} Fftr;
+extern NPY_VISIBILITY_HIDDEN void npy_cfftf(int N, Treal data[], const Treal wrk[]);
+extern NPY_VISIBILITY_HIDDEN void npy_cfftb(int N, Treal data[], const Treal wrk[]);
+extern NPY_VISIBILITY_HIDDEN void npy_cffti(int N, Treal wrk[]);
 
-/** 
- * 
- *
- * @param nfft the length of the sequence to be transformed
+extern NPY_VISIBILITY_HIDDEN void npy_rfftf(int N, Treal data[], const Treal wrk[]);
+extern NPY_VISIBILITY_HIDDEN void npy_rfftb(int N, Treal data[], const Treal wrk[]);
+extern NPY_VISIBILITY_HIDDEN void npy_rffti(int N, Treal wrk[]);
 
- * @param wsave a work array which must be dimensioned at least
- * 2*nfft+15.  the same work array can be used for both rfftf and
- * rfftb as long as n remains unchanged. different wsave arrays
- * are required for different values of n. the contents of wsave
- * must not be changed between calls of rfftf or rfftb.
- */
-
-Fftr* Fftr_alloc(int nfft) {
-    fsTRACE(15);
-
-    Fftr* fftr = a_malloc(sizeof(fftr));
-    dbgassert(nfft>0,"Invalid nfft")
-    dbgassert(fftr,ALLOCFAILED "Fftr_alloc");
-    fftr->work_area = a_malloc(sizeof(d)*(3*nfft+15));
-    fftr->wsave_ptr = &fftr->work_area[nfft];
-    fftr->nfft = nfft;
-    #if ASCEE_DOUBLE_PRECISION
-    // dffti_(&nfft,fftr->wsave);
-    npy_rffti(nfft,fftr->wsave_ptr);
-    #else
-    // rffti_(&nfft,fftr->wsave);
-    #endif
-
-    feTRACE(15);
-    return fftr;
+#ifdef __cplusplus
 }
-void Fftr_free(Fftr* fftr) {
-    dbgassert(fftr,NULLPTRDEREF "Fftr_free");
-    a_free(fftr->work_area);
-    a_free(fftr);
-}
-
-
-/** 
- * 
- *
- * @param nfft 
- * @param wsave 
- * @param input 
- * @param work
- * @param result 
- */
-static inline void Fftr_fftr(Fftr* fftr,d* input,c* result) {
-
-
-    // Copy contents of input to the work area
-    d_copy(fftr->work_area,input,fftr->nfft);
-
-    int nfft = fftr->nfft;
-    
-    #if ASCEE_DOUBLE_PRECISION
-    // dfftf_(&nfft,fftr->work,fftr->wsave);
-    npy_rfftf(nfft,fftr->work_area,fftr->wsave_ptr);
-    #else
-    NOT TESTED
-    rfftf_(&nfft,fftr->work_area,fftr->wsave_ptr);
-    #endif
-
-    result[0] = fftr->work_area[0];
-    d_copy((d*) (&result[1]),&fftr->work_area[1],nfft);
-    // Not portable way of setting imaginary part to zero. Works with
-    // gcc, though.
-    __imag__ result[nfft/2] = 0;
-    
-}
-
-
-
-#endif // FFTPACK_H
-//////////////////////////////////////////////////////////////////////
+#endif
diff --git a/fftpack/npy_fftpack.h b/fftpack/npy_fftpack.h
deleted file mode 100644
index aa706ea..0000000
--- a/fftpack/npy_fftpack.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * This file is part of tela the Tensor Language.
- * Copyright (c) 1994-1995 Pekka Janhunen
- */
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#define NPY_VISIBILITY_HIDDEN
-
-#define DOUBLE
-
-#ifdef DOUBLE
-#define Treal double
-#else
-#define Treal float
-#endif
-
-extern NPY_VISIBILITY_HIDDEN void npy_cfftf(int N, Treal data[], const Treal wrk[]);
-extern NPY_VISIBILITY_HIDDEN void npy_cfftb(int N, Treal data[], const Treal wrk[]);
-extern NPY_VISIBILITY_HIDDEN void npy_cffti(int N, Treal wrk[]);
-
-extern NPY_VISIBILITY_HIDDEN void npy_rfftf(int N, Treal data[], const Treal wrk[]);
-extern NPY_VISIBILITY_HIDDEN void npy_rfftb(int N, Treal data[], const Treal wrk[]);
-extern NPY_VISIBILITY_HIDDEN void npy_rffti(int N, Treal wrk[]);
-
-#ifdef __cplusplus
-}
-#endif
diff --git a/test/benchmark.py b/test/benchmark.py
index 1039303..03c5d15 100644
--- a/test/benchmark.py
+++ b/test/benchmark.py
@@ -1,12 +1,15 @@
-#!/usr/bin/env python3
+#!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 Created on Mon Jan 15 19:45:33 2018
 
 @author: anne
 """
+from pycallgraph import PyCallGraph
+from pycallgraph.output import GraphvizOutput
+
 import numpy as np
-from beamforming.fft import Fft
+from beamforming import Fft
 
 nfft=2**17
 
diff --git a/test/fft_test.py b/test/fft_test.py
index e2a0012..a42078f 100644
--- a/test/fft_test.py
+++ b/test/fft_test.py
@@ -6,7 +6,7 @@ Created on Mon Jan 15 19:45:33 2018
 @author: anne
 """
 import numpy as np
-from beamforming.fft import Fft
+from beamforming import Fft
 
 nfft=6
 print('nfft:',nfft)
diff --git a/test/ps_test.py b/test/ps_test.py
index 7add7d1..ed7859f 100644
--- a/test/ps_test.py
+++ b/test/ps_test.py
@@ -6,9 +6,9 @@ Created on Mon Jan 15 19:45:33 2018
 @author: anne
 """
 import numpy as np
-from beamforming.fft import Fft, PowerSpectra
-
-nfft=2**4
+from beamforming import Fft, PowerSpectra, cls
+cls
+nfft=2048
 print('nfft:',nfft)
 #print(nfft)
 nchannels = 2
diff --git a/test/test_bf.c b/test/test_bf.c
index 98779d5..4871741 100644
--- a/test/test_bf.c
+++ b/test/test_bf.c
@@ -35,7 +35,7 @@ int main() {
 
 
     vc res2 = vc_alloc(3);
-    c_elem_prod_c(res2.data,vc1.data,vc2.data,3);
+    c_hadamard(res2.ptr,vc1.ptr,vc2.ptr,3);
 
     print_vc(&res2);
 
diff --git a/test/test_math.c b/test/test_math.c
index 8d9bbd9..2c84a19 100644
--- a/test/test_math.c
+++ b/test/test_math.c
@@ -9,25 +9,39 @@
 #include "fft.h"
 #include "ascee_math.h"
 #include "ascee_tracer.h"
+#include "ascee_alg.h"
 
 int main() {
 
     iVARTRACE(15,getTracerLevel());
 
-    vc a = vc_alloc(5);
-    vc_set(&a,2+3*I);
+    c a[5];
+    c_set(a,1-I,5);
 
-    c_conj_inplace(a.data,a.size);
-    print_vc(&a);
+    /* print_vc(&a); */
 
     vc b = vc_alloc(5);    
+    vc_set(&b,2);
 
-    c_conj_c(b.data,a.data,5);
 
+    printf("b:\n");    
     print_vc(&b);    
 
-    vc_free(&a);
-    vc_free(&b);    
+
+    vc c1 = vc_alloc(5);
+    /* vc_set(&c1,10); */
+    /* c_add_to(c1.ptr,a.ptr,1,3); */
+    c_hadamard(c1.ptr,a,b.ptr,5);
+
+
+
+    printf("c1:\n");
+    print_vc(&c1); 
+    
+    vc_free(&b);
+
+
+    
     return 0;
 }
 
diff --git a/test/test_workers.c b/test/test_workers.c
index bd41e22..5cda373 100644
--- a/test/test_workers.c
+++ b/test/test_workers.c
@@ -8,6 +8,7 @@
 #include "worker.h"
 #include "mq.h"
 #include "ascee_tracer.h"
+#include "ascee_assert.h"
 #include <unistd.h>
 static void* walloc(void*);
 static int worker(void*,void*);
@@ -22,7 +23,7 @@ int main() {
 
     us njobs = 4;
     JobQueue* jq = JobQueue_alloc(njobs);
-    assert(jq);
+    dbgassert(jq,NULLPTRDEREF);
 
     Workers* w = Workers_create(njobs,
                                 jq,
@@ -30,7 +31,7 @@ int main() {
                                 worker,
                                 wfree,
                                 (void*) 101);
-    assert(w);
+    dbgassert(jq,NULLPTRDEREF);
 
     for(us i=0; i< njobs; i++) {
         iVARTRACE(15,i);