Refactoring math routines. Added comments. Removed multithreading from fft routines. Switched to fftpack for FFT. Added Doxyfile

2018-02-06 12:01:27 +01:00 · 2018-02-06 12:01:27 +01:00 · 83aaf69bef
commit 83aaf69bef
parent 52e5a31bdb
28 changed files with 4508 additions and 1254 deletions
--- a/.gitignore
+++ b/.gitignore
@ -12,3 +12,7 @@ cython_debug
 beamforming/config.pxi
 beamforming/fft.c
 *.so
 test/test_bf
 test/test_fft
 test/test_math
 test/test_workers
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -13,6 +13,9 @@ add_definitions(-DASCEE_MAX_NUM_THREADS=8)
 add_definitions(-DASCEE_MAX_NUM_CHANNELS=80)
 # Reasonable maximum to the nfft size, at 48kHz this is 700s of data..
 add_definitions(-DASCEE_MAX_NFFT=33554432) # 2**25
 # ####################################### End of user-adjustable variables section
 add_definitions(-D_REENTRANT)
--- a/2482
+++ b/2482
--- a/beamforming/c/CMakeLists.txt
+++ b/beamforming/c/CMakeLists.txt
@ -6,10 +6,13 @@ endif(!ASCEE_DEBUG)
 add_library(beamforming_lib
  fft.c
  ascee_math.c
  ascee_math_raw.c
  ascee_alg.c
  ascee_assert.c
-  tracer.c
+  ascee_tracer.c
  window.c
-  # ps.c
+  aps.c
  ps.c
  mq.c
  worker.c
  )
--- a/beamforming/c/aps.c
+++ b/beamforming/c/aps.c
@ -0,0 +1,228 @@
 // aps.c
 //
 // Author: J.A. de Jong -ASCEE
 // 
 // Description:
 //
 //////////////////////////////////////////////////////////////////////
 #include "aps.h"
 #include "ps.h"
 #include "ascee_alg.h"
 typedef struct AvPowerSpectra_s {
    us os;              /**< Counter set to the position where
                         * the next time block should be taken
                         * from */
    us nfft, nchannels;
    us oo;
    us naverages;               /* Counter that counts the number of
                                 * averages taken for the computation
                                 * of this averaged power spectra. */
    dmat buffer;                /**< Buffer storage of some of the
                                 * previous samples. Number of rows is
                                 * equal to nfft. */
    cmat ps_storage;           /**< Here we store the averaged
                                * results for each Cross-power
                                * spectra computed so far. */
    cmat ps_single;             /**< This is the work area for a
                                 * PowerSpectra computation on a
                                 * single block */
    PowerSpectra* ps;           /**< Pointer to underlying
                                 * PowerSpectra calculator. */
 } AvPowerSpectra;
 AvPowerSpectra* AvPowerSpectra_alloc(const us nfft,
                                     const us nchannels,
                                     const d overlap_percentage,
                                     const WindowType wt) {
    fsTRACE(15);
    /* Check nfft */
    if(nfft % 2 != 0 || nfft > ASCEE_MAX_NFFT) {
        WARN("nfft should be even");
        return NULL;
    }
    /* Check overlap percentage */
    if(overlap_percentage >= 100) {
        WARN("Overlap percentage >= 100!");
        return NULL;
    }
    if(overlap_percentage < 0) {
        WARN("Overlap percentage should be positive!");
        return NULL;
    }
    /* Compute and check overlap offset */
    us oo = nfft - (us) (overlap_percentage*nfft/100);
    if(oo == 0) {oo++;}
    PowerSpectra* ps = PowerSpectra_alloc(nfft,nchannels,wt);
    if(!ps) {
        WARN(ALLOCFAILED "ps");
        return NULL;
    }
    AvPowerSpectra* aps = a_malloc(sizeof(AvPowerSpectra));
    if(!aps) {
        WARN("Allocation of AvPowerSpectra memory failed");
        PowerSpectra_free(ps);
        return NULL;
    }
    aps->nchannels = nchannels;
    aps->nfft = nfft;
    aps->ps = ps;
    aps->naverages = 0;
    aps->oo = oo;
    aps->os = oo;
    /* Allocate vectors and matrices */
    aps->buffer = dmat_alloc(nfft,nchannels);
    aps->ps_storage = cmat_alloc(nfft/2+1,nchannels*nchannels);
    aps->ps_single = cmat_alloc(nfft/2+1,nchannels*nchannels);
    cmat_set(&aps->ps_storage,0);
    return aps;
 }
 us AvPowerSpectra_getAverages(const AvPowerSpectra* ps) {
    return ps->naverages;
 }
 /** 
 * Helper function that adds a block of time data to the APS
 *
 * @param aps AvPowerSpectra handle 
 * @param block Time data block. Size should be exactly nfft*nchannels.
 */
 static void AvPowerSpectra_addBlock(AvPowerSpectra* aps,
                                    const dmat* block) {
    fsTRACE(15);
    dbgassert(aps && block,NULLPTRDEREF);
    dbgassert(block->n_rows == aps->nfft,"Invalid block n_rows");
    dbgassert(block->n_cols == aps->nchannels,"Invalid block n_cols");
    cmat ps_single = aps->ps_single;
    cmat ps_storage = aps->ps_storage;
    c naverages = (++aps->naverages);
    /* Scale previous result */
    cmat_scale(&ps_storage,
               (naverages-1)/naverages);
    PowerSpectra_compute(aps->ps,
                         block,
                         &ps_single);
    /* Add new result, scaled properly */
    cmat_add_cmat(&ps_storage,
                  &ps_single,1/naverages);
    feTRACE(15);
 }
 cmat* AvPowerSpectra_addTimeData(AvPowerSpectra* aps,
                                const dmat* timedata) {
    fsTRACE(15);
    dbgassert(aps && timedata,NULLPTRDEREF);
    const us nchannels = aps->nchannels;
    const us nfft = aps->nfft;
    dbgassert(timedata->n_cols == nchannels,"Invalid time data");
    dbgassert(timedata->n_rows >= nfft,"Invalid time data. "
              "Should at least have nfft rows");
    const us oo = aps->oo;
    us* os = &aps->os;
    us os_timedata = 0;
    dmat buffer = aps->buffer;
    /* Retrieve the buffer and use it to make the first time block. */
    if(*os < oo) {
        dmat tmp = dmat_alloc(nfft,nchannels);
        copy_dmat_rows(&tmp,
                       &buffer,
                       *os,       /* Startrow_from */
                       0,         /* Startrow to */
                       nfft - *os /* nrows */
            );
        copy_dmat_rows(&tmp,
                       timedata,
                       0,
                       nfft - *os,
                       *os
            );
        AvPowerSpectra_addBlock(aps,&tmp);
        os_timedata = oo + *os - nfft;
        dmat_free(&tmp);
    }
    /* Run until we cannot go any further */
    while (os_timedata + nfft <= timedata->n_rows) {
        dmat tmp = dmat_submat(timedata,
                               os_timedata,
                               0,nfft,nchannels);
        AvPowerSpectra_addBlock(aps,&tmp);
        os_timedata += oo;
        dmat_free(&tmp);
    }
    /* We copy the last piece of samples from the timedata to the
     * buffer */
    copy_dmat_rows(&buffer,
                   timedata,
                   timedata->n_rows-nfft,
                   0,
                   nfft);
    *os = os_timedata+nfft-timedata->n_rows;
    dbgassert(*os < nfft,"BUG");
    feTRACE(15);
    return &aps->ps_storage;
 }
 void AvPowerSpectra_free(AvPowerSpectra* aps) {
    fsTRACE(15);
    PowerSpectra_free(aps->ps);
    dmat_free(&aps->buffer);
    cmat_free(&aps->ps_storage);
    cmat_free(&aps->ps_single);
    a_free(aps);
    feTRACE(15);
 }
 //////////////////////////////////////////////////////////////////////
--- a/beamforming/c/aps.h
+++ b/beamforming/c/aps.h
@ -0,0 +1,77 @@
 // aps.h
 //
 // Author: J.A. de Jong - ASCEE
 //
 // Description:
 //
 //////////////////////////////////////////////////////////////////////
 #pragma once
 #ifndef APS_H
 #define APS_H
 #include "types.h"
 #include "ascee_math.h"
 #include "window.h"
 typedef struct AvPowerSpectra_s AvPowerSpectra;
 /** 
 * Allocates a AvPowerSpectra object which is used to compute an
 * averaged power spectra from given input time samples.
 *
 * @param[in] nfft Size of the fft,
 *
 * @param[in] nchannels Number of channels of time data to allocate
 * for.
 *
 * @param[in] overlap_percentage. Should be 0<= overlap_pct < 100. The
 * overlap percentage will be coerced, as the offset will be an
 * integer number of samples at all cases. Therefore, the real overlap
 * percentage can be obtained later on
 *
 * @param wt 
 *
 * @return 
 */
 AvPowerSpectra* AvPowerSpectra_alloc(const us nfft,
                                     const us nchannels,
                                     const d overlap_percentage,
                                     const WindowType wt);
 /** 
 * Computes the real overlap offset 
 *
 * @param aps 
 */
 d AvPowerSpectra_realoverlappct(const AvPowerSpectra* aps);
 /** 
 * Return the current number of averages taken to obtain the result.
 *
 * @return The number of averages taken.
 */
 us AvPowerSpectra_getAverages(const AvPowerSpectra*);
 /** 
 * Add time data to this Averaged Power Spectra. Remembers the part
 * that should be overlapped with the next time data.
 *
 * @param aps AvPowerSpectra handle 
 *
 * @param timedata Pointer to timedata buffer. Number of rows SHOULD
 * *at least* be nfft. Number of columns should exactly be nchannels.
 *
 * @return pointer to the averaged power spectra buffer that is being
 * written to. Pointer is valid until AvPowerSpectra_free() is called.
 */
 cmat* AvPowerSpectra_addTimeData(AvPowerSpectra* aps,
                                 const dmat* timedata);
 /** 
 * Free storage of the AvPowerSpectra
 */
 void AvPowerSpectra_free(AvPowerSpectra*);
 #endif // APS_H
 //////////////////////////////////////////////////////////////////////
--- a/beamforming/c/ascee_alg.c
+++ b/beamforming/c/ascee_alg.c
@ -0,0 +1,230 @@
 // ascee_alg.c
 //
 // Author: J.A. de Jong -ASCEE
 // 
 // Description:
 // (Linear) algebra routine implementations
 //////////////////////////////////////////////////////////////////////
 #include "ascee_alg.h"
 void cmv_dot(const cmat* A,const vc* restrict x,vc* restrict b){
    assert(A->n_rows == b->size);
    assert(A->n_cols == x->size);
    #if ASCEE_USE_BLAS == 1
    /* typedef enum CBLAS_ORDER     {CblasRowMajor=101, CblasColMajor=102} CBLAS_ORDER; */
    /* typedef enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113, CblasConjNoTrans=114} CBLAS_TRANSPOSE; */
    /* 
       void cblas_zgemv(OPENBLAS_CONST enum CBLAS_ORDER order,
       OPENBLAS_CONST enum CBLAS_TRANSPOSE trans,
       OPENBLAS_CONST blasint m,
       OPENBLAS_CONST blasint n,
       OPENBLAS_CONST double *alpha,
       OPENBLAS_CONST double  *a,
       OPENBLAS_CONST blasint lda,
       OPENBLAS_CONST double  *x,
       OPENBLAS_CONST blasint incx,
       OPENBLAS_CONST double *beta,
       double  *y,
       OPENBLAS_CONST blasint incy);
    */
    c alpha = 1.0;
    c beta = 0.0;
    cblas_zgemv(CblasColMajor,
                CblasNoTrans,
                A->n_rows,
                A->n_cols,
                (d*) &alpha,			/* alpha */
                (d*) A->data,		/* A */
                A->n_rows,		/* lda */
                (d*) x->data,		/*  */
                1,
                (d*) &beta,			/* beta */
                (d*) b->data,
                1);
    #else
    size_t i,j;
    size_t n_rows = A->n_rows;
    vc_set(b,0.0);
    iVARTRACE(20,A->n_cols);
    iVARTRACE(20,A->n_rows);
    for(j=0;j<A->n_cols;j++){
        for(i=0;i<A->n_rows;i++) {
            c* Aij = &A->data[i+j*n_rows];
            b->data[i] += *Aij * x->data[j];
        }
    }
    #endif
 }
 /// The code below here is not yet worked on. Should be improved to
 /// directly couple to openblas, instead of using lapacke.h
 #if 0
 /* These functions can be directly linked to openBLAS */
 #define lapack_complex_double   double _Complex
 #define lapack_complex_float   float _Complex
 #define LAPACK_ROW_MAJOR               101
 #define LAPACK_COL_MAJOR               102
 #define LAPACK_WORK_MEMORY_ERROR       -1010
 #define LAPACK_TRANSPOSE_MEMORY_ERROR  -1011
 typedef int lapack_int;
 int LAPACKE_cgelss( int matrix_layout, int m, int n,
                    int nrhs, lapack_complex_float* a,
                    int lda, lapack_complex_float* b,
                    int ldb, float* s, float rcond,
                    int* rank );
 int LAPACKE_zgelss( int matrix_layout, int m, int n,
                    int nrhs, lapack_complex_double* a,
                    int lda, lapack_complex_double* b,
                    int ldb, double* s, double rcond,
                    int* rank );
 lapack_int LAPACKE_zgels( int matrix_layout, char trans, lapack_int m,
                          lapack_int n, lapack_int nrhs,
                          lapack_complex_double* a, lapack_int lda,
                          lapack_complex_double* b, lapack_int ldb );
 #if ASCEE_FLOAT == 64
 #define lapack_gelss LAPACKE_zgelss
 #define lapack_gels LAPACKE_zgels
 #else
 #define lapack_gelss LAPACKE_cgelss
 #endif
 #define max(a,b) ((a)>(b)?(a):(b))
 /* int lsq_solve(const cmat* A,const vc* b,vc* x){ */
 /*     POOL_INIT(lsq_solve_pool); */
 /*     int rv; */
 /*     /\* M: number of rows of matrix *\/ */
 /*     /\* N: Number of columns *\/ */
 /*     /\* Norm: L2|b-A*x| *\/ */
 /*     /\* NRHS: Number of right hand sides: Number of columns of matrix B *\/ */
 /*     assert(A->n_rows>=A->n_cols); */
 /*     assert(x->size == A->n_cols); */
 /*     assert(b->size == A->n_rows); */
 /*     int info; */
 /*     size_t lda = max(1,A->n_rows); */
 /*     size_t ldb = max(lda,A->n_cols); */
 /*     /\* Make explicit copy of matrix A data, as it will be overwritten */
 /*      * by lapack_gels *\/ */
 /*     c* A_data = Pool_allocatec(&lsq_solve_pool,A->n_rows*A->n_cols); */
 /*     c_copy(A_data,A->data,A->n_cols*A->n_rows); */
 /*     c* work_data = Pool_allocatec(&lsq_solve_pool,b->size); */
 /*     c_copy(work_data,b->data,b->size); */
 /*     /\* Lapack documentation says: *\/ */
 /*     /\* 	if TRANS = 'N' and m >= n, rows 1 to n of B contain the least */
 /*         squares solution vectors; the residual sum of squares for the */
 /*         solution in each column is given by the sum of squares of the */
 /*         modulus of elements N+1 to M in that column; */
 /*     *\/ */
 /*     /\* We always assume one RHS column *\/ */
 /*     const int nrhs = 1; */
 /*     /\* General Least Squares Solve *\/ */
 /*     info = lapack_gels(LAPACK_COL_MAJOR, /\* Column-major ordering *\/ */
 /*                        'N', */
 /*                        A->n_rows,	/\* Number of rows in matrix *\/ */
 /*                        A->n_cols,	/\* Number of columns *\/ */
 /*                        nrhs,   /\* nrhs, which is number_mics *\/ */
 /*                        A_data, /\* The A-matrix *\/ */
 /*                        lda,	  /\* lda: the leading dimension of matrix A *\/ */
 /*                        work_data,	  /\* The b-matrix *\/ */
 /*                        ldb);  /\* ldb: the leading dimension of b: max(1,M,N) *\/ */
 /*     if(info==0){ */
 /*         c_copy(x->data,work_data,x->size); */
 /*         rv = SUCCESS; */
 /*     } */
 /*     else { */
 /*         memset(x->data,0,x->size); */
 /*         WARN("LAPACK INFO VALUE"); */
 /*         printf("%i\n", info ); */
 /*         TRACE(15,"Solving least squares problem failed\n"); */
 /*         rv = FAILURE; */
 /*     } */
 /*     Pool_free(&lsq_solve_pool,A_data); */
 /*     Pool_free(&lsq_solve_pool,work_data); */
 /*     POOL_EXIT(lsq_solve_pool,15);     */
 /*     return rv; */
 /* } */
 /* d c_normdiff(const cmat* A,const cmat* B) { */
 /*     TRACE(15,"c_normdif"); */
 /*     dbgassert(A->n_cols==B->n_cols,"Number of columns of A and B " */
 /*               "should be equal"); */
 /*     dbgassert(A->n_rows==B->n_rows,"Number of rows of A and B " */
 /*               "should be equal"); */
 /*     size_t size = A->n_cols*A->n_rows; */
 /*     vc diff_temp = vc_al[MAX_MATRIX_SIZE]; */
 /*     c_copy(diff_temp,A->data,size); */
 /*     c alpha = -1.0; */
 /*     /\* This routine computes y <- alpha*x + beta*y *\/ */
 /*     /\* void cblas_zaxpy(OPENBLAS_CONST blasint n,  *\/ */
 /*     /\* 				 OPENBLAS_CONST double *alpha, *\/ */
 /*     /\* 				 OPENBLAS_CONST double *x, *\/ */
 /*     /\* 				 OPENBLAS_CONST blasint incx, *\/ */
 /*     /\* 				 double *y, *\/ */
 /*     /\* 				 OPENBLAS_CONST blasint incy); *\/ */
 /*     cblas_zaxpy(size, */
 /*                 (d*) &alpha, */
 /*                 (d*) B->data, */
 /*                 1, */
 /*                 (d*) diff_temp, */
 /*                 1 ); */
 /*     return c_norm(diff_temp,size); */
 /* } */
 #endif  /* if 0 */
 //////////////////////////////////////////////////////////////////////
--- a/beamforming/c/ascee_alg.h
+++ b/beamforming/c/ascee_alg.h
@ -0,0 +1,149 @@
 // ascee_alg.h
 //
 // Author: J.A. de Jong - ASCEE
 //
 // Description:
 // (Linear) algebra routines on matrices and vectors
 //////////////////////////////////////////////////////////////////////
 #pragma once
 #ifndef ASCEE_ALG_H
 #define ASCEE_ALG_H
 #include "ascee_math.h"
 /** 
 * Compute the dot product of two vectors of floats
 *
 * @param a First vector
 * @param b Second second vector
 * @return dot product as float
 */
 static inline d vd_dot(const vd * a,const vd* b) {
    dbgassert(a->size == b->size,SIZEINEQUAL);
    return d_dot(a->ptr,b->ptr,a->size);
 }
 /** 
 * y = fac * y
 *
 * @param y 
 * @param fac scale factor
 */
 static inline void dmat_scale(dmat* y,const c fac){
    dbgassert(y,NULLPTRDEREF);
    if(likely(y->data)) {
        d_scale(y->data,fac,y->n_cols*y->n_rows);
    }
    else {
        for(us col=0;col<y->n_cols;col++) {
            d_scale(y->col_ptrs[col],fac,y->n_rows);
        }
    }
 }
 /** 
 * y = fac * y
 *
 * @param y 
 * @param fac scale factor
 */
 static inline void cmat_scale(cmat* y,const c fac){
    dbgassert(y,NULLPTRDEREF);
    dbgassert(y,NULLPTRDEREF);
    if(likely(y->data)) {
        c_scale(y->data,fac,y->n_cols*y->n_rows);
    }
    else {
        for(us col=0;col<y->n_cols;col++) {
            c_scale(y->col_ptrs[col],fac,y->n_rows);
        }
    }
 }
 /** 
 * x = x + fac*y
 *
 * @param x 
 * @param y 
 * @param fac 
 */
 static inline void dmat_add_dmat(dmat* x,dmat* y,d fac) {
    dbgassert(x && y,NULLPTRDEREF);
    dbgassert(x->n_cols == y->n_cols,SIZEINEQUAL);
    dbgassert(x->n_rows == y->n_rows,SIZEINEQUAL);
    if(likely(x->data && y->data)) {
        d_add_to(x->data,y->data,fac,x->n_cols*x->n_rows);
    }
    else {
        for(us col=0;col<y->n_cols;col++) {
            d_add_to(x->col_ptrs[col],y->col_ptrs[col],fac,x->n_rows);
        }
    }
 }
 /** 
 * x = x + fac*y
 *
 * @param[in,out] x 
 * @param[in] y 
 * @param[in] fac 
 */
 static inline void cmat_add_cmat(cmat* x,cmat* y,c fac) {
    dbgassert(x && y,NULLPTRDEREF);
    dbgassert(x->n_cols == y->n_cols,SIZEINEQUAL);
    dbgassert(x->n_rows == y->n_rows,SIZEINEQUAL);
    if(likely(x->data && y->data)) {
        c_add_to(x->data,y->data,fac,x->n_cols*x->n_rows);
    }
    else {
        for(us col=0;col<y->n_cols;col++) {
            c_add_to(x->col_ptrs[col],y->col_ptrs[col],fac,x->n_rows);
        }
    }
 }
 /** 
 * Compute the element-wise (Hadamard) product of a and b, and store
 * in result
 *
 * @param[out] result 
 * @param[in] a 
 * @param[in] b 
 */
 static inline void vc_elem_prod(vc* result,vc* a,vc* b) {
    dbgassert(result  && a && b,NULLPTRDEREF);
    dbgassert(result->size==a->size,SIZEINEQUAL);
    dbgassert(b->size==a->size,SIZEINEQUAL);
    c_elem_prod_c(result->ptr,a->ptr,b->ptr,a->size);
 }
 /** 
 * Compute the matrix vector product for complex-valued types: b = A*x.
 *
 * @param[in] A Matrix A
 * @param[in] x Vector x
 * @param[out] b Result of computation
 */
 void cmv_dot(const cmat* A,
             const vc* restrict x,
             vc* restrict b);
 int lsq_solve(const cmat* A,
              const vc* restrict b,
              vc* restrict x);
 /** 
 * Compute the norm of the difference of two complex matrices
 *
 * @param A 
 * @param B 
 */
 d cmat_normdiff(const cmat* A,const cmat* B);
 /** 
 * Computes the Kronecker product of a kron b, stores result in result.
 *
 * @param a a
 * @param b b
 * @param result a kron b
 */
 void kronecker_product(const cmat* a,const cmat* b,cmat* result);
 #endif // ASCEE_ALG_H
 //////////////////////////////////////////////////////////////////////
--- a/beamforming/c/ascee_math.c
+++ b/beamforming/c/ascee_math.c
@ -1,33 +1,44 @@
-// si_math.c
+// ascee_math.c
 //
-// last-edit-by: J.A. de Jong 
+// Author: J.A. de Jong -ASCEE
 // 
 // Description:
 // 
 //////////////////////////////////////////////////////////////////////
 #define TRACERPLUS (-10)
 #include "ascee_math.h"
 #include "ascee_assert.h"
 #include "ascee_math.h"
-#include "tracer.h"
+#include "ascee_tracer.h"
 #if ASCEE_USE_BLAS
 #include <cblas.h>
 #endif
 #include <math.h>
 #ifdef ASCEE_DEBUG
 void print_dmat(const dmat* m) {
    size_t row,col;
    for(row=0;row<m->n_rows;row++){
        for(col=0;col<m->n_cols;col++){
            d val = *getdmatval(m,row,col);
            printf("%c%2.2e ", val<0?'-':' ' ,d_abs(val));
        }
        printf("\n");
    }
 }
 void print_cmat(const cmat* m) {
    size_t row,col;
    for(row=0;row<m->n_rows;row++){
        for(col=0;col<m->n_cols;col++){
-            c val = m->data[row+m->n_rows*col];
+            c val = *getcmatval(m,row,col);
            d rval = creal(val);
            d ival = cimag(val);
-            printf("%c%2.2e%c%2.2ei ",rval< 0 ?'-': ' ', d_abs(rval),ival<0 ? '-' : '+',d_abs(ival) ) ;
+            printf("%c%2.2e%c%2.2ei ",rval< 0 ?'-': ' ',
                   d_abs(rval),ival<0 ? '-' : '+',d_abs(ival) ) ;
        }
        printf("\n");
@ -60,394 +71,7 @@ void print_vd(const vd* m) {
        printf("\n");
    }
 }
 void print_dmat(const dmat* m) {
    size_t row,col;
    for(row=0;row<m->n_rows;row++){
        for(col=0;col<m->n_cols;col++){
            d val = m->data[row+m->n_rows*col];
            printf("%c%2.2e ", val<0?'-':' ' ,d_abs(val));
        }
        printf("\n");
    }
 }
 #endif
 void d_elem_prod_d(d res[],
                   const d arr1[],
                   const d arr2[],
                   const us size) {
    #if ASCEE_USE_BLAS
    #if ASCEE_DEBUG
    if(arr1 == arr2) {
        DBGWARN("d_elem_prod_d: Array 1 and array 2 point to the same"
                " memory. This results in pointer aliasing, for which"
                " testing is still to be done. Results might be"
                " unrealiable.");
    }
    #endif
    #if ASCEE_DOUBLE_PRECISION
    #define elem_prod_fun cblas_dsbmv
    #else
    #define elem_prod_fun cblas_ssbmv
    #endif
    /* These parameters do not matter for this specific case */
    const CBLAS_ORDER  mat_order= CblasColMajor;
    const CBLAS_UPLO   uplo = CblasLower;
    /* Extra multiplication factor */
    const d alpha = 1.0;
    /* void cblas_dsbmv(OPENBLAS_CONST enum CBLAS_ORDER order, */
    /*                  OPENBLAS_CONST enum CBLAS_UPLO Uplo, */
    /*                  OPENBLAS_CONST blasint N, */
    /*                  OPENBLAS_CONST blasint K, */
    /*                  OPENBLAS_CONST double alpha, */
    /*                  OPENBLAS_CONST double *A, */
    /*                  OPENBLAS_CONST blasint lda, */
    /*                  OPENBLAS_CONST double *X, */
    /*                  OPENBLAS_CONST blasint incX, */
    /*                  OPENBLAS_CONST double beta, */
    /*                  double *Y, */
    /*                  OPENBLAS_CONST blasint incY); */
    elem_prod_fun(mat_order,
                  uplo,
                  (blasint) size,
                  0,             // Just the diagonal; 0 super-diagonal bands
                  alpha,        /* Multiplication factor alpha */
                  arr1,
                  1,            /* LDA */
                  arr2,         /* x */
                  1, /* incX = 1 */
                  0.0,          /* Beta */
                  res,    /* The Y matrix to write to */
                  1); /* incY */
    #undef elem_prod_fun
    #else  /* No blas routines, routine is very simple, but here we
            * go! */
    DBGWARN("Performing slow non-blas vector-vector multiplication");
    for(us i=0;i<size;i++) {
        res[i] = arr1[i]*arr2[i];
    }
    #endif
 }
 void c_elem_prod_c(c res[],
                   const c arr1[],
                   const c arr2[],
                   const us size) {
    TRACE(15,"c_elem_prod_c");
    uVARTRACE(15,size);
    #if ASCEE_USE_BLAS
    #if ASCEE_DEBUG
    if(arr1 == arr2) {
        DBGWARN("c_elem_prod_c: Array 1 and array 2 point to the same"
                " memory. This results in pointer aliasing, for which"
                " testing is still to be done. Results might be"
                " unrealiable.");
    }
    #endif  /* ASCEE_DEBUG */
    #if ASCEE_DOUBLE_PRECISION
    #define elem_prod_fun cblas_zgbmv
    #else
    #define elem_prod_fun cblas_cgbmv
    #endif
    /* These parameters do not matter for this specific case */
    const CBLAS_ORDER  mat_order= CblasColMajor;
    const CBLAS_TRANSPOSE tr = CblasNoTrans;
    const c alpha = 1.0;
    const c beta = 0.0;
    TRACE(15,"Calling " annestr(elem_prod_fun));
    elem_prod_fun(mat_order,
                  tr,
                  (blasint) size, /* M: Number of rows */
                  (blasint) size, /* B: Number of columns */
                  0,              /* KL: Number of sub-diagonals */
                  0,              /* KU: Number of super-diagonals */
                  (d*) &alpha,        /* Multiplication factor */
                  (d*) arr2,          /* A */
                  1,            /* LDA */
                  (d*) arr1,    /* x */
                  1, /* incX = 1 */
                  (d*) &beta,
                  (d*) res,    /* The Y matrix to write to */
                  1); /* incY */
    #undef elem_prod_fun
    #else  /* No blas routines, routine is very simple, but here we
            * go! */
    DBGWARN("Performing slow non-blas vector-vector multiplication");
    for(us i=0;i<size;i++) {
        res[i] = arr1[i]*arr2[i];
    }
    #endif
 }
 void cmv_dot(const cmat* A,const vc* restrict x,vc* restrict b){
    assert(A->n_rows == b->size);
    assert(A->n_cols == x->size);
    #if ASCEE_USE_BLAS == 1
    /* typedef enum CBLAS_ORDER     {CblasRowMajor=101, CblasColMajor=102} CBLAS_ORDER; */
    /* typedef enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113, CblasConjNoTrans=114} CBLAS_TRANSPOSE; */
    /* 
       void cblas_zgemv(OPENBLAS_CONST enum CBLAS_ORDER order,
       OPENBLAS_CONST enum CBLAS_TRANSPOSE trans,
       OPENBLAS_CONST blasint m,
       OPENBLAS_CONST blasint n,
       OPENBLAS_CONST double *alpha,
       OPENBLAS_CONST double  *a,
       OPENBLAS_CONST blasint lda,
       OPENBLAS_CONST double  *x,
       OPENBLAS_CONST blasint incx,
       OPENBLAS_CONST double *beta,
       double  *y,
       OPENBLAS_CONST blasint incy);
    */
    c alpha = 1.0;
    c beta = 0.0;
    cblas_zgemv(CblasColMajor,
                CblasNoTrans,
                A->n_rows,
                A->n_cols,
                (d*) &alpha,			/* alpha */
                (d*) A->data,		/* A */
                A->n_rows,		/* lda */
                (d*) x->data,		/*  */
                1,
                (d*) &beta,			/* beta */
                (d*) b->data,
                1);
    #else
    size_t i,j;
    size_t n_rows = A->n_rows;
    vc_set(b,0.0);
    iVARTRACE(20,A->n_cols);
    iVARTRACE(20,A->n_rows);
    for(j=0;j<A->n_cols;j++){
        for(i=0;i<A->n_rows;i++) {
            c* Aij = &A->data[i+j*n_rows];
            b->data[i] += *Aij * x->data[j];
        }
    }
    #endif
 }
 void kronecker_product(const cmat* a,const cmat* b,cmat* result){
    assert(result->n_rows == a->n_rows*b->n_rows);
    assert(result->n_cols == a->n_cols*b->n_cols);
    c a_rs;
    c b_vw;
    int r_col;
    int r_row;
    for(size_t r=0; r< a->n_rows;r++){
        for(size_t s=0; s <a->n_cols;s++) {
            for(size_t v=0;v < b->n_rows; v++) {
                for(size_t w=0;w < b->n_cols;w++) {
                    a_rs = *getcmatval(a,r,s);
                    b_vw = *getcmatval(b,v,w);
                    r_row = b->n_rows*r+v;
                    r_col = b->n_cols*s+w;
                    result->data[r_row + r_col * result->n_rows] = a_rs * b_vw;
                }
            }
        }
    }
 } /* void kronecker_product */
 /* #include <lapacke.h> */
 /* These functions can be directly linked to openBLAS */
 #define lapack_complex_double   double _Complex
 #define lapack_complex_float   float _Complex
 #define LAPACK_ROW_MAJOR               101
 #define LAPACK_COL_MAJOR               102
 #define LAPACK_WORK_MEMORY_ERROR       -1010
 #define LAPACK_TRANSPOSE_MEMORY_ERROR  -1011
 typedef int lapack_int;
 int LAPACKE_cgelss( int matrix_layout, int m, int n,
                    int nrhs, lapack_complex_float* a,
                    int lda, lapack_complex_float* b,
                    int ldb, float* s, float rcond,
                    int* rank );
 int LAPACKE_zgelss( int matrix_layout, int m, int n,
                    int nrhs, lapack_complex_double* a,
                    int lda, lapack_complex_double* b,
                    int ldb, double* s, double rcond,
                    int* rank );
 lapack_int LAPACKE_zgels( int matrix_layout, char trans, lapack_int m,
                          lapack_int n, lapack_int nrhs,
                          lapack_complex_double* a, lapack_int lda,
                          lapack_complex_double* b, lapack_int ldb );
 #if ASCEE_FLOAT == 64
 #define lapack_gelss LAPACKE_zgelss
 #define lapack_gels LAPACKE_zgels
 #else
 #define lapack_gelss LAPACKE_cgelss
 #endif
 #define max(a,b) ((a)>(b)?(a):(b))
 /* int lsq_solve(const cmat* A,const vc* b,vc* x){ */
 /*     POOL_INIT(lsq_solve_pool); */
 /*     int rv; */
 /*     /\* M: number of rows of matrix *\/ */
 /*     /\* N: Number of columns *\/ */
 /*     /\* Norm: L2|b-A*x| *\/ */
 /*     /\* NRHS: Number of right hand sides: Number of columns of matrix B *\/ */
 /*     assert(A->n_rows>=A->n_cols); */
 /*     assert(x->size == A->n_cols); */
 /*     assert(b->size == A->n_rows); */
 /*     int info; */
 /*     size_t lda = max(1,A->n_rows); */
 /*     size_t ldb = max(lda,A->n_cols); */
 /*     /\* Make explicit copy of matrix A data, as it will be overwritten */
 /*      * by lapack_gels *\/ */
 /*     c* A_data = Pool_allocatec(&lsq_solve_pool,A->n_rows*A->n_cols); */
 /*     c_copy(A_data,A->data,A->n_cols*A->n_rows); */
 /*     c* work_data = Pool_allocatec(&lsq_solve_pool,b->size); */
 /*     c_copy(work_data,b->data,b->size); */
 /*     /\* Lapack documentation says: *\/ */
 /*     /\* 	if TRANS = 'N' and m >= n, rows 1 to n of B contain the least */
 /*         squares solution vectors; the residual sum of squares for the */
 /*         solution in each column is given by the sum of squares of the */
 /*         modulus of elements N+1 to M in that column; */
 /*     *\/ */
 /*     /\* We always assume one RHS column *\/ */
 /*     const int nrhs = 1; */
 /*     /\* General Least Squares Solve *\/ */
 /*     info = lapack_gels(LAPACK_COL_MAJOR, /\* Column-major ordering *\/ */
 /*                        'N', */
 /*                        A->n_rows,	/\* Number of rows in matrix *\/ */
 /*                        A->n_cols,	/\* Number of columns *\/ */
 /*                        nrhs,   /\* nrhs, which is number_mics *\/ */
 /*                        A_data, /\* The A-matrix *\/ */
 /*                        lda,	  /\* lda: the leading dimension of matrix A *\/ */
 /*                        work_data,	  /\* The b-matrix *\/ */
 /*                        ldb);  /\* ldb: the leading dimension of b: max(1,M,N) *\/ */
 /*     if(info==0){ */
 /*         c_copy(x->data,work_data,x->size); */
 /*         rv = SUCCESS; */
 /*     } */
 /*     else { */
 /*         memset(x->data,0,x->size); */
 /*         WARN("LAPACK INFO VALUE"); */
 /*         printf("%i\n", info ); */
 /*         TRACE(15,"Solving least squares problem failed\n"); */
 /*         rv = FAILURE; */
 /*     } */
 /*     Pool_free(&lsq_solve_pool,A_data); */
 /*     Pool_free(&lsq_solve_pool,work_data); */
 /*     POOL_EXIT(lsq_solve_pool,15);     */
 /*     return rv; */
 /* } */
 /* d c_normdiff(const cmat* A,const cmat* B) { */
 /*     TRACE(15,"c_normdif"); */
 /*     dbgassert(A->n_cols==B->n_cols,"Number of columns of A and B " */
 /*               "should be equal"); */
 /*     dbgassert(A->n_rows==B->n_rows,"Number of rows of A and B " */
 /*               "should be equal"); */
 /*     size_t size = A->n_cols*A->n_rows; */
 /*     vc diff_temp = vc_al[MAX_MATRIX_SIZE]; */
 /*     c_copy(diff_temp,A->data,size); */
 /*     c alpha = -1.0; */
 /*     /\* This routine computes y <- alpha*x + beta*y *\/ */
 /*     /\* void cblas_zaxpy(OPENBLAS_CONST blasint n,  *\/ */
 /*     /\* 				 OPENBLAS_CONST double *alpha, *\/ */
 /*     /\* 				 OPENBLAS_CONST double *x, *\/ */
 /*     /\* 				 OPENBLAS_CONST blasint incx, *\/ */
 /*     /\* 				 double *y, *\/ */
 /*     /\* 				 OPENBLAS_CONST blasint incy); *\/ */
 /*     cblas_zaxpy(size, */
 /*                 (d*) &alpha, */
 /*                 (d*) B->data, */
 /*                 1, */
 /*                 (d*) diff_temp, */
 /*                 1 ); */
 /*     return c_norm(diff_temp,size); */
 /* } */
 //////////////////////////////////////////////////////////////////////
--- a/beamforming/c/ascee_math.h
+++ b/beamforming/c/ascee_math.h
--- a/beamforming/c/ascee_math_raw.c
+++ b/beamforming/c/ascee_math_raw.c
@ -0,0 +1,146 @@
 // si_math.c
 //
 // last-edit-by: J.A. de Jong 
 // 
 // Description:
 // Operations working on raw arrays of floating point numbers
 //////////////////////////////////////////////////////////////////////
 #include "ascee_math_raw.h"
 #if ASCEE_USE_BLAS
 #include <cblas.h>
 #endif
 void d_elem_prod_d(d res[],
                   const d arr1[],
                   const d arr2[],
                   const us size) {
    #if ASCEE_USE_BLAS
    #if ASCEE_DEBUG
    if(arr1 == arr2) {
        DBGWARN("d_elem_prod_d: Array 1 and array 2 point to the same"
                " memory. This results in pointer aliasing, for which"
                " testing is still to be done. Results might be"
                " unrealiable.");
    }
    #endif
    #if ASCEE_DOUBLE_PRECISION
    #define elem_prod_fun cblas_dsbmv
    #else
    #define elem_prod_fun cblas_ssbmv
    #endif
    /* These parameters do not matter for this specific case */
    const CBLAS_ORDER  mat_order= CblasColMajor;
    const CBLAS_UPLO   uplo = CblasLower;
    /* Extra multiplication factor */
    const d alpha = 1.0;
    /* void cblas_dsbmv(OPENBLAS_CONST enum CBLAS_ORDER order, */
    /*                  OPENBLAS_CONST enum CBLAS_UPLO Uplo, */
    /*                  OPENBLAS_CONST blasint N, */
    /*                  OPENBLAS_CONST blasint K, */
    /*                  OPENBLAS_CONST double alpha, */
    /*                  OPENBLAS_CONST double *A, */
    /*                  OPENBLAS_CONST blasint lda, */
    /*                  OPENBLAS_CONST double *X, */
    /*                  OPENBLAS_CONST blasint incX, */
    /*                  OPENBLAS_CONST double beta, */
    /*                  double *Y, */
    /*                  OPENBLAS_CONST blasint incY); */
    elem_prod_fun(mat_order,
                  uplo,
                  (blasint) size,
                  0,             // Just the diagonal; 0 super-diagonal bands
                  alpha,        /* Multiplication factor alpha */
                  arr1,
                  1,            /* LDA */
                  arr2,         /* x */
                  1, /* incX = 1 */
                  0.0,          /* Beta */
                  res,    /* The Y matrix to write to */
                  1); /* incY */
    #undef elem_prod_fun
    #else  /* No blas routines, routine is very simple, but here we
            * go! */
    DBGWARN("Performing slow non-blas vector-vector multiplication");
    for(us i=0;i<size;i++) {
        res[i] = arr1[i]*arr2[i];
    }
    #endif
 }
 void c_elem_prod_c(c res[],
                   const c arr1[],
                   const c arr2[],
                   const us size) {
    TRACE(15,"c_elem_prod_c");
    uVARTRACE(15,size);
    #if ASCEE_USE_BLAS
    #if ASCEE_DEBUG
    if(arr1 == arr2) {
        DBGWARN("c_elem_prod_c: Array 1 and array 2 point to the same"
                " memory. This results in pointer aliasing, for which"
                " testing is still to be done. Results might be"
                " unrealiable.");
    }
    #endif  /* ASCEE_DEBUG */
    #if ASCEE_DOUBLE_PRECISION
    #define elem_prod_fun cblas_zgbmv
    #else
    #define elem_prod_fun cblas_cgbmv
    #endif
    /* These parameters do not matter for this specific case */
    const CBLAS_ORDER  mat_order= CblasColMajor;
    const CBLAS_TRANSPOSE tr = CblasNoTrans;
    const c alpha = 1.0;
    const c beta = 0.0;
    TRACE(15,"Calling " annestr(elem_prod_fun));
    elem_prod_fun(mat_order,
                  tr,
                  (blasint) size, /* M: Number of rows */
                  (blasint) size, /* B: Number of columns */
                  0,              /* KL: Number of sub-diagonals */
                  0,              /* KU: Number of super-diagonals */
                  (d*) &alpha,        /* Multiplication factor */
                  (d*) arr2,          /* A */
                  1,            /* LDA */
                  (d*) arr1,    /* x */
                  1, /* incX = 1 */
                  (d*) &beta,
                  (d*) res,    /* The Y matrix to write to */
                  1); /* incY */
    #undef elem_prod_fun
    #else  /* No blas routines, routine is very simple, but here we
            * go! */
    DBGWARN("Performing slow non-blas vector-vector multiplication");
    for(us i=0;i<size;i++) {
        res[i] = arr1[i]*arr2[i];
    }
    #endif
 }
 //////////////////////////////////////////////////////////////////////
--- a/beamforming/c/ascee_math_raw.h
+++ b/beamforming/c/ascee_math_raw.h
@ -0,0 +1,469 @@
 // ascee_math_raw.h
 //
 // Author: J.A. de Jong - ASCEE
 //
 // Description: Raw math routines working on raw arrays of floats and
 // complex numbers.
 //////////////////////////////////////////////////////////////////////
 #pragma once
 #ifndef ASCEE_MATH_RAW_H
 #define ASCEE_MATH_RAW_H
 #include "ascee_assert.h"
 #include "ascee_tracer.h"
 #include "types.h"
 #include <math.h>
 #if ASCEE_USE_BLAS == 1
 #include <cblas.h>
 #endif
 #ifdef ASCEE_DOUBLE_PRECISION
 #define c_real creal
 #define c_imag cimag
 #define d_abs fabs
 #define c_abs cabs
 #define c_conj conj
 #define d_atan2 atan2
 #define d_acos acos
 #define d_sqrt sqrt
 #define c_exp cexp
 #define d_sin sin
 #define d_cos cos
 #define d_pow pow
 #else  // ASCEE_DOUBLE_PRECISION not defined
 #define c_conj conjf
 #define c_real crealf
 #define c_imag cimagf
 #define d_abs fabsf
 #define c_abs cabsf
 #define d_atan2 atan2f
 #define d_acos acosf
 #define d_sqrt sqrtf
 #define c_exp cexpf
 #define d_sin sinf
 #define d_cos cosf
 #define d_pow powf
 #endif // ASCEE_DOUBLE_PRECISION
 #ifdef M_PI
 static const d number_pi = M_PI;
 #else
 static const d number_pi = 3.1415926535897932384626433832795028841971693993751058209749445923078164062862089986280348253421170679;
 #endif
 /** 
 * Set all elements in an array equal to val
 *
 * @param to 
 * @param val 
 * @param size 
 */
 static inline void d_set(d to[],d val,us size) {
    for(us i=0;i<size;i++) {
        to[i]=val;
    }
 }
 /** 
 * Set all elements in an array equal to val
 *
 * @param to 
 * @param val 
 * @param size 
 */
 static inline void c_set(c to[],c val,us size) {
    for(us i=0;i<size;i++) {
        to[i]=val;
    }
 }
 /** 
 * Return the maximum of two doubles
 *
 * @param a value 1
 * @param b value 2
 * 
 * @returns the maximum of value 1 and 2
 */
 static inline d d_max(const d a,const d b) {
    return a>b?a:b;
 }
 /** 
 * Return the dot product of two arrays, one of them complex-valued,
 * the other real-valued
 *
 * @param a the complex-valued array
 * @param b the real-valued array
 * @param size the size of the arrays. *Should be equal-sized!*
 *
 * @return the dot product
 */
 static inline c cd_dot(const c a[],const d b[],us size){
    c result = 0;
    us i;
    for(i=0;i<size;i++){
        result+=a[i]*b[i];
    }
    return result;
 }
 /** 
 * Return the dot product of two complex-valued arrays. Wraps BLAS
 * when ASCEE_USE_BLAS == 1.
 *
 * @param a complex-valued array
 * @param b complex-valued array
 * @param size the size of the arrays. *Should be equal-sized!*
 *
 * @return the dot product
 */
 static inline c cc_dot(const c a[],const c b[],us size){
    #if ASCEE_USE_BLAS == 1
    WARN("CBlas zdotu not yet tested");
    #if ASCEE_DOUBLE_PRECISION
    // assert(0);
    return cblas_zdotu(size,(d*) a,1,(d*) b,1);
    #else
    return cblas_cdotu(size,(d*) a,1,(d*) b,1);
    #endif
    #else
    c result = 0;
    us i;
    for(i=0;i<size;i++){
        result+=a[i]*b[i];
    }
    return result;
    #endif
 }
 /** 
 * Compute the dot product of two real arrays.
 *
 * @param a First array.
 * @param b Second array.
 * @param size Size of the arrays.
 * @return The result.
 */
 static inline d d_dot(const d a[],const d b[],const us size){
    #if ASCEE_USE_BLAS == 1
    #if ASCEE_DOUBLE_PRECISION
    return cblas_ddot(size,a,1,b,1);
    #else  // Single precision function
    return cblas_sdot(size,a,1,b,1);
    #endif
    #else  // No BLAS, do it manually
    d result = 0;
    us i;
    for(i=0;i<size;i++){
        result+=a[i]*b[i];
    }
    return result;
    #endif
 }
 /** 
 * Copy array of floats.
 *
 * @param to : Array to write to
 * @param from : Array to read from
 * @param size : Size of arrays
 */
 static inline void d_copy(d to[],const d from[],const us size){
    #if ASCEE_USE_BLAS == 1
    cblas_dcopy(size,from,1,to,1);
    #else
    us i;
    for(i=0;i<size;i++)
        to[i] = from[i];
    #endif
 }
 /** 
 * Copy array of floats to array of complex floats. Imaginary part set
 * to zero.
 *
 * @param to : Array to write to
 * @param from : Array to read from
 * @param size : Size of arrays
 */
 static inline void cd_copy(c to[],const d from[],const us size) {
    us i;
    for(i=0;i<size;i++) {
        to[i] = (c) (from[i]);
        dbgassert(cimag(to[i]) == 0,"Imaginary part not equal to zero");
    }
 }
 /** 
 * Copy float vector to complex vector. Imaginary part set
 * to zero.
 *
 * @param to : Vector to write to
 * @param from : Vector to read from
 */
 static inline void c_copy(c to[],const c from[],us size){
    #if ASCEE_USE_BLAS == 1
    #if ASCEE_DOUBLE_PRECISION
    cblas_zcopy(size,(d*) from,1,(d*) to,1);
    #else
    cblas_ccopy(size,(d*) from,1,(d*) to,1);
    #endif
    #else
    us i;
    for(i=0;i<size;i++)
        to[i] = from[i];
    #endif
 }
 /** 
 * Multiply y with fac, and add result to x
 *
 * @param x Array to add to
 * @param y Array to add to x
 * @param fac Factor with which to multiply y
 * @param size Size of the arrays
 */
 static inline void d_add_to(d x[],const d y[],d fac,us size){
    #if ASCEE_USE_BLAS == 1
    #if ASCEE_DOUBLE_PRECISION
    cblas_daxpy(size,fac,y,1,x,1);
    #else
    cblas_saxpy(size,fac,y,1,x,1);
    #endif
    #else
    us i;
    for(i=0;i<size;i++)
        x[i]+=fac*y[i];
    #endif
 }
 /** 
 * x = x + fac*y
 *
 * @param x Array to add to
 * @param y Array to add to x
 * @param fac Factor with which to multiply y
 * @param size Size of the arrays
 */
 static inline void c_add_to(c x[],const c y[],c fac,us size){
    #if ASCEE_USE_BLAS == 1
    #if ASCEE_DOUBLE_PRECISION
    cblas_zaxpy(size,(d*) &fac,(d*) y,1,(d*) x,1);
    #else
    cblas_caxpy(size,(d*) &fac,(d*) y,1,(d*) x,1);
    #endif
    #else
    us i;
    for(i=0;i<size;i++)
        x[i]+=fac*y[i];
    #endif
 }
 /** 
 * Scale an array of doubles
 *
 * @param a array
 * @param scale_fac scale factor
 * @param size size of the array
 */
 static inline void d_scale(d a[],const d scale_fac,us size){
    #if ASCEE_USE_BLAS == 1
    #if ASCEE_DOUBLE_PRECISION    
    cblas_dscal(size,scale_fac,a,1);
    #else
    cblas_sscal(size,scale_fac,a,1);
    #endif
    #else
    us i;
    for(i=0;i<size;i++)
        a[i]*=scale_fac;
    #endif
 }
 /** 
 * Scale an array of complex floats
 *
 * @param a array
 * @param scale_fac scale factor
 * @param size size of the array
 */
 static inline void c_scale(c a[],const c scale_fac,us size){
    #if ASCEE_USE_BLAS == 1
    // Complex argument should be given in as array of two double
    // values. The first the real part, the second the imaginary
    // part. Fortunately the (c) type stores the two values in this
    // order. To be portable and absolutely sure anything goes well,
    // we convert it explicitly here.
    d scale_fac_d [] = {creal(scale_fac),cimag(scale_fac)};
    #if ASCEE_DOUBLE_PRECISION
    cblas_zscal(size,scale_fac_d,(d*) a,1);
    #else
    cblas_cscal(size,scale_fac_d,(d*) a,1);
    #endif
    #else
    us i;
    for(i=0;i<size;i++)
        a[i]*=scale_fac;
    #endif
 }
 /** 
 * Compute the maximum value of an array
 *
 * @param a array
 * @param size size of the array
 * @return maximum
 */
 static inline d darray_max(const d a[],us size){
    us i;
    d max = a[0];
    for(i=1;i<size;i++){
        if(a[i] > max) max=a[i];
    }
    return max;
 }
 /** 
 * Compute the minimum of an array
 *
 * @param a array
 * @param size size of the array
 * @return minimum
 */
 static inline d d_min(const d a[],us size){
    us i;
    d min = a[0];
    for(i=1;i<size;i++){
        if(a[i] > min) min=a[i];
    }
    return min;
 }
 /** 
 * Compute the \f$ L_2 \f$ norm of an array of doubles
 *
 * @param a Array
 * @param size Size of array
 */
 static inline d d_norm(const d a[],us size){
    #if ASCEE_USE_BLAS == 1
    return cblas_dnrm2(size,a,1);
    #else	
    d norm = 0;
    us i;
    for(i=0;i<size;i++){
        norm+=a[i]*a[i];
    }
    norm = d_sqrt(norm);
    return norm;
    #endif
 }
 /** 
 * Compute the \f$ L_2 \f$ norm of an array of complex floats
 *
 * @param a Array
 * @param size Size of array
 */
 static inline d c_norm(const c a[],us size){
    #if ASCEE_USE_BLAS == 1
    return cblas_dznrm2(size,(d*) a,1);
    #else	
    d norm = 0;
    us i;
    for(i=0;i<size;i++){
        d absa = c_abs(a[i]);
        norm+=absa*absa;
    }
    norm = d_sqrt(norm);
    return norm;
    #endif
 }
 /** 
 * Computes the element-wise vector product, or Hadamard product of
 * arr1 and arr2
 *
 * @param res Where the result will be stored
 * @param arr1 Array 1
 * @param vec2 Array 2
 * @param size: Size of the arrays
 */
 void d_elem_prod_d(d res[],
                   const d arr1[],
                   const d arr2[],
                   const us size);
 /** 
 * Computes the element-wise vector product, or Hadamard product of
 * arr1 and arr2 for complex floats
 *
 * @param res Where the result will be stored
 * @param arr1 Array 1
 * @param vec2 Array 2
 * @param size: Size of the arrays
 */
 void c_elem_prod_c(c res[],
                   const c arr1[],
                   const c arr2[],
                   const us size);
 /** 
 * Compute the complex conjugate of a complex vector and store the
 * result.
 *
 * @param res Result vector
 * @param in Input vector
 * @param size Size of the vector
 */
 static inline void c_conj_c(c res[],const c in[],const us size) {
    // First set the result vector to zero
    c_set(res,0,size);
    #ifdef ASCEE_USE_BLAS
    #if ASCEE_DOUBLE_PRECISION
    // Cast as a float, scale all odd elements with minus one to find
    // the complex conjugate.
    cblas_daxpy(size ,1,(d*) in,2,(d*) res,2);
    cblas_daxpy(size,-1,&((d*) in)[1],2,&((d*) res)[1],2);
    #else
    cblas_faxpy(size ,1,(d*) in,2,(d*) res,2);
    cblas_faxpy(size,-1,&((d*) in)[1],2,&((d*) res)[1],2);
    #endif  // ASCEE_DOUBLE_PRECISION
    #else
    for(us i=0;i<size;i++) {
        res[i] = c_conj(in[i]);
    }
    #endif  // ASCEE_USE_BLAS
 }
 /** 
 * In place complex conjugation
 *
 * @param res Result vector
 * @param size Size of the vector
 */
 static inline void c_conj_inplace(c res[],us size) {
    #ifdef ASCEE_USE_BLAS
    #if ASCEE_DOUBLE_PRECISION
    // Cast as a float, scale all odd elements with minus one to find
    // the complex conjugate.
    cblas_dscal(size,-1,&((d*) res)[1],2);
    #else
    cblas_sscal(size,-1,&((d*) res)[1],2);
    #endif  // ASCEE_DOUBLE_PRECISION
    #else
    for(us i=0;i<size;i++) {
        res[i] = c_conj(res[i]);
    }
    #endif  // ASCEE_USE_BLAS
 }
 #endif // ASCEE_MATH_RAW_H
 //////////////////////////////////////////////////////////////////////
--- a/beamforming/c/ascee_tracer.c
+++ b/beamforming/c/ascee_tracer.c
@ -7,7 +7,7 @@
 //////////////////////////////////////////////////////////////////////
 #if TRACER == 1
 #include <stdio.h>
-#include "tracer.h"
+#include "ascee_tracer.h"
 #include "types.h"
 #ifdef _REENTRANT
--- a/beamforming/c/ascee_tracer.h
+++ b/beamforming/c/ascee_tracer.h
@ -1,4 +1,4 @@
-// tracer.h
+// ascee_tracer.h
 //
 // Author: J.A. de Jong - ASCEE
 //
@ -6,8 +6,8 @@
 // Basic tracing code for debugging.
 //////////////////////////////////////////////////////////////////////
 #pragma once
-#ifndef TRACER_H
+#ifndef ASCEE_TRACER_H
-#define TRACER_H
+#define ASCEE_TRACER_H
 // Some console colors
 #define RESET   "\033[0m"
@ -214,6 +214,5 @@ void uvartrace_impl(const char* pos,int line,const char* varname,size_t var);
 #endif	// ######################################## TRACER ==1
-
+#endif // ASCEE_TRACER_H
 #endif // TRACER_H
 //////////////////////////////////////////////////////////////////////
--- a/beamforming/c/fft.c
+++ b/beamforming/c/fft.c
@ -6,24 +6,20 @@
 // 
 //////////////////////////////////////////////////////////////////////
 #define TRACERPLUS (-5)
-#include "tracer.h"
+#include "ascee_tracer.h"
 #include "fft.h"
 #include "types.h"
 /* #include "kiss_fftr.h" */
 #include "fftpack.h"
 typedef struct Fft_s {
    us nfft;
    us nchannels;
    Fftr* fftr;
 } Fft;
 Fft* Fft_alloc(const us nfft,const us nchannels) {
    fsTRACE(15);
    #ifdef ASCEE_DEBUG
    if(nchannels > ASCEE_MAX_NUM_CHANNELS) {
        WARN("Too high number of channels! Please increase the "
             "ASCEE_MAX_NUM_CHANNELS compilation flag");
@ -41,10 +37,10 @@ Fft* Fft_alloc(const us nfft,const us nchannels) {
    fft->fftr = Fftr_alloc(nfft);
    if(!fft->fftr) {
        a_free(fft);
        WARN(ALLOCFAILED "fftr");
        return NULL;
    }
    #endif // ASCEE_PARALLEL
    feTRACE(15); 
    return fft;
@ -57,10 +53,9 @@ void Fft_free(Fft* fft) {
 }
 us Fft_nchannels(const Fft* fft) {return fft->nchannels;}
 us Fft_nfft(const Fft* fft) {return fft->nfft;}
 void Fft_fft(const Fft* fft,const dmat* timedata,cmat* result) {
    fsTRACE(15);
-
+    dbgassert(timedata && result,NULLPTRDEREF);
    dbgassert(timedata->n_rows == fft->nfft,"Invalid size for time data rows."
        " Should be equal to nfft");
    dbgassert(timedata->n_cols == fft->nchannels,"Invalid size for time data cols."
--- a/beamforming/c/mq.c
+++ b/beamforming/c/mq.c
@ -8,7 +8,7 @@
 //////////////////////////////////////////////////////////////////////
 #define TRACERPLUS (-6)
 #include "types.h"
-#include "tracer.h"
+#include "ascee_tracer.h"
 #include "ascee_assert.h"
 #include "ascee_alloc.h"
 #include "mq.h"
@ -257,8 +257,6 @@ void* JobQueue_assign(JobQueue* jq) {
    pid_t tid = syscall(SYS_gettid);    
    iVARTRACE(16,tid);    
    #endif // __linux__
    #endif //  ASCEE_DEBUG
--- a/beamforming/c/ps.c
+++ b/beamforming/c/ps.c
@ -5,11 +5,11 @@
 // Description:
 //
 //////////////////////////////////////////////////////////////////////
-#define TRACERPLUS 1000
+/* #define TRACERPLUS 1000 */
 #include "ps.h"
 #include "fft.h"
 #include "ascee_alloc.h"
-#include "ascee_math.h"
+#include "ascee_alg.h"
 #include "ascee_assert.h"
 typedef struct PowerSpectra_s {
@ -27,7 +27,7 @@ PowerSpectra* PowerSpectra_alloc(const us nfft,
                                 const us nchannels,
                                 const WindowType wt) {
-    TRACE(15,"PowerSpectra_alloc");
+    fsTRACE(15);
    int rv;
    /* Check nfft */
@ -61,11 +61,12 @@ PowerSpectra* PowerSpectra_alloc(const us nfft,
    if(rv!=0) {
        WARN("Error creating window function, continuing anyway");
    }
    feTRACE(15);
    return ps;
 }
 void PowerSpectra_free(PowerSpectra* ps) {
-    TRACE(15,"PowerSpectra_free");
+    fsTRACE(15);
    Fft_free(ps->fft);
    vd_free(&ps->window);
@ -73,14 +74,18 @@ void PowerSpectra_free(PowerSpectra* ps) {
    dmat_free(&ps->timedata_work);
    vc_free(&ps->j_vec_conj);
    a_free(ps);
    feTRACE(15);
 }
-int PowerSpectra_compute(const PowerSpectra* ps,
+void PowerSpectra_compute(const PowerSpectra* ps,
                         const dmat * timedata,
                         cmat * result) {
-    TRACE(15,"PowerSpectra_compute");
+    fsTRACE(15);
    dbgassert(ps && timedata && result,NULLPTRDEREF);
    const us nchannels = Fft_nchannels(ps->fft);
    const us nfft = Fft_nfft(ps->fft);
@ -125,10 +130,12 @@ int PowerSpectra_compute(const PowerSpectra* ps,
            &timedata_work,
            &fft_work);
    TRACE(15,"fft done");
    /* Scale fft such that power is easily comxputed */
    const c scale_fac = d_sqrt(2/win_pow)/nfft;
-    c_scale(fft_work.data,scale_fac,(nfft/2+1)*nchannels);
+    cmat_scale(&fft_work,scale_fac);
    TRACE(15,"scale done");
    for(i=0;i< nchannels;i++) {
        /* Multiply DC term with 1/sqrt(2) */
@ -139,166 +146,34 @@ int PowerSpectra_compute(const PowerSpectra* ps,
    }
    /* print_cmat(&fft_work); */
    TRACE(15,"Nyquist and DC correction done");
-    c* j_vec_conj = ps->j_vec_conj.data;
+    vc j_vec_conj = ps->j_vec_conj;
    /* Compute Cross-power spectra and store result */
    for(i =0; i<nchannels;i++) {
        for(j=0;j<nchannels;j++) {
            iVARTRACE(15,i);
            iVARTRACE(15,j);
            /* The indices here are important. This is also how it
             * is documented */
-            c* res = getcmatval(result,0,i+j*nchannels);
+            vc res = cmat_column(result,i+j*nchannels);
-
+            TRACE(15,"SFSG");
-            c* i_vec = getcmatval(&fft_work,0,i);
+            vc i_vec = cmat_column(&fft_work,i);
-            c* j_vec = getcmatval(&fft_work,0,j);
+            vc j_vec = cmat_column(&fft_work,j);
-
+            TRACE(15,"SFSG");
            /* Compute the conjugate of spectra j */
-            c_conj_c(j_vec_conj,j_vec,nfft/2+1);
+            vc_conj_vc(&j_vec_conj,&j_vec);
-
+            TRACE(15,"SFSG");
-            /* Compute the product of the two vectors and store the
+            /* Compute the element-wise product of the two vectors and
-             * result as the result */
+             * store the result as the result */
-            c_elem_prod_c(res,i_vec,j_vec_conj,nfft/2+1);
+            vc_elem_prod(&res,&i_vec,&j_vec_conj);
-
+            TRACE(15,"SFSG");
        }
    }
-
+    feTRACE(15);
    return SUCCESS;
 }
 /* typedef struct AvPowerSpectra_s { */
 /*     us overlap_offset; */
 /*     us naverages;               /\* The number of averages taken *\/ */
 /*     dmat prev_timedata;         /\**< Storage for previous */
 /*                                  * timedata buffer *\/ */
 /*     vc* ps;           /\**< Here we store the averaged */
 /*                        * results for each Cross-power */
 /*                        * spectra. These are */
 /*                        * nchannels*nchannels vectors *\/ */
 /*     vc* ps_work;         /\**< Work area for the results, also */
 /*                           * nchannels*nchannels *\/ */
 /* } AvPowerSpectra; */
 /* AvPowerSpectra* AvPowerSpectra_alloc(const us nfft, */
 /*                                      const us nchannels, */
 /*                                      const d overlap_percentage) { */
 /*     TRACE(15,"AvPowerSpectra_alloc"); */
 /*     int rv; */
 /*     /\* Check nfft *\/ */
 /*     if(nfft % 2 != 0) { */
 /*         WARN("nfft should be even"); */
 /*         return NULL; */
 /*     } */
 /*     /\* Check overlap percentage *\/ */
 /*     us overlap_offset = nfft - (us) overlap_percentage*nfft/100; */
 /*     if(overlap_offset == 0 || overlap_offset > nfft) { */
 /*         WARN("Overlap percentage results in offset of 0, or a too high number of */
 /* overlap" */
 /*              " samples. Number of overlap samples should be < nfft"); */
 /*         WARN("Illegal overlap percentage. Should be 0 <= %% < 100"); */
 /*         return NULL; */
 /*     } */
 /*     /\* ALlocate space *\/ */
 /*     Fft fft; */
 /*     rv = Fft_alloc(&fft,nfft,nchannels); */
 /*     if(rv != SUCCESS) { */
 /*         WARN("Fft allocation failed"); */
 /*         return NULL; */
 /*     } */
 /*     AvPowerSpectra* aps = a_malloc(sizeof(AvPowerSpectra)); */
 /*     if(!aps) { */
 /*         WARN("Allocation of AvPowerSpectra memory failed"); */
 /*         return NULL; */
 /*     } */
 /*     ps->naverages = 0; */
 /*     ps->overlap_offset = overlap_offset; */
 /*     /\* Allocate vectors and matrices *\/ */
 /*     ps->prev_timedata = dmat_alloc(nfft,nchannels); */
 /*     return ps; */
 /* } */
 /* us AvPowerSpectra_getAverages(const AvPowerSpectra* ps) { */
 /*     return ps->naverages; */
 /* } */
 /* /\**  */
 /*  * Compute single power spectra by  */
 /*  * */
 /*  * @param ps Initialized AvPowerSpectra structure */
 /*  * @param timedata Timedata to compute for */
 /*  * @param result Result */
 /*  * */
 /*  * @return  */
 /*  *\/ */
 /* static int AvPowerSpectra_computeSingle(const AvPowerSpectra* ps, */
 /*                                       const dmat* timedata, */
 /*                                       vc* results) { */
 /*     us nchannels = ps->fft.nchannels; */
 /*     for(us channel=0;channel<ps;channel++) { */
 /*     } */
 /*     return SUCCESS; */
 /* } */
 /* int AvPowerSpectra_addTimeData(AvPowerSpectra* ps, */
 /*                              const dmat* timedata) { */
 /*     TRACE(15,"AvPowerSpectra_addTimeData"); */
 /*     dbgassert(ps,"Null pointer given for ps"); */
 /*     const us nchannels = ps->fft.nchannels; */
 /*     const us nfft = ps->fft.nfft; */
 /*     dbgassert(timedata->n_cols == nchannels,"Invalid time data"); */
 /*     dbgassert(timedata->n_rows == nfft,"Invalid time data"); */
 /*     dmat prevt = ps->prev_timedata; */
 /*     us noverlap = ps->noverlap; */
 /*     if(ps->naverages != 0) { */
 /*         /\* Copy the overlap buffer to the tbuf *\/ */
 /*         copy_dmat_rows(&tbuf,&overlap,0,0,noverlap); */
 /*         /\* Copy the new timedata buffer to the tbuf *\/ */
 /*         copy_dmat_rows(&tbuf,timedata,0,noverlap,(nfft-noverlap)); */
 /*     } */
 /*     else { */
 /*         /\* Copy the overlap buffer to the tbuf *\/ */
 /*         copy_dmat_rows(&tbuf,&overlap,0,0,noverlap); */
 /*     } */
 /*     return SUCCESS; */
 /* } */
 /* void AvPowerSpectra_free(AvPowerSpectra* ps) { */
 /*     TRACE(15,"AvPowerSpectra_free"); */
 /*     Fft_free(&ps->fft); */
 /*     dmat_free(&ps->prev_timedata); */
 /*     vd_free(&ps->window);     */
 /*     a_free(ps); */
 /* } */
 //////////////////////////////////////////////////////////////////////
--- a/beamforming/c/ps.h
+++ b/beamforming/c/ps.h
@ -9,59 +9,54 @@
 #ifndef PS_H
 #define PS_H
 #include "window.h"
 struct PowerSpectra_s;
 struct AvPowerSpectra_s;
 typedef struct PowerSpectra_s PowerSpectra;
 typedef struct AvPowerSpectra_s AvPowerSpectra;
 /** 
 * Allocate a PowerSpectra computer
 *
 * @param nfft fft length
 * @param nchannels Number of channels
 * @param wt Windowtype, as defined in window.h
 *
 * @return PowerSpectra handle, NULL on error
 */
 PowerSpectra* PowerSpectra_alloc(const us nfft,
                                 const us nchannels,
                                 const WindowType wt);
 /** 
- * Compute power spectra, returns to a complex array
+ * Compute power spectra.
 *
 * @param ps[in] PowerSpectra handle
 * @param[in] timedata Time data. Should have size nfft*nchannels
 *
 * @param[out] result Here, the result will be stored. Should have
 * size (nfft/2+1)*nchannels^2, such that the Cij at frequency index f
 * can be obtained as result[f,i+j*nchannels]
 *
 * @param[in] timedata Time data, should be of size nfft*nchannels
 * @param [out] Cross-power spectra, array should be of size
 * (nfft/2+1) x (nchannels*nchannels), such that the cross spectra of
 * channel i with channel j at can be found as
 * getvcval(0,i+j*nchannels).
 * @return status code, SUCCESS on succes.
 */
-int PowerSpectra_compute(const PowerSpectra*,
+void PowerSpectra_compute(const PowerSpectra* ps,
                         const dmat* timedata,
                         cmat *result);
 /** 
- * Free storage of the PowerSpectra
+ * Return nfft 
 */
 void PowerSpectra_free(PowerSpectra*);
 // AvPowerSpectra* AvPowerSpectra_alloc(const us nfft,
 //                                      const us nchannels,
 //                                      const d overlap_percentage,
 //                                      const WindowType wt);
 /** 
 * Return the current number of averages taken to obtain the result.
 *
- * @return The number of averages taken.
+ * @param ps[in] PowerSpectra handle
 *
 * @return nfft
 */
-us AvPowerSpectra_getAverages(const AvPowerSpectra*);
+us PowerSpectra_getnfft(const PowerSpectra* ps);
 int AvPowerSpectra_addTimeData(AvPowerSpectra*,
                             const dmat* timedata);
 /** 
- * Free storage of the AvPowerSpectra
+ * Free PowerSpectra
 *
 * @param[in] ps PowerSpectra handle
 */
-void AvPowerSpectra_free(AvPowerSpectra*);
+void PowerSpectra_free(PowerSpectra* ps);
 #endif // PS_H
 //////////////////////////////////////////////////////////////////////
--- a/beamforming/c/types.h
+++ b/beamforming/c/types.h
@ -10,6 +10,15 @@
 #ifndef TYPES_H
 #define TYPES_H
 // Branch prediction performance improvement
 #if !defined(likely) && defined(__GNUC__) && !defined(ASCEE_DEBUG)
 #define likely(x)       __builtin_expect(!!(x), 1)
 #define unlikely(x)     __builtin_expect(!!(x), 0)
 #else
 #define likely(x)       (x)
 #define unlikely(x)     (x)
 #endif
 /// We often use boolean values
 #include <stdbool.h>            // true, false
 #include <stddef.h>
--- a/beamforming/c/worker.c
+++ b/beamforming/c/worker.c
@ -11,7 +11,7 @@
 #include "ascee_alloc.h"
 #include <pthread.h>
 #include "ascee_assert.h"
-#include "tracer.h"
+#include "ascee_tracer.h"
 typedef struct Workers_s {
    JobQueue* jq;
--- a/beamforming/config.pxi.in
+++ b/beamforming/config.pxi.in
@ -18,25 +18,26 @@ ELSE:
 ctypedef size_t us
-cdef extern from "math.h":
+cdef extern from "ascee_tracer.h":
    void setTracerLevel(int)
 cdef extern from "ascee_math.h":
    ctypedef struct dmat:
        d* data
        d** col_ptrs
        us n_cols,n_rows
    ctypedef struct cmat:
        c* data
        c** col_ptrs
        us n_cols,n_rows
-# cdef np.ndarray empty(us nrows,us ncols,dtype):
+    dmat dmat_foreign(us n_rows,
-cdef dmat dmat_from_array(d[::1,:] arr):
+                      us n_cols,
-    cdef dmat result
+                      d* data)
-    result.data = &arr[0,0]
+    cmat cmat_foreign(us n_rows,
-    result.n_rows = arr.shape[0]
+                      us n_cols,
-    result.n_cols = arr.shape[1]
+                      c* data)
    return result
-cdef cmat cmat_from_array(c[::1,:] arr):
+    void dmat_free(dmat*)
-    cdef cmat result
+    void cmat_free(cmat*)
-    result.data = &arr[0,0]
+    void cmat_copy(cmat* to,cmat* from_)
    result.n_rows = arr.shape[0]
    result.n_cols = arr.shape[1]
    return result
--- a/beamforming/fft.pyx
+++ b/beamforming/fft.pyx
@ -1,5 +1,7 @@
 include "config.pxi"
 # setTracerLevel(0)
 cdef extern from "fft.h":
    ctypedef struct c_Fft "Fft"
    c_Fft* Fft_alloc(us nfft,us nchannels)
@ -32,11 +34,19 @@ cdef class Fft:
                           nchannels),
                           dtype=NUMPY_COMPLEX_TYPE,order='F')
        # result[:,:] = np.nan+1j*np.nan
        cdef c[::1,:] result_view = result
        cdef cmat r = cmat_foreign(result.shape[0],
                                   result.shape[1],
                                   &result_view[0,0])
        cdef dmat t = dmat_foreign(timedata.shape[0],
                                   timedata.shape[1],
                                   &timedata[0,0])
        cdef cmat r = cmat_from_array(result)
        cdef dmat t = dmat_from_array(timedata)
        Fft_fft(self._fft,&t,&r)
        dmat_free(&t)
        cmat_free(&r)
        return result
@ -52,44 +62,137 @@ cdef extern from "ps.h":
    c_PowerSpectra* PowerSpectra_alloc(const us nfft,
                                       const us nchannels,
                                       const WindowType wt)
-    int PowerSpectra_compute(const c_PowerSpectra* ps,
+    void PowerSpectra_compute(const c_PowerSpectra* ps,
                             const dmat * timedata,
                             cmat * result)
    void PowerSpectra_free(c_PowerSpectra*)
-# cdef class PowerSpectra:
+cdef class PowerSpectra:
-#     cdef:
+    cdef:
-#         c_PowerSpectra* _ps
+        c_PowerSpectra* _ps
-#     def __cinit__(self, us nfft,us nchannels):
+    def __cinit__(self, us nfft,us nchannels):
-#         self._ps = PowerSpectra_alloc(nfft,nchannels,Rectangular)
+        self._ps = PowerSpectra_alloc(nfft,nchannels,Rectangular)
-#         if self._ps == NULL:
+        if self._ps == NULL:
-#             raise RuntimeError('PowerSpectra allocation failed')
+            raise RuntimeError('PowerSpectra allocation failed')
-#     def compute(self,d[::1,:] timedata):
+    def compute(self,d[::1,:] timedata):
-#         cdef:
+        cdef:
-#             us nchannels = timedata.shape[1]
+            us nchannels = timedata.shape[1]
-#             us nfft = timedata.shape[0]
+            us nfft = timedata.shape[0]
-#             int rv
+            int rv
-#             dmat td
+            dmat td
-#             cmat result_mat
+            cmat result_mat
-#         td.data = &timedata[0,0]
+        td = dmat_foreign(nfft,
-#         td.n_rows = nfft
+                          nchannels,
-#         td.n_cols = nchannels
+                          &timedata[0,0])
 #         result = np.empty((nfft//2+1,nchannels*nchannels),
 #                           dtype = NUMPY_COMPLEX_TYPE,
 #                           order='F')
 #         result_mat = cmat_from_array(result)
-#         rv = PowerSpectra_compute(self._ps,&td,&result_mat)
+        # The array here is created in such a way that the strides
-#         if rv !=0:
+        # increase with increasing dimension. This is required for
-#             raise RuntimeError("Error computing power spectra")
+        # interoperability with the C-code, that stores all
        # cross-spectra in a 2D matrix, where the first axis is the
        # frequency axis, and the second axis corresponds to a certain
        # cross-spectrum, as C_ij(f) = result[freq,i+j*nchannels]
-#         return result
+        result = np.empty((nfft//2+1,nchannels,nchannels),
                          dtype = NUMPY_COMPLEX_TYPE,
                          order='F')
        cdef c[::1,:,:] result_view = result
        result_mat = cmat_foreign(nfft//2+1,
                                  nchannels*nchannels,
                                  &result_view[0,0,0])
        PowerSpectra_compute(self._ps,&td,&result_mat)
        dmat_free(&td)
        cmat_free(&result_mat)
        return result
-#     def __dealloc__(self):
+    def __dealloc__(self):
-#         if self._ps != NULL:
+        if self._ps != NULL:
-#             PowerSpectra_free(self._ps)
+            PowerSpectra_free(self._ps)
 cdef extern from "aps.h":
    ctypedef struct c_AvPowerSpectra "AvPowerSpectra"
    c_AvPowerSpectra* AvPowerSpectra_alloc(const us nfft,
                                           const us nchannels,
                                           d overlap_percentage,
                                           const WindowType wt)
    cmat* AvPowerSpectra_addTimeData(const c_AvPowerSpectra* ps,
                                     const dmat * timedata)
    void AvPowerSpectra_free(c_AvPowerSpectra*)
 cdef class AvPowerSpectra:
    cdef:
        c_AvPowerSpectra* aps
        us nfft, nchannels
    def __cinit__(self,us nfft,us nchannels,d overlap_percentage):
        self.aps = AvPowerSpectra_alloc(nfft,
                                        nchannels,
                                        overlap_percentage,
                                        Rectangular)
        self.nchannels = nchannels
        self.nfft = nfft
        if self.aps == NULL:
            raise RuntimeError('AvPowerSpectra allocation failed')
    def __dealloc__(self):
        if self.aps:
            AvPowerSpectra_free(self.aps)
    def addTimeData(self,d[::1,:] timedata):
        """!
        Adds time data, returns current result
        """
        cdef:
            us nsamples = timedata.shape[0]
            us nchannels = timedata.shape[1]
            dmat td
            cmat* result_ptr
        if nsamples < self.nfft:
            raise RuntimeError('Number of samples should be > nfft')
        if nchannels != self.nchannels:
            raise RuntimeError('Invalid number of channels')            
        td = dmat_foreign(nsamples,
                          nchannels,
                          &timedata[0,0])
        result_ptr = AvPowerSpectra_addTimeData(self.aps,
                                                &td)
        # The array here is created in such a way that the strides
        # increase with increasing dimension. This is required for
        # interoperability with the C-code, that stores all
        # cross-spectra in a 2D matrix, where the first axis is the
        # frequency axis, and the second axis corresponds to a certain
        # cross-spectrum, as C_ij(f) = result[freq,i+j*nchannels]
        result = np.empty((self.nfft//2+1,nchannels,nchannels),
                          dtype = NUMPY_COMPLEX_TYPE,
                          order='F')
        cdef c[::1,:,:] result_view = result
        cdef cmat res = cmat_foreign(self.nfft//2+1,
                                     nchannels*nchannels,
                                     &result_view[0,0,0])
        # Copy result
        cmat_copy(&res,result_ptr)
        cmat_free(&res)
        dmat_free(&td)
        return result
--- a/fftpack/fftpack.h
+++ b/fftpack/fftpack.h
@ -8,7 +8,7 @@
 #pragma once
 #ifndef FFTPACK_H
 #define FFTPACK_H
-#include "tracer.h"
+#include "ascee_tracer.h"
 #include "ascee_alloc.h"
 #include "npy_fftpack.h"
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@ -2,6 +2,8 @@ include_directories(${CMAKE_SOURCE_DIR}/beamforming/c)
 add_executable(test_bf test_bf.c)
 add_executable(test_workers test_workers.c)
 add_executable(test_fft test_fft.c)
 add_executable(test_math test_math.c)
 target_link_libraries(test_bf beamforming_lib)
 target_link_libraries(test_fft beamforming_lib pthread)
 target_link_libraries(test_workers beamforming_lib pthread)
 target_link_libraries(test_math beamforming_lib pthread efence)
--- a/test/ps_test.py
+++ b/test/ps_test.py
@ -8,10 +8,10 @@ Created on Mon Jan 15 19:45:33 2018
 import numpy as np
 from beamforming.fft import Fft, PowerSpectra
-nfft=2**10
+nfft=2**4
 print('nfft:',nfft)
-print(nfft)
+#print(nfft)
-nchannels = 8
+nchannels = 2
 t = np.linspace(0,1,nfft+1)
 # print(t)
--- a/test/test_fft.c
+++ b/test/test_fft.c
@ -7,11 +7,13 @@
 //////////////////////////////////////////////////////////////////////
 #include "fft.h"
-#include "tracer.h"
+#include "ascee_tracer.h"
 int main() {
    setTracerLevel(0);
    iVARTRACE(15,getTracerLevel());
    Fft* fft = Fft_alloc(100000,8);
--- a/test/test_math.c
+++ b/test/test_math.c
@ -0,0 +1,36 @@
 // test_bf.c
 //
 // Author: J.A. de Jong -ASCEE
 // 
 // Description:
 //
 //////////////////////////////////////////////////////////////////////
 #include "fft.h"
 #include "ascee_math.h"
 #include "ascee_tracer.h"
 int main() {
    iVARTRACE(15,getTracerLevel());
    vc a = vc_alloc(5);
    vc_set(&a,2+3*I);
    c_conj_inplace(a.data,a.size);
    print_vc(&a);
    vc b = vc_alloc(5);    
    c_conj_c(b.data,a.data,5);
    print_vc(&b);    
    vc_free(&a);
    vc_free(&b);    
    return 0;
 }
 //////////////////////////////////////////////////////////////////////
--- a/test/test_workers.c
+++ b/test/test_workers.c
@ -7,8 +7,8 @@
 //////////////////////////////////////////////////////////////////////
 #include "worker.h"
 #include "mq.h"
-#include "tracer.h"
+#include "ascee_tracer.h"
-
+#include <unistd.h>
 static void* walloc(void*);
 static int worker(void*,void*);
 static void wfree(void*);
@ -20,7 +20,7 @@ int main() {
    iVARTRACE(15,getTracerLevel());
-    int njobs = 4;
+    us njobs = 4;
    JobQueue* jq = JobQueue_alloc(njobs);
    assert(jq);
@ -43,11 +43,10 @@ int main() {
    return 0;
 }
 static void* walloc(void* data) {
    TRACE(15,"WALLOC");
-    uVARTRACE(15,(int) data);
+    uVARTRACE(15,(us) data);
-    return (void*) getpid();
+    return (void*) 1;
 }
 static int worker(void* w_data,void* tj) {