diff --git a/.gitignore b/.gitignore index a540170..06ce893 100644 --- a/.gitignore +++ b/.gitignore @@ -10,7 +10,7 @@ build __pycache__ cython_debug beamforming/config.pxi -beamforming/fft.c +beamforming/wrappers.c *.so test/test_bf test/test_fft diff --git a/CMakeLists.txt b/CMakeLists.txt index d07c5d6..5982828 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,8 +2,8 @@ cmake_minimum_required (VERSION 3.0) project(beamforming) # Whether we want to use blas yes or no -set(ASCEE_USE_BLAS 1) -add_definitions(-DASCEE_USE_BLAS=1) +set(ASCEE_USE_BLAS TRUE) +# set(ASCEE_USE_BLAS FALSE) set(ASCEE_FLOAT double) # set(ASCEE_FLOAT float) @@ -68,10 +68,12 @@ endif(ASCEE_DEBUG) # The last argument here takes care of calling SIGABRT when an integer overflow # occures. ############################## General compilation flags (independent of debug mode, windows or linux) +set(CYTHON_EXTRA_C_FLAGS "-Wno-sign-compare -Wno-cpp -Wno-implicit-fallthrough\ + -Wno-incompatible-pointer-types -Wno-strict-aliasing") # General make flags -set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC -std=c11 -Wall -Wextra \ - -Wimplicit-function-declaration -ftrapv -Wno-type-limits") +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC -std=c11 -Wall -Wextra -Wno-type-limits \ + -Werror=implicit-function-declaration -Werror=incompatible-pointer-types") # Debug make flags set(CMAKE_C_FLAGS_DEBUG "-g" ) diff --git a/beamforming/CMakeLists.txt b/beamforming/CMakeLists.txt index 2311f17..9133f5a 100644 --- a/beamforming/CMakeLists.txt +++ b/beamforming/CMakeLists.txt @@ -13,7 +13,6 @@ include_directories( ) -set_source_files_properties(fft.c PROPERTIES COMPILE_FLAGS - "-Wno-cpp -Wno-sign-compare -Wno-incompatible-pointer-types -Wno-strict-aliasing -Wno-implicit-fallthrough") -cython_add_module(fft fft.pyx) -target_link_libraries(fft beamforming_lib ) +set_source_files_properties(wrappers.c PROPERTIES COMPILE_FLAGS "${CMAKE_C_FLAGS} ${CYTHON_EXTRA_C_FLAGS}") +cython_add_module(wrappers wrappers.pyx) +target_link_libraries(wrappers beamforming_lib ) diff --git a/beamforming/__init__.py b/beamforming/__init__.py index e69de29..fff7b7d 100644 --- a/beamforming/__init__.py +++ b/beamforming/__init__.py @@ -0,0 +1 @@ +from .wrappers import * diff --git a/beamforming/c/aps.c b/beamforming/c/aps.c index 899a8f7..c7c7d5a 100644 --- a/beamforming/c/aps.c +++ b/beamforming/c/aps.c @@ -49,26 +49,31 @@ AvPowerSpectra* AvPowerSpectra_alloc(const us nfft, /* Check nfft */ if(nfft % 2 != 0 || nfft > ASCEE_MAX_NFFT) { WARN("nfft should be even"); + feTRACE(15); return NULL; } /* Check overlap percentage */ if(overlap_percentage >= 100) { WARN("Overlap percentage >= 100!"); + feTRACE(15); return NULL; } if(overlap_percentage < 0) { WARN("Overlap percentage should be positive!"); + feTRACE(15); return NULL; } /* Compute and check overlap offset */ - us oo = nfft - (us) (overlap_percentage*nfft/100); + us oo = (us) (((d) nfft)-overlap_percentage*((d) nfft)/100); + iVARTRACE(15,oo); if(oo == 0) {oo++;} PowerSpectra* ps = PowerSpectra_alloc(nfft,nchannels,wt); if(!ps) { WARN(ALLOCFAILED "ps"); + feTRACE(15); return NULL; } @@ -76,8 +81,10 @@ AvPowerSpectra* AvPowerSpectra_alloc(const us nfft, if(!aps) { WARN("Allocation of AvPowerSpectra memory failed"); PowerSpectra_free(ps); + feTRACE(15); return NULL; } + aps->nchannels = nchannels; aps->nfft = nfft; aps->ps = ps; @@ -91,7 +98,7 @@ AvPowerSpectra* AvPowerSpectra_alloc(const us nfft, aps->ps_single = cmat_alloc(nfft/2+1,nchannels*nchannels); cmat_set(&aps->ps_storage,0); - + feTRACE(15); return aps; } @@ -113,26 +120,30 @@ static void AvPowerSpectra_addBlock(AvPowerSpectra* aps, dbgassert(aps && block,NULLPTRDEREF); dbgassert(block->n_rows == aps->nfft,"Invalid block n_rows"); dbgassert(block->n_cols == aps->nchannels,"Invalid block n_cols"); - - cmat ps_single = aps->ps_single; - cmat ps_storage = aps->ps_storage; + const us nchannels = aps->nchannels; + const us nfft = aps->nfft; + iVARTRACE(15,nfft); + + cmat* ps_single = &aps->ps_single; + cmat* ps_storage = &aps->ps_storage; c naverages = (++aps->naverages); - + cVARTRACE(15,naverages); + /* Scale previous result */ - cmat_scale(&ps_storage, + cmat_scale(ps_storage, (naverages-1)/naverages); - + uVARTRACE(15,(us) aps->ps); + PowerSpectra_compute(aps->ps, block, - &ps_single); + ps_single); /* Add new result, scaled properly */ - cmat_add_cmat(&ps_storage, - &ps_single,1/naverages); - + cmat_add_cmat(ps_storage, + ps_single,1/naverages); feTRACE(15); } @@ -153,60 +164,71 @@ cmat* AvPowerSpectra_addTimeData(AvPowerSpectra* aps, const us oo = aps->oo; us* os = &aps->os; + iVARTRACE(15,*os); us os_timedata = 0; dmat buffer = aps->buffer; /* Retrieve the buffer and use it to make the first time block. */ if(*os < oo) { - + TRACE(15,"Using saved data from previous run"); + dbgassert(false,"not tested") dmat tmp = dmat_alloc(nfft,nchannels); + dbgassert(0 <= *os,"BUG"); + dbgassert(*os <= nfft,"BUG"); - copy_dmat_rows(&tmp, - &buffer, - *os, /* Startrow_from */ - 0, /* Startrow to */ - nfft - *os /* nrows */ - ); + /* copy_dmat_rows(&tmp, */ + /* &buffer, */ + /* *os, /\* Startrow_from *\/ */ + /* 0, /\* Startrow to *\/ */ + /* nfft - *os /\* nrows *\/ */ + /* ); */ - copy_dmat_rows(&tmp, - timedata, - 0, - nfft - *os, - *os - ); + /* copy_dmat_rows(&tmp, */ + /* timedata, */ + /* 0, */ + /* nfft - *os, */ + /* *os */ + /* ); */ AvPowerSpectra_addBlock(aps,&tmp); os_timedata = oo + *os - nfft; - + dbgassert(os_timedata < nfft,"BUG"); dmat_free(&tmp); } /* Run until we cannot go any further */ - while (os_timedata + nfft <= timedata->n_rows) { - dmat tmp = dmat_submat(timedata, - os_timedata, - 0,nfft,nchannels); + while ((os_timedata + nfft) <= timedata->n_rows) { + dmat tmp = dmat_submat(timedata, + os_timedata, /* Startrow */ + 0, /* Start column */ + nfft, /* Number of rows */ + nchannels); /* Number of columns */ + + /* Process the block of time data */ AvPowerSpectra_addBlock(aps,&tmp); - + + iVARTRACE(15,os_timedata); os_timedata += oo; dmat_free(&tmp); + iVARTRACE(15,os_timedata); } /* We copy the last piece of samples from the timedata to the * buffer */ copy_dmat_rows(&buffer, timedata, - timedata->n_rows-nfft, - 0, - nfft); + timedata->n_rows-nfft, /* startrow_from */ + 0, /* startrow_to */ + nfft); /* Number of rows */ + *os = os_timedata+nfft-timedata->n_rows; - dbgassert(*os < nfft,"BUG"); + dbgassert(*os <= nfft,"BUG"); feTRACE(15); return &aps->ps_storage; diff --git a/beamforming/c/ascee_alg.c b/beamforming/c/ascee_alg.c index a01e1c4..8d36d68 100644 --- a/beamforming/c/ascee_alg.c +++ b/beamforming/c/ascee_alg.c @@ -5,7 +5,6 @@ // Description: // (Linear) algebra routine implementations ////////////////////////////////////////////////////////////////////// - #include "ascee_alg.h" @@ -13,11 +12,13 @@ void cmv_dot(const cmat* A,const vc* restrict x,vc* restrict b){ - assert(A->n_rows == b->size); - assert(A->n_cols == x->size); + dbgassert(A->n_rows == b->size,SIZEINEQUAL); + dbgassert(A->n_cols == x->size,SIZEINEQUAL); #if ASCEE_USE_BLAS == 1 + dbgassert(A->_data,"Matrix-vector product only works for allocated data matrices"); + /* typedef enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102} CBLAS_ORDER; */ /* typedef enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113, CblasConjNoTrans=114} CBLAS_TRANSPOSE; */ /* @@ -41,12 +42,12 @@ void cmv_dot(const cmat* A,const vc* restrict x,vc* restrict b){ A->n_rows, A->n_cols, (d*) &alpha, /* alpha */ - (d*) A->data, /* A */ + (d*) A->_data, /* A */ A->n_rows, /* lda */ - (d*) x->data, /* */ + (d*) x->ptr, /* */ 1, (d*) &beta, /* beta */ - (d*) b->data, + (d*) b->ptr, 1); diff --git a/beamforming/c/ascee_alg.h b/beamforming/c/ascee_alg.h index 186cde0..cf68cc9 100644 --- a/beamforming/c/ascee_alg.h +++ b/beamforming/c/ascee_alg.h @@ -86,16 +86,17 @@ static inline void dmat_add_dmat(dmat* x,dmat* y,d fac) { * @param[in] fac */ static inline void cmat_add_cmat(cmat* x,cmat* y,c fac) { - dbgassert(x && y,NULLPTRDEREF); - dbgassert(x->n_cols == y->n_cols,SIZEINEQUAL); - dbgassert(x->n_rows == y->n_rows,SIZEINEQUAL); - if(likely(x->data && y->data)) { - c_add_to(x->data,y->data,fac,x->n_cols*x->n_rows); - } - else { - for(us col=0;coln_cols;col++) { - c_add_to(x->col_ptrs[col],y->col_ptrs[col],fac,x->n_rows); - } + // dbgassert(x && y,NULLPTRDEREF); + // dbgassert(x->n_cols == y->n_cols,SIZEINEQUAL); + // dbgassert(x->n_rows == y->n_rows,SIZEINEQUAL); + // if(likely(x->data && y->data)) { + // TRACE(15,"Scale whole"); + // c_add_to(x->data,y->data,fac,x->n_cols*x->n_rows); + // } + // else { + for(us col=0;coln_cols;col++) { + TRACE(15,"Scale columns"); + c_add_to(x->col_ptrs[col],y->col_ptrs[col],fac,x->n_rows); } } @@ -107,11 +108,30 @@ static inline void cmat_add_cmat(cmat* x,cmat* y,c fac) { * @param[in] a * @param[in] b */ -static inline void vc_elem_prod(vc* result,vc* a,vc* b) { +static inline void vd_elem_prod(vd* result,const vd* a,const vd* b) { dbgassert(result && a && b,NULLPTRDEREF); dbgassert(result->size==a->size,SIZEINEQUAL); dbgassert(b->size==a->size,SIZEINEQUAL); - c_elem_prod_c(result->ptr,a->ptr,b->ptr,a->size); + d_elem_prod_d(result->ptr,a->ptr,b->ptr,a->size); +} +/** + * Compute the element-wise (Hadamard) product of a and b, and store + * in result + * + * @param[out] result + * @param[in] a + * @param[in] b + */ +static inline void vc_hadamard(vc* result,const vc* a,const vc* b) { + fsTRACE(15); + dbgassert(result && a && b,NULLPTRDEREF); + dbgassert(result->size==a->size,SIZEINEQUAL); + dbgassert(b->size==a->size,SIZEINEQUAL); + c_hadamard(result->ptr,a->ptr,b->ptr,a->size); + check_overflow_vx(*result); + check_overflow_vx(*a); + check_overflow_vx(*b); + feTRACE(15); } /** * Compute the matrix vector product for complex-valued types: b = A*x. diff --git a/beamforming/c/ascee_assert.h b/beamforming/c/ascee_assert.h index 46e134c..82be4d1 100644 --- a/beamforming/c/ascee_assert.h +++ b/beamforming/c/ascee_assert.h @@ -34,6 +34,8 @@ void DBG_AssertFailedExtImplementation(const char* file, #define dbgassert(assertion, assert_string) +#define assertvalidptr(ptr) dbgassert(ptr,NULLPTRDEREF) + #endif // ASCEE_DEBUG #endif // ASCEE_ASSERT_H diff --git a/beamforming/c/ascee_math.c b/beamforming/c/ascee_math.c index e67b3ca..5cdecfa 100644 --- a/beamforming/c/ascee_math.c +++ b/beamforming/c/ascee_math.c @@ -12,13 +12,14 @@ #include "ascee_math.h" #include "ascee_tracer.h" - #include #ifdef ASCEE_DEBUG void print_dmat(const dmat* m) { + feTRACE(50); size_t row,col; for(row=0;rown_rows;row++){ + indent_trace(); for(col=0;coln_cols;col++){ d val = *getdmatval(m,row,col); printf("%c%2.2e ", val<0?'-':' ' ,d_abs(val)); @@ -27,10 +28,13 @@ void print_dmat(const dmat* m) { printf("\n"); } + fsTRACE(50); } void print_cmat(const cmat* m) { + feTRACE(50); size_t row,col; for(row=0;rown_rows;row++){ + indent_trace(); for(col=0;coln_cols;col++){ c val = *getcmatval(m,row,col); @@ -44,32 +48,36 @@ void print_cmat(const cmat* m) { printf("\n"); } + feTRACE(50); } void print_vc(const vc* m) { - TRACE(20,"print_vc"); + fsTRACE(50); size_t row; for(row=0;rowsize;row++){ - - d rval = creal(m->data[row]); - d ival = cimag(m->data[row]); + c val = *getvcval(m,row); + d rval = creal(val); + d ival = cimag(val); + indent_trace(); printf("%c%2.2e%c%2.2ei ",rval< 0 ?'-': ' ', d_abs(rval),ival<0 ? '-' : '+',d_abs(ival) ) ; printf("\n"); } + feTRACE(50); } void print_vd(const vd* m) { - TRACE(20,"print_vd"); + fsTRACE(50); size_t row; iVARTRACE(20,m->size); for(row=0;rowsize;row++){ - d rval = m->data[row]; - + d rval = m->ptr[row]; + indent_trace(); printf("%c%2.2e ",rval< 0 ? '\r': ' ',rval); printf("\n"); } + feTRACE(50); } #endif diff --git a/beamforming/c/ascee_math.h b/beamforming/c/ascee_math.h index baab8d4..9f71e86 100644 --- a/beamforming/c/ascee_math.h +++ b/beamforming/c/ascee_math.h @@ -18,7 +18,7 @@ typedef struct { us size; d* ptr; /**< This pointer points to the data of this vector */ - d* data; /**< Pointer set if data storage is + d* _data; /**< Pointer set if data storage is intern. If this is set to zero, the vector is a sub-vector. */ } vd; @@ -27,7 +27,7 @@ typedef struct { us size; c* ptr; /**< This pointer points to the data of this vector */ - c* data; /**< Pointer set if data storage is + c* _data; /**< Pointer set if data storage is intern. If this is set to zero, the vector is a sub-vector. */ } vc; @@ -36,16 +36,39 @@ typedef struct { us n_rows; us n_cols; d** col_ptrs; - d* data; + d* _data; } dmat; /// Dense matrix of complex floating point values typedef struct { us n_rows; us n_cols; c** col_ptrs; - c* data; + c* _data; } cmat; +#ifdef ASCEE_DEBUG +#define OVERFLOW_MAGIC_NUMBER (-10e-45) + +#define check_overflow_vx(vx) \ + TRACE(15,"Checking overflow " #vx); \ + if((vx)._data) { \ + dbgassert((vx)._data[(vx).size] == OVERFLOW_MAGIC_NUMBER, \ + "Buffer overflow detected on" #vx ); \ + } +#define check_overflow_xmat(xmat) \ + TRACE(15,"Checking overflow " #xmat); \ + if((xmat)._data) { \ + for(us _overflow=0;_overflow<(xmat).n_cols;_overflow++) \ + dbgassert((xmat)._data[(xmat).n_rows*(xmat).n_cols+ \ + _overflow] == OVERFLOW_MAGIC_NUMBER, \ + "Buffer overflow detected on" #xmat ); \ + } + +#else +#define check_overflow_vx(vx) +#define check_overflow_xmat(xmat) +#endif + /** * Sets all values in a vector to the value * @@ -74,14 +97,8 @@ static inline void vc_set(vc* vec,const c value){ */ static inline void dmat_set(dmat* mat,const d value){ dbgassert(mat,NULLPTRDEREF); - us size = mat->n_cols*mat->n_rows; - if(likely(mat->data)){ - d_set(mat->data,value,size); - } - else { - for(us col=0;coln_cols;col++) { - d_set(mat->col_ptrs[col],value,mat->n_rows); - } + for(us col=0;coln_cols;col++) { + d_set(mat->col_ptrs[col],value,mat->n_rows); } } @@ -94,14 +111,8 @@ static inline void dmat_set(dmat* mat,const d value){ */ static inline void cmat_set(cmat* mat,const c value){ dbgassert(mat,NULLPTRDEREF); - us size = mat->n_cols*mat->n_rows; - if(likely(mat->data)){ - c_set(mat->data,value,size); - } - else { - for(us col=0;coln_cols;col++) { - c_set(mat->col_ptrs[col],value,mat->n_rows); - } + for(us col=0;coln_cols;col++) { + c_set(mat->col_ptrs[col],value,mat->n_rows); } } @@ -114,9 +125,15 @@ static inline void cmat_set(cmat* mat,const c value){ */ static inline vd vd_alloc(us size) { vd result = { size, NULL,NULL}; - result.data = (d*) a_malloc(size*sizeof(d)); - result.ptr = result.data; - dbgassert(result.data,ALLOCFAILED); + #ifdef ASCEE_DEBUG + result._data = (d*) a_malloc((size+1)*sizeof(d)); + result._data[size] = OVERFLOW_MAGIC_NUMBER; + #else + result._data = (d*) a_malloc(size*sizeof(d)); + #endif // ASCEE_DEBUG + + result.ptr = result._data; + dbgassert(result._data,ALLOCFAILED); #ifdef ASCEE_DEBUG vd_set(&result,NAN); #endif // ASCEE_DEBUG @@ -131,8 +148,14 @@ static inline vd vd_alloc(us size) { */ static inline vc vc_alloc(us size) { vc result = { size, NULL, NULL}; - result.data = (c*) a_malloc(size*sizeof(c)); - result.ptr = result.data; + #ifdef ASCEE_DEBUG + result._data = (c*) a_malloc((size+1)*sizeof(c)); + result._data[size] = OVERFLOW_MAGIC_NUMBER; + #else + result._data = (c*) a_malloc(size*sizeof(c)); + #endif // ASCEE_DEBUG + dbgassert(result._data,ALLOCFAILED); + result.ptr = result._data; #ifdef ASCEE_DEBUG vc_set(&result,NAN+I*NAN); #endif // ASCEE_DEBUG @@ -153,16 +176,21 @@ static inline dmat dmat_alloc(us n_rows, /** * Here storage is allocated for both the data, as well as the - * column pointers. The column pointer data is stored at the end - * of the allocated block. + * column pointers. In debug mode, extra memory is allocated to + * check for possible buffer overflows. */ - result.data = (d*) a_malloc(n_rows*n_cols*sizeof(d) - +sizeof(d*)*n_cols); + #ifdef ASCEE_DEBUG + result._data = (d*) a_malloc((n_rows*n_cols+n_cols)*sizeof(d)); + for(us i=0;idata)) a_free(f->data); + if(likely(f->_data)) a_free(f->_data); } /** * Free's data of a vector. Is safe to run on sub-vecs as well, to @@ -273,7 +305,7 @@ static inline void vd_free(vd* f) { */ static inline void vc_free(vc* f) { dbgassert(f,NULLPTRDEREF); - if(likely(f->data)) a_free(f->data); + if(likely(f->_data)) a_free(f->_data); } /** * Free's data of dmat. Safe to run on sub-matrices as well. @@ -281,14 +313,9 @@ static inline void vc_free(vc* f) { * @param m Matrix to free */ static inline void dmat_free(dmat* m) { - if(likely(m->data)) { - a_free(m->data); - } - else { - // Only column pointers allocated. This was a submat - dbgassert(m->col_ptrs,NULLPTRDEREF); - a_free(m->col_ptrs); - } + if(m->_data) a_free(m->_data); + dbgassert(m->col_ptrs,NULLPTRDEREF); + a_free(m->col_ptrs); } /** * Free's data of dmat. Safe to run on sub-matrices as well. @@ -296,19 +323,14 @@ static inline void dmat_free(dmat* m) { * @param m Matrix to free */ static inline void cmat_free(cmat* m) { - if(likely(m->data)) { - a_free(m->data); - } - else { - // Only column pointers allocated. This was a submat - dbgassert(m->col_ptrs,NULLPTRDEREF); - a_free(m->col_ptrs); - } + if(m->_data) a_free(m->_data); + dbgassert(m->col_ptrs,NULLPTRDEREF); + a_free(m->col_ptrs); } #define setvecval(vec,index,val) \ dbgassert((((us) index) <= (vec)->size),OUTOFBOUNDSVEC); \ - (vec)->data[index] = val; + (vec)->ptr[index] = val; #define setmatval(mat,row,col,val) \ @@ -418,7 +440,8 @@ static inline dmat dmat_submat(const dmat* parent, startcol+col); } - dmat result = { n_rows,n_cols,col_ptrs,NULL}; + dmat result = { n_rows,n_cols,NULL,NULL}; + result.col_ptrs = col_ptrs; return result; } /** @@ -437,7 +460,7 @@ static inline cmat cmat_submat(cmat* parent, const us startcol, const us n_rows, const us n_cols) { - + dbgassert(false,"untested"); dbgassert(parent,NULLPTRDEREF); dbgassert(n_rows+startrow <= parent->n_rows,OUTOFBOUNDSMATR); dbgassert(n_cols+startcol <= parent->n_cols,OUTOFBOUNDSMATC); @@ -515,7 +538,10 @@ static inline void cmat_copy(cmat* to,const cmat* from) { * @return vector with reference to column */ static inline vd dmat_column(dmat* x,us col) { - vd res = { x->n_rows, getdmatval(x,0,col),NULL}; + vd res; + res.size = x->n_rows; + res.ptr = getdmatval(x,0,col); + res._data = NULL; return res; } @@ -528,7 +554,10 @@ static inline vd dmat_column(dmat* x,us col) { * @return vector with reference to column */ static inline vc cmat_column(cmat* x,us col) { - vc res = { x->n_rows, getcmatval(x,0,col),NULL}; + vc res; + res.size = x->n_rows; + res.ptr = getcmatval(x,0,col); + res._data = NULL; return res; } @@ -538,10 +567,12 @@ static inline vc cmat_column(cmat* x,us col) { * @param a * @param b */ -static inline void vc_conj_vc(vc* a,const vc* b) { +static inline void vc_conj(vc* a,const vc* b) { + fsTRACE(15); dbgassert(a && b,NULLPTRDEREF); dbgassert(a->size == b->size,SIZEINEQUAL); - c_conj_c(a->ptr,b->ptr,a->size); + carray_conj(a->ptr,b->ptr,a->size); + feTRACE(15); } /** @@ -551,13 +582,8 @@ static inline void vc_conj_vc(vc* a,const vc* b) { */ static inline void cmat_conj(cmat* x) { dbgassert(x,NULLPTRDEREF); - if(likely(x->data)) { - c_conj_inplace(x->data,x->n_cols*x->n_rows); - } - else { - for(us col=0;coln_cols;col++) { - c_conj_inplace(x->col_ptrs[col],x->n_rows); - } + for(us col=0;coln_cols;col++) { + c_conj_inplace(x->col_ptrs[col],x->n_rows); } } diff --git a/beamforming/c/ascee_math_raw.c b/beamforming/c/ascee_math_raw.c index 3f4efdf..bcd4704 100644 --- a/beamforming/c/ascee_math_raw.c +++ b/beamforming/c/ascee_math_raw.c @@ -15,7 +15,7 @@ void d_elem_prod_d(d res[], const d arr2[], const us size) { - #if ASCEE_USE_BLAS + #if ASCEE_USE_BLAS == 1 #if ASCEE_DEBUG @@ -77,15 +77,16 @@ void d_elem_prod_d(d res[], #endif } -void c_elem_prod_c(c res[], - const c arr1[], - const c arr2[], - const us size) { +void c_hadamard(c res[], + const c arr1[], + const c arr2[], + const us size) { - TRACE(15,"c_elem_prod_c"); + fsTRACE(15); uVARTRACE(15,size); + dbgassert(arr1 && arr2 && res,NULLPTRDEREF); - #if ASCEE_USE_BLAS + #if ASCEE_USE_BLAS == 1 #if ASCEE_DEBUG @@ -105,38 +106,36 @@ void c_elem_prod_c(c res[], #define elem_prod_fun cblas_cgbmv #endif - /* These parameters do not matter for this specific case */ - const CBLAS_ORDER mat_order= CblasColMajor; - const CBLAS_TRANSPOSE tr = CblasNoTrans; + c alpha = 1.0; + c beta = 0.0; - const c alpha = 1.0; - const c beta = 0.0; TRACE(15,"Calling " annestr(elem_prod_fun)); - - elem_prod_fun(mat_order, - tr, + uVARTRACE(15,size); + elem_prod_fun(CblasColMajor, + CblasNoTrans, (blasint) size, /* M: Number of rows */ (blasint) size, /* B: Number of columns */ 0, /* KL: Number of sub-diagonals */ 0, /* KU: Number of super-diagonals */ - (d*) &alpha, /* Multiplication factor */ - (d*) arr2, /* A */ - 1, /* LDA */ - (d*) arr1, /* x */ - 1, /* incX = 1 */ + (d*) &alpha, /* Multiplication factor */ + (d*) arr2, /* A */ + 1, /* LDA */ + (d*) arr1, /* x */ + 1, /* incX = 1 */ (d*) &beta, (d*) res, /* The Y matrix to write to */ - 1); /* incY */ + 1); /* incY (increment in res) */ #undef elem_prod_fun #else /* No blas routines, routine is very simple, but here we * go! */ - DBGWARN("Performing slow non-blas vector-vector multiplication"); + DBGWARN("Performing slower non-blas vector-vector multiplication"); for(us i=0;i +#elif ASCEE_USE_BLAS == 0 +#else +#error "ASCEE_USE_BLAS should be set to either 0 or 1" #endif #ifdef ASCEE_DOUBLE_PRECISION @@ -249,6 +252,7 @@ static inline void d_add_to(d x[],const d y[],d fac,us size){ * @param size Size of the arrays */ static inline void c_add_to(c x[],const c y[],c fac,us size){ + fsTRACE(15); #if ASCEE_USE_BLAS == 1 #if ASCEE_DOUBLE_PRECISION cblas_zaxpy(size,(d*) &fac,(d*) y,1,(d*) x,1); @@ -260,6 +264,7 @@ static inline void c_add_to(c x[],const c y[],c fac,us size){ for(i=0;i +static _Thread_local us ASCEE_FN_LEVEL = 0; + +static _Atomic(int) TRACERNAME = ATOMIC_VAR_INIT(DEFAULTTRACERLEVEL); -_Atomic(int) TRACERNAME = ATOMIC_VAR_INIT(DEFAULTTRACERLEVEL); void setTracerLevel(int level) { atomic_store(&TRACERNAME,level); @@ -21,35 +23,52 @@ void setTracerLevel(int level) { int getTracerLevel() { return atomic_load(&TRACERNAME); } + #else int TRACERNAME; - +static us ASCEE_FN_LEVEL = 0; /* setTracerLevel and getTracerLevel are defined as macros in * tracer.h */ #endif +void indent_trace() { + for(us i=0;i +#include + +static inline void clearScreen() { + printf("\033c\n"); +} + +/** + * Indent the rule for tracing visibility. + */ +void indent_trace(); // Some console colors #define RESET "\033[0m" @@ -28,9 +39,6 @@ #define BOLDCYAN "\033[1m\033[36m" /* Bold Cyan */ #define BOLDWHITE "\033[1m\033[37m" /* Bold White */ -#include -#include - // Not so interesting part #define rawstr(x) #x #define namestr(x) rawstr(x) diff --git a/beamforming/c/fft.c b/beamforming/c/fft.c index 14f7f80..1dde2f6 100644 --- a/beamforming/c/fft.c +++ b/beamforming/c/fft.c @@ -14,7 +14,7 @@ typedef struct Fft_s { us nfft; us nchannels; - Fftr* fftr; + vd fft_work; } Fft; Fft* Fft_alloc(const us nfft,const us nchannels) { @@ -35,19 +35,20 @@ Fft* Fft_alloc(const us nfft,const us nchannels) { fft->nfft = nfft; fft->nchannels = nchannels; - fft->fftr = Fftr_alloc(nfft); - if(!fft->fftr) { - a_free(fft); - WARN(ALLOCFAILED "fftr"); - return NULL; - } + /* Initialize foreign fft lib */ + fft->fft_work = vd_alloc(2*nfft+15); + npy_rffti(nfft,fft->fft_work.ptr); + check_overflow_vx(fft->fft_work); + /* print_vd(&fft->fft_work); */ + feTRACE(15); return fft; } void Fft_free(Fft* fft) { fsTRACE(15); - Fftr_free(fft->fftr); + dbgassert(fft,NULLPTRDEREF); + vd_free(&fft->fft_work); a_free(fft); feTRACE(15); } @@ -55,20 +56,51 @@ us Fft_nchannels(const Fft* fft) {return fft->nchannels;} us Fft_nfft(const Fft* fft) {return fft->nfft;} void Fft_fft(const Fft* fft,const dmat* timedata,cmat* result) { fsTRACE(15); - dbgassert(timedata && result,NULLPTRDEREF); + + dbgassert(fft && timedata && result,NULLPTRDEREF); dbgassert(timedata->n_rows == fft->nfft,"Invalid size for time data rows." " Should be equal to nfft"); dbgassert(timedata->n_cols == fft->nchannels,"Invalid size for time data cols." " Should be equal to nchannels"); - + + const us nfft = fft->nfft; + + vd fft_result = vd_alloc(fft->nfft); for(us col=0;colnchannels;col++) { - Fftr_fftr(fft->fftr, - getdmatval(timedata,0,col), - getcmatval(result,0,col)); - + vd timedata_col = dmat_column((dmat*) timedata,col); + vd_copy(&fft_result,&timedata_col); + vd_free(&timedata_col); + + npy_rfftf(fft->nfft,fft_result.ptr,fft->fft_work.ptr); + + /* Fftpack stores the data a bit strange, the resulting array + * has the DC value at 0,the first cosine at 1, the first sine + * at 2 etc. This needs to be shifted properly in the + * resulting matrix, as for the complex data, the imaginary + * part of the DC component equals zero. */ + + check_overflow_vx(fft_result); + check_overflow_vx(fft->fft_work); + + vc resultcol = cmat_column(result,col); + *getvcval(&resultcol,0) = *getvdval(&fft_result,0); + + memcpy((void*) getvcval(&resultcol,1), + (void*) getvdval(&fft_result,1), + (nfft-1)*sizeof(d)); + + /* Set imaginary part of Nyquist frequency to zero */ + ((d*) getvcval(&resultcol,nfft/2))[1] = 0; + check_overflow_xmat(*timedata); + + /* Free up storage of the result column */ + vc_free(&resultcol); } + /* print_vd(&fft->fft_work); */ + + vd_free(&fft_result); feTRACE(15); } diff --git a/beamforming/c/mq.c b/beamforming/c/mq.c index ef4f53b..ed121aa 100644 --- a/beamforming/c/mq.c +++ b/beamforming/c/mq.c @@ -14,11 +14,12 @@ #include "mq.h" #include -#ifdef __linux__ +/* #ifdef linux */ +#define _GNU_SOURCE #include #include #include -#endif +/* #endif */ typedef struct { void* job_ptr; @@ -252,13 +253,9 @@ void* JobQueue_assign(JobQueue* jq) { TRACE(16,"JobQueue_assign: found ready job. Assigned to:"); #ifdef ASCEE_DEBUG - #ifdef __linux__ - - pid_t tid = syscall(SYS_gettid); - iVARTRACE(16,tid); - #endif // __linux__ - #endif // ASCEE_DEBUG - + pthread_t thisthread = pthread_self(); + iVARTRACE(16,thisthread); + #endif /* print_job_queue(jq); */ /* Find a job from the queue, assign it and return it */ diff --git a/beamforming/c/ps.c b/beamforming/c/ps.c index 080f161..edefcdd 100644 --- a/beamforming/c/ps.c +++ b/beamforming/c/ps.c @@ -15,12 +15,8 @@ typedef struct PowerSpectra_s { vd window; - d win_pow; /**< The power of the window */ Fft* fft; /**< Fft routines storage */ - cmat fft_work; /**< Work area for FFt's */ - dmat timedata_work; /**< Work area for timedata */ - vc j_vec_conj; /**< Work area for conjugate of j */ } PowerSpectra; PowerSpectra* PowerSpectra_alloc(const us nfft, @@ -53,11 +49,9 @@ PowerSpectra* PowerSpectra_alloc(const us nfft, /* Allocate vectors and matrices */ ps->window = vd_alloc(nfft); - ps->fft_work = cmat_alloc(nfft/2+1,nchannels); - ps->timedata_work= dmat_alloc(nfft,nchannels); - ps->j_vec_conj = vc_alloc(nfft/2+1); rv = window_create(wt,&ps->window,&ps->win_pow); + check_overflow_vx(ps->window); if(rv!=0) { WARN("Error creating window function, continuing anyway"); } @@ -67,14 +61,9 @@ PowerSpectra* PowerSpectra_alloc(const us nfft, void PowerSpectra_free(PowerSpectra* ps) { fsTRACE(15); - Fft_free(ps->fft); vd_free(&ps->window); - cmat_free(&ps->fft_work); - dmat_free(&ps->timedata_work); - vc_free(&ps->j_vec_conj); a_free(ps); - feTRACE(15); } @@ -93,8 +82,6 @@ void PowerSpectra_compute(const PowerSpectra* ps, const d win_pow = ps->win_pow; dVARTRACE(15,win_pow); - us i,j; - /* Sanity checks for the matrices */ dbgassert(timedata->n_cols == nchannels,"timedata n_cols " "should be equal to nchannels"); @@ -111,66 +98,81 @@ void PowerSpectra_compute(const PowerSpectra* ps, /* Multiply time data with the window and store result in * timedata_work. */ - dmat timedata_work = ps->timedata_work; + dmat timedata_work = dmat_alloc(nfft,nchannels); + for(us i=0;iwindow.data, - nfft); - + vd_elem_prod(&column_work,&column,&ps->window); + + vd_free(&column); + vd_free(&column_work); } - + check_overflow_xmat(timedata_work); /* print_dmat(&timedata_work); */ /* Compute fft of the time data */ - cmat fft_work = ps->fft_work; + cmat fft_work = cmat_alloc(nfft/2+1,nchannels); Fft_fft(ps->fft, &timedata_work, &fft_work); + dmat_free(&timedata_work); + TRACE(15,"fft done"); /* Scale fft such that power is easily comxputed */ const c scale_fac = d_sqrt(2/win_pow)/nfft; cmat_scale(&fft_work,scale_fac); TRACE(15,"scale done"); - - for(i=0;i< nchannels;i++) { + + for(us i=0;i< nchannels;i++) { /* Multiply DC term with 1/sqrt(2) */ *getcmatval(&fft_work,0,i) *= 1/d_sqrt(2.)+0*I; /* Multiply Nyquist term with 1/sqrt(2) */ *getcmatval(&fft_work,nfft/2,i) *= 1/d_sqrt(2.)+0*I; } + check_overflow_xmat(fft_work); /* print_cmat(&fft_work); */ TRACE(15,"Nyquist and DC correction done"); - vc j_vec_conj = ps->j_vec_conj; + vc j_vec_conj = vc_alloc(nfft/2+1); /* Compute Cross-power spectra and store result */ - for(i =0; i +// // Branch prediction performance improvement +#if !defined(likely) +#if defined(__GNUC__) && !defined(ASCEE_DEBUG) #define likely(x) __builtin_expect(!!(x), 1) #define unlikely(x) __builtin_expect(!!(x), 0) #else #define likely(x) (x) #define unlikely(x) (x) -#endif +#endif // if defined(__GNUC__) && !defined(ASCEE_DEBUG) + +#endif // !defined(likely) /// We often use boolean values #include // true, false diff --git a/beamforming/c/window.c b/beamforming/c/window.c index 3e2db16..570e9cd 100644 --- a/beamforming/c/window.c +++ b/beamforming/c/window.c @@ -8,7 +8,7 @@ #include "window.h" #include "signals.h" - +#include /** * Compute the Hann window * @@ -53,9 +53,12 @@ static d rectangle(us i,us N) { dbgassert(isize; d (*win_fun)(us,us); switch (wintype) { @@ -67,21 +70,26 @@ int window_create(const WindowType wintype,vd* result,d* win_pow) { win_fun = hamming; break; } - case Blackman: { - win_fun = blackman; - break; - } case Rectangular: { win_fun = rectangle; break; } + case Bartlett: { + win_fun = bartlett; + break; + } + case Blackman: { + win_fun = blackman; + break; + } default: - WARN("Unkown window function"); - return FAILURE; + DBGWARN("BUG: Unknown window function"); + abort(); break; } us index; for(index=0;index window) if self._ps == NULL: raise RuntimeError('PowerSpectra allocation failed') @@ -85,6 +106,8 @@ cdef class PowerSpectra: int rv dmat td cmat result_mat + + td = dmat_foreign(nfft, nchannels, &timedata[0,0]) @@ -99,12 +122,15 @@ cdef class PowerSpectra: result = np.empty((nfft//2+1,nchannels,nchannels), dtype = NUMPY_COMPLEX_TYPE, order='F') + cdef c[::1,:,:] result_view = result result_mat = cmat_foreign(nfft//2+1, nchannels*nchannels, &result_view[0,0,0]) + + PowerSpectra_compute(self._ps,&td,&result_mat) dmat_free(&td) @@ -137,11 +163,14 @@ cdef class AvPowerSpectra: c_AvPowerSpectra* aps us nfft, nchannels - def __cinit__(self,us nfft,us nchannels,d overlap_percentage): + def __cinit__(self,us nfft, + us nchannels, + d overlap_percentage, + us window=rectangular): self.aps = AvPowerSpectra_alloc(nfft, nchannels, overlap_percentage, - Rectangular) + window) self.nchannels = nchannels self.nfft = nfft @@ -196,3 +225,8 @@ cdef class AvPowerSpectra: return result + + def getFreq(self, d fs): + cdef d df = fs/self.nfft # frequency resolution + cdef us K = self.nfft//2+1 # number of frequency bins + return np.linspace(0, (K-1)*df, K) diff --git a/fftpack/fftpack.h b/fftpack/fftpack.h index f3d569e..7458aa1 100644 --- a/fftpack/fftpack.h +++ b/fftpack/fftpack.h @@ -1,111 +1,27 @@ -// fftpack.h -// -// Author: J.A. de Jong - ASCEE -// -// Description: -// Header file for FFT routines of fftpack -////////////////////////////////////////////////////////////////////// -#pragma once -#ifndef FFTPACK_H -#define FFTPACK_H -#include "ascee_tracer.h" -#include "ascee_alloc.h" -#include "npy_fftpack.h" - /* - * Fortran has two kind of routines: functions and subroutines. A - Fortran function is a C function that returns a single value. - - A * subroutine called from C is in fact a C function - which returns void. + * This file is part of tela the Tensor Language. + * Copyright (c) 1994-1995 Pekka Janhunen */ -#if ASCEE_DOUBLE_PRECISION -// These are all subroutines -extern void dffti_ (int *nfft,d* wsave); -extern void dfftf_ (int *nfft,d* r,d* wsave); + +#ifdef __cplusplus +extern "C" { +#endif +#define NPY_VISIBILITY_HIDDEN + +#ifdef ASCEE_DOUBLE_PRECISION +#define Treal double #else -extern void rffti_ (int *nfft,d* wsave); -extern void rfftf_ (int *nfft,d* r,d* wsave); -#endif // ASCEE_DOUBLE_PRECISION +#define Treal float +#endif -typedef struct Fftr_s { - d *work_area; - d* wsave_ptr; - int nfft; -} Fftr; +extern NPY_VISIBILITY_HIDDEN void npy_cfftf(int N, Treal data[], const Treal wrk[]); +extern NPY_VISIBILITY_HIDDEN void npy_cfftb(int N, Treal data[], const Treal wrk[]); +extern NPY_VISIBILITY_HIDDEN void npy_cffti(int N, Treal wrk[]); -/** - * - * - * @param nfft the length of the sequence to be transformed +extern NPY_VISIBILITY_HIDDEN void npy_rfftf(int N, Treal data[], const Treal wrk[]); +extern NPY_VISIBILITY_HIDDEN void npy_rfftb(int N, Treal data[], const Treal wrk[]); +extern NPY_VISIBILITY_HIDDEN void npy_rffti(int N, Treal wrk[]); - * @param wsave a work array which must be dimensioned at least - * 2*nfft+15. the same work array can be used for both rfftf and - * rfftb as long as n remains unchanged. different wsave arrays - * are required for different values of n. the contents of wsave - * must not be changed between calls of rfftf or rfftb. - */ - -Fftr* Fftr_alloc(int nfft) { - fsTRACE(15); - - Fftr* fftr = a_malloc(sizeof(fftr)); - dbgassert(nfft>0,"Invalid nfft") - dbgassert(fftr,ALLOCFAILED "Fftr_alloc"); - fftr->work_area = a_malloc(sizeof(d)*(3*nfft+15)); - fftr->wsave_ptr = &fftr->work_area[nfft]; - fftr->nfft = nfft; - #if ASCEE_DOUBLE_PRECISION - // dffti_(&nfft,fftr->wsave); - npy_rffti(nfft,fftr->wsave_ptr); - #else - // rffti_(&nfft,fftr->wsave); - #endif - - feTRACE(15); - return fftr; +#ifdef __cplusplus } -void Fftr_free(Fftr* fftr) { - dbgassert(fftr,NULLPTRDEREF "Fftr_free"); - a_free(fftr->work_area); - a_free(fftr); -} - - -/** - * - * - * @param nfft - * @param wsave - * @param input - * @param work - * @param result - */ -static inline void Fftr_fftr(Fftr* fftr,d* input,c* result) { - - - // Copy contents of input to the work area - d_copy(fftr->work_area,input,fftr->nfft); - - int nfft = fftr->nfft; - - #if ASCEE_DOUBLE_PRECISION - // dfftf_(&nfft,fftr->work,fftr->wsave); - npy_rfftf(nfft,fftr->work_area,fftr->wsave_ptr); - #else - NOT TESTED - rfftf_(&nfft,fftr->work_area,fftr->wsave_ptr); - #endif - - result[0] = fftr->work_area[0]; - d_copy((d*) (&result[1]),&fftr->work_area[1],nfft); - // Not portable way of setting imaginary part to zero. Works with - // gcc, though. - __imag__ result[nfft/2] = 0; - -} - - - -#endif // FFTPACK_H -////////////////////////////////////////////////////////////////////// +#endif diff --git a/fftpack/npy_fftpack.h b/fftpack/npy_fftpack.h deleted file mode 100644 index aa706ea..0000000 --- a/fftpack/npy_fftpack.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - * This file is part of tela the Tensor Language. - * Copyright (c) 1994-1995 Pekka Janhunen - */ - -#ifdef __cplusplus -extern "C" { -#endif -#define NPY_VISIBILITY_HIDDEN - -#define DOUBLE - -#ifdef DOUBLE -#define Treal double -#else -#define Treal float -#endif - -extern NPY_VISIBILITY_HIDDEN void npy_cfftf(int N, Treal data[], const Treal wrk[]); -extern NPY_VISIBILITY_HIDDEN void npy_cfftb(int N, Treal data[], const Treal wrk[]); -extern NPY_VISIBILITY_HIDDEN void npy_cffti(int N, Treal wrk[]); - -extern NPY_VISIBILITY_HIDDEN void npy_rfftf(int N, Treal data[], const Treal wrk[]); -extern NPY_VISIBILITY_HIDDEN void npy_rfftb(int N, Treal data[], const Treal wrk[]); -extern NPY_VISIBILITY_HIDDEN void npy_rffti(int N, Treal wrk[]); - -#ifdef __cplusplus -} -#endif diff --git a/test/benchmark.py b/test/benchmark.py index 1039303..03c5d15 100644 --- a/test/benchmark.py +++ b/test/benchmark.py @@ -1,12 +1,15 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python # -*- coding: utf-8 -*- """ Created on Mon Jan 15 19:45:33 2018 @author: anne """ +from pycallgraph import PyCallGraph +from pycallgraph.output import GraphvizOutput + import numpy as np -from beamforming.fft import Fft +from beamforming import Fft nfft=2**17 diff --git a/test/fft_test.py b/test/fft_test.py index e2a0012..a42078f 100644 --- a/test/fft_test.py +++ b/test/fft_test.py @@ -6,7 +6,7 @@ Created on Mon Jan 15 19:45:33 2018 @author: anne """ import numpy as np -from beamforming.fft import Fft +from beamforming import Fft nfft=6 print('nfft:',nfft) diff --git a/test/ps_test.py b/test/ps_test.py index 7add7d1..ed7859f 100644 --- a/test/ps_test.py +++ b/test/ps_test.py @@ -6,9 +6,9 @@ Created on Mon Jan 15 19:45:33 2018 @author: anne """ import numpy as np -from beamforming.fft import Fft, PowerSpectra - -nfft=2**4 +from beamforming import Fft, PowerSpectra, cls +cls +nfft=2048 print('nfft:',nfft) #print(nfft) nchannels = 2 diff --git a/test/test_bf.c b/test/test_bf.c index 98779d5..4871741 100644 --- a/test/test_bf.c +++ b/test/test_bf.c @@ -35,7 +35,7 @@ int main() { vc res2 = vc_alloc(3); - c_elem_prod_c(res2.data,vc1.data,vc2.data,3); + c_hadamard(res2.ptr,vc1.ptr,vc2.ptr,3); print_vc(&res2); diff --git a/test/test_math.c b/test/test_math.c index 8d9bbd9..2c84a19 100644 --- a/test/test_math.c +++ b/test/test_math.c @@ -9,25 +9,39 @@ #include "fft.h" #include "ascee_math.h" #include "ascee_tracer.h" +#include "ascee_alg.h" int main() { iVARTRACE(15,getTracerLevel()); - vc a = vc_alloc(5); - vc_set(&a,2+3*I); + c a[5]; + c_set(a,1-I,5); - c_conj_inplace(a.data,a.size); - print_vc(&a); + /* print_vc(&a); */ vc b = vc_alloc(5); + vc_set(&b,2); - c_conj_c(b.data,a.data,5); + printf("b:\n"); print_vc(&b); - vc_free(&a); - vc_free(&b); + + vc c1 = vc_alloc(5); + /* vc_set(&c1,10); */ + /* c_add_to(c1.ptr,a.ptr,1,3); */ + c_hadamard(c1.ptr,a,b.ptr,5); + + + + printf("c1:\n"); + print_vc(&c1); + + vc_free(&b); + + + return 0; } diff --git a/test/test_workers.c b/test/test_workers.c index bd41e22..5cda373 100644 --- a/test/test_workers.c +++ b/test/test_workers.c @@ -8,6 +8,7 @@ #include "worker.h" #include "mq.h" #include "ascee_tracer.h" +#include "ascee_assert.h" #include static void* walloc(void*); static int worker(void*,void*); @@ -22,7 +23,7 @@ int main() { us njobs = 4; JobQueue* jq = JobQueue_alloc(njobs); - assert(jq); + dbgassert(jq,NULLPTRDEREF); Workers* w = Workers_create(njobs, jq, @@ -30,7 +31,7 @@ int main() { worker, wfree, (void*) 101); - assert(w); + dbgassert(jq,NULLPTRDEREF); for(us i=0; i< njobs; i++) { iVARTRACE(15,i);