Refactoring math routines. Added comments. Removed multithreading from fft routines. Switched to fftpack for FFT. Added Doxyfile

This commit is contained in:
Anne de Jong 2018-02-06 12:01:27 +01:00 committed by asceenl
parent 52e5a31bdb
commit 83aaf69bef
28 changed files with 4508 additions and 1254 deletions

4
.gitignore vendored
View File

@ -12,3 +12,7 @@ cython_debug
beamforming/config.pxi
beamforming/fft.c
*.so
test/test_bf
test/test_fft
test/test_math
test/test_workers

View File

@ -13,6 +13,9 @@ add_definitions(-DASCEE_MAX_NUM_THREADS=8)
add_definitions(-DASCEE_MAX_NUM_CHANNELS=80)
# Reasonable maximum to the nfft size, at 48kHz this is 700s of data..
add_definitions(-DASCEE_MAX_NFFT=33554432) # 2**25
# ####################################### End of user-adjustable variables section
add_definitions(-D_REENTRANT)

2482
Doxyfile Normal file

File diff suppressed because it is too large Load Diff

View File

@ -6,10 +6,13 @@ endif(!ASCEE_DEBUG)
add_library(beamforming_lib
fft.c
ascee_math.c
ascee_math_raw.c
ascee_alg.c
ascee_assert.c
tracer.c
ascee_tracer.c
window.c
# ps.c
aps.c
ps.c
mq.c
worker.c
)

228
beamforming/c/aps.c Normal file
View File

@ -0,0 +1,228 @@
// aps.c
//
// Author: J.A. de Jong -ASCEE
//
// Description:
//
//////////////////////////////////////////////////////////////////////
#include "aps.h"
#include "ps.h"
#include "ascee_alg.h"
typedef struct AvPowerSpectra_s {
us os; /**< Counter set to the position where
* the next time block should be taken
* from */
us nfft, nchannels;
us oo;
us naverages; /* Counter that counts the number of
* averages taken for the computation
* of this averaged power spectra. */
dmat buffer; /**< Buffer storage of some of the
* previous samples. Number of rows is
* equal to nfft. */
cmat ps_storage; /**< Here we store the averaged
* results for each Cross-power
* spectra computed so far. */
cmat ps_single; /**< This is the work area for a
* PowerSpectra computation on a
* single block */
PowerSpectra* ps; /**< Pointer to underlying
* PowerSpectra calculator. */
} AvPowerSpectra;
AvPowerSpectra* AvPowerSpectra_alloc(const us nfft,
const us nchannels,
const d overlap_percentage,
const WindowType wt) {
fsTRACE(15);
/* Check nfft */
if(nfft % 2 != 0 || nfft > ASCEE_MAX_NFFT) {
WARN("nfft should be even");
return NULL;
}
/* Check overlap percentage */
if(overlap_percentage >= 100) {
WARN("Overlap percentage >= 100!");
return NULL;
}
if(overlap_percentage < 0) {
WARN("Overlap percentage should be positive!");
return NULL;
}
/* Compute and check overlap offset */
us oo = nfft - (us) (overlap_percentage*nfft/100);
if(oo == 0) {oo++;}
PowerSpectra* ps = PowerSpectra_alloc(nfft,nchannels,wt);
if(!ps) {
WARN(ALLOCFAILED "ps");
return NULL;
}
AvPowerSpectra* aps = a_malloc(sizeof(AvPowerSpectra));
if(!aps) {
WARN("Allocation of AvPowerSpectra memory failed");
PowerSpectra_free(ps);
return NULL;
}
aps->nchannels = nchannels;
aps->nfft = nfft;
aps->ps = ps;
aps->naverages = 0;
aps->oo = oo;
aps->os = oo;
/* Allocate vectors and matrices */
aps->buffer = dmat_alloc(nfft,nchannels);
aps->ps_storage = cmat_alloc(nfft/2+1,nchannels*nchannels);
aps->ps_single = cmat_alloc(nfft/2+1,nchannels*nchannels);
cmat_set(&aps->ps_storage,0);
return aps;
}
us AvPowerSpectra_getAverages(const AvPowerSpectra* ps) {
return ps->naverages;
}
/**
* Helper function that adds a block of time data to the APS
*
* @param aps AvPowerSpectra handle
* @param block Time data block. Size should be exactly nfft*nchannels.
*/
static void AvPowerSpectra_addBlock(AvPowerSpectra* aps,
const dmat* block) {
fsTRACE(15);
dbgassert(aps && block,NULLPTRDEREF);
dbgassert(block->n_rows == aps->nfft,"Invalid block n_rows");
dbgassert(block->n_cols == aps->nchannels,"Invalid block n_cols");
cmat ps_single = aps->ps_single;
cmat ps_storage = aps->ps_storage;
c naverages = (++aps->naverages);
/* Scale previous result */
cmat_scale(&ps_storage,
(naverages-1)/naverages);
PowerSpectra_compute(aps->ps,
block,
&ps_single);
/* Add new result, scaled properly */
cmat_add_cmat(&ps_storage,
&ps_single,1/naverages);
feTRACE(15);
}
cmat* AvPowerSpectra_addTimeData(AvPowerSpectra* aps,
const dmat* timedata) {
fsTRACE(15);
dbgassert(aps && timedata,NULLPTRDEREF);
const us nchannels = aps->nchannels;
const us nfft = aps->nfft;
dbgassert(timedata->n_cols == nchannels,"Invalid time data");
dbgassert(timedata->n_rows >= nfft,"Invalid time data. "
"Should at least have nfft rows");
const us oo = aps->oo;
us* os = &aps->os;
us os_timedata = 0;
dmat buffer = aps->buffer;
/* Retrieve the buffer and use it to make the first time block. */
if(*os < oo) {
dmat tmp = dmat_alloc(nfft,nchannels);
copy_dmat_rows(&tmp,
&buffer,
*os, /* Startrow_from */
0, /* Startrow to */
nfft - *os /* nrows */
);
copy_dmat_rows(&tmp,
timedata,
0,
nfft - *os,
*os
);
AvPowerSpectra_addBlock(aps,&tmp);
os_timedata = oo + *os - nfft;
dmat_free(&tmp);
}
/* Run until we cannot go any further */
while (os_timedata + nfft <= timedata->n_rows) {
dmat tmp = dmat_submat(timedata,
os_timedata,
0,nfft,nchannels);
AvPowerSpectra_addBlock(aps,&tmp);
os_timedata += oo;
dmat_free(&tmp);
}
/* We copy the last piece of samples from the timedata to the
* buffer */
copy_dmat_rows(&buffer,
timedata,
timedata->n_rows-nfft,
0,
nfft);
*os = os_timedata+nfft-timedata->n_rows;
dbgassert(*os < nfft,"BUG");
feTRACE(15);
return &aps->ps_storage;
}
void AvPowerSpectra_free(AvPowerSpectra* aps) {
fsTRACE(15);
PowerSpectra_free(aps->ps);
dmat_free(&aps->buffer);
cmat_free(&aps->ps_storage);
cmat_free(&aps->ps_single);
a_free(aps);
feTRACE(15);
}
//////////////////////////////////////////////////////////////////////

77
beamforming/c/aps.h Normal file
View File

@ -0,0 +1,77 @@
// aps.h
//
// Author: J.A. de Jong - ASCEE
//
// Description:
//
//////////////////////////////////////////////////////////////////////
#pragma once
#ifndef APS_H
#define APS_H
#include "types.h"
#include "ascee_math.h"
#include "window.h"
typedef struct AvPowerSpectra_s AvPowerSpectra;
/**
* Allocates a AvPowerSpectra object which is used to compute an
* averaged power spectra from given input time samples.
*
* @param[in] nfft Size of the fft,
*
* @param[in] nchannels Number of channels of time data to allocate
* for.
*
* @param[in] overlap_percentage. Should be 0<= overlap_pct < 100. The
* overlap percentage will be coerced, as the offset will be an
* integer number of samples at all cases. Therefore, the real overlap
* percentage can be obtained later on
*
* @param wt
*
* @return
*/
AvPowerSpectra* AvPowerSpectra_alloc(const us nfft,
const us nchannels,
const d overlap_percentage,
const WindowType wt);
/**
* Computes the real overlap offset
*
* @param aps
*/
d AvPowerSpectra_realoverlappct(const AvPowerSpectra* aps);
/**
* Return the current number of averages taken to obtain the result.
*
* @return The number of averages taken.
*/
us AvPowerSpectra_getAverages(const AvPowerSpectra*);
/**
* Add time data to this Averaged Power Spectra. Remembers the part
* that should be overlapped with the next time data.
*
* @param aps AvPowerSpectra handle
*
* @param timedata Pointer to timedata buffer. Number of rows SHOULD
* *at least* be nfft. Number of columns should exactly be nchannels.
*
* @return pointer to the averaged power spectra buffer that is being
* written to. Pointer is valid until AvPowerSpectra_free() is called.
*/
cmat* AvPowerSpectra_addTimeData(AvPowerSpectra* aps,
const dmat* timedata);
/**
* Free storage of the AvPowerSpectra
*/
void AvPowerSpectra_free(AvPowerSpectra*);
#endif // APS_H
//////////////////////////////////////////////////////////////////////

230
beamforming/c/ascee_alg.c Normal file
View File

@ -0,0 +1,230 @@
// ascee_alg.c
//
// Author: J.A. de Jong -ASCEE
//
// Description:
// (Linear) algebra routine implementations
//////////////////////////////////////////////////////////////////////
#include "ascee_alg.h"
void cmv_dot(const cmat* A,const vc* restrict x,vc* restrict b){
assert(A->n_rows == b->size);
assert(A->n_cols == x->size);
#if ASCEE_USE_BLAS == 1
/* typedef enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102} CBLAS_ORDER; */
/* typedef enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113, CblasConjNoTrans=114} CBLAS_TRANSPOSE; */
/*
void cblas_zgemv(OPENBLAS_CONST enum CBLAS_ORDER order,
OPENBLAS_CONST enum CBLAS_TRANSPOSE trans,
OPENBLAS_CONST blasint m,
OPENBLAS_CONST blasint n,
OPENBLAS_CONST double *alpha,
OPENBLAS_CONST double *a,
OPENBLAS_CONST blasint lda,
OPENBLAS_CONST double *x,
OPENBLAS_CONST blasint incx,
OPENBLAS_CONST double *beta,
double *y,
OPENBLAS_CONST blasint incy);
*/
c alpha = 1.0;
c beta = 0.0;
cblas_zgemv(CblasColMajor,
CblasNoTrans,
A->n_rows,
A->n_cols,
(d*) &alpha, /* alpha */
(d*) A->data, /* A */
A->n_rows, /* lda */
(d*) x->data, /* */
1,
(d*) &beta, /* beta */
(d*) b->data,
1);
#else
size_t i,j;
size_t n_rows = A->n_rows;
vc_set(b,0.0);
iVARTRACE(20,A->n_cols);
iVARTRACE(20,A->n_rows);
for(j=0;j<A->n_cols;j++){
for(i=0;i<A->n_rows;i++) {
c* Aij = &A->data[i+j*n_rows];
b->data[i] += *Aij * x->data[j];
}
}
#endif
}
/// The code below here is not yet worked on. Should be improved to
/// directly couple to openblas, instead of using lapacke.h
#if 0
/* These functions can be directly linked to openBLAS */
#define lapack_complex_double double _Complex
#define lapack_complex_float float _Complex
#define LAPACK_ROW_MAJOR 101
#define LAPACK_COL_MAJOR 102
#define LAPACK_WORK_MEMORY_ERROR -1010
#define LAPACK_TRANSPOSE_MEMORY_ERROR -1011
typedef int lapack_int;
int LAPACKE_cgelss( int matrix_layout, int m, int n,
int nrhs, lapack_complex_float* a,
int lda, lapack_complex_float* b,
int ldb, float* s, float rcond,
int* rank );
int LAPACKE_zgelss( int matrix_layout, int m, int n,
int nrhs, lapack_complex_double* a,
int lda, lapack_complex_double* b,
int ldb, double* s, double rcond,
int* rank );
lapack_int LAPACKE_zgels( int matrix_layout, char trans, lapack_int m,
lapack_int n, lapack_int nrhs,
lapack_complex_double* a, lapack_int lda,
lapack_complex_double* b, lapack_int ldb );
#if ASCEE_FLOAT == 64
#define lapack_gelss LAPACKE_zgelss
#define lapack_gels LAPACKE_zgels
#else
#define lapack_gelss LAPACKE_cgelss
#endif
#define max(a,b) ((a)>(b)?(a):(b))
/* int lsq_solve(const cmat* A,const vc* b,vc* x){ */
/* POOL_INIT(lsq_solve_pool); */
/* int rv; */
/* /\* M: number of rows of matrix *\/ */
/* /\* N: Number of columns *\/ */
/* /\* Norm: L2|b-A*x| *\/ */
/* /\* NRHS: Number of right hand sides: Number of columns of matrix B *\/ */
/* assert(A->n_rows>=A->n_cols); */
/* assert(x->size == A->n_cols); */
/* assert(b->size == A->n_rows); */
/* int info; */
/* size_t lda = max(1,A->n_rows); */
/* size_t ldb = max(lda,A->n_cols); */
/* /\* Make explicit copy of matrix A data, as it will be overwritten */
/* * by lapack_gels *\/ */
/* c* A_data = Pool_allocatec(&lsq_solve_pool,A->n_rows*A->n_cols); */
/* c_copy(A_data,A->data,A->n_cols*A->n_rows); */
/* c* work_data = Pool_allocatec(&lsq_solve_pool,b->size); */
/* c_copy(work_data,b->data,b->size); */
/* /\* Lapack documentation says: *\/ */
/* /\* if TRANS = 'N' and m >= n, rows 1 to n of B contain the least */
/* squares solution vectors; the residual sum of squares for the */
/* solution in each column is given by the sum of squares of the */
/* modulus of elements N+1 to M in that column; */
/* *\/ */
/* /\* We always assume one RHS column *\/ */
/* const int nrhs = 1; */
/* /\* General Least Squares Solve *\/ */
/* info = lapack_gels(LAPACK_COL_MAJOR, /\* Column-major ordering *\/ */
/* 'N', */
/* A->n_rows, /\* Number of rows in matrix *\/ */
/* A->n_cols, /\* Number of columns *\/ */
/* nrhs, /\* nrhs, which is number_mics *\/ */
/* A_data, /\* The A-matrix *\/ */
/* lda, /\* lda: the leading dimension of matrix A *\/ */
/* work_data, /\* The b-matrix *\/ */
/* ldb); /\* ldb: the leading dimension of b: max(1,M,N) *\/ */
/* if(info==0){ */
/* c_copy(x->data,work_data,x->size); */
/* rv = SUCCESS; */
/* } */
/* else { */
/* memset(x->data,0,x->size); */
/* WARN("LAPACK INFO VALUE"); */
/* printf("%i\n", info ); */
/* TRACE(15,"Solving least squares problem failed\n"); */
/* rv = FAILURE; */
/* } */
/* Pool_free(&lsq_solve_pool,A_data); */
/* Pool_free(&lsq_solve_pool,work_data); */
/* POOL_EXIT(lsq_solve_pool,15); */
/* return rv; */
/* } */
/* d c_normdiff(const cmat* A,const cmat* B) { */
/* TRACE(15,"c_normdif"); */
/* dbgassert(A->n_cols==B->n_cols,"Number of columns of A and B " */
/* "should be equal"); */
/* dbgassert(A->n_rows==B->n_rows,"Number of rows of A and B " */
/* "should be equal"); */
/* size_t size = A->n_cols*A->n_rows; */
/* vc diff_temp = vc_al[MAX_MATRIX_SIZE]; */
/* c_copy(diff_temp,A->data,size); */
/* c alpha = -1.0; */
/* /\* This routine computes y <- alpha*x + beta*y *\/ */
/* /\* void cblas_zaxpy(OPENBLAS_CONST blasint n, *\/ */
/* /\* OPENBLAS_CONST double *alpha, *\/ */
/* /\* OPENBLAS_CONST double *x, *\/ */
/* /\* OPENBLAS_CONST blasint incx, *\/ */
/* /\* double *y, *\/ */
/* /\* OPENBLAS_CONST blasint incy); *\/ */
/* cblas_zaxpy(size, */
/* (d*) &alpha, */
/* (d*) B->data, */
/* 1, */
/* (d*) diff_temp, */
/* 1 ); */
/* return c_norm(diff_temp,size); */
/* } */
#endif /* if 0 */
//////////////////////////////////////////////////////////////////////

149
beamforming/c/ascee_alg.h Normal file
View File

@ -0,0 +1,149 @@
// ascee_alg.h
//
// Author: J.A. de Jong - ASCEE
//
// Description:
// (Linear) algebra routines on matrices and vectors
//////////////////////////////////////////////////////////////////////
#pragma once
#ifndef ASCEE_ALG_H
#define ASCEE_ALG_H
#include "ascee_math.h"
/**
* Compute the dot product of two vectors of floats
*
* @param a First vector
* @param b Second second vector
* @return dot product as float
*/
static inline d vd_dot(const vd * a,const vd* b) {
dbgassert(a->size == b->size,SIZEINEQUAL);
return d_dot(a->ptr,b->ptr,a->size);
}
/**
* y = fac * y
*
* @param y
* @param fac scale factor
*/
static inline void dmat_scale(dmat* y,const c fac){
dbgassert(y,NULLPTRDEREF);
if(likely(y->data)) {
d_scale(y->data,fac,y->n_cols*y->n_rows);
}
else {
for(us col=0;col<y->n_cols;col++) {
d_scale(y->col_ptrs[col],fac,y->n_rows);
}
}
}
/**
* y = fac * y
*
* @param y
* @param fac scale factor
*/
static inline void cmat_scale(cmat* y,const c fac){
dbgassert(y,NULLPTRDEREF);
dbgassert(y,NULLPTRDEREF);
if(likely(y->data)) {
c_scale(y->data,fac,y->n_cols*y->n_rows);
}
else {
for(us col=0;col<y->n_cols;col++) {
c_scale(y->col_ptrs[col],fac,y->n_rows);
}
}
}
/**
* x = x + fac*y
*
* @param x
* @param y
* @param fac
*/
static inline void dmat_add_dmat(dmat* x,dmat* y,d fac) {
dbgassert(x && y,NULLPTRDEREF);
dbgassert(x->n_cols == y->n_cols,SIZEINEQUAL);
dbgassert(x->n_rows == y->n_rows,SIZEINEQUAL);
if(likely(x->data && y->data)) {
d_add_to(x->data,y->data,fac,x->n_cols*x->n_rows);
}
else {
for(us col=0;col<y->n_cols;col++) {
d_add_to(x->col_ptrs[col],y->col_ptrs[col],fac,x->n_rows);
}
}
}
/**
* x = x + fac*y
*
* @param[in,out] x
* @param[in] y
* @param[in] fac
*/
static inline void cmat_add_cmat(cmat* x,cmat* y,c fac) {
dbgassert(x && y,NULLPTRDEREF);
dbgassert(x->n_cols == y->n_cols,SIZEINEQUAL);
dbgassert(x->n_rows == y->n_rows,SIZEINEQUAL);
if(likely(x->data && y->data)) {
c_add_to(x->data,y->data,fac,x->n_cols*x->n_rows);
}
else {
for(us col=0;col<y->n_cols;col++) {
c_add_to(x->col_ptrs[col],y->col_ptrs[col],fac,x->n_rows);
}
}
}
/**
* Compute the element-wise (Hadamard) product of a and b, and store
* in result
*
* @param[out] result
* @param[in] a
* @param[in] b
*/
static inline void vc_elem_prod(vc* result,vc* a,vc* b) {
dbgassert(result && a && b,NULLPTRDEREF);
dbgassert(result->size==a->size,SIZEINEQUAL);
dbgassert(b->size==a->size,SIZEINEQUAL);
c_elem_prod_c(result->ptr,a->ptr,b->ptr,a->size);
}
/**
* Compute the matrix vector product for complex-valued types: b = A*x.
*
* @param[in] A Matrix A
* @param[in] x Vector x
* @param[out] b Result of computation
*/
void cmv_dot(const cmat* A,
const vc* restrict x,
vc* restrict b);
int lsq_solve(const cmat* A,
const vc* restrict b,
vc* restrict x);
/**
* Compute the norm of the difference of two complex matrices
*
* @param A
* @param B
*/
d cmat_normdiff(const cmat* A,const cmat* B);
/**
* Computes the Kronecker product of a kron b, stores result in result.
*
* @param a a
* @param b b
* @param result a kron b
*/
void kronecker_product(const cmat* a,const cmat* b,cmat* result);
#endif // ASCEE_ALG_H
//////////////////////////////////////////////////////////////////////

View File

@ -1,33 +1,44 @@
// si_math.c
// ascee_math.c
//
// last-edit-by: J.A. de Jong
// Author: J.A. de Jong -ASCEE
//
// Description:
//
//
//////////////////////////////////////////////////////////////////////
#define TRACERPLUS (-10)
#include "ascee_math.h"
#include "ascee_assert.h"
#include "ascee_math.h"
#include "tracer.h"
#include "ascee_tracer.h"
#if ASCEE_USE_BLAS
#include <cblas.h>
#endif
#include <math.h>
#ifdef ASCEE_DEBUG
void print_dmat(const dmat* m) {
size_t row,col;
for(row=0;row<m->n_rows;row++){
for(col=0;col<m->n_cols;col++){
d val = *getdmatval(m,row,col);
printf("%c%2.2e ", val<0?'-':' ' ,d_abs(val));
}
printf("\n");
}
}
void print_cmat(const cmat* m) {
size_t row,col;
for(row=0;row<m->n_rows;row++){
for(col=0;col<m->n_cols;col++){
c val = m->data[row+m->n_rows*col];
c val = *getcmatval(m,row,col);
d rval = creal(val);
d ival = cimag(val);
printf("%c%2.2e%c%2.2ei ",rval< 0 ?'-': ' ', d_abs(rval),ival<0 ? '-' : '+',d_abs(ival) ) ;
printf("%c%2.2e%c%2.2ei ",rval< 0 ?'-': ' ',
d_abs(rval),ival<0 ? '-' : '+',d_abs(ival) ) ;
}
printf("\n");
@ -60,394 +71,7 @@ void print_vd(const vd* m) {
printf("\n");
}
}
void print_dmat(const dmat* m) {
size_t row,col;
for(row=0;row<m->n_rows;row++){
for(col=0;col<m->n_cols;col++){
d val = m->data[row+m->n_rows*col];
printf("%c%2.2e ", val<0?'-':' ' ,d_abs(val));
}
printf("\n");
}
}
#endif
void d_elem_prod_d(d res[],
const d arr1[],
const d arr2[],
const us size) {
#if ASCEE_USE_BLAS
#if ASCEE_DEBUG
if(arr1 == arr2) {
DBGWARN("d_elem_prod_d: Array 1 and array 2 point to the same"
" memory. This results in pointer aliasing, for which"
" testing is still to be done. Results might be"
" unrealiable.");
}
#endif
#if ASCEE_DOUBLE_PRECISION
#define elem_prod_fun cblas_dsbmv
#else
#define elem_prod_fun cblas_ssbmv
#endif
/* These parameters do not matter for this specific case */
const CBLAS_ORDER mat_order= CblasColMajor;
const CBLAS_UPLO uplo = CblasLower;
/* Extra multiplication factor */
const d alpha = 1.0;
/* void cblas_dsbmv(OPENBLAS_CONST enum CBLAS_ORDER order, */
/* OPENBLAS_CONST enum CBLAS_UPLO Uplo, */
/* OPENBLAS_CONST blasint N, */
/* OPENBLAS_CONST blasint K, */
/* OPENBLAS_CONST double alpha, */
/* OPENBLAS_CONST double *A, */
/* OPENBLAS_CONST blasint lda, */
/* OPENBLAS_CONST double *X, */
/* OPENBLAS_CONST blasint incX, */
/* OPENBLAS_CONST double beta, */
/* double *Y, */
/* OPENBLAS_CONST blasint incY); */
elem_prod_fun(mat_order,
uplo,
(blasint) size,
0, // Just the diagonal; 0 super-diagonal bands
alpha, /* Multiplication factor alpha */
arr1,
1, /* LDA */
arr2, /* x */
1, /* incX = 1 */
0.0, /* Beta */
res, /* The Y matrix to write to */
1); /* incY */
#undef elem_prod_fun
#else /* No blas routines, routine is very simple, but here we
* go! */
DBGWARN("Performing slow non-blas vector-vector multiplication");
for(us i=0;i<size;i++) {
res[i] = arr1[i]*arr2[i];
}
#endif
}
void c_elem_prod_c(c res[],
const c arr1[],
const c arr2[],
const us size) {
TRACE(15,"c_elem_prod_c");
uVARTRACE(15,size);
#if ASCEE_USE_BLAS
#if ASCEE_DEBUG
if(arr1 == arr2) {
DBGWARN("c_elem_prod_c: Array 1 and array 2 point to the same"
" memory. This results in pointer aliasing, for which"
" testing is still to be done. Results might be"
" unrealiable.");
}
#endif /* ASCEE_DEBUG */
#if ASCEE_DOUBLE_PRECISION
#define elem_prod_fun cblas_zgbmv
#else
#define elem_prod_fun cblas_cgbmv
#endif
/* These parameters do not matter for this specific case */
const CBLAS_ORDER mat_order= CblasColMajor;
const CBLAS_TRANSPOSE tr = CblasNoTrans;
const c alpha = 1.0;
const c beta = 0.0;
TRACE(15,"Calling " annestr(elem_prod_fun));
elem_prod_fun(mat_order,
tr,
(blasint) size, /* M: Number of rows */
(blasint) size, /* B: Number of columns */
0, /* KL: Number of sub-diagonals */
0, /* KU: Number of super-diagonals */
(d*) &alpha, /* Multiplication factor */
(d*) arr2, /* A */
1, /* LDA */
(d*) arr1, /* x */
1, /* incX = 1 */
(d*) &beta,
(d*) res, /* The Y matrix to write to */
1); /* incY */
#undef elem_prod_fun
#else /* No blas routines, routine is very simple, but here we
* go! */
DBGWARN("Performing slow non-blas vector-vector multiplication");
for(us i=0;i<size;i++) {
res[i] = arr1[i]*arr2[i];
}
#endif
}
void cmv_dot(const cmat* A,const vc* restrict x,vc* restrict b){
assert(A->n_rows == b->size);
assert(A->n_cols == x->size);
#if ASCEE_USE_BLAS == 1
/* typedef enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102} CBLAS_ORDER; */
/* typedef enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113, CblasConjNoTrans=114} CBLAS_TRANSPOSE; */
/*
void cblas_zgemv(OPENBLAS_CONST enum CBLAS_ORDER order,
OPENBLAS_CONST enum CBLAS_TRANSPOSE trans,
OPENBLAS_CONST blasint m,
OPENBLAS_CONST blasint n,
OPENBLAS_CONST double *alpha,
OPENBLAS_CONST double *a,
OPENBLAS_CONST blasint lda,
OPENBLAS_CONST double *x,
OPENBLAS_CONST blasint incx,
OPENBLAS_CONST double *beta,
double *y,
OPENBLAS_CONST blasint incy);
*/
c alpha = 1.0;
c beta = 0.0;
cblas_zgemv(CblasColMajor,
CblasNoTrans,
A->n_rows,
A->n_cols,
(d*) &alpha, /* alpha */
(d*) A->data, /* A */
A->n_rows, /* lda */
(d*) x->data, /* */
1,
(d*) &beta, /* beta */
(d*) b->data,
1);
#else
size_t i,j;
size_t n_rows = A->n_rows;
vc_set(b,0.0);
iVARTRACE(20,A->n_cols);
iVARTRACE(20,A->n_rows);
for(j=0;j<A->n_cols;j++){
for(i=0;i<A->n_rows;i++) {
c* Aij = &A->data[i+j*n_rows];
b->data[i] += *Aij * x->data[j];
}
}
#endif
}
void kronecker_product(const cmat* a,const cmat* b,cmat* result){
assert(result->n_rows == a->n_rows*b->n_rows);
assert(result->n_cols == a->n_cols*b->n_cols);
c a_rs;
c b_vw;
int r_col;
int r_row;
for(size_t r=0; r< a->n_rows;r++){
for(size_t s=0; s <a->n_cols;s++) {
for(size_t v=0;v < b->n_rows; v++) {
for(size_t w=0;w < b->n_cols;w++) {
a_rs = *getcmatval(a,r,s);
b_vw = *getcmatval(b,v,w);
r_row = b->n_rows*r+v;
r_col = b->n_cols*s+w;
result->data[r_row + r_col * result->n_rows] = a_rs * b_vw;
}
}
}
}
} /* void kronecker_product */
/* #include <lapacke.h> */
/* These functions can be directly linked to openBLAS */
#define lapack_complex_double double _Complex
#define lapack_complex_float float _Complex
#define LAPACK_ROW_MAJOR 101
#define LAPACK_COL_MAJOR 102
#define LAPACK_WORK_MEMORY_ERROR -1010
#define LAPACK_TRANSPOSE_MEMORY_ERROR -1011
typedef int lapack_int;
int LAPACKE_cgelss( int matrix_layout, int m, int n,
int nrhs, lapack_complex_float* a,
int lda, lapack_complex_float* b,
int ldb, float* s, float rcond,
int* rank );
int LAPACKE_zgelss( int matrix_layout, int m, int n,
int nrhs, lapack_complex_double* a,
int lda, lapack_complex_double* b,
int ldb, double* s, double rcond,
int* rank );
lapack_int LAPACKE_zgels( int matrix_layout, char trans, lapack_int m,
lapack_int n, lapack_int nrhs,
lapack_complex_double* a, lapack_int lda,
lapack_complex_double* b, lapack_int ldb );
#if ASCEE_FLOAT == 64
#define lapack_gelss LAPACKE_zgelss
#define lapack_gels LAPACKE_zgels
#else
#define lapack_gelss LAPACKE_cgelss
#endif
#define max(a,b) ((a)>(b)?(a):(b))
/* int lsq_solve(const cmat* A,const vc* b,vc* x){ */
/* POOL_INIT(lsq_solve_pool); */
/* int rv; */
/* /\* M: number of rows of matrix *\/ */
/* /\* N: Number of columns *\/ */
/* /\* Norm: L2|b-A*x| *\/ */
/* /\* NRHS: Number of right hand sides: Number of columns of matrix B *\/ */
/* assert(A->n_rows>=A->n_cols); */
/* assert(x->size == A->n_cols); */
/* assert(b->size == A->n_rows); */
/* int info; */
/* size_t lda = max(1,A->n_rows); */
/* size_t ldb = max(lda,A->n_cols); */
/* /\* Make explicit copy of matrix A data, as it will be overwritten */
/* * by lapack_gels *\/ */
/* c* A_data = Pool_allocatec(&lsq_solve_pool,A->n_rows*A->n_cols); */
/* c_copy(A_data,A->data,A->n_cols*A->n_rows); */
/* c* work_data = Pool_allocatec(&lsq_solve_pool,b->size); */
/* c_copy(work_data,b->data,b->size); */
/* /\* Lapack documentation says: *\/ */
/* /\* if TRANS = 'N' and m >= n, rows 1 to n of B contain the least */
/* squares solution vectors; the residual sum of squares for the */
/* solution in each column is given by the sum of squares of the */
/* modulus of elements N+1 to M in that column; */
/* *\/ */
/* /\* We always assume one RHS column *\/ */
/* const int nrhs = 1; */
/* /\* General Least Squares Solve *\/ */
/* info = lapack_gels(LAPACK_COL_MAJOR, /\* Column-major ordering *\/ */
/* 'N', */
/* A->n_rows, /\* Number of rows in matrix *\/ */
/* A->n_cols, /\* Number of columns *\/ */
/* nrhs, /\* nrhs, which is number_mics *\/ */
/* A_data, /\* The A-matrix *\/ */
/* lda, /\* lda: the leading dimension of matrix A *\/ */
/* work_data, /\* The b-matrix *\/ */
/* ldb); /\* ldb: the leading dimension of b: max(1,M,N) *\/ */
/* if(info==0){ */
/* c_copy(x->data,work_data,x->size); */
/* rv = SUCCESS; */
/* } */
/* else { */
/* memset(x->data,0,x->size); */
/* WARN("LAPACK INFO VALUE"); */
/* printf("%i\n", info ); */
/* TRACE(15,"Solving least squares problem failed\n"); */
/* rv = FAILURE; */
/* } */
/* Pool_free(&lsq_solve_pool,A_data); */
/* Pool_free(&lsq_solve_pool,work_data); */
/* POOL_EXIT(lsq_solve_pool,15); */
/* return rv; */
/* } */
/* d c_normdiff(const cmat* A,const cmat* B) { */
/* TRACE(15,"c_normdif"); */
/* dbgassert(A->n_cols==B->n_cols,"Number of columns of A and B " */
/* "should be equal"); */
/* dbgassert(A->n_rows==B->n_rows,"Number of rows of A and B " */
/* "should be equal"); */
/* size_t size = A->n_cols*A->n_rows; */
/* vc diff_temp = vc_al[MAX_MATRIX_SIZE]; */
/* c_copy(diff_temp,A->data,size); */
/* c alpha = -1.0; */
/* /\* This routine computes y <- alpha*x + beta*y *\/ */
/* /\* void cblas_zaxpy(OPENBLAS_CONST blasint n, *\/ */
/* /\* OPENBLAS_CONST double *alpha, *\/ */
/* /\* OPENBLAS_CONST double *x, *\/ */
/* /\* OPENBLAS_CONST blasint incx, *\/ */
/* /\* double *y, *\/ */
/* /\* OPENBLAS_CONST blasint incy); *\/ */
/* cblas_zaxpy(size, */
/* (d*) &alpha, */
/* (d*) B->data, */
/* 1, */
/* (d*) diff_temp, */
/* 1 ); */
/* return c_norm(diff_temp,size); */
/* } */
//////////////////////////////////////////////////////////////////////

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,146 @@
// si_math.c
//
// last-edit-by: J.A. de Jong
//
// Description:
// Operations working on raw arrays of floating point numbers
//////////////////////////////////////////////////////////////////////
#include "ascee_math_raw.h"
#if ASCEE_USE_BLAS
#include <cblas.h>
#endif
void d_elem_prod_d(d res[],
const d arr1[],
const d arr2[],
const us size) {
#if ASCEE_USE_BLAS
#if ASCEE_DEBUG
if(arr1 == arr2) {
DBGWARN("d_elem_prod_d: Array 1 and array 2 point to the same"
" memory. This results in pointer aliasing, for which"
" testing is still to be done. Results might be"
" unrealiable.");
}
#endif
#if ASCEE_DOUBLE_PRECISION
#define elem_prod_fun cblas_dsbmv
#else
#define elem_prod_fun cblas_ssbmv
#endif
/* These parameters do not matter for this specific case */
const CBLAS_ORDER mat_order= CblasColMajor;
const CBLAS_UPLO uplo = CblasLower;
/* Extra multiplication factor */
const d alpha = 1.0;
/* void cblas_dsbmv(OPENBLAS_CONST enum CBLAS_ORDER order, */
/* OPENBLAS_CONST enum CBLAS_UPLO Uplo, */
/* OPENBLAS_CONST blasint N, */
/* OPENBLAS_CONST blasint K, */
/* OPENBLAS_CONST double alpha, */
/* OPENBLAS_CONST double *A, */
/* OPENBLAS_CONST blasint lda, */
/* OPENBLAS_CONST double *X, */
/* OPENBLAS_CONST blasint incX, */
/* OPENBLAS_CONST double beta, */
/* double *Y, */
/* OPENBLAS_CONST blasint incY); */
elem_prod_fun(mat_order,
uplo,
(blasint) size,
0, // Just the diagonal; 0 super-diagonal bands
alpha, /* Multiplication factor alpha */
arr1,
1, /* LDA */
arr2, /* x */
1, /* incX = 1 */
0.0, /* Beta */
res, /* The Y matrix to write to */
1); /* incY */
#undef elem_prod_fun
#else /* No blas routines, routine is very simple, but here we
* go! */
DBGWARN("Performing slow non-blas vector-vector multiplication");
for(us i=0;i<size;i++) {
res[i] = arr1[i]*arr2[i];
}
#endif
}
void c_elem_prod_c(c res[],
const c arr1[],
const c arr2[],
const us size) {
TRACE(15,"c_elem_prod_c");
uVARTRACE(15,size);
#if ASCEE_USE_BLAS
#if ASCEE_DEBUG
if(arr1 == arr2) {
DBGWARN("c_elem_prod_c: Array 1 and array 2 point to the same"
" memory. This results in pointer aliasing, for which"
" testing is still to be done. Results might be"
" unrealiable.");
}
#endif /* ASCEE_DEBUG */
#if ASCEE_DOUBLE_PRECISION
#define elem_prod_fun cblas_zgbmv
#else
#define elem_prod_fun cblas_cgbmv
#endif
/* These parameters do not matter for this specific case */
const CBLAS_ORDER mat_order= CblasColMajor;
const CBLAS_TRANSPOSE tr = CblasNoTrans;
const c alpha = 1.0;
const c beta = 0.0;
TRACE(15,"Calling " annestr(elem_prod_fun));
elem_prod_fun(mat_order,
tr,
(blasint) size, /* M: Number of rows */
(blasint) size, /* B: Number of columns */
0, /* KL: Number of sub-diagonals */
0, /* KU: Number of super-diagonals */
(d*) &alpha, /* Multiplication factor */
(d*) arr2, /* A */
1, /* LDA */
(d*) arr1, /* x */
1, /* incX = 1 */
(d*) &beta,
(d*) res, /* The Y matrix to write to */
1); /* incY */
#undef elem_prod_fun
#else /* No blas routines, routine is very simple, but here we
* go! */
DBGWARN("Performing slow non-blas vector-vector multiplication");
for(us i=0;i<size;i++) {
res[i] = arr1[i]*arr2[i];
}
#endif
}
//////////////////////////////////////////////////////////////////////

View File

@ -0,0 +1,469 @@
// ascee_math_raw.h
//
// Author: J.A. de Jong - ASCEE
//
// Description: Raw math routines working on raw arrays of floats and
// complex numbers.
//////////////////////////////////////////////////////////////////////
#pragma once
#ifndef ASCEE_MATH_RAW_H
#define ASCEE_MATH_RAW_H
#include "ascee_assert.h"
#include "ascee_tracer.h"
#include "types.h"
#include <math.h>
#if ASCEE_USE_BLAS == 1
#include <cblas.h>
#endif
#ifdef ASCEE_DOUBLE_PRECISION
#define c_real creal
#define c_imag cimag
#define d_abs fabs
#define c_abs cabs
#define c_conj conj
#define d_atan2 atan2
#define d_acos acos
#define d_sqrt sqrt
#define c_exp cexp
#define d_sin sin
#define d_cos cos
#define d_pow pow
#else // ASCEE_DOUBLE_PRECISION not defined
#define c_conj conjf
#define c_real crealf
#define c_imag cimagf
#define d_abs fabsf
#define c_abs cabsf
#define d_atan2 atan2f
#define d_acos acosf
#define d_sqrt sqrtf
#define c_exp cexpf
#define d_sin sinf
#define d_cos cosf
#define d_pow powf
#endif // ASCEE_DOUBLE_PRECISION
#ifdef M_PI
static const d number_pi = M_PI;
#else
static const d number_pi = 3.1415926535897932384626433832795028841971693993751058209749445923078164062862089986280348253421170679;
#endif
/**
* Set all elements in an array equal to val
*
* @param to
* @param val
* @param size
*/
static inline void d_set(d to[],d val,us size) {
for(us i=0;i<size;i++) {
to[i]=val;
}
}
/**
* Set all elements in an array equal to val
*
* @param to
* @param val
* @param size
*/
static inline void c_set(c to[],c val,us size) {
for(us i=0;i<size;i++) {
to[i]=val;
}
}
/**
* Return the maximum of two doubles
*
* @param a value 1
* @param b value 2
*
* @returns the maximum of value 1 and 2
*/
static inline d d_max(const d a,const d b) {
return a>b?a:b;
}
/**
* Return the dot product of two arrays, one of them complex-valued,
* the other real-valued
*
* @param a the complex-valued array
* @param b the real-valued array
* @param size the size of the arrays. *Should be equal-sized!*
*
* @return the dot product
*/
static inline c cd_dot(const c a[],const d b[],us size){
c result = 0;
us i;
for(i=0;i<size;i++){
result+=a[i]*b[i];
}
return result;
}
/**
* Return the dot product of two complex-valued arrays. Wraps BLAS
* when ASCEE_USE_BLAS == 1.
*
* @param a complex-valued array
* @param b complex-valued array
* @param size the size of the arrays. *Should be equal-sized!*
*
* @return the dot product
*/
static inline c cc_dot(const c a[],const c b[],us size){
#if ASCEE_USE_BLAS == 1
WARN("CBlas zdotu not yet tested");
#if ASCEE_DOUBLE_PRECISION
// assert(0);
return cblas_zdotu(size,(d*) a,1,(d*) b,1);
#else
return cblas_cdotu(size,(d*) a,1,(d*) b,1);
#endif
#else
c result = 0;
us i;
for(i=0;i<size;i++){
result+=a[i]*b[i];
}
return result;
#endif
}
/**
* Compute the dot product of two real arrays.
*
* @param a First array.
* @param b Second array.
* @param size Size of the arrays.
* @return The result.
*/
static inline d d_dot(const d a[],const d b[],const us size){
#if ASCEE_USE_BLAS == 1
#if ASCEE_DOUBLE_PRECISION
return cblas_ddot(size,a,1,b,1);
#else // Single precision function
return cblas_sdot(size,a,1,b,1);
#endif
#else // No BLAS, do it manually
d result = 0;
us i;
for(i=0;i<size;i++){
result+=a[i]*b[i];
}
return result;
#endif
}
/**
* Copy array of floats.
*
* @param to : Array to write to
* @param from : Array to read from
* @param size : Size of arrays
*/
static inline void d_copy(d to[],const d from[],const us size){
#if ASCEE_USE_BLAS == 1
cblas_dcopy(size,from,1,to,1);
#else
us i;
for(i=0;i<size;i++)
to[i] = from[i];
#endif
}
/**
* Copy array of floats to array of complex floats. Imaginary part set
* to zero.
*
* @param to : Array to write to
* @param from : Array to read from
* @param size : Size of arrays
*/
static inline void cd_copy(c to[],const d from[],const us size) {
us i;
for(i=0;i<size;i++) {
to[i] = (c) (from[i]);
dbgassert(cimag(to[i]) == 0,"Imaginary part not equal to zero");
}
}
/**
* Copy float vector to complex vector. Imaginary part set
* to zero.
*
* @param to : Vector to write to
* @param from : Vector to read from
*/
static inline void c_copy(c to[],const c from[],us size){
#if ASCEE_USE_BLAS == 1
#if ASCEE_DOUBLE_PRECISION
cblas_zcopy(size,(d*) from,1,(d*) to,1);
#else
cblas_ccopy(size,(d*) from,1,(d*) to,1);
#endif
#else
us i;
for(i=0;i<size;i++)
to[i] = from[i];
#endif
}
/**
* Multiply y with fac, and add result to x
*
* @param x Array to add to
* @param y Array to add to x
* @param fac Factor with which to multiply y
* @param size Size of the arrays
*/
static inline void d_add_to(d x[],const d y[],d fac,us size){
#if ASCEE_USE_BLAS == 1
#if ASCEE_DOUBLE_PRECISION
cblas_daxpy(size,fac,y,1,x,1);
#else
cblas_saxpy(size,fac,y,1,x,1);
#endif
#else
us i;
for(i=0;i<size;i++)
x[i]+=fac*y[i];
#endif
}
/**
* x = x + fac*y
*
* @param x Array to add to
* @param y Array to add to x
* @param fac Factor with which to multiply y
* @param size Size of the arrays
*/
static inline void c_add_to(c x[],const c y[],c fac,us size){
#if ASCEE_USE_BLAS == 1
#if ASCEE_DOUBLE_PRECISION
cblas_zaxpy(size,(d*) &fac,(d*) y,1,(d*) x,1);
#else
cblas_caxpy(size,(d*) &fac,(d*) y,1,(d*) x,1);
#endif
#else
us i;
for(i=0;i<size;i++)
x[i]+=fac*y[i];
#endif
}
/**
* Scale an array of doubles
*
* @param a array
* @param scale_fac scale factor
* @param size size of the array
*/
static inline void d_scale(d a[],const d scale_fac,us size){
#if ASCEE_USE_BLAS == 1
#if ASCEE_DOUBLE_PRECISION
cblas_dscal(size,scale_fac,a,1);
#else
cblas_sscal(size,scale_fac,a,1);
#endif
#else
us i;
for(i=0;i<size;i++)
a[i]*=scale_fac;
#endif
}
/**
* Scale an array of complex floats
*
* @param a array
* @param scale_fac scale factor
* @param size size of the array
*/
static inline void c_scale(c a[],const c scale_fac,us size){
#if ASCEE_USE_BLAS == 1
// Complex argument should be given in as array of two double
// values. The first the real part, the second the imaginary
// part. Fortunately the (c) type stores the two values in this
// order. To be portable and absolutely sure anything goes well,
// we convert it explicitly here.
d scale_fac_d [] = {creal(scale_fac),cimag(scale_fac)};
#if ASCEE_DOUBLE_PRECISION
cblas_zscal(size,scale_fac_d,(d*) a,1);
#else
cblas_cscal(size,scale_fac_d,(d*) a,1);
#endif
#else
us i;
for(i=0;i<size;i++)
a[i]*=scale_fac;
#endif
}
/**
* Compute the maximum value of an array
*
* @param a array
* @param size size of the array
* @return maximum
*/
static inline d darray_max(const d a[],us size){
us i;
d max = a[0];
for(i=1;i<size;i++){
if(a[i] > max) max=a[i];
}
return max;
}
/**
* Compute the minimum of an array
*
* @param a array
* @param size size of the array
* @return minimum
*/
static inline d d_min(const d a[],us size){
us i;
d min = a[0];
for(i=1;i<size;i++){
if(a[i] > min) min=a[i];
}
return min;
}
/**
* Compute the \f$ L_2 \f$ norm of an array of doubles
*
* @param a Array
* @param size Size of array
*/
static inline d d_norm(const d a[],us size){
#if ASCEE_USE_BLAS == 1
return cblas_dnrm2(size,a,1);
#else
d norm = 0;
us i;
for(i=0;i<size;i++){
norm+=a[i]*a[i];
}
norm = d_sqrt(norm);
return norm;
#endif
}
/**
* Compute the \f$ L_2 \f$ norm of an array of complex floats
*
* @param a Array
* @param size Size of array
*/
static inline d c_norm(const c a[],us size){
#if ASCEE_USE_BLAS == 1
return cblas_dznrm2(size,(d*) a,1);
#else
d norm = 0;
us i;
for(i=0;i<size;i++){
d absa = c_abs(a[i]);
norm+=absa*absa;
}
norm = d_sqrt(norm);
return norm;
#endif
}
/**
* Computes the element-wise vector product, or Hadamard product of
* arr1 and arr2
*
* @param res Where the result will be stored
* @param arr1 Array 1
* @param vec2 Array 2
* @param size: Size of the arrays
*/
void d_elem_prod_d(d res[],
const d arr1[],
const d arr2[],
const us size);
/**
* Computes the element-wise vector product, or Hadamard product of
* arr1 and arr2 for complex floats
*
* @param res Where the result will be stored
* @param arr1 Array 1
* @param vec2 Array 2
* @param size: Size of the arrays
*/
void c_elem_prod_c(c res[],
const c arr1[],
const c arr2[],
const us size);
/**
* Compute the complex conjugate of a complex vector and store the
* result.
*
* @param res Result vector
* @param in Input vector
* @param size Size of the vector
*/
static inline void c_conj_c(c res[],const c in[],const us size) {
// First set the result vector to zero
c_set(res,0,size);
#ifdef ASCEE_USE_BLAS
#if ASCEE_DOUBLE_PRECISION
// Cast as a float, scale all odd elements with minus one to find
// the complex conjugate.
cblas_daxpy(size ,1,(d*) in,2,(d*) res,2);
cblas_daxpy(size,-1,&((d*) in)[1],2,&((d*) res)[1],2);
#else
cblas_faxpy(size ,1,(d*) in,2,(d*) res,2);
cblas_faxpy(size,-1,&((d*) in)[1],2,&((d*) res)[1],2);
#endif // ASCEE_DOUBLE_PRECISION
#else
for(us i=0;i<size;i++) {
res[i] = c_conj(in[i]);
}
#endif // ASCEE_USE_BLAS
}
/**
* In place complex conjugation
*
* @param res Result vector
* @param size Size of the vector
*/
static inline void c_conj_inplace(c res[],us size) {
#ifdef ASCEE_USE_BLAS
#if ASCEE_DOUBLE_PRECISION
// Cast as a float, scale all odd elements with minus one to find
// the complex conjugate.
cblas_dscal(size,-1,&((d*) res)[1],2);
#else
cblas_sscal(size,-1,&((d*) res)[1],2);
#endif // ASCEE_DOUBLE_PRECISION
#else
for(us i=0;i<size;i++) {
res[i] = c_conj(res[i]);
}
#endif // ASCEE_USE_BLAS
}
#endif // ASCEE_MATH_RAW_H
//////////////////////////////////////////////////////////////////////

View File

@ -7,7 +7,7 @@
//////////////////////////////////////////////////////////////////////
#if TRACER == 1
#include <stdio.h>
#include "tracer.h"
#include "ascee_tracer.h"
#include "types.h"
#ifdef _REENTRANT

View File

@ -1,4 +1,4 @@
// tracer.h
// ascee_tracer.h
//
// Author: J.A. de Jong - ASCEE
//
@ -6,8 +6,8 @@
// Basic tracing code for debugging.
//////////////////////////////////////////////////////////////////////
#pragma once
#ifndef TRACER_H
#define TRACER_H
#ifndef ASCEE_TRACER_H
#define ASCEE_TRACER_H
// Some console colors
#define RESET "\033[0m"
@ -214,6 +214,5 @@ void uvartrace_impl(const char* pos,int line,const char* varname,size_t var);
#endif // ######################################## TRACER ==1
#endif // TRACER_H
#endif // ASCEE_TRACER_H
//////////////////////////////////////////////////////////////////////

View File

@ -6,30 +6,26 @@
//
//////////////////////////////////////////////////////////////////////
#define TRACERPLUS (-5)
#include "tracer.h"
#include "ascee_tracer.h"
#include "fft.h"
#include "types.h"
/* #include "kiss_fftr.h" */
#include "fftpack.h"
typedef struct Fft_s {
us nfft;
us nchannels;
Fftr* fftr;
} Fft;
Fft* Fft_alloc(const us nfft,const us nchannels) {
fsTRACE(15);
#ifdef ASCEE_DEBUG
if(nchannels > ASCEE_MAX_NUM_CHANNELS) {
WARN("Too high number of channels! Please increase the "
"ASCEE_MAX_NUM_CHANNELS compilation flag");
return NULL;
}
Fft* fft = a_malloc(sizeof(Fft));
if(fft==NULL) {
WARN("Fft allocation failed");
@ -41,10 +37,10 @@ Fft* Fft_alloc(const us nfft,const us nchannels) {
fft->fftr = Fftr_alloc(nfft);
if(!fft->fftr) {
a_free(fft);
WARN(ALLOCFAILED "fftr");
return NULL;
}
#endif // ASCEE_PARALLEL
feTRACE(15);
return fft;
@ -57,10 +53,9 @@ void Fft_free(Fft* fft) {
}
us Fft_nchannels(const Fft* fft) {return fft->nchannels;}
us Fft_nfft(const Fft* fft) {return fft->nfft;}
void Fft_fft(const Fft* fft,const dmat* timedata,cmat* result) {
fsTRACE(15);
dbgassert(timedata && result,NULLPTRDEREF);
dbgassert(timedata->n_rows == fft->nfft,"Invalid size for time data rows."
" Should be equal to nfft");
dbgassert(timedata->n_cols == fft->nchannels,"Invalid size for time data cols."

View File

@ -8,7 +8,7 @@
//////////////////////////////////////////////////////////////////////
#define TRACERPLUS (-6)
#include "types.h"
#include "tracer.h"
#include "ascee_tracer.h"
#include "ascee_assert.h"
#include "ascee_alloc.h"
#include "mq.h"
@ -257,8 +257,6 @@ void* JobQueue_assign(JobQueue* jq) {
pid_t tid = syscall(SYS_gettid);
iVARTRACE(16,tid);
#endif // __linux__
#endif // ASCEE_DEBUG

View File

@ -5,11 +5,11 @@
// Description:
//
//////////////////////////////////////////////////////////////////////
#define TRACERPLUS 1000
/* #define TRACERPLUS 1000 */
#include "ps.h"
#include "fft.h"
#include "ascee_alloc.h"
#include "ascee_math.h"
#include "ascee_alg.h"
#include "ascee_assert.h"
typedef struct PowerSpectra_s {
@ -27,7 +27,7 @@ PowerSpectra* PowerSpectra_alloc(const us nfft,
const us nchannels,
const WindowType wt) {
TRACE(15,"PowerSpectra_alloc");
fsTRACE(15);
int rv;
/* Check nfft */
@ -61,11 +61,12 @@ PowerSpectra* PowerSpectra_alloc(const us nfft,
if(rv!=0) {
WARN("Error creating window function, continuing anyway");
}
feTRACE(15);
return ps;
}
void PowerSpectra_free(PowerSpectra* ps) {
TRACE(15,"PowerSpectra_free");
fsTRACE(15);
Fft_free(ps->fft);
vd_free(&ps->window);
@ -73,15 +74,19 @@ void PowerSpectra_free(PowerSpectra* ps) {
dmat_free(&ps->timedata_work);
vc_free(&ps->j_vec_conj);
a_free(ps);
feTRACE(15);
}
int PowerSpectra_compute(const PowerSpectra* ps,
void PowerSpectra_compute(const PowerSpectra* ps,
const dmat * timedata,
cmat * result) {
TRACE(15,"PowerSpectra_compute");
fsTRACE(15);
dbgassert(ps && timedata && result,NULLPTRDEREF);
const us nchannels = Fft_nchannels(ps->fft);
const us nfft = Fft_nfft(ps->fft);
uVARTRACE(15,nchannels);
@ -125,11 +130,13 @@ int PowerSpectra_compute(const PowerSpectra* ps,
&timedata_work,
&fft_work);
TRACE(15,"fft done");
/* Scale fft such that power is easily comxputed */
const c scale_fac = d_sqrt(2/win_pow)/nfft;
c_scale(fft_work.data,scale_fac,(nfft/2+1)*nchannels);
cmat_scale(&fft_work,scale_fac);
TRACE(15,"scale done");
for(i=0;i< nchannels;i++) {
/* Multiply DC term with 1/sqrt(2) */
*getcmatval(&fft_work,0,i) *= 1/d_sqrt(2.)+0*I;
@ -139,166 +146,34 @@ int PowerSpectra_compute(const PowerSpectra* ps,
}
/* print_cmat(&fft_work); */
TRACE(15,"Nyquist and DC correction done");
c* j_vec_conj = ps->j_vec_conj.data;
vc j_vec_conj = ps->j_vec_conj;
/* Compute Cross-power spectra and store result */
for(i =0; i<nchannels;i++) {
for(j=0;j<nchannels;j++) {
iVARTRACE(15,i);
iVARTRACE(15,j);
/* The indices here are important. This is also how it
* is documented */
c* res = getcmatval(result,0,i+j*nchannels);
c* i_vec = getcmatval(&fft_work,0,i);
c* j_vec = getcmatval(&fft_work,0,j);
vc res = cmat_column(result,i+j*nchannels);
TRACE(15,"SFSG");
vc i_vec = cmat_column(&fft_work,i);
vc j_vec = cmat_column(&fft_work,j);
TRACE(15,"SFSG");
/* Compute the conjugate of spectra j */
c_conj_c(j_vec_conj,j_vec,nfft/2+1);
/* Compute the product of the two vectors and store the
* result as the result */
c_elem_prod_c(res,i_vec,j_vec_conj,nfft/2+1);
vc_conj_vc(&j_vec_conj,&j_vec);
TRACE(15,"SFSG");
/* Compute the element-wise product of the two vectors and
* store the result as the result */
vc_elem_prod(&res,&i_vec,&j_vec_conj);
TRACE(15,"SFSG");
}
}
return SUCCESS;
feTRACE(15);
}
/* typedef struct AvPowerSpectra_s { */
/* us overlap_offset; */
/* us naverages; /\* The number of averages taken *\/ */
/* dmat prev_timedata; /\**< Storage for previous */
/* * timedata buffer *\/ */
/* vc* ps; /\**< Here we store the averaged */
/* * results for each Cross-power */
/* * spectra. These are */
/* * nchannels*nchannels vectors *\/ */
/* vc* ps_work; /\**< Work area for the results, also */
/* * nchannels*nchannels *\/ */
/* } AvPowerSpectra; */
/* AvPowerSpectra* AvPowerSpectra_alloc(const us nfft, */
/* const us nchannels, */
/* const d overlap_percentage) { */
/* TRACE(15,"AvPowerSpectra_alloc"); */
/* int rv; */
/* /\* Check nfft *\/ */
/* if(nfft % 2 != 0) { */
/* WARN("nfft should be even"); */
/* return NULL; */
/* } */
/* /\* Check overlap percentage *\/ */
/* us overlap_offset = nfft - (us) overlap_percentage*nfft/100; */
/* if(overlap_offset == 0 || overlap_offset > nfft) { */
/* WARN("Overlap percentage results in offset of 0, or a too high number of */
/* overlap" */
/* " samples. Number of overlap samples should be < nfft"); */
/* WARN("Illegal overlap percentage. Should be 0 <= %% < 100"); */
/* return NULL; */
/* } */
/* /\* ALlocate space *\/ */
/* Fft fft; */
/* rv = Fft_alloc(&fft,nfft,nchannels); */
/* if(rv != SUCCESS) { */
/* WARN("Fft allocation failed"); */
/* return NULL; */
/* } */
/* AvPowerSpectra* aps = a_malloc(sizeof(AvPowerSpectra)); */
/* if(!aps) { */
/* WARN("Allocation of AvPowerSpectra memory failed"); */
/* return NULL; */
/* } */
/* ps->naverages = 0; */
/* ps->overlap_offset = overlap_offset; */
/* /\* Allocate vectors and matrices *\/ */
/* ps->prev_timedata = dmat_alloc(nfft,nchannels); */
/* return ps; */
/* } */
/* us AvPowerSpectra_getAverages(const AvPowerSpectra* ps) { */
/* return ps->naverages; */
/* } */
/* /\** */
/* * Compute single power spectra by */
/* * */
/* * @param ps Initialized AvPowerSpectra structure */
/* * @param timedata Timedata to compute for */
/* * @param result Result */
/* * */
/* * @return */
/* *\/ */
/* static int AvPowerSpectra_computeSingle(const AvPowerSpectra* ps, */
/* const dmat* timedata, */
/* vc* results) { */
/* us nchannels = ps->fft.nchannels; */
/* for(us channel=0;channel<ps;channel++) { */
/* } */
/* return SUCCESS; */
/* } */
/* int AvPowerSpectra_addTimeData(AvPowerSpectra* ps, */
/* const dmat* timedata) { */
/* TRACE(15,"AvPowerSpectra_addTimeData"); */
/* dbgassert(ps,"Null pointer given for ps"); */
/* const us nchannels = ps->fft.nchannels; */
/* const us nfft = ps->fft.nfft; */
/* dbgassert(timedata->n_cols == nchannels,"Invalid time data"); */
/* dbgassert(timedata->n_rows == nfft,"Invalid time data"); */
/* dmat prevt = ps->prev_timedata; */
/* us noverlap = ps->noverlap; */
/* if(ps->naverages != 0) { */
/* /\* Copy the overlap buffer to the tbuf *\/ */
/* copy_dmat_rows(&tbuf,&overlap,0,0,noverlap); */
/* /\* Copy the new timedata buffer to the tbuf *\/ */
/* copy_dmat_rows(&tbuf,timedata,0,noverlap,(nfft-noverlap)); */
/* } */
/* else { */
/* /\* Copy the overlap buffer to the tbuf *\/ */
/* copy_dmat_rows(&tbuf,&overlap,0,0,noverlap); */
/* } */
/* return SUCCESS; */
/* } */
/* void AvPowerSpectra_free(AvPowerSpectra* ps) { */
/* TRACE(15,"AvPowerSpectra_free"); */
/* Fft_free(&ps->fft); */
/* dmat_free(&ps->prev_timedata); */
/* vd_free(&ps->window); */
/* a_free(ps); */
/* } */
//////////////////////////////////////////////////////////////////////

View File

@ -9,59 +9,54 @@
#ifndef PS_H
#define PS_H
#include "window.h"
struct PowerSpectra_s;
struct AvPowerSpectra_s;
typedef struct PowerSpectra_s PowerSpectra;
typedef struct AvPowerSpectra_s AvPowerSpectra;
/**
* Allocate a PowerSpectra computer
*
* @param nfft fft length
* @param nchannels Number of channels
* @param wt Windowtype, as defined in window.h
*
* @return PowerSpectra handle, NULL on error
*/
PowerSpectra* PowerSpectra_alloc(const us nfft,
const us nchannels,
const WindowType wt);
/**
* Compute power spectra, returns to a complex array
* Compute power spectra.
*
* @param ps[in] PowerSpectra handle
* @param[in] timedata Time data. Should have size nfft*nchannels
*
* @param[out] result Here, the result will be stored. Should have
* size (nfft/2+1)*nchannels^2, such that the Cij at frequency index f
* can be obtained as result[f,i+j*nchannels]
*
* @param[in] timedata Time data, should be of size nfft*nchannels
* @param [out] Cross-power spectra, array should be of size
* (nfft/2+1) x (nchannels*nchannels), such that the cross spectra of
* channel i with channel j at can be found as
* getvcval(0,i+j*nchannels).
* @return status code, SUCCESS on succes.
*/
int PowerSpectra_compute(const PowerSpectra*,
void PowerSpectra_compute(const PowerSpectra* ps,
const dmat* timedata,
cmat *result);
/**
* Free storage of the PowerSpectra
*/
void PowerSpectra_free(PowerSpectra*);
// AvPowerSpectra* AvPowerSpectra_alloc(const us nfft,
// const us nchannels,
// const d overlap_percentage,
// const WindowType wt);
/**
* Return the current number of averages taken to obtain the result.
* Return nfft
*
* @return The number of averages taken.
* @param ps[in] PowerSpectra handle
*
* @return nfft
*/
us AvPowerSpectra_getAverages(const AvPowerSpectra*);
int AvPowerSpectra_addTimeData(AvPowerSpectra*,
const dmat* timedata);
us PowerSpectra_getnfft(const PowerSpectra* ps);
/**
* Free storage of the AvPowerSpectra
* Free PowerSpectra
*
* @param[in] ps PowerSpectra handle
*/
void AvPowerSpectra_free(AvPowerSpectra*);
void PowerSpectra_free(PowerSpectra* ps);
#endif // PS_H
//////////////////////////////////////////////////////////////////////

View File

@ -10,6 +10,15 @@
#ifndef TYPES_H
#define TYPES_H
// Branch prediction performance improvement
#if !defined(likely) && defined(__GNUC__) && !defined(ASCEE_DEBUG)
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
#else
#define likely(x) (x)
#define unlikely(x) (x)
#endif
/// We often use boolean values
#include <stdbool.h> // true, false
#include <stddef.h>

View File

@ -11,7 +11,7 @@
#include "ascee_alloc.h"
#include <pthread.h>
#include "ascee_assert.h"
#include "tracer.h"
#include "ascee_tracer.h"
typedef struct Workers_s {
JobQueue* jq;

View File

@ -18,25 +18,26 @@ ELSE:
ctypedef size_t us
cdef extern from "math.h":
cdef extern from "ascee_tracer.h":
void setTracerLevel(int)
cdef extern from "ascee_math.h":
ctypedef struct dmat:
d* data
d** col_ptrs
us n_cols,n_rows
ctypedef struct cmat:
c* data
c** col_ptrs
us n_cols,n_rows
# cdef np.ndarray empty(us nrows,us ncols,dtype):
cdef dmat dmat_from_array(d[::1,:] arr):
cdef dmat result
result.data = &arr[0,0]
result.n_rows = arr.shape[0]
result.n_cols = arr.shape[1]
return result
dmat dmat_foreign(us n_rows,
us n_cols,
d* data)
cmat cmat_foreign(us n_rows,
us n_cols,
c* data)
cdef cmat cmat_from_array(c[::1,:] arr):
cdef cmat result
result.data = &arr[0,0]
result.n_rows = arr.shape[0]
result.n_cols = arr.shape[1]
return result
void dmat_free(dmat*)
void cmat_free(cmat*)
void cmat_copy(cmat* to,cmat* from_)

View File

@ -1,5 +1,7 @@
include "config.pxi"
# setTracerLevel(0)
cdef extern from "fft.h":
ctypedef struct c_Fft "Fft"
c_Fft* Fft_alloc(us nfft,us nchannels)
@ -32,11 +34,19 @@ cdef class Fft:
nchannels),
dtype=NUMPY_COMPLEX_TYPE,order='F')
# result[:,:] = np.nan+1j*np.nan
cdef c[::1,:] result_view = result
cdef cmat r = cmat_foreign(result.shape[0],
result.shape[1],
&result_view[0,0])
cdef dmat t = dmat_foreign(timedata.shape[0],
timedata.shape[1],
&timedata[0,0])
cdef cmat r = cmat_from_array(result)
cdef dmat t = dmat_from_array(timedata)
Fft_fft(self._fft,&t,&r)
dmat_free(&t)
cmat_free(&r)
return result
@ -50,46 +60,139 @@ cdef extern from "window.h":
cdef extern from "ps.h":
ctypedef struct c_PowerSpectra "PowerSpectra"
c_PowerSpectra* PowerSpectra_alloc(const us nfft,
const us nchannels,
const WindowType wt)
int PowerSpectra_compute(const c_PowerSpectra* ps,
const us nchannels,
const WindowType wt)
void PowerSpectra_compute(const c_PowerSpectra* ps,
const dmat * timedata,
cmat * result)
void PowerSpectra_free(c_PowerSpectra*)
# cdef class PowerSpectra:
# cdef:
# c_PowerSpectra* _ps
cdef class PowerSpectra:
cdef:
c_PowerSpectra* _ps
# def __cinit__(self, us nfft,us nchannels):
# self._ps = PowerSpectra_alloc(nfft,nchannels,Rectangular)
# if self._ps == NULL:
# raise RuntimeError('PowerSpectra allocation failed')
def __cinit__(self, us nfft,us nchannels):
self._ps = PowerSpectra_alloc(nfft,nchannels,Rectangular)
if self._ps == NULL:
raise RuntimeError('PowerSpectra allocation failed')
# def compute(self,d[::1,:] timedata):
# cdef:
# us nchannels = timedata.shape[1]
# us nfft = timedata.shape[0]
# int rv
# dmat td
# cmat result_mat
# td.data = &timedata[0,0]
# td.n_rows = nfft
# td.n_cols = nchannels
# result = np.empty((nfft//2+1,nchannels*nchannels),
# dtype = NUMPY_COMPLEX_TYPE,
# order='F')
# result_mat = cmat_from_array(result)
def compute(self,d[::1,:] timedata):
cdef:
us nchannels = timedata.shape[1]
us nfft = timedata.shape[0]
int rv
dmat td
cmat result_mat
td = dmat_foreign(nfft,
nchannels,
&timedata[0,0])
# rv = PowerSpectra_compute(self._ps,&td,&result_mat)
# if rv !=0:
# raise RuntimeError("Error computing power spectra")
# The array here is created in such a way that the strides
# increase with increasing dimension. This is required for
# interoperability with the C-code, that stores all
# cross-spectra in a 2D matrix, where the first axis is the
# frequency axis, and the second axis corresponds to a certain
# cross-spectrum, as C_ij(f) = result[freq,i+j*nchannels]
# return result
result = np.empty((nfft//2+1,nchannels,nchannels),
dtype = NUMPY_COMPLEX_TYPE,
order='F')
cdef c[::1,:,:] result_view = result
result_mat = cmat_foreign(nfft//2+1,
nchannels*nchannels,
&result_view[0,0,0])
PowerSpectra_compute(self._ps,&td,&result_mat)
dmat_free(&td)
cmat_free(&result_mat)
return result
# def __dealloc__(self):
# if self._ps != NULL:
# PowerSpectra_free(self._ps)
def __dealloc__(self):
if self._ps != NULL:
PowerSpectra_free(self._ps)
cdef extern from "aps.h":
ctypedef struct c_AvPowerSpectra "AvPowerSpectra"
c_AvPowerSpectra* AvPowerSpectra_alloc(const us nfft,
const us nchannels,
d overlap_percentage,
const WindowType wt)
cmat* AvPowerSpectra_addTimeData(const c_AvPowerSpectra* ps,
const dmat * timedata)
void AvPowerSpectra_free(c_AvPowerSpectra*)
cdef class AvPowerSpectra:
cdef:
c_AvPowerSpectra* aps
us nfft, nchannels
def __cinit__(self,us nfft,us nchannels,d overlap_percentage):
self.aps = AvPowerSpectra_alloc(nfft,
nchannels,
overlap_percentage,
Rectangular)
self.nchannels = nchannels
self.nfft = nfft
if self.aps == NULL:
raise RuntimeError('AvPowerSpectra allocation failed')
def __dealloc__(self):
if self.aps:
AvPowerSpectra_free(self.aps)
def addTimeData(self,d[::1,:] timedata):
"""!
Adds time data, returns current result
"""
cdef:
us nsamples = timedata.shape[0]
us nchannels = timedata.shape[1]
dmat td
cmat* result_ptr
if nsamples < self.nfft:
raise RuntimeError('Number of samples should be > nfft')
if nchannels != self.nchannels:
raise RuntimeError('Invalid number of channels')
td = dmat_foreign(nsamples,
nchannels,
&timedata[0,0])
result_ptr = AvPowerSpectra_addTimeData(self.aps,
&td)
# The array here is created in such a way that the strides
# increase with increasing dimension. This is required for
# interoperability with the C-code, that stores all
# cross-spectra in a 2D matrix, where the first axis is the
# frequency axis, and the second axis corresponds to a certain
# cross-spectrum, as C_ij(f) = result[freq,i+j*nchannels]
result = np.empty((self.nfft//2+1,nchannels,nchannels),
dtype = NUMPY_COMPLEX_TYPE,
order='F')
cdef c[::1,:,:] result_view = result
cdef cmat res = cmat_foreign(self.nfft//2+1,
nchannels*nchannels,
&result_view[0,0,0])
# Copy result
cmat_copy(&res,result_ptr)
cmat_free(&res)
dmat_free(&td)
return result

View File

@ -8,7 +8,7 @@
#pragma once
#ifndef FFTPACK_H
#define FFTPACK_H
#include "tracer.h"
#include "ascee_tracer.h"
#include "ascee_alloc.h"
#include "npy_fftpack.h"

View File

@ -2,6 +2,8 @@ include_directories(${CMAKE_SOURCE_DIR}/beamforming/c)
add_executable(test_bf test_bf.c)
add_executable(test_workers test_workers.c)
add_executable(test_fft test_fft.c)
add_executable(test_math test_math.c)
target_link_libraries(test_bf beamforming_lib)
target_link_libraries(test_fft beamforming_lib pthread)
target_link_libraries(test_workers beamforming_lib pthread)
target_link_libraries(test_math beamforming_lib pthread efence)

View File

@ -8,10 +8,10 @@ Created on Mon Jan 15 19:45:33 2018
import numpy as np
from beamforming.fft import Fft, PowerSpectra
nfft=2**10
nfft=2**4
print('nfft:',nfft)
print(nfft)
nchannels = 8
#print(nfft)
nchannels = 2
t = np.linspace(0,1,nfft+1)
# print(t)

View File

@ -7,11 +7,13 @@
//////////////////////////////////////////////////////////////////////
#include "fft.h"
#include "tracer.h"
#include "ascee_tracer.h"
int main() {
setTracerLevel(0);
iVARTRACE(15,getTracerLevel());
Fft* fft = Fft_alloc(100000,8);

36
test/test_math.c Normal file
View File

@ -0,0 +1,36 @@
// test_bf.c
//
// Author: J.A. de Jong -ASCEE
//
// Description:
//
//////////////////////////////////////////////////////////////////////
#include "fft.h"
#include "ascee_math.h"
#include "ascee_tracer.h"
int main() {
iVARTRACE(15,getTracerLevel());
vc a = vc_alloc(5);
vc_set(&a,2+3*I);
c_conj_inplace(a.data,a.size);
print_vc(&a);
vc b = vc_alloc(5);
c_conj_c(b.data,a.data,5);
print_vc(&b);
vc_free(&a);
vc_free(&b);
return 0;
}
//////////////////////////////////////////////////////////////////////

View File

@ -7,8 +7,8 @@
//////////////////////////////////////////////////////////////////////
#include "worker.h"
#include "mq.h"
#include "tracer.h"
#include "ascee_tracer.h"
#include <unistd.h>
static void* walloc(void*);
static int worker(void*,void*);
static void wfree(void*);
@ -20,7 +20,7 @@ int main() {
iVARTRACE(15,getTracerLevel());
int njobs = 4;
us njobs = 4;
JobQueue* jq = JobQueue_alloc(njobs);
assert(jq);
@ -43,11 +43,10 @@ int main() {
return 0;
}
static void* walloc(void* data) {
TRACE(15,"WALLOC");
uVARTRACE(15,(int) data);
return (void*) getpid();
uVARTRACE(15,(us) data);
return (void*) 1;
}
static int worker(void* w_data,void* tj) {