Bugfix for OpenMP combinded with FFT. That one could not run in parallel in its current form.

This commit is contained in:
Anne de Jong 2022-10-05 14:57:39 +02:00
parent 12d6826140
commit 3481e4f9ba
4 changed files with 51 additions and 144 deletions

View File

@ -1,8 +1,8 @@
/* #define DEBUGTRACE_ENABLED */ /* #define DEBUGTRACE_ENABLED */
#include "debugtrace.hpp"
#include <optional>
#include "lasp_avpowerspectra.h" #include "lasp_avpowerspectra.h"
#include "debugtrace.hpp"
#include <cmath> #include <cmath>
#include <optional>
using rte = std::runtime_error; using rte = std::runtime_error;
using std::cerr; using std::cerr;
@ -17,11 +17,15 @@ PowerSpectra::PowerSpectra(const vd &window)
d win_pow = arma::sum(window % window) / window.size(); d win_pow = arma::sum(window % window) / window.size();
/* Scale fft such that power is easily computed */ /* Scale fft such that power is easily computed */
_scale_fac = 2 / (win_pow * nfft * nfft); _scale_fac = 2.0 / (win_pow * (d)nfft * (d)nfft);
} }
arma::Cube<c> PowerSpectra::compute(const dmat &input) { arma::Cube<c> PowerSpectra::compute(const dmat &input) {
/// Run very often. Silence this one.
/* DEBUGTRACE_ENTER; */
dmat input_tmp = input; dmat input_tmp = input;
// Multiply each column of the inputs element-wise with the window. // Multiply each column of the inputs element-wise with the window.
@ -30,8 +34,10 @@ arma::Cube<c> PowerSpectra::compute(const dmat &input) {
cmat rfft = _fft.fft(input_tmp); cmat rfft = _fft.fft(input_tmp);
arma::cx_cube output(rfft.n_rows, input.n_cols, input.n_cols); arma::cx_cube output(rfft.n_rows, input.n_cols, input.n_cols);
#pragma omp parallel for
for (us i = 0; i < input.n_cols; i++) { for (us i = 0; i < input.n_cols; i++) {
/// This one can be run in parallel without any problem. Note that it is
/// the inner loop that is run in parallel.
#pragma omp parallel for
for (us j = 0; j < input.n_cols; j++) { for (us j = 0; j < input.n_cols; j++) {
output.slice(j).col(i) = output.slice(j).col(i) =
_scale_fac * (rfft.col(i) % arma::conj(rfft.col(j))); _scale_fac * (rfft.col(i) % arma::conj(rfft.col(j)));
@ -69,7 +75,8 @@ AvPowerSpectra::AvPowerSpectra(const us nfft, const Window::WindowType w,
} }
std::optional<arma::cx_cube> AvPowerSpectra::compute(const dmat &timedata) { std::optional<arma::cx_cube> AvPowerSpectra::compute(const dmat &timedata) {
DEBUGTRACE_ENTER;
/* DEBUGTRACE_ENTER; */
_timeBuf.push(timedata); _timeBuf.push(timedata);
@ -101,7 +108,7 @@ std::optional<arma::cx_cube> AvPowerSpectra::compute(const dmat &timedata) {
} // end switch mode } // end switch mode
i++; i++;
} }
if(i>0) { if (i > 0) {
return _est; return _est;
} }
return std::nullopt; return std::nullopt;

View File

@ -33,7 +33,7 @@ private:
Fft _fft; Fft _fft;
vd _window; vd _window;
c _scale_fac; d _scale_fac;
public: public:
/** /**

View File

@ -131,7 +131,10 @@ cmat Fft::fft(const dmat &freqdata) {
DEBUGTRACE_ENTER; DEBUGTRACE_ENTER;
assert(_impl); assert(_impl);
cmat res(_impl->nfft/2+1, freqdata.n_cols); cmat res(_impl->nfft/2+1, freqdata.n_cols);
#pragma omp parallel for /// * WARNING *. This was source of a serious bug. It is not possible to run
/// FFT's and IFFT's on the same _impl, as it overwrites the same memory.
/// Uncommenting the line below results in faulty results.
/// #pragma omp parallel for
for (us colno = 0; colno < freqdata.n_cols; colno++) { for (us colno = 0; colno < freqdata.n_cols; colno++) {
res.col(colno) = _impl->fft(freqdata.col(colno)); res.col(colno) = _impl->fft(freqdata.col(colno));
} }
@ -145,7 +148,11 @@ vd Fft::ifft(const vc &freqdata) {
} }
dmat Fft::ifft(const cmat &freqdata) { dmat Fft::ifft(const cmat &freqdata) {
dmat res(_impl->nfft, freqdata.n_cols); dmat res(_impl->nfft, freqdata.n_cols);
#pragma omp parallel for /// * WARNING *. This was source of a serious bug. It is not possible to run
/// FFT's and IFFT's on the same _impl, as it overwrites the same memory.
/// Uncommenting the line below results in faulty results.
/// #pragma omp parallel for
for (us colno = 0; colno < freqdata.n_cols; colno++) { for (us colno = 0; colno < freqdata.n_cols; colno++) {
res.col(colno) = _impl->ifft(freqdata.col(colno)); res.col(colno) = _impl->ifft(freqdata.col(colno));
} }

View File

@ -1,145 +1,38 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
""" """
Created on Mon Jan 15 19:45:33 2018 Testing code for power spectra
@author: anne
""" """
import numpy as np import numpy as np
from lasp import PowerSpectra, Window from lasp import PowerSpectra, Window
import matplotlib.pyplot as plt # import matplotlib.pyplot as plt
plt.close('all') # plt.close('all')
# def test_ps():
nfft = 8 def test_ps():
t = np.linspace(0, 1.0, nfft, endpoint=False) """
Check Parsevall for single-sided power spectra
"""
nfft = 2048
t = np.linspace(0, 1.0, nfft, endpoint=False)
ps = PowerSpectra(nfft, Window.Rectangular) ps = PowerSpectra(nfft, Window.WindowType.Rectangular)
sig = np.random.randn(nfft) sig = np.random.randn(nfft)
freq = 4 freq = 4
omg = 2*np.pi*freq omg = 2*np.pi*freq
# sig = 8*np.cos(omg*t) # sig = 8*np.cos(omg*t)
cps = ps.compute(sig) cps = ps.compute(sig)
pow1 = np.sum(sig**2)/sig.size pow1 = np.sum(sig**2)/sig.size
pow2 = np.sum((cps[:,0,0]).real) pow2 = np.sum((cps[:,0,0]).real)
# print(pow1) # print(pow1)
# print(pow2) # print(pow2)
plt.plot(cps[:,0,0]) # plt.plot(cps[:,0,0])
assert np.isclose(pow2 - pow1,0, atol=1e-1) assert np.isclose(pow2 - pow1,0, atol=1e-1)
# test_ps()
# plt.plot(res_lasp.real-res_npy.real)
# plt.plot(res_lasp.imag-res_npy.imag)
# plt.plot(res_npy.real)
# plt.plot(res_npy.imag)
# plt.plot(t, sig)
# print('nfft:',nfft)
# #print(nfft)
# nchannels = 2
# t = np.linspace(0,1,nfft+1) if __name__ == '__main__':
# # print(t) test_ps()
# x1 = (np.cos(4*np.pi*t[:-1])+3.2*np.sin(6*np.pi*t[:-1]))[:,np.newaxis]+10
# x = np.vstack([x1.T]*nchannels).T
# # Using transpose to get the strides right
# x = np.random.randn(nchannels,nfft).T
# print("strides: ",x.strides)
# # x.strides = (8,nfft*8)x
# # print("signal:",x)
# xms = np.sum(x**2,axis=0)/nfft
# print('Total signal power time domain: ', xms)
# X = np.fft.rfft(x,axis=0)
# # X =np.fft.fft(x)
# #X =np.fft.rfft(x)
# # print(X)
# Xs = 2*X/nfft
# Xs[np.where(np.abs(Xs) < 1e-10)] = 0
# Xs[0] /= np.sqrt(2)
# Xs[-1] /= np.sqrt(2)
# # print('single sided amplitude spectrum:\n',Xs)
# power = Xs*np.conj(Xs)/2
# # print('Frequency domain signal power\n', power)
# print('Total signal power', np.sum(power,axis=0).real)
# pstest = PowerSpectra(nfft,nchannels)
# ps = pstest.compute(x)
# fft = Fft(nfft,nchannels)
# fft.fft(x)
# ps[np.where(np.abs(ps) < 1e-10)] = 0+0j
# print('our ps: \n' , ps)
# print('Our total signal power: ',np.sum(ps,axis=0).real)
# if __name__ == '__main__':
# nfft=2048
# fs = 2048
# ps = PowerSpectra(nfft, Window.Rectangular)
# t = np.linspace(0, 1.0, nfft, endpoint=False)
# freq = 10
# omg = 2*np.pi*freq
# sig = np.sin(omg*t)
# res = ps.compute(sig)
# plt.plot(res[:,0,0])
# # plt.plot(t, sig)
# print('nfft:',nfft)
# #print(nfft)
# nchannels = 2
# t = np.linspace(0,1,nfft+1)
# # print(t)
# x1 = (np.cos(4*np.pi*t[:-1])+3.2*np.sin(6*np.pi*t[:-1]))[:,np.newaxis]+10
# x = np.vstack([x1.T]*nchannels).T
# # Using transpose to get the strides right
# x = np.random.randn(nchannels,nfft).T
# print("strides: ",x.strides)
# # x.strides = (8,nfft*8)x
# # print("signal:",x)
# xms = np.sum(x**2,axis=0)/nfft
# print('Total signal power time domain: ', xms)
# X = np.fft.rfft(x,axis=0)
# # X =np.fft.fft(x)
# #X =np.fft.rfft(x)
# # print(X)
# Xs = 2*X/nfft
# Xs[np.where(np.abs(Xs) < 1e-10)] = 0
# Xs[0] /= np.sqrt(2)
# Xs[-1] /= np.sqrt(2)
# # print('single sided amplitude spectrum:\n',Xs)
# power = Xs*np.conj(Xs)/2
# # print('Frequency domain signal power\n', power)
# print('Total signal power', np.sum(power,axis=0).real)
# pstest = PowerSpectra(nfft,nchannels)
# ps = pstest.compute(x)
# fft = Fft(nfft,nchannels)
# fft.fft(x)
# ps[np.where(np.abs(ps) < 1e-10)] = 0+0j
# print('our ps: \n' , ps)
# print('Our total signal power: ',np.sum(ps,axis=0).real)