455 lines
14 KiB
Python
455 lines
14 KiB
Python
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
"""!
|
|
Author: J.A. de Jong - ASCEE
|
|
|
|
Description: Measurement class
|
|
|
|
The ASCEE hdf5 measurement file format contains the following fields:
|
|
|
|
- Attributes:
|
|
|
|
'samplerate': The audio data sample rate in Hz.
|
|
'nchannels': The number of audio channels in the file
|
|
'sensitivity': (Optionally) the stored sensitivity of the record channels.
|
|
This can be a single value, or a list of sensitivities for
|
|
each channel. Both representations are allowed.
|
|
|
|
- Datasets:
|
|
|
|
'audio': 3-dimensional array of blocks of audio data. The first axis is the
|
|
block index, the second axis the sample number and the third axis is the channel
|
|
number. The data type is either int16, int32 or float64 / float32. In case the
|
|
data is stored as integers. The raw data should be scaled with the maximum value
|
|
that can be stored for the integer bit depth to get a number between -1.0 and
|
|
1.0.
|
|
|
|
'video': 4-dimensional array of video frames. The first index is the frame
|
|
number, the second the x-value of the pixel and the third is the
|
|
y-value of the pixel. Then, the last axis is the color. This axis has
|
|
length 3 and the colors are stored as (r,g,b). Where typically a
|
|
color depth of 256 is used (np.uint8 data format)
|
|
|
|
The video dataset can possibly be not present in the data.
|
|
|
|
"""
|
|
|
|
__all__ = ['Measurement', 'scaleBlockSens']
|
|
from contextlib import contextmanager
|
|
import h5py as h5
|
|
import numpy as np
|
|
from .lasp_config import LASP_NUMPY_FLOAT_TYPE
|
|
import wave
|
|
import os
|
|
|
|
|
|
class BlockIter:
|
|
"""
|
|
Iterate over the blocks in the audio data of a h5 file
|
|
"""
|
|
|
|
def __init__(self, f):
|
|
"""
|
|
Initialize a BlockIter object
|
|
|
|
Args:
|
|
faudio: Audio dataset in the h5 file, accessed as f['audio']
|
|
"""
|
|
self.i = 0
|
|
self.nblocks = f['audio'].shape[0]
|
|
self.fa = f['audio']
|
|
|
|
def __iter__(self):
|
|
return self
|
|
|
|
def __next__(self):
|
|
"""
|
|
Return the next block
|
|
"""
|
|
if self.i == self.nblocks:
|
|
raise StopIteration
|
|
self.i += 1
|
|
return self.fa[self.i-1][:, :]
|
|
|
|
|
|
def getSampWidth(dtype):
|
|
"""
|
|
Returns the width of a single sample in bytes.
|
|
|
|
Args:
|
|
dtype: numpy dtype
|
|
|
|
Returns:
|
|
Size of a sample in bytes (int)
|
|
"""
|
|
if dtype == np.int32:
|
|
return 4
|
|
elif dtype == np.int16:
|
|
return 2
|
|
elif dtype == np.float64:
|
|
return 8
|
|
else:
|
|
raise ValueError('Invalid data type: %s' % dtype)
|
|
|
|
|
|
def scaleBlockSens(block, sens):
|
|
"""
|
|
Scale a block of raw data to return raw acoustic
|
|
pressure data.
|
|
|
|
Args:
|
|
block: block of raw data with integer data type
|
|
sensitivity: array of sensitivity coeficients for
|
|
each channel
|
|
|
|
"""
|
|
assert sens.ndim == 1
|
|
assert sens.size == block.shape[1]
|
|
if np.issubdtype(block.dtype.type, np.integer):
|
|
sw = getSampWidth(block.dtype)
|
|
fac = 2**(8*sw - 1) - 1
|
|
else:
|
|
fac = 1.
|
|
return block.astype(LASP_NUMPY_FLOAT_TYPE)/fac/sens[np.newaxis, :]
|
|
|
|
|
|
def exportAsWave(fn, fs, data, force=False):
|
|
if '.wav' not in fn[-4:]:
|
|
fn += '.wav'
|
|
|
|
nchannels = data.shape[1]
|
|
sampwidth = getSampWidth(data.dtype)
|
|
|
|
if os.path.exists(fn) and not force:
|
|
raise RuntimeError('File already exists: %s', fn)
|
|
|
|
with wave.open(fn, 'w') as wf:
|
|
wf.setparams((nchannels, sampwidth, fs, 0, 'NONE', 'NONE'))
|
|
wf.writeframes(np.asfortranarray(data).tobytes())
|
|
|
|
|
|
class Measurement:
|
|
"""
|
|
Provides access to measurement data stored in the h5 measurement file
|
|
format.
|
|
"""
|
|
|
|
def __init__(self, fn):
|
|
"""
|
|
Initialize a Measurement object based on the filename
|
|
"""
|
|
if '.h5' not in fn:
|
|
fn += '.h5'
|
|
|
|
# Full filepath
|
|
self.fn = fn
|
|
|
|
# Base filename
|
|
self.fn_base = os.path.split(fn)[1]
|
|
|
|
# Open the h5 file in read-plus mode, to allow for changing the
|
|
# measurement comment.
|
|
with h5.File(fn, 'r+') as f:
|
|
# Check for video data
|
|
try:
|
|
f['video']
|
|
self.has_video = True
|
|
except KeyError:
|
|
self.has_video = False
|
|
|
|
self.nblocks, self.blocksize, self.nchannels = f['audio'].shape
|
|
dtype = f['audio'].dtype
|
|
self.sampwidth = getSampWidth(dtype)
|
|
|
|
self.samplerate = f.attrs['samplerate']
|
|
self.N = (self.nblocks*self.blocksize)
|
|
self.T = self.N/self.samplerate
|
|
|
|
# comment = read-write thing
|
|
try:
|
|
self._comment = f.attrs['comment']
|
|
except KeyError:
|
|
f.attrs['comment'] = ''
|
|
self._comment = ''
|
|
|
|
# Sensitivity
|
|
try:
|
|
sens = f.attrs['sensitivity']
|
|
self._sens = sens * \
|
|
np.ones(self.nchannels) if isinstance(
|
|
sens, float) else sens
|
|
except KeyError:
|
|
self._sens = np.ones(self.nchannels)
|
|
|
|
self._time = f.attrs['time']
|
|
|
|
@property
|
|
def name(self):
|
|
"""
|
|
Returns filename base without extension
|
|
"""
|
|
return os.path.splitext(self.fn_base)[0]
|
|
|
|
@contextmanager
|
|
def file(self, mode='r'):
|
|
"""
|
|
Contextmanager which opens the storage file and yields the file.
|
|
|
|
Args:
|
|
mode: Opening mode for the file. Should either be 'r', or 'r+'
|
|
"""
|
|
if mode not in ('r', 'r+'):
|
|
raise ValueError('Invalid file opening mode.')
|
|
with h5.File(self.fn, mode) as f:
|
|
yield f
|
|
|
|
@property
|
|
def comment(self):
|
|
"""
|
|
Return the measurement comment
|
|
|
|
Returns:
|
|
The measurement comment (text string)
|
|
"""
|
|
return self._comment
|
|
|
|
@comment.setter
|
|
def comment(self, cmt):
|
|
"""
|
|
Set the measurement comment
|
|
|
|
Args:
|
|
cmt: Comment text string to set
|
|
"""
|
|
with self.file('r+') as f:
|
|
# Update comment attribute in the file
|
|
f.attrs['comment'] = cmt
|
|
self._comment = cmt
|
|
|
|
@property
|
|
def recTime(self):
|
|
"""
|
|
Returns
|
|
the total recording time of the measurement, in float seconds.
|
|
"""
|
|
return self.blocksize*self.nblocks/self.samplerate
|
|
|
|
@property
|
|
def time(self):
|
|
"""
|
|
Returns the measurement time in seconds since the epoch.
|
|
"""
|
|
return self._time
|
|
|
|
def scaleBlock(self, block):
|
|
"""
|
|
When the data is stored as integers, we assume dB full-scale scaling.
|
|
Hence, when we convert the data to floats, we divide by the maximum
|
|
possible value.
|
|
|
|
Returns:
|
|
Block of measurement data, scaled using sensitivity values and
|
|
retured as floating point values
|
|
"""
|
|
return scaleBlockSens(block, self.sensitivity)
|
|
|
|
@property
|
|
def prms(self):
|
|
"""
|
|
Returns the root mean square of the uncalibrated rms sound pressure
|
|
level (equivalend SPL).
|
|
|
|
Returns:
|
|
1D array with rms values for each channel
|
|
"""
|
|
#
|
|
try:
|
|
return self._prms
|
|
except AttributeError:
|
|
pass
|
|
|
|
pms = 0.
|
|
|
|
with self.file() as f:
|
|
for block in self.iterBlocks(f):
|
|
block = self.scaleBlock(block)
|
|
pms += np.sum(block**2, axis=0)/self.N
|
|
self._prms = np.sqrt(pms)
|
|
return self._prms
|
|
|
|
def praw(self, block=None):
|
|
"""
|
|
Returns the uncalibrated acoustic pressure signal, converted to
|
|
floating point acoustic pressure values [Pa].
|
|
"""
|
|
if block is not None:
|
|
with self.file() as f:
|
|
blocks = f['audio'][block]
|
|
else:
|
|
blocks = []
|
|
with self.file() as f:
|
|
for block in self.iterBlocks(f):
|
|
blocks.append(block)
|
|
blocks = np.asarray(blocks)
|
|
blocks = blocks.reshape(self.nblocks*self.blocksize,
|
|
self.nchannels)
|
|
|
|
# Apply scaling (sensitivity, integer -> float)
|
|
blocks = self.scaleBlock(blocks)
|
|
return blocks
|
|
|
|
def iterBlocks(self, opened_file):
|
|
"""
|
|
Iterate over all the audio blocks in the opened file
|
|
|
|
Args:
|
|
opened_file: The h5File with the data
|
|
"""
|
|
return BlockIter(opened_file)
|
|
|
|
@property
|
|
def sensitivity(self):
|
|
"""
|
|
Sensitivity of the data in Pa^-1, from floating point data scaled
|
|
between -1.0 and 1.0 to Pascal. If the sensitivity is not stored in
|
|
the measurement file, this function returns 1.0
|
|
"""
|
|
return self._sens
|
|
|
|
@sensitivity.setter
|
|
def sensitivity(self, sens):
|
|
"""
|
|
Set the sensitivity of the measurement in the file
|
|
|
|
Args:
|
|
sens: sensitivity data, should be a float, or an array of floats
|
|
equal to the number of channels.
|
|
"""
|
|
if isinstance(sens, float):
|
|
sens = sens*np.ones(self.nchannels)
|
|
|
|
valid = sens.ndim == 1
|
|
valid &= sens.shape[0] == self.nchannels
|
|
valid &= sens.dtype == float
|
|
if not valid:
|
|
raise ValueError('Invalid sensitivity value(s) given')
|
|
with self.file('r+') as f:
|
|
f.attrs['sensitivity'] = sens
|
|
self._sens = sens
|
|
|
|
def exportAsWave(self, fn=None, force=False, sampwidth=None):
|
|
"""
|
|
Export measurement file as wave. In case the measurement data is stored
|
|
as floats, the values are scaled between 0 and 1
|
|
|
|
Args:
|
|
fn: If given, this will be the filename to write to. If the
|
|
filename does not end with '.wav', this extension is added.
|
|
|
|
force: If True, overwrites any existing files with the given name
|
|
, otherwise a RuntimeError is raised.
|
|
|
|
sampwidth: sample width in bytes with which to export the data.
|
|
This should only be given in case the measurement data is stored as
|
|
floating point values, otherwise an
|
|
|
|
"""
|
|
if fn is None:
|
|
fn = self.fn
|
|
fn = os.path.splitext(fn)[0]
|
|
|
|
if '.wav' not in fn[-4:]:
|
|
fn += '.wav'
|
|
|
|
if os.path.exists(fn) and not force:
|
|
raise RuntimeError(f'File already exists: {fn}')
|
|
with self.file() as f:
|
|
audio = f['audio'][:]
|
|
|
|
if isinstance(audio.dtype, float):
|
|
if sampwidth is None:
|
|
raise ValueError('sampwidth parameter should be given '
|
|
'for float data in measurement file')
|
|
elif sampwidth == 2:
|
|
itype = np.int16
|
|
elif sampwidth == 4:
|
|
itype = np.int32
|
|
else:
|
|
raise ValueError('Invalid sample width, should be 2 or 4')
|
|
|
|
# Find maximum
|
|
max = 0.
|
|
for block in self.iterBlocks():
|
|
blockmax = np.max(np.abs(block))
|
|
if blockmax > max:
|
|
max = blockmax
|
|
# Scale with maximum value only if we have a nonzero maximum value.
|
|
if max == 0.:
|
|
max = 1.
|
|
scalefac = 2**(8*sampwidth)/max
|
|
|
|
with wave.open(fn, 'w') as wf:
|
|
wf.setparams((self.nchannels, self.sampwidth,
|
|
self.samplerate, 0, 'NONE', 'NONE'))
|
|
for block in self.iterBlocks():
|
|
if isinstance(block.dtype, float):
|
|
# Convert block to integral data type
|
|
block = (block*scalefac).astype(itype)
|
|
wf.writeframes(np.asfortranarray(block).tobytes())
|
|
|
|
@staticmethod
|
|
def fromtxt(fn, skiprows, samplerate, sensitivity, mfn=None,
|
|
timestamp=None,
|
|
delimiter='\t', firstcoltime=True):
|
|
"""
|
|
Converts a txt file to a LASP Measurement object and returns the
|
|
measurement.
|
|
|
|
Args:
|
|
fn: Filename of text file
|
|
skiprows: Number of header rows in text file to skip
|
|
samplerate: Sampling frequency in [Hz]
|
|
sensitivity: 1D array of channel sensitivities
|
|
mfn: Filepath where measurement file is stored. If not given,
|
|
a h5 file will be created along fn, which shares its basename
|
|
timestamp: If given, a custom timestamp for the measurement
|
|
(integer containing seconds since epoch). If not given, the
|
|
timestamp is obtained from the last modification time.
|
|
delimiter: Column delimiter
|
|
firstcoltime: If true, the first column is the treated as the
|
|
sample time.
|
|
|
|
"""
|
|
if not os.path.exists(fn):
|
|
raise ValueError(f'File {fn} does not exist.')
|
|
if timestamp is None:
|
|
timestamp = os.path.getmtime(fn)
|
|
if mfn is None:
|
|
mfn = os.path.splitext(fn)[0] + '.h5'
|
|
|
|
dat = np.loadtxt(fn, skiprows=skiprows, delimiter=delimiter)
|
|
if firstcoltime:
|
|
time = dat[:, 0]
|
|
if not np.isclose(time[1] - time[0], 1/samplerate):
|
|
raise ValueError('Samplerate given does not agree with '
|
|
'samplerate in file')
|
|
dat = dat[:, 1:]
|
|
nchannels = dat.shape[1]
|
|
|
|
with h5.File(mfn, 'w') as hf:
|
|
hf.attrs['samplerate'] = samplerate
|
|
hf.attrs['sensitivity'] = sensitivity
|
|
hf.attrs['time'] = timestamp
|
|
hf.attrs['blocksize'] = 1
|
|
hf.attrs['nchannels'] = nchannels
|
|
ad = hf.create_dataset('audio',
|
|
(1, dat.shape[0], dat.shape[1]),
|
|
dtype=dat.dtype,
|
|
maxshape=(1, dat.shape[0], dat.shape[1]),
|
|
compression='gzip')
|
|
ad[0] = dat
|
|
return Measurement(mfn)
|
|
|
|
# def __del__(self):
|
|
# self.f.close()
|