
1322 lines
49 KiB
Raw Normal View History

# -*- coding: utf-8 -*-
""" - Class encapsulating ndarray with meta data
Copyright 2010 Luke Campagnola
Distributed under MIT/X11 license. See license.txt for more infomation.
MetaArray is an extension of ndarray which allows storage of per-axis meta data
such as axis values, names, units, column names, etc. It also enables several
new methods for slicing and indexing the array based on this meta data.
More info at
import numpy as np
import types, copy, threading, os, re
import pickle
#import traceback
## By default, the library will use HDF5 when writing files.
## This can be overridden by setting USE_HDF5 = False
USE_HDF5 = True
try:
    import h5py
    HAVE_HDF5 = True
except ImportError:
    ## h5py is optional; without it fall back to the plain .ma file format.
    USE_HDF5 = False
    HAVE_HDF5 = False
def axis(name=None, cols=None, values=None, units=None):
    """Convenience function for generating axis descriptions when defining MetaArrays

    Each entry of cols is either a single column name or a list/tuple giving
    (name, units, title) in that order; missing trailing fields are omitted.
    Returns a dict containing only the keys whose arguments were supplied.
    """
    ax = {}
    cNameOrder = ['name', 'units', 'title']
    if name is not None:
        ax['name'] = name
    if values is not None:
        ax['values'] = values
    if units is not None:
        ax['units'] = units
    if cols is not None:
        ax['cols'] = []
        for c in cols:
            if not isinstance(c, (list, tuple)):
                c = [c]
            ## pair each supplied field with its key and keep the column
            ax['cols'].append(dict(zip(cNameOrder, c)))
    return ax
class sliceGenerator(object):
    """Just a compact way to generate tuples of slice objects.

    Indexing an instance returns the index expression itself, e.g.
    SLICER[1:3, 'x':5] gives (slice(1, 3), slice('x', 5)).
    """
    def __getitem__(self, arg):
        return arg

    def __getslice__(self, *args):
        ## Python 2 passes simple obj[i:j] bounds as separate arguments;
        ## rebuild the slice so both access paths return the same thing.
        return slice(*args)


SLICER = sliceGenerator()
class MetaArray(np.ndarray):
    """N-dimensional array with meta data such as axis titles, units, and column names.

    May be initialized with a file name, a tuple representing the dimensions of the array,
    or any arguments that could be passed on to numpy.array()

    The info argument sets the metadata for the entire array. It is composed of a list
    of axis descriptions where each axis may have a name, title, units, and a list of column
    descriptions. An additional dict at the end of the axis list may specify parameters
    that apply to values in the entire array.

    For example:
        A 2D array of altitude values for a topographical map might look like
            info=[
                {'name': 'lat', 'title': 'Latitude'},
                {'name': 'lon', 'title': 'Longitude'},
                {'title': 'Altitude', 'units': 'm'}
            ]
        In this case, every value in the array represents the altitude in meters at the lat, lon
        position represented by the array index. All of the following return the
        value at lat=10, lon=5:
            array[10, 5]
            array['lon':5, 'lat':10]

        Now suppose we want to combine this data with another array of equal dimensions that
        represents the average rainfall for each location. We could easily store these as two
        separate arrays or combine them into a 3D array with this description:
            info=[
                {'name': 'vals', 'cols': [
                    {'name': 'altitude', 'units': 'm'},
                    {'name': 'rainfall', 'units': 'cm/year'}
                ]},
                {'name': 'lat', 'title': 'Latitude'},
                {'name': 'lon', 'title': 'Longitude'}
            ]
        We can now access the altitude values with array[0] or array['altitude'], and the
        rainfall values with array[1] or array['rainfall']. All of the following return
        the rainfall value at lat=10, lon=5:
            array[1, 10, 5]
            array['lon':5, 'lat':10, 'vals': 'rainfall']
            array['rainfall', 'lon':5, 'lat':10]
        Notice that in the second example, there is no need for an extra (4th) axis description
        since the actual values are described (name and units) in the column info for the first axis.
    """

    version = '2'

    ## Types allowed as axis or column names
    try:
        nameTypes = [basestring, tuple]
    except NameError:
        ## Python 3 has no basestring
        nameTypes = [str, tuple]

    @staticmethod
    def isNameType(var):
        """Return True if var is an instance of one of the types in nameTypes."""
        return any(isinstance(var, t) for t in MetaArray.nameTypes)
def __new__(subtype, data=None, file=None, info=None, dtype=None, copy=False, **kwargs):
    """Create an array from data (with optional info) or by reading a file.

    data:   a shape tuple (allocates an uninitialized array) or anything
            accepted by numpy.array().
    file:   name of a file to read; only consulted when data is None.
    info:   list of per-axis dicts (None entries become {}), padded with
            empty dicts up to ndim+1 entries.
    dtype, copy: used when building the array from data.
    kwargs: forwarded to the file-reading routine.

    Raises Exception when info is malformed, when neither data nor file is
    given, or when no _readData<version> routine exists for the file.
    """
    if data is not None:
        if isinstance(data, tuple):
            subarr = np.empty(data, dtype=dtype)
        elif copy:
            subarr = np.array(data, dtype=dtype, copy=True)
        else:
            ## asarray copies only when required; newer numpy releases reject
            ## array(..., copy=False) when a copy cannot be avoided.
            subarr = np.asarray(data, dtype=dtype)
        subarr = subarr.view(subtype)

        #### Sanity checks on info
        if info is not None:
            try:
                info = list(info)
            except TypeError:
                raise Exception("Info must be a list of axis specifications")
            if len(info) < subarr.ndim+1:
                ## pad with independent empty dicts
                info.extend([{} for _ in range(subarr.ndim+1-len(info))])
            elif len(info) > subarr.ndim+1:
                raise Exception("Info parameter must be list of length ndim+1 or less.")
            for i in range(len(info)):
                if not isinstance(info[i], dict):
                    if info[i] is None:
                        info[i] = {}
                    else:
                        raise Exception("Axis specification must be Dict or None")
                ax = info[i]
                if i < subarr.ndim and 'values' in ax:
                    if isinstance(ax['values'], list):
                        ax['values'] = np.array(ax['values'])
                    elif not isinstance(ax['values'], np.ndarray):
                        raise Exception("Axis values must be specified as list or ndarray")
                    if ax['values'].ndim != 1 or ax['values'].shape[0] != subarr.shape[i]:
                        raise Exception("Values array for axis %d has incorrect shape. (given %s, but should be %s)" % (i, str(ax['values'].shape), str((subarr.shape[i],))))
                if i < subarr.ndim and 'cols' in ax:
                    if not isinstance(ax['cols'], list):
                        ax['cols'] = list(ax['cols'])
                    if len(ax['cols']) != subarr.shape[i]:
                        raise Exception('Length of column list for axis %d does not match data. (given %d, but should be %d)' % (i, len(ax['cols']), subarr.shape[i]))
            subarr._info = info
        elif hasattr(data, '_info'):
            subarr._info = data._info
    elif file is not None:
        fd = open(file, 'rb')
        try:
            ## decide which read function to use: HDF5 files start with an
            ## 8-byte signature, everything else is the text-header format
            magic = fd.read(8)
            if magic == b'\x89HDF\r\n\x1a\n':
                fd.close()
                return MetaArray._readHDF5(file, subtype, **kwargs)
            fd.seek(0)
            meta = MetaArray._readMeta(fd)
            if 'version' in meta:
                ver = meta['version']
            else:
                ver = 1
            rFuncName = '_readData%s' % str(ver)
            if not hasattr(MetaArray, rFuncName):
                raise Exception("This MetaArray library does not support array version '%s'" % ver)
            rFunc = getattr(MetaArray, rFuncName)
            subarr = rFunc(fd, meta, subtype, **kwargs)
        finally:
            fd.close()
    else:
        raise Exception("Either data or file must be specified")
    return subarr
def __array_finalize__(self, obj):
    """Make sure every newly created array carries an _info list.

    array_finalize is called every time a MetaArray is created (whereas
    __new__ is not necessarily called every time). obj is the object from
    which this array was generated (for example, when slicing or view()ing);
    numpy passes None for explicit construction.
    """
    if not hasattr(self, '_info'):
        if hasattr(obj, '_info'):
            self._info = obj._info
        else:
            ndim = self.ndim if obj is None else obj.ndim
            ## one independent dict per axis plus the trailing whole-array dict
            self._info = [{} for _ in range(ndim+1)]
        self._infoOwned = False  ## Do not make changes to _info until it is copied at least once
# Index the array. ind is first translated by _interpretIndexes() into a
# plain tuple of per-axis indexes, which is handed to ndarray.__getitem__.
# When the result has the same type as self, a new _info list is assembled
# for it axis by axis.
# NOTE(review): this copy of the file has lost its indentation and apparently
# several statements (e.g. `else:` branches); confirm against the upstream
# source before relying on the notes below.
def __getitem__(self, ind):
#print "getitem:", ind
## should catch scalar requests as early as possible to speed things up (?)
nInd = self._interpretIndexes(ind)
#print "Indexes:", nInd
a = np.ndarray.__getitem__(self, nInd)
#print nInd, self.shape
if type(a) == type(self): ## generate new info array
#print " new MA:", type(a), a.shape
a._info = []
# extraInfo starts as a shallow copy of the trailing (whole-array) info dict
extraInfo = self._info[-1].copy()
for i in range(0, len(nInd)): ## iterate over all axes
#print " axis", i
if type(nInd[i]) in [slice, list] or isinstance(nInd[i], np.ndarray): ## If the axis is sliced, keep the info but chop if necessary
#print " slice axis", i, nInd[i]
#a._info[i] = self._axisSlice(i, nInd[i])
#print " info:", a._info[i]
a._info.append(self._axisSlice(i, nInd[i]))
else: ## If the axis is indexed, then move the information from that single index to the last info dictionary
#print "indexed:", i, nInd[i], type(nInd[i])
newInfo = self._axisSlice(i, nInd[i])
name = None
colName = None
for k in newInfo:
if k == 'cols':
if 'cols' not in extraInfo:
extraInfo['cols'] = []
# NOTE(review): nothing visible adds to extraInfo['cols'] -- presumably the
# selected column info was appended here; confirm.
if 'units' in newInfo[k]:
extraInfo['units'] = newInfo[k]['units']
if 'name' in newInfo[k]:
colName = newInfo[k]['name']
elif k == 'name':
name = newInfo[k]
if k not in extraInfo:
extraInfo[k] = newInfo[k]
extraInfo[k] = newInfo[k]
# Pick a name for the result from the axis name and/or column name when the
# trailing dict does not already have one.
if 'name' not in extraInfo:
if name is None:
if colName is not None:
extraInfo['name'] = colName
if colName is not None:
extraInfo['name'] = str(name) + ': ' + str(colName)
extraInfo['name'] = name
#print "Lost info:", newInfo
#a._info[i] = None
#if 'name' in newInfo:
#a._info[-1][newInfo['name']] = newInfo
# NOTE(review): extraInfo is never stored into a._info in the visible code --
# presumably it is appended as the last entry after the loop; confirm.
self._infoOwned = False
#while None in a._info:
return a
def __getslice__(self, *args):
    """Forward obj[i:j] style access to __getitem__ with the equivalent slice object."""
    return self.__getitem__(slice(*args))
def __setitem__(self, ind, val):
    """Assign val to the elements selected by ind.

    ind is translated by _interpretIndexes() first, so the same index forms
    accepted for reading may be used for assignment.
    """
    nInd = self._interpretIndexes(ind)
    ## write through a plain ndarray view using the translated index
    return np.ndarray.__setitem__(self.view(np.ndarray), nInd, val)
#def __getattr__(self, attr):
#if attr in ['round']:
#return lambda *args, **kwargs: MetaArray(getattr(a.view(ndarray), attr)(*args, **kwargs)
def axisValues(self, axis):
    """Return the list of values for an axis

    axis may be an index or anything _interpretAxis() accepts.
    Raises Exception if the axis has no 'values' entry.
    """
    ax = self._interpretAxis(axis)
    if 'values' in self._info[ax]:
        return self._info[ax]['values']
    raise Exception('Array axis %s (%d) has no associated values.' % (str(axis), ax))
def xvals(self, axis):
    """Synonym for axisValues()"""
    return self.axisValues(axis)
def axisHasValues(self, axis):
    """Return True if the info dict for axis has a 'values' entry."""
    ax = self._interpretAxis(axis)
    return 'values' in self._info[ax]
def axisHasColumns(self, axis):
    """Return True if the info dict for axis has a 'cols' entry."""
    ax = self._interpretAxis(axis)
    return 'cols' in self._info[ax]
def axisUnits(self, axis):
    """Return the units for axis, or None if the axis has no 'units' entry."""
    ax = self._info[self._interpretAxis(axis)]
    if 'units' in ax:
        return ax['units']
    return None
def hasColumn(self, axis, col):
    """Return True if axis has a column description whose name equals col.

    Column descriptions without a 'name' entry are skipped.
    """
    ax = self._info[self._interpretAxis(axis)]
    if 'cols' in ax:
        for c in ax['cols']:
            if 'name' in c and c['name'] == col:
                return True
    return False
def listColumns(self, axis=None):
    """Return a list of column names for axis. If axis is not specified, then return a dict of {axisName: (column names), ...}."""
    if axis is None:
        ret = {}
        for i in range(self.ndim):
            if 'cols' in self._info[i]:
                cols = [c['name'] for c in self._info[i]['cols']]
            else:
                ## axes without column definitions map to an empty list
                cols = []
            ret[self.axisName(i)] = cols
        return ret
    axis = self._interpretAxis(axis)
    return [c['name'] for c in self._info[axis]['cols']]
def columnName(self, axis, col):
    """Return the 'name' entry of the column description at position col of axis."""
    ax = self._info[self._interpretAxis(axis)]
    return ax['cols'][col]['name']
def axisName(self, n):
    """Return the name of axis number n, or n itself if that axis has no 'name' entry."""
    return self._info[n].get('name', n)
def columnUnits(self, axis, column):
    """Return the units for column in axis

    Returns None when the column exists but has no 'units' entry.
    Raises Exception if the axis has no column definitions or no column
    with the given name.
    """
    ax = self._info[self._interpretAxis(axis)]
    if 'cols' not in ax:
        raise Exception("Axis %s has no column definitions" % str(axis))
    for c in ax['cols']:
        if 'name' in c and c['name'] == column:
            return c.get('units')
    raise Exception("Axis %s has no column named %s" % (str(axis), str(column)))
def rowsort(self, axis, key=0):
    """Return this object with all records sorted along axis using key as the index to the values to compare. Does not yet modify meta info.

    axis may be an integer axis number or an axis name string; any other
    type raises Exception.
    """
    ## make sure _info is copied locally before modifying it!
    keyList = self[key]
    order = keyList.argsort()
    if isinstance(axis, int):
        ## full slices for the leading axes, then the sort order on axis
        ind = [slice(None)]*axis
        ind.append(order)
    elif isinstance(axis, str):
        ## named-axis form: self['axisName':order]
        ind = (slice(axis, order),)
    else:
        raise Exception("axis must be an integer or an axis name")
    return self[tuple(ind)]
def append(self, val, axis):
    """Return this object with val appended along axis. Does not yet combine meta info.

    A new array one element longer along axis is allocated; the existing
    data fills all but the last position and val fills the last one.
    """
    s = list(self.shape)
    axis = self._interpretAxis(axis)
    s[axis] += 1
    ## hand the new array its own copy of the meta info so later changes
    ## to one array cannot leak into the other
    n = MetaArray(tuple(s), info=self.infoCopy(), dtype=self.dtype)
    ind = [slice(None)]*self.ndim
    ind[axis] = slice(None,-1)
    n[tuple(ind)] = self
    ind[axis] = -1
    n[tuple(ind)] = val
    return n
def extend(self, val, axis):
    """Return the concatenation along axis of this object and val. Does not yet combine meta info."""
    axis = self._interpretAxis(axis)
    ## numpy.concatenate takes the arrays as one sequence argument
    return MetaArray(np.concatenate((self, val), axis), info=self.infoCopy())
def infoCopy(self, axis=None):
    """Return a deep copy of the axis meta info for this object

    With axis=None the whole info list is copied; otherwise only the
    dict for the given axis.
    """
    if axis is None:
        return copy.deepcopy(self._info)
    return copy.deepcopy(self._info[self._interpretAxis(axis)])
def copy(self):
    """Return a copy of the array data together with a deep copy of its meta info."""
    a = np.ndarray.copy(self)
    a._info = self.infoCopy()
    return a
def _interpretIndexes(self, ind):
    """Translate an index expression into a tuple with one entry per axis.

    Each element of ind is passed to _interpretIndex(); axes that receive
    no index default to slice(None).
    """
    if not isinstance(ind, tuple):
        ## a list of slices should be interpreted as a tuple of slices.
        if isinstance(ind, list) and len(ind) > 0 and isinstance(ind[0], slice):
            ind = tuple(ind)
        ## everything else can just be converted to a length-1 tuple
        else:
            ind = (ind,)
    nInd = [slice(None)]*self.ndim
    numOk = True  ## Named indices not started yet; numbered still ok
    for pos, item in enumerate(ind):
        (axis, index, isNamed) = self._interpretIndex(item, pos, numOk)
        nInd[axis] = index
        if isNamed:
            numOk = False
    return tuple(nInd)
def _interpretAxis(self, axis):
    """Return the axis number for axis.

    Strings and tuples are looked up by name with _getAxis(); anything
    else is returned unchanged.
    """
    if isinstance(axis, (str, tuple)):
        return self._getAxis(axis)
    return axis
def _interpretIndex(self, ind, pos, numOk):
    """Translate one index expression into (axis, index, isNamed).

    ind:   the index found at position pos of the full index tuple.
    numOk: False once a named-axis index has been seen; positional
           integer/name indexes are rejected after that.

    Named-axis forms are written as slices whose start is an axis name:
        x[Axis:Column]     column name looked up on that axis
        x[Axis:min:max]    float bounds select by axis values
        x[Axis:i], x[Axis:i:j]   integer index or range
        x[Axis:[list]]     list of integers and/or column names
    Anything unrecognized is handed to numpy unchanged.
    """
    ## plain integers are checked first since they are the common case
    if type(ind) is int:
        if not numOk:
            raise Exception("string and integer indexes may not follow named indexes")
        return (pos, ind, False)
    if MetaArray.isNameType(ind):
        if not numOk:
            raise Exception("string and integer indexes may not follow named indexes")
        return (pos, self._getIndex(pos, ind), False)
    elif type(ind) is slice:
        if MetaArray.isNameType(ind.start) or MetaArray.isNameType(ind.stop):  ## Not an actual slice!
            axis = self._interpretAxis(ind.start)
            ## x[Axis:Column]
            if MetaArray.isNameType(ind.stop):
                index = self._getIndex(axis, ind.stop)
            ## x[Axis:min:max]
            elif (isinstance(ind.stop, float) or isinstance(ind.step, float)) and ('values' in self._info[axis]):
                if ind.stop is None:
                    mask = self.xvals(axis) < ind.step
                elif ind.step is None:
                    mask = self.xvals(axis) >= ind.stop
                else:
                    mask = (self.xvals(axis) >= ind.stop) * (self.xvals(axis) < ind.step)
                index = mask
            ## x[Axis:columnIndex]
            elif isinstance(ind.stop, int) or isinstance(ind.step, int):
                if ind.step is None:
                    index = ind.stop
                else:
                    index = slice(ind.stop, ind.step)
            ## x[Axis: [list]]
            elif type(ind.stop) is list:
                index = []
                for i in ind.stop:
                    if type(i) is int:
                        index.append(i)
                    elif MetaArray.isNameType(i):
                        index.append(self._getIndex(axis, i))
                    else:
                        ## unrecognized type, try just passing on to array
                        index = ind.stop
                        break
            else:
                index = ind.stop
            return (axis, index, True)
        else:
            ## Looks like a real slice, passing on to array
            return (pos, ind, False)
    elif type(ind) is list:
        ## interpret each element individually
        indList = [self._interpretIndex(i, pos, numOk)[1] for i in ind]
        return (pos, indList, False)
    else:
        if not numOk:
            raise Exception("string and integer indexes may not follow named indexes")
        return (pos, ind, False)
def _getAxis(self, name):
    """Return the position in _info of the entry whose 'name' equals name.

    Raises Exception if no entry has that name.
    """
    for i, axis in enumerate(self._info):
        if 'name' in axis and axis['name'] == name:
            return i
    raise Exception("No axis named %s.\n info=%s" % (name, self._info))
def _getIndex(self, axis, name):
    """Return the position of the column called name on axis number axis.

    Raises Exception if the axis has no such column.
    """
    ax = self._info[axis]
    if ax is not None and 'cols' in ax:
        for i, col in enumerate(ax['cols']):
            if 'name' in col and col['name'] == name:
                return i
    raise Exception("Axis %d has no column named %s.\n info=%s" % (axis, name, self._info))
def _axisCopy(self, i):
    """Return a deep copy of the info entry at position i."""
    return copy.deepcopy(self._info[i])
def _axisSlice(self, i, cols):
    """Return the info for axis i restricted to the index cols.

    When the axis has 'cols' or 'values', a deep copy is made and both
    entries are indexed with cols. Otherwise the original info dict is
    returned as-is (not copied).
    """
    if 'cols' in self._info[i] or 'values' in self._info[i]:
        ax = self._axisCopy(i)
        if 'cols' in ax:
            ## index through an array so slices, lists and masks all work
            sl = np.array(ax['cols'])[cols]
            if isinstance(sl, np.ndarray):
                sl = list(sl)
            ax['cols'] = sl
        if 'values' in ax:
            ax['values'] = np.array(ax['values'])[cols]
    else:
        ax = self._info[i]
    return ax
def prettyInfo(self):
    """Return a multi-line, human-readable summary of the meta info.

    One line per axis (name or number, units, length, value range and
    column names), followed by str() of the trailing info dict.
    """
    s = ''
    titles = []
    maxl = 0
    ## first pass: build the axis titles and find the widest one for alignment
    for i in range(len(self._info)-1):
        ax = self._info[i]
        axs = ''
        if 'name' in ax:
            axs += '"%s"' % str(ax['name'])
        else:
            axs += "%d" % i
        if 'units' in ax:
            axs += " (%s)" % str(ax['units'])
        titles.append(axs)
        if len(axs) > maxl:
            maxl = len(axs)

    for i in range(min(self.ndim, len(self._info)-1)):
        ax = self._info[i]
        axs = titles[i]
        axs += '%s[%d] :' % (' ' * (maxl + 2 - len(axs)), self.shape[i])
        if 'values' in ax:
            v0 = ax['values'][0]
            v1 = ax['values'][-1]
            ## a single-element axis has no step; avoid dividing by zero
            if self.shape[i] > 1:
                step = (v1-v0) / float(self.shape[i]-1)
            else:
                step = float('nan')
            axs += " values: [%g ... %g] (step %g)" % (v0, v1, step)
        if 'cols' in ax:
            axs += " columns: "
            colstrs = []
            for c in range(len(ax['cols'])):
                col = ax['cols'][c]
                cs = str(col.get('name', c))
                if 'units' in col:
                    cs += " (%s)" % col['units']
                colstrs.append(cs)
            axs += '[' + ', '.join(colstrs) + ']'
        s += axs + "\n"
    s += str(self._info[-1])
    return s
def __repr__(self):
    """Return the plain ndarray repr, a separator line, then prettyInfo()."""
    return "%s\n-----------------------------------------------\n%s" % (repr(self.view(np.ndarray)), self.prettyInfo())
def __str__(self):
    """Same text as __repr__()."""
    return self.__repr__()
def axisCollapsingFn(self, fn, axis=None, *args, **kargs):
    """Apply the ndarray method named fn, which collapses one axis.

    With axis=None the plain result of the ndarray method is returned.
    Otherwise the result is wrapped in a MetaArray whose info is a copy
    of this array's info with the collapsed axis removed.
    """
    arr = self.view(np.ndarray)
    fn = getattr(arr, fn)
    if axis is None:
        return fn(axis, *args, **kargs)
    info = self.infoCopy()
    axis = self._interpretAxis(axis)
    ## the collapsed axis no longer exists in the result
    info.pop(axis)
    return MetaArray(fn(axis, *args, **kargs), info=info)
def mean(self, axis=None, *args, **kargs):
    """Mean along axis (index or name), computed through axisCollapsingFn()."""
    return self.axisCollapsingFn('mean', axis, *args, **kargs)
def min(self, axis=None, *args, **kargs):
    """Minimum along axis (index or name), computed through axisCollapsingFn()."""
    return self.axisCollapsingFn('min', axis, *args, **kargs)
def max(self, axis=None, *args, **kargs):
    """Maximum along axis (index or name), computed through axisCollapsingFn()."""
    return self.axisCollapsingFn('max', axis, *args, **kargs)
def transpose(self, *args):
    """Return a MetaArray with axes permuted and the axis info reordered to match.

    The order may be given as separate arguments or as one iterable; each
    entry may be an axis number or name. Axes not mentioned keep their
    position after the listed ones.
    """
    ## a lone string is an axis name, not a sequence of axes
    if len(args) == 1 and hasattr(args[0], '__iter__') and not isinstance(args[0], str):
        order = args[0]
    else:
        order = args
    order = [self._interpretAxis(ax) for ax in order]
    ## info has one more entry than there are axes; carry the rest along unchanged
    infoOrder = order + list(range(len(order), len(self._info)))
    info = [self._info[i] for i in infoOrder]
    order = order + list(range(len(order), self.ndim))
    return MetaArray(self.view(np.ndarray).transpose(order), info=info)
#### File I/O Routines
@staticmethod
def _readMeta(fd):
    """Read meta array from the top of a file. Read lines until a blank line is reached.
    This function should ideally work for ALL versions of MetaArray.

    The collected text is evaluated as a Python expression and the result
    is returned. Works with files opened in text or binary mode.
    """
    meta = ''
    ## Read meta information until the first blank line (or end of file)
    while True:
        line = fd.readline().strip()
        if not line:
            break
        if not isinstance(line, str):
            ## binary-mode file on Python 3
            line = line.decode('latin-1')
        meta += line
    ## NOTE(review): eval() runs arbitrary expressions from the file;
    ## only open files from trusted sources.
    ret = eval(meta)
    return ret
@staticmethod
def _readData1(fd, meta, subtype, mmap=False):
    """Read array data from the file descriptor for MetaArray v1 files

    fd must be positioned just after the meta header. Axis value blocks
    (for axes with a 'values_len' entry) come first, then the array data.
    With mmap=True the array data is memory-mapped read-only.
    """
    ## read in axis values for any axis that specifies a length
    for ax in meta['info']:
        if 'values_len' in ax:
            ## copy() because frombuffer yields a read-only view of the bytes
            ax['values'] = np.frombuffer(fd.read(ax['values_len']), dtype=ax['values_type']).copy()
            del ax['values_len']
            del ax['values_type']
    ## the remaining data is the actual array
    if mmap:
        ## memmap offsets count from the start of the file, not the current position
        subarr = np.memmap(fd, dtype=meta['type'], mode='r', shape=meta['shape'], offset=fd.tell())
    else:
        subarr = np.frombuffer(fd.read(), dtype=meta['type']).copy()
        subarr.shape = meta['shape']
    subarr = subarr.view(subtype)
    subarr._info = meta['info']
    return subarr
# Read array data for version-2 files from fd, given the already-parsed
# meta header. Two layouts are visible below: a contiguous one (no axis
# marked 'dynamic') and a frame-by-frame one where a single axis has
# values_len == 'dynamic'. subset, when given, is indexed per axis and its
# entry for the dynamic axis is used as a slice (.start/.stop) to limit
# which frames are kept.
# NOTE(review): this copy has lost its indentation and several statements
# (`else:` lines, `break`s, the fd.read(...) calls inside fromstring/loads,
# and the lines that add to `frames` / `xVals`). The truncated expressions
# such as `np.fromstring(['values_len']), ...` are not valid Python; restore
# them from the upstream source before use.
# NOTE(review): eval() and pickle.loads() are applied to file content; only
# open trusted files.
def _readData2(fd, meta, subtype, mmap=False, subset=None):
## read in axis values
dynAxis = None
frameSize = 1
## read in axis values for any axis that specifies a length
for i in range(len(meta['info'])):
ax = meta['info'][i]
if ax.has_key('values_len'):
if ax['values_len'] == 'dynamic':
if dynAxis is not None:
raise Exception("MetaArray has more than one dynamic axis! (this is not allowed)")
dynAxis = i
ax['values'] = np.fromstring(['values_len']), dtype=ax['values_type'])
frameSize *= ax['values_len']
del ax['values_len']
del ax['values_type']
## No axes are dynamic, just read the entire array in at once
if dynAxis is None:
#if rewriteDynamic is not None:
#raise Exception("")
if meta['type'] == 'object':
if mmap:
raise Exception('memmap not supported for arrays with dtype=object')
subarr = pickle.loads(
if mmap:
subarr = np.memmap(fd, dtype=meta['type'], mode='r', shape=meta['shape'])
subarr = np.fromstring(, dtype=meta['type'])
#subarr = subarr.view(subtype)
subarr.shape = meta['shape']
#subarr._info = meta['info']
## One axis is dynamic, read in a frame at a time
if mmap:
raise Exception('memmap not supported for non-contiguous arrays. Use rewriteContiguous() to convert.')
ax = meta['info'][dynAxis]
xVals = []
frames = []
frameShape = list(meta['shape'])
frameShape[dynAxis] = 1
# number of elements in one frame (product of the shape with the dynamic axis set to 1)
frameSize = reduce(lambda a,b: a*b, frameShape)
# n counts the frames consumed so far along the dynamic axis
n = 0
while True:
## Extract one non-blank line
while True:
line = fd.readline()
if line != '\n':
if line == '':
## evaluate line
# each block is preceded by a one-line dict; the keys read below are
# 'len', 'numFrames' and optionally 'xVals'
inf = eval(line)
## read data block
#print "read %d bytes as %s" % (inf['len'], meta['type'])
if meta['type'] == 'object':
data = pickle.loads(['len']))
data = np.fromstring(['len']), dtype=meta['type'])
if data.size != frameSize * inf['numFrames']:
#print data.size, frameSize, inf['numFrames']
raise Exception("Wrong frame size in MetaArray file! (frame %d)" % n)
## read in data block
shape = list(frameShape)
shape[dynAxis] = inf['numFrames']
data.shape = shape
if subset is not None:
# translate the requested range on the dynamic axis into this block's local coordinates
dSlice = subset[dynAxis]
if dSlice.start is None:
dStart = 0
dStart = max(0, dSlice.start - n)
if dSlice.stop is None:
dStop = data.shape[dynAxis]
dStop = min(data.shape[dynAxis], dSlice.stop - n)
newSubset = list(subset[:])
newSubset[dynAxis] = slice(dStart, dStop)
if dStop > dStart:
#print n, data.shape, " => ", newSubset, data[tuple(newSubset)].shape
#data = data[subset].copy() ## what's this for??
n += inf['numFrames']
if 'xVals' in inf:
subarr = np.concatenate(frames, axis=dynAxis)
if len(xVals)> 0:
ax['values'] = np.array(xVals, dtype=ax['values_type'])
del ax['values_len']
del ax['values_type']
subarr = subarr.view(subtype)
subarr._info = meta['info']
#raise Exception() ## stress-testing
return subarr
@staticmethod
def _readHDF5(fileName, subtype, mmap=False, writable=False):
    """Read an HDF5-formatted MetaArray file and return it as subtype.

    With mmap=True the 'data' dataset is memory-mapped (see mapHDF5Array);
    otherwise it is read completely into memory.
    Raises Exception if h5py is not available.
    """
    if not HAVE_HDF5:
        raise Exception("The file '%s' is HDF5-formatted, but the HDF5 library (h5py) was not found." % fileName)
    f = h5py.File(fileName, 'r')
    try:
        ver = f.attrs['MetaArray']
        if ver > MetaArray.version:
            print("Warning: This file was written with MetaArray version %s, but you are using version %s. (Will attempt to read anyway)" % (str(ver), str(MetaArray.version)))
        meta = MetaArray.readHDF5Meta(f['info'])

        if mmap:
            arr = MetaArray.mapHDF5Array(f['data'], writable=writable)
        else:
            arr = f['data'][:]
    finally:
        ## data and meta info have been copied (or mapped by file name) by now
        f.close()
    subarr = arr.view(subtype)
    subarr._info = meta
    return subarr
@staticmethod
def mapHDF5Array(data, writable=False):
    """Return a numpy.memmap over the raw storage of the HDF5 dataset data.

    writable selects mode 'r+' instead of 'r'.
    Raises Exception if the dataset reports no storage offset.
    """
    ## byte offset of the dataset's storage within the file, or None
    off = data.id.get_offset()
    if writable:
        mode = 'r+'
    else:
        mode = 'r'
    if off is None:
        raise Exception("This dataset uses chunked storage; it can not be memory-mapped. (store using mappable=True)")
    return np.memmap(filename=data.file.filename, offset=off, dtype=data.dtype, shape=data.shape, mode=mode)
@staticmethod
def readHDF5Meta(root, mmap=False):
    """Rebuild a nested dict/list/tuple structure from the HDF5 group root.

    Attributes become values (strings are evaluated back to their original
    types), child groups are read recursively and child datasets become
    arrays (memory-mapped when mmap is True). The '_metaType_' attribute
    selects the container type that is returned.
    """
    try:
        stringTypes = basestring
    except NameError:
        ## Python 3
        stringTypes = (str, bytes)

    data = {}
    ## Pull list of values from attributes and child objects
    for k in root.attrs:
        val = root.attrs[k]
        if isinstance(val, stringTypes):  ## strings need to be re-evaluated to their original types
            ## NOTE(review): eval() runs arbitrary expressions from the file;
            ## only open files from trusted sources.
            try:
                val = eval(val)
            except Exception:
                raise Exception('Can not evaluate string: "%s"' % val)
        data[k] = val
    for k in root:
        obj = root[k]
        if isinstance(obj, h5py.Group):
            val = MetaArray.readHDF5Meta(obj, mmap=mmap)
        elif isinstance(obj, h5py.Dataset):
            if mmap:
                val = MetaArray.mapHDF5Array(obj)
            else:
                val = obj[:]
        else:
            raise Exception("Don't know what to do with type '%s'" % str(type(obj)))
        data[k] = val

    typ = root.attrs['_metaType_']
    del data['_metaType_']

    if typ == 'dict':
        return data
    elif typ == 'list' or typ == 'tuple':
        ## children are keyed by their position in the sequence
        d2 = [None]*len(data)
        for k in data:
            d2[int(k)] = data[k]
        if typ == 'tuple':
            d2 = tuple(d2)
        return d2
    else:
        raise Exception("Don't understand metaType '%s'" % typ)
def write(self, fileName, **opts):
    """Write this object to a file. The object can be restored by calling MetaArray(file=fileName)
    opts:
        appendAxis: the name (or index) of the appendable axis. Allows the array to grow.
        compression: None, 'gzip' (good compression), 'lzf' (fast compression), etc.
        chunks: bool or tuple specifying chunk shape

    HDF5 is used when both USE_HDF5 and HAVE_HDF5 are true; otherwise the
    old-style .ma format is written.
    """
    if USE_HDF5 and HAVE_HDF5:
        return self.writeHDF5(fileName, **opts)
    else:
        return self.writeMa(fileName, **opts)
def writeMeta(self, fileName):
    """Used to re-write meta info to the given file.
    This feature is only available for HDF5 files.

    Raises Exception if the file was written by a different MetaArray version.
    """
    f = h5py.File(fileName, 'r+')
    try:
        if f.attrs['MetaArray'] != MetaArray.version:
            raise Exception("The file %s was created with a different version of MetaArray. Will not modify." % fileName)
        ## replace the stored info group with the current meta info
        del f['info']
        self.writeHDF5Meta(f, 'info', self._info)
    finally:
        f.close()
# Write this array to an HDF5 file through h5py: the array goes into a
# 'data' dataset and the meta info under 'info' (via writeHDF5Meta). When
# opts contains 'appendAxis' and the file already exists, the new data is
# written at the end of that axis of the existing file instead.
# Options read from opts below: 'appendAxis', 'mappable', and any key
# present in dsOpts ('compression', 'chunks').
# NOTE(review): this copy has lost its indentation and several lines (the
# closing braces of both dsOpts literals, `else:` branches, and apparently
# the calls that resize `data` / `v` after `shape` is computed, plus any
# file close). Restore from the upstream source before use.
def writeHDF5(self, fileName, **opts):
## default options for writing datasets
dsOpts = {
'compression': 'lzf',
'chunks': True,
## if there is an appendable axis, then we can guess the desired chunk shape (optimized for appending)
appAxis = opts.get('appendAxis', None)
if appAxis is not None:
appAxis = self._interpretAxis(appAxis)
cs = [min(100000, x) for x in self.shape]
cs[appAxis] = 1
dsOpts['chunks'] = tuple(cs)
## if there are columns, then we can guess a different chunk shape
## (read one column at a time)
cs = [min(100000, x) for x in self.shape]
for i in range(self.ndim):
if 'cols' in self._info[i]:
cs[i] = 1
dsOpts['chunks'] = tuple(cs)
## update options if they were passed in
for k in dsOpts:
if k in opts:
dsOpts[k] = opts[k]
## If mappable is in options, it disables chunking/compression
if opts.get('mappable', False):
dsOpts = {
'chunks': None,
'compression': None
## set maximum shape to allow expansion along appendAxis
append = False
if appAxis is not None:
maxShape = list(self.shape)
ax = self._interpretAxis(appAxis)
# None in maxshape leaves that axis unbounded
maxShape[ax] = None
if os.path.exists(fileName):
append = True
dsOpts['maxshape'] = tuple(maxShape)
dsOpts['maxshape'] = None
if append:
f = h5py.File(fileName, 'r+')
if f.attrs['MetaArray'] != MetaArray.version:
raise Exception("The file %s was created with a different version of MetaArray. Will not modify." % fileName)
## resize data and write in new values
data = f['data']
shape = list(data.shape)
shape[ax] += self.shape[ax]
# NOTE(review): no resize of `data` to `shape` is visible here -- confirm.
sl = [slice(None)] * len(data.shape)
sl[ax] = slice(-self.shape[ax], None)
data[tuple(sl)] = self.view(np.ndarray)
## add axis values if they are present.
axInfo = f['info'][str(ax)]
if 'values' in axInfo:
v = axInfo['values']
v2 = self._info[ax]['values']
shape = list(v.shape)
shape[0] += v2.shape[0]
v[-v2.shape[0]:] = v2
# new-file path: create the file, tag it with the library version and store data + info
f = h5py.File(fileName, 'w')
f.attrs['MetaArray'] = MetaArray.version
#print dsOpts
f.create_dataset('data', data=self.view(np.ndarray), **dsOpts)
## dsOpts is used when storing meta data whenever an array is encountered
## however, 'chunks' will no longer be valid for these arrays if it specifies a chunk shape.
## 'maxshape' is right-out.
if isinstance(dsOpts['chunks'], tuple):
dsOpts['chunks'] = True
if 'maxshape' in dsOpts:
del dsOpts['maxshape']
self.writeHDF5Meta(f, 'info', self._info, **dsOpts)
def writeHDF5Meta(self, root, name, data, **dsOpts):
    """Store the meta value data under name inside the HDF5 group root.

    Arrays become datasets (created with dsOpts), lists/tuples/dicts become
    groups tagged with a '_metaType_' attribute and filled recursively,
    numbers become attributes, and anything else is stored as its repr().
    """
    if isinstance(data, np.ndarray):
        ## leave the first axis unbounded so the dataset can grow later
        dsOpts['maxshape'] = (None,) + data.shape[1:]
        root.create_dataset(name, data=data, **dsOpts)
    elif isinstance(data, list) or isinstance(data, tuple):
        gr = root.create_group(name)
        if isinstance(data, list):
            gr.attrs['_metaType_'] = 'list'
        else:
            gr.attrs['_metaType_'] = 'tuple'
        for i in range(len(data)):
            self.writeHDF5Meta(gr, str(i), data[i], **dsOpts)
    elif isinstance(data, dict):
        gr = root.create_group(name)
        gr.attrs['_metaType_'] = 'dict'
        for k, v in data.items():
            self.writeHDF5Meta(gr, k, v, **dsOpts)
    elif isinstance(data, int) or isinstance(data, float) or isinstance(data, np.integer) or isinstance(data, np.floating):
        root.attrs[name] = data
    else:
        try:   ## strings, bools, None are stored as repr() strings
            root.attrs[name] = repr(data)
        except Exception:
            print("Can not store meta data of type '%s' in HDF5. (key is '%s')" % (str(type(data)), str(name)))
            raise
# Write this array in the old text-header ".ma" format: a str() of the meta
# dict followed by a blank line, then axis value data and the array data.
# appendAxis (name or index) marks one axis as 'dynamic' so later calls can
# append frames; newFile forces the header to be written.
# NOTE(review): this copy has lost its indentation and several lines (the
# `else:` branches, the statements that add to `axstrs`, and the fd.write()
# calls for the axis strings, frame info and data). Restore from the
# upstream source before use.
def writeMa(self, fileName, appendAxis=None, newFile=False):
"""Write an old-style .ma file"""
meta = {'shape':self.shape, 'type':str(self.dtype), 'info':self.infoCopy(), 'version':MetaArray.version}
axstrs = []
## copy out axis values for dynamic axis if requested
if appendAxis is not None:
if MetaArray.isNameType(appendAxis):
appendAxis = self._interpretAxis(appendAxis)
ax = meta['info'][appendAxis]
ax['values_len'] = 'dynamic'
if 'values' in ax:
ax['values_type'] = str(ax['values'].dtype)
dynXVals = ax['values']
del ax['values']
dynXVals = None
## Generate axis data string, modify axis info so we know how to read it back in later
for ax in meta['info']:
if 'values' in ax:
# NOTE(review): axstrs[-1] is read here but nothing visible appends to
# axstrs -- presumably the serialized axis values were appended first; confirm.
ax['values_len'] = len(axstrs[-1])
ax['values_type'] = str(ax['values'].dtype)
del ax['values']
## Decide whether to output the meta block for a new file
if not newFile:
## If the file does not exist or its size is 0, then we must write the header
newFile = (not os.path.exists(fileName)) or (os.stat(fileName).st_size == 0)
## write data to file
if appendAxis is None or newFile:
fd = open(fileName, 'wb')
fd.write(str(meta) + '\n\n')
for ax in axstrs:
fd = open(fileName, 'ab')
# object arrays are pickled; everything else is written as raw bytes
if self.dtype != object:
dataStr = self.view(np.ndarray).tostring()
dataStr = pickle.dumps(self.view(np.ndarray))
#print self.size, len(dataStr), self.dtype
if appendAxis is not None:
# per-block header for appended frames: byte length, frame count and
# (when available) the axis values for these frames
frameInfo = {'len':len(dataStr), 'numFrames':self.shape[appendAxis]}
if dynXVals is not None:
frameInfo['xVals'] = list(dynXVals)
def writeCsv(self, fileName=None):
    """Write 2D array to CSV file or return the string if no filename is given

    If the first axis has column definitions, their names form the header
    line. One line is produced per position along the second axis.
    Raises Exception for arrays with more than 2 dimensions.
    """
    if self.ndim > 2:
        raise Exception("CSV Export is only for 2D arrays")
    lines = []
    if 'cols' in self._info[0]:
        lines.append(','.join([x['name'] for x in self._info[0]['cols']]) + '\n')
    for row in range(0, self.shape[1]):
        lines.append(','.join(["%g" % x for x in self[:, row]]) + '\n')
    text = ''.join(lines)
    if fileName is None:
        return text
    ## build the text first so a failure cannot leave a half-written file open
    with open(fileName, 'w') as fd:
        fd.write(text)
#class H5MetaList():
#def rewriteContiguous(fileName, newName):
#"""Rewrite a dynamic array file as contiguous"""
#def _readData2(fd, meta, subtype, mmap):
### read in axis values
#dynAxis = None
#frameSize = 1
### read in axis values for any axis that specifies a length
#for i in range(len(meta['info'])):
#ax = meta['info'][i]
#if ax.has_key('values_len'):
#if ax['values_len'] == 'dynamic':
#if dynAxis is not None:
#raise Exception("MetaArray has more than one dynamic axis! (this is not allowed)")
#dynAxis = i
#ax['values'] = fromstring(['values_len']), dtype=ax['values_type'])
#frameSize *= ax['values_len']
#del ax['values_len']
#del ax['values_type']
### No axes are dynamic, just read the entire array in at once
#if dynAxis is None:
#raise Exception('Array has no dynamic axes.')
### One axis is dynamic, read in a frame at a time
#if mmap:
#raise Exception('memmap not supported for non-contiguous arrays. Use rewriteContiguous() to convert.')
#ax = meta['info'][dynAxis]
#xVals = []
#frames = []
#frameShape = list(meta['shape'])
#frameShape[dynAxis] = 1
#frameSize = reduce(lambda a,b: a*b, frameShape)
#n = 0
#while True:
### Extract one non-blank line
#while True:
#line = fd.readline()
#if line != '\n':
#if line == '':
### evaluate line
#inf = eval(line)
### read data block
##print "read %d bytes as %s" % (inf['len'], meta['type'])
#if meta['type'] == 'object':
#data = pickle.loads(['len']))
#data = fromstring(['len']), dtype=meta['type'])
#if data.size != frameSize * inf['numFrames']:
##print data.size, frameSize, inf['numFrames']
#raise Exception("Wrong frame size in MetaArray file! (frame %d)" % n)
### read in data block
#shape = list(frameShape)
#shape[dynAxis] = inf['numFrames']
#data.shape = shape
#n += inf['numFrames']
#if 'xVals' in inf:
#subarr = np.concatenate(frames, axis=dynAxis)
#if len(xVals)> 0:
#ax['values'] = array(xVals, dtype=ax['values_type'])
#del ax['values_len']
#del ax['values_type']
#subarr = subarr.view(subtype)
#subarr._info = meta['info']
#return subarr
# Demo / manual test script: builds a 4-D MetaArray with named axes, values
# and columns, prints the result of many indexing forms, then exercises the
# file write/read, append and memmap paths.
# NOTE(review): this copy has lost its indentation and some lines (the first
# entry and closing bracket of `info`, and the write calls in the file I/O
# section); `tf = ''` overwrites the temp file name. Python 2 print
# statements are used throughout.
if __name__ == '__main__':
## Create an array with every option possible
arr = np.zeros((2, 5, 3, 5), dtype=int)
# fill so each element encodes its own index, e.g. [i,j,k,l] -> (i+1)(j+1)(k+1)(l+1) as digits
for i in range(arr.shape[0]):
for j in range(arr.shape[1]):
for k in range(arr.shape[2]):
for l in range(arr.shape[3]):
arr[i,j,k,l] = (i+1)*1000 + (j+1)*100 + (k+1)*10 + (l+1)
info = [
axis('Axis2', values=[1,2,3,4,5]),
axis('Axis3', cols=[
('Ax3Col2', 'mV', 'Axis3 Column2'),
(('Ax3','Col3'), 'A', 'Axis3 Column3')]),
{'name': 'Axis4', 'values': np.array([1.1, 1.2, 1.3, 1.4, 1.5]), 'units': 's'},
{'extra': 'info'}
ma = MetaArray(arr, info=info)
print "==== Original Array ======="
print ma
print "\n\n"
#### Tests follow:
#### Index/slice tests: check that all values and meta info are correct after slice
print "\n -- normal integer indexing\n"
print "\n ma[1]"
print ma[1]
print "\n ma[1, 2:4]"
print ma[1, 2:4]
print "\n ma[1, 1:5:2]"
print ma[1, 1:5:2]
print "\n -- named axis indexing\n"
print "\n ma['Axis2':3]"
print ma['Axis2':3]
print "\n ma['Axis2':3:5]"
print ma['Axis2':3:5]
print "\n ma[1, 'Axis2':3]"
print ma[1, 'Axis2':3]
print "\n ma[:, 'Axis2':3]"
print ma[:, 'Axis2':3]
print "\n ma['Axis2':3, 'Axis4':0:2]"
print ma['Axis2':3, 'Axis4':0:2]
print "\n -- column name indexing\n"
print "\n ma['Axis3':'Ax3Col1']"
print ma['Axis3':'Ax3Col1']
print "\n ma['Axis3':('Ax3','Col3')]"
print ma['Axis3':('Ax3','Col3')]
print "\n ma[:, :, 'Ax3Col2']"
print ma[:, :, 'Ax3Col2']
print "\n ma[:, :, ('Ax3','Col3')]"
print ma[:, :, ('Ax3','Col3')]
print "\n -- axis value range indexing\n"
print "\n ma['Axis2':1.5:4.5]"
print ma['Axis2':1.5:4.5]
print "\n ma['Axis4':1.15:1.45]"
print ma['Axis4':1.15:1.45]
print "\n ma['Axis4':1.15:1.25]"
print ma['Axis4':1.15:1.25]
print "\n -- list indexing\n"
print "\n ma[:, [0,2,4]]"
print ma[:, [0,2,4]]
print "\n ma['Axis4':[0,2,4]]"
print ma['Axis4':[0,2,4]]
print "\n ma['Axis3':[0, ('Ax3','Col3')]]"
print ma['Axis3':[0, ('Ax3','Col3')]]
print "\n -- boolean indexing\n"
print "\n ma[:, array([True, True, False, True, False])]"
print ma[:, np.array([True, True, False, True, False])]
print "\n ma['Axis4':array([True, False, False, False])]"
print ma['Axis4':np.array([True, False, False, False])]
#### Array operations
# - Concatenate
# - Append
# - Extend
# - Rowsort
#### File I/O tests
print "\n================ File I/O Tests ===================\n"
import tempfile
tf = tempfile.mktemp()
tf = ''
# write whole array
print "\n -- write/read test"
# NOTE(review): no write call is visible before this read -- confirm.
ma2 = MetaArray(file=tf)
#print ma2
print "\nArrays are equivalent:", (ma == ma2).all()
#print "Meta info is equivalent:", ma.infoCopy() == ma2.infoCopy()
# CSV write
# append mode
print "\n================append test (%s)===============" % tf
# write the first two frames along Axis2, then append the rest one frame at a time
ma['Axis2':0:2].write(tf, appendAxis='Axis2')
for i in range(2,ma.shape[1]):
ma['Axis2':[i]].write(tf, appendAxis='Axis2')
ma2 = MetaArray(file=tf)
#print ma2
print "\nArrays are equivalent:", (ma == ma2).all()
#print "Meta info is equivalent:", ma.infoCopy() == ma2.infoCopy()
## memmap test
print "\n==========Memmap test============"
ma.write(tf, mappable=True)
ma2 = MetaArray(file=tf, mmap=True)
print "\nArrays are equivalent:", (ma == ma2).all()