Merge pull request #977 from campagnola/pseudoscatter-performance
Add a faster method for computing pseudoscatter
This commit is contained in:
commit
ecd0642ca9
@ -2322,14 +2322,62 @@ def invertQTransform(tr):
|
|||||||
raise Exception("Transform is not invertible.")
|
raise Exception("Transform is not invertible.")
|
||||||
return inv[0]
|
return inv[0]
|
||||||
|
|
||||||
|
|
||||||
|
def pseudoScatter(data, spacing=None, shuffle=True, bidir=False, method='exact'):
|
||||||
|
"""Return an array of position values needed to make beeswarm or column scatter plots.
|
||||||
|
|
||||||
def pseudoScatter(data, spacing=None, shuffle=True, bidir=False):
|
Used for examining the distribution of values in an array.
|
||||||
"""
|
|
||||||
Used for examining the distribution of values in a set. Produces scattering as in beeswarm or column scatter plots.
|
|
||||||
|
|
||||||
Given a list of x-values, construct a set of y-values such that an x,y scatter-plot
|
Given an array of x-values, construct an array of y-values such that an x,y scatter-plot
|
||||||
will not have overlapping points (it will look similar to a histogram).
|
will not have overlapping points (it will look similar to a histogram).
|
||||||
"""
|
"""
|
||||||
|
if method == 'exact':
|
||||||
|
return _pseudoScatterExact(data, spacing=spacing, shuffle=shuffle, bidir=bidir)
|
||||||
|
elif method == 'histogram':
|
||||||
|
return _pseudoScatterHistogram(data, spacing=spacing, shuffle=shuffle, bidir=bidir)
|
||||||
|
|
||||||
|
|
||||||
|
def _pseudoScatterHistogram(data, spacing=None, shuffle=True, bidir=False):
|
||||||
|
"""Works by binning points into a histogram and spreading them out to fill the bin.
|
||||||
|
|
||||||
|
Faster method, but can produce blocky results.
|
||||||
|
"""
|
||||||
|
inds = np.arange(len(data))
|
||||||
|
if shuffle:
|
||||||
|
np.random.shuffle(inds)
|
||||||
|
|
||||||
|
data = data[inds]
|
||||||
|
|
||||||
|
if spacing is None:
|
||||||
|
spacing = 2.*np.std(data)/len(data)**0.5
|
||||||
|
|
||||||
|
yvals = np.empty(len(data))
|
||||||
|
|
||||||
|
dmin = data.min()
|
||||||
|
dmax = data.max()
|
||||||
|
nbins = int((dmax-dmin) / spacing) + 1
|
||||||
|
bins = np.linspace(dmin, dmax, nbins)
|
||||||
|
dx = bins[1] - bins[0]
|
||||||
|
dbins = ((data - bins[0]) / dx).astype(int)
|
||||||
|
binCounts = {}
|
||||||
|
|
||||||
|
for i,j in enumerate(dbins):
|
||||||
|
c = binCounts.get(j, -1) + 1
|
||||||
|
binCounts[j] = c
|
||||||
|
yvals[i] = c
|
||||||
|
|
||||||
|
if bidir is True:
|
||||||
|
for i in range(nbins):
|
||||||
|
yvals[dbins==i] -= binCounts.get(i, 0) * 0.5
|
||||||
|
|
||||||
|
return yvals[np.argsort(inds)] ## un-shuffle values before returning
|
||||||
|
|
||||||
|
|
||||||
|
def _pseudoScatterExact(data, spacing=None, shuffle=True, bidir=False):
|
||||||
|
"""Works by stacking points up one at a time, searching for the lowest position available at each point.
|
||||||
|
|
||||||
|
This method produces nice, smooth results but can be prohibitively slow for large datasets.
|
||||||
|
"""
|
||||||
inds = np.arange(len(data))
|
inds = np.arange(len(data))
|
||||||
if shuffle:
|
if shuffle:
|
||||||
np.random.shuffle(inds)
|
np.random.shuffle(inds)
|
||||||
|
Loading…
Reference in New Issue
Block a user