From cbafc773f50858cf92486ff41761168eb4e63297 Mon Sep 17 00:00:00 2001
From: KIU Shueng Chuan <nixchuan@gmail.com>
Date: Tue, 2 Mar 2021 08:53:44 +0800
Subject: [PATCH 1/4] fix: max value of uint16 is 65535

---
 examples/VideoSpeedTest.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/VideoSpeedTest.py b/examples/VideoSpeedTest.py
index c036b896..be05dba9 100644
--- a/examples/VideoSpeedTest.py
+++ b/examples/VideoSpeedTest.py
@@ -157,7 +157,7 @@ def mkData():
                 dt = xp.uint16
                 loc = 4096
                 scale = 1024
-                mx = 2**16
+                mx = 2**16 - 1
             elif cacheKey[0] == 'float':
                 dt = xp.float32
                 loc = 1.0

From fda8731dabb599655e8754297d728b91b3281872 Mon Sep 17 00:00:00 2001
From: KIU Shueng Chuan <nixchuan@gmail.com>
Date: Tue, 2 Mar 2021 09:05:52 +0800
Subject: [PATCH 2/4] reduce memory usage during data generation

random.normal() generates float64 data, which is then converted to a
smaller dtype. Generating all the needed data in a single call thus uses
a lot more memory than is necessary.

This changes the data generation so that it is done in smaller chunks.
Data clipping is also changed to be done in-place.

The Gaussian filtering, which gave the video a washed-out look, is also
removed. It also contributed to the data generation time.
---
 examples/VideoSpeedTest.py | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/examples/VideoSpeedTest.py b/examples/VideoSpeedTest.py
index be05dba9..48b431dc 100644
--- a/examples/VideoSpeedTest.py
+++ b/examples/VideoSpeedTest.py
@@ -165,16 +165,22 @@ def mkData():
                 mx = 1.0
             else:
                 raise ValueError(f"unable to handle dtype: {cacheKey[0]}")
-            
+
+            chan_shape = (width, height)
             if ui.rgbCheck.isChecked():
-                data = xp.random.normal(size=(frames,width,height,3), loc=loc, scale=scale)
-                data = pg.gaussianFilter(data, (0, 6, 6, 0))
+                frame_shape = chan_shape + (3,)
             else:
-                data = xp.random.normal(size=(frames,width,height), loc=loc, scale=scale)
-                data = pg.gaussianFilter(data, (0, 6, 6))
-            if cacheKey[0] != 'float':
-                data = xp.clip(data, 0, mx)
-            data = data.astype(dt)
+                frame_shape = chan_shape
+            data = xp.empty((frames,) + frame_shape, dtype=dt)
+            view = data.reshape((-1,) + chan_shape)
+            for idx in range(view.shape[0]):
+                subdata = xp.random.normal(loc=loc, scale=scale, size=chan_shape)
+                # note: gaussian filtering has been removed as it slows down array
+                #       creation greatly.
+                if cacheKey[0] != 'float':
+                    xp.clip(subdata, 0, mx, out=subdata)
+                view[idx] = subdata
+
             data[:, 10, 10:50] = mx
             data[:, 9:12, 48] = mx
             data[:, 8:13, 47] = mx

From de85a23ae059a168b0a0eb4445ca7d69d7a14ffd Mon Sep 17 00:00:00 2001
From: KIU Shueng Chuan <nixchuan@gmail.com>
Date: Tue, 2 Mar 2021 09:18:52 +0800
Subject: [PATCH 3/4] switch to row-major order

---
 examples/VideoSpeedTest.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/examples/VideoSpeedTest.py b/examples/VideoSpeedTest.py
index 48b431dc..37d854e7 100644
--- a/examples/VideoSpeedTest.py
+++ b/examples/VideoSpeedTest.py
@@ -18,6 +18,8 @@ import pyqtgraph as pg
 import pyqtgraph.ptime as ptime
 from pyqtgraph.Qt import QtGui, QtCore, QT_LIB
 
+pg.setConfigOption('imageAxisOrder', 'row-major')
+
 import importlib
 ui_template = importlib.import_module(f'VideoTemplate_{QT_LIB.lower()}')
 
@@ -166,7 +168,7 @@ def mkData():
             else:
                 raise ValueError(f"unable to handle dtype: {cacheKey[0]}")
 
-            chan_shape = (width, height)
+            chan_shape = (height, width)
             if ui.rgbCheck.isChecked():
                 frame_shape = chan_shape + (3,)
             else:
@@ -181,9 +183,9 @@ def mkData():
                     xp.clip(subdata, 0, mx, out=subdata)
                 view[idx] = subdata
 
-            data[:, 10, 10:50] = mx
-            data[:, 9:12, 48] = mx
-            data[:, 8:13, 47] = mx
+            data[:, 10:50, 10] = mx
+            data[:, 48, 9:12] = mx
+            data[:, 47, 8:13] = mx
             cache = {cacheKey: data} # clear to save memory (but keep one to prevent unnecessary regeneration)
 
         data = cache[cacheKey]

From b3dc090373b336acabc99251b298e0f85f987c22 Mon Sep 17 00:00:00 2001
From: KIU Shueng Chuan <nixchuan@gmail.com>
Date: Tue, 2 Mar 2021 13:36:05 +0800
Subject: [PATCH 4/4] implement fastpath for makeARGB

---
 pyqtgraph/functions.py | 59 ++++++++++++++++++++++++++++++++++++++----
 1 file changed, 54 insertions(+), 5 deletions(-)

diff --git a/pyqtgraph/functions.py b/pyqtgraph/functions.py
index c327e470..0dc0183f 100644
--- a/pyqtgraph/functions.py
+++ b/pyqtgraph/functions.py
@@ -1247,7 +1247,11 @@ def makeARGB(data, lut=None, levels=None, scale=None, useRGBA=False, output=None
         order = [2,1,0,3] # for some reason, the colors line up as BGR in the final image.
         
     # copy data into image array
-    if data.ndim == 2:
+    fastpath = try_fastpath_argb(xp, data, imgData, useRGBA)
+
+    if fastpath:
+        pass
+    elif data.ndim == 2:
         # This is tempting:
         #   imgData[..., :3] = data[..., xp.newaxis]
         # ..but it turns out this is faster:
@@ -1263,11 +1267,12 @@ def makeARGB(data, lut=None, levels=None, scale=None, useRGBA=False, output=None
     profile('reorder channels')
     
     # add opaque alpha channel if needed
-    if data.ndim == 2 or data.shape[2] == 3:
-        alpha = False
-        imgData[..., 3] = 255
-    else:
+    if data.ndim == 3 and data.shape[2] == 4:
         alpha = True
+    else:
+        alpha = False
+        if not fastpath:    # fastpath has already filled it in
+            imgData[..., 3] = 255
 
     # apply nan mask through alpha channel
     if nanMask is not None:
@@ -1282,6 +1287,50 @@ def makeARGB(data, lut=None, levels=None, scale=None, useRGBA=False, output=None
     return imgData, alpha
 
 
+def try_fastpath_argb(xp, ain, aout, useRGBA):
+    """Fill ``aout`` from ``ain`` using QImage conversion; True if handled.
+
+    Only C-contiguous, non-empty ubyte numpy arrays with 1, 3 or 4 channels
+    are handled; otherwise returns False with ``aout`` left untouched so the
+    caller can fall back to its generic path. ``useRGBA`` selects
+    Format_RGBA8888 output instead of Format_ARGB32.
+    """
+    can_handle = xp is np and ain.dtype == xp.ubyte and ain.flags['C_CONTIGUOUS']
+    if not can_handle or ain.size == 0:  # empty: avoids dividing by ncols == 0
+        return False
+
+    nrows, ncols = ain.shape[:2]
+    nchans = 1 if ain.ndim == 2 else ain.shape[2]
+
+    Format = QtGui.QImage.Format
+    if nchans == 1:
+        in_fmt = Format.Format_Grayscale8
+    elif nchans == 3:
+        in_fmt = Format.Format_RGB888
+    elif nchans == 4:
+        in_fmt = Format.Format_RGBA8888
+    else:
+        return False  # no QImage format is mapped here for this channel count
+    out_fmt = Format.Format_RGBA8888 if useRGBA else Format.Format_ARGB32
+
+    if in_fmt == out_fmt:
+        aout[:] = ain
+        return True
+
+    # convert a band of rows at a time (roughly 512*1024 input bytes per band)
+    npixels_chunk = 512*1024
+    batch = max(1, int(npixels_chunk / ncols / nchans))
+    for row_beg in range(0, nrows, batch):
+        row_end = min(row_beg + batch, nrows)
+        ain_view = ain[row_beg:row_end, ...]
+        aout_view = aout[row_beg:row_end, ...]
+        qimg = QtGui.QImage(ain_view, ncols, ain_view.shape[0], ain.strides[0], in_fmt)
+        qimg = qimg.convertToFormat(out_fmt)
+        aout_view[:] = imageToArray(qimg, copy=False, transpose=False)
+
+    return True
+
+
 def makeQImage(imgData, alpha=None, copy=True, transpose=True):
     """
     Turn an ARGB array into QImage.