From aa57c7a685f15505b4e207329b3f97d74aba9c7a Mon Sep 17 00:00:00 2001
From: pijyoi <nixchuan@gmail.com>
Date: Tue, 6 Apr 2021 14:02:52 +0800
Subject: [PATCH] implement rescaleData as a blocked iterator using np.nditer
 (#1648)

* implement rescaleData_blocked

clip limits should be int if data is int

* add test for rescaleData_blocked

* dispatch to different versions depending on numpy or cupy

* make rescaleData() the only entry-point
---
 pyqtgraph/functions.py | 81 +++++++++++++++++++++++++++++++++++-------
 1 file changed, 68 insertions(+), 13 deletions(-)

diff --git a/pyqtgraph/functions.py b/pyqtgraph/functions.py
index 09f8d921..a2bd15a8 100644
--- a/pyqtgraph/functions.py
+++ b/pyqtgraph/functions.py
@@ -13,6 +13,7 @@ import re
 import struct
 import sys
 import warnings
+import math
 
 import numpy as np
 from .util.cupy_helper import getCupy
@@ -1032,6 +1033,49 @@ def clip_array(arr, vmin, vmax, out=None):
         return np.core.umath.clip(arr, vmin, vmax, out=out)
 
 
+def _rescaleData_nditer(data_in, scale, offset, work_dtype, out_dtype, clip):
+    """Refer to documentation for rescaleData()"""
+    data_out = np.empty_like(data_in, dtype=out_dtype)
+
+    # integer clip operations are faster than float clip operations
+    # so test to see if we can perform integer clipping
+    fits_int32 = False
+    if data_in.dtype.kind in 'ui' and out_dtype.kind in 'ui':
+        # estimate whether data range after rescale will fit within an int32.
+        # this means that the input dtype should be an 8-bit or 16-bit integer type.
+        # casting to an int32 will lose the fractional part, therefore the
+        # output dtype must be an integer kind.
+        lim_in = np.iinfo(data_in.dtype)
+        dst_bounds = scale * (lim_in.min - offset), scale * (lim_in.max - offset)
+        if dst_bounds[1] < dst_bounds[0]:
+            dst_bounds = dst_bounds[1], dst_bounds[0]
+        lim32 = np.iinfo(np.int32)
+        fits_int32 = lim32.min < dst_bounds[0] and dst_bounds[1] < lim32.max
+
+    it = np.nditer([data_in, data_out],
+            flags=['external_loop', 'buffered'],
+            op_flags=[['readonly'], ['writeonly', 'no_broadcast']],
+            op_dtypes=[None, work_dtype],
+            casting='unsafe',
+            buffersize=32768)
+
+    with it:
+        for x, y in it:
+            y[...] = x
+            y -= offset
+            y *= scale
+
+            # Clip before converting dtype to avoid overflow
+            if clip is not None:
+                if fits_int32:
+                    # converts to int32, clips back into y (work_dtype buffer)
+                    np.core.umath.clip(y.astype(np.int32), clip[0], clip[1], out=y)
+                else:
+                    clip_array(y, clip[0], clip[1], out=y)
+
+    return data_out
+
+
 def rescaleData(data, scale, offset, dtype=None, clip=None):
     """Return data rescaled and optionally cast to a new dtype.
 
@@ -1040,32 +1084,43 @@ def rescaleData(data, scale, offset, dtype=None, clip=None):
         data => (data-offset) * scale
     """
     if dtype is None:
-        dtype = data.dtype
+        out_dtype = data.dtype
     else:
-        dtype = np.dtype(dtype)
+        out_dtype = np.dtype(dtype)
 
-    if dtype.kind in 'ui':
-        lim = np.iinfo(dtype)
+    if out_dtype.kind in 'ui':
+        lim = np.iinfo(out_dtype)
         if clip is None:
             # don't let rescale cause integer overflow
             clip = lim.min, lim.max
         clip = max(clip[0], lim.min), min(clip[1], lim.max)
 
+        # make clip limits integer-valued (no need to cast to int)
+        # this improves performance, especially on Windows
+        clip = [math.trunc(x) for x in clip]
+
     if np.can_cast(data, np.float32):
         work_dtype = np.float32
     else:
         work_dtype = np.float64
-    d2 = data.astype(work_dtype, copy=True)
-    d2 -= offset
-    d2 *= scale
 
-    # Clip before converting dtype to avoid overflow
-    if clip is not None:
-        clip_array(d2, clip[0], clip[1], out=d2)
+    cp = getCupy()
+    if cp and cp.get_array_module(data) == cp:
+        # Cupy does not support nditer
+        # https://github.com/cupy/cupy/issues/5021
 
-    # don't copy if no change in dtype
-    data = d2.astype(dtype, copy=False)
-    return data
+        data_out = data.astype(work_dtype, copy=True)
+        data_out -= offset
+        data_out *= scale
+
+        # Clip before converting dtype to avoid overflow
+        if clip is not None:
+            clip_array(data_out, clip[0], clip[1], out=data_out)
+
+        # don't copy if no change in dtype
+        return data_out.astype(out_dtype, copy=False)
+    else:
+        return _rescaleData_nditer(data, scale, offset, work_dtype, out_dtype, clip)
 
 
 def applyLookupTable(data, lut):