import math
import sys
import time

import cupy as cp
import numpy as np


def measure(a, b, *, min_duration=1.0, warmup=1):
    """Return the average wall-clock time, in seconds, of one ``a @ b``.

    The product is evaluated repeatedly until the accumulated measured time
    reaches ``min_duration`` seconds; the result is that accumulated time
    divided by the number of timed iterations.

    Args:
        a: Left operand; anything supporting ``@`` with ``b`` (this script
            passes NumPy and CuPy arrays).
        b: Right operand.
        min_duration: Minimum total measured time, in seconds, before the
            loop stops. At least one timed iteration always runs.
        warmup: Number of untimed ``a @ b`` evaluations performed first, so
            that one-time start-up cost of the first call is not averaged
            into the result.

    Returns:
        Mean seconds per ``a @ b`` (including the stream synchronisation).
    """
    # Untimed warm-up: keeps first-call overhead out of the measurement.
    for _ in range(warmup):
        _ = a @ b
    # Make sure no warm-up work is still pending when the timer starts.
    cp.cuda.Stream.null.synchronize()

    duration = 0.0
    cycles = 0
    while True:
        start = time.perf_counter()
        _ = a @ b
        # Wait on the default CUDA stream so the timer covers the finished
        # computation, not just the launch of the operation.
        cp.cuda.Stream.null.synchronize()
        end = time.perf_counter()
        duration += end - start
        cycles += 1
        if duration >= min_duration:
            break
    return duration / cycles


# Side length of the square float32 matrices being multiplied.
n = 1024

# CPU baseline: NumPy operands.
a = np.random.rand(n, n).astype(np.float32)
b = np.random.rand(n, n).astype(np.float32)
print('numpy take', measure(a, b) * 1e6, 'usec')

# Same benchmark with operands created through CuPy.
a = cp.random.rand(n, n, dtype=cp.float32)
b = cp.random.rand(n, n, dtype=cp.float32)
print('cupy take', measure(a, b) * 1e6, 'usec')