numactl --interleave=all ./testing_dgeqrf -N 100 -N 1000 --range 10:90:10 --range 100:900:100 --range 1000:9000:1000 --range 10000:20000:2000
MAGMA 1.6.0  compiled for CUDA capability >= 3.5
CUDA runtime 7000, driver 7000. OpenMP threads 16. MKL 11.2.3, MKL threads 16. 
device 0: Tesla K40c, 745.0 MHz clock, 11519.6 MB memory, capability 3.5
device 1: Tesla K40c, 745.0 MHz clock, 11519.6 MB memory, capability 3.5
device 2: Tesla K40c, 745.0 MHz clock, 11519.6 MB memory, capability 3.5
Usage: ./testing_dgeqrf [options] [-h|--help]

ngpu 1
    M     N   CPU GFlop/s (sec)   GPU GFlop/s (sec)   ||R||_F / ||A||_F
=======================================================================
  100   100     ---   (  ---  )      0.49 (   0.00)     ---
 1000  1000     ---   (  ---  )     32.16 (   0.04)     ---
   10    10     ---   (  ---  )      0.02 (   0.00)     ---
   20    20     ---   (  ---  )      0.18 (   0.00)     ---
   30    30     ---   (  ---  )      0.47 (   0.00)     ---
   40    40     ---   (  ---  )      0.79 (   0.00)     ---
   50    50     ---   (  ---  )      1.25 (   0.00)     ---
   60    60     ---   (  ---  )      1.61 (   0.00)     ---
   70    70     ---   (  ---  )      0.47 (   0.00)     ---
   80    80     ---   (  ---  )      0.85 (   0.00)     ---
   90    90     ---   (  ---  )      1.19 (   0.00)     ---
  100   100     ---   (  ---  )      1.55 (   0.00)     ---
  200   200     ---   (  ---  )      5.27 (   0.00)     ---
  300   300     ---   (  ---  )     11.84 (   0.00)     ---
  400   400     ---   (  ---  )     19.21 (   0.00)     ---
  500   500     ---   (  ---  )     27.64 (   0.01)     ---
  600   600     ---   (  ---  )     36.80 (   0.01)     ---
  700   700     ---   (  ---  )     45.76 (   0.01)     ---
  800   800     ---   (  ---  )     56.97 (   0.01)     ---
  900   900     ---   (  ---  )     66.99 (   0.01)     ---
 1000  1000     ---   (  ---  )     79.00 (   0.02)     ---
 2000  2000     ---   (  ---  )    203.94 (   0.05)     ---
 3000  3000     ---   (  ---  )    321.58 (   0.11)     ---
 4000  4000     ---   (  ---  )    425.81 (   0.20)     ---
 5000  5000     ---   (  ---  )    542.72 (   0.31)     ---
 6000  6000     ---   (  ---  )    637.54 (   0.45)     ---
 7000  7000     ---   (  ---  )    714.97 (   0.64)     ---
 8000  8000     ---   (  ---  )    772.51 (   0.88)     ---
 9000  9000     ---   (  ---  )    810.88 (   1.20)     ---
10000 10000     ---   (  ---  )    843.87 (   1.58)     ---
12000 12000     ---   (  ---  )    923.69 (   2.49)     ---
14000 14000     ---   (  ---  )    962.33 (   3.80)     ---
16000 16000     ---   (  ---  )    989.29 (   5.52)     ---
18000 18000     ---   (  ---  )   1001.21 (   7.77)     ---
20000 20000     ---   (  ---  )   1014.24 (  10.52)     ---

numactl --interleave=all ./testing_dgeqrf_gpu -N 100 -N 1000 --range 10:90:10 --range 100:900:100 --range 1000:9000:1000 --range 10000:20000:2000
MAGMA 1.6.0  compiled for CUDA capability >= 3.5
CUDA runtime 7000, driver 7000. OpenMP threads 16. MKL 11.2.3, MKL threads 16. 
device 0: Tesla K40c, 745.0 MHz clock, 11519.6 MB memory, capability 3.5
device 1: Tesla K40c, 745.0 MHz clock, 11519.6 MB memory, capability 3.5
device 2: Tesla K40c, 745.0 MHz clock, 11519.6 MB memory, capability 3.5
Usage: ./testing_dgeqrf_gpu [options] [-h|--help]

version 1
    M     N   CPU GFlop/s (sec)   GPU GFlop/s (sec)   ||Ax-b||_F/(N*||A||_F*||x||_F)
====================================================================================
  100   100     ---   (  ---  )      0.51 (   0.00)     ---
 1000  1000     ---   (  ---  )     70.38 (   0.02)     ---
   10    10     ---   (  ---  )      0.00 (   0.00)     ---
   20    20     ---   (  ---  )      0.01 (   0.00)     ---
   30    30     ---   (  ---  )      0.04 (   0.00)     ---
   40    40     ---   (  ---  )      0.08 (   0.00)     ---
   50    50     ---   (  ---  )      0.16 (   0.00)     ---
   60    60     ---   (  ---  )      0.27 (   0.00)     ---
   70    70     ---   (  ---  )      0.31 (   0.00)     ---
   80    80     ---   (  ---  )      0.46 (   0.00)     ---
   90    90     ---   (  ---  )      0.70 (   0.00)     ---
  100   100     ---   (  ---  )      2.03 (   0.00)     ---
  200   200     ---   (  ---  )      4.12 (   0.00)     ---
  300   300     ---   (  ---  )      9.20 (   0.00)     ---
  400   400     ---   (  ---  )     15.57 (   0.01)     ---
  500   500     ---   (  ---  )     23.52 (   0.01)     ---
  600   600     ---   (  ---  )     31.90 (   0.01)     ---
  700   700     ---   (  ---  )     42.22 (   0.01)     ---
  800   800     ---   (  ---  )     51.42 (   0.01)     ---
  900   900     ---   (  ---  )     60.63 (   0.02)     ---
 1000  1000     ---   (  ---  )     71.80 (   0.02)     ---
 2000  2000     ---   (  ---  )    191.44 (   0.06)     ---
 3000  3000     ---   (  ---  )    303.43 (   0.12)     ---
 4000  4000     ---   (  ---  )    403.44 (   0.21)     ---
 5000  5000     ---   (  ---  )    522.78 (   0.32)     ---
 6000  6000     ---   (  ---  )    622.05 (   0.46)     ---
 7000  7000     ---   (  ---  )    690.83 (   0.66)     ---
 8000  8000     ---   (  ---  )    749.23 (   0.91)     ---
 9000  9000     ---   (  ---  )    789.91 (   1.23)     ---
10000 10000     ---   (  ---  )    824.35 (   1.62)     ---
12000 12000     ---   (  ---  )    890.05 (   2.59)     ---
14000 14000     ---   (  ---  )    938.13 (   3.90)     ---
16000 16000     ---   (  ---  )    971.05 (   5.62)     ---
18000 18000     ---   (  ---  )    990.70 (   7.85)     ---
20000 20000     ---   (  ---  )   1004.45 (  10.62)     ---
