#!/bin/sh

# This script does some very basic benchmarks with 'xsv' on a city population
# data set (which is a strict subset of the `worldcitiespop` data set). If it
# doesn't exist on your system, it will be downloaded to /tmp for you.
#
# These aren't meant to be overly rigorous, but they should be enough to catch
# significant regressions.
#
# Make sure you're using an `xsv` generated by `cargo build --release`.

set -e

# Optional first argument: an extended regular expression used by `run` to
# select benchmarks by name. Empty (the default) runs every benchmark.
pat="${1:-}"
data=/tmp/worldcitiespop_mil.csv
data_idx=/tmp/worldcitiespop_mil.csv.idx
data_url=http://burntsushi.net/stuff/worldcitiespop_mil.csv

# Fetch the data set on first use. The download goes to a scratch file that
# is only moved into place on success: writing straight to "$data" would
# leave an empty or truncated file behind after a failed transfer, and every
# later run would then happily benchmark against it.
#   -f  treat HTTP errors as failures instead of saving the error page
#   -L  follow redirects
if [ ! -r "$data" ]; then
  data_tmp="$data.part.$$"
  if ! curl -fsSL "$data_url" > "$data_tmp"; then
    rm -f "$data_tmp"
    echo "error: could not download $data_url" >&2
    exit 1
  fi
  mv "$data_tmp" "$data"
fi

# Size of the data set in bytes, used to derive MB/sec. `wc -c` is POSIX,
# whereas `stat --format` only exists in GNU coreutils; `tr` strips the
# padding some `wc` implementations emit.
data_size=$(wc -c < "$data" | tr -d ' ')

# Time one run of a command and print its wall-clock duration in seconds.
#
# Arguments: the command and its arguments. They are joined with single
#            spaces and re-parsed by `sh -c`, so an argument that must stay
#            one word has to carry its own shell quoting (e.g. "'(?i)us'").
# Outputs:   the "real" figure reported by `time -p`, clamped to a minimum
#            of 0.01 so callers can divide by it.
# Returns:   non-zero, with a message on stderr, if no timing was reported.
#
# Assigns the globals `cmd` and `t`; intended to be called from a command
# substitution, where those assignments stay in the subshell.
real_seconds() {
  # "$*" joins the arguments without the word splitting and pathname
  # expansion an unquoted $@ would trigger. The benchmarked command's own
  # output is discarded so that only the timing report is left to parse.
  cmd="$* > /dev/null 2>&1"
  # `time` may resolve to the shell keyword (bash) or to the external
  # utility (shells without such a keyword); `-p` requests the POSIX
  # "real N.NN" report in both cases. The brace group is what lets 2>&1
  # capture the keyword's report. A single awk picks the figure and applies
  # the clamp, echoing the original text when no clamping is needed.
  t=$(
    { time -p sh -c "$cmd"; } 2>&1 \
      | awk '$1 == "real" { print ($2 + 0 < 0.01 ? "0.01" : $2); exit }')
  if [ -z "$t" ]; then
    echo "real_seconds: no timing reported for: $*" >&2
    return 1
  fi
  echo "$t"
}

# Average wall-clock time of three runs of a command with no index present.
#
# Globals:   data_idx (read) - index file deleted before every run, so an
#            index left behind by a previous run (or created by the command
#            itself) never influences the next measurement.
# Arguments: the command line to time, passed unchanged to real_seconds.
# Outputs:   the mean of the three timings in seconds, as computed by `bc`
#            with scale=2.
# Returns:   non-zero, with a message on stderr, if a run yields no timing.
benchmark() {
  timings=
  for pass in 1 2 3; do
    rm -f "$data_idx"
    elapsed=$(real_seconds "$@") || elapsed=
    if [ -z "$elapsed" ]; then
      # Splicing an empty value into the bc expression would only produce a
      # confusing syntax error further down; fail here instead.
      echo "benchmark: no timing for: $*" >&2
      return 1
    fi
    # Build "t1 + t2 + t3" incrementally.
    timings="${timings:+$timings + }$elapsed"
  done
  echo "scale=2; ($timings) / 3" | bc
}

# Average wall-clock time of three runs of a command with a fresh index.
#
# Globals:   data (read)     - CSV file handed to `xsv index`.
#            data_idx (read) - index file; removed before the index is
#            rebuilt and again once the runs are over.
# Arguments: the command line to time, passed unchanged to real_seconds.
# Outputs:   the mean of the three timings in seconds, as computed by `bc`
#            with scale=2.
# Returns:   non-zero, with a message on stderr, if the index cannot be
#            built or a run yields no timing.
benchmark_with_index() {
  rm -f "$data_idx"
  # Callers capture this function's stdout, so anything the indexing step
  # prints is sent to stderr. A failed index must abort: otherwise the
  # "indexed" figures would silently measure the unindexed code path.
  if ! xsv index "$data" >&2; then
    echo "benchmark_with_index: could not index $data" >&2
    return 1
  fi
  timings=
  for pass in 1 2 3; do
    elapsed=$(real_seconds "$@") || elapsed=
    if [ -z "$elapsed" ]; then
      rm -f "$data_idx"
      echo "benchmark_with_index: no timing for: $*" >&2
      return 1
    fi
    # Build "t1 + t2 + t3" incrementally.
    timings="${timings:+$timings + }$elapsed"
  done
  rm -f "$data_idx"
  echo "scale=2; ($timings) / 3" | bc
}

# Run one named benchmark and print a result line.
#
# Usage:     run [--index] NAME COMMAND [ARG...]
# Globals:   pat (read)       - optional extended regex; when non-empty the
#            benchmark only runs if the regex matches NAME in full.
#            data_size (read) - size of the data set in bytes.
# Arguments: --index  time the command via benchmark_with_index instead of
#                     benchmark.
#            NAME     label printed in the result line and matched by pat.
#            COMMAND  command line to benchmark.
# Outputs:   "NAME<TAB>N.NN seconds<TAB>N.NN MB/sec" on stdout; nothing when
#            NAME is filtered out by pat.
# Returns:   non-zero, with a message on stderr, if no timing was obtained.
run() {
  index=
  while [ "$1" = "--index" ]; do
    index="yes"
    shift
  done
  name="$1"
  shift

  # `-x` anchors the pattern as a whole. Wrapping it as "^$pat$" breaks for
  # alternations: "count|index" would become "^count|index$" and also select
  # names that merely start with "count" or end with "index".
  if [ -n "$pat" ] && ! printf '%s\n' "$name" | grep -E -q -x -e "$pat"; then
    return 0
  fi

  if [ -z "$index" ]; then
    t=$(benchmark "$@")
  else
    t=$(benchmark_with_index "$@")
  fi
  if [ -z "$t" ]; then
    echo "run: benchmark '$name' produced no timing" >&2
    return 1
  fi
  # Throughput: bytes per second scaled to MiB (2^20 bytes).
  mb_per=$(echo "scale=2; ($data_size / $t) / 2^20" | bc)
  printf "%s\t%0.02f seconds\t%s MB/sec\n" "$name" "$t" "$mb_per"
}

# Benchmark table. Each line is `run [--index] NAME COMMAND...`: NAME is the
# label printed in the results and the string the optional filter pattern
# ($1 of this script) is matched against. Results are printed in this order.
# With --index the command is timed through benchmark_with_index, which runs
# `xsv index` on the data first; otherwise the index file is removed before
# each timed run.
run count xsv count "$data"
run flatten xsv flatten "$data"
run flatten_condensed xsv flatten "$data" --condense 50
run frequency xsv frequency "$data"
run index xsv index "$data"
run sample_10 xsv sample 10 "$data"
run sample_1000 xsv sample 1000 "$data"
run sample_100000 xsv sample 100000 "$data"
# The regex carries its own single quotes: real_seconds joins the arguments
# into one string and hands it to `sh -c`, which parses the quoting again.
run search xsv search -s Country "'(?i)us'" "$data"
run select xsv select Country "$data"
run sort xsv sort -s AccentCity "$data"
# The following commands are measured both without and with an index.
run slice_one_middle xsv slice -i 500000 "$data"
run --index slice_one_middle_index xsv slice -i 500000 "$data"
run stats xsv stats "$data"
run --index stats_index xsv stats "$data"
run stats_everything xsv stats "$data" --everything
run --index stats_everything_index xsv stats "$data" --everything
