#!/bin/bash

set -uo pipefail

# ================================================================
# MILLER REGRESSION SCRIPT
#
# Miller has some source-code-level unit-test routines -- but the vast majority
# of tests (thousands) are invoked here at the command-line level.
#
# Output from various mlr command-line invocations, with prepared inputs, is
# generated and then compared against stored expected output.
# ================================================================

# ================================================================
# STRUCTURE
#
# * A "case file" is a file reg-test/cases/case*.sh.
#
# * Each case file reg-test/cases/case*.sh consists of one or more
#   "invocations" of Miller. An invocation is the mlr executable run once with
#   certain command-line arguments.
#
# * Each case
#     ./reg-test/cases/case-foo.sh
#   has expected output
#     ./reg-test/expected/case-foo.sh.out
#     ./reg-test/expected/case-foo.sh.err
#   and actual output, written under the directory the script is invoked from,
#     ./output-reg-test/case-foo.sh.out
#     ./output-reg-test/case-foo.sh.err
#
# * This script loops over all of the case-*.sh files and executes them via
#   sourcing them with the Bash "." operator.
#
# * Each has lines of the form 'run_mlr ...' or 'mlr_expect_fail ...'.
#   Those functions are defined here in this file. They take mlr command-lines
#   as arguments.  They also take global variables $verbose, $actual_stdout_file, and
#   $actual_stderr_file as side-inputs, and write global variables like
#   $num_invocations_passed as side-outputs.
#
# * Each case can fail in the following ways:
#   o Zero invocations were attempted.
#   o A given 'run_mlr ...' invocation exits with non-zero when it should exit
#     with zero.
#   o A given 'mlr_expect_fail ...' invocation exits with zero when it should
#     exit with non-zero.
#   o The output of the invocations in the case's actual-output file differs
#     from the case's expected-output file.
#
# * If this script is invoked with -s ("only_show"), it simply echoes out the
#   command line for each invocation and executes it, with no output files
#   modified, no exit status checked, etc. This is for debug/exploratory mode.
# ================================================================

# ================================================================
# FUNCTIONS FOR USE BY THE CASE-FILES

# A key feature of this regression script is that it can be invoked from any
# directory. Depending on the directory it's invoked from, the path to the mlr
# executable may vary.  Nonetheless for debugging it's crucial that we echo out
# each command being executed.

# ----------------------------------------------------------------
# Runs mlr with the given arguments and expects it to exit with status 0.
#
# Arguments:
#   The mlr command line, minus the leading "mlr".
# Globals read:
#   only_show, verbose, path_to_mlr, actual_stdout_file, actual_stderr_file
# Globals written:
#   status, num_invocations_attempted, num_invocations_passed,
#   num_invocations_failed
#
# In only-show mode the command is echoed and then run with its output left on
# this script's own stdout/stderr; nothing is counted or written to files.
# Otherwise the command line and mlr's stdout are appended to
# $actual_stdout_file, and mlr's stderr is appended to $actual_stderr_file.
run_mlr() {
  if [ "$only_show" = "true" ]; then
    echo
    echo "# expect pass"
    # $path_to_mlr stays unquoted, as before, so that a multi-word value still
    # splits into a command plus leading arguments.
    echo $path_to_mlr "$@"
    echo
    $path_to_mlr "$@"

  else
    # Use just "mlr" for info messages, not full path to mlr
    if [ "$verbose" = "true" ]; then
      echo mlr "$@"
    fi
    echo mlr "$@" >> "$actual_stdout_file"
    num_invocations_attempted=$((num_invocations_attempted + 1))

    # stdout and stderr are appended to two separate files. We could 2>&1 and
    # have a single combined stdout-and-stderr file. However, experience has
    # shown that the line-ordering isn't deterministic across multiple
    # platforms, causing a regression test baked (and passing) on one platform
    # to fail on another. See in particular
    # https://github.com/johnkerl/miller/issues/293
    $path_to_mlr "$@" >> "$actual_stdout_file" 2>> "$actual_stderr_file"
    status=$?

    if [ "$status" -eq 0 ]; then
      num_invocations_passed=$((num_invocations_passed + 1))
    else
      if [ "$verbose" = "true" ]; then
        echo "Exit status was $status; expected 0."
      fi
      echo "Exit status was $status; expected 0." >> "$actual_stdout_file"
      num_invocations_failed=$((num_invocations_failed + 1))
    fi
    # Blank separator line after every invocation's output
    echo >> "$actual_stdout_file"
  fi
}

# ----------------------------------------------------------------
# Runs mlr with the given arguments and expects it to exit with status
# exactly 1; any other exit status, including 0, counts as a failed
# invocation.
#
# Arguments:
#   The mlr command line, minus the leading "mlr".
# Globals read:
#   only_show, verbose, path_to_mlr, actual_stdout_file, actual_stderr_file
# Globals written:
#   status, num_invocations_attempted, num_invocations_passed,
#   num_invocations_failed
#
# In only-show mode the command is echoed and then run with its output left on
# this script's own stdout/stderr; nothing is counted or written to files.
# Otherwise the command line and mlr's stdout are appended to
# $actual_stdout_file, and mlr's stderr is appended to $actual_stderr_file.
mlr_expect_fail() {
  if [ "$only_show" = "true" ]; then
    echo
    echo "# expect fail"
    # $path_to_mlr stays unquoted, as before, so that a multi-word value still
    # splits into a command plus leading arguments.
    echo $path_to_mlr "$@"
    echo
    $path_to_mlr "$@"

  else
    # Use just "mlr" for info messages
    if [ "$verbose" = "true" ]; then
      echo mlr "$@"
    fi
    echo mlr "$@" >> "$actual_stdout_file"
    num_invocations_attempted=$((num_invocations_attempted + 1))

    # stdout and stderr are appended to two separate files
    $path_to_mlr "$@" >> "$actual_stdout_file" 2>> "$actual_stderr_file"
    status=$?

    if [ "$status" -eq 1 ]; then
      num_invocations_passed=$((num_invocations_passed + 1))
    else
      num_invocations_failed=$((num_invocations_failed + 1))
      if [ "$verbose" = "true" ]; then
        echo "Exit status was $status; expected 1."
      fi
      echo "Exit status was $status; expected 1." >> "$actual_stdout_file"
    fi
    # Blank separator line after every invocation's output
    echo >> "$actual_stdout_file"
  fi
}

# ----------------------------------------------------------------
# Lets the callsite redirect stdout and stderr. This is nominally for testing
# DSL-redirect statements and making sure '> stdout' goes to stdout, '> stderr'
# goes to stderr, '> "foo.dat"' goes to foo.dat.
# Runs mlr with the given arguments, expecting exit status 0, without
# redirecting mlr's stdout or stderr: the callsite supplies any redirects.
#
# Arguments:
#   The mlr command line, minus the leading "mlr".
# Globals read:
#   only_show, verbose, path_to_mlr, actual_stdout_file
# Globals written:
#   status, num_invocations_attempted, num_invocations_passed,
#   num_invocations_failed
#
# Outside only-show mode the command line, any exit-status complaint, and a
# trailing blank line are still appended to $actual_stdout_file.
run_mlr_externally_redirected() {
  if [ "$only_show" = "true" ]; then
    echo
    echo "# expect pass"
    # $path_to_mlr stays unquoted, as before, so that a multi-word value still
    # splits into a command plus leading arguments.
    echo $path_to_mlr "$@"
    echo
    $path_to_mlr "$@"

  else
    # Use just "mlr" for info messages, not full path to mlr
    if [ "$verbose" = "true" ]; then
      echo mlr "$@"
    fi
    echo mlr "$@" >> "$actual_stdout_file"
    num_invocations_attempted=$((num_invocations_attempted + 1))

    $path_to_mlr "$@" # callsite is expected to redirect stdout and stderr
    status=$?

    if [ "$status" -eq 0 ]; then
      num_invocations_passed=$((num_invocations_passed + 1))
    else
      if [ "$verbose" = "true" ]; then
        echo "Exit status was $status; expected 0."
      fi
      echo "Exit status was $status; expected 0." >> "$actual_stdout_file"
      num_invocations_failed=$((num_invocations_failed + 1))
    fi
    # Blank separator line after every invocation
    echo >> "$actual_stdout_file"
  fi
}

# ----------------------------------------------------------------
# Runs mlr with the given arguments, expecting exit status 0, without
# recording the command line or mlr's stdout in $actual_stdout_file. mlr's
# stdout is not redirected by this function; its stderr is appended to
# $actual_stderr_file.
#
# Arguments:
#   The mlr command line, minus the leading "mlr".
# Globals read:
#   only_show, verbose, path_to_mlr, actual_stdout_file, actual_stderr_file
# Globals written:
#   status, num_invocations_attempted, num_invocations_passed,
#   num_invocations_failed
#
# Outside only-show mode an exit-status complaint (if any) and a trailing
# blank line are still appended to $actual_stdout_file.
run_mlr_no_output() {
  if [ "$only_show" = "true" ]; then
    echo
    echo "# expect pass"
    # $path_to_mlr stays unquoted, as before, so that a multi-word value still
    # splits into a command plus leading arguments.
    echo $path_to_mlr "$@"
    echo
    $path_to_mlr "$@"

  else
    # Use just "mlr" for info messages, not full path to mlr
    if [ "$verbose" = "true" ]; then
      echo mlr "$@"
    fi
    num_invocations_attempted=$((num_invocations_attempted + 1))

    $path_to_mlr "$@" 2>> "$actual_stderr_file"
    status=$?

    if [ "$status" -eq 0 ]; then
      num_invocations_passed=$((num_invocations_passed + 1))
    else
      if [ "$verbose" = "true" ]; then
        echo "Exit status was $status; expected 0."
      fi
      echo "Exit status was $status; expected 0." >> "$actual_stdout_file"
      num_invocations_failed=$((num_invocations_failed + 1))
    fi
    # Blank separator line after every invocation
    echo >> "$actual_stdout_file"
  fi
}

# ----------------------------------------------------------------
# Just cats a file while also announcing that fact.
# Arguments:
#   Passed straight through to cat.
# Globals read:
#   only_show, verbose, actual_stdout_file
#
# In only-show mode the announcement and the file contents go to this script's
# stdout. Otherwise the "cat ..." line, the contents, and a trailing blank
# line are appended to $actual_stdout_file.
run_cat() {
  if [ "$only_show" = "true" ]; then
    echo
    echo cat "$@"
    echo
    cat "$@"

  else
    if [ "$verbose" = "true" ]; then
      echo cat "$@"
    fi
    # Everything in this group lands in the actual-output file, for the diff
    # at the end of the case.
    {
      echo cat "$@"
      cat "$@"
      echo
    } >> "$actual_stdout_file"
  fi
}

# ----------------------------------------------------------------
# Writes a major section banner: a blank line, a row of '=' characters, the
# given text, and another blank line.
#
# Arguments:
#   The banner text.
# Globals read:
#   only_show, actual_stdout_file
#
# In only-show mode the banner goes to this script's stdout, with the blank
# line between the '=' row and the text rather than after the text. Otherwise
# it is appended to $actual_stdout_file.
announce() {
  if [ "$only_show" = "true" ]; then
    echo
    echo "================================================================"
    echo
    echo "$@"
  else
    {
      echo
      echo "================================================================"
      echo "$@"
      echo
    } >> "$actual_stdout_file"
  fi
}

# ----------------------------------------------------------------
# Writes a minor section marker: a blank line, then a row of '-' characters
# followed by the given text on the same line.
#
# Arguments:
#   The marker text.
# Globals read:
#   only_show, actual_stdout_file
#
# In only-show mode the marker goes to this script's stdout; otherwise it is
# appended to $actual_stdout_file.
mention() {
  if [ "$only_show" = "true" ]; then
    echo
    echo ---------------------------------------------------------------- "$@"
  else
    {
      echo
      echo ---------------------------------------------------------------- "$@"
    } >> "$actual_stdout_file"
  fi
}

# ================================================================
# START OF SCRIPT PER SE

# ----------------------------------------------------------------
# Set up for green "PASS", red "FAIL" if output is to a terminal.

# Choose the colour escape sequences by platform. On Darwin the variables hold
# real ESC bytes (ANSI-C quoting); elsewhere they hold the literal text "\e[..."
# which is only turned into an escape when printed with 'echo -e'.
case "$(uname)" in
  Darwin)
    textdefault=$'\033[0m'
    red=$'\033[31;01m'
    green=$'\033[32;01m'
    ;;
  *)
    textdefault="\e[0m"
    red="\e[31m"
    green="\e[32m"
    ;;
esac
export textdefault red green

# Start from plain labels and decorate them only when file descriptor 1
# (stdout) is attached to a terminal.
PASS="PASS"
FAIL="FAIL"
if [ -t 1 ]; then
  PASS="${green}${PASS}${textdefault}"
  FAIL="${red}${FAIL}${textdefault}"
fi

# ----------------------------------------------------------------
# Prints the command-line help to stderr and exits the script with status 1.
# Takes no arguments; reads $0 for the program name shown in the first line.
usage() {
  {
    printf 'Usage: %s [case*sh filenames]\n' "$(basename "$0")"
    printf '%s\n' \
      "Options:" \
      "-h     Print this message" \
      "-v     More verbose output including each mlr-invocation line, and diff outputs" \
      "-w     Like -v but use sdiff. Must be run interactively since it queries stty size." \
      "-s     Only run cases and show outputs; don't diff against expected" \
      "-m {path/to/mlr} Use a specified mlr executable" \
      "-c     Use ../c/mlr executable" \
      "-g     Use ../go/mlr executable" \
      "-o     Run only old case-c-* cases" \
      "-n     Run only new non-case-c-* cases" \
      "-C {n} Passed directly to diff"
  } 1>&2
  exit 1
}

# ----------------------------------------------------------------
# These next few lines let us be invoked from another directory.

# Directory the script was invoked from; the output directory is created
# beneath it.
pwd=$(pwd)
# Directory holding this script. "$0" is quoted so that a path containing
# whitespace is passed to dirname as a single argument.
ourdir=$(dirname "$0")
srcdir=$ourdir/../..
# Default executable under test; the -c, -g and -m options override it.
path_to_mlr=$srcdir/go/mlr

# ----------------------------------------------------------------
# Parse the command line.

# Input/output locations
indir=$ourdir/input
expdir=$ourdir/expected
outdir=$pwd/output-reg-test
reloutdir=./output-reg-test # used by some case-files

verbose="false"
sdiff="false"
only_show="false"
case_file_names="" # None means all
diff_context_flag=""

while getopts hvwscgnom:C: f; do
  case "$f" in
    h) echo; usage ;;
    v) verbose="true" ;;
    w) verbose="true"; sdiff="true" ;;
    s) only_show="true" ;;
    c) path_to_mlr=$ourdir/../../c/mlr ;;
    g) path_to_mlr=$ourdir/../../go/mlr ;;
    m) path_to_mlr="$OPTARG" ;;
    o) case_file_names=$(ls "$ourdir"/cases/case-c-*.sh) ;;
    n) case_file_names=$(ls "$ourdir"/cases/case-*.sh | grep -v case-c-) ;;
    C) diff_context_flag="-C$OPTARG" ;;
    # getopts has already printed a diagnostic for an unknown option or a
    # missing option argument; stop here rather than run with a flag ignored.
    *) usage ;;
  esac
done

# $case_file_names is a whitespace-separated list of paths. Precedence:
# -o/-n selection, then filenames given on the command line, then every case.
shift $((OPTIND - 1))
if [ "$case_file_names" = "" ]; then
  case_file_names="$*"
fi
if [ "$case_file_names" = "" ]; then
  case_file_names=$(ls "$ourdir"/cases/case-*.sh)
fi

# This only warns; the run continues even when the check fails.
if [ ! -x "$path_to_mlr" ]; then
  echo "$0: Could not find path to mlr executable $path_to_mlr." 1>&2
fi

# Only-show mode points both directories at /tmp
if [ "$only_show" = "true" ]; then
  expdir="/tmp"
  outdir="/tmp"
fi

# ----------------------------------------------------------------
# Prep for output

# Start every real run from an empty output directory. The path is quoted so a
# directory name containing whitespace is removed as one path, and ${outdir:?}
# aborts the script instead of running rm -rf when the variable is empty.
if [ "$only_show" = "false" ]; then
  rm -rf -- "${outdir:?}"
  mkdir -p -- "$outdir"
fi

echo "Using mlr executable $path_to_mlr"

# Don't let the running user's .mlrrc affect the regression test
export MLRRC="__none__"

# ----------------------------------------------------------------
# Set side-globals used by the case-files.

# Case-level tallies maintained by the loop over case files.
num_cases_attempted=0 num_cases_passed=0 num_cases_failed=0

# Running total of the per-case invocation counts, accumulated by that loop.
total_num_invocations_attempted=0

# ----------------------------------------------------------------
# Run the case-files.

# $case_file_names is a whitespace-separated list, so it is deliberately left
# unquoted here to split into one path per iteration.
for case_file in $case_file_names; do

  if [ "$verbose" = "true" ]; then
    echo ""
    echo "----------------------------------------------------------------"
    echo "BEGIN $case_file"
    echo ""
  fi

  # Side-globals set/read per case:
  num_invocations_attempted=0
  num_invocations_passed=0
  num_invocations_failed=0

  case_name=$(basename "$case_file")
  actual_stdout_file=$outdir/$case_name.out
  actual_stderr_file=$outdir/$case_name.err
  expected_stdout_file=$expdir/$case_name.out
  expected_stderr_file=$expdir/$case_name.err

  case_passed=true

  # Each actual-output file starts out holding a single blank line
  if [ "$only_show" = "false" ]; then
    echo > "$actual_stdout_file"
    echo > "$actual_stderr_file"
  fi
  num_cases_attempted=$((num_cases_attempted + 1))

  # Note that 'echo "..." | run_mlr ... ' would be nice in case-files, but, I had
  # issues with the $num_invocations_attempted not tracking correctly in
  # reg-test/run when run_mlr is at the end of a pipe :(. So, I enforce
  # the discipline of always using here-documents ("<<EOF").
  if grep 'echo.*|' "$case_file" > /dev/null 2> /dev/null; then
    echo "$0: Please edit $case_file and replace 'echo ... | run_mlr ...' lines with 'run_mlr ... <<EOF'"
    case_passed=false
  fi

  # ----------------------------------------------------------------
  # Run all invocations in the case:
  if [ -f "$case_file" ]; then
    . "$case_file"
  else
    # Show this error regardless of verbosity
    echo "Case failed since file not found: $case_file"
    echo -e "$FAIL  $case_file"
    num_cases_failed=$((num_cases_failed + 1))
    continue
  fi
  # ----------------------------------------------------------------

  if [ "$only_show" = "true" ]; then
    continue
  fi

  # There are two ways to fail here:
  # * Miller-command code was 0 not 1 (or vice versa) -- checked per-invocation
  # * Expected/actual output mismatch -- checked per-case

  if [ "$num_invocations_failed" -ne 0 ]; then
    if [ "$verbose" = "true" ]; then
      echo "Case failed due to num_invocations_failed > 0"
    fi
    case_passed=false
  fi

  if [ "$num_invocations_attempted" -eq 0 ]; then
    if [ "$verbose" = "true" ]; then
      echo "Case failed due to num_invocations_attempted == 0"
    fi
    case_passed=false
  fi

  # Compare expected against actual for both the .out and the .err files.
  # $diff_context_flag is deliberately unquoted: when it is empty it must
  # expand to no argument at all.
  if [ "$verbose" = "true" ]; then
    echo ""
    echo "num_invocations_attempted  $num_invocations_attempted"
    echo "num_invocations_passed     $num_invocations_passed"
    echo "num_invocations_failed     $num_invocations_failed"
    echo ""
    if [ "$sdiff" = "true" ]; then
      # Assuming we're run interactively
      # NOTE(review): the -C flag is also handed to sdiff here -- confirm that
      # sdiff accepts it.
      width=$(stty size | awk '{print $2}')
      echo sdiff -w "$width" $diff_context_flag "$expected_stdout_file" "$actual_stdout_file"
      sdiff -w "$width" $diff_context_flag "$expected_stdout_file" "$actual_stdout_file"
      ostatus=$?
      echo sdiff -w "$width" $diff_context_flag "$expected_stderr_file" "$actual_stderr_file"
      sdiff -w "$width" $diff_context_flag "$expected_stderr_file" "$actual_stderr_file"
      estatus=$?
    else
      echo diff -a -I '^mlr' -I '^Miller:' -I '^cat' $diff_context_flag "$expected_stdout_file" "$actual_stdout_file"
      diff -a -I '^mlr' -I '^Miller:' -I '^cat' $diff_context_flag "$expected_stdout_file" "$actual_stdout_file"
      ostatus=$?
      echo diff -a -I '^mlr' -I '^Miller:' -I '^cat' $diff_context_flag "$expected_stderr_file" "$actual_stderr_file"
      diff -a -I '^mlr' -I '^Miller:' -I '^cat' $diff_context_flag "$expected_stderr_file" "$actual_stderr_file"
      estatus=$?
    fi
  else
    diff -q -a -I '^mlr' -I '^Miller:' -I '^cat' $diff_context_flag "$expected_stdout_file" "$actual_stdout_file" > /dev/null
    ostatus=$?
    diff -q -a -I '^mlr' -I '^Miller:' -I '^cat' $diff_context_flag "$expected_stderr_file" "$actual_stderr_file" > /dev/null
    estatus=$?
  fi

  if [ "$ostatus" -ne 0 ]; then
    if [ "$verbose" = "true" ]; then
      echo "Case failed due to expected output != actual output:"
      echo "$expected_stdout_file"
      echo "$actual_stdout_file"
    fi
    case_passed=false
  fi

  if [ "$estatus" -ne 0 ]; then
    if [ "$verbose" = "true" ]; then
      echo "Case failed due to expected output != actual output:"
      echo "$expected_stderr_file"
      echo "$actual_stderr_file"
    fi
    case_passed=false
  fi

  if [ "$verbose" = "true" ]; then
    echo
  fi

  # Summarize the case status
  if [ "$case_passed" = "true" ]; then
    echo -e "$PASS  $case_file"
    num_cases_passed=$((num_cases_passed + 1))
  else
    echo -e "$FAIL  $case_file"
    num_cases_failed=$((num_cases_failed + 1))
  fi

  if [ "$verbose" = "true" ]; then
    echo "----------------------------------------------------------------"
  fi

  total_num_invocations_attempted=$((total_num_invocations_attempted + num_invocations_attempted))
done

# ----------------------------------------------------------------
# SUMMARIZE OVERALL STATUS

# Only-show mode keeps no tallies, so there is nothing to report.
if [ "$only_show" = "true" ]; then
  exit 0
fi

# Totals, framed by a blank line above and below
echo
printf '%s\n' \
  "NUMBER OF MILLER INVOCATIONS $total_num_invocations_attempted" \
  "NUMBER OF CASES PASSED $num_cases_passed" \
  "NUMBER OF CASES FAILED $num_cases_failed" \
  ""

# The script's exit status mirrors the overall verdict: 1 on any failed case,
# 0 otherwise.
if ! [ $num_cases_failed -eq 0 ]; then
  echo -e "$FAIL overall"
  exit 1
fi
echo -e "$PASS overall"
exit 0
