#!/bin/bash
# a script to count a few keywords through project history
# --- Tunables -------------------------------------------------------------
# Space-separated keywords; counter() runs one `git grep -I -i` per word and
# the words also become column names in the table header.
header="todo fixme hack" # what to count
# File the table is written to (replaced by /dev/stdout when --last is given).
output="todo-table"
# Scale factor applied to each keyword count after dividing by the line count.
inflate=10000 # how much to multiply the relative counts with
# Sampling step: each iteration checks out HEAD~$interval.
interval=100 # how many commits to skip when sampling
# Set to "yes" by --last: sample only the current checkout, no history walk.
skim=no # whether to just look at current state

# Put the work tree back after history sampling; run from the script's trap.
# Globals: skim (read) - when "yes" nothing was checked out, so do nothing.
# Hard-resets the work tree and checks out master; git's stdout is discarded.
function cleanup() {
  if [[ $skim != yes ]]; then
    git reset --hard > /dev/null
    git checkout master > /dev/null
  fi
}

#######################################
# Print one sample row for the currently checked-out commit:
#   <date> <lines> <rel-count-1> ... <rel-count-N>
# Globals:
#   header  (read) - space-separated keywords, one output column per word
#   inflate (read) - factor applied to each count / lines ratio
# Outputs:
#   One space-separated row on stdout. <date> is the HEAD commit's author
#   date (YYYY-MM-DD), <lines> the total line count of tracked source-like
#   files, each <rel-count> is matches / lines * inflate.
# Returns:
#   Exit status of the final awk formatting step.
#######################################
function counter() {
  local lines label word
  local -a counts=()

  # NUL-delimited names survive whitespace in paths. Concatenating through
  # cat gives one grand total even when xargs has to split the file list
  # into several invocations (a per-batch "wc -l" total would only reflect
  # the last batch).
  lines=$(git ls-files -z |
            grep -zE '\.(c|cpp|h|py|sh|pl|java|2da|txt|ac|in)$' |
            xargs -0 cat | wc -l)

  # $header is deliberately unquoted: it is a space-separated word list.
  for word in $header; do
    counts+=("$(git grep -I -i -e "$word" | wc -l)")
  done

  label=$(git log -1 --format=%ad --date=short)
#  label=$(git describe 2>/dev/null)
#  [[ -z $label ]] && label=$(git log --oneline | sed -n 's,^\(\S*\) .*,\1,p; q')

  # Generate linecount-relative values for however many keywords there are;
  # a commit with no countable lines yields 0 rather than a division by zero.
  printf '%s\n' "$label $lines ${counts[*]}" |
    awk -v scale="$inflate" '{
      printf("%s %d", $1, $2)
      for (i = 3; i <= NF; i++) {
        rel = 0
        if ($2 > 0) rel = $i / $2 * scale
        printf(" %f", rel)
      }
      printf("\n")
    }'
}

# Refuse to run with uncommitted changes to tracked files: sampling checks
# out older commits and cleanup() hard-resets the work tree.
if [[ -n $(git status --porcelain --untracked-files=no) ]]; then
  echo "Bailing out, you were in the middle of something" >&2
  exit 1
fi

# cleanup runs exactly once, from the EXIT trap. The INT trap only turns
# Ctrl-C into a regular exit (130 = 128 + SIGINT); a handler that merely
# returned would let the script carry on and finalize a partial table.
trap cleanup EXIT
trap 'exit 130' INT

if [[ $1 == --last ]]; then
  output=/dev/stdout
  skim=yes
else
  echo "Started counting..."
  # Start from an empty table. Done only on this branch so that --last,
  # which prints to stdout, does not wipe a table from an earlier full run.
  : > "$output"
fi

# Sample the current checkout first.
counter >> "$output" || exit 2

if [[ $skim != yes ]]; then
  # Walk back through history until HEAD~$interval no longer resolves.
  while git checkout "HEAD~$interval" > /dev/null; do
    counter >> "$output" || exit 2
  done

  # Rows were collected newest-first. Append the header last so that after
  # reversing it ends up on top; it names every column counter emits
  # (date, line count, then one column per keyword).
  echo "commit lines $header" >> "$output"

  # reverse the file for normal arrow of time and turn runs of spaces into
  # tabs; the table is small, so hold it in memory instead of a temp file
  table=$(tac "$output" | sed 's,  *,\t,g') || exit 2
  printf '%s\n' "$table" > "$output"

  echo -e "Done, switching back to master!\n"
  echo "The count data is in $output"
fi

# Embedded usage example, never executed: the here-document is fed to
# `true`, which ignores its input. It holds an R snippet for plotting the
# generated table. The delimiter is unquoted, so the shell still expands
# $output inside the text before discarding it.
true << REXAMPLE
data <- read.csv("$output", sep="\t")
data[,1] <- as.POSIXct(data[,1])
par(ps = 12)
with(data, plot(commit, todo, type="l", col="blue"))
with(data, lines(commit, fixme, type="l", col="orange"))
with(data, lines(commit, hack, type="l", col="red"))
REXAMPLE
