#!/usr/bin/make -rRf
# Run the ABySS assembler.
# Written by Shaun Jackman <sjackman@bcgsc.ca> and
# Anthony Raymond <traymond@bcgsc.ca>.

# Default recipe shell: bash, stopping at the first failing command (-e)
# and treating a failure anywhere in a pipeline as a failure (pipefail).
SHELL = bash -e -o pipefail

# Use zsh instead when a zsh accepting the same flags can be started.
ifeq (0,$(shell zsh -e -o pipefail -c 'true' 2>/dev/null; echo $$?))
SHELL = zsh -e -o pipefail
# Exported so that zsh reports the run time and memory usage of commands.
export TIMEFMT = time user=%U system=%S elapsed=%E cpu=%P memory=%M job=%J
export REPORTTIME = 1
endif

# Record run time and memory usage in a file using GNU time.
# Active only when `time` is defined. Recipes prefix commands with
# $(gtime), which writes its report to <target>.time. A GNU time
# installed under the name `gtime` is preferred over plain `time`.
ifdef time
ifeq ($(shell command -v gtime),)
gtime = command time -v -o $@.time
else
gtime = command gtime -v -o $@.time
endif
endif

# Define this environment variable on Mac OS X to read
# compressed files.
export DYLD_FORCE_FLAT_NAMESPACE = 1

# Prefix for commands that require an increased stack size limit:
# the abyss-stack-size wrapper script and its size argument.
stack = abyss-stack-size 65536

# Integrate with Sun Grid Engine (SGE)
# Take defaults for np, k and name from the job environment.
# ?= leaves any value that is already set untouched.
ifdef NSLOTS
ifneq (1,$(NSLOTS))
np ?= $(NSLOTS)
endif
endif
ifdef SGE_TASK_ID
k ?= $(SGE_TASK_ID)
endif
ifdef JOB_NAME
name ?= $(JOB_NAME)
endif

# Integrate with Portable Batch System (PBS)
# Take defaults for name, k and np from the job environment.
# ?= leaves any value that is already set untouched.
ifdef PBS_JOBNAME
name?=$(PBS_JOBNAME)
endif
ifdef PBS_ARRAYID
k?=$(PBS_ARRAYID)
endif
ifdef PBS_NODEFILE
# Count the lines of the node file once, while the makefile is parsed.
# := keeps wc from being re-run on every later expansion of np, and
# $(strip) removes the padding that some wc implementations print,
# which would otherwise defeat the comparison with 1 below.
NSLOTS:=$(strip $(shell wc -l <$(PBS_NODEFILE)))
ifneq ($(NSLOTS), 1)
np?=$(NSLOTS)
endif
endif

# Integrate with Load Sharing Facility (LSF)
# Take defaults for mpirun, np, k and name from the job environment.
# ?= leaves any value that is already set untouched.
ifdef LSF_BINDIR
mpirun ?= $(LSF_BINDIR)/mpirun.lsf
endif
ifdef LSB_DJOB_NUMPROC
ifneq (1,$(LSB_DJOB_NUMPROC))
np ?= $(LSB_DJOB_NUMPROC)
endif
endif
ifdef LSB_JOBINDEX
k ?= $(LSB_JOBINDEX)
endif
ifdef LSB_JOBNAME
name ?= $(LSB_JOBNAME)
endif

# Integrate with IBM LoadLeveler
# Take defaults for name, k and np from the job environment.
# ?= leaves any value that is already set untouched.
ifdef LOADL_JOB_NAME
name?=$(LOADL_JOB_NAME)
endif
ifdef LOADL_STEP_ID
k?=$(LOADL_STEP_ID)
endif
ifdef LOADL_HOSTFILE
# Count the lines of the host file once, while the makefile is parsed.
# := keeps wc from being re-run on every later expansion of np, and
# $(strip) removes the padding that some wc implementations print,
# which would otherwise defeat the comparison with 1 below.
NSLOTS:=$(strip $(shell wc -l <$(LOADL_HOSTFILE)))
ifneq ($(NSLOTS), 1)
np?=$(NSLOTS)
endif
endif

# Integrate with SLURM
# Take defaults for np, k and name from the job environment.
# ?= leaves any value that is already set untouched.
ifdef SLURM_NTASKS
np ?= $(SLURM_NTASKS)
endif
ifdef SLURM_ARRAY_TASK_ID
k ?= $(SLURM_ARRAY_TASK_ID)
endif
ifdef SLURM_JOB_NAME
name ?= $(SLURM_JOB_NAME)
endif

# Determine the path to mpirun
# Unless already set, look mpirun up with `command -v`; if the lookup
# yields nothing, fall back to the bare command name.
mpirun ?= $(shell command -v mpirun)
ifeq (,$(mpirun))
mpirun = mpirun
endif

# Use pigz or bgzip for parallel compression if available.
# Both are given the thread count j; plain gzip is the fallback.
ifneq (,$(shell command -v pigz))
gzip = pigz -p$(j)
else ifneq (,$(shell command -v bgzip))
gzip = bgzip -@$(j)
else
gzip = gzip
endif

# Prepend the directory of this makefile to PATH, unless preserve_path
# is defined.
ifndef preserve_path
# Determine the path to the ABySS executables
# `command -v` resolves the makefile named in MAKEFILE_LIST and dirname
# keeps its directory. ?= lets the caller supply `path` directly.
path?=$(shell dirname `command -v $(MAKEFILE_LIST)`)
# ifdef tests whether `path` has a non-empty definition without
# expanding it, so this only skips the update when `path` was set empty.
ifdef path
PATH:=$(path):$(PATH)
endif
endif

# Settings that apply only when a database (db) is requested.
ifdef db
# Track data details for database
# Each defaults to an empty pair of double quotes.
species ?= ""
strain ?= ""
library ?= ""

# Determine the location of sqlite database
# dbopt is the option added to the ABySS program option lists.
override dbopt := --db=$(db)
endif

# Programs
# Converter used to turn the Markdown statistics report into HTML.
MARKDOWN = pandoc

# $(call deref,NAME) expands to the value of the variable called NAME.
deref = $($(1))
# $(call map,FUNC,LIST) calls FUNC once for every word of LIST.
map = $(foreach a,$2,$(call $1,$a))

# With linked reads (lr) and no explicit lib, the library list is the
# paired-end libraries followed by the linked-read libraries.
ifdef lr
ifndef lib
lib := $(pe) $(lr)
endif
endif

# When libraries are named, `in` defaults to the files of all of them.
# When only `in` is given, a single library named after the assembly is
# created and assigned those files.
ifdef lib
in ?= $(call map,deref,$(lib))
else ifdef in
lib ?= $(name)
$(lib) ?= $(in)
endif

# Record whether mp was set explicitly before it is defaulted to pe;
# the default of the AdjList parameter m depends on it.
ifdef mp
mp_provided = 1
endif
pe ?= $(lib)
mp ?= $(pe)

# Strip spaces from the file paths
# `override` makes the cleaned value win over a command-line setting.
# lr_reads collects the files of every linked-read library.
ifdef lr
override lr_reads = $(strip $(call map,deref,$(lr)))
endif
ifdef se
override se := $(strip $(se))
endif
ifdef in
override in := $(strip $(in))
endif

# Graph file format
graph ?= dot
# g is private. Use graph instead.
override g := $(graph)

# Number of threads
# Under a PE_HOSTFILE, j defaults to the second column of the line
# whose first column is this host's name.
ifdef PE_HOSTFILE
hostname ?= $(shell hostname -f)
j ?= $(shell awk '$$1 == "$(hostname)" {print $$2}' $(PE_HOSTFILE))
endif
# Without a value so far, fall back to np, and finally to 2.
ifeq ($(j),)
j := $(np)
ifeq ($(j),)
j := 2
endif
endif

# ABYSS parameters
# k and q are always passed; every other parameter below is appended
# to abyssopt as its command-line flag only when it is defined.
q ?= 3
abyssopt += -k$(k) -q$(q)
ifdef K
abyssopt += -K$(K)
endif
ifdef e
abyssopt += -e$(e)
endif
ifdef E
abyssopt += -E$(E)
endif
ifdef t
abyssopt += -t$(t)
endif
ifdef c
abyssopt += -c$(c)
endif
ifdef kc
abyssopt += --kc=$(kc)
endif
# b is shared with PopBubbles.
ifdef b
pbopt += -b$(b)
abyssopt += -b$(b)
endif
ifdef Q
abyssopt += -Q$(Q)
endif
# ss turns on the --SS flag, which several later steps also receive.
ifdef ss
SS = --SS
endif
abyssopt += $(v)

# additional params for Bloom filter assembly (`abyss-bloom-dbg`)
# Without B, the assembler instead gets the database and --SS options,
# a coverage histogram file and a bubbles file named after the stem.
ifndef B
abyssopt += $(dbopt) $(SS) --coverage-hist=coverage.hist -s $*-bubbles.fa
else
abyssopt += -b$(B)
ifdef H
abyssopt += -H$(H)
endif
ifdef j
abyssopt += -j$(j)
endif
ifdef x
abyssopt += -s$(x)
endif
endif

# AdjList parameters
# m defaults to 0 when mp was given explicitly or when k is at most 50,
# and to 50 otherwise.
ifdef mp_provided
m ?= 0
else ifeq (0,$(shell test $(k) -le 50; echo $$?))
m ?= 0
else
m ?= 50
endif
alopt += $(v) $(dbopt) $(SS) -k$(k) -m$(m)
# K is passed on only outside Bloom filter mode.
ifndef B
ifdef K
alopt += -K$(K)
endif
endif

# filtergraph parameters
# Outside Bloom filter mode, K adds the --assemble options.
ifndef B
ifdef K
fgopt += --assemble --shim-max-degree=2
endif
endif
# xtip adds -t with twice the value of k, computed by bc.
ifdef xtip
fgopt += -t$(shell echo $(k)*2 |bc)
endif

# PopBubbles parameters
p ?= 0.9
pbopt += -p$(p)
ifdef a
pbopt += -a$(a)
endif

# Keep sequence and quality strings of read alignments
# ssq_t is the suffix appended to program names further down.
ifdef ssq
ssq_t := -ssq
endif

# Aligner parameters
aligner ?= map$(ssq_t)
# With --SS in effect, any other aligner is replaced by abyss-map and a
# warning is printed while the makefile is parsed.
ifeq (--SS,$(SS))
ifneq (map$(ssq_t),$(aligner))
$(warning warning: setting aligner to abyss-map$(ssq_t) since other aligners do not support ss=1)
override aligner = map$(ssq_t)
endif
endif

# Program name of the aligner and the options passed to it. $($*_l) is
# the l value of the library named by the pattern stem.
align ?= abyss-$(aligner)
mapopt = $(v) $(dbopt) -j$(j) -l$($*_l) $(SS) $(ALIGNER_OPTIONS) $(MAP_OPTIONS)

# DIDA parameters
# escape_quotes runs its argument through sed so that every double
# quote is preceded by a backslash.
escape_quotes = $(shell echo "$(1)" | sed 's|"|\\\"|g')
# Extra aligner options that apply only when the aligner is dida.
ifeq (dida,$(aligner))
ifdef np
mapopt += -n$(np)
endif
ifdef DIDA_RUN_OPTIONS
mapopt += $(DIDA_RUN_OPTIONS)
endif
ifdef DIDA_MPIRUN
mapopt += -m"$(call escape_quotes,$(DIDA_MPIRUN))"
endif
ifdef DIDA_OPTIONS
mapopt += -d"$(call escape_quotes,$(DIDA_OPTIONS))"
endif
endif

# fixmate parameters
# abyss-kaligner output is handled by ParseAligns; every other aligner
# uses abyss-fixmate, with the ssq suffix when set.
ifneq ($(align),abyss-kaligner)
fixmate ?= abyss-fixmate$(ssq_t)
else
fixmate ?= ParseAligns
endif
fmopt = $(v) $(dbopt) -l$($*_l) $(FIXMATE_OPTIONS)

# DistanceEst parameters
DistanceEst ?= DistanceEst$(ssq_t)
l ?= 40
s ?= 1000
n ?= 10
# Give every paired-end library its own <lib>_l, <lib>_s and <lib>_n,
# defaulting to the global l, s and n.
$(foreach i,$(pe),$(eval $(i)_l?=$(l))$(eval $(i)_s?=$(s))$(eval $(i)_n?=$(n)))
# The per-library values are looked up through the pattern stem ($*).
override deopt = $(v) $(dbopt) -j$(j) -k$(k) $(DISTANCEEST_OPTIONS) -l$($*_l) -s$($*_s) -n$($*_n) $($*_de)

# MergeContigs parameters
mcopt += $(v) $(dbopt) -k$(k)

# PathOverlap parameters
poopt += $(v) $(dbopt) -k$(k)

# SimpleGraph parameters
sgopt += $(dbopt) -s$(s) -n$(n)
ifdef d
sgopt += -d$(d)
endif

# MergePaths parameters
mpopt += $(v) $(dbopt) -j$(j) -k$(k) -s$(s)
ifdef G
mpopt += -G$(G)
endif

# PathConsensus parameters
pcopt += $(dbopt)
ifdef a
pcopt += -a$(a)
endif
pcopt += -p$(p)

# Scaffold parameters
L ?= $(l)
S ?= 100-5000
N ?= 15-20
# The DistanceEst seed length for scaffolding is S with everything from
# the first dash onwards removed.
SCAFFOLD_DE_S ?= $(shell echo $(S) | sed 's/-.*//')
SCAFFOLD_DE_N ?= $(N)
SCAFFOLD_DE_OPTIONS ?= $(DISTANCEEST_OPTIONS)
# Per-library defaults for the mate-pair libraries. ?= keeps values
# that a library already has.
$(foreach i,$(mp),$(eval $(i)_l?=$(L))$(eval $(i)_s?=$(SCAFFOLD_DE_S))$(eval $(i)_n?=$(SCAFFOLD_DE_N)))
override scaffold_deopt = $(v) $(dbopt) --dot --median -j$(j) -k$(k) $(SCAFFOLD_DE_OPTIONS) -l$($*_l) -s$($*_s) -n$($*_n) $($*_de)
scopt += $(v) $(dbopt) $(SS) -k$(k)
ifdef G
scopt += -G$(G)
endif

# abyss-fac parameters
ifdef G
override facopt = -G$(G)
endif

# BWA-SW parameters
# bwaswopt passes the thread count to bwa.
bwaswopt=-t$j
# Default options for `bwa bwasw`. The value is deliberately not wrapped
# in quotes: make keeps quotes as part of the value, and the shell would
# then hand all four flags to bwa as a single argument, so that only the
# first flag would be parsed.
BWASW_OPTIONS=-b9 -q16 -r1 -w500

# Remove environment variables
# The input parameters and the per-library variables are not exported
# to the commands run by the recipes.
unexport in se $(lib) $(pe) $(mp) $(long)

# Check the mandatory parameters
# Each failed check adds a recipe to the double-colon target `error`.
# A failed check defines `error` before `default`, which makes it the
# goal that runs when no target is named on the command line.

ifndef name
error::
	@echo 'abyss-pe: missing parameter `name`' >&2
endif
ifndef k
error::
	@echo 'abyss-pe: missing parameter `k`' >&2
endif
ifeq (,$(lib)$(in)$(se)$(lr))
error::
	@echo 'abyss-pe: missing parameter `lib`, `in`, `se`, or `lr`' >&2
endif

ifndef B
ifndef np
error::
	@echo 'abyss-pe: must specify either `B` or `np` for the Bloom filter and MPI modes respectively. Bloom filter mode is recommended.' >&2
endif
endif

# Declared here, ahead of the unconditional `error` recipe below, so
# that `default` is the first target when every check passes.
default:

# Printed after the specific messages; `false` makes the run fail.
error::
	@echo 'Try `abyss-pe help` for more information.' >&2
	@false

# Help and version messages

# help: print a short usage summary that points to the man pages and
# to the addresses for bug reports.
help:
	@echo 'Usage: abyss-pe [OPTION]... [PARAMETER=VALUE]... [COMMAND]...'
	@echo 'Assemble reads into contigs and scaffolds. ABySS is a de novo'
	@echo 'sequence assembler intended for short paired-end reads and'
	@echo 'genomes of all sizes. See the abyss-pe man page for documentation'
	@echo 'of assembly parameters and commands. abyss-pe is a Makefile'
	@echo 'script, and so options of `make` may also be used with abyss-pe.'
	@echo 'See the `make` man page for documentation.'
	@echo
	@echo 'Report bugs to https://github.com/bcgsc/abyss/issues or abyss-users@bcgsc.ca.'

# version: print the abyss-pe version, authors and copyright notice.
version:
	@echo "abyss-pe (ABySS) 2.3.4"
	@echo "Written by Shaun Jackman and Anthony Raymond."
	@echo
	@echo "Copyright 2012 Canada's Michael Smith Genome Science Centre"

# versions: after the abyss-pe version, print PATH and the --version
# output of every program the pipeline may call. Lines prefixed with
# `-` keep going when the program is missing or fails.
versions: version
	@echo PATH=$(PATH)
	@ABYSS --version; echo
	@-ABYSS-P --version; echo
	@AdjList --version; echo
	@DistanceEst --version; echo
	@MergeContigs --version; echo
	@MergePaths --version; echo
	@Overlap --version; echo
	@PathConsensus --version; echo
	@PathOverlap --version; echo
	@PopBubbles --version; echo
	@SimpleGraph --version; echo
	@abyss-fac --version; echo
	@abyss-filtergraph --version; echo
	@abyss-fixmate --version; echo
	@abyss-map --version; echo
	@abyss-scaffold --version; echo
	@abyss-sealer --version; echo
	@abyss-todot --version; echo
	@abyss-rresolver-short --version; echo
	@$(align) --version; echo
	@awk --version; echo
	@sort --version; echo
	@# Query the configured launcher, i.e. the one the assembly recipes
	@# run, rather than whichever mpirun happens to be first in PATH.
	@-$(mpirun) --version

# Determine the default target
# Prerequisites are added to `default` in pipeline order. startDb and
# finishDb bracket the assembly when a database is in use.
ifdef db
default: startDb
endif
default: unitigs
# Contigs need paired-end input.
ifneq (,$(in))
default: contigs contigs-graph
endif
# Scaffolds need mate-pair libraries; long scaffolds also need `long`.
ifneq (,$(mp))
default: scaffolds scaffolds-graph
ifneq (,$(long))
default: long-scaffs long-scaffs-graph
endif
endif
ifdef db
default: finishDb
endif
default: stats

# Define the commands (phony targets)
# Each command name maps to the file or files that it produces.

# Unitig stage
unitigs: $(name)-unitigs.fa
unitigs-graph: $(name)-unitigs.$(g)

# Paired-end alignments against the unitigs
pe-index: $(name)-3.fa.fm
pe-sam: $(pe:%=%-3.sam.gz)
pe-bam: $(pe:%=%-3.bam.bai)

# Contig stage
contigs: $(name)-contigs.fa
contigs-graph: $(name)-contigs.$(g)

# Mate-pair alignments against the contigs
mp-index: $(name)-6.fa.fm
mp-sam: $(mp:%=%-6.sam.gz)
mp-bam: $(mp:%=%-6.bam.bai)

# Scaffold stage
scaffolds: $(name)-scaffolds.fa
scaffolds-graph: $(name)-scaffolds.$(g)
seal-scaffolds: $(name)-scaffolds-sealed.fa
scaftigs: $(name)-scaftigs.fa $(name)-scaftigs.agp

# Long-sequence scaffold stage
long-scaffs: $(name)-long-scaffs.fa
long-scaffs-graph: $(name)-long-scaffs.$(g)

all: default bam stats

# clean: remove the intermediate files matched by the patterns below
# from the working directory.
clean:
	rm -f *.adj *.asqg *.dot *.gfa *.sam *.txt \
		*.sam.gz *.hist *.dist *.path *.path[123]

# Commands that do not name files.
ifdef db
.PHONY: startDb finishDb
endif
.PHONY: all clean help version versions
.PHONY: bam default stats
.PHONY: unitigs unitigs-graph
.PHONY: pe-index pe-sam pe-bam contigs contigs-graph
.PHONY: mp-index mp-sam mp-bam scaffolds scaffolds-graph
.PHONY: scaftigs long-scaffs long-scaffs-graph seal-scaffolds

# Delete the target of a recipe that fails, and keep intermediate files.
.DELETE_ON_ERROR:
.SECONDARY:

# Utilities

# Index a FASTA file with abyss-index, producing the .fm file.
%.fa.fm: %.fa
	abyss-index $(v) $<

# Same program with --fai, producing the .fai file.
%.fa.fai: %.fa
	abyss-index $(v) --fai $<

# Convert a compressed SAM file to BAM.
%.bam: %.sam.gz
	samtools view -Sb $< -o $@

# Index a BAM file.
%.bam.bai: %.bam
	samtools index $<

# Assemble unitigs

ifdef db
# startDb: write two text files into the working directory.
#   name.txt  the database file name, $(name).sqlite
#   db.txt    one value per line: an identifier, library, strain,
#             species, name, k, and the concatenation of lib, in and se
# The identifier joins the epoch time, the user name and a number that
# bc computes from $RANDOM % 1000 + 1. The $(shell ...) calls run when
# make expands this recipe.
# NOTE(review): presumably the ABySS programs given --db read these
# files; that is not visible in this makefile. finishDb removes db.txt.
startDb:
	@echo -e \
	$(name)".sqlite\n"\
	> name.txt
	@echo -e \
	$(shell echo `date +%s`"_"`whoami`_)\
	$(shell echo $$RANDOM \% 1000 \+ 1 | bc)"\n"\
	$(library)"\n"\
	$(strain)"\n"\
	$(species)"\n"\
	$(name)"\n"\
	$(k)"\n"\
	$(lib)$(in)$(se)\
	> db.txt
endif

# Build <stem>-1.fa with the assembler selected by B, np and K:
#   B            abyss-bloom-dbg
#   np and K     abyss-paired-dbg-mpi, run through mpirun
#   np only      ABYSS-P, run through mpirun
#   K only       abyss-paired-dbg, which also writes the graph
#   none         ABYSS
ifdef B
%-1.fa:
	$(gtime) $(stack) abyss-bloom-dbg $(abyssopt) $(ABYSS_OPTIONS) $(in) $(se) > $@
else
ifdef np
ifdef K
%-1.fa:
	$(gtime) $(mpirun) -np $(np) abyss-paired-dbg-mpi $(abyssopt) $(ABYSS_OPTIONS) -o $*-1.fa $(in) $(se)
else
%-1.fa:
	$(gtime) $(mpirun) -np $(np) ABYSS-P $(abyssopt) $(ABYSS_OPTIONS) -o $@ $(in) $(se)
endif
else
ifdef K
%-1.fa %-1.$(g):
	$(gtime) abyss-paired-dbg $(abyssopt) $(ABYSS_OPTIONS) -o $*-1.fa -g $*-1.$(g) $(in) $(se)
else
%-1.fa:
	$(gtime) ABYSS $(abyssopt) $(ABYSS_OPTIONS) -o $@ $(in) $(se)
endif
endif
endif

# Find overlapping contigs

%-1.$(g): %-1.fa
	$(gtime) AdjList $(alopt) --$(g) $< >$@

# Resolve repeats with short reads
# In Bloom filter mode abyss-rresolver-short writes the -1-rr sequence
# and graph files. Otherwise the -1-rr files are symbolic links to the
# -1 files.

ifdef B
%-1-rr.fa %-1-rr.$g: %-1.fa %-1.$g
	$(gtime) abyss-rresolver-short $v -b$B -f0.8 -j$j -k$k \
	$(RRESOLVER_OPTIONS) -h $*-1-rr --$g -c $*-1-rr.fa \
	-g $*-1-rr.$g $^ $(in) $(se)
else
# -f replaces a link left over from an earlier run instead of failing
# with "File exists", matching the other link rules in this file.
%-1-rr.fa: %-1.fa
	ln -sf $< $@

%-1-rr.$g: %-1.$g
	ln -sf $< $@
endif

# Remove shim contigs
# abyss-filtergraph writes the filtered graph to <stem>-2.<g>1 and
# prints the path file; MergeContigs then builds the -2 files.

%-2.$(g)1 %-1-rr.path: %-1-rr.$(g) %-1-rr.fa
	$(gtime) abyss-filtergraph $(v) --$(g) $(fgopt) $(FILTERGRAPH_OPTIONS) -k$(k) -g $*-2.$(g)1 $^ >$*-1-rr.path

%-2.fa %-2.$(g): %-1-rr.fa %-2.$(g)1 %-1-rr.path
	$(gtime) MergeContigs --$(g) $(mcopt) -g $*-2.$(g) -o $*-2.fa $^

# Pop bubbles
# PopBubbles writes the graph <stem>-3.<g> and prints <stem>-2.path.

%-2.path %-3.$(g): %-2.fa %-2.$(g)
	$(gtime) PopBubbles $(v) --$(g) -j$(j) -k$(k) $(SS) $(pbopt) $(POPBUBBLES_OPTIONS) -g $*-3.$(g) $^ >$*-2.path

# Besides its target, the second command of this recipe writes
# <stem>-indel.fa from <stem>-2.path and <stem>-1-rr.fa.
%-3.fa: %-2.fa %-2.$(g) %-2.path
	$(gtime) MergeContigs $(mcopt) -o $@ $^
	awk '!/^>/ {x[">" $$1]=1; next} {getline s} $$1 in x {print $$0 "\n" s}' \
		$*-2.path $*-1-rr.fa >$*-indel.fa

# The unitigs are the -3 files under a descriptive name.
%-unitigs.fa: %-3.fa
	ln -sf $< $@

%-unitigs.$(g): %-3.$(g)
	ln -sf $< $@

# Estimate distances between unitigs
# In the pattern rules below the stem (%) is a library name, and
# $(strip $($*)) expands to the files assigned to that library.

# Align a library to the unitigs and pass the alignments through
# $(fixmate), which writes the histogram file; keep the sorted
# alignments as compressed SAM.
%-3.sam.gz %-3.hist: $(name)-3.fa
	$(gtime) $(align) $(mapopt) $(strip $($*)) $< \
		|$(fixmate) $(fmopt) -h $*-3.hist \
		|sort -snk3 -k4 \
		|$(gzip) >$*-3.sam.gz

# Same pipeline, keeping the alignments as BAM.
%-3.bam %-3.hist: $(name)-3.fa
	$(gtime) $(align) $(mapopt) $(strip $($*)) $< \
		|$(fixmate) $(fmopt) -h $*-3.hist \
		|sort -snk3 -k4 \
		|samtools view -Sb - -o $*-3.bam

# Three alternative rules produce <lib>-3.dist: from a saved SAM file,
# from a saved BAM file, or straight from the reads without keeping the
# alignments.
# NOTE(review): which rule make selects depends on its implicit-rule
# search and on which files already exist; confirm before reordering.
%-3.dist: %-3.sam.gz %-3.hist
	gunzip -c $< \
	|$(gtime) $(DistanceEst) $(deopt) -o $@ $*-3.hist

%-3.dist: %-3.bam %-3.hist
	$(gtime) samtools view -h $< \
	|$(DistanceEst) $(deopt) -o $@ $*-3.hist

%-3.dist: $(name)-3.fa
	$(gtime) $(align) $(mapopt) $(strip $($*)) $< \
		|$(fixmate) $(fmopt) -h $*-3.hist \
		|sort -snk3 -k4 \
		|$(DistanceEst) $(deopt) -o $@ $*-3.hist

# The distance estimate files of all paired-end libraries.
dist=$(addsuffix -3.dist, $(pe))

# If the only library is named after the assembly, the pattern rules
# above already build $(name)-3.dist. Otherwise the per-library
# estimates are combined by abyss-todot and the per-library BAM files
# by samtools merge.
ifneq ($(name)-3.dist, $(dist))
$(name)-3.dist: $(name)-3.fa $(dist)
	$(gtime) abyss-todot $v --dist -e $^ >$@

$(name)-3.bam: $(addsuffix -3.bam, $(pe))
	$(gtime) samtools merge -r $@ $^
endif

# Find overlaps between contigs
# Overlap writes both the -4 sequence file and the -4 graph.

%-4.fa %-4.$(g): %-3.fa %-3.$(g) %-3.dist
	$(gtime) Overlap $(v) --$(g) $(SS) $(OVERLAP_OPTIONS) -k$(k) -g $*-4.$(g) -o $*-4.fa $^

# Assemble contigs
# Three path files are produced in turn: SimpleGraph (path1),
# MergePaths (path2) and PathOverlap --assemble (path3).

%-4.path1: %-4.$(g) %-3.dist
	$(gtime) $(stack) SimpleGraph $(v) $(sgopt) $(SIMPLEGRAPH_OPTIONS) -j$(j) -k$(k) -o $@ $^

# MergePaths reads the two concatenated .fai files from standard input.
%-4.path2: %-4.path1 %-3.fa.fai %-4.fa.fai
	cat $*-3.fa.fai $*-4.fa.fai \
		|$(gtime) MergePaths $(mpopt) $(MERGEPATHS_OPTIONS) -o $@ - $<

%-4.path3: %-4.$(g) %-4.path2
	PathOverlap --assemble $(poopt) $(SS) $^ >$@

# Build the -5 path and graph files and the contig sequences (-6.fa).
# Defining cs selects the colour-space variant of this step.
ifdef cs

# The -5 graph and path are links to the -4 graph and path3.
%-5.$(g) %-5.path: %-4.$(g) %-4.path3
	ln -sf $*-4.$(g) $*-5.$(g)
	ln -sf $*-4.path3 $*-5.path

%-cs.fa: %-3.fa %-4.fa %-4.$(g) %-4.path3
	cat $(wordlist 1, 2, $^) |$(gtime) MergeContigs $(mcopt) -o $@ - $(wordlist 3, 4, $^)

# Convert colour-space sequence to nucleotides

%-6.fa: %-cs.fa
	$(gtime) KAligner $(v) --seq -m -j$(j) -l$(l) $(in) $(se) $< \
		|Consensus $(v) -o $@ $<

else

# PathConsensus reads the concatenated -3 and -4 sequences from
# standard input and writes the -5 path, sequence and graph files.
%-5.path %-5.fa %-5.$(g): %-3.fa %-4.fa %-4.$(g) %-4.path3
	cat $(wordlist 1, 2, $^) \
		|$(gtime) $(stack) PathConsensus $(v) --$(g) -k$(k) $(pcopt) $(PATHCONSENSUS_OPTIONS) -o $*-5.path -s $*-5.fa -g $*-5.$(g) - $(wordlist 3, 4, $^)

%-6.fa: %-3.fa %-4.fa %-5.fa %-5.$(g) %-5.path
	cat $(wordlist 1, 3, $^) |$(gtime) MergeContigs $(mcopt) -o $@ - $(wordlist 4, 5, $^)

endif

# The contig graph is made the same way in both variants.
%-6.$(g): %-5.$(g) %-5.path
	$(gtime) PathOverlap --overlap $(poopt) --$(g) $^ >$@

# The contigs are the -6 files under a descriptive name.
%-contigs.fa: %-6.fa
	ln -sf $< $@

%-contigs.$(g): %-6.$(g)
	ln -sf $< $@

# Estimate distances between contigs
# In the pattern rules below the stem (%) is a library name, and
# $(strip $($*)) expands to the files assigned to that library.

# Align a library to the contigs and pass the alignments through
# $(fixmate), which writes the histogram file; keep the sorted
# alignments as compressed SAM.
%-6.sam.gz %-6.hist: $(name)-6.fa
	$(gtime) $(align) $(mapopt) $(strip $($*)) $< \
		|$(fixmate) $(fmopt) -h $*-6.hist \
		|sort -snk3 -k4 \
		|$(gzip) >$*-6.sam.gz

# Same pipeline, keeping the alignments as BAM.
%-6.bam %-6.hist: $(name)-6.fa
	$(gtime) $(align) $(mapopt) $(strip $($*)) $< \
		|$(fixmate) $(fmopt) -h $*-6.hist \
		|sort -snk3 -k4 \
		|samtools view -Sb - -o $*-6.bam

# Three alternative rules produce <lib>-6.dist.dot with the scaffolding
# options of DistanceEst: from a saved SAM file, from a saved BAM file,
# or straight from the reads without keeping the alignments.
# NOTE(review): which rule make selects depends on its implicit-rule
# search and on which files already exist; confirm before reordering.
%-6.dist.dot: %-6.sam.gz %-6.hist
	gunzip -c $< \
	|$(gtime) $(DistanceEst) $(scaffold_deopt) -o $@ $*-6.hist

%-6.dist.dot: %-6.bam %-6.hist
	samtools view -h $< \
	|$(gtime) $(DistanceEst) $(scaffold_deopt) -o $@ $*-6.hist

%-6.dist.dot: $(name)-6.fa
	$(gtime) $(align) $(mapopt) $(strip $($*)) $< \
		|$(fixmate) $(fmopt) -h $*-6.hist \
		|sort -snk3 -k4 \
		|$(DistanceEst) $(scaffold_deopt) -o $@ $*-6.hist

# Scaffold
# abyss-scaffold takes the contig graph and the distance estimates of
# every mate-pair library. It prints the scaffold paths and writes a
# graph to <target>.dot.

%-6.path: $(name)-6.$(g) $(addsuffix -6.dist.dot, $(mp))
	$(gtime) abyss-scaffold $(scopt) -s$(S) -n$(N) -g $@.dot $(SCAFFOLD_OPTIONS) $^ >$@

# PathConsensus writes the -7 path, graph and sequence files.
%-7.path %-7.$(g) %-7.fa: %-6.fa %-6.$(g) %-6.path
	$(gtime) $(stack) PathConsensus $(v) --$(g) -k$(k) $(pcopt) $(PATHCONSENSUS_OPTIONS) -s $*-7.fa -g $*-7.$(g) -o $*-7.path $^

# MergeContigs reads the concatenated -6 and -7 sequences from standard
# input and writes the scaffold sequences.
%-8.fa: %-6.fa %-7.fa %-7.$(g) %-7.path
	cat $(wordlist 1, 2, $^) \
		|$(gtime) MergeContigs $(mcopt) -o $@ - $(wordlist 3, 4, $^)

%-8.$(g): %-7.$(g) %-7.path
	$(gtime) PathOverlap --overlap $(poopt) --$(g) $^ >$@

# Scaffold using linked reads
ifdef lr

# Tigmint

# Options for mapping the reads to the draft assembly.
lr_l ?= $(l)
override lrmapopt = $(v) -j$(j) -l$(lr_l) $(LR_MAP_OPTIONS)

# Options for abyss-scaffold
lr_s ?= 1000-100000
lr_n ?= 5-20

# Minimum AS/Read length ratio
tigmint_as ?= 0.65

# Maximum number of mismatches
tigmint_nm ?= 5

# Minimum mapping quality threshold
tigmint_mapq ?= 0

# Maximum distance between reads to be considered the same molecule
tigmint_d ?= 50000

# Minimum number of spanning molecules
tigmint_n ?= 10

# Size of the window that must be spanned by molecules
tigmint_w ?= 1000

# Align paired-end reads to the draft genome, sort by BX tag,
# and create molecule extents BED.
# This rule streams the alignments and does not keep a BAM file.
%.lr.bed: %.fa.fai
	$(gtime) $(align) $(lrmapopt) $(lr_reads) $*.fa \
	| samtools sort -@$(j) -tBX -l0 -T$$(mktemp -u -t $@.XXXXXX) \
	| tigmint-molecule -a $(tigmint_as) -n $(tigmint_nm) -q $(tigmint_mapq) -d $(tigmint_d) - \
	| sort -k1,1 -k2,2n -k3,3n >$@

# Align paired-end reads to the draft genome and sort by BX tag.
# This rule keeps the sorted alignments as a BAM file.
%.lr.sortbx.bam: %.fa.fai
	$(gtime) $(align) $(lrmapopt) $(lr_reads) $*.fa \
	| samtools sort -@$(j) -tBX -T$$(mktemp -u -t $@.XXXXXX) -o $@

# Filter the BAM file, create molecule extents BED.
%.lr.bed: %.lr.sortbx.bam
	$(gtime) tigmint-molecule -a $(tigmint_as) -n $(tigmint_nm) -q $(tigmint_mapq) -d $(tigmint_d) $< \
	| sort -k1,1 -k2,2n -k3,3n >$@

# Cut sequences at assembly errors.
%.tigmint.fa: %.lr.bed %.fa %.fa.fai
	$(gtime) tigmint-cut -p$(j) -n$(tigmint_n) -w$(tigmint_w) -o $@ $*.fa $<

# ARCS
# Defaults for the arcs options of the same letter.
arcs_c ?= 2
arcs_d ?= 0
arcs_e ?= 30000
arcs_l ?= 0
arcs_m ?= 4-20000
arcs_r ?= 0.05
arcs_s ?= 98
arcs_z ?= 500

# Align reads and create a graph of linked contigs using ARCS.
# This rule streams the alignments into arcs without keeping them.
%.arcs.dist.gv: %.fa
	$(gtime) $(align) $(lrmapopt) $(lr_reads) $< \
	| abyss-fixmate-ssq --all --qname $(v) -l$(lr_l) $(FIXMATE_OPTIONS) \
	| arcs $(v) -c$(arcs_c) -d$(arcs_d) -e$(arcs_e) -l$(arcs_l) -m$(arcs_m) -r$(arcs_r) -s$(arcs_s) -z$(arcs_z) \
		-g $*.arcs.dist.gv --tsv=$*.arcs.tsv --barcode-counts=$*.arcs.barcode-counts.tsv /dev/stdin

# Align paired-end reads to the draft genome and do not sort.
# This rule keeps the alignments as compressed SAM.
%.lr.sortn.sam.gz: %.fa
	$(gtime) $(align) $(lrmapopt) $(lr_reads) $< \
	| abyss-fixmate-ssq --all --qname $(v) -l$(lr_l) $(FIXMATE_OPTIONS) \
	| $(gzip) >$@

# Create a graph of linked contigs using ARCS.
# This rule reads the alignments saved by the rule above.
%.arcs.dist.gv: %.lr.sortn.sam.gz
	gunzip -c $< \
	|$(gtime) arcs $(v) -c$(arcs_c) -d$(arcs_d) -e$(arcs_e) -l$(arcs_l) -m$(arcs_m) -r$(arcs_r) -s$(arcs_s) -z$(arcs_z) \
		-g $*.arcs.dist.gv --tsv=$*.arcs.tsv --barcode-counts=$*.arcs.barcode-counts.tsv /dev/stdin

# Create the FASTA file of ARCS scaffolds.
%.arcs.fa: %.fa %.arcs.path
	$(gtime) MergeContigs $(mcopt) -o $@ $^

# Scaffold using ARCS and abyss-scaffold.
# The scaffold paths are printed and a graph is written to <target>.dot.
%.arcs.path: %.arcs.dist.gv
	$(gtime) abyss-scaffold $(scopt) -s$(lr_s) -n$(lr_n) -g $@.dot $(LR_SCAFFOLD_OPTIONS) $< >$@

# With linked reads the final scaffolds are the -8 scaffolds after the
# Tigmint and ARCS steps.
%-scaffolds.fa: %-8.tigmint.arcs.fa
	ln -sf $< $@

else

# Without linked reads the final scaffolds are the -8 scaffolds.
%-scaffolds.fa: %-8.fa
	ln -sf $< $@

endif

# The scaffold graph is the -8 graph under a descriptive name.
%-scaffolds.$g: %-8.$g
	ln -sf $< $@

# Sealed Scaffold
# k values given to abyss-sealer.
sealer_ks?=-k90 -k80 -k70 -k60 -k50 -k40 -k30

# abyss-sealer is given the output prefix <stem>-8.
# NOTE(review): the target name assumes that it writes
# <prefix>_scaffold.fa; confirm against the abyss-sealer documentation.
%-8_scaffold.fa: %-8.fa
	$(gtime) abyss-sealer -v -j$j --print-flanks -o$*-8 -S$< $(sealer_ks) $(SEALER_OPTIONS) $(in) $(se)

# -f replaces a link left over from an earlier run instead of failing
# with "File exists", matching the other link rules in this file.
%-scaffolds-sealed.fa: %-8_scaffold.fa
	ln -sf $< $@

# Scaftig
# The scaftig files are the AGP outputs made from the scaffolds.

%-scaftigs.fa: %-scaffolds-agp.fa
	ln -sf $< $@

%-scaftigs.agp: %-scaffolds.agp
	ln -sf $< $@

# Transcriptome assisted scaffolding

# Build the BWA index of a FASTA file.
%.fa.bwt: %.fa
	$(gtime) bwa index $<

# Align the sequences of a `long` library (named by the stem) to the
# scaffolds with bwa mem.
%-8.sam.gz: $(name)-8.fa.bwt
	$(gtime) bwa mem -a -t$(j) -S -P -k$(l) $(name)-8.fa $(strip $($*)) \
		|$(gzip) >$@

# Estimate distances with abyss-longseqdist, dropping lines that
# contain "l=".
%-8.dist.dot: %-8.sam.gz
	$(gtime) abyss-longseqdist -k$(k) $(LONGSEQDIST_OPTIONS) $< \
		|grep -v "l=" >$@

# Scaffold with the distance estimates of every `long` library.
%-8.path: $(name)-8.$(g) $(addsuffix -8.dist.dot, $(long))
	$(gtime) abyss-scaffold $(scopt) -s$(S) -n1 -g $@.$(g) $(SCAFFOLD_OPTIONS) $^ >$@

# PathConsensus writes the -9 path, graph and sequence files.
%-9.path %-9.$(g) %-9.fa: %-8.fa %-8.$(g) %-8.path
	$(gtime) $(stack) PathConsensus $(v) --$(g) -k$(k) $(pcopt) $(PATHCONSENSUS_OPTIONS) -s $*-9.fa -g $*-9.$(g) -o $*-9.path $^

# MergeContigs reads the concatenated -8 and -9 sequences from standard
# input.
%-10.fa: %-8.fa %-9.fa %-9.$(g) %-9.path
	cat $(wordlist 1, 2, $^) \
		|$(gtime) MergeContigs $(mcopt) -o $@ - $(wordlist 3, 4, $^)

%-10.$(g): %-9.$(g) %-9.path
	$(gtime) PathOverlap --overlap $(poopt) --$(g) $^ >$@

# The long scaffolds are the -10 files under a descriptive name.
%-long-scaffs.fa: %-10.fa
	ln -sf $< $@

%-long-scaffs.$(g): %-10.$(g)
	ln -sf $< $@

# finishDb: remove the db.txt file from the working directory.
ifdef db
finishDb:
	@rm -f db.txt
endif

# Create the final BAM file
# Index the alignments to the most advanced assembly that the given
# inputs allow: scaffolds, then contigs, then unitigs.

ifneq (,$(mp))
bam: $(name)-scaffolds.bam.bai
else ifneq (,$(in))
bam: $(name)-contigs.bam.bai
else
bam: $(name)-unitigs.bam.bai
endif

# Align the single-end reads to the unitigs and sort the alignments
# with samtools.
$(name)-unitigs.bam: %.bam: %.fa
	$(gtime) $(align) $(v) -j$(j) -l$(l) $(ALIGNER_OPTIONS) $(se) $< \
		|samtools view -Su - |samtools sort -o - - >$@

# Align the files of every library (lib, pe and mp, duplicates removed)
# to the contigs or scaffolds and pass the alignments through
# $(fixmate).
$(name)-contigs.bam $(name)-scaffolds.bam: %.bam: %.fa
	$(gtime) $(align) $(v) -j$(j) -l$(l) $(ALIGNER_OPTIONS) \
		$(call map,deref,$(sort $(lib) $(pe) $(mp))) $< \
		|$(fixmate) $(v) $(FIXMATE_OPTIONS) \
		|sort -snk3 -k4 \
		|samtools view -Sb - >$@

# Align the variants to the assembly
# The bubble and indel sequences of the assembly are concatenated and
# aligned with bwa bwasw, and the alignments are sorted with samtools.

%-variants.bam: %.fa.bwt
	$(gtime) bwa bwasw -t$(j) $*.fa <(cat $(name)-bubbles.fa $(name)-indel.fa) \
		|samtools view -Su - |samtools sort -o - - >$@

# Pile up the alignments and write a bgzip-compressed VCF.
%-variants.vcf.gz: %.fa %-variants.bam
	$(gtime) samtools mpileup -Buf $^ |bcftools view -vp1 - |bgzip >$@

# Index a compressed VCF with tabix.
%.gz.tbi: %.gz
	$(gtime) tabix -pvcf $<

# Calculate assembly contiguity statistics
# The report is produced as a table, a link without extension, CSV and
# Markdown, plus HTML when the Markdown converter is installed.

stats: $(name)-stats.tab $(name)-stats $(name)-stats.csv $(name)-stats.md
ifneq (,$(shell command -v $(MARKDOWN)))
stats: $(name)-stats.html
endif

%-stats: %-stats.tab
	ln -sf $< $@

# Prerequisites of the table are added one assembly stage at a time,
# so abyss-fac receives them in pipeline order through $^.
$(name)-stats.tab: %-stats.tab: %-unitigs.fa
ifneq (,$(in))
$(name)-stats.tab: %-stats.tab: %-contigs.fa
endif
ifneq (,$(mp))
$(name)-stats.tab: %-stats.tab: %-scaffolds.fa
endif
ifneq (,$(long))
$(name)-stats.tab: %-stats.tab: %-long-scaffs.fa
endif
# tee writes the table to the file and also prints it.
$(name)-stats.tab:
	abyss-fac $(facopt) $(FAC_OPTIONS) $^ |tee $@

# Tab-separated to comma-separated.
%.csv: %.tab
	tr '\t' , <$< >$@

# Tab-separated to Markdown.
%.md: %.tab
	abyss-tabtomd $< >$@

# Markdown to HTML.
%.html: %.md
	$(MARKDOWN) $< >$@

# Find breakpoints in the alignments

%.break: %.sam.gz
	$(gtime) abyss-samtobreak $(SAMTOBREAK_OPTIONS) $< >$@

# Align the contigs to the reference
# `ref` names the reference; the variable of that name holds its file.

%-$(ref).sam.gz: %.fa
	$(gtime) bwa bwasw $(bwaswopt) $(BWASW_OPTIONS) $($(ref)) $< |$(gzip) >$@

# Create an AGP file and FASTA file of scaftigs from scaffolds
# abyss-fatoagp prints the AGP file and writes the FASTA file named
# with -f.

%.agp %-agp.fa: %.fa
	$(gtime) abyss-fatoagp $(FATOAGP_OPTIONS) -f $*-agp.fa $< >$*.agp

# Report ABySS configuration variable(s) and value(s) currently set.

# Names of the variables printed by `env`. ssq_t is the suffix variable
# defined when ssq is set.
override varList := a b c d e E G j k l m n N p q s S t v cs pi \
	np pe lib mp se SS hostname xtip \
	ssq ssq_t path name in mpirun \
	aligner long ref fixmate DistanceEst \
	map deref abyssopt fgopt pbopt \
	align mapopt fmopt deopt \
	pcopt sgopt bwaswopt \
	ABYSS_OPTIONS FILTERGRAPH_OPTIONS POPBUBBLES_OPTIONS \
	OVERLAP_OPTIONS SIMPLEGRAPH_OPTIONS MERGEPATHS_OPTIONS \
	SCAFFOLD_OPTIONS ALIGNER_OPTIONS MAP_OPTIONS FAC_OPTIONS \
	FIXMATE_OPTIONS BWASW_OPTIONS FATOAGP_OPTIONS SAMTOBREAK_OPTIONS \
	RRESOLVER_OPTIONS MARKDOWN

# env is a command, not a file: declare it phony so that a file named
# `env` in the working directory cannot suppress the report.
.PHONY: env

# env: print a legend, then one line per variable of varList with its
# value and its origin as reported by $(origin).
env:
	@echo 'List of ABySS configuration variables currently set:'
	@echo '[environment], if variable was inherited from the environment.'
	@echo '[command line], if variable was defined on the command line.'
	@echo '[file], if variable was defined in (this) makefile.'
	@echo '[override], if variable was defined with an override directive in (this) makefile.'

	@$(foreach var,$(varList),\
		echo -e $(var)" = "$($(var))"\t["$(origin $(var))"]";)
