#!/bin/sh
# test-rawdog: run some basic tests to make sure rawdog's working.
# Copyright 2013, 2014, 2015, 2016, 2018 Adam Sampson <ats@offog.org>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

# Default to the C locale, to avoid localised error messages.
default_LC_ALL="C"

# Try to find generic UTF-8 and Japanese UTF-8 locales. (They may not be
# installed, in which case the variable is left empty.)
# "head -n 1" is used because the historical "head -1" spelling is no longer
# specified by POSIX.
utf8_LC_ALL="$(locale -a | grep -a -i 'utf-\?8' | head -n 1)"
ja_LC_ALL="$(locale -a | grep -a -i 'ja_JP.utf-\?8' | head -n 1)"

# Default to UTC so that local times are reported consistently.
default_TZ="UTC"

# Directory holding the config, plugins and output of the test being run.
statedir="testauto"

# Hostname and ports to run the test server on.
serverhost="localhost"
timeoutport="8431"
httpport="8432"

# Connections to this host should time out. Empty unless set with -T.
# (This is distinct from timeoutport above: if you connect to timeoutport, it
# will accept the connection but not do anything, whereas this will time out
# while connecting.)
timeouthost=""

# Directory handed to the test server, and the base URL the tests use to fetch
# the files they place in it.
httpdir="$statedir/pub"
httpurl="http://$serverhost:$httpport"

# Print the command-line summary on stdout, then exit the script with
# status 1.
usage () {
	# Quoted delimiter: the help text is literal, nothing in it is expanded.
	cat <<'EOF'
Usage: test-rawdog [OPTION]...

-b                           Include tests that are known to fail
-k                           Keep going when a test fails
-r COMMAND                   How to invoke rawdog (default: "./rawdog")
-T HOST                      Host to test connection timeout with

Report bugs to <ats@offog.org>.
EOF
	exit 1
}

# Option defaults; each may be overridden on the command line below.
knownbad=false
keepgoing=false
rawdog="./rawdog"

# Parse the command-line options described by usage().
while getopts bkr:T: OPT; do
	case "$OPT" in
	b) knownbad=true ;;
	k) keepgoing=true ;;
	r) rawdog="$OPTARG" ;;
	T) timeouthost="$OPTARG" ;;
	# getopts sets OPT to "?" for anything it does not recognise.
	*) usage ;;
	esac
done

# Start the server, and kill it when this script exits.
# serverpid starts out empty so that the exit trap does nothing if the script
# exits before the server's PID has been recorded.
serverpid=""
trap 'test -n "$serverpid" && kill $serverpid' 0
# The server runs in the background while the tests below execute.
python testserver.py "$serverhost" "$timeoutport" "$httpport" "$httpdir" &
serverpid="$!"

# Overall result of the test run; becomes 1 as soon as any test fails.
exitcode=0

# Report a test failure.
# Arguments: words describing the failure, printed after "Test failed:".
# Sets exitcode to 1. Unless -k (keepgoing) was given, exits the script
# immediately with that status; otherwise returns so testing can continue.
die () {
	echo "Test failed:" "$@"
	exitcode=1
	if $keepgoing; then
		return 0
	fi
	exit $exitcode
}

# Reset the test environment: recreate empty state, plugin and HTTP
# directories, install a fresh copy of ./config, and restore the default
# locale and timezone in the exported environment.
cleanstate () {
	rm -fr $statedir $httpdir
	mkdir -p $statedir $statedir/plugins $httpdir
	cp config $statedir/config

	LC_ALL="$default_LC_ALL"
	TZ="$default_TZ"
	export LC_ALL TZ
}

# Append one line to the current test's config file.
# Arguments: $1 - the config line to append.
add () {
	local line="$1"
	echo "$line" >>$statedir/config
}

# Start a new test case.
# Arguments: $1 - description of the test, printed as a progress message.
# Resets the run counter used to number output files, wipes the state
# directories, and turns on "showtracebacks" in the fresh config.
begin () {
	cmdnum=0
	echo ">>> Testing $1"
	cleanstate
	add "showtracebacks true"
}

# Fail the test unless two strings are identical.
# Arguments: $1 - expected value; $2 - actual value.
equals () {
	[ "$1" = "$2" ] || die "expected '$1'; got '$2'"
}

# Fail the test for each named path that does not exist.
# Arguments: any number of paths to check.
exists () {
	local path
	for path in "$@"; do
		[ -e "$path" ] || die "expected $path to exist"
	done
}

# Fail the test for each named path that does exist.
# Arguments: any number of paths to check.
not_exists () {
	local path
	for path in "$@"; do
		[ ! -e "$path" ] || die "expected $path not to exist"
	done
}

# Fail the test unless two files both exist and have identical contents.
# Arguments: $1, $2 - the files to compare.
# cmp's own report of the first difference is left visible on stdout.
same () {
	exists "$1" "$2"
	cmp "$1" "$2" || die "expected $1 to have the same contents as $2"
}

# Fail the test unless a file contains every given pattern.
# Arguments: $1 - file to search (must exist);
#            remaining - grep basic regular expressions, each of which must
#            match at least one line of the file.
# For each pattern that is not found, the file is dumped to stdout and die is
# called.
contains () {
	local key
	local file="$1"
	exists "$file"
	shift
	for key in "$@"; do
		# -e stops a pattern that starts with "-" from being parsed as
		# grep options.
		if ! grep -q -e "$key" "$file"; then
			cat "$file"
			die "expected $file to contain '$key'"
		fi
	done
}

# Fail the test if a file contains any of the given patterns.
# Arguments: $1 - file to search (must exist);
#            remaining - grep basic regular expressions, none of which may
#            match any line of the file.
# For each pattern that is found, the file is dumped to stdout and die is
# called.
not_contains () {
	local key
	local file="$1"
	exists "$file"
	shift
	for key in "$@"; do
		# -e stops a pattern that starts with "-" from being parsed as
		# grep options; without it grep would fail with a usage error
		# and the check would pass without searching anything.
		if grep -q -e "$key" "$file"; then
			cat "$file"
			die "expected $file not to contain '$key'"
		fi
	done
}

# Run rawdog.
# Arguments: extra command-line arguments to pass to rawdog.
# Globals: increments cmdnum, and sets outfile to $statedir/out<cmdnum>,
# which receives rawdog's combined stdout and stderr.
# Returns: rawdog's exit status.
runf () {
	# Shell arithmetic avoids starting an expr process for every run.
	cmdnum=$((cmdnum + 1))
	outfile=$statedir/out$cmdnum
	# $rawdog is left unquoted, so a -r COMMAND that contains spaces is
	# split into a command and its arguments.
	$rawdog -d $statedir -V log$cmdnum "$@" >$outfile 2>&1
}

# Run rawdog, expecting it to exit 0.
# Arguments: extra command-line arguments for rawdog.
# On a non-zero exit, rawdog's captured output is shown and the test fails.
run () {
	if runf "$@"; then
		return 0
	fi
	cat $outfile
	die "exited non-0"
}

# Run rawdog, expecting it to crash with an exception message.
# Arguments: extra command-line arguments for rawdog.
# The test fails if rawdog exits 0, or if its output has no Python traceback.
runcrash () {
	runf "$@" && {
		cat $outfile
		die "exited 0"
	}

	contains $outfile "Traceback (most recent call last)"
}

# Run rawdog, expecting it to exit non-0 (but not crash).
# Arguments: extra command-line arguments for rawdog.
# The test fails if rawdog exits 0, or if its output has a Python traceback.
runn () {
	runf "$@" && {
		cat $outfile
		die "exited 0"
	}

	not_contains $outfile "Traceback (most recent call last)"
}

# Run rawdog, expecting no complaints.
# Arguments: extra command-line arguments for rawdog.
# As well as requiring exit status 0 (via run), the test fails if rawdog
# wrote anything to stdout or stderr.
runs () {
	run "$@"
	if [ ! -s $outfile ]; then
		return 0
	fi
	cat $outfile
	die "expected no output"
}

# Run rawdog, expecting it to exit 0 with a complaint containing the first
# arg.
# Arguments: $1 - pattern the output must contain;
#            remaining - command-line arguments for rawdog.
rune () {
	local expected="$1"
	shift
	run "$@"
	contains $outfile "$expected"
}

# Run rawdog, expecting it to exit non-0 (without crashing) with a complaint
# containing the first arg.
# Arguments: $1 - pattern the output must contain;
#            remaining - command-line arguments for rawdog.
runne () {
	local expected="$1"
	shift
	runn "$@"
	contains $outfile "$expected"
}

# Write a one-line plain-text file that is not a feed.
# Arguments: $1 - file to create (overwritten if it exists).
make_text () {
	printf '%s\n' 'This is manifestly not a feed.' >"$1"
}

# Write a small HTML 4.01 page that is not a feed.
# Arguments: $1 - file to create (overwritten if it exists).
make_html () {
	# Quoted delimiter: the page is literal text with nothing to expand.
	cat >"$1" <<'EOF'
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
            "http://www.w3.org/TR/html4/strict.dtd">
<html>
<head>
  <title>Not a feed</title>
</head>
<body>
  <p>This is manifestly not a feed.</p>
</body>
</html>
EOF
}

# Write an HTML page with extra markup spliced into its <head>.
# Arguments: $1 - file to create (overwritten if it exists).
# Stdin: the markup to insert after the <title> element.
make_html_head () {
	{
		cat <<'EOF'
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
            "http://www.w3.org/TR/html4/strict.dtd">
<html>
<head>
  <title>Not a feed</title>
EOF
		# Copy the caller's stdin into the middle of the page.
		cat
		cat <<'EOF'
</head>
<body>
  <p>This is manifestly not a feed.</p>
</body>
</html>
EOF
	} >"$1"
}

# Write an HTML page with extra markup spliced into the end of its <body>.
# Arguments: $1 - file to create (overwritten if it exists).
# Stdin: the markup to insert after the page's paragraph.
make_html_body () {
	{
		cat <<'EOF'
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
            "http://www.w3.org/TR/html4/strict.dtd">
<html>
<head>
  <title>Not a feed</title>
</head>
<body>
  <p>This is manifestly not a feed.</p>
EOF
		# Copy the caller's stdin into the page body.
		cat
		cat <<'EOF'
</body>
</html>
EOF
	} >"$1"
}

# Write a minimal RSS 1.0 (RDF) feed containing one item.
# Arguments: $1 - file to create (overwritten if it exists).
make_rss10 () {
	cat >"$1" <<EOF
<?xml version="1.0"?>
<rdf:RDF
  xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
  xmlns="http://purl.org/rss/1.0/"
>
  <channel rdf:about="http://example.org/feed.rdf">
    <title>example-feed-title</title>
    <link>http://example.org/</link>
    <description>example-feed-description</description>
    <items>
      <rdf:Seq>
        <rdf:li resource="http://example.org/item" />
      </rdf:Seq>
    </items>
  </channel>
  <item rdf:about="http://example.org/item">
    <title>example-item-title</title>
    <link>http://example.org/item</link>
    <description>example-item-description</description>
  </item>
</rdf:RDF>
EOF
}

# Write a minimal RSS 2.0 feed containing one item whose description is
# HTML wrapped in a CDATA section.
# Arguments: $1 - file to create (overwritten if it exists).
make_rss20 () {
	# Quoted delimiter: the feed is literal text with nothing to expand.
	cat >"$1" <<'EOF'
<rss version="2.0">
  <channel>
    <title>example-feed-title</title>
    <link>http://example.org/</link>
    <description>example-feed-description</description>
    <item>
      <title>example-item-title</title>
      <link>http://example.org/item</link>
      <description><![CDATA[<p>example-item-description</p>]]></description>
    </item>
  </channel>
</rss>
EOF
}

# Write an RSS 2.0 feed with one item whose description comes from stdin.
# Arguments: $1 - file to create (overwritten if it exists).
# Stdin: the content placed inside the item's <description> element.
make_rss20_desc () {
	{
		cat <<'EOF'
<rss version="2.0">
  <channel>
    <title>example-feed-title</title>
    <link>http://example.org/</link>
    <description>example-feed-description</description>
    <item>
      <title>example-item-title</title>
      <link>http://example.org/item</link>
      <description>
EOF
		# Copy the caller's stdin in as the description body.
		cat
		cat <<'EOF'
      </description>
    </item>
  </channel>
</rss>
EOF
	} >"$1"
}

# Publish an RSS 2.0 feed whose item description is read from stdin,
# subscribe to it, then update and write the output page, expecting rawdog
# to run silently.
write_desc () {
	local feedname="feed.rss"
	make_rss20_desc $httpdir/$feedname
	add "feed 0 $httpurl/$feedname"
	runs -uw
}

# Write a minimal Atom 1.0 feed containing one entry, with both the feed and
# the entry dated 2013-01-01 18:00 UTC.
# Arguments: $1 - file to create (overwritten if it exists).
make_atom10 () {
	# Quoted delimiter: the feed is literal text with nothing to expand.
	cat >"$1" <<'EOF'
<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
  <title>example-feed-title</title>
  <link href="http://example.org/"/>
  <updated>2013-01-01T18:00:00Z</updated>
  <author>
    <name>example-feed-author</name>
  </author>
  <id>http://example.org/feed-id</id>
  <entry>
    <title>example-item-title</title>
    <link href="http://example.org/item"/>
    <id>http://example.org/item-id</id>
    <updated>2013-01-01T18:00:00Z</updated>
    <summary>example-item-description</summary>
  </entry>
</feed>
EOF
}

# Write an Atom 1.0 feed with one entry, taking extra entry elements from
# stdin.
# Arguments: $1 - file to create (overwritten if it exists).
# Stdin: markup appended inside the <entry>, after its <updated> element.
make_atom10_with () {
	{
		cat <<'EOF'
<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
  <title>example-feed-title</title>
  <link href="http://example.org/"/>
  <updated>2013-01-01T18:00:00Z</updated>
  <author>
    <name>example-feed-author</name>
  </author>
  <id>http://example.org/feed-id</id>
  <entry>
    <title>example-item-title</title>
    <link href="http://example.org/item"/>
    <id>http://example.org/item-id</id>
    <updated>2013-01-01T18:00:00Z</updated>
EOF
		# Copy the caller's stdin into the entry.
		cat
		cat <<'EOF'
  </entry>
</feed>
EOF
	} >"$1"
}

# Write an Atom 1.0 feed containing a single, caller-described entry.
# Arguments: $1 - file to create (overwritten if it exists);
#            $2 - entry name, used as "<name>-title" and "<name>-description";
#            $3 - entry link URL;
#            $4 - entry id.
make_single () {
	local entry_name="$2"
	local entry_link="$3"
	local entry_id="$4"
	cat >"$1" <<EOF
<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
  <title>example-feed-title</title>
  <link href="http://example.org/"/>
  <updated>2013-01-01T18:00:00Z</updated>
  <author>
    <name>example-feed-author</name>
  </author>
  <id>http://example.org/feed-id</id>
  <entry>
    <title>${entry_name}-title</title>
    <link href="${entry_link}"/>
    <id>${entry_id}</id>
    <updated>2013-01-01T18:00:00Z</updated>
    <summary>${entry_name}-description</summary>
  </entry>
</feed>
EOF
}

# Write an RSS 2.0 feed with one numbered item for each integer in a range.
# Arguments: $1 - first item number;
#            $2 - last item number (inclusive);
#            $3 - file to create (overwritten if it exists).
# Item N has title "range-title-N-", link http://example.org/itemN and
# description "range-description-N".
make_range () {
	local num
	local first="$1"
	local last="$2"
	local target="$3"
	{
		cat <<'EOF'
<rss version="2.0">
  <channel>
    <title>example-feed-title</title>
    <link>http://example.org/</link>
    <description>example-feed-description</description>
EOF
		for num in $(seq $first $last); do
			cat <<EOF
    <item>
      <title>range-title-${num}-</title>
      <link>http://example.org/item${num}</link>
      <description><![CDATA[<p>range-description-${num}</p>]]></description>
    </item>
EOF
		done
		cat <<'EOF'
  </channel>
</rss>
EOF
	} >"$target"
}

# Write an RSS 2.0 feed with items numbered from 1.
# Arguments: the last item number and the file to create, passed on to
# make_range after a first item number of 1.
make_n () {
	set -- 1 "$@"
	make_range "$@"
}

# Make time.time() return a fixed value, by installing a plugin that replaces
# it.
# Arguments: $1 - the Python expression the fake time.time() returns.
fake_time () {
	local plugin="$statedir/plugins/fake_time"
	# Remove any earlier version together with its compiled form: when a
	# test calls this twice within one second the .pyc's timestamp might
	# not change, so a stale .pyc must not be left behind.
	rm -f $plugin.*
	cat >$plugin.py <<EOF
import time
def fake_time():
    return $1
time.time = fake_time
EOF
}

# Print the item titles that make_range generates, one per line.
# Arguments: $1 - first item number; $2 - last item number (inclusive).
range () {
	local first="$1"
	local last="$2"
	seq -f "range-title-%.f-" $first $last
}

# Fail the test unless the output page contains the title of every item in
# a range.
# Arguments: $1 - first item number; $2 - last item number (inclusive).
output_range () {
	# Unquoted on purpose: each title becomes one positional parameter.
	set -- $(range $1 $2)
	contains $statedir/output.html "$@"
}

# Fail the test if the output page contains the title of any item in a
# range.
# Arguments: $1 - first item number; $2 - last item number (inclusive).
not_output_range () {
	# Unquoted on purpose: each title becomes one positional parameter.
	set -- $(range $1 $2)
	not_contains $statedir/output.html "$@"
}

# Fail the test unless the output page contains the titles of items 1 to N.
# Arguments: the last item number, passed on to output_range after a first
# item number of 1.
output_n () {
	set -- 1 "$@"
	output_range "$@"
}

# --help is expected to print the usage message and exit 0.
begin "help"
rune "Usage:" --help

# An unrecognised option is expected to exit non-0, without a traceback, and
# show the usage message.
begin "unknown option"
runn --aubergine
contains $outfile "Usage:"

# A stray non-option argument is expected to be rejected in the same way.
begin "unnecessary argument"
runn aubergine
contains $outfile "Usage:"

# -v is expected to produce progress messages during an update.
begin "--verbose"
run -vu
contains $outfile "Starting update"

# "verbose false" is set both in the main config and in an extra config file
# given with -c; -v on the command line is still expected to take effect.
begin "--verbose overrides config"
add "verbose false"
echo "verbose false" >$statedir/config.inc
run -v -c config.inc -u
contains $outfile "Starting update"

# -l is expected to list the configured feed URLs before any update, and to
# show the feed's title once an update has fetched it.
begin "listing feeds"
make_rss20 $httpdir/0.rss
make_rss20 $httpdir/1.rss
add "feed 0 $httpurl/0.rss"
add "feed 0 $httpurl/1.rss"
run -l
contains $outfile $httpurl/0.rss $httpurl/1.rss
runs -u
run -l
contains $outfile "Title: example-feed-title"

# -f URL is expected to run silently for a feed that is in the config.
begin "updating one feed"
make_rss20 $httpdir/feed.rss
add "feed 0 $httpurl/feed.rss"
runs -u
runs -f $httpurl/feed.rss

# -f with a URL that is not in the config is expected to complain but still
# exit 0.
begin "updating nonexistant feed"
rune "No such feed" -f $httpurl/feed.rss

# Config syntax tests. Each test that ends in runne appends something invalid
# to the config and expects rawdog to exit non-0, without a traceback, with
# the given message; those ending in a bare runs expect the config to be
# accepted silently.

begin "bad config syntax"
add "foo"
runne "Bad line in config"

# A plugin whose startup hook raises is installed alongside the bad config
# line. runne requires that no traceback appears, so presumably the config
# error has to stop rawdog before the hook runs -- TODO confirm.
begin "config error is fatal"
add "foo"
cat >$statedir/plugins/crash.py <<EOF
import rawdoglib.plugins
def startup(rawdog, config):
    raise Exception("crash")
rawdoglib.plugins.attach_hook("startup", startup)
EOF
runne "Bad line in config"

# The bad line lives in a separate file named on the command line.
begin "config error in --config"
echo "foo" >$statedir/extra.conf
runne "Bad line in config" --config extra.conf

# The bad line lives in a separate file pulled in by an include directive.
begin "config error in include"
echo "foo" >$statedir/extra.conf
add "include extra.conf"
runne "Bad line in config"

begin "bad config directive"
add "foo bar"
runne "Unknown config command"

begin "bad boolean value in config"
add "sortbyfeeddate aubergine"
runne "Bad value"

begin "bad time value in config"
add "timeout aubergine"
runne "Bad value"

begin "bad integer value in config"
add "maxarticles aubergine"
runne "Bad value"

begin "bad inline feed argument"
add "feed 0 $httpurl/feed.rss aubergine"
runne "Bad feed argument"

# Indented lines following a directive are argument lines for it.
begin "bad feed argument line"
add "feed 0 $httpurl/feed.rss"
add "  aubergine"
runne "Bad argument line"

# The config is truncated first, so that the argument line is its first line.
begin "feed argument line with no feed"
: >$statedir/config
add "  allowduplicates true"
runne "First line in config cannot be an argument"

begin "feeddefaults on one line"
add "feeddefaults allowduplicates=true"
runs

begin "feeddefaults argument lines"
add "feeddefaults"
add "  allowduplicates true"
runs

# Argument lines after a directive that does not take them.
begin "argument lines in the wrong place"
add "tidyhtml false"
add "  allowduplicates true"
runne "Bad argument lines"

begin "feed with no time"
add "feed"
runne "Bad line in config"

begin "feed with no URL"
add "feed 3h"
runne "Bad line in config"

begin "define with no name"
add "define"
runne "Bad line in config"

begin "define with no value"
add "define thing"
runne "Bad line in config"

# A value set with "define myvar ..." is expected to replace __myvar__ in the
# page template.
begin "define"
add "define myvar This is my variable!"
echo "myvar(__myvar__)" >$statedir/page
add "pagetemplate page"
runs -uw
contains $statedir/output.html "myvar(This is my variable!)"

# With no config file at all, rawdog is expected to exit non-0 with a
# message.
begin "missing config file"
rm $statedir/config
runne "Can't read config file" -u

# A config file that exists but is empty is expected to be accepted.
begin "empty config file"
: >$statedir/config
runs -uw

# An "outputfile" setting in an extra config file is expected to redirect the
# output page, whether that file is named with -c or pulled in by an include
# directive: second.html must appear and output.html must not.
begin "--config and include"
make_rss20 $httpdir/feed.rss
add "feed 0 $httpurl/feed.rss"
runs -uw
exists $statedir/output.html
rm $statedir/output.html
echo "outputfile second.html" >$statedir/config.inc
runs -c config.inc -w
exists $statedir/second.html
not_exists $statedir/output.html
rm $statedir/second.html
add "include config.inc"
runs -w
exists $statedir/second.html
not_exists $statedir/output.html
rm $statedir/second.html

# A second -d option naming a directory that does not exist is passed after
# the -d $statedir that runf always supplies.
begin "missing state dir"
runn -d aubergine
contains $outfile "No aubergine directory"

# A state file with junk in it is expected to be reported as corrupt, with a
# non-0 exit and no traceback.
begin "corrupt state file"
echo this is not a valid state file >$statedir/state
runne "means the file is corrupt" -u

# A zero-length state file is expected to be reported the same way.
begin "empty state file"
touch $statedir/state
runne "means the file is corrupt" -u

# With splitstate on, a first update creates a per-feed file matching
# $statedir/feeds/*.state; that file is then overwritten with junk.
begin "corrupt splitstate file"
make_rss20 $statedir/simple.rss
add "splitstate true"
add "feed 0 simple.rss"
runs -u
echo this is not a valid state file >$(echo $statedir/feeds/*.state)
runne "means the file is corrupt" -u

# Crash recovery: make rawdog raise an exception in the middle of an update,
# then check that the next update runs cleanly. Repeated for a crash on the
# first run, on a later run, and on a run that adds a new feed, each with
# splitstate off and on.
for run in first second feed-adding; do
	for state in false true; do
		begin "recover from crash on $run run, splitstate $state"
		make_rss20 $statedir/0.rss
		add "splitstate $state"
		add "feed 0 0.rss"
		if [ "$run" != first ]; then
			runs -u
		fi
		if [ "$run" = feed-adding ]; then
			make_rss20 $statedir/1.rss
			add "feed 0 1.rss"
		fi
		# Crash while updating, so we have both state files open.
		cat >$statedir/plugins/crash.py <<EOF
import rawdoglib.plugins
def crash(*args):
    raise Exception("deliberate")
rawdoglib.plugins.attach_hook("post_update_feed", crash)
EOF
		runcrash -u
		contains $outfile "Exception: deliberate"
		# Remove the plugin so that the next update can succeed.
		rm $statedir/plugins/crash.py
		runs -u
	done
done

# The plugin replaces feedparser.parse with a function that always raises.
# rawdog is expected to report the error for that feed, including the
# exception's message, and still exit 0.
begin "exception raised by feedparser"
make_rss20 $statedir/feed.rss
add "feed 0 feed.rss"
cat >$statedir/plugins/crash.py <<EOF
import feedparser
def crash(*args, **kwargs):
    raise Exception("exception from feedparser")
feedparser.parse = crash
EOF
rune "Error fetching or parsing feed" -u
contains $outfile "exception from feedparser"
# With --no-locking, an update and write are expected to succeed even though
# fcntl.lockf has been made unusable.
begin "with --no-locking"
make_rss20 $statedir/simple.rss
add "splitstate true"
add "feed 0 simple.rss"
# Stub out lockf so it'll crash if invoked.
cat >$statedir/plugins/nolock.py <<EOF
import fcntl
fcntl.lockf = None
EOF
runs --no-locking -uw

# A helper script takes an exclusive lock on state.lock, creates lock.signal
# to show that the lock is held, and keeps the lock for five seconds. While
# it is held, rawdog --no-lock-wait is expected to exit 0 silently without
# writing the output page.
begin "with --no-lock-wait"
make_rss20 $statedir/simple.rss
add "feed 0 simple.rss"
runs -u
cat >$statedir/lock.py <<EOF
import fcntl
import time
f = open("$statedir/state.lock", "w+")
fcntl.lockf(f.fileno(), fcntl.LOCK_EX)
ff = open("$statedir/lock.signal", "w")
ff.close()
time.sleep(5)
f.close()
EOF
python $statedir/lock.py &
# Poll every 0.1s until the helper has signalled that it holds the lock.
while [ ! -e $statedir/lock.signal ]; do
	python -c 'import time; time.sleep(0.1)'
done
runs --no-lock-wait -w
not_exists $statedir/output.html
# lock.py will keep running, but harmlessly time out after a bit.

# rawdog is expected to run silently when the plugins directory is absent.
begin "no plugins dir"
rm -fr $statedir/plugins
runs -uw

# A non-Python file, and a hidden .py file that would raise if it were
# executed, are placed in the plugins directory; the run is expected to stay
# silent.
begin "ignore junk files in plugins dir"
make_rss20 $statedir/simple.rss
add "feed 0 simple.rss"
echo "this is junk" >$statedir/plugins/junk.txt
cat >$statedir/plugins/.hidden.py <<EOF
raise Exception("don't run me")
EOF
runs -uw

# Two plugins each attach a function to the same config_option hook, each
# handling one made-up option. Both functions are expected to be called.
# (The plugins use Python 2 print statements.)
begin "stacking hook functions"
cat >$statedir/plugins/a.py <<EOF
import rawdoglib.plugins
def opt(config, name, value):
    if name == "testa":
        print "saw-a"
        return False
    return True
rawdoglib.plugins.attach_hook("config_option", opt)
EOF
cat >$statedir/plugins/b.py <<EOF
import rawdoglib.plugins
def opt(config, name, value):
    if name == "testb":
        print "saw-b"
        return False
    return True
rawdoglib.plugins.attach_hook("config_option", opt)
EOF
add "testa foo"
add "testb foo"
run
contains $outfile saw-a saw-b

# Eight feeds are fetched with "numthreads 4"; the run is expected to be
# silent.
begin "numthreads 4"
for i in 1 2 3 4 5 6 7 8; do
	make_atom10 $httpdir/${i}.atom
	add "feed 0 $httpurl/${i}.atom"
done
add "numthreads 4"
runs -uw

# --dump URL is expected to print output that includes the feed's title.
begin "--dump"
make_atom10 $httpdir/feed.atom
run --dump $httpurl/feed.atom
contains $outfile example-feed-title

# --find is pointed at an HTML page whose <head> links to two Atom feeds; both
# feed URLs are expected to appear in the output.
begin "--find, successful"
make_atom10 $httpdir/feed1.atom
make_atom10 $httpdir/feed2.atom
make_html_head $httpdir/page.html <<EOF
<link rel="alternate" type="application/atom+xml" title="one" href="$httpurl/feed1.atom">
<link rel="alternate" type="application/atom+xml" title="two" href="$httpurl/feed2.atom">
EOF
run --find $httpurl/page.html
contains $outfile $httpurl/feed1.atom
contains $outfile $httpurl/feed2.atom

# With no feed links in the page, --find is expected to exit non-0 and print
# nothing containing a "/" (that is, no URL or path).
begin "--find, unsuccessful"
make_html_head $httpdir/page.html <<EOF
(no feeds in this file)
EOF
runn --find $httpurl/page.html
not_contains $outfile '/'

# A feed given as a plain filename; the file is created in the state
# directory.
begin "filename"
make_rss20 $statedir/simple.rss
add "feed 0 simple.rss"
runs -u

# A missing feed file is expected to be reported, with rawdog still exiting 0.
begin "filename not found"
add "feed 0 missing.rss"
rune "No such file" -u

# The same two cases, using a file: URL.
begin "file: URL"
make_rss20 $statedir/simple.rss
add "feed 0 file:simple.rss"
runs -u

begin "file: URL not found"
add "feed 0 file:missing.rss"
rune "No such file" -u

# Fetch each supported feed format over HTTP from the test server and check
# that the item's title and description reach the output page.
begin "http: URL and write, RSS 1.0"
make_rss10 $httpdir/simple.rdf
add "feed 0 $httpurl/simple.rdf"
runs -uw
contains $statedir/output.html example-item-title example-item-description

begin "http: URL and write, RSS 2.0"
make_rss20 $httpdir/simple.rss
add "feed 0 $httpurl/simple.rss"
runs -uw
contains $statedir/output.html example-item-title example-item-description

# The stock config is deleted first, so the feed line is the only thing in
# the config file.
begin "http: URL and write, RSS 2.0, empty config"
make_rss20 $httpdir/simple.rss
rm $statedir/config
add "feed 0 $httpurl/simple.rss"
runs -uw
contains $statedir/output.html example-item-title example-item-description

begin "http: URL and write, Atom 1.0"
make_atom10 $httpdir/simple.atom
add "feed 0 $httpurl/simple.atom"
runs -uw
contains $statedir/output.html example-item-title example-item-description

# Documents that are not feeds are expected to be reported as such, with
# rawdog still exiting 0.
begin "HTML rather than a feed"
make_html $httpdir/bad.html
add "feed 0 $httpurl/bad.html"
rune "could not be understood" -u

begin "plain text rather than a feed"
make_text $httpdir/bad.txt
add "feed 0 $httpurl/bad.txt"
rune "could not be understood" -u

# Redirects. NOTE(review): URLs of the form $httpurl/<code>/<path> presumably
# make the test server answer with that redirect status pointing at <path>;
# testserver.py is not shown here -- confirm there.
for code in 301 302; do
	begin "HTTP $code redirect to HTML"
	make_html $httpdir/bad.html
	add "changeconfig false"
	add "feed 0 $httpurl/$code/bad.html"
	rune "could not be understood" -u

	begin "HTTP $code redirect to plain text"
	make_text $httpdir/bad.txt
	add "changeconfig false"
	add "feed 0 $httpurl/$code/bad.txt"
	rune "could not be understood" -u

	begin "HTTP $code redirect to 404"
	add "changeconfig false"
	add "feed 0 $httpurl/$code/404"
	rune "404" -u
done

# A zero-length document is expected to be reported as not being a feed.
begin "empty file rather than a feed"
touch $httpdir/bad.xml
add "feed 0 $httpurl/bad.xml"
rune "could not be understood" -u

# A well-formed feed with a channel but no items is expected to be accepted
# silently.
begin "valid feed with no items"
cat >$httpdir/empty.xml <<EOF
<rss version="2.0">
  <channel>
    <title>example-feed-title</title>
    <link>http://example.org/</link>
    <description>example-feed-description</description>
  </channel>
</rss>
EOF
add "feed 0 $httpurl/empty.xml"
runs -u

# A URL for which no file has been created is expected to produce a message
# mentioning 404.
begin "HTTP 404"
add "feed 0 $httpurl/notthere"
rune "404" -u

# Timeouts. The connect-timeout tests only run when -T HOST was given; the
# response-timeout tests use the test server's timeoutport.
for proto in http https ftp; do
	if [ -n "$timeouthost" ]; then
		begin "$proto: connect timeout"
		add "timeout 1s"
		add "feed 0 $proto://$timeouthost/feed.xml"
		rune "Timeout while reading" -u
	fi

	begin "$proto: response timeout"
	add "timeout 1s"
	add "feed 0 $proto://$serverhost:$timeoutport/feed.xml"
	rune "Timeout while reading" -u
done

# With "ignoretimeouts true", the same timeout is expected to pass silently.
begin "ignoretimeouts true"
add "timeout 1s"
add "ignoretimeouts true"
add "feed 0 http://$serverhost:$timeoutport/feed.xml"
runs -u

# With a period of 0 the second update is expected to fetch the feed again,
# and so to notice that it has been removed.
begin "0 period"
make_rss20 $httpdir/simple.rss
add "feed 0 $httpurl/simple.rss"
runs -u
rm $httpdir/simple.rss
rune "404" -u

# With a period of 1h the second update is expected to be silent even though
# the feed has been removed.
begin "1h period"
make_rss20 $httpdir/simple.rss
add "feed 1h $httpurl/simple.rss"
runs -u
rm $httpdir/simple.rss
runs -u
# All ten items of a ten-item feed are expected on the output page.
begin "10 items"
make_n 10 $httpdir/feed.rss
add "feed 0 $httpurl/feed.rss"
runs -uw
output_n 10

# After the feed grows from three items to six, all six are expected.
begin "new articles are collected"
make_n 3 $httpdir/feed.rss
add "feed 0 $httpurl/feed.rss"
runs -uw
output_n 3
make_n 6 $httpdir/feed.rss
runs -uw
output_n 6

# "outputfile NAME" is expected to send the page to that file in the state
# directory.
begin "outputfile"
make_rss20 $httpdir/feed.rss
add "feed 0 $httpurl/feed.rss"
add "outputfile second.html"
runs -uw
contains $statedir/second.html example-feed-title

# "outputfile -" is expected to send the page to rawdog's standard output,
# which runf captures in $outfile.
begin "outputfile -"
make_rss20 $httpdir/feed.rss
add "feed 0 $httpurl/feed.rss"
add "outputfile -"
run -uw
contains $outfile example-feed-title

# Of twenty items, only the first ten are expected on the page.
begin "maxarticles 10"
make_n 20 $httpdir/feed.rss
add "maxarticles 10"
add "feed 0 $httpurl/feed.rss"
runs -uw
output_n 10
not_output_range 11 20

# Items 1-10 are fetched at one fake time; items 11-20 first appear 45
# minutes later. With "maxage 30m" only the newer ten are expected to remain
# on the page.
begin "maxage 30m"
fake_time 1408794484.0
make_n 10 $httpdir/feed.rss
add "maxage 30m"
add "feed 0 $httpurl/feed.rss"
runs -uw
output_n 10
# This is 45 minutes later than the time above.
fake_time 1408797184.0
make_n 20 $httpdir/feed.rss
runs -uw
not_output_range 1 10
output_range 11 20

# The feed shrinks from twenty items to five, with articles set to expire
# immediately once they leave the feed ("expireage 0").
begin "keepmin 10"
make_n 20 $httpdir/feed.rss
add "keepmin 10"
add "expireage 0"
add "feed 0 $httpurl/feed.rss"
runs -uw
output_n 20
make_n 5 $httpdir/feed.rss
runs -uw
# Should have the 5 currently in the feed, and 10 in total
output_n 5
if [ $(grep range-title- $statedir/output.html | wc -l) != 10 ]; then
	die "Should contain 10 items"
fi

# With "currentonly true", items that have left the feed are expected to
# disappear from the page.
begin "currentonly true"
make_n 10 $httpdir/feed.rss
add "currentonly true"
add "feed 0 $httpurl/feed.rss"
runs -uw
output_n 10
make_n 5 $httpdir/feed.rss
runs -uw
output_n 5
not_output_range 6 10

# The same Atom entry (same id and link) is published twice with different
# summaries. Duplicate hiding is switched off, so the only thing that
# decides whether the first version survives is the useids setting.
for state in false true; do
	begin "useids $state"
	add "useids $state"
	add "hideduplicates none"
	add "feed 0 $httpurl/feed.atom"
	echo "<summary>Original</summary>" | make_atom10_with $httpdir/feed.atom
	runs -uw
	contains $statedir/output.html Original
	echo "<summary>Revised</summary>" | make_atom10_with $httpdir/feed.atom
	runs -uw
	contains $statedir/output.html Revised
	if $state; then
		# Should have updated the existing article
		not_contains $statedir/output.html Original
	else
		# Should have kept both versions
		contains $statedir/output.html Original
	fi
done

# Shared scenario for the hideduplicates tests. With useids off, one feed
# publishes three successive single-entry versions:
#   item-a: link x, id 0
#   item-b: link x, id 1  (same link as item-a)
#   item-c: link y, id 1  (same id as item-b)
# The feed is updated after each version, and the page is written after the
# last.
dupecheck () {
	local feedfile="$httpdir/feed.atom"
	local site="http://example.org"
	add "useids false"
	add "feed 0 $httpurl/feed.atom"
	make_single $feedfile item-a $site/link/x $site/id/0
	runs -u
	make_single $feedfile item-b $site/link/x $site/id/1
	runs -u
	make_single $feedfile item-c $site/link/y $site/id/1
	runs -uw
}

# Each test below runs the dupecheck scenario, in which item-a and item-b
# share a link and item-b and item-c share an id, under a different
# hideduplicates setting.

# No duplicate hiding: all three items are expected.
begin "hideduplicates none"
add "hideduplicates none"
dupecheck
contains $statedir/output.html item-a-title item-b-title item-c-title

# Hiding by id: item-b is expected to be hidden in favour of item-c.
begin "hideduplicates id"
add "hideduplicates id"
dupecheck
contains $statedir/output.html item-a-title item-c-title
not_contains $statedir/output.html item-b-title

# Hiding by link: item-a is expected to be hidden in favour of item-b.
begin "hideduplicates link"
add "hideduplicates link"
dupecheck
contains $statedir/output.html item-b-title item-c-title
not_contains $statedir/output.html item-a-title

# Hiding by both: only item-c is expected to remain.
begin "hideduplicates link id"
add "hideduplicates link id"
dupecheck
contains $statedir/output.html item-c-title
not_contains $statedir/output.html item-a-title item-b-title

# allowduplicates as a feed default is expected to override hideduplicates,
# so all three items appear.
begin "allowduplicates"
add "feeddefaults allowduplicates=true"
add "hideduplicates link id"
dupecheck
contains $statedir/output.html item-a-title item-b-title item-c-title

begin "sortbyfeeddate false/true"
# Debian bug 651080.
# Create three single-entry feeds, dated 1, 2 and 3 January 2013. The entry's
# <updated> value must be a valid timestamp that matches the feed's date,
# because the sortbyfeeddate true check below depends on the entry dates.
for day in 01 02 03; do
	cat >$httpdir/$day.atom <<EOF
<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
  <title>example-feed-title-${day}</title>
  <link href="http://example.org/${day}/"/>
  <updated>2013-01-${day}T18:00:00Z</updated>
  <author>
    <name>example-feed-author</name>
  </author>
  <id>http://example.org/${day}/feed-id</id>
  <entry>
    <title>example-item-title-${day}</title>
    <link href="http://example.org/${day}/item"/>
    <id>http://example.org/${day}/item-id</id>
    <updated>2013-01-${day}T18:00:00Z</updated>
    <summary>ENTRY-${day}</summary>
  </entry>
</feed>
EOF
done
# Print the two-character ENTRY numbers in the order they appear on the
# output page, joined onto one space-separated line.
entries () {
	grep 'ENTRY' $statedir/output.html | sed 's,.*ENTRY-\(..\).*,\1,' | xargs -n10 echo
}
# Fetch the feeds one update at a time, newest-dated feed first, so that the
# order in which articles were fetched is the reverse of their date order.
add "feed 0 $httpurl/03.atom"
runs -u
add "feed 0 $httpurl/02.atom"
runs -u
add "feed 0 $httpurl/01.atom"
add "sortbyfeeddate false"
runs -uw
# Expected order here: most recently fetched first.
equals "01 02 03" "$(entries)"
add "sortbyfeeddate true"
runs -w
# Expected order here: most recent feed-supplied date first.
equals "03 02 01" "$(entries)"

# Day and time section headings. The feed has four items spread over three
# days (1-3 January 2013) and two hours of the day (18:00 and 19:00). With
# distinctive dayformat/timeformat strings, the headings are expected to
# appear on the page exactly when the matching daysections/timesections
# option is on. All four combinations are tested.
for dstate in false true; do
	for tstate in false true; do
		begin "daysections $dstate, timesections $tstate"
		cat >$httpdir/feed.rss <<EOF
<rss version="2.0">
  <channel>
    <title>example-feed-title</title>
    <link>http://example.org/</link>
    <description>example-feed-description</description>
    <item>
      <pubDate>Thu, 03 Jan 2013 18:00:00 +0000</pubDate>
      <title>item-1</title>
      <link>http://example.org/1</link>
    </item>
    <item>
      <pubDate>Wed, 02 Jan 2013 18:00:00 +0000</pubDate>
      <title>item-2</title>
      <link>http://example.org/2</link>
    </item>
    <item>
      <pubDate>Tue, 01 Jan 2013 19:00:00 +0000</pubDate>
      <title>item-3</title>
      <link>http://example.org/3</link>
    </item>
    <item>
      <pubDate>Tue, 01 Jan 2013 18:00:00 +0000</pubDate>
      <title>item-4</title>
      <link>http://example.org/4</link>
    </item>
  </channel>
</rss>
EOF
		add "dayformat day(%d)"
		add "timeformat time(%H)"
		add "daysections $dstate"
		add "timesections $tstate"
		add "sortbyfeeddate true"
		add "feed 0 $httpurl/feed.rss"
		runs -uw
		# One day heading per distinct date, or none at all.
		if $dstate; then
			contains $statedir/output.html \
				'day(01)' 'day(02)' 'day(03)'
		else
			not_contains $statedir/output.html 'day('
		fi
		# One time heading per distinct hour, or none at all.
		if $tstate; then
			contains $statedir/output.html \
				'time(18)' 'time(19)'
		else
			not_contains $statedir/output.html 'time('
		fi
	done
done

# Dump each built-in template with -s and with --show (which must agree),
# save it to a file, and configure rawdog to use that file. The page written
# with the saved templates is expected to be identical to the one written
# with the built-in defaults.
begin "default templates"
make_rss20 $httpdir/simple.rss
add "feed 0 $httpurl/simple.rss"
runs -uw
cp $statedir/output.html $statedir/output.html.orig
for template in page item feedlist feeditem; do
	run -s $template
	cp $outfile $statedir/$template
	run --show $template
	same $outfile $statedir/$template
	add "${template}template ${template}"
done
run -w
same $statedir/output.html.orig $statedir/output.html

# An unknown template name is expected to produce a message, with rawdog
# still exiting 0.
begin "show unknown template"
run -s aubergine
contains $outfile "Unknown template name: aubergine"

# The older spellings: -t/--show-template and -T/--show-itemtemplate on the
# command line, and "template" in the config. They are expected to round-trip
# in the same way, and edited template files are expected to be picked up.
begin "pre-2.15 template options"
make_rss20 $httpdir/simple.rss
add "feed 0 $httpurl/simple.rss"
runs -uw
cp $statedir/output.html $statedir/output.html.orig
run -t
cp $outfile $statedir/page
run --show-template
same $outfile $statedir/page
run -T
cp $outfile $statedir/item
run --show-itemtemplate
same $outfile $statedir/item
add "template page"
add "itemtemplate item"
run -w
same $statedir/output.html.orig $statedir/output.html
echo MAGIC1__items__ >$statedir/page
echo MAGIC2 >$statedir/item
run -uw
contains $statedir/output.html MAGIC1 MAGIC2

# Naming a template file that does not exist is expected to be an error, with
# a non-0 exit and no traceback.
for template in page item feedlist feeditem; do
	begin "missing ${template} template file"
	add "${template}template ${template}"
	runne "Can't read template file" -u
done

# __if_NAME__ ... __else__ ... __endif__ in a template: "title" and "date" are
# used as names that are set for the item and "aubergine" as one that is not,
# including nested conditionals. Only the OK-n markers are expected on the
# page.
begin "template conditionals"
make_atom10 $httpdir/feed.atom
cat >$statedir/item <<EOF
__if_title__OK-1__endif__
__if_title__OK-2__else__BAD-2__endif__
__if_aubergine__BAD-3__endif__
__if_aubergine__BAD-4__else__OK-4__endif__
__if_title____if_date__OK-5__endif____endif__
__if_aubergine____if_date__BAD-6__endif____endif__
EOF
add "itemtemplate item"
add "feed 0 $httpurl/feed.atom"
runs -uw
contains $statedir/output.html "OK-1" "OK-2" "OK-4" "OK-5"
not_contains $statedir/output.html BAD

# Under the default C locale (set by cleanstate), a non-ASCII character in a
# template file is expected to be a clean error: non-0 exit, no traceback.
begin "UTF-8 in template, ASCII locale"
echo "char(ø)" >$statedir/item
make_atom10 $httpdir/feed.atom
add "feed 0 $httpurl/feed.atom"
add "itemtemplate item"
runne "Character encoding problem" -uw

# Under a UTF-8 locale, if one is installed, the same template is expected to
# work, with the character written to the page as a numeric entity.
if [ -n "$utf8_LC_ALL" ]; then
	begin "UTF-8 in template, UTF-8 locale"
	echo "char(ø)" >$statedir/item
	make_atom10 $httpdir/feed.atom
	add "feed 0 $httpurl/feed.atom"
	add "itemtemplate item"
	export LC_ALL="$utf8_LC_ALL"
	runs -uw
	contains $statedir/output.html "char(&#248;)"
fi

# The same pair of tests, with the non-ASCII character supplied through a
# per-feed define_thing argument line and expanded as __thing__ in the item
# template.
begin "UTF-8 in define, ASCII locale"
make_atom10 $httpdir/feed.atom
echo "expand(__thing__)" >$statedir/item
add "itemtemplate item"
add "feed 0 $httpurl/feed.atom"
add "  define_thing char(ø)"
runne "Character encoding problem" -uw

if [ -n "$utf8_LC_ALL" ]; then
	begin "UTF-8 in define, UTF-8 locale"
	make_atom10 $httpdir/feed.atom
	echo "expand(__thing__)" >$statedir/item
	add "itemtemplate item"
	add "feed 0 $httpurl/feed.atom"
	add "  define_thing char(ø)"
	export LC_ALL="$utf8_LC_ALL"
	runs -uw
	contains $statedir/output.html "expand(char(&#248;))"
fi

# The built-in item template with __date__ appended. Both the time heading
# and the item's own date are expected to show the entry's date from the
# feed, 1 January 18:00.
begin "item dates"
# Debian bug 651080.
run -s item
cp $outfile $statedir/item
echo "__date__" >>$statedir/item
make_atom10 $httpdir/feed.atom
add "feed 0 $httpurl/feed.atom"
add "sortbyfeeddate true"
add "timeformat HEADING-%m-%d-%H:%M"
add "datetimeformat ITEMDATE-%m-%d-%H:%M"
add "itemtemplate item"
runs -uw
contains $statedir/output.html "HEADING-01-01-18:00" "ITEMDATE-01-01-18:00"

# The page is rewritten, without refetching, under two values of TZ. With
# TZ=GMT+5 the 18:00 UTC entry is expected to be shown as 13:00; back under
# the default TZ it is expected to be shown as 18:00 again.
begin "dates shown in local time"
echo "__date__" >$statedir/item
make_atom10 $httpdir/feed.atom
add "feed 0 $httpurl/feed.atom"
add "sortbyfeeddate true"
add "timeformat HEADING-%m-%d-%H:%M"
add "datetimeformat ITEMDATE-%m-%d-%H:%M"
add "itemtemplate item"
runs -u
export TZ="GMT+5"
runs -w
contains $statedir/output.html "HEADING-01-01-13:00" "ITEMDATE-01-01-13:00"
export TZ="$default_TZ"
runs -w
contains $statedir/output.html "HEADING-01-01-18:00" "ITEMDATE-01-01-18:00"

# Only run if a Japanese UTF-8 locale is installed. The weekday name (%A) is
# expected in Japanese, as numeric entities, under that locale, and in
# English under the default locale.
if [ -n "$ja_LC_ALL" ]; then
	begin "dates shown in Japanese"
	echo "__date__" >$statedir/item
	make_atom10 $httpdir/feed.atom
	add "feed 0 $httpurl/feed.atom"
	add "sortbyfeeddate true"
	add "timeformat HEADING-%A-%c"
	add "datetimeformat ITEMDATE-%A-%c"
	add "itemtemplate item"
	export LC_ALL="$ja_LC_ALL"
	runs -uw
	# Japanese for Tuesday, in Unicode.
	tue="&#28779;&#26332;&#26085;"
	contains $statedir/output.html "HEADING-$tue-" "ITEMDATE-$tue-"
	not_contains $statedir/output.html "Tuesday"
	export LC_ALL="$default_LC_ALL"
	runs -uw
	contains $statedir/output.html "HEADING-Tuesday" "ITEMDATE-Tuesday"
fi

begin "strange dates in feeds"
# Python's time.strftime can't handle all possible dates, and the range of
# dates that Python can work with in time_t format varies between platforms.
# rawdog won't be able to display dates that Python can't handle, but it
# should at least not crash if feedparser decides to present them
# (for example, if feedparser misparses a timezone as a year).
echo "__date__" >$statedir/item
add "itemtemplate item"
add "sortbyfeeddate true"
# Each item below carries a pubDate chosen to be awkward in a different way;
# the item titles describe the case being exercised.
cat >$httpdir/feed.rss <<EOF
<rss version="2.0">
  <channel>
    <title>example-feed-title</title>
    <link>http://example.org/</link>
    <description>example-feed-description</description>
    <item>
      <title>Date in 300 outside 32-bit time_t range</title>
      <link>http://example.org/item</link>
      <pubDate>Mon, 1 Jan 0300 01:23:45 +0000</pubDate>
    </item>
    <item>
      <title>Date in 1750 using Julian calendar</title>
      <link>http://example.org/item</link>
      <pubDate>Mon, 1 Jan 1750 01:23:45 +0000</pubDate>
    </item>
    <item>
      <title>Date in 1969 with negative time_t</title>
      <link>http://example.org/item</link>
      <pubDate>Wed, 1 Jan 1969 01:23:45 +0000</pubDate>
    </item>
    <item>
      <title>Date in 2015 that feedparser 5.2.0 misparses as 300</title>
      <link>http://zeptobars.ru/en/rss</link>
      <pubDate>Fri, 20 Mar 15 17:32:14 +0300</pubDate>
    </item>
  </channel>
</rss>
EOF
add "feed 0 $httpurl/feed.rss"
# No checks on the output follow: the test is only that this run passes.
runs -uw

begin "item authors"
# The five entries cover the combinations of author sub-elements:
#   1: name only          2: name + email      3: name + uri
#   4: name + email + uri 5: uri only (marked below as not valid Atom)
cat >$httpdir/feed.atom <<EOF
<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
  <title>example-feed-title</title>
  <link href="http://example.org/"/>
  <updated>2013-01-01T18:00:00Z</updated>
  <id>http://example.org/feed-id</id>
  <entry>
    <author>
      <name>author-1</name>
    </author>
    <title>example-item-title-1</title>
    <link href="http://example.org/item/1"/>
    <id>http://example.org/item-id/1</id>
    <updated>2013-01-01T18:00:00Z</updated>
    <summary>example-item-description</summary>
  </entry>
  <entry>
    <author>
      <name>author-2</name>
      <email>author2@example.org</email>
    </author>
    <title>example-item-title-2</title>
    <link href="http://example.org/item/2"/>
    <id>http://example.org/item-id/2</id>
    <updated>2013-01-01T18:00:00Z</updated>
    <summary>example-item-description</summary>
  </entry>
  <entry>
    <author>
      <name>author-3</name>
      <uri>http://example.org/author3</uri>
    </author>
    <title>example-item-title-3</title>
    <link href="http://example.org/item/3"/>
    <id>http://example.org/item-id/3</id>
    <updated>2013-01-01T18:00:00Z</updated>
    <summary>example-item-description</summary>
  </entry>
  <entry>
    <author>
      <name>author-4</name>
      <email>author4@example.org</email>
      <uri>http://example.org/author4</uri>
    </author>
    <title>example-item-title-4</title>
    <link href="http://example.org/item/4"/>
    <id>http://example.org/item-id/4</id>
    <updated>2013-01-01T18:00:00Z</updated>
    <summary>example-item-description</summary>
  </entry>
  <entry>
    <!-- Not valid Atom -->
    <author>
      <uri>http://a5.example.org</uri>
    </author>
    <title>example-item-title-5</title>
    <link href="http://example.org/item/5"/>
    <id>http://example.org/item-id/5</id>
    <updated>2013-01-01T18:00:00Z</updated>
    <summary>example-item-description</summary>
  </entry>
</feed>
EOF
# The item template prints only the author, wrapped so it is easy to match.
cat >$statedir/item <<EOF
author(__author__)
EOF
add "feed 0 $httpurl/feed.atom"
add "itemtemplate item"
add "tidyhtml false"
runs -uw
# Expected rendering: plain name when there is nothing to link to; a mailto:
# link for email only; the uri as the link target when a uri is present (even
# alongside an email); and the uri as the link text when there is no name.
contains $statedir/output.html \
	"author(author-1)" \
	"author(<a href=\"mailto:author2@example.org\">author-2</a>)" \
	"author(<a href=\"http://example.org/author3\">author-3</a>)" \
	"author(<a href=\"http://example.org/author4\">author-4</a>)" \
	"author(<a href=\"http://a5.example.org\">http://a5.example.org</a>)"

begin "feed list templates"
make_rss20 $httpdir/0.rss
make_rss20 $httpdir/1.rss
make_rss20 $httpdir/2.rss
add "feed 0 $httpurl/0.rss"
add "feed 0 $httpurl/1.rss"
add "feed 0 $httpurl/2.rss"
# Build each custom template from the output of "run -s NAME" (captured in
# $outfile) with a marker line appended, so the markers show the custom
# templates were really used.
run -s feedlist
cp $outfile $statedir/feedlist
echo "FEEDLIST" >>$statedir/feedlist
run -s feeditem
cp $outfile $statedir/feeditem
echo "FEEDITEM-__feed_url__" >>$statedir/feeditem
add "feedlisttemplate feedlist"
add "feeditemtemplate feeditem"
# NOTE(review): this uses "run" where similar tests use "runs", and the feeds
# are never fetched with -u -- confirm that both are intentional.
run -w
# One FEEDLIST marker, and one FEEDITEM marker per configured feed URL.
contains $statedir/output.html \
	FEEDLIST \
	FEEDITEM-$httpurl/0.rss FEEDITEM-$httpurl/1.rss FEEDITEM-$httpurl/2.rss

begin "prefer content over summary"
# Three feeds: content only, summary only, and both summary and content.
make_atom10_with $httpdir/1.atom <<EOF
<content>Content1</content>
EOF
make_atom10_with $httpdir/2.atom <<EOF
<summary>Summary2</summary>
EOF
# Note that feedparser 5.1.3 will do odd things if summary follows content --
# feedparser issue 412.
make_atom10_with $httpdir/3.atom <<EOF
<summary>Summary3</summary>
<content>Content3</content>
EOF
# presumably these two settings keep the near-identical items from the three
# feeds from being merged or hidden -- confirm against rawdog's config docs.
add "useids false"
add "hideduplicates none"
add "feed 0 $httpurl/1.atom"
add "feed 0 $httpurl/2.atom"
add "feed 0 $httpurl/3.atom"
runs -uw
# Where both are present, only the content should be shown.
contains $statedir/output.html Content1 Summary2 Content3
not_contains $statedir/output.html Summary3

begin "showfeeds true/false"
make_atom10 $httpdir/simple.atom
add "feed 0 $httpurl/simple.atom"
# Fetch once, then rewrite the output with each setting in turn.
runs -u
add "showfeeds true"
runs -w
# The feed URL is used as the evidence that the feed list was written.
contains $statedir/output.html $httpurl/simple.atom
# A second showfeeds line is appended; the test relies on it taking effect
# over the earlier one.
add "showfeeds false"
runs -w
not_contains $statedir/output.html $httpurl/simple.atom

begin "userefresh true/false"
make_atom10 $httpdir/0.atom
make_atom10 $httpdir/1.atom
# It should pick the lowest of these and convert to seconds.
add "feed 1m $httpurl/0.atom"
add "feed 2m $httpurl/1.atom"
runs -u
add "userefresh true"
runs -w
# 1m is the shorter period, i.e. 60 seconds.
contains $statedir/output.html 'http-equiv="Refresh" content="60"'
add "userefresh false"
runs -w
not_contains $statedir/output.html 'http-equiv="Refresh"'

begin "HTTP basic authentication"
make_rss20 $httpdir/private.rss
# presumably the test server demands the credentials named in the
# auth-USER-PASS path component -- confirm against the server implementation.
add "feed 0 $httpurl/auth-TestUser-TestPass/private.rss"
# Without credentials in the config, the fetch should report a 401.
rune "401" -u
# Add the credentials as indented options of the feed line above, then retry.
add "  user TestUser"
add "  password TestPass"
runs -u

# checkstatus STATUS
# Write a rawdog plugin to $statedir/plugins/checkstatus.py. The plugin
# attaches to the feed_fetched hook and prints a complaint unless STATUS
# appears among the 'status' values in p['rawdog_responses'].
# STATUS is inserted verbatim into the generated Python source.
checkstatus () {
	# Emit the plugin source one line at a time; the status code is passed
	# as a printf argument rather than being part of the format string.
	{
		printf '%s\n' 'import rawdoglib.plugins'
		printf '%s\n' 'def feed_fetched(rawdog, config, feed, p, error, nf):'
		printf "    if %s not in [e['status'] for e in p['rawdog_responses']]:\n" "$1"
		printf '        print "didn'\''t get HTTP %s response"\n' "$1"
		printf '%s\n' 'rawdoglib.plugins.attach_hook("feed_fetched", feed_fetched)'
	} >"$statedir/plugins/checkstatus.py"
}

begin "HTTP 304 not modified"
make_rss20 $httpdir/feed.rss
add "feed 0 $httpurl/feed.rss"
# The first fetch retrieves the feed.
runs -u
# Install the status-checking plugin only now, so that it applies to the
# second fetch, which is expected to include a 304 response.
checkstatus 304
runs -u

begin "HTTP 302 redirect to 304 not modified"
# feedparser issue 390.
make_rss20 $httpdir/new.rss
# presumably the test server answers /302/PATH with a temporary redirect to
# /PATH -- confirm against the server implementation.
add "feed 0 $httpurl/302/new.rss"
runs -u
# As above: the second fetch should still see a 304, via the redirect.
checkstatus 304
runs -u

begin "HTTP 302 temporary redirect"
make_rss20 $httpdir/new.rss
add "feed 0 $httpurl/302/new.rss"
# The only check is that the fetch through the redirect passes.
runs -u

# In the tests below, a /301/ or /302/ path component is presumably served by
# the test server as a permanent or temporary redirect to the rest of the
# path -- confirm against the server implementation.

begin "HTTP 301 permanent redirect, changeconfig false"
make_rss20 $httpdir/new.rss
add "changeconfig false"
add "feed 0 $httpurl/301/new.rss"
# With changeconfig off, rawdog should only advise the user to update...
rune "You should update its entry" -u
# ...and the config should still contain the original URL.
contains $statedir/config "$httpurl/301/new.rss"

for state in false true; do
	begin "HTTP 301 permanent redirect, changeconfig true, splitstate $state"
	make_rss20 $httpdir/new.rss
	add "changeconfig true"
	add "splitstate $state"
	add "feed 0 $httpurl/301/new.rss"
	rune "has been updated automatically" -u
	# The config should now contain the redirect target.
	contains $statedir/config "$httpurl/new.rss"
	# A second update with the rewritten config should pass quietly.
	runs -u
done

begin "HTTP 302 to 301"
# We should only update config if the first redirect is permanent.
make_rss20 $httpdir/feed.rss
add "changeconfig true"
add "feed 0 $httpurl/302/301/feed.rss"
runs -u
contains $statedir/config "$httpurl/302/301/feed.rss"

begin "HTTP 301 to 302"
make_rss20 $httpdir/feed.rss
add "changeconfig true"
add "feed 0 $httpurl/301/302/feed.rss"
rune "has been updated automatically" -u
# Only the permanent hop is applied; the temporary one stays in the URL.
contains $statedir/config "$httpurl/302/feed.rss"

begin "HTTP 301 to 301 to 302"
# rawdog should follow the 301s but not the 302 to find the new URL.
make_rss20 $httpdir/feed.rss
add "changeconfig true"
add "feed 0 $httpurl/301/301/302/feed.rss"
rune "has been updated automatically" -u
contains $statedir/config "$httpurl/302/feed.rss"

begin "HTTP 301 to nowhere"
# A bare /301 with nothing after it: no redirect target is given.
add "feed 0 $httpurl/301"
rune "without a new location" -u

# presumably /301/=VALUE makes the server send VALUE as the Location header
# verbatim -- confirm against the server implementation.
for badurl in "/" "http:" "://example.org" "://example.org:80,example.org/"; do
	begin "HTTP 301 to invalid Location $badurl"
	add "changeconfig true"
	add "feed 0 $httpurl/301/=$badurl"
	rune "invalid new location" -u
	# The config must be left pointing at the original URL.
	contains $statedir/config "$httpurl/301/=$badurl"
done

begin "HTTP 410 gone"
add "feed 0 $httpurl/410"
rune "You should remove it" -u

# Check that existing items survive a switch of the splitstate setting, in
# both directions.
for state in false true; do
	# $other is the opposite setting to $state.
	case "$state" in
	true) other=false ;;
	*) other=true ;;
	esac

	begin "converting from splitstate $state to splitstate $other"
	make_n 10 "$httpdir/feed.rss"
	add "splitstate $state"
	add "feed 1h $httpurl/feed.rss"
	runs -uw
	output_n 10

	# The feed file is removed before the second run, so the same ten items
	# are expected again with the server no longer having the feed.
	rm "$httpdir/feed.rss"
	add "splitstate $other"
	runs -uw
	output_n 10
done

for state in false true; do
	begin "changeconfig moving items from existing feed, splitstate $state"
	# Items 1-5 are fetched from old.rss first.
	make_range 1 5 $httpdir/old.rss
	add "splitstate $state"
	add "keepmin 20"
	add "changeconfig true"
	add "feed 0 $httpurl/old.rss"
	runs -uw
	# The feed then moves: old.rss goes away, items 6-10 appear in new.rss,
	# and a .rewrites entry maps /old.rss onto /301/new.rss (presumably read
	# by the test server -- confirm).
	rm $httpdir/old.rss
	make_range 6 10 $httpdir/new.rss
	echo >$httpdir/.rewrites "/old.rss /301/new.rss"
	rune "has been updated automatically" -uw
	# We should still have the original items at this point.
	output_range 1 10
	runs -uw
	output_range 1 10
done

begin "changeconfig for feed from included file"
make_rss20 $httpdir/feed.rss
add "changeconfig true"
# The feed line lives in a second file pulled in through "include".
add "include config2"
echo >$statedir/config2 "feed 0 $httpurl/301/feed.rss"
rune "has been updated automatically" -u
# FIXME: this behaviour is probably not what the user wanted.
# rawdog should probably complain that it's trying to change
# something but hasn't succeeded.
# The checks below pin the current behaviour: neither file gains the new URL,
# and config2 keeps the old one.
not_contains $statedir/config "$httpurl/feed.rss"
contains $statedir/config2 "$httpurl/301/feed.rss"
not_contains $statedir/config2 "$httpurl/feed.rss"

begin "changeconfig to same URL as existing feed"
make_rss20 $httpdir/feed.rss
add "changeconfig true"
add "feed 0 $httpurl/feed.rss"
runs -u
# A second feed whose redirect target is the feed already configured above.
add "feed 0 $httpurl/301/feed.rss"
rune "already subscribed" -u

for state in false true; do
	begin "changeconfig to URL of just-removed feed, splitstate $state"
	make_rss20 $httpdir/feed.rss
	add "splitstate $state"
	add "changeconfig true"
	add "feed 0 $httpurl/feed.rss"
	runs -u
	# Simulate the change failing, then succeeding.
	for i in 1 2; do
		# Empty the config file and rebuild it with only the redirecting
		# URL, so the directly-configured feed above is no longer listed.
		: >$statedir/config
		add "splitstate $state"
		add "changeconfig true"
		add "feed 0 $httpurl/301/feed.rss"
		rune "has been updated automatically" -u
		contains $statedir/config "$httpurl/feed.rss"
		not_contains $statedir/config "$httpurl/301/feed.rss"
	done
	runs -u
done

begin "feed format text"
# The description is escaped in the feed XML; with format=text it should be
# shown as preformatted text rather than interpreted as HTML.
make_rss20_desc $httpdir/feed.rss <<EOF
three &lt; four, not HTML
EOF
add "feed 0 $httpurl/feed.rss format=text"
add "tidyhtml false"
runs -uw
contains $statedir/output.html "<pre>three &lt; four"

begin "feed id"
make_rss20 $httpdir/0.rss
make_rss20 $httpdir/1.rss
# The first feed has an explicit id; the second has none.
add "feed 0 $httpurl/0.rss id=blah"
add "feed 0 $httpurl/1.rss"
add "itemtemplate item"
echo "feed-id(__feed_id__)" >$statedir/item
runs -uw
# The feed without an id is expected to show "examplefeedtitle", presumably
# derived from the feed title -- confirm against make_rss20.
contains $statedir/output.html "feed-id(blah)" "feed-id(examplefeedtitle)"

# The tests below pass item descriptions to write_desc on stdin and then
# inspect output.html; write_desc presumably builds a feed around the
# description and runs rawdog on it -- confirm against its definition.

begin "shorttag expansion"
# <br/> <br /> bug fixed 2006-01-07.
# <br/>/ has a workaround in feedparser for sgmllib.
add "tidyhtml false"
write_desc <<EOF
&lt;b&gt;0&lt;/b&gt;&lt;br/&gt;
&lt;b&gt;1&lt;/b&gt;&lt;br /&gt;
&lt;b&gt;2&lt;/b&gt;&lt;br/&gt;/
EOF
# Each spelling of the tag should come out as "<br />", and the stray "/" in
# the third case should survive.
contains $statedir/output.html \
	"<b>0</b><br />" \
	"<b>1</b><br />" \
	"<b>2</b><br />/"

begin "broken processing instruction"
# No checks on the output follow: the test is only that write_desc passes.
write_desc <<EOF
&lt;!doctype html!&gt;
EOF

begin "relative URIs resolved"
write_desc <<EOF
<![CDATA[<p>
<a href="rel-link">link</a>
<img src="rel-img">
</p>]]>
EOF
# Both relative references should be made absolute against $httpurl.
contains $statedir/output.html \
	"$httpurl/rel-link" \
	"$httpurl/rel-img"

begin "Javascript removed"
# Two carriers of script: a <script> element and a javascript: URL in a style
# attribute. Neither payload should reach the output.
write_desc <<EOF
<![CDATA[<p>
<script type="text/javascript">
alert('Annoying1');
</script>
<span style="background: url(javascript:alert('Annoying2'))">span</a>
</p>]]>
EOF
not_contains $statedir/output.html "Annoying1" "Annoying2"

begin "stray ] in URL"
# This produced an "Invalid IPv6 URL" exception with feedparser r738.
write_desc <<EOF
<![CDATA[<p><a href="http://example.com/not-broken]">link</a></p>]]>
EOF
contains $statedir/output.html not-broken

# Known-bad test: only run when $knownbad is true.
if $knownbad; then
	begin "escaped slashes in URL"
	# feedparser issue 407: links with :// escaped get mangled (reported in
	# rawdog by Joseph Reagle).
	# Links 0 and 1 escape only the slashes (&#x2F;); links 2 and 3 also
	# escape the colon (&#x3A;). Each link should come out as its plain
	# http:// or https:// URL. The heredoc is not <<-, so the leading tabs
	# are part of the description.
	write_desc <<EOF
	<![CDATA[
	<a href="http:&#x2F;&#x2F;example.com&#x2F;0">link</a>
	<a href="https:&#x2F;&#x2F;example.com&#x2F;1">link</a>
	<a href="http&#x3A;&#x2F;&#x2F;example.com&#x2F;2">link</a>
	<a href="https&#x3A;&#x2F;&#x2F;example.com&#x2F;3">link</a>
	]]>
EOF
	# The expected scheme matches the one used in each link above.
	contains $statedir/output.html \
		http://example.com/0 https://example.com/1 \
		http://example.com/2 https://example.com/3
fi

# The "add feed" tests run rawdog with -a URL and check which feed URL, if
# any, ends up in the config file.

begin "add feed, actually a feed"
# The URL given to -a is itself a feed.
make_rss20 $httpdir/feed.rss
rune "Adding feed" -a $httpurl/feed.rss
contains "$statedir/config" $httpurl/feed.rss

begin "add feed, relative <link>"
# Debian bug 657206.
make_rss20 $httpdir/feed.rss
make_html_head $httpdir/page.html <<EOF
<link rel="alternate" type="application/rss+xml" title="RSS" href="feed.rss">
EOF
rune "Adding feed" -a $httpurl/page.html
# The relative href should have been resolved against the page URL.
contains "$statedir/config" $httpurl/feed.rss

begin "add feed, absolute <link>"
make_rss20 $httpdir/feed.rss
make_html_head $httpdir/page.html <<EOF
<link rel="alternate" type="application/rss+xml" title="RSS" href="$httpurl/feed.rss">
EOF
rune "Adding feed" -a $httpurl/page.html
contains "$statedir/config" $httpurl/feed.rss

begin "add feed, typical blog"
# Roughly what blogspot pages have.
make_atom10 $httpdir/posts
make_html_head $httpdir/page.html <<EOF
<link rel="alternate" type="application/atom+xml" title="My Blog - Atom" href="$httpurl/posts">
<link rel="alternate" type="application/atom+xml" title="My Blog - RSS" href="$httpurl/posts?alt=rss">
<link rel="service.post" type="application/atom+xml" title="My Blog - Atom" href="http://example.org/posts">
EOF
rune "Adding feed" -a $httpurl/page.html
# The plain Atom URL should be chosen, not the ?alt=rss variant.
contains "$statedir/config" $httpurl/posts
not_contains "$statedir/config" "alt=rss"

begin "add feed, avoid HTML <link>"
# The only alternate link is of type text/html, so it is not a feed.
make_html $httpdir/dummy.html
make_html_head $httpdir/page.html <<EOF
<link rel="alternate" type="text/html" title="Something" href="$httpurl/dummy.html">
EOF
rune "Cannot find any feeds" -a $httpurl/page.html

begin "add feed, <a> with obvious URL"
# No <link> element here: the feed is only reachable through an ordinary
# anchor whose URL ends in .rss.
make_rss20 $httpdir/foo.rss
make_html_body $httpdir/page.html <<EOF
<p>Here is our <a href="$httpurl/foo.rss">feed</a>!</p>
EOF
rune "Adding feed" -a $httpurl/page.html

if $knownbad; then
	begin "add feed, <a> with non-obvious URL"
	# ... as boingboing.net currently has (old feedfinder doesn't find
	# this; it finds /atom.xml by brute force).
	make_rss20 $httpdir/foo
	make_html_body $httpdir/page.html <<EOF
	<p>Here is our <a href="$httpurl/foo">RSS feed</a>!</p>
EOF
	rune "Adding feed" -a $httpurl/page.html
fi

if $knownbad; then
	# Old feedfinder could find this because it tried appending lots of
	# likely suffixes to URLs. However, this generally isn't needed
	# nowadays; most of the feeds that it could find that way have proper
	# <link> elements.
	begin "add feed, brute force"
	make_atom10 $httpdir/index.atom
	make_html $httpdir/page.html
	rune "Adding feed" -a $httpurl/page.html
fi

begin "add feed, no feeds to be found"
make_html $httpdir/page.html
rune "Cannot find any feeds" -a $httpurl/page.html

begin "add feed, nonsense in HTML"
# Debian bug 650776. This will provoke a HTMLParseError.
make_rss20 $httpdir/feed.rss
make_html_head $httpdir/page.html <<EOF
<link rel="alternate" type="application/rss+xml" title="RSS" href="feed.rss">
<![GARBAGE]]>
EOF
rune "Adding feed" -a $httpurl/page.html
# The feed link should still have been found.
contains "$statedir/config" $httpurl/feed.rss

begin "add feed, already present"
make_atom10 $httpdir/feed.atom
add "feed 3h $httpurl/feed.atom"
rune "already in the config file" -a $httpurl/feed.atom

# The tests below offer several candidate links on one page and check which
# one rawdog -a adds to the config file.

begin "add feed, prefer RSS 1.0 over nonsense"
make_rss10 $httpdir/feed.rdf
# The second candidate exists but is not a feed at all.
echo "this is nonsense" >$httpdir/feed.rss
make_html_head $httpdir/page.html <<EOF
<link rel="alternate" type="application/rdf+xml" title="RSS" href="feed.rdf">
<link rel="alternate" type="application/rss+xml" title="RSS" href="feed.rss">
EOF
rune "Adding feed" -a $httpurl/page.html
contains "$statedir/config" $httpurl/feed.rdf

begin "add feed, prefer RSS 2 over RSS 1.0"
make_rss10 $httpdir/feed.rdf
make_rss20 $httpdir/feed.rss
make_html_head $httpdir/page.html <<EOF
<link rel="alternate" type="application/rdf+xml" title="RSS" href="feed.rdf">
<link rel="alternate" type="application/rss+xml" title="RSS" href="feed.rss">
EOF
rune "Adding feed" -a $httpurl/page.html
contains "$statedir/config" $httpurl/feed.rss

begin "add feed, prefer .rss2 over .rss"
# Both files are made by make_rss20; only the file extension differs.
make_rss20 $httpdir/feed.rss
make_rss20 $httpdir/feed.rss2
make_html_head $httpdir/page.html <<EOF
<link rel="alternate" type="application/rss+xml" title="RSS" href="feed.rss">
<link rel="alternate" type="application/rss+xml" title="RSS" href="feed.rss2">
EOF
rune "Adding feed" -a $httpurl/page.html
contains "$statedir/config" $httpurl/feed.rss2

begin "add feed, prefer Atom over RSS"
make_rss10 $httpdir/feed.rdf
make_rss20 $httpdir/feed.rss
make_rss20 $httpdir/feed.rss2
make_atom10 $httpdir/feed.atom
# The Atom link is listed last, so it is not being chosen by page order.
make_html_head $httpdir/page.html <<EOF
<link rel="alternate" type="application/rdf+xml" title="RSS" href="feed.rdf">
<link rel="alternate" type="application/rss+xml" title="RSS" href="feed.rss">
<link rel="alternate" type="application/rss+xml" title="RSS" href="feed.rss2">
<link rel="alternate" type="application/atom+xml" title="Atom" href="feed.atom">
EOF
rune "Adding feed" -a $httpurl/page.html
contains "$statedir/config" $httpurl/feed.atom

begin "add feed, prefer entries over comments"
make_atom10 $httpdir/comments.atom
make_atom10 $httpdir/entries.atom
# The comments feed is listed first, so page order alone would pick it.
make_html_head $httpdir/page.html <<EOF
<link rel="alternate" type="application/atom+xml" title="Atom Comments" href="comments.atom">
<link rel="alternate" type="application/atom+xml" title="Atom" href="entries.atom">
EOF
rune "Adding feed" -a $httpurl/page.html
contains "$statedir/config" $httpurl/entries.atom

begin "add feed, keep page order"
make_atom10 $httpdir/0.atom
make_atom10 $httpdir/1.atom
# Two otherwise equivalent candidates: the first on the page should win.
make_html_head $httpdir/page.html <<EOF
<link rel="alternate" type="application/atom+xml" title="Atom" href="0.atom">
<link rel="alternate" type="application/atom+xml" title="Atom" href="1.atom">
EOF
rune "Adding feed" -a $httpurl/page.html
contains "$statedir/config" $httpurl/0.atom

begin "add feed, ignore broken link"
# Only 1.atom is created in this test; the page's link to 0.atom is the
# broken one.
make_atom10 $httpdir/1.atom
make_html_head $httpdir/page.html <<EOF
<link rel="alternate" type="application/atom+xml" title="Atom" href="0.atom">
<link rel="alternate" type="application/atom+xml" title="Atom" href="1.atom">
EOF
rune "Adding feed" -a $httpurl/page.html
contains "$statedir/config" $httpurl/1.atom

begin "add feed, UTF-8 in attr"
# This problem showed up in orbitbooks.net's front page. The intent is fine,
# but it crashes Python 2.7's HTMLParser if it's not properly decoded.
make_atom10 $httpdir/feed.atom
make_html_head $httpdir/page.html <<EOF
<link rel="alternate" type="image/gif" title="ø&quot;" href="picture.gif">
<link rel="alternate" type="application/atom+xml" title="Atom" href="feed.atom">
EOF
rune "Adding feed" -a $httpurl/page.html
contains "$statedir/config" $httpurl/feed.atom

begin "add feed, gzip-encoded response"
make_rss20 $httpdir/feed.rss
make_html_head $httpdir/page.html <<EOF
<link rel="alternate" type="application/rss+xml" title="RSS" href="$httpurl/feed.rss">
EOF
# Here the HTML page is requested through the /gzip/ path, which presumably
# makes the test server gzip-encode the response -- confirm.
rune "Adding feed" -a $httpurl/gzip/page.html
contains "$statedir/config" $httpurl/feed.rss

begin "add feed, gzip-encoded feed"
make_rss20 $httpdir/feed.rss
# Here the page is plain and the feed it links to is behind /gzip/.
make_html_head $httpdir/page.html <<EOF
<link rel="alternate" type="application/rss+xml" title="RSS" href="$httpurl/gzip/feed.rss">
EOF
rune "Adding feed" -a $httpurl/page.html
contains "$statedir/config" $httpurl/gzip/feed.rss

# The "remove feed" tests run rawdog with -r URL and check what is left in
# the config file.

begin "remove feed"
add "feed 3h $httpurl/0.rss"
add "feed 3h $httpurl/1.rss"
add "feed 3h $httpurl/2.rss"
# Remove the middle feed; its neighbours must be left alone.
rune "Removing feed" -r $httpurl/1.rss
contains "$statedir/config" $httpurl/0.rss $httpurl/2.rss
not_contains "$statedir/config" $httpurl/1.rss

begin "remove feed with options"
# Each feed line is followed by indented option lines belonging to it.
add "feed 3h $httpurl/0.rss"
add "   define_foo 0a"
add "   define_foo 0b"
add "feed 3h $httpurl/1.rss"
add "   define_foo 1a"
add "   define_foo 1b"
add "feed 3h $httpurl/2.rss"
add "   define_foo 2a"
add "   define_foo 2b"
rune "Removing feed" -r $httpurl/1.rss
# The removed feed's options must go with it; the others must stay.
contains "$statedir/config" \
	$httpurl/0.rss "foo 0a" "foo 0b" \
	$httpurl/2.rss "foo 2a" "foo 2b"
not_contains "$statedir/config" \
	$httpurl/1.rss "foo 1a" "foo 1b"

begin "remove feed, preserving comments"
# A comment line sits between two of the feed's option lines.
add "feed 3h $httpurl/0.rss"
add "   define_foo 0a"
add "# Keep this comment"
add "   define_foo 0b"
rune "Removing feed" -r $httpurl/0.rss
# The comment survives, but the options on both sides of it are removed.
contains $statedir/config "# Keep this comment"
not_contains $statedir/config "foo 0a" "foo 0b"

begin "remove nonexistant feed"
add "feed 3h $httpurl/0.rss"
add "feed 3h $httpurl/1.rss"
add "feed 3h $httpurl/2.rss"
# 3.rss was never added, so removal should report that it is not configured.
rune "not in the config file" -r $httpurl/3.rss

# Remove a feed under every combination of splitstate setting and whether
# the feed had been fetched before removal.
for state in false true; do
	for fetched in false true; do
		# $not is only used to word the test name.
		if $fetched; then
			not=""
		else
			not="not "
		fi
		begin "remove feed, ${not}fetched, splitstate $state"

		make_rss20 "$httpdir/feed.rss"
		add "feed 0 $httpurl/feed.rss"
		add "splitstate $state"

		if $fetched; then
			runs -uw
			contains "$statedir/output.html" example-item-title
			# With splitstate on, something should now match the glob under
			# the feeds directory (left unquoted so that it expands).
			if $state; then
				exists $statedir/feeds/*
			fi
		fi

		rune "Removing feed" -r "$httpurl/feed.rss"

		# With splitstate on, nothing should match the glob any more.
		if $state; then
			not_exists $statedir/feeds/*
		fi

		# After removal the feed's items must not be in the output.
		runs -uw
		not_contains "$statedir/output.html" example-item-title
	done
done

# Run the plugins test suite if it's there.
# The suite is sourced rather than executed, so it runs in this shell with
# access to the functions and variables defined in this script.
if [ -e rawdog-plugins/test-plugins ]; then
	. rawdog-plugins/test-plugins
fi

# The exit status of the whole script is whatever $exitcode holds at this
# point.
exit $exitcode
