#!/usr/bin/perl

use lib '.';

#
#   Grutatxt - A text to HTML (and other things) converter
#
#   Angel Ortega <angel@triptico.com> et al.
#
#   This software is released into the public domain.
#   NO WARRANTY. See file LICENSE for details.
#

use Getopt::Long;
use locale;
use Grutatxt;

# program version: the Grutatxt module version plus this script's
# own revision, joined by a colon
$VERSION = join(':', $Grutatxt::VERSION, '1');

# --- input / output -------------------------------------------------

# by default read from standard input and write to standard output
($input_file, $output_file) = ('-', '>-');

# --- general --------------------------------------------------------

# output mode used when -m is not given
$mode = 'HTML';

# page title (empty: none given on the command line)
$title = '';

# switches that are off by default: time signature suppression,
# pure verbatim suppression and table of contents
($no_time_sig, $no_pure_verbatim, $toc) = (0, 0, 0);

# abstract line number and marks; both are handed to Grutatxt
# by reference further down
$abstract = 0;
@marks = ();

# --- HTML -----------------------------------------------------------

# CSS location, and whether it is embedded instead of linked
($css, $embed_css) = ('', 0);

# offset added to the h? header levels
$header_offset = 0;

# use real <dl> definition lists
$dl_as_dl = 0;

# --- troff / man / LaTeX --------------------------------------------

# troff table type
$table_type = 'allbox';

# man page section
$man_section = 1;

# tab size in LaTeX mode
$tabsize = 8;

#####################################################################

# Parse the command line. Each option stores its value in the
# matching global. If parsing fails, or help was requested, the
# usage text is shown (usage() terminates the program).
#
# 'sd|strip-dollars' and 'table-type' are registered here because
# both are documented in the usage text and both values are passed
# on to the Grutatxt constructor.
if (!GetOptions(
        'i|input=s'         =>  \$input_file,
        'o|output=s'        =>  \$output_file,
        'c|css=s'           =>  \$css,
        'e|embed-css'       =>  \$embed_css,
        't|title=s'         =>  \$title,
        'f|header-offset=s' =>  \$header_offset,
        'b|table-headers'   =>  \$table_headers,
        'ct|center-tables'  =>  \$center_tables,
        'xt|expand-tables'  =>  \$expand_tables,
        'sp|strip-parens'   =>  \$strip_parens,
        'sd|strip-dollars'  =>  \$strip_dollars,
        'ts|tabsize=s'      =>  \$tabsize,
        'nb|no-body'        =>  \$no_body,
        'v|version'         =>  \$version,
        'h|help'            =>  \$usage,
        'm|mode=s'          =>  \$mode,
        's|man-section=s'   =>  \$man_section,
        'docclass=s'        =>  \$latex_docclass,
        'papersize=s'       =>  \$papersize,
        'encoding=s'        =>  \$encoding,
        'dl'                =>  \$dl_as_dl,
        'table-type=s'      =>  \$table_type,
        'no-time-sig'       =>  \$no_time_sig,
        'no-pure-verbatim'  =>  \$no_pure_verbatim,
        'toc'               =>  \$toc,
        'href-new-window'   =>  \$href_new_window
    ) or $usage) {
    usage();
}

# -v|--version: print the version and stop
if ($version) {
    print "$VERSION\n";
    exit(0);
}

# Open input and output with the three-argument form of open(), so
# that a file name is never interpreted as an open mode, a pipe or
# a command. '-' still means standard input for -i; '-' (and the
# built-in default '>-') still mean standard output for -o.
if ($input_file eq '-') {
    open(I, '<&', \*STDIN) or die "Can't open $input_file: $!";
}
else {
    open(I, '<', $input_file) or die "Can't open $input_file: $!";
}

if ($output_file eq '-' or $output_file eq '>-') {
    open(O, '>&', \*STDOUT) or die "Can't create $output_file: $!";
}
else {
    open(O, '>', $output_file) or die "Can't create $output_file: $!";
}

# if utf-8 encoding is wanted, set the filehandles as utf-8
# so that regular expressions match all characters
# (this is crap)
if (defined($encoding) && $encoding =~ /^utf-?8/i) {
    binmode(I, ":utf8");
    binmode(O, ":utf8");
}

# read the whole input into a single string
$content = join('', <I>);
close I;

# passed by reference to Grutatxt as 'title'; it is used later as the
# fallback page title (presumably Grutatxt stores the document's
# first heading here -- verify in Grutatxt.pm)
$content_title = '';

# tab to space conversion is only wanted in LaTeX mode
if ($mode !~ /^latex$/i) {
    $tabsize = 0;
}

# constructor arguments, kept as an ordered key/value list
my @settings = (
    'mode'             => $mode,
    'header-offset'    => $header_offset,
    'table-headers'    => $table_headers,
    'center-tables'    => $center_tables,
    'expand-tables'    => $expand_tables,
    'strip-parens'     => $strip_parens,
    'strip-dollars'    => $strip_dollars,
    'tabsize'          => $tabsize,
    'dl-as-dl'         => $dl_as_dl,
    'table-type'       => $table_type,
    'title'            => \$content_title,
    'abstract'         => \$abstract,
    'marks'            => \@marks,
    'page-name'        => $title,
    'section'          => $man_section,
    'docclass'         => $latex_docclass,
    'papersize'        => $papersize,
    'encoding'         => $encoding,
    'no-pure-verbatim' => $no_pure_verbatim,
    'toc'              => $toc,
    'href-new-window'  => $href_new_window,
);

# build the converter and run it over the whole input; the
# result is a list of output lines
$grutatxt = Grutatxt->new(@settings);

@result = $grutatxt->process($content);

# In HTML mode, insert the literal line '<->' into the output right
# after line index $abstract. The mode is matched case-insensitively,
# like the other mode tests in this script.
if ($mode =~ /^html$/i) {
    # a first mark, when present and non-zero, overrides the
    # abstract position reported by Grutatxt
    if (scalar(@marks) && $marks[0]) {
        $abstract = $marks[0] - 1;
    }

    # the tail slice ends at the last valid index ($#result); going
    # one further would append an undef element, which is later
    # printed as a spurious empty line
    @result = (
        @result[0 .. $abstract],
        '<->',
        @result[$abstract + 1 .. $#result]
    );
}

# no title on the command line: fall back to the one collected
# while processing the document
$title = $content_title unless $title;

# the <html>...</html> wrapper is only generated in HTML mode
$no_body = 1 unless $mode =~ /^html$/i;

unless ($no_body) {
    # document prologue and <head>
    print O "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\"\n";
    print O "   \"http://www.w3.org/TR/REC-html40/loose.dtd\">\n";
    print O "<html><head>\n";
    print O "<meta http-equiv='Content-Type' content='text/html; charset=" .
        ($encoding || 'utf-8') . "'>\n";
    print O "<title>$title</title>\n";

    # generator comment with the conversion time, unless disabled
    printf O "<!-- converted from text by grutatxt $VERSION on %s -->\n", scalar(localtime)
        unless $no_time_sig;

    if ($css) {
        if ($embed_css) {
            # three-argument open: the CSS file name comes from the
            # command line and must be treated as a plain file name
            open(my $css_fh, '<', $css)
                or die "Can't open '$css' CSS file: $!";

            my $css_text = join('', <$css_fh>);
            close $css_fh;

            print O "<style type='text/css'>\n";
            print O $css_text . "\n";
            print O "</style>\n";
        }
        else {
            # not embedding: just link to the given URL
            print O "<link rel=StyleSheet href='$css' type='text/css'>";
        }
    }

    print O "</head><body>\n";
}

# the converted document, one line per element
foreach my $l (@result) {
    print O "$l\n";
}

print O "</body></html>\n" unless $no_body;

# buffered write errors (full disk, closed pipe) only show up at
# close time, so check it
close(O) or die "Can't write $output_file: $!";

exit(0);


# usage - prints the help text to standard output and terminates
# the program with exit status 1. Takes no arguments and never
# returns.
sub usage
{
    # The whole help text is one interpolating here-document: only
    # $VERSION is interpolated, the other '$' and '@' are escaped.
    print <<"EOT";
grutatxt $VERSION - Grutatxt format processor
Angel Ortega <angel\@triptico.com> et al.
This software is released into the public domain. NO WARRANTY.

Usage:

grutatxt [options] < input_text_file > output_html_file

Global options:

    -i|--input=FILE            Input file (STDIN)
    -o|--output=FILE           Output file (STDOUT)
    -t|--title=TITLE           Document title (if unset,
                               level 1 heading is used)
    -sp|--strip-parens         Strip parentheses in function
                               names (shown monospaced anyway)
    -sd|--strip-dollars        Strip leading \$ in variable
                               names (shown monospaced anyway)
    -m|--mode=[HTML|troff|man|latex|rtf]
                               Output mode: HTML, troff, man, LaTeX or RTF
                               (default: HTML)
    --no-time-sig              Avoid time signature in HTML comment
    --no-pure-verbatim         Disable pure verbatim mode
    --toc                      Add a table of contents after abstract

HTML options:

    -c|--css=CSS_URL_OR_FILE   CSS URL (or file if using --embed-css)
    -e|--embed-css             Embed CSS instead of linking to it
    -f|--header-offset=NUMBER  Offset to add to <h1>,
                               <h2>... headers (default 0)
    -b|--table-headers         Use <th> instead of <td> in
                               the first row of each table
    -ct|--center-tables        Centers the tables
    -xt|--expand-tables        Expands the tables (width=100%)
    -nb|--no-body              Don't generate <html><body>...
                               </body></html> enclosing
    --encoding=ENCODING        Character encoding (default: utf-8)
    --dl                       Use real <dl>, <dd> and <dt>
                               instead of tables in definition lists
    --href-new-window          Open links in new windows/tabs

troff options:

    --table-type=TYPE          Table type. Possible values:
                               box, allbox, doublebox (default allbox)

man options:

    -s|--man-section=SECTION   Man page section (default: 1)

LaTeX options:

    --docclass=CLASS           Document class (default: report)
    --papersize=SIZE           Paper size (default: a4paper)
    --encoding=ENCODING        Character encoding (default: latin1)
    -ts|--tabsize=NUMBER       Tab size for tab to space conversions in
                               LaTeX verbatim environment (default: 8)
EOT

    exit(1);
}
