#!/usr/bin/perl

#
#  Copyright (C) 1999-2001 Ricardo Ueda Karpischek
#
#  This is free software; you can redistribute it and/or modify
#  it under the terms of the version 2 of the GNU General Public
#  License as published by the Free Software Foundation.
#
#  This software is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this software; if not, write to the Free Software
#  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307,
#  USA.
#

#
# mkdoc.pl: generates the Clara OCR documentation
#

use strict;

# Input state used by read_input/gen_line: $nl is the look-ahead
# line, $nl_empty is nonzero when $nl must be refilled before use,
# $input_exhausted is set once the input file has no more lines and
# $appends counts the lines produced by gen_line.
my($nl,$nl_empty,$input_exhausted,$appends);

# $cl is the current line. $sec_title, $sec_class and $sec_number
# describe the section being read: its title, its class ('s', 'm',
# 'c', 'p' or '', see classify) and its number taken from %sname.
my($cl,$sec_title,$sec_class,$sec_number);

# nonzero to print diagnostics on STDERR
my($verbose);

# number of lines of the current excerpt, counted from the first
# line that contains a word character
my($ne);

# section and subsection counters used to number the contents
my($sn,$ssn);

# output buffers: the preamble sections, the section currently being
# read and the "contents" section
my($preamble,$output,$contents);

# %sname maps a section title to its number, %st maps a section
# number to the text of that section
my(%sname,%st);

# $html, $nroff and $web are set from the command-line switches;
# $pre is nonzero while an HTML preformatted block is open
my($html,$nroff,$web,$pre);

# number of leading characters removed from each excerpt line,
# computed from the indentation of the last title read
my($margin);

# date written on the nroff .TH line
my($date);

# version written on the nroff .TH line and used as the value of the
# CLARA_VERSION macro
my($version);

# $fname is the file being processed. $docname, $other and $title
# are chosen by the document switch (-book, -faq, ...): the name of
# the document to generate, a regex alternation of the other
# documents and the title of the generated document.
my($fname,$docname,$other,$title);

# oops! (release date and version are hard-coded here)
$date = "May 03 2002";
$version = "0.9.9";

#
# Produce on $nl the lines that are appended to an exhausted input.
# The first four calls (counted by $appends) build the excerpt
#
#     /* (book)
#     foo section
#     -----------
#     */
#
# whose title makes process_line close the last real section. Every
# later call produces an empty line.
#
sub gen_line
{
    # the synthetic excerpt, one entry per call
    my(@tail) = ("/* (book)\n", "foo section\n", "-----------\n", "*/\n");

    # past the synthetic excerpt only empty lines are produced
    $nl = (($appends >= 0) && ($appends < scalar(@tail))) ? $tail[$appends] : "\n";
    ++$appends;
}

#
# Put the next input line into $cl and make available on $nl the
# next line for look-ahead tests. If the input got exhausted,
# append the lines generated by gen_line.
#
# When $nl_empty is set the old look-ahead line is thrown away and
# $nl is refilled before being used.
#
sub read_input
{
    # make sure that $nl contains one line
    if ($nl_empty) {
        fetch_next_line();
        $nl_empty = 0;
    }

    # the next line becomes the current line
    $cl = $nl;

    # look one line ahead again
    fetch_next_line();
}

#
# Helper of read_input: load on $nl the next line of the file F,
# or the next line generated by gen_line once F has no more lines
# (in which case $input_exhausted is set).
#
# Excerpt markers are normalized while reading: markers of the
# documents listed on $other lose their tag, and markers of $docname
# or "all" become "(book)", the only tag process_file looks for.
#
sub fetch_next_line
{
    my($line);

    # test definedness, not truth: a last line "0" without newline
    # is a valid line, not the end of the input. A lexical is used
    # so the caller's $_ is left alone.
    if ((!$input_exhausted) && defined($line = <F>)) {
        $line =~ s{/\* \(($other)\)}{/* }g;
        $line =~ s{/\* \($docname\)}{/* (book)}g;
        $line =~ s{/\* \(all\)}{/* (book)}g;
        $nl = $line;
    }
    else {
        gen_line();
        $input_exhausted = 1;
    }
}

#
# Classify the section using its title.
#
# Reads $sec_title and stores on $sec_class one of
#
#     's'  the section that describes the command-line switches
#     'm'  a section that describes a menu
#     'c'  the "contents" section
#     'p'  a preamble section
#     ''   any other section
#
# In verbose mode, a section of class '' that has no number (that
# is, absent from the index) is reported on STDERR.
#
sub classify
{
    # the section that describes the command-line switches
    if ($sec_title =~ /Reference of command-line switches/i) {
        $sec_class = 's';
    }

    # sections that describe menus
    elsif ($sec_title =~ / menu$/) {
        $sec_class = 'm';
    }

    # the "contents" section
    elsif ($sec_title =~ /^CONTENTS/) {
        $sec_class = 'c';
    }

    # the preamble sections
    elsif ($sec_title =~ /^(NAME|SYNOPSIS|DESCRIPTION|WELCOME)/) {
        $sec_class = 'p';
    }

    # other sections
    else {
        $sec_class = '';

        # detected section absent from the index. The message is
        # printed with print, not printf, because the title is data
        # and may contain '%'.
        if (($verbose) &&
            ($sec_number eq '') &&
            ($sec_title !~ /^foo section/)) {

            print STDERR "out of index: $fname section $sec_title";
        }
    }
}

#
# Close preformatted mode.
#
# Does nothing unless HTML is being generated and a preformatted
# block is open. Otherwise the newlines at the end of $output are
# dropped, the closing markup is appended and $pre is cleared.
#
sub close_pre
{
    # nothing is open
    return unless (($html) && ($pre));

    # finish preformatted mode
    # NOTE(review): the opening markup written by process_line has
    # no <CENTER>, so the </CENTER> below is unmatched.
    $output =~ s/\n+\z//;
    $output .= "</PRE>\n</TD></TR></TABLE></CENTER>\n";
    $pre = 0;
}

#
# process one excerpt line
#
# Works on the current line $cl and on the look-ahead line $nl.
#
# If $nl underlines $cl with dashes of the same length, $cl is a
# title: the section accumulated on $output is stored (on $preamble,
# on $contents or on %st, depending on its class and number) and a
# new section is started with the heading as its first output.
#
# Otherwise the margin is removed from $cl and the line is appended
# to $output, formatted according to the class of the section and to
# the output mode. While reading the "contents" section the sections
# are numbered and their names are memorized on %sname.
#
# Diagnostics are printed with print, not printf: titles and lines
# are data and may contain '%'.
#
# NOTE(review): the sprintf calls that build $output still take
# document text as the format, so '%' sequences in the text are
# interpreted. This is left unchanged because the documentation
# sources may already contain "%%" to work around it; confirm
# before replacing them by plain strings.
#
sub process_line
{
    my($text,$rule);

    # count lines discarding the initial empty block
    if (($cl =~ /\w/) || ($ne > 0)) {
        ++$ne;
    }

    # ignore the line 2 when reading lists
    if (($sec_class eq 's') && ($ne == 2)) {
        return;
    }

    # $cl contains an underlined title
    ($text = $cl) =~ s/^ *//;
    ($rule = $nl) =~ s/^ *//;
    if (($nl =~ /^ *-+\n$/) && (length($text) == length($rule))) {

        # close preformatted mode
        close_pre();

        # margin size: the indentation of the title
        $sec_title = $text;
        if (!($sec_title =~ /^foo section/)) {
            if ($verbose) {
                print STDERR "detected title: $sname{$sec_title} $sec_title";
            }
        }
        $margin = length($cl) - length($text);

        # discard next line (the underline)
        $nl_empty = 1;

        # store the just finished section
        if ($sec_class eq 'p') {
            $preamble .= $output;
        }
        elsif ($sec_class eq 'c') {
            $contents .= $output;
        }
        elsif ($sec_number ne '') {
            $st{$sec_number} = $output;
            if ($verbose) {
                print STDERR "$sec_number size is " . length($output) . "\n";
            }
        }

        # initialize the just started section
        $sec_number = $sname{$text};
        if ($html) {
            my($bare,$cell);

            # the heading goes in a one-cell table; subsections
            # (numbers like 1.2) get a different background
            ($bare = $text) =~ s/\n//g;
            $cell = "<P><TABLE BORDER=1 WIDTH=100%><TR>";
            if ($sec_number =~ /\d+\.\d+/) {
                $cell .= "<TD BGCOLOR=#E2D3FC>";
            }
            else {
                $cell .= "<TD BGCOLOR=#79BEC6>";
            }
            $output = sprintf("$cell<FONT SIZE=+1><B>$sec_number $bare");
            $output .= "</B></FONT></TD></TR></TABLE>";
        }
        elsif ($nroff) {
            $output = sprintf(".SH $sec_number $text");
        }
        else {
            if ($sec_number eq '') {
                $output = sprintf("$text");
            }
            else {
                $output = sprintf("$sec_number $text");
            }
        }
        classify();
        return;
    }

    # remove the margin from $cl
    $cl = substr($cl,$margin);
    if ($cl eq '') {
        $cl = "\n";
    }

    # put command line switches in bold (the first line of each
    # excerpt of the switches section)
    if (($sec_class eq 's') && ($ne == 1)) {

        # close preformatted mode
        close_pre();

        if ($html) {
            $cl = "<P><TABLE BORDER=1><TR><TD BGCOLOR=#F0F0F0><B>" . $cl;
            $cl .= "</B></TD></TR></TABLE><P>";
        }
        else {
            $cl =~ s/ / \\ /;
            $cl = ".TP\n.BI " . $cl;
        }
    }

    # put menu items in bold (the first line of each excerpt of a
    # menu section)
    elsif (($sec_class eq 'm') && ($ne == 1)) {

        # close preformatted mode
        close_pre();

        if ($html) {
            $cl = "<P><TABLE BORDER=1><TR><TD BGCOLOR=#F0F0F0><B>" . $cl;
            $cl .= "</B></TD></TR></TABLE><P>";
        }
        else {
            $cl = ".TP\n.B " . $cl;
        }
    }

    # compute section numbers and memorize section names
    elsif ($sec_class eq 'c') {

        # main section
        if ($cl =~ /^\w/) {
            ++$sn;
            $ssn = 0;
            $sname{$cl} = "$sn.";
            if ($verbose) {
                print STDERR "registered $cl";
            }
            $cl = "$sn. $cl";
        }

        # subsection
        elsif ($cl =~ /^    \w/) {
            ++$ssn;
            $cl = substr($cl,4);
            $sname{$cl} = "$sn.$ssn";
            if ($verbose) {
                print STDERR "registered $cl";
            }
            $cl = "    $sn.$ssn $cl";
        }
    }

    # replace empty lines by the paragraph tag
    elsif (($cl =~ /^[ \t\r\n]*$/) && ($html) && ($pre == 0)) {
        $cl = "\n<P>\n";
    }

    # enter preformatted mode
    elsif (($cl =~ /^ /) && ($html) && ($pre == 0)) {
        $cl = "<TABLE WIDTH=100%><TR><TD BGCOLOR=#E0E0E0><PRE>\n\n$cl";
        $pre = 1;
    }

    # finish preformatted mode
    elsif (($cl =~ /^[^ ]/) && ($cl ne "\n") && ($html) && ($pre)) {

        close_pre();
    }

    # send to output
    if ($pre) {
        chomp($cl);
        if ($cl !~ /^ *$/) {
            $output .= "$cl\n";
        }
        else {
            $output .= "\n";
        }
    }
    else {
        $output .= sprintf($cl);
    }
}

#
# process one file
#
# The argument is the name of a source file. Every line found
# between an excerpt opening ("/* (book)" after the normalization
# done when reading) and the next line containing "*/" is handed to
# process_line. Reading goes on past the end of the file until six
# of the lines generated by gen_line were appended, so the last
# section of the file gets closed.
#
# A file that cannot be opened is reported on STDERR and skipped.
#
sub process_file
{
    # reset current section (cs) variables
    $sn = 0;
    $ssn = 0;
    $sec_class = '';
    $output = "";

    # prepare the input. The three-argument open takes the name
    # literally (no mode characters or pipes) and its result is
    # checked, so a missing file does not go unnoticed.
    $fname = $_[0];
    if (!open(F,'<',$fname)) {
        warn("mkdoc.pl: cannot open $fname: $!\n");
        return;
    }
    $nl_empty = 1;
    $appends = 0;
    $input_exhausted = 0;

    # process the input
    while ($appends < 6) {

        # look for the start of an excerpt
        read_input();
        next unless ($cl =~ /\/\* +\(book\)/);

        # initialize control variables
        $ne = 0;

        # for each excerpt line
        while (1) {

            read_input();
            # untabfy (to do)

            # end of excerpt detected
            last if ($cl =~ /\*\//);

            # add to output
            process_line();
        }
    }
    close(F);
}

#
# Expand macros
#
# Returns a copy of the argument where CLARA_HOME is replaced by the
# address of the Clara OCR site and CLARA_VERSION by $version.
#
sub expand
{
    # work on a lexical copy: assigning to $_ would overwrite the
    # element being visited when the caller is inside a for or map
    my($text) = $_[0];

    $text =~ s/CLARA_HOME/http:\/\/www.claraocr.org\//g;
    $text =~ s/CLARA_VERSION/$version/g;
    return($text);
}

#
# Convert addresses to HTML links
#
# Returns a copy of the argument where each address that starts with
# "http://" and is followed by a space or a newline is wrapped in an
# <A HREF=...> element. Punctuation (any of . , ; : ) ]) found right
# before that space or newline is left outside the link. An address
# at the very end of the text, with no space or newline after it, is
# not converted.
#
sub linkfy
{
    # work on a lexical copy: assigning to $_ would overwrite the
    # element being visited when the caller is inside a for or map
    my($text) = $_[0];

    $text =~ s{(http://[^ ]*?)([.,;:)\]]*|)([ \n])}{<A HREF=$1>$1</A>$2$3}gms;
    return($text);
}

#
# Print Book Body
#
# Prints on the standard output the list of sections taken from
# $contents and then the sections themselves, in the order of that
# list. The text of each section comes from %st, indexed by the
# section number. A section with no text gets only its heading, and
# a subsection with no text is also marked "(to be written)" on the
# list.
#
sub print_body
{
    my(@c,@secnum,$i);

    #
    # Split the contents and extract the section numbers (only
    # once; lines that are not numbered get an undefined number)
    #
    @c = split("\n",$contents);
    for ($i=0; $i<=$#c; ++$i) {
        ($secnum[$i]) = ($c[$i] =~ /^ *(\d.*?) /);
    }

    #
    # Output list of sections
    #
    if ($html) {
        print "$c[0]\n<UL>\n";
    }
    for ($i=0; $i<=$#c; ++$i) {
        if (($secnum[$i] ne '') &&
            ($secnum[$i] =~ /\d+\.\d+/) &&
            ($st{$secnum[$i]} eq "")) {

            $c[$i] .= " (to be written)";
        }
    }
    for ($i=0; $i<=$#c; ++$i) {
        if ($html) {
            if ($secnum[$i] eq '') {
                print "<P>\n";
            }
            else {

                # a main section other than the first one closes
                # the list of subsections of the previous one
                if ($secnum[$i] =~ /^([2-9]|\d\d+)\.$/) {
                    print "</UL>\n";
                }
                print "<LI> <A HREF=#" . $secnum[$i] . ">$c[$i]</A>\n";

                # every main section opens a list of subsections
                if ($secnum[$i] =~ /^\d+\.$/) {
                    print "<UL>\n";
                }
            }
        }
        else {
            print "$c[$i]\n";
        }
    }
    if ($html) {
        print "</UL>\n</UL>\n";
    }
    else {
        print "\n";
    }

    #
    # Output the sections
    #
    for ($i=0; $i<=$#c; ++$i) {
        my($id,$text,$cell);

        $id = $secnum[$i];
        next if ($id eq '');

        # the section is currently empty: output only its heading
        if ($st{$id} eq "") {

            ($text = $c[$i]) =~ s/^ *//;
            if ($html) {
                $cell = "<P><A NAME=$id><TABLE BORDER=1 WIDTH=100%><TR>";
                if ($id =~ /\d+\.\d+/) {
                    $cell .= "<TD BGCOLOR=#E2D3FC>";
                }
                else {
                    $cell .= "<TD BGCOLOR=#79BEC6>";
                }
                print "$cell<FONT SIZE=+1><B>$text</B></FONT></TD></TR></TABLE>\n";
            }
            else {

                # NOTE(review): the .SH request is written in plain
                # text mode too, not only when -nroff was given.
                print "\n.SH $text\n";
            }
        }

        # the section has contents. A lexical holds the text; the
        # package variable $a (used by sort) is not touched.
        else {
            $text = expand($st{$id});
            if ($html) {
                $text = linkfy($text);
                print "<A NAME=$id>\n$text";
            }
            else {
                print $text;
            }
        }
    }
}

#
# Initial state: verbose mode off, empty output buffers, no
# preformatted block open and plain text output.
#
$verbose = 0;
($preamble,$contents) = ("","");
$pre = 0;
($html,$nroff,$web) = (0,0,0);

#
# Process command-line arguments.
#
# The leading arguments that start with '-' are the switches. The
# switches -html and -nroff choose the output format (the last one
# given wins), -web sets $web, and each document switch chooses the
# name of the document, the alternation of the other documents and
# the title. Any other switch is ignored.
#
{
    my(%doc,@args,$opt);

    # document switch => [ $docname, $other, $title ]
    %doc = (
        '-book'     => ['book',
                        'faq|devel|tutorial|glossary',
                        "Clara OCR Advanced User's Manual"],
        '-faq'      => ['faq',
                        'all|book|devel|tutorial|glossary',
                        "The Clara OCR FAQ"],
        '-devel'    => ['devel',
                        'faq|book|tutorial|glossary',
                        "Clara OCR Developer's Guide"],
        '-tutorial' => ['tutorial',
                        'faq|book|devel|glossary',
                        "Clara OCR Tutorial"],
        '-glossary' => ['glossary',
                        'faq|book|devel|tutorial',
                        "Clara OCR Glossary"],
    );

    # consume the switches
    @args = @ARGV;
    while ((scalar(@args) > 0) && ($args[0] =~ /^-/)) {
        $opt = shift(@args);

        if ($opt eq '-html') {
            ($html,$nroff) = (1,0);
        }
        elsif ($opt eq '-nroff') {
            ($html,$nroff) = (0,1);
        }
        elsif ($opt eq '-web') {
            $web = 1;
        }
        elsif (exists($doc{$opt})) {
            ($docname,$other,$title) = @{$doc{$opt}};
        }
    }

    # remaining arguments are assumed to be the source code files
    foreach my $src (@args) {
        process_file($src);
    }
}

#
# Output the document: the heading and the preamble, then the body
# of the Book, then the footings.
#
if ($html) {
    my($base,@nav,$head);

    print "<HTML><HEAD><TITLE>Clara Book</TITLE></HEAD>\n",
          "<BODY BGCOLOR=#D0D0D0>\n";

    # remove everything until DESCRIPTION heading
    $preamble =~ s/^.*?DESCRIPTION.*?<\/TABLE>//ms;

    # expand the macros and convert the addresses to links
    $preamble = linkfy(expand($preamble));

    # prefix of the navigation links (empty: the links are
    # relative; 'http://www.claraocr.org' was used before)
    $base = '';

    # navigation links; the link to the main page is present only
    # when -web was given
    @nav = (
        ['clara-faq.html', "FAQ"],
        ['clara-glo.html', "Glossary"],
        ['clara-tut.html', "Tutorial"],
        ['clara-adv.html', "User's Manual"],
        ['clara-dev.html', "Developer's Guide"],
    );
    if ($web) {
        unshift(@nav, ['index.html', "Main"]);
    }

    # add HTML heading
    $head = "<TABLE WIDTH=100% BORDER=1 BGCOLOR=#E2D3FC><TR><TD><CENTER><H1>"
          . "<BR>$title<BR></H1></CENTER></TD></TR></TABLE>\n"
          . "<P>\n"
          . "<CENTER>\n"
          . join('', map { "[<A href=" . $base . $_->[0] . ">" . $_->[1] . "</A>]\n" } @nav)
          . "</CENTER>\n";
    $preamble = $head . $preamble;
}
else {

    # nroff gets the .TH request, plain text an underlined title
    if ($nroff) {
        print ".TH CLARA 1 \"$date\" \"$title v $version\"\n";
    }
    else {
        print "$title\n", "-" x length($title), "\n\n";
    }
    $preamble = expand($preamble);
}
print $preamble;

#
# Output the body of the Book
#
print_body();

#
# footings
#
# NOTE(review): HTML has no closing tag for HR; an opening <HR> may
# have been intended here.
#
if ($html) {
    print "</HR></BODY></HTML>\n";
}
