#!/usr/local/bin/perl -I../pm -w

#-----------------------------------------------------------------------

=head1 NAME

B<createTitles> - Create the Jumping Spider title table.

=head1 SYNOPSIS

  createTitles
    [-help] |
    [-version] |
    [-verbose]
    [-databaseName databaseName]
    [-databaseMode GDBM | DBM | BSD]
    resultsFile

=head1 DESCRIPTION

B<createTitles> is a simple script to read the results of the robot search
and create a table of titles that map to URLs.

=head1 EXAMPLE USAGE

Create a title table for the results of the ACM site run.

 perl -I../lib createTitles \
    -verbose \
    -databaseName databaseName \
    -databaseMode DBM \
    ../results/acmscript

See also L<JumpingSpider>.

=cut

#-----------------------------------------------------------------------

use strict;
require 5.002;

use Getopt::Long;
use IO::Pipe;
use English;
use JumpingSpider;

#-----------------------------------------------------------------------

=head1 OPTIONS

=over 4

=item -help

Display a short help message with a reminder of supported
command-line options.

=item -version

Display the version of B<createTitles>.

=item -verbose

Enable verbose reporting.

=item -databaseName databaseName

The name of the database; overrides the default name
in L<JumpingSpider::Constants>.

=item -databaseMode databaseMode

The mode of the database; overrides the default mode
in L<JumpingSpider::Constants>.

=back

=cut

#-----------------------------------------------------------------------

# $VERSION is declared as a package variable through "use vars" and is
# assigned as one (no "my"), so it is visible as $main::VERSION.  A
# lexical "my $VERSION" here would shadow the package variable and leave
# it undefined.
use vars qw($VERSION);

$VERSION          = '1.00';
my $SHOW_VERSION  = 0;                # set by the -version switch
my $VERBOSE       = 0;                # set by the -verbose switch
my $HELP          = 0;                # set by the -help switch
my $COMMAND_NAME  = 'createTitles';   # prefix used in messages

# Name of the results file to read; filled in by ParseCommandLine().
my $InputFileName;

#-----------------------------------------------------------------------
# Parse the command line
#-----------------------------------------------------------------------
ParseCommandLine();

#-----------------------------------------------------------------------
# Open the necessary tables
#-----------------------------------------------------------------------
print "$COMMAND_NAME: Opening database tables.\n" if $VERBOSE;
my $Globals = JumpingSpider::Globals->new();
my $titleTable = $Globals->{'titleTable'};

print "$COMMAND_NAME: Opening input file.\n" if $VERBOSE;
# The two-argument open with an explicit "< " prefix is kept so that the
# script still runs on the Perl 5.002 it declares as its minimum version.
open(INPUT, "< $InputFileName") ||
   die "Could not open input file: $InputFileName: $!";

print "$COMMAND_NAME: Processing input...\n" if $VERBOSE;
# Process the input file looking for certain kinds of information.
# A line of interest has the form "ACTION first rest-of-line"; only
# lines whose action is TITLE are used.
while (<INPUT>) {
  # The match result must be tested: after a failed match $1..$3 may
  # still hold the captures of an earlier successful match, which would
  # make a non-matching line look like a repeat of a previous record.
  next unless /^(\w+)\s+([^\s]+)\s+(.*)$/;
  my ($action, $first, $second) = ($1, $2, $3);
  next unless $action eq 'TITLE';

  # $first is normalised by _addSlash() and becomes the tuple's Id;
  # $second, minus trailing whitespace, is stored as the string column.
  $first = _addSlash($first);
  $second =~ s/\s+$//;
  $titleTable->insertTuple(Tuple->new(Id::fromString($first),
                                      StringCol->new($second)));
  }
close INPUT;
#
# clean up
#
$Globals->close();
print "$COMMAND_NAME: Cleaned up and exiting.\n" if $VERBOSE;

#----------------------------------------------------------------------
# _addSlash($url) - add a slash to the URL
#
# Strips trailing whitespace, then returns the URL unchanged when it
# already ends in "/" or when the text after its last "/" contains an
# .htm, .html, .shtm or .shtml extension (in any letter case).
# Otherwise returns the URL with "/" appended.
#----------------------------------------------------------------------
sub _addSlash {
  my ($url) = @_;

  $url =~ s/\s+$//;

  # already ends in a slash: nothing to add
  return $url if $url =~ /\/$/;

  # text after the last slash (the whole string when there is no slash)
  my ($leaf) = $url =~ /([^\/]*)$/;

  # the last component names an HTML page, so leave the URL alone
  return $url if $leaf =~ /\.[sS]?[hH][tT][mM][lL]?/;

  return $url . '/';
  }

#------------------------------------------------------------------------
# ParseCommandLine() - handle command line
#
# Reads the switches from @ARGV with Getopt::Long.  -databaseMode and
# -databaseName are stored directly in the corresponding
# JumpingSpider::Constants package variables; -help, -verbose and
# -version set the file-level flags.  -version and -help print their
# message and exit with status 0.  Otherwise the first remaining
# argument is stored in $InputFileName.
#
# Dies when GetOptions reports a failure or when no input file name is
# left on the command line.
#------------------------------------------------------------------------
sub ParseCommandLine {
  my @switches = (
    'databaseMode=s', \$JumpingSpider::Constants::databaseMode,
    'databaseName=s', \$JumpingSpider::Constants::databaseName,
    'help',           \$HELP,
    'verbose',        \$VERBOSE,
    'version',        \$SHOW_VERSION,
    );

  GetOptions(@switches) || die "use -help switch to display brief help\n";

  if ($SHOW_VERSION) {
    print "This is $COMMAND_NAME, version $VERSION\n";
    exit 0;
    }

  if ($HELP) {
    print <<HelpEnd;
    $COMMAND_NAME, v$VERSION - create the Jumping Spider title table

    Usage: $COMMAND_NAME
                         [-help] |
                         [-version] |
                         [-verbose]
                         [-databaseName databaseName]
                         [-databaseMode GDBM | DBM | BSD]
                         resultsFile

        -help            : display this message
        -version         : display the version and exit
        -verbose         : display verbose information as running
        -databaseName name : name of the database
        -databaseMode mode : mode for the database

HelpEnd
    exit 0;
    }

  # Test for a defined, non-empty value rather than plain truth, so that
  # a results file literally named "0" is accepted.
  $InputFileName = shift @ARGV;
  die "$COMMAND_NAME: Needs an input file name "
    unless defined($InputFileName) && length($InputFileName);

}

