#!/usr/local/bin/perl -I../lib -w
 
#-----------------------------------------------------------------------

=head1 NAME

B<parseSpecifications> - Create the data cube's data store and parser
from the specification files.

=head1 SYNOPSIS

  parseSpecifications
    [-help] |
    [-version] |
    [-verbose] 
    [-specifications directory] 
    [-parser directory] 
    [-databaseName databaseName] 
    [-databaseMode GDBM | DBM | BSD] 
    [-dimensions string] 

=head1 DESCRIPTION

B<parseSpecifications> reads the specification files for the dimensions
and creates the necessary databases.

=head1 EXAMPLE USAGE

Create the specifications.

 perl -I../lib parseSpecifications \
    -verbose \
    -specifications specifications \
    -parser parser \
    -databaseName databaseName \
    -databaseMode DBM \
    -flex flex \
    -gcc "gcc lex.yy.c -lfl -o" \

See also L<IncompleteDataCube>.

=cut

#-----------------------------------------------------------------------

use strict;
require 5.002;

use Getopt::Long;
use IO::Pipe;
use English;
use IncompleteDataCube;

#-----------------------------------------------------------------------

=head1 OPTIONS

=over 4

=item -help

Display a short help message with a reminder of supported
command-line options.

=item -version

Display the version of B<parseSpecifications>.

=item -verbose

Enable verbose reporting.

=item -specifications directory

The name of the specifications directory, defaults to specifications.

=item -parser parser

The name of the parser directory, defaults to parser.

=item -databaseName databaseName

The name of the database, overides the default name 
in L<IncompleteDataCube::Constants>.

=item -databaseMode databaseMode

The mode of the database, overides the default mode
in L<IncompleteDataCube::Constants>.

=back

=cut

#-----------------------------------------------------------------------

use vars qw($VERSION);

my $VERSION       = '1.00';
my $SHOW_VERSION  = 0;
my $VERBOSE       = 0;
my $HELP          = 0;
my $DIMENSIONS = 0;
my $COMMAND_NAME  = 'parseSpecifications';
my $SPECIFICATIONS = 'specifications';
my $PARSER= 'parser';
my $FLEX = 'flex';
my $GCC = 'gcc lex.yy.c -lfl -o';

my $unitName;
my $measureName;
my $measureSet;
my $subunitSet;
my $dimension;
my $lines;
my $matched;

# set up tags
my $regexTag = '[({](.*)[})]';
my $identTag = "(\\w+)|\\\"([^\\\"]*)\\\"|\\'([^']*)\\'";
my $periodTag = '(\.)';
my $equalsTag = '(=)';
my $measureTag = '(MEASURE:)';
my $unitTag = '(UNIT:)';
my $semicolonTag = '(;)';
my $filtersTag = '(FILTERS:)';
my $atTag = '(\@)';

# Filter related global state vars, ugly you bet
my  @filterUnits = ();
my  @filterMeasures = ();
my  $filterCount = new Id(0);

#---------------------------------------------------------------------

=head1 NAME Main
 
Parse the specification files and create the tables and flex file
needed to parse the actual records.
 
=head1 DESCRIPTION
 
This is the parser for the specification files.  It is a top-down
recursive descent parser.
 
=cut
 
#---------------------------------------------------------------------
#
# There aren't too many comments from here down, but it is 
# fairly straightforward code.  Your mileage may vary.
#

#-----------------------------------------------------------------------
# Parse the command line
#-----------------------------------------------------------------------
&ParseCommandLine();

my $whichDimension = 0;
my $fileName = "";
my $global = new IncompleteDataCube::Globals();
my $i;
#// Read the specs
for ($i = 0; $i < $IncompleteDataCube::Constants::dimensions; $i++) {
  $whichDimension = $i;
  $fileName = @$IncompleteDataCube::Constants::dimensionNames[$i];
  &LAopen("$PARSER/$fileName.flex");
  &LAemitHeader();
  # stick the parser in here! 
  &parse("$SPECIFICATIONS/$fileName");
  &LAemitTail();
  }

#// Ok now read the filters
print "Gobbling the filters.\n" if $VERBOSE;
$whichDimension = 0;
$fileName = $IncompleteDataCube::Constants::filterFileName;
&LAopen("$PARSER/$fileName.l");
&LAemitHeader();
&parse("$SPECIFICATIONS/$fileName");
&LAemitTail();

#// Ok now read the LogFileSplitter
print "Gobbling the filters.\n" if $VERBOSE;
$whichDimension = 0;
$fileName = $IncompleteDataCube::Constants::logFileSplitFileName;
&LAopen("$PARSER/$fileName.flex");
&LAemitHeader();
print "Doing the log file split.\n" if $VERBOSE;
open(FLOG, "<$SPECIFICATIONS/$fileName") || 
  die "Exiting: could not open $fileName\n";
while (<FLOG>) {
  print FOUT;
  }
&LAemitTail();
print "cd $PARSER; $FLEX $fileName.flex; $GCC $fileName\n" if $VERBOSE;
system("cd $PARSER; $FLEX $fileName.flex; $GCC $fileName");
close FLOG;
print "Closed the log file split.\n" if $VERBOSE;

#// Flex the suckers
print "Flexing the lexical analyzers.\n" if $VERBOSE;
for ($i = 0; $i < $IncompleteDataCube::Constants::dimensions; $i++) {
  $fileName = @$IncompleteDataCube::Constants::dimensionNames[$i];
  print "cd $PARSER; $FLEX $fileName.flex; $GCC $fileName\n" if $VERBOSE;
  system("cd $PARSER; $FLEX $fileName.flex; $GCC $fileName");
  }

$global->close();
print "alldone!\n" if $VERBOSE;

sub initParser {
  my ($fileName) = @_;
  $unitName = "";
  $measureName = "";
  $measureSet = new IdSet();
  $subunitSet = new IdSet();
  $dimension = $whichDimension;
  }

sub prog {
  &measures();
  &filters();
  &mustMatch($semicolonTag, 'end of file');
  }

sub filters {
  if (&filter()) {&filters();}
  }

sub filter {
  if (&mayMatch($filtersTag)) { 
    &unitList();
    &addAFilter($global->{'filterTable'}, 
               $global->{'filterUnitTable'}, 
               $global->{'filterMeasureTable'});
    return 1;
    }
  return 0; 
  }

sub unitList {
  if ((&mayMatch($filtersTag)) || (&mayMatch($semicolonTag))) { 
    &unMatch(); 
    return 
    }
  my $unit = &mustMatch($identTag, 'unit name');
  &mustMatch($atTag, $atTag);
  my $measure = &mustMatch($identTag, 'measure name');
  &addFilter($unit, $measure);
  &unitList();
  }

sub measures {
  if (&measure()) {&measures();}
  }

sub measure {
  if (&mayMatch($measureTag)) {
    $measureName = &mustMatch($identTag, 'measure name');
    my $id = Id::fromString($measureName);
    my $t1 = new Tuple($id, $id);
    $global->{'measureTables'}[$dimension]->insertTuple($t1);
    $measureSet = new IdSet();
    # read units
    while (&unit()) {};
    my $otherId;
    foreach $otherId ($measureSet->enumerate()) {
      $global->{'coarserMeasureGraphs'}[$dimension]->addEdge($otherId, $id);
      $global->{'finerMeasureGraphs'}[$dimension]->addEdge($id, $otherId);
      }
    return 1;
    }
  else {
    return 0;
    }
  }

sub unit {
  if (&mayMatch($unitTag)) {
    $unitName = &mustMatch($identTag, 'unit name');
    &unitToMeasure($unitName, $measureName, 
                   $global->{'unitToMeasureTables'}[$dimension]);
    if (&mayMatch($equalsTag)) {
      #// is a base unit
      my $regex = &mustMatch($regexTag, 'regular expression');
      &LAemitTokenRule($regex, $unitName);
      }
    else {
      #// is an interior unit
      $subunitSet = new IdSet();
      &subunits();
      &unitToSubunits($unitName, $subunitSet, 
                      $global->{'finerUnitGraphs'}[$dimension]);
      my $unitId = Id::fromString($unitName);
      my $otherId;
      for $otherId ($subunitSet->enumerate()) {
        $global->{'coarserUnitGraphs'}[$dimension]->addEdge($otherId,$unitId);
        }
      }
    return 1;
    }
  else {
    return 0;
    }
  }

sub subunits {
  if (&subunit()) {&subunits()};
  }

sub subunit {
  if (&mayMatch($unitTag) || &mayMatch($measureTag)) {
    &unMatch();
    return 0;
    }
  my $subunitMeasure = &mayMatch($identTag);
  return 0 unless $subunitMeasure;
  $measureSet->insert(Id::fromString($subunitMeasure));
  &mustMatch($periodTag, $periodTag); 
  my $subunitUnit = &mustMatch($identTag, 'subunit name');
  $subunitSet->insert(Id::fromString($subunitUnit));
  return 1;
  }

sub parse { 
  my ($file) = @_;
  print "Parsing $file\n" if $VERBOSE;
  &initParser($file);
  &initTokenizer($file);
  &prog();
  print "Closing $file.\n" if $VERBOSE;
  close FIN; 
}

# cleans the white space from around a string
sub cleanWS {
  my ($value) = @_;
  #chew up white space at both ends (parsing of fields depends on this!)
  $value =~ s/^[\s\n]+//;
  $value =~ s/[\s\n]+$//;
  return $value;
}
 
sub unMatch {
  #print "unmatching $matched, $lines\n";
  $lines = $matched . $lines;
  $matched = '';
}
 
sub mayMatch {
  my ($pattern) = @_;
 
  while ($lines =~ /^\s*$/) {
    $lines = <FIN> || $semicolonTag;
    chomp $lines;
    }
 
  if ($lines =~ /^\s*$pattern/) {
    my $match = $&;
    my $thing = $1 || $2 || $3;
    $lines =~ s/$match//;
    $matched = $match;
    #print "$pattern may match $match, leftover $lines\n";
    return $thing;
    }
  else {
    #print "$pattern did not may match $lines\n";
    return '';
    }
}
 
sub mustMatch {
  my ($pattern, $name) = @_;
 
  while ($lines =~ /^\s*$/) {
    $lines = <FIN> || $semicolonTag;
    chomp $lines;
    }
 
  if ($lines =~ /^\s*$pattern/) {
    my $match = $&;
    my $thing = $1 || $2 || $3;
    $matched = $match;
    #print "$lines before\n";
    $lines =~ s/\Q$match\E//;
    #print "$pattern must match z$match, leftover z$lines.\n";
    return $thing;
    }
  else {
    my @lines = split("\n", $lines);
    die "$lines \n^^^^^^^^^^\nExpecting $name, but it was not found\n";
    }
}
 
sub initTokenizer {
  my ($file) = @_;
 
  # slurp standard in, ignoring comments (lines that start with %)
  print "Opening $file for input.\n" if $VERBOSE;
  open (FIN, "<$file") || die "could not open $file\n";
  $lines = <FIN>;
}

  #/**
  #* Parser action to add a new unit and measure to the current filter
  #**/
  sub addFilter {
    my ($unit, $measure) = @_;
    push @filterUnits, Id::fromString($unit);
    push @filterMeasures, Id::fromString($measure);
    }

  #/**
  #* Parser action to create a new filter in the filter table
  #**/
  sub addAFilter {
    my ($filterTable, $filterUnitTable, $filterMeasureTable) = @_;
    my ($s, $f);
    my ($id);
    foreach $id (@filterUnits) {
      my $t = $filterTable->retrieveTuple($id);
      if (!$t) { $s = new IdSet(); }
      else { $s = $t->getValueAsIdSet(); }
      $s->insert($filterCount);
      $f = new Tuple($id, $s);
      $filterTable->insertTuple($f);
      }
    $f = new Tuple($filterCount, new IdList(\@filterUnits));
    $filterUnitTable->insertTuple($f);
    $f = new Tuple($filterCount, new IdList(\@filterMeasures));
    $filterMeasureTable->insertTuple($f);
    @filterUnits = (); 
    @filterMeasures = (); 
    $filterCount->increment();
    }

  #/**
  #* Convert a units to Subunits and add an edge to the finer unit graph
  #**/
  sub unitToSubunits {
    my ($unitName, $subunitSet, $finerUnitGraph) = @_;
    my $unitId = Id::fromString($unitName);
    my ($id);
    foreach $id ($subunitSet->enumerate()) {
      $finerUnitGraph->addEdge($unitId,$id);
      }
    }

  #/**
  #* Add a tuple to the unit to measure table
  #**/
  sub unitToMeasure {
    my ($unit, $measure, $unitToMeasureTable) = @_;
    my $t = new Tuple(Id::fromString($unit), Id::fromString($measure));
    $unitToMeasureTable->insertTuple($t);
    }

=head2 LAopen(string $filename)

=over 4

=item * filename

- The name of the lexical analyzer file.

=back

Open the file for output.

=cut

#---------------------------------------------------------------------
  #/**
  #* Open the file to contain the flex source
  #**/
  sub LAopen {
    my ($s) = @_;
    print "Opening $s for LA output.\n" if $VERBOSE;
    open(FOUT, ">$s") || die "could not open $s";
    }
#---------------------------------------------------------------------

=head2 emitTokenRule(string regex, string unit)

=over 4

=item * regex 

- the regular expression for a unit

=item * unit 

- the unit

Emit a rule to recognize a token.

=cut

#---------------------------------------------------------------------
  sub LAemitTokenRule {
    my ($regex, $unit) = @_;
    print FOUT $regex . " { printf(\"" . 
               (Id::fromString($unit))->image() . " \"); } REJECT\n";
    }
#---------------------------------------------------------------------

=head2 LAemitHeader

Generate a canned header for the flex file

=cut 

#---------------------------------------------------------------------
  sub LAemitHeader {
    my ($self) = @_;
    print FOUT <<EofHeader;
%{
/* flex source for lexical analyzer, automatically generated */
%}
%%
EofHeader
    }
#---------------------------------------------------------------------

=head2 LAemitTail

Generate a canned tail for the flex file

=cut

#---------------------------------------------------------------------
  sub LAemitTail {
    print FOUT <<EofTail;
. {}
%%
#undef input()
#define input() mlex_input()
#undef unput()
#define unput(c) mlex_unput(c)
EofTail
    print "Closing lexical analyzer.\n" if $VERBOSE;
    close FOUT; 
    }
#---------------------------------------------------------------------

#------------------------------------------------------------------------
# ParseCommandLine() - handle command line
#------------------------------------------------------------------------
sub ParseCommandLine {
  my @switches = (
    'databaseMode=s', \$IncompleteDataCube::Constants::databaseMode,
    'databaseName=s', \$IncompleteDataCube::Constants::databaseName,
    'dimensions=s',   \$DIMENSIONS,
    'help',           \$HELP,
    'verbose',        \$VERBOSE,
    'version',        \$SHOW_VERSION,
    'specifications', \$SPECIFICATIONS,
    'parser',         \$PARSER,
    'flex=s',         \$FLEX,
    'gcc=s',          \$GCC,
    );

  &GetOptions(@switches) || die "use -help switch to display brief help\n";

  if ($DIMENSIONS) {
    # redo the dimension name string!
    $IncompleteDataCube::Constants::dimensionNameString = $DIMENSIONS;
    $IncompleteDataCube::Constants::dimensionNames = [ split(" ", $DIMENSIONS) ];
    }
  if ($SHOW_VERSION) {
    print "This is $COMMAND_NAME, version $VERSION\n";
    exit 0;
    }

  if ($HELP) {
    print <<HelpEnd;
    $COMMAND_NAME, v$VERSION - parse the Incomplete Data Cube specifications

    Usage: $COMMAND_NAME 
                         [-help] |
                         [-version] |
                         [-verbose]
                         [-specifications directory]
                         [-parser directory]
                         [-databaseName databaseName]
                         [-databaseMode GDBM | DBM | BSD]

        -help            : display this message
        -verbose         : display verbose information as running
        -specifications directory : directory containing specifications
        -parser directory : directory containing the logfile flex 
        -databaseName name : name of the database
        -databaseMode mode : mode for the database 

HelpEnd
    exit 0;
    }

}
