#!/usr/bin/perl -w
# 2012 Public Domain Wesley Ebisuzaki
#
# alt_gribmap  alternative format for grib2 ctl files
# 6/2013 added ENV{OMP_NUM_THREADS}=1 for wgrib2
use POSIX;
use threads;
use threads::shared;

$program=$0;
$version="0.0.9";
$wgrib2="wgrib2";
$gzip='gzip';
$wgrib2_flags='';
# $index_format = 1;
$ctl='';
$good_npts=0;
$verbose = 0;
$update = 0;
$nthreads = 5;
$match = '';
$ENV{'OMP_NUM_THREADS'}=1;

$undef = -999;
# $undef will not be the same after packed into $index_buffer because
# saved as 32-bit unsigned number and converted back into positive 64-bit number
$undef_vec = unpack("N", pack("N", -999));

# get options
for ($i = 0; $i <= $#ARGV; $i++) {
   $_ = $ARGV[$i];
   if (/^-i/) { $ctl = $ARGV[++$i]; }
   elsif (/^-v/) { $verbose++; }
   else { $ctl = $ARGV[$i]; }
}

if ($ctl eq "") { die "$0: version $version\ngribmap for alt_g2ctl\n"; }

&scan_ctl;
&resolve_dset_index;

# print contents of the id table
if ($verbose > 1) {
   for ($i = 0; $i < $nfields; $i++) { print "table: $i  id=$var_id[$i]\n"; }
}

# find number of fields per time step
$size = $nfields * $ntime * 2;
if ($verbose > 1) { print "size=$size\n"; }

# figure out the initial date code
$date0=from_gdate($start_time);
if ($verbose > 1) { print "date0 = $date0 or $start_time\n"; }

# make an array of all the time codes
make_timestamps($ntime, $date0, $inc_time);

if ($verbose > 1) { 
   foreach $time (sort keys(%time_stamp)) { print "time=$time $time_stamp{$time} \n"; }
}

mk_template_list($dset, $inc_time);

$n=$#template_file;
$nfound=0;
$nmissing=0;

make_index();

$total=$ntime*$nfields;
print "$program v$version finished ctl=$ctl records matched=$nfound, not matched=$nmissing ctl_defn=$total\n";

exit 0;


#-----------------------------------------------------------------

sub make_index {

#    my ($index_buffer);
   my ($inv, $last, $j);
   my ($num,$loc,$npts,$date,$id);
   my ($fail_fork);


   # open index file
   open(INDEX, ">", $index) or die "failed to open index file: $index\n";
   binmode(INDEX);
   # initialize index file buffer with -999 (32-bit network order)
   $index_buffer = (pack "N", $undef) x $size;
   $temp_index_buffer = $index_buffer;

#  make pipes for communiation

   for ($count = 1; $count < $nthreads; $count++) {
      pipe($read[$count], $write[$count] );
      binmode($read[$count]);
      binmode($write[$count]);
   }

#  make $nthreads threads/processes
   my (@pids);
   $fail_fork = 0;
   $pid = -1;

   for ($count = 1; $count < $nthreads; $count++) {
      $pid = fork();
      if (not defined($pid)) {
#        if fork fails, set number of procs=1
         print "fork failed, nprocs set to 1\n";
         $nthreads = 1; $fail_fork = 1; last; }
      if ($pid != 0) {
         # parent
         $pids[$count] = $pid;
      }
      else {
         # child
         last;
      }
   }
   if ($pid != 0) { $count = 0 };

#  if fork failed, kill childern procs
   if ($fail_fork == 1 && $pid == 0) { exit 0; }

#  print "process=$count out of $nthreads processes\n";

   if ($update == 1) {
      # update: use index files
      for ($inv = $count; $inv <= $n; $inv += $nthreads) {
         $inv_file = $template_file[$inv] . $wgrib2_inv . '.gz';
         # make inventory file
         if (! -f $inv_file) {
             if (! -f $template_file[$inv]) {
                $file = $template_file[$inv];
                print "missing grib2 file $file\n";
                $template_file[$inv] = '';
                next;
             }
             else {
#               print "making: $inv_file  process=$count\n";
                system "$wgrib2 $wgrib2_flags $match \"$template_file[$inv]\" | $gzip -c > $inv_file";
             }
         }
         if (-f $inv_file) {
             print "scanning-gz $inv_file (process=$count)\n";
             open(INV, "$gzip -dc $inv_file |") or die "could not $gzip $inv_file\n";
             scan_inv();
         }
      }
   }
   else {
      # scan the grib2 files
      for ($inv = $count; $inv <= $n; $inv += $nthreads) {
         if (! -r $template_file[$inv]) {
            print "missing grib2 file $template_file[$inv] (process=$count)\n";
            next;
         }
         open(INV,"$wgrib2 $wgrib2_flags $match \"$template_file[$inv]\"  |") or die "unexpect fail $wgrib2 $template_file[$inv]\n";
         print "scanning $template_file[$inv] (process=$count)\n";
         scan_inv();
      }
   }

#  child processes write $index_buffer to parent

   if ($pid == 0) { 
      print { $write[$count] } pack("NNN", $count, $nmissing, $nfound);
#     only write index file if found a match
      if ($nfound != 0) { print { $write[$count] } $index_buffer; }
      exit 0;
   }

#  parent process reads $index_buffer from all child process
#  and merges index_buffer

   print "merge index files\n";
   for ($count = 1; $count < $nthreads; $count++) {
      read  $read[$count], $i, 4;
      $j = unpack("N", $i);
      if ($j != $count) { print "programming error, pipe mismatch\n"; }

      read  $read[$count], $i, 4;
      $j = unpack("N", $i);
      $nmissing += $j;

      read  $read[$count], $i, 4;
      $j = unpack("N", $i);
      $nfound += $j;

      if ($j != 0) {
         $i = read  $read[$count], $temp_index_buffer, 4*$size;
      }

#      print "read pipe $count bytes=$i nfound=$j undef=$undef\n";

      for ($i = 0; $i < $size; $i++) {
         $j = vec($temp_index_buffer, $i, 32);
         if ($j != $undef_vec) { vec($index_buffer, $i, 32) = $j;}
      }
      waitpid($pids[$count], 0);
   }

   # write index file
   print "writing out index\n";
   print INDEX pack("NN", 1, $size);
   print INDEX $index_buffer;
}


# --------------------- start of various routines -----------------------

sub scan_inv {

      my ($num, $loc, $npts, $date, $id, $t, $j);

      while (<INV>) {
         chomp;
         if ($verbose > 2) { print "wgrib2 inv=$_\n"; }
         ($num,$loc,$npts,$date,$id) = split(/:/, $_, 5);
         $npts =~ s/npts=//;
         $npts = $npts + 0;
         $date =~ s/.*=//;
         # convert YYYYMMDDHHMMSS -> YYYYMMDDHHMM
         $date = substr($date,0,12);
         if ($good_npts == 0) { $good_npts = $npts; }

         # get submessage number
         if ($num =~ /\.(\d*)/) { $num = $1; }
         else { $num = 1; }

         if ($verbose > 2) { print "submsg num=$num loc=$loc date=$date id=$id\n"; }
         $t = -1;
         if (($good_npts == $npts) && defined($i = $var_num{$id}) && defined($t = $time_stamp{$date}) ) {
            if ($verbose > 0) { print "MATCH $id $date\n"; }
            $j = 2*($i + $nfields * $t);
            vec($index_buffer, $j, 32) = $loc;
            vec($index_buffer, $j+1, 32) = $num;
            $nfound++;
         }
         else {
            if ($verbose > 0) { print "NO MATCH $id $date\n"; }
            $nmissing++;
         }
      }
      close(INV);
}


sub resolve_dset_index {

#  dset and index could have a caret, resolve relative to ctl file
   if (substr($index,0,1) eq '^' || substr($dset,0,1) eq '^') {
      if ($ctl =~ /\//) {
         $ctldir = $ctl;
         $ctldir =~ s/\/[^\/]+$/\//;
      }
      else {
         $ctldir = '';
      }
      if (substr($index,0,1) eq '^') { $index =~ s/\^/$ctldir/ };
      if (substr($dset,0,1) eq '^') { $dset =~ s/\^/$ctldir/ };
      if ($verbose > 1) {print "ctldir=$ctldir index=$index dset=$dset\n"; }
   }
}

# make a hash of possible time stamps
sub make_timestamps {
   my ($nt, $date0, $inc, $i, $j, $jj);
   my ($year, $month, $day, $hour, $minute, $unit, $n);
   $nt = $_[0];
   $date0 = $_[1];
   $inc = $_[2];
   $time_stamp{$date0} = 0;
   if ($nt == 1) { return; }

   $year = substr($date0,0,4);
   $month = substr($date0,4,2);
   $day = substr($date0,6,2);
   $hour = substr($date0,8,2);
   $minute = substr($date0,10,2);
   $inc =~ /(\d+)([a-zA-Z]+)/ or die "make_timestamps: fatal error bad increment $inc\n";
   $n = $1;
   $unit = $2;
   $unit =~ tr/A-Z/a-z/;

   if ($unit =~ /yr/) {
      for ($i = 1; $i < $nt; $i++) {
         $year += $n;
         $time_stamp{sprintf "%4d%2.2d%2.2d%2.2d%2.2d", $year, $month, $day, $hour, $minute} = $i;
      }
      return;
   }
   if ($unit =~ /mo/) {
      for ($i = 1; $i < $nt; $i++) {
         $month += $n;
         if ($month > 12) { 
             $j = floor(($month-1)/12);
             $year += $j;
             $month = $month - 12*$j;
         }
         $time_stamp{sprintf "%4d%2.2d%2.2d%2.2d%2.2d", $year, $month, $day, $hour, $minute} = $i;
      }
      return;
   }

#  convert to minutes

   if ($unit eq 'hr') { $n *= 60; $unit = 'mn'; }
   elsif ($unit eq 'dy') { $n *= 24*60; $unit = 'mn'; }
   else { die "bad unit $unit\n"; }

   for ($i = 1; $i < $nt; $i++) {
      $minute += $n;
      if ($minute >= 60) {
         $j = floor($minute/60);
         $minute -= 60*$j;
         $hour += $j;
         if ($hour >= 24) {
            $j = floor($hour/24);
            $hour -= 24*$j;
#           $j is number of days to add
            $j += julian($year,$month,$day);
            while ($j > 365 + leap_year($year)) {
                $j -= (365 + leap_year($year));
                $year++;
            }
            $jj = unjulian($j,$year);
            $month = substr($jj,0,2);
            $day = substr($jj,2,2);
         }
      }
      $time_stamp{sprintf "%4d%2.2d%2.2d%2.2d%2.2d", $year, $month, $day, $hour, $minute} = $i;
   }
}

#
# convert from gdate to YYYYMMDDHHMM format
#

sub from_gdate {
   my ($gdate, $min, $hour, $day, $mon, $year);
   $gdate = $_[0];
   $_ = $gdate;
 
   $min = 0;
   $hour = 0;

#  get hour and optional minutes 
   if (/^(\d+)+Z/i) {
      $hour = $1;
      $_ = $';
   }
   elsif (/^(\d+):(\d+)Z/i) {
      $hour = $1;
      $min = $2;
      $_ = $';
   }

#  get day month and year
   /(\d+)([a-zA-Z]{3})(\d+)/ or die "from_gdate fatal error: $gdate\n";
   $day = $1;
   $mon = $2;
   $year = $3;

   if (/jan/i) { $mon=1; }
   elsif (/feb/i) { $mon=2; }
   elsif (/mar/i) { $mon=3; }
   elsif (/apr/i) { $mon=4; }
   elsif (/may/i) { $mon=5; }
   elsif (/jun/i) { $mon=6; }
   elsif (/jul/i) { $mon=7; }
   elsif (/aug/i) { $mon=8; }
   elsif (/sep/i) { $mon=9; }
   elsif (/oct/i) { $mon=10; }
   elsif (/nov/i) { $mon=11; }
   elsif (/dec/i) { $mon=12; }

   return sprintf "%4d%2.2d%2.2d%2.2d%2.2d", $year, $mon, $day, $hour, $min;
}


#
# scan_ctl
#
# this routine scans the ctl file
# and sets the needed variables
#
# time variables
#  $ntime, $start_time, $inc_time
#
# level variables
#  $nlevels               number of levels
#  $level[0..$nlevels-1]  level values
#
# variables
#
#  $var_id[$i]
#  $var_num{$id}
#
#
# $nfields = number of fields (var + levels) per time

sub scan_ctl {

   my ($tmp, $i, $a, $b);

   open(CTL,"<", $ctl) or die "Could not open ctl file: $ctl";
   $dtype = 0;
   $pdef = 0;
   $calendar=366;
   $pascals=0;
   while (<CTL>) {
      chomp;

#     can have more than one option per line

      if (/^options.*\s365_day_calendar/i) { 
         $calendar=365;
         if ($verbose > 1) { print "365 day calendar\n"; }
      }
      if (/^options.*\spascals/i) { 
         $pascals=1;
         if ($verbose > 1) { print "pascals\n"; }
      }

#     scan lines
      if (/^tdef /i) {
         /^tdef +(\d+) +linear +(\S+) +(\S+)/i or die "failed scanning tdef:  $_";
         $ntime=$1;
         $start_time=$2;
         $inc_time=$3;
         print "tdef:  nt=$ntime start=$start_time by=$inc_time\n";
      }

      elsif (/^zdef /i) {
         $_=~ s/^zdef +(\d+) +levels//i or die "failed scanning zdef:  $_";

         $nlevels=$1;
         print "zdef: nlevel=$nlevels\n";
         for ($i = 0; $i < $nlevels; $i++) {
#           if empty line, read another
	    if (! / *\S+/) { 
               chomp;
               if (!(defined($_ = <CTL>))) {
                  die "failed scanning zdef:\n";
               }
            }
#           need to read a level
	    / *(\S+)/ or die "failed scanning zdef:\n";
            $a = $1;
            $_ = $';
            $level[$i] = $a+0;
            if ($verbose > 1) { print "level[$i]=$level[$i]\n"; }
         }
      }

      elsif (/^vars /i) {
         /^vars +(\d+)/i or die "failed scanning vars:  $_";
         $nvars = $1;
         $nfields = 0;
         for ($i = 0; $i < $nvars; $i++) {
            if (!(defined($_ = <CTL>))) {
               die "vars: end of control file\n";
            }
            chomp;
            if (/^ *endvars/i) {
               die "vars: not enough variables\n";
            }
            / *(\S+) +(\d+) *\d+ *"([^"]+)"/ or die "vars: problem with $_\n";

            # $name = $1;
            $nlevs = $2;
            $id = $3;
            if ($verbose > 1) { print "vars: $1 $2 $id\n"; }

            if ($nlevs == 0) {
               $var_id[$nfields] = $id;
               $var_num{$id} = $nfields;
               $nfields++;
            }
            else {
               if ($nlevs < 0 || $nlevs > $nlevels) { die "vars: number of levels is too big $nlevs\n"; }
               $idd=$id;
#              need to quote % except for %s(blandk)
               $idd =~ s/%/%%/g;
               $idd =~ s/%%s/%s/g;
               $count = 1;
               if ($count =~ m/%s.*%s/) { $count = 2; }
#
               for ($j = 0; $j < $nlevs; $j++) {
# for hply        $tmp = sprintf($idd, $level[$j]);
#                  $tmp = sprintf($idd, $level[$j], $level[$j]+1);
                  if ($count == 1) { $tmp = sprintf($idd, $level[$j]); }
                  else { $tmp = sprintf($idd, $level[$j], $level[$j]+1); }
                  $var_id[$nfields] = $tmp;
                  $var_num{$tmp} = $nfields;
                  $nfields++;
               }
            }
         }
         if (!(defined($_ = <CTL>))) {
            die "vars: end of control file\n";
         }
         /^ *endvars/i or die "vars: expecting endvars\n";
         if ($verbose > 1) { print "number of fields=$nfields\n"; }
      }
      elsif (/^xdef /i) {
         if ($verbose > 1) { print "xdef:  $_\n"; }
      }
      elsif (/^ydef /i) {
         if ($verbose > 1) { print "ydef:  $_\n"; }
      }
      elsif (/^pdef /i) {
         print "pdef:  $_";
         $pdef++;
      }
      elsif (/^dtype grib2/i) {
         print "dtype:  $_\n";
         $dtype++;
      }
      elsif (/^index /i) {
         /^index +(\S.*)/i or die "failed scanning index:  $_";
         $index=$1;
         if ($verbose > 1) { print "index:  $index\n"; }
      }
      elsif (/^dset /i) {
         /^dset +(\S.*)/i or die "failed scanning dset:  $_\n";
         $dset=$1;
         if ($verbose > 1) { print "dset:  $dset\n"; }
      }
      elsif (/^\* wgrib2 inventory flags: (.*)/) {
         $wgrib2_flags=$1;
         print "wgrib2_flags=$wgrib2_flags\n";
      }
      elsif (/^\* wgrib2 inv suffix:\s*(\S*)/) {
         $wgrib2_inv=$1;
         print "wgrib2_inv=$wgrib2_inv\n";
      }
      elsif (/^\* alt_gmp options: update=(\S*)/) {
         $update=$1;
      }
      elsif (/^\* alt_gmp options: nthreads=(\S*)/) {
         $nthreads=$1;
      }
      elsif (/^\* alt_gmp options: match=(.*)/) {
         $match=$1;
      }
   }
   close(CTL);
}


# make a list of templated files along with valid time range

sub mk_template_list {

   my ($dset, $time, $inctime, $fhr, $i, $tmp, $n);
   my ($y4,$y2,$m2,$d2,$f2,$f3);

   $dset = $_[0];
   $inctime = $_[1];
   print "resolve_dsets dset=$dset inctime=$inctime\n";

   $_ = $dset;
   if (! /%/) {
       $template_file[0] = $dset;
       $template_file_min[0] = 0;
       $template_file_max[0] = $ntime-1;
       print "resolve_dsets: no template\n";
       return;
   }

   print "resolve_dsets template dset=$dset\n";
   $_ = $dset;
   $y4 = /%y4/;
   $y2 = /%y2/;
   $m2 = /%m2/;
   $d2 = /%d2/;
   $h2 = /%h2/;
   $n2 = /%n2/;
   $f2 = /%f2/;
   $f3 = /%f3/;
   if ($f2 || $f3) {
      $inctime =~ s/hr$// or die "%f2 needs dt-units of hr\n";
      $fhr = $inctime + 0;
   }

   $n = 0;
   foreach $time (sort keys(%time_stamp)) { 
      $_ = $dset;
      $i = $time_stamp{$time};

      if ($y4) { $tmp = substr($time,0,4); s/%y4/$tmp/g; }
      if ($y2) { $tmp = substr($time,2,2); s/%y2/$tmp/g; }
      if ($m2) { $tmp = substr($time,4,2); s/%m2/$tmp/g; }
      if ($d2) { $tmp = substr($time,6,2); s/%d2/$tmp/g; }
      if ($h2) { $tmp = substr($time,8,2); s/%h2/$tmp/g; }
      if ($n2) { $tmp = substr($time,10,2); s/%n2/$tmp/g; }
      if ($f2) { 
          $tmp = sprintf "%2.0f", $i * $fhr;
          $tmp =~ s/ /0/g;
          s/%f2/$tmp/g;
      }
      if ($f3) { 
          $tmp = sprintf "%3.0f", $i * $fhr;
          $tmp =~ s/ /0/g;
          s/%f3/$tmp/g;
      }

      if ($n == 0) {
          $template_file[$n] = $_;
          $template_file_min[$n] = 0;
          $template_file_max[$n] = 0;
          $n++;
      }
      else {
          if ($_ eq $template_file[$n-1]) {
              $template_file_max[$n-1]++;
          }
          else {
             $template_file[$n] = $_;
             $template_file_min[$n] = $i;
             $template_file_max[$n] = $i;
             $n++;
          }
      }
   }

#   for ($i = 0; $i < $n; $i++) {
#      print "$i $template_file[$i] - $template_file_min[$i]  $template_file_max[$i]\n";
#   }
   return;
}

#
# leap year: 0/1
#
sub leap_year {
   my ($year);
   $year = $_[0];
   if ($calendar == 365) { return 0; }
   if ($year % 4 != 0) { return 0; }
   if ($year % 400 == 0) { return 1; }
   if ($year % 100 == 0) { return 0; }
   return 1;
}

#
# return the Julian day 1..365/366
# julian(year, month, day)
#
sub julian {

   my ($year,$mon, $day, $n);

   $year=$_[0];
   $mon=$_[1];
   $day=$_[2];

   $n=substr(" 000 031 059 090 120 151 181 212 243 273 304 334",($mon-1)*4,4);
   $n = $n + $day;

   if ($calendar eq '365') { return $n; }
   
   if ($mon > 2) { $n += leap_year($year); }

   return $n;
}

# unjulian(julian_day,year)
# julian day (1..366) to "MMDD"

sub unjulian {

   my  ($julian, $n, $mon, $day, $year, $leap);
   $julian=$_[0];
   $year=$_[1];
   $leap = leap_year($year);
   if ($leap == 1) {
       if ($julian == 31 + 29) { return "0229"; }
       if ($julian > 31 + 28) { $julian--; }
   }

   for ($mon = 12; $mon > 0; $mon--) {
      $n=substr(" 000 031 059 090 120 151 181 212 243 273 304 334",($mon-1)*4,4);
      if ($julian > $n) {
          $day = $julian - $n;
          return sprintf("%2.2d%2.2d", $mon, $day);
      }
   }
   die "fatal error in unjulian\n";
}