#!/usr/bin/perl

use Getopt::Long;
use File::Copy;

# Print the command-line synopsis, followed by a worked example, to STDOUT.
# Takes no arguments; $0 is interpolated so the text shows the name the
# script was actually invoked with.
sub usage() {
    print <<"END_OF_USAGE";
Usage: perl $0 [-listfile=<listfile>][-directory=<directory>][-product=<product>]
Example:
perl $0 -listfile=list.txt -directory=/dir/subdir -product=TLFE2007
where list.txt is the filename of the list file containg the names of the files to validated,
/dir/subdir is the directory containing the files to be validated
and TLFE2007 is the product.
END_OF_USAGE
}
# Collect -listfile, -directory and -product into %gOptions.  All three are
# required: if any of them is absent (or evaluates to false) show the usage
# text and stop with exit status 1.
GetOptions(\%gOptions, 'listfile=s', 'directory=s', 'product=s');
if (   !$gOptions{listfile}
    || !$gOptions{directory}
    || !$gOptions{product})
{
    usage();
    exit(1);
}
#---------------------------------------------file management section----------------------------------------------------------------------------------------
# Short names accepted by -directory and the input directory each stands for.
# Any other value is used as the directory path as given.
my %input_dir_for = (
    mtdata => "/mtdata/geo/gpms/input/mif",
    nfs    => "/nfs/gpms/input/mif",
    input  => "/home/mccre004/input_files",
);

$Infile = $gOptions{listfile};

$Branch = $gOptions{product};
chomp $Branch;

$indir = $gOptions{directory};
chomp $indir;
$indir = $input_dir_for{$indir} if exists $input_dir_for{$indir};

# Fixed installation layout: programs and stylesheets live under $root,
# list files and all generated output under $rootout.
$root    = "/mt/apps";
$rootout = "/mtdata/geo";

$inlist     = "$rootout/gpms/input/list";     # where list files are looked up
$outputdir  = "$rootout/gpms/output/mrf";     # MRF results, one sub-directory per run
$errlogdir  = "$rootout/gpms/log";            # logs and .err files, one sub-directory per run
$combolist  = $inlist . "/" . $Infile;        # full path of the list file for this run
$stylesheet = "$root/gpms/xsl/MIF2MRF.xsl";
$FGDCdir    = "$rootout/gpms/output/fgdc";    # FGDC results, one sub-directory per run

#------------------------------------------------------error trap--------------------------------------------------------------------------------------------

# Each check below prints an explanation to STDOUT and ends the run with
# exit status 1 as soon as a precondition is not met.

# 1. The input directory has to exist.
unless (-e $indir) {
    print "The directory $indir  could not be found! \n";
    print "Please make sure the pathname, $indir, was spelled correctly and that it exists.\n";
    exit(1);
}
print "Please Wait, Now processing the files ----------------------------------------------\n\n";

# 2. The input directory has to have a non-zero size.
# NOTE(review): -s tests the size of the directory entry itself, which is
# probably not the same thing as "contains files" -- confirm on the target
# filesystem before relying on this message.
unless (-s $indir) {
    print "The directory $indir does not contain any files to be validated! \n";
    exit(1);
}

# 3. The list file has to exist in the list directory ...
unless (-e $combolist) {
    print "The file $combolist could not be found! \n";
    print "Please make sure the file, $Infile,  was spelled correctly and that the file is in the $inlist directory.\n";
    exit(1);
}

# 4. ... and must not be empty.
unless (-s $combolist) {
    print "The file $combolist does not contain the list of files to be validated! \n";
    print "Please make sure the file, $Infile,  contains a list of files to be validated.\n";
    exit(1);
}

#------------------------------------getting the date---------------------------------------------------------------------------------------------------------
# Turn a localtime() list into the zero-padded pieces used for directory
# names and log headers.
#
# Arguments: the list returned by localtime (sec, min, hour, mday, mon, year, ...).
# Returns a hash with the keys
#   year, monnum, daynum, hour, min  - zero-padded strings (4 digits for year)
#   ampm                             - "am" or "pm"
#   newhour                          - hour on the 12-hour clock
#
# The 12-hour value deliberately keeps the format the script has always
# produced: morning hours keep their leading zero ("09"), afternoon hours do
# not ("1"), and both midnight and noon are shown as 12.
sub _timestamp_parts {
    my @lt     = @_;
    my $hour24 = $lt[2];

    my %part = (
        year   => sprintf('%04d', $lt[5] + 1900),
        monnum => sprintf('%02d', $lt[4] + 1),
        daynum => sprintf('%02d', $lt[3]),
        hour   => sprintf('%02d', $hour24),
        min    => sprintf('%02d', $lt[1]),
        ampm   => ($hour24 < 12 ? 'am' : 'pm'),
    );

    if ($hour24 == 0 || $hour24 == 12) {
        $part{newhour} = 12;
    }
    elsif ($hour24 < 12) {
        $part{newhour} = $part{hour};
    }
    else {
        $part{newhour} = $hour24 - 12;
    }

    return %part;
}

# Take the time from localtime() rather than cutting fixed columns out of the
# output of the external `date` command: that layout shifts with the locale
# and with the length of the time-zone abbreviation, which silently corrupted
# the year.
my %stamp = _timestamp_parts(localtime());

$year    = $stamp{year};
$monnum  = $stamp{monnum};
$daynum  = $stamp{daynum};
$hour    = $stamp{hour};
$min     = $stamp{min};
$ampm    = $stamp{ampm};
$newhour = $stamp{newhour};

# YYYYMMDDhhmm, appended to the product name to label this run's directories
$dateString = $year . $monnum . $daynum . $hour . $min;
$time       = $hour . ":" . $min;
$newtime    = $newhour . ":" . $min . " $ampm";

#-----------------------------------------------------making the directories---------------------------------------------------------------------------
$mrf       = "MRF_$Branch";
$newdir    = $Branch . $dateString;     # <product><YYYYMMDDhhmm>, unique per run to the minute
$newerrdir = "$errlogdir/$newdir";

# Create this run's sub-directory under the MRF output, log and FGDC trees and
# open each one up with mode 0777.  The built-in mkdir is used instead of
# shelling out, so a product name containing spaces or shell metacharacters
# cannot alter the command, and a failure is reported instead of ignored.
# A directory that already exists (second run within the same minute) is reused.
for my $run_dir ("$outputdir/$newdir", $newerrdir, "$FGDCdir/$newdir") {
    unless (-d $run_dir or mkdir $run_dir) {
        warn "Could not create directory $run_dir: $!\n";
        next;
    }
    chmod 0777, $run_dir
        or warn "Could not set permissions on $run_dir: $!\n";
}

#---------------------------------------------------------------------Counter variable Management section------------------------------------------
# Tallies read by the reporting sections; all of them start at zero.
($badfile_counter2, $goodfile_counter, $countere, $flag_array_counter) = (0, 0, 0, 0);

#----------------------------------------------------------Saxon-----------------------------------------------------------------------------------------
# Put the JDK that ships with Saxon in front of whatever PATH was inherited.
# NOTE(review): the second entry is the literal "/mt/apps/gpms/output/mrf"
# immediately followed by $newerrdir with no separator -- confirm this path is
# what was intended.
$ENV{'PATH'} = join ':',
    "/opt/saxon_lib/jdk1.5.0_11/bin",
    "/mt/apps/gpms/output/mrf$newerrdir",
    $ENV{'PATH'};

# Likewise prepend the Saxon jars and directories to any inherited CLASSPATH.
$cp = "CLASSPATH";
$ENV{$cp} = join ':',
    "/opt/saxon_lib/jdk1.5.0_11/lib/",
    "/opt/saxon_lib/saxon9/saxon9sa.jar",
    "/opt/saxon_lib/saxon9/saxon9-jdom.jar",
    "/opt/saxon_lib/saxon9/",
    "/opt/saxon_lib/saxon9/saxon9.jar",
    $ENV{$cp};

#---------------------------------------redirecting standard output section------------------------------------------------------------------------------
# Saxon sends its result to STDOUT, so STDOUT is redirected to a file further
# down.  SAVEOUT is a duplicate of the original STDOUT, kept so that progress
# messages can still be shown on the screen.
open SAVEOUT, ">&STDOUT";

#-----------------------------------------Filename array creation section------------------------------------------------------------------------------
#open a filehandle for the file containing the filenames
# Read the list file and sort every entry into one of two groups:
#
#   @filename, @flagarray, %flaghash
#       entries that carry a posting flag, i.e. whose text after the file
#       name contains a "p".  @filename holds the path handed to the
#       transformer, %flaghash maps that path to the flag text.
#   @noflagarray (counted in $flag_array_counter)
#       the raw lines of entries without such a flag; they are reported later.
#
# An entry without any "/" names a file inside $indir; any other entry is
# taken as a path in its own right.  Blank lines are skipped.
open INPUTFILE, '<', $combolist
    or die "Cannot open list file $combolist: $!\n";

while (my $entry = <INPUTFILE>) {
    next unless $entry =~ /\S/;

    # The flag is whatever follows the first "xml" plus one separator character.
    my $flagpos = index($entry, "xml");
    my $flag    = $flagpos + 4 <= length($entry) ? substr($entry, $flagpos + 4) : "";

    my $path;
    if (rindex($entry, "/") < 0) {
        # bare file name: everything up to and including "xml", inside $indir
        $path = "$indir/" . substr($entry, 0, $flagpos + 3);
    }
    else {
        # path given in the list: everything up to the first "." plus a
        # three-character extension
        $path = substr($entry, 0, index($entry, ".") + 4);
    }

    if (index($flag, "p") >= 0) {
        push @filename,  $path;
        push @flagarray, $flag;
        $flaghash{$path} = $flag;
    }
    else {
        push @noflagarray, $entry;
        $flag_array_counter++;
    }
}

close INPUTFILE;



#------------------------------------------validation section--------------------------------------------------------------------------------------------------------
# Transform every flagged input file to MRF with the schema-aware Saxon
# processor, then post-process the result into <name>.mrf in the current
# directory.
#
# Results are handed to the report sections through these globals:
#   @badfile                   .err files of inputs whose transformation failed
#   @goodfile, %goodflaghash   .mrf files produced, and the flag of each
#   @nopassarray               good files whose flag contains an "n" (not posted)
#   @passarray                 good files whose flag has no "n" (posted)
#   $goodfile_counter, $badfile_counter, $no_pass_counter, $pass_counter

# Saxon writes its result to STDOUT and its diagnostics to STDERR, so both are
# pointed at per-file scratch files while it runs.  Duplicates of the real
# handles are kept so that they can be put back after every run.
open my $real_stdout, '>&', \*STDOUT or die "Cannot duplicate STDOUT: $!\n";
open my $real_stderr, '>&', \*STDERR or die "Cannot duplicate STDERR: $!\n";

foreach my $filename (@filename) {
    chomp($filename);

    # "dir/name.xml" -> "name.xml" -> "name"
    my $newfilename   = substr($filename, rindex($filename, "/") + 1);
    my $finalfilename = substr($newfilename, 0, index($newfilename, "."));

    my $MRF     = $finalfilename . ".mrf";    # final result
    my $TMP     = "TMP-" . $newfilename;      # raw Saxon output
    my $errfile = $finalfilename . ".err";    # Saxon diagnostics

    print SAVEOUT "Now validating $filename\n";

    open STDOUT, '>', $TMP or die "Cannot write $TMP: $!\n";
    unless (open STDERR, '>', $errfile) {
        my $reason = $!;
        open STDERR, '>&', $real_stderr;      # so that the message below is visible
        die "Cannot write $errfile: $reason\n";
    }

    # Schema-aware transformation: it fails if the document is invalid.  -vw
    # makes validation errors found in the result tree warnings only; the
    # option exists only in the Saxon-SA command, com.saxonica.Transform.
    # List form of system: the file name never passes through a shell.
    my $mif = system('java', 'com.saxonica.Transform', '-vw', $filename,
        "$root/gpms/xsl/MIF2MRF.xsl");

    # -1 means java could not be started at all; leave a trace in the .err file
    print STDERR "Could not run java: $!\n" if $mif == -1;

    open STDOUT, '>&', $real_stdout;
    open STDERR, '>&', $real_stderr;

    if ($mif != 0) {
        push @badfile, $errfile;
        $badfile_counter++;
        unlink $MRF, $TMP;
        next;
    }

    unlink $errfile;
    push @goodfile, $MRF;
    $goodfile_counter++;
    $goodflaghash{$MRF} = $flaghash{$filename};

    # Any "n" in the flag (as in "np") means: transform, but do not post.
    if (index($flaghash{$filename}, "n") >= 0) {
        push @nopassarray, $MRF;
        $no_pass_counter++;
    }
    else {
        push @passarray, $MRF;
        $pass_counter++;
    }

    # Copy the raw output into the .mrf file, adjusting it on the way.  Plain
    # print is used throughout so that "%" in the data is written unchanged.
    open my $tmp_fh, '<', $TMP or die "Cannot read $TMP: $!\n";
    open my $mrf_fh, '>', $MRF or die "Cannot write $MRF: $!\n";

    while (my $line = <$tmp_fh>) {
        my $xsdlocation = index($line, "/xsd");
        if ($xsdlocation >= 0) {
            # point the schema reference at Repository.xsd (what sed used to do)
            print {$mrf_fh} substr($line, 0, $xsdlocation), "/xsd/Repository.xsd\">\n";
        }
        elsif ($line =~ m{</MRF>}) {
            # closing tag is followed by an extra empty line
            print {$mrf_fh} $line, "\n";
        }
        else {
            # numeric references for curly quotes become plain ASCII quotes
            $line =~ s/&#14[78];/"/g;
            $line =~ s/&#146;/'/g;
            print {$mrf_fh} $line;
        }
    }

    close $mrf_fh or die "Cannot finish writing $MRF: $!\n";
    close $tmp_fh;
    unlink $TMP;
}

print SAVEOUT "\n";

#------------------------------error filehandling section----------------------------------------------------------------------------------------------------

# Go through the .err files left by failed transformations and decide, once
# per file, why each one failed:
#
#   - the diagnostics are empty, or any line says "does not exist":
#       the source file was not found.  Its name (<name>.xml) goes into
#       @not_exist, $countere is incremented and the .err file is deleted.
#   - anything else:
#       the file failed validation.  The .err file is copied into this run's
#       log directory, recorded in @badfile2, counted in $badfile_counter2,
#       and the local copy is removed once the copy has succeeded.
foreach my $badfile (@badfile) {
    my $basename      = substr($badfile, rindex($badfile, "/") + 1);
    my $MRF_Main_name = substr($basename, 0, index($basename, "."));

    my $source_missing = 1;     # empty diagnostics count as "not found"
    if (-s $badfile) {
        my $err_fh;
        unless (open $err_fh, '<', $badfile) {
            warn "Cannot read $badfile: $!\n";
            next;
        }
        $source_missing = 0;
        while (my $line = <$err_fh>) {
            if ($line =~ /does not exist/) {
                $source_missing = 1;
                last;
            }
        }
        close $err_fh;
    }

    if ($source_missing) {
        push @not_exist, "$MRF_Main_name.xml";
        $countere++;
        unlink $badfile;
        next;
    }

    my $name = "$MRF_Main_name.err";
    push @badfile2, $name;
    $badfile_counter2++;

    # File::Copy instead of a shell "cp": no quoting problems, and the result
    # can be checked before the only copy of the diagnostics is deleted.
    if (copy($badfile, "$newerrdir/$name")) {
        unlink $badfile;
    }
    else {
        warn "Could not copy $badfile to $newerrdir/$name: $!\n";
    }
}

#--------------------------------------final variables counting section--------------------------------------------------------------------------------
# Share of $part in $whole as a percentage; 0 when $whole is zero, so a run
# in which no file reached validation no longer dies with a division by zero.
sub _percent_of {
    my ($part, $whole) = @_;
    return 0 unless $whole;
    return ($part / $whole) * 100;
}

# $total counts the files that were actually validated (failed + passed);
# $newtotal additionally counts the files that were not found and the list
# entries that had no posting flag.
$total    = $badfile_counter2 + $goodfile_counter;
$newtotal = $total + $countere + $flag_array_counter;
$pergood  = _percent_of($goodfile_counter, $total);
$perbad   = _percent_of($badfile_counter2, $total);

#-------------------------------working with the log file----------------------------------------------------------------------------------------------
#$year . $monnum . $daynum . ":" .  $time
#open STDOUT,">$TMP";
# Start this run's log file.  If it cannot be created, say so on the screen
# and carry on, so that the on-screen summary is still produced.  print (not
# printf) is used because the text contains interpolated names that must not
# be interpreted as a format.
open LOG, '>', "$errlogdir/$newdir/$newdir.log"
    or print SAVEOUT "Warning: could not create the log file $errlogdir/$newdir/$newdir.log: $!\n";
print LOG "This log details the transformation and validation results of the $newtotal mif files contained in \n  $combolist.\n";
print LOG "It was run on $monnum/$daynum/$year at $time($newtime) for the $Branch product.\n\n";

#--------------------------indicator section-----------------------------------------------------------------------------------------------------------
# List, on screen and in the log, the files that could not be found, numbered
# from 1.  print is used instead of printf so that a "%" in a file name is
# shown as it is rather than being taken as a format directive.
if ($countere > 0) {
    my $heading = $countere == 1
        ? "The following file could not be found!\n"
        : "The following files could not be found!\n";
    print SAVEOUT $heading;
    print LOG $heading;

    my $position = 0;
    foreach my $missing (@not_exist) {
        $position++;
        print SAVEOUT "$position: $missing\n";
        print LOG "$position: $missing\n";
    }

    print SAVEOUT "\n";
    print LOG "\n";
}

$counter = 1;

# Report the list entries that had no posting flag and write them to the
# FailedFiles list in this run's log directory so they can be flagged and
# resubmitted.  Entries are numbered from 1, one per line, followed by an
# empty line.  print is used instead of printf so that a "%" in an entry is
# written as it is.
if ($flag_array_counter > 0) {
    $flagfile = "FailedFiles";
    my $flag_list_path = "$errlogdir/$newdir/$flagfile";
    open FLAGLIST, '>', $flag_list_path
        or print SAVEOUT "Warning: could not create $flag_list_path: $!\n";

    my $advice = "Please put a space and a p (for post) or np (for not post)\nafter the filename.";

    if ($flag_array_counter == 1) {
        my $lead = "The following file does not have a FGDC posting flag and therefore can not be processed. $advice";
        print SAVEOUT "$lead A list containg this file can be found in $flagfile in the \n";
        print SAVEOUT "$errlogdir/$newdir directory.\n";
        print LOG "$lead\n";
        print LOG "A file called $flagfile has a list with this file in it for you to flag.\n";
    }
    else {
        my $lead = "The following files do not have a FGDC posting flag and therefore can not be processed. $advice";
        print SAVEOUT "$lead A list of these files can be found in $flagfile in the \n";
        print SAVEOUT "$errlogdir/$newdir directory.\n";
        print LOG "$lead A list of these files can be found in $flagfile in the $errlogdir/$newdir directory.\n";
    }

    foreach my $raw_entry (@noflagarray) {
        # entries are raw list lines; normalise the line ending once
        my $entry = $raw_entry;
        chomp $entry;
        print SAVEOUT "$counter: $entry\n";
        print LOG "$counter: $entry\n";
        print FLAGLIST "$entry\n";
        $counter++;
    }
    close FLAGLIST;

    print SAVEOUT "\n";
    print LOG "\n";
}

$counter = 1;

# Say how many files passed validation, on screen and in the log.  When none
# passed, only the screen is told, and this run's (empty) MRF and FGDC
# output directories are removed again.
if ($goodfile_counter > 0) {
    my $passed_line = "$goodfile_counter or $pergood% of your files have passed validation\n\n";
    print SAVEOUT $passed_line;
    print LOG $passed_line;
}
elsif ($goodfile_counter == 0) {
    print SAVEOUT "No good files\n";
    rmdir "$outputdir/$newdir";
    rmdir "$FGDCdir/$newdir";
}

# start numbering from 1 again for the next list
$counter = 1;

# Files that passed validation but were flagged "do not post": move each
# .mrf file from the current directory into this run's MRF output directory
# and list it on screen and in the log.
if ($no_pass_counter > 0) {
    my $intro = "Of these $goodfile_counter files, $no_pass_counter "
        . ($no_pass_counter == 1
            ? "file has been transformed in the MRF_filename versions. This file WAS NOT selected to be posted "
            : "files have been transformed in the MRF_filename versions. These files WERE NOT\nselected to be posted ");

    print SAVEOUT $intro;
    print SAVEOUT "on the Census Metadata Server. ";
    print SAVEOUT "Please see the MRF_ filename versions of \nthese files in the  $outputdir/$newdir directory:\n";

    print LOG $intro;
    print LOG "on the Census Metadata Server. ";
    print LOG "Please see the MRF_ filename versions of\n these files in the $outputdir/$newdir directory:\n";

    foreach my $nopassarray (@nopassarray) {
        # File::Copy instead of a shell "cp"; the local file is only removed
        # once the copy is known to have worked.
        if (copy($nopassarray, "$outputdir/$newdir/$nopassarray")) {
            unlink $nopassarray;
        }
        else {
            my $problem = "Warning: could not copy $nopassarray to $outputdir/$newdir: $!\n";
            print SAVEOUT $problem;
            print LOG $problem;
        }

        print SAVEOUT "$counter: $nopassarray \n";
        print LOG "$counter: $nopassarray \n";
        $counter++;
    }

    print SAVEOUT "\n";
    print LOG "\n";
    $counter = 1;
}


# Files that passed validation and were flagged for posting: move each .mrf
# file into this run's MRF output directory, transform it to FGDC with Saxon
# (its STDOUT redirected into <name>.xml in this run's FGDC directory), and
# list both files on screen and in the log.
if ($pass_counter > 0) {
    my $single = $pass_counter == 1;
    my $intro  = "Of these $goodfile_counter files, $pass_counter "
        . ($single
            ? "file has been transformed into the MRF_filename and FGDC_filename versions. This file was selected"
            : "files have been transformed into the MRF_filename and FGDC_filename versions. These files were selected");

    print SAVEOUT $intro . ($single ? "\n" : "");
    print SAVEOUT " to be posted on the Census metadata server. Please see the MRF_ filename versions of these files in the";
    print SAVEOUT " $outputdir/$newdir directory\n";
    print SAVEOUT "and the FGDC_ filename versions of these files in the \n$FGDCdir/$newdir directory:\n";

    print LOG "$intro ";
    print LOG "to be posted on the Census metadata server. Please see the MRF_ filename versions of these files in the ";
    print LOG "$outputdir/$newdir directory\n";
    print LOG "and the FGDC_ filename versions of these files in the \n$FGDCdir/$newdir directory:\n";

    foreach my $passarray (@passarray) {
        my @problems;

        # File::Copy instead of a shell "cp", so the result can be checked
        my $mrf_copied = copy($passarray, "$outputdir/$newdir/$passarray");
        push @problems, "Warning: could not copy $passarray to $outputdir/$newdir: $!\n"
            unless $mrf_copied;

        my $FGDCname  = substr($passarray, 0, index($passarray, ".")) . ".xml";
        my $fgdc_path = "$FGDCdir/$newdir/$FGDCname";

        if (open STDOUT, '>', $fgdc_path) {
            # list form of system: the file name never passes through a shell
            my $mif = system('java', 'com.saxonica.Transform', '-vw', $passarray,
                "$root/gpms/xsl/xsl4mrf2fgdc/MRF2FGDC.xsl");
            push @problems, "Warning: the FGDC transformation of $passarray did not complete successfully\n"
                if $mif != 0;
        }
        else {
            push @problems, "Warning: could not create $fgdc_path: $!\n";
        }
        # put STDOUT back on the screen instead of leaving it closed
        open STDOUT, '>&', \*SAVEOUT;

        print SAVEOUT "$counter: $passarray and $FGDCname\n";
        print LOG "$counter: $passarray and $FGDCname\n";
        foreach my $problem (@problems) {
            print SAVEOUT $problem;
            print LOG $problem;
        }

        $counter++;
        # keep the local .mrf if it could not be copied: it is the only one
        unlink $passarray if $mrf_copied;
    }
    $counter = 1;
}


# Files that failed validation: report how many, list their .err files on
# screen and in the log, and append the names to the FailedFiles list in
# this run's log directory.  The list is opened once and closed afterwards,
# rather than being reopened for every name and never closed.
if ($badfile_counter2 > 0) {
    my $summary = "\n"
        . "$badfile_counter2 or $perbad% of your files have FAILED VALIDATION!\n"
        . "Please see the err_files for the files named below in the \n$errlogdir/$newdir directory:\n";
    print SAVEOUT $summary;
    print LOG $summary;

    $flagfile = "FailedFiles";
    open FLAGLIST, '>>', "$errlogdir/$newdir/$flagfile"
        or print SAVEOUT "Warning: could not append to $errlogdir/$newdir/$flagfile: $!\n";

    foreach my $badfile2 (@badfile2) {
        print SAVEOUT "$counter: $badfile2 \n";
        print LOG "$counter: $badfile2 \n";
        print FLAGLIST "$badfile2 \n";
        $counter++;
    }

    close FLAGLIST;
}

# Tell the user where the log is, finish the log, and leave.  print is used
# instead of printf because the paths contain the product name, which must
# not be interpreted as a format.
print SAVEOUT "\n";
print SAVEOUT "Please see the log file containing the list of files that passed and failed validation and the \nfiles for which the FGDC versions were made at:\n";
print SAVEOUT "$errlogdir/$newdir/$newdir.log\n";
print SAVEOUT "\n";

if ($flag_array_counter > 1) {
    print SAVEOUT "Also, please see the list of files without flags in $errlogdir/$newdir/$flagfile.\n\n";
}

print LOG "\n";
close LOG;

# Exit status 2 when at least one file failed validation or lacked a posting
# flag, otherwise 0.
# NOTE(review): files that could not be found ($countere) do not influence
# the exit status -- confirm that this is intended.
exit(($badfile_counter2 > 0 || $flag_array_counter > 0) ? 2 : 0);
