This is the MOM5 component integrated in the IOW earth-system model.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

598 lines
18 KiB

#! /usr/bin/perl
# $Author: Niki.Zadeh $
# $Revision: 1.1.2.1 $
# $Date: 2013/12/18 17:47:53 $
# Perl script to parse the diag_table. Count the number of files to
# be used, and the max number of fields per file used.
use strict;
use Switch;
use List::Util qw/max/;
use XML::LibXML;
use Pod::Usage;
use Getopt::Long;
my $help = 0;
my $verbose = 0;
my $xmlFile = '';
GetOptions ("help|h" => \$help,
"verbose|v" => \$verbose,
"xml|x=s" => \$xmlFile) or pod2usage(2);
pod2usage(1) if $help;
# Variable to hold the location of the diag_table file.
my $diag_table_file = '';
# diag_table_chk can be called one of two ways. Either, the
# diag_table file is given on the command line, or we will extract the
# information from an XML file and experiment.
if ( ! $xmlFile ) { # If no XML file specified.
if ( $#ARGV < 0 ) {
pod2usage( { -message => "$0: diag_table file must be given as an argument.",
-verbose => 0,
} );
} else {
$diag_table_file = $ARGV[0]
}
} else { # We are using an XML file.
# Set up the XML Parser.
if ( $#ARGV < 0 ) {
pod2usage( { -message => "$0: experiment must be given as an argument.",
-verbose => 0,
} );
} else {
# Make sure the $xmlFile exists and is readable
die "File $xmlFile does not exist.\n" unless ( -e $xmlFile );
die "File $xmlFile exists, but is unreadable.\n" unless ( -r $xmlFile );
die "$xmlFile is not a file.\n" unless ( -f $xmlFile );
our $parser = XML::LibXML -> new();
our $root = $parser -> parse_file($xmlFile) -> getDocumentElement;
our $inputExperiment = $ARGV[0];
die "$0: Experiment $inputExperiment does not exist in file $xmlFile.\n" unless ( experimentExistsInXML($inputExperiment) );
$diag_table_file = getDiagTableFromXML($inputExperiment);
}
}
# Check if the diag table file exists, is not a directory and is readable.
die "$0: File $diag_table_file does not exist.\n" unless ( -e $diag_table_file );
die "$0: File $diag_table_file exists, but is unreadable.\n" unless ( -r $diag_table_file );
die "$0: $diag_table_file is not a file.\n" unless ( -f $diag_table_file );
# Announce what file we are going to read.
print "Reading file $diag_table_file\n\n";
# Open the file handler for the filename.
open(DIAG_TABLE, "<", $diag_table_file);
# Arrays to hold files and fields.
my @files = ( {
file_name => '',
output_frequency => 0,
output_frequency_units => 0,
output_format => 0,
time_units => 0,
long_name => '',
new_file_frequency => 0,
new_file_frequency_units => 0,
start_time_string => '',
file_duration => 0,
file_duration_units => 0,
} );
my @fields = ( {
file_name => '',
module_name => '',
field_name => '',
output_name => '',
time_sampling => '',
time_method => '',
spatial_ops => '',
packing => 0,
} );
# Other variables to hold useful information.
my %fields_per_file;
my @warnings = ( { line_number => 0,
message => '',
} );
my $tableName;
my @globalDate;
# Parse the data from the diag table file, and put it in the
# appropiate array.
while ( <DIAG_TABLE> ) {
my $line = sanitizeString($_);
next if ( $line =~ /^#/ or $line =~ /^$/ );
my @line_data = split(/,/,$line,11);
my $num_warnings = 0;
if ( $#line_data == 0 ) { # No Commas in string
# Find the descriptor and base date. Neither should have a comma.
my @date = split(/\s+/, sanitizeString($line_data[0]));
if ( $#date >= 1 ) { # We have a date.
my $message = verifyDate(@date);
if ( $message ) {
push @warnings, ( {
line_number => $.,
message => "Invalid global date. $message",
} );
} else {
@globalDate = @date;
}
} else { # We have the the descriptor / table name or the date may be set by the script
if ( $line_data[0] =~ /^\$.*[dD]ate$/ ) {
@globalDate[0] = $line_data[0];
} else {
$tableName = sanitizeString($line_data[0]);
}
}
} elsif ( $#line_data > 1 ) {
if ( $tableName =~ /^$/ or $globalDate[0] =~ /^$/ ) {
push @warnings, ( {
line_number => $.,
message => 'The table descriptor and the base date must be set before any files or fields.',
} );
$tableName = 'NOT SET' if ( $tableName =~ /^$/ );
$globalDate[0] = 'NOT SET' if ( $globalDate[0] =~ /^$/ ) ;
}
if ( lc($line_data[5]) =~ /time/ ) { # This is a file.
# Check output_frequency :: Must be >= -1
if ( $line_data[1] < -1 ) {
$num_warnings++;
push @warnings, ( {
line_number => $.,
message => 'Invalid output frequency. Must be >= -1.',
} );
}
# check output_frequency units :: return from find_unit_ivalue() > 0
if ( find_unit_ivalue($line_data[2]) < 0 ) {
$num_warnings++;
$line_data[2] =~ s/"//g;
push @warnings, ( {
line_number => $.,
message => "Invalid output frequency unit. ($line_data[2]).",
} );
}
# check output_format :: Must be in the range 1 <= output_format <= 2
if ( $line_data[3] < 1 or $line_data[3] > 2 ) {
$num_warnings++;
push @ warnings, ( {
line_number => $.,
message => "Output_format out of range. Must be in the range [1,2].",
} );
}
# check time_units :: return from find_unit_ivalue() > 0
if ( find_unit_ivalue($line_data[4]) < 0 ) {
$num_warnings++;
$line_data[4] =~ s/"//g;
push @warnings, ( {
line_number => $.,
message => "Invalid time unit. ($line_data[4]).",
} );
}
# The following are optional. (There may be a slight problem if the line ends with a ','.)
if ( $#line_data > 6 ) {
# Check new_file_frequency :: Must be > 0
if ( $line_data[6] < 0 ) {
$num_warnings++;
push @warnings, ( {
line_number => $.,
message => "Invalid new file frequency. Must be > 0.",
} );
}
# Check new_file_frequency_units :: return from find_unit_ivalue() > 0
if ( find_unit_ivalue($line_data[7]) < 0 ) {
$num_warnings++;
$line_data[7] =~ s/"//g;
push @warnings, ( {
line_number => $.,
message => "Invalid new file frequency unit. ($line_data[7]).",
} );
}
# More optional inputs
if ( $#line_data >= 8 ) {
$num_warnings++;
# remove quotes, beginning and ending space.
$line_data[8] =~ s/"//g;
$line_data[8] =~ s/^\s+//;
$line_data[8] =~ s/\s+$//;
my @start_time = split(/\s+/,$line_data[8]);
# Check start_time_string :: Must be valid date string
my $message = verifyDate(@start_time);
if ( $message ) {
push @warnings, ( {
line_number => $.,
message => "Invalid start time format. $message",
} );
}
# The last two optional inputs
if ( $#line_data > 8 ) {
# Check file_duration :: Must be > 0
if ( $line_data[9] < 0 ) {
$num_warnings++;
push @warnings, ( {
line_number => $.,
message => "Invalid file duration. Must be > 0.",
} );
}
# Check file_duration_units :: return from find_unit_ivalue() > 0
if ( find_unit_ivalue($line_data[10]) < 0 ) {
$num_warnings++;
$line_data[10] =~ s/"//g;
push @ warnings, ( {
line_number => $.,
message => "Invalid file duration unit. ($line_data[10]).",
} );
}
}
}
}
if ( $num_warnings == 0 ) {
push @files, ( {
file_name => sanitizeString($line_data[0]),
output_frequency => sanitizeString($line_data[1]),
output_frequency_units => sanitizeString($line_data[2]),
output_format => sanitizeString($line_data[3]),
time_units => sanitizeString($line_data[4]),
long_name => sanitizeString($line_data[5]),
new_file_frequency => sanitizeString($line_data[6]),
new_file_frequency_units => sanitizeString($line_data[7]),
start_time_string => sanitizeString($line_data[8]),
file_duration => sanitizeString($line_data[9]),
file_duration_units => sanitizeString($line_data[10]),
} );
$fields_per_file{$files[$#files]{file_name}} = 0;
# print "File found (",$files[$#files]{file_name},"), line ",$.,".\n";
}
} else { # This is a field.
# Make sure there are enough fields on the description line :: must be = 8.
if ( $#line_data != 7 ) {
$num_warnings++;
my $message;
# Are there too many?
if ( $#line_data > 7 ) {
$message = "Too many fields on field description line.";
} else { # Nope, too few.
$message = "Not enough fields on field description line.";
}
push @warnings, ( {
line_number => $.,
message => $message,
} );
}
# Verify that file_name exists in the files array
$line_data[3] =~ s/"//g;
$line_data[3] =~ s/^\s+//;
$line_data[3] =~ s/\s+$//;
my $notfound = 1;
for (my $i=0; $i <= $#files; $i++) {
if ( $files[$i]{file_name} =~ $line_data[3] ) {
$notfound = 0;
last;
}
}
if ( $notfound ) {
$num_warnings++;
push @warnings, ( {
line_number => $.,
message => "File ($line_data[3]) not defined. It must be defined before any fields.",
} );
}
# Verify time_method / time_avg is valid
if ( invalid_timeSample(sanitizeString($line_data[5])) ) {
$ num_warnings++;
push @warnings, ( {
line_number => $.,
message => "Time sampling method must be one of (.true., mean, average, avg, .false., none, point, maximum, max, minimum, min, diurnal[#]).",
} );
}
# Verify packing is valid :: must be in range [1,8]
if ( $line_data[7] < 1 or $line_data[7] > 8 ) {
$num_warnings++;
push @warnings, ( {
line_number => $.,
message => "Packing is out of the valid range. Must be in the range [1,8]."
} );
}
if ( $num_warnings == 0 ) {
push @fields, ( {
file_name => sanitizeString($line_data[3]),
module_name => sanitizeString($line_data[0]),
field_name => sanitizeString($line_data[1]),
output_name => sanitizeString($line_data[2]),
time_sampling => sanitizeString($line_data[4]),
time_method => sanitizeString($line_data[5]),
spatial_ops => sanitizeString($line_data[6]),
packing => sanitizeString($line_data[7]),
} );
$fields_per_file{$fields[$#fields]{file_name}}++;
}
}
}
}
if ( $verbose ) {
my $files2output;
my $fields2output;
open(FILES, '>', \$files2output);
open(FIELDS, '>', \$fields2output);
my $file_name;
my $output_frequency;
my $output_frequency_units;
my $output_format;
my $time_units;
my $module_name;
my $field_name;
my $output_name;
my $time_sampling;
my $time_method;
my $spatial_ops;
my $packing;
format FILES_TOP =
Files
Output Axis
File Name Frequency FMT Units
------------------------------------------------------------
.
format FILES =
@<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< @>>> @<<<<<<< @||| @<<<<
$file_name, $output_frequency, $output_frequency_units, $output_format, $time_units
.
for ( my $file=1; $file <= $#files; $file++ ) {
$file_name = $files[$file]{file_name};
$output_frequency = $files[$file]{output_frequency};
$output_frequency_units = $files[$file]{output_frequency_units};
$output_format = $files[$file]{output_format};
$time_units = $files[$file]{time_units};
write FILES;
}
format FIELDS_TOP =
Fields
Output Sample Spatial
Field Name Module File Name Name Samples Method Ops Packing
-------------------------------------------------------------------------------------------------------------
.
format FIELDS =
@<<<<<<<<<<<<<<< @<<<<<<<<<< @<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< @<<<<<<<<<<<<<<< @<<<<< @<<<<<< @<<< @<
$field_name, $module_name, $file_name, $output_name, $time_sampling, $time_method, $spatial_ops, $packing
.
for ( my $field=1; $field <=$#fields; $field++ ) {
$module_name = $fields[$field]{module_name};
$field_name = $fields[$field]{field_name};
$output_name = $fields[$field]{output_name};
$file_name = $fields[$field]{file_name};
$time_sampling = $fields[$field]{time_sampling};
$time_method = $fields[$field]{time_method};
$spatial_ops = $fields[$field]{spatial_ops};
$packing = $fields[$field]{packing};
write FIELDS;
}
# Output the files and fields
close(FILES);
close(FIELDS);
print $files2output;
print $fields2output;
print "\n";
}
print "Table Descriptor:\t",$tableName,"\n";
print "Base Date:\t\t",join(' ',@globalDate),"\n";
print "Number of files:\t",$#files,"\n";
print "Max fields per file:\t",max(values(%fields_per_file)),"\n";
print "Number of warnings:\t",$#warnings,"\n";
if ( $#warnings ) {
for ( my $warning=1; $warning <= $#warnings; $warning++ ) {
print STDERR "WARNING($warnings[$warning]{line_number}): $warnings[$warning]{message}\n";
}
}
# Verify that the given unit is understood.
# A return value of -1 indicated an unknown unit.
sub invalid_timeSample {
my $timeSample = $_[0];
switch ($timeSample) {
case (/^\.true\.$/i) { return 0; }
case (/^\.false\.$/i) { return 0; }
case (/^mean|average|avg$/) { return 0; }
case (/^none|point$/) { return 0; }
case (/^maximum|max$/) { return 0; }
case (/^minimum|min$/) { return 0; }
case (/^diurnal\d+$/) { return 0; }
else { return 1 };
}
}
# Verify that the given unit is understood.
# A return value of -1 indicated an unknown unit.
sub find_unit_ivalue {
my $unit_string = $_[0];
switch ($unit_string) {
case (/seconds/) { return 1; }
case (/minutes/) { return 2; }
case (/hours/) { return 3; }
case (/days/) { return 4; }
case (/months/) { return 5; }
case (/years/) { return 6; }
else { return -1 }
}
}
sub experimentExistsInXML {
my $experiment = shift(@_);
my $experimentNode = $::root -> findnodes("experiment[\@label='$experiment' or \@name='$experiment']") -> get_node(1);
return $experimentNode;
}
sub getDiagTableFromXML {
my $experiment = shift(@_);
my $diagTableNode = $::root -> findnodes("experiment[\@label='$experiment' or \@name='$experiment']/input/diagTable") -> get_node(1);
# If the diagTable node is empty, then recursivly check the parent experiment until it is found.
if ( $diagTableNode ) {
return $diagTableNode -> findvalue("\@file");
} else {
my $parent = $::root -> findvalue("experiment[\@label='$experiment' or \@name='$experiment]/\@inherit");
if ( $parent) {
&getDiagTable($parent);
} else {
die "$0: Cannot find diagTable tag in the XML file $::xmlFile.\n"
}
}
}
sub verifyDate {
# The date must already be in an array. We will check the size here.
# The format should be (year, month, day, hour, min, sec)
my @date = @_;
my $leapYear = 0;
my @months = ( { month => 'January',
days => 31 },
{ month => 'February',
days => 28 },
{ month => 'March',
days => 31 },
{ month => 'April',
days => 30 },
{ month => 'May',
days => 31 },
{ month => 'June',
days => 30 },
{ month => 'July',
days => 31 },
{ month => 'August',
days => 31 },
{ month => 'September',
days => 30 },
{ month => 'October',
days => 31 },
{ month => 'November',
days => 30 },
{ month => 'December',
days => 31 } );
if ( scalar(@date) != 6 ) {
# Wrong number of elements in date. Are we too big?
return 'Too many elements in date string.' if ( scalar(@date) > 6 ) ;
return 'Too few elements in date string.' if ( scalar(@date) < 6 );
}
return 'Year must be > 0.' if ( $date[0] < 0 );
# Correct number of days in February if this is a leap year.
$months[1]{days} = $months[1]{days} + 1 if ( isLeapYear($date[0]) );
return 'Month must be in the range [1,12].' if ( $date[1] < 1 or $date[1] > 12 ) ;
return "Days must be in the range [1,$months[$date[1]-1]{days}] for $months[$date[1]-1]{month} in year $date[0]." if ( $date[2] < 1 or $date[2] > $months[$date[1]-1]{days} );
return 'Hours must be in the range [0,24].' if ( $date[3] < 0 or $date[3] > 24 );
return 'Minutes must be in the range [0,60].' if ( $date[4] < 0 or $date[4] > 60 );
return 'Seconds must be in the range [0,60].' if ( $date[5] < 0 or $date[5] > 60 );
return '';
}
sub isLeapYear {
my $year = shift(@_);
if ( ($year % 4 == 0) and ($year % 100 != 0) or ($year % 400 == 0) ) {
return 1;
} else {
return 0;
}
}
sub sanitizeString {
# Remove the quote marks and any additional space before and after
# the string.
my $string = shift(@_);
$string =~ s/"//g;
$string =~ s/^\s+//;
$string =~ s/\s+$//;
return $string;
}
__END__
=head1 NAME
diag_table_chk - Parse a diag_table, and report the number of files, max fields, and parse errors
=head1 SYNOPSIS
diag_table_chk [-h|--help]
diag_table_chk [-v|--verbose] I<diag_table>
diag_table_chk [-v|--verbose] -x I<xml_file> I<experiment>
=head1 DESCRIPTION
B<diag_table_chk> will parse a diag_table and report on the number of
files in the diag_table, the max fields used per file, and will give
warnings on any parse errors found in the format of 'WARNING(<line_number>)'.
=head1 OPTIONS
=over 8
=item B<-h>, B<--help>
Display usage information
=item B<-v>, B<--verbose>
Display the files and fields that were found.
=item B<-x>, B<--xml> <xml_file> <experiment>
Read the diagnostic table file from I<<experiment>> from the I<<xml_file>>.
=item <diag_table>
The file name of the diagnostic table to check
=back
=head1 EXAMPLE
> diag_table_chk -x SM2.1U-LM3V.xml SM2.1U_Control-1990_lm3v_pot_A1
Table Descriptor: NOT SET
Base Date: 0101 01 01 0 0 0
Number of files: 14
Max fields per file: 93
Number of warnings: 2
WARNING(3): The table descriptor and the base date must be set before any files or fields.
WARNING(206): Time sampling method must be one of (.true., mean, average, avg, .false., none, point, maximum, max, minimum, min, diurnal[#]).
=head1 AUTHOR
Seth Underwood <sunderwood@hpti.com>
=head1 BUGS
No known bugs at this time.
Report any bug to the author.
=cut