#!/usr/bin/env bash

# Purpose: Check data file against DIWG (and, eventually, other) recommendations

# Copyright (C) 2023--present Charlie Zender, ... (Contributing DIWG members---Add your names!)

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  
# See the 3-Clause BSD License for more details.

# Prerequisites: Bash, NCO
# Script could use other shells, e.g., dash (Debian default) after rewriting function definitions and loops
# Debug with 'bash -x ncchecker --dbg=dbg_lvl' where 0 <= dbg_lvl <= 5

# Insta-install:
# scp ~/diwg/ncchecker dust.ess.uci.edu:bin
# scp dust.ess.uci.edu:bin/ncchecker ${MY_BIN_DIR}

# Set script name, directory, PID, run directory
drc_pwd=${PWD} # [sng] Directory from which script was invoked
# Security: Explicitly unset IFS before wordsplitting, so Bash uses default IFS=<space><tab><newline>
unset IFS
# Set these before 'module' command which can overwrite ${BASH_SOURCE[0]}
# NB: dash supports $0 syntax, not ${BASH_SOURCE[0]} syntax
# http://stackoverflow.com/questions/59895/can-a-bash-script-tell-what-directory-its-stored-in
spt_src="${BASH_SOURCE[0]}"
[[ -z "${spt_src}" ]] && spt_src="${0}" # Use ${0} when BASH_SOURCE is unavailable (e.g., dash)
while [ -h "${spt_src}" ]; do # Recursively resolve ${spt_src} until file is no longer a symlink
  drc_spt="$( cd -P "$( dirname "${spt_src}" )" && pwd )"
  spt_src="$(readlink "${spt_src}")"
  [[ ${spt_src} != /* ]] && spt_src="${drc_spt}/${spt_src}" # If ${spt_src} was relative symlink, resolve it relative to path where symlink file was located
done
# [sng] Command line as invoked; "${*}" joins arguments with single spaces because IFS is unset above
cmd_ln="${spt_src} ${*}"
drc_spt="$( cd -P "$( dirname "${spt_src}" )" && pwd )" # [sng] Directory that holds the (symlink-resolved) script
# Quote the path so that a script location containing whitespace is not split into multiple basename arguments
spt_nm=$(basename "${spt_src}") # [sng] Script name (unlike $0, ${BASH_SOURCE[0]} works well with 'source <script>')
spt_pid=$$ # [nbr] Script PID (process ID)

# Original Setup (prior to merger with NCO):
# git clone git@github.com:diwg/diwg ~/diwg
# ln -s ~/diwg/diwg_chk ~/bin/diwg_chk
# ln -s ~/diwg/diwg_chk ~/sh/diwg_chk

# Debugging and Benchmarking:
# ncchecker ~/nco/data/in.nc ~/nco/data/in_4.nc 
# ncchecker ~/data/bm/elmv2_ne30pg2l15.nc
# ncchecker ~/data/hdf/MODIS_L2N_20140304T1120.nc > ~/diwg.txt 2>&1 &
# ncchecker ~/data/hdf/OMI-Aura_L2-OMAERUV_2013m1004t2338-o49057_v003-2013m1005t053932.nc

# Production usage:
# ncchecker --dbg=1 ~/nco/data/in_4.nc > ~/diwg.txt 2>&1 &
# screen # Start screen
# ncchecker --dbg=1 ~/nco/data/in_4.nc > ~/diwg.txt 2>&1 &
# Ctl-A D # Detach screen
# tail ~/diwg.txt # Monitor progress
# screen -ls # List screens
# screen -r <ID> # Re-attach screen

# Configure paths at High-Performance Computer Centers (HPCCs) based on ${HOSTNAME}
# Populate and export HOSTNAME when the shell has not provided one
# The first candidate that is an executable regular file supplies the value
if [[ -z "${HOSTNAME}" ]]; then
    for hst_exe in /bin/hostname /usr/bin/hostname; do
        if [[ -f "${hst_exe}" && -x "${hst_exe}" ]]; then
            HOSTNAME=$("${hst_exe}")
            export HOSTNAME
            break
        fi # !hst_exe
    done # !hst_exe
    unset hst_exe
fi # !HOSTNAME

# Set NCO version and directory
# Locate ncks on PATH. 'type -P' is a Bash builtin that performs a PATH-only search,
# so no external 'which' utility is required
nco_exe=$(type -P ncks)
if [ -z "${nco_exe}" ]; then
    echo "${spt_nm}: ERROR Unable to find NCO, \${nco_exe} = ${nco_exe}"
    echo "${spt_nm}: HINT Carefully examine your environment setup (e.g., .bashrc) to avoid inadvertently overriding (with, e.g., conda-initialization) paths intended to be provided by an analysis-package environment (e.g., Anaconda)"
    exit 1
fi # !nco_exe
# StackOverflow method finds NCO directory: follow symlinks until a real file is reached
while [ -h "${nco_exe}" ]; do
  drc_nco="$( cd -P "$( dirname "${nco_exe}" )" && pwd )"
  nco_exe="$(readlink "${nco_exe}")"
  [[ ${nco_exe} != /* ]] && nco_exe="${drc_nco}/${nco_exe}" # Relative link targets are resolved against the link's own directory
done
drc_nco="$( cd -P "$( dirname "${nco_exe}" )" && pwd )"
# Redirection order '2>&1 > /dev/null' sends stderr into the pipe and discards stdout
nco_vrs=$(ncks --version 2>&1 > /dev/null | grep NCO | awk '{print $5}') # [sng] Fifth whitespace-delimited field of the line containing "NCO"
nco_sng=$(ncks --version 2>&1 > /dev/null | grep NCO | awk -F '"' '{print $2}') # [sng] First double-quoted string on that line

# 20190218: Die quickly when NCO is found yet cannot run, e.g., due to linker errors
if [ -z "${nco_vrs}" ]; then
    echo "${spt_nm}: ERROR ${nco_exe} dies with error message on next line:"
    # Run ncks directly so that its diagnostics reach the user
    # (wrapping the call in $(...) would instead try to execute whatever it prints to stdout)
    ncks --version
    exit 1
fi # !nco_vrs
lbr_vrs=$(ncks --library 2>&1 > /dev/null | awk '{print $6}') # [sng] Sixth field of 'ncks --library' stderr output

# When running in a terminal window (not in a non-interactive batch queue)...
if [ -n "${TERM}" ]; then
    # Set fonts for legibility
    # Prefer terminfo sequences from tput (wherever it lives on PATH) when the terminal supports color
    if command -v tput > /dev/null 2>&1 && tput setaf 1 > /dev/null 2>&1; then
        fnt_bld=$(tput bold) # Bold
        fnt_nrm=$(tput sgr0) # Normal
        fnt_rvr=$(tput smso) # Reverse
        fnt_tlc=$(tput sitm) # Italic
    else
        # ANSI-C quoting stores the real ESC byte, so these fallbacks render with echo as well as printf
        # (a plain "\e[1m" string is only interpreted when it appears inside a printf format)
        fnt_bld=$'\e[1m' # Bold
        fnt_nrm=$'\e[0m' # Normal
        fnt_rvr=$'\e[07m' # Reverse
        fnt_tlc=$'\e[3m' # Italic
    fi # !tput
fi # !TERM
    
# Defaults for command-line options and some derived variables
# Modify these defaults to save typing later
# Scalars
dbg_lvl=0 # [nbr] Debugging level
caseid="elmforc.ERA5.c2018.0.25d" # [sng] Case ID
nco_opt="" # [sng] NCO options (e.g., '-6 -t 1')

# Directories (the *_xmp variants are rooted at ${DATA})
drc_in="" # [sng] Input file directory
drc_out="" # [sng] Output file directory
drc_in_xmp="${DATA}/era5/data_raw/1979" # [sng] Input file directory for examples
drc_out_xmp="${DATA}/era5/data_out" # [sng] Output file directory for examples

# Origin of the input file list; each flag is either 'Yes' or 'No'
inp_aut="No" # [sng] Input file list automatically generated
inp_glb="No" # [sng] Input file list from globbing directory
inp_psn="No" # [sng] Input file list from positional arguments
inp_std="No" # [sng] Input file list from stdin

# Test selection and version reporting
tst_flg="Yes" # [flg] Trigger test-selection code
tst_lst="bnd,chr,mss,nan,tm,xtn" # [sng] Tests to conduct (subset of bnd,chr,mss,nan,tm,xtn)
vrs_prn="No" # [sng] Print version information

# NCO filters documented in http://nco.sf.net/nco.html#filter
# Print the sorted identifiers that precede ': type' in the 'ncks --trd -m' listing of file ${1}, one per line
# The filename is quoted so that paths containing whitespace reach ncks as a single argument
function ncvarlst {
    ncks --trd -m "${1}" | grep -E ': type' | cut -f 1 -d ' ' | sed 's/://' | sort
}
# Print the text left of '=' on every 'ncks --cdl -m' line of file ${1} that has '=' before any ':'
# (lines without '=' in their pre-colon part are suppressed by 'cut -s'; surrounding blanks are kept)
# The filename is quoted so that paths containing whitespace reach ncks as a single argument
function ncdmnlst {
    ncks --cdl -m "${1}" | cut -d ':' -f 1 | cut -d '=' -s -f 1
}

function fnc_usg_prn { # NB: dash supports fnc_nm (){} syntax, not function fnc_nm{} syntax
    # Print usage summary to stdout, then exit with status 1
    # Font variables are interpolated into printf formats (never into echo strings) so that
    # backslash-style values such as "\e[1m" are rendered instead of being printed literally
    # All other values are passed as %s arguments so that '%' or '\' in them is printed verbatim
    printf "${fnt_rvr}Basic usage:\n${fnt_nrm}${fnt_bld}%s in.nc${fnt_nrm} # Default settings\n" "${spt_nm}"
    printf "${fnt_bld}%s -t bnd,nan${fnt_nrm} # Short option syntax with explicit tests\n" "${spt_nm}"
    printf "${fnt_bld}%s --tst=bnd,nan${fnt_nrm} # Long options with explicit tests\n" "${spt_nm}"
    printf "Command-line options [long-option synonyms in ${fnt_tlc}italics${fnt_nrm}]:\n"
#    echo "${fnt_rvr}-3${fnt_nrm}          Output file format CLASSIC (netCDF3 classic CDF1) [${fnt_tlc}fl_fmt, file_format=classic${fnt_nrm}]"
#    echo "${fnt_rvr}-4${fnt_nrm}          Output file format NETCDF4 (netCDF4 extended HDF5) [${fnt_tlc}fl_fmt, file_format=netcdf4${fnt_nrm}]"
#    echo "${fnt_rvr}-5${fnt_nrm}          Output file format 64BIT_DATA (netCDF3/PnetCDF CDF5) [${fnt_tlc}fl_fmt, file_format=64bit_data${fnt_nrm}]"
#    echo "${fnt_rvr}-6${fnt_nrm}          Output file format 64BIT_OFFSET (netCDF3 64bit CDF2) [${fnt_tlc}fl_fmt, file_format=64bit_offset${fnt_nrm}]"
#    echo "${fnt_rvr}-7${fnt_nrm}          Output file format NETCDF4_CLASSIC (netCDF4 classic HDF5) [${fnt_tlc}fl_fmt, file_format=netcdf4_classic${fnt_nrm}]"
#    echo "${fnt_rvr}-c${fnt_nrm} ${fnt_bld}caseid${fnt_nrm}   Case ID string to generate output filenames (default ${fnt_bld}${caseid}${fnt_nrm}) [${fnt_tlc}caseid, case_id, case${fnt_nrm}]"
    printf "${fnt_rvr}-d${fnt_nrm} ${fnt_bld}dbg_lvl${fnt_nrm}  Debug level (default ${fnt_bld}%s${fnt_nrm}) [${fnt_tlc}dbg_lvl, dbg, debug, debug_level${fnt_nrm}]\n" "${dbg_lvl}"
    printf "${fnt_rvr}-i${fnt_nrm} ${fnt_bld}drc_in${fnt_nrm}   Input directory (default ${fnt_bld}%s${fnt_nrm}) [${fnt_tlc}drc_in, in_drc, dir_in, in_dir, input${fnt_nrm}]\n" "${drc_in}"
#    echo "${fnt_rvr}-j${fnt_nrm} ${fnt_bld}job_nbr${fnt_nrm}  Job simultaneity for parallelism (default ${fnt_bld}${job_nbr}${fnt_nrm}) [${fnt_tlc}job_nbr, job_number, jobs${fnt_nrm}]"
#    echo "${fnt_rvr}-o${fnt_nrm} ${fnt_bld}drc_out${fnt_nrm}  Output directory (default ${fnt_bld}${drc_out}${fnt_nrm}) [${fnt_tlc}drc_out, out_drc, dir_out, out_dir, output${fnt_nrm}]"
    printf "${fnt_rvr}-p${fnt_nrm} ${fnt_bld}par_typ${fnt_nrm}  Parallelism type (default ${fnt_bld}%s${fnt_nrm}) [${fnt_tlc}par_typ, par_md, parallel_type, parallel_mode, parallel${fnt_nrm}] [${fnt_tlc}serial | background | mpi${fnt_nrm}]\n" "${par_typ}"
#    echo " ${fnt_bld}--tpd_out${fnt_nrm}  Timesteps-per-day in output (default ${fnt_bld}${tpd_out}${fnt_nrm}) [${fnt_tlc}tpd_out, tpd, timesteps_per_day${fnt_nrm}]"
    printf " ${fnt_bld}--tests${fnt_nrm}    Tests to conduct ('none' means none) (default ${fnt_bld}%s${fnt_nrm}) [${fnt_tlc}tst_lst, tst, test_list${fnt_nrm}]\n" "${tst_lst}"
    printf "${fnt_rvr}-v${fnt_nrm} ${fnt_bld}var_lst${fnt_nrm}  Variable list (empty means all) (default ${fnt_bld}%s${fnt_nrm}) [${fnt_tlc}var_lst, variable_list, var, vars, variable, variables${fnt_nrm}]\n" "${var_lst}"
    printf " ${fnt_bld}--version${fnt_nrm}  Version and configuration information [${fnt_tlc}version, vrs, config, configuration, cnf${fnt_nrm}]\n"
    printf "${fnt_rvr}-x${fnt_nrm} ${fnt_bld}--xcl_var${fnt_nrm}  Exclude rather than extract var_lst [${fnt_tlc}xcl_var, xcl, exclude, exclude_variables${fnt_nrm}]\n"
    printf "\n"
    printf "${fnt_rvr}Examples:${fnt_nrm}\n"
    printf "${fnt_bld}%s in1.nc in2.nc${fnt_nrm} # Run all tests on two files\n" "${spt_nm}"
    # Bold is reset before each trailing comment so that it does not bleed into following lines
    printf "${fnt_bld}%s -v var1,var2 in1.nc${fnt_nrm} # Check only two variables\n" "${spt_nm}"
    printf "${fnt_bld}%s *.nc${fnt_nrm} # Glob input files via wildcard\n" "${spt_nm}"
    printf "${fnt_bld}ls *.nc | %s${fnt_nrm} # Input files via stdin\n" "${spt_nm}"
    printf "${fnt_bld}%s --dbg=2 *.nc${fnt_nrm} # Debug %s\n" "${spt_nm}" "${spt_nm}"
    printf "${fnt_bld}%s --tests=nan,mss *.nc${fnt_nrm} # Select only two tests\n" "${spt_nm}"
    printf "${fnt_bld}%s --tests=xtn,tm,nan,mss,chr,bnd *.nc${fnt_nrm} # Change test ordering\n" "${spt_nm}"
    # Escaped quotes are printed so that the example shows the URL quoted, as a shell requires for '#'
    printf "${fnt_bld}%s \"file://%s/in_zarr4#mode=nczarr,file\"${fnt_nrm} # Check Zarr object(s)\n" "${spt_nm}" "${HOME}"
    printf "\nComplete documentation at http://nco.sf.net/nco.html#%s\n\n" "${spt_nm}"
    exit 1
} # !fnc_usg_prn()

# Check argument number and complain accordingly
# Invocation without any arguments prints the usage message (fnc_usg_prn exits)
arg_nbr=$# # [nbr] Number of command-line arguments
#printf "\ndbg: Number of arguments: ${arg_nbr}"
[ ${arg_nbr} -eq 0 ] && fnc_usg_prn

# Parse command-line options:
# http://stackoverflow.com/questions/402377/using-getopts-in-bash-shell-script-to-get-long-and-short-command-line-options (see method by Adam Katz)
# http://tuxtweaks.com/2014/05/bash-getopts
# Parse options from the supplied argument list into the script's global option variables
# Arguments: the script's command line ("${@}")
# On return OPTIND indexes the first positional (non-option) argument
# Long options arrive through the '-' pseudo-option: OPTARG then holds "key" or "key=value"
# Exits with status 1 (directly or via fnc_usg_prn) on malformed or unrecognized options
function fnc_opt_prs {
    local OPT # [sng] Option letter returned by getopts
    local LONG_OPTARG # [sng] Value part of a "--key=value" long option
    # NB: '-s' is accepted with an argument but has no handler, so it is ignored
    while getopts :34567c:d:f:i:o:p:s:t:v:x-: OPT; do
        case ${OPT} in
            3) fl_fmt='3' ;; # File format
            4) fl_fmt='4' ;; # File format
            5) fl_fmt='5' ;; # File format
            6) fl_fmt='6' ;; # File format
            7) fl_fmt='7' ;; # File format
            c) caseid="${OPTARG}" ;; # CASEID
            d) dbg_lvl="${OPTARG}" ;; # Debugging level
            f) fml_nm_usr="${OPTARG}" ;; # Family name
            i) drc_in="${OPTARG}" ;; # Input directory
            o) drc_out_usr="${OPTARG}" ;; # Output directory
            p) par_typ="${OPTARG}" ;; # Parallelism type
            t) tst_lst="${OPTARG}" ;; # Tests to conduct (short form shown in usage message)
            v) var_lst="${OPTARG}" ;; # Variables
            x) xcl_flg='Yes' ;; # Exclude variable list
            -) LONG_OPTARG="${OPTARG#*=}"
               case ${OPTARG} in
                   # Hereafter ${OPTARG} is long argument key, and ${LONG_OPTARG}, if any, is long argument value
                   # Long options with no argument, no short option counterpart
                   # Long options with argument, no short option counterpart
                   # Long options with short counterparts, ordered by short option key
                   caseid=?* | case_id=?* | case=?* ) caseid="${LONG_OPTARG}" ;; # -c # CASEID
                   dbg_lvl=?* | dbg=?* | debug=?* | debug_level=?* ) dbg_lvl="${LONG_OPTARG}" ;; # -d # Debugging level
                   drc_in=?* | in_drc=?* | dir_in=?* | in_dir=?* | input=?* ) drc_in="${LONG_OPTARG}" ;; # -i # Input directory
                   drc_out=?* | out_drc=?* | dir_out=?* | out_dir=?* | output=?* ) drc_out_usr="${LONG_OPTARG}" ;; # -o # Output directory
                   fl_fmt=?* | fmt_out=?* | file_format=?* | format_out=?* ) fl_fmt="${LONG_OPTARG}" ;; # # Output file format
                   fml_nm=?* | fml=?* | family_name=?* | family=?* ) fml_nm_usr="${LONG_OPTARG}" ;; # -f # Family name
                   hrd_pth | hard_path | npo | nco_path_override ) hrd_pth='Yes' ;; # # Use hard-coded paths on known machines (intentional no-op because already handled prior to getopt())
                   # ${OPTARG%%=*} strips "=value" so that the message names only the offending switch
                   hrd_pth=?* | hard_path=?* | npo=?* | nco_path_override=?* ) echo "No argument allowed for --${OPTARG%%=*} switch" >&2; exit 1 ;; # # Use hard-coded paths on known machines
                   job_nbr=?* | job_number=?* | jobs=?* ) job_usr="${LONG_OPTARG}" ;; # -j # Job simultaneity
                   par_typ=?* | par_md=?* | parallel_type=?* | parallel_mode=?* | parallel=?* ) par_typ="${LONG_OPTARG}" ;; # -p # Parallelism type
                   tests=?* | tst_lst=?* | tst=?* | test_list=?* ) tst_lst="${LONG_OPTARG}" ;; # -t # Tests to conduct
                   tpd_out=?* | tpd=?* | timesteps_per_day=?* ) tpd_usr="${LONG_OPTARG}" ;; # # Timesteps-per-day in output
                   var_lst=?* | variable_list=?* | var=?* | vars=?* | variable=?* | variables=?* ) var_lst="${LONG_OPTARG}" ;; # -v # Variables
                   version | vrs | config | configuration | cnf ) vrs_prn='Yes' ;; # # Print version information
                   version=?* | vrs=?* | config=?* | configuration=?* | cnf=?* ) echo "No argument allowed for --${OPTARG%%=*} switch" >&2; exit 1 ;; # # Print version information
                   xcl_var | xcl | exclude | exclude_variables ) xcl_flg='Yes' ;; # # Exclude rather than extract variable list
                   xcl_var=?* | xcl=?* | exclude=?* | exclude_variables=?* ) echo "No argument allowed for --${OPTARG%%=*} switch" >&2; exit 1 ;; # # Exclude rather than extract variable list
                   '' ) break ;; # "--" terminates argument processing
                   * ) printf "\nERROR: Unrecognized option ${fnt_bld}--%s${fnt_nrm}\n" "${OPTARG}" >&2; fnc_usg_prn ;;
               esac ;; # !OPTARG
            :) # Leading ':' in optstring makes getopts report a missing argument as ':' with the option letter in OPTARG
                printf "\nERROR: Option ${fnt_bld}-%s${fnt_nrm} requires an argument\n" "${OPTARG}" >&2
                fnc_usg_prn ;;
            \?) # Unrecognized option
                printf "\nERROR: Option ${fnt_bld}-%s${fnt_nrm} not recognized\n" "${OPTARG}" >&2
                fnc_usg_prn ;;
        esac # !OPT
    done # !getopts
} # !fnc_opt_prs()

fnc_opt_prs "${@}"
shift $((OPTIND-1)) # Discard parsed options so that only positional arguments remain
psn_nbr=$# # [nbr] Number of positional arguments
if [ "${psn_nbr}" -ge 1 ]; then
    inp_psn='Yes'
    # 20200430 Input files on command-line mean we need not check standard-input
    std_chk='No'
fi # !psn_nbr

# Print version and configuration report to stdout
# Values are passed as printf arguments (not spliced into the format) so that
# directory names containing '%' or '\' are reported verbatim
function fnc_vrs_prn {
    printf '%s, runs DIWG recommendation compliance checks, version %s "%s"\n' "${spt_nm}" "${nco_vrs}" "${nco_sng}"
    printf 'Authors: \n'
    printf 'Config: %s script located in directory %s\n' "${spt_nm}" "${drc_spt}"
    printf 'Config: NCO binaries located in directory %s, linked to netCDF library version %s\n' "${drc_nco}" "${lbr_vrs}"
    if [ "${hrd_pth_fnd}" = 'Yes' ]; then
        printf 'Config: Employ NCO machine-dependent hardcoded paths/modules for %s. (If desired, turn-off NCO hardcoded paths with "export NCO_PATH_OVERRIDE=No").\n' "${HOSTNAME}"
    else
        printf 'Config: No hardcoded machine-dependent path/module overrides. (If desired, turn-on NCO hardcoded paths at supported national labs with "export NCO_PATH_OVERRIDE=Yes").\n'
    fi # !hrd_pth_fnd
} # !fnc_vrs_prn()

# Version requests print the report and exit successfully without checking any files
if [ "${vrs_prn}" = 'Yes' ]; then
    fnc_vrs_prn
    exit 0
fi # !vrs_prn

# Determine mode first (this helps determine other defaults)
# A test list of 'none' disables the test-selection code entirely
# ${tst_lst} is quoted so that a list containing whitespace cannot break the comparison
if [ "${tst_lst}" = 'none' ]; then
    tst_nbr=0
    tst_flg='No'
fi # !tst_lst

# Input directory defaults to the invocation directory; a user-specified directory must exist
if [ -z "${drc_in}" ]; then
    drc_in="${drc_pwd}"
else # !drc_in
    if [ ! -d "${drc_in}" ]; then
        echo "${spt_nm}: ERROR specified input directory \"${drc_in}\" does not exist"
        exit 1
    fi # !drc_in
    drc_in_usr='Yes' # [flg] Input directory was supplied by user
fi # !drc_in

# 20170807 Custom tests (code borrowed from ncremap seasons (csn) treatment)
# Populate the global test-definition table
# Every tst_* array is indexed by the definition indices tst_bnd ... tst_xtn
function fnc_tst_tbl_dfn {
    # Standard test enumeration
    # Index into test definition table
    tst_idx_srt=0 # [idx] Starting index for test enumeration
    tst_bnd=0
    tst_chr=1
    tst_mss=2
    tst_nan=3
    tst_tm=4
    tst_xtn=5
    tst_nbr_max=6 # [nbr] Maximum number of tests in definitions database

    # Test abbreviations, uppercase
    tst_abb[${tst_bnd}]='BND'
    tst_abb[${tst_chr}]='CHR'
    tst_abb[${tst_mss}]='MSS'
    tst_abb[${tst_nan}]='NAN'
    tst_abb[${tst_tm}]='TM'
    tst_abb[${tst_xtn}]='XTN'

    # Test abbreviations, lowercase
    tst_abb_lc[${tst_bnd}]='bnd'
    tst_abb_lc[${tst_chr}]='chr'
    tst_abb_lc[${tst_mss}]='mss'
    tst_abb_lc[${tst_nan}]='nan'
    tst_abb_lc[${tst_tm}]='tm'
    tst_abb_lc[${tst_xtn}]='xtn'

    # Recommendation (i.e., short exhortation) in ESDS RFC Title
    # NB: the \" sequences are stored literally and rely on being expanded inside a printf format
    tst_rcm[${tst_bnd}]='Use CF Bounds Attributes'
    tst_rcm[${tst_chr}]='Identifier names shall comply with this regular expression: [A-Za-z][A-Za-z0-9_]*'
    tst_rcm[${tst_mss}]='Avoid use of the missing_value attribute in new Earth Science data products'
    tst_rcm[${tst_nan}]='Earth Science data products should avoid using Not-a-Number (NaN) in any field values or as an indicator of missing or invalid data'
    tst_rcm[${tst_tm}]='Use Double Precision When Archiving Time in Seconds Since a Specific Epoch'
    tst_rcm[${tst_xtn}]='Files created with the HDF5, HDF-EOS5, or netCDF APIs should have filename extensions \"h5\", \"he5\", or \"nc\", respectively'

    # ESDS RFC Recommendation Number
    tst_rfc[${tst_bnd}]='2.3'
    tst_rfc[${tst_chr}]='3.1'
    tst_rfc[${tst_mss}]='4.2'
    tst_rfc[${tst_nan}]='3.7'
    tst_rfc[${tst_tm}]='4.11'
    tst_rfc[${tst_xtn}]='3.8'

    # ESDS RFC Recommendation URL
    tst_url[${tst_bnd}]='https://wiki.earthdata.nasa.gov/pages/viewpage.action?pageId=182296291'
    tst_url[${tst_chr}]='https://wiki.earthdata.nasa.gov/display/ESDSWG/Character+Set+for+User-Defined+Group%2C+Variable%2C+and+Attribute+names'
    tst_url[${tst_mss}]='https://wiki.earthdata.nasa.gov/display/ESDSWG/Avoid+Use+of+the+missing_value+Attribute'
    tst_url[${tst_nan}]='https://wiki.earthdata.nasa.gov/display/ESDSWG/Not-a-Number+%28NaN%29+Value'
    tst_url[${tst_tm}]='https://wiki.earthdata.nasa.gov/display/ESDSWG/Use+Double+Precision+When+Archiving+Time+in+Seconds+Since+a+Specific+Epoch'
    tst_url[${tst_xtn}]='https://wiki.earthdata.nasa.gov/pages/viewpage.action?pageId=182297715'

    # Test names "in English" (complete phrase "Test checks that ___")
    tst_ngl[${tst_bnd}]='coordinates have bounds attributes'
    tst_ngl[${tst_chr}]='identifier names comprised of legal characters'
    tst_ngl[${tst_mss}]='variables do not have missing_value attributes'
    tst_ngl[${tst_nan}]='data values do not contain NaN (or NaNf)'
    tst_ngl[${tst_tm}]='time variables are stored in double precision'
    tst_ngl[${tst_xtn}]='filename extension matches recommended list'

    # Test short names (complete phrase "___ check passed/failed")
    tst_sht[${tst_bnd}]='coordinate bounds'
    tst_sht[${tst_chr}]='character-set'
    tst_sht[${tst_mss}]='missing_value'
    tst_sht[${tst_nan}]='NaN'
    tst_sht[${tst_tm}]='precision of time variables'
    tst_sht[${tst_xtn}]='filename extension'

    # Test invocation options to ncks
    tst_opt[${tst_bnd}]='--chk_bnd'
    tst_opt[${tst_chr}]='--chk_chr'
    tst_opt[${tst_mss}]='--chk_mss'
    tst_opt[${tst_nan}]='--chk_nan'
    tst_opt[${tst_tm}]='--chk_tm'
    tst_opt[${tst_xtn}]='--chk_xtn'
} # !fnc_tst_tbl_dfn()

# Translate comma-separated ${tst_lst} into tst_rqs[], map_r2d[], map_d2r[], and tst_nbr
# Requires the definition table (fnc_tst_tbl_dfn) to have been populated
# Prints an error and exits with status 1 when a requested test is not in the table
function fnc_tst_lst_prs {
    local tst # [sng] Current requested test name
    local tst_dfn_idx # [idx] Index into definition table
    local tst_lst_arr # [sng] Requested test names, one per element

    # Which tests are requested?
    # http://stackoverflow.com/questions/27702452/loop-through-a-comma-separated-shell-variable
    tst_nbr=0 # [nbr] Number of tests to conduct
    # Split into an array with read so that elements are never subjected to pathname expansion
    read -r -a tst_lst_arr <<< "${tst_lst//,/ }"
    for tst in "${tst_lst_arr[@]}"; do
        tst_rqs[${tst_nbr}]=${tst}
        # NB: Requested tests are 0-based, defined tests are 0-based
        for ((tst_dfn_idx=tst_idx_srt;tst_dfn_idx<tst_nbr_max;tst_dfn_idx++)); do
            # Whole-string comparison: a quoted right-hand side of =~ would accept any name that merely
            # contains an abbreviation (e.g., "atm" would select "tm")
            if [[ "${tst}" == "${tst_abb[${tst_dfn_idx}]}" || "${tst}" == "${tst_abb_lc[${tst_dfn_idx}]}" ]]; then
                # Map requested to defined (r2d) tests and inverse (d2r)
                # map_r2d[0]=3 means first test that user requested (i.e., in tst_lst) is fourth defined in table
                # map_d2r[3]=0 means fourth defined test is first requested
                map_r2d[${tst_nbr}]=${tst_dfn_idx}
                map_d2r[${tst_dfn_idx}]=${tst_nbr}
                tst_nbr=$((tst_nbr+1))
                break
            fi # !match
        done # !tst_dfn_idx
        # Inner loop runs to completion (index reaches tst_nbr_max) only when nothing matched
        if [ "${tst_dfn_idx}" -eq "${tst_nbr_max}" ]; then
            printf '%s: ERROR Requested test %s not defined\n' "${spt_nm}" "${tst}"
            exit 1
        fi # !match
    done # !tst_lst
} # !fnc_tst_lst_prs()

if [ "${tst_flg}" = 'Yes' ]; then
    fnc_tst_tbl_dfn
    fnc_tst_lst_prs
fi # !tst_flg

# Read files from stdin pipe, positional arguments, or directory glob
#printf "dbg: inp_aut  = ${inp_aut}\n"
#printf "dbg: inp_glb  = ${inp_glb}\n"
#printf "dbg: inp_psn  = ${inp_psn}\n"
#printf "dbg: inp_std  = ${inp_std}\n"

# Derived variables
# Derive output directory from the user-specified value, if any
if [ -n "${drc_out_usr}" ]; then
    chr_fst=${drc_out_usr:0:1} # [sng] First character of user-specified output directory
    if [ "${chr_fst}" = '-' ]; then
        # This script's output-directory option is lowercase '-o'; values are passed as %s arguments
        printf "%s: WARNING first character of user-specified output directory drc_out is \"%s\", which looks like an option itself. Be sure the %s '-o' option is followed by an argument that specifies the output directory for processed files. The '-o' option requires a pathname argument, and should not be confused with the '-O' flag (which takes no argument) used in the rest of NCO to force overwriting files (%s automatically overwrites existing output files).\n" "${spt_nm}" "${chr_fst}" "${spt_nm}" "${spt_nm}"
    fi # !chr_fst
    # Fancy %/ syntax removes trailing slash (e.g., from $TMPDIR)
    drc_out="${drc_out_usr%/}"
fi # !drc_out_usr

# Doubly-derived variables

# Assemble input file list fl_in[] and its length fl_nbr from directory glob, positional arguments, or stdin
# fl_nbr must be numeric before it is used as an array subscript or in integer comparisons
fl_nbr=0 # [nbr] Number of input files
if [ "${inp_glb}" = 'Yes' ]; then
    for fl in "${drc_in}"/*.nc "${drc_in}"/*.nc3 "${drc_in}"/*.nc4 "${drc_in}"/*.nc5 "${drc_in}"/*.nc6 "${drc_in}"/*.nc7 "${drc_in}"/*.cdf "${drc_in}"/*.hdf "${drc_in}"/*.he5 "${drc_in}"/*.h5 ; do
        # Unmatched patterns remain as literal strings, which the regular-file test rejects
        if [ -f "${fl}" ]; then
            fl_in[${fl_nbr}]=${fl}
            fl_nbr=$((fl_nbr+1))
        fi # !file
    done
fi # !inp_glb
if [ "${inp_psn}" = 'Yes' ]; then
    # Read any positional arguments; they occupy fl_in[] from index 0 and define the file count
    for ((psn_idx=1;psn_idx<=psn_nbr;psn_idx++)); do
        fl_in[$((psn_idx-1))]=${!psn_idx}
    done # !psn_idx
    fl_nbr=${psn_nbr}
fi # !inp_psn
if [ "${inp_std}" = 'Yes' ]; then
    # Input awaits on unit 0, i.e., on stdin
    # 'IFS=' keeps leading/trailing blanks; the -n test keeps a final line that lacks a newline
    while IFS= read -r line || [ -n "${line}" ]; do # NeR05 p. 179
        fl_in[${fl_nbr}]=${line}
        fl_nbr=$((fl_nbr+1))
    done < /dev/stdin
fi # !inp_std

# Create output directory
# Create output directory when one was requested and does not yet exist
if [ -n "${drc_out}" ] && [ ! -d "${drc_out}" ]; then
    chr_fst=${drc_out:0:1}
    if [ "${chr_fst}" = '-' ]; then
        echo "${spt_nm}: ERROR Attempting to mkdir user-specified output directory \"${drc_out}\" will fail because directory name begins with '-' which is an option indicator"
        echo "${spt_nm}: HINT Specify an output directory name that does not begin with '-'"
        exit 1
    fi # !chr_fst
    cmd_mkd="mkdir -p ${drc_out}" # [sng] Human-readable command, used only in the error message
    # Invoke mkdir directly with a quoted path: eval would word-split the name and
    # let shell metacharacters in a user-supplied path execute as code
    if ! mkdir -p -- "${drc_out}"; then
        printf '%s: ERROR Failed to create output directory. Debug this:\n%s\n' "${spt_nm}" "${cmd_mkd}"
        printf '%s: HINT Creating a directory requires proper write permissions\n' "${spt_nm}"
        exit 1
    fi # !err
fi # !drc_out

# Print initial state
# Print option state, positional parameters, and raw input file list to stdout
# Arguments: the script's remaining positional parameters ("${@}")
# All values are printed through %s so that '%' or '\' in names (e.g., percent-encoded URLs) is shown verbatim
function fnc_stt_prn {
    local var_nm # [sng] Name of variable being reported
    local prn_idx # [idx] Loop counter
    # %-7s pads each name to seven characters so that the '=' signs align
    for var_nm in dbg_lvl drc_in drc_out hrd_pth job_nbr mpi_flg mpi_nbr nco_opt tst_lst tst_nbr var_lst xcl_flg; do
        printf 'dbg: %-7s = %s\n' "${var_nm}" "${!var_nm}"
    done # !var_nm
    psn_nbr=$#
    if [ "${psn_nbr}" -ge 1 ]; then
        printf 'dbg: Found %s positional parameters (besides $0):\n' "${psn_nbr}"
        for ((prn_idx=1;prn_idx<=psn_nbr;prn_idx++)); do
            printf 'dbg: psn_arg[%s] = %s\n' "${prn_idx}" "${!prn_idx}"
        done # !prn_idx
    fi # !psn_nbr
    # Treat an unset file count as zero rather than letting the integer test fail
    if [ "${fl_nbr:-0}" -ge 1 ]; then
        printf 'dbg: Processing %s raw input files:\n' "${fl_nbr}"
        for ((prn_idx=0;prn_idx<fl_nbr;prn_idx++)); do
            printf 'dbg: fl_in[%s] = %s\n' "${prn_idx}" "${fl_in[${prn_idx}]}"
        done # !prn_idx
    fi # !fl_nbr
} # !fnc_stt_prn()

# Print initial state
if [ "${dbg_lvl:-0}" -ge 2 ]; then
    fnc_stt_prn "${@}"
fi # !dbg

# Human-readable summary
# Record start time (seconds since epoch) and announce what will be checked
date_srt=$(date +"%s")
if test ${dbg_lvl} -ge 0; then
    printf "ncchecker, a DIWG recommendation compliance checker, invoked with command:\n"
    echo "${cmd_ln}"
    printf "Started DIWG checker at $(date)\n"
    printf "# of recommendations that will be checked: ${tst_nbr}/${tst_nbr_max}\n"
    printf "Recommendations that will be checked: "
    # List each requested test as "<RFC number> (<short name>) " in requested order
    tst_idx=0
    while ((tst_idx<tst_nbr)); do
        idx_dfn=${map_r2d[${tst_idx}]}
        printf "${tst_rfc[${idx_dfn}]} (${tst_sht[${idx_dfn}]}) "
        tst_idx=$((tst_idx+1))
    done # !tst_idx
    printf "\n"
fi # !dbg

# Abort when no input files were collected
# An unset or empty file count is treated as zero so that the integer test cannot fail and skip this guard
if [ "${fl_nbr:-0}" -lt 1 ]; then
    echo "${spt_nm}: ERROR Checker did not receive any input files, fl_nbr = ${fl_nbr}"
    exit 1
fi # fl_nbr

# Main Loop

# Run one requested check on one input file and report the outcome on stdout
# Arguments: ${1} = index into fl_in[], ${2} = index into requested tests (map_r2d[])
# Globals read: dbg_lvl, fl_in, map_r2d, par_opt, tst_ngl, tst_opt, tst_rcm, tst_rfc, tst_sht, tst_url
# Globals written: bnd_pid, cmd_tst, date_chr, date_crr, date_dff, idx_dfn, tst_idxp1
function fnc_chk_run {
    local fl_idx_crr=${1} # [idx] Index of current file
    local tst_idx_crr=${2} # [idx] Index of current requested test
    local fl_crr=${fl_in[${fl_idx_crr}]} # [sng] Current input file

    tst_idxp1=$((tst_idx_crr+1))
    # Index of test in definition table
    idx_dfn=${map_r2d[${tst_idx_crr}]}

    # Table strings stay inside the printf format because tst_rcm[] relies on printf expanding \"
    printf "\nTest ${tst_idxp1}: Check that ${tst_ngl[${idx_dfn}]} ...\n"
    printf "Recommendation ${tst_rfc[${idx_dfn}]}: ${tst_rcm[${idx_dfn}]}\n"
    echo "URL: ${tst_url[${idx_dfn}]}"
    [[ ${dbg_lvl} -ge 0 ]] && date_chr=$(date +"%s")

    # Human-readable form of the command, used for reporting
    cmd_tst[${fl_idx_crr}]="ncks --dbg_lvl=${dbg_lvl} ${tst_opt[${idx_dfn}]} ${fl_crr}"

    if [ "${dbg_lvl}" -ge 2 ]; then
        echo "${cmd_tst[${fl_idx_crr}]}"
    fi # !dbg
    # Checks are executed only at debug levels 0 and 1; higher levels just print the command
    if [ "${dbg_lvl}" -le 1 ]; then
        if [ -z "${par_opt}" ]; then
            # Invoke ncks directly with the filename as one quoted argument: eval would word-split
            # the name and let shell metacharacters in it execute as code
            if ncks --dbg_lvl="${dbg_lvl}" "${tst_opt[${idx_dfn}]}" "${fl_crr}"; then
                printf 'Result: SUCCESS %s check passed.\n' "${tst_sht[${idx_dfn}]}"
            else # !err
                printf 'Result: FAILURE %s check failed. Reproduce this single check with:\n%s\n' "${tst_sht[${idx_dfn}]}" "${cmd_tst[${fl_idx_crr}]}"
            fi # !err
        else # !par_opt
            # NOTE(review): eval is retained because par_opt is shell syntax appended to the command
            # (presumably '&'; no visible code sets it). Filenames are not safe here -- confirm before enabling
            eval "${cmd_tst[${fl_idx_crr}]} ${par_opt}" # eval always returns 0 on backgrounded processes
            bnd_pid[${fl_idx_crr}]=$!
        fi # !par_opt
    fi # !dbg

    if [ "${dbg_lvl}" -ge 1 ]; then
        date_crr=$(date +"%s")
        date_dff=$((date_crr-date_chr))
        echo "Elapsed time for ${tst_sht[${idx_dfn}]} check = $((date_dff/60))m$((date_dff % 60))s"
    fi # !dbg
} # !fnc_chk_run()

# Apply every requested test to every input file
for ((fl_idx=0;fl_idx<fl_nbr;fl_idx++)); do
    # Output name is input basename placed in output directory (currently only recorded)
    fl_out[${fl_idx}]="${drc_out}/$(basename "${fl_in[${fl_idx}]}")"
    #printf "fl_in[${fl_idx}]  = ${fl_in[${fl_idx}]}\n"
    #printf "fl_out[${fl_idx}] = ${fl_out[${fl_idx}]}\n"

    printf '\nChecking file #%s: %s ...\n' "${fl_idx}" "${fl_in[${fl_idx}]}"

    for ((tst_idx=0;tst_idx<tst_nbr;tst_idx++)); do
        fnc_chk_run "${fl_idx}" "${tst_idx}"
    done # !tst_idx

done # !fl_idx

# Record end time, report total wall-clock duration, and finish successfully
date_end=$(date +"%s")
if test ${dbg_lvl} -ge 0; then
    printf "\nCompleted DIWG Compliance Checker at $(date)\n"
    date_dff=$((date_end-date_srt)) # [s] Elapsed seconds since start of run
    printf 'Elapsed time %sm%ss\n' "$((date_dff/60))" "$((date_dff % 60))"
fi # !dbg

exit 0
