#!/bin/sh

# $Header$

# Purpose: Convert PAWS data to netCDF input file compatible with DEAD

# Usage:
# ~/esh/paws2nc.sh
# cd ${DATA}/esh;~/esh/paws2nc.sh;cd -

# Sample header from raw PAWS data file, kept verbatim for reference
# The literal spans nine lines; the yearly files processed by this script have nine-line headers too
# NOTE(review): hdr_txt does not appear to be referenced anywhere else in this script -- confirm before removing
# The last two header lines show the column layout that the tbl2cdf field names must follow
hdr_txt='Data Extracted:2005-02-03 12:53:14
<a href="/station_info/station_info_802.html">Paterson</a><BR>Lat:46.0 Lng:-120. elevation: 
Dates Range From 1990-05-11 To 2005-02-02

                           Hour       Total      Avg        Avg        Avg        Avg        Avg        Dev                   Leaf       Soil       Avg        
                           of the     Solar      Air        Relative   Dewpt      Wind       Wind       in         Total      Wetness    Moistr     Soil       
Station                    Day        Rad        Temp C     Humidity   Temp C     Speed      Dir deg    Dir        Precip     total      kPa        Temp C     
ID         Greg.    Julian PST        W/sq m     1.6 M      %          1.6 M      M/s@2M     2 M        Deg        mm         minutes    203.2mm    203.2mm    
--------   --------------- ------     -------    ------     -------    -------    ------     -------    -------    ------     -------    -------    -------    '

# From the tbl2cdf documentation:
#       Data types:
# 
#               The preferred data type can be specified for each
#               variable name, by appending a colon and a data
#               type abbreviation to the variable name.  Example:
#               x:d signifies that variable x is to be stored
#               as NC_DOUBLE (NetCDF double).  The abbreviations
#               are:
#                               b       byte    NC_BYTE
#                               c       char    NC_CHAR
#                               s       short   NC_SHORT
#                               l       long    NC_LONG
#                               f       float   NC_FLOAT
#                               d       double  NC_DOUBLE
#                               v       void    NC_IGNORE
# 
#               No variable is created for type NC_IGNORE.

# Column names handed to tbl2cdf, listed in the order the columns appear once
# the dashes in YYYY-MM-DD and the colons in HH:MM have been removed
# NB: A ":f" suffix requests NC_FLOAT. Without it tbl2cdf picks NC_BYTE for years
# without measurements, and ncrcat then cannot fit later measurements into the NC_BYTE range
# Station, date, and time-of-day columns
fld_nm_txt='stn_ID date_gregorian year month_of_year day_of_month doy_int hour_of_day minute_of_hour'
# Radiation, air temperature, humidity, dewpoint, wind, and precipitation columns
fld_nm_txt="${fld_nm_txt} flx_dwn_SW:f tpt_mdp:f RH:f tpt_dwp:f wnd_znl_mdp:f wnd_drc_dgr:f wnd_drc_dvn_dgr:f pcp_mm:f"
# Leaf wetness and soil columns
fld_nm_txt="${fld_nm_txt} leaf_wet_min soil_moisture_203mm_kPa:f tpt_soi_203mm:f"

# Convert the yearly raw PAWS text files for 1991..2003 in ${DATA}/esh into
# raw netCDF files named paws_Ptr_YYYY0101_YYYY1231_raw.nc
# Requires: environment variable DATA; tr, tail, perl, mktemp, tbl2cdf, ncatted, ncrename
# Side effects: each raw text file is rewritten in place without carriage returns
# Any failed step aborts the script with a message on stderr and non-zero exit status

# Print diagnostic to stderr and abort
die() {
    printf 'paws2nc.sh: %s\n' "$*" >&2
    exit 1
}

# Stop immediately if DATA is unset or the data directory is missing,
# rather than processing whatever happens to be in the current directory
cd "${DATA:?DATA must be set to the data root directory}/esh" || die "cannot cd to ${DATA}/esh"

# Use a private scratch directory instead of predictable names directly in /tmp
# The EXIT trap removes it; the signal traps route interrupts through the EXIT trap
tmp_dir=$(mktemp -d "${TMPDIR:-/tmp}/paws2nc.XXXXXX") || die 'cannot create scratch directory'
trap '/bin/rm -rf -- "${tmp_dir}"' EXIT
trap 'exit 1' HUP INT TERM

# Variable name header is the same for every year so write it only once
fl_hdr="${tmp_dir}/fld_nm.header"
printf '%s\n' "${fld_nm_txt}" > "${fl_hdr}" || die "cannot write ${fl_hdr}"

yr='1991'
while [ "${yr}" -le 2003 ] ; do
    fl_txt="paws_Ptr_${yr}0101_${yr}1231.txt"
    fl_txt_tmp="paws_Ptr_${yr}0101_${yr}1231_tmp.txt"
    fl_nc="paws_Ptr_${yr}0101_${yr}1231_raw.nc"
    printf 'Processing year %s data in file %s...\n' "${yr}" "${fl_txt}"
    [ -r "${fl_txt}" ] || die "cannot read ${fl_txt}"
# Raw PAWS data is in Windows format
# Strip excess carriage returns from Windows file, then replace raw file with cleaned copy
    tr -d '\015' < "${fl_txt}" > "${tmp_dir}/${fl_txt}" || die "cannot strip carriage returns from ${fl_txt}"
    /bin/mv -f "${tmp_dir}/${fl_txt}" "${fl_txt}" || die "cannot replace ${fl_txt}"
# Files have nine-line headers so data begins on line ten
# POSIX spelling (-n +10) of GNU-only --lines=+10
    tail -n +10 "${fl_txt}" > "${tmp_dir}/${fl_txt_tmp}" || die "cannot strip header from ${fl_txt}"
# Change hour of day HH:MM into hour HH and minute MM by removing colon, then
# change YYYY-MM-DD into YYYYMMDD, year YYYY, month MM, and day DD by removing dashes
    perl -pi \
        -e 's/:/ /g;' \
        -e 's/([0-9]{4})-([0-9]{2})-([0-9]{2})/$1$2$3 $1 $2 $3/g;' \
        "${tmp_dir}/${fl_txt_tmp}" || die "cannot reformat date and time fields of ${fl_txt}"
# Convert from text to netCDF
# Remove output of any earlier run first and test for fresh non-empty output afterwards
# NOTE(review): exit status of tbl2cdf is deliberately not trusted -- its convention is not known here
    /bin/rm -f -- "${fl_nc}"
    tbl2cdf -h "${fl_hdr}" "${tmp_dir}/${fl_txt_tmp}" "${fl_nc}"
    [ -s "${fl_nc}" ] || die "tbl2cdf produced no output for ${fl_txt}"
# Put correct metadata into netCDF files
    ncatted -O -h \
        -a history,global,a,c,"Processed by ${HOME}/esh/paws2nc.sh\n" \
        -a long_name,stn_ID,o,c,"Station ID" \
        -a units,stn_ID,o,c,"index" \
        -a long_name,date_gregorian,o,c,"Gregorian Date" \
        -a units,date_gregorian,o,c,"YYYYMMDD" \
        -a long_name,year,o,c,"Gregorian Year" \
        -a units,year,o,c,"year" \
        -a long_name,month_of_year,o,c,"Month of year" \
        -a units,month_of_year,o,c,"month" \
        -a long_name,day_of_month,o,c,"Day of month" \
        -a units,day_of_month,o,c,"day" \
        -a long_name,doy_int,o,c,"Day of year (integer)" \
        -a units,doy_int,o,c,"day" \
        -a long_name,hour_of_day,o,c,"Hour of Day" \
        -a units,hour_of_day,o,c,"hour" \
        -a long_name,minute_of_hour,o,c,"Minute of Hour" \
        -a units,minute_of_hour,o,c,"minute" \
        -a long_name,flx_dwn_SW,o,c,"Surface Insolation" \
        -a units,flx_dwn_SW,o,c,"watt meter-2" \
        -a long_name,tpt_mdp,o,c,"Mean Temperature at 1.6 m" \
        -a units,tpt_mdp,o,c,"celsius" \
        -a long_name,RH,o,c,"Mean Relative Humidity" \
        -a units,RH,o,c,"percent" \
        -a long_name,tpt_dwp,o,c,"Mean Dewpoint Temperature at 1.6 m" \
        -a units,tpt_dwp,o,c,"celsius" \
        -a long_name,wnd_znl_mdp,o,c,"Mean Wind Speed at 2 m" \
        -a units,wnd_znl_mdp,o,c,"meter second-1" \
        -a long_name,wnd_drc_dgr,o,c,"Mean Wind Direction at 2 m" \
        -a units,wnd_drc_dgr,o,c,"degree" \
        -a long_name,wnd_drc_dvn_dgr,o,c,"Deviation in Wind Direction" \
        -a units,wnd_drc_dvn_dgr,o,c,"degree" \
        -a long_name,pcp_mm,o,c,"Total precipitation" \
        -a units,pcp_mm,o,c,"millimeter" \
        -a long_name,leaf_wet_min,o,c,"Leaf Wetness" \
        -a units,leaf_wet_min,o,c,"minute" \
        -a long_name,soil_moisture_203mm_kPa,o,c,"Soil Moisture at 203.2 mm" \
        -a units,soil_moisture_203mm_kPa,o,c,"kilopascal" \
        -a long_name,tpt_soi_203mm,o,c,"Mean Soil Temperature at 203.2 mm" \
        -a units,tpt_soi_203mm,o,c,"celsius" \
        "${fl_nc}" || die "ncatted failed on ${fl_nc}"
# Rename "u" dimension to "time" for later service as time coordinate dimension
    ncrename -O -d u,time "${fl_nc}" || die "ncrename failed on ${fl_nc}"
# Scratch copy of this year is no longer needed
    /bin/rm -f -- "${tmp_dir}/${fl_txt_tmp}"
# Increment year counter for next file
    yr=$((yr+1))
done # end loop over year

# Join the per-year raw files into one multi-year raw file
# Delete any earlier copy of the joined file first: its name also matches the input glob below
fl_cat='paws_Ptr_19910101_20031231_raw.nc'
/bin/rm -f "${fl_cat}"
ncrcat -O paws_Ptr_????0101_????1231_raw.nc "${fl_cat}"

# Two kinds of netCDF files are produced:
#   raw files (paws*_raw.nc) have undergone minimal post-processing since conversion
#   DEAD input files (paws*.nc) have correct gridding and metadata for DEAD
# For testing purposes, a smaller DEAD input file ${fl_stb}.nc is derived from the multi-year raw file
fl_stb='paws_Ptr_19910101_20031231' # Stub shared by both filenames
fl_raw=${fl_stb}'_raw.nc' # Multi-year raw file
fl_tst=${fl_stb}'.nc' # DEAD input file

# Derive DEAD input file from raw data file

# Variables not currently used are left out of the DEAD input file (-x -v)
var_xcl='stn_ID,leaf_wet_min,pcp_mm,soil_moisture_203mm_kPa,tpt_soi_203mm,wnd_drc_dvn_dgr,wnd_drc_dgr,tpt_dwp,RH'
ncks -O -x -v "${var_xcl}" "${fl_raw}" "${fl_tst}"

# Add two derived variables to the DEAD input file, both including the fractional day:
#   time = YYYYMMDD plus fraction of day
#   doy  = integer day of year plus fraction of day
# Fraction of day elapsed is common to both definitions
day_frc='hour_of_day/24.0+minute_of_hour/1440.0'
ncap -O \
    -s "time=year*10000+month_of_year*100+day_of_month+${day_frc}" \
    -s "doy=doy_int+${day_frc}" \
    "${fl_tst}" "${fl_tst}"
# Attach descriptive metadata to the two new variables
ncatted -O -h \
    -a long_name,time,o,c,"Time (YYYYMMDD.day_fraction)" -a units,time,o,c,"day" \
    -a long_name,doy,o,c,"Day of Year [1.0..367.0)" -a units,doy,o,c,"day" \
    "${fl_tst}"

# Run DEAD for ten timesteps (--time_nbr=10) with the new input file as external data
# Path is quoted so a DATA value containing whitespace stays one argument, and
# an unset or empty DATA aborts here instead of silently yielding /esh/...
dead --time_nbr=10 --fl_xtr_dat="${DATA:?DATA must be set to the data root directory}/esh/${fl_tst}"
