% $Header$

% Purpose: Proposal for NSF Science and Engineering Informatics (SEI) NCO->SDO project

% URLs:
% http://dust.ess.uci.edu/prp/prp_sei/prp_sei.pdf
% http://dust.ess.uci.edu/prp/prp_sei/prp_sei_fll.pdf
% http://www.nsf.gov/awardsearch/showAward.do?AwardNumber=0431203
% http://www.calit2.net/articles/article.php?id=360

% 2004 NSF SEI:
% NSF 04-528
% Directorate for Computer and Information Science and Engineering (CISE)
% Division of Information and Intelligent Systems (IIS)
% Division of Shared Cyberinfrastructure (SC)
% Program Manager, IIS: James C. French, (703) 292-8936, jfrench@nsf.gov
% Program Manager, SC: Barbara Fossum, (703) 292-8962, bfossum@nsf.gov
% Deadlines: Full Proposal 20040304
% NSF FastLane Temporary Proposal #635594 PIN czen
% NSF FastLane Proposal #0431203 (refer to as IIS-0431203)
% Total 3-year budget request: $594417
% Project duration: 20040901--20070831
% Annual progress report deadlines: 20050603, 20060603, 20071129
% Procurement control number (PCN): 
% UCI account number: 9-number-fund-sub-object = 9-123456-12345-1-1234 = 9-445925-21862-1-1234 
% Physical Sciences budget code: 2001

% fxm: Next round mention CI report http://www.cyrdas.org/report/cyrdas_report_final.pdf
% fxm: Can SDO use MPICH-G2, ``Grid-enabled'' MPI, to discover and
% take advantage of computational topology?

% GDS already has limited SSDDRA http://www.iges.org/grads/gds/doc/user.html#3

% 2004 NSF SEI Round2:
% Due December 15, 2004
% 2005 NSF SEI Round3:
% Due December 15, 2005

% Usage: See end of file

% Parallel netCDF:
% http://www-unix.mcs.anl.gov/parallel-netcdf/
% netCDF4:
% http://my.unidata.ucar.edu/content/software/netcdf/netcdf-4/index.html
% http://www.unidata.ucar.edu/proposals/NASA-AIST-2002/Description.pdf
% NCAR CCSM IPCC runs:
% http://www.cgd.ucar.edu/ccr/ipcc/

\documentclass[12pt]{article} % Article class at 12pt base font size

% Standard packages
\usepackage{ifpdf} % Define \ifpdf
% Engine-dependent graphics setup: the pdfTeX branch additionally sets
% PDF compression and loads PDF-only helper packages
\ifpdf % We are running PDFLaTeX
\usepackage[pdftex]{graphicx} % Defines \includegraphics*
% Compression level is assigned here and once more after hyperref is loaded
\pdfcompresslevel=9
\usepackage{thumbpdf} % Generate thumbnails
\usepackage{epstopdf} % Convert .eps, if found, to .pdf when required
\else % We are not running PDFLaTeX
\usepackage{graphicx} % Defines \includegraphics*
\fi % endif PDFLaTeX
\usepackage{amsmath} % subequations and align environments, \eqref
\usepackage{array} % Table and array extensions, e.g., column formatting
\usepackage{datetime} % \xxivtime (used in the cover-page time stamp), \ordinal
\usepackage{longtable} % Multi-page tables, e.g., acronyms and symbols
\usepackage{makeidx} % Index keyword processor: \printindex and \see
\usepackage{mdwlist} % Compact list environments itemize*, enumerate*
\usepackage{natbib} % \cite commands from aguplus
\usepackage{times} % Postscript Times-Roman font KoD99 p. 375
\usepackage{tocbibind} % Add Bibliography and Index to Table of Contents
\usepackage{url} % Typeset URLs and e-mail addresses

% hyperref is loaded after the other standard packages since it
% redefines other packages' commands
% NOTE(review): the personal packages further down are loaded after
% hyperref, so it is not literally the last package---confirm that
% none of them conflicts with hyperref's redefinitions
% hyperref options, assumed true unless =false is specified:
% backref       List citing sections after bibliography entries
% baseurl       Make all URLs in document relative to this
% bookmarksopen Show the PDF bookmark tree with all subtrees expanded (not set below)
% breaklinks    Wrap links onto newlines
% colorlinks    Use colored text for links, not boxes
% hyperindex    Link index to text
% plainpages=false Suppress warnings caused by duplicate page numbers
% pdftex        Conform to pdftex conventions
% Colors used when colorlinks=true:
% linkcolor     Color for normal internal links
% anchorcolor   Color for anchor text
% citecolor     Color for bibliographic citations in text
% filecolor     Color for URLs which open local files
% menucolor     Color for Acrobat menu items
% pagecolor     Color for links to other pages
% urlcolor      Color for linked URLs
\ifpdf % We are running PDFLaTeX
\usepackage[backref,breaklinks,colorlinks,citecolor=blue,linkcolor=blue,urlcolor=blue,hyperindex,plainpages=false,pdftex]{hyperref} % Hyper-references
% NOTE(review): repeats the assignment already made in the graphicx
% branch above; presumably re-asserted in case a package changed
% it---confirm, otherwise one of the two can be dropped
\pdfcompresslevel=9
\else % We are not running PDFLaTeX
\usepackage[backref=false,breaklinks,colorlinks=false,hyperindex,plainpages=false]{hyperref} % Hyper-references
\fi % endif PDFLaTeX

% Personal packages
% NOTE(review): macros used later in this file but not defined in it
% (e.g., \tpt, \tptsfc, \trmidx) presumably come from these
% packages---verify before removing any of them
\usepackage{csz} % Library of personal definitions
\usepackage{abc} % Alphabet as three letter macros
\usepackage{dmn} % Dimensional units
\usepackage{chm} % Commands generic to chemistry
\usepackage{dyn} % Commands generic to fluid dynamics
\usepackage{aer} % Commands specific to aerosol physics
\usepackage{psd} % Particle size distributions
\usepackage{rt} % Commands specific to radiative transfer
\usepackage{jrn_agu} % AGU-sanctioned journal title abbreviations

% Commands which must be executed in preamble
\makeglossary % Glossary described on KoD95 p. 221
\makeindex % Index described on KoD95 p. 220

% Commands specific to this file
% Caption labels are set in boldface by redefining the internal label
% macros; \makeatletter/\makeatother are needed because the macro
% names contain @
\makeatletter
\renewcommand{\fnum@table}{\textbf{\tablename~\thetable}} % Boldface caption label ``Table #''
\renewcommand{\fnum@figure}{\textbf{\figurename~\thefigure}} % Boldface caption label ``Figure #''
\makeatother

% 1. Primary commands
% Comment convention: [unit or kind] description
%   idx = index, nbr = count, sbs = subscript, sng = string, frc = generic
% NOTE(review): several macros in this group typeset the same glyph
% (\var and \cmpspd -> v; \mmrnbr and \flnbr -> M; \rnksbs and
% \dmnnbr -> R; \dmnavgsz and \avgsbs -> A), so they are told apart
% only by context in the typeset output
% The two index macros are provided first and then overridden, so this
% file's symbol is used whether or not the macro already exists
\providecommand*{\varidx}{\ensuremath{n}}%
\renewcommand*{\varidx}{\ensuremath{n}} % [idx] Index over variables
\providecommand*{\flidx}{\ensuremath{m}}%
\renewcommand*{\flidx}{\ensuremath{m}} % [idx] Index over files
\newcommand*{\calit}{Cal-(IT)$^{2}$} % [sng] Cal-IT2 name
\newcommand*{\var}{\ensuremath{v}} % [frc] A generic variable
\newcommand*{\bndwdt}{\ensuremath{b}} % [bit s-1] Bandwidth
\newcommand*{\varnbr}{\ensuremath{V}} % [nbr] Variable count
\newcommand*{\fl}{\ensuremath{\mathbf{F}}} % [frc] A generic file
\newcommand*{\flpnbr}{\ensuremath{F}} % [nbr] Floating point operation count
\newcommand*{\ntgnbr}{\ensuremath{I}} % [nbr] Integer arithmetic operation count
\newcommand*{\mmrnbr}{\ensuremath{M}} % [nbr] Memory operation count
\newcommand*{\cmpspd}{\ensuremath{v}} % [s-1] Computer speed
\newcommand*{\flnbr}{\ensuremath{M}} % [nbr] File count
\newcommand*{\cpu}{\ensuremath{\mathbf{C}}} % [frc] A generic CPU
\newcommand*{\cpuidx}{\ensuremath{l}} % [idx] Index over CPUs
\newcommand*{\cpunbr}{\ensuremath{L}} % [nbr] CPUs per node
\newcommand*{\ndl}{\ensuremath{\mathbf{Q}}} % [frc] A generic node
\newcommand*{\wrdsz}{\ensuremath{W}} % [b] Word size
\newcommand*{\ndlidx}{\ensuremath{h}} % [idx] Index over nodes
\newcommand*{\ndlnbr}{\ensuremath{H}} % [nbr] Node count
\newcommand*{\dmnsz}{\ensuremath{D}} % [nbr] Size of a dimension
\newcommand*{\dmnavgsz}{\ensuremath{A}} % [nbr] Size of an averaging dimension
\newcommand*{\brdsbs}{\ensuremath{B}} % [sbs] Subscript: broadcast
\newcommand*{\rnksbs}{\ensuremath{R}} % [sbs] Subscript: rank
\newcommand*{\usrsbs}{\ensuremath{u}} % [sbs] Subscript: user
\newcommand*{\syssbs}{\ensuremath{s}} % [sbs] Subscript: system
\newcommand*{\dmnnbr}{\ensuremath{R}} % [nbr] Rank, i.e., number of dimensions
\newcommand*{\avgsbs}{\ensuremath{A}} % [sbs] Subscript: average
\newcommand*{\outsbs}{\ensuremath{O}} % [sbs] Subscript: output
\newcommand*{\CL}{CL} % [sng] Client Library
\newcommand*{\ALB}{ALB} % [sng] Analytically Load-Balanced
\newcommand*{\CGLB}{CGLB} % [sng] Computational Geometry Load-Balanced
\newcommand*{\DRA}{DR\&A} % [sng] Data Reduction and Analysis
\newcommand*{\LDRA}{LDR\&A} % [sng] Local Data Reduction and Analysis
\newcommand*{\DDRA}{DDR\&A} % [sng] Distributed Data Reduction and Analysis
\newcommand*{\SSDRA}{SSDR\&A} % [sng] Server-side Data Reduction and Analysis
\newcommand*{\IDRAV}{IDRAV} % [sng] Integrated Data Reduction, Analysis, and Visualization
\newcommand*{\CST}{CST} % [sng] Client-Server Traffic
\newcommand*{\PTV}{PTV} % [sng] Per-Thread-Variables
\newcommand*{\CSTP}{CSTP} % [sng] Client-Server Traffic Protocol

% Primary commands, continued: hyperslab elements, flags, weights,
% location subscripts, and timing symbols
% NOTE(review): \idx and \lmnidx both typeset i, \lclsbs and \ltnsrv
% both typeset l, and \wgtsbs and \wrttm both typeset W
\newcommand*{\idx}{\ensuremath{i}} % [idx] Generic index
\newcommand*{\lmnidx}{\ensuremath{i}} % [idx] Index over elements
\newcommand*{\outnbr}{\ensuremath{J}} % [nbr] Element count of output hyperslab
\newcommand*{\lmnnbr}{\ensuremath{N}} % [nbr] Element count of input hyperslab
\newcommand*{\tllnbr}{\ensuremath{M}} % [nbr] Tally, i.e., count of valid elements in input hyperslab
\newcommand*{\mskflg}{\ensuremath{m}} % [flg] Mask flag
\newcommand*{\mssflg}{\ensuremath{\mu}} % [flg] Missing value flag
\newcommand*{\wgt}{\ensuremath{w}} % [frc] Weight
\newcommand*{\wgtsbs}{\ensuremath{W}} % [sbs] Subscript: weight
\newcommand*{\lclsbs}{\ensuremath{l}} % [sbs] Subscript: local
\newcommand*{\ntwsbs}{\ensuremath{n}} % [sbs] Subscript: network
\newcommand*{\clnsbs}{\ensuremath{c}} % [sbs] Subscript: client
\newcommand*{\srvsbs}{\ensuremath{s}} % [sbs] Subscript: server
\newcommand*{\oprtm}{\ensuremath{T}} % [s] Time taken by an operation
\newcommand*{\rqssrv}{\ensuremath{L}} % [nbr] Transaction requests to server
\newcommand*{\ltnsrv}{\ensuremath{l}} % [s] Latency of a server transaction
\newcommand*{\wrttm}{\ensuremath{W}} % [s] Time to write
\newcommand*{\iotm}{\ensuremath{\mathrm{IO}}} % [s] Time for I/O
\newcommand*{\rdtm}{\ensuremath{R}} % [s] Time to read
\newcommand*{\shp}{\ensuremath{\mathbf{S}}} % [vct] Shape vector
% 2. Derived commands
% Each macro here attaches a subscript or accent to a primary command
\newcommand*{\avgnbr}{\ensuremath{\dmnnbr_{\avgsbs}}} % [nbr] Count of dimensions averaged over
\newcommand*{\lmnnbravg}{\ensuremath{\lmnnbr_{\avgsbs}}} % [nbr] Element count of averaging block
\newcommand*{\lmnnbrout}{\ensuremath{\lmnnbr_{\outsbs}}} % [nbr] Element count of output variable
\newcommand*{\oprtmlcl}{\ensuremath{\oprtm_{\lclsbs}}} % [s] Operation time for local DRA
\newcommand*{\oprtmcln}{\ensuremath{\oprtm_{\clnsbs}}} % [s] Operation time for client-side DRA
\newcommand*{\oprtmsrv}{\ensuremath{\oprtm_{\srvsbs}}} % [s] Operation time for server-side DRA
\newcommand*{\flpspd}{\ensuremath{\cmpspd_{\flpnbr}}} % [nbr s-1] Speed of floating point operations
\newcommand*{\mmrspd}{\ensuremath{\cmpspd_{\mmrnbr}}} % [nbr s-1] Speed of memory operations
\newcommand*{\ntgspd}{\ensuremath{\cmpspd_{\ntgnbr}}} % [nbr s-1] Speed of integer operations
\newcommand*{\mmrusrnbr}{\ensuremath{\mmrnbr_{\usrsbs}}} % [nbr] Count of user memory operations
\newcommand*{\mmrsysnbr}{\ensuremath{\mmrnbr_{\syssbs}}} % [nbr] Count of system memory calls
\newcommand*{\dmnnbrone}{\ensuremath{\dmnnbr_{1}}} % [nbr] Rank one
\newcommand*{\dmnnbrtwo}{\ensuremath{\dmnnbr_{2}}} % [nbr] Rank two
\newcommand*{\shpavg}{\ensuremath{\shp_{\avgsbs}}} % [vct] Shape vector, averaging
\newcommand*{\shpone}{\ensuremath{\shp_{1}}} % [vct] Shape vector one
\newcommand*{\shptwo}{\ensuremath{\shp_{2}}} % [vct] Shape vector two
\newcommand*{\dmnavgszone}{\ensuremath{\dmnavgsz_{1}}} % [nbr] Size of first averaged dimension
\newcommand*{\dmnavgsztwo}{\ensuremath{\dmnavgsz_{2}}} % [nbr] Size of second averaged dimension
\newcommand*{\dmnavgszidx}{\ensuremath{\dmnavgsz_{\idx}}} % [nbr] Size of ith averaged dimension
\newcommand*{\dmnavgszavgnbr}{\ensuremath{\dmnavgsz_{\avgnbr}}} % [nbr] Size of averaged dimension number \avgnbr
\newcommand*{\dmnszone}{\ensuremath{\dmnsz_{1}}} % [nbr] Size of first dimension
\newcommand*{\dmnsztwo}{\ensuremath{\dmnsz_{2}}} % [nbr] Size of second dimension
\newcommand*{\dmnszidx}{\ensuremath{\dmnsz_{\idx}}} % [nbr] Size of ith dimension
\newcommand*{\dmnszdmnnbr}{\ensuremath{\dmnsz_{\dmnnbr}}} % [nbr] Size of Rth dimension
\newcommand*{\dmnszavg}{\ensuremath{\bar{\dmnsz}}} % [nbr] Mean dimension size
\newcommand*{\rnk}{\ensuremath{\dmnnbr}} % [nbr] Rank, alias of \dmnnbr
\newcommand*{\rnkbrd}{\ensuremath{\rnk_{\brdsbs}}} % [nbr] Rank of variable to broadcast
\newcommand*{\rnkwgt}{\ensuremath{\rnk_{\wgtsbs}}} % [nbr] Rank of weight
\newcommand*{\rnkntl}{\ensuremath{\rnksbs_{\mathrm{i}}}} % [sbs] Rank subscript, initial
\newcommand*{\rnkfnl}{\ensuremath{\rnksbs_{\mathrm{f}}}} % [sbs] Rank subscript, final

% Rank-annotated quantities: the rank is typeset as a leading
% superscript, {}^{rank}symbol
% Generic variable
\newcommand*{\rnkZvar}{\ensuremath{{}^{0}\var}} % [frc] Rank-zero generic variable
\newcommand*{\rnkUvar}{\ensuremath{{}^{1}\var}} % [frc] Rank-one generic variable
\newcommand*{\rnkSvar}{\ensuremath{{}^{2}\var}} % [frc] Rank-two generic variable
\newcommand*{\rnkCvar}{\ensuremath{{}^{3}\var}} % [frc] Rank-three generic variable
\newcommand*{\rnkQvar}{\ensuremath{{}^{4}\var}} % [frc] Rank-four generic variable
\newcommand*{\rnkFvar}{\ensuremath{{}^{5}\var}} % [frc] Rank-five generic variable
\newcommand*{\rnkRvar}{\ensuremath{{}^{\rnksbs}\var}} % [frc] Rank-R generic variable
\newcommand*{\rnkRmonevar}{\ensuremath{{}^{\rnksbs-1}\var}} % [frc] Rank-(R-1) generic variable

% Temperature (\tpt is not defined in this file)
\newcommand*{\rnkZtpt}{\ensuremath{{}^{0}\tpt}} % [K] Rank-zero temperature
\newcommand*{\rnkUtpt}{\ensuremath{{}^{1}\tpt}} % [K] Rank-one temperature
\newcommand*{\rnkStpt}{\ensuremath{{}^{2}\tpt}} % [K] Rank-two temperature
\newcommand*{\rnkCtpt}{\ensuremath{{}^{3}\tpt}} % [K] Rank-three temperature
\newcommand*{\rnkQtpt}{\ensuremath{{}^{4}\tpt}} % [K] Rank-four temperature
\newcommand*{\rnkFtpt}{\ensuremath{{}^{5}\tpt}} % [K] Rank-five temperature
\newcommand*{\rnkRtpt}{\ensuremath{{}^{\rnksbs}\tpt}} % [K] Rank-R temperature

% Surface temperature (\tptsfc is not defined in this file)
\newcommand*{\rnkZtptsfc}{\ensuremath{{}^{0}\tptsfc}} % [K] Rank-zero surface temperature
\newcommand*{\rnkUtptsfc}{\ensuremath{{}^{1}\tptsfc}} % [K] Rank-one surface temperature
\newcommand*{\rnkStptsfc}{\ensuremath{{}^{2}\tptsfc}} % [K] Rank-two surface temperature
\newcommand*{\rnkCtptsfc}{\ensuremath{{}^{3}\tptsfc}} % [K] Rank-three surface temperature
\newcommand*{\rnkQtptsfc}{\ensuremath{{}^{4}\tptsfc}} % [K] Rank-four surface temperature
\newcommand*{\rnkFtptsfc}{\ensuremath{{}^{5}\tptsfc}} % [K] Rank-five surface temperature
\newcommand*{\rnkRtptsfc}{\ensuremath{{}^{\rnksbs}\tptsfc}} % [K] Rank-R surface temperature

% Enumerated members of a set: first, second, generic (lowercase
% index), and last (uppercase count) variable, file, CPU, and node
\newcommand*{\varone}{\ensuremath{\var_{1}}} % [frc] First generic variable
\newcommand*{\vartwo}{\ensuremath{\var_{2}}} % [frc] Second generic variable
\newcommand*{\varnnn}{\ensuremath{\var_{\varidx}}} % [frc] n'th generic variable
\newcommand*{\varNNN}{\ensuremath{\var_{\varnbr}}} % [frc] Generic variable subscripted by the variable count
\newcommand*{\flone}{\ensuremath{\fl_{1}}} % [frc] First generic file
\newcommand*{\fltwo}{\ensuremath{\fl_{2}}} % [frc] Second generic file
\newcommand*{\flmmm}{\ensuremath{\fl_{\flidx}}} % [frc] m'th generic file
\newcommand*{\flMMM}{\ensuremath{\fl_{\flnbr}}} % [frc] M'th generic file
\newcommand*{\cpuone}{\ensuremath{\cpu_{1}}} % [frc] First generic CPU
\newcommand*{\cputwo}{\ensuremath{\cpu_{2}}} % [frc] Second generic CPU
\newcommand*{\cpulll}{\ensuremath{\cpu_{\cpuidx}}} % [frc] l'th generic CPU
\newcommand*{\cpuLLL}{\ensuremath{\cpu_{\cpunbr}}} % [frc] L'th generic CPU
\newcommand*{\ndlone}{\ensuremath{\ndl_{1}}} % [frc] First generic node
\newcommand*{\ndltwo}{\ensuremath{\ndl_{2}}} % [frc] Second generic node
\newcommand*{\ndlhhh}{\ensuremath{\ndl_{\ndlidx}}} % [frc] h'th generic node
\newcommand*{\ndlHHH}{\ensuremath{\ndl_{\ndlnbr}}} % [frc] H'th generic node

% 3. Doubly-derived commands
% Built from derived commands or from symbols defined outside this file
\newcommand*{\tptsfcdlt}{\ensuremath{\Delta\tptsfc}} % [K] Change in surface temperature
\newcommand*{\dmnszavgnbr}{\ensuremath{\dmnsz_{\avgnbr}}} % [nbr] Size of dimension number \avgnbr
\newcommand*{\mmrusrspd}{\ensuremath{\cmpspd_{\mmrusrnbr}}} % [nbr s-1] Speed of user memory operations
\newcommand*{\mmrsysspd}{\ensuremath{\cmpspd_{\mmrsysnbr}}} % [nbr s-1] Speed of system memory operations
\newcommand*{\rnkntlvarnnn}{\ensuremath{{}^{\rnkntl}\var_{\varidx}}} % [frc] n'th generic variable at initial rank
\newcommand*{\rnkfnlvarnnn}{\ensuremath{{}^{\rnkfnl}\var_{\varidx}}} % [frc] n'th generic variable at final rank

\newcommand*{\rnkntlvar}{\ensuremath{{}^{\rnkntl}\var}} % [frc] Generic variable at initial rank
\newcommand*{\rnkfnlvar}{\ensuremath{{}^{\rnkfnl}\var}} % [frc] Generic variable at final rank

% Averaging operators: overbar is the time mean, angle brackets the
% area mean; the inner operator is applied first
\newcommand*{\tptsfcbarngl}{\ensuremath{\langle\bar{\tptsfc}\rangle}} % [K] Surface temperature, time mean then area mean
\newcommand*{\tptsfcnglbar}{\ensuremath{\bar{\langle\tptsfc\rangle}}} % [K] Surface temperature, area mean then time mean
\newcommand*{\tptbarngl}{\ensuremath{\langle\bar{\tpt}\rangle}} % [K] Temperature, time mean then area mean
\newcommand*{\tptnglbar}{\ensuremath{\bar{\langle\tpt\rangle}}} % [K] Temperature, area mean then time mean

% Margins and page geometry
\setlength{\oddsidemargin}{0.0in}
\setlength{\evensidemargin}{0.0in}
\setlength{\topmargin}{-0.4in}
\setlength{\textheight}{9in}
\setlength{\textwidth}{6.5in}
\setlength{\columnsep}{0.25in}
\setlength{\headheight}{12pt}
\setlength{\headsep}{12pt}
\setlength{\footskip}{24pt}

% Vertical space around floats and captions
% 20000717: None of these commands seems to have any effect here
\setlength{\textfloatsep}{12pt} % Gap between text and the last top float or first bottom float
\setlength{\intextsep}{12pt} % Gap above and below an in-text float
% \dbltextfloatsep is the two-column counterpart of \textfloatsep
% \dblfloatsep is the two-column counterpart of \floatsep
\setlength{\abovecaptionskip}{12pt} % Gap above a caption
\setlength{\belowcaptionskip}{12pt} % Gap below a caption

% Float placement
% All limits are relaxed: no minimum text per page, large float
% counts, and floats may fill the whole top or bottom area
% NB: Placement of figures is very sensitive to \textfraction
\renewcommand*{\textfraction}{0.0} % Least fraction of a page that must be text
\setcounter{totalnumber}{73} % Most floats allowed on one page
\setcounter{topnumber}{73} % Most floats allowed at top of page
\setcounter{dbltopnumber}{73} % Most floats allowed at top of two-column page
\setcounter{bottomnumber}{73} % Most floats allowed at bottom of page
\renewcommand*{\topfraction}{1.0} % Largest fraction of page top that floats may occupy
\renewcommand*{\dbltopfraction}{1.0} % Largest fraction of two-column page top that floats may occupy
\renewcommand*{\bottomfraction}{1.0} % Largest fraction of page bottom that floats may occupy
\renewcommand*{\floatpagefraction}{1.0} % Least fraction of a float page that floats must fill
\renewcommand*{\dblfloatpagefraction}{1.0} % Least fraction of a two-column float page that floats must fill

\begin{document}

% Science and Engineering Informatics (SEI)
% Proposal title, expanded on the cover page and again above the
% Introduction; the trailing \\ ends the title line at each use
\def\prpttl{SEI(GEO): Scientific Data Operators Optimized for Distributed Interactive and Batch Analysis of Tera-Scale Geophysical Data\\}
% Cover page
\pagenumbering{roman}
\setcounter{page}{1}
\pagestyle{headings}
\thispagestyle{empty}

{\noindent%
On the Web at \url{http://dust.ess.uci.edu/prp/prp_sei/prp_sei.pdf}\\
NSF Science and Engineering Informatics (SEI) Proposal \hfill Submitted: March~4, 2004\\
Last modified: \today, \xxivtime \hfill Next Round Due: December~15, 2005}
\begin{center}
\textbf{\Large\prpttl}
\bigskip
Dr.~Charles S. Zender \hfill Dr.~Phil Papadopoulos \\
Department of Earth System Science \hfill San Diego Supercomputer Center \\
University of California at Irvine \hfill University of California at San Diego \\
\end{center}
\vskip 0.5 cm

\noindent\textbf{News/Preface:} NSF funded the first NCO/SDO proposal.
The next NCO/SDO proposal will tackle different issues, particularly 
native HDF support and possibly bioinformatics and multi-core
processor extensions. 
This preface sketches out the second NCO proposal, and is followed
by the fifteen-page body of the first proposal. 
Once we are ready to branch off and work on the second NCO proposal, 
probably around November, 2004, I will place a link to it here.
So, just to be clear, this introductory information is an evolving
document as we focus in on the second proposal.
The next fifteen pages are the body of the first proposal and are fixed.
Things got into this hybrid state because I expected to revise and
re-submit the first proposal.
I began the revisions before learning that the proposal was funded.

The second proposal will leverage the infrastructure and research
provided by the first grant, and will develop whole new applications
and/or enhancements for NCO/SDO.
HDF, bioinformatics, and multi-core processor support are possible
directions.
The eventual focus needs a champion to Co-PI the next proposal.
If you are interested in being Co-PI on the next proposal in one of
these areas, or in being PI of the whole proposal, let me know. 
(I will not PI another proposal to NSF SEIII until the current grant
is closer to expiring (say, 2006), but am happy to play a role in 
others' SEIII proposals.)

\noindent\textbf{Information for potential collaborators:}
This is an NSF proposal to improve Distributed Data Reduction
\&~Analysis (\trmidx{\DDRA}) with NCO.  
The two main components of this proposal are NCO parallelism (OpenMP, 
MPI) and Server-Side \DDRA\ (\trmidx{\SSDRA}) implemented through
extensions to DODS/OPeNDAP and to \trmidx{netCDF4}.
This project would dramatically reduce bandwidth usage for NCO \DDRA.
With this first NCO proposal out the door, the content of the next two
NCO proposals is clear.

The second NCO proposal will be re-written and improved from the first.
The certain changes include 
(1)~a more convincing description of why CCSM IPCC data reduction
exemplifies a computational, storage, and network-constrained problem 
common to many domains besides geophysics,   
(2)~articulating the numerous required \SSDRA\ extensions to OPeNDAP and
to netCDF4, and 
(3)~deeper involvement (and funding) for distributed computing
experts (e.g., UCSD/\trmidx{OptIPuter}) on the parallelization and
\SSDRA\ issues. 
Unfortunately, the \trmidx{NSF} \trmidx{SEIII} program has relatively
limited funding. 
The average SEI three-year award size is \$500,000.
Our budget is already $\sim\$600,000$ and I want to grow the \SSDRA\ 
component. 
If we are not funded this round, we will include these changes and 
re-submit to the next NSF SEIII round, due 20041215. 

I anticipate sending a distinct (third?) NCO proposal to NASA.
Its narrow technical focus will be NCO/SDO for native HDF speakers.
I will ask Tom DeFanti and Mike Folk to clarify the issue of whether
HDF needs to be modified to be OptIPuter-aware.
The two likely NASA programs which might support SDO are ESTO
and REASON CANN.

Other significant new directions for NCO/SDO might merit full new
proposals to NSF or to other agencies (e.g., DOE, NASA, NIH). 
Significant extensions under consideration include a bioinformatics
component (led by Harry Mangalam, TACGI?), arithmetic speed-ups on 
multi-processor single-core nodes (led by Steve Jenks, UCI?), or
really going hog-wild with the \SSDRA\ idea (anyone?).

Please contact me if you wish to be involved with any future proposals.  
Comments on the proposals and letters of support are very welcome.

\begin{enumerate*}
\item Senior Personnel Inquiries for Intellectual Collaboration
\begin{enumerate*}
\item Peter Cornillon (URI) OPeNDAP netCDF CL extensions (declined until next round, provided LOS)
\item Steve Jenks (UCI) multi-processor core performance improvements (no response)
\item Harry Mangalam (TACGI) bio-informatics (declined until next round, provided text)
\item Phil Papadopoulos (SDSC, Co-PI) parallelization?
\item Russ Rew (UCAR Unidata) netCDF4, API (provided LOS)
\item Takemura Sato (Japan ESC) DDRA demonstrations (will serve 1~TB)
\item Larry Smarr (Cal-IT2) OptIPuter (provided LOS)
\item Padhraic Smyth (UCI ICS) data mining (declined)
\end{enumerate*}
\item Senior Personnel for Equipment Collaboration
\begin{enumerate*}
\item Aaron Chin (Cal-IT2, Senior Personnel) OptIPuter server support
\end{enumerate*}
\item Other Letters of Collaboration/Support to consider
\begin{enumerate*}
\item Rajiv Bendale contact (IBM) bio-informatics
\item John Caron (UCAR Unidata) NcML aggregation, lazy processing
\item Brian Eaton (NCAR) CF conformance
\item Ian Foster (Argonne) 
\item James Gallagher (URI) OPeNDAP netCDF CL extensions
\item Dan Holloway (URI) OPeNDAP netCDF CL extensions
\item Mark Taylor (Sandia) climate \SSDRA\
\end{enumerate*}
\item Big Budget Items
\begin{enumerate*}
\item One month summer salary per year for Zender
\item One month salary for Aaron Chin (OptIPuter)
\item P/A or Specialist~II to architect/bulletproof/coordinate/release SDO (Butowsky?)
\item Grad Student to optimize computational geometry based on processor topology (ICS?)
\item \$17k for ESMF$\leftrightarrow$OptIPuter networking
\end{enumerate*}
\item Next SEI proposal due December~15, 2004
\end{enumerate*}
\clearpage

%\onecolumn
%\tableofcontents
%\listoftables
%\clearpage
\pagenumbering{arabic}
\setcounter{page}{0}
\thispagestyle{empty}
%\markleft{Mineral Dust Aerosol}
%\markright{}

%\begin{center}
%\textbf{\large\prpttl}
%\end{center}

\noindent{\large{\textbf{Project Summary.}}}\label{sxn:smr}
%\enlargethispage*{0.5in}

Climate simulations for the Fourth Assessment Report of the IPCC will
be performed and stored in netCDF format in multiple national and
international HPC centers.  
HDF datasets from NASA, NOAA, and ESA satellites steadily accumulate
in geographically disparate EOSDIS sites. 
These data are only as valuable as they are accessible to the working
geophysicist at his or her desk.
Concurrent interactive and scripted analysis of geographically
distributed large scale scientific datasets is functionality critical
to managing and interpreting the many streams of geophysical data.

We propose to develop a suite of Scientific Data Operators (SDO) for
interactive and scripted manipulation of (locally and) widely
distributed repositories of netCDF- and HDF5-formatted geophysical
data.  
SDO's functionality will suit distributed, network-transparent,
analysis of ensemble tera-scale datasets stored at many remote
locations. 
SDO will meet the current and future needs of geophysicists and,
potentially, bio-informaticists.

SDO integrates four existing, proven, Open Source software technologies: 
(1)~netCDF---the de~facto standard format for climate model data and 
Earth-bound geophysical observational datasets. 
(2)~netCDF Operators (NCO)---the de~facto standard for analysis of
climate model and geophysical data. 
(3)~DODS/OPeNDAP---the de~facto standard for network-transparent
client/server access to geophysical data. 
(4)~HDF-EOS---the official storage standard for NASA EOS satellite
data. 
We have taken advantage of existing synergies and convergence among 
these standards, and guidance from their initiators, to plan the 
extension of the existing NCO toolkit into the next generation SDO
toolkit. 
SDO will (like NCO) be an indispensable software assistant to
individual researchers and research centers for distributed 
processing of climate and satellite data. 

This SDO project has three main parts:
(1)~Implement shared memory (OpenMP) and Message Passing Interface 
(MPI) parallelism to speed up and reduce latency of local and
distributed tera-scale data processing.
(2)~Design and implement automatic computational geometry
load-balancing algorithms to exploit the intrinsic parallelism of 
frequently used arithmetic operations without user intervention.   
(3)~Add, enhance, and simplify functionality for server-side
processing of distributed netCDF data with the OPeNDAP protocol.
%(3)~Implement an HDF-EOS back-end to NCO/SDO for processing of
%satellite data with existing NCO functionality.   

\textbf{Scientific Merit:}
The proof-of-concept application, distributed analysis of NCAR CCSM
IPCC assessment simulations within and across national boundaries,
may potentially yield otherwise undiscovered patterns among different
SRES scenarios for the same model, and among different climate models.
New data reduction efficiencies will be gained by automatic
configuration of computational geometry to optimize for the data
servers' processor topology.
The tera-scale climate datasets targeted for analysis will reveal any
critical bandwidth, I/O, and client/server bottlenecks in distributed
geophysical data processing.

\textbf{Broader Impacts:}
Bringing distributed data analysis power out from the realm of the
computational scientist to the desktop of the practicing geophysicist 
will leverage existing distributed capabilities by making their use
transparent to the average scientific user.
Improving tools to analyze and inter-compare geophysical model and
observational data that reside in various national HPC centers
increases the simulations' scientific value and decreases time to
discovery and publication.  
SDO helps address the problem of growing bioinformatics data sets,
especially gene expression data, in ways similar to the geophysics
domain. 
It provides a mechanism for efficient storage and manipulation of the  
numeric data separate from the contextual or meta-data which is
normally stored in XML.   
\clearpage
\begin{center}
\textbf{\Large \prpttl}
\end{center}
\thispagestyle{empty}

\section{Introduction}\label{sxn:ntr}

Climate simulations for the Fourth Assessment Report of the IPCC will
be performed and stored in netCDF format in multiple national and
international HPC centers.  
HDF datasets from NASA, NOAA, and ESA satellites steadily accumulate
in geographically disparate EOSDIS sites. 
These data are only as valuable as they are accessible to the working
geophysicist at his or her desk.
Concurrent interactive and scripted analysis of geographically
distributed large scale scientific datasets is functionality critical
to managing and interpreting the many streams of geophysical data.

We propose to develop a suite of Scientific Data Operators (SDO) for
interactive and scripted manipulation of (locally and) widely
distributed repositories of netCDF- and HDF5-formatted geophysical
data.  
SDO's functionality will suit distributed, network-transparent,
analysis of ensemble tera-scale datasets stored at many remote
locations. 
SDO will meet the current and future needs of geophysicists and,
potentially, bio-informaticists.

SDO integrates four existing, proven, Open Source software technologies: 
(1)~netCDF---the de~facto standard format for climate model data and 
Earth-bound geophysical observational datasets. 
(2)~netCDF Operators (NCO)---the de~facto standard for analysis of
climate model and geophysical data. 
(3)~DODS/OPeNDAP---the de~facto standard for network-transparent
client/server access to geophysical data. 
(4)~HDF-EOS---the official storage standard for NASA EOS satellite
data. 
We have taken advantage of existing synergies and convergence among 
these standards, and guidance from their initiators, to plan the 
extension of the existing NCO toolkit into the next generation SDO
toolkit. 
SDO will (like NCO) be an indispensable software assistant to
individual researchers and research centers for distributed 
processing of climate and satellite data. 

This SDO project has three main parts:
(1)~Implement shared memory (OpenMP) and Message Passing Interface 
(MPI) parallelism to speed up and reduce latency of local and
distributed tera-scale data processing.
(2)~Design and implement automatic computational geometry
load-balancing algorithms to exploit the intrinsic parallelism of 
frequently used arithmetic operations without user intervention.   
(3)~Add, enhance, and simplify functionality for server-side
processing of distributed netCDF data with the OPeNDAP protocol.

The proof-of-concept application, distributed analysis of NCAR CCSM
IPCC assessment simulations within and across national boundaries,
may potentially yield otherwise undiscovered patterns among different
SRES scenarios for the same model, and among different climate models.
New data reduction efficiencies will be gained by automatic
configuration of computational geometry to optimize for the data
servers' processor topology.
The tera-scale climate datasets targeted for analysis will reveal any
critical bandwidth, I/O, and client/server bottlenecks in distributed
geophysical data processing.

\subsection{Organization}\label{sxn:org}
This proposal is organized as follows.
Section~\ref{sxn:bgr} describes the purpose, capabilities, and
functional relationships between netCDF, HDF, NCO, and OPeNDAP.
Section~\ref{sxn:prr} describes the results of our relevant, prior
NSF-funded research.
Section~\ref{sxn:dra} describes the current barriers facing
geo-scientists who require Distributed Data Reduction \& Analysis
(\DDRA) capabilities. 
Section~\ref{sxn:mth} states the specific objectives of the proposal 
and details our methods for achieving them.
We then describe the prototype experiment for this project,
\DDRA\ of climate simulation datasets.
Section~\ref{sxn:swg} describes our software engineering plans.
Section~\ref{sxn:crd} presents our project coordination plan,
PI responsibilities, time-line, milestones, and software engineering
methods.  
Section~\ref{sxn:mpc} concludes with a presentation of the broader
impacts and synergies of our project.
Three letters of support/collaboration and a list of acronyms and
abbreviations are included as supplementary documents.

\section{Background}\label{sxn:bgr}

The increasing size, number, and complexity of scientific data in the
past decades have led to the development and use of self-describing
data formats (SDDFs) and tools to manipulate these formats.
The SDDFs replace less functional formats such as raw-binary or
text-formatted data. 

\subsection{HDF}\label{sxn:HDF}
Two SDDFs currently dominate data archival in the geo-sciences.
The first is the \href{http://hdf.ncsa.uiuc.edu}{Hierarchical Data
Format} (\trmidx{HDF}) \cite[][]{HDF}. 
HDF was developed at the 
\trmidx{National Center for Supercomputing Applications}
(\trmidx{NCSA}) and adopted by NASA for \trmidx{Earth Science
  Enterprise} (\trmidx{ESE}) applications.  
HDF is the most commonly used archival format for ESE satellite data.  
This proposal does not involve any work directly with HDF and we
mention HDF mainly due to its importance in observational geophysics. 
Although we would like one day to implement a native HDF-EOS
back-end to NCO/SDO, that task would be extremely difficult and is
beyond the scope of this proposal.

This proposal will, however, exploit and benefit HDF data indirectly
thanks to a complementary proposal described in Section~\ref{sxn:mpc}.
Briefly, a fully-funded effort to layer the netCDF API on top of HDF5
is underway. 
Since NCO/SDO is completely netCDF-conformant, all NCO/SDO operations
will soon work on any HDF file written with netCDF4.

\subsection{netCDF}\label{sxn:netCDF}
The second popular SDDF is the
\href{http://www.unidata.ucar.edu/packages/netcdf}{Network Common Data Format} 
(netCDF), developed by Unidata at the National Center for Atmospheric
Research (NCAR) \cite[][]{netCDF}.
netCDF has become the most commonly used archival format for
large scale geophysical models, such as climate models.
netCDF is less powerful than HDF because it lacks features such
as data compression, irregular grids, threading, and parallel I/O.
However, netCDF is much simpler to program than HDF and, as a result,
is widely used in the geophysical and climate modeling community by
practicing scientists.

\subsection{NCO: netCDF Operators}\label{sxn:nco}
Tools to manipulate and view netCDF files are relatively easy to
write since the API is much simpler than, say, HDF.
The netCDF Operators (NCO) \cite[]{NCO} is probably the best-known
toolkit for numeric and metadata analysis and manipulation of netCDF
data. 

Traditional processing of scientific data works with an intra-file paradigm.
Users open a file, read a variable from the file, and manipulate it.
The intra-file paradigm works well in cases where all the pertinent
data are stored in one or a few files.
In some disciplines, however, data storage requirements dictate
that relevant data be spread over multiple files.
Satellite-derived information, for example, may be stored in
a file-per-day or file-per-orbit format.
Data produced by geophysical time-stepping models is output
every timestep or averaged over many timesteps.
Climate models, for example, archive data once-per simulated day or
month, and simulate years or even centuries producing hundreds or
thousands of large files in a single simulation.
In such applications, the intra-file paradigm becomes unwieldy
and the optimal tool for data reduction must support an inter-file
paradigm.

We developed some guidelines based on our extensive experience with
geophysical and climate data and implemented them in NCO.
NCO assumes that processing large numbers of geophysical data-files is
most efficient and intuitive when:
\begin{enumerate*}
\item Files are the fundamental unit of data.
NCO makes it easy to add, subtract, and manipulate entire files.
\item Files to be processed in a single step are homogeneous.
NCO assumes the structure of each file (i.e., the fields present and
their dimensions) is identical to the structure of the first file in
the sequence.
The two exceptions NCO allows are that the record variable (i.e., time
dimension) length and, in some cases, the number of variables present
may change from file to file.
\item Distinctions between \trmdfn{dimensions}, \trmdfn{coordinates}, 
and \trmdfn{variables} are maintained. 
\item Operators have defaults that make sense and may be over-ridden
  with a simple, mnemonic command line switch.
\item Operators must provide an \textbf{audit trail that tracks data
    provenance}.
\item Operators must be as generic as possible, imposing no
  limitations on data dimensionality, size, or type.
\item Conformance to metadata conventions is paramount.
\end{enumerate*}

Apparently NCO's guiding philosophy, ``do what a sane user would
want,'' has succeeded!
NCO runs on all modern operating systems, and its use is fully
detailed in the \href{http://nco.sf.net/nco.pdf}{NCO User's Guide}.
To my knowledge, all established national and international 
climate modeling centers, including NASA, NOAA, NSF, and DOE centers
install and maintain NCO for their system users.
See, for example, NCO usage at
\href{http://arm.mrcsb.com}{DOE ARM},
\href{http://www.dkrz.de/dkrz/services/software/Software_SX_cross#nco}{DKRZ},
\href{http://climserv.lmd.polytechnique.fr/soft/utilitaires/nco/nco_news.shtml}{LMD},
\href{http://tao.atmos.washington.edu/data_sets/nco}{JISAO}, 
\href{http://www.ccsm.ucar.edu/experiments}{NCAR}, 
\href{http://www.gfdl.gov/~jps/GFDL_VG_WhatsNew.html}{NOAA GFDL},
\href{http://www.cdc.noaa.gov/PublicData/tools.html}{NOAA CDC}, and
\href{http://prism.enes.org/WPs/WP4a/Questionnaires/quest_results.html}{PRISM}.
In other words, NCO is widely used as middle-ware at geophysical
institutions for data post-processing, hyper-slabbing and serving.
The improvements and extensions to SDO proposed here will help
weld HPCC repositories into a \textbf{shared-cyberinfrastructure}
that will benefit a substantial scientific community much larger than
the proposers'.

\subsection{DODS/OPeNDAP}\label{sxn:dods}
The 
\href{http://www.unidata.ucar.edu/packages/dods}{Distributed Oceanographic Data Server} (DODS)
data server protocol provides useful replacements for common data
interface libraries like netCDF.  
The DODS versions of these libraries implement network
transparent access to data via a client-server data access protocol
that uses the HTTP protocol for communication.
Although DODS-technology originated with oceanography data,
it applies to virtually all scientific data.
In recognition of this, the data access protocol underlying
DODS (which is what NCO/SDO cares about) has been renamed the
\href{http://opendap.org}{Open-source Project for a Network Data Access Protocol} (OPeNDAP).
For the purposes of this proposal, DODS and OPeNDAP are used
interchangeably, usually in the hyphenated form.
Essentially DODS is being deprecated in favor of OPeNDAP, another
acronym for the same technology.
The \href{http://nco.sf.net/nco.html#DODS}{NCO User's Guide} and this
\href{http://opendap.org/presentations/peterspresentationagufall03/presentationdriver.html}{OPeNDAP Presentation} provide more details.

Any binary netCDF application (like NCO) may be
OPeNDAP-enabled by linking to the OPeNDAP netCDF
Client Library (CL) instead of the default netCDF library.
Once NCO is OPeNDAP-enabled the operators are OPeNDAP
clients.   
All OPeNDAP clients have network transparent access to 
any files controlled by an OPeNDAP server. 

\section{Results from Prior NSF Funding on Related Projects}\label{sxn:prr}
Zender is PI on ATM-0321380 ``Acquisition of an 
\href{http://esmf.ess.uci.edu}{Earth System Modeling Facility} (ESMF)
for Coupled Climate, Chemistry, and Biogeochemistry Studies''.  
After negotiating the best price supercomputer through an open bid
competition in summer 2003, we awarded IBM the ESMF contract in
October 2003. 
The ESMF opened to early users in early February 2004 with a two day
HPC-programming workshop attended by about 30~ESMF users.
The ESMF is currently undergoing final acceptance testing by UCI and
configuration by IBM prior to being fully devoted to coupled climate 
studies.
The ESMF provides the computational power for Zender's graduate seminar 
\href{http://e3.uci.edu/04w/42740}{ESS~286B: Modeling Land Surface Processes}.
This SEI will use the ESMF as one source of tera-scale climate model data.
This proposal will fund turning the ESMF into an OptIPuter node
so that distributed data reduction of ESMF and SDSC data will test
SDO performance between two geographically disparate nodes connected
by the relatively high-bandwidth OptIPuter network. 

Zender is a Co-PI on ATM-0214430, ``Collaborative Proposal: Using
Measurements from the Columbia Plateau Eolian System to Improve
Global-Scale Models of Mineral-Dust Aerosols'',
8/1/2002--7/30/2005. This project has resulted so far in four
national meeting presentations with manuscripts in preparation
\cite[]{SBG02,SBR03,SBG03,ZFA03}.
Our manuscript studies the range of uncertainty in LGM dust mass
and radiative budgets due to uncertainty in vegetation reconstruction.
We show that a significant fraction of the observed LGM increase in 
Pacific Ocean dust deposition is attributable to vegetation change.
Our paper in press \cite[]{GrZ04} explains how the twin processes of
saltation and sandblasting (SS) relate to loess formation. 
These SS~physics were implemented in DEAD which is used in the NCAR
CCSM (and other) IPCC simulations, the proof-of-concept application 
for this proposal.
Since CCSM generates netCDF datasets, all analysis of CCSM data is 
relevant to this proposal.

Papadopoulos is Co-PI for ANI-0225642 ``The OptIPuter'', formulated to
discover the impacts of ultra-high speed networks enabled by optically
parallel wave-division multiplexing on system architecture, software
architecture, and overall functionality. 
He is the chief OptIPuter systems and network architect for UCSD,
responsible for the design and implementation of the UCSD experimental
apparatus.
The UCSD OptIPuter is designed around a high-speed packet switching
network with a next-generation optical-core Chiaro router as its
centerpiece. 
Six campus laboratories with clustered endpoints connect to the Chiaro
through a private fiber plant in which each site has at least four
parallel fibers connecting to the Chiaro router.  
UCI resources will signal through the Chiaro, allowing access to all
OptIPuter resources including a 48-node, 21\,\TB\ storage test-bed.
Several invited talks and two peer-reviewed papers have been directly
attributed to this project. 

Papadopoulos serves as Senior Personnel for ACI-9619020, ``NPACI: The 
National Partnership for Advanced Computational Infrastructure''. 
He leads the design and implementation of the NPACI Rocks clustering 
toolkit used to build hundreds of clusters around the world and
impacting several large-scale NSF programs. 
Rocks is a turnkey solution for rapidly building clustered endpoints. 
Papadopoulos has authored over one half dozen conference and journal
papers and given more than twenty invited talks on Rocks.

Papadopoulos is a Co-PI for ANIR-0123973 ``Designing and Building a
National Middleware Infrastructure: Towards a National GRIDS Center''
and serves as the site PI for SDSC. 
GRIDS produces integrated grid software releases.
At SDSC, Papadopoulos oversees the development and architecture of a
general purpose grid configuration tool. 

\section{Geophysical Domain Challenge for \DDRA}\label{sxn:dra}
Although SDO, netCDF, and OPeNDAP apply to any gridded data, we will
use the example of climate model data storage and reduction to
concretely illustrate our project. 
We choose the field of climate modeling for two reasons.
First, it involves tangible quantities (e.g., air temperature) and
dimensions (latitude, longitude, height, time) which are familiar to
all geophysicists.
Second, our prototype application is data reduction and analysis of
\href{http://ccsm.cgd.ucar.edu}{Community Climate System Model} (CCSM)
\cite[][]{BBB01,CCSM} climate simulations prepared for the Fourth
Assessment Report (AR4) of the Intergovernmental Panel on Climate
Change (IPCC) \cite[][]{IPCC011}. 

The IPCC AR4 is scheduled for release in 2006 or 2007.
Simulations contributing to this report are underway now.
Climate simulations from one model, CCSM, with differing initial
conditions (ICs) and forcing scenarios will be performed and archived
at geographically disparate High Performance Computing Centers
(HPCCs), as described \href{http://www.cgd.ucar.edu/ccr/ipcc}{here}.
These HPCCs include the National Center for Atmospheric Research (NCAR), 
Los Alamos National Laboratory (LANL),
Oak Ridge National Laboratory (ORNL), 
the National Energy Research Scientific Computing Center (NERSC), and 
other national and international centers including the Earth Simulator
Center (ESC) in Japan.
Each of these centers will perform an ensemble of CCSM simulations.
Typically, the different members of the ensemble correspond to Initial
Condition (IC) perturbations needed to help quantify the internal
variability of the model.
The ensembles themselves span the spectrum of 21st century (and
beyond) anthropogenic forcing scenarios agreed to in the IPCC Special
Report on Emissions Scenarios (SRES) \cite[][]{HFN95,IPCC011}. 
Characterizing climate change based on tens--hundreds of \TB\ of
CCSM results stored in \href{http://www.cgd.ucar.edu/ccr/ipcc}{HPCCs}
around the Globe is the \textbf{significant domain challenge} for
geophysical modelers such as PI~Zender. 

The CCSM results of a single SRES ensemble occupy on the order of one
\TB\ of storage. 
Thus each HPCC mentioned above has tera-scale Local \DRA\ (\LDRA)
needs for a single, local, CCSM ensemble.  
Much interesting science will be done on these results at each HPCC
without any Distributed \DRA\ (\DDRA) component.
Characterizing the internal variability, mean climate state, and
transient evolution of both is a challenging \DRA\ problem potentially
leading to new understanding (i.e., scientific discoveries) of the
processes causing the observed changes in the frequency of occurrence
and intensity of El~Ni\~{n}os, and of abrupt climate transitions.  
\LDRA\ of many TB of climate data requires enormous computational and 
storage resources.

The barriers that face researchers who wish to perform \LDRA\ on
climate data are (first) aggregating the climate data from remote
servers to the local host (which may have relatively limited storage),  
and (second) reducing the wall-clock time of the data reduction.
Fortunately, NCO is available and is already 
\href{http://www.cgd.ucar.edu/csm/support/FH/nco.shtml}{widely used}
for \LDRA.
Thus the first barrier can be lowered by promulgating use of
OPeNDAP servers at HPCCs so that all researchers have access to
the climate simulation data.
This occurs organically as users learn more about NCO and OPeNDAP
and request their system administrators to install them. 
Serving climate data via OPeNDAP does not address the network
problem of bandwidth consumption by hundreds of researchers requesting
the same raw datasets be transferred to their local machine.
Thus high-bandwidth consumption, high-latency (assuming raw data are
originally on remote machines), and un-optimized data-reducing clients
(i.e., NCO) together form a \textbf{significant computer science
  problem that is a barrier to achieving the domain challenge} (i.e.,
characterizing climate by \LDRA\ methods).

Inter-comparing and aggregating CCSM results from geographically
disparate HPCC centers requires Distributed Data Reduction and
Analysis (\DDRA) or copying many TB of data en~masse to one 
master storage location for \LDRA.
The latter is a viable option for some important types of analysis
(e.g., statistics of monthly mean results).
However, the disadvantages of copying the distributed raw data to
one center for \LDRA\ are numerous:
\begin{enumerate} 
\item Raw model output data at high time resolution (e.g., daily
  average results for one century) for all the ensemble members would 
  require hundreds of TB at a master HPCC storage location. 
\item Copying distributed data to local storage is \LDRA\ in disguise.
  It offers none of the advantages of true \DDRA\ (where data are
  stored remotely). 
  \LDRA\ reduces the pace of discovery by excluding researchers at
  other locations from performing entrepreneurial \DRA. 
\item University researchers not affiliated with an HPCC have a very
  difficult time accessing the raw data directly in almost any
  scenario.  
\end{enumerate} 

The scientific objectives of IPCC Working Group One (WG1 is in charge 
of describing the scientific basis of climate change) include
quantifying climate sensitivity to various SRES scenarios.
Establishing the sensitivity of internal and forced CCSM climate
response to the spectrum of SRES forcings requires \DDRA\ because
the results are stored at multiple HPCCs. 
Thus the prototype experiment for this project is \DDRA\ of CCSM
climate data stored at HPCCs with network connectivity to UCI ranging
from low ($\sim 100$\,\Mbxs\ to NCAR) to high ($\sim 10$\,\Gbxs\ to
SDSC). 
CCSM \DDRA\ is an excellent proof-of-concept for SDO because the CCSM
IPCC data are (first) in netCDF format, (second) of interest to 
hundreds of geophysicists (especially PI~Zender!), and (third) 
helping to drive development of new HPC resources and tools.

\section{Methods}\label{sxn:mth}

In addition to the domain-specific scientific gains arising from
characterizing CCSM IPCC simulation output, we have four overarching
software and hardware engineering goals:
\begin{enumerate*}
\item Increase speed of common rank-reduction operations
\item Reduce latency of distributed data analysis
\item Reduce bandwidth requirements of distributed data analysis
\item Do this all in an architecture-neutral, model-neutral, flexible
  and extensible software framework that works across many orders of
  bandwidth of network connectivity
\end{enumerate*}
The following methods are designed to attain our domain challenge by
resolving the significant computer science barriers in our way.

\subsection{Rank Reduction Efficiencies for \DRA}\label{sxn:rr}
We will denote the rank $\rnksbs$ of a variable $\var$ with a
left-superscript, e.g., $\rnkRvar$. 
%We do not distinguish between the concepts of rank and
%dimensionality. 
Sample variables for our prototype experiment with climate model data
are surface temperature $\tptsfc$, surface pressure $\prs$, and
top-of-atmosphere down-welling shortwave radiative flux $\flxdwnsw$. 
A time-series of, say, surface temperature $\tptsfc$ is a rank three
($\rnksbs = 3$) variable because it has two spatial dimensions
(latitude and longitude) and one temporal dimension (time).
The instantaneous value of air temperature $\tpt$ is also rank three
since it has three spatial dimensions (latitude, longitude, and
level).
A timeseries of temperature data is rank four $\tpt(\tm) = \rnkQtpt$.

Rank reduction is often the first step in geophysical \DRA.
For example, a researcher may want to derive the time-mean temperature
field $\rnkCtpt = \tptavg$ (vertical overbar denotes temporal averaging) from the
model-predicted $\rnkQtpt = \tpt$.  
This requires a rank reduction from $\rnkQtpt \rightarrow \rnkCtpt$. 
The time-mean, global-mean surface temperature 
$\rnkZtptsfc = \tptsfcbarngl$ (angles denote area-averaging) is the
scalar obtained by performing a reduction of 
$\rnkCtptsfc \rightarrow \rnkZtptsfc$. 
More generally, rank reduction is 
$\rnkntlvar \rightarrow \rnkfnlvar$ where $\rnkntl$ is the initial
variable rank, $\rnkfnl$ is the final variable rank, and, for
geophysics, $[\rnkntl,\rnkfnl] \in [0,1,\ldots,5]$.
Usually $\rnkntl > \rnkfnl$ but temporary rank expansion
(broadcasting) is often required to perform arithmetic efficiently
between variables of different ranks.

In modern climate modeling applications, each rank reduction reduces
the data size by about two orders of magnitude!
For example, CCSM data typically have 100--200 points in the
longitudinal dimension. 
Reducing $\rnkQtpt = \tpt(\tm)$ into zonal-mean temperature 
$\rnkCtpt = [\tpt]$ (brackets denote zonal-averaging) shrinks the 
dataset size by a factor of 100--200.
Rank reduction on a remote server, followed by transfer of reduced
data, versus transfer of raw data, followed by rank reduction on a
local machine, has multiple-order of magnitude implications for the
bandwidth required by \DDRA\ processing.  

\subsubsection{Algorithmic Description of Rank Reduction}\label{sxn:alg}
The algorithms NCO (and, hence, SDO) use for rank-reduction make clear
the intrinsic parallelism of many \LDRA\ operations.
It is this parallelism we will exploit with Shared Memory Parallel
Programming (SMPP) via \href{http://www.openmp.org}{OpenMP} directives.

The masked, weighted average of a variable $\xxx$ can be generally
represented as
\begin{equation} 
\bar{\xxx}_{\jjj} = {\sum_{\idx=1}^{\idx=\lmnnbr} \mssflg_{\idx}
\mskflg_{\idx} \wgt_{\idx} \xxx_{\idx} \over
\sum_{\idx=1}^{\idx=\lmnnbr} \mssflg_{\idx} \mskflg_{\idx} \wgt_{\idx}}  
\label{eqn:avg_dfn}
\end{equation} 
where $\bar{\xxx}_{\jjj}$ is the $\jjj$'th element of the output
hyperslab, $\xxx_{\idx}$ is the $\idx$'th element of the input
hyperslab, $\mssflg_{\idx}$ is~1 unless $\xxx_{\idx}$ equals the missing  
value, $\mskflg_{\idx}$ is~1 unless $\xxx_{\idx}$ is masked, and
$\wgt_{\idx}$ is the weight.  
This formidable looking formula represents a simple weighted average.
NCO uses various permutations and extensions of this formula to
compute related statistics such as masked, weighted sums, extrema
(i.e., minima and maxima), and standard deviations. 

When 
$\mssflg_{\idx} = \mskflg_{\idx} = \wgt_{\idx} = 1$, the 
generic averaging expression above reduces to a simple arithmetic
average.  
Currently, $\mskflg_{\idx} = \wgt_{\idx} = 1$ for all NCO operators
except \href{http://nco.sf.net/nco.html#ncwa}{\texttt{ncwa}}.
These variables are included in the discussion below for completeness
and because this project will add masks to other operators (cf.\
Section~\ref{sxn:rbs}).  

The size $\outnbr$ of the output hyperslab for a given variable is the
product of all the dimensions of the input variable which are not
averaged over. 
The size $\lmnnbr$ of the input hyperslab contributing to each 
$\bar{\xxx}_{\jjj}$ is simply the product of the sizes of all dimensions
which are averaged over.
The input hyperslabs are independent of one another.
Thus $\lmnnbr$ is the number of input elements which \textit{potentially}
contribute to each output element.
For a complete discussion of the conditions under which input elements
contribute to the output hyperslab, see the 
\href{http://nco.sf.net/nco.pdf}{NCO User's Guide} \cite[][p.~37]{NCO}.

\subsubsection{Analytic Load Balancing (\ALB) in \LDRA}\label{sxn:alb}
We now describe the intrinsic parallelism of rank reduction.
First, we note that no input element $\xxx_{\idx}$ contributes to more
than one output element $\bar{\xxx}_{\jjj}$ in (\ref{eqn:avg_dfn}). 
Second, the outermost loop over the output hyperslab $\bar{\xxx}_{\jjj}$
is the least rapidly varying dimension not averaged.
We will investigate the efficacy of OpenMP directives placed around
this loop.
Both the input and output array may be stored as shared data since
each input hyperslab maps to a single output element.

Consider our prototype climate model application where the user
wishes to derive $\tptbarngl(\zzz) = \rnkUtpt$ (the global area-mean,
time-mean vertical temperature profile) from the raw data
$\tpt(\xxx,\yyy,\zzz,\tm) = \rnkQtpt$.
In this case, 
$\lmnnbr = \NNN_{\xxx} \times \NNN_{\yyy} \times \NNN_{\tm}$ where
$\NNN_{\xxx}$, $\NNN_{\yyy}$, and $\NNN_{\tm}$ are the number of
elements in the $\xxx$, $\yyy$, and $\tm$ dimensions respectively.
For a typical NCAR CCSM IPCC simulation one hundred years in length
stored at daily temporal resolution and T85$\times$L40 resolution,
($\NNN_{\xxx}$, $\NNN_{\yyy}$, $\NNN_{\tm}$) =
($256$, $128$, $36500$) and 
$\lmnnbr = 1196032000\ (\sim 1.2 \times 10^{9})$.
Thus each of the forty points in the output variable distills an
average of about one-billion input points.
Given that (\ref{eqn:avg_dfn}) involves about five floating point
operations per input point (to handle masking, weighting) and
additional logical operations, it seems fair to estimate about
ten billion floating point operations per output point.
This is well within the realm ($\lmnnbr \gtrsim 10^{9}$) where OpenMP
parallelism is likely to increase computational throughput rather than
decrease it due to the overhead of setting up the threads themselves 
(Jim Tuccillo, IBM, personal communication, February, 2004).
In other words, throughput efficiencies would be achieved by spawning
up to $\NNN_{\zzz} = 40$ threads, one per vertical level.

We call exploiting this parallelism Analytic Load Balancing (\ALB)
because its efficacy follows from analytic considerations that depend
on the size of the hyperslab to be reduced to a scalar.
This size will be evaluated automatically at runtime without user
intervention, and will differ for variables of different ranks.  
OpenMP formalism within the rank reduction (\ref{eqn:avg_dfn})
will not always enhance throughput.
Thus \ALB\ will be invoked only if short run-time checks verify its
efficacy.
Implementing and tuning this algorithm will take place in Year~1.

\subsubsection{Per-Thread Variables (\PTV) in \LDRA}\label{sxn:ptv}
Per-Thread Variables (PTVs) are another promising way to parallelize
rank reduction operations on multi-file operators. 
Assume each input file in (\ref{eqn:avg_dfn}) contains the same
$\varnbr$ variables $\varone, \vartwo \ldots \varnnn \ldots \varNNN$.
Computing each $\rnkntlvarnnn \rightarrow \rnkfnlvarnnn$ on a separate
OpenMP thread inside the ``variable loop'' of the multi-file operators
may be easier to implement, though not necessarily more efficient,
than \ALB\ parallelization (Section~\ref{sxn:alb}).
The \PTV\ method appears to be robust for a mixture of $\rnkntl$ in the
input file. 
Reductions on variables with smaller $\rnkntl$ will finish more
quickly than those with larger $\rnkntl$, and the OpenMP thread will
simply proceed to the next variable requiring reduction.  
Since a typical CCSM simulation has $\varnbr \sim 100$, the amount of 
idle thread time relative to total thread time is likely to be small
because most computational geometries have eight or fewer CPUs per node.
Implementing and tuning the \PTV\ algorithm will take place in Year~1.

\subsubsection{Computational Geometry Load Balancing (\CGLB) in \LDRA}\label{sxn:cglb}
The last form of parallelization possible for rank-reduction
operations involves spawning a separate MPI process for reducing files.
Consider $\flnbr$ input files in (\ref{eqn:avg_dfn}): 
$\flone, \fltwo, \ldots, \flmmm, \ldots, \flMMM$.
The simplest form of Computational Geometry Load Balancing would be 
to spawn $\flnbr$ MPI tasks.
Each task would obtain and perform \DRA\ on at least one file.
In all likelihood, $\flnbr$ is an upper bound on the optimal number of
MPI tasks to spawn. 
A better optimized \DRA\ might spawn no more MPI tasks than local
computational nodes that are available.
Suppose there are $\ndlnbr$ computational nodes, 
$\ndlone, \ndltwo, \ldots, \ndlhhh, \ldots, \ndlHHH$,
available for the \DRA. 
Then a better number of MPI tasks to spawn might be 
$\min(\flnbr,\ndlnbr)$.

As discussed above in Section~\ref{sxn:alb}, it makes sense to
parallelize code branches with more than about one billion floating
point operations. 
Thus the most general case of \LDRA\ parallelization would use a
hybrid approach relying on both OpenMP and MPI.
For example, OpenMP could be used for \PTV\ (Section~\ref{sxn:ptv}) 
within a file.
The number of OpenMP threads would be $\min(\varnbr,\cpunbr)$
where $\cpunbr$ is the number of CPUs per local node.
MPI could spawn a separate task for each file (assuming the reduction
operation commutes). 
Since the optimal balance of OpenMP (e.g., \PTV) and MPI
parallelization depends on the precise computational geometry 
available to the SDO client, we call this Computational Geometry Load
Balancing (\CGLB).
Implementing and tuning the \CGLB\ algorithm will take place in
Years~2 and~3.

\subsection{Latency and Bandwidth Efficiencies in \DDRA}\label{sxn:ltn}
We now characterize the computational barriers we must overcome to
obtain efficient \DRA\ with large datasets stored at geographically
disparate locations.  
In simple terms, traditional \LDRA\ is expressible as a sequence of
operations on local files. 
With NCO, typical \LDRA\ multi-file operations are representable as
\begin{eqnarray} 
\mbox{\texttt{operator [options] file1 file2 \ldots fileM }}
& & \mbox{\texttt{fileout}}
\label{eqn:LDRA}
\end{eqnarray} 
Here \texttt{operator} stands for any NCO multi-file operator such as 
\href{http://nco.sf.net/nco.html#ncra}{\texttt{ncra}}, the
netCDF Record Averager, which would time-average the input files and
store the results in \texttt{fileout}. 

With NCO compiled as OPeNDAP clients, file names may be URLs
accessible through one or more OPeNDAP servers.
With NCO OPeNDAP clients, \DDRA\ is possible and typically expressible
as a sequence of operations on (local and) remote files. 
\begin{eqnarray} 
\mbox{\texttt{operator [options] http://server1/file1 http://server2/file2 }} & & \nonumber \\
\mbox{\texttt{ \ldots http://serverK/fileM fileout}} & &
\label{eqn:DDRA}
\end{eqnarray} 

Currently, the \DDRA\ example (\ref{eqn:DDRA}) requires high bandwidth 
because none of the averaging takes place on the OPeNDAP server.
Instead, OPeNDAP transfers the raw input files back to the
local \texttt{ncra} client for processing.
A classification of possible client-server \DDRA\ scenarios helps 
to clarify the barriers to more efficient \DDRA.
We classify the scenarios based on three criteria:
\setcounter{enmrfr}{0} % Reset reference counter for this list
\begin{enumerate*}
\enmrfrstp \label{idx_crt_CSTP} 
\item Client-Server Traffic Protocol (\CSTP): ``Parallel'' or ``Serial''?\\
  \DDRA\ traffic is serial for a multi-file \texttt{operator} (e.g., 
  \texttt{ncra}) that requests data synchronously, i.e., one file at a
  time, operates on it, then asks for the next file. 
  Many useful \DRA\ operations (e.g., multi-file averaging) are
  commutative, and thus could be performed in any order to yield the
  same answer (to round-off precision, anyway).
  \DDRA\ traffic is parallel when a multi-file \texttt{operator} (e.g., 
  \texttt{ncra}) requests multiple files asynchronously, and operates
  on them in some pre-defined or random order. 
  \CST\ volume is high for dumb servers and low for smart servers. 
\enmrfrstp \label{idx_crt_ltn} 
\item Latency: ``High'', ``Medium'', or ``Low''?\\
  \DDRA\ is high latency if it is often forced into wait states for other data. 
  Serial \CSTP\ is high latency by definition, since the operator goes
  into a wait state after processing each file, until the next input
  file request completes.
  The source of what we call ``Medium'' latency in \DDRA\ is arithmetic.
  Multi-file operations that do not commute (e.g., averaging with a
  temporal stride) must complete their operations in a specified order.
  Even if all files are requested in parallel, the arithmetic must
  always be executed synchronously and thus delays are possible
  obtaining data from slower servers.
  Commutative operations do not suffer from this constraint if the
  user foregoes bit-for-bit answer reproducibility.
  The option to forego bit-for-bit reproducibility for commutative
  operations with asynchronous I/O defines what we call ``Low''
  latency.  
\enmrfrstp \label{idx_crt_SSDRA} 
\item Server-side Data Reduction \& Analysis (\SSDRA): ``Smart'' or ``Dumb''?\\
  Currently, the OPeNDAP servers broker client data requests without
  performing \SSDRA.  
  These are ``Dumb'' servers since they return the full requested data
  hyperslab to the client with no intermediate processing.
  The client may only ultimately need reduced data (e.g., file averages).
  The cost of transmitting raw rather than reduced data is described
  in Section~\ref{sxn:rr}. 
  Our goal is to move all feasible \DRA\ (e.g., rank reduction) from
  the client to the server to reduce the high-bandwidth requirements
  of transferring raw data.
\end{enumerate*}
The technical goal of this project is to develop SDO so that it
transparently (to the user) functions with the desirable capabilities
mentioned above and in Section~\ref{sxn:rr}. 

Table~\ref{tbl:dra} outlines our planned progression of \DDRA\
Scenarios starting from the present NCO$\leftrightarrow$OPeNDAP
capability. 
\begin{table}
\centering % \centering uses less vertical space than center-environment
\begin{minipage}{\hsize}
\renewcommand{\footnoterule}{\rule{\hsize}{0.0cm}\vspace{-0.0cm}} % KoD95 p. 111
\caption[\DRA\ Scenarios]{\textbf{\DRA\ Scenarios}
\label{tbl:dra}}
\vspace{\cpthdrhlnskp}
\begin{tabular}{ >{\raggedright}p{12.0em}<{} ccccc l }
\hline \rule{0.0ex}{\hlntblhdrskp}%
Scenario &
\ALB\footnote{Analytically Load-Balanced: Requires OpenMP in SDO clients.} &
\CGLB\footnote{Computational Geometry Load-Balanced: Achieved with client-side OpenMP and/or MPI.} & 
\CSTP\footnote{Client-Server Traffic Protocol: Requires client-side OpenMP and/or MPI.} & 
Latency & 
\SSDRA\footnote{Server-Side Data Reduction \& Analysis: Requires
  extensions to OPeNDAP netCDF Client Library and (possibly) netCDF4
  library. 
  Exploits client-side efficiency and load-balancing improvements.} & 
Expected \\
\hline \rule{0.0ex}{\hlntblntrskp}%
NCO Clients lack OpenMP, MPI. 
Clients synchronously request data from servers. 
Clients do all \DRA. & 
No & No & Serial & High & Dumb & Present \\[0.5ex]
SDO Clients gain OpenMP parallelism, request data asynchronously. &  
Yes & No & Parallel & Medium & Dumb & Year~1 \\[0.5ex]
SDO Clients gain MPI. &
Yes & Yes & Parallel & Low & Dumb & Year~2 \\[0.5ex]
OPeNDAP Servers perform intermediate \DRA. &
Yes & Yes & Parallel & Low & Smart & Year~3 \\[0.5ex]
\hline
\end{tabular}
\end{minipage}
\end{table} % end tbl:dra
Implementing \SSDRA\ is the most difficult task as it may involve
changes to the netCDF API and to the OPeNDAP-netCDF client library
(CL).  
By the end of Year~3, SDO will automatically identify logically
segregable parts of multi-file jobs so that arithmetic and data
reduction are done on the servers whenever possible.
The results of the intermediate \SSDRA\ will be relayed to the SDO
client for final aggregation and processing. 

\section{SDO Software Engineering}\label{sxn:swg}

We will apply modern software engineering techniques to SDO.
Already the NCO code-base is clean and well-documented internally.
We make heavy use of metadata and
\href{http://dust.ess.uci.edu/doc/abb/abb.pdf}{systematization} in
variable and function names to impart a high level of literacy to the
source code. 

\subsection{Enhancing Robustness}\label{sxn:rbs}
The following standard software engineering practices will be applied 
to the NCO code-base during the execution of this project:
\begin{enumerate*}
\item Unit and Regression Testing: Addition of self-diagnosing tests
  to verify that no existing features break as new features are added.
\item Profiling: Analysis of code bottlenecks and scalability
\item Memory purification: Detection and elimination of dangling
  pointers, un-freed memory (e.g., with
  \href{http://valgrind.kde.org}{Valgrind}). 
\end{enumerate*}
All of these practices have been applied ad~hoc to NCO in the past.
The Scientific Programming Specialist will rigorously enforce, build
into the code, and automate these practices where possible.

\subsection{NCO/SDO new features}\label{sxn:ftr}
Dozens of major and minor features would make NCO more useful and
robust and are enumerated on this 
\href{http://nco.sf.net/TODO}{TODO list}.
This SDO project would help address many of these deficiencies as 
part of its robustness and standards conformance emphasis.
The items that would advance NCO/SDO \DDRA\ functionality are
candidate tasks for the Specialist position funded by this proposal. 
The top five candidate tasks not mentioned previously include:
\begin{enumerate*}
\item Geographic \href{http://nco.sf.net/nco.html#msk}{masks} for all
  operations (e.g., masking oceans with \texttt{--mask=Atlantic}) 
\item \href{http://www.cgd.ucar.edu/cms/eaton/cf-metadata/CF-working.html}{Climate \& Forecast} (CF) Metadata Convention conformance
\item Rank-reduction and logical constraint operations for \href{http://nco.sf.net/nco.html#ncap}{\texttt{ncap}} and \href{http://nco.sf.net/nco.html#ncwa}{\texttt{ncwa}}
\item Multi-file input capability for \href{http://nco.sf.net/nco.html#ncap}{\texttt{ncap}} and \href{http://nco.sf.net/nco.html#ncwa}{\texttt{ncwa}}
\item More pre-defined complex arithmetic operations in \href{http://nco.sf.net/nco.html#ncap}{\texttt{ncap}},
  e.g., standard deviations, chi-squared tests, linear regressions.
\end{enumerate*}
This functionality would greatly enhance SDO's power and reduce the 
effort the working scientist needs to put into \DDRA\ to obtain useful
results. 

\section{Project Coordination}\label{sxn:crd}

PI~Zender will take overall responsibility for project coordination.
NCO was first publicly released in 1995, and became an Open Source
project in 2000.  
There are currently three active NCO developers.
Unlike the related base SDDF projects netCDF and HDF, NCO has never
received base institutional support. 
Rather like OPeNDAP, NCO has grown organically because users
need NCO functionality.
OPeNDAP has been quite successful at obtaining institutional
support (from NASA, NOPP, NOAA, and NSF) via the peer-review process.
NCO/SDO will strive to emulate and learn from the OPeNDAP
practices that have let it successfully transition to a robust,
institutionally supported Open Source project.

\subsection{Personnel}\label{sxn:prs}
Zender will continue to lead the NCO/SDO development, establish SDO
development priorities and policies, continue to entrain the Open
Source community, and coordinate architecture decisions with netCDF
and OPeNDAP developers at UCAR/Unidata and University of Rhode Island 
(URI), respectively (see attached letters of support from Rew and
Cornillon). 
Zender will work with a full-time Specialist to help design SDO and 
to implement the optimizations for distributed data reduction and
analysis (Section~\ref{sxn:ltn}) and to enhance the software
engineering and robustness of NCO/SDO (Section~\ref{sxn:swg}).
Zender will work with an ESS/ICS graduate student to identify and
optimize techniques that exploit the intrinsic parallelism that
pervades tera-scale geophysical data reduction
(Section~\ref{sxn:rr}). 

Co-PI Papadopoulos will consult and advise on issues of SDO
parallelism, load balancing, and network connectivity.
His experience developing open source software on high performance
computers is the key bridge between the existing, homegrown NCO
software and the proposed robust, parallelized, distributable SDO.
Papadopoulos helped develop the Parallel Virtual Machine (PVM) and
High-Performance Virtual Machines (HPVM) libraries. 
He leads development of the Rocks Clustering Toolkit at SDSC.
Papadopoulos is a Co-PI on the NSF Middleware Initiative GRIDS
center which has produced four public releases of integrated and
tested grid software.    
Papadopoulos is also OptIPuter Co-PI and Network Architect. 
As such he will oversee integration of the ESMF with OptIPuter that is
essential to developing, testing, and optimizing SDO for high-bandwidth
connected HPCCs. 

\subsection{Management Style}\label{sxn:mng}
Zender directs two other multi-investigator projects, the 
\href{http://www.ess.uci.edu/esmf}{Earth System Modeling Facility},
larger in scale than this proposed SDO project, and the 
\href{http://nco.sf.net}{netCDF Operators}.
NCO is the smaller scale, never-funded, OpenSource software project 
which SDO will leverage.
Zender makes efficient use of project coordination software such as 
\href{http://sf.net}{SourceForge} (for complete project coordination),  
\href{https://maillists.uci.edu/mailman/listinfo/esmfadm}{Mailman} 
(for project mailing lists), 
\href{http://dcs.nac.uci.edu/cgi-bin/wreq/req}{wreq} (a work-request
tracking system for prioritizing tasks), and extensive documentation
on project \href{http://nco.sf.net}{Home Pages}.
These techniques maximize project transparency and minimize confusion
that arises through misunderstood responsibilities, requests, and
goals.  

All SDO software design, construction, and modification will employ
Concurrent Versions System (\href{http://www.cvshome.org}{CVS}) or
its successor (\href{http://subversion.tigris.org}{Subversion}) to
facilitate distributed development.
To facilitate collaboration, all SDO software and data made available
for this project will be publicly accessible (read-only) through
CVS and OPeNDAP servers, respectively.
This will encourage the Open Source community to remain involved
in SDO development. 
We believe strongly in unfettered exchange of software and data.

\subsection{Schedule and Milestones}\label{sxn:tm_ln}
\noindent\textbf{Year~1}. \textit{Milestones}: 
1a.~OpenMP parallelization for arithmetic operators;
1b.~ESMF becomes OptIPuter node with 1--2\,\Gbxs\ connectivity;
1c.~Release NCO version~4 (exploits netCDF4$+$OpenMP)\\
\textit{Tasks}:
\begin{enumerate*}
\item Parallelization with OpenMP
\begin{enumerate*}
\item Independent variables concurrently processed on separate threads
\item Asynchronous Client$\rightarrow$Server data requests (\CSTP)
\item Analytic Load Balancing (\ALB)
\end{enumerate*}
\item Develop unit testing for crucial code paths
\item Develop benchmarking suite to assess \LDRA\ performance
\end{enumerate*}
\textit{Travel}:
\begin{enumerate*}
\item One-week visit by Specialist to Unidata to coordinate
  integration of netCDF4 features into NCO/SDO. 
\end{enumerate*}
\textbf{Year~2}. \textit{Milestones}: 
2a.~ESMF$\leftrightarrow$OptIPuter connectivity increases to
10\,\Gbxs\ (depends on outside funding);
2b.~Demonstrate \DDRA\ of CCSM data between/among multiple HPCCs: ESMF, NCAR, SDSC, ESC;
2c.~Release NCO version~4.2 (exploits MPI)\\
\textit{Tasks}:
\begin{enumerate*}
\item Analytic parallelization with MPI (\CGLB)
\item Enhance SDO benchmark suite to assess \DDRA\ and \CGLB\ performance gains 
\end{enumerate*}
\textit{Travel}:
\begin{enumerate*}
\item One-week visit by Specialist to Unidata to coordinate
  netCDF and DODS netCDF CL API additions with Unidata netCDF and 
  OPeNDAP projects. 
\item PI~Zender and Specialist attend ACM Supercomputing meeting to
  present science and to demonstrate \DRA\ technology.
\item PI~Zender and ESS Graduate Student to attend Fall AGU meeting
  to present science and to demonstrate \DRA\ technology on CCSM IPCC
  simulation data.
\end{enumerate*}
\textbf{Year~3}. \textit{Milestones}: 
3a.~\SSDRA\ on OPeNDAP servers and netCDF4;
3b.~Prototype \SSDRA\ applicability to Genomic data;
3c.~Release NCO version~4.4 (exploits \SSDRA)\\
\textit{Tasks}:
\begin{enumerate*}
\item Place \SSDRA\ hooks in netCDF4 library
\item Use SDO operators to add ``file-out'' \DRA\ to OPeNDAP netCDF CL
\item Hybrid parallelization with OpenMP/MPI (\CGLB)
\item Measure \DDRA, \ALB\ and \CGLB\ performance gains
\item Profile CCSM \DDRA\ to identify remaining bottlenecks
\end{enumerate*}
\textit{Travel}:
\begin{enumerate*}
\item PI~Zender and Specialist attend ACM Supercomputing meeting to
    present science and to demonstrate \DDRA\ and \SSDRA\ technology 
\item PI~Zender and ESS Graduate Student to attend Fall AGU meeting
    to present science and to demonstrate \DDRA\ and \SSDRA\
    technology on CCSM IPCC simulation data.
\end{enumerate*}

\subsection{Education Plan}\label{sxn:edc}
This project will train one graduate student at the interface of computational
and geophysical sciences.
The Specialist and graduate student will both conduct training
workshops with Zender at appropriate national scientific conferences
(Supercomputing and AGU) in Years~2 and~3.
This will help ensure widespread institutional awareness of the
project.
Where possible we will pay work-study undergraduates at UCI to assist
with programming tasks.

Interestingly, the primary NCO developer besides Zender for many years
grew from novice programmer to expert writer of parsers and lexers by
contributing to NCO in the Open Source fashion.
With a true Open Source project, knowing how much outreach one is
doing is impossible because the code is freely available.
We have accepted source code contributions from about a dozen people
in the last ten years.
With a funded base, we have high hopes for entraining others,
including programmers in less developed countries, and passing on our 
geophysical skill-sets to them.

\subsection{Open Source Software}\label{sxn:oss}
This SDO project, devoted to improving analysis of data often produced
for pure research and/or public policy (e.g., geophysical
measurements, climate models), will only thrive if it remains Open
Source. 
NCO has been supported by the Open Source community for five years.
The Open Source model provides for wider testing of the software.
Public access to source code makes finding and fixing bugs much
easier \cite[e.g.,][]{Ray99}.    
Volunteer developers now solve 50--75\% of NCO bugs.
SDO will remain, true to its roots, Open Source Software released
under the GNU General Public License.

\section{Broader Impacts and Synergies}\label{sxn:mpc}
All sciences utilizing \LDRA\ and \DDRA\ on netCDF data will benefit
from efficiency improvements to NCO/SDO outlined in
Sections~\ref{sxn:mth} and~\ref{sxn:swg}, respectively.
Geoscience fields which use data storage formats other than netCDF or
HDF will also benefit from \SSDRA\ improvements outlined in
Section~\ref{sxn:mth}. 
netCDF and HDF have already been embedded in some bioinformatics  
computational languages to reduce data storage and access costs
associated with XML and relational databases (RDBs) \cite[]{Ste04}.
All \DRA\ improvements are thus usable by appropriately formatted
bioinformatics data sets, especially gene expression data, in ways
similar to the geophysics domain. 

This proposal is synergistic with a funded NASA REASON project
``Merging the NetCDF and HDF5 Libraries to Achieve Gains in
Performance and Interoperability'' \cite[]{Rew03}.
This project is building netCDF3 on top of HDF5.
The result, to be called netCDF4, will exploit the many performance 
advantages of HDF5 (parallel I/O, chunking, data compression) while
retaining the simpler, more intuitive netCDF API.
The PI, Russell Rew of Unidata (see attached letter of support),
supports our proposal and will be involved in yearly meetings.
The Co-PI, Mike Folk of the National Center for Supercomputing
Applications (NCSA), has in the past expressed his support for  
bringing NCO functionality to HDF data. 

Many of the \DDRA\ improvements we propose clearly fit the Information
Integration (II) portion of this NSF RFP, especially the emphasis on
\textbf{decentralized data-sharing}.
This proposal supports Papadopoulos at a nominal level only.
We plan to seek additional funding from NSF or NASA to support heavier 
involvement by his SDSC group to help exploit \DDRA\ parallelism.
Our next project would be complementary to this NSF project.
We will propose to completely abstract the SDO I/O layer,
to develop a native HDF back-end to SDO, and to fully exploit the MPI2 
parallel I/O library to further reduce latency and bandwidth
requirements of \DDRA.
\newpage

\section*{References}\label{sxn:rfr}
\setcounter{page}{1}
\thispagestyle{empty}
% Bibliography
\renewcommand\refname{}
\vspace{-24.0pt}
\newlength{\oldbaselineskip}
\setlength{\oldbaselineskip}{\baselineskip}
%\setlength{\baselineskip}{13.574pt} % 1.234 X 12pt
\setlength{\baselineskip}{12.0pt} % 1.234 X 11pt
\setlength{\bibsep}{4pt} % Space between natbib bibliography items
\bibliographystyle{jas}
\bibliography{bib}
\setlength{\baselineskip}{\oldbaselineskip} % 1.851 X 11pt
\newpage
\printindex % Requires makeidx KoD95 p. 221
\newpage

\subsection{Budget Justification}\label{sxn:bdg_jst}
\setcounter{page}{1}
\thispagestyle{empty}
\begin{verbatim}
% NB: Do not use LaTeX formatting in Budget Justification since must
% upload into Liz's Word document 

Salaries and Wages
One month of summer salary support for three years is requested for
Prof. Charles Zender, the PI at UCI, who will have primary
responsibility for the proposed research. 
Zender will lead the NCO/SDO development, establish SDO development
priorities and policies, continue to entrain the Open Source
community, and coordinate architecture decisions with netCDF and
OPeNDAP developers at UCAR/Unidata and University of Rhode Island
(URI), respectively (see attached letters of support from Rew and
Cornillon). 

To Be Named---One full-time Specialist Step I will share
responsibility for SDO library design, and will have primary
responsibility for library implementation, server-side extensions,
profiling, regression testing, debugging, and SDO releases. 
The Specialist will work with the graduate student to profile, test,
and improve the OpenMP and MPI modifications.
A 2% cost of living increase was applied each year of this proposal
as well as a 5% merit, where applicable.

To be Named---Graduate Student Researcher III.  Funds are requested to
support one non-resident graduate student each year of the project.
The graduate student support is requested at 50% for 9 months during
the academic year and 100% for 3 months during the summer. 
The graduate student will work with Zender on optimizing OpenMP and
MPI parallelization to exploit the intrinsic parallelism of common
data reduction arithmetic, with both local and distributed data.
All salaries and wages were estimated using UCI's academic and staff
salary scales.  

Employee Benefits
Fringe Benefits were estimated using the composite rates agreed upon
by the University of California Office of the President and the DHHS
Audit Agency, the Cognizant Audit Agency for the University of
California.  Benefit rates used in this proposal are: 
Faculty - summer - 12.7%
Academic (Specialist) - 17%
Student employees - summer - 3%
Student employees - academic year - 1.3%

Fees are requested for one nonresident student for the duration of the
project. University of California policy requires award payment of
fees for any student with more than 25% support from a grant.  In the
first year, $21,147 is requested for non-resident fees and tuition,
$22,579 in the second year, and $24,111 in the third year. Fees &
tuition are excluded from indirect cost assessment. 

Equipment
Equipment funds are requested for the first year only for two dual
Opteron workstations at $5,000 each. These workstations will be
dedicated to the Specialist and the graduate student for use on this
project. 

The Cisco Catalyst WS-3550-12T switch and accompanying components
provide a 1 gigabit/second Ethernet uplink or 2 gigabit/second
Ethernet "Etherchannel" uplink to a connection on campus for
Optiputer.  An additional 2 GBICs for the second uplink are included
to increase to 2 GE speed.  The switch provides multiple GE copper
interfaces and acts as a node connection point.  In order to tie to
the ESMF, the Cisco SW-C3508G GE switch and components are included.
Equipment prices include tax, and shipping and handling charges.
Equipment is excluded from indirect cost assessment. 

Materials and Supplies
None are requested.

Travel
Domestic:  One round-trip per year at $1500 per trip is requested for
the Specialist to travel to Denver/Boulder to visit with the Unidata
netCDF and OPeNDAP projects.  Each trip includes roundtrip travel
from Irvine to Denver, one-week hotel and per diem. Travel support is
requested in years 2 and 3 for the PI and graduate student to attend
the ACM Supercomputing meeting to present science and to demonstrate
distributed climate data reduction technology. $1000 is requested each
year for travel per person plus $500 in shipping/rental fees at a
total cost of $2500 per year.  Travel support is also requested in
years 2 and 3 for the PI and Specialist to attend the Fall AGU meeting
to demonstrate distributed climate data reduction technology.  $1000
is requested each year for travel per person plus $500 in
shipping/rental fees at a total cost of $2500 per year. These trips
include estimated conference registration, abstract submission fees,
RT airfare, lodging, meals and ground transportation.  Travel
estimates are based on historical usage. 

Other Direct Costs
Charges for journals, photocopying, long distance phone, fax and
postage charges pursuant to this project are requested each year.
Included in these expenses are long-distance charges for usage
directly related to the project, such as communication with
colleagues, journals, and vendors.    Photocopying of research
materials including publications and results of this sponsored
research project are requested as well as mail and shipping for
materials related to this project. Support is requested in years 2 and
3 for publication costs pursuant to this project, which include
utilization of expensive color figures.  Costs were estimated
according to historical usage. 

Subaward to UCSD:  $12,918 is requested in the first year. This
subaward will fund one month of salary for Aaron Chin, the UCSD
OptIPuter project manager, to install an OPeNDAP server on
the UCSD OptIPuter, and to configure it for benchmark studies of
SDO in a high-bandwidth distributed data mode.

Indirect Costs
Facilities and Administrative costs were estimated in accordance with
UCI's approved indirect cost rate agreement.  The indirect cost rate
of 51.5% of MTDC through 6/30/05 and 52.5% of MTDC effective 7/1/05
was based upon the nature and location of the work proposed.
Graduate student fees and tuition and equipment are excluded from
indirect cost assessment.  The subaward to UCSD is not subject to
indirect cost assessment due to a University of California multicampus
agreement.  UCI's indirect cost rate agreement was approved by DHHS,
the Federal Cognizant Audit Agency for UCI, on 12/5/01. 
\end{verbatim}
\newpage

\section{Facilities, Equipment, and Other Resources}
\subsection{Computer and Networking}
\setcounter{page}{1}
\thispagestyle{empty}
Our SEI(GEO) project is well-situated to take advantage of the 
fastest network connections at UCI and UCSD. 
The UCI Network Infrastructure provides researchers with 1.0\,\Gbxs\ 
access to the high-performance network of \calit\ and to the
\Gb\-backbone of UCINet. 
UCI will upgrade this link to 10\,\Gbxs\ in the near future. 
This will remove one potential bottleneck to the
ESMF$\leftrightarrow$OptIPuter connection.

PI~Zender is director of the Earth System Modeling Facility (ESMF), 
an NSF-supported MRI facility dedicated to coupled global climate,
chemistry, and biogeochemistry simulations.
The ESMF is an 88-CPU Power4+ IBM supercomputer with 192\,\GB\ RAM and
32\,\TB\ of RAID storage.
Since this SEI proposal is highly complementary to the ESMF mission,
the ESMF will be made available for NCO/SDO/OPeNDAP development,
benchmarking, and testing.
Funding for bi-directional 2\,\Gbxs\ connections between the
ESMF and UCI's Campus portal is requested as part of this SEI
proposal. 
The Tera-Scale distributed data reduction to be optimized in this SEI  
proposal will be demonstrated between two or more geographically
disparate supercomputer data-farms. 
The 30\,\TB\ ESMF RAID storage will typically be one of those
data-farms. 
The ESMF will place at least 1\,\TB\ of storage under the control of
an OPeNDAP server in support of this SDO project.

Co-PI~Papadopoulos is Program Director for Grid and Cluster Computing 
at the SDSC. As Co-PI of the \calit\ OptIPuter, Papadopoulos manages
the storage, clusters, and grid part of the UCSD OptIPuter.
The targeted OptIPuter node for the OPeNDAP server is an IBM
storage cluster (see attached letter of support from Larry Smarr).
We plan to partition a portion of the cluster for OPeNDAP services.
The cluster consists of 48~storage nodes with a single management
node.
Each node is an IBM xSeries 345 2U~rack mount server with dual 2.8~GHz
Xeon Processors.  
There is 1.5\,\GB\ RAM and six 73\,\GB\ drives for a total of
2.19\,\TB\ of storage each. 
The applications will access the storage via PVFS.  
The IBM storage cluster is connected to the OptIPuter network today at
4\,\Gbxs.

Our project will conduct distributed data reduction at two other sites  
in addition to the ESMF and UCSD, which share a high-bandwidth
connection. 
The other sites are NCAR and Japan's Earth Simulator Center
(ESC). 
These are also world-class supercomputer facilities.
Facility managers or directors at these sites have expressed great
interest to Zender in making about 1\,\TB\ of storage
available through OPeNDAP servers for proof-of-concept
benchmarking for our study (Personal communication, 2004, Dr.~Tetsuya
Sato, Director-General, Earth Simulator Center, Japan; personal
communication, 2004, George Fuentes, Head, High Performance Systems
Section, SCD, NCAR).  
Accessing these sites via the standard research Internet requires no
additional networking support or hardware.
Deployment and operation of the OPeNDAP servers at these sites 
will be worked out on an informal basis once the SDSC connection is
complete.

\subsection{Maintenance and Technical Support}
\setcounter{page}{1}
\thispagestyle{empty}
Network and Academic Computing Services
(\href{http://www.nacs.uci.edu}{NACS}) is the largest IT organization   
at UCI.
Dr.~Frank Wessel manages the NACS Research Computing Support Group
(RCS).  
Dr.~Wessel is the NACS project lead for the Earth System Modeling
Facility (ESMF), a recently funded NSF MRI with C.~Zender as PI. 
RCS provides customized support and facilitates user access to
high-performance computing (HPC) resources, software, training, and
development of the UCI research infrastructure. 
NACS RCS staff led by Dr.~Wessel and Garrett Hildebrand will
facilitate and oversee the dedicated network infrastructure to link
the ESMF to the UCSD OptIPuter.
To implement the required network connection for the high-speed
ESMF$\leftrightarrow$OptIPuter network, NACS will upgrade network
facilities with additional switches and interconnects provided for in
the budget.  
UCSD OptIPuter project manager Aaron Chin will ensure smooth
connectivity on the UCSD end.
\newpage

\section{Acronyms and Abbreviations}\label{sxn:abb}
\setcounter{page}{1}
\thispagestyle{empty}
%\begin{longtable}{ >{\raggedright}p{7.0em}<{} >{\raggedright}p{8.0em}<{} }
\begin{longtable}{ r >{\raggedright}p{25.0em}<{} l }
& \kill % NB: longtable requires caption as table entry
\caption[Acronyms and Abbreviations]{\textbf{Acronyms and Abbreviations}%
\label{tbl:abb}} \\
\hline\hline \rule{0.0ex}{\hlntblhdrskp}% 
Abbreviation & Description & \\[0.0ex]
\hline \rule{0.0ex}{\hlntblntrskp}%
\endfirsthead % Lines between and \endfirsthead appear at top of table
\caption[]{(continued)} \\ % Set label for following pages
Abbreviation & Description & \\[0.0ex]
\hline \rule{0.0ex}{\hlntblntrskp}%
\endhead % Previous block appears at top of every page
\endlastfoot % Previous block appears at end of table
ABI & Application Binary Interface & \\
ACM & Association for Computing Machinery & \\
AGU & American Geophysical Union & \\
AMWG & (CCSM) Atmospheric Model Working Group & \\
API & Application Programmer Interface & \\
CAM & Community Atmosphere Model & \\
CCM & Community Climate Model & \\
CCSM & Community Climate System Model & \\
CENIC & Corporation for Education Network Initiatives in California & \\
CF & Climate \& Forecast (metadata convention) & \\
CL & Client Library & \\
CPU & Central Processing Unit & \\
CST & Client-Server Traffic & \\
CSTP & Client-Server Traffic Protocol & \\
CVS & Concurrent Versions System & \\
Cal-(IT)$^{2}$ & California Institute for Telecommunications and Information Technology & \\
CalREN-2 & California Regional Network & \\
CalREN-XD & California Research Network, Experimental Development & \\
DDRA & Distributed Data Reduction \& Analysis & \\
DEAD & Dust Entrainment And Deposition Model & \\
DKRZ & Deutsches Klimarechenzentrum & \\
DODS & Distributed Oceanographic Data System & \\
DRA & Data Reduction \& Analysis & \\
ESA & European Space Agency & \\
ESC & Earth Simulator Center & \\
ESE & Earth Science Enterprise & \\
ESMF & Earth System Modeling Facility & \\
ESS & Earth System Science (Department) & \\
FAR & First Assessment Report & \\
TAR & Third Assessment Report & \\
SAR & Second Assessment Report & \\
AR4 & Fourth Assessment Report & \\
GB & Gigabyte & \\
GCM & General Circulation Model & \\
GFS & Global File System & \\
GPFS & General Parallel File System & \\
Gb & Gigabit & \\
HDF & Hierarchical Data Format & \\
HIPerWall & High-Performance Visualization System for Collaborative Earth System Sciences & \\ 
HPCC & High Performance Computing Center & \\
HPVM & High-Performance Virtual Machines & \\
HTML & Hypertext Markup Language & \\
HTTP & Hypertext Transfer Protocol & \\
ICS & Information and Computer Sciences & \\
II & Information Integration & \\
IPCC & Intergovernmental Panel on Climate Change & \\
JISAO & Joint Institute for the Study of the Atmosphere and Ocean & \\
LANL & Los Alamos National Laboratory & \\
LLNL & Lawrence Livermore National Laboratory & \\
MM5 & PennState/NCAR Mesoscale Model version~5 & \\
MPMD & Multiple Program Multiple Data & \\
NACS & Network and Academic Computing Services & \\
NASA & National Aeronautics and Space Administration & \\
NCAR & National Center for Atmospheric Research & \\
NCEP & National Centers for Environmental Prediction & \\
NCVweb & NetCDF Viewer Web & \\
NERSC &  National Energy Research Scientific Computing Center & \\
NOAA & National Oceanic and Atmospheric Administration & \\
NOPP & National Oceanographic Partnership Program & \\
NPACI & National Partnership for Advanced Computational Infrastructure & \\
NPOESS & National Polar-orbiting Operational Environmental Satellite System & \\
NSF & National Science Foundation & \\
NUG & NCO User's Guide & \\
ONI & Optical Networking Initiative & \\
OPeNDAP & Open-source Project for a Network Data Access Protocol & \\
ORNL & Oak Ridge National Laboratory & \\
OpenMP & Standard API for SMPP & \\
OptIPuter & Optical networking Internet Protocol computer & \\
PI & Principal Investigator & \\
PVFS & Parallel Virtual File System & \\
PVM & Parallel Virtual Machine & \\
RAM & Random Access Memory & \\
RCS & Research Computing Services & \\
RDB & Relational databases & \\
RFP & Request for Proposals & \\
SDDF & Self-describing Data Format & \\
SDSC & San Diego Supercomputer Center & \\
SEI & Science and Engineering Informatics & \\
SMP & Symmetric Multi-Processing & \\
SMPP & Shared Memory Parallel Programming & \\
SP & Senior Personnel & \\
SPMD & Single Program Multiple Data & \\
SRES & Special Report on Emissions Scenarios & \\
SSDRA & Server-Side Data Reduction \& Analysis & \\
TB & Terabyte & \\
UCAR & University Corporation for Atmospheric Research & \\
URI & University of Rhode Island & \\
WRF & Weather Research and Forecasting (model) & \\
XML & Extensible Markup Language & \\
netCDF & network Common Data Form & \\
p655 & 8-CPU IBM computer node in ESMF & \\
p690 & 32-CPU IBM computer node in ESMF & \\
\end{longtable} % end tbl:abb
\newpage

\section{Project-Wide Combined Collaborator and Advisor List}\label{sxn:prs_lst} 
\setcounter{page}{1}
\thispagestyle{empty}
% Currently includes: Zender
All Personnel Associated with Proposal, Collaborators and
Co-Editors of Project Senior Personnel, their Post-docs, and their
Thesis Advisors:
\begin{enumerate*}
\item[] Ammann, C.~A. (NCAR) 
\item[] Bian, H. (NASA/UMBC) 
\item[] Bonan, G.~B. (NCAR) 
\item[] Busacca, A. (WSU)
\item[] Chien, Andrew, University of California, San Diego
\item[] Colarco, P. (GSFC)
\item[] Collins, W.~D. (NCAR) 
\item[] Cooper, W.~A. (NCAR)
\item[] DeFanti, Tom (UIC)
\item[] Dongarra, Jack, University of Tennessee, Knoxville
\item[] Famiglietti, J. (UCI) 
\item[] Foster, Ian, Argonne National Laboratory
\item[] Garrett Hildebrand (UCI)
\item[] Gaylord, D. (WSU)
\item[] Geist, George, Oak Ridge National Laboratory
\item[] Grimshaw, Andrew, University of Virginia
\item[] Grini, A. (U.~Oslo)
\item[] Kesselman, Carl, ISI, University of  Southern California
\item[] Kiehl, J.~T. (NCAR) 
\item[] Kohl, James, Oak Ridge National Laboratory
\item[] Kuester, F. (UCI) 
\item[] Mahowald, N.~M. (NCAR) 
\item[] Maxine Brown (UIC)
\item[] Messina, Paul, Caltech
\item[] Moore, J.~K. (UCI) 
\item[] Nachtigal, Noel, Sandia National Laboratories
\item[] Okin, G. (U.~Virginia) 
\item[] Pajarola, R. (UCI) 
\item[] Pratt, Thomas, Sandia National Laboratories
\item[] Rasch, P.~J. (NCAR)
\item[] Riesen, Rolf, Sandia National Laboratories
\item[] Sanderson, James, Los Alamos National Laboratory
\item[] Semeraro, David, National Computational Science Alliance
\item[] Shelton, William, Oak Ridge National Laboratory
\item[] Smarr, Larry, University of California, San Diego
\item[] Stammer, Detlef, Scripps Institution of Oceanography
\item[] Sunderam, Vaidy, Emory University
\item[] Thomas, G.~T. (CU) 
\item[] Torres, O. (NASA GSFC)
\item[] Valero, F.~P.~J. (Scripps) 
\item[] Yu, S. (Duke) 
\end{enumerate*}
\newpage

List is Alphabetical by Surname.
\begin{enumerate*}
\item[] Collaborators of Zender:
\begin{enumerate*}
\item[] Ammann, C.~A. (NCAR) 
\item[] Bian, H. (NASA/UMBC) 
\item[] Bonan, G.~B. (NCAR) 
\item[] Busacca, A. (WSU)
\item[] Colarco, P. (GSFC)
\item[] Collins, W.~D. (NCAR) 
\item[] Famiglietti, J. (UCI) 
\item[] Gaylord, D. (WSU)
\item[] Grini, A. (U.~Oslo)
\item[] Kiehl, J.~T. (NCAR) 
\item[] Kuester, F. (UCI) 
\item[] Mahowald, N.~M. (NCAR) 
\item[] Moore, J.~K. (UCI) 
\item[] Okin, G. (U.~Virginia) 
\item[] Pajarola, R. (UCI) 
\item[] Rasch, P.~J. (NCAR)
\item[] Valero, F.~P.~J. (Scripps) 
\item[] Yu, S. (Duke) 
\item[] Torres, O. (NASA GSFC)
\item[] Thomas, G.~T. (CU) 
\item[] Cooper, W.~A. (NCAR)
\end{enumerate*}
\item[] Collaborators of Papadopoulos:
\begin{enumerate*}
\item[] Sunderam, Vaidy, Emory University
\item[] Dongarra, Jack, University of Tennessee, Knoxville
\item[] Geist, George, Oak Ridge National Laboratory
\item[] Kohl, James, Oak Ridge National Laboratory
\item[] Nachtigal, Noel, Sandia National Laboratories
\item[] Pratt, Thomas, Sandia National Laboratories
\item[] Shelton, William, Oak Ridge National Laboratory
\item[] Riesen, Rolf, Sandia National Laboratories
\item[] Sanderson, James, Los Alamos National Laboratory
\item[] Semeraro, David, National Computational Science Alliance
\item[] Chien, Andrew, University of California, San Diego
\item[] Smarr, Larry, University of California, San Diego
\item[] Stammer, Detlef, Scripps Institution of Oceanography
\item[] Grimshaw, Andrew, University of Virginia
\item[] Foster, Ian, Argonne National Laboratory
\item[] Kesselman, Carl, ISI, University of  Southern California
\item[] Messina, Paul, Caltech
\end{enumerate*}
\item[] Collaborators of Chin:
\begin{enumerate*}
\item[] Maxine Brown (UIC)
\item[] Tom DeFanti (UIC)
\item[] Garrett Hildebrand (UCI)
\end{enumerate*}
\end{enumerate*}
\newpage

\subsection{Supplementary Documents}\label{sxn:spl_doc}
\setcounter{page}{1}
\thispagestyle{empty}
\begin{verbatim}
1. ${DATA}/prp/prp_sei/prp_sei_ltr_cornillon.pdf
2. ${DATA}/prp/prp_sei/prp_sei_ltr_smarr.pdf
3. ${DATA}/prp/prp_sei/prp_sei_ltr_rew.pdf
4. ${DATA}/prp/prp_sei/prp_sei_clb.pdf
5. ${DATA}/prp/prp_sei/prp_sei_abb.pdf
\end{verbatim}
\clearpage

\csznote{
Sent 20020515:
To: seablom@gsfc.nasa.gov, Mary.A.Esfandiari.1@gsfc.nasa.gov,
   mfolk@ncsa.uiuc.edu, Richard.E.Ullman.1@gsfc.nasa.gov
Subject: Funding opportunities for NCO->HDF
Reply-To: Charlie Zender <zender@uci.edu>
CC: Charlie Zender <zender@uci.edu> 
--text follows this line--
Dear Drs. Folk, Seablom, Esfandiari, and Ullman,

I am the author of the netCDF Operator (NCO) software suite and
an Assistant Professor of Earth System Science at the University 
of California at Irvine. You are probably familiar with netCDF
but may not be familiar with NCO. NCO is a free suite of netCDF
file manipulation and analysis software that is widely used in the
geophysics and climate modeling and analysis communities. 
For example, most General Circulation Models (GCMs) write
their output in netCDF format, and virtually all of these
institutions use NCO to help manage and analyze these data. 
The homepage for NCO is http://nco.sourceforge.net, and has
links to the User's guide which shows plenty of examples if
you are interested.

As the amount of NASA-generated HDF satellite data continues
to mushroom, I find myself needing the capabilities of NCO more
and more but there is no equivalent software for HDF files.
The chief advantage of NCO is that it treats whole files as units of
data, and makes performing relatively sophisticated operations on
multitudes of files almost trivial---everything from averaging,
and concatenating to arbitrary arithmetic expressions. 
Although NCO can be coaxed to work with HDF4 through the 
HDF4-netCDF libraries, no NCO equivalent exists for HDF5 files
and what is needed is a native HDF port of NCO.

I am writing to inquire about funding opportunities to create 
a native HDF version of NCO, tentatively called SDO (Scientific
Data Operators). I am looking for a program announcement
or other opportunity to submit a proposal to accomplish this task.
The goal would be to hire a full-time programmer to port NCO
to HDF, creating SDO. I think the HDF port itself could be
accomplished in about 18 months. For a decent programmer, that would
cost about $150k including UC Irvine overhead. Of course there
are lots of features and polish a full-time programmer could
add to SDO that a tenure track assistant professor does not
have time to do. 

I think the benefits of SDO to both the geophysical community and to
NASA/HDF would be great. SDO would be free to the community and would
lower the barriers to using and analyzing HDF data. So I am asking for
your help in identifying potential funding opportunities for NCO.
I have NASA funding for science activities, but I have no idea where
to begin to seek funding for this project, so I have culled your
names from HDF-related web articles. I hope you will contact me
with suggestions on how to obtain funding for this project.
Please feel free to contact me with any questions.

Thanks,
Charlie

Hi,

I'm writing an NSF proposal to exploit netCDF Operators (NCO) running
over OPeNDAP to reduce large datasets stored at geographically
disparate locations. 
The more I think about OPeNDAP, the less I realize I know. 
Say I'm at UCI and I want to time-average a single remote file.
With NCO compiled as OPeNDAP clients, I would use the ncra operator...

ncra -O http://server1/file1

All of the averaging takes place on the OPeNDAP server (i.e., at NOAA)
which ships the results back to my client at UCI, right? 
The alternative is that OPeNDAP ships the data back to my client which
does the averaging itself. In that case then my understanding of
OPeNDAP is fundamentally flawed so sigh and ignore the following.
Otherwise, what happens when the files are on multiple servers?

ncra -O http://server1/file1 http://server2/file2 foo.nc

Based on the above example, server1 averages all the time slices
in file1 together and then...well, I'm not sure because I don't
understand the OPeNDAP server protocol. 

Does one of these scenarios resemble reality?
1. "dumb server, serial high traffic, high latency" scenario:
   Server1 sends back each time-slice in file1 to the local client.
   The client does all the averaging. 
   When file2 is needed that same thing happens and server2 sends
   each time-slice of file2 back to the local client.
   All averaging is performed by the client so there is heavy traffic
   and latency between the client and all the servers.
   All OPeNDAP servers do is data transfer, not arithmetic.
2. "master server, serial high traffic, high latency" scenario:
   Server1 (the master server) averages all time slices in file1.
   Server1 contacts server2 each time it's ready to add a new time
   slice from file2 to the running average (i.e., with each ncra call
   to nc_get_var_*()).  
   Server1 performs all arithmetic so there is heavy traffic and
   latency between server1 and server2.
   Server2 does no arithmetic, just data transfer. 
3. "master server, serial low traffic, high latency" scenario:
   Server1 averages file1 and then Server2 averages file2 and sends
   just the average back to server1 so that server1 completes the task
   of aggregating the intermediate data and computing the final
   average, which it ships back to the client.
4. "master client, parallel low traffic, low latency" scenario:
   Server1 averages file1 and _at the same time_ Server2 averages
   file2. The servers send the averages back to the client, which 
   completes the task of aggregating the intermediate data and
   computing the final average.
5. "master server, parallel low traffic, low latency" scenario:
   Server1 averages file1 and _at the same time_ Server2 averages
   file2 and sends just the average back to server1 so that server1
   has the average of file1 and file2 at about the same time.
   Server1 completes the task of aggregating the intermediate data and
   computing the final average, which it ships back to the client.
6. Something else...

I think ...
Scenario 1 is the nightmare that OPeNDAP permanently woke us from.
Scenario 2 or 3 is reality.
Scenario 4 is achievable, but not desirable, since OPeNDAP servers
           probably have better connectivity among themselves than to  
           some piddly laptop like this one on a researcher's desk.
Scenario 5 is desirable and achievable by smarter clients than NCO
 
If so, we are interested in coaching NCO how to transparently function
as in Scenario 5, i.e., to identify logically segregable parts of
multifile jobs so that arithmetic and data reduction are done on the
server local to the file whenever possible and then the results of
those intermediate calculations are sent to the "master server" for
final aggregation/processing and then finally back to the local client.  

Thanks for any clarification/thoughts on this point,
Charlie

} % end csznote

\csznote{
% Text snippets for future proposals...

AEROCOM recommends NCO, see
http://nansen.ipsl.jussieu.fr/AEROCOM/protocol.html

A variety of software\footnote{
HDF and netCDF tools are listed at
\url{http://hdf.ncsa.uiuc.edu/tools.html} and
\url{http://www.unidata.ucar.edu/packages/netcdf/software.html}, 
respectively}, both free and commercial, works with one or the other
or both of these formats.

High level data processing languages reduce the time and development
expenses required by Fortran or C-based (i.e., low-level) processing
of scientific data.  

Separation of the format-independent front end semantics from format-dependent backend 
API (netCDF or HDF-EOS). 
The abstraction layer which translates requests into the backend API 
will be modular and support network transparent protocols such as
OPeNDAP. 

At the same time, the gap between the programming resources of end
users (often scientists) and the sophisticated computer skills
necessary to access data archived in an SDF has increased.
Today, fewer scientists possess the skills or time to directly
process the data they need than ever before.
This has spurred scientific users to migrate to high level languages 
(HLLs) designed to hide the complexity of the underlying SDF 
application programming interfaces (APIs).

Table~\ref{tbl:dpp} shows how intra-file processing methods 
appear when applied to the inter-file paradigm.
\begin{table}
\begin{minipage}{\hsize} % Minipage necessary for footnotes KoD95 p. 110 (4.10.4)
\renewcommand{\footnoterule}{\rule{\hsize}{0.0cm}\vspace{-0.0cm}} % KoD95 p. 111
\begin{center}
\caption[Data Processing Paradigms]{\textbf{Data Processing Paradigms}% 
\footnote{Scientific data for the Geosciences}
\label{tbl:dpp}}   
\vspace{\cpthdrhlnskp}
\begin{tabular}{ >{\raggedright}p{9.0em}<{} >{\raggedright}p{9.0em}<{}
r}
\hline \rule{0.0ex}{\hlntblhdrskp}% 
Interfile & Intrafile & \\[0.0ex]
& & \\[0.0ex]
\hline \rule{0.0ex}{\hlntblntrskp}%
& & \\[-2.0ex]
Average temperature $\tpt$ in files \flprn{data001.hd5},
\flprn{data002.hd5} \ldots \flprn{data999.hd5} 
& Average temperature $\tpt$ in file \flprn{data.hd5} & \\[0.5ex]
\hline
\end{tabular}
\end{center}
\end{minipage}
\end{table} % end tbl:dpp

} % end csznote

\csznote{
# Email distribution list
Peter Cornillon <pcornillon@gso.uri.edu>,
Dan Holloway <d.Holloway@gso.uri.edu>,
James Gallagher <jgallagher@gso.uri.edu>,
Steve Jenks <sjenks@uci.edu>,
Harry Mangalam <hjm@tacgi.com>,
Phil Papadopoulos <phil@sdsc.edu>,
Russ Rew <russ@unidata.ucar.edu>,
Larry Smarr <lsmarr@ucsd.edu>,
Padhraic Smyth <smyth@ics.uci.edu>,
Aaron Chin <akchin@soe.ucsd.edu>,
Albert Yee <afyee@uci.edu>,
Rajiv Bendale <bendale@us.ibm.com>,
John Caron <caron@ncar.ucar.edu>,
Brian Eaton <eaton@ncar.ucar.edu>,
Mark Taylor <mt@lanl.gov>,
Frank Wessel <fwessel@uci.edu>,
Henry Butowsky <henryb@ntlworld.com>,
Rorik Peterson <ffrap1@uaf.edu>,
Charlie Zender <zender@uci.edu>
} % end csznote

\csznote{
License issues:

20040921: 
Kevin Kennan <kkennan@uci.edu> 824-7295
Manager, Intellectual Property Administration,
UCI Office of Technology Alliances (OTA)
http://www.rgs.uci.edu/rig/ota/otacpsft.htm
No changes in licensing are required for NCO->SDO project
Kevin Kennan says assigning copyright to Charlie Zender
and GPL licensing is fine. 
UCI has no interest in owning copyright or in modifying license

Donna Gilbertson <dagilber@uci.edu>
Re-budgeting with NSF required for changing more than 25% of original budget

20040922:
Sydney Mueller <semuelle@uci.edu> and I spoke to Harry Gunther 4-6510
Service providers need to show proof of ~$500k liability insurance 
Must sign on to standard UCI Terms and Conditions
I provide detailed scope of work and sole source justification
May be possible to hire Henry, even though in England
Contract would be OK

Hiring plan:
$10k yr-1 available to Butowsky/Open Source?
$48k yr-1 Associate Specialist Step II
$fxm Domestic graduate student (ask advisor about fees)
$
} % end csznote

\csznote{
% Usage: Place usage here at end of file so comment character % not needed
cd ~/prp;make -W prp_sei.tex prp_sei.dvi prp_sei.ps prp_sei.pdf prp_sei.txt;cd -
scp ${HOME}/prp/prp_sei.dvi ${DATA}/ps/prp_sei.pdf ${DATA}/ps/prp_sei.ps ${HOME}/prp/prp_sei.tex ${HOME}/prp/prp_sei.txt dust.ess.uci.edu:/var/www/html/prp/prp_sei

# NB: latex2html works well on prp_sei.tex
latex2html -dir /var/www/html/prp/prp_sei prp_sei.tex
# NB: tth chokes on prp_sei.tex
cd ${HOME}/prp;tth -a -Lprp_sei -p./:${TEXINPUTS}:${BIBINPUTS} < ${HOME}/prp/prp_sei.tex > prp_sei.html
scp prp_sei.html dust.ess.uci.edu:/var/www/html/prp/prp_sei
# NB: tex4ht works well on prp_sei.tex
cd ${HOME}/prp;htlatex prp_sei.tex
scp prp_sei*.css prp_sei*.html dust.ess.uci.edu:/var/www/html/prp/prp_sei
# NB: tex4moz works well on prp_sei.tex
cd ${HOME}/prp;/usr/share/tex4ht/mzlatex prp_sei.tex
scp prp_sei*.css prp_sei*.html prp_sei*.xml dust.ess.uci.edu:/var/www/html/prp/prp_sei

# Proposal preparation: 
# Divvy up master PDF into FastLane components
pdftk A=${DATA}/ps/prp_sei.pdf cat A3 output ${DATA}/prp/prp_sei/prp_sei_smr.pdf
pdftk A=${DATA}/ps/prp_sei.pdf cat A4-18 output ${DATA}/prp/prp_sei/prp_sei_dsc.pdf
pdftk A=${DATA}/ps/prp_sei.pdf cat A19 output ${DATA}/prp/prp_sei/prp_sei_rfr.pdf
pdftk A=${DATA}/ps/prp_sei.pdf cat A20-22 output ${DATA}/prp/prp_sei/prp_sei_bdg_jst.pdf
pdftk A=${DATA}/ps/prp_sei.pdf cat A23-24 output ${DATA}/prp/prp_sei/prp_sei_fcl.pdf
pdftk A=${DATA}/ps/prp_sei.pdf cat A25-27 output ${DATA}/prp/prp_sei/prp_sei_abb.pdf
pdftk A=${DATA}/ps/prp_sei.pdf cat A28-29 output ${DATA}/prp/prp_sei/prp_sei_clb.pdf

# Add supplementary files in one command rather than loop
# pdftk does not allow input file to be output file
pdftk A=${DATA}/ps/prp_sei.pdf \
B=${DATA}/prp/prp_sei/prp_sei_ltr_cornillon.pdf \
C=${DATA}/prp/prp_sei/prp_sei_ltr_smarr.pdf \
D=${DATA}/prp/prp_sei/prp_sei_ltr_rew.pdf \
E=${DATA}/prp/prp_sei/prp_sei_cv_zender.pdf \
F=${DATA}/prp/prp_sei/prp_sei_cp_zender.pdf \
cat A B C D E F output ${DATA}/ps/prp_sei_fll.pdf
} % end csznote on usage

\end{document}
