aggregate_daily.cwl

#!/usr/bin/env cwl-runner
### Tool aggregating a NetCDF grid file over shapes
#  Copyright (c) 2021. Harvard University
#
#  Developed by Research Software Engineering,
#  Faculty of Arts and Sciences, Research Computing (FAS RC)
#  Author: Michael A Bouzinier
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#

# CWL v1.2 command-line tool that runs the `gridmet.launcher` Python module.
cwlVersion: v1.2
class: CommandLineTool
baseCommand:
  - python
  - "-m"
  - gridmet.launcher

requirements:
  # Several fields of this tool (envDef values, valueFrom, glob, stderr)
  # are JavaScript expressions.
  InlineJavascriptRequirement: {}
  EnvVarRequirement:
    envDef:
      # Forward the `proxy` input to both proxy variables. An envValue must
      # evaluate to a string, so fall back to "" rather than null when no
      # proxy is configured.
      HTTP_PROXY: '$(inputs.proxy || "")'
      HTTPS_PROXY: '$(inputs.proxy || "")'
      # Hosts that must be reached directly, bypassing the proxy.
      NO_PROXY: "localhost,127.0.0.1,172.17.0.1"
  ResourceRequirement:
    # coresMin: 1
    coresMax: 2
    # ramMin is expressed in mebibytes by the CWL specification.
    ramMin: 16384


doc: |
  This tool preprocesses a NetCDF (.nc) file and aggregates gridded data 
  over shapes (zip codes or counties) and time. It produces daily mean values.
  The tool expects daily values in the grid nodes.
  
  The tool expects multiple (daily) values for one variable in every grid node. 
  The variables are expected to be formatted as they are in 
  [University of Idaho Gridded Surface Meteorological Dataset](https://developers.google.com/earth-engine/datasets/catalog/IDAHO_EPSCOR_GRIDMET#description)

  See also [aggregate_wustl.cwl](aggregate_wustl) for aggregating
  a single value for each of multiple variables in every node, as formatted
  by [Atmospheric Composition Analysis Group of Washington University](https://sites.wustl.edu/acag/datasets/surface-pm2-5/)

inputs:
  # Not bound to the command line; consumed by the EnvVarRequirement
  # expressions that set HTTP_PROXY / HTTPS_PROXY.
  proxy:
    doc: HTTP/HTTPS Proxy if required
    type: string?
    default: ""

  # Passed to the launcher as `--strategy <value>`.
  strategy:
    doc: "Rasterization strategy"
    type: string
    default: downscale
    inputBinding:
      prefix: --strategy

  # Passed to the launcher as `--ram <value>`.
  # NOTE(review): independent of ResourceRequirement.ramMin; confirm the two
  # are meant to differ.
  ram:
    doc: Runtime memory, available to the process
    type: string
    default: "2GB"
    inputBinding:
      prefix: --ram

  # Optional directory, passed as `--shapes_dir <path>` when supplied.
  shapes:
    type: Directory?
    inputBinding:
      prefix: --shapes_dir

  geography:
    doc: |
      Type of geography: zip codes or counties
    type: string
    inputBinding:
      prefix: --geography

  # A single year, handed to the launcher through its `--years` option.
  year:
    doc: "Year to process"
    type: string
    inputBinding:
      prefix: --years

  # When supplied, rendered on the command line as `--dates month:<value>`.
  # NOTE(review): shares the `--dates` prefix with the `dates` input below;
  # confirm how the launcher treats both being given at once.
  month:
    doc: "Optional month to process"
    type: int?
    inputBinding:
      prefix: --dates
      valueFrom: '$("month:" + self)'

  # Passed as `--var <value>`; also reused for `--destination`, the CSV
  # output glob and the stderr file name.
  band:
    doc: |
      [Gridmet Band](https://developers.google.com/earth-engine/datasets/catalog/IDAHO_EPSCOR_GRIDMET#bands)
    type: string
    inputBinding:
      prefix: --var

  dates:
    doc: 'dates restriction, for testing purposes only'
    type: string?
    inputBinding:
      prefix: --dates

  # No inputBinding of its own: only the directory that contains this file
  # reaches the command line, through the `--raw_downloads` argument.
  input:
    doc: "Downloaded file"
    type: File

  shape_files:
    doc: "Paths to shape files"
    type: File[]
    inputBinding:
      prefix: --shape_files

arguments:
  # The destination is named after the requested band.
  - prefix: --destination
    valueFrom: $(inputs.band)
  # Directory that holds the downloaded `input` file.
  - prefix: --raw_downloads
    valueFrom: $(inputs.input.dirname)

outputs:
  # Optional log file matching `*.log` in the output directory.
  log:
    type: File?
    outputBinding:
      glob: '*.log'

  # Optional gzipped CSV collected from the sub-directory named after the band.
  data:
    doc: |
      The output CSV file, containing daily means of the given
      gridMET variable over given geographies. Each line
      contains date, geo id (zip or county FIPS) and value
    type: File?
    outputBinding:
      glob: '$(inputs.band + "/*.csv.gz")'

  # Captured standard error; the file name comes from the top-level
  # `stderr` field.
  errors:
    type: stderr

# Name of the file that captures stderr (exposed as the `errors` output).
# The month suffix is appended only when the optional `month` input is set,
# so a whole-year run is named aggr-<band>-<year>.err instead of ending in
# "-null.err". Quoted because the expression contains ": ".
stderr: '$("aggr-" + inputs.band + "-" + inputs.year + (inputs.month == null ? "" : "-" + inputs.month) + ".err")'