aggregate_daily.cwl

  1#!/usr/bin/env cwl-runner
  2### Tool aggregating a NetCDF grid file over shapes
  3#  Copyright (c) 2021. Harvard University
  4#
  5#  Developed by Research Software Engineering,
  6#  Faculty of Arts and Sciences, Research Computing (FAS RC)
  7#  Author: Michael A Bouzinier
  8#
  9#  Licensed under the Apache License, Version 2.0 (the "License");
 10#  you may not use this file except in compliance with the License.
 11#  You may obtain a copy of the License at
 12#
 13#         http://www.apache.org/licenses/LICENSE-2.0
 14#
 15#  Unless required by applicable law or agreed to in writing, software
 16#  distributed under the License is distributed on an "AS IS" BASIS,
 17#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 18#  See the License for the specific language governing permissions and
 19#  limitations under the License.
 20#
 21
 22cwlVersion: v1.2  # CWL specification version this document is written for
 23class: CommandLineTool  # describes a single command-line invocation
 24baseCommand: [python, -m, dorieh.rasters.launcher]  # i.e. `python -m dorieh.rasters.launcher`
 25
 26requirements:
 27  InlineJavascriptRequirement: {}  # needed for the $(...) / ${...} JavaScript expressions in this file
 28  EnvVarRequirement:  # environment variables set for the tool process
 29    envDef:
 30      HTTP_PROXY: "$('proxy' in inputs? inputs.proxy: null)"  # value of the `proxy` input, or null if absent
 31      HTTPS_PROXY: "$('proxy' in inputs? inputs.proxy: null)"  # same expression as HTTP_PROXY
 32      NO_PROXY: "localhost,127.0.0.1,172.17.0.1"  # NOTE(review): 172.17.0.1 looks like the Docker bridge gateway - confirm
 33  ResourceRequirement:
 34    # coresMin: 1
 35    coresMax: 2
 36    ramMin: 16384  # CWL expresses ramMin in mebibytes
 37
 38
 39doc: |
 40  This tool preprocesses a NetCDF (.nc) file and aggregates gridded data 
 41  over shapes (zip codes or counties) and time. It produces daily mean values.
 42  The tool expects daily values in the grid nodes.
 43  
 44  The tool expects multiple (daily) values for one variable in every grid node. 
 45  The variables are expected to be formatted as they are in 
 46  [University of Idaho Gridded Surface Meteorological Dataset](https://developers.google.com/earth-engine/datasets/catalog/IDAHO_EPSCOR_GRIDMET#description)
 47
 48  See also [aggregate_wustl.cwl](aggregate_wustl) for aggregating
 49  single variable values with multiple variables in every node as formatted
 50  by [Atmospheric Composition Analysis Group of Washington University](https://sites.wustl.edu/acag/datasets/surface-pm2-5/)
 51
 52inputs:
 53  proxy:  # no inputBinding; read by the EnvVarRequirement expressions
 54    type: string?
 55    default: ""
 56    doc: HTTP/HTTPS Proxy if required
 57  strategy:
 58    type: string
 59    default: downscale
 60    inputBinding:
 61      prefix: --strategy
 62    doc: "Rasterization strategy"
 63  ram:
 64    type: string
 65    default: 2GB  # NOTE(review): ResourceRequirement.ramMin is 16384 - confirm the two are meant to differ
 66    doc: Runtime memory, available to the process
 67    inputBinding:
 68      prefix: --ram
 69  shapes:  # optional directory, bound to --shapes_dir
 70    type: Directory?
 71    inputBinding:
 72      prefix: --shapes_dir
 73  geography:
 74    type: string
 75    doc: |
 76      Type of geography: zip codes or counties
 77    inputBinding:
 78      prefix: --geography
 79  year:
 80    type: string  # a string, not an int; bound to --years
 81    doc: "Year to process"
 82    inputBinding:
 83      prefix: --years
 84  month:
 85    type: int?
 86    doc: "Optional month to process"
 87    inputBinding:
 88      prefix: --dates
 89      valueFrom: $("month:" + self)  # rendered as `--dates month:<value>`
 90
 91  band:  # also used for --destination, the `data` output glob and the stderr file name
 92    type: string
 93    doc: |
 94      [Gridmet Band](https://developers.google.com/earth-engine/datasets/catalog/IDAHO_EPSCOR_GRIDMET#bands)
 95    inputBinding:
 96      prefix: --var
 97  dates:
 98    type: string?
 99    doc: 'dates restriction, for testing purposes only'
100    inputBinding:
101      prefix: --dates  # NOTE(review): same flag as `month`; confirm behavior when both are set
102  input:  # no inputBinding; only its directory is passed, via --raw_downloads in `arguments`
103    type: File
104    doc: "Downloaded file"
105  shape_files:
106    type: File[]
107    doc: "Paths to shape files"
108    inputBinding:
109      prefix: --shape_files
110
111arguments:
112  - valueFrom: $(inputs.band)  # output directory is named after the band
113    prefix: --destination
114  - valueFrom: |  # JavaScript body returning the directory that holds the `input` file
115      ${
116          return inputs.input["dirname"];
117      }
118    prefix: --raw_downloads
119
120outputs:
121  log:
122    type: File?  # optional: absent when no *.log file is produced
123    outputBinding:
124      glob: "*.log"
125  data:
126    type: File?
127    doc: |
128      The output CSV file, containing daily means of the given
129      gridMET variable over given geographies. Each line
130      contains date, geo id (zip or county FIPS) and value
131    outputBinding:
132      glob: $(inputs.band + "/*.csv.gz")  # same directory name that is passed as --destination
133  errors:
134    type: stderr  # captured standard error; file name comes from the top-level `stderr` field
135
136stderr: '$("aggr-" + inputs.band + "-" + inputs.year + (inputs.month == null ? "" : "-" + inputs.month) + ".err")'  # stderr file name; month segment omitted when `month` is not given (was "-null")