aggregate_wustl.cwl

#!/usr/bin/env cwl-runner
### Aggregates data in a NetCDF file over given geographies
#  Copyright (c) 2021. Harvard University
#
#  Developed by Research Software Engineering,
#  Faculty of Arts and Sciences, Research Computing (FAS RC)
#  Author: Michael A Bouzinier
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#

 22cwlVersion: v1.2
 23class: CommandLineTool
 24baseCommand: [python, -m, dorieh.rasters.file_processors.wustl_file_processor]
 25
 26requirements:
 27  InlineJavascriptRequirement: {}
 28  ResourceRequirement:
 29    coresMin: 2
 30    ramMin: 8192
 31    outdirMin: 5120
 32    # coresMax: 6
 33
 34
 35doc: |
 36  This tool aggregates data in NetCDF or GeoTiff file over provided shapes
 37  (zip codes or counties). It produces mean values over shape.
 38  
 39  The tool expects one value for every variable in every grid node. 
 40  The variables are expected to be formatted as they are for 
 41  [Atmospheric Composition Analysis Group of Washington University](https://sites.wustl.edu/acag/datasets/surface-pm2-5/)
 42  
 43  See also [aggregate_daily.cwl](aggregate_daily) - a tool, that expects 
 44  multiple (daily) values at every grid node.
 45
 46inputs:
 47  strategy:
 48    type: string
 49    default: downscale
 50    inputBinding:
 51      prefix: --strategy
 52    doc: "Rasterization strategy"
 53  ram:
 54    type: string
 55    default: 2GB
 56    doc: Runtime memory, available to the process
 57    inputBinding:
 58      prefix: --ram
 59  shapes_dir:
 60    type: Directory?
 61    inputBinding:
 62      prefix: --shapes_dir
 63  band:
 64    type: string[]
 65    inputBinding:
 66      prefix: --var
 67  geography:
 68    type: string
 69    doc: |
 70      Type of geography: zip codes or counties
 71    inputBinding:
 72      prefix: --geography
 73  netcdf_data:
 74    type: File
 75    doc: "Path to downloaded file"
 76    inputBinding:
 77      prefix: --raw_downloads
 78  shape_files:
 79    type: File[]?
 80    doc: "Paths to shape files"
 81    inputBinding:
 82      prefix: --shape_files
 83  output_type:
 84    type: string[]
 85    doc: What to output as the result of executing the tool
 86    default:
 87      - aggregation
 88      - data_dictionary
 89    inputBinding:
 90      prefix: --output
 91  table:
 92    type: string?
 93    doc: |
 94      Optional name ot the table where the aggregated data will be
 95      eventually stored
 96    inputBinding:
 97      prefix: --table
 98
 99arguments:
100  - valueFrom: "."
101    prefix: --destination
102
# Tool outputs, collected with glob patterns after the command finishes.
outputs:
  # Optional log file matching `*.log`.
  log:
    type: File?
    outputBinding:
      glob: "*.log"
  # NOTE(review): declared as a single required File while two glob patterns
  # are listed; more than one match would not fit the `File` type - confirm
  # that the tool always produces exactly one matching file.
  csv_data:
    type: File
    doc: |
      The output CSV file, containing mean values of the given
      variable over given geographies. Each line
      contains date, geo id (zip or county FIPS) and value
    outputBinding:
      glob:
        - "*.csv*"
        - "**/*.csv*"
  data_dictionary:
    type: File?
    doc: |
      Data dictionary for the aggregated data
    outputBinding:
      glob:
        - "*.yaml"
        - "**/*.yaml"
  # Captured standard error stream of the command.
  errors:
    type: stderr

# Name of the file receiving standard error: "aggr-", the name root of the
# `netcdf_data` input file, then ".err".
stderr: aggr-$(inputs.netcdf_data.nameroot).err