#!/usr/bin/env cwl-runner
# wustl_one_file.cwl
### Workflow to aggregate and ingest one file in NetCDF format
#  Copyright (c) 2021-2022. Harvard University
#
#  Developed by Research Software Engineering,
#  Faculty of Arts and Sciences, Research Computing (FAS RC)
#  Author: Michael A Bouzinier
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#

cwlVersion: v1.2
class: Workflow

# Workflow-level requirements. InlineJavascriptRequirement is needed by the
# `${...}` expression in the findfile step; StepInputExpressionRequirement
# is needed by the `valueFrom` fields on step inputs.
requirements:
  SubworkflowFeatureRequirement: {}
  StepInputExpressionRequirement: {}
  InlineJavascriptRequirement: {}
  ScatterFeatureRequirement: {}
  MultipleInputFeatureRequirement: {}


doc: |
  Sub-workflow that aggregates a single NetCDF file over a given
  geography (zip codes or counties) and ingests the
  aggregated data into the database

inputs:
  # Not consumed by any step in this workflow; presumably lets a calling
  # workflow force execution order -- TODO confirm against the caller.
  depends_on:
    type: Any?
  # Directory in which the findfile step looks for the NetCDF input file.
  downloads:
    type: Directory
  # Geography to aggregate over, forwarded to the aggregate step. The
  # workflow doc mentions zip codes or counties; the accepted values are
  # defined by aggregate_wustl.cwl (not visible here).
  geography:
    type: string
  # Year and month select the input file: findfile renders them as YYYYMM.
  year:
    type: int
  month:
    type: int
  # Variable (band) name. Upper-cased by findfile to build the file name
  # and forwarded to the aggregate step wrapped in a one-element list.
  band:
    type: string
    default: pm25
  # Forwarded to the ingest step as `table`; presumably the name of the
  # target database table -- confirm against add_data.cwl.
  table:
    type: string
  shape_files:
    type: File[]
    doc: "Paths to shape files"
  strategy:
    type: string
    doc: "Rasterization strategy"
  ram:
    type: string
    default: 2GB
    doc: Runtime memory, available to the process
  # Both forwarded unchanged to the ingest step; presumably a connection
  # definition file and the name of a connection inside it -- confirm
  # against add_data.cwl.
  database:
    type: File
  connection_name:
    type: string

steps:
  findfile:
    doc: |
      Given input directory, variable (band), year and month,
      evaluates the expected file name for the input data
    run:
      class: ExpressionTool
      inputs:
        downloads:
          type: Directory
        year:
          type: int
        month:
          type: int
        band:
          type: string
      # Builds the location
      #   <downloads>/V4NA03_<BAND>_NA_<YYYYMM>_<YYYYMM>-RH35.nc
      # where <BAND> is the upper-cased band and the month is left-padded
      # with a zero when it is below 10. The file is not checked for
      # existence here; only a File object pointing at it is returned.
      expression: |
        ${
          var v = inputs.band.toUpperCase();
          var y = String(inputs.year);
          var m = (inputs.month < 10 ? '0' : '') + String(inputs.month);
          var ym = y + m;
          var f = "V4NA03_" + v + "_NA_" + ym + "_" + ym + "-RH35.nc";
          f = inputs.downloads.location + '/' + f;
          return {
            netcdf_file: {
              "class": "File",
              "location": f
            }
          };
        }
      outputs:
        netcdf_file:
          type: File
    in:
      year: year
      month: month
      band: band
      downloads: downloads
    out: [netcdf_file]

  aggregate:
    doc: Aggregate data over geographies
    run: aggregate_wustl.cwl
    in:
      strategy: strategy
      ram: ram
      # `sband` is an auxiliary step input carrying the scalar band so that
      # the `band` expression can wrap it in a one-element array;
      # presumably aggregate_wustl.cwl expects `band` as a list -- confirm.
      band:
        valueFrom: $([inputs.sband])
      sband: band
      geography: geography
      netcdf_data: findfile/netcdf_file
      shape_files: shape_files
    out:
      - log
      - errors
      - csv_data

  ingest:
    doc: Ingests the aggregated data into the database
    run: add_data.cwl
    in:
      table: table
      input: aggregate/csv_data
      database: database
      connection_name: connection_name
      # Constant value: the domain is always "exposures" for this workflow.
      domain:
        valueFrom: "exposures"
    out: [log, errors]

# Outputs expose the aggregated CSV plus the log/error files of both the
# aggregate and the ingest steps.
outputs:
  aggregate_data:
    type: File?
    outputSource: aggregate/csv_data
  aggregate_log:
    type: File?
    outputSource: aggregate/log
  aggregate_err:
    type: File
    outputSource: aggregate/errors

  ingest_log:
    type: File?
    outputSource: ingest/log
  ingest_err:
    type: File
    outputSource: ingest/errors
160    outputSource: ingest/errors