wustl_one_year.cwl

#!/usr/bin/env cwl-runner
### Workflow to aggregate and ingest NetCDF files for one year
#  Copyright (c) 2021-2022. Harvard University
#
#  Developed by Research Software Engineering,
#  Faculty of Arts and Sciences, Research Computing (FAS RC)
#  Author: Michael A Bouzinier
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#

cwlVersion: v1.2
class: Workflow

# Optional CWL features this workflow depends on.
requirements:
  # Steps run separate CWL documents (get_shapes.cwl, wustl_one_file.cwl);
  # needed when any of them is itself a Workflow.
  SubworkflowFeatureRequirement: {}
  # The process_files step is scattered over `month`.
  ScatterFeatureRequirement: {}
  # The get_shapes step derives its `year` input with `valueFrom`.
  StepInputExpressionRequirement: {}
  # That `valueFrom` expression calls JavaScript `String(...)`.
  InlineJavascriptRequirement: {}
  # NOTE(review): no step input in this file lists more than one source;
  # confirm whether this requirement is still needed.
  MultipleInputFeatureRequirement: {}


doc: |
  Sub-workflow to aggregate NetCDF files for one year over a given
  geography (zip codes or counties) and ingest the
  aggregated data into the database. Before aggregation, downloads
  shape files for this year from the US Census website.

# Workflow parameters. Unless noted otherwise, each one is forwarded
# unchanged to the scattered `process_files` step (wustl_one_file.cwl).
inputs:
  depends_on:
    # Not referenced by any step of this workflow.
    # NOTE(review): presumably exists only so that a parent workflow can
    # order this sub-workflow after another step - confirm.
    type: Any?
  proxy:
    # Forwarded to the `get_shapes` step only.
    type: string?
    default: ""
    doc: HTTP/HTTPS Proxy if required
  downloads:
    # NOTE(review): presumably the directory holding the NetCDF files to
    # aggregate - confirm against wustl_one_file.cwl.
    type: Directory
  geography:
    # Forwarded to `get_shapes` (as `geo`) and to `process_files`.
    # The workflow doc mentions zip codes or counties.
    type: string
  shape_file_collection:
    # Forwarded to `get_shapes` as `collection`.
    # NOTE(review): the default is lowercase while the doc names the
    # collections in uppercase - confirm whether case matters downstream.
    type: string
    default: tiger
    doc: |
      [Collection of shapefiles](https://www2.census.gov/geo/tiger),
      either GENZ or TIGER
  table:
    # NOTE(review): presumably the target database table - confirm.
    type: string
  band:
    type: string
    default: pm25
  months:
    # Scatter source: `process_files` runs once per element of this array.
    type: int[]
  year:
    # Passed as an int to `process_files` and, converted to a string,
    # to `get_shapes`.
    type: int
  strategy:
    type: string
    doc: "Rasterization strategy"
  ram:
    type: string
    default: 2GB
    doc: Runtime memory, available to the process
  database:
    # NOTE(review): presumably a connection-definition file in which
    # `connection_name` selects an entry - confirm.
    type: File
  connection_name:
    type: string

steps:
  # Obtains the shape files for the requested year and geography by
  # running get_shapes.cwl (the workflow doc says they are downloaded
  # from the US Census website).
  get_shapes:
    run: get_shapes.cwl
    in:
      geo: geography
      collection: shape_file_collection
      proxy: proxy
      # `yy` carries the integer workflow input `year` so that the
      # expression below can read it as `inputs.yy`.
      yy: year
      # The tool's `year` receives the year rendered as a string by
      # JavaScript `String()`.
      year:
        valueFrom: $(String(inputs.yy))
    out:
      - shape_files

  # Runs wustl_one_file.cwl once per element of the workflow input
  # `months` (scatter over the step input `month`). All other inputs are
  # passed identically to every invocation; `shape_files` comes from the
  # `get_shapes` step, so this step runs after it. Because the step is
  # scattered, each listed output is gathered into an array with one
  # entry per month.
  process_files:
    doc: Aggregates and ingests relevant files
    run: wustl_one_file.cwl
    scatter:
      - month
    in:
      year: year
      month: months
      band: band
      table: table
      geography: geography
      strategy: strategy
      ram: ram
      database: database
      connection_name: connection_name
      shape_files: get_shapes/shape_files
      downloads: downloads
    out:
      - aggregate_data
      - aggregate_log
      - aggregate_err
      - ingest_log
      - ingest_err

# Workflow results. Every output is taken from the scattered
# `process_files` step and is therefore an array with one entry per
# element of `months`.
outputs:
  # Outputs of the aggregation stage.
  aggregate_data:
    outputSource: process_files/aggregate_data
    type: File[]
  aggregate_log:
    outputSource: process_files/aggregate_log
    type: File[]
  aggregate_err:
    outputSource: process_files/aggregate_err
    type: File[]
  # Outputs of the ingestion stage.
  ingest_log:
    outputSource: process_files/ingest_log
    type: File[]
  ingest_err:
    outputSource: process_files/ingest_err
    type: File[]