gridmet_local_shapes.cwl

#!/usr/bin/env cwl-runner
### gridMET Pipeline
#  Copyright (c) 2021. Harvard University
#
#  Developed by Research Software Engineering,
#  Faculty of Arts and Sciences, Research Computing (FAS RC)
#  Author: Quantori LLC
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#

cwlVersion: v1.2
class: Workflow

doc: |
  Downloads, processes gridMET data and ingests it into the database

# Workflow-engine features this document relies on.
requirements:
  # The target table name is built with a JavaScript string concatenation
  # inside a step-input expression.
  InlineJavascriptRequirement: {}
  # NOTE(review): no step in this file lists more than one source for a
  # single input; kept as declared -- confirm whether it is still needed.
  MultipleInputFeatureRequirement: {}
  # Both the per-band step and the per-year step use `scatter`.
  ScatterFeatureRequirement: {}
  # Several step inputs are computed or fixed with `valueFrom`.
  StepInputExpressionRequirement: {}
  # The per-band `process` step embeds a nested Workflow.
  SubworkflowFeatureRequirement: {}

# CPU request expressed as a hint rather than a requirement.
hints:
  ResourceRequirement:
    coresMin: 24
    coresMax: 32

# Workflow inputs. Except for `bands` and `years`, which are scattered over,
# every value is passed unchanged to each per-band sub-workflow.
inputs:
  proxy:
    type: string?
    default: ""
    doc: HTTP/HTTPS Proxy if required
  shapes:
    type: Directory?
    doc: |
      Optional directory passed unchanged to every add_daily_data.cwl
      invocation as its `shapes` input
  geography:
    type: string
    doc: |
      Type of geography: zip codes or counties
  years:
    type: string[]
#    default: ['1999', '2000', '2001', '2002', '2003', '2004', '2005', '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017', '2018', '2019', '2020']
    doc: |
      Years to process. add_daily_data.cwl is run once per year
      for every requested band
  bands:
    type: string[]
#    default: ['bi', 'erc', 'etr', 'fm100', 'fm1000', 'pet', 'pr', 'rmax', 'rmin', 'sph', 'srad', 'th', 'tmmn', 'tmmx', 'vpd', 'vs']
    doc: |
      Band codes to process. The workflow scatters over this list and
      loads each band into its own table named <geography>_<band>
  database:
    type: File
    doc: Path to database connection file, usually database.ini
  connection_name:
    type: string
    doc: The name of the section in the database.ini file
  dates:
    type: string?
    doc: 'dates restriction, for testing purposes only'
  # NOTE(review): presumably a NetCDF raster, judging by the name -- confirm.
  nc:
    type: File
    doc: |
      File passed to add_daily_data.cwl as its `input`. The same file
      is reused for every year and every band
  shape_files:
    type: File[]
    doc: |
      Shape files passed to add_daily_data.cwl. Each file must be
      accompanied by sidecar files that share its base name and have
      the extensions .dbf, .shx, .prj and .cpg
    # A leading caret replaces the primary file's extension with the suffix.
    secondaryFiles:
      - "^.dbf"
      - "^.shx"
      - "^.prj"
      - "^.cpg"

steps:
  # Takes no inputs. Its `model` output is consumed by the per-band
  # `process` step.
  registry:
    doc: Writes down YAML file with the database model
    run: registry.cwl
    in: []
    out: [model, log, errors]

  # Per-band pipeline. The step is scattered over `bands`, so the inline
  # workflow below runs once per band: it processes every requested year,
  # then ingests, indexes and vacuums the table belonging to that band.
  process:
    scatter: band
    in:
      # Scattered input: one element of `bands` per invocation.
      band: bands
      # Database model produced by the registry step.
      model: registry/model
      # Target table name: "<geography>_<band>".
      table:
        valueFrom: $(inputs.geography + '_' + inputs.band)
      # Everything else is passed through from the workflow inputs.
      years: years
      dates: dates
      geography: geography
      shapes: shapes
      shape_files: shape_files
      nc: nc
      proxy: proxy
      database: database
      connection_name: connection_name

    run:
      class: Workflow

      # Mirror of the bindings above; `band` and `table` are scalars here
      # because the outer scatter has already selected a single band.
      inputs:
        band:
          type: string
        table:
          type: string
        model:
          type: File
        years:
          type: string[]
        dates:
          type: string?
        geography:
          type: string
        shapes:
          type: Directory?
        shape_files:
          type: File[]
        nc:
          type: File
        proxy:
          type: string?
        database:
          type: File
        connection_name:
          type: string

      steps:
        # One add_daily_data.cwl run per element of `years`.
        process:
          doc: Processes data
          run: add_daily_data.cwl
          scatter: year
          scatterMethod: nested_crossproduct
          in:
            year: years
            band: band
            dates: dates
            geography: geography
            shapes: shapes
            shape_files: shape_files
            input: nc
            proxy: proxy
          out: [data, log]

        # Receives the data files of all years at once.
        ingest:
          doc: Uploads data into the database
          run: ingest.cwl
          in:
            input: process/data
            registry: model
            domain:
              valueFrom: "gridmet"
            table: table
            database: database
            connection_name: connection_name
          out:
            - log
            - errors

        # Consuming `ingest/log` makes this step wait for ingest to finish.
        index:
          run: index.cwl
          in:
            depends_on: ingest/log
            registry: model
            domain:
              valueFrom: "gridmet"
            table: table
            database: database
            connection_name: connection_name
          out:
            - log
            - errors

        # Consuming `index/log` makes this step wait for index to finish.
        vacuum:
          run: vacuum.cwl
          in:
            depends_on: index/log
            registry: model
            domain:
              valueFrom: "gridmet"
            table: table
            database: database
            connection_name: connection_name
          out:
            - log
            - errors

      outputs:
        # Arrays: one entry per year.
        process_data:
          outputSource: process/data
          type: File[]
        process_log:
          outputSource: process/log
          type: File[]

        # Single files: each of these steps runs once per band.
        ingest_log:
          outputSource: ingest/log
          type: File
        ingest_err:
          outputSource: ingest/errors
          type: File

        index_log:
          outputSource: index/log
          type: File
        index_err:
          outputSource: index/errors
          type: File

        vacuum_log:
          outputSource: vacuum/log
          type: File
        vacuum_err:
          outputSource: vacuum/errors
          type: File

    out: [process_data, process_log, ingest_log, ingest_err, index_log, index_err, vacuum_log, vacuum_err]



# Workflow outputs. Everything sourced from the `process` step gains one
# array level because that step is scattered over `bands`.
outputs:
  registry:
    type: File?
    outputSource: registry/model
  registry_log:
    type: File?
    outputSource: registry/log
  # The registry step declares an `errors` output as well; expose it so
  # that every step's error stream is collected, not only the per-band ones.
  # NOTE(review): assumes registry.cwl emits a single file here -- confirm.
  registry_err:
    type: File?
    outputSource: registry/errors

  # Outer array: one entry per band; inner array: one file per year.
  data:
    type:
      type: array
      items:
        type: array
        items: File
    outputSource: process/process_data

  # Same nesting as `data`.
  process_log:
    type:
      type: array
      items:
        type: array
        items: File
    outputSource: process/process_log

  # One file per band for each of the remaining outputs.
  ingest_log:
    type: File[]
    outputSource: process/ingest_log
  ingest_err:
    type: File[]
    outputSource: process/ingest_err

  index_log:
    type: File[]
    outputSource: process/index_log
  index_err:
    type: File[]
    outputSource: process/index_err

  vacuum_log:
    type: File[]
    outputSource: process/vacuum_log
  vacuum_err:
    type: File[]
    outputSource: process/vacuum_err