ingest_cms.cwl

  1#!/usr/bin/env cwl-runner
  2### Workflow to load Medicaid data from files
  3#  Copyright (c) 2021. Harvard University
  4#
  5#  Developed by Research Software Engineering,
  6#  Faculty of Arts and Sciences, Research Computing (FAS RC)
  7#  Author: Michael A Bouzinier
  8#
  9#  Licensed under the Apache License, Version 2.0 (the "License");
 10#  you may not use this file except in compliance with the License.
 11#  You may obtain a copy of the License at
 12#
 13#         http://www.apache.org/licenses/LICENSE-2.0
 14#
 15#  Unless required by applicable law or agreed to in writing, software
 16#  distributed under the License is distributed on an "AS IS" BASIS,
 17#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 18#  See the License for the specific language governing permissions and
 19#  limitations under the License.
 20#
 21
 22cwlVersion: v1.2
 23class: Workflow
 24
 25requirements:
 26  InlineJavascriptRequirement: {}
 27  StepInputExpressionRequirement: {}
 28  SubworkflowFeatureRequirement: {}
 29
 30doc: |
 31  Ingests raw CMS Medicaid data: reset, load, index, then vacuum a table
 32
 33inputs:
 34  registry:
 35    type: File?
 36    doc: |
 37      Optional path to the data model file
 38  input:
 39    type: Directory
 40    doc: |
 41      Path to a directory containing unpacked CMS files.
 42      It is searched recursively for data files whose names
 43      match a pattern derived from the table name
 44  database:
 45    type: File
 46    doc: Path to database connection file, usually database.ini
 47  connection_name:
 48    type: string
 49    doc: The name of the section in the database.ini file
 50  table:
 51    type: string
 52    doc: The name of the table to be created
 53  domain:
 54    type: string
 55    doc: The name of the domain
 56    default: cms
 57  incremental:
 58    type: boolean
 59    default: true
 60    doc: Flag forwarded to the create (load) and index steps
 61  depends_on:
 62    type: File?
 63    doc: A special field used to enforce dependencies and execution order
 64
 65steps:
 66  reset:
 67    doc: Initialize the raw CMS tables
 68    run: reset.cwl
 69    in:
 70      database: database
 71      connection_name: connection_name
 72      registry: registry
 73      domain: domain
 74      table: table
 75    out: [log, errors]
 76
 77  create:
 78    doc: Run data loader to load files to the database
 79    run: load_raw_medicaid.cwl
 80    in:
 81      depends_on: reset/log
 82      input: input
 83      registry: registry
 84      domain: domain
 85      table: table
 86      database: database
 87      connection_name: connection_name
 88      incremental: incremental
 89      # Glob for the raw files of this table; the 'admissions'
 90      # table is matched with the tag 'ip' instead of its own name.
 91      pattern:
 92        valueFrom: |
 93          ${
 94            var tag = (inputs.table == 'admissions') ? 'ip' : inputs.table;
 95            return "**/maxdata_*_" + tag + "_*.csv*";
 96          }
 97    out: [ log, errors ]
 98
 99  index:
100    doc: Build indices
101    run: index.cwl
102    in:
103      # Sourcing create/log makes this step wait for the load to finish
104      depends_on: create/log
105      registry: registry
106      domain: domain
107      table: table
108      database: database
109      connection_name: connection_name
110      incremental: incremental
111    out: [ log, errors ]
112
113  vacuum:
114    doc: Vacuum the view
115    run: vacuum.cwl
116    in:
117      depends_on: index/log
118      database: database
119      connection_name: connection_name
120      registry: registry
121      domain: domain
122      table: table
123    out: [ log, errors ]
124
125outputs:
126  # Every step publishes a log file and an errors file
127  reset_log:
128    type: File
129    outputSource: reset/log
130  reset_err:
131    type: File
132    outputSource: reset/errors
133  create_log:
134    type: File
135    outputSource: create/log
136  create_err:
137    type: File
138    outputSource: create/errors
139  index_log:
140    type: File
141    outputSource: index/log
142  index_err:
143    type: File
144    outputSource: index/errors
145  vacuum_log:
146    type: File
147    outputSource: vacuum/log
148  vacuum_err:
149    type: File
150    outputSource: vacuum/errors