#!/usr/bin/env cwl-runner
### Workflow to load Medicaid data from files
# Copyright (c) 2021. Harvard University
#
# Developed by Research Software Engineering,
# Faculty of Arts and Sciences, Research Computing (FAS RC)
# Author: Michael A Bouzinier
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Document header: this file is a CWL v1.2 Workflow.
cwlVersion: v1.2
class: Workflow

# Optional CWL features this workflow declares:
#   SubworkflowFeatureRequirement  - allows a step to run another Workflow
#                                    (NOTE(review): whether the documents run by
#                                    the steps are workflows is not visible here)
#   StepInputExpressionRequirement - needed for `valueFrom` on a step input
#                                    (used by the `pattern` input of step `create`)
#   InlineJavascriptRequirement    - needed for the `${ ... }` JavaScript body
#                                    of that `valueFrom` expression
requirements:
  SubworkflowFeatureRequirement: {}
  StepInputExpressionRequirement: {}
  InlineJavascriptRequirement: {}

doc: |
  This tool is a shortcut to ingest CMS Medicaid raw data

# Workflow-level parameters. Unless noted otherwise, each one is forwarded
# unchanged to the steps that list it in their `in` section.
inputs:
  registry:
    type: File?
    doc: |
      A path to the data model file
  input:
    type: Directory
    doc: |
      A path to directory, containing unpacked CMS
      files. The tool will recursively look for data files
      according to provided pattern
  database:
    type: File
    doc: Path to database connection file, usually database.ini
  connection_name:
    type: string
    doc: The name of the section in the database.ini file
  table:
    type: string
    doc: the name of the table to be created
  domain:
    type: string
    doc: the name of the domain
    default: cms
  incremental:
    type: boolean
    default: true
    doc: |
      Flag forwarded to the `create` and `index` steps; its exact
      effect is defined by the tools those steps run

  # NOTE(review): no step in this file takes this workflow-level input as a
  # source (each step's own `depends_on` is fed from the previous step's log),
  # so supplying it currently has no effect on execution order here.
  depends_on:
    type: File?
    doc: a special field used to enforce dependencies and execution order

# The four steps form a linear chain: reset -> create -> index -> vacuum.
# Ordering is enforced through data dependencies: every step after `reset`
# receives the previous step's `log` output as its `depends_on` input.
steps:
  reset:
    run: reset.cwl
    doc: Initializes Raw CMS tables
    in:
      registry: registry
      domain: domain
      table: table
      database: database
      connection_name: connection_name
    out: [log, errors]

  create:
    run: load_raw_medicaid.cwl
    doc: Run data loader to load files to the database
    in:
      depends_on: reset/log
      registry: registry
      domain: domain
      table: table
      database: database
      input: input
      connection_name: connection_name
      incremental: incremental
      # File-name glob computed from the table name. The table name is used
      # verbatim inside the pattern, except that table 'admissions' is looked
      # up under the file-name token 'ip'.
      pattern:
        valueFrom: |
          ${
            var table = inputs.table;
            if (inputs.table == 'admissions') {
              table = 'ip';
            }
            return "**/maxdata_*_" + table + "_*.csv*";
          }
    out: [ log, errors ]

  index:
    run: index.cwl
    doc: Build indices
    in:
      depends_on: create/log
      registry: registry
      domain: domain
      table: table
      incremental: incremental
      database: database
      connection_name: connection_name

    out: [ log, errors ]

  vacuum:
    run: vacuum.cwl
    doc: Vacuum the view
    in:
      depends_on: index/log
      registry: registry
      domain: domain
      table: table
      database: database
      connection_name: connection_name
    out: [ log, errors ]

# Workflow outputs: the `log` and `errors` file of every step, exposed under
# `<step>_log` and `<step>_err` respectively.
outputs:
  # Per-step log files
  reset_log:
    type: File
    outputSource: reset/log
  create_log:
    type: File
    outputSource: create/log
  index_log:
    type: File
    outputSource: index/log
  vacuum_log:
    type: File
    outputSource: vacuum/log

  # Per-step error files
  reset_err:
    type: File
    outputSource: reset/errors
  create_err:
    type: File
    outputSource: create/errors
  index_err:
    type: File
    outputSource: index/errors
  vacuum_err:
    type: File
    outputSource: vacuum/errors