#!/usr/bin/env cwl-runner
### Medicare data ingestion and processing pipeline
# Copyright (c) 2022. Harvard University
#
# Developed by Research Software Engineering,
# Faculty of Arts and Sciences, Research Computing (FAS RC)
# Author: Michael A Bouzinier
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Document header: CWL dialect version and process class.
cwlVersion: v1.2
class: Workflow

# Optional CWL features that the runner must support for this workflow.
requirements:
  # Needed when a step's `run` target is itself a Workflow.
  # NOTE(review): presumably the case for the multi-output steps of this
  # workflow; the referenced .cwl files are not visible here - confirm.
  SubworkflowFeatureRequirement: {}
  # Permits `valueFrom` expressions on step inputs.
  StepInputExpressionRequirement: {}
  # Permits inline JavaScript expressions.
  InlineJavascriptRequirement: {}

doc: |
  This workflow processes raw Medicare data. We assume that the data
  for each year is in a separate set of SAS DAT files accompanied by FTS.
  For each year we expect at least
  two tables: patient summary and inpatient admissions.

  > NB: Input files must be organized within the directory given in
  the `input` parameter in a certain way. Immediate parent folder for
  each file should be named as the year of the data it contains. Example:

      data/
        a/
          b/
            2011/
            2013/
        d/
          2017/

  See [](../Medicare) for data processing details.

# Workflow-level parameters. `database` and `connection_name` are passed
# unchanged to every step; `input` is consumed by the `load_raw_data` step.
inputs:
  database:
    type: File
    doc: Path to database connection file, usually database.ini
  connection_name:
    type: string
    doc: The name of the section in the database.ini file
  input:
    type: Directory
    doc: |
      A path to directory, containing folders with unpacked CMS
      files. The tool will recursively look for data files
      according to provided pattern. Immediate parent folder for
      each file should be named as the year of the data it contains, e.g.
      a/b/c/2017/mbsf_abcd_xyzacdfrtwe_request12345.fts

# Pipeline stages. The steps form a single chain:
#   initdb -> load_raw_data -> enrollments -> admissions -> qc -> grant
# Every step after `initdb` takes one output of its predecessor through the
# `depends_on` input, so a CWL runner cannot start it before the predecessor
# has produced that file.
# NOTE(review): how each sub-process uses `depends_on` is defined in the
# referenced .cwl files, which are not visible here; it appears to serve
# only as an ordering constraint - confirm.
steps:
  initdb:
    run: initdb.cwl
    doc: Ensure that database utilities are at their latest version
    in:
      database: database
      connection_name: connection_name
    out:
      - log
      - err

  load_raw_data:
    run: load_raw_medicare.cwl
    doc: Load raw CMS Medicare data into the database
    in:
      database: database
      connection_name: connection_name
      depends_on: initdb/log
      input: input
    out:
      - log
      - registry
      - err

  enrollments:
    run: medicare_beneficiaries.cwl
    doc: >
      Process beneficiaries enrollment data
    in:
      database: database
      connection_name: connection_name
      depends_on: load_raw_data/registry
    out:
      - d_create_log
      - d_index_log
      - d_vacuum_log
      - d_create_err
      - d_index_err
      - d_vacuum_err
      - ps_create_log
      - ps_create_err
      - ps2_create_log
      - ps2_create_err
      - bene_view_log
      - bene_view_err
      - bene_table_create_log
      - bene_table_index_log
      - bene_table_vacuum_log
      - bene_table_create_err
      - bene_table_index_err
      - bene_table_vacuum_err
      - enrlm_view_log
      - enrlm_view_err
      - enrlm_table_create_log
      - enrlm_table_index_log
      - enrlm_table_vacuum_log
      - enrlm_table_create_err
      - enrlm_table_index_err
      - enrlm_table_vacuum_err

  admissions:
    run: medicare_admissions.cwl
    doc: Process medicare inpatient admissions (aka Medpar) data
    in:
      database: database
      connection_name: connection_name
      depends_on: enrollments/enrlm_table_vacuum_log
    out:
      - ip_create_log
      - ip_create_err
      - adm_create_log
      - adm_create_err
      - adm_populate_log
      - adm_populate_err
      - adm_index_log
      - adm_index_err
      - adm_vacuum_log
      - adm_vacuum_err

  qc:
    run: medicare_qc.cwl
    doc: Build QC Tables
    in:
      database: database
      connection_name: connection_name
      depends_on: admissions/adm_vacuum_log
    out:
      - ev_create_log
      - ev_create_err
      - av_create_log
      - av_create_err
      - enrollmen343_create_log
      - enrollmen343_index_log
      - enrollmen343_vacuum_log
      - enrollmen343_create_err
      - enrollmen343_index_err
      - enrollmen343_vacuum_err
      - admission697_create_log
      - admission697_index_log
      - admission697_vacuum_log
      - admission697_create_err
      - admission697_index_err
      - admission697_vacuum_err

  grant:
    run: alter_database.cwl
    doc: |
      Grants read access to the members of NSAPH group for newly created
      or updated tables
    in:
      database: database
      connection_name: connection_name
      depends_on: qc/admission697_vacuum_log
    out:
      - log
      - err

# Workflow outputs: every log / err file (and the load registry) produced by
# the steps is re-exported under a workflow-level name. Outputs of the `qc`
# and `grant` steps carry a `qc_` / `grant_` prefix; `initdb` and
# `load_raw_data` outputs are prefixed `initdb_` / `load_raw_` (except
# `registry`); `enrollments` and `admissions` outputs keep the step-level
# names.
outputs:
  # Outputs of the `initdb` step
  initdb_log:
    type: File
    outputSource: initdb/log
  initdb_err:
    type: File
    outputSource: initdb/err

  # Outputs of the `load_raw_data` step
  load_raw_log:
    type: File
    outputSource: load_raw_data/log
  load_raw_err:
    type: File
    outputSource: load_raw_data/err
  registry:
    type: File
    outputSource: load_raw_data/registry

  ## Generated by nsaph/util/cwl_collect_outputs.py from medicare_beneficiaries.cwl:
  d_create_log:
    type: File
    outputSource: enrollments/d_create_log
  d_create_err:
    type: File
    outputSource: enrollments/d_create_err
  d_index_log:
    type: File
    outputSource: enrollments/d_index_log
  d_index_err:
    type: File
    outputSource: enrollments/d_index_err
  d_vacuum_log:
    type: File
    outputSource: enrollments/d_vacuum_log
  d_vacuum_err:
    type: File
    outputSource: enrollments/d_vacuum_err

  ps_create_log:
    type: File
    outputSource: enrollments/ps_create_log
  ps_create_err:
    type: File
    outputSource: enrollments/ps_create_err
  ps2_create_log:
    type: File
    outputSource: enrollments/ps2_create_log
  ps2_create_err:
    type: File
    outputSource: enrollments/ps2_create_err
  bene_view_log:
    type: File
    outputSource: enrollments/bene_view_log
  bene_view_err:
    type: File
    outputSource: enrollments/bene_view_err
  bene_table_create_log:
    type: File
    outputSource: enrollments/bene_table_create_log
  bene_table_index_log:
    type: File
    outputSource: enrollments/bene_table_index_log
  bene_table_vacuum_log:
    type: File
    outputSource: enrollments/bene_table_vacuum_log
  bene_table_create_err:
    type: File
    outputSource: enrollments/bene_table_create_err
  bene_table_index_err:
    type: File
    outputSource: enrollments/bene_table_index_err
  bene_table_vacuum_err:
    type: File
    outputSource: enrollments/bene_table_vacuum_err
  enrlm_view_log:
    type: File
    outputSource: enrollments/enrlm_view_log
  enrlm_view_err:
    type: File
    outputSource: enrollments/enrlm_view_err
  enrlm_table_create_log:
    type: File
    outputSource: enrollments/enrlm_table_create_log
  enrlm_table_index_log:
    type: File
    outputSource: enrollments/enrlm_table_index_log
  enrlm_table_vacuum_log:
    type: File
    outputSource: enrollments/enrlm_table_vacuum_log
  enrlm_table_create_err:
    type: File
    outputSource: enrollments/enrlm_table_create_err
  enrlm_table_index_err:
    type: File
    outputSource: enrollments/enrlm_table_index_err
  enrlm_table_vacuum_err:
    type: File
    outputSource: enrollments/enrlm_table_vacuum_err

  ## Generated by nsaph/util/cwl_collect_outputs.py from medicare_admissions.cwl:
  ip_create_log:
    type: File
    outputSource: admissions/ip_create_log
  ip_create_err:
    type: File
    outputSource: admissions/ip_create_err
  adm_create_log:
    type: File
    outputSource: admissions/adm_create_log
  adm_create_err:
    type: File
    outputSource: admissions/adm_create_err
  adm_populate_log:
    type: File
    outputSource: admissions/adm_populate_log
  adm_populate_err:
    type: File
    outputSource: admissions/adm_populate_err
  adm_index_log:
    type: File
    outputSource: admissions/adm_index_log
  adm_index_err:
    type: File
    outputSource: admissions/adm_index_err
  adm_vacuum_log:
    type: File
    outputSource: admissions/adm_vacuum_log
  adm_vacuum_err:
    type: File
    outputSource: admissions/adm_vacuum_err

  ## Generated by nsaph/util/cwl_collect_outputs.py from medicare_qc.cwl:
  qc_ev_create_log:
    type: File
    outputSource: qc/ev_create_log
  qc_ev_create_err:
    type: File
    outputSource: qc/ev_create_err
  qc_av_create_log:
    type: File
    outputSource: qc/av_create_log
  qc_av_create_err:
    type: File
    outputSource: qc/av_create_err
  qc_enrollmen343_create_log:
    type: File
    outputSource: qc/enrollmen343_create_log
  qc_enrollmen343_index_log:
    type: File
    outputSource: qc/enrollmen343_index_log
  qc_enrollmen343_vacuum_log:
    type: File
    outputSource: qc/enrollmen343_vacuum_log
  qc_enrollmen343_create_err:
    type: File
    outputSource: qc/enrollmen343_create_err
  qc_enrollmen343_index_err:
    type: File
    outputSource: qc/enrollmen343_index_err
  qc_enrollmen343_vacuum_err:
    type: File
    outputSource: qc/enrollmen343_vacuum_err
  qc_admission697_create_log:
    type: File
    outputSource: qc/admission697_create_log
  qc_admission697_index_log:
    type: File
    outputSource: qc/admission697_index_log
  qc_admission697_vacuum_log:
    type: File
    outputSource: qc/admission697_vacuum_log
  qc_admission697_create_err:
    type: File
    outputSource: qc/admission697_create_err
  qc_admission697_index_err:
    type: File
    outputSource: qc/admission697_index_err
  qc_admission697_vacuum_err:
    type: File
    outputSource: qc/admission697_vacuum_err

  ## Generated by nsaph/util/cwl_collect_outputs.py from grant_read_access.cwl:
  # NOTE(review): the `grant` step of this workflow runs alter_database.cwl,
  # not grant_read_access.cwl - confirm that the file name above is current.
  grant_log:
    type: File
    outputSource: grant/log
  grant_err:
    type: File
    outputSource: grant/err