medicare.cwl

  1#!/usr/bin/env cwl-runner
  2### Medicare data ingestion and processing pipeline
  3#  Copyright (c) 2022. Harvard University
  4#
  5#  Developed by Research Software Engineering,
  6#  Faculty of Arts and Sciences, Research Computing (FAS RC)
  7#  Author: Michael A Bouzinier
  8#
  9#  Licensed under the Apache License, Version 2.0 (the "License");
 10#  you may not use this file except in compliance with the License.
 11#  You may obtain a copy of the License at
 12#
 13#         http://www.apache.org/licenses/LICENSE-2.0
 14#
 15#  Unless required by applicable law or agreed to in writing, software
 16#  distributed under the License is distributed on an "AS IS" BASIS,
 17#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 18#  See the License for the specific language governing permissions and
 19#  limitations under the License.
 20#
 21
 22cwlVersion: v1.2
 23class: Workflow  # this document is a multi-step workflow; see `steps`
 24
 25requirements:
 26  SubworkflowFeatureRequirement: {}  # presumably needed because some `run` targets are themselves workflows - TODO confirm
 27  StepInputExpressionRequirement: {}  # NOTE(review): no step-input expressions (valueFrom) appear in this file - confirm still required
 28  InlineJavascriptRequirement: {}  # NOTE(review): no JavaScript expressions appear in this file - confirm still required
 29
 30doc: |
 31  This workflow processes raw Medicare data. We assume that the data
 32  for each year is in a separate set of SAS DAT files accompanied by FTS.
 33  For each year we expect at least
 34  two tables: patient summary and inpatient admissions.
 35
 36  > NB: Input files must be organized within the directory given in
 37  the `input` parameter in a certain way. Immediate parent folder for
 38  each file should be named as the year of the data it contains. Example:
 39
 40      data/
 41        a/
 42          b/
 43            2011/
 44            2013/
 45          d/
 46            2017/
 47
 48  See [](../Medicare) for data processing details.
 49
 50inputs:  # workflow parameters; `database` and `connection_name` are forwarded to every step
 51  database:
 52    type: File
 53    doc: Path to database connection file, usually database.ini
 54  connection_name:
 55    type: string
 56    doc: The name of the section in the database.ini file
 57  input:  # forwarded only to the load_raw_data step
 58    type: Directory
 59    doc: |
 60      A path to directory, containing folders with unpacked CMS
 61      files. The tool will recursively look for data files
 62      according to provided pattern. Immediate parent folder for
 63      each file should be named as the year of the data it contains, e.g.
 64      a/b/c/2017/mbsf_abcd_xyzacdfrtwe_request12345.fts
 65
 66steps:
 67  initdb:  # first step in the chain: its inputs come only from workflow inputs
 68    run: initdb.cwl
 69    doc: Ensure that database utilities are at their latest version
 70    in:
 71      database: database
 72      connection_name: connection_name
 73    out:
 74      - log  # also consumed by load_raw_data as its depends_on input
 75      - err
 76
 77  load_raw_data:  # the only step that receives the workflow's `input` directory
 78    run: load_raw_medicare.cwl
 79    doc: Load raw CMS Medicare data into the database
 80    in:
 81      database: database
 82      connection_name: connection_name
 83      depends_on: initdb/log  # data link to initdb, so this step starts after initdb; presumably used only for ordering - confirm in load_raw_medicare.cwl
 84      input: input
 85    out:
 86      - log
 87      - registry  # also consumed by the enrollments step as its depends_on input
 88      - err
 89
 90  enrollments:  # runs after load_raw_data (linked through depends_on below)
 91    run: medicare_beneficiaries.cwl
 92    doc: >
 93      Process beneficiaries enrollment data
 94    in:
 95      database: database
 96      connection_name: connection_name
 97      depends_on: load_raw_data/registry  # data link that makes this step wait for load_raw_data
 98    out:  # each id below is re-exported under the same name in the workflow `outputs`
 99      - d_create_log
100      - d_index_log
101      - d_vacuum_log
102      - d_create_err
103      - d_index_err
104      - d_vacuum_err
105      - ps_create_log
106      - ps_create_err
107      - ps2_create_log
108      - ps2_create_err
109      - bene_view_log
110      - bene_view_err
111      - bene_table_create_log
112      - bene_table_index_log
113      - bene_table_vacuum_log
114      - bene_table_create_err
115      - bene_table_index_err
116      - bene_table_vacuum_err
117      - enrlm_view_log
118      - enrlm_view_err
119      - enrlm_table_create_log
120      - enrlm_table_index_log
121      - enrlm_table_vacuum_log  # also consumed by the admissions step as its depends_on input
122      - enrlm_table_create_err
123      - enrlm_table_index_err
124      - enrlm_table_vacuum_err
125
126  admissions:  # runs after enrollments (linked through depends_on below)
127    run: medicare_admissions.cwl
128    doc: Process medicare inpatient admissions (aka Medpar) data
129    in:
130      database: database
131      connection_name: connection_name
132      depends_on: enrollments/enrlm_table_vacuum_log  # data link that makes this step wait for enrollments
133    out:  # each id below is re-exported under the same name in the workflow `outputs`
134      - ip_create_log
135      - ip_create_err
136      - adm_create_log
137      - adm_create_err
138      - adm_populate_log
139      - adm_populate_err
140      - adm_index_log
141      - adm_index_err
142      - adm_vacuum_log  # also consumed by the qc step as its depends_on input
143      - adm_vacuum_err
144
145
146  qc:  # runs after admissions (linked through depends_on below)
147    run: medicare_qc.cwl
148    doc: Build QC Tables
149    in:
150      database: database
151      connection_name: connection_name
152      depends_on: admissions/adm_vacuum_log  # data link that makes this step wait for admissions
153    out:  # each id below is re-exported in the workflow `outputs` with a `qc_` prefix
154      - ev_create_log
155      - ev_create_err
156      - av_create_log
157      - av_create_err
158      - enrollmen343_create_log  # NOTE(review): enrollmen343/admission697 look machine-generated; they must match the ids in medicare_qc.cwl - verify
159      - enrollmen343_index_log
160      - enrollmen343_vacuum_log
161      - enrollmen343_create_err
162      - enrollmen343_index_err
163      - enrollmen343_vacuum_err
164      - admission697_create_log
165      - admission697_index_log
166      - admission697_vacuum_log  # also consumed by the grant step as its depends_on input
167      - admission697_create_err
168      - admission697_index_err
169      - admission697_vacuum_err
170
171  grant:  # last step in the chain: no other step consumes its outputs
172    run: alter_database.cwl
173    doc: |
174      Grants read access to the members of NSAPH group for newly created
175      or updated tables
176    in:
177      database: database
178      connection_name: connection_name
179      depends_on: qc/admission697_vacuum_log  # data link that makes this step wait for qc
180    out:
181      - log
182      - err
183
184outputs:  # every output declared by a step in `steps` is re-exported here as a workflow-level File
185  ## Generated by nsaph/util/cwl_collect_outputs.py from medicare_beneficiaries.cwl:
186    initdb_log:  # NOTE(review): the "Generated ..." comment above precedes initdb/load_raw_data outputs; enrollments outputs begin at d_create_log
187      type: File
188      outputSource: initdb/log
189    initdb_err:
190      type: File
191      outputSource: initdb/err
192
193    load_raw_log:  # load_raw_data outputs are exposed with a shortened `load_raw_` prefix
194      type: File
195      outputSource: load_raw_data/log
196    load_raw_err:
197      type: File
198      outputSource: load_raw_data/err
199    registry:  # exposed without a prefix
200      type: File
201      outputSource: load_raw_data/registry
202
203    d_create_log:  # from here through enrlm_table_vacuum_err: outputs of the enrollments step, exposed under the step-output id unchanged
204      type: File
205      outputSource: enrollments/d_create_log
206    d_create_err:
207      type: File
208      outputSource: enrollments/d_create_err
209    d_index_log:
210      type: File
211      outputSource: enrollments/d_index_log
212    d_index_err:
213      type: File
214      outputSource: enrollments/d_index_err
215    d_vacuum_log:
216      type: File
217      outputSource: enrollments/d_vacuum_log
218    d_vacuum_err:
219      type: File
220      outputSource: enrollments/d_vacuum_err
221
222    ps_create_log:
223      type: File
224      outputSource: enrollments/ps_create_log
225    ps_create_err:
226      type: File
227      outputSource: enrollments/ps_create_err
228    ps2_create_log:
229      type: File
230      outputSource: enrollments/ps2_create_log
231    ps2_create_err:
232      type: File
233      outputSource: enrollments/ps2_create_err
234    bene_view_log:
235      type: File
236      outputSource: enrollments/bene_view_log
237    bene_view_err:
238      type: File
239      outputSource: enrollments/bene_view_err
240    bene_table_create_log:
241      type: File
242      outputSource: enrollments/bene_table_create_log
243    bene_table_index_log:
244      type: File
245      outputSource: enrollments/bene_table_index_log
246    bene_table_vacuum_log:
247      type: File
248      outputSource: enrollments/bene_table_vacuum_log
249    bene_table_create_err:
250      type: File
251      outputSource: enrollments/bene_table_create_err
252    bene_table_index_err:
253      type: File
254      outputSource: enrollments/bene_table_index_err
255    bene_table_vacuum_err:
256      type: File
257      outputSource: enrollments/bene_table_vacuum_err
258    enrlm_view_log:
259      type: File
260      outputSource: enrollments/enrlm_view_log
261    enrlm_view_err:
262      type: File
263      outputSource: enrollments/enrlm_view_err
264    enrlm_table_create_log:
265      type: File
266      outputSource: enrollments/enrlm_table_create_log
267    enrlm_table_index_log:
268      type: File
269      outputSource: enrollments/enrlm_table_index_log
270    enrlm_table_vacuum_log:  # the same step output is also the admissions step's depends_on source
271      type: File
272      outputSource: enrollments/enrlm_table_vacuum_log
273    enrlm_table_create_err:
274      type: File
275      outputSource: enrollments/enrlm_table_create_err
276    enrlm_table_index_err:
277      type: File
278      outputSource: enrollments/enrlm_table_index_err
279    enrlm_table_vacuum_err:
280      type: File
281      outputSource: enrollments/enrlm_table_vacuum_err
282  ## Generated by nsaph/util/cwl_collect_outputs.py from medicare_admissions.cwl:
283    ip_create_log:  # this group: outputs of the admissions step, exposed under the step-output id unchanged
284      type: File
285      outputSource: admissions/ip_create_log
286    ip_create_err:
287      type: File
288      outputSource: admissions/ip_create_err
289    adm_create_log:
290      type: File
291      outputSource: admissions/adm_create_log
292    adm_create_err:
293      type: File
294      outputSource: admissions/adm_create_err
295    adm_populate_log:
296      type: File
297      outputSource: admissions/adm_populate_log
298    adm_populate_err:
299      type: File
300      outputSource: admissions/adm_populate_err
301    adm_index_log:
302      type: File
303      outputSource: admissions/adm_index_log
304    adm_index_err:
305      type: File
306      outputSource: admissions/adm_index_err
307    adm_vacuum_log:  # the same step output is also the qc step's depends_on source
308      type: File
309      outputSource: admissions/adm_vacuum_log
310    adm_vacuum_err:
311      type: File
312      outputSource: admissions/adm_vacuum_err
313
314  ## Generated by nsaph/util/cwl_collect_outputs.py from medicare_qc.cwl:
315    qc_ev_create_log:  # this group: outputs of the qc step; unlike the enrollments/admissions outputs, each id gets a `qc_` prefix
316      type: File
317      outputSource: qc/ev_create_log
318    qc_ev_create_err:
319      type: File
320      outputSource: qc/ev_create_err
321    qc_av_create_log:
322      type: File
323      outputSource: qc/av_create_log
324    qc_av_create_err:
325      type: File
326      outputSource: qc/av_create_err
327    qc_enrollmen343_create_log:
328      type: File
329      outputSource: qc/enrollmen343_create_log
330    qc_enrollmen343_index_log:
331      type: File
332      outputSource: qc/enrollmen343_index_log
333    qc_enrollmen343_vacuum_log:
334      type: File
335      outputSource: qc/enrollmen343_vacuum_log
336    qc_enrollmen343_create_err:
337      type: File
338      outputSource: qc/enrollmen343_create_err
339    qc_enrollmen343_index_err:
340      type: File
341      outputSource: qc/enrollmen343_index_err
342    qc_enrollmen343_vacuum_err:
343      type: File
344      outputSource: qc/enrollmen343_vacuum_err
345    qc_admission697_create_log:
346      type: File
347      outputSource: qc/admission697_create_log
348    qc_admission697_index_log:
349      type: File
350      outputSource: qc/admission697_index_log
351    qc_admission697_vacuum_log:  # the same step output is also the grant step's depends_on source
352      type: File
353      outputSource: qc/admission697_vacuum_log
354    qc_admission697_create_err:
355      type: File
356      outputSource: qc/admission697_create_err
357    qc_admission697_index_err:
358      type: File
359      outputSource: qc/admission697_index_err
360    qc_admission697_vacuum_err:
361      type: File
362      outputSource: qc/admission697_vacuum_err
363
364  ## Generated by nsaph/util/cwl_collect_outputs.py from grant_read_access.cwl:
365    grant_log:  # outputs of the grant step (which runs alter_database.cwl), exposed with a `grant_` prefix
366      type: File
367      outputSource: grant/log
368    grant_err:
369      type: File
370      outputSource: grant/err
370      outputSource: grant/err