# medicare.cwl

#!/usr/bin/env cwl-runner
### Medicare data ingestion and processing pipeline
#  Copyright (c) 2022. Harvard University
#
#  Developed by Research Software Engineering,
#  Faculty of Arts and Sciences, Research Computing (FAS RC)
#  Author: Michael A Bouzinier
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#

# Version of the CWL specification this document is written against
cwlVersion: v1.2
# The document describes a multi-step workflow (see `steps` below)
class: Workflow

# Optional CWL features that the runner must support to execute this document
requirements:
  # Permits a step to `run` another workflow.
  # NOTE(review): presumably required because some of the *.cwl files run by
  # the steps below are workflows themselves - confirm against those files.
  SubworkflowFeatureRequirement: {}
  # Permits `valueFrom` expressions on step inputs.
  # NOTE(review): no `valueFrom` appears in this file - confirm it is still needed.
  StepInputExpressionRequirement: {}
  # Permits JavaScript expressions.
  # NOTE(review): no expression appears in this file - confirm it is still needed.
  InlineJavascriptRequirement: {}

doc: |
  This workflow processes raw Medicare data. We assume that the data
  for each year is in a separate set of SAS DAT files accompanied by FTS.
  For each year we expect at least
  two tables: patient summary and inpatient admissions.

  > NB: Input files must be organized within the directory given in
  the `input` parameter in a certain way. Immediate parent folder for
  each file should be named as the year of the data it contains. Example:

      data/
        a/
          b/
            2011/
            2013/
          d/
            2017/

  See [](../Medicare) for data processing details.

# Workflow-level parameters; each one is wired into the steps below by name
inputs:
  # Forwarded unchanged to every step of this workflow
  database:
    type: File
    doc: Path to database connection file, usually database.ini
  # Forwarded unchanged to every step of this workflow
  connection_name:
    type: string
    doc: The name of the section in the database.ini file
  # Consumed only by the `load_raw_data` step
  input:
    type: Directory
    doc: |
      A path to directory, containing folders with unpacked CMS
      files. The tool will recursively look for data files
      according to provided pattern. Immediate parent folder for
      each file should be named as the year of the data it contains, e.g.
      a/b/c/2017/mbsf_abcd_xyzacdfrtwe_request12345.fts

# The steps form a linear chain: every step after the first takes a log or
# registry file of its predecessor as `depends_on`, which orders execution.
steps:
  # First link of the chain; it has no upstream dependency
  initdb:
    doc: Ensure that database utilities are at their latest version
    run: initdb.cwl
    in:
      connection_name: connection_name
      database: database
    out: [log, err]

  # The only step that reads the `input` directory
  load_raw_data:
    doc: Load raw CMS Medicare data into the database
    run: load_raw_medicare.cwl
    in:
      input: input
      connection_name: connection_name
      database: database
      # Sourcing an output of `initdb` makes this step wait for it
      depends_on: initdb/log
    out: [log, registry, err]

  # Runs medicare_beneficiaries.cwl after the raw data load.
  # NOTE(review): the output id prefixes (d_, ps_, ps2_, bene_, enrlm_)
  # presumably name objects built inside medicare_beneficiaries.cwl - the ids
  # must match the outputs declared there; confirm against that file.
  enrollments:
    run: medicare_beneficiaries.cwl
    doc: >
      Process beneficiaries enrollment data
    in:
      database: database
      connection_name: connection_name
      # Sourcing the registry file makes this step wait for `load_raw_data`
      depends_on: load_raw_data/registry
    # Every id listed here is re-exported in the workflow `outputs` section
    out:
      - d_create_log
      - d_index_log
      - d_vacuum_log
      - d_create_err
      - d_index_err
      - d_vacuum_err
      - ps_create_log
      - ps_create_err
      - ps2_create_log
      - ps2_create_err
      - bene_view_log
      - bene_view_err
      - bene_table_create_log
      - bene_table_index_log
      - bene_table_vacuum_log
      - bene_table_create_err
      - bene_table_index_err
      - bene_table_vacuum_err
      - enrlm_view_log
      - enrlm_view_err
      - enrlm_table_create_log
      - enrlm_table_index_log
      - enrlm_table_vacuum_log
      - enrlm_table_create_err
      - enrlm_table_index_err
      - enrlm_table_vacuum_err

  # Runs medicare_admissions.cwl after the `enrollments` step.
  # NOTE(review): the output ids must match the outputs declared in
  # medicare_admissions.cwl - confirm against that file.
  admissions:
    run: medicare_admissions.cwl
    doc: Process medicare inpatient admissions (aka Medpar) data
    in:
      database: database
      connection_name: connection_name
      # Sourcing this log makes the step wait for `enrollments`
      depends_on: enrollments/enrlm_table_vacuum_log
    # Every id listed here is re-exported in the workflow `outputs` section
    out:
      - ip_create_log
      - ip_create_err
      - adm_create_log
      - adm_create_err
      - adm_populate_log
      - adm_populate_err
      - adm_index_log
      - adm_index_err
      - adm_vacuum_log
      - adm_vacuum_err


  # Runs medicare_qc.cwl after the `admissions` step.
  # NOTE(review): `enrollmen343` and `admission697` look like auto-generated
  # identifiers; they must match the output ids declared in medicare_qc.cwl -
  # confirm against that file before renaming anything here.
  qc:
    run: medicare_qc.cwl
    doc: Build QC Tables
    in:
      database: database
      connection_name: connection_name
      # Sourcing this log makes the step wait for `admissions`
      depends_on: admissions/adm_vacuum_log
    # Every id listed here is re-exported in `outputs` with a `qc_` prefix
    out:
      - ev_create_log
      - ev_create_err
      - av_create_log
      - av_create_err
      - enrollmen343_create_log
      - enrollmen343_index_log
      - enrollmen343_vacuum_log
      - enrollmen343_create_err
      - enrollmen343_index_err
      - enrollmen343_vacuum_err
      - admission697_create_log
      - admission697_index_log
      - admission697_vacuum_log
      - admission697_create_err
      - admission697_index_err
      - admission697_vacuum_err

  # Last link of the chain; scheduled once `qc` has produced
  # `admission697_vacuum_log`
  grant:
    doc: |
      Grants read access to the members of NSAPH group for newly created
      or updated tables
    run: alter_database.cwl
    in:
      connection_name: connection_name
      database: database
      # Sourcing an output of `qc` makes this step wait for it
      depends_on: qc/admission697_vacuum_log
    out: [log, err]

# Every file listed in a step's `out` section above is re-exported here as a
# workflow output. Sections follow the order of the steps.
outputs:
  # Outputs of the `initdb` step
  initdb_log:
    type: File
    outputSource: initdb/log
  initdb_err:
    type: File
    outputSource: initdb/err

  # Outputs of the `load_raw_data` step
  load_raw_log:
    type: File
    outputSource: load_raw_data/log
  load_raw_err:
    type: File
    outputSource: load_raw_data/err
  registry:
    type: File
    outputSource: load_raw_data/registry

  # Outputs of the `enrollments` step.
  # Generated by nsaph/util/cwl_collect_outputs.py from medicare_beneficiaries.cwl:
  d_create_log:
    type: File
    outputSource: enrollments/d_create_log
  d_create_err:
    type: File
    outputSource: enrollments/d_create_err
  d_index_log:
    type: File
    outputSource: enrollments/d_index_log
  d_index_err:
    type: File
    outputSource: enrollments/d_index_err
  d_vacuum_log:
    type: File
    outputSource: enrollments/d_vacuum_log
  d_vacuum_err:
    type: File
    outputSource: enrollments/d_vacuum_err

  ps_create_log:
    type: File
    outputSource: enrollments/ps_create_log
  ps_create_err:
    type: File
    outputSource: enrollments/ps_create_err
  ps2_create_log:
    type: File
    outputSource: enrollments/ps2_create_log
  ps2_create_err:
    type: File
    outputSource: enrollments/ps2_create_err
  bene_view_log:
    type: File
    outputSource: enrollments/bene_view_log
  bene_view_err:
    type: File
    outputSource: enrollments/bene_view_err
  bene_table_create_log:
    type: File
    outputSource: enrollments/bene_table_create_log
  bene_table_index_log:
    type: File
    outputSource: enrollments/bene_table_index_log
  bene_table_vacuum_log:
    type: File
    outputSource: enrollments/bene_table_vacuum_log
  bene_table_create_err:
    type: File
    outputSource: enrollments/bene_table_create_err
  bene_table_index_err:
    type: File
    outputSource: enrollments/bene_table_index_err
  bene_table_vacuum_err:
    type: File
    outputSource: enrollments/bene_table_vacuum_err
  enrlm_view_log:
    type: File
    outputSource: enrollments/enrlm_view_log
  enrlm_view_err:
    type: File
    outputSource: enrollments/enrlm_view_err
  enrlm_table_create_log:
    type: File
    outputSource: enrollments/enrlm_table_create_log
  enrlm_table_index_log:
    type: File
    outputSource: enrollments/enrlm_table_index_log
  enrlm_table_vacuum_log:
    type: File
    outputSource: enrollments/enrlm_table_vacuum_log
  enrlm_table_create_err:
    type: File
    outputSource: enrollments/enrlm_table_create_err
  enrlm_table_index_err:
    type: File
    outputSource: enrollments/enrlm_table_index_err
  enrlm_table_vacuum_err:
    type: File
    outputSource: enrollments/enrlm_table_vacuum_err

  # Outputs of the `admissions` step.
  # Generated by nsaph/util/cwl_collect_outputs.py from medicare_admissions.cwl:
  ip_create_log:
    type: File
    outputSource: admissions/ip_create_log
  ip_create_err:
    type: File
    outputSource: admissions/ip_create_err
  adm_create_log:
    type: File
    outputSource: admissions/adm_create_log
  adm_create_err:
    type: File
    outputSource: admissions/adm_create_err
  adm_populate_log:
    type: File
    outputSource: admissions/adm_populate_log
  adm_populate_err:
    type: File
    outputSource: admissions/adm_populate_err
  adm_index_log:
    type: File
    outputSource: admissions/adm_index_log
  adm_index_err:
    type: File
    outputSource: admissions/adm_index_err
  adm_vacuum_log:
    type: File
    outputSource: admissions/adm_vacuum_log
  adm_vacuum_err:
    type: File
    outputSource: admissions/adm_vacuum_err

  # Outputs of the `qc` step, exported with a `qc_` prefix.
  # Generated by nsaph/util/cwl_collect_outputs.py from medicare_qc.cwl:
  qc_ev_create_log:
    type: File
    outputSource: qc/ev_create_log
  qc_ev_create_err:
    type: File
    outputSource: qc/ev_create_err
  qc_av_create_log:
    type: File
    outputSource: qc/av_create_log
  qc_av_create_err:
    type: File
    outputSource: qc/av_create_err
  qc_enrollmen343_create_log:
    type: File
    outputSource: qc/enrollmen343_create_log
  qc_enrollmen343_index_log:
    type: File
    outputSource: qc/enrollmen343_index_log
  qc_enrollmen343_vacuum_log:
    type: File
    outputSource: qc/enrollmen343_vacuum_log
  qc_enrollmen343_create_err:
    type: File
    outputSource: qc/enrollmen343_create_err
  qc_enrollmen343_index_err:
    type: File
    outputSource: qc/enrollmen343_index_err
  qc_enrollmen343_vacuum_err:
    type: File
    outputSource: qc/enrollmen343_vacuum_err
  qc_admission697_create_log:
    type: File
    outputSource: qc/admission697_create_log
  qc_admission697_index_log:
    type: File
    outputSource: qc/admission697_index_log
  qc_admission697_vacuum_log:
    type: File
    outputSource: qc/admission697_vacuum_log
  qc_admission697_create_err:
    type: File
    outputSource: qc/admission697_create_err
  qc_admission697_index_err:
    type: File
    outputSource: qc/admission697_index_err
  qc_admission697_vacuum_err:
    type: File
    outputSource: qc/admission697_vacuum_err

  # Outputs of the `grant` step.
  # Generated by nsaph/util/cwl_collect_outputs.py from grant_read_access.cwl:
  # NOTE(review): the `grant` step runs alter_database.cwl, not
  # grant_read_access.cwl - confirm which file this list was generated from.
  grant_log:
    type: File
    outputSource: grant/log
  grant_err:
    type: File
    outputSource: grant/err