medicare_beneficiaries.cwl

#!/usr/bin/env cwl-runner
### Medicare Beneficiaries data in-database processing pipeline
#  Copyright (c) 2022. Harvard University
#
#  Developed by Research Software Engineering,
#  Faculty of Arts and Sciences, Research Computing (FAS RC)
#  Author: Michael A Bouzinier
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#

cwlVersion: v1.2
class: Workflow

requirements:
  SubworkflowFeatureRequirement: {}
  StepInputExpressionRequirement: {}
  InlineJavascriptRequirement: {}
  MultipleInputFeatureRequirement: {}

doc: |
  This workflow processes raw Medicare beneficiaries summary data.
  The assumed initial state
  is that raw data is already in the database. We assume that the data
  for each year is in a separate table. The first step
  combines these disparate tables into a single view, creating uniform
  columns.

inputs:
  database:
    type: File
    doc: Path to database connection file, usually database.ini
  connection_name:
    type: string
    doc: The name of the section in the database.ini file
  depends_on:
    type: File?
    doc: a special field used to enforce dependencies and execution order

steps:
  create_d:
    run: medicare_combine_tables.cwl
    doc: >
      Combines patient summaries from disparate summary tables
      (one table per year) into a single view
    in:
      database: database
      connection_name: connection_name
      table:
        valueFrom: "mbsf_d"
    out:  [ log, errors ]

  index_d:
    run: index.cwl
    doc: Build indices
    in:
      depends_on: create_d/log
      domain:
        valueFrom: "medicare"
      table:
        valueFrom: "mbsf_d"
      database: database
      connection_name: connection_name

    out: [ log, errors ]

  vacuum_d:
    run: vacuum.cwl
    doc: Vacuum the view
    in:
      depends_on: index_d/log
      domain:
        valueFrom: "medicare"
      table:
        valueFrom: "mbsf_d"
      database: database
      connection_name: connection_name
    out: [ log, errors ]

  create_ps:
    run: medicare_combine_tables.cwl
    doc: >
      Combines patient summaries from disparate summary tables
      (one table per year) into a single view
    in:
      database: database
      connection_name: connection_name
      table:
        valueFrom: "ps"
    out:  [ log, errors ]

  create__ps_view:
    run: matview.cwl
    doc: Create _ps materialized view from ps
    in:
      table:
        valueFrom: "_ps"
      database: database
      connection_name: connection_name
      domain:
        valueFrom: "medicare"
      depends_on: create_ps/log
    out:
      - create_log
      - index_log
      - vacuum_log
      - create_err
      - index_err
      - vacuum_err

  create_bene_view:
    run: create.cwl
    doc: Creates preliminary beneficiaries view
    in:
      table:
        valueFrom: "_beneficiaries"
      database: database
      connection_name: connection_name
      domain:
        valueFrom: "medicare"
      depends_on: create__ps_view/vacuum_log
    out: [ log, errors ]

  create_bene_table:
    run: matview.cwl
    doc: Creates `Beneficiaries` Table from the view
    in:
      depends_on: create_bene_view/log
      table:
        valueFrom: "beneficiaries"
      domain:
         valueFrom: "medicare"
      database: database
      connection_name: connection_name
    out:
      - create_log
      - index_log
      - vacuum_log
      - create_err
      - index_err
      - vacuum_err

  create_enrlm_view:
    run: create.cwl
    doc: Creates preliminary _enrollments view
    in:
      table:
        valueFrom: "_enrollments"
      database: database
      connection_name: connection_name
      domain:
        valueFrom: "medicare"
      depends_on:
      - create__ps_view/vacuum_log
      - create_bene_table/vacuum_log
      - index_d/log
    out: [ log, errors ]

  create_enrlm_table:
    run: matview.cwl
    doc: Creates `Enrollments` Table from the view
    in:
      depends_on: create_enrlm_view/log
      table:
        valueFrom: "enrollments"
      domain:
         valueFrom: "medicare"
      database: database
      connection_name: connection_name
    out:
      - create_log
      - index_log
      - vacuum_log
      - create_err
      - index_err
      - vacuum_err

outputs:
  d_create_log:
    type: File
    outputSource: create_d/log
  d_index_log:
    type: File
    outputSource: index_d/log
  d_vacuum_log:
    type: File
    outputSource: vacuum_d/log

  d_create_err:
    type: File
    outputSource: create_d/errors
  d_index_err:
    type: File
    outputSource: index_d/errors
  d_vacuum_err:
    type: File
    outputSource: vacuum_d/errors

  ps_create_log:
    type: File
    outputSource: create_ps/log
  ps_create_err:
    type: File
    outputSource: create_ps/errors
  ps2_create_log:
    type: File
    outputSource: create__ps_view/create_log
  ps2_create_err:
    type: File
    outputSource: create__ps_view/create_err
  ps2_index_log:
    type: File
    outputSource: create__ps_view/index_log
  ps2_vacuum_log:
    type: File
    outputSource: create__ps_view/vacuum_log
  ps2_index_err:
    type: File
    outputSource: create__ps_view/index_err
  ps2_vacuum_err:
    type: File
    outputSource: create__ps_view/vacuum_err


  # bene_view
  bene_view_log:
    type: File
    outputSource: create_bene_view/log
  bene_view_err:
    type: File
    outputSource: create_bene_view/errors
  # bene_table  
  bene_table_create_log:
    type: File
    outputSource: create_bene_table/create_log
  bene_table_index_log:
    type: File
    outputSource: create_bene_table/index_log
  bene_table_vacuum_log:
    type: File
    outputSource: create_bene_table/vacuum_log
  bene_table_create_err:
    type: File
    outputSource: create_bene_table/create_err
  bene_table_index_err:
    type: File
    outputSource: create_bene_table/index_err
  bene_table_vacuum_err:
    type: File
    outputSource: create_bene_table/vacuum_err

  # enrollments view
  enrlm_view_log:
    type: File
    outputSource: create_enrlm_view/log
  enrlm_view_err:
    type: File
    outputSource: create_enrlm_view/errors

  # enrollments table
  enrlm_table_create_log:
    type: File
    outputSource: create_enrlm_table/create_log
  enrlm_table_index_log:
    type: File
    outputSource: create_enrlm_table/index_log
  enrlm_table_vacuum_log:
    type: File
    outputSource: create_enrlm_table/vacuum_log
  enrlm_table_create_err:
    type: File
    outputSource: create_enrlm_table/create_err
  enrlm_table_index_err:
    type: File
    outputSource: create_enrlm_table/index_err
  enrlm_table_vacuum_err:
    type: File
    outputSource: create_enrlm_table/vacuum_err