load_raw_medicare.cwl

#!/usr/bin/env cwl-runner
### Loader for raw CMS Medicare data
#  Copyright (c) 2022. Harvard University
#
#  Developed by Research Software Engineering,
#  Faculty of Arts and Sciences, Research Computing (FAS RC)
#  Author: Michael A Bouzinier
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#

cwlVersion: v1.2
class: CommandLineTool
baseCommand: [python, -m, cms.tools.mcr_fts2db]
requirements:
  InlineJavascriptRequirement: {}

doc: |
  This tool loads CMS Medicare data from *.dat files accompanied by FTS
  files, describing their metadata

inputs:
  database:
    type: File
    doc: Path to database connection file, usually database.ini
    inputBinding:
      prefix: --db
  connection_name:
    type: string
    doc: The name of the section in the database.ini file
    inputBinding:
      prefix: --connection
  input:
    type: Directory
    inputBinding:
      prefix: --data
    doc: |
      A path to directory, containing unpacked CMS
      files. The tool will recursively look for data files
      according to provided pattern
  threads:
    type: int
    default: 4
    doc: number of threads, concurrently writing into the database
    inputBinding:
      prefix: --threads
  page_size:
    type: int
    default: 1000
    doc: explicit page size for the database
    inputBinding:
      prefix: --page
  log_frequency:
    type: long
    default: 100000
    doc: informational logging occurs every specified number of records
    inputBinding:
      prefix: --log
  limit:
    type: long?
    doc: |
      if specified, the process will stop after ingesting
      the specified number of records
    inputBinding:
      prefix: --limit
  depends_on:
    type: File?
    doc: a special field used to enforce dependencies and execution order

arguments:
  - valueFrom: "--reset"
  - valueFrom: "--incremental"
  - valueFrom: "cms.yaml"
    prefix: --registry



outputs:
  log:
    type: File
    outputBinding:
      glob: "*.log"
  registry:
    type: File
    outputBinding:
      glob: "cms.yaml"
  err:
    type: stderr

stderr: "load_medicare_data.err"