load_raw_medicare.cwl

#!/usr/bin/env cwl-runner
### Loader for raw CMS Medicare data
#  Copyright (c) 2022. Harvard University
#
#  Developed by Research Software Engineering,
#  Faculty of Arts and Sciences, Research Computing (FAS RC)
#  Author: Michael A Bouzinier
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#

 22cwlVersion: v1.2
 23class: CommandLineTool
 24baseCommand: [python, -m, dorieh.cms.tools.mcr_fts2db]
 25requirements:
 26  InlineJavascriptRequirement: {}
 27
 28doc: |
 29  This tool loads CMS Medicare data from *.dat files accompanied by FTS
 30  files, describing their metadata
 31
 32inputs:
 33  database:
 34    type: File
 35    doc: Path to database connection file, usually database.ini
 36    inputBinding:
 37      prefix: --db
 38  connection_name:
 39    type: string
 40    doc: The name of the section in the database.ini file
 41    inputBinding:
 42      prefix: --connection
 43  input:
 44    type: Directory
 45    inputBinding:
 46      prefix: --data
 47    doc: |
 48      A path to directory, containing unpacked CMS
 49      files. The tool will recursively look for data files
 50      according to provided pattern
 51  threads:
 52    type: int
 53    default: 4
 54    doc: number of threads, concurrently writing into the database
 55    inputBinding:
 56      prefix: --threads
 57  page_size:
 58    type: int
 59    default: 1000
 60    doc: explicit page size for the database
 61    inputBinding:
 62      prefix: --page
 63  log_frequency:
 64    type: long
 65    default: 100000
 66    doc: informational logging occurs every specified number of records
 67    inputBinding:
 68      prefix: --log
 69  limit:
 70    type: long?
 71    doc: |
 72      if specified, the process will stop after ingesting
 73      the specified number of records
 74    inputBinding:
 75      prefix: --limit
 76  depends_on:
 77    type: File?
 78    doc: a special field used to enforce dependencies and execution order
 79
 80arguments:
 81  - valueFrom: "--reset"
 82  - valueFrom: "--incremental"
 83  - valueFrom: "cms.yaml"
 84    prefix: --registry
 85
 86
 87
 88outputs:
 89  log:
 90    type: File
 91    outputBinding:
 92      glob: "*.log"
 93  registry:
 94    type: File
 95    outputBinding:
 96      glob: "cms.yaml"
 97  err:
 98    type: stderr
 99
100stderr: "load_medicare_data.err"