load_raw_medicaid.cwl

#!/usr/bin/env cwl-runner
### Patient Summary Loader
#  Copyright (c) 2021. Harvard University
#
#  Developed by Research Software Engineering,
#  Faculty of Arts and Sciences, Research Computing (FAS RC)
#  Author: Michael A Bouzinier
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#

 22cwlVersion: v1.2
 23class: CommandLineTool
 24baseCommand: [python, -m, dorieh.platform.loader.data_loader]
 25requirements:
 26  InlineJavascriptRequirement: {}
 27
 28doc: |
 29  This tool loads patient summary data into a database.
 30  It should be run after the data is inspected and
 31  data model is created from FTS files
 32
 33
 34inputs:
 35  registry:
 36    type: File?
 37    inputBinding:
 38      prefix: --registry
 39    doc: |
 40      A path to the data model file
 41  domain:
 42    type: string
 43    doc: the name of the domain
 44    inputBinding:
 45      prefix: --domain
 46  table:
 47    type: string
 48    doc: the name of the table being populated
 49    inputBinding:
 50      prefix: --table
 51  database:
 52    type: File
 53    doc: Path to database connection file, usually database.ini
 54    inputBinding:
 55      prefix: --db
 56  connection_name:
 57    type: string
 58    doc: The name of the section in the database.ini file
 59    inputBinding:
 60      prefix: --connection
 61  incremental:
 62    type: boolean
 63    inputBinding:
 64      prefix: --incremental
 65    doc: |
 66      if defined, then the data ingestion is incremental.
 67      Transactions are committed after every file is processed
 68      and files that have already been processed are skipped
 69  input:
 70    type: Directory
 71    inputBinding:
 72      prefix: --data
 73    doc: |
 74      A path to directory, containing unpacked CMS
 75      files. The tool will recursively look for data files
 76      according to provided pattern
 77  pattern:
 78    type: string
 79    inputBinding:
 80      prefix: --pattern
 81  threads:
 82    type: int
 83    default: 4
 84    doc: number of threads, concurrently writing into the database
 85    inputBinding:
 86      prefix: --threads
 87  page_size:
 88    type: int
 89    default: 1000
 90    doc: explicit page size for the database
 91    inputBinding:
 92      prefix: --page
 93  log_frequency:
 94    type: long
 95    default: 100000
 96    doc: informational logging occurs every specified number of records
 97    inputBinding:
 98      prefix: --log
 99  limit:
100    type: long?
101    doc: |
102      if specified, the process will stop after ingesting
103      the specified number of records
104    inputBinding:
105      prefix: --limit
106  depends_on:
107    type: File?
108    doc: a special field used to enforce dependencies and execution order
109
110
111
outputs:
  # Log file collected from the tool's output directory by glob.
  # NOTE(review): a `File` output requires the glob to match exactly one
  # file - confirm the loader writes a single *.log file per run.
  log:
    type: File
    outputBinding:
      glob: "*.log"
  # Captured standard error stream; the file name is set by the top-level
  # `stderr` field of this tool.
  errors:
    type: stderr

# Standard error is captured into a file named after the target table:
# "load-" + <table> + ".err".
stderr: $("load-" + inputs.table + ".err")