ingest.cwl

  1#!/usr/bin/env cwl-runner
  2### Universal uploader of the tabular data to the database
  3#  Copyright (c) 2021. Harvard University
  4#
  5#  Developed by Research Software Engineering,
  6#  Faculty of Arts and Sciences, Research Computing (FAS RC)
  7#  Author: Michael A Bouzinier
  8#
  9#  Licensed under the Apache License, Version 2.0 (the "License");
 10#  you may not use this file except in compliance with the License.
 11#  You may obtain a copy of the License at
 12#
 13#         http://www.apache.org/licenses/LICENSE-2.0
 14#
 15#  Unless required by applicable law or agreed to in writing, software
 16#  distributed under the License is distributed on an "AS IS" BASIS,
 17#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 18#  See the License for the specific language governing permissions and
 19#  limitations under the License.
 20#
 21
 22cwlVersion: v1.2
 23class: CommandLineTool
 24baseCommand: [python, -m, dorieh.platform.loader.data_loader]
 25requirements:
 26  InlineJavascriptRequirement: {}
 27
 28# Running in Docker container does not work on FASSE or Cannon, will have to find a workaround
 29#hints:
 30#  DockerRequirement:
 31#    dockerPull: forome/dorieh
 32
 33
 34doc: |
 35  This tool ingests tabular data, usually in CSV format into the database
 36
 37
 38inputs:
 39  registry:
 40    type: File
 41    inputBinding:
 42      prefix: --registry
 43    doc: |
 44      A path to the data model file
 45  table:
 46    type: string
 47    doc: the name of the table to be created
 48    inputBinding:
 49      prefix: --table
 50  database:
 51    type: File
 52    doc: Path to database connection file, usually database.ini
 53    inputBinding:
 54      prefix: --db
 55  connection_name:
 56    type: string
 57    doc: The name of the section in the database.ini file
 58    inputBinding:
 59      prefix: --connection
 60  domain:
 61    type: string
 62    inputBinding:
 63      prefix: --domain
 64  input:
 65    type:
 66      - File
 67      - File[]
 68    inputBinding:
 69      prefix: --data
 70    doc: |
 71      A path the downloaded data files
 72  pattern:
 73    type: string
 74    default: "*.csv*"
 75    inputBinding:
 76      prefix: --pattern
 77  threads:
 78    type: int
 79    default: 4
 80    doc: number of threads, concurrently writing into the database
 81  page_size:
 82    type: int
 83    default: 1000
 84    doc: explicit page size for the database
 85  log_frequency:
 86    type: long
 87    default: 100000
 88    doc: informational logging occurs every specified number of records
 89  limit:
 90    type: long?
 91    doc: |
 92      if specified, the process will stop after ingesting
 93      the specified number of records
 94  depends_on:
 95    type: Any?
 96    doc: a special field used to enforce dependencies and execution order
 97
 98arguments:
 99    - valueFrom: "--reset"
100
101outputs:
102  log:
103    type: File?
104    outputBinding:
105      glob: "*.log"
106  errors:
107    type: stderr
108
109stderr:  $("ingest-" + inputs.table + ".err")