add_data.cwl

#!/usr/bin/env cwl-runner
### Uploader of the gridMET Data to the database
#  Copyright (c) 2021. Harvard University
#
#  Developed by Research Software Engineering,
#  Faculty of Arts and Sciences, Research Computing (FAS RC)
#  Author: Michael A Bouzinier
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#

 22cwlVersion: v1.2
 23class: CommandLineTool
 24baseCommand: [python, -m, dorieh.platform.loader.data_loader]
 25# baseCommand: echo
 26requirements:
 27  InlineJavascriptRequirement: {}
 28
 29doc: |
 30  This tool uploads the data to the database
 31
 32
 33inputs:
 34  registry:
 35    type: File?
 36    inputBinding:
 37      prefix: --registry
 38    doc: |
 39      A path to the data model file
 40  table:
 41    type: string
 42    doc: the name of the table to be created
 43    inputBinding:
 44      prefix: --table
 45  database:
 46    type: File
 47    doc: Path to database connection file, usually database.ini
 48    inputBinding:
 49      prefix: --db
 50  connection_name:
 51    type: string
 52    doc: The name of the section in the database.ini file
 53    inputBinding:
 54      prefix: --connection
 55  input:
 56    type: File?
 57    inputBinding:
 58      prefix: --data
 59    doc: |
 60      A path the downloaded data files
 61  pattern:
 62    type: string
 63    default: "*.csv*"
 64    inputBinding:
 65      prefix: --pattern
 66  threads:
 67    type: int
 68    default: 4
 69    doc: number of threads, concurrently writing into the database
 70  page_size:
 71    type: int
 72    default: 1000
 73    doc: explicit page size for the database
 74  log_frequency:
 75    type: long
 76    default: 100000
 77    doc: informational logging occurs every specified number of records
 78  limit:
 79    type: long?
 80    doc: |
 81      if specified, the process will stop after ingesting
 82      the specified number of records
 83  depends_on:
 84    type: Any?
 85    doc: a special field used to enforce dependencies and execution order
 86  domain:
 87    type: string
 88    inputBinding:
 89      prefix: --domain
 90
 91
 92outputs:
 93  log:
 94    type: File?
 95    outputBinding:
 96      glob: "*.log"
 97  errors:
 98    type: stderr
 99
100stderr:  $("ingest-" + inputs.table + ".err")