1#!/usr/bin/env cwl-runner
2### Universal uploader of the tabular data to the database
3# Copyright (c) 2021. Harvard University
4#
5# Developed by Research Software Engineering,
6# Faculty of Arts and Sciences, Research Computing (FAS RC)
7# Author: Michael A Bouzinier
8#
9# Licensed under the Apache License, Version 2.0 (the "License");
10# you may not use this file except in compliance with the License.
11# You may obtain a copy of the License at
12#
13# http://www.apache.org/licenses/LICENSE-2.0
14#
15# Unless required by applicable law or agreed to in writing, software
16# distributed under the License is distributed on an "AS IS" BASIS,
17# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18# See the License for the specific language governing permissions and
19# limitations under the License.
20#
21
# CWL document header: a command line tool that runs the data loader
# as a Python module.
cwlVersion: v1.2
class: CommandLineTool
# Effective command: python -m dorieh.platform.loader.data_loader <options>
baseCommand:
  - python
  - -m
  - dorieh.platform.loader.data_loader
# Allows JavaScript expressions to be evaluated in this document
requirements:
  InlineJavascriptRequirement: {}
27
# Running in a Docker container does not work on FASSE or Cannon;
# a workaround still has to be found.
#hints:
#  DockerRequirement:
#    dockerPull: forome/dorieh
32
33
doc: |
  This tool ingests tabular data, usually in CSV format, into the database
36
37
# Input parameters of the tool. Every input that carries an `inputBinding`
# is passed to the loader as `<prefix> <value>` on the command line.
inputs:
  registry:
    type: File
    inputBinding:
      prefix: --registry
    doc: |
      A path to the data model file
  table:
    type: string
    doc: the name of the table to be created
    inputBinding:
      prefix: --table
  database:
    type: File
    doc: Path to database connection file, usually database.ini
    inputBinding:
      prefix: --db
  connection_name:
    type: string
    doc: The name of the section in the database.ini file
    inputBinding:
      prefix: --connection
  domain:
    type: string
    inputBinding:
      prefix: --domain
  # Accepts either a single file or an array of files; in both cases the
  # path(s) follow a single `--data` prefix.
  input:
    type:
      - File
      - File[]
    inputBinding:
      prefix: --data
    doc: |
      A path to the downloaded data files
  pattern:
    type: string
    default: "*.csv*"
    inputBinding:
      prefix: --pattern
  # The four tuning inputs below were previously declared without an
  # `inputBinding`, so their values (including the defaults) never reached
  # the loader. They are now forwarded on the command line.
  # NOTE(review): the option names --threads, --page, --log and --limit are
  # taken from the loader's command line interface; confirm them against
  # dorieh.platform.loader.data_loader before merging.
  threads:
    type: int
    default: 4
    doc: number of threads, concurrently writing into the database
    inputBinding:
      prefix: --threads
  page_size:
    type: int
    default: 1000
    doc: explicit page size for the database
    inputBinding:
      prefix: --page
  log_frequency:
    type: long
    default: 100000
    doc: informational logging occurs every specified number of records
    inputBinding:
      prefix: --log
  # Optional: when the value is not supplied, nothing is added to the
  # command line for it.
  limit:
    type: long?
    doc: |
      if specified, the process will stop after ingesting
      the specified number of records
    inputBinding:
      prefix: --limit
  # Deliberately has no `inputBinding`: it exists only so that a workflow
  # can wire a dependency on another step, and is never passed to the loader.
  depends_on:
    type: Any?
    doc: a special field used to enforce dependencies and execution order
97
# Fixed command line arguments that do not depend on any input:
# the loader is always invoked with --reset.
arguments:
  - valueFrom: '--reset'
100
# Results collected after the tool finishes.
outputs:
  # Log file matched by the *.log glob; declared optional (File?)
  log:
    type: File?
    outputBinding:
      glob: '*.log'
  # The captured standard error stream (file name is set by `stderr`)
  errors:
    type: stderr
108
# Standard error is captured into "ingest-<table>.err", where <table> is
# the value of the `table` input.
stderr: ingest-$(inputs.table).err