1#!/usr/bin/env cwl-runner
2### Patient Summary Loader
3# Copyright (c) 2021. Harvard University
4#
5# Developed by Research Software Engineering,
6# Faculty of Arts and Sciences, Research Computing (FAS RC)
7# Author: Michael A Bouzinier
8#
9# Licensed under the Apache License, Version 2.0 (the "License");
10# you may not use this file except in compliance with the License.
11# You may obtain a copy of the License at
12#
13# http://www.apache.org/licenses/LICENSE-2.0
14#
15# Unless required by applicable law or agreed to in writing, software
16# distributed under the License is distributed on an "AS IS" BASIS,
17# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18# See the License for the specific language governing permissions and
19# limitations under the License.
20#
21
# CWL document header: pins the specification version and declares that
# this document describes a single command-line tool.
cwlVersion: v1.2
class: CommandLineTool

# Executable plus its fixed leading arguments: the loader is started as a
# Python module.
baseCommand:
  - python
  - '-m'
  - dorieh.platform.loader.data_loader

# Inline JavaScript is needed to evaluate the expression that builds the
# name of the captured standard error file (see the `stderr` field).
requirements:
  - class: InlineJavascriptRequirement

doc: |
  This tool loads patient summary data into a database.
  It should be run after the data is inspected and
  data model is created from FTS files
32
33
# Tool inputs. Every input except `depends_on` is passed to the loader as a
# command-line option named by its `inputBinding.prefix`.
# Keys inside each input follow one order: type, default, doc, inputBinding.
inputs:
  registry:
    type: File?
    doc: |
      A path to the data model file
    inputBinding:
      prefix: --registry
  domain:
    type: string
    doc: the name of the domain
    inputBinding:
      prefix: --domain
  table:
    type: string
    doc: the name of the table being populated
    inputBinding:
      prefix: --table
  database:
    type: File
    doc: Path to database connection file, usually database.ini
    inputBinding:
      prefix: --db
  connection_name:
    type: string
    doc: The name of the section in the database.ini file
    inputBinding:
      prefix: --connection
  incremental:
    type: boolean
    # The description below treats this as an optional flag ("if defined"),
    # so it defaults to false and callers may omit it. For a boolean input
    # CWL adds the prefix to the command line only when the value is true.
    default: false
    doc: |
      if defined, then the data ingestion is incremental.
      Transactions are committed after every file is processed
      and files that have already been processed are skipped
    inputBinding:
      prefix: --incremental
  input:
    type: Directory
    doc: |
      A path to directory, containing unpacked CMS
      files. The tool will recursively look for data files
      according to provided pattern
    inputBinding:
      prefix: --data
  pattern:
    type: string
    doc: |
      The pattern used to select data files while the tool
      recursively searches the input directory
    inputBinding:
      prefix: --pattern
  threads:
    type: int
    default: 4
    doc: number of threads, concurrently writing into the database
    inputBinding:
      prefix: --threads
  page_size:
    type: int
    default: 1000
    doc: explicit page size for the database
    inputBinding:
      prefix: --page
  log_frequency:
    type: long
    default: 100000
    doc: informational logging occurs every specified number of records
    inputBinding:
      prefix: --log
  limit:
    type: long?
    doc: |
      if specified, the process will stop after ingesting
      the specified number of records
    inputBinding:
      prefix: --limit
  depends_on:
    type: File?
    # Deliberately has no inputBinding: the value is never placed on the
    # command line and exists only so that a workflow can order its steps.
    doc: a special field used to enforce dependencies and execution order
109
110
111
# Tool outputs: a log file collected from the output directory and the
# captured standard error stream.
outputs:
  log:
    type: File
    outputBinding:
      # Picks up the file matching this pattern in the output directory.
      glob: '*.log'
  errors:
    # Shorthand type referring to the file named by the `stderr` field below.
    type: stderr

# Standard error is redirected to a file whose name is built from the
# `table` input: "load-" + table name + ".err".
stderr: '$("load-" + inputs.table + ".err")'