1#!/usr/bin/env cwl-runner
2### Loader for raw CMS Medicare data
3# Copyright (c) 2022. Harvard University
4#
5# Developed by Research Software Engineering,
6# Faculty of Arts and Sciences, Research Computing (FAS RC)
7# Author: Michael A Bouzinier
8#
9# Licensed under the Apache License, Version 2.0 (the "License");
10# you may not use this file except in compliance with the License.
11# You may obtain a copy of the License at
12#
13# http://www.apache.org/licenses/LICENSE-2.0
14#
15# Unless required by applicable law or agreed to in writing, software
16# distributed under the License is distributed on an "AS IS" BASIS,
17# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18# See the License for the specific language governing permissions and
19# limitations under the License.
20#
21
# CWL description of a command line tool that wraps the
# dorieh.cms.tools.mcr_fts2db Python module.
cwlVersion: v1.2
class: CommandLineTool

# Fixed leading part of the command line; bound inputs and the entries
# listed under `arguments` are appended to it.
baseCommand:
  - python
  - '-m'
  - dorieh.cms.tools.mcr_fts2db

requirements:
  - class: InlineJavascriptRequirement

doc: |
  This tool loads CMS Medicare data from *.dat files accompanied by FTS
  files, describing their metadata
31
# Input parameters of the tool. Every input that declares an `inputBinding`
# is placed on the command line as `<prefix> <value>`.
inputs:
  # --- database connection ---
  database:
    doc: Path to database connection file, usually database.ini
    type: File
    inputBinding: {prefix: '--db'}
  connection_name:
    doc: The name of the section in the database.ini file
    type: string
    inputBinding: {prefix: '--connection'}

  # --- data location ---
  input:
    doc: |
      A path to directory, containing unpacked CMS
      files. The tool will recursively look for data files
      according to provided pattern
    type: Directory
    inputBinding: {prefix: '--data'}

  # --- tuning knobs; each has a default and can be omitted by the caller ---
  threads:
    doc: number of threads, concurrently writing into the database
    type: int
    default: 4
    inputBinding: {prefix: '--threads'}
  page_size:
    doc: explicit page size for the database
    type: int
    default: 1000
    inputBinding: {prefix: '--page'}
  log_frequency:
    doc: informational logging occurs every specified number of records
    type: long
    default: 100000
    inputBinding: {prefix: '--log'}

  # --- optional inputs (the trailing `?` marks the type as nullable) ---
  limit:
    doc: |
      if specified, the process will stop after ingesting
      the specified number of records
    type: long?
    inputBinding: {prefix: '--limit'}
  # No inputBinding here: this value never reaches the command line.
  depends_on:
    doc: a special field used to enforce dependencies and execution order
    type: File?
79
# Arguments that are always passed to the command, independent of the inputs.
arguments:
  # Two literal flags, passed without a value.
  - '--reset'
  - '--incremental'
  # Rendered on the command line as `--registry cms.yaml`.
  - prefix: '--registry'
    valueFrom: 'cms.yaml'
85
86
87
# Files collected once the command has finished. Each output now carries a
# `doc` entry, matching the way every input of this tool is documented.
outputs:
  log:
    type: File
    doc: 'Log file of the run, selected by the *.log pattern'
    # NOTE(review): the type is a single File, so this presumably relies on
    # exactly one *.log file being produced per run; confirm against the tool.
    outputBinding:
      glob: '*.log'
  registry:
    type: File
    doc: 'The cms.yaml file, i.e. the file named by the --registry argument'
    outputBinding:
      glob: 'cms.yaml'
  err:
    type: stderr
    doc: 'Standard error of the command, captured in load_medicare_data.err'

# File name that receives the standard error stream; it backs the `err` output.
stderr: 'load_medicare_data.err'