1#!/usr/bin/env cwl-runner
2### Workflow to aggregate and ingest one file in NetCDF format
3# Copyright (c) 2021-2022. Harvard University
4#
5# Developed by Research Software Engineering,
6# Faculty of Arts and Sciences, Research Computing (FAS RC)
7# Author: Michael A Bouzinier
8#
9# Licensed under the Apache License, Version 2.0 (the "License");
10# you may not use this file except in compliance with the License.
11# You may obtain a copy of the License at
12#
13# http://www.apache.org/licenses/LICENSE-2.0
14#
15# Unless required by applicable law or agreed to in writing, software
16# distributed under the License is distributed on an "AS IS" BASIS,
17# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18# See the License for the specific language governing permissions and
19# limitations under the License.
20#
21
# CWL specification version and top-level process type of this document
cwlVersion: v1.2
class: Workflow

# Optional CWL features enabled for this workflow (list form).
requirements:
  # Needed when a step runs another Workflow.
  # NOTE(review): the classes of aggregate_wustl.cwl and add_data.cwl are
  # not visible from this file -- confirm.
  - class: SubworkflowFeatureRequirement
  # Step inputs below use `valueFrom`
  - class: StepInputExpressionRequirement
  # JavaScript is used in the `findfile` expression and in `valueFrom`
  - class: InlineJavascriptRequirement
  # No step in this file uses `scatter` or lists several sources for one
  # input; the two requirements below are kept as originally declared.
  - class: ScatterFeatureRequirement
  - class: MultipleInputFeatureRequirement
31
32
doc: |
  Sub-workflow that aggregates a single NetCDF file over a given
  geography (zip codes or counties) and ingests the
  aggregated data into the database
37
# Workflow parameters. Inputs that declare only a type use the CWL
# `name: type` shorthand; inputs with a default or a doc keep the long form.
inputs:
  # Optional value of any type; no step in this workflow reads it.
  # NOTE(review): presumably lets a caller order this workflow after
  # another step -- confirm.
  depends_on: Any?
  # Directory in which step `findfile` locates the NetCDF file
  downloads: Directory
  # Forwarded unchanged to step `aggregate`
  geography: string
  # Year and month select the file; both go into the generated file name
  year: int
  month: int
  # Variable name; upper-cased in the file name and forwarded to `aggregate`
  band:
    default: pm25
    type: string
  # Forwarded to step `ingest`
  table: string
  shape_files:
    doc: "Paths to shape files"
    type: File[]
  strategy:
    doc: "Rasterization strategy"
    type: string
  ram:
    doc: Runtime memory, available to the process
    default: 2GB
    type: string
  # Both forwarded to step `ingest`.
  # NOTE(review): presumably a connection definitions file and the name of
  # a section in it -- confirm against add_data.cwl.
  database: File
  connection_name: string
68
# Three steps chained by their data dependencies:
#   findfile -> aggregate -> ingest
steps:
  findfile:
    doc: |
      Given input directory, variable (band), year and month,
      evaluates the expected file name for the input data
    run:
      class: ExpressionTool
      inputs:
        downloads:
          type: Directory
        year:
          type: int
        month:
          type: int
        band:
          type: string
      # The file name is built as
      #   V4NA03_<BAND>_NA_<YYYYMM>_<YYYYMM>-RH35.nc
      # where <BAND> is the upper-cased band and months below 10 get a
      # leading zero. The name is appended to the location of `downloads`;
      # the '/' separator is added only when the location does not already
      # end with one, so a trailing slash cannot produce '//' in the result.
      # The expression only constructs the File reference; it does not
      # check that the file exists.
      # JavaScript comments are deliberately kept out of the expression body.
      expression: |
        ${
          var band = inputs.band.toUpperCase();
          var month = (inputs.month < 10 ? '0' : '') + String(inputs.month);
          var period = String(inputs.year) + month;
          var name = "V4NA03_" + band + "_NA_" + period + "_" + period + "-RH35.nc";
          var dir = inputs.downloads.location;
          if (dir.charAt(dir.length - 1) !== '/') {
            dir = dir + '/';
          }
          return {
            netcdf_file: {
              "class": "File",
              "location": dir + name
            }
          };
        }
      outputs:
        netcdf_file:
          type: File
    in:
      year: year
      month: month
      band: band
      downloads: downloads
    out: [netcdf_file]

  aggregate:
    doc: Aggregate data over geographies
    run: aggregate_wustl.cwl
    in:
      strategy: strategy
      ram: ram
      # `sband` carries the scalar workflow input `band`; the `band` passed
      # to the tool is computed from it as a one-element array.
      # NOTE(review): presumably aggregate_wustl.cwl expects a list of
      # bands -- confirm against that file.
      band:
        valueFrom: $([inputs.sband])
      sband: band
      geography: geography
      netcdf_data: findfile/netcdf_file
      shape_files: shape_files
    out: [log, errors, csv_data]

  ingest:
    doc: Ingests the aggregated data into the database
    run: add_data.cwl
    in:
      table: table
      input: aggregate/csv_data
      database: database
      connection_name: connection_name
      # Constant: this workflow always passes "exposures" as the domain
      domain:
        valueFrom: "exposures"
    out: [log, errors]
143
# Workflow results, grouped by the step that produces them.
# NOTE(review): the two error outputs are required (File) while the data
# and log outputs are optional (File?) -- confirm this matches what
# aggregate_wustl.cwl and add_data.cwl declare.
outputs:
  # Produced by step `aggregate`
  aggregate_data:
    outputSource: aggregate/csv_data
    type: File?
  aggregate_log:
    outputSource: aggregate/log
    type: File?
  aggregate_err:
    outputSource: aggregate/errors
    type: File

  # Produced by step `ingest`
  ingest_log:
    outputSource: ingest/log
    type: File?
  ingest_err:
    outputSource: ingest/errors
    type: File