1#!/usr/bin/env cwl-runner
2### Workflow to aggregate and ingest NetCDF files for one year
3# Copyright (c) 2021-2022. Harvard University
4#
5# Developed by Research Software Engineering,
6# Faculty of Arts and Sciences, Research Computing (FAS RC)
7# Author: Michael A Bouzinier
8#
9# Licensed under the Apache License, Version 2.0 (the "License");
10# you may not use this file except in compliance with the License.
11# You may obtain a copy of the License at
12#
13# http://www.apache.org/licenses/LICENSE-2.0
14#
15# Unless required by applicable law or agreed to in writing, software
16# distributed under the License is distributed on an "AS IS" BASIS,
17# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18# See the License for the specific language governing permissions and
19# limitations under the License.
20#
21
cwlVersion: v1.2
class: Workflow

# CWL features this workflow relies on:
#   SubworkflowFeatureRequirement    — steps may run nested workflows
#   StepInputExpressionRequirement   — valueFrom expressions on step inputs
#   InlineJavascriptRequirement      — JavaScript in $( ... ) expressions
#   ScatterFeatureRequirement        — scattering support (declared for sub-steps)
#   MultipleInputFeatureRequirement  — steps may merge multiple input sources
requirements:
  SubworkflowFeatureRequirement: {}
  StepInputExpressionRequirement: {}
  InlineJavascriptRequirement: {}
  ScatterFeatureRequirement: {}
  MultipleInputFeatureRequirement: {}


doc: |
  Sub-workflow to aggregate a NetCDF file for one year over a given
  geography (zip codes or counties). Before aggregation, downloads
  shape files for this year from US Census website
37
inputs:
  depends_on:
    # NOTE(review): not referenced by any step in this file; presumably a
    # synthetic input used only to order this workflow after upstream
    # workflows — confirm with callers
    type: Any?
  proxy:
    type: string?
    default: ""
    doc: HTTP/HTTPS Proxy if required
  downloads:
    type: Directory
    doc: Directory containing the downloaded NetCDF files
  geography:
    type: string
    doc: Type of geography to aggregate over (zip codes or counties)
  variable:
    type: string
    doc: Name of the main variable (band) to aggregate
  component:
    type: string[]
    doc: Names of the component variables (bands) to aggregate
  year:
    type: int
    doc: Year of the data to be processed
  strategy:
    type: string
    doc: "Rasterization strategy"
  ram:
    type: string
    default: 2GB
    doc: Runtime memory, available to the process
  shape_file_collection:
    type: string
    default: tiger
    doc: |
      [Collection of shapefiles](https://www2.census.gov/geo/tiger),
      either GENZ or TIGER
  table:
    type: string?
    doc: |
      Optional name of the table where the aggregated data will be
      eventually stored
73
steps:
  get_shapes:
    run: get_shapes.cwl
    doc: |
      This step downloads Shape files from a given collection (TIGER/Line or GENZ)
      and a geography (ZCTA or Counties) from the US Census website,
      for a given year or for the closest one.

    in:
      # get_shapes.cwl is given the year as a string: the workflow's int
      # input is bound to `yy` so it is visible inside the expression,
      # then converted with String()
      year:
        valueFrom: $(String(inputs.yy))
      yy: year
      geo: geography
      collection: shape_file_collection
      proxy: proxy
    out:
      - shape_files
91
92 find_pm25_file:
93 doc: |
94 Given input directory, variable (band), year and month,
95 evaluates the expected file name for the main variable input data
96 run: wustl_file_pattern.cwl
97 in:
98 year: year
99 variables:
100 valueFrom: $([inputs.variable])
101 variable: variable
102 downloads: downloads
103 out: [netcdf_files]
104
105 find_components_files:
106 doc: |
107 Given input directory, variable (band), year and month,
108 evaluates the expected file name for the main variable input data
109 run: wustl_file_pattern.cwl
110 in:
111 year: year
112 variables: component
113 downloads: downloads
114 out: [netcdf_files]
115
  consolidate:
    # Merges the main-variable (absolute values) file with the
    # per-component files into a single consolidated dataset
    doc: consolidate components into one file
    run: wustl_consolidate_components.cwl
    in:
      abs_values: find_pm25_file/netcdf_files
      components: find_components_files/netcdf_files
    out:
      - consolidated_data
124
  aggregate:
    doc: Aggregate data over geographies
    run: aggregate_wustl.cwl
    in:
      strategy: strategy
      ram: ram
      geography: geography
      netcdf_data: consolidate/consolidated_data
      shape_files: get_shapes/shape_files
      variable: variable
      components: component
      table: table
      # Bands to aggregate: the main variable followed by all components
      band:
        valueFrom: $([inputs.variable].concat(inputs.components))
    out:
      - log
      - errors
      - csv_data
      - data_dictionary
144
outputs:
  # Shape files downloaded from the US Census website
  shapes:
    type: File[]
    outputSource: get_shapes/shape_files

  # Consolidated NetCDF file (main variable plus components)
  consolidated_data:
    type: File
    outputSource: consolidate/consolidated_data
  # Aggregated data in CSV form, with its optional data dictionary
  aggregate_data:
    type: File
    outputSource: aggregate/csv_data
  data_dictionary:
    type: File?
    outputSource: aggregate/data_dictionary
  # Log and error streams of the aggregation step
  aggregate_log:
    type: File?
    outputSource: aggregate/log
  aggregate_err:
    type: File
    outputSource: aggregate/errors