1#!/usr/bin/env cwl-runner
2### Aggregates data in NetCDF file over given geographies
3# Copyright (c) 2021. Harvard University
4#
5# Developed by Research Software Engineering,
6# Faculty of Arts and Sciences, Research Computing (FAS RC)
7# Author: Michael A Bouzinier
8#
9# Licensed under the Apache License, Version 2.0 (the "License");
10# you may not use this file except in compliance with the License.
11# You may obtain a copy of the License at
12#
13# http://www.apache.org/licenses/LICENSE-2.0
14#
15# Unless required by applicable law or agreed to in writing, software
16# distributed under the License is distributed on an "AS IS" BASIS,
17# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18# See the License for the specific language governing permissions and
19# limitations under the License.
20#
21
# Document header: a CWL v1.2 command-line tool.
cwlVersion: v1.2
class: CommandLineTool
# The tool is run as a Python module:
#   python -m dorieh.rasters.file_processors.wustl_file_processor
baseCommand:
  - python
  - '-m'
  - dorieh.rasters.file_processors.wustl_file_processor
25
# Runner requirements, written in list form (equivalent to the map form).
requirements:
  # Needed because the `stderr` field below is a JavaScript expression.
  - class: InlineJavascriptRequirement
  # Minimum resources requested from the runner
  # (ramMin and outdirMin are in MiB per the CWL specification).
  - class: ResourceRequirement
    coresMin: 2
    ramMin: 8192
    outdirMin: 5120
    # coresMax: 6
33
34
# Human-readable description of the tool (Markdown).
doc: |
  This tool aggregates data in a NetCDF or GeoTiff file over provided shapes
  (zip codes or counties). It produces mean values over each shape.

  The tool expects one value for every variable in every grid node.
  The variables are expected to be formatted as they are for
  [Atmospheric Composition Analysis Group of Washington University](https://sites.wustl.edu/acag/datasets/surface-pm2-5/).

  See also [aggregate_daily.cwl](aggregate_daily) - a tool that expects
  multiple (daily) values at every grid node.
45
# Tool inputs. Every input below has an `inputBinding`, so each one is passed
# to the Python module as the command-line option named by its `prefix`.
# Keys inside each input follow one order: type, default, doc, inputBinding.
inputs:
  strategy:
    type: string
    default: downscale
    doc: "Rasterization strategy"
    inputBinding:
      prefix: --strategy
  ram:
    type: string
    # NOTE(review): this default (2GB) is lower than the 8192 requested via
    # ResourceRequirement.ramMin — confirm the two are meant to differ.
    default: 2GB
    doc: "Runtime memory, available to the process"
    inputBinding:
      prefix: --ram
  shapes_dir:
    # NOTE(review): presumably a directory holding shape files, used as an
    # alternative to `shape_files` — confirm against the Python module.
    type: Directory?
    inputBinding:
      prefix: --shapes_dir
  band:
    # Passed to the tool as `--var`.
    # NOTE(review): presumably the names of the variables (bands) to
    # aggregate — confirm against the Python module.
    type: string[]
    inputBinding:
      prefix: --var
  geography:
    type: string
    doc: |
      Type of geography: zip codes or counties
    inputBinding:
      prefix: --geography
  netcdf_data:
    type: File
    doc: "Path to downloaded file"
    inputBinding:
      prefix: --raw_downloads
  shape_files:
    type: File[]?
    doc: "Paths to shape files"
    inputBinding:
      prefix: --shape_files
  output_type:
    type: string[]
    default:
      - aggregation
      - data_dictionary
    doc: "What to output as the result of executing the tool"
    inputBinding:
      prefix: --output
  table:
    type: string?
    doc: |
      Optional name of the table where the aggregated data will be
      eventually stored
    inputBinding:
      prefix: --table
98
# Fixed command-line argument, always passed as `--destination .`
arguments:
  - prefix: --destination
    valueFrom: '.'
102
# Tool outputs, collected from the output directory by glob pattern.
outputs:
  log:
    # Optional: any file ending in `.log` in the output directory.
    type: File?
    outputBinding:
      glob: "*.log"
  csv_data:
    # Required: looked up both at the top level and in subdirectories.
    type: File
    outputBinding:
      glob:
        - "*.csv*"
        - "**/*.csv*"
    doc: |
      The output CSV file, containing mean values of the given
      variable over given geographies. Each line
      contains date, geo id (zip or county FIPS) and value
  data_dictionary:
    # Optional: looked up both at the top level and in subdirectories.
    type: File?
    outputBinding:
      glob:
        - "*.yaml"
        - "**/*.yaml"
    doc: |
      Data dictionary for the aggregated data

  errors:
    # Captured standard error; the file name comes from the top-level
    # `stderr` field of this document.
    type: stderr
129
# Standard error goes to `aggr-<nameroot of netcdf_data>.err`,
# written here as CWL string interpolation.
stderr: 'aggr-$(inputs.netcdf_data.nameroot).err'