#!/usr/bin/env cwl-runner
### Test harness for pm25_yearly_download.cwl
cwlVersion: v1.2
class: Workflow

# Optional CWL features switched on for this workflow.
# NOTE(review): this harness itself contains no expressions, scatter or
# multi-source inputs; apart from SubworkflowFeatureRequirement (both steps
# run other .cwl files) these are presumably kept for the wrapped
# pipeline -- confirm before trimming.
requirements:
  InlineJavascriptRequirement: {}
  MultipleInputFeatureRequirement: {}
  ScatterFeatureRequirement: {}
  StepInputExpressionRequirement: {}
  SubworkflowFeatureRequirement: {}
# All inputs of the original pipeline (pm25_yearly_download.cwl) plus
# `test_script`, which is consumed only by the `verify` step.
# Remove what is not needed.
inputs:
  component:
    type: string[]
    default:
      - BC
      - NH4
      - NIT
      - OM
      - SO4
      - SOIL
      - SS
    doc: |
      Optional components provided as percentages in a separate set
      of netCDF files

  connection_name:
    type: string
    doc: |
      The name of the section in the database.ini file or a literal
      `None` to skip over database ingestion step

  database:
    type: File
    default:
      class: File
      path: database.ini
    doc: |
      Path to database connection file, usually database.ini.
      This argument is ignored if `connection_name` == `None`

  downloads:
    type: Directory
    doc: |
      Local or AWS bucket folder containing netCDF grid files, downloaded
      and unpacked from Washington University in St. Louis (WUSTL) Box
      site. Annual and monthly data repositories are described in
      [WUSTL Atmospheric Composition Analysis Group](https://sites.wustl.edu/acag/datasets/surface-pm2-5/).

      The annual data for PM2.5 is also available in
      a Harvard URC AWS Bucket: `s3://nsaph-public/data/exposures/wustl/`

  geography:
    type: string
    doc: |
      Type of geography: zip codes or counties
      Supported values: "zip", "zcta" or "county"

  proxy:
    type: string?
    default: ''
    doc: HTTP/HTTPS Proxy if required

  ram:
    type: string
    default: 2GB
    doc: Runtime memory, available to the process

  shape_file_collection:
    type: string
    default: tiger
    doc: |
      [Collection of shapefiles](https://www2.census.gov/geo/tiger),
      either GENZ or TIGER

  strategy:
    type: string
    default: auto
    doc: |
      Rasterization strategy, see
      [documentation](https://nsaph-data-platform.github.io/nsaph-platform-docs/common/gridmet/doc/strategy.html)
      for the list of supported values and explanations

  table:
    type: string
    default: pm25_aggregated
    doc: The name of the table to store the aggregated data in

  test_script:
    type: File
    doc: File containing SQL test script

  variable:
    type: string
    default: PM25
    doc: |
      The main variable that is being aggregated over shapes. We have tested
      the pipeline for PM25

  years:
    type: int[]
    default:
      - 2000
      - 2001
      - 2002
      - 2003
      - 2004
      - 2005
      - 2006
      - 2007
      - 2008
      - 2009
      - 2010
      - 2011
      - 2012
      - 2013
      - 2014
      - 2015
      - 2016
      - 2017
steps:
  # Pipeline under test: every harness input except `test_script` is
  # forwarded one-to-one to the identically named input of the tool.
  execute:
    run: pm25_yearly_download.cwl
    in:
      proxy: proxy
      downloads: downloads
      geography: geography
      years: years
      variable: variable
      component: component
      strategy: strategy
      ram: ram
      shape_file_collection: shape_file_collection
      database: database
      connection_name: connection_name
      table: table
    out:
      - aggregate_data
      - data_dictionary
      - consolidated_data
      - shapes
      - aggregate_log
      - aggregate_err
      - ingest_log
      - index_log
      - vacuum_log
      - ingest_err
      - index_err
      - vacuum_err

  # Runs the SQL test script against the database.
  # `depends_on` is wired to an output of `execute`, so this step cannot
  # start until `execute` has produced `vacuum_err`.
  # NOTE(review): vacuum is presumably the last stage of the pipeline --
  # confirm against pm25_yearly_download.cwl.
  verify:
    run: run_test.cwl
    in:
      database: database
      connection_name: connection_name
      script: test_script
      depends_on: execute/vacuum_err
    out: [log, errors]
# Every output of both steps is re-exported, prefixed with the name of the
# step that produced it.
outputs:
  ## Generated by nsaph/util/cwl_collect_outputs.py from pm25_yearly_download.cwl:
  execute_aggregate_data:
    type: File[]
    outputSource: execute/aggregate_data

  execute_data_dictionary:
    type: File
    outputSource: execute/data_dictionary

  execute_consolidated_data:
    type: File[]
    outputSource: execute/consolidated_data

  execute_shapes:
    # Array of arrays whose items are File.
    type:
      type: array
      items:
        type: array
        items:
          - File
    outputSource: execute/shapes

  execute_aggregate_log:
    # Array with items of type Any.
    type:
      type: array
      items: Any
    outputSource: execute/aggregate_log

  execute_aggregate_err:
    type: File[]
    outputSource: execute/aggregate_err

  execute_ingest_log:
    type: File
    outputSource: execute/ingest_log

  execute_index_log:
    type: File
    outputSource: execute/index_log

  execute_vacuum_log:
    type: File
    outputSource: execute/vacuum_log

  execute_ingest_err:
    type: File
    outputSource: execute/ingest_err

  execute_index_err:
    type: File
    outputSource: execute/index_err

  execute_vacuum_err:
    type: File
    outputSource: execute/vacuum_err

  ## Generated by nsaph/util/cwl_collect_outputs.py from run_test.cwl:
  verify_log:
    type: File
    outputSource: verify/log

  verify_errors:
    type: File
    outputSource: verify/errors