# Source code for nsaph.dbt.gen_dbt_cwl

"""
Command line utility to generate a test skeleton for a
CWL pipeline.

Usage:

    python -m nsaph.dbt.gen_dbt_cwl <path/to/cwl/workflow> [<path/to/generated/test>]

If the second argument is omitted, the test is generated in the
same directory as the original workflow and is named `test_<workflow_name>`.

The test takes the same parameters as the original workflow plus a list
of files containing test cases generated by the `nsaph.dbt.create_test.py`
tool.

"""
#  Copyright (c) 2023. Harvard University
#
#  Developed by Research Software Engineering,
#  Faculty of Arts and Sciences, Research Computing (FAS RC)
#  Author: Michael A Bouzinier
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#

import os
import sys
from typing import Dict

import yaml

from nsaph.util.cwl_collect_outputs import collect
from nsaph.util.cwl_generator import CWLGenerator


class CWLTestGenerator(CWLGenerator):
    """
    Generates a CWL workflow that wraps an existing pipeline in a test.

    The generated workflow contains two steps: `execute`, which runs the
    original pipeline, and `verify`, which runs `run_test.cwl` with the
    script passed through the additional `test_script` input.
    """

    def __init__(self, path_to_pipeline: str, path_to_test: str):
        """
        :param path_to_pipeline: path to the CWL workflow to be tested,
            must end with `.cwl`
        :param path_to_test: path to the test workflow to be generated,
            must end with `.cwl`
        :raises ValueError: if either path does not end with `.cwl`
        """

        super().__init__(path_to_test)
        # Explicit exceptions instead of `assert`: assertions are removed
        # when Python runs with -O, which would silently skip validation
        for path in (path_to_pipeline, path_to_test):
            if not path.endswith(".cwl"):
                raise ValueError(
                    f"Expected a path ending with '.cwl', got: {path}"
                )
        self.path_to_pipeline = path_to_pipeline
        with open(path_to_pipeline) as f:
            self.cwl: Dict = yaml.safe_load(f)
        self.test = path_to_test
        self.basename = os.path.basename(path_to_pipeline)
        # The test runner is looked up next to the pipeline being tested
        self.path_to_runner = os.path.join(
            os.path.dirname(self.path_to_pipeline),
            "run_test.cwl"
        )
        # Name of the last output of the pipeline; set by write_out() and
        # used by write_verify_step() to order `verify` after `execute`
        self.last_execute_output = None

    def generate(self):
        """
        Writes the complete test workflow: header, inputs, steps
        and outputs, in this order.
        """

        self.write_header(
            requirements=self.cwl.get("requirements"),
            comment=f"Test harness for {self.basename}"
        )
        self.write_test_inputs()
        self.write_steps()
        self.write_outputs()

    def write_test_inputs(self):
        """
        Writes the inputs of the test: all inputs of the original
        pipeline plus the `test_script` file.
        """

        with open(self.test, "at") as test:
            print("# All inputs of original pipeline, remove what is not needed"
                  , file=test)
        # The file is closed before delegating to write_inputs() so that
        # the comment above is flushed first. Presumably write_inputs()
        # appends to the same file through its own handle -- TODO confirm
        # against CWLGenerator
        # A pipeline without inputs is treated as having an empty mapping,
        # consistently with write_in()
        inputs = (self.cwl.get("inputs") or {}).copy()
        inputs["test_script"] = {
            "type": "File",
            "doc": "File containing SQL test script"
        }
        self.write_inputs(inputs)

    def write_steps(self):
        """
        Writes the `steps` section consisting of the `execute`
        and `verify` steps.
        """

        with open(self.test, "at") as test:
            print("steps:", file=test)
            self.write_execute_step(test)
            self.write_verify_step(test)

    def write_execute_step(self, test):
        """
        Writes the `execute` step that runs the original pipeline.

        :param test: writable text stream of the test workflow
        """

        print("  execute:", file=test)
        # NOTE(review): `run` refers to the pipeline by its bare file name,
        # so the generated test presumably has to reside in the same
        # directory as the pipeline -- confirm
        print(f"    run: {self.basename}", file=test)
        self.write_in(test)
        self.write_out(test)
        print(file=test)

    def write_in(self, test):
        """
        Writes the `in` section of the `execute` step, passing every
        input of the original pipeline through under the same name.

        :param test: writable text stream of the test workflow
        """

        inputs = self.cwl.get("inputs")
        print("    in:", file=test)
        if inputs:
            for par in inputs:
                print(f"      {par}: {par}", file=test)

    def write_out(self, test):
        """
        Writes the outputs of the `execute` step and records the name
        of the last output of the pipeline in `last_execute_output`.

        :param test: writable text stream of the test workflow
        :raises ValueError: if the pipeline declares no outputs
        """

        collect("execute", self.path_to_pipeline, output=test, what="step")
        outputs = self.cwl.get("outputs")
        if not outputs:
            # The `verify` step needs an output of `execute` to depend on
            raise ValueError(
                f"Pipeline {self.path_to_pipeline} declares no outputs"
            )
        self.last_execute_output = list(outputs.keys())[-1]

    def write_verify_step(self, test):
        """
        Writes the `verify` step that runs `run_test.cwl`.

        Must be called after write_out(), which sets the name of the
        output the step depends on.

        :param test: writable text stream of the test workflow
        :raises ValueError: if the pipeline has no `database` or
            `connection_name` input
        """

        required = ("database", "connection_name")
        inputs = self.cwl.get("inputs") or {}
        missing = [par for par in required if par not in inputs]
        if missing:
            raise ValueError(
                f"Pipeline {self.path_to_pipeline} lacks required inputs: "
                + ", ".join(missing)
            )
        print("  verify:", file=test)
        print("    run: run_test.cwl", file=test)
        print("    in:", file=test)
        for par in required:
            print(f"      {par}: {par}", file=test)
        print("      script: test_script", file=test)
        # Depending on an output of `execute` makes `verify` run after it
        print(f"      depends_on: execute/{self.last_execute_output}", file=test)
        collect("verify", self.path_to_runner, output=test, what="step")
        print(file=test)

    def write_outputs(self):
        """
        Writes the `outputs` section, collecting the outputs of both
        the `execute` and the `verify` steps.
        """

        with open(self.test, "at") as test:
            print("outputs:", file=test)
            collect("execute", self.path_to_pipeline, output=test,
                    what="pipeline")
            collect("verify", self.path_to_runner, output=test,
                    what="pipeline")


if __name__ == '__main__':
    # Entry point: the first argument is the path to the workflow to be
    # tested, the optional second argument is the path to the generated test
    if len(sys.argv) < 2:
        # Report a usage message instead of failing with a bare IndexError
        print(
            "Usage: python -m nsaph.dbt.gen_dbt_cwl "
            "<path/to/cwl/workflow> [<path/to/generated/test>]",
            file=sys.stderr
        )
        sys.exit(2)
    arg1 = sys.argv[1]
    if len(sys.argv) > 2:
        arg2 = sys.argv[2]
    else:
        # By default, the test is placed next to the workflow and its
        # file name is prefixed with "test_"
        dirname, basename = os.path.split(arg1)
        arg2 = os.path.join(dirname, "test_" + basename)
    generator = CWLTestGenerator(arg1, arg2)
    generator.generate()