From 7c355f782e6c4eb02cefaaca2a34a4ebc17fb892 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Mon, 23 Sep 2019 10:26:13 -0400 Subject: [PATCH 001/476] First Cleanup --- .gitignore | 20 +---- Workflows/bwa_sort_merge.cwl | 86 --------------------- Workflows/make_bam.cwl | 107 --------------------------- Workflows/msisensor-run-both.cwl | 59 --------------- Workflows/mutect_wf.cwl | 65 ---------------- Workflows/snp-pileup-to-facets.cwl | 80 -------------------- test-yamls/msi-test-one.yaml | 4 - test-yamls/msi-test.yaml | 4 - test-yamls/snp-pileup-to-facets.yaml | 4 - 9 files changed, 4 insertions(+), 425 deletions(-) delete mode 100644 Workflows/bwa_sort_merge.cwl delete mode 100644 Workflows/make_bam.cwl delete mode 100644 Workflows/msisensor-run-both.cwl delete mode 100644 Workflows/mutect_wf.cwl delete mode 100644 Workflows/snp-pileup-to-facets.cwl delete mode 100644 test-yamls/msi-test-one.yaml delete mode 100644 test-yamls/msi-test.yaml delete mode 100644 test-yamls/snp-pileup-to-facets.yaml diff --git a/.gitignore b/.gitignore index 76d33366..84229f45 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,7 @@ __pycache__/ # Distribution / packaging .Python +env/ build/ develop-eggs/ dist/ @@ -23,7 +24,6 @@ wheels/ *.egg-info/ .installed.cfg *.egg -MANIFEST # PyInstaller # Usually these files are written by a python script from a template @@ -54,7 +54,6 @@ coverage.xml # Django stuff: *.log local_settings.py -db.sqlite3 # Flask stuff: instance/ @@ -81,14 +80,13 @@ celerybeat-schedule # SageMath parsed files *.sage.py -# Environments +# dotenv .env + +# virtualenv .venv -env/ venv/ ENV/ -env.bak/ -venv.bak/ # Spyder project settings .spyderproject @@ -102,13 +100,3 @@ venv.bak/ # mypy .mypy_cache/ - -# Mac -.DS_Store - -# vscode -.vscode/* -!.vscode/settings.json -!.vscode/tasks.json -!.vscode/launch.json -!.vscode/extensions.json \ No newline at end of file diff --git a/Workflows/bwa_sort_merge.cwl b/Workflows/bwa_sort_merge.cwl deleted file mode 100644 index 
99b25be9..00000000 --- a/Workflows/bwa_sort_merge.cwl +++ /dev/null @@ -1,86 +0,0 @@ -class: Workflow -cwlVersion: v1.0 -id: bwa_sort_merge -label: bwa_sort_merge - -inputs: - - id: reference_sequence - type: File - secondaryFiles: - - .amb - - .ann - - .bwt - - .pac - - .sa - - .fai - - id: read_pair - type: - type: array - items: - items: File - type: array - - id: sample_id - type: string - - id: lane_id - type: 'string[]' - -outputs: - - id: sample_id_output - outputSource: - - bwa_sort/sample_id_output - type: - - string - - type: array - items: string - - id: output_md_metrics - outputSource: - - gatk_markduplicatesgatk/output_md_metrics - type: File - - id: output_md_bam - outputSource: - - gatk_markduplicatesgatk/output_md_bam - type: File - -steps: - - id: samtools_merge - in: - - id: input_bams - source: - - bwa_sort/output_file - out: - - id: output_file - run: ../CommandLineTools/samtools-merge_1.9/samtools-merge_1.9.cwl - - id: bwa_sort - in: - - id: reference_sequence - source: reference_sequence - - id: read_pair - source: - - read_pair - - id: sample_id - source: sample_id - - id: lane_id - source: lane_id - out: - - id: output_file - - id: sample_id_output - - id: lane_id_output - run: ./bwa_sort.cwl - label: bwa_sort - scatter: - - read_pair - - lane_id - scatterMethod: dotproduct - - id: gatk_markduplicatesgatk - in: - - id: input_bam - source: samtools_merge/output_file - out: - - id: output_md_bam - - id: output_md_metrics - run: ../CommandLineTools/mark-duplicates_4.1.0.0/mark-duplicates_4.1.0.0.cwl - label: GATK MarkDuplicates - -requirements: - - class: SubworkflowFeatureRequirement - - class: ScatterFeatureRequirement diff --git a/Workflows/make_bam.cwl b/Workflows/make_bam.cwl deleted file mode 100644 index 5d1dfdef..00000000 --- a/Workflows/make_bam.cwl +++ /dev/null @@ -1,107 +0,0 @@ -class: Workflow -cwlVersion: v1.0 -id: make_bam -label: make_bam - -inputs: - - id: read_pairs_normal - type: - type: array - items: - items: File - type: 
array - - id: lane_ids_normal - type: 'string[]' - - id: reference_sequence - type: File - secondaryFiles: - - .amb - - .ann - - .bwt - - .pac - - .sa - - .fai - - id: sample_id_normal - type: string - - id: sample_id_tumor - type: string - - id: read_pairs_tumor - type: - type: array - items: - items: File - type: array - - id: lane_ids_tumor - type: 'string[]' - -outputs: - - id: sample_id_output_normal - outputSource: - - make_bam_Normal/sample_id_output - type: - - string - - type: array - items: string - - id: normal_bam - outputSource: - - make_bam_Normal/output_md_bam - type: File - - id: sample_id_output_tumor - outputSource: - - make_bam_Tumor/sample_id_output - type: - - string - - type: array - items: string - - id: tumor_bam - outputSource: - - make_bam_Tumor/output_md_bam - type: File - - id: tumor_metrics - outputSource: - - make_bam_Tumor/output_md_metrics - type: File - - id: normal_metrics - outputSource: - - make_bam_Normal/output_md_metrics - type: File - -steps: - - id: make_bam_Normal - in: - - id: reference_sequence - source: reference_sequence - - id: read_pair - source: - - read_pairs_normal - - id: sample_id - source: sample_id_normal - - id: lane_id - source: - - lane_ids_normal - out: - - id: sample_id_output - - id: output_md_metrics - - id: output_md_bam - run: ./bwa_sort_merge.cwl - label: make_bam_Normal - - id: make_bam_Tumor - in: - - id: reference_sequence - source: reference_sequence - - id: read_pair - source: - - read_pairs_tumor - - id: sample_id - source: sample_id_tumor - - id: lane_id - source: - - lane_ids_tumor - out: - - id: sample_id_output - - id: output_md_metrics - - id: output_md_bam - run: ./bwa_sort_merge.cwl - label: make_bam_Tumor -requirements: - - class: SubworkflowFeatureRequirement diff --git a/Workflows/msisensor-run-both.cwl b/Workflows/msisensor-run-both.cwl deleted file mode 100644 index a9d2f373..00000000 --- a/Workflows/msisensor-run-both.cwl +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env cwl-runner - 
-$namespaces: - dct: http://purl.org/dc/terms/ - foaf: http://xmlns.com/foaf/0.1/ - doap: http://usefulinc.com/ns/doap# - -cwlVersion: v1.0 - -class: Workflow -id: msisensor-run-both -requirements: - StepInputExpressionRequirement: {} - MultipleInputFeatureRequirement: {} - ScatterFeatureRequirement: {} - SubworkflowFeatureRequirement: {} - InlineJavascriptRequirement: {} - -inputs: - normal_bam: - type: File - secondaryFiles: [ ".bai" ] - tumor_bam: - type: File - secondaryFiles: [ ".bai" ] - output_prefix: string - msi_file: File - -outputs: - msisensor_0.2_output: - type: File - outputSource: msisensor_0.2/output - - msisensor_0.6_output: - type: File - outputSource: msisensor_0.6/output - -steps: - msisensor_0.2: - run: ../CommandLineTools/msisensor_0.2/msisensor-0.2.cwl - in: - output_prefix: output_prefix - d: msi_file - n: normal_bam - t: tumor_bam - o: - valueFrom: ${ return inputs.output_prefix + "_0.2.txt"; } - out: [ output ] - - msisensor_0.6: - run: ../CommandLineTools/msisensor_0.6/msisensor-0.6.cwl - in: - output_prefix: output_prefix - d: msi_file - n: normal_bam - t: tumor_bam - o: - valueFrom: ${ return inputs.output_prefix + "_0.6.txt"; } - out: [ output ] diff --git a/Workflows/mutect_wf.cwl b/Workflows/mutect_wf.cwl deleted file mode 100644 index c046e18a..00000000 --- a/Workflows/mutect_wf.cwl +++ /dev/null @@ -1,65 +0,0 @@ -class: Workflow -cwlVersion: v1.0 -id: mutect_wf -label: mutect_wf -inputs: - - id: scatter-count - type: int? - - id: output - type: string - - id: intervals - type: File? - - id: reference - type: File? - - id: tumor_sample - type: string? - - id: normal_sample - type: string? - - id: input_normal - type: File? - - id: input_tumor - type: File? - -outputs: - - id: output_1 - outputSource: - - mutect2/output - type: 'File[]?' 
- -steps: - - id: scatterintervals - in: - - id: reference - source: reference - - id: intervals - source: intervals - - id: scatter-count - source: scatter-count - - id: output - source: output - out: - - id: interval_files - run: ../CommandLineTools/scatterintervals_4.1.0.0/scatterintervals_4.1.0.0.cwl - label: ScatterIntervals - - id: mutect2 - in: - - id: reference - source: reference - - id: intervals - source: scatterintervals/interval_files - - id: input - source: input_tumor - - id: tumor_sample - source: tumor_sample - - id: input_normal - source: input_normal - - id: normal_sample - source: normal_sample - out: - - id: output - run: ../CommandLineTools/mutect2_4.1.0.0/mutect2_4.1.0.0.cwl - label: Mutect2 - scatter: - - intervals -requirements: - - class: ScatterFeatureRequirement diff --git a/Workflows/snp-pileup-to-facets.cwl b/Workflows/snp-pileup-to-facets.cwl deleted file mode 100644 index 619a8753..00000000 --- a/Workflows/snp-pileup-to-facets.cwl +++ /dev/null @@ -1,80 +0,0 @@ -#!/usr/bin/env cwl-runner - -class: Workflow -cwlVersion: v1.0 - -requirements: - InlineJavascriptRequirement: {} - StepInputExpressionRequirement: {} - SubworkflowFeatureRequirement: {} - MultipleInputFeatureRequirement: {} - ScatterFeatureRequirement: {} - -inputs: - facets_vcf: - type: File - secondaryFiles: - - .gz - - bam_normal: - type: File - - bam_tumor: - type: File - - tumor_sample_name: - type: string - -outputs: - - snp_pileup_out: - type: File - outputSource: do_snp_pileup/output_file - - facets_png: - type: File[]? - outputSource: do_facets/png_files - - facets_txt_purity: - type: File? - outputSource: do_facets/txt_files_purity - - facets_txt_hisens: - type: File? - outputSource: do_facets/txt_files_hisens - - facets_out_files: - type: File[]? - outputSource: do_facets/out_files - - facets_rdata: - type: File[]? - outputSource: do_facets/rdata_files - - facets_seg: - type: File[]? 
- outputSource: do_facets/seg_files - -steps: - do_snp_pileup: - run: ../CommandLineTools/snp-pileup_0.1.1/htstools-0.1.1.cwl - in: - vcf_file: facets_vcf - bam_normal: bam_normal - bam_tumor: bam_tumor - output_file: - valueFrom: ${ return inputs.bam_normal.basename.replace(".bam", "") + "_" + inputs.bam_tumor.basename.replace(".bam", "") + ".dat.gz"; } - out: [ output_file ] - - do_facets: - run: ../CommandLineTools/facets_1.5.6/facets.doFacets-1.5.6.cwl - in: - genome: - valueFrom: ${ return "hg19"; } - counts_file: do_snp_pileup/output_file - TAG: - valueFrom: ${ return inputs.counts_file.basename.replace(".dat.gz", ""); } - tumor_id: tumor_sample_name - directory: - valueFrom: ${ return "."; } - out: [ png_files, txt_files_purity, txt_files_hisens, out_files, rdata_files, seg_files ] diff --git a/test-yamls/msi-test-one.yaml b/test-yamls/msi-test-one.yaml deleted file mode 100644 index 4115b769..00000000 --- a/test-yamls/msi-test-one.yaml +++ /dev/null @@ -1,4 +0,0 @@ -n: {class: File, path: /juno/work/pi/cmopipeline/data/uncategorized/re-run/normal_sample.sorted.md.bqsr.bam} -t: {class: File, path: /juno/work/pi/cmopipeline/data/uncategorized/re-run/tumor_sample.sorted.md.bqsr.bam} -d: {class: File, path: /juno/work/taylorlab/cmopipeline/mskcc-igenomes/igenomes/Homo_sapiens/GATK/GRCh37/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.fasta.microsatellites.list} -o: "my_output_prefix" diff --git a/test-yamls/msi-test.yaml b/test-yamls/msi-test.yaml deleted file mode 100644 index 5af76e7b..00000000 --- a/test-yamls/msi-test.yaml +++ /dev/null @@ -1,4 +0,0 @@ -normal_bam: {class: File, path: /juno/work/pi/cmopipeline/data/uncategorized/re-run/normal_sample.sorted.md.bqsr.bam} -tumor_bam: {class: File, path: /juno/work/pi/cmopipeline/data/uncategorized/re-run/tumor_sample.sorted.md.bqsr.bam} -msi_file: {class: File, path: 
/juno/work/taylorlab/cmopipeline/mskcc-igenomes/igenomes/Homo_sapiens/GATK/GRCh37/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.fasta.microsatellites.list} -output_prefix: "msi_run_prefix" diff --git a/test-yamls/snp-pileup-to-facets.yaml b/test-yamls/snp-pileup-to-facets.yaml deleted file mode 100644 index 6bdb254a..00000000 --- a/test-yamls/snp-pileup-to-facets.yaml +++ /dev/null @@ -1,4 +0,0 @@ -bam_normal: {class: File, path: /juno/work/pi/cmopipeline/data/uncategorized/re-run/normal_sample.sorted.md.bqsr.bam} -bam_tumor: {class: File, path: /juno/work/pi/cmopipeline/data/uncategorized/re-run/tumor_sample.sorted.md.bqsr.bam} -tumor_sample_name: tumor_sample -facets_vcf: {class: File, path: /juno/work/taylorlab/cmopipeline/mskcc-igenomes/igenomes/Homo_sapiens/GATK/b37/dbsnp_137.b37__RmDupsClean__plusPseudo50__DROP_SORT.vcf } From 6faae61afdccdeb6ece29e8d3e84eca5ea48ce79 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Mon, 23 Sep 2019 10:39:07 -0400 Subject: [PATCH 002/476] Adding cookiecutter :wrench: :book: --- .editorconfig | 21 ++++++ .github/ISSUE_TEMPLATE.md | 15 ++++ .travis.yml | 29 ++++++++ LICENSE | 16 ++++ MANIFEST.in | 8 ++ Makefile | 77 ++++++++++++++++++++ README.md | 32 ++++++++ __init__.py | 7 ++ cwl_commandlinetools.py | 3 + cwl_commandlinetools/__init__.py | 7 ++ cwl_commandlinetools/cwl_commandlinetools.py | 3 + requirements_dev.txt | 11 +++ setup.cfg | 26 +++++++ setup.py | 46 ++++++++++++ tests/__init__.py | 3 + tests/test_cwl_commandlinetools.py | 25 +++++++ tox.ini | 25 +++++++ 17 files changed, 354 insertions(+) create mode 100644 .editorconfig create mode 100644 .github/ISSUE_TEMPLATE.md create mode 100644 .travis.yml create mode 100644 LICENSE create mode 100644 MANIFEST.in create mode 100644 Makefile create mode 100644 README.md create mode 100644 __init__.py create mode 100644 cwl_commandlinetools.py create mode 100644 cwl_commandlinetools/__init__.py create mode 100644 cwl_commandlinetools/cwl_commandlinetools.py create mode 100644 
requirements_dev.txt create mode 100644 setup.cfg create mode 100644 setup.py create mode 100644 tests/__init__.py create mode 100644 tests/test_cwl_commandlinetools.py create mode 100644 tox.ini diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 00000000..d4a2c440 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,21 @@ +# http://editorconfig.org + +root = true + +[*] +indent_style = space +indent_size = 4 +trim_trailing_whitespace = true +insert_final_newline = true +charset = utf-8 +end_of_line = lf + +[*.bat] +indent_style = tab +end_of_line = crlf + +[LICENSE] +insert_final_newline = false + +[Makefile] +indent_style = tab diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md new file mode 100644 index 00000000..14740fbe --- /dev/null +++ b/.github/ISSUE_TEMPLATE.md @@ -0,0 +1,15 @@ +* cwl-commandlinetools version: +* Python version: +* Operating System: + +### Description + +Describe what you were trying to get done. +Tell us what happened, what went wrong, and what you expected to happen. + +### What I Did + +``` +Paste the command(s) you ran and the output. +If there was a crash, please include the traceback here. +``` diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 00000000..df237ae4 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,29 @@ +# Config file for automatic testing at travis-ci.org + +language: python +python: + - 3.7 + - 3.6 + - 3.5 + - 2.7 + +# Command to install dependencies, e.g. pip install -r requirements.txt --use-mirrors +install: pip install -U tox-travis + +# Command to run tests, e.g. 
python setup.py test +script: tox + +# Assuming you have installed the travis-ci CLI tool, after you +# create the Github repo and add it to Travis, run the +# following command to finish PyPI deployment setup: +# $ travis encrypt --add deploy.password +deploy: + provider: pypi + distributions: sdist bdist_wheel + user: msk-access + password: + secure: PLEASE_REPLACE_ME + on: + tags: true + repo: msk-access/cwl_commandlinetools + python: 3.7 diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..e994ec71 --- /dev/null +++ b/LICENSE @@ -0,0 +1,16 @@ +Apache Software License 2.0 + +Copyright (c) 2019, msk-access + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+ diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 00000000..d7a58ae3 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,8 @@ +include LICENSE +include README.md + +recursive-include tests * +recursive-exclude * __pycache__ +recursive-exclude * *.py[co] + +recursive-include docs *.jpg *.png *.gif diff --git a/Makefile b/Makefile new file mode 100644 index 00000000..4dba77a2 --- /dev/null +++ b/Makefile @@ -0,0 +1,77 @@ +.PHONY: clean clean-test clean-pyc clean-build docs help +.DEFAULT_GOAL := help + +define BROWSER_PYSCRIPT +import os, webbrowser, sys + +try: + from urllib import pathname2url +except: + from urllib.request import pathname2url + +webbrowser.open("file://" + pathname2url(os.path.abspath(sys.argv[1]))) +endef +export BROWSER_PYSCRIPT + +define PRINT_HELP_PYSCRIPT +import re, sys + +for line in sys.stdin: + match = re.match(r'^([a-zA-Z_-]+):.*?## (.*)$$', line) + if match: + target, help = match.groups() + print("%-20s %s" % (target, help)) +endef +export PRINT_HELP_PYSCRIPT + +BROWSER := python -c "$$BROWSER_PYSCRIPT" + +help: + @python -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST) + +clean: clean-build clean-pyc clean-test ## remove all build, test, coverage and Python artifacts + +clean-build: ## remove build artifacts + rm -fr build/ + rm -fr dist/ + rm -fr .eggs/ + find . -name '*.egg-info' -exec rm -fr {} + + find . -name '*.egg' -exec rm -f {} + + +clean-pyc: ## remove Python file artifacts + find . -name '*.pyc' -exec rm -f {} + + find . -name '*.pyo' -exec rm -f {} + + find . -name '*~' -exec rm -f {} + + find . 
-name '__pycache__' -exec rm -fr {} + + +clean-test: ## remove test and coverage artifacts + rm -fr .tox/ + rm -f .coverage + rm -fr htmlcov/ + rm -fr .pytest_cache + +lint: ## check style with flake8 + flake8 cwl_commandlinetools tests + +test: ## run tests quickly with the default Python + pytest + +test-all: ## run tests on every Python version with tox + tox + +coverage: ## check code coverage quickly with the default Python + coverage run --source cwl_commandlinetools -m pytest + coverage report -m + coverage html + $(BROWSER) htmlcov/index.html + +release: dist ## package and upload a release + twine upload dist/* + +dist: clean ## builds source and wheel package + python setup.py sdist + python setup.py bdist_wheel + ls -l dist + +install: clean ## install the package to the active Python's site-packages + python setup.py install diff --git a/README.md b/README.md new file mode 100644 index 00000000..cf01fb07 --- /dev/null +++ b/README.md @@ -0,0 +1,32 @@ +--- +description: Central location for storing common workflow language based command line tools for building msk-access workflows +--- + +# MSK-ACCESS BAM Generation workflow + +[![PyPI version](https://badge.fury.io/py/cwl-commandlinetools.svg)](https://badge.fury.io/py/cwl-commandlinetools) +[![Build Status](https://travis-ci.org/msk-access/cwl-commandlinetools.svg?branch=master)](https://travis-ci.org/msk-access/cwl-commandlinetools/) +[![Updates](https://pyup.io/repos/github/msk-access/cwl-commandlinetools/shield.svg)](https://pyup.io/repos/github/msk-access/cwl-commandlinetools/) +[![Python 3](https://pyup.io/repos/github/msk-access/cwl-commandlinetools/python-3-shield.svg)](https://pyup.io/repos/github/msk-access/cwl-commandlinetools/) + +- Free software: Apache Software License 2.0 +- Documentation: https://msk-access.gitbook.io/cwl-commandlinetools + +## Features + +Create command line tools in common workflow language to generate msk-access workflows. 
+ +## Installation + +Clone the repository: + +``` +git clone --depth 50 https://github.com/msk-access/cwl-commandlinetools.git +``` + +## Credits + +This package was created with Cookiecutter_ and the `audreyr/cookiecutter-pypackage`_ project template. + +- Cookiecutter: https://github.com/audreyr/cookiecutter +- `audreyr/cookiecutter-pypackage`: https://github.com/audreyr/cookiecutter-pypackage diff --git a/__init__.py b/__init__.py new file mode 100644 index 00000000..846adb87 --- /dev/null +++ b/__init__.py @@ -0,0 +1,7 @@ +# -*- coding: utf-8 -*- + +"""Top-level package for cwl-commandlinetools.""" + +__author__ = """msk-access""" +__email__ = 'msk.access@gmail.com' +__version__ = '0.4.0' diff --git a/cwl_commandlinetools.py b/cwl_commandlinetools.py new file mode 100644 index 00000000..7fbbae4f --- /dev/null +++ b/cwl_commandlinetools.py @@ -0,0 +1,3 @@ +# -*- coding: utf-8 -*- + +"""Main module.""" diff --git a/cwl_commandlinetools/__init__.py b/cwl_commandlinetools/__init__.py new file mode 100644 index 00000000..69862367 --- /dev/null +++ b/cwl_commandlinetools/__init__.py @@ -0,0 +1,7 @@ +# -*- coding: utf-8 -*- + +"""Top-level package for cwl-commandlinetools.""" + +__author__ = """msk-access""" +__email__ = 'msk.access@gmail.com' +__version__ = '0.1.0' diff --git a/cwl_commandlinetools/cwl_commandlinetools.py b/cwl_commandlinetools/cwl_commandlinetools.py new file mode 100644 index 00000000..7fbbae4f --- /dev/null +++ b/cwl_commandlinetools/cwl_commandlinetools.py @@ -0,0 +1,3 @@ +# -*- coding: utf-8 -*- + +"""Main module.""" diff --git a/requirements_dev.txt b/requirements_dev.txt new file mode 100644 index 00000000..ca3c21ef --- /dev/null +++ b/requirements_dev.txt @@ -0,0 +1,11 @@ +pip==19.2.3 +bump2version==0.5.11 +wheel==0.33.6 +watchdog==0.9.0 +flake8==3.7.8 +tox==3.14.0 +coverage==4.5.4 +twine==1.14.0 + +pytest==4.6.5 +pytest-runner==5.1 diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 00000000..f07ef97f --- /dev/null +++ b/setup.cfg 
@@ -0,0 +1,26 @@ +[bumpversion] +current_version = 0.1.0 +commit = True +tag = True + +[bumpversion:file:setup.py] +search = version='{current_version}' +replace = version='{new_version}' + +[bumpversion:file:cwl_commandlinetools/__init__.py] +search = __version__ = '{current_version}' +replace = __version__ = '{new_version}' + +[bdist_wheel] +universal = 1 + +[flake8] +exclude = docs + +[aliases] +# Define setup.py command aliases here +test = pytest + +[tool:pytest] +collect_ignore = ['setup.py'] + diff --git a/setup.py b/setup.py new file mode 100644 index 00000000..4c0ba280 --- /dev/null +++ b/setup.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +"""The setup script.""" + +from setuptools import setup, find_packages + +with open('README.md') as readme_file: + readme = readme_file.read() + +#with open('HISTORY.rst') as history_file: +# history = history_file.read() + +requirements = [ ] + +setup_requirements = ['pytest-runner', ] + +test_requirements = ['pytest>=3', ] + +setup( + author="msk-access", + author_email='msk.access@gmail.com', + python_requires='>=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*', + classifiers=[ + 'Development Status :: 2 - Pre-Alpha', + 'Intended Audience :: Developers', + 'License :: OSI Approved :: Apache Software License', + 'Natural Language :: English', + 'Programming Language :: Python :: 2.7', + 'Programming Language :: Python :: 3.7', + ], + description="Central location for CWL CommandLineTools", + install_requires=requirements, + license="Apache Software License 2.0", + long_description=readme + '\n\n' + history, + include_package_data=True, + keywords='cwl_commandlinetools', + name='cwl_commandlinetools', + packages=find_packages(include=['cwl_commandlinetools', 'cwl_commandlinetools.*']), + setup_requires=setup_requirements, + test_suite='tests', + tests_require=test_requirements, + url='https://github.com/msk-access/cwl_commandlinetools', + version='0.4.0', + zip_safe=False, +) diff --git 
a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 00000000..688b77f7 --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1,3 @@ +# -*- coding: utf-8 -*- + +"""Unit test package for cwl_commandlinetools.""" diff --git a/tests/test_cwl_commandlinetools.py b/tests/test_cwl_commandlinetools.py new file mode 100644 index 00000000..12359623 --- /dev/null +++ b/tests/test_cwl_commandlinetools.py @@ -0,0 +1,25 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +"""Tests for `cwl_commandlinetools` package.""" + +import pytest + + +from cwl_commandlinetools import cwl_commandlinetools + + +@pytest.fixture +def response(): + """Sample pytest fixture. + + See more at: http://doc.pytest.org/en/latest/fixture.html + """ + # import requests + # return requests.get('https://github.com/audreyr/cookiecutter-pypackage') + + +def test_content(response): + """Sample pytest test function with the pytest fixture as an argument.""" + # from bs4 import BeautifulSoup + # assert 'GitHub' in BeautifulSoup(response.content).title.string diff --git a/tox.ini b/tox.ini new file mode 100644 index 00000000..978754f0 --- /dev/null +++ b/tox.ini @@ -0,0 +1,25 @@ +[tox] +envlist = py27, py37 flake8 + +[travis] +python = + 3.7: py37 + 2.7: py27 + +[testenv:flake8] +basepython = python +deps = flake8 +commands = flake8 cwl_commandlinetools + +[testenv] +setenv = + PYTHONPATH = {toxinidir} +deps = + -r{toxinidir}/requirements_dev.txt +; If you want to make tox run the tests with the same versions, create a +; requirements.txt with the pinned versions and uncomment the following line: +; -r{toxinidir}/requirements.txt +commands = + pip install -U pip + pytest --basetemp={envtmpdir} + From 99f3bf3ca2cc4155a27056e4221d98302b66fd5e Mon Sep 17 00:00:00 2001 From: ionox0 Date: Mon, 23 Sep 2019 10:48:05 -0400 Subject: [PATCH 003/476] ignore pycharm folder --- .gitignore | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 76d33366..71364a46 100644 
--- a/.gitignore +++ b/.gitignore @@ -111,4 +111,7 @@ venv.bak/ !.vscode/settings.json !.vscode/tasks.json !.vscode/launch.json -!.vscode/extensions.json \ No newline at end of file +!.vscode/extensions.json + +# pycharm +.idea \ No newline at end of file From cbdae62354eb4783ca8c29ccd784facda6aaa17e Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Mon, 23 Sep 2019 14:52:34 +0000 Subject: [PATCH 004/476] GitBook: [master] 2 pages modified --- README.md | 2 ++ SUMMARY.md | 4 ++++ 2 files changed, 6 insertions(+) create mode 100644 README.md create mode 100644 SUMMARY.md diff --git a/README.md b/README.md new file mode 100644 index 00000000..8b694edc --- /dev/null +++ b/README.md @@ -0,0 +1,2 @@ +# Initial page + diff --git a/SUMMARY.md b/SUMMARY.md new file mode 100644 index 00000000..5da1733d --- /dev/null +++ b/SUMMARY.md @@ -0,0 +1,4 @@ +# Table of contents + +* [Initial page](README.md) + From 7b0111f2f852aef4908ec724e2ce03ab29dd1662 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Mon, 23 Sep 2019 15:02:25 +0000 Subject: [PATCH 005/476] GitBook: [develop] 45 pages modified --- .github/README.md | 2 + .../{ISSUE_TEMPLATE.md => issue_template.md} | 12 +- README.md | 24 +-- SUMMARY.md | 26 ++++ abra2_2.17/README.md => abra2_2.17.md | 27 ++-- abra2_2.19.md | 21 +++ abra2_2.19/README.md | 21 --- bedtools_genomecov_v2.28.0_cv2.md | 43 ++++++ bedtools_genomecov_v2.28.0_cv2/README.md | 56 ------- bedtools_merge_v2.28.0_cv2.md | 43 ++++++ bedtools_merge_v2.28.0_cv2/README.md | 57 ------- disambiguate_1.0.0.md | 53 +++++++ disambiguate_1.0.0/README.md | 52 ------- gatk_ApplyBQSR_4.1.2.0/README.md | 128 ---------------- gatk_BaseRecalibrator_4.1.2.0/README.md | 142 ------------------ gatk_applybqsr_4.1.2.0.md | 43 ++++++ gatk_baserecalibrator_4.1.2.0.md | 43 ++++++ marianas_collapsing_first_pass_1.8.1.md | 19 +++ .../README.md | 19 --- marianas_collapsing_second_pass_1.8.1.md | 19 +++ .../README.md | 19 --- marianas_process_loop_umi_1.8.1.md | 19 +++ 
marianas_process_loop_umi_1.8.1/README.md | 19 --- marianas_separate_bams_1.8.1.md | 33 ++++ marianas_separate_bams_1.8.1/README.md | 33 ---- ... picard_add_or_replace_read_groups_1.96.md | 22 +-- ...collect_alignment_summary_metrics_2.8.1.md | 20 +-- ....md => picard_fix_mate_information_1.96.md | 27 ++-- picard_mark_duplicates_1.96.md | 20 +++ picard_mark_duplicates_1.96/README.md | 20 --- picard_mark_duplicates_2.8.1.md | 20 +++ picard_mark_duplicates_2.8.1/README.md | 20 --- .../README.md => trim_galore_0.6.2.md | 27 ++-- utilities_ubuntu_18.04.md | 18 +++ utilities_ubuntu_18.04/README.md | 17 --- .../README.md => waltz_count_reads_3.1.1.md | 20 +-- ...README.md => waltz_pileupmatrices_3.1.1.md | 20 +-- 37 files changed, 526 insertions(+), 698 deletions(-) create mode 100644 .github/README.md rename .github/{ISSUE_TEMPLATE.md => issue_template.md} (50%) create mode 100644 SUMMARY.md rename abra2_2.17/README.md => abra2_2.17.md (76%) create mode 100644 abra2_2.19.md delete mode 100644 abra2_2.19/README.md create mode 100644 bedtools_genomecov_v2.28.0_cv2.md delete mode 100644 bedtools_genomecov_v2.28.0_cv2/README.md create mode 100644 bedtools_merge_v2.28.0_cv2.md delete mode 100644 bedtools_merge_v2.28.0_cv2/README.md create mode 100644 disambiguate_1.0.0.md delete mode 100644 disambiguate_1.0.0/README.md delete mode 100644 gatk_ApplyBQSR_4.1.2.0/README.md delete mode 100644 gatk_BaseRecalibrator_4.1.2.0/README.md create mode 100644 gatk_applybqsr_4.1.2.0.md create mode 100644 gatk_baserecalibrator_4.1.2.0.md create mode 100644 marianas_collapsing_first_pass_1.8.1.md delete mode 100644 marianas_collapsing_first_pass_1.8.1/README.md create mode 100644 marianas_collapsing_second_pass_1.8.1.md delete mode 100644 marianas_collapsing_second_pass_1.8.1/README.md create mode 100644 marianas_process_loop_umi_1.8.1.md delete mode 100644 marianas_process_loop_umi_1.8.1/README.md create mode 100644 marianas_separate_bams_1.8.1.md delete mode 100644 
marianas_separate_bams_1.8.1/README.md rename picard_add_or_replace_read_groups_1.96/README.md => picard_add_or_replace_read_groups_1.96.md (80%) rename picard_collect_alignment_summary_metrics_2.8.1/README.md => picard_collect_alignment_summary_metrics_2.8.1.md (86%) rename picard_fix_mate_information_1.96/README.md => picard_fix_mate_information_1.96.md (74%) create mode 100644 picard_mark_duplicates_1.96.md delete mode 100644 picard_mark_duplicates_1.96/README.md create mode 100644 picard_mark_duplicates_2.8.1.md delete mode 100644 picard_mark_duplicates_2.8.1/README.md rename trim_galore_0.6.2/README.md => trim_galore_0.6.2.md (82%) create mode 100644 utilities_ubuntu_18.04.md delete mode 100644 utilities_ubuntu_18.04/README.md rename waltz_count_reads_3.1.1/README.md => waltz_count_reads_3.1.1.md (72%) rename waltz_pileupmatrices_3.1.1/README.md => waltz_pileupmatrices_3.1.1.md (72%) diff --git a/.github/README.md b/.github/README.md new file mode 100644 index 00000000..c41e0e04 --- /dev/null +++ b/.github/README.md @@ -0,0 +1,2 @@ +# .github + diff --git a/.github/ISSUE_TEMPLATE.md b/.github/issue_template.md similarity index 50% rename from .github/ISSUE_TEMPLATE.md rename to .github/issue_template.md index 14740fbe..a8e10db8 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/issue_template.md @@ -1,15 +1,17 @@ +# ISSUE\_TEMPLATE + * cwl-commandlinetools version: * Python version: * Operating System: -### Description +## Description -Describe what you were trying to get done. -Tell us what happened, what went wrong, and what you expected to happen. +Describe what you were trying to get done. Tell us what happened, what went wrong, and what you expected to happen. -### What I Did +## What I Did -``` +```text Paste the command(s) you ran and the output. If there was a crash, please include the traceback here. 
``` + diff --git a/README.md b/README.md index cf01fb07..79c5f73f 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,15 @@ --- -description: Central location for storing common workflow language based command line tools for building msk-access workflows +description: >- + Central location for storing common workflow language based command line tools + for building msk-access workflows --- -# MSK-ACCESS BAM Generation workflow +# MSK-ACCESS Command Line Tools -[![PyPI version](https://badge.fury.io/py/cwl-commandlinetools.svg)](https://badge.fury.io/py/cwl-commandlinetools) -[![Build Status](https://travis-ci.org/msk-access/cwl-commandlinetools.svg?branch=master)](https://travis-ci.org/msk-access/cwl-commandlinetools/) -[![Updates](https://pyup.io/repos/github/msk-access/cwl-commandlinetools/shield.svg)](https://pyup.io/repos/github/msk-access/cwl-commandlinetools/) -[![Python 3](https://pyup.io/repos/github/msk-access/cwl-commandlinetools/python-3-shield.svg)](https://pyup.io/repos/github/msk-access/cwl-commandlinetools/) +[![PyPI version](https://badge.fury.io/py/cwl-commandlinetools.svg)](https://badge.fury.io/py/cwl-commandlinetools) [![Build Status](https://travis-ci.org/msk-access/cwl-commandlinetools.svg?branch=master)](https://travis-ci.org/msk-access/cwl-commandlinetools/) [![Updates](https://pyup.io/repos/github/msk-access/cwl-commandlinetools/shield.svg)](https://pyup.io/repos/github/msk-access/cwl-commandlinetools/) [![Python 3](https://pyup.io/repos/github/msk-access/cwl-commandlinetools/python-3-shield.svg)](https://pyup.io/repos/github/msk-access/cwl-commandlinetools/) -- Free software: Apache Software License 2.0 -- Documentation: https://msk-access.gitbook.io/cwl-commandlinetools +* Free software: Apache Software License 2.0 +* Documentation: [https://msk-access.gitbook.io/cwl-commandlinetools](https://msk-access.gitbook.io/cwl-commandlinetools) ## Features @@ -20,13 +19,14 @@ Create command line tools in common workflow language to generate 
msk-access wor Clone the repository: -``` +```text git clone --depth 50 https://github.com/msk-access/cwl-commandlinetools.git ``` ## Credits -This package was created with Cookiecutter_ and the `audreyr/cookiecutter-pypackage`_ project template. +This package was created with Cookiecutter _and the `audreyr/cookiecutter-pypackage`_ project template. + +* Cookiecutter: [https://github.com/audreyr/cookiecutter](https://github.com/audreyr/cookiecutter) +* `audreyr/cookiecutter-pypackage`: [https://github.com/audreyr/cookiecutter-pypackage](https://github.com/audreyr/cookiecutter-pypackage) -- Cookiecutter: https://github.com/audreyr/cookiecutter -- `audreyr/cookiecutter-pypackage`: https://github.com/audreyr/cookiecutter-pypackage diff --git a/SUMMARY.md b/SUMMARY.md new file mode 100644 index 00000000..151798ee --- /dev/null +++ b/SUMMARY.md @@ -0,0 +1,26 @@ +# Table of contents + +* [MSK-ACCESS Command Line Tools](README.md) +* [CWL and Dockerfile for running Waltz - PileupMetrics](waltz_pileupmatrices_3.1.1.md) +* [CWL and Dockerfile for running Disambiguate](disambiguate_1.0.0.md) +* [CWL and Dockerfile for running Marianas - ProcessLoopUMIFastq](marianas_process_loop_umi_1.8.1.md) +* [CWL and Dockerfile for running Marianas - DuplexUMIToCollapsedFastqSecondPass](marianas_collapsing_second_pass_1.8.1.md) +* [.github](.github/README.md) + * [ISSUE\_TEMPLATE](.github/issue_template.md) +* [CWL and Dockerfile for running Marianas - DuplexUMIBamToCollapsedFastqFirstPass](marianas_collapsing_first_pass_1.8.1.md) +* [CWL and Dockerfile for running ABRA2](abra2_2.17.md) +* [CWL and Dockerfile for running ABRA2](abra2_2.19.md) +* [CWL and Dockerfile for running Picard - CollectAlignmentSummaryMetrics](picard_collect_alignment_summary_metrics_2.8.1.md) +* [CWL and Dockerfile for running Picard - MarkDuplicates](picard_mark_duplicates_2.8.1.md) +* [CWL and Dockerfile for running Picard - FixMateInformation](picard_fix_mate_information_1.96.md) +* [CWL and Dockerfile for 
running utilites from Ubuntu 18.04](utilities_ubuntu_18.04.md) +* [CWL and Dockerfile for running Bedtools GenomeCov](bedtools_genomecov_v2.28.0_cv2.md) +* [CWL and Dockerfile for running Trim Galore](trim_galore_0.6.2.md) +* [CWL and Dockerfile for running Marianas - SeparateBams](marianas_separate_bams_1.8.1.md) +* [CWL and Dockerfile for running GATK4 - Apply BQSR](gatk_applybqsr_4.1.2.0.md) +* [CWL and Dockerfile for running Bedtools Merge](bedtools_merge_v2.28.0_cv2.md) +* [CWL and Dockerfile for running Picard - AddOrReplaceReadGroups](picard_add_or_replace_read_groups_1.96.md) +* [CWL and Dockerfile for running Waltz - Count Reads](waltz_count_reads_3.1.1.md) +* [CWL and Dockerfile for running GATK4 - Base Recalibrator](gatk_baserecalibrator_4.1.2.0.md) +* [CWL and Dockerfile for running Picard - MarkDuplicates](picard_mark_duplicates_1.96.md) + diff --git a/abra2_2.17/README.md b/abra2_2.17.md similarity index 76% rename from abra2_2.17/README.md rename to abra2_2.17.md index 89818e32..49de206f 100644 --- a/abra2_2.17/README.md +++ b/abra2_2.17.md @@ -1,25 +1,25 @@ # CWL and Dockerfile for running ABRA2 -## Version of tools in docker image (/container/Dockerfile) +## Version of tools in docker image \(/container/Dockerfile\) -| Tool | Version | Location | -|--- |--- |--- | -| openjdk | 8 | - | -| ABRA2 | 2.17 | https://github.com/mozack/abra2/releases/download/v2.19/abra2-2.17.jar | +| Tool | Version | Location | +| :--- | :--- | :--- | +| openjdk | 8 | - | +| ABRA2 | 2.17 | [https://github.com/mozack/abra2/releases/download/v2.19/abra2-2.17.jar](https://github.com/mozack/abra2/releases/download/v2.19/abra2-2.17.jar) | -[![](https://images.microbadger.com/badges/version/mskcc/abra2:0.1.0.svg)](https://microbadger.com/images/mskcc/abra2:0.1.0 "Get your own version badge on microbadger.com") [![](https://images.microbadger.com/badges/image/mskcc/abra2:0.1.0.svg)](https://microbadger.com/images/mskcc/abra2:0.1.0 "Get your own image badge on microbadger.com") 
[![](https://images.microbadger.com/badges/license/mskcc/abra2:0.1.0.svg)](https://microbadger.com/images/mskcc/abra2:0.1.0 "Get your own license badge on microbadger.com") +[![](https://images.microbadger.com/badges/version/mskcc/abra2:0.1.0.svg)](https://microbadger.com/images/mskcc/abra2:0.1.0) [![](https://images.microbadger.com/badges/image/mskcc/abra2:0.1.0.svg)](https://microbadger.com/images/mskcc/abra2:0.1.0) [![](https://images.microbadger.com/badges/license/mskcc/abra2:0.1.0.svg)](https://microbadger.com/images/mskcc/abra2:0.1.0) ## CWL -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): ```bash > toil-cwl-runner abra2_2.17.cwl example_inputs.yaml ``` -**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying** [**lsf.py**](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) **by removing `type==X86_64 &&` you can use the following command** ```bash #Using CWLTOOL @@ -30,9 +30,9 @@ > toil-cwl-runner --singularity --logFile /path/to/abra2_toil_log/cwltoil.log --jobStore /path/to/abra2_jobStore --batchSystem lsf --workDir /path/to/abra2_toil_log --outdir . 
--writeLogs /path/to/abra2_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/abra2_2.17.cwl /path/to/inputs.yaml > abra2_toil.stdout 2> abra2_toil.stderr & ``` -### Usage +### Usage -``` +```text usage: abra2_2.17.cwl [-h] positional arguments: @@ -85,4 +85,5 @@ optional arguments: VCF containing known (or suspected) variant sites. Very large files should be avoided. --no_sort Do not attempt to sort final output - ``` \ No newline at end of file +``` + diff --git a/abra2_2.19.md b/abra2_2.19.md new file mode 100644 index 00000000..4f8c9ba7 --- /dev/null +++ b/abra2_2.19.md @@ -0,0 +1,21 @@ +# CWL and Dockerfile for running ABRA2 + +## Version of tools in docker image \(/container/Dockerfile\) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| openjdk | 8 | - | +| ABRA2 | 2.19 | [https://github.com/mozack/abra2/releases/download/v2.19/abra2-2.19.jar](https://github.com/mozack/abra2/releases/download/v2.19/abra2-2.19.jar) | + +[![](https://images.microbadger.com/badges/version/mskcc/abra2:0.2.0.svg)](https://microbadger.com/images/mskcc/abra2:0.2.0) [![](https://images.microbadger.com/badges/image/mskcc/abra2:0.2.0.svg)](https://microbadger.com/images/mskcc/abra2:0.2.0) [![](https://images.microbadger.com/badges/license/mskcc/abra2:0.2.0.svg)](https://microbadger.com/images/mskcc/abra2:0.2.0) + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner abra2_2.19.cwl example_inputs.yaml +``` + diff --git a/abra2_2.19/README.md b/abra2_2.19/README.md deleted file mode 100644 index 3fbbef6f..00000000 --- a/abra2_2.19/README.md +++ /dev/null @@ -1,21 +0,0 @@ -# CWL and Dockerfile for running ABRA2 - -## Version of tools in docker image (/container/Dockerfile) - -| Tool | Version | Location | -|--- |--- |--- | -| openjdk | 8 | - | -| ABRA2 | 2.19 | 
https://github.com/mozack/abra2/releases/download/v2.19/abra2-2.19.jar | - -[![](https://images.microbadger.com/badges/version/mskcc/abra2:0.2.0.svg)](https://microbadger.com/images/mskcc/abra2:0.2.0 "Get your own version badge on microbadger.com") [![](https://images.microbadger.com/badges/image/mskcc/abra2:0.2.0.svg)](https://microbadger.com/images/mskcc/abra2:0.2.0 "Get your own image badge on microbadger.com") [![](https://images.microbadger.com/badges/license/mskcc/abra2:0.2.0.svg)](https://microbadger.com/images/mskcc/abra2:0.2.0 "Get your own license badge on microbadger.com") - -## CWL - -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner abra2_2.19.cwl example_inputs.yaml -``` - diff --git a/bedtools_genomecov_v2.28.0_cv2.md b/bedtools_genomecov_v2.28.0_cv2.md new file mode 100644 index 00000000..355b0293 --- /dev/null +++ b/bedtools_genomecov_v2.28.0_cv2.md @@ -0,0 +1,43 @@ +# CWL and Dockerfile for running Bedtools GenomeCov + +## Version of tools in [docker image ](https://hub.docker.com/r/biocontainers/bedtools) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| Bedtools | v2.28.0\_cv2 | [https://github.com/arq5x/bedtools2/releases/tag/v2.28.0](https://github.com/arq5x/bedtools2/releases/tag/v2.28.0) | + +[![](https://img.shields.io/badge/version-2.28.0_cv2-blue)](https://github.com/arq5x/bedtools2/releases/tag/v2.28.0) + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner bedtools_genomecov_v2.28.0_cv2.cwl example_inputs.yml +``` + +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying** [**lsf.py**](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) **by removing `type==X86_64 &&` you can use the following 
command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict bedtools_genomecov_v2.28.0_cv2.cwl inputs.yaml + +#Using toil-cwl-runner +> mkdir run_directory +> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 bedtools_genomecov_v2.28.0_cv2.cwl inputs.yaml > file.stdout 2> file.stderr & +``` + +## Usage + +\`\`\`bash + +> toil-cwl-runner bedtools\_genomecov\_v2.28.0\_cv2.cwl --help + +usage: bedtools\_genomecov\_v2.28.0\_cv2.cwl \[-h\] --input INPUT --output\_file\_name OUTPUT\_FILE\_NAME \[--memory\_overhead MEMORY\_OVERHEAD\] \[--memory\_per\_job MEMORY\_PER\_JOB\] \[--number\_of\_threads NUMBER\_OF\_THREADS\] \[--option\_bedgraph\] \[job\_order\] + +positional arguments: job\_order Job input json file + +optional arguments: -h, --help show this help message and exit --input INPUT The input file can be in BAM format \(Note: BAM must be sorted by position\) --output\_file\_name OUTPUT\_FILE\_NAME --memory\_overhead MEMORY\_OVERHEAD --memory\_per\_job MEMORY\_PER\_JOB --number\_of\_threads NUMBER\_OF\_THREADS --option\_bedgraph option flag parameter to choose output file format. 
-bg refers to bedgraph format + diff --git a/bedtools_genomecov_v2.28.0_cv2/README.md b/bedtools_genomecov_v2.28.0_cv2/README.md deleted file mode 100644 index 75de8a57..00000000 --- a/bedtools_genomecov_v2.28.0_cv2/README.md +++ /dev/null @@ -1,56 +0,0 @@ -# CWL and Dockerfile for running Bedtools GenomeCov - -## Version of tools in [docker image ](https://hub.docker.com/r/biocontainers/bedtools) - -| Tool | Version | Location | -|--- |--- |--- | -| Bedtools | v2.28.0_cv2 | https://github.com/arq5x/bedtools2/releases/tag/v2.28.0 | - -[![](https://img.shields.io/badge/version-2.28.0_cv2-blue)](https://github.com/arq5x/bedtools2/releases/tag/v2.28.0) -## CWL - -- CWL specification 1.0 -- Use example_inputs.yml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner bedtools_genomecov_v2.28.0_cv2.cwl example_inputs.yml -``` - -**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** - -```bash -#Using CWLTOOL -> cwltool --singularity --non-strict bedtools_genomecov_v2.28.0_cv2.cwl inputs.yaml - -#Using toil-cwl-runner -> mkdir run_directory -> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 bedtools_genomecov_v2.28.0_cv2.cwl inputs.yaml > file.stdout 2> file.stderr & -``` - -## Usage - -```bash -> toil-cwl-runner bedtools_genomecov_v2.28.0_cv2.cwl --help - -usage: bedtools_genomecov_v2.28.0_cv2.cwl [-h] --input INPUT - --output_file_name OUTPUT_FILE_NAME - [--memory_overhead MEMORY_OVERHEAD] - [--memory_per_job MEMORY_PER_JOB] - 
[--number_of_threads NUMBER_OF_THREADS] - [--option_bedgraph] - [job_order] - -positional arguments: - job_order Job input json file - -optional arguments: - -h, --help show this help message and exit - --input INPUT The input file can be in BAM format (Note: BAM must be - sorted by position) - --output_file_name OUTPUT_FILE_NAME - --memory_overhead MEMORY_OVERHEAD - --memory_per_job MEMORY_PER_JOB - --number_of_threads NUMBER_OF_THREADS - --option_bedgraph option flag parameter to choose output file format. - -bg refers to bedgraph format \ No newline at end of file diff --git a/bedtools_merge_v2.28.0_cv2.md b/bedtools_merge_v2.28.0_cv2.md new file mode 100644 index 00000000..a1f1cc4b --- /dev/null +++ b/bedtools_merge_v2.28.0_cv2.md @@ -0,0 +1,43 @@ +# CWL and Dockerfile for running Bedtools Merge + +## Version of tools in [docker image ](https://hub.docker.com/r/biocontainers/bedtools) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| Bedtools | v2.28.0\_cv2 | [https://github.com/arq5x/bedtools2/releases/tag/v2.28.0](https://github.com/arq5x/bedtools2/releases/tag/v2.28.0) | + +[![](https://img.shields.io/badge/version-2.28.0_cv2-blue)](https://github.com/arq5x/bedtools2/releases/tag/v2.28.0) + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner bedtools_merge_v2.28.0_cv2.cwl example_inputs.yml +``` + +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying** [**lsf.py**](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) **by removing `type==X86_64 &&` you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict bedtools_merge_v2.28.0_cv2.cwl inputs.yaml + +#Using toil-cwl-runner +> mkdir run_directory +> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore 
path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 bedtools_merge_v2.28.0_cv2.cwl inputs.yaml > file.stdout 2> file.stderr & +``` + +## Usage + +\`\`\`bash + +> toil-cwl-runner bedtools\_merge\_v2.28.0\_cv2.cwl --help + +usage: bedtools\_merge\_v2.28.0\_cv2.cwl \[-h\] --input INPUT --output\_file\_name OUTPUT\_FILE\_NAME \[--memory\_overhead MEMORY\_OVERHEAD\] \[--memory\_per\_job MEMORY\_PER\_JOB\] \[--number\_of\_threads NUMBER\_OF\_THREADS\] \[--distance\_between\_features DISTANCE\_BETWEEN\_FEATURES\] \[job\_order\] + +positional arguments: job\_order Job input json file + +optional arguments: -h, --help show this help message and exit --input INPUT BEDgraph format file generated from Bedtools Genomecov module --output\_file\_name OUTPUT\_FILE\_NAME --memory\_overhead MEMORY\_OVERHEAD --memory\_per\_job MEMORY\_PER\_JOB --number\_of\_threads NUMBER\_OF\_THREADS --distance\_between\_features DISTANCE\_BETWEEN\_FEATURES Maximum distance between features allowed for features to be merged. 
+ diff --git a/bedtools_merge_v2.28.0_cv2/README.md b/bedtools_merge_v2.28.0_cv2/README.md deleted file mode 100644 index 960664db..00000000 --- a/bedtools_merge_v2.28.0_cv2/README.md +++ /dev/null @@ -1,57 +0,0 @@ -# CWL and Dockerfile for running Bedtools Merge - -## Version of tools in [docker image ](https://hub.docker.com/r/biocontainers/bedtools) - -| Tool | Version | Location | -|--- |--- |--- | -| Bedtools | v2.28.0_cv2 | https://github.com/arq5x/bedtools2/releases/tag/v2.28.0 | - -[![](https://img.shields.io/badge/version-2.28.0_cv2-blue)](https://github.com/arq5x/bedtools2/releases/tag/v2.28.0) -## CWL - -- CWL specification 1.0 -- Use example_inputs.yml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner bedtools_merge_v2.28.0_cv2.cwl example_inputs.yml -``` - -**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** - -```bash -#Using CWLTOOL -> cwltool --singularity --non-strict bedtools_merge_v2.28.0_cv2.cwl inputs.yaml - -#Using toil-cwl-runner -> mkdir run_directory -> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 bedtools_merge_v2.28.0_cv2.cwl inputs.yaml > file.stdout 2> file.stderr & -``` - -## Usage - -```bash -> toil-cwl-runner bedtools_merge_v2.28.0_cv2.cwl --help - -usage: bedtools_merge_v2.28.0_cv2.cwl [-h] --input INPUT --output_file_name - OUTPUT_FILE_NAME - [--memory_overhead MEMORY_OVERHEAD] - [--memory_per_job MEMORY_PER_JOB] - [--number_of_threads NUMBER_OF_THREADS] - [--distance_between_features 
DISTANCE_BETWEEN_FEATURES] - [job_order] - -positional arguments: - job_order Job input json file - -optional arguments: - -h, --help show this help message and exit - --input INPUT BEDgraph format file generated from Bedtools Genomecov - module - --output_file_name OUTPUT_FILE_NAME - --memory_overhead MEMORY_OVERHEAD - --memory_per_job MEMORY_PER_JOB - --number_of_threads NUMBER_OF_THREADS - --distance_between_features DISTANCE_BETWEEN_FEATURES - Maximum distance between features allowed for features - to be merged. \ No newline at end of file diff --git a/disambiguate_1.0.0.md b/disambiguate_1.0.0.md new file mode 100644 index 00000000..c7c4209a --- /dev/null +++ b/disambiguate_1.0.0.md @@ -0,0 +1,53 @@ +# CWL and Dockerfile for running Disambiguate + +## Version of tools in docker image \(/container/Dockerfile\) + +Dockerfile uses `biocontainers/biocontainers:latest` as a base image and installs tools from `bioconda`. + +| Tool | Version | Location | Notes | +| :--- | :--- | :--- | :--- | +| biocontainers | latest | [https://hub.docker.com/r/biocontainers/biocontainers/](https://hub.docker.com/r/biocontainers/biocontainers/) | base image; "latest" not actually latest version, just tag name on docker hub | +| bamtools | 2.4.0 | [https://bioconda.github.io/recipes/bamtools/README.html](https://bioconda.github.io/recipes/bamtools/README.html) | - | +| ngs-disambiguate | 2016.11.10 | [https://bioconda.github.io/recipes/ngs-disambiguate/README.html](https://bioconda.github.io/recipes/ngs-disambiguate/README.html) | - | + +[![](https://images.microbadger.com/badges/version/mskcc/disambiguate:1.0.0.svg)](https://microbadger.com/images/mskcc/disambiguate:1.0.0) [![](https://images.microbadger.com/badges/image/mskcc/disambiguate:1.0.0.svg)](https://microbadger.com/images/mskcc/disambiguate:1.0.0) + +## CWL + +* CWL specification 1.0 +* Use `example_inputs.yaml` to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > 
toil-cwl-runner disambiguate_1.0.0.cwl example_inputs.yaml +``` + +## Command + +```text +USAGE: + + cwltool disambiguate_1.0.0.cwl \ + --prefix \ + --output_dir \ + [--aligner ] \ + + +Where: + + --prefix + (required) Sample ID or name used as prefix. Do not include .bam + + --output_dir + (required) Output directory + + --aligner + Aligner option {bwa(default),tophat,hisat2,star} + + + (required) Species A BAM file + + + (required) Species B BAM file +``` + diff --git a/disambiguate_1.0.0/README.md b/disambiguate_1.0.0/README.md deleted file mode 100644 index b1cd50f0..00000000 --- a/disambiguate_1.0.0/README.md +++ /dev/null @@ -1,52 +0,0 @@ - # CWL and Dockerfile for running Disambiguate - -## Version of tools in docker image (/container/Dockerfile) - -Dockerfile uses `biocontainers/biocontainers:latest` as a base image and installs tools from `bioconda`. - -| Tool | Version | Location | Notes | -|--- |--- |--- | - | -| biocontainers | latest | https://hub.docker.com/r/biocontainers/biocontainers/ | base image; "latest" not actually latest version, just tag name on docker hub| -| bamtools | 2.4.0 | https://bioconda.github.io/recipes/bamtools/README.html | - | -| ngs-disambiguate | 2016.11.10 | https://bioconda.github.io/recipes/ngs-disambiguate/README.html | - | - -[![](https://images.microbadger.com/badges/version/mskcc/disambiguate:1.0.0.svg)](https://microbadger.com/images/mskcc/disambiguate:1.0.0 "Get your own version badge on microbadger.com") [![](https://images.microbadger.com/badges/image/mskcc/disambiguate:1.0.0.svg)](https://microbadger.com/images/mskcc/disambiguate:1.0.0 "Get your own image badge on microbadger.com") - - -## CWL - -- CWL specification 1.0 -- Use `example_inputs.yaml` to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner disambiguate_1.0.0.cwl example_inputs.yaml -``` - -## Command -``` -USAGE: - - cwltool disambiguate_1.0.0.cwl \ - --prefix \ - --output_dir \ - 
[--aligner ] \ - - -Where: - - --prefix - (required) Sample ID or name used as prefix. Do not include .bam - - --output_dir - (required) Output directory - - --aligner - Aligner option {bwa(default),tophat,hisat2,star} - - - (required) Species A BAM file - - - (required) Species B BAM file -``` diff --git a/gatk_ApplyBQSR_4.1.2.0/README.md b/gatk_ApplyBQSR_4.1.2.0/README.md deleted file mode 100644 index 6dd376f7..00000000 --- a/gatk_ApplyBQSR_4.1.2.0/README.md +++ /dev/null @@ -1,128 +0,0 @@ -# CWL and Dockerfile for running GATK4 - Apply BQSR - -## Version of tools in [docker image ](https://hub.docker.com/r/broadinstitute/gatk) - -| Tool | Version | Location | -|--- |--- |--- | -| GATK | 4.1.2.0 | https://github.com/broadinstitute/gatk/releases/tag/4.1.2.0 | - -[![](https://img.shields.io/badge/version-4.1.2.0-blue)](https://github.com/broadinstitute/gatk/releases/tag/4.1.2.0) -## CWL - -- CWL specification 1.0 -- Use example_inputs.yml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner gatk_ApplyBQSR_4.1.2.0.cwl example_inputs.yml -``` - -**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** - -```bash -#Using CWLTOOL -> cwltool --singularity --non-strict gatk_ApplyBQSR_4.1.2.0.cwl inputs.yaml - -#Using toil-cwl-runner -> mkdir run_directory -> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 gatk_ApplyBQSR_4.1.2.0.cwl inputs.yaml > file.stdout 2> file.stderr & -``` - -## Usage - -```bash -> toil-cwl-runner gatk_ApplyBQSR_4.1.2.0.cwl 
--help - -usage: gatk_ApplyBQSR_4.1.2.0.cwl [-h] --reference REFERENCE - [--create_output_bam_index] - --bqsr_recal_file BQSR_RECAL_FILE --input - INPUT [--output_file_name OUTPUT_FILE_NAME] - [--add_output_sam_program_record] - [--add_output_vcf_command_line] - [--arguments_file ARGUMENTS_FILE] - [--cloud_index_prefetch_buffer CLOUD_INDEX_PREFETCH_BUFFER] - [--cloud_prefetch_buffer CLOUD_PREFETCH_BUFFER] - [--create_output_bam_md5] - [--create_output_variant_index] - [--create_output_variant_md5] - [--disable_bam_index_caching] - [--disable_read_filter DISABLE_READ_FILTER] - [--disable_sequence_dictionary_validation] - [--emit_original_quals] - [--exclude_intervals EXCLUDE_INTERVALS] - [--gatk_config_file GATK_CONFIG_FILE] - [--gcs_max_retries GCS_MAX_RETRIES] - [--gcs_project_for_requester_pays GCS_PROJECT_FOR_REQUESTER_PAYS] - [--global_qscore_prior GLOBAL_QSCORE_PRIOR] - [--interval_exclusion_padding INTERVAL_EXCLUSION_PADDING] - [--interval_merging_rule INTERVAL_MERGING_RULE] - [--interval_padding INTERVAL_PADDING] - [--interval_set_rule INTERVAL_SET_RULE] - [--intervals INTERVALS] [--lenient] - [--preserve_qscores_less_than PRESERVE_QSCORES_LESS_THAN] - [--quantize_quals QUANTIZE_QUALS] [--quiet] - [--read_filter READ_FILTER] - [--read_index READ_INDEX] - [--read_validation_stringency READ_VALIDATION_STRINGENCY] - [--seconds_between_progress_updates SECONDS_BETWEEN_PROGRESS_UPDATES] - [--sequence_dictionary SEQUENCE_DICTIONARY] - [--sites_only_vcf_output] - [--use_jdk_deflater] [--use_jdk_inflater] - [--use_original_qualities] - [--memory_overhead MEMORY_OVERHEAD] - [--memory_per_job MEMORY_PER_JOB] - [--number_of_threads NUMBER_OF_THREADS] - [job_order] - -positional arguments: - job_order Job input json file - -optional arguments: - -h, --help show this help message and exit - --reference REFERENCE - Reference sequence - --create_output_bam_index - --bqsr_recal_file BQSR_RECAL_FILE - Input recalibration table for BQSR. 
Only run ApplyBQSR - with the covariates table created from the input BAM - --input INPUT A BAM file containing input read data - --output_file_name OUTPUT_FILE_NAME - Output file name. Not Required - --add_output_sam_program_record - --add_output_vcf_command_line - --arguments_file ARGUMENTS_FILE - --cloud_index_prefetch_buffer CLOUD_INDEX_PREFETCH_BUFFER - --cloud_prefetch_buffer CLOUD_PREFETCH_BUFFER - --create_output_bam_md5 - --create_output_variant_index - --create_output_variant_md5 - --disable_bam_index_caching - --disable_read_filter DISABLE_READ_FILTER - --disable_sequence_dictionary_validation - --emit_original_quals - --exclude_intervals EXCLUDE_INTERVALS - --gatk_config_file GATK_CONFIG_FILE - --gcs_max_retries GCS_MAX_RETRIES - --gcs_project_for_requester_pays GCS_PROJECT_FOR_REQUESTER_PAYS - --global_qscore_prior GLOBAL_QSCORE_PRIOR - --interval_exclusion_padding INTERVAL_EXCLUSION_PADDING - --interval_merging_rule INTERVAL_MERGING_RULE - --interval_padding INTERVAL_PADDING - --interval_set_rule INTERVAL_SET_RULE - --intervals INTERVALS - --lenient - --preserve_qscores_less_than PRESERVE_QSCORES_LESS_THAN - --quantize_quals QUANTIZE_QUALS - --quiet - --read_filter READ_FILTER - --read_index READ_INDEX - --read_validation_stringency READ_VALIDATION_STRINGENCY - --seconds_between_progress_updates SECONDS_BETWEEN_PROGRESS_UPDATES - --sequence_dictionary SEQUENCE_DICTIONARY - --sites_only_vcf_output - --use_jdk_deflater - --use_jdk_inflater - --use_original_qualities - --memory_overhead MEMORY_OVERHEAD - --memory_per_job MEMORY_PER_JOB - --number_of_threads NUMBER_OF_THREADS \ No newline at end of file diff --git a/gatk_BaseRecalibrator_4.1.2.0/README.md b/gatk_BaseRecalibrator_4.1.2.0/README.md deleted file mode 100644 index 005acb15..00000000 --- a/gatk_BaseRecalibrator_4.1.2.0/README.md +++ /dev/null @@ -1,142 +0,0 @@ -# CWL and Dockerfile for running GATK4 - Base Recalibrator - -## Version of tools in [docker image 
](https://hub.docker.com/r/broadinstitute/gatk) - -| Tool | Version | Location | -|--- |--- |--- | -| GATK | 4.1.2.0 | https://github.com/broadinstitute/gatk/releases/tag/4.1.2.0 | - -[![](https://img.shields.io/badge/version-4.1.2.0-blue)](https://github.com/broadinstitute/gatk/releases/tag/4.1.2.0) -## CWL - -- CWL specification 1.0 -- Use example_inputs.yml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner gatk_baserecalibrator_4.1.2.0.cwl example_inputs.yml -``` - -**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** - -```bash -#Using CWLTOOL -> cwltool --singularity --non-strict gatk_baserecalibrator_4.1.2.0.cwl inputs.yaml - -#Using toil-cwl-runner -> mkdir run_directory -> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 gatk_baserecalibrator_4.1.2.0.cwl inputs.yaml > file.stdout 2> file.stderr & -``` - -## Usage - -```bash -> toil-cwl-runner gatk_baserecalibrator_4.1.2.0.cwl --help - -usage: gatk_baserecalibrator_4.1.2.0.cwl [-h] --input INPUT --known_sites_1 - KNOWN_SITES_1 --reference REFERENCE - [--output_file_name OUTPUT_FILE_NAME] - [--add_output_sam_program_record] - [--add_output_vcf_command_line] - [--arguments_file ARGUMENTS_FILE] - [--binary_tag_name BINARY_TAG_NAME] - [--bqsr_baq_gap_open_penalty BQSR_BAQ_GAP_OPEN_PENALTY] - [--cloud-index-prefetch-buffer CLOUD_INDEX_PREFETCH_BUFFER] - [--cloud_prefetch_buffer CLOUD_PREFETCH_BUFFER] - [--create_output_bam_index] - [--create_output_bam_md5] - 
[--create_output_variant_index] - [--create_output_variant_md5] - [--default_base_qualities DEFAULT_BASE_QUALITIES] - [--deletions_default_quality DELETIONS_DEFAULT_QUALITY] - [--disable_bam_index_caching] - [--disable_read_filter DISABLE_READ_FILTER] - [--disable_sequence_dictionary_validation] - [--exclude_intervals EXCLUDE_INTERVALS] - [--gatk_config_file GATK_CONFIG_FILE] - [--gcs_max_retries GCS_MAX_RETRIES] - [--gcs_project_for_requester_pays GCS_PROJECT_FOR_REQUESTER_PAYS] - [--indels_context_size INDELS_CONTEXT_SIZE] - [--insertions_default_quality INSERTIONS_DEFAULT_QUALITY] - [--interval_exclusion_padding INTERVAL_EXCLUSION_PADDING] - [--interval_merging_rule INTERVAL_MERGING_RULE] - [--interval_padding INTERVAL_PADDING] - [--interval_set_rule INTERVAL_SET_RULE] - [--intervals INTERVALS] [--lenient] - [--low_quality_tail LOW_QUALITY_TAIL] - [--maximum_cycle_value MAXIMUM_CYCLE_VALUE] - [--mismatches_context_size MISMATCHES_CONTEXT_SIZE] - [--mismatches_default_quality MISMATCHES_DEFAULT_QUALITY] - [--preserve_qscores_less_than PRESERVE_QSCORES_LESS_THAN] - [--quantizing_levels QUANTIZING_LEVELS] - [--QUIET] [--read_filter READ_FILTER] - [--read_index READ_INDEX] - [--seconds_between_progress_updates SECONDS_BETWEEN_PROGRESS_UPDATES] - [--sequence_dictionary SEQUENCE_DICTIONARY] - [--sites_only_vcf_output] - [--use_original_qualities] - [--number_of_threads NUMBER_OF_THREADS] - [--memory_per_job MEMORY_PER_JOB] - [--memory_overhead MEMORY_OVERHEAD] - [--known_sites_2 KNOWN_SITES_2] - [job_order] - -positional arguments: - job_order Job input json file - -optional arguments: - -h, --help show this help message and exit - --input INPUT BAM/SAM file containing reads - --known_sites_1 KNOWN_SITES_1 - One or more databases of known polymorphic sites used - to exclude regions around known polymorphisms from - analysis - --reference REFERENCE - Reference sequence file - --output_file_name OUTPUT_FILE_NAME - Output file name. 
Not Required - --add_output_sam_program_record - --add_output_vcf_command_line - --arguments_file ARGUMENTS_FILE - --binary_tag_name BINARY_TAG_NAME - --bqsr_baq_gap_open_penalty BQSR_BAQ_GAP_OPEN_PENALTY - --cloud-index-prefetch-buffer CLOUD_INDEX_PREFETCH_BUFFER - --cloud_prefetch_buffer CLOUD_PREFETCH_BUFFER - --create_output_bam_index - --create_output_bam_md5 - --create_output_variant_index - --create_output_variant_md5 - --default_base_qualities DEFAULT_BASE_QUALITIES - --deletions_default_quality DELETIONS_DEFAULT_QUALITY - --disable_bam_index_caching - --disable_read_filter DISABLE_READ_FILTER - --disable_sequence_dictionary_validation - --exclude_intervals EXCLUDE_INTERVALS - --gatk_config_file GATK_CONFIG_FILE - --gcs_max_retries GCS_MAX_RETRIES - --gcs_project_for_requester_pays GCS_PROJECT_FOR_REQUESTER_PAYS - --indels_context_size INDELS_CONTEXT_SIZE - --insertions_default_quality INSERTIONS_DEFAULT_QUALITY - --interval_exclusion_padding INTERVAL_EXCLUSION_PADDING - --interval_merging_rule INTERVAL_MERGING_RULE - --interval_padding INTERVAL_PADDING - --interval_set_rule INTERVAL_SET_RULE - --intervals INTERVALS - --lenient - --low_quality_tail LOW_QUALITY_TAIL - --maximum_cycle_value MAXIMUM_CYCLE_VALUE - --mismatches_context_size MISMATCHES_CONTEXT_SIZE - --mismatches_default_quality MISMATCHES_DEFAULT_QUALITY - --preserve_qscores_less_than PRESERVE_QSCORES_LESS_THAN - --quantizing_levels QUANTIZING_LEVELS - --QUIET - --read_filter READ_FILTER - --read_index READ_INDEX - --seconds_between_progress_updates SECONDS_BETWEEN_PROGRESS_UPDATES - --sequence_dictionary SEQUENCE_DICTIONARY - --sites_only_vcf_output - --use_original_qualities - --number_of_threads NUMBER_OF_THREADS - --memory_per_job MEMORY_PER_JOB - --memory_overhead MEMORY_OVERHEAD - --known_sites_2 KNOWN_SITES_2 \ No newline at end of file diff --git a/gatk_applybqsr_4.1.2.0.md b/gatk_applybqsr_4.1.2.0.md new file mode 100644 index 00000000..0574257e --- /dev/null +++ 
b/gatk_applybqsr_4.1.2.0.md @@ -0,0 +1,43 @@ +# CWL and Dockerfile for running GATK4 - Apply BQSR + +## Version of tools in [docker image ](https://hub.docker.com/r/broadinstitute/gatk) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| GATK | 4.1.2.0 | [https://github.com/broadinstitute/gatk/releases/tag/4.1.2.0](https://github.com/broadinstitute/gatk/releases/tag/4.1.2.0) | + +[![](https://img.shields.io/badge/version-4.1.2.0-blue)](https://github.com/broadinstitute/gatk/releases/tag/4.1.2.0) + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner gatk_ApplyBQSR_4.1.2.0.cwl example_inputs.yml +``` + +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying** [**lsf.py**](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) **by removing `type==X86_64 &&` you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict gatk_ApplyBQSR_4.1.2.0.cwl inputs.yaml + +#Using toil-cwl-runner +> mkdir run_directory +> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 gatk_ApplyBQSR_4.1.2.0.cwl inputs.yaml > file.stdout 2> file.stderr & +``` + +## Usage + +\`\`\`bash + +> toil-cwl-runner gatk\_ApplyBQSR\_4.1.2.0.cwl --help + +usage: gatk\_ApplyBQSR\_4.1.2.0.cwl \[-h\] --reference REFERENCE \[--create\_output\_bam\_index\] --bqsr\_recal\_file BQSR\_RECAL\_FILE --input INPUT \[--output\_file\_name OUTPUT\_FILE\_NAME\] \[--add\_output\_sam\_program\_record\] \[--add\_output\_vcf\_command\_line\] \[--arguments\_file ARGUMENTS\_FILE\] \[--cloud\_index\_prefetch\_buffer 
CLOUD\_INDEX\_PREFETCH\_BUFFER\] \[--cloud\_prefetch\_buffer CLOUD\_PREFETCH\_BUFFER\] \[--create\_output\_bam\_md5\] \[--create\_output\_variant\_index\] \[--create\_output\_variant\_md5\] \[--disable\_bam\_index\_caching\] \[--disable\_read\_filter DISABLE\_READ\_FILTER\] \[--disable\_sequence\_dictionary\_validation\] \[--emit\_original\_quals\] \[--exclude\_intervals EXCLUDE\_INTERVALS\] \[--gatk\_config\_file GATK\_CONFIG\_FILE\] \[--gcs\_max\_retries GCS\_MAX\_RETRIES\] \[--gcs\_project\_for\_requester\_pays GCS\_PROJECT\_FOR\_REQUESTER\_PAYS\] \[--global\_qscore\_prior GLOBAL\_QSCORE\_PRIOR\] \[--interval\_exclusion\_padding INTERVAL\_EXCLUSION\_PADDING\] \[--interval\_merging\_rule INTERVAL\_MERGING\_RULE\] \[--interval\_padding INTERVAL\_PADDING\] \[--interval\_set\_rule INTERVAL\_SET\_RULE\] \[--intervals INTERVALS\] \[--lenient\] \[--preserve\_qscores\_less\_than PRESERVE\_QSCORES\_LESS\_THAN\] \[--quantize\_quals QUANTIZE\_QUALS\] \[--quiet\] \[--read\_filter READ\_FILTER\] \[--read\_index READ\_INDEX\] \[--read\_validation\_stringency READ\_VALIDATION\_STRINGENCY\] \[--seconds\_between\_progress\_updates SECONDS\_BETWEEN\_PROGRESS\_UPDATES\] \[--sequence\_dictionary SEQUENCE\_DICTIONARY\] \[--sites\_only\_vcf\_output\] \[--use\_jdk\_deflater\] \[--use\_jdk\_inflater\] \[--use\_original\_qualities\] \[--memory\_overhead MEMORY\_OVERHEAD\] \[--memory\_per\_job MEMORY\_PER\_JOB\] \[--number\_of\_threads NUMBER\_OF\_THREADS\] \[job\_order\] + +positional arguments: job\_order Job input json file + +optional arguments: -h, --help show this help message and exit --reference REFERENCE Reference sequence --create\_output\_bam\_index --bqsr\_recal\_file BQSR\_RECAL\_FILE Input recalibration table for BQSR. Only run ApplyBQSR with the covariates table created from the input BAM --input INPUT A BAM file containing input read data --output\_file\_name OUTPUT\_FILE\_NAME Output file name. 
Not Required --add\_output\_sam\_program\_record --add\_output\_vcf\_command\_line --arguments\_file ARGUMENTS\_FILE --cloud\_index\_prefetch\_buffer CLOUD\_INDEX\_PREFETCH\_BUFFER --cloud\_prefetch\_buffer CLOUD\_PREFETCH\_BUFFER --create\_output\_bam\_md5 --create\_output\_variant\_index --create\_output\_variant\_md5 --disable\_bam\_index\_caching --disable\_read\_filter DISABLE\_READ\_FILTER --disable\_sequence\_dictionary\_validation --emit\_original\_quals --exclude\_intervals EXCLUDE\_INTERVALS --gatk\_config\_file GATK\_CONFIG\_FILE --gcs\_max\_retries GCS\_MAX\_RETRIES --gcs\_project\_for\_requester\_pays GCS\_PROJECT\_FOR\_REQUESTER\_PAYS --global\_qscore\_prior GLOBAL\_QSCORE\_PRIOR --interval\_exclusion\_padding INTERVAL\_EXCLUSION\_PADDING --interval\_merging\_rule INTERVAL\_MERGING\_RULE --interval\_padding INTERVAL\_PADDING --interval\_set\_rule INTERVAL\_SET\_RULE --intervals INTERVALS --lenient --preserve\_qscores\_less\_than PRESERVE\_QSCORES\_LESS\_THAN --quantize\_quals QUANTIZE\_QUALS --quiet --read\_filter READ\_FILTER --read\_index READ\_INDEX --read\_validation\_stringency READ\_VALIDATION\_STRINGENCY --seconds\_between\_progress\_updates SECONDS\_BETWEEN\_PROGRESS\_UPDATES --sequence\_dictionary SEQUENCE\_DICTIONARY --sites\_only\_vcf\_output --use\_jdk\_deflater --use\_jdk\_inflater --use\_original\_qualities --memory\_overhead MEMORY\_OVERHEAD --memory\_per\_job MEMORY\_PER\_JOB --number\_of\_threads NUMBER\_OF\_THREADS + diff --git a/gatk_baserecalibrator_4.1.2.0.md b/gatk_baserecalibrator_4.1.2.0.md new file mode 100644 index 00000000..921c05c1 --- /dev/null +++ b/gatk_baserecalibrator_4.1.2.0.md @@ -0,0 +1,43 @@ +# CWL and Dockerfile for running GATK4 - Base Recalibrator + +## Version of tools in [docker image ](https://hub.docker.com/r/broadinstitute/gatk) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| GATK | 4.1.2.0 | 
[https://github.com/broadinstitute/gatk/releases/tag/4.1.2.0](https://github.com/broadinstitute/gatk/releases/tag/4.1.2.0) | + +[![](https://img.shields.io/badge/version-4.1.2.0-blue)](https://github.com/broadinstitute/gatk/releases/tag/4.1.2.0) + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner gatk_baserecalibrator_4.1.2.0.cwl example_inputs.yml +``` + +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying** [**lsf.py**](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) **by removing `type==X86_64 &&` you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict gatk_baserecalibrator_4.1.2.0.cwl inputs.yaml + +#Using toil-cwl-runner +> mkdir run_directory +> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 gatk_baserecalibrator_4.1.2.0.cwl inputs.yaml > file.stdout 2> file.stderr & +``` + +## Usage + +\`\`\`bash + +> toil-cwl-runner gatk\_baserecalibrator\_4.1.2.0.cwl --help + +usage: gatk\_baserecalibrator\_4.1.2.0.cwl \[-h\] --input INPUT --known\_sites\_1 KNOWN\_SITES\_1 --reference REFERENCE \[--output\_file\_name OUTPUT\_FILE\_NAME\] \[--add\_output\_sam\_program\_record\] \[--add\_output\_vcf\_command\_line\] \[--arguments\_file ARGUMENTS\_FILE\] \[--binary\_tag\_name BINARY\_TAG\_NAME\] \[--bqsr\_baq\_gap\_open\_penalty BQSR\_BAQ\_GAP\_OPEN\_PENALTY\] \[--cloud-index-prefetch-buffer CLOUD\_INDEX\_PREFETCH\_BUFFER\] \[--cloud\_prefetch\_buffer CLOUD\_PREFETCH\_BUFFER\] \[--create\_output\_bam\_index\] \[--create\_output\_bam\_md5\] 
\[--create\_output\_variant\_index\] \[--create\_output\_variant\_md5\] \[--default\_base\_qualities DEFAULT\_BASE\_QUALITIES\] \[--deletions\_default\_quality DELETIONS\_DEFAULT\_QUALITY\] \[--disable\_bam\_index\_caching\] \[--disable\_read\_filter DISABLE\_READ\_FILTER\] \[--disable\_sequence\_dictionary\_validation\] \[--exclude\_intervals EXCLUDE\_INTERVALS\] \[--gatk\_config\_file GATK\_CONFIG\_FILE\] \[--gcs\_max\_retries GCS\_MAX\_RETRIES\] \[--gcs\_project\_for\_requester\_pays GCS\_PROJECT\_FOR\_REQUESTER\_PAYS\] \[--indels\_context\_size INDELS\_CONTEXT\_SIZE\] \[--insertions\_default\_quality INSERTIONS\_DEFAULT\_QUALITY\] \[--interval\_exclusion\_padding INTERVAL\_EXCLUSION\_PADDING\] \[--interval\_merging\_rule INTERVAL\_MERGING\_RULE\] \[--interval\_padding INTERVAL\_PADDING\] \[--interval\_set\_rule INTERVAL\_SET\_RULE\] \[--intervals INTERVALS\] \[--lenient\] \[--low\_quality\_tail LOW\_QUALITY\_TAIL\] \[--maximum\_cycle\_value MAXIMUM\_CYCLE\_VALUE\] \[--mismatches\_context\_size MISMATCHES\_CONTEXT\_SIZE\] \[--mismatches\_default\_quality MISMATCHES\_DEFAULT\_QUALITY\] \[--preserve\_qscores\_less\_than PRESERVE\_QSCORES\_LESS\_THAN\] \[--quantizing\_levels QUANTIZING\_LEVELS\] \[--QUIET\] \[--read\_filter READ\_FILTER\] \[--read\_index READ\_INDEX\] \[--seconds\_between\_progress\_updates SECONDS\_BETWEEN\_PROGRESS\_UPDATES\] \[--sequence\_dictionary SEQUENCE\_DICTIONARY\] \[--sites\_only\_vcf\_output\] \[--use\_original\_qualities\] \[--number\_of\_threads NUMBER\_OF\_THREADS\] \[--memory\_per\_job MEMORY\_PER\_JOB\] \[--memory\_overhead MEMORY\_OVERHEAD\] \[--known\_sites\_2 KNOWN\_SITES\_2\] \[job\_order\] + +positional arguments: job\_order Job input json file + +optional arguments: -h, --help show this help message and exit --input INPUT BAM/SAM file containing reads --known\_sites\_1 KNOWN\_SITES\_1 One or more databases of known polymorphic sites used to exclude regions around known polymorphisms from analysis --reference REFERENCE 
Reference sequence file --output\_file\_name OUTPUT\_FILE\_NAME Output file name. Not Required --add\_output\_sam\_program\_record --add\_output\_vcf\_command\_line --arguments\_file ARGUMENTS\_FILE --binary\_tag\_name BINARY\_TAG\_NAME --bqsr\_baq\_gap\_open\_penalty BQSR\_BAQ\_GAP\_OPEN\_PENALTY --cloud-index-prefetch-buffer CLOUD\_INDEX\_PREFETCH\_BUFFER --cloud\_prefetch\_buffer CLOUD\_PREFETCH\_BUFFER --create\_output\_bam\_index --create\_output\_bam\_md5 --create\_output\_variant\_index --create\_output\_variant\_md5 --default\_base\_qualities DEFAULT\_BASE\_QUALITIES --deletions\_default\_quality DELETIONS\_DEFAULT\_QUALITY --disable\_bam\_index\_caching --disable\_read\_filter DISABLE\_READ\_FILTER --disable\_sequence\_dictionary\_validation --exclude\_intervals EXCLUDE\_INTERVALS --gatk\_config\_file GATK\_CONFIG\_FILE --gcs\_max\_retries GCS\_MAX\_RETRIES --gcs\_project\_for\_requester\_pays GCS\_PROJECT\_FOR\_REQUESTER\_PAYS --indels\_context\_size INDELS\_CONTEXT\_SIZE --insertions\_default\_quality INSERTIONS\_DEFAULT\_QUALITY --interval\_exclusion\_padding INTERVAL\_EXCLUSION\_PADDING --interval\_merging\_rule INTERVAL\_MERGING\_RULE --interval\_padding INTERVAL\_PADDING --interval\_set\_rule INTERVAL\_SET\_RULE --intervals INTERVALS --lenient --low\_quality\_tail LOW\_QUALITY\_TAIL --maximum\_cycle\_value MAXIMUM\_CYCLE\_VALUE --mismatches\_context\_size MISMATCHES\_CONTEXT\_SIZE --mismatches\_default\_quality MISMATCHES\_DEFAULT\_QUALITY --preserve\_qscores\_less\_than PRESERVE\_QSCORES\_LESS\_THAN --quantizing\_levels QUANTIZING\_LEVELS --QUIET --read\_filter READ\_FILTER --read\_index READ\_INDEX --seconds\_between\_progress\_updates SECONDS\_BETWEEN\_PROGRESS\_UPDATES --sequence\_dictionary SEQUENCE\_DICTIONARY --sites\_only\_vcf\_output --use\_original\_qualities --number\_of\_threads NUMBER\_OF\_THREADS --memory\_per\_job MEMORY\_PER\_JOB --memory\_overhead MEMORY\_OVERHEAD --known\_sites\_2 KNOWN\_SITES\_2 + diff --git 
a/marianas_collapsing_first_pass_1.8.1.md b/marianas_collapsing_first_pass_1.8.1.md new file mode 100644 index 00000000..a9345bee --- /dev/null +++ b/marianas_collapsing_first_pass_1.8.1.md @@ -0,0 +1,19 @@ +# CWL and Dockerfile for running Marianas - DuplexUMIBamToCollapsedFastqFirstPass + +## Version of tools in docker image \(/container/Dockerfile\) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| java base image | 8 | - | +| Marianas | 1.8.1 | [https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar](https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar) | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner marianas_first_pass.cwl example_inputs.yaml +``` + diff --git a/marianas_collapsing_first_pass_1.8.1/README.md b/marianas_collapsing_first_pass_1.8.1/README.md deleted file mode 100644 index eba9e12c..00000000 --- a/marianas_collapsing_first_pass_1.8.1/README.md +++ /dev/null @@ -1,19 +0,0 @@ -# CWL and Dockerfile for running Marianas - DuplexUMIBamToCollapsedFastqFirstPass - -## Version of tools in docker image (/container/Dockerfile) - -| Tool | Version | Location | -|--- |--- |--- | -| java base image | 8 | - | -| Marianas | 1.8.1 | https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar | - - -## CWL - -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner marianas_first_pass.cwl example_inputs.yaml -``` \ No newline at end of file diff --git a/marianas_collapsing_second_pass_1.8.1.md b/marianas_collapsing_second_pass_1.8.1.md new file mode 100644 index 00000000..5701bafd --- /dev/null +++ b/marianas_collapsing_second_pass_1.8.1.md @@ -0,0 +1,19 @@ +# CWL and Dockerfile for running Marianas - 
DuplexUMIToCollapsedFastqSecondPass + +## Version of tools in docker image \(/container/Dockerfile\) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| java base image | 8 | - | +| Marianas | 1.8.1 | [https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar](https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar) | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner marianas_first_pass.cwl test_inputs_second_pass.yaml +``` + diff --git a/marianas_collapsing_second_pass_1.8.1/README.md b/marianas_collapsing_second_pass_1.8.1/README.md deleted file mode 100644 index b3cdf8d7..00000000 --- a/marianas_collapsing_second_pass_1.8.1/README.md +++ /dev/null @@ -1,19 +0,0 @@ -# CWL and Dockerfile for running Marianas - DuplexUMIToCollapsedFastqSecondPass - -## Version of tools in docker image (/container/Dockerfile) - -| Tool | Version | Location | -|--- |--- |--- | -| java base image | 8 | - | -| Marianas | 1.8.1 | https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar | - - -## CWL - -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner marianas_first_pass.cwl test_inputs_second_pass.yaml -``` \ No newline at end of file diff --git a/marianas_process_loop_umi_1.8.1.md b/marianas_process_loop_umi_1.8.1.md new file mode 100644 index 00000000..d664f9e2 --- /dev/null +++ b/marianas_process_loop_umi_1.8.1.md @@ -0,0 +1,19 @@ +# CWL and Dockerfile for running Marianas - ProcessLoopUMIFastq + +## Version of tools in docker image \(/container/Dockerfile\) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| java base image | 8 | - | +| Marianas | 1.8.1 | 
[https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar](https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar) | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner marianas_first_pass.cwl example_inputs.yaml +``` + diff --git a/marianas_process_loop_umi_1.8.1/README.md b/marianas_process_loop_umi_1.8.1/README.md deleted file mode 100644 index a5b4e900..00000000 --- a/marianas_process_loop_umi_1.8.1/README.md +++ /dev/null @@ -1,19 +0,0 @@ -# CWL and Dockerfile for running Marianas - ProcessLoopUMIFastq - -## Version of tools in docker image (/container/Dockerfile) - -| Tool | Version | Location | -|--- |--- |--- | -| java base image | 8 | - | -| Marianas | 1.8.1 | https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar | - - -## CWL - -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner marianas_first_pass.cwl example_inputs.yaml -``` \ No newline at end of file diff --git a/marianas_separate_bams_1.8.1.md b/marianas_separate_bams_1.8.1.md new file mode 100644 index 00000000..a8a45060 --- /dev/null +++ b/marianas_separate_bams_1.8.1.md @@ -0,0 +1,33 @@ +# CWL and Dockerfile for running Marianas - SeparateBams + +## Version of tools in docker image \(../marianas\_process\_loop\_umi\_1.8.1/container/Dockerfile\) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| java base image | 8 | - | +| Marianas | 1.8.1 | [https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar](https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar) | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using 
[toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner marianas_separate_bams_1.8.1.cwl example_inputs.yaml +``` + +### Usage + +```bash +usage: marianas_separate_bams_1.8.1/marianas_separate_bams_1.8.1.cwl + [-h] --input_bam INPUT_BAM [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --input_bam INPUT_BAM +``` + diff --git a/marianas_separate_bams_1.8.1/README.md b/marianas_separate_bams_1.8.1/README.md deleted file mode 100644 index 998ce5c6..00000000 --- a/marianas_separate_bams_1.8.1/README.md +++ /dev/null @@ -1,33 +0,0 @@ -# CWL and Dockerfile for running Marianas - SeparateBams - -## Version of tools in docker image (../marianas_process_loop_umi_1.8.1/container/Dockerfile) - -| Tool | Version | Location | -|--- |--- |--- | -| java base image | 8 | - | -| Marianas | 1.8.1 | https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar | - - -## CWL - -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner marianas_seprate_bams_1.8.1.cwl example_inputs.yaml -``` - -### Usage - -```bash -usage: marianas_separate_bams_1.8.1/marianas_separate_bams_1.8.1.cwl - [-h] --input_bam INPUT_BAM [job_order] - -positional arguments: - job_order Job input json file - -optional arguments: - -h, --help show this help message and exit - --input_bam INPUT_BAM -``` diff --git a/picard_add_or_replace_read_groups_1.96/README.md b/picard_add_or_replace_read_groups_1.96.md similarity index 80% rename from picard_add_or_replace_read_groups_1.96/README.md rename to picard_add_or_replace_read_groups_1.96.md index b07355a4..2663a69a 100644 --- a/picard_add_or_replace_read_groups_1.96/README.md +++ b/picard_add_or_replace_read_groups_1.96.md @@ -1,26 +1,26 @@ # CWL and Dockerfile for running Picard - AddOrReplaceReadGroups -## Version of tools in docker
image (/container/Dockerfile) +## Version of tools in docker image \(/container/Dockerfile\) -| Tool | Version | Location | -|--- |--- |--- | -| java base image | 8 | - | -| picard | 1.96 | https://sourceforge.net/projects/picard/files/picard-tools/1.96/picard-tools-1.96.zip | -| R | 3.3.3 | r-base for opnejdk:8 | +| Tool | Version | Location | +| :--- | :--- | :--- | +| java base image | 8 | - | +| picard | 1.96 | [https://sourceforge.net/projects/picard/files/picard-tools/1.96/picard-tools-1.96.zip](https://sourceforge.net/projects/picard/files/picard-tools/1.96/picard-tools-1.96.zip) | +| R | 3.3.3 | r-base for openjdk:8 | -[![](https://images.microbadger.com/badges/image/mskcc/picard_1.96:0.1.0.svg)](https://microbadger.com/images/mskcc/picard_1.96:0.1.0 "Get your own image badge on microbadger.com") [![](https://images.microbadger.com/badges/version/mskcc/picard_1.96:0.1.0.svg)](https://microbadger.com/images/mskcc/picard_1.96:0.1.0 "Get your own version badge on microbadger.com") [![](https://images.microbadger.com/badges/license/mskcc/picard_1.96:0.1.0.svg)](https://microbadger.com/images/mskcc/picard_1.96:0.1.0 "Get your own license badge on microbadger.com") +[![](https://images.microbadger.com/badges/image/mskcc/picard_1.96:0.1.0.svg)](https://microbadger.com/images/mskcc/picard_1.96:0.1.0) [![](https://images.microbadger.com/badges/version/mskcc/picard_1.96:0.1.0.svg)](https://microbadger.com/images/mskcc/picard_1.96:0.1.0) [![](https://images.microbadger.com/badges/license/mskcc/picard_1.96:0.1.0.svg)](https://microbadger.com/images/mskcc/picard_1.96:0.1.0) ## CWL -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): ```bash > toil-cwl-runner picard_add_or_replace_read_groups_1.96.cwl example_inputs.yaml ``` -**If at MSK,
using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying** [**lsf.py**](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) **by removing `type==X86_64 &&` you can use the following command** ```bash #Using CWLTOOL diff --git a/picard_collect_alignment_summary_metrics_2.8.1/README.md b/picard_collect_alignment_summary_metrics_2.8.1.md similarity index 86% rename from picard_collect_alignment_summary_metrics_2.8.1/README.md rename to picard_collect_alignment_summary_metrics_2.8.1.md index 16d0b11b..78227469 100644 --- a/picard_collect_alignment_summary_metrics_2.8.1/README.md +++ b/picard_collect_alignment_summary_metrics_2.8.1.md @@ -1,19 +1,18 @@ # CWL and Dockerfile for running Picard - CollectAlignmentSummaryMetrics -## Version of tools in docker image (../picard_mark_duplicates_2.8.1/container/Dockerfile) - -| Tool | Version | Location | -|--- |--- |--- | -| java base image | 8 | - | -| picard | 2.8.1 | https://github.com/broadinstitute/picard/releases/download/2.8.1/picard.jar | -| R | 3.3.3 | r-base for opnejdk:8 | +## Version of tools in docker image \(../picard\_mark\_duplicates\_2.8.1/container/Dockerfile\) +| Tool | Version | Location | +| :--- | :--- | :--- | +| java base image | 8 | - | +| picard | 2.8.1 | [https://github.com/broadinstitute/picard/releases/download/2.8.1/picard.jar](https://github.com/broadinstitute/picard/releases/download/2.8.1/picard.jar) | +| R | 3.3.3 | r-base for openjdk:8 | ## CWL -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +*
Example Command using [toil](https://toil.readthedocs.io): ```bash > toil-cwl-runner picard_collect_alignment_summary_metrics_2.8.1.cwl example_inputs.yaml @@ -78,3 +77,4 @@ optional arguments: Default value: 0. This option can be set to 'null' to clear the default value. ``` + diff --git a/picard_fix_mate_information_1.96/README.md b/picard_fix_mate_information_1.96.md similarity index 74% rename from picard_fix_mate_information_1.96/README.md rename to picard_fix_mate_information_1.96.md index 567a78e3..6c834c90 100644 --- a/picard_fix_mate_information_1.96/README.md +++ b/picard_fix_mate_information_1.96.md @@ -1,26 +1,26 @@ # CWL and Dockerfile for running Picard - FixMateInformation -## Version of tools in docker image (../picard_add_or_replace_read_groups_1.96/container/Dockerfile) +## Version of tools in docker image \(../picard\_add\_or\_replace\_read\_groups\_1.96/container/Dockerfile\) -| Tool | Version | Location | -|--- |--- |--- | -| java base image | 8 | - | -| picard | 1.96 | https://sourceforge.net/projects/picard/files/picard-tools/1.96/picard-tools-1.96.zip | -| R | 3.3.3 | r-base for opnejdk:8 | +| Tool | Version | Location | +| :--- | :--- | :--- | +| java base image | 8 | - | +| picard | 1.96 | [https://sourceforge.net/projects/picard/files/picard-tools/1.96/picard-tools-1.96.zip](https://sourceforge.net/projects/picard/files/picard-tools/1.96/picard-tools-1.96.zip) | +| R | 3.3.3 | r-base for openjdk:8 | -[![](https://images.microbadger.com/badges/image/mskcc/picard_1.96:0.1.0.svg)](https://microbadger.com/images/mskcc/picard_1.96:0.1.0 "Get your own image badge on microbadger.com") [![](https://images.microbadger.com/badges/version/mskcc/picard_1.96:0.1.0.svg)](https://microbadger.com/images/mskcc/picard_1.96:0.1.0 "Get your own version badge on microbadger.com") [![](https://images.microbadger.com/badges/license/mskcc/picard_1.96:0.1.0.svg)](https://microbadger.com/images/mskcc/picard_1.96:0.1.0 "Get your own license badge on
microbadger.com") +[![](https://images.microbadger.com/badges/image/mskcc/picard_1.96:0.1.0.svg)](https://microbadger.com/images/mskcc/picard_1.96:0.1.0) [![](https://images.microbadger.com/badges/version/mskcc/picard_1.96:0.1.0.svg)](https://microbadger.com/images/mskcc/picard_1.96:0.1.0) [![](https://images.microbadger.com/badges/license/mskcc/picard_1.96:0.1.0.svg)](https://microbadger.com/images/mskcc/picard_1.96:0.1.0) ## CWL -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): ```bash > toil-cwl-runner picard_fix_mate_information_1.96.cwl example_inputs.yaml ``` -**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying** [**lsf.py**](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) **by removing `type==X86_64 &&` you can use the following command** ```bash #Using CWLTOOL @@ -33,7 +33,7 @@ ### Usage -``` +```text usage: picard_fix_mate_information_1.96.cwl [-h] positional arguments: @@ -72,4 +72,5 @@ optional arguments: coordinate-sorted BAM file. Default value:false. This option can be set to 'null' to clear the default value. 
Possible values:{true, false} -``` \ No newline at end of file +``` + diff --git a/picard_mark_duplicates_1.96.md b/picard_mark_duplicates_1.96.md new file mode 100644 index 00000000..962dc58b --- /dev/null +++ b/picard_mark_duplicates_1.96.md @@ -0,0 +1,20 @@ +# CWL and Dockerfile for running Picard - MarkDuplicates + +## Version of tools in docker image \(/container/Dockerfile\) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| java base image | 8 | - | +| picard | 1.96 | [https://sourceforge.net/projects/picard/files/picard-tools/1.96/picard-tools-1.96.zip](https://sourceforge.net/projects/picard/files/picard-tools/1.96/picard-tools-1.96.zip) | +| R | 3.3.3 | r-base for openjdk:8 | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner picard_mark_duplicates_1.96.cwl example_inputs.yaml +``` + diff --git a/picard_mark_duplicates_1.96/README.md b/picard_mark_duplicates_1.96/README.md deleted file mode 100644 index bb651139..00000000 --- a/picard_mark_duplicates_1.96/README.md +++ /dev/null @@ -1,20 +0,0 @@ -# CWL and Dockerfile for running Picard - MarkDuplicates - -## Version of tools in docker image (/container/Dockerfile) - -| Tool | Version | Location | -|--- |--- |--- | -| java base image | 8 | - | -| picard | 1.96 | https://sourceforge.net/projects/picard/files/picard-tools/1.96/picard-tools-1.96.zip | -| R | 3.3.3 | r-base for opnejdk:8 | - - -## CWL - -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner picard_mark_duplicates_1.96.cwl example_inputs.yaml -``` diff --git a/picard_mark_duplicates_2.8.1.md b/picard_mark_duplicates_2.8.1.md new file mode 100644 index 00000000..df66db75 --- /dev/null +++ b/picard_mark_duplicates_2.8.1.md @@ -0,0 +1,20 @@ +# CWL and Dockerfile for running Picard -
MarkDuplicates + +## Version of tools in docker image \(/container/Dockerfile\) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| java base image | 8 | - | +| picard | 2.8.1 | [https://github.com/broadinstitute/picard/releases/download/2.8.1/picard.jar](https://github.com/broadinstitute/picard/releases/download/2.8.1/picard.jar) | +| R | 3.3.3 | r-base for openjdk:8 | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner picard_mark_duplicates_2.8.1.cwl example_inputs.yaml +``` + diff --git a/picard_mark_duplicates_2.8.1/README.md b/picard_mark_duplicates_2.8.1/README.md deleted file mode 100644 index 173d0b7e..00000000 --- a/picard_mark_duplicates_2.8.1/README.md +++ /dev/null @@ -1,20 +0,0 @@ -# CWL and Dockerfile for running Picard - MarkDuplicates - -## Version of tools in docker image (/container/Dockerfile) - -| Tool | Version | Location | -|--- |--- |--- | -| java base image | 8 | - | -| picard | 2.8.1 | https://github.com/broadinstitute/picard/releases/download/2.8.1/picard.jar | -| R | 3.3.3 | r-base for opnejdk:8 | - - -## CWL - -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner picard_mark_duplicates_2.8.1.cwl example_inputs.yaml -``` \ No newline at end of file diff --git a/trim_galore_0.6.2/README.md b/trim_galore_0.6.2.md similarity index 82% rename from trim_galore_0.6.2/README.md rename to trim_galore_0.6.2.md index 3727924e..ecf109aa 100644 --- a/trim_galore_0.6.2/README.md +++ b/trim_galore_0.6.2.md @@ -1,25 +1,25 @@ # CWL and Dockerfile for running Trim Galore -## Version of tools in docker image (/container/Dockerfile) +## Version of tools in docker image \(/container/Dockerfile\) -| Tool | Version | Location | -|--- |--- |--- | -| Ubuntu base image | 18.04 | - | -| cutadapt | 2.3 |
https://pypi.org/project/cutadapt/ | -| FASTQC | 0.11.8 | https://www.bioinformatics.babraham.ac.uk/projects/fastqc/fastqc_v0.11.8.zip | -| Trim Galore | 0.6.2 | https://github.com/FelixKrueger/TrimGalore/archive/0.6.2.tar.gz | +| Tool | Version | Location | +| :--- | :--- | :--- | +| Ubuntu base image | 18.04 | - | +| cutadapt | 2.3 | [https://pypi.org/project/cutadapt/](https://pypi.org/project/cutadapt/) | +| FASTQC | 0.11.8 | [https://www.bioinformatics.babraham.ac.uk/projects/fastqc/fastqc\_v0.11.8.zip](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/fastqc_v0.11.8.zip) | +| Trim Galore | 0.6.2 | [https://github.com/FelixKrueger/TrimGalore/archive/0.6.2.tar.gz](https://github.com/FelixKrueger/TrimGalore/archive/0.6.2.tar.gz) | ## CWL -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): ```bash > toil-cwl-runner trim_galore_0.6.2.cwl example_inputs.yaml ``` -**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying** [**lsf.py**](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) **by removing `type==X86_64 &&` you can use the following command** ```bash #Using CWLTOOL @@ -32,7 +32,7 @@ ### Usage -``` +```text usage: trim_galore_0.6.2.cwl [-h] positional arguments: @@ -92,4 +92,5 @@ optional arguments: --error_rate ERROR_RATE Maximum allowed error rate (no. 
of errors divided by the length of the matching region) (default: 0.1) -``` \ No newline at end of file +``` + diff --git a/utilities_ubuntu_18.04.md b/utilities_ubuntu_18.04.md new file mode 100644 index 00000000..d6433af3 --- /dev/null +++ b/utilities_ubuntu_18.04.md @@ -0,0 +1,18 @@ +# CWL and Dockerfile for running utilities from Ubuntu 18.04 + +## Version of tools in docker image \(/container/Dockerfile\) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| Ubuntu base image | 18.04 | - | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs\_toolname.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner gzip.cwl example_inputs_gzip.yaml +``` + diff --git a/utilities_ubuntu_18.04/README.md b/utilities_ubuntu_18.04/README.md deleted file mode 100644 index a13033b0..00000000 --- a/utilities_ubuntu_18.04/README.md +++ /dev/null @@ -1,17 +0,0 @@ -# CWL and Dockerfile for running utilites from Ubuntu 18.04 - -## Version of tools in docker image (/container/Dockerfile) - -| Tool | Version | Location | -|--- |--- |--- | -| Ubuntu base image | 18.04 | - | - -## CWL - -- CWL specification 1.0 -- Use example_inputs_toolname.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner gzip.cwl example_inputs_gzip.yaml -``` diff --git a/waltz_count_reads_3.1.1/README.md b/waltz_count_reads_3.1.1.md similarity index 72% rename from waltz_count_reads_3.1.1/README.md rename to waltz_count_reads_3.1.1.md index 8ad83443..3f40ee54 100644 --- a/waltz_count_reads_3.1.1/README.md +++ b/waltz_count_reads_3.1.1.md @@ -1,24 +1,25 @@ # CWL and Dockerfile for running Waltz - Count Reads -## Version of tools in docker image (/container/Dockerfile) +## Version of tools in docker image \(/container/Dockerfile\) -| Tool | Version | Location | -|--- |--- |--- | -| java base image | 8 | - | -| waltz | 3.1.1 |
https://github.com/juberpatel/Waltz/releases/download/v3.1.1/Waltz-3.1.1.jar | +| Tool | Version | Location | +| :--- | :--- | :--- | +| java base image | 8 | - | +| waltz | 3.1.1 | [https://github.com/juberpatel/Waltz/releases/download/v3.1.1/Waltz-3.1.1.jar](https://github.com/juberpatel/Waltz/releases/download/v3.1.1/Waltz-3.1.1.jar) | [![](https://img.shields.io/badge/version-3.1.1-blue)](https://github.com/juberpatel/Waltz/releases/tag/v3.1.1) + ## CWL -- CWL specification 1.0 -- Use example_inputs.yml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): +* CWL specification 1.0 +* Use example\_inputs.yml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): ```bash > toil-cwl-runner waltz_count_reads_3.1.1.cwl example_inputs.yml ``` -**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying** [**lsf.py**](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) **by removing `type==X86_64 &&` you can use the following command** ```bash #Using CWLTOOL @@ -55,3 +56,4 @@ optional arguments: --number_of_threads NUMBER_OF_THREADS --bed_file BED_FILE ``` + diff --git a/waltz_pileupmatrices_3.1.1/README.md b/waltz_pileupmatrices_3.1.1.md similarity index 72% rename from waltz_pileupmatrices_3.1.1/README.md rename to waltz_pileupmatrices_3.1.1.md index b5aed666..41a88a62 100644 --- a/waltz_pileupmatrices_3.1.1/README.md +++ b/waltz_pileupmatrices_3.1.1.md @@ -1,24 +1,25 @@ # CWL and Dockerfile for running Waltz - PileupMetrics -## Version of tools in docker image (../waltz_count_reads_3.1.1/container/Dockerfile) +## Version of tools in docker image 
\(../waltz\_count\_reads\_3.1.1/container/Dockerfile\) -| Tool | Version | Location | -|--- |--- |--- | -| java base image | 8 | - | -| waltz | 3.1.1 | https://github.com/juberpatel/Waltz/releases/download/v3.1.1/Waltz-3.1.1.jar | +| Tool | Version | Location | +| :--- | :--- | :--- | +| java base image | 8 | - | +| waltz | 3.1.1 | [https://github.com/juberpatel/Waltz/releases/download/v3.1.1/Waltz-3.1.1.jar](https://github.com/juberpatel/Waltz/releases/download/v3.1.1/Waltz-3.1.1.jar) | [![](https://img.shields.io/badge/version-3.1.1-blue)](https://github.com/juberpatel/Waltz/releases/tag/v3.1.1) + ## CWL -- CWL specification 1.0 -- Use example_inputs.yml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): +* CWL specification 1.0 +* Use example\_inputs.yml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): ```bash > toil-cwl-runner waltz_pileupmatrices_3.1.1.cwl example_inputs.yml ``` -**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying** [**lsf.py**](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) **by removing `type==X86_64 &&` you can use the following command** ```bash #Using CWLTOOL @@ -56,3 +57,4 @@ optional arguments: --number_of_threads NUMBER_OF_THREADS --bed_file BED_FILE ``` + From 3930752664fdf7cdf6196ff1492ba6718a493d0e Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Mon, 23 Sep 2019 11:05:42 -0400 Subject: [PATCH 006/476] Revert "GitBook: [develop] 45 pages modified" This reverts commit 7b0111f2f852aef4908ec724e2ce03ab29dd1662. 
--- .../{issue_template.md => ISSUE_TEMPLATE.md} | 12 +- .github/README.md | 2 - README.md | 24 +-- SUMMARY.md | 26 ---- abra2_2.17.md => abra2_2.17/README.md | 27 ++-- abra2_2.19.md | 21 --- abra2_2.19/README.md | 21 +++ bedtools_genomecov_v2.28.0_cv2.md | 43 ------ bedtools_genomecov_v2.28.0_cv2/README.md | 56 +++++++ bedtools_merge_v2.28.0_cv2.md | 43 ------ bedtools_merge_v2.28.0_cv2/README.md | 57 +++++++ disambiguate_1.0.0.md | 53 ------- disambiguate_1.0.0/README.md | 52 +++++++ gatk_ApplyBQSR_4.1.2.0/README.md | 128 ++++++++++++++++ gatk_BaseRecalibrator_4.1.2.0/README.md | 142 ++++++++++++++++++ gatk_applybqsr_4.1.2.0.md | 43 ------ gatk_baserecalibrator_4.1.2.0.md | 43 ------ marianas_collapsing_first_pass_1.8.1.md | 19 --- .../README.md | 19 +++ marianas_collapsing_second_pass_1.8.1.md | 19 --- .../README.md | 19 +++ marianas_process_loop_umi_1.8.1.md | 19 --- marianas_process_loop_umi_1.8.1/README.md | 19 +++ marianas_separate_bams_1.8.1.md | 33 ---- marianas_separate_bams_1.8.1/README.md | 33 ++++ .../README.md | 22 +-- .../README.md | 20 +-- .../README.md | 27 ++-- picard_mark_duplicates_1.96.md | 20 --- picard_mark_duplicates_1.96/README.md | 20 +++ picard_mark_duplicates_2.8.1.md | 20 --- picard_mark_duplicates_2.8.1/README.md | 20 +++ .../README.md | 27 ++-- utilities_ubuntu_18.04.md | 18 --- utilities_ubuntu_18.04/README.md | 17 +++ .../README.md | 20 ++- .../README.md | 20 ++- 37 files changed, 698 insertions(+), 526 deletions(-) rename .github/{issue_template.md => ISSUE_TEMPLATE.md} (50%) delete mode 100644 .github/README.md delete mode 100644 SUMMARY.md rename abra2_2.17.md => abra2_2.17/README.md (76%) delete mode 100644 abra2_2.19.md create mode 100644 abra2_2.19/README.md delete mode 100644 bedtools_genomecov_v2.28.0_cv2.md create mode 100644 bedtools_genomecov_v2.28.0_cv2/README.md delete mode 100644 bedtools_merge_v2.28.0_cv2.md create mode 100644 bedtools_merge_v2.28.0_cv2/README.md delete mode 100644 disambiguate_1.0.0.md create mode 
100644 disambiguate_1.0.0/README.md create mode 100644 gatk_ApplyBQSR_4.1.2.0/README.md create mode 100644 gatk_BaseRecalibrator_4.1.2.0/README.md delete mode 100644 gatk_applybqsr_4.1.2.0.md delete mode 100644 gatk_baserecalibrator_4.1.2.0.md delete mode 100644 marianas_collapsing_first_pass_1.8.1.md create mode 100644 marianas_collapsing_first_pass_1.8.1/README.md delete mode 100644 marianas_collapsing_second_pass_1.8.1.md create mode 100644 marianas_collapsing_second_pass_1.8.1/README.md delete mode 100644 marianas_process_loop_umi_1.8.1.md create mode 100644 marianas_process_loop_umi_1.8.1/README.md delete mode 100644 marianas_separate_bams_1.8.1.md create mode 100644 marianas_separate_bams_1.8.1/README.md rename picard_add_or_replace_read_groups_1.96.md => picard_add_or_replace_read_groups_1.96/README.md (80%) rename picard_collect_alignment_summary_metrics_2.8.1.md => picard_collect_alignment_summary_metrics_2.8.1/README.md (86%) rename picard_fix_mate_information_1.96.md => picard_fix_mate_information_1.96/README.md (74%) delete mode 100644 picard_mark_duplicates_1.96.md create mode 100644 picard_mark_duplicates_1.96/README.md delete mode 100644 picard_mark_duplicates_2.8.1.md create mode 100644 picard_mark_duplicates_2.8.1/README.md rename trim_galore_0.6.2.md => trim_galore_0.6.2/README.md (82%) delete mode 100644 utilities_ubuntu_18.04.md create mode 100644 utilities_ubuntu_18.04/README.md rename waltz_count_reads_3.1.1.md => waltz_count_reads_3.1.1/README.md (72%) rename waltz_pileupmatrices_3.1.1.md => waltz_pileupmatrices_3.1.1/README.md (72%) diff --git a/.github/issue_template.md b/.github/ISSUE_TEMPLATE.md similarity index 50% rename from .github/issue_template.md rename to .github/ISSUE_TEMPLATE.md index a8e10db8..14740fbe 100644 --- a/.github/issue_template.md +++ b/.github/ISSUE_TEMPLATE.md @@ -1,17 +1,15 @@ -# ISSUE\_TEMPLATE - * cwl-commandlinetools version: * Python version: * Operating System: -## Description +### Description -Describe what 
you were trying to get done. Tell us what happened, what went wrong, and what you expected to happen. +Describe what you were trying to get done. +Tell us what happened, what went wrong, and what you expected to happen. -## What I Did +### What I Did -```text +``` Paste the command(s) you ran and the output. If there was a crash, please include the traceback here. ``` - diff --git a/.github/README.md b/.github/README.md deleted file mode 100644 index c41e0e04..00000000 --- a/.github/README.md +++ /dev/null @@ -1,2 +0,0 @@ -# .github - diff --git a/README.md b/README.md index 79c5f73f..cf01fb07 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,16 @@ --- -description: >- - Central location for storing common workflow language based command line tools - for building msk-access workflows +description: Central location for storing common workflow language based command line tools for building msk-access workflows --- -# MSK-ACCESS Command Line Tools +# MSK-ACCESS BAM Generation workflow -[![PyPI version](https://badge.fury.io/py/cwl-commandlinetools.svg)](https://badge.fury.io/py/cwl-commandlinetools) [![Build Status](https://travis-ci.org/msk-access/cwl-commandlinetools.svg?branch=master)](https://travis-ci.org/msk-access/cwl-commandlinetools/) [![Updates](https://pyup.io/repos/github/msk-access/cwl-commandlinetools/shield.svg)](https://pyup.io/repos/github/msk-access/cwl-commandlinetools/) [![Python 3](https://pyup.io/repos/github/msk-access/cwl-commandlinetools/python-3-shield.svg)](https://pyup.io/repos/github/msk-access/cwl-commandlinetools/) +[![PyPI version](https://badge.fury.io/py/cwl-commandlinetools.svg)](https://badge.fury.io/py/cwl-commandlinetools) +[![Build Status](https://travis-ci.org/msk-access/cwl-commandlinetools.svg?branch=master)](https://travis-ci.org/msk-access/cwl-commandlinetools/) +[![Updates](https://pyup.io/repos/github/msk-access/cwl-commandlinetools/shield.svg)](https://pyup.io/repos/github/msk-access/cwl-commandlinetools/) +[![Python 
3](https://pyup.io/repos/github/msk-access/cwl-commandlinetools/python-3-shield.svg)](https://pyup.io/repos/github/msk-access/cwl-commandlinetools/) -* Free software: Apache Software License 2.0 -* Documentation: [https://msk-access.gitbook.io/cwl-commandlinetools](https://msk-access.gitbook.io/cwl-commandlinetools) +- Free software: Apache Software License 2.0 +- Documentation: https://msk-access.gitbook.io/cwl-commandlinetools ## Features @@ -19,14 +20,13 @@ Create command line tools in common workflow language to generate msk-access wor Clone the repository: -```text +``` git clone --depth 50 https://github.com/msk-access/cwl-commandlinetools.git ``` ## Credits -This package was created with Cookiecutter _and the `audreyr/cookiecutter-pypackage`_ project template. - -* Cookiecutter: [https://github.com/audreyr/cookiecutter](https://github.com/audreyr/cookiecutter) -* `audreyr/cookiecutter-pypackage`: [https://github.com/audreyr/cookiecutter-pypackage](https://github.com/audreyr/cookiecutter-pypackage) +This package was created with Cookiecutter_ and the `audreyr/cookiecutter-pypackage`_ project template. 
+- Cookiecutter: https://github.com/audreyr/cookiecutter +- `audreyr/cookiecutter-pypackage`: https://github.com/audreyr/cookiecutter-pypackage diff --git a/SUMMARY.md b/SUMMARY.md deleted file mode 100644 index 151798ee..00000000 --- a/SUMMARY.md +++ /dev/null @@ -1,26 +0,0 @@ -# Table of contents - -* [MSK-ACCESS Command Line Tools](README.md) -* [CWL and Dockerfile for running Waltz - PileupMetrics](waltz_pileupmatrices_3.1.1.md) -* [CWL and Dockerfile for running Disambiguate](disambiguate_1.0.0.md) -* [CWL and Dockerfile for running Marianas - ProcessLoopUMIFastq](marianas_process_loop_umi_1.8.1.md) -* [CWL and Dockerfile for running Marianas - DuplexUMIToCollapsedFastqSecondPass](marianas_collapsing_second_pass_1.8.1.md) -* [.github](.github/README.md) - * [ISSUE\_TEMPLATE](.github/issue_template.md) -* [CWL and Dockerfile for running Marianas - DuplexUMIBamToCollapsedFastqFirstPass](marianas_collapsing_first_pass_1.8.1.md) -* [CWL and Dockerfile for running ABRA2](abra2_2.17.md) -* [CWL and Dockerfile for running ABRA2](abra2_2.19.md) -* [CWL and Dockerfile for running Picard - CollectAlignmentSummaryMetrics](picard_collect_alignment_summary_metrics_2.8.1.md) -* [CWL and Dockerfile for running Picard - MarkDuplicates](picard_mark_duplicates_2.8.1.md) -* [CWL and Dockerfile for running Picard - FixMateInformation](picard_fix_mate_information_1.96.md) -* [CWL and Dockerfile for running utilites from Ubuntu 18.04](utilities_ubuntu_18.04.md) -* [CWL and Dockerfile for running Bedtools GenomeCov](bedtools_genomecov_v2.28.0_cv2.md) -* [CWL and Dockerfile for running Trim Galore](trim_galore_0.6.2.md) -* [CWL and Dockerfile for running Marianas - SeparateBams](marianas_separate_bams_1.8.1.md) -* [CWL and Dockerfile for running GATK4 - Apply BQSR](gatk_applybqsr_4.1.2.0.md) -* [CWL and Dockerfile for running Bedtools Merge](bedtools_merge_v2.28.0_cv2.md) -* [CWL and Dockerfile for running Picard - AddOrReplaceReadGroups](picard_add_or_replace_read_groups_1.96.md) -* 
[CWL and Dockerfile for running Waltz - Count Reads](waltz_count_reads_3.1.1.md) -* [CWL and Dockerfile for running GATK4 - Base Recalibrator](gatk_baserecalibrator_4.1.2.0.md) -* [CWL and Dockerfile for running Picard - MarkDuplicates](picard_mark_duplicates_1.96.md) - diff --git a/abra2_2.17.md b/abra2_2.17/README.md similarity index 76% rename from abra2_2.17.md rename to abra2_2.17/README.md index 49de206f..89818e32 100644 --- a/abra2_2.17.md +++ b/abra2_2.17/README.md @@ -1,25 +1,25 @@ # CWL and Dockerfile for running ABRA2 -## Version of tools in docker image \(/container/Dockerfile\) +## Version of tools in docker image (/container/Dockerfile) -| Tool | Version | Location | -| :--- | :--- | :--- | -| openjdk | 8 | - | -| ABRA2 | 2.17 | [https://github.com/mozack/abra2/releases/download/v2.19/abra2-2.17.jar](https://github.com/mozack/abra2/releases/download/v2.19/abra2-2.17.jar) | +| Tool | Version | Location | +|--- |--- |--- | +| openjdk | 8 | - | +| ABRA2 | 2.17 | https://github.com/mozack/abra2/releases/download/v2.19/abra2-2.17.jar | -[![](https://images.microbadger.com/badges/version/mskcc/abra2:0.1.0.svg)](https://microbadger.com/images/mskcc/abra2:0.1.0) [![](https://images.microbadger.com/badges/image/mskcc/abra2:0.1.0.svg)](https://microbadger.com/images/mskcc/abra2:0.1.0) [![](https://images.microbadger.com/badges/license/mskcc/abra2:0.1.0.svg)](https://microbadger.com/images/mskcc/abra2:0.1.0) +[![](https://images.microbadger.com/badges/version/mskcc/abra2:0.1.0.svg)](https://microbadger.com/images/mskcc/abra2:0.1.0 "Get your own version badge on microbadger.com") [![](https://images.microbadger.com/badges/image/mskcc/abra2:0.1.0.svg)](https://microbadger.com/images/mskcc/abra2:0.1.0 "Get your own image badge on microbadger.com") [![](https://images.microbadger.com/badges/license/mskcc/abra2:0.1.0.svg)](https://microbadger.com/images/mskcc/abra2:0.1.0 "Get your own license badge on microbadger.com") ## CWL -* CWL specification 1.0 -* Use 
example\_inputs.yaml to see the inputs to the cwl -* Example Command using [toil](https://toil.readthedocs.io): +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): ```bash > toil-cwl-runner abra2_2.17.cwl example_inputs.yaml ``` -**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying** [**lsf.py**](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) **by removing `type==X86_64 &&` you can use the following command** +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** ```bash #Using CWLTOOL @@ -30,9 +30,9 @@ > toil-cwl-runner --singularity --logFile /path/to/abra2_toil_log/cwltoil.log --jobStore /path/to/abra2_jobStore --batchSystem lsf --workDir /path/to/abra2_toil_log --outdir . --writeLogs /path/to/abra2_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/abra2_2.17.cwl /path/to/inputs.yaml > abra2_toil.stdout 2> abra2_toil.stderr & ``` -### Usage +### Usage -```text +``` usage: abra2_2.17.cwl [-h] positional arguments: @@ -85,5 +85,4 @@ optional arguments: VCF containing known (or suspected) variant sites. Very large files should be avoided. 
--no_sort Do not attempt to sort final output -``` - + ``` \ No newline at end of file diff --git a/abra2_2.19.md b/abra2_2.19.md deleted file mode 100644 index 4f8c9ba7..00000000 --- a/abra2_2.19.md +++ /dev/null @@ -1,21 +0,0 @@ -# CWL and Dockerfile for running ABRA2 - -## Version of tools in docker image \(/container/Dockerfile\) - -| Tool | Version | Location | -| :--- | :--- | :--- | -| openjdk | 8 | - | -| ABRA2 | 2.19 | [https://github.com/mozack/abra2/releases/download/v2.19/abra2-2.19.jar](https://github.com/mozack/abra2/releases/download/v2.19/abra2-2.19.jar) | - -[![](https://images.microbadger.com/badges/version/mskcc/abra2:0.2.0.svg)](https://microbadger.com/images/mskcc/abra2:0.2.0) [![](https://images.microbadger.com/badges/image/mskcc/abra2:0.2.0.svg)](https://microbadger.com/images/mskcc/abra2:0.2.0) [![](https://images.microbadger.com/badges/license/mskcc/abra2:0.2.0.svg)](https://microbadger.com/images/mskcc/abra2:0.2.0) - -## CWL - -* CWL specification 1.0 -* Use example\_inputs.yaml to see the inputs to the cwl -* Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner abra2_2.19.cwl example_inputs.yaml -``` - diff --git a/abra2_2.19/README.md b/abra2_2.19/README.md new file mode 100644 index 00000000..3fbbef6f --- /dev/null +++ b/abra2_2.19/README.md @@ -0,0 +1,21 @@ +# CWL and Dockerfile for running ABRA2 + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +|--- |--- |--- | +| openjdk | 8 | - | +| ABRA2 | 2.19 | https://github.com/mozack/abra2/releases/download/v2.19/abra2-2.19.jar | + +[![](https://images.microbadger.com/badges/version/mskcc/abra2:0.2.0.svg)](https://microbadger.com/images/mskcc/abra2:0.2.0 "Get your own version badge on microbadger.com") [![](https://images.microbadger.com/badges/image/mskcc/abra2:0.2.0.svg)](https://microbadger.com/images/mskcc/abra2:0.2.0 "Get your own image badge on microbadger.com") 
[![](https://images.microbadger.com/badges/license/mskcc/abra2:0.2.0.svg)](https://microbadger.com/images/mskcc/abra2:0.2.0 "Get your own license badge on microbadger.com") + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner abra2_2.19.cwl example_inputs.yaml +``` + diff --git a/bedtools_genomecov_v2.28.0_cv2.md b/bedtools_genomecov_v2.28.0_cv2.md deleted file mode 100644 index 355b0293..00000000 --- a/bedtools_genomecov_v2.28.0_cv2.md +++ /dev/null @@ -1,43 +0,0 @@ -# CWL and Dockerfile for running Bedtools GenomeCov - -## Version of tools in [docker image ](https://hub.docker.com/r/biocontainers/bedtools) - -| Tool | Version | Location | -| :--- | :--- | :--- | -| Bedtools | v2.28.0\_cv2 | [https://github.com/arq5x/bedtools2/releases/tag/v2.28.0](https://github.com/arq5x/bedtools2/releases/tag/v2.28.0) | - -[![](https://img.shields.io/badge/version-2.28.0_cv2-blue)](https://github.com/arq5x/bedtools2/releases/tag/v2.28.0) - -## CWL - -* CWL specification 1.0 -* Use example\_inputs.yml to see the inputs to the cwl -* Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner bedtools_genomecov_v2.28.0_cv2.cwl example_inputs.yml -``` - -**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying** [**lsf.py**](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) **by removing `type==X86_64 &&` you can use the following command** - -```bash -#Using CWLTOOL -> cwltool --singularity --non-strict bedtools_genomecov_v2.28.0_cv2.cwl inputs.yaml - -#Using toil-cwl-runner -> mkdir run_directory -> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats 
--retryCount 2 --disableCaching --maxLogFileSize 20000000000 bedtools_genomecov_v2.28.0_cv2.cwl inputs.yaml > file.stdout 2> file.stderr & -``` - -## Usage - -\`\`\`bash - -> toil-cwl-runner bedtools\_genomecov\_v2.28.0\_cv2.cwl --help - -usage: bedtools\_genomecov\_v2.28.0\_cv2.cwl \[-h\] --input INPUT --output\_file\_name OUTPUT\_FILE\_NAME \[--memory\_overhead MEMORY\_OVERHEAD\] \[--memory\_per\_job MEMORY\_PER\_JOB\] \[--number\_of\_threads NUMBER\_OF\_THREADS\] \[--option\_bedgraph\] \[job\_order\] - -positional arguments: job\_order Job input json file - -optional arguments: -h, --help show this help message and exit --input INPUT The input file can be in BAM format \(Note: BAM must be sorted by position\) --output\_file\_name OUTPUT\_FILE\_NAME --memory\_overhead MEMORY\_OVERHEAD --memory\_per\_job MEMORY\_PER\_JOB --number\_of\_threads NUMBER\_OF\_THREADS --option\_bedgraph option flag parameter to choose output file format. -bg refers to bedgraph format - diff --git a/bedtools_genomecov_v2.28.0_cv2/README.md b/bedtools_genomecov_v2.28.0_cv2/README.md new file mode 100644 index 00000000..75de8a57 --- /dev/null +++ b/bedtools_genomecov_v2.28.0_cv2/README.md @@ -0,0 +1,56 @@ +# CWL and Dockerfile for running Bedtools GenomeCov + +## Version of tools in [docker image ](https://hub.docker.com/r/biocontainers/bedtools) + +| Tool | Version | Location | +|--- |--- |--- | +| Bedtools | v2.28.0_cv2 | https://github.com/arq5x/bedtools2/releases/tag/v2.28.0 | + +[![](https://img.shields.io/badge/version-2.28.0_cv2-blue)](https://github.com/arq5x/bedtools2/releases/tag/v2.28.0) +## CWL + +- CWL specification 1.0 +- Use example_inputs.yml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner bedtools_genomecov_v2.28.0_cv2.cwl example_inputs.yml +``` + +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying 
[lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict bedtools_genomecov_v2.28.0_cv2.cwl inputs.yaml + +#Using toil-cwl-runner +> mkdir run_directory +> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 bedtools_genomecov_v2.28.0_cv2.cwl inputs.yaml > file.stdout 2> file.stderr & +``` + +## Usage + +```bash +> toil-cwl-runner bedtools_genomecov_v2.28.0_cv2.cwl --help + +usage: bedtools_genomecov_v2.28.0_cv2.cwl [-h] --input INPUT + --output_file_name OUTPUT_FILE_NAME + [--memory_overhead MEMORY_OVERHEAD] + [--memory_per_job MEMORY_PER_JOB] + [--number_of_threads NUMBER_OF_THREADS] + [--option_bedgraph] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --input INPUT The input file can be in BAM format (Note: BAM must be + sorted by position) + --output_file_name OUTPUT_FILE_NAME + --memory_overhead MEMORY_OVERHEAD + --memory_per_job MEMORY_PER_JOB + --number_of_threads NUMBER_OF_THREADS + --option_bedgraph option flag parameter to choose output file format. 
+ -bg refers to bedgraph format \ No newline at end of file diff --git a/bedtools_merge_v2.28.0_cv2.md b/bedtools_merge_v2.28.0_cv2.md deleted file mode 100644 index a1f1cc4b..00000000 --- a/bedtools_merge_v2.28.0_cv2.md +++ /dev/null @@ -1,43 +0,0 @@ -# CWL and Dockerfile for running Bedtools Merge - -## Version of tools in [docker image ](https://hub.docker.com/r/biocontainers/bedtools) - -| Tool | Version | Location | -| :--- | :--- | :--- | -| Bedtools | v2.28.0\_cv2 | [https://github.com/arq5x/bedtools2/releases/tag/v2.28.0](https://github.com/arq5x/bedtools2/releases/tag/v2.28.0) | - -[![](https://img.shields.io/badge/version-2.28.0_cv2-blue)](https://github.com/arq5x/bedtools2/releases/tag/v2.28.0) - -## CWL - -* CWL specification 1.0 -* Use example\_inputs.yml to see the inputs to the cwl -* Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner bedtools_merge_v2.28.0_cv2.cwl example_inputs.yml -``` - -**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying** [**lsf.py**](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) **by removing `type==X86_64 &&` you can use the following command** - -```bash -#Using CWLTOOL -> cwltool --singularity --non-strict bedtools_merge_v2.28.0_cv2.cwl inputs.yaml - -#Using toil-cwl-runner -> mkdir run_directory -> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 bedtools_merge_v2.28.0_cv2.cwl inputs.yaml > file.stdout 2> file.stderr & -``` - -## Usage - -\`\`\`bash - -> toil-cwl-runner bedtools\_merge\_v2.28.0\_cv2.cwl --help - -usage: bedtools\_merge\_v2.28.0\_cv2.cwl \[-h\] --input INPUT --output\_file\_name OUTPUT\_FILE\_NAME \[--memory\_overhead 
MEMORY\_OVERHEAD\] \[--memory\_per\_job MEMORY\_PER\_JOB\] \[--number\_of\_threads NUMBER\_OF\_THREADS\] \[--distance\_between\_features DISTANCE\_BETWEEN\_FEATURES\] \[job\_order\] - -positional arguments: job\_order Job input json file - -optional arguments: -h, --help show this help message and exit --input INPUT BEDgraph format file generated from Bedtools Genomecov module --output\_file\_name OUTPUT\_FILE\_NAME --memory\_overhead MEMORY\_OVERHEAD --memory\_per\_job MEMORY\_PER\_JOB --number\_of\_threads NUMBER\_OF\_THREADS --distance\_between\_features DISTANCE\_BETWEEN\_FEATURES Maximum distance between features allowed for features to be merged. - diff --git a/bedtools_merge_v2.28.0_cv2/README.md b/bedtools_merge_v2.28.0_cv2/README.md new file mode 100644 index 00000000..960664db --- /dev/null +++ b/bedtools_merge_v2.28.0_cv2/README.md @@ -0,0 +1,57 @@ +# CWL and Dockerfile for running Bedtools Merge + +## Version of tools in [docker image ](https://hub.docker.com/r/biocontainers/bedtools) + +| Tool | Version | Location | +|--- |--- |--- | +| Bedtools | v2.28.0_cv2 | https://github.com/arq5x/bedtools2/releases/tag/v2.28.0 | + +[![](https://img.shields.io/badge/version-2.28.0_cv2-blue)](https://github.com/arq5x/bedtools2/releases/tag/v2.28.0) +## CWL + +- CWL specification 1.0 +- Use example_inputs.yml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner bedtools_merge_v2.28.0_cv2.cwl example_inputs.yml +``` + +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict bedtools_merge_v2.28.0_cv2.cwl inputs.yaml + +#Using toil-cwl-runner +> mkdir run_directory +> toil-cwl-runner --singularity --logFile 
path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 bedtools_merge_v2.28.0_cv2.cwl inputs.yaml > file.stdout 2> file.stderr & +``` + +## Usage + +```bash +> toil-cwl-runner bedtools_merge_v2.28.0_cv2.cwl --help + +usage: bedtools_merge_v2.28.0_cv2.cwl [-h] --input INPUT --output_file_name + OUTPUT_FILE_NAME + [--memory_overhead MEMORY_OVERHEAD] + [--memory_per_job MEMORY_PER_JOB] + [--number_of_threads NUMBER_OF_THREADS] + [--distance_between_features DISTANCE_BETWEEN_FEATURES] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --input INPUT BEDgraph format file generated from Bedtools Genomecov + module + --output_file_name OUTPUT_FILE_NAME + --memory_overhead MEMORY_OVERHEAD + --memory_per_job MEMORY_PER_JOB + --number_of_threads NUMBER_OF_THREADS + --distance_between_features DISTANCE_BETWEEN_FEATURES + Maximum distance between features allowed for features + to be merged. \ No newline at end of file diff --git a/disambiguate_1.0.0.md b/disambiguate_1.0.0.md deleted file mode 100644 index c7c4209a..00000000 --- a/disambiguate_1.0.0.md +++ /dev/null @@ -1,53 +0,0 @@ -# CWL and Dockerfile for running Disambiguate - -## Version of tools in docker image \(/container/Dockerfile\) - -Dockerfile uses `biocontainers/biocontainers:latest` as a base image and installs tools from `bioconda`. 
- -| Tool | Version | Location | Notes | -| :--- | :--- | :--- | :--- | -| biocontainers | latest | [https://hub.docker.com/r/biocontainers/biocontainers/](https://hub.docker.com/r/biocontainers/biocontainers/) | base image; "latest" not actually latest version, just tag name on docker hub | -| bamtools | 2.4.0 | [https://bioconda.github.io/recipes/bamtools/README.html](https://bioconda.github.io/recipes/bamtools/README.html) | - | -| ngs-disambiguate | 2016.11.10 | [https://bioconda.github.io/recipes/ngs-disambiguate/README.html](https://bioconda.github.io/recipes/ngs-disambiguate/README.html) | - | - -[![](https://images.microbadger.com/badges/version/mskcc/disambiguate:1.0.0.svg)](https://microbadger.com/images/mskcc/disambiguate:1.0.0) [![](https://images.microbadger.com/badges/image/mskcc/disambiguate:1.0.0.svg)](https://microbadger.com/images/mskcc/disambiguate:1.0.0) - -## CWL - -* CWL specification 1.0 -* Use `example_inputs.yaml` to see the inputs to the cwl -* Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner disambiguate_1.0.0.cwl example_inputs.yaml -``` - -## Command - -```text -USAGE: - - cwltool disambiguate_1.0.0.cwl \ - --prefix \ - --output_dir \ - [--aligner ] \ - - -Where: - - --prefix - (required) Sample ID or name used as prefix. Do not include .bam - - --output_dir - (required) Output directory - - --aligner - Aligner option {bwa(default),tophat,hisat2,star} - - - (required) Species A BAM file - - - (required) Species B BAM file -``` - diff --git a/disambiguate_1.0.0/README.md b/disambiguate_1.0.0/README.md new file mode 100644 index 00000000..b1cd50f0 --- /dev/null +++ b/disambiguate_1.0.0/README.md @@ -0,0 +1,52 @@ + # CWL and Dockerfile for running Disambiguate + +## Version of tools in docker image (/container/Dockerfile) + +Dockerfile uses `biocontainers/biocontainers:latest` as a base image and installs tools from `bioconda`. 
+ +| Tool | Version | Location | Notes | +|--- |--- |--- | - | +| biocontainers | latest | https://hub.docker.com/r/biocontainers/biocontainers/ | base image; "latest" not actually latest version, just tag name on docker hub| +| bamtools | 2.4.0 | https://bioconda.github.io/recipes/bamtools/README.html | - | +| ngs-disambiguate | 2016.11.10 | https://bioconda.github.io/recipes/ngs-disambiguate/README.html | - | + +[![](https://images.microbadger.com/badges/version/mskcc/disambiguate:1.0.0.svg)](https://microbadger.com/images/mskcc/disambiguate:1.0.0 "Get your own version badge on microbadger.com") [![](https://images.microbadger.com/badges/image/mskcc/disambiguate:1.0.0.svg)](https://microbadger.com/images/mskcc/disambiguate:1.0.0 "Get your own image badge on microbadger.com") + + +## CWL + +- CWL specification 1.0 +- Use `example_inputs.yaml` to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner disambiguate_1.0.0.cwl example_inputs.yaml +``` + +## Command +``` +USAGE: + + cwltool disambiguate_1.0.0.cwl \ + --prefix \ + --output_dir \ + [--aligner ] \ + + +Where: + + --prefix + (required) Sample ID or name used as prefix. 
Do not include .bam + + --output_dir + (required) Output directory + + --aligner + Aligner option {bwa(default),tophat,hisat2,star} + + + (required) Species A BAM file + + + (required) Species B BAM file +``` diff --git a/gatk_ApplyBQSR_4.1.2.0/README.md b/gatk_ApplyBQSR_4.1.2.0/README.md new file mode 100644 index 00000000..6dd376f7 --- /dev/null +++ b/gatk_ApplyBQSR_4.1.2.0/README.md @@ -0,0 +1,128 @@ +# CWL and Dockerfile for running GATK4 - Apply BQSR + +## Version of tools in [docker image ](https://hub.docker.com/r/broadinstitute/gatk) + +| Tool | Version | Location | +|--- |--- |--- | +| GATK | 4.1.2.0 | https://github.com/broadinstitute/gatk/releases/tag/4.1.2.0 | + +[![](https://img.shields.io/badge/version-4.1.2.0-blue)](https://github.com/broadinstitute/gatk/releases/tag/4.1.2.0) +## CWL + +- CWL specification 1.0 +- Use example_inputs.yml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner gatk_ApplyBQSR_4.1.2.0.cwl example_inputs.yml +``` + +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict gatk_ApplyBQSR_4.1.2.0.cwl inputs.yaml + +#Using toil-cwl-runner +> mkdir run_directory +> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 gatk_ApplyBQSR_4.1.2.0.cwl inputs.yaml > file.stdout 2> file.stderr & +``` + +## Usage + +```bash +> toil-cwl-runner gatk_ApplyBQSR_4.1.2.0.cwl --help + +usage: gatk_ApplyBQSR_4.1.2.0.cwl [-h] --reference REFERENCE + 
[--create_output_bam_index] + --bqsr_recal_file BQSR_RECAL_FILE --input + INPUT [--output_file_name OUTPUT_FILE_NAME] + [--add_output_sam_program_record] + [--add_output_vcf_command_line] + [--arguments_file ARGUMENTS_FILE] + [--cloud_index_prefetch_buffer CLOUD_INDEX_PREFETCH_BUFFER] + [--cloud_prefetch_buffer CLOUD_PREFETCH_BUFFER] + [--create_output_bam_md5] + [--create_output_variant_index] + [--create_output_variant_md5] + [--disable_bam_index_caching] + [--disable_read_filter DISABLE_READ_FILTER] + [--disable_sequence_dictionary_validation] + [--emit_original_quals] + [--exclude_intervals EXCLUDE_INTERVALS] + [--gatk_config_file GATK_CONFIG_FILE] + [--gcs_max_retries GCS_MAX_RETRIES] + [--gcs_project_for_requester_pays GCS_PROJECT_FOR_REQUESTER_PAYS] + [--global_qscore_prior GLOBAL_QSCORE_PRIOR] + [--interval_exclusion_padding INTERVAL_EXCLUSION_PADDING] + [--interval_merging_rule INTERVAL_MERGING_RULE] + [--interval_padding INTERVAL_PADDING] + [--interval_set_rule INTERVAL_SET_RULE] + [--intervals INTERVALS] [--lenient] + [--preserve_qscores_less_than PRESERVE_QSCORES_LESS_THAN] + [--quantize_quals QUANTIZE_QUALS] [--quiet] + [--read_filter READ_FILTER] + [--read_index READ_INDEX] + [--read_validation_stringency READ_VALIDATION_STRINGENCY] + [--seconds_between_progress_updates SECONDS_BETWEEN_PROGRESS_UPDATES] + [--sequence_dictionary SEQUENCE_DICTIONARY] + [--sites_only_vcf_output] + [--use_jdk_deflater] [--use_jdk_inflater] + [--use_original_qualities] + [--memory_overhead MEMORY_OVERHEAD] + [--memory_per_job MEMORY_PER_JOB] + [--number_of_threads NUMBER_OF_THREADS] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --reference REFERENCE + Reference sequence + --create_output_bam_index + --bqsr_recal_file BQSR_RECAL_FILE + Input recalibration table for BQSR. 
Only run ApplyBQSR + with the covariates table created from the input BAM + --input INPUT A BAM file containing input read data + --output_file_name OUTPUT_FILE_NAME + Output file name. Not Required + --add_output_sam_program_record + --add_output_vcf_command_line + --arguments_file ARGUMENTS_FILE + --cloud_index_prefetch_buffer CLOUD_INDEX_PREFETCH_BUFFER + --cloud_prefetch_buffer CLOUD_PREFETCH_BUFFER + --create_output_bam_md5 + --create_output_variant_index + --create_output_variant_md5 + --disable_bam_index_caching + --disable_read_filter DISABLE_READ_FILTER + --disable_sequence_dictionary_validation + --emit_original_quals + --exclude_intervals EXCLUDE_INTERVALS + --gatk_config_file GATK_CONFIG_FILE + --gcs_max_retries GCS_MAX_RETRIES + --gcs_project_for_requester_pays GCS_PROJECT_FOR_REQUESTER_PAYS + --global_qscore_prior GLOBAL_QSCORE_PRIOR + --interval_exclusion_padding INTERVAL_EXCLUSION_PADDING + --interval_merging_rule INTERVAL_MERGING_RULE + --interval_padding INTERVAL_PADDING + --interval_set_rule INTERVAL_SET_RULE + --intervals INTERVALS + --lenient + --preserve_qscores_less_than PRESERVE_QSCORES_LESS_THAN + --quantize_quals QUANTIZE_QUALS + --quiet + --read_filter READ_FILTER + --read_index READ_INDEX + --read_validation_stringency READ_VALIDATION_STRINGENCY + --seconds_between_progress_updates SECONDS_BETWEEN_PROGRESS_UPDATES + --sequence_dictionary SEQUENCE_DICTIONARY + --sites_only_vcf_output + --use_jdk_deflater + --use_jdk_inflater + --use_original_qualities + --memory_overhead MEMORY_OVERHEAD + --memory_per_job MEMORY_PER_JOB + --number_of_threads NUMBER_OF_THREADS \ No newline at end of file diff --git a/gatk_BaseRecalibrator_4.1.2.0/README.md b/gatk_BaseRecalibrator_4.1.2.0/README.md new file mode 100644 index 00000000..005acb15 --- /dev/null +++ b/gatk_BaseRecalibrator_4.1.2.0/README.md @@ -0,0 +1,142 @@ +# CWL and Dockerfile for running GATK4 - Base Recalibrator + +## Version of tools in [docker image 
](https://hub.docker.com/r/broadinstitute/gatk) + +| Tool | Version | Location | +|--- |--- |--- | +| GATK | 4.1.2.0 | https://github.com/broadinstitute/gatk/releases/tag/4.1.2.0 | + +[![](https://img.shields.io/badge/version-4.1.2.0-blue)](https://github.com/broadinstitute/gatk/releases/tag/4.1.2.0) +## CWL + +- CWL specification 1.0 +- Use example_inputs.yml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner gatk_baserecalibrator_4.1.2.0.cwl example_inputs.yml +``` + +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict gatk_baserecalibrator_4.1.2.0.cwl inputs.yaml + +#Using toil-cwl-runner +> mkdir run_directory +> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 gatk_baserecalibrator_4.1.2.0.cwl inputs.yaml > file.stdout 2> file.stderr & +``` + +## Usage + +```bash +> toil-cwl-runner gatk_baserecalibrator_4.1.2.0.cwl --help + +usage: gatk_baserecalibrator_4.1.2.0.cwl [-h] --input INPUT --known_sites_1 + KNOWN_SITES_1 --reference REFERENCE + [--output_file_name OUTPUT_FILE_NAME] + [--add_output_sam_program_record] + [--add_output_vcf_command_line] + [--arguments_file ARGUMENTS_FILE] + [--binary_tag_name BINARY_TAG_NAME] + [--bqsr_baq_gap_open_penalty BQSR_BAQ_GAP_OPEN_PENALTY] + [--cloud-index-prefetch-buffer CLOUD_INDEX_PREFETCH_BUFFER] + [--cloud_prefetch_buffer CLOUD_PREFETCH_BUFFER] + [--create_output_bam_index] + [--create_output_bam_md5] + 
[--create_output_variant_index] + [--create_output_variant_md5] + [--default_base_qualities DEFAULT_BASE_QUALITIES] + [--deletions_default_quality DELETIONS_DEFAULT_QUALITY] + [--disable_bam_index_caching] + [--disable_read_filter DISABLE_READ_FILTER] + [--disable_sequence_dictionary_validation] + [--exclude_intervals EXCLUDE_INTERVALS] + [--gatk_config_file GATK_CONFIG_FILE] + [--gcs_max_retries GCS_MAX_RETRIES] + [--gcs_project_for_requester_pays GCS_PROJECT_FOR_REQUESTER_PAYS] + [--indels_context_size INDELS_CONTEXT_SIZE] + [--insertions_default_quality INSERTIONS_DEFAULT_QUALITY] + [--interval_exclusion_padding INTERVAL_EXCLUSION_PADDING] + [--interval_merging_rule INTERVAL_MERGING_RULE] + [--interval_padding INTERVAL_PADDING] + [--interval_set_rule INTERVAL_SET_RULE] + [--intervals INTERVALS] [--lenient] + [--low_quality_tail LOW_QUALITY_TAIL] + [--maximum_cycle_value MAXIMUM_CYCLE_VALUE] + [--mismatches_context_size MISMATCHES_CONTEXT_SIZE] + [--mismatches_default_quality MISMATCHES_DEFAULT_QUALITY] + [--preserve_qscores_less_than PRESERVE_QSCORES_LESS_THAN] + [--quantizing_levels QUANTIZING_LEVELS] + [--QUIET] [--read_filter READ_FILTER] + [--read_index READ_INDEX] + [--seconds_between_progress_updates SECONDS_BETWEEN_PROGRESS_UPDATES] + [--sequence_dictionary SEQUENCE_DICTIONARY] + [--sites_only_vcf_output] + [--use_original_qualities] + [--number_of_threads NUMBER_OF_THREADS] + [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--known_sites_2 KNOWN_SITES_2] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --input INPUT BAM/SAM file containing reads + --known_sites_1 KNOWN_SITES_1 + One or more databases of known polymorphic sites used + to exclude regions around known polymorphisms from + analysis + --reference REFERENCE + Reference sequence file + --output_file_name OUTPUT_FILE_NAME + Output file name. 
Not Required + --add_output_sam_program_record + --add_output_vcf_command_line + --arguments_file ARGUMENTS_FILE + --binary_tag_name BINARY_TAG_NAME + --bqsr_baq_gap_open_penalty BQSR_BAQ_GAP_OPEN_PENALTY + --cloud-index-prefetch-buffer CLOUD_INDEX_PREFETCH_BUFFER + --cloud_prefetch_buffer CLOUD_PREFETCH_BUFFER + --create_output_bam_index + --create_output_bam_md5 + --create_output_variant_index + --create_output_variant_md5 + --default_base_qualities DEFAULT_BASE_QUALITIES + --deletions_default_quality DELETIONS_DEFAULT_QUALITY + --disable_bam_index_caching + --disable_read_filter DISABLE_READ_FILTER + --disable_sequence_dictionary_validation + --exclude_intervals EXCLUDE_INTERVALS + --gatk_config_file GATK_CONFIG_FILE + --gcs_max_retries GCS_MAX_RETRIES + --gcs_project_for_requester_pays GCS_PROJECT_FOR_REQUESTER_PAYS + --indels_context_size INDELS_CONTEXT_SIZE + --insertions_default_quality INSERTIONS_DEFAULT_QUALITY + --interval_exclusion_padding INTERVAL_EXCLUSION_PADDING + --interval_merging_rule INTERVAL_MERGING_RULE + --interval_padding INTERVAL_PADDING + --interval_set_rule INTERVAL_SET_RULE + --intervals INTERVALS + --lenient + --low_quality_tail LOW_QUALITY_TAIL + --maximum_cycle_value MAXIMUM_CYCLE_VALUE + --mismatches_context_size MISMATCHES_CONTEXT_SIZE + --mismatches_default_quality MISMATCHES_DEFAULT_QUALITY + --preserve_qscores_less_than PRESERVE_QSCORES_LESS_THAN + --quantizing_levels QUANTIZING_LEVELS + --QUIET + --read_filter READ_FILTER + --read_index READ_INDEX + --seconds_between_progress_updates SECONDS_BETWEEN_PROGRESS_UPDATES + --sequence_dictionary SEQUENCE_DICTIONARY + --sites_only_vcf_output + --use_original_qualities + --number_of_threads NUMBER_OF_THREADS + --memory_per_job MEMORY_PER_JOB + --memory_overhead MEMORY_OVERHEAD + --known_sites_2 KNOWN_SITES_2 \ No newline at end of file diff --git a/gatk_applybqsr_4.1.2.0.md b/gatk_applybqsr_4.1.2.0.md deleted file mode 100644 index 0574257e..00000000 --- a/gatk_applybqsr_4.1.2.0.md +++ 
/dev/null @@ -1,43 +0,0 @@ -# CWL and Dockerfile for running GATK4 - Apply BQSR - -## Version of tools in [docker image ](https://hub.docker.com/r/broadinstitute/gatk) - -| Tool | Version | Location | -| :--- | :--- | :--- | -| GATK | 4.1.2.0 | [https://github.com/broadinstitute/gatk/releases/tag/4.1.2.0](https://github.com/broadinstitute/gatk/releases/tag/4.1.2.0) | - -[![](https://img.shields.io/badge/version-4.1.2.0-blue)](https://github.com/broadinstitute/gatk/releases/tag/4.1.2.0) - -## CWL - -* CWL specification 1.0 -* Use example\_inputs.yml to see the inputs to the cwl -* Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner gatk_ApplyBQSR_4.1.2.0.cwl example_inputs.yml -``` - -**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying** [**lsf.py**](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) **by removing `type==X86_64 &&` you can use the following command** - -```bash -#Using CWLTOOL -> cwltool --singularity --non-strict gatk_ApplyBQSR_4.1.2.0.cwl inputs.yaml - -#Using toil-cwl-runner -> mkdir run_directory -> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 gatk_ApplyBQSR_4.1.2.0.cwl inputs.yaml > file.stdout 2> file.stderr & -``` - -## Usage - -\`\`\`bash - -> toil-cwl-runner gatk\_ApplyBQSR\_4.1.2.0.cwl --help - -usage: gatk\_ApplyBQSR\_4.1.2.0.cwl \[-h\] --reference REFERENCE \[--create\_output\_bam\_index\] --bqsr\_recal\_file BQSR\_RECAL\_FILE --input INPUT \[--output\_file\_name OUTPUT\_FILE\_NAME\] \[--add\_output\_sam\_program\_record\] \[--add\_output\_vcf\_command\_line\] \[--arguments\_file ARGUMENTS\_FILE\] \[--cloud\_index\_prefetch\_buffer 
CLOUD\_INDEX\_PREFETCH\_BUFFER\] \[--cloud\_prefetch\_buffer CLOUD\_PREFETCH\_BUFFER\] \[--create\_output\_bam\_md5\] \[--create\_output\_variant\_index\] \[--create\_output\_variant\_md5\] \[--disable\_bam\_index\_caching\] \[--disable\_read\_filter DISABLE\_READ\_FILTER\] \[--disable\_sequence\_dictionary\_validation\] \[--emit\_original\_quals\] \[--exclude\_intervals EXCLUDE\_INTERVALS\] \[--gatk\_config\_file GATK\_CONFIG\_FILE\] \[--gcs\_max\_retries GCS\_MAX\_RETRIES\] \[--gcs\_project\_for\_requester\_pays GCS\_PROJECT\_FOR\_REQUESTER\_PAYS\] \[--global\_qscore\_prior GLOBAL\_QSCORE\_PRIOR\] \[--interval\_exclusion\_padding INTERVAL\_EXCLUSION\_PADDING\] \[--interval\_merging\_rule INTERVAL\_MERGING\_RULE\] \[--interval\_padding INTERVAL\_PADDING\] \[--interval\_set\_rule INTERVAL\_SET\_RULE\] \[--intervals INTERVALS\] \[--lenient\] \[--preserve\_qscores\_less\_than PRESERVE\_QSCORES\_LESS\_THAN\] \[--quantize\_quals QUANTIZE\_QUALS\] \[--quiet\] \[--read\_filter READ\_FILTER\] \[--read\_index READ\_INDEX\] \[--read\_validation\_stringency READ\_VALIDATION\_STRINGENCY\] \[--seconds\_between\_progress\_updates SECONDS\_BETWEEN\_PROGRESS\_UPDATES\] \[--sequence\_dictionary SEQUENCE\_DICTIONARY\] \[--sites\_only\_vcf\_output\] \[--use\_jdk\_deflater\] \[--use\_jdk\_inflater\] \[--use\_original\_qualities\] \[--memory\_overhead MEMORY\_OVERHEAD\] \[--memory\_per\_job MEMORY\_PER\_JOB\] \[--number\_of\_threads NUMBER\_OF\_THREADS\] \[job\_order\] - -positional arguments: job\_order Job input json file - -optional arguments: -h, --help show this help message and exit --reference REFERENCE Reference sequence --create\_output\_bam\_index --bqsr\_recal\_file BQSR\_RECAL\_FILE Input recalibration table for BQSR. Only run ApplyBQSR with the covariates table created from the input BAM --input INPUT A BAM file containing input read data --output\_file\_name OUTPUT\_FILE\_NAME Output file name. 
Not Required --add\_output\_sam\_program\_record --add\_output\_vcf\_command\_line --arguments\_file ARGUMENTS\_FILE --cloud\_index\_prefetch\_buffer CLOUD\_INDEX\_PREFETCH\_BUFFER --cloud\_prefetch\_buffer CLOUD\_PREFETCH\_BUFFER --create\_output\_bam\_md5 --create\_output\_variant\_index --create\_output\_variant\_md5 --disable\_bam\_index\_caching --disable\_read\_filter DISABLE\_READ\_FILTER --disable\_sequence\_dictionary\_validation --emit\_original\_quals --exclude\_intervals EXCLUDE\_INTERVALS --gatk\_config\_file GATK\_CONFIG\_FILE --gcs\_max\_retries GCS\_MAX\_RETRIES --gcs\_project\_for\_requester\_pays GCS\_PROJECT\_FOR\_REQUESTER\_PAYS --global\_qscore\_prior GLOBAL\_QSCORE\_PRIOR --interval\_exclusion\_padding INTERVAL\_EXCLUSION\_PADDING --interval\_merging\_rule INTERVAL\_MERGING\_RULE --interval\_padding INTERVAL\_PADDING --interval\_set\_rule INTERVAL\_SET\_RULE --intervals INTERVALS --lenient --preserve\_qscores\_less\_than PRESERVE\_QSCORES\_LESS\_THAN --quantize\_quals QUANTIZE\_QUALS --quiet --read\_filter READ\_FILTER --read\_index READ\_INDEX --read\_validation\_stringency READ\_VALIDATION\_STRINGENCY --seconds\_between\_progress\_updates SECONDS\_BETWEEN\_PROGRESS\_UPDATES --sequence\_dictionary SEQUENCE\_DICTIONARY --sites\_only\_vcf\_output --use\_jdk\_deflater --use\_jdk\_inflater --use\_original\_qualities --memory\_overhead MEMORY\_OVERHEAD --memory\_per\_job MEMORY\_PER\_JOB --number\_of\_threads NUMBER\_OF\_THREADS - diff --git a/gatk_baserecalibrator_4.1.2.0.md b/gatk_baserecalibrator_4.1.2.0.md deleted file mode 100644 index 921c05c1..00000000 --- a/gatk_baserecalibrator_4.1.2.0.md +++ /dev/null @@ -1,43 +0,0 @@ -# CWL and Dockerfile for running GATK4 - Base Recalibrator - -## Version of tools in [docker image ](https://hub.docker.com/r/broadinstitute/gatk) - -| Tool | Version | Location | -| :--- | :--- | :--- | -| GATK | 4.1.2.0 | 
[https://github.com/broadinstitute/gatk/releases/tag/4.1.2.0](https://github.com/broadinstitute/gatk/releases/tag/4.1.2.0) | - -[![](https://img.shields.io/badge/version-4.1.2.0-blue)](https://github.com/broadinstitute/gatk/releases/tag/4.1.2.0) - -## CWL - -* CWL specification 1.0 -* Use example\_inputs.yml to see the inputs to the cwl -* Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner gatk_baserecalibrator_4.1.2.0.cwl example_inputs.yml -``` - -**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying** [**lsf.py**](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) **by removing `type==X86_64 &&` you can use the following command** - -```bash -#Using CWLTOOL -> cwltool --singularity --non-strict gatk_baserecalibrator_4.1.2.0.cwl inputs.yaml - -#Using toil-cwl-runner -> mkdir run_directory -> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 gatk_baserecalibrator_4.1.2.0.cwl inputs.yaml > file.stdout 2> file.stderr & -``` - -## Usage - -\`\`\`bash - -> toil-cwl-runner gatk\_baserecalibrator\_4.1.2.0.cwl --help - -usage: gatk\_baserecalibrator\_4.1.2.0.cwl \[-h\] --input INPUT --known\_sites\_1 KNOWN\_SITES\_1 --reference REFERENCE \[--output\_file\_name OUTPUT\_FILE\_NAME\] \[--add\_output\_sam\_program\_record\] \[--add\_output\_vcf\_command\_line\] \[--arguments\_file ARGUMENTS\_FILE\] \[--binary\_tag\_name BINARY\_TAG\_NAME\] \[--bqsr\_baq\_gap\_open\_penalty BQSR\_BAQ\_GAP\_OPEN\_PENALTY\] \[--cloud-index-prefetch-buffer CLOUD\_INDEX\_PREFETCH\_BUFFER\] \[--cloud\_prefetch\_buffer CLOUD\_PREFETCH\_BUFFER\] \[--create\_output\_bam\_index\] \[--create\_output\_bam\_md5\] 
\[--create\_output\_variant\_index\] \[--create\_output\_variant\_md5\] \[--default\_base\_qualities DEFAULT\_BASE\_QUALITIES\] \[--deletions\_default\_quality DELETIONS\_DEFAULT\_QUALITY\] \[--disable\_bam\_index\_caching\] \[--disable\_read\_filter DISABLE\_READ\_FILTER\] \[--disable\_sequence\_dictionary\_validation\] \[--exclude\_intervals EXCLUDE\_INTERVALS\] \[--gatk\_config\_file GATK\_CONFIG\_FILE\] \[--gcs\_max\_retries GCS\_MAX\_RETRIES\] \[--gcs\_project\_for\_requester\_pays GCS\_PROJECT\_FOR\_REQUESTER\_PAYS\] \[--indels\_context\_size INDELS\_CONTEXT\_SIZE\] \[--insertions\_default\_quality INSERTIONS\_DEFAULT\_QUALITY\] \[--interval\_exclusion\_padding INTERVAL\_EXCLUSION\_PADDING\] \[--interval\_merging\_rule INTERVAL\_MERGING\_RULE\] \[--interval\_padding INTERVAL\_PADDING\] \[--interval\_set\_rule INTERVAL\_SET\_RULE\] \[--intervals INTERVALS\] \[--lenient\] \[--low\_quality\_tail LOW\_QUALITY\_TAIL\] \[--maximum\_cycle\_value MAXIMUM\_CYCLE\_VALUE\] \[--mismatches\_context\_size MISMATCHES\_CONTEXT\_SIZE\] \[--mismatches\_default\_quality MISMATCHES\_DEFAULT\_QUALITY\] \[--preserve\_qscores\_less\_than PRESERVE\_QSCORES\_LESS\_THAN\] \[--quantizing\_levels QUANTIZING\_LEVELS\] \[--QUIET\] \[--read\_filter READ\_FILTER\] \[--read\_index READ\_INDEX\] \[--seconds\_between\_progress\_updates SECONDS\_BETWEEN\_PROGRESS\_UPDATES\] \[--sequence\_dictionary SEQUENCE\_DICTIONARY\] \[--sites\_only\_vcf\_output\] \[--use\_original\_qualities\] \[--number\_of\_threads NUMBER\_OF\_THREADS\] \[--memory\_per\_job MEMORY\_PER\_JOB\] \[--memory\_overhead MEMORY\_OVERHEAD\] \[--known\_sites\_2 KNOWN\_SITES\_2\] \[job\_order\] - -positional arguments: job\_order Job input json file - -optional arguments: -h, --help show this help message and exit --input INPUT BAM/SAM file containing reads --known\_sites\_1 KNOWN\_SITES\_1 One or more databases of known polymorphic sites used to exclude regions around known polymorphisms from analysis --reference REFERENCE 
Reference sequence file --output\_file\_name OUTPUT\_FILE\_NAME Output file name. Not Required --add\_output\_sam\_program\_record --add\_output\_vcf\_command\_line --arguments\_file ARGUMENTS\_FILE --binary\_tag\_name BINARY\_TAG\_NAME --bqsr\_baq\_gap\_open\_penalty BQSR\_BAQ\_GAP\_OPEN\_PENALTY --cloud-index-prefetch-buffer CLOUD\_INDEX\_PREFETCH\_BUFFER --cloud\_prefetch\_buffer CLOUD\_PREFETCH\_BUFFER --create\_output\_bam\_index --create\_output\_bam\_md5 --create\_output\_variant\_index --create\_output\_variant\_md5 --default\_base\_qualities DEFAULT\_BASE\_QUALITIES --deletions\_default\_quality DELETIONS\_DEFAULT\_QUALITY --disable\_bam\_index\_caching --disable\_read\_filter DISABLE\_READ\_FILTER --disable\_sequence\_dictionary\_validation --exclude\_intervals EXCLUDE\_INTERVALS --gatk\_config\_file GATK\_CONFIG\_FILE --gcs\_max\_retries GCS\_MAX\_RETRIES --gcs\_project\_for\_requester\_pays GCS\_PROJECT\_FOR\_REQUESTER\_PAYS --indels\_context\_size INDELS\_CONTEXT\_SIZE --insertions\_default\_quality INSERTIONS\_DEFAULT\_QUALITY --interval\_exclusion\_padding INTERVAL\_EXCLUSION\_PADDING --interval\_merging\_rule INTERVAL\_MERGING\_RULE --interval\_padding INTERVAL\_PADDING --interval\_set\_rule INTERVAL\_SET\_RULE --intervals INTERVALS --lenient --low\_quality\_tail LOW\_QUALITY\_TAIL --maximum\_cycle\_value MAXIMUM\_CYCLE\_VALUE --mismatches\_context\_size MISMATCHES\_CONTEXT\_SIZE --mismatches\_default\_quality MISMATCHES\_DEFAULT\_QUALITY --preserve\_qscores\_less\_than PRESERVE\_QSCORES\_LESS\_THAN --quantizing\_levels QUANTIZING\_LEVELS --QUIET --read\_filter READ\_FILTER --read\_index READ\_INDEX --seconds\_between\_progress\_updates SECONDS\_BETWEEN\_PROGRESS\_UPDATES --sequence\_dictionary SEQUENCE\_DICTIONARY --sites\_only\_vcf\_output --use\_original\_qualities --number\_of\_threads NUMBER\_OF\_THREADS --memory\_per\_job MEMORY\_PER\_JOB --memory\_overhead MEMORY\_OVERHEAD --known\_sites\_2 KNOWN\_SITES\_2 - diff --git 
a/marianas_collapsing_first_pass_1.8.1.md b/marianas_collapsing_first_pass_1.8.1.md deleted file mode 100644 index a9345bee..00000000 --- a/marianas_collapsing_first_pass_1.8.1.md +++ /dev/null @@ -1,19 +0,0 @@ -# CWL and Dockerfile for running Marianas - DuplexUMIBamToCollapsedFastqFirstPass - -## Version of tools in docker image \(/container/Dockerfile\) - -| Tool | Version | Location | -| :--- | :--- | :--- | -| java base image | 8 | - | -| Marianas | 1.8.1 | [https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar](https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar) | - -## CWL - -* CWL specification 1.0 -* Use example\_inputs.yaml to see the inputs to the cwl -* Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner marianas_first_pass.cwl example_inputs.yaml -``` - diff --git a/marianas_collapsing_first_pass_1.8.1/README.md b/marianas_collapsing_first_pass_1.8.1/README.md new file mode 100644 index 00000000..eba9e12c --- /dev/null +++ b/marianas_collapsing_first_pass_1.8.1/README.md @@ -0,0 +1,19 @@ +# CWL and Dockerfile for running Marianas - DuplexUMIBamToCollapsedFastqFirstPass + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +|--- |--- |--- | +| java base image | 8 | - | +| Marianas | 1.8.1 | https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar | + + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner marianas_first_pass.cwl example_inputs.yaml +``` \ No newline at end of file diff --git a/marianas_collapsing_second_pass_1.8.1.md b/marianas_collapsing_second_pass_1.8.1.md deleted file mode 100644 index 5701bafd..00000000 --- a/marianas_collapsing_second_pass_1.8.1.md +++ /dev/null @@ -1,19 +0,0 @@ -# CWL and Dockerfile for running Marianas - 
DuplexUMIToCollapsedFastqSecondPass - -## Version of tools in docker image \(/container/Dockerfile\) - -| Tool | Version | Location | -| :--- | :--- | :--- | -| java base image | 8 | - | -| Marianas | 1.8.1 | [https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar](https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar) | - -## CWL - -* CWL specification 1.0 -* Use example\_inputs.yaml to see the inputs to the cwl -* Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner marianas_first_pass.cwl test_inputs_second_pass.yaml -``` - diff --git a/marianas_collapsing_second_pass_1.8.1/README.md b/marianas_collapsing_second_pass_1.8.1/README.md new file mode 100644 index 00000000..b3cdf8d7 --- /dev/null +++ b/marianas_collapsing_second_pass_1.8.1/README.md @@ -0,0 +1,19 @@ +# CWL and Dockerfile for running Marianas - DuplexUMIToCollapsedFastqSecondPass + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +|--- |--- |--- | +| java base image | 8 | - | +| Marianas | 1.8.1 | https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar | + + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner marianas_first_pass.cwl test_inputs_second_pass.yaml +``` \ No newline at end of file diff --git a/marianas_process_loop_umi_1.8.1.md b/marianas_process_loop_umi_1.8.1.md deleted file mode 100644 index d664f9e2..00000000 --- a/marianas_process_loop_umi_1.8.1.md +++ /dev/null @@ -1,19 +0,0 @@ -# CWL and Dockerfile for running Marianas - ProcessLoopUMIFastq - -## Version of tools in docker image \(/container/Dockerfile\) - -| Tool | Version | Location | -| :--- | :--- | :--- | -| java base image | 8 | - | -| Marianas | 1.8.1 | 
[https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar](https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar) | - -## CWL - -* CWL specification 1.0 -* Use example\_inputs.yaml to see the inputs to the cwl -* Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner marianas_first_pass.cwl example_inputs.yaml -``` - diff --git a/marianas_process_loop_umi_1.8.1/README.md b/marianas_process_loop_umi_1.8.1/README.md new file mode 100644 index 00000000..a5b4e900 --- /dev/null +++ b/marianas_process_loop_umi_1.8.1/README.md @@ -0,0 +1,19 @@ +# CWL and Dockerfile for running Marianas - ProcessLoopUMIFastq + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +|--- |--- |--- | +| java base image | 8 | - | +| Marianas | 1.8.1 | https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar | + + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner marianas_first_pass.cwl example_inputs.yaml +``` \ No newline at end of file diff --git a/marianas_separate_bams_1.8.1.md b/marianas_separate_bams_1.8.1.md deleted file mode 100644 index a8a45060..00000000 --- a/marianas_separate_bams_1.8.1.md +++ /dev/null @@ -1,33 +0,0 @@ -# CWL and Dockerfile for running Marianas - SeparateBams - -## Version of tools in docker image \(../marianas\_process\_loop\_umi\_1.8.1/container/Dockerfile\) - -| Tool | Version | Location | -| :--- | :--- | :--- | -| java base image | 8 | - | -| Marianas | 1.8.1 | [https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar](https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar) | - -## CWL - -* CWL specification 1.0 -* Use example\_inputs.yaml to see the inputs to the cwl -* Example Command using 
[toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner marianas_seprate_bams_1.8.1.cwl example_inputs.yaml -``` - -### Usage - -```bash -usage: marianas_separate_bams_1.8.1/marianas_separate_bams_1.8.1.cwl - [-h] --input_bam INPUT_BAM [job_order] - -positional arguments: - job_order Job input json file - -optional arguments: - -h, --help show this help message and exit - --input_bam INPUT_BAM -``` - diff --git a/marianas_separate_bams_1.8.1/README.md b/marianas_separate_bams_1.8.1/README.md new file mode 100644 index 00000000..998ce5c6 --- /dev/null +++ b/marianas_separate_bams_1.8.1/README.md @@ -0,0 +1,33 @@ +# CWL and Dockerfile for running Marianas - SeparateBams + +## Version of tools in docker image (../marianas_process_loop_umi_1.8.1/container/Dockerfile) + +| Tool | Version | Location | +|--- |--- |--- | +| java base image | 8 | - | +| Marianas | 1.8.1 | https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar | + + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner marianas_separate_bams_1.8.1.cwl example_inputs.yaml +``` + +### Usage + +```bash +usage: marianas_separate_bams_1.8.1/marianas_separate_bams_1.8.1.cwl + [-h] --input_bam INPUT_BAM [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --input_bam INPUT_BAM +``` diff --git a/picard_add_or_replace_read_groups_1.96.md b/picard_add_or_replace_read_groups_1.96/README.md similarity index 80% rename from picard_add_or_replace_read_groups_1.96.md rename to picard_add_or_replace_read_groups_1.96/README.md index 2663a69a..b07355a4 100644 --- a/picard_add_or_replace_read_groups_1.96.md +++ b/picard_add_or_replace_read_groups_1.96/README.md @@ -1,26 +1,26 @@ # CWL and Dockerfile for running Picard - AddOrReplaceReadGroups -## Version of tools in docker 
image \(/container/Dockerfile\) +## Version of tools in docker image (/container/Dockerfile) -| Tool | Version | Location | -| :--- | :--- | :--- | -| java base image | 8 | - | -| picard | 1.96 | [https://sourceforge.net/projects/picard/files/picard-tools/1.96/picard-tools-1.96.zip](https://sourceforge.net/projects/picard/files/picard-tools/1.96/picard-tools-1.96.zip) | -| R | 3.3.3 | r-base for opnejdk:8 | +| Tool | Version | Location | +|--- |--- |--- | +| java base image | 8 | - | +| picard | 1.96 | https://sourceforge.net/projects/picard/files/picard-tools/1.96/picard-tools-1.96.zip | +| R | 3.3.3 | r-base for openjdk:8 | -[![](https://images.microbadger.com/badges/image/mskcc/picard_1.96:0.1.0.svg)](https://microbadger.com/images/mskcc/picard_1.96:0.1.0) [![](https://images.microbadger.com/badges/version/mskcc/picard_1.96:0.1.0.svg)](https://microbadger.com/images/mskcc/picard_1.96:0.1.0) [![](https://images.microbadger.com/badges/license/mskcc/picard_1.96:0.1.0.svg)](https://microbadger.com/images/mskcc/picard_1.96:0.1.0) +[![](https://images.microbadger.com/badges/image/mskcc/picard_1.96:0.1.0.svg)](https://microbadger.com/images/mskcc/picard_1.96:0.1.0 "Get your own image badge on microbadger.com") [![](https://images.microbadger.com/badges/version/mskcc/picard_1.96:0.1.0.svg)](https://microbadger.com/images/mskcc/picard_1.96:0.1.0 "Get your own version badge on microbadger.com") [![](https://images.microbadger.com/badges/license/mskcc/picard_1.96:0.1.0.svg)](https://microbadger.com/images/mskcc/picard_1.96:0.1.0 "Get your own license badge on microbadger.com") ## CWL -* CWL specification 1.0 -* Use example\_inputs.yaml to see the inputs to the cwl -* Example Command using [toil](https://toil.readthedocs.io): +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): ```bash > toil-cwl-runner picard_add_or_replace_read_groups_1.96.cwl example_inputs.yaml ``` -**If at MSK, 
using the JUNO cluster having installed toil version 3.19 and manually modifying** [**lsf.py**](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) **by removing `type==X86_64 &&` you can use the following command** +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** ```bash #Using CWLTOOL diff --git a/picard_collect_alignment_summary_metrics_2.8.1.md b/picard_collect_alignment_summary_metrics_2.8.1/README.md similarity index 86% rename from picard_collect_alignment_summary_metrics_2.8.1.md rename to picard_collect_alignment_summary_metrics_2.8.1/README.md index 78227469..16d0b11b 100644 --- a/picard_collect_alignment_summary_metrics_2.8.1.md +++ b/picard_collect_alignment_summary_metrics_2.8.1/README.md @@ -1,18 +1,19 @@ # CWL and Dockerfile for running Picard - CollectAlignmentSummaryMetrics -## Version of tools in docker image \(../picard\_mark\_duplicates\_2.8.1/container/Dockerfile\) +## Version of tools in docker image (../picard_mark_duplicates_2.8.1/container/Dockerfile) + +| Tool | Version | Location | +|--- |--- |--- | +| java base image | 8 | - | +| picard | 2.8.1 | https://github.com/broadinstitute/picard/releases/download/2.8.1/picard.jar | +| R | 3.3.3 | r-base for opnejdk:8 | -| Tool | Version | Location | -| :--- | :--- | :--- | -| java base image | 8 | - | -| picard | 2.8.1 | [https://github.com/broadinstitute/picard/releases/download/2.8.1/picard.jar](https://github.com/broadinstitute/picard/releases/download/2.8.1/picard.jar) | -| R | 3.3.3 | r-base for opnejdk:8 | ## CWL -* CWL specification 1.0 -* Use example\_inputs.yaml to see the inputs to the cwl -* Example Command using [toil](https://toil.readthedocs.io): +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- 
Example Command using [toil](https://toil.readthedocs.io): ```bash > toil-cwl-runner picard_collect_alignment_summary_metrics_2.8.1.cwl example_inputs.yaml @@ -77,4 +78,3 @@ optional arguments: Default value: 0. This option can be set to 'null' to clear the default value. ``` - diff --git a/picard_fix_mate_information_1.96.md b/picard_fix_mate_information_1.96/README.md similarity index 74% rename from picard_fix_mate_information_1.96.md rename to picard_fix_mate_information_1.96/README.md index 6c834c90..567a78e3 100644 --- a/picard_fix_mate_information_1.96.md +++ b/picard_fix_mate_information_1.96/README.md @@ -1,26 +1,26 @@ # CWL and Dockerfile for running Picard - FixMateInformation -## Version of tools in docker image \(../picard\_add\_or\_replace\_read\_groups\_1.96/container/Dockerfile\) +## Version of tools in docker image (../picard_add_or_replace_read_groups_1.96/container/Dockerfile) -| Tool | Version | Location | -| :--- | :--- | :--- | -| java base image | 8 | - | -| picard | 1.96 | [https://sourceforge.net/projects/picard/files/picard-tools/1.96/picard-tools-1.96.zip](https://sourceforge.net/projects/picard/files/picard-tools/1.96/picard-tools-1.96.zip) | -| R | 3.3.3 | r-base for opnejdk:8 | +| Tool | Version | Location | +|--- |--- |--- | +| java base image | 8 | - | +| picard | 1.96 | https://sourceforge.net/projects/picard/files/picard-tools/1.96/picard-tools-1.96.zip | +| R | 3.3.3 | r-base for opnejdk:8 | -[![](https://images.microbadger.com/badges/image/mskcc/picard_1.96:0.1.0.svg)](https://microbadger.com/images/mskcc/picard_1.96:0.1.0) [![](https://images.microbadger.com/badges/version/mskcc/picard_1.96:0.1.0.svg)](https://microbadger.com/images/mskcc/picard_1.96:0.1.0) [![](https://images.microbadger.com/badges/license/mskcc/picard_1.96:0.1.0.svg)](https://microbadger.com/images/mskcc/picard_1.96:0.1.0) +[![](https://images.microbadger.com/badges/image/mskcc/picard_1.96:0.1.0.svg)](https://microbadger.com/images/mskcc/picard_1.96:0.1.0 "Get 
your own image badge on microbadger.com") [![](https://images.microbadger.com/badges/version/mskcc/picard_1.96:0.1.0.svg)](https://microbadger.com/images/mskcc/picard_1.96:0.1.0 "Get your own version badge on microbadger.com") [![](https://images.microbadger.com/badges/license/mskcc/picard_1.96:0.1.0.svg)](https://microbadger.com/images/mskcc/picard_1.96:0.1.0 "Get your own license badge on microbadger.com") ## CWL -* CWL specification 1.0 -* Use example\_inputs.yaml to see the inputs to the cwl -* Example Command using [toil](https://toil.readthedocs.io): +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): ```bash > toil-cwl-runner picard_fix_mate_information_1.96.cwl example_inputs.yaml ``` -**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying** [**lsf.py**](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) **by removing `type==X86_64 &&` you can use the following command** +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** ```bash #Using CWLTOOL @@ -33,7 +33,7 @@ ### Usage -```text +``` usage: picard_fix_mate_information_1.96.cwl [-h] positional arguments: @@ -72,5 +72,4 @@ optional arguments: coordinate-sorted BAM file. Default value:false. This option can be set to 'null' to clear the default value. 
Possible values:{true, false} -``` - +``` \ No newline at end of file diff --git a/picard_mark_duplicates_1.96.md b/picard_mark_duplicates_1.96.md deleted file mode 100644 index 962dc58b..00000000 --- a/picard_mark_duplicates_1.96.md +++ /dev/null @@ -1,20 +0,0 @@ -# CWL and Dockerfile for running Picard - MarkDuplicates - -## Version of tools in docker image \(/container/Dockerfile\) - -| Tool | Version | Location | -| :--- | :--- | :--- | -| java base image | 8 | - | -| picard | 1.96 | [https://sourceforge.net/projects/picard/files/picard-tools/1.96/picard-tools-1.96.zip](https://sourceforge.net/projects/picard/files/picard-tools/1.96/picard-tools-1.96.zip) | -| R | 3.3.3 | r-base for opnejdk:8 | - -## CWL - -* CWL specification 1.0 -* Use example\_inputs.yaml to see the inputs to the cwl -* Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner picard_mark_duplicates_1.96.cwl example_inputs.yaml -``` - diff --git a/picard_mark_duplicates_1.96/README.md b/picard_mark_duplicates_1.96/README.md new file mode 100644 index 00000000..bb651139 --- /dev/null +++ b/picard_mark_duplicates_1.96/README.md @@ -0,0 +1,20 @@ +# CWL and Dockerfile for running Picard - MarkDuplicates + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +|--- |--- |--- | +| java base image | 8 | - | +| picard | 1.96 | https://sourceforge.net/projects/picard/files/picard-tools/1.96/picard-tools-1.96.zip | +| R | 3.3.3 | r-base for opnejdk:8 | + + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner picard_mark_duplicates_1.96.cwl example_inputs.yaml +``` diff --git a/picard_mark_duplicates_2.8.1.md b/picard_mark_duplicates_2.8.1.md deleted file mode 100644 index df66db75..00000000 --- a/picard_mark_duplicates_2.8.1.md +++ /dev/null @@ -1,20 +0,0 @@ -# CWL and Dockerfile for running Picard - 
MarkDuplicates - -## Version of tools in docker image \(/container/Dockerfile\) - -| Tool | Version | Location | -| :--- | :--- | :--- | -| java base image | 8 | - | -| picard | 2.8.1 | [https://github.com/broadinstitute/picard/releases/download/2.8.1/picard.jar](https://github.com/broadinstitute/picard/releases/download/2.8.1/picard.jar) | -| R | 3.3.3 | r-base for opnejdk:8 | - -## CWL - -* CWL specification 1.0 -* Use example\_inputs.yaml to see the inputs to the cwl -* Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner picard_mark_duplicates_2.8.1.cwl example_inputs.yaml -``` - diff --git a/picard_mark_duplicates_2.8.1/README.md b/picard_mark_duplicates_2.8.1/README.md new file mode 100644 index 00000000..173d0b7e --- /dev/null +++ b/picard_mark_duplicates_2.8.1/README.md @@ -0,0 +1,20 @@ +# CWL and Dockerfile for running Picard - MarkDuplicates + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +|--- |--- |--- | +| java base image | 8 | - | +| picard | 2.8.1 | https://github.com/broadinstitute/picard/releases/download/2.8.1/picard.jar | +| R | 3.3.3 | r-base for opnejdk:8 | + + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner picard_mark_duplicates_2.8.1.cwl example_inputs.yaml +``` \ No newline at end of file diff --git a/trim_galore_0.6.2.md b/trim_galore_0.6.2/README.md similarity index 82% rename from trim_galore_0.6.2.md rename to trim_galore_0.6.2/README.md index ecf109aa..3727924e 100644 --- a/trim_galore_0.6.2.md +++ b/trim_galore_0.6.2/README.md @@ -1,25 +1,25 @@ # CWL and Dockerfile for running Trim Galore -## Version of tools in docker image \(/container/Dockerfile\) +## Version of tools in docker image (/container/Dockerfile) -| Tool | Version | Location | -| :--- | :--- | :--- | -| Ubuntu base image | 18.04 | - | -| cutadapt | 2.3 | 
[https://pypi.org/project/cutadapt/](https://pypi.org/project/cutadapt/) | -| FASTQC | 0.11.8 | [https://www.bioinformatics.babraham.ac.uk/projects/fastqc/fastqc\_v0.11.8.zip](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/fastqc_v0.11.8.zip) | -| Trim Galore | 0.6.2 | [https://github.com/FelixKrueger/TrimGalore/archive/0.6.2.tar.gz](https://github.com/FelixKrueger/TrimGalore/archive/0.6.2.tar.gz) | +| Tool | Version | Location | +|--- |--- |--- | +| Ubuntu base image | 18.04 | - | +| cutadapt | 2.3 | https://pypi.org/project/cutadapt/ | +| FASTQC | 0.11.8 | https://www.bioinformatics.babraham.ac.uk/projects/fastqc/fastqc_v0.11.8.zip | +| Trim Galore | 0.6.2 | https://github.com/FelixKrueger/TrimGalore/archive/0.6.2.tar.gz | ## CWL -* CWL specification 1.0 -* Use example\_inputs.yaml to see the inputs to the cwl -* Example Command using [toil](https://toil.readthedocs.io): +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): ```bash > toil-cwl-runner trim_galore_0.6.2.cwl example_inputs.yaml ``` -**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying** [**lsf.py**](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) **by removing `type==X86_64 &&` you can use the following command** +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** ```bash #Using CWLTOOL @@ -32,7 +32,7 @@ ### Usage -```text +``` usage: trim_galore_0.6.2.cwl [-h] positional arguments: @@ -92,5 +92,4 @@ optional arguments: --error_rate ERROR_RATE Maximum allowed error rate (no. 
of errors divided by the length of the matching region) (default: 0.1) -``` - +``` \ No newline at end of file diff --git a/utilities_ubuntu_18.04.md b/utilities_ubuntu_18.04.md deleted file mode 100644 index d6433af3..00000000 --- a/utilities_ubuntu_18.04.md +++ /dev/null @@ -1,18 +0,0 @@ -# CWL and Dockerfile for running utilites from Ubuntu 18.04 - -## Version of tools in docker image \(/container/Dockerfile\) - -| Tool | Version | Location | -| :--- | :--- | :--- | -| Ubuntu base image | 18.04 | - | - -## CWL - -* CWL specification 1.0 -* Use example\_inputs\_toolname.yaml to see the inputs to the cwl -* Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner gzip.cwl example_inputs_gzip.yaml -``` - diff --git a/utilities_ubuntu_18.04/README.md b/utilities_ubuntu_18.04/README.md new file mode 100644 index 00000000..a13033b0 --- /dev/null +++ b/utilities_ubuntu_18.04/README.md @@ -0,0 +1,17 @@ +# CWL and Dockerfile for running utilites from Ubuntu 18.04 + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +|--- |--- |--- | +| Ubuntu base image | 18.04 | - | + +## CWL + +- CWL specification 1.0 +- Use example_inputs_toolname.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner gzip.cwl example_inputs_gzip.yaml +``` diff --git a/waltz_count_reads_3.1.1.md b/waltz_count_reads_3.1.1/README.md similarity index 72% rename from waltz_count_reads_3.1.1.md rename to waltz_count_reads_3.1.1/README.md index 3f40ee54..8ad83443 100644 --- a/waltz_count_reads_3.1.1.md +++ b/waltz_count_reads_3.1.1/README.md @@ -1,25 +1,24 @@ # CWL and Dockerfile for running Waltz - Count Reads -## Version of tools in docker image \(/container/Dockerfile\) +## Version of tools in docker image (/container/Dockerfile) -| Tool | Version | Location | -| :--- | :--- | :--- | -| java base image | 8 | - | -| waltz | 3.1.1 | 
[https://github.com/juberpatel/Waltz/releases/download/v3.1.1/Waltz-3.1.1.jar](https://github.com/juberpatel/Waltz/releases/download/v3.1.1/Waltz-3.1.1.jar) | +| Tool | Version | Location | +|--- |--- |--- | +| java base image | 8 | - | +| waltz | 3.1.1 | https://github.com/juberpatel/Waltz/releases/download/v3.1.1/Waltz-3.1.1.jar | [![](https://img.shields.io/badge/version-3.1.1-blue)](https://github.com/juberpatel/Waltz/releases/tag/v3.1.1) - ## CWL -* CWL specification 1.0 -* Use example\_inputs.yml to see the inputs to the cwl -* Example Command using [toil](https://toil.readthedocs.io): +- CWL specification 1.0 +- Use example_inputs.yml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): ```bash > toil-cwl-runner waltz_count_reads_3.1.1.cwl example_inputs.yml ``` -**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying** [**lsf.py**](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) **by removing `type==X86_64 &&` you can use the following command** +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** ```bash #Using CWLTOOL @@ -56,4 +55,3 @@ optional arguments: --number_of_threads NUMBER_OF_THREADS --bed_file BED_FILE ``` - diff --git a/waltz_pileupmatrices_3.1.1.md b/waltz_pileupmatrices_3.1.1/README.md similarity index 72% rename from waltz_pileupmatrices_3.1.1.md rename to waltz_pileupmatrices_3.1.1/README.md index 41a88a62..b5aed666 100644 --- a/waltz_pileupmatrices_3.1.1.md +++ b/waltz_pileupmatrices_3.1.1/README.md @@ -1,25 +1,24 @@ # CWL and Dockerfile for running Waltz - PileupMetrics -## Version of tools in docker image \(../waltz\_count\_reads\_3.1.1/container/Dockerfile\) +## Version of tools in docker image 
(../waltz_count_reads_3.1.1/container/Dockerfile) -| Tool | Version | Location | -| :--- | :--- | :--- | -| java base image | 8 | - | -| waltz | 3.1.1 | [https://github.com/juberpatel/Waltz/releases/download/v3.1.1/Waltz-3.1.1.jar](https://github.com/juberpatel/Waltz/releases/download/v3.1.1/Waltz-3.1.1.jar) | +| Tool | Version | Location | +|--- |--- |--- | +| java base image | 8 | - | +| waltz | 3.1.1 | https://github.com/juberpatel/Waltz/releases/download/v3.1.1/Waltz-3.1.1.jar | [![](https://img.shields.io/badge/version-3.1.1-blue)](https://github.com/juberpatel/Waltz/releases/tag/v3.1.1) - ## CWL -* CWL specification 1.0 -* Use example\_inputs.yml to see the inputs to the cwl -* Example Command using [toil](https://toil.readthedocs.io): +- CWL specification 1.0 +- Use example_inputs.yml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): ```bash > toil-cwl-runner waltz_pileupmatrices_3.1.1.cwl example_inputs.yml ``` -**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying** [**lsf.py**](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) **by removing `type==X86_64 &&` you can use the following command** +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** ```bash #Using CWLTOOL @@ -57,4 +56,3 @@ optional arguments: --number_of_threads NUMBER_OF_THREADS --bed_file BED_FILE ``` - From 3b91ab063f8d3b16792274d8781ca893ef8180bd Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Mon, 23 Sep 2019 11:08:14 -0400 Subject: [PATCH 007/476] adding .gitbook.yaml --- .gitbook.yaml | 1 + docs/README.md | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+) create mode 100644 .gitbook.yaml create mode 100644 docs/README.md diff --git 
a/.gitbook.yaml b/.gitbook.yaml new file mode 100644 index 00000000..afdeba9f --- /dev/null +++ b/.gitbook.yaml @@ -0,0 +1 @@ +root: ./docs diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 00000000..cf01fb07 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,32 @@ +--- +description: Central location for storing common workflow language based command line tools for building msk-access workflows +--- + +# MSK-ACCESS BAM Generation workflow + +[![PyPI version](https://badge.fury.io/py/cwl-commandlinetools.svg)](https://badge.fury.io/py/cwl-commandlinetools) +[![Build Status](https://travis-ci.org/msk-access/cwl-commandlinetools.svg?branch=master)](https://travis-ci.org/msk-access/cwl-commandlinetools/) +[![Updates](https://pyup.io/repos/github/msk-access/cwl-commandlinetools/shield.svg)](https://pyup.io/repos/github/msk-access/cwl-commandlinetools/) +[![Python 3](https://pyup.io/repos/github/msk-access/cwl-commandlinetools/python-3-shield.svg)](https://pyup.io/repos/github/msk-access/cwl-commandlinetools/) + +- Free software: Apache Software License 2.0 +- Documentation: https://msk-access.gitbook.io/cwl-commandlinetools + +## Features + +Create command line tools in common workflow language to generate msk-access workflows. + +## Installation + +Clone the repository: + +``` +git clone --depth 50 https://github.com/msk-access/cwl-commandlinetools.git +``` + +## Credits + +This package was created with Cookiecutter_ and the `audreyr/cookiecutter-pypackage`_ project template. 
+ +- Cookiecutter: https://github.com/audreyr/cookiecutter +- `audreyr/cookiecutter-pypackage`: https://github.com/audreyr/cookiecutter-pypackage From 1a3727b51f6a569dc5434a82185c6c5076b0f166 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Mon, 23 Sep 2019 15:09:35 +0000 Subject: [PATCH 008/476] GitBook: [develop] 2 pages modified --- docs/README.md | 24 ++++++++++++------------ docs/SUMMARY.md | 4 ++++ 2 files changed, 16 insertions(+), 12 deletions(-) create mode 100644 docs/SUMMARY.md diff --git a/docs/README.md b/docs/README.md index cf01fb07..25e14729 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1,16 +1,15 @@ --- -description: Central location for storing common workflow language based command line tools for building msk-access workflows +description: >- + Central location for storing common workflow language based command line tools + for building msk-access workflows --- -# MSK-ACCESS BAM Generation workflow +# MSK-ACCESS command-line tools -[![PyPI version](https://badge.fury.io/py/cwl-commandlinetools.svg)](https://badge.fury.io/py/cwl-commandlinetools) -[![Build Status](https://travis-ci.org/msk-access/cwl-commandlinetools.svg?branch=master)](https://travis-ci.org/msk-access/cwl-commandlinetools/) -[![Updates](https://pyup.io/repos/github/msk-access/cwl-commandlinetools/shield.svg)](https://pyup.io/repos/github/msk-access/cwl-commandlinetools/) -[![Python 3](https://pyup.io/repos/github/msk-access/cwl-commandlinetools/python-3-shield.svg)](https://pyup.io/repos/github/msk-access/cwl-commandlinetools/) +[![PyPI version](https://badge.fury.io/py/cwl-commandlinetools.svg)](https://badge.fury.io/py/cwl-commandlinetools) [![Build Status](https://travis-ci.org/msk-access/cwl-commandlinetools.svg?branch=master)](https://travis-ci.org/msk-access/cwl-commandlinetools/) [![Updates](https://pyup.io/repos/github/msk-access/cwl-commandlinetools/shield.svg)](https://pyup.io/repos/github/msk-access/cwl-commandlinetools/) [![Python 
3](https://pyup.io/repos/github/msk-access/cwl-commandlinetools/python-3-shield.svg)](https://pyup.io/repos/github/msk-access/cwl-commandlinetools/) -- Free software: Apache Software License 2.0 -- Documentation: https://msk-access.gitbook.io/cwl-commandlinetools +* Free software: Apache Software License 2.0 +* Documentation: [https://msk-access.gitbook.io/cwl-commandlinetools](https://msk-access.gitbook.io/cwl-commandlinetools) ## Features @@ -20,13 +19,14 @@ Create command line tools in common workflow language to generate msk-access wor Clone the repository: -``` +```text git clone --depth 50 https://github.com/msk-access/cwl-commandlinetools.git ``` ## Credits -This package was created with Cookiecutter_ and the `audreyr/cookiecutter-pypackage`_ project template. +This package was created with Cookiecutter _and the `audreyr/cookiecutter-pypackage`_ project template. + +* Cookiecutter: [https://github.com/audreyr/cookiecutter](https://github.com/audreyr/cookiecutter) +* `audreyr/cookiecutter-pypackage`: [https://github.com/audreyr/cookiecutter-pypackage](https://github.com/audreyr/cookiecutter-pypackage) -- Cookiecutter: https://github.com/audreyr/cookiecutter -- `audreyr/cookiecutter-pypackage`: https://github.com/audreyr/cookiecutter-pypackage diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md new file mode 100644 index 00000000..0658922d --- /dev/null +++ b/docs/SUMMARY.md @@ -0,0 +1,4 @@ +# Table of contents + +* [MSK-ACCESS command-line tools](README.md) + From 4a9b5281480200b649de298571d3080340f8b9a9 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Mon, 23 Sep 2019 11:10:16 -0400 Subject: [PATCH 009/476] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index cf01fb07..64957357 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ description: Central location for storing common workflow language based command line tools for building msk-access workflows --- -# MSK-ACCESS BAM Generation 
workflow +# MSK-ACCESS command-line tools [![PyPI version](https://badge.fury.io/py/cwl-commandlinetools.svg)](https://badge.fury.io/py/cwl-commandlinetools) [![Build Status](https://travis-ci.org/msk-access/cwl-commandlinetools.svg?branch=master)](https://travis-ci.org/msk-access/cwl-commandlinetools/) From 14ea25582f09e52f7739c6d579e81a1a936fc65b Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Mon, 23 Sep 2019 13:27:49 -0400 Subject: [PATCH 010/476] adding string parameter to GATK readfilter option --- gatk_ApplyBQSR_4.1.2.0/gatk_ApplyBQSR_4.1.2.0.cwl | 9 +++++++-- .../gatk_baserecalibrator_4.1.2.0.cwl | 1 + 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/gatk_ApplyBQSR_4.1.2.0/gatk_ApplyBQSR_4.1.2.0.cwl b/gatk_ApplyBQSR_4.1.2.0/gatk_ApplyBQSR_4.1.2.0.cwl index 5fda71e8..c6265d4e 100644 --- a/gatk_ApplyBQSR_4.1.2.0/gatk_ApplyBQSR_4.1.2.0.cwl +++ b/gatk_ApplyBQSR_4.1.2.0/gatk_ApplyBQSR_4.1.2.0.cwl @@ -174,10 +174,15 @@ inputs: position: 6 prefix: '--QUIET' - id: read_filter - type: string? + type: + - 'null' + - type: string + - type: array + items: string + inputBinding: + prefix: '--read-filter' inputBinding: position: 6 - prefix: '--read-filter' - id: read_index type: string? 
inputBinding: diff --git a/gatk_BaseRecalibrator_4.1.2.0/gatk_baserecalibrator_4.1.2.0.cwl b/gatk_BaseRecalibrator_4.1.2.0/gatk_baserecalibrator_4.1.2.0.cwl index cf12e785..924c2509 100644 --- a/gatk_BaseRecalibrator_4.1.2.0/gatk_baserecalibrator_4.1.2.0.cwl +++ b/gatk_BaseRecalibrator_4.1.2.0/gatk_baserecalibrator_4.1.2.0.cwl @@ -221,6 +221,7 @@ inputs: - id: read_filter type: - 'null' + - type: string - type: array items: string inputBinding: From ab47f2d5b8e1276255693fa9fe3f70d89062d2fc Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Mon, 23 Sep 2019 13:35:57 -0400 Subject: [PATCH 011/476] Fixing Readfilter option in commandlinetools --- gatk_ApplyBQSR_4.1.2.0/gatk_ApplyBQSR_4.1.2.0.cwl | 2 +- gatk_BaseRecalibrator_4.1.2.0/gatk_baserecalibrator_4.1.2.0.cwl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/gatk_ApplyBQSR_4.1.2.0/gatk_ApplyBQSR_4.1.2.0.cwl b/gatk_ApplyBQSR_4.1.2.0/gatk_ApplyBQSR_4.1.2.0.cwl index c6265d4e..00c30213 100644 --- a/gatk_ApplyBQSR_4.1.2.0/gatk_ApplyBQSR_4.1.2.0.cwl +++ b/gatk_ApplyBQSR_4.1.2.0/gatk_ApplyBQSR_4.1.2.0.cwl @@ -176,7 +176,7 @@ inputs: - id: read_filter type: - 'null' - - type: string + - string - type: array items: string inputBinding: diff --git a/gatk_BaseRecalibrator_4.1.2.0/gatk_baserecalibrator_4.1.2.0.cwl b/gatk_BaseRecalibrator_4.1.2.0/gatk_baserecalibrator_4.1.2.0.cwl index 924c2509..f4fc46b6 100644 --- a/gatk_BaseRecalibrator_4.1.2.0/gatk_baserecalibrator_4.1.2.0.cwl +++ b/gatk_BaseRecalibrator_4.1.2.0/gatk_baserecalibrator_4.1.2.0.cwl @@ -221,7 +221,7 @@ inputs: - id: read_filter type: - 'null' - - type: string + - string - type: array items: string inputBinding: From a24c2b3cdb9292aec82cf06e8a5ee8fffd7a02e4 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Mon, 23 Sep 2019 14:08:53 -0400 Subject: [PATCH 012/476] Going back to original version --- gatk_ApplyBQSR_4.1.2.0/gatk_ApplyBQSR_4.1.2.0.cwl | 11 ++++------- .../gatk_baserecalibrator_4.1.2.0.cwl | 4 +--- 2 files changed, 5 insertions(+), 10 
deletions(-) diff --git a/gatk_ApplyBQSR_4.1.2.0/gatk_ApplyBQSR_4.1.2.0.cwl b/gatk_ApplyBQSR_4.1.2.0/gatk_ApplyBQSR_4.1.2.0.cwl index 00c30213..509148be 100644 --- a/gatk_ApplyBQSR_4.1.2.0/gatk_ApplyBQSR_4.1.2.0.cwl +++ b/gatk_ApplyBQSR_4.1.2.0/gatk_ApplyBQSR_4.1.2.0.cwl @@ -3,8 +3,9 @@ cwlVersion: v1.0 $namespaces: dct: 'http://purl.org/dc/terms/' doap: 'http://usefulinc.com/ns/doap#' + edam: 'http://edamontology.org/' foaf: 'http://xmlns.com/foaf/0.1/' - edam: http://edamontology.org/ + sbg: 'https://www.sevenbridges.com/' id: gatk_apply_bqsr_4_1_2_0 baseCommand: - gatk @@ -176,13 +177,11 @@ inputs: - id: read_filter type: - 'null' - - string - type: array items: string inputBinding: + position: 6 prefix: '--read-filter' - inputBinding: - position: 6 - id: read_index type: string? inputBinding: @@ -269,8 +268,6 @@ requirements: - class: ResourceRequirement ramMin: 10000 coresMin: 8 -# ramMin: "${\r if(inputs.memory_per_job && inputs.memory_overhead) {\r \r return inputs.memory_per_job + inputs.memory_overhead\r }\r else if (inputs.memory_per_job && !inputs.memory_overhead){\r \r \treturn inputs.memory_per_job + 2000\r }\r else if(!inputs.memory_per_job && inputs.memory_overhead){\r \r return 8000 + inputs.memory_overhead\r }\r else {\r \r \treturn 8000 \r }\r}" -# coresMin: "${\r if (inputs.number_of_threads) {\r \r \treturn inputs.number_of_threads \r }\r else {\r \r return 1\r }\r}" - class: DockerRequirement dockerPull: 'broadinstitute/gatk:4.1.2.0' - class: InlineJavascriptRequirement @@ -291,4 +288,4 @@ requirements: 'doap:release': - class: 'doap:Version' 'doap:name': gatk4 - 'doap:revision': 4.1.2.0 \ No newline at end of file + 'doap:revision': 4.1.2.0 diff --git a/gatk_BaseRecalibrator_4.1.2.0/gatk_baserecalibrator_4.1.2.0.cwl b/gatk_BaseRecalibrator_4.1.2.0/gatk_baserecalibrator_4.1.2.0.cwl index f4fc46b6..95a5e883 100644 --- a/gatk_BaseRecalibrator_4.1.2.0/gatk_baserecalibrator_4.1.2.0.cwl +++ 
b/gatk_BaseRecalibrator_4.1.2.0/gatk_baserecalibrator_4.1.2.0.cwl @@ -221,13 +221,11 @@ inputs: - id: read_filter type: - 'null' - - string - type: array items: string inputBinding: + position: 10 prefix: '--read-filter' - inputBinding: - position: 10 - id: read_index type: string? inputBinding: From 195afd4316e40e0834b240c937a286d4e76f360c Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Mon, 23 Sep 2019 14:16:53 -0400 Subject: [PATCH 013/476] Making it simial to sort read-filter option --- gatk_ApplyBQSR_4.1.2.0/gatk_ApplyBQSR_4.1.2.0.cwl | 3 ++- .../gatk_baserecalibrator_4.1.2.0.cwl | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/gatk_ApplyBQSR_4.1.2.0/gatk_ApplyBQSR_4.1.2.0.cwl b/gatk_ApplyBQSR_4.1.2.0/gatk_ApplyBQSR_4.1.2.0.cwl index 509148be..010d648d 100644 --- a/gatk_ApplyBQSR_4.1.2.0/gatk_ApplyBQSR_4.1.2.0.cwl +++ b/gatk_ApplyBQSR_4.1.2.0/gatk_ApplyBQSR_4.1.2.0.cwl @@ -180,8 +180,9 @@ inputs: - type: array items: string inputBinding: - position: 6 prefix: '--read-filter' + inputBinding: + position: 6 - id: read_index type: string? inputBinding: diff --git a/gatk_BaseRecalibrator_4.1.2.0/gatk_baserecalibrator_4.1.2.0.cwl b/gatk_BaseRecalibrator_4.1.2.0/gatk_baserecalibrator_4.1.2.0.cwl index 95a5e883..cf12e785 100644 --- a/gatk_BaseRecalibrator_4.1.2.0/gatk_baserecalibrator_4.1.2.0.cwl +++ b/gatk_BaseRecalibrator_4.1.2.0/gatk_baserecalibrator_4.1.2.0.cwl @@ -224,8 +224,9 @@ inputs: - type: array items: string inputBinding: - position: 10 prefix: '--read-filter' + inputBinding: + position: 10 - id: read_index type: string? 
inputBinding: From 565cd031500eaa98db218f677283c67cee95609e Mon Sep 17 00:00:00 2001 From: Yu Hu Date: Wed, 25 Sep 2019 14:49:43 -0400 Subject: [PATCH 014/476] add mutect 1.1.5 module --- mutect_1.1.5/container/Dockerfile | 33 ++ mutect_1.1.5/example_inputs.yaml | 34 ++ mutect_1.1.5/mutect_1.1.5.cwl | 764 ++++++++++++++++++++++++++++++ 3 files changed, 831 insertions(+) create mode 100644 mutect_1.1.5/container/Dockerfile create mode 100644 mutect_1.1.5/example_inputs.yaml create mode 100644 mutect_1.1.5/mutect_1.1.5.cwl diff --git a/mutect_1.1.5/container/Dockerfile b/mutect_1.1.5/container/Dockerfile new file mode 100644 index 00000000..7df7dfb7 --- /dev/null +++ b/mutect_1.1.5/container/Dockerfile @@ -0,0 +1,33 @@ +################## BASE IMAGE ###################### +FROM openjdk:7 + +################## ARGUMENTS/Environments ########## + +ARG BUILD_DATE +ARG BUILD_VERSION=1.0.0 +ARG MUTECT_VERSION=1.1.5 + +################## METADATA ######################## +LABEL org.opencontainers.image.vendor="MSKCC" +LABEL org.opencontainers.image.authors="Yu Hu" + +LABEL org.opencontainers.image.created=${BUILD_DATE} +LABEL org.opencontainers.image.version=${BUILD_VERSION} +LABEL org.opencontainers.image.version.mutect=${MUTECT_VERSION} +LABEL org.opencontainers.image.source="https://github.com/broadinstitute/mutect/releases/download/${MUTECT_VERSION}/muTect-${MUTECT_VERSION}-bin.zip" + +LABEL org.opencontainers.image.description="This container uses java7 as the base image to build mutect version ${MUTECT_VERSION}" + +################## INSTALL ########################## + +ENV MUTECT_VERSION=${MUTECT_VERSION} + +#Get Mutect +RUN wget https://github.com/broadinstitute/mutect/releases/download/${MUTECT_VERSION}/muTect-${MUTECT_VERSION}-bin.zip +RUN unzip muTect-${MUTECT_VERSION}-bin.zip +RUN rm muTect-${MUTECT_VERSION}-bin.zip +RUN chmod a+x muTect-${MUTECT_VERSION}.jar +RUN mv muTect-${MUTECT_VERSION}.jar /usr/local/bin/muTect.jar +RUN java -version +RUN which java + 
diff --git a/mutect_1.1.5/example_inputs.yaml b/mutect_1.1.5/example_inputs.yaml new file mode 100644 index 00000000..687bed6f --- /dev/null +++ b/mutect_1.1.5/example_inputs.yaml @@ -0,0 +1,34 @@ +input_file_normal: + class: File + path: /path/to/normal/bam/file +input_file_tumor: + class: File + path: /path/to/tumor/bam/file +normal_sample_name: normalsamplename +tumor_sample_name: tumorsamplename + +read_filter: BadCigar +downsample_to_coverage: 50000 +fraction_contamination: 0.0005 +minimum_mutation_cell_fraction: 0.0005 + +cosmic: + class: File + path: /path/to/cosmic/coding/mutations/file +dbsnp: + class: File + path: /path/to/dbsnp/vcf/file +intervals: + class: File + path: /path/to/canonicaltargets/bed/file +reference_sequence: + class: File + path: /path/to/genomic/reference/file + +out: output.mutect.txt +vcf: output.mutect.vcf + +tmp_dir: /path/to/tmp/dir + +java_7: "/usr/bin/java" +mutect: "/path/to/muTect-1.1.5.jar" diff --git a/mutect_1.1.5/mutect_1.1.5.cwl b/mutect_1.1.5/mutect_1.1.5.cwl new file mode 100644 index 00000000..f5220ab3 --- /dev/null +++ b/mutect_1.1.5/mutect_1.1.5.cwl @@ -0,0 +1,764 @@ + +class: CommandLineTool +cwlVersion: v1.0 + +$namespaces: + dct: http://purl.org/dc/terms/ + doap: http://usefulinc.com/ns/doap# + foaf: http://xmlns.com/foaf/0.1/ + sbg: https://www.sevenbridges.com/ + +id: mutect_v1 + +arguments: + - $(inputs.java_7) + - -Xmx8g + - -Xms256m + - -XX:-UseGCOverheadLimit + - -jar + - $(inputs.mutect) + - --analysis_type + - MuTect + +requirements: + ResourceRequirement: + ramMin: 32000 + coresMin: 1 + outdirMax: 20000 + InlineJavascriptRequirement: {} + +inputs: + + java_7: string + mutect: string + + BQSR: + type: string[]? + inputBinding: + position: 0 + prefix: --BQSR + doc: >- + The input covariates table file which enables on-the-fly base quality + score recalibration + + absolute_copy_number_data: + type: string[]? 
+ inputBinding: + position: 0 + prefix: --absolute_copy_number_data + doc: >- + Absolute Copy Number Data, as defined by Absolute, to use in power + calculations + + arg_file: + type: string[]? + inputBinding: + position: 0 + prefix: --arg_file + doc: Reads arguments from the specified file + + bam_tumor_sample_name: + type: string[]? + inputBinding: + position: 0 + prefix: --bam_tumor_sample_name + doc: >- + if the tumor bam contains multiple samples, only use read groups with SM + equal to this value + + baq: + type: string[]? + inputBinding: + position: 0 + prefix: --baq + doc: >- + Type of BAQ calculation to apply in the engine + (OFF|CALCULATE_AS_NECESSARY| RECALCULATE) + + baqGapOpenPenalty: + type: string[]? + inputBinding: + position: 0 + prefix: --baqGapOpenPenalty + doc: >- + BAQ gap open penalty (Phred Scaled). Default value is 40. 30 is perhaps + better for whole genome call sets + + clipping_bias_pvalue_threshold: + type: string[]? + inputBinding: + position: 0 + prefix: --clipping_bias_pvalue_threshold + doc: >- + pvalue threshold for fishers exact test of clipping bias in mutant reads + vs ref reads + + cosmic: + type: File? + inputBinding: + position: 0 + prefix: --cosmic + doc: VCF file of COSMIC sites + secondaryFiles: [^.vcf.idx] + + coverage_20_q20_file: + type: string[]? + inputBinding: + position: 0 + prefix: --coverage_20_q20_file + doc: write out 20x of Q20 coverage in WIGGLE format to this file + + coverage_file: + type: string[]? + inputBinding: + position: 0 + prefix: --coverage_file + doc: write out coverage in WIGGLE format to this file + + dbsnp: + type: File? + inputBinding: + position: 0 + prefix: --dbsnp + doc: VCF file of DBSNP information + secondaryFiles: [^.vcf.idx] + + dbsnp_normal_lod: + type: string[]? + inputBinding: + position: 0 + prefix: --dbsnp_normal_lod + doc: LOD threshold for calling normal non-variant at dbsnp sites + + defaultBaseQualities: + type: string[]? 
+ inputBinding: + position: 0 + prefix: --defaultBaseQualities + doc: >- + If reads are missing some or all base quality scores, this value will be + used for all base quality scores + + disableRandomization: + type: boolean? + default: false + inputBinding: + position: 0 + prefix: --disableRandomization + doc: >- + Completely eliminates randomization from nondeterministic methods. To be + used mostly in the testing framework where dynamic parallelism can result + in differing numbers of calls to the generator. + + disable_indel_quals: + type: boolean? + default: false + inputBinding: + position: 0 + prefix: --disable_indel_quals + doc: >- + If true, disables printing of base insertion and base deletion tags (with + -BQSR) + + downsample_to_coverage: + type: int? + inputBinding: + position: 0 + prefix: --downsample_to_coverage + doc: Target coverage threshold for downsampling to coverage + + downsampling_type: + type: string? + default: NONE + inputBinding: + position: 0 + prefix: --downsampling_type + doc: >- + Type of reads downsampling to employ at a given locus. Reads will be + selected randomly to be removed from the pile based on the method + described here (NONE|ALL_READS| BY_SAMPLE) given locus; note that + downsampled reads are randomly selected from all possible reads at a locus + + emit_original_quals: + type: boolean? + default: false + inputBinding: + position: 0 + prefix: --emit_original_quals + doc: >- + If true, enables printing of the OQ tag with the original base qualities + (with -BQSR) + + enable_extended_output: + type: boolean + default: true + inputBinding: + position: 0 + prefix: --enable_extended_output + + excludeIntervals: + type: string[]? + inputBinding: + position: 0 + prefix: --excludeIntervals + doc: >- + One or more genomic intervals to exclude from processing. Can be + explicitly specified on the command line or in a file (including a rod + file) + + filter_mismatching_base_and_quals: + type: boolean? 
+ default: false + inputBinding: + position: 0 + prefix: --filter_mismatching_base_and_quals + doc: >- + if a read has mismatching number of bases and base qualities, filter out + the read instead of blowing up. + + force_alleles: + type: boolean? + default: false + inputBinding: + position: 0 + prefix: --force_alleles + doc: force output for all alleles at each site + + force_output: + type: boolean? + default: false + inputBinding: + position: 0 + prefix: --force_output + doc: force output for each site + + fraction_contamination: + type: float? + inputBinding: + position: 0 + prefix: --fraction_contamination + doc: >- + estimate of fraction (0-1) of physical contamination with other unrelated + samples + + fraction_mapq0_threshold: + type: string[]? + inputBinding: + position: 0 + prefix: --fraction_mapq0_threshold + doc: >- + threshold for determining if there is relatedness between the alt and ref + allele read piles + + gap_events_threshold: + type: string[]? + inputBinding: + position: 0 + prefix: --gap_events_threshold + doc: >- + how many gapped events (ins/del) are allowed in proximity to this + candidate + + gatk_key: + type: string[]? + inputBinding: + position: 0 + prefix: --gatk_key + doc: >- + GATK Key file. Required if running with -et NO_ET. Please see + -phone-home-and-how-does-it-affect-me#latest for details. + + heavily_clipped_read_fraction: + type: string[]? + inputBinding: + position: 0 + prefix: --heavily_clipped_read_fraction + doc: >- + if this fraction or more of the bases in a read are soft/hard clipped, do + not use this read for mutation calling + + initial_tumor_lod: + type: string[]? + inputBinding: + position: 0 + prefix: --initial_tumor_lod + doc: Initial LOD threshold for calling tumor variant + + input_file_normal: + type: File? + inputBinding: + position: 0 + prefix: --input_file:normal + doc: SAM or BAM file(s) + secondaryFiles: [^.bai] + + input_file_tumor: + type: File? 
+ inputBinding: + position: 0 + prefix: --input_file:tumor + doc: SAM or BAM file(s) + secondaryFiles: [^.bai] + + interval_merging: + type: string[]? + inputBinding: + position: 0 + prefix: --interval_merging + doc: >- + Indicates the interval merging rule we should use for abutting intervals + (ALL| OVERLAPPING_ONLY) + + interval_padding: + type: string[]? + inputBinding: + position: 0 + prefix: --interval_padding + doc: >- + Indicates how many basepairs of padding to include around each of the + intervals specified with the -L/ + + interval_set_rule: + type: string[]? + inputBinding: + position: 0 + prefix: --interval_set_rule + doc: >- + Indicates the set merging approach the interval parser should use to + combine the various -L or -XL inputs (UNION| INTERSECTION) + + intervals: + type: + - string + - File + - 'null' + inputBinding: + position: 0 + prefix: --intervals + doc: >- + One or more genomic intervals over which to operate. Can be explicitly + specified on the command line or in a file (including a rod file) + + keep_program_records: + type: boolean? + default: false + inputBinding: + position: 0 + prefix: --keep_program_records + doc: >- + Should we override the Walkers default and keep program records from the + SAM header + + log_to_file: + type: string[]? + inputBinding: + position: 0 + prefix: --log_to_file + doc: Set the logging location + + logging_level: + type: string[]? + inputBinding: + position: 0 + prefix: --logging_level + doc: >- + Set the minimum level of logging, i.e. setting INFO gets you INFO up to + FATAL, setting ERROR gets you ERROR and FATAL level logging. + + maxRuntime: + type: string[]? + inputBinding: + position: 0 + prefix: --maxRuntime + doc: >- + If provided, that GATK will stop execution cleanly as soon after + maxRuntime has been exceeded, truncating the run but not exiting with a + failure. By default the value is interpreted in minutes, but this can be + changed by maxRuntimeUnits + + maxRuntimeUnits: + type: string[]? 
+ inputBinding: + position: 0 + prefix: --maxRuntimeUnits + doc: >- + The TimeUnit for maxRuntime (NANOSECONDS| + MICROSECONDS|MILLISECONDS|SECONDS|MINUTES| HOURS|DAYS) + + max_alt_allele_in_normal_fraction: + type: string[]? + inputBinding: + position: 0 + prefix: --max_alt_allele_in_normal_fraction + doc: threshold for maximum alternate allele fraction in normal + + max_alt_alleles_in_normal_count: + type: string[]? + inputBinding: + position: 0 + prefix: --max_alt_alleles_in_normal_count + doc: threshold for maximum alternate allele counts in normal + + max_alt_alleles_in_normal_qscore_sum: + type: string[]? + inputBinding: + position: 0 + prefix: --max_alt_alleles_in_normal_qscore_sum + doc: threshold for maximum alternate allele quality score sum in normal + + min_qscore: + type: string[]? + inputBinding: + position: 0 + prefix: --min_qscore + doc: threshold for minimum base quality score + + minimum_mutation_cell_fraction: + type: float? + inputBinding: + position: 0 + prefix: --minimum_mutation_cell_fraction + doc: >- + minimum fraction of cells which are presumed to have a mutation, used to + handle non-clonality and contamination + + minimum_normal_allele_fraction: + type: string[]? + inputBinding: + position: 0 + prefix: --minimum_normal_allele_fraction + doc: >- + minimum allele fraction to be considered in normal, useful for normal + sample contaminated with tumor + + monitorThreadEfficiency: + type: boolean? + default: false + inputBinding: + position: 0 + prefix: --monitorThreadEfficiency + doc: Enable GATK threading efficiency monitoring + + nonDeterministicRandomSeed: + type: boolean? + default: false + inputBinding: + position: 0 + prefix: --nonDeterministicRandomSeed + doc: >- + Makes the GATK behave non deterministically, that is, the random numbers + generated will be different in every run + + noop: + type: boolean? 
+ default: false + inputBinding: + position: 0 + prefix: --noop + doc: used for debugging, basically exit as soon as we get the reads + + normal_depth_file: + type: string[]? + inputBinding: + position: 0 + prefix: --normal_depth_file + doc: write out normal read depth in WIGGLE format to this file + + normal_lod: + type: string[]? + inputBinding: + position: 0 + prefix: --normal_lod + doc: LOD threshold for calling normal non-germline + + normal_sample_name: + type: string + inputBinding: + position: 0 + prefix: --normal_sample_name + doc: name to use for normal in output files + + num_bam_file_handles: + type: string[]? + inputBinding: + position: 0 + prefix: --num_bam_file_handles + doc: The total number of BAM file handles to keep open simultaneously + + num_cpu_threads_per_data_thread: + type: string? + inputBinding: + position: 0 + prefix: --num_cpu_threads_per_data_thread + doc: >- + How many CPU threads should be allocated per data thread to running this + analysis? + + num_threads: + type: string? + inputBinding: + position: 0 + prefix: --num_threads + doc: How many data threads should be allocated to running this analysis. + + only_passing_calls: + type: boolean? + default: false + inputBinding: + position: 0 + prefix: --only_passing_calls + doc: only emit passing calls + + out: + type: + - 'null' + - string + - File + inputBinding: + position: 0 + prefix: --out + doc: Call-stats output + + pedigree: + type: string[]? + inputBinding: + position: 0 + prefix: --pedigree + doc: Pedigree files for samples + + pedigreeString: + type: string[]? + inputBinding: + position: 0 + prefix: --pedigreeString + doc: Pedigree string for samples + + pedigreeValidationType: + type: string[]? + inputBinding: + position: 0 + prefix: --pedigreeValidationType + doc: >- + How strict should we be in validating the pedigree information? + (STRICT|SILENT) + + performanceLog: + type: string[]? 
+ inputBinding: + position: 0 + prefix: --performanceLog + doc: If provided, a GATK runtime performance log will be written to this file + + phone_home: + type: string[]? + inputBinding: + position: 0 + prefix: --phone_home + doc: >- + What kind of GATK run report should we generate? STANDARD is the default, + can be NO_ET so nothing is posted to the run repository. Please see + -phone-home-and-how-does-it-affect-me#latest for details. + (NO_ET|STANDARD|STDOUT) + + pir_mad_threshold: + type: string[]? + inputBinding: + position: 0 + prefix: --pir_mad_threshold + doc: threshold for clustered read position artifact MAD + + pir_median_threshold: + type: string[]? + inputBinding: + position: 0 + prefix: --pir_median_threshold + doc: threshold for clustered read position artifact median + + power_constant_af: + type: string[]? + inputBinding: + position: 0 + prefix: --power_constant_af + doc: Allelic fraction constant to use in power calculations + + power_constant_qscore: + type: string[]? + inputBinding: + position: 0 + prefix: --power_constant_qscore + doc: Phred scale quality score constant to use in power calculations + + power_file: + type: string[]? + inputBinding: + position: 0 + prefix: --power_file + doc: write out power in WIGGLE format to this file + + preserve_qscores_less_than: + type: string[]? + inputBinding: + position: 0 + prefix: --preserve_qscores_less_than + doc: >- + Bases with quality scores less than this threshold wont be recalibrated + (with -BQSR) + + read_buffer_size: + type: string[]? + inputBinding: + position: 0 + prefix: --read_buffer_size + doc: Number of reads per SAM file to buffer in memory + + read_filter: + type: string? + inputBinding: + position: 0 + prefix: --read_filter + doc: Specify filtration criteria to apply to each read individually + + read_group_black_list: + type: string[]? 
+ inputBinding: + position: 0 + prefix: --read_group_black_list + doc: >- + Filters out read groups matching - or a .txt file containing + the filter strings one per line. + + reference_sequence: + type: File + inputBinding: + position: 0 + prefix: --reference_sequence + secondaryFiles: [.fai, ^.dict] + + remove_program_records: + type: boolean? + default: false + inputBinding: + position: 0 + prefix: --remove_program_records + doc: >- + Should we override the Walkers default and remove program records from + the SAM header + + required_maximum_alt_allele_mapping_quality_score: + type: boolean? + default: false + inputBinding: + position: 0 + prefix: --required_maximum_alt_allele_mapping_quality_score + doc: >- + required minimum value for + tumor alt allele + maximum mapping quality score + + somatic_classification_normal_power_threshold: + type: boolean? + default: false + inputBinding: + position: 0 + prefix: --somatic_classification_normal_power_threshold + doc: >- + Power threshold for normal to + determine germline vs + variant + + tag: + type: string[]? + inputBinding: + position: 0 + prefix: --tag + doc: >- + Arbitrary tag string to identify this GATK run as part of a group of runs, + for later analysis + + tumor_depth_file: + type: string[]? + inputBinding: + position: 0 + prefix: --tumor_depth_file + doc: write out tumor read depth in WIGGLE format to this file + + tumor_f_pretest: + type: string[]? + inputBinding: + position: 0 + prefix: --tumor_f_pretest + doc: >- + for computational efficiency, reject sites with allelic fraction below + this threshold + + tumor_lod: + type: string[]? + inputBinding: + position: 0 + prefix: --tumor_lod + doc: LOD threshold for calling tumor variant + + tumor_sample_name: + type: string + inputBinding: + position: 0 + prefix: --tumor_sample_name + doc: name to use for tumor in output files + + unsafe: + type: string[]? 
+ inputBinding: + position: 0 + prefix: --unsafe + doc: >- + If set, enables unsafe operations - nothing will be checked at runtime. + For expert users only who know what they are doing. We do not support + usage of this argument. (ALLOW_UNINDEXED_BAM| ALLOW_UNSET_BAM_SORT_ORDER| + NO_READ_ORDER_VERIFICATION| ALLOW_SEQ_DICT_INCOMPATIBILITY| + LENIENT_VCF_PROCESSING|ALL) + + useOriginalQualities: + type: boolean? + default: false + inputBinding: + position: 0 + prefix: --useOriginalQualities + doc: >- + If set, use the original base quality scores from the OQ tag when present + instead of the standard scores + + validation_strictness: + type: string[]? + inputBinding: + position: 0 + prefix: --validation_strictness + doc: How strict should we be with validation (STRICT|LENIENT|SILENT) + + vcf: + type: string? + inputBinding: + position: 0 + prefix: --vcf + doc: VCF output of mutation candidates + + +outputs: + callstats_output: + type: File? + outputBinding: + glob: | + ${ + if (inputs.out) + return inputs.out; + return null; + } + + output: + type: File? 
+ outputBinding: + glob: | + ${ + if (inputs.vcf) + return inputs.vcf; + return null; + } From 547e75b60d96c1da9b8d14084de05939886f87e7 Mon Sep 17 00:00:00 2001 From: "rons.shah@gmail.com" Date: Thu, 26 Sep 2019 21:51:35 -0400 Subject: [PATCH 015/476] Testing if links work --- cwl_commandlinetools.py | 3 --- docs/SUMMARY.md | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) delete mode 100644 cwl_commandlinetools.py diff --git a/cwl_commandlinetools.py b/cwl_commandlinetools.py deleted file mode 100644 index 7fbbae4f..00000000 --- a/cwl_commandlinetools.py +++ /dev/null @@ -1,3 +0,0 @@ -# -*- coding: utf-8 -*- - -"""Main module.""" diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 0658922d..e64877db 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -1,4 +1,4 @@ # Table of contents * [MSK-ACCESS command-line tools](README.md) - +* [ABRA2 2.17](../abra2_2.17/README.md) From 357159b1bd21ab514f1b3abb3f152d0eed123d6c Mon Sep 17 00:00:00 2001 From: "rons.shah@gmail.com" Date: Thu, 26 Sep 2019 22:16:27 -0400 Subject: [PATCH 016/476] :blue_book: Updating docs --- docs/SUMMARY.md | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index e64877db..1d145bee 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -1,4 +1,26 @@ # Table of contents * [MSK-ACCESS command-line tools](README.md) -* [ABRA2 2.17](../abra2_2.17/README.md) + * Tools + * ABRA2 + * [v2.17](../abra2_2.17/README.md) + * [v2.19](../abra2_2.19/README.md) + * Bedtools + * [genomecov v2.28.0_cv2](../bedtools_genomecov_v2.28.0_cv2/README.md) + * [merge v2.28.0_cv2](../bedtools_merge_v2.28.0_cv2/README.md) +* [Disambiguate v1.0.0](../disambiguate_1.0.0/README.md) +* [GATK ApplyBQSR v4.1.2.0](../gatk_ApplyBQSR_4.1.2.0/README.md) +* [GATK BaseRecalibrator v4.1.2.0](../gatk_BaseRecalibrator_4.1.2.0/README.md) +* [Marianas Collapsing First Pass v1.8.1](../marianas_collapsing_first_pass_1.8.1/README.md) +* [Marianas Collapsing Second 
Pass v1.8.1](../marianas_collapsing_second_pass_1.8.1/README.md) +* [Marianas Process Loop UMI v1.8.1](../marianas_process_loop_umi_1.8.1/README.md) +* [Marianas Seprate BAMs v1.8.1](../marianas_separate_bams_1.8.1/README.md) +* [Picard Tools AddOrReplaceReadGroups v1.96](../picard_add_or_replace_read_groups_1.96/README.md) +* [Picard Tools CollectAlignmentSummaryMetrics v2.8.1](../picard_collect_alignment_summary_metrics_2.8.1/README.md) +* [Picard Tools FixMateInformation v1.96](../picard_fix_mate_information_1.96/README.md) +* [Picard Tools MarkDuplicates v1.96](../picard_mark_duplicates_1.96/README.md) +* [Picard Tools MarkDuplicates v2.8.1](../picard_mark_duplicates_2.8.1/README.md) +* [Trim Galore v0.6.2](../trim_galore_0.6.2/README.md) +* [Ubuntu Utilities v18.04](../utilities_ubuntu_18.04/README.md) +* [Waltz CountReads v3.1.1](../waltz_count_reads_3.1.1/README.md) +* [Waltz PileupMetrics v3.1.1](../waltz_pileupmatrices_3.1.1/README.md) From a08f2dd012f7bbcbe1d7b8b10a2563e96956c183 Mon Sep 17 00:00:00 2001 From: "rons.shah@gmail.com" Date: Thu, 26 Sep 2019 22:25:10 -0400 Subject: [PATCH 017/476] Update SUMMARY.md --- docs/SUMMARY.md | 52 +++++++++++++++++++++++++++---------------------- 1 file changed, 29 insertions(+), 23 deletions(-) diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 1d145bee..7f785a4b 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -1,26 +1,32 @@ # Table of contents * [MSK-ACCESS command-line tools](README.md) - * Tools - * ABRA2 - * [v2.17](../abra2_2.17/README.md) - * [v2.19](../abra2_2.19/README.md) - * Bedtools - * [genomecov v2.28.0_cv2](../bedtools_genomecov_v2.28.0_cv2/README.md) - * [merge v2.28.0_cv2](../bedtools_merge_v2.28.0_cv2/README.md) -* [Disambiguate v1.0.0](../disambiguate_1.0.0/README.md) -* [GATK ApplyBQSR v4.1.2.0](../gatk_ApplyBQSR_4.1.2.0/README.md) -* [GATK BaseRecalibrator v4.1.2.0](../gatk_BaseRecalibrator_4.1.2.0/README.md) -* [Marianas Collapsing First Pass 
v1.8.1](../marianas_collapsing_first_pass_1.8.1/README.md) -* [Marianas Collapsing Second Pass v1.8.1](../marianas_collapsing_second_pass_1.8.1/README.md) -* [Marianas Process Loop UMI v1.8.1](../marianas_process_loop_umi_1.8.1/README.md) -* [Marianas Seprate BAMs v1.8.1](../marianas_separate_bams_1.8.1/README.md) -* [Picard Tools AddOrReplaceReadGroups v1.96](../picard_add_or_replace_read_groups_1.96/README.md) -* [Picard Tools CollectAlignmentSummaryMetrics v2.8.1](../picard_collect_alignment_summary_metrics_2.8.1/README.md) -* [Picard Tools FixMateInformation v1.96](../picard_fix_mate_information_1.96/README.md) -* [Picard Tools MarkDuplicates v1.96](../picard_mark_duplicates_1.96/README.md) -* [Picard Tools MarkDuplicates v2.8.1](../picard_mark_duplicates_2.8.1/README.md) -* [Trim Galore v0.6.2](../trim_galore_0.6.2/README.md) -* [Ubuntu Utilities v18.04](../utilities_ubuntu_18.04/README.md) -* [Waltz CountReads v3.1.1](../waltz_count_reads_3.1.1/README.md) -* [Waltz PileupMetrics v3.1.1](../waltz_pileupmatrices_3.1.1/README.md) + * ABRA2 + * [v2.17](../abra2_2.17/README.md) + * [v2.19](../abra2_2.19/README.md) + * Bedtools + * [genomecov v2.28.0_cv2](../bedtools_genomecov_v2.28.0_cv2/README.md) + * [merge v2.28.0_cv2](../bedtools_merge_v2.28.0_cv2/README.md) + * Disambiguate + * [v1.0.0](../disambiguate_1.0.0/README.md) + * GATK + * [ApplyBQSR v4.1.2.0](../gatk_ApplyBQSR_4.1.2.0/README.md) + * [BaseRecalibrator v4.1.2.0](../gatk_BaseRecalibrator_4.1.2.0/README.md) + * Marianas + * [Collapsing First Pass v1.8.1](../marianas_collapsing_first_pass_1.8.1/README.md) + * [Collapsing Second Pass v1.8.1](../marianas_collapsing_second_pass_1.8.1/README.md) + * [Process Loop UMI v1.8.1](../marianas_process_loop_umi_1.8.1/README.md) + * [Seprate BAMs v1.8.1](../marianas_separate_bams_1.8.1/README.md) + * Picard Tools + * [AddOrReplaceReadGroups v1.96](../picard_add_or_replace_read_groups_1.96/README.md) + * [CollectAlignmentSummaryMetrics 
v2.8.1](../picard_collect_alignment_summary_metrics_2.8.1/README.md) + * [FixMateInformation v1.96](../picard_fix_mate_information_1.96/README.md) + * [MarkDuplicates v1.96](../picard_mark_duplicates_1.96/README.md) + * [MarkDuplicates v2.8.1](../picard_mark_duplicates_2.8.1/README.md) + * Trim Galore + * [v0.6.2](../trim_galore_0.6.2/README.md) + * Ubuntu utilites + * [v18.04](../utilities_ubuntu_18.04/README.md) + * Waltz + * [CountReads v3.1.1](../waltz_count_reads_3.1.1/README.md) + * [PileupMetrics v3.1.1](../waltz_pileupmatrices_3.1.1/README.md) From 2d117dbf01184975e622b2f0602a856402d71ca2 Mon Sep 17 00:00:00 2001 From: Yu Hu Date: Fri, 27 Sep 2019 12:35:28 -0400 Subject: [PATCH 018/476] add README --- mutect_1.1.5/README.md | 274 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 274 insertions(+) create mode 100644 mutect_1.1.5/README.md diff --git a/mutect_1.1.5/README.md b/mutect_1.1.5/README.md new file mode 100644 index 00000000..cc3f748f --- /dev/null +++ b/mutect_1.1.5/README.md @@ -0,0 +1,274 @@ +# CWL and Dockerfile for running Mutect v1.1.5 + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +|--- |--- |--- | +| openjdk:7 base image | 7 | - | +| muTect | 1.1.5 | https://github.com/broadinstitute/mutect/releases/download/1.1.5/muTect-1.1.5-bin.zip | + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner mutect_1.1.5.cwl example_inputs.yaml +``` + +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/mutect_1.1.5.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> mkdir mutect_toil_log +> 
toil-cwl-runner --singularity --logFile /path/to/mutect_toil_log/cwltoil.log --jobStore /path/to/mutect_jobStore --batchSystem lsf --workDir /path/to/mutect_toil_log --outdir . --writeLogs /path/to/mutect_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/mutect_1.1.5.cwl /path/to/inputs.yaml > mutect_toil.stdout 2> mutect_toil.stderr & +``` + +### Usage + +``` +usage: toil-cwl-runner mutect_1.1.5.cwl [-h] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --BQSR BQSR The input covariates table file which enables on-the- + fly base quality score recalibration + --absolute_copy_number_data ABSOLUTE_COPY_NUMBER_DATA + Absolute Copy Number Data, as defined by Absolute, to + use in power calculations + --arg_file ARG_FILE Reads arguments from the specified file + --bam_tumor_sample_name BAM_TUMOR_SAMPLE_NAME + if the tumor bam contains multiple samples, only use + read groups with SM equal to this value + --baq BAQ Type of BAQ calculation to apply in the engine + (OFF|CALCULATE_AS_NECESSARY| RECALCULATE) + --baqGapOpenPenalty BAQGAPOPENPENALTY + BAQ gap open penalty (Phred Scaled). Default value is + 40. 
30 is perhaps better for whole genome call sets + --clipping_bias_pvalue_threshold CLIPPING_BIAS_PVALUE_THRESHOLD + pvalue threshold for fishers exact test of clipping + bias in mutant reads vs ref reads + --cosmic COSMIC VCF file of COSMIC sites + --coverage_20_q20_file COVERAGE_20_Q20_FILE + write out 20x of Q20 coverage in WIGGLE format to this + file + --coverage_file COVERAGE_FILE + write out coverage in WIGGLE format to this file + --dbsnp DBSNP VCF file of DBSNP information + --dbsnp_normal_lod DBSNP_NORMAL_LOD + LOD threshold for calling normal non-variant at dbsnp + sites + --defaultBaseQualities DEFAULTBASEQUALITIES + If reads are missing some or all base quality scores, + this value will be used for all base quality scores + --disableRandomization + Completely eliminates randomization from + nondeterministic methods. To be used mostly in the + testing framework where dynamic parallelism can result + in differing numbers of calls to the generator. + --disable_indel_quals + If true, disables printing of base insertion and base + deletion tags (with -BQSR) + --downsample_to_coverage DOWNSAMPLE_TO_COVERAGE + Target coverage threshold for downsampling to coverage + --downsampling_type DOWNSAMPLING_TYPE + Type of reads downsampling to employ at a given locus. + Reads will be selected randomly to be removed from the + pile based on the method described here + (NONE|ALL_READS| BY_SAMPLE) given locus; note that + downsampled reads are randomly selected from all + possible reads at a locus + --emit_original_quals + If true, enables printing of the OQ tag with the + original base qualities (with -BQSR) + --enable_extended_output + --excludeIntervals EXCLUDEINTERVALS + One or more genomic intervals to exclude from + processing. Can be explicitly specified on the command + line or in a file (including a rod file) + --filter_mismatching_base_and_quals + if a read has mismatching number of bases and base + qualities, filter out the read instead of blowing up. 
+ --force_alleles force output for all alleles at each site + --force_output force output for each site + --fraction_contamination FRACTION_CONTAMINATION + estimate of fraction (0-1) of physical contamination + with other unrelated samples + --fraction_mapq0_threshold FRACTION_MAPQ0_THRESHOLD + threshold for determining if there is relatedness + between the alt and ref allele read piles + --gap_events_threshold GAP_EVENTS_THRESHOLD + how many gapped events (ins/del) are allowed in + proximity to this candidate + --gatk_key GATK_KEY GATK Key file. Required if running with -et NO_ET. + Please see -phone-home-and-how-does-it-affect- + me#latest for details. + --heavily_clipped_read_fraction HEAVILY_CLIPPED_READ_FRACTION + if this fraction or more of the bases in a read are + soft/hard clipped, do not use this read for mutation + calling + --initial_tumor_lod INITIAL_TUMOR_LOD + Initial LOD threshold for calling tumor variant + --input_file_normal INPUT_FILE_NORMAL + SAM or BAM file(s) + --input_file_tumor INPUT_FILE_TUMOR + SAM or BAM file(s) + --interval_merging INTERVAL_MERGING + Indicates the interval merging rule we should use for + abutting intervals (ALL| OVERLAPPING_ONLY) + --interval_padding INTERVAL_PADDING + Indicates how many basepairs of padding to include + around each of the intervals specified with the -L/ + --interval_set_rule INTERVAL_SET_RULE + Indicates the set merging approach the interval parser + should use to combine the various -L or -XL inputs + (UNION| INTERSECTION) + --java_7 JAVA_7 + --keep_program_records + Should we override the Walkers default and keep + program records from the SAM header + --log_to_file LOG_TO_FILE + Set the logging location + --logging_level LOGGING_LEVEL + Set the minimum level of logging, i.e. setting INFO + gets you INFO up to FATAL, setting ERROR gets you + ERROR and FATAL level logging. 
+ --maxRuntime MAXRUNTIME + If provided, that GATK will stop execution cleanly as + soon after maxRuntime has been exceeded, truncating + the run but not exiting with a failure. By default the + value is interpreted in minutes, but this can be + changed by maxRuntimeUnits + --maxRuntimeUnits MAXRUNTIMEUNITS + The TimeUnit for maxRuntime (NANOSECONDS| + MICROSECONDS|MILLISECONDS|SECONDS|MINUTES| HOURS|DAYS) + --max_alt_allele_in_normal_fraction MAX_ALT_ALLELE_IN_NORMAL_FRACTION + threshold for maximum alternate allele fraction in + normal + --max_alt_alleles_in_normal_count MAX_ALT_ALLELES_IN_NORMAL_COUNT + threshold for maximum alternate allele counts in + normal + --max_alt_alleles_in_normal_qscore_sum MAX_ALT_ALLELES_IN_NORMAL_QSCORE_SUM + threshold for maximum alternate allele quality score + sum in normal + --min_qscore MIN_QSCORE + threshold for minimum base quality score + --minimum_mutation_cell_fraction MINIMUM_MUTATION_CELL_FRACTION + minimum fraction of cells which are presumed to have a + mutation, used to handle non-clonality and + contamination + --minimum_normal_allele_fraction MINIMUM_NORMAL_ALLELE_FRACTION + minimum allele fraction to be considered in normal, + useful for normal sample contaminated with tumor + --monitorThreadEfficiency + Enable GATK threading efficiency monitoring + --mutect MUTECT + --nonDeterministicRandomSeed + Makes the GATK behave non deterministically, that is, + the random numbers generated will be different in + every run + --noop used for debugging, basically exit as soon as we get + the reads + --normal_depth_file NORMAL_DEPTH_FILE + write out normal read depth in WIGGLE format to this + file + --normal_lod NORMAL_LOD + LOD threshold for calling normal non-germline + --normal_sample_name NORMAL_SAMPLE_NAME + name to use for normal in output files + --num_bam_file_handles NUM_BAM_FILE_HANDLES + The total number of BAM file handles to keep open + simultaneously + --num_cpu_threads_per_data_thread 
NUM_CPU_THREADS_PER_DATA_THREAD + How many CPU threads should be allocated per data + thread to running this analysis? + --num_threads NUM_THREADS + How many data threads should be allocated to running + this analysis. + --only_passing_calls only emit passing calls + --pedigree PEDIGREE Pedigree files for samples + --pedigreeString PEDIGREESTRING + Pedigree string for samples + --pedigreeValidationType PEDIGREEVALIDATIONTYPE + How strict should we be in validating the pedigree + information? (STRICT|SILENT) + --performanceLog PERFORMANCELOG + If provided, a GATK runtime performance log will be + written to this file + --phone_home PHONE_HOME + What kind of GATK run report should we generate? + STANDARD is the default, can be NO_ET so nothing is + posted to the run repository. Please see -phone-home- + and-how-does-it-affect-me#latest for details. + (NO_ET|STANDARD|STDOUT) + --pir_mad_threshold PIR_MAD_THRESHOLD + threshold for clustered read position artifact MAD + --pir_median_threshold PIR_MEDIAN_THRESHOLD + threshold for clustered read position artifact median + --power_constant_af POWER_CONSTANT_AF + Allelic fraction constant to use in power calculations + --power_constant_qscore POWER_CONSTANT_QSCORE + Phred scale quality score constant to use in power + calculations + --power_file POWER_FILE + write out power in WIGGLE format to this file + --preserve_qscores_less_than PRESERVE_QSCORES_LESS_THAN + Bases with quality scores less than this threshold + wont be recalibrated (with -BQSR) + --read_buffer_size READ_BUFFER_SIZE + Number of reads per SAM file to buffer in memory + --read_filter READ_FILTER + Specify filtration criteria to apply to each read + individually + --read_group_black_list READ_GROUP_BLACK_LIST + Filters out read groups matching - or a + .txt file containing the filter strings one per line. 
+ --reference_sequence REFERENCE_SEQUENCE + --remove_program_records + Should we override the Walkers default and remove + program records from the SAM header + --required_maximum_alt_allele_mapping_quality_score + required minimum value for + + tumor alt allele maximum mapping quality score + --somatic_classification_normal_power_threshold + Power threshold for normal to + + determine germline vs variant + --tag TAG Arbitrary tag string to identify this GATK run as part + of a group of runs, for later analysis + --tumor_depth_file TUMOR_DEPTH_FILE + write out tumor read depth in WIGGLE format to this + file + --tumor_f_pretest TUMOR_F_PRETEST + for computational efficiency, reject sites with + allelic fraction below this threshold + --tumor_lod TUMOR_LOD + LOD threshold for calling tumor variant + --tumor_sample_name TUMOR_SAMPLE_NAME + name to use for tumor in output files + --unsafe UNSAFE If set, enables unsafe operations - nothing will be + checked at runtime. For expert users only who know + what they are doing. We do not support usage of this + argument. (ALLOW_UNINDEXED_BAM| + ALLOW_UNSET_BAM_SORT_ORDER| + NO_READ_ORDER_VERIFICATION| + ALLOW_SEQ_DICT_INCOMPATIBILITY| + LENIENT_VCF_PROCESSING|ALL) + --useOriginalQualities + If set, use the original base quality scores from the + OQ tag when present instead of the standard scores + --validation_strictness VALIDATION_STRICTNESS + How strict should we be with validation + (STRICT|LENIENT|SILENT) + --vcf VCF VCF output of mutation candidates + + +``` From ac956a79ae12e339228e2e7db35d7ab4693159b0 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 27 Sep 2019 15:32:04 -0400 Subject: [PATCH 019/476] :wrench: Docker, CWL :heavy_check_mark: Modified and created mskcc/mutect:0.1.0 :heavy_check_mark: Made CWL compatible with docker and added unbound memory arguments :heavy_check_mark: Removed Static -Xmx and -Xms option. 
Co-Authored-By: huyu335 --- mutect_1.1.5/container/Dockerfile | 4 +- mutect_1.1.5/mutect_1.1.5.cwl | 721 +++++++++++++++--------------- 2 files changed, 358 insertions(+), 367 deletions(-) diff --git a/mutect_1.1.5/container/Dockerfile b/mutect_1.1.5/container/Dockerfile index 7df7dfb7..6a87b817 100644 --- a/mutect_1.1.5/container/Dockerfile +++ b/mutect_1.1.5/container/Dockerfile @@ -4,7 +4,7 @@ FROM openjdk:7 ################## ARGUMENTS/Environments ########## ARG BUILD_DATE -ARG BUILD_VERSION=1.0.0 +ARG BUILD_VERSION=0.1.0 ARG MUTECT_VERSION=1.1.5 ################## METADATA ######################## @@ -28,6 +28,4 @@ RUN unzip muTect-${MUTECT_VERSION}-bin.zip RUN rm muTect-${MUTECT_VERSION}-bin.zip RUN chmod a+x muTect-${MUTECT_VERSION}.jar RUN mv muTect-${MUTECT_VERSION}.jar /usr/local/bin/muTect.jar -RUN java -version -RUN which java diff --git a/mutect_1.1.5/mutect_1.1.5.cwl b/mutect_1.1.5/mutect_1.1.5.cwl index f5220ab3..4998199d 100644 --- a/mutect_1.1.5/mutect_1.1.5.cwl +++ b/mutect_1.1.5/mutect_1.1.5.cwl @@ -1,749 +1,655 @@ - class: CommandLineTool cwlVersion: v1.0 - $namespaces: - dct: http://purl.org/dc/terms/ - doap: http://usefulinc.com/ns/doap# - foaf: http://xmlns.com/foaf/0.1/ - sbg: https://www.sevenbridges.com/ - -id: mutect_v1 - -arguments: - - $(inputs.java_7) - - -Xmx8g - - -Xms256m - - -XX:-UseGCOverheadLimit - - -jar - - $(inputs.mutect) - - --analysis_type - - MuTect - -requirements: - ResourceRequirement: - ramMin: 32000 - coresMin: 1 - outdirMax: 20000 - InlineJavascriptRequirement: {} - + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: mutect_v1_1_5 +baseCommand: + - java inputs: - - java_7: string - mutect: string - - BQSR: - type: string[]? + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? 
+ - id: BQSR + type: 'string[]?' inputBinding: position: 0 - prefix: --BQSR + prefix: '--BQSR' doc: >- The input covariates table file which enables on-the-fly base quality score recalibration - - absolute_copy_number_data: - type: string[]? + - id: absolute_copy_number_data + type: 'string[]?' inputBinding: position: 0 - prefix: --absolute_copy_number_data + prefix: '--absolute_copy_number_data' doc: >- Absolute Copy Number Data, as defined by Absolute, to use in power calculations - - arg_file: - type: string[]? + - id: arg_file + type: 'string[]?' inputBinding: position: 0 - prefix: --arg_file + prefix: '--arg_file' doc: Reads arguments from the specified file - - bam_tumor_sample_name: - type: string[]? + - id: bam_tumor_sample_name + type: 'string[]?' inputBinding: position: 0 - prefix: --bam_tumor_sample_name + prefix: '--bam_tumor_sample_name' doc: >- if the tumor bam contains multiple samples, only use read groups with SM equal to this value - - baq: - type: string[]? + - id: baq + type: 'string[]?' inputBinding: position: 0 - prefix: --baq + prefix: '--baq' doc: >- Type of BAQ calculation to apply in the engine (OFF|CALCULATE_AS_NECESSARY| RECALCULATE) - - baqGapOpenPenalty: - type: string[]? + - id: baqGapOpenPenalty + type: 'string[]?' inputBinding: position: 0 - prefix: --baqGapOpenPenalty + prefix: '--baqGapOpenPenalty' doc: >- BAQ gap open penalty (Phred Scaled). Default value is 40. 30 is perhaps better for whole genome call sets - - clipping_bias_pvalue_threshold: - type: string[]? + - id: clipping_bias_pvalue_threshold + type: 'string[]?' inputBinding: position: 0 - prefix: --clipping_bias_pvalue_threshold + prefix: '--clipping_bias_pvalue_threshold' doc: >- pvalue threshold for fishers exact test of clipping bias in mutant reads vs ref reads - - cosmic: + - id: cosmic type: File? inputBinding: position: 0 - prefix: --cosmic + prefix: '--cosmic' doc: VCF file of COSMIC sites - secondaryFiles: [^.vcf.idx] - - coverage_20_q20_file: - type: string[]? 
+ secondaryFiles: + - ^.vcf.idx + - id: coverage_20_q20_file + type: 'string[]?' inputBinding: position: 0 - prefix: --coverage_20_q20_file + prefix: '--coverage_20_q20_file' doc: write out 20x of Q20 coverage in WIGGLE format to this file - - coverage_file: - type: string[]? + - id: coverage_file + type: 'string[]?' inputBinding: position: 0 - prefix: --coverage_file + prefix: '--coverage_file' doc: write out coverage in WIGGLE format to this file - - dbsnp: + - id: dbsnp type: File? inputBinding: position: 0 - prefix: --dbsnp + prefix: '--dbsnp' doc: VCF file of DBSNP information - secondaryFiles: [^.vcf.idx] - - dbsnp_normal_lod: - type: string[]? + secondaryFiles: + - ^.vcf.idx + - id: dbsnp_normal_lod + type: 'string[]?' inputBinding: position: 0 - prefix: --dbsnp_normal_lod + prefix: '--dbsnp_normal_lod' doc: LOD threshold for calling normal non-variant at dbsnp sites - - defaultBaseQualities: - type: string[]? + - id: defaultBaseQualities + type: 'string[]?' inputBinding: position: 0 - prefix: --defaultBaseQualities + prefix: '--defaultBaseQualities' doc: >- If reads are missing some or all base quality scores, this value will be used for all base quality scores - - disableRandomization: + - default: false + id: disableRandomization type: boolean? - default: false inputBinding: position: 0 - prefix: --disableRandomization + prefix: '--disableRandomization' doc: >- Completely eliminates randomization from nondeterministic methods. To be used mostly in the testing framework where dynamic parallelism can result in differing numbers of calls to the generator. - - disable_indel_quals: + - default: false + id: disable_indel_quals type: boolean? - default: false inputBinding: position: 0 - prefix: --disable_indel_quals + prefix: '--disable_indel_quals' doc: >- If true, disables printing of base insertion and base deletion tags (with -BQSR) - - downsample_to_coverage: + - id: downsample_to_coverage type: int? 
inputBinding: position: 0 - prefix: --downsample_to_coverage + prefix: '--downsample_to_coverage' doc: Target coverage threshold for downsampling to coverage - - downsampling_type: + - default: NONE + id: downsampling_type type: string? - default: NONE inputBinding: position: 0 - prefix: --downsampling_type + prefix: '--downsampling_type' doc: >- Type of reads downsampling to employ at a given locus. Reads will be selected randomly to be removed from the pile based on the method described here (NONE|ALL_READS| BY_SAMPLE) given locus; note that downsampled reads are randomly selected from all possible reads at a locus - - emit_original_quals: + - default: false + id: emit_original_quals type: boolean? - default: false inputBinding: position: 0 - prefix: --emit_original_quals + prefix: '--emit_original_quals' doc: >- If true, enables printing of the OQ tag with the original base qualities (with -BQSR) - - enable_extended_output: + - default: true + id: enable_extended_output type: boolean - default: true inputBinding: position: 0 - prefix: --enable_extended_output - - excludeIntervals: - type: string[]? + prefix: '--enable_extended_output' + - id: excludeIntervals + type: 'string[]?' inputBinding: position: 0 - prefix: --excludeIntervals + prefix: '--excludeIntervals' doc: >- One or more genomic intervals to exclude from processing. Can be explicitly specified on the command line or in a file (including a rod file) - - filter_mismatching_base_and_quals: + - default: false + id: filter_mismatching_base_and_quals type: boolean? - default: false inputBinding: position: 0 - prefix: --filter_mismatching_base_and_quals + prefix: '--filter_mismatching_base_and_quals' doc: >- if a read has mismatching number of bases and base qualities, filter out the read instead of blowing up. - - force_alleles: + - default: false + id: force_alleles type: boolean? 
- default: false inputBinding: position: 0 - prefix: --force_alleles + prefix: '--force_alleles' doc: force output for all alleles at each site - - force_output: + - default: false + id: force_output type: boolean? - default: false inputBinding: position: 0 - prefix: --force_output + prefix: '--force_output' doc: force output for each site - - fraction_contamination: + - id: fraction_contamination type: float? inputBinding: position: 0 - prefix: --fraction_contamination + prefix: '--fraction_contamination' doc: >- estimate of fraction (0-1) of physical contamination with other unrelated samples - - fraction_mapq0_threshold: - type: string[]? + - id: fraction_mapq0_threshold + type: 'string[]?' inputBinding: position: 0 - prefix: --fraction_mapq0_threshold + prefix: '--fraction_mapq0_threshold' doc: >- threshold for determining if there is relatedness between the alt and ref allele read piles - - gap_events_threshold: - type: string[]? + - id: gap_events_threshold + type: 'string[]?' inputBinding: position: 0 - prefix: --gap_events_threshold + prefix: '--gap_events_threshold' doc: >- how many gapped events (ins/del) are allowed in proximity to this candidate - - gatk_key: - type: string[]? + - id: gatk_key + type: 'string[]?' inputBinding: position: 0 - prefix: --gatk_key + prefix: '--gatk_key' doc: >- GATK Key file. Required if running with -et NO_ET. Please see -phone-home-and-how-does-it-affect-me#latest for details. - - heavily_clipped_read_fraction: - type: string[]? + - id: heavily_clipped_read_fraction + type: 'string[]?' inputBinding: position: 0 - prefix: --heavily_clipped_read_fraction + prefix: '--heavily_clipped_read_fraction' doc: >- if this fraction or more of the bases in a read are soft/hard clipped, do not use this read for mutation calling - - initial_tumor_lod: - type: string[]? + - id: initial_tumor_lod + type: 'string[]?' 
inputBinding: position: 0 - prefix: --initial_tumor_lod + prefix: '--initial_tumor_lod' doc: Initial LOD threshold for calling tumor variant - - input_file_normal: + - id: input_file_normal type: File? inputBinding: position: 0 - prefix: --input_file:normal + prefix: '--input_file:normal' doc: SAM or BAM file(s) - secondaryFiles: [^.bai] - - input_file_tumor: + secondaryFiles: + - ^.bai + - id: input_file_tumor type: File? inputBinding: position: 0 - prefix: --input_file:tumor + prefix: '--input_file:tumor' doc: SAM or BAM file(s) - secondaryFiles: [^.bai] - - interval_merging: - type: string[]? + secondaryFiles: + - ^.bai + - id: interval_merging + type: 'string[]?' inputBinding: position: 0 - prefix: --interval_merging + prefix: '--interval_merging' doc: >- Indicates the interval merging rule we should use for abutting intervals (ALL| OVERLAPPING_ONLY) - - interval_padding: - type: string[]? + - id: interval_padding + type: 'string[]?' inputBinding: position: 0 - prefix: --interval_padding + prefix: '--interval_padding' doc: >- Indicates how many basepairs of padding to include around each of the intervals specified with the -L/ - - interval_set_rule: - type: string[]? + - id: interval_set_rule + type: 'string[]?' inputBinding: position: 0 - prefix: --interval_set_rule + prefix: '--interval_set_rule' doc: >- Indicates the set merging approach the interval parser should use to combine the various -L or -XL inputs (UNION| INTERSECTION) - - intervals: + - id: intervals type: - string - File - 'null' inputBinding: position: 0 - prefix: --intervals + prefix: '--intervals' doc: >- One or more genomic intervals over which to operate. Can be explicitly specified on the command line or in a file (including a rod file) - - keep_program_records: + - default: false + id: keep_program_records type: boolean? 
- default: false inputBinding: position: 0 - prefix: --keep_program_records + prefix: '--keep_program_records' doc: >- Should we override the Walkers default and keep program records from the SAM header - - log_to_file: - type: string[]? + - id: log_to_file + type: 'string[]?' inputBinding: position: 0 - prefix: --log_to_file + prefix: '--log_to_file' doc: Set the logging location - - logging_level: - type: string[]? + - id: logging_level + type: 'string[]?' inputBinding: position: 0 - prefix: --logging_level + prefix: '--logging_level' doc: >- Set the minimum level of logging, i.e. setting INFO gets you INFO up to FATAL, setting ERROR gets you ERROR and FATAL level logging. - - maxRuntime: - type: string[]? + - id: maxRuntime + type: 'string[]?' inputBinding: position: 0 - prefix: --maxRuntime + prefix: '--maxRuntime' doc: >- If provided, that GATK will stop execution cleanly as soon after maxRuntime has been exceeded, truncating the run but not exiting with a failure. By default the value is interpreted in minutes, but this can be changed by maxRuntimeUnits - - maxRuntimeUnits: - type: string[]? + - id: maxRuntimeUnits + type: 'string[]?' inputBinding: position: 0 - prefix: --maxRuntimeUnits + prefix: '--maxRuntimeUnits' doc: >- The TimeUnit for maxRuntime (NANOSECONDS| MICROSECONDS|MILLISECONDS|SECONDS|MINUTES| HOURS|DAYS) - - max_alt_allele_in_normal_fraction: - type: string[]? + - id: max_alt_allele_in_normal_fraction + type: 'string[]?' inputBinding: position: 0 - prefix: --max_alt_allele_in_normal_fraction + prefix: '--max_alt_allele_in_normal_fraction' doc: threshold for maximum alternate allele fraction in normal - - max_alt_alleles_in_normal_count: - type: string[]? + - id: max_alt_alleles_in_normal_count + type: 'string[]?' 
inputBinding: position: 0 - prefix: --max_alt_alleles_in_normal_count + prefix: '--max_alt_alleles_in_normal_count' doc: threshold for maximum alternate allele counts in normal - - max_alt_alleles_in_normal_qscore_sum: - type: string[]? + - id: max_alt_alleles_in_normal_qscore_sum + type: 'string[]?' inputBinding: position: 0 - prefix: --max_alt_alleles_in_normal_qscore_sum + prefix: '--max_alt_alleles_in_normal_qscore_sum' doc: threshold for maximum alternate allele quality score sum in normal - - min_qscore: - type: string[]? + - id: min_qscore + type: 'string[]?' inputBinding: position: 0 - prefix: --min_qscore + prefix: '--min_qscore' doc: threshold for minimum base quality score - - minimum_mutation_cell_fraction: + - id: minimum_mutation_cell_fraction type: float? inputBinding: position: 0 - prefix: --minimum_mutation_cell_fraction + prefix: '--minimum_mutation_cell_fraction' doc: >- minimum fraction of cells which are presumed to have a mutation, used to handle non-clonality and contamination - - minimum_normal_allele_fraction: - type: string[]? + - id: minimum_normal_allele_fraction + type: 'string[]?' inputBinding: position: 0 - prefix: --minimum_normal_allele_fraction + prefix: '--minimum_normal_allele_fraction' doc: >- minimum allele fraction to be considered in normal, useful for normal sample contaminated with tumor - - monitorThreadEfficiency: + - default: false + id: monitorThreadEfficiency type: boolean? - default: false inputBinding: position: 0 - prefix: --monitorThreadEfficiency + prefix: '--monitorThreadEfficiency' doc: Enable GATK threading efficiency monitoring - - nonDeterministicRandomSeed: + - default: false + id: nonDeterministicRandomSeed type: boolean? 
- default: false inputBinding: position: 0 - prefix: --nonDeterministicRandomSeed + prefix: '--nonDeterministicRandomSeed' doc: >- Makes the GATK behave non deterministically, that is, the random numbers generated will be different in every run - - noop: + - default: false + id: noop type: boolean? - default: false inputBinding: position: 0 - prefix: --noop - doc: used for debugging, basically exit as soon as we get the reads - - normal_depth_file: - type: string[]? + prefix: '--noop' + doc: 'used for debugging, basically exit as soon as we get the reads' + - id: normal_depth_file + type: 'string[]?' inputBinding: position: 0 - prefix: --normal_depth_file + prefix: '--normal_depth_file' doc: write out normal read depth in WIGGLE format to this file - - normal_lod: - type: string[]? + - id: normal_lod + type: 'string[]?' inputBinding: position: 0 - prefix: --normal_lod + prefix: '--normal_lod' doc: LOD threshold for calling normal non-germline - - normal_sample_name: + - id: normal_sample_name type: string inputBinding: position: 0 - prefix: --normal_sample_name + prefix: '--normal_sample_name' doc: name to use for normal in output files - - num_bam_file_handles: - type: string[]? + - id: num_bam_file_handles + type: 'string[]?' inputBinding: position: 0 - prefix: --num_bam_file_handles + prefix: '--num_bam_file_handles' doc: The total number of BAM file handles to keep open simultaneously - - num_cpu_threads_per_data_thread: + - id: num_cpu_threads_per_data_thread type: string? inputBinding: position: 0 - prefix: --num_cpu_threads_per_data_thread + prefix: '--num_cpu_threads_per_data_thread' doc: >- How many CPU threads should be allocated per data thread to running this analysis? - - num_threads: + - id: num_threads type: string? inputBinding: position: 0 - prefix: --num_threads + prefix: '--num_threads' doc: How many data threads should be allocated to running this analysis. - - only_passing_calls: + - default: false + id: only_passing_calls type: boolean? 
- default: false inputBinding: position: 0 - prefix: --only_passing_calls + prefix: '--only_passing_calls' doc: only emit passing calls - - out: + - id: out type: - - 'null' - string - File + - 'null' inputBinding: position: 0 - prefix: --out + prefix: '--out' doc: Call-stats output - - pedigree: - type: string[]? + - id: pedigree + type: 'string[]?' inputBinding: position: 0 - prefix: --pedigree + prefix: '--pedigree' doc: Pedigree files for samples - - pedigreeString: - type: string[]? + - id: pedigreeString + type: 'string[]?' inputBinding: position: 0 - prefix: --pedigreeString + prefix: '--pedigreeString' doc: Pedigree string for samples - - pedigreeValidationType: - type: string[]? + - id: pedigreeValidationType + type: 'string[]?' inputBinding: position: 0 - prefix: --pedigreeValidationType + prefix: '--pedigreeValidationType' doc: >- How strict should we be in validating the pedigree information? (STRICT|SILENT) - - performanceLog: - type: string[]? + - id: performanceLog + type: 'string[]?' inputBinding: position: 0 - prefix: --performanceLog - doc: If provided, a GATK runtime performance log will be written to this file - - phone_home: - type: string[]? + prefix: '--performanceLog' + doc: 'If provided, a GATK runtime performance log will be written to this file' + - id: phone_home + type: 'string[]?' inputBinding: position: 0 - prefix: --phone_home + prefix: '--phone_home' doc: >- What kind of GATK run report should we generate? STANDARD is the default, can be NO_ET so nothing is posted to the run repository. Please see -phone-home-and-how-does-it-affect-me#latest for details. (NO_ET|STANDARD|STDOUT) - - pir_mad_threshold: - type: string[]? + - id: pir_mad_threshold + type: 'string[]?' inputBinding: position: 0 - prefix: --pir_mad_threshold + prefix: '--pir_mad_threshold' doc: threshold for clustered read position artifact MAD - - pir_median_threshold: - type: string[]? + - id: pir_median_threshold + type: 'string[]?' 
inputBinding: position: 0 - prefix: --pir_median_threshold + prefix: '--pir_median_threshold' doc: threshold for clustered read position artifact median - - power_constant_af: - type: string[]? + - id: power_constant_af + type: 'string[]?' inputBinding: position: 0 - prefix: --power_constant_af + prefix: '--power_constant_af' doc: Allelic fraction constant to use in power calculations - - power_constant_qscore: - type: string[]? + - id: power_constant_qscore + type: 'string[]?' inputBinding: position: 0 - prefix: --power_constant_qscore + prefix: '--power_constant_qscore' doc: Phred scale quality score constant to use in power calculations - - power_file: - type: string[]? + - id: power_file + type: 'string[]?' inputBinding: position: 0 - prefix: --power_file + prefix: '--power_file' doc: write out power in WIGGLE format to this file - - preserve_qscores_less_than: - type: string[]? + - id: preserve_qscores_less_than + type: 'string[]?' inputBinding: position: 0 - prefix: --preserve_qscores_less_than + prefix: '--preserve_qscores_less_than' doc: >- Bases with quality scores less than this threshold wont be recalibrated (with -BQSR) - - read_buffer_size: - type: string[]? + - id: read_buffer_size + type: 'string[]?' inputBinding: position: 0 - prefix: --read_buffer_size + prefix: '--read_buffer_size' doc: Number of reads per SAM file to buffer in memory - - read_filter: + - id: read_filter type: string? inputBinding: position: 0 - prefix: --read_filter + prefix: '--read_filter' doc: Specify filtration criteria to apply to each read individually - - read_group_black_list: - type: string[]? + - id: read_group_black_list + type: 'string[]?' inputBinding: position: 0 - prefix: --read_group_black_list + prefix: '--read_group_black_list' doc: >- Filters out read groups matching - or a .txt file containing the filter strings one per line. 
- - reference_sequence: + - id: reference_sequence type: File inputBinding: position: 0 - prefix: --reference_sequence - secondaryFiles: [.fai, ^.dict] - - remove_program_records: + prefix: '--reference_sequence' + secondaryFiles: + - .fai + - ^.dict + - default: false + id: remove_program_records type: boolean? - default: false inputBinding: position: 0 - prefix: --remove_program_records + prefix: '--remove_program_records' doc: >- - Should we override the Walkers default and remove program records from - the SAM header - - required_maximum_alt_allele_mapping_quality_score: + Should we override the Walkers default and remove program records from the + SAM header + - default: false + id: required_maximum_alt_allele_mapping_quality_score type: boolean? - default: false inputBinding: position: 0 - prefix: --required_maximum_alt_allele_mapping_quality_score + prefix: '--required_maximum_alt_allele_mapping_quality_score' doc: >- required minimum value for tumor alt allele maximum mapping quality score - - somatic_classification_normal_power_threshold: + - default: false + id: somatic_classification_normal_power_threshold type: boolean? - default: false inputBinding: position: 0 - prefix: --somatic_classification_normal_power_threshold + prefix: '--somatic_classification_normal_power_threshold' doc: >- Power threshold for normal to determine germline vs variant - - tag: - type: string[]? + - id: tag + type: 'string[]?' inputBinding: position: 0 - prefix: --tag + prefix: '--tag' doc: >- Arbitrary tag string to identify this GATK run as part of a group of runs, for later analysis - - tumor_depth_file: - type: string[]? + - id: tumor_depth_file + type: 'string[]?' inputBinding: position: 0 - prefix: --tumor_depth_file + prefix: '--tumor_depth_file' doc: write out tumor read depth in WIGGLE format to this file - - tumor_f_pretest: - type: string[]? + - id: tumor_f_pretest + type: 'string[]?' 
inputBinding: position: 0 - prefix: --tumor_f_pretest + prefix: '--tumor_f_pretest' doc: >- for computational efficiency, reject sites with allelic fraction below this threshold - - tumor_lod: - type: string[]? + - id: tumor_lod + type: 'string[]?' inputBinding: position: 0 - prefix: --tumor_lod + prefix: '--tumor_lod' doc: LOD threshold for calling tumor variant - - tumor_sample_name: + - id: tumor_sample_name type: string inputBinding: position: 0 - prefix: --tumor_sample_name + prefix: '--tumor_sample_name' doc: name to use for tumor in output files - - unsafe: - type: string[]? + - id: unsafe + type: 'string[]?' inputBinding: position: 0 - prefix: --unsafe + prefix: '--unsafe' doc: >- If set, enables unsafe operations - nothing will be checked at runtime. For expert users only who know what they are doing. We do not support usage of this argument. (ALLOW_UNINDEXED_BAM| ALLOW_UNSET_BAM_SORT_ORDER| NO_READ_ORDER_VERIFICATION| ALLOW_SEQ_DICT_INCOMPATIBILITY| LENIENT_VCF_PROCESSING|ALL) - - useOriginalQualities: + - default: false + id: useOriginalQualities type: boolean? - default: false inputBinding: position: 0 - prefix: --useOriginalQualities + prefix: '--useOriginalQualities' doc: >- If set, use the original base quality scores from the OQ tag when present instead of the standard scores - - validation_strictness: - type: string[]? + - id: validation_strictness + type: 'string[]?' inputBinding: position: 0 - prefix: --validation_strictness + prefix: '--validation_strictness' doc: How strict should we be with validation (STRICT|LENIENT|SILENT) - - vcf: + - id: vcf type: string? inputBinding: position: 0 - prefix: --vcf + prefix: '--vcf' doc: VCF output of mutation candidates - - outputs: - callstats_output: + - id: mutect_output_txt type: File? outputBinding: glob: | @@ -752,8 +658,7 @@ outputs: return inputs.out; return null; } - - output: + - id: mutext_output_vcf type: File? 
outputBinding: glob: | @@ -762,3 +667,91 @@ outputs: return inputs.vcf; return null; } +arguments: + - |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx15G" + } + else { + return "-Xmx15G" + } + } + - |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xms" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xms" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xms" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xms" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx1G" + } + else { + return "-Xmx1G" + } + } + - '-XX:-UseGCOverheadLimit' + - '-jar' + - /usr/local/bin/muTect.jar + - '--analysis_type' + - MuTect +requirements: + - class: ResourceRequirement + ramMin: 34000 + coresMin: 1 + - class: DockerRequirement + dockerPull: 'mskcc/mutect:0.1.0' + - class: InlineJavascriptRequirement + 'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:huy1@mskcc.org' + 'foaf:name': Yu Hu + 'foaf:name': Memorial Sloan Kettering Cancer Center + 'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 
'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:huy1@mskcc.org' + 'foaf:name': Yu Hu + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': MuTect + 'doap:revision': 1.1.5 From 81e060e07e1e2b017c157ba65a2083691ecbe868 Mon Sep 17 00:00:00 2001 From: Yu Hu Date: Tue, 1 Oct 2019 14:12:24 -0400 Subject: [PATCH 020/476] tested after adding docker image --- mutect_1.1.5/example_inputs.yaml | 2 - mutect_1.1.5/mutect_1.1.5.cwl | 353 ++++++++++++++++--------------- 2 files changed, 181 insertions(+), 174 deletions(-) diff --git a/mutect_1.1.5/example_inputs.yaml b/mutect_1.1.5/example_inputs.yaml index 687bed6f..cd5ea18b 100644 --- a/mutect_1.1.5/example_inputs.yaml +++ b/mutect_1.1.5/example_inputs.yaml @@ -30,5 +30,3 @@ vcf: output.mutect.vcf tmp_dir: /path/to/tmp/dir -java_7: "/usr/bin/java" -mutect: "/path/to/muTect-1.1.5.jar" diff --git a/mutect_1.1.5/mutect_1.1.5.cwl b/mutect_1.1.5/mutect_1.1.5.cwl index 4998199d..207e6a5c 100644 --- a/mutect_1.1.5/mutect_1.1.5.cwl +++ b/mutect_1.1.5/mutect_1.1.5.cwl @@ -1,13 +1,18 @@ + class: CommandLineTool cwlVersion: v1.0 + $namespaces: - dct: 'http://purl.org/dc/terms/' - doap: 'http://usefulinc.com/ns/doap#' - foaf: 'http://xmlns.com/foaf/0.1/' - sbg: 'https://www.sevenbridges.com/' + dct: http://purl.org/dc/terms/ + doap: http://usefulinc.com/ns/doap# + foaf: http://xmlns.com/foaf/0.1/ + sbg: https://www.sevenbridges.com/ + id: mutect_v1_1_5 + baseCommand: - java + inputs: - id: memory_per_job type: int? @@ -18,56 +23,56 @@ inputs: - id: number_of_threads type: int? - id: BQSR - type: 'string[]?' + type: string[]? 
inputBinding: position: 0 - prefix: '--BQSR' + prefix: --BQSR doc: >- The input covariates table file which enables on-the-fly base quality score recalibration - id: absolute_copy_number_data - type: 'string[]?' + type: string[]? inputBinding: position: 0 - prefix: '--absolute_copy_number_data' + prefix: --absolute_copy_number_data doc: >- Absolute Copy Number Data, as defined by Absolute, to use in power calculations - id: arg_file - type: 'string[]?' + type: string[]? inputBinding: position: 0 - prefix: '--arg_file' + prefix: --arg_file doc: Reads arguments from the specified file - id: bam_tumor_sample_name - type: 'string[]?' + type: string[]? inputBinding: position: 0 - prefix: '--bam_tumor_sample_name' + prefix: --bam_tumor_sample_name doc: >- if the tumor bam contains multiple samples, only use read groups with SM equal to this value - id: baq - type: 'string[]?' + type: string[]? inputBinding: position: 0 - prefix: '--baq' + prefix: --baq doc: >- Type of BAQ calculation to apply in the engine (OFF|CALCULATE_AS_NECESSARY| RECALCULATE) - id: baqGapOpenPenalty - type: 'string[]?' + type: string[]? inputBinding: position: 0 - prefix: '--baqGapOpenPenalty' + prefix: --baqGapOpenPenalty doc: >- BAQ gap open penalty (Phred Scaled). Default value is 40. 30 is perhaps better for whole genome call sets - id: clipping_bias_pvalue_threshold - type: 'string[]?' + type: string[]? inputBinding: position: 0 - prefix: '--clipping_bias_pvalue_threshold' + prefix: --clipping_bias_pvalue_threshold doc: >- pvalue threshold for fishers exact test of clipping bias in mutant reads vs ref reads @@ -75,41 +80,41 @@ inputs: type: File? inputBinding: position: 0 - prefix: '--cosmic' + prefix: --cosmic doc: VCF file of COSMIC sites secondaryFiles: - ^.vcf.idx - id: coverage_20_q20_file - type: 'string[]?' + type: string[]? 
inputBinding: position: 0 - prefix: '--coverage_20_q20_file' + prefix: --coverage_20_q20_file doc: write out 20x of Q20 coverage in WIGGLE format to this file - id: coverage_file - type: 'string[]?' + type: string[]? inputBinding: position: 0 - prefix: '--coverage_file' + prefix: --coverage_file doc: write out coverage in WIGGLE format to this file - id: dbsnp type: File? inputBinding: position: 0 - prefix: '--dbsnp' + prefix: --dbsnp doc: VCF file of DBSNP information secondaryFiles: - ^.vcf.idx - id: dbsnp_normal_lod - type: 'string[]?' + type: string[]? inputBinding: position: 0 - prefix: '--dbsnp_normal_lod' + prefix: --dbsnp_normal_lod doc: LOD threshold for calling normal non-variant at dbsnp sites - id: defaultBaseQualities - type: 'string[]?' + type: string[]? inputBinding: position: 0 - prefix: '--defaultBaseQualities' + prefix: --defaultBaseQualities doc: >- If reads are missing some or all base quality scores, this value will be used for all base quality scores @@ -118,7 +123,7 @@ inputs: type: boolean? inputBinding: position: 0 - prefix: '--disableRandomization' + prefix: --disableRandomization doc: >- Completely eliminates randomization from nondeterministic methods. To be used mostly in the testing framework where dynamic parallelism can result @@ -128,7 +133,7 @@ inputs: type: boolean? inputBinding: position: 0 - prefix: '--disable_indel_quals' + prefix: --disable_indel_quals doc: >- If true, disables printing of base insertion and base deletion tags (with -BQSR) @@ -136,14 +141,14 @@ inputs: type: int? inputBinding: position: 0 - prefix: '--downsample_to_coverage' + prefix: --downsample_to_coverage doc: Target coverage threshold for downsampling to coverage - default: NONE id: downsampling_type type: string? inputBinding: position: 0 - prefix: '--downsampling_type' + prefix: --downsampling_type doc: >- Type of reads downsampling to employ at a given locus. 
Reads will be selected randomly to be removed from the pile based on the method @@ -154,7 +159,7 @@ inputs: type: boolean? inputBinding: position: 0 - prefix: '--emit_original_quals' + prefix: --emit_original_quals doc: >- If true, enables printing of the OQ tag with the original base qualities (with -BQSR) @@ -163,12 +168,12 @@ inputs: type: boolean inputBinding: position: 0 - prefix: '--enable_extended_output' + prefix: --enable_extended_output - id: excludeIntervals - type: 'string[]?' + type: string[]? inputBinding: position: 0 - prefix: '--excludeIntervals' + prefix: --excludeIntervals doc: >- One or more genomic intervals to exclude from processing. Can be explicitly specified on the command line or in a file (including a rod @@ -178,7 +183,7 @@ inputs: type: boolean? inputBinding: position: 0 - prefix: '--filter_mismatching_base_and_quals' + prefix: --filter_mismatching_base_and_quals doc: >- if a read has mismatching number of bases and base qualities, filter out the read instead of blowing up. @@ -187,66 +192,66 @@ inputs: type: boolean? inputBinding: position: 0 - prefix: '--force_alleles' + prefix: --force_alleles doc: force output for all alleles at each site - default: false id: force_output type: boolean? inputBinding: position: 0 - prefix: '--force_output' + prefix: --force_output doc: force output for each site - id: fraction_contamination type: float? inputBinding: position: 0 - prefix: '--fraction_contamination' + prefix: --fraction_contamination doc: >- estimate of fraction (0-1) of physical contamination with other unrelated samples - id: fraction_mapq0_threshold - type: 'string[]?' + type: string[]? inputBinding: position: 0 - prefix: '--fraction_mapq0_threshold' + prefix: --fraction_mapq0_threshold doc: >- threshold for determining if there is relatedness between the alt and ref allele read piles - id: gap_events_threshold - type: 'string[]?' + type: string[]? 
inputBinding: position: 0 - prefix: '--gap_events_threshold' + prefix: --gap_events_threshold doc: >- how many gapped events (ins/del) are allowed in proximity to this candidate - id: gatk_key - type: 'string[]?' + type: string[]? inputBinding: position: 0 - prefix: '--gatk_key' + prefix: --gatk_key doc: >- GATK Key file. Required if running with -et NO_ET. Please see -phone-home-and-how-does-it-affect-me#latest for details. - id: heavily_clipped_read_fraction - type: 'string[]?' + type: string[]? inputBinding: position: 0 - prefix: '--heavily_clipped_read_fraction' + prefix: --heavily_clipped_read_fraction doc: >- if this fraction or more of the bases in a read are soft/hard clipped, do not use this read for mutation calling - id: initial_tumor_lod - type: 'string[]?' + type: string[]? inputBinding: position: 0 - prefix: '--initial_tumor_lod' + prefix: --initial_tumor_lod doc: Initial LOD threshold for calling tumor variant - id: input_file_normal type: File? inputBinding: position: 0 - prefix: '--input_file:normal' + prefix: --input_file:normal doc: SAM or BAM file(s) secondaryFiles: - ^.bai @@ -254,31 +259,31 @@ inputs: type: File? inputBinding: position: 0 - prefix: '--input_file:tumor' + prefix: --input_file:tumor doc: SAM or BAM file(s) secondaryFiles: - ^.bai - id: interval_merging - type: 'string[]?' + type: string[]? inputBinding: position: 0 - prefix: '--interval_merging' + prefix: --interval_merging doc: >- Indicates the interval merging rule we should use for abutting intervals (ALL| OVERLAPPING_ONLY) - id: interval_padding - type: 'string[]?' + type: string[]? inputBinding: position: 0 - prefix: '--interval_padding' + prefix: --interval_padding doc: >- Indicates how many basepairs of padding to include around each of the intervals specified with the -L/ - id: interval_set_rule - type: 'string[]?' + type: string[]? 
inputBinding: position: 0 - prefix: '--interval_set_rule' + prefix: --interval_set_rule doc: >- Indicates the set merging approach the interval parser should use to combine the various -L or -XL inputs (UNION| INTERSECTION) @@ -289,7 +294,7 @@ inputs: - 'null' inputBinding: position: 0 - prefix: '--intervals' + prefix: --intervals doc: >- One or more genomic intervals over which to operate. Can be explicitly specified on the command line or in a file (including a rod file) @@ -298,79 +303,79 @@ inputs: type: boolean? inputBinding: position: 0 - prefix: '--keep_program_records' + prefix: --keep_program_records doc: >- Should we override the Walkers default and keep program records from the SAM header - id: log_to_file - type: 'string[]?' + type: string[]? inputBinding: position: 0 - prefix: '--log_to_file' + prefix: --log_to_file doc: Set the logging location - id: logging_level - type: 'string[]?' + type: string[]? inputBinding: position: 0 - prefix: '--logging_level' + prefix: --logging_level doc: >- Set the minimum level of logging, i.e. setting INFO gets you INFO up to FATAL, setting ERROR gets you ERROR and FATAL level logging. - id: maxRuntime - type: 'string[]?' + type: string[]? inputBinding: position: 0 - prefix: '--maxRuntime' + prefix: --maxRuntime doc: >- If provided, that GATK will stop execution cleanly as soon after maxRuntime has been exceeded, truncating the run but not exiting with a failure. By default the value is interpreted in minutes, but this can be changed by maxRuntimeUnits - id: maxRuntimeUnits - type: 'string[]?' + type: string[]? inputBinding: position: 0 - prefix: '--maxRuntimeUnits' + prefix: --maxRuntimeUnits doc: >- The TimeUnit for maxRuntime (NANOSECONDS| MICROSECONDS|MILLISECONDS|SECONDS|MINUTES| HOURS|DAYS) - id: max_alt_allele_in_normal_fraction - type: 'string[]?' + type: string[]? 
inputBinding: position: 0 - prefix: '--max_alt_allele_in_normal_fraction' + prefix: --max_alt_allele_in_normal_fraction doc: threshold for maximum alternate allele fraction in normal - id: max_alt_alleles_in_normal_count - type: 'string[]?' + type: string[]? inputBinding: position: 0 - prefix: '--max_alt_alleles_in_normal_count' + prefix: --max_alt_alleles_in_normal_count doc: threshold for maximum alternate allele counts in normal - id: max_alt_alleles_in_normal_qscore_sum - type: 'string[]?' + type: string[]? inputBinding: position: 0 - prefix: '--max_alt_alleles_in_normal_qscore_sum' + prefix: --max_alt_alleles_in_normal_qscore_sum doc: threshold for maximum alternate allele quality score sum in normal - id: min_qscore - type: 'string[]?' + type: string[]? inputBinding: position: 0 - prefix: '--min_qscore' + prefix: --min_qscore doc: threshold for minimum base quality score - id: minimum_mutation_cell_fraction type: float? inputBinding: position: 0 - prefix: '--minimum_mutation_cell_fraction' + prefix: --minimum_mutation_cell_fraction doc: >- minimum fraction of cells which are presumed to have a mutation, used to handle non-clonality and contamination - id: minimum_normal_allele_fraction - type: 'string[]?' + type: string[]? inputBinding: position: 0 - prefix: '--minimum_normal_allele_fraction' + prefix: --minimum_normal_allele_fraction doc: >- minimum allele fraction to be considered in normal, useful for normal sample contaminated with tumor @@ -379,14 +384,14 @@ inputs: type: boolean? inputBinding: position: 0 - prefix: '--monitorThreadEfficiency' + prefix: --monitorThreadEfficiency doc: Enable GATK threading efficiency monitoring - default: false id: nonDeterministicRandomSeed type: boolean? 
inputBinding: position: 0 - prefix: '--nonDeterministicRandomSeed' + prefix: --nonDeterministicRandomSeed doc: >- Makes the GATK behave non deterministically, that is, the random numbers generated will be different in every run @@ -395,37 +400,37 @@ inputs: type: boolean? inputBinding: position: 0 - prefix: '--noop' - doc: 'used for debugging, basically exit as soon as we get the reads' + prefix: --noop + doc: used for debugging, basically exit as soon as we get the reads - id: normal_depth_file - type: 'string[]?' + type: string[]? inputBinding: position: 0 - prefix: '--normal_depth_file' + prefix: --normal_depth_file doc: write out normal read depth in WIGGLE format to this file - id: normal_lod - type: 'string[]?' + type: string[]? inputBinding: position: 0 - prefix: '--normal_lod' + prefix: --normal_lod doc: LOD threshold for calling normal non-germline - id: normal_sample_name type: string inputBinding: position: 0 - prefix: '--normal_sample_name' + prefix: --normal_sample_name doc: name to use for normal in output files - id: num_bam_file_handles - type: 'string[]?' + type: string[]? inputBinding: position: 0 - prefix: '--num_bam_file_handles' + prefix: --num_bam_file_handles doc: The total number of BAM file handles to keep open simultaneously - id: num_cpu_threads_per_data_thread type: string? inputBinding: position: 0 - prefix: '--num_cpu_threads_per_data_thread' + prefix: --num_cpu_threads_per_data_thread doc: >- How many CPU threads should be allocated per data thread to running this analysis? @@ -433,14 +438,14 @@ inputs: type: string? inputBinding: position: 0 - prefix: '--num_threads' + prefix: --num_threads doc: How many data threads should be allocated to running this analysis. - default: false id: only_passing_calls type: boolean? 
inputBinding: position: 0 - prefix: '--only_passing_calls' + prefix: --only_passing_calls doc: only emit passing calls - id: out type: @@ -449,99 +454,99 @@ inputs: - 'null' inputBinding: position: 0 - prefix: '--out' + prefix: --out doc: Call-stats output - id: pedigree - type: 'string[]?' + type: string[]? inputBinding: position: 0 - prefix: '--pedigree' + prefix: --pedigree doc: Pedigree files for samples - id: pedigreeString - type: 'string[]?' + type: string[]? inputBinding: position: 0 - prefix: '--pedigreeString' + prefix: --pedigreeString doc: Pedigree string for samples - id: pedigreeValidationType - type: 'string[]?' + type: string[]? inputBinding: position: 0 - prefix: '--pedigreeValidationType' + prefix: --pedigreeValidationType doc: >- How strict should we be in validating the pedigree information? (STRICT|SILENT) - id: performanceLog - type: 'string[]?' + type: string[]? inputBinding: position: 0 - prefix: '--performanceLog' - doc: 'If provided, a GATK runtime performance log will be written to this file' + prefix: --performanceLog + doc: If provided, a GATK runtime performance log will be written to this file - id: phone_home - type: 'string[]?' + type: string[]? inputBinding: position: 0 - prefix: '--phone_home' + prefix: --phone_home doc: >- What kind of GATK run report should we generate? STANDARD is the default, can be NO_ET so nothing is posted to the run repository. Please see -phone-home-and-how-does-it-affect-me#latest for details. (NO_ET|STANDARD|STDOUT) - id: pir_mad_threshold - type: 'string[]?' + type: string[]? inputBinding: position: 0 - prefix: '--pir_mad_threshold' + prefix: --pir_mad_threshold doc: threshold for clustered read position artifact MAD - id: pir_median_threshold - type: 'string[]?' + type: string[]? inputBinding: position: 0 - prefix: '--pir_median_threshold' + prefix: --pir_median_threshold doc: threshold for clustered read position artifact median - id: power_constant_af - type: 'string[]?' + type: string[]? 
inputBinding: position: 0 - prefix: '--power_constant_af' + prefix: --power_constant_af doc: Allelic fraction constant to use in power calculations - id: power_constant_qscore - type: 'string[]?' + type: string[]? inputBinding: position: 0 - prefix: '--power_constant_qscore' + prefix: --power_constant_qscore doc: Phred scale quality score constant to use in power calculations - id: power_file - type: 'string[]?' + type: string[]? inputBinding: position: 0 - prefix: '--power_file' + prefix: --power_file doc: write out power in WIGGLE format to this file - id: preserve_qscores_less_than - type: 'string[]?' + type: string[]? inputBinding: position: 0 - prefix: '--preserve_qscores_less_than' + prefix: --preserve_qscores_less_than doc: >- Bases with quality scores less than this threshold wont be recalibrated (with -BQSR) - id: read_buffer_size - type: 'string[]?' + type: string[]? inputBinding: position: 0 - prefix: '--read_buffer_size' + prefix: --read_buffer_size doc: Number of reads per SAM file to buffer in memory - id: read_filter type: string? inputBinding: position: 0 - prefix: '--read_filter' + prefix: --read_filter doc: Specify filtration criteria to apply to each read individually - id: read_group_black_list - type: 'string[]?' + type: string[]? inputBinding: position: 0 - prefix: '--read_group_black_list' + prefix: --read_group_black_list doc: >- Filters out read groups matching - or a .txt file containing the filter strings one per line. @@ -549,7 +554,7 @@ inputs: type: File inputBinding: position: 0 - prefix: '--reference_sequence' + prefix: --reference_sequence secondaryFiles: - .fai - ^.dict @@ -558,7 +563,7 @@ inputs: type: boolean? inputBinding: position: 0 - prefix: '--remove_program_records' + prefix: --remove_program_records doc: >- Should we override the Walkers default and remove program records from the SAM header @@ -567,7 +572,7 @@ inputs: type: boolean? 
inputBinding: position: 0 - prefix: '--required_maximum_alt_allele_mapping_quality_score' + prefix: --required_maximum_alt_allele_mapping_quality_score doc: >- required minimum value for tumor alt allele @@ -577,50 +582,50 @@ inputs: type: boolean? inputBinding: position: 0 - prefix: '--somatic_classification_normal_power_threshold' + prefix: --somatic_classification_normal_power_threshold doc: >- Power threshold for normal to determine germline vs variant - id: tag - type: 'string[]?' + type: string[]? inputBinding: position: 0 - prefix: '--tag' + prefix: --tag doc: >- Arbitrary tag string to identify this GATK run as part of a group of runs, for later analysis - id: tumor_depth_file - type: 'string[]?' + type: string[]? inputBinding: position: 0 - prefix: '--tumor_depth_file' + prefix: --tumor_depth_file doc: write out tumor read depth in WIGGLE format to this file - id: tumor_f_pretest - type: 'string[]?' + type: string[]? inputBinding: position: 0 - prefix: '--tumor_f_pretest' + prefix: --tumor_f_pretest doc: >- for computational efficiency, reject sites with allelic fraction below this threshold - id: tumor_lod - type: 'string[]?' + type: string[]? inputBinding: position: 0 - prefix: '--tumor_lod' + prefix: --tumor_lod doc: LOD threshold for calling tumor variant - id: tumor_sample_name type: string inputBinding: position: 0 - prefix: '--tumor_sample_name' + prefix: --tumor_sample_name doc: name to use for tumor in output files - id: unsafe - type: 'string[]?' + type: string[]? inputBinding: position: 0 - prefix: '--unsafe' + prefix: --unsafe doc: >- If set, enables unsafe operations - nothing will be checked at runtime. For expert users only who know what they are doing. We do not support @@ -632,22 +637,23 @@ inputs: type: boolean? 
inputBinding: position: 0 - prefix: '--useOriginalQualities' + prefix: --useOriginalQualities doc: >- If set, use the original base quality scores from the OQ tag when present instead of the standard scores - id: validation_strictness - type: 'string[]?' + type: string[]? inputBinding: position: 0 - prefix: '--validation_strictness' + prefix: --validation_strictness doc: How strict should we be with validation (STRICT|LENIENT|SILENT) - id: vcf type: string? inputBinding: position: 0 - prefix: '--vcf' + prefix: --vcf doc: VCF output of mutation candidates + outputs: - id: mutect_output_txt type: File? @@ -656,7 +662,7 @@ outputs: ${ if (inputs.out) return inputs.out; - return null; + return 'null'; } - id: mutext_output_vcf type: File? @@ -665,8 +671,9 @@ outputs: ${ if (inputs.vcf) return inputs.vcf; - return null; + return 'null'; } + arguments: - |- ${ @@ -718,40 +725,42 @@ arguments: return "-Xmx1G" } } - - '-XX:-UseGCOverheadLimit' - - '-jar' + - -XX:-UseGCOverheadLimit + - -jar - /usr/local/bin/muTect.jar - - '--analysis_type' + - --analysis_type - MuTect + requirements: - class: ResourceRequirement ramMin: 34000 coresMin: 1 - class: DockerRequirement - dockerPull: 'mskcc/mutect:0.1.0' + dockerPull: mskcc/mutect:0.1.0 - class: InlineJavascriptRequirement - 'dct:contributor': - - class: 'foaf:Organization' - 'foaf:member': - - class: 'foaf:Person' - 'foaf:mbox': 'mailto:huy1@mskcc.org' - 'foaf:name': Yu Hu - 'foaf:name': Memorial Sloan Kettering Cancer Center - 'dct:contributor': - - class: 'foaf:Organization' - 'foaf:member': - - class: 'foaf:Person' - 'foaf:mbox': 'mailto:shahr2@mskcc.org' - 'foaf:name': Ronak Shah - 'foaf:name': Memorial Sloan Kettering Cancer Center -'dct:creator': - - class: 'foaf:Organization' - 'foaf:member': - - class: 'foaf:Person' - 'foaf:mbox': 'mailto:huy1@mskcc.org' - 'foaf:name': Yu Hu - 'foaf:name': Memorial Sloan Kettering Cancer Center -'doap:release': - - class: 'doap:Version' - 'doap:name': MuTect - 'doap:revision': 1.1.5 + 
+dct:contributor: + - class: foaf:Organization + foaf:member: + - class: foaf:Person + foaf:mbox: mailto:huy1@mskcc.org + foaf:name: Yu Hu + foaf:name: Memorial Sloan Kettering Cancer Center +dct:contributor: + - class: foaf:Organization + foaf:member: + - class: foaf:Person + foaf:mbox: mailto:shahr2@mskcc.org + foaf:name: Ronak Shah + foaf:name: Memorial Sloan Kettering Cancer Center +dct:creator: + - class: foaf:Organization + foaf:member: + - class: foaf:Person + foaf:mbox: mailto:huy1@mskcc.org + foaf:name: Yu Hu + foaf:name: Memorial Sloan Kettering Cancer Center +doap:release: + - class: doap:Version + doap:name: MuTect + doap:revision: 1.1.5 From f97d02d2069ed8326f7885dba07da02e221dd847 Mon Sep 17 00:00:00 2001 From: Yu Hu Date: Thu, 3 Oct 2019 19:27:40 -0400 Subject: [PATCH 021/476] add default output file names --- mutect_1.1.5/mutect_1.1.5.cwl | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/mutect_1.1.5/mutect_1.1.5.cwl b/mutect_1.1.5/mutect_1.1.5.cwl index 207e6a5c..d277766d 100644 --- a/mutect_1.1.5/mutect_1.1.5.cwl +++ b/mutect_1.1.5/mutect_1.1.5.cwl @@ -448,10 +448,8 @@ inputs: prefix: --only_passing_calls doc: only emit passing calls - id: out - type: - - string - - File - - 'null' + default: 'mutect_variants.txt' + type: string? inputBinding: position: 0 prefix: --out @@ -649,6 +647,7 @@ inputs: doc: How strict should we be with validation (STRICT|LENIENT|SILENT) - id: vcf type: string? + default: 'mutect_variants.vcf' inputBinding: position: 0 prefix: --vcf @@ -658,21 +657,11 @@ outputs: - id: mutect_output_txt type: File? outputBinding: - glob: | - ${ - if (inputs.out) - return inputs.out; - return 'null'; - } + glob: $(inputs.out) - id: mutext_output_vcf type: File? 
outputBinding: - glob: | - ${ - if (inputs.vcf) - return inputs.vcf; - return 'null'; - } + glob: $(inputs.vcf) arguments: - |- From 18e43ef870d5a01bbeaa73f010d128bdb40b9b8d Mon Sep 17 00:00:00 2001 From: "rons.shah@gmail.com" Date: Tue, 15 Oct 2019 22:10:42 -0400 Subject: [PATCH 022/476] Update SUMMARY.md --- docs/SUMMARY.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 7f785a4b..cd63b5d3 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -17,6 +17,8 @@ * [Collapsing Second Pass v1.8.1](../marianas_collapsing_second_pass_1.8.1/README.md) * [Process Loop UMI v1.8.1](../marianas_process_loop_umi_1.8.1/README.md) * [Seprate BAMs v1.8.1](../marianas_separate_bams_1.8.1/README.md) + * MuTect + * [MuTect 1.1.5](../mutect_1.1.5/README.md) * Picard Tools * [AddOrReplaceReadGroups v1.96](../picard_add_or_replace_read_groups_1.96/README.md) * [CollectAlignmentSummaryMetrics v2.8.1](../picard_collect_alignment_summary_metrics_2.8.1/README.md) From ccf346511cb3d93a5712575b829debf31e4c81de Mon Sep 17 00:00:00 2001 From: "rons.shah@gmail.com" Date: Tue, 15 Oct 2019 22:59:16 -0400 Subject: [PATCH 023/476] Chainging setup files --- .travis.yml | 4 +--- setup.cfg | 2 +- setup.py | 2 +- tests/test_cwl_commandlinetools.py | 18 ++---------------- 4 files changed, 5 insertions(+), 21 deletions(-) diff --git a/.travis.yml b/.travis.yml index df237ae4..9771f62c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,8 +3,6 @@ language: python python: - 3.7 - - 3.6 - - 3.5 - 2.7 # Command to install dependencies, e.g. 
pip install -r requirements.txt --use-mirrors @@ -22,7 +20,7 @@ deploy: distributions: sdist bdist_wheel user: msk-access password: - secure: PLEASE_REPLACE_ME + secure: ithmgopowELxskUKR2LAi/cMnet6J+PH4emAOiJ57rBqFkwq8lVXmT/NW8D7k2VoJdTguB9v+RH+Q/91ShJh1VnwbRQ1bX7Ecr7P7O35DIQBp40Eqg5AH6ap8se37dsufnNyVNhj8vNtusd65jC3+6w4kQkcCfysD5eByUDDw04FNYCbhbXIn4S/JJ/EJUFFb8ElNzo5iWpVPnfP9uddYxpXTEgWZBo+TylZsa1gxMYUqio5xa34TMkUzN5N4tooe0C1uZ6H9/zRzh2pLwoof1ITf3wws/FOJkPwsJFZWviC+0K1pNJTaTY77kHEEC5W8/JupvdShrFg+BzldLHWsGuyC+Mfj3bjJFTqyDnbthc0kosQ3x7UdTKzyztL+gekdCcNEsusN10VTdO0DYkUnYTc1YWpsC+ORQxPGANg+RrvjO8lGobvQ4ZlhcWBJ1ynlvntlg+b0iHzuOntznwZGTJRcTqtjsH+zzIEo12FiWnJwjvM78OkEsNp5XYaKgYryhQBIo5Uqy79h7wtF8mAnIRrsK5cyQmYTJtWZ+OIQbuzj+l9o+Ff19hykD7LEB8I9So8240w09+HoNxpZKYMQFndGGZnC1wkoZNdBEOjn9Az9ZouHO7qkFHZVSp3rqSAEqiG8fm3TTl/5VAO9RNEKT5vmZMDhl04F6LXZYGCLKU= on: tags: true repo: msk-access/cwl_commandlinetools diff --git a/setup.cfg b/setup.cfg index f07ef97f..55e8ea2b 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.1.0 +current_version = 0.5.0 commit = True tag = True diff --git a/setup.py b/setup.py index 4c0ba280..8a6ce75f 100644 --- a/setup.py +++ b/setup.py @@ -41,6 +41,6 @@ test_suite='tests', tests_require=test_requirements, url='https://github.com/msk-access/cwl_commandlinetools', - version='0.4.0', + version='0.5.0', zip_safe=False, ) diff --git a/tests/test_cwl_commandlinetools.py b/tests/test_cwl_commandlinetools.py index 12359623..9ce7b51e 100644 --- a/tests/test_cwl_commandlinetools.py +++ b/tests/test_cwl_commandlinetools.py @@ -6,20 +6,6 @@ import pytest -from cwl_commandlinetools import cwl_commandlinetools - - -@pytest.fixture -def response(): - """Sample pytest fixture. 
- - See more at: http://doc.pytest.org/en/latest/fixture.html - """ - # import requests - # return requests.get('https://github.com/audreyr/cookiecutter-pypackage') - - -def test_content(response): +def test_content(): """Sample pytest test function with the pytest fixture as an argument.""" - # from bs4 import BeautifulSoup - # assert 'GitHub' in BeautifulSoup(response.content).title.string + print("All good") From e1611b11e94aa2fc204e9f6186112080b3354056 Mon Sep 17 00:00:00 2001 From: "rons.shah@gmail.com" Date: Tue, 15 Oct 2019 23:10:45 -0400 Subject: [PATCH 024/476] :wrench: Making dev changes --- .travis.yml | 2 +- requirements.txt | 20 ++++++++++++++++++++ requirements_dev.txt | 11 ----------- requirements_p2.7.txt | 14 ++++++++++++++ setup.py | 2 +- tox.ini | 19 ++++++++++++------- 6 files changed, 48 insertions(+), 20 deletions(-) create mode 100644 requirements.txt delete mode 100644 requirements_dev.txt create mode 100644 requirements_p2.7.txt diff --git a/.travis.yml b/.travis.yml index 9771f62c..67ceb861 100644 --- a/.travis.yml +++ b/.travis.yml @@ -24,4 +24,4 @@ deploy: on: tags: true repo: msk-access/cwl_commandlinetools - python: 3.7 + python: 2.7 diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..151708e9 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,20 @@ +toil-ionox0[cwl]==0.0.7 +pytz +typing==3.7.4 + +# From fixing pkg_resources.ContextualVersionConflict: +ruamel.yaml==0.15.77 + +# From requirements_dev +pip==18.1 +bumpversion==0.5.3 +wheel==0.32.1 +watchdog==0.9.0 +flake8==3.5.0 +tox==3.5.2 +coverage==4.5.1 +Sphinx==1.8.1 +twine==1.12.1 +pytest==3.8.2 +pytest-runner==4.2 +coloredlogs==10.0.0 diff --git a/requirements_dev.txt b/requirements_dev.txt deleted file mode 100644 index ca3c21ef..00000000 --- a/requirements_dev.txt +++ /dev/null @@ -1,11 +0,0 @@ -pip==19.2.3 -bump2version==0.5.11 -wheel==0.33.6 -watchdog==0.9.0 -flake8==3.7.8 -tox==3.14.0 -coverage==4.5.4 -twine==1.14.0 - -pytest==4.6.5 
-pytest-runner==5.1 diff --git a/requirements_p2.7.txt b/requirements_p2.7.txt new file mode 100644 index 00000000..24958578 --- /dev/null +++ b/requirements_p2.7.txt @@ -0,0 +1,14 @@ +toil-ionox0[cwl]==0.0.7 +pytz==2019.2 +typing==3.7.4.1 +ruamel.yaml==0.15.77 +bumpversion==0.5.3 +watchdog==0.9.0 +flake8==3.7.8 +tox==3.14.0 +coverage==4.5.4 +twine==1.15.0 +pytest==4.6.5 +pytest-runner==5.1 +coloredlogs==10.0 +pathlib2==2.3.2 diff --git a/setup.py b/setup.py index 8a6ce75f..079a4466 100644 --- a/setup.py +++ b/setup.py @@ -32,7 +32,7 @@ description="Central location for CWL CommandLineTools", install_requires=requirements, license="Apache Software License 2.0", - long_description=readme + '\n\n' + history, + long_description=readme + '\n\n', include_package_data=True, keywords='cwl_commandlinetools', name='cwl_commandlinetools', diff --git a/tox.ini b/tox.ini index 978754f0..29dc8a5e 100644 --- a/tox.ini +++ b/tox.ini @@ -11,15 +11,20 @@ basepython = python deps = flake8 commands = flake8 cwl_commandlinetools -[testenv] +[testenv:py37] setenv = PYTHONPATH = {toxinidir} deps = - -r{toxinidir}/requirements_dev.txt -; If you want to make tox run the tests with the same versions, create a -; requirements.txt with the pinned versions and uncomment the following line: -; -r{toxinidir}/requirements.txt + -r{toxinidir}/requirements.txt + commands = - pip install -U pip - pytest --basetemp={envtmpdir} + py.test --capture=sys --basetemp={envtmpdir} tests + +[testenv:py27] +setenv = + PYTHONPATH = {toxinidir} +deps = + -r{toxinidir}/requirements_p2.7.txt +commands = + py.test --capture=sys --basetemp={envtmpdir} tests \ No newline at end of file From eecc79d5c0c5330c7def8d520bf1caafde3c1395 Mon Sep 17 00:00:00 2001 From: "rons.shah@gmail.com" Date: Tue, 15 Oct 2019 23:17:19 -0400 Subject: [PATCH 025/476] :blue_book: Update README --- README.md | 3 +++ docs/README.md | 2 ++ 2 files changed, 5 insertions(+) diff --git a/README.md b/README.md index 64957357..a4dfe1f7 100644 --- 
a/README.md +++ b/README.md @@ -24,6 +24,9 @@ Clone the repository: git clone --depth 50 https://github.com/msk-access/cwl-commandlinetools.git ``` +**Follow the README in repsective tool folder for execution of the tool.** + + ## Credits This package was created with Cookiecutter_ and the `audreyr/cookiecutter-pypackage`_ project template. diff --git a/docs/README.md b/docs/README.md index 25e14729..48ea23a6 100644 --- a/docs/README.md +++ b/docs/README.md @@ -23,6 +23,8 @@ Clone the repository: git clone --depth 50 https://github.com/msk-access/cwl-commandlinetools.git ``` +**Follow the README in repsective tool folder for execution of the tool.** + ## Credits This package was created with Cookiecutter _and the `audreyr/cookiecutter-pypackage`_ project template. From 26ee64b9c7c36129cebc600aa5968a03c76ae8d1 Mon Sep 17 00:00:00 2001 From: "rons.shah@gmail.com" Date: Tue, 15 Oct 2019 23:19:56 -0400 Subject: [PATCH 026/476] :blue_book: Update URL --- README.md | 2 +- docs/README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index a4dfe1f7..1dbb86ca 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ description: Central location for storing common workflow language based command [![Python 3](https://pyup.io/repos/github/msk-access/cwl-commandlinetools/python-3-shield.svg)](https://pyup.io/repos/github/msk-access/cwl-commandlinetools/) - Free software: Apache Software License 2.0 -- Documentation: https://msk-access.gitbook.io/cwl-commandlinetools +* Documentation: [https://msk-access.gitbook.io/command-line-tools-cwl/](https://msk-access.gitbook.io/command-line-tools-cwl/) ## Features diff --git a/docs/README.md b/docs/README.md index 48ea23a6..3545e793 100644 --- a/docs/README.md +++ b/docs/README.md @@ -9,7 +9,7 @@ description: >- [![PyPI version](https://badge.fury.io/py/cwl-commandlinetools.svg)](https://badge.fury.io/py/cwl-commandlinetools) [![Build 
Status](https://travis-ci.org/msk-access/cwl-commandlinetools.svg?branch=master)](https://travis-ci.org/msk-access/cwl-commandlinetools/) [![Updates](https://pyup.io/repos/github/msk-access/cwl-commandlinetools/shield.svg)](https://pyup.io/repos/github/msk-access/cwl-commandlinetools/) [![Python 3](https://pyup.io/repos/github/msk-access/cwl-commandlinetools/python-3-shield.svg)](https://pyup.io/repos/github/msk-access/cwl-commandlinetools/) * Free software: Apache Software License 2.0 -* Documentation: [https://msk-access.gitbook.io/cwl-commandlinetools](https://msk-access.gitbook.io/cwl-commandlinetools) +* Documentation: [https://msk-access.gitbook.io/command-line-tools-cwl/](https://msk-access.gitbook.io/command-line-tools-cwl/) ## Features From 2477bfbf9ac41d1b7a940d2355b8648d1ef512b0 Mon Sep 17 00:00:00 2001 From: "rons.shah@gmail.com" Date: Wed, 16 Oct 2019 00:36:07 -0400 Subject: [PATCH 027/476] Modified all to 0.6.0 --- MANIFEST.in | 2 +- abra2_2.17/abra2_2.17.cwl | 2 +- abra2_2.19/abra2_2.19.cwl | 5 +---- .../marianas_second_pass.cwl | 2 +- .../marianas_process_loop_umi.cwl | 2 +- .../marianas_separate_bams_1.8.1.cwl | 2 +- mutect_1.1.5/mutect_1.1.5.cwl | 2 +- .../picard_add_or_replace_read_groups_1.96.cwl | 2 +- .../picard_collect_alignment_summary_metrics_2.8.1.cwl | 2 +- .../picard_fix_mate_information_1.96.cwl | 2 +- picard_mark_duplicates_1.96/picard_mark_duplicates_1.96.cwl | 2 +- .../picard_mark_duplicates_2.8.1.cwl | 2 +- setup.cfg | 2 +- setup.py | 2 +- trim_galore_0.6.2/trim_galore_0.6.2.cwl | 2 +- waltz_count_reads_3.1.1/waltz_count_reads_3.1.1.cwl | 2 +- waltz_pileupmatrices_3.1.1/waltz_pileupmatrices_3.1.1.cwl | 2 +- 17 files changed, 17 insertions(+), 20 deletions(-) diff --git a/MANIFEST.in b/MANIFEST.in index d7a58ae3..9fcfa9f1 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -5,4 +5,4 @@ recursive-include tests * recursive-exclude * __pycache__ recursive-exclude * *.py[co] -recursive-include docs *.jpg *.png *.gif +recursive-include docs 
*.jpg *.png *.gif *.cwl diff --git a/abra2_2.17/abra2_2.17.cwl b/abra2_2.17/abra2_2.17.cwl index f89895fd..3245082d 100644 --- a/abra2_2.17/abra2_2.17.cwl +++ b/abra2_2.17/abra2_2.17.cwl @@ -183,7 +183,7 @@ requirements: ramMin: 60000 coresMin: 16 - class: DockerRequirement - dockerPull: 'mskcc/abra2:0.1.0' + dockerPull: 'mskaccess/abra2:0.6.0' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/abra2_2.19/abra2_2.19.cwl b/abra2_2.19/abra2_2.19.cwl index 8c9be47a..da93b6dc 100644 --- a/abra2_2.19/abra2_2.19.cwl +++ b/abra2_2.19/abra2_2.19.cwl @@ -173,7 +173,7 @@ requirements: ramMin: "${\r if(inputs.memory_per_job && inputs.memory_overhead) {\r \r return inputs.memory_per_job + inputs.memory_overhead\r }\r else if (inputs.memory_per_job && !inputs.memory_overhead){\r \r \treturn inputs.memory_per_job + 2000\r }\r else if(!inputs.memory_per_job && inputs.memory_overhead){\r \r return 15000 + inputs.memory_overhead\r }\r else {\r \r \treturn 17000 \r }\r}" coresMin: "${\r if (inputs.number_of_threads) {\r \r \treturn inputs.number_of_threads \r }\r else {\r \r return 4\r }\r}" - class: DockerRequirement - dockerPull: 'mskcc/abra2:0.2.0' + dockerPull: 'mskaccess/abra2:0.6.0' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' @@ -193,6 +193,3 @@ requirements: - class: 'doap:Version' 'doap:name': abra2 'doap:revision': 2.19 - - class: 'doap:Version' - 'doap:name': cwl-wrapper - 'doap:revision': 1.0.0 diff --git a/marianas_collapsing_second_pass_1.8.1/marianas_second_pass.cwl b/marianas_collapsing_second_pass_1.8.1/marianas_second_pass.cwl index 7975c3c2..6f976867 100644 --- a/marianas_collapsing_second_pass_1.8.1/marianas_second_pass.cwl +++ b/marianas_collapsing_second_pass_1.8.1/marianas_second_pass.cwl @@ -141,7 +141,7 @@ requirements: # } coresMin: 1 - class: DockerRequirement - dockerPull: 'mskcc/marianas:0.1.0' + dockerPull: 'mskaccess/marianas:0.6.0' - class: 
InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/marianas_process_loop_umi_1.8.1/marianas_process_loop_umi.cwl b/marianas_process_loop_umi_1.8.1/marianas_process_loop_umi.cwl index 616ddb01..63d01ce1 100644 --- a/marianas_process_loop_umi_1.8.1/marianas_process_loop_umi.cwl +++ b/marianas_process_loop_umi_1.8.1/marianas_process_loop_umi.cwl @@ -124,7 +124,7 @@ requirements: # } coresMin: 1 - class: DockerRequirement - dockerPull: 'mskcc/marianas:0.1.0' + dockerPull: 'mskaccess/marianas:0.6.0' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/marianas_separate_bams_1.8.1/marianas_separate_bams_1.8.1.cwl b/marianas_separate_bams_1.8.1/marianas_separate_bams_1.8.1.cwl index 64dd863f..72b5f82f 100644 --- a/marianas_separate_bams_1.8.1/marianas_separate_bams_1.8.1.cwl +++ b/marianas_separate_bams_1.8.1/marianas_separate_bams_1.8.1.cwl @@ -91,7 +91,7 @@ requirements: ramMin: 30000 coresMin: 1 - class: DockerRequirement - dockerPull: 'mskcc/marianas:0.1.0' + dockerPull: 'mskaccess/marianas:0.6.0' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/mutect_1.1.5/mutect_1.1.5.cwl b/mutect_1.1.5/mutect_1.1.5.cwl index d277766d..e7435e4b 100644 --- a/mutect_1.1.5/mutect_1.1.5.cwl +++ b/mutect_1.1.5/mutect_1.1.5.cwl @@ -725,7 +725,7 @@ requirements: ramMin: 34000 coresMin: 1 - class: DockerRequirement - dockerPull: mskcc/mutect:0.1.0 + dockerPull: mskaccess/mutect:0.6.0 - class: InlineJavascriptRequirement dct:contributor: diff --git a/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl b/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl index 091ebc34..e7a75e8b 100644 --- a/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl +++ b/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl @@ -190,7 +190,7 @@ requirements: ramMin: 
16000 coresMin: 2 - class: DockerRequirement - dockerPull: 'mskcc/picard_1.96:0.1.0' + dockerPull: 'mskaccess/picard_1.96:0.6.0' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/picard_collect_alignment_summary_metrics_2.8.1/picard_collect_alignment_summary_metrics_2.8.1.cwl b/picard_collect_alignment_summary_metrics_2.8.1/picard_collect_alignment_summary_metrics_2.8.1.cwl index 23ceec08..1fd9ad6c 100644 --- a/picard_collect_alignment_summary_metrics_2.8.1/picard_collect_alignment_summary_metrics_2.8.1.cwl +++ b/picard_collect_alignment_summary_metrics_2.8.1/picard_collect_alignment_summary_metrics_2.8.1.cwl @@ -156,7 +156,7 @@ requirements: ramMin: 12000 coresMin: 1 - class: DockerRequirement - dockerPull: 'mskcc/picard:2.8.1' + dockerPull: 'mskaccess/picard:0.6.0' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl b/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl index cfc74168..ac3bc1c3 100644 --- a/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl +++ b/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl @@ -139,7 +139,7 @@ requirements: ramMin: 16000 coresMin: 2 - class: DockerRequirement - dockerPull: 'mskcc/picard_1.96:0.1.0' + dockerPull: 'mskaccess/picard_1.96:0.6.0' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/picard_mark_duplicates_1.96/picard_mark_duplicates_1.96.cwl b/picard_mark_duplicates_1.96/picard_mark_duplicates_1.96.cwl index f6b19306..d83bb12d 100644 --- a/picard_mark_duplicates_1.96/picard_mark_duplicates_1.96.cwl +++ b/picard_mark_duplicates_1.96/picard_mark_duplicates_1.96.cwl @@ -115,7 +115,7 @@ requirements: ramMin: "${\r if(inputs.memory_per_job && inputs.memory_overhead) {\r \r return inputs.memory_per_job + inputs.memory_overhead\r }\r else if 
(inputs.memory_per_job && !inputs.memory_overhead){\r \r \treturn inputs.memory_per_job + 2000\r }\r else if(!inputs.memory_per_job && inputs.memory_overhead){\r \r return 15000 + inputs.memory_overhead\r }\r else {\r \r \treturn 17000 \r }\r}" coresMin: "${\r if (inputs.number_of_threads) {\r \r \treturn inputs.number_of_threads \r }\r else {\r \r return 2\r }\r}" - class: DockerRequirement - dockerPull: 'mskcc/picard_1.96:0.1.0' + dockerPull: 'mskaccess/picard_1.96:0.6.0' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/picard_mark_duplicates_2.8.1/picard_mark_duplicates_2.8.1.cwl b/picard_mark_duplicates_2.8.1/picard_mark_duplicates_2.8.1.cwl index ccc6165a..bf25c199 100644 --- a/picard_mark_duplicates_2.8.1/picard_mark_duplicates_2.8.1.cwl +++ b/picard_mark_duplicates_2.8.1/picard_mark_duplicates_2.8.1.cwl @@ -153,7 +153,7 @@ requirements: ramMin: "${\r if(inputs.memory_per_job && inputs.memory_overhead) {\r \r return inputs.memory_per_job + inputs.memory_overhead\r }\r else if (inputs.memory_per_job && !inputs.memory_overhead){\r \r \treturn inputs.memory_per_job + 2000\r }\r else if(!inputs.memory_per_job && inputs.memory_overhead){\r \r return 15000 + inputs.memory_overhead\r }\r else {\r \r \treturn 17000 \r }\r}" coresMin: "${\r if (inputs.number_of_threads) {\r \r \treturn inputs.number_of_threads \r }\r else {\r \r return 2\r }\r}" - class: DockerRequirement - dockerPull: 'mskcc/picard:2.8.1' + dockerPull: 'mskaccess/picard:0.6.0' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/setup.cfg b/setup.cfg index 55e8ea2b..77e1ef29 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.5.0 +current_version = 0.6.0 commit = True tag = True diff --git a/setup.py b/setup.py index 079a4466..6006f13f 100644 --- a/setup.py +++ b/setup.py @@ -41,6 +41,6 @@ test_suite='tests', tests_require=test_requirements, 
url='https://github.com/msk-access/cwl_commandlinetools', - version='0.5.0', + version='0.6.0', zip_safe=False, ) diff --git a/trim_galore_0.6.2/trim_galore_0.6.2.cwl b/trim_galore_0.6.2/trim_galore_0.6.2.cwl index 4f2d83b9..bfdd6572 100644 --- a/trim_galore_0.6.2/trim_galore_0.6.2.cwl +++ b/trim_galore_0.6.2/trim_galore_0.6.2.cwl @@ -162,7 +162,7 @@ requirements: ramMin: 8000 coresMin: 4 - class: DockerRequirement - dockerPull: 'mskcc/trim_galore:0.1.0' + dockerPull: 'mskaccess/trim_galore:0.6.0' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/waltz_count_reads_3.1.1/waltz_count_reads_3.1.1.cwl b/waltz_count_reads_3.1.1/waltz_count_reads_3.1.1.cwl index b06321c7..2ffb545c 100644 --- a/waltz_count_reads_3.1.1/waltz_count_reads_3.1.1.cwl +++ b/waltz_count_reads_3.1.1/waltz_count_reads_3.1.1.cwl @@ -78,7 +78,7 @@ requirements: # ramMin: "${\r if(inputs.memory_per_job && inputs.memory_overhead) {\r \r return inputs.memory_per_job + inputs.memory_overhead\r }\r else if (inputs.memory_per_job && !inputs.memory_overhead){\r \r \treturn inputs.memory_per_job + 2000\r }\r else if(!inputs.memory_per_job && inputs.memory_overhead){\r \r return 8000 + inputs.memory_overhead\r }\r else {\r \r \treturn 8000 \r }\r}" # coresMin: "${\r if (inputs.number_of_threads) {\r \r \treturn inputs.number_of_threads \r }\r else {\r \r return 1\r }\r}" - class: DockerRequirement - dockerPull: 'mskcc/waltz:1.0.0' + dockerPull: 'mskaccess/waltz:0.6.0' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/waltz_pileupmatrices_3.1.1/waltz_pileupmatrices_3.1.1.cwl b/waltz_pileupmatrices_3.1.1/waltz_pileupmatrices_3.1.1.cwl index 65a73de9..c65a54ca 100644 --- a/waltz_pileupmatrices_3.1.1/waltz_pileupmatrices_3.1.1.cwl +++ b/waltz_pileupmatrices_3.1.1/waltz_pileupmatrices_3.1.1.cwl @@ -90,7 +90,7 @@ requirements: # ramMin: "${\r if(inputs.memory_per_job && inputs.memory_overhead) {\r \r return 
inputs.memory_per_job + inputs.memory_overhead\r }\r else if (inputs.memory_per_job && !inputs.memory_overhead){\r \r \treturn inputs.memory_per_job + 2000\r }\r else if(!inputs.memory_per_job && inputs.memory_overhead){\r \r return 8000 + inputs.memory_overhead\r }\r else {\r \r \treturn 8000 \r }\r}" # coresMin: "${\r if (inputs.number_of_threads) {\r \r \treturn inputs.number_of_threads \r }\r else {\r \r return 1\r }\r}" - class: DockerRequirement - dockerPull: 'mskcc/waltz:1.0.0' + dockerPull: 'mskcc/waltz:0.6.0' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' From 80fd7344eac511d49222de9bfc8c6730b117cc39 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Wed, 16 Oct 2019 14:14:05 -0400 Subject: [PATCH 028/476] Bumping the minor version --- .travis.yml | 2 +- __init__.py | 2 +- abra2_2.17/abra2_2.17.cwl | 2 +- abra2_2.19/abra2_2.19.cwl | 2 +- cwl_commandlinetools/__init__.py | 2 +- marianas_collapsing_first_pass_1.8.1/marianas_first_pass.cwl | 2 +- marianas_collapsing_second_pass_1.8.1/marianas_second_pass.cwl | 2 +- marianas_process_loop_umi_1.8.1/marianas_process_loop_umi.cwl | 2 +- marianas_separate_bams_1.8.1/marianas_separate_bams_1.8.1.cwl | 2 +- mutect_1.1.5/mutect_1.1.5.cwl | 2 +- .../picard_add_or_replace_read_groups_1.96.cwl | 2 +- .../picard_collect_alignment_summary_metrics_2.8.1.cwl | 2 +- .../picard_fix_mate_information_1.96.cwl | 2 +- picard_mark_duplicates_1.96/picard_mark_duplicates_1.96.cwl | 2 +- picard_mark_duplicates_2.8.1/picard_mark_duplicates_2.8.1.cwl | 2 +- setup.cfg | 2 +- setup.py | 2 +- trim_galore_0.6.2/trim_galore_0.6.2.cwl | 2 +- waltz_count_reads_3.1.1/waltz_count_reads_3.1.1.cwl | 2 +- waltz_pileupmatrices_3.1.1/waltz_pileupmatrices_3.1.1.cwl | 2 +- 20 files changed, 20 insertions(+), 20 deletions(-) diff --git a/.travis.yml b/.travis.yml index 67ceb861..8c0c649e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -23,5 +23,5 @@ deploy: secure: 
ithmgopowELxskUKR2LAi/cMnet6J+PH4emAOiJ57rBqFkwq8lVXmT/NW8D7k2VoJdTguB9v+RH+Q/91ShJh1VnwbRQ1bX7Ecr7P7O35DIQBp40Eqg5AH6ap8se37dsufnNyVNhj8vNtusd65jC3+6w4kQkcCfysD5eByUDDw04FNYCbhbXIn4S/JJ/EJUFFb8ElNzo5iWpVPnfP9uddYxpXTEgWZBo+TylZsa1gxMYUqio5xa34TMkUzN5N4tooe0C1uZ6H9/zRzh2pLwoof1ITf3wws/FOJkPwsJFZWviC+0K1pNJTaTY77kHEEC5W8/JupvdShrFg+BzldLHWsGuyC+Mfj3bjJFTqyDnbthc0kosQ3x7UdTKzyztL+gekdCcNEsusN10VTdO0DYkUnYTc1YWpsC+ORQxPGANg+RrvjO8lGobvQ4ZlhcWBJ1ynlvntlg+b0iHzuOntznwZGTJRcTqtjsH+zzIEo12FiWnJwjvM78OkEsNp5XYaKgYryhQBIo5Uqy79h7wtF8mAnIRrsK5cyQmYTJtWZ+OIQbuzj+l9o+Ff19hykD7LEB8I9So8240w09+HoNxpZKYMQFndGGZnC1wkoZNdBEOjn9Az9ZouHO7qkFHZVSp3rqSAEqiG8fm3TTl/5VAO9RNEKT5vmZMDhl04F6LXZYGCLKU= on: tags: true - repo: msk-access/cwl_commandlinetools + repo: msk-access/cwl-commandlinetools python: 2.7 diff --git a/__init__.py b/__init__.py index 846adb87..2f2edd13 100644 --- a/__init__.py +++ b/__init__.py @@ -4,4 +4,4 @@ __author__ = """msk-access""" __email__ = 'msk.access@gmail.com' -__version__ = '0.4.0' +__version__ = '0.6.1' diff --git a/abra2_2.17/abra2_2.17.cwl b/abra2_2.17/abra2_2.17.cwl index 3245082d..4aca77b7 100644 --- a/abra2_2.17/abra2_2.17.cwl +++ b/abra2_2.17/abra2_2.17.cwl @@ -183,7 +183,7 @@ requirements: ramMin: 60000 coresMin: 16 - class: DockerRequirement - dockerPull: 'mskaccess/abra2:0.6.0' + dockerPull: 'mskaccess/abra2:0.6.1' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/abra2_2.19/abra2_2.19.cwl b/abra2_2.19/abra2_2.19.cwl index da93b6dc..a623c3c2 100644 --- a/abra2_2.19/abra2_2.19.cwl +++ b/abra2_2.19/abra2_2.19.cwl @@ -173,7 +173,7 @@ requirements: ramMin: "${\r if(inputs.memory_per_job && inputs.memory_overhead) {\r \r return inputs.memory_per_job + inputs.memory_overhead\r }\r else if (inputs.memory_per_job && !inputs.memory_overhead){\r \r \treturn inputs.memory_per_job + 2000\r }\r else if(!inputs.memory_per_job && inputs.memory_overhead){\r \r return 15000 + inputs.memory_overhead\r }\r else {\r \r 
\treturn 17000 \r }\r}" coresMin: "${\r if (inputs.number_of_threads) {\r \r \treturn inputs.number_of_threads \r }\r else {\r \r return 4\r }\r}" - class: DockerRequirement - dockerPull: 'mskaccess/abra2:0.6.0' + dockerPull: 'mskaccess/abra2_2.19:0.6.1' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/cwl_commandlinetools/__init__.py b/cwl_commandlinetools/__init__.py index 69862367..2f2edd13 100644 --- a/cwl_commandlinetools/__init__.py +++ b/cwl_commandlinetools/__init__.py @@ -4,4 +4,4 @@ __author__ = """msk-access""" __email__ = 'msk.access@gmail.com' -__version__ = '0.1.0' +__version__ = '0.6.1' diff --git a/marianas_collapsing_first_pass_1.8.1/marianas_first_pass.cwl b/marianas_collapsing_first_pass_1.8.1/marianas_first_pass.cwl index df5a19f0..8954b32a 100644 --- a/marianas_collapsing_first_pass_1.8.1/marianas_first_pass.cwl +++ b/marianas_collapsing_first_pass_1.8.1/marianas_first_pass.cwl @@ -137,7 +137,7 @@ requirements: # } coresMin: 1 - class: DockerRequirement - dockerPull: 'mskcc/marianas:0.1.0' + dockerPull: 'mskaccess/marianas:0.6.1' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/marianas_collapsing_second_pass_1.8.1/marianas_second_pass.cwl b/marianas_collapsing_second_pass_1.8.1/marianas_second_pass.cwl index 6f976867..4fdd46f3 100644 --- a/marianas_collapsing_second_pass_1.8.1/marianas_second_pass.cwl +++ b/marianas_collapsing_second_pass_1.8.1/marianas_second_pass.cwl @@ -141,7 +141,7 @@ requirements: # } coresMin: 1 - class: DockerRequirement - dockerPull: 'mskaccess/marianas:0.6.0' + dockerPull: 'mskaccess/marianas:0.6.1' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/marianas_process_loop_umi_1.8.1/marianas_process_loop_umi.cwl b/marianas_process_loop_umi_1.8.1/marianas_process_loop_umi.cwl index 63d01ce1..5b7a7f95 100644 --- a/marianas_process_loop_umi_1.8.1/marianas_process_loop_umi.cwl +++ 
b/marianas_process_loop_umi_1.8.1/marianas_process_loop_umi.cwl @@ -124,7 +124,7 @@ requirements: # } coresMin: 1 - class: DockerRequirement - dockerPull: 'mskaccess/marianas:0.6.0' + dockerPull: 'mskaccess/marianas:0.6.1' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/marianas_separate_bams_1.8.1/marianas_separate_bams_1.8.1.cwl b/marianas_separate_bams_1.8.1/marianas_separate_bams_1.8.1.cwl index 72b5f82f..55e52b00 100644 --- a/marianas_separate_bams_1.8.1/marianas_separate_bams_1.8.1.cwl +++ b/marianas_separate_bams_1.8.1/marianas_separate_bams_1.8.1.cwl @@ -91,7 +91,7 @@ requirements: ramMin: 30000 coresMin: 1 - class: DockerRequirement - dockerPull: 'mskaccess/marianas:0.6.0' + dockerPull: 'mskaccess/marianas:0.6.1' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/mutect_1.1.5/mutect_1.1.5.cwl b/mutect_1.1.5/mutect_1.1.5.cwl index e7435e4b..222e8714 100644 --- a/mutect_1.1.5/mutect_1.1.5.cwl +++ b/mutect_1.1.5/mutect_1.1.5.cwl @@ -725,7 +725,7 @@ requirements: ramMin: 34000 coresMin: 1 - class: DockerRequirement - dockerPull: mskaccess/mutect:0.6.0 + dockerPull: mskaccess/mutect:0.6.1 - class: InlineJavascriptRequirement dct:contributor: diff --git a/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl b/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl index e7a75e8b..dd8f32de 100644 --- a/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl +++ b/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl @@ -190,7 +190,7 @@ requirements: ramMin: 16000 coresMin: 2 - class: DockerRequirement - dockerPull: 'mskaccess/picard_1.96:0.6.0' + dockerPull: 'mskaccess/picard_1.96:0.6.1' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git 
a/picard_collect_alignment_summary_metrics_2.8.1/picard_collect_alignment_summary_metrics_2.8.1.cwl b/picard_collect_alignment_summary_metrics_2.8.1/picard_collect_alignment_summary_metrics_2.8.1.cwl index 1fd9ad6c..6bce80ac 100644 --- a/picard_collect_alignment_summary_metrics_2.8.1/picard_collect_alignment_summary_metrics_2.8.1.cwl +++ b/picard_collect_alignment_summary_metrics_2.8.1/picard_collect_alignment_summary_metrics_2.8.1.cwl @@ -156,7 +156,7 @@ requirements: ramMin: 12000 coresMin: 1 - class: DockerRequirement - dockerPull: 'mskaccess/picard:0.6.0' + dockerPull: 'mskaccess/picard:0.6.1' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl b/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl index ac3bc1c3..bfcdc7fe 100644 --- a/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl +++ b/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl @@ -139,7 +139,7 @@ requirements: ramMin: 16000 coresMin: 2 - class: DockerRequirement - dockerPull: 'mskaccess/picard_1.96:0.6.0' + dockerPull: 'mskaccess/picard_1.96:0.6.1' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/picard_mark_duplicates_1.96/picard_mark_duplicates_1.96.cwl b/picard_mark_duplicates_1.96/picard_mark_duplicates_1.96.cwl index d83bb12d..4c45a2a7 100644 --- a/picard_mark_duplicates_1.96/picard_mark_duplicates_1.96.cwl +++ b/picard_mark_duplicates_1.96/picard_mark_duplicates_1.96.cwl @@ -115,7 +115,7 @@ requirements: ramMin: "${\r if(inputs.memory_per_job && inputs.memory_overhead) {\r \r return inputs.memory_per_job + inputs.memory_overhead\r }\r else if (inputs.memory_per_job && !inputs.memory_overhead){\r \r \treturn inputs.memory_per_job + 2000\r }\r else if(!inputs.memory_per_job && inputs.memory_overhead){\r \r return 15000 + inputs.memory_overhead\r }\r else {\r \r \treturn 17000 
\r }\r}" coresMin: "${\r if (inputs.number_of_threads) {\r \r \treturn inputs.number_of_threads \r }\r else {\r \r return 2\r }\r}" - class: DockerRequirement - dockerPull: 'mskaccess/picard_1.96:0.6.0' + dockerPull: 'mskaccess/picard_1.96:0.6.1' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/picard_mark_duplicates_2.8.1/picard_mark_duplicates_2.8.1.cwl b/picard_mark_duplicates_2.8.1/picard_mark_duplicates_2.8.1.cwl index bf25c199..8ad50ddf 100644 --- a/picard_mark_duplicates_2.8.1/picard_mark_duplicates_2.8.1.cwl +++ b/picard_mark_duplicates_2.8.1/picard_mark_duplicates_2.8.1.cwl @@ -153,7 +153,7 @@ requirements: ramMin: "${\r if(inputs.memory_per_job && inputs.memory_overhead) {\r \r return inputs.memory_per_job + inputs.memory_overhead\r }\r else if (inputs.memory_per_job && !inputs.memory_overhead){\r \r \treturn inputs.memory_per_job + 2000\r }\r else if(!inputs.memory_per_job && inputs.memory_overhead){\r \r return 15000 + inputs.memory_overhead\r }\r else {\r \r \treturn 17000 \r }\r}" coresMin: "${\r if (inputs.number_of_threads) {\r \r \treturn inputs.number_of_threads \r }\r else {\r \r return 2\r }\r}" - class: DockerRequirement - dockerPull: 'mskaccess/picard:0.6.0' + dockerPull: 'mskaccess/picard:0.6.1' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/setup.cfg b/setup.cfg index 77e1ef29..8334c8a8 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.6.0 +current_version = 0.6.1 commit = True tag = True diff --git a/setup.py b/setup.py index 6006f13f..874763b6 100644 --- a/setup.py +++ b/setup.py @@ -41,6 +41,6 @@ test_suite='tests', tests_require=test_requirements, url='https://github.com/msk-access/cwl_commandlinetools', - version='0.6.0', + version='0.6.1', zip_safe=False, ) diff --git a/trim_galore_0.6.2/trim_galore_0.6.2.cwl b/trim_galore_0.6.2/trim_galore_0.6.2.cwl index bfdd6572..7bac7988 100644 --- 
a/trim_galore_0.6.2/trim_galore_0.6.2.cwl +++ b/trim_galore_0.6.2/trim_galore_0.6.2.cwl @@ -162,7 +162,7 @@ requirements: ramMin: 8000 coresMin: 4 - class: DockerRequirement - dockerPull: 'mskaccess/trim_galore:0.6.0' + dockerPull: 'mskaccess/trim_galore:0.6.1' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/waltz_count_reads_3.1.1/waltz_count_reads_3.1.1.cwl b/waltz_count_reads_3.1.1/waltz_count_reads_3.1.1.cwl index 2ffb545c..3a6a7d70 100644 --- a/waltz_count_reads_3.1.1/waltz_count_reads_3.1.1.cwl +++ b/waltz_count_reads_3.1.1/waltz_count_reads_3.1.1.cwl @@ -78,7 +78,7 @@ requirements: # ramMin: "${\r if(inputs.memory_per_job && inputs.memory_overhead) {\r \r return inputs.memory_per_job + inputs.memory_overhead\r }\r else if (inputs.memory_per_job && !inputs.memory_overhead){\r \r \treturn inputs.memory_per_job + 2000\r }\r else if(!inputs.memory_per_job && inputs.memory_overhead){\r \r return 8000 + inputs.memory_overhead\r }\r else {\r \r \treturn 8000 \r }\r}" # coresMin: "${\r if (inputs.number_of_threads) {\r \r \treturn inputs.number_of_threads \r }\r else {\r \r return 1\r }\r}" - class: DockerRequirement - dockerPull: 'mskaccess/waltz:0.6.0' + dockerPull: 'mskaccess/waltz:0.6.1' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/waltz_pileupmatrices_3.1.1/waltz_pileupmatrices_3.1.1.cwl b/waltz_pileupmatrices_3.1.1/waltz_pileupmatrices_3.1.1.cwl index c65a54ca..4449ee4e 100644 --- a/waltz_pileupmatrices_3.1.1/waltz_pileupmatrices_3.1.1.cwl +++ b/waltz_pileupmatrices_3.1.1/waltz_pileupmatrices_3.1.1.cwl @@ -90,7 +90,7 @@ requirements: # ramMin: "${\r if(inputs.memory_per_job && inputs.memory_overhead) {\r \r return inputs.memory_per_job + inputs.memory_overhead\r }\r else if (inputs.memory_per_job && !inputs.memory_overhead){\r \r \treturn inputs.memory_per_job + 2000\r }\r else if(!inputs.memory_per_job && inputs.memory_overhead){\r \r return 8000 + 
inputs.memory_overhead\r }\r else {\r \r \treturn 8000 \r }\r}" # coresMin: "${\r if (inputs.number_of_threads) {\r \r \treturn inputs.number_of_threads \r }\r else {\r \r return 1\r }\r}" - class: DockerRequirement - dockerPull: 'mskcc/waltz:0.6.0' + dockerPull: 'mskcc/waltz:0.6.1' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' From 11a601e3ecde4ef713152ba8760b5bd1704b6f4e Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Thu, 17 Oct 2019 22:24:53 -0400 Subject: [PATCH 029/476] :wrench: Repo Cleanup for 0.7.0 --- .gitignore | 4 ---- MANIFEST.in | 3 ++- README.md | 2 +- SUMMARY.md | 4 ---- __init__.py | 2 +- cwl_commandlinetools/__init__.py | 2 +- setup.cfg | 2 +- setup.py | 11 ++++------- 8 files changed, 10 insertions(+), 20 deletions(-) delete mode 100644 SUMMARY.md diff --git a/.gitignore b/.gitignore index e2391c52..063f3b15 100644 --- a/.gitignore +++ b/.gitignore @@ -111,7 +111,3 @@ venv.bak/ # vscode .vscode/* -!.vscode/settings.json -!.vscode/tasks.json -!.vscode/launch.json -!.vscode/extensions.json \ No newline at end of file diff --git a/MANIFEST.in b/MANIFEST.in index 9fcfa9f1..405d5d7e 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -2,7 +2,8 @@ include LICENSE include README.md recursive-include tests * +recursive-include *.cwl recursive-exclude * __pycache__ recursive-exclude * *.py[co] -recursive-include docs *.jpg *.png *.gif *.cwl +recursive-include docs *.jpg *.png *.gif diff --git a/README.md b/README.md index 98bc7e36..15a9cabe 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ description: Central location for storing common workflow language based command # MSK-ACCESS command-line tools [![PyPI version](https://badge.fury.io/py/cwl-commandlinetools.svg)](https://badge.fury.io/py/cwl-commandlinetools) -[![Build Status](https://travis-ci.org/msk-access/cwl-commandlinetools.svg?branch=master)](https://travis-ci.org/msk-access/cwl-commandlinetools/) +[![Build 
Status](https://travis-ci.com/msk-access/cwl-commandlinetools.svg?branch=master)](https://travis-ci.org/msk-access/cwl-commandlinetools/) [![Updates](https://pyup.io/repos/github/msk-access/cwl-commandlinetools/shield.svg)](https://pyup.io/repos/github/msk-access/cwl-commandlinetools/) [![Python 3](https://pyup.io/repos/github/msk-access/cwl-commandlinetools/python-3-shield.svg)](https://pyup.io/repos/github/msk-access/cwl-commandlinetools/) diff --git a/SUMMARY.md b/SUMMARY.md deleted file mode 100644 index 5da1733d..00000000 --- a/SUMMARY.md +++ /dev/null @@ -1,4 +0,0 @@ -# Table of contents - -* [Initial page](README.md) - diff --git a/__init__.py b/__init__.py index 2f2edd13..616ed899 100644 --- a/__init__.py +++ b/__init__.py @@ -4,4 +4,4 @@ __author__ = """msk-access""" __email__ = 'msk.access@gmail.com' -__version__ = '0.6.1' +__version__ = '0.7.0' diff --git a/cwl_commandlinetools/__init__.py b/cwl_commandlinetools/__init__.py index 2f2edd13..616ed899 100644 --- a/cwl_commandlinetools/__init__.py +++ b/cwl_commandlinetools/__init__.py @@ -4,4 +4,4 @@ __author__ = """msk-access""" __email__ = 'msk.access@gmail.com' -__version__ = '0.6.1' +__version__ = '0.7.0' diff --git a/setup.cfg b/setup.cfg index 8334c8a8..c0972676 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.6.1 +current_version = 0.7.0 commit = True tag = True diff --git a/setup.py b/setup.py index 874763b6..cf158915 100644 --- a/setup.py +++ b/setup.py @@ -8,9 +8,6 @@ with open('README.md') as readme_file: readme = readme_file.read() -#with open('HISTORY.rst') as history_file: -# history = history_file.read() - requirements = [ ] setup_requirements = ['pytest-runner', ] @@ -34,13 +31,13 @@ license="Apache Software License 2.0", long_description=readme + '\n\n', include_package_data=True, - keywords='cwl_commandlinetools', - name='cwl_commandlinetools', + keywords='cwl-commandlinetools', + name='cwl-commandlinetools', 
packages=find_packages(include=['cwl_commandlinetools', 'cwl_commandlinetools.*']), setup_requires=setup_requirements, test_suite='tests', tests_require=test_requirements, - url='https://github.com/msk-access/cwl_commandlinetools', - version='0.6.1', + url='https://github.com/msk-access/cwl-commandlinetools', + version='0.7.0', zip_safe=False, ) From 5159c5fe1756da8b1b3cf37f7ae11b8dc898b65b Mon Sep 17 00:00:00 2001 From: shalabhsuman Date: Tue, 22 Oct 2019 17:17:19 -0400 Subject: [PATCH 030/476] initiating the CWL-tool for Picard CollectMultipleMetrics --- .../picard_collectmultiplemetrics_2-8-1.cwl | 200 ++++++++++++++++++ 1 file changed, 200 insertions(+) create mode 100644 picard_collectmultiplemetric_2.8.1/picard_collectmultiplemetrics_2-8-1.cwl diff --git a/picard_collectmultiplemetric_2.8.1/picard_collectmultiplemetrics_2-8-1.cwl b/picard_collectmultiplemetric_2.8.1/picard_collectmultiplemetrics_2-8-1.cwl new file mode 100644 index 00000000..9c9ed32d --- /dev/null +++ b/picard_collectmultiplemetric_2.8.1/picard_collectmultiplemetrics_2-8-1.cwl @@ -0,0 +1,200 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: picard_mark_duplicates_2_8_1 +baseCommand: + - java +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: input + type: File + inputBinding: + position: 0 + prefix: I= + separate: false + doc: Input file (bam or sam). Required. + - id: output_file_name + type: string? + doc: Output file (bam or sam). + - id: validation_stringency + type: string? + inputBinding: + position: 0 + prefix: VALIDATION_STRINGENCY= + separate: false + doc: >- + Validation stringency for all SAM files read by this program. 
Setting + stringency to SILENT can improve performance when processing a BAM file in + which variable-length data (read, qualities, tags) do not otherwise need + to be decoded. Default value: STRICT. This option can be set to 'null' to + clear the default value. Possible values: {STRICT,LENIENT, SILENT} + - default: true + id: assume_sorted + type: boolean? + inputBinding: + position: 0 + prefix: AS=true + - id: bam_compression_level + type: int? + inputBinding: + position: 0 + prefix: COMPRESSION_LEVEL= + separate: false + doc: >- + Compression level for all compressed files created (e.g. BAM and GELI). + Default value:5. This option can be set to 'null' to clear the default + value. + - default: true + id: create_bam_index + type: boolean? + inputBinding: + position: 0 + prefix: CREATE_INDEX=true + doc: >- + Whether to create a BAM index when writing a coordinate-sorted BAM file. + Default value:false. This option can be set to 'null' to clear the default + value. Possible values:{true, false} + - id: stop_after + type: int? + inputBinding: + position: 0 + doc: >- + Stop after processing N reads, mainly for debugging. Default value: 0. + This option can be set to 'null' to clear the default value. + - id: metric_accumulation_level + type: string? + inputBinding: + position: 0 + prefix: METRIC_ACCUMULATION_LEVEL= + doc: >- + The level(s) at which to accumulate metrics. Default value: [ALL_READS]. + This option can be set to 'null' to clear the default value. Possible + values: {ALL_READS, SAMPLE, LIBRARY, READ_GROUP} This option may be + specified 0 or more times. This option can be set to 'null' to clear the + default list. + - id: file_extension + type: string? + inputBinding: + position: 0 + prefix: FILE_EXTENSION= + doc: >- + Append the given file extension to all metric file names (ex. + OUTPUT.insert_size_metrics.EXT). None if null Default value: null. + - id: program_list + type: 'string[]?' 
+ inputBinding: + position: 0 + prefix: PROGRAM= + doc: >- + Set of metrics programs to apply during the pass through the SAM file. + Default value: [CollectAlignmentSummaryMetrics, + CollectBaseDistributionByCycle, CollectInsertSizeMetrics, + MeanQualityByCycle, QualityScoreDistribution]. This option can be set to + 'null' to clear the default value. Possible values: + {CollectAlignmentSummaryMetrics, CollectInsertSizeMetrics, + QualityScoreDistribution, MeanQualityByCycle, + CollectBaseDistributionByCycle, CollectGcBiasMetrics, RnaSeqMetrics, + CollectSequencingArtifactMetrics, CollectQualityYieldMetrics} This option + may be specified 0 or more times. This option can be set to 'null' to + clear the default list. + - id: intervals_file + type: File? + inputBinding: + position: 0 + prefix: INTERVALS= + doc: >- + An optional list of intervals to restrict analysis to. Only pertains to + some of the PROGRAMs. Programs whose stand-alone CLP does not have an + INTERVALS argument will silently ignore this argument. Default value: + null. + - id: dbsnp_file + type: File? + inputBinding: + position: 0 + prefix: DB_SNP= + doc: >- + VCF format dbSNP file, used to exclude regions around known polymorphisms + from analysis by some PROGRAMs; PROGRAMs whose CLP doesn't allow for this + argument will quietly ignore it. Default value: null. + - id: include_unpaired + type: boolean? + inputBinding: + position: 0 + prefix: INCLUDE_UNPAIRED=true + doc: >- + Include unpaired reads in CollectSequencingArtifactMetrics. If set to true + then all paired reads will be included as well - MINIMUM_INSERT_SIZE and + MAXIMUM_INSERT_SIZE will be ignored in CollectSequencingArtifactMetrics. + Default value: false. This option can be set to 'null' to clear the + default value. Possible values: {true, false} +outputs: + - id: all_metrics + type: Directory + outputBinding: + glob: . 
+ outputEval: | + ${ + self[0].basename = inputs.output_file_name + '_picard_metrics'; + return self[0] + } +label: picard_collectmultiplemetrices_2.8.1 +arguments: + - position: 0 + prefix: '' + valueFrom: "${\n if(inputs.memory_per_job && inputs.memory_overhead) {\n \n if(inputs.memory_per_job % 1000 == 0) {\n \t\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n \n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\" \n }\n }\n else if (inputs.memory_per_job && !inputs.memory_overhead){\n \n if(inputs.memory_per_job % 1000 == 0) {\n \t\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n \n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\" \n }\n }\n else if(!inputs.memory_per_job && inputs.memory_overhead){\n \n return \"-Xmx15G\"\n }\n else {\n \n \treturn \"-Xmx15G\"\n }\n}" + - position: 0 + prefix: '-jar' + valueFrom: /usr/local/bin/picard.jar + - position: 0 + prefix: '' + valueFrom: CollectMultipleMetrics + - position: 0 + prefix: O= + separate: false + valueFrom: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/,'_md.bam') + } + } +requirements: + - class: ResourceRequirement + ramMin: "${\r if(inputs.memory_per_job && inputs.memory_overhead) {\r \r return inputs.memory_per_job + inputs.memory_overhead\r }\r else if (inputs.memory_per_job && !inputs.memory_overhead){\r \r \treturn inputs.memory_per_job + 2000\r }\r else if(!inputs.memory_per_job && inputs.memory_overhead){\r \r return 15000 + inputs.memory_overhead\r }\r else {\r \r \treturn 17000 \r }\r}" + coresMin: "${\r if (inputs.number_of_threads) {\r \r \treturn inputs.number_of_threads \r }\r else {\r \r return 2\r }\r}" + - class: DockerRequirement + dockerPull: 'mskaccess/picard:0.6.1' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 
'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': picard + 'doap:revision': 2.8.1 + - class: 'doap:Version' + 'doap:name': cwl-wrapper + 'doap:revision': 1.0.0 From 3ce1c0184eeea2eac97d28d43e3f7e1d86c621a9 Mon Sep 17 00:00:00 2001 From: shalabhsuman Date: Tue, 22 Oct 2019 17:41:09 -0400 Subject: [PATCH 031/476] Optimizing the code to run Picard CollectMultipleMetrics --- .../picard_collectmultiplemetrics_2-8-1.cwl | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/picard_collectmultiplemetric_2.8.1/picard_collectmultiplemetrics_2-8-1.cwl b/picard_collectmultiplemetric_2.8.1/picard_collectmultiplemetrics_2-8-1.cwl index 9c9ed32d..afb5d72c 100644 --- a/picard_collectmultiplemetric_2.8.1/picard_collectmultiplemetrics_2-8-1.cwl +++ b/picard_collectmultiplemetric_2.8.1/picard_collectmultiplemetrics_2-8-1.cwl @@ -45,6 +45,7 @@ inputs: inputBinding: position: 0 prefix: AS=true + separate: false - id: bam_compression_level type: int? inputBinding: @@ -61,6 +62,7 @@ inputs: inputBinding: position: 0 prefix: CREATE_INDEX=true + separate: false doc: >- Whether to create a BAM index when writing a coordinate-sorted BAM file. Default value:false. This option can be set to 'null' to clear the default @@ -69,6 +71,8 @@ inputs: type: int? inputBinding: position: 0 + prefix: STOP_AFTER= + separate: false doc: >- Stop after processing N reads, mainly for debugging. Default value: 0. This option can be set to 'null' to clear the default value. @@ -77,6 +81,7 @@ inputs: inputBinding: position: 0 prefix: METRIC_ACCUMULATION_LEVEL= + separate: false doc: >- The level(s) at which to accumulate metrics. 
Default value: [ALL_READS]. This option can be set to 'null' to clear the default value. Possible @@ -88,6 +93,7 @@ inputs: inputBinding: position: 0 prefix: FILE_EXTENSION= + separate: false doc: >- Append the given file extension to all metric file names (ex. OUTPUT.insert_size_metrics.EXT). None if null Default value: null. @@ -96,6 +102,7 @@ inputs: inputBinding: position: 0 prefix: PROGRAM= + separate: false doc: >- Set of metrics programs to apply during the pass through the SAM file. Default value: [CollectAlignmentSummaryMetrics, @@ -113,6 +120,7 @@ inputs: inputBinding: position: 0 prefix: INTERVALS= + separate: false doc: >- An optional list of intervals to restrict analysis to. Only pertains to some of the PROGRAMs. Programs whose stand-alone CLP does not have an @@ -123,6 +131,7 @@ inputs: inputBinding: position: 0 prefix: DB_SNP= + separate: false doc: >- VCF format dbSNP file, used to exclude regions around known polymorphisms from analysis by some PROGRAMs; PROGRAMs whose CLP doesn't allow for this @@ -132,6 +141,7 @@ inputs: inputBinding: position: 0 prefix: INCLUDE_UNPAIRED=true + separate: false doc: >- Include unpaired reads in CollectSequencingArtifactMetrics. If set to true then all paired reads will be included as well - MINIMUM_INSERT_SIZE and @@ -143,10 +153,15 @@ outputs: type: Directory outputBinding: glob: . 
- outputEval: | + outputEval: |- ${ - self[0].basename = inputs.output_file_name + '_picard_metrics'; + if(inputs.output_file_name){ + self[0].basename = inputs.output_file_name + '_picard_metrics'; + return self[0] + } else { + self[0].basename = 'picard_metrics'; return self[0] + } } label: picard_collectmultiplemetrices_2.8.1 arguments: From d11b0a3fd7034eafc70841aadc3017bf4e568bbc Mon Sep 17 00:00:00 2001 From: ionox0 Date: Thu, 24 Oct 2019 11:35:45 -0400 Subject: [PATCH 032/476] Picard hsmetrics tool added --- picard_hsmetrics_2.8.1/picard_hsmetrics_2.8.1.cwl | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 picard_hsmetrics_2.8.1/picard_hsmetrics_2.8.1.cwl diff --git a/picard_hsmetrics_2.8.1/picard_hsmetrics_2.8.1.cwl b/picard_hsmetrics_2.8.1/picard_hsmetrics_2.8.1.cwl new file mode 100644 index 00000000..23acfe90 --- /dev/null +++ b/picard_hsmetrics_2.8.1/picard_hsmetrics_2.8.1.cwl @@ -0,0 +1,10 @@ +$namespaces: + sbg: 'https://www.sevenbridges.com/' +id: picard_hsmetrics_2.8.1 +label: picard_hsmetrics_2.8.1 +class: CommandLineTool +cwlVersion: v1.0 +inputs: [] +outputs: [] +baseCommand: '' +doc: '' From 5f541c7b42f758470e8718b2685e1737bc091f30 Mon Sep 17 00:00:00 2001 From: ionox0 Date: Thu, 24 Oct 2019 11:36:53 -0400 Subject: [PATCH 033/476] add new tool (forgot to save in rabix) --- .../picard_hsmetrics_2.8.1.cwl | 142 +++++++++++++++++- 1 file changed, 135 insertions(+), 7 deletions(-) diff --git a/picard_hsmetrics_2.8.1/picard_hsmetrics_2.8.1.cwl b/picard_hsmetrics_2.8.1/picard_hsmetrics_2.8.1.cwl index 23acfe90..80c2fa08 100644 --- a/picard_hsmetrics_2.8.1/picard_hsmetrics_2.8.1.cwl +++ b/picard_hsmetrics_2.8.1/picard_hsmetrics_2.8.1.cwl @@ -1,10 +1,138 @@ +class: CommandLineTool +cwlVersion: v1.0 $namespaces: sbg: 'https://www.sevenbridges.com/' -id: picard_hsmetrics_2.8.1 +id: picard_hsmetrics_2_8_1 +baseCommand: + - java +inputs: + - id: bait_intervals + type: File + inputBinding: + position: 0 + prefix: BAIT_INTERVALS= + separate: 
false + - id: bait_set_name + type: string? + inputBinding: + position: 0 + prefix: BAIT_SET_NAME= + separate: false + - id: minimum_mapping_quality + type: int? + inputBinding: + position: 0 + prefix: MINIMUM_MAPPING_QUALITY= + separate: false + - id: minimum_base_quality + type: int? + inputBinding: + position: 0 + prefix: MINIMUM_BASE_QUALITY= + separate: false + - id: clip_overlapping_reads + type: boolean? + inputBinding: + position: 0 + prefix: CLIP_OVERLAPPING_READS=true + separate: false + - id: target_intervals + type: File? + inputBinding: + position: 0 + prefix: TARGET_INTERVALS= + separate: false + - id: input + type: File + inputBinding: + position: 0 + prefix: INPUT= + separate: false + - id: output_file_name + type: string? + inputBinding: + position: 0 + prefix: OUTPUT= + separate: false + - id: metric_accumulation_level + type: + - 'null' + - type: enum + symbols: + - ALL_READS + - SAMPLE + - LIBRARY + - READ_GROUP + name: metric_accumulation_level + inputBinding: + position: 0 + prefix: METRIC_ACCUMULATION_LEVEL= + separate: false + - id: per_target_coverage + type: File? + inputBinding: + position: 0 + prefix: PER_TARGET_COVERAGE= + separate: false + - id: per_base_coverage + type: File? + inputBinding: + position: 0 + prefix: PER_BASE_COVERAGE= + separate: false + - id: near_distance + type: int? + inputBinding: + position: 0 + prefix: NEAR_DISTANCE= + separate: false + - id: coverage_cap + type: int? + inputBinding: + position: 0 + prefix: COVERAGE_CAP= + separate: false + - id: sample_size + type: int? + inputBinding: + position: 0 + prefix: SAMPLE_SIZE= + separate: false +outputs: + - id: hs_metrics_file + type: File? 
+ outputBinding: + glob: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/,'.hsmetrics') + } + } label: picard_hsmetrics_2.8.1 -class: CommandLineTool -cwlVersion: v1.0 -inputs: [] -outputs: [] -baseCommand: '' -doc: '' +arguments: + - position: 0 + - position: 0 + prefix: '-jar' + valueFrom: /usr/local/bin/picard.jar + - position: 0 + prefix: '' + valueFrom: CollectHsMetrics + - position: 0 + prefix: OUTPUT= + valueFrom: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/,'.hsmetrics') + } + } +requirements: + - class: ResourceRequirement + ramMin: 4000 + coresMin: 1 + - class: DockerRequirement + dockerPull: 'mskcc/picard:2.8.1' + - class: InlineJavascriptRequirement From b34c10dfa125bf32b36df0a7c6ab1edbf91947db Mon Sep 17 00:00:00 2001 From: ionox0 Date: Fri, 25 Oct 2019 10:10:57 -0400 Subject: [PATCH 034/476] README and updates to hsmetrics cwl --- picard_hsmetrics_2.8.1/README.md | 26 +++++++++++++++++++ .../picard_hsmetrics_2.8.1.cwl | 21 ++++++++++++++- 2 files changed, 46 insertions(+), 1 deletion(-) create mode 100644 picard_hsmetrics_2.8.1/README.md diff --git a/picard_hsmetrics_2.8.1/README.md b/picard_hsmetrics_2.8.1/README.md new file mode 100644 index 00000000..d10b8ca4 --- /dev/null +++ b/picard_hsmetrics_2.8.1/README.md @@ -0,0 +1,26 @@ +# CWL and Dockerfile for running Picard - CollectAlignmentSummaryMetrics + +## Version of tools in docker image (../picard_mark_duplicates_2.8.1/container/Dockerfile) + +| Tool | Version | Location | +|--- |--- |--- | +| java base image | 8 | - | +| picard | 2.8.1 | https://github.com/broadinstitute/picard/releases/download/2.8.1/picard.jar | +| R | 3.3.3 | r-base for opnejdk:8 | + + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash +> toil-cwl-runner 
picard_hsmetrics_2.8.1.cwl example_inputs.yaml +``` + +### Usage + +```bash +> usage: picard_hsmetrics_2.8.1.cwl [-h] +``` diff --git a/picard_hsmetrics_2.8.1/picard_hsmetrics_2.8.1.cwl b/picard_hsmetrics_2.8.1/picard_hsmetrics_2.8.1.cwl index 80c2fa08..5b16797e 100644 --- a/picard_hsmetrics_2.8.1/picard_hsmetrics_2.8.1.cwl +++ b/picard_hsmetrics_2.8.1/picard_hsmetrics_2.8.1.cwl @@ -134,5 +134,24 @@ requirements: ramMin: 4000 coresMin: 1 - class: DockerRequirement - dockerPull: 'mskcc/picard:2.8.1' + dockerPull: 'msk-access/picard:0.6.1' - class: InlineJavascriptRequirement + +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:johnsoni@mskcc.org' + 'foaf:name': Ian Johnson + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:johnsoni@mskcc.org' + 'foaf:name': Ian Johnson + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': picard + 'doap:revision': 2.8.1 \ No newline at end of file From 4307c9c80aa9b47c63b1823e7a89ab19ae07feb7 Mon Sep 17 00:00:00 2001 From: ionox0 Date: Fri, 25 Oct 2019 10:13:13 -0400 Subject: [PATCH 035/476] update SUMMARY docs --- docs/SUMMARY.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index cd63b5d3..d93428e9 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -23,6 +23,7 @@ * [AddOrReplaceReadGroups v1.96](../picard_add_or_replace_read_groups_1.96/README.md) * [CollectAlignmentSummaryMetrics v2.8.1](../picard_collect_alignment_summary_metrics_2.8.1/README.md) * [FixMateInformation v1.96](../picard_fix_mate_information_1.96/README.md) + * [hsmetrics v2.8.1](../picard_hsmetrics_2.8.1/README.md) * [MarkDuplicates v1.96](../picard_mark_duplicates_1.96/README.md) * [MarkDuplicates v2.8.1](../picard_mark_duplicates_2.8.1/README.md) * Trim Galore From 
a7be8c3cd338d61b65b652e33e141aa8d54e93d0 Mon Sep 17 00:00:00 2001 From: ionox0 Date: Fri, 25 Oct 2019 10:16:25 -0400 Subject: [PATCH 036/476] remove empty argument --- picard_hsmetrics_2.8.1/picard_hsmetrics_2.8.1.cwl | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/picard_hsmetrics_2.8.1/picard_hsmetrics_2.8.1.cwl b/picard_hsmetrics_2.8.1/picard_hsmetrics_2.8.1.cwl index 5b16797e..bc4341dc 100644 --- a/picard_hsmetrics_2.8.1/picard_hsmetrics_2.8.1.cwl +++ b/picard_hsmetrics_2.8.1/picard_hsmetrics_2.8.1.cwl @@ -112,7 +112,6 @@ outputs: } label: picard_hsmetrics_2.8.1 arguments: - - position: 0 - position: 0 prefix: '-jar' valueFrom: /usr/local/bin/picard.jar @@ -136,7 +135,6 @@ requirements: - class: DockerRequirement dockerPull: 'msk-access/picard:0.6.1' - class: InlineJavascriptRequirement - 'dct:contributor': - class: 'foaf:Organization' 'foaf:member': @@ -154,4 +152,4 @@ requirements: 'doap:release': - class: 'doap:Version' 'doap:name': picard - 'doap:revision': 2.8.1 \ No newline at end of file + 'doap:revision': 2.8.1 From 1af786aaf3cb6bf845767be9a4711d938671eb1b Mon Sep 17 00:00:00 2001 From: ionox0 Date: Fri, 25 Oct 2019 10:19:59 -0400 Subject: [PATCH 037/476] correct Docker Hub URL (mskaccess instead of msk-access) --- picard_hsmetrics_2.8.1/picard_hsmetrics_2.8.1.cwl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/picard_hsmetrics_2.8.1/picard_hsmetrics_2.8.1.cwl b/picard_hsmetrics_2.8.1/picard_hsmetrics_2.8.1.cwl index bc4341dc..058f9753 100644 --- a/picard_hsmetrics_2.8.1/picard_hsmetrics_2.8.1.cwl +++ b/picard_hsmetrics_2.8.1/picard_hsmetrics_2.8.1.cwl @@ -133,7 +133,7 @@ requirements: ramMin: 4000 coresMin: 1 - class: DockerRequirement - dockerPull: 'msk-access/picard:0.6.1' + dockerPull: 'mskaccess/picard:0.6.1' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' From bc62254d0b8bb87c9dc6a8026d06fee1abe4c8f0 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 25 Oct 2019 11:03:52 
-0400 Subject: [PATCH 038/476] Update SUMMARY.md --- docs/SUMMARY.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index d93428e9..531fd9fb 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -23,7 +23,7 @@ * [AddOrReplaceReadGroups v1.96](../picard_add_or_replace_read_groups_1.96/README.md) * [CollectAlignmentSummaryMetrics v2.8.1](../picard_collect_alignment_summary_metrics_2.8.1/README.md) * [FixMateInformation v1.96](../picard_fix_mate_information_1.96/README.md) - * [hsmetrics v2.8.1](../picard_hsmetrics_2.8.1/README.md) + * [HSmetrics v2.8.1](../picard_hsmetrics_2.8.1/README.md) * [MarkDuplicates v1.96](../picard_mark_duplicates_1.96/README.md) * [MarkDuplicates v2.8.1](../picard_mark_duplicates_2.8.1/README.md) * Trim Galore From 654a1577e65e057a367779bb8de8d20cbd04012b Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 25 Oct 2019 11:43:17 -0400 Subject: [PATCH 039/476] Making the template --- merge_fastq_0.1.6/README.md | 0 merge_fastq_0.1.6/container/Dockerfile | 0 merge_fastq_0.1.6/merge_fastq_0.1.6.cwl | 0 3 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 merge_fastq_0.1.6/README.md create mode 100644 merge_fastq_0.1.6/container/Dockerfile create mode 100644 merge_fastq_0.1.6/merge_fastq_0.1.6.cwl diff --git a/merge_fastq_0.1.6/README.md b/merge_fastq_0.1.6/README.md new file mode 100644 index 00000000..e69de29b diff --git a/merge_fastq_0.1.6/container/Dockerfile b/merge_fastq_0.1.6/container/Dockerfile new file mode 100644 index 00000000..e69de29b diff --git a/merge_fastq_0.1.6/merge_fastq_0.1.6.cwl b/merge_fastq_0.1.6/merge_fastq_0.1.6.cwl new file mode 100644 index 00000000..e69de29b From 06fe7e6acfa20dc35e60de6132888fb6867d34bf Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 25 Oct 2019 11:59:31 -0400 Subject: [PATCH 040/476] Update Dockerfile --- merge_fastq_0.1.6/container/Dockerfile | 37 ++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git 
a/merge_fastq_0.1.6/container/Dockerfile b/merge_fastq_0.1.6/container/Dockerfile index e69de29b..c3de33aa 100644 --- a/merge_fastq_0.1.6/container/Dockerfile +++ b/merge_fastq_0.1.6/container/Dockerfile @@ -0,0 +1,37 @@ +################## BASE IMAGE ###################### + +FROM ubuntu:18.04 + +################## ARGUMENTS/Environments ########## + +ARG BUILD_DATE +ARG BUILD_VERSION +ARG UBUNTU_VERSION=18.04 +ARG LICENSE="Apache-2.0" +ARG MERGE_FASTQ_VERSION=0.1.6 +ARG VCS_REF +################## METADATA ######################## +LABEL org.opencontainers.image.vendor="MSKCC" +LABEL org.opencontainers.image.authors="Ronak H Shah (shahr2@mskcc.org)" + +LABEL org.opencontainers.image.created=${BUILD_DATE} \ + org.opencontainers.image.version=${BUILD_VERSION} \ + org.opencontainers.image.licenses=${LICENSE} \ + org.opencontainers.image.version.trimgalore=${MERGE_FASTQ_VERSION} \ + org.opencontainers.image.source.cutadapt="https://pypi.org/project/merge-fastq/" \ + org.opencontainers.image.vcs-url="https://github.com/msk-access/cwl-commandlinetools.git" \ + org.opencontainers.image.vcs-ref=${VCS_REF} + +LABEL org.opencontainers.image.description="This container uses ubuntu ${UBUNTU_VERSION} as the base image to build \ + cutadpat version ${MERGE_FASTQ_VERSION}" + +################## INSTALL ########################## + +#Ubuntu +RUN apt-get update && \ + apt-get --no-install-recommends install -y build-essential python3 python3-pip python3-dev wget zip unzip default-jre && \ + apt-get clean && apt-get purge && apt autoremove && apt-get remove --yes --purge build-essential && \ + rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* + +#MERGE_FASTQ +RUN pip3 install --upgrade merge-fastq==${MERGE_FASTQ_VERSION} From d79b4852e0708206818387a63776ab2fbaae4011 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 25 Oct 2019 15:20:41 -0400 Subject: [PATCH 041/476] Modifying to 0.1.7 --- {merge_fastq_0.1.6 => merge_fastq_0.1.7}/README.md | 0 {merge_fastq_0.1.6 => 
merge_fastq_0.1.7}/container/Dockerfile | 2 +- .../merge_fastq_0.1.7.cwl | 0 3 files changed, 1 insertion(+), 1 deletion(-) rename {merge_fastq_0.1.6 => merge_fastq_0.1.7}/README.md (100%) rename {merge_fastq_0.1.6 => merge_fastq_0.1.7}/container/Dockerfile (98%) rename merge_fastq_0.1.6/merge_fastq_0.1.6.cwl => merge_fastq_0.1.7/merge_fastq_0.1.7.cwl (100%) diff --git a/merge_fastq_0.1.6/README.md b/merge_fastq_0.1.7/README.md similarity index 100% rename from merge_fastq_0.1.6/README.md rename to merge_fastq_0.1.7/README.md diff --git a/merge_fastq_0.1.6/container/Dockerfile b/merge_fastq_0.1.7/container/Dockerfile similarity index 98% rename from merge_fastq_0.1.6/container/Dockerfile rename to merge_fastq_0.1.7/container/Dockerfile index c3de33aa..e5f3321e 100644 --- a/merge_fastq_0.1.6/container/Dockerfile +++ b/merge_fastq_0.1.7/container/Dockerfile @@ -8,7 +8,7 @@ ARG BUILD_DATE ARG BUILD_VERSION ARG UBUNTU_VERSION=18.04 ARG LICENSE="Apache-2.0" -ARG MERGE_FASTQ_VERSION=0.1.6 +ARG MERGE_FASTQ_VERSION=0.1.7 ARG VCS_REF ################## METADATA ######################## LABEL org.opencontainers.image.vendor="MSKCC" diff --git a/merge_fastq_0.1.6/merge_fastq_0.1.6.cwl b/merge_fastq_0.1.7/merge_fastq_0.1.7.cwl similarity index 100% rename from merge_fastq_0.1.6/merge_fastq_0.1.6.cwl rename to merge_fastq_0.1.7/merge_fastq_0.1.7.cwl From 752d05ce794703742c575435b3e441f562cd5c83 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 25 Oct 2019 15:59:17 -0400 Subject: [PATCH 042/476] Update Dockerfile --- merge_fastq_0.1.7/container/Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/merge_fastq_0.1.7/container/Dockerfile b/merge_fastq_0.1.7/container/Dockerfile index e5f3321e..5a2adbb3 100644 --- a/merge_fastq_0.1.7/container/Dockerfile +++ b/merge_fastq_0.1.7/container/Dockerfile @@ -29,9 +29,10 @@ LABEL org.opencontainers.image.description="This container uses ubuntu ${UBUNTU_ #Ubuntu RUN apt-get update && \ - apt-get 
--no-install-recommends install -y build-essential python3 python3-pip python3-dev wget zip unzip default-jre && \ + apt-get --no-install-recommends install -y build-essential python3.7 python3-pip python3-dev wget zip unzip default-jre && \ apt-get clean && apt-get purge && apt autoremove && apt-get remove --yes --purge build-essential && \ rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* #MERGE_FASTQ RUN pip3 install --upgrade merge-fastq==${MERGE_FASTQ_VERSION} + From c35f7c8e007dd68226df27f4755bb221637cead6 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Mon, 28 Oct 2019 13:33:53 -0400 Subject: [PATCH 043/476] Changing Docker File --- merge_fastq_0.1.7/container/Dockerfile | 16 +++----- merge_fastq_0.1.7/container/Dockerfile_ubuntu | 38 +++++++++++++++++++ 2 files changed, 43 insertions(+), 11 deletions(-) create mode 100644 merge_fastq_0.1.7/container/Dockerfile_ubuntu diff --git a/merge_fastq_0.1.7/container/Dockerfile b/merge_fastq_0.1.7/container/Dockerfile index 5a2adbb3..290d8dea 100644 --- a/merge_fastq_0.1.7/container/Dockerfile +++ b/merge_fastq_0.1.7/container/Dockerfile @@ -1,12 +1,12 @@ ################## BASE IMAGE ###################### -FROM ubuntu:18.04 +FROM python:3 ################## ARGUMENTS/Environments ########## ARG BUILD_DATE ARG BUILD_VERSION -ARG UBUNTU_VERSION=18.04 +ARG PYTHON_VERSION=3 ARG LICENSE="Apache-2.0" ARG MERGE_FASTQ_VERSION=0.1.7 ARG VCS_REF @@ -18,21 +18,15 @@ LABEL org.opencontainers.image.created=${BUILD_DATE} \ org.opencontainers.image.version=${BUILD_VERSION} \ org.opencontainers.image.licenses=${LICENSE} \ org.opencontainers.image.version.trimgalore=${MERGE_FASTQ_VERSION} \ - org.opencontainers.image.source.cutadapt="https://pypi.org/project/merge-fastq/" \ + org.opencontainers.image.source.merge_fastq="https://pypi.org/project/merge-fastq/" \ org.opencontainers.image.vcs-url="https://github.com/msk-access/cwl-commandlinetools.git" \ org.opencontainers.image.vcs-ref=${VCS_REF} -LABEL 
org.opencontainers.image.description="This container uses ubuntu ${UBUNTU_VERSION} as the base image to build \ - cutadpat version ${MERGE_FASTQ_VERSION}" +LABEL org.opencontainers.image.description="This container uses python ${PYTHON_VERSION} as the base image to build \ + merge fastq version ${MERGE_FASTQ_VERSION}" ################## INSTALL ########################## -#Ubuntu -RUN apt-get update && \ - apt-get --no-install-recommends install -y build-essential python3.7 python3-pip python3-dev wget zip unzip default-jre && \ - apt-get clean && apt-get purge && apt autoremove && apt-get remove --yes --purge build-essential && \ - rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* - #MERGE_FASTQ RUN pip3 install --upgrade merge-fastq==${MERGE_FASTQ_VERSION} diff --git a/merge_fastq_0.1.7/container/Dockerfile_ubuntu b/merge_fastq_0.1.7/container/Dockerfile_ubuntu new file mode 100644 index 00000000..5a2adbb3 --- /dev/null +++ b/merge_fastq_0.1.7/container/Dockerfile_ubuntu @@ -0,0 +1,38 @@ +################## BASE IMAGE ###################### + +FROM ubuntu:18.04 + +################## ARGUMENTS/Environments ########## + +ARG BUILD_DATE +ARG BUILD_VERSION +ARG UBUNTU_VERSION=18.04 +ARG LICENSE="Apache-2.0" +ARG MERGE_FASTQ_VERSION=0.1.7 +ARG VCS_REF +################## METADATA ######################## +LABEL org.opencontainers.image.vendor="MSKCC" +LABEL org.opencontainers.image.authors="Ronak H Shah (shahr2@mskcc.org)" + +LABEL org.opencontainers.image.created=${BUILD_DATE} \ + org.opencontainers.image.version=${BUILD_VERSION} \ + org.opencontainers.image.licenses=${LICENSE} \ + org.opencontainers.image.version.trimgalore=${MERGE_FASTQ_VERSION} \ + org.opencontainers.image.source.cutadapt="https://pypi.org/project/merge-fastq/" \ + org.opencontainers.image.vcs-url="https://github.com/msk-access/cwl-commandlinetools.git" \ + org.opencontainers.image.vcs-ref=${VCS_REF} + +LABEL org.opencontainers.image.description="This container uses ubuntu ${UBUNTU_VERSION} as the 
base image to build \ + cutadpat version ${MERGE_FASTQ_VERSION}" + +################## INSTALL ########################## + +#Ubuntu +RUN apt-get update && \ + apt-get --no-install-recommends install -y build-essential python3.7 python3-pip python3-dev wget zip unzip default-jre && \ + apt-get clean && apt-get purge && apt autoremove && apt-get remove --yes --purge build-essential && \ + rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* + +#MERGE_FASTQ +RUN pip3 install --upgrade merge-fastq==${MERGE_FASTQ_VERSION} + From 4fb1cfdd757f31e04992d6ce1edf149fde4929ab Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Mon, 28 Oct 2019 14:18:54 -0400 Subject: [PATCH 044/476] :wrench: Modifying the docker file --- merge_fastq_0.1.7/container/Dockerfile | 18 ++++++--- merge_fastq_0.1.7/container/Dockerfile_ubuntu | 38 ------------------- 2 files changed, 12 insertions(+), 44 deletions(-) delete mode 100644 merge_fastq_0.1.7/container/Dockerfile_ubuntu diff --git a/merge_fastq_0.1.7/container/Dockerfile b/merge_fastq_0.1.7/container/Dockerfile index 290d8dea..cd1d46b4 100644 --- a/merge_fastq_0.1.7/container/Dockerfile +++ b/merge_fastq_0.1.7/container/Dockerfile @@ -1,12 +1,12 @@ ################## BASE IMAGE ###################### -FROM python:3 +FROM continuumio/miniconda3 ################## ARGUMENTS/Environments ########## ARG BUILD_DATE ARG BUILD_VERSION -ARG PYTHON_VERSION=3 +ARG MINICONDA_VERSION=3 ARG LICENSE="Apache-2.0" ARG MERGE_FASTQ_VERSION=0.1.7 ARG VCS_REF @@ -17,16 +17,22 @@ LABEL org.opencontainers.image.authors="Ronak H Shah (shahr2@mskcc.org)" LABEL org.opencontainers.image.created=${BUILD_DATE} \ org.opencontainers.image.version=${BUILD_VERSION} \ org.opencontainers.image.licenses=${LICENSE} \ - org.opencontainers.image.version.trimgalore=${MERGE_FASTQ_VERSION} \ + org.opencontainers.image.version.merge_fastq=${MERGE_FASTQ_VERSION} \ org.opencontainers.image.source.merge_fastq="https://pypi.org/project/merge-fastq/" \ 
org.opencontainers.image.vcs-url="https://github.com/msk-access/cwl-commandlinetools.git" \ org.opencontainers.image.vcs-ref=${VCS_REF} -LABEL org.opencontainers.image.description="This container uses python ${PYTHON_VERSION} as the base image to build \ +LABEL org.opencontainers.image.description="This container uses miniconda ${MINICONDA_VERSION} as the base image to build \ merge fastq version ${MERGE_FASTQ_VERSION}" ################## INSTALL ########################## -#MERGE_FASTQ -RUN pip3 install --upgrade merge-fastq==${MERGE_FASTQ_VERSION} +#Ubuntu +RUN conda create --name merge_fastq python=3.7 && \ + echo "source activate merge_fastq" > ~/.bashrc + +#Set the path of environment as default +ENV PATH /opt/conda/envs/merge_fastq/bin:$PATH +#MERGE_FASTQ +RUN pip install --upgrade merge-fastq==${MERGE_FASTQ_VERSION} diff --git a/merge_fastq_0.1.7/container/Dockerfile_ubuntu b/merge_fastq_0.1.7/container/Dockerfile_ubuntu deleted file mode 100644 index 5a2adbb3..00000000 --- a/merge_fastq_0.1.7/container/Dockerfile_ubuntu +++ /dev/null @@ -1,38 +0,0 @@ -################## BASE IMAGE ###################### - -FROM ubuntu:18.04 - -################## ARGUMENTS/Environments ########## - -ARG BUILD_DATE -ARG BUILD_VERSION -ARG UBUNTU_VERSION=18.04 -ARG LICENSE="Apache-2.0" -ARG MERGE_FASTQ_VERSION=0.1.7 -ARG VCS_REF -################## METADATA ######################## -LABEL org.opencontainers.image.vendor="MSKCC" -LABEL org.opencontainers.image.authors="Ronak H Shah (shahr2@mskcc.org)" - -LABEL org.opencontainers.image.created=${BUILD_DATE} \ - org.opencontainers.image.version=${BUILD_VERSION} \ - org.opencontainers.image.licenses=${LICENSE} \ - org.opencontainers.image.version.trimgalore=${MERGE_FASTQ_VERSION} \ - org.opencontainers.image.source.cutadapt="https://pypi.org/project/merge-fastq/" \ - org.opencontainers.image.vcs-url="https://github.com/msk-access/cwl-commandlinetools.git" \ - org.opencontainers.image.vcs-ref=${VCS_REF} - -LABEL 
org.opencontainers.image.description="This container uses ubuntu ${UBUNTU_VERSION} as the base image to build \ - cutadpat version ${MERGE_FASTQ_VERSION}" - -################## INSTALL ########################## - -#Ubuntu -RUN apt-get update && \ - apt-get --no-install-recommends install -y build-essential python3.7 python3-pip python3-dev wget zip unzip default-jre && \ - apt-get clean && apt-get purge && apt autoremove && apt-get remove --yes --purge build-essential && \ - rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* - -#MERGE_FASTQ -RUN pip3 install --upgrade merge-fastq==${MERGE_FASTQ_VERSION} - From 49e1c27c83b93daabc9a85478e29adeab2991ca1 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Mon, 28 Oct 2019 14:35:06 -0400 Subject: [PATCH 045/476] Update README.md --- merge_fastq_0.1.7/README.md | 50 +++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/merge_fastq_0.1.7/README.md b/merge_fastq_0.1.7/README.md index e69de29b..6a6f2a37 100644 --- a/merge_fastq_0.1.7/README.md +++ b/merge_fastq_0.1.7/README.md @@ -0,0 +1,50 @@ +# CWL and Dockerfile for running merge_fastq + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +|--- |--- |--- | +| miniconda | 3 | | +| merge_fastq | 0.1.7 | | + +[![](https://images.microbadger.com/badges/version/mskaccess/merge_fastq:0.6.1.svg)](https://microbadger.com/images/mskaccess/merge_fastq:0.6.1 "Get your own version badge on microbadger.com") [![](https://images.microbadger.com/badges/image/mskaccess/merge_fastq:0.6.1.svg)](https://microbadger.com/images/mskaccess/merge_fastq:0.6.1 "Get your own image badge on microbadger.com") [![](https://images.microbadger.com/badges/commit/mskaccess/merge_fastq:0.6.1.svg)](https://microbadger.com/images/mskaccess/merge_fastq:0.6.1 "Get your own commit badge on microbadger.com") 
[![](https://images.microbadger.com/badges/license/mskaccess/merge_fastq:0.6.1.svg)](https://microbadger.com/images/mskaccess/merge_fastq:0.6.1 "Get your own license badge on microbadger.com") + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner merge_fastq_0.1.7.cwl example_inputs.yaml +``` + +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/merge_fastq_0\.1\.7/merge_fastq_0\.1\.7\.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> mkdir tool_toil_log +> toil-cwl-runner --singularity --logFile /path/to/tool_toil_log/cwltoil.log --jobStore /path/to/tool_jobStore --batchSystem lsf --workDir /path/to/tool_toil_log --outdir . 
--writeLogs /path/to/tool_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/merge_fastq_0\.1\.7/merge_fastq_0\.1\.7\.cwl /path/to/inputs.yaml > tool_toil.stdout 2> tool_toil.stderr & +``` + +### Usage + +```bash +> toil-cwl-runner merge_fastq_0\.1\.7\.cwl --help +usage: merge_fastq_0.1.7.cwl [-h] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + +``` From 2c578e8186b214757fec6a2e7be5ad27714fe000 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Mon, 28 Oct 2019 14:37:22 -0400 Subject: [PATCH 046/476] Update README.md --- merge_fastq_0.1.7/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/merge_fastq_0.1.7/README.md b/merge_fastq_0.1.7/README.md index 6a6f2a37..158061ec 100644 --- a/merge_fastq_0.1.7/README.md +++ b/merge_fastq_0.1.7/README.md @@ -23,17 +23,17 @@ ```bash #Using CWLTOOL -> cwltool --singularity --non-strict /path/to/merge_fastq_0\.1\.7/merge_fastq_0\.1\.7\.cwl /path/to/inputs.yaml +> cwltool --singularity --non-strict /path/to/merge_fastq_0.1.7/merge_fastq_0.1.7.cwl /path/to/inputs.yaml #Using toil-cwl-runner > mkdir tool_toil_log -> toil-cwl-runner --singularity --logFile /path/to/tool_toil_log/cwltoil.log --jobStore /path/to/tool_jobStore --batchSystem lsf --workDir /path/to/tool_toil_log --outdir . --writeLogs /path/to/tool_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/merge_fastq_0\.1\.7/merge_fastq_0\.1\.7\.cwl /path/to/inputs.yaml > tool_toil.stdout 2> tool_toil.stderr & +> toil-cwl-runner --singularity --logFile /path/to/tool_toil_log/cwltoil.log --jobStore /path/to/tool_jobStore --batchSystem lsf --workDir /path/to/tool_toil_log --outdir . 
--writeLogs /path/to/tool_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/merge_fastq_0.1.7/merge_fastq_0.1.7.cwl /path/to/inputs.yaml > tool_toil.stdout 2> tool_toil.stderr & ``` ### Usage ```bash -> toil-cwl-runner merge_fastq_0\.1\.7\.cwl --help +> toil-cwl-runner merge_fastq_0.1.7.cwl --help usage: merge_fastq_0.1.7.cwl [-h] positional arguments: From 9ec592afc9892e36c3ee09839c79a6f4caeb64a4 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Mon, 28 Oct 2019 14:39:41 -0400 Subject: [PATCH 047/476] Update SUMMARY.md --- docs/SUMMARY.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index cd63b5d3..c98886be 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -19,6 +19,8 @@ * [Seprate BAMs v1.8.1](../marianas_separate_bams_1.8.1/README.md) * MuTect * [MuTect 1.1.5](../mutect_1.1.5/README.md) + * Merge Fastq + * [v0.1.7](../merge_fastq_0.1.7/README.md) * Picard Tools * [AddOrReplaceReadGroups v1.96](../picard_add_or_replace_read_groups_1.96/README.md) * [CollectAlignmentSummaryMetrics v2.8.1](../picard_collect_alignment_summary_metrics_2.8.1/README.md) From 5a0b48818e1c1bcc90b5812f76267a0687cef1d0 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Mon, 28 Oct 2019 16:59:22 -0400 Subject: [PATCH 048/476] Update merge_fastq_0.1.7.cwl --- merge_fastq_0.1.7/merge_fastq_0.1.7.cwl | 99 +++++++++++++++++++++++++ 1 file changed, 99 insertions(+) diff --git a/merge_fastq_0.1.7/merge_fastq_0.1.7.cwl b/merge_fastq_0.1.7/merge_fastq_0.1.7.cwl index e69de29b..f9116643 100644 --- a/merge_fastq_0.1.7/merge_fastq_0.1.7.cwl +++ b/merge_fastq_0.1.7/merge_fastq_0.1.7.cwl @@ -0,0 +1,99 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: merge_fastq_0_1_7 +baseCommand: + - merge_fastq +inputs: + - id: memory_per_job + type: int? 
+ doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: fastq1 + type: + type: array + items: File + inputBinding: + prefix: '--fastq1' + inputBinding: + position: 1 + doc: >- + Full path to gziped READ1 fastq files, can be specified multiple times for + example: --fastq1 test_part1_R1.fastq.gz --fastq1 test_part2_R1.fastq.gz + [required] + - id: fastq2 + type: + type: array + items: File + inputBinding: + prefix: '--fastq2' + inputBinding: + position: 1 + doc: >- + Full path to gziped READ2 fastq files, can be specified multiple times for + example: --fastq1 test_part1_R2.fastq.gz --fastq1 test_part2_R2.fastq.gz + [required] + - id: output_path + type: string? + inputBinding: + position: 0 + prefix: '--output-path' + doc: 'Full path to write the output files (default: Current working directory)' + - id: out_fastq1_name + type: string? + inputBinding: + position: 0 + prefix: '--out-fastq1' + doc: >- + Name of the merged output READ1 fastq file(default: + merged_fastq_R1.fastq.gz) + - id: out_fastq2_name + type: string? 
+ inputBinding: + position: 0 + prefix: '--out-fastq2' + doc: >- + Name of the merged output READ2 fastq file(default: + merged_fastq_R2.fastq.gz) +outputs: + - id: mergedfastq1 + type: File + outputBinding: + glob: '$(inputs.fastq1.basename.replace(''.fastq.gz'', ''_val_1.fq.gz''))' + - id: mergedfastq2 + type: File + outputBinding: + glob: '$(inputs.fastq2.basename.replace(''.fastq.gz'', ''_val_2.fq.gz''))' +requirements: + - class: ResourceRequirement + ramMin: 16000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'mskaccess/merge_fastq:0.6.1' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': merge_fastq + 'doap:revision': 0.1.7 From c54cf1b1eba9ebb847e84dc7599ff8b0e63cad5a Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Mon, 28 Oct 2019 17:05:50 -0400 Subject: [PATCH 049/476] Update Dockerfile --- merge_fastq_0.1.7/container/Dockerfile | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/merge_fastq_0.1.7/container/Dockerfile b/merge_fastq_0.1.7/container/Dockerfile index cd1d46b4..86cb1d51 100644 --- a/merge_fastq_0.1.7/container/Dockerfile +++ b/merge_fastq_0.1.7/container/Dockerfile @@ -1,12 +1,12 @@ ################## BASE IMAGE ###################### -FROM continuumio/miniconda3 +FROM continuumio/miniconda3:4.7.12 ################## ARGUMENTS/Environments ########## ARG BUILD_DATE ARG BUILD_VERSION -ARG MINICONDA_VERSION=3 +ARG MINICONDA3_VERSION=4.7.12 ARG LICENSE="Apache-2.0" ARG MERGE_FASTQ_VERSION=0.1.7 ARG VCS_REF @@ -17,12 +17,13 @@ LABEL 
org.opencontainers.image.authors="Ronak H Shah (shahr2@mskcc.org)" LABEL org.opencontainers.image.created=${BUILD_DATE} \ org.opencontainers.image.version=${BUILD_VERSION} \ org.opencontainers.image.licenses=${LICENSE} \ + org.opencontainers.image.version.miniconda3=${MINICONDA3_VERSION} \ org.opencontainers.image.version.merge_fastq=${MERGE_FASTQ_VERSION} \ org.opencontainers.image.source.merge_fastq="https://pypi.org/project/merge-fastq/" \ org.opencontainers.image.vcs-url="https://github.com/msk-access/cwl-commandlinetools.git" \ org.opencontainers.image.vcs-ref=${VCS_REF} -LABEL org.opencontainers.image.description="This container uses miniconda ${MINICONDA_VERSION} as the base image to build \ +LABEL org.opencontainers.image.description="This container uses miniconda ${MINICONDA3_VERSION} as the base image to build \ merge fastq version ${MERGE_FASTQ_VERSION}" ################## INSTALL ########################## From c1d5a37c274c94b60beb90146299d7a824649c5e Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Tue, 29 Oct 2019 09:55:45 -0400 Subject: [PATCH 050/476] :wrench: Making final changes to merge_Fastq cwl --- merge_fastq_0.1.7/README.md | 19 +++++++++++++++- merge_fastq_0.1.7/example_inputs.yaml | 20 +++++++++++++++++ merge_fastq_0.1.7/merge_fastq_0.1.7.cwl | 30 ++++++++++++++++++------- 3 files changed, 60 insertions(+), 9 deletions(-) create mode 100644 merge_fastq_0.1.7/example_inputs.yaml diff --git a/merge_fastq_0.1.7/README.md b/merge_fastq_0.1.7/README.md index 158061ec..05ade25a 100644 --- a/merge_fastq_0.1.7/README.md +++ b/merge_fastq_0.1.7/README.md @@ -46,5 +46,22 @@ optional arguments: --memory_overhead MEMORY_OVERHEAD Memory overhead per job in megabytes --number_of_threads NUMBER_OF_THREADS - + --fastq1 FASTQ1 Full path to gziped READ1 fastq files, can be + specified multiple times for example: --fastq1 + test_part1_R1.fastq.gz --fastq1 test_part2_R1.fastq.gz + [required] + --fastq2 FASTQ2 Full path to gziped READ2 fastq files, can be + 
specified multiple times for example: --fastq1 + test_part1_R2.fastq.gz --fastq1 test_part2_R2.fastq.gz + [required] + --output_path OUTPUT_PATH + Full path to write the output files (default: Current + working directory) + --out_fastq1_name OUT_FASTQ1_NAME + Name of the merged output READ1 fastq file(default: + merged_fastq_R1.fastq.gz) + --out_fastq2_name OUT_FASTQ2_NAME + Name of the merged output READ2 fastq file(default: + merged_fastq_R2.fastq.gz) + ``` diff --git a/merge_fastq_0.1.7/example_inputs.yaml b/merge_fastq_0.1.7/example_inputs.yaml new file mode 100644 index 00000000..31e86cce --- /dev/null +++ b/merge_fastq_0.1.7/example_inputs.yaml @@ -0,0 +1,20 @@ +fastq1: + - class: File + path: >- + /path/to/R1_001.fastq.gz + - class: File + path: >- + /path/to/R1_002.fastq.gz +fastq2: + - class: File + path: >- + /path/to/R2_001.fastq.gz + - class: File + path: >- + /path/to/R2_002.fastq.gz +memory_overhead: null +memory_per_job: null +number_of_threads: null +out_fastq1_name: null +out_fastq2_name: null +output_path: null diff --git a/merge_fastq_0.1.7/merge_fastq_0.1.7.cwl b/merge_fastq_0.1.7/merge_fastq_0.1.7.cwl index f9116643..be5f28a7 100644 --- a/merge_fastq_0.1.7/merge_fastq_0.1.7.cwl +++ b/merge_fastq_0.1.7/merge_fastq_0.1.7.cwl @@ -24,7 +24,7 @@ inputs: inputBinding: prefix: '--fastq1' inputBinding: - position: 1 + position: 0 doc: >- Full path to gziped READ1 fastq files, can be specified multiple times for example: --fastq1 test_part1_R1.fastq.gz --fastq1 test_part2_R1.fastq.gz @@ -44,13 +44,13 @@ inputs: - id: output_path type: string? inputBinding: - position: 0 + position: 2 prefix: '--output-path' doc: 'Full path to write the output files (default: Current working directory)' - id: out_fastq1_name type: string? inputBinding: - position: 0 + position: 2 prefix: '--out-fastq1' doc: >- Name of the merged output READ1 fastq file(default: @@ -58,7 +58,7 @@ inputs: - id: out_fastq2_name type: string? 
inputBinding: - position: 0 + position: 2 prefix: '--out-fastq2' doc: >- Name of the merged output READ2 fastq file(default: @@ -67,15 +67,29 @@ outputs: - id: mergedfastq1 type: File outputBinding: - glob: '$(inputs.fastq1.basename.replace(''.fastq.gz'', ''_val_1.fq.gz''))' + glob: |- + ${ + if(inputs.out_fastq1_name){ + return inputs.out_fastq1_name + } else { + return 'merged_fastq_R1.fastq.gz' + } + } - id: mergedfastq2 type: File outputBinding: - glob: '$(inputs.fastq2.basename.replace(''.fastq.gz'', ''_val_2.fq.gz''))' + glob: |- + ${ + if(inputs.out_fastq2_name){ + return inputs.out_fastq2_name + } else { + return 'merged_fastq_R2.fastq.gz' + } + } requirements: - class: ResourceRequirement - ramMin: 16000 - coresMin: 2 + ramMin: 8000 + coresMin: 1 - class: DockerRequirement dockerPull: 'mskaccess/merge_fastq:0.6.1' - class: InlineJavascriptRequirement From f283bcc9ae12c8c5f356c314eeddb774255a80a5 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Tue, 29 Oct 2019 11:01:04 -0400 Subject: [PATCH 051/476] Create mv.cwl --- utilities_ubuntu_18.04/mv.cwl | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 utilities_ubuntu_18.04/mv.cwl diff --git a/utilities_ubuntu_18.04/mv.cwl b/utilities_ubuntu_18.04/mv.cwl new file mode 100644 index 00000000..db46a114 --- /dev/null +++ b/utilities_ubuntu_18.04/mv.cwl @@ -0,0 +1,17 @@ +cwlVersion: v1.0 +class: CommandLineTool +baseCommand: mv +inputs: + infile: + type: File + inputBinding: + position: 1 + outfile: + type: string + inputBinding: + position: 2 +outputs: + out: + type: File + outputBinding: + glob: $(inputs.outfile) From 6c49ee32e8957511d0142e5f88327a275515516e Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Tue, 29 Oct 2019 11:12:14 -0400 Subject: [PATCH 052/476] Adding move command to ubuntu utilities --- utilities_ubuntu_18.04/example_inputs_mv.yaml | 6 ++ utilities_ubuntu_18.04/mv.cwl | 63 +++++++++++++++++-- 2 files changed, 64 insertions(+), 5 deletions(-) create mode 100644 
utilities_ubuntu_18.04/example_inputs_mv.yaml diff --git a/utilities_ubuntu_18.04/example_inputs_mv.yaml b/utilities_ubuntu_18.04/example_inputs_mv.yaml new file mode 100644 index 00000000..ae4246e9 --- /dev/null +++ b/utilities_ubuntu_18.04/example_inputs_mv.yaml @@ -0,0 +1,6 @@ +force: null +infile: /path/to/source/file +memory_overhead: null +memory_per_job: null +outfile: /path/to/destination/file +verbose: null diff --git a/utilities_ubuntu_18.04/mv.cwl b/utilities_ubuntu_18.04/mv.cwl index db46a114..9313a1a0 100644 --- a/utilities_ubuntu_18.04/mv.cwl +++ b/utilities_ubuntu_18.04/mv.cwl @@ -1,17 +1,70 @@ -cwlVersion: v1.0 class: CommandLineTool -baseCommand: mv +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: mv +baseCommand: + - mv inputs: - infile: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: infile type: File inputBinding: position: 1 - outfile: + doc: 'Source that needs to be moved' + - id: outfile type: string inputBinding: position: 2 + doc: 'Target destination for the source' + - id: force + type: boolean? + inputBinding: + position: 0 + prefix: '-f' + doc: 'Do not prompt for confirmation before overwriting the destination path.' + - id: verbose + type: boolean? + inputBinding: + position: 0 + prefix: '-v' + doc: 'Cause mv to be verbose, showing files after they are moved.' 
outputs: - out: + - id: out type: File outputBinding: glob: $(inputs.outfile) +label: mv +requirements: + - class: ResourceRequirement + ramMin: 100 + coresMin: 1 + - class: DockerRequirement + dockerPull: 'ubuntu:18.04' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': mv + 'doap:revision': 18.04 \ No newline at end of file From 5ec269743d055b932580a0116c469020bd4f96c5 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Tue, 29 Oct 2019 11:15:55 -0400 Subject: [PATCH 053/476] Update mv.cwl --- utilities_ubuntu_18.04/mv.cwl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utilities_ubuntu_18.04/mv.cwl b/utilities_ubuntu_18.04/mv.cwl index 9313a1a0..c2450800 100644 --- a/utilities_ubuntu_18.04/mv.cwl +++ b/utilities_ubuntu_18.04/mv.cwl @@ -19,7 +19,7 @@ inputs: type: File inputBinding: position: 1 - doc: 'Source that needs to be moved' + doc: 'Source with path that needs to be moved' - id: outfile type: string inputBinding: From d64cc02eeecbd2e2d9ac0e8632c0a17fd4e77c7e Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Tue, 29 Oct 2019 11:55:16 -0400 Subject: [PATCH 054/476] Update README.md --- utilities_ubuntu_18.04/README.md | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/utilities_ubuntu_18.04/README.md b/utilities_ubuntu_18.04/README.md index a13033b0..798dde4d 100644 --- a/utilities_ubuntu_18.04/README.md +++ b/utilities_ubuntu_18.04/README.md @@ -1,10 +1,18 @@ -# CWL and Dockerfile for running utilites from Ubuntu 18.04 +# CWL and Dockerfile for 
running utilities from Ubuntu 18.04 ## Version of tools in docker image (/container/Dockerfile) -| Tool | Version | Location | -|--- |--- |--- | -| Ubuntu base image | 18.04 | - | +| Tool | Version | Location | +|--- |--- |--- | +| Ubuntu base image | 18.04 | - | + +## Available tools + +| Tool | Description | +|--- |--- | +| sort | sort lines of text files | +| gzip | compress or expand files | +| mv | move (rename) files | ## CWL From c779de54009d3fe37c2ab96d3db034045af39570 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Tue, 29 Oct 2019 11:56:30 -0400 Subject: [PATCH 055/476] Update README.md --- utilities_ubuntu_18.04/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/utilities_ubuntu_18.04/README.md b/utilities_ubuntu_18.04/README.md index 798dde4d..4fbc4f12 100644 --- a/utilities_ubuntu_18.04/README.md +++ b/utilities_ubuntu_18.04/README.md @@ -10,9 +10,9 @@ | Tool | Description | |--- |--- | -| sort | sort lines of text files | -| gzip | compress or expand files | -| mv | move (rename) files | +| sort.cwl | sort lines of text files | +| gzip.cwl | compress or expand files | +| mv.cwl | move (rename) files | ## CWL From 0ea12a51df25d5cb16ffc25784eb5f42e4c839d7 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Tue, 29 Oct 2019 11:57:50 -0400 Subject: [PATCH 056/476] Update README.md --- utilities_ubuntu_18.04/README.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/utilities_ubuntu_18.04/README.md b/utilities_ubuntu_18.04/README.md index 4fbc4f12..d6499376 100644 --- a/utilities_ubuntu_18.04/README.md +++ b/utilities_ubuntu_18.04/README.md @@ -2,17 +2,17 @@ ## Version of tools in docker image (/container/Dockerfile) -| Tool | Version | Location | -|--- |--- |--- | -| Ubuntu base image | 18.04 | - | +| Tool | Version | Location | +| ----------------- | ------- | -------- | +| Ubuntu base image | 18.04 | - | ## Available tools -| Tool | Description | -|--- |--- | -| sort.cwl | sort lines of text 
files | -| gzip.cwl | compress or expand files | -| mv.cwl | move (rename) files | +| Tool | Description | +| -------- | ------------------------ | +| sort.cwl | sort lines of text files | +| gzip.cwl | compress or expand files | +| mv.cwl | move (rename) files | ## CWL From a4bb593d9bc886725b7ab176336861664d660ddf Mon Sep 17 00:00:00 2001 From: ionox0 Date: Tue, 29 Oct 2019 13:07:48 -0400 Subject: [PATCH 057/476] fix prefix after cwltool test + include example inputs --- picard_hsmetrics_2.8.1/example_inputs.yaml | 24 +++++++++++++++++++ .../picard_hsmetrics_2.8.1.cwl | 7 ++++-- 2 files changed, 29 insertions(+), 2 deletions(-) create mode 100644 picard_hsmetrics_2.8.1/example_inputs.yaml diff --git a/picard_hsmetrics_2.8.1/example_inputs.yaml b/picard_hsmetrics_2.8.1/example_inputs.yaml new file mode 100644 index 00000000..0ab1e497 --- /dev/null +++ b/picard_hsmetrics_2.8.1/example_inputs.yaml @@ -0,0 +1,24 @@ +bait_intervals: + class: File + metadata: {} + path: 'picard_baits.interval_list' + secondaryFiles: [] +bait_set_name: null +clip_overlapping_reads: null +coverage_cap: null +input: + class: File + path: 'test_bam.bam' +metric_accumulation_level: null +minimum_base_quality: null +minimum_mapping_quality: null +near_distance: null +output_file_name: null +per_base_coverage: null +per_target_coverage: null +sample_size: null +target_intervals: + class: File + metadata: {} + path: 'picard_targets.interval_list' + secondaryFiles: [] diff --git a/picard_hsmetrics_2.8.1/picard_hsmetrics_2.8.1.cwl b/picard_hsmetrics_2.8.1/picard_hsmetrics_2.8.1.cwl index 058f9753..a99183e7 100644 --- a/picard_hsmetrics_2.8.1/picard_hsmetrics_2.8.1.cwl +++ b/picard_hsmetrics_2.8.1/picard_hsmetrics_2.8.1.cwl @@ -1,6 +1,9 @@ class: CommandLineTool cwlVersion: v1.0 $namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' sbg: 'https://www.sevenbridges.com/' id: picard_hsmetrics_2_8_1 baseCommand: @@ -116,7 +119,6 @@ 
arguments: prefix: '-jar' valueFrom: /usr/local/bin/picard.jar - position: 0 - prefix: '' valueFrom: CollectHsMetrics - position: 0 prefix: OUTPUT= @@ -135,6 +137,7 @@ requirements: - class: DockerRequirement dockerPull: 'mskaccess/picard:0.6.1' - class: InlineJavascriptRequirement + 'dct:contributor': - class: 'foaf:Organization' 'foaf:member': @@ -151,5 +154,5 @@ requirements: 'foaf:name': Memorial Sloan Kettering Cancer Center 'doap:release': - class: 'doap:Version' - 'doap:name': picard + 'doap:name': hsmetrics 'doap:revision': 2.8.1 From ca973ca923f25676f42ff1d6564dc9923c991966 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Tue, 29 Oct 2019 14:54:22 -0400 Subject: [PATCH 058/476] Adding MD changes --- .../picard_mark_duplicates_2.8.1.cwl | 8 +++----- utilities_ubuntu_18.04/mv.cwl | 2 +- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/picard_mark_duplicates_2.8.1/picard_mark_duplicates_2.8.1.cwl b/picard_mark_duplicates_2.8.1/picard_mark_duplicates_2.8.1.cwl index 8ad50ddf..8947cc4a 100644 --- a/picard_mark_duplicates_2.8.1/picard_mark_duplicates_2.8.1.cwl +++ b/picard_mark_duplicates_2.8.1/picard_mark_duplicates_2.8.1.cwl @@ -4,6 +4,7 @@ $namespaces: dct: 'http://purl.org/dc/terms/' doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' id: picard_mark_duplicates_2_8_1 baseCommand: - java @@ -150,8 +151,8 @@ arguments: } requirements: - class: ResourceRequirement - ramMin: "${\r if(inputs.memory_per_job && inputs.memory_overhead) {\r \r return inputs.memory_per_job + inputs.memory_overhead\r }\r else if (inputs.memory_per_job && !inputs.memory_overhead){\r \r \treturn inputs.memory_per_job + 2000\r }\r else if(!inputs.memory_per_job && inputs.memory_overhead){\r \r return 15000 + inputs.memory_overhead\r }\r else {\r \r \treturn 17000 \r }\r}" - coresMin: "${\r if (inputs.number_of_threads) {\r \r \treturn inputs.number_of_threads \r }\r else {\r \r return 2\r }\r}" + ramMin: 17000 + 
coresMin: 2 - class: DockerRequirement dockerPull: 'mskaccess/picard:0.6.1' - class: InlineJavascriptRequirement @@ -173,6 +174,3 @@ requirements: - class: 'doap:Version' 'doap:name': picard 'doap:revision': 2.8.1 - - class: 'doap:Version' - 'doap:name': cwl-wrapper - 'doap:revision': 1.0.0 diff --git a/utilities_ubuntu_18.04/mv.cwl b/utilities_ubuntu_18.04/mv.cwl index c2450800..401b5b34 100644 --- a/utilities_ubuntu_18.04/mv.cwl +++ b/utilities_ubuntu_18.04/mv.cwl @@ -45,7 +45,7 @@ outputs: label: mv requirements: - class: ResourceRequirement - ramMin: 100 + ramMin: 2000 coresMin: 1 - class: DockerRequirement dockerPull: 'ubuntu:18.04' From b8f41555a3a25b83db5007644889959697e08410 Mon Sep 17 00:00:00 2001 From: shalabhsuman Date: Wed, 30 Oct 2019 13:42:12 -0400 Subject: [PATCH 059/476] Made modifications for Hard-coded RAM/CPU requirents on CWL --- .../picard_collectmultiplemetrics_2-8-1.cwl | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/picard_collectmultiplemetric_2.8.1/picard_collectmultiplemetrics_2-8-1.cwl b/picard_collectmultiplemetric_2.8.1/picard_collectmultiplemetrics_2-8-1.cwl index afb5d72c..9cb29e62 100644 --- a/picard_collectmultiplemetric_2.8.1/picard_collectmultiplemetrics_2-8-1.cwl +++ b/picard_collectmultiplemetric_2.8.1/picard_collectmultiplemetrics_2-8-1.cwl @@ -5,7 +5,7 @@ $namespaces: doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' sbg: 'https://www.sevenbridges.com/' -id: picard_mark_duplicates_2_8_1 +id: picard_collectmultiplemetrics_2.8.1 baseCommand: - java inputs: @@ -187,8 +187,10 @@ arguments: } requirements: - class: ResourceRequirement - ramMin: "${\r if(inputs.memory_per_job && inputs.memory_overhead) {\r \r return inputs.memory_per_job + inputs.memory_overhead\r }\r else if (inputs.memory_per_job && !inputs.memory_overhead){\r \r \treturn inputs.memory_per_job + 2000\r }\r else if(!inputs.memory_per_job && inputs.memory_overhead){\r \r return 15000 + 
inputs.memory_overhead\r }\r else {\r \r \treturn 17000 \r }\r}" - coresMin: "${\r if (inputs.number_of_threads) {\r \r \treturn inputs.number_of_threads \r }\r else {\r \r return 2\r }\r}" + ramMin: 10000 + coresMin: 8 + #ramMin: "${\r if(inputs.memory_per_job && inputs.memory_overhead) {\r \r return inputs.memory_per_job + inputs.memory_overhead\r }\r else if (inputs.memory_per_job && !inputs.memory_overhead){\r \r \treturn inputs.memory_per_job + 2000\r }\r else if(!inputs.memory_per_job && inputs.memory_overhead){\r \r return 15000 + inputs.memory_overhead\r }\r else {\r \r \treturn 17000 \r }\r}" + #coresMin: "${\r if (inputs.number_of_threads) {\r \r \treturn inputs.number_of_threads \r }\r else {\r \r return 2\r }\r}" - class: DockerRequirement dockerPull: 'mskaccess/picard:0.6.1' - class: InlineJavascriptRequirement @@ -196,15 +198,15 @@ requirements: - class: 'foaf:Organization' 'foaf:member': - class: 'foaf:Person' - 'foaf:mbox': 'mailto:shahr2@mskcc.org' - 'foaf:name': Ronak Shah + 'foaf:mbox': 'mailto:sumans@mskcc.org' + 'foaf:name': Shalabh Suman 'foaf:name': Memorial Sloan Kettering Cancer Center 'dct:creator': - class: 'foaf:Organization' 'foaf:member': - class: 'foaf:Person' - 'foaf:mbox': 'mailto:shahr2@mskcc.org' - 'foaf:name': Ronak Shah + 'foaf:mbox': 'mailto:sumans@mskcc.org' + 'foaf:name': Shalabh Suman 'foaf:name': Memorial Sloan Kettering Cancer Center 'doap:release': - class: 'doap:Version' From eecfec87f20d8b331784c3d7e7aac03971004167 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Wed, 30 Oct 2019 16:01:53 -0400 Subject: [PATCH 060/476] Updating waltz --- trim_galore_0.6.2/trim_galore_0.6.2.cwl | 4 +--- waltz_pileupmatrices_3.1.1/waltz_pileupmatrices_3.1.1.cwl | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/trim_galore_0.6.2/trim_galore_0.6.2.cwl b/trim_galore_0.6.2/trim_galore_0.6.2.cwl index 7bac7988..701717e7 100644 --- a/trim_galore_0.6.2/trim_galore_0.6.2.cwl +++ b/trim_galore_0.6.2/trim_galore_0.6.2.cwl @@ -4,6 
+4,7 @@ $namespaces: dct: 'http://purl.org/dc/terms/' doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' id: trim_galore_0_6_2 baseCommand: - trim_galore @@ -23,9 +24,6 @@ inputs: inputBinding: position: 0 prefix: '--cores' - - id: path_to_trim_galore - type: File? - doc: Path to trim_galore executable file - id: adapter type: string? inputBinding: diff --git a/waltz_pileupmatrices_3.1.1/waltz_pileupmatrices_3.1.1.cwl b/waltz_pileupmatrices_3.1.1/waltz_pileupmatrices_3.1.1.cwl index 4449ee4e..67b91ed9 100644 --- a/waltz_pileupmatrices_3.1.1/waltz_pileupmatrices_3.1.1.cwl +++ b/waltz_pileupmatrices_3.1.1/waltz_pileupmatrices_3.1.1.cwl @@ -90,7 +90,7 @@ requirements: # ramMin: "${\r if(inputs.memory_per_job && inputs.memory_overhead) {\r \r return inputs.memory_per_job + inputs.memory_overhead\r }\r else if (inputs.memory_per_job && !inputs.memory_overhead){\r \r \treturn inputs.memory_per_job + 2000\r }\r else if(!inputs.memory_per_job && inputs.memory_overhead){\r \r return 8000 + inputs.memory_overhead\r }\r else {\r \r \treturn 8000 \r }\r}" # coresMin: "${\r if (inputs.number_of_threads) {\r \r \treturn inputs.number_of_threads \r }\r else {\r \r return 1\r }\r}" - class: DockerRequirement - dockerPull: 'mskcc/waltz:0.6.1' + dockerPull: 'mskaccess/waltz:0.6.1' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' From 0611a8350b69d3a236edbc639b1ff2ebdc1a9f95 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Thu, 31 Oct 2019 11:32:49 -0400 Subject: [PATCH 061/476] update tools to tag 0.6.2 --- abra2_2.17/abra2_2.17.cwl | 2 +- marianas_collapsing_first_pass_1.8.1/marianas_first_pass.cwl | 2 +- marianas_collapsing_second_pass_1.8.1/marianas_second_pass.cwl | 2 +- marianas_process_loop_umi_1.8.1/marianas_process_loop_umi.cwl | 2 +- marianas_separate_bams_1.8.1/marianas_separate_bams_1.8.1.cwl | 2 +- merge_fastq_0.1.7/merge_fastq_0.1.7.cwl | 2 +- mutect_1.1.5/mutect_1.1.5.cwl | 
2 +- .../picard_add_or_replace_read_groups_1.96.cwl | 2 +- .../picard_collect_alignment_summary_metrics_2.8.1.cwl | 2 +- .../picard_fix_mate_information_1.96.cwl | 2 +- picard_hsmetrics_2.8.1/picard_hsmetrics_2.8.1.cwl | 2 +- picard_mark_duplicates_1.96/picard_mark_duplicates_1.96.cwl | 2 +- picard_mark_duplicates_2.8.1/picard_mark_duplicates_2.8.1.cwl | 2 +- trim_galore_0.6.2/trim_galore_0.6.2.cwl | 2 +- waltz_count_reads_3.1.1/waltz_count_reads_3.1.1.cwl | 2 +- waltz_pileupmatrices_3.1.1/waltz_pileupmatrices_3.1.1.cwl | 2 +- 16 files changed, 16 insertions(+), 16 deletions(-) diff --git a/abra2_2.17/abra2_2.17.cwl b/abra2_2.17/abra2_2.17.cwl index 4aca77b7..c2403cdc 100644 --- a/abra2_2.17/abra2_2.17.cwl +++ b/abra2_2.17/abra2_2.17.cwl @@ -183,7 +183,7 @@ requirements: ramMin: 60000 coresMin: 16 - class: DockerRequirement - dockerPull: 'mskaccess/abra2:0.6.1' + dockerPull: 'mskaccess/abra2:0.6.2' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/marianas_collapsing_first_pass_1.8.1/marianas_first_pass.cwl b/marianas_collapsing_first_pass_1.8.1/marianas_first_pass.cwl index 8954b32a..5403a2c9 100644 --- a/marianas_collapsing_first_pass_1.8.1/marianas_first_pass.cwl +++ b/marianas_collapsing_first_pass_1.8.1/marianas_first_pass.cwl @@ -137,7 +137,7 @@ requirements: # } coresMin: 1 - class: DockerRequirement - dockerPull: 'mskaccess/marianas:0.6.1' + dockerPull: 'mskaccess/marianas:0.6.2' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/marianas_collapsing_second_pass_1.8.1/marianas_second_pass.cwl b/marianas_collapsing_second_pass_1.8.1/marianas_second_pass.cwl index 4fdd46f3..5b7f2c9b 100644 --- a/marianas_collapsing_second_pass_1.8.1/marianas_second_pass.cwl +++ b/marianas_collapsing_second_pass_1.8.1/marianas_second_pass.cwl @@ -141,7 +141,7 @@ requirements: # } coresMin: 1 - class: DockerRequirement - dockerPull: 'mskaccess/marianas:0.6.1' + dockerPull: 
'mskaccess/marianas:0.6.2' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/marianas_process_loop_umi_1.8.1/marianas_process_loop_umi.cwl b/marianas_process_loop_umi_1.8.1/marianas_process_loop_umi.cwl index 5b7a7f95..b730adc4 100644 --- a/marianas_process_loop_umi_1.8.1/marianas_process_loop_umi.cwl +++ b/marianas_process_loop_umi_1.8.1/marianas_process_loop_umi.cwl @@ -124,7 +124,7 @@ requirements: # } coresMin: 1 - class: DockerRequirement - dockerPull: 'mskaccess/marianas:0.6.1' + dockerPull: 'mskaccess/marianas:0.6.2' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/marianas_separate_bams_1.8.1/marianas_separate_bams_1.8.1.cwl b/marianas_separate_bams_1.8.1/marianas_separate_bams_1.8.1.cwl index 55e52b00..eda65adc 100644 --- a/marianas_separate_bams_1.8.1/marianas_separate_bams_1.8.1.cwl +++ b/marianas_separate_bams_1.8.1/marianas_separate_bams_1.8.1.cwl @@ -91,7 +91,7 @@ requirements: ramMin: 30000 coresMin: 1 - class: DockerRequirement - dockerPull: 'mskaccess/marianas:0.6.1' + dockerPull: 'mskaccess/marianas:0.6.2' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/merge_fastq_0.1.7/merge_fastq_0.1.7.cwl b/merge_fastq_0.1.7/merge_fastq_0.1.7.cwl index be5f28a7..f81026b4 100644 --- a/merge_fastq_0.1.7/merge_fastq_0.1.7.cwl +++ b/merge_fastq_0.1.7/merge_fastq_0.1.7.cwl @@ -91,7 +91,7 @@ requirements: ramMin: 8000 coresMin: 1 - class: DockerRequirement - dockerPull: 'mskaccess/merge_fastq:0.6.1' + dockerPull: 'mskaccess/merge_fastq:0.6.2' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/mutect_1.1.5/mutect_1.1.5.cwl b/mutect_1.1.5/mutect_1.1.5.cwl index 222e8714..d15e5fcf 100644 --- a/mutect_1.1.5/mutect_1.1.5.cwl +++ b/mutect_1.1.5/mutect_1.1.5.cwl @@ -725,7 +725,7 @@ requirements: ramMin: 34000 coresMin: 1 - class: DockerRequirement - dockerPull: 
mskaccess/mutect:0.6.1 + dockerPull: mskaccess/mutect:0.6.2 - class: InlineJavascriptRequirement dct:contributor: diff --git a/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl b/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl index dd8f32de..d1f51ad1 100644 --- a/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl +++ b/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl @@ -190,7 +190,7 @@ requirements: ramMin: 16000 coresMin: 2 - class: DockerRequirement - dockerPull: 'mskaccess/picard_1.96:0.6.1' + dockerPull: 'mskaccess/picard_1.96:0.6.2' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/picard_collect_alignment_summary_metrics_2.8.1/picard_collect_alignment_summary_metrics_2.8.1.cwl b/picard_collect_alignment_summary_metrics_2.8.1/picard_collect_alignment_summary_metrics_2.8.1.cwl index 6bce80ac..bb3d0975 100644 --- a/picard_collect_alignment_summary_metrics_2.8.1/picard_collect_alignment_summary_metrics_2.8.1.cwl +++ b/picard_collect_alignment_summary_metrics_2.8.1/picard_collect_alignment_summary_metrics_2.8.1.cwl @@ -156,7 +156,7 @@ requirements: ramMin: 12000 coresMin: 1 - class: DockerRequirement - dockerPull: 'mskaccess/picard:0.6.1' + dockerPull: 'mskaccess/picard:0.6.2' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl b/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl index bfcdc7fe..1287b1c7 100644 --- a/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl +++ b/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl @@ -139,7 +139,7 @@ requirements: ramMin: 16000 coresMin: 2 - class: DockerRequirement - dockerPull: 'mskaccess/picard_1.96:0.6.1' + dockerPull: 'mskaccess/picard_1.96:0.6.2' - class: 
InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/picard_hsmetrics_2.8.1/picard_hsmetrics_2.8.1.cwl b/picard_hsmetrics_2.8.1/picard_hsmetrics_2.8.1.cwl index a99183e7..4e23cc79 100644 --- a/picard_hsmetrics_2.8.1/picard_hsmetrics_2.8.1.cwl +++ b/picard_hsmetrics_2.8.1/picard_hsmetrics_2.8.1.cwl @@ -135,7 +135,7 @@ requirements: ramMin: 4000 coresMin: 1 - class: DockerRequirement - dockerPull: 'mskaccess/picard:0.6.1' + dockerPull: 'mskaccess/picard:0.6.2' - class: InlineJavascriptRequirement 'dct:contributor': diff --git a/picard_mark_duplicates_1.96/picard_mark_duplicates_1.96.cwl b/picard_mark_duplicates_1.96/picard_mark_duplicates_1.96.cwl index 4c45a2a7..5b9c5dec 100644 --- a/picard_mark_duplicates_1.96/picard_mark_duplicates_1.96.cwl +++ b/picard_mark_duplicates_1.96/picard_mark_duplicates_1.96.cwl @@ -115,7 +115,7 @@ requirements: ramMin: "${\r if(inputs.memory_per_job && inputs.memory_overhead) {\r \r return inputs.memory_per_job + inputs.memory_overhead\r }\r else if (inputs.memory_per_job && !inputs.memory_overhead){\r \r \treturn inputs.memory_per_job + 2000\r }\r else if(!inputs.memory_per_job && inputs.memory_overhead){\r \r return 15000 + inputs.memory_overhead\r }\r else {\r \r \treturn 17000 \r }\r}" coresMin: "${\r if (inputs.number_of_threads) {\r \r \treturn inputs.number_of_threads \r }\r else {\r \r return 2\r }\r}" - class: DockerRequirement - dockerPull: 'mskaccess/picard_1.96:0.6.1' + dockerPull: 'mskaccess/picard_1.96:0.6.2' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/picard_mark_duplicates_2.8.1/picard_mark_duplicates_2.8.1.cwl b/picard_mark_duplicates_2.8.1/picard_mark_duplicates_2.8.1.cwl index 8947cc4a..8806ef3e 100644 --- a/picard_mark_duplicates_2.8.1/picard_mark_duplicates_2.8.1.cwl +++ b/picard_mark_duplicates_2.8.1/picard_mark_duplicates_2.8.1.cwl @@ -154,7 +154,7 @@ requirements: ramMin: 17000 coresMin: 2 - class: DockerRequirement - 
dockerPull: 'mskaccess/picard:0.6.1' + dockerPull: 'mskaccess/picard:0.6.2' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/trim_galore_0.6.2/trim_galore_0.6.2.cwl b/trim_galore_0.6.2/trim_galore_0.6.2.cwl index 701717e7..35eb150a 100644 --- a/trim_galore_0.6.2/trim_galore_0.6.2.cwl +++ b/trim_galore_0.6.2/trim_galore_0.6.2.cwl @@ -160,7 +160,7 @@ requirements: ramMin: 8000 coresMin: 4 - class: DockerRequirement - dockerPull: 'mskaccess/trim_galore:0.6.1' + dockerPull: 'mskaccess/trim_galore:0.6.2' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/waltz_count_reads_3.1.1/waltz_count_reads_3.1.1.cwl b/waltz_count_reads_3.1.1/waltz_count_reads_3.1.1.cwl index 3a6a7d70..a1cc8c2a 100644 --- a/waltz_count_reads_3.1.1/waltz_count_reads_3.1.1.cwl +++ b/waltz_count_reads_3.1.1/waltz_count_reads_3.1.1.cwl @@ -78,7 +78,7 @@ requirements: # ramMin: "${\r if(inputs.memory_per_job && inputs.memory_overhead) {\r \r return inputs.memory_per_job + inputs.memory_overhead\r }\r else if (inputs.memory_per_job && !inputs.memory_overhead){\r \r \treturn inputs.memory_per_job + 2000\r }\r else if(!inputs.memory_per_job && inputs.memory_overhead){\r \r return 8000 + inputs.memory_overhead\r }\r else {\r \r \treturn 8000 \r }\r}" # coresMin: "${\r if (inputs.number_of_threads) {\r \r \treturn inputs.number_of_threads \r }\r else {\r \r return 1\r }\r}" - class: DockerRequirement - dockerPull: 'mskaccess/waltz:0.6.1' + dockerPull: 'mskaccess/waltz:0.6.2' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/waltz_pileupmatrices_3.1.1/waltz_pileupmatrices_3.1.1.cwl b/waltz_pileupmatrices_3.1.1/waltz_pileupmatrices_3.1.1.cwl index 67b91ed9..c0892f1e 100644 --- a/waltz_pileupmatrices_3.1.1/waltz_pileupmatrices_3.1.1.cwl +++ b/waltz_pileupmatrices_3.1.1/waltz_pileupmatrices_3.1.1.cwl @@ -90,7 +90,7 @@ requirements: # ramMin: "${\r 
if(inputs.memory_per_job && inputs.memory_overhead) {\r \r return inputs.memory_per_job + inputs.memory_overhead\r }\r else if (inputs.memory_per_job && !inputs.memory_overhead){\r \r \treturn inputs.memory_per_job + 2000\r }\r else if(!inputs.memory_per_job && inputs.memory_overhead){\r \r return 8000 + inputs.memory_overhead\r }\r else {\r \r \treturn 8000 \r }\r}" # coresMin: "${\r if (inputs.number_of_threads) {\r \r \treturn inputs.number_of_threads \r }\r else {\r \r return 1\r }\r}" - class: DockerRequirement - dockerPull: 'mskaccess/waltz:0.6.1' + dockerPull: 'mskaccess/waltz:0.6.2' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' From e7b5fadbc3aa22623c8cd8c7dd38a3543f47454d Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Thu, 31 Oct 2019 11:35:54 -0400 Subject: [PATCH 062/476] update setup files to tag 0.6.2 --- __init__.py | 2 +- cwl_commandlinetools/__init__.py | 2 +- setup.cfg | 2 +- setup.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/__init__.py b/__init__.py index 616ed899..a00a8263 100644 --- a/__init__.py +++ b/__init__.py @@ -4,4 +4,4 @@ __author__ = """msk-access""" __email__ = 'msk.access@gmail.com' -__version__ = '0.7.0' +__version__ = '0.6.2' diff --git a/cwl_commandlinetools/__init__.py b/cwl_commandlinetools/__init__.py index 616ed899..a00a8263 100644 --- a/cwl_commandlinetools/__init__.py +++ b/cwl_commandlinetools/__init__.py @@ -4,4 +4,4 @@ __author__ = """msk-access""" __email__ = 'msk.access@gmail.com' -__version__ = '0.7.0' +__version__ = '0.6.2' diff --git a/setup.cfg b/setup.cfg index c0972676..e4576a0a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.7.0 +current_version = 0.6.2 commit = True tag = True diff --git a/setup.py b/setup.py index cf158915..3751d176 100644 --- a/setup.py +++ b/setup.py @@ -38,6 +38,6 @@ test_suite='tests', tests_require=test_requirements, url='https://github.com/msk-access/cwl-commandlinetools', - 
version='0.7.0', + version='0.6.2', zip_safe=False, ) From 426d77d69165268239fe91fc42c58e48d560b1a2 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Thu, 31 Oct 2019 11:39:01 -0400 Subject: [PATCH 063/476] Updating the travis --- .travis.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 8c0c649e..8de575c0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -18,10 +18,11 @@ script: tox deploy: provider: pypi distributions: sdist bdist_wheel - user: msk-access + user: __token__ password: - secure: ithmgopowELxskUKR2LAi/cMnet6J+PH4emAOiJ57rBqFkwq8lVXmT/NW8D7k2VoJdTguB9v+RH+Q/91ShJh1VnwbRQ1bX7Ecr7P7O35DIQBp40Eqg5AH6ap8se37dsufnNyVNhj8vNtusd65jC3+6w4kQkcCfysD5eByUDDw04FNYCbhbXIn4S/JJ/EJUFFb8ElNzo5iWpVPnfP9uddYxpXTEgWZBo+TylZsa1gxMYUqio5xa34TMkUzN5N4tooe0C1uZ6H9/zRzh2pLwoof1ITf3wws/FOJkPwsJFZWviC+0K1pNJTaTY77kHEEC5W8/JupvdShrFg+BzldLHWsGuyC+Mfj3bjJFTqyDnbthc0kosQ3x7UdTKzyztL+gekdCcNEsusN10VTdO0DYkUnYTc1YWpsC+ORQxPGANg+RrvjO8lGobvQ4ZlhcWBJ1ynlvntlg+b0iHzuOntznwZGTJRcTqtjsH+zzIEo12FiWnJwjvM78OkEsNp5XYaKgYryhQBIo5Uqy79h7wtF8mAnIRrsK5cyQmYTJtWZ+OIQbuzj+l9o+Ff19hykD7LEB8I9So8240w09+HoNxpZKYMQFndGGZnC1wkoZNdBEOjn9Az9ZouHO7qkFHZVSp3rqSAEqiG8fm3TTl/5VAO9RNEKT5vmZMDhl04F6LXZYGCLKU= + secure: g9Ja5QDLc1WGu50xpmXl6wcP7qRNzfYZk7i3PEJtQNO6JLPtxEmBgDAb4+RedRxLo9MRmws/n/bFkTOSP837d+tJ91cYN6TFbVu2teWiR6hblDX/Twhbceq/MjdYJyAVsH+KpuORjuJGqzk2I4QLzI+B/0mXuWcE4EPaCZ5mpm0aYYOTLW1Ukxl1j/PoV8wWC2glItLQ02zIvLyr276+en+RAdWYwqW8sY7rn4hI6VaM78OMsc2/cvG27X82SX4rBxJ3/VveslAc3O7Kck02ltOPyOLI3w++HEVvhHAaCK3kDxNEYQCMly1lDYWTfAGm2F5TZ5mgt2adb08AN//0GnWQOfciHh3JUrIt7po7B5Zs8kmZNGGTJFog8o+btU4pAeCDIt61lFyMo7VVpvPzR4ToiGP3zBvGEgnZd7WpTI0H0E4oc821vl9SAN+3aWQhDxDHl+z3VDwpZTA18mgQikFNc7asKDSXCAGoStI/YFWjw3X+tvFGMXR+R6dpmeSplFFSOx9L3TbrtymWProH8MOyxSVNDdQG6Vz41bN9IS47GRI+/1A9jXxwGurKY1ZL7HZDApDx42Fn2RdOFQNyLCeCneI+RUXtkHY56tH3GpBmnnJX6bKPrn4+VIbgd1VCahPrG8INqxx+SE4JojdIQHcxDy924PCL8mS4hakE4Z0= on: tags: true repo: msk-access/cwl-commandlinetools python: 2.7 + 
From f76b689443a865af6267c17e4a150cc975eb96bd Mon Sep 17 00:00:00 2001 From: shalabhsuman Date: Thu, 31 Oct 2019 15:41:01 -0400 Subject: [PATCH 064/476] Changes on Output Argument inside CWL --- .../picard_collectmultiplemetrics_2-8-1.cwl | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/picard_collectmultiplemetric_2.8.1/picard_collectmultiplemetrics_2-8-1.cwl b/picard_collectmultiplemetric_2.8.1/picard_collectmultiplemetrics_2-8-1.cwl index 9cb29e62..2aacfb57 100644 --- a/picard_collectmultiplemetric_2.8.1/picard_collectmultiplemetrics_2-8-1.cwl +++ b/picard_collectmultiplemetric_2.8.1/picard_collectmultiplemetrics_2-8-1.cwl @@ -5,7 +5,7 @@ $namespaces: doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' sbg: 'https://www.sevenbridges.com/' -id: picard_collectmultiplemetrics_2.8.1 +id: picard_collectmultiplemetrics_2_8_1 baseCommand: - java inputs: @@ -182,15 +182,13 @@ arguments: if(inputs.output_file_name){ return inputs.output_file_name } else { - return inputs.input.basename.replace(/.bam/,'_md.bam') + return inputs.input.basename.replace(/.bam/,'_multiple_metrics') } } requirements: - class: ResourceRequirement ramMin: 10000 coresMin: 8 - #ramMin: "${\r if(inputs.memory_per_job && inputs.memory_overhead) {\r \r return inputs.memory_per_job + inputs.memory_overhead\r }\r else if (inputs.memory_per_job && !inputs.memory_overhead){\r \r \treturn inputs.memory_per_job + 2000\r }\r else if(!inputs.memory_per_job && inputs.memory_overhead){\r \r return 15000 + inputs.memory_overhead\r }\r else {\r \r \treturn 17000 \r }\r}" - #coresMin: "${\r if (inputs.number_of_threads) {\r \r \treturn inputs.number_of_threads \r }\r else {\r \r return 2\r }\r}" - class: DockerRequirement dockerPull: 'mskaccess/picard:0.6.1' - class: InlineJavascriptRequirement From bea5296410cee67bc73b0a436ab3798740456ca5 Mon Sep 17 00:00:00 2001 From: shalabhsuman Date: Thu, 31 Oct 2019 16:01:47 -0400 Subject: [PATCH 065/476] Added Output Files inside 
CWL --- .../picard_collectmultiplemetrics_2-8-1.cwl | 82 +++++++++++++++---- 1 file changed, 68 insertions(+), 14 deletions(-) diff --git a/picard_collectmultiplemetric_2.8.1/picard_collectmultiplemetrics_2-8-1.cwl b/picard_collectmultiplemetric_2.8.1/picard_collectmultiplemetrics_2-8-1.cwl index 2aacfb57..0c170a21 100644 --- a/picard_collectmultiplemetric_2.8.1/picard_collectmultiplemetrics_2-8-1.cwl +++ b/picard_collectmultiplemetric_2.8.1/picard_collectmultiplemetrics_2-8-1.cwl @@ -149,20 +149,74 @@ inputs: Default value: false. This option can be set to 'null' to clear the default value. Possible values: {true, false} outputs: - - id: all_metrics - type: Directory - outputBinding: - glob: . - outputEval: |- - ${ - if(inputs.output_file_name){ - self[0].basename = inputs.output_file_name + '_picard_metrics'; - return self[0] - } else { - self[0].basename = 'picard_metrics'; - return self[0] - } - } + - id: alignment_summary_metrics + type: File? + outputBinding: + glob: '*alignment_summary_metrics' + - id: bait_bias_detail_metrics + type: File? + outputBinding: + glob: '*bait_bias_detail_metrics' + - id: bait_bias_summary_metrics + type: File? + outputBinding: + glob: '*bait_bias_summary_metrics' + - id: base_distribution_by_cycle_metrics + type: File? + outputBinding: + glob: '*base_distribution_by_cycle_metrics' + - id: base_distribution_by_cycle_pdf + type: File? + outputBinding: + glob: '*base_distribution_by_cycle.pdf' + - id: error_summary_metrics + type: File? + outputBinding: + glob: '*error_summary_metrics' + - id: gc_bias_detail_metrics + type: File? + outputBinding: + glob: '*gc_bias.detail_metrics' + - id: gc_bias_pdf + type: File? + outputBinding: + glob: '*gc_bias.pdf' + - id: gc_bias_summary_metrics + type: File? + outputBinding: + glob: '*gc_bias.summary_metrics' + - id: insert_size_histogram_pdf + type: File? + outputBinding: + glob: '*insert_size_histogram.pdf' + - id: insert_size_metrics + type: File? 
+ outputBinding: + glob: '*insert_size_metrics' + - id: pre_adapter_detail_metrics + type: File? + outputBinding: + glob: '*pre_adapter_detail_metrics' + - id: pre_adapter_summary_metrics + type: File? + outputBinding: + glob: '*pre_adapter_summary_metrics' + - id: quality_by_cycle_metrics + type: File? + outputBinding: + glob: '*quality_by_cycle_metrics' + - id: quality_by_cycle_pdf + type: File? + outputBinding: + glob: '*quality_by_cycle.pdf' + - id: quality_distribution_metrics + type: File? + outputBinding: + glob: '*quality_distribution_metrics' + - id: quality_distribution_pdf + type: File? + outputBinding: + glob: '*quality_distribution.pdf' label: picard_collectmultiplemetrices_2.8.1 arguments: - position: 0 From eae7e040073de23e545dc6dc2c814cc50f27960e Mon Sep 17 00:00:00 2001 From: shalabhsuman Date: Fri, 1 Nov 2019 14:18:42 -0400 Subject: [PATCH 066/476] made changes on the cwl code to optimize for CWLTOOL pass --- .../picard_collectmultiplemetrics_2-8-1.cwl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/picard_collectmultiplemetric_2.8.1/picard_collectmultiplemetrics_2-8-1.cwl b/picard_collectmultiplemetric_2.8.1/picard_collectmultiplemetrics_2-8-1.cwl index 0c170a21..26f41d41 100644 --- a/picard_collectmultiplemetric_2.8.1/picard_collectmultiplemetrics_2-8-1.cwl +++ b/picard_collectmultiplemetric_2.8.1/picard_collectmultiplemetrics_2-8-1.cwl @@ -221,12 +221,14 @@ label: picard_collectmultiplemetrices_2.8.1 arguments: - position: 0 prefix: '' + separate: false valueFrom: "${\n if(inputs.memory_per_job && inputs.memory_overhead) {\n \n if(inputs.memory_per_job % 1000 == 0) {\n \t\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n \n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\" \n }\n }\n else if (inputs.memory_per_job && !inputs.memory_overhead){\n \n if(inputs.memory_per_job % 1000 == 0) {\n \t\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n \n 
return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\" \n }\n }\n else if(!inputs.memory_per_job && inputs.memory_overhead){\n \n return \"-Xmx15G\"\n }\n else {\n \n \treturn \"-Xmx15G\"\n }\n}" - position: 0 prefix: '-jar' valueFrom: /usr/local/bin/picard.jar - position: 0 prefix: '' + separate: false valueFrom: CollectMultipleMetrics - position: 0 prefix: O= From 34b1759dac9a6cf70e858c70cac26f83bb64ca1f Mon Sep 17 00:00:00 2001 From: shalabhsuman Date: Fri, 1 Nov 2019 14:22:42 -0400 Subject: [PATCH 067/476] adding an example input file --- .../example_input.yml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 picard_collectmultiplemetric_2.8.1/example_input.yml diff --git a/picard_collectmultiplemetric_2.8.1/example_input.yml b/picard_collectmultiplemetric_2.8.1/example_input.yml new file mode 100644 index 00000000..1c2354cd --- /dev/null +++ b/picard_collectmultiplemetric_2.8.1/example_input.yml @@ -0,0 +1,18 @@ +input: + class: File + path: "/Users/sumans/test_reference/bam/SeraCare_0-5_14.bam" +assume_sorted: +bam_compression_level: +create_bam_index: +dbsnp_file: +file_extension: +include_unpaired: +intervals_file: +memory_overhead: +memory_per_job: +metric_accumulation_level: +number_of_threads: +output_file_name: +program_list: +stop_after: +validation_stringency: From 6ac2dec27ab8e2b29225aa1bb647163990fa4fc4 Mon Sep 17 00:00:00 2001 From: shalabhsuman Date: Fri, 1 Nov 2019 14:41:18 -0400 Subject: [PATCH 068/476] Adding a README --- picard_collectmultiplemetric_2.8.1/README.md | 80 +++++++++++++++++++ .../{example_input.yml => example_inputs.yml} | 0 2 files changed, 80 insertions(+) create mode 100644 picard_collectmultiplemetric_2.8.1/README.md rename picard_collectmultiplemetric_2.8.1/{example_input.yml => example_inputs.yml} (100%) diff --git a/picard_collectmultiplemetric_2.8.1/README.md b/picard_collectmultiplemetric_2.8.1/README.md new file mode 100644 index 00000000..cef53fac --- /dev/null +++ 
b/picard_collectmultiplemetric_2.8.1/README.md @@ -0,0 +1,80 @@ +# CWL and Dockerfile for running Picard - CollectMultipleMetrics + +## Version of tools in docker image (../picard_mark_duplicates_2.8.1/container/Dockerfile) + +| Tool | Version | Location | +|--- |--- |--- | +| java base image | 8 | - | +| picard | 2.8.1 | https://github.com/broadinstitute/picard/releases/download/2.8.1/picard.jar | +| R | 3.3.3 | r-base for openjdk:8 | + + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner picard_collectmultiplemetrics_2-8-1.cwl example_inputs.yml +``` + +### Usage + +```bash +> usage: picard_collectmultiplemetrics_2-8-1.cwl [-h] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --input INPUT Input file (bam or sam). Required. + --output_file_name OUTPUT_FILE_NAME + Output file (bam or sam). + --metrics_acciumulation_level METRICS_ACCIUMULATION_LEVEL + The level(s) at which to accumulate metrics. Default + value: [ALL_READS]. This option can be set to 'null' + to clear the default value. Possible values: + {ALL_READS, SAMPLE, LIBRARY, READ_GROUP} This option + may be specified 0 or more times. This option can be + set to 'null' to clear the default list. + --max_insert_size MAX_INSERT_SIZE + Paired-end reads above this insert size will be + considered chimeric along with inter-chromosomal + pairs. Default value: 100000. This option can be set + to 'null' to clear the default value. + --tmp_dir TMP_DIR This option may be specified 0 or more times + --validation_stringency VALIDATION_STRINGENCY + Validation stringency for all SAM files read by this + program.
Setting stringency to SILENT can improve + performance when processing a BAM file in which + variable-length data (read, qualities, tags) do not + otherwise need to be decoded. Default value: STRICT. + This option can be set to 'null' to clear the default + value. Possible values: {STRICT,LENIENT, SILENT} + --bam_compression_level BAM_COMPRESSION_LEVEL + Compression level for all compressed files created + (e.g. BAM and GELI). Default value:5. This option can + be set to 'null' to clear the default value. + --create_bam_index Whether to create a BAM index when writing a + coordinate-sorted BAM file. Default value:false. This + option can be set to 'null' to clear the default + value. Possible values:{true, false} + --assume_sorted + --reference_sequence REFERENCE_SEQUENCE + Reference sequence file. Note that while this argument + isn't required, without it only a small subset of the + metrics will be calculated. Note also that if a + reference sequence is provided, it must be accompanied + by a sequence dictionary. Default value: null. + --stop_after STOP_AFTER + Stop after processing N reads, mainly for debugging. + Default value: 0. This option can be set to 'null' to + clear the default value. 
+``` diff --git a/picard_collectmultiplemetric_2.8.1/example_input.yml b/picard_collectmultiplemetric_2.8.1/example_inputs.yml similarity index 100% rename from picard_collectmultiplemetric_2.8.1/example_input.yml rename to picard_collectmultiplemetric_2.8.1/example_inputs.yml From ce7d8b175c29ef2fc31d15adbfc60bc6b3e17bfa Mon Sep 17 00:00:00 2001 From: shalabhsuman Date: Fri, 1 Nov 2019 14:49:06 -0400 Subject: [PATCH 069/476] Updated the Summary.md to reflect the newly added CollectMultipleMetrics Cmdtool --- docs/SUMMARY.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index cd63b5d3..fef0d169 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -22,6 +22,7 @@ * Picard Tools * [AddOrReplaceReadGroups v1.96](../picard_add_or_replace_read_groups_1.96/README.md) * [CollectAlignmentSummaryMetrics v2.8.1](../picard_collect_alignment_summary_metrics_2.8.1/README.md) + * [CollectMultipleMetrics v2.8.1](../picard_collectmultiplemetric_2.8.1/README.md) * [FixMateInformation v1.96](../picard_fix_mate_information_1.96/README.md) * [MarkDuplicates v1.96](../picard_mark_duplicates_1.96/README.md) * [MarkDuplicates v2.8.1](../picard_mark_duplicates_2.8.1/README.md) From e1614a984ec84614ac9ac4bf6decb23a583cf873 Mon Sep 17 00:00:00 2001 From: shalabhsuman Date: Fri, 1 Nov 2019 14:52:29 -0400 Subject: [PATCH 070/476] changes on Input parameter to make it more generic --- picard_collectmultiplemetric_2.8.1/example_inputs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/picard_collectmultiplemetric_2.8.1/example_inputs.yml b/picard_collectmultiplemetric_2.8.1/example_inputs.yml index 1c2354cd..72207111 100644 --- a/picard_collectmultiplemetric_2.8.1/example_inputs.yml +++ b/picard_collectmultiplemetric_2.8.1/example_inputs.yml @@ -1,6 +1,6 @@ input: class: File - path: "/Users/sumans/test_reference/bam/SeraCare_0-5_14.bam" + path: "Sample.bam" assume_sorted: bam_compression_level: create_bam_index: From 
93f9ed9475fe1f2347213b87dee04a0c0c06bb3a Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Tue, 5 Nov 2019 14:05:17 -0500 Subject: [PATCH 071/476] Update picard_collectmultiplemetrics_2-8-1.cwl Removing cwl-wrapper tag and changing docker version to 0.6.2 --- .../picard_collectmultiplemetrics_2-8-1.cwl | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/picard_collectmultiplemetric_2.8.1/picard_collectmultiplemetrics_2-8-1.cwl b/picard_collectmultiplemetric_2.8.1/picard_collectmultiplemetrics_2-8-1.cwl index 26f41d41..d5937222 100644 --- a/picard_collectmultiplemetric_2.8.1/picard_collectmultiplemetrics_2-8-1.cwl +++ b/picard_collectmultiplemetric_2.8.1/picard_collectmultiplemetrics_2-8-1.cwl @@ -246,7 +246,7 @@ requirements: ramMin: 10000 coresMin: 8 - class: DockerRequirement - dockerPull: 'mskaccess/picard:0.6.1' + dockerPull: 'mskaccess/picard:0.6.2' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' @@ -266,6 +266,3 @@ requirements: - class: 'doap:Version' 'doap:name': picard 'doap:revision': 2.8.1 - - class: 'doap:Version' - 'doap:name': cwl-wrapper - 'doap:revision': 1.0.0 From 8cf184af14bef52ccbb17d719c7df1e8dcf97a5d Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Wed, 6 Nov 2019 13:53:58 -0500 Subject: [PATCH 072/476] adding markduplicated 2.21.2 --- ...card_add_or_replace_read_groups_2.21.2.cwl | 212 ++++++++++++++ ...llect_alignment_summary_metrics_2.21.2.cwl | 178 ++++++++++++ .../picard_collectmultiplemetrics_2.21.2.cwl | 268 ++++++++++++++++++ .../picard_fix_mate_information_2.21.2.cwl | 161 +++++++++++ .../picard_hsmetrics_2.21.2.cwl | 158 +++++++++++ .../picard_mark_duplicates_2.21.2.cwl | 181 ++++++++++++ 6 files changed, 1158 insertions(+) create mode 100644 picard_add_or_replace_read_groups_2.21.2/picard_add_or_replace_read_groups_2.21.2.cwl create mode 100644 picard_collect_alignment_summary_metrics_2.21.2/picard_collect_alignment_summary_metrics_2.21.2.cwl create mode 100644 
picard_collectmultiplemetric_2.21.2/picard_collectmultiplemetrics_2.21.2.cwl create mode 100644 picard_fix_mate_information_2.21.2/picard_fix_mate_information_2.21.2.cwl create mode 100644 picard_hsmetrics_2.21.2/picard_hsmetrics_2.21.2.cwl create mode 100644 picard_mark_duplicates_2.21.2/picard_mark_duplicates_2.21.2.cwl diff --git a/picard_add_or_replace_read_groups_2.21.2/picard_add_or_replace_read_groups_2.21.2.cwl b/picard_add_or_replace_read_groups_2.21.2/picard_add_or_replace_read_groups_2.21.2.cwl new file mode 100644 index 00000000..d1f51ad1 --- /dev/null +++ b/picard_add_or_replace_read_groups_2.21.2/picard_add_or_replace_read_groups_2.21.2.cwl @@ -0,0 +1,212 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' +id: picard_add_or_replace_read_groups_1_96 +baseCommand: + - java +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: input + type: File + inputBinding: + position: 0 + prefix: I= + separate: false + doc: Input file ( sam). Required. + - id: output_file_name + type: string? + doc: Output file name (bam or sam). Not Required + - id: sort_order + type: string? + inputBinding: + position: 0 + prefix: SO= + separate: false + doc: >- + Optional sort order to output in. If not supplied OUTPUT is in the same + order as INPUT.Default value: null. Possible values: {unsorted, queryname, + coordinate} + - id: read_group_identifier + type: string + inputBinding: + position: 0 + prefix: RGID= + separate: false + doc: >- + Read Group ID Default value: 1. 
This option can be set to 'null' to clear + the default value Required + - id: read_group_sequnecing_center + type: string + inputBinding: + position: 0 + prefix: RGCN= + separate: false + doc: 'Read Group sequencing center name Default value: null. Required' + - id: read_group_library + type: int + inputBinding: + position: 0 + prefix: RGLB= + separate: false + doc: Read Group Library. Required + - id: read_group_platform_unit + type: string + inputBinding: + position: 0 + prefix: RGPU= + separate: false + doc: Read Group platform unit (eg. run barcode) Required. + - id: read_group_sample_name + type: string + inputBinding: + position: 0 + prefix: RGSM= + separate: false + doc: Read Group sample name. Required + - id: read_group_sequencing_platform + type: string + inputBinding: + position: 0 + prefix: RGPL= + separate: false + doc: 'Read Group platform (e.g. illumina, solid) Required.' + - id: read_group_description + type: string? + inputBinding: + position: 0 + prefix: RGDS= + separate: false + doc: 'Read Group description Default value: null.' + - id: read_group_run_date + type: string? + inputBinding: + position: 0 + prefix: RGDT= + separate: false + doc: 'Read Group run date Default value: null.' + - id: tmp_dir + type: string? + inputBinding: + position: 0 + prefix: TMP_DIR= + separate: false + doc: This option may be specified 0 or more times + - id: validation_stringency + type: string? + inputBinding: + position: 0 + prefix: VALIDATION_STRINGENCY= + separate: false + doc: >- + Validation stringency for all SAM files read by this program. Setting + stringency to SILENT can improve performance when processing a BAM file in + which variable-length data (read, qualities, tags) do not otherwise need + to be decoded. Default value: STRICT. This option can be set to 'null' to + clear the default value. Possible values: {STRICT,LENIENT, SILENT} + - id: bam_compression_level + type: int? 
+ inputBinding: + position: 0 + prefix: COMPRESSION_LEVEL= + separate: false + doc: >- + Compression level for all compressed files created (e.g. BAM and GELI). + Default value:5. This option can be set to 'null' to clear the default + value. + - default: true + id: create_bam_index + type: boolean? + inputBinding: + position: 0 + prefix: CREATE_INDEX=true + doc: >- + Whether to create a BAM index when writing a coordinate-sorted BAM file. + Default value:false. This option can be set to 'null' to clear the default + value. Possible values:{true, false} +outputs: + - id: bam + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_file_name) + return inputs.output_file_name; + return inputs.input.basename.replace(/.sam$/, '_srt.bam'); + } + secondaryFiles: + - ^.bai +label: picard_add_or_replace_read_groups_1.96 +arguments: + - position: 0 + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx15G" + } + else { + return "-Xmx15G" + } + } + - position: 0 + prefix: '-jar' + valueFrom: /usr/local/bin/AddOrReplaceReadGroups.jar + - position: 0 + prefix: O= + separate: false + valueFrom: |- + ${ + if(inputs.output_file_name) + return inputs.output_file_name; + return inputs.input.basename.replace(/.sam$/, '_srt.bam'); + } +requirements: + - class: ResourceRequirement + ramMin: 16000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'mskaccess/picard_1.96:0.6.2' + - class: InlineJavascriptRequirement 
+'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': picard + 'doap:revision': 1.96 diff --git a/picard_collect_alignment_summary_metrics_2.21.2/picard_collect_alignment_summary_metrics_2.21.2.cwl b/picard_collect_alignment_summary_metrics_2.21.2/picard_collect_alignment_summary_metrics_2.21.2.cwl new file mode 100644 index 00000000..bb3d0975 --- /dev/null +++ b/picard_collect_alignment_summary_metrics_2.21.2/picard_collect_alignment_summary_metrics_2.21.2.cwl @@ -0,0 +1,178 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' +id: picard_collect_alignment_summary_metrics_2.8.1 +baseCommand: + - java +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: input + type: File + inputBinding: + position: 0 + prefix: I= + separate: false + doc: Input file (bam or sam). Required. + - id: output_file_name + type: string? + doc: Output file (bam or sam). + - id: metrics_acciumulation_level + type: string? + inputBinding: + position: 0 + prefix: LEVEL= + separate: false + doc: >- + The level(s) at which to accumulate metrics. Default value: [ALL_READS]. + This option can be set to 'null' to clear the default value. Possible + values: {ALL_READS, SAMPLE, LIBRARY, READ_GROUP} This option may be + specified 0 or more times. This option can be set to 'null' to clear the + default list. 
+ - id: max_insert_size + type: int? + inputBinding: + position: 0 + prefix: MAX_INSERT_SIZE= + separate: false + doc: >- + Paired-end reads above this insert size will be considered chimeric along + with inter-chromosomal pairs. Default value: 100000. This option can be + set to 'null' to clear the default value. + - id: tmp_dir + type: string? + inputBinding: + position: 0 + prefix: TMP_DIR= + separate: false + doc: This option may be specified 0 or more times + - id: validation_stringency + type: string? + inputBinding: + position: 0 + prefix: VALIDATION_STRINGENCY= + separate: false + doc: >- + Validation stringency for all SAM files read by this program. Setting + stringency to SILENT can improve performance when processing a BAM file in + which variable-length data (read, qualities, tags) do not otherwise need + to be decoded. Default value: STRICT. This option can be set to 'null' to + clear the default value. Possible values: {STRICT,LENIENT, SILENT} + - default: true + id: assume_sorted + type: boolean? + inputBinding: + position: 0 + prefix: AS=true + - id: reference_sequence + type: File + inputBinding: + position: 0 + prefix: R= + separate: false + doc: >- + Reference sequence file. Note that while this argument isn't required, + without it only a small subset of the metrics will be calculated. Note + also that if a reference sequence is provided, it must be accompanied by a + sequence dictionary. Default value: null. + secondaryFiles: + - ^.dict + - id: stop_after + type: int? + inputBinding: + position: 0 + prefix: STOP_AFTER= + doc: >- + Stop after processing N reads, mainly for debugging. Default value: 0. + This option can be set to 'null' to clear the default value. 
+outputs: + - id: alignment_metrics + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/,'_alignment_metrics.txt') + } + } +label: picard_collect_alignment_summary_metrics_2.8.1 +arguments: + - position: 0 + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx8G" + } + else { + return "-Xmx8G" + } + + } + - position: 0 + prefix: '-jar' + valueFrom: /usr/local/bin/picard.jar + - position: 0 + valueFrom: CollectAlignmentSummaryMetrics + - position: 0 + prefix: O= + separate: false + valueFrom: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/,'_alignment_metrics.txt') + } + } +requirements: + - class: ResourceRequirement + ramMin: 12000 + coresMin: 1 + - class: DockerRequirement + dockerPull: 'mskaccess/picard:0.6.2' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 
'doap:name': picard + 'doap:revision': 2.8.1 diff --git a/picard_collectmultiplemetric_2.21.2/picard_collectmultiplemetrics_2.21.2.cwl b/picard_collectmultiplemetric_2.21.2/picard_collectmultiplemetrics_2.21.2.cwl new file mode 100644 index 00000000..d5937222 --- /dev/null +++ b/picard_collectmultiplemetric_2.21.2/picard_collectmultiplemetrics_2.21.2.cwl @@ -0,0 +1,268 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: picard_collectmultiplemetrics_2_8_1 +baseCommand: + - java +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: input + type: File + inputBinding: + position: 0 + prefix: I= + separate: false + doc: Input file (bam or sam). Required. + - id: output_file_name + type: string? + doc: Output file (bam or sam). + - id: validation_stringency + type: string? + inputBinding: + position: 0 + prefix: VALIDATION_STRINGENCY= + separate: false + doc: >- + Validation stringency for all SAM files read by this program. Setting + stringency to SILENT can improve performance when processing a BAM file in + which variable-length data (read, qualities, tags) do not otherwise need + to be decoded. Default value: STRICT. This option can be set to 'null' to + clear the default value. Possible values: {STRICT,LENIENT, SILENT} + - default: true + id: assume_sorted + type: boolean? + inputBinding: + position: 0 + prefix: AS=true + separate: false + - id: bam_compression_level + type: int? + inputBinding: + position: 0 + prefix: COMPRESSION_LEVEL= + separate: false + doc: >- + Compression level for all compressed files created (e.g. BAM and GELI). + Default value:5. This option can be set to 'null' to clear the default + value. 
+ - default: true + id: create_bam_index + type: boolean? + inputBinding: + position: 0 + prefix: CREATE_INDEX=true + separate: false + doc: >- + Whether to create a BAM index when writing a coordinate-sorted BAM file. + Default value:false. This option can be set to 'null' to clear the default + value. Possible values:{true, false} + - id: stop_after + type: int? + inputBinding: + position: 0 + prefix: STOP_AFTER= + separate: false + doc: >- + Stop after processing N reads, mainly for debugging. Default value: 0. + This option can be set to 'null' to clear the default value. + - id: metric_accumulation_level + type: string? + inputBinding: + position: 0 + prefix: METRIC_ACCUMULATION_LEVEL= + separate: false + doc: >- + The level(s) at which to accumulate metrics. Default value: [ALL_READS]. + This option can be set to 'null' to clear the default value. Possible + values: {ALL_READS, SAMPLE, LIBRARY, READ_GROUP} This option may be + specified 0 or more times. This option can be set to 'null' to clear the + default list. + - id: file_extension + type: string? + inputBinding: + position: 0 + prefix: FILE_EXTENSION= + separate: false + doc: >- + Append the given file extension to all metric file names (ex. + OUTPUT.insert_size_metrics.EXT). None if null Default value: null. + - id: program_list + type: 'string[]?' + inputBinding: + position: 0 + prefix: PROGRAM= + separate: false + doc: >- + Set of metrics programs to apply during the pass through the SAM file. + Default value: [CollectAlignmentSummaryMetrics, + CollectBaseDistributionByCycle, CollectInsertSizeMetrics, + MeanQualityByCycle, QualityScoreDistribution]. This option can be set to + 'null' to clear the default value. 
Possible values: + {CollectAlignmentSummaryMetrics, CollectInsertSizeMetrics, + QualityScoreDistribution, MeanQualityByCycle, + CollectBaseDistributionByCycle, CollectGcBiasMetrics, RnaSeqMetrics, + CollectSequencingArtifactMetrics, CollectQualityYieldMetrics} This option + may be specified 0 or more times. This option can be set to 'null' to + clear the default list. + - id: intervals_file + type: File? + inputBinding: + position: 0 + prefix: INTERVALS= + separate: false + doc: >- + An optional list of intervals to restrict analysis to. Only pertains to + some of the PROGRAMs. Programs whose stand-alone CLP does not have an + INTERVALS argument will silently ignore this argument. Default value: + null. + - id: dbsnp_file + type: File? + inputBinding: + position: 0 + prefix: DB_SNP= + separate: false + doc: >- + VCF format dbSNP file, used to exclude regions around known polymorphisms + from analysis by some PROGRAMs; PROGRAMs whose CLP doesn't allow for this + argument will quietly ignore it. Default value: null. + - id: include_unpaired + type: boolean? + inputBinding: + position: 0 + prefix: INCLUDE_UNPAIRED=true + separate: false + doc: >- + Include unpaired reads in CollectSequencingArtifactMetrics. If set to true + then all paired reads will be included as well - MINIMUM_INSERT_SIZE and + MAXIMUM_INSERT_SIZE will be ignored in CollectSequencingArtifactMetrics. + Default value: false. This option can be set to 'null' to clear the + default value. Possible values: {true, false} +outputs: + - id: alignment_summary_metrics + type: File? + outputBinding: + glob: '*alignment_summary_metrics' + - id: bait_bias_detail_metrics + type: File? + outputBinding: + glob: '*bait_bias_detail_metrics' + - id: bait_bias_summary_metrics + type: File? + outputBinding: + glob: '*bait_bias_summary_metrics' + - id: base_distribution_by_cycle_metrics + type: File? + outputBinding: + glob: '*base_distribution_by_cycle_metrics' + - id: base_distribution_by_cycle_pdf + type: File? 
+ outputBinding: + glob: '*base_distribution_by_cycle.pdf' + - id: error_summary_metrics + type: File? + outputBinding: + glob: '*error_summary_metrics' + - id: gc_bias_detail_metrics + type: File? + outputBinding: + glob: '*gc_bias.detail_metrics' + - id: gc_bias_pdf + type: File? + outputBinding: + glob: '*gc_bias.pdf' + - id: gc_bias_summary_metrics + type: File? + outputBinding: + glob: '*gc_bias.summary_metrics' + - id: insert_size_histogram_pdf + type: File? + outputBinding: + glob: '*insert_size_histogram.pdf' + - id: insert_size_metrics + type: File? + outputBinding: + glob: '*insert_size_metrics' + - id: pre_adapter_detail_metrics + type: File? + outputBinding: + glob: '*pre_adapter_detail_metrics' + - id: pre_adapter_summary_metrics + type: File? + outputBinding: + glob: '*pre_adapter_summary_metrics' + - id: quality_by_cycle_metrics + type: File? + outputBinding: + glob: '*quality_by_cycle_metrics' + - id: quality_by_cycle_pdf + type: File? + outputBinding: + glob: '*quality_by_cycle.pdf' + - id: quality_distribution_metrics + type: File? + outputBinding: + glob: '*quality_distribution_metrics' + - id: quality_distribution_pdf + type: File? 
+ outputBinding: + glob: '*quality_distribution.pdf' +label: picard_collectmultiplemetrices_2.8.1 +arguments: + - position: 0 + prefix: '' + separate: false + valueFrom: "${\n if(inputs.memory_per_job && inputs.memory_overhead) {\n \n if(inputs.memory_per_job % 1000 == 0) {\n \t\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n \n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\" \n }\n }\n else if (inputs.memory_per_job && !inputs.memory_overhead){\n \n if(inputs.memory_per_job % 1000 == 0) {\n \t\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n \n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\" \n }\n }\n else if(!inputs.memory_per_job && inputs.memory_overhead){\n \n return \"-Xmx15G\"\n }\n else {\n \n \treturn \"-Xmx15G\"\n }\n}" + - position: 0 + prefix: '-jar' + valueFrom: /usr/local/bin/picard.jar + - position: 0 + prefix: '' + separate: false + valueFrom: CollectMultipleMetrics + - position: 0 + prefix: O= + separate: false + valueFrom: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/,'_multiple_metrics') + } + } +requirements: + - class: ResourceRequirement + ramMin: 10000 + coresMin: 8 + - class: DockerRequirement + dockerPull: 'mskaccess/picard:0.6.2' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:sumans@mskcc.org' + 'foaf:name': Shalabh Suman + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:sumans@mskcc.org' + 'foaf:name': Shalabh Suman + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': picard + 'doap:revision': 2.8.1 diff --git 
a/picard_fix_mate_information_2.21.2/picard_fix_mate_information_2.21.2.cwl b/picard_fix_mate_information_2.21.2/picard_fix_mate_information_2.21.2.cwl new file mode 100644 index 00000000..1287b1c7 --- /dev/null +++ b/picard_fix_mate_information_2.21.2/picard_fix_mate_information_2.21.2.cwl @@ -0,0 +1,161 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' +id: picard_fix_mate_information_1.96 +baseCommand: + - java +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: input + type: File + inputBinding: + position: 0 + prefix: I= + separate: false + doc: The input file to fix. This option may be specified 0 or more times + secondaryFiles: + - ^.bai + - id: output_file_name + type: string? + doc: >- + Output file name (bam or sam). Not Required + - id: sort_order + type: string? + inputBinding: + position: 0 + prefix: SO= + separate: false + doc: >- + Optional sort order to output in. If not supplied OUTPUT is in the same + order as INPUT.Default value: null. Possible values: {unsorted, queryname, + coordinate} + - id: tmp_dir + type: string? + inputBinding: + position: 0 + prefix: TMP_DIR= + separate: false + doc: This option may be specified 0 or more times + - id: validation_stringency + type: string? + inputBinding: + position: 0 + prefix: VALIDATION_STRINGENCY= + separate: false + doc: >- + Validation stringency for all SAM files read by this program. Setting + stringency to SILENT can improve performance when processing a BAM file in + which variable-length data (read, qualities, tags) do not otherwise need + to be decoded. Default value: STRICT. This option can be set to 'null' to + clear the default value. Possible values: {STRICT,LENIENT, SILENT} + - id: bam_compression_level + type: int? 
+ inputBinding: + position: 0 + prefix: COMPRESSION_LEVEL= + separate: false + doc: >- + Compression level for all compressed files created (e.g. BAM and GELI). + Default value:5. This option can be set to 'null' to clear the default + value. + - default: true + id: create_bam_index + type: boolean? + inputBinding: + position: 0 + prefix: CREATE_INDEX=true + doc: >- + Whether to create a BAM index when writing a coordinate-sorted BAM file. + Default value:false. This option can be set to 'null' to clear the default + value. Possible values:{true, false} +outputs: + - id: bam + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/,'_fm.bam') + } + } + secondaryFiles: + - ^.bai +label: picard_fix_mate_information_1.96 +arguments: + - position: 0 + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx15G" + } + else { + return "-Xmx15G" + } + } + - position: 0 + prefix: '-jar' + valueFrom: /usr/local/bin/FixMateInformation.jar + - position: 0 + prefix: O= + separate: false + valueFrom: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/,'_fm.bam') + } + } +requirements: + - class: ResourceRequirement + ramMin: 16000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'mskaccess/picard_1.96:0.6.2' + - class: 
InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': picard + 'doap:revision': 1.96 diff --git a/picard_hsmetrics_2.21.2/picard_hsmetrics_2.21.2.cwl b/picard_hsmetrics_2.21.2/picard_hsmetrics_2.21.2.cwl new file mode 100644 index 00000000..4e23cc79 --- /dev/null +++ b/picard_hsmetrics_2.21.2/picard_hsmetrics_2.21.2.cwl @@ -0,0 +1,158 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: picard_hsmetrics_2_8_1 +baseCommand: + - java +inputs: + - id: bait_intervals + type: File + inputBinding: + position: 0 + prefix: BAIT_INTERVALS= + separate: false + - id: bait_set_name + type: string? + inputBinding: + position: 0 + prefix: BAIT_SET_NAME= + separate: false + - id: minimum_mapping_quality + type: int? + inputBinding: + position: 0 + prefix: MINIMUM_MAPPING_QUALITY= + separate: false + - id: minimum_base_quality + type: int? + inputBinding: + position: 0 + prefix: MINIMUM_BASE_QUALITY= + separate: false + - id: clip_overlapping_reads + type: boolean? + inputBinding: + position: 0 + prefix: CLIP_OVERLAPPING_READS=true + separate: false + - id: target_intervals + type: File? + inputBinding: + position: 0 + prefix: TARGET_INTERVALS= + separate: false + - id: input + type: File + inputBinding: + position: 0 + prefix: INPUT= + separate: false + - id: output_file_name + type: string? 
+ inputBinding: + position: 0 + prefix: OUTPUT= + separate: false + - id: metric_accumulation_level + type: + - 'null' + - type: enum + symbols: + - ALL_READS + - SAMPLE + - LIBRARY + - READ_GROUP + name: metric_accumulation_level + inputBinding: + position: 0 + prefix: METRIC_ACCUMULATION_LEVEL= + separate: false + - id: per_target_coverage + type: File? + inputBinding: + position: 0 + prefix: PER_TARGET_COVERAGE= + separate: false + - id: per_base_coverage + type: File? + inputBinding: + position: 0 + prefix: PER_BASE_COVERAGE= + separate: false + - id: near_distance + type: int? + inputBinding: + position: 0 + prefix: NEAR_DISTANCE= + separate: false + - id: coverage_cap + type: int? + inputBinding: + position: 0 + prefix: COVERAGE_CAP= + separate: false + - id: sample_size + type: int? + inputBinding: + position: 0 + prefix: SAMPLE_SIZE= + separate: false +outputs: + - id: hs_metrics_file + type: File? + outputBinding: + glob: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/,'.hsmetrics') + } + } +label: picard_hsmetrics_2.8.1 +arguments: + - position: 0 + prefix: '-jar' + valueFrom: /usr/local/bin/picard.jar + - position: 0 + valueFrom: CollectHsMetrics + - position: 0 + prefix: OUTPUT= + valueFrom: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/,'.hsmetrics') + } + } +requirements: + - class: ResourceRequirement + ramMin: 4000 + coresMin: 1 + - class: DockerRequirement + dockerPull: 'mskaccess/picard:0.6.2' + - class: InlineJavascriptRequirement + +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:johnsoni@mskcc.org' + 'foaf:name': Ian Johnson + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:johnsoni@mskcc.org' + 
'foaf:name': Ian Johnson + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': hsmetrics + 'doap:revision': 2.8.1 diff --git a/picard_mark_duplicates_2.21.2/picard_mark_duplicates_2.21.2.cwl b/picard_mark_duplicates_2.21.2/picard_mark_duplicates_2.21.2.cwl new file mode 100644 index 00000000..792b6b82 --- /dev/null +++ b/picard_mark_duplicates_2.21.2/picard_mark_duplicates_2.21.2.cwl @@ -0,0 +1,181 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: picard_mark_duplicates_2_21_2 +baseCommand: + - java +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: input + type: File + inputBinding: + position: 0 + prefix: I= + separate: false + doc: Input file (bam or sam). Required. + - id: output_file_name + type: string? + doc: Output file (bam or sam). + - default: '$( inputs.input.basename.replace(/.bam/, ''_md.metrics'') )' + id: duplication_metrics + type: string + inputBinding: + position: 0 + prefix: M= + separate: false + doc: File to write duplication metrics to Required. + - id: assume_sort_order + type: string? + inputBinding: + position: 0 + prefix: ASO= + separate: false + doc: >- + Optional sort order to output in. If not supplied OUTPUT is in the same + order as INPUT.Default value: null. Possible values: {unsorted, queryname, + coordinate} + - id: tmp_dir + type: string? + inputBinding: + position: 0 + prefix: TMP_DIR= + separate: false + doc: This option may be specified 0 or more times + - id: validation_stringency + type: string? + inputBinding: + position: 0 + prefix: VALIDATION_STRINGENCY= + separate: false + doc: >- + Validation stringency for all SAM files read by this program. 
Setting + stringency to SILENT can improve performance when processing a BAM file in + which variable-length data (read, qualities, tags) do not otherwise need + to be decoded. Default value: STRICT. This option can be set to 'null' to + clear the default value. Possible values: {STRICT,LENIENT, SILENT} + - id: bam_compression_level + type: int? + inputBinding: + position: 0 + prefix: COMPRESSION_LEVEL= + separate: false + doc: >- + Compression level for all compressed files created (e.g. BAM and GELI). + Default value:5. This option can be set to 'null' to clear the default + value. + - default: true + id: create_bam_index + type: boolean? + inputBinding: + position: 0 + prefix: CREATE_INDEX=true + separate: false + doc: >- + Whether to create a BAM index when writing a coordinate-sorted BAM file. + Default value:false. This option can be set to 'null' to clear the default + value. Possible values:{true, false} + - id: duplicate_scoring_strategy + type: string? + inputBinding: + position: 0 + prefix: DUPLICATE_SCORING_STRATEGY= + separate: false + doc: >- + The scoring strategy for choosing the non-duplicate among candidates. + Default value:SUM_OF_BASE_QUALITIES. This option can be set to 'null' to + clear the default value. Possible values: {SUM_OF_BASE_QUALITIES, + TOTAL_MAPPED_REFERENCE_LENGTH, RANDOM} + - id: optical_duplicate_pixel_distance + type: int? + inputBinding: + position: 0 + prefix: OPTICAL_DUPLICATE_PIXEL_DISTANCE= + separate: false + doc: >- + The maximum offset between two duplicate clusters in order to consider + them optical duplicates. The default is appropriate for unpatterned + versions of the Illumina platform. For the patterned flowcell models, 2500 + is more appropriate. For other platforms and models, users should + experiment to find what works best. Default value: 100. This option can + be set to 'null' to clear the default value.
+outputs: + - id: bam + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/,'_md.bam') + } + } + secondaryFiles: + - ^.bai + - id: duplication_stats + type: File + outputBinding: + glob: |- + ${ + if(inputs.duplication_metrics){ + return inputs.duplication_metrics + } else { + return inputs.input.basename.replace(/.bam/,'_md.metrics') + } + } +label: picard_mark_duplicates_2.21.2 +arguments: + - position: 0 + valueFrom: "${\n if(inputs.memory_per_job && inputs.memory_overhead) {\n \n if(inputs.memory_per_job % 1000 == 0) {\n \t\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n \n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\" \n }\n }\n else if (inputs.memory_per_job && !inputs.memory_overhead){\n \n if(inputs.memory_per_job % 1000 == 0) {\n \t\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n \n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\" \n }\n }\n else if(!inputs.memory_per_job && inputs.memory_overhead){\n \n return \"-Xmx15G\"\n }\n else {\n \n \treturn \"-Xmx15G\"\n }\n}" + - position: 0 + prefix: '-jar' + valueFrom: /usr/picard/picard.jar + - position: 0 + valueFrom: MarkDuplicates + - position: 0 + prefix: O= + separate: false + valueFrom: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/,'_md.bam') + } + } +requirements: + - class: ResourceRequirement + ramMin: 17000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'broadinstitute/picard:2.21.2' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 
'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': picard + 'doap:revision': 2.21.2 From 35ffa13f96a0fe50eaefa82aadc8fd97fa841780 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Wed, 6 Nov 2019 14:01:06 -0500 Subject: [PATCH 073/476] Adding README and example yaml --- picard_mark_duplicates_2.21.2/README.md | 77 +++++++++++++++++++ .../example_inputs.yaml | 15 ++++ 2 files changed, 92 insertions(+) create mode 100644 picard_mark_duplicates_2.21.2/README.md create mode 100644 picard_mark_duplicates_2.21.2/example_inputs.yaml diff --git a/picard_mark_duplicates_2.21.2/README.md b/picard_mark_duplicates_2.21.2/README.md new file mode 100644 index 00000000..69f2f101 --- /dev/null +++ b/picard_mark_duplicates_2.21.2/README.md @@ -0,0 +1,77 @@ +# CWL for running Picard - MarkDuplicates + +## Version of tools in docker image + +| Tool | Version | Location | +|--- |--- |--- | +| picard | 2.21.2 | https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar | + + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner picard_mark_duplicates_2.21.2.cwl example_inputs.yaml +``` + +### Usage + +```bash +usage: picard_mark_duplicates_2.21.2.cwl [-h] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --input INPUT Input file (bam or sam). Required. + --output_file_name OUTPUT_FILE_NAME + Output file (bam or sam). 
+ --duplication_metrics DUPLICATION_METRICS + File to write duplication metrics to Required. + --assume_sort_order ASSUME_SORT_ORDER + Optional sort order to output in. If not supplied + OUTPUT is in the same order as INPUT.Default value: + null. Possible values: {unsorted, queryname, + coordinate} + --tmp_dir TMP_DIR This option may be specified 0 or more times + --validation_stringency VALIDATION_STRINGENCY + Validation stringency for all SAM files read by this + program. Setting stringency to SILENT can improve + performance when processing a BAM file in which + variable-length data (read, qualities, tags) do not + otherwise need to be decoded. Default value: STRICT. + This option can be set to 'null' to clear the default + value. Possible values: {STRICT,LENIENT, SILENT} + --bam_compression_level BAM_COMPRESSION_LEVEL + Compression level for all compressed files created + (e.g. BAM and GELI). Default value:5. This option can + be set to 'null' to clear the default value. + --create_bam_index Whether to create a BAM index when writing a + coordinate-sorted BAM file. Default value:false. This + option can be set to 'null' to clear the default + value. Possible values:{true, false} + --duplicate_scoring_strategy DUPLICATE_SCORING_STRATEGY + The scoring strategy for choosing the non-duplicate + among candidates. Default value:SUM_OF_BASE_QUALITIES. + This option can be set to 'null' to clear the default + value.Possible values: {SUM_OF_BASE_QUALITIES, + TOTAL_MAPPED_REFERENCE_LENGTH, RANDOM} + --optical_duplicate_pixel_distance OPTICAL_DUPLICATE_PIXEL_DISTANCE + The maximum offset between two duplicate clusters in + order to consider them optical duplicates. The default + is appropriate for unpatterned versions of the + Illumina platform. For the patterned flowcell models, + 2500 is moreappropriate. For other platforms and + models, users should experiment to find what works + best. Default value: 100. This option can be set to + 'null' to clear the default value. 
+``` diff --git a/picard_mark_duplicates_2.21.2/example_inputs.yaml b/picard_mark_duplicates_2.21.2/example_inputs.yaml new file mode 100644 index 00000000..4cb5941e --- /dev/null +++ b/picard_mark_duplicates_2.21.2/example_inputs.yaml @@ -0,0 +1,15 @@ +assume_sort_order: coordinate +bam_compression_level: null +create_bam_index: true +duplicate_scoring_strategy: null +duplication_metrics: test_metrics.txt +input: + class: File + path: /path/to/file.bam +memory_overhead: null +memory_per_job: null +number_of_threads: null +optical_duplicate_pixel_distance: null +output_file_name: null +tmp_dir: null +validation_stringency: null From 643bcbf002071f0a040c99f2a821e8e1976b429a Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Wed, 6 Nov 2019 22:25:03 -0500 Subject: [PATCH 074/476] Adding Picard AddOrReplaceReadGroups 2.21.2 --- .../README.md | 91 +++++++++++++++++++ .../example_inputs.yaml | 20 ++++ ...card_add_or_replace_read_groups_2.21.2.cwl | 16 ++-- 3 files changed, 121 insertions(+), 6 deletions(-) create mode 100644 picard_add_or_replace_read_groups_2.21.2/README.md create mode 100644 picard_add_or_replace_read_groups_2.21.2/example_inputs.yaml diff --git a/picard_add_or_replace_read_groups_2.21.2/README.md b/picard_add_or_replace_read_groups_2.21.2/README.md new file mode 100644 index 00000000..df47f212 --- /dev/null +++ b/picard_add_or_replace_read_groups_2.21.2/README.md @@ -0,0 +1,91 @@ +# CWL and Dockerfile for running Picard - AddOrReplaceReadGroups + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +|--- |--- |--- | +| picard | 2.21.2 | https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar | + + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner picard_add_or_replace_read_groups_2.21.2.cwl example_inputs.yaml +``` + +**If at MSK, using the JUNO cluster having 
installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/picard_add_or_replace_read_groups_2.21.2/picard_add_or_replace_read_groups_2.21.2.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> mkdir picardAddOrReplaceReadGroup_toil_log +> toil-cwl-runner --singularity --logFile /path/to/picardAddOrReplaceReadGroup_toil_log/cwltoil.log --jobStore /path/to/picardAddOrReplaceReadGroup_jobStore --batchSystem lsf --workDir /path/to/picardAddOrReplaceReadGroup_toil_log --outdir . --writeLogs /path/to/picardAddOrReplaceReadGroup_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/picard_add_or_replace_read_groups_2.21.2/picard_add_or_replace_read_groups_2.21.2.cwl /path/to/inputs.yaml > picardAddOrReplaceReadGroup_toil.stdout 2> picardAddOrReplaceReadGroup_toil.stderr & +``` + +### Usage + +```bash +> toil-cwl-runner picard_add_or_replace_read_groups_2.21.2.cwl --help +usage: picard_add_or_replace_read_groups_2.21.2.cwl [-h] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --input INPUT Input file (bam or sam). Required. + --output_file_name OUTPUT_FILE_NAME + Output file name (bam or sam). Not Required + --sort_order SORT_ORDER + Optional sort order to output in. If not supplied + OUTPUT is in the same order as INPUT.Default value: + null. Possible values: {unsorted, queryname, + coordinate} + --read_group_identifier READ_GROUP_IDENTIFIER + Read Group ID Default value: 1.
This option can be set + to 'null' to clear the default value Required + --read_group_sequnecing_center READ_GROUP_SEQUNECING_CENTER + Read Group sequencing center name Default value: null. + Required + --read_group_library READ_GROUP_LIBRARY + Read Group Library. Required + --read_group_platform_unit READ_GROUP_PLATFORM_UNIT + Read Group platform unit (eg. run barcode) Required. + --read_group_sample_name READ_GROUP_SAMPLE_NAME + Read Group sample name. Required + --read_group_sequencing_platform READ_GROUP_SEQUENCING_PLATFORM + Read Group platform (e.g. illumina, solid) Required. + --read_group_description READ_GROUP_DESCRIPTION + Read Group description Default value: null. + --read_group_run_date READ_GROUP_RUN_DATE + Read Group run date Default value: null. + --tmp_dir TMP_DIR This option may be specified 0 or more times + --validation_stringency VALIDATION_STRINGENCY + Validation stringency for all SAM files read by this + program. Setting stringency to SILENT can improve + performance when processing a BAM file in which + variable-length data (read, qualities, tags) do not + otherwise need to be decoded. Default value: STRICT. + This option can be set to 'null' to clear the default + value. Possible values: {STRICT,LENIENT, SILENT} + --bam_compression_level BAM_COMPRESSION_LEVEL + Compression level for all compressed files created + (e.g. BAM and GELI). Default value:5. This option can + be set to 'null' to clear the default value. + --create_bam_index Whether to create a BAM index when writing a + coordinate-sorted BAM file. Default value:false. This + option can be set to 'null' to clear the default + value. 
Possible values:{true, false} +``` + diff --git a/picard_add_or_replace_read_groups_2.21.2/example_inputs.yaml b/picard_add_or_replace_read_groups_2.21.2/example_inputs.yaml new file mode 100644 index 00000000..9c25bd7d --- /dev/null +++ b/picard_add_or_replace_read_groups_2.21.2/example_inputs.yaml @@ -0,0 +1,20 @@ +bam_compression_level: +create_bam_index: true +input: + class: File + path: "/path/to/bam" +memory_overhead: +memory_per_job: +number_of_threads: +output_file_name: somename_srt.bam +read_group_description: +read_group_identifier: test +read_group_library: 1 +read_group_platform_unit: bc01 +read_group_run_date: +read_group_sample_name: seracare +read_group_sequencing_platform: Illumina +read_group_sequnecing_center: msk +sort_order: +tmp_dir: +validation_stringency: diff --git a/picard_add_or_replace_read_groups_2.21.2/picard_add_or_replace_read_groups_2.21.2.cwl b/picard_add_or_replace_read_groups_2.21.2/picard_add_or_replace_read_groups_2.21.2.cwl index d1f51ad1..d8e3af88 100644 --- a/picard_add_or_replace_read_groups_2.21.2/picard_add_or_replace_read_groups_2.21.2.cwl +++ b/picard_add_or_replace_read_groups_2.21.2/picard_add_or_replace_read_groups_2.21.2.cwl @@ -4,7 +4,8 @@ $namespaces: dct: 'http://purl.org/dc/terms/' doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' -id: picard_add_or_replace_read_groups_1_96 + sbg: 'https://www.sevenbridges.com/' +id: picard_add_or_replace_read_groups_2.21.2 baseCommand: - java inputs: @@ -145,7 +146,7 @@ outputs: } secondaryFiles: - ^.bai -label: picard_add_or_replace_read_groups_1.96 +label: picard_add_or_replace_read_groups_2.21.2 arguments: - position: 0 valueFrom: |- @@ -175,7 +176,10 @@ arguments: } - position: 0 prefix: '-jar' - valueFrom: /usr/local/bin/AddOrReplaceReadGroups.jar + valueFrom: /usr/picard/picard.jar + - position: 0 + separate: false + valueFrom: AddOrReplaceReadGroups - position: 0 prefix: O= separate: false @@ -187,10 +191,10 @@ arguments: } requirements: - class: 
ResourceRequirement - ramMin: 16000 + ramMin: 17000 coresMin: 2 - class: DockerRequirement - dockerPull: 'mskaccess/picard_1.96:0.6.2' + dockerPull: 'broadinstitute/picard:2.21.2' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' @@ -209,4 +213,4 @@ requirements: 'doap:release': - class: 'doap:Version' 'doap:name': picard - 'doap:revision': 1.96 + 'doap:revision': 2.21.2 From c3ffd8dc421e8368744b1d2a61bd8cab773b38ba Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Wed, 6 Nov 2019 22:41:53 -0500 Subject: [PATCH 075/476] Adding picard 2.21.2 fixmate information --- picard_fix_mate_information_2.21.2/README.md | 72 +++++++++++++++++++ .../example_inputs.yaml | 12 ++++ .../picard_fix_mate_information_2.21.2.cwl | 16 +++-- 3 files changed, 93 insertions(+), 7 deletions(-) create mode 100644 picard_fix_mate_information_2.21.2/README.md create mode 100644 picard_fix_mate_information_2.21.2/example_inputs.yaml diff --git a/picard_fix_mate_information_2.21.2/README.md b/picard_fix_mate_information_2.21.2/README.md new file mode 100644 index 00000000..8a4ca7ab --- /dev/null +++ b/picard_fix_mate_information_2.21.2/README.md @@ -0,0 +1,72 @@ +# CWL for running Picard - FixMateInformation + +## Version of tools in docker image + +| Tool | Version | Location | +|--- |--- |--- | +| picard | 2.21.2 | https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar | + + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner picard_fix_mate_information_2.21.2.cwl example_inputs.yaml +``` + +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity 
--non-strict /path/to/picard_fix_mate_information_2.21.2/picard_fix_mate_information_2.21.2.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> mkdir picardFixMate_toil_log +> toil-cwl-runner --singularity --logFile /path/to/picardFixMate_toil_log/cwltoil.log --jobStore /path/to/picardFixMate_jobStore --batchSystem lsf --workDir /path/to/picardFixMate_toil_log --outdir . --writeLogs /path/to/picardFixMate_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/picard_fix_mate_information_2.21.2/picard_fix_mate_information_2.21.2.cwl /path/to/inputs.yaml > picardFixMate_toil.stdout 2> picardFixMate_toil.stderr & +``` + +### Usage + +``` +usage: picard_fix_mate_information_2.21.2.cwl [-h] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --input INPUT The input file to fix. This option may be specified 0 + or more times + --output_file_name OUTPUT_FILE_NAME + Output file name (bam or sam). Not Required + --sort_order SORT_ORDER + Optional sort order to output in. If not supplied + OUTPUT is in the same order as INPUT.Default value: + null. Possible values: {unsorted, queryname, + coordinate} + --tmp_dir TMP_DIR This option may be specified 0 or more times + --validation_stringency VALIDATION_STRINGENCY + Validation stringency for all SAM files read by this + program. Setting stringency to SILENT can improve + performance when processing a BAM file in which + variable-length data (read, qualities, tags) do not + otherwise need to be decoded. Default value: STRICT. + This option can be set to 'null' to clear the default + value.
Possible values: {STRICT,LENIENT, SILENT} + --bam_compression_level BAM_COMPRESSION_LEVEL + Compression level for all compressed files created + (e.g. BAM and GELI). Default value:5. This option can + be set to 'null' to clear the default value. + --create_bam_index Whether to create a BAM index when writing a + coordinate-sorted BAM file. Default value:false. This + option can be set to 'null' to clear the default + value. Possible values:{true, false} +``` diff --git a/picard_fix_mate_information_2.21.2/example_inputs.yaml b/picard_fix_mate_information_2.21.2/example_inputs.yaml new file mode 100644 index 00000000..1d9e4ee2 --- /dev/null +++ b/picard_fix_mate_information_2.21.2/example_inputs.yaml @@ -0,0 +1,12 @@ +bam_compression_level: +create_bam_index: true +input: + class: File + path: "/path/to/bam" +memory_overhead: +memory_per_job: +number_of_threads: +output_file_name: somename_fm.bam +sort_order: +tmp_dir: +validation_stringency: diff --git a/picard_fix_mate_information_2.21.2/picard_fix_mate_information_2.21.2.cwl b/picard_fix_mate_information_2.21.2/picard_fix_mate_information_2.21.2.cwl index 1287b1c7..504e43f4 100644 --- a/picard_fix_mate_information_2.21.2/picard_fix_mate_information_2.21.2.cwl +++ b/picard_fix_mate_information_2.21.2/picard_fix_mate_information_2.21.2.cwl @@ -4,7 +4,8 @@ $namespaces: dct: 'http://purl.org/dc/terms/' doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' -id: picard_fix_mate_information_1.96 + sbg: 'https://www.sevenbridges.com/' +id: picard_fix_mate_information_2.21.2 baseCommand: - java inputs: @@ -27,8 +28,7 @@ inputs: - ^.bai - id: output_file_name type: string? - doc: >- - Output file name (bam or sam). Not Required + doc: Output file name (bam or sam). Not Required - id: sort_order type: string? 
inputBinding: @@ -122,7 +122,9 @@ arguments: } - position: 0 prefix: '-jar' - valueFrom: /usr/local/bin/FixMateInformation.jar + valueFrom: /usr/picard/picard.jar + - position: 0 + valueFrom: FixMateInformation - position: 0 prefix: O= separate: false @@ -136,10 +138,10 @@ arguments: } requirements: - class: ResourceRequirement - ramMin: 16000 + ramMin: 17000 coresMin: 2 - class: DockerRequirement - dockerPull: 'mskaccess/picard_1.96:0.6.2' + dockerPull: 'broadinstitute/picard:2.21.2' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' @@ -158,4 +160,4 @@ requirements: 'doap:release': - class: 'doap:Version' 'doap:name': picard - 'doap:revision': 1.96 + 'doap:revision': 2.21.2 From e77315bcd2e5053dc9c782ebb64461e951b6fb15 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Wed, 6 Nov 2019 22:49:23 -0500 Subject: [PATCH 076/476] Adding CollectAlignmentMetrics 2.21.2 --- .../README.md | 4 +- .../README.md | 78 +++++++++++++++++++ .../example_inputs.yaml | 26 +++++++ ...ollect_alignment_summary_metrics_2.8.1.cwl | 11 +-- 4 files changed, 112 insertions(+), 7 deletions(-) create mode 100644 picard_collect_alignment_summary_metrics_2.21.2/README.md create mode 100644 picard_collect_alignment_summary_metrics_2.21.2/example_inputs.yaml diff --git a/picard_add_or_replace_read_groups_2.21.2/README.md b/picard_add_or_replace_read_groups_2.21.2/README.md index df47f212..0585eae8 100644 --- a/picard_add_or_replace_read_groups_2.21.2/README.md +++ b/picard_add_or_replace_read_groups_2.21.2/README.md @@ -1,6 +1,6 @@ -# CWL and Dockerfile for running Picard - AddOrReplaceReadGroups +# CWL for running Picard - AddOrReplaceReadGroups -## Version of tools in docker image (/container/Dockerfile) +## Version of tools in docker image | Tool | Version | Location | |--- |--- |--- | diff --git a/picard_collect_alignment_summary_metrics_2.21.2/README.md b/picard_collect_alignment_summary_metrics_2.21.2/README.md new file mode 100644 index 00000000..ecee2769 --- 
/dev/null +++ b/picard_collect_alignment_summary_metrics_2.21.2/README.md @@ -0,0 +1,78 @@ +# CWL for running Picard - CollectAlignmentSummaryMetrics + +## Version of tools in docker image + +| Tool | Version | Location | +|--- |--- |--- | +| picard | 2.21.2 | https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar | + + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner picard_collect_alignment_summary_metrics_2.21.2.cwl example_inputs.yaml +``` + +### Usage + +```bash +> usage: picard_collect_alignment_summary_metrics_2.21.2.cwl [-h] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --input INPUT Input file (bam or sam). Required. + --output_file_name OUTPUT_FILE_NAME + Output file (bam or sam). + --metrics_acciumulation_level METRICS_ACCIUMULATION_LEVEL + The level(s) at which to accumulate metrics. Default + value: [ALL_READS]. This option can be set to 'null' + to clear the default value. Possible values: + {ALL_READS, SAMPLE, LIBRARY, READ_GROUP} This option + may be specified 0 or more times. This option can be + set to 'null' to clear the default list. + --max_insert_size MAX_INSERT_SIZE + Paired-end reads above this insert size will be + considered chimeric along with inter-chromosomal + pairs. Default value: 100000. This option can be set + to 'null' to clear the default value. + --tmp_dir TMP_DIR This option may be specified 0 or more times + --validation_stringency VALIDATION_STRINGENCY + Validation stringency for all SAM files read by this + program. 
Setting stringency to SILENT can improve + performance when processing a BAM file in which + variable-length data (read, qualities, tags) do not + otherwise need to be decoded. Default value: STRICT. + This option can be set to 'null' to clear the default + value. Possible values: {STRICT,LENIENT, SILENT} + --bam_compression_level BAM_COMPRESSION_LEVEL + Compression level for all compressed files created + (e.g. BAM and GELI). Default value:5. This option can + be set to 'null' to clear the default value. + --create_bam_index Whether to create a BAM index when writing a + coordinate-sorted BAM file. Default value:false. This + option can be set to 'null' to clear the default + value. Possible values:{true, false} + --assume_sorted + --reference_sequence REFERENCE_SEQUENCE + Reference sequence file. Note that while this argument + isn't required, without it only a small subset of the + metrics will be calculated. Note also that if a + reference sequence is provided, it must be accompanied + by a sequence dictionary. Default value: null. + --stop_after STOP_AFTER + Stop after processing N reads, mainly for debugging. + Default value: 0. This option can be set to 'null' to + clear the default value. 
+``` diff --git a/picard_collect_alignment_summary_metrics_2.21.2/example_inputs.yaml b/picard_collect_alignment_summary_metrics_2.21.2/example_inputs.yaml new file mode 100644 index 00000000..f8a51739 --- /dev/null +++ b/picard_collect_alignment_summary_metrics_2.21.2/example_inputs.yaml @@ -0,0 +1,26 @@ +assume_sorted: true +bam_compression_level: null +create_bam_index: null +input: + class: File + metadata: {} + path: "/path/to/bam" + secondaryFiles: + - class: File + path: "/path/to/bam.bai" +max_insert_size: null +memory_overhead: null +memory_per_job: null +metrics_acciumulation_level: null +number_of_threads: null +output_file_name: null +reference_sequence: + class: File + metadata: {} + path: "/path/to/reference.fasta" + secondaryFiles: + - class: File + path: "/path/to/reference.dict" +stop_after: null +tmp_dir: null +validation_stringency: null diff --git a/picard_collect_alignment_summary_metrics_2.8.1/picard_collect_alignment_summary_metrics_2.8.1.cwl b/picard_collect_alignment_summary_metrics_2.8.1/picard_collect_alignment_summary_metrics_2.8.1.cwl index bb3d0975..f68401ea 100644 --- a/picard_collect_alignment_summary_metrics_2.8.1/picard_collect_alignment_summary_metrics_2.8.1.cwl +++ b/picard_collect_alignment_summary_metrics_2.8.1/picard_collect_alignment_summary_metrics_2.8.1.cwl @@ -4,7 +4,8 @@ $namespaces: dct: 'http://purl.org/dc/terms/' doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' -id: picard_collect_alignment_summary_metrics_2.8.1 + sbg: 'https://www.sevenbridges.com/' +id: picard_collect_alignment_summary_metrics_2.21.2 baseCommand: - java inputs: @@ -106,7 +107,7 @@ outputs: return inputs.input.basename.replace(/.bam/,'_alignment_metrics.txt') } } -label: picard_collect_alignment_summary_metrics_2.8.1 +label: picard_collect_alignment_summary_metrics_2.21.2 arguments: - position: 0 valueFrom: |- @@ -137,7 +138,7 @@ arguments: } - position: 0 prefix: '-jar' - valueFrom: /usr/local/bin/picard.jar + valueFrom: 
/usr/picard/picard.jar - position: 0 valueFrom: CollectAlignmentSummaryMetrics - position: 0 @@ -156,7 +157,7 @@ requirements: ramMin: 12000 coresMin: 1 - class: DockerRequirement - dockerPull: 'mskaccess/picard:0.6.2' + dockerPull: 'broadinstitute/picard:2.21.2' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' @@ -175,4 +176,4 @@ requirements: 'doap:release': - class: 'doap:Version' 'doap:name': picard - 'doap:revision': 2.8.1 + 'doap:revision': 2.21.2 From a1dd2f192adaaa407e77d56dbf849758c72c9a4b Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Wed, 6 Nov 2019 23:00:44 -0500 Subject: [PATCH 077/476] Adding Picard Hsmetrics 2.21.2 --- picard_hsmetrics_2.21.2/README.md | 87 +++++++++++++++++++ picard_hsmetrics_2.21.2/example_inputs.yaml | 24 +++++ .../picard_hsmetrics_2.21.2.cwl | 60 +++++++++++-- 3 files changed, 165 insertions(+), 6 deletions(-) create mode 100644 picard_hsmetrics_2.21.2/README.md create mode 100644 picard_hsmetrics_2.21.2/example_inputs.yaml diff --git a/picard_hsmetrics_2.21.2/README.md b/picard_hsmetrics_2.21.2/README.md new file mode 100644 index 00000000..9afa976b --- /dev/null +++ b/picard_hsmetrics_2.21.2/README.md @@ -0,0 +1,87 @@ +# CWL for running Picard - CollectHsMetrics + +## Version of tools in docker image + +| Tool | Version | Location | +|--- |--- |--- | +| picard | 2.21.2 | https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar | + + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash +> toil-cwl-runner picard_hsmetrics_2.21.2.cwl example_inputs.yaml +``` + +### Usage + +```bash +> usage: picard_hsmetrics_2.21.2.cwl [-h] + +optional arguments: + -h, --help show this help message and exit + --bait_intervals BAIT_INTERVALS + An interval list file that contains the locations of + the baits used. Default value: null.
This option must + be specified at least 1 times. + --bait_set_name BAIT_SET_NAME + Bait set name. If not provided it is inferred from the + filename of the bait intervals. Default value: null + --minimum_mapping_quality MINIMUM_MAPPING_QUALITY + Minimum mapping quality for a read to contribute + coverage. Default value: 20. This option can be set to + 'null' to clear the default value. + --minimum_base_quality MINIMUM_BASE_QUALITY + Minimum base quality for a base to contribute + coverage. Default value: 20. This option can be set to + 'null' to clear the default value. + --clip_overlapping_reads + True if we are to clip overlapping reads, false + otherwise. Default value: true. This option can be set + to 'null' to clear the default value. Possible values: + {true, false} + --target_intervals TARGET_INTERVALS + An interval list file that contains the locations of + the targets. Default value: null. This option must be + specified at least 1 times. + --input INPUT An aligned SAM or BAM file. Required. + --output_file_name OUTPUT_FILE_NAME + The output file to write the metrics to. Required. + --metric_accumulation_level METRIC_ACCUMULATION_LEVEL + The level(s) at which to accumulate metrics. Default + value: [ALL_READS]. This option can be set to 'null' + to clear the default value. Possible values: + {ALL_READS, SAMPLE, LIBRARY, READ_GROUP} This option + may be specified 0 or more times. This option can be + set to 'null' to clear the default list. + --per_target_coverage PER_TARGET_COVERAGE + An optional file to output per target coverage + information to. Default value: null. + --per_base_coverage PER_BASE_COVERAGE + An optional file to output per base coverage + information to. The per-base file contains one line + per target base and can grow very large. It is not + recommended for use with large target sets. Default + value: null. 
+ --near_distance NEAR_DISTANCE + The maximum distance between a read and the nearest + probe/bait/amplicon for the read to be considered + 'near probe' and included in percent selected. Default + value: 250. This option can be set to 'null' to clear + the default value. + --coverage_cap COVERAGE_CAP + Parameter to set a max coverage limit for Theoretical + Sensitivity calculations. Default is 200. Default + value: 200. This option can be set to 'null' to clear + the default value. + --sample_size SAMPLE_SIZE + Sample Size used for Theoretical Het Sensitivity + sampling. Default is 10000. Default value: 10000. This + option can be set to 'null' to clear the default + value. + +``` diff --git a/picard_hsmetrics_2.21.2/example_inputs.yaml b/picard_hsmetrics_2.21.2/example_inputs.yaml new file mode 100644 index 00000000..0ab1e497 --- /dev/null +++ b/picard_hsmetrics_2.21.2/example_inputs.yaml @@ -0,0 +1,24 @@ +bait_intervals: + class: File + metadata: {} + path: 'picard_baits.interval_list' + secondaryFiles: [] +bait_set_name: null +clip_overlapping_reads: null +coverage_cap: null +input: + class: File + path: 'test_bam.bam' +metric_accumulation_level: null +minimum_base_quality: null +minimum_mapping_quality: null +near_distance: null +output_file_name: null +per_base_coverage: null +per_target_coverage: null +sample_size: null +target_intervals: + class: File + metadata: {} + path: 'picard_targets.interval_list' + secondaryFiles: [] diff --git a/picard_hsmetrics_2.21.2/picard_hsmetrics_2.21.2.cwl b/picard_hsmetrics_2.21.2/picard_hsmetrics_2.21.2.cwl index 4e23cc79..d38c8ac3 100644 --- a/picard_hsmetrics_2.21.2/picard_hsmetrics_2.21.2.cwl +++ b/picard_hsmetrics_2.21.2/picard_hsmetrics_2.21.2.cwl @@ -5,7 +5,7 @@ $namespaces: doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' sbg: 'https://www.sevenbridges.com/' -id: picard_hsmetrics_2_8_1 +id: picard_hsmetrics_2_21_2 baseCommand: - java inputs: @@ -15,48 +15,71 @@ inputs: position: 0 prefix: 
BAIT_INTERVALS= separate: false + doc: >- + An interval list file that contains the locations of the baits used. + Default value: null. This option must be specified at least 1 times. - id: bait_set_name type: string? inputBinding: position: 0 prefix: BAIT_SET_NAME= separate: false + doc: >- + Bait set name. If not provided it is inferred from the filename of the + bait intervals. Default value: null - id: minimum_mapping_quality type: int? inputBinding: position: 0 prefix: MINIMUM_MAPPING_QUALITY= separate: false + doc: >- + Minimum mapping quality for a read to contribute coverage. Default value: + 20. This option can be set to 'null' to clear the default value. - id: minimum_base_quality type: int? inputBinding: position: 0 prefix: MINIMUM_BASE_QUALITY= separate: false + doc: >- + Minimum base quality for a base to contribute coverage. Default value: 20. + This option can be set to 'null' to clear the default value. - id: clip_overlapping_reads type: boolean? inputBinding: position: 0 prefix: CLIP_OVERLAPPING_READS=true separate: false + doc: >- + True if we are to clip overlapping reads, false otherwise. Default value: + true. This option can be set to 'null' to clear the default value. + Possible values: {true, false} - id: target_intervals type: File? inputBinding: position: 0 prefix: TARGET_INTERVALS= separate: false + doc: >- + An interval list file that contains the locations of the targets. Default + value: null. This option must be specified at least 1 times. - id: input type: File inputBinding: position: 0 prefix: INPUT= separate: false + doc: An aligned SAM or BAM file. Required. + secondaryFiles: + - ^.bai - id: output_file_name type: string? inputBinding: position: 0 prefix: OUTPUT= separate: false + doc: The output file to write the metrics to. Required. - id: metric_accumulation_level type: - 'null' @@ -71,36 +94,62 @@ inputs: position: 0 prefix: METRIC_ACCUMULATION_LEVEL= separate: false + doc: >- + The level(s) at which to accumulate metrics. 
Default value: [ALL_READS]. + This option can be set to 'null' to clear the default value. Possible + values: {ALL_READS, SAMPLE, LIBRARY, READ_GROUP} This option may be + specified 0 or more times. This option can be set to 'null' to clear the + default list. - id: per_target_coverage type: File? inputBinding: position: 0 prefix: PER_TARGET_COVERAGE= separate: false + doc: >- + An optional file to output per target coverage information to. Default + value: null. - id: per_base_coverage type: File? inputBinding: position: 0 prefix: PER_BASE_COVERAGE= separate: false + doc: >- + An optional file to output per base coverage information to. The per-base + file contains one line per target base and can grow very large. It is not + recommended for use with large target sets. Default value: null. - id: near_distance type: int? inputBinding: position: 0 prefix: NEAR_DISTANCE= separate: false + doc: >- + The maximum distance between a read and the nearest probe/bait/amplicon + for the read to be considered 'near probe' and included in percent + selected. Default value: 250. This option can be set to 'null' to clear + the default value. - id: coverage_cap type: int? inputBinding: position: 0 prefix: COVERAGE_CAP= separate: false + doc: >- + Parameter to set a max coverage limit for Theoretical Sensitivity + calculations. Default is 200. Default value: 200. This option can be set + to 'null' to clear the default value. - id: sample_size type: int? inputBinding: position: 0 prefix: SAMPLE_SIZE= separate: false + doc: >- + Sample Size used for Theoretical Het Sensitivity sampling. Default is + 10000. Default value: 10000. This option can be set to 'null' to clear the + default value. outputs: - id: hs_metrics_file type: File? 
@@ -113,11 +162,11 @@ outputs: return inputs.input.basename.replace(/.bam/,'.hsmetrics') } } -label: picard_hsmetrics_2.8.1 +label: picard_hsmetrics_2.21.2 arguments: - position: 0 prefix: '-jar' - valueFrom: /usr/local/bin/picard.jar + valueFrom: /usr/picard/picard.jar - position: 0 valueFrom: CollectHsMetrics - position: 0 @@ -135,9 +184,8 @@ requirements: ramMin: 4000 coresMin: 1 - class: DockerRequirement - dockerPull: 'mskaccess/picard:0.6.2' + dockerPull: 'broadinstitute/picard:2.21.2' - class: InlineJavascriptRequirement - 'dct:contributor': - class: 'foaf:Organization' 'foaf:member': @@ -155,4 +203,4 @@ requirements: 'doap:release': - class: 'doap:Version' 'doap:name': hsmetrics - 'doap:revision': 2.8.1 + 'doap:revision': 2.21.2 From 2df47b3426dbdd57ed96891aa7d4833dc0b31693 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Wed, 6 Nov 2019 23:10:10 -0500 Subject: [PATCH 078/476] Adding collectmultiplemetrics 2.21.2 --- picard_collectmultiplemetric_2.21.2/README.md | 78 +++++++++++++++++++ .../example_inputs.yml | 18 +++++ .../picard_collectmultiplemetrics_2.21.2.cwl | 14 ++-- .../picard_fix_mate_information_2.21.2.cwl | 4 +- .../picard_hsmetrics_2.21.2.cwl | 4 +- 5 files changed, 107 insertions(+), 11 deletions(-) create mode 100644 picard_collectmultiplemetric_2.21.2/README.md create mode 100644 picard_collectmultiplemetric_2.21.2/example_inputs.yml diff --git a/picard_collectmultiplemetric_2.21.2/README.md b/picard_collectmultiplemetric_2.21.2/README.md new file mode 100644 index 00000000..c8b92500 --- /dev/null +++ b/picard_collectmultiplemetric_2.21.2/README.md @@ -0,0 +1,78 @@ +# CWL for running Picard - CollectMultipleMetrics + +## Version of tools in docker image + +| Tool | Version | Location | +|--- |--- |--- | +| picard | 2.21.2 | https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar | + + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using 
[toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner picard_collectmultiplemetrics_2.21.2.cwl example_inputs.yaml +``` + +### Usage + +```bash +> usage: picard_collectmultiplemetrics_2.21.2.cwl [-h] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --input INPUT Input file (bam or sam). Required. + --output_file_name OUTPUT_FILE_NAME + Output file (bam or sam). + --metrics_acciumulation_level METRICS_ACCIUMULATION_LEVEL + The level(s) at which to accumulate metrics. Default + value: [ALL_READS]. This option can be set to 'null' + to clear the default value. Possible values: + {ALL_READS, SAMPLE, LIBRARY, READ_GROUP} This option + may be specified 0 or more times. This option can be + set to 'null' to clear the default list. + --max_insert_size MAX_INSERT_SIZE + Paired-end reads above this insert size will be + considered chimeric along with inter-chromosomal + pairs. Default value: 100000. This option can be set + to 'null' to clear the default value. + --tmp_dir TMP_DIR This option may be specified 0 or more times + --validation_stringency VALIDATION_STRINGENCY + Validation stringency for all SAM files read by this + program. Setting stringency to SILENT can improve + performance when processing a BAM file in which + variable-length data (read, qualities, tags) do not + otherwise need to be decoded. Default value: STRICT. + This option can be set to 'null' to clear the default + value. Possible values: {STRICT,LENIENT, SILENT} + --bam_compression_level BAM_COMPRESSION_LEVEL + Compression level for all compressed files created + (e.g. BAM and GELI). Default value:5. This option can + be set to 'null' to clear the default value. 
+ --create_bam_index Whether to create a BAM index when writing a + coordinate-sorted BAM file. Default value:false. This + option can be set to 'null' to clear the default + value. Possible values:{true, false} + --assume_sorted + --reference_sequence REFERENCE_SEQUENCE + Reference sequence file. Note that while this argument + isn't required, without it only a small subset of the + metrics will be calculated. Note also that if a + reference sequence is provided, it must be accompanied + by a sequence dictionary. Default value: null. + --stop_after STOP_AFTER + Stop after processing N reads, mainly for debugging. + Default value: 0. This option can be set to 'null' to + clear the default value. +``` diff --git a/picard_collectmultiplemetric_2.21.2/example_inputs.yml b/picard_collectmultiplemetric_2.21.2/example_inputs.yml new file mode 100644 index 00000000..72207111 --- /dev/null +++ b/picard_collectmultiplemetric_2.21.2/example_inputs.yml @@ -0,0 +1,18 @@ +input: + class: File + path: "Sample.bam" +assume_sorted: +bam_compression_level: +create_bam_index: +dbsnp_file: +file_extension: +include_unpaired: +intervals_file: +memory_overhead: +memory_per_job: +metric_accumulation_level: +number_of_threads: +output_file_name: +program_list: +stop_after: +validation_stringency: diff --git a/picard_collectmultiplemetric_2.21.2/picard_collectmultiplemetrics_2.21.2.cwl b/picard_collectmultiplemetric_2.21.2/picard_collectmultiplemetrics_2.21.2.cwl index d5937222..8699d438 100644 --- a/picard_collectmultiplemetric_2.21.2/picard_collectmultiplemetrics_2.21.2.cwl +++ b/picard_collectmultiplemetric_2.21.2/picard_collectmultiplemetrics_2.21.2.cwl @@ -5,7 +5,7 @@ $namespaces: doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' sbg: 'https://www.sevenbridges.com/' -id: picard_collectmultiplemetrics_2_8_1 +id: picard_collectmultiplemetrics_2.21.2 baseCommand: - java inputs: @@ -217,7 +217,7 @@ outputs: type: File? 
outputBinding: glob: '*quality_distribution.pdf' -label: picard_collectmultiplemetrices_2.8.1 +label: picard_collectmultiplemetrices_2.21.2 arguments: - position: 0 prefix: '' @@ -225,7 +225,7 @@ arguments: valueFrom: "${\n if(inputs.memory_per_job && inputs.memory_overhead) {\n \n if(inputs.memory_per_job % 1000 == 0) {\n \t\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n \n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\" \n }\n }\n else if (inputs.memory_per_job && !inputs.memory_overhead){\n \n if(inputs.memory_per_job % 1000 == 0) {\n \t\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n \n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\" \n }\n }\n else if(!inputs.memory_per_job && inputs.memory_overhead){\n \n return \"-Xmx15G\"\n }\n else {\n \n \treturn \"-Xmx15G\"\n }\n}" - position: 0 prefix: '-jar' - valueFrom: /usr/local/bin/picard.jar + valueFrom: /usr/picard/picard.jar - position: 0 prefix: '' separate: false @@ -246,7 +246,7 @@ requirements: ramMin: 10000 coresMin: 8 - class: DockerRequirement - dockerPull: 'mskaccess/picard:0.6.2' + dockerPull: 'broadinstitute/picard:2.21.2' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' @@ -259,10 +259,10 @@ requirements: - class: 'foaf:Organization' 'foaf:member': - class: 'foaf:Person' - 'foaf:mbox': 'mailto:sumans@mskcc.org' - 'foaf:name': Shalabh Suman + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah 'foaf:name': Memorial Sloan Kettering Cancer Center 'doap:release': - class: 'doap:Version' 'doap:name': picard - 'doap:revision': 2.8.1 + 'doap:revision': 2.21.2 diff --git a/picard_fix_mate_information_2.21.2/picard_fix_mate_information_2.21.2.cwl b/picard_fix_mate_information_2.21.2/picard_fix_mate_information_2.21.2.cwl index 504e43f4..083e75a5 100644 --- a/picard_fix_mate_information_2.21.2/picard_fix_mate_information_2.21.2.cwl 
+++ b/picard_fix_mate_information_2.21.2/picard_fix_mate_information_2.21.2.cwl @@ -5,7 +5,7 @@ $namespaces: doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' sbg: 'https://www.sevenbridges.com/' -id: picard_fix_mate_information_2.21.2 +id: picard_fix_mate_information_2_21_2 baseCommand: - java inputs: @@ -92,7 +92,7 @@ outputs: } secondaryFiles: - ^.bai -label: picard_fix_mate_information_1.96 +label: picard_fix_mate_information_2.21.2 arguments: - position: 0 valueFrom: |- diff --git a/picard_hsmetrics_2.21.2/picard_hsmetrics_2.21.2.cwl b/picard_hsmetrics_2.21.2/picard_hsmetrics_2.21.2.cwl index d38c8ac3..f7b43ecc 100644 --- a/picard_hsmetrics_2.21.2/picard_hsmetrics_2.21.2.cwl +++ b/picard_hsmetrics_2.21.2/picard_hsmetrics_2.21.2.cwl @@ -197,8 +197,8 @@ requirements: - class: 'foaf:Organization' 'foaf:member': - class: 'foaf:Person' - 'foaf:mbox': 'mailto:johnsoni@mskcc.org' - 'foaf:name': Ian Johnson + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah 'foaf:name': Memorial Sloan Kettering Cancer Center 'doap:release': - class: 'doap:Version' From 34f8b4095dab8fc1f586cca00323c33a07376726 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Wed, 6 Nov 2019 23:15:51 -0500 Subject: [PATCH 079/476] Update SUMMARY.md --- docs/SUMMARY.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 4afb6009..198b7b7d 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -23,12 +23,18 @@ * [v0.1.7](../merge_fastq_0.1.7/README.md) * Picard Tools * [AddOrReplaceReadGroups v1.96](../picard_add_or_replace_read_groups_1.96/README.md) + * [AddOrReplaceReadGroups v2.21.2](../picard_add_or_replace_read_groups_2.21.2/README.md) * [CollectAlignmentSummaryMetrics v2.8.1](../picard_collect_alignment_summary_metrics_2.8.1/README.md) + * [CollectAlignmentSummaryMetrics v2.21.2](../picard_collect_alignment_summary_metrics_2.21.2/README.md) * [CollectMultipleMetrics 
v2.8.1](../picard_collectmultiplemetric_2.8.1/README.md) + * [CollectMultipleMetrics v2.21.2](../picard_collectmultiplemetric_2.21.2/README.md) * [FixMateInformation v1.96](../picard_fix_mate_information_1.96/README.md) + * [FixMateInformation v2.21.2](../picard_fix_mate_information_2.21.2/README.md) * [HSmetrics v2.8.1](../picard_hsmetrics_2.8.1/README.md) + * [HSmetrics v2.21.2](../picard_hsmetrics_2.21.2/README.md) * [MarkDuplicates v1.96](../picard_mark_duplicates_1.96/README.md) * [MarkDuplicates v2.8.1](../picard_mark_duplicates_2.8.1/README.md) + * [MarkDuplicates v2.21.2](../picard_mark_duplicates_2.21.2/README.md) * Trim Galore * [v0.6.2](../trim_galore_0.6.2/README.md) * Ubuntu utilites From bf962287682ab39f9479d42774371284cc1e9a8d Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Wed, 20 Nov 2019 15:46:34 -0500 Subject: [PATCH 080/476] Modifying the picrd tools to have additional options --- .../picard_add_or_replace_read_groups_1.96.cwl | 14 +++++++++++--- .../picard_add_or_replace_read_groups_2.21.2.cwl | 15 +++++++++++---- .../picard_fix_mate_information_1.96.cwl | 9 ++++++++- .../picard_fix_mate_information_2.21.2.cwl | 9 ++++++++- 4 files changed, 38 insertions(+), 9 deletions(-) diff --git a/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl b/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl index d1f51ad1..2e276bc0 100644 --- a/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl +++ b/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl @@ -4,6 +4,7 @@ $namespaces: dct: 'http://purl.org/dc/terms/' doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' id: picard_add_or_replace_read_groups_1_96 baseCommand: - java @@ -45,7 +46,7 @@ inputs: doc: >- Read Group ID Default value: 1. 
This option can be set to 'null' to clear the default value Required - - id: read_group_sequnecing_center + - id: read_group_sequencing_center type: string inputBinding: position: 0 @@ -53,7 +54,7 @@ inputs: separate: false doc: 'Read Group sequencing center name Default value: null. Required' - id: read_group_library - type: int + type: string inputBinding: position: 0 prefix: RGLB= @@ -100,6 +101,7 @@ inputs: position: 0 prefix: TMP_DIR= separate: false + default: $(runtime.tmpdir) doc: This option may be specified 0 or more times - id: validation_stringency type: string? @@ -173,6 +175,12 @@ arguments: return "-Xmx15G" } } + - position: 0 + valueFrom: "-XX:-UseGCOverheadLimit" + shellQuote: false + - position: 0 + valueFrom: "-Djava.io.tmpdir=$(runtime.tmpdir)" + shellQuote: false - position: 0 prefix: '-jar' valueFrom: /usr/local/bin/AddOrReplaceReadGroups.jar @@ -187,7 +195,7 @@ arguments: } requirements: - class: ResourceRequirement - ramMin: 16000 + ramMin: 25000 coresMin: 2 - class: DockerRequirement dockerPull: 'mskaccess/picard_1.96:0.6.2' diff --git a/picard_add_or_replace_read_groups_2.21.2/picard_add_or_replace_read_groups_2.21.2.cwl b/picard_add_or_replace_read_groups_2.21.2/picard_add_or_replace_read_groups_2.21.2.cwl index d8e3af88..0795136e 100644 --- a/picard_add_or_replace_read_groups_2.21.2/picard_add_or_replace_read_groups_2.21.2.cwl +++ b/picard_add_or_replace_read_groups_2.21.2/picard_add_or_replace_read_groups_2.21.2.cwl @@ -5,7 +5,7 @@ $namespaces: doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' sbg: 'https://www.sevenbridges.com/' -id: picard_add_or_replace_read_groups_2.21.2 +id: picard_add_or_replace_read_groups_2_21_2 baseCommand: - java inputs: @@ -46,7 +46,7 @@ inputs: doc: >- Read Group ID Default value: 1. 
This option can be set to 'null' to clear the default value Required - - id: read_group_sequnecing_center + - id: read_group_sequencing_center type: string inputBinding: position: 0 @@ -54,7 +54,7 @@ inputs: separate: false doc: 'Read Group sequencing center name Default value: null. Required' - id: read_group_library - type: int + type: string inputBinding: position: 0 prefix: RGLB= @@ -101,6 +101,7 @@ inputs: position: 0 prefix: TMP_DIR= separate: false + default: $(runtime.tmpdir) doc: This option may be specified 0 or more times - id: validation_stringency type: string? @@ -174,6 +175,12 @@ arguments: return "-Xmx15G" } } + - position: 0 + valueFrom: "-XX:-UseGCOverheadLimit" + shellQuote: false + - position: 0 + valueFrom: "-Djava.io.tmpdir=$(runtime.tmpdir)" + shellQuote: false - position: 0 prefix: '-jar' valueFrom: /usr/picard/picard.jar @@ -191,7 +198,7 @@ arguments: } requirements: - class: ResourceRequirement - ramMin: 17000 + ramMin: 25000 coresMin: 2 - class: DockerRequirement dockerPull: 'broadinstitute/picard:2.21.2' diff --git a/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl b/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl index 1287b1c7..6a3ed4e7 100644 --- a/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl +++ b/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl @@ -45,6 +45,7 @@ inputs: position: 0 prefix: TMP_DIR= separate: false + default: $(runtime.tmpdir) doc: This option may be specified 0 or more times - id: validation_stringency type: string? 
@@ -120,6 +121,12 @@ arguments: return "-Xmx15G" } } + - position: 0 + valueFrom: "-XX:-UseGCOverheadLimit" + shellQuote: false + - position: 0 + valueFrom: "-Djava.io.tmpdir=$(runtime.tmpdir)" + shellQuote: false - position: 0 prefix: '-jar' valueFrom: /usr/local/bin/FixMateInformation.jar @@ -136,7 +143,7 @@ arguments: } requirements: - class: ResourceRequirement - ramMin: 16000 + ramMin: 25000 coresMin: 2 - class: DockerRequirement dockerPull: 'mskaccess/picard_1.96:0.6.2' diff --git a/picard_fix_mate_information_2.21.2/picard_fix_mate_information_2.21.2.cwl b/picard_fix_mate_information_2.21.2/picard_fix_mate_information_2.21.2.cwl index 083e75a5..0c737992 100644 --- a/picard_fix_mate_information_2.21.2/picard_fix_mate_information_2.21.2.cwl +++ b/picard_fix_mate_information_2.21.2/picard_fix_mate_information_2.21.2.cwl @@ -45,6 +45,7 @@ inputs: position: 0 prefix: TMP_DIR= separate: false + default: $(runtime.tmpdir) doc: This option may be specified 0 or more times - id: validation_stringency type: string? 
@@ -120,6 +121,12 @@ arguments: return "-Xmx15G" } } + - position: 0 + valueFrom: "-XX:-UseGCOverheadLimit" + shellQuote: false + - position: 0 + valueFrom: "-Djava.io.tmpdir=$(runtime.tmpdir)" + shellQuote: false - position: 0 prefix: '-jar' valueFrom: /usr/picard/picard.jar @@ -138,7 +145,7 @@ arguments: } requirements: - class: ResourceRequirement - ramMin: 17000 + ramMin: 25000 coresMin: 2 - class: DockerRequirement dockerPull: 'broadinstitute/picard:2.21.2' From 2a2881762f0153095563747be94f01d2afb1548e Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Thu, 21 Nov 2019 21:42:01 -0500 Subject: [PATCH 081/476] Adding TMP_DIR to arguments section --- abra2_2.19/abra2_2.19.cwl | 5 +++-- .../picard_add_or_replace_read_groups_1.96.cwl | 12 ++++-------- .../picard_add_or_replace_read_groups_2.21.2.cwl | 12 ++++-------- .../picard_fix_mate_information_1.96.cwl | 12 ++++-------- .../picard_fix_mate_information_2.21.2.cwl | 12 ++++-------- 5 files changed, 19 insertions(+), 34 deletions(-) diff --git a/abra2_2.19/abra2_2.19.cwl b/abra2_2.19/abra2_2.19.cwl index a623c3c2..1af3c194 100644 --- a/abra2_2.19/abra2_2.19.cwl +++ b/abra2_2.19/abra2_2.19.cwl @@ -4,7 +4,8 @@ $namespaces: dct: 'http://purl.org/dc/terms/' doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' -id: abra2_2.19 + sbg: 'https://www.sevenbridges.com/' +id: abra2_2_19 baseCommand: - java inputs: @@ -173,7 +174,7 @@ requirements: ramMin: "${\r if(inputs.memory_per_job && inputs.memory_overhead) {\r \r return inputs.memory_per_job + inputs.memory_overhead\r }\r else if (inputs.memory_per_job && !inputs.memory_overhead){\r \r \treturn inputs.memory_per_job + 2000\r }\r else if(!inputs.memory_per_job && inputs.memory_overhead){\r \r return 15000 + inputs.memory_overhead\r }\r else {\r \r \treturn 17000 \r }\r}" coresMin: "${\r if (inputs.number_of_threads) {\r \r \treturn inputs.number_of_threads \r }\r else {\r \r return 4\r }\r}" - class: DockerRequirement - dockerPull: 
'mskaccess/abra2_2.19:0.6.1' + dockerPull: 'aphoid/abra2:2.19' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl b/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl index 2e276bc0..cce8a707 100644 --- a/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl +++ b/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl @@ -95,14 +95,6 @@ inputs: prefix: RGDT= separate: false doc: 'Read Group run date Default value: null.' - - id: tmp_dir - type: string? - inputBinding: - position: 0 - prefix: TMP_DIR= - separate: false - default: $(runtime.tmpdir) - doc: This option may be specified 0 or more times - id: validation_stringency type: string? inputBinding: @@ -184,6 +176,10 @@ arguments: - position: 0 prefix: '-jar' valueFrom: /usr/local/bin/AddOrReplaceReadGroups.jar + - position: 0 + prefix: TMP_DIR= + separate: false + valueFrom: "$(runtime.tmpdir)" - position: 0 prefix: O= separate: false diff --git a/picard_add_or_replace_read_groups_2.21.2/picard_add_or_replace_read_groups_2.21.2.cwl b/picard_add_or_replace_read_groups_2.21.2/picard_add_or_replace_read_groups_2.21.2.cwl index 0795136e..bdc291df 100644 --- a/picard_add_or_replace_read_groups_2.21.2/picard_add_or_replace_read_groups_2.21.2.cwl +++ b/picard_add_or_replace_read_groups_2.21.2/picard_add_or_replace_read_groups_2.21.2.cwl @@ -95,14 +95,6 @@ inputs: prefix: RGDT= separate: false doc: 'Read Group run date Default value: null.' - - id: tmp_dir - type: string? - inputBinding: - position: 0 - prefix: TMP_DIR= - separate: false - default: $(runtime.tmpdir) - doc: This option may be specified 0 or more times - id: validation_stringency type: string? 
inputBinding: @@ -187,6 +179,10 @@ arguments: - position: 0 separate: false valueFrom: AddOrReplaceReadGroups + - position: 0 + prefix: TMP_DIR= + separate: false + valueFrom: "$(runtime.tmpdir)" - position: 0 prefix: O= separate: false diff --git a/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl b/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl index 6a3ed4e7..1bf08bbe 100644 --- a/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl +++ b/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl @@ -39,14 +39,6 @@ inputs: Optional sort order to output in. If not supplied OUTPUT is in the same order as INPUT.Default value: null. Possible values: {unsorted, queryname, coordinate} - - id: tmp_dir - type: string? - inputBinding: - position: 0 - prefix: TMP_DIR= - separate: false - default: $(runtime.tmpdir) - doc: This option may be specified 0 or more times - id: validation_stringency type: string? inputBinding: @@ -130,6 +122,10 @@ arguments: - position: 0 prefix: '-jar' valueFrom: /usr/local/bin/FixMateInformation.jar + - position: 0 + prefix: TMP_DIR= + separate: false + valueFrom: "$(runtime.tmpdir)" - position: 0 prefix: O= separate: false diff --git a/picard_fix_mate_information_2.21.2/picard_fix_mate_information_2.21.2.cwl b/picard_fix_mate_information_2.21.2/picard_fix_mate_information_2.21.2.cwl index 0c737992..5d6dde04 100644 --- a/picard_fix_mate_information_2.21.2/picard_fix_mate_information_2.21.2.cwl +++ b/picard_fix_mate_information_2.21.2/picard_fix_mate_information_2.21.2.cwl @@ -39,14 +39,6 @@ inputs: Optional sort order to output in. If not supplied OUTPUT is in the same order as INPUT.Default value: null. Possible values: {unsorted, queryname, coordinate} - - id: tmp_dir - type: string? - inputBinding: - position: 0 - prefix: TMP_DIR= - separate: false - default: $(runtime.tmpdir) - doc: This option may be specified 0 or more times - id: validation_stringency type: string? 
inputBinding: @@ -132,6 +124,10 @@ arguments: valueFrom: /usr/picard/picard.jar - position: 0 valueFrom: FixMateInformation + - position: 0 + prefix: TMP_DIR= + separate: false + valueFrom: "$(runtime.tmpdir)" - position: 0 prefix: O= separate: false From 5b284d7b814f5258cce45194d04e015805bc8a32 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 22 Nov 2019 15:12:48 -0500 Subject: [PATCH 082/476] Fix separate tagg error --- .../picard_add_or_replace_read_groups_2.21.2.cwl | 1 - 1 file changed, 1 deletion(-) diff --git a/picard_add_or_replace_read_groups_2.21.2/picard_add_or_replace_read_groups_2.21.2.cwl b/picard_add_or_replace_read_groups_2.21.2/picard_add_or_replace_read_groups_2.21.2.cwl index bdc291df..ed057707 100644 --- a/picard_add_or_replace_read_groups_2.21.2/picard_add_or_replace_read_groups_2.21.2.cwl +++ b/picard_add_or_replace_read_groups_2.21.2/picard_add_or_replace_read_groups_2.21.2.cwl @@ -177,7 +177,6 @@ arguments: prefix: '-jar' valueFrom: /usr/picard/picard.jar - position: 0 - separate: false valueFrom: AddOrReplaceReadGroups - position: 0 prefix: TMP_DIR= From 60e991ded8620041804778f8ed5da10140425f4b Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Sun, 24 Nov 2019 11:28:03 -0500 Subject: [PATCH 083/476] Adding TMP_DIR as unbound argument --- .../picard_add_or_replace_read_groups_1.96.cwl | 15 ++++++++++++--- .../picard_fix_mate_information_1.96.cwl | 10 +++++++++- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl b/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl index cce8a707..fcd59f2b 100644 --- a/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl +++ b/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl @@ -127,6 +127,9 @@ inputs: Whether to create a BAM index when writing a coordinate-sorted BAM file. Default value:false. 
This option can be set to 'null' to clear the default value. Possible values:{true, false} + - id: temporary_directory + type: string? + doc: 'Default value: null. This option may be specified 0 or more times.' outputs: - id: bam type: File @@ -168,18 +171,23 @@ arguments: } } - position: 0 - valueFrom: "-XX:-UseGCOverheadLimit" shellQuote: false + valueFrom: '-XX:-UseGCOverheadLimit' - position: 0 - valueFrom: "-Djava.io.tmpdir=$(runtime.tmpdir)" shellQuote: false + valueFrom: '-Djava.io.tmpdir=$(runtime.tmpdir)' - position: 0 prefix: '-jar' valueFrom: /usr/local/bin/AddOrReplaceReadGroups.jar - position: 0 prefix: TMP_DIR= separate: false - valueFrom: "$(runtime.tmpdir)" + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return $(runtime.tmpdir) + } - position: 0 prefix: O= separate: false @@ -190,6 +198,7 @@ arguments: return inputs.input.basename.replace(/.sam$/, '_srt.bam'); } requirements: + - class: ShellCommandRequirement - class: ResourceRequirement ramMin: 25000 coresMin: 2 diff --git a/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl b/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl index 1bf08bbe..41f263b1 100644 --- a/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl +++ b/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl @@ -71,6 +71,9 @@ inputs: Whether to create a BAM index when writing a coordinate-sorted BAM file. Default value:false. This option can be set to 'null' to clear the default value. Possible values:{true, false} + - id: temporary_directory + type: string? + doc: 'Default value: null. This option may be specified 0 or more times.' 
outputs: - id: bam type: File @@ -125,7 +128,12 @@ arguments: - position: 0 prefix: TMP_DIR= separate: false - valueFrom: "$(runtime.tmpdir)" + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return $(runtime.tmpdir) + } - position: 0 prefix: O= separate: false From 988695b20bea87f867718dfaf40195b4f757c344 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Sun, 24 Nov 2019 11:38:55 -0500 Subject: [PATCH 084/476] single quotes runtime variable --- .../picard_add_or_replace_read_groups_1.96.cwl | 2 +- .../picard_fix_mate_information_1.96.cwl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl b/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl index fcd59f2b..a47a32eb 100644 --- a/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl +++ b/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl @@ -186,7 +186,7 @@ arguments: ${ if(inputs.temporary_directory) return inputs.temporary_directory; - return $(runtime.tmpdir) + return '$(runtime.tmpdir)' } - position: 0 prefix: O= diff --git a/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl b/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl index 41f263b1..8e05a8db 100644 --- a/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl +++ b/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl @@ -132,7 +132,7 @@ arguments: ${ if(inputs.temporary_directory) return inputs.temporary_directory; - return $(runtime.tmpdir) + return '$(runtime.tmpdir)' } - position: 0 prefix: O= From 4e9e4f1fc64984a933079d6af24bc0ca121c3284 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Tue, 26 Nov 2019 09:38:11 -0500 Subject: [PATCH 085/476] Making temporary_directory as Directory instead of string --- .../picard_add_or_replace_read_groups_1.96.cwl | 4 
++-- .../picard_fix_mate_information_1.96.cwl | 15 ++++++++------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl b/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl index a47a32eb..2f07be69 100644 --- a/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl +++ b/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl @@ -128,7 +128,7 @@ inputs: Default value:false. This option can be set to 'null' to clear the default value. Possible values:{true, false} - id: temporary_directory - type: string? + type: Directory? doc: 'Default value: null. This option may be specified 0 or more times.' outputs: - id: bam @@ -186,7 +186,7 @@ arguments: ${ if(inputs.temporary_directory) return inputs.temporary_directory; - return '$(runtime.tmpdir)' + return runtime.tmpdir } - position: 0 prefix: O= diff --git a/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl b/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl index 8e05a8db..ab0b7c59 100644 --- a/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl +++ b/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl @@ -4,7 +4,8 @@ $namespaces: dct: 'http://purl.org/dc/terms/' doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' -id: picard_fix_mate_information_1.96 + sbg: 'https://www.sevenbridges.com/' +id: picard_fix_mate_information_1_96 baseCommand: - java inputs: @@ -27,8 +28,7 @@ inputs: - ^.bai - id: output_file_name type: string? - doc: >- - Output file name (bam or sam). Not Required + doc: Output file name (bam or sam). Not Required - id: sort_order type: string? inputBinding: @@ -72,7 +72,7 @@ inputs: Default value:false. This option can be set to 'null' to clear the default value. Possible values:{true, false} - id: temporary_directory - type: string? 
+ type: Directory? doc: 'Default value: null. This option may be specified 0 or more times.' outputs: - id: bam @@ -117,11 +117,11 @@ arguments: } } - position: 0 - valueFrom: "-XX:-UseGCOverheadLimit" shellQuote: false + valueFrom: '-XX:-UseGCOverheadLimit' - position: 0 - valueFrom: "-Djava.io.tmpdir=$(runtime.tmpdir)" shellQuote: false + valueFrom: '-Djava.io.tmpdir=$(runtime.tmpdir)' - position: 0 prefix: '-jar' valueFrom: /usr/local/bin/FixMateInformation.jar @@ -132,7 +132,7 @@ arguments: ${ if(inputs.temporary_directory) return inputs.temporary_directory; - return '$(runtime.tmpdir)' + return runtime.tmpdir } - position: 0 prefix: O= @@ -146,6 +146,7 @@ arguments: } } requirements: + - class: ShellCommandRequirement - class: ResourceRequirement ramMin: 25000 coresMin: 2 From 5754f9d3b8970ca9f14c0886dd54f40e9264133f Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Tue, 26 Nov 2019 23:23:04 -0500 Subject: [PATCH 086/476] Adding hints to avoid loding list --- .../picard_add_or_replace_read_groups_1.96.cwl | 4 ++++ .../picard_fix_mate_information_1.96.cwl | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl b/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl index 2f07be69..fdb8908f 100644 --- a/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl +++ b/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl @@ -5,6 +5,10 @@ $namespaces: doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' sbg: 'https://www.sevenbridges.com/' + cwltool: 'http://commonwl.org/cwltool#' +hints: + cwltool:LoadListingRequirement: + loadListing: no_listing id: picard_add_or_replace_read_groups_1_96 baseCommand: - java diff --git a/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl b/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl index ab0b7c59..3d7d0108 
100644 --- a/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl +++ b/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl @@ -5,6 +5,10 @@ $namespaces: doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' sbg: 'https://www.sevenbridges.com/' + cwltool: 'http://commonwl.org/cwltool#' +hints: + cwltool:LoadListingRequirement: + loadListing: no_listing id: picard_fix_mate_information_1_96 baseCommand: - java From 13201da11dcff3e51e3370ed361a215bba366d89 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Wed, 27 Nov 2019 21:45:33 -0500 Subject: [PATCH 087/476] Adding InititalWorkDirOption --- .../picard_add_or_replace_read_groups_1.96.cwl | 9 +++++---- .../picard_fix_mate_information_1.96.cwl | 8 +++++--- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl b/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl index fdb8908f..356080f4 100644 --- a/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl +++ b/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl @@ -5,10 +5,6 @@ $namespaces: doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' sbg: 'https://www.sevenbridges.com/' - cwltool: 'http://commonwl.org/cwltool#' -hints: - cwltool:LoadListingRequirement: - loadListing: no_listing id: picard_add_or_replace_read_groups_1_96 baseCommand: - java @@ -209,6 +205,11 @@ requirements: - class: DockerRequirement dockerPull: 'mskaccess/picard_1.96:0.6.2' - class: InlineJavascriptRequirement + - class: InitialWorkDirRequirement + listing: + entry: "$({class: 'Directory', listing: []})" + entryname: $(inputs.temporary_directory) + writable: true 'dct:contributor': - class: 'foaf:Organization' 'foaf:member': diff --git a/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl 
b/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl index 3d7d0108..d0cfd4b3 100644 --- a/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl +++ b/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl @@ -6,9 +6,6 @@ $namespaces: foaf: 'http://xmlns.com/foaf/0.1/' sbg: 'https://www.sevenbridges.com/' cwltool: 'http://commonwl.org/cwltool#' -hints: - cwltool:LoadListingRequirement: - loadListing: no_listing id: picard_fix_mate_information_1_96 baseCommand: - java @@ -157,6 +154,11 @@ requirements: - class: DockerRequirement dockerPull: 'mskaccess/picard_1.96:0.6.2' - class: InlineJavascriptRequirement + - class: InitialWorkDirRequirement + listing: + entry: "$({class: 'Directory', listing: []})" + entryname: $(inputs.temporary_directory) + writable: true 'dct:contributor': - class: 'foaf:Organization' 'foaf:member': From f536180dbfa98216d2553470158713d3f3cc0fd4 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 29 Nov 2019 12:56:03 -0500 Subject: [PATCH 088/476] Adding InititalWorkDirOption --- .../picard_add_or_replace_read_groups_1.96.cwl | 5 ++--- .../picard_fix_mate_information_1.96.cwl | 5 ++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl b/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl index 356080f4..fe563de8 100644 --- a/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl +++ b/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl @@ -207,9 +207,8 @@ requirements: - class: InlineJavascriptRequirement - class: InitialWorkDirRequirement listing: - entry: "$({class: 'Directory', listing: []})" - entryname: $(inputs.temporary_directory) - writable: true + - entry: $(inputs.temporary_directory) + writable: true 'dct:contributor': - class: 'foaf:Organization' 'foaf:member': diff --git 
a/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl b/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl index d0cfd4b3..33e1a6d9 100644 --- a/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl +++ b/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl @@ -156,9 +156,8 @@ requirements: - class: InlineJavascriptRequirement - class: InitialWorkDirRequirement listing: - entry: "$({class: 'Directory', listing: []})" - entryname: $(inputs.temporary_directory) - writable: true + - entry: $(inputs.temporary_directory) + writable: true 'dct:contributor': - class: 'foaf:Organization' 'foaf:member': From beb8ed2dd126a4e20fe819d47c15658833254c71 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 29 Nov 2019 15:04:06 -0500 Subject: [PATCH 089/476] Fixing InititalWorkDirOption --- .../picard_add_or_replace_read_groups_1.96.cwl | 3 ++- .../picard_fix_mate_information_1.96.cwl | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl b/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl index fe563de8..1916ca5b 100644 --- a/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl +++ b/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl @@ -207,7 +207,8 @@ requirements: - class: InlineJavascriptRequirement - class: InitialWorkDirRequirement listing: - - entry: $(inputs.temporary_directory) + - entryname: $(inputs.temporary_directory) + entry: "$({class: 'Directory', listing: []})" writable: true 'dct:contributor': - class: 'foaf:Organization' diff --git a/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl b/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl index 33e1a6d9..6a2f5c13 100644 --- a/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl +++ 
b/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl @@ -156,7 +156,8 @@ requirements: - class: InlineJavascriptRequirement - class: InitialWorkDirRequirement listing: - - entry: $(inputs.temporary_directory) + - entryname: $(inputs.temporary_directory) + entry: "$({class: 'Directory', listing: []})" writable: true 'dct:contributor': - class: 'foaf:Organization' From 071021064ced38dd138aa93e161da1d672e631bd Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 29 Nov 2019 15:27:19 -0500 Subject: [PATCH 090/476] Fixing InititalWorkDirOption --- .../picard_add_or_replace_read_groups_1.96.cwl | 10 ++++++---- .../picard_fix_mate_information_1.96.cwl | 8 +++++--- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl b/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl index 1916ca5b..1b47c743 100644 --- a/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl +++ b/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl @@ -204,12 +204,14 @@ requirements: coresMin: 2 - class: DockerRequirement dockerPull: 'mskaccess/picard_1.96:0.6.2' - - class: InlineJavascriptRequirement - class: InitialWorkDirRequirement listing: - - entryname: $(inputs.temporary_directory) - entry: "$({class: 'Directory', listing: []})" - writable: true + - entry: |- + ${ + return {"class": "Directory", "basename": inputs.temporary_directory, "listing": []} + } + writable: true + - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' 'foaf:member': diff --git a/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl b/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl index 6a2f5c13..139114ad 100644 --- a/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl +++ b/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl 
@@ -156,9 +156,11 @@ requirements: - class: InlineJavascriptRequirement - class: InitialWorkDirRequirement listing: - - entryname: $(inputs.temporary_directory) - entry: "$({class: 'Directory', listing: []})" - writable: true + - entry: |- + ${ + return {"class": "Directory", "basename": inputs.temporary_directory, "listing": []} + } + writable: true 'dct:contributor': - class: 'foaf:Organization' 'foaf:member': From f6ded4c46c3f17c280ee5db3bfc1b48ff44fc92b Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Mon, 2 Dec 2019 16:31:57 -0500 Subject: [PATCH 091/476] Last attempt Fixing TMPDIR --- .../picard_add_or_replace_read_groups_1.96.cwl | 17 ++++++++--------- .../picard_fix_mate_information_1.96.cwl | 17 ++++++++--------- 2 files changed, 16 insertions(+), 18 deletions(-) diff --git a/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl b/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl index 1b47c743..b9e1db01 100644 --- a/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl +++ b/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl @@ -174,8 +174,14 @@ arguments: shellQuote: false valueFrom: '-XX:-UseGCOverheadLimit' - position: 0 - shellQuote: false - valueFrom: '-Djava.io.tmpdir=$(runtime.tmpdir)' + prefix: -Djava.io.tmpdir= + separate: false + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } - position: 0 prefix: '-jar' valueFrom: /usr/local/bin/AddOrReplaceReadGroups.jar @@ -204,13 +210,6 @@ requirements: coresMin: 2 - class: DockerRequirement dockerPull: 'mskaccess/picard_1.96:0.6.2' - - class: InitialWorkDirRequirement - listing: - - entry: |- - ${ - return {"class": "Directory", "basename": inputs.temporary_directory, "listing": []} - } - writable: true - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git 
a/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl b/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl index 139114ad..fa4dcfe5 100644 --- a/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl +++ b/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl @@ -121,8 +121,14 @@ arguments: shellQuote: false valueFrom: '-XX:-UseGCOverheadLimit' - position: 0 - shellQuote: false - valueFrom: '-Djava.io.tmpdir=$(runtime.tmpdir)' + prefix: -Djava.io.tmpdir= + separate: false + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } - position: 0 prefix: '-jar' valueFrom: /usr/local/bin/FixMateInformation.jar @@ -154,13 +160,6 @@ requirements: - class: DockerRequirement dockerPull: 'mskaccess/picard_1.96:0.6.2' - class: InlineJavascriptRequirement - - class: InitialWorkDirRequirement - listing: - - entry: |- - ${ - return {"class": "Directory", "basename": inputs.temporary_directory, "listing": []} - } - writable: true 'dct:contributor': - class: 'foaf:Organization' 'foaf:member': From f2ad1796f2bababd8b9fbe3915fde4b6d5c50eb4 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Mon, 2 Dec 2019 16:50:20 -0500 Subject: [PATCH 092/476] Last attempt Fixing TMPDIR --- .../picard_add_or_replace_read_groups_1.96.cwl | 2 +- .../picard_fix_mate_information_1.96.cwl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl b/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl index b9e1db01..12f1a574 100644 --- a/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl +++ b/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl @@ -128,7 +128,7 @@ inputs: Default value:false. This option can be set to 'null' to clear the default value. 
Possible values:{true, false} - id: temporary_directory - type: Directory? + type: string? doc: 'Default value: null. This option may be specified 0 or more times.' outputs: - id: bam diff --git a/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl b/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl index fa4dcfe5..7d61658a 100644 --- a/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl +++ b/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl @@ -73,7 +73,7 @@ inputs: Default value:false. This option can be set to 'null' to clear the default value. Possible values:{true, false} - id: temporary_directory - type: Directory? + type: string? doc: 'Default value: null. This option may be specified 0 or more times.' outputs: - id: bam From d62455f5c2076f7ce58c81d38f60a13bc5307ccf Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 28 Feb 2020 17:11:24 -0500 Subject: [PATCH 093/476] Adding Manta :heavy_check_mark: Adding Dockerfile :heavy_check_mark: Adding example_inputs.yaml :heavy_check_mark: Adding manta CWL :heavy_check_mark: Adding README.md --- manta_1.5.1/README.md | 69 ++++++++++++ manta_1.5.1/container/Dockerfile | 83 ++++++++++++++ manta_1.5.1/example_inputs.yaml | 10 ++ manta_1.5.1/manta_1.5.1.cwl | 178 +++++++++++++++++++++++++++++++ 4 files changed, 340 insertions(+) create mode 100644 manta_1.5.1/README.md create mode 100644 manta_1.5.1/container/Dockerfile create mode 100644 manta_1.5.1/example_inputs.yaml create mode 100644 manta_1.5.1/manta_1.5.1.cwl diff --git a/manta_1.5.1/README.md b/manta_1.5.1/README.md new file mode 100644 index 00000000..0e7abc83 --- /dev/null +++ b/manta_1.5.1/README.md @@ -0,0 +1,69 @@ +# CWL and Dockerfile for running Manta + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +|--- |--- |--- | +| ubuntu base image | 16.04 | - | +| manta | 1.5.1 | https://github.com/Illumina/manta/releases/download/ | +| 
samtools | 1.9 | https://github.com/samtools/samtools/releases/download/ | +| htslib | 1.9 | "https://github.com/samtools/htslib/releases/download/ | + +[![](https://images.microbadger.com/badges/image/mskaccess/manta:0.0.2.svg)](https://microbadger.com/images/mskaccess/manta:0.0.2 "Get your own image badge on microbadger.com")[![](https://images.microbadger.com/badges/version/mskaccess/manta:0.0.2.svg)](https://microbadger.com/images/mskaccess/manta:0.0.2 "Get your own version badge on microbadger.com") [![](https://images.microbadger.com/badges/commit/mskaccess/manta:0.0.2.svg)](https://microbadger.com/images/mskaccess/manta:0.0.2 "Get your own commit badge on microbadger.com") [![](https://images.microbadger.com/badges/license/mskaccess/manta:0.0.2.svg)](https://microbadger.com/images/mskaccess/manta:0.0.2 "Get your own license badge on microbadger.com") + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner manta_1.5.1.cwl example_inputs.yaml +``` + +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/manta_1.5.1/manta_1.51.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> mkdir toil_log +> toil-cwl-runner --singularity --logFile /path/to/toil_log/cwltoil.log --jobStore /path/to/jobStore --batchSystem lsf --workDir /path/to =toil_log --outdir . 
--writeLogs /path/to/toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/manta_1.5.1/manta.1.5.1.cwl /path/to/inputs.yaml > toil.stdout 2> toil.stderr & +``` + +### Usage + +```bash +> toil-cwl-runner manta_1.5.1.cwl --help +usage: manta_1.5.1.cwl [-h] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --call_regions CALL_REGIONS + bgzip-compressed, tabix-indexed BED file specifiying + regions to which variant analysis will be restricted + --non_wgs toggles on settings for WES + --normal_bam NORMAL_BAM + Normal sample BAM or CRAM file. May be specified more + than once, multiple inputs will be treated as each BAM + file representing a different sample. [optional] (no + default) + --output_contigs if true, outputs assembled contig sequences in final + VCF files, in the INFO field CONTIG + --reference_fasta REFERENCE_FASTA + samtools-indexed reference fasta file [required] + --tumor_bam TUMOR_BAM + Tumor sample BAM or CRAM file. Only up to one tumor + bam file accepted. 
+ --generateEvidenceBam + Generate a bam of supporting reads for all SVs +``` diff --git a/manta_1.5.1/container/Dockerfile b/manta_1.5.1/container/Dockerfile new file mode 100644 index 00000000..f1171f34 --- /dev/null +++ b/manta_1.5.1/container/Dockerfile @@ -0,0 +1,83 @@ +################## BASE IMAGE ###################### + +FROM ubuntu:16.04 + +################## ARGUMENTS/Environments ########## + +ARG BUILD_DATE +ARG BUILD_VERSION +ARG VCS_REF +ARG UBUNTU_VERSION=16.04 +ARG LICENSE="Apache-2.0" +ARG MANTA_VERSION=1.5.1 +ARG SAMTOOLS_VERSION=1.9 +ARG HTSLIB_VERSION=1.9 + +################## METADATA ######################## +LABEL org.opencontainers.image.vendor="MSKCC" +LABEL org.opencontainers.image.authors="Ronak H Shah (shahr2@mskcc.org)" + +LABEL org.opencontainers.image.created=${BUILD_DATE} \ + org.opencontainers.image.version=${BUILD_VERSION} \ + org.opencontainers.image.licenses=${LICENSE} \ + org.label-schema.vcs-ref=${VCS_REF} \ + org.label-schema.vcs-url="https://github.com/msk-access/cwl-commandlinetools" \ + org.opencontainers.image.version.manta=${MANTA_VERSION} \ + org.opencontainers.image.version.samtools=${SAMTOOLS_VERSION} \ + org.opencontainers.image.version.samtools=${HTSLIB_VERSION} \ + org.opencontainers.image.version.ubuntu=${UBUNTU_VERSION} \ + org.opencontainers.image.source.manta="https://github.com/Illumina/manta/releases/download/v${MANTA_VERSION}/manta-${MANTA_VERSION}.centos6_x86_64.tar.bz2" \ + org.opencontainers.image.source.samtools=" https://github.com/samtools/samtools/releases/download/${SAMTOOLS_VERSION}/samtools-${SAMTOOLS_VERSION}.tar.bz2" \ + org.opencontainers.image.source.htslib="https://github.com/samtools/htslib/releases/download/${HTSLIB_VERSION}/htslib-${HTSLIB_VERSION}.tar.bz2" + +LABEL org.opencontainers.image.description="This container uses ubuntu ${UBUNTU_VERSION} as the base image to build \ + manta version ${MANTA_VERSION}, \ + samtools version ${SAMTOOLS_VERSION} and \ + htslib version ${HTSLIB_VERSION}" 
+ +################## INSTALL ########################## + + +#UBUNTU +RUN apt-get update \ + && apt install -y g++ libbz2-dev liblzma-dev make ncurses-dev wget zlib1g-dev libcurl4-openssl-dev wget python bzip2 \ + && apt-get clean \ + && apt-get purge \ + && apt-get remove --yes --purge build-essential \ + && rm -rf /var/lib/apt/lists/* /tmp/* /:var/tmp/* + +#MANTA +RUN apt-get update \ + && wget https://github.com/Illumina/manta/releases/download/v${MANTA_VERSION}/manta-${MANTA_VERSION}.centos6_x86_64.tar.bz2 \ + && tar -jxvf manta-${MANTA_VERSION}.centos6_x86_64.tar.bz2 \ + && rm -rf /manta-${MANTA_VERSION}.centos6_x86_64.tar.bz2 \ + && rm -rf /manta-${MANTA_VERSION}.centos6_x86_64/share/demo \ + && ln -s $PWD/manta-${MANTA_VERSION}.centos6_x86_64 /usr/local/bin/manta \ + && cp $PWD/manta-${MANTA_VERSION}.centos6_x86_64/libexec/convertInversion.py /usr/local/bin \ + && chmod +x /usr/local/bin/* \ + && apt-get clean \ + && apt-get purge \ + && apt-get remove --yes --purge build-essential \ + && rm -rf /var/lib/apt/lists/* /tmp/* /:var/tmp/* + +#SAMTOOLS +RUN apt-get update && apt-get install -y \ + && wget https://github.com/samtools/htslib/releases/download/${HTSLIB_VERSION}/htslib-${HTSLIB_VERSION}.tar.bz2 \ + && wget https://github.com/samtools/samtools/releases/download/${SAMTOOLS_VERSION}/samtools-${SAMTOOLS_VERSION}.tar.bz2 \ + && tar xvjf htslib-${HTSLIB_VERSION}.tar.bz2 \ + && cd htslib-${HTSLIB_VERSION} \ + && ./configure --enable-libcurl \ + && make \ + && cd ../.. 
\ + && tar xvjf samtools-${SAMTOOLS_VERSION}.tar.bz2 \ + && cd samtools-${SAMTOOLS_VERSION} \ + && ./configure --enable-libcurl \ + && make \ + && rm -r /htslib-${HTSLIB_VERSION}/test \ + && cp -r /htslib-${HTSLIB_VERSION}/* /usr/local/bin \ + && rm -r /samtools-${SAMTOOLS_VERSION}/test \ + && cp -r /samtools-${SAMTOOLS_VERSION}/* /usr/local/bin \ + && apt-get clean \ + && apt-get purge \ + && apt-get remove --yes --purge build-essential \ + && rm -rf /var/lib/apt/lists/* /tmp/* /:var/tmp/* diff --git a/manta_1.5.1/example_inputs.yaml b/manta_1.5.1/example_inputs.yaml new file mode 100644 index 00000000..6b5ca0f7 --- /dev/null +++ b/manta_1.5.1/example_inputs.yaml @@ -0,0 +1,10 @@ +call_regions: null +generateEvidenceBam: null +memory_overhead: null +memory_per_job: null +non_wgs: null +normal_bam: null +number_of_threads: null +output_contigs: null +reference_fasta: null +tumor_bam: null diff --git a/manta_1.5.1/manta_1.5.1.cwl b/manta_1.5.1/manta_1.5.1.cwl new file mode 100644 index 00000000..7445ad07 --- /dev/null +++ b/manta_1.5.1/manta_1.5.1.cwl @@ -0,0 +1,178 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: manta_1_51 +baseCommand: + - /usr/bin/python + - /usr/local/bin/manta/bin/configManta.py +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: call_regions + type: File? + inputBinding: + position: -5 + prefix: '--callRegions' + doc: >- + bgzip-compressed, tabix-indexed BED file specifiying regions to which + variant analysis will be restricted + secondaryFiles: + - .tbi + - id: non_wgs + type: boolean? + inputBinding: + position: -6 + prefix: '--exome' + doc: toggles on settings for WES + - id: normal_bam + type: File? 
+ inputBinding: + position: -2 + prefix: '--normalBam' + doc: >- + Normal sample BAM or CRAM file. May be specified more than once, multiple + inputs will be treated as each BAM file representing a different sample. + [optional] (no default) + secondaryFiles: + - |- + ${ + if (self.nameext === ".bam") { + return self.basename + ".bai" + } + else { + return self.basename + ".crai" + } + } + - id: output_contigs + type: boolean? + inputBinding: + position: -7 + prefix: '--outputContig' + doc: >- + if true, outputs assembled contig sequences in final VCF files, in the + INFO field CONTIG + - id: reference_fasta + type: File + inputBinding: + position: -4 + prefix: '--referenceFasta' + doc: 'samtools-indexed reference fasta file [required]' + secondaryFiles: + - .fai + - id: tumor_bam + type: File + inputBinding: + position: -3 + prefix: '--tumorBam' + doc: Tumor sample BAM or CRAM file. Only up to one tumor bam file accepted. + secondaryFiles: + - |- + ${ + if (self.nameext === ".bam") { + return self.basename + ".bai" + } + else { + return self.basename + ".crai" + } + } + - id: generateEvidenceBam + type: boolean? + inputBinding: + position: -8 + prefix: '--generateEvidenceBam' + separate: false + doc: Generate a bam of supporting reads for all SVs +outputs: + - id: all_candidates + type: File + outputBinding: + glob: results/variants/candidateSV.vcf.gz + secondaryFiles: + - .tbi + - id: diploid_variants + type: File? + outputBinding: + glob: results/variants/diploidSV.vcf.gz + secondaryFiles: + - .tbi + - id: small_candidates + type: File + outputBinding: + glob: results/variants/candidateSmallIndels.vcf.gz + secondaryFiles: + - .tbi + - id: somatic_variants + type: File? + outputBinding: + glob: results/variants/somaticSV.vcf.gz + secondaryFiles: + - .tbi + - id: tumor_only_variants + type: File? + outputBinding: + glob: results/variants/tumorSV.vcf.gz + secondaryFiles: + - .tbi + - id: evidence_bams + type: 'File[]?' 
+ outputBinding: + glob: |- + ${ if(inputs.generateEvidenceBam){ + return 'results/evidence/*.bam' + } + } + secondaryFiles: + - .bai +doc: Setup and execute Manta 1.51 +label: manta_1.5.1 +arguments: + - position: -1 + prefix: '--runDir' + valueFrom: $(runtime.outdir) + - position: 0 + shellQuote: false + valueFrom: '&&' + - /usr/bin/python + - runWorkflow.py + - '-m' + - local + - position: 1 + prefix: '-j' + valueFrom: $(runtime.cores) +requirements: + - class: ShellCommandRequirement + - class: ResourceRequirement + ramMin: 24000 + coresMin: 12 + tmpdirMin: 10000 + - class: DockerRequirement + dockerPull: 'mskaccess/manta:0.0.2' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': manta + 'doap:revision': 1.5.1 From 36c4420e30bcf110ac654297acbd74d86e1c1490 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Mon, 2 Mar 2020 16:32:58 -0500 Subject: [PATCH 094/476] Adding Docs to GitBook --- docs/SUMMARY.md | 2 ++ manta_1.5.1/example_inputs.yaml | 12 ++++++------ manta_1.5.1/manta_1.5.1.cwl | 3 +-- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 198b7b7d..ca421b79 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -12,6 +12,8 @@ * GATK * [ApplyBQSR v4.1.2.0](../gatk_ApplyBQSR_4.1.2.0/README.md) * [BaseRecalibrator v4.1.2.0](../gatk_BaseRecalibrator_4.1.2.0/README.md) + * Manta + * [Manta v1.5.1](../manta_1.5.1/README.md) * Marianas * [Collapsing First Pass v1.8.1](../marianas_collapsing_first_pass_1.8.1/README.md) * [Collapsing Second Pass 
v1.8.1](../marianas_collapsing_second_pass_1.8.1/README.md) diff --git a/manta_1.5.1/example_inputs.yaml b/manta_1.5.1/example_inputs.yaml index 6b5ca0f7..bd82b9b3 100644 --- a/manta_1.5.1/example_inputs.yaml +++ b/manta_1.5.1/example_inputs.yaml @@ -1,10 +1,10 @@ call_regions: null -generateEvidenceBam: null +generateEvidenceBam: true memory_overhead: null memory_per_job: null -non_wgs: null -normal_bam: null +non_wgs: true +normal_bam: /path/to/normal_bam number_of_threads: null -output_contigs: null -reference_fasta: null -tumor_bam: null +output_contigs: true +reference_fasta: /path/to/reference_fasta +tumor_bam: /path/to/tumor_bam diff --git a/manta_1.5.1/manta_1.5.1.cwl b/manta_1.5.1/manta_1.5.1.cwl index 7445ad07..1adb91f1 100644 --- a/manta_1.5.1/manta_1.5.1.cwl +++ b/manta_1.5.1/manta_1.5.1.cwl @@ -4,8 +4,7 @@ $namespaces: dct: 'http://purl.org/dc/terms/' doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' - sbg: 'https://www.sevenbridges.com/' -id: manta_1_51 +id: manta_1.5.1 baseCommand: - /usr/bin/python - /usr/local/bin/manta/bin/configManta.py From 77ecdbf940369a30a9c638d9478e21451bb53da3 Mon Sep 17 00:00:00 2001 From: Nikhil Date: Thu, 12 Mar 2020 17:33:44 -0400 Subject: [PATCH 095/476] Added vcf2maf 1.6.17 docker image --- vcf2maf_1.6.17/container/Dockerfile | 94 +++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) create mode 100644 vcf2maf_1.6.17/container/Dockerfile diff --git a/vcf2maf_1.6.17/container/Dockerfile b/vcf2maf_1.6.17/container/Dockerfile new file mode 100644 index 00000000..61af253c --- /dev/null +++ b/vcf2maf_1.6.17/container/Dockerfile @@ -0,0 +1,94 @@ +################## BASE IMAGE ###################### +FROM alpine:3.8 + +################## ARGUMENTS/Environments ########## + +ARG BUILD_DATE +ARG BUILD_VERSION=1.0.0 +ARG VCF2MAF_VERSION=1.6.17 +ARG HTSLIB_VERSION=1.9 +ARG SAMTOOLS_VERSION=1.9 +ARG BCFTOOLS_VERSION=1.9 + +################## METADATA ######################## +LABEL 
org.opencontainers.image.vendor="MSKCC" +LABEL org.opencontainers.image.authors="Nikhil Kumar (kumarn1@mskcc.org)" +LABEL org.opencontainers.image.created=${BUILD_DATE} +LABEL org.opencontainers.image.version=${BUILD_VERSION} +LABEL org.opencontainers.image.version.vcf2maf=${VCF2MAF_VERSION} +LABEL org.opencontainers.image.version.vep="86" +LABEL org.opencontainers.image.version.htslib=${HTSLIB_VERSION} +LABEL org.opencontainers.image.version.bcftools=${BCFTOOLS_VERSION} +LABEL org.opencontainers.image.version.samtools=${SAMTOOLS_VERSION} +LABEL org.opencontainers.image.version.perl="5.26.2-r1" +LABEL org.opencontainers.image.version.alpine="3.8" +LABEL org.opencontainers.image.source.vcf2maf="https://github.com/mskcc/vcf2maf/releases/tag/v${VCF2MAF_VERSION}" +LABEL org.opencontainers.image.source.htslib="https://github.com/samtools/htslib/releases/tag/${HTSLIB_VERSION}" +LABEL org.opencontainers.image.source.bcftools="https://github.com/samtools/bcftools/releases/tag/${BCFTOOLS_VERSION}" +LABEL org.opencontainers.image.source.samtools="https://github.com/samtools/samtools/releases/tag/${SAMTOOLS_VERSION}" + + +LABEL org.opencontainers.image.description="This container uses alpine3.8 as the base image to build vcf2maf version ${VCF2MAF_VERSION}" + +################## INSTALL ########################## + +ENV VCF2MAF_VERSION=${VCF2MAF_VERSION} +ENV VEP_VERSION=86 +ENV VEP_DATA=/var/cache +ENV VEP_PATH=/usr/bin/vep +ENV HTSLIB_VERSION=${HTSLIB_VERSION} +ENV SAMTOOLS_VERSION=${SAMTOOLS_VERSION} +ENV BCFTOOLS_VERSION=${BCFTOOLS_VERSION} + + +RUN apk add --update \ + # install all the build-related tools + && apk add ca-certificates gcc g++ make git curl curl-dev wget gzip perl perl-dev musl-dev libgcrypt-dev zlib-dev bzip2-dev xz-dev ncurses-dev rsync \ + # install system packages and Perl modules + && apk add expat-dev libressl-dev perl-net-ssleay mariadb-dev libxml2-dev perl-dbd-mysql perl-module-metadata perl-gd perl-db_file perl-archive-zip perl-cgi perl-dbi 
perl-encode perl-time-hires perl-file-copy-recursive perl-json \ + # install cpanminus + && curl -L https://cpanmin.us | perl - App::cpanminus \ + # install perl libraries that VEP will need + && cpanm --notest LWP LWP::Simple LWP::Protocol::https Archive::Extract Archive::Tar Archive::Zip \ + CGI DBI Encode version Time::HiRes File::Copy::Recursive Perl::OSType Module::Metadata \ + Sereal JSON Bio::Root::Version Set::IntervalTree PerlIO::gzip \ + # install htslib (for vep) + && cd /tmp && wget https://github.com/samtools/htslib/releases/download/${HTSLIB_VERSION}/htslib-${HTSLIB_VERSION}.tar.bz2 \ + && tar xvjf htslib-${HTSLIB_VERSION}.tar.bz2 \ + && cd /tmp/htslib-${HTSLIB_VERSION} \ + && ./configure \ + && make && make install \ + # download/unzip vep + && cd /tmp && wget https://github.com/Ensembl/ensembl-tools/archive/release/${VEP_VERSION}.zip \ + && unzip ${VEP_VERSION} \ + # install vep + && cd /tmp/ensembl-tools-release-${VEP_VERSION}/scripts/variant_effect_predictor \ + && perl INSTALL.pl --AUTO a 2>&1 | tee install.log \ + && cd /tmp && mv /tmp/ensembl-tools-release-${VEP_VERSION}/scripts/variant_effect_predictor /usr/bin/vep \ + # download and unpack VEP's offline cache + && mkdir -p ${VEP_DATA} \ + && rsync -zvh rsync://ftp.ensembl.org/ensembl/pub/release-86/variation/VEP/homo_sapiens_vep_86_GRCh37.tar.gz ${VEP_DATA} \ + && tar -zxf ${VEP_DATA}/homo_sapiens_vep_86_GRCh37.tar.gz -C ${VEP_DATA} \ + && cd /usr/bin/vep \ + && perl convert_cache.pl --species homo_sapiens --version 86_GRCh37 --dir ${VEP_DATA} \ + && rm ${VEP_DATA}/homo_sapiens_vep_86_GRCh37.tar.gz \ + # install bcftools + && cd /tmp && wget https://github.com/samtools/bcftools/releases/download/${BCFTOOLS_VERSION}/bcftools-${BCFTOOLS_VERSION}.tar.bz2 \ + && tar xvjf bcftools-${BCFTOOLS_VERSION}.tar.bz2 \ + && cd /tmp/bcftools-${BCFTOOLS_VERSION} \ + && make HTSDIR=/tmp/htslib-${HTSLIB_VERSION} && make install \ + # install samtools + && cd /tmp && wget 
https://github.com/samtools/samtools/releases/download/${SAMTOOLS_VERSION}/samtools-${SAMTOOLS_VERSION}.tar.bz2 \ + && tar xvjf samtools-${SAMTOOLS_VERSION}.tar.bz2 \ + && cd /tmp/samtools-${SAMTOOLS_VERSION} \ + && ./configure --with-htslib=/tmp/htslib-${HTSLIB_VERSION} \ + && make && make install \ + # install vcf2maf + && cd /tmp && wget -O vcf2maf-v${VCF2MAF_VERSION} https://github.com/mskcc/vcf2maf/archive/v${VCF2MAF_VERSION}.zip \ + && unzip vcf2maf-v${VCF2MAF_VERSION} \ + && mkdir -p /usr/bin/vcf2maf/ \ + && cp -r vcf2maf-${VCF2MAF_VERSION}/* /usr/bin/vcf2maf/ \ + # clean up + && rm -rf /var/cache/apk/* /tmp/* \ + && chmod +x /usr/bin/runscript.sh \ + && exec /run_test.sh From 35c025771cc2e0dca4a6c4cbcebae8d18d1b8d75 Mon Sep 17 00:00:00 2001 From: Nikhil Date: Thu, 12 Mar 2020 17:52:19 -0400 Subject: [PATCH 096/476] Added bcftools 1.6 docker image --- bcftools_1.6/container/Dockerfile | 51 +++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 bcftools_1.6/container/Dockerfile diff --git a/bcftools_1.6/container/Dockerfile b/bcftools_1.6/container/Dockerfile new file mode 100644 index 00000000..42b00041 --- /dev/null +++ b/bcftools_1.6/container/Dockerfile @@ -0,0 +1,51 @@ +################## BASE IMAGE ###################### +FROM alpine:3.8 + +################## ARGUMENTS/Environments ########## + +ARG BUILD_DATE +ARG BUILD_VERSION=1.0.0 +ARG HTSLIB_VERSION=1.6 +ARG BCFTOOLS_VERSION=1.6 + +################## METADATA ######################## +LABEL org.opencontainers.image.vendor="MSKCC" +LABEL org.opencontainers.image.authors="Nikhil Kumar (kumarn1@mskcc.org)" +LABEL org.opencontainers.image.created=${BUILD_DATE} +LABEL org.opencontainers.image.version=${BUILD_VERSION} +LABEL org.opencontainers.image.version.htslib=${HTSLIB_VERSION} +LABEL org.opencontainers.image.version.bcftools=${BCFTOOLS_VERSION} +LABEL org.opencontainers.image.version.perl="5.26.2-r1" +LABEL org.opencontainers.image.version.alpine="3.8" +LABEL 
org.opencontainers.image.source.htslib="https://github.com/samtools/htslib/releases/tag/${HTSLIB_VERSION}" +LABEL org.opencontainers.image.source.bcftools="https://github.com/samtools/bcftools/releases/tag/${BCFTOOLS_VERSION}" + + +LABEL org.opencontainers.image.description="This container uses alpine3.8 as the base image to build bcftools version ${BCFTOOLS_VERSION}" + +################## INSTALL ########################## + +ENV HTSLIB_VERSION=${HTSLIB_VERSION} +ENV BCFTOOLS_VERSION=${BCFTOOLS_VERSION} + + +RUN apk add --update \ + # install all the build-related tools + && apk add ca-certificates gcc g++ make git curl curl-dev wget gzip perl perl-dev musl-dev libgcrypt-dev zlib-dev bzip2-dev xz-dev ncurses-dev rsync \ + # install system packages and Perl modules + && apk add expat-dev libressl-dev perl-net-ssleay mariadb-dev libxml2-dev perl-dbd-mysql perl-module-metadata perl-gd perl-db_file perl-archive-zip perl-cgi perl-dbi perl-encode perl-time-hires perl-file-copy-recursive perl-json \ + # install htslib (for vep) + && cd /tmp && wget https://github.com/samtools/htslib/releases/download/${HTSLIB_VERSION}/htslib-${HTSLIB_VERSION}.tar.bz2 \ + && tar xvjf htslib-${HTSLIB_VERSION}.tar.bz2 \ + && cd /tmp/htslib-${HTSLIB_VERSION} \ + && ./configure \ + && make && make install \ + # install bcftools + && cd /tmp && wget https://github.com/samtools/bcftools/releases/download/${BCFTOOLS_VERSION}/bcftools-${BCFTOOLS_VERSION}.tar.bz2 \ + && tar xvjf bcftools-${BCFTOOLS_VERSION}.tar.bz2 \ + && cd /tmp/bcftools-${BCFTOOLS_VERSION} \ + && make HTSDIR=/tmp/htslib-${HTSLIB_VERSION} && make install \ + # clean up + && rm -rf /var/cache/apk/* /tmp/* \ + && chmod +x /usr/bin/runscript.sh \ + && exec /run_test.sh From 01afe9cd206f82af66a0c554326bbb3e958cb130 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 13 Mar 2020 10:34:42 -0400 Subject: [PATCH 097/476] Update manta_1.5.1.cwl --- manta_1.5.1/manta_1.5.1.cwl | 1 - 1 file changed, 1 deletion(-) diff --git 
a/manta_1.5.1/manta_1.5.1.cwl b/manta_1.5.1/manta_1.5.1.cwl index 1adb91f1..eb26303e 100644 --- a/manta_1.5.1/manta_1.5.1.cwl +++ b/manta_1.5.1/manta_1.5.1.cwl @@ -153,7 +153,6 @@ requirements: - class: ResourceRequirement ramMin: 24000 coresMin: 12 - tmpdirMin: 10000 - class: DockerRequirement dockerPull: 'mskaccess/manta:0.0.2' - class: InlineJavascriptRequirement From 48ffae6d1b2883bf45735699ef0bcab228baa91b Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 13 Mar 2020 10:46:25 -0400 Subject: [PATCH 098/476] =?UTF-8?q?Bump=20version:=200.6.2=20=E2=86=92=200?= =?UTF-8?q?.6.3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cwl_commandlinetools/__init__.py | 2 +- setup.cfg | 3 +-- setup.py | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/cwl_commandlinetools/__init__.py b/cwl_commandlinetools/__init__.py index a00a8263..687dc425 100644 --- a/cwl_commandlinetools/__init__.py +++ b/cwl_commandlinetools/__init__.py @@ -4,4 +4,4 @@ __author__ = """msk-access""" __email__ = 'msk.access@gmail.com' -__version__ = '0.6.2' +__version__ = '0.6.3' diff --git a/setup.cfg b/setup.cfg index e4576a0a..c3ff1fe4 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.6.2 +current_version = 0.6.3 commit = True tag = True @@ -18,7 +18,6 @@ universal = 1 exclude = docs [aliases] -# Define setup.py command aliases here test = pytest [tool:pytest] diff --git a/setup.py b/setup.py index 3751d176..c835f032 100644 --- a/setup.py +++ b/setup.py @@ -38,6 +38,6 @@ test_suite='tests', tests_require=test_requirements, url='https://github.com/msk-access/cwl-commandlinetools', - version='0.6.2', + version='0.6.3', zip_safe=False, ) From f2b85c6c1aec37d99312722619b2c17aac6787bc Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 13 Mar 2020 11:07:47 -0400 Subject: [PATCH 099/476] Making proper versison for Docker --- README.md | 4 +- __init__.py | 2 +- abra2_2.17/abra2_2.17.cwl | 3 +- 
manta_1.5.1/manta_1.5.1.cwl | 5 +- .../marianas_first_pass.cwl | 15 +- .../marianas_second_pass.cwl | 15 +- .../marianas_process_loop_umi.cwl | 15 +- .../marianas_separate_bams_1.8.1.cwl | 5 +- merge_fastq_0.1.7/merge_fastq_0.1.7.cwl | 2 +- mutect_1.1.5/mutect_1.1.5.cwl | 350 +++++++++--------- ...picard_add_or_replace_read_groups_1.96.cwl | 4 +- ...llect_alignment_summary_metrics_2.21.2.cwl | 5 +- .../picard_collectmultiplemetrics_2-8-1.cwl | 2 +- .../picard_fix_mate_information_1.96.cwl | 6 +- .../picard_mark_duplicates_1.96.cwl | 5 +- .../picard_mark_duplicates_2.8.1.cwl | 2 +- trim_galore_0.6.2/trim_galore_0.6.2.cwl | 2 +- .../waltz_count_reads_3.1.1.cwl | 7 +- .../waltz_pileupmatrices_3.1.1.cwl | 8 +- 19 files changed, 206 insertions(+), 251 deletions(-) diff --git a/README.md b/README.md index 15a9cabe..2b7458f2 100644 --- a/README.md +++ b/README.md @@ -27,12 +27,14 @@ Clone the repository: git clone --depth 50 https://github.com/msk-access/cwl-commandlinetools.git ``` -**Follow the README in repsective tool folder for execution of the tool.** +**Follow the README in respective tool folder for execution of the tool.** ## Credits This package was created with Cookiecutter_ and the `audreyr/cookiecutter-pypackage`_ project template. 
+- CMO ACCESS Informatics Team - Cookiecutter: https://github.com/audreyr/cookiecutter - `audreyr/cookiecutter-pypackage`: https://github.com/audreyr/cookiecutter-pypackage + diff --git a/__init__.py b/__init__.py index a00a8263..687dc425 100644 --- a/__init__.py +++ b/__init__.py @@ -4,4 +4,4 @@ __author__ = """msk-access""" __email__ = 'msk.access@gmail.com' -__version__ = '0.6.2' +__version__ = '0.6.3' diff --git a/abra2_2.17/abra2_2.17.cwl b/abra2_2.17/abra2_2.17.cwl index c2403cdc..12f7af16 100644 --- a/abra2_2.17/abra2_2.17.cwl +++ b/abra2_2.17/abra2_2.17.cwl @@ -4,6 +4,7 @@ $namespaces: dct: 'http://purl.org/dc/terms/' doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' id: abra2_2_17 baseCommand: - java @@ -183,7 +184,7 @@ requirements: ramMin: 60000 coresMin: 16 - class: DockerRequirement - dockerPull: 'mskaccess/abra2:0.6.2' + dockerPull: 'mskaccess/abra2:0.6.3' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/manta_1.5.1/manta_1.5.1.cwl b/manta_1.5.1/manta_1.5.1.cwl index eb26303e..52cbc3ae 100644 --- a/manta_1.5.1/manta_1.5.1.cwl +++ b/manta_1.5.1/manta_1.5.1.cwl @@ -4,7 +4,8 @@ $namespaces: dct: 'http://purl.org/dc/terms/' doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' -id: manta_1.5.1 + sbg: 'https://www.sevenbridges.com/' +id: manta_1_5_1 baseCommand: - /usr/bin/python - /usr/local/bin/manta/bin/configManta.py @@ -154,7 +155,7 @@ requirements: ramMin: 24000 coresMin: 12 - class: DockerRequirement - dockerPull: 'mskaccess/manta:0.0.2' + dockerPull: 'mskaccess/manta:0.6.3' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/marianas_collapsing_first_pass_1.8.1/marianas_first_pass.cwl b/marianas_collapsing_first_pass_1.8.1/marianas_first_pass.cwl index 5403a2c9..f83bf1d7 100644 --- a/marianas_collapsing_first_pass_1.8.1/marianas_first_pass.cwl +++ 
b/marianas_collapsing_first_pass_1.8.1/marianas_first_pass.cwl @@ -4,6 +4,7 @@ $namespaces: dct: 'http://purl.org/dc/terms/' doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' id: marianas_collapsing_first_pass_cwl baseCommand: - java @@ -123,21 +124,9 @@ arguments: requirements: - class: ResourceRequirement ramMin: 20000 -# ramMin: |- -# ${ -# if (inputs.memory_per_job && inputs.memory_overhead) { -# return inputs.memory_per_job + inputs.memory_overhead -# } else if (inputs.memory_per_job && !inputs.memory_overhead) { -# return inputs.memory_per_job + 2000 -# } else if (!inputs.memory_per_job && inputs.memory_overhead) { -# return 20000 + inputs.memory_overhead -# } else { -# return 20000 -# } -# } coresMin: 1 - class: DockerRequirement - dockerPull: 'mskaccess/marianas:0.6.2' + dockerPull: 'mskaccess/marianas:0.6.3' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/marianas_collapsing_second_pass_1.8.1/marianas_second_pass.cwl b/marianas_collapsing_second_pass_1.8.1/marianas_second_pass.cwl index 5b7f2c9b..491a34d4 100644 --- a/marianas_collapsing_second_pass_1.8.1/marianas_second_pass.cwl +++ b/marianas_collapsing_second_pass_1.8.1/marianas_second_pass.cwl @@ -4,6 +4,7 @@ $namespaces: dct: 'http://purl.org/dc/terms/' doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' id: marianas_collapsing_second_pass_cwl baseCommand: - java @@ -127,21 +128,9 @@ arguments: requirements: - class: ResourceRequirement ramMin: 20000 -# ramMin: |- -# ${ -# if (inputs.memory_per_job && inputs.memory_overhead) { -# return inputs.memory_per_job + inputs.memory_overhead -# } else if (inputs.memory_per_job && !inputs.memory_overhead) { -# return inputs.memory_per_job + 2000 -# } else if (!inputs.memory_per_job && inputs.memory_overhead) { -# return 20000 + inputs.memory_overhead -# } else { -# return 20000 -# } -# } 
coresMin: 1 - class: DockerRequirement - dockerPull: 'mskaccess/marianas:0.6.2' + dockerPull: 'mskaccess/marianas:0.6.3' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/marianas_process_loop_umi_1.8.1/marianas_process_loop_umi.cwl b/marianas_process_loop_umi_1.8.1/marianas_process_loop_umi.cwl index b730adc4..f0cf85f0 100644 --- a/marianas_process_loop_umi_1.8.1/marianas_process_loop_umi.cwl +++ b/marianas_process_loop_umi_1.8.1/marianas_process_loop_umi.cwl @@ -4,6 +4,7 @@ $namespaces: dct: 'http://purl.org/dc/terms/' doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' id: marianas_process_loop_umi_cwl baseCommand: - java @@ -110,21 +111,9 @@ arguments: requirements: - class: ResourceRequirement ramMin: 20000 -# ramMin: |- -# ${ -# if (inputs.memory_per_job && inputs.memory_overhead) { -# return inputs.memory_per_job + inputs.memory_overhead -# } else if (inputs.memory_per_job && !inputs.memory_overhead) { -# return inputs.memory_per_job + 2000 -# } else if (!inputs.memory_per_job && inputs.memory_overhead) { -# return 8000 + inputs.memory_overhead -# } else { -# return 10000 -# } -# } coresMin: 1 - class: DockerRequirement - dockerPull: 'mskaccess/marianas:0.6.2' + dockerPull: 'mskaccess/marianas:0.6.3' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/marianas_separate_bams_1.8.1/marianas_separate_bams_1.8.1.cwl b/marianas_separate_bams_1.8.1/marianas_separate_bams_1.8.1.cwl index eda65adc..cb140601 100644 --- a/marianas_separate_bams_1.8.1/marianas_separate_bams_1.8.1.cwl +++ b/marianas_separate_bams_1.8.1/marianas_separate_bams_1.8.1.cwl @@ -4,7 +4,8 @@ $namespaces: dct: 'http://purl.org/dc/terms/' doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' -id: marianas_separate_bams_1.8.1 + sbg: 'https://www.sevenbridges.com/' +id: marianas_separate_bams_1_8_1 baseCommand: - java inputs: @@ 
-91,7 +92,7 @@ requirements: ramMin: 30000 coresMin: 1 - class: DockerRequirement - dockerPull: 'mskaccess/marianas:0.6.2' + dockerPull: 'mskaccess/marianas:0.6.3' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/merge_fastq_0.1.7/merge_fastq_0.1.7.cwl b/merge_fastq_0.1.7/merge_fastq_0.1.7.cwl index f81026b4..056d4696 100644 --- a/merge_fastq_0.1.7/merge_fastq_0.1.7.cwl +++ b/merge_fastq_0.1.7/merge_fastq_0.1.7.cwl @@ -91,7 +91,7 @@ requirements: ramMin: 8000 coresMin: 1 - class: DockerRequirement - dockerPull: 'mskaccess/merge_fastq:0.6.2' + dockerPull: 'mskaccess/merge_fastq:0.6.3' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/mutect_1.1.5/mutect_1.1.5.cwl b/mutect_1.1.5/mutect_1.1.5.cwl index d15e5fcf..c8e16fe9 100644 --- a/mutect_1.1.5/mutect_1.1.5.cwl +++ b/mutect_1.1.5/mutect_1.1.5.cwl @@ -1,18 +1,13 @@ - class: CommandLineTool cwlVersion: v1.0 - $namespaces: - dct: http://purl.org/dc/terms/ - doap: http://usefulinc.com/ns/doap# - foaf: http://xmlns.com/foaf/0.1/ - sbg: https://www.sevenbridges.com/ - + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' id: mutect_v1_1_5 - baseCommand: - java - inputs: - id: memory_per_job type: int? @@ -23,56 +18,56 @@ inputs: - id: number_of_threads type: int? - id: BQSR - type: string[]? + type: 'string[]?' inputBinding: position: 0 - prefix: --BQSR + prefix: '--BQSR' doc: >- The input covariates table file which enables on-the-fly base quality score recalibration - id: absolute_copy_number_data - type: string[]? + type: 'string[]?' inputBinding: position: 0 - prefix: --absolute_copy_number_data + prefix: '--absolute_copy_number_data' doc: >- Absolute Copy Number Data, as defined by Absolute, to use in power calculations - id: arg_file - type: string[]? + type: 'string[]?' 
inputBinding: position: 0 - prefix: --arg_file + prefix: '--arg_file' doc: Reads arguments from the specified file - id: bam_tumor_sample_name - type: string[]? + type: 'string[]?' inputBinding: position: 0 - prefix: --bam_tumor_sample_name + prefix: '--bam_tumor_sample_name' doc: >- if the tumor bam contains multiple samples, only use read groups with SM equal to this value - id: baq - type: string[]? + type: 'string[]?' inputBinding: position: 0 - prefix: --baq + prefix: '--baq' doc: >- Type of BAQ calculation to apply in the engine (OFF|CALCULATE_AS_NECESSARY| RECALCULATE) - id: baqGapOpenPenalty - type: string[]? + type: 'string[]?' inputBinding: position: 0 - prefix: --baqGapOpenPenalty + prefix: '--baqGapOpenPenalty' doc: >- BAQ gap open penalty (Phred Scaled). Default value is 40. 30 is perhaps better for whole genome call sets - id: clipping_bias_pvalue_threshold - type: string[]? + type: 'string[]?' inputBinding: position: 0 - prefix: --clipping_bias_pvalue_threshold + prefix: '--clipping_bias_pvalue_threshold' doc: >- pvalue threshold for fishers exact test of clipping bias in mutant reads vs ref reads @@ -80,41 +75,41 @@ inputs: type: File? inputBinding: position: 0 - prefix: --cosmic + prefix: '--cosmic' doc: VCF file of COSMIC sites secondaryFiles: - ^.vcf.idx - id: coverage_20_q20_file - type: string[]? + type: 'string[]?' inputBinding: position: 0 - prefix: --coverage_20_q20_file + prefix: '--coverage_20_q20_file' doc: write out 20x of Q20 coverage in WIGGLE format to this file - id: coverage_file - type: string[]? + type: 'string[]?' inputBinding: position: 0 - prefix: --coverage_file + prefix: '--coverage_file' doc: write out coverage in WIGGLE format to this file - id: dbsnp type: File? inputBinding: position: 0 - prefix: --dbsnp + prefix: '--dbsnp' doc: VCF file of DBSNP information secondaryFiles: - ^.vcf.idx - id: dbsnp_normal_lod - type: string[]? + type: 'string[]?' 
inputBinding: position: 0 - prefix: --dbsnp_normal_lod + prefix: '--dbsnp_normal_lod' doc: LOD threshold for calling normal non-variant at dbsnp sites - id: defaultBaseQualities - type: string[]? + type: 'string[]?' inputBinding: position: 0 - prefix: --defaultBaseQualities + prefix: '--defaultBaseQualities' doc: >- If reads are missing some or all base quality scores, this value will be used for all base quality scores @@ -123,7 +118,7 @@ inputs: type: boolean? inputBinding: position: 0 - prefix: --disableRandomization + prefix: '--disableRandomization' doc: >- Completely eliminates randomization from nondeterministic methods. To be used mostly in the testing framework where dynamic parallelism can result @@ -133,7 +128,7 @@ inputs: type: boolean? inputBinding: position: 0 - prefix: --disable_indel_quals + prefix: '--disable_indel_quals' doc: >- If true, disables printing of base insertion and base deletion tags (with -BQSR) @@ -141,14 +136,14 @@ inputs: type: int? inputBinding: position: 0 - prefix: --downsample_to_coverage + prefix: '--downsample_to_coverage' doc: Target coverage threshold for downsampling to coverage - default: NONE id: downsampling_type type: string? inputBinding: position: 0 - prefix: --downsampling_type + prefix: '--downsampling_type' doc: >- Type of reads downsampling to employ at a given locus. Reads will be selected randomly to be removed from the pile based on the method @@ -159,7 +154,7 @@ inputs: type: boolean? inputBinding: position: 0 - prefix: --emit_original_quals + prefix: '--emit_original_quals' doc: >- If true, enables printing of the OQ tag with the original base qualities (with -BQSR) @@ -168,12 +163,12 @@ inputs: type: boolean inputBinding: position: 0 - prefix: --enable_extended_output + prefix: '--enable_extended_output' - id: excludeIntervals - type: string[]? + type: 'string[]?' 
inputBinding: position: 0 - prefix: --excludeIntervals + prefix: '--excludeIntervals' doc: >- One or more genomic intervals to exclude from processing. Can be explicitly specified on the command line or in a file (including a rod @@ -183,7 +178,7 @@ inputs: type: boolean? inputBinding: position: 0 - prefix: --filter_mismatching_base_and_quals + prefix: '--filter_mismatching_base_and_quals' doc: >- if a read has mismatching number of bases and base qualities, filter out the read instead of blowing up. @@ -192,66 +187,66 @@ inputs: type: boolean? inputBinding: position: 0 - prefix: --force_alleles + prefix: '--force_alleles' doc: force output for all alleles at each site - default: false id: force_output type: boolean? inputBinding: position: 0 - prefix: --force_output + prefix: '--force_output' doc: force output for each site - id: fraction_contamination type: float? inputBinding: position: 0 - prefix: --fraction_contamination + prefix: '--fraction_contamination' doc: >- estimate of fraction (0-1) of physical contamination with other unrelated samples - id: fraction_mapq0_threshold - type: string[]? + type: 'string[]?' inputBinding: position: 0 - prefix: --fraction_mapq0_threshold + prefix: '--fraction_mapq0_threshold' doc: >- threshold for determining if there is relatedness between the alt and ref allele read piles - id: gap_events_threshold - type: string[]? + type: 'string[]?' inputBinding: position: 0 - prefix: --gap_events_threshold + prefix: '--gap_events_threshold' doc: >- how many gapped events (ins/del) are allowed in proximity to this candidate - id: gatk_key - type: string[]? + type: 'string[]?' inputBinding: position: 0 - prefix: --gatk_key + prefix: '--gatk_key' doc: >- GATK Key file. Required if running with -et NO_ET. Please see -phone-home-and-how-does-it-affect-me#latest for details. - id: heavily_clipped_read_fraction - type: string[]? + type: 'string[]?' 
inputBinding: position: 0 - prefix: --heavily_clipped_read_fraction + prefix: '--heavily_clipped_read_fraction' doc: >- if this fraction or more of the bases in a read are soft/hard clipped, do not use this read for mutation calling - id: initial_tumor_lod - type: string[]? + type: 'string[]?' inputBinding: position: 0 - prefix: --initial_tumor_lod + prefix: '--initial_tumor_lod' doc: Initial LOD threshold for calling tumor variant - id: input_file_normal type: File? inputBinding: position: 0 - prefix: --input_file:normal + prefix: '--input_file:normal' doc: SAM or BAM file(s) secondaryFiles: - ^.bai @@ -259,31 +254,31 @@ inputs: type: File? inputBinding: position: 0 - prefix: --input_file:tumor + prefix: '--input_file:tumor' doc: SAM or BAM file(s) secondaryFiles: - ^.bai - id: interval_merging - type: string[]? + type: 'string[]?' inputBinding: position: 0 - prefix: --interval_merging + prefix: '--interval_merging' doc: >- Indicates the interval merging rule we should use for abutting intervals (ALL| OVERLAPPING_ONLY) - id: interval_padding - type: string[]? + type: 'string[]?' inputBinding: position: 0 - prefix: --interval_padding + prefix: '--interval_padding' doc: >- Indicates how many basepairs of padding to include around each of the intervals specified with the -L/ - id: interval_set_rule - type: string[]? + type: 'string[]?' inputBinding: position: 0 - prefix: --interval_set_rule + prefix: '--interval_set_rule' doc: >- Indicates the set merging approach the interval parser should use to combine the various -L or -XL inputs (UNION| INTERSECTION) @@ -294,7 +289,7 @@ inputs: - 'null' inputBinding: position: 0 - prefix: --intervals + prefix: '--intervals' doc: >- One or more genomic intervals over which to operate. Can be explicitly specified on the command line or in a file (including a rod file) @@ -303,79 +298,79 @@ inputs: type: boolean? 
inputBinding: position: 0 - prefix: --keep_program_records + prefix: '--keep_program_records' doc: >- Should we override the Walkers default and keep program records from the SAM header - id: log_to_file - type: string[]? + type: 'string[]?' inputBinding: position: 0 - prefix: --log_to_file + prefix: '--log_to_file' doc: Set the logging location - id: logging_level - type: string[]? + type: 'string[]?' inputBinding: position: 0 - prefix: --logging_level + prefix: '--logging_level' doc: >- Set the minimum level of logging, i.e. setting INFO gets you INFO up to FATAL, setting ERROR gets you ERROR and FATAL level logging. - id: maxRuntime - type: string[]? + type: 'string[]?' inputBinding: position: 0 - prefix: --maxRuntime + prefix: '--maxRuntime' doc: >- If provided, that GATK will stop execution cleanly as soon after maxRuntime has been exceeded, truncating the run but not exiting with a failure. By default the value is interpreted in minutes, but this can be changed by maxRuntimeUnits - id: maxRuntimeUnits - type: string[]? + type: 'string[]?' inputBinding: position: 0 - prefix: --maxRuntimeUnits + prefix: '--maxRuntimeUnits' doc: >- The TimeUnit for maxRuntime (NANOSECONDS| MICROSECONDS|MILLISECONDS|SECONDS|MINUTES| HOURS|DAYS) - id: max_alt_allele_in_normal_fraction - type: string[]? + type: 'string[]?' inputBinding: position: 0 - prefix: --max_alt_allele_in_normal_fraction + prefix: '--max_alt_allele_in_normal_fraction' doc: threshold for maximum alternate allele fraction in normal - id: max_alt_alleles_in_normal_count - type: string[]? + type: 'string[]?' inputBinding: position: 0 - prefix: --max_alt_alleles_in_normal_count + prefix: '--max_alt_alleles_in_normal_count' doc: threshold for maximum alternate allele counts in normal - id: max_alt_alleles_in_normal_qscore_sum - type: string[]? + type: 'string[]?' 
inputBinding: position: 0 - prefix: --max_alt_alleles_in_normal_qscore_sum + prefix: '--max_alt_alleles_in_normal_qscore_sum' doc: threshold for maximum alternate allele quality score sum in normal - id: min_qscore - type: string[]? + type: 'string[]?' inputBinding: position: 0 - prefix: --min_qscore + prefix: '--min_qscore' doc: threshold for minimum base quality score - id: minimum_mutation_cell_fraction type: float? inputBinding: position: 0 - prefix: --minimum_mutation_cell_fraction + prefix: '--minimum_mutation_cell_fraction' doc: >- minimum fraction of cells which are presumed to have a mutation, used to handle non-clonality and contamination - id: minimum_normal_allele_fraction - type: string[]? + type: 'string[]?' inputBinding: position: 0 - prefix: --minimum_normal_allele_fraction + prefix: '--minimum_normal_allele_fraction' doc: >- minimum allele fraction to be considered in normal, useful for normal sample contaminated with tumor @@ -384,14 +379,14 @@ inputs: type: boolean? inputBinding: position: 0 - prefix: --monitorThreadEfficiency + prefix: '--monitorThreadEfficiency' doc: Enable GATK threading efficiency monitoring - default: false id: nonDeterministicRandomSeed type: boolean? inputBinding: position: 0 - prefix: --nonDeterministicRandomSeed + prefix: '--nonDeterministicRandomSeed' doc: >- Makes the GATK behave non deterministically, that is, the random numbers generated will be different in every run @@ -400,37 +395,37 @@ inputs: type: boolean? inputBinding: position: 0 - prefix: --noop - doc: used for debugging, basically exit as soon as we get the reads + prefix: '--noop' + doc: 'used for debugging, basically exit as soon as we get the reads' - id: normal_depth_file - type: string[]? + type: 'string[]?' inputBinding: position: 0 - prefix: --normal_depth_file + prefix: '--normal_depth_file' doc: write out normal read depth in WIGGLE format to this file - id: normal_lod - type: string[]? + type: 'string[]?' 
inputBinding: position: 0 - prefix: --normal_lod + prefix: '--normal_lod' doc: LOD threshold for calling normal non-germline - id: normal_sample_name type: string inputBinding: position: 0 - prefix: --normal_sample_name + prefix: '--normal_sample_name' doc: name to use for normal in output files - id: num_bam_file_handles - type: string[]? + type: 'string[]?' inputBinding: position: 0 - prefix: --num_bam_file_handles + prefix: '--num_bam_file_handles' doc: The total number of BAM file handles to keep open simultaneously - id: num_cpu_threads_per_data_thread type: string? inputBinding: position: 0 - prefix: --num_cpu_threads_per_data_thread + prefix: '--num_cpu_threads_per_data_thread' doc: >- How many CPU threads should be allocated per data thread to running this analysis? @@ -438,113 +433,113 @@ inputs: type: string? inputBinding: position: 0 - prefix: --num_threads + prefix: '--num_threads' doc: How many data threads should be allocated to running this analysis. - default: false id: only_passing_calls type: boolean? inputBinding: position: 0 - prefix: --only_passing_calls + prefix: '--only_passing_calls' doc: only emit passing calls - - id: out - default: 'mutect_variants.txt' + - default: mutect_variants.txt + id: out type: string? inputBinding: position: 0 - prefix: --out + prefix: '--out' doc: Call-stats output - id: pedigree - type: string[]? + type: 'string[]?' inputBinding: position: 0 - prefix: --pedigree + prefix: '--pedigree' doc: Pedigree files for samples - id: pedigreeString - type: string[]? + type: 'string[]?' inputBinding: position: 0 - prefix: --pedigreeString + prefix: '--pedigreeString' doc: Pedigree string for samples - id: pedigreeValidationType - type: string[]? + type: 'string[]?' inputBinding: position: 0 - prefix: --pedigreeValidationType + prefix: '--pedigreeValidationType' doc: >- How strict should we be in validating the pedigree information? (STRICT|SILENT) - id: performanceLog - type: string[]? + type: 'string[]?' 
inputBinding: position: 0 - prefix: --performanceLog - doc: If provided, a GATK runtime performance log will be written to this file + prefix: '--performanceLog' + doc: 'If provided, a GATK runtime performance log will be written to this file' - id: phone_home - type: string[]? + type: 'string[]?' inputBinding: position: 0 - prefix: --phone_home + prefix: '--phone_home' doc: >- What kind of GATK run report should we generate? STANDARD is the default, can be NO_ET so nothing is posted to the run repository. Please see -phone-home-and-how-does-it-affect-me#latest for details. (NO_ET|STANDARD|STDOUT) - id: pir_mad_threshold - type: string[]? + type: 'string[]?' inputBinding: position: 0 - prefix: --pir_mad_threshold + prefix: '--pir_mad_threshold' doc: threshold for clustered read position artifact MAD - id: pir_median_threshold - type: string[]? + type: 'string[]?' inputBinding: position: 0 - prefix: --pir_median_threshold + prefix: '--pir_median_threshold' doc: threshold for clustered read position artifact median - id: power_constant_af - type: string[]? + type: 'string[]?' inputBinding: position: 0 - prefix: --power_constant_af + prefix: '--power_constant_af' doc: Allelic fraction constant to use in power calculations - id: power_constant_qscore - type: string[]? + type: 'string[]?' inputBinding: position: 0 - prefix: --power_constant_qscore + prefix: '--power_constant_qscore' doc: Phred scale quality score constant to use in power calculations - id: power_file - type: string[]? + type: 'string[]?' inputBinding: position: 0 - prefix: --power_file + prefix: '--power_file' doc: write out power in WIGGLE format to this file - id: preserve_qscores_less_than - type: string[]? + type: 'string[]?' inputBinding: position: 0 - prefix: --preserve_qscores_less_than + prefix: '--preserve_qscores_less_than' doc: >- Bases with quality scores less than this threshold wont be recalibrated (with -BQSR) - id: read_buffer_size - type: string[]? + type: 'string[]?' 
inputBinding: position: 0 - prefix: --read_buffer_size + prefix: '--read_buffer_size' doc: Number of reads per SAM file to buffer in memory - id: read_filter type: string? inputBinding: position: 0 - prefix: --read_filter + prefix: '--read_filter' doc: Specify filtration criteria to apply to each read individually - id: read_group_black_list - type: string[]? + type: 'string[]?' inputBinding: position: 0 - prefix: --read_group_black_list + prefix: '--read_group_black_list' doc: >- Filters out read groups matching - or a .txt file containing the filter strings one per line. @@ -552,7 +547,7 @@ inputs: type: File inputBinding: position: 0 - prefix: --reference_sequence + prefix: '--reference_sequence' secondaryFiles: - .fai - ^.dict @@ -561,7 +556,7 @@ inputs: type: boolean? inputBinding: position: 0 - prefix: --remove_program_records + prefix: '--remove_program_records' doc: >- Should we override the Walkers default and remove program records from the SAM header @@ -570,7 +565,7 @@ inputs: type: boolean? inputBinding: position: 0 - prefix: --required_maximum_alt_allele_mapping_quality_score + prefix: '--required_maximum_alt_allele_mapping_quality_score' doc: >- required minimum value for tumor alt allele @@ -580,50 +575,50 @@ inputs: type: boolean? inputBinding: position: 0 - prefix: --somatic_classification_normal_power_threshold + prefix: '--somatic_classification_normal_power_threshold' doc: >- Power threshold for normal to determine germline vs variant - id: tag - type: string[]? + type: 'string[]?' inputBinding: position: 0 - prefix: --tag + prefix: '--tag' doc: >- Arbitrary tag string to identify this GATK run as part of a group of runs, for later analysis - id: tumor_depth_file - type: string[]? + type: 'string[]?' inputBinding: position: 0 - prefix: --tumor_depth_file + prefix: '--tumor_depth_file' doc: write out tumor read depth in WIGGLE format to this file - id: tumor_f_pretest - type: string[]? + type: 'string[]?' 
inputBinding: position: 0 - prefix: --tumor_f_pretest + prefix: '--tumor_f_pretest' doc: >- for computational efficiency, reject sites with allelic fraction below this threshold - id: tumor_lod - type: string[]? + type: 'string[]?' inputBinding: position: 0 - prefix: --tumor_lod + prefix: '--tumor_lod' doc: LOD threshold for calling tumor variant - id: tumor_sample_name type: string inputBinding: position: 0 - prefix: --tumor_sample_name + prefix: '--tumor_sample_name' doc: name to use for tumor in output files - id: unsafe - type: string[]? + type: 'string[]?' inputBinding: position: 0 - prefix: --unsafe + prefix: '--unsafe' doc: >- If set, enables unsafe operations - nothing will be checked at runtime. For expert users only who know what they are doing. We do not support @@ -635,24 +630,23 @@ inputs: type: boolean? inputBinding: position: 0 - prefix: --useOriginalQualities + prefix: '--useOriginalQualities' doc: >- If set, use the original base quality scores from the OQ tag when present instead of the standard scores - id: validation_strictness - type: string[]? + type: 'string[]?' inputBinding: position: 0 - prefix: --validation_strictness + prefix: '--validation_strictness' doc: How strict should we be with validation (STRICT|LENIENT|SILENT) - - id: vcf + - default: mutect_variants.vcf + id: vcf type: string? - default: 'mutect_variants.vcf' inputBinding: position: 0 - prefix: --vcf + prefix: '--vcf' doc: VCF output of mutation candidates - outputs: - id: mutect_output_txt type: File? @@ -662,7 +656,6 @@ outputs: type: File? 
outputBinding: glob: $(inputs.vcf) - arguments: - |- ${ @@ -714,42 +707,33 @@ arguments: return "-Xmx1G" } } - - -XX:-UseGCOverheadLimit - - -jar + - '-XX:-UseGCOverheadLimit' + - '-jar' - /usr/local/bin/muTect.jar - - --analysis_type + - '--analysis_type' - MuTect - requirements: - class: ResourceRequirement ramMin: 34000 coresMin: 1 - class: DockerRequirement - dockerPull: mskaccess/mutect:0.6.2 + dockerPull: 'mskaccess/mutect:0.6.3' - class: InlineJavascriptRequirement - -dct:contributor: - - class: foaf:Organization - foaf:member: - - class: foaf:Person - foaf:mbox: mailto:huy1@mskcc.org - foaf:name: Yu Hu - foaf:name: Memorial Sloan Kettering Cancer Center -dct:contributor: - - class: foaf:Organization - foaf:member: - - class: foaf:Person - foaf:mbox: mailto:shahr2@mskcc.org - foaf:name: Ronak Shah - foaf:name: Memorial Sloan Kettering Cancer Center -dct:creator: - - class: foaf:Organization - foaf:member: - - class: foaf:Person - foaf:mbox: mailto:huy1@mskcc.org - foaf:name: Yu Hu - foaf:name: Memorial Sloan Kettering Cancer Center -doap:release: - - class: doap:Version - doap:name: MuTect - doap:revision: 1.1.5 +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:huy1@mskcc.org' + 'foaf:name': Yu Hu + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': MuTect + 'doap:revision': 1.1.5 diff --git a/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl b/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl index 12f1a574..19d39955 100644 --- a/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl +++ 
b/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl @@ -174,7 +174,7 @@ arguments: shellQuote: false valueFrom: '-XX:-UseGCOverheadLimit' - position: 0 - prefix: -Djava.io.tmpdir= + prefix: '-Djava.io.tmpdir=' separate: false valueFrom: |- ${ @@ -209,7 +209,7 @@ requirements: ramMin: 25000 coresMin: 2 - class: DockerRequirement - dockerPull: 'mskaccess/picard_1.96:0.6.2' + dockerPull: 'mskaccess/picard_1.96:0.6.3' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/picard_collect_alignment_summary_metrics_2.21.2/picard_collect_alignment_summary_metrics_2.21.2.cwl b/picard_collect_alignment_summary_metrics_2.21.2/picard_collect_alignment_summary_metrics_2.21.2.cwl index bb3d0975..811adb2f 100644 --- a/picard_collect_alignment_summary_metrics_2.21.2/picard_collect_alignment_summary_metrics_2.21.2.cwl +++ b/picard_collect_alignment_summary_metrics_2.21.2/picard_collect_alignment_summary_metrics_2.21.2.cwl @@ -4,7 +4,8 @@ $namespaces: dct: 'http://purl.org/dc/terms/' doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' -id: picard_collect_alignment_summary_metrics_2.8.1 + sbg: 'https://www.sevenbridges.com/' +id: picard_collect_alignment_summary_metrics_2_8_1 baseCommand: - java inputs: @@ -156,7 +157,7 @@ requirements: ramMin: 12000 coresMin: 1 - class: DockerRequirement - dockerPull: 'mskaccess/picard:0.6.2' + dockerPull: 'mskaccess/picard:0.6.3' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/picard_collectmultiplemetric_2.8.1/picard_collectmultiplemetrics_2-8-1.cwl b/picard_collectmultiplemetric_2.8.1/picard_collectmultiplemetrics_2-8-1.cwl index d5937222..e65ed07f 100644 --- a/picard_collectmultiplemetric_2.8.1/picard_collectmultiplemetrics_2-8-1.cwl +++ b/picard_collectmultiplemetric_2.8.1/picard_collectmultiplemetrics_2-8-1.cwl @@ -246,7 +246,7 @@ requirements: ramMin: 10000 coresMin: 8 - class: DockerRequirement - 
dockerPull: 'mskaccess/picard:0.6.2' + dockerPull: 'mskaccess/picard:0.6.3' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl b/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl index 7d61658a..90f1bc49 100644 --- a/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl +++ b/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl @@ -1,11 +1,11 @@ class: CommandLineTool cwlVersion: v1.0 $namespaces: + cwltool: 'http://commonwl.org/cwltool#' dct: 'http://purl.org/dc/terms/' doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' sbg: 'https://www.sevenbridges.com/' - cwltool: 'http://commonwl.org/cwltool#' id: picard_fix_mate_information_1_96 baseCommand: - java @@ -121,7 +121,7 @@ arguments: shellQuote: false valueFrom: '-XX:-UseGCOverheadLimit' - position: 0 - prefix: -Djava.io.tmpdir= + prefix: '-Djava.io.tmpdir=' separate: false valueFrom: |- ${ @@ -158,7 +158,7 @@ requirements: ramMin: 25000 coresMin: 2 - class: DockerRequirement - dockerPull: 'mskaccess/picard_1.96:0.6.2' + dockerPull: 'mskaccess/picard_1.96:0.6.3' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/picard_mark_duplicates_1.96/picard_mark_duplicates_1.96.cwl b/picard_mark_duplicates_1.96/picard_mark_duplicates_1.96.cwl index 5b9c5dec..e663bbc9 100644 --- a/picard_mark_duplicates_1.96/picard_mark_duplicates_1.96.cwl +++ b/picard_mark_duplicates_1.96/picard_mark_duplicates_1.96.cwl @@ -4,7 +4,8 @@ $namespaces: dct: 'http://purl.org/dc/terms/' doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' -id: picard_mark_duplicates_1.96 + sbg: 'https://www.sevenbridges.com/' +id: picard_mark_duplicates_1_96 baseCommand: - java inputs: @@ -115,7 +116,7 @@ requirements: ramMin: "${\r if(inputs.memory_per_job && inputs.memory_overhead) {\r \r return 
inputs.memory_per_job + inputs.memory_overhead\r }\r else if (inputs.memory_per_job && !inputs.memory_overhead){\r \r \treturn inputs.memory_per_job + 2000\r }\r else if(!inputs.memory_per_job && inputs.memory_overhead){\r \r return 15000 + inputs.memory_overhead\r }\r else {\r \r \treturn 17000 \r }\r}" coresMin: "${\r if (inputs.number_of_threads) {\r \r \treturn inputs.number_of_threads \r }\r else {\r \r return 2\r }\r}" - class: DockerRequirement - dockerPull: 'mskaccess/picard_1.96:0.6.2' + dockerPull: 'mskaccess/picard_1.96:0.6.3' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/picard_mark_duplicates_2.8.1/picard_mark_duplicates_2.8.1.cwl b/picard_mark_duplicates_2.8.1/picard_mark_duplicates_2.8.1.cwl index 8806ef3e..9cda15b7 100644 --- a/picard_mark_duplicates_2.8.1/picard_mark_duplicates_2.8.1.cwl +++ b/picard_mark_duplicates_2.8.1/picard_mark_duplicates_2.8.1.cwl @@ -154,7 +154,7 @@ requirements: ramMin: 17000 coresMin: 2 - class: DockerRequirement - dockerPull: 'mskaccess/picard:0.6.2' + dockerPull: 'mskaccess/picard:0.6.3' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/trim_galore_0.6.2/trim_galore_0.6.2.cwl b/trim_galore_0.6.2/trim_galore_0.6.2.cwl index 35eb150a..991f19ba 100644 --- a/trim_galore_0.6.2/trim_galore_0.6.2.cwl +++ b/trim_galore_0.6.2/trim_galore_0.6.2.cwl @@ -160,7 +160,7 @@ requirements: ramMin: 8000 coresMin: 4 - class: DockerRequirement - dockerPull: 'mskaccess/trim_galore:0.6.2' + dockerPull: 'mskaccess/trim_galore:0.6.3' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/waltz_count_reads_3.1.1/waltz_count_reads_3.1.1.cwl b/waltz_count_reads_3.1.1/waltz_count_reads_3.1.1.cwl index a1cc8c2a..58b2ee3a 100644 --- a/waltz_count_reads_3.1.1/waltz_count_reads_3.1.1.cwl +++ b/waltz_count_reads_3.1.1/waltz_count_reads_3.1.1.cwl @@ -3,8 +3,9 @@ cwlVersion: v1.0 $namespaces: dct: 
'http://purl.org/dc/terms/' doap: 'http://usefulinc.com/ns/doap#' + edam: 'http://edamontology.org/' foaf: 'http://xmlns.com/foaf/0.1/' - edam: http://edamontology.org/ + sbg: 'https://www.sevenbridges.com/' id: waltz_count_reads baseCommand: - java @@ -75,10 +76,8 @@ requirements: - class: ResourceRequirement ramMin: 8000 coresMin: 1 -# ramMin: "${\r if(inputs.memory_per_job && inputs.memory_overhead) {\r \r return inputs.memory_per_job + inputs.memory_overhead\r }\r else if (inputs.memory_per_job && !inputs.memory_overhead){\r \r \treturn inputs.memory_per_job + 2000\r }\r else if(!inputs.memory_per_job && inputs.memory_overhead){\r \r return 8000 + inputs.memory_overhead\r }\r else {\r \r \treturn 8000 \r }\r}" -# coresMin: "${\r if (inputs.number_of_threads) {\r \r \treturn inputs.number_of_threads \r }\r else {\r \r return 1\r }\r}" - class: DockerRequirement - dockerPull: 'mskaccess/waltz:0.6.2' + dockerPull: 'mskaccess/waltz:0.6.3' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/waltz_pileupmatrices_3.1.1/waltz_pileupmatrices_3.1.1.cwl b/waltz_pileupmatrices_3.1.1/waltz_pileupmatrices_3.1.1.cwl index c0892f1e..85829060 100644 --- a/waltz_pileupmatrices_3.1.1/waltz_pileupmatrices_3.1.1.cwl +++ b/waltz_pileupmatrices_3.1.1/waltz_pileupmatrices_3.1.1.cwl @@ -3,8 +3,9 @@ cwlVersion: v1.0 $namespaces: dct: 'http://purl.org/dc/terms/' doap: 'http://usefulinc.com/ns/doap#' + edam: 'http://edamontology.org/' foaf: 'http://xmlns.com/foaf/0.1/' - edam: http://edamontology.org/ + sbg: 'https://www.sevenbridges.com/' id: waltz_pileupmetrics baseCommand: - java @@ -87,10 +88,8 @@ requirements: - class: ResourceRequirement ramMin: 8000 coresMin: 1 -# ramMin: "${\r if(inputs.memory_per_job && inputs.memory_overhead) {\r \r return inputs.memory_per_job + inputs.memory_overhead\r }\r else if (inputs.memory_per_job && !inputs.memory_overhead){\r \r \treturn inputs.memory_per_job + 2000\r }\r else if(!inputs.memory_per_job && 
inputs.memory_overhead){\r \r return 8000 + inputs.memory_overhead\r }\r else {\r \r \treturn 8000 \r }\r}" -# coresMin: "${\r if (inputs.number_of_threads) {\r \r \treturn inputs.number_of_threads \r }\r else {\r \r return 1\r }\r}" - class: DockerRequirement - dockerPull: 'mskaccess/waltz:0.6.2' + dockerPull: 'mskaccess/waltz:0.6.3' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' @@ -110,4 +109,3 @@ requirements: - class: 'doap:Version' 'doap:name': waltz 'doap:revision': 3.1.1 - From b4f5daa0a384888d26ecc7345c71596ad2861352 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 13 Mar 2020 11:10:29 -0400 Subject: [PATCH 100/476] Updating README --- README.md | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 2b7458f2..56d21767 100644 --- a/README.md +++ b/README.md @@ -32,9 +32,8 @@ git clone --depth 50 https://github.com/msk-access/cwl-commandlinetools.git ## Credits -This package was created with Cookiecutter_ and the `audreyr/cookiecutter-pypackage`_ project template. - - CMO ACCESS Informatics Team -- Cookiecutter: https://github.com/audreyr/cookiecutter -- `audreyr/cookiecutter-pypackage`: https://github.com/audreyr/cookiecutter-pypackage +- This package was created with Cookiecutter_ and the `audreyr/cookiecutter-pypackage`_ project template. 
+ - Cookiecutter: https://github.com/audreyr/cookiecutter + - `audreyr/cookiecutter-pypackage`: https://github.com/audreyr/cookiecutter-pypackage From 6edec54ec428ea925bc169520e742b67c5d3c3c1 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 13 Mar 2020 11:11:37 -0400 Subject: [PATCH 101/476] Updating README --- README.md | 3 --- 1 file changed, 3 deletions(-) diff --git a/README.md b/README.md index 56d21767..3c2b02cd 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,3 @@ -# Initial page - -======= --- description: Central location for storing common workflow language based command line tools for building msk-access workflows --- From 7a9a7520581b29ca4688f066b3ec6b4ec5e40df4 Mon Sep 17 00:00:00 2001 From: Nikhil Date: Thu, 21 May 2020 18:15:56 -0400 Subject: [PATCH 102/476] Added README to vcf2maf --- vcf2maf_1.6.17/README.md | 90 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 vcf2maf_1.6.17/README.md diff --git a/vcf2maf_1.6.17/README.md b/vcf2maf_1.6.17/README.md new file mode 100644 index 00000000..9111d806 --- /dev/null +++ b/vcf2maf_1.6.17/README.md @@ -0,0 +1,90 @@ +# CWL and Dockerfile for running Mutect v1.1.5 + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +|--- |--- |--- | +| alpine:3.8 base image | 3.8 | - | +| vcf2maf | 1.6.17 | https://github.com/mskcc/vcf2maf/archive/v1.6.17.zip | + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner vcf2maf_1.6.17.cwl example_inputs.yaml +``` + +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict 
/path/to/vcf2maf_1.6.17.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> mkdir vcf2maf_toil_log +> toil-cwl-runner --singularity --logFile /path/to/vcf2maf_toil_log/cwltoil.log --jobStore /path/to/vcf2maf_jobStore --batchSystem lsf --workDir /path/to/vcf2maf_toil_log --outdir . --writeLogs /path/to/vcf2maf_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/vcf2maf_1.6.17.cwl /path/to/inputs.yaml > vcf2maf_toil.stdout 2> vcf2maf_toil.stderr & +``` + +### Usage + +``` +usage: toil-cwl-runner vcf2maf_1.6.17.cwl [-h] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --any_allele ANY_ALLELE + When reporting co-located variants, allow mismatched + variant alleles too + --buffer_size BUFFER_SIZE + Number of variants VEP loads at a time; Reduce this + for low memory systems + --cache_version CACHE_VERSION + Version of VEP and its cache to use + --custom_enst CUSTOM_ENST + List of custom ENST IDs that override canonical + selection + --maf_center MAF_CENTER + Variant calling center to report in MAF + --max_filter_ac MAX_FILTER_AC + Use tag common_variant if the filter-vcf reports a + subpopulation AC higher than this + --min_hom_vaf MIN_HOM_VAF + If GT undefined in VCF, minimum allele fraction to + call a variant homozygous + --ncbi_build NCBI_BUILD + Genome build of variants in input + --normal_id NORMAL_ID + Matched_Norm_Sample_Barcode to report in the MAF + --output_maf OUTPUT_MAF + Path to output MAF file + --ref_fasta REF_FASTA + Reference FASTA file + --remap_chain REMAP_CHAIN + Chain file to remap variants to a different assembly + before running VEP + --retain_fmt RETAIN_FMT + Comma-delimited names of FORMAT fields to retain as + extra columns in MAF [] + --retain_info RETAIN_INFO + Comma-delimited names of INFO fields to retain as + extra columns in MAF + --species SPECIES Species of variants in input + --tumor_id TUMOR_ID 
Tumor_Sample_Barcode to report in the MAF + --vcf_normal_id VCF_NORMAL_ID + Matched normal ID used in VCFs genotype columns + --vcf_tumor_id VCF_TUMOR_ID + Tumor sample ID used in VCFs genotype columns + --vep_data VEP_DATA VEPs base cache/plugin directory + --vep_forks VEP_FORKS + Number of forked processes to use when running VEP + --vep_path VEP_PATH Folder containing variant_effect_predictor.pl or vep + binary + + +``` From 3bfd55214c78d90d26c19dfa4aa543d8784b988c Mon Sep 17 00:00:00 2001 From: Nikhil Date: Thu, 21 May 2020 18:16:19 -0400 Subject: [PATCH 103/476] Added example inputs for vcf2maf --- vcf2maf_1.6.17/example_inputs.yaml | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 vcf2maf_1.6.17/example_inputs.yaml diff --git a/vcf2maf_1.6.17/example_inputs.yaml b/vcf2maf_1.6.17/example_inputs.yaml new file mode 100644 index 00000000..3c32cc16 --- /dev/null +++ b/vcf2maf_1.6.17/example_inputs.yaml @@ -0,0 +1,20 @@ +input_vcf: + class: File + path: /path/to/vcf_file +tumor_id: tumor_sample_name +vcf_tumor_id: tumor_sample_name +normal_id: normal_sample_name +vcf_normal_id: normal_sample_name +ncbi_build: genome_string +filter_vcf: + class: File + path: /path/to/filter/vcf +vep_data: vep_cache_path_str (/var/cache in container) +ref_fasta: + class: File + path: /path/to/ref/fasta +vep_path: vep_path (/usr/bin/vep in container) +custom_enst: custom_enst_str +retain_info: retain_info_str +retain_fmt: retain_fmt_str +output_maf: output_maf_str \ No newline at end of file From fa69a925415cdd9b0edf4b95e6147a30379cb164 Mon Sep 17 00:00:00 2001 From: Nikhil Date: Thu, 21 May 2020 18:16:56 -0400 Subject: [PATCH 104/476] Added vcf2maf cwl --- vcf2maf_1.6.17/vcf2maf_1.6.17.cwl | 208 ++++++++++++++++++++++++++++++ 1 file changed, 208 insertions(+) create mode 100644 vcf2maf_1.6.17/vcf2maf_1.6.17.cwl diff --git a/vcf2maf_1.6.17/vcf2maf_1.6.17.cwl b/vcf2maf_1.6.17/vcf2maf_1.6.17.cwl new file mode 100644 index 00000000..da1fdfc6 --- /dev/null +++ 
b/vcf2maf_1.6.17/vcf2maf_1.6.17.cwl @@ -0,0 +1,208 @@ + +class: CommandLineTool +cwlVersion: v1.0 + +$namespaces: + dct: http://purl.org/dc/terms/ + doap: http://usefulinc.com/ns/doap# + foaf: http://xmlns.com/foaf/0.1/ + sbg: https://www.sevenbridges.com/ + +id: vcf2maf_v1.6.17 + +baseCommand: + - perl + - /usr/bin/vcf2maf/vcf2maf.pl + +inputs: + cache_version: + type: + - 'null' + - string + default: '86' + doc: Version of VEP and its cache to use + inputBinding: + prefix: --cache-version + species: + type: + - 'null' + - string + default: homo_sapiens + doc: Species of variants in input + inputBinding: + prefix: --species + ncbi_build: + type: + - 'null' + - string + default: GRCh37 + doc: Genome build of variants in input + inputBinding: + prefix: --ncbi-build + ref_fasta: + type: ['null', File] + doc: Reference FASTA file + inputBinding: + prefix: --ref-fasta + maf_center: + type: ['null', string] + default: mskcc.org + doc: Variant calling center to report in MAF + inputBinding: + prefix: --maf-center + output_maf: + type: ['null', string] + doc: Path to output MAF file + inputBinding: + prefix: --output-maf + max_filter_ac: + type: + - 'null' + - int + default: 10 + doc: Use tag common_variant if the filter-vcf reports a subpopulation AC higher + than this + inputBinding: + prefix: --max-filter-ac + min_hom_vaf: + type: + - 'null' + - float + default: 0.7 + doc: If GT undefined in VCF, minimum allele fraction to call a variant homozygous + inputBinding: + prefix: --min-hom-vaf + remap_chain: + type: ['null', string] + doc: Chain file to remap variants to a different assembly before running VEP + inputBinding: + prefix: --remap-chain + normal_id: + type: ['null', string] + default: NORMAL + doc: Matched_Norm_Sample_Barcode to report in the MAF + inputBinding: + prefix: --normal-id + buffer_size: + type: + - 'null' + - int + default: 5000 + doc: Number of variants VEP loads at a time; Reduce this for low memory systems + inputBinding: + prefix: --buffer-size + 
custom_enst: + type: ['null', string] + doc: List of custom ENST IDs that override canonical selection + inputBinding: + prefix: --custom-enst + vcf_normal_id: + type: ['null', string] + default: NORMAL + doc: Matched normal ID used in VCFs genotype columns + inputBinding: + prefix: --vcf-normal-id + vep_path: + type: ['null', string] + doc: Folder containing variant_effect_predictor.pl or vep binary + inputBinding: + prefix: --vep-path + vep_data: + type: ['null', string] + doc: VEPs base cache/plugin directory + inputBinding: + prefix: --vep-data + any_allele: + type: ['null', string] + doc: When reporting co-located variants, allow mismatched variant alleles too + inputBinding: + prefix: --any-allele + input_vcf: + type: + - string + - File + doc: Path to input file in VCF format + inputBinding: + prefix: --input-vcf + vep_forks: + type: + - 'null' + - int + default: 4 + doc: Number of forked processes to use when running VEP + inputBinding: + prefix: --vep-forks + vcf_tumor_id: + type: ['null', string] + default: TUMOR + doc: Tumor sample ID used in VCFs genotype columns + inputBinding: + prefix: --vcf-tumor-id + tumor_id: + type: ['null', string] + default: TUMOR + doc: Tumor_Sample_Barcode to report in the MAF + inputBinding: + prefix: --tumor-id + filter_vcf: + type: + - 'null' + - string + - File + doc: The non-TCGA VCF from exac.broadinstitute.org + inputBinding: + prefix: --filter-vcf + secondaryFiles: + - .tbi + retain_info: + type: ['null', string] + doc: Comma-delimited names of INFO fields to retain as extra columns in MAF + inputBinding: + prefix: --retain-info + retain_fmt: + type: ['null', string] + doc: Comma-delimited names of FORMAT fields to retain as extra columns in MAF [] + inputBinding: + prefix: --retain-fmt +outputs: + output: + type: File + outputBinding: + glob: | + ${ + if (inputs.output_maf) + return inputs.output_maf; + return null; + } + +arguments: +- valueFrom: "$(runtime.tmpdir)" + prefix: '--tmp-dir' + shellQuote: false + 
+requirements: + InlineJavascriptRequirement: {} + ResourceRequirement: + ramMin: 8000 + coresMin: 2 + DockerRequirement: + dockerPull: mskaccess/vcf2maf:1.6.17 + +dct:contributor: + - class: foaf:Organization + foaf:member: + - class: foaf:Person + foaf:mbox: mailto:kumarn1@mskcc.org + foaf:name: Nikhil Kumar + foaf:name: Memorial Sloan Kettering Cancer Center +dct:creator: + - class: foaf:Organization + foaf:member: + - class: foaf:Person + foaf:mbox: mailto:kumarn1@mskcc.org + foaf:name: Nikhil Kumar + foaf:name: Memorial Sloan Kettering Cancer Center +doap:release: + - class: doap:Version + doap:name: vcf2maf + doap:revision: 1.0.0 From 83561ff7b9849937865fd1ebda0d53e20e133d5b Mon Sep 17 00:00:00 2001 From: Nikhil Kumar Date: Thu, 21 May 2020 18:32:22 -0400 Subject: [PATCH 105/476] Updated vcf2maf cwl version --- vcf2maf_1.6.17/vcf2maf_1.6.17.cwl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vcf2maf_1.6.17/vcf2maf_1.6.17.cwl b/vcf2maf_1.6.17/vcf2maf_1.6.17.cwl index da1fdfc6..b32be233 100644 --- a/vcf2maf_1.6.17/vcf2maf_1.6.17.cwl +++ b/vcf2maf_1.6.17/vcf2maf_1.6.17.cwl @@ -205,4 +205,4 @@ dct:creator: doap:release: - class: doap:Version doap:name: vcf2maf - doap:revision: 1.0.0 + doap:revision: 1.6.17 From 49044688cf499b7e748c6520ae2184f49316d4d3 Mon Sep 17 00:00:00 2001 From: Nikhil Kumar Date: Thu, 21 May 2020 18:40:34 -0400 Subject: [PATCH 106/476] Fixed README title --- vcf2maf_1.6.17/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vcf2maf_1.6.17/README.md b/vcf2maf_1.6.17/README.md index 9111d806..491ef1c3 100644 --- a/vcf2maf_1.6.17/README.md +++ b/vcf2maf_1.6.17/README.md @@ -1,4 +1,4 @@ -# CWL and Dockerfile for running Mutect v1.1.5 +# CWL and Dockerfile for running vcf2maf v1.6.17 ## Version of tools in docker image (/container/Dockerfile) From 234c9a29aadd900a140fab240c944d67cd69050a Mon Sep 17 00:00:00 2001 From: Nikhil Date: Thu, 21 May 2020 18:44:20 -0400 Subject: [PATCH 107/476] Added bcftools 
README --- bcftools_1.6/README.md | 69 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 bcftools_1.6/README.md diff --git a/bcftools_1.6/README.md b/bcftools_1.6/README.md new file mode 100644 index 00000000..ecf5b687 --- /dev/null +++ b/bcftools_1.6/README.md @@ -0,0 +1,69 @@ +# CWL and Dockerfile for running bcftools v1.6 + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +|--- |--- |--- | +| alpine:3.8 base image | 3.8 | - | +| bcftools | 1.6 | https://github.com/samtools/bcftools/archive/1.6.zip | + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner bcftools_concat_1.6.cwl example_inputs.yaml +``` + +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/bcftools_concat_1.6.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> mkdir bcftools_toil_log +> toil-cwl-runner --singularity --logFile /path/to/bcftools_toil_log/cwltoil.log --jobStore /path/to/bcftools_jobStore --batchSystem lsf --workDir /path/to/bcftools_toil_log --outdir . --writeLogs /path/to/bcftools_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/bcftools_concat_1.6.cwl /path/to/inputs.yaml > bcftools_toil.stdout 2> bcftools_toil.stderr & +``` + +### Usage + +``` +usage: toil-cwl-runner bcftools_concat_1.6.cwl [-h] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --allow_overlaps First coordinate of the next file can precede last + record of the current file. 
+ --compact_PS Do not output PS tag at each site, only at the start + of a new phase set block. + --ligate Ligate phased VCFs by matching phase at overlapping + haplotypes + --list LIST Read the list of files from a file. + --min_PQ MIN_PQ Break phase set if phasing quality is lower than + [30] + --naive Concatenate BCF files without recompression + (dangerous, use with caution) + --no_version do not append version and command line to the header + --output OUTPUT Write output to a file [standard output] + --output_type OUTPUT_TYPE + b - compressed BCF, u - uncompressed BCF, z + - compressed VCF, v - uncompressed VCF [v] + --regions REGIONS Restrict to comma-separated list of regions + --regions_file REGIONS_FILE + Restrict to regions listed in a file + --remove_duplicates Alias for -d none + --rm_dups RM_DUPS Output duplicate records present in multiple + files only once - + --threads THREADS Number of extra output compression threads [0] + --vcf_files_csi VCF_FILES_CSI + Array of vcf files to be concatenated into one vcf + --vcf_files_tbi VCF_FILES_TBI + Array of vcf files to be concatenated into one vcf + +``` From 3dc485ae3057feb726720a4d34e5965d919a8150 Mon Sep 17 00:00:00 2001 From: Nikhil Date: Thu, 21 May 2020 18:44:36 -0400 Subject: [PATCH 108/476] Added bcftools cwl --- bcftools_1.6/bcftools_concat_1.6.cwl | 172 +++++++++++++++++++++++++++ 1 file changed, 172 insertions(+) create mode 100644 bcftools_1.6/bcftools_concat_1.6.cwl diff --git a/bcftools_1.6/bcftools_concat_1.6.cwl b/bcftools_1.6/bcftools_concat_1.6.cwl new file mode 100644 index 00000000..23dcb30c --- /dev/null +++ b/bcftools_1.6/bcftools_concat_1.6.cwl @@ -0,0 +1,172 @@ + +class: CommandLineTool +cwlVersion: v1.0 + +$namespaces: + dct: http://purl.org/dc/terms/ + doap: http://usefulinc.com/ns/doap# + foaf: http://xmlns.com/foaf/0.1/ + sbg: https://www.sevenbridges.com/ + +id: bcftools_concat_v1_6 + +baseCommand: + - /usr/bin/bcftools + - concat + +doc: | + concatenate VCF/BCF files from the 
same set of samples + +inputs: + + threads: + type: ["null", string] + doc: Number of extra output compression threads [0] + inputBinding: + prefix: --threads + + compact_PS: + type: ["null", boolean] + default: false + doc: Do not output PS tag at each site, only at the start of a new phase set block. + inputBinding: + prefix: --compact-PS + + remove_duplicates: + type: ["null", boolean] + default: false + doc: Alias for -d none + inputBinding: + prefix: --remove-duplicates + + ligate: + type: ["null", boolean] + default: false + doc: Ligate phased VCFs by matching phase at overlapping haplotypes + inputBinding: + prefix: --ligate + + output_type: + type: ["null", string] + doc: b - compressed BCF, u - uncompressed BCF, z - compressed VCF, v - uncompressed VCF [v] + inputBinding: + prefix: --output-type + + no_version: + type: ["null", boolean] + default: false + doc: do not append version and command line to the header + inputBinding: + prefix: --no-version + + naive: + type: ["null", boolean] + default: false + doc: Concatenate BCF files without recompression (dangerous, use with caution) + inputBinding: + prefix: --naive + + allow_overlaps: + type: ["null", boolean] + default: false + doc: First coordinate of the next file can precede last record of the current file. 
+ inputBinding: + prefix: --allow-overlaps + + min_PQ: + type: ["null", string] + doc: Break phase set if phasing quality is lower than [30] + inputBinding: + prefix: --min-PQ + + regions_file: + type: ["null", string] + doc: Restrict to regions listed in a file + inputBinding: + prefix: --regions-file + + regions: + type: ["null", string] + doc: Restrict to comma-separated list of regions + inputBinding: + prefix: --regions + + rm_dups: + type: ["null", string] + doc: Output duplicate records present in multiple files only once - + inputBinding: + prefix: --rm-dups + + output: + type: string + doc: Write output to a file [standard output] + default: "bcftools_concat.vcf" + inputBinding: + prefix: --output + + list: + type: ['null', string] + doc: Read the list of files from a file. + inputBinding: + prefix: --file-list + + vcf_files_tbi: + type: + - 'null' + - type: array + items: File + secondaryFiles: + - .tbi + doc: Array of vcf files to be concatenated into one vcf + inputBinding: + position: 1 + + vcf_files_csi: + type: + - 'null' + - type: array + items: File + secondaryFiles: + - ^.bcf.csi + doc: Array of vcf files to be concatenated into one vcf + inputBinding: + position: 1 + +outputs: + concat_vcf_output_file: + type: File + outputBinding: + glob: |- + ${ + if (inputs.output) + return inputs.output; + return null; + } + +requirements: + InlineJavascriptRequirement: {} + ResourceRequirement: + ramMin: 8000 + coresMin: 1 + DockerRequirement: + dockerPull: mskaccess/bcftools:1.6 + + +dct:contributor: + - class: foaf:Organization + foaf:member: + - class: foaf:Person + foaf:mbox: mailto:kumarn1@mskcc.org + foaf:name: Nikhil Kumar + foaf:name: Memorial Sloan Kettering Cancer Center +dct:creator: + - class: foaf:Organization + foaf:member: + - class: foaf:Person + foaf:mbox: mailto:kumarn1@mskcc.org + foaf:name: Nikhil Kumar + foaf:name: Memorial Sloan Kettering Cancer Center +doap:release: + - class: doap:Version + doap:name: bcftools + doap:revision: 1.6 
From 172f5278fa48e015f37670740c5d0131c0be07d1 Mon Sep 17 00:00:00 2001 From: Nikhil Date: Thu, 21 May 2020 18:44:49 -0400 Subject: [PATCH 109/476] Added bcftools example inputs --- bcftools_1.6/example_inputs.yaml | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 bcftools_1.6/example_inputs.yaml diff --git a/bcftools_1.6/example_inputs.yaml b/bcftools_1.6/example_inputs.yaml new file mode 100644 index 00000000..33e20ac1 --- /dev/null +++ b/bcftools_1.6/example_inputs.yaml @@ -0,0 +1,9 @@ +vcf_files_tbi: + class: File + path: /path/to/vcf/and/tbi/files +tumor_sample_name: tumor_sample_name +normal_sample_name: normal_sample_name +allow_overlaps: allow_overlaps_boolean +rm_dups: rm_dups_str +output_type: output_type_str +output: output_file_name \ No newline at end of file From e9933cc97ad2c962bd6487c0e72cf309b4c13bde Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 19 Jun 2020 11:10:02 -0400 Subject: [PATCH 110/476] Update example_inputs.yaml Adding new line --- bcftools_1.6/example_inputs.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bcftools_1.6/example_inputs.yaml b/bcftools_1.6/example_inputs.yaml index 33e20ac1..0512753d 100644 --- a/bcftools_1.6/example_inputs.yaml +++ b/bcftools_1.6/example_inputs.yaml @@ -6,4 +6,4 @@ normal_sample_name: normal_sample_name allow_overlaps: allow_overlaps_boolean rm_dups: rm_dups_str output_type: output_type_str -output: output_file_name \ No newline at end of file +output: output_file_name From 2a4ee94e8be2770f2a3c4e6bade41e1580fd2c47 Mon Sep 17 00:00:00 2001 From: Ian Date: Fri, 19 Jun 2020 13:02:34 -0400 Subject: [PATCH 111/476] validate all CWL files through travis with cwltool --- .travis.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 8de575c0..c0504869 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,10 +6,13 @@ python: - 2.7 # Command to install dependencies, e.g. 
pip install -r requirements.txt --use-mirrors -install: pip install -U tox-travis +install: + - pip install -U tox-travis + - pip install -r requirements.txt # Command to run tests, e.g. python setup.py test -script: tox +script: + - find . -name '*.cwl' | xargs -n 1 cwltool --validate # Assuming you have installed the travis-ci CLI tool, after you # create the Github repo and add it to Travis, run the From e5a4440106ea82cd114a251cb762223ad0bb1392 Mon Sep 17 00:00:00 2001 From: Ian Johnson Date: Fri, 19 Jun 2020 14:45:44 -0400 Subject: [PATCH 112/476] fix samtools sort cwl validation --- samtools_sort_1.3.1/samtools_sort_1.3.1.cwl | 1 - 1 file changed, 1 deletion(-) diff --git a/samtools_sort_1.3.1/samtools_sort_1.3.1.cwl b/samtools_sort_1.3.1/samtools_sort_1.3.1.cwl index eb0eaeb9..975bf18b 100644 --- a/samtools_sort_1.3.1/samtools_sort_1.3.1.cwl +++ b/samtools_sort_1.3.1/samtools_sort_1.3.1.cwl @@ -1,6 +1,5 @@ class: CommandLineTool cwlVersion: v1.0 -$namespaces: baseCommand: - samtools - sort From baf9a087f34c174d8ccf49712f406182ad2492be Mon Sep 17 00:00:00 2001 From: Nikhil Kumar Date: Thu, 9 Jul 2020 19:00:23 -0400 Subject: [PATCH 113/476] Added memory inputs --- bcftools_1.6/bcftools_concat_1.6.cwl | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/bcftools_1.6/bcftools_concat_1.6.cwl b/bcftools_1.6/bcftools_concat_1.6.cwl index 23dcb30c..d2e1b680 100644 --- a/bcftools_1.6/bcftools_concat_1.6.cwl +++ b/bcftools_1.6/bcftools_concat_1.6.cwl @@ -19,6 +19,14 @@ doc: | inputs: + memory_per_job: + type: ["null",int] + doc: Memory per job in megabytes + + memory_overhead: + type: ["null",int] + doc: Memory overhead per job in megabytes + threads: type: ["null", string] doc: Number of extra output compression threads [0] From c689a09c1b7b8f941a7918cde91807adb13067fd Mon Sep 17 00:00:00 2001 From: Nikhil Kumar Date: Thu, 9 Jul 2020 19:03:06 -0400 Subject: [PATCH 114/476] Added memory inputs --- vcf2maf_1.6.17/vcf2maf_1.6.17.cwl | 9 +++++++++ 1 file changed, 9 
insertions(+) diff --git a/vcf2maf_1.6.17/vcf2maf_1.6.17.cwl b/vcf2maf_1.6.17/vcf2maf_1.6.17.cwl index b32be233..5aa97470 100644 --- a/vcf2maf_1.6.17/vcf2maf_1.6.17.cwl +++ b/vcf2maf_1.6.17/vcf2maf_1.6.17.cwl @@ -15,6 +15,15 @@ baseCommand: - /usr/bin/vcf2maf/vcf2maf.pl inputs: + + memory_per_job: + type: ["null",int] + doc: Memory per job in megabytes + + memory_overhead: + type: ["null",int] + doc: Memory overhead per job in megabytes + cache_version: type: - 'null' From 006dd5f965803e64f3334c29617814695404690a Mon Sep 17 00:00:00 2001 From: Ian Johnson Date: Fri, 10 Jul 2020 16:32:22 -0400 Subject: [PATCH 115/476] bwa 0.7.17 --- bwa_mem_0.7.17/bwa_mem_0.7.17.cwl | 214 ++++++++++++++++++++++++++++ bwa_mem_0.7.17/container/Dockerfile | 23 +++ 2 files changed, 237 insertions(+) create mode 100644 bwa_mem_0.7.17/bwa_mem_0.7.17.cwl create mode 100644 bwa_mem_0.7.17/container/Dockerfile diff --git a/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl b/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl new file mode 100644 index 00000000..c7e4f907 --- /dev/null +++ b/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl @@ -0,0 +1,214 @@ +class: CommandLineTool +cwlVersion: v1.0 +baseCommand: + - bwa + - mem +inputs: + - id: reads + type: 'File[]' + inputBinding: + position: 3 + - id: reference + type: File + inputBinding: + position: 2 + secondaryFiles: + - .amb + - .ann + - .bwt + - .pac + - .sa + - .fai + - id: sample_id + type: string + - id: lane_id + type: string + - id: A + type: int? + inputBinding: + position: 0 + prefix: '-A' + - id: B + type: int? + inputBinding: + position: 0 + prefix: '-B' + - id: C + type: boolean? + inputBinding: + position: 0 + prefix: '-C' + - id: E + type: 'int[]?' + inputBinding: + position: 0 + prefix: '-E' + itemSeparator: ',' + - id: L + type: 'int[]?' + inputBinding: + position: 0 + prefix: '-L' + itemSeparator: ',' + - id: M + type: boolean? + inputBinding: + position: 0 + prefix: '-M' + - id: O + type: 'int[]?' 
+ inputBinding: + position: 0 + prefix: '-O' + itemSeparator: ',' + - id: P + type: boolean? + inputBinding: + position: 0 + prefix: '-P' + - id: S + type: boolean? + inputBinding: + position: 0 + prefix: '-S' + - id: T + type: int? + inputBinding: + position: 0 + prefix: '-T' + - id: U + type: int? + inputBinding: + position: 0 + prefix: '-U' + - id: a + type: boolean? + inputBinding: + position: 0 + prefix: '-a' + - id: c + type: int? + inputBinding: + position: 0 + prefix: '-c' + - id: d + type: int? + inputBinding: + position: 0 + prefix: '-d' + - id: k + type: int? + inputBinding: + position: 0 + prefix: '-k' + - id: K + type: int? + inputBinding: + position: 0 + prefix: '-K' + - id: output + type: string? + - id: p + type: boolean? + inputBinding: + position: 0 + prefix: '-p' + - id: r + type: float? + inputBinding: + position: 0 + prefix: '-r' + - id: v + type: int? + inputBinding: + position: 0 + prefix: '-v' + - id: w + type: int? + inputBinding: + position: 0 + prefix: '-w' + - id: 'y' + type: int? + inputBinding: + position: 0 + prefix: '-y' + - id: D + type: float? + inputBinding: + position: 0 + prefix: '-D' + - id: W + type: int? + inputBinding: + position: 0 + prefix: '-W' + - id: m + type: int? + inputBinding: + position: 0 + prefix: '-m' + - id: e + type: boolean? + inputBinding: + position: 0 + prefix: '-e' + - id: x + type: string? + inputBinding: + position: 0 + prefix: '-x' + - id: H + type: + - File? + - string? + inputBinding: + position: 0 + prefix: '-H' + - id: j + type: File? + inputBinding: + position: 0 + prefix: '-j' + - id: h + type: 'int[]?' + inputBinding: + position: 0 + prefix: '-h' + itemSeparator: ',' + - id: V + type: boolean? + inputBinding: + position: 0 + prefix: '-V' + - id: 'Y' + type: boolean? + inputBinding: + position: 0 + prefix: '-Y' + - id: I + type: string? 
+ inputBinding: + position: 0 + prefix: '-I' +outputs: + - id: output_sam + type: File + outputBinding: + glob: '$(inputs.reads[0].basename.replace(''fastq.gz'', ''sam''))' +arguments: + - position: 0 + prefix: '-R' + valueFrom: >- + @RG\\tID:$(inputs.lane_id)\\tSM:$(inputs.sample_id)\\tLB:$(inputs.sample_id)\\tPL:Illumina\\tPU:$(inputs.lane_id) + - position: 0 + prefix: '-t' + valueFrom: $(runtime.cores) +requirements: + - class: ResourceRequirement + ramMin: 32000 + coresMin: 4 + - class: DockerRequirement + dockerPull: 'mskcc/bwa_mem:0.7.17' + - class: InlineJavascriptRequirement +stdout: '$(inputs.reads[0].basename.replace(''fastq.gz'', ''sam''))' diff --git a/bwa_mem_0.7.17/container/Dockerfile b/bwa_mem_0.7.17/container/Dockerfile new file mode 100644 index 00000000..49cc6831 --- /dev/null +++ b/bwa_mem_0.7.17/container/Dockerfile @@ -0,0 +1,23 @@ +FROM ubuntu:16.04 + +LABEL maintainer="Ian Johnson (johnsoni@mskcc.org)" \ + version.image="0.1.0" \ + version.bwa="0.7.17" \ + version.ubuntu="16.04" \ + source.bwa="https://github.com/lh3/bwa/releases/tag/v0.7.17" + +ENV BWA_VERSION 0.7.17 + +RUN apt-get -y update \ + # install build tools and dependencies + && apt-get -y install build-essential zlib1g-dev wget unzip \ + # download and unzip bwa + && cd /tmp && wget "https://github.com/lh3/bwa/archive/v${BWA_VERSION}.zip" \ + && unzip "v${BWA_VERSION}.zip" \ + # build + && cd "/tmp/bwa-${BWA_VERSION}" \ + && make \ + # move binaries to /usr/bin + && mv "/tmp/bwa-${BWA_VERSION}/bwa" /usr/bin \ + # clean up + && rm -rf /tmp/* From 83c1c38be21adb6a888e5de5e5f77bd0bce6585f Mon Sep 17 00:00:00 2001 From: Ian Johnson Date: Fri, 10 Jul 2020 17:00:08 -0400 Subject: [PATCH 116/476] picard 2.9.0 --- .../container/Dockerfile | 44 +++++ .../picard_fix_mate_information_2.9.0.cwl | 180 ++++++++++++++++++ .../picard_mark_duplicates_2.8.1.cwl | 176 +++++++++++++++++ 3 files changed, 400 insertions(+) create mode 100644 picard_fix_mate_information_2.9.0/container/Dockerfile
create mode 100644 picard_fix_mate_information_2.9.0/picard_fix_mate_information_2.9.0.cwl create mode 100644 picard_mark_duplicates_2.9.0/picard_mark_duplicates_2.8.1.cwl diff --git a/picard_fix_mate_information_2.9.0/container/Dockerfile b/picard_fix_mate_information_2.9.0/container/Dockerfile new file mode 100644 index 00000000..643fa70d --- /dev/null +++ b/picard_fix_mate_information_2.9.0/container/Dockerfile @@ -0,0 +1,44 @@ +################## BASE IMAGE ###################### + +FROM openjdk:8 + +################## ARGUMENTS/Environments ########## + +ARG BUILD_DATE +ARG BUILD_VERSION +ARG JAVA_VERSION=8 +ARG LICENSE="Apache-2.0" +ARG PICARD_VERSION=2.9.0 +ARG R_VERSION="3.3.3" + +################## METADATA ######################## +LABEL org.opencontainers.image.vendor="MSKCC" +LABEL org.opencontainers.image.authors="Ronak H Shah (shahr2@mskcc.org)" + +LABEL org.opencontainers.image.created=${BUILD_DATE} \ + org.opencontainers.image.version=${BUILD_VERSION} \ + org.opencontainers.image.licenses=${LICENSE} \ + org.opencontainers.image.version.picard=${PICARD_VERSION} \ + org.opencontainers.image.version.R=${R_VERSION} \ + org.opencontainers.image.version.java=${JAVA_VERSION} \ + org.opencontainers.image.source.picard="https://github.com/broadinstitute/picard/releases/download/${PICARD_VERSION}/picard.jar" \ + org.opencontainers.image.source.R="r-base" + +LABEL org.opencontainers.image.description="This container uses openjdk ${JAVA_VERSION} as the base image to build \ + picard version ${PICARD_VERSION}, \ + R version ${R_VERSION}" + +# Install ant, git for building +RUN apt-get update && \ + apt-get --no-install-recommends install -y \ + git \ + unzip \ + wget \ + r-base && \ + apt-get clean autoclean && \ + apt-get autoremove -y + +WORKDIR /usr/src + +RUN wget "https://github.com/broadinstitute/picard/releases/download/${PICARD_VERSION}/picard.jar" && \ + cp -s /usr/src/picard.jar /usr/local/bin/ diff --git 
a/picard_fix_mate_information_2.9.0/picard_fix_mate_information_2.9.0.cwl b/picard_fix_mate_information_2.9.0/picard_fix_mate_information_2.9.0.cwl new file mode 100644 index 00000000..3bee0f79 --- /dev/null +++ b/picard_fix_mate_information_2.9.0/picard_fix_mate_information_2.9.0.cwl @@ -0,0 +1,180 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + cwltool: 'http://commonwl.org/cwltool#' + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: picard_fix_mate_information_2_9_0 +baseCommand: + - java +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: input + type: File + inputBinding: + position: 0 + prefix: I= + separate: false + doc: The input file to fix. This option may be specified 0 or more times + secondaryFiles: + - ^.bai + - id: output_file_name + type: string? + doc: Output file name (bam or sam). Not Required + - id: sort_order + type: string? + inputBinding: + position: 0 + prefix: SO= + separate: false + doc: >- + Optional sort order to output in. If not supplied OUTPUT is in the same + order as INPUT.Default value: null. Possible values: {unsorted, queryname, + coordinate} + - id: validation_stringency + type: string? + inputBinding: + position: 0 + prefix: VALIDATION_STRINGENCY= + separate: false + doc: >- + Validation stringency for all SAM files read by this program. Setting + stringency to SILENT can improve performance when processing a BAM file in + which variable-length data (read, qualities, tags) do not otherwise need + to be decoded. Default value: STRICT. This option can be set to 'null' to + clear the default value. Possible values: {STRICT,LENIENT, SILENT} + - id: bam_compression_level + type: int? 
+ inputBinding: + position: 0 + prefix: COMPRESSION_LEVEL= + separate: false + doc: >- + Compression level for all compressed files created (e.g. BAM and GELI). + Default value:5. This option can be set to 'null' to clear the default + value. + - default: true + id: create_bam_index + type: boolean? + inputBinding: + position: 0 + prefix: CREATE_INDEX=true + doc: >- + Whether to create a BAM index when writing a coordinate-sorted BAM file. + Default value:false. This option can be set to 'null' to clear the default + value. Possible values:{true, false} + - id: temporary_directory + type: string? + doc: 'Default value: null. This option may be specified 0 or more times.' +outputs: + - id: bam + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/,'_fm.bam') + } + } + secondaryFiles: + - ^.bai +label: picard_fix_mate_information_2.9.0 +arguments: + - position: 0 + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx15G" + } + else { + return "-Xmx15G" + } + } + - position: 0 + shellQuote: false + valueFrom: '-XX:-UseGCOverheadLimit' + - position: 0 + prefix: '-Djava.io.tmpdir=' + separate: false + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } + - position: 0 + prefix: '-jar' + valueFrom: /usr/local/bin/FixMateInformation.jar + - 
position: 0 + prefix: TMP_DIR= + separate: false + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } + - position: 0 + prefix: O= + separate: false + valueFrom: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/,'_fm.bam') + } + } +requirements: + - class: ShellCommandRequirement + - class: ResourceRequirement + ramMin: 25000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'mskaccess/picard_2.9.0:0.1.0' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': picard + 'doap:revision': 2.9.0 diff --git a/picard_mark_duplicates_2.9.0/picard_mark_duplicates_2.8.1.cwl b/picard_mark_duplicates_2.9.0/picard_mark_duplicates_2.8.1.cwl new file mode 100644 index 00000000..e95e6d07 --- /dev/null +++ b/picard_mark_duplicates_2.9.0/picard_mark_duplicates_2.8.1.cwl @@ -0,0 +1,176 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: picard_mark_duplicates_2_9_0 +baseCommand: + - java +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: input + type: File + inputBinding: + position: 0 + prefix: I= + separate: false + doc: Input file (bam or sam). Required. 
+ - id: output_file_name + type: string? + doc: Output file (bam or sam). + - default: '$( inputs.input.basename.replace(/.bam/, ''_md.metrics'') )' + id: duplication_metrics + type: string + inputBinding: + position: 0 + prefix: M= + separate: false + valueFrom: '$( inputs.input.basename.replace(/.bam/, ''_md.metrics'') )' + doc: File to write duplication metrics to Required. + - id: sort_order + type: string? + inputBinding: + position: 0 + prefix: SO= + separate: false + doc: >- + Optional sort order to output in. If not supplied OUTPUT is in the same + order as INPUT.Default value: null. Possible values: {unsorted, queryname, + coordinate} + - id: tmp_dir + type: string? + inputBinding: + position: 0 + prefix: TMP_DIR= + separate: false + doc: This option may be specified 0 or more times + - id: validation_stringency + type: string? + inputBinding: + position: 0 + prefix: VALIDATION_STRINGENCY= + separate: false + doc: >- + Validation stringency for all SAM files read by this program. Setting + stringency to SILENT can improve performance when processing a BAM file in + which variable-length data (read, qualities, tags) do not otherwise need + to be decoded. Default value: STRICT. This option can be set to 'null' to + clear the default value. Possible values: {STRICT,LENIENT, SILENT} + - id: bam_compression_level + type: int? + inputBinding: + position: 0 + prefix: COMPRESSION_LEVEL= + separate: false + doc: >- + Compression level for all compressed files created (e.g. BAM and GELI). + Default value:5. This option can be set to 'null' to clear the default + value. + - default: true + id: create_bam_index + type: boolean? + inputBinding: + position: 0 + prefix: CREATE_INDEX=true + doc: >- + Whether to create a BAM index when writing a coordinate-sorted BAM file. + Default value:false. This option can be set to 'null' to clear the default + value. Possible values:{true, false} + - default: true + id: assume_sorted + type: boolean? 
+ inputBinding: + position: 0 + prefix: AS=true + - id: duplicate_scoring_strategy + type: string? + inputBinding: + position: 0 + prefix: DUPLICATE_SCORING_STRATEGY= + separate: false + doc: >- + The scoring strategy for choosing the non-duplicate among candidates. + Default value:SUM_OF_BASE_QUALITIES. This option can be set to 'null' to + clear the default value.Possible values: {SUM_OF_BASE_QUALITIES, + TOTAL_MAPPED_REFERENCE_LENGTH, RANDOM} + - id: optical_duplicate_pixel_distance + type: int? + inputBinding: + position: 0 + prefix: OPTICAL_DUPLICATE_PIXEL_DISTANCE= + doc: >- + The maximum offset between two duplicate clusters in order to consider + them optical duplicates. The default is appropriate for unpatterned + versions of the Illumina platform. For the patterned flowcell models, 2500 + is moreappropriate. For other platforms and models, users should + experiment to find what works best. Default value: 100. This option can + be set to 'null' to clear the default value. +outputs: + - id: bam + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/,'_md.bam') + } + } + secondaryFiles: + - ^.bai + - ^.metrics +label: picard_mark_duplicates_2.9.0 +arguments: + - position: 0 + valueFrom: "${\n if(inputs.memory_per_job && inputs.memory_overhead) {\n \n if(inputs.memory_per_job % 1000 == 0) {\n \t\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n \n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\" \n }\n }\n else if (inputs.memory_per_job && !inputs.memory_overhead){\n \n if(inputs.memory_per_job % 1000 == 0) {\n \t\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n \n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\" \n }\n }\n else if(!inputs.memory_per_job && inputs.memory_overhead){\n \n return \"-Xmx15G\"\n }\n else {\n \n 
\treturn \"-Xmx15G\"\n }\n}" + - position: 0 + prefix: '-jar' + valueFrom: /usr/local/bin/picard.jar + - position: 0 + valueFrom: MarkDuplicates + - position: 0 + prefix: O= + separate: false + valueFrom: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/,'_md.bam') + } + } +requirements: + - class: ResourceRequirement + ramMin: 17000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'mskaccess/picard:0.6.3' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': picard + 'doap:revision': 2.9.0 From 29e3c76fbf39f6b3d7079d944d70922ff163e424 Mon Sep 17 00:00:00 2001 From: Ian Johnson Date: Fri, 10 Jul 2020 17:07:25 -0400 Subject: [PATCH 117/476] fix path to jar in container, + fix mskaccess docker tag --- .../picard_fix_mate_information_2.9.0.cwl | 4 +++- picard_mark_duplicates_2.9.0/picard_mark_duplicates_2.8.1.cwl | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/picard_fix_mate_information_2.9.0/picard_fix_mate_information_2.9.0.cwl b/picard_fix_mate_information_2.9.0/picard_fix_mate_information_2.9.0.cwl index 3bee0f79..bac18f66 100644 --- a/picard_fix_mate_information_2.9.0/picard_fix_mate_information_2.9.0.cwl +++ b/picard_fix_mate_information_2.9.0/picard_fix_mate_information_2.9.0.cwl @@ -131,7 +131,9 @@ arguments: } - position: 0 prefix: '-jar' - valueFrom: /usr/local/bin/FixMateInformation.jar + valueFrom: /usr/local/bin/picard.jar + - position: 0 + valueFrom: MarkDuplicates - position: 0 prefix: 
TMP_DIR= separate: false diff --git a/picard_mark_duplicates_2.9.0/picard_mark_duplicates_2.8.1.cwl b/picard_mark_duplicates_2.9.0/picard_mark_duplicates_2.8.1.cwl index e95e6d07..f65e1e6b 100644 --- a/picard_mark_duplicates_2.9.0/picard_mark_duplicates_2.8.1.cwl +++ b/picard_mark_duplicates_2.9.0/picard_mark_duplicates_2.8.1.cwl @@ -154,7 +154,7 @@ requirements: ramMin: 17000 coresMin: 2 - class: DockerRequirement - dockerPull: 'mskaccess/picard:0.6.3' + dockerPull: 'mskaccess/picard_2.9.0:0.1.0' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' From aefafe64dfd9dfad36406a3e71073d6d5230fd81 Mon Sep 17 00:00:00 2001 From: Ian Johnson Date: Fri, 10 Jul 2020 17:14:10 -0400 Subject: [PATCH 118/476] fix docker tag --- bwa_mem_0.7.17/bwa_mem_0.7.17.cwl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl b/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl index c7e4f907..a6810b60 100644 --- a/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl +++ b/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl @@ -209,6 +209,6 @@ requirements: ramMin: 32000 coresMin: 4 - class: DockerRequirement - dockerPull: 'mskcc/bwa_mem:0.7.17' + dockerPull: 'mskaccess/bwa_mem_0.7.17:0.1.0' - class: InlineJavascriptRequirement stdout: '$(inputs.reads[0].basename.replace(''fastq.gz'', ''sam''))' From a3a039281bc8c50a99debbaf975d0e4ccff9cfc9 Mon Sep 17 00:00:00 2001 From: Ian Johnson Date: Mon, 13 Jul 2020 13:13:36 -0400 Subject: [PATCH 119/476] rename to correct file name --- ...mark_duplicates_2.8.1.cwl => picard_mark_duplicates_2.9.0.cwl} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename picard_mark_duplicates_2.9.0/{picard_mark_duplicates_2.8.1.cwl => picard_mark_duplicates_2.9.0.cwl} (100%) diff --git a/picard_mark_duplicates_2.9.0/picard_mark_duplicates_2.8.1.cwl b/picard_mark_duplicates_2.9.0/picard_mark_duplicates_2.9.0.cwl similarity index 100% rename from picard_mark_duplicates_2.9.0/picard_mark_duplicates_2.8.1.cwl rename to 
picard_mark_duplicates_2.9.0/picard_mark_duplicates_2.9.0.cwl From 8b94d6a614fe613a1a231528c1e40fd27e4646de Mon Sep 17 00:00:00 2001 From: Ian Johnson Date: Mon, 13 Jul 2020 14:47:56 -0400 Subject: [PATCH 120/476] add read name regex param --- .../picard_mark_duplicates_2.9.0.cwl | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/picard_mark_duplicates_2.9.0/picard_mark_duplicates_2.9.0.cwl b/picard_mark_duplicates_2.9.0/picard_mark_duplicates_2.9.0.cwl index f65e1e6b..9c4d76bf 100644 --- a/picard_mark_duplicates_2.9.0/picard_mark_duplicates_2.9.0.cwl +++ b/picard_mark_duplicates_2.9.0/picard_mark_duplicates_2.9.0.cwl @@ -114,6 +114,12 @@ inputs: is moreappropriate. For other platforms and models, users should experiment to find what works best. Default value: 100. This option can be set to 'null' to clear the default value. + - id: read_name_regex + type: string? + inputBinding: + position: 0 + prefix: READ_NAME_REGEX= + separate: false outputs: - id: bam type: File From 2353e8e572b95d31f3ee5af0ef0daff2d0e5fec7 Mon Sep 17 00:00:00 2001 From: Ian Johnson Date: Mon, 13 Jul 2020 19:07:21 -0400 Subject: [PATCH 121/476] fix BWA read group param, fix picard FixMateInformation command --- bwa_mem_0.7.17/bwa_mem_0.7.17.cwl | 20 +++++++++++++------ .../picard_fix_mate_information_2.9.0.cwl | 3 ++- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl b/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl index a6810b60..68be62a7 100644 --- a/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl +++ b/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl @@ -1,5 +1,7 @@ class: CommandLineTool cwlVersion: v1.0 +$namespaces: + sbg: 'https://www.sevenbridges.com/' baseCommand: - bwa - mem @@ -195,12 +197,13 @@ outputs: - id: output_sam type: File outputBinding: - glob: '$(inputs.reads[0].basename.replace(''fastq.gz'', ''sam''))' + glob: |- + ${ + if (inputs.output) + return inputs.output; + return inputs.reads[0].basename.replace(/(fastq.gz)|(fq.gz)/, 'sam'); + } arguments: - - position: 
0 - prefix: '-R' - valueFrom: >- - @RG\\tID:$(inputs.lane_id)\\tSM:$(inputs.sample_id)\\tLB:$(inputs.sample_id)\\tPL:Illumina\\tPU:$(inputs.lane_id) - position: 0 prefix: '-t' valueFrom: $(runtime.cores) @@ -211,4 +214,9 @@ requirements: - class: DockerRequirement dockerPull: 'mskaccess/bwa_mem_0.7.17:0.1.0' - class: InlineJavascriptRequirement -stdout: '$(inputs.reads[0].basename.replace(''fastq.gz'', ''sam''))' +stdout: |- + ${ + if (inputs.output) + return inputs.output; + return inputs.reads[0].basename.replace(/(fastq.gz)|(fq.gz)/, 'sam'); + } diff --git a/picard_fix_mate_information_2.9.0/picard_fix_mate_information_2.9.0.cwl b/picard_fix_mate_information_2.9.0/picard_fix_mate_information_2.9.0.cwl index bac18f66..1343e2b9 100644 --- a/picard_fix_mate_information_2.9.0/picard_fix_mate_information_2.9.0.cwl +++ b/picard_fix_mate_information_2.9.0/picard_fix_mate_information_2.9.0.cwl @@ -133,7 +133,8 @@ arguments: prefix: '-jar' valueFrom: /usr/local/bin/picard.jar - position: 0 - valueFrom: MarkDuplicates + prefix: '' + valueFrom: FixMateInformation - position: 0 prefix: TMP_DIR= separate: false From 13411616f011549c2ffd19fe9cf2bad06a4b578c Mon Sep 17 00:00:00 2001 From: Ian Johnson Date: Mon, 13 Jul 2020 19:36:11 -0400 Subject: [PATCH 122/476] did not mean to make lane_id required --- bwa_mem_0.7.17/bwa_mem_0.7.17.cwl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl b/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl index 68be62a7..87681d96 100644 --- a/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl +++ b/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl @@ -24,7 +24,7 @@ inputs: - id: sample_id type: string - id: lane_id - type: string + type: string? - id: A type: int? 
inputBinding: From 5ebb1b6d62ca140eaaface28315fe69a34c37e3c Mon Sep 17 00:00:00 2001 From: Ian Johnson Date: Tue, 14 Jul 2020 16:14:23 -0400 Subject: [PATCH 123/476] remove empty prefix created by rabix that breaks on cwltool --- .../picard_fix_mate_information_2.9.0.cwl | 1 - 1 file changed, 1 deletion(-) diff --git a/picard_fix_mate_information_2.9.0/picard_fix_mate_information_2.9.0.cwl b/picard_fix_mate_information_2.9.0/picard_fix_mate_information_2.9.0.cwl index 1343e2b9..b0123ad1 100644 --- a/picard_fix_mate_information_2.9.0/picard_fix_mate_information_2.9.0.cwl +++ b/picard_fix_mate_information_2.9.0/picard_fix_mate_information_2.9.0.cwl @@ -133,7 +133,6 @@ arguments: prefix: '-jar' valueFrom: /usr/local/bin/picard.jar - position: 0 - prefix: '' valueFrom: FixMateInformation - position: 0 prefix: TMP_DIR= From 894ac315356fb89273763bac53a013eb56898cee Mon Sep 17 00:00:00 2001 From: Ian Johnson Date: Wed, 15 Jul 2020 12:10:02 -0400 Subject: [PATCH 124/476] add read group param for bwa mem, and use threads as input instead of argument --- bwa_mem_0.7.17/bwa_mem_0.7.17.cwl | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl b/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl index 87681d96..e5e6a58a 100644 --- a/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl +++ b/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl @@ -193,6 +193,17 @@ inputs: inputBinding: position: 0 prefix: '-M' + - id: t + type: int? + inputBinding: + position: 0 + prefix: '-t' + - id: R + type: string? 
+ inputBinding: + position: 0 + prefix: '-R' + doc: 'STR read group header line such as ''@RG\tID -foo\tSM -bar'' [null]' outputs: - id: output_sam type: File @@ -203,10 +214,6 @@ outputs: return inputs.output; return inputs.reads[0].basename.replace(/(fastq.gz)|(fq.gz)/, 'sam'); } -arguments: - - position: 0 - prefix: '-t' - valueFrom: $(runtime.cores) requirements: - class: ResourceRequirement ramMin: 32000 From 272db7a7e960e7914ac1f25e7bfe92fcf5fe913d Mon Sep 17 00:00:00 2001 From: Ian Johnson Date: Wed, 15 Jul 2020 12:39:39 -0400 Subject: [PATCH 125/476] add docs for new bwa params --- bwa_mem_0.7.17/bwa_mem_0.7.17.cwl | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl b/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl index e5e6a58a..c5436ae0 100644 --- a/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl +++ b/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl @@ -78,6 +78,9 @@ inputs: inputBinding: position: 0 prefix: '-T' + doc: >- + Don’t output alignment with score lower than INT. This option only affects + output. - id: U type: int? inputBinding: @@ -188,6 +191,9 @@ inputs: inputBinding: position: 0 prefix: '-Y' + doc: >- + Force soft-clipping rather than default hard-clipping of supplementary + alignments - id: I type: string? inputBinding: @@ -198,6 +204,7 @@ inputs: inputBinding: position: 0 prefix: '-t' + doc: Number of threads - id: R type: string? 
inputBinding: From 65e687319d3a407d5975ba6bb8712b020b9a56d5 Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Fri, 17 Jul 2020 13:31:49 -0400 Subject: [PATCH 126/476] add tool --- .../README.md | 103 +++++++++ .../example_inputs.yaml | 21 ++ ...ard_collect_insert_size_metrics_2.21.2.cwl | 216 ++++++++++++++++++ 3 files changed, 340 insertions(+) create mode 100644 picard_collect_insert_size_metrics_2.21.2/README.md create mode 100644 picard_collect_insert_size_metrics_2.21.2/example_inputs.yaml create mode 100644 picard_collect_insert_size_metrics_2.21.2/picard_collect_insert_size_metrics_2.21.2.cwl diff --git a/picard_collect_insert_size_metrics_2.21.2/README.md b/picard_collect_insert_size_metrics_2.21.2/README.md new file mode 100644 index 00000000..6853906e --- /dev/null +++ b/picard_collect_insert_size_metrics_2.21.2/README.md @@ -0,0 +1,103 @@ +# CWL for running Picard - CollectInsertSizeMetrics + +## Version of tools in docker image + +| Tool | Version | Location | +|--- |--- |--- | +| picard | 2.21.2 | https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar | + + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner picard_collect_insert_size_metrics_2.21.2.cwl example_inputs.yaml +``` + +### Usage + +```bash +usage: ./GitHub/cwl-commandlinetools/picard_collect_insert_size_metrics_2.21.2/picard_collect_insert_size_metrics_2.21.2.cwl + [-h] [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] --input INPUT + --output_file_name OUTPUT_FILE_NAME --histogram_file HISTOGRAM_FILE + [--deviations DEVIATIONS] [--histogram_width HISTOGRAM_WIDTH] + [--min_histogram_width MIN_HISTOGRAM_WIDTH] [--minimum_pct MINIMUM_PCT] + [--metrics_acciumulation_level METRICS_ACCIUMULATION_LEVEL] + [--include_duplicates] [--validation_stringency VALIDATION_STRINGENCY] + 
[--assume_sorted] [--stop_after STOP_AFTER] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --input INPUT Input file (bam or sam). Required. + --output_file_name OUTPUT_FILE_NAME + File to write the output to. Required. + --histogram_file HISTOGRAM_FILE + File to write insert size Histogram chart to. + Required. + --deviations DEVIATIONS + Generate mean, sd and plots by trimming the data down + to MEDIAN + DEVIATIONS*MEDIAN_ABSOLUTE_DEVIATION. This + is done because insert size data typically includes + enough anomalous values from chimeras and other + artifacts to make the mean and sd grossly misleading + regarding the real distribution. Default value: 10.0. + This option can be set to 'null' to clear the default + value. + --histogram_width HISTOGRAM_WIDTH + Explicitly sets the Histogram width, overriding + automatic truncation of Histogram tail. Also, when + calculating mean and standard deviation, only bins <= + Histogram_WIDTH will be included. Default value: null. + --min_histogram_width MIN_HISTOGRAM_WIDTH + Minimum width of histogram plots. In the case when the + histogram would otherwise betruncated to a shorter + range of sizes, the MIN_HISTOGRAM_WIDTH will enforce a + minimum range. Default value: null. + --minimum_pct MINIMUM_PCT + When generating the Histogram, discard any data + categories (out of FR, TANDEM, RF) that have fewer + than this percentage of overall reads. (Range: 0 to + 1). Default value: 0.05. This option can be set to + 'null' to clear the default value. + --metrics_acciumulation_level METRICS_ACCIUMULATION_LEVEL + The level(s) at which to accumulate metrics. Default + value: [ALL_READS]. This option can be set to 'null' + to clear the default value. 
Possible values: + {ALL_READS, SAMPLE, LIBRARY, READ_GROUP} This option + may be specified 0 or more times. This option can be + set to 'null' to clear the default list. + --include_duplicates If true, also include reads marked as duplicates in + the insert size histogram. Default value: false. This + option can be set to 'null' to clear the default + value. Possible values: {true, false} + --validation_stringency VALIDATION_STRINGENCY + Validation stringency for all SAM files read by this + program. Setting stringency to SILENT can improve + performance when processing a BAM file in which + variable-length data (read, qualities, tags) do not + otherwise need to be decoded. Default value: STRICT. + This option can be set to 'null' to clear the default + value. Possible values: {STRICT,LENIENT, SILENT} + --assume_sorted If true (default), then the sort order in the header + file will be ignored. Default value: true. This option + can be set to 'null' to clear the default value. + Possible values: {true, false} + --stop_after STOP_AFTER + Stop after processing N reads, mainly for debugging. + Default value: 0. This option can be set to 'null' to + clear the default value. 
+``` diff --git a/picard_collect_insert_size_metrics_2.21.2/example_inputs.yaml b/picard_collect_insert_size_metrics_2.21.2/example_inputs.yaml new file mode 100644 index 00000000..0b91a8c2 --- /dev/null +++ b/picard_collect_insert_size_metrics_2.21.2/example_inputs.yaml @@ -0,0 +1,21 @@ +input: + class: File + metadata: {} + path: "/path/to/bam" + secondaryFiles: + - class: File + path: "/path/to/bam.bai" +output_file_name: "insert_size_metrics.txt" +histogram_file: "histogram.pdf" +deviations: 10.0 +histogram_width: 800 +min_histogram_width: 100 +minimum_pct: 0 +metrics_acciumulation_level: null +include_duplicates: null +validation_stringency: null +assume_sorted: null +stop_after: null +memory_overhead: null +memory_per_job: null +number_of_threads: null diff --git a/picard_collect_insert_size_metrics_2.21.2/picard_collect_insert_size_metrics_2.21.2.cwl b/picard_collect_insert_size_metrics_2.21.2/picard_collect_insert_size_metrics_2.21.2.cwl new file mode 100644 index 00000000..08cece21 --- /dev/null +++ b/picard_collect_insert_size_metrics_2.21.2/picard_collect_insert_size_metrics_2.21.2.cwl @@ -0,0 +1,216 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: picard_collect_insert_size_metrics_2_21_2 +baseCommand: + - java +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: input + type: File + inputBinding: + position: 0 + prefix: I= + separate: false + doc: Input file (bam or sam). Required. + - id: output_file_name + type: string + inputBinding: + position: 0 + prefix: O= + separate: false + doc: File to write the output to. Required. 
+ - id: histogram_file + type: string + inputBinding: + position: 0 + prefix: H= + separate: false + doc: File to write insert size Histogram chart to. Required. + - id: deviations + type: float? + inputBinding: + position: 0 + prefix: DEVIATIONS= + separate: false + doc: >- + Generate mean, sd and plots by trimming the data down to MEDIAN + + DEVIATIONS*MEDIAN_ABSOLUTE_DEVIATION. This is done because insert size data typically + includes enough anomalous values from chimeras and other artifacts to make the mean and sd + grossly misleading regarding the real distribution. Default value: 10.0. This option can + be set to 'null' to clear the default value. + - id: histogram_width + type: int? + inputBinding: + position: 0 + prefix: HISTOGRAM_WIDTH= + separate: false + doc: >- + Explicitly sets the Histogram width, overriding automatic truncation of Histogram tail. + Also, when calculating mean and standard deviation, only bins <= Histogram_WIDTH will be + included. Default value: null. + - id: min_histogram_width + type: int? + inputBinding: + position: 0 + prefix: MIN_HISTOGRAM_WIDTH= + separate: false + doc: >- + Minimum width of histogram plots. In the case when the histogram would otherwise + betruncated to a shorter range of sizes, the MIN_HISTOGRAM_WIDTH will enforce a minimum + range. Default value: null. + - id: minimum_pct + type: float? + inputBinding: + position: 0 + prefix: MINIMUM_PCT= + separate: false + doc: >- + When generating the Histogram, discard any data categories (out of FR, TANDEM, RF) that + have fewer than this percentage of overall reads. (Range: 0 to 1). Default value: 0.05. + This option can be set to 'null' to clear the default value. + - id: metrics_acciumulation_level + type: string? + inputBinding: + position: 0 + prefix: LEVEL= + separate: false + doc: >- + The level(s) at which to accumulate metrics. Default value: [ALL_READS]. + This option can be set to 'null' to clear the default value. 
Possible + values: {ALL_READS, SAMPLE, LIBRARY, READ_GROUP} This option may be + specified 0 or more times. This option can be set to 'null' to clear the + default list. + - id: include_duplicates + type: boolean? + inputBinding: + position: 0 + prefix: INCLUDE_DUPLICATES=true + separate: false + doc: >- + If true, also include reads marked as duplicates in the insert size histogram. Default + value: false. This option can be set to 'null' to clear the default value. Possible + values: {true, false} + - id: validation_stringency + type: string? + inputBinding: + position: 0 + prefix: VALIDATION_STRINGENCY= + separate: false + doc: >- + Validation stringency for all SAM files read by this program. Setting + stringency to SILENT can improve performance when processing a BAM file in + which variable-length data (read, qualities, tags) do not otherwise need + to be decoded. Default value: STRICT. This option can be set to 'null' to + clear the default value. Possible values: {STRICT,LENIENT, SILENT} + - id: assume_sorted + default: true + type: boolean? + inputBinding: + position: 0 + prefix: AS=true + doc: >- + If true (default), then the sort order in the header file will be ignored. Default value: + true. This option can be set to 'null' to clear the default value. Possible values: {true, + false} + - id: stop_after + type: int? + inputBinding: + position: 0 + prefix: STOP_AFTER= + doc: >- + Stop after processing N reads, mainly for debugging. Default value: 0. + This option can be set to 'null' to clear the default value. 
+outputs: + - id: insert_size_metrics + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/, '_insert_size_metrics.txt') + } + } + - id: histogram_file_out + type: ["null", File] + outputBinding: + glob: |- + ${ + if(inputs.histogram_file){ + return inputs.histogram_file + } else { + return inputs.input.basename.replace(/.bam/, '_histogram.pdf') + } + } +label: picard_collect_alignment_summary_metrics_2.8.1 +arguments: + - position: 0 + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx8G" + } + else { + return "-Xmx8G" + } + + } + - position: 0 + prefix: '-jar' + valueFrom: /usr/local/bin/picard.jar + - position: 0 + valueFrom: CollectInsertSizeMetrics +requirements: + - class: ResourceRequirement + ramMin: 12000 + coresMin: 1 + - class: DockerRequirement + dockerPull: 'mskaccess/picard:0.6.3' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 
'doap:name': picard + 'doap:revision': 2.8.1 From 3adfa3b023f41f8e747a66806038c01380d139d6 Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Mon, 20 Jul 2020 10:22:53 -0400 Subject: [PATCH 127/476] update collect insert size metrics to gatk --- .../README.md | 0 .../example_inputs.yaml | 3 +- ...tk_collect_insert_size_metrics_4.1.3.0.cwl | 146 ++++++++---------- .../README.md | 105 +++++++++++++ 4 files changed, 172 insertions(+), 82 deletions(-) rename {picard_collect_insert_size_metrics_2.21.2 => gatk_collect_insert_size_metrics_4.1.3.0}/README.md (100%) rename {picard_collect_insert_size_metrics_2.21.2 => gatk_collect_insert_size_metrics_4.1.3.0}/example_inputs.yaml (91%) rename picard_collect_insert_size_metrics_2.21.2/picard_collect_insert_size_metrics_2.21.2.cwl => gatk_collect_insert_size_metrics_4.1.3.0/gatk_collect_insert_size_metrics_4.1.3.0.cwl (64%) create mode 100644 gatk_collect_insert_size_metrics_4_1_3_0/README.md diff --git a/picard_collect_insert_size_metrics_2.21.2/README.md b/gatk_collect_insert_size_metrics_4.1.3.0/README.md similarity index 100% rename from picard_collect_insert_size_metrics_2.21.2/README.md rename to gatk_collect_insert_size_metrics_4.1.3.0/README.md diff --git a/picard_collect_insert_size_metrics_2.21.2/example_inputs.yaml b/gatk_collect_insert_size_metrics_4.1.3.0/example_inputs.yaml similarity index 91% rename from picard_collect_insert_size_metrics_2.21.2/example_inputs.yaml rename to gatk_collect_insert_size_metrics_4.1.3.0/example_inputs.yaml index 0b91a8c2..59a104cd 100644 --- a/picard_collect_insert_size_metrics_2.21.2/example_inputs.yaml +++ b/gatk_collect_insert_size_metrics_4.1.3.0/example_inputs.yaml @@ -9,13 +9,14 @@ output_file_name: "insert_size_metrics.txt" histogram_file: "histogram.pdf" deviations: 10.0 histogram_width: 800 -min_histogram_width: 100 minimum_pct: 0 metrics_acciumulation_level: null include_duplicates: null validation_stringency: null assume_sorted: null stop_after: null +create_index: null 
+create_md5_file: null memory_overhead: null memory_per_job: null number_of_threads: null diff --git a/picard_collect_insert_size_metrics_2.21.2/picard_collect_insert_size_metrics_2.21.2.cwl b/gatk_collect_insert_size_metrics_4.1.3.0/gatk_collect_insert_size_metrics_4.1.3.0.cwl similarity index 64% rename from picard_collect_insert_size_metrics_2.21.2/picard_collect_insert_size_metrics_2.21.2.cwl rename to gatk_collect_insert_size_metrics_4.1.3.0/gatk_collect_insert_size_metrics_4.1.3.0.cwl index 08cece21..dbba9a73 100644 --- a/picard_collect_insert_size_metrics_2.21.2/picard_collect_insert_size_metrics_2.21.2.cwl +++ b/gatk_collect_insert_size_metrics_4.1.3.0/gatk_collect_insert_size_metrics_4.1.3.0.cwl @@ -5,9 +5,11 @@ $namespaces: doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' sbg: 'https://www.sevenbridges.com/' -id: picard_collect_insert_size_metrics_2_21_2 +id: gatk_collect_insert_size_metrics_4_1_3_0 +label: GATK-CollectInsertSizeMetrics baseCommand: - - java + - gatk + - CollectInsertSizeMetrics inputs: - id: memory_per_job type: int? @@ -21,29 +23,25 @@ inputs: type: File inputBinding: position: 0 - prefix: I= - separate: false + prefix: -I doc: Input file (bam or sam). Required. - id: output_file_name type: string inputBinding: position: 0 - prefix: O= - separate: false + prefix: -O doc: File to write the output to. Required. - id: histogram_file type: string inputBinding: position: 0 - prefix: H= - separate: false + prefix: -H doc: File to write insert size Histogram chart to. Required. - id: deviations type: float? inputBinding: position: 0 - prefix: DEVIATIONS= - separate: false + prefix: --DEVIATIONS doc: >- Generate mean, sd and plots by trimming the data down to MEDIAN + DEVIATIONS*MEDIAN_ABSOLUTE_DEVIATION. This is done because insert size data typically @@ -54,28 +52,16 @@ inputs: type: int? 
inputBinding: position: 0 - prefix: HISTOGRAM_WIDTH= - separate: false + prefix: --HISTOGRAM_WIDTH doc: >- Explicitly sets the Histogram width, overriding automatic truncation of Histogram tail. Also, when calculating mean and standard deviation, only bins <= Histogram_WIDTH will be included. Default value: null. - - id: min_histogram_width - type: int? - inputBinding: - position: 0 - prefix: MIN_HISTOGRAM_WIDTH= - separate: false - doc: >- - Minimum width of histogram plots. In the case when the histogram would otherwise - betruncated to a shorter range of sizes, the MIN_HISTOGRAM_WIDTH will enforce a minimum - range. Default value: null. - id: minimum_pct type: float? inputBinding: position: 0 - prefix: MINIMUM_PCT= - separate: false + prefix: --MINIMUM_PCT doc: >- When generating the Histogram, discard any data categories (out of FR, TANDEM, RF) that have fewer than this percentage of overall reads. (Range: 0 to 1). Default value: 0.05. @@ -84,8 +70,7 @@ inputs: type: string? inputBinding: position: 0 - prefix: LEVEL= - separate: false + prefix: --METRIC_ACCUMULATION_LEVEL doc: >- The level(s) at which to accumulate metrics. Default value: [ALL_READS]. This option can be set to 'null' to clear the default value. Possible @@ -96,8 +81,7 @@ inputs: type: boolean? inputBinding: position: 0 - prefix: INCLUDE_DUPLICATES=true - separate: false + prefix: --INCLUDE_DUPLICATES doc: >- If true, also include reads marked as duplicates in the insert size histogram. Default value: false. This option can be set to 'null' to clear the default value. Possible @@ -106,8 +90,7 @@ inputs: type: string? inputBinding: position: 0 - prefix: VALIDATION_STRINGENCY= - separate: false + prefix: --VALIDATION_STRINGENCY doc: >- Validation stringency for all SAM files read by this program. Setting stringency to SILENT can improve performance when processing a BAM file in @@ -119,7 +102,7 @@ inputs: type: boolean? 
inputBinding: position: 0 - prefix: AS=true + prefix: --ASSUME_SORTED doc: >- If true (default), then the sort order in the header file will be ignored. Default value: true. This option can be set to 'null' to clear the default value. Possible values: {true, @@ -128,10 +111,26 @@ inputs: type: int? inputBinding: position: 0 - prefix: STOP_AFTER= + prefix: --STOP_AFTER doc: >- Stop after processing N reads, mainly for debugging. Default value: 0. This option can be set to 'null' to clear the default value. + - id: create_index + type: boolean? + inputBinding: + position: 0 + prefix: --CREATE_INDEX + doc: >- + Whether to create a BAM index when writing a coordinate-sorted BAM file. Default value: + false. Possible values: {true, false} + - id: create_md5_file + type: boolean? + inputBinding: + position: 0 + prefix: --CREATE_MD5_FILE + doc: >- + Whether to create an MD5 digest for any BAM or FASTQ files created. Default value: + false. Possible values: {true, false} outputs: - id: insert_size_metrics type: File @@ -145,7 +144,7 @@ outputs: } } - id: histogram_file_out - type: ["null", File] + type: File outputBinding: glob: |- ${ @@ -155,62 +154,47 @@ outputs: return inputs.input.basename.replace(/.bam/, '_histogram.pdf') } } -label: picard_collect_alignment_summary_metrics_2.8.1 arguments: - position: 0 + prefix: '--java-options' valueFrom: |- ${ - if(inputs.memory_per_job && inputs.memory_overhead) { - if(inputs.memory_per_job % 1000 == 0) { - return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" - } - else { - return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" - } - } - else if (inputs.memory_per_job && !inputs.memory_overhead){ - if(inputs.memory_per_job % 1000 == 0) { - return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" - } - else { - return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" - } - } - else if(!inputs.memory_per_job && inputs.memory_overhead){ - return "-Xmx8G" - } - else { - return 
"-Xmx8G" - } - - } + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx15G" + } + else { + return "-Xmx15G" + } + } + - position: 0 + prefix: '--TMP_DIR' + valueFrom: . - position: 0 - prefix: '-jar' - valueFrom: /usr/local/bin/picard.jar + prefix: '--COMPRESSION_LEVEL' + valueFrom: '2' - position: 0 - valueFrom: CollectInsertSizeMetrics + prefix: '--MAX_RECORDS_IN_RAM' + valueFrom: '50000' requirements: - class: ResourceRequirement - ramMin: 12000 + ramMin: 32000 coresMin: 1 - class: DockerRequirement - dockerPull: 'mskaccess/picard:0.6.3' + dockerPull: 'broadinstitute/gatk:4.1.3.0' - class: InlineJavascriptRequirement -'dct:contributor': - - class: 'foaf:Organization' - 'foaf:member': - - class: 'foaf:Person' - 'foaf:mbox': 'mailto:shahr2@mskcc.org' - 'foaf:name': Ronak Shah - 'foaf:name': Memorial Sloan Kettering Cancer Center -'dct:creator': - - class: 'foaf:Organization' - 'foaf:member': - - class: 'foaf:Person' - 'foaf:mbox': 'mailto:shahr2@mskcc.org' - 'foaf:name': Ronak Shah - 'foaf:name': Memorial Sloan Kettering Cancer Center -'doap:release': - - class: 'doap:Version' - 'doap:name': picard - 'doap:revision': 2.8.1 diff --git a/gatk_collect_insert_size_metrics_4_1_3_0/README.md b/gatk_collect_insert_size_metrics_4_1_3_0/README.md new file mode 100644 index 00000000..c3557e54 --- /dev/null +++ b/gatk_collect_insert_size_metrics_4_1_3_0/README.md @@ -0,0 +1,105 @@ +# CWL for running GATK - CollectInsertSizeMetrics + +## 
Version of tools in docker image + +| Tool | Version | Location | +|--- |--- |--- | +| gatk | 4.1.3.0 | broadinstitute/gatk:4.1.3.0 | + + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner gatk_collect_insert_size_metrics_4.1.3.0.cwl example_inputs.yaml +``` + +### Usage + +```bash +usage: ./GitHub/cwl-commandlinetools/gatk_collect_insert_size_metrics_4.1.3.0/gatk_collect_insert_size_metrics_4.1.3.0.cwl + [-h] [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] --input INPUT + --output_file_name OUTPUT_FILE_NAME --histogram_file HISTOGRAM_FILE + [--deviations DEVIATIONS] [--histogram_width HISTOGRAM_WIDTH] + [--minimum_pct MINIMUM_PCT] + [--metrics_acciumulation_level METRICS_ACCIUMULATION_LEVEL] + [--include_duplicates] [--validation_stringency VALIDATION_STRINGENCY] + [--assume_sorted] [--stop_after STOP_AFTER] [--create_index] + [--create_md5_file] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --input INPUT Input file (bam or sam). Required. + --output_file_name OUTPUT_FILE_NAME + File to write the output to. Required. + --histogram_file HISTOGRAM_FILE + File to write insert size Histogram chart to. + Required. + --deviations DEVIATIONS + Generate mean, sd and plots by trimming the data down + to MEDIAN + DEVIATIONS*MEDIAN_ABSOLUTE_DEVIATION. This + is done because insert size data typically includes + enough anomalous values from chimeras and other + artifacts to make the mean and sd grossly misleading + regarding the real distribution. Default value: 10.0. 
+ This option can be set to 'null' to clear the default + value. + --histogram_width HISTOGRAM_WIDTH + Explicitly sets the Histogram width, overriding + automatic truncation of Histogram tail. Also, when + calculating mean and standard deviation, only bins <= + Histogram_WIDTH will be included. Default value: null. + --minimum_pct MINIMUM_PCT + When generating the Histogram, discard any data + categories (out of FR, TANDEM, RF) that have fewer + than this percentage of overall reads. (Range: 0 to + 1). Default value: 0.05. This option can be set to + 'null' to clear the default value. + --metrics_acciumulation_level METRICS_ACCIUMULATION_LEVEL + The level(s) at which to accumulate metrics. Default + value: [ALL_READS]. This option can be set to 'null' + to clear the default value. Possible values: + {ALL_READS, SAMPLE, LIBRARY, READ_GROUP} This option + may be specified 0 or more times. This option can be + set to 'null' to clear the default list. + --include_duplicates If true, also include reads marked as duplicates in + the insert size histogram. Default value: false. This + option can be set to 'null' to clear the default + value. Possible values: {true, false} + --validation_stringency VALIDATION_STRINGENCY + Validation stringency for all SAM files read by this + program. Setting stringency to SILENT can improve + performance when processing a BAM file in which + variable-length data (read, qualities, tags) do not + otherwise need to be decoded. Default value: STRICT. + This option can be set to 'null' to clear the default + value. Possible values: {STRICT,LENIENT, SILENT} + --assume_sorted If true (default), then the sort order in the header + file will be ignored. Default value: true. This option + can be set to 'null' to clear the default value. + Possible values: {true, false} + --stop_after STOP_AFTER + Stop after processing N reads, mainly for debugging. + Default value: 0. This option can be set to 'null' to + clear the default value. 
+ --create_index Whether to create a BAM index when writing a + coordinate-sorted BAM file. Default value: false. + Possible values: {true, false} + --create_md5_file Whether to create an MD5 digest for any BAM or FASTQ + files created. Default value: false. Possible values: + {true, false} +``` From 103d12352291f964006694a1b71fb3b846ffd9ab Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Mon, 20 Jul 2020 10:23:32 -0400 Subject: [PATCH 128/476] update readme --- .../README.md | 24 ++-- .../README.md | 105 ------------------ 2 files changed, 13 insertions(+), 116 deletions(-) delete mode 100644 gatk_collect_insert_size_metrics_4_1_3_0/README.md diff --git a/gatk_collect_insert_size_metrics_4.1.3.0/README.md b/gatk_collect_insert_size_metrics_4.1.3.0/README.md index 6853906e..c3557e54 100644 --- a/gatk_collect_insert_size_metrics_4.1.3.0/README.md +++ b/gatk_collect_insert_size_metrics_4.1.3.0/README.md @@ -1,10 +1,10 @@ -# CWL for running Picard - CollectInsertSizeMetrics +# CWL for running GATK - CollectInsertSizeMetrics ## Version of tools in docker image | Tool | Version | Location | |--- |--- |--- | -| picard | 2.21.2 | https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar | +| gatk | 4.1.3.0 | broadinstitute/gatk:4.1.3.0 | ## CWL @@ -14,22 +14,23 @@ - Example Command using [toil](https://toil.readthedocs.io): ```bash - > toil-cwl-runner picard_collect_insert_size_metrics_2.21.2.cwl example_inputs.yaml + > toil-cwl-runner gatk_collect_insert_size_metrics_4.1.3.0.cwl example_inputs.yaml ``` ### Usage ```bash -usage: ./GitHub/cwl-commandlinetools/picard_collect_insert_size_metrics_2.21.2/picard_collect_insert_size_metrics_2.21.2.cwl +usage: ./GitHub/cwl-commandlinetools/gatk_collect_insert_size_metrics_4.1.3.0/gatk_collect_insert_size_metrics_4.1.3.0.cwl [-h] [--memory_per_job MEMORY_PER_JOB] [--memory_overhead MEMORY_OVERHEAD] [--number_of_threads NUMBER_OF_THREADS] --input INPUT --output_file_name OUTPUT_FILE_NAME --histogram_file 
HISTOGRAM_FILE [--deviations DEVIATIONS] [--histogram_width HISTOGRAM_WIDTH] - [--min_histogram_width MIN_HISTOGRAM_WIDTH] [--minimum_pct MINIMUM_PCT] + [--minimum_pct MINIMUM_PCT] [--metrics_acciumulation_level METRICS_ACCIUMULATION_LEVEL] [--include_duplicates] [--validation_stringency VALIDATION_STRINGENCY] - [--assume_sorted] [--stop_after STOP_AFTER] + [--assume_sorted] [--stop_after STOP_AFTER] [--create_index] + [--create_md5_file] [job_order] positional arguments: @@ -62,11 +63,6 @@ optional arguments: automatic truncation of Histogram tail. Also, when calculating mean and standard deviation, only bins <= Histogram_WIDTH will be included. Default value: null. - --min_histogram_width MIN_HISTOGRAM_WIDTH - Minimum width of histogram plots. In the case when the - histogram would otherwise betruncated to a shorter - range of sizes, the MIN_HISTOGRAM_WIDTH will enforce a - minimum range. Default value: null. --minimum_pct MINIMUM_PCT When generating the Histogram, discard any data categories (out of FR, TANDEM, RF) that have fewer @@ -100,4 +96,10 @@ optional arguments: Stop after processing N reads, mainly for debugging. Default value: 0. This option can be set to 'null' to clear the default value. + --create_index Whether to create a BAM index when writing a + coordinate-sorted BAM file. Default value: false. + Possible values: {true, false} + --create_md5_file Whether to create an MD5 digest for any BAM or FASTQ + files created. Default value: false. 
Possible values: + {true, false} ``` diff --git a/gatk_collect_insert_size_metrics_4_1_3_0/README.md b/gatk_collect_insert_size_metrics_4_1_3_0/README.md deleted file mode 100644 index c3557e54..00000000 --- a/gatk_collect_insert_size_metrics_4_1_3_0/README.md +++ /dev/null @@ -1,105 +0,0 @@ -# CWL for running GATK - CollectInsertSizeMetrics - -## Version of tools in docker image - -| Tool | Version | Location | -|--- |--- |--- | -| gatk | 4.1.3.0 | broadinstitute/gatk:4.1.3.0 | - - -## CWL - -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner gatk_collect_insert_size_metrics_4.1.3.0.cwl example_inputs.yaml -``` - -### Usage - -```bash -usage: ./GitHub/cwl-commandlinetools/gatk_collect_insert_size_metrics_4.1.3.0/gatk_collect_insert_size_metrics_4.1.3.0.cwl - [-h] [--memory_per_job MEMORY_PER_JOB] - [--memory_overhead MEMORY_OVERHEAD] - [--number_of_threads NUMBER_OF_THREADS] --input INPUT - --output_file_name OUTPUT_FILE_NAME --histogram_file HISTOGRAM_FILE - [--deviations DEVIATIONS] [--histogram_width HISTOGRAM_WIDTH] - [--minimum_pct MINIMUM_PCT] - [--metrics_acciumulation_level METRICS_ACCIUMULATION_LEVEL] - [--include_duplicates] [--validation_stringency VALIDATION_STRINGENCY] - [--assume_sorted] [--stop_after STOP_AFTER] [--create_index] - [--create_md5_file] - [job_order] - -positional arguments: - job_order Job input json file - -optional arguments: - -h, --help show this help message and exit - --memory_per_job MEMORY_PER_JOB - Memory per job in megabytes - --memory_overhead MEMORY_OVERHEAD - Memory overhead per job in megabytes - --number_of_threads NUMBER_OF_THREADS - --input INPUT Input file (bam or sam). Required. - --output_file_name OUTPUT_FILE_NAME - File to write the output to. Required. - --histogram_file HISTOGRAM_FILE - File to write insert size Histogram chart to. - Required. 
- --deviations DEVIATIONS - Generate mean, sd and plots by trimming the data down - to MEDIAN + DEVIATIONS*MEDIAN_ABSOLUTE_DEVIATION. This - is done because insert size data typically includes - enough anomalous values from chimeras and other - artifacts to make the mean and sd grossly misleading - regarding the real distribution. Default value: 10.0. - This option can be set to 'null' to clear the default - value. - --histogram_width HISTOGRAM_WIDTH - Explicitly sets the Histogram width, overriding - automatic truncation of Histogram tail. Also, when - calculating mean and standard deviation, only bins <= - Histogram_WIDTH will be included. Default value: null. - --minimum_pct MINIMUM_PCT - When generating the Histogram, discard any data - categories (out of FR, TANDEM, RF) that have fewer - than this percentage of overall reads. (Range: 0 to - 1). Default value: 0.05. This option can be set to - 'null' to clear the default value. - --metrics_acciumulation_level METRICS_ACCIUMULATION_LEVEL - The level(s) at which to accumulate metrics. Default - value: [ALL_READS]. This option can be set to 'null' - to clear the default value. Possible values: - {ALL_READS, SAMPLE, LIBRARY, READ_GROUP} This option - may be specified 0 or more times. This option can be - set to 'null' to clear the default list. - --include_duplicates If true, also include reads marked as duplicates in - the insert size histogram. Default value: false. This - option can be set to 'null' to clear the default - value. Possible values: {true, false} - --validation_stringency VALIDATION_STRINGENCY - Validation stringency for all SAM files read by this - program. Setting stringency to SILENT can improve - performance when processing a BAM file in which - variable-length data (read, qualities, tags) do not - otherwise need to be decoded. Default value: STRICT. - This option can be set to 'null' to clear the default - value. 
Possible values: {STRICT,LENIENT, SILENT} - --assume_sorted If true (default), then the sort order in the header - file will be ignored. Default value: true. This option - can be set to 'null' to clear the default value. - Possible values: {true, false} - --stop_after STOP_AFTER - Stop after processing N reads, mainly for debugging. - Default value: 0. This option can be set to 'null' to - clear the default value. - --create_index Whether to create a BAM index when writing a - coordinate-sorted BAM file. Default value: false. - Possible values: {true, false} - --create_md5_file Whether to create an MD5 digest for any BAM or FASTQ - files created. Default value: false. Possible values: - {true, false} -``` From 31bd1a6c47a70fc5d2c6ba248dcbb402c0ad1e05 Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Mon, 20 Jul 2020 12:17:53 -0400 Subject: [PATCH 129/476] updated to 4.1.8.0 for gatk insert size metrics --- .../README.md | 6 +++--- .../example_inputs.yaml | 0 .../gatk_collect_insert_size_metrics_4.1.8.0.cwl | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) rename {gatk_collect_insert_size_metrics_4.1.3.0 => gatk_collect_insert_size_metrics_4.1.8.0}/README.md (96%) rename {gatk_collect_insert_size_metrics_4.1.3.0 => gatk_collect_insert_size_metrics_4.1.8.0}/example_inputs.yaml (100%) rename gatk_collect_insert_size_metrics_4.1.3.0/gatk_collect_insert_size_metrics_4.1.3.0.cwl => gatk_collect_insert_size_metrics_4.1.8.0/gatk_collect_insert_size_metrics_4.1.8.0.cwl (98%) diff --git a/gatk_collect_insert_size_metrics_4.1.3.0/README.md b/gatk_collect_insert_size_metrics_4.1.8.0/README.md similarity index 96% rename from gatk_collect_insert_size_metrics_4.1.3.0/README.md rename to gatk_collect_insert_size_metrics_4.1.8.0/README.md index c3557e54..b0ea21bf 100644 --- a/gatk_collect_insert_size_metrics_4.1.3.0/README.md +++ b/gatk_collect_insert_size_metrics_4.1.8.0/README.md @@ -4,7 +4,7 @@ | Tool | Version | Location | |--- |--- |--- | -| gatk | 4.1.3.0 | 
broadinstitute/gatk:4.1.3.0 | +| gatk | 4.1.8.0 | broadinstitute/gatk:4.1.8.0 | ## CWL @@ -14,13 +14,13 @@ - Example Command using [toil](https://toil.readthedocs.io): ```bash - > toil-cwl-runner gatk_collect_insert_size_metrics_4.1.3.0.cwl example_inputs.yaml + > toil-cwl-runner gatk_collect_insert_size_metrics_4.1.8.0.cwl example_inputs.yaml ``` ### Usage ```bash -usage: ./GitHub/cwl-commandlinetools/gatk_collect_insert_size_metrics_4.1.3.0/gatk_collect_insert_size_metrics_4.1.3.0.cwl +usage: ./GitHub/cwl-commandlinetools/gatk_collect_insert_size_metrics_4.1.8.0/gatk_collect_insert_size_metrics_4.1.8.0.cwl [-h] [--memory_per_job MEMORY_PER_JOB] [--memory_overhead MEMORY_OVERHEAD] [--number_of_threads NUMBER_OF_THREADS] --input INPUT diff --git a/gatk_collect_insert_size_metrics_4.1.3.0/example_inputs.yaml b/gatk_collect_insert_size_metrics_4.1.8.0/example_inputs.yaml similarity index 100% rename from gatk_collect_insert_size_metrics_4.1.3.0/example_inputs.yaml rename to gatk_collect_insert_size_metrics_4.1.8.0/example_inputs.yaml diff --git a/gatk_collect_insert_size_metrics_4.1.3.0/gatk_collect_insert_size_metrics_4.1.3.0.cwl b/gatk_collect_insert_size_metrics_4.1.8.0/gatk_collect_insert_size_metrics_4.1.8.0.cwl similarity index 98% rename from gatk_collect_insert_size_metrics_4.1.3.0/gatk_collect_insert_size_metrics_4.1.3.0.cwl rename to gatk_collect_insert_size_metrics_4.1.8.0/gatk_collect_insert_size_metrics_4.1.8.0.cwl index dbba9a73..261339d0 100644 --- a/gatk_collect_insert_size_metrics_4.1.3.0/gatk_collect_insert_size_metrics_4.1.3.0.cwl +++ b/gatk_collect_insert_size_metrics_4.1.8.0/gatk_collect_insert_size_metrics_4.1.8.0.cwl @@ -5,7 +5,7 @@ $namespaces: doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' sbg: 'https://www.sevenbridges.com/' -id: gatk_collect_insert_size_metrics_4_1_3_0 +id: gatk_collect_insert_size_metrics_4_1_8_0 label: GATK-CollectInsertSizeMetrics baseCommand: - gatk @@ -196,5 +196,5 @@ requirements: ramMin: 
32000 coresMin: 1 - class: DockerRequirement - dockerPull: 'broadinstitute/gatk:4.1.3.0' + dockerPull: 'broadinstitute/gatk:4.1.8.0' - class: InlineJavascriptRequirement From dfe2b2d7c13ade465e7421312e2aa351d24e7469 Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Mon, 20 Jul 2020 12:24:15 -0400 Subject: [PATCH 130/476] added gatk alignment summary metrics --- .../README.md | 99 +++++++++ .../example_inputs.yaml | 28 +++ ...lect_alignment_summary_metrics_4.1.8.0.cwl | 195 ++++++++++++++++++ 3 files changed, 322 insertions(+) create mode 100644 gatk_collect_alignment_summary_metrics_4.1.8.0/README.md create mode 100644 gatk_collect_alignment_summary_metrics_4.1.8.0/example_inputs.yaml create mode 100644 gatk_collect_alignment_summary_metrics_4.1.8.0/gatk_collect_alignment_summary_metrics_4.1.8.0.cwl diff --git a/gatk_collect_alignment_summary_metrics_4.1.8.0/README.md b/gatk_collect_alignment_summary_metrics_4.1.8.0/README.md new file mode 100644 index 00000000..d1c1901d --- /dev/null +++ b/gatk_collect_alignment_summary_metrics_4.1.8.0/README.md @@ -0,0 +1,99 @@ +# CWL for running GATK - CollectAlignmentSummaryMetrics + +## Version of tools in docker image + +| Tool | Version | Location | +|--- |--- |--- | +| gatk | 4.1.8.0 | broadinstitute/gatk:4.1.8.0 | + + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner gatk_collect_alignment_summary_metrics_4.1.8.0.cwl example_inputs.yaml +``` + +### Usage + +```bash +usage: ./GitHub/cwl-commandlinetools/gatk_collect_alignment_summary_metrics_4.1.8.0/gatk_collect_alignment_summary_metrics_4.1.8.0.cwl + [-h] [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] --input INPUT + --output_file_name OUTPUT_FILE_NAME [--reference REFERENCE] + [--adaptor_sequence ADAPTOR_SEQUENCE] + [--metrics_acciumulation_level METRICS_ACCIUMULATION_LEVEL] + 
[--expected_pair_orientations EXPECTED_PAIR_ORIENTATIONS] + [--is_bisulfite_sequenced] [--max_insert_size MAX_INSERT_SIZE] + [--validation_stringency VALIDATION_STRINGENCY] [--assume_sorted] + [--stop_after STOP_AFTER] [--create_index] [--create_md5_file] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --input INPUT Input file (bam or sam). Required. + --output_file_name OUTPUT_FILE_NAME + File to write the output to. Required. + --reference REFERENCE + Reference sequence file. Note that while this argument + is not required, without it only a small subset of the + metrics will be calculated. Note also that if a + reference sequence is provided, it must be accompanied + by a sequence dictionary. Default value: null. + --adaptor_sequence ADAPTOR_SEQUENCE + List of adapter sequences to use when processing the + alignment metrics. This argument may be specified 0 or more times. Default value: [AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT, AGATCGGAAGAGCTCGTATGCCGTCTTCTGCTTG, AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT, AGATCGGAAGAGCGGTTCAGCAGGAATGCCGAGACCGATCTCGTATGCCGTCTTCTGCTTG, AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT, AGATCGGAAGAGCACACGTCTGAACTCCAGTCACNNNNNNNNATCTCGTATGCCGTCTTCTGCTTG]. + --metrics_acciumulation_level METRICS_ACCIUMULATION_LEVEL + The level(s) at which to accumulate metrics. Default value: [ALL_READS]. This option can be set to 'null' + to clear the default value. Possible values: + {ALL_READS, SAMPLE, LIBRARY, READ_GROUP} This option + may be specified 0 or more times. This option can be + set to 'null' to clear the default list. + --expected_pair_orientations EXPECTED_PAIR_ORIENTATIONS + Paired-end reads that do not have this expected + orientation will be considered chimeric. This argument + may be specified 0 or more times. Default value: [FR]. + Possible values: {FR, RF, TANDEM} + --is_bisulfite_sequenced + Whether the SAM or BAM file consists of bisulfite + sequenced reads. Default value: false. Possible + values: {true, false} + --max_insert_size MAX_INSERT_SIZE + Paired-end reads above this insert size will be + considered chimeric along with inter-chromosomal + pairs.
Default value: 100000. + --validation_stringency VALIDATION_STRINGENCY + Validation stringency for all SAM files read by this + program. Setting stringency to SILENT can improve + performance when processing a BAM file in which + variable-length data (read, qualities, tags) do not + otherwise need to be decoded. Default value: STRICT. + This option can be set to 'null' to clear the default + value. Possible values: {STRICT,LENIENT, SILENT} + --assume_sorted If true (default), then the sort order in the header + file will be ignored. Default value: true. This option + can be set to 'null' to clear the default value. + Possible values: {true, false} + --stop_after STOP_AFTER + Stop after processing N reads, mainly for debugging. + Default value: 0. This option can be set to 'null' to + clear the default value. + --create_index Whether to create a BAM index when writing a + coordinate-sorted BAM file. Default value: false. + Possible values: {true, false} + --create_md5_file Whether to create an MD5 digest for any BAM or FASTQ + files created. Default value: false. Possible values: + {true, false} +``` diff --git a/gatk_collect_alignment_summary_metrics_4.1.8.0/example_inputs.yaml b/gatk_collect_alignment_summary_metrics_4.1.8.0/example_inputs.yaml new file mode 100644 index 00000000..665cef4b --- /dev/null +++ b/gatk_collect_alignment_summary_metrics_4.1.8.0/example_inputs.yaml @@ -0,0 +1,28 @@ +input: # BAM/SAM to summarise; the tool passes it to gatk as -I + class: File + metadata: {} + path: "/path/to/bam" + secondaryFiles: + - class: File + path: "/path/to/bam.bai" +reference: # optional reference FASTA (-R); the tool expects its sequence dictionary as a secondary file + class: File
+ metadata: {} + path: "/path/to/fasta" + secondaryFiles: + - class: File + path: "/path/to/reference.dict" +output_file_name: "alignment_summary_metrics.txt" +adaptor_sequence: null +metrics_acciumulation_level: null +expected_pair_orientations: null +is_bisulfite_sequenced: false +max_insert_size: null +validation_stringency: null +assume_sorted: null +stop_after: null +create_index: null +create_md5_file: null +memory_overhead: null +memory_per_job: null +number_of_threads: null diff --git a/gatk_collect_alignment_summary_metrics_4.1.8.0/gatk_collect_alignment_summary_metrics_4.1.8.0.cwl b/gatk_collect_alignment_summary_metrics_4.1.8.0/gatk_collect_alignment_summary_metrics_4.1.8.0.cwl new file mode 100644 index 00000000..ecbfc216 --- /dev/null +++ b/gatk_collect_alignment_summary_metrics_4.1.8.0/gatk_collect_alignment_summary_metrics_4.1.8.0.cwl @@ -0,0 +1,195 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: gatk_collect_alignment_summary_metrics_4_1_3_0 +label: GATK-CollectAlignmentSummaryMetrics +baseCommand: + - gatk + - CollectAlignmentSummaryMetrics +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: input + type: File + inputBinding: + position: 0 + prefix: -I + doc: Input file (bam or sam). Required. + - id: output_file_name + type: string + inputBinding: + position: 0 + prefix: -O + doc: File to write the output to. Required. + - id: reference + type: File? + inputBinding: + position: 0 + prefix: -R + secondaryFiles: + - ^.dict + doc: >- + Reference sequence file. Note that while this argument is not required, without it only a + small subset of the metrics will be calculated.
Note also that if a reference sequence is + provided, it must be accompanied by a sequence dictionary. Default value: null. + - id: adaptor_sequence + type: string? + inputBinding: + position: 0 + prefix: --ADAPTER_SEQUENCE + doc: >- + List of adapter sequences to use when processing the alignment metrics. This argument may + be specified 0 or more times. Default value: + [AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT, + AGATCGGAAGAGCTCGTATGCCGTCTTCTGCTTG, + AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT, + AGATCGGAAGAGCGGTTCAGCAGGAATGCCGAGACCGATCTCGTATGCCGTCTTCTGCTTG, + AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT, + AGATCGGAAGAGCACACGTCTGAACTCCAGTCACNNNNNNNNATCTCGTATGCCGTCTTCTGCTTG]. + - id: metrics_acciumulation_level + type: string? + inputBinding: + position: 0 + prefix: --METRIC_ACCUMULATION_LEVEL + doc: >- + The level(s) at which to accumulate metrics. Default value: [ALL_READS]. + This option can be set to 'null' to clear the default value. Possible + values: {ALL_READS, SAMPLE, LIBRARY, READ_GROUP} This option may be + specified 0 or more times. This option can be set to 'null' to clear the + default list. + - id: expected_pair_orientations + type: string? + inputBinding: + position: 0 + prefix: --EXPECTED_PAIR_ORIENTATIONS + doc: >- + Paired-end reads that do not have this expected orientation will be considered chimeric. + This argument may be specified 0 or more times. Default value: [FR]. Possible values: {FR, + RF, TANDEM} + - id: is_bisulfite_sequenced + type: boolean? + inputBinding: + position: 0 + prefix: --IS_BISULFITE_SEQUENCED + doc: >- + Whether the SAM or BAM file consists of bisulfite sequenced reads. Default value: false. + Possible values: {true, false} + - id: max_insert_size + type: int? + inputBinding: + position: 0 + prefix: --MAX_INSERT_SIZE + doc: >- + Paired-end reads above this insert size will be considered chimeric along with + inter-chromosomal pairs. Default value: 100000. 
+ - id: validation_stringency + type: string? + inputBinding: + position: 0 + prefix: --VALIDATION_STRINGENCY + doc: >- + Validation stringency for all SAM files read by this program. Setting + stringency to SILENT can improve performance when processing a BAM file in + which variable-length data (read, qualities, tags) do not otherwise need + to be decoded. Default value: STRICT. This option can be set to 'null' to + clear the default value. Possible values: {STRICT,LENIENT, SILENT} + - id: assume_sorted + default: true + type: boolean? + inputBinding: + position: 0 + prefix: --ASSUME_SORTED + doc: >- + If true (default), then the sort order in the header file will be ignored. Default value: + true. This option can be set to 'null' to clear the default value. Possible values: {true, + false} + - id: stop_after + type: int? + inputBinding: + position: 0 + prefix: --STOP_AFTER + doc: >- + Stop after processing N reads, mainly for debugging. Default value: 0. + This option can be set to 'null' to clear the default value. + - id: create_index + type: boolean? + inputBinding: + position: 0 + prefix: --CREATE_INDEX + doc: >- + Whether to create a BAM index when writing a coordinate-sorted BAM file. Default value: + false. Possible values: {true, false} + - id: create_md5_file + type: boolean? + inputBinding: + position: 0 + prefix: --CREATE_MD5_FILE + doc: >- + Whether to create an MD5 digest for any BAM or FASTQ files created. Default value: + false. 
Possible values: {true, false} +outputs: + - id: alignment_summary_metrics + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/, '_alignment_summary_metrics.txt') + } + } +arguments: + - position: 0 + prefix: '--java-options' + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx15G" + } + else { + return "-Xmx15G" + } + } + - position: 0 + prefix: '--TMP_DIR' + valueFrom: . 
+ - position: 0 + prefix: '--COMPRESSION_LEVEL' + valueFrom: '2' + - position: 0 + prefix: '--MAX_RECORDS_IN_RAM' + valueFrom: '50000' +requirements: + - class: ResourceRequirement + ramMin: 32000 + coresMin: 1 + - class: DockerRequirement + dockerPull: 'broadinstitute/gatk:4.1.8.0' + - class: InlineJavascriptRequirement From 95411dfc3fc2df597a413b866667d6ab5f2b9b36 Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Mon, 20 Jul 2020 14:26:51 -0400 Subject: [PATCH 131/476] added gatk hs metrics --- gatk_collect_hs_metrics_4.1.8.0/README.md | 136 +++++++++ .../gatk_collect_hs_metrics_4.1.8.0.cwl | 272 ++++++++++++++++++ 2 files changed, 408 insertions(+) create mode 100644 gatk_collect_hs_metrics_4.1.8.0/README.md create mode 100644 gatk_collect_hs_metrics_4.1.8.0/gatk_collect_hs_metrics_4.1.8.0.cwl diff --git a/gatk_collect_hs_metrics_4.1.8.0/README.md b/gatk_collect_hs_metrics_4.1.8.0/README.md new file mode 100644 index 00000000..2f0ab1b8 --- /dev/null +++ b/gatk_collect_hs_metrics_4.1.8.0/README.md @@ -0,0 +1,136 @@ +# CWL for running GATK - CollectHsMetrics + +## Version of tools in docker image + +| Tool | Version | Location | +|--- |--- |--- | +| gatk | 4.1.8.0 | broadinstitute/gatk:4.1.8.0 | + + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner gatk_collect_hs_metrics_4.1.8.0.cwl example_inputs.yaml +``` + +### Usage + +```bash +usage: ./GitHub/cwl-commandlinetools/gatk_collect_hs_metrics_4.1.8.0/gatk_collect_hs_metrics_4.1.8.0.cwl + [-h] --input INPUT --bait_intervals BAIT_INTERVALS --target_intervals + TARGET_INTERVALS --output_file_name OUTPUT_FILE_NAME + [--per_base_coverage PER_BASE_COVERAGE] + [--per_target_coverage PER_TARGET_COVERAGE] + [--theoretical_sensitivity_output THEORETICAL_SENSITIVITY_OUTPUT] + [--allele_fraction ALLELE_FRACTION] [--bait_set_name BAIT_SET_NAME] + [--clip_overlapping_reads] [--coverage_cap 
COVERAGE_CAP] + [--include_indels] [--minimum_base_quality MINIMUM_BASE_QUALITY] + [--minimum_mapping_quality MINIMUM_MAPPING_QUALITY] + [--near_distance NEAR_DISTANCE] [--sample_size SAMPLE_SIZE] + [--reference REFERENCE] + [--metrics_acciumulation_level METRICS_ACCIUMULATION_LEVEL] + [--validation_stringency VALIDATION_STRINGENCY] [--create_index] + [--create_md5_file] [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --input INPUT An aligned SAM or BAM file. Required. + --bait_intervals BAIT_INTERVALS + An interval list file that contains the locations of + the baits used. This argument must be specified at + least once. Required. + --target_intervals TARGET_INTERVALS + An interval list file that contains the locations of + the targets. This argument must be specified at least + once. Required. + --output_file_name OUTPUT_FILE_NAME + The output file to write the metrics to. Required. + --per_base_coverage PER_BASE_COVERAGE + An optional file to output per base coverage + information to. The per-base file contains one line + per target base and can grow very large. It is not + recommended for use with large target sets. Default + value: null. + --per_target_coverage PER_TARGET_COVERAGE + An optional file to output per target coverage + information to. Default value: null. + --theoretical_sensitivity_output THEORETICAL_SENSITIVITY_OUTPUT + Output for Theoretical Sensitivity metrics where the + allele fractions are provided by the ALLELE_FRACTION + argument. Default value: null. + --allele_fraction ALLELE_FRACTION + Allele fraction for which to calculate theoretical + sensitivity. This argument may be specified 0 or more + times. Default value: [0.001, 0.005, 0.01, 0.02, 0.05, + 0.1, 0.2, 0.3, 0.5]. + --bait_set_name BAIT_SET_NAME + Bait set name. 
If not provided it is inferred from the + filename of the bait intervals. Default value: null. + --clip_overlapping_reads + True if we are to clip overlapping reads, false + otherwise. Default value: true. Possible values: + {true, false} + --coverage_cap COVERAGE_CAP + Parameter to set a max coverage limit for Theoretical + Sensitivity calculations. Default is 200. Default + value: 200. + --include_indels If true count inserted bases as on target and deleted + bases as covered by a read. Default value: false. + Possible values: {true, false} + --minimum_base_quality MINIMUM_BASE_QUALITY + Minimum base quality for a base to contribute + coverage. Default value: 20. + --minimum_mapping_quality MINIMUM_MAPPING_QUALITY + Minimum mapping quality for a read to contribute + coverage. Default value: 20. + --near_distance NEAR_DISTANCE + The maximum distance between a read and the nearest + probe/bait/amplicon for the read to be considered + 'near probe' and included in percent selected. Default + value: 250. + --sample_size SAMPLE_SIZE + Sample Size used for Theoretical Het Sensitivity + sampling. Default is 10000. Default value: 10000. + --reference REFERENCE + Reference sequence file. Note that while this argument + is not required, without it only a small subset of the + metrics will be calculated. Note also that if a + reference sequence is provided, it must be accompanied + by a sequence dictionary. Default value: null. + --metrics_acciumulation_level METRICS_ACCIUMULATION_LEVEL + The level(s) at which to accumulate metrics. Default + value: [ALL_READS]. This option can be set to 'null' + to clear the default value. Possible values: + {ALL_READS, SAMPLE, LIBRARY, READ_GROUP} This option + may be specified 0 or more times. This option can be + set to 'null' to clear the default list. + --validation_stringency VALIDATION_STRINGENCY + Validation stringency for all SAM files read by this + program. 
Setting stringency to SILENT can improve + performance when processing a BAM file in which + variable-length data (read, qualities, tags) do not + otherwise need to be decoded. Default value: STRICT. + This option can be set to 'null' to clear the default + value. Possible values: {STRICT,LENIENT, SILENT} + --create_index Whether to create a BAM index when writing a + coordinate-sorted BAM file. Default value: false. + Possible values: {true, false} + --create_md5_file Whether to create an MD5 digest for any BAM or FASTQ + files created. Default value: false. Possible values: + {true, false} + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS +``` diff --git a/gatk_collect_hs_metrics_4.1.8.0/gatk_collect_hs_metrics_4.1.8.0.cwl b/gatk_collect_hs_metrics_4.1.8.0/gatk_collect_hs_metrics_4.1.8.0.cwl new file mode 100644 index 00000000..dd707bc9 --- /dev/null +++ b/gatk_collect_hs_metrics_4.1.8.0/gatk_collect_hs_metrics_4.1.8.0.cwl @@ -0,0 +1,272 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: gatk_collect_hs_metrics_4_1_8_0 +label: GATK-CollectHsMetrics +baseCommand: + - gatk + - CollectHsMetrics +inputs: + - id: input + type: File + inputBinding: + position: 0 + prefix: -I + doc: An aligned SAM or BAM file. Required. + - id: bait_intervals + type: File + inputBinding: + position: 0 + prefix: --BAIT_INTERVALS + doc: >- + An interval list file that contains the locations of the baits used. This argument must + be specified at least once. Required. + - id: target_intervals + type: File + inputBinding: + position: 0 + prefix: --TARGET_INTERVALS + doc: >- + An interval list file that contains the locations of the targets. This argument must be + specified at least once. Required. 
+ - id: output_file_name + type: string + inputBinding: + position: 0 + prefix: -O + doc: The output file to write the metrics to. Required. + - id: per_base_coverage + type: string? + inputBinding: + position: 0 + prefix: --PER_BASE_COVERAGE + doc: >- + An optional file to output per base coverage information to. The per-base file contains + one line per target base and can grow very large. It is not recommended for use with large + target sets. Default value: null. + - id: per_target_coverage + type: string? + inputBinding: + position: 0 + prefix: --PER_TARGET_COVERAGE + doc: >- + An optional file to output per target coverage information to. Default value: null. + - id: theoretical_sensitivity_output + type: string? + inputBinding: + position: 0 + prefix: --THEORETICAL_SENSITIVITY_OUTPUT + doc: >- + Output for Theoretical Sensitivity metrics where the allele fractions are provided by the + ALLELE_FRACTION argument. Default value: null. + - id: allele_fraction + type: float? + inputBinding: + position: 0 + prefix: --ALLELE_FRACTION + doc: >- + Allele fraction for which to calculate theoretical sensitivity. This argument may be + specified 0 or more times. Default value: [0.001, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2, 0.3, + 0.5]. + - id: bait_set_name + type: string? + inputBinding: + position: 0 + prefix: --BAIT_SET_NAME + doc: >- + Bait set name. If not provided it is inferred from the filename of the bait intervals. + Default value: null. + - id: clip_overlapping_reads + type: boolean? + inputBinding: + position: 0 + prefix: --CLIP_OVERLAPPING_READS + doc: >- + True if we are to clip overlapping reads, false otherwise. Default value: true. Possible + values: {true, false} + - id: coverage_cap + type: int? + inputBinding: + position: 0 + prefix: --COVERAGE_CAP + doc: >- + Parameter to set a max coverage limit for Theoretical Sensitivity calculations. Default is + 200. Default value: 200. + - id: include_indels + type: boolean? 
+ inputBinding: + position: 0 + prefix: --INCLUDE_INDELS + doc: >- + If true count inserted bases as on target and deleted bases as covered by a read. Default + value: false. Possible values: {true, false} + - id: minimum_base_quality + type: int? + inputBinding: + position: 0 + prefix: --MINIMUM_BASE_QUALITY + doc: >- + Minimum base quality for a base to contribute coverage. Default value: 20. + - id: minimum_mapping_quality + type: int? + inputBinding: + position: 0 + prefix: --MINIMUM_MAPPING_QUALITY + doc: >- + Minimum mapping quality for a read to contribute coverage. Default value: 20. + - id: near_distance + type: int? + inputBinding: + position: 0 + prefix: --NEAR_DISTANCE + doc: >- + The maximum distance between a read and the nearest probe/bait/amplicon for the read to be + considered 'near probe' and included in percent selected. Default value: 250. + - id: sample_size + type: int? + inputBinding: + position: 0 + prefix: --SAMPLE_SIZE + doc: >- + Sample Size used for Theoretical Het Sensitivity sampling. Default is 10000. Default + value: 10000. + - id: reference + type: File? + inputBinding: + position: 0 + prefix: -R + secondaryFiles: + - ^.fasta.fai + - ^.dict + doc: >- + Reference sequence file. Note that while this argument is not required, without it only a + small subset of the metrics will be calculated. Note also that if a reference sequence is + provided, it must be accompanied by a sequence dictionary. Default value: null. + - id: metrics_acciumulation_level + type: string? + inputBinding: + position: 0 + prefix: --METRIC_ACCUMULATION_LEVEL + doc: >- + The level(s) at which to accumulate metrics. Default value: [ALL_READS]. + This option can be set to 'null' to clear the default value. Possible + values: {ALL_READS, SAMPLE, LIBRARY, READ_GROUP} This option may be + specified 0 or more times. This option can be set to 'null' to clear the + default list. + - id: validation_stringency + type: string? 
+ inputBinding: + position: 0 + prefix: --VALIDATION_STRINGENCY + doc: >- + Validation stringency for all SAM files read by this program. Setting + stringency to SILENT can improve performance when processing a BAM file in + which variable-length data (read, qualities, tags) do not otherwise need + to be decoded. Default value: STRICT. This option can be set to 'null' to + clear the default value. Possible values: {STRICT,LENIENT, SILENT} + - id: create_index + type: boolean? + inputBinding: + position: 0 + prefix: --CREATE_INDEX + doc: >- + Whether to create a BAM index when writing a coordinate-sorted BAM file. Default value: + false. Possible values: {true, false} + - id: create_md5_file + type: boolean? + inputBinding: + position: 0 + prefix: --CREATE_MD5_FILE + doc: >- + Whether to create an MD5 digest for any BAM or FASTQ files created. Default value: + false. Possible values: {true, false} + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? 
+outputs: + - id: hs_metrics + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/, '_hs_metrics.txt') + } + } + - id: per_base_coverage_out + type: File + outputBinding: + glob: |- + ${ + if(inputs.per_base_coverage){ + return inputs.per_base_coverage + } else { + return inputs.input.basename.replace(/.bam/, '_per_base_coverage.txt') + } + } + - id: per_target_coverage_out + type: File + outputBinding: + glob: |- + ${ + if(inputs.per_target_coverage){ + return inputs.per_target_coverage + } else { + return inputs.input.basename.replace(/.bam/, '_per_target_coverage.txt') + } + } +arguments: + - position: 0 + prefix: '--java-options' + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx15G" + } + else { + return "-Xmx15G" + } + } + - position: 0 + prefix: '--TMP_DIR' + valueFrom: . 
+ - position: 0 + prefix: '--COMPRESSION_LEVEL' + valueFrom: '2' + - position: 0 + prefix: '--MAX_RECORDS_IN_RAM' + valueFrom: '50000' +requirements: + - class: ResourceRequirement + ramMin: 32000 + coresMin: 1 + - class: DockerRequirement + dockerPull: 'broadinstitute/gatk:4.1.8.0' + - class: InlineJavascriptRequirement From bffba1a62fa4baddf64ff256d0b34042fd6eced1 Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Fri, 24 Jul 2020 10:21:25 -0400 Subject: [PATCH 132/476] set default output filenames for gatk-picardtools --- .../gatk_collect_alignment_summary_metrics_4.1.8.0.cwl | 2 ++ .../gatk_collect_hs_metrics_4.1.8.0.cwl | 2 ++ .../gatk_collect_insert_size_metrics_4.1.8.0.cwl | 4 ++++ 3 files changed, 8 insertions(+) diff --git a/gatk_collect_alignment_summary_metrics_4.1.8.0/gatk_collect_alignment_summary_metrics_4.1.8.0.cwl b/gatk_collect_alignment_summary_metrics_4.1.8.0/gatk_collect_alignment_summary_metrics_4.1.8.0.cwl index ecbfc216..75e15307 100644 --- a/gatk_collect_alignment_summary_metrics_4.1.8.0/gatk_collect_alignment_summary_metrics_4.1.8.0.cwl +++ b/gatk_collect_alignment_summary_metrics_4.1.8.0/gatk_collect_alignment_summary_metrics_4.1.8.0.cwl @@ -27,9 +27,11 @@ inputs: doc: Input file (bam or sam). Required. - id: output_file_name type: string + default: '$(inputs.input.basename.replace(/.bam/, ''_alignment_summary_metrics.txt''))' inputBinding: position: 0 prefix: -O + valueFrom: '$(inputs.input.basename.replace(/.bam/, ''_alignment_summary_metrics.txt''))' doc: File to write the output to. Required. - id: reference type: File? diff --git a/gatk_collect_hs_metrics_4.1.8.0/gatk_collect_hs_metrics_4.1.8.0.cwl b/gatk_collect_hs_metrics_4.1.8.0/gatk_collect_hs_metrics_4.1.8.0.cwl index dd707bc9..669fe512 100644 --- a/gatk_collect_hs_metrics_4.1.8.0/gatk_collect_hs_metrics_4.1.8.0.cwl +++ b/gatk_collect_hs_metrics_4.1.8.0/gatk_collect_hs_metrics_4.1.8.0.cwl @@ -35,9 +35,11 @@ inputs: specified at least once. Required. 
- id: output_file_name type: string + default: '$(inputs.input.basename.replace(/.bam/, ''_hs_metrics.txt''))' inputBinding: position: 0 prefix: -O + valueFrom: '$(inputs.input.basename.replace(/.bam/, ''_hs_metrics.txt''))' doc: The output file to write the metrics to. Required. - id: per_base_coverage type: string? diff --git a/gatk_collect_insert_size_metrics_4.1.8.0/gatk_collect_insert_size_metrics_4.1.8.0.cwl b/gatk_collect_insert_size_metrics_4.1.8.0/gatk_collect_insert_size_metrics_4.1.8.0.cwl index 261339d0..b232f61d 100644 --- a/gatk_collect_insert_size_metrics_4.1.8.0/gatk_collect_insert_size_metrics_4.1.8.0.cwl +++ b/gatk_collect_insert_size_metrics_4.1.8.0/gatk_collect_insert_size_metrics_4.1.8.0.cwl @@ -27,15 +27,19 @@ inputs: doc: Input file (bam or sam). Required. - id: output_file_name type: string + default: '$(inputs.input.basename.replace(/.bam/, ''_insert_size_metrics.txt''))' inputBinding: position: 0 prefix: -O + valueFrom: '$(inputs.input.basename.replace(/.bam/, ''_insert_size_metrics.txt''))' doc: File to write the output to. Required. - id: histogram_file type: string + default: '$(inputs.input.basename.replace(/.bam/, ''_histogram.pdf''))' inputBinding: position: 0 prefix: -H + valueFrom: '$(inputs.input.basename.replace(/.bam/, ''_histogram.pdf''))' doc: File to write insert size Histogram chart to. Required. - id: deviations type: float? 
From 6e427aab6c1bab8d3e95cfc3ec03ff2b9eeaea7c Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Fri, 24 Jul 2020 11:20:23 -0400 Subject: [PATCH 133/476] fixes to gatk picardtools to run in subworkflow --- ...lect_alignment_summary_metrics_4.1.8.0.cwl | 20 ++++++--- .../gatk_collect_hs_metrics_4.1.8.0.cwl | 43 +++++++++++++------ ...tk_collect_insert_size_metrics_4.1.8.0.cwl | 34 +++++++++------ 3 files changed, 66 insertions(+), 31 deletions(-) diff --git a/gatk_collect_alignment_summary_metrics_4.1.8.0/gatk_collect_alignment_summary_metrics_4.1.8.0.cwl b/gatk_collect_alignment_summary_metrics_4.1.8.0/gatk_collect_alignment_summary_metrics_4.1.8.0.cwl index 75e15307..1844d921 100644 --- a/gatk_collect_alignment_summary_metrics_4.1.8.0/gatk_collect_alignment_summary_metrics_4.1.8.0.cwl +++ b/gatk_collect_alignment_summary_metrics_4.1.8.0/gatk_collect_alignment_summary_metrics_4.1.8.0.cwl @@ -26,12 +26,7 @@ inputs: prefix: -I doc: Input file (bam or sam). Required. - id: output_file_name - type: string - default: '$(inputs.input.basename.replace(/.bam/, ''_alignment_summary_metrics.txt''))' - inputBinding: - position: 0 - prefix: -O - valueFrom: '$(inputs.input.basename.replace(/.bam/, ''_alignment_summary_metrics.txt''))' + type: string? doc: File to write the output to. Required. - id: reference type: File? @@ -39,6 +34,7 @@ inputs: position: 0 prefix: -R secondaryFiles: + - ^.fasta.fai - ^.dict doc: >- Reference sequence file. 
Note that while this argument is not required, without it only a @@ -145,7 +141,7 @@ outputs: outputBinding: glob: |- ${ - if(inputs.output_file_name){ + if (inputs.output_file_name){ return inputs.output_file_name } else { return inputs.input.basename.replace(/.bam/, '_alignment_summary_metrics.txt') @@ -188,6 +184,16 @@ arguments: - position: 0 prefix: '--MAX_RECORDS_IN_RAM' valueFrom: '50000' + - position: 2 + prefix: '-O' + valueFrom: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/, '_alignment_summary_metrics.txt') + } + } requirements: - class: ResourceRequirement ramMin: 32000 diff --git a/gatk_collect_hs_metrics_4.1.8.0/gatk_collect_hs_metrics_4.1.8.0.cwl b/gatk_collect_hs_metrics_4.1.8.0/gatk_collect_hs_metrics_4.1.8.0.cwl index 669fe512..760fd31b 100644 --- a/gatk_collect_hs_metrics_4.1.8.0/gatk_collect_hs_metrics_4.1.8.0.cwl +++ b/gatk_collect_hs_metrics_4.1.8.0/gatk_collect_hs_metrics_4.1.8.0.cwl @@ -34,27 +34,16 @@ inputs: An interval list file that contains the locations of the targets. This argument must be specified at least once. Required. - id: output_file_name - type: string - default: '$(inputs.input.basename.replace(/.bam/, ''_hs_metrics.txt''))' - inputBinding: - position: 0 - prefix: -O - valueFrom: '$(inputs.input.basename.replace(/.bam/, ''_hs_metrics.txt''))' + type: string? doc: The output file to write the metrics to. Required. - id: per_base_coverage type: string? - inputBinding: - position: 0 - prefix: --PER_BASE_COVERAGE doc: >- An optional file to output per base coverage information to. The per-base file contains one line per target base and can grow very large. It is not recommended for use with large target sets. Default value: null. - id: per_target_coverage type: string? - inputBinding: - position: 0 - prefix: --PER_TARGET_COVERAGE doc: >- An optional file to output per target coverage information to. Default value: null. 
- id: theoretical_sensitivity_output @@ -265,6 +254,36 @@ arguments: - position: 0 prefix: '--MAX_RECORDS_IN_RAM' valueFrom: '50000' + - position: 2 + prefix: '-O' + valueFrom: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/, '_hs_metrics.txt') + } + } + - position: 2 + prefix: '--PER_TARGET_COVERAGE' + valueFrom: |- + ${ + if(inputs.per_target_coverage){ + return inputs.per_target_coverage + } else { + return inputs.input.basename.replace(/.bam/, '_per_target_coverage.txt') + } + } + - position: 2 + prefix: '--PER_BASE_COVERAGE' + valueFrom: |- + ${ + if(inputs.per_base_coverage){ + return inputs.per_base_coverage + } else { + return inputs.input.basename.replace(/.bam/, '_per_base_coverage.txt') + } + } requirements: - class: ResourceRequirement ramMin: 32000 diff --git a/gatk_collect_insert_size_metrics_4.1.8.0/gatk_collect_insert_size_metrics_4.1.8.0.cwl b/gatk_collect_insert_size_metrics_4.1.8.0/gatk_collect_insert_size_metrics_4.1.8.0.cwl index b232f61d..0f165535 100644 --- a/gatk_collect_insert_size_metrics_4.1.8.0/gatk_collect_insert_size_metrics_4.1.8.0.cwl +++ b/gatk_collect_insert_size_metrics_4.1.8.0/gatk_collect_insert_size_metrics_4.1.8.0.cwl @@ -26,20 +26,10 @@ inputs: prefix: -I doc: Input file (bam or sam). Required. - id: output_file_name - type: string - default: '$(inputs.input.basename.replace(/.bam/, ''_insert_size_metrics.txt''))' - inputBinding: - position: 0 - prefix: -O - valueFrom: '$(inputs.input.basename.replace(/.bam/, ''_insert_size_metrics.txt''))' + type: string? doc: File to write the output to. Required. - id: histogram_file - type: string - default: '$(inputs.input.basename.replace(/.bam/, ''_histogram.pdf''))' - inputBinding: - position: 0 - prefix: -H - valueFrom: '$(inputs.input.basename.replace(/.bam/, ''_histogram.pdf''))' + type: string? doc: File to write insert size Histogram chart to. Required. - id: deviations type: float? 
@@ -195,6 +185,26 @@ arguments: - position: 0 prefix: '--MAX_RECORDS_IN_RAM' valueFrom: '50000' + - position: 2 + prefix: '-O' + valueFrom: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/, '_insert_size_metrics.txt') + } + } + - position: 2 + prefix: '-H' + valueFrom: |- + ${ + if(inputs.histogram_file){ + return inputs.histogram_file + } else { + return inputs.input.basename.replace(/.bam/, '_histogram.pdf') + } + } requirements: - class: ResourceRequirement ramMin: 32000 From ca43953fdf485356ce0722271386fb981f978a57 Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Fri, 24 Jul 2020 12:53:04 -0400 Subject: [PATCH 134/476] add contribution lines --- ...llect_alignment_summary_metrics_4.1.8.0.cwl | 18 ++++++++++++++++++ .../gatk_collect_hs_metrics_4.1.8.0.cwl | 18 ++++++++++++++++++ ...atk_collect_insert_size_metrics_4.1.8.0.cwl | 18 ++++++++++++++++++ 3 files changed, 54 insertions(+) diff --git a/gatk_collect_alignment_summary_metrics_4.1.8.0/gatk_collect_alignment_summary_metrics_4.1.8.0.cwl b/gatk_collect_alignment_summary_metrics_4.1.8.0/gatk_collect_alignment_summary_metrics_4.1.8.0.cwl index 1844d921..d5797842 100644 --- a/gatk_collect_alignment_summary_metrics_4.1.8.0/gatk_collect_alignment_summary_metrics_4.1.8.0.cwl +++ b/gatk_collect_alignment_summary_metrics_4.1.8.0/gatk_collect_alignment_summary_metrics_4.1.8.0.cwl @@ -201,3 +201,21 @@ requirements: - class: DockerRequirement dockerPull: 'broadinstitute/gatk:4.1.8.0' - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charles Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charles Murphy + 'foaf:name': Memorial Sloan Kettering Cancer 
Center +'doap:release': + - class: 'doap:Version' + 'doap:name': Tool Name + 'doap:revision': Tool Version diff --git a/gatk_collect_hs_metrics_4.1.8.0/gatk_collect_hs_metrics_4.1.8.0.cwl b/gatk_collect_hs_metrics_4.1.8.0/gatk_collect_hs_metrics_4.1.8.0.cwl index 760fd31b..8e0179d0 100644 --- a/gatk_collect_hs_metrics_4.1.8.0/gatk_collect_hs_metrics_4.1.8.0.cwl +++ b/gatk_collect_hs_metrics_4.1.8.0/gatk_collect_hs_metrics_4.1.8.0.cwl @@ -291,3 +291,21 @@ requirements: - class: DockerRequirement dockerPull: 'broadinstitute/gatk:4.1.8.0' - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charles Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charles Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': Tool Name + 'doap:revision': Tool Version diff --git a/gatk_collect_insert_size_metrics_4.1.8.0/gatk_collect_insert_size_metrics_4.1.8.0.cwl b/gatk_collect_insert_size_metrics_4.1.8.0/gatk_collect_insert_size_metrics_4.1.8.0.cwl index 0f165535..090a9311 100644 --- a/gatk_collect_insert_size_metrics_4.1.8.0/gatk_collect_insert_size_metrics_4.1.8.0.cwl +++ b/gatk_collect_insert_size_metrics_4.1.8.0/gatk_collect_insert_size_metrics_4.1.8.0.cwl @@ -212,3 +212,21 @@ requirements: - class: DockerRequirement dockerPull: 'broadinstitute/gatk:4.1.8.0' - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charles Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 
'mailto:murphyc4@mskcc.org' + 'foaf:name': Charles Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': Tool Name + 'doap:revision': Tool Version From 5b6ce72a3617f6462879f8c41f5b02095ed3f957 Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Fri, 24 Jul 2020 13:15:27 -0400 Subject: [PATCH 135/476] add version to contribution section --- .../gatk_collect_alignment_summary_metrics_4.1.8.0.cwl | 4 ++-- .../gatk_collect_hs_metrics_4.1.8.0.cwl | 4 ++-- .../gatk_collect_insert_size_metrics_4.1.8.0.cwl | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/gatk_collect_alignment_summary_metrics_4.1.8.0/gatk_collect_alignment_summary_metrics_4.1.8.0.cwl b/gatk_collect_alignment_summary_metrics_4.1.8.0/gatk_collect_alignment_summary_metrics_4.1.8.0.cwl index d5797842..e523dca9 100644 --- a/gatk_collect_alignment_summary_metrics_4.1.8.0/gatk_collect_alignment_summary_metrics_4.1.8.0.cwl +++ b/gatk_collect_alignment_summary_metrics_4.1.8.0/gatk_collect_alignment_summary_metrics_4.1.8.0.cwl @@ -217,5 +217,5 @@ requirements: 'foaf:name': Memorial Sloan Kettering Cancer Center 'doap:release': - class: 'doap:Version' - 'doap:name': Tool Name - 'doap:revision': Tool Version + 'doap:name': gatk4 + 'doap:revision': 4.1.8.0 diff --git a/gatk_collect_hs_metrics_4.1.8.0/gatk_collect_hs_metrics_4.1.8.0.cwl b/gatk_collect_hs_metrics_4.1.8.0/gatk_collect_hs_metrics_4.1.8.0.cwl index 8e0179d0..9ea7e049 100644 --- a/gatk_collect_hs_metrics_4.1.8.0/gatk_collect_hs_metrics_4.1.8.0.cwl +++ b/gatk_collect_hs_metrics_4.1.8.0/gatk_collect_hs_metrics_4.1.8.0.cwl @@ -307,5 +307,5 @@ requirements: 'foaf:name': Memorial Sloan Kettering Cancer Center 'doap:release': - class: 'doap:Version' - 'doap:name': Tool Name - 'doap:revision': Tool Version + 'doap:name': gatk4 + 'doap:revision': 4.1.8.0 diff --git a/gatk_collect_insert_size_metrics_4.1.8.0/gatk_collect_insert_size_metrics_4.1.8.0.cwl 
b/gatk_collect_insert_size_metrics_4.1.8.0/gatk_collect_insert_size_metrics_4.1.8.0.cwl index 090a9311..2e177dd7 100644 --- a/gatk_collect_insert_size_metrics_4.1.8.0/gatk_collect_insert_size_metrics_4.1.8.0.cwl +++ b/gatk_collect_insert_size_metrics_4.1.8.0/gatk_collect_insert_size_metrics_4.1.8.0.cwl @@ -228,5 +228,5 @@ requirements: 'foaf:name': Memorial Sloan Kettering Cancer Center 'doap:release': - class: 'doap:Version' - 'doap:name': Tool Name - 'doap:revision': Tool Version + 'doap:name': gatk4 + 'doap:revision': 4.1.8.0 From 27037319effe5787e049d5f74860bd537e8e9568 Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Tue, 4 Aug 2020 16:45:34 -0400 Subject: [PATCH 136/476] initial cwl --- sequence_qc_0.1.12/README.md | 65 ++++++++++++ sequence_qc_0.1.12/example_inputs.yaml | 17 ++++ sequence_qc_0.1.12/sequence_qc_0.1.12.cwl | 114 ++++++++++++++++++++++ 3 files changed, 196 insertions(+) create mode 100644 sequence_qc_0.1.12/README.md create mode 100644 sequence_qc_0.1.12/example_inputs.yaml create mode 100644 sequence_qc_0.1.12/sequence_qc_0.1.12.cwl diff --git a/sequence_qc_0.1.12/README.md b/sequence_qc_0.1.12/README.md new file mode 100644 index 00000000..75fa09b2 --- /dev/null +++ b/sequence_qc_0.1.12/README.md @@ -0,0 +1,65 @@ +# CWL and Dockerfile for running merge_fastq + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +|--- |--- |--- | +| miniconda | 3 | | +| merge_fastq | 0.1.7 | | + +[![](https://images.microbadger.com/badges/version/mskaccess/merge_fastq:0.6.1.svg)](https://microbadger.com/images/mskaccess/merge_fastq:0.6.1 "Get your own version badge on microbadger.com") [![](https://images.microbadger.com/badges/image/mskaccess/merge_fastq:0.6.1.svg)](https://microbadger.com/images/mskaccess/merge_fastq:0.6.1 "Get your own image badge on microbadger.com") [![](https://images.microbadger.com/badges/commit/mskaccess/merge_fastq:0.6.1.svg)](https://microbadger.com/images/mskaccess/merge_fastq:0.6.1 "Get your 
own commit badge on microbadger.com") [![](https://images.microbadger.com/badges/license/mskaccess/merge_fastq:0.6.1.svg)](https://microbadger.com/images/mskaccess/merge_fastq:0.6.1 "Get your own license badge on microbadger.com") + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner sequence_qc_0.1.12.cwl example_inputs.yaml +``` + +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/sequence_qc_0.1.12/sequence_qc_0.1.12.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> mkdir tool_toil_log +> toil-cwl-runner --singularity --logFile /path/to/tool_toil_log/cwltoil.log --jobStore /path/to/tool_jobStore --batchSystem lsf --workDir /path/to/tool_toil_log --outdir . 
--writeLogs /path/to/tool_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/sequence_qc_0.1.12/sequence_qc_0.1.12.cwl /path/to/inputs.yaml > tool_toil.stdout 2> tool_toil.stderr & +``` + +### Usage + +```bash +toil-cwl-runner sequence_qc_0.1.12.cwl -h + +usage: sequence_qc_0.1.12.cwl [-h] --reference REFERENCE --bam_file BAM_FILE + --bed_file BED_FILE --output_prefix + OUTPUT_PREFIX [--threshold THRESHOLD] + [--truncate TRUNCATE] [--min_mapq MIN_MAPQ] + [--min_basq MIN_BASQ] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --reference REFERENCE + Path to reference fasta, containing all regions in + bed_file + --bam_file BAM_FILE Path to BAM file for calculating noise [required] + --bed_file BED_FILE Path to BED file containing regions over which to + calculate noise [required] + --output_prefix OUTPUT_PREFIX + Prefix to include in all output file names + --threshold THRESHOLD + Alt allele frequency past which to ignore positions + from the calculation. 
+ --truncate TRUNCATE Whether to exclude trailing bases from reads that only + partially overlap the bed file (0 or 1) + --min_mapq MIN_MAPQ Exclude reads with a lower mapping quality + --min_basq MIN_BASQ Exclude bases with a lower base quality +``` diff --git a/sequence_qc_0.1.12/example_inputs.yaml b/sequence_qc_0.1.12/example_inputs.yaml new file mode 100644 index 00000000..17623090 --- /dev/null +++ b/sequence_qc_0.1.12/example_inputs.yaml @@ -0,0 +1,17 @@ +reference: + - class: File + path: >- + /path/to/reference.fasta +bam_file: + - class: File + path: >- + /path/to/file.bam +bed_file: + - class: File + path: >- + /path/to/file.bed +output_prefix: test_sample_ +threshold: 0.01 +truncate: 1 +min_mapq: 10 +min_basq: 10 diff --git a/sequence_qc_0.1.12/sequence_qc_0.1.12.cwl b/sequence_qc_0.1.12/sequence_qc_0.1.12.cwl new file mode 100644 index 00000000..9f66f706 --- /dev/null +++ b/sequence_qc_0.1.12/sequence_qc_0.1.12.cwl @@ -0,0 +1,114 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: calculate_noise_0_1_12 +baseCommand: + - calculate_noise +inputs: + - id: reference + type: File + inputBinding: + position: 0 + prefix: --ref_fasta + doc: >- + Path to reference fasta, containing all regions in bed_file + - id: bam_file + type: File + inputBinding: + position: 0 + prefix: --bam_file + doc: >- + Path to BAM file for calculating noise [required] + - id: bed_file + type: File + inputBinding: + position: 0 + prefix: --bed_file + doc: >- + Path to BED file containing regions over which to calculate noise [required] + - id: output_prefix + type: string + inputBinding: + position: 0 + prefix: --output_prefix + doc: >- + Prefix to include in all output file names + - id: threshold + type: string? 
+ inputBinding: + position: 0 + prefix: --threshold + doc: >- + Alt allele frequency past which to ignore positions from the calculation. + - id: truncate + type: int? + inputBinding: + position: 0 + prefix: --truncate + doc: >- + Whether to exclude trailing bases from reads that only partially overlap the bed file (0 or 1) + - id: min_mapq + type: int? + inputBinding: + position: 0 + prefix: --min_mapq + doc: >- + Exclude reads with a lower mapping quality + - id: min_basq + type: int? + inputBinding: + position: 0 + prefix: --min_basq + doc: >- + Exclude bases with a lower base quality +outputs: + - id: pileup + type: File + outputBinding: + glob: |- + ${ + return inputs.output_prefix + 'pileup.tsv' + } + - id: noise_positions + type: File + outputBinding: + glob: |- + ${ + return inputs.output_prefix + 'noise_positions.tsv' + } + - id: noise_acgt + type: File + outputBinding: + glob: |- + ${ + return inputs.output_prefix + 'noise_acgt.tsv' + } +requirements: + - class: ResourceRequirement + ramMin: 8000 + coresMin: 1 + - class: DockerRequirement + dockerPull: '424e59769581' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': sesquence_qc + 'doap:revision': 0.1.12 From c0c5ea9982756afcf321ea77c86cfe00bf27ae7a Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Tue, 4 Aug 2020 16:45:42 -0400 Subject: [PATCH 137/476] initial docker --- sequence_qc_0.1.12/container/Dockerfile | 32 +++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 sequence_qc_0.1.12/container/Dockerfile 
diff --git a/sequence_qc_0.1.12/container/Dockerfile b/sequence_qc_0.1.12/container/Dockerfile new file mode 100644 index 00000000..a3cebeef --- /dev/null +++ b/sequence_qc_0.1.12/container/Dockerfile @@ -0,0 +1,32 @@ +################## BASE IMAGE ###################### + +FROM python:3.6-alpine + +################## ARGUMENTS/Environments ########## + +ARG BUILD_DATE +ARG BUILD_VERSION +ARG LICENSE="Apache-2.0" +ARG SEQUENCE_QC_VERSION=0.1.12 +ARG VCS_REF +################## METADATA ######################## +LABEL org.opencontainers.image.vendor="MSKCC" +LABEL org.opencontainers.image.authors="Charlie Murphy (murphyc4@mskcc.org)" + +LABEL org.opencontainers.image.created=${BUILD_DATE} \ + org.opencontainers.image.version=${BUILD_VERSION} \ + org.opencontainers.image.licenses=${LICENSE} \ + org.opencontainers.image.version.sequence_qc=${SEQUENCE_QC_VERSION} \ + org.opencontainers.image.source.sequence_qc="https://pypi.org/project/sequence_qc/" \ + org.opencontainers.image.vcs-url="https://github.com/msk-access/cwl-commandlinetools.git" \ + org.opencontainers.image.vcs-ref=${VCS_REF} + +LABEL org.opencontainers.image.description="This container uses python3.6 as the base image to build \ + sequence_qc version ${SEQUENCE_QC_VERSION}" + +################## INSTALL ########################## + +RUN apk add --no-cache --virtual .build-deps gcc musl-dev make zlib-dev bzip2-dev xz-dev g++ libstdc++ \ + && pip install cython \ + && pip install sequence_qc==0.1.12 \ + && apk del .build-deps \ No newline at end of file From 1e6f39098ad09f9b04b8d31b945bdaebc842aec5 Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Tue, 4 Aug 2020 17:55:07 -0400 Subject: [PATCH 138/476] update to 0.1.15 --- sequence_qc_0.1.12/example_inputs.yaml | 17 ----------------- .../README.md | 10 +++++----- .../container/Dockerfile | 5 ++--- sequence_qc_0.1.15/example_inputs.yaml | 17 +++++++++++++++++ .../sequence_qc_0.1.15.cwl | 8 ++++---- 5 files changed, 28 insertions(+), 29 deletions(-) delete mode 
100644 sequence_qc_0.1.12/example_inputs.yaml rename {sequence_qc_0.1.12 => sequence_qc_0.1.15}/README.md (90%) rename {sequence_qc_0.1.12 => sequence_qc_0.1.15}/container/Dockerfile (92%) create mode 100644 sequence_qc_0.1.15/example_inputs.yaml rename sequence_qc_0.1.12/sequence_qc_0.1.12.cwl => sequence_qc_0.1.15/sequence_qc_0.1.15.cwl (96%) diff --git a/sequence_qc_0.1.12/example_inputs.yaml b/sequence_qc_0.1.12/example_inputs.yaml deleted file mode 100644 index 17623090..00000000 --- a/sequence_qc_0.1.12/example_inputs.yaml +++ /dev/null @@ -1,17 +0,0 @@ -reference: - - class: File - path: >- - /path/to/reference.fasta -bam_file: - - class: File - path: >- - /path/to/file.bam -bed_file: - - class: File - path: >- - /path/to/file.bed -output_prefix: test_sample_ -threshold: 0.01 -truncate: 1 -min_mapq: 10 -min_basq: 10 diff --git a/sequence_qc_0.1.12/README.md b/sequence_qc_0.1.15/README.md similarity index 90% rename from sequence_qc_0.1.12/README.md rename to sequence_qc_0.1.15/README.md index 75fa09b2..52b77cb3 100644 --- a/sequence_qc_0.1.12/README.md +++ b/sequence_qc_0.1.15/README.md @@ -16,26 +16,26 @@ - Example Command using [toil](https://toil.readthedocs.io): ```bash - > toil-cwl-runner sequence_qc_0.1.12.cwl example_inputs.yaml + > toil-cwl-runner sequence_qc_0.1.15.cwl example_inputs.yaml ``` **If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** ```bash #Using CWLTOOL -> cwltool --singularity --non-strict /path/to/sequence_qc_0.1.12/sequence_qc_0.1.12.cwl /path/to/inputs.yaml +> cwltool --singularity --non-strict /path/to/sequence_qc_0.1.15/sequence_qc_0.1.15.cwl /path/to/inputs.yaml #Using toil-cwl-runner > mkdir tool_toil_log -> toil-cwl-runner --singularity --logFile /path/to/tool_toil_log/cwltoil.log --jobStore /path/to/tool_jobStore 
--batchSystem lsf --workDir /path/to/tool_toil_log --outdir . --writeLogs /path/to/tool_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/sequence_qc_0.1.12/sequence_qc_0.1.12.cwl /path/to/inputs.yaml > tool_toil.stdout 2> tool_toil.stderr & +> toil-cwl-runner --singularity --logFile /path/to/tool_toil_log/cwltoil.log --jobStore /path/to/tool_jobStore --batchSystem lsf --workDir /path/to/tool_toil_log --outdir . --writeLogs /path/to/tool_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/sequence_qc_0.1.15/sequence_qc_0.1.15.cwl /path/to/inputs.yaml > tool_toil.stdout 2> tool_toil.stderr & ``` ### Usage ```bash -toil-cwl-runner sequence_qc_0.1.12.cwl -h +toil-cwl-runner sequence_qc_0.1.15.cwl -h -usage: sequence_qc_0.1.12.cwl [-h] --reference REFERENCE --bam_file BAM_FILE +usage: sequence_qc_0.1.15.cwl [-h] --reference REFERENCE --bam_file BAM_FILE --bed_file BED_FILE --output_prefix OUTPUT_PREFIX [--threshold THRESHOLD] [--truncate TRUNCATE] [--min_mapq MIN_MAPQ] diff --git a/sequence_qc_0.1.12/container/Dockerfile b/sequence_qc_0.1.15/container/Dockerfile similarity index 92% rename from sequence_qc_0.1.12/container/Dockerfile rename to sequence_qc_0.1.15/container/Dockerfile index a3cebeef..8ca6f01e 100644 --- a/sequence_qc_0.1.12/container/Dockerfile +++ b/sequence_qc_0.1.15/container/Dockerfile @@ -7,7 +7,7 @@ FROM python:3.6-alpine ARG BUILD_DATE ARG BUILD_VERSION ARG LICENSE="Apache-2.0" -ARG SEQUENCE_QC_VERSION=0.1.12 +ARG SEQUENCE_QC_VERSION=0.1.15 ARG VCS_REF ################## METADATA ######################## LABEL org.opencontainers.image.vendor="MSKCC" @@ -28,5 +28,4 @@ LABEL org.opencontainers.image.description="This container uses python3.6 as the RUN apk add --no-cache --virtual .build-deps gcc musl-dev make zlib-dev bzip2-dev xz-dev g++ libstdc++ \ && pip install cython \ - && pip install sequence_qc==0.1.12 \ - && apk del .build-deps \ No 
newline at end of file + && pip install sequence_qc==0.1.15 \ No newline at end of file diff --git a/sequence_qc_0.1.15/example_inputs.yaml b/sequence_qc_0.1.15/example_inputs.yaml new file mode 100644 index 00000000..3fcf0e44 --- /dev/null +++ b/sequence_qc_0.1.15/example_inputs.yaml @@ -0,0 +1,17 @@ +reference: + class: File + metadata: {} + path: /Users/charlesmurphy/Desktop/data/ref/hg19/Homo_sapiens_assembly19.fasta +bam_file: + class: File + metadata: {} + path: /Users/charlesmurphy/Desktop/mskcc-analyses/200608_compare_qc_tools/manually_count_bases/C-52YNHF-N001-d_cl_aln_srt_MD_IR_FX_BR__aln_srt_IR_FX.bam +bed_file: + class: File + metadata: {} + path: /Users/charlesmurphy/Desktop/data/innovation/resources/MSK-ACCESS-v1.0/MSK-ACCESS-v1_0-probe-A.sorted_correct.bed +output_prefix: test_sample_ +threshold: 0.01 +truncate: 1 +min_mapq: 10 +min_basq: 10 diff --git a/sequence_qc_0.1.12/sequence_qc_0.1.12.cwl b/sequence_qc_0.1.15/sequence_qc_0.1.15.cwl similarity index 96% rename from sequence_qc_0.1.12/sequence_qc_0.1.12.cwl rename to sequence_qc_0.1.15/sequence_qc_0.1.15.cwl index 9f66f706..329fabab 100644 --- a/sequence_qc_0.1.12/sequence_qc_0.1.12.cwl +++ b/sequence_qc_0.1.15/sequence_qc_0.1.15.cwl @@ -5,7 +5,7 @@ $namespaces: doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' sbg: 'https://www.sevenbridges.com/' -id: calculate_noise_0_1_12 +id: calculate_noise_0_1_15 baseCommand: - calculate_noise inputs: @@ -38,7 +38,7 @@ inputs: doc: >- Prefix to include in all output file names - id: threshold - type: string? + type: float? 
inputBinding: position: 0 prefix: --threshold @@ -92,7 +92,7 @@ requirements: ramMin: 8000 coresMin: 1 - class: DockerRequirement - dockerPull: '424e59769581' + dockerPull: 'sequence_qc/0.1.15' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' @@ -111,4 +111,4 @@ requirements: 'doap:release': - class: 'doap:Version' 'doap:name': sesquence_qc - 'doap:revision': 0.1.12 + 'doap:revision': 0.1.15 From 9fc232dbe2b954a9f02cb14a407e39d2a0a0c205 Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Tue, 4 Aug 2020 18:02:25 -0400 Subject: [PATCH 139/476] Update README.md --- sequence_qc_0.1.15/README.md | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/sequence_qc_0.1.15/README.md b/sequence_qc_0.1.15/README.md index 52b77cb3..6cbb621a 100644 --- a/sequence_qc_0.1.15/README.md +++ b/sequence_qc_0.1.15/README.md @@ -1,13 +1,10 @@ -# CWL and Dockerfile for running merge_fastq +# CWL and Dockerfile for running sequence_qc ## Version of tools in docker image (/container/Dockerfile) | Tool | Version | Location | |--- |--- |--- | -| miniconda | 3 | | -| merge_fastq | 0.1.7 | | - -[![](https://images.microbadger.com/badges/version/mskaccess/merge_fastq:0.6.1.svg)](https://microbadger.com/images/mskaccess/merge_fastq:0.6.1 "Get your own version badge on microbadger.com") [![](https://images.microbadger.com/badges/image/mskaccess/merge_fastq:0.6.1.svg)](https://microbadger.com/images/mskaccess/merge_fastq:0.6.1 "Get your own image badge on microbadger.com") [![](https://images.microbadger.com/badges/commit/mskaccess/merge_fastq:0.6.1.svg)](https://microbadger.com/images/mskaccess/merge_fastq:0.6.1 "Get your own commit badge on microbadger.com") [![](https://images.microbadger.com/badges/license/mskaccess/merge_fastq:0.6.1.svg)](https://microbadger.com/images/mskaccess/merge_fastq:0.6.1 "Get your own license badge on microbadger.com") +| sequence_qc | 0.1.15 | | ## CWL From 680e5efd116b17a63001b96b52b4abd326c28414 Mon Sep 17 00:00:00 
2001 From: murphycj2 Date: Tue, 4 Aug 2020 18:14:20 -0400 Subject: [PATCH 140/476] add missing outputs --- sequence_qc_0.1.15/sequence_qc_0.1.15.cwl | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/sequence_qc_0.1.15/sequence_qc_0.1.15.cwl b/sequence_qc_0.1.15/sequence_qc_0.1.15.cwl index 329fabab..b4348393 100644 --- a/sequence_qc_0.1.15/sequence_qc_0.1.15.cwl +++ b/sequence_qc_0.1.15/sequence_qc_0.1.15.cwl @@ -87,6 +87,20 @@ outputs: ${ return inputs.output_prefix + 'noise_acgt.tsv' } + - id: noise_n + type: File + outputBinding: + glob: |- + ${ + return inputs.output_prefix + 'noise_n.tsv' + } + - id: noise_del + type: File + outputBinding: + glob: |- + ${ + return inputs.output_prefix + 'noise_del.tsv' + } requirements: - class: ResourceRequirement ramMin: 8000 From a2d8fbb365b99a1164c7f0f6d176cf6dd3072433 Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Tue, 4 Aug 2020 18:33:38 -0400 Subject: [PATCH 141/476] rename docker image, add secondary files --- sequence_qc_0.1.15/sequence_qc_0.1.15.cwl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sequence_qc_0.1.15/sequence_qc_0.1.15.cwl b/sequence_qc_0.1.15/sequence_qc_0.1.15.cwl index b4348393..822d35c7 100644 --- a/sequence_qc_0.1.15/sequence_qc_0.1.15.cwl +++ b/sequence_qc_0.1.15/sequence_qc_0.1.15.cwl @@ -14,6 +14,8 @@ inputs: inputBinding: position: 0 prefix: --ref_fasta + secondaryFiles: + - ^.fasta.fai doc: >- Path to reference fasta, containing all regions in bed_file - id: bam_file @@ -21,6 +23,8 @@ inputs: inputBinding: position: 0 prefix: --bam_file + secondaryFiles: + - ^.bai doc: >- Path to BAM file for calculating noise [required] - id: bed_file @@ -106,7 +110,7 @@ requirements: ramMin: 8000 coresMin: 1 - class: DockerRequirement - dockerPull: 'sequence_qc/0.1.15' + dockerPull: 'mskaccess/sequence_qc:0.1.15' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' From c2034e4914f4dd1ec9e6352d129d22f5f98658f1 Mon Sep 17 00:00:00 2001 
From: murphycj2 Date: Tue, 4 Aug 2020 18:33:47 -0400 Subject: [PATCH 142/476] remove example paths --- sequence_qc_0.1.12/example_inputs.yaml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 sequence_qc_0.1.12/example_inputs.yaml diff --git a/sequence_qc_0.1.12/example_inputs.yaml b/sequence_qc_0.1.12/example_inputs.yaml new file mode 100644 index 00000000..b369f4ac --- /dev/null +++ b/sequence_qc_0.1.12/example_inputs.yaml @@ -0,0 +1,17 @@ +reference: + class: File + metadata: {} + path: /path/to/fasta +bam_file: + class: File + metadata: {} + path: /path/to/bam +bed_file: + class: File + metadata: {} + path: /path/to/bed +output_prefix: test_sample_ +threshold: 0.01 +truncate: 1 +min_mapq: 10 +min_basq: 10 From defc6b1c885079342a0655677727420cbaee9957 Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Tue, 4 Aug 2020 18:38:04 -0400 Subject: [PATCH 143/476] remove old example inputs --- sequence_qc_0.1.12/example_inputs.yaml | 17 ----------------- sequence_qc_0.1.15/example_inputs.yaml | 6 +++--- 2 files changed, 3 insertions(+), 20 deletions(-) delete mode 100644 sequence_qc_0.1.12/example_inputs.yaml diff --git a/sequence_qc_0.1.12/example_inputs.yaml b/sequence_qc_0.1.12/example_inputs.yaml deleted file mode 100644 index b369f4ac..00000000 --- a/sequence_qc_0.1.12/example_inputs.yaml +++ /dev/null @@ -1,17 +0,0 @@ -reference: - class: File - metadata: {} - path: /path/to/fasta -bam_file: - class: File - metadata: {} - path: /path/to/bam -bed_file: - class: File - metadata: {} - path: /path/to/bed -output_prefix: test_sample_ -threshold: 0.01 -truncate: 1 -min_mapq: 10 -min_basq: 10 diff --git a/sequence_qc_0.1.15/example_inputs.yaml b/sequence_qc_0.1.15/example_inputs.yaml index 3fcf0e44..b369f4ac 100644 --- a/sequence_qc_0.1.15/example_inputs.yaml +++ b/sequence_qc_0.1.15/example_inputs.yaml @@ -1,15 +1,15 @@ reference: class: File metadata: {} - path: /Users/charlesmurphy/Desktop/data/ref/hg19/Homo_sapiens_assembly19.fasta + path: 
/path/to/fasta bam_file: class: File metadata: {} - path: /Users/charlesmurphy/Desktop/mskcc-analyses/200608_compare_qc_tools/manually_count_bases/C-52YNHF-N001-d_cl_aln_srt_MD_IR_FX_BR__aln_srt_IR_FX.bam + path: /path/to/bam bed_file: class: File metadata: {} - path: /Users/charlesmurphy/Desktop/data/innovation/resources/MSK-ACCESS-v1.0/MSK-ACCESS-v1_0-probe-A.sorted_correct.bed + path: /path/to/bed output_prefix: test_sample_ threshold: 0.01 truncate: 1 From fda922ed2781a620f84fd4aadf765195444ddd78 Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Fri, 7 Aug 2020 18:16:22 -0400 Subject: [PATCH 144/476] update version of sequence qc --- .../README.md | 21 +++++++-------- .../example_inputs.yaml | 2 +- .../sequence_qc_0.1.16.cwl | 27 ++++++++++++------- 3 files changed, 28 insertions(+), 22 deletions(-) rename {sequence_qc_0.1.15 => sequence_qc_0.1.16}/README.md (74%) rename {sequence_qc_0.1.15 => sequence_qc_0.1.16}/example_inputs.yaml (89%) rename sequence_qc_0.1.15/sequence_qc_0.1.15.cwl => sequence_qc_0.1.16/sequence_qc_0.1.16.cwl (83%) diff --git a/sequence_qc_0.1.15/README.md b/sequence_qc_0.1.16/README.md similarity index 74% rename from sequence_qc_0.1.15/README.md rename to sequence_qc_0.1.16/README.md index 6cbb621a..fcce4813 100644 --- a/sequence_qc_0.1.15/README.md +++ b/sequence_qc_0.1.16/README.md @@ -4,7 +4,7 @@ | Tool | Version | Location | |--- |--- |--- | -| sequence_qc | 0.1.15 | | +| sequence_qc | 0.1.16 | | ## CWL @@ -13,30 +13,29 @@ - Example Command using [toil](https://toil.readthedocs.io): ```bash - > toil-cwl-runner sequence_qc_0.1.15.cwl example_inputs.yaml + > toil-cwl-runner sequence_qc_0.1.16.cwl example_inputs.yaml ``` **If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** ```bash #Using CWLTOOL -> cwltool --singularity --non-strict 
/path/to/sequence_qc_0.1.15/sequence_qc_0.1.15.cwl /path/to/inputs.yaml +> cwltool --singularity --non-strict /path/to/sequence_qc_0.1.16/sequence_qc_0.1.16.cwl /path/to/inputs.yaml #Using toil-cwl-runner > mkdir tool_toil_log -> toil-cwl-runner --singularity --logFile /path/to/tool_toil_log/cwltoil.log --jobStore /path/to/tool_jobStore --batchSystem lsf --workDir /path/to/tool_toil_log --outdir . --writeLogs /path/to/tool_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/sequence_qc_0.1.15/sequence_qc_0.1.15.cwl /path/to/inputs.yaml > tool_toil.stdout 2> tool_toil.stderr & +> toil-cwl-runner --singularity --logFile /path/to/tool_toil_log/cwltoil.log --jobStore /path/to/tool_jobStore --batchSystem lsf --workDir /path/to/tool_toil_log --outdir . --writeLogs /path/to/tool_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/sequence_qc_0.1.16/sequence_qc_0.1.16.cwl /path/to/inputs.yaml > tool_toil.stdout 2> tool_toil.stderr & ``` ### Usage ```bash -toil-cwl-runner sequence_qc_0.1.15.cwl -h +toil-cwl-runner sequence_qc_0.1.16.cwl -h -usage: sequence_qc_0.1.15.cwl [-h] --reference REFERENCE --bam_file BAM_FILE - --bed_file BED_FILE --output_prefix - OUTPUT_PREFIX [--threshold THRESHOLD] - [--truncate TRUNCATE] [--min_mapq MIN_MAPQ] - [--min_basq MIN_BASQ] +usage: sequence_qc_0.1.16.cwl [-h] --reference REFERENCE --bam_file BAM_FILE + --bed_file BED_FILE --sample_id SAMPLE_ID + [--threshold THRESHOLD] [--truncate TRUNCATE] + [--min_mapq MIN_MAPQ] [--min_basq MIN_BASQ] [job_order] positional arguments: @@ -50,7 +49,7 @@ optional arguments: --bam_file BAM_FILE Path to BAM file for calculating noise [required] --bed_file BED_FILE Path to BED file containing regions over which to calculate noise [required] - --output_prefix OUTPUT_PREFIX + --sample_id SAMPLE_ID Prefix to include in all output file names --threshold THRESHOLD Alt allele frequency past which to ignore 
positions diff --git a/sequence_qc_0.1.15/example_inputs.yaml b/sequence_qc_0.1.16/example_inputs.yaml similarity index 89% rename from sequence_qc_0.1.15/example_inputs.yaml rename to sequence_qc_0.1.16/example_inputs.yaml index b369f4ac..04cf48b4 100644 --- a/sequence_qc_0.1.15/example_inputs.yaml +++ b/sequence_qc_0.1.16/example_inputs.yaml @@ -10,7 +10,7 @@ bed_file: class: File metadata: {} path: /path/to/bed -output_prefix: test_sample_ +sample_id: test_sample_ threshold: 0.01 truncate: 1 min_mapq: 10 diff --git a/sequence_qc_0.1.15/sequence_qc_0.1.15.cwl b/sequence_qc_0.1.16/sequence_qc_0.1.16.cwl similarity index 83% rename from sequence_qc_0.1.15/sequence_qc_0.1.15.cwl rename to sequence_qc_0.1.16/sequence_qc_0.1.16.cwl index 822d35c7..59785216 100644 --- a/sequence_qc_0.1.15/sequence_qc_0.1.15.cwl +++ b/sequence_qc_0.1.16/sequence_qc_0.1.16.cwl @@ -5,7 +5,7 @@ $namespaces: doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' sbg: 'https://www.sevenbridges.com/' -id: calculate_noise_0_1_15 +id: calculate_noise_0_1_16 baseCommand: - calculate_noise inputs: @@ -34,11 +34,11 @@ inputs: prefix: --bed_file doc: >- Path to BED file containing regions over which to calculate noise [required] - - id: output_prefix + - id: sample_id type: string inputBinding: position: 0 - prefix: --output_prefix + prefix: --sample_id doc: >- Prefix to include in all output file names - id: threshold @@ -75,42 +75,49 @@ outputs: outputBinding: glob: |- ${ - return inputs.output_prefix + 'pileup.tsv' + return inputs.sample_id + 'pileup.tsv' } - id: noise_positions type: File outputBinding: glob: |- ${ - return inputs.output_prefix + 'noise_positions.tsv' + return inputs.sample_id + 'noise_positions.tsv' } - id: noise_acgt type: File outputBinding: glob: |- ${ - return inputs.output_prefix + 'noise_acgt.tsv' + return inputs.sample_id + 'noise_acgt.tsv' } - id: noise_n type: File outputBinding: glob: |- ${ - return inputs.output_prefix + 'noise_n.tsv' + return 
inputs.sample_id + 'noise_n.tsv' } - id: noise_del type: File outputBinding: glob: |- ${ - return inputs.output_prefix + 'noise_del.tsv' + return inputs.sample_id + 'noise_del.tsv' + } + - id: figures + type: File + outputBinding: + glob: |- + ${ + return inputs.sample_id + '_noise.html' } requirements: - class: ResourceRequirement ramMin: 8000 coresMin: 1 - class: DockerRequirement - dockerPull: 'mskaccess/sequence_qc:0.1.15' + dockerPull: 'mskaccess/sequence_qc:0.1.16' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' @@ -129,4 +136,4 @@ requirements: 'doap:release': - class: 'doap:Version' 'doap:name': sesquence_qc - 'doap:revision': 0.1.15 + 'doap:revision': 0.1.16 From 0c180797ee1e583a23587dca11cc5061ea942a76 Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Fri, 7 Aug 2020 18:16:47 -0400 Subject: [PATCH 145/476] faster docker build for sequence_qc, update version --- .../container/Dockerfile | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) rename {sequence_qc_0.1.15 => sequence_qc_0.1.16}/container/Dockerfile (76%) diff --git a/sequence_qc_0.1.15/container/Dockerfile b/sequence_qc_0.1.16/container/Dockerfile similarity index 76% rename from sequence_qc_0.1.15/container/Dockerfile rename to sequence_qc_0.1.16/container/Dockerfile index 8ca6f01e..f165b659 100644 --- a/sequence_qc_0.1.15/container/Dockerfile +++ b/sequence_qc_0.1.16/container/Dockerfile @@ -1,19 +1,19 @@ ################## BASE IMAGE ###################### -FROM python:3.6-alpine +FROM python:3.6-slim ################## ARGUMENTS/Environments ########## ARG BUILD_DATE ARG BUILD_VERSION ARG LICENSE="Apache-2.0" -ARG SEQUENCE_QC_VERSION=0.1.15 +ARG SEQUENCE_QC_VERSION=0.1.16 ARG VCS_REF ################## METADATA ######################## LABEL org.opencontainers.image.vendor="MSKCC" -LABEL org.opencontainers.image.authors="Charlie Murphy (murphyc4@mskcc.org)" +LABEL org.opencontainers.image.authors="Charlie Murphy (murphyc4@mskcc.org)" -LABEL 
org.opencontainers.image.created=${BUILD_DATE} \ +LABEL org.opencontainers.image.created=${BUILD_DATE} \ org.opencontainers.image.version=${BUILD_VERSION} \ org.opencontainers.image.licenses=${LICENSE} \ org.opencontainers.image.version.sequence_qc=${SEQUENCE_QC_VERSION} \ @@ -26,6 +26,7 @@ LABEL org.opencontainers.image.description="This container uses python3.6 as the ################## INSTALL ########################## -RUN apk add --no-cache --virtual .build-deps gcc musl-dev make zlib-dev bzip2-dev xz-dev g++ libstdc++ \ - && pip install cython \ - && pip install sequence_qc==0.1.15 \ No newline at end of file +RUN apt-get update \ + && apt-get install gcc g++ zlib1g-dev -y \ + && pip install cython plotly \ + && pip install sequence_qc==0.1.16 From d18a8371fe341d8e013a32ac305b4d4b80fcffd0 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 14 Aug 2020 01:56:05 -0400 Subject: [PATCH 146/476] Adding working version of fgbio fastqtobam --- fgbio_fastq_to_bam_1.2.0/README.md | 82 ++++++ fgbio_fastq_to_bam_1.2.0/example_inputs.yaml | 27 ++ .../fgbio_fastq_to_bam_1.2.0.cwl | 243 ++++++++++++++++++ 3 files changed, 352 insertions(+) create mode 100644 fgbio_fastq_to_bam_1.2.0/README.md create mode 100644 fgbio_fastq_to_bam_1.2.0/example_inputs.yaml create mode 100644 fgbio_fastq_to_bam_1.2.0/fgbio_fastq_to_bam_1.2.0.cwl diff --git a/fgbio_fastq_to_bam_1.2.0/README.md b/fgbio_fastq_to_bam_1.2.0/README.md new file mode 100644 index 00000000..ccc256f1 --- /dev/null +++ b/fgbio_fastq_to_bam_1.2.0/README.md @@ -0,0 +1,82 @@ +# CWL for running Fgbio - FastqToBam + +## Version of tools in docker image + +| Tool | Version | Location | +| ----- | ------- | ------------------------------------ | +| fgbio | 1.2.0 | quay.io/biocontainers/fgbio:1.2.0--0 | + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner fgbio_fastq_to_bam_1.2.0.cwl
example_inputs.yaml +``` + +## Usage + +```bash + +usage: fgbio_fastq_to_bam_1.2.0.cwl [-h] [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] + --input INPUT + [--output_file_name OUTPUT_FILE_NAME] + [--read-structures READ_STRUCTURES] + [--sort] [--umi-tag UMI_TAG] + [--read-group-id READ_GROUP_ID] + [--sample SAMPLE] [--library LIBRARY] + [--platform PLATFORM] + [--platform-unit PLATFORM_UNIT] + [--platform-model PLATFORM_MODEL] + [--sequencing-center SEQUENCING_CENTER] + [--predicted-insert-size PREDICTED_INSERT_SIZE] + [--description DESCRIPTION] + [--comment COMMENT] [--run-date RUN_DATE] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --input INPUT Fastq files corresponding to each sequencing read + (e.g. R1, I1, etc.). + --output_file_name OUTPUT_FILE_NAME + The output SAM or BAM file to be written. + --read-structures READ_STRUCTURES + Read structures, one for each of the FASTQs. + https://github.com/fulcrumgenomics/fgbio/wiki/Read- + Structures + --sort If true, queryname sort the BAM file, otherwise + preserve input order. + --umi-tag UMI_TAG Tag in which to store molecular barcodes/UMIs + --read-group-id READ_GROUP_ID + Read group ID to use in the file header. + --sample SAMPLE The name of the sequenced sample. + --library LIBRARY The name/ID of the sequenced library. + --platform PLATFORM Sequencing Platform + --platform-unit PLATFORM_UNIT + Platform unit (e.g. ‘<flowcell-barcode>.<lane>.<sample-barcode>') + --platform-model PLATFORM_MODEL + Platform model to insert into the group header (ex.
+ miseq, hiseq2500, hiseqX) + --sequencing-center SEQUENCING_CENTER + The sequencing center from which the data originated + --predicted-insert-size PREDICTED_INSERT_SIZE + Predicted median insert size, to insert into the read + group header + --description DESCRIPTION + Description of the read group. + --comment COMMENT Comment(s) to include in the output file’s header + --run-date RUN_DATE Date the run was produced, to insert into the read + group header +``` diff --git a/fgbio_fastq_to_bam_1.2.0/example_inputs.yaml b/fgbio_fastq_to_bam_1.2.0/example_inputs.yaml new file mode 100644 index 00000000..f30727a2 --- /dev/null +++ b/fgbio_fastq_to_bam_1.2.0/example_inputs.yaml @@ -0,0 +1,27 @@ +comment: null +description: null +input: + - class: File + path: >- + /Users/shahr2/Documents/test_reference/test_fastq_to_bam/fastq/test_R1_001.fastq.gz + - class: File + path: >- + /Users/shahr2/Documents/test_reference/test_fastq_to_bam/fastq/test_R2_001.fastq.gz +library: test +memory_overhead: null +memory_per_job: null +number_of_threads: null +output_file_name: null +platform: Illumina +platform-model: novaseq +platform-unit: . +predicted-insert-size: null +read-group-id: test +read-structures: + - 3M2S+T + - 3M2S+T +run-date: null +sample: test +sequencing-center: mskcc +sort: null +umi-tag: null diff --git a/fgbio_fastq_to_bam_1.2.0/fgbio_fastq_to_bam_1.2.0.cwl b/fgbio_fastq_to_bam_1.2.0/fgbio_fastq_to_bam_1.2.0.cwl new file mode 100644 index 00000000..e2d695d8 --- /dev/null +++ b/fgbio_fastq_to_bam_1.2.0/fgbio_fastq_to_bam_1.2.0.cwl @@ -0,0 +1,243 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: fgbio_fastq_to_bam_1_2_0 +baseCommand: + - fgbio +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? 
+ doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: input + type: 'File[]' + inputBinding: + position: 0 + prefix: '--input' + itemSeparator: ' ' + shellQuote: false + label: PathToFastq + doc: 'Fastq files corresponding to each sequencing read (e.g. R1, I1, etc.).' + - id: output_file_name + type: string? + doc: The output SAM or BAM file to be written. + - id: read-structures + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--read-structures' + itemSeparator: ' ' + shellQuote: false + doc: >- + Read structures, one for each of the FASTQs. + https://github.com/fulcrumgenomics/fgbio/wiki/Read-Structures + - 'sbg:altPrefix': '-s' + id: sort + type: boolean? + inputBinding: + position: 0 + prefix: '--sort' + shellQuote: false + doc: 'If true, queryname sort the BAM file, otherwise preserve input order.' + - 'sbg:altPrefix': '-u' + id: umi-tag + type: string? + inputBinding: + position: 0 + prefix: '--umi-tag' + shellQuote: false + doc: Tag in which to store molecular barcodes/UMIs + - id: read-group-id + type: string? + inputBinding: + position: 0 + prefix: '--read-group-id' + shellQuote: false + doc: Read group ID to use in the file header. + - id: sample + type: string? + inputBinding: + position: 0 + prefix: '--sample' + shellQuote: false + doc: The name of the sequenced sample. + - id: library + type: string? + inputBinding: + position: 0 + prefix: '--library' + shellQuote: false + doc: The name/ID of the sequenced library. + - id: platform + type: string? + inputBinding: + position: 0 + prefix: '--platform' + shellQuote: false + doc: Sequencing Platform + - id: platform-unit + type: string? + inputBinding: + position: 0 + prefix: '--platform-unit' + shellQuote: false + doc: Platform unit (e.g. ‘<flowcell-barcode>.<lane>.<sample-barcode>') + - id: platform-model + type: string? + inputBinding: + position: 0 + prefix: '--platform-model' + shellQuote: false + doc: >- + Platform model to insert into the group header (ex.
miseq, hiseq2500, + hiseqX) + - id: sequencing-center + type: string? + inputBinding: + position: 0 + prefix: '--sequencing-center' + shellQuote: false + doc: The sequencing center from which the data originated + - id: predicted-insert-size + type: int? + inputBinding: + position: 0 + prefix: '--predicted-insert-size' + shellQuote: false + doc: 'Predicted median insert size, to insert into the read group header' + - id: description + type: string? + inputBinding: + position: 0 + prefix: '--description' + doc: Description of the read group. + - id: comment + type: string? + inputBinding: + position: 0 + prefix: '--comment' + doc: Comment(s) to include in the output file’s header + - id: run-date + type: string? + inputBinding: + position: 0 + prefix: '--run-date' + shellQuote: false + doc: 'Date the run was produced, to insert into the read group header' +outputs: + - id: bam + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_file_name) + return inputs.output_file_name; + return inputs.input[0].basename.replace(/.fastq.gz/,'_ubam.bam'); + } +doc: >- + Generates an unmapped BAM (or SAM or CRAM) file from fastq files. Takes in one + or more fastq files (optionally gzipped), each representing a different + sequencing read (e.g. R1, R2, I1 or I2) and can use a set of read structures + to allocate bases in those reads to template reads, sample indices, unique + molecular indices, or to designate bases to be skipped over. + + + Read structures are made up of <number><operator> pairs much like the CIGAR + string in BAM files. Four kinds of operators are recognized: + + + 1. T identifies a template read + + 2. B identifies a sample barcode read + + 3. M identifies a unique molecular index read + + 4. S identifies a set of bases that should be skipped or ignored + + + The last <number><operator> pair may be specified using a + sign instead of + number to denote “all remaining bases”. This is useful if, e.g., fastqs have + been trimmed and contain reads of varying length.
For example to convert a + paired-end run with an index read and where the first 5 bases of R1 are a UMI + and the second five bases are monotemplate you might specify: +label: fgbio_fastq_to_bam_1.2.0 +arguments: + - position: 0 + prefix: '' + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx4G" + } + else { + return "-Xmx4G" + } + } + - position: 0 + valueFrom: '-XX:-UseGCOverheadLimit' + - position: 0 + prefix: '-Djava.io.tmpdir=' + separate: false + shellQuote: false + valueFrom: '${ return runtime.tmpdir}' + - position: 0 + valueFrom: FastqToBam + - position: 0 + prefix: '--output' + shellQuote: false + valueFrom: |- + ${ + if(inputs.output_file_name) + return inputs.output_file_name; + return inputs.input[0].basename.replace(/.fastq.gz/,'_ubam.bam'); + } +requirements: + - class: ShellCommandRequirement + - class: ResourceRequirement + ramMin: 10000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'quay.io/biocontainers/fgbio:1.2.0--0' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center 
+'doap:release': + - class: 'doap:Version' + 'doap:name': fgbio + 'doap:revision': 1.2.0 From 49115981f5c36b7058a9bf523e4929f6cafd61a6 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 14 Aug 2020 09:50:18 -0400 Subject: [PATCH 147/476] Update SUMMARY.md Adding Fgbio FastqToBam --- docs/SUMMARY.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index ca421b79..50c5c686 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -9,6 +9,8 @@ * [merge v2.28.0_cv2](../bedtools_merge_v2.28.0_cv2/README.md) * Disambiguate * [v1.0.0](../disambiguate_1.0.0/README.md) + * Fgbio + * [FastqToBam v1.2.0](../fgbio_fastq_to_bam_1.2.0/README.md) * GATK * [ApplyBQSR v4.1.2.0](../gatk_ApplyBQSR_4.1.2.0/README.md) * [BaseRecalibrator v4.1.2.0](../gatk_BaseRecalibrator_4.1.2.0/README.md) From 26b66d419458db22d7e5a1a643b00e29388a0523 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 14 Aug 2020 09:50:44 -0400 Subject: [PATCH 148/476] Update SUMMARY.md --- docs/SUMMARY.md | 91 ++++++++++++++++++++++++------------------------- 1 file changed, 45 insertions(+), 46 deletions(-) diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 50c5c686..c997b150 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -1,48 +1,47 @@ # Table of contents -* [MSK-ACCESS command-line tools](README.md) - * ABRA2 - * [v2.17](../abra2_2.17/README.md) - * [v2.19](../abra2_2.19/README.md) - * Bedtools - * [genomecov v2.28.0_cv2](../bedtools_genomecov_v2.28.0_cv2/README.md) - * [merge v2.28.0_cv2](../bedtools_merge_v2.28.0_cv2/README.md) - * Disambiguate - * [v1.0.0](../disambiguate_1.0.0/README.md) - * Fgbio - * [FastqToBam v1.2.0](../fgbio_fastq_to_bam_1.2.0/README.md) - * GATK - * [ApplyBQSR v4.1.2.0](../gatk_ApplyBQSR_4.1.2.0/README.md) - * [BaseRecalibrator v4.1.2.0](../gatk_BaseRecalibrator_4.1.2.0/README.md) - * Manta - * [Manta v1.5.1](../manta_1.5.1/README.md) - * Marianas - * [Collapsing First Pass v1.8.1](../marianas_collapsing_first_pass_1.8.1/README.md) - * 
[Collapsing Second Pass v1.8.1](../marianas_collapsing_second_pass_1.8.1/README.md) - * [Process Loop UMI v1.8.1](../marianas_process_loop_umi_1.8.1/README.md) - * [Seprate BAMs v1.8.1](../marianas_separate_bams_1.8.1/README.md) - * MuTect - * [MuTect 1.1.5](../mutect_1.1.5/README.md) - * Merge Fastq - * [v0.1.7](../merge_fastq_0.1.7/README.md) - * Picard Tools - * [AddOrReplaceReadGroups v1.96](../picard_add_or_replace_read_groups_1.96/README.md) - * [AddOrReplaceReadGroups v2.21.2](../picard_add_or_replace_read_groups_2.21.2/README.md) - * [CollectAlignmentSummaryMetrics v2.8.1](../picard_collect_alignment_summary_metrics_2.8.1/README.md) - * [CollectAlignmentSummaryMetrics v2.21.2](../picard_collect_alignment_summary_metrics_2.21.2/README.md) - * [CollectMultipleMetrics v2.8.1](../picard_collectmultiplemetric_2.8.1/README.md) - * [CollectMultipleMetrics v2.21.2](../picard_collectmultiplemetric_2.21.2/README.md) - * [FixMateInformation v1.96](../picard_fix_mate_information_1.96/README.md) - * [FixMateInformation v2.21.2](../picard_fix_mate_information_2.21.2/README.md) - * [HSmetrics v2.8.1](../picard_hsmetrics_2.8.1/README.md) - * [HSmetrics v2.21.2](../picard_hsmetrics_2.21.2/README.md) - * [MarkDuplicates v1.96](../picard_mark_duplicates_1.96/README.md) - * [MarkDuplicates v2.8.1](../picard_mark_duplicates_2.8.1/README.md) - * [MarkDuplicates v2.21.2](../picard_mark_duplicates_2.21.2/README.md) - * Trim Galore - * [v0.6.2](../trim_galore_0.6.2/README.md) - * Ubuntu utilites - * [v18.04](../utilities_ubuntu_18.04/README.md) - * Waltz - * [CountReads v3.1.1](../waltz_count_reads_3.1.1/README.md) - * [PileupMetrics v3.1.1](../waltz_pileupmatrices_3.1.1/README.md) +- [MSK-ACCESS command-line tools](README.md) + - ABRA2 + - [v2.17](../abra2_2.17/README.md) \* [v2.19](../abra2_2.19/README.md) + - Bedtools + _ [genomecov v2.28.0_cv2](../bedtools_genomecov_v2.28.0_cv2/README.md) + _ [merge v2.28.0_cv2](../bedtools_merge_v2.28.0_cv2/README.md) + - Disambiguate + - 
[v1.0.0](../disambiguate_1.0.0/README.md) + - Fgbio + - [FastqToBam v1.2.0](../fgbio_fastq_to_bam_1.2.0/README.md) + - GATK + - [ApplyBQSR v4.1.2.0](../gatk_ApplyBQSR_4.1.2.0/README.md) + - [BaseRecalibrator v4.1.2.0](../gatk_BaseRecalibrator_4.1.2.0/README.md) + - Manta + - [Manta v1.5.1](../manta_1.5.1/README.md) + - Marianas + - [Collapsing First Pass v1.8.1](../marianas_collapsing_first_pass_1.8.1/README.md) + - [Collapsing Second Pass v1.8.1](../marianas_collapsing_second_pass_1.8.1/README.md) + - [Process Loop UMI v1.8.1](../marianas_process_loop_umi_1.8.1/README.md) + - [Seprate BAMs v1.8.1](../marianas_separate_bams_1.8.1/README.md) + - MuTect + - [MuTect 1.1.5](../mutect_1.1.5/README.md) + - Merge Fastq + - [v0.1.7](../merge_fastq_0.1.7/README.md) + - Picard Tools + - [AddOrReplaceReadGroups v1.96](../picard_add_or_replace_read_groups_1.96/README.md) + - [AddOrReplaceReadGroups v2.21.2](../picard_add_or_replace_read_groups_2.21.2/README.md) + - [CollectAlignmentSummaryMetrics v2.8.1](../picard_collect_alignment_summary_metrics_2.8.1/README.md) + - [CollectAlignmentSummaryMetrics v2.21.2](../picard_collect_alignment_summary_metrics_2.21.2/README.md) + - [CollectMultipleMetrics v2.8.1](../picard_collectmultiplemetric_2.8.1/README.md) + - [CollectMultipleMetrics v2.21.2](../picard_collectmultiplemetric_2.21.2/README.md) + - [FixMateInformation v1.96](../picard_fix_mate_information_1.96/README.md) + - [FixMateInformation v2.21.2](../picard_fix_mate_information_2.21.2/README.md) + - [HSmetrics v2.8.1](../picard_hsmetrics_2.8.1/README.md) + - [HSmetrics v2.21.2](../picard_hsmetrics_2.21.2/README.md) + - [MarkDuplicates v1.96](../picard_mark_duplicates_1.96/README.md) + - [MarkDuplicates v2.8.1](../picard_mark_duplicates_2.8.1/README.md) + - [MarkDuplicates v2.21.2](../picard_mark_duplicates_2.21.2/README.md) + - Trim Galore + - [v0.6.2](../trim_galore_0.6.2/README.md) + - Ubuntu utilites + - [v18.04](../utilities_ubuntu_18.04/README.md) + - Waltz + - [CountReads 
v3.1.1](../waltz_count_reads_3.1.1/README.md) + - [PileupMetrics v3.1.1](../waltz_pileupmatrices_3.1.1/README.md) From ad2e600d88945570e6b8fbfc05d6b4e868f4432a Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 14 Aug 2020 10:52:25 -0400 Subject: [PATCH 149/476] Adding Fgbio GroupReadsByUmi :heavy_check_mark: CWL :heavy_check_mark: Example Input :heavy_check_mark: Readme :heavy_check_mark: Using existing fgbio image --- docs/SUMMARY.md | 2 + fgbio_group_reads_by_umi_1.2.0/README.md | 68 +++++ .../example_inputs.yaml | 13 + .../fgbio_group_reads_by_umi_1.2.0.cwl | 243 ++++++++++++++++++ 4 files changed, 326 insertions(+) create mode 100644 fgbio_group_reads_by_umi_1.2.0/README.md create mode 100644 fgbio_group_reads_by_umi_1.2.0/example_inputs.yaml create mode 100644 fgbio_group_reads_by_umi_1.2.0/fgbio_group_reads_by_umi_1.2.0.cwl diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index ca421b79..e5337063 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -9,6 +9,8 @@ * [merge v2.28.0_cv2](../bedtools_merge_v2.28.0_cv2/README.md) * Disambiguate * [v1.0.0](../disambiguate_1.0.0/README.md) + * Fgbio + * [GroupReadsByUmi v1.2.0](../fgbio_group_reads_by_umi_1.2.0/README.md) * GATK * [ApplyBQSR v4.1.2.0](../gatk_ApplyBQSR_4.1.2.0/README.md) * [BaseRecalibrator v4.1.2.0](../gatk_BaseRecalibrator_4.1.2.0/README.md) diff --git a/fgbio_group_reads_by_umi_1.2.0/README.md b/fgbio_group_reads_by_umi_1.2.0/README.md new file mode 100644 index 00000000..5c4080cb --- /dev/null +++ b/fgbio_group_reads_by_umi_1.2.0/README.md @@ -0,0 +1,68 @@ +# CWL for running Fgbio - GroupReadsByUmi + +## Version of tools in docker image + +| Tool | Version | Location | +| ----- | ------- | ------------------------------------ | +| fgbio | 1.2.0 | quay.io/biocontainers/fgbio:1.2.0--0 | + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner 
fgbio_group_reads_by_umi_1.2.0.cwl example_inputs.yaml +``` + +## Usage + +```bash + +usage: fgbio_group_reads_by_umi_1.2.0.cwl [-h] + [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] + --input INPUT + [--output_file_name OUTPUT_FILE_NAME] + [--family_size_histogram FAMILY_SIZE_HISTOGRAM] + [--raw_tag RAW_TAG] + [--assign_tag ASSIGN_TAG] + [--min_map_q MIN_MAP_Q] + [--include_non_pf_reads] + --strategy STRATEGY + [--edits EDITS] + [--min_umi_length MIN_UMI_LENGTH] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --input INPUT The input BAM file. + --output_file_name OUTPUT_FILE_NAME + The output SAM or BAM file to be written. + --family_size_histogram FAMILY_SIZE_HISTOGRAM + Optional output of tag family size counts. + --raw_tag RAW_TAG The tag containing the raw UMI. + --assign_tag ASSIGN_TAG + The output tag for UMI grouping. + --min_map_q MIN_MAP_Q + Minimum mapping quality. + --include_non_pf_reads + --strategy STRATEGY The UMI assignment strategy. + (identity,edit,adjacency,paired) + --edits EDITS The allowable number of edits between UMIs. + --min_umi_length MIN_UMI_LENGTH + The minimum UMI length. If not specified then all UMIs + must have the same length, otherwise discard reads + with UMIs shorter than this length and allow for + differing UMI lengths. 
+``` diff --git a/fgbio_group_reads_by_umi_1.2.0/example_inputs.yaml b/fgbio_group_reads_by_umi_1.2.0/example_inputs.yaml new file mode 100644 index 00000000..9fe66421 --- /dev/null +++ b/fgbio_group_reads_by_umi_1.2.0/example_inputs.yaml @@ -0,0 +1,13 @@ +assign_tag: null +edits: null +family_size_histogram: null +include_non_pf_reads: null +input: /path/to/bam_file +memory_overhead: null +memory_per_job: null +min_map_q: null +min_umi_length: null +number_of_threads: null +output_file_name: null +raw_tag: null +strategy: paired diff --git a/fgbio_group_reads_by_umi_1.2.0/fgbio_group_reads_by_umi_1.2.0.cwl b/fgbio_group_reads_by_umi_1.2.0/fgbio_group_reads_by_umi_1.2.0.cwl new file mode 100644 index 00000000..1ed15334 --- /dev/null +++ b/fgbio_group_reads_by_umi_1.2.0/fgbio_group_reads_by_umi_1.2.0.cwl @@ -0,0 +1,243 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: fgbio_group_reads_by_umi_1_2_0 +baseCommand: + - fgbio +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: input + type: File + inputBinding: + position: 0 + prefix: '--input' + shellQuote: false + doc: The input BAM file. + - id: output_file_name + type: string? + doc: The output SAM or BAM file to be written. + - id: family_size_histogram + type: string? + inputBinding: + position: 0 + prefix: '--family-size-histogram' + doc: Optional output of tag family size counts. + - id: raw_tag + type: string? + inputBinding: + position: 0 + prefix: '--raw-tag' + doc: The tag containing the raw UMI. + - id: assign_tag + type: string? + inputBinding: + position: 0 + prefix: '--assign-tag' + doc: The output tag for UMI grouping. + - id: min_map_q + type: int? 
+ inputBinding: + position: 0 + prefix: '--min-map-q' + doc: Minimum mapping quality. + - id: include_non_pf_reads + type: boolean? + inputBinding: + position: 0 + prefix: '--include-non-pf-reads' + - id: strategy + type: string + inputBinding: + position: 0 + prefix: '--strategy' + doc: 'The UMI assignment strategy. (identity,edit,adjacency,paired)' + - id: edits + type: int? + inputBinding: + position: 0 + prefix: '--edits' + doc: The allowable number of edits between UMIs. + - id: min_umi_length + type: int? + inputBinding: + position: 0 + prefix: '--min-umi-length' + doc: >- + The minimum UMI length. If not specified then all UMIs must have the same + length, otherwise discard reads with UMIs shorter than this length and + allow for differing UMI lengths. +outputs: + - id: group_reads_by_umi_bam + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_file_name) + return inputs.output_file_name; + return inputs.input.basename.replace(/.bam/,'_group.bam'); + } + - id: group_reads_by_umi_histogram + type: File + outputBinding: + glob: |- + ${ + if(inputs.family_size_histogram) + return inputs.family_size_histogram + } +doc: >- + Groups reads together that appear to have come from the same original + molecule. Reads are grouped by template, and then templates are sorted by the + 5’ mapping positions of the reads from the template, used from earliest + mapping position to latest. Reads that have the same end positions are then + sub-grouped by UMI sequence. + + + Accepts reads in any order (including unsorted) and outputs reads sorted by: + + + The lower genome coordinate of the two outer ends of the templates + + The sequencing library + + The assigned UMI tag + + Read Name + + Reads are aggressively filtered out so that only high quality reads/mappings + are taken forward. Single-end reads must have mapping quality >= min-map-q. + Paired-end reads must have both reads mapped to the same chromosome with both + reads having mapping quality >= min-mapq. 
(Note: the MQ tag is required on + reads with mapped mates). + + + This is done with the expectation that the next step is building consensus + reads, where it is undesirable to either: + + + Assign reads together that are really from different source molecules + + Build two groups from reads that are really from the same molecule + + Errors in mapping reads could lead to both and therefore are minimized. + + + Grouping of UMIs is performed by one of three strategies: + + + 1. identity: only reads with identical UMI sequences are grouped together. + This strategy may be useful for evaluating data, but should generally be + avoided as it will generate multiple UMI groups per original molecule in the + presence of errors. + + 2. edit: reads are clustered into groups such that each read within a group + has at least one other read in the group with <= edits differences and there + are inter-group pairings with <= edits differences. Effective when there are + small numbers of reads per UMI, but breaks down at very high coverage of UMIs. + + 3. adjacency: a version of the directed adjacency method described in + umi_tools that allows for errors between UMIs but only when there is a count + gradient. + + 4. paired: similar to adjacency but for methods that produce template with a + pair of UMIs such that a read with A-B is related to but not identical to a + read with B-A. Expects the pair of UMIs to be stored in a single tag, + separated by a hyphen (e.g. ACGT-CCGG). The molecular IDs produced have more + structure than for single UMI strategies, and are of the form {base}/{AB|BA}. + E.g. two UMI pairs would be mapped as follows AAAA-GGGG -> 1/AB, GGGG-AAAA -> + 1/BA. + + edit, adjacency and paired make use of the --edits parameter to control the + matching of non-identical UMIs. + + + By default, all UMIs must be the same length. 
If --min-umi-length=len is + specified then reads that have a UMI shorter than len will be discarded, and + when comparing UMIs of different lengths, the first len bases will be + compared, where len is the length of the shortest UMI. The UMI length is the + number of [ACGT] bases in the UMI (i.e. does not count dashes and other + non-ACGT characters). This option is not implemented for reads with UMI pairs + (i.e. using the paired assigner). +label: fgbio_group_reads_by_umi_1.2.0 +arguments: + - position: 0 + prefix: '' + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx10G" + } + else { + return "-Xmx4G" + } + } + - position: 0 + valueFrom: '-XX:-UseGCOverheadLimit' + - position: 0 + prefix: '-Djava.io.tmpdir=' + separate: false + shellQuote: false + valueFrom: '${ return runtime.tmpdir}' + - position: 0 + valueFrom: GroupReadsByUmi + - position: 0 + prefix: '--output' + shellQuote: false + valueFrom: |- + ${ + if(inputs.output_file_name) + return inputs.output_file_name; + return inputs.input.basename.replace(/.bam/,'_group.bam'); + } +requirements: + - class: ShellCommandRequirement + - class: ResourceRequirement + ramMin: 15000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'quay.io/biocontainers/fgbio:1.2.0--0' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 
'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': fgbio GroupReadsByUmi + 'doap:revision': 1.2.0 From e81e892cfff609095d5cc981363209a19327cc08 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 14 Aug 2020 11:44:38 -0400 Subject: [PATCH 150/476] Update fgbio_fastq_to_bam_1.2.0.cwl Made more specific output id --- fgbio_fastq_to_bam_1.2.0/fgbio_fastq_to_bam_1.2.0.cwl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fgbio_fastq_to_bam_1.2.0/fgbio_fastq_to_bam_1.2.0.cwl b/fgbio_fastq_to_bam_1.2.0/fgbio_fastq_to_bam_1.2.0.cwl index e2d695d8..895cc001 100644 --- a/fgbio_fastq_to_bam_1.2.0/fgbio_fastq_to_bam_1.2.0.cwl +++ b/fgbio_fastq_to_bam_1.2.0/fgbio_fastq_to_bam_1.2.0.cwl @@ -133,7 +133,7 @@ inputs: shellQuote: false doc: 'Date the run was produced, to insert into the read group header' outputs: - - id: bam + - id: fastqtobam_ubam type: File outputBinding: glob: |- From 03a2f6dd9c42feca4aa788ada039369e9999685a Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 14 Aug 2020 11:47:29 -0400 Subject: [PATCH 151/476] Update fgbio_group_reads_by_umi_1.2.0.cwl Keep in memory values synced --- .../fgbio_group_reads_by_umi_1.2.0.cwl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fgbio_group_reads_by_umi_1.2.0/fgbio_group_reads_by_umi_1.2.0.cwl b/fgbio_group_reads_by_umi_1.2.0/fgbio_group_reads_by_umi_1.2.0.cwl index 1ed15334..6d58b7c5 100644 --- a/fgbio_group_reads_by_umi_1.2.0/fgbio_group_reads_by_umi_1.2.0.cwl +++ b/fgbio_group_reads_by_umi_1.2.0/fgbio_group_reads_by_umi_1.2.0.cwl @@ -194,7 +194,7 @@ arguments: return "-Xmx10G" } else { - return "-Xmx4G" + return "-Xmx10G" } } - position: 0 From c832d04b3d7815f726dc74f0c91fc1b7e0605aa9 Mon Sep 
17 00:00:00 2001 From: Ronak Shah Date: Fri, 14 Aug 2020 11:49:54 -0400 Subject: [PATCH 152/476] Update fgbio_fastq_to_bam_1.2.0.cwl Removing Alt Prefix --- .../fgbio_fastq_to_bam_1.2.0.cwl | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/fgbio_fastq_to_bam_1.2.0/fgbio_fastq_to_bam_1.2.0.cwl b/fgbio_fastq_to_bam_1.2.0/fgbio_fastq_to_bam_1.2.0.cwl index 895cc001..cd2c15dd 100644 --- a/fgbio_fastq_to_bam_1.2.0/fgbio_fastq_to_bam_1.2.0.cwl +++ b/fgbio_fastq_to_bam_1.2.0/fgbio_fastq_to_bam_1.2.0.cwl @@ -5,7 +5,7 @@ $namespaces: doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' sbg: 'https://www.sevenbridges.com/' -id: fgbio_fastq_to_bam_1_2_0 +id: fgbio_fastq_to_bam_1.2.0 baseCommand: - fgbio inputs: @@ -39,16 +39,14 @@ inputs: doc: >- Read structures, one for each of the FASTQs. https://github.com/fulcrumgenomics/fgbio/wiki/Read-Structures - - 'sbg:altPrefix': '-s' - id: sort + - id: sort type: boolean? inputBinding: position: 0 prefix: '--sort' shellQuote: false doc: 'If true, queryname sort the BAM file, otherwise preserve input order.' - - 'sbg:altPrefix': '-u' - id: umi-tag + - id: umi-tag type: string? 
inputBinding: position: 0 @@ -133,7 +131,7 @@ inputs: shellQuote: false doc: 'Date the run was produced, to insert into the read group header' outputs: - - id: fastqtobam_ubam + - id: fastq_to_bam_ubam type: File outputBinding: glob: |- @@ -239,5 +237,5 @@ requirements: 'foaf:name': Memorial Sloan Kettering Cancer Center 'doap:release': - class: 'doap:Version' - 'doap:name': fgbio + 'doap:name': fgbio FastqToBam 'doap:revision': 1.2.0 From 0159cd449c505cbfa8d02b748fca5c0fe2342116 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 14 Aug 2020 11:51:09 -0400 Subject: [PATCH 153/476] Update SUMMARY.md --- docs/SUMMARY.md | 91 ++++++++++++++++++++++++------------------------- 1 file changed, 45 insertions(+), 46 deletions(-) diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index e5337063..cfae1a5b 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -1,48 +1,47 @@ # Table of contents -* [MSK-ACCESS command-line tools](README.md) - * ABRA2 - * [v2.17](../abra2_2.17/README.md) - * [v2.19](../abra2_2.19/README.md) - * Bedtools - * [genomecov v2.28.0_cv2](../bedtools_genomecov_v2.28.0_cv2/README.md) - * [merge v2.28.0_cv2](../bedtools_merge_v2.28.0_cv2/README.md) - * Disambiguate - * [v1.0.0](../disambiguate_1.0.0/README.md) - * Fgbio - * [GroupReadsByUmi v1.2.0](../fgbio_group_reads_by_umi_1.2.0/README.md) - * GATK - * [ApplyBQSR v4.1.2.0](../gatk_ApplyBQSR_4.1.2.0/README.md) - * [BaseRecalibrator v4.1.2.0](../gatk_BaseRecalibrator_4.1.2.0/README.md) - * Manta - * [Manta v1.5.1](../manta_1.5.1/README.md) - * Marianas - * [Collapsing First Pass v1.8.1](../marianas_collapsing_first_pass_1.8.1/README.md) - * [Collapsing Second Pass v1.8.1](../marianas_collapsing_second_pass_1.8.1/README.md) - * [Process Loop UMI v1.8.1](../marianas_process_loop_umi_1.8.1/README.md) - * [Seprate BAMs v1.8.1](../marianas_separate_bams_1.8.1/README.md) - * MuTect - * [MuTect 1.1.5](../mutect_1.1.5/README.md) - * Merge Fastq - * [v0.1.7](../merge_fastq_0.1.7/README.md) - * Picard Tools - * 
[AddOrReplaceReadGroups v1.96](../picard_add_or_replace_read_groups_1.96/README.md) - * [AddOrReplaceReadGroups v2.21.2](../picard_add_or_replace_read_groups_2.21.2/README.md) - * [CollectAlignmentSummaryMetrics v2.8.1](../picard_collect_alignment_summary_metrics_2.8.1/README.md) - * [CollectAlignmentSummaryMetrics v2.21.2](../picard_collect_alignment_summary_metrics_2.21.2/README.md) - * [CollectMultipleMetrics v2.8.1](../picard_collectmultiplemetric_2.8.1/README.md) - * [CollectMultipleMetrics v2.21.2](../picard_collectmultiplemetric_2.21.2/README.md) - * [FixMateInformation v1.96](../picard_fix_mate_information_1.96/README.md) - * [FixMateInformation v2.21.2](../picard_fix_mate_information_2.21.2/README.md) - * [HSmetrics v2.8.1](../picard_hsmetrics_2.8.1/README.md) - * [HSmetrics v2.21.2](../picard_hsmetrics_2.21.2/README.md) - * [MarkDuplicates v1.96](../picard_mark_duplicates_1.96/README.md) - * [MarkDuplicates v2.8.1](../picard_mark_duplicates_2.8.1/README.md) - * [MarkDuplicates v2.21.2](../picard_mark_duplicates_2.21.2/README.md) - * Trim Galore - * [v0.6.2](../trim_galore_0.6.2/README.md) - * Ubuntu utilites - * [v18.04](../utilities_ubuntu_18.04/README.md) - * Waltz - * [CountReads v3.1.1](../waltz_count_reads_3.1.1/README.md) - * [PileupMetrics v3.1.1](../waltz_pileupmatrices_3.1.1/README.md) +- [MSK-ACCESS command-line tools](README.md) + - ABRA2 + - [v2.17](../abra2_2.17/README.md) \* [v2.19](../abra2_2.19/README.md) + - Bedtools + _ [genomecov v2.28.0_cv2](../bedtools_genomecov_v2.28.0_cv2/README.md) + _ [merge v2.28.0_cv2](../bedtools_merge_v2.28.0_cv2/README.md) + - Disambiguate + - [v1.0.0](../disambiguate_1.0.0/README.md) + - Fgbio + - [GroupReadsByUmi v1.2.0](../fgbio_group_reads_by_umi_1.2.0/README.md) + - GATK + - [ApplyBQSR v4.1.2.0](../gatk_ApplyBQSR_4.1.2.0/README.md) + - [BaseRecalibrator v4.1.2.0](../gatk_BaseRecalibrator_4.1.2.0/README.md) + - Manta + - [Manta v1.5.1](../manta_1.5.1/README.md) + - Marianas + - [Collapsing First Pass 
v1.8.1](../marianas_collapsing_first_pass_1.8.1/README.md) + - [Collapsing Second Pass v1.8.1](../marianas_collapsing_second_pass_1.8.1/README.md) + - [Process Loop UMI v1.8.1](../marianas_process_loop_umi_1.8.1/README.md) + - [Seprate BAMs v1.8.1](../marianas_separate_bams_1.8.1/README.md) + - MuTect + - [MuTect 1.1.5](../mutect_1.1.5/README.md) + - Merge Fastq + - [v0.1.7](../merge_fastq_0.1.7/README.md) + - Picard Tools + - [AddOrReplaceReadGroups v1.96](../picard_add_or_replace_read_groups_1.96/README.md) + - [AddOrReplaceReadGroups v2.21.2](../picard_add_or_replace_read_groups_2.21.2/README.md) + - [CollectAlignmentSummaryMetrics v2.8.1](../picard_collect_alignment_summary_metrics_2.8.1/README.md) + - [CollectAlignmentSummaryMetrics v2.21.2](../picard_collect_alignment_summary_metrics_2.21.2/README.md) + - [CollectMultipleMetrics v2.8.1](../picard_collectmultiplemetric_2.8.1/README.md) + - [CollectMultipleMetrics v2.21.2](../picard_collectmultiplemetric_2.21.2/README.md) + - [FixMateInformation v1.96](../picard_fix_mate_information_1.96/README.md) + - [FixMateInformation v2.21.2](../picard_fix_mate_information_2.21.2/README.md) + - [HSmetrics v2.8.1](../picard_hsmetrics_2.8.1/README.md) + - [HSmetrics v2.21.2](../picard_hsmetrics_2.21.2/README.md) + - [MarkDuplicates v1.96](../picard_mark_duplicates_1.96/README.md) + - [MarkDuplicates v2.8.1](../picard_mark_duplicates_2.8.1/README.md) + - [MarkDuplicates v2.21.2](../picard_mark_duplicates_2.21.2/README.md) + - Trim Galore + - [v0.6.2](../trim_galore_0.6.2/README.md) + - Ubuntu utilites + - [v18.04](../utilities_ubuntu_18.04/README.md) + - Waltz + - [CountReads v3.1.1](../waltz_count_reads_3.1.1/README.md) + - [PileupMetrics v3.1.1](../waltz_pileupmatrices_3.1.1/README.md) From 86e43db0dc68cce9238aa0ba9680052169ee45be Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 14 Aug 2020 15:15:55 -0400 Subject: [PATCH 154/476] Adding CallDuplexConsensusReads :heavy_check_mark: CWL :heavy_check_mark: example input 
:heavy_check_mark: Readme :heavy_check_mark: Updated Summary Closes #56 --- docs/SUMMARY.md | 89 ++++---- .../README.md | 79 +++++++ .../example_inputs.yaml | 17 ++ ...gbio_call_duplex_consensus_reads_1.2.0.cwl | 212 ++++++++++++++++++ 4 files changed, 353 insertions(+), 44 deletions(-) create mode 100644 fgbio_call_duplex_consensus_reads_1.2.0/README.md create mode 100644 fgbio_call_duplex_consensus_reads_1.2.0/example_inputs.yaml create mode 100644 fgbio_call_duplex_consensus_reads_1.2.0/fgbio_call_duplex_consensus_reads_1.2.0.cwl diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index ca421b79..c40bebb3 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -1,46 +1,47 @@ # Table of contents -* [MSK-ACCESS command-line tools](README.md) - * ABRA2 - * [v2.17](../abra2_2.17/README.md) - * [v2.19](../abra2_2.19/README.md) - * Bedtools - * [genomecov v2.28.0_cv2](../bedtools_genomecov_v2.28.0_cv2/README.md) - * [merge v2.28.0_cv2](../bedtools_merge_v2.28.0_cv2/README.md) - * Disambiguate - * [v1.0.0](../disambiguate_1.0.0/README.md) - * GATK - * [ApplyBQSR v4.1.2.0](../gatk_ApplyBQSR_4.1.2.0/README.md) - * [BaseRecalibrator v4.1.2.0](../gatk_BaseRecalibrator_4.1.2.0/README.md) - * Manta - * [Manta v1.5.1](../manta_1.5.1/README.md) - * Marianas - * [Collapsing First Pass v1.8.1](../marianas_collapsing_first_pass_1.8.1/README.md) - * [Collapsing Second Pass v1.8.1](../marianas_collapsing_second_pass_1.8.1/README.md) - * [Process Loop UMI v1.8.1](../marianas_process_loop_umi_1.8.1/README.md) - * [Seprate BAMs v1.8.1](../marianas_separate_bams_1.8.1/README.md) - * MuTect - * [MuTect 1.1.5](../mutect_1.1.5/README.md) - * Merge Fastq - * [v0.1.7](../merge_fastq_0.1.7/README.md) - * Picard Tools - * [AddOrReplaceReadGroups v1.96](../picard_add_or_replace_read_groups_1.96/README.md) - * [AddOrReplaceReadGroups v2.21.2](../picard_add_or_replace_read_groups_2.21.2/README.md) - * [CollectAlignmentSummaryMetrics v2.8.1](../picard_collect_alignment_summary_metrics_2.8.1/README.md) - 
* [CollectAlignmentSummaryMetrics v2.21.2](../picard_collect_alignment_summary_metrics_2.21.2/README.md) - * [CollectMultipleMetrics v2.8.1](../picard_collectmultiplemetric_2.8.1/README.md) - * [CollectMultipleMetrics v2.21.2](../picard_collectmultiplemetric_2.21.2/README.md) - * [FixMateInformation v1.96](../picard_fix_mate_information_1.96/README.md) - * [FixMateInformation v2.21.2](../picard_fix_mate_information_2.21.2/README.md) - * [HSmetrics v2.8.1](../picard_hsmetrics_2.8.1/README.md) - * [HSmetrics v2.21.2](../picard_hsmetrics_2.21.2/README.md) - * [MarkDuplicates v1.96](../picard_mark_duplicates_1.96/README.md) - * [MarkDuplicates v2.8.1](../picard_mark_duplicates_2.8.1/README.md) - * [MarkDuplicates v2.21.2](../picard_mark_duplicates_2.21.2/README.md) - * Trim Galore - * [v0.6.2](../trim_galore_0.6.2/README.md) - * Ubuntu utilites - * [v18.04](../utilities_ubuntu_18.04/README.md) - * Waltz - * [CountReads v3.1.1](../waltz_count_reads_3.1.1/README.md) - * [PileupMetrics v3.1.1](../waltz_pileupmatrices_3.1.1/README.md) +- [MSK-ACCESS command-line tools](README.md) + - ABRA2 + - [v2.17](../abra2_2.17/README.md) \* [v2.19](../abra2_2.19/README.md) + - Bedtools + _ [genomecov v2.28.0_cv2](../bedtools_genomecov_v2.28.0_cv2/README.md) + _ [merge v2.28.0_cv2](../bedtools_merge_v2.28.0_cv2/README.md) + - Disambiguate + - [v1.0.0](../disambiguate_1.0.0/README.md) + - Fgbio + - [CallDuplexConsensusReads v1.2.0](../fgbio_call_duplex_consensus_reads_1.2.0/README.md) + - GATK + - [ApplyBQSR v4.1.2.0](../gatk_ApplyBQSR_4.1.2.0/README.md) + - [BaseRecalibrator v4.1.2.0](../gatk_BaseRecalibrator_4.1.2.0/README.md) + - Manta + - [Manta v1.5.1](../manta_1.5.1/README.md) + - Marianas + - [Collapsing First Pass v1.8.1](../marianas_collapsing_first_pass_1.8.1/README.md) + - [Collapsing Second Pass v1.8.1](../marianas_collapsing_second_pass_1.8.1/README.md) + - [Process Loop UMI v1.8.1](../marianas_process_loop_umi_1.8.1/README.md) + - [Seprate BAMs 
v1.8.1](../marianas_separate_bams_1.8.1/README.md) + - MuTect + - [MuTect 1.1.5](../mutect_1.1.5/README.md) + - Merge Fastq + - [v0.1.7](../merge_fastq_0.1.7/README.md) + - Picard Tools + - [AddOrReplaceReadGroups v1.96](../picard_add_or_replace_read_groups_1.96/README.md) + - [AddOrReplaceReadGroups v2.21.2](../picard_add_or_replace_read_groups_2.21.2/README.md) + - [CollectAlignmentSummaryMetrics v2.8.1](../picard_collect_alignment_summary_metrics_2.8.1/README.md) + - [CollectAlignmentSummaryMetrics v2.21.2](../picard_collect_alignment_summary_metrics_2.21.2/README.md) + - [CollectMultipleMetrics v2.8.1](../picard_collectmultiplemetric_2.8.1/README.md) + - [CollectMultipleMetrics v2.21.2](../picard_collectmultiplemetric_2.21.2/README.md) + - [FixMateInformation v1.96](../picard_fix_mate_information_1.96/README.md) + - [FixMateInformation v2.21.2](../picard_fix_mate_information_2.21.2/README.md) + - [HSmetrics v2.8.1](../picard_hsmetrics_2.8.1/README.md) + - [HSmetrics v2.21.2](../picard_hsmetrics_2.21.2/README.md) + - [MarkDuplicates v1.96](../picard_mark_duplicates_1.96/README.md) + - [MarkDuplicates v2.8.1](../picard_mark_duplicates_2.8.1/README.md) + - [MarkDuplicates v2.21.2](../picard_mark_duplicates_2.21.2/README.md) + - Trim Galore + - [v0.6.2](../trim_galore_0.6.2/README.md) + - Ubuntu utilites + - [v18.04](../utilities_ubuntu_18.04/README.md) + - Waltz + - [CountReads v3.1.1](../waltz_count_reads_3.1.1/README.md) + - [PileupMetrics v3.1.1](../waltz_pileupmatrices_3.1.1/README.md) diff --git a/fgbio_call_duplex_consensus_reads_1.2.0/README.md b/fgbio_call_duplex_consensus_reads_1.2.0/README.md new file mode 100644 index 00000000..e7a9060e --- /dev/null +++ b/fgbio_call_duplex_consensus_reads_1.2.0/README.md @@ -0,0 +1,79 @@ +# CWL for running Fgbio - CallDuplexConsensusReads + +## Version of tools in docker image + +| Tool | Version | Location | +| ----- | ------- | ------------------------------------ | +| fgbio | 1.2.0 | 
quay.io/biocontainers/fgbio:1.2.0--0 | + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner fgbio_call_duplex_consensus_reads_1.2.0.cwl example_inputs.yaml +``` + +## Usage + +```bash + +usage: fgbio_call_duplex_consensus_reads_1.2.0.cwl [-h] + [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] + --input INPUT + [--output_file_name OUTPUT_FILE_NAME] + [--read_name_prefix READ_NAME_PREFIX] + [--read_group_id READ_GROUP_ID] + [--error_rate_pre_umi ERROR_RATE_PRE_UMI] + [--error_rate_post_umi ERROR_RATE_POST_UMI] + [--min_input_base_quality MIN_INPUT_BASE_QUALITY] + [--trim] + [--sort_order SORT_ORDER] + [--min_reads MIN_READS] + [--max_reads_per_strand MAX_READS_PER_STRAND] + [--threads THREADS] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --input INPUT The input SAM or BAM file. + --output_file_name OUTPUT_FILE_NAME + Output SAM or BAM file to write consensus reads. + --read_name_prefix READ_NAME_PREFIX + The prefix all consensus read names + --read_group_id READ_GROUP_ID + The new read group ID for all the consensus reads. + --error_rate_pre_umi ERROR_RATE_PRE_UMI + The Phred-scaled error rate for an error prior to the + UMIs being integrated. + --error_rate_post_umi ERROR_RATE_POST_UMI + The Phred-scaled error rate for an error post the UMIs + have been integrated. + --min_input_base_quality MIN_INPUT_BASE_QUALITY + Ignore bases in raw reads that have Q below this + value. 
+ --trim If true, quality trim input reads in addition to + masking low Q bases + --sort_order SORT_ORDER + The sort order of the output, if :none: then the same + as the input. + --min_reads MIN_READS + The minimum number of input reads to a consensus read. + --max_reads_per_strand MAX_READS_PER_STRAND + The maximum number of reads to use when building a + single-strand consensus. If more than this many reads + are present in a tag family, the family is randomly + downsampled to exactly max-reads reads. +``` diff --git a/fgbio_call_duplex_consensus_reads_1.2.0/example_inputs.yaml b/fgbio_call_duplex_consensus_reads_1.2.0/example_inputs.yaml new file mode 100644 index 00000000..59eaa165 --- /dev/null +++ b/fgbio_call_duplex_consensus_reads_1.2.0/example_inputs.yaml @@ -0,0 +1,17 @@ +error_rate_post_umi: null +error_rate_pre_umi: null +input: /path/to/bam_file +max_reads_per_strand: null +memory_overhead: null +memory_per_job: null +min_input_base_quality: null +min_reads: + - 1 + - 1 + - 0 +number_of_threads: null +output_file_name: null +read_group_id: null +read_name_prefix: null +sort_order: null +trim: null diff --git a/fgbio_call_duplex_consensus_reads_1.2.0/fgbio_call_duplex_consensus_reads_1.2.0.cwl b/fgbio_call_duplex_consensus_reads_1.2.0/fgbio_call_duplex_consensus_reads_1.2.0.cwl new file mode 100644 index 00000000..b78e0db4 --- /dev/null +++ b/fgbio_call_duplex_consensus_reads_1.2.0/fgbio_call_duplex_consensus_reads_1.2.0.cwl @@ -0,0 +1,212 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: fgbio_call_duplex_consensus_reads_1_2_0 +baseCommand: + - fgbio +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? 
+ - id: input + type: File + inputBinding: + position: 0 + prefix: '--input' + shellQuote: false + doc: The input SAM or BAM file. + - id: output_file_name + type: string? + doc: Output SAM or BAM file to write consensus reads. + - id: read_name_prefix + type: string? + inputBinding: + position: 0 + prefix: '--read-name-prefix' + doc: The prefix all consensus read names + - id: read_group_id + type: string? + inputBinding: + position: 0 + prefix: '--read-group-id' + doc: The new read group ID for all the consensus reads. + - id: error_rate_pre_umi + type: int? + inputBinding: + position: 0 + prefix: '--error-rate-pre-umi' + doc: >- + The Phred-scaled error rate for an error prior to the UMIs being + integrated. + - id: error_rate_post_umi + type: int? + inputBinding: + position: 0 + prefix: '--error-rate-post-umi' + doc: >- + The Phred-scaled error rate for an error post the UMIs have been + integrated. + - id: min_input_base_quality + type: int? + inputBinding: + position: 0 + prefix: '--min-input-base-quality' + doc: Ignore bases in raw reads that have Q below this value. + - id: trim + type: boolean? + inputBinding: + position: 0 + prefix: '--trim' + doc: 'If true, quality trim input reads in addition to masking low Q bases' + - id: sort_order + type: string? + inputBinding: + position: 0 + valueFrom: '--sort-order' + doc: 'The sort order of the output, if :none: then the same as the input.' + - id: min_reads + type: 'int[]' + inputBinding: + position: 0 + prefix: '--min-reads' + itemSeparator: ' ' + doc: The minimum number of input reads to a consensus read. + - id: max_reads_per_strand + type: int? + inputBinding: + position: 0 + prefix: '--max-reads-per-strand' + doc: >- + The maximum number of reads to use when building a single-strand + consensus. If more than this many reads are present in a tag family, the + family is randomly downsampled to exactly max-reads reads. 
+outputs: + - id: call_duplex_consensus_reads_bam + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_file_name) + return inputs.output_file_name; + return inputs.input.basename.replace(/.bam/,'_cons.bam'); + } +doc: >- + Calls duplex consensus sequences from reads generated from the same + double-stranded source molecule. Prior to running this tool, read must have + been grouped with GroupReadsByUmi using the paired strategy. Doing so will + apply (by default) MI tags to all reads of the form */A and */B where the /A + and /B suffixes with the same identifier denote reads that are derived from + opposite strands of the same source duplex molecule. + + + Reads from the same unique molecule are first partitioned by source strand and + assembled into single strand consensus molecules as described by + CallMolecularConsensusReads. Subsequently, for molecules that have at least + one observation of each strand, duplex consensus reads are assembled by + combining the evidence from the two single strand consensus reads. + + + Because of the nature of duplex sequencing, this tool does not support + fragment reads - if found in the input they are ignored. Similarly, read pairs + for which consensus reads cannot be generated for one or other read (R1 or R2) + are omitted from the output. + + + Consensus reads have a number of additional optional tags set in the resulting + BAM file. The tag names follow a pattern where the first letter (a, b or c) + denotes that the tag applies to the first single strand consensus (a), second + single-strand consensus (b) or the final duplex consensus (c). The second + letter is intended to capture the meaning of the tag (e.g. d=depth, m=min + depth, e=errors/error-rate) and is upper case for values that are one per read + and lower case for values that are one per base. 
+label: fgbio_call_duplex_consensus_reads_1.2.0 +arguments: + - position: 0 + prefix: '' + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx10G" + } + else { + return "-Xmx10G" + } + } + - position: 0 + valueFrom: '-XX:-UseGCOverheadLimit' + - position: 0 + prefix: '-Djava.io.tmpdir=' + separate: false + shellQuote: false + valueFrom: '${ return runtime.tmpdir}' + - position: 0 + prefix: '' + valueFrom: CallDuplexConsensusReads + - position: 0 + prefix: '--output' + shellQuote: false + valueFrom: |- + ${ + if(inputs.output_file_name) + return inputs.output_file_name; + return inputs.input.basename.replace(/.bam/,'_cons.bam'); + } + - position: 0 + prefix: '--threads' + valueFrom: |- + ${ + if(inputs.number_of_threads) + return inputs.number_of_threads + return runtime.cores + } +requirements: + - class: ShellCommandRequirement + - class: ResourceRequirement + ramMin: 1000 + coresMin: 16 + - class: DockerRequirement + dockerPull: 'quay.io/biocontainers/fgbio:1.2.0--0' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering 
Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': fgbio CallDuplexConsensusReads + 'doap:revision': 1.2.0 From 283fd4b412923faaaaecf5ebe74dc3f8871828cc Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Thu, 20 Aug 2020 16:39:13 -0400 Subject: [PATCH 155/476] add cwl, readme, and example input --- .../README.md | 61 +++++ .../example_inputs.yaml | 16 ++ ...fgbio_collect_duplex_seq_metrics_1.2.0.cwl | 232 ++++++++++++++++++ 3 files changed, 309 insertions(+) create mode 100644 fgbio_collect_duplex_seq_metrics_1.2.0/README.md create mode 100644 fgbio_collect_duplex_seq_metrics_1.2.0/example_inputs.yaml create mode 100644 fgbio_collect_duplex_seq_metrics_1.2.0/fgbio_collect_duplex_seq_metrics_1.2.0.cwl diff --git a/fgbio_collect_duplex_seq_metrics_1.2.0/README.md b/fgbio_collect_duplex_seq_metrics_1.2.0/README.md new file mode 100644 index 00000000..20371c1a --- /dev/null +++ b/fgbio_collect_duplex_seq_metrics_1.2.0/README.md @@ -0,0 +1,61 @@ +# CWL for running Fgbio - CollectDuplexSeqMetrics + +## Version of tools in docker image + +| Tool | Version | Location | +| ----- | ------- | ------------------------------------ | +| fgbio | 1.2.0 | quay.io/biocontainers/fgbio:1.2.0--0 | + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner fgbio_collect_duplex_seq_metrics_1.2.0.cwl example_inputs.yaml +``` + +## Usage + +```bash +usage: fgbio_collect_duplex_seq_metrics_1.2.0.cwl + [-h] [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] --input INPUT --output_prefix + OUTPUT_PREFIX [--intervals INTERVALS] [--description DESCRIPTION] + [--duplex_umi_counts DUPLEX_UMI_COUNTS] [--min_ab_reads MIN_AB_READS] + [--min_ba_reads MIN_BA_READS] [--umi_tag UMI_TAG] [--mi_tag MI_TAG] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + 
-h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --input INPUT Input BAM file generated by GroupReadByUmi. + --output_prefix OUTPUT_PREFIX + Prefix of output files to write. + --intervals INTERVALS + Optional set of intervals over which to restrict + analysis. [Optional]. + --description DESCRIPTION + Description of data set used to label plots. Defaults + to sample/library. [Optional]. + --duplex_umi_counts DUPLEX_UMI_COUNTS + If true, produce the .duplex_umi_counts.txt file with + counts of duplex UMI observations. [Optional]. + --min_ab_reads MIN_AB_READS + Minimum AB reads to call a tag family a 'duplex'. + [Optional]. + --min_ba_reads MIN_BA_READS + Minimum BA reads to call a tag family a 'duplex'. + [Optional]. + --umi_tag UMI_TAG The tag containing the raw UMI. [Optional]. + --mi_tag MI_TAG The output tag for UMI grouping. [Optional]. +``` diff --git a/fgbio_collect_duplex_seq_metrics_1.2.0/example_inputs.yaml b/fgbio_collect_duplex_seq_metrics_1.2.0/example_inputs.yaml new file mode 100644 index 00000000..38dd911b --- /dev/null +++ b/fgbio_collect_duplex_seq_metrics_1.2.0/example_inputs.yaml @@ -0,0 +1,16 @@ +input: + class: File + metadata: {} + path: /path/to/bam +output_prefix: prefix +intervals: + class: File? 
+ metadata: {} + path: /path/to/intervals +description: null +duplex_umi_counts: null +min_ab_reads: null +min_ba_reads: null +number_of_threads: null +umi_tag: null +mi_tag: null diff --git a/fgbio_collect_duplex_seq_metrics_1.2.0/fgbio_collect_duplex_seq_metrics_1.2.0.cwl b/fgbio_collect_duplex_seq_metrics_1.2.0/fgbio_collect_duplex_seq_metrics_1.2.0.cwl new file mode 100644 index 00000000..45f3dfbc --- /dev/null +++ b/fgbio_collect_duplex_seq_metrics_1.2.0/fgbio_collect_duplex_seq_metrics_1.2.0.cwl @@ -0,0 +1,232 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: fgbio_collect_duplex_seq_metrics_1_2_0 +baseCommand: + - fgbio +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: input + type: File + inputBinding: + position: 0 + prefix: '--input' + doc: Input BAM file generated by GroupReadByUmi. + - id: output_prefix + type: string + inputBinding: + position: 0 + prefix: '--output' + doc: Prefix of output files to write. + - id: intervals + type: File? + inputBinding: + position: 0 + prefix: '--intervals' + doc: Optional set of intervals over which to restrict analysis. [Optional]. + - id: description + type: string? + inputBinding: + position: 0 + prefix: '--description' + doc: Description of data set used to label plots. Defaults to sample/library. [Optional]. + - id: duplex_umi_counts + type: string? + inputBinding: + position: 0 + prefix: '--duplex-umi-counts' + doc: If true, produce the .duplex_umi_counts.txt file with counts of duplex UMI observations. [Optional]. + - id: min_ab_reads + type: int? + inputBinding: + position: 0 + prefix: '--min-ab-reads' + doc: Minimum AB reads to call a tag family a 'duplex'. [Optional]. 
+ - id: min_ba_reads + type: int? + inputBinding: + position: 0 + prefix: '--min-ba-reads' + doc: >- + Minimum BA reads to call a tag family a 'duplex'. [Optional]. + - id: umi_tag + type: string? + inputBinding: + position: 0 + prefix: '--umi-tag' + doc: >- + The tag containing the raw UMI. [Optional]. + - id: mi_tag + type: string? + inputBinding: + position: 0 + prefix: '--mi-tag' + doc: The output tag for UMI grouping. [Optional]. +outputs: + - id: family_size + type: File + outputBinding: + glob: |- + ${ + return inputs.output_prefix + '.family_sizes.txt' + } + - id: duplex_family_size + type: File + outputBinding: + glob: |- + ${ + return inputs.output_prefix + '.duplex_family_sizes.txt' + } + - id: duplex_yield_metrics + type: File + outputBinding: + glob: |- + ${ + return inputs.output_prefix + '.duplex_yield_metrics.txt' + } + - id: umi_counts + type: File + outputBinding: + glob: |- + ${ + return inputs.output_prefix + '.umi_counts.txt' + } + - id: duplex_qc + type: File + outputBinding: + glob: |- + ${ + return inputs.output_prefix + '.duplex_qc.pdf' + } + - id: duplex_umi_counts + type: File + outputBinding: + glob: |- + ${ + return inputs.output_prefix + '.duplex_umi_counts.txt' + } +doc: >- + Collects a suite of metrics to QC duplex sequencing data. + + Inputs + ------ + + The input to this tool must be a BAM file that is either: + + 1. The exact BAM output by the 'GroupReadsByUmi' tool (in the sort-order it was produced in) + 2. A BAM file that has MI tags present on all reads (usually set by 'GroupReadsByUmi' and has been sorted with + 'SortBam' into 'TemplateCoordinate' order. + + Calculation of metrics may be restricted to a set of regions using the '--intervals' parameter. This can significantly + affect results as off-target reads in duplex sequencing experiments often have very different properties than on-target + reads due to the lack of enrichment. 
+ + Several metrics are calculated related to the fraction of tag families that have duplex coverage. The definition of + "duplex" is controlled by the '--min-ab-reads' and '--min-ba-reads' parameters. The default is to treat any tag family + with at least one observation of each strand as a duplex, but this could be made more stringent, e.g. by setting + '--min-ab-reads=3 --min-ba-reads=3'. If different thresholds are used then '--min-ab-reads' must be the higher value. + + Outputs + ------- + + The following output files are produced: + + 1. .family_sizes.txt: metrics on the frequency of different types of families of different sizes + 2. .duplex_family_sizes.txt: metrics on the frequency of duplex tag families by the number of observations + from each strand + 3. .duplex_yield_metrics.txt: summary QC metrics produced using 5%, 10%, 15%...100% of the data + 4. .umi_counts.txt: metrics on the frequency of observations of UMIs within reads and tag families + 5. .duplex_qc.pdf: a series of plots generated from the preceding metrics files for visualization + 6. .duplex_umi_counts.txt: (optional) metrics on the frequency of observations of duplex UMIs within reads + and tag families. This file is only produced if the '--duplex-umi-counts' option is used as it requires significantly + more memory to track all pairs of UMIs seen when a large number of UMI sequences are present. + + Within the metrics files the prefixes 'CS', 'SS' and 'DS' are used to mean: + + * CS: tag families where membership is defined solely on matching genome coordinates and strand + * SS: single-stranded tag families where membership is defined by genome coordinates, strand and UMI; ie. 50/A and + 50/B are considered different tag families. + * DS: double-stranded tag families where membership is collapsed across single-stranded tag families from the same + double-stranded source molecule; i.e. 
50/A and 50/B become one family + + Requirements + ------------ + + For plots to be generated R must be installed and the ggplot2 package installed with suggested dependencies. + Successfully executing the following in R will ensure a working installation: + + install.packages("ggplot2", repos="http://cran.us.r-project.org", dependencies=TRUE) + +label: fgbio_collect_duplex_seq_metrics_1.2.0 +arguments: + - position: 0 + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx10G" + } + else { + return "-Xmx10G" + } + } + - position: 0 + valueFrom: '-XX:-UseGCOverheadLimit' + - position: 0 + prefix: '-Djava.io.tmpdir=' + separate: false + shellQuote: false + valueFrom: '${ return runtime.tmpdir}' + - position: 0 + valueFrom: CollectDuplexSeqMetrics +requirements: + - class: ResourceRequirement + ramMin: 1000 + coresMin: 4 + - class: DockerRequirement + dockerPull: 'quay.io/biocontainers/fgbio:1.2.0--0' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charles Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charles Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 
'doap:name': fgbio CollectDuplexSeqMetrics + 'doap:revision': 1.2.0 From 862ae6fcf5a7544b0a6ffd74e514a241e85ec7cd Mon Sep 17 00:00:00 2001 From: Nikhil Date: Thu, 20 Aug 2020 20:07:58 -0400 Subject: [PATCH 156/476] Added picard_add_or_replace_read_groups version 4.1.8.1 Resolves: #52 --- .../README.md | 90 ++++++++ .../example_inputs.yaml | 20 ++ ...ard_add_or_replace_read_groups_4.1.8.1.cwl | 218 ++++++++++++++++++ 3 files changed, 328 insertions(+) create mode 100644 picard_add_or_replace_read_groups_4.1.8.1/README.md create mode 100644 picard_add_or_replace_read_groups_4.1.8.1/example_inputs.yaml create mode 100644 picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl diff --git a/picard_add_or_replace_read_groups_4.1.8.1/README.md b/picard_add_or_replace_read_groups_4.1.8.1/README.md new file mode 100644 index 00000000..4a3aa7b4 --- /dev/null +++ b/picard_add_or_replace_read_groups_4.1.8.1/README.md @@ -0,0 +1,90 @@ +# CWL for running Picard - AddOrReplaceReadGroups + +## Version of tools in docker image + +| Tool | Version | Location | +|--- |--- |--- | +| gatk | 4.1.8.1 | broadinstitute/gatk:4.1.8.1 | + + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner picard_add_or_replace_read_groups_4.1.8.1.cwl example_inputs.yaml +``` + +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> mkdir picardAddOrReplaceReadGroup_toil_log +> toil-cwl-runner --singularity --logFile 
/path/to/picardAddOrReplaceReadGroup_toil_log/cwltoil.log --jobStore /path/to/picardAddOrReplaceReadGroup_jobStore --batchSystem lsf --workDir /path/to/picardAddOrReplaceReadGroup_toil_log --outdir . --writeLogs /path/to/picardAddOrReplaceReadGroup_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl /path/to/inputs.yaml > picardAddOrReplaceReadGroup_toil.stdout 2> picardAddOrReplaceReadGroup_toil.stderr & +``` + +### Usage + +```bash +> toil-cwl-runner picard_add_or_replace_read_groups_4.1.8.1.cwl --help +usage: picard_add_or_replace_read_groups_4.1.8.1.cwl + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --input INPUT Input file ( sam). Required. + --output_file_name OUTPUT_FILE_NAME + Output file name (bam or sam). Not Required + --sort_order SORT_ORDER + Optional sort order to output in. If not supplied + OUTPUT is in the same order as INPUT.Default value: + null. Possible values: {unsorted, queryname, + coordinate} + --read_group_identifier READ_GROUP_IDENTIFIER + Read Group ID Default value: 1. This option can be set + to 'null' to clear the default value Required + --read_group_sequencing_center READ_GROUP_SEQUENCING_CENTER + Read Group sequencing center name Default value: null. + Required + --read_group_library READ_GROUP_LIBRARY + Read Group Library. Required + --read_group_platform_unit READ_GROUP_PLATFORM_UNIT + Read Group platform unit (eg. run barcode) Required. + --read_group_sample_name READ_GROUP_SAMPLE_NAME + Read Group sample name. Required + --read_group_sequencing_platform READ_GROUP_SEQUENCING_PLATFORM + Read Group platform (e.g.
illumina, solid) Required. + --read_group_description READ_GROUP_DESCRIPTION + Read Group description Default value: null. + --read_group_run_date READ_GROUP_RUN_DATE + Read Group run date Default value: null. + --validation_stringency VALIDATION_STRINGENCY + Validation stringency for all SAM files read by this + program. Setting stringency to SILENT can improve + performance when processing a BAM file in which + variable-length data (read, qualities, tags) do not + otherwise need to be decoded. Default value: STRICT. + This option can be set to 'null' to clear the default + value. Possible values: {STRICT,LENIENT, SILENT} + --bam_compression_level BAM_COMPRESSION_LEVEL + Compression level for all compressed files created + (e.g. BAM and GELI). Default value:5. This option can + be set to 'null' to clear the default value. + --create_bam_index Whether to create a BAM index when writing a + coordinate-sorted BAM file. Default value:false. This + option can be set to 'null' to clear the default + value. 
Possible values:{true, false} +``` + diff --git a/picard_add_or_replace_read_groups_4.1.8.1/example_inputs.yaml b/picard_add_or_replace_read_groups_4.1.8.1/example_inputs.yaml new file mode 100644 index 00000000..9c25bd7d --- /dev/null +++ b/picard_add_or_replace_read_groups_4.1.8.1/example_inputs.yaml @@ -0,0 +1,20 @@ +bam_compression_level: +create_bam_index: true +input: + class: File + path: "/path/to/bam" +memory_overhead: +memory_per_job: +number_of_threads: +output_file_name: somename_srt.bam +read_group_description: +read_group_identifier: test +read_group_library: 1 +read_group_platform_unit: bc01 +read_group_run_date: +read_group_sample_name: seracare +read_group_sequencing_platform: Illumina +read_group_sequencing_center: msk +sort_order: +tmp_dir: +validation_stringency: diff --git a/picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl b/picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl new file mode 100644 index 00000000..e6320ebe --- /dev/null +++ b/picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl @@ -0,0 +1,218 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: picard_add_or_replace_read_groups_4.1.8.1 +baseCommand: + - java +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: input + type: File + inputBinding: + position: 0 + prefix: I= + separate: false + doc: Input file ( sam). Required. + - id: output_file_name + type: string? + doc: Output file name (bam or sam). Not Required + - id: sort_order + type: string? + inputBinding: + position: 0 + prefix: SO= + separate: false + doc: >- + Optional sort order to output in.
If not supplied OUTPUT is in the same + order as INPUT.Default value: null. Possible values: {unsorted, queryname, + coordinate} + - id: read_group_identifier + type: string + inputBinding: + position: 0 + prefix: RGID= + separate: false + doc: >- + Read Group ID Default value: 1. This option can be set to 'null' to clear + the default value Required + - id: read_group_sequencing_center + type: string + inputBinding: + position: 0 + prefix: RGCN= + separate: false + doc: 'Read Group sequencing center name Default value: null. Required' + - id: read_group_library + type: string + inputBinding: + position: 0 + prefix: RGLB= + separate: false + doc: Read Group Library. Required + - id: read_group_platform_unit + type: string + inputBinding: + position: 0 + prefix: RGPU= + separate: false + doc: Read Group platform unit (eg. run barcode) Required. + - id: read_group_sample_name + type: string + inputBinding: + position: 0 + prefix: RGSM= + separate: false + doc: Read Group sample name. Required + - id: read_group_sequencing_platform + type: string + inputBinding: + position: 0 + prefix: RGPL= + separate: false + doc: 'Read Group platform (e.g. illumina, solid) Required.' + - id: read_group_description + type: string? + inputBinding: + position: 0 + prefix: RGDS= + separate: false + doc: 'Read Group description Default value: null.' + - id: read_group_run_date + type: string? + inputBinding: + position: 0 + prefix: RGDT= + separate: false + doc: 'Read Group run date Default value: null.' + - id: validation_stringency + type: string? + inputBinding: + position: 0 + prefix: VALIDATION_STRINGENCY= + separate: false + doc: >- + Validation stringency for all SAM files read by this program. Setting + stringency to SILENT can improve performance when processing a BAM file in + which variable-length data (read, qualities, tags) do not otherwise need + to be decoded. Default value: STRICT. This option can be set to 'null' to + clear the default value. 
Possible values: {STRICT,LENIENT, SILENT} + - id: bam_compression_level + type: int? + inputBinding: + position: 0 + prefix: COMPRESSION_LEVEL= + separate: false + doc: >- + Compression level for all compressed files created (e.g. BAM and GELI). + Default value:5. This option can be set to 'null' to clear the default + value. + - default: true + id: create_bam_index + type: boolean? + inputBinding: + position: 0 + prefix: CREATE_INDEX=true + doc: >- + Whether to create a BAM index when writing a coordinate-sorted BAM file. + Default value:false. This option can be set to 'null' to clear the default + value. Possible values:{true, false} +outputs: + - id: bam + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_file_name) + return inputs.output_file_name; + return inputs.input.basename.replace(/.sam$/, '_srt.bam'); + } + secondaryFiles: + - ^.bai +label: picard_add_or_replace_read_groups_4.1.8.1 +arguments: + - position: 0 + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx15G" + } + else { + return "-Xmx15G" + } + } + - position: 0 + valueFrom: "-XX:-UseGCOverheadLimit" + shellQuote: false + - position: 0 + valueFrom: "-Djava.io.tmpdir=$(runtime.tmpdir)" + shellQuote: false + - position: 0 + prefix: '-jar' + valueFrom: /gatk/gatk-package-4.1.8.1-local.jar + - position: 0 + valueFrom: AddOrReplaceReadGroups + - position: 0 + prefix: TMP_DIR= + separate: false + valueFrom: "$(runtime.tmpdir)" + - 
position: 0 + prefix: O= + separate: false + valueFrom: |- + ${ + if(inputs.output_file_name) + return inputs.output_file_name; + return inputs.input.basename.replace(/.sam$/, '_srt.bam'); + } +requirements: + - class: ResourceRequirement + ramMin: 25000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'broadinstitute/gatk:4.1.8.1' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:kumarn1@mskcc.org' + 'foaf:name': Nikhil Kumar + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': picard + 'doap:revision': 4.1.8.1 From c0b25a8d7ec1211542c32c71ea22037d96d013a3 Mon Sep 17 00:00:00 2001 From: Nikhil Date: Thu, 20 Aug 2020 20:15:35 -0400 Subject: [PATCH 157/476] Added picard_mark_duplicates version 4.1.8.1 Resolves: #53 --- picard_mark_duplicates_4.1.8.1/README.md | 77 ++++++++ .../example_inputs.yaml | 15 ++ .../picard_mark_duplicates_4.1.8.1.cwl | 181 ++++++++++++++++++ 3 files changed, 273 insertions(+) create mode 100644 picard_mark_duplicates_4.1.8.1/README.md create mode 100644 picard_mark_duplicates_4.1.8.1/example_inputs.yaml create mode 100644 picard_mark_duplicates_4.1.8.1/picard_mark_duplicates_4.1.8.1.cwl diff --git a/picard_mark_duplicates_4.1.8.1/README.md b/picard_mark_duplicates_4.1.8.1/README.md new file mode 100644 index 00000000..69f2f101 --- /dev/null +++ b/picard_mark_duplicates_4.1.8.1/README.md @@ -0,0 +1,77 @@ +# CWL for running Picard - MarkDuplicates + +## Version of tools in docker image + +| Tool | Version | Location | +|--- |--- |--- | +| picard | 2.21.2 | https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar | + + +## CWL + +- CWL specification 
1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner picard_mark_duplicates_4.1.8.1.cwl example_inputs.yaml +``` + +### Usage + +```bash +usage: picard_mark_duplicates_4.1.8.1.cwl [-h] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --input INPUT Input file (bam or sam). Required. + --output_file_name OUTPUT_FILE_NAME + Output file (bam or sam). + --duplication_metrics DUPLICATION_METRICS + File to write duplication metrics to Required. + --assume_sort_order ASSUME_SORT_ORDER + Optional sort order to output in. If not supplied + OUTPUT is in the same order as INPUT.Default value: + null. Possible values: {unsorted, queryname, + coordinate} + --tmp_dir TMP_DIR This option may be specified 0 or more times + --validation_stringency VALIDATION_STRINGENCY + Validation stringency for all SAM files read by this + program. Setting stringency to SILENT can improve + performance when processing a BAM file in which + variable-length data (read, qualities, tags) do not + otherwise need to be decoded. Default value: STRICT. + This option can be set to 'null' to clear the default + value. Possible values: {STRICT,LENIENT, SILENT} + --bam_compression_level BAM_COMPRESSION_LEVEL + Compression level for all compressed files created + (e.g. BAM and GELI). Default value:5. This option can + be set to 'null' to clear the default value. + --create_bam_index Whether to create a BAM index when writing a + coordinate-sorted BAM file. Default value:false. This + option can be set to 'null' to clear the default + value.
Possible values:{true, false} + --duplicate_scoring_strategy DUPLICATE_SCORING_STRATEGY + The scoring strategy for choosing the non-duplicate + among candidates. Default value:SUM_OF_BASE_QUALITIES. + This option can be set to 'null' to clear the default + value.Possible values: {SUM_OF_BASE_QUALITIES, + TOTAL_MAPPED_REFERENCE_LENGTH, RANDOM} + --optical_duplicate_pixel_distance OPTICAL_DUPLICATE_PIXEL_DISTANCE + The maximum offset between two duplicate clusters in + order to consider them optical duplicates. The default + is appropriate for unpatterned versions of the + Illumina platform. For the patterned flowcell models, + 2500 is moreappropriate. For other platforms and + models, users should experiment to find what works + best. Default value: 100. This option can be set to + 'null' to clear the default value. +``` diff --git a/picard_mark_duplicates_4.1.8.1/example_inputs.yaml b/picard_mark_duplicates_4.1.8.1/example_inputs.yaml new file mode 100644 index 00000000..4cb5941e --- /dev/null +++ b/picard_mark_duplicates_4.1.8.1/example_inputs.yaml @@ -0,0 +1,15 @@ +assume_sort_order: coordinate +bam_compression_level: null +create_bam_index: true +duplicate_scoring_strategy: null +duplication_metrics: test_metrics.txt +input: + class: File + path: /path/to/file.bam +memory_overhead: null +memory_per_job: null +number_of_threads: null +optical_duplicate_pixel_distance: null +output_file_name: null +tmp_dir: null +validation_stringency: null diff --git a/picard_mark_duplicates_4.1.8.1/picard_mark_duplicates_4.1.8.1.cwl b/picard_mark_duplicates_4.1.8.1/picard_mark_duplicates_4.1.8.1.cwl new file mode 100644 index 00000000..c554e0db --- /dev/null +++ b/picard_mark_duplicates_4.1.8.1/picard_mark_duplicates_4.1.8.1.cwl @@ -0,0 +1,181 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: 
picard_mark_duplicates_4.1.8.1 +baseCommand: + - java +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: input + type: File + inputBinding: + position: 0 + prefix: I= + separate: false + doc: Input file (bam or sam). Required. + - id: output_file_name + type: string? + doc: Output file (bam or sam). + - default: '$( inputs.input.basename.replace(/.bam/, ''_md.metrics'') )' + id: duplication_metrics + type: string + inputBinding: + position: 0 + prefix: M= + separate: false + doc: File to write duplication metrics to Required. + - id: assume_sort_order + type: string? + inputBinding: + position: 0 + prefix: ASO= + separate: false + doc: >- + Optional sort order to output in. If not supplied OUTPUT is in the same + order as INPUT.Default value: null. Possible values: {unsorted, queryname, + coordinate} + - id: tmp_dir + type: string? + inputBinding: + position: 0 + prefix: TMP_DIR= + separate: false + doc: This option may be specified 0 or more times + - id: validation_stringency + type: string? + inputBinding: + position: 0 + prefix: VALIDATION_STRINGENCY= + separate: false + doc: >- + Validation stringency for all SAM files read by this program. Setting + stringency to SILENT can improve performance when processing a BAM file in + which variable-length data (read, qualities, tags) do not otherwise need + to be decoded. Default value: STRICT. This option can be set to 'null' to + clear the default value. Possible values: {STRICT,LENIENT, SILENT} + - id: bam_compression_level + type: int? + inputBinding: + position: 0 + prefix: COMPRESSION_LEVEL= + separate: false + doc: >- + Compression level for all compressed files created (e.g. BAM and GELI). + Default value:5. This option can be set to 'null' to clear the default + value. + - default: true + id: create_bam_index + type: boolean? 
+ inputBinding: + position: 0 + prefix: CREATE_INDEX=true + separate: false + doc: >- + Whether to create a BAM index when writing a coordinate-sorted BAM file. + Default value:false. This option can be set to 'null' to clear the default + value. Possible values:{true, false} + - id: duplicate_scoring_strategy + type: string? + inputBinding: + position: 0 + prefix: DUPLICATE_SCORING_STRATEGY= + separate: false + doc: >- + The scoring strategy for choosing the non-duplicate among candidates. + Default value:SUM_OF_BASE_QUALITIES. This option can be set to 'null' to + clear the default value.Possible values: {SUM_OF_BASE_QUALITIES, + TOTAL_MAPPED_REFERENCE_LENGTH, RANDOM} + - id: optical_duplicate_pixel_distance + type: int? + inputBinding: + position: 0 + prefix: OPTICAL_DUPLICATE_PIXEL_DISTANCE= + separate: false + doc: >- + The maximum offset between two duplicate clusters in order to consider + them optical duplicates. The default is appropriate for unpatterned + versions of the Illumina platform. For the patterned flowcell models, 2500 + is moreappropriate. For other platforms and models, users should + experiment to find what works best. Default value: 100. This option can + be set to 'null' to clear the default value. 
+outputs: + - id: bam + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/,'_md.bam') + } + } + secondaryFiles: + - ^.bai + - id: duplication_stats + type: File + outputBinding: + glob: |- + ${ + if(inputs.duplication_metrics){ + return inputs.duplication_metrics + } else { + return inputs.input.basename.replace(/.bam/,'_md.metrics') + } + } +label: picard_mark_duplicates_4.1.8.1 +arguments: + - position: 0 + valueFrom: "${\n if(inputs.memory_per_job && inputs.memory_overhead) {\n \n if(inputs.memory_per_job % 1000 == 0) {\n \t\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n \n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\" \n }\n }\n else if (inputs.memory_per_job && !inputs.memory_overhead){\n \n if(inputs.memory_per_job % 1000 == 0) {\n \t\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n \n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\" \n }\n }\n else if(!inputs.memory_per_job && inputs.memory_overhead){\n \n return \"-Xmx15G\"\n }\n else {\n \n \treturn \"-Xmx15G\"\n }\n}" + - position: 0 + prefix: '-jar' + valueFrom: /gatk/gatk-package-4.1.8.1-local.jar + - position: 0 + valueFrom: MarkDuplicates + - position: 0 + prefix: O= + separate: false + valueFrom: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/,'_md.bam') + } + } +requirements: + - class: ResourceRequirement + ramMin: 17000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'broadinstitute/gatk:4.1.8.1' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:kumarn1@mskcc.org' + 'foaf:name': Nikhil Kumar + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - 
class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': picard + 'doap:revision': 4.1.8.1 From 63a8ba81f3ef46b3711d78fd6026a91d0830dccf Mon Sep 17 00:00:00 2001 From: Nikhil Date: Thu, 20 Aug 2020 20:18:49 -0400 Subject: [PATCH 158/476] Added picard_fix_mate_information version 4.1.8.1 Resolves: #60 --- picard_fix_mate_information_4.1.8.1/README.md | 71 ++++++++ .../example_inputs.yaml | 12 ++ .../picard_fix_mate_information_4.1.8.1.cwl | 166 ++++++++++++++++++ 3 files changed, 249 insertions(+) create mode 100644 picard_fix_mate_information_4.1.8.1/README.md create mode 100644 picard_fix_mate_information_4.1.8.1/example_inputs.yaml create mode 100644 picard_fix_mate_information_4.1.8.1/picard_fix_mate_information_4.1.8.1.cwl diff --git a/picard_fix_mate_information_4.1.8.1/README.md b/picard_fix_mate_information_4.1.8.1/README.md new file mode 100644 index 00000000..2c766dcc --- /dev/null +++ b/picard_fix_mate_information_4.1.8.1/README.md @@ -0,0 +1,71 @@ +# CWL for running Picard - FixMateInformation + +## Version of tools in docker image + +| Tool | Version | Location | +|--- |--- |--- | +| gatk | 4.1.8.1 | broadinstitute/gatk:4.1.8.1 | + + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner picard_fix_mate_information_4.1.8.1.cwl example_inputs.yaml +``` + +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict 
/path/to/picard_fix_mate_information_4.1.8.1/picard_fix_mate_information_4.1.8.1.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> mkdir picardFixMate_toil_log +> toil-cwl-runner --singularity --logFile /path/to/picardFixMate_toil_log/cwltoil.log --jobStore /path/to/picardFixMate_jobStore --batchSystem lsf --workDir /path/to/picardFixMate_toil_log --outdir . --writeLogs /path/to/picardFixMate_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/picard_fix_mate_information_4.1.8.1/picard_fix_mate_information_4.1.8.1.cwl /path/to/inputs.yaml > picardFixMate_toil.stdout 2> picardFixMate_toil.stderr & +``` + +### Usage + +``` +usage: picard_fix_mate_information_4.1.8.1.cwl + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --input INPUT The input file to fix. This option may be specified 0 + or more times + --output_file_name OUTPUT_FILE_NAME + Output file name (bam or sam). Not Required + --sort_order SORT_ORDER + Optional sort order to output in. If not supplied + OUTPUT is in the same order as INPUT.Default value: + null. Possible values: {unsorted, queryname, + coordinate} + --validation_stringency VALIDATION_STRINGENCY + Validation stringency for all SAM files read by this + program. Setting stringency to SILENT can improve + performance when processing a BAM file in which + variable-length data (read, qualities, tags) do not + otherwise need to be decoded. Default value: STRICT. + This option can be set to 'null' to clear the default + value. Possible values: {STRICT,LENIENT, SILENT} + --bam_compression_level BAM_COMPRESSION_LEVEL + Compression level for all compressed files created + (e.g. BAM and GELI). Default value:5.
This option can + be set to 'null' to clear the default value. + --create_bam_index Whether to create a BAM index when writing a + coordinate-sorted BAM file. Default value:false. This + option can be set to 'null' to clear the default + value. Possible values:{true, false} +``` diff --git a/picard_fix_mate_information_4.1.8.1/example_inputs.yaml b/picard_fix_mate_information_4.1.8.1/example_inputs.yaml new file mode 100644 index 00000000..1d9e4ee2 --- /dev/null +++ b/picard_fix_mate_information_4.1.8.1/example_inputs.yaml @@ -0,0 +1,12 @@ +bam_compression_level: +create_bam_index: true +input: + class: File + path: "/path/to/bam" +memory_overhead: +memory_per_job: +number_of_threads: +output_file_name: somename_fm.bam +sort_order: +tmp_dir: +validation_stringency: diff --git a/picard_fix_mate_information_4.1.8.1/picard_fix_mate_information_4.1.8.1.cwl b/picard_fix_mate_information_4.1.8.1/picard_fix_mate_information_4.1.8.1.cwl new file mode 100644 index 00000000..78e383a7 --- /dev/null +++ b/picard_fix_mate_information_4.1.8.1/picard_fix_mate_information_4.1.8.1.cwl @@ -0,0 +1,166 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: picard_fix_mate_information_4_1_8_1 +baseCommand: + - java +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: input + type: File + inputBinding: + position: 0 + prefix: I= + separate: false + doc: The input file to fix. This option may be specified 0 or more times + secondaryFiles: + - ^.bai + - id: output_file_name + type: string? + doc: Output file name (bam or sam). Not Required + - id: sort_order + type: string? + inputBinding: + position: 0 + prefix: SO= + separate: false + doc: >- + Optional sort order to output in. 
If not supplied OUTPUT is in the same + order as INPUT.Default value: null. Possible values: {unsorted, queryname, + coordinate} + - id: validation_stringency + type: string? + inputBinding: + position: 0 + prefix: VALIDATION_STRINGENCY= + separate: false + doc: >- + Validation stringency for all SAM files read by this program. Setting + stringency to SILENT can improve performance when processing a BAM file in + which variable-length data (read, qualities, tags) do not otherwise need + to be decoded. Default value: STRICT. This option can be set to 'null' to + clear the default value. Possible values: {STRICT,LENIENT, SILENT} + - id: bam_compression_level + type: int? + inputBinding: + position: 0 + prefix: COMPRESSION_LEVEL= + separate: false + doc: >- + Compression level for all compressed files created (e.g. BAM and GELI). + Default value:5. This option can be set to 'null' to clear the default + value. + - default: true + id: create_bam_index + type: boolean? + inputBinding: + position: 0 + prefix: CREATE_INDEX=true + doc: >- + Whether to create a BAM index when writing a coordinate-sorted BAM file. + Default value:false. This option can be set to 'null' to clear the default + value. 
Possible values:{true, false} +outputs: + - id: bam + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/,'_fm.bam') + } + } + secondaryFiles: + - ^.bai +label: picard_fix_mate_information_4.1.8.1 +arguments: + - position: 0 + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx15G" + } + else { + return "-Xmx15G" + } + } + - position: 0 + valueFrom: "-XX:-UseGCOverheadLimit" + shellQuote: false + - position: 0 + valueFrom: "-Djava.io.tmpdir=$(runtime.tmpdir)" + shellQuote: false + - position: 0 + prefix: '-jar' + valueFrom: /gatk/gatk-package-4.1.8.1-local.jar + - position: 0 + valueFrom: FixMateInformation + - position: 0 + prefix: TMP_DIR= + separate: false + valueFrom: "$(runtime.tmpdir)" + - position: 0 + prefix: O= + separate: false + valueFrom: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/,'_fm.bam') + } + } +requirements: + - class: ResourceRequirement + ramMin: 25000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'broadinstitute/gatk:4.1.8.1' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:kumarn1@mskcc.org' + 'foaf:name': Nikhil Kumar + 'foaf:name': Memorial Sloan Kettering Cancer Center 
+'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': picard + 'doap:revision': 4.1.8.1 From 28541848d405ba8d6a4d080d6ae44cb0a8241ac7 Mon Sep 17 00:00:00 2001 From: Nikhil Date: Thu, 20 Aug 2020 20:21:35 -0400 Subject: [PATCH 159/476] Added abra2 version 2.22 Resolves: #54 --- abra2_2.22/README.md | 21 ++++ abra2_2.22/abra2_2.22.cwl | 203 ++++++++++++++++++++++++++++++++ abra2_2.22/container/Dockerfile | 40 +++++++ abra2_2.22/example_inputs.yaml | 30 +++++ 4 files changed, 294 insertions(+) create mode 100644 abra2_2.22/README.md create mode 100644 abra2_2.22/abra2_2.22.cwl create mode 100644 abra2_2.22/container/Dockerfile create mode 100644 abra2_2.22/example_inputs.yaml diff --git a/abra2_2.22/README.md b/abra2_2.22/README.md new file mode 100644 index 00000000..6e518dd8 --- /dev/null +++ b/abra2_2.22/README.md @@ -0,0 +1,21 @@ +# CWL and Dockerfile for running ABRA2 + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +|--- |--- |--- | +| openjdk | 8 | - | +| ABRA2 | 2.22 | https://github.com/mozack/abra2/releases/download/v2.22/abra2-2.22.jar | + +[![](https://images.microbadger.com/badges/version/mskcc/abra2:0.2.0.svg)](https://microbadger.com/images/mskcc/abra2:0.2.0 "Get your own version badge on microbadger.com") [![](https://images.microbadger.com/badges/image/mskcc/abra2:0.2.0.svg)](https://microbadger.com/images/mskcc/abra2:0.2.0 "Get your own image badge on microbadger.com") [![](https://images.microbadger.com/badges/license/mskcc/abra2:0.2.0.svg)](https://microbadger.com/images/mskcc/abra2:0.2.0 "Get your own license badge on microbadger.com") + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + 
+```bash + > toil-cwl-runner abra2_2.22.cwl example_inputs.yaml +``` + diff --git a/abra2_2.22/abra2_2.22.cwl b/abra2_2.22/abra2_2.22.cwl new file mode 100644 index 00000000..40711a51 --- /dev/null +++ b/abra2_2.22/abra2_2.22.cwl @@ -0,0 +1,203 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: abra2_2_22 +baseCommand: + - java +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + inputBinding: + position: 0 + prefix: '--threads' + - id: input_bam + type: + - File + - type: array + items: File + inputBinding: + position: 0 + prefix: '--in' + doc: Required list of input sam or bam file (s) separated by comma + secondaryFiles: + - ^.bai + - id: working_directory + type: Directory? + inputBinding: + position: 0 + prefix: '--tmpdir' + doc: Set the temp directory (overrides java.io.tmpdir) + - id: reference_fasta + type: File + inputBinding: + position: 0 + prefix: '--ref' + doc: Genome reference location + secondaryFiles: + - ^.fai + - id: targets + type: File + inputBinding: + position: 0 + prefix: '--targets' + - id: kmer_size + type: string? + inputBinding: + position: 0 + prefix: '--kmer' + doc: >- + Optional assembly kmer size(delimit with commas if multiple sizes + specified) + - id: maximum_average_depth + type: int? + inputBinding: + position: 0 + prefix: '--mad' + doc: >- + Regions with average depth exceeding this value will be downsampled + (default: 1000) + - id: soft_clip_contig + type: string? + inputBinding: + position: 0 + prefix: '--sc' + doc: >- + Soft clip contig args + [max_contigs,min_base_qual,frac_high_qual_bases,min_soft_clip_len] + (default:16,13,80,15) + - id: maximum_mixmatch_rate + type: float? 
+ inputBinding: + position: 0 + prefix: '--mmr' + doc: >- + Max allowed mismatch rate when mapping reads back to contigs (default: + 0.05) + - id: scoring_gap_alignments + type: string? + inputBinding: + position: 0 + prefix: '--sga' + doc: >- + Scoring used for contig alignments(match, + mismatch_penalty,gap_open_penalty,gap_extend_penalty) (default:8,32,48,1) + - id: contig_anchor + type: string? + inputBinding: + position: 0 + prefix: '--ca' + doc: >- + Contig anchor [M_bases_at_contig_edge,max_mismatches_near_edge] + (default:10,2) + - id: window_size + type: string? + inputBinding: + position: 0 + prefix: '--ws' + doc: |- + Processing window size and overlap + (size,overlap) (default: 400,200) + - id: consensus_sequence + type: boolean? + inputBinding: + position: 0 + prefix: '--cons' + doc: Use positional consensus sequence when aligning high quality soft clipping + - id: output_bams + type: + - string + - type: array + items: string + inputBinding: + position: 0 + prefix: '--out' + doc: Required list of output sam or bam file (s) separated by comma + - id: ignore_bad_assembly + type: boolean? + inputBinding: + position: 0 + prefix: '--ignore-bad-assembly' + doc: Use this option to avoid parsing errors for corrupted assemblies + - id: bam_index + type: boolean? + inputBinding: + position: 0 + prefix: '--index' + doc: >- + Enable BAM index generation when outputting sorted alignments (may require + additional memory) + - id: input_vcf + type: File? + inputBinding: + position: 0 + prefix: '--in-vcf' + doc: >- + VCF containing known (or suspected) variant sites. Very large files + should be avoided. + - id: no_edge_complex_indel + type: boolean? + inputBinding: + position: 0 + prefix: '--no-edge-ci' + doc: >- + Prevent output of complex indels at read start or read end + - id: no_sort + type: boolean? 
+ inputBinding: + position: 0 + prefix: '--nosort' + doc: Do not attempt to sort final output +outputs: + - id: realigned_bam + type: + - 'null' + - File + - type: array + items: File + outputBinding: + glob: | + *abra.bam + secondaryFiles: + - ^.bai +label: abra2_2.22 +arguments: + - position: 0 + valueFrom: "${\n if(inputs.memory_per_job && inputs.memory_overhead) {\n \n if(inputs.memory_per_job % 1000 == 0) {\n \t\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n \n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\" \n }\n }\n else if (inputs.memory_per_job && !inputs.memory_overhead){\n \n if(inputs.memory_per_job % 1000 == 0) {\n \t\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n \n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\" \n }\n }\n else if(!inputs.memory_per_job && inputs.memory_overhead){\n \n return \"-Xmx15G\"\n }\n else {\n \n \treturn \"-Xmx15G\"\n }\n}" + - position: 0 + prefix: '-jar' + valueFrom: /usr/local/bin/abra2.jar +requirements: + - class: ResourceRequirement + ramMin: "${\r if(inputs.memory_per_job && inputs.memory_overhead) {\r \r return inputs.memory_per_job + inputs.memory_overhead\r }\r else if (inputs.memory_per_job && !inputs.memory_overhead){\r \r \treturn inputs.memory_per_job + 2000\r }\r else if(!inputs.memory_per_job && inputs.memory_overhead){\r \r return 15000 + inputs.memory_overhead\r }\r else {\r \r \treturn 17000 \r }\r}" + coresMin: "${\r if (inputs.number_of_threads) {\r \r \treturn inputs.number_of_threads \r }\r else {\r \r return 4\r }\r}" + - class: DockerRequirement + dockerPull: 'aphoid/abra2:2.22' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:kumarn1@mskcc.org' + 'foaf:name': Nikhil Kumar + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 
'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': abra2 + 'doap:revision': 2.22 diff --git a/abra2_2.22/container/Dockerfile b/abra2_2.22/container/Dockerfile new file mode 100644 index 00000000..e064f3d6 --- /dev/null +++ b/abra2_2.22/container/Dockerfile @@ -0,0 +1,40 @@ +################## BASE IMAGE ###################### + +FROM openjdk:8 + +################## ARGUMENTS######################## + +ARG BUILD_DATE +ARG BUILD_VERSION +ARG LICENSE="Apache-2.0" +ARG JAVA_VERSION=8 +ARG ABRA2_VERSION=2.22 + +################## METADATA ######################## + +LABEL org.opencontainers.image.vendor="MSKCC" +LABEL org.opencontainers.image.authors="Nikhil Kumar (kumarn1@mskcc.org)" + +LABEL org.opencontainers.image.created=${BUILD_DATE} \ + org.opencontainers.image.version=${BUILD_VERSION} \ + org.opencontainers.image.licenses=${LICENSE} \ + org.opencontainers.image.version.java=${JAVA_VERSION} \ + org.opencontainers.image.version.abra2=${ABRA2_VERSION} \ + org.opencontainers.image.source.abra2="https://github.com/mozack/abra2/releases/" + +LABEL org.opencontainers.image.description="This container uses OPENJDK ${JAVA_VERSION} as the base image to build abra2 version ${ABRA2_VERSION}" + +################## INSTALL ########################## + +WORKDIR /usr/src + +RUN apt-get update && \ + apt-get --no-install-recommends install -y \ + wget && \ + apt-get clean autoclean && \ + apt-get autoremove -y && \ + rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* + +RUN wget "https://github.com/mozack/abra2/releases/download/v${ABRA2_VERSION}/abra2-${ABRA2_VERSION}.jar" && \ + chmod 755 /usr/src/abra2-${ABRA2_VERSION}.jar && \ + cp -s /usr/src/abra2-${ABRA2_VERSION}.jar /usr/local/bin/abra2.jar \ No newline at end of file diff --git a/abra2_2.22/example_inputs.yaml 
b/abra2_2.22/example_inputs.yaml new file mode 100644 index 00000000..928aafd8 --- /dev/null +++ b/abra2_2.22/example_inputs.yaml @@ -0,0 +1,30 @@ +bam_index: true +no_edge_complex_indel: true +consensus_sequence: +contig_anchor: +ignore_bad_assembly: +input_bam: + class: File + path: "path/to/alignment.bam" +input_vcf: +kmer_size: +maximum_average_depth: +maximum_mixmatch_rate: +memory_overhead: +memory_per_job: +no_sort: +number_of_threads: +output_bam: name_of_realigned_abra.bam +path_to_abra: +reference_fasta: + class: File + path: "/path/to/reference.fasta" +scoring_gap_alignments: +soft_clip_contig: +targets: + class: File + metadata: {} + path: "/path/to/target.bed" + secondaryFiles: [] +window_size: +working_directory: From 866d4c873a7c2c1e6c7e1c7ea8b7fee74f40bf26 Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Fri, 21 Aug 2020 13:15:19 -0400 Subject: [PATCH 160/476] added sam to fastq files --- docs/SUMMARY.md | 1 + gatk_sam_to_fastq_4.1.8.0/README.md | 175 +++++++++ gatk_sam_to_fastq_4.1.8.0/example_inputs.yaml | 32 ++ .../gatk_sam_to_fastq_4.1.8.0.cwl | 340 ++++++++++++++++++ 4 files changed, 548 insertions(+) create mode 100644 gatk_sam_to_fastq_4.1.8.0/README.md create mode 100644 gatk_sam_to_fastq_4.1.8.0/example_inputs.yaml create mode 100644 gatk_sam_to_fastq_4.1.8.0/gatk_sam_to_fastq_4.1.8.0.cwl diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index c997b150..d8229ceb 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -13,6 +13,7 @@ - GATK - [ApplyBQSR v4.1.2.0](../gatk_ApplyBQSR_4.1.2.0/README.md) - [BaseRecalibrator v4.1.2.0](../gatk_BaseRecalibrator_4.1.2.0/README.md) + - [SamToFastq v4.1.8.0](../gatk_sam_to_fastq_4.1.8.0/README.md) - Manta - [Manta v1.5.1](../manta_1.5.1/README.md) - Marianas diff --git a/gatk_sam_to_fastq_4.1.8.0/README.md b/gatk_sam_to_fastq_4.1.8.0/README.md new file mode 100644 index 00000000..491eac17 --- /dev/null +++ b/gatk_sam_to_fastq_4.1.8.0/README.md @@ -0,0 +1,175 @@ +# CWL for running GATK - SamToFastq + +## 
Version of tools in docker image + +| Tool | Version | Location | +|--- |--- |--- | +| gatk | 4.1.8.0 | broadinstitute/gatk:4.1.8.0 | + + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner gatk_sam_to_fastq_4.1.8.0.cwl example_inputs.yaml +``` + +### Usage + +```bash +usage: gatk_sam_to_fastq_4.1.8.0.cwl [-h] [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] + --input INPUT [--fastq FASTQ] + [--clipping_action CLIPPING_ACTION] + [--clipping_attribute CLIPPING_ATTRIBUTE] + [--clipping_min_length CLIPPING_MIN_LENGTH] + [--compress_outputs_per_rg] + [--compression_level COMPRESSION_LEVEL] + [--create_index] [--include_non_pf_reads] + [--include_non_primary_alignments] + [--interleave] + [--max_records_in_ram MAX_RECORDS_IN_RAM] + [--output_dir OUTPUT_DIR] + [--create_md5_file] [--output_per_rg] + [--quality QUALITY] [--re_reverse] + [--read1_max_bases_to_write READ1_MAX_BASES_TO_WRITE] + [--read1_trim READ1_TRIM] + [--read2_max_bases_to_write READ2_MAX_BASES_TO_WRITE] + [--read2_trim READ2_TRIM] + [--reference_sequence REFERENCE_SEQUENCE] + [--rg_tag RG_TAG] + [--second_end_fastq SECOND_END_FASTQ] + [--unpaired_fastq UNPAIRED_FASTQ] + [--validation_stringency VALIDATION_STRINGENCY] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --input INPUT Input SAM/BAM file to extract reads from Required. + --fastq FASTQ Output FASTQ file (single-end fastq or, if paired, + first end of the pair FASTQ). Required. 
Cannot be used + in conjunction with argument(s) OUTPUT_PER_RG (OPRG) + COMPRESS_OUTPUTS_PER_RG (GZOPRG) OUTPUT_DIR (ODIR) + --clipping_action CLIPPING_ACTION + The action that should be taken with clipped reads: + 'X' means the reads and qualities should be trimmed at + the clipped position; 'N' means the bases should be + changed to Ns in the clipped region; and any integer + means that the base qualities should be set to that + value in the clipped region. Default value: null. + --clipping_attribute CLIPPING_ATTRIBUTE + The attribute that stores the position at which the + SAM record should be clipped Default value: null. + --clipping_min_length CLIPPING_MIN_LENGTH + When performing clipping with the CLIPPING_ATTRIBUTE + and CLIPPING_ACTION parameters, ensure that the + resulting reads after clipping are at least + CLIPPING_MIN_LENGTH bases long. If the original read + is shorter than CLIPPING_MIN_LENGTH then the original + read length will be maintained. Default value: 0. + --compress_outputs_per_rg + Compress output FASTQ files per read group using gzip + and append a .gz extension to the file names. Default + value: false. Possible values: {true, false} Cannot be + used in conjunction with argument(s) FASTQ (F) + SECOND_END_FASTQ (F2) UNPAIRED_FASTQ (FU) + --compression_level COMPRESSION_LEVEL + Compression level for all compressed files created + (e.g. BAM and VCF). Default value: 2. + --create_index Whether to create a BAM index when writing a + coordinate-sorted BAM file. Default value: false. + Possible values: {true, false} + --include_non_pf_reads + Include non-PF reads from the SAM file into the output + FASTQ files. PF means 'passes filtering'. Reads whose + 'not passing quality controls' flag is set are non-PF + reads. See GATK Dictionary for more info. Default + value: false. Possible values: {true, false} + --include_non_primary_alignments + If true, include non-primary alignments in the output. 
+ Support of non-primary alignments in SamToFastq is not + comprehensive, so there may be exceptions if this is + set to true and there are paired reads with non- + primary alignments. Default value: false. Possible + values: {true, false} + --interleave Will generate an interleaved fastq if paired, each + line will have /1 or /2 to describe which end it came + from Default value: false. Possible values: {true, + false} + --max_records_in_ram MAX_RECORDS_IN_RAM + When writing files that need to be sorted, this will + specify the number of records stored in RAM before + spilling to disk. Increasing this number reduces the + number of file handles needed to sort the file, and + increases the amount of RAM needed. Default value: + 500000. + --output_dir OUTPUT_DIR + Directory in which to output the FASTQ file(s). Used + only when OUTPUT_PER_RG is true. Default value: null. + Cannot be used in conjunction with argument(s) FASTQ + (F). + --create_md5_file Whether to create an MD5 digest for any BAM or FASTQ + files created. Default value: false. Possible values: + {true, false}. + --output_per_rg Output a FASTQ file per read group (two FASTQ files + per read group if the group is paired). Default value: + false. Possible values: {true, false} Cannot be used + in conjunction with argument(s) FASTQ (F) + SECOND_END_FASTQ (F2) UNPAIRED_FASTQ (FU) + --quality QUALITY End-trim reads using the phred/bwa quality trimming + algorithm and this quality. Default value: null. + --re_reverse Re-reverse bases and qualities of reads with negative + strand flag set before writing them to FASTQ Default + value: true. Possible values: {true, false} + --read1_max_bases_to_write READ1_MAX_BASES_TO_WRITE + The maximum number of bases to write from read 1 after + trimming. If there are fewer than this many bases left + after trimming, all will be written. If this value is + null then all bases left after trimming will be + written. Default value: null. 
+ --read1_trim READ1_TRIM + The number of bases to trim from the beginning of read + 1. Default value: 0. + --read2_max_bases_to_write READ2_MAX_BASES_TO_WRITE + The maximum number of bases to write from read 2 after + trimming. If there are fewer than this many bases left + after trimming, all will be written. If this value is + null then all bases left after trimming will be + written. Default value: null. + --read2_trim READ2_TRIM + The number of bases to trim from the beginning of read + 2. Default value: 0. + --reference_sequence REFERENCE_SEQUENCE + Reference sequence file. Default value: null. + --rg_tag RG_TAG The read group tag (PU or ID) to be used to output a + FASTQ file per read group. Default value: PU. + --second_end_fastq SECOND_END_FASTQ + Output FASTQ file (if paired, second end of the pair + FASTQ). Default value: null. Cannot be used in + conjunction with argument(s) OUTPUT_PER_RG (OPRG) + COMPRESS_OUTPUTS_PER_RG (GZOPRG) + --unpaired_fastq UNPAIRED_FASTQ + Output FASTQ file for unpaired reads; may only be + provided in paired-FASTQ mode Default value: null. + Cannot be used in conjunction with argument(s) + OUTPUT_PER_RG (OPRG) COMPRESS_OUTPUTS_PER_RG (GZOPRG) + --validation_stringency VALIDATION_STRINGENCY + Validation stringency for all SAM files read by this + program. Setting stringency to SILENT can improve + performance when processing a BAM file in which + variable-length data (read, qualities, tags) do not + otherwise need to be decoded. Default value: STRICT. 
+ Possible values: {STRICT, LENIENT, SILENT} +``` diff --git a/gatk_sam_to_fastq_4.1.8.0/example_inputs.yaml b/gatk_sam_to_fastq_4.1.8.0/example_inputs.yaml new file mode 100644 index 00000000..aa1c7e46 --- /dev/null +++ b/gatk_sam_to_fastq_4.1.8.0/example_inputs.yaml @@ -0,0 +1,32 @@ +input: + class: File + metadata: {} + path: "/path/to/bam" +fastq: null +clipping_action: null +clipping_attribute: null +clipping_min_length: null +compress_outputs_per_rg: null +compression_level: null +create_index: null +include_non_pf_reads: null +include_non_primary_alignments: null +interleave: null +max_records_in_ram: null +output_dir: null +create_md5_file: null +output_per_rg: null +quality: null +re_reverse: null +read1_max_bases_to_write: null +read1_trim: null +read2_max_bases_to_write: null +read2_trim: null +reference_sequence: null +rg_tag: null +second_end_fastq: null +unpaired_fastq: null +validation_stringency: null +memory_overhead: null +memory_per_job: null +number_of_threads: null diff --git a/gatk_sam_to_fastq_4.1.8.0/gatk_sam_to_fastq_4.1.8.0.cwl b/gatk_sam_to_fastq_4.1.8.0/gatk_sam_to_fastq_4.1.8.0.cwl new file mode 100644 index 00000000..c835d69d --- /dev/null +++ b/gatk_sam_to_fastq_4.1.8.0/gatk_sam_to_fastq_4.1.8.0.cwl @@ -0,0 +1,340 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: gatk_sam_to_fastq_4_1_8_0 +label: GATK-SamToFastq +baseCommand: + - gatk + - SamToFastq +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: input + type: File + inputBinding: + position: 0 + prefix: --INPUT + doc: Input SAM/BAM file to extract reads from Required. + - id: fastq + type: string? 
+ inputBinding: + position: 0 + prefix: --FASTQ + doc: >- + Output FASTQ file (single-end fastq or, if paired, first end of the pair FASTQ). + Required. Cannot be used in conjunction with argument(s) OUTPUT_PER_RG (OPRG) + COMPRESS_OUTPUTS_PER_RG (GZOPRG) OUTPUT_DIR (ODIR) + - id: clipping_action + type: string? + inputBinding: + position: 0 + prefix: --CLIPPING_ACTION + doc: >- + The action that should be taken with clipped reads: 'X' means the reads and qualities + should be trimmed at the clipped position; 'N' means the bases should be changed to Ns in + the clipped region; and any integer means that the base qualities should be set to that + value in the clipped region. Default value: null. + - id: clipping_attribute + type: string? + inputBinding: + position: 0 + prefix: --CLIPPING_ATTRIBUTE + doc: >- + The attribute that stores the position at which the SAM record should be clipped Default value: null. + - id: clipping_min_length + type: int? + inputBinding: + position: 0 + prefix: --CLIPPING_MIN_LENGTH + doc: >- + When performing clipping with the CLIPPING_ATTRIBUTE and CLIPPING_ACTION parameters, + ensure that the resulting reads after clipping are at least CLIPPING_MIN_LENGTH bases + long. If the original read is shorter than CLIPPING_MIN_LENGTH then the original read + length will be maintained. Default value: 0. + - id: compress_outputs_per_rg + type: boolean? + inputBinding: + position: 0 + prefix: --COMPRESS_OUTPUTS_PER_RG + doc: >- + Compress output FASTQ files per read group using gzip and append a .gz extension to the + file names. Default value: false. Possible values: {true, false} Cannot be used in + conjunction with argument(s) FASTQ (F) SECOND_END_FASTQ (F2) UNPAIRED_FASTQ (FU) + - id: compression_level + type: int? + inputBinding: + position: 0 + prefix: --COMPRESSION_LEVEL + doc: >- + Compression level for all compressed files created (e.g. BAM and VCF). Default value: 2. + - id: create_index + type: boolean? 
+ inputBinding: + position: 0 + prefix: --CREATE_INDEX + doc: >- + Whether to create a BAM index when writing a coordinate-sorted BAM file. Default value: + false. Possible values: {true, false} + - id: include_non_pf_reads + type: boolean? + inputBinding: + position: 0 + prefix: --INCLUDE_NON_PF_READS + doc: >- + Include non-PF reads from the SAM file into the output FASTQ files. PF means 'passes + filtering'. Reads whose 'not passing quality controls' flag is set are non-PF reads. See + GATK Dictionary for more info. Default value: false. Possible values: {true, false} + - id: include_non_primary_alignments + type: boolean? + inputBinding: + position: 0 + prefix: --INCLUDE_NON_PRIMARY_ALIGNMENTS + doc: >- + If true, include non-primary alignments in the output. Support of non-primary alignments + in SamToFastq is not comprehensive, so there may be exceptions if this is set to true and + there are paired reads with non-primary alignments. Default value: false. Possible + values: {true, false} + - id: interleave + type: boolean? + inputBinding: + position: 0 + prefix: --INTERLEAVE + doc: >- + Will generate an interleaved fastq if paired, each line will have /1 or /2 to describe + which end it came from Default value: false. Possible values: {true, false} + - id: max_records_in_ram + default: 50000 + type: int? + inputBinding: + position: 0 + prefix: --MAX_RECORDS_IN_RAM + doc: >- + When writing files that need to be sorted, this will specify the number of records stored + in RAM before spilling to disk. Increasing this number reduces the number of file handles + needed to sort the file, and increases the amount of RAM needed. Default value: 500000. + - id: output_dir + type: string? + inputBinding: + position: 0 + prefix: --OUTPUT_DIR + doc: >- + Directory in which to output the FASTQ file(s). Used only when OUTPUT_PER_RG is true. + Default value: null. Cannot be used in conjunction with argument(s) FASTQ (F). + - id: create_md5_file + type: boolean? 
+ inputBinding: + position: 0 + prefix: --CREATE_MD5_FILE + doc: >- + Whether to create an MD5 digest for any BAM or FASTQ files created. Default value: + false. Possible values: {true, false}. + - id: output_per_rg + type: boolean? + inputBinding: + position: 0 + prefix: --OUTPUT_PER_RG + doc: >- + Output a FASTQ file per read group (two FASTQ files per read group if the group is + paired). Default value: false. Possible values: {true, false} Cannot be used in + conjunction with argument(s) FASTQ (F) SECOND_END_FASTQ (F2) UNPAIRED_FASTQ (FU) + - id: quality + type: int? + inputBinding: + position: 0 + prefix: --QUALITY + doc: >- + End-trim reads using the phred/bwa quality trimming algorithm and this quality. Default value: null. + - id: re_reverse + type: boolean? + inputBinding: + position: 0 + prefix: --RE_REVERSE + doc: >- + Re-reverse bases and qualities of reads with negative strand flag set before writing them + to FASTQ Default value: true. Possible values: {true, false} + - id: read1_max_bases_to_write + type: int? + inputBinding: + position: 0 + prefix: --READ1_MAX_BASES_TO_WRITE + doc: >- + The maximum number of bases to write from read 1 after trimming. If there are fewer than + this many bases left after trimming, all will be written. If this value is null then all + bases left after trimming will be written. Default value: null. + - id: read1_trim + type: int? + inputBinding: + position: 0 + prefix: --READ1_TRIM + doc: >- + The number of bases to trim from the beginning of read 1. Default value: 0. + - id: read2_max_bases_to_write + type: int? + inputBinding: + position: 0 + prefix: --READ2_MAX_BASES_TO_WRITE + doc: >- + The maximum number of bases to write from read 2 after trimming. If there are fewer than + this many bases left after trimming, all will be written. If this value is null then all + bases left after trimming will be written. Default value: null. + - id: read2_trim + type: int? 
+ inputBinding: + position: 0 + prefix: --READ2_TRIM + doc: >- + The number of bases to trim from the beginning of read 2. Default value: 0. + - id: reference_sequence + type: File? + inputBinding: + position: 0 + prefix: --REFERENCE_SEQUENCE + doc: >- + Reference sequence file. Default value: null. + - id: rg_tag + type: string? + inputBinding: + position: 0 + prefix: --RG_TAG + doc: >- + The read group tag (PU or ID) to be used to output a FASTQ file per read group. Default + value: PU. + - id: second_end_fastq + type: string? + inputBinding: + position: 0 + prefix: --SECOND_END_FASTQ + doc: >- + Output FASTQ file (if paired, second end of the pair FASTQ). Default value: null. Cannot + be used in conjunction with argument(s) OUTPUT_PER_RG (OPRG) COMPRESS_OUTPUTS_PER_RG + (GZOPRG) + - id: unpaired_fastq + type: string? + inputBinding: + position: 0 + prefix: --UNPAIRED_FASTQ + doc: >- + Output FASTQ file for unpaired reads; may only be provided in paired-FASTQ mode Default + value: null. Cannot be used in conjunction with argument(s) OUTPUT_PER_RG (OPRG) + COMPRESS_OUTPUTS_PER_RG (GZOPRG) + - id: validation_stringency + type: string? + inputBinding: + position: 0 + prefix: --VALIDATION_STRINGENCY + doc: >- + Validation stringency for all SAM files read by this program. Setting stringency to + SILENT can improve performance when processing a BAM file in which variable-length data + (read, qualities, tags) do not otherwise need to be decoded. Default value: STRICT. + Possible values: {STRICT, LENIENT, SILENT} +outputs: + - id: fastq_output + type: File? + outputBinding: + glob: |- + ${ + if(inputs.fastq){ + return inputs.fastq + } else { + return inputs.input.basename.replace(/.bam|.sam/, '-R1.fastq') + } + } + - id: unpaired_fastq_output + type: File? 
+ outputBinding: + glob: |- + ${ + if(inputs.unpaired_fastq){ + return inputs.unpaired_fastq + } else { + return inputs.input.basename.replace(/.bam|.sam/, '-unpaired.fastq') + } + } + - id: second_end_fastq_output + type: File? + outputBinding: + glob: |- + ${ + if(inputs.second_end_fastq){ + return inputs.second_end_fastq + } else { + return inputs.input.basename.replace(/.bam|.sam/, '-R2.fastq') + } + } + +arguments: + - position: 0 + prefix: '--java-options' + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx15G" + } + else { + return "-Xmx15G" + } + } + - position: 0 + prefix: '--TMP_DIR' + valueFrom: . 
+ - position: 2 + prefix: '--FASTQ' + valueFrom: |- + ${ + if(inputs.fastq){ + return inputs.fastq + } else { + return inputs.input.basename.replace(/.bam|.sam/, '-R1.fastq') + } + } +requirements: + - class: ResourceRequirement + ramMin: 10000 + coresMin: 1 + - class: DockerRequirement + dockerPull: 'broadinstitute/gatk:4.1.8.0' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charles Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charles Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': gatk4 + 'doap:revision': 4.1.8.0 From 0826aca1e7374b87601850ea93ba9b1acedcd86c Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Fri, 21 Aug 2020 13:18:52 -0400 Subject: [PATCH 161/476] added CollectDuplexSeqMetrics to summary.md --- docs/SUMMARY.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index b3521887..d6463a0e 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -10,6 +10,7 @@ - [v1.0.0](../disambiguate_1.0.0/README.md) - Fgbio - [CallDuplexConsensusReads v1.2.0](../fgbio_call_duplex_consensus_reads_1.2.0/README.md) + - [CollectDuplexSeqMetrics v1.2.0](../fgbio_collect_duplex_seq_metrics_1.2.0/README.md) - [GroupReadsByUmi v1.2.0](../fgbio_group_reads_by_umi_1.2.0/README.md) - [FastqToBam v1.2.0](../fgbio_fastq_to_bam_1.2.0/README.md) - GATK From 8e591d3f0cba81d0f76d36ab7c07e6cf86ab6ff2 Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Fri, 21 Aug 2020 15:09:51 -0400 Subject: [PATCH 162/476] update output file names --- gatk_sam_to_fastq_4.1.8.0/gatk_sam_to_fastq_4.1.8.0.cwl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git 
a/gatk_sam_to_fastq_4.1.8.0/gatk_sam_to_fastq_4.1.8.0.cwl b/gatk_sam_to_fastq_4.1.8.0/gatk_sam_to_fastq_4.1.8.0.cwl index c835d69d..8eb50df9 100644 --- a/gatk_sam_to_fastq_4.1.8.0/gatk_sam_to_fastq_4.1.8.0.cwl +++ b/gatk_sam_to_fastq_4.1.8.0/gatk_sam_to_fastq_4.1.8.0.cwl @@ -238,7 +238,7 @@ inputs: (read, qualities, tags) do not otherwise need to be decoded. Default value: STRICT. Possible values: {STRICT, LENIENT, SILENT} outputs: - - id: fastq_output + - id: gatk_sam_to_fastq_fastq_output type: File? outputBinding: glob: |- @@ -249,7 +249,7 @@ outputs: return inputs.input.basename.replace(/.bam|.sam/, '-R1.fastq') } } - - id: unpaired_fastq_output + - id: gatk_sam_to_fastq_unpaired_fastq_output type: File? outputBinding: glob: |- @@ -260,7 +260,7 @@ outputs: return inputs.input.basename.replace(/.bam|.sam/, '-unpaired.fastq') } } - - id: second_end_fastq_output + - id: gatk_sam_to_fastq_second_end_fastq_output type: File? outputBinding: glob: |- From f9ec1d9aa2acea721548208b27973b722156e6fe Mon Sep 17 00:00:00 2001 From: Ian Johnson Date: Fri, 21 Aug 2020 15:44:05 -0400 Subject: [PATCH 163/476] add example inputs --- bwa_mem_0.7.17/example_inputs.yaml | 10 ++++++++++ picard_fix_mate_information_2.9.0/example_inputs.yaml | 5 +++++ picard_mark_duplicates_2.9.0/example_inputs.yaml | 7 +++++++ 3 files changed, 22 insertions(+) create mode 100644 bwa_mem_0.7.17/example_inputs.yaml create mode 100644 picard_fix_mate_information_2.9.0/example_inputs.yaml create mode 100644 picard_mark_duplicates_2.9.0/example_inputs.yaml diff --git a/bwa_mem_0.7.17/example_inputs.yaml b/bwa_mem_0.7.17/example_inputs.yaml new file mode 100644 index 00000000..e63510d9 --- /dev/null +++ b/bwa_mem_0.7.17/example_inputs.yaml @@ -0,0 +1,10 @@ +reads: +- class: File + path: "path/to/fastq_R1.fastq" +- class: File + path: "path/to/fastq_R2.fastq" +reference_fasta: + class: File + path: "/path/to/reference.fasta" +sample_id: test_sample_id +lane_id: test_lane_id diff --git 
a/picard_fix_mate_information_2.9.0/example_inputs.yaml b/picard_fix_mate_information_2.9.0/example_inputs.yaml new file mode 100644 index 00000000..91d8e497 --- /dev/null +++ b/picard_fix_mate_information_2.9.0/example_inputs.yaml @@ -0,0 +1,5 @@ +create_bam_index: true +input: + class: File + path: "/path/to/sample_id.bam" +output_file_name: sample_id_fm.bam diff --git a/picard_mark_duplicates_2.9.0/example_inputs.yaml b/picard_mark_duplicates_2.9.0/example_inputs.yaml new file mode 100644 index 00000000..234d0b62 --- /dev/null +++ b/picard_mark_duplicates_2.9.0/example_inputs.yaml @@ -0,0 +1,7 @@ +create_bam_index: true +duplication_metrics: mark_duplicates_md.metrics +input: + class: File + path: "path/to/sample_id.bam" +optical_duplicate_pixel_distance: 2500 +output: mark_duplicates_md.bam From 8cd03ab51f5007035bf7266a553045f1f588d2d3 Mon Sep 17 00:00:00 2001 From: Ian Johnson Date: Fri, 21 Aug 2020 15:44:27 -0400 Subject: [PATCH 164/476] reformat memory_per_job argument --- .../picard_mark_duplicates_2.9.0.cwl | 26 ++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/picard_mark_duplicates_2.9.0/picard_mark_duplicates_2.9.0.cwl b/picard_mark_duplicates_2.9.0/picard_mark_duplicates_2.9.0.cwl index f65e1e6b..b6e115b5 100644 --- a/picard_mark_duplicates_2.9.0/picard_mark_duplicates_2.9.0.cwl +++ b/picard_mark_duplicates_2.9.0/picard_mark_duplicates_2.9.0.cwl @@ -132,7 +132,31 @@ outputs: label: picard_mark_duplicates_2.9.0 arguments: - position: 0 - valueFrom: "${\n if(inputs.memory_per_job && inputs.memory_overhead) {\n \n if(inputs.memory_per_job % 1000 == 0) {\n \t\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n \n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\" \n }\n }\n else if (inputs.memory_per_job && !inputs.memory_overhead){\n \n if(inputs.memory_per_job % 1000 == 0) {\n \t\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n \n 
return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\" \n }\n }\n else if(!inputs.memory_per_job && inputs.memory_overhead){\n \n return \"-Xmx15G\"\n }\n else {\n \n \treturn \"-Xmx15G\"\n }\n}" + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx8G" + } + else { + return "-Xmx8G" + } + } - position: 0 prefix: '-jar' valueFrom: /usr/local/bin/picard.jar From 1c96acc2e13573f9d5068a22859cc21d6080562b Mon Sep 17 00:00:00 2001 From: Ian Johnson Date: Fri, 21 Aug 2020 16:22:00 -0400 Subject: [PATCH 165/476] readme for BWA mem 0.7.17 --- bwa_mem_0.7.17/README.md | 106 ++++++++++++++++++++++++++++++ bwa_mem_0.7.17/bwa_mem_0.7.17.cwl | 41 ++++++++++-- 2 files changed, 140 insertions(+), 7 deletions(-) create mode 100644 bwa_mem_0.7.17/README.md diff --git a/bwa_mem_0.7.17/README.md b/bwa_mem_0.7.17/README.md new file mode 100644 index 00000000..6de3155c --- /dev/null +++ b/bwa_mem_0.7.17/README.md @@ -0,0 +1,106 @@ +# CWL and Dockerfile for running BWA MEM + +## Version of tools in docker image (./container/Dockerfile) + +| Tool | Version | Location | +|--- |--- |--- | +| ubuntu | 16.04 | - | +| BWA | 0.7.17 | https://github.com/lh3/bwa/releases/tag/v0.7.17 | + +[![](https://images.microbadger.com/badges/version/mskaccess/bwa_mem_0.7.17.svg)](https://microbadger.com/images/mskaccess/bwa_mem_0.7.17 "Get your own version badge on microbadger.com")
[![](https://images.microbadger.com/badges/image/mskaccess/bwa_mem_0.7.17.svg)](https://microbadger.com/images/mskaccess/bwa_mem_0.7.17 "Get your own image badge on microbadger.com") + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner bwa_mem_0.7.17.cwl example_inputs.yaml +``` + +**If at MSK, using the JUNO cluster having installed toil-msk version 3.21.1 you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/bwa_mem_0.7.17.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> toil-cwl-runner --singularity --logFile /path/to/bwa_mem_toil.log --jobStore /path/to/bwa_mem_jobStore --batchSystem lsf --workDir /path/to/bwa_mem_toil_log --outdir . --writeLogs /path/to/bwa_mem_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --disableChaining --maxLogFileSize 20000000000 /path/to/bwa_mem_0.7.17.cwl /path/to/inputs.yaml > bwa_mem_toil.stdout 2> bwa_mem_toil.stderr & +``` + +### Usage + +``` +usage: bwa_mem_0.7.17/bwa_mem_0.7.17.cwl [-h] --reads READS --reference + REFERENCE --sample_id SAMPLE_ID + [--lane_id LANE_ID] [-A A] [-B B] + [-C] [-E E] [-L L] [-M] [-O O] [-P] + [-S] [-T T] [-U U] [-a] [-c C] [-d D] + [-k K] [-K K] [--output OUTPUT] [-p] + [-r R] [-v V] [-w W] [-y Y] [-D D] + [-W W] [-m M] [-e] [-x X] [-j J] + [--he HE] [-V] [-Y] [-I I] [-t T] + [-R R] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --reads READS + --reference REFERENCE + --sample_id SAMPLE_ID + --lane_id LANE_ID + -A A score for a sequence match, which scales options + -TdBOELU unless overridden [1] + -B B penalty for a mismatch [4] + -C append FASTA/FASTQ comment to SAM output + -E E gap extension penalty; a gap of size k cost '{-O} + + {-E}*k' [1,1] + -L L penalty for 5'- and 3'-end clipping [5,5] + 
-M + -O O gap open penalties for deletions and insertions [6,6] + -P skip pairing; mate rescue performed unless -S also in + use + -S skip mate rescue + -T T minimum score to output [30] + -U U penalty for an unpaired read pair [17] + -a output all alignments for SE or unpaired PE + -c C skip seeds with more than INT occurrences [500] + -d D off-diagonal X-dropoff [100] + -k K minimum seed length [19] + -K K process INT input bases in each batch regardless of + nThreads (for reproducibility) [] + --output OUTPUT + -p smart pairing (ignoring in2.fq) + -r R look for internal seeds inside a seed longer than {-k} + * FLOAT [1.5] + -v V verbosity level: 1=error, 2=warning, 3=message, + 4+=debugging [3] + -w W band width for banded alignment [100] + -y Y seed occurrence for the 3rd round seeding [20] + -D D drop chains shorter than FLOAT fraction of the longest + overlapping chain [0.50] + -W W discard a chain if seeded bases shorter than INT [0] + -m M perform at most INT rounds of mate rescues for each + read [50] + -e + -x X read type. Setting -x changes multiple parameters + unless overridden [null] pacbio: -k17 -W40 -r10 -A1 + -B1 -O1 -E1 -L0 (PacBio reads to ref) ont2d: -k14 -W20 + -r10 -A1 -B1 -O1 -E1 -L0 (Oxford Nanopore 2D-reads to + ref) intractg: -B9 -O16 -L5 (intra-species contigs to + ref) + -j J treat ALT contigs as part of the primary assembly + (i.e. ignore .alt file) + --he HE if there are 80% of the max + score, output all in XA [5,200] + -V output the reference FASTA header in the XR tag + -Y use soft clipping for supplementary alignments + -I I + -t T Number of threads + -R R STR read group header line such as '@RG\tID -foo\tSM + -bar' [null] +``` diff --git a/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl b/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl index c5436ae0..261302db 100644 --- a/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl +++ b/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl @@ -26,27 +26,32 @@ inputs: - id: lane_id type: string? 
- id: A + doc: score for a sequence match, which scales options -TdBOELU unless overridden [1] type: int? inputBinding: position: 0 prefix: '-A' - id: B + doc: penalty for a mismatch [4] type: int? inputBinding: position: 0 prefix: '-B' - id: C + doc: append FASTA/FASTQ comment to SAM output type: boolean? inputBinding: position: 0 prefix: '-C' - id: E + doc: gap extension penalty; a gap of size k cost '{-O} + {-E}*k' [1,1] type: 'int[]?' inputBinding: position: 0 prefix: '-E' itemSeparator: ',' - id: L + doc: penalty for 5'- and 3'-end clipping [5,5] type: 'int[]?' inputBinding: position: 0 @@ -58,55 +63,62 @@ inputs: position: 0 prefix: '-M' - id: O + doc: gap open penalties for deletions and insertions [6,6] type: 'int[]?' inputBinding: position: 0 prefix: '-O' itemSeparator: ',' - id: P + doc: skip pairing; mate rescue performed unless -S also in use type: boolean? inputBinding: position: 0 prefix: '-P' - id: S + doc: skip mate rescue type: boolean? inputBinding: position: 0 prefix: '-S' - id: T + doc: minimum score to output [30] type: int? inputBinding: position: 0 prefix: '-T' - doc: >- - Don’t output alignment with score lower than INT. This option only affects - output. - id: U + doc: penalty for an unpaired read pair [17] type: int? inputBinding: position: 0 prefix: '-U' - id: a + doc: output all alignments for SE or unpaired PE type: boolean? inputBinding: position: 0 prefix: '-a' - id: c + doc: skip seeds with more than INT occurrences [500] type: int? inputBinding: position: 0 prefix: '-c' - id: d + doc: off-diagonal X-dropoff [100] type: int? inputBinding: position: 0 prefix: '-d' - id: k + doc: minimum seed length [19] type: int? inputBinding: position: 0 prefix: '-k' - id: K + doc: process INT input bases in each batch regardless of nThreads (for reproducibility) [] type: int? inputBinding: position: 0 @@ -114,41 +126,49 @@ inputs: - id: output type: string? - id: p + doc: smart pairing (ignoring in2.fq) type: boolean? 
inputBinding: position: 0 prefix: '-p' - id: r + doc: look for internal seeds inside a seed longer than {-k} * FLOAT [1.5] type: float? inputBinding: position: 0 prefix: '-r' - id: v + doc: 'verbosity level: 1=error, 2=warning, 3=message, 4+=debugging [3]' type: int? inputBinding: position: 0 prefix: '-v' - id: w + doc: band width for banded alignment [100] type: int? inputBinding: position: 0 prefix: '-w' - id: 'y' + doc: seed occurrence for the 3rd round seeding [20] type: int? inputBinding: position: 0 prefix: '-y' - id: D + doc: drop chains shorter than FLOAT fraction of the longest overlapping chain [0.50] type: float? inputBinding: position: 0 prefix: '-D' - id: W + doc: discard a chain if seeded bases shorter than INT [0] type: int? inputBinding: position: 0 prefix: '-W' - id: m + doc: perform at most INT rounds of mate rescues for each read [50] type: int? inputBinding: position: 0 @@ -159,11 +179,17 @@ inputs: position: 0 prefix: '-e' - id: x + doc: >- + read type. Setting -x changes multiple parameters unless overridden [null] + pacbio: -k17 -W40 -r10 -A1 -B1 -O1 -E1 -L0 (PacBio reads to ref) + ont2d: -k14 -W20 -r10 -A1 -B1 -O1 -E1 -L0 (Oxford Nanopore 2D-reads to ref) + intractg: -B9 -O16 -L5 (intra-species contigs to ref) type: string? inputBinding: position: 0 prefix: '-x' - id: H + doc: insert STR to header if it starts with @; or insert lines in FILE [null] type: - File? - string? @@ -171,29 +197,30 @@ inputs: position: 0 prefix: '-H' - id: j + doc: treat ALT contigs as part of the primary assembly (i.e. ignore .alt file) type: File? inputBinding: position: 0 prefix: '-j' - - id: h + - id: he + doc: if there are <INT hits with score >80% of the max score, output all in XA [5,200] type: 'int[]?' inputBinding: position: 0 prefix: '-h' itemSeparator: ',' - id: V + doc: output the reference FASTA header in the XR tag type: boolean? inputBinding: position: 0 prefix: '-V' - id: 'Y' + doc: use soft clipping for supplementary alignments type: boolean?
inputBinding: position: 0 prefix: '-Y' - doc: >- - Force soft-clipping rather than default hard-clipping of supplementary - alignments - id: I type: string? inputBinding: From 83201a06021b3b3e979630d35d64678d251ef23b Mon Sep 17 00:00:00 2001 From: Ian Johnson Date: Fri, 21 Aug 2020 16:32:16 -0400 Subject: [PATCH 166/476] add readmes for picard 2.9.0 tools --- picard_fix_mate_information_2.9.0/README.md | 84 +++++++++++++++++++ picard_mark_duplicates_2.9.0/README.md | 91 +++++++++++++++++++++ 2 files changed, 175 insertions(+) create mode 100644 picard_fix_mate_information_2.9.0/README.md create mode 100644 picard_mark_duplicates_2.9.0/README.md diff --git a/picard_fix_mate_information_2.9.0/README.md b/picard_fix_mate_information_2.9.0/README.md new file mode 100644 index 00000000..3485d823 --- /dev/null +++ b/picard_fix_mate_information_2.9.0/README.md @@ -0,0 +1,84 @@ +# CWL and Dockerfile for running Picard - FixMateInformation + +## Version of tools in docker image (./container/Dockerfile) + +| Tool | Version | Location | +|--- |--- |--- | +| java base image | 8 | - | +| picard | 2.9.0 | https://github.com/broadinstitute/picard/releases/download/2.9.0/picard.jar | +| R | 3.3.3 | r-base for openjdk:8 | + +[![](https://images.microbadger.com/badges/image/mskaccess/picard_2.9.0.svg)](https://microbadger.com/images/mskaccess/picard_2.9.0 "Get your own image badge on microbadger.com") [![](https://images.microbadger.com/badges/version/mskaccess/picard_2.9.0.svg)](https://microbadger.com/images/mskaccess/picard_2.9.0 "Get your own version badge on microbadger.com") + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner picard_fix_mate_information_2.9.0.cwl example_inputs.yaml +``` + +**If at MSK, using the JUNO cluster having installed toil-msk version 3.21.1 you can use the following command** + +```bash +#Using CWLTOOL +> cwltool 
--singularity --non-strict /path/to/picard_fix_mate_information_2.9.0.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> toil-cwl-runner --singularity --logFile /path/to/picardFixMate_jobStore.log --jobStore /path/to/picardFixMate_jobStore --batchSystem lsf --workDir /path/to/picardFixMate_toil_log --outdir . --writeLogs /path/to/picardFixMate_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --disableChaining --maxLogFileSize 20000000000 /path/to/picard_fix_mate_information_2.9.0/picard_fix_mate_information_2.9.0.cwl /path/to/inputs.yaml > picardFixMate_toil.stdout 2> picardFixMate_toil.stderr & +``` + +### Usage + +``` +usage: picard_fix_mate_information_2.9.0/picard_fix_mate_information_2.9.0.cwl + [-h] [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] --input INPUT + [--output_file_name OUTPUT_FILE_NAME] [--sort_order SORT_ORDER] + [--validation_stringency VALIDATION_STRINGENCY] + [--bam_compression_level BAM_COMPRESSION_LEVEL] [--create_bam_index] + [--temporary_directory TEMPORARY_DIRECTORY] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --input INPUT The input file to fix. This option may be specified 0 + or more times + --output_file_name OUTPUT_FILE_NAME + Output file name (bam or sam). Not Required + --sort_order SORT_ORDER + Optional sort order to output in. If not supplied + OUTPUT is in the same order as INPUT.Default value: + null. Possible values: {unsorted, queryname, + coordinate} + --validation_stringency VALIDATION_STRINGENCY + Validation stringency for all SAM files read by this + program.
Setting stringency to SILENT can improve + performance when processing a BAM file in which + variable-length data (read, qualities, tags) do not + otherwise need to be decoded. Default value: STRICT. + This option can be set to 'null' to clear the default + value. Possible values: {STRICT,LENIENT, SILENT} + --bam_compression_level BAM_COMPRESSION_LEVEL + Compression level for all compressed files created + (e.g. BAM and GELI). Default value:5. This option can + be set to 'null' to clear the default value. + --create_bam_index Whether to create a BAM index when writing a + coordinate-sorted BAM file. Default value:false. This + option can be set to 'null' to clear the default + value. Possible values:{true, false} + --temporary_directory TEMPORARY_DIRECTORY + Default value: null. This option may be specified 0 or + more times. +``` diff --git a/picard_mark_duplicates_2.9.0/README.md b/picard_mark_duplicates_2.9.0/README.md new file mode 100644 index 00000000..ece95d84 --- /dev/null +++ b/picard_mark_duplicates_2.9.0/README.md @@ -0,0 +1,91 @@ +# CWL and Dockerfile for running Picard - MarkDuplicates + +## Version of tools in docker image (./container/Dockerfile) + +| Tool | Version | Location | +|--- |--- |--- | +| java base image | 8 | - | +| picard | 2.9.0 | https://github.com/broadinstitute/picard/releases/download/2.9.0/picard.jar | +| R | 3.3.3 | r-base for openjdk:8 | + +[![](https://images.microbadger.com/badges/image/mskaccess/picard_2.9.0.svg)](https://microbadger.com/images/mskaccess/picard_2.9.0 "Get your own image badge on microbadger.com") [![](https://images.microbadger.com/badges/version/mskaccess/picard_2.9.0.svg)](https://microbadger.com/images/mskaccess/picard_2.9.0 "Get your own version badge on microbadger.com") + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner picard_mark_duplicates_2.9.0.cwl example_inputs.yaml 
+``` + +## Usage +``` +usage: picard_mark_duplicates_2.9.0/picard_mark_duplicates_2.9.0.cwl + [-h] [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] --input INPUT + [--output_file_name OUTPUT_FILE_NAME] + [--duplication_metrics DUPLICATION_METRICS] [--sort_order SORT_ORDER] + [--tmp_dir TMP_DIR] [--validation_stringency VALIDATION_STRINGENCY] + [--bam_compression_level BAM_COMPRESSION_LEVEL] [--create_bam_index] + [--assume_sorted] + [--duplicate_scoring_strategy DUPLICATE_SCORING_STRATEGY] + [--optical_duplicate_pixel_distance OPTICAL_DUPLICATE_PIXEL_DISTANCE] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --input INPUT Input file (bam or sam). Required. + --output_file_name OUTPUT_FILE_NAME + Output file (bam or sam). + --duplication_metrics DUPLICATION_METRICS + File to write duplication metrics to Required. + --sort_order SORT_ORDER + Optional sort order to output in. If not supplied + OUTPUT is in the same order as INPUT.Default value: + null. Possible values: {unsorted, queryname, + coordinate} + --tmp_dir TMP_DIR This option may be specified 0 or more times + --validation_stringency VALIDATION_STRINGENCY + Validation stringency for all SAM files read by this + program. Setting stringency to SILENT can improve + performance when processing a BAM file in which + variable-length data (read, qualities, tags) do not + otherwise need to be decoded. Default value: STRICT. + This option can be set to 'null' to clear the default + value. Possible values: {STRICT,LENIENT, SILENT} + --bam_compression_level BAM_COMPRESSION_LEVEL + Compression level for all compressed files created + (e.g. BAM and GELI). Default value:5. 
This option can + be set to 'null' to clear the default value. + --create_bam_index Whether to create a BAM index when writing a + coordinate-sorted BAM file. Default value:false. This + option can be set to 'null' to clear the default + value. Possible values:{true, false} + --assume_sorted + --duplicate_scoring_strategy DUPLICATE_SCORING_STRATEGY + The scoring strategy for choosing the non-duplicate + among candidates. Default value:SUM_OF_BASE_QUALITIES. + This option can be set to 'null' to clear the default + value.Possible values: {SUM_OF_BASE_QUALITIES, + TOTAL_MAPPED_REFERENCE_LENGTH, RANDOM} + --optical_duplicate_pixel_distance OPTICAL_DUPLICATE_PIXEL_DISTANCE + The maximum offset between two duplicate clusters in + order to consider them optical duplicates. The default + is appropriate for unpatterned versions of the + Illumina platform. For the patterned flowcell models, + 2500 is moreappropriate. For other platforms and + models, users should experiment to find what works + best. Default value: 100. This option can be set to + 'null' to clear the default value. 
+``` From 28c1b3bdfd003716efead50b2388b1e75233aaca Mon Sep 17 00:00:00 2001 From: Adrian Fraiha Date: Mon, 24 Aug 2020 14:49:30 -0400 Subject: [PATCH 167/476] Add Fastp CWL --- fastp_0.20.1/README.md | 84 ++++++++++++++++++++ fastp_0.20.1/example_inputs.yaml | 13 ++++ fastp_0.20.1/fastp_0.20.1.cwl | 127 +++++++++++++++++++++++++++++++ fastp_0.20.1/test_data/R1.fq | 36 +++++++++ fastp_0.20.1/test_data/R2.fq | 36 +++++++++ 5 files changed, 296 insertions(+) create mode 100644 fastp_0.20.1/README.md create mode 100644 fastp_0.20.1/example_inputs.yaml create mode 100644 fastp_0.20.1/fastp_0.20.1.cwl create mode 100644 fastp_0.20.1/test_data/R1.fq create mode 100644 fastp_0.20.1/test_data/R2.fq diff --git a/fastp_0.20.1/README.md b/fastp_0.20.1/README.md new file mode 100644 index 00000000..99a1db29 --- /dev/null +++ b/fastp_0.20.1/README.md @@ -0,0 +1,84 @@ +# CWL and Dockerfile for running Fastp + +## Version of tools in docker image + +| Tool | Version | Location | +|--- |--- |--- | +| ubuntu base image | 16.04 | - | +| fastp | 0.20.1 | https://github.com/OpenGene/fastp | + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner ./fastp_0.20.1.cwl example_inputs.yaml +``` + +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** + +```bash +#Using CWLTOOL +> cwltool ./fastp_0.20.1.cwl example_inputs.yaml + +#Using toil-cwl-runner +> mkdir toil_log +> toil-cwl-runner --singularity --logFile /path/to/toil_log/cwltoil.log --jobStore /path/to/jobStore --batchSystem lsf --workDir /path/to/toil_log --outdir .
--writeLogs /path/to/toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/fastp-0_20_1/fastp-0_20_1.cwl /path/to/inputs.yaml > toil.stdout 2> toil.stderr & +``` + +### Usage +``` +usage: fastp_0.20.1.cwl [-h] --read1_input READ1_INPUT --read1_output_path + READ1_OUTPUT_PATH [--read2_input READ2_INPUT] + [--read2_output_path READ2_OUTPUT_PATH] + [--unpaired1_path UNPAIRED1_PATH] + [--unpaired2_path UNPAIRED2_PATH] + [--failed_reads_path FAILED_READS_PATH] + [--read1_adapter_sequence READ1_ADAPTER_SEQUENCE] + [--read2_adapter_sequence READ2_ADAPTER_SEQUENCE] + [--minimum_read_length MINIMUM_READ_LENGTH] + --json_output_path JSON_OUTPUT_PATH --html_output_path + HTML_OUTPUT_PATH + [job_order] + +Setup and execute Fastp + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --read1_input READ1_INPUT + read1 input file name + --read1_output_path READ1_OUTPUT_PATH + read1 output file name + --read2_input READ2_INPUT + read2 input file name, for PE data + --read2_output_path READ2_OUTPUT_PATH + read2 output file name + --unpaired1_path UNPAIRED1_PATH + for PE input, if read1 passed QC but read2 not, it + will be written to unpaired1. + --unpaired2_path UNPAIRED2_PATH + for PE input, if read2 passed QC but read1 not, it + will be written to unpaired2. + --failed_reads_path FAILED_READS_PATH + specify the file to store reads that cannot pass the + filters. + --read1_adapter_sequence READ1_ADAPTER_SEQUENCE + the adapter for read1. For SE data, if not specified, + the adapter will be auto-detected. For PE data, this + is used if R1/R2 are found not overlapped. + --read2_adapter_sequence READ2_ADAPTER_SEQUENCE + the adapter for read2. For PE data, this is used if + R1/R2 are found not overlapped. + --minimum_read_length MINIMUM_READ_LENGTH + reads shorter than length_required will be discarded, + default is 15. 
+ --json_output_path JSON_OUTPUT_PATH + the json format report file name + --html_output_path HTML_OUTPUT_PATH + the html format report file name +``` diff --git a/fastp_0.20.1/example_inputs.yaml b/fastp_0.20.1/example_inputs.yaml new file mode 100644 index 00000000..5d3af3bf --- /dev/null +++ b/fastp_0.20.1/example_inputs.yaml @@ -0,0 +1,13 @@ +read1_input: + class: File + path: "./test_data/R1.fq" +read2_input: + class: File + path: "./test_data/R2.fq" +read1_output_path: "./R1.output" +read2_output_path: "./R2.output" +read1_adapter_sequence: "GATCGGAAGAGC" +read2_adapter_sequence: "AGATCGGAAGAGC" +minimum_read_length: 25 +json_output_path: "sample_name.json" +html_output_path: "sample_name.html" diff --git a/fastp_0.20.1/fastp_0.20.1.cwl b/fastp_0.20.1/fastp_0.20.1.cwl new file mode 100644 index 00000000..0f204675 --- /dev/null +++ b/fastp_0.20.1/fastp_0.20.1.cwl @@ -0,0 +1,127 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: fastp_0_20_1 +baseCommand: + - fastp +inputs: + - id: read1_input + type: File + inputBinding: + prefix: '--in1' + doc: | + read1 input file name + - id: read1_output_path + type: string + inputBinding: + prefix: '--out1' + doc: | + read1 output file name + - id: read2_input + type: File? + inputBinding: + prefix: '--in2' + doc: | + read2 input file name, for PE data + - id: read2_output_path + type: string? + inputBinding: + prefix: '--out2' + doc: | + read2 output file name + - id: unpaired1_path + type: string? + inputBinding: + prefix: '--unpaired1' + doc: | + for PE input, if read1 passed QC but read2 not, it will be written to unpaired1. + - id: unpaired2_path + type: string? + inputBinding: + prefix: '--unpaired2' + doc: | + for PE input, if read2 passed QC but read1 not, it will be written to unpaired2. + - id: failed_reads_path + type: string? 
+ inputBinding: + prefix: '--failed_out' + doc: | + specify the file to store reads that cannot pass the filters. + - id: read1_adapter_sequence + type: string? + inputBinding: + prefix: '--adapter_sequence' + doc: | + the adapter for read1. For SE data, if not specified, the adapter will be auto-detected. For PE data, this is used if R1/R2 are found not overlapped. + - id: read2_adapter_sequence + type: string? + inputBinding: + prefix: '--adapter_sequence_r2' + doc: | + the adapter for read2. For PE data, this is used if R1/R2 are found not overlapped. + - id: minimum_read_length + type: int? + inputBinding: + prefix: '--length_required' + doc: | + reads shorter than length_required will be discarded, default is 15. + + - id: json_output_path + type: string + inputBinding: + prefix: '--json' + doc: | + the json format report file name + - id: html_output_path + type: string + inputBinding: + prefix: '--html' + doc: | + the html format report file name + +outputs: + - id: json_output + type: File + outputBinding: + glob: $(inputs.json_output_path) + - id: html_output + type: File + outputBinding: + glob: $(inputs.html_output_path) + - id: read1_output + type: File + outputBinding: + glob: $(inputs.read1_output_path) + - id: read2_output + type: File? 
+ outputBinding: + glob: $(inputs.read2_output_path) + +doc: Setup and execute Fastp +label: fastp_0.20.1 +requirements: + - class: DockerRequirement + dockerPull: 'quay.io/biocontainers/fastp:0.20.1--h8b12597_0' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': fastp + 'doap:revision': 0.20.1 diff --git a/fastp_0.20.1/test_data/R1.fq b/fastp_0.20.1/test_data/R1.fq new file mode 100644 index 00000000..d7ca9082 --- /dev/null +++ b/fastp_0.20.1/test_data/R1.fq @@ -0,0 +1,36 @@ +@AS500713:64:HFKJJBGXY:1:11101:1675:1101 1:A:0:TATAGCCT+GACCCCCA + ++ + +@AS500713:64:HFKJJBGXY:1:11101:17113:1101 1:A:0:TATAGCCT+GTTTCTTA +TACAAAATGCACATCGCTGAAAGGGGTAAAGGAGAGAAATCGCTTTATAAAACCTTGAAAAGGAATATTCAAATATAAGCTGGGAAGGTATAAAAAACTCTGTACATCACAAGTAAACAAATGGAACCTGCAAAATATTAAACAAAGGATT ++ +AAAAAEEEEE6EEAAAEEEEE6EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAEEEEAEEEEEEEEEEEEEEEEECFE####EEEE6EE Date: Tue, 25 Aug 2020 10:09:46 -0400 Subject: [PATCH 168/476] update cwl authors and docker path --- fastp_0.20.1/README.md | 4 ++-- fastp_0.20.1/fastp_0.20.1.cwl | 7 +++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/fastp_0.20.1/README.md b/fastp_0.20.1/README.md index 99a1db29..10e9866a 100644 --- a/fastp_0.20.1/README.md +++ b/fastp_0.20.1/README.md @@ -4,8 +4,8 @@ | Tool | Version | Location | |--- |--- |--- | -| ubuntu base image | 16.04 | - | -| fastp | 0.20.1 | https://github.com/OpenGene/fastp | +| fastp | 0.20.1 | quay.io/biocontainers/fastp:0.20.1--h8b12597_0 | + ## CWL diff --git a/fastp_0.20.1/fastp_0.20.1.cwl 
b/fastp_0.20.1/fastp_0.20.1.cwl index 0f204675..0c39acd8 100644 --- a/fastp_0.20.1/fastp_0.20.1.cwl +++ b/fastp_0.20.1/fastp_0.20.1.cwl @@ -111,8 +111,11 @@ requirements: - class: 'foaf:Organization' 'foaf:member': - class: 'foaf:Person' - 'foaf:mbox': 'mailto:shahr2@mskcc.org' - 'foaf:name': Ronak Shah + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:fraihaa@mskcc.org' + 'foaf:name': Adrian Fraiha 'foaf:name': Memorial Sloan Kettering Cancer Center 'dct:creator': - class: 'foaf:Organization' From d1e7a6b5f0cd0f617874c78c938ea47e0694a0d1 Mon Sep 17 00:00:00 2001 From: Adrian Fraiha Date: Tue, 25 Aug 2020 10:10:20 -0400 Subject: [PATCH 169/476] remove test data --- fastp_0.20.1/test_data/R1.fq | 36 ------------------------------------ fastp_0.20.1/test_data/R2.fq | 36 ------------------------------------ 2 files changed, 72 deletions(-) delete mode 100644 fastp_0.20.1/test_data/R1.fq delete mode 100644 fastp_0.20.1/test_data/R2.fq diff --git a/fastp_0.20.1/test_data/R1.fq b/fastp_0.20.1/test_data/R1.fq deleted file mode 100644 index d7ca9082..00000000 --- a/fastp_0.20.1/test_data/R1.fq +++ /dev/null @@ -1,36 +0,0 @@ -@AS500713:64:HFKJJBGXY:1:11101:1675:1101 1:A:0:TATAGCCT+GACCCCCA - -+ - -@AS500713:64:HFKJJBGXY:1:11101:17113:1101 1:A:0:TATAGCCT+GTTTCTTA -TACAAAATGCACATCGCTGAAAGGGGTAAAGGAGAGAAATCGCTTTATAAAACCTTGAAAAGGAATATTCAAATATAAGCTGGGAAGGTATAAAAAACTCTGTACATCACAAGTAAACAAATGGAACCTGCAAAATATTAAACAAAGGATT -+ -AAAAAEEEEE6EEAAAEEEEE6EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAEEEEAEEEEEEEEEEEEEEEEECFE####EEEE6EE Date: Tue, 25 Aug 2020 16:16:21 -0400 Subject: [PATCH 170/476] Adding Fgbio FilterConsensusReads :heavy_check_mark: CWL :heavy_check_mark: Readme :heavy_check_mark: example yaml :heavy_check_mark: test with cwltool of help :heavy_check_mark: Update on summary.md Resolves #58 --- docs/SUMMARY.md | 1 + fgbio_filter_consensus_reads_1.2.0/README.md | 80 ++++++ .../example_inputs.yaml | 17 ++ 
.../fgbio_filter_consensus_reads_1.2.0.cwl | 230 ++++++++++++++++++ 4 files changed, 328 insertions(+) create mode 100644 fgbio_filter_consensus_reads_1.2.0/README.md create mode 100644 fgbio_filter_consensus_reads_1.2.0/example_inputs.yaml create mode 100644 fgbio_filter_consensus_reads_1.2.0/fgbio_filter_consensus_reads_1.2.0.cwl diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index d6463a0e..a72bd274 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -13,6 +13,7 @@ - [CollectDuplexSeqMetrics v1.2.0](../fgbio_collect_duplex_seq_metrics_1.2.0/README.md) - [GroupReadsByUmi v1.2.0](../fgbio_group_reads_by_umi_1.2.0/README.md) - [FastqToBam v1.2.0](../fgbio_fastq_to_bam_1.2.0/README.md) + - [FilterConsensusReads v1.2.0](../fgbio_filter_consensus_reads_1.2.0/README.md) - GATK - [ApplyBQSR v4.1.2.0](../gatk_ApplyBQSR_4.1.2.0/README.md) - [BaseRecalibrator v4.1.2.0](../gatk_BaseRecalibrator_4.1.2.0/README.md) diff --git a/fgbio_filter_consensus_reads_1.2.0/README.md b/fgbio_filter_consensus_reads_1.2.0/README.md new file mode 100644 index 00000000..496a048b --- /dev/null +++ b/fgbio_filter_consensus_reads_1.2.0/README.md @@ -0,0 +1,80 @@ +# CWL for running Fgbio - FilterConsensusReads + +## Version of tools in docker image + +| Tool | Version | Location | +| ----- | ------- | ------------------------------------ | +| fgbio | 1.2.0 | quay.io/biocontainers/fgbio:1.2.0--0 | + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner fgbio_filter_consensus_reads_1.2.0.cwl example_inputs.yaml +``` + +## Usage + +```bash + +usage: fgbio_filter_consensus_reads_1.2.0.cwl [-h] + [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] + --input INPUT + [--output_file_name OUTPUT_FILE_NAME] + --reference_fasta + REFERENCE_FASTA + [--reverse_per_base_tags] + [--min_reads MIN_READS] + 
[--max_read_error_rate MAX_READ_ERROR_RATE] + [--max_base_error_rate MAX_BASE_ERROR_RATE] + [--min_base_quality MIN_BASE_QUALITY] + [--max_no_call_fraction MAX_NO_CALL_FRACTION] + [--min_mean_base_quality MIN_MEAN_BASE_QUALITY] + [--require_single_strand_agreement] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --input INPUT The input SAM or BAM file. + --output_file_name OUTPUT_FILE_NAME + Output SAM or BAM file to write consensus reads. + --reference_fasta REFERENCE_FASTA + Reference fasta file. + --reverse_per_base_tags + Reverse [complement] per base tags on reverse strand + reads. + --min_reads MIN_READS + The minimum number of reads supporting a consensus + base/read. (Max 3 values) + --max_read_error_rate MAX_READ_ERROR_RATE + The maximum raw-read error rate across the entire + consensus read. (Max 3 values) + --max_base_error_rate MAX_BASE_ERROR_RATE + The maximum error rate for a single consensus base. + (Max 3 values) + --min_base_quality MIN_BASE_QUALITY + Mask (make N) consensus bases with quality less than + this threshold. + --max_no_call_fraction MAX_NO_CALL_FRACTION + Maximum fraction of no-calls in the read after + filtering + --min_mean_base_quality MIN_MEAN_BASE_QUALITY + The minimum mean base quality across the consensus + read + --require_single_strand_agreement + Mask (make N) consensus bases where the AB and BA + consensus reads disagree (for duplex-sequencing only). 
+``` diff --git a/fgbio_filter_consensus_reads_1.2.0/example_inputs.yaml b/fgbio_filter_consensus_reads_1.2.0/example_inputs.yaml new file mode 100644 index 00000000..afe8426d --- /dev/null +++ b/fgbio_filter_consensus_reads_1.2.0/example_inputs.yaml @@ -0,0 +1,17 @@ +input: /path/to/bam_file +max_base_error_rate: null +max_no_call_fraction: null +max_read_error_rate: null +memory_overhead: null +memory_per_job: null +min_base_quality: null +min_mean_base_quality: null +min_reads: + - 2 + - 2 + - 1 +number_of_threads: null +output_file_name: test.bam +reference_fasta: /path/to/reference_fasta +require_single_strand_agreement: true +reverse_per_base_tags: null diff --git a/fgbio_filter_consensus_reads_1.2.0/fgbio_filter_consensus_reads_1.2.0.cwl b/fgbio_filter_consensus_reads_1.2.0/fgbio_filter_consensus_reads_1.2.0.cwl new file mode 100644 index 00000000..0ea3496c --- /dev/null +++ b/fgbio_filter_consensus_reads_1.2.0/fgbio_filter_consensus_reads_1.2.0.cwl @@ -0,0 +1,230 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: fgbio_filter_consensus_reads_1_2_0 +baseCommand: + - fgbio +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: input + type: File + inputBinding: + position: 0 + prefix: '--input' + shellQuote: false + doc: The input SAM or BAM file. + - id: output_file_name + type: string? + doc: Output SAM or BAM file to write consensus reads. + - id: reference_fasta + type: File + inputBinding: + position: 0 + prefix: '--ref' + doc: Reference fasta file. + secondaryFiles: + - ^.fai + - ^.dict + - id: reverse_per_base_tags + type: boolean? 
+ inputBinding: + position: 0 + prefix: '--reverse-per-base-tags' + doc: 'Reverse [complement] per base tags on reverse strand reads.' + - id: min_reads + type: 'int[]?' + inputBinding: + position: 0 + prefix: '--min-reads' + itemSeparator: ' ' + doc: >- + The minimum number of reads supporting a consensus base/read. (Max 3 + values) + - id: max_read_error_rate + type: 'float[]?' + inputBinding: + position: 0 + prefix: '--max-read-error-rate' + itemSeparator: ' ' + doc: >- + The maximum raw-read error rate across the entire consensus read. (Max 3 + values) + - id: max_base_error_rate + type: 'float[]?' + inputBinding: + position: 0 + prefix: '--max-base-error-rate' + itemSeparator: ' ' + doc: The maximum error rate for a single consensus base. (Max 3 values) + - id: min_base_quality + type: int? + inputBinding: + position: 0 + prefix: '--min-base-quality' + doc: Mask (make N) consensus bases with quality less than this threshold. + - id: max_no_call_fraction + type: float? + inputBinding: + position: 0 + prefix: '--max-no-call-fraction' + doc: Maximum fraction of no-calls in the read after filtering + - id: min_mean_base_quality + type: int? # Phred score (integer) passed to --min-mean-base-quality + inputBinding: + position: 0 + prefix: '--min-mean-base-quality' + doc: The minimum mean base quality across the consensus read + - id: require_single_strand_agreement + type: boolean? + inputBinding: + position: 0 + prefix: '--require-single-strand-agreement' + doc: >- + Mask (make N) consensus bases where the AB and BA consensus reads disagree + (for duplex-sequencing only). +outputs: + - id: filter_consensus_reads_bam + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_file_name) + return inputs.output_file_name; + return inputs.input.basename.replace(/\.bam$/,'_filtered.bam'); + } +doc: >- + Filters consensus reads generated by CallMolecularConsensusReads or + CallDuplexConsensusReads. Two kinds of filtering are performed: + + + 1. Masking/filtering of individual bases in reads + + 2.
Filtering out of reads (i.e. not writing them to the output file) + + + Base-level filtering/masking is only applied if per-base tags are present (see + CallDuplexConsensusReads and CallMolecularConsensusReads for descriptions of + these tags). Read-level filtering is always applied. When filtering reads, + secondary alignments and supplementary records may be removed independently if + they fail one or more filters; if either R1 or R2 primary alignments fail a + filter then all records for the template will be filtered out. + + + The filters applied are as follows: + + + 1. Reads with fewer than min-reads contributing reads are filtered out + + 2. Reads with an average consensus error rate higher than max-read-error-rate + are filtered out + + 3. Reads with mean base quality of the consensus read, prior to any masking, + less than min-mean-base-quality are filtered out (if specified) + + 4. Bases with quality scores below min-base-quality are masked to Ns + + 5. Bases with fewer than min-reads contributing raw reads are masked to Ns + + 6. Bases with a consensus error rate (defined as the fraction of contributing + reads that voted for a different base than the consensus call) higher than + max-base-error-rate are masked to Ns + + 7. For duplex reads, if require-single-strand-agreement is provided, masks to + Ns any bases where the base was observed in both single-strand consensus reads + and the two reads did not agree + + 8.
Reads with a proportion of Ns higher than max-no-call-fraction after + per-base filtering are filtered out +label: fgbio_filter_consensus_reads_1.2.0 +arguments: + - position: 0 + prefix: '' + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx10G" + } + else { + return "-Xmx10G" + } + } + - position: 0 + valueFrom: '-XX:-UseGCOverheadLimit' + - position: 0 + prefix: '-Djava.io.tmpdir=' + separate: false + shellQuote: false + valueFrom: '${ return runtime.tmpdir}' + - position: 0 + prefix: '' + valueFrom: FilterConsensusReads + - position: 0 + prefix: '--output' + shellQuote: false + valueFrom: |- + ${ + if(inputs.output_file_name) + return inputs.output_file_name; + return inputs.input.basename.replace(/\.bam$/,'_filtered.bam'); + } + - position: 0 + prefix: '--threads' + valueFrom: |- + ${ + if(inputs.number_of_threads) + return inputs.number_of_threads + return runtime.cores + } +requirements: + - class: ShellCommandRequirement + - class: ResourceRequirement + ramMin: 4000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'quay.io/biocontainers/fgbio:1.2.0--0' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' +
'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': fgbio FilterConsensusReads + 'doap:revision': 1.2.0 From 7ebeb1f023aaff89f6a1462114b1f47510a6402b Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Wed, 26 Aug 2020 16:11:30 -0400 Subject: [PATCH 171/476] add cwl, readme, example input --- gatk_merge_bam_alignment_4.1.8.0/README.md | 245 ++++++++++ .../example_inputs.yaml | 44 ++ .../gatk_merge_bam_alignment_4.1.8.0.cwl | 420 ++++++++++++++++++ 3 files changed, 709 insertions(+) create mode 100644 gatk_merge_bam_alignment_4.1.8.0/README.md create mode 100644 gatk_merge_bam_alignment_4.1.8.0/example_inputs.yaml create mode 100644 gatk_merge_bam_alignment_4.1.8.0/gatk_merge_bam_alignment_4.1.8.0.cwl diff --git a/gatk_merge_bam_alignment_4.1.8.0/README.md b/gatk_merge_bam_alignment_4.1.8.0/README.md new file mode 100644 index 00000000..4bd55029 --- /dev/null +++ b/gatk_merge_bam_alignment_4.1.8.0/README.md @@ -0,0 +1,245 @@ +# CWL for running GATK - MergeBamAlignment + +## Version of tools in docker image + +| Tool | Version | Location | +|--- |--- |--- | +| gatk | 4.1.8.0 | broadinstitute/gatk:4.1.8.0 | + + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner gatk_merge_bam_alignment_4.1.8.0.cwl example_inputs.yaml +``` + +### Usage + +```bash +usage: gatk_merge_bam_alignment_4.1.8.0.cwl [-h] + [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] + --unmapped_bam UNMAPPED_BAM + --reference REFERENCE + [--output_file_name OUTPUT_FILE_NAME] + [--add_mate_cigar] + [--add_pg_tag_to_reads] + [--aligned_bam ALIGNED_BAM] + [--aligned_reads_only] + [--aligner_proper_pair_flags] + [--attributes_to_remove ATTRIBUTES_TO_REMOVE] + [--attributes_to_retain 
ATTRIBUTES_TO_RETAIN] + [--attributes_to_reverse ATTRIBUTES_TO_REVERSE] + [--attributes_to_reverse_complement ATTRIBUTES_TO_REVERSE_COMPLEMENT] + [--clip_adapters] + [--clip_overlapping_reads] + [--expected_orientations EXPECTED_ORIENTATIONS] + [--hard_clip_overlapping_reads] + [--include_secondary_alignments] + [--is_bisulfite_sequence] + [--jump_size JUMP_SIZE] + [--matching_dictionary_tags MATCHING_DICTIONARY_TAGS] + [--max_insertions_or_deletions MAX_INSERTIONS_OR_DELETIONS] + [--min_unclipped_bases MIN_UNCLIPPED_BASES] + [--paired_run] + [--primary_alignment_strategy PRIMARY_ALIGNMENT_STRATEGY] + [--read1_aligned_bam READ1_ALIGNED_BAM] + [--read1_trim READ1_TRIM] + [--read2_aligned_bam READ2_ALIGNED_BAM] + [--read2_trim READ2_TRIM] + [--sort_order SORT_ORDER] + [--unmap_contaminant_reads] + [--unmapped_read_strategy UNMAPPED_READ_STRATEGY] + [--validation_stringency VALIDATION_STRINGENCY] + [--create_index] + [--create_md5_file] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --unmapped_bam UNMAPPED_BAM + Original SAM or BAM file of unmapped reads, which must + be in queryname order. Reads MUST be unmapped. + Required. + --reference REFERENCE + Reference sequence file. Required. + --output_file_name OUTPUT_FILE_NAME + Merged SAM or BAM file to write to. Required. + --add_mate_cigar Adds the mate CIGAR tag (MC) if true, does not if + false. Default value: true. Possible values: {true, + false} + --add_pg_tag_to_reads + Add PG tag to each read in a SAM or BAM Default value: + true. Possible values: {true, false} + --aligned_bam ALIGNED_BAM + SAM or BAM file(s) with alignment data. This argument + may be specified 0 or more times. Default value: null. 
+ Cannot be used in conjunction with argument(s) + READ1_ALIGNED_BAM (R1_ALIGNED) READ2_ALIGNED_BAM + (R2_ALIGNED) + --aligned_reads_only Whether to output only aligned reads. Default value: + false. Possible values: {true, false} + --aligner_proper_pair_flags + Use the aligners idea of what a proper pair is rather + than computing in this program. Default value: false. + Possible values: {true, false} + --attributes_to_remove ATTRIBUTES_TO_REMOVE + Attributes from the alignment record that should be + removed when merging. This overrides + ATTRIBUTES_TO_RETAIN if they share common tags. This + argument may be specified 0 or more times. Default + value: null. + --attributes_to_retain ATTRIBUTES_TO_RETAIN + Reserved alignment attributes (tags starting with X, + Y, or Z) that should be brought over from the + alignment data when merging. This argument may be + specified 0 or more times. Default value: null. + --attributes_to_reverse ATTRIBUTES_TO_REVERSE + Attributes on negative strand reads that need to be + reversed. This argument may be specified 0 or more + times. Default value: [OQ, U2]. + --attributes_to_reverse_complement ATTRIBUTES_TO_REVERSE_COMPLEMENT + Attributes on negative strand reads that need to be + reverse complemented. This argument may be specified 0 + or more times. Default value: [E2, SQ]. + --clip_adapters Whether to clip adapters where identified. Default + value: true. Possible values: {true, false} + --clip_overlapping_reads + For paired reads, clip the 3' end of each read if + necessary so that it does not extend past the 5' end + of its mate. Clipping will be either soft or hard + clipping, depending on CLIP_OVERLAPPING_READS_OPERATOR + setting. Hard clipped bases and their qualities will + be stored in the XB and XQ tags respectively. Default + value: true. Possible values: {true, false} + --expected_orientations EXPECTED_ORIENTATIONS + The expected orientation of proper read pairs. 
+ Replaces JUMP_SIZE This argument may be specified 0 or + more times. Default value: null. Possible values: {FR, + RF, TANDEM} Cannot be used in conjunction with + argument(s) JUMP_SIZE (JUMP) + --hard_clip_overlapping_reads + If true, hard clipping will be applied to overlapping + reads. By default, soft clipping is used. Default + value: false. Possible values: {true, false} + --include_secondary_alignments + If false, do not write secondary alignments to output. + Default value: true. Possible values: {true, false} + --is_bisulfite_sequence + Whether the lane is bisulfite sequence (used when + calculating the NM tag). Default value: false. + Possible values: {true, false} + --jump_size JUMP_SIZE + The expected jump size (required if this is a jumping + library). Deprecated. Use EXPECTED_ORIENTATIONS + instead Default value: null. Cannot be used in + conjunction with argument(s) EXPECTED_ORIENTATIONS + (ORIENTATIONS) + --matching_dictionary_tags MATCHING_DICTIONARY_TAGS + List of Sequence Records tags that must be equal (if + present) in the reference dictionary and in the + aligned file. Mismatching tags will cause an error if + in this list, and a warning otherwise. This argument + may be specified 0 or more times. Default value: [M5, + LN]. + --max_insertions_or_deletions MAX_INSERTIONS_OR_DELETIONS + The maximum number of insertions or deletions + permitted for an alignment to be included. Alignments + with more than this many insertions or deletions will + be ignored. Set to -1 to allow any number of + insertions or deletions. Default value: 1. + --min_unclipped_bases MIN_UNCLIPPED_BASES + If UNMAP_CONTAMINANT_READS is set, require this many + unclipped bases or else the read will be marked as + contaminant. Default value: 32. + --paired_run DEPRECATED. This argument is ignored and will be + removed. Default value: true. 
Possible values: {true, + false} + --primary_alignment_strategy PRIMARY_ALIGNMENT_STRATEGY + Strategy for selecting primary alignment when the + aligner has provided more than one alignment for a + pair or fragment, and none are marked as primary, more + than one is marked as primary, or the primary + alignment is filtered out for some reason. For all + strategies, ties are resolved arbitrarily. Default + value: BestMapq. BestMapq (Expects that multiple + alignments will be correlated with HI tag, and prefers + the pair of alignments with the largest MAPQ, in the + absence of a primary selected by the aligner.) + EarliestFragment (Prefers the alignment which maps the + earliest base in the read. Note that EarliestFragment + may not be used for paired reads.) BestEndMapq + (Appropriate for cases in which the aligner is not + pair-aware, and does not output the HI tag. It simply + picks the alignment for each end with the highest + MAPQ, and makes those alignments primary, regardless + of whether the two alignments make sense together.) + MostDistant (Appropriate for a non-pair-aware aligner. + Picks the alignment pair with the largest insert size. + If all alignments would be chimeric, it picks the + alignments for each end with the best MAPQ.) + --read1_aligned_bam READ1_ALIGNED_BAM + SAM or BAM file(s) with alignment data from the first + read of a pair. This argument may be specified 0 or + more times. Default value: null. Cannot be used in + conjunction with argument(s) ALIGNED_BAM (ALIGNED) + --read1_trim READ1_TRIM + The number of bases trimmed from the beginning of read + 1 prior to alignment Default value: 0. + --read2_aligned_bam READ2_ALIGNED_BAM + SAM or BAM file(s) with alignment data from the second + read of a pair. This argument may be specified 0 or + more times. Default value: null. 
Cannot be used in + conjunction with argument(s) ALIGNED_BAM (ALIGNED) + --read2_trim READ2_TRIM + The number of bases trimmed from the beginning of read + 2 prior to alignment. Default value: 0. + --sort_order SORT_ORDER + The order in which the merged reads should be output. + Default value: coordinate. Possible values: {unsorted, + queryname, coordinate, duplicate, unknown} + --unmap_contaminant_reads + Detect reads originating from foreign organisms (e.g. + bacterial DNA in a non-bacterial sample), and unmap + + label those reads accordingly. Default value: false. + Possible values: {true, false} + --unmapped_read_strategy UNMAPPED_READ_STRATEGY + How to deal with alignment information in reads that + are being unmapped (e.g. due to cross-species + contamination.) Currently ignored unless + UNMAP_CONTAMINANT_READS = true. Note that the + DO_NOT_CHANGE strategy will actually reset the cigar + and set the mapping quality on unmapped reads since + otherwise the result will be an invalid record. To + force no change use the DO_NOT_CHANGE_INVALID + strategy. Default value: DO_NOT_CHANGE. Possible + values: {COPY_TO_TAG, DO_NOT_CHANGE, + DO_NOT_CHANGE_INVALID, MOVE_TO_TAG} + --validation_stringency VALIDATION_STRINGENCY + Validation stringency for all SAM files read by this + program. Setting stringency to SILENT can improve + performance when processing a BAM file in which + variable-length data (read, qualities, tags) do not + otherwise need to be decoded. Default value: STRICT. + This option can be set to 'null' to clear the default + value. Possible values: {STRICT, LENIENT, SILENT} + --create_index Whether to create a BAM index when writing a + coordinate-sorted BAM file. Default value: false. + Possible values: {true, false} + --create_md5_file Whether to create an MD5 digest for any BAM or FASTQ + files created. Default value: false.
Possible values: + {true, false} +``` diff --git a/gatk_merge_bam_alignment_4.1.8.0/example_inputs.yaml b/gatk_merge_bam_alignment_4.1.8.0/example_inputs.yaml new file mode 100644 index 00000000..4a638bcb --- /dev/null +++ b/gatk_merge_bam_alignment_4.1.8.0/example_inputs.yaml @@ -0,0 +1,44 @@ +unmapped_bam: + class: File + path: "/Users/charlesmurphy/Desktop/test-unmapped.bam" +reference: + class: File + path: "/Users/charlesmurphy/Desktop/data/ref/hg19/Homo_sapiens_assembly19.fasta" +aligned_bam: + - class: File + path: "/Users/charlesmurphy/Desktop/test.bam" +output_file_name: null +add_mate_cigar: null +add_pg_tag_to_reads: null +aligned_reads_only: null +include_duplicates: null +aligner_proper_pair_flags: null +attributes_to_remove: null +attributes_to_retain: null +attributes_to_reverse: null +attributes_to_reverse_complement: null +clip_adapters: null +clip_overlapping_reads: null +expected_orientations: null +hard_clip_overlapping_reads: null +include_secondary_alignments: null +is_bisulfite_sequence: null +jump_size: null +matching_dictionary_tags: null +max_insertions_or_deletions: null +min_unclipped_bases: null +paired_run: null +primary_alignment_strategy: null +read1_aligned_bam: null +read1_trim: null +read2_aligned_bam: null +read2_trim: null +sort_order: null +unmap_contaminant_reads: null +unmapped_read_strategy: null +validation_stringency: null +create_index: null +create_md5_file: null +memory_overhead: null +memory_per_job: null +number_of_threads: null diff --git a/gatk_merge_bam_alignment_4.1.8.0/gatk_merge_bam_alignment_4.1.8.0.cwl b/gatk_merge_bam_alignment_4.1.8.0/gatk_merge_bam_alignment_4.1.8.0.cwl new file mode 100644 index 00000000..8c1bcf61 --- /dev/null +++ b/gatk_merge_bam_alignment_4.1.8.0/gatk_merge_bam_alignment_4.1.8.0.cwl @@ -0,0 +1,420 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 
'https://www.sevenbridges.com/' +id: gatk_merge_bam_alignment_4_1_8_0 +label: GATK-MergeBamAlignment +baseCommand: + - gatk + - MergeBamAlignment +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: unmapped_bam + type: File + inputBinding: + position: 0 + prefix: --UNMAPPED_BAM + doc: | + Original SAM or BAM file of unmapped reads, which must be in queryname order. Reads MUST + be unmapped. Required. + - id: reference + type: File + inputBinding: + position: 0 + prefix: --REFERENCE_SEQUENCE + doc: | + Reference sequence file. Required. + secondaryFiles: + - ^.dict + - id: output_file_name + type: string? + doc: | + Merged SAM or BAM file to write to. Required. + - id: add_mate_cigar + type: boolean? + inputBinding: + position: 0 + prefix: --ADD_MATE_CIGAR + doc: | + Adds the mate CIGAR tag (MC) if true, does not if false. Default value: true. Possible + values: {true, false} + - id: add_pg_tag_to_reads + type: boolean? + inputBinding: + position: 0 + prefix: --ADD_PG_TAG_TO_READS + doc: | + Add PG tag to each read in a SAM or BAM Default value: true. Possible values: {true, + false} + - id: aligned_bam + type: + - "null" + - type: array + items: File + inputBinding: + prefix: --ALIGNED_BAM + inputBinding: + position: 1 + doc: | + SAM or BAM file(s) with alignment data. This argument may be specified 0 or more times. + Default value: null. Cannot be used in conjunction with argument(s) READ1_ALIGNED_BAM + (R1_ALIGNED) READ2_ALIGNED_BAM (R2_ALIGNED) + - id: aligned_reads_only + type: boolean? + inputBinding: + position: 0 + prefix: --ALIGNED_READS_ONLY + doc: | + Whether to output only aligned reads. Default value: false. Possible values: {true, + false} + - id: aligner_proper_pair_flags + type: boolean? 
+ inputBinding: + position: 0 + prefix: --ALIGNER_PROPER_PAIR_FLAGS + doc: | + Use the aligners idea of what a proper pair is rather than computing in this program. + Default value: false. Possible values: {true, false} + - id: attributes_to_remove + type: string? + inputBinding: + position: 0 + prefix: --ATTRIBUTES_TO_REMOVE + doc: | + Attributes from the alignment record that should be removed when merging. This overrides + ATTRIBUTES_TO_RETAIN if they share common tags. This argument may be specified 0 or more + times. Default value: null. + - id: attributes_to_retain + type: string? + inputBinding: + position: 0 + prefix: --ATTRIBUTES_TO_RETAIN + doc: | + Reserved alignment attributes (tags starting with X, Y, or Z) that should be brought over + from the alignment data when merging. This argument may be specified 0 or more times. + Default value: null. + - id: attributes_to_reverse + type: string? + inputBinding: + position: 0 + prefix: --ATTRIBUTES_TO_REVERSE + doc: | + Attributes on negative strand reads that need to be reversed. This argument may be + specified 0 or more times. Default value: [OQ, U2]. + - id: attributes_to_reverse_complement + type: string? + inputBinding: + position: 0 + prefix: --ATTRIBUTES_TO_REVERSE_COMPLEMENT + doc: | + Attributes on negative strand reads that need to be reverse complemented. This argument + may be specified 0 or more times. Default value: [E2, SQ]. + - id: clip_adapters + type: boolean? + inputBinding: + position: 0 + prefix: --CLIP_ADAPTERS + doc: | + Whether to clip adapters where identified. Default value: true. Possible values: {true, + false} + - id: clip_overlapping_reads + type: boolean? + inputBinding: + position: 0 + prefix: --CLIP_OVERLAPPING_READS + doc: | + For paired reads, clip the 3' end of each read if necessary so that it does not extend + past the 5' end of its mate. Clipping will be either soft or hard clipping, depending on + CLIP_OVERLAPPING_READS_OPERATOR setting. 
Hard clipped bases and their qualities will be + stored in the XB and XQ tags respectively. Default value: true. Possible values: {true, + false} + - id: expected_orientations + type: string? + inputBinding: + position: 0 + prefix: --EXPECTED_ORIENTATIONS + doc: | + The expected orientation of proper read pairs. Replaces JUMP_SIZE This argument may be + specified 0 or more times. Default value: null. Possible values: {FR, RF, TANDEM} Cannot + be used in conjunction with argument(s) JUMP_SIZE (JUMP) + - id: hard_clip_overlapping_reads + type: boolean? + inputBinding: + position: 0 + prefix: --HARD_CLIP_OVERLAPPING_READS + doc: | + If true, hard clipping will be applied to overlapping reads. By default, soft clipping is + used. Default value: false. Possible values: {true, false} + - id: include_secondary_alignments + type: boolean? + inputBinding: + position: 0 + prefix: --INCLUDE_SECONDARY_ALIGNMENTS + doc: | + If false, do not write secondary alignments to output. Default value: true. Possible + values: {true, false} + - id: is_bisulfite_sequence + type: boolean? + inputBinding: + position: 0 + prefix: --IS_BISULFITE_SEQUENCE + doc: | + Whether the lane is bisulfite sequence (used when calculating the NM tag). Default value: + false. Possible values: {true, false} + - id: jump_size + type: int? + inputBinding: + position: 0 + prefix: --JUMP_SIZE + doc: | + The expected jump size (required if this is a jumping library). Deprecated. Use + EXPECTED_ORIENTATIONS instead Default value: null. Cannot be used in conjunction with + argument(s) EXPECTED_ORIENTATIONS (ORIENTATIONS) + - id: matching_dictionary_tags + type: string? + inputBinding: + position: 0 + prefix: --MATCHING_DICTIONARY_TAGS + doc: | + List of Sequence Records tags that must be equal (if present) in the reference dictionary + and in the aligned file. Mismatching tags will cause an error if in this list, and a + warning otherwise. This argument may be specified 0 or more times. Default value: [M5, + LN]. 
+ - id: max_insertions_or_deletions + type: int? + inputBinding: + position: 0 + prefix: --MAX_INSERTIONS_OR_DELETIONS + doc: | + The maximum number of insertions or deletions permitted for an alignment to be included. + Alignments with more than this many insertions or deletions will be ignored. Set to -1 to + allow any number of insertions or deletions. Default value: 1. + - id: min_unclipped_bases + type: int? + inputBinding: + position: 0 + prefix: --MIN_UNCLIPPED_BASES + doc: | + If UNMAP_CONTAMINANT_READS is set, require this many unclipped bases or else the read will + be marked as contaminant. Default value: 32. + - id: paired_run + type: boolean? + inputBinding: + position: 0 + prefix: --PAIRED_RUN + doc: | + DEPRECATED. This argument is ignored and will be removed. Default value: true. Possible + values: {true, false} + - id: primary_alignment_strategy + type: string? + inputBinding: + position: 0 + prefix: --PRIMARY_ALIGNMENT_STRATEGY + doc: | + Strategy for selecting primary alignment when the aligner has provided more than one + alignment for a pair or fragment, and none are marked as primary, more than one is marked + as primary, or the primary alignment is filtered out for some reason. For all strategies, + ties are resolved arbitrarily. Default value: BestMapq. BestMapq (Expects that multiple + alignments will be correlated with HI tag, and prefers the pair of alignments with the + largest MAPQ, in the absence of a primary selected by the aligner.) + EarliestFragment (Prefers the alignment which maps the earliest base in the read. Note + that EarliestFragment may not be used for paired reads.) + BestEndMapq (Appropriate for cases in which the aligner is not pair-aware, and does not + output the HI tag. It simply picks the alignment for each end with the highest MAPQ, and + makes those alignments primary, regardless of whether the two alignments make sense + together.) + MostDistant (Appropriate for a non-pair-aware aligner. 
Picks the alignment pair with the + largest insert size. If all alignments would be chimeric, it picks the alignments for each + end with the best MAPQ.) + - id: read1_aligned_bam + type: + - "null" + - type: array + items: File + inputBinding: + prefix: --READ1_ALIGNED_BAM + inputBinding: + position: 1 + doc: | + SAM or BAM file(s) with alignment data from the first read of a pair. This argument may + be specified 0 or more times. Default value: null. Cannot be used in conjunction with + argument(s) ALIGNED_BAM (ALIGNED) + - id: read1_trim + type: int? + inputBinding: + position: 0 + prefix: --READ1_TRIM + doc: | + The number of bases trimmed from the beginning of read 1 prior to alignment Default + value: 0. + - id: read2_aligned_bam + type: + - "null" + - type: array + items: File + inputBinding: + prefix: --READ2_ALIGNED_BAM + inputBinding: + position: 1 + doc: | + SAM or BAM file(s) with alignment data from the second read of a pair. This argument may + be specified 0 or more times. Default value: null. Cannot be used in conjunction with + argument(s) ALIGNED_BAM (ALIGNED) + - id: read2_trim + type: int? + inputBinding: + position: 0 + prefix: --READ2_TRIM + doc: | + The number of bases trimmed from the beginning of read 2 prior to alignment Default + value: 0. + - id: sort_order + type: string? + inputBinding: + position: 1 + prefix: --SORT_ORDER + doc: | + The order in which the merged reads should be output. Default value: coordinate. Possible + values: {unsorted, queryname, coordinate, duplicate, unknown} + - id: unmap_contaminant_reads + type: boolean? + inputBinding: + position: 1 + prefix: --UNMAP_CONTAMINANT_READS + doc: | + Detect reads originating from foreign organisms (e.g. bacterial DNA in a non-bacterial + sample),and unmap + label those reads accordingly. Default value: false. Possible values: + {true, false} + - id: unmapped_read_strategy + type: string? 
+ inputBinding: + position: 1 + prefix: --UNMAPPED_READ_STRATEGY + doc: | + How to deal with alignment information in reads that are being unmapped (e.g. due to + cross-species contamination.) Currently ignored unless UNMAP_CONTAMINANT_READS = true. + Note that the DO_NOT_CHANGE strategy will actually reset the cigar and set the mapping + quality on unmapped reads since otherwisethe result will be an invalid record. To force no + change use the DO_NOT_CHANGE_INVALID strategy. Default value: DO_NOT_CHANGE. Possible + values: {COPY_TO_TAG, DO_NOT_CHANGE, DO_NOT_CHANGE_INVALID, MOVE_TO_TAG} + - id: validation_stringency + type: string? + inputBinding: + position: 0 + prefix: --VALIDATION_STRINGENCY + doc: >- + Validation stringency for all SAM files read by this program. Setting + stringency to SILENT can improve performance when processing a BAM file in + which variable-length data (read, qualities, tags) do not otherwise need + to be decoded. Default value: STRICT. This option can be set to 'null' to + clear the default value. Possible values: {STRICT,LENIENT, SILENT} + - id: create_index + type: boolean? + inputBinding: + position: 0 + prefix: --CREATE_INDEX + doc: >- + Whether to create a BAM index when writing a coordinate-sorted BAM file. Default value: + false. Possible values: {true, false} + - id: create_md5_file + type: boolean? + inputBinding: + position: 0 + prefix: --CREATE_MD5_FILE + doc: >- + Whether to create an MD5 digest for any BAM or FASTQ files created. Default value: + false. 
Possible values: {true, false} +outputs: + - id: gatk_merge_bam_alignment_output + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.unmapped_bam.basename.replace(/.bam|.sam/, '-merged.bam') + } + } +arguments: + - position: 0 + prefix: '--java-options' + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx15G" + } + else { + return "-Xmx15G" + } + } + - position: 1 + prefix: '-O' + valueFrom: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.unmapped_bam.basename.replace(/.bam|.sam/, '-merged.bam') + } + } + - position: 0 + prefix: '--TMP_DIR' + valueFrom: . 
+ - position: 0 + prefix: '--COMPRESSION_LEVEL' + valueFrom: '2' + - position: 0 + prefix: '--MAX_RECORDS_IN_RAM' + valueFrom: '50000' +requirements: + - class: ResourceRequirement + ramMin: 20000 + coresMin: 1 + - class: DockerRequirement + dockerPull: 'broadinstitute/gatk:4.1.8.0' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charles Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charles Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': gatk4 + 'doap:revision': 4.1.8.0 From 490995fcef16a14d58db6ceac6711903ae033974 Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Wed, 26 Aug 2020 16:31:09 -0400 Subject: [PATCH 172/476] Update SUMMARY.md --- docs/SUMMARY.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index d6463a0e..2d7c2344 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -16,6 +16,7 @@ - GATK - [ApplyBQSR v4.1.2.0](../gatk_ApplyBQSR_4.1.2.0/README.md) - [BaseRecalibrator v4.1.2.0](../gatk_BaseRecalibrator_4.1.2.0/README.md) + - [MergeBamAlignment v4.1.8.0](../gatk_merge_bam_alignment_4.1.8.0/README.md) - Manta - [Manta v1.5.1](../manta_1.5.1/README.md) - Marianas From 83a33d6e25e0b54d33c558df482bc708831a6235 Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Wed, 26 Aug 2020 17:38:18 -0400 Subject: [PATCH 173/476] added cwl, readme, example input --- docs/SUMMARY.md | 1 + gatk_merge_sam_files_4.1.8.0/README.md | 91 ++++++++ .../example_inputs.yaml | 21 ++ .../gatk_merge_sam_files_4.1.8.0.cwl | 201 ++++++++++++++++++ 4 files changed, 314 insertions(+) create mode 100644 gatk_merge_sam_files_4.1.8.0/README.md create mode 100644 
gatk_merge_sam_files_4.1.8.0/example_inputs.yaml create mode 100644 gatk_merge_sam_files_4.1.8.0/gatk_merge_sam_files_4.1.8.0.cwl diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index d6463a0e..a7adbebc 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -16,6 +16,7 @@ - GATK - [ApplyBQSR v4.1.2.0](../gatk_ApplyBQSR_4.1.2.0/README.md) - [BaseRecalibrator v4.1.2.0](../gatk_BaseRecalibrator_4.1.2.0/README.md) + - [MergeSamFiles v4.1.8.0](../gatk_merge_sam_files_4.1.8.0/README.md) - Manta - [Manta v1.5.1](../manta_1.5.1/README.md) - Marianas diff --git a/gatk_merge_sam_files_4.1.8.0/README.md b/gatk_merge_sam_files_4.1.8.0/README.md new file mode 100644 index 00000000..767c2ab0 --- /dev/null +++ b/gatk_merge_sam_files_4.1.8.0/README.md @@ -0,0 +1,91 @@ +# CWL for running GATK - MergeSamFiles + +## Version of tools in docker image + +| Tool | Version | Location | +|--- |--- |--- | +| gatk | 4.1.8.0 | broadinstitute/gatk:4.1.8.0 | + + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner gatk_merge_sam_files_4.1.8.0.cwl example_inputs.yaml +``` + +### Usage + +```bash +usage: gatk_merge_sam_files_4.1.8.0.cwl [-h] [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] + --input INPUT + [--output_file_name OUTPUT_FILE_NAME] + [--assume_sorted] [--comment COMMENT] + [--create_index] [--create_md5_file] + [--intervals INTERVALS] + [--merge_sequence_dictionaries] + [--reference_sequence REFERENCE_SEQUENCE] + [--sort_order SORT_ORDER] + [--validation_stringency VALIDATION_STRINGENCY] + [--verbosity VERBOSITY] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + 
--number_of_threads NUMBER_OF_THREADS + --input INPUT SAM or BAM input file This argument must be specified + at least once. Required. + --output_file_name OUTPUT_FILE_NAME + SAM or BAM file to write merged result to Required. + --assume_sorted If true, assume that the input files are in the same + sort order as the requested output sort order, even if + their headers say otherwise. Default value: false. + Possible values: {true, false} + --comment COMMENT Comment(s) to include in the merged output files + header. This argument may be specified 0 or more + times. Default value: null. + --create_index Whether to create a BAM index when writing a + coordinate-sorted BAM file. Default value: false. + Possible values: {true, false} + --create_md5_file Whether to create an MD5 digest for any BAM or FASTQ + files created. Default value: false. Possible values: + {true, false} + --intervals INTERVALS + An interval list file that contains the locations of + the positions to merge. Assume bam are sorted and + indexed. The resulting file will contain alignments + that may overlap with genomic regions outside the + requested region. Unmapped reads are discarded. + Default value: null. + --merge_sequence_dictionaries + Merge the sequence dictionaries Default value: false. + Possible values: {true, false} + --reference_sequence REFERENCE_SEQUENCE + Reference sequence file. Default value: null. + --sort_order SORT_ORDER + Sort order of output file Default value: coordinate. + Possible values: {unsorted, queryname, coordinate, + duplicate, unknown} + --validation_stringency VALIDATION_STRINGENCY + Validation stringency for all SAM files read by this + program. Setting stringency to SILENT can improve + performance when processing a BAM file in which + variable-length data (read, qualities, tags) do not + otherwise need to be decoded. Default value: STRICT. + Possible values: {STRICT, LENIENT, SILENT} + --verbosity VERBOSITY + Control verbosity of logging. Default value: INFO. 
+ Possible values: {ERROR, WARNING, INFO, DEBUG} +``` diff --git a/gatk_merge_sam_files_4.1.8.0/example_inputs.yaml b/gatk_merge_sam_files_4.1.8.0/example_inputs.yaml new file mode 100644 index 00000000..7fcf6550 --- /dev/null +++ b/gatk_merge_sam_files_4.1.8.0/example_inputs.yaml @@ -0,0 +1,21 @@ +input: + - class: File + path: "/path/to/bam" + - class: File + path: "/path/to/bam" +output_file_name: null +assume_sorted: null +comment: null +create_index: null +create_md5_file: null +intervals: null +merge_sequence_dictionaries: null +reference_sequence: + class: File + path: "/path/to/reference.fasta" +sort_order: null +validation_stringency: null +verbosity: null +memory_overhead: null +memory_per_job: null +number_of_threads: null diff --git a/gatk_merge_sam_files_4.1.8.0/gatk_merge_sam_files_4.1.8.0.cwl b/gatk_merge_sam_files_4.1.8.0/gatk_merge_sam_files_4.1.8.0.cwl new file mode 100644 index 00000000..f2e254ed --- /dev/null +++ b/gatk_merge_sam_files_4.1.8.0/gatk_merge_sam_files_4.1.8.0.cwl @@ -0,0 +1,201 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: gatk_merge_sam_files_4_1_8_0 +label: GATK-MergeSamFiles +baseCommand: + - gatk + - MergeSamFiles +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: input + type: + type: array + items: File + inputBinding: + prefix: -I + inputBinding: + position: 1 + doc: | + SAM or BAM input file This argument must be specified at least once. Required. + - id: output_file_name + type: string? + doc: SAM or BAM file to write merged result to Required. + - id: assume_sorted + type: boolean? 
+ inputBinding: + position: 1 + prefix: --ASSUME_SORTED + doc: | + If true, assume that the input files are in the same sort order as the requested output + sort order, even if their headers say otherwise. Default value: false. Possible values: + {true, false} + - id: comment + type: string? + inputBinding: + position: 1 + prefix: --COMMENT + doc: | + Comment(s) to include in the merged output files header. This argument may be specified + 0 or more times. Default value: null. + - id: create_index + type: boolean? + inputBinding: + position: 1 + prefix: --CREATE_INDEX + doc: | + Whether to create a BAM index when writing a coordinate-sorted BAM file. Default value: + false. Possible values: {true, false} + - id: create_md5_file + type: boolean? + inputBinding: + position: 1 + prefix: --CREATE_MD5_FILE + doc: | + Whether to create an MD5 digest for any BAM or FASTQ files created. Default value: + false. Possible values: {true, false} + - id: intervals + type: File? + inputBinding: + position: 1 + prefix: --INTERVALS + doc: | + An interval list file that contains the locations of the positions to merge. Assume bam + are sorted and indexed. The resulting file will contain alignments that may overlap with + genomic regions outside the requested region. Unmapped reads are discarded. Default + value: null. + - id: merge_sequence_dictionaries + type: boolean? + inputBinding: + position: 1 + prefix: --MERGE_SEQUENCE_DICTIONARIES + doc: | + Merge the sequence dictionaries Default value: false. Possible values: {true, false} + - id: reference_sequence + type: File? + inputBinding: + position: 1 + prefix: --REFERENCE_SEQUENCE + doc: | + Reference sequence file. Default value: null. + - id: sort_order + type: string? + inputBinding: + position: 1 + prefix: --SORT_ORDER + doc: | + Sort order of output file Default value: coordinate. Possible values: {unsorted, + queryname, coordinate, duplicate, unknown} + - id: validation_stringency + type: string? 
+ inputBinding: + position: 1 + prefix: --VALIDATION_STRINGENCY + doc: | + Validation stringency for all SAM files read by this program. Setting stringency to + SILENT can improve performance when processing a BAM file in which variable-length data + (read, qualities, tags) do not otherwise need to be decoded. Default value: STRICT. + Possible values: {STRICT, LENIENT, SILENT} + - id: verbosity + type: string? + inputBinding: + position: 1 + prefix: --VERBOSITY + doc: | + Control verbosity of logging. Default value: INFO. Possible values: {ERROR, WARNING, + INFO, DEBUG} +outputs: + - id: gatk_merge_sam_files_output + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return 'merged.bam' + } + } +arguments: + - position: 0 + prefix: '--java-options' + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx15G" + } + else { + return "-Xmx15G" + } + } + - position: 0 + prefix: '--TMP_DIR' + valueFrom: . 
+ - position: 0 + prefix: '--COMPRESSION_LEVEL' + valueFrom: '2' + - position: 0 + prefix: '--MAX_RECORDS_IN_RAM' + valueFrom: '50000' + - position: 2 + prefix: '-O' + valueFrom: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return 'merged.bam' + } + } +requirements: + - class: ResourceRequirement + ramMin: 20000 + coresMin: 1 + - class: DockerRequirement + dockerPull: 'broadinstitute/gatk:4.1.8.0' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charles Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charles Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': gatk4 + 'doap:revision': 4.1.8.0 From 01bbb46d9bddad544083f21c942bc183b19db94a Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Thu, 27 Aug 2020 09:25:55 -0400 Subject: [PATCH 174/476] remove hardcoded paths from example inputs --- gatk_merge_bam_alignment_4.1.8.0/example_inputs.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gatk_merge_bam_alignment_4.1.8.0/example_inputs.yaml b/gatk_merge_bam_alignment_4.1.8.0/example_inputs.yaml index 4a638bcb..004d24ec 100644 --- a/gatk_merge_bam_alignment_4.1.8.0/example_inputs.yaml +++ b/gatk_merge_bam_alignment_4.1.8.0/example_inputs.yaml @@ -1,12 +1,12 @@ unmapped_bam: class: File - path: "/Users/charlesmurphy/Desktop/test-unmapped.bam" + path: "/path/to/bam" reference: class: File - path: "/Users/charlesmurphy/Desktop/data/ref/hg19/Homo_sapiens_assembly19.fasta" + path: "/path/to/reference.fasta" aligned_bam: - class: File - path: "/Users/charlesmurphy/Desktop/test.bam" + path: "/path/to/bam" output_file_name: null add_mate_cigar: null 
add_pg_tag_to_reads: null From 89eb3905b51a478e7986a5413f5932f8870b62cf Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Thu, 27 Aug 2020 10:12:00 -0400 Subject: [PATCH 175/476] add another option --- .../gatk_merge_sam_files_4.1.8.0.cwl | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/gatk_merge_sam_files_4.1.8.0/gatk_merge_sam_files_4.1.8.0.cwl b/gatk_merge_sam_files_4.1.8.0/gatk_merge_sam_files_4.1.8.0.cwl index f2e254ed..55da0796 100644 --- a/gatk_merge_sam_files_4.1.8.0/gatk_merge_sam_files_4.1.8.0.cwl +++ b/gatk_merge_sam_files_4.1.8.0/gatk_merge_sam_files_4.1.8.0.cwl @@ -97,6 +97,15 @@ inputs: doc: | Sort order of output file Default value: coordinate. Possible values: {unsorted, queryname, coordinate, duplicate, unknown} + - id: use_threading + type: boolean? + inputBinding: + position: 1 + prefix: --USE_THREADING + doc: | + Option to create a background thread to encode, compress and write to disk the output + file. The threaded version uses about 20% more CPU and decreases runtime by ~20% when + writing out a compressed BAM file. Default value: false. Possible values: {true, false} - id: validation_stringency type: string? 
inputBinding: From 14c2703a028906a60e7cb89daa66a9777fef950c Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Thu, 27 Aug 2020 10:13:05 -0400 Subject: [PATCH 176/476] Update README.md --- gatk_merge_sam_files_4.1.8.0/README.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/gatk_merge_sam_files_4.1.8.0/README.md b/gatk_merge_sam_files_4.1.8.0/README.md index 767c2ab0..92147c3f 100644 --- a/gatk_merge_sam_files_4.1.8.0/README.md +++ b/gatk_merge_sam_files_4.1.8.0/README.md @@ -31,6 +31,7 @@ usage: gatk_merge_sam_files_4.1.8.0.cwl [-h] [--memory_per_job MEMORY_PER_JOB] [--merge_sequence_dictionaries] [--reference_sequence REFERENCE_SEQUENCE] [--sort_order SORT_ORDER] + [--use_threading] [--validation_stringency VALIDATION_STRINGENCY] [--verbosity VERBOSITY] [job_order] @@ -78,6 +79,12 @@ optional arguments: Sort order of output file Default value: coordinate. Possible values: {unsorted, queryname, coordinate, duplicate, unknown} + --use_threading Option to create a background thread to encode, + compress and write to disk the output file. The + threaded version uses about 20% more CPU and decreases + runtime by ~20% when writing out a compressed BAM + file. Default value: false. Possible values: {true, + false} --validation_stringency VALIDATION_STRINGENCY Validation stringency for all SAM files read by this program. 
Setting stringency to SILENT can improve From c6b798e6d80bfcbefc54066983d5659a5b4aae7d Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Fri, 28 Aug 2020 10:27:34 -0400 Subject: [PATCH 177/476] update resource requirments --- gatk_merge_sam_files_4.1.8.0/gatk_merge_sam_files_4.1.8.0.cwl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gatk_merge_sam_files_4.1.8.0/gatk_merge_sam_files_4.1.8.0.cwl b/gatk_merge_sam_files_4.1.8.0/gatk_merge_sam_files_4.1.8.0.cwl index 55da0796..6e7059f7 100644 --- a/gatk_merge_sam_files_4.1.8.0/gatk_merge_sam_files_4.1.8.0.cwl +++ b/gatk_merge_sam_files_4.1.8.0/gatk_merge_sam_files_4.1.8.0.cwl @@ -185,8 +185,8 @@ arguments: } requirements: - class: ResourceRequirement - ramMin: 20000 - coresMin: 1 + ramMin: 12000 + coresMin: 2 - class: DockerRequirement dockerPull: 'broadinstitute/gatk:4.1.8.0' - class: InlineJavascriptRequirement From bbf6ae238e86e6ee15c8aece82d0dffb6de4dc28 Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Fri, 28 Aug 2020 10:30:55 -0400 Subject: [PATCH 178/476] fix output name, update resource requirments --- .../gatk_merge_bam_alignment_4.1.8.0.cwl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gatk_merge_bam_alignment_4.1.8.0/gatk_merge_bam_alignment_4.1.8.0.cwl b/gatk_merge_bam_alignment_4.1.8.0/gatk_merge_bam_alignment_4.1.8.0.cwl index 8c1bcf61..da82c4be 100644 --- a/gatk_merge_bam_alignment_4.1.8.0/gatk_merge_bam_alignment_4.1.8.0.cwl +++ b/gatk_merge_bam_alignment_4.1.8.0/gatk_merge_bam_alignment_4.1.8.0.cwl @@ -381,7 +381,7 @@ arguments: if(inputs.output_file_name){ return inputs.output_file_name } else { - return inputs.unmapped_bam.basename.replace(/.bam|.sam/, '-merged.bam') + return inputs.unmapped_bam.basename.replace(/.bam|.sam/, '_merged.bam') } } - position: 0 @@ -395,8 +395,8 @@ arguments: valueFrom: '50000' requirements: - class: ResourceRequirement - ramMin: 20000 - coresMin: 1 + ramMin: 12000 + coresMin: 2 - class: DockerRequirement dockerPull: 
'broadinstitute/gatk:4.1.8.0' - class: InlineJavascriptRequirement From 236310faf34387d1e8773f87d8213d0f780ceb60 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 28 Aug 2020 10:41:17 -0400 Subject: [PATCH 179/476] Update gatk_merge_bam_alignment_4.1.8.0.cwl using `_` instead of `-` --- .../gatk_merge_bam_alignment_4.1.8.0.cwl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gatk_merge_bam_alignment_4.1.8.0/gatk_merge_bam_alignment_4.1.8.0.cwl b/gatk_merge_bam_alignment_4.1.8.0/gatk_merge_bam_alignment_4.1.8.0.cwl index da82c4be..d24dfcbd 100644 --- a/gatk_merge_bam_alignment_4.1.8.0/gatk_merge_bam_alignment_4.1.8.0.cwl +++ b/gatk_merge_bam_alignment_4.1.8.0/gatk_merge_bam_alignment_4.1.8.0.cwl @@ -343,7 +343,7 @@ outputs: if(inputs.output_file_name){ return inputs.output_file_name } else { - return inputs.unmapped_bam.basename.replace(/.bam|.sam/, '-merged.bam') + return inputs.unmapped_bam.basename.replace(/.bam|.sam/, '_merged.bam') } } arguments: From 17a2336c27aaac56fb128aa821b6253c949df201 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 28 Aug 2020 11:00:50 -0400 Subject: [PATCH 180/476] Update fgbio_filter_consensus_reads_1.2.0.cwl File->INT --- .../fgbio_filter_consensus_reads_1.2.0.cwl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fgbio_filter_consensus_reads_1.2.0/fgbio_filter_consensus_reads_1.2.0.cwl b/fgbio_filter_consensus_reads_1.2.0/fgbio_filter_consensus_reads_1.2.0.cwl index 0ea3496c..08dd316f 100644 --- a/fgbio_filter_consensus_reads_1.2.0/fgbio_filter_consensus_reads_1.2.0.cwl +++ b/fgbio_filter_consensus_reads_1.2.0/fgbio_filter_consensus_reads_1.2.0.cwl @@ -80,7 +80,7 @@ inputs: prefix: '--max-no-call-fraction' doc: Maximum fraction of no-calls in the read after filtering - id: min_mean_base_quality - type: File? + type: int? 
inputBinding: position: 0 prefix: '--min-mean-base-quality' From c8a8a4b78e03d49572537e158d22dc9ca27e144e Mon Sep 17 00:00:00 2001 From: Nikhil Date: Fri, 28 Aug 2020 13:02:28 -0400 Subject: [PATCH 181/476] Updated Summary to include picard add or replace read groups 4.1.8.1 --- docs/SUMMARY.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index c997b150..ab040972 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -27,6 +27,7 @@ - Picard Tools - [AddOrReplaceReadGroups v1.96](../picard_add_or_replace_read_groups_1.96/README.md) - [AddOrReplaceReadGroups v2.21.2](../picard_add_or_replace_read_groups_2.21.2/README.md) + - [AddOrReplaceReadGroups v4.1.8.1](../picard_add_or_replace_read_groups_4.1.8.1/README.md) - [CollectAlignmentSummaryMetrics v2.8.1](../picard_collect_alignment_summary_metrics_2.8.1/README.md) - [CollectAlignmentSummaryMetrics v2.21.2](../picard_collect_alignment_summary_metrics_2.21.2/README.md) - [CollectMultipleMetrics v2.8.1](../picard_collectmultiplemetric_2.8.1/README.md) From 90adbdf81948d34f064438603e472697e1fcd616 Mon Sep 17 00:00:00 2001 From: Nikhil Date: Fri, 28 Aug 2020 13:03:30 -0400 Subject: [PATCH 182/476] Added deflater and inflater options --- .../README.md | 6 ++++++ .../example_inputs.yaml | 20 ++++++++++--------- ...ard_add_or_replace_read_groups_4.1.8.1.cwl | 16 +++++++++++++++ 3 files changed, 33 insertions(+), 9 deletions(-) diff --git a/picard_add_or_replace_read_groups_4.1.8.1/README.md b/picard_add_or_replace_read_groups_4.1.8.1/README.md index 4a3aa7b4..59aeafbb 100644 --- a/picard_add_or_replace_read_groups_4.1.8.1/README.md +++ b/picard_add_or_replace_read_groups_4.1.8.1/README.md @@ -82,6 +82,12 @@ optional arguments: Compression level for all compressed files created (e.g. BAM and GELI). Default value:5. This option can be set to 'null' to clear the default value. 
+ --use_jdk_deflater Use the JDK Deflater instead of the Intel Deflater for + writing compressed output + --use_jdk_inflater Whether to create a BAM index when writing a + coordinate-sorted BAM file. Default value:false. This + option can be set to 'null' to clear the default + value. Possible values:{true, false} --create_bam_index Whether to create a BAM index when writing a coordinate-sorted BAM file. Default value:false. This option can be set to 'null' to clear the default diff --git a/picard_add_or_replace_read_groups_4.1.8.1/example_inputs.yaml b/picard_add_or_replace_read_groups_4.1.8.1/example_inputs.yaml index 9c25bd7d..aa9cd8b0 100644 --- a/picard_add_or_replace_read_groups_4.1.8.1/example_inputs.yaml +++ b/picard_add_or_replace_read_groups_4.1.8.1/example_inputs.yaml @@ -1,20 +1,22 @@ -bam_compression_level: +bam_compression_level: create_bam_index: true +use_jdk_deflater: true +use_jdk_inflater: true input: class: File path: "/path/to/bam" -memory_overhead: -memory_per_job: -number_of_threads: +memory_overhead: +memory_per_job: +number_of_threads: output_file_name: somename_srt.bam -read_group_description: +read_group_description: read_group_identifier: test read_group_library: 1 read_group_platform_unit: bc01 -read_group_run_date: +read_group_run_date: read_group_sample_name: seracare read_group_sequencing_platform: Illumina read_group_sequnecing_center: msk -sort_order: -tmp_dir: -validation_stringency: +sort_order: +tmp_dir: +validation_stringency: diff --git a/picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl b/picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl index e6320ebe..2c2434d9 100644 --- a/picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl +++ b/picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl @@ -117,6 +117,22 @@ inputs: Compression level for all compressed files created (e.g. 
BAM and GELI). Default value:5. This option can be set to 'null' to clear the default value. + - id: use_jdk_deflater + type: boolean? + inputBinding: + position: 0 + prefix: --USE_JDK_DEFLATER + doc: >- + Use the JDK Deflater instead of the Intel Deflater for writing compressed output + - id: use_jdk_inflater + type: boolean? + inputBinding: + position: 0 + prefix: --USE_JDK_INFLATER + doc: >- + Whether to create a BAM index when writing a coordinate-sorted BAM file. + Default value:false. This option can be set to 'null' to clear the default + value. Possible values:{true, false} - default: true id: create_bam_index type: boolean? From 52329365398d1132a58b3565ae9dd7a90bc9f86a Mon Sep 17 00:00:00 2001 From: Nikhil Date: Fri, 28 Aug 2020 13:04:45 -0400 Subject: [PATCH 183/476] Renamed cwl tool output --- .../picard_add_or_replace_read_groups_4.1.8.1.cwl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl b/picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl index 2c2434d9..55a491bc 100644 --- a/picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl +++ b/picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl @@ -144,7 +144,7 @@ inputs: Default value:false. This option can be set to 'null' to clear the default value. 
Possible values:{true, false} outputs: - - id: bam + - id: picard_add_or_replace_read_groups_bam type: File outputBinding: glob: |- From 473792c8611e30ea3e807f356cc936b1e15e8a07 Mon Sep 17 00:00:00 2001 From: Nikhil Date: Fri, 28 Aug 2020 13:37:17 -0400 Subject: [PATCH 184/476] Updated tool version --- picard_mark_duplicates_4.1.8.1/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/picard_mark_duplicates_4.1.8.1/README.md b/picard_mark_duplicates_4.1.8.1/README.md index 69f2f101..01b544d5 100644 --- a/picard_mark_duplicates_4.1.8.1/README.md +++ b/picard_mark_duplicates_4.1.8.1/README.md @@ -4,7 +4,7 @@ | Tool | Version | Location | |--- |--- |--- | -| picard | 2.21.2 | https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar | +| gatk | 4.1.8.1 | broadinstitute/gatk:4.1.8.1 | ## CWL @@ -14,13 +14,13 @@ - Example Command using [toil](https://toil.readthedocs.io): ```bash - > toil-cwl-runner picard_mark_duplicates_2.21.2.cwl example_inputs.yaml + > toil-cwl-runner picard_mark_duplicates_4.1.8.1.cwl example_inputs.yaml ``` ### Usage ```bash -usage: picard_mark_duplicates_2.21.2.cwl [-h] +usage: picard_mark_duplicates_4.1.8.1.cwl [-h] positional arguments: job_order Job input json file From ba5d5472ad0c167f535293f79f5be091461ba1a9 Mon Sep 17 00:00:00 2001 From: Nikhil Date: Fri, 28 Aug 2020 13:38:00 -0400 Subject: [PATCH 185/476] Added extra options --- picard_mark_duplicates_4.1.8.1/README.md | 38 ++++++++++++++ .../example_inputs.yaml | 4 ++ .../picard_mark_duplicates_4.1.8.1.cwl | 51 ++++++++++++++++++- 3 files changed, 91 insertions(+), 2 deletions(-) diff --git a/picard_mark_duplicates_4.1.8.1/README.md b/picard_mark_duplicates_4.1.8.1/README.md index 01b544d5..00079d8a 100644 --- a/picard_mark_duplicates_4.1.8.1/README.md +++ b/picard_mark_duplicates_4.1.8.1/README.md @@ -59,6 +59,44 @@ optional arguments: coordinate-sorted BAM file. Default value:false. 
This option can be set to 'null' to clear the default value. Possible values:{true, false} + --read_name_regex READ_NAME_REGEX + MarkDuplicates can use the tile and cluster positions + to estimate the rate of optical duplication in + addition to the dominant source of duplication, PCR, + to provide a more accurate estimation of library size. + By default (with no READ_NAME_REGEX specified), + MarkDuplicates will attempt to extract coordinates + using a split on ':' (see Note below). Set + READ_NAME_REGEX to 'null' to disable optical duplicate + detection. Note that without optical duplicate counts, + library size estimation will be less accurate. If the + read name does not follow a standard Illumina colon- + separation convention, but does contain tile and x,y + coordinates, a regular expression can be specified to + extract three variables: tile/region, x coordinate and + y coordinate from a read name. The regular expression + must contain three capture groups for the three + variables, in order. It must match the entire read + name. e.g. if field names were separated by semi-colon + (';') this example regex could be specified + (?:.*;)?([0-9]+)[^;]*;([0-9]+)[^;]*;([0-9]+)[^;]*$ + Note that if no READ_NAME_REGEX is specified, the read + name is split on ':'. For 5 element names, the 3rd, + 4th and 5th elements are assumed to be tile, x and y + values. For 7 element names (CASAVA 1.8), the 5th, + 6th, and 7th elements are assumed to be tile, x and y + values. + --sorting_collection_size_ratio SORTING_COLLECTION_SIZE_RATIO + This number, plus the maximum RAM available to the + JVM, determine the memory footprint used by some of + the sorting collections. If you are running out of + memory, try reducing this number. + --use_jdk_deflater Use the JDK Deflater instead of the Intel Deflater for + writing compressed output + --use_jdk_inflater Whether to create a BAM index when writing a + coordinate-sorted BAM file. Default value:false. 
This + option can be set to 'null' to clear the default + value. Possible values:{true, false} --duplicate_scoring_strategy DUPLICATE_SCORING_STRATEGY The scoring strategy for choosing the non-duplicate among candidates. Default value:SUM_OF_BASE_QUALITIES. diff --git a/picard_mark_duplicates_4.1.8.1/example_inputs.yaml b/picard_mark_duplicates_4.1.8.1/example_inputs.yaml index 4cb5941e..518d0a06 100644 --- a/picard_mark_duplicates_4.1.8.1/example_inputs.yaml +++ b/picard_mark_duplicates_4.1.8.1/example_inputs.yaml @@ -1,6 +1,10 @@ assume_sort_order: coordinate bam_compression_level: null create_bam_index: true +use_jdk_deflater: true +use_jdk_inflater: true +sorting_collection_size_ratio: 0.25 +read_name_regex: 'null' duplicate_scoring_strategy: null duplication_metrics: test_metrics.txt input: diff --git a/picard_mark_duplicates_4.1.8.1/picard_mark_duplicates_4.1.8.1.cwl b/picard_mark_duplicates_4.1.8.1/picard_mark_duplicates_4.1.8.1.cwl index c554e0db..9f4ca563 100644 --- a/picard_mark_duplicates_4.1.8.1/picard_mark_duplicates_4.1.8.1.cwl +++ b/picard_mark_duplicates_4.1.8.1/picard_mark_duplicates_4.1.8.1.cwl @@ -79,8 +79,55 @@ inputs: type: boolean? inputBinding: position: 0 - prefix: CREATE_INDEX=true - separate: false + prefix: --CREATE_INDEX + doc: >- + Whether to create a BAM index when writing a coordinate-sorted BAM file. + Default value:false. This option can be set to 'null' to clear the default + value. Possible values:{true, false} + - id: read_name_regex + type: string? + inputBinding: + position: 0 + prefix: --READ_NAME_REGEX + doc: >- + MarkDuplicates can use the tile and cluster positions to estimate the rate of + optical duplication in addition to the dominant source of duplication, PCR, + to provide a more accurate estimation of library size. By default (with no + READ_NAME_REGEX specified), MarkDuplicates will attempt to extract coordinates + using a split on ':' (see Note below). 
Set READ_NAME_REGEX to 'null' to + disable optical duplicate detection. Note that without optical duplicate + counts, library size estimation will be less accurate. If the read name does + not follow a standard Illumina colon-separation convention, but does contain + tile and x,y coordinates, a regular expression can be specified to extract + three variables: tile/region, x coordinate and y coordinate from a read name. + The regular expression must contain three capture groups for the three variables, + in order. It must match the entire read name. e.g. if field names were separated + by semi-colon (';') this example regex could be specified + (?:.*;)?([0-9]+)[^;]*;([0-9]+)[^;]*;([0-9]+)[^;]*$ Note that if no + READ_NAME_REGEX is specified, the read name is split on ':'. For 5 element names, + the 3rd, 4th and 5th elements are assumed to be tile, x and y values. For 7 + element names (CASAVA 1.8), the 5th, 6th, and 7th elements are assumed to be + tile, x and y values. + - id: sorting_collection_size_ratio + type: int? + inputBinding: + position: 0 + prefix: --SORTING_COLLECTION_SIZE_RATIO + doc: >- + This number, plus the maximum RAM available to the JVM, determine the memory + footprint used by some of the sorting collections. If you are running out of memory, try reducing this number. + - id: use_jdk_deflater + type: boolean? + inputBinding: + position: 0 + prefix: --USE_JDK_DEFLATER + doc: >- + Use the JDK Deflater instead of the Intel Deflater for writing compressed output + - id: use_jdk_inflater + type: boolean? + inputBinding: + position: 0 + prefix: --USE_JDK_INFLATER doc: >- Whether to create a BAM index when writing a coordinate-sorted BAM file. Default value:false. 
This option can be set to 'null' to clear the default From 6fd2c49c0a27f324f30ad9822ed74c3feb127382 Mon Sep 17 00:00:00 2001 From: Nikhil Date: Fri, 28 Aug 2020 13:38:22 -0400 Subject: [PATCH 186/476] Use runtime tmpdir --- picard_mark_duplicates_4.1.8.1/example_inputs.yaml | 2 +- .../picard_mark_duplicates_4.1.8.1.cwl | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/picard_mark_duplicates_4.1.8.1/example_inputs.yaml b/picard_mark_duplicates_4.1.8.1/example_inputs.yaml index 518d0a06..08a4e56d 100644 --- a/picard_mark_duplicates_4.1.8.1/example_inputs.yaml +++ b/picard_mark_duplicates_4.1.8.1/example_inputs.yaml @@ -15,5 +15,5 @@ memory_per_job: null number_of_threads: null optical_duplicate_pixel_distance: null output_file_name: null -tmp_dir: null validation_stringency: null + diff --git a/picard_mark_duplicates_4.1.8.1/picard_mark_duplicates_4.1.8.1.cwl b/picard_mark_duplicates_4.1.8.1/picard_mark_duplicates_4.1.8.1.cwl index 9f4ca563..c50950aa 100644 --- a/picard_mark_duplicates_4.1.8.1/picard_mark_duplicates_4.1.8.1.cwl +++ b/picard_mark_duplicates_4.1.8.1/picard_mark_duplicates_4.1.8.1.cwl @@ -201,6 +201,9 @@ arguments: return inputs.input.basename.replace(/.bam/,'_md.bam') } } + - position: 0 + prefix: --TMP_DIR + valueFrom: $(runtime.tmpdir) requirements: - class: ResourceRequirement ramMin: 17000 From 99578b9ad583c69e9f372e77e5568bba0f62fcad Mon Sep 17 00:00:00 2001 From: Nikhil Date: Fri, 28 Aug 2020 13:39:09 -0400 Subject: [PATCH 187/476] Refactored to use the argument scheme in the docs --- .../picard_mark_duplicates_4.1.8.1.cwl | 27 +++++++------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/picard_mark_duplicates_4.1.8.1/picard_mark_duplicates_4.1.8.1.cwl b/picard_mark_duplicates_4.1.8.1/picard_mark_duplicates_4.1.8.1.cwl index c50950aa..5d0abdfd 100644 --- a/picard_mark_duplicates_4.1.8.1/picard_mark_duplicates_4.1.8.1.cwl +++ b/picard_mark_duplicates_4.1.8.1/picard_mark_duplicates_4.1.8.1.cwl @@ -21,8 +21,7 
@@ inputs: type: File inputBinding: position: 0 - prefix: I= - separate: false + prefix: -I doc: Input file (bam or sam). Required. - id: output_file_name type: string? @@ -32,15 +31,13 @@ inputs: type: string inputBinding: position: 0 - prefix: M= - separate: false + prefix: -M doc: File to write duplication metrics to Required. - id: assume_sort_order type: string? inputBinding: position: 0 - prefix: ASO= - separate: false + prefix: -ASO doc: >- Optional sort order to output in. If not supplied OUTPUT is in the same order as INPUT.Default value: null. Possible values: {unsorted, queryname, @@ -49,15 +46,13 @@ inputs: type: string? inputBinding: position: 0 - prefix: TMP_DIR= - separate: false + prefix: --TMP_DIR doc: This option may be specified 0 or more times - id: validation_stringency type: string? inputBinding: position: 0 - prefix: VALIDATION_STRINGENCY= - separate: false + prefix: --VALIDATION_STRINGENCY doc: >- Validation stringency for all SAM files read by this program. Setting stringency to SILENT can improve performance when processing a BAM file in @@ -68,8 +63,7 @@ inputs: type: int? inputBinding: position: 0 - prefix: COMPRESSION_LEVEL= - separate: false + prefix: --COMPRESSION_LEVEL doc: >- Compression level for all compressed files created (e.g. BAM and GELI). Default value:5. This option can be set to 'null' to clear the default @@ -136,8 +130,7 @@ inputs: type: string? inputBinding: position: 0 - prefix: DUPLICATE_SCORING_STRATEGY= - separate: false + prefix: --DUPLICATE_SCORING_STRATEGY doc: >- The scoring strategy for choosing the non-duplicate among candidates. Default value:SUM_OF_BASE_QUALITIES. This option can be set to 'null' to @@ -147,8 +140,7 @@ inputs: type: int? inputBinding: position: 0 - prefix: OPTICAL_DUPLICATE_PIXEL_DISTANCE= - separate: false + prefix: --OPTICAL_DUPLICATE_PIXEL_DISTANCE doc: >- The maximum offset between two duplicate clusters in order to consider them optical duplicates. 
The default is appropriate for unpatterned @@ -191,8 +183,7 @@ arguments: - position: 0 valueFrom: MarkDuplicates - position: 0 - prefix: O= - separate: false + prefix: -O valueFrom: |- ${ if(inputs.output_file_name){ From e22001c129fb0aa955723ccfb1bdd162767cf1cf Mon Sep 17 00:00:00 2001 From: Nikhil Date: Fri, 28 Aug 2020 13:39:29 -0400 Subject: [PATCH 188/476] Expanded javascript function --- .../picard_mark_duplicates_4.1.8.1.cwl | 25 ++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/picard_mark_duplicates_4.1.8.1/picard_mark_duplicates_4.1.8.1.cwl b/picard_mark_duplicates_4.1.8.1/picard_mark_duplicates_4.1.8.1.cwl index 5d0abdfd..b3e3e72b 100644 --- a/picard_mark_duplicates_4.1.8.1/picard_mark_duplicates_4.1.8.1.cwl +++ b/picard_mark_duplicates_4.1.8.1/picard_mark_duplicates_4.1.8.1.cwl @@ -176,7 +176,30 @@ outputs: label: picard_mark_duplicates_4.1.8.1 arguments: - position: 0 - valueFrom: "${\n if(inputs.memory_per_job && inputs.memory_overhead) {\n \n if(inputs.memory_per_job % 1000 == 0) {\n \t\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n \n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\" \n }\n }\n else if (inputs.memory_per_job && !inputs.memory_overhead){\n \n if(inputs.memory_per_job % 1000 == 0) {\n \t\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n \n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\" \n }\n }\n else if(!inputs.memory_per_job && inputs.memory_overhead){\n \n return \"-Xmx15G\"\n }\n else {\n \n \treturn \"-Xmx15G\"\n }\n}" + valueFrom: "${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\" + } + else { + return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + 
if(inputs.memory_per_job % 1000 == 0) { + return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\" + } + else { + return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return \"-Xmx15G\" + } + else { + return \"-Xmx15G\" + } + }" - position: 0 prefix: '-jar' valueFrom: /gatk/gatk-package-4.1.8.1-local.jar From ae6035ccd086ad064755b42ed54dc5c5c1b92f10 Mon Sep 17 00:00:00 2001 From: Nikhil Date: Fri, 28 Aug 2020 13:40:54 -0400 Subject: [PATCH 189/476] Updated name of tool outputs --- .../picard_mark_duplicates_4.1.8.1.cwl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/picard_mark_duplicates_4.1.8.1/picard_mark_duplicates_4.1.8.1.cwl b/picard_mark_duplicates_4.1.8.1/picard_mark_duplicates_4.1.8.1.cwl index b3e3e72b..dd7cffef 100644 --- a/picard_mark_duplicates_4.1.8.1/picard_mark_duplicates_4.1.8.1.cwl +++ b/picard_mark_duplicates_4.1.8.1/picard_mark_duplicates_4.1.8.1.cwl @@ -149,7 +149,7 @@ inputs: experiment to find what works best. Default value: 100. This option can be set to 'null' to clear the default value. 
outputs: - - id: bam + - id: picard_mark_duplicates_bam type: File outputBinding: glob: |- @@ -162,7 +162,7 @@ outputs: } secondaryFiles: - ^.bai - - id: duplication_stats + - id: picard_mark_duplicates_stats type: File outputBinding: glob: |- From 7fbe62396e1c0ae5fbcf252022fd95f85d78d28a Mon Sep 17 00:00:00 2001 From: Nikhil Date: Fri, 28 Aug 2020 13:43:36 -0400 Subject: [PATCH 190/476] Updated summary --- docs/SUMMARY.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index c997b150..c96616b1 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -38,6 +38,7 @@ - [MarkDuplicates v1.96](../picard_mark_duplicates_1.96/README.md) - [MarkDuplicates v2.8.1](../picard_mark_duplicates_2.8.1/README.md) - [MarkDuplicates v2.21.2](../picard_mark_duplicates_2.21.2/README.md) + - [MarkDuplicates v4.1.8.1](../picard_mark_duplicates_4.1.8.1/README.md) - Trim Galore - [v0.6.2](../trim_galore_0.6.2/README.md) - Ubuntu utilites From 4376532cb6921bb2ec92198e724bc7f381b7e662 Mon Sep 17 00:00:00 2001 From: Nikhil Date: Fri, 28 Aug 2020 13:48:13 -0400 Subject: [PATCH 191/476] Updated argument format to match the docs --- ...ard_add_or_replace_read_groups_4.1.8.1.cwl | 42 +++++++------------ 1 file changed, 14 insertions(+), 28 deletions(-) diff --git a/picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl b/picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl index 55a491bc..ec2469ed 100644 --- a/picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl +++ b/picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl @@ -21,8 +21,7 @@ inputs: type: File inputBinding: position: 0 - prefix: I= - separate: false + prefix: -I doc: Input file ( sam). Required. - id: output_file_name type: string? @@ -31,8 +30,7 @@ inputs: type: string? 
inputBinding: position: 0 - prefix: SO= - separate: false + prefix: -SO doc: >- Optional sort order to output in. If not supplied OUTPUT is in the same order as INPUT.Default value: null. Possible values: {unsorted, queryname, @@ -41,8 +39,7 @@ inputs: type: string inputBinding: position: 0 - prefix: RGID= - separate: false + prefix: --RGID doc: >- Read Group ID Default value: 1. This option can be set to 'null' to clear the default value Required @@ -50,57 +47,49 @@ inputs: type: string inputBinding: position: 0 - prefix: RGCN= - separate: false + prefix: --RGCN doc: 'Read Group sequencing center name Default value: null. Required' - id: read_group_library type: string inputBinding: position: 0 - prefix: RGLB= - separate: false + prefix: --RGLB doc: Read Group Library. Required - id: read_group_platform_unit type: string inputBinding: position: 0 - prefix: RGPU= - separate: false + prefix: --RGPU doc: Read Group platform unit (eg. run barcode) Required. - id: read_group_sample_name type: string inputBinding: position: 0 - prefix: RGSM= - separate: false + prefix: --RGSM doc: Read Group sample name. Required - id: read_group_sequencing_platform type: string inputBinding: position: 0 - prefix: RGPL= - separate: false + prefix: --RGPL doc: 'Read Group platform (e.g. illumina, solid) Required.' - id: read_group_description type: string? inputBinding: position: 0 - prefix: RGDS= - separate: false + prefix: --RGDS doc: 'Read Group description Default value: null.' - id: read_group_run_date type: string? inputBinding: position: 0 - prefix: RGDT= - separate: false + prefix: --RGDT doc: 'Read Group run date Default value: null.' - id: validation_stringency type: string? inputBinding: position: 0 - prefix: VALIDATION_STRINGENCY= - separate: false + prefix: --VALIDATION_STRINGENCY doc: >- Validation stringency for all SAM files read by this program. Setting stringency to SILENT can improve performance when processing a BAM file in @@ -111,8 +100,7 @@ inputs: type: int? 
inputBinding: position: 0 - prefix: COMPRESSION_LEVEL= - separate: false + prefix: --COMPRESSION_LEVEL doc: >- Compression level for all compressed files created (e.g. BAM and GELI). Default value:5. This option can be set to 'null' to clear the default @@ -195,12 +183,10 @@ arguments: - position: 0 valueFrom: AddOrReplaceReadGroups - position: 0 - prefix: TMP_DIR= - separate: false + prefix: --TMP_DIR valueFrom: "$(runtime.tmpdir)" - position: 0 - prefix: O= - separate: false + prefix: -O valueFrom: |- ${ if(inputs.output_file_name) From fe3cd348749f10d85370bbea5088a1885d3740a3 Mon Sep 17 00:00:00 2001 From: Nikhil Date: Fri, 28 Aug 2020 13:56:19 -0400 Subject: [PATCH 192/476] Updated doc --- docs/SUMMARY.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index c997b150..13cbf2e5 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -33,6 +33,7 @@ - [CollectMultipleMetrics v2.21.2](../picard_collectmultiplemetric_2.21.2/README.md) - [FixMateInformation v1.96](../picard_fix_mate_information_1.96/README.md) - [FixMateInformation v2.21.2](../picard_fix_mate_information_2.21.2/README.md) + - [FixMateInformation v4.1.8.1](../picard_fix_mate_information_4.1.8.1/README.md) - [HSmetrics v2.8.1](../picard_hsmetrics_2.8.1/README.md) - [HSmetrics v2.21.2](../picard_hsmetrics_2.21.2/README.md) - [MarkDuplicates v1.96](../picard_mark_duplicates_1.96/README.md) From 161b5fe70f2cb6439b2b2995b83d89b14867467c Mon Sep 17 00:00:00 2001 From: Nikhil Date: Fri, 28 Aug 2020 13:56:58 -0400 Subject: [PATCH 193/476] Reformatted arguments to match doc --- .../picard_fix_mate_information_4.1.8.1.cwl | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/picard_fix_mate_information_4.1.8.1/picard_fix_mate_information_4.1.8.1.cwl b/picard_fix_mate_information_4.1.8.1/picard_fix_mate_information_4.1.8.1.cwl index 78e383a7..7137c4e4 100644 --- a/picard_fix_mate_information_4.1.8.1/picard_fix_mate_information_4.1.8.1.cwl +++ 
b/picard_fix_mate_information_4.1.8.1/picard_fix_mate_information_4.1.8.1.cwl @@ -21,8 +21,7 @@ inputs: type: File inputBinding: position: 0 - prefix: I= - separate: false + prefix: -I doc: The input file to fix. This option may be specified 0 or more times secondaryFiles: - ^.bai @@ -33,8 +32,7 @@ inputs: type: string? inputBinding: position: 0 - prefix: SO= - separate: false + prefix: -SO doc: >- Optional sort order to output in. If not supplied OUTPUT is in the same order as INPUT.Default value: null. Possible values: {unsorted, queryname, @@ -43,8 +41,7 @@ inputs: type: string? inputBinding: position: 0 - prefix: VALIDATION_STRINGENCY= - separate: false + prefix: --VALIDATION_STRINGENCY doc: >- Validation stringency for all SAM files read by this program. Setting stringency to SILENT can improve performance when processing a BAM file in @@ -55,8 +52,7 @@ inputs: type: int? inputBinding: position: 0 - prefix: COMPRESSION_LEVEL= - separate: false + prefix: --COMPRESSION_LEVEL doc: >- Compression level for all compressed files created (e.g. BAM and GELI). Default value:5. This option can be set to 'null' to clear the default @@ -66,7 +62,7 @@ inputs: type: boolean? inputBinding: position: 0 - prefix: CREATE_INDEX=true + prefix: --CREATE_INDEX doc: >- Whether to create a BAM index when writing a coordinate-sorted BAM file. Default value:false. 
This option can be set to 'null' to clear the default @@ -125,12 +121,11 @@ arguments: - position: 0 valueFrom: FixMateInformation - position: 0 - prefix: TMP_DIR= + prefix: --TMP_DIR separate: false valueFrom: "$(runtime.tmpdir)" - position: 0 - prefix: O= - separate: false + prefix: -O valueFrom: |- ${ if(inputs.output_file_name){ From dfea1b1a987d8362ac2ad4a35d1e2bda7f440786 Mon Sep 17 00:00:00 2001 From: Nikhil Date: Fri, 28 Aug 2020 13:57:16 -0400 Subject: [PATCH 194/476] Added new arguments --- .../example_inputs.yaml | 16 +++++++++------- .../picard_fix_mate_information_4.1.8.1.cwl | 16 ++++++++++++++++ 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/picard_fix_mate_information_4.1.8.1/example_inputs.yaml b/picard_fix_mate_information_4.1.8.1/example_inputs.yaml index 1d9e4ee2..a6581208 100644 --- a/picard_fix_mate_information_4.1.8.1/example_inputs.yaml +++ b/picard_fix_mate_information_4.1.8.1/example_inputs.yaml @@ -1,12 +1,14 @@ -bam_compression_level: +bam_compression_level: create_bam_index: true +use_jdk_deflater: true +use_jdk_inflater: true input: class: File path: "/path/to/bam" -memory_overhead: -memory_per_job: -number_of_threads: +memory_overhead: +memory_per_job: +number_of_threads: output_file_name: somename_fm.bam -sort_order: -tmp_dir: -validation_stringency: +sort_order: +tmp_dir: +validation_stringency: diff --git a/picard_fix_mate_information_4.1.8.1/picard_fix_mate_information_4.1.8.1.cwl b/picard_fix_mate_information_4.1.8.1/picard_fix_mate_information_4.1.8.1.cwl index 7137c4e4..29c3bcf9 100644 --- a/picard_fix_mate_information_4.1.8.1/picard_fix_mate_information_4.1.8.1.cwl +++ b/picard_fix_mate_information_4.1.8.1/picard_fix_mate_information_4.1.8.1.cwl @@ -57,6 +57,22 @@ inputs: Compression level for all compressed files created (e.g. BAM and GELI). Default value:5. This option can be set to 'null' to clear the default value. + - id: use_jdk_deflater + type: boolean? 
+ inputBinding: + position: 0 + prefix: --USE_JDK_DEFLATER + doc: >- + Use the JDK Deflater instead of the Intel Deflater for writing compressed output + - id: use_jdk_inflater + type: boolean? + inputBinding: + position: 0 + prefix: --USE_JDK_INFLATER + doc: >- + Use the JDK Inflater instead of the Intel Inflater for reading compressed input. + Default value:false. This option can be set to 'null' to clear the default + value. Possible values:{true, false} - default: true id: create_bam_index type: boolean? From 22ac39ca0a1983a178269bcf1c03e23bfc9eee02 Mon Sep 17 00:00:00 2001 From: Nikhil Date: Fri, 28 Aug 2020 13:58:41 -0400 Subject: [PATCH 195/476] Updated tool doc --- picard_fix_mate_information_4.1.8.1/README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/picard_fix_mate_information_4.1.8.1/README.md b/picard_fix_mate_information_4.1.8.1/README.md index 2c766dcc..da45f413 100644 --- a/picard_fix_mate_information_4.1.8.1/README.md +++ b/picard_fix_mate_information_4.1.8.1/README.md @@ -64,6 +64,12 @@ optional arguments: Compression level for all compressed files created (e.g. BAM and GELI). Default value:5. This option can be set to 'null' to clear the default value. + --use_jdk_deflater Use the JDK Deflater instead of the Intel Deflater for + writing compressed output + --use_jdk_inflater Use the JDK Inflater instead of the Intel Inflater for + reading compressed input. Default value:false. This + option can be set to 'null' to clear the default + value. Possible values:{true, false} --create_bam_index Whether to create a BAM index when writing a coordinate-sorted BAM file. Default value:false.
This option can be set to 'null' to clear the default From b7b8a5e8ed16db0ca7d508a38dcd5270d3020048 Mon Sep 17 00:00:00 2001 From: Nikhil Date: Fri, 28 Aug 2020 14:15:07 -0400 Subject: [PATCH 196/476] Updated dockerfile --- abra2_2.22/container/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/abra2_2.22/container/Dockerfile b/abra2_2.22/container/Dockerfile index e064f3d6..2c882f60 100644 --- a/abra2_2.22/container/Dockerfile +++ b/abra2_2.22/container/Dockerfile @@ -37,4 +37,4 @@ RUN apt-get update && \ RUN wget "https://github.com/mozack/abra2/releases/download/v${ABRA2_VERSION}/abra2-${ABRA2_VERSION}.jar" && \ chmod 755 /usr/src/abra2-${ABRA2_VERSION}.jar && \ - cp -s /usr/src/abra2-${ABRA2_VERSION}.jar /usr/local/bin/abra2.jar \ No newline at end of file + cp -s /usr/src/abra2-${ABRA2_VERSION}.jar /usr/local/bin/abra2.jar From 032bf7ce4402b1fab33352283d78f89c25db682b Mon Sep 17 00:00:00 2001 From: Nikhil Date: Fri, 28 Aug 2020 14:16:45 -0400 Subject: [PATCH 197/476] Expanded the javascript function --- abra2_2.22/abra2_2.22.cwl | 49 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 46 insertions(+), 3 deletions(-) diff --git a/abra2_2.22/abra2_2.22.cwl b/abra2_2.22/abra2_2.22.cwl index 40711a51..801dd2a8 100644 --- a/abra2_2.22/abra2_2.22.cwl +++ b/abra2_2.22/abra2_2.22.cwl @@ -172,14 +172,57 @@ outputs: label: abra2_2.22 arguments: - position: 0 - valueFrom: "${\n if(inputs.memory_per_job && inputs.memory_overhead) {\n \n if(inputs.memory_per_job % 1000 == 0) {\n \t\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n \n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\" \n }\n }\n else if (inputs.memory_per_job && !inputs.memory_overhead){\n \n if(inputs.memory_per_job % 1000 == 0) {\n \t\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n \n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\" \n }\n }\n else 
if(!inputs.memory_per_job && inputs.memory_overhead){\n \n return \"-Xmx15G\"\n }\n else {\n \n \treturn \"-Xmx15G\"\n }\n}" + valueFrom: "${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\" + } + else { + return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\" + } + else { + return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return \"-Xmx15G\" + } + else { + return \"-Xmx15G\" + } + }" - position: 0 prefix: '-jar' valueFrom: /usr/local/bin/abra2.jar requirements: - class: ResourceRequirement - ramMin: "${\r if(inputs.memory_per_job && inputs.memory_overhead) {\r \r return inputs.memory_per_job + inputs.memory_overhead\r }\r else if (inputs.memory_per_job && !inputs.memory_overhead){\r \r \treturn inputs.memory_per_job + 2000\r }\r else if(!inputs.memory_per_job && inputs.memory_overhead){\r \r return 15000 + inputs.memory_overhead\r }\r else {\r \r \treturn 17000 \r }\r}" - coresMin: "${\r if (inputs.number_of_threads) {\r \r \treturn inputs.number_of_threads \r }\r else {\r \r return 4\r }\r}" + ramMin: "${ + if(inputs.memory_per_job && inputs.memory_overhead) { + return inputs.memory_per_job + inputs.memory_overhead + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + return inputs.memory_per_job + 2000 + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return 15000 + inputs.memory_overhead + } + else { + return 17000 + } + }" + coresMin: "${ + if (inputs.number_of_threads) { + return inputs.number_of_threads + } + else { + return 4 + } + }" - class: DockerRequirement dockerPull: 'aphoid/abra2:2.22' - class: 
InlineJavascriptRequirement From 1111588d8d04fa0d8908338c360068e61858d916 Mon Sep 17 00:00:00 2001 From: Nikhil Date: Fri, 28 Aug 2020 14:17:02 -0400 Subject: [PATCH 198/476] Updated docker image url --- abra2_2.22/abra2_2.22.cwl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/abra2_2.22/abra2_2.22.cwl b/abra2_2.22/abra2_2.22.cwl index 801dd2a8..7069c503 100644 --- a/abra2_2.22/abra2_2.22.cwl +++ b/abra2_2.22/abra2_2.22.cwl @@ -224,7 +224,7 @@ requirements: } }" - class: DockerRequirement - dockerPull: 'aphoid/abra2:2.22' + dockerPull: mskaccess/abra:2.22 - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' From 7643a4b4e737718d6683ac55bdf964d1c78106d2 Mon Sep 17 00:00:00 2001 From: Nikhil Date: Fri, 28 Aug 2020 14:17:22 -0400 Subject: [PATCH 199/476] Updated summary doc --- docs/SUMMARY.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index c997b150..e3e10485 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -3,6 +3,8 @@ - [MSK-ACCESS command-line tools](README.md) - ABRA2 - [v2.17](../abra2_2.17/README.md) \* [v2.19](../abra2_2.19/README.md) + - ABRA2 + - [v2.22](../abra2_2.22/README.md) \* [v2.22](../abra2_2.22/README.md) - Bedtools _ [genomecov v2.28.0_cv2](../bedtools_genomecov_v2.28.0_cv2/README.md) _ [merge v2.28.0_cv2](../bedtools_merge_v2.28.0_cv2/README.md) From 7526ae832a5a985b6de177d58cadd12a7e30cdcf Mon Sep 17 00:00:00 2001 From: Nikhil Date: Fri, 28 Aug 2020 14:23:38 -0400 Subject: [PATCH 200/476] Updated abra output name --- abra2_2.22/abra2_2.22.cwl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/abra2_2.22/abra2_2.22.cwl b/abra2_2.22/abra2_2.22.cwl index 7069c503..22308777 100644 --- a/abra2_2.22/abra2_2.22.cwl +++ b/abra2_2.22/abra2_2.22.cwl @@ -158,7 +158,7 @@ inputs: prefix: '--nosort' doc: Do not attempt to sort final output outputs: - - id: realigned_bam + - id: abra_realigned_bam type: - 'null' - File From 
9c86bca7f0d8e038ae8dcda1d363428ccec5e658 Mon Sep 17 00:00:00 2001 From: Ian Johnson Date: Fri, 28 Aug 2020 16:14:55 -0400 Subject: [PATCH 201/476] include simplex filter tool from fgbio_postprocessing --- ...io_postprocessing_simplex_filter_0.1.6.cwl | 96 +++++++++++++++++++ 1 file changed, 96 insertions(+) create mode 100644 fgbio_postprocessing_simplex_filter_0.1.6/fgbio_postprocessing_simplex_filter_0.1.6.cwl diff --git a/fgbio_postprocessing_simplex_filter_0.1.6/fgbio_postprocessing_simplex_filter_0.1.6.cwl b/fgbio_postprocessing_simplex_filter_0.1.6/fgbio_postprocessing_simplex_filter_0.1.6.cwl new file mode 100644 index 00000000..f641cb76 --- /dev/null +++ b/fgbio_postprocessing_simplex_filter_0.1.6/fgbio_postprocessing_simplex_filter_0.1.6.cwl @@ -0,0 +1,96 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: fgbio_postprocessing_simplex_filter_0.1.7 +baseCommand: + - simplex_filter +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: input_bam + type: File + inputBinding: + prefix: --input_bam + doc: Input file (bam or sam). Required. + - id: output_file_name + type: string? + inputBinding: + prefix: --output_filename + doc: Output file (bam or sam). 
+outputs: + - id: fgbio_postprocessing_simplex_bam + type: File + outputBinding: + glob: |- + ${ + if (inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/,'_simplex.bam') + } + } + secondaryFiles: + - ^.bai +label: fgbio_postprocessing_simplex_filter_0.1.7 +arguments: + - position: 0 + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx8G" + } + else { + return "-Xmx8G" + } + } +requirements: + - class: ResourceRequirement + ramMin: 2000 + coresMin: 1 + - class: DockerRequirement + dockerPull: 'mskaccess/fgbio_postprocessing_simplex_filter_0.1.7:0.1.0' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:johnsoni@mskcc.org' + 'foaf:name': Ian Johnson + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:johnsoni@mskcc.org' + 'foaf:name': Ian Johnson + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': fgbio_postprocessing + 'doap:revision': 0.1.7 From aba1c581d452265767c94f9e07b0422dcd3bab41 Mon Sep 17 00:00:00 2001 From: Ian Johnson Date: Fri, 28 Aug 2020 16:48:01 -0400 Subject: [PATCH 202/476] update to container with tag 0.1.0 --- 
.../fgbio_postprocessing_simplex_filter_0.1.6.cwl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fgbio_postprocessing_simplex_filter_0.1.6/fgbio_postprocessing_simplex_filter_0.1.6.cwl b/fgbio_postprocessing_simplex_filter_0.1.6/fgbio_postprocessing_simplex_filter_0.1.6.cwl index f641cb76..b8118166 100644 --- a/fgbio_postprocessing_simplex_filter_0.1.6/fgbio_postprocessing_simplex_filter_0.1.6.cwl +++ b/fgbio_postprocessing_simplex_filter_0.1.6/fgbio_postprocessing_simplex_filter_0.1.6.cwl @@ -74,7 +74,7 @@ requirements: ramMin: 2000 coresMin: 1 - class: DockerRequirement - dockerPull: 'mskaccess/fgbio_postprocessing_simplex_filter_0.1.7:0.1.0' + dockerPull: 'mskaccess/fgbio_postprocessing:0.1.0' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' From 91bc26ed7843f1647dad64301794a718476a93df Mon Sep 17 00:00:00 2001 From: Ian Johnson Date: Fri, 28 Aug 2020 17:02:31 -0400 Subject: [PATCH 203/476] change filenames to 0.1.7 and include example inputs and readme --- .../README.md | 43 +++++++++++++++++++ .../example_inputs.yaml | 2 + ...io_postprocessing_simplex_filter_0.1.7.cwl | 0 3 files changed, 45 insertions(+) create mode 100644 fgbio_postprocessing_simplex_filter_0.1.7/README.md create mode 100644 fgbio_postprocessing_simplex_filter_0.1.7/example_inputs.yaml rename fgbio_postprocessing_simplex_filter_0.1.6/fgbio_postprocessing_simplex_filter_0.1.6.cwl => fgbio_postprocessing_simplex_filter_0.1.7/fgbio_postprocessing_simplex_filter_0.1.7.cwl (100%) diff --git a/fgbio_postprocessing_simplex_filter_0.1.7/README.md b/fgbio_postprocessing_simplex_filter_0.1.7/README.md new file mode 100644 index 00000000..85e673e2 --- /dev/null +++ b/fgbio_postprocessing_simplex_filter_0.1.7/README.md @@ -0,0 +1,43 @@ +# CWL for running fgbio_postprocessing - simplex_filter + +## Version of tools in docker image + +| Tool | Version | Location | +| ----- | ------- | ------------------------------------ | +| fgbio_postprocessing | 0.1.7 | 
https://github.com/msk-access/fgbio_postprocessing | + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner fgbio_postprocessing_simplex_filter_0.1.7.cwl example_inputs.yaml +``` + +## Usage + +```bash +usage: fgbio_postprocessing_simplex_filter_0.1.7/fgbio_postprocessing_simplex_filter_0.1.7.cwl + [-h] [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] --input_bam INPUT_BAM + [--output_file_name OUTPUT_FILE_NAME] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --input_bam INPUT_BAM + Input file (bam or sam). Required. + --output_file_name OUTPUT_FILE_NAME + Output file (bam or sam). 
+``` diff --git a/fgbio_postprocessing_simplex_filter_0.1.7/example_inputs.yaml b/fgbio_postprocessing_simplex_filter_0.1.7/example_inputs.yaml new file mode 100644 index 00000000..96445c8c --- /dev/null +++ b/fgbio_postprocessing_simplex_filter_0.1.7/example_inputs.yaml @@ -0,0 +1,2 @@ +input_bam: /path/to/simplex_duplex_fgbio.bam +output_file_name: simplex_filtered.bam \ No newline at end of file diff --git a/fgbio_postprocessing_simplex_filter_0.1.6/fgbio_postprocessing_simplex_filter_0.1.6.cwl b/fgbio_postprocessing_simplex_filter_0.1.7/fgbio_postprocessing_simplex_filter_0.1.7.cwl similarity index 100% rename from fgbio_postprocessing_simplex_filter_0.1.6/fgbio_postprocessing_simplex_filter_0.1.6.cwl rename to fgbio_postprocessing_simplex_filter_0.1.7/fgbio_postprocessing_simplex_filter_0.1.7.cwl From 8d8dc49b92015001ee23e9f2c6a3f7b8f0d094e7 Mon Sep 17 00:00:00 2001 From: Ian Johnson Date: Fri, 28 Aug 2020 17:05:54 -0400 Subject: [PATCH 204/476] fix output when no output_file_name provided --- .../fgbio_postprocessing_simplex_filter_0.1.7.cwl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fgbio_postprocessing_simplex_filter_0.1.7/fgbio_postprocessing_simplex_filter_0.1.7.cwl b/fgbio_postprocessing_simplex_filter_0.1.7/fgbio_postprocessing_simplex_filter_0.1.7.cwl index b8118166..bd888dfb 100644 --- a/fgbio_postprocessing_simplex_filter_0.1.7/fgbio_postprocessing_simplex_filter_0.1.7.cwl +++ b/fgbio_postprocessing_simplex_filter_0.1.7/fgbio_postprocessing_simplex_filter_0.1.7.cwl @@ -36,7 +36,7 @@ outputs: if (inputs.output_file_name){ return inputs.output_file_name } else { - return inputs.input.basename.replace(/.bam/,'_simplex.bam') + return inputs.input_bam.basename.replace(/.bam/,'_simplex.bam') } } secondaryFiles: From e6ff6e638ff6ae1f01d592ab2e9bba4037c18d7b Mon Sep 17 00:00:00 2001 From: Ian Johnson Date: Fri, 28 Aug 2020 17:09:35 -0400 Subject: [PATCH 205/476] add min_simplex_reads option --- 
.../fgbio_postprocessing_simplex_filter_0.1.7.cwl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fgbio_postprocessing_simplex_filter_0.1.7/fgbio_postprocessing_simplex_filter_0.1.7.cwl b/fgbio_postprocessing_simplex_filter_0.1.7/fgbio_postprocessing_simplex_filter_0.1.7.cwl index bd888dfb..9931121d 100644 --- a/fgbio_postprocessing_simplex_filter_0.1.7/fgbio_postprocessing_simplex_filter_0.1.7.cwl +++ b/fgbio_postprocessing_simplex_filter_0.1.7/fgbio_postprocessing_simplex_filter_0.1.7.cwl @@ -27,6 +27,11 @@ inputs: inputBinding: prefix: --output_filename doc: Output file (bam or sam). + - id: min_simplex_reads + type: int? + inputBinding: + prefix: --min_simplex_reads + doc: Minimum number of simplex reads to pass filter for consensus reads outputs: - id: fgbio_postprocessing_simplex_bam type: File From e8c7c660ee835eb865b40cbcbb944faba4ad9a46 Mon Sep 17 00:00:00 2001 From: Ian Johnson Date: Fri, 28 Aug 2020 17:10:28 -0400 Subject: [PATCH 206/476] remove java memory arguments --- ...io_postprocessing_simplex_filter_0.1.7.cwl | 35 ------------------- 1 file changed, 35 deletions(-) diff --git a/fgbio_postprocessing_simplex_filter_0.1.7/fgbio_postprocessing_simplex_filter_0.1.7.cwl b/fgbio_postprocessing_simplex_filter_0.1.7/fgbio_postprocessing_simplex_filter_0.1.7.cwl index 9931121d..b81e44e9 100644 --- a/fgbio_postprocessing_simplex_filter_0.1.7/fgbio_postprocessing_simplex_filter_0.1.7.cwl +++ b/fgbio_postprocessing_simplex_filter_0.1.7/fgbio_postprocessing_simplex_filter_0.1.7.cwl @@ -9,14 +9,6 @@ id: fgbio_postprocessing_simplex_filter_0.1.7 baseCommand: - simplex_filter inputs: - - id: memory_per_job - type: int? - doc: Memory per job in megabytes - - id: memory_overhead - type: int? - doc: Memory overhead per job in megabytes - - id: number_of_threads - type: int? 
- id: input_bam type: File inputBinding: @@ -47,33 +39,6 @@ outputs: secondaryFiles: - ^.bai label: fgbio_postprocessing_simplex_filter_0.1.7 -arguments: - - position: 0 - valueFrom: |- - ${ - if(inputs.memory_per_job && inputs.memory_overhead) { - if(inputs.memory_per_job % 1000 == 0) { - return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" - } - else { - return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" - } - } - else if (inputs.memory_per_job && !inputs.memory_overhead){ - if(inputs.memory_per_job % 1000 == 0) { - return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" - } - else { - return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" - } - } - else if(!inputs.memory_per_job && inputs.memory_overhead){ - return "-Xmx8G" - } - else { - return "-Xmx8G" - } - } requirements: - class: ResourceRequirement ramMin: 2000 From 5daba10b7832f1b05958edccd157b82ab8b0012a Mon Sep 17 00:00:00 2001 From: Ian Johnson Date: Fri, 28 Aug 2020 17:18:17 -0400 Subject: [PATCH 207/476] update to 0.1.8 with output filename fix --- .../README.md | 6 +++--- .../example_inputs.yaml | 0 .../fgbio_postprocessing_simplex_filter_0.1.8.cwl | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) rename {fgbio_postprocessing_simplex_filter_0.1.7 => fgbio_postprocessing_simplex_filter_0.1.8}/README.md (84%) rename {fgbio_postprocessing_simplex_filter_0.1.7 => fgbio_postprocessing_simplex_filter_0.1.8}/example_inputs.yaml (100%) rename fgbio_postprocessing_simplex_filter_0.1.7/fgbio_postprocessing_simplex_filter_0.1.7.cwl => fgbio_postprocessing_simplex_filter_0.1.8/fgbio_postprocessing_simplex_filter_0.1.8.cwl (97%) diff --git a/fgbio_postprocessing_simplex_filter_0.1.7/README.md b/fgbio_postprocessing_simplex_filter_0.1.8/README.md similarity index 84% rename from fgbio_postprocessing_simplex_filter_0.1.7/README.md rename to fgbio_postprocessing_simplex_filter_0.1.8/README.md index 85e673e2..b368d854 100644 --- 
a/fgbio_postprocessing_simplex_filter_0.1.7/README.md +++ b/fgbio_postprocessing_simplex_filter_0.1.8/README.md @@ -4,7 +4,7 @@ | Tool | Version | Location | | ----- | ------- | ------------------------------------ | -| fgbio_postprocessing | 0.1.7 | https://github.com/msk-access/fgbio_postprocessing | +| fgbio_postprocessing | 0.1.8 | https://github.com/msk-access/fgbio_postprocessing | ## CWL @@ -13,13 +13,13 @@ - Example Command using [toil](https://toil.readthedocs.io): ```bash - > toil-cwl-runner fgbio_postprocessing_simplex_filter_0.1.7.cwl example_inputs.yaml + > toil-cwl-runner fgbio_postprocessing_simplex_filter_0.1.8.cwl example_inputs.yaml ``` ## Usage ```bash -usage: fgbio_postprocessing_simplex_filter_0.1.7/fgbio_postprocessing_simplex_filter_0.1.7.cwl +usage: fgbio_postprocessing_simplex_filter_0.1.8/fgbio_postprocessing_simplex_filter_0.1.8.cwl [-h] [--memory_per_job MEMORY_PER_JOB] [--memory_overhead MEMORY_OVERHEAD] [--number_of_threads NUMBER_OF_THREADS] --input_bam INPUT_BAM diff --git a/fgbio_postprocessing_simplex_filter_0.1.7/example_inputs.yaml b/fgbio_postprocessing_simplex_filter_0.1.8/example_inputs.yaml similarity index 100% rename from fgbio_postprocessing_simplex_filter_0.1.7/example_inputs.yaml rename to fgbio_postprocessing_simplex_filter_0.1.8/example_inputs.yaml diff --git a/fgbio_postprocessing_simplex_filter_0.1.7/fgbio_postprocessing_simplex_filter_0.1.7.cwl b/fgbio_postprocessing_simplex_filter_0.1.8/fgbio_postprocessing_simplex_filter_0.1.8.cwl similarity index 97% rename from fgbio_postprocessing_simplex_filter_0.1.7/fgbio_postprocessing_simplex_filter_0.1.7.cwl rename to fgbio_postprocessing_simplex_filter_0.1.8/fgbio_postprocessing_simplex_filter_0.1.8.cwl index b81e44e9..2ab7d141 100644 --- a/fgbio_postprocessing_simplex_filter_0.1.7/fgbio_postprocessing_simplex_filter_0.1.7.cwl +++ b/fgbio_postprocessing_simplex_filter_0.1.8/fgbio_postprocessing_simplex_filter_0.1.8.cwl @@ -44,7 +44,7 @@ requirements: ramMin: 2000 
coresMin: 1 - class: DockerRequirement - dockerPull: 'mskaccess/fgbio_postprocessing:0.1.0' + dockerPull: 'mskaccess/fgbio_postprocessing:0.2.0' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' From 486c92b511af5dbee7a90a4705ba307e47a1fd88 Mon Sep 17 00:00:00 2001 From: Ian Johnson Date: Fri, 28 Aug 2020 17:36:58 -0400 Subject: [PATCH 208/476] need secondaryFiles for input_bam file --- .../fgbio_postprocessing_simplex_filter_0.1.8.cwl | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fgbio_postprocessing_simplex_filter_0.1.8/fgbio_postprocessing_simplex_filter_0.1.8.cwl b/fgbio_postprocessing_simplex_filter_0.1.8/fgbio_postprocessing_simplex_filter_0.1.8.cwl index 2ab7d141..e3e2e547 100644 --- a/fgbio_postprocessing_simplex_filter_0.1.8/fgbio_postprocessing_simplex_filter_0.1.8.cwl +++ b/fgbio_postprocessing_simplex_filter_0.1.8/fgbio_postprocessing_simplex_filter_0.1.8.cwl @@ -5,7 +5,7 @@ $namespaces: doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' sbg: 'https://www.sevenbridges.com/' -id: fgbio_postprocessing_simplex_filter_0.1.7 +id: fgbio_postprocessing_simplex_filter_0.1.8 baseCommand: - simplex_filter inputs: @@ -14,6 +14,8 @@ inputs: inputBinding: prefix: --input_bam doc: Input file (bam or sam). Required. + secondaryFiles: + - ^.bai - id: output_file_name type: string? 
inputBinding: @@ -38,7 +40,7 @@ outputs: } secondaryFiles: - ^.bai -label: fgbio_postprocessing_simplex_filter_0.1.7 +label: fgbio_postprocessing_simplex_filter_0.1.8 requirements: - class: ResourceRequirement ramMin: 2000 @@ -63,4 +65,4 @@ requirements: 'doap:release': - class: 'doap:Version' 'doap:name': fgbio_postprocessing - 'doap:revision': 0.1.7 + 'doap:revision': 0.1.8 From d458d7c1684f520eb1814d4e82ca302c36953ead Mon Sep 17 00:00:00 2001 From: Ian Johnson Date: Fri, 28 Aug 2020 17:41:57 -0400 Subject: [PATCH 209/476] only replace ".bam" at the end of the filename --- .../fgbio_postprocessing_simplex_filter_0.1.8.cwl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fgbio_postprocessing_simplex_filter_0.1.8/fgbio_postprocessing_simplex_filter_0.1.8.cwl b/fgbio_postprocessing_simplex_filter_0.1.8/fgbio_postprocessing_simplex_filter_0.1.8.cwl index e3e2e547..ad1514f3 100644 --- a/fgbio_postprocessing_simplex_filter_0.1.8/fgbio_postprocessing_simplex_filter_0.1.8.cwl +++ b/fgbio_postprocessing_simplex_filter_0.1.8/fgbio_postprocessing_simplex_filter_0.1.8.cwl @@ -35,7 +35,7 @@ outputs: if (inputs.output_file_name){ return inputs.output_file_name } else { - return inputs.input_bam.basename.replace(/.bam/,'_simplex.bam') + return inputs.input_bam.basename.replace(/.bam$/,'_simplex.bam') } } secondaryFiles: From fb802fe54825080cbef5545cd1e00d1f8d51a48f Mon Sep 17 00:00:00 2001 From: ionox0 Date: Mon, 31 Aug 2020 13:53:03 -0400 Subject: [PATCH 210/476] add link in TOC and fix readme --- docs/SUMMARY.md | 1 + fgbio_postprocessing_simplex_filter_0.1.8/README.md | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 5bf5bede..2127b26f 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -16,6 +16,7 @@ - [GroupReadsByUmi v1.2.0](../fgbio_group_reads_by_umi_1.2.0/README.md) - [FastqToBam v1.2.0](../fgbio_fastq_to_bam_1.2.0/README.md) - [FilterConsensusReads 
v1.2.0](../fgbio_filter_consensus_reads_1.2.0/README.md) + - [simplex_filter v0.1.8](../fgbio_postprocessing_simplex_filter_0.1.8/README.md) - GATK - [ApplyBQSR v4.1.2.0](../gatk_ApplyBQSR_4.1.2.0/README.md) - [BaseRecalibrator v4.1.2.0](../gatk_BaseRecalibrator_4.1.2.0/README.md) diff --git a/fgbio_postprocessing_simplex_filter_0.1.8/README.md b/fgbio_postprocessing_simplex_filter_0.1.8/README.md index b368d854..66daa3e9 100644 --- a/fgbio_postprocessing_simplex_filter_0.1.8/README.md +++ b/fgbio_postprocessing_simplex_filter_0.1.8/README.md @@ -1,4 +1,4 @@ -# CWL for running Fgbio - GroupReadsByUmi +# CWL for running simplex_filter script from fgbio_postprocessing package ## Version of tools in docker image From 57c637ed1d45de4e3619df6eb0f859807593d77a Mon Sep 17 00:00:00 2001 From: Adrian Fraiha Date: Wed, 2 Sep 2020 13:19:24 -0400 Subject: [PATCH 211/476] Unpaired output paths + html/json outputs --- fastp_0.20.1/fastp_0.20.1.cwl | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/fastp_0.20.1/fastp_0.20.1.cwl b/fastp_0.20.1/fastp_0.20.1.cwl index 0c39acd8..19e9a777 100644 --- a/fastp_0.20.1/fastp_0.20.1.cwl +++ b/fastp_0.20.1/fastp_0.20.1.cwl @@ -76,8 +76,11 @@ inputs: prefix: '--json' doc: | the json format report file name + default: "fastp.json" + - id: html_output_path type: string + default: "fastp.html" inputBinding: prefix: '--html' doc: | @@ -100,7 +103,14 @@ outputs: type: File? outputBinding: glob: $(inputs.read2_output_path) - + - id: unpaired1_output + type: File? + outputBinding: + glob: $(inputs.unpaired1_path) + - id: unpaired2_output + type: File? 
+ outputBinding: + glob: $(inputs.unpaired2_path) doc: Setup and execute Fastp label: fastp_0.20.1 requirements: From ab841426d458cfc4587396e17ec437dd635b7411 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Sat, 5 Sep 2020 23:48:37 -0400 Subject: [PATCH 212/476] Adding descriptive names to all output ids --- bcftools_1.6/bcftools_concat_1.6.cwl | 2 +- .../bedtools_genomecov_v2.28.0_cv2.cwl | 2 +- .../bedtools_merge_v2.28.0_cv2.cwl | 2 +- bwa_mem_0.7.17/bwa_mem_0.7.17.cwl | 2 +- fastp_0.20.1/fastp_0.20.1.cwl | 12 +++---- ...gbio_call_duplex_consensus_reads_1.2.0.cwl | 2 +- ...fgbio_collect_duplex_seq_metrics_1.2.0.cwl | 12 +++---- .../fgbio_fastq_to_bam_1.2.0.cwl | 2 +- .../fgbio_filter_consensus_reads_1.2.0.cwl | 2 +- .../fgbio_group_reads_by_umi_1.2.0.cwl | 4 +-- .../gatk_ApplyBQSR_4.1.2.0.cwl | 2 +- .../gatk_baserecalibrator_4.1.2.0.cwl | 2 +- .../gatk_apply_bqsr_4.1.0.0.cwl | 2 +- .../gatk_base_recalibrator_4.1.0.0.cwl | 2 +- ...lect_alignment_summary_metrics_4.1.8.0.cwl | 2 +- .../gatk_collect_hs_metrics_4.1.8.0.cwl | 6 ++-- ...tk_collect_insert_size_metrics_4.1.8.0.cwl | 4 +-- .../gatk_mark_duplicates_4.1.0.0.cwl | 4 +-- .../gatk_merge_bam_alignment_4.1.8.0.cwl | 2 +- .../gatk_merge_sam_files_4.1.8.0.cwl | 2 +- .../gatk_sam_to_fastq_4.1.8.0.cwl | 6 ++-- manta_1.5.1/manta_1.5.1.cwl | 12 +++---- .../marianas_first_pass.cwl | 8 ++--- .../marianas_second_pass.cwl | 8 ++--- .../marianas_process_loop_umi.cwl | 8 ++--- .../marianas_separate_bams_1.8.1.cwl | 4 +-- merge_fastq_0.1.7/merge_fastq_0.1.7.cwl | 4 +-- msisensor_0.2/msisensor-0.2.cwl | 2 +- msisensor_0.6/msisensor-0.6.cwl | 2 +- mutect2_4.1.0.0/mutect2_4.1.0.0.cwl | 2 +- ...picard_add_or_replace_read_groups_1.96.cwl | 2 +- ...card_add_or_replace_read_groups_2.21.2.cwl | 2 +- ...llect_alignment_summary_metrics_2.21.2.cwl | 2 +- ...ollect_alignment_summary_metrics_2.8.1.cwl | 2 +- .../picard_collectmultiplemetrics_2.21.2.cwl | 34 +++++++++---------- .../picard_collectmultiplemetrics_2-8-1.cwl | 34 
+++++++++---------- .../picard_fix_mate_information_1.96.cwl | 2 +- .../picard_fix_mate_information_2.21.2.cwl | 2 +- .../picard_fix_mate_information_2.9.0.cwl | 2 +- .../picard_fix_mate_information_4.1.8.1.cwl | 2 +- .../picard_hsmetrics_2.21.2.cwl | 2 +- .../picard_hsmetrics_2.8.1.cwl | 2 +- .../picard_mark_duplicates_1.96.cwl | 2 +- .../picard_mark_duplicates_2.21.2.cwl | 4 +-- .../picard_mark_duplicates_2.8.1.cwl | 2 +- .../picard_mark_duplicates_2.9.0.cwl | 2 +- .../picard_mark_duplicates_4.1.8.1.cwl | 2 +- samtools-merge_1.9/samtools-merge_1.9.cwl | 2 +- samtools_sort_1.3.1/samtools_sort_1.3.1.cwl | 2 +- samtools_view_1.3.1/samtools_view_1.3.1.cwl | 2 +- .../scatterintervals_4.1.0.0.cwl | 2 +- sequence_qc_0.1.16/sequence_qc_0.1.16.cwl | 12 +++---- vcf2maf_1.6.17/vcf2maf_1.6.17.cwl | 2 +- .../waltz_count_reads_3.1.1.cwl | 6 ++-- .../waltz_pileupmatrices_3.1.1.cwl | 8 ++--- 55 files changed, 131 insertions(+), 131 deletions(-) diff --git a/bcftools_1.6/bcftools_concat_1.6.cwl b/bcftools_1.6/bcftools_concat_1.6.cwl index d2e1b680..c6d665f3 100644 --- a/bcftools_1.6/bcftools_concat_1.6.cwl +++ b/bcftools_1.6/bcftools_concat_1.6.cwl @@ -141,7 +141,7 @@ inputs: position: 1 outputs: - concat_vcf_output_file: + bcftools_concat_vcf_output_file: type: File outputBinding: glob: |- diff --git a/bedtools_genomecov_v2.28.0_cv2/bedtools_genomecov_v2.28.0_cv2.cwl b/bedtools_genomecov_v2.28.0_cv2/bedtools_genomecov_v2.28.0_cv2.cwl index 43f392dd..3001b154 100644 --- a/bedtools_genomecov_v2.28.0_cv2/bedtools_genomecov_v2.28.0_cv2.cwl +++ b/bedtools_genomecov_v2.28.0_cv2/bedtools_genomecov_v2.28.0_cv2.cwl @@ -39,7 +39,7 @@ inputs: option flag parameter to choose output file format. 
-bg refers to bedgraph format outputs: - - id: output_file + - id: bedtools_genomecove_bedgraph type: File outputBinding: glob: |- diff --git a/bedtools_merge_v2.28.0_cv2/bedtools_merge_v2.28.0_cv2.cwl b/bedtools_merge_v2.28.0_cv2/bedtools_merge_v2.28.0_cv2.cwl index 21e7cc7b..806f21b2 100644 --- a/bedtools_merge_v2.28.0_cv2/bedtools_merge_v2.28.0_cv2.cwl +++ b/bedtools_merge_v2.28.0_cv2/bedtools_merge_v2.28.0_cv2.cwl @@ -33,7 +33,7 @@ inputs: shellQuote: false doc: Maximum distance between features allowed for features to be merged. outputs: - - id: output_file + - id: bedtools_merge_bed type: File? outputBinding: glob: |- diff --git a/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl b/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl index 261302db..a075ff76 100644 --- a/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl +++ b/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl @@ -239,7 +239,7 @@ inputs: prefix: '-R' doc: 'STR read group header line such as ''@RG\tID -foo\tSM -bar'' [null]' outputs: - - id: output_sam + - id: bwa_mem_output_sam type: File outputBinding: glob: |- diff --git a/fastp_0.20.1/fastp_0.20.1.cwl b/fastp_0.20.1/fastp_0.20.1.cwl index 19e9a777..265185e2 100644 --- a/fastp_0.20.1/fastp_0.20.1.cwl +++ b/fastp_0.20.1/fastp_0.20.1.cwl @@ -87,27 +87,27 @@ inputs: the html format report file name outputs: - - id: json_output + - id: fastp_json_output type: File outputBinding: glob: $(inputs.json_output_path) - - id: html_output + - id: fastp_html_output type: File outputBinding: glob: $(inputs.html_output_path) - - id: read1_output + - id: fastp_read1_output type: File outputBinding: glob: $(inputs.read1_output_path) - - id: read2_output + - id: fastp_read2_output type: File? outputBinding: glob: $(inputs.read2_output_path) - - id: unpaired1_output + - id: fastp_unpaired1_output type: File? outputBinding: glob: $(inputs.unpaired1_path) - - id: unpaired2_output + - id: fastp_unpaired2_output type: File? 
outputBinding: glob: $(inputs.unpaired2_path) diff --git a/fgbio_call_duplex_consensus_reads_1.2.0/fgbio_call_duplex_consensus_reads_1.2.0.cwl b/fgbio_call_duplex_consensus_reads_1.2.0/fgbio_call_duplex_consensus_reads_1.2.0.cwl index b78e0db4..90cc1fc5 100644 --- a/fgbio_call_duplex_consensus_reads_1.2.0/fgbio_call_duplex_consensus_reads_1.2.0.cwl +++ b/fgbio_call_duplex_consensus_reads_1.2.0/fgbio_call_duplex_consensus_reads_1.2.0.cwl @@ -90,7 +90,7 @@ inputs: consensus. If more than this many reads are present in a tag family, the family is randomly downsampled to exactly max-reads reads. outputs: - - id: call_duplex_consensus_reads_bam + - id: fgbio_call_duplex_consensus_reads_bam type: File outputBinding: glob: |- diff --git a/fgbio_collect_duplex_seq_metrics_1.2.0/fgbio_collect_duplex_seq_metrics_1.2.0.cwl b/fgbio_collect_duplex_seq_metrics_1.2.0/fgbio_collect_duplex_seq_metrics_1.2.0.cwl index 45f3dfbc..d69075fe 100644 --- a/fgbio_collect_duplex_seq_metrics_1.2.0/fgbio_collect_duplex_seq_metrics_1.2.0.cwl +++ b/fgbio_collect_duplex_seq_metrics_1.2.0/fgbio_collect_duplex_seq_metrics_1.2.0.cwl @@ -74,42 +74,42 @@ inputs: prefix: '--mi-tag' doc: The output tag for UMI grouping. [Optional]. 
outputs: - - id: family_size + - id: fgbio_collect_duplex_seq_metrics_family_size type: File outputBinding: glob: |- ${ return inputs.output_prefix + '.family_sizes.txt' } - - id: duplex_family_size + - id: fgbio_collect_duplex_seq_metrics_duplex_family_size type: File outputBinding: glob: |- ${ return inputs.output_prefix + '.duplex_family_sizes.txt' } - - id: duplex_yield_metrics + - id: fgbio_collect_duplex_seq_metrics_duplex_yield_metrics type: File outputBinding: glob: |- ${ return inputs.output_prefix + '.duplex_yield_metrics.txt' } - - id: umi_counts + - id: fgbio_collect_duplex_seq_metrics_umi_counts type: File outputBinding: glob: |- ${ return inputs.output_prefix + '.umi_counts.txt' } - - id: duplex_qc + - id: fgbio_collect_duplex_seq_metrics_duplex_qc type: File outputBinding: glob: |- ${ return inputs.output_prefix + '.duplex_qc.pdf' } - - id: duplex_umi_counts + - id: fgbio_collect_duplex_seq_metrics_duplex_umi_counts type: File outputBinding: glob: |- diff --git a/fgbio_fastq_to_bam_1.2.0/fgbio_fastq_to_bam_1.2.0.cwl b/fgbio_fastq_to_bam_1.2.0/fgbio_fastq_to_bam_1.2.0.cwl index cd2c15dd..eac42dd3 100644 --- a/fgbio_fastq_to_bam_1.2.0/fgbio_fastq_to_bam_1.2.0.cwl +++ b/fgbio_fastq_to_bam_1.2.0/fgbio_fastq_to_bam_1.2.0.cwl @@ -131,7 +131,7 @@ inputs: shellQuote: false doc: 'Date the run was produced, to insert into the read group header' outputs: - - id: fastq_to_bam_ubam + - id: fgbio_fastq_to_bam_ubam type: File outputBinding: glob: |- diff --git a/fgbio_filter_consensus_reads_1.2.0/fgbio_filter_consensus_reads_1.2.0.cwl b/fgbio_filter_consensus_reads_1.2.0/fgbio_filter_consensus_reads_1.2.0.cwl index 08dd316f..53671a82 100644 --- a/fgbio_filter_consensus_reads_1.2.0/fgbio_filter_consensus_reads_1.2.0.cwl +++ b/fgbio_filter_consensus_reads_1.2.0/fgbio_filter_consensus_reads_1.2.0.cwl @@ -94,7 +94,7 @@ inputs: Mask (make N) consensus bases where the AB and BA consensus reads disagree (for duplex-sequencing only). 
outputs: - - id: filter_consensus_reads_bam + - id: fgbio_filter_consensus_reads_bam type: File outputBinding: glob: |- diff --git a/fgbio_group_reads_by_umi_1.2.0/fgbio_group_reads_by_umi_1.2.0.cwl b/fgbio_group_reads_by_umi_1.2.0/fgbio_group_reads_by_umi_1.2.0.cwl index 6d58b7c5..a51475e9 100644 --- a/fgbio_group_reads_by_umi_1.2.0/fgbio_group_reads_by_umi_1.2.0.cwl +++ b/fgbio_group_reads_by_umi_1.2.0/fgbio_group_reads_by_umi_1.2.0.cwl @@ -78,7 +78,7 @@ inputs: length, otherwise discard reads with UMIs shorter than this length and allow for differing UMI lengths. outputs: - - id: group_reads_by_umi_bam + - id: fgbio_group_reads_by_umi_bam type: File outputBinding: glob: |- @@ -87,7 +87,7 @@ outputs: return inputs.output_file_name; return inputs.input.basename.replace(/.bam/,'_group.bam'); } - - id: group_reads_by_umi_histogram + - id: fgbio_group_reads_by_umi_histogram type: File outputBinding: glob: |- diff --git a/gatk_ApplyBQSR_4.1.2.0/gatk_ApplyBQSR_4.1.2.0.cwl b/gatk_ApplyBQSR_4.1.2.0/gatk_ApplyBQSR_4.1.2.0.cwl index 010d648d..14f9ac57 100644 --- a/gatk_ApplyBQSR_4.1.2.0/gatk_ApplyBQSR_4.1.2.0.cwl +++ b/gatk_ApplyBQSR_4.1.2.0/gatk_ApplyBQSR_4.1.2.0.cwl @@ -230,7 +230,7 @@ inputs: - id: number_of_threads type: int? outputs: - - id: output + - id: gatk_apply_bqsr_bam type: File? 
outputBinding: glob: |- diff --git a/gatk_BaseRecalibrator_4.1.2.0/gatk_baserecalibrator_4.1.2.0.cwl b/gatk_BaseRecalibrator_4.1.2.0/gatk_baserecalibrator_4.1.2.0.cwl index cf12e785..ba59e2d0 100644 --- a/gatk_BaseRecalibrator_4.1.2.0/gatk_baserecalibrator_4.1.2.0.cwl +++ b/gatk_BaseRecalibrator_4.1.2.0/gatk_baserecalibrator_4.1.2.0.cwl @@ -266,7 +266,7 @@ inputs: secondaryFiles: - .idx outputs: - - id: output + - id: gatk_base_recalibrator_output type: File outputBinding: glob: |- diff --git a/gatk_apply_bqsr_4.1.0.0/gatk_apply_bqsr_4.1.0.0.cwl b/gatk_apply_bqsr_4.1.0.0/gatk_apply_bqsr_4.1.0.0.cwl index 1753956b..f288b782 100644 --- a/gatk_apply_bqsr_4.1.0.0/gatk_apply_bqsr_4.1.0.0.cwl +++ b/gatk_apply_bqsr_4.1.0.0/gatk_apply_bqsr_4.1.0.0.cwl @@ -204,7 +204,7 @@ inputs: position: 0 prefix: '--use-original-qualities' outputs: - - id: output + - id: gatk_apply_bqsr_bam type: File? outputBinding: glob: '$(inputs.input.basename.replace(''.bam'', '''')).recal.bam' diff --git a/gatk_base_recalibrator_4.1.0.0/gatk_base_recalibrator_4.1.0.0.cwl b/gatk_base_recalibrator_4.1.0.0/gatk_base_recalibrator_4.1.0.0.cwl index d923ebe4..fac760ef 100644 --- a/gatk_base_recalibrator_4.1.0.0/gatk_base_recalibrator_4.1.0.0.cwl +++ b/gatk_base_recalibrator_4.1.0.0/gatk_base_recalibrator_4.1.0.0.cwl @@ -234,7 +234,7 @@ inputs: position: 0 prefix: '--use-original-qualities' outputs: - - id: output + - id: gatk_base_recalibrator_output type: File? 
outputBinding: glob: '$(inputs.input.basename.replace(''.bam'', '''')).recal.table' diff --git a/gatk_collect_alignment_summary_metrics_4.1.8.0/gatk_collect_alignment_summary_metrics_4.1.8.0.cwl b/gatk_collect_alignment_summary_metrics_4.1.8.0/gatk_collect_alignment_summary_metrics_4.1.8.0.cwl index e523dca9..c680e691 100644 --- a/gatk_collect_alignment_summary_metrics_4.1.8.0/gatk_collect_alignment_summary_metrics_4.1.8.0.cwl +++ b/gatk_collect_alignment_summary_metrics_4.1.8.0/gatk_collect_alignment_summary_metrics_4.1.8.0.cwl @@ -136,7 +136,7 @@ inputs: Whether to create an MD5 digest for any BAM or FASTQ files created. Default value: false. Possible values: {true, false} outputs: - - id: alignment_summary_metrics + - id: gatk_collect_alignment_summary_metrics_txt type: File outputBinding: glob: |- diff --git a/gatk_collect_hs_metrics_4.1.8.0/gatk_collect_hs_metrics_4.1.8.0.cwl b/gatk_collect_hs_metrics_4.1.8.0/gatk_collect_hs_metrics_4.1.8.0.cwl index 9ea7e049..323180fb 100644 --- a/gatk_collect_hs_metrics_4.1.8.0/gatk_collect_hs_metrics_4.1.8.0.cwl +++ b/gatk_collect_hs_metrics_4.1.8.0/gatk_collect_hs_metrics_4.1.8.0.cwl @@ -184,7 +184,7 @@ inputs: - id: number_of_threads type: int? 
outputs: - - id: hs_metrics + - id: gatk_collect_hs_metrics_txt type: File outputBinding: glob: |- @@ -195,7 +195,7 @@ outputs: return inputs.input.basename.replace(/.bam/, '_hs_metrics.txt') } } - - id: per_base_coverage_out + - id: gatk_collect_hs_metrics_per_base_coverage_txt type: File outputBinding: glob: |- @@ -206,7 +206,7 @@ outputs: return inputs.input.basename.replace(/.bam/, '_per_base_coverage.txt') } } - - id: per_target_coverage_out + - id: gatk_collect_hs_metrics_per_target_coverage_txt type: File outputBinding: glob: |- diff --git a/gatk_collect_insert_size_metrics_4.1.8.0/gatk_collect_insert_size_metrics_4.1.8.0.cwl b/gatk_collect_insert_size_metrics_4.1.8.0/gatk_collect_insert_size_metrics_4.1.8.0.cwl index 2e177dd7..830f7bd3 100644 --- a/gatk_collect_insert_size_metrics_4.1.8.0/gatk_collect_insert_size_metrics_4.1.8.0.cwl +++ b/gatk_collect_insert_size_metrics_4.1.8.0/gatk_collect_insert_size_metrics_4.1.8.0.cwl @@ -126,7 +126,7 @@ inputs: Whether to create an MD5 digest for any BAM or FASTQ files created. Default value: false. 
Possible values: {true, false} outputs: - - id: insert_size_metrics + - id: gatk_collect_insert_size_metrics_txt type: File outputBinding: glob: |- @@ -137,7 +137,7 @@ outputs: return inputs.input.basename.replace(/.bam/, '_insert_size_metrics.txt') } } - - id: histogram_file_out + - id: gatk_collect_insert_size_metrics_histogram_pdf type: File outputBinding: glob: |- diff --git a/gatk_mark_duplicates_4.1.0.0/gatk_mark_duplicates_4.1.0.0.cwl b/gatk_mark_duplicates_4.1.0.0/gatk_mark_duplicates_4.1.0.0.cwl index 636dc04a..f474b88d 100644 --- a/gatk_mark_duplicates_4.1.0.0/gatk_mark_duplicates_4.1.0.0.cwl +++ b/gatk_mark_duplicates_4.1.0.0/gatk_mark_duplicates_4.1.0.0.cwl @@ -137,14 +137,14 @@ inputs: position: 0 prefix: '--TAGGING_POLICY' outputs: - - id: output_md_bam + - id: gatk_mark_duplicates_bam doc: Output marked duplicate bam type: File outputBinding: glob: '$(inputs.input.basename.replace(''md.bam'', ''bam''))' secondaryFiles: - ^.bai - - id: output_md_metrics + - id: gatk_mark_duplicates_metrics doc: Output marked duplicate metrics type: File outputBinding: diff --git a/gatk_merge_bam_alignment_4.1.8.0/gatk_merge_bam_alignment_4.1.8.0.cwl b/gatk_merge_bam_alignment_4.1.8.0/gatk_merge_bam_alignment_4.1.8.0.cwl index d24dfcbd..024bc4e6 100644 --- a/gatk_merge_bam_alignment_4.1.8.0/gatk_merge_bam_alignment_4.1.8.0.cwl +++ b/gatk_merge_bam_alignment_4.1.8.0/gatk_merge_bam_alignment_4.1.8.0.cwl @@ -335,7 +335,7 @@ inputs: Whether to create an MD5 digest for any BAM or FASTQ files created. Default value: false. 
Possible values: {true, false} outputs: - - id: gatk_merge_bam_alignment_output + - id: gatk_merge_bam_alignment_bam type: File outputBinding: glob: |- diff --git a/gatk_merge_sam_files_4.1.8.0/gatk_merge_sam_files_4.1.8.0.cwl b/gatk_merge_sam_files_4.1.8.0/gatk_merge_sam_files_4.1.8.0.cwl index 6e7059f7..22316995 100644 --- a/gatk_merge_sam_files_4.1.8.0/gatk_merge_sam_files_4.1.8.0.cwl +++ b/gatk_merge_sam_files_4.1.8.0/gatk_merge_sam_files_4.1.8.0.cwl @@ -125,7 +125,7 @@ inputs: Control verbosity of logging. Default value: INFO. Possible values: {ERROR, WARNING, INFO, DEBUG} outputs: - - id: gatk_merge_sam_files_output + - id: gatk_merge_sam_files_bam type: File outputBinding: glob: |- diff --git a/gatk_sam_to_fastq_4.1.8.0/gatk_sam_to_fastq_4.1.8.0.cwl b/gatk_sam_to_fastq_4.1.8.0/gatk_sam_to_fastq_4.1.8.0.cwl index 8eb50df9..4af9fe5c 100644 --- a/gatk_sam_to_fastq_4.1.8.0/gatk_sam_to_fastq_4.1.8.0.cwl +++ b/gatk_sam_to_fastq_4.1.8.0/gatk_sam_to_fastq_4.1.8.0.cwl @@ -238,7 +238,7 @@ inputs: (read, qualities, tags) do not otherwise need to be decoded. Default value: STRICT. Possible values: {STRICT, LENIENT, SILENT} outputs: - - id: gatk_sam_to_fastq_fastq_output + - id: gatk_sam_to_fastq_fastq type: File? outputBinding: glob: |- @@ -249,7 +249,7 @@ outputs: return inputs.input.basename.replace(/.bam|.sam/, '-R1.fastq') } } - - id: gatk_sam_to_fastq_unpaired_fastq_output + - id: gatk_sam_to_fastq_unpaired_fastq type: File? outputBinding: glob: |- @@ -260,7 +260,7 @@ outputs: return inputs.input.basename.replace(/.bam|.sam/, '-unpaired.fastq') } } - - id: gatk_sam_to_fastq_second_end_fastq_output + - id: gatk_sam_to_fastq_second_end_fastq type: File? 
outputBinding: glob: |- diff --git a/manta_1.5.1/manta_1.5.1.cwl b/manta_1.5.1/manta_1.5.1.cwl index 52cbc3ae..f5e00c65 100644 --- a/manta_1.5.1/manta_1.5.1.cwl +++ b/manta_1.5.1/manta_1.5.1.cwl @@ -93,37 +93,37 @@ inputs: separate: false doc: Generate a bam of supporting reads for all SVs outputs: - - id: all_candidates + - id: manta_all_candidates_vcf_gz type: File outputBinding: glob: results/variants/candidateSV.vcf.gz secondaryFiles: - .tbi - - id: diploid_variants + - id: manta_diploid_variants_vcf_gz type: File? outputBinding: glob: results/variants/diploidSV.vcf.gz secondaryFiles: - .tbi - - id: small_candidates + - id: manta_small_candidates_vcf_gz type: File outputBinding: glob: results/variants/candidateSmallIndels.vcf.gz secondaryFiles: - .tbi - - id: somatic_variants + - id: manta_somatic_variants_vcf_gz type: File? outputBinding: glob: results/variants/somaticSV.vcf.gz secondaryFiles: - .tbi - - id: tumor_only_variants + - id: manta_tumor_only_variants_vcf_gz type: File? outputBinding: glob: results/variants/tumorSV.vcf.gz secondaryFiles: - .tbi - - id: evidence_bams + - id: manta_evidence_bams type: 'File[]?' 
outputBinding: glob: |- diff --git a/marianas_collapsing_first_pass_1.8.1/marianas_first_pass.cwl b/marianas_collapsing_first_pass_1.8.1/marianas_first_pass.cwl index f83bf1d7..81c90c62 100644 --- a/marianas_collapsing_first_pass_1.8.1/marianas_first_pass.cwl +++ b/marianas_collapsing_first_pass_1.8.1/marianas_first_pass.cwl @@ -45,19 +45,19 @@ inputs: - .fai - ^.dict outputs: - - id: first_pass_output_file + - id: marianas_first_pass_output_file type: File outputBinding: glob: first-pass.txt - - id: alt_allele_file + - id: marianas_first_pass_alt_allele_file type: File outputBinding: glob: first-pass-alt-alleles.txt - - id: first_pass_insertions + - id: marianas_first_pass_insertions type: File outputBinding: glob: first-pass-insertions.txt - - id: first_pass_output_dir + - id: marianas_first_pass_output_dir type: Directory outputBinding: glob: . diff --git a/marianas_collapsing_second_pass_1.8.1/marianas_second_pass.cwl b/marianas_collapsing_second_pass_1.8.1/marianas_second_pass.cwl index 491a34d4..ee094cd0 100644 --- a/marianas_collapsing_second_pass_1.8.1/marianas_second_pass.cwl +++ b/marianas_collapsing_second_pass_1.8.1/marianas_second_pass.cwl @@ -49,19 +49,19 @@ inputs: inputBinding: position: 9 outputs: - - id: collapsed_fastq_1 + - id: marianas_second_pass_collapsed_fastq_1 type: File outputBinding: glob: collapsed_R1_.fastq - - id: collapsed_fastq_2 + - id: marianas_second_pass_collapsed_fastq_2 type: File outputBinding: glob: collapsed_R2_.fastq - - id: second_pass_alt_alleles + - id: marianas_second_pass_alt_alleles type: File outputBinding: glob: second-pass-alt-alleles.txt - - id: second_pass_insertions + - id: marianas_second_pass_insertions type: File outputBinding: glob: second-pass-insertions.txt diff --git a/marianas_process_loop_umi_1.8.1/marianas_process_loop_umi.cwl b/marianas_process_loop_umi_1.8.1/marianas_process_loop_umi.cwl index f0cf85f0..3e3f1e12 100644 --- a/marianas_process_loop_umi_1.8.1/marianas_process_loop_umi.cwl +++ 
b/marianas_process_loop_umi_1.8.1/marianas_process_loop_umi.cwl @@ -31,19 +31,19 @@ inputs: inputBinding: position: 0 outputs: - - id: processed_fastq_1 + - id: marianas_process_loop_umi_processed_fastq_1 type: File outputBinding: glob: '$(inputs.fastq1.basename.replace(''.fastq.gz'', ''_umi-clipped.fastq.gz''))' - - id: processed_fastq_2 + - id: marianas_process_loop_umi_processed_fastq_2 type: File outputBinding: glob: '$(inputs.fastq2.basename.replace(''.fastq.gz'', ''_umi-clipped.fastq.gz''))' - - id: clipping_info + - id: marianas_process_loop_umi_clipping_info type: File outputBinding: glob: info.txt - - id: composite_umi_frequencies + - id: marianas_process_loop_umi_composite_umi_frequencies type: File outputBinding: glob: composite-umi-frequencies.txt diff --git a/marianas_separate_bams_1.8.1/marianas_separate_bams_1.8.1.cwl b/marianas_separate_bams_1.8.1/marianas_separate_bams_1.8.1.cwl index cb140601..cc035c5b 100644 --- a/marianas_separate_bams_1.8.1/marianas_separate_bams_1.8.1.cwl +++ b/marianas_separate_bams_1.8.1/marianas_separate_bams_1.8.1.cwl @@ -14,13 +14,13 @@ inputs: inputBinding: position: 1 outputs: - - id: duplex-bam + - id: marianas_separate_bams_duplex_bam type: File outputBinding: glob: '*duplex.bam' secondaryFiles: - ^.bai - - id: simplex-bam + - id: marianas_separate_bams_simplex_bam type: File outputBinding: glob: '*simplex.bam' diff --git a/merge_fastq_0.1.7/merge_fastq_0.1.7.cwl b/merge_fastq_0.1.7/merge_fastq_0.1.7.cwl index 056d4696..0d94d7f8 100644 --- a/merge_fastq_0.1.7/merge_fastq_0.1.7.cwl +++ b/merge_fastq_0.1.7/merge_fastq_0.1.7.cwl @@ -64,7 +64,7 @@ inputs: Name of the merged output READ2 fastq file(default: merged_fastq_R2.fastq.gz) outputs: - - id: mergedfastq1 + - id: merge_fastq_1 type: File outputBinding: glob: |- @@ -75,7 +75,7 @@ outputs: return 'merged_fastq_R1.fastq.gz' } } - - id: mergedfastq2 + - id: merge_fastq_2 type: File outputBinding: glob: |- diff --git a/msisensor_0.2/msisensor-0.2.cwl 
b/msisensor_0.2/msisensor-0.2.cwl index 14757a55..d7edc89e 100644 --- a/msisensor_0.2/msisensor-0.2.cwl +++ b/msisensor_0.2/msisensor-0.2.cwl @@ -164,7 +164,7 @@ inputs: prefix: -y outputs: - output: + msisensor_output: type: File outputBinding: glob: |- diff --git a/msisensor_0.6/msisensor-0.6.cwl b/msisensor_0.6/msisensor-0.6.cwl index d0436482..e2f26ac6 100644 --- a/msisensor_0.6/msisensor-0.6.cwl +++ b/msisensor_0.6/msisensor-0.6.cwl @@ -164,7 +164,7 @@ inputs: prefix: -y outputs: - output: + msisensor_output: type: File outputBinding: glob: |- diff --git a/mutect2_4.1.0.0/mutect2_4.1.0.0.cwl b/mutect2_4.1.0.0/mutect2_4.1.0.0.cwl index a1838139..abcd85ac 100644 --- a/mutect2_4.1.0.0/mutect2_4.1.0.0.cwl +++ b/mutect2_4.1.0.0/mutect2_4.1.0.0.cwl @@ -42,7 +42,7 @@ inputs: position: 4 prefix: '--normal-sample' outputs: - - id: output + - id: mutect2_vcf_gz type: File? outputBinding: glob: >- diff --git a/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl b/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl index 19d39955..576e07ae 100644 --- a/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl +++ b/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl @@ -131,7 +131,7 @@ inputs: type: string? doc: 'Default value: null. This option may be specified 0 or more times.' outputs: - - id: bam + - id: picard_add_or_replace_read_groups_bam type: File outputBinding: glob: |- diff --git a/picard_add_or_replace_read_groups_2.21.2/picard_add_or_replace_read_groups_2.21.2.cwl b/picard_add_or_replace_read_groups_2.21.2/picard_add_or_replace_read_groups_2.21.2.cwl index ed057707..715b53bc 100644 --- a/picard_add_or_replace_read_groups_2.21.2/picard_add_or_replace_read_groups_2.21.2.cwl +++ b/picard_add_or_replace_read_groups_2.21.2/picard_add_or_replace_read_groups_2.21.2.cwl @@ -128,7 +128,7 @@ inputs: Default value:false. 
This option can be set to 'null' to clear the default value. Possible values:{true, false} outputs: - - id: bam + - id: picard_add_or_replace_read_groups_bam type: File outputBinding: glob: |- diff --git a/picard_collect_alignment_summary_metrics_2.21.2/picard_collect_alignment_summary_metrics_2.21.2.cwl b/picard_collect_alignment_summary_metrics_2.21.2/picard_collect_alignment_summary_metrics_2.21.2.cwl index 811adb2f..24a6302c 100644 --- a/picard_collect_alignment_summary_metrics_2.21.2/picard_collect_alignment_summary_metrics_2.21.2.cwl +++ b/picard_collect_alignment_summary_metrics_2.21.2/picard_collect_alignment_summary_metrics_2.21.2.cwl @@ -96,7 +96,7 @@ inputs: Stop after processing N reads, mainly for debugging. Default value: 0. This option can be set to 'null' to clear the default value. outputs: - - id: alignment_metrics + - id: picard_collect_alignment_summary_metrics_txt type: File outputBinding: glob: |- diff --git a/picard_collect_alignment_summary_metrics_2.8.1/picard_collect_alignment_summary_metrics_2.8.1.cwl b/picard_collect_alignment_summary_metrics_2.8.1/picard_collect_alignment_summary_metrics_2.8.1.cwl index f68401ea..0c98a858 100644 --- a/picard_collect_alignment_summary_metrics_2.8.1/picard_collect_alignment_summary_metrics_2.8.1.cwl +++ b/picard_collect_alignment_summary_metrics_2.8.1/picard_collect_alignment_summary_metrics_2.8.1.cwl @@ -96,7 +96,7 @@ inputs: Stop after processing N reads, mainly for debugging. Default value: 0. This option can be set to 'null' to clear the default value. 
outputs: - - id: alignment_metrics + - id: picard_collect_alignment_summary_metrics_txt type: File outputBinding: glob: |- diff --git a/picard_collectmultiplemetric_2.21.2/picard_collectmultiplemetrics_2.21.2.cwl b/picard_collectmultiplemetric_2.21.2/picard_collectmultiplemetrics_2.21.2.cwl index 8699d438..0d841d74 100644 --- a/picard_collectmultiplemetric_2.21.2/picard_collectmultiplemetrics_2.21.2.cwl +++ b/picard_collectmultiplemetric_2.21.2/picard_collectmultiplemetrics_2.21.2.cwl @@ -149,71 +149,71 @@ inputs: Default value: false. This option can be set to 'null' to clear the default value. Possible values: {true, false} outputs: - - id: alignment_summary_metrics + - id: picard_collectmultiplemetrics_alignment_summary_metrics type: File? outputBinding: glob: '*alignment_summary_metrics' - - id: bait_bias_detail_metrics + - id: picard_collectmultiplemetrics_bait_bias_detail_metrics type: File? outputBinding: glob: '*bait_bias_detail_metrics' - - id: bait_bias_summary_metrics + - id: picard_collectmultiplemetrics_bait_bias_summary_metrics type: File? outputBinding: glob: '*bait_bias_summary_metrics' - - id: base_distribution_by_cycle_metrics + - id: picard_collectmultiplemetrics_base_distribution_by_cycle_metrics type: File? outputBinding: glob: '*base_distribution_by_cycle_metrics' - - id: base_distribution_by_cycle_pdf + - id: picard_collectmultiplemetrics_base_distribution_by_cycle_pdf type: File? outputBinding: glob: '*base_distribution_by_cycle.pdf' - - id: error_summary_metrics + - id: picard_collectmultiplemetrics_error_summary_metrics type: File? outputBinding: glob: '*error_summary_metrics' - - id: gc_bias_detail_metrics + - id: picard_collectmultiplemetrics_gc_bias_detail_metrics type: File? outputBinding: glob: '*gc_bias.detail_metrics' - - id: gc_bias_pdf + - id: picard_collectmultiplemetrics_gc_bias_pdf type: File? 
outputBinding: glob: '*gc_bias.pdf' - - id: gc_bias_summary_metrics + - id: picard_collectmultiplemetrics_gc_bias_summary_metrics type: File? outputBinding: glob: '*gc_bias.summary_metrics' - - id: insert_size_histogram_pdf + - id: picard_collectmultiplemetrics_insert_size_histogram_pdf type: File? outputBinding: glob: '*insert_size_histogram.pdf' - - id: insert_size_metrics + - id: picard_collectmultiplemetrics_insert_size_metrics type: File? outputBinding: glob: '*insert_size_metrics' - - id: pre_adapter_detail_metrics + - id: picard_collectmultiplemetrics_pre_adapter_detail_metrics type: File? outputBinding: glob: '*pre_adapter_detail_metrics' - - id: pre_adapter_summary_metrics + - id: picard_collectmultiplemetrics_pre_adapter_summary_metrics type: File? outputBinding: glob: '*pre_adapter_summary_metrics' - - id: quality_by_cycle_metrics + - id: picard_collectmultiplemetrics_quality_by_cycle_metrics type: File? outputBinding: glob: '*quality_by_cycle_metrics' - - id: quality_by_cycle_pdf + - id: picard_collectmultiplemetrics_quality_by_cycle_pdf type: File? outputBinding: glob: '*quality_by_cycle.pdf' - - id: quality_distribution_metrics + - id: picard_collectmultiplemetrics_quality_distribution_metrics type: File? outputBinding: glob: '*quality_distribution_metrics' - - id: quality_distribution_pdf + - id: picard_collectmultiplemetrics_quality_distribution_pdf type: File? outputBinding: glob: '*quality_distribution.pdf' diff --git a/picard_collectmultiplemetric_2.8.1/picard_collectmultiplemetrics_2-8-1.cwl b/picard_collectmultiplemetric_2.8.1/picard_collectmultiplemetrics_2-8-1.cwl index e65ed07f..fa277b31 100644 --- a/picard_collectmultiplemetric_2.8.1/picard_collectmultiplemetrics_2-8-1.cwl +++ b/picard_collectmultiplemetric_2.8.1/picard_collectmultiplemetrics_2-8-1.cwl @@ -149,71 +149,71 @@ inputs: Default value: false. This option can be set to 'null' to clear the default value. 
Possible values: {true, false} outputs: - - id: alignment_summary_metrics + - id: picard_collectmultiplemetrics_alignment_summary_metrics type: File? outputBinding: glob: '*alignment_summary_metrics' - - id: bait_bias_detail_metrics + - id: picard_collectmultiplemetrics_bait_bias_detail_metrics type: File? outputBinding: glob: '*bait_bias_detail_metrics' - - id: bait_bias_summary_metrics + - id: picard_collectmultiplemetrics_bait_bias_summary_metrics type: File? outputBinding: glob: '*bait_bias_summary_metrics' - - id: base_distribution_by_cycle_metrics + - id: picard_collectmultiplemetrics_base_distribution_by_cycle_metrics type: File? outputBinding: glob: '*base_distribution_by_cycle_metrics' - - id: base_distribution_by_cycle_pdf + - id: picard_collectmultiplemetrics_base_distribution_by_cycle_pdf type: File? outputBinding: glob: '*base_distribution_by_cycle.pdf' - - id: error_summary_metrics + - id: picard_collectmultiplemetrics_error_summary_metrics type: File? outputBinding: glob: '*error_summary_metrics' - - id: gc_bias_detail_metrics + - id: picard_collectmultiplemetrics_gc_bias_detail_metrics type: File? outputBinding: glob: '*gc_bias.detail_metrics' - - id: gc_bias_pdf + - id: picard_collectmultiplemetrics_gc_bias_pdf type: File? outputBinding: glob: '*gc_bias.pdf' - - id: gc_bias_summary_metrics + - id: picard_collectmultiplemetrics_gc_bias_summary_metrics type: File? outputBinding: glob: '*gc_bias.summary_metrics' - - id: insert_size_histogram_pdf + - id: picard_collectmultiplemetrics_insert_size_histogram_pdf type: File? outputBinding: glob: '*insert_size_histogram.pdf' - - id: insert_size_metrics + - id: picard_collectmultiplemetrics_insert_size_metrics type: File? outputBinding: glob: '*insert_size_metrics' - - id: pre_adapter_detail_metrics + - id: picard_collectmultiplemetrics_pre_adapter_detail_metrics type: File? 
outputBinding: glob: '*pre_adapter_detail_metrics' - - id: pre_adapter_summary_metrics + - id: picard_collectmultiplemetrics_pre_adapter_summary_metrics type: File? outputBinding: glob: '*pre_adapter_summary_metrics' - - id: quality_by_cycle_metrics + - id: picard_collectmultiplemetrics_quality_by_cycle_metrics type: File? outputBinding: glob: '*quality_by_cycle_metrics' - - id: quality_by_cycle_pdf + - id: picard_collectmultiplemetrics_quality_by_cycle_pdf type: File? outputBinding: glob: '*quality_by_cycle.pdf' - - id: quality_distribution_metrics + - id: picard_collectmultiplemetrics_quality_distribution_metrics type: File? outputBinding: glob: '*quality_distribution_metrics' - - id: quality_distribution_pdf + - id: picard_collectmultiplemetrics_quality_distribution_pdf type: File? outputBinding: glob: '*quality_distribution.pdf' diff --git a/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl b/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl index 90f1bc49..d1b9c299 100644 --- a/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl +++ b/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl @@ -76,7 +76,7 @@ inputs: type: string? doc: 'Default value: null. This option may be specified 0 or more times.' outputs: - - id: bam + - id: picard_fix_mate_information_bam type: File outputBinding: glob: |- diff --git a/picard_fix_mate_information_2.21.2/picard_fix_mate_information_2.21.2.cwl b/picard_fix_mate_information_2.21.2/picard_fix_mate_information_2.21.2.cwl index 5d6dde04..731652ae 100644 --- a/picard_fix_mate_information_2.21.2/picard_fix_mate_information_2.21.2.cwl +++ b/picard_fix_mate_information_2.21.2/picard_fix_mate_information_2.21.2.cwl @@ -72,7 +72,7 @@ inputs: Default value:false. This option can be set to 'null' to clear the default value. 
Possible values:{true, false} outputs: - - id: bam + - id: picard_fix_mate_information_bam type: File outputBinding: glob: |- diff --git a/picard_fix_mate_information_2.9.0/picard_fix_mate_information_2.9.0.cwl b/picard_fix_mate_information_2.9.0/picard_fix_mate_information_2.9.0.cwl index b0123ad1..f0f5eb7a 100644 --- a/picard_fix_mate_information_2.9.0/picard_fix_mate_information_2.9.0.cwl +++ b/picard_fix_mate_information_2.9.0/picard_fix_mate_information_2.9.0.cwl @@ -76,7 +76,7 @@ inputs: type: string? doc: 'Default value: null. This option may be specified 0 or more times.' outputs: - - id: bam + - id: picard_fix_mate_information_bam type: File outputBinding: glob: |- diff --git a/picard_fix_mate_information_4.1.8.1/picard_fix_mate_information_4.1.8.1.cwl b/picard_fix_mate_information_4.1.8.1/picard_fix_mate_information_4.1.8.1.cwl index 29c3bcf9..8a1436d8 100644 --- a/picard_fix_mate_information_4.1.8.1/picard_fix_mate_information_4.1.8.1.cwl +++ b/picard_fix_mate_information_4.1.8.1/picard_fix_mate_information_4.1.8.1.cwl @@ -84,7 +84,7 @@ inputs: Default value:false. This option can be set to 'null' to clear the default value. Possible values:{true, false} outputs: - - id: bam + - id: picard_fix_mate_information_bam type: File outputBinding: glob: |- diff --git a/picard_hsmetrics_2.21.2/picard_hsmetrics_2.21.2.cwl b/picard_hsmetrics_2.21.2/picard_hsmetrics_2.21.2.cwl index f7b43ecc..10503349 100644 --- a/picard_hsmetrics_2.21.2/picard_hsmetrics_2.21.2.cwl +++ b/picard_hsmetrics_2.21.2/picard_hsmetrics_2.21.2.cwl @@ -151,7 +151,7 @@ inputs: 10000. Default value: 10000. This option can be set to 'null' to clear the default value. outputs: - - id: hs_metrics_file + - id: picard_hsmetrics_txt type: File? 
outputBinding: glob: |- diff --git a/picard_hsmetrics_2.8.1/picard_hsmetrics_2.8.1.cwl b/picard_hsmetrics_2.8.1/picard_hsmetrics_2.8.1.cwl index 4e23cc79..e24d9fbf 100644 --- a/picard_hsmetrics_2.8.1/picard_hsmetrics_2.8.1.cwl +++ b/picard_hsmetrics_2.8.1/picard_hsmetrics_2.8.1.cwl @@ -102,7 +102,7 @@ inputs: prefix: SAMPLE_SIZE= separate: false outputs: - - id: hs_metrics_file + - id: picard_hsmetrics_txt type: File? outputBinding: glob: |- diff --git a/picard_mark_duplicates_1.96/picard_mark_duplicates_1.96.cwl b/picard_mark_duplicates_1.96/picard_mark_duplicates_1.96.cwl index e663bbc9..186f7a0b 100644 --- a/picard_mark_duplicates_1.96/picard_mark_duplicates_1.96.cwl +++ b/picard_mark_duplicates_1.96/picard_mark_duplicates_1.96.cwl @@ -97,7 +97,7 @@ inputs: position: 0 prefix: AS=true outputs: - - id: bam + - id: picard_mark_duplicates_bam type: File outputBinding: glob: '$(inputs.input.basename.replace(/.bam/, ''_md.bam''))' diff --git a/picard_mark_duplicates_2.21.2/picard_mark_duplicates_2.21.2.cwl b/picard_mark_duplicates_2.21.2/picard_mark_duplicates_2.21.2.cwl index 792b6b82..e5869717 100644 --- a/picard_mark_duplicates_2.21.2/picard_mark_duplicates_2.21.2.cwl +++ b/picard_mark_duplicates_2.21.2/picard_mark_duplicates_2.21.2.cwl @@ -110,7 +110,7 @@ inputs: experiment to find what works best. Default value: 100. This option can be set to 'null' to clear the default value. 
outputs: - - id: bam + - id: picard_mark_duplicates_bam type: File outputBinding: glob: |- @@ -123,7 +123,7 @@ outputs: } secondaryFiles: - ^.bai - - id: duplication_stats + - id: picard_mark_duplicates_metrics type: File outputBinding: glob: |- diff --git a/picard_mark_duplicates_2.8.1/picard_mark_duplicates_2.8.1.cwl b/picard_mark_duplicates_2.8.1/picard_mark_duplicates_2.8.1.cwl index 9cda15b7..402a37fa 100644 --- a/picard_mark_duplicates_2.8.1/picard_mark_duplicates_2.8.1.cwl +++ b/picard_mark_duplicates_2.8.1/picard_mark_duplicates_2.8.1.cwl @@ -115,7 +115,7 @@ inputs: experiment to find what works best. Default value: 100. This option can be set to 'null' to clear the default value. outputs: - - id: bam + - id: picard_mark_duplicates_bam type: File outputBinding: glob: |- diff --git a/picard_mark_duplicates_2.9.0/picard_mark_duplicates_2.9.0.cwl b/picard_mark_duplicates_2.9.0/picard_mark_duplicates_2.9.0.cwl index 2ae972b2..7c032bce 100644 --- a/picard_mark_duplicates_2.9.0/picard_mark_duplicates_2.9.0.cwl +++ b/picard_mark_duplicates_2.9.0/picard_mark_duplicates_2.9.0.cwl @@ -121,7 +121,7 @@ inputs: prefix: READ_NAME_REGEX= separate: false outputs: - - id: bam + - id: picard_mark_duplicates_bam type: File outputBinding: glob: |- diff --git a/picard_mark_duplicates_4.1.8.1/picard_mark_duplicates_4.1.8.1.cwl b/picard_mark_duplicates_4.1.8.1/picard_mark_duplicates_4.1.8.1.cwl index dd7cffef..d5e3290f 100644 --- a/picard_mark_duplicates_4.1.8.1/picard_mark_duplicates_4.1.8.1.cwl +++ b/picard_mark_duplicates_4.1.8.1/picard_mark_duplicates_4.1.8.1.cwl @@ -162,7 +162,7 @@ outputs: } secondaryFiles: - ^.bai - - id: picard_mark_duplicates_stats + - id: picard_mark_duplicates_metrics type: File outputBinding: glob: |- diff --git a/samtools-merge_1.9/samtools-merge_1.9.cwl b/samtools-merge_1.9/samtools-merge_1.9.cwl index 5746b362..b7b5fbbc 100644 --- a/samtools-merge_1.9/samtools-merge_1.9.cwl +++ b/samtools-merge_1.9/samtools-merge_1.9.cwl @@ -11,7 +11,7 @@ inputs: 
position: 2 doc: Input array containing files to be merged outputs: - - id: output_file + - id: samtools_merge_bam type: File outputBinding: glob: '*merged.bam' diff --git a/samtools_sort_1.3.1/samtools_sort_1.3.1.cwl b/samtools_sort_1.3.1/samtools_sort_1.3.1.cwl index 975bf18b..d3ce72f1 100644 --- a/samtools_sort_1.3.1/samtools_sort_1.3.1.cwl +++ b/samtools_sort_1.3.1/samtools_sort_1.3.1.cwl @@ -42,7 +42,7 @@ inputs: position: 0 prefix: '-O' outputs: - - id: output_file + - id: samtools_sort_bam type: File outputBinding: glob: '$(inputs.input.basename.replace(''bam'', ''sorted.bam''))' diff --git a/samtools_view_1.3.1/samtools_view_1.3.1.cwl b/samtools_view_1.3.1/samtools_view_1.3.1.cwl index 87616cff..6c738f64 100644 --- a/samtools_view_1.3.1/samtools_view_1.3.1.cwl +++ b/samtools_view_1.3.1/samtools_view_1.3.1.cwl @@ -197,7 +197,7 @@ inputs: position: 0 prefix: '-O' outputs: - - id: output_bam + - id: samtools_view_bam type: File outputBinding: glob: '$(inputs.input.basename.replace(''sam'', ''bam''))' diff --git a/scatterintervals_4.1.0.0/scatterintervals_4.1.0.0.cwl b/scatterintervals_4.1.0.0/scatterintervals_4.1.0.0.cwl index e334ca81..9d75bfe9 100644 --- a/scatterintervals_4.1.0.0/scatterintervals_4.1.0.0.cwl +++ b/scatterintervals_4.1.0.0/scatterintervals_4.1.0.0.cwl @@ -26,7 +26,7 @@ inputs: position: 0 prefix: '--output' outputs: - - id: interval_files + - id: gatk_scatter_intervals_interval_files type: 'File[]' outputBinding: glob: $(inputs.output)/*.interval_list diff --git a/sequence_qc_0.1.16/sequence_qc_0.1.16.cwl b/sequence_qc_0.1.16/sequence_qc_0.1.16.cwl index 59785216..33ae4694 100644 --- a/sequence_qc_0.1.16/sequence_qc_0.1.16.cwl +++ b/sequence_qc_0.1.16/sequence_qc_0.1.16.cwl @@ -70,42 +70,42 @@ inputs: doc: >- Exclude bases with a lower base quality outputs: - - id: pileup + - id: sequence_qc_pileup type: File outputBinding: glob: |- ${ return inputs.sample_id + 'pileup.tsv' } - - id: noise_positions + - id: sequence_qc_noise_positions type: 
File outputBinding: glob: |- ${ return inputs.sample_id + 'noise_positions.tsv' } - - id: noise_acgt + - id: sequence_qc_noise_acgt type: File outputBinding: glob: |- ${ return inputs.sample_id + 'noise_acgt.tsv' } - - id: noise_n + - id: sequence_qc_noise_n type: File outputBinding: glob: |- ${ return inputs.sample_id + 'noise_n.tsv' } - - id: noise_del + - id: sequence_qc_noise_del type: File outputBinding: glob: |- ${ return inputs.sample_id + 'noise_del.tsv' } - - id: figures + - id: sequence_qc_figures type: File outputBinding: glob: |- diff --git a/vcf2maf_1.6.17/vcf2maf_1.6.17.cwl b/vcf2maf_1.6.17/vcf2maf_1.6.17.cwl index 5aa97470..eca0fb95 100644 --- a/vcf2maf_1.6.17/vcf2maf_1.6.17.cwl +++ b/vcf2maf_1.6.17/vcf2maf_1.6.17.cwl @@ -174,7 +174,7 @@ inputs: inputBinding: prefix: --retain-fmt outputs: - output: + vcf2maf_maf: type: File outputBinding: glob: | diff --git a/waltz_count_reads_3.1.1/waltz_count_reads_3.1.1.cwl b/waltz_count_reads_3.1.1/waltz_count_reads_3.1.1.cwl index 58b2ee3a..1a766160 100644 --- a/waltz_count_reads_3.1.1/waltz_count_reads_3.1.1.cwl +++ b/waltz_count_reads_3.1.1/waltz_count_reads_3.1.1.cwl @@ -34,15 +34,15 @@ inputs: inputBinding: position: 4 outputs: - - id: covered_regions + - id: waltz_count_reads_covered_regions type: File outputBinding: glob: '*.covered-regions' - - id: fragment_sizes + - id: waltz_count_reads_fragment_sizes type: File outputBinding: glob: '*.fragment-sizes' - - id: read_counts + - id: waltz_count_reads_read_counts type: File outputBinding: glob: '*.read-counts' diff --git a/waltz_pileupmatrices_3.1.1/waltz_pileupmatrices_3.1.1.cwl b/waltz_pileupmatrices_3.1.1/waltz_pileupmatrices_3.1.1.cwl index 85829060..27ae5b86 100644 --- a/waltz_pileupmatrices_3.1.1/waltz_pileupmatrices_3.1.1.cwl +++ b/waltz_pileupmatrices_3.1.1/waltz_pileupmatrices_3.1.1.cwl @@ -38,19 +38,19 @@ inputs: inputBinding: position: 13 outputs: - - id: pileup + - id: waltz_pileupmetrics_pileup type: File outputBinding: glob: '*-pileup.txt' - - 
id: pileup_without_duplicates + - id: waltz_pileupmetrics_pileup_without_duplicates type: File outputBinding: glob: '*-pileup-without-duplicates.txt' - - id: intervals + - id: waltz_pileupmetrics_intervals type: File outputBinding: glob: '*-intervals.txt' - - id: intervals_without_duplicates + - id: waltz_pileupmetrics_intervals_without_duplicates type: File outputBinding: glob: '*-intervals-without-duplicates.txt' From 532b40a73a180ff2ea801a58c6a8842a755fe713 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Sat, 5 Sep 2020 23:58:30 -0400 Subject: [PATCH 213/476] Adding JDK INFLATER/DEFLATER options --- ...k_collect_alignment_summary_metrics_4.1.8.0.cwl | 14 ++++++++++++++ .../gatk_collect_insert_size_metrics_4.1.8.0.cwl | 14 ++++++++++++++ .../gatk_merge_bam_alignment_4.1.8.0.cwl | 14 ++++++++++++++ .../gatk_merge_sam_files_4.1.8.0.cwl | 14 ++++++++++++++ .../picard_add_or_replace_read_groups_4.1.8.1.cwl | 6 ++---- .../picard_fix_mate_information_4.1.8.1.cwl | 6 ++---- .../picard_mark_duplicates_4.1.8.1.cwl | 4 +--- 7 files changed, 61 insertions(+), 11 deletions(-) diff --git a/gatk_collect_alignment_summary_metrics_4.1.8.0/gatk_collect_alignment_summary_metrics_4.1.8.0.cwl b/gatk_collect_alignment_summary_metrics_4.1.8.0/gatk_collect_alignment_summary_metrics_4.1.8.0.cwl index c680e691..1c08d679 100644 --- a/gatk_collect_alignment_summary_metrics_4.1.8.0/gatk_collect_alignment_summary_metrics_4.1.8.0.cwl +++ b/gatk_collect_alignment_summary_metrics_4.1.8.0/gatk_collect_alignment_summary_metrics_4.1.8.0.cwl @@ -135,6 +135,20 @@ inputs: doc: >- Whether to create an MD5 digest for any BAM or FASTQ files created. Default value: false. Possible values: {true, false} + - id: use_jdk_deflater + type: boolean? + inputBinding: + position: 0 + prefix: --USE_JDK_DEFLATER + doc: >- + Use the JDK Deflater instead of the Intel Deflater for writing compressed output + - id: use_jdk_inflater + type: boolean? 
+ inputBinding: + position: 0 + prefix: --USE_JDK_INFLATER + doc: >- + Use the JDK Inflater instead of the Intel Inflater for reading compressed input outputs: - id: gatk_collect_alignment_summary_metrics_txt type: File diff --git a/gatk_collect_insert_size_metrics_4.1.8.0/gatk_collect_insert_size_metrics_4.1.8.0.cwl b/gatk_collect_insert_size_metrics_4.1.8.0/gatk_collect_insert_size_metrics_4.1.8.0.cwl index 830f7bd3..8b6ca7a5 100644 --- a/gatk_collect_insert_size_metrics_4.1.8.0/gatk_collect_insert_size_metrics_4.1.8.0.cwl +++ b/gatk_collect_insert_size_metrics_4.1.8.0/gatk_collect_insert_size_metrics_4.1.8.0.cwl @@ -125,6 +125,20 @@ inputs: doc: >- Whether to create an MD5 digest for any BAM or FASTQ files created. Default value: false. Possible values: {true, false} + - id: use_jdk_deflater + type: boolean? + inputBinding: + position: 0 + prefix: --USE_JDK_DEFLATER + doc: >- + Use the JDK Deflater instead of the Intel Deflater for writing compressed output + - id: use_jdk_inflater + type: boolean? + inputBinding: + position: 0 + prefix: --USE_JDK_INFLATER + doc: >- + Use the JDK Inflater instead of the Intel Inflater for reading compressed input outputs: - id: gatk_collect_insert_size_metrics_txt type: File diff --git a/gatk_merge_bam_alignment_4.1.8.0/gatk_merge_bam_alignment_4.1.8.0.cwl b/gatk_merge_bam_alignment_4.1.8.0/gatk_merge_bam_alignment_4.1.8.0.cwl index 024bc4e6..0ca74322 100644 --- a/gatk_merge_bam_alignment_4.1.8.0/gatk_merge_bam_alignment_4.1.8.0.cwl +++ b/gatk_merge_bam_alignment_4.1.8.0/gatk_merge_bam_alignment_4.1.8.0.cwl @@ -334,6 +334,20 @@ inputs: doc: >- Whether to create an MD5 digest for any BAM or FASTQ files created. Default value: false. Possible values: {true, false} + - id: use_jdk_deflater + type: boolean? + inputBinding: + position: 0 + prefix: --USE_JDK_DEFLATER + doc: >- + Use the JDK Deflater instead of the Intel Deflater for writing compressed output + - id: use_jdk_inflater + type: boolean? 
+ inputBinding: + position: 0 + prefix: --USE_JDK_INFLATER + doc: >- + Use the JDK Inflater instead of the Intel Inflater for reading compressed input outputs: - id: gatk_merge_bam_alignment_bam type: File diff --git a/gatk_merge_sam_files_4.1.8.0/gatk_merge_sam_files_4.1.8.0.cwl b/gatk_merge_sam_files_4.1.8.0/gatk_merge_sam_files_4.1.8.0.cwl index 22316995..d0551a31 100644 --- a/gatk_merge_sam_files_4.1.8.0/gatk_merge_sam_files_4.1.8.0.cwl +++ b/gatk_merge_sam_files_4.1.8.0/gatk_merge_sam_files_4.1.8.0.cwl @@ -124,6 +124,20 @@ inputs: doc: | Control verbosity of logging. Default value: INFO. Possible values: {ERROR, WARNING, INFO, DEBUG} + - id: use_jdk_deflater + type: boolean? + inputBinding: + position: 0 + prefix: --USE_JDK_DEFLATER + doc: >- + Use the JDK Deflater instead of the Intel Deflater for writing compressed output + - id: use_jdk_inflater + type: boolean? + inputBinding: + position: 0 + prefix: --USE_JDK_INFLATER + doc: >- + Use the JDK Inflater instead of the Intel Inflater for reading compressed input outputs: - id: gatk_merge_sam_files_bam type: File diff --git a/picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl b/picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl index ec2469ed..8bf0e14e 100644 --- a/picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl +++ b/picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl @@ -111,16 +111,14 @@ inputs: position: 0 prefix: --USE_JDK_DEFLATER doc: >- - Use the JDK Deflater instead of the Intel Deflater for writing compressed output + Use the JDK Deflater instead of the Intel Deflater for writing compressed output - id: use_jdk_inflater type: boolean? inputBinding: position: 0 prefix: --USE_JDK_INFLATER doc: >- - Whether to create a BAM index when writing a coordinate-sorted BAM file. - Default value:false. This option can be set to 'null' to clear the default - value. 
Possible values:{true, false} + Use the JDK Inflater instead of the Intel Inflater for reading compressed input - default: true id: create_bam_index type: boolean? diff --git a/picard_fix_mate_information_4.1.8.1/picard_fix_mate_information_4.1.8.1.cwl b/picard_fix_mate_information_4.1.8.1/picard_fix_mate_information_4.1.8.1.cwl index 8a1436d8..849c764e 100644 --- a/picard_fix_mate_information_4.1.8.1/picard_fix_mate_information_4.1.8.1.cwl +++ b/picard_fix_mate_information_4.1.8.1/picard_fix_mate_information_4.1.8.1.cwl @@ -63,16 +63,14 @@ inputs: position: 0 prefix: --USE_JDK_DEFLATER doc: >- - Use the JDK Deflater instead of the Intel Deflater for writing compressed output + Use the JDK Deflater instead of the Intel Deflater for writing compressed output - id: use_jdk_inflater type: boolean? inputBinding: position: 0 prefix: --USE_JDK_INFLATER doc: >- - Whether to create a BAM index when writing a coordinate-sorted BAM file. - Default value:false. This option can be set to 'null' to clear the default - value. Possible values:{true, false} + Use the JDK Inflater instead of the Intel Inflater for reading compressed input - default: true id: create_bam_index type: boolean? diff --git a/picard_mark_duplicates_4.1.8.1/picard_mark_duplicates_4.1.8.1.cwl b/picard_mark_duplicates_4.1.8.1/picard_mark_duplicates_4.1.8.1.cwl index d5e3290f..7e7078a8 100644 --- a/picard_mark_duplicates_4.1.8.1/picard_mark_duplicates_4.1.8.1.cwl +++ b/picard_mark_duplicates_4.1.8.1/picard_mark_duplicates_4.1.8.1.cwl @@ -123,9 +123,7 @@ inputs: position: 0 prefix: --USE_JDK_INFLATER doc: >- - Whether to create a BAM index when writing a coordinate-sorted BAM file. - Default value:false. This option can be set to 'null' to clear the default - value. Possible values:{true, false} + Use the JDK Inflater instead of the Intel Inflater for reading compressed input - id: duplicate_scoring_strategy type: string? 
inputBinding: From 6a38534bfd0638f531c6e42dd71442a9e5c22026 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Wed, 9 Sep 2020 11:56:56 -0400 Subject: [PATCH 214/476] Update fgbio_filter_consensus_reads_1.2.0.cwl Add secondary file as output --- .../fgbio_filter_consensus_reads_1.2.0.cwl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fgbio_filter_consensus_reads_1.2.0/fgbio_filter_consensus_reads_1.2.0.cwl b/fgbio_filter_consensus_reads_1.2.0/fgbio_filter_consensus_reads_1.2.0.cwl index 53671a82..47fce2ac 100644 --- a/fgbio_filter_consensus_reads_1.2.0/fgbio_filter_consensus_reads_1.2.0.cwl +++ b/fgbio_filter_consensus_reads_1.2.0/fgbio_filter_consensus_reads_1.2.0.cwl @@ -103,6 +103,8 @@ outputs: return inputs.output_file_name; return inputs.input.basename.replace(/.bam/,'_filtered.bam'); } + secondaryFiles: + - ^.bai doc: >- Filters consensus reads generated by CallMolecularConsensusReads or CallDuplexConsensusReads. Two kinds of filtering are performed: From ef6090fa1436caaf1e51d617ed3166e2afb23a37 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Wed, 9 Sep 2020 14:31:27 -0400 Subject: [PATCH 215/476] Update gatk_collect_alignment_summary_metrics_4.1.8.0.cwl :heavy_plus_sign: Update ID to reflect correct version --- .../gatk_collect_alignment_summary_metrics_4.1.8.0.cwl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gatk_collect_alignment_summary_metrics_4.1.8.0/gatk_collect_alignment_summary_metrics_4.1.8.0.cwl b/gatk_collect_alignment_summary_metrics_4.1.8.0/gatk_collect_alignment_summary_metrics_4.1.8.0.cwl index 1c08d679..b53a46af 100644 --- a/gatk_collect_alignment_summary_metrics_4.1.8.0/gatk_collect_alignment_summary_metrics_4.1.8.0.cwl +++ b/gatk_collect_alignment_summary_metrics_4.1.8.0/gatk_collect_alignment_summary_metrics_4.1.8.0.cwl @@ -5,7 +5,7 @@ $namespaces: doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' sbg: 'https://www.sevenbridges.com/' -id: gatk_collect_alignment_summary_metrics_4_1_3_0 +id: 
gatk_collect_alignment_summary_metrics_4.1.8.0 label: GATK-CollectAlignmentSummaryMetrics baseCommand: - gatk From 9e914febcf55e43e4f630bbb932cc63f4d564973 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Thu, 10 Sep 2020 15:53:27 -0400 Subject: [PATCH 216/476] Adding GATK Base Recalibrator and GATK BQSR 4.1.8.1 Resolves #75 --- docs/SUMMARY.md | 2 + gatk_apply_bqsr_4.1.8.1/README.md | 128 +++++++ gatk_apply_bqsr_4.1.8.1/example_inputs.yml | 48 +++ .../gatk_apply_bqsr_4.1.8.1.cwl | 316 ++++++++++++++++ gatk_base_recalibrator_4.1.8.1/README.md | 142 +++++++ .../example_inputs.yml | 58 +++ .../gatk_base_recalibrator_4.1.8.1.cwl | 349 ++++++++++++++++++ 7 files changed, 1043 insertions(+) create mode 100644 gatk_apply_bqsr_4.1.8.1/README.md create mode 100644 gatk_apply_bqsr_4.1.8.1/example_inputs.yml create mode 100644 gatk_apply_bqsr_4.1.8.1/gatk_apply_bqsr_4.1.8.1.cwl create mode 100644 gatk_base_recalibrator_4.1.8.1/README.md create mode 100644 gatk_base_recalibrator_4.1.8.1/example_inputs.yml create mode 100644 gatk_base_recalibrator_4.1.8.1/gatk_base_recalibrator_4.1.8.1.cwl diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 2127b26f..18f31ee0 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -19,7 +19,9 @@ - [simplex_filter v0.1.8](../fgbio_postprocessing_simplex_filter_0.1.8/README.md) - GATK - [ApplyBQSR v4.1.2.0](../gatk_ApplyBQSR_4.1.2.0/README.md) + - [ApplyBQSR v4.1.8.1](../gatk_apply_bqsr_4.1.8.1/README.md) - [BaseRecalibrator v4.1.2.0](../gatk_BaseRecalibrator_4.1.2.0/README.md) + - [BaseRecalibrator v4.1.8.1](../gatk_base_recalibrator_4.1.8.1/README.md) - [MergeBamAlignment v4.1.8.0](../gatk_merge_bam_alignment_4.1.8.0/README.md) - [MergeSamFiles v4.1.8.0](../gatk_merge_sam_files_4.1.8.0/README.md) - [SamToFastq v4.1.8.0](../gatk_sam_to_fastq_4_1_8_0/README.md) diff --git a/gatk_apply_bqsr_4.1.8.1/README.md b/gatk_apply_bqsr_4.1.8.1/README.md new file mode 100644 index 00000000..bffb6a78 --- /dev/null +++ b/gatk_apply_bqsr_4.1.8.1/README.md @@ 
-0,0 +1,128 @@ +# CWL and Dockerfile for running GATK4 - Apply BQSR + +## Version of tools in [docker image](https://hub.docker.com/r/broadinstitute/gatk) + +| Tool | Version | Location | +|--- |--- |--- | +| GATK | 4.1.8.1 | https://github.com/broadinstitute/gatk/releases/tag/4.1.8.1 | + +[![](https://img.shields.io/badge/version-4.1.8.1-blue)](https://github.com/broadinstitute/gatk/releases/tag/4.1.8.1) +## CWL + +- CWL specification 1.0 +- Use example_inputs.yml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner gatk_apply_bqsr_4.1.8.1.cwl example_inputs.yml +``` + +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict gatk_apply_bqsr_4.1.8.1.cwl inputs.yaml + +#Using toil-cwl-runner +> mkdir run_directory +> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 gatk_apply_bqsr_4.1.8.1.cwl inputs.yaml > file.stdout 2> file.stderr & +``` + +## Usage + +```bash +> toil-cwl-runner gatk_apply_bqsr_4.1.8.1.cwl --help + +usage: gatk_apply_bqsr_4.1.8.1.cwl [-h] --reference REFERENCE + [--create_output_bam_index] + --bqsr_recal_file BQSR_RECAL_FILE --input + INPUT [--output_file_name OUTPUT_FILE_NAME] + [--add_output_sam_program_record] + [--add_output_vcf_command_line] + [--arguments_file ARGUMENTS_FILE] + [--cloud_index_prefetch_buffer CLOUD_INDEX_PREFETCH_BUFFER] + [--cloud_prefetch_buffer CLOUD_PREFETCH_BUFFER] + [--create_output_bam_md5] + [--create_output_variant_index] +
[--create_output_variant_md5] + [--disable_bam_index_caching] + [--disable_read_filter DISABLE_READ_FILTER] + [--disable_sequence_dictionary_validation] + [--emit_original_quals] + [--exclude_intervals EXCLUDE_INTERVALS] + [--gatk_config_file GATK_CONFIG_FILE] + [--gcs_max_retries GCS_MAX_RETRIES] + [--gcs_project_for_requester_pays GCS_PROJECT_FOR_REQUESTER_PAYS] + [--global_qscore_prior GLOBAL_QSCORE_PRIOR] + [--interval_exclusion_padding INTERVAL_EXCLUSION_PADDING] + [--interval_merging_rule INTERVAL_MERGING_RULE] + [--interval_padding INTERVAL_PADDING] + [--interval_set_rule INTERVAL_SET_RULE] + [--intervals INTERVALS] [--lenient] + [--preserve_qscores_less_than PRESERVE_QSCORES_LESS_THAN] + [--quantize_quals QUANTIZE_QUALS] [--quiet] + [--read_filter READ_FILTER] + [--read_index READ_INDEX] + [--read_validation_stringency READ_VALIDATION_STRINGENCY] + [--seconds_between_progress_updates SECONDS_BETWEEN_PROGRESS_UPDATES] + [--sequence_dictionary SEQUENCE_DICTIONARY] + [--sites_only_vcf_output] + [--use_jdk_deflater] [--use_jdk_inflater] + [--use_original_qualities] + [--memory_overhead MEMORY_OVERHEAD] + [--memory_per_job MEMORY_PER_JOB] + [--number_of_threads NUMBER_OF_THREADS] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --reference REFERENCE + Reference sequence + --create_output_bam_index + --bqsr_recal_file BQSR_RECAL_FILE + Input recalibration table for BQSR. Only run ApplyBQSR + with the covariates table created from the input BAM + --input INPUT A BAM file containing input read data + --output_file_name OUTPUT_FILE_NAME + Output file name. 
Not Required + --add_output_sam_program_record + --add_output_vcf_command_line + --arguments_file ARGUMENTS_FILE + --cloud_index_prefetch_buffer CLOUD_INDEX_PREFETCH_BUFFER + --cloud_prefetch_buffer CLOUD_PREFETCH_BUFFER + --create_output_bam_md5 + --create_output_variant_index + --create_output_variant_md5 + --disable_bam_index_caching + --disable_read_filter DISABLE_READ_FILTER + --disable_sequence_dictionary_validation + --emit_original_quals + --exclude_intervals EXCLUDE_INTERVALS + --gatk_config_file GATK_CONFIG_FILE + --gcs_max_retries GCS_MAX_RETRIES + --gcs_project_for_requester_pays GCS_PROJECT_FOR_REQUESTER_PAYS + --global_qscore_prior GLOBAL_QSCORE_PRIOR + --interval_exclusion_padding INTERVAL_EXCLUSION_PADDING + --interval_merging_rule INTERVAL_MERGING_RULE + --interval_padding INTERVAL_PADDING + --interval_set_rule INTERVAL_SET_RULE + --intervals INTERVALS + --lenient + --preserve_qscores_less_than PRESERVE_QSCORES_LESS_THAN + --quantize_quals QUANTIZE_QUALS + --quiet + --read_filter READ_FILTER + --read_index READ_INDEX + --read_validation_stringency READ_VALIDATION_STRINGENCY + --seconds_between_progress_updates SECONDS_BETWEEN_PROGRESS_UPDATES + --sequence_dictionary SEQUENCE_DICTIONARY + --sites_only_vcf_output + --use_jdk_deflater + --use_jdk_inflater + --use_original_qualities + --memory_overhead MEMORY_OVERHEAD + --memory_per_job MEMORY_PER_JOB + --number_of_threads NUMBER_OF_THREADS diff --git a/gatk_apply_bqsr_4.1.8.1/example_inputs.yml b/gatk_apply_bqsr_4.1.8.1/example_inputs.yml new file mode 100644 index 00000000..094a1bc1 --- /dev/null +++ b/gatk_apply_bqsr_4.1.8.1/example_inputs.yml @@ -0,0 +1,48 @@ +reference: + class: File + path: chr14_chr16.fasta +input: + class: File + path: SeraCare_0-5_14.bam +bqsr_recal_file: + class: File + path: SeraCare_0-5_14.recal.table +add_output_sam_program_record: +add_output_vcf_command_line: +arguments_file: +cloud_index_prefetch_buffer: +cloud_prefetch_buffer: +create_output_bam_index: 
+create_output_bam_md5: +create_output_variant_index: +create_output_variant_md5: +disable_bam_index_caching: +disable_read_filter: +disable_sequence_dictionary_validation: +emit_original_quals: +exclude_intervals: +gatk_config_file: +gcs_max_retries: +gcs_project_for_requester_pays: +global_qscore_prior: +interval_exclusion_padding: +interval_merging_rule: +interval_padding: +interval_set_rule: +intervals: +lenient: +memory_overhead: +memory_per_job: +number_of_threads: +preserve_qscores_less_than: +quantize_quals: +quiet: +read_filter: +read_index: +read_validation_stringency: +seconds_between_progress_updates: +sequence_dictionary: +sites_only_vcf_output: +use_jdk_deflater: +use_jdk_inflater: +use_original_qualities: diff --git a/gatk_apply_bqsr_4.1.8.1/gatk_apply_bqsr_4.1.8.1.cwl b/gatk_apply_bqsr_4.1.8.1/gatk_apply_bqsr_4.1.8.1.cwl new file mode 100644 index 00000000..fd7ad8b4 --- /dev/null +++ b/gatk_apply_bqsr_4.1.8.1/gatk_apply_bqsr_4.1.8.1.cwl @@ -0,0 +1,316 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: gatk_apply_bqsr_4_1_8_1 +baseCommand: + - gatk +inputs: + - id: reference + type: File + inputBinding: + position: 4 + prefix: '--reference' + doc: Reference sequence + secondaryFiles: + - .fai + - ^.dict + - id: create_output_bam_index + type: boolean? + inputBinding: + position: 6 + prefix: '--create-output-bam-index' + - id: bqsr_recal_file + type: File + inputBinding: + position: 4 + prefix: '--bqsr-recal-file' + doc: >- + Input recalibration table for BQSR. Only run ApplyBQSR with the covariates + table created from the input BAM + - id: input + type: File + inputBinding: + position: 4 + prefix: '--input' + doc: A BAM file containing input read data + secondaryFiles: + - ^.bai + - id: output_file_name + type: string? + doc: Output file name. 
Not Required + - id: add_output_sam_program_record + type: boolean? + inputBinding: + position: 6 + prefix: '--add-output-sam-program-record' + - id: add_output_vcf_command_line + type: boolean? + inputBinding: + position: 6 + prefix: '--add-output-vcf-command-line' + - id: arguments_file + type: File? + inputBinding: + position: 6 + prefix: '--arguments_file' + - id: cloud_index_prefetch_buffer + type: int? + inputBinding: + position: 6 + prefix: '--cloud-index-prefetch-buffer' + - id: cloud_prefetch_buffer + type: int? + inputBinding: + position: 6 + prefix: '--cloud-prefetch-buffer' + - id: create_output_bam_md5 + type: boolean? + inputBinding: + position: 6 + prefix: '--create-output-bam-md5' + - id: create_output_variant_index + type: boolean? + inputBinding: + position: 6 + prefix: '--create-output-variant-index' + - id: create_output_variant_md5 + type: boolean? + inputBinding: + position: 6 + prefix: '--create-output-variant-md5' + - id: disable_bam_index_caching + type: boolean? + inputBinding: + position: 6 + prefix: '--disable-bam-index-caching' + - id: disable_read_filter + type: string? + inputBinding: + position: 6 + prefix: '--disable-read-filter' + - id: disable_sequence_dictionary_validation + type: boolean? + inputBinding: + position: 6 + prefix: '--disable-sequence-dictionary-validation' + - default: true + id: emit_original_quals + type: boolean? + inputBinding: + position: 6 + prefix: '--emit-original-quals' + - id: exclude_intervals + type: string? + inputBinding: + position: 6 + prefix: '--exclude-intervals' + - id: gatk_config_file + type: File? + inputBinding: + position: 6 + prefix: '--gatk-config-file' + - id: gcs_max_retries + type: int? + inputBinding: + position: 6 + prefix: '--gcs-max-retries' + - id: gcs_project_for_requester_pays + type: string? + inputBinding: + position: 6 + prefix: '--gcs-project-for-requester-pays' + - id: global_qscore_prior + type: float? 
+ inputBinding: + position: 6 + prefix: '--global-qscore-prior' + - id: interval_exclusion_padding + type: int? + inputBinding: + position: 6 + prefix: '--interval-exclusion-padding' + - id: interval_merging_rule + type: string? + inputBinding: + position: 6 + prefix: '--interval-merging-rule' + - id: interval_padding + type: int? + inputBinding: + position: 6 + prefix: '--interval-padding' + - id: interval_set_rule + type: string? + inputBinding: + position: 6 + prefix: '--interval-set-rule' + - id: intervals + type: string? + inputBinding: + position: 6 + prefix: '--intervals' + - id: lenient + type: boolean? + inputBinding: + position: 6 + prefix: '--lenient' + - id: preserve_qscores_less_than + type: int? + inputBinding: + position: 6 + prefix: '--preserve-qscores-less-than' + - id: quantize_quals + type: int? + inputBinding: + position: 6 + prefix: '--quantize-quals' + - id: quiet + type: boolean? + inputBinding: + position: 6 + prefix: '--QUIET' + - id: read_filter + type: + - 'null' + - type: array + items: string + inputBinding: + prefix: '--read-filter' + inputBinding: + position: 6 + - id: read_index + type: string? + inputBinding: + position: 6 + prefix: '--read-index' + - id: read_validation_stringency + type: string? + inputBinding: + position: 6 + prefix: '--read-validation-stringency' + - id: seconds_between_progress_updates + type: float? + inputBinding: + position: 6 + prefix: '--seconds-between-progress-updates' + - id: sequence_dictionary + type: File? + inputBinding: + position: 6 + prefix: '--sequence-dictionary' + - id: sites_only_vcf_output + type: boolean? + inputBinding: + position: 6 + prefix: '--sites-only-vcf-output' + - id: use_jdk_deflater + type: boolean? + inputBinding: + position: 6 + prefix: '--use-jdk-deflater' + - id: use_jdk_inflater + type: boolean? + inputBinding: + position: 6 + prefix: '--use-jdk-inflater' + - id: use_original_qualities + type: boolean? 
+ inputBinding: + position: 6 + prefix: '--use-original-qualities' + - id: memory_overhead + type: int? + - id: memory_per_job + type: int? + - id: number_of_threads + type: int? +outputs: + - id: gatk_apply_bqsr_bam + type: File? + outputBinding: + glob: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/, '_bqsr.bam') + } + } + secondaryFiles: + - ^.bai +label: gatk_apply_bqsr_4.1.8.1 +arguments: + - position: 0 + prefix: '--java-options' + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0){ + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx4G" + } else { + return "-Xmx4G" + } + } + - position: 2 + prefix: '--output' + valueFrom: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/, '_bqsr.bam') + } + } + - position: 2 + prefix: '--tmp-dir' + valueFrom: $(runtime.tmpdir) + - position: 1 + prefix: '' + separate: false + valueFrom: ApplyBQSR +requirements: + - class: ResourceRequirement + ramMin: 10000 + coresMin: 8 + - class: DockerRequirement + dockerPull: 'broadinstitute/gatk:4.1.8.1' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 
'foaf:mbox': 'mailto:sumans@mskcc.org' + 'foaf:name': Shalabh Suman + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': gatk4 + 'doap:revision': 4.1.8.1 diff --git a/gatk_base_recalibrator_4.1.8.1/README.md b/gatk_base_recalibrator_4.1.8.1/README.md new file mode 100644 index 00000000..db6542cf --- /dev/null +++ b/gatk_base_recalibrator_4.1.8.1/README.md @@ -0,0 +1,142 @@ +# CWL and Dockerfile for running GATK4 - Base Recalibrator + +## Version of tools in [docker image ](https://hub.docker.com/r/broadinstitute/gatk) + +| Tool | Version | Location | +|--- |--- |--- | +| GATK | 4.1.8.1 | https://github.com/broadinstitute/gatk/releases/tag/4.1.8.1 | + +[![](https://img.shields.io/badge/version-4.1.8.1-blue)](https://github.com/broadinstitute/gatk/releases/tag/4.1.8.1) +## CWL + +- CWL specification 1.0 +- Use example_inputs.yml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner gatk_base_recalibrator_4.1.8.1.cwl example_inputs.yml +``` + +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict gatk_base_recalibrator_4.1.8.1.cwl inputs.yaml + +#Using toil-cwl-runner +> mkdir run_directory +> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching 
--maxLogFileSize 20000000000 gatk_base_recalibrator_4.1.8.1.cwl inputs.yaml > file.stdout 2> file.stderr & +``` + +## Usage + +```bash +> toil-cwl-runner gatk_base_recalibrator_4.1.8.1.cwl --help + +usage: gatk_base_recalibrator_4.1.8.1.cwl [-h] --input INPUT --known_sites_1 + KNOWN_SITES_1 --reference REFERENCE + [--output_file_name OUTPUT_FILE_NAME] + [--add_output_sam_program_record] + [--add_output_vcf_command_line] + [--arguments_file ARGUMENTS_FILE] + [--binary_tag_name BINARY_TAG_NAME] + [--bqsr_baq_gap_open_penalty BQSR_BAQ_GAP_OPEN_PENALTY] + [--cloud-index-prefetch-buffer CLOUD_INDEX_PREFETCH_BUFFER] + [--cloud_prefetch_buffer CLOUD_PREFETCH_BUFFER] + [--create_output_bam_index] + [--create_output_bam_md5] + [--create_output_variant_index] + [--create_output_variant_md5] + [--default_base_qualities DEFAULT_BASE_QUALITIES] + [--deletions_default_quality DELETIONS_DEFAULT_QUALITY] + [--disable_bam_index_caching] + [--disable_read_filter DISABLE_READ_FILTER] + [--disable_sequence_dictionary_validation] + [--exclude_intervals EXCLUDE_INTERVALS] + [--gatk_config_file GATK_CONFIG_FILE] + [--gcs_max_retries GCS_MAX_RETRIES] + [--gcs_project_for_requester_pays GCS_PROJECT_FOR_REQUESTER_PAYS] + [--indels_context_size INDELS_CONTEXT_SIZE] + [--insertions_default_quality INSERTIONS_DEFAULT_QUALITY] + [--interval_exclusion_padding INTERVAL_EXCLUSION_PADDING] + [--interval_merging_rule INTERVAL_MERGING_RULE] + [--interval_padding INTERVAL_PADDING] + [--interval_set_rule INTERVAL_SET_RULE] + [--intervals INTERVALS] [--lenient] + [--low_quality_tail LOW_QUALITY_TAIL] + [--maximum_cycle_value MAXIMUM_CYCLE_VALUE] + [--mismatches_context_size MISMATCHES_CONTEXT_SIZE] + [--mismatches_default_quality MISMATCHES_DEFAULT_QUALITY] + [--preserve_qscores_less_than PRESERVE_QSCORES_LESS_THAN] + [--quantizing_levels QUANTIZING_LEVELS] + [--QUIET] [--read_filter READ_FILTER] + [--read_index READ_INDEX] + [--seconds_between_progress_updates SECONDS_BETWEEN_PROGRESS_UPDATES] + 
[--sequence_dictionary SEQUENCE_DICTIONARY] + [--sites_only_vcf_output] + [--use_original_qualities] + [--number_of_threads NUMBER_OF_THREADS] + [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--known_sites_2 KNOWN_SITES_2] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --input INPUT BAM/SAM file containing reads + --known_sites_1 KNOWN_SITES_1 + One or more databases of known polymorphic sites used + to exclude regions around known polymorphisms from + analysis + --reference REFERENCE + Reference sequence file + --output_file_name OUTPUT_FILE_NAME + Output file name. Not Required + --add_output_sam_program_record + --add_output_vcf_command_line + --arguments_file ARGUMENTS_FILE + --binary_tag_name BINARY_TAG_NAME + --bqsr_baq_gap_open_penalty BQSR_BAQ_GAP_OPEN_PENALTY + --cloud-index-prefetch-buffer CLOUD_INDEX_PREFETCH_BUFFER + --cloud_prefetch_buffer CLOUD_PREFETCH_BUFFER + --create_output_bam_index + --create_output_bam_md5 + --create_output_variant_index + --create_output_variant_md5 + --default_base_qualities DEFAULT_BASE_QUALITIES + --deletions_default_quality DELETIONS_DEFAULT_QUALITY + --disable_bam_index_caching + --disable_read_filter DISABLE_READ_FILTER + --disable_sequence_dictionary_validation + --exclude_intervals EXCLUDE_INTERVALS + --gatk_config_file GATK_CONFIG_FILE + --gcs_max_retries GCS_MAX_RETRIES + --gcs_project_for_requester_pays GCS_PROJECT_FOR_REQUESTER_PAYS + --indels_context_size INDELS_CONTEXT_SIZE + --insertions_default_quality INSERTIONS_DEFAULT_QUALITY + --interval_exclusion_padding INTERVAL_EXCLUSION_PADDING + --interval_merging_rule INTERVAL_MERGING_RULE + --interval_padding INTERVAL_PADDING + --interval_set_rule INTERVAL_SET_RULE + --intervals INTERVALS + --lenient + --low_quality_tail LOW_QUALITY_TAIL + --maximum_cycle_value MAXIMUM_CYCLE_VALUE + --mismatches_context_size MISMATCHES_CONTEXT_SIZE + 
--mismatches_default_quality MISMATCHES_DEFAULT_QUALITY + --preserve_qscores_less_than PRESERVE_QSCORES_LESS_THAN + --quantizing_levels QUANTIZING_LEVELS + --QUIET + --read_filter READ_FILTER + --read_index READ_INDEX + --seconds_between_progress_updates SECONDS_BETWEEN_PROGRESS_UPDATES + --sequence_dictionary SEQUENCE_DICTIONARY + --sites_only_vcf_output + --use_original_qualities + --number_of_threads NUMBER_OF_THREADS + --memory_per_job MEMORY_PER_JOB + --memory_overhead MEMORY_OVERHEAD + --known_sites_2 KNOWN_SITES_2 diff --git a/gatk_base_recalibrator_4.1.8.1/example_inputs.yml b/gatk_base_recalibrator_4.1.8.1/example_inputs.yml new file mode 100644 index 00000000..0e7e657b --- /dev/null +++ b/gatk_base_recalibrator_4.1.8.1/example_inputs.yml @@ -0,0 +1,58 @@ +input: + class: File + metadata: {} + path: input.bam +reference: + class: File + metadata: {} + path: ref.fasta +known_sites_1: + class: File + path: dbsnp_137_14_16.b37.vcf +known_sites_2: + class: File + path: Mills_and_1000G_gold_standard-14_16.indels.b37.vcf +QUIET: +add_output_sam_program_record: +add_output_vcf_command_line: +arguments_file: +binary_tag_name: +bqsr_baq_gap_open_penalty: +cloud-index-prefetch-buffer: +cloud_prefetch_buffer: +create_output_bam_index: +create_output_bam_md5: +create_output_variant_index: +create_output_variant_md5: +default_base_qualities: +deletions_default_quality: +disable_bam_index_caching: +disable_read_filter: +disable_sequence_dictionary_validation: +exclude_intervals: +gatk_config_file: +gcs_max_retries: +gcs_project_for_requester_pays: +indels_context_size: +insertions_default_quality: +interval_exclusion_padding: +interval_merging_rule: +interval_padding: +interval_set_rule: +intervals: +lenient: +low_quality_tail: +maximum_cycle_value: +memory_overhead: +memory_per_job: +mismatches_context_size: +mismatches_default_quality: +number_of_threads: +preserve_qscores_less_than: +quantizing_levels: +read_filter: +read_index: +seconds_between_progress_updates: 
+sequence_dictionary: +sites_only_vcf_output: +use_original_qualities: diff --git a/gatk_base_recalibrator_4.1.8.1/gatk_base_recalibrator_4.1.8.1.cwl b/gatk_base_recalibrator_4.1.8.1/gatk_base_recalibrator_4.1.8.1.cwl new file mode 100644 index 00000000..cdcb0d86 --- /dev/null +++ b/gatk_base_recalibrator_4.1.8.1/gatk_base_recalibrator_4.1.8.1.cwl @@ -0,0 +1,349 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: gatk_base_recalibrator_4_1_8_1 +baseCommand: + - gatk +inputs: + - id: input + type: File + inputBinding: + position: 3 + prefix: '--input' + doc: BAM/SAM file containing reads + secondaryFiles: + - ^.bai + - id: known_sites_1 + type: File + inputBinding: + position: 3 + prefix: '--known-sites' + doc: >- + One or more databases of known polymorphic sites used to exclude regions + around known polymorphisms from analysis + secondaryFiles: + - .idx + - id: reference + type: File + inputBinding: + position: 3 + prefix: '--reference' + doc: Reference sequence file + secondaryFiles: + - .fai + - ^.dict + - id: output_file_name + type: string? + doc: Output file name. Not Required + - id: add_output_sam_program_record + type: boolean? + inputBinding: + position: 10 + prefix: '--add-output-sam-program-record' + - id: add_output_vcf_command_line + type: boolean? + inputBinding: + position: 10 + prefix: '--add-output-vcf-command-line' + - id: arguments_file + type: + - 'null' + - type: array + items: File + inputBinding: + position: 0 + prefix: '--arguments_file' + - id: binary_tag_name + type: string? + inputBinding: + position: 10 + prefix: '--binary-tag-name' + - id: bqsr_baq_gap_open_penalty + type: float? + inputBinding: + position: 10 + prefix: '--bqsr-baq-gap-open-penalty' + - id: cloud-index-prefetch-buffer + type: int? 
+ inputBinding: + position: 10 + prefix: '--cloud-index-prefetch-buffer' + - id: cloud_prefetch_buffer + type: int? + inputBinding: + position: 10 + prefix: '--cloud-prefetch-buffer' + - id: create_output_bam_index + type: boolean? + inputBinding: + position: 10 + prefix: '--create-output-bam-index' + - id: create_output_bam_md5 + type: boolean? + inputBinding: + position: 10 + prefix: '--create-output-bam-md5' + - id: create_output_variant_index + type: boolean? + inputBinding: + position: 10 + prefix: '--create-output-variant-index' + - id: create_output_variant_md5 + type: boolean? + inputBinding: + position: 10 + prefix: '--create-output-variant-md5' + - id: default_base_qualities + type: int? + inputBinding: + position: 10 + prefix: '--default-base-qualities' + - id: deletions_default_quality + type: int? + inputBinding: + position: 10 + prefix: '--deletions-default-quality' + - id: disable_bam_index_caching + type: boolean? + inputBinding: + position: 10 + prefix: '--disable-bam-index-caching' + - id: disable_read_filter + type: string? + inputBinding: + position: 10 + prefix: '--disable-read-filter' + - id: disable_sequence_dictionary_validation + type: boolean? + inputBinding: + position: 10 + prefix: '--disable-sequence-dictionary-validation' + - id: exclude_intervals + type: string? + inputBinding: + position: 10 + prefix: '--exclude-intervals' + - id: gatk_config_file + type: File? + inputBinding: + position: 10 + prefix: '--gatk-config-file' + - id: gcs_max_retries + type: int? + inputBinding: + position: 10 + prefix: '--gcs-max-retries' + - id: gcs_project_for_requester_pays + type: string? + inputBinding: + position: 10 + prefix: '--gcs-project-for-requester-pays' + - id: indels_context_size + type: int? + inputBinding: + position: 10 + prefix: '--indels-context-size' + - id: insertions_default_quality + type: int? + inputBinding: + position: 10 + prefix: '--insertions-default-quality' + - id: interval_exclusion_padding + type: int? 
+ inputBinding: + position: 10 + prefix: '--interval-exclusion-padding' + - id: interval_merging_rule + type: string? + inputBinding: + position: 10 + prefix: '--interval-merging-rule' + - id: interval_padding + type: string? + inputBinding: + position: 10 + prefix: '--interval-padding' + - id: interval_set_rule + type: string? + inputBinding: + position: 10 + prefix: '--interval-set-rule' + - id: intervals + type: string? + inputBinding: + position: 10 + prefix: '--intervals' + - id: lenient + type: boolean? + inputBinding: + position: 10 + prefix: '--lenient' + - id: low_quality_tail + type: int? + inputBinding: + position: 10 + prefix: '--low-quality-tail' + - id: maximum_cycle_value + type: int? + inputBinding: + position: 10 + prefix: '--maximum-cycle-value' + - id: mismatches_context_size + type: int? + inputBinding: + position: 10 + prefix: '--mismatches-context-size' + - id: mismatches_default_quality + type: int? + inputBinding: + position: 10 + prefix: '--mismatches-default-quality' + - id: preserve_qscores_less_than + type: int? + inputBinding: + position: 10 + prefix: '--preserve-qscores-less-than' + - id: quantizing_levels + type: int? + inputBinding: + position: 10 + prefix: '--quantizing-levels' + - id: QUIET + type: boolean? + inputBinding: + position: 10 + prefix: '--QUIET' + - id: read_filter + type: + - 'null' + - type: array + items: string + inputBinding: + prefix: '--read-filter' + inputBinding: + position: 10 + - id: read_index + type: string? + inputBinding: + position: 10 + prefix: '--read-index' + - id: seconds_between_progress_updates + type: float? + inputBinding: + position: 10 + prefix: '--seconds-between-progress-updates' + - id: sequence_dictionary + type: File? + inputBinding: + position: 10 + prefix: '--sequence-dictionary' + - id: sites_only_vcf_output + type: boolean? + inputBinding: + position: 10 + prefix: '--sites-only-vcf-output' + - id: use_original_qualities + type: boolean? 
+ inputBinding: + position: 10 + prefix: '--use-original-qualities' + - id: number_of_threads + type: int? + - id: memory_per_job + type: int? + - id: memory_overhead + type: int? + - id: known_sites_2 + type: File? + inputBinding: + position: 3 + prefix: '--known-sites' + secondaryFiles: + - .idx +outputs: + - id: gatk_base_recalibrator_output + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/, '_bqsr.table') + } + } +label: gatk_base_recalibrator_4.1.8.1 +arguments: + - position: 0 + prefix: '--java-options' + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0){ + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx4G" + } else { + return "-Xmx4G" + } + } + - position: 1 + prefix: '' + separate: false + valueFrom: BaseRecalibrator + - position: 2 + prefix: '--tmp-dir' + valueFrom: $(runtime.tmpdir) + - position: 2 + prefix: '--output' + valueFrom: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/, '_bqsr.table') + } + } + - position: 2 + prefix: '--verbosity' + valueFrom: INFO +requirements: + - class: ResourceRequirement + ramMin: 32000 + coresMin: 8 + - class: DockerRequirement + dockerPull: 'broadinstitute/gatk:4.1.8.1' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 
'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:sumans@mskcc.org' + 'foaf:name': Shalabh Suman + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center From d7142869e55c0887881d4f5316b0c44cc10d846e Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Mon, 14 Sep 2020 11:43:50 -0400 Subject: [PATCH 217/476] Update gatk_base_recalibrator_4.1.8.1.cwl :heavy_check_mark: Fixes as reported by @murphycj2 --- .../gatk_base_recalibrator_4.1.8.1.cwl | 20 ++++++++----------- 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/gatk_base_recalibrator_4.1.8.1/gatk_base_recalibrator_4.1.8.1.cwl b/gatk_base_recalibrator_4.1.8.1/gatk_base_recalibrator_4.1.8.1.cwl index cdcb0d86..aab36fac 100644 --- a/gatk_base_recalibrator_4.1.8.1/gatk_base_recalibrator_4.1.8.1.cwl +++ b/gatk_base_recalibrator_4.1.8.1/gatk_base_recalibrator_4.1.8.1.cwl @@ -17,16 +17,19 @@ inputs: doc: BAM/SAM file containing reads secondaryFiles: - ^.bai - - id: known_sites_1 - type: File + - id: known_sites + type: + type: array + items: File + inputBinding: + prefix: '--known-sites' inputBinding: position: 3 - prefix: '--known-sites' doc: >- One or more databases of known polymorphic sites used to exclude regions around known polymorphisms from analysis secondaryFiles: - - .idx + - ^.idx - id: reference type: File inputBinding: @@ -163,7 +166,7 @@ inputs: position: 10 prefix: '--interval-merging-rule' - id: interval_padding - type: string? + type: int? inputBinding: position: 10 prefix: '--interval-padding' @@ -257,13 +260,6 @@ inputs: type: int? - id: memory_overhead type: int? - - id: known_sites_2 - type: File? 
- inputBinding: - position: 3 - prefix: '--known-sites' - secondaryFiles: - - .idx outputs: - id: gatk_base_recalibrator_output type: File From f8016ea7d0224f8d5ec41856a3f03c82d22a5dea Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Mon, 14 Sep 2020 11:54:18 -0400 Subject: [PATCH 218/476] Fix disable read filter --- gatk_apply_bqsr_4.1.8.1/gatk_apply_bqsr_4.1.8.1.cwl | 9 +++++++-- .../gatk_base_recalibrator_4.1.8.1.cwl | 9 +++++++-- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/gatk_apply_bqsr_4.1.8.1/gatk_apply_bqsr_4.1.8.1.cwl b/gatk_apply_bqsr_4.1.8.1/gatk_apply_bqsr_4.1.8.1.cwl index fd7ad8b4..a25ddb4c 100644 --- a/gatk_apply_bqsr_4.1.8.1/gatk_apply_bqsr_4.1.8.1.cwl +++ b/gatk_apply_bqsr_4.1.8.1/gatk_apply_bqsr_4.1.8.1.cwl @@ -88,10 +88,15 @@ inputs: position: 6 prefix: '--disable-bam-index-caching' - id: disable_read_filter - type: string? + type: - 'null' + - type: array + items: string + inputBinding: + prefix: '--disable-read-filter' inputBinding: position: 6 - prefix: '--disable-read-filter' + doc: Read filters to be disabled before analysis - id: disable_sequence_dictionary_validation type: boolean? inputBinding: diff --git a/gatk_base_recalibrator_4.1.8.1/gatk_base_recalibrator_4.1.8.1.cwl b/gatk_base_recalibrator_4.1.8.1/gatk_base_recalibrator_4.1.8.1.cwl index aab36fac..7c2d439c 100644 --- a/gatk_base_recalibrator_4.1.8.1/gatk_base_recalibrator_4.1.8.1.cwl +++ b/gatk_base_recalibrator_4.1.8.1/gatk_base_recalibrator_4.1.8.1.cwl @@ -116,10 +116,15 @@ inputs: position: 10 prefix: '--disable-bam-index-caching' - id: disable_read_filter - type: string? + type: - 'null' + - type: array + items: string + inputBinding: + prefix: '--disable-read-filter' inputBinding: position: 10 - prefix: '--disable-read-filter' + doc: Read filters to be disabled before analysis - id: disable_sequence_dictionary_validation type: boolean?
inputBinding: From 6ef6b505192d1fd9b0379660cad58c038ac10d7f Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Mon, 14 Sep 2020 11:56:14 -0400 Subject: [PATCH 219/476] Update README.md --- gatk_apply_bqsr_4.1.8.1/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gatk_apply_bqsr_4.1.8.1/README.md b/gatk_apply_bqsr_4.1.8.1/README.md index bffb6a78..e27a608e 100644 --- a/gatk_apply_bqsr_4.1.8.1/README.md +++ b/gatk_apply_bqsr_4.1.8.1/README.md @@ -21,11 +21,11 @@ ```bash #Using CWLTOOL -> cwltool --singularity --non-strict gatk_apply_bqsr_4.1.2.0.cwl inputs.yaml +> cwltool --singularity --non-strict gatk_apply_bqsr_4.1.8.1.cwl inputs.yaml #Using toil-cwl-runner > mkdir run_directory -> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 gatk_apply_bqsr_4.1.2.0.cwl inputs.yaml > file.stdout 2> file.stderr & +> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 gatk_apply_bqsr_4.1.8.1.cwl inputs.yaml > file.stdout 2> file.stderr & ``` ## Usage From 59969ff957674a143555811219d1532b623a67de Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Tue, 15 Sep 2020 15:28:53 -0400 Subject: [PATCH 220/476] Update example_inputs.yml --- gatk_base_recalibrator_4.1.8.1/example_inputs.yml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/gatk_base_recalibrator_4.1.8.1/example_inputs.yml b/gatk_base_recalibrator_4.1.8.1/example_inputs.yml index 0e7e657b..01b02cbb 100644 --- a/gatk_base_recalibrator_4.1.8.1/example_inputs.yml +++ 
b/gatk_base_recalibrator_4.1.8.1/example_inputs.yml @@ -6,11 +6,10 @@ reference: class: File metadata: {} path: ref.fasta -known_sites_1: - class: File +known_sites: + - class: File path: dbsnp_137_14_16.b37.vcf -known_sites_2: - class: File + - class: File path: Mills_and_1000G_gold_standard-14_16.indels.b37.vcf QUIET: add_output_sam_program_record: From d5ba9ea7ce3f630289a01c45125e343cfb1b4c91 Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Tue, 15 Sep 2020 15:41:45 -0400 Subject: [PATCH 221/476] bug in tmp dir for picard fix mate info --- .../picard_fix_mate_information_4.1.8.1.cwl | 1 - 1 file changed, 1 deletion(-) diff --git a/picard_fix_mate_information_4.1.8.1/picard_fix_mate_information_4.1.8.1.cwl b/picard_fix_mate_information_4.1.8.1/picard_fix_mate_information_4.1.8.1.cwl index 849c764e..d1fcb70a 100644 --- a/picard_fix_mate_information_4.1.8.1/picard_fix_mate_information_4.1.8.1.cwl +++ b/picard_fix_mate_information_4.1.8.1/picard_fix_mate_information_4.1.8.1.cwl @@ -136,7 +136,6 @@ arguments: valueFrom: FixMateInformation - position: 0 prefix: --TMP_DIR - separate: false valueFrom: "$(runtime.tmpdir)" - position: 0 prefix: -O From ccb818c98bd788d4cb8ff9c24f14fdc635f72e3f Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Tue, 15 Sep 2020 15:42:21 -0400 Subject: [PATCH 222/476] fix for abra output --- abra2_2.22/abra2_2.22.cwl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/abra2_2.22/abra2_2.22.cwl b/abra2_2.22/abra2_2.22.cwl index 22308777..9aade364 100644 --- a/abra2_2.22/abra2_2.22.cwl +++ b/abra2_2.22/abra2_2.22.cwl @@ -165,8 +165,7 @@ outputs: - type: array items: File outputBinding: - glob: | - *abra.bam + glob: '*abra.bam' secondaryFiles: - ^.bai label: abra2_2.22 From d6e7d19d0be03e47384683892b4f8ee9c79c3725 Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Tue, 15 Sep 2020 16:04:57 -0400 Subject: [PATCH 223/476] update fai secondary file --- abra2_2.22/abra2_2.22.cwl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/abra2_2.22/abra2_2.22.cwl b/abra2_2.22/abra2_2.22.cwl index 9aade364..6c5f8bfa 100644 --- a/abra2_2.22/abra2_2.22.cwl +++ b/abra2_2.22/abra2_2.22.cwl @@ -44,7 +44,7 @@ inputs: prefix: '--ref' doc: Genome reference location secondaryFiles: - - ^.fai + - ^.fasta.fai - id: targets type: File inputBinding: From 8a92de1432db383ec903b10ffcb9e42822d268b8 Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Wed, 16 Sep 2020 13:33:04 -0400 Subject: [PATCH 224/476] fix create index param --- .../picard_add_or_replace_read_groups_4.1.8.1.cwl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl b/picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl index 8bf0e14e..4357a544 100644 --- a/picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl +++ b/picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl @@ -124,7 +124,7 @@ inputs: type: boolean? inputBinding: position: 0 - prefix: CREATE_INDEX=true + prefix: --CREATE_INDEX doc: >- Whether to create a BAM index when writing a coordinate-sorted BAM file. Default value:false. 
This option can be set to 'null' to clear the default From 267a0dea1ee3f1d027123112ba25c87dfbef28c5 Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Wed, 16 Sep 2020 14:15:26 -0400 Subject: [PATCH 225/476] update reference and docker --- abra2_2.22/abra2_2.22.cwl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/abra2_2.22/abra2_2.22.cwl b/abra2_2.22/abra2_2.22.cwl index 6c5f8bfa..8575e4ae 100644 --- a/abra2_2.22/abra2_2.22.cwl +++ b/abra2_2.22/abra2_2.22.cwl @@ -44,7 +44,7 @@ inputs: prefix: '--ref' doc: Genome reference location secondaryFiles: - - ^.fasta.fai + - .fai - id: targets type: File inputBinding: @@ -223,7 +223,7 @@ requirements: } }" - class: DockerRequirement - dockerPull: mskaccess/abra:2.22 + dockerPull: mskaccess/abra2:2.22 - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' From db9384c475bb992beff0e431e0c31bea751055d2 Mon Sep 17 00:00:00 2001 From: ionox0 Date: Mon, 28 Sep 2020 17:20:28 -0400 Subject: [PATCH 226/476] allow bwa 0.7.17 to take RG_ID as a string, or build it dynamically from inputs --- bwa_mem_0.7.17/bwa_mem_0.7.17.cwl | 118 ++++++++++++++++++++---------- 1 file changed, 78 insertions(+), 40 deletions(-) diff --git a/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl b/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl index a075ff76..4fd2f010 100644 --- a/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl +++ b/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl @@ -21,206 +21,214 @@ inputs: - .pac - .sa - .fai - - id: sample_id - type: string - - id: lane_id - type: string? - id: A - doc: score for a sequence match, which scales options -TdBOELU unless overridden [1] type: int? inputBinding: position: 0 prefix: '-A' + doc: >- + score for a sequence match, which scales options -TdBOELU unless + overridden [1] - id: B - doc: penalty for a mismatch [4] type: int? inputBinding: position: 0 prefix: '-B' + doc: 'penalty for a mismatch [4]' - id: C - doc: append FASTA/FASTQ comment to SAM output type: boolean? 
inputBinding: position: 0 prefix: '-C' + doc: append FASTA/FASTQ comment to SAM output - id: E - doc: gap extension penalty; a gap of size k cost '{-O} + {-E}*k' [1,1] type: 'int[]?' inputBinding: position: 0 prefix: '-E' itemSeparator: ',' + doc: 'gap extension penalty; a gap of size k cost ''{-O} + {-E}*k'' [1,1]' - id: L - doc: penalty for 5'- and 3'-end clipping [5,5] type: 'int[]?' inputBinding: position: 0 prefix: '-L' itemSeparator: ',' + doc: 'penalty for 5''- and 3''-end clipping [5,5]' - id: M type: boolean? inputBinding: position: 0 prefix: '-M' - id: O - doc: gap open penalties for deletions and insertions [6,6] type: 'int[]?' inputBinding: position: 0 prefix: '-O' itemSeparator: ',' + doc: 'gap open penalties for deletions and insertions [6,6]' - id: P - doc: skip pairing; mate rescue performed unless -S also in use type: boolean? inputBinding: position: 0 prefix: '-P' + doc: skip pairing; mate rescue performed unless -S also in use - id: S - doc: skip mate rescue type: boolean? inputBinding: position: 0 prefix: '-S' + doc: skip mate rescue - id: T - doc: minimum score to output [30] type: int? inputBinding: position: 0 prefix: '-T' + doc: 'minimum score to output [30]' - id: U - doc: penalty for an unpaired read pair [17] type: int? inputBinding: position: 0 prefix: '-U' + doc: 'penalty for an unpaired read pair [17]' - id: a - doc: output all alignments for SE or unpaired PE type: boolean? inputBinding: position: 0 prefix: '-a' + doc: output all alignments for SE or unpaired PE - id: c - doc: skip seeds with more than INT occurrences [500] type: int? inputBinding: position: 0 prefix: '-c' + doc: 'skip seeds with more than INT occurrences [500]' - id: d - doc: off-diagonal X-dropoff [100] type: int? inputBinding: position: 0 prefix: '-d' + doc: 'off-diagonal X-dropoff [100]' - id: k - doc: minimum seed length [19] type: int? 
inputBinding: position: 0 prefix: '-k' + doc: 'minimum seed length [19]' - id: K - doc: process INT input bases in each batch regardless of nThreads (for reproducibility) [] type: int? inputBinding: position: 0 prefix: '-K' + doc: >- + process INT input bases in each batch regardless of nThreads (for + reproducibility) [] - id: output type: string? - id: p - doc: smart pairing (ignoring in2.fq) type: boolean? inputBinding: position: 0 prefix: '-p' + doc: smart pairing (ignoring in2.fq) - id: r - doc: look for internal seeds inside a seed longer than {-k} * FLOAT [1.5] type: float? inputBinding: position: 0 prefix: '-r' + doc: 'look for internal seeds inside a seed longer than {-k} * FLOAT [1.5]' - id: v - doc: 'verbosity level: 1=error, 2=warning, 3=message, 4+=debugging [3]' type: int? inputBinding: position: 0 prefix: '-v' + doc: 'verbosity level: 1=error, 2=warning, 3=message, 4+=debugging [3]' - id: w - doc: band width for banded alignment [100] type: int? inputBinding: position: 0 prefix: '-w' + doc: 'band width for banded alignment [100]' - id: 'y' - doc: seed occurrence for the 3rd round seeding [20] type: int? inputBinding: position: 0 prefix: '-y' + doc: 'seed occurrence for the 3rd round seeding [20]' - id: D - doc: drop chains shorter than FLOAT fraction of the longest overlapping chain [0.50] type: float? inputBinding: position: 0 prefix: '-D' + doc: >- + drop chains shorter than FLOAT fraction of the longest overlapping chain + [0.50] - id: W - doc: discard a chain if seeded bases shorter than INT [0] type: int? inputBinding: position: 0 prefix: '-W' + doc: 'discard a chain if seeded bases shorter than INT [0]' - id: m - doc: perform at most INT rounds of mate rescues for each read [50] type: int? inputBinding: position: 0 prefix: '-m' + doc: 'perform at most INT rounds of mate rescues for each read [50]' - id: e type: boolean? inputBinding: position: 0 prefix: '-e' - id: x - doc: >- - read type. 
Setting -x changes multiple parameters unless overridden [null] - pacbio: -k17 -W40 -r10 -A1 -B1 -O1 -E1 -L0 (PacBio reads to ref) - ont2d: -k14 -W20 -r10 -A1 -B1 -O1 -E1 -L0 (Oxford Nanopore 2D-reads to ref) - intractg: -B9 -O16 -L5 (intra-species contigs to ref) type: string? inputBinding: position: 0 prefix: '-x' + doc: >- + read type. Setting -x changes multiple parameters unless overridden [null] + pacbio: -k17 -W40 -r10 -A1 -B1 -O1 -E1 -L0 (PacBio reads to ref) ont2d: + -k14 -W20 -r10 -A1 -B1 -O1 -E1 -L0 (Oxford Nanopore 2D-reads to ref) + intractg: -B9 -O16 -L5 (intra-species contigs to ref) - id: H - doc: if there are 80% of the max score, output all in XA [5,200] type: - File? - string? inputBinding: position: 0 prefix: '-H' + doc: >- + if there are 80% of the max score, output all in XA + [5,200] - id: j - doc: treat ALT contigs as part of the primary assembly (i.e. ignore .alt file) type: File? inputBinding: position: 0 prefix: '-j' + doc: >- + treat ALT contigs as part of the primary assembly (i.e. ignore + .alt file) - id: he - doc: if there are 80% of the max score, output all in XA [5,200] type: 'int[]?' inputBinding: position: 0 prefix: '-h' itemSeparator: ',' + doc: >- + if there are 80% of the max score, output all in XA + [5,200] - id: V - doc: output the reference FASTA header in the XR tag type: boolean? inputBinding: position: 0 prefix: '-V' + doc: output the reference FASTA header in the XR tag - id: 'Y' - doc: use soft clipping for supplementary alignments type: boolean? inputBinding: position: 0 prefix: '-Y' + doc: use soft clipping for supplementary alignments - id: I type: string? inputBinding: @@ -234,10 +242,19 @@ inputs: doc: Number of threads - id: R type: string? - inputBinding: - position: 0 - prefix: '-R' doc: 'STR read group header line such as ''@RG\tID -foo\tSM -bar'' [null]' + - id: sample_id + type: string? + - id: lane_id + type: string? + - id: platform + type: string? + - id: platform_unit + type: string? 
+ - id: center_name + type: string? + - id: library_id + type: string? outputs: - id: bwa_mem_output_sam type: File @@ -248,6 +265,27 @@ outputs: return inputs.output; return inputs.reads[0].basename.replace(/(fastq.gz)|(fq.gz)/, 'sam'); } +arguments: + - position: 0 + prefix: -R + valueFrom: |- + ${ + if (inputs.sample_id) { + var rg_id = "@RG ID:$(inputs.sample_id) SM:$(inputs.sample_id)"; + if (inputs.library_id) { + rg_id += " LB:$(inputs.library_id) "; + } if (inputs.platform) { + rg_id += " PL:$(inputs.platform) "; + } if (inputs.platform_unit) { + rg_id += " PU:$(inputs.platform_unit) "; + } if (inputs.center_name) { + rg_id += " CN:$(inputs.center_name) "; + } + return rg_id + } else { + return inputs.R + } + } requirements: - class: ResourceRequirement ramMin: 32000 From 1ec889dcc514f4d5c04a0c1285a89f82628d84f4 Mon Sep 17 00:00:00 2001 From: ionox0 Date: Mon, 28 Sep 2020 17:32:36 -0400 Subject: [PATCH 227/476] use \t instead of tab --- bwa_mem_0.7.17/bwa_mem_0.7.17.cwl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl b/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl index 4fd2f010..d42dbe13 100644 --- a/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl +++ b/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl @@ -271,15 +271,15 @@ arguments: valueFrom: |- ${ if (inputs.sample_id) { - var rg_id = "@RG ID:$(inputs.sample_id) SM:$(inputs.sample_id)"; + var rg_id = "@RG\tID:$(inputs.sample_id)\tSM:$(inputs.sample_id)"; if (inputs.library_id) { - rg_id += " LB:$(inputs.library_id) "; + rg_id += "\tLB:$(inputs.library_id)"; } if (inputs.platform) { - rg_id += " PL:$(inputs.platform) "; + rg_id += "\tPL:$(inputs.platform)"; } if (inputs.platform_unit) { - rg_id += " PU:$(inputs.platform_unit) "; + rg_id += "\tPU:$(inputs.platform_unit)"; } if (inputs.center_name) { - rg_id += " CN:$(inputs.center_name) "; + rg_id += "\tCN:$(inputs.center_name)"; } return rg_id } else { From d7409322e87341dbdfa733a87e93c99c1b7df38a Mon Sep 17 00:00:00 2001 From: 
ionox0 Date: Tue, 29 Sep 2020 11:47:51 -0400 Subject: [PATCH 228/476] fix dynamic RG header javascript references when inside of a ${} expression, there is no need to use $() for parameter references --- bwa_mem_0.7.17/bwa_mem_0.7.17.cwl | 36 +++++++++++++++--------------- bwa_mem_0.7.17/example_inputs.yaml | 9 ++++---- 2 files changed, 22 insertions(+), 23 deletions(-) diff --git a/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl b/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl index d42dbe13..1ea512b0 100644 --- a/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl +++ b/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl @@ -267,25 +267,25 @@ outputs: } arguments: - position: 0 - prefix: -R + prefix: '-R' valueFrom: |- - ${ - if (inputs.sample_id) { - var rg_id = "@RG\tID:$(inputs.sample_id)\tSM:$(inputs.sample_id)"; - if (inputs.library_id) { - rg_id += "\tLB:$(inputs.library_id)"; - } if (inputs.platform) { - rg_id += "\tPL:$(inputs.platform)"; - } if (inputs.platform_unit) { - rg_id += "\tPU:$(inputs.platform_unit)"; - } if (inputs.center_name) { - rg_id += "\tCN:$(inputs.center_name)"; - } - return rg_id - } else { - return inputs.R - } - } + ${ + if (inputs.sample_id) { + var rg_id = "@RG\\tID:" + inputs.sample_id + "\\tSM:" + inputs.sample_id; + if (inputs.library_id) { + rg_id += "\\tLB:" + inputs.library_id; + } if (inputs.platform) { + rg_id += "\\tPL:" + inputs.platform; + } if (inputs.platform_unit) { + rg_id += "\\tPU:" + inputs.platform_unit; + } if (inputs.center_name) { + rg_id += "\\tCN:" + inputs.center_name; + } + return rg_id + } else { + return inputs.R + } + } requirements: - class: ResourceRequirement ramMin: 32000 diff --git a/bwa_mem_0.7.17/example_inputs.yaml b/bwa_mem_0.7.17/example_inputs.yaml index e63510d9..85b01a8a 100644 --- a/bwa_mem_0.7.17/example_inputs.yaml +++ b/bwa_mem_0.7.17/example_inputs.yaml @@ -1,10 +1,9 @@ reads: - class: File - path: "path/to/fastq_R1.fastq" + path: "/Users/johnsoni/Desktop/test_fastq_to_bam/fastq/test_R1_001.fastq.gz" - class: File - path: "path/to/fastq_R2.fastq" 
-reference_fasta: + path: "/Users/johnsoni/Desktop/test_fastq_to_bam/fastq/test_R2_001.fastq.gz" +reference: class: File - path: "/path/to/reference.fasta" + path: "/Users/johnsoni/Desktop/test_fastq_to_bam/reference/chr14_chr16.fasta" sample_id: test_sample_id -lane_id: test_lane_id From ed3a49beee1ad4fb36e12cd84cf5f2270db19669 Mon Sep 17 00:00:00 2001 From: ionox0 Date: Tue, 29 Sep 2020 11:49:03 -0400 Subject: [PATCH 229/476] fix test inputs --- bwa_mem_0.7.17/example_inputs.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bwa_mem_0.7.17/example_inputs.yaml b/bwa_mem_0.7.17/example_inputs.yaml index 85b01a8a..74683384 100644 --- a/bwa_mem_0.7.17/example_inputs.yaml +++ b/bwa_mem_0.7.17/example_inputs.yaml @@ -1,9 +1,9 @@ reads: - class: File - path: "/Users/johnsoni/Desktop/test_fastq_to_bam/fastq/test_R1_001.fastq.gz" + path: "path/to/fastq_R1.fastq" - class: File - path: "/Users/johnsoni/Desktop/test_fastq_to_bam/fastq/test_R2_001.fastq.gz" + path: "path/to/fastq_R2.fastq" reference: class: File - path: "/Users/johnsoni/Desktop/test_fastq_to_bam/reference/chr14_chr16.fasta" + path: "/path/to/reference.fasta" sample_id: test_sample_id From 79e4fd27deb7aa034721deeab356143afb73cd75 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Thu, 1 Oct 2020 00:13:49 -0400 Subject: [PATCH 230/476] updating parameters and docs --- bwa_mem_0.7.17/README.md | 2 +- bwa_mem_0.7.17/bwa_mem_0.7.17.cwl | 59 ++++++++++++++++++++++++++----- 2 files changed, 52 insertions(+), 9 deletions(-) diff --git a/bwa_mem_0.7.17/README.md b/bwa_mem_0.7.17/README.md index 6de3155c..e5533af1 100644 --- a/bwa_mem_0.7.17/README.md +++ b/bwa_mem_0.7.17/README.md @@ -1,4 +1,4 @@ -# CWL and Dockerfile for running ABRA2 +# CWL and Dockerfile for running BWA MEM ## Version of tools in docker image (/container/Dockerfile) diff --git a/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl b/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl index 1ea512b0..269c58b8 100644 --- a/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl +++ 
b/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl @@ -6,6 +6,14 @@ baseCommand: - bwa - mem inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? - id: reads type: 'File[]' inputBinding: @@ -234,12 +242,6 @@ inputs: inputBinding: position: 0 prefix: '-M' - - id: t - type: int? - inputBinding: - position: 0 - prefix: '-t' - doc: Number of threads - id: R type: string? doc: 'STR read group header line such as ''@RG\tID -foo\tSM -bar'' [null]' @@ -266,6 +268,9 @@ outputs: return inputs.reads[0].basename.replace(/(fastq.gz)|(fq.gz)/, 'sam'); } arguments: + - position: 0 + prefix: '-t' + valueFrom: $(runtime.cores) - position: 0 prefix: '-R' valueFrom: |- @@ -288,8 +293,28 @@ arguments: } requirements: - class: ResourceRequirement - ramMin: 32000 - coresMin: 4 + ramMin: "${ + if(inputs.memory_per_job && inputs.memory_overhead) { + return inputs.memory_per_job + inputs.memory_overhead + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + return inputs.memory_per_job + 2000 + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return 32000 + inputs.memory_overhead + } + else { + return 32000 + } + }" + coresMin: "${ + if (inputs.number_of_threads) { + return inputs.number_of_threads + } + else { + return 16 + } + }" - class: DockerRequirement dockerPull: 'mskaccess/bwa_mem_0.7.17:0.1.0' - class: InlineJavascriptRequirement @@ -299,3 +324,21 @@ stdout: |- return inputs.output; return inputs.reads[0].basename.replace(/(fastq.gz)|(fq.gz)/, 'sam'); } +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:johnsoni@mskcc.org' + 'foaf:name': Ian Johnson + 
'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': bwa + 'doap:revision': 0.7.17 \ No newline at end of file From c1d10b01a3c8476667e8f37b1f9666e586e02725 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Thu, 1 Oct 2020 00:20:23 -0400 Subject: [PATCH 231/476] updating docs --- bwa_mem_0.7.17/README.md | 72 ++++++++++++++++++++++--------- bwa_mem_0.7.17/bwa_mem_0.7.17.cwl | 64 ++++++++++++++++----------- 2 files changed, 91 insertions(+), 45 deletions(-) diff --git a/bwa_mem_0.7.17/README.md b/bwa_mem_0.7.17/README.md index e5533af1..fd6785d6 100644 --- a/bwa_mem_0.7.17/README.md +++ b/bwa_mem_0.7.17/README.md @@ -2,18 +2,18 @@ ## Version of tools in docker image (/container/Dockerfile) -| Tool | Version | Location | -|--- |--- |--- | -| ubuntu | 16.04 | - | -| BWA | 0.7.17 | https://github.com/lh3/bwa/releases/tag/v0.7.17 | +| Tool | Version | Location | +| ------ | ------- | ----------------------------------------------- | +| ubuntu | 16.04 | - | +| BWA | 0.7.17 | https://github.com/lh3/bwa/releases/tag/v0.7.17 | [![](https://images.microbadger.com/badges/version/mskaccess/bwa_mem_0.7.17.svg)](https://microbadger.com/images/mskaccess/bwa_mem_0.7.17 "Get your own version badge on microbadger.com") [![](https://images.microbadger.com/badges/image/mskaccess/bwa_mem_0.7.17.svg)](https://microbadger.com/images/mskaccess/bwa_mem_0.7.17 "Get your own image badge on microbadger.com") ## CWL -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): ```bash > toil-cwl-runner bwa_mem_0.7.17.cwl example_inputs.yaml @@ -32,27 +32,52 @@ ### Usage ``` -usage: bwa_mem_0.7.17/bwa_mem_0.7.17.cwl [-h] --reads READS --reference - REFERENCE --sample_id SAMPLE_ID - [--lane_id LANE_ID] [-A A] [-B B] - 
[-C] [-E E] [-L L] [-M] [-O O] [-P] - [-S] [-T T] [-U U] [-a] [-c C] [-d D] - [-k K] [-K K] [--output OUTPUT] [-p] - [-r R] [-v V] [-w W] [-y Y] [-D D] - [-W W] [-m M] [-e] [-x X] [-j J] - [--he HE] [-V] [-Y] [-I I] [-t T] - [-R R] - [job_order] +usage: bwa_mem_0.7.17.cwl [-h] [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] --reads + READS --reference REFERENCE [-A A] [-B B] [-C] + [-E E] [-L L] [-M] [-O O] [-P] [-S] [-T T] [-U U] + [-a] [-c C] [-d D] [-k K] [-K K] [--output OUTPUT] + [-p] [-r R] [-v V] [-w W] [-y Y] [-D D] [-W W] + [-m M] [-e] [-x X] [-j J] [--he HE] [-V] [-Y] [-I I] + [-R R] [--sample_id SAMPLE_ID] [--lane_id LANE_ID] + [--platform PLATFORM] + [--platform_unit PLATFORM_UNIT] + [--center_name CENTER_NAME] + [--library_id LIBRARY_ID] + [job_order] + +bwa mem [-aCHMpP] [-t nThreads] [-k minSeedLen] [-w bandWidth] [-d zDropoff] +[-r seedSplitRatio] [-c maxOcc] [-A matchScore] [-B mmPenalty] [-O gapOpenPen] +[-E gapExtPen] [-L clipPen] [-U unpairPen] [-R RGline] [-v verboseLevel] +db.prefix reads.fq [mates.fq] Align 70bp-1Mbp query sequences with the BWA-MEM +algorithm. Briefly, the algorithm works by seeding alignments with maximal +exact matches (MEMs) and then extending seeds with the affine-gap Smith- +Waterman algorithm (SW). If mates.fq file is absent and option -p is not set, +this command regards input reads are single-end. If mates.fq is present, this +command assumes the i-th read in reads.fq and the i-th read in mates.fq +constitute a read pair. If -p is used, the command assumes the 2i-th and the +(2i+1)-th read in reads.fq constitute a read pair (such input file is said to +be interleaved). In this case, mates.fq is ignored. In the paired-end mode, +the mem command will infer the read orientation and the insert size +distribution from a batch of reads. The BWA-MEM algorithm performs local +alignment. 
It may produce multiple primary alignments for different part of a +query sequence. This is a crucial feature for long sequences. However, some +tools such as Picard’s markDuplicates does not work with split alignments. One +may consider to use option -M to flag shorter split hits as secondary. positional arguments: job_order Job input json file optional arguments: -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS --reads READS --reference REFERENCE - --sample_id SAMPLE_ID - --lane_id LANE_ID -A A score for a sequence match, which scales options -TdBOELU unless overridden [1] -B B penalty for a mismatch [4] @@ -100,7 +125,12 @@ optional arguments: -V output the reference FASTA header in the XR tag -Y use soft clipping for supplementary alignments -I I - -t T Number of threads -R R STR read group header line such as '@RG\tID -foo\tSM -bar' [null] + --sample_id SAMPLE_ID + --lane_id LANE_ID + --platform PLATFORM + --platform_unit PLATFORM_UNIT + --center_name CENTER_NAME + --library_id LIBRARY_ID ``` diff --git a/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl b/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl index 269c58b8..9775f5cd 100644 --- a/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl +++ b/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl @@ -1,7 +1,9 @@ class: CommandLineTool cwlVersion: v1.0 $namespaces: - sbg: 'https://www.sevenbridges.com/' + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' baseCommand: - bwa - mem @@ -267,6 +269,32 @@ outputs: return inputs.output; return inputs.reads[0].basename.replace(/(fastq.gz)|(fq.gz)/, 'sam'); } +doc: >- + bwa mem [-aCHMpP] [-t nThreads] [-k minSeedLen] [-w bandWidth] [-d zDropoff] + [-r seedSplitRatio] [-c maxOcc] [-A matchScore] [-B mmPenalty] [-O gapOpenPen] + [-E gapExtPen] [-L clipPen] [-U unpairPen] [-R RGline] [-v verboseLevel] + 
db.prefix reads.fq [mates.fq] + + Align 70bp-1Mbp query sequences with the BWA-MEM algorithm. Briefly, the + algorithm works by seeding alignments with maximal exact matches (MEMs) and + then extending seeds with the affine-gap Smith-Waterman algorithm (SW). + + + If mates.fq file is absent and option -p is not set, this command regards + input reads are single-end. If mates.fq is present, this command assumes the + i-th read in reads.fq and the i-th read in mates.fq constitute a read pair. If + -p is used, the command assumes the 2i-th and the (2i+1)-th read in reads.fq + constitute a read pair (such input file is said to be interleaved). In this + case, mates.fq is ignored. In the paired-end mode, the mem command will infer + the read orientation and the insert size distribution from a batch of reads. + + + The BWA-MEM algorithm performs local alignment. It may produce multiple + primary alignments for different part of a query sequence. This is a crucial + feature for long sequences. However, some tools such as Picard’s + markDuplicates does not work with split alignments. One may consider to use + option -M to flag shorter split hits as secondary. 
+label: bwa_mem_0.7.17 arguments: - position: 0 prefix: '-t' @@ -293,28 +321,16 @@ arguments: } requirements: - class: ResourceRequirement - ramMin: "${ - if(inputs.memory_per_job && inputs.memory_overhead) { - return inputs.memory_per_job + inputs.memory_overhead - } - else if (inputs.memory_per_job && !inputs.memory_overhead){ - return inputs.memory_per_job + 2000 - } - else if(!inputs.memory_per_job && inputs.memory_overhead){ - return 32000 + inputs.memory_overhead - } - else { - return 32000 - } - }" - coresMin: "${ - if (inputs.number_of_threads) { - return inputs.number_of_threads - } - else { - return 16 - } - }" + ramMin: >- + ${ if(inputs.memory_per_job && inputs.memory_overhead) { return + inputs.memory_per_job + inputs.memory_overhead } else if + (inputs.memory_per_job && !inputs.memory_overhead){ return + inputs.memory_per_job + 2000 } else if(!inputs.memory_per_job && + inputs.memory_overhead){ return 32000 + inputs.memory_overhead } else { + return 32000 } } + coresMin: >- + ${ if (inputs.number_of_threads) { return inputs.number_of_threads } else + { return 16 } } - class: DockerRequirement dockerPull: 'mskaccess/bwa_mem_0.7.17:0.1.0' - class: InlineJavascriptRequirement @@ -341,4 +357,4 @@ stdout: |- 'doap:release': - class: 'doap:Version' 'doap:name': bwa - 'doap:revision': 0.7.17 \ No newline at end of file + 'doap:revision': 0.7.17 From 8feca1d2d45e45cab81d9af35c15ae9c47fdc49b Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 2 Oct 2020 11:57:14 -0400 Subject: [PATCH 232/476] Update bwa_mem_0.7.17.cwl --- bwa_mem_0.7.17/bwa_mem_0.7.17.cwl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl b/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl index 9775f5cd..9c630577 100644 --- a/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl +++ b/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl @@ -4,6 +4,7 @@ $namespaces: dct: 'http://purl.org/dc/terms/' doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 
'https://www.sevenbridges.com/' baseCommand: - bwa - mem @@ -202,14 +203,13 @@ inputs: intractg: -B9 -O16 -L5 (intra-species contigs to ref) - id: H type: - - File? - - string? + - boolean? inputBinding: position: 0 prefix: '-H' doc: >- - if there are 80% of the max score, output all in XA - [5,200] + Use hard clipping ’H’ in the SAM output. This option may dramatically + reduce the redundancy of output when mapping long contig or BAC sequences - id: j type: File? inputBinding: From b2012be6afe8637f5847421648767c6396fff016 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 2 Oct 2020 14:29:51 -0400 Subject: [PATCH 233/476] Update fgbio_fastq_to_bam_1.2.0.cwl --- fgbio_fastq_to_bam_1.2.0/fgbio_fastq_to_bam_1.2.0.cwl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fgbio_fastq_to_bam_1.2.0/fgbio_fastq_to_bam_1.2.0.cwl b/fgbio_fastq_to_bam_1.2.0/fgbio_fastq_to_bam_1.2.0.cwl index eac42dd3..7fa258b0 100644 --- a/fgbio_fastq_to_bam_1.2.0/fgbio_fastq_to_bam_1.2.0.cwl +++ b/fgbio_fastq_to_bam_1.2.0/fgbio_fastq_to_bam_1.2.0.cwl @@ -5,7 +5,7 @@ $namespaces: doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' sbg: 'https://www.sevenbridges.com/' -id: fgbio_fastq_to_bam_1.2.0 +id: fgbio_fastq_to_bam_1_2_0 baseCommand: - fgbio inputs: @@ -169,7 +169,6 @@ doc: >- label: fgbio_fastq_to_bam_1.2.0 arguments: - position: 0 - prefix: '' valueFrom: |- ${ if(inputs.memory_per_job && inputs.memory_overhead) { From bc32467a1cbdf01a38fc5853b5aac9c9477c3654 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Tue, 6 Oct 2020 19:25:25 -0400 Subject: [PATCH 234/476] Update gatk_sam_to_fastq_4.1.8.0.cwl --- .../gatk_sam_to_fastq_4.1.8.0.cwl | 215 +++++++++--------- 1 file changed, 111 insertions(+), 104 deletions(-) diff --git a/gatk_sam_to_fastq_4.1.8.0/gatk_sam_to_fastq_4.1.8.0.cwl b/gatk_sam_to_fastq_4.1.8.0/gatk_sam_to_fastq_4.1.8.0.cwl index 4af9fe5c..b4e5284e 100644 --- a/gatk_sam_to_fastq_4.1.8.0/gatk_sam_to_fastq_4.1.8.0.cwl +++ 
b/gatk_sam_to_fastq_4.1.8.0/gatk_sam_to_fastq_4.1.8.0.cwl @@ -6,7 +6,6 @@ $namespaces: foaf: 'http://xmlns.com/foaf/0.1/' sbg: 'https://www.sevenbridges.com/' id: gatk_sam_to_fastq_4_1_8_0 -label: GATK-SamToFastq baseCommand: - gatk - SamToFastq @@ -23,220 +22,228 @@ inputs: type: File inputBinding: position: 0 - prefix: --INPUT + prefix: '--INPUT' doc: Input SAM/BAM file to extract reads from Required. - - id: fastq - type: string? - inputBinding: - position: 0 - prefix: --FASTQ - doc: >- - Output FASTQ file (single-end fastq or, if paired, first end of the pair FASTQ). - Required. Cannot be used in conjunction with argument(s) OUTPUT_PER_RG (OPRG) - COMPRESS_OUTPUTS_PER_RG (GZOPRG) OUTPUT_DIR (ODIR) - id: clipping_action type: string? inputBinding: position: 0 - prefix: --CLIPPING_ACTION + prefix: '--CLIPPING_ACTION' doc: >- - The action that should be taken with clipped reads: 'X' means the reads and qualities - should be trimmed at the clipped position; 'N' means the bases should be changed to Ns in - the clipped region; and any integer means that the base qualities should be set to that - value in the clipped region. Default value: null. + The action that should be taken with clipped reads: 'X' means the reads + and qualities should be trimmed at the clipped position; 'N' means the + bases should be changed to Ns in the clipped region; and any integer means + that the base qualities should be set to that value in the clipped + region. Default value: null. - id: clipping_attribute type: string? inputBinding: position: 0 - prefix: --CLIPPING_ATTRIBUTE + prefix: '--CLIPPING_ATTRIBUTE' doc: >- - The attribute that stores the position at which the SAM record should be clipped Default value: null. + The attribute that stores the position at which the SAM record should be + clipped Default value: null. - id: clipping_min_length type: int? 
inputBinding: position: 0 - prefix: --CLIPPING_MIN_LENGTH + prefix: '--CLIPPING_MIN_LENGTH' doc: >- - When performing clipping with the CLIPPING_ATTRIBUTE and CLIPPING_ACTION parameters, - ensure that the resulting reads after clipping are at least CLIPPING_MIN_LENGTH bases - long. If the original read is shorter than CLIPPING_MIN_LENGTH then the original read - length will be maintained. Default value: 0. + When performing clipping with the CLIPPING_ATTRIBUTE and CLIPPING_ACTION + parameters, ensure that the resulting reads after clipping are at least + CLIPPING_MIN_LENGTH bases long. If the original read is shorter than + CLIPPING_MIN_LENGTH then the original read length will be maintained. + Default value: 0. - id: compress_outputs_per_rg type: boolean? inputBinding: position: 0 - prefix: --COMPRESS_OUTPUTS_PER_RG + prefix: '--COMPRESS_OUTPUTS_PER_RG' doc: >- - Compress output FASTQ files per read group using gzip and append a .gz extension to the - file names. Default value: false. Possible values: {true, false} Cannot be used in - conjunction with argument(s) FASTQ (F) SECOND_END_FASTQ (F2) UNPAIRED_FASTQ (FU) + Compress output FASTQ files per read group using gzip and append a .gz + extension to the file names. Default value: false. Possible values: + {true, false} Cannot be used in conjunction with argument(s) FASTQ (F) + SECOND_END_FASTQ (F2) UNPAIRED_FASTQ (FU) - id: compression_level type: int? inputBinding: position: 0 - prefix: --COMPRESSION_LEVEL + prefix: '--COMPRESSION_LEVEL' doc: >- - Compression level for all compressed files created (e.g. BAM and VCF). Default value: 2. + Compression level for all compressed files created (e.g. BAM and VCF). + Default value: 2. - id: create_index type: boolean? inputBinding: position: 0 - prefix: --CREATE_INDEX + prefix: '--CREATE_INDEX' doc: >- - Whether to create a BAM index when writing a coordinate-sorted BAM file. Default value: - false. 
Possible values: {true, false} + Whether to create a BAM index when writing a coordinate-sorted BAM file. + Default value: false. Possible values: {true, false} - id: include_non_pf_reads type: boolean? inputBinding: position: 0 - prefix: --INCLUDE_NON_PF_READS + prefix: '--INCLUDE_NON_PF_READS' doc: >- - Include non-PF reads from the SAM file into the output FASTQ files. PF means 'passes - filtering'. Reads whose 'not passing quality controls' flag is set are non-PF reads. See - GATK Dictionary for more info. Default value: false. Possible values: {true, false} + Include non-PF reads from the SAM file into the output FASTQ files. PF + means 'passes filtering'. Reads whose 'not passing quality controls' flag + is set are non-PF reads. See GATK Dictionary for more info. Default + value: false. Possible values: {true, false} - id: include_non_primary_alignments type: boolean? inputBinding: position: 0 - prefix: --INCLUDE_NON_PRIMARY_ALIGNMENTS + prefix: '--INCLUDE_NON_PRIMARY_ALIGNMENTS' doc: >- - If true, include non-primary alignments in the output. Support of non-primary alignments - in SamToFastq is not comprehensive, so there may be exceptions if this is set to true and - there are paired reads with non-primary alignments. Default value: false. Possible - values: {true, false} + If true, include non-primary alignments in the output. Support of + non-primary alignments in SamToFastq is not comprehensive, so there may be + exceptions if this is set to true and there are paired reads with + non-primary alignments. Default value: false. Possible values: {true, + false} - id: interleave type: boolean? inputBinding: position: 0 - prefix: --INTERLEAVE + prefix: '--INTERLEAVE' doc: >- - Will generate an interleaved fastq if paired, each line will have /1 or /2 to describe - which end it came from Default value: false. 
Possible values: {true, false} - - id: max_records_in_ram - default: 50000 + Will generate an interleaved fastq if paired, each line will have /1 or /2 + to describe which end it came from Default value: false. Possible values: + {true, false} + - default: 50000 + id: max_records_in_ram type: int? inputBinding: position: 0 - prefix: --MAX_RECORDS_IN_RAM + prefix: '--MAX_RECORDS_IN_RAM' doc: >- - When writing files that need to be sorted, this will specify the number of records stored - in RAM before spilling to disk. Increasing this number reduces the number of file handles - needed to sort the file, and increases the amount of RAM needed. Default value: 500000. + When writing files that need to be sorted, this will specify the number of + records stored in RAM before spilling to disk. Increasing this number + reduces the number of file handles needed to sort the file, and increases + the amount of RAM needed. Default value: 500000. - id: output_dir type: string? inputBinding: position: 0 - prefix: --OUTPUT_DIR + prefix: '--OUTPUT_DIR' doc: >- - Directory in which to output the FASTQ file(s). Used only when OUTPUT_PER_RG is true. - Default value: null. Cannot be used in conjunction with argument(s) FASTQ (F). + Directory in which to output the FASTQ file(s). Used only when + OUTPUT_PER_RG is true. Default value: null. Cannot be used in conjunction + with argument(s) FASTQ (F). - id: create_md5_file type: boolean? inputBinding: position: 0 - prefix: --CREATE_MD5_FILE + prefix: '--CREATE_MD5_FILE' doc: >- - Whether to create an MD5 digest for any BAM or FASTQ files created. Default value: - false. Possible values: {true, false}. + Whether to create an MD5 digest for any BAM or FASTQ files created. + Default value: false. Possible values: {true, false}. - id: output_per_rg type: boolean? inputBinding: position: 0 - prefix: --OUTPUT_PER_RG + prefix: '--OUTPUT_PER_RG' doc: >- - Output a FASTQ file per read group (two FASTQ files per read group if the group is - paired). 
Default value: false. Possible values: {true, false} Cannot be used in - conjunction with argument(s) FASTQ (F) SECOND_END_FASTQ (F2) UNPAIRED_FASTQ (FU) + Output a FASTQ file per read group (two FASTQ files per read group if the + group is paired). Default value: false. Possible values: {true, false} + Cannot be used in conjunction with argument(s) FASTQ (F) SECOND_END_FASTQ + (F2) UNPAIRED_FASTQ (FU) - id: quality type: int? inputBinding: position: 0 - prefix: --QUALITY + prefix: '--QUALITY' doc: >- - End-trim reads using the phred/bwa quality trimming algorithm and this quality. Default value: null. + End-trim reads using the phred/bwa quality trimming algorithm and this + quality. Default value: null. - id: re_reverse type: boolean? inputBinding: position: 0 - prefix: --RE_REVERSE + prefix: '--RE_REVERSE' doc: >- - Re-reverse bases and qualities of reads with negative strand flag set before writing them - to FASTQ Default value: true. Possible values: {true, false} + Re-reverse bases and qualities of reads with negative strand flag set + before writing them to FASTQ Default value: true. Possible values: {true, + false} - id: read1_max_bases_to_write type: int? inputBinding: position: 0 - prefix: --READ1_MAX_BASES_TO_WRITE + prefix: '--READ1_MAX_BASES_TO_WRITE' doc: >- - The maximum number of bases to write from read 1 after trimming. If there are fewer than - this many bases left after trimming, all will be written. If this value is null then all - bases left after trimming will be written. Default value: null. + The maximum number of bases to write from read 1 after trimming. If there + are fewer than this many bases left after trimming, all will be written. + If this value is null then all bases left after trimming will be written. + Default value: null. - id: read1_trim type: int? inputBinding: position: 0 - prefix: --READ1_TRIM + prefix: '--READ1_TRIM' doc: >- - The number of bases to trim from the beginning of read 1. Default value: 0. 
+ The number of bases to trim from the beginning of read 1. Default value: + 0. - id: read2_max_bases_to_write type: int? inputBinding: position: 0 - prefix: --READ2_MAX_BASES_TO_WRITE + prefix: '--READ2_MAX_BASES_TO_WRITE' doc: >- - The maximum number of bases to write from read 2 after trimming. If there are fewer than - this many bases left after trimming, all will be written. If this value is null then all - bases left after trimming will be written. Default value: null. + The maximum number of bases to write from read 2 after trimming. If there + are fewer than this many bases left after trimming, all will be written. + If this value is null then all bases left after trimming will be written. + Default value: null. - id: read2_trim type: int? inputBinding: position: 0 - prefix: --READ2_TRIM + prefix: '--READ2_TRIM' doc: >- - The number of bases to trim from the beginning of read 2. Default value: 0. + The number of bases to trim from the beginning of read 2. Default value: + 0. - id: reference_sequence type: File? inputBinding: position: 0 - prefix: --REFERENCE_SEQUENCE - doc: >- - Reference sequence file. Default value: null. + prefix: '--REFERENCE_SEQUENCE' + doc: 'Reference sequence file. Default value: null.' - id: rg_tag type: string? inputBinding: position: 0 - prefix: --RG_TAG + prefix: '--RG_TAG' doc: >- - The read group tag (PU or ID) to be used to output a FASTQ file per read group. Default - value: PU. + The read group tag (PU or ID) to be used to output a FASTQ file per read + group. Default value: PU. - id: second_end_fastq type: string? inputBinding: position: 0 - prefix: --SECOND_END_FASTQ + prefix: '--SECOND_END_FASTQ' doc: >- - Output FASTQ file (if paired, second end of the pair FASTQ). Default value: null. Cannot - be used in conjunction with argument(s) OUTPUT_PER_RG (OPRG) COMPRESS_OUTPUTS_PER_RG - (GZOPRG) + Output FASTQ file (if paired, second end of the pair FASTQ). Default + value: null. 
Cannot be used in conjunction with argument(s) OUTPUT_PER_RG + (OPRG) COMPRESS_OUTPUTS_PER_RG (GZOPRG) - id: unpaired_fastq type: string? inputBinding: position: 0 - prefix: --UNPAIRED_FASTQ + prefix: '--UNPAIRED_FASTQ' doc: >- - Output FASTQ file for unpaired reads; may only be provided in paired-FASTQ mode Default - value: null. Cannot be used in conjunction with argument(s) OUTPUT_PER_RG (OPRG) - COMPRESS_OUTPUTS_PER_RG (GZOPRG) + Output FASTQ file for unpaired reads; may only be provided in paired-FASTQ + mode Default value: null. Cannot be used in conjunction with argument(s) + OUTPUT_PER_RG (OPRG) COMPRESS_OUTPUTS_PER_RG (GZOPRG) - id: validation_stringency type: string? inputBinding: position: 0 - prefix: --VALIDATION_STRINGENCY + prefix: '--VALIDATION_STRINGENCY' doc: >- - Validation stringency for all SAM files read by this program. Setting stringency to - SILENT can improve performance when processing a BAM file in which variable-length data - (read, qualities, tags) do not otherwise need to be decoded. Default value: STRICT. - Possible values: {STRICT, LENIENT, SILENT} + Validation stringency for all SAM files read by this program. Setting + stringency to SILENT can improve performance when processing a BAM file in + which variable-length data (read, qualities, tags) do not otherwise need + to be decoded. Default value: STRICT. Possible values: {STRICT, LENIENT, + SILENT} outputs: - id: gatk_sam_to_fastq_fastq type: File? 
@@ -246,7 +253,7 @@ outputs: if(inputs.fastq){ return inputs.fastq } else { - return inputs.input.basename.replace(/.bam|.sam/, '-R1.fastq') + return inputs.input.basename.replace(/.bam|.sam/, '_R1.fastq') } } - id: gatk_sam_to_fastq_unpaired_fastq @@ -257,7 +264,7 @@ outputs: if(inputs.unpaired_fastq){ return inputs.unpaired_fastq } else { - return inputs.input.basename.replace(/.bam|.sam/, '-unpaired.fastq') + return inputs.input.basename.replace(/.bam|.sam/, '_unpaired.fastq') } } - id: gatk_sam_to_fastq_second_end_fastq @@ -268,10 +275,10 @@ outputs: if(inputs.second_end_fastq){ return inputs.second_end_fastq } else { - return inputs.input.basename.replace(/.bam|.sam/, '-R2.fastq') + return inputs.input.basename.replace(/.bam|.sam/, '_R2.fastq') } } - +label: GATK-SamToFastq arguments: - position: 0 prefix: '--java-options' @@ -302,7 +309,7 @@ arguments: } - position: 0 prefix: '--TMP_DIR' - valueFrom: . + valueFrom: $(runtime.tmpdir) - position: 2 prefix: '--FASTQ' valueFrom: |- @@ -310,13 +317,13 @@ arguments: if(inputs.fastq){ return inputs.fastq } else { - return inputs.input.basename.replace(/.bam|.sam/, '-R1.fastq') + return inputs.input.basename.replace(/.bam|.sam/, '_R1.fastq') } } requirements: - class: ResourceRequirement - ramMin: 10000 - coresMin: 1 + ramMin: 8000 + coresMin: 2 - class: DockerRequirement dockerPull: 'broadinstitute/gatk:4.1.8.0' - class: InlineJavascriptRequirement From ab914dd6c00e3c0ef13c7febb0a09b22e29dd234 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Tue, 6 Oct 2020 19:45:09 -0400 Subject: [PATCH 235/476] Update gatk_sam_to_fastq_4.1.8.0.cwl --- gatk_sam_to_fastq_4.1.8.0/gatk_sam_to_fastq_4.1.8.0.cwl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/gatk_sam_to_fastq_4.1.8.0/gatk_sam_to_fastq_4.1.8.0.cwl b/gatk_sam_to_fastq_4.1.8.0/gatk_sam_to_fastq_4.1.8.0.cwl index b4e5284e..59880b1c 100644 --- a/gatk_sam_to_fastq_4.1.8.0/gatk_sam_to_fastq_4.1.8.0.cwl +++ b/gatk_sam_to_fastq_4.1.8.0/gatk_sam_to_fastq_4.1.8.0.cwl @@ 
-18,6 +18,11 @@ inputs: doc: Memory overhead per job in megabytes - id: number_of_threads type: int? + - id: fastq + type: string? + doc: >- + Output FASTQ file (single-end fastq or, if paired, first end of the pair + FASTQ) - id: input type: File inputBinding: From 629bcee77759638ca48b6d3a87259a130670621e Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Tue, 6 Oct 2020 23:04:23 -0400 Subject: [PATCH 236/476] Update for TMPDIR --- .../gatk_base_recalibrator_4.1.8.1.cwl | 2 +- ...lect_alignment_summary_metrics_4.1.8.0.cwl | 83 ++-- .../gatk_collect_hs_metrics_4.1.8.0.cwl | 115 +++--- ...tk_collect_insert_size_metrics_4.1.8.0.cwl | 82 ++-- .../gatk_merge_bam_alignment_4.1.8.0.cwl | 383 ++++++++++++------ .../gatk_merge_sam_files_4.1.8.0.cwl | 129 +++--- 6 files changed, 480 insertions(+), 314 deletions(-) diff --git a/gatk_base_recalibrator_4.1.8.1/gatk_base_recalibrator_4.1.8.1.cwl b/gatk_base_recalibrator_4.1.8.1/gatk_base_recalibrator_4.1.8.1.cwl index 7c2d439c..ec5caec6 100644 --- a/gatk_base_recalibrator_4.1.8.1/gatk_base_recalibrator_4.1.8.1.cwl +++ b/gatk_base_recalibrator_4.1.8.1/gatk_base_recalibrator_4.1.8.1.cwl @@ -29,7 +29,7 @@ inputs: One or more databases of known polymorphic sites used to exclude regions around known polymorphisms from analysis secondaryFiles: - - ^.idx + - .idx - id: reference type: File inputBinding: diff --git a/gatk_collect_alignment_summary_metrics_4.1.8.0/gatk_collect_alignment_summary_metrics_4.1.8.0.cwl b/gatk_collect_alignment_summary_metrics_4.1.8.0/gatk_collect_alignment_summary_metrics_4.1.8.0.cwl index b53a46af..6c462c77 100644 --- a/gatk_collect_alignment_summary_metrics_4.1.8.0/gatk_collect_alignment_summary_metrics_4.1.8.0.cwl +++ b/gatk_collect_alignment_summary_metrics_4.1.8.0/gatk_collect_alignment_summary_metrics_4.1.8.0.cwl @@ -6,7 +6,6 @@ $namespaces: foaf: 'http://xmlns.com/foaf/0.1/' sbg: 'https://www.sevenbridges.com/' id: gatk_collect_alignment_summary_metrics_4.1.8.0 -label: GATK-CollectAlignmentSummaryMetrics 
baseCommand: - gatk - CollectAlignmentSummaryMetrics @@ -23,7 +22,7 @@ inputs: type: File inputBinding: position: 0 - prefix: -I + prefix: '-I' doc: Input file (bam or sam). Required. - id: output_file_name type: string? @@ -32,22 +31,23 @@ inputs: type: File? inputBinding: position: 0 - prefix: -R + prefix: '-R' + doc: >- + Reference sequence file. Note that while this argument is not required, + without it only a small subset of the metrics will be calculated. Note + also that if a reference sequence is provided, it must be accompanied by a + sequence dictionary. Default value: null. secondaryFiles: - ^.fasta.fai - ^.dict - doc: >- - Reference sequence file. Note that while this argument is not required, without it only a - small subset of the metrics will be calculated. Note also that if a reference sequence is - provided, it must be accompanied by a sequence dictionary. Default value: null. - id: adaptor_sequence type: string? inputBinding: position: 0 - prefix: --ADAPTER_SEQUENCE + prefix: '--ADAPTER_SEQUENCE' doc: >- - List of adapter sequences to use when processing the alignment metrics. This argument may - be specified 0 or more times. Default value: + List of adapter sequences to use when processing the alignment metrics. + This argument may be specified 0 or more times. Default value: [AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT, AGATCGGAAGAGCTCGTATGCCGTCTTCTGCTTG, AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT, @@ -58,7 +58,7 @@ inputs: type: string? inputBinding: position: 0 - prefix: --METRIC_ACCUMULATION_LEVEL + prefix: '--METRIC_ACCUMULATION_LEVEL' doc: >- The level(s) at which to accumulate metrics. Default value: [ALL_READS]. This option can be set to 'null' to clear the default value. Possible @@ -69,53 +69,53 @@ inputs: type: string? 
inputBinding: position: 0 - prefix: --EXPECTED_PAIR_ORIENTATIONS + prefix: '--EXPECTED_PAIR_ORIENTATIONS' doc: >- - Paired-end reads that do not have this expected orientation will be considered chimeric. - This argument may be specified 0 or more times. Default value: [FR]. Possible values: {FR, - RF, TANDEM} + Paired-end reads that do not have this expected orientation will be + considered chimeric. This argument may be specified 0 or more times. + Default value: [FR]. Possible values: {FR, RF, TANDEM} - id: is_bisulfite_sequenced type: boolean? inputBinding: position: 0 - prefix: --IS_BISULFITE_SEQUENCED + prefix: '--IS_BISULFITE_SEQUENCED' doc: >- - Whether the SAM or BAM file consists of bisulfite sequenced reads. Default value: false. - Possible values: {true, false} + Whether the SAM or BAM file consists of bisulfite sequenced reads. + Default value: false. Possible values: {true, false} - id: max_insert_size type: int? inputBinding: position: 0 - prefix: --MAX_INSERT_SIZE + prefix: '--MAX_INSERT_SIZE' doc: >- - Paired-end reads above this insert size will be considered chimeric along with - inter-chromosomal pairs. Default value: 100000. + Paired-end reads above this insert size will be considered chimeric along + with inter-chromosomal pairs. Default value: 100000. - id: validation_stringency type: string? inputBinding: position: 0 - prefix: --VALIDATION_STRINGENCY + prefix: '--VALIDATION_STRINGENCY' doc: >- Validation stringency for all SAM files read by this program. Setting stringency to SILENT can improve performance when processing a BAM file in which variable-length data (read, qualities, tags) do not otherwise need to be decoded. Default value: STRICT. This option can be set to 'null' to clear the default value. Possible values: {STRICT,LENIENT, SILENT} - - id: assume_sorted - default: true + - default: true + id: assume_sorted type: boolean? 
inputBinding: position: 0 - prefix: --ASSUME_SORTED + prefix: '--ASSUME_SORTED' doc: >- - If true (default), then the sort order in the header file will be ignored. Default value: - true. This option can be set to 'null' to clear the default value. Possible values: {true, - false} + If true (default), then the sort order in the header file will be + ignored. Default value: true. This option can be set to 'null' to clear + the default value. Possible values: {true, false} - id: stop_after type: int? inputBinding: position: 0 - prefix: --STOP_AFTER + prefix: '--STOP_AFTER' doc: >- Stop after processing N reads, mainly for debugging. Default value: 0. This option can be set to 'null' to clear the default value. @@ -123,32 +123,34 @@ inputs: type: boolean? inputBinding: position: 0 - prefix: --CREATE_INDEX + prefix: '--CREATE_INDEX' doc: >- - Whether to create a BAM index when writing a coordinate-sorted BAM file. Default value: - false. Possible values: {true, false} + Whether to create a BAM index when writing a coordinate-sorted BAM file. + Default value: false. Possible values: {true, false} - id: create_md5_file type: boolean? inputBinding: position: 0 - prefix: --CREATE_MD5_FILE + prefix: '--CREATE_MD5_FILE' doc: >- - Whether to create an MD5 digest for any BAM or FASTQ files created. Default value: - false. Possible values: {true, false} + Whether to create an MD5 digest for any BAM or FASTQ files created. + Default value: false. Possible values: {true, false} - id: use_jdk_deflater type: boolean? inputBinding: position: 0 - prefix: --USE_JDK_DEFLATER + prefix: '--USE_JDK_DEFLATER' doc: >- - Use the JDK Deflater instead of the Intel Deflater for writing compressed output + Use the JDK Deflater instead of the Intel Deflater for writing compressed + output - id: use_jdk_inflater type: boolean? 
inputBinding: position: 0 - prefix: --USE_JDK_INFLATER + prefix: '--USE_JDK_INFLATER' doc: >- - Use the JDK Inflater instead of the Intel Inflater for reading compressed input + Use the JDK Inflater instead of the Intel Inflater for reading compressed + input outputs: - id: gatk_collect_alignment_summary_metrics_txt type: File @@ -161,6 +163,7 @@ outputs: return inputs.input.basename.replace(/.bam/, '_alignment_summary_metrics.txt') } } +label: GATK-CollectAlignmentSummaryMetrics arguments: - position: 0 prefix: '--java-options' @@ -191,7 +194,7 @@ arguments: } - position: 0 prefix: '--TMP_DIR' - valueFrom: . + valueFrom: $(runtime.tmpdir) - position: 0 prefix: '--COMPRESSION_LEVEL' valueFrom: '2' diff --git a/gatk_collect_hs_metrics_4.1.8.0/gatk_collect_hs_metrics_4.1.8.0.cwl b/gatk_collect_hs_metrics_4.1.8.0/gatk_collect_hs_metrics_4.1.8.0.cwl index 323180fb..aff4264b 100644 --- a/gatk_collect_hs_metrics_4.1.8.0/gatk_collect_hs_metrics_4.1.8.0.cwl +++ b/gatk_collect_hs_metrics_4.1.8.0/gatk_collect_hs_metrics_4.1.8.0.cwl @@ -6,7 +6,6 @@ $namespaces: foaf: 'http://xmlns.com/foaf/0.1/' sbg: 'https://www.sevenbridges.com/' id: gatk_collect_hs_metrics_4_1_8_0 -label: GATK-CollectHsMetrics baseCommand: - gatk - CollectHsMetrics @@ -15,133 +14,138 @@ inputs: type: File inputBinding: position: 0 - prefix: -I + prefix: '-I' doc: An aligned SAM or BAM file. Required. - id: bait_intervals type: File inputBinding: position: 0 - prefix: --BAIT_INTERVALS + prefix: '--BAIT_INTERVALS' doc: >- - An interval list file that contains the locations of the baits used. This argument must - be specified at least once. Required. + An interval list file that contains the locations of the baits used. This + argument must be specified at least once. Required. - id: target_intervals type: File inputBinding: position: 0 - prefix: --TARGET_INTERVALS + prefix: '--TARGET_INTERVALS' doc: >- - An interval list file that contains the locations of the targets. 
This argument must be - specified at least once. Required. + An interval list file that contains the locations of the targets. This + argument must be specified at least once. Required. - id: output_file_name type: string? doc: The output file to write the metrics to. Required. - id: per_base_coverage type: string? doc: >- - An optional file to output per base coverage information to. The per-base file contains - one line per target base and can grow very large. It is not recommended for use with large - target sets. Default value: null. + An optional file to output per base coverage information to. The per-base + file contains one line per target base and can grow very large. It is not + recommended for use with large target sets. Default value: null. - id: per_target_coverage type: string? doc: >- - An optional file to output per target coverage information to. Default value: null. + An optional file to output per target coverage information to. Default + value: null. - id: theoretical_sensitivity_output type: string? inputBinding: position: 0 - prefix: --THEORETICAL_SENSITIVITY_OUTPUT + prefix: '--THEORETICAL_SENSITIVITY_OUTPUT' doc: >- - Output for Theoretical Sensitivity metrics where the allele fractions are provided by the - ALLELE_FRACTION argument. Default value: null. + Output for Theoretical Sensitivity metrics where the allele fractions are + provided by the ALLELE_FRACTION argument. Default value: null. - id: allele_fraction type: float? inputBinding: position: 0 - prefix: --ALLELE_FRACTION + prefix: '--ALLELE_FRACTION' doc: >- - Allele fraction for which to calculate theoretical sensitivity. This argument may be - specified 0 or more times. Default value: [0.001, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2, 0.3, - 0.5]. + Allele fraction for which to calculate theoretical sensitivity. This + argument may be specified 0 or more times. Default value: [0.001, 0.005, + 0.01, 0.02, 0.05, 0.1, 0.2, 0.3, 0.5]. - id: bait_set_name type: string? 
inputBinding: position: 0 - prefix: --BAIT_SET_NAME + prefix: '--BAIT_SET_NAME' doc: >- - Bait set name. If not provided it is inferred from the filename of the bait intervals. - Default value: null. + Bait set name. If not provided it is inferred from the filename of the + bait intervals. Default value: null. - id: clip_overlapping_reads type: boolean? inputBinding: position: 0 - prefix: --CLIP_OVERLAPPING_READS + prefix: '--CLIP_OVERLAPPING_READS' doc: >- - True if we are to clip overlapping reads, false otherwise. Default value: true. Possible - values: {true, false} + True if we are to clip overlapping reads, false otherwise. Default value: + true. Possible values: {true, false} - id: coverage_cap type: int? inputBinding: position: 0 - prefix: --COVERAGE_CAP + prefix: '--COVERAGE_CAP' doc: >- - Parameter to set a max coverage limit for Theoretical Sensitivity calculations. Default is - 200. Default value: 200. + Parameter to set a max coverage limit for Theoretical Sensitivity + calculations. Default is 200. Default value: 200. - id: include_indels type: boolean? inputBinding: position: 0 - prefix: --INCLUDE_INDELS + prefix: '--INCLUDE_INDELS' doc: >- - If true count inserted bases as on target and deleted bases as covered by a read. Default - value: false. Possible values: {true, false} + If true count inserted bases as on target and deleted bases as covered by + a read. Default value: false. Possible values: {true, false} - id: minimum_base_quality type: int? inputBinding: position: 0 - prefix: --MINIMUM_BASE_QUALITY + prefix: '--MINIMUM_BASE_QUALITY' doc: >- - Minimum base quality for a base to contribute coverage. Default value: 20. + Minimum base quality for a base to contribute coverage. Default value: + 20. - id: minimum_mapping_quality type: int? inputBinding: position: 0 - prefix: --MINIMUM_MAPPING_QUALITY + prefix: '--MINIMUM_MAPPING_QUALITY' doc: >- - Minimum mapping quality for a read to contribute coverage. Default value: 20. 
+ Minimum mapping quality for a read to contribute coverage. Default value: + 20. - id: near_distance type: int? inputBinding: position: 0 - prefix: --NEAR_DISTANCE + prefix: '--NEAR_DISTANCE' doc: >- - The maximum distance between a read and the nearest probe/bait/amplicon for the read to be - considered 'near probe' and included in percent selected. Default value: 250. + The maximum distance between a read and the nearest probe/bait/amplicon + for the read to be considered 'near probe' and included in percent + selected. Default value: 250. - id: sample_size type: int? inputBinding: position: 0 - prefix: --SAMPLE_SIZE + prefix: '--SAMPLE_SIZE' doc: >- - Sample Size used for Theoretical Het Sensitivity sampling. Default is 10000. Default - value: 10000. + Sample Size used for Theoretical Het Sensitivity sampling. Default is + 10000. Default value: 10000. - id: reference type: File? inputBinding: position: 0 - prefix: -R + prefix: '-R' + doc: >- + Reference sequence file. Note that while this argument is not required, + without it only a small subset of the metrics will be calculated. Note + also that if a reference sequence is provided, it must be accompanied by a + sequence dictionary. Default value: null. secondaryFiles: - ^.fasta.fai - ^.dict - doc: >- - Reference sequence file. Note that while this argument is not required, without it only a - small subset of the metrics will be calculated. Note also that if a reference sequence is - provided, it must be accompanied by a sequence dictionary. Default value: null. - id: metrics_acciumulation_level type: string? inputBinding: position: 0 - prefix: --METRIC_ACCUMULATION_LEVEL + prefix: '--METRIC_ACCUMULATION_LEVEL' doc: >- The level(s) at which to accumulate metrics. Default value: [ALL_READS]. This option can be set to 'null' to clear the default value. Possible @@ -152,7 +156,7 @@ inputs: type: string? 
inputBinding: position: 0 - prefix: --VALIDATION_STRINGENCY + prefix: '--VALIDATION_STRINGENCY' doc: >- Validation stringency for all SAM files read by this program. Setting stringency to SILENT can improve performance when processing a BAM file in @@ -163,18 +167,18 @@ inputs: type: boolean? inputBinding: position: 0 - prefix: --CREATE_INDEX + prefix: '--CREATE_INDEX' doc: >- - Whether to create a BAM index when writing a coordinate-sorted BAM file. Default value: - false. Possible values: {true, false} + Whether to create a BAM index when writing a coordinate-sorted BAM file. + Default value: false. Possible values: {true, false} - id: create_md5_file type: boolean? inputBinding: position: 0 - prefix: --CREATE_MD5_FILE + prefix: '--CREATE_MD5_FILE' doc: >- - Whether to create an MD5 digest for any BAM or FASTQ files created. Default value: - false. Possible values: {true, false} + Whether to create an MD5 digest for any BAM or FASTQ files created. + Default value: false. Possible values: {true, false} - id: memory_per_job type: int? doc: Memory per job in megabytes @@ -217,6 +221,7 @@ outputs: return inputs.input.basename.replace(/.bam/, '_per_target_coverage.txt') } } +label: GATK-CollectHsMetrics arguments: - position: 0 prefix: '--java-options' @@ -247,7 +252,7 @@ arguments: } - position: 0 prefix: '--TMP_DIR' - valueFrom: . 
+ valueFrom: $(runtime.tmpdir) - position: 0 prefix: '--COMPRESSION_LEVEL' valueFrom: '2' diff --git a/gatk_collect_insert_size_metrics_4.1.8.0/gatk_collect_insert_size_metrics_4.1.8.0.cwl b/gatk_collect_insert_size_metrics_4.1.8.0/gatk_collect_insert_size_metrics_4.1.8.0.cwl index 8b6ca7a5..f8b39cbd 100644 --- a/gatk_collect_insert_size_metrics_4.1.8.0/gatk_collect_insert_size_metrics_4.1.8.0.cwl +++ b/gatk_collect_insert_size_metrics_4.1.8.0/gatk_collect_insert_size_metrics_4.1.8.0.cwl @@ -6,7 +6,6 @@ $namespaces: foaf: 'http://xmlns.com/foaf/0.1/' sbg: 'https://www.sevenbridges.com/' id: gatk_collect_insert_size_metrics_4_1_8_0 -label: GATK-CollectInsertSizeMetrics baseCommand: - gatk - CollectInsertSizeMetrics @@ -23,7 +22,7 @@ inputs: type: File inputBinding: position: 0 - prefix: -I + prefix: '-I' doc: Input file (bam or sam). Required. - id: output_file_name type: string? @@ -35,36 +34,38 @@ inputs: type: float? inputBinding: position: 0 - prefix: --DEVIATIONS + prefix: '--DEVIATIONS' doc: >- Generate mean, sd and plots by trimming the data down to MEDIAN + - DEVIATIONS*MEDIAN_ABSOLUTE_DEVIATION. This is done because insert size data typically - includes enough anomalous values from chimeras and other artifacts to make the mean and sd - grossly misleading regarding the real distribution. Default value: 10.0. This option can - be set to 'null' to clear the default value. + DEVIATIONS*MEDIAN_ABSOLUTE_DEVIATION. This is done because insert size + data typically includes enough anomalous values from chimeras and other + artifacts to make the mean and sd grossly misleading regarding the real + distribution. Default value: 10.0. This option can be set to 'null' to + clear the default value. - id: histogram_width type: int? inputBinding: position: 0 - prefix: --HISTOGRAM_WIDTH + prefix: '--HISTOGRAM_WIDTH' doc: >- - Explicitly sets the Histogram width, overriding automatic truncation of Histogram tail. 
- Also, when calculating mean and standard deviation, only bins <= Histogram_WIDTH will be - included. Default value: null. + Explicitly sets the Histogram width, overriding automatic truncation of + Histogram tail. Also, when calculating mean and standard deviation, only + bins <= Histogram_WIDTH will be included. Default value: null. - id: minimum_pct type: float? inputBinding: position: 0 - prefix: --MINIMUM_PCT + prefix: '--MINIMUM_PCT' doc: >- - When generating the Histogram, discard any data categories (out of FR, TANDEM, RF) that - have fewer than this percentage of overall reads. (Range: 0 to 1). Default value: 0.05. - This option can be set to 'null' to clear the default value. + When generating the Histogram, discard any data categories (out of FR, + TANDEM, RF) that have fewer than this percentage of overall reads. (Range: + 0 to 1). Default value: 0.05. This option can be set to 'null' to clear + the default value. - id: metrics_acciumulation_level type: string? inputBinding: position: 0 - prefix: --METRIC_ACCUMULATION_LEVEL + prefix: '--METRIC_ACCUMULATION_LEVEL' doc: >- The level(s) at which to accumulate metrics. Default value: [ALL_READS]. This option can be set to 'null' to clear the default value. Possible @@ -75,37 +76,37 @@ inputs: type: boolean? inputBinding: position: 0 - prefix: --INCLUDE_DUPLICATES + prefix: '--INCLUDE_DUPLICATES' doc: >- - If true, also include reads marked as duplicates in the insert size histogram. Default - value: false. This option can be set to 'null' to clear the default value. Possible - values: {true, false} + If true, also include reads marked as duplicates in the insert size + histogram. Default value: false. This option can be set to 'null' to + clear the default value. Possible values: {true, false} - id: validation_stringency type: string? inputBinding: position: 0 - prefix: --VALIDATION_STRINGENCY + prefix: '--VALIDATION_STRINGENCY' doc: >- Validation stringency for all SAM files read by this program. 
Setting stringency to SILENT can improve performance when processing a BAM file in which variable-length data (read, qualities, tags) do not otherwise need to be decoded. Default value: STRICT. This option can be set to 'null' to clear the default value. Possible values: {STRICT,LENIENT, SILENT} - - id: assume_sorted - default: true + - default: true + id: assume_sorted type: boolean? inputBinding: position: 0 - prefix: --ASSUME_SORTED + prefix: '--ASSUME_SORTED' doc: >- - If true (default), then the sort order in the header file will be ignored. Default value: - true. This option can be set to 'null' to clear the default value. Possible values: {true, - false} + If true (default), then the sort order in the header file will be + ignored. Default value: true. This option can be set to 'null' to clear + the default value. Possible values: {true, false} - id: stop_after type: int? inputBinding: position: 0 - prefix: --STOP_AFTER + prefix: '--STOP_AFTER' doc: >- Stop after processing N reads, mainly for debugging. Default value: 0. This option can be set to 'null' to clear the default value. @@ -113,32 +114,34 @@ inputs: type: boolean? inputBinding: position: 0 - prefix: --CREATE_INDEX + prefix: '--CREATE_INDEX' doc: >- - Whether to create a BAM index when writing a coordinate-sorted BAM file. Default value: - false. Possible values: {true, false} + Whether to create a BAM index when writing a coordinate-sorted BAM file. + Default value: false. Possible values: {true, false} - id: create_md5_file type: boolean? inputBinding: position: 0 - prefix: --CREATE_MD5_FILE + prefix: '--CREATE_MD5_FILE' doc: >- - Whether to create an MD5 digest for any BAM or FASTQ files created. Default value: - false. Possible values: {true, false} + Whether to create an MD5 digest for any BAM or FASTQ files created. + Default value: false. Possible values: {true, false} - id: use_jdk_deflater type: boolean? 
inputBinding: position: 0 - prefix: --USE_JDK_DEFLATER + prefix: '--USE_JDK_DEFLATER' doc: >- - Use the JDK Deflater instead of the Intel Deflater for writing compressed output + Use the JDK Deflater instead of the Intel Deflater for writing compressed + output - id: use_jdk_inflater type: boolean? inputBinding: position: 0 - prefix: --USE_JDK_INFLATER + prefix: '--USE_JDK_INFLATER' doc: >- - Use the JDK Inflater instead of the Intel Inflater for reading compressed input + Use the JDK Inflater instead of the Intel Inflater for reading compressed + input outputs: - id: gatk_collect_insert_size_metrics_txt type: File @@ -162,6 +165,7 @@ outputs: return inputs.input.basename.replace(/.bam/, '_histogram.pdf') } } +label: GATK-CollectInsertSizeMetrics arguments: - position: 0 prefix: '--java-options' @@ -192,7 +196,7 @@ arguments: } - position: 0 prefix: '--TMP_DIR' - valueFrom: . + valueFrom: $(runtime.tmpdir) - position: 0 prefix: '--COMPRESSION_LEVEL' valueFrom: '2' diff --git a/gatk_merge_bam_alignment_4.1.8.0/gatk_merge_bam_alignment_4.1.8.0.cwl b/gatk_merge_bam_alignment_4.1.8.0/gatk_merge_bam_alignment_4.1.8.0.cwl index 0ca74322..72b01eeb 100644 --- a/gatk_merge_bam_alignment_4.1.8.0/gatk_merge_bam_alignment_4.1.8.0.cwl +++ b/gatk_merge_bam_alignment_4.1.8.0/gatk_merge_bam_alignment_4.1.8.0.cwl @@ -6,7 +6,6 @@ $namespaces: foaf: 'http://xmlns.com/foaf/0.1/' sbg: 'https://www.sevenbridges.com/' id: gatk_merge_bam_alignment_4_1_8_0 -label: GATK-MergeBamAlignment baseCommand: - gatk - MergeBamAlignment @@ -23,15 +22,17 @@ inputs: type: File inputBinding: position: 0 - prefix: --UNMAPPED_BAM - doc: | - Original SAM or BAM file of unmapped reads, which must be in queryname order. Reads MUST + prefix: '--UNMAPPED_BAM' + doc: > + Original SAM or BAM file of unmapped reads, which must be in queryname + order. Reads MUST + be unmapped. Required. 
- id: reference type: File inputBinding: position: 0 - prefix: --REFERENCE_SEQUENCE + prefix: '--REFERENCE_SEQUENCE' doc: | Reference sequence file. Required. secondaryFiles: @@ -44,274 +45,389 @@ inputs: type: boolean? inputBinding: position: 0 - prefix: --ADD_MATE_CIGAR - doc: | - Adds the mate CIGAR tag (MC) if true, does not if false. Default value: true. Possible + prefix: '--ADD_MATE_CIGAR' + doc: > + Adds the mate CIGAR tag (MC) if true, does not if false. Default value: + true. Possible + values: {true, false} - id: add_pg_tag_to_reads type: boolean? inputBinding: position: 0 - prefix: --ADD_PG_TAG_TO_READS - doc: | - Add PG tag to each read in a SAM or BAM Default value: true. Possible values: {true, + prefix: '--ADD_PG_TAG_TO_READS' + doc: > + Add PG tag to each read in a SAM or BAM Default value: true. Possible + values: {true, + false} - id: aligned_bam type: - - "null" + - 'null' - type: array items: File inputBinding: - prefix: --ALIGNED_BAM + prefix: '--ALIGNED_BAM' inputBinding: position: 1 - doc: | - SAM or BAM file(s) with alignment data. This argument may be specified 0 or more times. - Default value: null. Cannot be used in conjunction with argument(s) READ1_ALIGNED_BAM + doc: > + SAM or BAM file(s) with alignment data. This argument may be specified 0 + or more times. + + Default value: null. Cannot be used in conjunction with argument(s) + READ1_ALIGNED_BAM + (R1_ALIGNED) READ2_ALIGNED_BAM (R2_ALIGNED) - id: aligned_reads_only type: boolean? inputBinding: position: 0 - prefix: --ALIGNED_READS_ONLY - doc: | - Whether to output only aligned reads. Default value: false. Possible values: {true, + prefix: '--ALIGNED_READS_ONLY' + doc: > + Whether to output only aligned reads. Default value: false. Possible + values: {true, + false} - id: aligner_proper_pair_flags type: boolean? inputBinding: position: 0 - prefix: --ALIGNER_PROPER_PAIR_FLAGS - doc: | - Use the aligners idea of what a proper pair is rather than computing in this program. 
+ prefix: '--ALIGNER_PROPER_PAIR_FLAGS' + doc: > + Use the aligners idea of what a proper pair is rather than computing in + this program. + Default value: false. Possible values: {true, false} - id: attributes_to_remove type: string? inputBinding: position: 0 - prefix: --ATTRIBUTES_TO_REMOVE - doc: | - Attributes from the alignment record that should be removed when merging. This overrides - ATTRIBUTES_TO_RETAIN if they share common tags. This argument may be specified 0 or more + prefix: '--ATTRIBUTES_TO_REMOVE' + doc: > + Attributes from the alignment record that should be removed when merging. + This overrides + + ATTRIBUTES_TO_RETAIN if they share common tags. This argument may be + specified 0 or more + times. Default value: null. - id: attributes_to_retain type: string? inputBinding: position: 0 - prefix: --ATTRIBUTES_TO_RETAIN - doc: | - Reserved alignment attributes (tags starting with X, Y, or Z) that should be brought over - from the alignment data when merging. This argument may be specified 0 or more times. + prefix: '--ATTRIBUTES_TO_RETAIN' + doc: > + Reserved alignment attributes (tags starting with X, Y, or Z) that should + be brought over + + from the alignment data when merging. This argument may be specified 0 or + more times. + Default value: null. - id: attributes_to_reverse type: string? inputBinding: position: 0 - prefix: --ATTRIBUTES_TO_REVERSE - doc: | - Attributes on negative strand reads that need to be reversed. This argument may be + prefix: '--ATTRIBUTES_TO_REVERSE' + doc: > + Attributes on negative strand reads that need to be reversed. This + argument may be + specified 0 or more times. Default value: [OQ, U2]. - id: attributes_to_reverse_complement type: string? inputBinding: position: 0 - prefix: --ATTRIBUTES_TO_REVERSE_COMPLEMENT - doc: | - Attributes on negative strand reads that need to be reverse complemented. 
This argument + prefix: '--ATTRIBUTES_TO_REVERSE_COMPLEMENT' + doc: > + Attributes on negative strand reads that need to be reverse complemented. + This argument + may be specified 0 or more times. Default value: [E2, SQ]. - id: clip_adapters type: boolean? inputBinding: position: 0 - prefix: --CLIP_ADAPTERS - doc: | - Whether to clip adapters where identified. Default value: true. Possible values: {true, + prefix: '--CLIP_ADAPTERS' + doc: > + Whether to clip adapters where identified. Default value: true. Possible + values: {true, + false} - id: clip_overlapping_reads type: boolean? inputBinding: position: 0 - prefix: --CLIP_OVERLAPPING_READS - doc: | - For paired reads, clip the 3' end of each read if necessary so that it does not extend - past the 5' end of its mate. Clipping will be either soft or hard clipping, depending on - CLIP_OVERLAPPING_READS_OPERATOR setting. Hard clipped bases and their qualities will be - stored in the XB and XQ tags respectively. Default value: true. Possible values: {true, + prefix: '--CLIP_OVERLAPPING_READS' + doc: > + For paired reads, clip the 3' end of each read if necessary so that it + does not extend + + past the 5' end of its mate. Clipping will be either soft or hard + clipping, depending on + + CLIP_OVERLAPPING_READS_OPERATOR setting. Hard clipped bases and their + qualities will be + + stored in the XB and XQ tags respectively. Default value: true. Possible + values: {true, + false} - id: expected_orientations type: string? inputBinding: position: 0 - prefix: --EXPECTED_ORIENTATIONS - doc: | - The expected orientation of proper read pairs. Replaces JUMP_SIZE This argument may be - specified 0 or more times. Default value: null. Possible values: {FR, RF, TANDEM} Cannot + prefix: '--EXPECTED_ORIENTATIONS' + doc: > + The expected orientation of proper read pairs. Replaces JUMP_SIZE This + argument may be + + specified 0 or more times. Default value: null. 
Possible values: {FR, RF, + TANDEM} Cannot + be used in conjunction with argument(s) JUMP_SIZE (JUMP) - id: hard_clip_overlapping_reads type: boolean? inputBinding: position: 0 - prefix: --HARD_CLIP_OVERLAPPING_READS - doc: | - If true, hard clipping will be applied to overlapping reads. By default, soft clipping is + prefix: '--HARD_CLIP_OVERLAPPING_READS' + doc: > + If true, hard clipping will be applied to overlapping reads. By default, + soft clipping is + used. Default value: false. Possible values: {true, false} - id: include_secondary_alignments type: boolean? inputBinding: position: 0 - prefix: --INCLUDE_SECONDARY_ALIGNMENTS - doc: | - If false, do not write secondary alignments to output. Default value: true. Possible + prefix: '--INCLUDE_SECONDARY_ALIGNMENTS' + doc: > + If false, do not write secondary alignments to output. Default value: + true. Possible + values: {true, false} - id: is_bisulfite_sequence type: boolean? inputBinding: position: 0 - prefix: --IS_BISULFITE_SEQUENCE - doc: | - Whether the lane is bisulfite sequence (used when calculating the NM tag). Default value: + prefix: '--IS_BISULFITE_SEQUENCE' + doc: > + Whether the lane is bisulfite sequence (used when calculating the NM + tag). Default value: + false. Possible values: {true, false} - id: jump_size type: int? inputBinding: position: 0 - prefix: --JUMP_SIZE - doc: | - The expected jump size (required if this is a jumping library). Deprecated. Use - EXPECTED_ORIENTATIONS instead Default value: null. Cannot be used in conjunction with + prefix: '--JUMP_SIZE' + doc: > + The expected jump size (required if this is a jumping library). + Deprecated. Use + + EXPECTED_ORIENTATIONS instead Default value: null. Cannot be used in + conjunction with + argument(s) EXPECTED_ORIENTATIONS (ORIENTATIONS) - id: matching_dictionary_tags type: string? 
inputBinding: position: 0 - prefix: --MATCHING_DICTIONARY_TAGS - doc: | - List of Sequence Records tags that must be equal (if present) in the reference dictionary - and in the aligned file. Mismatching tags will cause an error if in this list, and a - warning otherwise. This argument may be specified 0 or more times. Default value: [M5, + prefix: '--MATCHING_DICTIONARY_TAGS' + doc: > + List of Sequence Records tags that must be equal (if present) in the + reference dictionary + + and in the aligned file. Mismatching tags will cause an error if in this + list, and a + + warning otherwise. This argument may be specified 0 or more times. + Default value: [M5, + LN]. - id: max_insertions_or_deletions type: int? inputBinding: position: 0 - prefix: --MAX_INSERTIONS_OR_DELETIONS - doc: | - The maximum number of insertions or deletions permitted for an alignment to be included. - Alignments with more than this many insertions or deletions will be ignored. Set to -1 to + prefix: '--MAX_INSERTIONS_OR_DELETIONS' + doc: > + The maximum number of insertions or deletions permitted for an alignment + to be included. + + Alignments with more than this many insertions or deletions will be + ignored. Set to -1 to + allow any number of insertions or deletions. Default value: 1. - id: min_unclipped_bases type: int? inputBinding: position: 0 - prefix: --MIN_UNCLIPPED_BASES - doc: | - If UNMAP_CONTAMINANT_READS is set, require this many unclipped bases or else the read will + prefix: '--MIN_UNCLIPPED_BASES' + doc: > + If UNMAP_CONTAMINANT_READS is set, require this many unclipped bases or + else the read will + be marked as contaminant. Default value: 32. - id: paired_run type: boolean? inputBinding: position: 0 - prefix: --PAIRED_RUN - doc: | - DEPRECATED. This argument is ignored and will be removed. Default value: true. Possible + prefix: '--PAIRED_RUN' + doc: > + DEPRECATED. This argument is ignored and will be removed. Default value: + true. 
Possible + values: {true, false} - id: primary_alignment_strategy type: string? inputBinding: position: 0 - prefix: --PRIMARY_ALIGNMENT_STRATEGY - doc: | - Strategy for selecting primary alignment when the aligner has provided more than one - alignment for a pair or fragment, and none are marked as primary, more than one is marked - as primary, or the primary alignment is filtered out for some reason. For all strategies, - ties are resolved arbitrarily. Default value: BestMapq. BestMapq (Expects that multiple - alignments will be correlated with HI tag, and prefers the pair of alignments with the + prefix: '--PRIMARY_ALIGNMENT_STRATEGY' + doc: > + Strategy for selecting primary alignment when the aligner has provided + more than one + + alignment for a pair or fragment, and none are marked as primary, more + than one is marked + + as primary, or the primary alignment is filtered out for some reason. For + all strategies, + + ties are resolved arbitrarily. Default value: BestMapq. BestMapq (Expects + that multiple + + alignments will be correlated with HI tag, and prefers the pair of + alignments with the + largest MAPQ, in the absence of a primary selected by the aligner.) - EarliestFragment (Prefers the alignment which maps the earliest base in the read. Note + + EarliestFragment (Prefers the alignment which maps the earliest base in + the read. Note + that EarliestFragment may not be used for paired reads.) - BestEndMapq (Appropriate for cases in which the aligner is not pair-aware, and does not - output the HI tag. It simply picks the alignment for each end with the highest MAPQ, and - makes those alignments primary, regardless of whether the two alignments make sense + + BestEndMapq (Appropriate for cases in which the aligner is not pair-aware, + and does not + + output the HI tag. It simply picks the alignment for each end with the + highest MAPQ, and + + makes those alignments primary, regardless of whether the two alignments + make sense + together.) 
- MostDistant (Appropriate for a non-pair-aware aligner. Picks the alignment pair with the - largest insert size. If all alignments would be chimeric, it picks the alignments for each + + MostDistant (Appropriate for a non-pair-aware aligner. Picks the alignment + pair with the + + largest insert size. If all alignments would be chimeric, it picks the + alignments for each + end with the best MAPQ.) - id: read1_aligned_bam type: - - "null" + - 'null' - type: array items: File inputBinding: - prefix: --READ1_ALIGNED_BAM + prefix: '--READ1_ALIGNED_BAM' inputBinding: position: 1 - doc: | - SAM or BAM file(s) with alignment data from the first read of a pair. This argument may - be specified 0 or more times. Default value: null. Cannot be used in conjunction with + doc: > + SAM or BAM file(s) with alignment data from the first read of a pair. + This argument may + + be specified 0 or more times. Default value: null. Cannot be used in + conjunction with + argument(s) ALIGNED_BAM (ALIGNED) - id: read1_trim type: int? inputBinding: position: 0 - prefix: --READ1_TRIM - doc: | - The number of bases trimmed from the beginning of read 1 prior to alignment Default + prefix: '--READ1_TRIM' + doc: > + The number of bases trimmed from the beginning of read 1 prior to + alignment Default + value: 0. - id: read2_aligned_bam type: - - "null" + - 'null' - type: array items: File inputBinding: - prefix: --READ2_ALIGNED_BAM + prefix: '--READ2_ALIGNED_BAM' inputBinding: position: 1 - doc: | - SAM or BAM file(s) with alignment data from the second read of a pair. This argument may - be specified 0 or more times. Default value: null. Cannot be used in conjunction with + doc: > + SAM or BAM file(s) with alignment data from the second read of a pair. + This argument may + + be specified 0 or more times. Default value: null. Cannot be used in + conjunction with + argument(s) ALIGNED_BAM (ALIGNED) - id: read2_trim type: int? 
inputBinding: position: 0 - prefix: --READ2_TRIM - doc: | - The number of bases trimmed from the beginning of read 2 prior to alignment Default + prefix: '--READ2_TRIM' + doc: > + The number of bases trimmed from the beginning of read 2 prior to + alignment Default + value: 0. - id: sort_order type: string? inputBinding: position: 1 - prefix: --SORT_ORDER - doc: | - The order in which the merged reads should be output. Default value: coordinate. Possible + prefix: '--SORT_ORDER' + doc: > + The order in which the merged reads should be output. Default value: + coordinate. Possible + values: {unsorted, queryname, coordinate, duplicate, unknown} - id: unmap_contaminant_reads type: boolean? inputBinding: position: 1 - prefix: --UNMAP_CONTAMINANT_READS - doc: | - Detect reads originating from foreign organisms (e.g. bacterial DNA in a non-bacterial - sample),and unmap + label those reads accordingly. Default value: false. Possible values: + prefix: '--UNMAP_CONTAMINANT_READS' + doc: > + Detect reads originating from foreign organisms (e.g. bacterial DNA in a + non-bacterial + + sample),and unmap + label those reads accordingly. Default value: false. + Possible values: + {true, false} - id: unmapped_read_strategy type: string? inputBinding: position: 1 - prefix: --UNMAPPED_READ_STRATEGY - doc: | - How to deal with alignment information in reads that are being unmapped (e.g. due to - cross-species contamination.) Currently ignored unless UNMAP_CONTAMINANT_READS = true. - Note that the DO_NOT_CHANGE strategy will actually reset the cigar and set the mapping - quality on unmapped reads since otherwisethe result will be an invalid record. To force no - change use the DO_NOT_CHANGE_INVALID strategy. Default value: DO_NOT_CHANGE. Possible + prefix: '--UNMAPPED_READ_STRATEGY' + doc: > + How to deal with alignment information in reads that are being unmapped + (e.g. due to + + cross-species contamination.) Currently ignored unless + UNMAP_CONTAMINANT_READS = true. 
+ + Note that the DO_NOT_CHANGE strategy will actually reset the cigar and set + the mapping + + quality on unmapped reads since otherwisethe result will be an invalid + record. To force no + + change use the DO_NOT_CHANGE_INVALID strategy. Default value: + DO_NOT_CHANGE. Possible + values: {COPY_TO_TAG, DO_NOT_CHANGE, DO_NOT_CHANGE_INVALID, MOVE_TO_TAG} - id: validation_stringency type: string? inputBinding: position: 0 - prefix: --VALIDATION_STRINGENCY + prefix: '--VALIDATION_STRINGENCY' doc: >- Validation stringency for all SAM files read by this program. Setting stringency to SILENT can improve performance when processing a BAM file in @@ -322,32 +438,34 @@ inputs: type: boolean? inputBinding: position: 0 - prefix: --CREATE_INDEX + prefix: '--CREATE_INDEX' doc: >- - Whether to create a BAM index when writing a coordinate-sorted BAM file. Default value: - false. Possible values: {true, false} + Whether to create a BAM index when writing a coordinate-sorted BAM file. + Default value: false. Possible values: {true, false} - id: create_md5_file type: boolean? inputBinding: position: 0 - prefix: --CREATE_MD5_FILE + prefix: '--CREATE_MD5_FILE' doc: >- - Whether to create an MD5 digest for any BAM or FASTQ files created. Default value: - false. Possible values: {true, false} + Whether to create an MD5 digest for any BAM or FASTQ files created. + Default value: false. Possible values: {true, false} - id: use_jdk_deflater type: boolean? inputBinding: position: 0 - prefix: --USE_JDK_DEFLATER + prefix: '--USE_JDK_DEFLATER' doc: >- - Use the JDK Deflater instead of the Intel Deflater for writing compressed output + Use the JDK Deflater instead of the Intel Deflater for writing compressed + output - id: use_jdk_inflater type: boolean? 
inputBinding: position: 0 - prefix: --USE_JDK_INFLATER + prefix: '--USE_JDK_INFLATER' doc: >- - Use the JDK Inflater instead of the Intel Inflater for reading compressed input + Use the JDK Inflater instead of the Intel Inflater for reading compressed + input outputs: - id: gatk_merge_bam_alignment_bam type: File @@ -360,6 +478,7 @@ outputs: return inputs.unmapped_bam.basename.replace(/.bam|.sam/, '_merged.bam') } } +label: GATK-MergeBamAlignment arguments: - position: 0 prefix: '--java-options' @@ -400,7 +519,7 @@ arguments: } - position: 0 prefix: '--TMP_DIR' - valueFrom: . + valueFrom: $(runtime.tmpdir) - position: 0 prefix: '--COMPRESSION_LEVEL' valueFrom: '2' diff --git a/gatk_merge_sam_files_4.1.8.0/gatk_merge_sam_files_4.1.8.0.cwl b/gatk_merge_sam_files_4.1.8.0/gatk_merge_sam_files_4.1.8.0.cwl index d0551a31..53001cfd 100644 --- a/gatk_merge_sam_files_4.1.8.0/gatk_merge_sam_files_4.1.8.0.cwl +++ b/gatk_merge_sam_files_4.1.8.0/gatk_merge_sam_files_4.1.8.0.cwl @@ -6,7 +6,6 @@ $namespaces: foaf: 'http://xmlns.com/foaf/0.1/' sbg: 'https://www.sevenbridges.com/' id: gatk_merge_sam_files_4_1_8_0 -label: GATK-MergeSamFiles baseCommand: - gatk - MergeSamFiles @@ -24,11 +23,12 @@ inputs: type: array items: File inputBinding: - prefix: -I + prefix: '-I' inputBinding: position: 1 - doc: | - SAM or BAM input file This argument must be specified at least once. Required. + doc: > + SAM or BAM input file This argument must be specified at least once. + Required. - id: output_file_name type: string? doc: SAM or BAM file to write merged result to Required. @@ -36,108 +36,142 @@ inputs: type: boolean? inputBinding: position: 1 - prefix: --ASSUME_SORTED - doc: | - If true, assume that the input files are in the same sort order as the requested output - sort order, even if their headers say otherwise. Default value: false. 
Possible values: + prefix: '--ASSUME_SORTED' + doc: > + If true, assume that the input files are in the same sort order as the + requested output + + sort order, even if their headers say otherwise. Default value: false. + Possible values: + {true, false} - id: comment type: string? inputBinding: position: 1 - prefix: --COMMENT - doc: | - Comment(s) to include in the merged output files header. This argument may be specified + prefix: '--COMMENT' + doc: > + Comment(s) to include in the merged output files header. This argument + may be specified + 0 or more times. Default value: null. - id: create_index type: boolean? inputBinding: position: 1 - prefix: --CREATE_INDEX - doc: | - Whether to create a BAM index when writing a coordinate-sorted BAM file. Default value: + prefix: '--CREATE_INDEX' + doc: > + Whether to create a BAM index when writing a coordinate-sorted BAM file. + Default value: + false. Possible values: {true, false} - id: create_md5_file type: boolean? inputBinding: position: 1 - prefix: --CREATE_MD5_FILE - doc: | - Whether to create an MD5 digest for any BAM or FASTQ files created. Default value: + prefix: '--CREATE_MD5_FILE' + doc: > + Whether to create an MD5 digest for any BAM or FASTQ files created. + Default value: + false. Possible values: {true, false} - id: intervals type: File? inputBinding: position: 1 - prefix: --INTERVALS - doc: | - An interval list file that contains the locations of the positions to merge. Assume bam - are sorted and indexed. The resulting file will contain alignments that may overlap with - genomic regions outside the requested region. Unmapped reads are discarded. Default + prefix: '--INTERVALS' + doc: > + An interval list file that contains the locations of the positions to + merge. Assume bam + + are sorted and indexed. The resulting file will contain alignments that + may overlap with + + genomic regions outside the requested region. Unmapped reads are + discarded. Default + value: null. 
- id: merge_sequence_dictionaries type: boolean? inputBinding: position: 1 - prefix: --MERGE_SEQUENCE_DICTIONARIES - doc: | - Merge the sequence dictionaries Default value: false. Possible values: {true, false} + prefix: '--MERGE_SEQUENCE_DICTIONARIES' + doc: > + Merge the sequence dictionaries Default value: false. Possible values: + {true, false} - id: reference_sequence type: File? inputBinding: position: 1 - prefix: --REFERENCE_SEQUENCE + prefix: '--REFERENCE_SEQUENCE' doc: | Reference sequence file. Default value: null. - id: sort_order type: string? inputBinding: position: 1 - prefix: --SORT_ORDER - doc: | - Sort order of output file Default value: coordinate. Possible values: {unsorted, + prefix: '--SORT_ORDER' + doc: > + Sort order of output file Default value: coordinate. Possible values: + {unsorted, + queryname, coordinate, duplicate, unknown} - id: use_threading type: boolean? inputBinding: position: 1 - prefix: --USE_THREADING - doc: | - Option to create a background thread to encode, compress and write to disk the output - file. The threaded version uses about 20% more CPU and decreases runtime by ~20% when - writing out a compressed BAM file. Default value: false. Possible values: {true, false} + prefix: '--USE_THREADING' + doc: > + Option to create a background thread to encode, compress and write to disk + the output + + file. The threaded version uses about 20% more CPU and decreases runtime + by ~20% when + + writing out a compressed BAM file. Default value: false. Possible values: + {true, false} - id: validation_stringency type: string? inputBinding: position: 1 - prefix: --VALIDATION_STRINGENCY - doc: | - Validation stringency for all SAM files read by this program. Setting stringency to - SILENT can improve performance when processing a BAM file in which variable-length data - (read, qualities, tags) do not otherwise need to be decoded. Default value: STRICT. 
+ prefix: '--VALIDATION_STRINGENCY' + doc: > + Validation stringency for all SAM files read by this program. Setting + stringency to + + SILENT can improve performance when processing a BAM file in which + variable-length data + + (read, qualities, tags) do not otherwise need to be decoded. Default + value: STRICT. + Possible values: {STRICT, LENIENT, SILENT} - id: verbosity type: string? inputBinding: position: 1 - prefix: --VERBOSITY - doc: | - Control verbosity of logging. Default value: INFO. Possible values: {ERROR, WARNING, + prefix: '--VERBOSITY' + doc: > + Control verbosity of logging. Default value: INFO. Possible values: + {ERROR, WARNING, + INFO, DEBUG} - id: use_jdk_deflater type: boolean? inputBinding: position: 0 - prefix: --USE_JDK_DEFLATER + prefix: '--USE_JDK_DEFLATER' doc: >- - Use the JDK Deflater instead of the Intel Deflater for writing compressed output + Use the JDK Deflater instead of the Intel Deflater for writing compressed + output - id: use_jdk_inflater type: boolean? inputBinding: position: 0 - prefix: --USE_JDK_INFLATER + prefix: '--USE_JDK_INFLATER' doc: >- - Use the JDK Inflater instead of the Intel Inflater for reading compressed input + Use the JDK Inflater instead of the Intel Inflater for reading compressed + input outputs: - id: gatk_merge_sam_files_bam type: File @@ -150,6 +184,7 @@ outputs: return 'merged.bam' } } +label: GATK-MergeSamFiles arguments: - position: 0 prefix: '--java-options' @@ -180,7 +215,7 @@ arguments: } - position: 0 prefix: '--TMP_DIR' - valueFrom: . 
+ valueFrom: $(runtime.tmpdir) - position: 0 prefix: '--COMPRESSION_LEVEL' valueFrom: '2' From 4bb12d4d52eedddb0c7bd0395c0ad1d1f2df0be5 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Thu, 8 Oct 2020 11:06:31 -0400 Subject: [PATCH 237/476] Update picard_add_or_replace_read_groups_4.1.8.1.cwl --- ...ard_add_or_replace_read_groups_4.1.8.1.cwl | 51 ++++++++++--------- 1 file changed, 27 insertions(+), 24 deletions(-) diff --git a/picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl b/picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl index 4357a544..fd0d0ec5 100644 --- a/picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl +++ b/picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl @@ -21,7 +21,7 @@ inputs: type: File inputBinding: position: 0 - prefix: -I + prefix: '-I' doc: Input file ( sam). Required. - id: output_file_name type: string? @@ -30,7 +30,7 @@ inputs: type: string? inputBinding: position: 0 - prefix: -SO + prefix: '-SO' doc: >- Optional sort order to output in. If not supplied OUTPUT is in the same order as INPUT.Default value: null. Possible values: {unsorted, queryname, @@ -39,7 +39,7 @@ inputs: type: string inputBinding: position: 0 - prefix: --RGID + prefix: '--RGID' doc: >- Read Group ID Default value: 1. This option can be set to 'null' to clear the default value Required @@ -47,49 +47,49 @@ inputs: type: string inputBinding: position: 0 - prefix: --RGCN + prefix: '--RGCN' doc: 'Read Group sequencing center name Default value: null. Required' - id: read_group_library type: string inputBinding: position: 0 - prefix: --RGLB + prefix: '--RGLB' doc: Read Group Library. Required - id: read_group_platform_unit type: string inputBinding: position: 0 - prefix: --RGPU + prefix: '--RGPU' doc: Read Group platform unit (eg. run barcode) Required. 
- id: read_group_sample_name type: string inputBinding: position: 0 - prefix: --RGSM + prefix: '--RGSM' doc: Read Group sample name. Required - id: read_group_sequencing_platform type: string inputBinding: position: 0 - prefix: --RGPL + prefix: '--RGPL' doc: 'Read Group platform (e.g. illumina, solid) Required.' - id: read_group_description type: string? inputBinding: position: 0 - prefix: --RGDS + prefix: '--RGDS' doc: 'Read Group description Default value: null.' - id: read_group_run_date type: string? inputBinding: position: 0 - prefix: --RGDT + prefix: '--RGDT' doc: 'Read Group run date Default value: null.' - id: validation_stringency type: string? inputBinding: position: 0 - prefix: --VALIDATION_STRINGENCY + prefix: '--VALIDATION_STRINGENCY' doc: >- Validation stringency for all SAM files read by this program. Setting stringency to SILENT can improve performance when processing a BAM file in @@ -100,7 +100,7 @@ inputs: type: int? inputBinding: position: 0 - prefix: --COMPRESSION_LEVEL + prefix: '--COMPRESSION_LEVEL' doc: >- Compression level for all compressed files created (e.g. BAM and GELI). Default value:5. This option can be set to 'null' to clear the default @@ -109,22 +109,24 @@ inputs: type: boolean? inputBinding: position: 0 - prefix: --USE_JDK_DEFLATER + prefix: '--USE_JDK_DEFLATER' doc: >- - Use the JDK Deflater instead of the Intel Deflater for writing compressed output + Use the JDK Deflater instead of the Intel Deflater for writing compressed + output - id: use_jdk_inflater type: boolean? inputBinding: position: 0 - prefix: --USE_JDK_INFLATER + prefix: '--USE_JDK_INFLATER' doc: >- - Use the JDK Inflater instead of the Intel Inflater for reading compressed input + Use the JDK Inflater instead of the Intel Inflater for reading compressed + input - default: true id: create_bam_index type: boolean? 
inputBinding: position: 0 - prefix: --CREATE_INDEX + prefix: '--CREATE_INDEX' doc: >- Whether to create a BAM index when writing a coordinate-sorted BAM file. Default value:false. This option can be set to 'null' to clear the default @@ -140,7 +142,7 @@ outputs: return inputs.input.basename.replace(/.sam$/, '_srt.bam'); } secondaryFiles: - - ^.bai + - ^.bai? label: picard_add_or_replace_read_groups_4.1.8.1 arguments: - position: 0 @@ -170,21 +172,21 @@ arguments: } } - position: 0 - valueFrom: "-XX:-UseGCOverheadLimit" shellQuote: false + valueFrom: '-XX:-UseGCOverheadLimit' - position: 0 - valueFrom: "-Djava.io.tmpdir=$(runtime.tmpdir)" shellQuote: false + valueFrom: '-Djava.io.tmpdir=$(runtime.tmpdir)' - position: 0 prefix: '-jar' valueFrom: /gatk/gatk-package-4.1.8.1-local.jar - position: 0 valueFrom: AddOrReplaceReadGroups - position: 0 - prefix: --TMP_DIR - valueFrom: "$(runtime.tmpdir)" + prefix: '--TMP_DIR' + valueFrom: $(runtime.tmpdir) - position: 0 - prefix: -O + prefix: '-O' valueFrom: |- ${ if(inputs.output_file_name) @@ -192,8 +194,9 @@ arguments: return inputs.input.basename.replace(/.sam$/, '_srt.bam'); } requirements: + - class: ShellCommandRequirement - class: ResourceRequirement - ramMin: 25000 + ramMin: 16000 coresMin: 2 - class: DockerRequirement dockerPull: 'broadinstitute/gatk:4.1.8.1' From b76943d28a779a199c8873c7345629c536ae1668 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Thu, 8 Oct 2020 11:29:25 -0400 Subject: [PATCH 238/476] Update abra2_2.22.cwl --- abra2_2.22/abra2_2.22.cwl | 89 ++++++++++++++++++--------------------- 1 file changed, 40 insertions(+), 49 deletions(-) diff --git a/abra2_2.22/abra2_2.22.cwl b/abra2_2.22/abra2_2.22.cwl index 8575e4ae..3367e5f3 100644 --- a/abra2_2.22/abra2_2.22.cwl +++ b/abra2_2.22/abra2_2.22.cwl @@ -149,8 +149,7 @@ inputs: inputBinding: position: 0 prefix: '--no-edge-ci' - doc: >- - Prevent output of complex indels at read start or read end + doc: Prevent output of complex indels at read start or read 
end - id: no_sort type: boolean? inputBinding: @@ -165,65 +164,57 @@ outputs: - type: array items: File outputBinding: - glob: '*abra.bam' + glob: |- + ${ + return inputs.output_bams + } secondaryFiles: - ^.bai label: abra2_2.22 arguments: - position: 0 - valueFrom: "${ - if(inputs.memory_per_job && inputs.memory_overhead) { - if(inputs.memory_per_job % 1000 == 0) { - return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\" - } - else { - return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\" - } - } - else if (inputs.memory_per_job && !inputs.memory_overhead){ - if(inputs.memory_per_job % 1000 == 0) { - return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\" - } - else { - return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\" + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + + if(inputs.memory_per_job % 1000 == 0) { + + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + + if(inputs.memory_per_job % 1000 == 0) { + + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + + return "-Xmx15G" + } + else { + + return "-Xmx15G" + } } - } - else if(!inputs.memory_per_job && inputs.memory_overhead){ - return \"-Xmx15G\" - } - else { - return \"-Xmx15G\" - } - }" - position: 0 prefix: '-jar' valueFrom: /usr/local/bin/abra2.jar requirements: - class: ResourceRequirement - ramMin: "${ - if(inputs.memory_per_job && inputs.memory_overhead) { - return inputs.memory_per_job + inputs.memory_overhead - } - else if (inputs.memory_per_job && !inputs.memory_overhead){ - return inputs.memory_per_job + 2000 - } - else if(!inputs.memory_per_job && 
inputs.memory_overhead){ - return 15000 + inputs.memory_overhead - } - else { - return 17000 - } - }" - coresMin: "${ - if (inputs.number_of_threads) { - return inputs.number_of_threads - } - else { - return 4 - } - }" + ramMin: 60000 + coresMin: 16 - class: DockerRequirement - dockerPull: mskaccess/abra2:2.22 + dockerPull: 'mskaccess/abra2:2.22' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' From 6a8d8e1fd0ee2f3b6fb1147acd0ce83ce334d7a1 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Thu, 8 Oct 2020 11:31:38 -0400 Subject: [PATCH 239/476] Update bedtools_merge_v2.28.0_cv2.cwl --- bedtools_merge_v2.28.0_cv2/bedtools_merge_v2.28.0_cv2.cwl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bedtools_merge_v2.28.0_cv2/bedtools_merge_v2.28.0_cv2.cwl b/bedtools_merge_v2.28.0_cv2/bedtools_merge_v2.28.0_cv2.cwl index 806f21b2..ffd7c792 100644 --- a/bedtools_merge_v2.28.0_cv2/bedtools_merge_v2.28.0_cv2.cwl +++ b/bedtools_merge_v2.28.0_cv2/bedtools_merge_v2.28.0_cv2.cwl @@ -34,7 +34,7 @@ inputs: doc: Maximum distance between features allowed for features to be merged. outputs: - id: bedtools_merge_bed - type: File? + type: File outputBinding: glob: |- ${ From 13b42322c00ccf4349b1ba6529572806d37570e1 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Thu, 8 Oct 2020 11:38:04 -0400 Subject: [PATCH 240/476] Update abra2_2.22.cwl --- abra2_2.22/abra2_2.22.cwl | 52 +++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/abra2_2.22/abra2_2.22.cwl b/abra2_2.22/abra2_2.22.cwl index 3367e5f3..8bb44d90 100644 --- a/abra2_2.22/abra2_2.22.cwl +++ b/abra2_2.22/abra2_2.22.cwl @@ -169,43 +169,43 @@ outputs: return inputs.output_bams } secondaryFiles: - - ^.bai + - ^.bai? 
label: abra2_2.22 arguments: - position: 0 valueFrom: |- ${ - if(inputs.memory_per_job && inputs.memory_overhead) { - - if(inputs.memory_per_job % 1000 == 0) { - - return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" - } - else { - - return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" - } + if (inputs.memory_per_job && inputs.memory_overhead) { + + if (inputs.memory_per_job % 1000 == 0) { + + return "-Xmx" + (inputs.memory_per_job / 1000).toString() + "G" } - else if (inputs.memory_per_job && !inputs.memory_overhead){ - - if(inputs.memory_per_job % 1000 == 0) { - - return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" - } - else { - - return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" - } + else { + + return "-Xmx" + Math.floor((inputs.memory_per_job / 1000)).toString() + "G" } - else if(!inputs.memory_per_job && inputs.memory_overhead){ - - return "-Xmx15G" + } + else if (inputs.memory_per_job && !inputs.memory_overhead) { + + if (inputs.memory_per_job % 1000 == 0) { + + return "-Xmx" + (inputs.memory_per_job / 1000).toString() + "G" } else { - - return "-Xmx15G" + + return "-Xmx" + Math.floor((inputs.memory_per_job / 1000)).toString() + "G" } } + else if (!inputs.memory_per_job && inputs.memory_overhead) { + + return "-Xmx15G" + } + else { + + return "-Xmx15G" + } + } - position: 0 prefix: '-jar' valueFrom: /usr/local/bin/abra2.jar From e471d593346ecb69c310701a7c79f4e03a15066b Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Thu, 8 Oct 2020 12:25:17 -0400 Subject: [PATCH 241/476] Update gatk_apply_bqsr_4.1.8.1.cwl --- gatk_apply_bqsr_4.1.8.1/gatk_apply_bqsr_4.1.8.1.cwl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gatk_apply_bqsr_4.1.8.1/gatk_apply_bqsr_4.1.8.1.cwl b/gatk_apply_bqsr_4.1.8.1/gatk_apply_bqsr_4.1.8.1.cwl index a25ddb4c..53ac4a3e 100644 --- a/gatk_apply_bqsr_4.1.8.1/gatk_apply_bqsr_4.1.8.1.cwl +++ b/gatk_apply_bqsr_4.1.8.1/gatk_apply_bqsr_4.1.8.1.cwl @@ -235,7 +235,7 
@@ inputs: type: int? outputs: - id: gatk_apply_bqsr_bam - type: File? + type: File outputBinding: glob: |- ${ @@ -246,7 +246,7 @@ outputs: } } secondaryFiles: - - ^.bai + - ^.bai? label: gatk_apply_bqsr_4.1.8.1 arguments: - position: 0 From 6b6127019bc78d606dc76d4a3bfc4087a3478c44 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Thu, 8 Oct 2020 12:36:15 -0400 Subject: [PATCH 242/476] Update gatk_sam_to_fastq_4.1.8.0.cwl --- gatk_sam_to_fastq_4.1.8.0/gatk_sam_to_fastq_4.1.8.0.cwl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gatk_sam_to_fastq_4.1.8.0/gatk_sam_to_fastq_4.1.8.0.cwl b/gatk_sam_to_fastq_4.1.8.0/gatk_sam_to_fastq_4.1.8.0.cwl index 59880b1c..a5b47580 100644 --- a/gatk_sam_to_fastq_4.1.8.0/gatk_sam_to_fastq_4.1.8.0.cwl +++ b/gatk_sam_to_fastq_4.1.8.0/gatk_sam_to_fastq_4.1.8.0.cwl @@ -251,7 +251,7 @@ inputs: SILENT} outputs: - id: gatk_sam_to_fastq_fastq - type: File? + type: File outputBinding: glob: |- ${ From bcc0fee9a444e2072e0cb389d894d62650291673 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Thu, 8 Oct 2020 13:00:54 -0400 Subject: [PATCH 243/476] Removing Null from read_filter --- gatk_apply_bqsr_4.1.8.1/gatk_apply_bqsr_4.1.8.1.cwl | 2 -- .../gatk_base_recalibrator_4.1.8.1.cwl | 2 -- 2 files changed, 4 deletions(-) diff --git a/gatk_apply_bqsr_4.1.8.1/gatk_apply_bqsr_4.1.8.1.cwl b/gatk_apply_bqsr_4.1.8.1/gatk_apply_bqsr_4.1.8.1.cwl index 53ac4a3e..ac141e85 100644 --- a/gatk_apply_bqsr_4.1.8.1/gatk_apply_bqsr_4.1.8.1.cwl +++ b/gatk_apply_bqsr_4.1.8.1/gatk_apply_bqsr_4.1.8.1.cwl @@ -89,7 +89,6 @@ inputs: prefix: '--disable-bam-index-caching' - id: disable_read_filter type: - - 'null' - type: array items: string inputBinding: @@ -180,7 +179,6 @@ inputs: prefix: '--QUIET' - id: read_filter type: - - 'null' - type: array items: string inputBinding: diff --git a/gatk_base_recalibrator_4.1.8.1/gatk_base_recalibrator_4.1.8.1.cwl b/gatk_base_recalibrator_4.1.8.1/gatk_base_recalibrator_4.1.8.1.cwl index ec5caec6..e204c8b8 100644 --- 
a/gatk_base_recalibrator_4.1.8.1/gatk_base_recalibrator_4.1.8.1.cwl +++ b/gatk_base_recalibrator_4.1.8.1/gatk_base_recalibrator_4.1.8.1.cwl @@ -117,7 +117,6 @@ inputs: prefix: '--disable-bam-index-caching' - id: disable_read_filter type: - - 'null' - type: array items: string inputBinding: @@ -227,7 +226,6 @@ inputs: prefix: '--QUIET' - id: read_filter type: - - 'null' - type: array items: string inputBinding: From 14163d2bd6be565e0090b20340637f8eef9a4077 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Thu, 8 Oct 2020 13:15:49 -0400 Subject: [PATCH 244/476] Removing additional indentation --- .../gatk_apply_bqsr_4.1.8.1.cwl | 16 ++++++++-------- .../gatk_base_recalibrator_4.1.8.1.cwl | 16 ++++++++-------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/gatk_apply_bqsr_4.1.8.1/gatk_apply_bqsr_4.1.8.1.cwl b/gatk_apply_bqsr_4.1.8.1/gatk_apply_bqsr_4.1.8.1.cwl index ac141e85..36590276 100644 --- a/gatk_apply_bqsr_4.1.8.1/gatk_apply_bqsr_4.1.8.1.cwl +++ b/gatk_apply_bqsr_4.1.8.1/gatk_apply_bqsr_4.1.8.1.cwl @@ -89,10 +89,10 @@ inputs: prefix: '--disable-bam-index-caching' - id: disable_read_filter type: - - type: array - items: string - inputBinding: - prefix: '--disable-read-filter' + type: array + items: string + inputBinding: + prefix: '--disable-read-filter' inputBinding: position: 6 doc: Read filters to be disabled before analysis @@ -179,10 +179,10 @@ inputs: prefix: '--QUIET' - id: read_filter type: - - type: array - items: string - inputBinding: - prefix: '--read-filter' + type: array + items: string + inputBinding: + prefix: '--read-filter' inputBinding: position: 6 - id: read_index diff --git a/gatk_base_recalibrator_4.1.8.1/gatk_base_recalibrator_4.1.8.1.cwl b/gatk_base_recalibrator_4.1.8.1/gatk_base_recalibrator_4.1.8.1.cwl index e204c8b8..8ed59689 100644 --- a/gatk_base_recalibrator_4.1.8.1/gatk_base_recalibrator_4.1.8.1.cwl +++ b/gatk_base_recalibrator_4.1.8.1/gatk_base_recalibrator_4.1.8.1.cwl @@ -117,10 +117,10 @@ inputs: prefix: 
'--disable-bam-index-caching' - id: disable_read_filter type: - - type: array - items: string - inputBinding: - prefix: '--disable-read-filter' + type: array + items: string + inputBinding: + prefix: '--disable-read-filter' inputBinding: position: 10 doc: Read filters to be disabled before analysis @@ -226,10 +226,10 @@ inputs: prefix: '--QUIET' - id: read_filter type: - - type: array - items: string - inputBinding: - prefix: '--read-filter' + type: array + items: string + inputBinding: + prefix: '--read-filter' inputBinding: position: 10 - id: read_index From b823dc1a99dc1edde9561f4ab1ef2de4b21adef3 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Thu, 8 Oct 2020 13:57:42 -0400 Subject: [PATCH 245/476] making it flexible --- .../gatk_apply_bqsr_4.1.8.1.cwl | 18 ++++++++++-------- .../gatk_base_recalibrator_4.1.8.1.cwl | 18 ++++++++++-------- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/gatk_apply_bqsr_4.1.8.1/gatk_apply_bqsr_4.1.8.1.cwl b/gatk_apply_bqsr_4.1.8.1/gatk_apply_bqsr_4.1.8.1.cwl index 36590276..53ac4a3e 100644 --- a/gatk_apply_bqsr_4.1.8.1/gatk_apply_bqsr_4.1.8.1.cwl +++ b/gatk_apply_bqsr_4.1.8.1/gatk_apply_bqsr_4.1.8.1.cwl @@ -89,10 +89,11 @@ inputs: prefix: '--disable-bam-index-caching' - id: disable_read_filter type: - type: array - items: string - inputBinding: - prefix: '--disable-read-filter' + - 'null' + - type: array + items: string + inputBinding: + prefix: '--disable-read-filter' inputBinding: position: 6 doc: Read filters to be disabled before analysis @@ -179,10 +180,11 @@ inputs: prefix: '--QUIET' - id: read_filter type: - type: array - items: string - inputBinding: - prefix: '--read-filter' + - 'null' + - type: array + items: string + inputBinding: + prefix: '--read-filter' inputBinding: position: 6 - id: read_index diff --git a/gatk_base_recalibrator_4.1.8.1/gatk_base_recalibrator_4.1.8.1.cwl b/gatk_base_recalibrator_4.1.8.1/gatk_base_recalibrator_4.1.8.1.cwl index 8ed59689..ec5caec6 100644 --- 
a/gatk_base_recalibrator_4.1.8.1/gatk_base_recalibrator_4.1.8.1.cwl +++ b/gatk_base_recalibrator_4.1.8.1/gatk_base_recalibrator_4.1.8.1.cwl @@ -117,10 +117,11 @@ inputs: prefix: '--disable-bam-index-caching' - id: disable_read_filter type: - type: array - items: string - inputBinding: - prefix: '--disable-read-filter' + - 'null' + - type: array + items: string + inputBinding: + prefix: '--disable-read-filter' inputBinding: position: 10 doc: Read filters to be disabled before analysis @@ -226,10 +227,11 @@ inputs: prefix: '--QUIET' - id: read_filter type: - type: array - items: string - inputBinding: - prefix: '--read-filter' + - 'null' + - type: array + items: string + inputBinding: + prefix: '--read-filter' inputBinding: position: 10 - id: read_index From 5ebfaaeda354a55ab8fd71512ae4dc15e66bcd5d Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Thu, 8 Oct 2020 15:50:51 -0400 Subject: [PATCH 246/476] Update picard_add_or_replace_read_groups_4.1.8.1.cwl --- .../picard_add_or_replace_read_groups_4.1.8.1.cwl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl b/picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl index fd0d0ec5..4360cd1e 100644 --- a/picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl +++ b/picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl @@ -142,7 +142,8 @@ outputs: return inputs.input.basename.replace(/.sam$/, '_srt.bam'); } secondaryFiles: - - ^.bai? 
+ - pattern: ^.bai + required: false label: picard_add_or_replace_read_groups_4.1.8.1 arguments: - position: 0 From 2ad314dcdd4ee616b79acb01415da1a6d81f2ab1 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Thu, 8 Oct 2020 15:58:24 -0400 Subject: [PATCH 247/476] Update picard_add_or_replace_read_groups_4.1.8.1.cwl --- .../picard_add_or_replace_read_groups_4.1.8.1.cwl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl b/picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl index 4360cd1e..dcfc1984 100644 --- a/picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl +++ b/picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl @@ -142,8 +142,7 @@ outputs: return inputs.input.basename.replace(/.sam$/, '_srt.bam'); } secondaryFiles: - - pattern: ^.bai - required: false + - "^.bai?" label: picard_add_or_replace_read_groups_4.1.8.1 arguments: - position: 0 From 36eedfc7877f1400d486a624b43fcece2335131c Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Thu, 8 Oct 2020 16:08:01 -0400 Subject: [PATCH 248/476] Removing `?` as it not in 1.0 specs --- abra2_2.22/abra2_2.22.cwl | 2 +- gatk_apply_bqsr_4.1.8.1/gatk_apply_bqsr_4.1.8.1.cwl | 2 +- .../gatk_merge_bam_alignment_4.1.8.0.cwl | 2 ++ .../picard_add_or_replace_read_groups_4.1.8.1.cwl | 2 +- 4 files changed, 5 insertions(+), 3 deletions(-) diff --git a/abra2_2.22/abra2_2.22.cwl b/abra2_2.22/abra2_2.22.cwl index 8bb44d90..57f12d3c 100644 --- a/abra2_2.22/abra2_2.22.cwl +++ b/abra2_2.22/abra2_2.22.cwl @@ -169,7 +169,7 @@ outputs: return inputs.output_bams } secondaryFiles: - - ^.bai? 
+ - ^.bai label: abra2_2.22 arguments: - position: 0 diff --git a/gatk_apply_bqsr_4.1.8.1/gatk_apply_bqsr_4.1.8.1.cwl b/gatk_apply_bqsr_4.1.8.1/gatk_apply_bqsr_4.1.8.1.cwl index 53ac4a3e..569ce5c3 100644 --- a/gatk_apply_bqsr_4.1.8.1/gatk_apply_bqsr_4.1.8.1.cwl +++ b/gatk_apply_bqsr_4.1.8.1/gatk_apply_bqsr_4.1.8.1.cwl @@ -246,7 +246,7 @@ outputs: } } secondaryFiles: - - ^.bai? + - ^.bai label: gatk_apply_bqsr_4.1.8.1 arguments: - position: 0 diff --git a/gatk_merge_bam_alignment_4.1.8.0/gatk_merge_bam_alignment_4.1.8.0.cwl b/gatk_merge_bam_alignment_4.1.8.0/gatk_merge_bam_alignment_4.1.8.0.cwl index 72b01eeb..ec80e647 100644 --- a/gatk_merge_bam_alignment_4.1.8.0/gatk_merge_bam_alignment_4.1.8.0.cwl +++ b/gatk_merge_bam_alignment_4.1.8.0/gatk_merge_bam_alignment_4.1.8.0.cwl @@ -478,6 +478,8 @@ outputs: return inputs.unmapped_bam.basename.replace(/.bam|.sam/, '_merged.bam') } } + secondaryFiles: + - ^.bai label: GATK-MergeBamAlignment arguments: - position: 0 diff --git a/picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl b/picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl index dcfc1984..f2a4d7dd 100644 --- a/picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl +++ b/picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl @@ -142,7 +142,7 @@ outputs: return inputs.input.basename.replace(/.sam$/, '_srt.bam'); } secondaryFiles: - - "^.bai?" 
+ - ^.bai label: picard_add_or_replace_read_groups_4.1.8.1 arguments: - position: 0 From 4bada118d4f3f3590297e6a17f9c65b243b989ea Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 16 Oct 2020 11:03:25 -0400 Subject: [PATCH 249/476] Update fgbio_call_duplex_consensus_reads_1.2.0.cwl --- .../fgbio_call_duplex_consensus_reads_1.2.0.cwl | 1 - 1 file changed, 1 deletion(-) diff --git a/fgbio_call_duplex_consensus_reads_1.2.0/fgbio_call_duplex_consensus_reads_1.2.0.cwl b/fgbio_call_duplex_consensus_reads_1.2.0/fgbio_call_duplex_consensus_reads_1.2.0.cwl index 90cc1fc5..f17d0c74 100644 --- a/fgbio_call_duplex_consensus_reads_1.2.0/fgbio_call_duplex_consensus_reads_1.2.0.cwl +++ b/fgbio_call_duplex_consensus_reads_1.2.0/fgbio_call_duplex_consensus_reads_1.2.0.cwl @@ -131,7 +131,6 @@ doc: >- label: fgbio_call_duplex_consensus_reads_1.2.0 arguments: - position: 0 - prefix: '' valueFrom: |- ${ if(inputs.memory_per_job && inputs.memory_overhead) { From 041246de3db95f983801cde3a913af56daaa2c24 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 16 Oct 2020 11:09:01 -0400 Subject: [PATCH 250/476] Update fgbio_call_duplex_consensus_reads_1.2.0.cwl --- .../fgbio_call_duplex_consensus_reads_1.2.0.cwl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fgbio_call_duplex_consensus_reads_1.2.0/fgbio_call_duplex_consensus_reads_1.2.0.cwl b/fgbio_call_duplex_consensus_reads_1.2.0/fgbio_call_duplex_consensus_reads_1.2.0.cwl index f17d0c74..ddf61251 100644 --- a/fgbio_call_duplex_consensus_reads_1.2.0/fgbio_call_duplex_consensus_reads_1.2.0.cwl +++ b/fgbio_call_duplex_consensus_reads_1.2.0/fgbio_call_duplex_consensus_reads_1.2.0.cwl @@ -71,7 +71,7 @@ inputs: type: string? inputBinding: position: 0 - valueFrom: '--sort-order' + prefix: '--sort-order' doc: 'The sort order of the output, if :none: then the same as the input.' 
- id: min_reads type: 'int[]' From f787127e66fc4dab318d53c91cf35836b85620ed Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 16 Oct 2020 11:22:16 -0400 Subject: [PATCH 251/476] Update fgbio_call_duplex_consensus_reads_1.2.0.cwl --- .../fgbio_call_duplex_consensus_reads_1.2.0.cwl | 1 + 1 file changed, 1 insertion(+) diff --git a/fgbio_call_duplex_consensus_reads_1.2.0/fgbio_call_duplex_consensus_reads_1.2.0.cwl b/fgbio_call_duplex_consensus_reads_1.2.0/fgbio_call_duplex_consensus_reads_1.2.0.cwl index ddf61251..c03469b0 100644 --- a/fgbio_call_duplex_consensus_reads_1.2.0/fgbio_call_duplex_consensus_reads_1.2.0.cwl +++ b/fgbio_call_duplex_consensus_reads_1.2.0/fgbio_call_duplex_consensus_reads_1.2.0.cwl @@ -79,6 +79,7 @@ inputs: position: 0 prefix: '--min-reads' itemSeparator: ' ' + shellQuote: false doc: The minimum number of input reads to a consensus read. - id: max_reads_per_strand type: int? From da24bc1c177993131f68d37afd91a1887296a819 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 16 Oct 2020 11:41:18 -0400 Subject: [PATCH 252/476] Update fgbio_call_duplex_consensus_reads_1.2.0.cwl --- .../fgbio_call_duplex_consensus_reads_1.2.0.cwl | 1 - 1 file changed, 1 deletion(-) diff --git a/fgbio_call_duplex_consensus_reads_1.2.0/fgbio_call_duplex_consensus_reads_1.2.0.cwl b/fgbio_call_duplex_consensus_reads_1.2.0/fgbio_call_duplex_consensus_reads_1.2.0.cwl index c03469b0..ece82c14 100644 --- a/fgbio_call_duplex_consensus_reads_1.2.0/fgbio_call_duplex_consensus_reads_1.2.0.cwl +++ b/fgbio_call_duplex_consensus_reads_1.2.0/fgbio_call_duplex_consensus_reads_1.2.0.cwl @@ -165,7 +165,6 @@ arguments: shellQuote: false valueFrom: '${ return runtime.tmpdir}' - position: 0 - prefix: '' valueFrom: CallDuplexConsensusReads - position: 0 prefix: '--output' From 05157f1912677ba2830f4f47f99c8f840f4f2707 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 16 Oct 2020 11:48:07 -0400 Subject: [PATCH 253/476] Update fgbio_group_reads_by_umi_1.2.0.cwl --- 
.../fgbio_group_reads_by_umi_1.2.0.cwl | 1 - 1 file changed, 1 deletion(-) diff --git a/fgbio_group_reads_by_umi_1.2.0/fgbio_group_reads_by_umi_1.2.0.cwl b/fgbio_group_reads_by_umi_1.2.0/fgbio_group_reads_by_umi_1.2.0.cwl index a51475e9..d83da090 100644 --- a/fgbio_group_reads_by_umi_1.2.0/fgbio_group_reads_by_umi_1.2.0.cwl +++ b/fgbio_group_reads_by_umi_1.2.0/fgbio_group_reads_by_umi_1.2.0.cwl @@ -171,7 +171,6 @@ doc: >- label: fgbio_group_reads_by_umi_1.2.0 arguments: - position: 0 - prefix: '' valueFrom: |- ${ if(inputs.memory_per_job && inputs.memory_overhead) { From bcd496de5866eb9ad7c9d5fc8a6845e459e6670e Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 16 Oct 2020 11:50:37 -0400 Subject: [PATCH 254/476] Update fgbio_group_reads_by_umi_1.2.0.cwl --- .../fgbio_group_reads_by_umi_1.2.0.cwl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fgbio_group_reads_by_umi_1.2.0/fgbio_group_reads_by_umi_1.2.0.cwl b/fgbio_group_reads_by_umi_1.2.0/fgbio_group_reads_by_umi_1.2.0.cwl index d83da090..6053267f 100644 --- a/fgbio_group_reads_by_umi_1.2.0/fgbio_group_reads_by_umi_1.2.0.cwl +++ b/fgbio_group_reads_by_umi_1.2.0/fgbio_group_reads_by_umi_1.2.0.cwl @@ -88,7 +88,7 @@ outputs: return inputs.input.basename.replace(/.bam/,'_group.bam'); } - id: fgbio_group_reads_by_umi_histogram - type: File + type: File? 
outputBinding: glob: |- ${ From 589b12262594ce977b445ccdb5648d988fa91f9d Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 16 Oct 2020 11:59:54 -0400 Subject: [PATCH 255/476] Update fgbio_collect_duplex_seq_metrics_1.2.0.cwl --- .../fgbio_collect_duplex_seq_metrics_1.2.0.cwl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fgbio_collect_duplex_seq_metrics_1.2.0/fgbio_collect_duplex_seq_metrics_1.2.0.cwl b/fgbio_collect_duplex_seq_metrics_1.2.0/fgbio_collect_duplex_seq_metrics_1.2.0.cwl index d69075fe..a1f61681 100644 --- a/fgbio_collect_duplex_seq_metrics_1.2.0/fgbio_collect_duplex_seq_metrics_1.2.0.cwl +++ b/fgbio_collect_duplex_seq_metrics_1.2.0/fgbio_collect_duplex_seq_metrics_1.2.0.cwl @@ -103,14 +103,14 @@ outputs: return inputs.output_prefix + '.umi_counts.txt' } - id: fgbio_collect_duplex_seq_metrics_duplex_qc - type: File + type: File? outputBinding: glob: |- ${ return inputs.output_prefix + '.duplex_qc.pdf' } - id: fgbio_collect_duplex_seq_metrics_duplex_umi_counts - type: File + type: File? outputBinding: glob: |- ${ From f471fe41e9cf98392f1d56ad406d24a66ece7f70 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 16 Oct 2020 12:02:36 -0400 Subject: [PATCH 256/476] Update fgbio_collect_duplex_seq_metrics_1.2.0.cwl --- ...fgbio_collect_duplex_seq_metrics_1.2.0.cwl | 87 +++++++++++-------- 1 file changed, 49 insertions(+), 38 deletions(-) diff --git a/fgbio_collect_duplex_seq_metrics_1.2.0/fgbio_collect_duplex_seq_metrics_1.2.0.cwl b/fgbio_collect_duplex_seq_metrics_1.2.0/fgbio_collect_duplex_seq_metrics_1.2.0.cwl index a1f61681..7fd2e589 100644 --- a/fgbio_collect_duplex_seq_metrics_1.2.0/fgbio_collect_duplex_seq_metrics_1.2.0.cwl +++ b/fgbio_collect_duplex_seq_metrics_1.2.0/fgbio_collect_duplex_seq_metrics_1.2.0.cwl @@ -34,45 +34,47 @@ inputs: inputBinding: position: 0 prefix: '--intervals' - doc: Optional set of intervals over which to restrict analysis. [Optional]. 
+ doc: 'Optional set of intervals over which to restrict analysis. [Optional].' - id: description type: string? inputBinding: position: 0 prefix: '--description' - doc: Description of data set used to label plots. Defaults to sample/library. [Optional]. + doc: >- + Description of data set used to label plots. Defaults to sample/library. + [Optional]. - id: duplex_umi_counts - type: string? + type: boolean? inputBinding: position: 0 prefix: '--duplex-umi-counts' - doc: If true, produce the .duplex_umi_counts.txt file with counts of duplex UMI observations. [Optional]. + doc: >- + If true, produce the .duplex_umi_counts.txt file with counts of duplex UMI + observations. [Optional]. - id: min_ab_reads type: int? inputBinding: position: 0 prefix: '--min-ab-reads' - doc: Minimum AB reads to call a tag family a 'duplex'. [Optional]. + doc: 'Minimum AB reads to call a tag family a ''duplex''. [Optional].' - id: min_ba_reads type: int? inputBinding: position: 0 prefix: '--min-ba-reads' - doc: >- - Minimum BA reads to call a tag family a 'duplex'. [Optional]. + doc: 'Minimum BA reads to call a tag family a ''duplex''. [Optional].' - id: umi_tag type: string? inputBinding: position: 0 prefix: '--umi-tag' - doc: >- - The tag containing the raw UMI. [Optional]. + doc: 'The tag containing the raw UMI. [Optional].' - id: mi_tag type: string? inputBinding: position: 0 prefix: '--mi-tag' - doc: The output tag for UMI grouping. [Optional]. + doc: 'The output tag for UMI grouping. [Optional].' outputs: - id: fgbio_collect_duplex_seq_metrics_family_size type: File @@ -119,55 +121,63 @@ outputs: doc: >- Collects a suite of metrics to QC duplex sequencing data. - Inputs - ------ + Inputs ------ The input to this tool must be a BAM file that is either: - 1. The exact BAM output by the 'GroupReadsByUmi' tool (in the sort-order it was produced in) - 2. A BAM file that has MI tags present on all reads (usually set by 'GroupReadsByUmi' and has been sorted with + 1. 
The exact BAM output by the 'GroupReadsByUmi' tool (in the sort-order it + was produced in) 2. A BAM file that has MI tags present on all reads (usually + set by 'GroupReadsByUmi' and has been sorted with 'SortBam' into 'TemplateCoordinate' order. - Calculation of metrics may be restricted to a set of regions using the '--intervals' parameter. This can significantly - affect results as off-target reads in duplex sequencing experiments often have very different properties than on-target - reads due to the lack of enrichment. + Calculation of metrics may be restricted to a set of regions using the + '--intervals' parameter. This can significantly affect results as off-target + reads in duplex sequencing experiments often have very different properties + than on-target reads due to the lack of enrichment. - Several metrics are calculated related to the fraction of tag families that have duplex coverage. The definition of - "duplex" is controlled by the '--min-ab-reads' and '--min-ba-reads' parameters. The default is to treat any tag family - with at least one observation of each strand as a duplex, but this could be made more stringent, e.g. by setting - '--min-ab-reads=3 --min-ba-reads=3'. If different thresholds are used then '--min-ab-reads' must be the higher value. + Several metrics are calculated related to the fraction of tag families that + have duplex coverage. The definition of "duplex" is controlled by the + '--min-ab-reads' and '--min-ba-reads' parameters. The default is to treat any + tag family with at least one observation of each strand as a duplex, but this + could be made more stringent, e.g. by setting '--min-ab-reads=3 + --min-ba-reads=3'. If different thresholds are used then '--min-ab-reads' must + be the higher value. - Outputs - ------- + Outputs ------- The following output files are produced: - 1. .family_sizes.txt: metrics on the frequency of different types of families of different sizes - 2. 
.duplex_family_sizes.txt: metrics on the frequency of duplex tag families by the number of observations + 1. .family_sizes.txt: metrics on the frequency of different types of + families of different sizes 2. .duplex_family_sizes.txt: metrics on + the frequency of duplex tag families by the number of observations from each strand - 3. .duplex_yield_metrics.txt: summary QC metrics produced using 5%, 10%, 15%...100% of the data - 4. .umi_counts.txt: metrics on the frequency of observations of UMIs within reads and tag families - 5. .duplex_qc.pdf: a series of plots generated from the preceding metrics files for visualization - 6. .duplex_umi_counts.txt: (optional) metrics on the frequency of observations of duplex UMIs within reads + 3. .duplex_yield_metrics.txt: summary QC metrics produced using 5%, + 10%, 15%...100% of the data 4. .umi_counts.txt: metrics on the + frequency of observations of UMIs within reads and tag families 5. + .duplex_qc.pdf: a series of plots generated from the preceding metrics + files for visualization 6. .duplex_umi_counts.txt: (optional) metrics + on the frequency of observations of duplex UMIs within reads and tag families. This file is only produced if the '--duplex-umi-counts' option is used as it requires significantly more memory to track all pairs of UMIs seen when a large number of UMI sequences are present. Within the metrics files the prefixes 'CS', 'SS' and 'DS' are used to mean: - * CS: tag families where membership is defined solely on matching genome coordinates and strand - * SS: single-stranded tag families where membership is defined by genome coordinates, strand and UMI; ie. 50/A and + * CS: tag families where membership is defined solely on matching genome + coordinates and strand * SS: single-stranded tag families where membership is + defined by genome coordinates, strand and UMI; ie. 50/A and 50/B are considered different tag families. 
- * DS: double-stranded tag families where membership is collapsed across single-stranded tag families from the same + * DS: double-stranded tag families where membership is collapsed across + single-stranded tag families from the same double-stranded source molecule; i.e. 50/A and 50/B become one family - Requirements - ------------ - - For plots to be generated R must be installed and the ggplot2 package installed with suggested dependencies. - Successfully executing the following in R will ensure a working installation: + Requirements ------------ - install.packages("ggplot2", repos="http://cran.us.r-project.org", dependencies=TRUE) + For plots to be generated R must be installed and the ggplot2 package + installed with suggested dependencies. Successfully executing the following in + R will ensure a working installation: + install.packages("ggplot2", repos="http://cran.us.r-project.org", + dependencies=TRUE) label: fgbio_collect_duplex_seq_metrics_1.2.0 arguments: - position: 0 @@ -206,6 +216,7 @@ arguments: - position: 0 valueFrom: CollectDuplexSeqMetrics requirements: + - class: ShellCommandRequirement - class: ResourceRequirement ramMin: 1000 coresMin: 4 From 3f25268b151fb281f2d0cdaf35d49d0cdf541be6 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 16 Oct 2020 12:34:13 -0400 Subject: [PATCH 257/476] Update fgbio_filter_consensus_reads_1.2.0.cwl --- .../fgbio_filter_consensus_reads_1.2.0.cwl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fgbio_filter_consensus_reads_1.2.0/fgbio_filter_consensus_reads_1.2.0.cwl b/fgbio_filter_consensus_reads_1.2.0/fgbio_filter_consensus_reads_1.2.0.cwl index 47fce2ac..b7c36bde 100644 --- a/fgbio_filter_consensus_reads_1.2.0/fgbio_filter_consensus_reads_1.2.0.cwl +++ b/fgbio_filter_consensus_reads_1.2.0/fgbio_filter_consensus_reads_1.2.0.cwl @@ -34,7 +34,7 @@ inputs: prefix: '--ref' doc: Reference fasta file. secondaryFiles: - - ^.fai + - .fai - ^.dict - id: reverse_per_base_tags type: boolean? 
@@ -48,6 +48,7 @@ inputs: position: 0 prefix: '--min-reads' itemSeparator: ' ' + shellQuote: false doc: >- The minimum number of reads supporting a consensus base/read. (Max 3 values) From 4446e9adc396bbe42b13469919651a9bf6da6a11 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 16 Oct 2020 12:56:31 -0400 Subject: [PATCH 258/476] Update fgbio_filter_consensus_reads_1.2.0.cwl --- .../fgbio_filter_consensus_reads_1.2.0.cwl | 2 -- 1 file changed, 2 deletions(-) diff --git a/fgbio_filter_consensus_reads_1.2.0/fgbio_filter_consensus_reads_1.2.0.cwl b/fgbio_filter_consensus_reads_1.2.0/fgbio_filter_consensus_reads_1.2.0.cwl index b7c36bde..cc51c60e 100644 --- a/fgbio_filter_consensus_reads_1.2.0/fgbio_filter_consensus_reads_1.2.0.cwl +++ b/fgbio_filter_consensus_reads_1.2.0/fgbio_filter_consensus_reads_1.2.0.cwl @@ -152,7 +152,6 @@ doc: >- label: fgbio_filter_consensus_reads_1.2.0 arguments: - position: 0 - prefix: '' valueFrom: |- ${ if(inputs.memory_per_job && inputs.memory_overhead) { @@ -186,7 +185,6 @@ arguments: shellQuote: false valueFrom: '${ return runtime.tmpdir}' - position: 0 - prefix: '' valueFrom: FilterConsensusReads - position: 0 prefix: '--output' From ac506fd09295b0dadb2567a2972979a112bf28d2 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 16 Oct 2020 13:01:20 -0400 Subject: [PATCH 259/476] Update fgbio_filter_consensus_reads_1.2.0.cwl --- .../fgbio_filter_consensus_reads_1.2.0.cwl | 8 -------- 1 file changed, 8 deletions(-) diff --git a/fgbio_filter_consensus_reads_1.2.0/fgbio_filter_consensus_reads_1.2.0.cwl b/fgbio_filter_consensus_reads_1.2.0/fgbio_filter_consensus_reads_1.2.0.cwl index cc51c60e..1cddba03 100644 --- a/fgbio_filter_consensus_reads_1.2.0/fgbio_filter_consensus_reads_1.2.0.cwl +++ b/fgbio_filter_consensus_reads_1.2.0/fgbio_filter_consensus_reads_1.2.0.cwl @@ -195,14 +195,6 @@ arguments: return inputs.output_file_name; return inputs.input.basename.replace(/.bam/,'_filtered.bam'); } - - position: 0 - prefix: '--threads' - 
valueFrom: |- - ${ - if(inputs.number_of_threads) - return inputs.number_of_threads - return runtime.cores - } requirements: - class: ShellCommandRequirement - class: ResourceRequirement From d9b76e9c2ef5c782bd3a19210dbcd83167b319c3 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 16 Oct 2020 13:05:56 -0400 Subject: [PATCH 260/476] Update fgbio_filter_consensus_reads_1.2.0.cwl --- .../fgbio_filter_consensus_reads_1.2.0.cwl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fgbio_filter_consensus_reads_1.2.0/fgbio_filter_consensus_reads_1.2.0.cwl b/fgbio_filter_consensus_reads_1.2.0/fgbio_filter_consensus_reads_1.2.0.cwl index 1cddba03..82b8c255 100644 --- a/fgbio_filter_consensus_reads_1.2.0/fgbio_filter_consensus_reads_1.2.0.cwl +++ b/fgbio_filter_consensus_reads_1.2.0/fgbio_filter_consensus_reads_1.2.0.cwl @@ -81,7 +81,7 @@ inputs: prefix: '--max-no-call-fraction' doc: Maximum fraction of no-calls in the read after filtering - id: min_mean_base_quality - type: int? + type: int inputBinding: position: 0 prefix: '--min-mean-base-quality' From d5faf93f28c8988293fe790a699dfe05a993dbf3 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 16 Oct 2020 13:24:17 -0400 Subject: [PATCH 261/476] Update fgbio_filter_consensus_reads_1.2.0.cwl --- .../fgbio_filter_consensus_reads_1.2.0.cwl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fgbio_filter_consensus_reads_1.2.0/fgbio_filter_consensus_reads_1.2.0.cwl b/fgbio_filter_consensus_reads_1.2.0/fgbio_filter_consensus_reads_1.2.0.cwl index 82b8c255..b857667e 100644 --- a/fgbio_filter_consensus_reads_1.2.0/fgbio_filter_consensus_reads_1.2.0.cwl +++ b/fgbio_filter_consensus_reads_1.2.0/fgbio_filter_consensus_reads_1.2.0.cwl @@ -69,7 +69,7 @@ inputs: itemSeparator: ' ' doc: The maximum error rate for a single consensus base. (Max 3 values) - id: min_base_quality - type: int? 
+ type: int inputBinding: position: 0 prefix: '--min-base-quality' @@ -81,7 +81,7 @@ inputs: prefix: '--max-no-call-fraction' doc: Maximum fraction of no-calls in the read after filtering - id: min_mean_base_quality - type: int + type: int? inputBinding: position: 0 prefix: '--min-mean-base-quality' From 562c9ec9134c7ca63fbf9d094aeb2c07bf8c3331 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 16 Oct 2020 13:57:30 -0400 Subject: [PATCH 262/476] Update fgbio_collect_duplex_seq_metrics_1.2.0.cwl --- ...fgbio_collect_duplex_seq_metrics_1.2.0.cwl | 60 +++++++++++++++---- 1 file changed, 50 insertions(+), 10 deletions(-) diff --git a/fgbio_collect_duplex_seq_metrics_1.2.0/fgbio_collect_duplex_seq_metrics_1.2.0.cwl b/fgbio_collect_duplex_seq_metrics_1.2.0/fgbio_collect_duplex_seq_metrics_1.2.0.cwl index 7fd2e589..f1959ad5 100644 --- a/fgbio_collect_duplex_seq_metrics_1.2.0/fgbio_collect_duplex_seq_metrics_1.2.0.cwl +++ b/fgbio_collect_duplex_seq_metrics_1.2.0/fgbio_collect_duplex_seq_metrics_1.2.0.cwl @@ -24,10 +24,7 @@ inputs: prefix: '--input' doc: Input BAM file generated by GroupReadByUmi. - id: output_prefix - type: string - inputBinding: - position: 0 - prefix: '--output' + type: string? doc: Prefix of output files to write. - id: intervals type: File? 
@@ -81,42 +78,74 @@ outputs: outputBinding: glob: |- ${ - return inputs.output_prefix + '.family_sizes.txt' + if(inputs.output_prefix){ + return inputs.output_prefix + '.family_sizes.txt' + } + else{ + return inputs.input.basename.replace('.bam','.family_sizes.txt') + } } - id: fgbio_collect_duplex_seq_metrics_duplex_family_size type: File outputBinding: glob: |- ${ - return inputs.output_prefix + '.duplex_family_sizes.txt' + if(inputs.output_prefix){ + return inputs.output_prefix + '.duplex_family_sizes.txt' + } + else{ + return inputs.input.basename.replace('.bam','.duplex_family_sizes.txt') + } } - id: fgbio_collect_duplex_seq_metrics_duplex_yield_metrics type: File outputBinding: glob: |- ${ - return inputs.output_prefix + '.duplex_yield_metrics.txt' + if(inputs.output_prefix){ + return inputs.output_prefix + '.duplex_yield_metrics.txt' + } + else{ + return inputs.input.basename.replace('.bam','.duplex_yield_metrics.txt') + } } - id: fgbio_collect_duplex_seq_metrics_umi_counts type: File outputBinding: glob: |- ${ - return inputs.output_prefix + '.umi_counts.txt' + if(inputs.output_prefix){ + return inputs.output_prefix + '.umi_counts.txt' + } + else{ + return inputs.input.basename.replace('.bam','.umi_counts.txt') + } } - id: fgbio_collect_duplex_seq_metrics_duplex_qc type: File? outputBinding: glob: |- ${ - return inputs.output_prefix + '.duplex_qc.pdf' + if(inputs.output_prefix){ + return inputs.output_prefix + '.duplex_qc.pdf' + } + else{ + return inputs.input.basename.replace('.bam','.duplex_qc.pdf') + } } - id: fgbio_collect_duplex_seq_metrics_duplex_umi_counts type: File? outputBinding: glob: |- ${ - return inputs.output_prefix + '.duplex_umi_counts.txt' + if(inputs.duplex_umi_counts){ + if(inputs.output_prefix){ + return inputs.output_prefix + '.duplex_umi_counts.txt' + } + else{ + return inputs.input.basename.replace('.bam','.duplex_umi_counts.txt') + } + } } doc: >- Collects a suite of metrics to QC duplex sequencing data. 
@@ -215,6 +244,17 @@ arguments: valueFrom: '${ return runtime.tmpdir}' - position: 0 valueFrom: CollectDuplexSeqMetrics + - position: 0 + prefix: '--output' + valueFrom: |- + ${ + if(inputs.output_prefix){ + return input.output_prefix + } + else{ + return inputs.input.basename.replace(/.bam/,'') + } + } requirements: - class: ShellCommandRequirement - class: ResourceRequirement From 2e720d26ee4534985794f3419bb30f2434289f1f Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Sat, 17 Oct 2020 01:05:09 -0400 Subject: [PATCH 263/476] Update fgbio_collect_duplex_seq_metrics_1.2.0.cwl --- .../fgbio_collect_duplex_seq_metrics_1.2.0.cwl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fgbio_collect_duplex_seq_metrics_1.2.0/fgbio_collect_duplex_seq_metrics_1.2.0.cwl b/fgbio_collect_duplex_seq_metrics_1.2.0/fgbio_collect_duplex_seq_metrics_1.2.0.cwl index f1959ad5..0880f1b5 100644 --- a/fgbio_collect_duplex_seq_metrics_1.2.0/fgbio_collect_duplex_seq_metrics_1.2.0.cwl +++ b/fgbio_collect_duplex_seq_metrics_1.2.0/fgbio_collect_duplex_seq_metrics_1.2.0.cwl @@ -249,7 +249,7 @@ arguments: valueFrom: |- ${ if(inputs.output_prefix){ - return input.output_prefix + return inputs.output_prefix } else{ return inputs.input.basename.replace(/.bam/,'') From 9692f943506179b8eb7603ea95bf55076f00a91b Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Mon, 23 Nov 2020 13:54:18 -0500 Subject: [PATCH 264/476] Update SUMMARY.md --- docs/SUMMARY.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 18f31ee0..d215c765 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -3,7 +3,6 @@ - [MSK-ACCESS command-line tools](README.md) - ABRA2 - [v2.17](../abra2_2.17/README.md) \* [v2.19](../abra2_2.19/README.md) - - ABRA2 - [v2.22](../abra2_2.22/README.md) \* [v2.22](../abra2_2.22/README.md) - Bedtools _ [genomecov v2.28.0_cv2](../bedtools_genomecov_v2.28.0_cv2/README.md) From b0856e72cc1f4063b1ffc02e4ea22aed0ddf3249 Mon Sep 17 00:00:00 2001 From: Ronak 
Shah Date: Mon, 23 Nov 2020 14:01:09 -0500 Subject: [PATCH 265/476] Update SUMMARY.md --- docs/SUMMARY.md | 118 ++++++++++++++++++++++++------------------------ 1 file changed, 59 insertions(+), 59 deletions(-) diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index d215c765..9def4653 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -1,61 +1,61 @@ # Table of contents -- [MSK-ACCESS command-line tools](README.md) - - ABRA2 - - [v2.17](../abra2_2.17/README.md) \* [v2.19](../abra2_2.19/README.md) - - [v2.22](../abra2_2.22/README.md) \* [v2.22](../abra2_2.22/README.md) - - Bedtools - _ [genomecov v2.28.0_cv2](../bedtools_genomecov_v2.28.0_cv2/README.md) - _ [merge v2.28.0_cv2](../bedtools_merge_v2.28.0_cv2/README.md) - - Disambiguate - - [v1.0.0](../disambiguate_1.0.0/README.md) - - Fgbio - - [CallDuplexConsensusReads v1.2.0](../fgbio_call_duplex_consensus_reads_1.2.0/README.md) - - [CollectDuplexSeqMetrics v1.2.0](../fgbio_collect_duplex_seq_metrics_1.2.0/README.md) - - [GroupReadsByUmi v1.2.0](../fgbio_group_reads_by_umi_1.2.0/README.md) - - [FastqToBam v1.2.0](../fgbio_fastq_to_bam_1.2.0/README.md) - - [FilterConsensusReads v1.2.0](../fgbio_filter_consensus_reads_1.2.0/README.md) - - [simplex_filter v0.1.8](../fgbio_postprocessing_simplex_filter_0.1.8/README.md) - - GATK - - [ApplyBQSR v4.1.2.0](../gatk_ApplyBQSR_4.1.2.0/README.md) - - [ApplyBQSR v4.1.8.1](../gatk_apply_bqsr_4.1.8.1/README.md) - - [BaseRecalibrator v4.1.2.0](../gatk_BaseRecalibrator_4.1.2.0/README.md) - - [BaseRecalibrator v4.1.8.1](../gatk_base_recalibrator_4.1.8.1/README.md) - - [MergeBamAlignment v4.1.8.0](../gatk_merge_bam_alignment_4.1.8.0/README.md) - - [MergeSamFiles v4.1.8.0](../gatk_merge_sam_files_4.1.8.0/README.md) - - [SamToFastq v4.1.8.0](../gatk_sam_to_fastq_4_1_8_0/README.md) - - Manta - - [Manta v1.5.1](../manta_1.5.1/README.md) - - Marianas - - [Collapsing First Pass v1.8.1](../marianas_collapsing_first_pass_1.8.1/README.md) - - [Collapsing Second Pass 
v1.8.1](../marianas_collapsing_second_pass_1.8.1/README.md) - - [Process Loop UMI v1.8.1](../marianas_process_loop_umi_1.8.1/README.md) - - [Seprate BAMs v1.8.1](../marianas_separate_bams_1.8.1/README.md) - - MuTect - - [MuTect 1.1.5](../mutect_1.1.5/README.md) - - Merge Fastq - - [v0.1.7](../merge_fastq_0.1.7/README.md) - - Picard Tools - - [AddOrReplaceReadGroups v1.96](../picard_add_or_replace_read_groups_1.96/README.md) - - [AddOrReplaceReadGroups v2.21.2](../picard_add_or_replace_read_groups_2.21.2/README.md) - - [AddOrReplaceReadGroups v4.1.8.1](../picard_add_or_replace_read_groups_4.1.8.1/README.md) - - [CollectAlignmentSummaryMetrics v2.8.1](../picard_collect_alignment_summary_metrics_2.8.1/README.md) - - [CollectAlignmentSummaryMetrics v2.21.2](../picard_collect_alignment_summary_metrics_2.21.2/README.md) - - [CollectMultipleMetrics v2.8.1](../picard_collectmultiplemetric_2.8.1/README.md) - - [CollectMultipleMetrics v2.21.2](../picard_collectmultiplemetric_2.21.2/README.md) - - [FixMateInformation v1.96](../picard_fix_mate_information_1.96/README.md) - - [FixMateInformation v2.21.2](../picard_fix_mate_information_2.21.2/README.md) - - [FixMateInformation v4.1.8.1](../picard_fix_mate_information_4.1.8.1/README.md) - - [HSmetrics v2.8.1](../picard_hsmetrics_2.8.1/README.md) - - [HSmetrics v2.21.2](../picard_hsmetrics_2.21.2/README.md) - - [MarkDuplicates v1.96](../picard_mark_duplicates_1.96/README.md) - - [MarkDuplicates v2.8.1](../picard_mark_duplicates_2.8.1/README.md) - - [MarkDuplicates v2.21.2](../picard_mark_duplicates_2.21.2/README.md) - - [MarkDuplicates v4.1.8.1](../picard_mark_duplicates_4.1.8.1/README.md) - - Trim Galore - - [v0.6.2](../trim_galore_0.6.2/README.md) - - Ubuntu utilites - - [v18.04](../utilities_ubuntu_18.04/README.md) - - Waltz - - [CountReads v3.1.1](../waltz_count_reads_3.1.1/README.md) - - [PileupMetrics v3.1.1](../waltz_pileupmatrices_3.1.1/README.md) +- [MSK-ACCESS command-line tools](README.md) + - ABRA2 + - 
[v2.17](../abra2_2.17/README.md) \* [v2.19](../abra2_2.19/README.md) + - [v2.22](../abra2_2.22/README.md) \* [v2.22](../abra2_2.22/README.md) + - Bedtools + - [genomecov v2.28.0_cv2](../bedtools_genomecov_v2.28.0_cv2/README.md) + - [merge v2.28.0_cv2](../bedtools_merge_v2.28.0_cv2/README.md) + - Disambiguate + - [v1.0.0](../disambiguate_1.0.0/README.md) + - Fgbio + - [CallDuplexConsensusReads v1.2.0](../fgbio_call_duplex_consensus_reads_1.2.0/README.md) + - [CollectDuplexSeqMetrics v1.2.0](../fgbio_collect_duplex_seq_metrics_1.2.0/README.md) + - [GroupReadsByUmi v1.2.0](../fgbio_group_reads_by_umi_1.2.0/README.md) + - [FastqToBam v1.2.0](../fgbio_fastq_to_bam_1.2.0/README.md) + - [FilterConsensusReads v1.2.0](../fgbio_filter_consensus_reads_1.2.0/README.md) + - [simplex_filter v0.1.8](../fgbio_postprocessing_simplex_filter_0.1.8/README.md) + - GATK + - [ApplyBQSR v4.1.2.0](../gatk_ApplyBQSR_4.1.2.0/README.md) + - [ApplyBQSR v4.1.8.1](../gatk_apply_bqsr_4.1.8.1/README.md) + - [BaseRecalibrator v4.1.2.0](../gatk_BaseRecalibrator_4.1.2.0/README.md) + - [BaseRecalibrator v4.1.8.1](../gatk_base_recalibrator_4.1.8.1/README.md) + - [MergeBamAlignment v4.1.8.0](../gatk_merge_bam_alignment_4.1.8.0/README.md) + - [MergeSamFiles v4.1.8.0](../gatk_merge_sam_files_4.1.8.0/README.md) + - [SamToFastq v4.1.8.0](../gatk_sam_to_fastq_4_1_8_0/README.md) + - Manta + - [Manta v1.5.1](../manta_1.5.1/README.md) + - Marianas + - [Collapsing First Pass v1.8.1](../marianas_collapsing_first_pass_1.8.1/README.md) + - [Collapsing Second Pass v1.8.1](../marianas_collapsing_second_pass_1.8.1/README.md) + - [Process Loop UMI v1.8.1](../marianas_process_loop_umi_1.8.1/README.md) + - [Seprate BAMs v1.8.1](../marianas_separate_bams_1.8.1/README.md) + - MuTect + - [MuTect 1.1.5](../mutect_1.1.5/README.md) + - Merge Fastq + - [v0.1.7](../merge_fastq_0.1.7/README.md) + - Picard Tools + - [AddOrReplaceReadGroups v1.96](../picard_add_or_replace_read_groups_1.96/README.md) + - [AddOrReplaceReadGroups 
v2.21.2](../picard_add_or_replace_read_groups_2.21.2/README.md) + - [AddOrReplaceReadGroups v4.1.8.1](../picard_add_or_replace_read_groups_4.1.8.1/README.md) + - [CollectAlignmentSummaryMetrics v2.8.1](../picard_collect_alignment_summary_metrics_2.8.1/README.md) + - [CollectAlignmentSummaryMetrics v2.21.2](../picard_collect_alignment_summary_metrics_2.21.2/README.md) + - [CollectMultipleMetrics v2.8.1](../picard_collectmultiplemetric_2.8.1/README.md) + - [CollectMultipleMetrics v2.21.2](../picard_collectmultiplemetric_2.21.2/README.md) + - [FixMateInformation v1.96](../picard_fix_mate_information_1.96/README.md) + - [FixMateInformation v2.21.2](../picard_fix_mate_information_2.21.2/README.md) + - [FixMateInformation v4.1.8.1](../picard_fix_mate_information_4.1.8.1/README.md) + - [HSmetrics v2.8.1](../picard_hsmetrics_2.8.1/README.md) + - [HSmetrics v2.21.2](../picard_hsmetrics_2.21.2/README.md) + - [MarkDuplicates v1.96](../picard_mark_duplicates_1.96/README.md) + - [MarkDuplicates v2.8.1](../picard_mark_duplicates_2.8.1/README.md) + - [MarkDuplicates v2.21.2](../picard_mark_duplicates_2.21.2/README.md) + - [MarkDuplicates v4.1.8.1](../picard_mark_duplicates_4.1.8.1/README.md) + - Trim Galore + - [v0.6.2](../trim_galore_0.6.2/README.md) + - Ubuntu utilites + - [v18.04](../utilities_ubuntu_18.04/README.md) + - Waltz + - [CountReads v3.1.1](../waltz_count_reads_3.1.1/README.md) + - [PileupMetrics v3.1.1](../waltz_pileupmatrices_3.1.1/README.md) From 76a801fb9e6f240b9d6316f6b585e5631ea0bc75 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Mon, 23 Nov 2020 14:09:19 -0500 Subject: [PATCH 266/476] Update SUMMARY.md --- docs/SUMMARY.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 9def4653..e86b3b5e 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -2,8 +2,8 @@ - [MSK-ACCESS command-line tools](README.md) - ABRA2 - - [v2.17](../abra2_2.17/README.md) \* [v2.19](../abra2_2.19/README.md) - - 
[v2.22](../abra2_2.22/README.md) \* [v2.22](../abra2_2.22/README.md) + - [v2.17](../abra2_2.17/README.md) + - [v2.22](../abra2_2.22/README.md) - Bedtools - [genomecov v2.28.0_cv2](../bedtools_genomecov_v2.28.0_cv2/README.md) - [merge v2.28.0_cv2](../bedtools_merge_v2.28.0_cv2/README.md) From 4fb627aa0d18d1dacab3cd1b6582e9725f29ea78 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Mon, 23 Nov 2020 14:13:34 -0500 Subject: [PATCH 267/476] Update README --- README.md | 4 +--- docs/README.md | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 3c2b02cd..650f0f8e 100644 --- a/README.md +++ b/README.md @@ -5,9 +5,7 @@ description: Central location for storing common workflow language based command # MSK-ACCESS command-line tools [![PyPI version](https://badge.fury.io/py/cwl-commandlinetools.svg)](https://badge.fury.io/py/cwl-commandlinetools) -[![Build Status](https://travis-ci.com/msk-access/cwl-commandlinetools.svg?branch=master)](https://travis-ci.org/msk-access/cwl-commandlinetools/) -[![Updates](https://pyup.io/repos/github/msk-access/cwl-commandlinetools/shield.svg)](https://pyup.io/repos/github/msk-access/cwl-commandlinetools/) -[![Python 3](https://pyup.io/repos/github/msk-access/cwl-commandlinetools/python-3-shield.svg)](https://pyup.io/repos/github/msk-access/cwl-commandlinetools/) +[![Build Status](https://travis-ci.com/msk-access/cwl-commandlinetools.svg?branch=master)](https://travis-ci.com/msk-access/cwl-commandlinetools) - Free software: Apache Software License 2.0 * Documentation: [https://msk-access.gitbook.io/command-line-tools-cwl/](https://msk-access.gitbook.io/command-line-tools-cwl/) diff --git a/docs/README.md b/docs/README.md index 3545e793..324e5df3 100644 --- a/docs/README.md +++ b/docs/README.md @@ -6,7 +6,7 @@ description: >- # MSK-ACCESS command-line tools -[![PyPI version](https://badge.fury.io/py/cwl-commandlinetools.svg)](https://badge.fury.io/py/cwl-commandlinetools) [![Build 
Status](https://travis-ci.org/msk-access/cwl-commandlinetools.svg?branch=master)](https://travis-ci.org/msk-access/cwl-commandlinetools/) [![Updates](https://pyup.io/repos/github/msk-access/cwl-commandlinetools/shield.svg)](https://pyup.io/repos/github/msk-access/cwl-commandlinetools/) [![Python 3](https://pyup.io/repos/github/msk-access/cwl-commandlinetools/python-3-shield.svg)](https://pyup.io/repos/github/msk-access/cwl-commandlinetools/) +[![PyPI version](https://badge.fury.io/py/cwl-commandlinetools.svg)](https://badge.fury.io/py/cwl-commandlinetools) [![Build Status](https://travis-ci.com/msk-access/cwl-commandlinetools.svg?branch=master)](https://travis-ci.com/msk-access/cwl-commandlinetools) * Free software: Apache Software License 2.0 * Documentation: [https://msk-access.gitbook.io/command-line-tools-cwl/](https://msk-access.gitbook.io/command-line-tools-cwl/) From ab5c9f7836acb0d49fb26100cc5d3db8d73e2e96 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Tue, 1 Dec 2020 00:40:00 -0500 Subject: [PATCH 268/476] Updating Memory requeirements and removing some default params --- abra2_2.22/abra2_2.22.cwl | 4 +- bwa_mem_0.7.17/bwa_mem_0.7.17.cwl | 15 +--- fastp_0.20.1/fastp_0.20.1.cwl | 73 ++++++++++++------- ...gbio_call_duplex_consensus_reads_1.2.0.cwl | 7 +- ...fgbio_collect_duplex_seq_metrics_1.2.0.cwl | 14 +--- .../fgbio_fastq_to_bam_1.2.0.cwl | 11 +-- .../fgbio_filter_consensus_reads_1.2.0.cwl | 11 +-- .../fgbio_group_reads_by_umi_1.2.0.cwl | 11 +-- ...io_postprocessing_simplex_filter_0.1.8.cwl | 13 ++-- .../gatk_apply_bqsr_4.1.8.1.cwl | 8 +- .../gatk_base_recalibrator_4.1.8.1.cwl | 7 +- ...lect_alignment_summary_metrics_4.1.8.0.cwl | 6 -- .../gatk_merge_bam_alignment_4.1.8.0.cwl | 8 +- .../gatk_merge_sam_files_4.1.8.0.cwl | 8 +- ...ard_add_or_replace_read_groups_4.1.8.1.cwl | 5 +- .../picard_fix_mate_information_4.1.8.1.cwl | 39 +++++----- 16 files changed, 101 insertions(+), 139 deletions(-) diff --git a/abra2_2.22/abra2_2.22.cwl 
b/abra2_2.22/abra2_2.22.cwl index 57f12d3c..c26a5feb 100644 --- a/abra2_2.22/abra2_2.22.cwl +++ b/abra2_2.22/abra2_2.22.cwl @@ -199,11 +199,11 @@ arguments: } else if (!inputs.memory_per_job && inputs.memory_overhead) { - return "-Xmx15G" + return "-Xmx20G" } else { - return "-Xmx15G" + return "-Xmx20G" } } - position: 0 diff --git a/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl b/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl index 9c630577..6eee0064 100644 --- a/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl +++ b/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl @@ -202,8 +202,7 @@ inputs: -k14 -W20 -r10 -A1 -B1 -O1 -E1 -L0 (Oxford Nanopore 2D-reads to ref) intractg: -B9 -O16 -L5 (intra-species contigs to ref) - id: H - type: - - boolean? + type: boolean? inputBinding: position: 0 prefix: '-H' @@ -321,16 +320,8 @@ arguments: } requirements: - class: ResourceRequirement - ramMin: >- - ${ if(inputs.memory_per_job && inputs.memory_overhead) { return - inputs.memory_per_job + inputs.memory_overhead } else if - (inputs.memory_per_job && !inputs.memory_overhead){ return - inputs.memory_per_job + 2000 } else if(!inputs.memory_per_job && - inputs.memory_overhead){ return 32000 + inputs.memory_overhead } else { - return 32000 } } - coresMin: >- - ${ if (inputs.number_of_threads) { return inputs.number_of_threads } else - { return 16 } } + ramMin: 34000 + coresMin: 16 - class: DockerRequirement dockerPull: 'mskaccess/bwa_mem_0.7.17:0.1.0' - class: InlineJavascriptRequirement diff --git a/fastp_0.20.1/fastp_0.20.1.cwl b/fastp_0.20.1/fastp_0.20.1.cwl index 265185e2..633b1d70 100644 --- a/fastp_0.20.1/fastp_0.20.1.cwl +++ b/fastp_0.20.1/fastp_0.20.1.cwl @@ -12,80 +12,94 @@ inputs: - id: read1_input type: File inputBinding: + position: 0 prefix: '--in1' doc: | - read1 input file name + read1 input file name - id: read1_output_path type: string inputBinding: + position: 0 prefix: '--out1' doc: | - read1 output file name + read1 output file name - id: read2_input type: File? 
inputBinding: + position: 0 prefix: '--in2' doc: | - read2 input file name, for PE data + read2 input file name, for PE data - id: read2_output_path type: string? inputBinding: + position: 0 prefix: '--out2' doc: | - read2 output file name + read2 output file name - id: unpaired1_path type: string? inputBinding: + position: 0 prefix: '--unpaired1' - doc: | - for PE input, if read1 passed QC but read2 not, it will be written to unpaired1. + doc: > + for PE input, if read1 passed QC but read2 not, it will be written to + unpaired1. - id: unpaired2_path type: string? inputBinding: + position: 0 prefix: '--unpaired2' - doc: | - for PE input, if read2 passed QC but read1 not, it will be written to unpaired2. + doc: > + for PE input, if read2 passed QC but read1 not, it will be written to + unpaired2. - id: failed_reads_path type: string? inputBinding: + position: 0 prefix: '--failed_out' doc: | - specify the file to store reads that cannot pass the filters. + specify the file to store reads that cannot pass the filters. - id: read1_adapter_sequence type: string? inputBinding: - prefix: '--adapter_sequence' - doc: | - the adapter for read1. For SE data, if not specified, the adapter will be auto-detected. For PE data, this is used if R1/R2 are found not overlapped. + position: 0 + prefix: '--adapter_sequence' + doc: > + the adapter for read1. For SE data, if not specified, the adapter will be + auto-detected. For PE data, this is used if R1/R2 are found not + overlapped. - id: read2_adapter_sequence type: string? inputBinding: - prefix: '--adapter_sequence_r2' - doc: | - the adapter for read2. For PE data, this is used if R1/R2 are found not overlapped. + position: 0 + prefix: '--adapter_sequence_r2' + doc: > + the adapter for read2. For PE data, this is used if R1/R2 are found not + overlapped. - id: minimum_read_length type: int? 
inputBinding: - prefix: '--length_required' + position: 0 + prefix: '--length_required' doc: | - reads shorter than length_required will be discarded, default is 15. - - - id: json_output_path + reads shorter than length_required will be discarded, default is 15. + - default: fastp.json + id: json_output_path type: string inputBinding: - prefix: '--json' + position: 0 + prefix: '--json' doc: | - the json format report file name - default: "fastp.json" - - - id: html_output_path + the json format report file name + - default: fastp.html + id: html_output_path type: string - default: "fastp.html" inputBinding: - prefix: '--html' + position: 0 + prefix: '--html' doc: | - the html format report file name - + the html format report file name outputs: - id: fastp_json_output type: File @@ -114,6 +128,9 @@ outputs: doc: Setup and execute Fastp label: fastp_0.20.1 requirements: + - class: ResourceRequirement + ramMin: 16000 + coresMin: 2 - class: DockerRequirement dockerPull: 'quay.io/biocontainers/fastp:0.20.1--h8b12597_0' - class: InlineJavascriptRequirement diff --git a/fgbio_call_duplex_consensus_reads_1.2.0/fgbio_call_duplex_consensus_reads_1.2.0.cwl b/fgbio_call_duplex_consensus_reads_1.2.0/fgbio_call_duplex_consensus_reads_1.2.0.cwl index ece82c14..fbbdebb7 100644 --- a/fgbio_call_duplex_consensus_reads_1.2.0/fgbio_call_duplex_consensus_reads_1.2.0.cwl +++ b/fgbio_call_duplex_consensus_reads_1.2.0/fgbio_call_duplex_consensus_reads_1.2.0.cwl @@ -159,11 +159,6 @@ arguments: } - position: 0 valueFrom: '-XX:-UseGCOverheadLimit' - - position: 0 - prefix: '-Djava.io.tmpdir=' - separate: false - shellQuote: false - valueFrom: '${ return runtime.tmpdir}' - position: 0 valueFrom: CallDuplexConsensusReads - position: 0 @@ -186,7 +181,7 @@ arguments: requirements: - class: ShellCommandRequirement - class: ResourceRequirement - ramMin: 1000 + ramMin: 16000 coresMin: 16 - class: DockerRequirement dockerPull: 'quay.io/biocontainers/fgbio:1.2.0--0' diff --git 
a/fgbio_collect_duplex_seq_metrics_1.2.0/fgbio_collect_duplex_seq_metrics_1.2.0.cwl b/fgbio_collect_duplex_seq_metrics_1.2.0/fgbio_collect_duplex_seq_metrics_1.2.0.cwl index 0880f1b5..8f0aa087 100644 --- a/fgbio_collect_duplex_seq_metrics_1.2.0/fgbio_collect_duplex_seq_metrics_1.2.0.cwl +++ b/fgbio_collect_duplex_seq_metrics_1.2.0/fgbio_collect_duplex_seq_metrics_1.2.0.cwl @@ -229,19 +229,14 @@ arguments: } } else if(!inputs.memory_per_job && inputs.memory_overhead){ - return "-Xmx10G" + return "-Xmx12G" } else { - return "-Xmx10G" + return "-Xmx12G" } } - position: 0 valueFrom: '-XX:-UseGCOverheadLimit' - - position: 0 - prefix: '-Djava.io.tmpdir=' - separate: false - shellQuote: false - valueFrom: '${ return runtime.tmpdir}' - position: 0 valueFrom: CollectDuplexSeqMetrics - position: 0 @@ -256,10 +251,9 @@ arguments: } } requirements: - - class: ShellCommandRequirement - class: ResourceRequirement - ramMin: 1000 - coresMin: 4 + ramMin: 16000 + coresMin: 2 - class: DockerRequirement dockerPull: 'quay.io/biocontainers/fgbio:1.2.0--0' - class: InlineJavascriptRequirement diff --git a/fgbio_fastq_to_bam_1.2.0/fgbio_fastq_to_bam_1.2.0.cwl b/fgbio_fastq_to_bam_1.2.0/fgbio_fastq_to_bam_1.2.0.cwl index 7fa258b0..393e64ab 100644 --- a/fgbio_fastq_to_bam_1.2.0/fgbio_fastq_to_bam_1.2.0.cwl +++ b/fgbio_fastq_to_bam_1.2.0/fgbio_fastq_to_bam_1.2.0.cwl @@ -188,19 +188,14 @@ arguments: } } else if(!inputs.memory_per_job && inputs.memory_overhead){ - return "-Xmx4G" + return "-Xmx12G" } else { - return "-Xmx4G" + return "-Xmx12G" } } - position: 0 valueFrom: '-XX:-UseGCOverheadLimit' - - position: 0 - prefix: '-Djava.io.tmpdir=' - separate: false - shellQuote: false - valueFrom: '${ return runtime.tmpdir}' - position: 0 valueFrom: FastqToBam - position: 0 @@ -215,7 +210,7 @@ arguments: requirements: - class: ShellCommandRequirement - class: ResourceRequirement - ramMin: 10000 + ramMin: 16000 coresMin: 2 - class: DockerRequirement dockerPull: 
'quay.io/biocontainers/fgbio:1.2.0--0' diff --git a/fgbio_filter_consensus_reads_1.2.0/fgbio_filter_consensus_reads_1.2.0.cwl b/fgbio_filter_consensus_reads_1.2.0/fgbio_filter_consensus_reads_1.2.0.cwl index b857667e..7b041d08 100644 --- a/fgbio_filter_consensus_reads_1.2.0/fgbio_filter_consensus_reads_1.2.0.cwl +++ b/fgbio_filter_consensus_reads_1.2.0/fgbio_filter_consensus_reads_1.2.0.cwl @@ -171,19 +171,14 @@ arguments: } } else if(!inputs.memory_per_job && inputs.memory_overhead){ - return "-Xmx10G" + return "-Xmx12G" } else { - return "-Xmx10G" + return "-Xmx12G" } } - position: 0 valueFrom: '-XX:-UseGCOverheadLimit' - - position: 0 - prefix: '-Djava.io.tmpdir=' - separate: false - shellQuote: false - valueFrom: '${ return runtime.tmpdir}' - position: 0 valueFrom: FilterConsensusReads - position: 0 @@ -198,7 +193,7 @@ arguments: requirements: - class: ShellCommandRequirement - class: ResourceRequirement - ramMin: 4000 + ramMin: 16000 coresMin: 2 - class: DockerRequirement dockerPull: 'quay.io/biocontainers/fgbio:1.2.0--0' diff --git a/fgbio_group_reads_by_umi_1.2.0/fgbio_group_reads_by_umi_1.2.0.cwl b/fgbio_group_reads_by_umi_1.2.0/fgbio_group_reads_by_umi_1.2.0.cwl index 6053267f..0463147b 100644 --- a/fgbio_group_reads_by_umi_1.2.0/fgbio_group_reads_by_umi_1.2.0.cwl +++ b/fgbio_group_reads_by_umi_1.2.0/fgbio_group_reads_by_umi_1.2.0.cwl @@ -190,19 +190,14 @@ arguments: } } else if(!inputs.memory_per_job && inputs.memory_overhead){ - return "-Xmx10G" + return "-Xmx12G" } else { - return "-Xmx10G" + return "-Xmx12G" } } - position: 0 valueFrom: '-XX:-UseGCOverheadLimit' - - position: 0 - prefix: '-Djava.io.tmpdir=' - separate: false - shellQuote: false - valueFrom: '${ return runtime.tmpdir}' - position: 0 valueFrom: GroupReadsByUmi - position: 0 @@ -217,7 +212,7 @@ arguments: requirements: - class: ShellCommandRequirement - class: ResourceRequirement - ramMin: 15000 + ramMin: 16000 coresMin: 2 - class: DockerRequirement dockerPull: 
'quay.io/biocontainers/fgbio:1.2.0--0' diff --git a/fgbio_postprocessing_simplex_filter_0.1.8/fgbio_postprocessing_simplex_filter_0.1.8.cwl b/fgbio_postprocessing_simplex_filter_0.1.8/fgbio_postprocessing_simplex_filter_0.1.8.cwl index ad1514f3..5f16fdc3 100644 --- a/fgbio_postprocessing_simplex_filter_0.1.8/fgbio_postprocessing_simplex_filter_0.1.8.cwl +++ b/fgbio_postprocessing_simplex_filter_0.1.8/fgbio_postprocessing_simplex_filter_0.1.8.cwl @@ -12,19 +12,22 @@ inputs: - id: input_bam type: File inputBinding: - prefix: --input_bam + position: 0 + prefix: '--input_bam' doc: Input file (bam or sam). Required. secondaryFiles: - ^.bai - id: output_file_name type: string? inputBinding: - prefix: --output_filename + position: 0 + prefix: '--output_filename' doc: Output file (bam or sam). - id: min_simplex_reads type: int? inputBinding: - prefix: --min_simplex_reads + position: 0 + prefix: '--min_simplex_reads' doc: Minimum number of simplex reads to pass filter for consensus reads outputs: - id: fgbio_postprocessing_simplex_bam @@ -43,8 +46,8 @@ outputs: label: fgbio_postprocessing_simplex_filter_0.1.8 requirements: - class: ResourceRequirement - ramMin: 2000 - coresMin: 1 + ramMin: 16000 + coresMin: 2 - class: DockerRequirement dockerPull: 'mskaccess/fgbio_postprocessing:0.2.0' - class: InlineJavascriptRequirement diff --git a/gatk_apply_bqsr_4.1.8.1/gatk_apply_bqsr_4.1.8.1.cwl b/gatk_apply_bqsr_4.1.8.1/gatk_apply_bqsr_4.1.8.1.cwl index 569ce5c3..9e1b6b74 100644 --- a/gatk_apply_bqsr_4.1.8.1/gatk_apply_bqsr_4.1.8.1.cwl +++ b/gatk_apply_bqsr_4.1.8.1/gatk_apply_bqsr_4.1.8.1.cwl @@ -266,9 +266,9 @@ arguments: return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" } } else if(!inputs.memory_per_job && inputs.memory_overhead){ - return "-Xmx4G" + return "-Xmx12G" } else { - return "-Xmx4G" + return "-Xmx12G" } } - position: 2 @@ -290,8 +290,8 @@ arguments: valueFrom: ApplyBQSR requirements: - class: ResourceRequirement - ramMin: 10000 - coresMin: 8 + 
ramMin: 16000 + coresMin: 4 - class: DockerRequirement dockerPull: 'broadinstitute/gatk:4.1.8.1' - class: InlineJavascriptRequirement diff --git a/gatk_base_recalibrator_4.1.8.1/gatk_base_recalibrator_4.1.8.1.cwl b/gatk_base_recalibrator_4.1.8.1/gatk_base_recalibrator_4.1.8.1.cwl index ec5caec6..5b280ef1 100644 --- a/gatk_base_recalibrator_4.1.8.1/gatk_base_recalibrator_4.1.8.1.cwl +++ b/gatk_base_recalibrator_4.1.8.1/gatk_base_recalibrator_4.1.8.1.cwl @@ -296,9 +296,9 @@ arguments: return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" } } else if(!inputs.memory_per_job && inputs.memory_overhead){ - return "-Xmx4G" + return "-Xmx12G" } else { - return "-Xmx4G" + return "-Xmx12G" } } - position: 1 @@ -318,9 +318,6 @@ arguments: return inputs.input.basename.replace(/.bam/, '_bqsr.table') } } - - position: 2 - prefix: '--verbosity' - valueFrom: INFO requirements: - class: ResourceRequirement ramMin: 32000 diff --git a/gatk_collect_alignment_summary_metrics_4.1.8.0/gatk_collect_alignment_summary_metrics_4.1.8.0.cwl b/gatk_collect_alignment_summary_metrics_4.1.8.0/gatk_collect_alignment_summary_metrics_4.1.8.0.cwl index 6c462c77..1ecbbe62 100644 --- a/gatk_collect_alignment_summary_metrics_4.1.8.0/gatk_collect_alignment_summary_metrics_4.1.8.0.cwl +++ b/gatk_collect_alignment_summary_metrics_4.1.8.0/gatk_collect_alignment_summary_metrics_4.1.8.0.cwl @@ -195,12 +195,6 @@ arguments: - position: 0 prefix: '--TMP_DIR' valueFrom: $(runtime.tmpdir) - - position: 0 - prefix: '--COMPRESSION_LEVEL' - valueFrom: '2' - - position: 0 - prefix: '--MAX_RECORDS_IN_RAM' - valueFrom: '50000' - position: 2 prefix: '-O' valueFrom: |- diff --git a/gatk_merge_bam_alignment_4.1.8.0/gatk_merge_bam_alignment_4.1.8.0.cwl b/gatk_merge_bam_alignment_4.1.8.0/gatk_merge_bam_alignment_4.1.8.0.cwl index ec80e647..147786c6 100644 --- a/gatk_merge_bam_alignment_4.1.8.0/gatk_merge_bam_alignment_4.1.8.0.cwl +++ b/gatk_merge_bam_alignment_4.1.8.0/gatk_merge_bam_alignment_4.1.8.0.cwl @@ 
-522,15 +522,9 @@ arguments: - position: 0 prefix: '--TMP_DIR' valueFrom: $(runtime.tmpdir) - - position: 0 - prefix: '--COMPRESSION_LEVEL' - valueFrom: '2' - - position: 0 - prefix: '--MAX_RECORDS_IN_RAM' - valueFrom: '50000' requirements: - class: ResourceRequirement - ramMin: 12000 + ramMin: 17000 coresMin: 2 - class: DockerRequirement dockerPull: 'broadinstitute/gatk:4.1.8.0' diff --git a/gatk_merge_sam_files_4.1.8.0/gatk_merge_sam_files_4.1.8.0.cwl b/gatk_merge_sam_files_4.1.8.0/gatk_merge_sam_files_4.1.8.0.cwl index 53001cfd..e18ab0ce 100644 --- a/gatk_merge_sam_files_4.1.8.0/gatk_merge_sam_files_4.1.8.0.cwl +++ b/gatk_merge_sam_files_4.1.8.0/gatk_merge_sam_files_4.1.8.0.cwl @@ -216,12 +216,6 @@ arguments: - position: 0 prefix: '--TMP_DIR' valueFrom: $(runtime.tmpdir) - - position: 0 - prefix: '--COMPRESSION_LEVEL' - valueFrom: '2' - - position: 0 - prefix: '--MAX_RECORDS_IN_RAM' - valueFrom: '50000' - position: 2 prefix: '-O' valueFrom: |- @@ -234,7 +228,7 @@ arguments: } requirements: - class: ResourceRequirement - ramMin: 12000 + ramMin: 17000 coresMin: 2 - class: DockerRequirement dockerPull: 'broadinstitute/gatk:4.1.8.0' diff --git a/picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl b/picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl index f2a4d7dd..361b7b94 100644 --- a/picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl +++ b/picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl @@ -174,9 +174,6 @@ arguments: - position: 0 shellQuote: false valueFrom: '-XX:-UseGCOverheadLimit' - - position: 0 - shellQuote: false - valueFrom: '-Djava.io.tmpdir=$(runtime.tmpdir)' - position: 0 prefix: '-jar' valueFrom: /gatk/gatk-package-4.1.8.1-local.jar @@ -196,7 +193,7 @@ arguments: requirements: - class: ShellCommandRequirement - class: ResourceRequirement - ramMin: 16000 + ramMin: 17000 coresMin: 2 - class: 
DockerRequirement dockerPull: 'broadinstitute/gatk:4.1.8.1' diff --git a/picard_fix_mate_information_4.1.8.1/picard_fix_mate_information_4.1.8.1.cwl b/picard_fix_mate_information_4.1.8.1/picard_fix_mate_information_4.1.8.1.cwl index d1fcb70a..11db16cf 100644 --- a/picard_fix_mate_information_4.1.8.1/picard_fix_mate_information_4.1.8.1.cwl +++ b/picard_fix_mate_information_4.1.8.1/picard_fix_mate_information_4.1.8.1.cwl @@ -21,7 +21,7 @@ inputs: type: File inputBinding: position: 0 - prefix: -I + prefix: '-I' doc: The input file to fix. This option may be specified 0 or more times secondaryFiles: - ^.bai @@ -32,7 +32,7 @@ inputs: type: string? inputBinding: position: 0 - prefix: -SO + prefix: '-SO' doc: >- Optional sort order to output in. If not supplied OUTPUT is in the same order as INPUT.Default value: null. Possible values: {unsorted, queryname, @@ -41,7 +41,7 @@ inputs: type: string? inputBinding: position: 0 - prefix: --VALIDATION_STRINGENCY + prefix: '--VALIDATION_STRINGENCY' doc: >- Validation stringency for all SAM files read by this program. Setting stringency to SILENT can improve performance when processing a BAM file in @@ -52,7 +52,7 @@ inputs: type: int? inputBinding: position: 0 - prefix: --COMPRESSION_LEVEL + prefix: '--COMPRESSION_LEVEL' doc: >- Compression level for all compressed files created (e.g. BAM and GELI). Default value:5. This option can be set to 'null' to clear the default @@ -61,22 +61,24 @@ inputs: type: boolean? inputBinding: position: 0 - prefix: --USE_JDK_DEFLATER + prefix: '--USE_JDK_DEFLATER' doc: >- - Use the JDK Deflater instead of the Intel Deflater for writing compressed output + Use the JDK Deflater instead of the Intel Deflater for writing compressed + output - id: use_jdk_inflater type: boolean? 
inputBinding: position: 0 - prefix: --USE_JDK_INFLATER + prefix: '--USE_JDK_INFLATER' doc: >- - Use the JDK Inflater instead of the Intel Inflater for reading compressed input + Use the JDK Inflater instead of the Intel Inflater for reading compressed + input - default: true id: create_bam_index type: boolean? inputBinding: position: 0 - prefix: --CREATE_INDEX + prefix: '--CREATE_INDEX' doc: >- Whether to create a BAM index when writing a coordinate-sorted BAM file. Default value:false. This option can be set to 'null' to clear the default @@ -98,6 +100,7 @@ outputs: label: picard_fix_mate_information_4.1.8.1 arguments: - position: 0 + prefix: '' valueFrom: |- ${ if(inputs.memory_per_job && inputs.memory_overhead) { @@ -117,28 +120,25 @@ arguments: } } else if(!inputs.memory_per_job && inputs.memory_overhead){ - return "-Xmx15G" + return "-Xmx20G" } else { - return "-Xmx15G" + return "-Xmx20G" } } - position: 0 - valueFrom: "-XX:-UseGCOverheadLimit" - shellQuote: false - - position: 0 - valueFrom: "-Djava.io.tmpdir=$(runtime.tmpdir)" shellQuote: false + valueFrom: '-XX:-UseGCOverheadLimit' - position: 0 prefix: '-jar' valueFrom: /gatk/gatk-package-4.1.8.1-local.jar - position: 0 valueFrom: FixMateInformation - position: 0 - prefix: --TMP_DIR - valueFrom: "$(runtime.tmpdir)" + prefix: '--TMP_DIR' + valueFrom: $(runtime.tmpdir) - position: 0 - prefix: -O + prefix: '-O' valueFrom: |- ${ if(inputs.output_file_name){ @@ -148,9 +148,10 @@ arguments: } } requirements: + - class: ShellCommandRequirement - class: ResourceRequirement ramMin: 25000 - coresMin: 2 + coresMin: 4 - class: DockerRequirement dockerPull: 'broadinstitute/gatk:4.1.8.1' - class: InlineJavascriptRequirement From 48e6033ff124d09567acfc0ae860049b426d1398 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Tue, 1 Dec 2020 01:40:57 -0500 Subject: [PATCH 269/476] Update picard_fix_mate_information_4.1.8.1.cwl --- .../picard_fix_mate_information_4.1.8.1.cwl | 1 - 1 file changed, 1 deletion(-) diff --git 
a/picard_fix_mate_information_4.1.8.1/picard_fix_mate_information_4.1.8.1.cwl b/picard_fix_mate_information_4.1.8.1/picard_fix_mate_information_4.1.8.1.cwl index 11db16cf..b544cb4d 100644 --- a/picard_fix_mate_information_4.1.8.1/picard_fix_mate_information_4.1.8.1.cwl +++ b/picard_fix_mate_information_4.1.8.1/picard_fix_mate_information_4.1.8.1.cwl @@ -100,7 +100,6 @@ outputs: label: picard_fix_mate_information_4.1.8.1 arguments: - position: 0 - prefix: '' valueFrom: |- ${ if(inputs.memory_per_job && inputs.memory_overhead) { From 1f6e1cf20433bd53bbc0730770ab3662912cadad Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Wed, 2 Dec 2020 12:25:53 -0500 Subject: [PATCH 270/476] Adding use of threads correctly --- abra2_2.22/abra2_2.22.cwl | 11 ++++++++--- .../fgbio_call_duplex_consensus_reads_1.2.0.cwl | 2 +- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/abra2_2.22/abra2_2.22.cwl b/abra2_2.22/abra2_2.22.cwl index c26a5feb..4a50234f 100644 --- a/abra2_2.22/abra2_2.22.cwl +++ b/abra2_2.22/abra2_2.22.cwl @@ -17,9 +17,6 @@ inputs: doc: Memory overhead per job in megabytes - id: number_of_threads type: int? 
- inputBinding: - position: 0 - prefix: '--threads' - id: input_bam type: - File @@ -209,6 +206,14 @@ arguments: - position: 0 prefix: '-jar' valueFrom: /usr/local/bin/abra2.jar + - position: 0 + prefix: '--threads' + valueFrom: |- + ${ + if(inputs.number_of_threads) + return inputs.number_of_threads + return runtime.cores + } requirements: - class: ResourceRequirement ramMin: 60000 diff --git a/fgbio_call_duplex_consensus_reads_1.2.0/fgbio_call_duplex_consensus_reads_1.2.0.cwl b/fgbio_call_duplex_consensus_reads_1.2.0/fgbio_call_duplex_consensus_reads_1.2.0.cwl index fbbdebb7..27696dc1 100644 --- a/fgbio_call_duplex_consensus_reads_1.2.0/fgbio_call_duplex_consensus_reads_1.2.0.cwl +++ b/fgbio_call_duplex_consensus_reads_1.2.0/fgbio_call_duplex_consensus_reads_1.2.0.cwl @@ -181,7 +181,7 @@ arguments: requirements: - class: ShellCommandRequirement - class: ResourceRequirement - ramMin: 16000 + ramMin: 20000 coresMin: 16 - class: DockerRequirement dockerPull: 'quay.io/biocontainers/fgbio:1.2.0--0' From 82e3e8e744cb7544e8dbf45bbbd77b8cec7f2fa8 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Thu, 3 Dec 2020 12:27:02 -0500 Subject: [PATCH 271/476] Update fastp_0.20.1.cwl --- fastp_0.20.1/fastp_0.20.1.cwl | 47 +++++++++++++++++++++++++++++++++-- 1 file changed, 45 insertions(+), 2 deletions(-) diff --git a/fastp_0.20.1/fastp_0.20.1.cwl b/fastp_0.20.1/fastp_0.20.1.cwl index 633b1d70..5b08d619 100644 --- a/fastp_0.20.1/fastp_0.20.1.cwl +++ b/fastp_0.20.1/fastp_0.20.1.cwl @@ -9,6 +9,15 @@ id: fastp_0_20_1 baseCommand: - fastp inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + id: number_of_threads + type: int? + doc: 'worker thread number, default is 2 (int [=2])' - id: read1_input type: File inputBinding: @@ -100,6 +109,28 @@ inputs: prefix: '--html' doc: | the html format report file name + - id: disable_quality_filtering + type: boolean? 
+ inputBinding: + position: 0 + prefix: '--disable_quality_filtering' + doc: >- + quality filtering is enabled by default. If this option is specified, + quality filtering is disabled + - id: disable_trim_poly_g + type: boolean? + inputBinding: + position: 0 + prefix: '--disable_trim_poly_g' + doc: >- + disable polyG tail trimming, by default trimming is automatically enabled + for Illumina NextSeq/NovaSeq data + - id: verbose + type: File? + inputBinding: + position: 0 + prefix: '--verbose' + doc: output verbose log information (i.e. when every 1M reads are processed) outputs: - id: fastp_json_output type: File @@ -127,10 +158,19 @@ outputs: glob: $(inputs.unpaired2_path) doc: Setup and execute Fastp label: fastp_0.20.1 +arguments: + - position: 0 + prefix: '--thread' + valueFrom: |- + ${ + if(inputs.number_of_threads) + return inputs.number_of_threads + return runtime.cores + } requirements: - class: ResourceRequirement - ramMin: 16000 - coresMin: 2 + ramMin: 17000 + coresMin: 4 - class: DockerRequirement dockerPull: 'quay.io/biocontainers/fastp:0.20.1--h8b12597_0' - class: InlineJavascriptRequirement @@ -143,6 +183,9 @@ requirements: - class: 'foaf:Person' 'foaf:mbox': 'mailto:fraihaa@mskcc.org' 'foaf:name': Adrian Fraiha + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah 'foaf:name': Memorial Sloan Kettering Cancer Center 'dct:creator': - class: 'foaf:Organization' From 41b12f3332609145ade56a7fb9486bab1a600c0f Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Thu, 3 Dec 2020 15:22:46 -0500 Subject: [PATCH 272/476] update docker tag fgbio --- .../fgbio_postprocessing_simplex_filter_0.1.8.cwl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fgbio_postprocessing_simplex_filter_0.1.8/fgbio_postprocessing_simplex_filter_0.1.8.cwl b/fgbio_postprocessing_simplex_filter_0.1.8/fgbio_postprocessing_simplex_filter_0.1.8.cwl index 5f16fdc3..12545f82 100644 --- 
a/fgbio_postprocessing_simplex_filter_0.1.8/fgbio_postprocessing_simplex_filter_0.1.8.cwl +++ b/fgbio_postprocessing_simplex_filter_0.1.8/fgbio_postprocessing_simplex_filter_0.1.8.cwl @@ -49,7 +49,7 @@ requirements: ramMin: 16000 coresMin: 2 - class: DockerRequirement - dockerPull: 'mskaccess/fgbio_postprocessing:0.2.0' + dockerPull: 'mskaccess/fgbio_postprocessing:0.2.1' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' From d0d0b42762d5419d892d08ff2ae74841a0ee0af6 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Thu, 3 Dec 2020 15:42:49 -0500 Subject: [PATCH 273/476] Update fastp_0.20.1.cwl --- fastp_0.20.1/fastp_0.20.1.cwl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fastp_0.20.1/fastp_0.20.1.cwl b/fastp_0.20.1/fastp_0.20.1.cwl index 5b08d619..7e29582b 100644 --- a/fastp_0.20.1/fastp_0.20.1.cwl +++ b/fastp_0.20.1/fastp_0.20.1.cwl @@ -15,7 +15,7 @@ inputs: - id: memory_overhead type: int? doc: Memory overhead per job in megabytes - id: number_of_threads + - id: number_of_threads type: int? doc: 'worker thread number, default is 2 (int [=2])' - id: read1_input From 1c10693e5f5aaad4531111469453666ca22a34a7 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Mon, 7 Dec 2020 22:18:51 -0500 Subject: [PATCH 274/476] Update picard_fix_mate_information_4.1.8.1.cwl --- .../picard_fix_mate_information_4.1.8.1.cwl | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/picard_fix_mate_information_4.1.8.1/picard_fix_mate_information_4.1.8.1.cwl b/picard_fix_mate_information_4.1.8.1/picard_fix_mate_information_4.1.8.1.cwl index b544cb4d..0b607d59 100644 --- a/picard_fix_mate_information_4.1.8.1/picard_fix_mate_information_4.1.8.1.cwl +++ b/picard_fix_mate_information_4.1.8.1/picard_fix_mate_information_4.1.8.1.cwl @@ -23,8 +23,6 @@ inputs: position: 0 prefix: '-I' doc: The input file to fix. This option may be specified 0 or more times - secondaryFiles: - - ^.bai - id: output_file_name type: string? 
doc: Output file name (bam or sam). Not Required @@ -149,8 +147,8 @@ arguments: requirements: - class: ShellCommandRequirement - class: ResourceRequirement - ramMin: 25000 - coresMin: 4 + ramMin: 30000 + coresMin: 12 - class: DockerRequirement dockerPull: 'broadinstitute/gatk:4.1.8.1' - class: InlineJavascriptRequirement From f79ef030926304ccd2c8ed788f5c14908760c2d2 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Thu, 10 Dec 2020 13:00:36 -0500 Subject: [PATCH 275/476] Create bwa.yaml --- .github/workflows/bwa.yaml | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 .github/workflows/bwa.yaml diff --git a/.github/workflows/bwa.yaml b/.github/workflows/bwa.yaml new file mode 100644 index 00000000..c251d245 --- /dev/null +++ b/.github/workflows/bwa.yaml @@ -0,0 +1,34 @@ +name: bwa_0.7.17 + +on: + push: + branches: develop + +jobs: + path-context: + runs-on: ubuntu-latest + steps: + - + name: Checkout + uses: actions/checkout@v2 + - + name: Set up QEMU + uses: docker/setup-qemu-action@v1 + - + name: Set up Docker Buildx + uses: docker/setup-buildx-action@v1 + - + name: Login to DockerHub + uses: docker/login-action@v1 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + - + name: Build and push + uses: docker/build-push-action@v2 + with: + context: . 
+ file: ./bwa_mem_0.7.17/container/Dockerfile + platforms: linux/amd64,linux/arm64,linux/386 + push: true + tags: msk-access/bwa:0.7.17 From 86143f6eccf0fde23de9e0d5e08efc8cc2177a8b Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Thu, 10 Dec 2020 13:04:12 -0500 Subject: [PATCH 276/476] Update bwa.yaml --- .github/workflows/bwa.yaml | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/.github/workflows/bwa.yaml b/.github/workflows/bwa.yaml index c251d245..f5ea0736 100644 --- a/.github/workflows/bwa.yaml +++ b/.github/workflows/bwa.yaml @@ -8,23 +8,13 @@ jobs: path-context: runs-on: ubuntu-latest steps: - - - name: Checkout + - name: Checkout uses: actions/checkout@v2 - - - name: Set up QEMU + - name: Set up QEMU uses: docker/setup-qemu-action@v1 - - - name: Set up Docker Buildx + - name: Set up Docker Buildx uses: docker/setup-buildx-action@v1 - - - name: Login to DockerHub - uses: docker/login-action@v1 - with: - username: ${{ secrets.DOCKERHUB_USERNAME }} - password: ${{ secrets.DOCKERHUB_TOKEN }} - - - name: Build and push + - name: Build and push uses: docker/build-push-action@v2 with: context: . 
@@ -32,3 +22,8 @@ jobs: platforms: linux/amd64,linux/arm64,linux/386 push: true tags: msk-access/bwa:0.7.17 + registry: ghcr.io + username: "rhshah" + password: ${{ secrets.PAT_RONAK }} + repository: msk-access/cwl_commandlinetools + From f74af05f39a8aec2ed086d57ccecfdea60a97e9c Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Thu, 10 Dec 2020 13:14:27 -0500 Subject: [PATCH 277/476] Update bwa.yaml --- .github/workflows/bwa.yaml | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/.github/workflows/bwa.yaml b/.github/workflows/bwa.yaml index f5ea0736..276505f9 100644 --- a/.github/workflows/bwa.yaml +++ b/.github/workflows/bwa.yaml @@ -5,16 +5,13 @@ on: branches: develop jobs: - path-context: + push_to_registry: + name: Push Docker image to GitHub Packages runs-on: ubuntu-latest steps: - - name: Checkout + - name: Check out the repo uses: actions/checkout@v2 - - name: Set up QEMU - uses: docker/setup-qemu-action@v1 - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v1 - - name: Build and push + - name: Build and Push to GitHub Packages uses: docker/build-push-action@v2 with: context: . 
@@ -22,8 +19,7 @@ jobs: platforms: linux/amd64,linux/arm64,linux/386 push: true tags: msk-access/bwa:0.7.17 - registry: ghcr.io - username: "rhshah" - password: ${{ secrets.PAT_RONAK }} + registry: docker.pkg.github.co + secrets: ${{ secrets.PAT_RONAK }} repository: msk-access/cwl_commandlinetools From 160b2857cf913881492278032ed8ad9f276754bb Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Thu, 10 Dec 2020 13:21:05 -0500 Subject: [PATCH 278/476] Update bwa.yaml --- .github/workflows/bwa.yaml | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/.github/workflows/bwa.yaml b/.github/workflows/bwa.yaml index 276505f9..c318a86a 100644 --- a/.github/workflows/bwa.yaml +++ b/.github/workflows/bwa.yaml @@ -9,17 +9,33 @@ jobs: name: Push Docker image to GitHub Packages runs-on: ubuntu-latest steps: - - name: Check out the repo + - + name: Check out the repo uses: actions/checkout@v2 - - name: Build and Push to GitHub Packages + - + name: Set up QEMU + uses: docker/setup-qemu-action@v1 + - + name: Set up Docker Buildx + uses: docker/setup-buildx-action@v1 + - + name: Login to GitHub Container Registry + uses: docker/login-action@v1 + with: + registry: ghcr.io + username: ${{ github.repository_owner }} + password: ${{ secrets.RONAK_PAT }} + - + name: Build and Push to GitHub Packages uses: docker/build-push-action@v2 with: context: . 
file: ./bwa_mem_0.7.17/container/Dockerfile platforms: linux/amd64,linux/arm64,linux/386 push: true - tags: msk-access/bwa:0.7.17 - registry: docker.pkg.github.co - secrets: ${{ secrets.PAT_RONAK }} + tags: | + ghcr.io/msk-access/bwa:latest + ghcr.io/msk-access/bwa:0.7.17 repository: msk-access/cwl_commandlinetools + From dd1830741346b1f1bd34c3e36364fce3bd350903 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Thu, 10 Dec 2020 13:22:37 -0500 Subject: [PATCH 279/476] Update bwa.yaml --- .github/workflows/bwa.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/bwa.yaml b/.github/workflows/bwa.yaml index c318a86a..dd7cffcd 100644 --- a/.github/workflows/bwa.yaml +++ b/.github/workflows/bwa.yaml @@ -23,7 +23,7 @@ jobs: uses: docker/login-action@v1 with: registry: ghcr.io - username: ${{ github.repository_owner }} + username: ${{ github.actor }} password: ${{ secrets.RONAK_PAT }} - name: Build and Push to GitHub Packages From 218fe48ea5b6dcfc29f40835ae7308bed9c58cb5 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Thu, 10 Dec 2020 13:27:09 -0500 Subject: [PATCH 280/476] Update bwa.yaml --- .github/workflows/bwa.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/bwa.yaml b/.github/workflows/bwa.yaml index dd7cffcd..b7b70b16 100644 --- a/.github/workflows/bwa.yaml +++ b/.github/workflows/bwa.yaml @@ -23,8 +23,8 @@ jobs: uses: docker/login-action@v1 with: registry: ghcr.io - username: ${{ github.actor }} - password: ${{ secrets.RONAK_PAT }} + username: ${{ github.repository_owner }} + password: ${{ secrets.RS_PAT }} - name: Build and Push to GitHub Packages uses: docker/build-push-action@v2 From 0b6d3c7f21d67459dae0af8f409fbde267f8fe27 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Thu, 10 Dec 2020 13:39:17 -0500 Subject: [PATCH 281/476] Update bwa.yaml --- .github/workflows/bwa.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/bwa.yaml 
b/.github/workflows/bwa.yaml index b7b70b16..b1f277bf 100644 --- a/.github/workflows/bwa.yaml +++ b/.github/workflows/bwa.yaml @@ -31,7 +31,7 @@ jobs: with: context: . file: ./bwa_mem_0.7.17/container/Dockerfile - platforms: linux/amd64,linux/arm64,linux/386 + platforms: linux/amd64,linux/arm64 push: true tags: | ghcr.io/msk-access/bwa:latest From 4e404134bcfafd6e2073995b68b4ca88882e977f Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Thu, 10 Dec 2020 13:43:06 -0500 Subject: [PATCH 282/476] Update bwa.yaml --- .github/workflows/bwa.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/bwa.yaml b/.github/workflows/bwa.yaml index b1f277bf..19400cf8 100644 --- a/.github/workflows/bwa.yaml +++ b/.github/workflows/bwa.yaml @@ -31,11 +31,11 @@ jobs: with: context: . file: ./bwa_mem_0.7.17/container/Dockerfile - platforms: linux/amd64,linux/arm64 + platforms: linux/amd64 push: true tags: | ghcr.io/msk-access/bwa:latest ghcr.io/msk-access/bwa:0.7.17 - repository: msk-access/cwl_commandlinetools + From e4d10fc2700719539103a59ffd3409885bba29b4 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Thu, 10 Dec 2020 13:52:05 -0500 Subject: [PATCH 283/476] Adding Abra2 actions --- .github/workflows/abra2.yaml | 39 ++++++++++++++++++++++++++++++++++++ .github/workflows/bwa.yaml | 2 -- 2 files changed, 39 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/abra2.yaml diff --git a/.github/workflows/abra2.yaml b/.github/workflows/abra2.yaml new file mode 100644 index 00000000..75fc55ee --- /dev/null +++ b/.github/workflows/abra2.yaml @@ -0,0 +1,39 @@ +name: abra2_2.22 + +on: + push: + branches: develop + +jobs: + push_to_registry: + name: Push Docker image to GitHub Packages + runs-on: ubuntu-latest + steps: + - + name: Check out the repo + uses: actions/checkout@v2 + - + name: Set up QEMU + uses: docker/setup-qemu-action@v1 + - + name: Set up Docker Buildx + uses: docker/setup-buildx-action@v1 + - + name: Login to GitHub Container 
Registry + uses: docker/login-action@v1 + with: + registry: ghcr.io + username: ${{ github.repository_owner }} + password: ${{ secrets.RS_PAT }} + - + name: Build and Push to GitHub Packages + uses: docker/build-push-action@v2 + with: + context: . + file: ./abra2_2.22/container/Dockerfile + push: true + tags: | + ghcr.io/msk-access/abra2:2.22 + + + diff --git a/.github/workflows/bwa.yaml b/.github/workflows/bwa.yaml index 19400cf8..87de5c3e 100644 --- a/.github/workflows/bwa.yaml +++ b/.github/workflows/bwa.yaml @@ -31,10 +31,8 @@ jobs: with: context: . file: ./bwa_mem_0.7.17/container/Dockerfile - platforms: linux/amd64 push: true tags: | - ghcr.io/msk-access/bwa:latest ghcr.io/msk-access/bwa:0.7.17 From b60fe495fac2f9d44c8ec9ae7d88631c42b37c7c Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Thu, 10 Dec 2020 13:52:47 -0500 Subject: [PATCH 284/476] Renames --- .github/workflows/{abra2.yaml => abra2_2.22.yaml} | 0 .github/workflows/{bwa.yaml => bwa_0.7.17.yaml} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename .github/workflows/{abra2.yaml => abra2_2.22.yaml} (100%) rename .github/workflows/{bwa.yaml => bwa_0.7.17.yaml} (100%) diff --git a/.github/workflows/abra2.yaml b/.github/workflows/abra2_2.22.yaml similarity index 100% rename from .github/workflows/abra2.yaml rename to .github/workflows/abra2_2.22.yaml diff --git a/.github/workflows/bwa.yaml b/.github/workflows/bwa_0.7.17.yaml similarity index 100% rename from .github/workflows/bwa.yaml rename to .github/workflows/bwa_0.7.17.yaml From 905d9078e55e3d3f5fc1957dfeeaf0fccf76b9fd Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Thu, 10 Dec 2020 14:03:14 -0500 Subject: [PATCH 285/476] Checking if one config works --- .github/workflows/abra2_2.22.yaml | 39 ------------------- ...17.yaml => build_and_push_containers.yaml} | 22 ++++++++++- 2 files changed, 20 insertions(+), 41 deletions(-) delete mode 100644 .github/workflows/abra2_2.22.yaml rename .github/workflows/{bwa_0.7.17.yaml => 
build_and_push_containers.yaml} (56%) diff --git a/.github/workflows/abra2_2.22.yaml b/.github/workflows/abra2_2.22.yaml deleted file mode 100644 index 75fc55ee..00000000 --- a/.github/workflows/abra2_2.22.yaml +++ /dev/null @@ -1,39 +0,0 @@ -name: abra2_2.22 - -on: - push: - branches: develop - -jobs: - push_to_registry: - name: Push Docker image to GitHub Packages - runs-on: ubuntu-latest - steps: - - - name: Check out the repo - uses: actions/checkout@v2 - - - name: Set up QEMU - uses: docker/setup-qemu-action@v1 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v1 - - - name: Login to GitHub Container Registry - uses: docker/login-action@v1 - with: - registry: ghcr.io - username: ${{ github.repository_owner }} - password: ${{ secrets.RS_PAT }} - - - name: Build and Push to GitHub Packages - uses: docker/build-push-action@v2 - with: - context: . - file: ./abra2_2.22/container/Dockerfile - push: true - tags: | - ghcr.io/msk-access/abra2:2.22 - - - diff --git a/.github/workflows/bwa_0.7.17.yaml b/.github/workflows/build_and_push_containers.yaml similarity index 56% rename from .github/workflows/bwa_0.7.17.yaml rename to .github/workflows/build_and_push_containers.yaml index 87de5c3e..cdeee632 100644 --- a/.github/workflows/bwa_0.7.17.yaml +++ b/.github/workflows/build_and_push_containers.yaml @@ -1,4 +1,4 @@ -name: bwa_0.7.17 +name: build_and_push_containers on: push: @@ -26,7 +26,7 @@ jobs: username: ${{ github.repository_owner }} password: ${{ secrets.RS_PAT }} - - name: Build and Push to GitHub Packages + name: Build BWA 0.7.17 and Push to GitHub Packages uses: docker/build-push-action@v2 with: context: . @@ -34,6 +34,24 @@ jobs: push: true tags: | ghcr.io/msk-access/bwa:0.7.17 + - + name: Build ABRA2 2.22 and Push to GitHub Packages + uses: docker/build-push-action@v2 + with: + context: . 
+ file: ./abra2_2.22/container/Dockerfile + push: true + tags: | + ghcr.io/msk-access/abra2:2.22 + - + name: Build bedtools 2.28 and Push to GitHub Packages + uses: docker/build-push-action@v2 + with: + context: . + file: ./abra2_2.22/container/Dockerfile + push: true + tags: | + ghcr.io/msk-access/abra2:2.22 From 6c804771af8ed44f76defd5b1440fec8a3410b5e Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Thu, 10 Dec 2020 14:04:49 -0500 Subject: [PATCH 286/476] Update build_and_push_containers.yaml --- .github/workflows/build_and_push_containers.yaml | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/.github/workflows/build_and_push_containers.yaml b/.github/workflows/build_and_push_containers.yaml index cdeee632..262765b8 100644 --- a/.github/workflows/build_and_push_containers.yaml +++ b/.github/workflows/build_and_push_containers.yaml @@ -43,15 +43,7 @@ jobs: push: true tags: | ghcr.io/msk-access/abra2:2.22 - - - name: Build bedtools 2.28 and Push to GitHub Packages - uses: docker/build-push-action@v2 - with: - context: . 
- file: ./abra2_2.22/container/Dockerfile - push: true - tags: | - ghcr.io/msk-access/abra2:2.22 + From 7832f4caef43e0609b2ae1827a2db394c2e1ce2a Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 11 Dec 2020 13:33:02 -0500 Subject: [PATCH 287/476] Update build_and_push_containers.yaml --- .../workflows/build_and_push_containers.yaml | 90 ++++++++++++++++++- 1 file changed, 89 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build_and_push_containers.yaml b/.github/workflows/build_and_push_containers.yaml index 262765b8..e4580407 100644 --- a/.github/workflows/build_and_push_containers.yaml +++ b/.github/workflows/build_and_push_containers.yaml @@ -2,12 +2,19 @@ name: build_and_push_containers on: push: - branches: develop + paths-ignore: + - 'docs/**' + - '**.md' + pull_request: + paths-ignore: + - 'docs/**' + - '**.md' jobs: push_to_registry: name: Push Docker image to GitHub Packages runs-on: ubuntu-latest + if: "!contains(github.event.head_commit.message, 'ci skip')" steps: - name: Check out the repo @@ -25,6 +32,15 @@ jobs: registry: ghcr.io username: ${{ github.repository_owner }} password: ${{ secrets.RS_PAT }} + - + name: Build merge_fastq 0.1.7 and Push to GitHub Packages + uses: docker/build-push-action@v2 + with: + context: . + file: ./merge_fastq_0.1.7/container/Dockerfile + push: true + tags: | + ghcr.io/msk-access/merge_fastq:0.1.7 - name: Build BWA 0.7.17 and Push to GitHub Packages uses: docker/build-push-action@v2 @@ -43,6 +59,78 @@ jobs: push: true tags: | ghcr.io/msk-access/abra2:2.22 + - + name: Build Fgbio Postprocessing 0.2.1 and Push to GitHub Packages + uses: docker/build-push-action@v2 + with: + context: . + file: ./fgbio_postprocessing_simplex_filter_0.1.8/container/Dockerfile + push: true + tags: | + ghcr.io/msk-access/fgbio_postprocessing:0.2.1 + - + name: Build bcftools 1.6 and Push to GitHub Packages + uses: docker/build-push-action@v2 + with: + context: . 
+ file: ./bcftools_1.6/container/Dockerfile + push: true + tags: | + ghcr.io/msk-access/bcftools:1.6 + - + name: Build vcf2maf 1.6.17 and Push to GitHub Packages + uses: docker/build-push-action@v2 + with: + context: . + file: ./vcf2maf_1.6.17/container/Dockerfile + push: true + tags: | + ghcr.io/msk-access/vcf2maf:1.6.17 + - + name: Build MuTect 1.1.5 and Push to GitHub Packages + uses: docker/build-push-action@v2 + with: + context: . + file: ./mutect_1.1.5/container/Dockerfile + push: true + tags: | + ghcr.io/msk-access/mutect:1.1.5 + - + name: Build Trim Galore 0.6.2 and Push to GitHub Packages + uses: docker/build-push-action@v2 + with: + context: . + file: ./trim_galore_0.6.2/container/Dockerfile + push: true + tags: | + ghcr.io/msk-access/trim_galore:0.6.2 + - + name: Build Waltz 3.1.1 and Push to GitHub Packages + uses: docker/build-push-action@v2 + with: + context: . + file: ./waltz_count_reads_3.1.1/container/Dockerfile + push: true + tags: | + ghcr.io/msk-access/waltz:3.1.1 + - + name: Build Waltz 3.1.1 and Push to GitHub Packages + uses: docker/build-push-action@v2 + with: + context: . + file: ./marianas_process_loop_umi_1.8.1/container/Dockerfile + push: true + tags: | + ghcr.io/msk-access/marianas:1.8.1 + - + name: Build Manta and Push to GitHub Packages + uses: docker/build-push-action@v2 + with: + context: . 
+ file: ./manta_1.5.1/container/Dockerfile + push: true + tags: | + ghcr.io/msk-access/manta:1.5.1 From e6a7c578271d8fcc830b366bda3e998a159e1a8b Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 11 Dec 2020 13:35:58 -0500 Subject: [PATCH 288/476] Update build_and_push_containers.yaml ci skip --- .github/workflows/build_and_push_containers.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_and_push_containers.yaml b/.github/workflows/build_and_push_containers.yaml index e4580407..88dae3e7 100644 --- a/.github/workflows/build_and_push_containers.yaml +++ b/.github/workflows/build_and_push_containers.yaml @@ -114,7 +114,7 @@ jobs: tags: | ghcr.io/msk-access/waltz:3.1.1 - - name: Build Waltz 3.1.1 and Push to GitHub Packages + name: Build Marianas 1.8.1 and Push to GitHub Packages uses: docker/build-push-action@v2 with: context: . From 43a89907833330c0f76aa89021c21a726e8cfb17 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 11 Dec 2020 13:36:53 -0500 Subject: [PATCH 289/476] Update build_and_push_containers.yaml --- .github/workflows/build_and_push_containers.yaml | 9 --------- 1 file changed, 9 deletions(-) diff --git a/.github/workflows/build_and_push_containers.yaml b/.github/workflows/build_and_push_containers.yaml index 88dae3e7..875031ba 100644 --- a/.github/workflows/build_and_push_containers.yaml +++ b/.github/workflows/build_and_push_containers.yaml @@ -59,15 +59,6 @@ jobs: push: true tags: | ghcr.io/msk-access/abra2:2.22 - - - name: Build Fgbio Postprocessing 0.2.1 and Push to GitHub Packages - uses: docker/build-push-action@v2 - with: - context: . 
- file: ./fgbio_postprocessing_simplex_filter_0.1.8/container/Dockerfile - push: true - tags: | - ghcr.io/msk-access/fgbio_postprocessing:0.2.1 - name: Build bcftools 1.6 and Push to GitHub Packages uses: docker/build-push-action@v2 From 2512cb84b58c8e97daf709e2ac758ae5236afcb9 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 11 Dec 2020 13:44:45 -0500 Subject: [PATCH 290/476] Update build_and_push_containers.yaml --- .github/workflows/build_and_push_containers.yaml | 9 --------- 1 file changed, 9 deletions(-) diff --git a/.github/workflows/build_and_push_containers.yaml b/.github/workflows/build_and_push_containers.yaml index 875031ba..f1d2cef1 100644 --- a/.github/workflows/build_and_push_containers.yaml +++ b/.github/workflows/build_and_push_containers.yaml @@ -59,15 +59,6 @@ jobs: push: true tags: | ghcr.io/msk-access/abra2:2.22 - - - name: Build bcftools 1.6 and Push to GitHub Packages - uses: docker/build-push-action@v2 - with: - context: . - file: ./bcftools_1.6/container/Dockerfile - push: true - tags: | - ghcr.io/msk-access/bcftools:1.6 - name: Build vcf2maf 1.6.17 and Push to GitHub Packages uses: docker/build-push-action@v2 From 117d45f48a0d34f0195270d89c7b450e0e46cecb Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 11 Dec 2020 15:27:12 -0500 Subject: [PATCH 291/476] Update build_and_push_containers.yaml --- .../workflows/build_and_push_containers.yaml | 180 ++++++++++-------- 1 file changed, 99 insertions(+), 81 deletions(-) diff --git a/.github/workflows/build_and_push_containers.yaml b/.github/workflows/build_and_push_containers.yaml index f1d2cef1..2fadeee1 100644 --- a/.github/workflows/build_and_push_containers.yaml +++ b/.github/workflows/build_and_push_containers.yaml @@ -5,10 +5,12 @@ on: paths-ignore: - 'docs/**' - '**.md' + - '**.cwl' pull_request: paths-ignore: - 'docs/**' - '**.md' + - '**.cwl' jobs: push_to_registry: @@ -32,87 +34,103 @@ jobs: registry: ghcr.io username: ${{ github.repository_owner }} password: ${{ secrets.RS_PAT 
}} - - - name: Build merge_fastq 0.1.7 and Push to GitHub Packages - uses: docker/build-push-action@v2 - with: - context: . - file: ./merge_fastq_0.1.7/container/Dockerfile - push: true - tags: | - ghcr.io/msk-access/merge_fastq:0.1.7 - - - name: Build BWA 0.7.17 and Push to GitHub Packages - uses: docker/build-push-action@v2 - with: - context: . - file: ./bwa_mem_0.7.17/container/Dockerfile - push: true - tags: | - ghcr.io/msk-access/bwa:0.7.17 - - - name: Build ABRA2 2.22 and Push to GitHub Packages - uses: docker/build-push-action@v2 - with: - context: . - file: ./abra2_2.22/container/Dockerfile - push: true - tags: | - ghcr.io/msk-access/abra2:2.22 - - - name: Build vcf2maf 1.6.17 and Push to GitHub Packages - uses: docker/build-push-action@v2 - with: - context: . - file: ./vcf2maf_1.6.17/container/Dockerfile - push: true - tags: | - ghcr.io/msk-access/vcf2maf:1.6.17 - - - name: Build MuTect 1.1.5 and Push to GitHub Packages - uses: docker/build-push-action@v2 - with: - context: . - file: ./mutect_1.1.5/container/Dockerfile - push: true - tags: | - ghcr.io/msk-access/mutect:1.1.5 - - - name: Build Trim Galore 0.6.2 and Push to GitHub Packages - uses: docker/build-push-action@v2 - with: - context: . - file: ./trim_galore_0.6.2/container/Dockerfile - push: true - tags: | - ghcr.io/msk-access/trim_galore:0.6.2 - - - name: Build Waltz 3.1.1 and Push to GitHub Packages - uses: docker/build-push-action@v2 - with: - context: . - file: ./waltz_count_reads_3.1.1/container/Dockerfile - push: true - tags: | - ghcr.io/msk-access/waltz:3.1.1 - - - name: Build Marianas 1.8.1 and Push to GitHub Packages - uses: docker/build-push-action@v2 - with: - context: . - file: ./marianas_process_loop_umi_1.8.1/container/Dockerfile - push: true - tags: | - ghcr.io/msk-access/marianas:1.8.1 - - - name: Build Manta and Push to GitHub Packages - uses: docker/build-push-action@v2 - with: - context: . 
- file: ./manta_1.5.1/container/Dockerfile - push: true - tags: | - ghcr.io/msk-access/manta:1.5.1 + - + name: Check package version 1 + run: | + echo "::set-output name=manifest=$(curl -X GET https://docker.pkg.github.com/v2/msk-access/cwl_commandlinetools/merge_fastq/manifests/$GITHUB_SHA -u $GITHUB_ACTOR:${{ secrets.RS_PAT }} | jq '.')" + - + name: echo output 1 + run: | + echo ${{ steps.getfile.outputs.manifest }} + - + name: Check package version 2 + run: | + echo "::set-output name=manifest=$(curl -X GET https://docker.pkg.github.com/v2/msk-access/cwl_commandlinetools/manta/manifests/$GITHUB_SHA -u $GITHUB_ACTOR:${{ secrets.RS_PAT }} | jq '.')" + - + name: echo output 2 + run: | + echo ${{ steps.getfile.outputs.manifest }} + # - + # name: Build merge_fastq 0.1.7 and Push to GitHub Packages + # uses: docker/build-push-action@v2 + # with: + # context: . + # file: ./merge_fastq_0.1.7/container/Dockerfile + # push: true + # tags: | + # ghcr.io/msk-access/merge_fastq:0.1.7 + # - + # name: Build BWA 0.7.17 and Push to GitHub Packages + # uses: docker/build-push-action@v2 + # with: + # context: . + # file: ./bwa_mem_0.7.17/container/Dockerfile + # push: true + # tags: | + # ghcr.io/msk-access/bwa:0.7.17 + # - + # name: Build ABRA2 2.22 and Push to GitHub Packages + # uses: docker/build-push-action@v2 + # with: + # context: . + # file: ./abra2_2.22/container/Dockerfile + # push: true + # tags: | + # ghcr.io/msk-access/abra2:2.22 + # - + # name: Build vcf2maf 1.6.17 and Push to GitHub Packages + # uses: docker/build-push-action@v2 + # with: + # context: . + # file: ./vcf2maf_1.6.17/container/Dockerfile + # push: true + # tags: | + # ghcr.io/msk-access/vcf2maf:1.6.17 + # - + # name: Build MuTect 1.1.5 and Push to GitHub Packages + # uses: docker/build-push-action@v2 + # with: + # context: . 
+ # file: ./mutect_1.1.5/container/Dockerfile + # push: true + # tags: | + # ghcr.io/msk-access/mutect:1.1.5 + # - + # name: Build Trim Galore 0.6.2 and Push to GitHub Packages + # uses: docker/build-push-action@v2 + # with: + # context: . + # file: ./trim_galore_0.6.2/container/Dockerfile + # push: true + # tags: | + # ghcr.io/msk-access/trim_galore:0.6.2 + # - + # name: Build Waltz 3.1.1 and Push to GitHub Packages + # uses: docker/build-push-action@v2 + # with: + # context: . + # file: ./waltz_count_reads_3.1.1/container/Dockerfile + # push: true + # tags: | + # ghcr.io/msk-access/waltz:3.1.1 + # - + # name: Build Marianas 1.8.1 and Push to GitHub Packages + # uses: docker/build-push-action@v2 + # with: + # context: . + # file: ./marianas_process_loop_umi_1.8.1/container/Dockerfile + # push: true + # tags: | + # ghcr.io/msk-access/marianas:1.8.1 + # - + # name: Build Manta and Push to GitHub Packages + # uses: docker/build-push-action@v2 + # with: + # context: . + # file: ./manta_1.5.1/container/Dockerfile + # push: true + # tags: | + # ghcr.io/msk-access/manta:1.5.1 From 1edc3bf2e209732ffeaaa034f3bae4a6a388c9a1 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 11 Dec 2020 16:02:02 -0500 Subject: [PATCH 292/476] Update build_and_push_containers.yaml --- .github/workflows/build_and_push_containers.yaml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_and_push_containers.yaml b/.github/workflows/build_and_push_containers.yaml index 2fadeee1..4d8c2e7c 100644 --- a/.github/workflows/build_and_push_containers.yaml +++ b/.github/workflows/build_and_push_containers.yaml @@ -29,6 +29,7 @@ jobs: uses: docker/setup-buildx-action@v1 - name: Login to GitHub Container Registry + #if: github.event_name != 'pull_request' uses: docker/login-action@v1 with: registry: ghcr.io @@ -37,7 +38,7 @@ jobs: - name: Check package version 1 run: | - echo "::set-output name=manifest=$(curl -X GET 
https://docker.pkg.github.com/v2/msk-access/cwl_commandlinetools/merge_fastq/manifests/$GITHUB_SHA -u $GITHUB_ACTOR:${{ secrets.RS_PAT }} | jq '.')" + echo "::set-output name=manifest=$(curl -X GET https://docker.pkg.github.com/v2/msk-access/merge_fastq/manifests/$GITHUB_SHA -u $GITHUB_ACTOR:${{ secrets.RS_PAT }} | jq '.')" - name: echo output 1 run: | @@ -45,7 +46,7 @@ jobs: - name: Check package version 2 run: | - echo "::set-output name=manifest=$(curl -X GET https://docker.pkg.github.com/v2/msk-access/cwl_commandlinetools/manta/manifests/$GITHUB_SHA -u $GITHUB_ACTOR:${{ secrets.RS_PAT }} | jq '.')" + echo "::set-output name=manifest=$(curl -X GET https://docker.pkg.github.com/v2/msk-access/cwl_commandlinetools/manifests/$GITHUB_SHA -u $GITHUB_ACTOR:${{ secrets.RS_PAT }} | jq '.')" - name: echo output 2 run: | From be8284f5f1a5ebf7987424e16706712fc64aeda4 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 11 Dec 2020 16:04:37 -0500 Subject: [PATCH 293/476] Update build_and_push_containers.yaml --- .github/workflows/build_and_push_containers.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_and_push_containers.yaml b/.github/workflows/build_and_push_containers.yaml index 4d8c2e7c..df89ac23 100644 --- a/.github/workflows/build_and_push_containers.yaml +++ b/.github/workflows/build_and_push_containers.yaml @@ -38,7 +38,7 @@ jobs: - name: Check package version 1 run: | - echo "::set-output name=manifest=$(curl -X GET https://docker.pkg.github.com/v2/msk-access/merge_fastq/manifests/$GITHUB_SHA -u $GITHUB_ACTOR:${{ secrets.RS_PAT }} | jq '.')" + echo "::set-output name=manifest::$(curl -X GET https://docker.pkg.github.com/v2/msk-access/merge_fastq/manifests/$GITHUB_SHA -u $GITHUB_ACTOR:${{ secrets.RS_PAT }} | jq '.')" - name: echo output 1 run: | @@ -46,7 +46,7 @@ jobs: - name: Check package version 2 run: | - echo "::set-output name=manifest=$(curl -X GET 
https://docker.pkg.github.com/v2/msk-access/cwl_commandlinetools/manifests/$GITHUB_SHA -u $GITHUB_ACTOR:${{ secrets.RS_PAT }} | jq '.')" + echo "::set-output name=manifest::$(curl -X GET https://docker.pkg.github.com/v2/msk-access/cwl_commandlinetools/manifests/$GITHUB_SHA -u $GITHUB_ACTOR:${{ secrets.RS_PAT }} | jq '.')" - name: echo output 2 run: | From f668aba74d33a3db194840cea016986eba9721c6 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 11 Dec 2020 16:06:58 -0500 Subject: [PATCH 294/476] Update build_and_push_containers.yaml --- .github/workflows/build_and_push_containers.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_and_push_containers.yaml b/.github/workflows/build_and_push_containers.yaml index df89ac23..11cbfce8 100644 --- a/.github/workflows/build_and_push_containers.yaml +++ b/.github/workflows/build_and_push_containers.yaml @@ -38,7 +38,7 @@ jobs: - name: Check package version 1 run: | - echo "::set-output name=manifest::$(curl -X GET https://docker.pkg.github.com/v2/msk-access/merge_fastq/manifests/$GITHUB_SHA -u $GITHUB_ACTOR:${{ secrets.RS_PAT }} | jq '.')" + echo "::set-output name=manifest::$(curl -X GET https://docker.pkg.github.com/v2/msk-access/cwl_commandlinetools/merge_fastq/manifests/$GITHUB_SHA -u $GITHUB_ACTOR:${{ secrets.RS_PAT }} | jq '.')" - name: echo output 1 run: | @@ -46,7 +46,7 @@ jobs: - name: Check package version 2 run: | - echo "::set-output name=manifest::$(curl -X GET https://docker.pkg.github.com/v2/msk-access/cwl_commandlinetools/manifests/$GITHUB_SHA -u $GITHUB_ACTOR:${{ secrets.RS_PAT }} | jq '.')" + echo "::set-output name=manifest::$(curl -X GET https://docker.pkg.github.com/v2/msk-access/cwl_commandlinetools/manta/manifests/$GITHUB_SHA -u $GITHUB_ACTOR:${{ secrets.RS_PAT }} | jq '.')" - name: echo output 2 run: | From 3b43ba91eaa57f69ad755a548d4fcd6a58fa0833 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 11 Dec 2020 16:40:48 -0500 Subject: [PATCH 295/476] 
Update build_and_push_containers.yaml --- .../workflows/build_and_push_containers.yaml | 35 ++++++------------- 1 file changed, 10 insertions(+), 25 deletions(-) diff --git a/.github/workflows/build_and_push_containers.yaml b/.github/workflows/build_and_push_containers.yaml index 11cbfce8..d8654c48 100644 --- a/.github/workflows/build_and_push_containers.yaml +++ b/.github/workflows/build_and_push_containers.yaml @@ -35,31 +35,16 @@ jobs: registry: ghcr.io username: ${{ github.repository_owner }} password: ${{ secrets.RS_PAT }} - - - name: Check package version 1 - run: | - echo "::set-output name=manifest::$(curl -X GET https://docker.pkg.github.com/v2/msk-access/cwl_commandlinetools/merge_fastq/manifests/$GITHUB_SHA -u $GITHUB_ACTOR:${{ secrets.RS_PAT }} | jq '.')" - - - name: echo output 1 - run: | - echo ${{ steps.getfile.outputs.manifest }} - - - name: Check package version 2 - run: | - echo "::set-output name=manifest::$(curl -X GET https://docker.pkg.github.com/v2/msk-access/cwl_commandlinetools/manta/manifests/$GITHUB_SHA -u $GITHUB_ACTOR:${{ secrets.RS_PAT }} | jq '.')" - - - name: echo output 2 - run: | - echo ${{ steps.getfile.outputs.manifest }} - # - - # name: Build merge_fastq 0.1.7 and Push to GitHub Packages - # uses: docker/build-push-action@v2 - # with: - # context: . - # file: ./merge_fastq_0.1.7/container/Dockerfile - # push: true - # tags: | - # ghcr.io/msk-access/merge_fastq:0.1.7 + - + name: Build merge_fastq 0.1.7 and Push to GitHub Packages + if: github.event.registry_package.package.name != 'merge_fastq:0.1.7' + uses: docker/build-push-action@v2 + with: + context: . 
+ file: ./merge_fastq_0.1.7/container/Dockerfile + push: true + tags: | + ghcr.io/msk-access/merge_fastq:0.1.7 # - # name: Build BWA 0.7.17 and Push to GitHub Packages # uses: docker/build-push-action@v2 From 88567a726df156e7bd34030f74175907f853ef98 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 11 Dec 2020 23:09:20 -0500 Subject: [PATCH 296/476] Update build_and_push_containers.yaml --- .../workflows/build_and_push_containers.yaml | 35 +++++++++++++------ 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/.github/workflows/build_and_push_containers.yaml b/.github/workflows/build_and_push_containers.yaml index d8654c48..838fce84 100644 --- a/.github/workflows/build_and_push_containers.yaml +++ b/.github/workflows/build_and_push_containers.yaml @@ -35,16 +35,31 @@ jobs: registry: ghcr.io username: ${{ github.repository_owner }} password: ${{ secrets.RS_PAT }} - - - name: Build merge_fastq 0.1.7 and Push to GitHub Packages - if: github.event.registry_package.package.name != 'merge_fastq:0.1.7' - uses: docker/build-push-action@v2 - with: - context: . - file: ./merge_fastq_0.1.7/container/Dockerfile - push: true - tags: | - ghcr.io/msk-access/merge_fastq:0.1.7 + - + name: Check package version ABRA2 + run: | + echo "::set-output name=abra2_manifest::$(docker buildx imagetools inspect ghcr.io/msk-access/abra2:2.22 | jq '.')" + - + name: echo output 1 + run: | + echo ${{ steps.getfile.outputs.abra2_manifest }} + - + name: Check package version MANTA + run: | + echo "::set-output name=manta_manifest::$(docker buildx imagetools inspect ghcr.io/msk-access/manta:1.5.1 | jq '.')" + - + name: echo output 2 + run: | + echo ${{ steps.getfile.outputs.manta_manifest }} + # - + # name: Build merge_fastq 0.1.7 and Push to GitHub Packages + # uses: docker/build-push-action@v2 + # with: + # context: . 
+ # file: ./merge_fastq_0.1.7/container/Dockerfile + # push: true + # tags: | + # ghcr.io/msk-access/merge_fastq:0.1.7 # - # name: Build BWA 0.7.17 and Push to GitHub Packages # uses: docker/build-push-action@v2 From f44c8e3c927aca483874acd174edfc50bb8bcad8 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 11 Dec 2020 23:11:44 -0500 Subject: [PATCH 297/476] Update build_and_push_containers.yaml --- .github/workflows/build_and_push_containers.yaml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build_and_push_containers.yaml b/.github/workflows/build_and_push_containers.yaml index 838fce84..b33383f8 100644 --- a/.github/workflows/build_and_push_containers.yaml +++ b/.github/workflows/build_and_push_containers.yaml @@ -38,19 +38,19 @@ jobs: - name: Check package version ABRA2 run: | - echo "::set-output name=abra2_manifest::$(docker buildx imagetools inspect ghcr.io/msk-access/abra2:2.22 | jq '.')" + echo "::set-output name=abra2-manifest::$(docker buildx imagetools inspect ghcr.io/msk-access/abra2:2.22 | jq '.')" - - name: echo output 1 + name: echo output ABRA2 run: | - echo ${{ steps.getfile.outputs.abra2_manifest }} + echo ${{ steps.getfile.outputs.abra2-manifest }} - name: Check package version MANTA run: | - echo "::set-output name=manta_manifest::$(docker buildx imagetools inspect ghcr.io/msk-access/manta:1.5.1 | jq '.')" + echo "::set-output name=manta-manifest::$(docker buildx imagetools inspect ghcr.io/msk-access/manta:1.5.1 | jq '.')" - - name: echo output 2 + name: echo output Manta run: | - echo ${{ steps.getfile.outputs.manta_manifest }} + echo ${{ steps.getfile.outputs.manta-manifest }} # - # name: Build merge_fastq 0.1.7 and Push to GitHub Packages # uses: docker/build-push-action@v2 From 35901651695d520e318c62078bbc156327bd294f Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 11 Dec 2020 23:17:13 -0500 Subject: [PATCH 298/476] Update build_and_push_containers.yaml --- 
.github/workflows/build_and_push_containers.yaml | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build_and_push_containers.yaml b/.github/workflows/build_and_push_containers.yaml index b33383f8..d3139b58 100644 --- a/.github/workflows/build_and_push_containers.yaml +++ b/.github/workflows/build_and_push_containers.yaml @@ -38,19 +38,21 @@ jobs: - name: Check package version ABRA2 run: | - echo "::set-output name=abra2-manifest::$(docker buildx imagetools inspect ghcr.io/msk-access/abra2:2.22 | jq '.')" + echo "::set-output name=abra2_manifest::$(docker buildx imagetools inspect ghcr.io/msk-access/abra2:2.22 | jq '.')" - name: echo output ABRA2 + id: abra2 run: | - echo ${{ steps.getfile.outputs.abra2-manifest }} + echo ${{ steps.abra2.outputs.abra2-manifest }} - name: Check package version MANTA run: | - echo "::set-output name=manta-manifest::$(docker buildx imagetools inspect ghcr.io/msk-access/manta:1.5.1 | jq '.')" + echo "::set-output name=manta_manifest::$(docker buildx imagetools inspect ghcr.io/msk-access/manta:1.5.1 | jq '.')" - - name: echo output Manta + name: echo output MANTA + id: manta run: | - echo ${{ steps.getfile.outputs.manta-manifest }} + echo ${{ steps.manta.outputs.manta_manifest }} # - # name: Build merge_fastq 0.1.7 and Push to GitHub Packages # uses: docker/build-push-action@v2 From 7fab513c0827b2baf8f36895dc39c62840d9624d Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 11 Dec 2020 23:26:42 -0500 Subject: [PATCH 299/476] Update build_and_push_containers.yaml --- .../workflows/build_and_push_containers.yaml | 44 ++++++++++--------- 1 file changed, 23 insertions(+), 21 deletions(-) diff --git a/.github/workflows/build_and_push_containers.yaml b/.github/workflows/build_and_push_containers.yaml index d3139b58..e5efbd32 100644 --- a/.github/workflows/build_and_push_containers.yaml +++ b/.github/workflows/build_and_push_containers.yaml @@ -37,20 +37,20 @@ jobs: password: ${{ secrets.RS_PAT }} 
- name: Check package version ABRA2 + id: abra2 run: | echo "::set-output name=abra2_manifest::$(docker buildx imagetools inspect ghcr.io/msk-access/abra2:2.22 | jq '.')" - name: echo output ABRA2 - id: abra2 run: | echo ${{ steps.abra2.outputs.abra2-manifest }} - - name: Check package version MANTA + name: Check package version MANTA + id: manta run: | echo "::set-output name=manta_manifest::$(docker buildx imagetools inspect ghcr.io/msk-access/manta:1.5.1 | jq '.')" - name: echo output MANTA - id: manta run: | echo ${{ steps.manta.outputs.manta_manifest }} # - @@ -71,15 +71,16 @@ jobs: # push: true # tags: | # ghcr.io/msk-access/bwa:0.7.17 - # - - # name: Build ABRA2 2.22 and Push to GitHub Packages - # uses: docker/build-push-action@v2 - # with: - # context: . - # file: ./abra2_2.22/container/Dockerfile - # push: true - # tags: | - # ghcr.io/msk-access/abra2:2.22 + - + name: Build ABRA2 2.22 and Push to GitHub Packages + if: ${{ !steps.abra2.outputs.abra2-manifest }} + uses: docker/build-push-action@v2 + with: + context: . + file: ./abra2_2.22/container/Dockerfile + push: true + tags: | + ghcr.io/msk-access/abra2:2.22 # - # name: Build vcf2maf 1.6.17 and Push to GitHub Packages # uses: docker/build-push-action@v2 @@ -125,15 +126,16 @@ jobs: # push: true # tags: | # ghcr.io/msk-access/marianas:1.8.1 - # - - # name: Build Manta and Push to GitHub Packages - # uses: docker/build-push-action@v2 - # with: - # context: . - # file: ./manta_1.5.1/container/Dockerfile - # push: true - # tags: | - # ghcr.io/msk-access/manta:1.5.1 + - + name: Build Manta and Push to GitHub Packages + if: ${{ !steps.manta.outputs.manta-manifest }} + uses: docker/build-push-action@v2 + with: + context: . 
+ file: ./manta_1.5.1/container/Dockerfile + push: true + tags: | + ghcr.io/msk-access/manta:1.5.1 From a4bc269d76060cadd57eb43cf63e811720387ba4 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 11 Dec 2020 23:37:12 -0500 Subject: [PATCH 300/476] Update build_and_push_containers.yaml --- .github/workflows/build_and_push_containers.yaml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/build_and_push_containers.yaml b/.github/workflows/build_and_push_containers.yaml index e5efbd32..928f513c 100644 --- a/.github/workflows/build_and_push_containers.yaml +++ b/.github/workflows/build_and_push_containers.yaml @@ -36,16 +36,14 @@ jobs: username: ${{ github.repository_owner }} password: ${{ secrets.RS_PAT }} - - name: Check package version ABRA2 id: abra2 run: | echo "::set-output name=abra2_manifest::$(docker buildx imagetools inspect ghcr.io/msk-access/abra2:2.22 | jq '.')" - - name: echo output ABRA2 + name: echo output ABRA2 run: | echo ${{ steps.abra2.outputs.abra2-manifest }} - - name: Check package version MANTA id: manta run: | echo "::set-output name=manta_manifest::$(docker buildx imagetools inspect ghcr.io/msk-access/manta:1.5.1 | jq '.')" From e487b1c748d2bd882adcaebdd937d2288baf9a45 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 11 Dec 2020 23:52:21 -0500 Subject: [PATCH 301/476] Testing --- .../workflows/build_and_push_containers.yaml | 51 +++++++++++-------- 1 file changed, 29 insertions(+), 22 deletions(-) diff --git a/.github/workflows/build_and_push_containers.yaml b/.github/workflows/build_and_push_containers.yaml index 928f513c..65de8b4d 100644 --- a/.github/workflows/build_and_push_containers.yaml +++ b/.github/workflows/build_and_push_containers.yaml @@ -38,7 +38,7 @@ jobs: - id: abra2 run: | - echo "::set-output name=abra2_manifest::$(docker buildx imagetools inspect ghcr.io/msk-access/abra2:2.22 | jq '.')" + echo '::set-output name=abra2_manifest::$(docker buildx imagetools inspect 
ghcr.io/msk-access/abra2:2.22 | jq '.')' - name: echo output ABRA2 run: | @@ -46,11 +46,18 @@ jobs: - id: manta run: | - echo "::set-output name=manta_manifest::$(docker buildx imagetools inspect ghcr.io/msk-access/manta:1.5.1 | jq '.')" + echo '::set-output name=manta_manifest::$(docker buildx imagetools inspect ghcr.io/msk-access/manta:1.5.1 | jq '.')' - name: echo output MANTA run: | echo ${{ steps.manta.outputs.manta_manifest }} + - + name: Set selected color + run: echo '::set-output name=SELECTED_COLOR::green' + id: random-color-generator + - + name: Get color + run: echo "The selected color is ${{ steps.random-color-generator.outputs.SELECTED_COLOR }}" # - # name: Build merge_fastq 0.1.7 and Push to GitHub Packages # uses: docker/build-push-action@v2 @@ -69,16 +76,16 @@ jobs: # push: true # tags: | # ghcr.io/msk-access/bwa:0.7.17 - - - name: Build ABRA2 2.22 and Push to GitHub Packages - if: ${{ !steps.abra2.outputs.abra2-manifest }} - uses: docker/build-push-action@v2 - with: - context: . - file: ./abra2_2.22/container/Dockerfile - push: true - tags: | - ghcr.io/msk-access/abra2:2.22 + # - + # name: Build ABRA2 2.22 and Push to GitHub Packages + # if: ${{ !steps.abra2.outputs.abra2-manifest }} + # uses: docker/build-push-action@v2 + # with: + # context: . + # file: ./abra2_2.22/container/Dockerfile + # push: true + # tags: | + # ghcr.io/msk-access/abra2:2.22 # - # name: Build vcf2maf 1.6.17 and Push to GitHub Packages # uses: docker/build-push-action@v2 @@ -124,16 +131,16 @@ jobs: # push: true # tags: | # ghcr.io/msk-access/marianas:1.8.1 - - - name: Build Manta and Push to GitHub Packages - if: ${{ !steps.manta.outputs.manta-manifest }} - uses: docker/build-push-action@v2 - with: - context: . - file: ./manta_1.5.1/container/Dockerfile - push: true - tags: | - ghcr.io/msk-access/manta:1.5.1 + # - + # name: Build Manta and Push to GitHub Packages + # if: ${{ !steps.manta.outputs.manta-manifest }} + # uses: docker/build-push-action@v2 + # with: + # context: . 
+ # file: ./manta_1.5.1/container/Dockerfile + # push: true + # tags: | + # ghcr.io/msk-access/manta:1.5.1 From d9c8e2e98ad3eeffb99618e82fb52cebcfc2e44b Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 11 Dec 2020 23:57:29 -0500 Subject: [PATCH 302/476] Update build_and_push_containers.yaml --- .github/workflows/build_and_push_containers.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build_and_push_containers.yaml b/.github/workflows/build_and_push_containers.yaml index 65de8b4d..ce223331 100644 --- a/.github/workflows/build_and_push_containers.yaml +++ b/.github/workflows/build_and_push_containers.yaml @@ -38,15 +38,15 @@ jobs: - id: abra2 run: | - echo '::set-output name=abra2_manifest::$(docker buildx imagetools inspect ghcr.io/msk-access/abra2:2.22 | jq '.')' + echo "::set-output name=abra2_manifest::$(docker buildx imagetools inspect ghcr.io/msk-access/abra2:2.22 | jq '.')" - name: echo output ABRA2 run: | - echo ${{ steps.abra2.outputs.abra2-manifest }} + echo ${{ steps.abra2.outputs.abra2_manifest }} - id: manta run: | - echo '::set-output name=manta_manifest::$(docker buildx imagetools inspect ghcr.io/msk-access/manta:1.5.1 | jq '.')' + echo "::set-output name=manta_manifest::$(docker buildx imagetools inspect ghcr.io/msk-access/manta:1.5.1 | jq '.')" - name: echo output MANTA run: | From c97f52525548a19b6727278968946ba70a918796 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Sat, 12 Dec 2020 00:09:37 -0500 Subject: [PATCH 303/476] Update build_and_push_containers.yaml --- .github/workflows/build_and_push_containers.yaml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_and_push_containers.yaml b/.github/workflows/build_and_push_containers.yaml index ce223331..ebe74799 100644 --- a/.github/workflows/build_and_push_containers.yaml +++ b/.github/workflows/build_and_push_containers.yaml @@ -36,17 +36,19 @@ jobs: username: ${{ github.repository_owner }} password: 
${{ secrets.RS_PAT }} - + name: Get Manifest for ABRA2 id: abra2 run: | - echo "::set-output name=abra2_manifest::$(docker buildx imagetools inspect ghcr.io/msk-access/abra2:2.22 | jq '.')" + echo ::set-output name=abra2_manifest::$(docker buildx imagetools inspect ghcr.io/msk-access/abra2:2.22 | jq '.') - name: echo output ABRA2 run: | echo ${{ steps.abra2.outputs.abra2_manifest }} - + name: Get Manifest for MANTA id: manta run: | - echo "::set-output name=manta_manifest::$(docker buildx imagetools inspect ghcr.io/msk-access/manta:1.5.1 | jq '.')" + echo ::set-output name=manta_manifest::$(docker buildx imagetools inspect ghcr.io/msk-access/manta:1.5.1 | jq '.') - name: echo output MANTA run: | From ea0a5cd221986f257c7a503ee532eb80c8c36f8a Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Sat, 12 Dec 2020 00:13:42 -0500 Subject: [PATCH 304/476] Update build_and_push_containers.yaml --- .github/workflows/build_and_push_containers.yaml | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/.github/workflows/build_and_push_containers.yaml b/.github/workflows/build_and_push_containers.yaml index ebe74799..b336dfa8 100644 --- a/.github/workflows/build_and_push_containers.yaml +++ b/.github/workflows/build_and_push_containers.yaml @@ -53,13 +53,15 @@ jobs: name: echo output MANTA run: | echo ${{ steps.manta.outputs.manta_manifest }} - - - name: Set selected color - run: echo '::set-output name=SELECTED_COLOR::green' - id: random-color-generator - - - name: Get color - run: echo "The selected color is ${{ steps.random-color-generator.outputs.SELECTED_COLOR }}" + - + name: Get Manifest for Marianas + id: manta + run: | + echo ::set-output name=marianas_manifest::$(docker buildx imagetools inspect ghcr.io/msk-access/marianas:1.8.1 | jq '.') + - + name: echo output marianas + run: | + echo ${{ steps.manta.outputs.marianas_manifest }} # - # name: Build merge_fastq 0.1.7 and Push to GitHub Packages # uses: docker/build-push-action@v2 From 
0838e1eb65037e63835ae271d09897a3463e7d79 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Sat, 12 Dec 2020 00:14:26 -0500 Subject: [PATCH 305/476] Update build_and_push_containers.yaml --- .github/workflows/build_and_push_containers.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_and_push_containers.yaml b/.github/workflows/build_and_push_containers.yaml index b336dfa8..9f5ce5cd 100644 --- a/.github/workflows/build_and_push_containers.yaml +++ b/.github/workflows/build_and_push_containers.yaml @@ -55,13 +55,13 @@ jobs: echo ${{ steps.manta.outputs.manta_manifest }} - name: Get Manifest for Marianas - id: manta + id: marianas run: | echo ::set-output name=marianas_manifest::$(docker buildx imagetools inspect ghcr.io/msk-access/marianas:1.8.1 | jq '.') - name: echo output marianas run: | - echo ${{ steps.manta.outputs.marianas_manifest }} + echo ${{ steps.marianas.outputs.marianas_manifest }} # - # name: Build merge_fastq 0.1.7 and Push to GitHub Packages # uses: docker/build-push-action@v2 From 0e1799232a47f591cb62eb35c0832e656f461226 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Sat, 12 Dec 2020 00:16:47 -0500 Subject: [PATCH 306/476] Update build_and_push_containers.yaml --- .../workflows/build_and_push_containers.yaml | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/.github/workflows/build_and_push_containers.yaml b/.github/workflows/build_and_push_containers.yaml index 9f5ce5cd..640eb0ef 100644 --- a/.github/workflows/build_and_push_containers.yaml +++ b/.github/workflows/build_and_push_containers.yaml @@ -80,16 +80,16 @@ jobs: # push: true # tags: | # ghcr.io/msk-access/bwa:0.7.17 - # - - # name: Build ABRA2 2.22 and Push to GitHub Packages - # if: ${{ !steps.abra2.outputs.abra2-manifest }} - # uses: docker/build-push-action@v2 - # with: - # context: . 
- # file: ./abra2_2.22/container/Dockerfile - # push: true - # tags: | - # ghcr.io/msk-access/abra2:2.22 + - + name: Build ABRA2 2.22 and Push to GitHub Packages + if: ${{ !steps.abra2.outputs.abra2_manifest }} + uses: docker/build-push-action@v2 + with: + context: . + file: ./abra2_2.22/container/Dockerfile + push: true + tags: | + ghcr.io/msk-access/abra2:2.22 # - # name: Build vcf2maf 1.6.17 and Push to GitHub Packages # uses: docker/build-push-action@v2 From 83a91ebf98d19d39c20c0cf210791aeaf2bf1334 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Sat, 12 Dec 2020 00:35:02 -0500 Subject: [PATCH 307/476] Update build_and_push_containers.yaml --- .../workflows/build_and_push_containers.yaml | 181 ++++++++---------- 1 file changed, 82 insertions(+), 99 deletions(-) diff --git a/.github/workflows/build_and_push_containers.yaml b/.github/workflows/build_and_push_containers.yaml index 640eb0ef..c1a35ced 100644 --- a/.github/workflows/build_and_push_containers.yaml +++ b/.github/workflows/build_and_push_containers.yaml @@ -36,53 +36,41 @@ jobs: username: ${{ github.repository_owner }} password: ${{ secrets.RS_PAT }} - - name: Get Manifest for ABRA2 - id: abra2 + name: Get Manifest + id: manifest run: | - echo ::set-output name=abra2_manifest::$(docker buildx imagetools inspect ghcr.io/msk-access/abra2:2.22 | jq '.') - - - name: echo output ABRA2 - run: | - echo ${{ steps.abra2.outputs.abra2_manifest }} - - - name: Get Manifest for MANTA - id: manta - run: | - echo ::set-output name=manta_manifest::$(docker buildx imagetools inspect ghcr.io/msk-access/manta:1.5.1 | jq '.') - - - name: echo output MANTA - run: | - echo ${{ steps.manta.outputs.manta_manifest }} - - - name: Get Manifest for Marianas - id: marianas - run: | - echo ::set-output name=marianas_manifest::$(docker buildx imagetools inspect ghcr.io/msk-access/marianas:1.8.1 | jq '.') - - - name: echo output marianas - run: | - echo ${{ steps.marianas.outputs.marianas_manifest }} - # - - # name: Build merge_fastq 
0.1.7 and Push to GitHub Packages - # uses: docker/build-push-action@v2 - # with: - # context: . - # file: ./merge_fastq_0.1.7/container/Dockerfile - # push: true - # tags: | - # ghcr.io/msk-access/merge_fastq:0.1.7 - # - - # name: Build BWA 0.7.17 and Push to GitHub Packages - # uses: docker/build-push-action@v2 - # with: - # context: . - # file: ./bwa_mem_0.7.17/container/Dockerfile - # push: true - # tags: | - # ghcr.io/msk-access/bwa:0.7.17 + echo ::set-output name=abra2::$(docker buildx imagetools inspect ghcr.io/msk-access/abra2:2.22 | jq '.') + echo ::set-output name=manta::$(docker buildx imagetools inspect ghcr.io/msk-access/manta:1.5.1 | jq '.') + echo ::set-output name=marianas::$(docker buildx imagetools inspect ghcr.io/msk-access/marianas:1.8.1 | jq '.') + echo ::set-output name=bwa::$(docker buildx imagetools inspect ghcr.io/msk-access/bwa:0.7.17 | jq '.') + echo ::set-output name=merge_fastq::$(docker buildx imagetools inspect ghcr.io/msk-access/merge_fastq:0.1.7 | jq '.') + echo ::set-output name=fgbio_postprocessing::$(docker buildx imagetools inspect ghcr.io/msk-access/fgbio-postprocessing:0.2.1 | jq '.') + echo ::set-output name=mutect::$(docker buildx imagetools inspect ghcr.io/msk-access/mutect:1.1.5 | jq '.') + echo ::set-output name=trim_galore::$(docker buildx imagetools inspect ghcr.io/msk-access/trim_galore:0.6.2 | jq '.') + echo ::set-output name=waltz::$(docker buildx imagetools inspect ghcr.io/msk-access/waltz:3.1.1 | jq '.') + - + name: Build merge_fastq 0.1.7 and Push to GitHub Packages + if: ${{ !steps.manifest.outputs.merge_fastq }} + uses: docker/build-push-action@v2 + with: + context: . + file: ./merge_fastq_0.1.7/container/Dockerfile + push: true + tags: | + ghcr.io/msk-access/merge_fastq:0.1.7 + - + name: Build BWA 0.7.17 and Push to GitHub Packages + if: ${{ !steps.manifest.outputs.bwa }} + uses: docker/build-push-action@v2 + with: + context: . 
+ file: ./bwa_mem_0.7.17/container/Dockerfile + push: true + tags: | + ghcr.io/msk-access/bwa:0.7.17 - name: Build ABRA2 2.22 and Push to GitHub Packages - if: ${{ !steps.abra2.outputs.abra2_manifest }} + if: ${{ !steps.manifest.outputs.abra2 }} uses: docker/build-push-action@v2 with: context: . @@ -90,61 +78,56 @@ jobs: push: true tags: | ghcr.io/msk-access/abra2:2.22 - # - - # name: Build vcf2maf 1.6.17 and Push to GitHub Packages - # uses: docker/build-push-action@v2 - # with: - # context: . - # file: ./vcf2maf_1.6.17/container/Dockerfile - # push: true - # tags: | - # ghcr.io/msk-access/vcf2maf:1.6.17 - # - - # name: Build MuTect 1.1.5 and Push to GitHub Packages - # uses: docker/build-push-action@v2 - # with: - # context: . - # file: ./mutect_1.1.5/container/Dockerfile - # push: true - # tags: | - # ghcr.io/msk-access/mutect:1.1.5 - # - - # name: Build Trim Galore 0.6.2 and Push to GitHub Packages - # uses: docker/build-push-action@v2 - # with: - # context: . - # file: ./trim_galore_0.6.2/container/Dockerfile - # push: true - # tags: | - # ghcr.io/msk-access/trim_galore:0.6.2 - # - - # name: Build Waltz 3.1.1 and Push to GitHub Packages - # uses: docker/build-push-action@v2 - # with: - # context: . - # file: ./waltz_count_reads_3.1.1/container/Dockerfile - # push: true - # tags: | - # ghcr.io/msk-access/waltz:3.1.1 - # - - # name: Build Marianas 1.8.1 and Push to GitHub Packages - # uses: docker/build-push-action@v2 - # with: - # context: . - # file: ./marianas_process_loop_umi_1.8.1/container/Dockerfile - # push: true - # tags: | - # ghcr.io/msk-access/marianas:1.8.1 - # - - # name: Build Manta and Push to GitHub Packages - # if: ${{ !steps.manta.outputs.manta-manifest }} - # uses: docker/build-push-action@v2 - # with: - # context: . 
- # file: ./manta_1.5.1/container/Dockerfile - # push: true - # tags: | - # ghcr.io/msk-access/manta:1.5.1 + - + name: Build MuTect 1.1.5 and Push to GitHub Packages + if: ${{ !steps.manifest.outputs.mutect }} + uses: docker/build-push-action@v2 + with: + context: . + file: ./mutect_1.1.5/container/Dockerfile + push: true + tags: | + ghcr.io/msk-access/mutect:1.1.5 + - + name: Build Trim Galore 0.6.2 and Push to GitHub Packages + if: ${{ !steps.manifest.outputs.trim_galore }} + uses: docker/build-push-action@v2 + with: + context: . + file: ./trim_galore_0.6.2/container/Dockerfile + push: true + tags: | + ghcr.io/msk-access/trim_galore:0.6.2 + - + name: Build Waltz 3.1.1 and Push to GitHub Packages + if: ${{ !steps.manifest.outputs.waltz }} + uses: docker/build-push-action@v2 + with: + context: . + file: ./waltz_count_reads_3.1.1/container/Dockerfile + push: true + tags: | + ghcr.io/msk-access/waltz:3.1.1 + - + name: Build Marianas 1.8.1 and Push to GitHub Packages + if: ${{ !steps.manifest.outputs.marianas }} + uses: docker/build-push-action@v2 + with: + context: . + file: ./marianas_process_loop_umi_1.8.1/container/Dockerfile + push: true + tags: | + ghcr.io/msk-access/marianas:1.8.1 + - + name: Build Manta and Push to GitHub Packages + if: ${{ !steps.manifest.outputs.manta }} + uses: docker/build-push-action@v2 + with: + context: . 
+ file: ./manta_1.5.1/container/Dockerfile + push: true + tags: | + ghcr.io/msk-access/manta:1.5.1 From 989d8b9f7002f9c5ff79138322e665f8206fcfd1 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Sat, 12 Dec 2020 02:19:43 -0500 Subject: [PATCH 308/476] Testing --- .github/workflows/build_and_push_containers.yaml | 6 +++--- marianas_process_loop_umi_1.8.1/container/Dockerfile | 6 +++--- waltz_count_reads_3.1.1/container/Dockerfile | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/workflows/build_and_push_containers.yaml b/.github/workflows/build_and_push_containers.yaml index c1a35ced..a855f824 100644 --- a/.github/workflows/build_and_push_containers.yaml +++ b/.github/workflows/build_and_push_containers.yaml @@ -47,7 +47,7 @@ jobs: echo ::set-output name=fgbio_postprocessing::$(docker buildx imagetools inspect ghcr.io/msk-access/fgbio-postprocessing:0.2.1 | jq '.') echo ::set-output name=mutect::$(docker buildx imagetools inspect ghcr.io/msk-access/mutect:1.1.5 | jq '.') echo ::set-output name=trim_galore::$(docker buildx imagetools inspect ghcr.io/msk-access/trim_galore:0.6.2 | jq '.') - echo ::set-output name=waltz::$(docker buildx imagetools inspect ghcr.io/msk-access/waltz:3.1.1 | jq '.') + echo ::set-output name=waltz::$(docker buildx imagetools inspect ghcr.io/msk-access/waltz:3.2.0 | jq '.') - name: Build merge_fastq 0.1.7 and Push to GitHub Packages if: ${{ !steps.manifest.outputs.merge_fastq }} @@ -99,7 +99,7 @@ jobs: tags: | ghcr.io/msk-access/trim_galore:0.6.2 - - name: Build Waltz 3.1.1 and Push to GitHub Packages + name: Build Waltz 3.2.0 and Push to GitHub Packages if: ${{ !steps.manifest.outputs.waltz }} uses: docker/build-push-action@v2 with: @@ -107,7 +107,7 @@ jobs: file: ./waltz_count_reads_3.1.1/container/Dockerfile push: true tags: | - ghcr.io/msk-access/waltz:3.1.1 + ghcr.io/msk-access/waltz:3.2.0 - name: Build Marianas 1.8.1 and Push to GitHub Packages if: ${{ !steps.manifest.outputs.marianas }} diff --git 
a/marianas_process_loop_umi_1.8.1/container/Dockerfile b/marianas_process_loop_umi_1.8.1/container/Dockerfile index 343ec6a6..ef4d2eff 100644 --- a/marianas_process_loop_umi_1.8.1/container/Dockerfile +++ b/marianas_process_loop_umi_1.8.1/container/Dockerfile @@ -20,7 +20,7 @@ LABEL org.opencontainers.image.created=${BUILD_DATE} \ org.opencontainers.image.licenses=${LICENSE} \ org.opencontainers.image.version.java=${JAVA_VERSION} \ org.opencontainers.image.version.marianas=${MARIANAS_VERSION} \ - org.opencontainers.image.source.marianas="https://github.com/mskcc/Marianas/releases/" + org.opencontainers.image.source="https://github.com/msk-access/cwl_commandlinetools" LABEL org.opencontainers.image.description="This container uses OPENJDK ${JAVA_VERSION} as the base image to use Marianas version ${MARIANAS_VERSION}" @@ -30,5 +30,5 @@ ENV MARIANAS_VERSION 1.8.1 WORKDIR /usr/src/ -RUN wget https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar -RUN cp -s /usr/src/Marianas-1.8.1.jar /usr/local/bin/Marianas-1.8.1.jar +RUN wget https://github.com/mskcc/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar +RUN cp -s /usr/src/Marianas-1.8.1.jar /usr/local/bin/Marianas-1.8.1.jar \ No newline at end of file diff --git a/waltz_count_reads_3.1.1/container/Dockerfile b/waltz_count_reads_3.1.1/container/Dockerfile index 70d13a31..cfad2f10 100644 --- a/waltz_count_reads_3.1.1/container/Dockerfile +++ b/waltz_count_reads_3.1.1/container/Dockerfile @@ -5,14 +5,14 @@ FROM java:8 ################## ARGUMENTS ####################### ARG BUILD_DATE ARG BUILD_VERSION=1.0.0 -ARG WALTZ_VERSION_cmd=3.1.1 +ARG WALTZ_VERSION_cmd=3.2.0 ################## LABELS ####################### LABEL org.opencontainers.image.authors='Shalabh Suman (sumans@mskcc.org)' LABEL org.opencontainers.image.created=$BUILD_DATE #LABEL org.opencontainers.image.source = "https://github.com/juberpatel/Waltz/blob/master/Waltz-2.0.jar" -LABEL org.opencontainers.image.source = 
"https://github.com/juberpatel/Waltz/releases/download/v3.1.1/Waltz-3.1.1.jar" +LABEL org.opencontainers.image.source = "https://github.com/msk-access/cwl_commandlinetools" LABEL org.opencontainers.image.revision = "10037a1" LABEL org.opencontainers.image.title = "waltz" LABEL org.opencontainers.image.description = "Syntax to build image: docker build --build-arg BUILD_DATE=$(date -u +'%Y-%m-%dT%H:%M:%SZ') -t mskcc/waltz:1.0.0 ." @@ -24,7 +24,7 @@ ENV WALTZ_VERSION=${WALTZ_VERSION_cmd} # Install Waltz #RUN wget https://github.com/juberpatel/Waltz/blob/master/Waltz-${WALTZ_VERSION}.jar -RUN wget https://github.com/juberpatel/Waltz/releases/download/v3.1.1/Waltz-${WALTZ_VERSION}.jar +RUN wget https://github.com/mskcc/Waltz/releases/download/v${WALTZ_VERSION}/Waltz-${WALTZ_VERSION}.jar RUN mv Waltz-${WALTZ_VERSION}.jar /usr/local/bin/Waltz.jar #ENV PATH=$PATH:/usr/local/bin/Waltz.jar \ No newline at end of file From 5aebe1eedbb0bbd704f95b1ddf7b7382c1e3c83d Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Tue, 15 Dec 2020 14:57:18 -0500 Subject: [PATCH 309/476] Update build_and_push_containers.yaml --- .github/workflows/build_and_push_containers.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.github/workflows/build_and_push_containers.yaml b/.github/workflows/build_and_push_containers.yaml index a855f824..c3c4516a 100644 --- a/.github/workflows/build_and_push_containers.yaml +++ b/.github/workflows/build_and_push_containers.yaml @@ -35,6 +35,14 @@ jobs: registry: ghcr.io username: ${{ github.repository_owner }} password: ${{ secrets.RS_PAT }} + - + uses: actions/delete-package-versions@v1 + with: + package-name: 'abra2' + - + uses: actions/delete-package-versions@v1 + with: + package-name: 'mutect' - name: Get Manifest id: manifest From 5d80ecbd3e8f5ed880d19d4101114ea6c0e6b560 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Tue, 15 Dec 2020 14:59:43 -0500 Subject: [PATCH 310/476] Update build_and_push_containers.yaml --- 
.github/workflows/build_and_push_containers.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/build_and_push_containers.yaml b/.github/workflows/build_and_push_containers.yaml index c3c4516a..cb4d3e72 100644 --- a/.github/workflows/build_and_push_containers.yaml +++ b/.github/workflows/build_and_push_containers.yaml @@ -39,10 +39,12 @@ jobs: uses: actions/delete-package-versions@v1 with: package-name: 'abra2' + token: ${{ secrets.RS_PAT }} - uses: actions/delete-package-versions@v1 with: package-name: 'mutect' + token: ${{ secrets.RS_PAT }} - name: Get Manifest id: manifest From c43f2f5d5692d55d9e784105cd89e6a4ace88451 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Tue, 15 Dec 2020 15:06:36 -0500 Subject: [PATCH 311/476] Update build_and_push_containers.yaml --- .github/workflows/build_and_push_containers.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_and_push_containers.yaml b/.github/workflows/build_and_push_containers.yaml index cb4d3e72..49186efb 100644 --- a/.github/workflows/build_and_push_containers.yaml +++ b/.github/workflows/build_and_push_containers.yaml @@ -36,12 +36,12 @@ jobs: username: ${{ github.repository_owner }} password: ${{ secrets.RS_PAT }} - - uses: actions/delete-package-versions@v1 + uses: actions/delete-package-versions@v1.0.3 with: package-name: 'abra2' token: ${{ secrets.RS_PAT }} - - uses: actions/delete-package-versions@v1 + uses: actions/delete-package-versions@v1.0.3 with: package-name: 'mutect' token: ${{ secrets.RS_PAT }} From 75e74534e4071df16409cd2f4623f867c31e3da0 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Tue, 15 Dec 2020 15:41:03 -0500 Subject: [PATCH 312/476] Update build_and_push_containers.yaml --- .github/workflows/build_and_push_containers.yaml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/workflows/build_and_push_containers.yaml b/.github/workflows/build_and_push_containers.yaml index 49186efb..d1fbde42 100644 --- 
a/.github/workflows/build_and_push_containers.yaml +++ b/.github/workflows/build_and_push_containers.yaml @@ -35,6 +35,13 @@ jobs: registry: ghcr.io username: ${{ github.repository_owner }} password: ${{ secrets.RS_PAT }} + - uses: actions/checkout@v2 + - uses: actions/setup-node@v2-beta + with: + node-version: '14' + check-latest: true + - run: npm install + - run: npm test - uses: actions/delete-package-versions@v1.0.3 with: From d42f8a612edea7d488e51bfd3efe7316ebb69a79 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Tue, 15 Dec 2020 15:45:44 -0500 Subject: [PATCH 313/476] Update build_and_push_containers.yaml --- .github/workflows/build_and_push_containers.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build_and_push_containers.yaml b/.github/workflows/build_and_push_containers.yaml index d1fbde42..feaaffaa 100644 --- a/.github/workflows/build_and_push_containers.yaml +++ b/.github/workflows/build_and_push_containers.yaml @@ -35,13 +35,13 @@ jobs: registry: ghcr.io username: ${{ github.repository_owner }} password: ${{ secrets.RS_PAT }} - - uses: actions/checkout@v2 - - uses: actions/setup-node@v2-beta + - + uses: actions/checkout@v2 + - + uses: actions/setup-node@v2-beta with: node-version: '14' check-latest: true - - run: npm install - - run: npm test - uses: actions/delete-package-versions@v1.0.3 with: From d5ec664da257730c5842aa294dd4fabc99bd5193 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Tue, 15 Dec 2020 15:51:03 -0500 Subject: [PATCH 314/476] Update build_and_push_containers.yaml --- .github/workflows/build_and_push_containers.yaml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/.github/workflows/build_and_push_containers.yaml b/.github/workflows/build_and_push_containers.yaml index feaaffaa..36ad2e03 100644 --- a/.github/workflows/build_and_push_containers.yaml +++ b/.github/workflows/build_and_push_containers.yaml @@ -36,12 +36,9 @@ jobs: username: ${{ github.repository_owner }} 
password: ${{ secrets.RS_PAT }} - - uses: actions/checkout@v2 - - - uses: actions/setup-node@v2-beta + uses: actions/setup-node@v1 with: node-version: '14' - check-latest: true - uses: actions/delete-package-versions@v1.0.3 with: From 4513096ae6468f377ba3935fca1d438303657806 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Tue, 15 Dec 2020 16:02:47 -0500 Subject: [PATCH 315/476] Update build_and_push_containers.yaml --- .../workflows/build_and_push_containers.yaml | 33 +++++++++++-------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/.github/workflows/build_and_push_containers.yaml b/.github/workflows/build_and_push_containers.yaml index 36ad2e03..e1800e95 100644 --- a/.github/workflows/build_and_push_containers.yaml +++ b/.github/workflows/build_and_push_containers.yaml @@ -35,20 +35,27 @@ jobs: registry: ghcr.io username: ${{ github.repository_owner }} password: ${{ secrets.RS_PAT }} - - - uses: actions/setup-node@v1 - with: - node-version: '14' - - - uses: actions/delete-package-versions@v1.0.3 - with: - package-name: 'abra2' - token: ${{ secrets.RS_PAT }} - - - uses: actions/delete-package-versions@v1.0.3 + - name: Fetch releases + run: | + curl -X POST \ + -s \ + -H "Accept: application/vnd.github.package-deletes-preview+json" \ + -H "Authorization: bearer ${{ secrets.RS_PAT }}" \ + -d '{"query":"query {repository(owner:\"${{ OWNER }}\", name:\"${{ REPOSITORY }}\") {registryPackages(last:10) {edges{node{id, name, versions(last:100){edges {node {id, updatedAt, version}}}}}}}}"}' \ + -o /tmp/response.json \ + --url https://api.github.com/graphql + - name: Filter Releases + run: "cat /tmp/response.json | jq -r 'def daysAgo(days): (now | floor) - (days * 86400); [.data.repository.registryPackages.edges[0].node.versions.edges | sort_by(.node.updatedAt|fromdate) | reverse | .[] | select( .node.version != \"docker-base-layer\" ) | .value[].node.id] | unique_by(.) 
| @csv' | cut -d, -f1 | sed -e 's/^\"//' -e 's/\"$//' > /tmp/release.json" + - name: Show Release + id: release + run: printf "::set-output name=id::%s" $(cat /tmp/release.json) + - name: Delete Release + uses: WyriHaximus/github-action-delete-package@master + if: steps.release.outputs.id != '' with: - package-name: 'mutect' - token: ${{ secrets.RS_PAT }} + packageVersionId: ${{ steps.release.outputs.id }} + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - name: Get Manifest id: manifest From 4c65ec5ed56eaf94d29245e3e3bfb95edae4c5a2 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Tue, 15 Dec 2020 16:08:03 -0500 Subject: [PATCH 316/476] Update build_and_push_containers.yaml --- .github/workflows/build_and_push_containers.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_and_push_containers.yaml b/.github/workflows/build_and_push_containers.yaml index e1800e95..958b56fe 100644 --- a/.github/workflows/build_and_push_containers.yaml +++ b/.github/workflows/build_and_push_containers.yaml @@ -41,7 +41,7 @@ jobs: -s \ -H "Accept: application/vnd.github.package-deletes-preview+json" \ -H "Authorization: bearer ${{ secrets.RS_PAT }}" \ - -d '{"query":"query {repository(owner:\"${{ OWNER }}\", name:\"${{ REPOSITORY }}\") {registryPackages(last:10) {edges{node{id, name, versions(last:100){edges {node {id, updatedAt, version}}}}}}}}"}' \ + -d '{"query":"query {repository(owner:\"${{ GITHUB_ACTOR }}\", name:\"${{ GITHUB_REPOSITORY }}\") {registryPackages(last:10) {edges{node{id, name, versions(last:100){edges {node {id, updatedAt, version}}}}}}}}"}' \ -o /tmp/response.json \ --url https://api.github.com/graphql - name: Filter Releases From 35697a379ae455cd896c074b38e8c85aa0268401 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Tue, 15 Dec 2020 16:09:15 -0500 Subject: [PATCH 317/476] Update build_and_push_containers.yaml --- .github/workflows/build_and_push_containers.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/.github/workflows/build_and_push_containers.yaml b/.github/workflows/build_and_push_containers.yaml index 958b56fe..f47f9b2c 100644 --- a/.github/workflows/build_and_push_containers.yaml +++ b/.github/workflows/build_and_push_containers.yaml @@ -41,7 +41,7 @@ jobs: -s \ -H "Accept: application/vnd.github.package-deletes-preview+json" \ -H "Authorization: bearer ${{ secrets.RS_PAT }}" \ - -d '{"query":"query {repository(owner:\"${{ GITHUB_ACTOR }}\", name:\"${{ GITHUB_REPOSITORY }}\") {registryPackages(last:10) {edges{node{id, name, versions(last:100){edges {node {id, updatedAt, version}}}}}}}}"}' \ + -d '{"query":"query {repository(owner:\"${{ github.repository_owner }}\", name:\"${{ github.repository }}\") {registryPackages(last:10) {edges{node{id, name, versions(last:100){edges {node {id, updatedAt, version}}}}}}}}"}' \ -o /tmp/response.json \ --url https://api.github.com/graphql - name: Filter Releases From 8ba6c227b34af261e0f57b36d1d980dd89e60bfa Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Wed, 16 Dec 2020 18:24:17 -0500 Subject: [PATCH 318/476] Update to ghcr.io --- .../workflows/build_and_push_containers.yaml | 155 ------------------ abra2_2.17/container/Dockerfile | 4 +- abra2_2.22/container/Dockerfile | 2 +- .../bedtools_genomecov_v2.28.0_cv2.cwl | 2 +- .../bedtools_merge_v2.28.0_cv2.cwl | 2 +- bwa_mem_0.7.17/bwa_mem_0.7.17.cwl | 2 +- fastp_0.20.1/fastp_0.20.1.cwl | 2 +- ...gbio_call_duplex_consensus_reads_1.2.0.cwl | 2 +- ...fgbio_collect_duplex_seq_metrics_1.2.0.cwl | 2 +- .../fgbio_fastq_to_bam_1.2.0.cwl | 2 +- .../fgbio_filter_consensus_reads_1.2.0.cwl | 2 +- .../fgbio_group_reads_by_umi_1.2.0.cwl | 2 +- ...io_postprocessing_simplex_filter_0.1.8.cwl | 2 +- .../gatk_apply_bqsr_4.1.8.1.cwl | 2 +- .../gatk_base_recalibrator_4.1.8.1.cwl | 2 +- ...lect_alignment_summary_metrics_4.1.8.0.cwl | 2 +- .../gatk_collect_hs_metrics_4.1.8.0.cwl | 2 +- ...tk_collect_insert_size_metrics_4.1.8.0.cwl | 2 +- .../gatk_merge_bam_alignment_4.1.8.0.cwl | 2 +- 
.../gatk_merge_sam_files_4.1.8.0.cwl | 2 +- .../gatk_sam_to_fastq_4.1.8.0.cwl | 2 +- manta_1.5.1/manta_1.5.1.cwl | 2 +- .../marianas_first_pass.cwl | 2 +- .../marianas_second_pass.cwl | 2 +- .../marianas_process_loop_umi.cwl | 2 +- .../marianas_separate_bams_1.8.1.cwl | 2 +- merge_fastq_0.1.7/merge_fastq_0.1.7.cwl | 2 +- mutect_1.1.5/mutect_1.1.5.cwl | 2 +- ...ard_add_or_replace_read_groups_4.1.8.1.cwl | 2 +- .../picard_fix_mate_information_4.1.8.1.cwl | 2 +- .../picard_mark_duplicates_4.1.8.1.cwl | 2 +- trim_galore_0.6.2/trim_galore_0.6.2.cwl | 2 +- .../waltz_count_reads_3.1.1.cwl | 2 +- .../waltz_pileupmatrices_3.1.1.cwl | 2 +- 34 files changed, 34 insertions(+), 189 deletions(-) delete mode 100644 .github/workflows/build_and_push_containers.yaml diff --git a/.github/workflows/build_and_push_containers.yaml b/.github/workflows/build_and_push_containers.yaml deleted file mode 100644 index f47f9b2c..00000000 --- a/.github/workflows/build_and_push_containers.yaml +++ /dev/null @@ -1,155 +0,0 @@ -name: build_and_push_containers - -on: - push: - paths-ignore: - - 'docs/**' - - '**.md' - - '**.cwl' - pull_request: - paths-ignore: - - 'docs/**' - - '**.md' - - '**.cwl' - -jobs: - push_to_registry: - name: Push Docker image to GitHub Packages - runs-on: ubuntu-latest - if: "!contains(github.event.head_commit.message, 'ci skip')" - steps: - - - name: Check out the repo - uses: actions/checkout@v2 - - - name: Set up QEMU - uses: docker/setup-qemu-action@v1 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v1 - - - name: Login to GitHub Container Registry - #if: github.event_name != 'pull_request' - uses: docker/login-action@v1 - with: - registry: ghcr.io - username: ${{ github.repository_owner }} - password: ${{ secrets.RS_PAT }} - - name: Fetch releases - run: | - curl -X POST \ - -s \ - -H "Accept: application/vnd.github.package-deletes-preview+json" \ - -H "Authorization: bearer ${{ secrets.RS_PAT }}" \ - -d '{"query":"query {repository(owner:\"${{ 
github.repository_owner }}\", name:\"${{ github.repository }}\") {registryPackages(last:10) {edges{node{id, name, versions(last:100){edges {node {id, updatedAt, version}}}}}}}}"}' \ - -o /tmp/response.json \ - --url https://api.github.com/graphql - - name: Filter Releases - run: "cat /tmp/response.json | jq -r 'def daysAgo(days): (now | floor) - (days * 86400); [.data.repository.registryPackages.edges[0].node.versions.edges | sort_by(.node.updatedAt|fromdate) | reverse | .[] | select( .node.version != \"docker-base-layer\" ) | .value[].node.id] | unique_by(.) | @csv' | cut -d, -f1 | sed -e 's/^\"//' -e 's/\"$//' > /tmp/release.json" - - name: Show Release - id: release - run: printf "::set-output name=id::%s" $(cat /tmp/release.json) - - name: Delete Release - uses: WyriHaximus/github-action-delete-package@master - if: steps.release.outputs.id != '' - with: - packageVersionId: ${{ steps.release.outputs.id }} - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - - name: Get Manifest - id: manifest - run: | - echo ::set-output name=abra2::$(docker buildx imagetools inspect ghcr.io/msk-access/abra2:2.22 | jq '.') - echo ::set-output name=manta::$(docker buildx imagetools inspect ghcr.io/msk-access/manta:1.5.1 | jq '.') - echo ::set-output name=marianas::$(docker buildx imagetools inspect ghcr.io/msk-access/marianas:1.8.1 | jq '.') - echo ::set-output name=bwa::$(docker buildx imagetools inspect ghcr.io/msk-access/bwa:0.7.17 | jq '.') - echo ::set-output name=merge_fastq::$(docker buildx imagetools inspect ghcr.io/msk-access/merge_fastq:0.1.7 | jq '.') - echo ::set-output name=fgbio_postprocessing::$(docker buildx imagetools inspect ghcr.io/msk-access/fgbio-postprocessing:0.2.1 | jq '.') - echo ::set-output name=mutect::$(docker buildx imagetools inspect ghcr.io/msk-access/mutect:1.1.5 | jq '.') - echo ::set-output name=trim_galore::$(docker buildx imagetools inspect ghcr.io/msk-access/trim_galore:0.6.2 | jq '.') - echo ::set-output name=waltz::$(docker buildx 
imagetools inspect ghcr.io/msk-access/waltz:3.2.0 | jq '.') - - - name: Build merge_fastq 0.1.7 and Push to GitHub Packages - if: ${{ !steps.manifest.outputs.merge_fastq }} - uses: docker/build-push-action@v2 - with: - context: . - file: ./merge_fastq_0.1.7/container/Dockerfile - push: true - tags: | - ghcr.io/msk-access/merge_fastq:0.1.7 - - - name: Build BWA 0.7.17 and Push to GitHub Packages - if: ${{ !steps.manifest.outputs.bwa }} - uses: docker/build-push-action@v2 - with: - context: . - file: ./bwa_mem_0.7.17/container/Dockerfile - push: true - tags: | - ghcr.io/msk-access/bwa:0.7.17 - - - name: Build ABRA2 2.22 and Push to GitHub Packages - if: ${{ !steps.manifest.outputs.abra2 }} - uses: docker/build-push-action@v2 - with: - context: . - file: ./abra2_2.22/container/Dockerfile - push: true - tags: | - ghcr.io/msk-access/abra2:2.22 - - - name: Build MuTect 1.1.5 and Push to GitHub Packages - if: ${{ !steps.manifest.outputs.mutect }} - uses: docker/build-push-action@v2 - with: - context: . - file: ./mutect_1.1.5/container/Dockerfile - push: true - tags: | - ghcr.io/msk-access/mutect:1.1.5 - - - name: Build Trim Galore 0.6.2 and Push to GitHub Packages - if: ${{ !steps.manifest.outputs.trim_galore }} - uses: docker/build-push-action@v2 - with: - context: . - file: ./trim_galore_0.6.2/container/Dockerfile - push: true - tags: | - ghcr.io/msk-access/trim_galore:0.6.2 - - - name: Build Waltz 3.2.0 and Push to GitHub Packages - if: ${{ !steps.manifest.outputs.waltz }} - uses: docker/build-push-action@v2 - with: - context: . - file: ./waltz_count_reads_3.1.1/container/Dockerfile - push: true - tags: | - ghcr.io/msk-access/waltz:3.2.0 - - - name: Build Marianas 1.8.1 and Push to GitHub Packages - if: ${{ !steps.manifest.outputs.marianas }} - uses: docker/build-push-action@v2 - with: - context: . 
- file: ./marianas_process_loop_umi_1.8.1/container/Dockerfile - push: true - tags: | - ghcr.io/msk-access/marianas:1.8.1 - - - name: Build Manta and Push to GitHub Packages - if: ${{ !steps.manifest.outputs.manta }} - uses: docker/build-push-action@v2 - with: - context: . - file: ./manta_1.5.1/container/Dockerfile - push: true - tags: | - ghcr.io/msk-access/manta:1.5.1 - - - - diff --git a/abra2_2.17/container/Dockerfile b/abra2_2.17/container/Dockerfile index 9f1bc82f..e7c64df9 100644 --- a/abra2_2.17/container/Dockerfile +++ b/abra2_2.17/container/Dockerfile @@ -20,7 +20,7 @@ LABEL org.opencontainers.image.created=${BUILD_DATE} \ org.opencontainers.image.licenses=${LICENSE} \ org.opencontainers.image.version.java=${JAVA_VERSION} \ org.opencontainers.image.version.abra2=${ABRA2_VERSION} \ - org.opencontainers.image.source.abra2="https://github.com/mozack/abra2/releases/" + org.opencontainers.image.source="https://github.com/mozack/abra2/releases/" LABEL org.opencontainers.image.description="This container uses OPENJDK ${JAVA_VERSION} as the base image to build abra2 version ${ABRA2_VERSION}" @@ -37,4 +37,4 @@ RUN apt-get update && \ RUN wget "https://github.com/mozack/abra2/releases/download/v${ABRA2_VERSION}/abra2-${ABRA2_VERSION}.jar" && \ chmod 755 /usr/src/abra2-${ABRA2_VERSION}.jar && \ - cp -s /usr/src/abra2-${ABRA2_VERSION}.jar /usr/local/bin/abra2.jar \ No newline at end of file + cp -s /usr/src/abra2-${ABRA2_VERSION}.jar /usr/local/bin/abra2.jar diff --git a/abra2_2.22/container/Dockerfile b/abra2_2.22/container/Dockerfile index 2c882f60..0ec95e41 100644 --- a/abra2_2.22/container/Dockerfile +++ b/abra2_2.22/container/Dockerfile @@ -20,7 +20,7 @@ LABEL org.opencontainers.image.created=${BUILD_DATE} \ org.opencontainers.image.licenses=${LICENSE} \ org.opencontainers.image.version.java=${JAVA_VERSION} \ org.opencontainers.image.version.abra2=${ABRA2_VERSION} \ - org.opencontainers.image.source.abra2="https://github.com/mozack/abra2/releases/" + 
org.opencontainers.image.source="https://github.com/mozack/abra2/releases/" LABEL org.opencontainers.image.description="This container uses OPENJDK ${JAVA_VERSION} as the base image to build abra2 version ${ABRA2_VERSION}" diff --git a/bedtools_genomecov_v2.28.0_cv2/bedtools_genomecov_v2.28.0_cv2.cwl b/bedtools_genomecov_v2.28.0_cv2/bedtools_genomecov_v2.28.0_cv2.cwl index 3001b154..396931b9 100644 --- a/bedtools_genomecov_v2.28.0_cv2/bedtools_genomecov_v2.28.0_cv2.cwl +++ b/bedtools_genomecov_v2.28.0_cv2/bedtools_genomecov_v2.28.0_cv2.cwl @@ -55,7 +55,7 @@ requirements: ramMin: 20000 coresMin: 1 - class: DockerRequirement - dockerPull: 'biocontainers/bedtools:v2.28.0_cv2' + dockerPull: 'ghcr.io/msk-access/bedtools:v2.28.0_cv2' - class: InlineJavascriptRequirement stdout: |- ${ diff --git a/bedtools_merge_v2.28.0_cv2/bedtools_merge_v2.28.0_cv2.cwl b/bedtools_merge_v2.28.0_cv2/bedtools_merge_v2.28.0_cv2.cwl index ffd7c792..e6259ff3 100644 --- a/bedtools_merge_v2.28.0_cv2/bedtools_merge_v2.28.0_cv2.cwl +++ b/bedtools_merge_v2.28.0_cv2/bedtools_merge_v2.28.0_cv2.cwl @@ -49,7 +49,7 @@ requirements: ramMin: 20000 coresMin: 1 - class: DockerRequirement - dockerPull: 'biocontainers/bedtools:v2.28.0_cv2' + dockerPull: 'ghcr.io/msk-access/bedtools:v2.28.0_cv2' - class: InlineJavascriptRequirement stdout: |- ${ diff --git a/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl b/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl index 6eee0064..5e7e55dd 100644 --- a/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl +++ b/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl @@ -323,7 +323,7 @@ requirements: ramMin: 34000 coresMin: 16 - class: DockerRequirement - dockerPull: 'mskaccess/bwa_mem_0.7.17:0.1.0' + dockerPull: 'ghcr.io/msk-access/bwa:0.7.17' - class: InlineJavascriptRequirement stdout: |- ${ diff --git a/fastp_0.20.1/fastp_0.20.1.cwl b/fastp_0.20.1/fastp_0.20.1.cwl index 7e29582b..b454079f 100644 --- a/fastp_0.20.1/fastp_0.20.1.cwl +++ b/fastp_0.20.1/fastp_0.20.1.cwl @@ -172,7 +172,7 @@ requirements: ramMin: 17000 coresMin: 4 - class: 
DockerRequirement - dockerPull: 'quay.io/biocontainers/fastp:0.20.1--h8b12597_0' + dockerPull: 'ghcr.io/msk-access/fastp:0.20.1--h8b12597_0' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/fgbio_call_duplex_consensus_reads_1.2.0/fgbio_call_duplex_consensus_reads_1.2.0.cwl b/fgbio_call_duplex_consensus_reads_1.2.0/fgbio_call_duplex_consensus_reads_1.2.0.cwl index 27696dc1..e0b1a766 100644 --- a/fgbio_call_duplex_consensus_reads_1.2.0/fgbio_call_duplex_consensus_reads_1.2.0.cwl +++ b/fgbio_call_duplex_consensus_reads_1.2.0/fgbio_call_duplex_consensus_reads_1.2.0.cwl @@ -184,7 +184,7 @@ requirements: ramMin: 20000 coresMin: 16 - class: DockerRequirement - dockerPull: 'quay.io/biocontainers/fgbio:1.2.0--0' + dockerPull: 'ghcr.io/msk-access/fgbio:1.2.0' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/fgbio_collect_duplex_seq_metrics_1.2.0/fgbio_collect_duplex_seq_metrics_1.2.0.cwl b/fgbio_collect_duplex_seq_metrics_1.2.0/fgbio_collect_duplex_seq_metrics_1.2.0.cwl index 8f0aa087..5cd35ce5 100644 --- a/fgbio_collect_duplex_seq_metrics_1.2.0/fgbio_collect_duplex_seq_metrics_1.2.0.cwl +++ b/fgbio_collect_duplex_seq_metrics_1.2.0/fgbio_collect_duplex_seq_metrics_1.2.0.cwl @@ -255,7 +255,7 @@ requirements: ramMin: 16000 coresMin: 2 - class: DockerRequirement - dockerPull: 'quay.io/biocontainers/fgbio:1.2.0--0' + dockerPull: 'ghcr.io/msk-access/fgbio:1.2.0' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/fgbio_fastq_to_bam_1.2.0/fgbio_fastq_to_bam_1.2.0.cwl b/fgbio_fastq_to_bam_1.2.0/fgbio_fastq_to_bam_1.2.0.cwl index 393e64ab..2313da02 100644 --- a/fgbio_fastq_to_bam_1.2.0/fgbio_fastq_to_bam_1.2.0.cwl +++ b/fgbio_fastq_to_bam_1.2.0/fgbio_fastq_to_bam_1.2.0.cwl @@ -213,7 +213,7 @@ requirements: ramMin: 16000 coresMin: 2 - class: DockerRequirement - dockerPull: 'quay.io/biocontainers/fgbio:1.2.0--0' + dockerPull: 
'ghcr.io/msk-access/fgbio:1.2.0' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/fgbio_filter_consensus_reads_1.2.0/fgbio_filter_consensus_reads_1.2.0.cwl b/fgbio_filter_consensus_reads_1.2.0/fgbio_filter_consensus_reads_1.2.0.cwl index 7b041d08..4d72c68f 100644 --- a/fgbio_filter_consensus_reads_1.2.0/fgbio_filter_consensus_reads_1.2.0.cwl +++ b/fgbio_filter_consensus_reads_1.2.0/fgbio_filter_consensus_reads_1.2.0.cwl @@ -196,7 +196,7 @@ requirements: ramMin: 16000 coresMin: 2 - class: DockerRequirement - dockerPull: 'quay.io/biocontainers/fgbio:1.2.0--0' + dockerPull: 'ghcr.io/msk-access/fgbio:1.2.0' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/fgbio_group_reads_by_umi_1.2.0/fgbio_group_reads_by_umi_1.2.0.cwl b/fgbio_group_reads_by_umi_1.2.0/fgbio_group_reads_by_umi_1.2.0.cwl index 0463147b..65af9b26 100644 --- a/fgbio_group_reads_by_umi_1.2.0/fgbio_group_reads_by_umi_1.2.0.cwl +++ b/fgbio_group_reads_by_umi_1.2.0/fgbio_group_reads_by_umi_1.2.0.cwl @@ -215,7 +215,7 @@ requirements: ramMin: 16000 coresMin: 2 - class: DockerRequirement - dockerPull: 'quay.io/biocontainers/fgbio:1.2.0--0' + dockerPull: 'ghcr.io/msk-access/fgbio:1.2.0' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/fgbio_postprocessing_simplex_filter_0.1.8/fgbio_postprocessing_simplex_filter_0.1.8.cwl b/fgbio_postprocessing_simplex_filter_0.1.8/fgbio_postprocessing_simplex_filter_0.1.8.cwl index 12545f82..2599e4af 100644 --- a/fgbio_postprocessing_simplex_filter_0.1.8/fgbio_postprocessing_simplex_filter_0.1.8.cwl +++ b/fgbio_postprocessing_simplex_filter_0.1.8/fgbio_postprocessing_simplex_filter_0.1.8.cwl @@ -49,7 +49,7 @@ requirements: ramMin: 16000 coresMin: 2 - class: DockerRequirement - dockerPull: 'mskaccess/fgbio_postprocessing:0.2.1' + dockerPull: 'ghcr.io/msk-access/fgbio_postprocessing:0.2.1' - class: InlineJavascriptRequirement 
'dct:contributor': - class: 'foaf:Organization' diff --git a/gatk_apply_bqsr_4.1.8.1/gatk_apply_bqsr_4.1.8.1.cwl b/gatk_apply_bqsr_4.1.8.1/gatk_apply_bqsr_4.1.8.1.cwl index 9e1b6b74..1d967a84 100644 --- a/gatk_apply_bqsr_4.1.8.1/gatk_apply_bqsr_4.1.8.1.cwl +++ b/gatk_apply_bqsr_4.1.8.1/gatk_apply_bqsr_4.1.8.1.cwl @@ -293,7 +293,7 @@ requirements: ramMin: 16000 coresMin: 4 - class: DockerRequirement - dockerPull: 'broadinstitute/gatk:4.1.8.1' + dockerPull: 'ghcr.io/msk-access/gatk:4.1.8.1' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/gatk_base_recalibrator_4.1.8.1/gatk_base_recalibrator_4.1.8.1.cwl b/gatk_base_recalibrator_4.1.8.1/gatk_base_recalibrator_4.1.8.1.cwl index 5b280ef1..e9a2874d 100644 --- a/gatk_base_recalibrator_4.1.8.1/gatk_base_recalibrator_4.1.8.1.cwl +++ b/gatk_base_recalibrator_4.1.8.1/gatk_base_recalibrator_4.1.8.1.cwl @@ -323,7 +323,7 @@ requirements: ramMin: 32000 coresMin: 8 - class: DockerRequirement - dockerPull: 'broadinstitute/gatk:4.1.8.1' + dockerPull: 'ghcr.io/msk-access/gatk:4.1.8.1' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/gatk_collect_alignment_summary_metrics_4.1.8.0/gatk_collect_alignment_summary_metrics_4.1.8.0.cwl b/gatk_collect_alignment_summary_metrics_4.1.8.0/gatk_collect_alignment_summary_metrics_4.1.8.0.cwl index 1ecbbe62..f5e6a8a2 100644 --- a/gatk_collect_alignment_summary_metrics_4.1.8.0/gatk_collect_alignment_summary_metrics_4.1.8.0.cwl +++ b/gatk_collect_alignment_summary_metrics_4.1.8.0/gatk_collect_alignment_summary_metrics_4.1.8.0.cwl @@ -210,7 +210,7 @@ requirements: ramMin: 32000 coresMin: 1 - class: DockerRequirement - dockerPull: 'broadinstitute/gatk:4.1.8.0' + dockerPull: 'ghcr.io/msk-access/gatk:4.1.8.0' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/gatk_collect_hs_metrics_4.1.8.0/gatk_collect_hs_metrics_4.1.8.0.cwl 
b/gatk_collect_hs_metrics_4.1.8.0/gatk_collect_hs_metrics_4.1.8.0.cwl index aff4264b..f4031ec9 100644 --- a/gatk_collect_hs_metrics_4.1.8.0/gatk_collect_hs_metrics_4.1.8.0.cwl +++ b/gatk_collect_hs_metrics_4.1.8.0/gatk_collect_hs_metrics_4.1.8.0.cwl @@ -294,7 +294,7 @@ requirements: ramMin: 32000 coresMin: 1 - class: DockerRequirement - dockerPull: 'broadinstitute/gatk:4.1.8.0' + dockerPull: 'ghcr.io/msk-access/gatk:4.1.8.0' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/gatk_collect_insert_size_metrics_4.1.8.0/gatk_collect_insert_size_metrics_4.1.8.0.cwl b/gatk_collect_insert_size_metrics_4.1.8.0/gatk_collect_insert_size_metrics_4.1.8.0.cwl index f8b39cbd..dfa60613 100644 --- a/gatk_collect_insert_size_metrics_4.1.8.0/gatk_collect_insert_size_metrics_4.1.8.0.cwl +++ b/gatk_collect_insert_size_metrics_4.1.8.0/gatk_collect_insert_size_metrics_4.1.8.0.cwl @@ -228,7 +228,7 @@ requirements: ramMin: 32000 coresMin: 1 - class: DockerRequirement - dockerPull: 'broadinstitute/gatk:4.1.8.0' + dockerPull: 'ghcr.io/msk-access/gatk:4.1.8.0' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/gatk_merge_bam_alignment_4.1.8.0/gatk_merge_bam_alignment_4.1.8.0.cwl b/gatk_merge_bam_alignment_4.1.8.0/gatk_merge_bam_alignment_4.1.8.0.cwl index 147786c6..681fd3dd 100644 --- a/gatk_merge_bam_alignment_4.1.8.0/gatk_merge_bam_alignment_4.1.8.0.cwl +++ b/gatk_merge_bam_alignment_4.1.8.0/gatk_merge_bam_alignment_4.1.8.0.cwl @@ -527,7 +527,7 @@ requirements: ramMin: 17000 coresMin: 2 - class: DockerRequirement - dockerPull: 'broadinstitute/gatk:4.1.8.0' + dockerPull: 'ghcr.io/msk-access/gatk:4.1.8.0' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/gatk_merge_sam_files_4.1.8.0/gatk_merge_sam_files_4.1.8.0.cwl b/gatk_merge_sam_files_4.1.8.0/gatk_merge_sam_files_4.1.8.0.cwl index e18ab0ce..e7f9f6b1 100644 --- 
a/gatk_merge_sam_files_4.1.8.0/gatk_merge_sam_files_4.1.8.0.cwl +++ b/gatk_merge_sam_files_4.1.8.0/gatk_merge_sam_files_4.1.8.0.cwl @@ -231,7 +231,7 @@ requirements: ramMin: 17000 coresMin: 2 - class: DockerRequirement - dockerPull: 'broadinstitute/gatk:4.1.8.0' + dockerPull: 'ghcr.io/msk-access/gatk:4.1.8.0' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/gatk_sam_to_fastq_4.1.8.0/gatk_sam_to_fastq_4.1.8.0.cwl b/gatk_sam_to_fastq_4.1.8.0/gatk_sam_to_fastq_4.1.8.0.cwl index a5b47580..4a7df241 100644 --- a/gatk_sam_to_fastq_4.1.8.0/gatk_sam_to_fastq_4.1.8.0.cwl +++ b/gatk_sam_to_fastq_4.1.8.0/gatk_sam_to_fastq_4.1.8.0.cwl @@ -330,7 +330,7 @@ requirements: ramMin: 8000 coresMin: 2 - class: DockerRequirement - dockerPull: 'broadinstitute/gatk:4.1.8.0' + dockerPull: 'ghcr.io/msk-access/gatk:4.1.8.0' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/manta_1.5.1/manta_1.5.1.cwl b/manta_1.5.1/manta_1.5.1.cwl index f5e00c65..9c07140f 100644 --- a/manta_1.5.1/manta_1.5.1.cwl +++ b/manta_1.5.1/manta_1.5.1.cwl @@ -155,7 +155,7 @@ requirements: ramMin: 24000 coresMin: 12 - class: DockerRequirement - dockerPull: 'mskaccess/manta:0.6.3' + dockerPull: 'ghcr.io/msk-access/manta:1.5.1' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/marianas_collapsing_first_pass_1.8.1/marianas_first_pass.cwl b/marianas_collapsing_first_pass_1.8.1/marianas_first_pass.cwl index 81c90c62..024f9b53 100644 --- a/marianas_collapsing_first_pass_1.8.1/marianas_first_pass.cwl +++ b/marianas_collapsing_first_pass_1.8.1/marianas_first_pass.cwl @@ -126,7 +126,7 @@ requirements: ramMin: 20000 coresMin: 1 - class: DockerRequirement - dockerPull: 'mskaccess/marianas:0.6.3' + dockerPull: 'ghcr.io/msk-access/marianas:1.8.1' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git 
a/marianas_collapsing_second_pass_1.8.1/marianas_second_pass.cwl b/marianas_collapsing_second_pass_1.8.1/marianas_second_pass.cwl index ee094cd0..4ec6b3ac 100644 --- a/marianas_collapsing_second_pass_1.8.1/marianas_second_pass.cwl +++ b/marianas_collapsing_second_pass_1.8.1/marianas_second_pass.cwl @@ -130,7 +130,7 @@ requirements: ramMin: 20000 coresMin: 1 - class: DockerRequirement - dockerPull: 'mskaccess/marianas:0.6.3' + dockerPull: 'ghcr.io/msk-access/marianas:1.8.1' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/marianas_process_loop_umi_1.8.1/marianas_process_loop_umi.cwl b/marianas_process_loop_umi_1.8.1/marianas_process_loop_umi.cwl index 3e3f1e12..018aff9a 100644 --- a/marianas_process_loop_umi_1.8.1/marianas_process_loop_umi.cwl +++ b/marianas_process_loop_umi_1.8.1/marianas_process_loop_umi.cwl @@ -113,7 +113,7 @@ requirements: ramMin: 20000 coresMin: 1 - class: DockerRequirement - dockerPull: 'mskaccess/marianas:0.6.3' + dockerPull: 'ghcr.io/msk-access/marianas:1.8.1' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/marianas_separate_bams_1.8.1/marianas_separate_bams_1.8.1.cwl b/marianas_separate_bams_1.8.1/marianas_separate_bams_1.8.1.cwl index cc035c5b..4488cf91 100644 --- a/marianas_separate_bams_1.8.1/marianas_separate_bams_1.8.1.cwl +++ b/marianas_separate_bams_1.8.1/marianas_separate_bams_1.8.1.cwl @@ -92,7 +92,7 @@ requirements: ramMin: 30000 coresMin: 1 - class: DockerRequirement - dockerPull: 'mskaccess/marianas:0.6.3' + dockerPull: 'ghcr.io/msk-access/marianas:1.8.1' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/merge_fastq_0.1.7/merge_fastq_0.1.7.cwl b/merge_fastq_0.1.7/merge_fastq_0.1.7.cwl index 0d94d7f8..cad51a26 100644 --- a/merge_fastq_0.1.7/merge_fastq_0.1.7.cwl +++ b/merge_fastq_0.1.7/merge_fastq_0.1.7.cwl @@ -91,7 +91,7 @@ requirements: ramMin: 8000 coresMin: 1 - class: 
DockerRequirement - dockerPull: 'mskaccess/merge_fastq:0.6.3' + dockerPull: 'ghcr.io/msk-access/merge_fastq:0.1.7' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/mutect_1.1.5/mutect_1.1.5.cwl b/mutect_1.1.5/mutect_1.1.5.cwl index c8e16fe9..456c328a 100644 --- a/mutect_1.1.5/mutect_1.1.5.cwl +++ b/mutect_1.1.5/mutect_1.1.5.cwl @@ -717,7 +717,7 @@ requirements: ramMin: 34000 coresMin: 1 - class: DockerRequirement - dockerPull: 'mskaccess/mutect:0.6.3' + dockerPull: 'ghcr.io/msk-access/mutect:1.1.5' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl b/picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl index 361b7b94..5544a296 100644 --- a/picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl +++ b/picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl @@ -196,7 +196,7 @@ requirements: ramMin: 17000 coresMin: 2 - class: DockerRequirement - dockerPull: 'broadinstitute/gatk:4.1.8.1' + dockerPull: 'ghcr.io/msk-access/gatk:4.1.8.1' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/picard_fix_mate_information_4.1.8.1/picard_fix_mate_information_4.1.8.1.cwl b/picard_fix_mate_information_4.1.8.1/picard_fix_mate_information_4.1.8.1.cwl index 0b607d59..92ad20f1 100644 --- a/picard_fix_mate_information_4.1.8.1/picard_fix_mate_information_4.1.8.1.cwl +++ b/picard_fix_mate_information_4.1.8.1/picard_fix_mate_information_4.1.8.1.cwl @@ -150,7 +150,7 @@ requirements: ramMin: 30000 coresMin: 12 - class: DockerRequirement - dockerPull: 'broadinstitute/gatk:4.1.8.1' + dockerPull: 'ghcr.io/msk-access/gatk:4.1.8.1' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git 
a/picard_mark_duplicates_4.1.8.1/picard_mark_duplicates_4.1.8.1.cwl b/picard_mark_duplicates_4.1.8.1/picard_mark_duplicates_4.1.8.1.cwl index 7e7078a8..4d94adac 100644 --- a/picard_mark_duplicates_4.1.8.1/picard_mark_duplicates_4.1.8.1.cwl +++ b/picard_mark_duplicates_4.1.8.1/picard_mark_duplicates_4.1.8.1.cwl @@ -221,7 +221,7 @@ requirements: ramMin: 17000 coresMin: 2 - class: DockerRequirement - dockerPull: 'broadinstitute/gatk:4.1.8.1' + dockerPull: 'ghcr.io/msk-access/gatk:4.1.8.1' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/trim_galore_0.6.2/trim_galore_0.6.2.cwl b/trim_galore_0.6.2/trim_galore_0.6.2.cwl index 991f19ba..95d53149 100644 --- a/trim_galore_0.6.2/trim_galore_0.6.2.cwl +++ b/trim_galore_0.6.2/trim_galore_0.6.2.cwl @@ -160,7 +160,7 @@ requirements: ramMin: 8000 coresMin: 4 - class: DockerRequirement - dockerPull: 'mskaccess/trim_galore:0.6.3' + dockerPull: 'ghcr.io/msk-access/trim_galore:0.6.2' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/waltz_count_reads_3.1.1/waltz_count_reads_3.1.1.cwl b/waltz_count_reads_3.1.1/waltz_count_reads_3.1.1.cwl index 1a766160..f177b5d5 100644 --- a/waltz_count_reads_3.1.1/waltz_count_reads_3.1.1.cwl +++ b/waltz_count_reads_3.1.1/waltz_count_reads_3.1.1.cwl @@ -77,7 +77,7 @@ requirements: ramMin: 8000 coresMin: 1 - class: DockerRequirement - dockerPull: 'mskaccess/waltz:0.6.3' + dockerPull: 'ghcr.io/msk-access/waltz:3.1.1' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/waltz_pileupmatrices_3.1.1/waltz_pileupmatrices_3.1.1.cwl b/waltz_pileupmatrices_3.1.1/waltz_pileupmatrices_3.1.1.cwl index 27ae5b86..1fdcdb1d 100644 --- a/waltz_pileupmatrices_3.1.1/waltz_pileupmatrices_3.1.1.cwl +++ b/waltz_pileupmatrices_3.1.1/waltz_pileupmatrices_3.1.1.cwl @@ -89,7 +89,7 @@ requirements: ramMin: 8000 coresMin: 1 - class: DockerRequirement - dockerPull: 'mskaccess/waltz:0.6.3' + 
dockerPull: 'ghcr.io/msk-access/waltz:3.1.1' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' From 67d30f0e0da5adeefa0927480a945c5b9b476c06 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Wed, 16 Dec 2020 22:31:11 -0500 Subject: [PATCH 319/476] Updating ghcr --- abra2_2.17/abra2_2.17.cwl | 2 +- abra2_2.22/abra2_2.22.cwl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/abra2_2.17/abra2_2.17.cwl b/abra2_2.17/abra2_2.17.cwl index 12f7af16..0f31e4d4 100644 --- a/abra2_2.17/abra2_2.17.cwl +++ b/abra2_2.17/abra2_2.17.cwl @@ -184,7 +184,7 @@ requirements: ramMin: 60000 coresMin: 16 - class: DockerRequirement - dockerPull: 'mskaccess/abra2:0.6.3' + dockerPull: 'ghcr.io/msk-access/abra2:0.6.3' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/abra2_2.22/abra2_2.22.cwl b/abra2_2.22/abra2_2.22.cwl index 4a50234f..416b4795 100644 --- a/abra2_2.22/abra2_2.22.cwl +++ b/abra2_2.22/abra2_2.22.cwl @@ -219,7 +219,7 @@ requirements: ramMin: 60000 coresMin: 16 - class: DockerRequirement - dockerPull: 'mskaccess/abra2:2.22' + dockerPull: 'ghcr.io/msk-access/abra2:2.22' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' From acb830770502cf438ed8ab7384d006ba01e80af1 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 18 Dec 2020 15:12:58 -0500 Subject: [PATCH 320/476] Exposed TMPDIR option where possible --- abra2_2.22/abra2_2.22.cwl | 13 ++++++++---- ...gbio_call_duplex_consensus_reads_1.2.0.cwl | 11 ++++++++++ ...fgbio_collect_duplex_seq_metrics_1.2.0.cwl | 11 ++++++++++ .../fgbio_fastq_to_bam_1.2.0.cwl | 11 ++++++++++ .../fgbio_filter_consensus_reads_1.2.0.cwl | 11 ++++++++++ .../fgbio_group_reads_by_umi_1.2.0.cwl | 11 ++++++++++ .../gatk_apply_bqsr_4.1.8.1.cwl | 21 ++++++++++++------- .../gatk_base_recalibrator_4.1.8.1.cwl | 12 +++++++++-- ...lect_alignment_summary_metrics_4.1.8.0.cwl | 12 +++++++++-- .../gatk_collect_hs_metrics_4.1.8.0.cwl | 20 
++++++++++-------- ...tk_collect_insert_size_metrics_4.1.8.0.cwl | 16 +++++++------- .../gatk_merge_bam_alignment_4.1.8.0.cwl | 10 ++++++++- .../gatk_merge_sam_files_4.1.8.0.cwl | 10 ++++++++- .../gatk_sam_to_fastq_4.1.8.0.cwl | 12 +++++++++-- ...ard_add_or_replace_read_groups_4.1.8.1.cwl | 19 ++++++++++++++++- .../picard_fix_mate_information_4.1.8.1.cwl | 10 ++++++++- .../picard_mark_duplicates_4.1.8.1.cwl | 12 +++++++++-- 17 files changed, 183 insertions(+), 39 deletions(-) diff --git a/abra2_2.22/abra2_2.22.cwl b/abra2_2.22/abra2_2.22.cwl index 416b4795..e24cb107 100644 --- a/abra2_2.22/abra2_2.22.cwl +++ b/abra2_2.22/abra2_2.22.cwl @@ -29,10 +29,7 @@ inputs: secondaryFiles: - ^.bai - id: working_directory - type: Directory? - inputBinding: - position: 0 - prefix: '--tmpdir' + type: string? doc: Set the temp directory (overrides java.io.tmpdir) - id: reference_fasta type: File @@ -214,6 +211,14 @@ arguments: return inputs.number_of_threads return runtime.cores } + - position: 0 + prefix: '--tmpdir' + valueFrom: |- + ${ + if(inputs.working_directory) + return inputs.working_directory; + return runtime.tmpdir + } requirements: - class: ResourceRequirement ramMin: 60000 diff --git a/fgbio_call_duplex_consensus_reads_1.2.0/fgbio_call_duplex_consensus_reads_1.2.0.cwl b/fgbio_call_duplex_consensus_reads_1.2.0/fgbio_call_duplex_consensus_reads_1.2.0.cwl index e0b1a766..22bf8790 100644 --- a/fgbio_call_duplex_consensus_reads_1.2.0/fgbio_call_duplex_consensus_reads_1.2.0.cwl +++ b/fgbio_call_duplex_consensus_reads_1.2.0/fgbio_call_duplex_consensus_reads_1.2.0.cwl @@ -90,6 +90,9 @@ inputs: The maximum number of reads to use when building a single-strand consensus. If more than this many reads are present in a tag family, the family is randomly downsampled to exactly max-reads reads. + - id: temporary_directory + type: string? + doc: 'Default value: null.' 
outputs: - id: fgbio_call_duplex_consensus_reads_bam type: File @@ -161,6 +164,14 @@ arguments: valueFrom: '-XX:-UseGCOverheadLimit' - position: 0 valueFrom: CallDuplexConsensusReads + - position: 0 + prefix: '--tmp-dir' + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } - position: 0 prefix: '--output' shellQuote: false diff --git a/fgbio_collect_duplex_seq_metrics_1.2.0/fgbio_collect_duplex_seq_metrics_1.2.0.cwl b/fgbio_collect_duplex_seq_metrics_1.2.0/fgbio_collect_duplex_seq_metrics_1.2.0.cwl index 5cd35ce5..0e67341b 100644 --- a/fgbio_collect_duplex_seq_metrics_1.2.0/fgbio_collect_duplex_seq_metrics_1.2.0.cwl +++ b/fgbio_collect_duplex_seq_metrics_1.2.0/fgbio_collect_duplex_seq_metrics_1.2.0.cwl @@ -72,6 +72,9 @@ inputs: position: 0 prefix: '--mi-tag' doc: 'The output tag for UMI grouping. [Optional].' + - id: temporary_directory + type: string? + doc: 'Default value: null.' outputs: - id: fgbio_collect_duplex_seq_metrics_family_size type: File @@ -239,6 +242,14 @@ arguments: valueFrom: '-XX:-UseGCOverheadLimit' - position: 0 valueFrom: CollectDuplexSeqMetrics + - position: 0 + prefix: '--tmp-dir' + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } - position: 0 prefix: '--output' valueFrom: |- diff --git a/fgbio_fastq_to_bam_1.2.0/fgbio_fastq_to_bam_1.2.0.cwl b/fgbio_fastq_to_bam_1.2.0/fgbio_fastq_to_bam_1.2.0.cwl index 2313da02..d91ff89c 100644 --- a/fgbio_fastq_to_bam_1.2.0/fgbio_fastq_to_bam_1.2.0.cwl +++ b/fgbio_fastq_to_bam_1.2.0/fgbio_fastq_to_bam_1.2.0.cwl @@ -130,6 +130,9 @@ inputs: prefix: '--run-date' shellQuote: false doc: 'Date the run was produced, to insert into the read group header' + - id: temporary_directory + type: string? + doc: 'Default value: null.' 
outputs: - id: fgbio_fastq_to_bam_ubam type: File @@ -198,6 +201,14 @@ arguments: valueFrom: '-XX:-UseGCOverheadLimit' - position: 0 valueFrom: FastqToBam + - position: 0 + prefix: '--tmp-dir' + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } - position: 0 prefix: '--output' shellQuote: false diff --git a/fgbio_filter_consensus_reads_1.2.0/fgbio_filter_consensus_reads_1.2.0.cwl b/fgbio_filter_consensus_reads_1.2.0/fgbio_filter_consensus_reads_1.2.0.cwl index 4d72c68f..d39e7c6a 100644 --- a/fgbio_filter_consensus_reads_1.2.0/fgbio_filter_consensus_reads_1.2.0.cwl +++ b/fgbio_filter_consensus_reads_1.2.0/fgbio_filter_consensus_reads_1.2.0.cwl @@ -94,6 +94,9 @@ inputs: doc: >- Mask (make N) consensus bases where the AB and BA consensus reads disagree (for duplex-sequencing only). + - id: temporary_directory + type: string? + doc: 'Default value: null.' outputs: - id: fgbio_filter_consensus_reads_bam type: File @@ -181,6 +184,14 @@ arguments: valueFrom: '-XX:-UseGCOverheadLimit' - position: 0 valueFrom: FilterConsensusReads + - position: 0 + prefix: '--tmp-dir' + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } - position: 0 prefix: '--output' shellQuote: false diff --git a/fgbio_group_reads_by_umi_1.2.0/fgbio_group_reads_by_umi_1.2.0.cwl b/fgbio_group_reads_by_umi_1.2.0/fgbio_group_reads_by_umi_1.2.0.cwl index 65af9b26..64106ad8 100644 --- a/fgbio_group_reads_by_umi_1.2.0/fgbio_group_reads_by_umi_1.2.0.cwl +++ b/fgbio_group_reads_by_umi_1.2.0/fgbio_group_reads_by_umi_1.2.0.cwl @@ -77,6 +77,9 @@ inputs: The minimum UMI length. If not specified then all UMIs must have the same length, otherwise discard reads with UMIs shorter than this length and allow for differing UMI lengths. + - id: temporary_directory + type: string? + doc: 'Default value: null.' 
outputs: - id: fgbio_group_reads_by_umi_bam type: File @@ -200,6 +203,14 @@ arguments: valueFrom: '-XX:-UseGCOverheadLimit' - position: 0 valueFrom: GroupReadsByUmi + - position: 0 + prefix: '--tmp-dir' + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } - position: 0 prefix: '--output' shellQuote: false diff --git a/gatk_apply_bqsr_4.1.8.1/gatk_apply_bqsr_4.1.8.1.cwl b/gatk_apply_bqsr_4.1.8.1/gatk_apply_bqsr_4.1.8.1.cwl index 1d967a84..2d220db8 100644 --- a/gatk_apply_bqsr_4.1.8.1/gatk_apply_bqsr_4.1.8.1.cwl +++ b/gatk_apply_bqsr_4.1.8.1/gatk_apply_bqsr_4.1.8.1.cwl @@ -233,6 +233,9 @@ inputs: type: int? - id: number_of_threads type: int? + - id: temporary_directory + type: string? + doc: 'Default value: null. This option may be specified 0 or more times.' outputs: - id: gatk_apply_bqsr_bam type: File @@ -271,6 +274,17 @@ arguments: return "-Xmx12G" } } + - position: 1 + separate: false + valueFrom: ApplyBQSR + - position: 2 + prefix: '--TMP_DIR' + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } - position: 2 prefix: '--output' valueFrom: |- @@ -281,13 +295,6 @@ arguments: return inputs.input.basename.replace(/.bam/, '_bqsr.bam') } } - - position: 2 - prefix: '--tmp-dir' - valueFrom: $(runtime.tmpdir) - - position: 1 - prefix: '' - separate: false - valueFrom: ApplyBQSR requirements: - class: ResourceRequirement ramMin: 16000 diff --git a/gatk_base_recalibrator_4.1.8.1/gatk_base_recalibrator_4.1.8.1.cwl b/gatk_base_recalibrator_4.1.8.1/gatk_base_recalibrator_4.1.8.1.cwl index e9a2874d..b02a2012 100644 --- a/gatk_base_recalibrator_4.1.8.1/gatk_base_recalibrator_4.1.8.1.cwl +++ b/gatk_base_recalibrator_4.1.8.1/gatk_base_recalibrator_4.1.8.1.cwl @@ -265,6 +265,9 @@ inputs: type: int? - id: memory_overhead type: int? + - id: temporary_directory + type: string? + doc: 'Default value: null. This option may be specified 0 or more times.' 
outputs: - id: gatk_base_recalibrator_output type: File @@ -306,8 +309,13 @@ arguments: separate: false valueFrom: BaseRecalibrator - position: 2 - prefix: '--tmp-dir' - valueFrom: $(runtime.tmpdir) + prefix: '--TMP_DIR' + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } - position: 2 prefix: '--output' valueFrom: |- diff --git a/gatk_collect_alignment_summary_metrics_4.1.8.0/gatk_collect_alignment_summary_metrics_4.1.8.0.cwl b/gatk_collect_alignment_summary_metrics_4.1.8.0/gatk_collect_alignment_summary_metrics_4.1.8.0.cwl index f5e6a8a2..13a024dd 100644 --- a/gatk_collect_alignment_summary_metrics_4.1.8.0/gatk_collect_alignment_summary_metrics_4.1.8.0.cwl +++ b/gatk_collect_alignment_summary_metrics_4.1.8.0/gatk_collect_alignment_summary_metrics_4.1.8.0.cwl @@ -151,6 +151,9 @@ inputs: doc: >- Use the JDK Inflater instead of the Intel Inflater for reading compressed input + - id: temporary_directory + type: string? + doc: 'Default value: null. This option may be specified 0 or more times.' outputs: - id: gatk_collect_alignment_summary_metrics_txt type: File @@ -194,8 +197,13 @@ arguments: } - position: 0 prefix: '--TMP_DIR' - valueFrom: $(runtime.tmpdir) - - position: 2 + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } + - position: 0 prefix: '-O' valueFrom: |- ${ diff --git a/gatk_collect_hs_metrics_4.1.8.0/gatk_collect_hs_metrics_4.1.8.0.cwl b/gatk_collect_hs_metrics_4.1.8.0/gatk_collect_hs_metrics_4.1.8.0.cwl index f4031ec9..44a42fc1 100644 --- a/gatk_collect_hs_metrics_4.1.8.0/gatk_collect_hs_metrics_4.1.8.0.cwl +++ b/gatk_collect_hs_metrics_4.1.8.0/gatk_collect_hs_metrics_4.1.8.0.cwl @@ -187,6 +187,9 @@ inputs: doc: Memory overhead per job in megabytes - id: number_of_threads type: int? + - id: temporary_directory + type: string? + doc: 'Default value: null. This option may be specified 0 or more times.' 
outputs: - id: gatk_collect_hs_metrics_txt type: File @@ -252,14 +255,13 @@ arguments: } - position: 0 prefix: '--TMP_DIR' - valueFrom: $(runtime.tmpdir) - - position: 0 - prefix: '--COMPRESSION_LEVEL' - valueFrom: '2' + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } - position: 0 - prefix: '--MAX_RECORDS_IN_RAM' - valueFrom: '50000' - - position: 2 prefix: '-O' valueFrom: |- ${ @@ -269,7 +271,7 @@ arguments: return inputs.input.basename.replace(/.bam/, '_hs_metrics.txt') } } - - position: 2 + - position: 0 prefix: '--PER_TARGET_COVERAGE' valueFrom: |- ${ @@ -279,7 +281,7 @@ arguments: return inputs.input.basename.replace(/.bam/, '_per_target_coverage.txt') } } - - position: 2 + - position: 0 prefix: '--PER_BASE_COVERAGE' valueFrom: |- ${ diff --git a/gatk_collect_insert_size_metrics_4.1.8.0/gatk_collect_insert_size_metrics_4.1.8.0.cwl b/gatk_collect_insert_size_metrics_4.1.8.0/gatk_collect_insert_size_metrics_4.1.8.0.cwl index dfa60613..bc83b149 100644 --- a/gatk_collect_insert_size_metrics_4.1.8.0/gatk_collect_insert_size_metrics_4.1.8.0.cwl +++ b/gatk_collect_insert_size_metrics_4.1.8.0/gatk_collect_insert_size_metrics_4.1.8.0.cwl @@ -142,6 +142,9 @@ inputs: doc: >- Use the JDK Inflater instead of the Intel Inflater for reading compressed input + - id: temporary_directory + type: string? + doc: 'Default value: null. This option may be specified 0 or more times.' 
outputs: - id: gatk_collect_insert_size_metrics_txt type: File @@ -196,13 +199,12 @@ arguments: } - position: 0 prefix: '--TMP_DIR' - valueFrom: $(runtime.tmpdir) - - position: 0 - prefix: '--COMPRESSION_LEVEL' - valueFrom: '2' - - position: 0 - prefix: '--MAX_RECORDS_IN_RAM' - valueFrom: '50000' + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } - position: 2 prefix: '-O' valueFrom: |- diff --git a/gatk_merge_bam_alignment_4.1.8.0/gatk_merge_bam_alignment_4.1.8.0.cwl b/gatk_merge_bam_alignment_4.1.8.0/gatk_merge_bam_alignment_4.1.8.0.cwl index 681fd3dd..1dd7658b 100644 --- a/gatk_merge_bam_alignment_4.1.8.0/gatk_merge_bam_alignment_4.1.8.0.cwl +++ b/gatk_merge_bam_alignment_4.1.8.0/gatk_merge_bam_alignment_4.1.8.0.cwl @@ -466,6 +466,9 @@ inputs: doc: >- Use the JDK Inflater instead of the Intel Inflater for reading compressed input + - id: temporary_directory + type: string? + doc: 'Default value: null. This option may be specified 0 or more times.' outputs: - id: gatk_merge_bam_alignment_bam type: File @@ -521,7 +524,12 @@ arguments: } - position: 0 prefix: '--TMP_DIR' - valueFrom: $(runtime.tmpdir) + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } requirements: - class: ResourceRequirement ramMin: 17000 diff --git a/gatk_merge_sam_files_4.1.8.0/gatk_merge_sam_files_4.1.8.0.cwl b/gatk_merge_sam_files_4.1.8.0/gatk_merge_sam_files_4.1.8.0.cwl index e7f9f6b1..c38ae9d9 100644 --- a/gatk_merge_sam_files_4.1.8.0/gatk_merge_sam_files_4.1.8.0.cwl +++ b/gatk_merge_sam_files_4.1.8.0/gatk_merge_sam_files_4.1.8.0.cwl @@ -172,6 +172,9 @@ inputs: doc: >- Use the JDK Inflater instead of the Intel Inflater for reading compressed input + - id: temporary_directory + type: string? + doc: 'Default value: null. This option may be specified 0 or more times.' 
outputs: - id: gatk_merge_sam_files_bam type: File @@ -215,7 +218,12 @@ arguments: } - position: 0 prefix: '--TMP_DIR' - valueFrom: $(runtime.tmpdir) + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } - position: 2 prefix: '-O' valueFrom: |- diff --git a/gatk_sam_to_fastq_4.1.8.0/gatk_sam_to_fastq_4.1.8.0.cwl b/gatk_sam_to_fastq_4.1.8.0/gatk_sam_to_fastq_4.1.8.0.cwl index 4a7df241..6516f711 100644 --- a/gatk_sam_to_fastq_4.1.8.0/gatk_sam_to_fastq_4.1.8.0.cwl +++ b/gatk_sam_to_fastq_4.1.8.0/gatk_sam_to_fastq_4.1.8.0.cwl @@ -249,6 +249,9 @@ inputs: which variable-length data (read, qualities, tags) do not otherwise need to be decoded. Default value: STRICT. Possible values: {STRICT, LENIENT, SILENT} + - id: temporary_directory + type: string? + doc: 'Default value: null. This option may be specified 0 or more times.' outputs: - id: gatk_sam_to_fastq_fastq type: File @@ -314,8 +317,13 @@ arguments: } - position: 0 prefix: '--TMP_DIR' - valueFrom: $(runtime.tmpdir) - - position: 2 + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } + - position: 0 prefix: '--FASTQ' valueFrom: |- ${ diff --git a/picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl b/picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl index 5544a296..07d930e5 100644 --- a/picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl +++ b/picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl @@ -131,6 +131,9 @@ inputs: Whether to create a BAM index when writing a coordinate-sorted BAM file. Default value:false. This option can be set to 'null' to clear the default value. Possible values:{true, false} + - id: temporary_directory + type: string? + doc: 'Default value: null. This option may be specified 0 or more times.' 
outputs: - id: picard_add_or_replace_read_groups_bam type: File @@ -171,6 +174,15 @@ arguments: return "-Xmx15G" } } + - position: 0 + prefix: '-Djava.io.tmpdir=' + separate: false + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } - position: 0 shellQuote: false valueFrom: '-XX:-UseGCOverheadLimit' @@ -181,7 +193,12 @@ arguments: valueFrom: AddOrReplaceReadGroups - position: 0 prefix: '--TMP_DIR' - valueFrom: $(runtime.tmpdir) + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } - position: 0 prefix: '-O' valueFrom: |- diff --git a/picard_fix_mate_information_4.1.8.1/picard_fix_mate_information_4.1.8.1.cwl b/picard_fix_mate_information_4.1.8.1/picard_fix_mate_information_4.1.8.1.cwl index 92ad20f1..5140be34 100644 --- a/picard_fix_mate_information_4.1.8.1/picard_fix_mate_information_4.1.8.1.cwl +++ b/picard_fix_mate_information_4.1.8.1/picard_fix_mate_information_4.1.8.1.cwl @@ -81,6 +81,9 @@ inputs: Whether to create a BAM index when writing a coordinate-sorted BAM file. Default value:false. This option can be set to 'null' to clear the default value. Possible values:{true, false} + - id: temporary_directory + type: string? + doc: 'Default value: null. This option may be specified 0 or more times.' 
outputs: - id: picard_fix_mate_information_bam type: File @@ -133,7 +136,12 @@ arguments: valueFrom: FixMateInformation - position: 0 prefix: '--TMP_DIR' - valueFrom: $(runtime.tmpdir) + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } - position: 0 prefix: '-O' valueFrom: |- diff --git a/picard_mark_duplicates_4.1.8.1/picard_mark_duplicates_4.1.8.1.cwl b/picard_mark_duplicates_4.1.8.1/picard_mark_duplicates_4.1.8.1.cwl index 4d94adac..c6418246 100644 --- a/picard_mark_duplicates_4.1.8.1/picard_mark_duplicates_4.1.8.1.cwl +++ b/picard_mark_duplicates_4.1.8.1/picard_mark_duplicates_4.1.8.1.cwl @@ -146,6 +146,9 @@ inputs: is moreappropriate. For other platforms and models, users should experiment to find what works best. Default value: 100. This option can be set to 'null' to clear the default value. + - id: temporary_directory + type: string? + doc: 'Default value: null. This option may be specified 0 or more times.' outputs: - id: picard_mark_duplicates_bam type: File @@ -214,8 +217,13 @@ arguments: } } - position: 0 - prefix: --TMP_DIR - valueFrom: $(runtime.tmpdir) + prefix: '--TMP_DIR' + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } requirements: - class: ResourceRequirement ramMin: 17000 From 100ba032bc93be586cb33a701f72b089f8e39b34 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 18 Dec 2020 22:10:09 -0500 Subject: [PATCH 321/476] Update to fix Fgbio --- ...gbio_call_duplex_consensus_reads_1.2.0.cwl | 37 ++++++++++------ ...fgbio_collect_duplex_seq_metrics_1.2.0.cwl | 29 ++++++++----- .../fgbio_fastq_to_bam_1.2.0.cwl | 43 +++++++++++-------- .../fgbio_filter_consensus_reads_1.2.0.cwl | 33 ++++++++------ .../fgbio_group_reads_by_umi_1.2.0.cwl | 33 ++++++++------ 5 files changed, 110 insertions(+), 65 deletions(-) diff --git a/fgbio_call_duplex_consensus_reads_1.2.0/fgbio_call_duplex_consensus_reads_1.2.0.cwl 
b/fgbio_call_duplex_consensus_reads_1.2.0/fgbio_call_duplex_consensus_reads_1.2.0.cwl index 22bf8790..96211f57 100644 --- a/fgbio_call_duplex_consensus_reads_1.2.0/fgbio_call_duplex_consensus_reads_1.2.0.cwl +++ b/fgbio_call_duplex_consensus_reads_1.2.0/fgbio_call_duplex_consensus_reads_1.2.0.cwl @@ -20,7 +20,7 @@ inputs: - id: input type: File inputBinding: - position: 0 + position: 2 prefix: '--input' shellQuote: false doc: The input SAM or BAM file. @@ -30,19 +30,19 @@ inputs: - id: read_name_prefix type: string? inputBinding: - position: 0 + position: 2 prefix: '--read-name-prefix' doc: The prefix all consensus read names - id: read_group_id type: string? inputBinding: - position: 0 + position: 2 prefix: '--read-group-id' doc: The new read group ID for all the consensus reads. - id: error_rate_pre_umi type: int? inputBinding: - position: 0 + position: 2 prefix: '--error-rate-pre-umi' doc: >- The Phred-scaled error rate for an error prior to the UMIs being @@ -50,7 +50,7 @@ inputs: - id: error_rate_post_umi type: int? inputBinding: - position: 0 + position: 2 prefix: '--error-rate-post-umi' doc: >- The Phred-scaled error rate for an error post the UMIs have been @@ -58,25 +58,25 @@ inputs: - id: min_input_base_quality type: int? inputBinding: - position: 0 + position: 2 prefix: '--min-input-base-quality' doc: Ignore bases in raw reads that have Q below this value. - id: trim type: boolean? inputBinding: - position: 0 + position: 2 prefix: '--trim' doc: 'If true, quality trim input reads in addition to masking low Q bases' - id: sort_order type: string? inputBinding: - position: 0 + position: 2 prefix: '--sort-order' doc: 'The sort order of the output, if :none: then the same as the input.' - id: min_reads type: 'int[]' inputBinding: - position: 0 + position: 2 prefix: '--min-reads' itemSeparator: ' ' shellQuote: false @@ -84,7 +84,7 @@ inputs: - id: max_reads_per_strand type: int? 
inputBinding: - position: 0 + position: 2 prefix: '--max-reads-per-strand' doc: >- The maximum number of reads to use when building a single-strand @@ -93,6 +93,14 @@ inputs: - id: temporary_directory type: string? doc: 'Default value: null.' + - id: async_io + type: string? + inputBinding: + position: 0 + separate: false + prefix: '--async-io=' + doc: >- + 'Use asynchronous I/O where possible, e.g. for SAM and BAM files [=true|false].' outputs: - id: fgbio_call_duplex_consensus_reads_bam type: File @@ -162,17 +170,18 @@ arguments: } - position: 0 valueFrom: '-XX:-UseGCOverheadLimit' - - position: 0 + - position: 1 valueFrom: CallDuplexConsensusReads - position: 0 - prefix: '--tmp-dir' + prefix: '--tmp-dir=' + separate: false valueFrom: |- ${ if(inputs.temporary_directory) return inputs.temporary_directory; return runtime.tmpdir } - - position: 0 + - position: 2 prefix: '--output' shellQuote: false valueFrom: |- @@ -181,7 +190,7 @@ arguments: return inputs.output_file_name; return inputs.input.basename.replace(/.bam/,'_cons.bam'); } - - position: 0 + - position: 2 prefix: '--threads' valueFrom: |- ${ diff --git a/fgbio_collect_duplex_seq_metrics_1.2.0/fgbio_collect_duplex_seq_metrics_1.2.0.cwl b/fgbio_collect_duplex_seq_metrics_1.2.0/fgbio_collect_duplex_seq_metrics_1.2.0.cwl index 0e67341b..4d11e6c5 100644 --- a/fgbio_collect_duplex_seq_metrics_1.2.0/fgbio_collect_duplex_seq_metrics_1.2.0.cwl +++ b/fgbio_collect_duplex_seq_metrics_1.2.0/fgbio_collect_duplex_seq_metrics_1.2.0.cwl @@ -29,13 +29,13 @@ inputs: - id: intervals type: File? inputBinding: - position: 0 + position: 2 prefix: '--intervals' doc: 'Optional set of intervals over which to restrict analysis. [Optional].' - id: description type: string? inputBinding: - position: 0 + position: 2 prefix: '--description' doc: >- Description of data set used to label plots. Defaults to sample/library. @@ -43,7 +43,7 @@ inputs: - id: duplex_umi_counts type: boolean? 
inputBinding: - position: 0 + position: 2 prefix: '--duplex-umi-counts' doc: >- If true, produce the .duplex_umi_counts.txt file with counts of duplex UMI @@ -51,30 +51,38 @@ inputs: - id: min_ab_reads type: int? inputBinding: - position: 0 + position: 2 prefix: '--min-ab-reads' doc: 'Minimum AB reads to call a tag family a ''duplex''. [Optional].' - id: min_ba_reads type: int? inputBinding: - position: 0 + position: 2 prefix: '--min-ba-reads' doc: 'Minimum BA reads to call a tag family a ''duplex''. [Optional].' - id: umi_tag type: string? inputBinding: - position: 0 + position: 2 prefix: '--umi-tag' doc: 'The tag containing the raw UMI. [Optional].' - id: mi_tag type: string? inputBinding: - position: 0 + position: 2 prefix: '--mi-tag' doc: 'The output tag for UMI grouping. [Optional].' - id: temporary_directory type: string? doc: 'Default value: null.' + - id: async_io + type: string? + inputBinding: + position: 0 + separate: false + prefix: '--async-io=' + doc: >- + 'Use asynchronous I/O where possible, e.g. for SAM and BAM files [=true|false].' outputs: - id: fgbio_collect_duplex_seq_metrics_family_size type: File @@ -240,17 +248,18 @@ arguments: } - position: 0 valueFrom: '-XX:-UseGCOverheadLimit' - - position: 0 + - position: 1 valueFrom: CollectDuplexSeqMetrics - position: 0 - prefix: '--tmp-dir' + prefix: '--tmp-dir=' + separate: false valueFrom: |- ${ if(inputs.temporary_directory) return inputs.temporary_directory; return runtime.tmpdir } - - position: 0 + - position: 2 prefix: '--output' valueFrom: |- ${ diff --git a/fgbio_fastq_to_bam_1.2.0/fgbio_fastq_to_bam_1.2.0.cwl b/fgbio_fastq_to_bam_1.2.0/fgbio_fastq_to_bam_1.2.0.cwl index d91ff89c..0496cc33 100644 --- a/fgbio_fastq_to_bam_1.2.0/fgbio_fastq_to_bam_1.2.0.cwl +++ b/fgbio_fastq_to_bam_1.2.0/fgbio_fastq_to_bam_1.2.0.cwl @@ -32,7 +32,7 @@ inputs: - id: read-structures type: 'string[]?' 
inputBinding: - position: 0 + position: 2 prefix: '--read-structures' itemSeparator: ' ' shellQuote: false @@ -42,56 +42,56 @@ inputs: - id: sort type: boolean? inputBinding: - position: 0 + position: 2 prefix: '--sort' shellQuote: false doc: 'If true, queryname sort the BAM file, otherwise preserve input order.' - id: umi-tag type: string? inputBinding: - position: 0 + position: 2 prefix: '--umi-tag' shellQuote: false doc: Tag in which to store molecular barcodes/UMIs - id: read-group-id type: string? inputBinding: - position: 0 + position: 2 prefix: '--read-group-id' shellQuote: false doc: Read group ID to use in the file header. - id: sample type: string? inputBinding: - position: 0 + position: 2 prefix: '--sample' shellQuote: false doc: The name of the sequenced sample. - id: library type: string? inputBinding: - position: 0 + position: 2 prefix: '--library' shellQuote: false doc: The name/ID of the sequenced library. - id: platform type: string? inputBinding: - position: 0 + position: 2 prefix: '--platform' shellQuote: false doc: Sequencing Platform - id: platform-unit type: string? inputBinding: - position: 0 + position: 2 prefix: '--platform-unit' shellQuote: false doc: Platform unit (e.g. ‘..') - id: platform-model type: string? inputBinding: - position: 0 + position: 2 prefix: '--platform-model' shellQuote: false doc: >- @@ -100,39 +100,47 @@ inputs: - id: sequencing-center type: string? inputBinding: - position: 0 + position: 2 prefix: '--sequencing-center' shellQuote: false doc: The sequencing center from which the data originated - id: predicted-insert-size type: int? inputBinding: - position: 0 + position: 2 prefix: '--predicted-insert-size' shellQuote: false doc: 'Predicted median insert size, to insert into the read group header' - id: description type: string? inputBinding: - position: 0 + position: 2 prefix: '--description' doc: Description of the read group. - id: comment type: string? 
inputBinding: - position: 0 + position: 2 prefix: '--comment' doc: Comment(s) to include in the output file’s header - id: run-date type: string? inputBinding: - position: 0 + position: 2 prefix: '--run-date' shellQuote: false doc: 'Date the run was produced, to insert into the read group header' - id: temporary_directory type: string? doc: 'Default value: null.' + - id: async_io + type: string? + inputBinding: + position: 0 + separate: false + prefix: '--async-io=' + doc: >- + 'Use asynchronous I/O where possible, e.g. for SAM and BAM files [=true|false].' outputs: - id: fgbio_fastq_to_bam_ubam type: File @@ -199,17 +207,18 @@ arguments: } - position: 0 valueFrom: '-XX:-UseGCOverheadLimit' - - position: 0 + - position: 1 valueFrom: FastqToBam - position: 0 - prefix: '--tmp-dir' + prefix: '--tmp-dir=' + separate: false valueFrom: |- ${ if(inputs.temporary_directory) return inputs.temporary_directory; return runtime.tmpdir } - - position: 0 + - position: 2 prefix: '--output' shellQuote: false valueFrom: |- diff --git a/fgbio_filter_consensus_reads_1.2.0/fgbio_filter_consensus_reads_1.2.0.cwl b/fgbio_filter_consensus_reads_1.2.0/fgbio_filter_consensus_reads_1.2.0.cwl index d39e7c6a..e9b8c6d7 100644 --- a/fgbio_filter_consensus_reads_1.2.0/fgbio_filter_consensus_reads_1.2.0.cwl +++ b/fgbio_filter_consensus_reads_1.2.0/fgbio_filter_consensus_reads_1.2.0.cwl @@ -30,7 +30,7 @@ inputs: - id: reference_fasta type: File inputBinding: - position: 0 + position: 2 prefix: '--ref' doc: Reference fasta file. secondaryFiles: @@ -39,13 +39,13 @@ inputs: - id: reverse_per_base_tags type: boolean? inputBinding: - position: 0 + position: 2 prefix: '--reverse-per-base-tags' doc: 'Reverse [complement] per base tags on reverse strand reads.' - id: min_reads type: 'int[]?' inputBinding: - position: 0 + position: 2 prefix: '--min-reads' itemSeparator: ' ' shellQuote: false @@ -55,7 +55,7 @@ inputs: - id: max_read_error_rate type: 'float[]?' 
inputBinding: - position: 0 + position: 2 prefix: '--max-read-error-rate' itemSeparator: ' ' doc: >- @@ -64,32 +64,32 @@ inputs: - id: max_base_error_rate type: 'float[]?' inputBinding: - position: 0 + position: 2 prefix: '--max-base-error-rate' itemSeparator: ' ' doc: The maximum error rate for a single consensus base. (Max 3 values) - id: min_base_quality type: int inputBinding: - position: 0 + position: 2 prefix: '--min-base-quality' doc: Mask (make N) consensus bases with quality less than this threshold. - id: max_no_call_fraction type: float? inputBinding: - position: 0 + position: 2 prefix: '--max-no-call-fraction' doc: Maximum fraction of no-calls in the read after filtering - id: min_mean_base_quality type: int? inputBinding: - position: 0 + position: 2 prefix: '--min-mean-base-quality' doc: The minimum mean base quality across the consensus read - id: require_single_strand_agreement type: boolean? inputBinding: - position: 0 + position: 2 prefix: '--require-single-strand-agreement' doc: >- Mask (make N) consensus bases where the AB and BA consensus reads disagree @@ -97,6 +97,14 @@ inputs: - id: temporary_directory type: string? doc: 'Default value: null.' + - id: async_io + type: string? + inputBinding: + position: 0 + separate: false + prefix: '--async-io=' + doc: >- + 'Use asynchronous I/O where possible, e.g. for SAM and BAM files [=true|false].' 
outputs: - id: fgbio_filter_consensus_reads_bam type: File @@ -182,17 +190,18 @@ arguments: } - position: 0 valueFrom: '-XX:-UseGCOverheadLimit' - - position: 0 + - position: 1 valueFrom: FilterConsensusReads - position: 0 - prefix: '--tmp-dir' + prefix: '--tmp-dir=' + separate: false valueFrom: |- ${ if(inputs.temporary_directory) return inputs.temporary_directory; return runtime.tmpdir } - - position: 0 + - position: 2 prefix: '--output' shellQuote: false valueFrom: |- diff --git a/fgbio_group_reads_by_umi_1.2.0/fgbio_group_reads_by_umi_1.2.0.cwl b/fgbio_group_reads_by_umi_1.2.0/fgbio_group_reads_by_umi_1.2.0.cwl index 64106ad8..b30a4078 100644 --- a/fgbio_group_reads_by_umi_1.2.0/fgbio_group_reads_by_umi_1.2.0.cwl +++ b/fgbio_group_reads_by_umi_1.2.0/fgbio_group_reads_by_umi_1.2.0.cwl @@ -20,7 +20,7 @@ inputs: - id: input type: File inputBinding: - position: 0 + position: 2 prefix: '--input' shellQuote: false doc: The input BAM file. @@ -30,48 +30,48 @@ inputs: - id: family_size_histogram type: string? inputBinding: - position: 0 + position: 2 prefix: '--family-size-histogram' doc: Optional output of tag family size counts. - id: raw_tag type: string? inputBinding: - position: 0 + position: 2 prefix: '--raw-tag' doc: The tag containing the raw UMI. - id: assign_tag type: string? inputBinding: - position: 0 + position: 2 prefix: '--assign-tag' doc: The output tag for UMI grouping. - id: min_map_q type: int? inputBinding: - position: 0 + position: 2 prefix: '--min-map-q' doc: Minimum mapping quality. - id: include_non_pf_reads type: boolean? inputBinding: - position: 0 + position: 2 prefix: '--include-non-pf-reads' - id: strategy type: string inputBinding: - position: 0 + position: 2 prefix: '--strategy' doc: 'The UMI assignment strategy. (identity,edit,adjacency,paired)' - id: edits type: int? inputBinding: - position: 0 + position: 2 prefix: '--edits' doc: The allowable number of edits between UMIs. - id: min_umi_length type: int? 
inputBinding: - position: 0 + position: 2 prefix: '--min-umi-length' doc: >- The minimum UMI length. If not specified then all UMIs must have the same @@ -80,6 +80,14 @@ inputs: - id: temporary_directory type: string? doc: 'Default value: null.' + - id: async_io + type: string? + inputBinding: + position: 0 + separate: false + prefix: '--async-io=' + doc: >- + 'Use asynchronous I/O where possible, e.g. for SAM and BAM files [=true|false].' outputs: - id: fgbio_group_reads_by_umi_bam type: File @@ -201,17 +209,18 @@ arguments: } - position: 0 valueFrom: '-XX:-UseGCOverheadLimit' - - position: 0 + - position: 1 valueFrom: GroupReadsByUmi - position: 0 - prefix: '--tmp-dir' + prefix: '--tmp-dir=' + separate: false valueFrom: |- ${ if(inputs.temporary_directory) return inputs.temporary_directory; return runtime.tmpdir } - - position: 0 + - position: 2 prefix: '--output' shellQuote: false valueFrom: |- From 06475b7dd2ec312b579f8080475edbb0ff8da1be Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 18 Dec 2020 22:19:55 -0500 Subject: [PATCH 322/476] Fix Fgbio --- .../fgbio_collect_duplex_seq_metrics_1.2.0.cwl | 2 +- fgbio_fastq_to_bam_1.2.0/fgbio_fastq_to_bam_1.2.0.cwl | 2 +- .../fgbio_filter_consensus_reads_1.2.0.cwl | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fgbio_collect_duplex_seq_metrics_1.2.0/fgbio_collect_duplex_seq_metrics_1.2.0.cwl b/fgbio_collect_duplex_seq_metrics_1.2.0/fgbio_collect_duplex_seq_metrics_1.2.0.cwl index 4d11e6c5..e9adcf9f 100644 --- a/fgbio_collect_duplex_seq_metrics_1.2.0/fgbio_collect_duplex_seq_metrics_1.2.0.cwl +++ b/fgbio_collect_duplex_seq_metrics_1.2.0/fgbio_collect_duplex_seq_metrics_1.2.0.cwl @@ -20,7 +20,7 @@ inputs: - id: input type: File inputBinding: - position: 0 + position: 2 prefix: '--input' doc: Input BAM file generated by GroupReadByUmi. 
- id: output_prefix diff --git a/fgbio_fastq_to_bam_1.2.0/fgbio_fastq_to_bam_1.2.0.cwl b/fgbio_fastq_to_bam_1.2.0/fgbio_fastq_to_bam_1.2.0.cwl index 0496cc33..638e8449 100644 --- a/fgbio_fastq_to_bam_1.2.0/fgbio_fastq_to_bam_1.2.0.cwl +++ b/fgbio_fastq_to_bam_1.2.0/fgbio_fastq_to_bam_1.2.0.cwl @@ -20,7 +20,7 @@ inputs: - id: input type: 'File[]' inputBinding: - position: 0 + position: 2 prefix: '--input' itemSeparator: ' ' shellQuote: false diff --git a/fgbio_filter_consensus_reads_1.2.0/fgbio_filter_consensus_reads_1.2.0.cwl b/fgbio_filter_consensus_reads_1.2.0/fgbio_filter_consensus_reads_1.2.0.cwl index e9b8c6d7..91687479 100644 --- a/fgbio_filter_consensus_reads_1.2.0/fgbio_filter_consensus_reads_1.2.0.cwl +++ b/fgbio_filter_consensus_reads_1.2.0/fgbio_filter_consensus_reads_1.2.0.cwl @@ -20,7 +20,7 @@ inputs: - id: input type: File inputBinding: - position: 0 + position: 2 prefix: '--input' shellQuote: false doc: The input SAM or BAM file. From eb1625d347b4fbcd144e2eba7400ec14491ba07a Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 18 Dec 2020 23:01:52 -0500 Subject: [PATCH 323/476] Fix BQSR --- gatk_apply_bqsr_4.1.8.1/gatk_apply_bqsr_4.1.8.1.cwl | 2 +- .../gatk_base_recalibrator_4.1.8.1.cwl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/gatk_apply_bqsr_4.1.8.1/gatk_apply_bqsr_4.1.8.1.cwl b/gatk_apply_bqsr_4.1.8.1/gatk_apply_bqsr_4.1.8.1.cwl index 2d220db8..9a811169 100644 --- a/gatk_apply_bqsr_4.1.8.1/gatk_apply_bqsr_4.1.8.1.cwl +++ b/gatk_apply_bqsr_4.1.8.1/gatk_apply_bqsr_4.1.8.1.cwl @@ -278,7 +278,7 @@ arguments: separate: false valueFrom: ApplyBQSR - position: 2 - prefix: '--TMP_DIR' + prefix: '--tmp-dir' valueFrom: |- ${ if(inputs.temporary_directory) diff --git a/gatk_base_recalibrator_4.1.8.1/gatk_base_recalibrator_4.1.8.1.cwl b/gatk_base_recalibrator_4.1.8.1/gatk_base_recalibrator_4.1.8.1.cwl index b02a2012..1754a304 100644 --- a/gatk_base_recalibrator_4.1.8.1/gatk_base_recalibrator_4.1.8.1.cwl +++ 
b/gatk_base_recalibrator_4.1.8.1/gatk_base_recalibrator_4.1.8.1.cwl @@ -309,7 +309,7 @@ arguments: separate: false valueFrom: BaseRecalibrator - position: 2 - prefix: '--TMP_DIR' + prefix: '--tmp-dir' valueFrom: |- ${ if(inputs.temporary_directory) From 03b3fd92c9d40cc62133ad73af25e72a8799a771 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 18 Dec 2020 23:21:42 -0500 Subject: [PATCH 324/476] Fix BQSR --- gatk_apply_bqsr_4.1.8.1/gatk_apply_bqsr_4.1.8.1.cwl | 4 +--- .../gatk_base_recalibrator_4.1.8.1.cwl | 5 +---- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/gatk_apply_bqsr_4.1.8.1/gatk_apply_bqsr_4.1.8.1.cwl b/gatk_apply_bqsr_4.1.8.1/gatk_apply_bqsr_4.1.8.1.cwl index 9a811169..efe37787 100644 --- a/gatk_apply_bqsr_4.1.8.1/gatk_apply_bqsr_4.1.8.1.cwl +++ b/gatk_apply_bqsr_4.1.8.1/gatk_apply_bqsr_4.1.8.1.cwl @@ -8,6 +8,7 @@ $namespaces: id: gatk_apply_bqsr_4_1_8_1 baseCommand: - gatk + - ApplyBQSR inputs: - id: reference type: File @@ -274,9 +275,6 @@ arguments: return "-Xmx12G" } } - - position: 1 - separate: false - valueFrom: ApplyBQSR - position: 2 prefix: '--tmp-dir' valueFrom: |- diff --git a/gatk_base_recalibrator_4.1.8.1/gatk_base_recalibrator_4.1.8.1.cwl b/gatk_base_recalibrator_4.1.8.1/gatk_base_recalibrator_4.1.8.1.cwl index 1754a304..a38cddb6 100644 --- a/gatk_base_recalibrator_4.1.8.1/gatk_base_recalibrator_4.1.8.1.cwl +++ b/gatk_base_recalibrator_4.1.8.1/gatk_base_recalibrator_4.1.8.1.cwl @@ -8,6 +8,7 @@ $namespaces: id: gatk_base_recalibrator_4_1_8_1 baseCommand: - gatk + - BaseRecalibrator inputs: - id: input type: File @@ -304,10 +305,6 @@ arguments: return "-Xmx12G" } } - - position: 1 - prefix: '' - separate: false - valueFrom: BaseRecalibrator - position: 2 prefix: '--tmp-dir' valueFrom: |- From 28551d5bfbbcd4cd2e6755395a01c7494fa244a1 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Mon, 21 Dec 2020 13:55:01 -0500 Subject: [PATCH 325/476] =?UTF-8?q?Bump=20version:=200.6.3=20=E2=86=92=201?= =?UTF-8?q?.0.0?= MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cwl_commandlinetools/__init__.py | 2 +- setup.cfg | 3 +-- setup.py | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/cwl_commandlinetools/__init__.py b/cwl_commandlinetools/__init__.py index 687dc425..9fc9a3c6 100644 --- a/cwl_commandlinetools/__init__.py +++ b/cwl_commandlinetools/__init__.py @@ -4,4 +4,4 @@ __author__ = """msk-access""" __email__ = 'msk.access@gmail.com' -__version__ = '0.6.3' +__version__ = '1.0.0' diff --git a/setup.cfg b/setup.cfg index c3ff1fe4..fd44fded 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.6.3 +current_version = 1.0.0 commit = True tag = True @@ -22,4 +22,3 @@ test = pytest [tool:pytest] collect_ignore = ['setup.py'] - diff --git a/setup.py b/setup.py index c835f032..79f00425 100644 --- a/setup.py +++ b/setup.py @@ -38,6 +38,6 @@ test_suite='tests', tests_require=test_requirements, url='https://github.com/msk-access/cwl-commandlinetools', - version='0.6.3', + version='1.0.0', zip_safe=False, ) From 3c0ecba31b7559f7ee206770c4dd405c89c9d772 Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Mon, 21 Dec 2020 15:40:01 -0500 Subject: [PATCH 326/476] initi docker and cwl for biometrics --- biometrics_1.0/README.md | 34 +++++++ biometrics_1.0/biometrics_1.0.cwl | 139 ++++++++++++++++++++++++++++ biometrics_1.0/container/Dockerfile | 32 +++++++ biometrics_1.0/example_inputs.yaml | 17 ++++ 4 files changed, 222 insertions(+) create mode 100644 biometrics_1.0/README.md create mode 100644 biometrics_1.0/biometrics_1.0.cwl create mode 100644 biometrics_1.0/container/Dockerfile create mode 100644 biometrics_1.0/example_inputs.yaml diff --git a/biometrics_1.0/README.md b/biometrics_1.0/README.md new file mode 100644 index 00000000..9c081359 --- /dev/null +++ b/biometrics_1.0/README.md @@ -0,0 +1,34 @@ +# CWL and Dockerfile for running sequence_qc + +## Version of tools in docker image (/container/Dockerfile) + +| 
Tool | Version | Location | +|--- |--- |--- | +| biometrics | 1.0 | | + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner biometrics_1.0.cwl example_inputs.yaml +``` + +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/biometrics_1.0/biometrics_1.0.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> mkdir tool_toil_log +> toil-cwl-runner --singularity --logFile /path/to/tool_toil_log/cwltoil.log --jobStore /path/to/tool_jobStore --batchSystem lsf --workDir /path/to/tool_toil_log --outdir . --writeLogs /path/to/tool_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/biometrics_1.0/biometrics_1.0.cwl /path/to/inputs.yaml > tool_toil.stdout 2> tool_toil.stderr & +``` + +### Usage + +```bash + +``` diff --git a/biometrics_1.0/biometrics_1.0.cwl b/biometrics_1.0/biometrics_1.0.cwl new file mode 100644 index 00000000..33ae4694 --- /dev/null +++ b/biometrics_1.0/biometrics_1.0.cwl @@ -0,0 +1,139 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: calculate_noise_0_1_16 +baseCommand: + - calculate_noise +inputs: + - id: reference + type: File + inputBinding: + position: 0 + prefix: --ref_fasta + secondaryFiles: + - ^.fasta.fai + doc: >- + Path to reference fasta, containing all regions in bed_file + - id: bam_file + type: File + inputBinding: + position: 0 + prefix: --bam_file + secondaryFiles: + - ^.bai + doc: >- + Path to BAM file for 
calculating noise [required] + - id: bed_file + type: File + inputBinding: + position: 0 + prefix: --bed_file + doc: >- + Path to BED file containing regions over which to calculate noise [required] + - id: sample_id + type: string + inputBinding: + position: 0 + prefix: --sample_id + doc: >- + Prefix to include in all output file names + - id: threshold + type: float? + inputBinding: + position: 0 + prefix: --threshold + doc: >- + Alt allele frequency past which to ignore positions from the calculation. + - id: truncate + type: int? + inputBinding: + position: 0 + prefix: --truncate + doc: >- + Whether to exclude trailing bases from reads that only partially overlap the bed file (0 or 1) + - id: min_mapq + type: int? + inputBinding: + position: 0 + prefix: --min_mapq + doc: >- + Exclude reads with a lower mapping quality + - id: min_basq + type: int? + inputBinding: + position: 0 + prefix: --min_basq + doc: >- + Exclude bases with a lower base quality +outputs: + - id: sequence_qc_pileup + type: File + outputBinding: + glob: |- + ${ + return inputs.sample_id + 'pileup.tsv' + } + - id: sequence_qc_noise_positions + type: File + outputBinding: + glob: |- + ${ + return inputs.sample_id + 'noise_positions.tsv' + } + - id: sequence_qc_noise_acgt + type: File + outputBinding: + glob: |- + ${ + return inputs.sample_id + 'noise_acgt.tsv' + } + - id: sequence_qc_noise_n + type: File + outputBinding: + glob: |- + ${ + return inputs.sample_id + 'noise_n.tsv' + } + - id: sequence_qc_noise_del + type: File + outputBinding: + glob: |- + ${ + return inputs.sample_id + 'noise_del.tsv' + } + - id: sequence_qc_figures + type: File + outputBinding: + glob: |- + ${ + return inputs.sample_id + '_noise.html' + } +requirements: + - class: ResourceRequirement + ramMin: 8000 + coresMin: 1 + - class: DockerRequirement + dockerPull: 'mskaccess/sequence_qc:0.1.16' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 
'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': sesquence_qc + 'doap:revision': 0.1.16 diff --git a/biometrics_1.0/container/Dockerfile b/biometrics_1.0/container/Dockerfile new file mode 100644 index 00000000..1318912a --- /dev/null +++ b/biometrics_1.0/container/Dockerfile @@ -0,0 +1,32 @@ +################## BASE IMAGE ###################### + +FROM python:3.6-slim + +################## ARGUMENTS/Environments ########## + +ARG BUILD_DATE +ARG BUILD_VERSION +ARG LICENSE="Apache-2.0" +ARG BIOMETRICS_VERSION_VERSION=1.0 +ARG VCS_REF +################## METADATA ######################## +LABEL org.opencontainers.image.vendor="MSKCC" +LABEL org.opencontainers.image.authors="Charlie Murphy (murphyc4@mskcc.org)" + +LABEL org.opencontainers.image.created=${BUILD_DATE} \ + org.opencontainers.image.version=${BUILD_VERSION} \ + org.opencontainers.image.licenses=${LICENSE} \ + org.opencontainers.image.version.biometrics=${BIOMETRICS_VERSION} \ + org.opencontainers.image.source.biometrics="https://pypi.org/project/biometrics/" \ + org.opencontainers.image.vcs-url="https://github.com/msk-access/cwl-commandlinetools.git" \ + org.opencontainers.image.vcs-ref=${VCS_REF} + +LABEL org.opencontainers.image.description="This container uses python3.6 as the base image to build \ + biometrics version ${BIOMETRICS_VERSION}" + +################## INSTALL ########################## + +RUN apt-get update \ + && apt-get install gcc g++ zlib1g-dev -y \ + && pip install cython plotly \ + && pip install biometrics==1.0 diff --git a/biometrics_1.0/example_inputs.yaml b/biometrics_1.0/example_inputs.yaml new file mode 100644 index 
00000000..04cf48b4 --- /dev/null +++ b/biometrics_1.0/example_inputs.yaml @@ -0,0 +1,17 @@ +reference: + class: File + metadata: {} + path: /path/to/fasta +bam_file: + class: File + metadata: {} + path: /path/to/bam +bed_file: + class: File + metadata: {} + path: /path/to/bed +sample_id: test_sample_ +threshold: 0.01 +truncate: 1 +min_mapq: 10 +min_basq: 10 From 8bed464660b2e6c0f59aa209b27eb834f1f6b467 Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Fri, 26 Mar 2021 00:12:23 -0400 Subject: [PATCH 327/476] intiial add biometrics --- biometrics_1.0/README.md | 34 ---- biometrics_1.0/biometrics_1.0.cwl | 139 ---------------- biometrics_1.0/container/Dockerfile | 32 ---- biometrics_1.0/example_inputs.yaml | 17 -- biometrics_extract_0.2.4/README.md | 67 ++++++++ .../biometrics_extract_0.2.4.cwl | 155 ++++++++++++++++++ biometrics_extract_0.2.4/example_inputs.yaml | 22 +++ biometrics_genotype_0.2.4/README.md | 51 ++++++ .../biometrics_genotype_0.2.4.cwl | 146 +++++++++++++++++ biometrics_genotype_0.2.4/example_inputs.yaml | 12 ++ biometrics_major_0.2.4/README.md | 49 ++++++ .../biometrics_major_0.2.4.cwl | 120 ++++++++++++++ biometrics_major_0.2.4/example_inputs.yaml | 11 ++ biometrics_minor_0.2.4/README.md | 49 ++++++ .../biometrics_minor_0.2.4.cwl | 127 ++++++++++++++ biometrics_minor_0.2.4/example_inputs.yaml | 11 ++ biometrics_sexmismatch_0.2.4/README.md | 50 ++++++ .../biometrics_sexmismatch_0.2.4.cwl | 106 ++++++++++++ .../example_inputs.yaml | 10 ++ 19 files changed, 986 insertions(+), 222 deletions(-) delete mode 100644 biometrics_1.0/README.md delete mode 100644 biometrics_1.0/biometrics_1.0.cwl delete mode 100644 biometrics_1.0/container/Dockerfile delete mode 100644 biometrics_1.0/example_inputs.yaml create mode 100644 biometrics_extract_0.2.4/README.md create mode 100644 biometrics_extract_0.2.4/biometrics_extract_0.2.4.cwl create mode 100644 biometrics_extract_0.2.4/example_inputs.yaml create mode 100644 biometrics_genotype_0.2.4/README.md create mode 100644 
biometrics_genotype_0.2.4/biometrics_genotype_0.2.4.cwl create mode 100644 biometrics_genotype_0.2.4/example_inputs.yaml create mode 100644 biometrics_major_0.2.4/README.md create mode 100644 biometrics_major_0.2.4/biometrics_major_0.2.4.cwl create mode 100644 biometrics_major_0.2.4/example_inputs.yaml create mode 100644 biometrics_minor_0.2.4/README.md create mode 100644 biometrics_minor_0.2.4/biometrics_minor_0.2.4.cwl create mode 100644 biometrics_minor_0.2.4/example_inputs.yaml create mode 100644 biometrics_sexmismatch_0.2.4/README.md create mode 100644 biometrics_sexmismatch_0.2.4/biometrics_sexmismatch_0.2.4.cwl create mode 100644 biometrics_sexmismatch_0.2.4/example_inputs.yaml diff --git a/biometrics_1.0/README.md b/biometrics_1.0/README.md deleted file mode 100644 index 9c081359..00000000 --- a/biometrics_1.0/README.md +++ /dev/null @@ -1,34 +0,0 @@ -# CWL and Dockerfile for running sequence_qc - -## Version of tools in docker image (/container/Dockerfile) - -| Tool | Version | Location | -|--- |--- |--- | -| biometrics | 1.0 | | - -## CWL - -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner biometrics_1.0.cwl example_inputs.yaml -``` - -**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** - -```bash -#Using CWLTOOL -> cwltool --singularity --non-strict /path/to/biometrics_1.0/biometrics_1.0.cwl /path/to/inputs.yaml - -#Using toil-cwl-runner -> mkdir tool_toil_log -> toil-cwl-runner --singularity --logFile /path/to/tool_toil_log/cwltoil.log --jobStore /path/to/tool_jobStore --batchSystem lsf --workDir /path/to/tool_toil_log --outdir . 
--writeLogs /path/to/tool_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/biometrics_1.0/biometrics_1.0.cwl /path/to/inputs.yaml > tool_toil.stdout 2> tool_toil.stderr & -``` - -### Usage - -```bash - -``` diff --git a/biometrics_1.0/biometrics_1.0.cwl b/biometrics_1.0/biometrics_1.0.cwl deleted file mode 100644 index 33ae4694..00000000 --- a/biometrics_1.0/biometrics_1.0.cwl +++ /dev/null @@ -1,139 +0,0 @@ -class: CommandLineTool -cwlVersion: v1.0 -$namespaces: - dct: 'http://purl.org/dc/terms/' - doap: 'http://usefulinc.com/ns/doap#' - foaf: 'http://xmlns.com/foaf/0.1/' - sbg: 'https://www.sevenbridges.com/' -id: calculate_noise_0_1_16 -baseCommand: - - calculate_noise -inputs: - - id: reference - type: File - inputBinding: - position: 0 - prefix: --ref_fasta - secondaryFiles: - - ^.fasta.fai - doc: >- - Path to reference fasta, containing all regions in bed_file - - id: bam_file - type: File - inputBinding: - position: 0 - prefix: --bam_file - secondaryFiles: - - ^.bai - doc: >- - Path to BAM file for calculating noise [required] - - id: bed_file - type: File - inputBinding: - position: 0 - prefix: --bed_file - doc: >- - Path to BED file containing regions over which to calculate noise [required] - - id: sample_id - type: string - inputBinding: - position: 0 - prefix: --sample_id - doc: >- - Prefix to include in all output file names - - id: threshold - type: float? - inputBinding: - position: 0 - prefix: --threshold - doc: >- - Alt allele frequency past which to ignore positions from the calculation. - - id: truncate - type: int? - inputBinding: - position: 0 - prefix: --truncate - doc: >- - Whether to exclude trailing bases from reads that only partially overlap the bed file (0 or 1) - - id: min_mapq - type: int? - inputBinding: - position: 0 - prefix: --min_mapq - doc: >- - Exclude reads with a lower mapping quality - - id: min_basq - type: int? 
- inputBinding: - position: 0 - prefix: --min_basq - doc: >- - Exclude bases with a lower base quality -outputs: - - id: sequence_qc_pileup - type: File - outputBinding: - glob: |- - ${ - return inputs.sample_id + 'pileup.tsv' - } - - id: sequence_qc_noise_positions - type: File - outputBinding: - glob: |- - ${ - return inputs.sample_id + 'noise_positions.tsv' - } - - id: sequence_qc_noise_acgt - type: File - outputBinding: - glob: |- - ${ - return inputs.sample_id + 'noise_acgt.tsv' - } - - id: sequence_qc_noise_n - type: File - outputBinding: - glob: |- - ${ - return inputs.sample_id + 'noise_n.tsv' - } - - id: sequence_qc_noise_del - type: File - outputBinding: - glob: |- - ${ - return inputs.sample_id + 'noise_del.tsv' - } - - id: sequence_qc_figures - type: File - outputBinding: - glob: |- - ${ - return inputs.sample_id + '_noise.html' - } -requirements: - - class: ResourceRequirement - ramMin: 8000 - coresMin: 1 - - class: DockerRequirement - dockerPull: 'mskaccess/sequence_qc:0.1.16' - - class: InlineJavascriptRequirement -'dct:contributor': - - class: 'foaf:Organization' - 'foaf:member': - - class: 'foaf:Person' - 'foaf:mbox': 'mailto:murphyc4@mskcc.org' - 'foaf:name': Charlie Murphy - 'foaf:name': Memorial Sloan Kettering Cancer Center -'dct:creator': - - class: 'foaf:Organization' - 'foaf:member': - - class: 'foaf:Person' - 'foaf:mbox': 'mailto:murphyc4@mskcc.org' - 'foaf:name': Charlie Murphy - 'foaf:name': Memorial Sloan Kettering Cancer Center -'doap:release': - - class: 'doap:Version' - 'doap:name': sesquence_qc - 'doap:revision': 0.1.16 diff --git a/biometrics_1.0/container/Dockerfile b/biometrics_1.0/container/Dockerfile deleted file mode 100644 index 1318912a..00000000 --- a/biometrics_1.0/container/Dockerfile +++ /dev/null @@ -1,32 +0,0 @@ -################## BASE IMAGE ###################### - -FROM python:3.6-slim - -################## ARGUMENTS/Environments ########## - -ARG BUILD_DATE -ARG BUILD_VERSION -ARG LICENSE="Apache-2.0" -ARG 
BIOMETRICS_VERSION_VERSION=1.0 -ARG VCS_REF -################## METADATA ######################## -LABEL org.opencontainers.image.vendor="MSKCC" -LABEL org.opencontainers.image.authors="Charlie Murphy (murphyc4@mskcc.org)" - -LABEL org.opencontainers.image.created=${BUILD_DATE} \ - org.opencontainers.image.version=${BUILD_VERSION} \ - org.opencontainers.image.licenses=${LICENSE} \ - org.opencontainers.image.version.biometrics=${BIOMETRICS_VERSION} \ - org.opencontainers.image.source.biometrics="https://pypi.org/project/biometrics/" \ - org.opencontainers.image.vcs-url="https://github.com/msk-access/cwl-commandlinetools.git" \ - org.opencontainers.image.vcs-ref=${VCS_REF} - -LABEL org.opencontainers.image.description="This container uses python3.6 as the base image to build \ - biometrics version ${BIOMETRICS_VERSION}" - -################## INSTALL ########################## - -RUN apt-get update \ - && apt-get install gcc g++ zlib1g-dev -y \ - && pip install cython plotly \ - && pip install biometrics==1.0 diff --git a/biometrics_1.0/example_inputs.yaml b/biometrics_1.0/example_inputs.yaml deleted file mode 100644 index 04cf48b4..00000000 --- a/biometrics_1.0/example_inputs.yaml +++ /dev/null @@ -1,17 +0,0 @@ -reference: - class: File - metadata: {} - path: /path/to/fasta -bam_file: - class: File - metadata: {} - path: /path/to/bam -bed_file: - class: File - metadata: {} - path: /path/to/bed -sample_id: test_sample_ -threshold: 0.01 -truncate: 1 -min_mapq: 10 -min_basq: 10 diff --git a/biometrics_extract_0.2.4/README.md b/biometrics_extract_0.2.4/README.md new file mode 100644 index 00000000..f4e01eb3 --- /dev/null +++ b/biometrics_extract_0.2.4/README.md @@ -0,0 +1,67 @@ +# CWL and Dockerfile for running biometrics extract tool. 
+ +| Tool | Version | Location | +|--- |--- |--- | +| biometrics | 0.2.4 | | + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner biometrics_extract_0.2.4.cwl example_inputs.yaml +``` + +### Usage + +```bash +usage: biometrics_extract_0.2.4.cwl [-h] [--sample_bam SAMPLE_BAM] + [--sample_type SAMPLE_TYPE] + [--sample_sex SAMPLE_SEX] + [--sample_group SAMPLE_GROUP] + [--sample_name SAMPLE_NAME] --fafile + FAFILE --vcf_file VCF_FILE --bed_file + BED_FILE --database DATABASE + [--min_mapping_quality MIN_MAPPING_QUALITY] + [--min_base_quality MIN_BASE_QUALITY] + [--min_coverage MIN_COVERAGE] + [--min_homozygous_thresh MIN_HOMOZYGOUS_THRESH] + [--default_genotype DEFAULT_GENOTYPE] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --sample_bam SAMPLE_BAM + BAM file. + --sample_type SAMPLE_TYPE + Sample types: Normal or Tumor. + --sample_sex SAMPLE_SEX + Expected sample sex (i.e. M or F). + --sample_group SAMPLE_GROUP + The sample group (e.g. the sample patient ID). + --sample_name SAMPLE_NAME + Sample name. If not specified, sample name is + automatically figured out from the BAM file. + --fafile FAFILE Path to reference fasta. + --vcf_file VCF_FILE VCF file containing the SNPs to be queried. + --bed_file BED_FILE BED file containing the intervals to be queried. + --database DATABASE Directory to store the intermediate files after + running the extraction step. + --min_mapping_quality MIN_MAPPING_QUALITY + Minimum mapping quality of reads to be used for + pileup. + --min_base_quality MIN_BASE_QUALITY + Minimum base quality of reads to be used for pileup. + --min_coverage MIN_COVERAGE + Minimum coverage to count a site. + --min_homozygous_thresh MIN_HOMOZYGOUS_THRESH + Minimum threshold to define homozygous. 
+ --default_genotype DEFAULT_GENOTYPE + Default genotype if coverage is too low (options are + Het or Hom). +``` diff --git a/biometrics_extract_0.2.4/biometrics_extract_0.2.4.cwl b/biometrics_extract_0.2.4/biometrics_extract_0.2.4.cwl new file mode 100644 index 00000000..285af85b --- /dev/null +++ b/biometrics_extract_0.2.4/biometrics_extract_0.2.4.cwl @@ -0,0 +1,155 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: biometrics_extract +baseCommand: + - biometrics + - extract +inputs: + - id: sample_bam + type: File? + inputBinding: + position: 0 + prefix: --sample-bam + secondaryFiles: + - ^.bai + doc: >- + BAM file. + - id: sample_type + type: string? + inputBinding: + position: 0 + prefix: --sample-type + doc: >- + Sample types: Normal or Tumor. + - id: sample_sex + type: string? + inputBinding: + position: 0 + prefix: --sample-sex + doc: >- + Expected sample sex (i.e. M or F). + - id: sample_group + type: string? + inputBinding: + position: 0 + prefix: --sample-group + doc: >- + The sample group (e.g. the sample patient ID). + - id: sample_name + type: string? + inputBinding: + position: 0 + prefix: --sample-name + doc: >- + Sample name. If not specified, sample name is automatically figured out from the BAM file. + - id: fafile + type: File + inputBinding: + position: 0 + prefix: --fafile + secondaryFiles: + - ^.fasta.fai + doc: >- + Path to reference fasta. + - id: vcf_file + type: File + inputBinding: + position: 0 + prefix: --vcf + doc: >- + VCF file containing the SNPs to be queried. + - id: bed_file + type: File + inputBinding: + position: 0 + prefix: --bed + doc: >- + BED file containing the intervals to be queried. + - id: database + type: string + inputBinding: + position: 0 + prefix: --database + doc: >- + Directory to store the intermediate files after running the extraction step. 
+ - id: min_mapping_quality + type: int? + default: 1 + inputBinding: + position: 0 + prefix: --min-mapping-quality + doc: >- + Minimum mapping quality of reads to be used for pileup. + - id: min_base_quality + type: int? + default: 1 + inputBinding: + position: 0 + prefix: --min-base-quality + doc: >- + Minimum base quality of reads to be used for pileup. + - id: min_coverage + type: int? + default: 10 + inputBinding: + position: 0 + prefix: --min-coverage + doc: >- + Minimum coverage to count a site. + - id: min_homozygous_thresh + type: float? + default: 0.1 + inputBinding: + position: 0 + prefix: --min-homozygous-thresh + doc: >- + Minimum threshold to define homozygous. + - id: default_genotype + type: string? + inputBinding: + position: 0 + prefix: --default-genotype + doc: >- + Default genotype if coverage is too low (options are Het or Hom). +outputs: + - id: biometrics_extract_pickle + type: File + outputBinding: + glob: |- + ${ + if (inputs.database) { + return inputs.database + '/' + inputs.sample_name + '.pk' + } else { + return inputs.sample_name + '.pk' + } + } +requirements: + - class: ResourceRequirement + ramMin: 8000 + coresMin: 1 + - class: DockerRequirement + dockerPull: 'biometrics:0.2.4' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': biometrics + 'doap:revision': 0.2.4 diff --git a/biometrics_extract_0.2.4/example_inputs.yaml b/biometrics_extract_0.2.4/example_inputs.yaml new file mode 100644 index 00000000..17567c06 --- /dev/null +++ 
b/biometrics_extract_0.2.4/example_inputs.yaml @@ -0,0 +1,22 @@ +sample_type: Tumor +sample_sex: M +sample_name: test +sample_group: test_patient +fafile: + class: File + path: /path/to/fafile +sample_bam: + class: File + path: /path/to/bam +bed_file: + class: File + path: /path/to/bed +vcf_file: + class: File + path: /path/to/vcf +database: null +min_mapping_quality: null +min_base_quality: null +min_coverage: null +min_homozygous_thresh: null +default_genotype: null diff --git a/biometrics_genotype_0.2.4/README.md b/biometrics_genotype_0.2.4/README.md new file mode 100644 index 00000000..fede021c --- /dev/null +++ b/biometrics_genotype_0.2.4/README.md @@ -0,0 +1,51 @@ +# CWL and Dockerfile for running biometrics genotype tool. + +| Tool | Version | Location | +|--- |--- |--- | +| biometrics | 0.2.4 | | + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner biometrics_genotype_0.2.4.cwl example_inputs.yaml +``` + +### Usage + +```bash +usage: biometrics_genotype_0.2.4.cwl [-h] --input INPUT [--database DATABASE] + [--discordance_threshold DISCORDANCE_THRESHOLD] + [--prefix PREFIX] [--plot] [--json] + [--no_db_comparison] [--threads THREADS] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --input INPUT Can be one of three types: (1) path to a CSV file + containing sample information (one per line). For + example: sample_name,sample_bam,sample_type,sample_sex + ,sample_group. (2) Path to a '*.pk' file that was + produced by the 'extract' tool. (3) Name of the sample + to analyze; this assumes there is a file named + '{sample_name}.pk' in your database directory. Can be + specified more than once. + --database DATABASE Directory to store the intermediate files after + running the extraction step. 
+ --discordance_threshold DISCORDANCE_THRESHOLD + Discordance values less than this are regarded as + matching samples. (default: 0.05) + --prefix PREFIX Output file prefix. + --plot Also output plots of the data. + --json Also output data in JSON format. + --no_db_comparison Do not compare the sample(s) you provided to all + samples in the database, only compare them with each + other. + --threads THREADS Number of threads to use. +``` diff --git a/biometrics_genotype_0.2.4/biometrics_genotype_0.2.4.cwl b/biometrics_genotype_0.2.4/biometrics_genotype_0.2.4.cwl new file mode 100644 index 00000000..ca51e1e5 --- /dev/null +++ b/biometrics_genotype_0.2.4/biometrics_genotype_0.2.4.cwl @@ -0,0 +1,146 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: biometrics_extract +baseCommand: + - biometrics + - genotype +inputs: + - id: input + type: + type: array + items: File + inputBinding: + prefix: --input + inputBinding: + position: 0 + doc: >- + Can be one of three types: (1) path to a CSV file containing sample information (one per line). For example: sample_name,sample_bam,sample_type,sample_sex,sample_group. (2) Path to a '*.pk' file that was produced by the 'extract' tool. (3) Name of the sample to analyze; this assumes there is a file named '{sample_name}.pk' in your database directory. Can be specified more than once. + - id: database + type: string? + inputBinding: + position: 0 + prefix: --database + doc: >- + Directory to store the intermediate files after running the extraction step. + - id: discordance_threshold + type: float? + default: 0.05 + inputBinding: + position: 0 + prefix: --discordance-threshold + doc: >- + Discordance values less than this are regarded as matching samples. (default: 0.05) + - id: prefix + type: string? 
+ inputBinding: + position: 0 + prefix: --prefix + doc: >- + Output file prefix. + - id: plot + type: boolean? + inputBinding: + position: 0 + prefix: --plot + doc: >- + Also output plots of the data. + - id: json + type: boolean? + inputBinding: + position: 0 + prefix: --json + doc: >- + Also output data in JSON format. + - id: no_db_comparison + type: boolean? + inputBinding: + position: 0 + prefix: --no-db-compare + doc: >- + Do not compare the sample(s) you provided to all samples in the database, only compare them with each other. + - id: threads + type: int? + default: 2 + inputBinding: + position: 0 + prefix: --threads + doc: >- + Number of threads to use. +outputs: + - id: biometrics_genotype_comparisons + type: File + outputBinding: + glob: |- + ${ + if (inputs.prefix) { + return inputs.prefix + '_genotype_comparison.csv' + } else { + return 'genotype_comparison.csv' + } + } + - id: biometrics_genotype_cluster_input + type: File + outputBinding: + glob: |- + ${ + if (inputs.prefix) { + return inputs.prefix + '_genotype_clusters_input.csv' + } else { + return 'genotype_clusters_input.csv' + } + } + - id: biometrics_genotype_cluster_input_database + type: File? + outputBinding: + glob: |- + ${ + if (inputs.prefix) { + return inputs.prefix + '_genotype_clusters_database.csv' + } else { + return 'genotype_clusters_database.csv' + } + } + - id: biometrics_genotype_plot_input + type: File? + outputBinding: + glob: |- + ${ + return 'genotype_comparison_input.html' + } + - id: biometrics_genotype_plot_input_database + type: File? 
+ outputBinding: + glob: |- + ${ + return 'genotype_comparison_database.html' + } +requirements: + - class: ResourceRequirement + ramMin: 8000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'biometrics:0.2.4' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': biometrics + 'doap:revision': 0.2.4 diff --git a/biometrics_genotype_0.2.4/example_inputs.yaml b/biometrics_genotype_0.2.4/example_inputs.yaml new file mode 100644 index 00000000..f8bb23ca --- /dev/null +++ b/biometrics_genotype_0.2.4/example_inputs.yaml @@ -0,0 +1,12 @@ +input: + - class: File + path: "../biometrics_extract_0.2.4/test.pk" + - class: File + path: "../biometrics_extract_0.2.4/test2.pk" +database: null +prefix: 'test' +outdir: null +plot: true +no_db_comparison: false +threads: null +discordance_threshold: null diff --git a/biometrics_major_0.2.4/README.md b/biometrics_major_0.2.4/README.md new file mode 100644 index 00000000..18e1ddf7 --- /dev/null +++ b/biometrics_major_0.2.4/README.md @@ -0,0 +1,49 @@ +# CWL and Dockerfile for running biometrics major tool. 
+ +| Tool | Version | Location | +|--- |--- |--- | +| biometrics | 0.2.4 | | + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner biometrics_sexmismatch_0.2.4.cwl example_inputs.yaml +``` + +### Usage + +```bash +usage: biometrics_major_0.2.4.cwl [-h] --input INPUT [--database DATABASE] + [--major_threshold MAJOR_THRESHOLD] + [--prefix PREFIX] [--plot] [--json] + [--no_db_comparison] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --input INPUT Can be one of three types: (1) path to a CSV file + containing sample information (one per line). For + example: sample_name,sample_bam,sample_type,sample_sex + ,sample_group. (2) Path to a '*.pk' file that was + produced by the 'extract' tool. (3) Name of the sample + to analyze; this assumes there is a file named + '{sample_name}.pk' in your database directory. Can be + specified more than once. + --database DATABASE Directory to store the intermediate files after + running the extraction step. + --major_threshold MAJOR_THRESHOLD + Major contamination threshold for bad sample. + --prefix PREFIX Output file prefix. + --plot Also output plots of the data. + --json Also output data in JSON format. + --no_db_comparison Do not compare the sample(s) you provided to all + samples in the database, only compare them with each + other. 
+``` diff --git a/biometrics_major_0.2.4/biometrics_major_0.2.4.cwl b/biometrics_major_0.2.4/biometrics_major_0.2.4.cwl new file mode 100644 index 00000000..8a111d9a --- /dev/null +++ b/biometrics_major_0.2.4/biometrics_major_0.2.4.cwl @@ -0,0 +1,120 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: biometrics_major +baseCommand: + - biometrics + - major +inputs: + - id: input + type: + type: array + items: File + inputBinding: + prefix: --input + inputBinding: + position: 0 + doc: >- + Can be one of three types: (1) path to a CSV file containing sample information (one per line). For example: sample_name,sample_bam,sample_type,sample_sex,sample_group. (2) Path to a '*.pk' file that was produced by the 'extract' tool. (3) Name of the sample to analyze; this assumes there is a file named '{sample_name}.pk' in your database directory. Can be specified more than once. + - id: database + type: string? + inputBinding: + position: 0 + prefix: --database + doc: >- + Directory to store the intermediate files after running the extraction step. + - id: major_threshold + type: float? + default: 0.6 + inputBinding: + position: 0 + prefix: --major-threshold + doc: >- + Major contamination threshold for bad sample. + - id: prefix + type: string? + inputBinding: + position: 0 + prefix: --prefix + doc: >- + Output file prefix. + - id: plot + type: boolean? + inputBinding: + position: 0 + prefix: --plot + doc: >- + Also output plots of the data. + - id: json + type: boolean? + inputBinding: + position: 0 + prefix: --json + doc: >- + Also output data in JSON format. + - id: no_db_comparison + type: boolean? + inputBinding: + position: 0 + prefix: --no-db-compare + doc: >- + Do not compare the sample(s) you provided to all samples in the database, only compare them with each other. 
+outputs: + - id: biometrics_major_csv + type: File + outputBinding: + glob: |- + ${ + if (inputs.prefix) { + return inputs.prefix + '_major_contamination.csv' + } else { + return 'major_contamination.csv' + } + } + - id: biometrics_major_json + type: File? + outputBinding: + glob: |- + ${ + if (inputs.prefix) { + return inputs.prefix + '_major_contamination.json' + } else { + return 'major_contamination.json' + } + } + - id: biometrics_major_plot + type: File? + outputBinding: + glob: |- + ${ + return 'major_contamination.html' + } +requirements: + - class: ResourceRequirement + ramMin: 8000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'biometrics:0.2.4' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': biometrics + 'doap:revision': 0.2.4 diff --git a/biometrics_major_0.2.4/example_inputs.yaml b/biometrics_major_0.2.4/example_inputs.yaml new file mode 100644 index 00000000..598b63b3 --- /dev/null +++ b/biometrics_major_0.2.4/example_inputs.yaml @@ -0,0 +1,11 @@ +input: + - class: File + path: "../biometrics_extract_0.2.4/test.pk" + - class: File + path: "../biometrics_extract_0.2.4/test2.pk" +database: null +minor_threshold: null +prefix: null +plot: true +json: true +no_db_comparison: null diff --git a/biometrics_minor_0.2.4/README.md b/biometrics_minor_0.2.4/README.md new file mode 100644 index 00000000..13a05685 --- /dev/null +++ b/biometrics_minor_0.2.4/README.md @@ -0,0 +1,49 @@ +# CWL and Dockerfile for running biometrics major tool. 
+ +| Tool | Version | Location | +|--- |--- |--- | +| biometrics | 0.2.4 | | + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner biometrics_major_0.2.4.cwl example_inputs.yaml +``` + +### Usage + +```bash +usage: biometrics_minor_0.2.4.cwl [-h] --input INPUT [--database DATABASE] + [--minor_threshold MINOR_THRESHOLD] + [--prefix PREFIX] [--plot] [--json] + [--no_db_comparison] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --input INPUT Can be one of three types: (1) path to a CSV file + containing sample information (one per line). For + example: sample_name,sample_bam,sample_type,sample_sex + ,sample_group. (2) Path to a '*.pk' file that was + produced by the 'extract' tool. (3) Name of the sample + to analyze; this assumes there is a file named + '{sample_name}.pk' in your database directory. Can be + specified more than once. + --database DATABASE Directory to store the intermediate files after + running the extraction step. + --minor_threshold MINOR_THRESHOLD + Minor contamination threshold for bad sample. + --prefix PREFIX Output file prefix. + --plot Also output plots of the data. + --json Also output data in JSON format. + --no_db_comparison Do not compare the sample(s) you provided to all + samples in the database, only compare them with each + other. 
+``` diff --git a/biometrics_minor_0.2.4/biometrics_minor_0.2.4.cwl b/biometrics_minor_0.2.4/biometrics_minor_0.2.4.cwl new file mode 100644 index 00000000..78a3a13b --- /dev/null +++ b/biometrics_minor_0.2.4/biometrics_minor_0.2.4.cwl @@ -0,0 +1,127 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: biometrics_minor +baseCommand: + - biometrics + - minor +inputs: + - id: input + type: + type: array + items: File + inputBinding: + prefix: --input + inputBinding: + position: 0 + doc: >- + Can be one of three types: (1) path to a CSV file containing sample information (one per line). For example: sample_name,sample_bam,sample_type,sample_sex,sample_group. (2) Path to a '*.pk' file that was produced by the 'extract' tool. (3) Name of the sample to analyze; this assumes there is a file named '{sample_name}.pk' in your database directory. Can be specified more than once. + - id: database + type: string? + inputBinding: + position: 0 + prefix: --database + doc: >- + Directory to store the intermediate files after running the extraction step. + - id: minor_threshold + type: float? + default: 0.002 + inputBinding: + position: 0 + prefix: --minor-threshold + doc: >- + Minor contamination threshold for bad sample. + - id: prefix + type: string? + inputBinding: + position: 0 + prefix: --prefix + doc: >- + Output file prefix. + - id: plot + type: boolean? + inputBinding: + position: 0 + prefix: --plot + doc: >- + Also output plots of the data. + - id: json + type: boolean? + inputBinding: + position: 0 + prefix: --json + doc: >- + Also output data in JSON format. + - id: no_db_comparison + type: boolean? + inputBinding: + position: 0 + prefix: --no-db-compare + doc: >- + Do not compare the sample(s) you provided to all samples in the database, only compare them with each other. 
+outputs: + - id: biometrics_minor_csv + type: File + outputBinding: + glob: |- + ${ + if (inputs.prefix) { + return inputs.prefix + '_minor_contamination.csv' + } else { + return 'minor_contamination.csv' + } + } + - id: biometrics_minor_json + type: File? + outputBinding: + glob: |- + ${ + if (inputs.prefix) { + return inputs.prefix + '_minor_contamination.json' + } else { + return 'minor_contamination.json' + } + } + - id: biometrics_minor_plot + type: File? + outputBinding: + glob: |- + ${ + return 'minor_contamination.html' + } + - id: biometrics_minor_sites_plot + type: File? + outputBinding: + glob: |- + ${ + return 'minor_contamination_sites.html' + } +requirements: + - class: ResourceRequirement + ramMin: 8000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'biometrics:0.2.4' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': biometrics + 'doap:revision': 0.2.4 diff --git a/biometrics_minor_0.2.4/example_inputs.yaml b/biometrics_minor_0.2.4/example_inputs.yaml new file mode 100644 index 00000000..f80776ff --- /dev/null +++ b/biometrics_minor_0.2.4/example_inputs.yaml @@ -0,0 +1,11 @@ +input: + - class: File + path: "../biometrics_extract_0.2.4/test.pk" + - class: File + path: "../biometrics_extract_0.2.4/test2.pk" +database: null +major_threshold: null +prefix: null +plot: true +json: true +no_db_comparison: null diff --git a/biometrics_sexmismatch_0.2.4/README.md b/biometrics_sexmismatch_0.2.4/README.md new file mode 100644 index 00000000..bee53cb7 --- /dev/null +++ 
b/biometrics_sexmismatch_0.2.4/README.md @@ -0,0 +1,50 @@ +# CWL and Dockerfile for running biometrics sexmismatch tool. + +| Tool | Version | Location | +|--- |--- |--- | +| biometrics | 0.2.4 | | + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner biometrics_sexmismatch_0.2.4.cwl example_inputs.yaml +``` + +### Usage + +```bash +usage: biometrics_sexmismatch_0.2.4.cwl [-h] --input INPUT + [--database DATABASE] + [--coverage_threshold COVERAGE_THRESHOLD] + [--prefix PREFIX] [--json] + [--no_db_comparison] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --input INPUT Can be one of three types: (1) path to a CSV file + containing sample information (one per line). For + example: sample_name,sample_bam,sample_type,sample_sex + ,sample_group. (2) Path to a '*.pk' file that was + produced by the 'extract' tool. (3) Name of the sample + to analyze; this assumes there is a file named + '{sample_name}.pk' in your database directory. Can be + specified more than once. + --database DATABASE Directory to store the intermediate files after + running the extraction step. + --coverage_threshold COVERAGE_THRESHOLD + Samples with Y chromosome above this value will be + considered male. + --prefix PREFIX Output file prefix. + --json Also output data in JSON format. + --no_db_comparison Do not compare the sample(s) you provided to all + samples in the database, only compare them with each + other. 
+``` diff --git a/biometrics_sexmismatch_0.2.4/biometrics_sexmismatch_0.2.4.cwl b/biometrics_sexmismatch_0.2.4/biometrics_sexmismatch_0.2.4.cwl new file mode 100644 index 00000000..ccda3b88 --- /dev/null +++ b/biometrics_sexmismatch_0.2.4/biometrics_sexmismatch_0.2.4.cwl @@ -0,0 +1,106 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: biometrics_sexmismatch +baseCommand: + - biometrics + - sexmismatch +inputs: + - id: input + type: + type: array + items: File + inputBinding: + prefix: --input + inputBinding: + position: 0 + doc: >- + Can be one of three types: (1) path to a CSV file containing sample information (one per line). For example: sample_name,sample_bam,sample_type,sample_sex,sample_group. (2) Path to a '*.pk' file that was produced by the 'extract' tool. (3) Name of the sample to analyze; this assumes there is a file named '{sample_name}.pk' in your database directory. Can be specified more than once. + - id: database + type: string? + inputBinding: + position: 0 + prefix: --database + doc: >- + Directory to store the intermediate files after running the extraction step. + - id: coverage_threshold + type: int? + default: 50 + inputBinding: + position: 0 + prefix: --coverage-threshold + doc: >- + Samples with Y chromosome above this value will be considered male. + - id: prefix + type: string? + inputBinding: + position: 0 + prefix: --prefix + doc: >- + Output file prefix. + - id: json + type: boolean? + inputBinding: + position: 0 + prefix: --json + doc: >- + Also output data in JSON format. + - id: no_db_comparison + type: boolean? + inputBinding: + position: 0 + prefix: --no-db-compare + doc: >- + Do not compare the sample(s) you provided to all samples in the database, only compare them with each other. 
+outputs: + - id: biometrics_sexmismatch_csv + type: File + outputBinding: + glob: |- + ${ + if (inputs.prefix) { + return inputs.prefix + '_sex_mismatch.csv' + } else { + return 'sex_mismatch.csv' + } + } + - id: biometrics_sexmismatch_json + type: File? + outputBinding: + glob: |- + ${ + if (inputs.prefix) { + return inputs.prefix + '_sex_mismatch.json' + } else { + return 'sex_mismatch.json' + } + } +requirements: + - class: ResourceRequirement + ramMin: 8000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'biometrics:0.2.4' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': biometrics + 'doap:revision': 0.2.4 diff --git a/biometrics_sexmismatch_0.2.4/example_inputs.yaml b/biometrics_sexmismatch_0.2.4/example_inputs.yaml new file mode 100644 index 00000000..3be7fbb0 --- /dev/null +++ b/biometrics_sexmismatch_0.2.4/example_inputs.yaml @@ -0,0 +1,10 @@ +input: + - class: File + path: "../biometrics_extract_0.2.4/test.pk" + - class: File + path: "../biometrics_extract_0.2.4/test2.pk" +database: null +coverage_threshold: null +prefix: null +json: true +no_db_comparison: null From eafb35f73e77dbc14b7ab1e0e87f41998287b4b9 Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Fri, 26 Mar 2021 00:14:06 -0400 Subject: [PATCH 328/476] Update SUMMARY.md --- docs/SUMMARY.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index e86b3b5e..f0e42d1c 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -7,6 +7,12 @@ - Bedtools - [genomecov 
v2.28.0_cv2](../bedtools_genomecov_v2.28.0_cv2/README.md) - [merge v2.28.0_cv2](../bedtools_merge_v2.28.0_cv2/README.md) + - Biometrics + - [extract v0.2.4](../biometrics_extract_0.2.4/README.md) + - [minor v0.2.4](../biometrics_minor_0.2.4/README.md) + - [major v0.2.4](../biometrics_major_0.2.4/README.md) + - [genotype v0.2.4](../biometrics_genotype_0.2.4/README.md) + - [sexmismatch v0.2.4](../biometrics_sexmismatch_0.2.4/README.md) - Disambiguate - [v1.0.0](../disambiguate_1.0.0/README.md) - Fgbio From 61256ebd12a26cc8a2e4f4c237ebdb125579a9ad Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Fri, 26 Mar 2021 09:52:51 -0400 Subject: [PATCH 329/476] typos --- biometrics_genotype_0.2.4/biometrics_genotype_0.2.4.cwl | 2 +- biometrics_major_0.2.4/README.md | 2 +- biometrics_minor_0.2.4/README.md | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/biometrics_genotype_0.2.4/biometrics_genotype_0.2.4.cwl b/biometrics_genotype_0.2.4/biometrics_genotype_0.2.4.cwl index ca51e1e5..c430dd0e 100644 --- a/biometrics_genotype_0.2.4/biometrics_genotype_0.2.4.cwl +++ b/biometrics_genotype_0.2.4/biometrics_genotype_0.2.4.cwl @@ -5,7 +5,7 @@ $namespaces: doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' sbg: 'https://www.sevenbridges.com/' -id: biometrics_extract +id: biometrics_genotype baseCommand: - biometrics - genotype diff --git a/biometrics_major_0.2.4/README.md b/biometrics_major_0.2.4/README.md index 18e1ddf7..c8ca3164 100644 --- a/biometrics_major_0.2.4/README.md +++ b/biometrics_major_0.2.4/README.md @@ -11,7 +11,7 @@ - Example Command using [toil](https://toil.readthedocs.io): ```bash - > toil-cwl-runner biometrics_sexmismatch_0.2.4.cwl example_inputs.yaml + > toil-cwl-runner biometrics_major_0.2.4.cwl example_inputs.yaml ``` ### Usage diff --git a/biometrics_minor_0.2.4/README.md b/biometrics_minor_0.2.4/README.md index 13a05685..4a1cf24c 100644 --- a/biometrics_minor_0.2.4/README.md +++ b/biometrics_minor_0.2.4/README.md @@ -1,4 
+1,4 @@ -# CWL and Dockerfile for running biometrics major tool. +# CWL and Dockerfile for running biometrics minor tool. | Tool | Version | Location | |--- |--- |--- | @@ -11,7 +11,7 @@ - Example Command using [toil](https://toil.readthedocs.io): ```bash - > toil-cwl-runner biometrics_major_0.2.4.cwl example_inputs.yaml + > toil-cwl-runner biometrics_minor_0.2.4.cwl example_inputs.yaml ``` ### Usage From 90ac477d885f63b5fc9aafbba954e0835ba861e7 Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Fri, 26 Mar 2021 17:10:43 -0400 Subject: [PATCH 330/476] fixes in response to PR --- biometrics_extract_0.2.4/README.md | 2 +- biometrics_extract_0.2.4/biometrics_extract_0.2.4.cwl | 6 +++--- biometrics_genotype_0.2.4/README.md | 2 +- biometrics_genotype_0.2.4/biometrics_genotype_0.2.4.cwl | 4 ++-- biometrics_major_0.2.4/README.md | 2 +- biometrics_major_0.2.4/biometrics_major_0.2.4.cwl | 4 ++-- biometrics_minor_0.2.4/README.md | 2 +- biometrics_minor_0.2.4/biometrics_minor_0.2.4.cwl | 4 ++-- biometrics_sexmismatch_0.2.4/README.md | 2 +- .../biometrics_sexmismatch_0.2.4.cwl | 4 ++-- 10 files changed, 16 insertions(+), 16 deletions(-) diff --git a/biometrics_extract_0.2.4/README.md b/biometrics_extract_0.2.4/README.md index f4e01eb3..d323014f 100644 --- a/biometrics_extract_0.2.4/README.md +++ b/biometrics_extract_0.2.4/README.md @@ -2,7 +2,7 @@ | Tool | Version | Location | |--- |--- |--- | -| biometrics | 0.2.4 | | +| biometrics | 0.2.4 | | ## CWL diff --git a/biometrics_extract_0.2.4/biometrics_extract_0.2.4.cwl b/biometrics_extract_0.2.4/biometrics_extract_0.2.4.cwl index 285af85b..893f7a2c 100644 --- a/biometrics_extract_0.2.4/biometrics_extract_0.2.4.cwl +++ b/biometrics_extract_0.2.4/biometrics_extract_0.2.4.cwl @@ -130,10 +130,10 @@ outputs: } requirements: - class: ResourceRequirement - ramMin: 8000 - coresMin: 1 + ramMin: 16000 + coresMin: 2 - class: DockerRequirement - dockerPull: 'biometrics:0.2.4' + dockerPull: 'ghcr.io/msk-access/biometrics:0.2.4' - class: 
InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/biometrics_genotype_0.2.4/README.md b/biometrics_genotype_0.2.4/README.md index fede021c..fdc50c9a 100644 --- a/biometrics_genotype_0.2.4/README.md +++ b/biometrics_genotype_0.2.4/README.md @@ -2,7 +2,7 @@ | Tool | Version | Location | |--- |--- |--- | -| biometrics | 0.2.4 | | +| biometrics | 0.2.4 | | ## CWL diff --git a/biometrics_genotype_0.2.4/biometrics_genotype_0.2.4.cwl b/biometrics_genotype_0.2.4/biometrics_genotype_0.2.4.cwl index c430dd0e..23cf2a96 100644 --- a/biometrics_genotype_0.2.4/biometrics_genotype_0.2.4.cwl +++ b/biometrics_genotype_0.2.4/biometrics_genotype_0.2.4.cwl @@ -121,10 +121,10 @@ outputs: } requirements: - class: ResourceRequirement - ramMin: 8000 + ramMin: 16000 coresMin: 2 - class: DockerRequirement - dockerPull: 'biometrics:0.2.4' + dockerPull: 'ghcr.io/msk-access/biometrics:0.2.4' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/biometrics_major_0.2.4/README.md b/biometrics_major_0.2.4/README.md index c8ca3164..8d4bfbe2 100644 --- a/biometrics_major_0.2.4/README.md +++ b/biometrics_major_0.2.4/README.md @@ -2,7 +2,7 @@ | Tool | Version | Location | |--- |--- |--- | -| biometrics | 0.2.4 | | +| biometrics | 0.2.4 | | ## CWL diff --git a/biometrics_major_0.2.4/biometrics_major_0.2.4.cwl b/biometrics_major_0.2.4/biometrics_major_0.2.4.cwl index 8a111d9a..32b2d638 100644 --- a/biometrics_major_0.2.4/biometrics_major_0.2.4.cwl +++ b/biometrics_major_0.2.4/biometrics_major_0.2.4.cwl @@ -95,10 +95,10 @@ outputs: } requirements: - class: ResourceRequirement - ramMin: 8000 + ramMin: 16000 coresMin: 2 - class: DockerRequirement - dockerPull: 'biometrics:0.2.4' + dockerPull: 'ghcr.io/msk-access/biometrics:0.2.4' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/biometrics_minor_0.2.4/README.md b/biometrics_minor_0.2.4/README.md index 4a1cf24c..1502c10f 100644 
--- a/biometrics_minor_0.2.4/README.md +++ b/biometrics_minor_0.2.4/README.md @@ -2,7 +2,7 @@ | Tool | Version | Location | |--- |--- |--- | -| biometrics | 0.2.4 | | +| biometrics | 0.2.4 | | ## CWL diff --git a/biometrics_minor_0.2.4/biometrics_minor_0.2.4.cwl b/biometrics_minor_0.2.4/biometrics_minor_0.2.4.cwl index 78a3a13b..54070fe1 100644 --- a/biometrics_minor_0.2.4/biometrics_minor_0.2.4.cwl +++ b/biometrics_minor_0.2.4/biometrics_minor_0.2.4.cwl @@ -102,10 +102,10 @@ outputs: } requirements: - class: ResourceRequirement - ramMin: 8000 + ramMin: 16000 coresMin: 2 - class: DockerRequirement - dockerPull: 'biometrics:0.2.4' + dockerPull: 'ghcr.io/msk-access/biometrics:0.2.4' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/biometrics_sexmismatch_0.2.4/README.md b/biometrics_sexmismatch_0.2.4/README.md index bee53cb7..8034fd04 100644 --- a/biometrics_sexmismatch_0.2.4/README.md +++ b/biometrics_sexmismatch_0.2.4/README.md @@ -2,7 +2,7 @@ | Tool | Version | Location | |--- |--- |--- | -| biometrics | 0.2.4 | | +| biometrics | 0.2.4 | | ## CWL diff --git a/biometrics_sexmismatch_0.2.4/biometrics_sexmismatch_0.2.4.cwl b/biometrics_sexmismatch_0.2.4/biometrics_sexmismatch_0.2.4.cwl index ccda3b88..add1399d 100644 --- a/biometrics_sexmismatch_0.2.4/biometrics_sexmismatch_0.2.4.cwl +++ b/biometrics_sexmismatch_0.2.4/biometrics_sexmismatch_0.2.4.cwl @@ -81,10 +81,10 @@ outputs: } requirements: - class: ResourceRequirement - ramMin: 8000 + ramMin: 16000 coresMin: 2 - class: DockerRequirement - dockerPull: 'biometrics:0.2.4' + dockerPull: 'ghcr.io/msk-access/biometrics:0.2.4' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' From 772e6a1576340ffc39cbcc9a9b996e3c77e291ec Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Fri, 26 Mar 2021 22:51:36 -0400 Subject: [PATCH 331/476] update 0.2.5 biometrics --- .../README.md | 6 +++--- .../biometrics_extract_0.2.5.cwl | 4 ++-- 
.../example_inputs.yaml | 0 .../README.md | 6 +++--- .../biometrics_genotype_0.2.5.cwl | 4 ++-- .../example_inputs.yaml | 4 ++-- .../README.md | 6 +++--- .../biometrics_major_0.2.5.cwl | 4 ++-- .../example_inputs.yaml | 4 ++-- .../README.md | 6 +++--- .../biometrics_minor_0.2.5.cwl | 4 ++-- .../example_inputs.yaml | 4 ++-- .../README.md | 6 +++--- .../biometrics_sexmismatch_0.2.5.cwl | 4 ++-- .../example_inputs.yaml | 4 ++-- docs/SUMMARY.md | 10 +++++----- 16 files changed, 38 insertions(+), 38 deletions(-) rename {biometrics_extract_0.2.4 => biometrics_extract_0.2.5}/README.md (93%) rename biometrics_extract_0.2.4/biometrics_extract_0.2.4.cwl => biometrics_extract_0.2.5/biometrics_extract_0.2.5.cwl (97%) rename {biometrics_extract_0.2.4 => biometrics_extract_0.2.5}/example_inputs.yaml (100%) rename {biometrics_genotype_0.2.4 => biometrics_genotype_0.2.5}/README.md (91%) rename biometrics_genotype_0.2.4/biometrics_genotype_0.2.4.cwl => biometrics_genotype_0.2.5/biometrics_genotype_0.2.5.cwl (97%) rename {biometrics_genotype_0.2.4 => biometrics_genotype_0.2.5}/example_inputs.yaml (62%) rename {biometrics_major_0.2.4 => biometrics_major_0.2.5}/README.md (90%) rename biometrics_major_0.2.4/biometrics_major_0.2.4.cwl => biometrics_major_0.2.5/biometrics_major_0.2.5.cwl (97%) rename {biometrics_major_0.2.4 => biometrics_major_0.2.5}/example_inputs.yaml (58%) rename {biometrics_minor_0.2.4 => biometrics_minor_0.2.5}/README.md (90%) rename biometrics_minor_0.2.4/biometrics_minor_0.2.4.cwl => biometrics_minor_0.2.5/biometrics_minor_0.2.5.cwl (97%) rename {biometrics_minor_0.2.4 => biometrics_minor_0.2.5}/example_inputs.yaml (58%) rename {biometrics_sexmismatch_0.2.4 => biometrics_sexmismatch_0.2.5}/README.md (91%) rename biometrics_sexmismatch_0.2.4/biometrics_sexmismatch_0.2.4.cwl => biometrics_sexmismatch_0.2.5/biometrics_sexmismatch_0.2.5.cwl (97%) rename {biometrics_sexmismatch_0.2.4 => biometrics_sexmismatch_0.2.5}/example_inputs.yaml (56%) diff --git 
a/biometrics_extract_0.2.4/README.md b/biometrics_extract_0.2.5/README.md similarity index 93% rename from biometrics_extract_0.2.4/README.md rename to biometrics_extract_0.2.5/README.md index d323014f..fe6a70dc 100644 --- a/biometrics_extract_0.2.4/README.md +++ b/biometrics_extract_0.2.5/README.md @@ -2,7 +2,7 @@ | Tool | Version | Location | |--- |--- |--- | -| biometrics | 0.2.4 | | +| biometrics | 0.2.5 | | ## CWL @@ -11,13 +11,13 @@ - Example Command using [toil](https://toil.readthedocs.io): ```bash - > toil-cwl-runner biometrics_extract_0.2.4.cwl example_inputs.yaml + > toil-cwl-runner biometrics_extract_0.2.5.cwl example_inputs.yaml ``` ### Usage ```bash -usage: biometrics_extract_0.2.4.cwl [-h] [--sample_bam SAMPLE_BAM] +usage: biometrics_extract_0.2.5.cwl [-h] [--sample_bam SAMPLE_BAM] [--sample_type SAMPLE_TYPE] [--sample_sex SAMPLE_SEX] [--sample_group SAMPLE_GROUP] diff --git a/biometrics_extract_0.2.4/biometrics_extract_0.2.4.cwl b/biometrics_extract_0.2.5/biometrics_extract_0.2.5.cwl similarity index 97% rename from biometrics_extract_0.2.4/biometrics_extract_0.2.4.cwl rename to biometrics_extract_0.2.5/biometrics_extract_0.2.5.cwl index 893f7a2c..c8a51c39 100644 --- a/biometrics_extract_0.2.4/biometrics_extract_0.2.4.cwl +++ b/biometrics_extract_0.2.5/biometrics_extract_0.2.5.cwl @@ -133,7 +133,7 @@ requirements: ramMin: 16000 coresMin: 2 - class: DockerRequirement - dockerPull: 'ghcr.io/msk-access/biometrics:0.2.4' + dockerPull: 'ghcr.io/msk-access/biometrics:0.2.5' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' @@ -152,4 +152,4 @@ requirements: 'doap:release': - class: 'doap:Version' 'doap:name': biometrics - 'doap:revision': 0.2.4 + 'doap:revision': 0.2.5 diff --git a/biometrics_extract_0.2.4/example_inputs.yaml b/biometrics_extract_0.2.5/example_inputs.yaml similarity index 100% rename from biometrics_extract_0.2.4/example_inputs.yaml rename to biometrics_extract_0.2.5/example_inputs.yaml diff --git 
a/biometrics_genotype_0.2.4/README.md b/biometrics_genotype_0.2.5/README.md similarity index 91% rename from biometrics_genotype_0.2.4/README.md rename to biometrics_genotype_0.2.5/README.md index fdc50c9a..00e42c59 100644 --- a/biometrics_genotype_0.2.4/README.md +++ b/biometrics_genotype_0.2.5/README.md @@ -2,7 +2,7 @@ | Tool | Version | Location | |--- |--- |--- | -| biometrics | 0.2.4 | | +| biometrics | 0.2.5 | | ## CWL @@ -11,13 +11,13 @@ - Example Command using [toil](https://toil.readthedocs.io): ```bash - > toil-cwl-runner biometrics_genotype_0.2.4.cwl example_inputs.yaml + > toil-cwl-runner biometrics_genotype_0.2.5.cwl example_inputs.yaml ``` ### Usage ```bash -usage: biometrics_genotype_0.2.4.cwl [-h] --input INPUT [--database DATABASE] +usage: biometrics_genotype_0.2.5.cwl [-h] --input INPUT [--database DATABASE] [--discordance_threshold DISCORDANCE_THRESHOLD] [--prefix PREFIX] [--plot] [--json] [--no_db_comparison] [--threads THREADS] diff --git a/biometrics_genotype_0.2.4/biometrics_genotype_0.2.4.cwl b/biometrics_genotype_0.2.5/biometrics_genotype_0.2.5.cwl similarity index 97% rename from biometrics_genotype_0.2.4/biometrics_genotype_0.2.4.cwl rename to biometrics_genotype_0.2.5/biometrics_genotype_0.2.5.cwl index 23cf2a96..7e8c4d5e 100644 --- a/biometrics_genotype_0.2.4/biometrics_genotype_0.2.4.cwl +++ b/biometrics_genotype_0.2.5/biometrics_genotype_0.2.5.cwl @@ -124,7 +124,7 @@ requirements: ramMin: 16000 coresMin: 2 - class: DockerRequirement - dockerPull: 'ghcr.io/msk-access/biometrics:0.2.4' + dockerPull: 'ghcr.io/msk-access/biometrics:0.2.5' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' @@ -143,4 +143,4 @@ requirements: 'doap:release': - class: 'doap:Version' 'doap:name': biometrics - 'doap:revision': 0.2.4 + 'doap:revision': 0.2.5 diff --git a/biometrics_genotype_0.2.4/example_inputs.yaml b/biometrics_genotype_0.2.5/example_inputs.yaml similarity index 62% rename from 
biometrics_genotype_0.2.4/example_inputs.yaml rename to biometrics_genotype_0.2.5/example_inputs.yaml index f8bb23ca..0bc68d94 100644 --- a/biometrics_genotype_0.2.4/example_inputs.yaml +++ b/biometrics_genotype_0.2.5/example_inputs.yaml @@ -1,8 +1,8 @@ input: - class: File - path: "../biometrics_extract_0.2.4/test.pk" + path: "../biometrics_extract_0.2.5/test.pk" - class: File - path: "../biometrics_extract_0.2.4/test2.pk" + path: "../biometrics_extract_0.2.5/test2.pk" database: null prefix: 'test' outdir: null diff --git a/biometrics_major_0.2.4/README.md b/biometrics_major_0.2.5/README.md similarity index 90% rename from biometrics_major_0.2.4/README.md rename to biometrics_major_0.2.5/README.md index 8d4bfbe2..89882940 100644 --- a/biometrics_major_0.2.4/README.md +++ b/biometrics_major_0.2.5/README.md @@ -2,7 +2,7 @@ | Tool | Version | Location | |--- |--- |--- | -| biometrics | 0.2.4 | | +| biometrics | 0.2.5 | | ## CWL @@ -11,13 +11,13 @@ - Example Command using [toil](https://toil.readthedocs.io): ```bash - > toil-cwl-runner biometrics_major_0.2.4.cwl example_inputs.yaml + > toil-cwl-runner biometrics_major_0.2.5.cwl example_inputs.yaml ``` ### Usage ```bash -usage: biometrics_major_0.2.4.cwl [-h] --input INPUT [--database DATABASE] +usage: biometrics_major_0.2.5.cwl [-h] --input INPUT [--database DATABASE] [--major_threshold MAJOR_THRESHOLD] [--prefix PREFIX] [--plot] [--json] [--no_db_comparison] diff --git a/biometrics_major_0.2.4/biometrics_major_0.2.4.cwl b/biometrics_major_0.2.5/biometrics_major_0.2.5.cwl similarity index 97% rename from biometrics_major_0.2.4/biometrics_major_0.2.4.cwl rename to biometrics_major_0.2.5/biometrics_major_0.2.5.cwl index 32b2d638..217c9d96 100644 --- a/biometrics_major_0.2.4/biometrics_major_0.2.4.cwl +++ b/biometrics_major_0.2.5/biometrics_major_0.2.5.cwl @@ -98,7 +98,7 @@ requirements: ramMin: 16000 coresMin: 2 - class: DockerRequirement - dockerPull: 'ghcr.io/msk-access/biometrics:0.2.4' + dockerPull: 
'ghcr.io/msk-access/biometrics:0.2.5' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' @@ -117,4 +117,4 @@ requirements: 'doap:release': - class: 'doap:Version' 'doap:name': biometrics - 'doap:revision': 0.2.4 + 'doap:revision': 0.2.5 diff --git a/biometrics_major_0.2.4/example_inputs.yaml b/biometrics_major_0.2.5/example_inputs.yaml similarity index 58% rename from biometrics_major_0.2.4/example_inputs.yaml rename to biometrics_major_0.2.5/example_inputs.yaml index 598b63b3..da03de55 100644 --- a/biometrics_major_0.2.4/example_inputs.yaml +++ b/biometrics_major_0.2.5/example_inputs.yaml @@ -1,8 +1,8 @@ input: - class: File - path: "../biometrics_extract_0.2.4/test.pk" + path: "../biometrics_extract_0.2.5/test.pk" - class: File - path: "../biometrics_extract_0.2.4/test2.pk" + path: "../biometrics_extract_0.2.5/test2.pk" database: null minor_threshold: null prefix: null diff --git a/biometrics_minor_0.2.4/README.md b/biometrics_minor_0.2.5/README.md similarity index 90% rename from biometrics_minor_0.2.4/README.md rename to biometrics_minor_0.2.5/README.md index 1502c10f..ad71f05d 100644 --- a/biometrics_minor_0.2.4/README.md +++ b/biometrics_minor_0.2.5/README.md @@ -2,7 +2,7 @@ | Tool | Version | Location | |--- |--- |--- | -| biometrics | 0.2.4 | | +| biometrics | 0.2.5 | | ## CWL @@ -11,13 +11,13 @@ - Example Command using [toil](https://toil.readthedocs.io): ```bash - > toil-cwl-runner biometrics_minor_0.2.4.cwl example_inputs.yaml + > toil-cwl-runner biometrics_minor_0.2.5.cwl example_inputs.yaml ``` ### Usage ```bash -usage: biometrics_minor_0.2.4.cwl [-h] --input INPUT [--database DATABASE] +usage: biometrics_minor_0.2.5.cwl [-h] --input INPUT [--database DATABASE] [--minor_threshold MINOR_THRESHOLD] [--prefix PREFIX] [--plot] [--json] [--no_db_comparison] diff --git a/biometrics_minor_0.2.4/biometrics_minor_0.2.4.cwl b/biometrics_minor_0.2.5/biometrics_minor_0.2.5.cwl similarity index 97% rename from 
biometrics_minor_0.2.4/biometrics_minor_0.2.4.cwl rename to biometrics_minor_0.2.5/biometrics_minor_0.2.5.cwl index 54070fe1..dc0410cb 100644 --- a/biometrics_minor_0.2.4/biometrics_minor_0.2.4.cwl +++ b/biometrics_minor_0.2.5/biometrics_minor_0.2.5.cwl @@ -105,7 +105,7 @@ requirements: ramMin: 16000 coresMin: 2 - class: DockerRequirement - dockerPull: 'ghcr.io/msk-access/biometrics:0.2.4' + dockerPull: 'ghcr.io/msk-access/biometrics:0.2.5' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' @@ -124,4 +124,4 @@ requirements: 'doap:release': - class: 'doap:Version' 'doap:name': biometrics - 'doap:revision': 0.2.4 + 'doap:revision': 0.2.5 diff --git a/biometrics_minor_0.2.4/example_inputs.yaml b/biometrics_minor_0.2.5/example_inputs.yaml similarity index 58% rename from biometrics_minor_0.2.4/example_inputs.yaml rename to biometrics_minor_0.2.5/example_inputs.yaml index f80776ff..bddb4c72 100644 --- a/biometrics_minor_0.2.4/example_inputs.yaml +++ b/biometrics_minor_0.2.5/example_inputs.yaml @@ -1,8 +1,8 @@ input: - class: File - path: "../biometrics_extract_0.2.4/test.pk" + path: "../biometrics_extract_0.2.5/test.pk" - class: File - path: "../biometrics_extract_0.2.4/test2.pk" + path: "../biometrics_extract_0.2.5/test2.pk" database: null major_threshold: null prefix: null diff --git a/biometrics_sexmismatch_0.2.4/README.md b/biometrics_sexmismatch_0.2.5/README.md similarity index 91% rename from biometrics_sexmismatch_0.2.4/README.md rename to biometrics_sexmismatch_0.2.5/README.md index 8034fd04..36a800ca 100644 --- a/biometrics_sexmismatch_0.2.4/README.md +++ b/biometrics_sexmismatch_0.2.5/README.md @@ -2,7 +2,7 @@ | Tool | Version | Location | |--- |--- |--- | -| biometrics | 0.2.4 | | +| biometrics | 0.2.5 | | ## CWL @@ -11,13 +11,13 @@ - Example Command using [toil](https://toil.readthedocs.io): ```bash - > toil-cwl-runner biometrics_sexmismatch_0.2.4.cwl example_inputs.yaml + > toil-cwl-runner biometrics_sexmismatch_0.2.5.cwl 
example_inputs.yaml ``` ### Usage ```bash -usage: biometrics_sexmismatch_0.2.4.cwl [-h] --input INPUT +usage: biometrics_sexmismatch_0.2.5.cwl [-h] --input INPUT [--database DATABASE] [--coverage_threshold COVERAGE_THRESHOLD] [--prefix PREFIX] [--json] diff --git a/biometrics_sexmismatch_0.2.4/biometrics_sexmismatch_0.2.4.cwl b/biometrics_sexmismatch_0.2.5/biometrics_sexmismatch_0.2.5.cwl similarity index 97% rename from biometrics_sexmismatch_0.2.4/biometrics_sexmismatch_0.2.4.cwl rename to biometrics_sexmismatch_0.2.5/biometrics_sexmismatch_0.2.5.cwl index add1399d..bae28a19 100644 --- a/biometrics_sexmismatch_0.2.4/biometrics_sexmismatch_0.2.4.cwl +++ b/biometrics_sexmismatch_0.2.5/biometrics_sexmismatch_0.2.5.cwl @@ -84,7 +84,7 @@ requirements: ramMin: 16000 coresMin: 2 - class: DockerRequirement - dockerPull: 'ghcr.io/msk-access/biometrics:0.2.4' + dockerPull: 'ghcr.io/msk-access/biometrics:0.2.5' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' @@ -103,4 +103,4 @@ requirements: 'doap:release': - class: 'doap:Version' 'doap:name': biometrics - 'doap:revision': 0.2.4 + 'doap:revision': 0.2.5 diff --git a/biometrics_sexmismatch_0.2.4/example_inputs.yaml b/biometrics_sexmismatch_0.2.5/example_inputs.yaml similarity index 56% rename from biometrics_sexmismatch_0.2.4/example_inputs.yaml rename to biometrics_sexmismatch_0.2.5/example_inputs.yaml index 3be7fbb0..60832e43 100644 --- a/biometrics_sexmismatch_0.2.4/example_inputs.yaml +++ b/biometrics_sexmismatch_0.2.5/example_inputs.yaml @@ -1,8 +1,8 @@ input: - class: File - path: "../biometrics_extract_0.2.4/test.pk" + path: "../biometrics_extract_0.2.5/test.pk" - class: File - path: "../biometrics_extract_0.2.4/test2.pk" + path: "../biometrics_extract_0.2.5/test2.pk" database: null coverage_threshold: null prefix: null diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index f0e42d1c..385c5c4b 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -8,11 +8,11 @@ - [genomecov 
v2.28.0_cv2](../bedtools_genomecov_v2.28.0_cv2/README.md) - [merge v2.28.0_cv2](../bedtools_merge_v2.28.0_cv2/README.md) - Biometrics - - [extract v0.2.4](../biometrics_extract_0.2.4/README.md) - - [minor v0.2.4](../biometrics_minor_0.2.4/README.md) - - [major v0.2.4](../biometrics_major_0.2.4/README.md) - - [genotype v0.2.4](../biometrics_genotype_0.2.4/README.md) - - [sexmismatch v0.2.4](../biometrics_sexmismatch_0.2.4/README.md) + - [extract v0.2.5](../biometrics_extract_0.2.5/README.md) + - [minor v0.2.5](../biometrics_minor_0.2.5/README.md) + - [major v0.2.5](../biometrics_major_0.2.5/README.md) + - [genotype v0.2.5](../biometrics_genotype_0.2.5/README.md) + - [sexmismatch v0.2.5](../biometrics_sexmismatch_0.2.5/README.md) - Disambiguate - [v1.0.0](../disambiguate_1.0.0/README.md) - Fgbio From d1b7e69e1f938aedd4e7e5dd363ce35b36f03d14 Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Fri, 26 Mar 2021 22:56:43 -0400 Subject: [PATCH 332/476] make database and bed not required --- biometrics_extract_0.2.5/biometrics_extract_0.2.5.cwl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/biometrics_extract_0.2.5/biometrics_extract_0.2.5.cwl b/biometrics_extract_0.2.5/biometrics_extract_0.2.5.cwl index c8a51c39..ce20c290 100644 --- a/biometrics_extract_0.2.5/biometrics_extract_0.2.5.cwl +++ b/biometrics_extract_0.2.5/biometrics_extract_0.2.5.cwl @@ -64,14 +64,14 @@ inputs: doc: >- VCF file containing the SNPs to be queried. - id: bed_file - type: File + type: File? inputBinding: position: 0 prefix: --bed doc: >- BED file containing the intervals to be queried. - id: database - type: string + type: string? 
inputBinding: position: 0 prefix: --database From 582d81fb4c6dbf3b0f83ae4de6380e0ea17b55d0 Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Mon, 29 Mar 2021 09:38:50 -0400 Subject: [PATCH 333/476] update biometrics readmes --- biometrics_extract_0.2.5/README.md | 6 ++++-- biometrics_genotype_0.2.5/README.md | 6 ++++-- biometrics_major_0.2.5/README.md | 6 ++++-- biometrics_minor_0.2.5/README.md | 6 ++++-- biometrics_sexmismatch_0.2.5/README.md | 6 ++++-- 5 files changed, 20 insertions(+), 10 deletions(-) diff --git a/biometrics_extract_0.2.5/README.md b/biometrics_extract_0.2.5/README.md index fe6a70dc..a000da5c 100644 --- a/biometrics_extract_0.2.5/README.md +++ b/biometrics_extract_0.2.5/README.md @@ -1,8 +1,10 @@ -# CWL and Dockerfile for running biometrics extract tool. +# CWL for running biometrics extract tool. | Tool | Version | Location | |--- |--- |--- | -| biometrics | 0.2.5 | | +| biometrics | 0.2.5 | | + +The python package source code and Docker file are located on GitHub. ## CWL diff --git a/biometrics_genotype_0.2.5/README.md b/biometrics_genotype_0.2.5/README.md index 00e42c59..8134ea40 100644 --- a/biometrics_genotype_0.2.5/README.md +++ b/biometrics_genotype_0.2.5/README.md @@ -1,8 +1,10 @@ -# CWL and Dockerfile for running biometrics genotype tool. +# CWL for running biometrics genotype tool. | Tool | Version | Location | |--- |--- |--- | -| biometrics | 0.2.5 | | +| biometrics | 0.2.5 | | + +The python package source code and Docker file are located on GitHub. ## CWL diff --git a/biometrics_major_0.2.5/README.md b/biometrics_major_0.2.5/README.md index 89882940..23f0efc9 100644 --- a/biometrics_major_0.2.5/README.md +++ b/biometrics_major_0.2.5/README.md @@ -1,8 +1,10 @@ -# CWL and Dockerfile for running biometrics major tool. +# CWL for running biometrics major tool. | Tool | Version | Location | |--- |--- |--- | -| biometrics | 0.2.5 | | +| biometrics | 0.2.5 | | + +The python package source code and Docker file are located on GitHub. 
## CWL diff --git a/biometrics_minor_0.2.5/README.md b/biometrics_minor_0.2.5/README.md index ad71f05d..2d991e76 100644 --- a/biometrics_minor_0.2.5/README.md +++ b/biometrics_minor_0.2.5/README.md @@ -1,8 +1,10 @@ -# CWL and Dockerfile for running biometrics minor tool. +# CWL for running biometrics minor tool. | Tool | Version | Location | |--- |--- |--- | -| biometrics | 0.2.5 | | +| biometrics | 0.2.5 | | + +The python package source code and Docker file are located on GitHub. ## CWL diff --git a/biometrics_sexmismatch_0.2.5/README.md b/biometrics_sexmismatch_0.2.5/README.md index 36a800ca..e3459b13 100644 --- a/biometrics_sexmismatch_0.2.5/README.md +++ b/biometrics_sexmismatch_0.2.5/README.md @@ -1,8 +1,10 @@ -# CWL and Dockerfile for running biometrics sexmismatch tool. +# CWL for running biometrics sexmismatch tool. | Tool | Version | Location | |--- |--- |--- | -| biometrics | 0.2.5 | | +| biometrics | 0.2.5 | | + +The python package source code and Docker file are located on GitHub. 
## CWL From 0ba46070b86a1d797a92aee9d401a90ee9468572 Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Thu, 8 Apr 2021 17:06:01 -0400 Subject: [PATCH 334/476] Create put_in_dir.cwl --- utilities_ubuntu_18.04/put_in_dir.cwl | 52 +++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 utilities_ubuntu_18.04/put_in_dir.cwl diff --git a/utilities_ubuntu_18.04/put_in_dir.cwl b/utilities_ubuntu_18.04/put_in_dir.cwl new file mode 100644 index 00000000..bdab46fc --- /dev/null +++ b/utilities_ubuntu_18.04/put_in_dir.cwl @@ -0,0 +1,52 @@ +#!/usr/bin/env cwl-runner +cwlVersion: v1.0 + +class: ExpressionTool +# class: CommandLineTool +id: put-in-dir + +requirements: + - class: InlineJavascriptRequirement + +inputs: + + output_directory_name: string + + files: + type: + type: array + items: + - File + - Directory + - string + - 'null' + + +outputs: + + directory: + type: Directory + +# This tool returns a Directory object, +# which holds all output files from the list +# of supplied input files +expression: | + ${ + var output_files = []; + var input_files = inputs.files.filter(single_file => String(single_file).toUpperCase() != 'NONE'); + + + for (var i = 0; i < inputs.files.length; i++) { + if(input_files[i]){ + output_files.push(inputs.files[i]); + } + } + + return { + 'directory': { + 'class': 'Directory', + 'basename': inputs.output_directory_name, + 'listing': output_files + } + }; + } From bd8d864f68852555ab4c205faf600fbf250afa47 Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Thu, 8 Apr 2021 17:08:28 -0400 Subject: [PATCH 335/476] Update put_in_dir.cwl --- utilities_ubuntu_18.04/put_in_dir.cwl | 40 +++++++++++++++++++++------ 1 file changed, 31 insertions(+), 9 deletions(-) diff --git a/utilities_ubuntu_18.04/put_in_dir.cwl b/utilities_ubuntu_18.04/put_in_dir.cwl index bdab46fc..eb3a398b 100644 --- a/utilities_ubuntu_18.04/put_in_dir.cwl +++ b/utilities_ubuntu_18.04/put_in_dir.cwl @@ -1,17 +1,18 @@ #!/usr/bin/env cwl-runner -cwlVersion: v1.0 +# 
originally from https://github.com/mskcc/pluto-cwl +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' class: ExpressionTool # class: CommandLineTool id: put-in-dir -requirements: - - class: InlineJavascriptRequirement - inputs: - output_directory_name: string - files: type: type: array @@ -21,9 +22,7 @@ inputs: - string - 'null' - outputs: - directory: type: Directory @@ -35,7 +34,6 @@ expression: | var output_files = []; var input_files = inputs.files.filter(single_file => String(single_file).toUpperCase() != 'NONE'); - for (var i = 0; i < inputs.files.length; i++) { if(input_files[i]){ output_files.push(inputs.files[i]); @@ -50,3 +48,27 @@ expression: | } }; } + +requirements: + - class: ResourceRequirement + ramMin: 2000 + coresMin: 1 + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': put_in_dir + 'doap:revision': 18.04 From 3b57ce40e90f75560022c39d898633c6688a4200 Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Thu, 8 Apr 2021 21:08:45 -0400 Subject: [PATCH 336/476] add tool --- gatk_mean_quality_by_cycle/4.1.8.0/README.md | 76 +++++++ .../4.1.8.0/example_inputs.yaml | 21 ++ .../gatk_mean_quality_by_cycle_4.1.8.0.cwl | 195 ++++++++++++++++++ 3 files changed, 292 insertions(+) create mode 100644 gatk_mean_quality_by_cycle/4.1.8.0/README.md create mode 100644 gatk_mean_quality_by_cycle/4.1.8.0/example_inputs.yaml create mode 100644 
gatk_mean_quality_by_cycle/4.1.8.0/gatk_mean_quality_by_cycle_4.1.8.0.cwl diff --git a/gatk_mean_quality_by_cycle/4.1.8.0/README.md b/gatk_mean_quality_by_cycle/4.1.8.0/README.md new file mode 100644 index 00000000..00c43f60 --- /dev/null +++ b/gatk_mean_quality_by_cycle/4.1.8.0/README.md @@ -0,0 +1,76 @@ +# CWL for running GATK - MeanQualityByCycle + +## Version of tools in docker image + +| Tool | Version | Location | +|--- |--- |--- | +| gatk | 4.1.8.0 | broadinstitute/gatk:4.1.8.0 | + + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner gatk_mean_quality_by_cycle_4.1.8.0.cwl example_inputs.yaml +``` + +### Usage + +```bash +usage: ./GitHub/cwl-commandlinetools/gatk_mean_quality_by_cycle/4.1.8.0/gatk_mean_quality_by_cycle_4.1.8.0.cwl + [-h] --input INPUT [--output_file_name OUTPUT_FILE_NAME] + [--chart_output CHART_OUTPUT] [--assume_sorted] [--pf_reads_only] + [--reference REFERENCE] [--validation_stringency VALIDATION_STRINGENCY] + [--create_index] [--create_md5_file] [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] + [--temporary_directory TEMPORARY_DIRECTORY] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --input INPUT An aligned SAM or BAM file. Required. + --output_file_name OUTPUT_FILE_NAME + The output file to write the metrics to. + --chart_output CHART_OUTPUT + A file (with .pdf extension) to write the chart to. + --assume_sorted If true (default), then the sort order in the header + file will be ignored. + --pf_reads_only If set to true calculate mean quality over PF reads + only. Default value: false. Possible values: {true, + false} + --reference REFERENCE + Reference sequence file. 
Note that while this argument + is not required, without it only a small subset of the + metrics will be calculated. Note also that if a + reference sequence is provided, it must be accompanied + by a sequence dictionary. Default value: null. + --validation_stringency VALIDATION_STRINGENCY + Validation stringency for all SAM files read by this + program. Setting stringency to SILENT can improve + performance when processing a BAM file in which + variable-length data (read, qualities, tags) do not + otherwise need to be decoded. Default value: STRICT. + This option can be set to 'null' to clear the default + value. Possible values: {STRICT,LENIENT, SILENT} + --create_index Whether to create a BAM index when writing a + coordinate-sorted BAM file. Default value: false. + Possible values: {true, false} + --create_md5_file Whether to create an MD5 digest for any BAM or FASTQ + files created. Default value: false. Possible values: + {true, false} + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --temporary_directory TEMPORARY_DIRECTORY + Default value: null. This option may be specified 0 or + more times. 
+``` diff --git a/gatk_mean_quality_by_cycle/4.1.8.0/example_inputs.yaml b/gatk_mean_quality_by_cycle/4.1.8.0/example_inputs.yaml new file mode 100644 index 00000000..c45d0faa --- /dev/null +++ b/gatk_mean_quality_by_cycle/4.1.8.0/example_inputs.yaml @@ -0,0 +1,21 @@ +input: + class: File + metadata: {} + path: "/path/to/bam" + secondaryFiles: + - class: File + path: "/path/to/bam.bai" +output_file_name: null +chart_output: null +validation_stringency: null +assume_sorted: null +pf_reads_only: null +reference: + class: File + metadata: {} + path: ref.fasta +create_index: null +create_md5_file: null +memory_overhead: null +memory_per_job: null +number_of_threads: null diff --git a/gatk_mean_quality_by_cycle/4.1.8.0/gatk_mean_quality_by_cycle_4.1.8.0.cwl b/gatk_mean_quality_by_cycle/4.1.8.0/gatk_mean_quality_by_cycle_4.1.8.0.cwl new file mode 100644 index 00000000..6b0ce8d0 --- /dev/null +++ b/gatk_mean_quality_by_cycle/4.1.8.0/gatk_mean_quality_by_cycle_4.1.8.0.cwl @@ -0,0 +1,195 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: gatk_mean_quality_by_cycle_4_1_8_0 +baseCommand: + - gatk + - MeanQualityByCycle +inputs: + - id: input + type: File + inputBinding: + position: 0 + prefix: '-I' + doc: An aligned SAM or BAM file. Required. + - id: output_file_name + type: string? + doc: The output file to write the metrics to. + - id: chart_output + type: string? + doc: A file (with .pdf extension) to write the chart to. + - id: assume_sorted + type: boolean? + inputBinding: + position: 1 + prefix: '--ASSUME_SORTED' + doc: | + If true (default), then the sort order in the header file will be ignored. + - id: pf_reads_only + type: boolean? + inputBinding: + position: 1 + prefix: '--PF_READS_ONLY' + doc: | + If set to true calculate mean quality over PF reads only. Default value: false. 
Possible values: {true, false} + - id: reference + type: File? + inputBinding: + position: 0 + prefix: '-R' + doc: >- + Reference sequence file. Note that while this argument is not required, + without it only a small subset of the metrics will be calculated. Note + also that if a reference sequence is provided, it must be accompanied by a + sequence dictionary. Default value: null. + secondaryFiles: + - ^.fasta.fai + - ^.dict + - id: validation_stringency + type: string? + inputBinding: + position: 0 + prefix: '--VALIDATION_STRINGENCY' + doc: >- + Validation stringency for all SAM files read by this program. Setting + stringency to SILENT can improve performance when processing a BAM file in + which variable-length data (read, qualities, tags) do not otherwise need + to be decoded. Default value: STRICT. This option can be set to 'null' to + clear the default value. Possible values: {STRICT,LENIENT, SILENT} + - id: create_index + type: boolean? + inputBinding: + position: 0 + prefix: '--CREATE_INDEX' + doc: >- + Whether to create a BAM index when writing a coordinate-sorted BAM file. + Default value: false. Possible values: {true, false} + - id: create_md5_file + type: boolean? + inputBinding: + position: 0 + prefix: '--CREATE_MD5_FILE' + doc: >- + Whether to create an MD5 digest for any BAM or FASTQ files created. + Default value: false. Possible values: {true, false} + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: temporary_directory + type: string? + doc: 'Default value: null. This option may be specified 0 or more times.' 
+outputs: + - id: gatk_mean_quality_by_cycle_output + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/, '_mean_quality_by_cycle.txt') + } + } + - id: gatk_mean_quality_by_cycle_chart_output + type: File + outputBinding: + glob: |- + ${ + if(inputs.chart_output){ + return inputs.chart_output + } else { + return inputs.input.basename.replace(/.bam/, '_mean_quality_by_cycle.pdf') + } + } +label: GATK-MeanQualityByCycle +arguments: + - position: 0 + prefix: '--java-options' + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx15G" + } + else { + return "-Xmx15G" + } + } + - position: 0 + prefix: '--TMP_DIR' + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } + - position: 0 + prefix: '-O' + valueFrom: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/, '_mean_quality_by_cycle.txt') + } + } + - position: 0 + prefix: '--CHART_OUTPUT' + valueFrom: |- + ${ + if(inputs.chart_output){ + return inputs.chart_output + } else { + return inputs.input.basename.replace(/.bam/, '_mean_quality_by_cycle.pdf') + } + } + +requirements: + - class: ResourceRequirement + ramMin: 32000 + coresMin: 1 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/gatk:4.1.8.0' + - 
class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charles Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charles Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': gatk4 + 'doap:revision': 4.1.8.0 From 8642c3ca4dd03ea33c86c0e1ce3a484810af4e28 Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Thu, 8 Apr 2021 21:14:15 -0400 Subject: [PATCH 337/476] update docs --- docs/SUMMARY.md | 1 + gatk_mean_quality_by_cycle/{4.1.8.0 => }/README.md | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) rename gatk_mean_quality_by_cycle/{4.1.8.0 => }/README.md (96%) diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 385c5c4b..6f8e60e1 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -27,6 +27,7 @@ - [ApplyBQSR v4.1.8.1](../gatk_apply_bqsr_4.1.8.1/README.md) - [BaseRecalibrator v4.1.2.0](../gatk_BaseRecalibrator_4.1.2.0/README.md) - [BaseRecalibrator v4.1.8.1](../gatk_base_recalibrator_4.1.8.1/README.md) + - [MeanQualityByCycle](../gatk_mean_quality_by_cycle/README.md) - [MergeBamAlignment v4.1.8.0](../gatk_merge_bam_alignment_4.1.8.0/README.md) - [MergeSamFiles v4.1.8.0](../gatk_merge_sam_files_4.1.8.0/README.md) - [SamToFastq v4.1.8.0](../gatk_sam_to_fastq_4_1_8_0/README.md) diff --git a/gatk_mean_quality_by_cycle/4.1.8.0/README.md b/gatk_mean_quality_by_cycle/README.md similarity index 96% rename from gatk_mean_quality_by_cycle/4.1.8.0/README.md rename to gatk_mean_quality_by_cycle/README.md index 00c43f60..53b29701 100644 --- a/gatk_mean_quality_by_cycle/4.1.8.0/README.md +++ b/gatk_mean_quality_by_cycle/README.md @@ -20,7 +20,7 @@ ### Usage ```bash -usage: 
./GitHub/cwl-commandlinetools/gatk_mean_quality_by_cycle/4.1.8.0/gatk_mean_quality_by_cycle_4.1.8.0.cwl +usage: gatk_mean_quality_by_cycle_4.1.8.0.cwl [-h] --input INPUT [--output_file_name OUTPUT_FILE_NAME] [--chart_output CHART_OUTPUT] [--assume_sorted] [--pf_reads_only] [--reference REFERENCE] [--validation_stringency VALIDATION_STRINGENCY] From cbf75cbc82f44699f243f821657fa3e3c174fe6f Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Fri, 9 Apr 2021 14:25:23 -0400 Subject: [PATCH 338/476] extract output array, make some inputs optional --- .../biometrics_extract_0.2.5.cwl | 69 ++++++++++++------- 1 file changed, 43 insertions(+), 26 deletions(-) diff --git a/biometrics_extract_0.2.5/biometrics_extract_0.2.5.cwl b/biometrics_extract_0.2.5/biometrics_extract_0.2.5.cwl index ce20c290..0abf2aec 100644 --- a/biometrics_extract_0.2.5/biometrics_extract_0.2.5.cwl +++ b/biometrics_extract_0.2.5/biometrics_extract_0.2.5.cwl @@ -11,40 +11,53 @@ baseCommand: - extract inputs: - id: sample_bam - type: File? - inputBinding: - position: 0 - prefix: --sample-bam + type: + - type: array + items: File + inputBinding: + position: 0 + prefix: --sample-bam secondaryFiles: - ^.bai doc: >- BAM file. - id: sample_type - type: string? - inputBinding: - position: 0 - prefix: --sample-type + type: + - "null" + - type: array + items: string + inputBinding: + position: 0 + prefix: --sample-type doc: >- Sample types: Normal or Tumor. - id: sample_sex - type: string? - inputBinding: - position: 0 - prefix: --sample-sex + type: + - "null" + - type: array + items: string + inputBinding: + position: 0 + prefix: --sample-sex doc: >- Expected sample sex (i.e. M or F). - id: sample_group - type: string? - inputBinding: - position: 0 - prefix: --sample-group + type: + - "null" + - type: array + items: string + inputBinding: + position: 0 + prefix: --sample-group doc: >- The sample group (e.g. the sample patient ID). - id: sample_name - type: string? 
- inputBinding: - position: 0 - prefix: --sample-name + type: + - type: array + items: string + inputBinding: + position: 0 + prefix: --sample-name doc: >- Sample name. If not specified, sample name is automatically figured out from the BAM file. - id: fafile @@ -118,15 +131,19 @@ inputs: Default genotype if coverage is too low (options are Het or Hom). outputs: - id: biometrics_extract_pickle - type: File + type: + type: array + items: File outputBinding: glob: |- ${ - if (inputs.database) { - return inputs.database + '/' + inputs.sample_name + '.pk' - } else { - return inputs.sample_name + '.pk' - } + return inputs.sample_name.map(function (val) { + if (inputs.database) { + return inputs.database + '/' + val + '.pk'; + } else { + return val + '.pk'; + } + }); } requirements: - class: ResourceRequirement From 05d6eec2275da670e53b9f98456810b9704a02e2 Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Fri, 9 Apr 2021 14:25:59 -0400 Subject: [PATCH 339/476] genotype input array --- biometrics_extract_0.2.5/example_inputs.yaml | 22 ++++++++++--------- .../biometrics_genotype_0.2.5.cwl | 11 +++++----- 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/biometrics_extract_0.2.5/example_inputs.yaml b/biometrics_extract_0.2.5/example_inputs.yaml index 17567c06..566b496a 100644 --- a/biometrics_extract_0.2.5/example_inputs.yaml +++ b/biometrics_extract_0.2.5/example_inputs.yaml @@ -1,16 +1,18 @@ -sample_type: Tumor -sample_sex: M -sample_name: test -sample_group: test_patient +sample_type: + - "Normal" +sample_sex: + - "M" +sample_name: + - "test" +sample_group: + - "test" fafile: class: File - path: /path/to/fafile + path: /path/to/fasta sample_bam: - class: File - path: /path/to/bam -bed_file: - class: File - path: /path/to/bed + - class: File + path: /path/to/bam +bed_file: null vcf_file: class: File path: /path/to/vcf diff --git a/biometrics_genotype_0.2.5/biometrics_genotype_0.2.5.cwl b/biometrics_genotype_0.2.5/biometrics_genotype_0.2.5.cwl index 7e8c4d5e..51285328 
100644 --- a/biometrics_genotype_0.2.5/biometrics_genotype_0.2.5.cwl +++ b/biometrics_genotype_0.2.5/biometrics_genotype_0.2.5.cwl @@ -12,12 +12,11 @@ baseCommand: inputs: - id: input type: - type: array - items: File - inputBinding: - prefix: --input - inputBinding: - position: 0 + - type: array + items: File + inputBinding: + position: 0 + prefix: --input doc: >- Can be one of three types: (1) path to a CSV file containing sample information (one per line). For example: sample_name,sample_bam,sample_type,sample_sex,sample_group. (2) Path to a '*.pk' file that was produced by the 'extract' tool. (3) Name of the sample to analyze; this assumes there is a file named '{sample_name}.pk' in your database directory. Can be specified more than once. - id: database From 9da841af2e48aa566bf9fedd96e7d283bbc732bd Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Fri, 9 Apr 2021 16:01:20 -0400 Subject: [PATCH 340/476] reorganize biometrics, get latest version --- .../0.2.5/biometrics_extract.cwl | 0 .../0.2.5}/example_inputs.yaml | 0 .../0.2.7/biometrics_extract.cwl | 172 ++++++++++++++++++ biometrics_extract/0.2.7/example_inputs.yaml | 24 +++ .../README.md | 8 +- .../0.2.5/biometrics_genotype.cwl | 0 .../0.2.5}/example_inputs.yaml | 0 .../0.2.7/biometrics_genotype.cwl | 145 +++++++++++++++ biometrics_genotype/0.2.7/example_inputs.yaml | 12 ++ .../README.md | 8 +- .../0.2.5/biometrics_major.cwl | 0 .../0.2.5}/example_inputs.yaml | 0 biometrics_major/0.2.7/biometrics_major.cwl | 120 ++++++++++++ biometrics_major/0.2.7/example_inputs.yaml | 11 ++ .../README.md | 8 +- .../0.2.5/biometrics_minor.cwl | 0 .../0.2.5}/example_inputs.yaml | 0 biometrics_minor/0.2.7/biometrics_minor.cwl | 127 +++++++++++++ biometrics_minor/0.2.7/example_inputs.yaml | 11 ++ .../README.md | 8 +- .../0.2.5/biometrics_sexmismatch.cwl | 0 .../0.2.5}/example_inputs.yaml | 0 .../0.2.7/biometrics_sexmismatch.cwl | 106 +++++++++++ .../0.2.7/example_inputs.yaml | 10 + .../README.md | 8 +- docs/SUMMARY.md | 10 +- 26 
files changed, 763 insertions(+), 25 deletions(-) rename biometrics_extract_0.2.5/biometrics_extract_0.2.5.cwl => biometrics_extract/0.2.5/biometrics_extract.cwl (100%) rename {biometrics_extract_0.2.5 => biometrics_extract/0.2.5}/example_inputs.yaml (100%) create mode 100644 biometrics_extract/0.2.7/biometrics_extract.cwl create mode 100644 biometrics_extract/0.2.7/example_inputs.yaml rename {biometrics_extract_0.2.5 => biometrics_extract}/README.md (92%) rename biometrics_genotype_0.2.5/biometrics_genotype_0.2.5.cwl => biometrics_genotype/0.2.5/biometrics_genotype.cwl (100%) rename {biometrics_genotype_0.2.5 => biometrics_genotype/0.2.5}/example_inputs.yaml (100%) create mode 100644 biometrics_genotype/0.2.7/biometrics_genotype.cwl create mode 100644 biometrics_genotype/0.2.7/example_inputs.yaml rename {biometrics_genotype_0.2.5 => biometrics_genotype}/README.md (89%) rename biometrics_major_0.2.5/biometrics_major_0.2.5.cwl => biometrics_major/0.2.5/biometrics_major.cwl (100%) rename {biometrics_major_0.2.5 => biometrics_major/0.2.5}/example_inputs.yaml (100%) create mode 100644 biometrics_major/0.2.7/biometrics_major.cwl create mode 100644 biometrics_major/0.2.7/example_inputs.yaml rename {biometrics_major_0.2.5 => biometrics_major}/README.md (88%) rename biometrics_minor_0.2.5/biometrics_minor_0.2.5.cwl => biometrics_minor/0.2.5/biometrics_minor.cwl (100%) rename {biometrics_minor_0.2.5 => biometrics_minor/0.2.5}/example_inputs.yaml (100%) create mode 100644 biometrics_minor/0.2.7/biometrics_minor.cwl create mode 100644 biometrics_minor/0.2.7/example_inputs.yaml rename {biometrics_minor_0.2.5 => biometrics_minor}/README.md (88%) rename biometrics_sexmismatch_0.2.5/biometrics_sexmismatch_0.2.5.cwl => biometrics_sexmismatch/0.2.5/biometrics_sexmismatch.cwl (100%) rename {biometrics_sexmismatch_0.2.5 => biometrics_sexmismatch/0.2.5}/example_inputs.yaml (100%) create mode 100644 biometrics_sexmismatch/0.2.7/biometrics_sexmismatch.cwl create mode 100644 
biometrics_sexmismatch/0.2.7/example_inputs.yaml rename {biometrics_sexmismatch_0.2.5 => biometrics_sexmismatch}/README.md (89%) diff --git a/biometrics_extract_0.2.5/biometrics_extract_0.2.5.cwl b/biometrics_extract/0.2.5/biometrics_extract.cwl similarity index 100% rename from biometrics_extract_0.2.5/biometrics_extract_0.2.5.cwl rename to biometrics_extract/0.2.5/biometrics_extract.cwl diff --git a/biometrics_extract_0.2.5/example_inputs.yaml b/biometrics_extract/0.2.5/example_inputs.yaml similarity index 100% rename from biometrics_extract_0.2.5/example_inputs.yaml rename to biometrics_extract/0.2.5/example_inputs.yaml diff --git a/biometrics_extract/0.2.7/biometrics_extract.cwl b/biometrics_extract/0.2.7/biometrics_extract.cwl new file mode 100644 index 00000000..d8a4b44b --- /dev/null +++ b/biometrics_extract/0.2.7/biometrics_extract.cwl @@ -0,0 +1,172 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: biometrics_extract +baseCommand: + - biometrics + - extract +inputs: + - id: sample_bam + type: + - type: array + items: File + inputBinding: + position: 0 + prefix: --sample-bam + secondaryFiles: + - ^.bai + doc: >- + BAM file. + - id: sample_type + type: + - "null" + - type: array + items: string + inputBinding: + position: 0 + prefix: --sample-type + doc: >- + Sample types: Normal or Tumor. + - id: sample_sex + type: + - "null" + - type: array + items: string + inputBinding: + position: 0 + prefix: --sample-sex + doc: >- + Expected sample sex (i.e. M or F). + - id: sample_group + type: + - "null" + - type: array + items: string + inputBinding: + position: 0 + prefix: --sample-group + doc: >- + The sample group (e.g. the sample patient ID). + - id: sample_name + type: + - type: array + items: string + inputBinding: + position: 0 + prefix: --sample-name + doc: >- + Sample name. 
If not specified, sample name is automatically figured out from the BAM file. + - id: fafile + type: File + inputBinding: + position: 0 + prefix: --fafile + secondaryFiles: + - ^.fasta.fai + doc: >- + Path to reference fasta. + - id: vcf_file + type: File + inputBinding: + position: 0 + prefix: --vcf + doc: >- + VCF file containing the SNPs to be queried. + - id: bed_file + type: File? + inputBinding: + position: 0 + prefix: --bed + doc: >- + BED file containing the intervals to be queried. + - id: database + type: string? + inputBinding: + position: 0 + prefix: --database + doc: >- + Directory to store the intermediate files after running the extraction step. + - id: min_mapping_quality + type: int? + default: 1 + inputBinding: + position: 0 + prefix: --min-mapping-quality + doc: >- + Minimum mapping quality of reads to be used for pileup. + - id: min_base_quality + type: int? + default: 1 + inputBinding: + position: 0 + prefix: --min-base-quality + doc: >- + Minimum base quality of reads to be used for pileup. + - id: min_coverage + type: int? + default: 10 + inputBinding: + position: 0 + prefix: --min-coverage + doc: >- + Minimum coverage to count a site. + - id: min_homozygous_thresh + type: float? + default: 0.1 + inputBinding: + position: 0 + prefix: --min-homozygous-thresh + doc: >- + Minimum threshold to define homozygous. + - id: default_genotype + type: string? + inputBinding: + position: 0 + prefix: --default-genotype + doc: >- + Default genotype if coverage is too low (options are Het or Hom). 
+outputs: + - id: biometrics_extract_pickle + type: + type: array + items: File + outputBinding: + glob: |- + ${ + return inputs.sample_name.map(function (val) { + if (inputs.database) { + return inputs.database + '/' + val + '.pk'; + } else { + return val + '.pk'; + } + }); + } +requirements: + - class: ResourceRequirement + ramMin: 16000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/biometrics:0.2.7' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': biometrics + 'doap:revision': 0.2.7 diff --git a/biometrics_extract/0.2.7/example_inputs.yaml b/biometrics_extract/0.2.7/example_inputs.yaml new file mode 100644 index 00000000..566b496a --- /dev/null +++ b/biometrics_extract/0.2.7/example_inputs.yaml @@ -0,0 +1,24 @@ +sample_type: + - "Normal" +sample_sex: + - "M" +sample_name: + - "test" +sample_group: + - "test" +fafile: + class: File + path: /path/to/fasta +sample_bam: + - class: File + path: /path/to/bam +bed_file: null +vcf_file: + class: File + path: /path/to/vcf +database: null +min_mapping_quality: null +min_base_quality: null +min_coverage: null +min_homozygous_thresh: null +default_genotype: null diff --git a/biometrics_extract_0.2.5/README.md b/biometrics_extract/README.md similarity index 92% rename from biometrics_extract_0.2.5/README.md rename to biometrics_extract/README.md index a000da5c..3475e69b 100644 --- a/biometrics_extract_0.2.5/README.md +++ b/biometrics_extract/README.md @@ -1,8 +1,8 @@ # CWL for running biometrics extract tool. 
-| Tool | Version | Location | +| Tool | Latest version | Location | |--- |--- |--- | -| biometrics | 0.2.5 | | +| biometrics | 0.2.7 | | The python package source code and Docker file are located on GitHub. @@ -13,13 +13,13 @@ The python package source code and Docker file are located on GitHub. - Example Command using [toil](https://toil.readthedocs.io): ```bash - > toil-cwl-runner biometrics_extract_0.2.5.cwl example_inputs.yaml + > toil-cwl-runner biometrics_extract.cwl example_inputs.yaml ``` ### Usage ```bash -usage: biometrics_extract_0.2.5.cwl [-h] [--sample_bam SAMPLE_BAM] +usage: biometrics_extract.cwl [-h] [--sample_bam SAMPLE_BAM] [--sample_type SAMPLE_TYPE] [--sample_sex SAMPLE_SEX] [--sample_group SAMPLE_GROUP] diff --git a/biometrics_genotype_0.2.5/biometrics_genotype_0.2.5.cwl b/biometrics_genotype/0.2.5/biometrics_genotype.cwl similarity index 100% rename from biometrics_genotype_0.2.5/biometrics_genotype_0.2.5.cwl rename to biometrics_genotype/0.2.5/biometrics_genotype.cwl diff --git a/biometrics_genotype_0.2.5/example_inputs.yaml b/biometrics_genotype/0.2.5/example_inputs.yaml similarity index 100% rename from biometrics_genotype_0.2.5/example_inputs.yaml rename to biometrics_genotype/0.2.5/example_inputs.yaml diff --git a/biometrics_genotype/0.2.7/biometrics_genotype.cwl b/biometrics_genotype/0.2.7/biometrics_genotype.cwl new file mode 100644 index 00000000..5808f7b9 --- /dev/null +++ b/biometrics_genotype/0.2.7/biometrics_genotype.cwl @@ -0,0 +1,145 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: biometrics_genotype +baseCommand: + - biometrics + - genotype +inputs: + - id: input + type: + - type: array + items: File + inputBinding: + position: 0 + prefix: --input + doc: >- + Can be one of three types: (1) path to a CSV file containing sample information (one per line). 
For example: sample_name,sample_bam,sample_type,sample_sex,sample_group. (2) Path to a '*.pk' file that was produced by the 'extract' tool. (3) Name of the sample to analyze; this assumes there is a file named '{sample_name}.pk' in your database directory. Can be specified more than once. + - id: database + type: string? + inputBinding: + position: 0 + prefix: --database + doc: >- + Directory to store the intermediate files after running the extraction step. + - id: discordance_threshold + type: float? + default: 0.05 + inputBinding: + position: 0 + prefix: --discordance-threshold + doc: >- + Discordance values less than this are regarded as matching samples. (default: 0.05) + - id: prefix + type: string? + inputBinding: + position: 0 + prefix: --prefix + doc: >- + Output file prefix. + - id: plot + type: boolean? + inputBinding: + position: 0 + prefix: --plot + doc: >- + Also output plots of the data. + - id: json + type: boolean? + inputBinding: + position: 0 + prefix: --json + doc: >- + Also output data in JSON format. + - id: no_db_comparison + type: boolean? + inputBinding: + position: 0 + prefix: --no-db-compare + doc: >- + Do not compare the sample(s) you provided to all samples in the database, only compare them with each other. + - id: threads + type: int? + default: 2 + inputBinding: + position: 0 + prefix: --threads + doc: >- + Number of threads to use. +outputs: + - id: biometrics_genotype_comparisons + type: File + outputBinding: + glob: |- + ${ + if (inputs.prefix) { + return inputs.prefix + '_genotype_comparison.csv' + } else { + return 'genotype_comparison.csv' + } + } + - id: biometrics_genotype_cluster_input + type: File + outputBinding: + glob: |- + ${ + if (inputs.prefix) { + return inputs.prefix + '_genotype_clusters_input.csv' + } else { + return 'genotype_clusters_input.csv' + } + } + - id: biometrics_genotype_cluster_input_database + type: File? 
+ outputBinding: + glob: |- + ${ + if (inputs.prefix) { + return inputs.prefix + '_genotype_clusters_database.csv' + } else { + return 'genotype_clusters_database.csv' + } + } + - id: biometrics_genotype_plot_input + type: File? + outputBinding: + glob: |- + ${ + return 'genotype_comparison_input.html' + } + - id: biometrics_genotype_plot_input_database + type: File? + outputBinding: + glob: |- + ${ + return 'genotype_comparison_database.html' + } +requirements: + - class: ResourceRequirement + ramMin: 16000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/biometrics:0.2.7' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': biometrics + 'doap:revision': 0.2.7 diff --git a/biometrics_genotype/0.2.7/example_inputs.yaml b/biometrics_genotype/0.2.7/example_inputs.yaml new file mode 100644 index 00000000..0bc68d94 --- /dev/null +++ b/biometrics_genotype/0.2.7/example_inputs.yaml @@ -0,0 +1,12 @@ +input: + - class: File + path: "../biometrics_extract_0.2.5/test.pk" + - class: File + path: "../biometrics_extract_0.2.5/test2.pk" +database: null +prefix: 'test' +outdir: null +plot: true +no_db_comparison: false +threads: null +discordance_threshold: null diff --git a/biometrics_genotype_0.2.5/README.md b/biometrics_genotype/README.md similarity index 89% rename from biometrics_genotype_0.2.5/README.md rename to biometrics_genotype/README.md index 8134ea40..318955cf 100644 --- a/biometrics_genotype_0.2.5/README.md +++ b/biometrics_genotype/README.md @@ -1,8 +1,8 @@ # CWL for 
running biometrics genotype tool. -| Tool | Version | Location | +| Tool | Latest version | Location | |--- |--- |--- | -| biometrics | 0.2.5 | | +| biometrics | 0.2.7 | | The python package source code and Docker file are located on GitHub. @@ -13,13 +13,13 @@ The python package source code and Docker file are located on GitHub. - Example Command using [toil](https://toil.readthedocs.io): ```bash - > toil-cwl-runner biometrics_genotype_0.2.5.cwl example_inputs.yaml + > toil-cwl-runner biometrics_genotype.cwl example_inputs.yaml ``` ### Usage ```bash -usage: biometrics_genotype_0.2.5.cwl [-h] --input INPUT [--database DATABASE] +usage: biometrics_genotype.cwl [-h] --input INPUT [--database DATABASE] [--discordance_threshold DISCORDANCE_THRESHOLD] [--prefix PREFIX] [--plot] [--json] [--no_db_comparison] [--threads THREADS] diff --git a/biometrics_major_0.2.5/biometrics_major_0.2.5.cwl b/biometrics_major/0.2.5/biometrics_major.cwl similarity index 100% rename from biometrics_major_0.2.5/biometrics_major_0.2.5.cwl rename to biometrics_major/0.2.5/biometrics_major.cwl diff --git a/biometrics_major_0.2.5/example_inputs.yaml b/biometrics_major/0.2.5/example_inputs.yaml similarity index 100% rename from biometrics_major_0.2.5/example_inputs.yaml rename to biometrics_major/0.2.5/example_inputs.yaml diff --git a/biometrics_major/0.2.7/biometrics_major.cwl b/biometrics_major/0.2.7/biometrics_major.cwl new file mode 100644 index 00000000..6730dfb0 --- /dev/null +++ b/biometrics_major/0.2.7/biometrics_major.cwl @@ -0,0 +1,120 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: biometrics_major +baseCommand: + - biometrics + - major +inputs: + - id: input + type: + type: array + items: File + inputBinding: + prefix: --input + inputBinding: + position: 0 + doc: >- + Can be one of three types: (1) path to a CSV file 
containing sample information (one per line). For example: sample_name,sample_bam,sample_type,sample_sex,sample_group. (2) Path to a '*.pk' file that was produced by the 'extract' tool. (3) Name of the sample to analyze; this assumes there is a file named '{sample_name}.pk' in your database directory. Can be specified more than once. + - id: database + type: string? + inputBinding: + position: 0 + prefix: --database + doc: >- + Directory to store the intermediate files after running the extraction step. + - id: major_threshold + type: float? + default: 0.6 + inputBinding: + position: 0 + prefix: --major-threshold + doc: >- + Major contamination threshold for bad sample. + - id: prefix + type: string? + inputBinding: + position: 0 + prefix: --prefix + doc: >- + Output file prefix. + - id: plot + type: boolean? + inputBinding: + position: 0 + prefix: --plot + doc: >- + Also output plots of the data. + - id: json + type: boolean? + inputBinding: + position: 0 + prefix: --json + doc: >- + Also output data in JSON format. + - id: no_db_comparison + type: boolean? + inputBinding: + position: 0 + prefix: --no-db-compare + doc: >- + Do not compare the sample(s) you provided to all samples in the database, only compare them with each other. +outputs: + - id: biometrics_major_csv + type: File + outputBinding: + glob: |- + ${ + if (inputs.prefix) { + return inputs.prefix + '_major_contamination.csv' + } else { + return 'major_contamination.csv' + } + } + - id: biometrics_major_json + type: File? + outputBinding: + glob: |- + ${ + if (inputs.prefix) { + return inputs.prefix + '_major_contamination.json' + } else { + return 'major_contamination.json' + } + } + - id: biometrics_major_plot + type: File? 
+ outputBinding: + glob: |- + ${ + return 'major_contamination.html' + } +requirements: + - class: ResourceRequirement + ramMin: 16000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/biometrics:0.2.7' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': biometrics + 'doap:revision': 0.2.7 diff --git a/biometrics_major/0.2.7/example_inputs.yaml b/biometrics_major/0.2.7/example_inputs.yaml new file mode 100644 index 00000000..da03de55 --- /dev/null +++ b/biometrics_major/0.2.7/example_inputs.yaml @@ -0,0 +1,11 @@ +input: + - class: File + path: "../biometrics_extract_0.2.5/test.pk" + - class: File + path: "../biometrics_extract_0.2.5/test2.pk" +database: null +major_threshold: null +prefix: null +plot: true +json: true +no_db_comparison: null diff --git a/biometrics_major_0.2.5/README.md b/biometrics_major/README.md similarity index 88% rename from biometrics_major_0.2.5/README.md rename to biometrics_major/README.md index 23f0efc9..9f6d0343 100644 --- a/biometrics_major_0.2.5/README.md +++ b/biometrics_major/README.md @@ -1,8 +1,8 @@ # CWL for running biometrics major tool. -| Tool | Version | Location | +| Tool | Latest version | Location | |--- |--- |--- | -| biometrics | 0.2.5 | | +| biometrics | 0.2.7 | | The python package source code and Docker file are located on GitHub. @@ -13,13 +13,13 @@ The python package source code and Docker file are located on GitHub. 
- Example Command using [toil](https://toil.readthedocs.io): ```bash - > toil-cwl-runner biometrics_major_0.2.5.cwl example_inputs.yaml + > toil-cwl-runner biometrics_major.cwl example_inputs.yaml ``` ### Usage ```bash -usage: biometrics_major_0.2.5.cwl [-h] --input INPUT [--database DATABASE] +usage: biometrics_major.cwl [-h] --input INPUT [--database DATABASE] [--major_threshold MAJOR_THRESHOLD] [--prefix PREFIX] [--plot] [--json] [--no_db_comparison] diff --git a/biometrics_minor_0.2.5/biometrics_minor_0.2.5.cwl b/biometrics_minor/0.2.5/biometrics_minor.cwl similarity index 100% rename from biometrics_minor_0.2.5/biometrics_minor_0.2.5.cwl rename to biometrics_minor/0.2.5/biometrics_minor.cwl diff --git a/biometrics_minor_0.2.5/example_inputs.yaml b/biometrics_minor/0.2.5/example_inputs.yaml similarity index 100% rename from biometrics_minor_0.2.5/example_inputs.yaml rename to biometrics_minor/0.2.5/example_inputs.yaml diff --git a/biometrics_minor/0.2.7/biometrics_minor.cwl b/biometrics_minor/0.2.7/biometrics_minor.cwl new file mode 100644 index 00000000..67857984 --- /dev/null +++ b/biometrics_minor/0.2.7/biometrics_minor.cwl @@ -0,0 +1,127 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: biometrics_minor +baseCommand: + - biometrics + - minor +inputs: + - id: input + type: + type: array + items: File + inputBinding: + prefix: --input + inputBinding: + position: 0 + doc: >- + Can be one of three types: (1) path to a CSV file containing sample information (one per line). For example: sample_name,sample_bam,sample_type,sample_sex,sample_group. (2) Path to a '*.pk' file that was produced by the 'extract' tool. (3) Name of the sample to analyze; this assumes there is a file named '{sample_name}.pk' in your database directory. Can be specified more than once. + - id: database + type: string? 
+ inputBinding: + position: 0 + prefix: --database + doc: >- + Directory to store the intermediate files after running the extraction step. + - id: minor_threshold + type: float? + default: 0.002 + inputBinding: + position: 0 + prefix: --minor-threshold + doc: >- + Minor contamination threshold for bad sample. + - id: prefix + type: string? + inputBinding: + position: 0 + prefix: --prefix + doc: >- + Output file prefix. + - id: plot + type: boolean? + inputBinding: + position: 0 + prefix: --plot + doc: >- + Also output plots of the data. + - id: json + type: boolean? + inputBinding: + position: 0 + prefix: --json + doc: >- + Also output data in JSON format. + - id: no_db_comparison + type: boolean? + inputBinding: + position: 0 + prefix: --no-db-compare + doc: >- + Do not compare the sample(s) you provided to all samples in the database, only compare them with each other. +outputs: + - id: biometrics_minor_csv + type: File + outputBinding: + glob: |- + ${ + if (inputs.prefix) { + return inputs.prefix + '_minor_contamination.csv' + } else { + return 'minor_contamination.csv' + } + } + - id: biometrics_minor_json + type: File? + outputBinding: + glob: |- + ${ + if (inputs.prefix) { + return inputs.prefix + '_minor_contamination.json' + } else { + return 'minor_contamination.json' + } + } + - id: biometrics_minor_plot + type: File? + outputBinding: + glob: |- + ${ + return 'minor_contamination.html' + } + - id: biometrics_minor_sites_plot + type: File? 
+ outputBinding: + glob: |- + ${ + return 'minor_contamination_sites.html' + } +requirements: + - class: ResourceRequirement + ramMin: 16000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/biometrics:0.2.7' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': biometrics + 'doap:revision': 0.2.7 diff --git a/biometrics_minor/0.2.7/example_inputs.yaml b/biometrics_minor/0.2.7/example_inputs.yaml new file mode 100644 index 00000000..bddb4c72 --- /dev/null +++ b/biometrics_minor/0.2.7/example_inputs.yaml @@ -0,0 +1,11 @@ +input: + - class: File + path: "../biometrics_extract_0.2.5/test.pk" + - class: File + path: "../biometrics_extract_0.2.5/test2.pk" +database: null +minor_threshold: null +prefix: null +plot: true +json: true +no_db_comparison: null diff --git a/biometrics_minor_0.2.5/README.md b/biometrics_minor/README.md similarity index 88% rename from biometrics_minor_0.2.5/README.md rename to biometrics_minor/README.md index 2d991e76..e014845e 100644 --- a/biometrics_minor_0.2.5/README.md +++ b/biometrics_minor/README.md @@ -1,8 +1,8 @@ # CWL for running biometrics minor tool. -| Tool | Version | Location | +| Tool | Latest version | Location | |--- |--- |--- | -| biometrics | 0.2.5 | | +| biometrics | 0.2.7 | | The python package source code and Docker file are located on GitHub. @@ -13,13 +13,13 @@ The python package source code and Docker file are located on GitHub. 
- Example Command using [toil](https://toil.readthedocs.io): ```bash - > toil-cwl-runner biometrics_minor_0.2.5.cwl example_inputs.yaml + > toil-cwl-runner biometrics_minor.cwl example_inputs.yaml ``` ### Usage ```bash -usage: biometrics_minor_0.2.5.cwl [-h] --input INPUT [--database DATABASE] +usage: biometrics_minor.cwl [-h] --input INPUT [--database DATABASE] [--minor_threshold MINOR_THRESHOLD] [--prefix PREFIX] [--plot] [--json] [--no_db_comparison] diff --git a/biometrics_sexmismatch_0.2.5/biometrics_sexmismatch_0.2.5.cwl b/biometrics_sexmismatch/0.2.5/biometrics_sexmismatch.cwl similarity index 100% rename from biometrics_sexmismatch_0.2.5/biometrics_sexmismatch_0.2.5.cwl rename to biometrics_sexmismatch/0.2.5/biometrics_sexmismatch.cwl diff --git a/biometrics_sexmismatch_0.2.5/example_inputs.yaml b/biometrics_sexmismatch/0.2.5/example_inputs.yaml similarity index 100% rename from biometrics_sexmismatch_0.2.5/example_inputs.yaml rename to biometrics_sexmismatch/0.2.5/example_inputs.yaml diff --git a/biometrics_sexmismatch/0.2.7/biometrics_sexmismatch.cwl b/biometrics_sexmismatch/0.2.7/biometrics_sexmismatch.cwl new file mode 100644 index 00000000..80ef0363 --- /dev/null +++ b/biometrics_sexmismatch/0.2.7/biometrics_sexmismatch.cwl @@ -0,0 +1,106 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: biometrics_sexmismatch +baseCommand: + - biometrics + - sexmismatch +inputs: + - id: input + type: + type: array + items: File + inputBinding: + prefix: --input + inputBinding: + position: 0 + doc: >- + Can be one of three types: (1) path to a CSV file containing sample information (one per line). For example: sample_name,sample_bam,sample_type,sample_sex,sample_group. (2) Path to a '*.pk' file that was produced by the 'extract' tool. 
(3) Name of the sample to analyze; this assumes there is a file named '{sample_name}.pk' in your database directory. Can be specified more than once. + - id: database + type: string? + inputBinding: + position: 0 + prefix: --database + doc: >- + Directory to store the intermediate files after running the extraction step. + - id: coverage_threshold + type: int? + default: 50 + inputBinding: + position: 0 + prefix: --coverage-threshold + doc: >- + Samples with Y chromosome above this value will be considered male. + - id: prefix + type: string? + inputBinding: + position: 0 + prefix: --prefix + doc: >- + Output file prefix. + - id: json + type: boolean? + inputBinding: + position: 0 + prefix: --json + doc: >- + Also output data in JSON format. + - id: no_db_comparison + type: boolean? + inputBinding: + position: 0 + prefix: --no-db-compare + doc: >- + Do not compare the sample(s) you provided to all samples in the database, only compare them with each other. +outputs: + - id: biometrics_sexmismatch_csv + type: File + outputBinding: + glob: |- + ${ + if (inputs.prefix) { + return inputs.prefix + '_sex_mismatch.csv' + } else { + return 'sex_mismatch.csv' + } + } + - id: biometrics_sexmismatch_json + type: File? 
+ outputBinding: + glob: |- + ${ + if (inputs.prefix) { + return inputs.prefix + '_sex_mismatch.json' + } else { + return 'sex_mismatch.json' + } + } +requirements: + - class: ResourceRequirement + ramMin: 16000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/biometrics:0.2.7' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': biometrics + 'doap:revision': 0.2.7 diff --git a/biometrics_sexmismatch/0.2.7/example_inputs.yaml b/biometrics_sexmismatch/0.2.7/example_inputs.yaml new file mode 100644 index 00000000..60832e43 --- /dev/null +++ b/biometrics_sexmismatch/0.2.7/example_inputs.yaml @@ -0,0 +1,10 @@ +input: + - class: File + path: "../biometrics_extract_0.2.5/test.pk" + - class: File + path: "../biometrics_extract_0.2.5/test2.pk" +database: null +coverage_threshold: null +prefix: null +json: true +no_db_comparison: null diff --git a/biometrics_sexmismatch_0.2.5/README.md b/biometrics_sexmismatch/README.md similarity index 89% rename from biometrics_sexmismatch_0.2.5/README.md rename to biometrics_sexmismatch/README.md index e3459b13..f028957b 100644 --- a/biometrics_sexmismatch_0.2.5/README.md +++ b/biometrics_sexmismatch/README.md @@ -1,8 +1,8 @@ # CWL for running biometrics sexmismatch tool. -| Tool | Version | Location | +| Tool | Latest version | Location | |--- |--- |--- | -| biometrics | 0.2.5 | | +| biometrics | 0.2.7 | | The python package source code and Docker file are located on GitHub. 
@@ -13,13 +13,13 @@ The python package source code and Docker file are located on GitHub. - Example Command using [toil](https://toil.readthedocs.io): ```bash - > toil-cwl-runner biometrics_sexmismatch_0.2.5.cwl example_inputs.yaml + > toil-cwl-runner biometrics_sexmismatch.cwl example_inputs.yaml ``` ### Usage ```bash -usage: biometrics_sexmismatch_0.2.5.cwl [-h] --input INPUT +usage: biometrics_sexmismatch.cwl [-h] --input INPUT [--database DATABASE] [--coverage_threshold COVERAGE_THRESHOLD] [--prefix PREFIX] [--json] diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 385c5c4b..4587aff4 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -8,11 +8,11 @@ - [genomecov v2.28.0_cv2](../bedtools_genomecov_v2.28.0_cv2/README.md) - [merge v2.28.0_cv2](../bedtools_merge_v2.28.0_cv2/README.md) - Biometrics - - [extract v0.2.5](../biometrics_extract_0.2.5/README.md) - - [minor v0.2.5](../biometrics_minor_0.2.5/README.md) - - [major v0.2.5](../biometrics_major_0.2.5/README.md) - - [genotype v0.2.5](../biometrics_genotype_0.2.5/README.md) - - [sexmismatch v0.2.5](../biometrics_sexmismatch_0.2.5/README.md) + - [extract](../biometrics_extract/README.md) + - [minor](../biometrics_minor/README.md) + - [major](../biometrics_major/README.md) + - [genotype](../biometrics_genotype/README.md) + - [sexmismatch](../biometrics_sexmismatch/README.md) - Disambiguate - [v1.0.0](../disambiguate_1.0.0/README.md) - Fgbio From 908578ebd960b77ef09f5d26ac7ce720a9af8404 Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Mon, 12 Apr 2021 11:37:35 -0400 Subject: [PATCH 341/476] move put in dir --- expression_tools/README.md | 17 ++++++++ expression_tools/put_in_dir.cwl | 70 +++++++++++++++++++++++++++++++++ 2 files changed, 87 insertions(+) create mode 100644 expression_tools/README.md create mode 100644 expression_tools/put_in_dir.cwl diff --git a/expression_tools/README.md b/expression_tools/README.md new file mode 100644 index 00000000..f0c79bf1 --- /dev/null +++ b/expression_tools/README.md @@ 
-0,0 +1,17 @@ +# CWL Expression tools + +## Available tools + +| Tool | Description | +| -------- | ------------------------ | +| put_in_dir.cwl | put the list of files into the same directory | + +## CWL + +- CWL specification 1.0 +- Use example_inputs_toolname.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner put_in_dir.cwl example_inputs_gzip.yaml +``` diff --git a/expression_tools/put_in_dir.cwl b/expression_tools/put_in_dir.cwl new file mode 100644 index 00000000..bbf5889a --- /dev/null +++ b/expression_tools/put_in_dir.cwl @@ -0,0 +1,70 @@ +#!/usr/bin/env cwl-runner +# originally from https://github.com/mskcc/pluto-cwl + +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +class: ExpressionTool +# class: CommandLineTool +id: put-in-dir + +inputs: + output_directory_name: string + files: + type: + type: array + items: + - File + - Directory + - string + - 'null' + +outputs: + directory: + type: Directory + +# This tool returns a Directory object, +# which holds all output files from the list +# of supplied input files +expression: | + ${ + var output_files = []; + var input_files = inputs.files.filter(single_file => String(single_file).toUpperCase() != 'NONE'); + + for (var i = 0; i < inputs.files.length; i++) { + if(input_files[i]){ + output_files.push(inputs.files[i]); + } + } + + return { + 'directory': { + 'class': 'Directory', + 'basename': inputs.output_directory_name, + 'listing': output_files + } + }; + } + +requirements: + - class: ResourceRequirement + ramMin: 2000 + coresMin: 1 + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center 
+'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center From e0a71fabb160827ffbf9b8b1ce7b060b65483036 Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Mon, 12 Apr 2021 11:39:08 -0400 Subject: [PATCH 342/476] update ram and cores --- .../4.1.8.0/gatk_mean_quality_by_cycle_4.1.8.0.cwl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/gatk_mean_quality_by_cycle/4.1.8.0/gatk_mean_quality_by_cycle_4.1.8.0.cwl b/gatk_mean_quality_by_cycle/4.1.8.0/gatk_mean_quality_by_cycle_4.1.8.0.cwl index 6b0ce8d0..177ad27e 100644 --- a/gatk_mean_quality_by_cycle/4.1.8.0/gatk_mean_quality_by_cycle_4.1.8.0.cwl +++ b/gatk_mean_quality_by_cycle/4.1.8.0/gatk_mean_quality_by_cycle_4.1.8.0.cwl @@ -133,10 +133,10 @@ arguments: } } else if(!inputs.memory_per_job && inputs.memory_overhead){ - return "-Xmx15G" + return "-Xmx14G" } else { - return "-Xmx15G" + return "-Xmx14G" } } - position: 0 @@ -170,8 +170,8 @@ arguments: requirements: - class: ResourceRequirement - ramMin: 32000 - coresMin: 1 + ramMin: 16000 + coresMin: 2 - class: DockerRequirement dockerPull: 'ghcr.io/msk-access/gatk:4.1.8.0' - class: InlineJavascriptRequirement From 69ac3170544709a34e00385096353c6c5a19c829 Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Mon, 12 Apr 2021 11:46:00 -0400 Subject: [PATCH 343/476] Update example_inputs.yaml --- gatk_mean_quality_by_cycle/4.1.8.0/example_inputs.yaml | 3 --- 1 file changed, 3 deletions(-) diff --git a/gatk_mean_quality_by_cycle/4.1.8.0/example_inputs.yaml b/gatk_mean_quality_by_cycle/4.1.8.0/example_inputs.yaml index c45d0faa..11eb7e0d 100644 --- a/gatk_mean_quality_by_cycle/4.1.8.0/example_inputs.yaml +++ b/gatk_mean_quality_by_cycle/4.1.8.0/example_inputs.yaml @@ -2,9 +2,6 @@ input: class: File metadata: {} path: "/path/to/bam" - secondaryFiles: - - class: File - path: "/path/to/bam.bai" 
output_file_name: null chart_output: null validation_stringency: null From c3c4cc0159015a3a5a828e31cad7e6eb137e30d4 Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Tue, 13 Apr 2021 16:07:31 -0400 Subject: [PATCH 344/476] Update gatk_mean_quality_by_cycle_4.1.8.0.cwl --- .../4.1.8.0/gatk_mean_quality_by_cycle_4.1.8.0.cwl | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/gatk_mean_quality_by_cycle/4.1.8.0/gatk_mean_quality_by_cycle_4.1.8.0.cwl b/gatk_mean_quality_by_cycle/4.1.8.0/gatk_mean_quality_by_cycle_4.1.8.0.cwl index 177ad27e..14044a27 100644 --- a/gatk_mean_quality_by_cycle/4.1.8.0/gatk_mean_quality_by_cycle_4.1.8.0.cwl +++ b/gatk_mean_quality_by_cycle/4.1.8.0/gatk_mean_quality_by_cycle_4.1.8.0.cwl @@ -86,7 +86,8 @@ inputs: type: int? - id: temporary_directory type: string? - doc: 'Default value: null. This option may be specified 0 or more times.' + doc: >- + Directory with space available to be used by this program for temporary storage of working files. 
outputs: - id: gatk_mean_quality_by_cycle_output type: File @@ -143,9 +144,10 @@ arguments: prefix: '--TMP_DIR' valueFrom: |- ${ - if(inputs.temporary_directory) - return inputs.temporary_directory; - return runtime.tmpdir + if(inputs.temporary_directory) { + return inputs.temporary_directory; + } + return runtime.tmpdir; } - position: 0 prefix: '-O' From 93d8296514aefb7d4a93e09feeb583a9fb3044de Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Tue, 13 Apr 2021 16:19:49 -0400 Subject: [PATCH 345/476] fix for loop over input_files --- expression_tools/put_in_dir.cwl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/expression_tools/put_in_dir.cwl b/expression_tools/put_in_dir.cwl index bbf5889a..d91300ba 100644 --- a/expression_tools/put_in_dir.cwl +++ b/expression_tools/put_in_dir.cwl @@ -34,9 +34,9 @@ expression: | var output_files = []; var input_files = inputs.files.filter(single_file => String(single_file).toUpperCase() != 'NONE'); - for (var i = 0; i < inputs.files.length; i++) { + for (var i = 0; i < input_files.length; i++) { if(input_files[i]){ - output_files.push(inputs.files[i]); + output_files.push(input_files[i]); } } From 52c07a24f48b9fdbf559a0e2b60fbdf6b82ba07b Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Tue, 13 Apr 2021 16:28:02 -0400 Subject: [PATCH 346/476] remove string --- expression_tools/put_in_dir.cwl | 1 - 1 file changed, 1 deletion(-) diff --git a/expression_tools/put_in_dir.cwl b/expression_tools/put_in_dir.cwl index d91300ba..74e4f98b 100644 --- a/expression_tools/put_in_dir.cwl +++ b/expression_tools/put_in_dir.cwl @@ -19,7 +19,6 @@ inputs: items: - File - Directory - - string - 'null' outputs: From 3b58f41c5234ae10249ee92f8c1d27ae68345e47 Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Tue, 13 Apr 2021 21:31:09 -0400 Subject: [PATCH 347/476] change to es5 --- expression_tools/put_in_dir.cwl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/expression_tools/put_in_dir.cwl 
b/expression_tools/put_in_dir.cwl index 74e4f98b..9d1636b4 100644 --- a/expression_tools/put_in_dir.cwl +++ b/expression_tools/put_in_dir.cwl @@ -31,7 +31,9 @@ outputs: expression: | ${ var output_files = []; - var input_files = inputs.files.filter(single_file => String(single_file).toUpperCase() != 'NONE'); + var input_files = inputs.files.filter(function(single_file) { + return String(single_file).toUpperCase() != 'NONE'; + }); for (var i = 0; i < input_files.length; i++) { if(input_files[i]){ From d559c5d908ca18c2b7821ef3c65a12d18cf78813 Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Wed, 14 Apr 2021 10:58:26 -0400 Subject: [PATCH 348/476] Delete put_in_dir.cwl --- utilities_ubuntu_18.04/put_in_dir.cwl | 74 --------------------------- 1 file changed, 74 deletions(-) delete mode 100644 utilities_ubuntu_18.04/put_in_dir.cwl diff --git a/utilities_ubuntu_18.04/put_in_dir.cwl b/utilities_ubuntu_18.04/put_in_dir.cwl deleted file mode 100644 index eb3a398b..00000000 --- a/utilities_ubuntu_18.04/put_in_dir.cwl +++ /dev/null @@ -1,74 +0,0 @@ -#!/usr/bin/env cwl-runner -# originally from https://github.com/mskcc/pluto-cwl - -cwlVersion: v1.0 -$namespaces: - dct: 'http://purl.org/dc/terms/' - doap: 'http://usefulinc.com/ns/doap#' - foaf: 'http://xmlns.com/foaf/0.1/' - sbg: 'https://www.sevenbridges.com/' -class: ExpressionTool -# class: CommandLineTool -id: put-in-dir - -inputs: - output_directory_name: string - files: - type: - type: array - items: - - File - - Directory - - string - - 'null' - -outputs: - directory: - type: Directory - -# This tool returns a Directory object, -# which holds all output files from the list -# of supplied input files -expression: | - ${ - var output_files = []; - var input_files = inputs.files.filter(single_file => String(single_file).toUpperCase() != 'NONE'); - - for (var i = 0; i < inputs.files.length; i++) { - if(input_files[i]){ - output_files.push(inputs.files[i]); - } - } - - return { - 'directory': { - 'class': 'Directory', - 
'basename': inputs.output_directory_name, - 'listing': output_files - } - }; - } - -requirements: - - class: ResourceRequirement - ramMin: 2000 - coresMin: 1 - - class: InlineJavascriptRequirement -'dct:contributor': - - class: 'foaf:Organization' - 'foaf:member': - - class: 'foaf:Person' - 'foaf:mbox': 'mailto:murphyc4@mskcc.org' - 'foaf:name': Charlie Murphy - 'foaf:name': Memorial Sloan Kettering Cancer Center -'dct:creator': - - class: 'foaf:Organization' - 'foaf:member': - - class: 'foaf:Person' - 'foaf:mbox': 'mailto:murphyc4@mskcc.org' - 'foaf:name': Charlie Murphy - 'foaf:name': Memorial Sloan Kettering Cancer Center -'doap:release': - - class: 'doap:Version' - 'doap:name': put_in_dir - 'doap:revision': 18.04 From 189fd9558978b6ad3c65e3fcfe854c6b2b27aab4 Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Wed, 14 Apr 2021 16:58:56 -0400 Subject: [PATCH 349/476] update to 0.2.8 --- .../biometrics_extract_0.2.8.cwl} | 4 ++-- biometrics_extract/{0.2.7 => 0.2.8}/example_inputs.yaml | 0 .../biometrics_genotype_0.2.8.cwl} | 4 ++-- biometrics_genotype/{0.2.7 => 0.2.8}/example_inputs.yaml | 0 .../biometrics_major.cwl => 0.2.8/biometrics_major_0.2.8.cwl} | 4 ++-- biometrics_major/{0.2.7 => 0.2.8}/example_inputs.yaml | 0 .../biometrics_minor.cwl => 0.2.8/biometrics_minor_0.2.8.cwl} | 4 ++-- biometrics_minor/{0.2.7 => 0.2.8}/example_inputs.yaml | 0 .../biometrics_sexmismatch_0.2.8.cwl} | 4 ++-- biometrics_sexmismatch/{0.2.7 => 0.2.8}/example_inputs.yaml | 0 10 files changed, 10 insertions(+), 10 deletions(-) rename biometrics_extract/{0.2.7/biometrics_extract.cwl => 0.2.8/biometrics_extract_0.2.8.cwl} (98%) rename biometrics_extract/{0.2.7 => 0.2.8}/example_inputs.yaml (100%) rename biometrics_genotype/{0.2.7/biometrics_genotype.cwl => 0.2.8/biometrics_genotype_0.2.8.cwl} (97%) rename biometrics_genotype/{0.2.7 => 0.2.8}/example_inputs.yaml (100%) rename biometrics_major/{0.2.7/biometrics_major.cwl => 0.2.8/biometrics_major_0.2.8.cwl} (97%) rename biometrics_major/{0.2.7 
=> 0.2.8}/example_inputs.yaml (100%) rename biometrics_minor/{0.2.7/biometrics_minor.cwl => 0.2.8/biometrics_minor_0.2.8.cwl} (97%) rename biometrics_minor/{0.2.7 => 0.2.8}/example_inputs.yaml (100%) rename biometrics_sexmismatch/{0.2.7/biometrics_sexmismatch.cwl => 0.2.8/biometrics_sexmismatch_0.2.8.cwl} (97%) rename biometrics_sexmismatch/{0.2.7 => 0.2.8}/example_inputs.yaml (100%) diff --git a/biometrics_extract/0.2.7/biometrics_extract.cwl b/biometrics_extract/0.2.8/biometrics_extract_0.2.8.cwl similarity index 98% rename from biometrics_extract/0.2.7/biometrics_extract.cwl rename to biometrics_extract/0.2.8/biometrics_extract_0.2.8.cwl index d8a4b44b..4d263ae3 100644 --- a/biometrics_extract/0.2.7/biometrics_extract.cwl +++ b/biometrics_extract/0.2.8/biometrics_extract_0.2.8.cwl @@ -150,7 +150,7 @@ requirements: ramMin: 16000 coresMin: 2 - class: DockerRequirement - dockerPull: 'ghcr.io/msk-access/biometrics:0.2.7' + dockerPull: 'ghcr.io/msk-access/biometrics:0.2.8' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' @@ -169,4 +169,4 @@ requirements: 'doap:release': - class: 'doap:Version' 'doap:name': biometrics - 'doap:revision': 0.2.7 + 'doap:revision': 0.2.8 diff --git a/biometrics_extract/0.2.7/example_inputs.yaml b/biometrics_extract/0.2.8/example_inputs.yaml similarity index 100% rename from biometrics_extract/0.2.7/example_inputs.yaml rename to biometrics_extract/0.2.8/example_inputs.yaml diff --git a/biometrics_genotype/0.2.7/biometrics_genotype.cwl b/biometrics_genotype/0.2.8/biometrics_genotype_0.2.8.cwl similarity index 97% rename from biometrics_genotype/0.2.7/biometrics_genotype.cwl rename to biometrics_genotype/0.2.8/biometrics_genotype_0.2.8.cwl index 5808f7b9..0b058030 100644 --- a/biometrics_genotype/0.2.7/biometrics_genotype.cwl +++ b/biometrics_genotype/0.2.8/biometrics_genotype_0.2.8.cwl @@ -123,7 +123,7 @@ requirements: ramMin: 16000 coresMin: 2 - class: DockerRequirement - dockerPull: 
'ghcr.io/msk-access/biometrics:0.2.7' + dockerPull: 'ghcr.io/msk-access/biometrics:0.2.8' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' @@ -142,4 +142,4 @@ requirements: 'doap:release': - class: 'doap:Version' 'doap:name': biometrics - 'doap:revision': 0.2.7 + 'doap:revision': 0.2.8 diff --git a/biometrics_genotype/0.2.7/example_inputs.yaml b/biometrics_genotype/0.2.8/example_inputs.yaml similarity index 100% rename from biometrics_genotype/0.2.7/example_inputs.yaml rename to biometrics_genotype/0.2.8/example_inputs.yaml diff --git a/biometrics_major/0.2.7/biometrics_major.cwl b/biometrics_major/0.2.8/biometrics_major_0.2.8.cwl similarity index 97% rename from biometrics_major/0.2.7/biometrics_major.cwl rename to biometrics_major/0.2.8/biometrics_major_0.2.8.cwl index 6730dfb0..d6298c34 100644 --- a/biometrics_major/0.2.7/biometrics_major.cwl +++ b/biometrics_major/0.2.8/biometrics_major_0.2.8.cwl @@ -98,7 +98,7 @@ requirements: ramMin: 16000 coresMin: 2 - class: DockerRequirement - dockerPull: 'ghcr.io/msk-access/biometrics:0.2.7' + dockerPull: 'ghcr.io/msk-access/biometrics:0.2.8' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' @@ -117,4 +117,4 @@ requirements: 'doap:release': - class: 'doap:Version' 'doap:name': biometrics - 'doap:revision': 0.2.7 + 'doap:revision': 0.2.8 diff --git a/biometrics_major/0.2.7/example_inputs.yaml b/biometrics_major/0.2.8/example_inputs.yaml similarity index 100% rename from biometrics_major/0.2.7/example_inputs.yaml rename to biometrics_major/0.2.8/example_inputs.yaml diff --git a/biometrics_minor/0.2.7/biometrics_minor.cwl b/biometrics_minor/0.2.8/biometrics_minor_0.2.8.cwl similarity index 97% rename from biometrics_minor/0.2.7/biometrics_minor.cwl rename to biometrics_minor/0.2.8/biometrics_minor_0.2.8.cwl index 67857984..f7b9c47d 100644 --- a/biometrics_minor/0.2.7/biometrics_minor.cwl +++ b/biometrics_minor/0.2.8/biometrics_minor_0.2.8.cwl @@ -105,7 
+105,7 @@ requirements: ramMin: 16000 coresMin: 2 - class: DockerRequirement - dockerPull: 'ghcr.io/msk-access/biometrics:0.2.7' + dockerPull: 'ghcr.io/msk-access/biometrics:0.2.8' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' @@ -124,4 +124,4 @@ requirements: 'doap:release': - class: 'doap:Version' 'doap:name': biometrics - 'doap:revision': 0.2.7 + 'doap:revision': 0.2.8 diff --git a/biometrics_minor/0.2.7/example_inputs.yaml b/biometrics_minor/0.2.8/example_inputs.yaml similarity index 100% rename from biometrics_minor/0.2.7/example_inputs.yaml rename to biometrics_minor/0.2.8/example_inputs.yaml diff --git a/biometrics_sexmismatch/0.2.7/biometrics_sexmismatch.cwl b/biometrics_sexmismatch/0.2.8/biometrics_sexmismatch_0.2.8.cwl similarity index 97% rename from biometrics_sexmismatch/0.2.7/biometrics_sexmismatch.cwl rename to biometrics_sexmismatch/0.2.8/biometrics_sexmismatch_0.2.8.cwl index 80ef0363..5f6f5ed9 100644 --- a/biometrics_sexmismatch/0.2.7/biometrics_sexmismatch.cwl +++ b/biometrics_sexmismatch/0.2.8/biometrics_sexmismatch_0.2.8.cwl @@ -84,7 +84,7 @@ requirements: ramMin: 16000 coresMin: 2 - class: DockerRequirement - dockerPull: 'ghcr.io/msk-access/biometrics:0.2.7' + dockerPull: 'ghcr.io/msk-access/biometrics:0.2.8' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' @@ -103,4 +103,4 @@ requirements: 'doap:release': - class: 'doap:Version' 'doap:name': biometrics - 'doap:revision': 0.2.7 + 'doap:revision': 0.2.8 diff --git a/biometrics_sexmismatch/0.2.7/example_inputs.yaml b/biometrics_sexmismatch/0.2.8/example_inputs.yaml similarity index 100% rename from biometrics_sexmismatch/0.2.7/example_inputs.yaml rename to biometrics_sexmismatch/0.2.8/example_inputs.yaml From fb7c468b2e376079a297457942db9055fa2a3f3a Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Wed, 14 Apr 2021 16:59:48 -0400 Subject: [PATCH 350/476] update version in readme --- biometrics_extract/README.md | 2 +- 
biometrics_genotype/README.md | 2 +- biometrics_major/README.md | 2 +- biometrics_minor/README.md | 2 +- biometrics_sexmismatch/README.md | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/biometrics_extract/README.md b/biometrics_extract/README.md index 3475e69b..083ea3d1 100644 --- a/biometrics_extract/README.md +++ b/biometrics_extract/README.md @@ -2,7 +2,7 @@ | Tool | Latest version | Location | |--- |--- |--- | -| biometrics | 0.2.7 | | +| biometrics | 0.2.8 | | The python package source code and Docker file are located on GitHub. diff --git a/biometrics_genotype/README.md b/biometrics_genotype/README.md index 318955cf..6b6fcd1f 100644 --- a/biometrics_genotype/README.md +++ b/biometrics_genotype/README.md @@ -2,7 +2,7 @@ | Tool | Latest version | Location | |--- |--- |--- | -| biometrics | 0.2.7 | | +| biometrics | 0.2.8 | | The python package source code and Docker file are located on GitHub. diff --git a/biometrics_major/README.md b/biometrics_major/README.md index 9f6d0343..c281de1e 100644 --- a/biometrics_major/README.md +++ b/biometrics_major/README.md @@ -2,7 +2,7 @@ | Tool | Latest version | Location | |--- |--- |--- | -| biometrics | 0.2.7 | | +| biometrics | 0.2.8 | | The python package source code and Docker file are located on GitHub. diff --git a/biometrics_minor/README.md b/biometrics_minor/README.md index e014845e..cd9f3434 100644 --- a/biometrics_minor/README.md +++ b/biometrics_minor/README.md @@ -2,7 +2,7 @@ | Tool | Latest version | Location | |--- |--- |--- | -| biometrics | 0.2.7 | | +| biometrics | 0.2.8 | | The python package source code and Docker file are located on GitHub. 
diff --git a/biometrics_sexmismatch/README.md b/biometrics_sexmismatch/README.md index f028957b..2cba1841 100644 --- a/biometrics_sexmismatch/README.md +++ b/biometrics_sexmismatch/README.md @@ -2,7 +2,7 @@ | Tool | Latest version | Location | |--- |--- |--- | -| biometrics | 0.2.7 | | +| biometrics | 0.2.8 | | The python package source code and Docker file are located on GitHub. From 19f39fb8a04982865f787708ad078ced235a77b2 Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Mon, 19 Apr 2021 10:18:33 -0400 Subject: [PATCH 351/476] update to 0.1.19 --- .../0.1.19/sequence_qc_0.1.19.cwl | 4 +-- {sequence_qc_0.1.16 => sequence_qc}/README.md | 12 +++---- sequence_qc/example_inputs.yaml | 17 ++++++++++ sequence_qc_0.1.16/container/Dockerfile | 32 ------------------- sequence_qc_0.1.16/example_inputs.yaml | 17 ---------- 5 files changed, 25 insertions(+), 57 deletions(-) rename sequence_qc_0.1.16/sequence_qc_0.1.16.cwl => sequence_qc/0.1.19/sequence_qc_0.1.19.cwl (97%) rename {sequence_qc_0.1.16 => sequence_qc}/README.md (84%) create mode 100644 sequence_qc/example_inputs.yaml delete mode 100644 sequence_qc_0.1.16/container/Dockerfile delete mode 100644 sequence_qc_0.1.16/example_inputs.yaml diff --git a/sequence_qc_0.1.16/sequence_qc_0.1.16.cwl b/sequence_qc/0.1.19/sequence_qc_0.1.19.cwl similarity index 97% rename from sequence_qc_0.1.16/sequence_qc_0.1.16.cwl rename to sequence_qc/0.1.19/sequence_qc_0.1.19.cwl index 33ae4694..bd87744b 100644 --- a/sequence_qc_0.1.16/sequence_qc_0.1.16.cwl +++ b/sequence_qc/0.1.19/sequence_qc_0.1.19.cwl @@ -117,7 +117,7 @@ requirements: ramMin: 8000 coresMin: 1 - class: DockerRequirement - dockerPull: 'mskaccess/sequence_qc:0.1.16' + dockerPull: 'ghcr.io/msk-access/sequence_qc:0.1.19' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' @@ -136,4 +136,4 @@ requirements: 'doap:release': - class: 'doap:Version' 'doap:name': sesquence_qc - 'doap:revision': 0.1.16 + 'doap:revision': 0.1.19 diff --git 
a/sequence_qc_0.1.16/README.md b/sequence_qc/README.md similarity index 84% rename from sequence_qc_0.1.16/README.md rename to sequence_qc/README.md index fcce4813..c7b71103 100644 --- a/sequence_qc_0.1.16/README.md +++ b/sequence_qc/README.md @@ -4,7 +4,7 @@ | Tool | Version | Location | |--- |--- |--- | -| sequence_qc | 0.1.16 | | +| sequence_qc | 0.1.19 | | ## CWL @@ -13,26 +13,26 @@ - Example Command using [toil](https://toil.readthedocs.io): ```bash - > toil-cwl-runner sequence_qc_0.1.16.cwl example_inputs.yaml + > toil-cwl-runner sequence_qc_0.1.19.cwl example_inputs.yaml ``` **If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** ```bash #Using CWLTOOL -> cwltool --singularity --non-strict /path/to/sequence_qc_0.1.16/sequence_qc_0.1.16.cwl /path/to/inputs.yaml +> cwltool --singularity --non-strict /path/to/sequence_qc_0.1.19/sequence_qc_0.1.19.cwl /path/to/inputs.yaml #Using toil-cwl-runner > mkdir tool_toil_log -> toil-cwl-runner --singularity --logFile /path/to/tool_toil_log/cwltoil.log --jobStore /path/to/tool_jobStore --batchSystem lsf --workDir /path/to/tool_toil_log --outdir . --writeLogs /path/to/tool_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/sequence_qc_0.1.16/sequence_qc_0.1.16.cwl /path/to/inputs.yaml > tool_toil.stdout 2> tool_toil.stderr & +> toil-cwl-runner --singularity --logFile /path/to/tool_toil_log/cwltoil.log --jobStore /path/to/tool_jobStore --batchSystem lsf --workDir /path/to/tool_toil_log --outdir . 
--writeLogs /path/to/tool_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/sequence_qc_0.1.19/sequence_qc_0.1.19.cwl /path/to/inputs.yaml > tool_toil.stdout 2> tool_toil.stderr & ``` ### Usage ```bash -toil-cwl-runner sequence_qc_0.1.16.cwl -h +toil-cwl-runner sequence_qc_0.1.19.cwl -h -usage: sequence_qc_0.1.16.cwl [-h] --reference REFERENCE --bam_file BAM_FILE +usage: sequence_qc_0.1.19.cwl [-h] --reference REFERENCE --bam_file BAM_FILE --bed_file BED_FILE --sample_id SAMPLE_ID [--threshold THRESHOLD] [--truncate TRUNCATE] [--min_mapq MIN_MAPQ] [--min_basq MIN_BASQ] diff --git a/sequence_qc/example_inputs.yaml b/sequence_qc/example_inputs.yaml new file mode 100644 index 00000000..493b28df --- /dev/null +++ b/sequence_qc/example_inputs.yaml @@ -0,0 +1,17 @@ +reference: + class: File + metadata: {} + path: /Users/charlesmurphy/Desktop/data/ref/hg19/Homo_sapiens_assembly19.fasta +bam_file: + class: File + metadata: {} + path: /Users/charlesmurphy/Desktop/mskcc-analyses/210414_qc_workflow/downsample/MSK-L-225-T_collapsed_duplex.bam +bed_file: + class: File + metadata: {} + path: /Users/charlesmurphy/Desktop/data/innovation/resources/MSK-ACCESS-v1.0/MSK-ACCESS-v1_0-probe-B.sorted.bed +sample_id: test_sample_ +threshold: 0.01 +truncate: 1 +min_mapq: 10 +min_basq: 10 diff --git a/sequence_qc_0.1.16/container/Dockerfile b/sequence_qc_0.1.16/container/Dockerfile deleted file mode 100644 index f165b659..00000000 --- a/sequence_qc_0.1.16/container/Dockerfile +++ /dev/null @@ -1,32 +0,0 @@ -################## BASE IMAGE ###################### - -FROM python:3.6-slim - -################## ARGUMENTS/Environments ########## - -ARG BUILD_DATE -ARG BUILD_VERSION -ARG LICENSE="Apache-2.0" -ARG SEQUENCE_QC_VERSION=0.1.16 -ARG VCS_REF -################## METADATA ######################## -LABEL org.opencontainers.image.vendor="MSKCC" -LABEL org.opencontainers.image.authors="Charlie Murphy (murphyc4@mskcc.org)" - -LABEL 
org.opencontainers.image.created=${BUILD_DATE} \ - org.opencontainers.image.version=${BUILD_VERSION} \ - org.opencontainers.image.licenses=${LICENSE} \ - org.opencontainers.image.version.sequence_qc=${SEQUENCE_QC_VERSION} \ - org.opencontainers.image.source.sequence_qc="https://pypi.org/project/sequence_qc/" \ - org.opencontainers.image.vcs-url="https://github.com/msk-access/cwl-commandlinetools.git" \ - org.opencontainers.image.vcs-ref=${VCS_REF} - -LABEL org.opencontainers.image.description="This container uses python3.6 as the base image to build \ - sequence_qc version ${SEQUENCE_QC_VERSION}" - -################## INSTALL ########################## - -RUN apt-get update \ - && apt-get install gcc g++ zlib1g-dev -y \ - && pip install cython plotly \ - && pip install sequence_qc==0.1.16 diff --git a/sequence_qc_0.1.16/example_inputs.yaml b/sequence_qc_0.1.16/example_inputs.yaml deleted file mode 100644 index 04cf48b4..00000000 --- a/sequence_qc_0.1.16/example_inputs.yaml +++ /dev/null @@ -1,17 +0,0 @@ -reference: - class: File - metadata: {} - path: /path/to/fasta -bam_file: - class: File - metadata: {} - path: /path/to/bam -bed_file: - class: File - metadata: {} - path: /path/to/bed -sample_id: test_sample_ -threshold: 0.01 -truncate: 1 -min_mapq: 10 -min_basq: 10 From c988c3f5723306782e793ab410db415352b3695a Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Mon, 19 Apr 2021 10:21:08 -0400 Subject: [PATCH 352/476] Update example_inputs.yaml --- sequence_qc/example_inputs.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sequence_qc/example_inputs.yaml b/sequence_qc/example_inputs.yaml index 493b28df..04cf48b4 100644 --- a/sequence_qc/example_inputs.yaml +++ b/sequence_qc/example_inputs.yaml @@ -1,15 +1,15 @@ reference: class: File metadata: {} - path: /Users/charlesmurphy/Desktop/data/ref/hg19/Homo_sapiens_assembly19.fasta + path: /path/to/fasta bam_file: class: File metadata: {} - path: 
/Users/charlesmurphy/Desktop/mskcc-analyses/210414_qc_workflow/downsample/MSK-L-225-T_collapsed_duplex.bam + path: /path/to/bam bed_file: class: File metadata: {} - path: /Users/charlesmurphy/Desktop/data/innovation/resources/MSK-ACCESS-v1.0/MSK-ACCESS-v1_0-probe-B.sorted.bed + path: /path/to/bed sample_id: test_sample_ threshold: 0.01 truncate: 1 From 3532cb643f7842158f39e649d2a83063b0e666b9 Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Mon, 19 Apr 2021 12:48:24 -0400 Subject: [PATCH 353/476] fgbio metrics not returning an output --- .../fgbio_collect_duplex_seq_metrics_1.2.0.cwl | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/fgbio_collect_duplex_seq_metrics_1.2.0/fgbio_collect_duplex_seq_metrics_1.2.0.cwl b/fgbio_collect_duplex_seq_metrics_1.2.0/fgbio_collect_duplex_seq_metrics_1.2.0.cwl index e9adcf9f..28697020 100644 --- a/fgbio_collect_duplex_seq_metrics_1.2.0/fgbio_collect_duplex_seq_metrics_1.2.0.cwl +++ b/fgbio_collect_duplex_seq_metrics_1.2.0/fgbio_collect_duplex_seq_metrics_1.2.0.cwl @@ -149,13 +149,10 @@ outputs: outputBinding: glob: |- ${ - if(inputs.duplex_umi_counts){ - if(inputs.output_prefix){ - return inputs.output_prefix + '.duplex_umi_counts.txt' - } - else{ - return inputs.input.basename.replace('.bam','.duplex_umi_counts.txt') - } + if (inputs.output_prefix) { + return inputs.output_prefix + '.duplex_umi_counts.txt' + } else { + return inputs.input.basename.replace('.bam','.duplex_umi_counts.txt') } } doc: >- From b405f6cd0cdf4fe5e6d552629a2de3d9885b7713 Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Mon, 19 Apr 2021 13:05:52 -0400 Subject: [PATCH 354/476] update biometrics version --- .../biometrics_extract.cwl} | 6 +++--- biometrics_extract/{0.2.8 => 0.2.9}/example_inputs.yaml | 0 .../biometrics_genotype.cwl} | 6 +++--- biometrics_genotype/{0.2.8 => 0.2.9}/example_inputs.yaml | 0 .../biometrics_major.cwl} | 6 +++--- biometrics_major/{0.2.8 => 0.2.9}/example_inputs.yaml | 0 .../biometrics_minor.cwl} | 6 +++--- 
biometrics_minor/{0.2.8 => 0.2.9}/example_inputs.yaml | 0 .../biometrics_sexmismatch.cwl} | 6 +++--- biometrics_sexmismatch/{0.2.8 => 0.2.9}/example_inputs.yaml | 0 10 files changed, 15 insertions(+), 15 deletions(-) rename biometrics_extract/{0.2.8/biometrics_extract_0.2.8.cwl => 0.2.9/biometrics_extract.cwl} (97%) rename biometrics_extract/{0.2.8 => 0.2.9}/example_inputs.yaml (100%) rename biometrics_genotype/{0.2.8/biometrics_genotype_0.2.8.cwl => 0.2.9/biometrics_genotype.cwl} (97%) rename biometrics_genotype/{0.2.8 => 0.2.9}/example_inputs.yaml (100%) rename biometrics_major/{0.2.8/biometrics_major_0.2.8.cwl => 0.2.9/biometrics_major.cwl} (96%) rename biometrics_major/{0.2.8 => 0.2.9}/example_inputs.yaml (100%) rename biometrics_minor/{0.2.8/biometrics_minor_0.2.8.cwl => 0.2.9/biometrics_minor.cwl} (96%) rename biometrics_minor/{0.2.8 => 0.2.9}/example_inputs.yaml (100%) rename biometrics_sexmismatch/{0.2.8/biometrics_sexmismatch_0.2.8.cwl => 0.2.9/biometrics_sexmismatch.cwl} (96%) rename biometrics_sexmismatch/{0.2.8 => 0.2.9}/example_inputs.yaml (100%) diff --git a/biometrics_extract/0.2.8/biometrics_extract_0.2.8.cwl b/biometrics_extract/0.2.9/biometrics_extract.cwl similarity index 97% rename from biometrics_extract/0.2.8/biometrics_extract_0.2.8.cwl rename to biometrics_extract/0.2.9/biometrics_extract.cwl index 4d263ae3..152e44f3 100644 --- a/biometrics_extract/0.2.8/biometrics_extract_0.2.8.cwl +++ b/biometrics_extract/0.2.9/biometrics_extract.cwl @@ -5,7 +5,7 @@ $namespaces: doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' sbg: 'https://www.sevenbridges.com/' -id: biometrics_extract +id: biometrics_extract_0_2_9 baseCommand: - biometrics - extract @@ -150,7 +150,7 @@ requirements: ramMin: 16000 coresMin: 2 - class: DockerRequirement - dockerPull: 'ghcr.io/msk-access/biometrics:0.2.8' + dockerPull: 'ghcr.io/msk-access/biometrics:0.2.9' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' @@ -169,4 
+169,4 @@ requirements: 'doap:release': - class: 'doap:Version' 'doap:name': biometrics - 'doap:revision': 0.2.8 + 'doap:revision': 0.2.9 diff --git a/biometrics_extract/0.2.8/example_inputs.yaml b/biometrics_extract/0.2.9/example_inputs.yaml similarity index 100% rename from biometrics_extract/0.2.8/example_inputs.yaml rename to biometrics_extract/0.2.9/example_inputs.yaml diff --git a/biometrics_genotype/0.2.8/biometrics_genotype_0.2.8.cwl b/biometrics_genotype/0.2.9/biometrics_genotype.cwl similarity index 97% rename from biometrics_genotype/0.2.8/biometrics_genotype_0.2.8.cwl rename to biometrics_genotype/0.2.9/biometrics_genotype.cwl index 0b058030..f887634d 100644 --- a/biometrics_genotype/0.2.8/biometrics_genotype_0.2.8.cwl +++ b/biometrics_genotype/0.2.9/biometrics_genotype.cwl @@ -5,7 +5,7 @@ $namespaces: doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' sbg: 'https://www.sevenbridges.com/' -id: biometrics_genotype +id: biometrics_genotype_0_2_9 baseCommand: - biometrics - genotype @@ -123,7 +123,7 @@ requirements: ramMin: 16000 coresMin: 2 - class: DockerRequirement - dockerPull: 'ghcr.io/msk-access/biometrics:0.2.8' + dockerPull: 'ghcr.io/msk-access/biometrics:0.2.9' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' @@ -142,4 +142,4 @@ requirements: 'doap:release': - class: 'doap:Version' 'doap:name': biometrics - 'doap:revision': 0.2.8 + 'doap:revision': 0.2.9 diff --git a/biometrics_genotype/0.2.8/example_inputs.yaml b/biometrics_genotype/0.2.9/example_inputs.yaml similarity index 100% rename from biometrics_genotype/0.2.8/example_inputs.yaml rename to biometrics_genotype/0.2.9/example_inputs.yaml diff --git a/biometrics_major/0.2.8/biometrics_major_0.2.8.cwl b/biometrics_major/0.2.9/biometrics_major.cwl similarity index 96% rename from biometrics_major/0.2.8/biometrics_major_0.2.8.cwl rename to biometrics_major/0.2.9/biometrics_major.cwl index d6298c34..60ed0440 100644 --- 
a/biometrics_major/0.2.8/biometrics_major_0.2.8.cwl +++ b/biometrics_major/0.2.9/biometrics_major.cwl @@ -5,7 +5,7 @@ $namespaces: doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' sbg: 'https://www.sevenbridges.com/' -id: biometrics_major +id: biometrics_major_0_2_9 baseCommand: - biometrics - major @@ -98,7 +98,7 @@ requirements: ramMin: 16000 coresMin: 2 - class: DockerRequirement - dockerPull: 'ghcr.io/msk-access/biometrics:0.2.8' + dockerPull: 'ghcr.io/msk-access/biometrics:0.2.9' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' @@ -117,4 +117,4 @@ requirements: 'doap:release': - class: 'doap:Version' 'doap:name': biometrics - 'doap:revision': 0.2.8 + 'doap:revision': 0.2.9 diff --git a/biometrics_major/0.2.8/example_inputs.yaml b/biometrics_major/0.2.9/example_inputs.yaml similarity index 100% rename from biometrics_major/0.2.8/example_inputs.yaml rename to biometrics_major/0.2.9/example_inputs.yaml diff --git a/biometrics_minor/0.2.8/biometrics_minor_0.2.8.cwl b/biometrics_minor/0.2.9/biometrics_minor.cwl similarity index 96% rename from biometrics_minor/0.2.8/biometrics_minor_0.2.8.cwl rename to biometrics_minor/0.2.9/biometrics_minor.cwl index f7b9c47d..8352adce 100644 --- a/biometrics_minor/0.2.8/biometrics_minor_0.2.8.cwl +++ b/biometrics_minor/0.2.9/biometrics_minor.cwl @@ -5,7 +5,7 @@ $namespaces: doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' sbg: 'https://www.sevenbridges.com/' -id: biometrics_minor +id: biometrics_minor_0_2_9 baseCommand: - biometrics - minor @@ -105,7 +105,7 @@ requirements: ramMin: 16000 coresMin: 2 - class: DockerRequirement - dockerPull: 'ghcr.io/msk-access/biometrics:0.2.8' + dockerPull: 'ghcr.io/msk-access/biometrics:0.2.9' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' @@ -124,4 +124,4 @@ requirements: 'doap:release': - class: 'doap:Version' 'doap:name': biometrics - 'doap:revision': 0.2.8 + 
'doap:revision': 0.2.9 diff --git a/biometrics_minor/0.2.8/example_inputs.yaml b/biometrics_minor/0.2.9/example_inputs.yaml similarity index 100% rename from biometrics_minor/0.2.8/example_inputs.yaml rename to biometrics_minor/0.2.9/example_inputs.yaml diff --git a/biometrics_sexmismatch/0.2.8/biometrics_sexmismatch_0.2.8.cwl b/biometrics_sexmismatch/0.2.9/biometrics_sexmismatch.cwl similarity index 96% rename from biometrics_sexmismatch/0.2.8/biometrics_sexmismatch_0.2.8.cwl rename to biometrics_sexmismatch/0.2.9/biometrics_sexmismatch.cwl index 5f6f5ed9..90880263 100644 --- a/biometrics_sexmismatch/0.2.8/biometrics_sexmismatch_0.2.8.cwl +++ b/biometrics_sexmismatch/0.2.9/biometrics_sexmismatch.cwl @@ -5,7 +5,7 @@ $namespaces: doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' sbg: 'https://www.sevenbridges.com/' -id: biometrics_sexmismatch +id: biometrics_sexmismatch_0_2_9 baseCommand: - biometrics - sexmismatch @@ -84,7 +84,7 @@ requirements: ramMin: 16000 coresMin: 2 - class: DockerRequirement - dockerPull: 'ghcr.io/msk-access/biometrics:0.2.8' + dockerPull: 'ghcr.io/msk-access/biometrics:0.2.9' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' @@ -103,4 +103,4 @@ requirements: 'doap:release': - class: 'doap:Version' 'doap:name': biometrics - 'doap:revision': 0.2.8 + 'doap:revision': 0.2.9 diff --git a/biometrics_sexmismatch/0.2.8/example_inputs.yaml b/biometrics_sexmismatch/0.2.9/example_inputs.yaml similarity index 100% rename from biometrics_sexmismatch/0.2.8/example_inputs.yaml rename to biometrics_sexmismatch/0.2.9/example_inputs.yaml From 6a3e32cdc288ec3c5ebf5e41f1aa8c1d07d835d1 Mon Sep 17 00:00:00 2001 From: ionox0 Date: Mon, 17 May 2021 17:11:23 -0400 Subject: [PATCH 355/476] add multiqc tool --- multiqc_1.10.1/multiqc_1.10.1.cwl | 132 ++++++++++++++++++++++++++++++ 1 file changed, 132 insertions(+) create mode 100644 multiqc_1.10.1/multiqc_1.10.1.cwl diff --git 
a/multiqc_1.10.1/multiqc_1.10.1.cwl b/multiqc_1.10.1/multiqc_1.10.1.cwl new file mode 100644 index 00000000..afb97463 --- /dev/null +++ b/multiqc_1.10.1/multiqc_1.10.1.cwl @@ -0,0 +1,132 @@ +#!/usr/bin/env cwl-runner +cwlVersion: v1.0 +class: CommandLineTool + +doc: | + Run multiqc on log files from supported bioinformatic tools. + +requirements: + InlineJavascriptRequirement: {} + InitialWorkDirRequirement: + # This step is necessary since the input files + # must be loaded into the working directory as there + # is no way to specify the input file directly on the + # command line. + listing: | + ${ + // script merges the input arrays + // into one array that fulfills the type + // requirement for "listing", which is + // "{type: array, items: [File, Directory]}" + + var qc_files_directory = inputs.qc_files_directory; + var qc_files_array = inputs.qc_files_array; + var qc_files_array_of_array = inputs.qc_files_array_of_array; + var output_array = []; + + // add items of the qc_files_array to the output_array + if ( qc_files_array != null ){ + // Fix to allow single qc_file_array + if (qc_files_array.length === undefined){ + output_array.push(qc_files_array) + } else { + for (var i=0; i Date: Mon, 17 May 2021 17:15:35 -0400 Subject: [PATCH 356/476] allow for list of dirs in multiqc input --- multiqc_1.10.1/multiqc_1.10.1.cwl | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/multiqc_1.10.1/multiqc_1.10.1.cwl b/multiqc_1.10.1/multiqc_1.10.1.cwl index afb97463..8e6d103f 100644 --- a/multiqc_1.10.1/multiqc_1.10.1.cwl +++ b/multiqc_1.10.1/multiqc_1.10.1.cwl @@ -20,6 +20,7 @@ requirements: // "{type: array, items: [File, Directory]}" var qc_files_directory = inputs.qc_files_directory; + var qc_list_of_dirs = inputs.qc_list_of_dirs; var qc_files_array = inputs.qc_files_array; var qc_files_array_of_array = inputs.qc_files_array_of_array; var output_array = []; @@ -52,6 +53,14 @@ requirements: } } + if ( qc_list_of_dirs != null){ + for ( var i=0; i Date: 
Mon, 17 May 2021 17:52:36 -0400 Subject: [PATCH 357/476] add Dockerfile --- multiqc_1.10.1/Dockerfile | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 multiqc_1.10.1/Dockerfile diff --git a/multiqc_1.10.1/Dockerfile b/multiqc_1.10.1/Dockerfile new file mode 100644 index 00000000..2b25b6ac --- /dev/null +++ b/multiqc_1.10.1/Dockerfile @@ -0,0 +1,31 @@ +################## BASE IMAGE ###################### + +FROM python:3.6-slim + +################## ARGUMENTS/Environments ########## + +ARG BUILD_DATE +ARG BUILD_VERSION +ARG LICENSE="Apache-2.0" +ARG MULTIQC_VERSION +ARG VCS_REF +################## METADATA ######################## +LABEL org.opencontainers.image.vendor="MSKCC" +LABEL org.opencontainers.image.authors="Ian Johnson (johnsoni@mskcc.org)" + +LABEL org.opencontainers.image.created=${BUILD_DATE} \ + org.opencontainers.image.version=${BUILD_VERSION} \ + org.opencontainers.image.version.multiqc=${MULTIQC_VERSION} \ + org.opencontainers.image.vcs-url="https://github.com/msk-access/multiqc.git" \ + org.opencontainers.image.vcs-ref=${VCS_REF} + +LABEL org.opencontainers.image.description="This container uses python3.6 as the base image to build \ + multiqc version ${MULTIQC_VERSION}" + +################## INSTALL ########################## + +RUN apt-get update \ + && apt-get install git gcc g++ zlib1g-dev -y \ + && git clone https://github.com/msk-access/MultiQC.git --branch msk-access \ + && cd MultiQC \ + && pip install . 
From 1c8d520fec60ce641ed3704443b9fd9a9c6a27b1 Mon Sep 17 00:00:00 2001 From: ionox0 Date: Tue, 18 May 2021 09:37:55 -0400 Subject: [PATCH 358/476] comments --- multiqc_1.10.1/multiqc_1.10.1.cwl | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/multiqc_1.10.1/multiqc_1.10.1.cwl b/multiqc_1.10.1/multiqc_1.10.1.cwl index 8e6d103f..e9a27e2a 100644 --- a/multiqc_1.10.1/multiqc_1.10.1.cwl +++ b/multiqc_1.10.1/multiqc_1.10.1.cwl @@ -8,13 +8,9 @@ doc: | requirements: InlineJavascriptRequirement: {} InitialWorkDirRequirement: - # This step is necessary since the input files - # must be loaded into the working directory as there - # is no way to specify the input file directly on the - # command line. listing: | ${ - // script merges the input arrays + // script merges the inputs // into one array that fulfills the type // requirement for "listing", which is // "{type: array, items: [File, Directory]}" @@ -25,7 +21,6 @@ requirements: var qc_files_array_of_array = inputs.qc_files_array_of_array; var output_array = []; - // add items of the qc_files_array to the output_array if ( qc_files_array != null ){ // Fix to allow single qc_file_array if (qc_files_array.length === undefined){ @@ -38,7 +33,6 @@ requirements: } } - // add items of the qc_files_array_of_array to the output_array if ( qc_files_array_of_array != null ){ for (var i=0; i Date: Tue, 18 May 2021 12:36:34 -0400 Subject: [PATCH 359/476] update --config param --- multiqc_1.10.1/multiqc_1.10.1.cwl | 225 ++++++++++++++---------------- 1 file changed, 107 insertions(+), 118 deletions(-) diff --git a/multiqc_1.10.1/multiqc_1.10.1.cwl b/multiqc_1.10.1/multiqc_1.10.1.cwl index e9a27e2a..67b80cc9 100644 --- a/multiqc_1.10.1/multiqc_1.10.1.cwl +++ b/multiqc_1.10.1/multiqc_1.10.1.cwl @@ -1,141 +1,130 @@ -#!/usr/bin/env cwl-runner -cwlVersion: v1.0 class: CommandLineTool - -doc: | - Run multiqc on log files from supported bioinformatic tools. 
- -requirements: - InlineJavascriptRequirement: {} - InitialWorkDirRequirement: - listing: | - ${ - // script merges the inputs - // into one array that fulfills the type - // requirement for "listing", which is - // "{type: array, items: [File, Directory]}" - - var qc_files_directory = inputs.qc_files_directory; - var qc_list_of_dirs = inputs.qc_list_of_dirs; - var qc_files_array = inputs.qc_files_array; - var qc_files_array_of_array = inputs.qc_files_array_of_array; - var output_array = []; - - if ( qc_files_array != null ){ - // Fix to allow single qc_file_array - if (qc_files_array.length === undefined){ - output_array.push(qc_files_array) - } else { - for (var i=0; i Date: Wed, 19 May 2021 16:26:05 -0400 Subject: [PATCH 360/476] update Docker reference, + fix Directory JS expression --- multiqc_1.10.1/multiqc_1.10.1.cwl | 26 +++++++------------------- 1 file changed, 7 insertions(+), 19 deletions(-) diff --git a/multiqc_1.10.1/multiqc_1.10.1.cwl b/multiqc_1.10.1/multiqc_1.10.1.cwl index 67b80cc9..1e679f50 100644 --- a/multiqc_1.10.1/multiqc_1.10.1.cwl +++ b/multiqc_1.10.1/multiqc_1.10.1.cwl @@ -62,13 +62,9 @@ outputs: doc: | Run multiqc on log files from supported bioinformatic tools. arguments: - - position: 1 - valueFrom: '--zip-data-dir' - - position: 2 - prefix: '--outdir' - valueFrom: $(runtime.outdir) - - position: 4 - valueFrom: $(runtime.outdir) + - position: 0 + prefix: '' + valueFrom: . 
requirements: - class: InitialWorkDirRequirement listing: @@ -79,7 +75,7 @@ requirements: // requirement for "listing", which is // "{type: array, items: [File, Directory]}" - var qc_files_directory = inputs.qc_files_directory; + var qc_files_directory = inputs.qc_files_dir; var qc_list_of_dirs = inputs.qc_list_of_dirs; var qc_files_array = inputs.qc_files_array; var qc_files_array_of_array = inputs.qc_files_array_of_array; @@ -106,18 +102,10 @@ requirements: } if ( qc_files_directory != null ){ - for ( var i=0; i Date: Wed, 19 May 2021 16:27:08 -0400 Subject: [PATCH 361/476] remove multiqc Dockerfile (using one from msk-access/multiqc repo --- multiqc_1.10.1/Dockerfile | 31 ------------------------------- 1 file changed, 31 deletions(-) delete mode 100644 multiqc_1.10.1/Dockerfile diff --git a/multiqc_1.10.1/Dockerfile b/multiqc_1.10.1/Dockerfile deleted file mode 100644 index 2b25b6ac..00000000 --- a/multiqc_1.10.1/Dockerfile +++ /dev/null @@ -1,31 +0,0 @@ -################## BASE IMAGE ###################### - -FROM python:3.6-slim - -################## ARGUMENTS/Environments ########## - -ARG BUILD_DATE -ARG BUILD_VERSION -ARG LICENSE="Apache-2.0" -ARG MULTIQC_VERSION -ARG VCS_REF -################## METADATA ######################## -LABEL org.opencontainers.image.vendor="MSKCC" -LABEL org.opencontainers.image.authors="Ian Johnson (johnsoni@mskcc.org)" - -LABEL org.opencontainers.image.created=${BUILD_DATE} \ - org.opencontainers.image.version=${BUILD_VERSION} \ - org.opencontainers.image.version.multiqc=${MULTIQC_VERSION} \ - org.opencontainers.image.vcs-url="https://github.com/msk-access/multiqc.git" \ - org.opencontainers.image.vcs-ref=${VCS_REF} - -LABEL org.opencontainers.image.description="This container uses python3.6 as the base image to build \ - multiqc version ${MULTIQC_VERSION}" - -################## INSTALL ########################## - -RUN apt-get update \ - && apt-get install git gcc g++ zlib1g-dev -y \ - && git clone 
https://github.com/msk-access/MultiQC.git --branch msk-access \ - && cd MultiQC \ - && pip install . From eeba84c118b51dd93431ccc9d503ff42e23c9970 Mon Sep 17 00:00:00 2001 From: ionox0 Date: Wed, 19 May 2021 16:28:24 -0400 Subject: [PATCH 362/476] fix JS expression --- multiqc_1.10.1/multiqc_1.10.1.cwl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/multiqc_1.10.1/multiqc_1.10.1.cwl b/multiqc_1.10.1/multiqc_1.10.1.cwl index 1e679f50..592c098c 100644 --- a/multiqc_1.10.1/multiqc_1.10.1.cwl +++ b/multiqc_1.10.1/multiqc_1.10.1.cwl @@ -105,7 +105,11 @@ requirements: output_array = output_array.concat(qc_files_directory.listing); } - + if ( qc_list_of_dirs != null ){ + for (var i=0; i Date: Wed, 19 May 2021 16:37:23 -0400 Subject: [PATCH 363/476] use directories themselves instead of adding files inside directory --- multiqc_1.10.1/multiqc_1.10.1.cwl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/multiqc_1.10.1/multiqc_1.10.1.cwl b/multiqc_1.10.1/multiqc_1.10.1.cwl index 592c098c..5a7fc5ec 100644 --- a/multiqc_1.10.1/multiqc_1.10.1.cwl +++ b/multiqc_1.10.1/multiqc_1.10.1.cwl @@ -102,12 +102,12 @@ requirements: } if ( qc_files_directory != null ){ - output_array = output_array.concat(qc_files_directory.listing); + output_array = output_array.concat(qc_files_directory); } if ( qc_list_of_dirs != null ){ for (var i=0; i Date: Thu, 20 May 2021 10:17:09 -0400 Subject: [PATCH 364/476] add readme, + update docker image for cwltool CMD fix --- multiqc_1.10.1/README.md | 56 ++++++++++++++++++++++++++++++ multiqc_1.10.1/example_inputs.json | 20 +++++++++++ multiqc_1.10.1/multiqc_1.10.1.cwl | 5 ++- 3 files changed, 78 insertions(+), 3 deletions(-) create mode 100644 multiqc_1.10.1/README.md create mode 100644 multiqc_1.10.1/example_inputs.json diff --git a/multiqc_1.10.1/README.md b/multiqc_1.10.1/README.md new file mode 100644 index 00000000..4870d2b1 --- /dev/null +++ b/multiqc_1.10.1/README.md @@ -0,0 +1,56 @@ +# CWL and Dockerfile 
for running merge_fastq + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +|--- |--- |--- | +| multiqc | 1.10.1 | | + +## CWL + +- CWL specification 1.0 +- Use example_inputs.json to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner multiqc_1.10.1.cwl example_inputs.json +``` + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/multiqc_1.10.1/multiqc_1.10.1.cwl /path/to/example_inputs.json + +#Using toil-cwl-runner +> mkdir tool_toil_log +> toil-cwl-runner --singularity --logFile /path/to/tool_toil_log/cwltoil.log --jobStore /path/to/tool_jobStore --batchSystem lsf --workDir /path/to/tool_toil_log --outdir . --writeLogs /path/to/tool_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/multiqc_1.10.1/multiqc_1.10.1.cwl /path/to/example_inputs.json > tool_toil.stdout 2> tool_toil.stderr & +``` + +### Usage + +```bash +> toil-cwl-runner multiqc_1.10.1.cwl --helpusage: multiqc_1.10.1.cwl [-h] + [--qc_files_array_of_array QC_FILES_ARRAY_OF_ARRAY] + [--qc_files_dir QC_FILES_DIR] + [--qc_list_of_dirs QC_LIST_OF_DIRS] + [--report_name REPORT_NAME] [--config CONFIG] + [job_order] + +Run multiqc on log files from supported bioinformatic tools. 
+ +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --qc_files_array_of_array QC_FILES_ARRAY_OF_ARRAY + qc files which shall be part of the multiqc summary; + optional, only one of qc_files_array or + qc_files_array_of_array must be provided + --qc_files_dir QC_FILES_DIR + qc files in a Directory + --qc_list_of_dirs QC_LIST_OF_DIRS + qc files in multiple directories + --report_name REPORT_NAME + name used for the html report + --config CONFIG +``` diff --git a/multiqc_1.10.1/example_inputs.json b/multiqc_1.10.1/example_inputs.json new file mode 100644 index 00000000..87e4b59e --- /dev/null +++ b/multiqc_1.10.1/example_inputs.json @@ -0,0 +1,20 @@ +{ + "config": { + "class": "File", + "path": "/path_to/config.yaml" + }, + "qc_files_array": null, + "qc_files_array_of_array": [], + "qc_files_dir": null, + "qc_list_of_dirs": [ + { + "class": "Directory", + "path": "/test/picard/collapsed_bam_duplex_metrics_pool_a" + }, + { + "class": "Directory", + "path": "/test/picard/collapsed_bam_duplex_metrics_pool_b" + } + ], + "report_name": null +} diff --git a/multiqc_1.10.1/multiqc_1.10.1.cwl b/multiqc_1.10.1/multiqc_1.10.1.cwl index 5a7fc5ec..01140aad 100644 --- a/multiqc_1.10.1/multiqc_1.10.1.cwl +++ b/multiqc_1.10.1/multiqc_1.10.1.cwl @@ -56,14 +56,13 @@ outputs: outputBinding: glob: $(inputs.report_name).html - id: multiqc_zip - type: File + type: File? outputBinding: glob: $(inputs.report_name)_data.zip doc: | Run multiqc on log files from supported bioinformatic tools. arguments: - position: 0 - prefix: '' valueFrom: . 
requirements: - class: InitialWorkDirRequirement @@ -119,4 +118,4 @@ hints: ramMin: 10000 coresMin: 1 - class: DockerRequirement - dockerPull: 'ghcr.io/msk-access/multiqc:v1.10.1.2' + dockerPull: 'ghcr.io/msk-access/multiqc:v1.10.1.3' From bc6f23a18bdabc0b4495c3a19df9451118c73255 Mon Sep 17 00:00:00 2001 From: ionox0 Date: Fri, 21 May 2021 10:57:49 -0400 Subject: [PATCH 365/476] also add updated sequence_qc tool --- sequence_qc/0.2.2/sequence_qc_0.2.2.cwl | 146 ++++++++++++++++++++++++ 1 file changed, 146 insertions(+) create mode 100644 sequence_qc/0.2.2/sequence_qc_0.2.2.cwl diff --git a/sequence_qc/0.2.2/sequence_qc_0.2.2.cwl b/sequence_qc/0.2.2/sequence_qc_0.2.2.cwl new file mode 100644 index 00000000..68a9cec9 --- /dev/null +++ b/sequence_qc/0.2.2/sequence_qc_0.2.2.cwl @@ -0,0 +1,146 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: calculate_noise_0_2_2 +baseCommand: + - calculate_noise +inputs: + - id: reference + type: File + inputBinding: + position: 0 + prefix: --ref_fasta + secondaryFiles: + - ^.fasta.fai + doc: >- + Path to reference fasta, containing all regions in bed_file + - id: bam_file + type: File + inputBinding: + position: 0 + prefix: --bam_file + secondaryFiles: + - ^.bai + doc: >- + Path to BAM file for calculating noise [required] + - id: bed_file + type: File + inputBinding: + position: 0 + prefix: --bed_file + doc: >- + Path to BED file containing regions over which to calculate noise [required] + - id: sample_id + type: string + inputBinding: + position: 0 + prefix: --sample_id + doc: >- + Prefix to include in all output file names + - id: threshold + type: float? + inputBinding: + position: 0 + prefix: --threshold + doc: >- + Alt allele frequency past which to ignore positions from the calculation. + - id: truncate + type: int? 
+ inputBinding: + position: 0 + prefix: --truncate + doc: >- + Whether to exclude trailing bases from reads that only partially overlap the bed file (0 or 1) + - id: min_mapq + type: int? + inputBinding: + position: 0 + prefix: --min_mapq + doc: >- + Exclude reads with a lower mapping quality + - id: min_basq + type: int? + inputBinding: + position: 0 + prefix: --min_basq + doc: >- + Exclude bases with a lower base quality + - id: max_depth + type: int? + inputBinding: + position: 0 + prefix: --max_depth + doc: >- + Maximum read depth for calculation +outputs: + - id: sequence_qc_pileup + type: File + outputBinding: + glob: |- + ${ + return inputs.sample_id + 'pileup.tsv' + } + - id: sequence_qc_noise_positions + type: File + outputBinding: + glob: |- + ${ + return inputs.sample_id + 'noise_positions.tsv' + } + - id: sequence_qc_noise_acgt + type: File + outputBinding: + glob: |- + ${ + return inputs.sample_id + 'noise_acgt.tsv' + } + - id: sequence_qc_noise_n + type: File + outputBinding: + glob: |- + ${ + return inputs.sample_id + 'noise_n.tsv' + } + - id: sequence_qc_noise_del + type: File + outputBinding: + glob: |- + ${ + return inputs.sample_id + 'noise_del.tsv' + } + - id: sequence_qc_figures + type: File + outputBinding: + glob: |- + ${ + return inputs.sample_id + '_noise.html' + } +requirements: + - class: ResourceRequirement + ramMin: 8000 + coresMin: 1 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/sequence_qc:0.2.2' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 
'doap:name': sesquence_qc + 'doap:revision': 0.2.2 From 67ff923e782e2f98223cbaa6c86c8c236e3ce4bc Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Fri, 21 May 2021 15:48:07 -0400 Subject: [PATCH 366/476] can handle list of files now --- expression_tools/put_in_dir.cwl | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/expression_tools/put_in_dir.cwl b/expression_tools/put_in_dir.cwl index 9d1636b4..71deab3f 100644 --- a/expression_tools/put_in_dir.cwl +++ b/expression_tools/put_in_dir.cwl @@ -18,6 +18,9 @@ inputs: type: array items: - File + - type: array + items: + - File - Directory - 'null' @@ -36,7 +39,13 @@ expression: | }); for (var i = 0; i < input_files.length; i++) { - if(input_files[i]){ + // Handle list of list of files + if (input_files[i] && input_files[i].length) { + for (var ii = 0; ii < input_files[i].length; ii++) { + output_files.push(input_files[i][ii]); + } + // Handle list of files + } else if (input_files[i]) { output_files.push(input_files[i]); } } From eafd95767b191d1dd9c188f2eac56ce86b9ccf62 Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Mon, 24 May 2021 12:23:11 -0400 Subject: [PATCH 367/476] Create gatk_revert_sam_4.1.8.0.cwl --- .../4.1.8.0/gatk_revert_sam_4.1.8.0.cwl | 310 ++++++++++++++++++ 1 file changed, 310 insertions(+) create mode 100644 gatk_revert_sam/4.1.8.0/gatk_revert_sam_4.1.8.0.cwl diff --git a/gatk_revert_sam/4.1.8.0/gatk_revert_sam_4.1.8.0.cwl b/gatk_revert_sam/4.1.8.0/gatk_revert_sam_4.1.8.0.cwl new file mode 100644 index 00000000..55363f08 --- /dev/null +++ b/gatk_revert_sam/4.1.8.0/gatk_revert_sam_4.1.8.0.cwl @@ -0,0 +1,310 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: gatk_revert_sam_4_1_8_0 +baseCommand: + - gatk + - RevertSam +inputs: + - id: input + type: File + inputBinding: + position: 0 + prefix: '-I' + doc: An aligned SAM 
or BAM file. Required. + - id: output + type: string? + doc: >- + The output SAM/BAM file to create, or an output directory if OUTPUT_BY_READGROUP is true. Required. Cannot be used in conjunction with argument(s) OUTPUT_MAP (OM) + - id: output_map + type: string? + doc: >- + Tab separated file with two columns, READ_GROUP_ID and OUTPUT, providing file mapping only used if OUTPUT_BY_READGROUP is true. Required. Cannot be used in conjunction with argument(s) OUTPUT (O) + - id: attribute_to_clear + type: + - "null" + - type: array + items: string + inputBinding: + position: 0 + prefix: '--ATTRIBUTE_TO_CLEAR' + doc: >- + When removing alignment information, the set of optional tags to remove. This may be specified 0 or more times. Default value: [NM, UQ, PG, MD, MQ, SA, MC, AS]. + - id: max_discard_fraction + type: float? + inputBinding: + position: 0 + prefix: '--MAX_DISCARD_FRACTION' + doc: >- + If SANITIZE=true and higher than MAX_DISCARD_FRACTION reads are discarded due to + sanitization thenthe program will exit with an Exception instead of exiting cleanly. + Output BAM will still be valid. Default value: 0.01. + - id: library_name + type: string? + inputBinding: + position: 0 + prefix: '--LIBRARY_NAME' + doc: >- + The library name to use in the reverted output file. This will override the existing + sample alias in the file and is used only if all the read groups in the input file have + the same library name. Default value: null. + - id: max_records_in_ram + type: int? + inputBinding: + position: 0 + prefix: '--MAX_RECORDS_IN_RAM' + doc: >- + When writing files that need to be sorted, this will specify the number of records stored + in RAM before spilling to disk. Increasing this number reduces the number of file handles + needed to sort the file, and increases the amount of RAM needed. Default value: 500000. + - id: output_by_readgroup + type: string? 
+ default: 'false' + inputBinding: + position: 0 + prefix: '--OUTPUT_BY_READGROUP' + doc: >- + When true, outputs each read group in a separate file. Default value: false. Possible values: {true, false} + - id: output_by_readgroup_file_format + type: string? + inputBinding: + position: 0 + prefix: '--OUTPUT_BY_READGROUP_FILE_FORMAT' + doc: >- + When using OUTPUT_BY_READGROUP, the output file format can be set to a certain format. + Default value: dynamic. sam (Generate SAM files.) + bam (Generate BAM files.) + cram (Generate CRAM files.) + dynamic (Generate files based on the extention of INPUT.) + - id: remove_alignment_information + type: string? + default: 'true' + inputBinding: + position: 0 + prefix: '--REMOVE_ALIGNMENT_INFORMATION' + doc: >- + Remove all alignment information from the file. Default value: true. Possible values: {true, false} + - id: remove_duplicate_information + type: string? + default: 'true' + inputBinding: + position: 1 + prefix: '--REMOVE_DUPLICATE_INFORMATION' + doc: | + Remove duplicate read flags from all reads. Note that if this is false and + REMOVE_ALIGNMENT_INFORMATION==true, the output may have the unusual but sometimes + desirable trait of having unmapped reads that are marked as duplicates. Default value: + true. Possible values: {true, false} + - id: restore_hardclips + type: string? + default: 'true' + inputBinding: + position: 0 + prefix: '--RESTORE_HARDCLIPS' + doc: >- + When true, restores reads and qualities of records with hard-clips containing XB and XQ tags. Default value: true. Possible values: {true, false} + - id: restore_original_qualities + type: string? + default: 'true' + inputBinding: + position: 1 + prefix: '--RESTORE_ORIGINAL_QUALITIES' + doc: | + True to restore original qualities from the OQ field to the QUAL field if available. Default value: true. Possible values: {true, false} + - id: sample_alias + type: string? 
+ inputBinding: + position: 1 + prefix: '--SAMPLE_ALIAS' + doc: | + The sample alias to use in the reverted output file. This will override the existing + sample alias in the file and is used only if all the read groups in the input file have + the same sample alias. Default value: null. + - id: sanitize + type: string? + default: 'false' + inputBinding: + position: 1 + prefix: '--SANITIZE' + doc: | + WARNING: This option is potentially destructive. If enabled will discard reads in order to + produce a consistent output BAM. Reads discarded include (but are not limited to) paired + reads with missing mates, duplicated records, records with mismatches in length of bases + and qualities. This option can only be enabled if the output sort order is queryname and + will always cause sorting to occur. Default value: false. Possible values: {true, false} + - id: sort_order + type: string? + inputBinding: + position: 1 + prefix: '--SORT_ORDER' + doc: | + The sort order to create the reverted output file with. Default value: queryname. Possible values: {unsorted, queryname, coordinate, duplicate, unknown} + - id: reference + type: File? + inputBinding: + position: 0 + prefix: '-R' + doc: >- + Reference sequence file. Note that while this argument is not required, + without it only a small subset of the metrics will be calculated. Note + also that if a reference sequence is provided, it must be accompanied by a + sequence dictionary. Default value: null. + secondaryFiles: + - ^.fasta.fai + - ^.dict + - id: validation_stringency + type: string? + inputBinding: + position: 0 + prefix: '--VALIDATION_STRINGENCY' + doc: >- + Validation stringency for all SAM files read by this program. Setting + stringency to SILENT can improve performance when processing a BAM file in + which variable-length data (read, qualities, tags) do not otherwise need + to be decoded. Default value: STRICT. This option can be set to 'null' to + clear the default value. 
Possible values: {STRICT,LENIENT, SILENT} + - id: compression_level + type: int? + inputBinding: + position: 0 + prefix: '--COMPRESSION_LEVEL' + doc: >- + Compression level for all compressed files created (e.g. BAM and VCF). Default value: 2. + - id: create_index + type: boolean? + inputBinding: + position: 0 + prefix: '--CREATE_INDEX' + doc: >- + Whether to create a BAM index when writing a coordinate-sorted BAM file. + Default value: false. Possible values: {true, false} + - id: create_md5_file + type: boolean? + inputBinding: + position: 0 + prefix: '--CREATE_MD5_FILE' + doc: >- + Whether to create an MD5 digest for any BAM or FASTQ files created. + Default value: false. Possible values: {true, false} + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: temporary_directory + type: string? + doc: 'Default value: null. This option may be specified 0 or more times.' +outputs: + - id: gatk_revert_sam_output + type: File + outputBinding: + glob: |- + ${ + if(inputs.output){ + return inputs.output + } else { + return inputs.input.basename.replace(/[.](bam|sam)$/, '_revertsam.bam') /* swap only the trailing .bam/.sam extension; must match the -O argument */ + } + } + - id: gatk_revert_sam_output_map + type: File? 
+ outputBinding: + glob: |- + ${ + if(inputs.output_map){ + return inputs.output_map + } else { + return inputs.input.basename.replace(/[.](bam|sam)$/, '_revertsam.tsv') /* swap only the trailing .bam/.sam extension */ + } + } +label: GATK-RevertSam +arguments: + - position: 0 + prefix: '--java-options' + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx15G" + } + else { + return "-Xmx15G" + } + } + - position: 0 + prefix: '--TMP_DIR' + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } + - position: 0 + prefix: '-O' + valueFrom: |- + ${ + if(inputs.output){ + return inputs.output; + } else if (inputs.output_map) { + return null; + } else { + return inputs.input.basename.replace(/[.](bam|sam)$/, '_revertsam.bam'); /* same default name as the gatk_revert_sam_output glob */ + } + } + - position: 0 + prefix: '-OM' + valueFrom: |- + ${ + if(inputs.output_map){ + return inputs.output_map; + } else { + return null; + } + } +requirements: + - class: ResourceRequirement + ramMin: 17000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/gatk:4.1.8.0' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 
'mailto:murphyc4@mskcc.org' + 'foaf:name': Charles Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': gatk4 + 'doap:revision': 4.1.8.0 From ff1aec42bbc3fc9b6ffe96814b188b60dccdf87c Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Mon, 24 May 2021 12:23:14 -0400 Subject: [PATCH 368/476] Create example_inputs.yaml --- gatk_revert_sam/4.1.8.0/example_inputs.yaml | 26 +++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 gatk_revert_sam/4.1.8.0/example_inputs.yaml diff --git a/gatk_revert_sam/4.1.8.0/example_inputs.yaml b/gatk_revert_sam/4.1.8.0/example_inputs.yaml new file mode 100644 index 00000000..b195dfa8 --- /dev/null +++ b/gatk_revert_sam/4.1.8.0/example_inputs.yaml @@ -0,0 +1,26 @@ +input: + class: File + path: "/path" +output: null +output_map: null +attribute_to_clear: null +max_discard_fraction: null +library_name: null +max_records_in_ram: null +output_by_readgroup: null +output_by_readgroup_file_format: null +remove_alignment_information: 'false' +remove_duplicate_information: 'true' +restore_hardclips: 'false' +restore_original_qualities: 'false' +sample_alias: null +sanitize: null +sort_order: 'unsorted' +reference: null +validation_stringency: 'SILENT' +compression_level: null +create_index: null +create_md5_file: null +memory_overhead: null +memory_per_job: null +number_of_threads: null From 117bfb2fda6653569a1eb2e8f11a2b09105f7492 Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Mon, 24 May 2021 12:24:47 -0400 Subject: [PATCH 369/476] update docs --- docs/SUMMARY.md | 1 + gatk_revert_sam/README.md | 164 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 165 insertions(+) create mode 100644 gatk_revert_sam/README.md diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index d8876d1c..2d49572a 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -30,6 +30,7 @@ - [MeanQualityByCycle](../gatk_mean_quality_by_cycle/README.md) - [MergeBamAlignment 
v4.1.8.0](../gatk_merge_bam_alignment_4.1.8.0/README.md) - [MergeSamFiles v4.1.8.0](../gatk_merge_sam_files_4.1.8.0/README.md) + - [RevertSam v4.1.8.0](../gatk_revert_sam/README.md) - [SamToFastq v4.1.8.0](../gatk_sam_to_fastq_4_1_8_0/README.md) - Manta - [Manta v1.5.1](../manta_1.5.1/README.md) diff --git a/gatk_revert_sam/README.md b/gatk_revert_sam/README.md new file mode 100644 index 00000000..90ea2bfa --- /dev/null +++ b/gatk_revert_sam/README.md @@ -0,0 +1,164 @@ +# CWL for running GATK - RevertSam + +## Version of tools in docker image + +| Tool | Version | Location | +|--- |--- |--- | +| gatk | 4.1.8.0 | broadinstitute/gatk:4.1.8.0 | + + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner gatk_revert_sam_4.1.8.0.cwl example_inputs.yaml +``` + +### Usage + +```bash +usage: gatk_revert_sam_4.1.8.0.cwl [-h] --input INPUT [--output OUTPUT] + [--output_map OUTPUT_MAP] + [--attribute_to_clear ATTRIBUTE_TO_CLEAR] + [--max_discard_fraction MAX_DISCARD_FRACTION] + [--library_name LIBRARY_NAME] + [--max_records_in_ram MAX_RECORDS_IN_RAM] + [--output_by_readgroup OUTPUT_BY_READGROUP] + [--output_by_readgroup_file_format OUTPUT_BY_READGROUP_FILE_FORMAT] + [--remove_alignment_information REMOVE_ALIGNMENT_INFORMATION] + [--remove_duplicate_information REMOVE_DUPLICATE_INFORMATION] + [--restore_hardclips RESTORE_HARDCLIPS] + [--restore_original_qualities RESTORE_ORIGINAL_QUALITIES] + [--sample_alias SAMPLE_ALIAS] + [--sanitize SANITIZE] + [--sort_order SORT_ORDER] + [--reference REFERENCE] + [--validation_stringency VALIDATION_STRINGENCY] + [--compression_level COMPRESSION_LEVEL] + [--create_index] [--create_md5_file] + [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] + [--temporary_directory TEMPORARY_DIRECTORY] + [job_order] + +positional arguments: + job_order Job input json 
file + +optional arguments: + -h, --help show this help message and exit + --input INPUT An aligned SAM or BAM file. Required. + --output OUTPUT The output SAM/BAM file to create, or an output + directory if OUTPUT_BY_READGROUP is true. Required. + Cannot be used in conjunction with argument(s) + OUTPUT_MAP (OM) + --output_map OUTPUT_MAP + Tab separated file with two columns, READ_GROUP_ID and + OUTPUT, providing file mapping only used if + OUTPUT_BY_READGROUP is true. Required. Cannot be used + in conjunction with argument(s) OUTPUT (O) + --attribute_to_clear ATTRIBUTE_TO_CLEAR + When removing alignment information, the set of + optional tags to remove. This may be specified 0 or + more times. Default value: [NM, UQ, PG, MD, MQ, SA, + MC, AS]. + --max_discard_fraction MAX_DISCARD_FRACTION + If SANITIZE=true and higher than MAX_DISCARD_FRACTION + reads are discarded due to sanitization then the + program will exit with an Exception instead of exiting + cleanly. Output BAM will still be valid. Default + value: 0.01. + --library_name LIBRARY_NAME + The library name to use in the reverted output file. + This will override the existing library name in the + file and is used only if all the read groups in the + input file have the same library name. Default value: + null. + --max_records_in_ram MAX_RECORDS_IN_RAM + When writing files that need to be sorted, this will + specify the number of records stored in RAM before + spilling to disk. Increasing this number reduces the + number of file handles needed to sort the file, and + increases the amount of RAM needed. Default value: + 500000. + --output_by_readgroup OUTPUT_BY_READGROUP + When true, outputs each read group in a separate file. + Default value: false. Possible values: {true, false} + --output_by_readgroup_file_format OUTPUT_BY_READGROUP_FILE_FORMAT + When using OUTPUT_BY_READGROUP, the output file format + can be set to a certain format. Default value: + dynamic. sam (Generate SAM files.) 
bam (Generate BAM + files.) cram (Generate CRAM files.) dynamic (Generate + files based on the extension of INPUT.) + --remove_alignment_information REMOVE_ALIGNMENT_INFORMATION + Remove all alignment information from the file. + Default value: true. Possible values: {true, false} + --remove_duplicate_information REMOVE_DUPLICATE_INFORMATION + Remove duplicate read flags from all reads. Note that + if this is false and + REMOVE_ALIGNMENT_INFORMATION==true, the output may + have the unusual but sometimes desirable trait of + having unmapped reads that are marked as duplicates. + Default value: true. Possible values: {true, false} + --restore_hardclips RESTORE_HARDCLIPS + When true, restores reads and qualities of records + with hard-clips containing XB and XQ tags. Default + value: true. Possible values: {true, false} + --restore_original_qualities RESTORE_ORIGINAL_QUALITIES + True to restore original qualities from the OQ field + to the QUAL field if available. Default value: true. + Possible values: {true, false} + --sample_alias SAMPLE_ALIAS + The sample alias to use in the reverted output file. + This will override the existing sample alias in the + file and is used only if all the read groups in the + input file have the same sample alias. Default value: + null. + --sanitize SANITIZE WARNING: This option is potentially destructive. If + enabled will discard reads in order to produce a + consistent output BAM. Reads discarded include (but + are not limited to) paired reads with missing mates, + duplicated records, records with mismatches in length + of bases and qualities. This option can only be + enabled if the output sort order is queryname and will + always cause sorting to occur. Default value: false. + Possible values: {true, false} + --sort_order SORT_ORDER + The sort order to create the reverted output file + with. Default value: queryname. 
Possible values: + {unsorted, queryname, coordinate, duplicate, unknown} + --reference REFERENCE + Reference sequence file. Note that while this argument + is not required, without it only a small subset of the + metrics will be calculated. Note also that if a + reference sequence is provided, it must be accompanied + by a sequence dictionary. Default value: null. + --validation_stringency VALIDATION_STRINGENCY + Validation stringency for all SAM files read by this + program. Setting stringency to SILENT can improve + performance when processing a BAM file in which + variable-length data (read, qualities, tags) do not + otherwise need to be decoded. Default value: STRICT. + This option can be set to 'null' to clear the default + value. Possible values: {STRICT,LENIENT, SILENT} + --compression_level COMPRESSION_LEVEL + Compression level for all compressed files created + (e.g. BAM and VCF). Default value: 2. + --create_index Whether to create a BAM index when writing a + coordinate-sorted BAM file. Default value: false. + Possible values: {true, false} + --create_md5_file Whether to create an MD5 digest for any BAM or FASTQ + files created. Default value: false. Possible values: + {true, false} + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --temporary_directory TEMPORARY_DIRECTORY + Default value: null. This option may be specified 0 or + more times. 
+``` From 99b041ecd10d2e6be3bc2ef66df0dfb45bc99341 Mon Sep 17 00:00:00 2001 From: Ian Date: Tue, 25 May 2021 16:08:19 +0000 Subject: [PATCH 370/476] GitBook: [master] 103 pages modified --- abra2_2.22/README.md | 21 --- bedtools_genomecov_v2.28.0_cv2/README.md | 56 ------- bedtools_merge_v2.28.0_cv2/README.md | 57 ------- disambiguate_1.0.0/README.md | 52 ------- docs/SUMMARY.md | 119 +++++++-------- docs/abra2/README.md | 2 + .../README.md => docs/abra2/abra2_2.17.md | 29 ++-- docs/abra2/abra2_2.22.md | 21 +++ docs/bedtools/README.md | 2 + .../bedtools_genomecov_v2.28.0_cv2.md | 43 ++++++ docs/bedtools/bedtools_merge_v2.28.0_cv2.md | 43 ++++++ docs/disambiguate/README.md | 2 + docs/disambiguate/disambiguate_1.0.0.md | 53 +++++++ docs/fgbio/README.md | 2 + ...fgbio_call_duplex_consensus_reads_1.2.0.md | 16 +- .../fgbio_collect_duplex_seq_metrics_1.2.0.md | 15 +- .../fgbio/fgbio_fastq_to_bam_1.2.0.md | 16 +- .../fgbio_filter_consensus_reads_1.2.0.md | 16 +- .../fgbio/fgbio_group_reads_by_umi_1.2.0.md | 16 +- ...bio_postprocessing_simplex_filter_0.1.8.md | 15 +- docs/gatk/README.md | 2 + docs/gatk/gatk_apply_bqsr_4.1.8.1.md | 43 ++++++ docs/gatk/gatk_applybqsr_4.1.2.0.md | 43 ++++++ docs/gatk/gatk_base_recalibrator_4.1.8.1.md | 43 ++++++ docs/gatk/gatk_baserecalibrator_4.1.2.0.md | 43 ++++++ .../gatk/gatk_merge_bam_alignment_4.1.8.0.md | 16 +- .../gatk/gatk_merge_sam_files_4.1.8.0.md | 16 +- docs/gatk/samtofastq-v4.1.8.0.md | 2 + docs/manta/README.md | 2 + .../README.md => docs/manta/manta_1.5.1.md | 27 ++-- docs/marianas/README.md | 2 + .../marianas_collapsing_first_pass_1.8.1.md | 19 +++ .../marianas_collapsing_second_pass_1.8.1.md | 19 +++ .../marianas_process_loop_umi_1.8.1.md | 19 +++ docs/marianas/marianas_separate_bams_1.8.1.md | 33 ++++ docs/merge-fastq/README.md | 2 + .../merge-fastq/merge_fastq_0.1.7.md | 22 +-- docs/mutect/README.md | 2 + .../README.md => docs/mutect/mutect_1.1.5.md | 25 ++- docs/picard-tools/README.md | 2 + 
.../picard_add_or_replace_read_groups_1.96.md | 24 +-- ...icard_add_or_replace_read_groups_2.21.2.md | 17 +-- ...card_add_or_replace_read_groups_4.1.8.1.md | 17 +-- ...ollect_alignment_summary_metrics_2.21.2.md | 16 +- ...collect_alignment_summary_metrics_2.8.1.md | 22 +-- .../picard_collectmultiplemetric_2.21.2.md | 16 +- .../picard_collectmultiplemetric_2.8.1.md | 22 +-- .../picard_fix_mate_information_1.96.md | 29 ++-- .../picard_fix_mate_information_2.21.2.md | 20 +-- .../picard_fix_mate_information_4.1.8.1.md | 20 +-- .../picard-tools/picard_hsmetrics_2.21.2.md | 17 +-- docs/picard-tools/picard_hsmetrics_2.8.1.md | 26 ++++ .../picard_mark_duplicates_1.96.md | 20 +++ .../picard_mark_duplicates_2.21.2.md | 16 +- .../picard_mark_duplicates_2.8.1.md | 20 +++ .../picard_mark_duplicates_4.1.8.1.md | 16 +- docs/trim-galore/README.md | 2 + .../trim-galore/trim_galore_0.6.2.md | 29 ++-- docs/ubuntu-utilites/README.md | 2 + .../ubuntu-utilites/utilities_ubuntu_18.04.md | 26 ++++ docs/waltz/README.md | 2 + .../waltz/waltz_count_reads_3.1.1.md | 22 +-- .../waltz/waltz_pileupmatrices_3.1.1.md | 22 +-- gatk_ApplyBQSR_4.1.2.0/README.md | 128 ---------------- gatk_BaseRecalibrator_4.1.2.0/README.md | 142 ------------------ gatk_apply_bqsr_4.1.8.1/README.md | 128 ---------------- gatk_base_recalibrator_4.1.8.1/README.md | 142 ------------------ .../README.md | 19 --- .../README.md | 19 --- marianas_process_loop_umi_1.8.1/README.md | 19 --- marianas_separate_bams_1.8.1/README.md | 33 ---- picard_hsmetrics_2.8.1/README.md | 26 ---- picard_mark_duplicates_1.96/README.md | 20 --- picard_mark_duplicates_2.8.1/README.md | 20 --- utilities_ubuntu_18.04/README.md | 25 --- 75 files changed, 882 insertions(+), 1240 deletions(-) delete mode 100644 abra2_2.22/README.md delete mode 100644 bedtools_genomecov_v2.28.0_cv2/README.md delete mode 100644 bedtools_merge_v2.28.0_cv2/README.md delete mode 100644 disambiguate_1.0.0/README.md create mode 100644 docs/abra2/README.md rename 
abra2_2.17/README.md => docs/abra2/abra2_2.17.md (73%) create mode 100644 docs/abra2/abra2_2.22.md create mode 100644 docs/bedtools/README.md create mode 100644 docs/bedtools/bedtools_genomecov_v2.28.0_cv2.md create mode 100644 docs/bedtools/bedtools_merge_v2.28.0_cv2.md create mode 100644 docs/disambiguate/README.md create mode 100644 docs/disambiguate/disambiguate_1.0.0.md create mode 100644 docs/fgbio/README.md rename fgbio_call_duplex_consensus_reads_1.2.0/README.md => docs/fgbio/fgbio_call_duplex_consensus_reads_1.2.0.md (90%) rename fgbio_collect_duplex_seq_metrics_1.2.0/README.md => docs/fgbio/fgbio_collect_duplex_seq_metrics_1.2.0.md (84%) rename fgbio_fastq_to_bam_1.2.0/README.md => docs/fgbio/fgbio_fastq_to_bam_1.2.0.md (90%) rename fgbio_filter_consensus_reads_1.2.0/README.md => docs/fgbio/fgbio_filter_consensus_reads_1.2.0.md (90%) rename fgbio_group_reads_by_umi_1.2.0/README.md => docs/fgbio/fgbio_group_reads_by_umi_1.2.0.md (87%) rename fgbio_postprocessing_simplex_filter_0.1.8/README.md => docs/fgbio/fgbio_postprocessing_simplex_filter_0.1.8.md (70%) create mode 100644 docs/gatk/README.md create mode 100644 docs/gatk/gatk_apply_bqsr_4.1.8.1.md create mode 100644 docs/gatk/gatk_applybqsr_4.1.2.0.md create mode 100644 docs/gatk/gatk_base_recalibrator_4.1.8.1.md create mode 100644 docs/gatk/gatk_baserecalibrator_4.1.2.0.md rename gatk_merge_bam_alignment_4.1.8.0/README.md => docs/gatk/gatk_merge_bam_alignment_4.1.8.0.md (98%) rename gatk_merge_sam_files_4.1.8.0/README.md => docs/gatk/gatk_merge_sam_files_4.1.8.0.md (94%) create mode 100644 docs/gatk/samtofastq-v4.1.8.0.md create mode 100644 docs/manta/README.md rename manta_1.5.1/README.md => docs/manta/manta_1.5.1.md (61%) create mode 100644 docs/marianas/README.md create mode 100644 docs/marianas/marianas_collapsing_first_pass_1.8.1.md create mode 100644 docs/marianas/marianas_collapsing_second_pass_1.8.1.md create mode 100644 docs/marianas/marianas_process_loop_umi_1.8.1.md create mode 100644 
docs/marianas/marianas_separate_bams_1.8.1.md create mode 100644 docs/merge-fastq/README.md rename merge_fastq_0.1.7/README.md => docs/merge-fastq/merge_fastq_0.1.7.md (63%) create mode 100644 docs/mutect/README.md rename mutect_1.1.5/README.md => docs/mutect/mutect_1.1.5.md (95%) create mode 100644 docs/picard-tools/README.md rename picard_add_or_replace_read_groups_1.96/README.md => docs/picard-tools/picard_add_or_replace_read_groups_1.96.md (78%) rename picard_add_or_replace_read_groups_2.21.2/README.md => docs/picard-tools/picard_add_or_replace_read_groups_2.21.2.md (87%) rename picard_add_or_replace_read_groups_4.1.8.1/README.md => docs/picard-tools/picard_add_or_replace_read_groups_4.1.8.1.md (89%) rename picard_collect_alignment_summary_metrics_2.21.2/README.md => docs/picard-tools/picard_collect_alignment_summary_metrics_2.21.2.md (89%) rename picard_collect_alignment_summary_metrics_2.8.1/README.md => docs/picard-tools/picard_collect_alignment_summary_metrics_2.8.1.md (85%) rename picard_collectmultiplemetric_2.21.2/README.md => docs/picard-tools/picard_collectmultiplemetric_2.21.2.md (89%) rename picard_collectmultiplemetric_2.8.1/README.md => docs/picard-tools/picard_collectmultiplemetric_2.8.1.md (85%) rename picard_fix_mate_information_1.96/README.md => docs/picard-tools/picard_fix_mate_information_1.96.md (71%) rename picard_fix_mate_information_2.21.2/README.md => docs/picard-tools/picard_fix_mate_information_2.21.2.md (83%) rename picard_fix_mate_information_4.1.8.1/README.md => docs/picard-tools/picard_fix_mate_information_4.1.8.1.md (85%) rename picard_hsmetrics_2.21.2/README.md => docs/picard-tools/picard_hsmetrics_2.21.2.md (90%) create mode 100644 docs/picard-tools/picard_hsmetrics_2.8.1.md create mode 100644 docs/picard-tools/picard_mark_duplicates_1.96.md rename picard_mark_duplicates_2.21.2/README.md => docs/picard-tools/picard_mark_duplicates_2.21.2.md (89%) create mode 100644 docs/picard-tools/picard_mark_duplicates_2.8.1.md rename 
picard_mark_duplicates_4.1.8.1/README.md => docs/picard-tools/picard_mark_duplicates_4.1.8.1.md (95%) create mode 100644 docs/trim-galore/README.md rename trim_galore_0.6.2/README.md => docs/trim-galore/trim_galore_0.6.2.md (83%) create mode 100644 docs/ubuntu-utilites/README.md create mode 100644 docs/ubuntu-utilites/utilities_ubuntu_18.04.md create mode 100644 docs/waltz/README.md rename waltz_count_reads_3.1.1/README.md => docs/waltz/waltz_count_reads_3.1.1.md (70%) rename waltz_pileupmatrices_3.1.1/README.md => docs/waltz/waltz_pileupmatrices_3.1.1.md (70%) delete mode 100644 gatk_ApplyBQSR_4.1.2.0/README.md delete mode 100644 gatk_BaseRecalibrator_4.1.2.0/README.md delete mode 100644 gatk_apply_bqsr_4.1.8.1/README.md delete mode 100644 gatk_base_recalibrator_4.1.8.1/README.md delete mode 100644 marianas_collapsing_first_pass_1.8.1/README.md delete mode 100644 marianas_collapsing_second_pass_1.8.1/README.md delete mode 100644 marianas_process_loop_umi_1.8.1/README.md delete mode 100644 marianas_separate_bams_1.8.1/README.md delete mode 100644 picard_hsmetrics_2.8.1/README.md delete mode 100644 picard_mark_duplicates_1.96/README.md delete mode 100644 picard_mark_duplicates_2.8.1/README.md delete mode 100644 utilities_ubuntu_18.04/README.md diff --git a/abra2_2.22/README.md b/abra2_2.22/README.md deleted file mode 100644 index 6e518dd8..00000000 --- a/abra2_2.22/README.md +++ /dev/null @@ -1,21 +0,0 @@ -# CWL and Dockerfile for running ABRA2 - -## Version of tools in docker image (/container/Dockerfile) - -| Tool | Version | Location | -|--- |--- |--- | -| openjdk | 8 | - | -| ABRA2 | 2.22 | https://github.com/mozack/abra2/releases/download/v2.22/abra2-2.22.jar | - -[![](https://images.microbadger.com/badges/version/mskcc/abra2:0.2.0.svg)](https://microbadger.com/images/mskcc/abra2:0.2.0 "Get your own version badge on microbadger.com") [![](https://images.microbadger.com/badges/image/mskcc/abra2:0.2.0.svg)](https://microbadger.com/images/mskcc/abra2:0.2.0 "Get 
your own image badge on microbadger.com") [![](https://images.microbadger.com/badges/license/mskcc/abra2:0.2.0.svg)](https://microbadger.com/images/mskcc/abra2:0.2.0 "Get your own license badge on microbadger.com") - -## CWL - -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner abra2_2.22.cwl example_inputs.yaml -``` - diff --git a/bedtools_genomecov_v2.28.0_cv2/README.md b/bedtools_genomecov_v2.28.0_cv2/README.md deleted file mode 100644 index 75de8a57..00000000 --- a/bedtools_genomecov_v2.28.0_cv2/README.md +++ /dev/null @@ -1,56 +0,0 @@ -# CWL and Dockerfile for running Bedtools GenomeCov - -## Version of tools in [docker image ](https://hub.docker.com/r/biocontainers/bedtools) - -| Tool | Version | Location | -|--- |--- |--- | -| Bedtools | v2.28.0_cv2 | https://github.com/arq5x/bedtools2/releases/tag/v2.28.0 | - -[![](https://img.shields.io/badge/version-2.28.0_cv2-blue)](https://github.com/arq5x/bedtools2/releases/tag/v2.28.0) -## CWL - -- CWL specification 1.0 -- Use example_inputs.yml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner bedtools_genomecov_v2.28.0_cv2.cwl example_inputs.yml -``` - -**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** - -```bash -#Using CWLTOOL -> cwltool --singularity --non-strict bedtools_genomecov_v2.28.0_cv2.cwl inputs.yaml - -#Using toil-cwl-runner -> mkdir run_directory -> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 
2 --disableCaching --maxLogFileSize 20000000000 bedtools_genomecov_v2.28.0_cv2.cwl inputs.yaml > file.stdout 2> file.stderr & -``` - -## Usage - -```bash -> toil-cwl-runner bedtools_genomecov_v2.28.0_cv2.cwl --help - -usage: bedtools_genomecov_v2.28.0_cv2.cwl [-h] --input INPUT - --output_file_name OUTPUT_FILE_NAME - [--memory_overhead MEMORY_OVERHEAD] - [--memory_per_job MEMORY_PER_JOB] - [--number_of_threads NUMBER_OF_THREADS] - [--option_bedgraph] - [job_order] - -positional arguments: - job_order Job input json file - -optional arguments: - -h, --help show this help message and exit - --input INPUT The input file can be in BAM format (Note: BAM must be - sorted by position) - --output_file_name OUTPUT_FILE_NAME - --memory_overhead MEMORY_OVERHEAD - --memory_per_job MEMORY_PER_JOB - --number_of_threads NUMBER_OF_THREADS - --option_bedgraph option flag parameter to choose output file format. - -bg refers to bedgraph format \ No newline at end of file diff --git a/bedtools_merge_v2.28.0_cv2/README.md b/bedtools_merge_v2.28.0_cv2/README.md deleted file mode 100644 index 960664db..00000000 --- a/bedtools_merge_v2.28.0_cv2/README.md +++ /dev/null @@ -1,57 +0,0 @@ -# CWL and Dockerfile for running Bedtools Merge - -## Version of tools in [docker image ](https://hub.docker.com/r/biocontainers/bedtools) - -| Tool | Version | Location | -|--- |--- |--- | -| Bedtools | v2.28.0_cv2 | https://github.com/arq5x/bedtools2/releases/tag/v2.28.0 | - -[![](https://img.shields.io/badge/version-2.28.0_cv2-blue)](https://github.com/arq5x/bedtools2/releases/tag/v2.28.0) -## CWL - -- CWL specification 1.0 -- Use example_inputs.yml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner bedtools_merge_v2.28.0_cv2.cwl example_inputs.yml -``` - -**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying 
[lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** - -```bash -#Using CWLTOOL -> cwltool --singularity --non-strict bedtools_merge_v2.28.0_cv2.cwl inputs.yaml - -#Using toil-cwl-runner -> mkdir run_directory -> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 bedtools_merge_v2.28.0_cv2.cwl inputs.yaml > file.stdout 2> file.stderr & -``` - -## Usage - -```bash -> toil-cwl-runner bedtools_merge_v2.28.0_cv2.cwl --help - -usage: bedtools_merge_v2.28.0_cv2.cwl [-h] --input INPUT --output_file_name - OUTPUT_FILE_NAME - [--memory_overhead MEMORY_OVERHEAD] - [--memory_per_job MEMORY_PER_JOB] - [--number_of_threads NUMBER_OF_THREADS] - [--distance_between_features DISTANCE_BETWEEN_FEATURES] - [job_order] - -positional arguments: - job_order Job input json file - -optional arguments: - -h, --help show this help message and exit - --input INPUT BEDgraph format file generated from Bedtools Genomecov - module - --output_file_name OUTPUT_FILE_NAME - --memory_overhead MEMORY_OVERHEAD - --memory_per_job MEMORY_PER_JOB - --number_of_threads NUMBER_OF_THREADS - --distance_between_features DISTANCE_BETWEEN_FEATURES - Maximum distance between features allowed for features - to be merged. \ No newline at end of file diff --git a/disambiguate_1.0.0/README.md b/disambiguate_1.0.0/README.md deleted file mode 100644 index b1cd50f0..00000000 --- a/disambiguate_1.0.0/README.md +++ /dev/null @@ -1,52 +0,0 @@ - # CWL and Dockerfile for running Disambiguate - -## Version of tools in docker image (/container/Dockerfile) - -Dockerfile uses `biocontainers/biocontainers:latest` as a base image and installs tools from `bioconda`. 
- -| Tool | Version | Location | Notes | -|--- |--- |--- | - | -| biocontainers | latest | https://hub.docker.com/r/biocontainers/biocontainers/ | base image; "latest" not actually latest version, just tag name on docker hub| -| bamtools | 2.4.0 | https://bioconda.github.io/recipes/bamtools/README.html | - | -| ngs-disambiguate | 2016.11.10 | https://bioconda.github.io/recipes/ngs-disambiguate/README.html | - | - -[![](https://images.microbadger.com/badges/version/mskcc/disambiguate:1.0.0.svg)](https://microbadger.com/images/mskcc/disambiguate:1.0.0 "Get your own version badge on microbadger.com") [![](https://images.microbadger.com/badges/image/mskcc/disambiguate:1.0.0.svg)](https://microbadger.com/images/mskcc/disambiguate:1.0.0 "Get your own image badge on microbadger.com") - - -## CWL - -- CWL specification 1.0 -- Use `example_inputs.yaml` to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner disambiguate_1.0.0.cwl example_inputs.yaml -``` - -## Command -``` -USAGE: - - cwltool disambiguate_1.0.0.cwl \ - --prefix \ - --output_dir \ - [--aligner ] \ - - -Where: - - --prefix - (required) Sample ID or name used as prefix. 
Do not include .bam - - --output_dir - (required) Output directory - - --aligner - Aligner option {bwa(default),tophat,hisat2,star} - - - (required) Species A BAM file - - - (required) Species B BAM file -``` diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index e86b3b5e..7230f8fe 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -1,61 +1,62 @@ # Table of contents -- [MSK-ACCESS command-line tools](README.md) - - ABRA2 - - [v2.17](../abra2_2.17/README.md) - - [v2.22](../abra2_2.22/README.md) - - Bedtools - - [genomecov v2.28.0_cv2](../bedtools_genomecov_v2.28.0_cv2/README.md) - - [merge v2.28.0_cv2](../bedtools_merge_v2.28.0_cv2/README.md) - - Disambiguate - - [v1.0.0](../disambiguate_1.0.0/README.md) - - Fgbio - - [CallDuplexConsensusReads v1.2.0](../fgbio_call_duplex_consensus_reads_1.2.0/README.md) - - [CollectDuplexSeqMetrics v1.2.0](../fgbio_collect_duplex_seq_metrics_1.2.0/README.md) - - [GroupReadsByUmi v1.2.0](../fgbio_group_reads_by_umi_1.2.0/README.md) - - [FastqToBam v1.2.0](../fgbio_fastq_to_bam_1.2.0/README.md) - - [FilterConsensusReads v1.2.0](../fgbio_filter_consensus_reads_1.2.0/README.md) - - [simplex_filter v0.1.8](../fgbio_postprocessing_simplex_filter_0.1.8/README.md) - - GATK - - [ApplyBQSR v4.1.2.0](../gatk_ApplyBQSR_4.1.2.0/README.md) - - [ApplyBQSR v4.1.8.1](../gatk_apply_bqsr_4.1.8.1/README.md) - - [BaseRecalibrator v4.1.2.0](../gatk_BaseRecalibrator_4.1.2.0/README.md) - - [BaseRecalibrator v4.1.8.1](../gatk_base_recalibrator_4.1.8.1/README.md) - - [MergeBamAlignment v4.1.8.0](../gatk_merge_bam_alignment_4.1.8.0/README.md) - - [MergeSamFiles v4.1.8.0](../gatk_merge_sam_files_4.1.8.0/README.md) - - [SamToFastq v4.1.8.0](../gatk_sam_to_fastq_4_1_8_0/README.md) - - Manta - - [Manta v1.5.1](../manta_1.5.1/README.md) - - Marianas - - [Collapsing First Pass v1.8.1](../marianas_collapsing_first_pass_1.8.1/README.md) - - [Collapsing Second Pass v1.8.1](../marianas_collapsing_second_pass_1.8.1/README.md) - - [Process Loop UMI 
v1.8.1](../marianas_process_loop_umi_1.8.1/README.md) - - [Seprate BAMs v1.8.1](../marianas_separate_bams_1.8.1/README.md) - - MuTect - - [MuTect 1.1.5](../mutect_1.1.5/README.md) - - Merge Fastq - - [v0.1.7](../merge_fastq_0.1.7/README.md) - - Picard Tools - - [AddOrReplaceReadGroups v1.96](../picard_add_or_replace_read_groups_1.96/README.md) - - [AddOrReplaceReadGroups v2.21.2](../picard_add_or_replace_read_groups_2.21.2/README.md) - - [AddOrReplaceReadGroups v4.1.8.1](../picard_add_or_replace_read_groups_4.1.8.1/README.md) - - [CollectAlignmentSummaryMetrics v2.8.1](../picard_collect_alignment_summary_metrics_2.8.1/README.md) - - [CollectAlignmentSummaryMetrics v2.21.2](../picard_collect_alignment_summary_metrics_2.21.2/README.md) - - [CollectMultipleMetrics v2.8.1](../picard_collectmultiplemetric_2.8.1/README.md) - - [CollectMultipleMetrics v2.21.2](../picard_collectmultiplemetric_2.21.2/README.md) - - [FixMateInformation v1.96](../picard_fix_mate_information_1.96/README.md) - - [FixMateInformation v2.21.2](../picard_fix_mate_information_2.21.2/README.md) - - [FixMateInformation v4.1.8.1](../picard_fix_mate_information_4.1.8.1/README.md) - - [HSmetrics v2.8.1](../picard_hsmetrics_2.8.1/README.md) - - [HSmetrics v2.21.2](../picard_hsmetrics_2.21.2/README.md) - - [MarkDuplicates v1.96](../picard_mark_duplicates_1.96/README.md) - - [MarkDuplicates v2.8.1](../picard_mark_duplicates_2.8.1/README.md) - - [MarkDuplicates v2.21.2](../picard_mark_duplicates_2.21.2/README.md) - - [MarkDuplicates v4.1.8.1](../picard_mark_duplicates_4.1.8.1/README.md) - - Trim Galore - - [v0.6.2](../trim_galore_0.6.2/README.md) - - Ubuntu utilites - - [v18.04](../utilities_ubuntu_18.04/README.md) - - Waltz - - [CountReads v3.1.1](../waltz_count_reads_3.1.1/README.md) - - [PileupMetrics v3.1.1](../waltz_pileupmatrices_3.1.1/README.md) +* [MSK-ACCESS command-line tools](README.md) +* [ABRA2](abra2/README.md) + * [v2.17](abra2/abra2_2.17.md) + * [v2.22](abra2/abra2_2.22.md) +* 
[Bedtools](bedtools/README.md) + * [genomecov v2.28.0\_cv2](bedtools/bedtools_genomecov_v2.28.0_cv2.md) + * [merge v2.28.0\_cv2](bedtools/bedtools_merge_v2.28.0_cv2.md) +* [Disambiguate](disambiguate/README.md) + * [v1.0.0](disambiguate/disambiguate_1.0.0.md) +* [Fgbio](fgbio/README.md) + * [CallDuplexConsensusReads v1.2.0](fgbio/fgbio_call_duplex_consensus_reads_1.2.0.md) + * [CollectDuplexSeqMetrics v1.2.0](fgbio/fgbio_collect_duplex_seq_metrics_1.2.0.md) + * [GroupReadsByUmi v1.2.0](fgbio/fgbio_group_reads_by_umi_1.2.0.md) + * [FastqToBam v1.2.0](fgbio/fgbio_fastq_to_bam_1.2.0.md) + * [FilterConsensusReads v1.2.0](fgbio/fgbio_filter_consensus_reads_1.2.0.md) + * [simplex\_filter v0.1.8](fgbio/fgbio_postprocessing_simplex_filter_0.1.8.md) +* [GATK](gatk/README.md) + * [ApplyBQSR v4.1.2.0](gatk/gatk_applybqsr_4.1.2.0.md) + * [ApplyBQSR v4.1.8.1](gatk/gatk_apply_bqsr_4.1.8.1.md) + * [BaseRecalibrator v4.1.2.0](gatk/gatk_baserecalibrator_4.1.2.0.md) + * [BaseRecalibrator v4.1.8.1](gatk/gatk_base_recalibrator_4.1.8.1.md) + * [MergeBamAlignment v4.1.8.0](gatk/gatk_merge_bam_alignment_4.1.8.0.md) + * [MergeSamFiles v4.1.8.0](gatk/gatk_merge_sam_files_4.1.8.0.md) + * [SamToFastq v4.1.8.0](gatk/samtofastq-v4.1.8.0.md) +* [Manta](manta/README.md) + * [Manta v1.5.1](manta/manta_1.5.1.md) +* [Marianas](marianas/README.md) + * [Collapsing First Pass v1.8.1](marianas/marianas_collapsing_first_pass_1.8.1.md) + * [Collapsing Second Pass v1.8.1](marianas/marianas_collapsing_second_pass_1.8.1.md) + * [Process Loop UMI v1.8.1](marianas/marianas_process_loop_umi_1.8.1.md) + * [Separate BAMs v1.8.1](marianas/marianas_separate_bams_1.8.1.md) +* [MuTect](mutect/README.md) + * [MuTect 1.1.5](mutect/mutect_1.1.5.md) +* [Merge Fastq](merge-fastq/README.md) + * [v0.1.7](merge-fastq/merge_fastq_0.1.7.md) +* [Picard Tools](picard-tools/README.md) + * [AddOrReplaceReadGroups v1.96](picard-tools/picard_add_or_replace_read_groups_1.96.md) + * [AddOrReplaceReadGroups
v2.21.2](picard-tools/picard_add_or_replace_read_groups_2.21.2.md) + * [AddOrReplaceReadGroups v4.1.8.1](picard-tools/picard_add_or_replace_read_groups_4.1.8.1.md) + * [CollectAlignmentSummaryMetrics v2.8.1](picard-tools/picard_collect_alignment_summary_metrics_2.8.1.md) + * [CollectAlignmentSummaryMetrics v2.21.2](picard-tools/picard_collect_alignment_summary_metrics_2.21.2.md) + * [CollectMultipleMetrics v2.8.1](picard-tools/picard_collectmultiplemetric_2.8.1.md) + * [CollectMultipleMetrics v2.21.2](picard-tools/picard_collectmultiplemetric_2.21.2.md) + * [FixMateInformation v1.96](picard-tools/picard_fix_mate_information_1.96.md) + * [FixMateInformation v2.21.2](picard-tools/picard_fix_mate_information_2.21.2.md) + * [FixMateInformation v4.1.8.1](picard-tools/picard_fix_mate_information_4.1.8.1.md) + * [HSmetrics v2.8.1](picard-tools/picard_hsmetrics_2.8.1.md) + * [HSmetrics v2.21.2](picard-tools/picard_hsmetrics_2.21.2.md) + * [MarkDuplicates v1.96](picard-tools/picard_mark_duplicates_1.96.md) + * [MarkDuplicates v2.8.1](picard-tools/picard_mark_duplicates_2.8.1.md) + * [MarkDuplicates v2.21.2](picard-tools/picard_mark_duplicates_2.21.2.md) + * [MarkDuplicates v4.1.8.1](picard-tools/picard_mark_duplicates_4.1.8.1.md) +* [Trim Galore](trim-galore/README.md) + * [v0.6.2](trim-galore/trim_galore_0.6.2.md) +* [Ubuntu utilities](ubuntu-utilites/README.md) + * [v18.04](ubuntu-utilites/utilities_ubuntu_18.04.md) +* [Waltz](waltz/README.md) + * [CountReads v3.1.1](waltz/waltz_count_reads_3.1.1.md) + * [PileupMetrics v3.1.1](waltz/waltz_pileupmatrices_3.1.1.md) + diff --git a/docs/abra2/README.md b/docs/abra2/README.md new file mode 100644 index 00000000..5cd595c7 --- /dev/null +++ b/docs/abra2/README.md @@ -0,0 +1,2 @@ +# ABRA2 + diff --git a/abra2_2.17/README.md b/docs/abra2/abra2_2.17.md similarity index 73% rename from abra2_2.17/README.md rename to docs/abra2/abra2_2.17.md index 89818e32..e2f9457b 100644 --- a/abra2_2.17/README.md +++ b/docs/abra2/abra2_2.17.md @@
-1,25 +1,25 @@ -# CWL and Dockerfile for running ABRA2 +# v2.17 -## Version of tools in docker image (/container/Dockerfile) +## Version of tools in docker image \(/container/Dockerfile\) -| Tool | Version | Location | -|--- |--- |--- | -| openjdk | 8 | - | -| ABRA2 | 2.17 | https://github.com/mozack/abra2/releases/download/v2.19/abra2-2.17.jar | +| Tool | Version | Location | +| :--- | :--- | :--- | +| openjdk | 8 | - | +| ABRA2 | 2.17 | [https://github.com/mozack/abra2/releases/download/v2.19/abra2-2.17.jar](https://github.com/mozack/abra2/releases/download/v2.19/abra2-2.17.jar) | -[![](https://images.microbadger.com/badges/version/mskcc/abra2:0.1.0.svg)](https://microbadger.com/images/mskcc/abra2:0.1.0 "Get your own version badge on microbadger.com") [![](https://images.microbadger.com/badges/image/mskcc/abra2:0.1.0.svg)](https://microbadger.com/images/mskcc/abra2:0.1.0 "Get your own image badge on microbadger.com") [![](https://images.microbadger.com/badges/license/mskcc/abra2:0.1.0.svg)](https://microbadger.com/images/mskcc/abra2:0.1.0 "Get your own license badge on microbadger.com") +[![](https://images.microbadger.com/badges/version/mskcc/abra2:0.1.0.svg)](https://microbadger.com/images/mskcc/abra2:0.1.0) [![](https://images.microbadger.com/badges/image/mskcc/abra2:0.1.0.svg)](https://microbadger.com/images/mskcc/abra2:0.1.0) [![](https://images.microbadger.com/badges/license/mskcc/abra2:0.1.0.svg)](https://microbadger.com/images/mskcc/abra2:0.1.0) ## CWL -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): ```bash > toil-cwl-runner abra2_2.17.cwl example_inputs.yaml ``` -**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying 
[lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** +**If at MSK, using the JUNO cluster you can use the following command:** ```bash #Using CWLTOOL @@ -30,9 +30,9 @@ > toil-cwl-runner --singularity --logFile /path/to/abra2_toil_log/cwltoil.log --jobStore /path/to/abra2_jobStore --batchSystem lsf --workDir /path/to/abra2_toil_log --outdir . --writeLogs /path/to/abra2_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/abra2_2.17.cwl /path/to/inputs.yaml > abra2_toil.stdout 2> abra2_toil.stderr & ``` -### Usage +### Usage -``` +```text usage: abra2_2.17.cwl [-h] positional arguments: @@ -85,4 +85,5 @@ optional arguments: VCF containing known (or suspected) variant sites. Very large files should be avoided. --no_sort Do not attempt to sort final output - ``` \ No newline at end of file +``` + diff --git a/docs/abra2/abra2_2.22.md b/docs/abra2/abra2_2.22.md new file mode 100644 index 00000000..c23aaada --- /dev/null +++ b/docs/abra2/abra2_2.22.md @@ -0,0 +1,21 @@ +# v2.22 + +## Version of tools in docker image \(/container/Dockerfile\) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| openjdk | 8 | - | +| ABRA2 | 2.22 | [https://github.com/mozack/abra2/releases/download/v2.22/abra2-2.22.jar](https://github.com/mozack/abra2/releases/download/v2.22/abra2-2.22.jar) | + +[![](https://images.microbadger.com/badges/version/mskcc/abra2:0.2.0.svg)](https://microbadger.com/images/mskcc/abra2:0.2.0) [![](https://images.microbadger.com/badges/image/mskcc/abra2:0.2.0.svg)](https://microbadger.com/images/mskcc/abra2:0.2.0) [![](https://images.microbadger.com/badges/license/mskcc/abra2:0.2.0.svg)](https://microbadger.com/images/mskcc/abra2:0.2.0) + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + 
+```bash + > toil-cwl-runner abra2_2.22.cwl example_inputs.yaml +``` + diff --git a/docs/bedtools/README.md b/docs/bedtools/README.md new file mode 100644 index 00000000..13a0e51c --- /dev/null +++ b/docs/bedtools/README.md @@ -0,0 +1,2 @@ +# Bedtools + diff --git a/docs/bedtools/bedtools_genomecov_v2.28.0_cv2.md b/docs/bedtools/bedtools_genomecov_v2.28.0_cv2.md new file mode 100644 index 00000000..da41c5b2 --- /dev/null +++ b/docs/bedtools/bedtools_genomecov_v2.28.0_cv2.md @@ -0,0 +1,43 @@ +# genomecov v2.28.0\_cv2 + +## Version of tools in [docker image ](https://hub.docker.com/r/biocontainers/bedtools) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| Bedtools | v2.28.0\_cv2 | [https://github.com/arq5x/bedtools2/releases/tag/v2.28.0](https://github.com/arq5x/bedtools2/releases/tag/v2.28.0) | + +[![](https://img.shields.io/badge/version-2.28.0_cv2-blue)](https://github.com/arq5x/bedtools2/releases/tag/v2.28.0) + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner bedtools_genomecov_v2.28.0_cv2.cwl example_inputs.yml +``` + +**If at MSK, using the JUNO cluster you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict bedtools_genomecov_v2.28.0_cv2.cwl inputs.yaml + +#Using toil-cwl-runner +> mkdir run_directory +> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 bedtools_genomecov_v2.28.0_cv2.cwl inputs.yaml > file.stdout 2> file.stderr & +``` + +## Usage + +```bash +> toil-cwl-runner bedtools_genomecov_v2.28.0_cv2.cwl --help + +usage: bedtools_genomecov_v2.28.0_cv2.cwl [-h] --input INPUT --output_file_name
OUTPUT_FILE_NAME [--memory_overhead MEMORY_OVERHEAD] [--memory_per_job MEMORY_PER_JOB] [--number_of_threads NUMBER_OF_THREADS] [--option_bedgraph] [job_order] + +positional arguments: job_order Job input json file + +optional arguments: -h, --help show this help message and exit --input INPUT The input file can be in BAM format (Note: BAM must be sorted by position) --output_file_name OUTPUT_FILE_NAME --memory_overhead MEMORY_OVERHEAD --memory_per_job MEMORY_PER_JOB --number_of_threads NUMBER_OF_THREADS --option_bedgraph option flag parameter to choose output file format. -bg refers to bedgraph format +``` + diff --git a/docs/bedtools/bedtools_merge_v2.28.0_cv2.md b/docs/bedtools/bedtools_merge_v2.28.0_cv2.md new file mode 100644 index 00000000..507a8994 --- /dev/null +++ b/docs/bedtools/bedtools_merge_v2.28.0_cv2.md @@ -0,0 +1,43 @@ +# merge v2.28.0\_cv2 + +## Version of tools in [docker image ](https://hub.docker.com/r/biocontainers/bedtools) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| Bedtools | v2.28.0\_cv2 | [https://github.com/arq5x/bedtools2/releases/tag/v2.28.0](https://github.com/arq5x/bedtools2/releases/tag/v2.28.0) | + +[![](https://img.shields.io/badge/version-2.28.0_cv2-blue)](https://github.com/arq5x/bedtools2/releases/tag/v2.28.0) + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner bedtools_merge_v2.28.0_cv2.cwl example_inputs.yml +``` + +**If at MSK, using the JUNO cluster you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict bedtools_merge_v2.28.0_cv2.cwl inputs.yaml + +#Using toil-cwl-runner +> mkdir run_directory +> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs
/path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 bedtools_merge_v2.28.0_cv2.cwl inputs.yaml > file.stdout 2> file.stderr & +``` + +## Usage + +```bash +> toil-cwl-runner bedtools_merge_v2.28.0_cv2.cwl --help + +usage: bedtools_merge_v2.28.0_cv2.cwl [-h] --input INPUT --output_file_name OUTPUT_FILE_NAME [--memory_overhead MEMORY_OVERHEAD] [--memory_per_job MEMORY_PER_JOB] [--number_of_threads NUMBER_OF_THREADS] [--distance_between_features DISTANCE_BETWEEN_FEATURES] [job_order] + +positional arguments: job_order Job input json file + +optional arguments: -h, --help show this help message and exit --input INPUT BEDgraph format file generated from Bedtools Genomecov module --output_file_name OUTPUT_FILE_NAME --memory_overhead MEMORY_OVERHEAD --memory_per_job MEMORY_PER_JOB --number_of_threads NUMBER_OF_THREADS --distance_between_features DISTANCE_BETWEEN_FEATURES Maximum distance between features allowed for features to be merged. +``` + diff --git a/docs/disambiguate/README.md b/docs/disambiguate/README.md new file mode 100644 index 00000000..4850c2fd --- /dev/null +++ b/docs/disambiguate/README.md @@ -0,0 +1,2 @@ +# Disambiguate + diff --git a/docs/disambiguate/disambiguate_1.0.0.md b/docs/disambiguate/disambiguate_1.0.0.md new file mode 100644 index 00000000..8e065892 --- /dev/null +++ b/docs/disambiguate/disambiguate_1.0.0.md @@ -0,0 +1,53 @@ +# v1.0.0 + +## Version of tools in docker image \(/container/Dockerfile\) + +Dockerfile uses `biocontainers/biocontainers:latest` as a base image and installs tools from `bioconda`.
+ +| Tool | Version | Location | Notes | +| :--- | :--- | :--- | :--- | +| biocontainers | latest | [https://hub.docker.com/r/biocontainers/biocontainers/](https://hub.docker.com/r/biocontainers/biocontainers/) | base image; "latest" not actually latest version, just tag name on docker hub | +| bamtools | 2.4.0 | [https://bioconda.github.io/recipes/bamtools/README.html](https://bioconda.github.io/recipes/bamtools/README.html) | - | +| ngs-disambiguate | 2016.11.10 | [https://bioconda.github.io/recipes/ngs-disambiguate/README.html](https://bioconda.github.io/recipes/ngs-disambiguate/README.html) | - | + +[![](https://images.microbadger.com/badges/version/mskcc/disambiguate:1.0.0.svg)](https://microbadger.com/images/mskcc/disambiguate:1.0.0) [![](https://images.microbadger.com/badges/image/mskcc/disambiguate:1.0.0.svg)](https://microbadger.com/images/mskcc/disambiguate:1.0.0) + +## CWL + +* CWL specification 1.0 +* Use `example_inputs.yaml` to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner disambiguate_1.0.0.cwl example_inputs.yaml +``` + +## Command + +```text +USAGE: + + cwltool disambiguate_1.0.0.cwl \ + --prefix \ + --output_dir \ + [--aligner ] \ + + +Where: + + --prefix + (required) Sample ID or name used as prefix. 
Do not include .bam + + --output_dir + (required) Output directory + + --aligner + Aligner option {bwa(default),tophat,hisat2,star} + + + (required) Species A BAM file + + + (required) Species B BAM file +``` + diff --git a/docs/fgbio/README.md b/docs/fgbio/README.md new file mode 100644 index 00000000..f1d1ca8d --- /dev/null +++ b/docs/fgbio/README.md @@ -0,0 +1,2 @@ +# Fgbio + diff --git a/fgbio_call_duplex_consensus_reads_1.2.0/README.md b/docs/fgbio/fgbio_call_duplex_consensus_reads_1.2.0.md similarity index 90% rename from fgbio_call_duplex_consensus_reads_1.2.0/README.md rename to docs/fgbio/fgbio_call_duplex_consensus_reads_1.2.0.md index e7a9060e..c8d3d73e 100644 --- a/fgbio_call_duplex_consensus_reads_1.2.0/README.md +++ b/docs/fgbio/fgbio_call_duplex_consensus_reads_1.2.0.md @@ -1,16 +1,16 @@ -# CWL for running Fgbio - CallDuplexConsensusReads +# CallDuplexConsensusReads v1.2.0 ## Version of tools in docker image -| Tool | Version | Location | -| ----- | ------- | ------------------------------------ | -| fgbio | 1.2.0 | quay.io/biocontainers/fgbio:1.2.0--0 | +| Tool | Version | Location | +| :--- | :--- | :--- | +| fgbio | 1.2.0 | quay.io/biocontainers/fgbio:1.2.0--0 | ## CWL -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): ```bash > toil-cwl-runner fgbio_call_duplex_consensus_reads_1.2.0.cwl example_inputs.yaml @@ -19,7 +19,6 @@ ## Usage ```bash - usage: fgbio_call_duplex_consensus_reads_1.2.0.cwl [-h] [--memory_per_job MEMORY_PER_JOB] [--memory_overhead MEMORY_OVERHEAD] @@ -77,3 +76,4 @@ optional arguments: are present in a tag family, the family is randomly downsampled to exactly max-reads reads. 
``` + diff --git a/fgbio_collect_duplex_seq_metrics_1.2.0/README.md b/docs/fgbio/fgbio_collect_duplex_seq_metrics_1.2.0.md similarity index 84% rename from fgbio_collect_duplex_seq_metrics_1.2.0/README.md rename to docs/fgbio/fgbio_collect_duplex_seq_metrics_1.2.0.md index 20371c1a..78812a55 100644 --- a/fgbio_collect_duplex_seq_metrics_1.2.0/README.md +++ b/docs/fgbio/fgbio_collect_duplex_seq_metrics_1.2.0.md @@ -1,16 +1,16 @@ -# CWL for running Fgbio - CollectDuplexSeqMetrics +# CollectDuplexSeqMetrics v1.2.0 ## Version of tools in docker image -| Tool | Version | Location | -| ----- | ------- | ------------------------------------ | -| fgbio | 1.2.0 | quay.io/biocontainers/fgbio:1.2.0--0 | +| Tool | Version | Location | +| :--- | :--- | :--- | +| fgbio | 1.2.0 | quay.io/biocontainers/fgbio:1.2.0--0 | ## CWL -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): ```bash > toil-cwl-runner fgbio_collect_duplex_seq_metrics_1.2.0.cwl example_inputs.yaml @@ -59,3 +59,4 @@ optional arguments: --umi_tag UMI_TAG The tag containing the raw UMI. [Optional]. --mi_tag MI_TAG The output tag for UMI grouping. [Optional]. 
``` + diff --git a/fgbio_fastq_to_bam_1.2.0/README.md b/docs/fgbio/fgbio_fastq_to_bam_1.2.0.md similarity index 90% rename from fgbio_fastq_to_bam_1.2.0/README.md rename to docs/fgbio/fgbio_fastq_to_bam_1.2.0.md index ccc256f1..3d4ede7f 100644 --- a/fgbio_fastq_to_bam_1.2.0/README.md +++ b/docs/fgbio/fgbio_fastq_to_bam_1.2.0.md @@ -1,16 +1,16 @@ -# CWL for running Fgbio - FastqToBam +# FastqToBam v1.2.0 ## Version of tools in docker image -| Tool | Version | Location | -| ----- | ------- | ------------------------------------ | -| fgbio | 1.2.0 | quay.io/biocontainers/fgbio:1.2.0--0 | +| Tool | Version | Location | +| :--- | :--- | :--- | +| fgbio | 1.2.0 | quay.io/biocontainers/fgbio:1.2.0--0 | ## CWL -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): ```bash > toil-cwl-runner fgbio_fastq_to_bam_1.2.0.cwl example_inputs.yaml @@ -19,7 +19,6 @@ ## Usage ```bash - usage: fgbio_fastq_to_bam_1.2.0.cwl [-h] [--memory_per_job MEMORY_PER_JOB] [--memory_overhead MEMORY_OVERHEAD] [--number_of_threads NUMBER_OF_THREADS] @@ -80,3 +79,4 @@ optional arguments: --run-date RUN_DATE Date the run was produced, to insert into the read group header ``` + diff --git a/fgbio_filter_consensus_reads_1.2.0/README.md b/docs/fgbio/fgbio_filter_consensus_reads_1.2.0.md similarity index 90% rename from fgbio_filter_consensus_reads_1.2.0/README.md rename to docs/fgbio/fgbio_filter_consensus_reads_1.2.0.md index 496a048b..470166a1 100644 --- a/fgbio_filter_consensus_reads_1.2.0/README.md +++ b/docs/fgbio/fgbio_filter_consensus_reads_1.2.0.md @@ -1,16 +1,16 @@ -# CWL for running Fgbio - FilterConsensusReads +# FilterConsensusReads v1.2.0 ## Version of tools in docker image -| Tool | Version | Location | -| ----- | ------- | ------------------------------------ | 
-| fgbio | 1.2.0 | quay.io/biocontainers/fgbio:1.2.0--0 | +| Tool | Version | Location | +| :--- | :--- | :--- | +| fgbio | 1.2.0 | quay.io/biocontainers/fgbio:1.2.0--0 | ## CWL -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): ```bash > toil-cwl-runner fgbio_filter_consensus_reads_1.2.0.cwl example_inputs.yaml @@ -19,7 +19,6 @@ ## Usage ```bash - usage: fgbio_filter_consensus_reads_1.2.0.cwl [-h] [--memory_per_job MEMORY_PER_JOB] [--memory_overhead MEMORY_OVERHEAD] @@ -78,3 +77,4 @@ optional arguments: Mask (make N) consensus bases where the AB and BA consensus reads disagree (for duplex-sequencing only). ``` + diff --git a/fgbio_group_reads_by_umi_1.2.0/README.md b/docs/fgbio/fgbio_group_reads_by_umi_1.2.0.md similarity index 87% rename from fgbio_group_reads_by_umi_1.2.0/README.md rename to docs/fgbio/fgbio_group_reads_by_umi_1.2.0.md index 5c4080cb..87ad2aaa 100644 --- a/fgbio_group_reads_by_umi_1.2.0/README.md +++ b/docs/fgbio/fgbio_group_reads_by_umi_1.2.0.md @@ -1,16 +1,16 @@ -# CWL for running Fgbio - GroupReadsByUmi +# GroupReadsByUmi v1.2.0 ## Version of tools in docker image -| Tool | Version | Location | -| ----- | ------- | ------------------------------------ | -| fgbio | 1.2.0 | quay.io/biocontainers/fgbio:1.2.0--0 | +| Tool | Version | Location | +| :--- | :--- | :--- | +| fgbio | 1.2.0 | quay.io/biocontainers/fgbio:1.2.0--0 | ## CWL -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): ```bash > toil-cwl-runner fgbio_group_reads_by_umi_1.2.0.cwl example_inputs.yaml @@ -19,7 +19,6 @@ ## 
Usage ```bash - usage: fgbio_group_reads_by_umi_1.2.0.cwl [-h] [--memory_per_job MEMORY_PER_JOB] [--memory_overhead MEMORY_OVERHEAD] @@ -66,3 +65,4 @@ optional arguments: with UMIs shorter than this length and allow for differing UMI lengths. ``` + diff --git a/fgbio_postprocessing_simplex_filter_0.1.8/README.md b/docs/fgbio/fgbio_postprocessing_simplex_filter_0.1.8.md similarity index 70% rename from fgbio_postprocessing_simplex_filter_0.1.8/README.md rename to docs/fgbio/fgbio_postprocessing_simplex_filter_0.1.8.md index 66daa3e9..b12b00a3 100644 --- a/fgbio_postprocessing_simplex_filter_0.1.8/README.md +++ b/docs/fgbio/fgbio_postprocessing_simplex_filter_0.1.8.md @@ -1,16 +1,16 @@ -# CWL for running simplex_filter script from fgbio_postprocessing package +# simplex\_filter v0.1.8 ## Version of tools in docker image -| Tool | Version | Location | -| ----- | ------- | ------------------------------------ | -| fgbio_postprocessing | 0.1.8 | https://github.com/msk-access/fgbio_postprocessing | +| Tool | Version | Location | +| :--- | :--- | :--- | +| fgbio\_postprocessing | 0.1.8 | [https://github.com/msk-access/fgbio\_postprocessing](https://github.com/msk-access/fgbio_postprocessing) | ## CWL -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): ```bash > toil-cwl-runner fgbio_postprocessing_simplex_filter_0.1.8.cwl example_inputs.yaml @@ -41,3 +41,4 @@ optional arguments: --output_file_name OUTPUT_FILE_NAME Output file (bam or sam). 
``` + diff --git a/docs/gatk/README.md b/docs/gatk/README.md new file mode 100644 index 00000000..ebad94ae --- /dev/null +++ b/docs/gatk/README.md @@ -0,0 +1,2 @@ +# GATK + diff --git a/docs/gatk/gatk_apply_bqsr_4.1.8.1.md b/docs/gatk/gatk_apply_bqsr_4.1.8.1.md new file mode 100644 index 00000000..c919c1f4 --- /dev/null +++ b/docs/gatk/gatk_apply_bqsr_4.1.8.1.md @@ -0,0 +1,43 @@ +# ApplyBQSR v4.1.8.1 + +## Version of tools in [docker image ](https://hub.docker.com/r/broadinstitute/gatk) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| GATK | 4.1.8.1 | [https://github.com/broadinstitute/gatk/releases/tag/4.1.8.1](https://github.com/broadinstitute/gatk/releases/tag/4.1.8.1) | + +[![](https://img.shields.io/badge/version-4.1.8.1-blue)](https://github.com/broadinstitute/gatk/releases/tag/4.1.8.1) + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner gatk_apply_bqsr_4.1.8.1.cwl example_inputs.yml +``` + +**If at MSK, using the JUNO cluster you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict gatk_apply_bqsr_4.1.8.1.cwl inputs.yaml + +#Using toil-cwl-runner +> mkdir run_directory +> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 gatk_apply_bqsr_4.1.8.1.cwl inputs.yaml > file.stdout 2> file.stderr & +``` + +## Usage + +\`\`\`bash + +> toil-cwl-runner gatk\_apply\_bqsr\_4.1.8.1.cwl --help + +usage: gatk\_apply\_bqsr\_4.1.8.1.cwl \[-h\] --reference REFERENCE \[--create\_output\_bam\_index\] --bqsr\_recal\_file BQSR\_RECAL\_FILE --input INPUT \[--output\_file\_name OUTPUT\_FILE\_NAME\] \[--add\_output\_sam\_program\_record\] 
\[--add\_output\_vcf\_command\_line\] \[--arguments\_file ARGUMENTS\_FILE\] \[--cloud\_index\_prefetch\_buffer CLOUD\_INDEX\_PREFETCH\_BUFFER\] \[--cloud\_prefetch\_buffer CLOUD\_PREFETCH\_BUFFER\] \[--create\_output\_bam\_md5\] \[--create\_output\_variant\_index\] \[--create\_output\_variant\_md5\] \[--disable\_bam\_index\_caching\] \[--disable\_read\_filter DISABLE\_READ\_FILTER\] \[--disable\_sequence\_dictionary\_validation\] \[--emit\_original\_quals\] \[--exclude\_intervals EXCLUDE\_INTERVALS\] \[--gatk\_config\_file GATK\_CONFIG\_FILE\] \[--gcs\_max\_retries GCS\_MAX\_RETRIES\] \[--gcs\_project\_for\_requester\_pays GCS\_PROJECT\_FOR\_REQUESTER\_PAYS\] \[--global\_qscore\_prior GLOBAL\_QSCORE\_PRIOR\] \[--interval\_exclusion\_padding INTERVAL\_EXCLUSION\_PADDING\] \[--interval\_merging\_rule INTERVAL\_MERGING\_RULE\] \[--interval\_padding INTERVAL\_PADDING\] \[--interval\_set\_rule INTERVAL\_SET\_RULE\] \[--intervals INTERVALS\] \[--lenient\] \[--preserve\_qscores\_less\_than PRESERVE\_QSCORES\_LESS\_THAN\] \[--quantize\_quals QUANTIZE\_QUALS\] \[--quiet\] \[--read\_filter READ\_FILTER\] \[--read\_index READ\_INDEX\] \[--read\_validation\_stringency READ\_VALIDATION\_STRINGENCY\] \[--seconds\_between\_progress\_updates SECONDS\_BETWEEN\_PROGRESS\_UPDATES\] \[--sequence\_dictionary SEQUENCE\_DICTIONARY\] \[--sites\_only\_vcf\_output\] \[--use\_jdk\_deflater\] \[--use\_jdk\_inflater\] \[--use\_original\_qualities\] \[--memory\_overhead MEMORY\_OVERHEAD\] \[--memory\_per\_job MEMORY\_PER\_JOB\] \[--number\_of\_threads NUMBER\_OF\_THREADS\] \[job\_order\] + +positional arguments: job\_order Job input json file + +optional arguments: -h, --help show this help message and exit --reference REFERENCE Reference sequence --create\_output\_bam\_index --bqsr\_recal\_file BQSR\_RECAL\_FILE Input recalibration table for BQSR. 
Only run ApplyBQSR with the covariates table created from the input BAM --input INPUT A BAM file containing input read data --output\_file\_name OUTPUT\_FILE\_NAME Output file name. Not Required --add\_output\_sam\_program\_record --add\_output\_vcf\_command\_line --arguments\_file ARGUMENTS\_FILE --cloud\_index\_prefetch\_buffer CLOUD\_INDEX\_PREFETCH\_BUFFER --cloud\_prefetch\_buffer CLOUD\_PREFETCH\_BUFFER --create\_output\_bam\_md5 --create\_output\_variant\_index --create\_output\_variant\_md5 --disable\_bam\_index\_caching --disable\_read\_filter DISABLE\_READ\_FILTER --disable\_sequence\_dictionary\_validation --emit\_original\_quals --exclude\_intervals EXCLUDE\_INTERVALS --gatk\_config\_file GATK\_CONFIG\_FILE --gcs\_max\_retries GCS\_MAX\_RETRIES --gcs\_project\_for\_requester\_pays GCS\_PROJECT\_FOR\_REQUESTER\_PAYS --global\_qscore\_prior GLOBAL\_QSCORE\_PRIOR --interval\_exclusion\_padding INTERVAL\_EXCLUSION\_PADDING --interval\_merging\_rule INTERVAL\_MERGING\_RULE --interval\_padding INTERVAL\_PADDING --interval\_set\_rule INTERVAL\_SET\_RULE --intervals INTERVALS --lenient --preserve\_qscores\_less\_than PRESERVE\_QSCORES\_LESS\_THAN --quantize\_quals QUANTIZE\_QUALS --quiet --read\_filter READ\_FILTER --read\_index READ\_INDEX --read\_validation\_stringency READ\_VALIDATION\_STRINGENCY --seconds\_between\_progress\_updates SECONDS\_BETWEEN\_PROGRESS\_UPDATES --sequence\_dictionary SEQUENCE\_DICTIONARY --sites\_only\_vcf\_output --use\_jdk\_deflater --use\_jdk\_inflater --use\_original\_qualities --memory\_overhead MEMORY\_OVERHEAD --memory\_per\_job MEMORY\_PER\_JOB --number\_of\_threads NUMBER\_OF\_THREADS + diff --git a/docs/gatk/gatk_applybqsr_4.1.2.0.md b/docs/gatk/gatk_applybqsr_4.1.2.0.md new file mode 100644 index 00000000..709855a1 --- /dev/null +++ b/docs/gatk/gatk_applybqsr_4.1.2.0.md @@ -0,0 +1,43 @@ +# ApplyBQSR v4.1.2.0 + +## Version of tools in [docker image ](https://hub.docker.com/r/broadinstitute/gatk) + +| Tool | Version | 
Location | +| :--- | :--- | :--- | +| GATK | 4.1.2.0 | [https://github.com/broadinstitute/gatk/releases/tag/4.1.2.0](https://github.com/broadinstitute/gatk/releases/tag/4.1.2.0) | + +[![](https://img.shields.io/badge/version-4.1.2.0-blue)](https://github.com/broadinstitute/gatk/releases/tag/4.1.2.0) + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner gatk_ApplyBQSR_4.1.2.0.cwl example_inputs.yml +``` + +**If at MSK, using the JUNO cluster you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict gatk_ApplyBQSR_4.1.2.0.cwl inputs.yaml + +#Using toil-cwl-runner +> mkdir run_directory +> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 gatk_ApplyBQSR_4.1.2.0.cwl inputs.yaml > file.stdout 2> file.stderr & +``` + +## Usage + +\`\`\`bash + +> toil-cwl-runner gatk\_ApplyBQSR\_4.1.2.0.cwl --help + +usage: gatk\_ApplyBQSR\_4.1.2.0.cwl \[-h\] --reference REFERENCE \[--create\_output\_bam\_index\] --bqsr\_recal\_file BQSR\_RECAL\_FILE --input INPUT \[--output\_file\_name OUTPUT\_FILE\_NAME\] \[--add\_output\_sam\_program\_record\] \[--add\_output\_vcf\_command\_line\] \[--arguments\_file ARGUMENTS\_FILE\] \[--cloud\_index\_prefetch\_buffer CLOUD\_INDEX\_PREFETCH\_BUFFER\] \[--cloud\_prefetch\_buffer CLOUD\_PREFETCH\_BUFFER\] \[--create\_output\_bam\_md5\] \[--create\_output\_variant\_index\] \[--create\_output\_variant\_md5\] \[--disable\_bam\_index\_caching\] \[--disable\_read\_filter DISABLE\_READ\_FILTER\] \[--disable\_sequence\_dictionary\_validation\] \[--emit\_original\_quals\] \[--exclude\_intervals EXCLUDE\_INTERVALS\] \[--gatk\_config\_file 
GATK\_CONFIG\_FILE\] \[--gcs\_max\_retries GCS\_MAX\_RETRIES\] \[--gcs\_project\_for\_requester\_pays GCS\_PROJECT\_FOR\_REQUESTER\_PAYS\] \[--global\_qscore\_prior GLOBAL\_QSCORE\_PRIOR\] \[--interval\_exclusion\_padding INTERVAL\_EXCLUSION\_PADDING\] \[--interval\_merging\_rule INTERVAL\_MERGING\_RULE\] \[--interval\_padding INTERVAL\_PADDING\] \[--interval\_set\_rule INTERVAL\_SET\_RULE\] \[--intervals INTERVALS\] \[--lenient\] \[--preserve\_qscores\_less\_than PRESERVE\_QSCORES\_LESS\_THAN\] \[--quantize\_quals QUANTIZE\_QUALS\] \[--quiet\] \[--read\_filter READ\_FILTER\] \[--read\_index READ\_INDEX\] \[--read\_validation\_stringency READ\_VALIDATION\_STRINGENCY\] \[--seconds\_between\_progress\_updates SECONDS\_BETWEEN\_PROGRESS\_UPDATES\] \[--sequence\_dictionary SEQUENCE\_DICTIONARY\] \[--sites\_only\_vcf\_output\] \[--use\_jdk\_deflater\] \[--use\_jdk\_inflater\] \[--use\_original\_qualities\] \[--memory\_overhead MEMORY\_OVERHEAD\] \[--memory\_per\_job MEMORY\_PER\_JOB\] \[--number\_of\_threads NUMBER\_OF\_THREADS\] \[job\_order\] + +positional arguments: job\_order Job input json file + +optional arguments: -h, --help show this help message and exit --reference REFERENCE Reference sequence --create\_output\_bam\_index --bqsr\_recal\_file BQSR\_RECAL\_FILE Input recalibration table for BQSR. Only run ApplyBQSR with the covariates table created from the input BAM --input INPUT A BAM file containing input read data --output\_file\_name OUTPUT\_FILE\_NAME Output file name. 
Not Required --add\_output\_sam\_program\_record --add\_output\_vcf\_command\_line --arguments\_file ARGUMENTS\_FILE --cloud\_index\_prefetch\_buffer CLOUD\_INDEX\_PREFETCH\_BUFFER --cloud\_prefetch\_buffer CLOUD\_PREFETCH\_BUFFER --create\_output\_bam\_md5 --create\_output\_variant\_index --create\_output\_variant\_md5 --disable\_bam\_index\_caching --disable\_read\_filter DISABLE\_READ\_FILTER --disable\_sequence\_dictionary\_validation --emit\_original\_quals --exclude\_intervals EXCLUDE\_INTERVALS --gatk\_config\_file GATK\_CONFIG\_FILE --gcs\_max\_retries GCS\_MAX\_RETRIES --gcs\_project\_for\_requester\_pays GCS\_PROJECT\_FOR\_REQUESTER\_PAYS --global\_qscore\_prior GLOBAL\_QSCORE\_PRIOR --interval\_exclusion\_padding INTERVAL\_EXCLUSION\_PADDING --interval\_merging\_rule INTERVAL\_MERGING\_RULE --interval\_padding INTERVAL\_PADDING --interval\_set\_rule INTERVAL\_SET\_RULE --intervals INTERVALS --lenient --preserve\_qscores\_less\_than PRESERVE\_QSCORES\_LESS\_THAN --quantize\_quals QUANTIZE\_QUALS --quiet --read\_filter READ\_FILTER --read\_index READ\_INDEX --read\_validation\_stringency READ\_VALIDATION\_STRINGENCY --seconds\_between\_progress\_updates SECONDS\_BETWEEN\_PROGRESS\_UPDATES --sequence\_dictionary SEQUENCE\_DICTIONARY --sites\_only\_vcf\_output --use\_jdk\_deflater --use\_jdk\_inflater --use\_original\_qualities --memory\_overhead MEMORY\_OVERHEAD --memory\_per\_job MEMORY\_PER\_JOB --number\_of\_threads NUMBER\_OF\_THREADS + diff --git a/docs/gatk/gatk_base_recalibrator_4.1.8.1.md b/docs/gatk/gatk_base_recalibrator_4.1.8.1.md new file mode 100644 index 00000000..9b90a39f --- /dev/null +++ b/docs/gatk/gatk_base_recalibrator_4.1.8.1.md @@ -0,0 +1,43 @@ +# BaseRecalibrator v4.1.8.1 + +## Version of tools in [docker image ](https://hub.docker.com/r/broadinstitute/gatk) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| GATK | 4.1.8.1 | 
[https://github.com/broadinstitute/gatk/releases/tag/4.1.8.1](https://github.com/broadinstitute/gatk/releases/tag/4.1.8.1) | + +[![](https://img.shields.io/badge/version-4.1.8.1-blue)](https://github.com/broadinstitute/gatk/releases/tag/4.1.8.1) + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner gatk_base_recalibrator_4.1.8.1.cwl example_inputs.yml +``` + +**If at MSK, using the JUNO cluster you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict gatk_base_recalibrator_4.1.8.1.cwl inputs.yaml + +#Using toil-cwl-runner +> mkdir run_directory +> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 gatk_base_recalibrator_4.1.8.1.cwl inputs.yaml > file.stdout 2> file.stderr & +``` + +## Usage + +\`\`\`bash + +> toil-cwl-runner gatk\_base\_recalibrator\_4.1.8.1.cwl --help + +usage: gatk\_base\_recalibrator\_4.1.8.1.cwl \[-h\] --input INPUT --known\_sites\_1 KNOWN\_SITES\_1 --reference REFERENCE \[--output\_file\_name OUTPUT\_FILE\_NAME\] \[--add\_output\_sam\_program\_record\] \[--add\_output\_vcf\_command\_line\] \[--arguments\_file ARGUMENTS\_FILE\] \[--binary\_tag\_name BINARY\_TAG\_NAME\] \[--bqsr\_baq\_gap\_open\_penalty BQSR\_BAQ\_GAP\_OPEN\_PENALTY\] \[--cloud-index-prefetch-buffer CLOUD\_INDEX\_PREFETCH\_BUFFER\] \[--cloud\_prefetch\_buffer CLOUD\_PREFETCH\_BUFFER\] \[--create\_output\_bam\_index\] \[--create\_output\_bam\_md5\] \[--create\_output\_variant\_index\] \[--create\_output\_variant\_md5\] \[--default\_base\_qualities DEFAULT\_BASE\_QUALITIES\] \[--deletions\_default\_quality DELETIONS\_DEFAULT\_QUALITY\] 
\[--disable\_bam\_index\_caching\] \[--disable\_read\_filter DISABLE\_READ\_FILTER\] \[--disable\_sequence\_dictionary\_validation\] \[--exclude\_intervals EXCLUDE\_INTERVALS\] \[--gatk\_config\_file GATK\_CONFIG\_FILE\] \[--gcs\_max\_retries GCS\_MAX\_RETRIES\] \[--gcs\_project\_for\_requester\_pays GCS\_PROJECT\_FOR\_REQUESTER\_PAYS\] \[--indels\_context\_size INDELS\_CONTEXT\_SIZE\] \[--insertions\_default\_quality INSERTIONS\_DEFAULT\_QUALITY\] \[--interval\_exclusion\_padding INTERVAL\_EXCLUSION\_PADDING\] \[--interval\_merging\_rule INTERVAL\_MERGING\_RULE\] \[--interval\_padding INTERVAL\_PADDING\] \[--interval\_set\_rule INTERVAL\_SET\_RULE\] \[--intervals INTERVALS\] \[--lenient\] \[--low\_quality\_tail LOW\_QUALITY\_TAIL\] \[--maximum\_cycle\_value MAXIMUM\_CYCLE\_VALUE\] \[--mismatches\_context\_size MISMATCHES\_CONTEXT\_SIZE\] \[--mismatches\_default\_quality MISMATCHES\_DEFAULT\_QUALITY\] \[--preserve\_qscores\_less\_than PRESERVE\_QSCORES\_LESS\_THAN\] \[--quantizing\_levels QUANTIZING\_LEVELS\] \[--QUIET\] \[--read\_filter READ\_FILTER\] \[--read\_index READ\_INDEX\] \[--seconds\_between\_progress\_updates SECONDS\_BETWEEN\_PROGRESS\_UPDATES\] \[--sequence\_dictionary SEQUENCE\_DICTIONARY\] \[--sites\_only\_vcf\_output\] \[--use\_original\_qualities\] \[--number\_of\_threads NUMBER\_OF\_THREADS\] \[--memory\_per\_job MEMORY\_PER\_JOB\] \[--memory\_overhead MEMORY\_OVERHEAD\] \[--known\_sites\_2 KNOWN\_SITES\_2\] \[job\_order\] + +positional arguments: job\_order Job input json file + +optional arguments: -h, --help show this help message and exit --input INPUT BAM/SAM file containing reads --known\_sites\_1 KNOWN\_SITES\_1 One or more databases of known polymorphic sites used to exclude regions around known polymorphisms from analysis --reference REFERENCE Reference sequence file --output\_file\_name OUTPUT\_FILE\_NAME Output file name. 
Not Required --add\_output\_sam\_program\_record --add\_output\_vcf\_command\_line --arguments\_file ARGUMENTS\_FILE --binary\_tag\_name BINARY\_TAG\_NAME --bqsr\_baq\_gap\_open\_penalty BQSR\_BAQ\_GAP\_OPEN\_PENALTY --cloud-index-prefetch-buffer CLOUD\_INDEX\_PREFETCH\_BUFFER --cloud\_prefetch\_buffer CLOUD\_PREFETCH\_BUFFER --create\_output\_bam\_index --create\_output\_bam\_md5 --create\_output\_variant\_index --create\_output\_variant\_md5 --default\_base\_qualities DEFAULT\_BASE\_QUALITIES --deletions\_default\_quality DELETIONS\_DEFAULT\_QUALITY --disable\_bam\_index\_caching --disable\_read\_filter DISABLE\_READ\_FILTER --disable\_sequence\_dictionary\_validation --exclude\_intervals EXCLUDE\_INTERVALS --gatk\_config\_file GATK\_CONFIG\_FILE --gcs\_max\_retries GCS\_MAX\_RETRIES --gcs\_project\_for\_requester\_pays GCS\_PROJECT\_FOR\_REQUESTER\_PAYS --indels\_context\_size INDELS\_CONTEXT\_SIZE --insertions\_default\_quality INSERTIONS\_DEFAULT\_QUALITY --interval\_exclusion\_padding INTERVAL\_EXCLUSION\_PADDING --interval\_merging\_rule INTERVAL\_MERGING\_RULE --interval\_padding INTERVAL\_PADDING --interval\_set\_rule INTERVAL\_SET\_RULE --intervals INTERVALS --lenient --low\_quality\_tail LOW\_QUALITY\_TAIL --maximum\_cycle\_value MAXIMUM\_CYCLE\_VALUE --mismatches\_context\_size MISMATCHES\_CONTEXT\_SIZE --mismatches\_default\_quality MISMATCHES\_DEFAULT\_QUALITY --preserve\_qscores\_less\_than PRESERVE\_QSCORES\_LESS\_THAN --quantizing\_levels QUANTIZING\_LEVELS --QUIET --read\_filter READ\_FILTER --read\_index READ\_INDEX --seconds\_between\_progress\_updates SECONDS\_BETWEEN\_PROGRESS\_UPDATES --sequence\_dictionary SEQUENCE\_DICTIONARY --sites\_only\_vcf\_output --use\_original\_qualities --number\_of\_threads NUMBER\_OF\_THREADS --memory\_per\_job MEMORY\_PER\_JOB --memory\_overhead MEMORY\_OVERHEAD --known\_sites\_2 KNOWN\_SITES\_2 + diff --git a/docs/gatk/gatk_baserecalibrator_4.1.2.0.md b/docs/gatk/gatk_baserecalibrator_4.1.2.0.md new file mode 
100644 index 00000000..41f341b4 --- /dev/null +++ b/docs/gatk/gatk_baserecalibrator_4.1.2.0.md @@ -0,0 +1,43 @@ +# BaseRecalibrator v4.1.2.0 + +## Version of tools in [docker image ](https://hub.docker.com/r/broadinstitute/gatk) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| GATK | 4.1.2.0 | [https://github.com/broadinstitute/gatk/releases/tag/4.1.2.0](https://github.com/broadinstitute/gatk/releases/tag/4.1.2.0) | + +[![](https://img.shields.io/badge/version-4.1.2.0-blue)](https://github.com/broadinstitute/gatk/releases/tag/4.1.2.0) + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner gatk_baserecalibrator_4.1.2.0.cwl example_inputs.yml +``` + +**If at MSK, using the JUNO cluster you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict gatk_baserecalibrator_4.1.2.0.cwl inputs.yaml + +#Using toil-cwl-runner +> mkdir run_directory +> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 gatk_baserecalibrator_4.1.2.0.cwl inputs.yaml > file.stdout 2> file.stderr & +``` + +## Usage + +\`\`\`bash + +> toil-cwl-runner gatk\_baserecalibrator\_4.1.2.0.cwl --help + +usage: gatk\_baserecalibrator\_4.1.2.0.cwl \[-h\] --input INPUT --known\_sites\_1 KNOWN\_SITES\_1 --reference REFERENCE \[--output\_file\_name OUTPUT\_FILE\_NAME\] \[--add\_output\_sam\_program\_record\] \[--add\_output\_vcf\_command\_line\] \[--arguments\_file ARGUMENTS\_FILE\] \[--binary\_tag\_name BINARY\_TAG\_NAME\] \[--bqsr\_baq\_gap\_open\_penalty BQSR\_BAQ\_GAP\_OPEN\_PENALTY\] \[--cloud-index-prefetch-buffer CLOUD\_INDEX\_PREFETCH\_BUFFER\] \[--cloud\_prefetch\_buffer 
CLOUD\_PREFETCH\_BUFFER\] \[--create\_output\_bam\_index\] \[--create\_output\_bam\_md5\] \[--create\_output\_variant\_index\] \[--create\_output\_variant\_md5\] \[--default\_base\_qualities DEFAULT\_BASE\_QUALITIES\] \[--deletions\_default\_quality DELETIONS\_DEFAULT\_QUALITY\] \[--disable\_bam\_index\_caching\] \[--disable\_read\_filter DISABLE\_READ\_FILTER\] \[--disable\_sequence\_dictionary\_validation\] \[--exclude\_intervals EXCLUDE\_INTERVALS\] \[--gatk\_config\_file GATK\_CONFIG\_FILE\] \[--gcs\_max\_retries GCS\_MAX\_RETRIES\] \[--gcs\_project\_for\_requester\_pays GCS\_PROJECT\_FOR\_REQUESTER\_PAYS\] \[--indels\_context\_size INDELS\_CONTEXT\_SIZE\] \[--insertions\_default\_quality INSERTIONS\_DEFAULT\_QUALITY\] \[--interval\_exclusion\_padding INTERVAL\_EXCLUSION\_PADDING\] \[--interval\_merging\_rule INTERVAL\_MERGING\_RULE\] \[--interval\_padding INTERVAL\_PADDING\] \[--interval\_set\_rule INTERVAL\_SET\_RULE\] \[--intervals INTERVALS\] \[--lenient\] \[--low\_quality\_tail LOW\_QUALITY\_TAIL\] \[--maximum\_cycle\_value MAXIMUM\_CYCLE\_VALUE\] \[--mismatches\_context\_size MISMATCHES\_CONTEXT\_SIZE\] \[--mismatches\_default\_quality MISMATCHES\_DEFAULT\_QUALITY\] \[--preserve\_qscores\_less\_than PRESERVE\_QSCORES\_LESS\_THAN\] \[--quantizing\_levels QUANTIZING\_LEVELS\] \[--QUIET\] \[--read\_filter READ\_FILTER\] \[--read\_index READ\_INDEX\] \[--seconds\_between\_progress\_updates SECONDS\_BETWEEN\_PROGRESS\_UPDATES\] \[--sequence\_dictionary SEQUENCE\_DICTIONARY\] \[--sites\_only\_vcf\_output\] \[--use\_original\_qualities\] \[--number\_of\_threads NUMBER\_OF\_THREADS\] \[--memory\_per\_job MEMORY\_PER\_JOB\] \[--memory\_overhead MEMORY\_OVERHEAD\] \[--known\_sites\_2 KNOWN\_SITES\_2\] \[job\_order\] + +positional arguments: job\_order Job input json file + +optional arguments: -h, --help show this help message and exit --input INPUT BAM/SAM file containing reads --known\_sites\_1 KNOWN\_SITES\_1 One or more databases of known polymorphic sites used 
to exclude regions around known polymorphisms from analysis --reference REFERENCE Reference sequence file --output\_file\_name OUTPUT\_FILE\_NAME Output file name. Not Required --add\_output\_sam\_program\_record --add\_output\_vcf\_command\_line --arguments\_file ARGUMENTS\_FILE --binary\_tag\_name BINARY\_TAG\_NAME --bqsr\_baq\_gap\_open\_penalty BQSR\_BAQ\_GAP\_OPEN\_PENALTY --cloud-index-prefetch-buffer CLOUD\_INDEX\_PREFETCH\_BUFFER --cloud\_prefetch\_buffer CLOUD\_PREFETCH\_BUFFER --create\_output\_bam\_index --create\_output\_bam\_md5 --create\_output\_variant\_index --create\_output\_variant\_md5 --default\_base\_qualities DEFAULT\_BASE\_QUALITIES --deletions\_default\_quality DELETIONS\_DEFAULT\_QUALITY --disable\_bam\_index\_caching --disable\_read\_filter DISABLE\_READ\_FILTER --disable\_sequence\_dictionary\_validation --exclude\_intervals EXCLUDE\_INTERVALS --gatk\_config\_file GATK\_CONFIG\_FILE --gcs\_max\_retries GCS\_MAX\_RETRIES --gcs\_project\_for\_requester\_pays GCS\_PROJECT\_FOR\_REQUESTER\_PAYS --indels\_context\_size INDELS\_CONTEXT\_SIZE --insertions\_default\_quality INSERTIONS\_DEFAULT\_QUALITY --interval\_exclusion\_padding INTERVAL\_EXCLUSION\_PADDING --interval\_merging\_rule INTERVAL\_MERGING\_RULE --interval\_padding INTERVAL\_PADDING --interval\_set\_rule INTERVAL\_SET\_RULE --intervals INTERVALS --lenient --low\_quality\_tail LOW\_QUALITY\_TAIL --maximum\_cycle\_value MAXIMUM\_CYCLE\_VALUE --mismatches\_context\_size MISMATCHES\_CONTEXT\_SIZE --mismatches\_default\_quality MISMATCHES\_DEFAULT\_QUALITY --preserve\_qscores\_less\_than PRESERVE\_QSCORES\_LESS\_THAN --quantizing\_levels QUANTIZING\_LEVELS --QUIET --read\_filter READ\_FILTER --read\_index READ\_INDEX --seconds\_between\_progress\_updates SECONDS\_BETWEEN\_PROGRESS\_UPDATES --sequence\_dictionary SEQUENCE\_DICTIONARY --sites\_only\_vcf\_output --use\_original\_qualities --number\_of\_threads NUMBER\_OF\_THREADS --memory\_per\_job MEMORY\_PER\_JOB --memory\_overhead 
MEMORY\_OVERHEAD --known\_sites\_2 KNOWN\_SITES\_2 + diff --git a/gatk_merge_bam_alignment_4.1.8.0/README.md b/docs/gatk/gatk_merge_bam_alignment_4.1.8.0.md similarity index 98% rename from gatk_merge_bam_alignment_4.1.8.0/README.md rename to docs/gatk/gatk_merge_bam_alignment_4.1.8.0.md index 4bd55029..8a593757 100644 --- a/gatk_merge_bam_alignment_4.1.8.0/README.md +++ b/docs/gatk/gatk_merge_bam_alignment_4.1.8.0.md @@ -1,17 +1,16 @@ -# CWL for running GATK - MergeBamAlignment +# MergeBamAlignment v4.1.8.0 ## Version of tools in docker image -| Tool | Version | Location | -|--- |--- |--- | -| gatk | 4.1.8.0 | broadinstitute/gatk:4.1.8.0 | - +| Tool | Version | Location | +| :--- | :--- | :--- | +| gatk | 4.1.8.0 | broadinstitute/gatk:4.1.8.0 | ## CWL -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): ```bash > toil-cwl-runner gatk_merge_bam_alignment_4.1.8.0.cwl example_inputs.yaml @@ -243,3 +242,4 @@ optional arguments: files created. Default value: false. 
Possible values: {true, false} ``` + diff --git a/gatk_merge_sam_files_4.1.8.0/README.md b/docs/gatk/gatk_merge_sam_files_4.1.8.0.md similarity index 94% rename from gatk_merge_sam_files_4.1.8.0/README.md rename to docs/gatk/gatk_merge_sam_files_4.1.8.0.md index 92147c3f..0602f9be 100644 --- a/gatk_merge_sam_files_4.1.8.0/README.md +++ b/docs/gatk/gatk_merge_sam_files_4.1.8.0.md @@ -1,17 +1,16 @@ -# CWL for running GATK - MergeSamFiles +# MergeSamFiles v4.1.8.0 ## Version of tools in docker image -| Tool | Version | Location | -|--- |--- |--- | -| gatk | 4.1.8.0 | broadinstitute/gatk:4.1.8.0 | - +| Tool | Version | Location | +| :--- | :--- | :--- | +| gatk | 4.1.8.0 | broadinstitute/gatk:4.1.8.0 | ## CWL -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): ```bash > toil-cwl-runner gatk_merge_sam_files_4.1.8.0.cwl example_inputs.yaml @@ -96,3 +95,4 @@ optional arguments: Control verbosity of logging. Default value: INFO. 
Possible values: {ERROR, WARNING, INFO, DEBUG} ``` + diff --git a/docs/gatk/samtofastq-v4.1.8.0.md b/docs/gatk/samtofastq-v4.1.8.0.md new file mode 100644 index 00000000..b2b44b12 --- /dev/null +++ b/docs/gatk/samtofastq-v4.1.8.0.md @@ -0,0 +1,2 @@ +# SamToFastq v4.1.8.0 + diff --git a/docs/manta/README.md b/docs/manta/README.md new file mode 100644 index 00000000..1849796d --- /dev/null +++ b/docs/manta/README.md @@ -0,0 +1,2 @@ +# Manta + diff --git a/manta_1.5.1/README.md b/docs/manta/manta_1.5.1.md similarity index 61% rename from manta_1.5.1/README.md rename to docs/manta/manta_1.5.1.md index 0e7abc83..9b285335 100644 --- a/manta_1.5.1/README.md +++ b/docs/manta/manta_1.5.1.md @@ -1,27 +1,27 @@ -# CWL and Dockerfile for running Manta +# Manta v1.5.1 -## Version of tools in docker image (/container/Dockerfile) +## Version of tools in docker image \(/container/Dockerfile\) -| Tool | Version | Location | -|--- |--- |--- | -| ubuntu base image | 16.04 | - | -| manta | 1.5.1 | https://github.com/Illumina/manta/releases/download/ | -| samtools | 1.9 | https://github.com/samtools/samtools/releases/download/ | -| htslib | 1.9 | "https://github.com/samtools/htslib/releases/download/ | +| Tool | Version | Location | +| :--- | :--- | :--- | +| ubuntu base image | 16.04 | - | +| manta | 1.5.1 | [https://github.com/Illumina/manta/releases/download/](https://github.com/Illumina/manta/releases/download/) | +| samtools | 1.9 | [https://github.com/samtools/samtools/releases/download/](https://github.com/samtools/samtools/releases/download/) | +| htslib | 1.9 | [https://github.com/samtools/htslib/releases/download/](https://github.com/samtools/htslib/releases/download/) | -[![](https://images.microbadger.com/badges/image/mskaccess/manta:0.0.2.svg)](https://microbadger.com/images/mskaccess/manta:0.0.2 "Get your own image badge on microbadger.com")[![](https://images.microbadger.com/badges/version/mskaccess/manta:0.0.2.svg)](https://microbadger.com/images/mskaccess/manta:0.0.2
"Get your own version badge on microbadger.com") [![](https://images.microbadger.com/badges/commit/mskaccess/manta:0.0.2.svg)](https://microbadger.com/images/mskaccess/manta:0.0.2 "Get your own commit badge on microbadger.com") [![](https://images.microbadger.com/badges/license/mskaccess/manta:0.0.2.svg)](https://microbadger.com/images/mskaccess/manta:0.0.2 "Get your own license badge on microbadger.com") +[![](https://images.microbadger.com/badges/image/mskaccess/manta:0.0.2.svg)](https://microbadger.com/images/mskaccess/manta:0.0.2)[![](https://images.microbadger.com/badges/version/mskaccess/manta:0.0.2.svg)](https://microbadger.com/images/mskaccess/manta:0.0.2) [![](https://images.microbadger.com/badges/commit/mskaccess/manta:0.0.2.svg)](https://microbadger.com/images/mskaccess/manta:0.0.2) [![](https://images.microbadger.com/badges/license/mskaccess/manta:0.0.2.svg)](https://microbadger.com/images/mskaccess/manta:0.0.2) ## CWL -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): ```bash > toil-cwl-runner manta_1.5.1.cwl example_inputs.yaml ``` -**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** +**If at MSK, using the JUNO cluster you can use the following command** ```bash #Using CWLTOOL @@ -67,3 +67,4 @@ optional arguments: --generateEvidenceBam Generate a bam of supporting reads for all SVs ``` + diff --git a/docs/marianas/README.md b/docs/marianas/README.md new file mode 100644 index 00000000..67939d60 --- /dev/null +++ b/docs/marianas/README.md @@ -0,0 +1,2 @@ +# Marianas + diff --git 
a/docs/marianas/marianas_collapsing_first_pass_1.8.1.md b/docs/marianas/marianas_collapsing_first_pass_1.8.1.md new file mode 100644 index 00000000..f0b24afe --- /dev/null +++ b/docs/marianas/marianas_collapsing_first_pass_1.8.1.md @@ -0,0 +1,19 @@ +# Collapsing First Pass v1.8.1 + +## Version of tools in docker image \(/container/Dockerfile\) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| java base image | 8 | - | +| Marianas | 1.8.1 | [https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar](https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar) | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner marianas_first_pass.cwl example_inputs.yaml +``` + diff --git a/docs/marianas/marianas_collapsing_second_pass_1.8.1.md b/docs/marianas/marianas_collapsing_second_pass_1.8.1.md new file mode 100644 index 00000000..7117bec5 --- /dev/null +++ b/docs/marianas/marianas_collapsing_second_pass_1.8.1.md @@ -0,0 +1,19 @@ +# Collapsing Second Pass v1.8.1 + +## Version of tools in docker image \(/container/Dockerfile\) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| java base image | 8 | - | +| Marianas | 1.8.1 | [https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar](https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar) | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner marianas_first_pass.cwl test_inputs_second_pass.yaml +``` + diff --git a/docs/marianas/marianas_process_loop_umi_1.8.1.md b/docs/marianas/marianas_process_loop_umi_1.8.1.md new file mode 100644 index 00000000..7c1efc78 --- /dev/null +++ b/docs/marianas/marianas_process_loop_umi_1.8.1.md @@ -0,0 +1,19 @@ +# 
Process Loop UMI v1.8.1 + +## Version of tools in docker image \(/container/Dockerfile\) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| java base image | 8 | - | +| Marianas | 1.8.1 | [https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar](https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar) | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner marianas_first_pass.cwl example_inputs.yaml +``` + diff --git a/docs/marianas/marianas_separate_bams_1.8.1.md b/docs/marianas/marianas_separate_bams_1.8.1.md new file mode 100644 index 00000000..232c89da --- /dev/null +++ b/docs/marianas/marianas_separate_bams_1.8.1.md @@ -0,0 +1,33 @@ +# Separate BAMs v1.8.1 + +## Version of tools in docker image \(../marianas\_process\_loop\_umi\_1.8.1/container/Dockerfile\) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| java base image | 8 | - | +| Marianas | 1.8.1 | [https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar](https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar) | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner marianas_separate_bams_1.8.1.cwl example_inputs.yaml +``` + +### Usage + +```bash +usage: marianas_separate_bams_1.8.1/marianas_separate_bams_1.8.1.cwl + [-h] --input_bam INPUT_BAM [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --input_bam INPUT_BAM +``` + diff --git a/docs/merge-fastq/README.md b/docs/merge-fastq/README.md new file mode 100644 index 00000000..19544a3d --- /dev/null +++ b/docs/merge-fastq/README.md @@ -0,0 +1,2 @@ +# Merge Fastq + diff --git
a/merge_fastq_0.1.7/README.md b/docs/merge-fastq/merge_fastq_0.1.7.md similarity index 63% rename from merge_fastq_0.1.7/README.md rename to docs/merge-fastq/merge_fastq_0.1.7.md index 05ade25a..ec1c9213 100644 --- a/merge_fastq_0.1.7/README.md +++ b/docs/merge-fastq/merge_fastq_0.1.7.md @@ -1,25 +1,25 @@ -# CWL and Dockerfile for running merge_fastq +# v0.1.7 -## Version of tools in docker image (/container/Dockerfile) +## Version of tools in docker image \(/container/Dockerfile\) | Tool | Version | Location | -|--- |--- |--- | -| miniconda | 3 | | -| merge_fastq | 0.1.7 | | +| :--- | :--- | :--- | +| miniconda | 3 | [https://hub.docker.com/r/continuumio/miniconda3](https://hub.docker.com/r/continuumio/miniconda3) | +| merge\_fastq | 0.1.7 | [https://pypi.org/project/merge-fastq/](https://pypi.org/project/merge-fastq/) | -[![](https://images.microbadger.com/badges/version/mskaccess/merge_fastq:0.6.1.svg)](https://microbadger.com/images/mskaccess/merge_fastq:0.6.1 "Get your own version badge on microbadger.com") [![](https://images.microbadger.com/badges/image/mskaccess/merge_fastq:0.6.1.svg)](https://microbadger.com/images/mskaccess/merge_fastq:0.6.1 "Get your own image badge on microbadger.com") [![](https://images.microbadger.com/badges/commit/mskaccess/merge_fastq:0.6.1.svg)](https://microbadger.com/images/mskaccess/merge_fastq:0.6.1 "Get your own commit badge on microbadger.com") [![](https://images.microbadger.com/badges/license/mskaccess/merge_fastq:0.6.1.svg)](https://microbadger.com/images/mskaccess/merge_fastq:0.6.1 "Get your own license badge on microbadger.com") +[![](https://images.microbadger.com/badges/version/mskaccess/merge_fastq:0.6.1.svg)](https://microbadger.com/images/mskaccess/merge_fastq:0.6.1) [![](https://images.microbadger.com/badges/image/mskaccess/merge_fastq:0.6.1.svg)](https://microbadger.com/images/mskaccess/merge_fastq:0.6.1) 
[![](https://images.microbadger.com/badges/commit/mskaccess/merge_fastq:0.6.1.svg)](https://microbadger.com/images/mskaccess/merge_fastq:0.6.1) [![](https://images.microbadger.com/badges/license/mskaccess/merge_fastq:0.6.1.svg)](https://microbadger.com/images/mskaccess/merge_fastq:0.6.1) ## CWL -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): ```bash > toil-cwl-runner merge_fastq_0.1.7.cwl example_inputs.yaml ``` -**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** +**If at MSK, using the JUNO cluster you can use the following command** ```bash #Using CWLTOOL @@ -63,5 +63,5 @@ optional arguments: --out_fastq2_name OUT_FASTQ2_NAME Name of the merged output READ2 fastq file(default: merged_fastq_R2.fastq.gz) - ``` + diff --git a/docs/mutect/README.md b/docs/mutect/README.md new file mode 100644 index 00000000..639d595b --- /dev/null +++ b/docs/mutect/README.md @@ -0,0 +1,2 @@ +# MuTect + diff --git a/mutect_1.1.5/README.md b/docs/mutect/mutect_1.1.5.md similarity index 95% rename from mutect_1.1.5/README.md rename to docs/mutect/mutect_1.1.5.md index cc3f748f..f5a70ddb 100644 --- a/mutect_1.1.5/README.md +++ b/docs/mutect/mutect_1.1.5.md @@ -1,23 +1,23 @@ -# CWL and Dockerfile for running Mutect v1.1.5 +# MuTect 1.1.5 -## Version of tools in docker image (/container/Dockerfile) +## Version of tools in docker image \(/container/Dockerfile\) -| Tool | Version | Location | -|--- |--- |--- | -| openjdk:7 base image | 7 | - | -| muTect | 1.1.5 | https://github.com/broadinstitute/mutect/releases/download/1.1.5/muTect-1.1.5-bin.zip | 
+| Tool | Version | Location | +| :--- | :--- | :--- | +| openjdk:7 base image | 7 | - | +| muTect | 1.1.5 | [https://github.com/broadinstitute/mutect/releases/download/1.1.5/muTect-1.1.5-bin.zip](https://github.com/broadinstitute/mutect/releases/download/1.1.5/muTect-1.1.5-bin.zip) | ## CWL -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): ```bash > toil-cwl-runner mutect_1.1.5.cwl example_inputs.yaml ``` -**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** +**If at MSK, using the JUNO cluster you can use the following command** ```bash #Using CWLTOOL @@ -30,7 +30,7 @@ ### Usage -``` +```text usage: toil-cwl-runner mutect_1.1.5.cwl [-h] positional arguments: @@ -269,6 +269,5 @@ optional arguments: How strict should we be with validation (STRICT|LENIENT|SILENT) --vcf VCF VCF output of mutation candidates - - ``` + diff --git a/docs/picard-tools/README.md b/docs/picard-tools/README.md new file mode 100644 index 00000000..094001ac --- /dev/null +++ b/docs/picard-tools/README.md @@ -0,0 +1,2 @@ +# Picard Tools + diff --git a/picard_add_or_replace_read_groups_1.96/README.md b/docs/picard-tools/picard_add_or_replace_read_groups_1.96.md similarity index 78% rename from picard_add_or_replace_read_groups_1.96/README.md rename to docs/picard-tools/picard_add_or_replace_read_groups_1.96.md index b07355a4..e5a70249 100644 --- a/picard_add_or_replace_read_groups_1.96/README.md +++ b/docs/picard-tools/picard_add_or_replace_read_groups_1.96.md @@ -1,26 +1,26 @@ -# CWL and Dockerfile for running Picard - AddOrReplaceReadGroups +# 
AddOrReplaceReadGroups v1.96 -## Version of tools in docker image (/container/Dockerfile) +## Version of tools in docker image \(/container/Dockerfile\) -| Tool | Version | Location | -|--- |--- |--- | -| java base image | 8 | - | -| picard | 1.96 | https://sourceforge.net/projects/picard/files/picard-tools/1.96/picard-tools-1.96.zip | -| R | 3.3.3 | r-base for opnejdk:8 | +| Tool | Version | Location | +| :--- | :--- | :--- | +| java base image | 8 | - | +| picard | 1.96 | [https://sourceforge.net/projects/picard/files/picard-tools/1.96/picard-tools-1.96.zip](https://sourceforge.net/projects/picard/files/picard-tools/1.96/picard-tools-1.96.zip) | +| R | 3.3.3 | r-base for openjdk:8 | -[![](https://images.microbadger.com/badges/image/mskcc/picard_1.96:0.1.0.svg)](https://microbadger.com/images/mskcc/picard_1.96:0.1.0 "Get your own image badge on microbadger.com") [![](https://images.microbadger.com/badges/version/mskcc/picard_1.96:0.1.0.svg)](https://microbadger.com/images/mskcc/picard_1.96:0.1.0 "Get your own version badge on microbadger.com") [![](https://images.microbadger.com/badges/license/mskcc/picard_1.96:0.1.0.svg)](https://microbadger.com/images/mskcc/picard_1.96:0.1.0 "Get your own license badge on microbadger.com") +[![](https://images.microbadger.com/badges/image/mskcc/picard_1.96:0.1.0.svg)](https://microbadger.com/images/mskcc/picard_1.96:0.1.0) [![](https://images.microbadger.com/badges/version/mskcc/picard_1.96:0.1.0.svg)](https://microbadger.com/images/mskcc/picard_1.96:0.1.0) [![](https://images.microbadger.com/badges/license/mskcc/picard_1.96:0.1.0.svg)](https://microbadger.com/images/mskcc/picard_1.96:0.1.0) ## CWL -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): ```bash > toil-cwl-runner
picard_add_or_replace_read_groups_1.96.cwl example_inputs.yaml ``` -**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** +**If at MSK, using the JUNO cluster you can use the following command** ```bash #Using CWLTOOL diff --git a/picard_add_or_replace_read_groups_2.21.2/README.md b/docs/picard-tools/picard_add_or_replace_read_groups_2.21.2.md similarity index 87% rename from picard_add_or_replace_read_groups_2.21.2/README.md rename to docs/picard-tools/picard_add_or_replace_read_groups_2.21.2.md index 0585eae8..70eb1246 100644 --- a/picard_add_or_replace_read_groups_2.21.2/README.md +++ b/docs/picard-tools/picard_add_or_replace_read_groups_2.21.2.md @@ -1,23 +1,22 @@ -# CWL for running Picard - AddOrReplaceReadGroups +# AddOrReplaceReadGroups v2.21.2 ## Version of tools in docker image -| Tool | Version | Location | -|--- |--- |--- | -| picard | 2.21.2 | https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar | - +| Tool | Version | Location | +| :--- | :--- | :--- | +| picard | 2.21.2 | [https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar](https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar) | ## CWL -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): ```bash > toil-cwl-runner picard_add_or_replace_read_groups_2.21.2.cwl example_inputs.yaml ``` -**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing 
`type==X86_64 &&` you can use the following command** +**If at MSK, using the JUNO cluster you can use the following command** ```bash #Using CWLTOOL diff --git a/picard_add_or_replace_read_groups_4.1.8.1/README.md b/docs/picard-tools/picard_add_or_replace_read_groups_4.1.8.1.md similarity index 89% rename from picard_add_or_replace_read_groups_4.1.8.1/README.md rename to docs/picard-tools/picard_add_or_replace_read_groups_4.1.8.1.md index 59aeafbb..707df7f2 100644 --- a/picard_add_or_replace_read_groups_4.1.8.1/README.md +++ b/docs/picard-tools/picard_add_or_replace_read_groups_4.1.8.1.md @@ -1,23 +1,22 @@ -# CWL for running Picard - AddOrReplaceReadGroups +# AddOrReplaceReadGroups v4.1.8.1 ## Version of tools in docker image -| Tool | Version | Location | -|--- |--- |--- | -| gatk | 4.1.8.1 | broadinstitute/gatk:4.1.8.1 | - +| Tool | Version | Location | +| :--- | :--- | :--- | +| gatk | 4.1.8.1 | broadinstitute/gatk:4.1.8.1 | ## CWL -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): ```bash > toil-cwl-runner picard_add_or_replace_read_groups_4.1.8.1.cwl example_inputs.yaml ``` -**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** +**If at MSK, using the JUNO cluster you can use the following command** ```bash #Using CWLTOOL diff --git a/picard_collect_alignment_summary_metrics_2.21.2/README.md b/docs/picard-tools/picard_collect_alignment_summary_metrics_2.21.2.md similarity index 89% rename from picard_collect_alignment_summary_metrics_2.21.2/README.md rename to 
docs/picard-tools/picard_collect_alignment_summary_metrics_2.21.2.md index ecee2769..b88b626f 100644 --- a/picard_collect_alignment_summary_metrics_2.21.2/README.md +++ b/docs/picard-tools/picard_collect_alignment_summary_metrics_2.21.2.md @@ -1,17 +1,16 @@ -# CWL for running Picard - CollectAlignmentSummaryMetrics +# CollectAlignmentSummaryMetrics v2.21.2 ## Version of tools in docker image -| Tool | Version | Location | -|--- |--- |--- | -| picard | 2.21.2 | https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar | - +| Tool | Version | Location | +| :--- | :--- | :--- | +| picard | 2.21.2 | [https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar](https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar) | ## CWL -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): ```bash > toil-cwl-runner picard_collect_alignment_summary_metrics_2.21.2.cwl example_inputs.yaml @@ -76,3 +75,4 @@ optional arguments: Default value: 0. This option can be set to 'null' to clear the default value. 
``` + diff --git a/picard_collect_alignment_summary_metrics_2.8.1/README.md b/docs/picard-tools/picard_collect_alignment_summary_metrics_2.8.1.md similarity index 85% rename from picard_collect_alignment_summary_metrics_2.8.1/README.md rename to docs/picard-tools/picard_collect_alignment_summary_metrics_2.8.1.md index 16d0b11b..1a318d60 100644 --- a/picard_collect_alignment_summary_metrics_2.8.1/README.md +++ b/docs/picard-tools/picard_collect_alignment_summary_metrics_2.8.1.md @@ -1,19 +1,18 @@ -# CWL and Dockerfile for running Picard - CollectAlignmentSummaryMetrics +# CollectAlignmentSummaryMetrics v2.8.1 -## Version of tools in docker image (../picard_mark_duplicates_2.8.1/container/Dockerfile) - -| Tool | Version | Location | -|--- |--- |--- | -| java base image | 8 | - | -| picard | 2.8.1 | https://github.com/broadinstitute/picard/releases/download/2.8.1/picard.jar | -| R | 3.3.3 | r-base for opnejdk:8 | +## Version of tools in docker image \(../picard\_mark\_duplicates\_2.8.1/container/Dockerfile\) +| Tool | Version | Location | +| :--- | :--- | :--- | +| java base image | 8 | - | +| picard | 2.8.1 | [https://github.com/broadinstitute/picard/releases/download/2.8.1/picard.jar](https://github.com/broadinstitute/picard/releases/download/2.8.1/picard.jar) | +| R | 3.3.3 | r-base for openjdk:8 | ## CWL -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): ```bash > toil-cwl-runner picard_collect_alignment_summary_metrics_2.8.1.cwl example_inputs.yaml @@ -78,3 +77,4 @@ optional arguments: Default value: 0. This option can be set to 'null' to clear the default value. 
``` + diff --git a/picard_collectmultiplemetric_2.21.2/README.md b/docs/picard-tools/picard_collectmultiplemetric_2.21.2.md similarity index 89% rename from picard_collectmultiplemetric_2.21.2/README.md rename to docs/picard-tools/picard_collectmultiplemetric_2.21.2.md index c8b92500..14b0eaee 100644 --- a/picard_collectmultiplemetric_2.21.2/README.md +++ b/docs/picard-tools/picard_collectmultiplemetric_2.21.2.md @@ -1,17 +1,16 @@ -# CWL for running Picard - CollectMultipleMetrics +# CollectMultipleMetrics v2.21.2 ## Version of tools in docker image -| Tool | Version | Location | -|--- |--- |--- | -| picard | 2.21.2 | https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar | - +| Tool | Version | Location | +| :--- | :--- | :--- | +| picard | 2.21.2 | [https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar](https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar) | ## CWL -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): ```bash > toil-cwl-runner picard_collectmultiplemetrics_2.21.2.cwl example_inputs.yaml @@ -76,3 +75,4 @@ optional arguments: Default value: 0. This option can be set to 'null' to clear the default value. 
``` + diff --git a/picard_collectmultiplemetric_2.8.1/README.md b/docs/picard-tools/picard_collectmultiplemetric_2.8.1.md similarity index 85% rename from picard_collectmultiplemetric_2.8.1/README.md rename to docs/picard-tools/picard_collectmultiplemetric_2.8.1.md index cef53fac..259b84d5 100644 --- a/picard_collectmultiplemetric_2.8.1/README.md +++ b/docs/picard-tools/picard_collectmultiplemetric_2.8.1.md @@ -1,19 +1,18 @@ -# CWL and Dockerfile for running Picard - CollectMultipleMetrics +# CollectMultipleMetrics v2.8.1 -## Version of tools in docker image (../picard_mark_duplicates_2.8.1/container/Dockerfile) - -| Tool | Version | Location | -|--- |--- |--- | -| java base image | 8 | - | -| picard | 2.8.1 | https://github.com/broadinstitute/picard/releases/download/2.8.1/picard.jar | -| R | 3.3.3 | r-base for opnejdk:8 | +## Version of tools in docker image \(../picard\_mark\_duplicates\_2.8.1/container/Dockerfile\) +| Tool | Version | Location | +| :--- | :--- | :--- | +| java base image | 8 | - | +| picard | 2.8.1 | [https://github.com/broadinstitute/picard/releases/download/2.8.1/picard.jar](https://github.com/broadinstitute/picard/releases/download/2.8.1/picard.jar) | +| R | 3.3.3 | r-base for openjdk:8 | ## CWL -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): ```bash > toil-cwl-runner picard_collectmultiplemetrics_2-8-1.cwl example_inputs.yaml @@ -78,3 +77,4 @@ optional arguments: Default value: 0. This option can be set to 'null' to clear the default value. 
``` + diff --git a/picard_fix_mate_information_1.96/README.md b/docs/picard-tools/picard_fix_mate_information_1.96.md similarity index 71% rename from picard_fix_mate_information_1.96/README.md rename to docs/picard-tools/picard_fix_mate_information_1.96.md index 567a78e3..22a9cd50 100644 --- a/picard_fix_mate_information_1.96/README.md +++ b/docs/picard-tools/picard_fix_mate_information_1.96.md @@ -1,26 +1,26 @@ -# CWL and Dockerfile for running Picard - FixMateInformation +# FixMateInformation v1.96 -## Version of tools in docker image (../picard_add_or_replace_read_groups_1.96/container/Dockerfile) +## Version of tools in docker image \(../picard\_add\_or\_replace\_read\_groups\_1.96/container/Dockerfile\) -| Tool | Version | Location | -|--- |--- |--- | -| java base image | 8 | - | -| picard | 1.96 | https://sourceforge.net/projects/picard/files/picard-tools/1.96/picard-tools-1.96.zip | -| R | 3.3.3 | r-base for opnejdk:8 | +| Tool | Version | Location | +| :--- | :--- | :--- | +| java base image | 8 | - | +| picard | 1.96 | [https://sourceforge.net/projects/picard/files/picard-tools/1.96/picard-tools-1.96.zip](https://sourceforge.net/projects/picard/files/picard-tools/1.96/picard-tools-1.96.zip) | +| R | 3.3.3 | r-base for opnejdk:8 | -[![](https://images.microbadger.com/badges/image/mskcc/picard_1.96:0.1.0.svg)](https://microbadger.com/images/mskcc/picard_1.96:0.1.0 "Get your own image badge on microbadger.com") [![](https://images.microbadger.com/badges/version/mskcc/picard_1.96:0.1.0.svg)](https://microbadger.com/images/mskcc/picard_1.96:0.1.0 "Get your own version badge on microbadger.com") [![](https://images.microbadger.com/badges/license/mskcc/picard_1.96:0.1.0.svg)](https://microbadger.com/images/mskcc/picard_1.96:0.1.0 "Get your own license badge on microbadger.com") +[![](https://images.microbadger.com/badges/image/mskcc/picard_1.96:0.1.0.svg)](https://microbadger.com/images/mskcc/picard_1.96:0.1.0) 
[![](https://images.microbadger.com/badges/version/mskcc/picard_1.96:0.1.0.svg)](https://microbadger.com/images/mskcc/picard_1.96:0.1.0) [![](https://images.microbadger.com/badges/license/mskcc/picard_1.96:0.1.0.svg)](https://microbadger.com/images/mskcc/picard_1.96:0.1.0) ## CWL -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): ```bash > toil-cwl-runner picard_fix_mate_information_1.96.cwl example_inputs.yaml ``` -**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** +**If at MSK, using the JUNO cluster you can use the following command** ```bash #Using CWLTOOL @@ -33,7 +33,7 @@ ### Usage -``` +```text usage: picard_fix_mate_information_1.96.cwl [-h] positional arguments: @@ -72,4 +72,5 @@ optional arguments: coordinate-sorted BAM file. Default value:false. This option can be set to 'null' to clear the default value. 
Possible values:{true, false} -``` \ No newline at end of file +``` + diff --git a/picard_fix_mate_information_2.21.2/README.md b/docs/picard-tools/picard_fix_mate_information_2.21.2.md similarity index 83% rename from picard_fix_mate_information_2.21.2/README.md rename to docs/picard-tools/picard_fix_mate_information_2.21.2.md index 8a4ca7ab..659bf4b1 100644 --- a/picard_fix_mate_information_2.21.2/README.md +++ b/docs/picard-tools/picard_fix_mate_information_2.21.2.md @@ -1,23 +1,22 @@ -# CWL for running Picard - FixMateInformation +# FixMateInformation v2.21.2 ## Version of tools in docker image -| Tool | Version | Location | -|--- |--- |--- | -| picard | 2.21.2 | https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar | - +| Tool | Version | Location | +| :--- | :--- | :--- | +| picard | 2.21.2 | [https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar](https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar) | ## CWL -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): ```bash > toil-cwl-runner picard_fix_mate_information_2.21.2.cwl example_inputs.yaml ``` -**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** +**If at MSK, using the JUNO cluster you can use the following command** ```bash #Using CWLTOOL @@ -30,7 +29,7 @@ ### Usage -``` +```text usage: picard_fix_mate_information_2.21.2.cwl [-h] positional arguments: @@ -70,3 +69,4 @@ optional arguments: option can be set to 'null' to clear the default value. 
Possible values:{true, false} ``` + diff --git a/picard_fix_mate_information_4.1.8.1/README.md b/docs/picard-tools/picard_fix_mate_information_4.1.8.1.md similarity index 85% rename from picard_fix_mate_information_4.1.8.1/README.md rename to docs/picard-tools/picard_fix_mate_information_4.1.8.1.md index da45f413..7039d4d4 100644 --- a/picard_fix_mate_information_4.1.8.1/README.md +++ b/docs/picard-tools/picard_fix_mate_information_4.1.8.1.md @@ -1,23 +1,22 @@ -# CWL for running Picard - FixMateInformation +# FixMateInformation v4.1.8.1 ## Version of tools in docker image -| Tool | Version | Location | -|--- |--- |--- | -| gatk | 4.1.8.1 | broadinstitute/gatk:4.1.8.1 | - +| Tool | Version | Location | +| :--- | :--- | :--- | +| gatk | 4.1.8.1 | broadinstitute/gatk:4.1.8.1 | ## CWL -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): ```bash > toil-cwl-runner picard_fix_mate_information_4.1.8.1.cwl example_inputs.yaml ``` -**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** +**If at MSK, using the JUNO cluster you can use the following command** ```bash #Using CWLTOOL @@ -30,7 +29,7 @@ ### Usage -``` +```text usage: picard_fix_mate_information_4.1.8.1.cwl positional arguments: @@ -75,3 +74,4 @@ optional arguments: option can be set to 'null' to clear the default value. 
Possible values:{true, false} ``` + diff --git a/picard_hsmetrics_2.21.2/README.md b/docs/picard-tools/picard_hsmetrics_2.21.2.md similarity index 90% rename from picard_hsmetrics_2.21.2/README.md rename to docs/picard-tools/picard_hsmetrics_2.21.2.md index 9afa976b..8c43151b 100644 --- a/picard_hsmetrics_2.21.2/README.md +++ b/docs/picard-tools/picard_hsmetrics_2.21.2.md @@ -1,17 +1,16 @@ -# CWL for running Picard - CollectAlignmentSummaryMetrics +# HSmetrics v2.21.2 ## Version of tools in docker image -| Tool | Version | Location | -|--- |--- |--- | -| picard | 2.8.1 | https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar | - +| Tool | Version | Location | +| :--- | :--- | :--- | +| picard | 2.21.2 | [https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar](https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar) | ## CWL -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): ```bash > toil-cwl-runner picard_hsmetrics_2.21.2.cwl example_inputs.yaml @@ -83,5 +82,5 @@ optional arguments: sampling. Default is 10000. Default value: 10000. This option can be set to 'null' to clear the default value. 
- ``` + diff --git a/docs/picard-tools/picard_hsmetrics_2.8.1.md b/docs/picard-tools/picard_hsmetrics_2.8.1.md new file mode 100644 index 00000000..627d3286 --- /dev/null +++ b/docs/picard-tools/picard_hsmetrics_2.8.1.md @@ -0,0 +1,26 @@ +# HSmetrics v2.8.1 + +## Version of tools in docker image \(../picard\_mark\_duplicates\_2.8.1/container/Dockerfile\) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| java base image | 8 | - | +| picard | 2.8.1 | [https://github.com/broadinstitute/picard/releases/download/2.8.1/picard.jar](https://github.com/broadinstitute/picard/releases/download/2.8.1/picard.jar) | +| R | 3.3.3 | r-base for openjdk:8 | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash +> toil-cwl-runner picard_hsmetrics_2.8.1.cwl example_inputs.yaml +``` + +### Usage + +```bash +> usage: picard_hsmetrics_2.8.1.cwl [-h] +``` + diff --git a/docs/picard-tools/picard_mark_duplicates_1.96.md b/docs/picard-tools/picard_mark_duplicates_1.96.md new file mode 100644 index 00000000..dd13d5ed --- /dev/null +++ b/docs/picard-tools/picard_mark_duplicates_1.96.md @@ -0,0 +1,20 @@ +# MarkDuplicates v1.96 + +## Version of tools in docker image \(/container/Dockerfile\) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| java base image | 8 | - | +| picard | 1.96 | [https://sourceforge.net/projects/picard/files/picard-tools/1.96/picard-tools-1.96.zip](https://sourceforge.net/projects/picard/files/picard-tools/1.96/picard-tools-1.96.zip) | +| R | 3.3.3 | r-base for openjdk:8 | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner picard_mark_duplicates_1.96.cwl example_inputs.yaml +``` + diff --git a/picard_mark_duplicates_2.21.2/README.md b/docs/picard-tools/picard_mark_duplicates_2.21.2.md similarity index 89% 
rename from picard_mark_duplicates_2.21.2/README.md rename to docs/picard-tools/picard_mark_duplicates_2.21.2.md index 69f2f101..f978a4ba 100644 --- a/picard_mark_duplicates_2.21.2/README.md +++ b/docs/picard-tools/picard_mark_duplicates_2.21.2.md @@ -1,17 +1,16 @@ -# CWL for running Picard - MarkDuplicates +# MarkDuplicates v2.21.2 ## Version of tools in docker image -| Tool | Version | Location | -|--- |--- |--- | -| picard | 2.21.2 | https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar | - +| Tool | Version | Location | +| :--- | :--- | :--- | +| picard | 2.21.2 | [https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar](https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar) | ## CWL -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): ```bash > toil-cwl-runner picard_mark_duplicates_2.21.2.cwl example_inputs.yaml @@ -75,3 +74,4 @@ optional arguments: best. Default value: 100. This option can be set to 'null' to clear the default value. 
``` + diff --git a/docs/picard-tools/picard_mark_duplicates_2.8.1.md b/docs/picard-tools/picard_mark_duplicates_2.8.1.md new file mode 100644 index 00000000..cfb0fc92 --- /dev/null +++ b/docs/picard-tools/picard_mark_duplicates_2.8.1.md @@ -0,0 +1,20 @@ +# MarkDuplicates v2.8.1 + +## Version of tools in docker image \(/container/Dockerfile\) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| java base image | 8 | - | +| picard | 2.8.1 | [https://github.com/broadinstitute/picard/releases/download/2.8.1/picard.jar](https://github.com/broadinstitute/picard/releases/download/2.8.1/picard.jar) | +| R | 3.3.3 | r-base for openjdk:8 | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner picard_mark_duplicates_2.8.1.cwl example_inputs.yaml +``` + diff --git a/picard_mark_duplicates_4.1.8.1/README.md b/docs/picard-tools/picard_mark_duplicates_4.1.8.1.md similarity index 95% rename from picard_mark_duplicates_4.1.8.1/README.md rename to docs/picard-tools/picard_mark_duplicates_4.1.8.1.md index 00079d8a..fe6c11f1 100644 --- a/picard_mark_duplicates_4.1.8.1/README.md +++ b/docs/picard-tools/picard_mark_duplicates_4.1.8.1.md @@ -1,17 +1,16 @@ -# CWL for running Picard - MarkDuplicates +# MarkDuplicates v4.1.8.1 ## Version of tools in docker image -| Tool | Version | Location | -|--- |--- |--- | -| gatk | 4.1.8.1 | broadinstitute/gatk:4.1.8.1 | - +| Tool | Version | Location | +| :--- | :--- | :--- | +| gatk | 4.1.8.1 | broadinstitute/gatk:4.1.8.1 | ## CWL -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): ```bash > toil-cwl-runner picard_mark_duplicates_4.1.8.1.cwl example_inputs.yaml @@ -113,3 +112,4 @@ 
optional arguments: best. Default value: 100. This option can be set to 'null' to clear the default value. ``` + diff --git a/docs/trim-galore/README.md b/docs/trim-galore/README.md new file mode 100644 index 00000000..2f5f0270 --- /dev/null +++ b/docs/trim-galore/README.md @@ -0,0 +1,2 @@ +# Trim Galore + diff --git a/trim_galore_0.6.2/README.md b/docs/trim-galore/trim_galore_0.6.2.md similarity index 83% rename from trim_galore_0.6.2/README.md rename to docs/trim-galore/trim_galore_0.6.2.md index 3727924e..4ef02a4d 100644 --- a/trim_galore_0.6.2/README.md +++ b/docs/trim-galore/trim_galore_0.6.2.md @@ -1,25 +1,25 @@ -# CWL and Dockerfile for running Trim Galore +# v0.6.2 -## Version of tools in docker image (/container/Dockerfile) +## Version of tools in docker image \(/container/Dockerfile\) -| Tool | Version | Location | -|--- |--- |--- | -| Ubuntu base image | 18.04 | - | -| cutadapt | 2.3 | https://pypi.org/project/cutadapt/ | -| FASTQC | 0.11.8 | https://www.bioinformatics.babraham.ac.uk/projects/fastqc/fastqc_v0.11.8.zip | -| Trim Galore | 0.6.2 | https://github.com/FelixKrueger/TrimGalore/archive/0.6.2.tar.gz | +| Tool | Version | Location | +| :--- | :--- | :--- | +| Ubuntu base image | 18.04 | - | +| cutadapt | 2.3 | [https://pypi.org/project/cutadapt/](https://pypi.org/project/cutadapt/) | +| FASTQC | 0.11.8 | [https://www.bioinformatics.babraham.ac.uk/projects/fastqc/fastqc\_v0.11.8.zip](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/fastqc_v0.11.8.zip) | +| Trim Galore | 0.6.2 | [https://github.com/FelixKrueger/TrimGalore/archive/0.6.2.tar.gz](https://github.com/FelixKrueger/TrimGalore/archive/0.6.2.tar.gz) | ## CWL -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): ```bash > toil-cwl-runner 
trim_galore_0.6.2.cwl example_inputs.yaml ``` -**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** +**If at MSK, using the JUNO cluster you can use the following command** ```bash #Using CWLTOOL @@ -32,7 +32,7 @@ ### Usage -``` +```text usage: trim_galore_0.6.2.cwl [-h] positional arguments: @@ -92,4 +92,5 @@ optional arguments: --error_rate ERROR_RATE Maximum allowed error rate (no. of errors divided by the length of the matching region) (default: 0.1) -``` \ No newline at end of file +``` + diff --git a/docs/ubuntu-utilites/README.md b/docs/ubuntu-utilites/README.md new file mode 100644 index 00000000..5d69693e --- /dev/null +++ b/docs/ubuntu-utilites/README.md @@ -0,0 +1,2 @@ +# Ubuntu utilites + diff --git a/docs/ubuntu-utilites/utilities_ubuntu_18.04.md b/docs/ubuntu-utilites/utilities_ubuntu_18.04.md new file mode 100644 index 00000000..ba695d67 --- /dev/null +++ b/docs/ubuntu-utilites/utilities_ubuntu_18.04.md @@ -0,0 +1,26 @@ +# v18.04 + +## Version of tools in docker image \(/container/Dockerfile\) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| Ubuntu base image | 18.04 | - | + +## Available tools + +| Tool | Description | +| :--- | :--- | +| sort.cwl | sort lines of text files | +| gzip.cwl | compress or expand files | +| mv.cwl | move \(rename\) files | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs\_toolname.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner gzip.cwl example_inputs_gzip.yaml +``` + diff --git a/docs/waltz/README.md b/docs/waltz/README.md new file mode 100644 index 00000000..0d43eaff --- /dev/null +++ b/docs/waltz/README.md @@ -0,0 +1,2 @@ +# Waltz + diff --git a/waltz_count_reads_3.1.1/README.md 
b/docs/waltz/waltz_count_reads_3.1.1.md similarity index 70% rename from waltz_count_reads_3.1.1/README.md rename to docs/waltz/waltz_count_reads_3.1.1.md index 8ad83443..e3131e9f 100644 --- a/waltz_count_reads_3.1.1/README.md +++ b/docs/waltz/waltz_count_reads_3.1.1.md @@ -1,24 +1,25 @@ -# CWL and Dockerfile for running Waltz - Count Reads +# CountReads v3.1.1 -## Version of tools in docker image (/container/Dockerfile) +## Version of tools in docker image \(/container/Dockerfile\) -| Tool | Version | Location | -|--- |--- |--- | -| java base image | 8 | - | -| waltz | 3.1.1 | https://github.com/juberpatel/Waltz/releases/download/v3.1.1/Waltz-3.1.1.jar | +| Tool | Version | Location | +| :--- | :--- | :--- | +| java base image | 8 | - | +| waltz | 3.1.1 | [https://github.com/juberpatel/Waltz/releases/download/v3.1.1/Waltz-3.1.1.jar](https://github.com/juberpatel/Waltz/releases/download/v3.1.1/Waltz-3.1.1.jar) | [![](https://img.shields.io/badge/version-3.1.1-blue)](https://github.com/juberpatel/Waltz/releases/tag/v3.1.1) + ## CWL -- CWL specification 1.0 -- Use example_inputs.yml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): +* CWL specification 1.0 +* Use example\_inputs.yml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): ```bash > toil-cwl-runner waltz_count_reads_3.1.1.cwl example_inputs.yml ``` -**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** +**If at MSK, using the JUNO cluster you can use the following command** ```bash #Using CWLTOOL @@ -55,3 +56,4 @@ optional arguments: --number_of_threads NUMBER_OF_THREADS --bed_file BED_FILE ``` + diff --git a/waltz_pileupmatrices_3.1.1/README.md b/docs/waltz/waltz_pileupmatrices_3.1.1.md similarity index 70% rename from 
waltz_pileupmatrices_3.1.1/README.md rename to docs/waltz/waltz_pileupmatrices_3.1.1.md index b5aed666..cc432392 100644 --- a/waltz_pileupmatrices_3.1.1/README.md +++ b/docs/waltz/waltz_pileupmatrices_3.1.1.md @@ -1,24 +1,25 @@ -# CWL and Dockerfile for running Waltz - PileupMetrics +# PileupMetrics v3.1.1 -## Version of tools in docker image (../waltz_count_reads_3.1.1/container/Dockerfile) +## Version of tools in docker image \(../waltz\_count\_reads\_3.1.1/container/Dockerfile\) -| Tool | Version | Location | -|--- |--- |--- | -| java base image | 8 | - | -| waltz | 3.1.1 | https://github.com/juberpatel/Waltz/releases/download/v3.1.1/Waltz-3.1.1.jar | +| Tool | Version | Location | +| :--- | :--- | :--- | +| java base image | 8 | - | +| waltz | 3.1.1 | [https://github.com/juberpatel/Waltz/releases/download/v3.1.1/Waltz-3.1.1.jar](https://github.com/juberpatel/Waltz/releases/download/v3.1.1/Waltz-3.1.1.jar) | [![](https://img.shields.io/badge/version-3.1.1-blue)](https://github.com/juberpatel/Waltz/releases/tag/v3.1.1) + ## CWL -- CWL specification 1.0 -- Use example_inputs.yml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): +* CWL specification 1.0 +* Use example\_inputs.yml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): ```bash > toil-cwl-runner waltz_pileupmatrices_3.1.1.cwl example_inputs.yml ``` -**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** +**If at MSK, using the JUNO cluster you can use the following command** ```bash #Using CWLTOOL @@ -56,3 +57,4 @@ optional arguments: --number_of_threads NUMBER_OF_THREADS --bed_file BED_FILE ``` + diff --git a/gatk_ApplyBQSR_4.1.2.0/README.md b/gatk_ApplyBQSR_4.1.2.0/README.md deleted file mode 100644 index 
6dd376f7..00000000 --- a/gatk_ApplyBQSR_4.1.2.0/README.md +++ /dev/null @@ -1,128 +0,0 @@ -# CWL and Dockerfile for running GATK4 - Apply BQSR - -## Version of tools in [docker image ](https://hub.docker.com/r/broadinstitute/gatk) - -| Tool | Version | Location | -|--- |--- |--- | -| GATK | 4.1.2.0 | https://github.com/broadinstitute/gatk/releases/tag/4.1.2.0 | - -[![](https://img.shields.io/badge/version-4.1.2.0-blue)](https://github.com/broadinstitute/gatk/releases/tag/4.1.2.0) -## CWL - -- CWL specification 1.0 -- Use example_inputs.yml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner gatk_ApplyBQSR_4.1.2.0.cwl example_inputs.yml -``` - -**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** - -```bash -#Using CWLTOOL -> cwltool --singularity --non-strict gatk_ApplyBQSR_4.1.2.0.cwl inputs.yaml - -#Using toil-cwl-runner -> mkdir run_directory -> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 gatk_ApplyBQSR_4.1.2.0.cwl inputs.yaml > file.stdout 2> file.stderr & -``` - -## Usage - -```bash -> toil-cwl-runner gatk_ApplyBQSR_4.1.2.0.cwl --help - -usage: gatk_ApplyBQSR_4.1.2.0.cwl [-h] --reference REFERENCE - [--create_output_bam_index] - --bqsr_recal_file BQSR_RECAL_FILE --input - INPUT [--output_file_name OUTPUT_FILE_NAME] - [--add_output_sam_program_record] - [--add_output_vcf_command_line] - [--arguments_file ARGUMENTS_FILE] - [--cloud_index_prefetch_buffer CLOUD_INDEX_PREFETCH_BUFFER] - [--cloud_prefetch_buffer CLOUD_PREFETCH_BUFFER] 
- [--create_output_bam_md5] - [--create_output_variant_index] - [--create_output_variant_md5] - [--disable_bam_index_caching] - [--disable_read_filter DISABLE_READ_FILTER] - [--disable_sequence_dictionary_validation] - [--emit_original_quals] - [--exclude_intervals EXCLUDE_INTERVALS] - [--gatk_config_file GATK_CONFIG_FILE] - [--gcs_max_retries GCS_MAX_RETRIES] - [--gcs_project_for_requester_pays GCS_PROJECT_FOR_REQUESTER_PAYS] - [--global_qscore_prior GLOBAL_QSCORE_PRIOR] - [--interval_exclusion_padding INTERVAL_EXCLUSION_PADDING] - [--interval_merging_rule INTERVAL_MERGING_RULE] - [--interval_padding INTERVAL_PADDING] - [--interval_set_rule INTERVAL_SET_RULE] - [--intervals INTERVALS] [--lenient] - [--preserve_qscores_less_than PRESERVE_QSCORES_LESS_THAN] - [--quantize_quals QUANTIZE_QUALS] [--quiet] - [--read_filter READ_FILTER] - [--read_index READ_INDEX] - [--read_validation_stringency READ_VALIDATION_STRINGENCY] - [--seconds_between_progress_updates SECONDS_BETWEEN_PROGRESS_UPDATES] - [--sequence_dictionary SEQUENCE_DICTIONARY] - [--sites_only_vcf_output] - [--use_jdk_deflater] [--use_jdk_inflater] - [--use_original_qualities] - [--memory_overhead MEMORY_OVERHEAD] - [--memory_per_job MEMORY_PER_JOB] - [--number_of_threads NUMBER_OF_THREADS] - [job_order] - -positional arguments: - job_order Job input json file - -optional arguments: - -h, --help show this help message and exit - --reference REFERENCE - Reference sequence - --create_output_bam_index - --bqsr_recal_file BQSR_RECAL_FILE - Input recalibration table for BQSR. Only run ApplyBQSR - with the covariates table created from the input BAM - --input INPUT A BAM file containing input read data - --output_file_name OUTPUT_FILE_NAME - Output file name. 
Not Required - --add_output_sam_program_record - --add_output_vcf_command_line - --arguments_file ARGUMENTS_FILE - --cloud_index_prefetch_buffer CLOUD_INDEX_PREFETCH_BUFFER - --cloud_prefetch_buffer CLOUD_PREFETCH_BUFFER - --create_output_bam_md5 - --create_output_variant_index - --create_output_variant_md5 - --disable_bam_index_caching - --disable_read_filter DISABLE_READ_FILTER - --disable_sequence_dictionary_validation - --emit_original_quals - --exclude_intervals EXCLUDE_INTERVALS - --gatk_config_file GATK_CONFIG_FILE - --gcs_max_retries GCS_MAX_RETRIES - --gcs_project_for_requester_pays GCS_PROJECT_FOR_REQUESTER_PAYS - --global_qscore_prior GLOBAL_QSCORE_PRIOR - --interval_exclusion_padding INTERVAL_EXCLUSION_PADDING - --interval_merging_rule INTERVAL_MERGING_RULE - --interval_padding INTERVAL_PADDING - --interval_set_rule INTERVAL_SET_RULE - --intervals INTERVALS - --lenient - --preserve_qscores_less_than PRESERVE_QSCORES_LESS_THAN - --quantize_quals QUANTIZE_QUALS - --quiet - --read_filter READ_FILTER - --read_index READ_INDEX - --read_validation_stringency READ_VALIDATION_STRINGENCY - --seconds_between_progress_updates SECONDS_BETWEEN_PROGRESS_UPDATES - --sequence_dictionary SEQUENCE_DICTIONARY - --sites_only_vcf_output - --use_jdk_deflater - --use_jdk_inflater - --use_original_qualities - --memory_overhead MEMORY_OVERHEAD - --memory_per_job MEMORY_PER_JOB - --number_of_threads NUMBER_OF_THREADS \ No newline at end of file diff --git a/gatk_BaseRecalibrator_4.1.2.0/README.md b/gatk_BaseRecalibrator_4.1.2.0/README.md deleted file mode 100644 index 005acb15..00000000 --- a/gatk_BaseRecalibrator_4.1.2.0/README.md +++ /dev/null @@ -1,142 +0,0 @@ -# CWL and Dockerfile for running GATK4 - Base Recalibrator - -## Version of tools in [docker image ](https://hub.docker.com/r/broadinstitute/gatk) - -| Tool | Version | Location | -|--- |--- |--- | -| GATK | 4.1.2.0 | https://github.com/broadinstitute/gatk/releases/tag/4.1.2.0 | - 
-[![](https://img.shields.io/badge/version-4.1.2.0-blue)](https://github.com/broadinstitute/gatk/releases/tag/4.1.2.0) -## CWL - -- CWL specification 1.0 -- Use example_inputs.yml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner gatk_baserecalibrator_4.1.2.0.cwl example_inputs.yml -``` - -**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** - -```bash -#Using CWLTOOL -> cwltool --singularity --non-strict gatk_baserecalibrator_4.1.2.0.cwl inputs.yaml - -#Using toil-cwl-runner -> mkdir run_directory -> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 gatk_baserecalibrator_4.1.2.0.cwl inputs.yaml > file.stdout 2> file.stderr & -``` - -## Usage - -```bash -> toil-cwl-runner gatk_baserecalibrator_4.1.2.0.cwl --help - -usage: gatk_baserecalibrator_4.1.2.0.cwl [-h] --input INPUT --known_sites_1 - KNOWN_SITES_1 --reference REFERENCE - [--output_file_name OUTPUT_FILE_NAME] - [--add_output_sam_program_record] - [--add_output_vcf_command_line] - [--arguments_file ARGUMENTS_FILE] - [--binary_tag_name BINARY_TAG_NAME] - [--bqsr_baq_gap_open_penalty BQSR_BAQ_GAP_OPEN_PENALTY] - [--cloud-index-prefetch-buffer CLOUD_INDEX_PREFETCH_BUFFER] - [--cloud_prefetch_buffer CLOUD_PREFETCH_BUFFER] - [--create_output_bam_index] - [--create_output_bam_md5] - [--create_output_variant_index] - [--create_output_variant_md5] - [--default_base_qualities DEFAULT_BASE_QUALITIES] - [--deletions_default_quality DELETIONS_DEFAULT_QUALITY] - [--disable_bam_index_caching] - 
[--disable_read_filter DISABLE_READ_FILTER] - [--disable_sequence_dictionary_validation] - [--exclude_intervals EXCLUDE_INTERVALS] - [--gatk_config_file GATK_CONFIG_FILE] - [--gcs_max_retries GCS_MAX_RETRIES] - [--gcs_project_for_requester_pays GCS_PROJECT_FOR_REQUESTER_PAYS] - [--indels_context_size INDELS_CONTEXT_SIZE] - [--insertions_default_quality INSERTIONS_DEFAULT_QUALITY] - [--interval_exclusion_padding INTERVAL_EXCLUSION_PADDING] - [--interval_merging_rule INTERVAL_MERGING_RULE] - [--interval_padding INTERVAL_PADDING] - [--interval_set_rule INTERVAL_SET_RULE] - [--intervals INTERVALS] [--lenient] - [--low_quality_tail LOW_QUALITY_TAIL] - [--maximum_cycle_value MAXIMUM_CYCLE_VALUE] - [--mismatches_context_size MISMATCHES_CONTEXT_SIZE] - [--mismatches_default_quality MISMATCHES_DEFAULT_QUALITY] - [--preserve_qscores_less_than PRESERVE_QSCORES_LESS_THAN] - [--quantizing_levels QUANTIZING_LEVELS] - [--QUIET] [--read_filter READ_FILTER] - [--read_index READ_INDEX] - [--seconds_between_progress_updates SECONDS_BETWEEN_PROGRESS_UPDATES] - [--sequence_dictionary SEQUENCE_DICTIONARY] - [--sites_only_vcf_output] - [--use_original_qualities] - [--number_of_threads NUMBER_OF_THREADS] - [--memory_per_job MEMORY_PER_JOB] - [--memory_overhead MEMORY_OVERHEAD] - [--known_sites_2 KNOWN_SITES_2] - [job_order] - -positional arguments: - job_order Job input json file - -optional arguments: - -h, --help show this help message and exit - --input INPUT BAM/SAM file containing reads - --known_sites_1 KNOWN_SITES_1 - One or more databases of known polymorphic sites used - to exclude regions around known polymorphisms from - analysis - --reference REFERENCE - Reference sequence file - --output_file_name OUTPUT_FILE_NAME - Output file name. 
Not Required - --add_output_sam_program_record - --add_output_vcf_command_line - --arguments_file ARGUMENTS_FILE - --binary_tag_name BINARY_TAG_NAME - --bqsr_baq_gap_open_penalty BQSR_BAQ_GAP_OPEN_PENALTY - --cloud-index-prefetch-buffer CLOUD_INDEX_PREFETCH_BUFFER - --cloud_prefetch_buffer CLOUD_PREFETCH_BUFFER - --create_output_bam_index - --create_output_bam_md5 - --create_output_variant_index - --create_output_variant_md5 - --default_base_qualities DEFAULT_BASE_QUALITIES - --deletions_default_quality DELETIONS_DEFAULT_QUALITY - --disable_bam_index_caching - --disable_read_filter DISABLE_READ_FILTER - --disable_sequence_dictionary_validation - --exclude_intervals EXCLUDE_INTERVALS - --gatk_config_file GATK_CONFIG_FILE - --gcs_max_retries GCS_MAX_RETRIES - --gcs_project_for_requester_pays GCS_PROJECT_FOR_REQUESTER_PAYS - --indels_context_size INDELS_CONTEXT_SIZE - --insertions_default_quality INSERTIONS_DEFAULT_QUALITY - --interval_exclusion_padding INTERVAL_EXCLUSION_PADDING - --interval_merging_rule INTERVAL_MERGING_RULE - --interval_padding INTERVAL_PADDING - --interval_set_rule INTERVAL_SET_RULE - --intervals INTERVALS - --lenient - --low_quality_tail LOW_QUALITY_TAIL - --maximum_cycle_value MAXIMUM_CYCLE_VALUE - --mismatches_context_size MISMATCHES_CONTEXT_SIZE - --mismatches_default_quality MISMATCHES_DEFAULT_QUALITY - --preserve_qscores_less_than PRESERVE_QSCORES_LESS_THAN - --quantizing_levels QUANTIZING_LEVELS - --QUIET - --read_filter READ_FILTER - --read_index READ_INDEX - --seconds_between_progress_updates SECONDS_BETWEEN_PROGRESS_UPDATES - --sequence_dictionary SEQUENCE_DICTIONARY - --sites_only_vcf_output - --use_original_qualities - --number_of_threads NUMBER_OF_THREADS - --memory_per_job MEMORY_PER_JOB - --memory_overhead MEMORY_OVERHEAD - --known_sites_2 KNOWN_SITES_2 \ No newline at end of file diff --git a/gatk_apply_bqsr_4.1.8.1/README.md b/gatk_apply_bqsr_4.1.8.1/README.md deleted file mode 100644 index e27a608e..00000000 --- 
a/gatk_apply_bqsr_4.1.8.1/README.md +++ /dev/null @@ -1,128 +0,0 @@ -# CWL and Dockerfile for running GATK4 - Apply BQSR - -## Version of tools in [docker image ](https://hub.docker.com/r/broadinstitute/gatk) - -| Tool | Version | Location | -|--- |--- |--- | -| GATK | 4.1.8.1 | https://github.com/broadinstitute/gatk/releases/tag/4.1.8.1 | - -[![](https://img.shields.io/badge/version-4.1.8.1-blue)](https://github.com/broadinstitute/gatk/releases/tag/4.1.8.1) -## CWL - -- CWL specification 1.0 -- Use example_inputs.yml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner gatk_apply_bqsr_4.1.8.1.cwl example_inputs.yml -``` - -**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** - -```bash -#Using CWLTOOL -> cwltool --singularity --non-strict gatk_apply_bqsr_4.1.8.1.cwl inputs.yaml - -#Using toil-cwl-runner -> mkdir run_directory -> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 gatk_apply_bqsr_4.1.8.1.cwl inputs.yaml > file.stdout 2> file.stderr & -``` - -## Usage - -```bash -> toil-cwl-runner gatk_apply_bqsr_4.1.8.1.cwl --help - -usage: gatk_apply_bqsr_4.1.8.1.cwl [-h] --reference REFERENCE - [--create_output_bam_index] - --bqsr_recal_file BQSR_RECAL_FILE --input - INPUT [--output_file_name OUTPUT_FILE_NAME] - [--add_output_sam_program_record] - [--add_output_vcf_command_line] - [--arguments_file ARGUMENTS_FILE] - [--cloud_index_prefetch_buffer CLOUD_INDEX_PREFETCH_BUFFER] - [--cloud_prefetch_buffer CLOUD_PREFETCH_BUFFER] - 
[--create_output_bam_md5] - [--create_output_variant_index] - [--create_output_variant_md5] - [--disable_bam_index_caching] - [--disable_read_filter DISABLE_READ_FILTER] - [--disable_sequence_dictionary_validation] - [--emit_original_quals] - [--exclude_intervals EXCLUDE_INTERVALS] - [--gatk_config_file GATK_CONFIG_FILE] - [--gcs_max_retries GCS_MAX_RETRIES] - [--gcs_project_for_requester_pays GCS_PROJECT_FOR_REQUESTER_PAYS] - [--global_qscore_prior GLOBAL_QSCORE_PRIOR] - [--interval_exclusion_padding INTERVAL_EXCLUSION_PADDING] - [--interval_merging_rule INTERVAL_MERGING_RULE] - [--interval_padding INTERVAL_PADDING] - [--interval_set_rule INTERVAL_SET_RULE] - [--intervals INTERVALS] [--lenient] - [--preserve_qscores_less_than PRESERVE_QSCORES_LESS_THAN] - [--quantize_quals QUANTIZE_QUALS] [--quiet] - [--read_filter READ_FILTER] - [--read_index READ_INDEX] - [--read_validation_stringency READ_VALIDATION_STRINGENCY] - [--seconds_between_progress_updates SECONDS_BETWEEN_PROGRESS_UPDATES] - [--sequence_dictionary SEQUENCE_DICTIONARY] - [--sites_only_vcf_output] - [--use_jdk_deflater] [--use_jdk_inflater] - [--use_original_qualities] - [--memory_overhead MEMORY_OVERHEAD] - [--memory_per_job MEMORY_PER_JOB] - [--number_of_threads NUMBER_OF_THREADS] - [job_order] - -positional arguments: - job_order Job input json file - -optional arguments: - -h, --help show this help message and exit - --reference REFERENCE - Reference sequence - --create_output_bam_index - --bqsr_recal_file BQSR_RECAL_FILE - Input recalibration table for BQSR. Only run ApplyBQSR - with the covariates table created from the input BAM - --input INPUT A BAM file containing input read data - --output_file_name OUTPUT_FILE_NAME - Output file name. 
Not Required - --add_output_sam_program_record - --add_output_vcf_command_line - --arguments_file ARGUMENTS_FILE - --cloud_index_prefetch_buffer CLOUD_INDEX_PREFETCH_BUFFER - --cloud_prefetch_buffer CLOUD_PREFETCH_BUFFER - --create_output_bam_md5 - --create_output_variant_index - --create_output_variant_md5 - --disable_bam_index_caching - --disable_read_filter DISABLE_READ_FILTER - --disable_sequence_dictionary_validation - --emit_original_quals - --exclude_intervals EXCLUDE_INTERVALS - --gatk_config_file GATK_CONFIG_FILE - --gcs_max_retries GCS_MAX_RETRIES - --gcs_project_for_requester_pays GCS_PROJECT_FOR_REQUESTER_PAYS - --global_qscore_prior GLOBAL_QSCORE_PRIOR - --interval_exclusion_padding INTERVAL_EXCLUSION_PADDING - --interval_merging_rule INTERVAL_MERGING_RULE - --interval_padding INTERVAL_PADDING - --interval_set_rule INTERVAL_SET_RULE - --intervals INTERVALS - --lenient - --preserve_qscores_less_than PRESERVE_QSCORES_LESS_THAN - --quantize_quals QUANTIZE_QUALS - --quiet - --read_filter READ_FILTER - --read_index READ_INDEX - --read_validation_stringency READ_VALIDATION_STRINGENCY - --seconds_between_progress_updates SECONDS_BETWEEN_PROGRESS_UPDATES - --sequence_dictionary SEQUENCE_DICTIONARY - --sites_only_vcf_output - --use_jdk_deflater - --use_jdk_inflater - --use_original_qualities - --memory_overhead MEMORY_OVERHEAD - --memory_per_job MEMORY_PER_JOB - --number_of_threads NUMBER_OF_THREADS diff --git a/gatk_base_recalibrator_4.1.8.1/README.md b/gatk_base_recalibrator_4.1.8.1/README.md deleted file mode 100644 index db6542cf..00000000 --- a/gatk_base_recalibrator_4.1.8.1/README.md +++ /dev/null @@ -1,142 +0,0 @@ -# CWL and Dockerfile for running GATK4 - Base Recalibrator - -## Version of tools in [docker image ](https://hub.docker.com/r/broadinstitute/gatk) - -| Tool | Version | Location | -|--- |--- |--- | -| GATK | 4.1.8.1 | https://github.com/broadinstitute/gatk/releases/tag/4.1.8.1 | - 
-[![](https://img.shields.io/badge/version-4.1.8.1-blue)](https://github.com/broadinstitute/gatk/releases/tag/4.1.8.1) -## CWL - -- CWL specification 1.0 -- Use example_inputs.yml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner gatk_base_recalibrator_4.1.8.1.cwl example_inputs.yml -``` - -**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** - -```bash -#Using CWLTOOL -> cwltool --singularity --non-strict gatk_base_recalibrator_4.1.8.1.cwl inputs.yaml - -#Using toil-cwl-runner -> mkdir run_directory -> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 gatk_base_recalibrator_4.1.8.1.cwl inputs.yaml > file.stdout 2> file.stderr & -``` - -## Usage - -```bash -> toil-cwl-runner gatk_base_recalibrator_4.1.8.1.cwl --help - -usage: gatk_base_recalibrator_4.1.8.1.cwl [-h] --input INPUT --known_sites_1 - KNOWN_SITES_1 --reference REFERENCE - [--output_file_name OUTPUT_FILE_NAME] - [--add_output_sam_program_record] - [--add_output_vcf_command_line] - [--arguments_file ARGUMENTS_FILE] - [--binary_tag_name BINARY_TAG_NAME] - [--bqsr_baq_gap_open_penalty BQSR_BAQ_GAP_OPEN_PENALTY] - [--cloud-index-prefetch-buffer CLOUD_INDEX_PREFETCH_BUFFER] - [--cloud_prefetch_buffer CLOUD_PREFETCH_BUFFER] - [--create_output_bam_index] - [--create_output_bam_md5] - [--create_output_variant_index] - [--create_output_variant_md5] - [--default_base_qualities DEFAULT_BASE_QUALITIES] - [--deletions_default_quality DELETIONS_DEFAULT_QUALITY] - 
[--disable_bam_index_caching] - [--disable_read_filter DISABLE_READ_FILTER] - [--disable_sequence_dictionary_validation] - [--exclude_intervals EXCLUDE_INTERVALS] - [--gatk_config_file GATK_CONFIG_FILE] - [--gcs_max_retries GCS_MAX_RETRIES] - [--gcs_project_for_requester_pays GCS_PROJECT_FOR_REQUESTER_PAYS] - [--indels_context_size INDELS_CONTEXT_SIZE] - [--insertions_default_quality INSERTIONS_DEFAULT_QUALITY] - [--interval_exclusion_padding INTERVAL_EXCLUSION_PADDING] - [--interval_merging_rule INTERVAL_MERGING_RULE] - [--interval_padding INTERVAL_PADDING] - [--interval_set_rule INTERVAL_SET_RULE] - [--intervals INTERVALS] [--lenient] - [--low_quality_tail LOW_QUALITY_TAIL] - [--maximum_cycle_value MAXIMUM_CYCLE_VALUE] - [--mismatches_context_size MISMATCHES_CONTEXT_SIZE] - [--mismatches_default_quality MISMATCHES_DEFAULT_QUALITY] - [--preserve_qscores_less_than PRESERVE_QSCORES_LESS_THAN] - [--quantizing_levels QUANTIZING_LEVELS] - [--QUIET] [--read_filter READ_FILTER] - [--read_index READ_INDEX] - [--seconds_between_progress_updates SECONDS_BETWEEN_PROGRESS_UPDATES] - [--sequence_dictionary SEQUENCE_DICTIONARY] - [--sites_only_vcf_output] - [--use_original_qualities] - [--number_of_threads NUMBER_OF_THREADS] - [--memory_per_job MEMORY_PER_JOB] - [--memory_overhead MEMORY_OVERHEAD] - [--known_sites_2 KNOWN_SITES_2] - [job_order] - -positional arguments: - job_order Job input json file - -optional arguments: - -h, --help show this help message and exit - --input INPUT BAM/SAM file containing reads - --known_sites_1 KNOWN_SITES_1 - One or more databases of known polymorphic sites used - to exclude regions around known polymorphisms from - analysis - --reference REFERENCE - Reference sequence file - --output_file_name OUTPUT_FILE_NAME - Output file name. 
Not Required - --add_output_sam_program_record - --add_output_vcf_command_line - --arguments_file ARGUMENTS_FILE - --binary_tag_name BINARY_TAG_NAME - --bqsr_baq_gap_open_penalty BQSR_BAQ_GAP_OPEN_PENALTY - --cloud-index-prefetch-buffer CLOUD_INDEX_PREFETCH_BUFFER - --cloud_prefetch_buffer CLOUD_PREFETCH_BUFFER - --create_output_bam_index - --create_output_bam_md5 - --create_output_variant_index - --create_output_variant_md5 - --default_base_qualities DEFAULT_BASE_QUALITIES - --deletions_default_quality DELETIONS_DEFAULT_QUALITY - --disable_bam_index_caching - --disable_read_filter DISABLE_READ_FILTER - --disable_sequence_dictionary_validation - --exclude_intervals EXCLUDE_INTERVALS - --gatk_config_file GATK_CONFIG_FILE - --gcs_max_retries GCS_MAX_RETRIES - --gcs_project_for_requester_pays GCS_PROJECT_FOR_REQUESTER_PAYS - --indels_context_size INDELS_CONTEXT_SIZE - --insertions_default_quality INSERTIONS_DEFAULT_QUALITY - --interval_exclusion_padding INTERVAL_EXCLUSION_PADDING - --interval_merging_rule INTERVAL_MERGING_RULE - --interval_padding INTERVAL_PADDING - --interval_set_rule INTERVAL_SET_RULE - --intervals INTERVALS - --lenient - --low_quality_tail LOW_QUALITY_TAIL - --maximum_cycle_value MAXIMUM_CYCLE_VALUE - --mismatches_context_size MISMATCHES_CONTEXT_SIZE - --mismatches_default_quality MISMATCHES_DEFAULT_QUALITY - --preserve_qscores_less_than PRESERVE_QSCORES_LESS_THAN - --quantizing_levels QUANTIZING_LEVELS - --QUIET - --read_filter READ_FILTER - --read_index READ_INDEX - --seconds_between_progress_updates SECONDS_BETWEEN_PROGRESS_UPDATES - --sequence_dictionary SEQUENCE_DICTIONARY - --sites_only_vcf_output - --use_original_qualities - --number_of_threads NUMBER_OF_THREADS - --memory_per_job MEMORY_PER_JOB - --memory_overhead MEMORY_OVERHEAD - --known_sites_2 KNOWN_SITES_2 diff --git a/marianas_collapsing_first_pass_1.8.1/README.md b/marianas_collapsing_first_pass_1.8.1/README.md deleted file mode 100644 index eba9e12c..00000000 --- 
a/marianas_collapsing_first_pass_1.8.1/README.md +++ /dev/null @@ -1,19 +0,0 @@ -# CWL and Dockerfile for running Marianas - DuplexUMIBamToCollapsedFastqFirstPass - -## Version of tools in docker image (/container/Dockerfile) - -| Tool | Version | Location | -|--- |--- |--- | -| java base image | 8 | - | -| Marianas | 1.8.1 | https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar | - - -## CWL - -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner marianas_first_pass.cwl example_inputs.yaml -``` \ No newline at end of file diff --git a/marianas_collapsing_second_pass_1.8.1/README.md b/marianas_collapsing_second_pass_1.8.1/README.md deleted file mode 100644 index b3cdf8d7..00000000 --- a/marianas_collapsing_second_pass_1.8.1/README.md +++ /dev/null @@ -1,19 +0,0 @@ -# CWL and Dockerfile for running Marianas - DuplexUMIToCollapsedFastqSecondPass - -## Version of tools in docker image (/container/Dockerfile) - -| Tool | Version | Location | -|--- |--- |--- | -| java base image | 8 | - | -| Marianas | 1.8.1 | https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar | - - -## CWL - -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner marianas_first_pass.cwl test_inputs_second_pass.yaml -``` \ No newline at end of file diff --git a/marianas_process_loop_umi_1.8.1/README.md b/marianas_process_loop_umi_1.8.1/README.md deleted file mode 100644 index a5b4e900..00000000 --- a/marianas_process_loop_umi_1.8.1/README.md +++ /dev/null @@ -1,19 +0,0 @@ -# CWL and Dockerfile for running Marianas - ProcessLoopUMIFastq - -## Version of tools in docker image (/container/Dockerfile) - -| Tool | Version | Location | -|--- |--- |--- | -| java base image | 8 | - | -| Marianas | 1.8.1 | 
https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar | - - -## CWL - -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner marianas_first_pass.cwl example_inputs.yaml -``` \ No newline at end of file diff --git a/marianas_separate_bams_1.8.1/README.md b/marianas_separate_bams_1.8.1/README.md deleted file mode 100644 index 998ce5c6..00000000 --- a/marianas_separate_bams_1.8.1/README.md +++ /dev/null @@ -1,33 +0,0 @@ -# CWL and Dockerfile for running Marianas - SeparateBams - -## Version of tools in docker image (../marianas_process_loop_umi_1.8.1/container/Dockerfile) - -| Tool | Version | Location | -|--- |--- |--- | -| java base image | 8 | - | -| Marianas | 1.8.1 | https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar | - - -## CWL - -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner marianas_seprate_bams_1.8.1.cwl example_inputs.yaml -``` - -### Usage - -```bash -usage: marianas_separate_bams_1.8.1/marianas_separate_bams_1.8.1.cwl - [-h] --input_bam INPUT_BAM [job_order] - -positional arguments: - job_order Job input json file - -optional arguments: - -h, --help show this help message and exit - --input_bam INPUT_BAM -``` diff --git a/picard_hsmetrics_2.8.1/README.md b/picard_hsmetrics_2.8.1/README.md deleted file mode 100644 index d10b8ca4..00000000 --- a/picard_hsmetrics_2.8.1/README.md +++ /dev/null @@ -1,26 +0,0 @@ -# CWL and Dockerfile for running Picard - CollectAlignmentSummaryMetrics - -## Version of tools in docker image (../picard_mark_duplicates_2.8.1/container/Dockerfile) - -| Tool | Version | Location | -|--- |--- |--- | -| java base image | 8 | - | -| picard | 2.8.1 | https://github.com/broadinstitute/picard/releases/download/2.8.1/picard.jar | -| R | 
3.3.3 | r-base for opnejdk:8 | - - -## CWL - -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): - -```bash -> toil-cwl-runner picard_hsmetrics_2.8.1.cwl example_inputs.yaml -``` - -### Usage - -```bash -> usage: picard_hsmetrics_2.8.1.cwl [-h] -``` diff --git a/picard_mark_duplicates_1.96/README.md b/picard_mark_duplicates_1.96/README.md deleted file mode 100644 index bb651139..00000000 --- a/picard_mark_duplicates_1.96/README.md +++ /dev/null @@ -1,20 +0,0 @@ -# CWL and Dockerfile for running Picard - MarkDuplicates - -## Version of tools in docker image (/container/Dockerfile) - -| Tool | Version | Location | -|--- |--- |--- | -| java base image | 8 | - | -| picard | 1.96 | https://sourceforge.net/projects/picard/files/picard-tools/1.96/picard-tools-1.96.zip | -| R | 3.3.3 | r-base for opnejdk:8 | - - -## CWL - -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner picard_mark_duplicates_1.96.cwl example_inputs.yaml -``` diff --git a/picard_mark_duplicates_2.8.1/README.md b/picard_mark_duplicates_2.8.1/README.md deleted file mode 100644 index 173d0b7e..00000000 --- a/picard_mark_duplicates_2.8.1/README.md +++ /dev/null @@ -1,20 +0,0 @@ -# CWL and Dockerfile for running Picard - MarkDuplicates - -## Version of tools in docker image (/container/Dockerfile) - -| Tool | Version | Location | -|--- |--- |--- | -| java base image | 8 | - | -| picard | 2.8.1 | https://github.com/broadinstitute/picard/releases/download/2.8.1/picard.jar | -| R | 3.3.3 | r-base for opnejdk:8 | - - -## CWL - -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner picard_mark_duplicates_2.8.1.cwl example_inputs.yaml -``` \ No newline at end of file diff --git 
a/utilities_ubuntu_18.04/README.md b/utilities_ubuntu_18.04/README.md deleted file mode 100644 index d6499376..00000000 --- a/utilities_ubuntu_18.04/README.md +++ /dev/null @@ -1,25 +0,0 @@ -# CWL and Dockerfile for running utilities from Ubuntu 18.04 - -## Version of tools in docker image (/container/Dockerfile) - -| Tool | Version | Location | -| ----------------- | ------- | -------- | -| Ubuntu base image | 18.04 | - | - -## Available tools - -| Tool | Description | -| -------- | ------------------------ | -| sort.cwl | sort lines of text files | -| gzip.cwl | compress or expand files | -| mv.cwl | move (rename) files | - -## CWL - -- CWL specification 1.0 -- Use example_inputs_toolname.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner gzip.cwl example_inputs_gzip.yaml -``` From ebb2717533c8e690737d45237af0ebc71847ebde Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Wed, 26 May 2021 13:18:09 -0400 Subject: [PATCH 371/476] Adding GBCMS --- getbasecountsmutlisample/1.2.5/README.md | 89 ++++++++++++ .../1.2.5/container/Dockerfile | 50 +++++++ .../1.2.5/getbasecountsmultisample_1.2.5.cwl | 137 ++++++++++++++++++ 3 files changed, 276 insertions(+) create mode 100644 getbasecountsmutlisample/1.2.5/README.md create mode 100644 getbasecountsmutlisample/1.2.5/container/Dockerfile create mode 100644 getbasecountsmutlisample/1.2.5/getbasecountsmultisample_1.2.5.cwl diff --git a/getbasecountsmutlisample/1.2.5/README.md b/getbasecountsmutlisample/1.2.5/README.md new file mode 100644 index 00000000..e44235f8 --- /dev/null +++ b/getbasecountsmutlisample/1.2.5/README.md @@ -0,0 +1,89 @@ +# CWL and Dockerfile for running GetBaseCountsMultiSample + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +| ------ | ------- | ----------------------------------------------- | +| ubuntu | 16.04 | - | +| GetBaseCountsMultiSample | 1.2.5 | 
https://github.com/msk-access/GetBaseCountsMultiSample/releases/tag/1.2.5 | + + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner getbasecountsmultisample_1.2.5.cwl example_inputs.yaml +``` + +**If at MSK, using the JUNO cluster having installed toil-msk version 3.21.1 you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/getbasecountsmultisample_1.2.5.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> toil-cwl-runner --singularity --logFile /path/to/gbcms_toil.log --jobStore /path/to/gbcms_jobStore --batchSystem lsf --workDir /path/to/gbcms_toil_log --outdir . --writeLogs /path/to/gbcms_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --disableChaining --maxLogFileSize 20000000000 /path/to/getbasecountsmultisample_1.2.5.cwl /path/to/inputs.yaml > gbcms_toil.stdout 2> gbcms_toil.stderr & +``` + +### Usage + +``` +usage: getbasecountsmultisample_1.2.5.cwl [-h] + [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] + --genotyping_bams GENOTYPING_BAMS + --genotyping_bams_ids + GENOTYPING_BAMS_IDS + --filter_duplicate FILTER_DUPLICATE + --fragment_count FRAGMENT_COUNT + --maf MAF [--maq MAQ] [--omaf] + --output OUTPUT --ref_fasta + REF_FASTA [--vcf VCF] + [--generic_counting GENERIC_COUNTING] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --genotyping_bams GENOTYPING_BAMS + Input bam file + --genotyping_bams_ids GENOTYPING_BAMS_IDS + Input bam, sample identifier to be used for "Tumor + Sample Barcode" for maf or Sample name in the header + 
for vcf + --filter_duplicate FILTER_DUPLICATE + Whether to filter reads that are marked as duplicate. + 0=off, 1=on. Default 1 + --fragment_count FRAGMENT_COUNT + Whether to output fragment read counts DPF/RDF/ADF. + 0=off, 1=on. Default 0 + --maf MAF Input variant file in TCGA maf format. --maf or --vcf + need to be specified at least once. But --maf and + --vcf are mutually exclusive + --maq MAQ Mapping quality threshold. Default 20 + --omaf Output the result in maf format + --output OUTPUT Filename for output of raw fillout data in MAF/VCF + format + --ref_fasta REF_FASTA + Input reference sequence file + --vcf VCF Input variant file in vcf-like format(the first 5 + columns are used). --maf or --vcf need to be specified + at least once. But --maf and --vcf are mutually + exclusive + --generic_counting GENERIC_COUNTING + Use the newly implemented generic counting algorithm. + Works better for complex variants. You may get + different allele count result from the default + counting algorithm +``` diff --git a/getbasecountsmutlisample/1.2.5/container/Dockerfile b/getbasecountsmutlisample/1.2.5/container/Dockerfile new file mode 100644 index 00000000..d8c10a6a --- /dev/null +++ b/getbasecountsmutlisample/1.2.5/container/Dockerfile @@ -0,0 +1,50 @@ +################## BASE IMAGE ###################### + +FROM ubuntu:16.04 + +################## ARGUMENTS######################## + +ARG BUILD_DATE +ARG BUILD_VERSION +ARG LICENSE="Apache-2.0" +ARG GBCMS_VERSION=1.2.5 +ARG VCS_REF +################## METADATA ######################## + +LABEL org.opencontainers.image.vendor="MSKCC" +LABEL org.opencontainers.image.authors="Ronak Shah (shahr2@mskcc.org)" + +LABEL org.opencontainers.image.created=${BUILD_DATE} \ + org.opencontainers.image.version=${BUILD_VERSION} \ + org.opencontainers.image.revision=${VCS_REF} \ + org.opencontainers.image.licenses=${LICENSE} \ + org.opencontainers.image.version.gbcms=${GBCMS_VERSION} \ + 
org.opencontainers.image.source="https://github.com/msk-access/GetBaseCountsMultiSample/releases/" + +LABEL org.opencontainers.image.description="This container uses Ubuntu 16.04 as the base image to build GetBaseCountsMultiSample version ${GBCMS_VERSION}" + +################## INSTALL ########################## + +WORKDIR /usr/src + +RUN apt-get update && \ + apt-get --no-install-recommends install -y \ + wget ca-certificates openssl gcc g++ make zlib1g-dev cmake libjsoncpp-dev && \ + apt-get clean autoclean && \ + apt-get autoremove -y && \ + rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* + +RUN wget --no-check-certificate "https://github.com/msk-access/GetBaseCountsMultiSample/archive/refs/tags/${GBCMS_VERSION}.tar.gz" && \ + tar xzvf /usr/src/${GBCMS_VERSION}.tar.gz && \ + cd /usr/src/GetBaseCountsMultiSample-${GBCMS_VERSION}/bamtools-master && \ + rm -r build/ && \ + mkdir build && \ + cd build/ && \ + cmake -DCMAKE_CXX_FLAGS=-std=c++03 .. && \ + make && \ + make install && \ + cp ../lib/libbamtools.so.2.3.0 /usr/lib/ && \ + cd /usr/src/GetBaseCountsMultiSample-${GBCMS_VERSION}/ && \ + make && \ + cp GetBaseCountsMultiSample /usr/local/bin/ + diff --git a/getbasecountsmutlisample/1.2.5/getbasecountsmultisample_1.2.5.cwl b/getbasecountsmutlisample/1.2.5/getbasecountsmultisample_1.2.5.cwl new file mode 100644 index 00000000..0ad7c7b5 --- /dev/null +++ b/getbasecountsmutlisample/1.2.5/getbasecountsmultisample_1.2.5.cwl @@ -0,0 +1,137 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: getbasecountsmultisample_1_2_5 +baseCommand: + - GetBaseCountsMultiSample +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? 
+ - id: genotyping_bams + type: 'File[]' + doc: Input bam file + - id: genotyping_bams_ids + type: 'string[]' + doc: >- + Input bam, sample identifier to be used for "Tumor Sample Barcode" for maf + or Sample name in the header for vcf + - id: filter_duplicate + type: int + inputBinding: + position: 0 + prefix: '--filter_duplicate' + doc: >- + Whether to filter reads that are marked as duplicate. 0=off, 1=on. Default + 1 + - id: fragment_count + type: int + inputBinding: + position: 0 + prefix: '--fragment_count' + doc: 'Whether to output fragment read counts DPF/RDF/ADF. 0=off, 1=on. Default 0' + - id: maf + type: File + inputBinding: + position: 0 + prefix: '--maf' + doc: >- + Input variant file in TCGA maf format. --maf or --vcf need to be specified + at least once. But --maf and --vcf are mutually exclusive + - id: maq + type: int? + inputBinding: + position: 0 + prefix: '--maq' + doc: Mapping quality threshold. Default 20 + - id: omaf + type: boolean? + inputBinding: + position: 0 + prefix: '--omaf' + doc: Output the result in maf format + - id: output + type: string + inputBinding: + position: 0 + prefix: '--output' + doc: Filename for output of raw fillout data in MAF/VCF format + - id: ref_fasta + type: File + inputBinding: + position: 0 + prefix: '--fasta' + doc: Input reference sequence file + - id: vcf + type: File? + inputBinding: + position: 0 + prefix: '--vcf' + doc: >- + Input variant file in vcf-like format(the first 5 columns are used). --maf + or --vcf need to be specified at least once. But --maf and --vcf are + mutually exclusive + - id: generic_counting + type: File? + inputBinding: + position: 0 + prefix: '--generic_counting' + doc: >- + se the newly implemented generic counting algorithm. Works better for + complex variants. 
You may get different allele count result from the + default counting algorithm +outputs: + - id: fillout + type: File + outputBinding: + glob: | + $(inputs.output) +label: getbasecountsmultisample_1.2.5 +arguments: + - position: 0 + prefix: '' + shellQuote: false + valueFrom: | + ${ + return inputs.genotyping_bams_ids.map(function(b, i) { + return '--bam ' + b + ':' + inputs.genotyping_bams[i].path + }).join(' ') + } + - position: 0 + prefix: '--thread' + valueFrom: $(runtime.cores) +requirements: + - class: ShellCommandRequirement + - class: ResourceRequirement + ramMin: 16000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/gbcms:1.2.5' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:johnsoni@mskcc.org' + 'foaf:name': Ian Johnson + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': GetBaseCountsMultiSample + 'doap:revision': 1.2.5 From 02bf5355d982f7eb94dd04ed17ff71e37de08071 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Thu, 27 May 2021 10:02:23 -0400 Subject: [PATCH 372/476] Adding example inputs files --- .../1.2.5/example_input.yaml | 27 +++++++++++++++++++ .../1.2.5/getbasecountsmultisample_1.2.5.cwl | 21 ++++++++++----- 2 files changed, 41 insertions(+), 7 deletions(-) create mode 100644 getbasecountsmutlisample/1.2.5/example_input.yaml diff --git a/getbasecountsmutlisample/1.2.5/example_input.yaml b/getbasecountsmutlisample/1.2.5/example_input.yaml new file mode 100644 index 00000000..5aa7ca49 --- /dev/null +++ b/getbasecountsmutlisample/1.2.5/example_input.yaml @@ -0,0 +1,27 @@ +filter_duplicate: 0 +fragment_count: 1 +generic_counting: true 
+genotyping_bams: + - class: File + path: /Users/shahr2/Documents/test_reference/bam/duplex/SeraCare_0-5.bam + - class: File + path: /Users/shahr2/Documents/test_reference/bam/SeraCare_0-5.bam +genotyping_bams_ids: + - test1 + - test2 +maf: + class: File + path: >- + /Users/shahr2/Downloads/SeraCare_0-5.F22.combined-variants.vep_keptrmv_taggedHotspots.maf +maq: null +memory_overhead: null +memory_per_job: null +number_of_threads: null +omaf: true +output: test.maf +ref_fasta: + class: File + path: >- + /Users/shahr2/Documents/test_reference/reference/versions/hg19/Homo_sapiens_assembly19.fasta +vcf: null + diff --git a/getbasecountsmutlisample/1.2.5/getbasecountsmultisample_1.2.5.cwl b/getbasecountsmutlisample/1.2.5/getbasecountsmultisample_1.2.5.cwl index 0ad7c7b5..64899c88 100644 --- a/getbasecountsmutlisample/1.2.5/getbasecountsmultisample_1.2.5.cwl +++ b/getbasecountsmutlisample/1.2.5/getbasecountsmultisample_1.2.5.cwl @@ -81,12 +81,12 @@ inputs: or --vcf need to be specified at least once. But --maf and --vcf are mutually exclusive - id: generic_counting - type: File? + type: boolean? inputBinding: position: 0 prefix: '--generic_counting' doc: >- - se the newly implemented generic counting algorithm. Works better for + Use the newly implemented generic counting algorithm. Works better for complex variants. 
You may get different allele count result from the default counting algorithm outputs: @@ -101,11 +101,7 @@ arguments: prefix: '' shellQuote: false valueFrom: | - ${ - return inputs.genotyping_bams_ids.map(function(b, i) { - return '--bam ' + b + ':' + inputs.genotyping_bams[i].path - }).join(' ') - } + $('--bam_fof bam_fof.tsv') - position: 0 prefix: '--thread' valueFrom: $(runtime.cores) @@ -116,6 +112,17 @@ requirements: coresMin: 2 - class: DockerRequirement dockerPull: 'ghcr.io/msk-access/gbcms:1.2.5' + - class: InitialWorkDirRequirement + listing: + - entryname: bam_fof.tsv + entry: |- + $( + inputs.genotyping_bams_ids.map(function(sid, i) { + return sid + "\t" + + inputs.genotyping_bams[i].path + }).join("\n") + ) + writable: false - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' From ecfe634918b9df470b1c153f5d15fd4f95321080 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Thu, 27 May 2021 10:07:11 -0400 Subject: [PATCH 373/476] Adding GBCMS in docs --- docs/SUMMARY.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 2d49572a..0970bf44 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -32,6 +32,8 @@ - [MergeSamFiles v4.1.8.0](../gatk_merge_sam_files_4.1.8.0/README.md) - [RevertSam v4.1.8.0](../gatk_revert_sam/README.md) - [SamToFastq v4.1.8.0](../gatk_sam_to_fastq_4_1_8_0/README.md) + - GetBaseCountsMultiSample + - [1.2.5](../getbasecountsmultisample/1.2.5/README.md) - Manta - [Manta v1.5.1](../manta_1.5.1/README.md) - Marianas From 18d2f1ad32d1e37e7609c72d2b21e8385cc08a26 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Thu, 27 May 2021 10:10:26 -0400 Subject: [PATCH 374/476] Fix Dir Name --- .../1.2.5/README.md | 0 .../1.2.5/container/Dockerfile | 0 .../1.2.5/example_input.yaml | 0 .../1.2.5/getbasecountsmultisample_1.2.5.cwl | 0 4 files changed, 0 insertions(+), 0 deletions(-) rename {getbasecountsmutlisample => getbasecountsmultisample}/1.2.5/README.md (100%) rename 
{getbasecountsmutlisample => getbasecountsmultisample}/1.2.5/container/Dockerfile (100%) rename {getbasecountsmutlisample => getbasecountsmultisample}/1.2.5/example_input.yaml (100%) rename {getbasecountsmutlisample => getbasecountsmultisample}/1.2.5/getbasecountsmultisample_1.2.5.cwl (100%) diff --git a/getbasecountsmutlisample/1.2.5/README.md b/getbasecountsmultisample/1.2.5/README.md similarity index 100% rename from getbasecountsmutlisample/1.2.5/README.md rename to getbasecountsmultisample/1.2.5/README.md diff --git a/getbasecountsmutlisample/1.2.5/container/Dockerfile b/getbasecountsmultisample/1.2.5/container/Dockerfile similarity index 100% rename from getbasecountsmutlisample/1.2.5/container/Dockerfile rename to getbasecountsmultisample/1.2.5/container/Dockerfile diff --git a/getbasecountsmutlisample/1.2.5/example_input.yaml b/getbasecountsmultisample/1.2.5/example_input.yaml similarity index 100% rename from getbasecountsmutlisample/1.2.5/example_input.yaml rename to getbasecountsmultisample/1.2.5/example_input.yaml diff --git a/getbasecountsmutlisample/1.2.5/getbasecountsmultisample_1.2.5.cwl b/getbasecountsmultisample/1.2.5/getbasecountsmultisample_1.2.5.cwl similarity index 100% rename from getbasecountsmutlisample/1.2.5/getbasecountsmultisample_1.2.5.cwl rename to getbasecountsmultisample/1.2.5/getbasecountsmultisample_1.2.5.cwl From c4a2bd76c2660d74b965109a1477d31181a31429 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Thu, 27 May 2021 12:15:54 -0400 Subject: [PATCH 375/476] Update getbasecountsmultisample_1.2.5.cwl --- .../1.2.5/getbasecountsmultisample_1.2.5.cwl | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/getbasecountsmultisample/1.2.5/getbasecountsmultisample_1.2.5.cwl b/getbasecountsmultisample/1.2.5/getbasecountsmultisample_1.2.5.cwl index 64899c88..f56c09e6 100644 --- a/getbasecountsmultisample/1.2.5/getbasecountsmultisample_1.2.5.cwl +++ b/getbasecountsmultisample/1.2.5/getbasecountsmultisample_1.2.5.cwl @@ 
-18,10 +18,16 @@ inputs: - id: number_of_threads type: int? - id: genotyping_bams - type: 'File[]' + type: + - File + - type: array + items: File doc: Input bam file - id: genotyping_bams_ids - type: 'string[]' + type: + - string + - type: array + items: string doc: >- Input bam, sample identifier to be used for "Tumor Sample Barcode" for maf or Sample name in the header for vcf From 9a9e782fdd32762a3b3a85d51554fcf64ff9c888 Mon Sep 17 00:00:00 2001 From: ionox0 Date: Thu, 27 May 2021 13:04:08 -0400 Subject: [PATCH 376/476] allow gbcms to handle single File as well as File[] --- .../1.2.5/getbasecountsmultisample_1.2.5.cwl | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/getbasecountsmultisample/1.2.5/getbasecountsmultisample_1.2.5.cwl b/getbasecountsmultisample/1.2.5/getbasecountsmultisample_1.2.5.cwl index f56c09e6..c0403297 100644 --- a/getbasecountsmultisample/1.2.5/getbasecountsmultisample_1.2.5.cwl +++ b/getbasecountsmultisample/1.2.5/getbasecountsmultisample_1.2.5.cwl @@ -122,12 +122,16 @@ requirements: listing: - entryname: bam_fof.tsv entry: |- - $( - inputs.genotyping_bams_ids.map(function(sid, i) { - return sid + "\t" + - inputs.genotyping_bams[i].path - }).join("\n") - ) + ${ + if (typeof(inputs.genotyping_bams_ids) == 'object') { + return inputs.genotyping_bams_ids.map(function(sid, i) { + return sid + "\t" + + inputs.genotyping_bams[i].path + }).join("\n") + } else { + return inputs.genotyping_bams_ids + "\t" + inputs.genotyping_bams.path + "\n" + } + } writable: false - class: InlineJavascriptRequirement 'dct:contributor': From 38b5b1b4bfe1abab971f90345aee895b21c44fc6 Mon Sep 17 00:00:00 2001 From: ionox0 Date: Thu, 3 Jun 2021 14:11:37 -0400 Subject: [PATCH 377/476] add CWL for access_utils general_stats_parse.py --- access_utils/example_inputs.yaml | 2 ++ access_utils/general_stats_parse.cwl | 49 ++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) create mode 100644 access_utils/example_inputs.yaml create 
mode 100644 access_utils/general_stats_parse.cwl diff --git a/access_utils/example_inputs.yaml b/access_utils/example_inputs.yaml new file mode 100644 index 00000000..27fccad3 --- /dev/null +++ b/access_utils/example_inputs.yaml @@ -0,0 +1,2 @@ +dir: /path/to/sample_info_directory +samples-json: /path/to/sample_meta.json diff --git a/access_utils/general_stats_parse.cwl b/access_utils/general_stats_parse.cwl new file mode 100644 index 00000000..1d346e89 --- /dev/null +++ b/access_utils/general_stats_parse.cwl @@ -0,0 +1,49 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' +id: general_stats_parse +label: general_stats_parse +baseCommand: general_stats_parse.py +inputs: + - id: dir + type: Directory + inputBinding: + position: 0 + prefix: '--dir' + doc: Directory containing results. + - id: samples-json + type: File + inputBinding: + position: 0 + prefix: '--samples-json' + doc: Sample JSON file. 
+outputs: + - id: sample_meta_tumor + type: File + outputBinding: + glob: genstats_qc_status_plasma.csv + - id: sample_meta_normal + type: File + outputBinding: + glob: genstats_qc_status_buffy.csv +arguments: [] +requirements: + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/access_utils:0.1.0' +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:johnsoni@mskcc.org' + 'foaf:name': Ian Johnson + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:johnsoni@mskcc.org' + 'foaf:name': Ian Johnson + 'foaf:name': Memorial Sloan Kettering Cancer Center From 4a83a0a672367cef551019635cc876ba20b1cfd5 Mon Sep 17 00:00:00 2001 From: ionox0 Date: Thu, 3 Jun 2021 14:18:52 -0400 Subject: [PATCH 378/476] optional outputs --- access_utils/general_stats_parse.cwl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/access_utils/general_stats_parse.cwl b/access_utils/general_stats_parse.cwl index 1d346e89..c9e95ab8 100644 --- a/access_utils/general_stats_parse.cwl +++ b/access_utils/general_stats_parse.cwl @@ -22,11 +22,11 @@ inputs: doc: Sample JSON file. outputs: - id: sample_meta_tumor - type: File + type: File? outputBinding: glob: genstats_qc_status_plasma.csv - id: sample_meta_normal - type: File + type: File? 
outputBinding: glob: genstats_qc_status_buffy.csv arguments: [] From d745b91c66cdbc0c7c4a142e0bac0819291c32e2 Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Fri, 4 Jun 2021 15:27:38 -0400 Subject: [PATCH 379/476] biometrics v0.2.11 --- biometrics_extract/{0.2.9 => 0.2.11}/biometrics_extract.cwl | 6 +++--- biometrics_extract/{0.2.9 => 0.2.11}/example_inputs.yaml | 0 .../{0.2.9 => 0.2.11}/biometrics_genotype.cwl | 6 +++--- biometrics_genotype/{0.2.9 => 0.2.11}/example_inputs.yaml | 0 biometrics_major/{0.2.9 => 0.2.11}/biometrics_major.cwl | 6 +++--- biometrics_major/{0.2.9 => 0.2.11}/example_inputs.yaml | 0 biometrics_minor/{0.2.9 => 0.2.11}/biometrics_minor.cwl | 6 +++--- biometrics_minor/{0.2.9 => 0.2.11}/example_inputs.yaml | 0 .../{0.2.9 => 0.2.11}/biometrics_sexmismatch.cwl | 6 +++--- .../{0.2.9 => 0.2.11}/example_inputs.yaml | 0 10 files changed, 15 insertions(+), 15 deletions(-) rename biometrics_extract/{0.2.9 => 0.2.11}/biometrics_extract.cwl (97%) rename biometrics_extract/{0.2.9 => 0.2.11}/example_inputs.yaml (100%) rename biometrics_genotype/{0.2.9 => 0.2.11}/biometrics_genotype.cwl (97%) rename biometrics_genotype/{0.2.9 => 0.2.11}/example_inputs.yaml (100%) rename biometrics_major/{0.2.9 => 0.2.11}/biometrics_major.cwl (96%) rename biometrics_major/{0.2.9 => 0.2.11}/example_inputs.yaml (100%) rename biometrics_minor/{0.2.9 => 0.2.11}/biometrics_minor.cwl (96%) rename biometrics_minor/{0.2.9 => 0.2.11}/example_inputs.yaml (100%) rename biometrics_sexmismatch/{0.2.9 => 0.2.11}/biometrics_sexmismatch.cwl (96%) rename biometrics_sexmismatch/{0.2.9 => 0.2.11}/example_inputs.yaml (100%) diff --git a/biometrics_extract/0.2.9/biometrics_extract.cwl b/biometrics_extract/0.2.11/biometrics_extract.cwl similarity index 97% rename from biometrics_extract/0.2.9/biometrics_extract.cwl rename to biometrics_extract/0.2.11/biometrics_extract.cwl index 152e44f3..5ced2b2f 100644 --- a/biometrics_extract/0.2.9/biometrics_extract.cwl +++ 
b/biometrics_extract/0.2.11/biometrics_extract.cwl @@ -5,7 +5,7 @@ $namespaces: doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' sbg: 'https://www.sevenbridges.com/' -id: biometrics_extract_0_2_9 +id: biometrics_extract_0_2_11 baseCommand: - biometrics - extract @@ -150,7 +150,7 @@ requirements: ramMin: 16000 coresMin: 2 - class: DockerRequirement - dockerPull: 'ghcr.io/msk-access/biometrics:0.2.9' + dockerPull: 'ghcr.io/msk-access/biometrics:0.2.11' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' @@ -169,4 +169,4 @@ requirements: 'doap:release': - class: 'doap:Version' 'doap:name': biometrics - 'doap:revision': 0.2.9 + 'doap:revision': 0.2.11 diff --git a/biometrics_extract/0.2.9/example_inputs.yaml b/biometrics_extract/0.2.11/example_inputs.yaml similarity index 100% rename from biometrics_extract/0.2.9/example_inputs.yaml rename to biometrics_extract/0.2.11/example_inputs.yaml diff --git a/biometrics_genotype/0.2.9/biometrics_genotype.cwl b/biometrics_genotype/0.2.11/biometrics_genotype.cwl similarity index 97% rename from biometrics_genotype/0.2.9/biometrics_genotype.cwl rename to biometrics_genotype/0.2.11/biometrics_genotype.cwl index f887634d..4ee30e30 100644 --- a/biometrics_genotype/0.2.9/biometrics_genotype.cwl +++ b/biometrics_genotype/0.2.11/biometrics_genotype.cwl @@ -5,7 +5,7 @@ $namespaces: doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' sbg: 'https://www.sevenbridges.com/' -id: biometrics_genotype_0_2_9 +id: biometrics_genotype_0_2_11 baseCommand: - biometrics - genotype @@ -123,7 +123,7 @@ requirements: ramMin: 16000 coresMin: 2 - class: DockerRequirement - dockerPull: 'ghcr.io/msk-access/biometrics:0.2.9' + dockerPull: 'ghcr.io/msk-access/biometrics:0.2.11' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' @@ -142,4 +142,4 @@ requirements: 'doap:release': - class: 'doap:Version' 'doap:name': biometrics - 'doap:revision': 0.2.9 + 
'doap:revision': 0.2.11 diff --git a/biometrics_genotype/0.2.9/example_inputs.yaml b/biometrics_genotype/0.2.11/example_inputs.yaml similarity index 100% rename from biometrics_genotype/0.2.9/example_inputs.yaml rename to biometrics_genotype/0.2.11/example_inputs.yaml diff --git a/biometrics_major/0.2.9/biometrics_major.cwl b/biometrics_major/0.2.11/biometrics_major.cwl similarity index 96% rename from biometrics_major/0.2.9/biometrics_major.cwl rename to biometrics_major/0.2.11/biometrics_major.cwl index 60ed0440..06bca4e9 100644 --- a/biometrics_major/0.2.9/biometrics_major.cwl +++ b/biometrics_major/0.2.11/biometrics_major.cwl @@ -5,7 +5,7 @@ $namespaces: doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' sbg: 'https://www.sevenbridges.com/' -id: biometrics_major_0_2_9 +id: biometrics_major_0_2_11 baseCommand: - biometrics - major @@ -98,7 +98,7 @@ requirements: ramMin: 16000 coresMin: 2 - class: DockerRequirement - dockerPull: 'ghcr.io/msk-access/biometrics:0.2.9' + dockerPull: 'ghcr.io/msk-access/biometrics:0.2.11' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' @@ -117,4 +117,4 @@ requirements: 'doap:release': - class: 'doap:Version' 'doap:name': biometrics - 'doap:revision': 0.2.9 + 'doap:revision': 0.2.11 diff --git a/biometrics_major/0.2.9/example_inputs.yaml b/biometrics_major/0.2.11/example_inputs.yaml similarity index 100% rename from biometrics_major/0.2.9/example_inputs.yaml rename to biometrics_major/0.2.11/example_inputs.yaml diff --git a/biometrics_minor/0.2.9/biometrics_minor.cwl b/biometrics_minor/0.2.11/biometrics_minor.cwl similarity index 96% rename from biometrics_minor/0.2.9/biometrics_minor.cwl rename to biometrics_minor/0.2.11/biometrics_minor.cwl index 8352adce..1d5e4dd4 100644 --- a/biometrics_minor/0.2.9/biometrics_minor.cwl +++ b/biometrics_minor/0.2.11/biometrics_minor.cwl @@ -5,7 +5,7 @@ $namespaces: doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' 
sbg: 'https://www.sevenbridges.com/' -id: biometrics_minor_0_2_9 +id: biometrics_minor_0_2_11 baseCommand: - biometrics - minor @@ -105,7 +105,7 @@ requirements: ramMin: 16000 coresMin: 2 - class: DockerRequirement - dockerPull: 'ghcr.io/msk-access/biometrics:0.2.9' + dockerPull: 'ghcr.io/msk-access/biometrics:0.2.11' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' @@ -124,4 +124,4 @@ requirements: 'doap:release': - class: 'doap:Version' 'doap:name': biometrics - 'doap:revision': 0.2.9 + 'doap:revision': 0.2.11 diff --git a/biometrics_minor/0.2.9/example_inputs.yaml b/biometrics_minor/0.2.11/example_inputs.yaml similarity index 100% rename from biometrics_minor/0.2.9/example_inputs.yaml rename to biometrics_minor/0.2.11/example_inputs.yaml diff --git a/biometrics_sexmismatch/0.2.9/biometrics_sexmismatch.cwl b/biometrics_sexmismatch/0.2.11/biometrics_sexmismatch.cwl similarity index 96% rename from biometrics_sexmismatch/0.2.9/biometrics_sexmismatch.cwl rename to biometrics_sexmismatch/0.2.11/biometrics_sexmismatch.cwl index 90880263..814f2550 100644 --- a/biometrics_sexmismatch/0.2.9/biometrics_sexmismatch.cwl +++ b/biometrics_sexmismatch/0.2.11/biometrics_sexmismatch.cwl @@ -5,7 +5,7 @@ $namespaces: doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' sbg: 'https://www.sevenbridges.com/' -id: biometrics_sexmismatch_0_2_9 +id: biometrics_sexmismatch_0_2_11 baseCommand: - biometrics - sexmismatch @@ -84,7 +84,7 @@ requirements: ramMin: 16000 coresMin: 2 - class: DockerRequirement - dockerPull: 'ghcr.io/msk-access/biometrics:0.2.9' + dockerPull: 'ghcr.io/msk-access/biometrics:0.2.11' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' @@ -103,4 +103,4 @@ requirements: 'doap:release': - class: 'doap:Version' 'doap:name': biometrics - 'doap:revision': 0.2.9 + 'doap:revision': 0.2.11 diff --git a/biometrics_sexmismatch/0.2.9/example_inputs.yaml 
b/biometrics_sexmismatch/0.2.11/example_inputs.yaml similarity index 100% rename from biometrics_sexmismatch/0.2.9/example_inputs.yaml rename to biometrics_sexmismatch/0.2.11/example_inputs.yaml From 75f13716f713af9dc61f43d0b145d276bb9cc5f7 Mon Sep 17 00:00:00 2001 From: ionox0 Date: Mon, 7 Jun 2021 15:55:19 -0400 Subject: [PATCH 380/476] remove sample_type --- biometrics_extract/0.2.9/biometrics_extract.cwl | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/biometrics_extract/0.2.9/biometrics_extract.cwl b/biometrics_extract/0.2.9/biometrics_extract.cwl index 152e44f3..9c819261 100644 --- a/biometrics_extract/0.2.9/biometrics_extract.cwl +++ b/biometrics_extract/0.2.9/biometrics_extract.cwl @@ -21,16 +21,6 @@ inputs: - ^.bai doc: >- BAM file. - - id: sample_type - type: - - "null" - - type: array - items: string - inputBinding: - position: 0 - prefix: --sample-type - doc: >- - Sample types: Normal or Tumor. - id: sample_sex type: - "null" From c044ac6aaf9d2e84856d687f242f2eda24c2e042 Mon Sep 17 00:00:00 2001 From: ionox0 Date: Mon, 7 Jun 2021 18:43:46 -0400 Subject: [PATCH 381/476] update sequence_qc to 0.2.3 --- sequence_qc/0.2.2/sequence_qc_0.2.2.cwl | 4 ++++ .../sequence_qc_0.2.3.cwl} | 10 +++++++--- 2 files changed, 11 insertions(+), 3 deletions(-) rename sequence_qc/{0.1.19/sequence_qc_0.1.19.cwl => 0.2.3/sequence_qc_0.2.3.cwl} (94%) diff --git a/sequence_qc/0.2.2/sequence_qc_0.2.2.cwl b/sequence_qc/0.2.2/sequence_qc_0.2.2.cwl index 68a9cec9..07d00699 100644 --- a/sequence_qc/0.2.2/sequence_qc_0.2.2.cwl +++ b/sequence_qc/0.2.2/sequence_qc_0.2.2.cwl @@ -126,6 +126,10 @@ requirements: - class: DockerRequirement dockerPull: 'ghcr.io/msk-access/sequence_qc:0.2.2' - class: InlineJavascriptRequirement + - class: EnvVarRequirement + envDef: + LC_ALL: en_US.utf-8 + LANG: en_US.utf-8 'dct:contributor': - class: 'foaf:Organization' 'foaf:member': diff --git a/sequence_qc/0.1.19/sequence_qc_0.1.19.cwl b/sequence_qc/0.2.3/sequence_qc_0.2.3.cwl similarity index 
94% rename from sequence_qc/0.1.19/sequence_qc_0.1.19.cwl rename to sequence_qc/0.2.3/sequence_qc_0.2.3.cwl index bd87744b..5f141617 100644 --- a/sequence_qc/0.1.19/sequence_qc_0.1.19.cwl +++ b/sequence_qc/0.2.3/sequence_qc_0.2.3.cwl @@ -5,7 +5,7 @@ $namespaces: doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' sbg: 'https://www.sevenbridges.com/' -id: calculate_noise_0_1_16 +id: calculate_noise_0_2_3 baseCommand: - calculate_noise inputs: @@ -117,8 +117,12 @@ requirements: ramMin: 8000 coresMin: 1 - class: DockerRequirement - dockerPull: 'ghcr.io/msk-access/sequence_qc:0.1.19' + dockerPull: 'ghcr.io/msk-access/sequence_qc:0.2.3' - class: InlineJavascriptRequirement + - class: EnvVarRequirement + envDef: + LC_ALL: en_US.utf-8 + LANG: en_US.utf-8 'dct:contributor': - class: 'foaf:Organization' 'foaf:member': @@ -136,4 +140,4 @@ requirements: 'doap:release': - class: 'doap:Version' 'doap:name': sesquence_qc - 'doap:revision': 0.1.19 + 'doap:revision': 0.2.3 From 980f65310dd6fef5b66ad37daea534583f04c872 Mon Sep 17 00:00:00 2001 From: ionox0 Date: Mon, 7 Jun 2021 18:56:28 -0400 Subject: [PATCH 382/476] fix sequence_qc output file names --- sequence_qc/0.2.3/sequence_qc_0.2.3.cwl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sequence_qc/0.2.3/sequence_qc_0.2.3.cwl b/sequence_qc/0.2.3/sequence_qc_0.2.3.cwl index 5f141617..7fc4edd0 100644 --- a/sequence_qc/0.2.3/sequence_qc_0.2.3.cwl +++ b/sequence_qc/0.2.3/sequence_qc_0.2.3.cwl @@ -75,35 +75,35 @@ outputs: outputBinding: glob: |- ${ - return inputs.sample_id + 'pileup.tsv' + return inputs.sample_id + '_pileup.tsv' } - id: sequence_qc_noise_positions type: File outputBinding: glob: |- ${ - return inputs.sample_id + 'noise_positions.tsv' + return inputs.sample_id + '_noise_positions.tsv' } - id: sequence_qc_noise_acgt type: File outputBinding: glob: |- ${ - return inputs.sample_id + 'noise_acgt.tsv' + return inputs.sample_id + '_noise_acgt.tsv' } - id: 
sequence_qc_noise_n type: File outputBinding: glob: |- ${ - return inputs.sample_id + 'noise_n.tsv' + return inputs.sample_id + '_noise_n.tsv' } - id: sequence_qc_noise_del type: File outputBinding: glob: |- ${ - return inputs.sample_id + 'noise_del.tsv' + return inputs.sample_id + '_noise_del.tsv' } - id: sequence_qc_figures type: File From 578d6ef941f2bf9385a090edef41355c84895025 Mon Sep 17 00:00:00 2001 From: ionox0 Date: Tue, 8 Jun 2021 12:54:46 -0400 Subject: [PATCH 383/476] add output file --- sequence_qc/0.2.3/sequence_qc_0.2.3.cwl | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sequence_qc/0.2.3/sequence_qc_0.2.3.cwl b/sequence_qc/0.2.3/sequence_qc_0.2.3.cwl index 7fc4edd0..16405ef6 100644 --- a/sequence_qc/0.2.3/sequence_qc_0.2.3.cwl +++ b/sequence_qc/0.2.3/sequence_qc_0.2.3.cwl @@ -84,6 +84,13 @@ outputs: ${ return inputs.sample_id + '_noise_positions.tsv' } + - id: sequence_qc_noise_by_substitution + type: File + outputBinding: + glob: |- + ${ + return inputs.sample_id + '_noise_by_substitution.tsv' + } - id: sequence_qc_noise_acgt type: File outputBinding: From 0f53440afa59f0ebeed6c998bd545370b177c07c Mon Sep 17 00:00:00 2001 From: ionox0 Date: Tue, 8 Jun 2021 14:52:02 -0400 Subject: [PATCH 384/476] tool needs to take a list --- access_utils/general_stats_parse.cwl | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/access_utils/general_stats_parse.cwl b/access_utils/general_stats_parse.cwl index c9e95ab8..8f314955 100644 --- a/access_utils/general_stats_parse.cwl +++ b/access_utils/general_stats_parse.cwl @@ -6,10 +6,10 @@ $namespaces: foaf: 'http://xmlns.com/foaf/0.1/' id: general_stats_parse label: general_stats_parse -baseCommand: general_stats_parse.py +baseCommand: general_stats_parse.py . 
inputs: - - id: dir - type: Directory + - id: directories + type: Directory[] inputBinding: position: 0 prefix: '--dir' @@ -33,6 +33,18 @@ arguments: [] requirements: - class: DockerRequirement dockerPull: 'ghcr.io/msk-access/access_utils:0.1.0' + - class: InitialWorkDirRequirement + listing: + - | + ${ + var directories = inputs.directories; + if ( directories != null ){ + for (var i = 0; i < directories.length; i++){ + output_array.push(directories[i]) + } + } + return output_array + } 'dct:contributor': - class: 'foaf:Organization' 'foaf:member': From 2d972c74ec91efd0f3ece7cc3986c5fb5b026b1d Mon Sep 17 00:00:00 2001 From: ionox0 Date: Tue, 8 Jun 2021 14:53:48 -0400 Subject: [PATCH 385/476] fix --- access_utils/general_stats_parse.cwl | 1 + 1 file changed, 1 insertion(+) diff --git a/access_utils/general_stats_parse.cwl b/access_utils/general_stats_parse.cwl index 8f314955..c2606263 100644 --- a/access_utils/general_stats_parse.cwl +++ b/access_utils/general_stats_parse.cwl @@ -38,6 +38,7 @@ requirements: - | ${ var directories = inputs.directories; + var output_array = []; if ( directories != null ){ for (var i = 0; i < directories.length; i++){ output_array.push(directories[i]) From e25809235a92717df412ed7cd053524d1224dba1 Mon Sep 17 00:00:00 2001 From: ionox0 Date: Tue, 8 Jun 2021 14:58:19 -0400 Subject: [PATCH 386/476] use . for input param --- access_utils/general_stats_parse.cwl | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/access_utils/general_stats_parse.cwl b/access_utils/general_stats_parse.cwl index c2606263..82f17063 100644 --- a/access_utils/general_stats_parse.cwl +++ b/access_utils/general_stats_parse.cwl @@ -6,13 +6,10 @@ $namespaces: foaf: 'http://xmlns.com/foaf/0.1/' id: general_stats_parse label: general_stats_parse -baseCommand: general_stats_parse.py . +baseCommand: general_stats_parse.py --dir . 
inputs: - id: directories type: Directory[] - inputBinding: - position: 0 - prefix: '--dir' doc: Directory containing results. - id: samples-json type: File From cd79c75583e63c28d4dba17911b0f863da1f7d1e Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Tue, 8 Jun 2021 17:28:35 -0400 Subject: [PATCH 387/476] update biometrics version --- .../{0.2.11 => 0.2.12}/biometrics_extract.cwl | 10 +++++----- .../{0.2.11 => 0.2.12}/example_inputs.yaml | 0 .../{0.2.11 => 0.2.12}/biometrics_genotype.cwl | 6 +++--- .../{0.2.11 => 0.2.12}/example_inputs.yaml | 0 .../{0.2.11 => 0.2.12}/biometrics_major.cwl | 6 +++--- .../{0.2.11 => 0.2.12}/example_inputs.yaml | 0 .../{0.2.11 => 0.2.12}/biometrics_minor.cwl | 6 +++--- .../{0.2.11 => 0.2.12}/example_inputs.yaml | 0 .../{0.2.11 => 0.2.12}/biometrics_sexmismatch.cwl | 6 +++--- .../{0.2.11 => 0.2.12}/example_inputs.yaml | 0 10 files changed, 17 insertions(+), 17 deletions(-) rename biometrics_extract/{0.2.11 => 0.2.12}/biometrics_extract.cwl (94%) rename biometrics_extract/{0.2.11 => 0.2.12}/example_inputs.yaml (100%) rename biometrics_genotype/{0.2.11 => 0.2.12}/biometrics_genotype.cwl (97%) rename biometrics_genotype/{0.2.11 => 0.2.12}/example_inputs.yaml (100%) rename biometrics_major/{0.2.11 => 0.2.12}/biometrics_major.cwl (96%) rename biometrics_major/{0.2.11 => 0.2.12}/example_inputs.yaml (100%) rename biometrics_minor/{0.2.11 => 0.2.12}/biometrics_minor.cwl (96%) rename biometrics_minor/{0.2.11 => 0.2.12}/example_inputs.yaml (100%) rename biometrics_sexmismatch/{0.2.11 => 0.2.12}/biometrics_sexmismatch.cwl (96%) rename biometrics_sexmismatch/{0.2.11 => 0.2.12}/example_inputs.yaml (100%) diff --git a/biometrics_extract/0.2.11/biometrics_extract.cwl b/biometrics_extract/0.2.12/biometrics_extract.cwl similarity index 94% rename from biometrics_extract/0.2.11/biometrics_extract.cwl rename to biometrics_extract/0.2.12/biometrics_extract.cwl index de550c3f..a43fb4f8 100644 --- a/biometrics_extract/0.2.11/biometrics_extract.cwl +++ 
b/biometrics_extract/0.2.12/biometrics_extract.cwl @@ -5,7 +5,7 @@ $namespaces: doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' sbg: 'https://www.sevenbridges.com/' -id: biometrics_extract_0_2_11 +id: biometrics_extract_0_2_12 baseCommand: - biometrics - extract @@ -129,9 +129,9 @@ outputs: ${ return inputs.sample_name.map(val => { if (inputs.database) { - return inputs.database + '/' + val + '.pk'; + return inputs.database + '/' + val + '.pickle'; } else { - return val + '.pk'; + return val + '.pickle'; } }); } @@ -140,7 +140,7 @@ requirements: ramMin: 16000 coresMin: 2 - class: DockerRequirement - dockerPull: 'ghcr.io/msk-access/biometrics:0.2.11' + dockerPull: 'ghcr.io/msk-access/biometrics:0.2.12' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' @@ -159,4 +159,4 @@ requirements: 'doap:release': - class: 'doap:Version' 'doap:name': biometrics - 'doap:revision': 0.2.11 + 'doap:revision': 0.2.12 diff --git a/biometrics_extract/0.2.11/example_inputs.yaml b/biometrics_extract/0.2.12/example_inputs.yaml similarity index 100% rename from biometrics_extract/0.2.11/example_inputs.yaml rename to biometrics_extract/0.2.12/example_inputs.yaml diff --git a/biometrics_genotype/0.2.11/biometrics_genotype.cwl b/biometrics_genotype/0.2.12/biometrics_genotype.cwl similarity index 97% rename from biometrics_genotype/0.2.11/biometrics_genotype.cwl rename to biometrics_genotype/0.2.12/biometrics_genotype.cwl index 4ee30e30..3c74d286 100644 --- a/biometrics_genotype/0.2.11/biometrics_genotype.cwl +++ b/biometrics_genotype/0.2.12/biometrics_genotype.cwl @@ -5,7 +5,7 @@ $namespaces: doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' sbg: 'https://www.sevenbridges.com/' -id: biometrics_genotype_0_2_11 +id: biometrics_genotype_0_2_12 baseCommand: - biometrics - genotype @@ -123,7 +123,7 @@ requirements: ramMin: 16000 coresMin: 2 - class: DockerRequirement - dockerPull: 
'ghcr.io/msk-access/biometrics:0.2.11' + dockerPull: 'ghcr.io/msk-access/biometrics:0.2.12' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' @@ -142,4 +142,4 @@ requirements: 'doap:release': - class: 'doap:Version' 'doap:name': biometrics - 'doap:revision': 0.2.11 + 'doap:revision': 0.2.12 diff --git a/biometrics_genotype/0.2.11/example_inputs.yaml b/biometrics_genotype/0.2.12/example_inputs.yaml similarity index 100% rename from biometrics_genotype/0.2.11/example_inputs.yaml rename to biometrics_genotype/0.2.12/example_inputs.yaml diff --git a/biometrics_major/0.2.11/biometrics_major.cwl b/biometrics_major/0.2.12/biometrics_major.cwl similarity index 96% rename from biometrics_major/0.2.11/biometrics_major.cwl rename to biometrics_major/0.2.12/biometrics_major.cwl index 06bca4e9..83a076de 100644 --- a/biometrics_major/0.2.11/biometrics_major.cwl +++ b/biometrics_major/0.2.12/biometrics_major.cwl @@ -5,7 +5,7 @@ $namespaces: doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' sbg: 'https://www.sevenbridges.com/' -id: biometrics_major_0_2_11 +id: biometrics_major_0_2_12 baseCommand: - biometrics - major @@ -98,7 +98,7 @@ requirements: ramMin: 16000 coresMin: 2 - class: DockerRequirement - dockerPull: 'ghcr.io/msk-access/biometrics:0.2.11' + dockerPull: 'ghcr.io/msk-access/biometrics:0.2.12' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' @@ -117,4 +117,4 @@ requirements: 'doap:release': - class: 'doap:Version' 'doap:name': biometrics - 'doap:revision': 0.2.11 + 'doap:revision': 0.2.12 diff --git a/biometrics_major/0.2.11/example_inputs.yaml b/biometrics_major/0.2.12/example_inputs.yaml similarity index 100% rename from biometrics_major/0.2.11/example_inputs.yaml rename to biometrics_major/0.2.12/example_inputs.yaml diff --git a/biometrics_minor/0.2.11/biometrics_minor.cwl b/biometrics_minor/0.2.12/biometrics_minor.cwl similarity index 96% rename from 
biometrics_minor/0.2.11/biometrics_minor.cwl rename to biometrics_minor/0.2.12/biometrics_minor.cwl index 1d5e4dd4..e47bcffd 100644 --- a/biometrics_minor/0.2.11/biometrics_minor.cwl +++ b/biometrics_minor/0.2.12/biometrics_minor.cwl @@ -5,7 +5,7 @@ $namespaces: doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' sbg: 'https://www.sevenbridges.com/' -id: biometrics_minor_0_2_11 +id: biometrics_minor_0_2_12 baseCommand: - biometrics - minor @@ -105,7 +105,7 @@ requirements: ramMin: 16000 coresMin: 2 - class: DockerRequirement - dockerPull: 'ghcr.io/msk-access/biometrics:0.2.11' + dockerPull: 'ghcr.io/msk-access/biometrics:0.2.12' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' @@ -124,4 +124,4 @@ requirements: 'doap:release': - class: 'doap:Version' 'doap:name': biometrics - 'doap:revision': 0.2.11 + 'doap:revision': 0.2.12 diff --git a/biometrics_minor/0.2.11/example_inputs.yaml b/biometrics_minor/0.2.12/example_inputs.yaml similarity index 100% rename from biometrics_minor/0.2.11/example_inputs.yaml rename to biometrics_minor/0.2.12/example_inputs.yaml diff --git a/biometrics_sexmismatch/0.2.11/biometrics_sexmismatch.cwl b/biometrics_sexmismatch/0.2.12/biometrics_sexmismatch.cwl similarity index 96% rename from biometrics_sexmismatch/0.2.11/biometrics_sexmismatch.cwl rename to biometrics_sexmismatch/0.2.12/biometrics_sexmismatch.cwl index 814f2550..f2fdc257 100644 --- a/biometrics_sexmismatch/0.2.11/biometrics_sexmismatch.cwl +++ b/biometrics_sexmismatch/0.2.12/biometrics_sexmismatch.cwl @@ -5,7 +5,7 @@ $namespaces: doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' sbg: 'https://www.sevenbridges.com/' -id: biometrics_sexmismatch_0_2_11 +id: biometrics_sexmismatch_0_2_12 baseCommand: - biometrics - sexmismatch @@ -84,7 +84,7 @@ requirements: ramMin: 16000 coresMin: 2 - class: DockerRequirement - dockerPull: 'ghcr.io/msk-access/biometrics:0.2.11' + dockerPull: 
'ghcr.io/msk-access/biometrics:0.2.12' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' @@ -103,4 +103,4 @@ requirements: 'doap:release': - class: 'doap:Version' 'doap:name': biometrics - 'doap:revision': 0.2.11 + 'doap:revision': 0.2.12 diff --git a/biometrics_sexmismatch/0.2.11/example_inputs.yaml b/biometrics_sexmismatch/0.2.12/example_inputs.yaml similarity index 100% rename from biometrics_sexmismatch/0.2.11/example_inputs.yaml rename to biometrics_sexmismatch/0.2.12/example_inputs.yaml From 914948fe493e3dba0841266bd39a0e91ab664824 Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Wed, 9 Jun 2021 11:34:50 -0400 Subject: [PATCH 388/476] extract now expects only one sample --- .../0.2.12/biometrics_extract.cwl | 58 +++++++------------ 1 file changed, 22 insertions(+), 36 deletions(-) diff --git a/biometrics_extract/0.2.12/biometrics_extract.cwl b/biometrics_extract/0.2.12/biometrics_extract.cwl index a43fb4f8..cb9f8dcb 100644 --- a/biometrics_extract/0.2.12/biometrics_extract.cwl +++ b/biometrics_extract/0.2.12/biometrics_extract.cwl @@ -11,43 +11,33 @@ baseCommand: - extract inputs: - id: sample_bam - type: - - type: array - items: File - inputBinding: - position: 0 - prefix: --sample-bam + type: File + inputBinding: + position: 0 + prefix: --sample-bam secondaryFiles: - ^.bai doc: >- BAM file. - id: sample_sex - type: - - "null" - - type: array - items: string - inputBinding: - position: 0 - prefix: --sample-sex + type: string? + inputBinding: + position: 0 + prefix: --sample-sex doc: >- Expected sample sex (i.e. M or F). - id: sample_group - type: - - "null" - - type: array - items: string - inputBinding: - position: 0 - prefix: --sample-group + type: string? + inputBinding: + position: 0 + prefix: --sample-group doc: >- The sample group (e.g. the sample patient ID). 
- id: sample_name - type: - - type: array - items: string - inputBinding: - position: 0 - prefix: --sample-name + type: string + inputBinding: + position: 0 + prefix: --sample-name doc: >- Sample name. If not specified, sample name is automatically figured out from the BAM file. - id: fafile @@ -121,19 +111,15 @@ inputs: Default genotype if coverage is too low (options are Het or Hom). outputs: - id: biometrics_extract_pickle - type: - type: array - items: File + type: File outputBinding: glob: |- ${ - return inputs.sample_name.map(val => { - if (inputs.database) { - return inputs.database + '/' + val + '.pickle'; - } else { - return val + '.pickle'; - } - }); + if (inputs.database) { + return inputs.database + '/' + inputs.sample_name + '.pickle'; + } else { + return inputs.sample_name + '.pickle'; + } } requirements: - class: ResourceRequirement From 05ba9384dd4f14716a0f9b195b77efa71526b5d5 Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Wed, 9 Jun 2021 11:36:02 -0400 Subject: [PATCH 389/476] update readmes --- biometrics_extract/README.md | 2 +- biometrics_genotype/README.md | 2 +- biometrics_major/README.md | 2 +- biometrics_minor/README.md | 2 +- biometrics_sexmismatch/README.md | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/biometrics_extract/README.md b/biometrics_extract/README.md index 083ea3d1..5687fef3 100644 --- a/biometrics_extract/README.md +++ b/biometrics_extract/README.md @@ -2,7 +2,7 @@ | Tool | Latest version | Location | |--- |--- |--- | -| biometrics | 0.2.8 | | +| biometrics | 0.2.12 | | The python package source code and Docker file are located on GitHub. diff --git a/biometrics_genotype/README.md b/biometrics_genotype/README.md index 6b6fcd1f..fd398b99 100644 --- a/biometrics_genotype/README.md +++ b/biometrics_genotype/README.md @@ -2,7 +2,7 @@ | Tool | Latest version | Location | |--- |--- |--- | -| biometrics | 0.2.8 | | +| biometrics | 0.2.12 | | The python package source code and Docker file are located on GitHub. 
diff --git a/biometrics_major/README.md b/biometrics_major/README.md index c281de1e..10fa476e 100644 --- a/biometrics_major/README.md +++ b/biometrics_major/README.md @@ -2,7 +2,7 @@ | Tool | Latest version | Location | |--- |--- |--- | -| biometrics | 0.2.8 | | +| biometrics | 0.2.12 | | The python package source code and Docker file are located on GitHub. diff --git a/biometrics_minor/README.md b/biometrics_minor/README.md index cd9f3434..af94ea40 100644 --- a/biometrics_minor/README.md +++ b/biometrics_minor/README.md @@ -2,7 +2,7 @@ | Tool | Latest version | Location | |--- |--- |--- | -| biometrics | 0.2.8 | | +| biometrics | 0.2.12 | | The python package source code and Docker file are located on GitHub. diff --git a/biometrics_sexmismatch/README.md b/biometrics_sexmismatch/README.md index 2cba1841..81b02145 100644 --- a/biometrics_sexmismatch/README.md +++ b/biometrics_sexmismatch/README.md @@ -2,7 +2,7 @@ | Tool | Latest version | Location | |--- |--- |--- | -| biometrics | 0.2.8 | | +| biometrics | 0.2.12 | | The python package source code and Docker file are located on GitHub. 
From 9b71b68550568bc6a8cc07caece4235eaecf2826 Mon Sep 17 00:00:00 2001 From: ionox0 Date: Thu, 10 Jun 2021 10:38:52 -0400 Subject: [PATCH 390/476] need InlineJSRequirement for access_utils --- access_utils/general_stats_parse.cwl | 1 + 1 file changed, 1 insertion(+) diff --git a/access_utils/general_stats_parse.cwl b/access_utils/general_stats_parse.cwl index c9e95ab8..3de41454 100644 --- a/access_utils/general_stats_parse.cwl +++ b/access_utils/general_stats_parse.cwl @@ -31,6 +31,7 @@ outputs: glob: genstats_qc_status_buffy.csv arguments: [] requirements: + - class: InlineJavascriptRequirement - class: DockerRequirement dockerPull: 'ghcr.io/msk-access/access_utils:0.1.0' 'dct:contributor': From 841b69d1acec4d84dbb76b85b134317ddd1ce671 Mon Sep 17 00:00:00 2001 From: ionox0 Date: Thu, 10 Jun 2021 11:34:50 -0400 Subject: [PATCH 391/476] default to using bam filenames instead of required output file name --- .../1.2.5/getbasecountsmultisample_1.2.5.cwl | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/getbasecountsmultisample/1.2.5/getbasecountsmultisample_1.2.5.cwl b/getbasecountsmultisample/1.2.5/getbasecountsmultisample_1.2.5.cwl index c0403297..1d5f5b9b 100644 --- a/getbasecountsmultisample/1.2.5/getbasecountsmultisample_1.2.5.cwl +++ b/getbasecountsmultisample/1.2.5/getbasecountsmultisample_1.2.5.cwl @@ -66,10 +66,20 @@ inputs: prefix: '--omaf' doc: Output the result in maf format - id: output - type: string + type: string? 
inputBinding: position: 0 prefix: '--output' + valueFrom: |- + ${ + if (inputs.genotyping_bams.length) { + return inputs.genotyping_bams.map(function(b){ + return b.basename.replace('.bam', '.maf') + }) + } else { + return inputs.genotyping_bams.basename.replace('.bam', '.maf') + } + } doc: Filename for output of raw fillout data in MAF/VCF format - id: ref_fasta type: File From 2e9233e7b58801f65a704f441a0edb8da17f5836 Mon Sep 17 00:00:00 2001 From: ionox0 Date: Thu, 10 Jun 2021 16:08:42 -0400 Subject: [PATCH 392/476] tick up biometrics version --- .../{0.2.12 => 0.2.13}/biometrics_extract.cwl | 87 +++++++++---------- .../{0.2.12 => 0.2.13}/example_inputs.yaml | 0 .../biometrics_genotype.cwl | 66 +++++++------- .../{0.2.12 => 0.2.13}/example_inputs.yaml | 0 .../{0.2.12 => 0.2.13}/biometrics_major.cwl | 49 ++++++----- .../{0.2.12 => 0.2.13}/example_inputs.yaml | 0 .../{0.2.12 => 0.2.13}/biometrics_minor.cwl | 49 ++++++----- .../{0.2.12 => 0.2.13}/example_inputs.yaml | 0 .../biometrics_sexmismatch.cwl | 44 +++++----- .../{0.2.12 => 0.2.13}/example_inputs.yaml | 0 10 files changed, 150 insertions(+), 145 deletions(-) rename biometrics_extract/{0.2.12 => 0.2.13}/biometrics_extract.cwl (63%) rename biometrics_extract/{0.2.12 => 0.2.13}/example_inputs.yaml (100%) rename biometrics_genotype/{0.2.12 => 0.2.13}/biometrics_genotype.cwl (72%) rename biometrics_genotype/{0.2.12 => 0.2.13}/example_inputs.yaml (100%) rename biometrics_major/{0.2.12 => 0.2.13}/biometrics_major.cwl (71%) rename biometrics_major/{0.2.12 => 0.2.13}/example_inputs.yaml (100%) rename biometrics_minor/{0.2.12 => 0.2.13}/biometrics_minor.cwl (72%) rename biometrics_minor/{0.2.12 => 0.2.13}/example_inputs.yaml (100%) rename biometrics_sexmismatch/{0.2.12 => 0.2.13}/biometrics_sexmismatch.cwl (70%) rename biometrics_sexmismatch/{0.2.12 => 0.2.13}/example_inputs.yaml (100%) diff --git a/biometrics_extract/0.2.12/biometrics_extract.cwl b/biometrics_extract/0.2.13/biometrics_extract.cwl similarity index 63% 
rename from biometrics_extract/0.2.12/biometrics_extract.cwl rename to biometrics_extract/0.2.13/biometrics_extract.cwl index cb9f8dcb..19ee1121 100644 --- a/biometrics_extract/0.2.12/biometrics_extract.cwl +++ b/biometrics_extract/0.2.13/biometrics_extract.cwl @@ -5,7 +5,7 @@ $namespaces: doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' sbg: 'https://www.sevenbridges.com/' -id: biometrics_extract_0_2_12 +id: biometrics_extract_0_2_13 baseCommand: - biometrics - extract @@ -14,101 +14,92 @@ inputs: type: File inputBinding: position: 0 - prefix: --sample-bam + prefix: '--sample-bam' + doc: BAM file. secondaryFiles: - ^.bai - doc: >- - BAM file. - id: sample_sex type: string? inputBinding: position: 0 - prefix: --sample-sex - doc: >- - Expected sample sex (i.e. M or F). + prefix: '--sample-sex' + doc: Expected sample sex (i.e. M or F). - id: sample_group type: string? inputBinding: position: 0 - prefix: --sample-group - doc: >- - The sample group (e.g. the sample patient ID). + prefix: '--sample-group' + doc: The sample group (e.g. the sample patient ID). - id: sample_name type: string inputBinding: position: 0 - prefix: --sample-name + prefix: '--sample-name' doc: >- - Sample name. If not specified, sample name is automatically figured out from the BAM file. + Sample name. If not specified, sample name is automatically figured out + from the BAM file. - id: fafile type: File inputBinding: position: 0 - prefix: --fafile + prefix: '--fafile' + doc: Path to reference fasta. secondaryFiles: - ^.fasta.fai - doc: >- - Path to reference fasta. - id: vcf_file type: File inputBinding: position: 0 - prefix: --vcf - doc: >- - VCF file containing the SNPs to be queried. + prefix: '--vcf' + doc: VCF file containing the SNPs to be queried. - id: bed_file type: File? inputBinding: position: 0 - prefix: --bed - doc: >- - BED file containing the intervals to be queried. + prefix: '--bed' + doc: BED file containing the intervals to be queried. 
- id: database type: string? inputBinding: position: 0 - prefix: --database + prefix: '--database' doc: >- - Directory to store the intermediate files after running the extraction step. - - id: min_mapping_quality + Directory to store the intermediate files after running the extraction + step. + - default: 1 + id: min_mapping_quality type: int? - default: 1 inputBinding: position: 0 - prefix: --min-mapping-quality - doc: >- - Minimum mapping quality of reads to be used for pileup. - - id: min_base_quality + prefix: '--min-mapping-quality' + doc: Minimum mapping quality of reads to be used for pileup. + - default: 1 + id: min_base_quality type: int? - default: 1 inputBinding: position: 0 - prefix: --min-base-quality - doc: >- - Minimum base quality of reads to be used for pileup. - - id: min_coverage + prefix: '--min-base-quality' + doc: Minimum base quality of reads to be used for pileup. + - default: 10 + id: min_coverage type: int? - default: 10 inputBinding: position: 0 - prefix: --min-coverage - doc: >- - Minimum coverage to count a site. - - id: min_homozygous_thresh + prefix: '--min-coverage' + doc: Minimum coverage to count a site. + - default: 0.1 + id: min_homozygous_thresh type: float? - default: 0.1 inputBinding: position: 0 - prefix: --min-homozygous-thresh - doc: >- - Minimum threshold to define homozygous. + prefix: '--min-homozygous-thresh' + doc: Minimum threshold to define homozygous. - id: default_genotype type: string? inputBinding: position: 0 - prefix: --default-genotype - doc: >- - Default genotype if coverage is too low (options are Het or Hom). + prefix: '--default-genotype' + doc: Default genotype if coverage is too low (options are Het or Hom). 
outputs: - id: biometrics_extract_pickle type: File @@ -126,7 +117,7 @@ requirements: ramMin: 16000 coresMin: 2 - class: DockerRequirement - dockerPull: 'ghcr.io/msk-access/biometrics:0.2.12' + dockerPull: 'ghcr.io/msk-access/biometrics:0.2.13' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' @@ -145,4 +136,4 @@ requirements: 'doap:release': - class: 'doap:Version' 'doap:name': biometrics - 'doap:revision': 0.2.12 + 'doap:revision': 0.2.13 diff --git a/biometrics_extract/0.2.12/example_inputs.yaml b/biometrics_extract/0.2.13/example_inputs.yaml similarity index 100% rename from biometrics_extract/0.2.12/example_inputs.yaml rename to biometrics_extract/0.2.13/example_inputs.yaml diff --git a/biometrics_genotype/0.2.12/biometrics_genotype.cwl b/biometrics_genotype/0.2.13/biometrics_genotype.cwl similarity index 72% rename from biometrics_genotype/0.2.12/biometrics_genotype.cwl rename to biometrics_genotype/0.2.13/biometrics_genotype.cwl index 3c74d286..d4d1449a 100644 --- a/biometrics_genotype/0.2.12/biometrics_genotype.cwl +++ b/biometrics_genotype/0.2.13/biometrics_genotype.cwl @@ -5,71 +5,75 @@ $namespaces: doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' sbg: 'https://www.sevenbridges.com/' -id: biometrics_genotype_0_2_12 +id: biometrics_genotype_0_2_13 baseCommand: - biometrics - genotype inputs: - id: input type: - - type: array - items: File - inputBinding: - position: 0 - prefix: --input + type: array + items: File + inputBinding: + position: 0 + prefix: '--input' doc: >- - Can be one of three types: (1) path to a CSV file containing sample information (one per line). For example: sample_name,sample_bam,sample_type,sample_sex,sample_group. (2) Path to a '*.pk' file that was produced by the 'extract' tool. (3) Name of the sample to analyze; this assumes there is a file named '{sample_name}.pk' in your database directory. Can be specified more than once. 
+ Can be one of three types: (1) path to a CSV file containing sample + information (one per line). For example: + sample_name,sample_bam,sample_type,sample_sex,sample_group. (2) Path to a + '*.pk' file that was produced by the 'extract' tool. (3) Name of the + sample to analyze; this assumes there is a file named '{sample_name}.pk' + in your database directory. Can be specified more than once. - id: database type: string? inputBinding: position: 0 - prefix: --database + prefix: '--database' doc: >- - Directory to store the intermediate files after running the extraction step. - - id: discordance_threshold + Directory to store the intermediate files after running the extraction + step. + - default: 0.05 + id: discordance_threshold type: float? - default: 0.05 inputBinding: position: 0 - prefix: --discordance-threshold + prefix: '--discordance-threshold' doc: >- - Discordance values less than this are regarded as matching samples. (default: 0.05) + Discordance values less than this are regarded as matching samples. + (default: 0.05) - id: prefix type: string? inputBinding: position: 0 - prefix: --prefix - doc: >- - Output file prefix. + prefix: '--prefix' + doc: Output file prefix. - id: plot type: boolean? inputBinding: position: 0 - prefix: --plot - doc: >- - Also output plots of the data. + prefix: '--plot' + doc: Also output plots of the data. - id: json type: boolean? inputBinding: position: 0 - prefix: --json - doc: >- - Also output data in JSON format. + prefix: '--json' + doc: Also output data in JSON format. - id: no_db_comparison type: boolean? inputBinding: position: 0 - prefix: --no-db-compare + prefix: '--no-db-compare' doc: >- - Do not compare the sample(s) you provided to all samples in the database, only compare them with each other. - - id: threads + Do not compare the sample(s) you provided to all samples in the database, + only compare them with each other. + - default: 2 + id: threads type: int? 
- default: 2 inputBinding: position: 0 - prefix: --threads - doc: >- - Number of threads to use. + prefix: '--threads' + doc: Number of threads to use. outputs: - id: biometrics_genotype_comparisons type: File @@ -123,7 +127,7 @@ requirements: ramMin: 16000 coresMin: 2 - class: DockerRequirement - dockerPull: 'ghcr.io/msk-access/biometrics:0.2.12' + dockerPull: 'ghcr.io/msk-access/biometrics:0.2.13' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' @@ -142,4 +146,4 @@ requirements: 'doap:release': - class: 'doap:Version' 'doap:name': biometrics - 'doap:revision': 0.2.12 + 'doap:revision': 0.2.13 diff --git a/biometrics_genotype/0.2.12/example_inputs.yaml b/biometrics_genotype/0.2.13/example_inputs.yaml similarity index 100% rename from biometrics_genotype/0.2.12/example_inputs.yaml rename to biometrics_genotype/0.2.13/example_inputs.yaml diff --git a/biometrics_major/0.2.12/biometrics_major.cwl b/biometrics_major/0.2.13/biometrics_major.cwl similarity index 71% rename from biometrics_major/0.2.12/biometrics_major.cwl rename to biometrics_major/0.2.13/biometrics_major.cwl index 83a076de..b5a42a9e 100644 --- a/biometrics_major/0.2.12/biometrics_major.cwl +++ b/biometrics_major/0.2.13/biometrics_major.cwl @@ -5,7 +5,7 @@ $namespaces: doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' sbg: 'https://www.sevenbridges.com/' -id: biometrics_major_0_2_12 +id: biometrics_major_0_2_13 baseCommand: - biometrics - major @@ -15,54 +15,57 @@ inputs: type: array items: File inputBinding: - prefix: --input + prefix: '--input' inputBinding: position: 0 doc: >- - Can be one of three types: (1) path to a CSV file containing sample information (one per line). For example: sample_name,sample_bam,sample_type,sample_sex,sample_group. (2) Path to a '*.pk' file that was produced by the 'extract' tool. (3) Name of the sample to analyze; this assumes there is a file named '{sample_name}.pk' in your database directory. 
Can be specified more than once. + Can be one of three types: (1) path to a CSV file containing sample + information (one per line). For example: + sample_name,sample_bam,sample_type,sample_sex,sample_group. (2) Path to a + '*.pk' file that was produced by the 'extract' tool. (3) Name of the + sample to analyze; this assumes there is a file named '{sample_name}.pk' + in your database directory. Can be specified more than once. - id: database type: string? inputBinding: position: 0 - prefix: --database + prefix: '--database' doc: >- - Directory to store the intermediate files after running the extraction step. - - id: major_threshold + Directory to store the intermediate files after running the extraction + step. + - default: 0.6 + id: major_threshold type: float? - default: 0.6 inputBinding: position: 0 - prefix: --major-threshold - doc: >- - Major contamination threshold for bad sample. + prefix: '--major-threshold' + doc: Major contamination threshold for bad sample. - id: prefix type: string? inputBinding: position: 0 - prefix: --prefix - doc: >- - Output file prefix. + prefix: '--prefix' + doc: Output file prefix. - id: plot type: boolean? inputBinding: position: 0 - prefix: --plot - doc: >- - Also output plots of the data. + prefix: '--plot' + doc: Also output plots of the data. - id: json type: boolean? inputBinding: position: 0 - prefix: --json - doc: >- - Also output data in JSON format. + prefix: '--json' + doc: Also output data in JSON format. - id: no_db_comparison type: boolean? inputBinding: position: 0 - prefix: --no-db-compare + prefix: '--no-db-compare' doc: >- - Do not compare the sample(s) you provided to all samples in the database, only compare them with each other. + Do not compare the sample(s) you provided to all samples in the database, + only compare them with each other. 
outputs: - id: biometrics_major_csv type: File @@ -98,7 +101,7 @@ requirements: ramMin: 16000 coresMin: 2 - class: DockerRequirement - dockerPull: 'ghcr.io/msk-access/biometrics:0.2.12' + dockerPull: 'ghcr.io/msk-access/biometrics:0.2.13' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' @@ -117,4 +120,4 @@ requirements: 'doap:release': - class: 'doap:Version' 'doap:name': biometrics - 'doap:revision': 0.2.12 + 'doap:revision': 0.2.13 diff --git a/biometrics_major/0.2.12/example_inputs.yaml b/biometrics_major/0.2.13/example_inputs.yaml similarity index 100% rename from biometrics_major/0.2.12/example_inputs.yaml rename to biometrics_major/0.2.13/example_inputs.yaml diff --git a/biometrics_minor/0.2.12/biometrics_minor.cwl b/biometrics_minor/0.2.13/biometrics_minor.cwl similarity index 72% rename from biometrics_minor/0.2.12/biometrics_minor.cwl rename to biometrics_minor/0.2.13/biometrics_minor.cwl index e47bcffd..2535eb6b 100644 --- a/biometrics_minor/0.2.12/biometrics_minor.cwl +++ b/biometrics_minor/0.2.13/biometrics_minor.cwl @@ -5,7 +5,7 @@ $namespaces: doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' sbg: 'https://www.sevenbridges.com/' -id: biometrics_minor_0_2_12 +id: biometrics_minor_0_2_13 baseCommand: - biometrics - minor @@ -15,54 +15,57 @@ inputs: type: array items: File inputBinding: - prefix: --input + prefix: '--input' inputBinding: position: 0 doc: >- - Can be one of three types: (1) path to a CSV file containing sample information (one per line). For example: sample_name,sample_bam,sample_type,sample_sex,sample_group. (2) Path to a '*.pk' file that was produced by the 'extract' tool. (3) Name of the sample to analyze; this assumes there is a file named '{sample_name}.pk' in your database directory. Can be specified more than once. + Can be one of three types: (1) path to a CSV file containing sample + information (one per line). 
For example: + sample_name,sample_bam,sample_type,sample_sex,sample_group. (2) Path to a + '*.pk' file that was produced by the 'extract' tool. (3) Name of the + sample to analyze; this assumes there is a file named '{sample_name}.pk' + in your database directory. Can be specified more than once. - id: database type: string? inputBinding: position: 0 - prefix: --database + prefix: '--database' doc: >- - Directory to store the intermediate files after running the extraction step. - - id: minor_threshold + Directory to store the intermediate files after running the extraction + step. + - default: 0.002 + id: minor_threshold type: float? - default: 0.002 inputBinding: position: 0 - prefix: --minor-threshold - doc: >- - Minor contamination threshold for bad sample. + prefix: '--minor-threshold' + doc: Minor contamination threshold for bad sample. - id: prefix type: string? inputBinding: position: 0 - prefix: --prefix - doc: >- - Output file prefix. + prefix: '--prefix' + doc: Output file prefix. - id: plot type: boolean? inputBinding: position: 0 - prefix: --plot - doc: >- - Also output plots of the data. + prefix: '--plot' + doc: Also output plots of the data. - id: json type: boolean? inputBinding: position: 0 - prefix: --json - doc: >- - Also output data in JSON format. + prefix: '--json' + doc: Also output data in JSON format. - id: no_db_comparison type: boolean? inputBinding: position: 0 - prefix: --no-db-compare + prefix: '--no-db-compare' doc: >- - Do not compare the sample(s) you provided to all samples in the database, only compare them with each other. + Do not compare the sample(s) you provided to all samples in the database, + only compare them with each other. 
outputs: - id: biometrics_minor_csv type: File @@ -105,7 +108,7 @@ requirements: ramMin: 16000 coresMin: 2 - class: DockerRequirement - dockerPull: 'ghcr.io/msk-access/biometrics:0.2.12' + dockerPull: 'ghcr.io/msk-access/biometrics:0.2.13' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' @@ -124,4 +127,4 @@ requirements: 'doap:release': - class: 'doap:Version' 'doap:name': biometrics - 'doap:revision': 0.2.12 + 'doap:revision': 0.2.13 diff --git a/biometrics_minor/0.2.12/example_inputs.yaml b/biometrics_minor/0.2.13/example_inputs.yaml similarity index 100% rename from biometrics_minor/0.2.12/example_inputs.yaml rename to biometrics_minor/0.2.13/example_inputs.yaml diff --git a/biometrics_sexmismatch/0.2.12/biometrics_sexmismatch.cwl b/biometrics_sexmismatch/0.2.13/biometrics_sexmismatch.cwl similarity index 70% rename from biometrics_sexmismatch/0.2.12/biometrics_sexmismatch.cwl rename to biometrics_sexmismatch/0.2.13/biometrics_sexmismatch.cwl index f2fdc257..e8755e55 100644 --- a/biometrics_sexmismatch/0.2.12/biometrics_sexmismatch.cwl +++ b/biometrics_sexmismatch/0.2.13/biometrics_sexmismatch.cwl @@ -5,7 +5,7 @@ $namespaces: doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' sbg: 'https://www.sevenbridges.com/' -id: biometrics_sexmismatch_0_2_12 +id: biometrics_sexmismatch_0_2_13 baseCommand: - biometrics - sexmismatch @@ -15,47 +15,51 @@ inputs: type: array items: File inputBinding: - prefix: --input + prefix: '--input' inputBinding: position: 0 doc: >- - Can be one of three types: (1) path to a CSV file containing sample information (one per line). For example: sample_name,sample_bam,sample_type,sample_sex,sample_group. (2) Path to a '*.pk' file that was produced by the 'extract' tool. (3) Name of the sample to analyze; this assumes there is a file named '{sample_name}.pk' in your database directory. Can be specified more than once. 
+ Can be one of three types: (1) path to a CSV file containing sample + information (one per line). For example: + sample_name,sample_bam,sample_type,sample_sex,sample_group. (2) Path to a + '*.pk' file that was produced by the 'extract' tool. (3) Name of the + sample to analyze; this assumes there is a file named '{sample_name}.pk' + in your database directory. Can be specified more than once. - id: database type: string? inputBinding: position: 0 - prefix: --database + prefix: '--database' doc: >- - Directory to store the intermediate files after running the extraction step. - - id: coverage_threshold + Directory to store the intermediate files after running the extraction + step. + - default: 50 + id: coverage_threshold type: int? - default: 50 inputBinding: position: 0 - prefix: --coverage-threshold - doc: >- - Samples with Y chromosome above this value will be considered male. + prefix: '--coverage-threshold' + doc: Samples with Y chromosome above this value will be considered male. - id: prefix type: string? inputBinding: position: 0 - prefix: --prefix - doc: >- - Output file prefix. + prefix: '--prefix' + doc: Output file prefix. - id: json type: boolean? inputBinding: position: 0 - prefix: --json - doc: >- - Also output data in JSON format. + prefix: '--json' + doc: Also output data in JSON format. - id: no_db_comparison type: boolean? inputBinding: position: 0 - prefix: --no-db-compare + prefix: '--no-db-compare' doc: >- - Do not compare the sample(s) you provided to all samples in the database, only compare them with each other. + Do not compare the sample(s) you provided to all samples in the database, + only compare them with each other. 
outputs: - id: biometrics_sexmismatch_csv type: File @@ -84,7 +88,7 @@ requirements: ramMin: 16000 coresMin: 2 - class: DockerRequirement - dockerPull: 'ghcr.io/msk-access/biometrics:0.2.12' + dockerPull: 'ghcr.io/msk-access/biometrics:0.2.13' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' @@ -103,4 +107,4 @@ requirements: 'doap:release': - class: 'doap:Version' 'doap:name': biometrics - 'doap:revision': 0.2.12 + 'doap:revision': 0.2.13 diff --git a/biometrics_sexmismatch/0.2.12/example_inputs.yaml b/biometrics_sexmismatch/0.2.13/example_inputs.yaml similarity index 100% rename from biometrics_sexmismatch/0.2.12/example_inputs.yaml rename to biometrics_sexmismatch/0.2.13/example_inputs.yaml From 1d2ded8f7a113b559349c15ccfc5ee188f8fa9c7 Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Fri, 11 Jun 2021 14:07:07 -0400 Subject: [PATCH 393/476] Update gatk_revert_sam_4.1.8.0.cwl --- gatk_revert_sam/4.1.8.0/gatk_revert_sam_4.1.8.0.cwl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gatk_revert_sam/4.1.8.0/gatk_revert_sam_4.1.8.0.cwl b/gatk_revert_sam/4.1.8.0/gatk_revert_sam_4.1.8.0.cwl index 55363f08..bde0f2de 100644 --- a/gatk_revert_sam/4.1.8.0/gatk_revert_sam_4.1.8.0.cwl +++ b/gatk_revert_sam/4.1.8.0/gatk_revert_sam_4.1.8.0.cwl @@ -224,7 +224,7 @@ outputs: return inputs.input.basename.replace(/.bam|.sam/, '_revertsam.tsv') } } -label: GATK-CollectHsMetrics +label: GATK-RevertSam arguments: - position: 0 prefix: '--java-options' From 5fe8cde7785c94a83627ba35b5c6a0d2d23ef616 Mon Sep 17 00:00:00 2001 From: ionox0 Date: Fri, 11 Jun 2021 16:30:07 -0400 Subject: [PATCH 394/476] fix for "file not found" errors (cant use IWDRequirement for Directories) --- access_utils/general_stats_parse.cwl | 28 ++++++++-------------------- expression_tools/put_in_dir.cwl | 1 - 2 files changed, 8 insertions(+), 21 deletions(-) diff --git a/access_utils/general_stats_parse.cwl b/access_utils/general_stats_parse.cwl index 
e8c26a5e..8fffcf78 100644 --- a/access_utils/general_stats_parse.cwl +++ b/access_utils/general_stats_parse.cwl @@ -4,17 +4,18 @@ $namespaces: dct: 'http://purl.org/dc/terms/' doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' id: general_stats_parse -label: general_stats_parse -baseCommand: general_stats_parse.py --dir . +baseCommand: general_stats_parse.py inputs: - - id: directories - type: Directory[] + - id: directory + type: Directory + inputBinding: + prefix: '--dir' doc: Directory containing results. - id: samples-json type: File inputBinding: - position: 0 prefix: '--samples-json' doc: Sample JSON file. outputs: @@ -26,24 +27,11 @@ outputs: type: File? outputBinding: glob: genstats_qc_status_buffy.csv -arguments: [] +label: general_stats_parse requirements: - - class: InlineJavascriptRequirement - class: DockerRequirement dockerPull: 'ghcr.io/msk-access/access_utils:0.1.0' - - class: InitialWorkDirRequirement - listing: - - | - ${ - var directories = inputs.directories; - var output_array = []; - if ( directories != null ){ - for (var i = 0; i < directories.length; i++){ - output_array.push(directories[i]) - } - } - return output_array - } + - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' 'foaf:member': diff --git a/expression_tools/put_in_dir.cwl b/expression_tools/put_in_dir.cwl index 71deab3f..ca952cad 100644 --- a/expression_tools/put_in_dir.cwl +++ b/expression_tools/put_in_dir.cwl @@ -8,7 +8,6 @@ $namespaces: foaf: 'http://xmlns.com/foaf/0.1/' sbg: 'https://www.sevenbridges.com/' class: ExpressionTool -# class: CommandLineTool id: put-in-dir inputs: From 5c26e3f0a45e208f7dfd5a64e0afc6011366960a Mon Sep 17 00:00:00 2001 From: ionox0 Date: Mon, 14 Jun 2021 10:19:19 -0400 Subject: [PATCH 395/476] include update for general_stats_parse.py --- access_utils/general_stats_parse.cwl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/access_utils/general_stats_parse.cwl b/access_utils/general_stats_parse.cwl index 8fffcf78..b77684cd 100644 --- a/access_utils/general_stats_parse.cwl +++ b/access_utils/general_stats_parse.cwl @@ -30,7 +30,7 @@ outputs: label: general_stats_parse requirements: - class: DockerRequirement - dockerPull: 'ghcr.io/msk-access/access_utils:0.1.0' + dockerPull: 'ghcr.io/msk-access/access_utils:0.1.1' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' From 46508c877929cd47d93e0d9cac3c194858a5a65a Mon Sep 17 00:00:00 2001 From: ionox0 Date: Mon, 14 Jun 2021 11:22:48 -0400 Subject: [PATCH 396/476] output filename needs to match output glob in all cases --- .../1.2.5/getbasecountsmultisample_1.2.5.cwl | 22 +++++++++++++------ 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/getbasecountsmultisample/1.2.5/getbasecountsmultisample_1.2.5.cwl b/getbasecountsmultisample/1.2.5/getbasecountsmultisample_1.2.5.cwl index 1d5f5b9b..f75fd6e8 100644 --- a/getbasecountsmultisample/1.2.5/getbasecountsmultisample_1.2.5.cwl +++ b/getbasecountsmultisample/1.2.5/getbasecountsmultisample_1.2.5.cwl @@ -72,12 +72,12 @@ inputs: prefix: '--output' valueFrom: |- ${ - if (inputs.genotyping_bams.length) { - return inputs.genotyping_bams.map(function(b){ - return b.basename.replace('.bam', '.maf') - }) + if (inputs.output) { + return inputs.output + } else if (inputs.genotyping_bams.length) { + return inputs.maf.basename.replace('.maf', '_fillout.maf') } else { - return inputs.genotyping_bams.basename.replace('.bam', '.maf') + return inputs.genotyping_bams.basename.replace('.bam', '_fillout.maf') } } doc: Filename for output of raw fillout data in MAF/VCF format @@ -109,8 +109,16 @@ outputs: - id: fillout type: File outputBinding: - glob: | - $(inputs.output) + glob: |- + ${ + if (inputs.output) { + return inputs.output + } else if (inputs.genotyping_bams.length) { + return inputs.maf.basename.replace('.maf', '_fillout.maf') + } else { + return 
inputs.genotyping_bams.basename.replace('.bam', '_fillout.maf') + } + } label: getbasecountsmultisample_1.2.5 arguments: - position: 0 From ddfb4c81389ffbe3ca8a84c1fd702044fb1aa31d Mon Sep 17 00:00:00 2001 From: ionox0 Date: Mon, 14 Jun 2021 14:06:20 -0400 Subject: [PATCH 397/476] fix same File Not Found issue with IWDRequirement --- multiqc_1.10.1/multiqc_1.10.1.cwl | 83 ++----------------------------- 1 file changed, 5 insertions(+), 78 deletions(-) diff --git a/multiqc_1.10.1/multiqc_1.10.1.cwl b/multiqc_1.10.1/multiqc_1.10.1.cwl index 01140aad..528c32b2 100644 --- a/multiqc_1.10.1/multiqc_1.10.1.cwl +++ b/multiqc_1.10.1/multiqc_1.10.1.cwl @@ -5,40 +5,17 @@ $namespaces: baseCommand: - multiqc inputs: - - id: qc_files_array - type: - - 'null' - - File - - type: array - items: File - doc: | - qc files which shall be part of the multiqc summary; - optional, only one of qc_files_array or qc_files_array_of_array - must be provided - - id: qc_files_array_of_array - type: - - 'null' - - type: array - items: - items: File - type: array - doc: | - qc files which shall be part of the multiqc summary; - optional, only one of qc_files_array or qc_files_array_of_array - must be provided - id: qc_files_dir type: Directory? + inputBinding: + position: 999 doc: | qc files in a Directory - - id: qc_list_of_dirs - type: 'Directory[]?' - doc: | - qc files in multiple directories - default: multiqc_1.10.1 id: report_name type: string inputBinding: - position: 3 + position: 0 prefix: '--filename' doc: name used for the html report and the corresponding zip file - id: config @@ -61,61 +38,11 @@ outputs: glob: $(inputs.report_name)_data.zip doc: | Run multiqc on log files from supported bioinformatic tools. -arguments: - - position: 0 - valueFrom: . 
-requirements: - - class: InitialWorkDirRequirement - listing: - - | - ${ - // script merges the inputs - // into one array that fulfills the type - // requirement for "listing", which is - // "{type: array, items: [File, Directory]}" - - var qc_files_directory = inputs.qc_files_dir; - var qc_list_of_dirs = inputs.qc_list_of_dirs; - var qc_files_array = inputs.qc_files_array; - var qc_files_array_of_array = inputs.qc_files_array_of_array; - var output_array = []; - - if ( qc_files_array != null ){ - // Fix to allow single qc_file_array - if (qc_files_array.length === undefined){ - output_array.push(qc_files_array) - } else { - for (var i=0; i Date: Mon, 14 Jun 2021 15:16:53 -0400 Subject: [PATCH 398/476] add more access_utils outputs --- access_utils/general_stats_parse.cwl | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/access_utils/general_stats_parse.cwl b/access_utils/general_stats_parse.cwl index b77684cd..e3f3ad61 100644 --- a/access_utils/general_stats_parse.cwl +++ b/access_utils/general_stats_parse.cwl @@ -6,16 +6,19 @@ $namespaces: foaf: 'http://xmlns.com/foaf/0.1/' sbg: 'https://www.sevenbridges.com/' id: general_stats_parse -baseCommand: general_stats_parse.py +baseCommand: + - general_stats_parse.py inputs: - id: directory type: Directory inputBinding: + position: 0 prefix: '--dir' doc: Directory containing results. - id: samples-json type: File inputBinding: + position: 0 prefix: '--samples-json' doc: Sample JSON file. outputs: @@ -27,6 +30,22 @@ outputs: type: File? outputBinding: glob: genstats_qc_status_buffy.csv + - id: sequence_qc_mqc + type: File? + outputBinding: + glob: sequence_qc_mqc.html + - id: sequence_qc_substitution_mqc + type: File? + outputBinding: + glob: sequence_qc_substitution_mqc.yaml + - id: sequence_qc_mqc_yaml + type: File? + outputBinding: + glob: sequence_qc_mqc.yaml + - id: minor_contamination_sites_mqc + type: File? 
+ outputBinding: + glob: minor_contamination_sites_mqc.html label: general_stats_parse requirements: - class: DockerRequirement From 319c2594e1d049fe4492317d56897ab0bf392e6e Mon Sep 17 00:00:00 2001 From: ionox0 Date: Mon, 14 Jun 2021 17:08:19 -0400 Subject: [PATCH 399/476] update multiqc container version --- multiqc_1.10.1/multiqc_1.10.1.cwl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/multiqc_1.10.1/multiqc_1.10.1.cwl b/multiqc_1.10.1/multiqc_1.10.1.cwl index 528c32b2..930b47f3 100644 --- a/multiqc_1.10.1/multiqc_1.10.1.cwl +++ b/multiqc_1.10.1/multiqc_1.10.1.cwl @@ -43,6 +43,6 @@ hints: ramMin: 10000 coresMin: 1 - class: DockerRequirement - dockerPull: 'ghcr.io/msk-access/multiqc:v1.10.1.3' + dockerPull: 'ghcr.io/msk-access/multiqc:v1.10.1.7' requirements: - class: InlineJavascriptRequirement From c5ff7df1193e77af1408a182ab7eb5189b0dc154 Mon Sep 17 00:00:00 2001 From: Ian Date: Tue, 15 Jun 2021 16:49:50 -0400 Subject: [PATCH 400/476] make folder for utils --- access_utils/{ => 0.1.1}/example_inputs.yaml | 0 access_utils/{ => 0.1.1}/general_stats_parse.cwl | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename access_utils/{ => 0.1.1}/example_inputs.yaml (100%) rename access_utils/{ => 0.1.1}/general_stats_parse.cwl (100%) diff --git a/access_utils/example_inputs.yaml b/access_utils/0.1.1/example_inputs.yaml similarity index 100% rename from access_utils/example_inputs.yaml rename to access_utils/0.1.1/example_inputs.yaml diff --git a/access_utils/general_stats_parse.cwl b/access_utils/0.1.1/general_stats_parse.cwl similarity index 100% rename from access_utils/general_stats_parse.cwl rename to access_utils/0.1.1/general_stats_parse.cwl From 2505c138d49e48e5887999799d69a33a2d9304af Mon Sep 17 00:00:00 2001 From: Ian Date: Tue, 15 Jun 2021 17:04:15 -0400 Subject: [PATCH 401/476] add readmes and update versions --- access_utils/0.1.1/README.md | 32 +++++++++++++++++++ docs/SUMMARY.md | 4 +++ .../1.10.1.7}/README.md | 8 ++--- 
.../1.10.1.7}/example_inputs.json | 0 .../1.10.1.7/multiqc.cwl | 0 5 files changed, 40 insertions(+), 4 deletions(-) create mode 100644 access_utils/0.1.1/README.md rename {multiqc_1.10.1 => multiqc/1.10.1.7}/README.md (85%) rename {multiqc_1.10.1 => multiqc/1.10.1.7}/example_inputs.json (100%) rename multiqc_1.10.1/multiqc_1.10.1.cwl => multiqc/1.10.1.7/multiqc.cwl (100%) diff --git a/access_utils/0.1.1/README.md b/access_utils/0.1.1/README.md new file mode 100644 index 00000000..45bd9f8b --- /dev/null +++ b/access_utils/0.1.1/README.md @@ -0,0 +1,32 @@ +# CWL and Dockerfile for running merge_fastq + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +|--- |--- |--- | +| access_utils | 0.1.1 | | + +## CWL + +- CWL specification 1.0 +- Use example_inputs.json to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner general_stats_parse.cwl example_inputs.json +``` + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/general_stats_parse.cwl /path/to/example_inputs.json + +#Using toil-cwl-runner +> mkdir tool_toil_log +> toil-cwl-runner --singularity --logFile /path/to/tool_toil_log/cwltoil.log --jobStore /path/to/tool_jobStore --batchSystem lsf --workDir /path/to/tool_toil_log --outdir . 
--writeLogs /path/to/tool_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/multiqc_1.10.1/multiqc_1.10.1.cwl /path/to/example_inputs.json > tool_toil.stdout 2> tool_toil.stderr & +``` + +### Usage + +```bash +> toil-cwl-runner general_stats_parse.cwl -h +``` diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 0970bf44..d0e1d1dd 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -4,6 +4,8 @@ - ABRA2 - [v2.17](../abra2_2.17/README.md) - [v2.22](../abra2_2.22/README.md) + - access_utils + - [0.1.1](../access_utils/0.1.1/README.md) - Bedtools - [genomecov v2.28.0_cv2](../bedtools_genomecov_v2.28.0_cv2/README.md) - [merge v2.28.0_cv2](../bedtools_merge_v2.28.0_cv2/README.md) @@ -41,6 +43,8 @@ - [Collapsing Second Pass v1.8.1](../marianas_collapsing_second_pass_1.8.1/README.md) - [Process Loop UMI v1.8.1](../marianas_process_loop_umi_1.8.1/README.md) - [Seprate BAMs v1.8.1](../marianas_separate_bams_1.8.1/README.md) + - MultiQC + - [MultiQC v1.10.1.7](../multiqc/1.10.1.7/README.md) - MuTect - [MuTect 1.1.5](../mutect_1.1.5/README.md) - Merge Fastq diff --git a/multiqc_1.10.1/README.md b/multiqc/1.10.1.7/README.md similarity index 85% rename from multiqc_1.10.1/README.md rename to multiqc/1.10.1.7/README.md index 4870d2b1..02fffc9c 100644 --- a/multiqc_1.10.1/README.md +++ b/multiqc/1.10.1.7/README.md @@ -4,7 +4,7 @@ | Tool | Version | Location | |--- |--- |--- | -| multiqc | 1.10.1 | | +| multiqc | 1.10.1.7 | | ## CWL @@ -13,12 +13,12 @@ - Example Command using [toil](https://toil.readthedocs.io): ```bash - > toil-cwl-runner multiqc_1.10.1.cwl example_inputs.json + > toil-cwl-runner multiqc.cwl example_inputs.json ``` ```bash #Using CWLTOOL -> cwltool --singularity --non-strict /path/to/multiqc_1.10.1/multiqc_1.10.1.cwl /path/to/example_inputs.json +> cwltool --singularity --non-strict /path/to/multiqc_1.10.1.7/multiqc.cwl /path/to/example_inputs.json #Using toil-cwl-runner > mkdir tool_toil_log @@ -28,7 +28,7 @@ 
### Usage ```bash -> toil-cwl-runner multiqc_1.10.1.cwl --helpusage: multiqc_1.10.1.cwl [-h] +> toil-cwl-runner multiqc.cwl --helpusage: multiqc_1.10.1.cwl [-h] [--qc_files_array_of_array QC_FILES_ARRAY_OF_ARRAY] [--qc_files_dir QC_FILES_DIR] [--qc_list_of_dirs QC_LIST_OF_DIRS] diff --git a/multiqc_1.10.1/example_inputs.json b/multiqc/1.10.1.7/example_inputs.json similarity index 100% rename from multiqc_1.10.1/example_inputs.json rename to multiqc/1.10.1.7/example_inputs.json diff --git a/multiqc_1.10.1/multiqc_1.10.1.cwl b/multiqc/1.10.1.7/multiqc.cwl similarity index 100% rename from multiqc_1.10.1/multiqc_1.10.1.cwl rename to multiqc/1.10.1.7/multiqc.cwl From 3c82466bfea49a2586905e6914065c3ff256e4b0 Mon Sep 17 00:00:00 2001 From: Ian Date: Tue, 15 Jun 2021 17:49:33 -0400 Subject: [PATCH 402/476] set multiqc output directory name --- multiqc/1.10.1.7/multiqc.cwl | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/multiqc/1.10.1.7/multiqc.cwl b/multiqc/1.10.1.7/multiqc.cwl index 930b47f3..6468ef2c 100644 --- a/multiqc/1.10.1.7/multiqc.cwl +++ b/multiqc/1.10.1.7/multiqc.cwl @@ -11,7 +11,7 @@ inputs: position: 999 doc: | qc files in a Directory - - default: multiqc_1.10.1 + - default: multiqc_1.10.1.7 id: report_name type: string inputBinding: @@ -24,14 +24,26 @@ inputs: position: 0 prefix: '--config' outputs: - - id: diree + - id: multiqc_output_dir type: Directory outputBinding: glob: . + outputEval: |- + ${ + self[0].basename = inputs.report_name.replace('.html', ''); + return self[0] + } - id: multiqc_html type: File outputBinding: - glob: $(inputs.report_name).html + glob: |- + ${ + if (inputs.report_name) { + return inputs.report_name + ".html" + } else { + return "multiqc_1.10.1.7.html" + } + } - id: multiqc_zip type: File? 
outputBinding: From 9716a25945d8558af61bee1aec309c60e57ca7c1 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Tue, 22 Jun 2021 14:35:31 -0400 Subject: [PATCH 403/476] Update general_stats_parse.cwl --- access_utils/0.1.1/general_stats_parse.cwl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/access_utils/0.1.1/general_stats_parse.cwl b/access_utils/0.1.1/general_stats_parse.cwl index e3f3ad61..d80c39dd 100644 --- a/access_utils/0.1.1/general_stats_parse.cwl +++ b/access_utils/0.1.1/general_stats_parse.cwl @@ -51,6 +51,8 @@ requirements: - class: DockerRequirement dockerPull: 'ghcr.io/msk-access/access_utils:0.1.1' - class: InlineJavascriptRequirement + - class: InitialWorkDirRequirement + listing: $(inputs.directory.listing) 'dct:contributor': - class: 'foaf:Organization' 'foaf:member': From 9914c31062ff88da10c28411067a5701b65d3574 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Wed, 23 Jun 2021 10:22:38 -0400 Subject: [PATCH 404/476] Update getbasecountsmultisample_1.2.5.cwl --- .../1.2.5/getbasecountsmultisample_1.2.5.cwl | 1 + 1 file changed, 1 insertion(+) diff --git a/getbasecountsmultisample/1.2.5/getbasecountsmultisample_1.2.5.cwl b/getbasecountsmultisample/1.2.5/getbasecountsmultisample_1.2.5.cwl index f75fd6e8..1a073fac 100644 --- a/getbasecountsmultisample/1.2.5/getbasecountsmultisample_1.2.5.cwl +++ b/getbasecountsmultisample/1.2.5/getbasecountsmultisample_1.2.5.cwl @@ -152,6 +152,7 @@ requirements: } writable: false - class: InlineJavascriptRequirement + - class: StepInputExpressionRequirement 'dct:contributor': - class: 'foaf:Organization' 'foaf:member': From d6f06e06afcb6b055a7e90c4a8b4904bb242dbd0 Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Wed, 23 Jun 2021 17:23:01 -0400 Subject: [PATCH 405/476] Update put_in_dir.cwl --- expression_tools/put_in_dir.cwl | 35 ++++++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 7 deletions(-) diff --git a/expression_tools/put_in_dir.cwl b/expression_tools/put_in_dir.cwl index ca952cad..3cbe225f 
100644 --- a/expression_tools/put_in_dir.cwl +++ b/expression_tools/put_in_dir.cwl @@ -12,6 +12,10 @@ id: put-in-dir inputs: output_directory_name: string + output_subdirectory_name: + type: string? + doc: >- + If specified, nest all `files` within a directory called `output_subdirectory_name`, which itself is within `output_directory_name`. files: type: type: array @@ -49,13 +53,30 @@ expression: | } } - return { - 'directory': { - 'class': 'Directory', - 'basename': inputs.output_directory_name, - 'listing': output_files - } - }; + if (inputs.output_subdirectory_name) { + return { + 'directory': { + 'class': 'Directory', + 'basename': inputs.output_directory_name, + 'listing': [ + { + 'class': 'Directory', + 'basename': inputs.output_subdirectory_name, + 'listing': output_files + } + ] + } + }; + } else { + return { + 'directory': { + 'class': 'Directory', + 'basename': inputs.output_directory_name, + 'listing': output_files + } + }; + } + } requirements: From 7bf1fde0c10e4ef5360a63aad218b205330b7a3d Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Wed, 23 Jun 2021 17:27:25 -0400 Subject: [PATCH 406/476] add mroe do doc --- expression_tools/put_in_dir.cwl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/expression_tools/put_in_dir.cwl b/expression_tools/put_in_dir.cwl index 3cbe225f..382b64e5 100644 --- a/expression_tools/put_in_dir.cwl +++ b/expression_tools/put_in_dir.cwl @@ -11,7 +11,10 @@ class: ExpressionTool id: put-in-dir inputs: - output_directory_name: string + output_directory_name: + type: string + doc: >- + Put all `files` in a directory called `output_directory_name`. output_subdirectory_name: type: string? 
doc: >- From ea831e139eec4b3e5ccfe331b9c0fbda1c288017 Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Thu, 24 Jun 2021 10:42:14 -0400 Subject: [PATCH 407/476] add config input and qc criteria output --- access_utils/0.1.1/general_stats_parse.cwl | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/access_utils/0.1.1/general_stats_parse.cwl b/access_utils/0.1.1/general_stats_parse.cwl index d80c39dd..91b6a2f6 100644 --- a/access_utils/0.1.1/general_stats_parse.cwl +++ b/access_utils/0.1.1/general_stats_parse.cwl @@ -21,6 +21,12 @@ inputs: position: 0 prefix: '--samples-json' doc: Sample JSON file. + - id: config + type: File? + inputBinding: + position: 0 + prefix: '--config' + doc: MultQC config file. outputs: - id: sample_meta_tumor type: File? @@ -46,6 +52,10 @@ outputs: type: File? outputBinding: glob: minor_contamination_sites_mqc.html + - id: qc_criterion + type: File? + outputBinding: + glob: qc_criterion.csv label: general_stats_parse requirements: - class: DockerRequirement From a1bd6640fd45d1880393f0a1b76fc38944eb4202 Mon Sep 17 00:00:00 2001 From: murphycj2 Date: Thu, 24 Jun 2021 10:58:19 -0400 Subject: [PATCH 408/476] Update general_stats_parse.cwl --- access_utils/0.1.1/general_stats_parse.cwl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/access_utils/0.1.1/general_stats_parse.cwl b/access_utils/0.1.1/general_stats_parse.cwl index 91b6a2f6..8d37bf8a 100644 --- a/access_utils/0.1.1/general_stats_parse.cwl +++ b/access_utils/0.1.1/general_stats_parse.cwl @@ -52,10 +52,10 @@ outputs: type: File? outputBinding: glob: minor_contamination_sites_mqc.html - - id: qc_criterion + - id: qc_criteria type: File? 
outputBinding: - glob: qc_criterion.csv + glob: qc_criteria.csv label: general_stats_parse requirements: - class: DockerRequirement From 8cfe7716bac75edb98f8cfb74dbeee617df2d9ef Mon Sep 17 00:00:00 2001 From: Ian Date: Wed, 30 Jun 2021 17:13:30 -0400 Subject: [PATCH 409/476] multiqc requires LC_ALL to be set --- multiqc/1.10.1.7/multiqc.cwl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/multiqc/1.10.1.7/multiqc.cwl b/multiqc/1.10.1.7/multiqc.cwl index 6468ef2c..5a7b677a 100644 --- a/multiqc/1.10.1.7/multiqc.cwl +++ b/multiqc/1.10.1.7/multiqc.cwl @@ -58,3 +58,7 @@ hints: dockerPull: 'ghcr.io/msk-access/multiqc:v1.10.1.7' requirements: - class: InlineJavascriptRequirement + - class: EnvVarRequirement + envDef: + LC_ALL: en_US.utf-8 + LANG: en_US.utf-8 From 4d42dbd93492aa9d623671adae1d021743f7d143 Mon Sep 17 00:00:00 2001 From: Ian Date: Tue, 13 Jul 2021 16:08:45 -0400 Subject: [PATCH 410/476] remove IWDR from general_stats_parse.cwl --- access_utils/0.1.1/general_stats_parse.cwl | 2 -- 1 file changed, 2 deletions(-) diff --git a/access_utils/0.1.1/general_stats_parse.cwl b/access_utils/0.1.1/general_stats_parse.cwl index 8d37bf8a..9c8e5b12 100644 --- a/access_utils/0.1.1/general_stats_parse.cwl +++ b/access_utils/0.1.1/general_stats_parse.cwl @@ -61,8 +61,6 @@ requirements: - class: DockerRequirement dockerPull: 'ghcr.io/msk-access/access_utils:0.1.1' - class: InlineJavascriptRequirement - - class: InitialWorkDirRequirement - listing: $(inputs.directory.listing) 'dct:contributor': - class: 'foaf:Organization' 'foaf:member': From ab988db7150b115ffe1bf75387ae0b7d1fbeb8b1 Mon Sep 17 00:00:00 2001 From: Ian Date: Fri, 16 Jul 2021 12:33:39 -0400 Subject: [PATCH 411/476] allow access_utils script to collect all qc files --- access_utils/0.1.1/general_stats_parse.cwl | 44 +++++++--------------- 1 file changed, 13 insertions(+), 31 deletions(-) diff --git a/access_utils/0.1.1/general_stats_parse.cwl b/access_utils/0.1.1/general_stats_parse.cwl index 9c8e5b12..fb08289e 
100644 --- a/access_utils/0.1.1/general_stats_parse.cwl +++ b/access_utils/0.1.1/general_stats_parse.cwl @@ -7,13 +7,10 @@ $namespaces: sbg: 'https://www.sevenbridges.com/' id: general_stats_parse baseCommand: - - general_stats_parse.py + - general_stats_parse.py --dir . inputs: - id: directory type: Directory - inputBinding: - position: 0 - prefix: '--dir' doc: Directory containing results. - id: samples-json type: File @@ -28,38 +25,23 @@ inputs: prefix: '--config' doc: MultQC config file. outputs: - - id: sample_meta_tumor - type: File? - outputBinding: - glob: genstats_qc_status_plasma.csv - - id: sample_meta_normal - type: File? - outputBinding: - glob: genstats_qc_status_buffy.csv - - id: sequence_qc_mqc - type: File? - outputBinding: - glob: sequence_qc_mqc.html - - id: sequence_qc_substitution_mqc - type: File? - outputBinding: - glob: sequence_qc_substitution_mqc.yaml - - id: sequence_qc_mqc_yaml - type: File? - outputBinding: - glob: sequence_qc_mqc.yaml - - id: minor_contamination_sites_mqc - type: File? - outputBinding: - glob: minor_contamination_sites_mqc.html - - id: qc_criteria - type: File? + - id: aggregate_parsed_stats + label: aggregate_parsed_stats + type: Directory outputBinding: - glob: qc_criteria.csv + glob: . 
+ outputEval: |- + ${ + self[0].basename = "aggregate_qc_stats"; + return self[0] + } label: general_stats_parse requirements: - class: DockerRequirement dockerPull: 'ghcr.io/msk-access/access_utils:0.1.1' + - class: InitialWorkDirRequirement + listing: + - $(inputs.directory.listing) - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' From e1283412b63b62d5133b3e0ab33b186d2881b433 Mon Sep 17 00:00:00 2001 From: Ian Date: Fri, 16 Jul 2021 14:22:09 -0400 Subject: [PATCH 412/476] need to use arguments for multi-word command --- access_utils/0.1.1/general_stats_parse.cwl | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/access_utils/0.1.1/general_stats_parse.cwl b/access_utils/0.1.1/general_stats_parse.cwl index fb08289e..4c39e5db 100644 --- a/access_utils/0.1.1/general_stats_parse.cwl +++ b/access_utils/0.1.1/general_stats_parse.cwl @@ -7,7 +7,10 @@ $namespaces: sbg: 'https://www.sevenbridges.com/' id: general_stats_parse baseCommand: - - general_stats_parse.py --dir . + - general_stats_parse.py +arguments: + - --dir + - . inputs: - id: directory type: Directory @@ -15,13 +18,11 @@ inputs: - id: samples-json type: File inputBinding: - position: 0 prefix: '--samples-json' doc: Sample JSON file. - id: config type: File? inputBinding: - position: 0 prefix: '--config' doc: MultQC config file. 
outputs: @@ -41,7 +42,8 @@ requirements: dockerPull: 'ghcr.io/msk-access/access_utils:0.1.1' - class: InitialWorkDirRequirement listing: - - $(inputs.directory.listing) + - entry: $(inputs.directory) + writable: true - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' From c08edc978f4aec1d9e7cb5e974d4955eed638e8c Mon Sep 17 00:00:00 2001 From: Ian Date: Mon, 16 Aug 2021 18:48:31 -0400 Subject: [PATCH 413/476] use IWDR to avoid --bind issue in multiqc --- multiqc/1.10.1.7/multiqc.cwl | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/multiqc/1.10.1.7/multiqc.cwl b/multiqc/1.10.1.7/multiqc.cwl index 5a7b677a..1f308ad2 100644 --- a/multiqc/1.10.1.7/multiqc.cwl +++ b/multiqc/1.10.1.7/multiqc.cwl @@ -4,11 +4,12 @@ $namespaces: sbg: 'https://www.sevenbridges.com/' baseCommand: - multiqc +arguments: + - valueFrom: . + position: 999 inputs: - id: qc_files_dir type: Directory? - inputBinding: - position: 999 doc: | qc files in a Directory - default: multiqc_1.10.1.7 @@ -62,3 +63,8 @@ requirements: envDef: LC_ALL: en_US.utf-8 LANG: en_US.utf-8 + - class: InitialWorkDirRequirement + listing: + - entry: $(inputs.qc_files_dir) + writable: true + - class: InlineJavascriptRequirement From 2f95be6304b126adf1922cd40ce6e04da7706702 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Mon, 23 Aug 2021 18:29:25 -0400 Subject: [PATCH 414/476] Adding downsamplesam --- docs/gatk/gatk_downsamplesam_4.1.8.1.md | 113 ++++++++ gatk_downsamplesam_4.1.8.1/example_inputs.yml | 29 ++ .../gatk_downsamplesam_4.1.8.1.cwl | 270 ++++++++++++++++++ 3 files changed, 412 insertions(+) create mode 100644 docs/gatk/gatk_downsamplesam_4.1.8.1.md create mode 100644 gatk_downsamplesam_4.1.8.1/example_inputs.yml create mode 100644 gatk_downsamplesam_4.1.8.1/gatk_downsamplesam_4.1.8.1.cwl diff --git a/docs/gatk/gatk_downsamplesam_4.1.8.1.md b/docs/gatk/gatk_downsamplesam_4.1.8.1.md new file mode 100644 index 00000000..b0f093d9 --- /dev/null +++ 
b/docs/gatk/gatk_downsamplesam_4.1.8.1.md @@ -0,0 +1,113 @@ +# ApplyBQSR v4.1.8.1 + +## Version of tools in [docker image ](https://hub.docker.com/r/broadinstitute/gatk) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| GATK | 4.1.8.1 | [https://github.com/broadinstitute/gatk/releases/tag/4.1.8.1](https://github.com/broadinstitute/gatk/releases/tag/4.1.8.1) | + +[![](https://img.shields.io/badge/version-4.1.8.1-blue)](https://github.com/broadinstitute/gatk/releases/tag/4.1.8.1) + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner gatk_downsamplesam_4.1.8.1.cwl example_inputs.yml +``` + +**If at MSK, using the JUNO cluster you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict gatk_downsamplesam_4.1.8.1.cwl inputs.yaml + +#Using toil-cwl-runner +> mkdir run_directory +> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 gatk_downsamplesam_4.1.8.1.cwl inputs.yaml > file.stdout 2> file.stderr & +``` + +## Usage + +\`\`\`bash + +> toil-cwl-runner gatk_downsamplesam_4.1.8.1.cwl --help + +usage: gatk_downsamplesam_4.1.8.1.cwl [-h] --input INPUT --reference REFERENCE + [--output_file_name OUTPUT_FILE_NAME] + [--output_file_name_metrics OUTPUT_FILE_NAME_METRICS] + [--probability PROBABILITY] + [--random_seed RANDOM_SEED] + [--strategy STRATEGY] + [--arguments_file ARGUMENTS_FILE] + [--cloud-index-prefetch-buffer CLOUD_INDEX_PREFETCH_BUFFER] + [--cloud_prefetch_buffer CLOUD_PREFETCH_BUFFER] + [--create_output_bam_index] + [--create_output_bam_md5] + [--disable_bam_index_caching] + [--disable_read_filter DISABLE_READ_FILTER] + 
[--disable_sequence_dictionary_validation] + [--exclude_intervals EXCLUDE_INTERVALS] + [--gatk_config_file GATK_CONFIG_FILE] + [--gcs_max_retries GCS_MAX_RETRIES] + [--gcs_project_for_requester_pays GCS_PROJECT_FOR_REQUESTER_PAYS] + [--QUIET] [--read_filter READ_FILTER] + [--read_index READ_INDEX] + [--seconds_between_progress_updates SECONDS_BETWEEN_PROGRESS_UPDATES] + [--lenient] + [--number_of_threads NUMBER_OF_THREADS] + [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--temporary_directory TEMPORARY_DIRECTORY] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --input INPUT BAM/SAM file containing reads + --reference REFERENCE + Reference sequence file + --output_file_name OUTPUT_FILE_NAME + Output file name. Not Required + --output_file_name_metrics OUTPUT_FILE_NAME_METRICS + Output file name for metrics file. Not Required + --probability PROBABILITY + The probability of keeping any individual read, + between 0 and 1. + --random_seed RANDOM_SEED + Random seed used for deterministic results. Setting to + null will cause multiple invocations to produce + different results. 
+ --strategy STRATEGY The --STRATEGY argument is an enumerated type + (Strategy), which can have one of the following + values: HighAccuracy ConstantMemory Chained default + Strategy ConstantMemory + --arguments_file ARGUMENTS_FILE + --cloud-index-prefetch-buffer CLOUD_INDEX_PREFETCH_BUFFER + --cloud_prefetch_buffer CLOUD_PREFETCH_BUFFER + --create_output_bam_index + --create_output_bam_md5 + --disable_bam_index_caching + --disable_read_filter DISABLE_READ_FILTER + Read filters to be disabled before analysis + --disable_sequence_dictionary_validation + --exclude_intervals EXCLUDE_INTERVALS + --gatk_config_file GATK_CONFIG_FILE + --gcs_max_retries GCS_MAX_RETRIES + --gcs_project_for_requester_pays GCS_PROJECT_FOR_REQUESTER_PAYS + --QUIET + --read_filter READ_FILTER + --read_index READ_INDEX + --seconds_between_progress_updates SECONDS_BETWEEN_PROGRESS_UPDATES + --lenient + --number_of_threads NUMBER_OF_THREADS + --memory_per_job MEMORY_PER_JOB + --memory_overhead MEMORY_OVERHEAD + --temporary_directory TEMPORARY_DIRECTORY + Default value: null. 
+ + diff --git a/gatk_downsamplesam_4.1.8.1/example_inputs.yml b/gatk_downsamplesam_4.1.8.1/example_inputs.yml new file mode 100644 index 00000000..dda5b747 --- /dev/null +++ b/gatk_downsamplesam_4.1.8.1/example_inputs.yml @@ -0,0 +1,29 @@ +QUIET: null +arguments_file: null +cloud-index-prefetch-buffer: null +cloud_prefetch_buffer: null +create_output_bam_index: null +create_output_bam_md5: null +disable_bam_index_caching: null +disable_read_filter: null +disable_sequence_dictionary_validation: null +exclude_intervals: null +gatk_config_file: null +gcs_max_retries: null +gcs_project_for_requester_pays: null +input: null +lenient: null +memory_overhead: null +memory_per_job: null +number_of_threads: null +output_file_name: null +output_file_name_metrics: null +probability: null +random_seed: null +read_filter: null +read_index: null +reference: null +seconds_between_progress_updates: null +strategy: null +temporary_directory: null + diff --git a/gatk_downsamplesam_4.1.8.1/gatk_downsamplesam_4.1.8.1.cwl b/gatk_downsamplesam_4.1.8.1/gatk_downsamplesam_4.1.8.1.cwl new file mode 100644 index 00000000..d235bc0e --- /dev/null +++ b/gatk_downsamplesam_4.1.8.1/gatk_downsamplesam_4.1.8.1.cwl @@ -0,0 +1,270 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: gatk_downsamplesam_4_1_8_1 +baseCommand: + - gatk + - DownsampleSam +inputs: + - id: input + type: File + inputBinding: + position: 3 + prefix: '--input' + doc: BAM/SAM file containing reads + secondaryFiles: + - ^.bai + - id: reference + type: File + inputBinding: + position: 3 + prefix: '--reference' + doc: Reference sequence file + secondaryFiles: + - .fai + - ^.dict + - id: output_file_name + type: string? + doc: Output file name. Not Required + - id: output_file_name_metrics + type: string? + doc: Output file name for metrics file. 
Not Required + - id: probability + type: float? + inputBinding: + position: 4 + prefix: '--PROBABILITY' + doc: 'The probability of keeping any individual read, between 0 and 1.' + - id: random_seed + type: float? + inputBinding: + position: 4 + prefix: '--RANDOM_SEED' + doc: >- + Random seed used for deterministic results. Setting to null will cause + multiple invocations to produce different results. + - id: strategy + type: string? + inputBinding: + position: 4 + prefix: '--STRATEGY' + doc: >- + The --STRATEGY argument is an enumerated type (Strategy), which can have + one of the following values: + + + HighAccuracy + + ConstantMemory + + Chained + + default Strategy ConstantMemory + - id: arguments_file + type: + - 'null' + - type: array + items: File + inputBinding: + position: 0 + prefix: '--arguments_file' + - id: cloud-index-prefetch-buffer + type: int? + inputBinding: + position: 10 + prefix: '--cloud-index-prefetch-buffer' + - id: cloud_prefetch_buffer + type: int? + inputBinding: + position: 10 + prefix: '--cloud-prefetch-buffer' + - id: create_output_bam_index + type: boolean? + inputBinding: + position: 10 + prefix: '--create-output-bam-index' + - id: create_output_bam_md5 + type: boolean? + inputBinding: + position: 10 + prefix: '--create-output-bam-md5' + - id: disable_bam_index_caching + type: boolean? + inputBinding: + position: 10 + prefix: '--disable-bam-index-caching' + - id: disable_read_filter + type: + - 'null' + - type: array + items: string + inputBinding: + prefix: '--disable-read-filter' + inputBinding: + position: 10 + doc: Read filters to be disabled before analysis + - id: disable_sequence_dictionary_validation + type: boolean? + inputBinding: + position: 10 + prefix: '--disable-sequence-dictionary-validation' + - id: exclude_intervals + type: string? + inputBinding: + position: 10 + prefix: '--exclude-intervals' + - id: gatk_config_file + type: File? 
+ inputBinding: + position: 10 + prefix: '--gatk-config-file' + - id: gcs_max_retries + type: int? + inputBinding: + position: 10 + prefix: '--gcs-max-retries' + - id: gcs_project_for_requester_pays + type: string? + inputBinding: + position: 10 + prefix: '--gcs-project-for-requester-pays' + - id: QUIET + type: boolean? + inputBinding: + position: 10 + prefix: '--QUIET' + - id: read_filter + type: + - 'null' + - type: array + items: string + inputBinding: + prefix: '--read-filter' + inputBinding: + position: 10 + - id: read_index + type: string? + inputBinding: + position: 10 + prefix: '--read-index' + - id: seconds_between_progress_updates + type: float? + inputBinding: + position: 10 + prefix: '--seconds-between-progress-updates' + - id: lenient + type: boolean? + inputBinding: + position: 6 + prefix: '--lenient' + - id: number_of_threads + type: int? + - id: memory_per_job + type: int? + - id: memory_overhead + type: int? + - id: temporary_directory + type: string? + doc: 'Default value: null.' 
+outputs: + - id: gatk_downsamplesam_output_bam + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/, '_downsamplesam.bam') + } + } + secondaryFiles: + - ^.bai + - id: gatk_downsamplesam_output_metrics + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_metrics_file_name){ + return inputs.output_metrics_file_name + } else { + return inputs.input.basename.replace(/.bam/, '_downsamplesam.metrics') + } + } +label: gatk_downsample_sam_4.1.8.1 +arguments: + - position: 0 + prefix: '--java-options' + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0){ + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx12G" + } else { + return "-Xmx12G" + } + } + - position: 2 + prefix: '--tmp-dir' + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } + - position: 2 + prefix: '--output' + valueFrom: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/, '_downsamplesam.bam') + } + } + - position: 2 + prefix: '--METRICS_FILE' + valueFrom: |- + ${ + if(inputs.output_metrics_file_name){ + return inputs.output_metrics_file_name + } else { + return inputs.input.basename.replace(/.bam/, '_downsamplesam.metrics') + } + } +requirements: + - class: ResourceRequirement + ramMin: 20000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 
'ghcr.io/msk-access/gatk:4.1.8.1' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center From ef4061f2df656a05cec0a99a7d03525b1c86e9db Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Mon, 23 Aug 2021 20:32:10 -0400 Subject: [PATCH 415/476] Remove extra options --- gatk_downsamplesam_4.1.8.1/example_inputs.yml | 20 ++---- .../gatk_downsamplesam_4.1.8.1.cwl | 71 +------------------ 2 files changed, 7 insertions(+), 84 deletions(-) diff --git a/gatk_downsamplesam_4.1.8.1/example_inputs.yml b/gatk_downsamplesam_4.1.8.1/example_inputs.yml index dda5b747..8ea893b7 100644 --- a/gatk_downsamplesam_4.1.8.1/example_inputs.yml +++ b/gatk_downsamplesam_4.1.8.1/example_inputs.yml @@ -1,17 +1,10 @@ QUIET: null arguments_file: null -cloud-index-prefetch-buffer: null -cloud_prefetch_buffer: null create_output_bam_index: null create_output_bam_md5: null -disable_bam_index_caching: null -disable_read_filter: null -disable_sequence_dictionary_validation: null -exclude_intervals: null -gatk_config_file: null -gcs_max_retries: null -gcs_project_for_requester_pays: null -input: null +input: + class: File + path: input.bam lenient: null memory_overhead: null memory_per_job: null @@ -20,10 +13,9 @@ output_file_name: null output_file_name_metrics: null probability: null random_seed: null -read_filter: null -read_index: null -reference: null -seconds_between_progress_updates: null +reference: + class: File + path: reference.fasta strategy: null temporary_directory: null diff --git a/gatk_downsamplesam_4.1.8.1/gatk_downsamplesam_4.1.8.1.cwl 
b/gatk_downsamplesam_4.1.8.1/gatk_downsamplesam_4.1.8.1.cwl index d235bc0e..af86c37e 100644 --- a/gatk_downsamplesam_4.1.8.1/gatk_downsamplesam_4.1.8.1.cwl +++ b/gatk_downsamplesam_4.1.8.1/gatk_downsamplesam_4.1.8.1.cwl @@ -40,7 +40,7 @@ inputs: prefix: '--PROBABILITY' doc: 'The probability of keeping any individual read, between 0 and 1.' - id: random_seed - type: float? + type: int? inputBinding: position: 4 prefix: '--RANDOM_SEED' @@ -72,16 +72,6 @@ inputs: inputBinding: position: 0 prefix: '--arguments_file' - - id: cloud-index-prefetch-buffer - type: int? - inputBinding: - position: 10 - prefix: '--cloud-index-prefetch-buffer' - - id: cloud_prefetch_buffer - type: int? - inputBinding: - position: 10 - prefix: '--cloud-prefetch-buffer' - id: create_output_bam_index type: boolean? inputBinding: @@ -92,70 +82,11 @@ inputs: inputBinding: position: 10 prefix: '--create-output-bam-md5' - - id: disable_bam_index_caching - type: boolean? - inputBinding: - position: 10 - prefix: '--disable-bam-index-caching' - - id: disable_read_filter - type: - - 'null' - - type: array - items: string - inputBinding: - prefix: '--disable-read-filter' - inputBinding: - position: 10 - doc: Read filters to be disabled before analysis - - id: disable_sequence_dictionary_validation - type: boolean? - inputBinding: - position: 10 - prefix: '--disable-sequence-dictionary-validation' - - id: exclude_intervals - type: string? - inputBinding: - position: 10 - prefix: '--exclude-intervals' - - id: gatk_config_file - type: File? - inputBinding: - position: 10 - prefix: '--gatk-config-file' - - id: gcs_max_retries - type: int? - inputBinding: - position: 10 - prefix: '--gcs-max-retries' - - id: gcs_project_for_requester_pays - type: string? - inputBinding: - position: 10 - prefix: '--gcs-project-for-requester-pays' - id: QUIET type: boolean? 
inputBinding: position: 10 prefix: '--QUIET' - - id: read_filter - type: - - 'null' - - type: array - items: string - inputBinding: - prefix: '--read-filter' - inputBinding: - position: 10 - - id: read_index - type: string? - inputBinding: - position: 10 - prefix: '--read-index' - - id: seconds_between_progress_updates - type: float? - inputBinding: - position: 10 - prefix: '--seconds-between-progress-updates' - id: lenient type: boolean? inputBinding: From b95a0ebee5d4289ce9a0998bed40c3c8c4847c30 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Mon, 23 Aug 2021 20:48:20 -0400 Subject: [PATCH 416/476] Update gatk_downsamplesam_4.1.8.1.cwl --- .../gatk_downsamplesam_4.1.8.1.cwl | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/gatk_downsamplesam_4.1.8.1/gatk_downsamplesam_4.1.8.1.cwl b/gatk_downsamplesam_4.1.8.1/gatk_downsamplesam_4.1.8.1.cwl index af86c37e..be8b0d37 100644 --- a/gatk_downsamplesam_4.1.8.1/gatk_downsamplesam_4.1.8.1.cwl +++ b/gatk_downsamplesam_4.1.8.1/gatk_downsamplesam_4.1.8.1.cwl @@ -76,22 +76,22 @@ inputs: type: boolean? inputBinding: position: 10 - prefix: '--create-output-bam-index' + prefix: '--CREATE_INDEX' - id: create_output_bam_md5 type: boolean? inputBinding: position: 10 - prefix: '--create-output-bam-md5' + prefix: '--CREATE_MD5_FILE' - id: QUIET type: boolean? inputBinding: position: 10 prefix: '--QUIET' - - id: lenient - type: boolean? + - id: "validation_stringency" + type: string? inputBinding: position: 6 - prefix: '--lenient' + prefix: '--VALIDATION_STRINGENCY' - id: number_of_threads type: int? 
- id: memory_per_job @@ -151,7 +151,7 @@ arguments: } } - position: 2 - prefix: '--tmp-dir' + prefix: '--TMP_DIR' valueFrom: |- ${ if(inputs.temporary_directory) @@ -159,7 +159,7 @@ arguments: return runtime.tmpdir } - position: 2 - prefix: '--output' + prefix: '--OUTPUT' valueFrom: |- ${ if(inputs.output_file_name){ From 765859ecf14f6fc57f7abd76f1c6f9aebc263e95 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Mon, 23 Aug 2021 20:50:32 -0400 Subject: [PATCH 417/476] Update gatk_downsamplesam_4.1.8.1.cwl --- gatk_downsamplesam_4.1.8.1/gatk_downsamplesam_4.1.8.1.cwl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gatk_downsamplesam_4.1.8.1/gatk_downsamplesam_4.1.8.1.cwl b/gatk_downsamplesam_4.1.8.1/gatk_downsamplesam_4.1.8.1.cwl index be8b0d37..cde32448 100644 --- a/gatk_downsamplesam_4.1.8.1/gatk_downsamplesam_4.1.8.1.cwl +++ b/gatk_downsamplesam_4.1.8.1/gatk_downsamplesam_4.1.8.1.cwl @@ -14,7 +14,7 @@ inputs: type: File inputBinding: position: 3 - prefix: '--input' + prefix: '--INPUT' doc: BAM/SAM file containing reads secondaryFiles: - ^.bai @@ -22,7 +22,7 @@ inputs: type: File inputBinding: position: 3 - prefix: '--reference' + prefix: '--REFERENCE' doc: Reference sequence file secondaryFiles: - .fai From e89be7487d6582a77aed5a3f1a48ed2d5c4e5552 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Mon, 23 Aug 2021 20:52:25 -0400 Subject: [PATCH 418/476] Update gatk_downsamplesam_4.1.8.1.cwl --- gatk_downsamplesam_4.1.8.1/gatk_downsamplesam_4.1.8.1.cwl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gatk_downsamplesam_4.1.8.1/gatk_downsamplesam_4.1.8.1.cwl b/gatk_downsamplesam_4.1.8.1/gatk_downsamplesam_4.1.8.1.cwl index cde32448..42dee34c 100644 --- a/gatk_downsamplesam_4.1.8.1/gatk_downsamplesam_4.1.8.1.cwl +++ b/gatk_downsamplesam_4.1.8.1/gatk_downsamplesam_4.1.8.1.cwl @@ -22,7 +22,7 @@ inputs: type: File inputBinding: position: 3 - prefix: '--REFERENCE' + prefix: '--REFERENCE_SEQUENCE' doc: Reference sequence file 
secondaryFiles: - .fai From 3d9bcbf80dc6a82d7fc233c385f110ddac197096 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Mon, 23 Aug 2021 21:47:20 -0400 Subject: [PATCH 419/476] Update gatk_downsamplesam_4.1.8.1.cwl --- gatk_downsamplesam_4.1.8.1/gatk_downsamplesam_4.1.8.1.cwl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gatk_downsamplesam_4.1.8.1/gatk_downsamplesam_4.1.8.1.cwl b/gatk_downsamplesam_4.1.8.1/gatk_downsamplesam_4.1.8.1.cwl index 42dee34c..b4078135 100644 --- a/gatk_downsamplesam_4.1.8.1/gatk_downsamplesam_4.1.8.1.cwl +++ b/gatk_downsamplesam_4.1.8.1/gatk_downsamplesam_4.1.8.1.cwl @@ -30,7 +30,7 @@ inputs: - id: output_file_name type: string? doc: Output file name. Not Required - - id: output_file_name_metrics + - id: output_metrics_file_name type: string? doc: Output file name for metrics file. Not Required - id: probability From d3810a58c3753c113f9c9eccb98fab4768635b86 Mon Sep 17 00:00:00 2001 From: Suleyman Vural Date: Mon, 8 Nov 2021 12:37:14 -0500 Subject: [PATCH 420/476] initial commit --- vardictjava_1.8.2/example_inputs.yaml | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 vardictjava_1.8.2/example_inputs.yaml diff --git a/vardictjava_1.8.2/example_inputs.yaml b/vardictjava_1.8.2/example_inputs.yaml new file mode 100644 index 00000000..e69de29b From b550dfa7e7ce4c3e31598f429d07853188717a36 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Mon, 22 Nov 2021 14:48:36 -0500 Subject: [PATCH 421/476] Adding length_limit option --- fastp_0.20.1/fastp_0.20.1.cwl | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fastp_0.20.1/fastp_0.20.1.cwl b/fastp_0.20.1/fastp_0.20.1.cwl index b454079f..0610585d 100644 --- a/fastp_0.20.1/fastp_0.20.1.cwl +++ b/fastp_0.20.1/fastp_0.20.1.cwl @@ -93,6 +93,13 @@ inputs: prefix: '--length_required' doc: | reads shorter than length_required will be discarded, default is 15. + - id: maximum_read_length + type: int? 
+ inputBinding: + position: 0 + prefix: '--length_limit' + doc: | + reads longer than length_limit will be discarded, default 0 means no limitation. - default: fastp.json id: json_output_path type: string From bf8850f232acd0abea93b6d0c65ec5b5e3ae2b47 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Mon, 22 Nov 2021 14:55:59 -0500 Subject: [PATCH 422/476] Modifying versions --- __init__.py | 2 +- cwl_commandlinetools/__init__.py | 2 +- setup.cfg | 2 +- setup.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/__init__.py b/__init__.py index 687dc425..d62ed77c 100644 --- a/__init__.py +++ b/__init__.py @@ -4,4 +4,4 @@ __author__ = """msk-access""" __email__ = 'msk.access@gmail.com' -__version__ = '0.6.3' +__version__ = '1.1.1' diff --git a/cwl_commandlinetools/__init__.py b/cwl_commandlinetools/__init__.py index 9fc9a3c6..d62ed77c 100644 --- a/cwl_commandlinetools/__init__.py +++ b/cwl_commandlinetools/__init__.py @@ -4,4 +4,4 @@ __author__ = """msk-access""" __email__ = 'msk.access@gmail.com' -__version__ = '1.0.0' +__version__ = '1.1.1' diff --git a/setup.cfg b/setup.cfg index fd44fded..b734aaf6 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 1.0.0 +current_version = 1.1.1 commit = True tag = True diff --git a/setup.py b/setup.py index 79f00425..b10c17b2 100644 --- a/setup.py +++ b/setup.py @@ -38,6 +38,6 @@ test_suite='tests', tests_require=test_requirements, url='https://github.com/msk-access/cwl-commandlinetools', - version='1.0.0', + version='1.1.1', zip_safe=False, ) From 5dc803edef7722422fb3a3b4713dae974484f0ae Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Tue, 23 Nov 2021 10:49:35 -0500 Subject: [PATCH 423/476] Adding max length parmeter --- fastp_0.20.1/fastp_0.20.1.cwl | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/fastp_0.20.1/fastp_0.20.1.cwl b/fastp_0.20.1/fastp_0.20.1.cwl index 0610585d..f6356a6a 100644 --- a/fastp_0.20.1/fastp_0.20.1.cwl +++ 
b/fastp_0.20.1/fastp_0.20.1.cwl @@ -98,8 +98,26 @@ inputs: inputBinding: position: 0 prefix: '--length_limit' - doc: | - reads longer than length_limit will be discarded, default 0 means no limitation. + doc: > + reads longer than length_limit will be discarded, default 0 means no + limitation. + - id: max_len_read1 + type: int? + inputBinding: + position: 0 + prefix: '--max_len1' + doc: >- + if read1 is longer than max_len1, then trim read1 at its tail to make it + as long as max_len1. Default 0 means no limitation + - id: max_len_read2 + type: int? + inputBinding: + position: 0 + prefix: '--max_len2' + doc: >- + if read2 is longer than max_len2, then trim read2 at its tail to make it + as long as max_len2. Default 0 means no limitation. If it's not specified, + it will follow read1's settings - default: fastp.json id: json_output_path type: string From 30b92b191fb16602877a9d67e7bdaa32ae1d6ce0 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Tue, 23 Nov 2021 10:51:29 -0500 Subject: [PATCH 424/476] =?UTF-8?q?Bump=20version:=201.1.1=20=E2=86=92=201?= =?UTF-8?q?.2.0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cwl_commandlinetools/__init__.py | 2 +- setup.cfg | 2 +- setup.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cwl_commandlinetools/__init__.py b/cwl_commandlinetools/__init__.py index d62ed77c..30f6dd27 100644 --- a/cwl_commandlinetools/__init__.py +++ b/cwl_commandlinetools/__init__.py @@ -4,4 +4,4 @@ __author__ = """msk-access""" __email__ = 'msk.access@gmail.com' -__version__ = '1.1.1' +__version__ = '1.2.0' diff --git a/setup.cfg b/setup.cfg index b734aaf6..1c8fad57 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 1.1.1 +current_version = 1.2.0 commit = True tag = True diff --git a/setup.py b/setup.py index b10c17b2..53b77ff1 100644 --- a/setup.py +++ b/setup.py @@ -38,6 +38,6 @@ test_suite='tests', tests_require=test_requirements, 
url='https://github.com/msk-access/cwl-commandlinetools', - version='1.1.1', + version='1.2.0', zip_safe=False, ) From 72c2622628da77ad1c232ac7b1671df17cb5f814 Mon Sep 17 00:00:00 2001 From: svural Date: Fri, 3 Dec 2021 12:03:07 -0500 Subject: [PATCH 425/476] created cwl updating older version --- vardictjava_1.8.2/vardict.cwl | 216 ++++++++++++++++++++++++++++++++++ 1 file changed, 216 insertions(+) create mode 100644 vardictjava_1.8.2/vardict.cwl diff --git a/vardictjava_1.8.2/vardict.cwl b/vardictjava_1.8.2/vardict.cwl new file mode 100644 index 00000000..a7b58f32 --- /dev/null +++ b/vardictjava_1.8.2/vardict.cwl @@ -0,0 +1,216 @@ +#!/usr/bin/env cwl-runner +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +class: Workflow +id: vardict +label: vardict +requirements: + MultipleInputFeatureRequirement: {} + ScatterFeatureRequirement: {} + SubworkflowFeatureRequirement: {} + InlineJavascriptRequirement: {} + StepInputExpressionRequirement: {} +inputs: + bedfile: + type: File? + B: + type: int? + C: + type: boolean? + D: + type: boolean? + E: + type: string? + F: + type: boolean? + G: + type: File + secondaryFiles: ['.fai'] + H: + type: boolean? + I: + type: string? + J: + type: boolean? + K: + type: boolean? + L: + type: string? + M: + type: string? + N: + type: string? + N2: + type: string? + O: + type: string? + P: + type: string? + Q: + type: string? + R: + type: string? + S: + type: string? + T: + type: string? + U: + type: string? + UN: + type: string? + V: + type: string? + VS: + type: string? + W: + type: string? + X: + type: string? + Y: + type: string? + Z: + type: string? + + a: + type: string? + b2: + type: File? + secondaryFiles: ['.bai'] + b: + type: File? + secondaryFiles: ['.bai'] + c: + type: string? + d: + type: string? + e: + type: string? + f: + type: string? + g: + type: string? + hh: + type: boolean? 
+ i: + type: boolean? + j: + type: string? + k: + type: string? + m: + type: string? + n: + type: string? + o: + type: string? + p: + type: boolean? + q: + type: string? + r: + type: string? + s: + type: string? + t: + type: boolean? + th: + type: string? + x: + type: string? + z: + type: string? + vcf: + type: string? + f_1: + type: string? +outputs: + output: + type: File + outputSource: vardict_1/output +steps: + vardict: + run: ./vardict_app.cwl + in: + B: B + C: C + D: D + E: E + F: F + G: G + H: H + I: I + M: M + N: N + O: O + P: P + Q: Q + R: R + S: S + T: T + V: V + VS: VS + X: X + Z: Z + a: a + b: b + b2: b2 + bedfile: bedfile + c: c + d: d + e: e + f: f + g: g + h: hh + i: i + k: k + m: m + n: n + o: o + p: p + q: q + r: r + t: t + th: th + v: + valueFrom: ${ return inputs.vcf.replace(".vcf", "_tmp.vcf") } + vcf: vcf + x: x + z: z + out: [output] + testsomatic: + run: ./testsomatic.cwl + in: + input_vardict: vardict/output + out: [output_var] + vardict_1: + run: ./var_to_vcf.cwl + in: + N: N + N2: N2 + f: f_1 + vcf: vcf + input_vcf: testsomatic/output_var + out: [output] + +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:vurals@mskcc.org' + 'foaf:name': Suleyman Vural + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:kumarn1@mskcc.org' + 'foaf:name': Nikhil Kumar + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': Vardictjava + 'doap:revision': 1.8.2 From 05ab54ad4b780ea416940dedf1a07011e079af4e Mon Sep 17 00:00:00 2001 From: svural Date: Fri, 3 Dec 2021 12:04:41 -0500 Subject: [PATCH 426/476] created cwl updating older version --- vardictjava_1.8.2/testsomatic.cwl | 28 ++ vardictjava_1.8.2/var_to_vcf.cwl | 157 +++++++++ vardictjava_1.8.2/vardict_app.cwl | 423 ++++++++++++++++++++++++ 
vardictjava_1.8.2/vardictjava_1.8.2.cwl | 246 ++++++++++++++ 4 files changed, 854 insertions(+) create mode 100644 vardictjava_1.8.2/testsomatic.cwl create mode 100644 vardictjava_1.8.2/var_to_vcf.cwl create mode 100644 vardictjava_1.8.2/vardict_app.cwl create mode 100644 vardictjava_1.8.2/vardictjava_1.8.2.cwl diff --git a/vardictjava_1.8.2/testsomatic.cwl b/vardictjava_1.8.2/testsomatic.cwl new file mode 100644 index 00000000..02b00c0f --- /dev/null +++ b/vardictjava_1.8.2/testsomatic.cwl @@ -0,0 +1,28 @@ +#!/usr/bin/env cwl-runner +cwlVersion: v1.0 +class: CommandLineTool +baseCommand: +- Rscript +- /usr/bin/vardict/testsomatic.R +id: testsomatic + +requirements: + InlineJavascriptRequirement: {} + ResourceRequirement: + coresMin: 2 + ramMin: 12000 + DockerRequirement: + dockerPull: mskcc/roslin-variant-vardict:1.5.1 + + +inputs: + input_vardict: + type: File +outputs: + output_var: + type: File? + outputBinding: + glob: output_testsomatic.var + +stdin: $(inputs.input_vardict.path) +stdout: output_testsomatic.var diff --git a/vardictjava_1.8.2/var_to_vcf.cwl b/vardictjava_1.8.2/var_to_vcf.cwl new file mode 100644 index 00000000..670fcfca --- /dev/null +++ b/vardictjava_1.8.2/var_to_vcf.cwl @@ -0,0 +1,157 @@ +#!/usr/bin/env cwl-runner +cwlVersion: v1.0 +class: CommandLineTool +baseCommand: +- perl +- /usr/bin/vardict/var2vcf_paired.pl +id: vardict_var2vcf + +arguments: +- position: 0 + prefix: -N + valueFrom: "${\n return inputs.N + \"|\" + inputs.N2;\n}" + +requirements: + InlineJavascriptRequirement: {} + ResourceRequirement: + coresMin: 4 + ramMin: 32000 + DockerRequirement: + dockerPull: mskcc/roslin-variant-vardict:1.5.1 + +inputs: + + C: + type: boolean? + doc: Indicate the chromosome names are just numbers, such as 1, 2, not chr1, chr2 + inputBinding: + position: 0 + prefix: -C + + D: + type: float? + doc: Debug mode. Will print some error messages and append full genotype at the end. + inputBinding: + position: 0 + prefix: -D + F: + type: float? 
+ doc: The hexical to filter reads using samtools. Default - 0x500 (filter 2nd alignments and duplicates). Use -F 0 to turn it off. + inputBinding: + position: 0 + prefix: -F + + I: + type: int? + doc: The indel size. Default - 120bp + inputBinding: + position: 0 + prefix: -I + + M: + type: boolean? + doc: The minimum matches for a read to be considered. If, after soft-clipping, the matched bp is less than INT, then the read is discarded. It's meant for PCR based targeted sequencing where there's no insert and the matching is only the primers. Default - 0, or no filtering + inputBinding: + position: 0 + prefix: -M + + N: + type: string? + doc: Tumor Sample Name + + N2: + type: string? + doc: Normal Sample Name + + P: + type: float? + doc: The read position filter. If the mean variants position is less that specified, it's considered false positive. Default - 5 + inputBinding: + position: 0 + prefix: -P + + Q: + type: string? + doc: If set, reads with mapping quality less than INT will be filtered and ignored + inputBinding: + position: 0 + prefix: -Q + + S: + type: boolean? + doc: The column for region start, e.g. gene start + inputBinding: + position: 0 + prefix: -S + f: + type: string? + doc: The threshold for allele frequency, default - 0.05 or 5%% + inputBinding: + position: 0 + prefix: -f + + m: + type: int? + doc: If set, reads with mismatches more than INT will be filtered and ignored. Gaps are not counted as mismatches. Valid only for bowtie2/TopHat or BWA aln followed by sampe. BWA mem is calculated as NM - Indels. Default - 8, or reads with more than 8 mismatches will not be used. + inputBinding: + position: 0 + prefix: -m + o: + type: float? + doc: The Qratio of (good_quality_reads)/(bad_quality_reads+0.5). The quality is defined by -q option. Default - 1.5 + inputBinding: + position: 0 + prefix: -o + + p: + doc: Do pileup regarless the frequency + type: float? 
+ inputBinding: + position: 0 + prefix: -p + + vcf: + type: string + doc: output vcf file + + A: + type: boolean? + inputBinding: + position: 0 + prefix: -A + + c: + type: int? + inputBinding: + position: 0 + prefix: -c + + q: + type: float? + inputBinding: + position: 0 + prefix: -q + + d: + type: int? + inputBinding: + position: 0 + prefix: -d + + v: + type: int? + inputBinding: + position: 0 + prefix: -v + + input_vcf: + type: File? + +outputs: + output: + type: File + outputBinding: + glob: ${ return inputs.vcf; } + +stdin: $(inputs.input_vcf.path) +stdout: ${ return inputs.vcf; } \ No newline at end of file diff --git a/vardictjava_1.8.2/vardict_app.cwl b/vardictjava_1.8.2/vardict_app.cwl new file mode 100644 index 00000000..db06bb00 --- /dev/null +++ b/vardictjava_1.8.2/vardict_app.cwl @@ -0,0 +1,423 @@ +#!/usr/bin/env cwl-runner +cwlVersion: v1.0 +class: CommandLineTool +baseCommand: +- /usr/bin/vardict/bin/VarDict +id: vardict + +arguments: +- position: 1 + prefix: -b + valueFrom: "${\n return inputs.b.path + \"|\" + inputs.b2.path;\n}" +- position: 0 + prefix: -N + valueFrom: "${\n if (inputs.N2)\n return [inputs.N, inputs.N2];\n else\n\ + \ return inputs.N;\n}" + + +requirements: + EnvVarRequirement: + envDef: + JAVA_OPTS: '"-Xms8g" "-Xmx95g"' + InlineJavascriptRequirement: {} + ResourceRequirement: + coresMin: 4 + ramMin: 64000 + DockerRequirement: + dockerPull: ghcr.io/msk-access/vardictjava + +inputs: + + B: + type: int? + doc: The minimum # of reads to determine strand bias, default: 2 + inputBinding: + position: 0 + prefix: -B + + C: + type: boolean? + doc: Indicate the chromosome names are just numbers, such as 1, 2, not chr1, chr2 (deprecated!!!) + inputBinding: + position: 0 + prefix: -C + + D: + type: boolean? + doc: Debug mode. Will print some error messages and append full genotype at the end. + inputBinding: + position: 0 + prefix: -D + + E: + type: string? + doc: The column for region end, e.g. 
gene end + inputBinding: + position: 0 + prefix: -E + + F: + type: boolean? + doc: The hexical to filter reads using samtools. Default - 0x500 (filter 2nd alignments and duplicates). Use -F 0 to turn it off. + inputBinding: + position: 0 + prefix: -F + + G: + type: File + doc: The reference fasta. Should be indexed (.fai) + secondaryFiles: + - .fai + inputBinding: + position: 0 + prefix: -G + + H: + type: boolean? + doc: Print this help page + inputBinding: + position: 0 + prefix: -H + + I: + type: string? + doc: #The indel size. Default: 50bp. Be cautious with -I option, especially in the amplicon mode, as amplicon sequencing is not a way to find large indels. Increasing the search size might be slow and false positives may appear in low complexity regions. Increasing it to 200-300 bp is only recommend for hybrid capture sequencing. + inputBinding: + position: 0 + prefix: -I + + J: + type: boolean? + doc: The genomic position that CRISPR/Cas9 suppose to cut, typically 3bp from the PAM NGG site and within the guide. For CRISPR mode only. It will adjust the variants (mostly In-Del) start and end sites to as close to this location as possible, if there are alternatives. The option should only be used for CRISPR mode. + inputBinding: + position: 0 + prefix: -J + + K: + type: boolean? + doc: Include Ns in the total depth calculation + inputBinding: + position: 0 + prefix: -K + + L: + type: boolean? + doc: The minimum structural variant length to be presented using , etc. Default 1000. Any indel, complex variants less than this will be spelled out with exact nucleotides + inputBinding: + position: 0 + prefix: -L + + M: + type: string? + doc: The minimum matches for a read to be considered. If, after soft-clipping, the matched bp is less than INT, then the read is discarded. It's meant for PCR based targeted sequencing where there's no insert and the matching is only the primers. 
Default - 0, or no filtering + inputBinding: + position: 0 + prefix: -M + + N: + type: string? + doc: Tumor Sample Name + + N2: + type: string? + doc: Normal Sample Name + + O: + type: string? + doc: The reads should have at least mean MapQ to be considered a valid variant. Default - no filtering + inputBinding: + position: 0 + prefix: -O + + P: + type: string? + doc: The read position filter. If the mean variants position is less that specified, its considered false positive. Default 5 + inputBinding: + position: 0 + prefix: -P + + Q: + type: string? + doc: If set, reads with mapping quality less than INT will be filtered and ignored + inputBinding: + position: 0 + prefix: -Q + + R: + type: string? + doc: The region of interest. In the format of chr -start-end. If end is omitted, then a single position. No BED is needed. + inputBinding: + position: 0 + prefix: -R + + S: + type: string? + doc: The column for region start, e.g. gene start + inputBinding: + position: 0 + prefix: -S + + T: + type: string? + doc: Trim bases after [INT] bases in the reads + inputBinding: + position: 0 + prefix: -T + + U: + type: string? + doc: Turn off structural variant calling + inputBinding: + position: 0 + prefix: -U + + UN: + type: string? + doc: Indicate unique mode, which when mate pairs overlap, the overlapping part will be counted only once using first read only. Default unique mode disabled, all reads are counted. + inputBinding: + position: 0 + prefix: -UN + + V: + type: string? + doc: The lowest frequency in normal sample allowed for a putative somatic mutations. Default to 0.05 + inputBinding: + position: 0 + prefix: -V + + VS: + type: string? + doc: How strict to be when reading a SAM or BAM. STRICT - throw an exception if something looks wrong. LENIENT - Emit warnings but keep going if possible. SILENT - Like LENIENT, only dont emit warning messages. Default - LENIENT + inputBinding: + position: 0 + prefix: -VS + + W: + type: string? + doc: The insert size STD. 
Used for SV calling. Default 100 + inputBinding: + position: 0 + prefix: -W + + X: + type: string? + doc: Extension of bp to look for mismatches after insersion or deletion. Default to 2 bp, or only calls when they are within 2 bp. + inputBinding: + position: 0 + prefix: -X + + Y: + type: string? + doc: Extension of bp of reference to build lookup table. Default to 1200 bp. Increasing the number will slow down the program. The main purpose is to call large indels with 1000 bp that can be missed by discordant mate pairs. + inputBinding: + position: 0 + prefix: -Y + + Z: + type: string? + doc: For downsampling fraction. e.g. 0.7 means roughly 70%% downsampling. Default - No downsampling. Use with caution. The downsampling will be random and non-reproducible. + inputBinding: + position: 0 + prefix: -Z + + a: + type: string? + doc: Indicate its amplicon based calling. Reads dont map to the amplicon will be skipped. A read pair is considered belonging the amplicon if the edges are less than int bp to the amplicon, and overlap fraction is at least float. Default 10 0.95 + inputBinding: + position: 0 + prefix: -a + + b: + type: File? + secondaryFiles: + - .bai + doc: Tumor bam + + b2: + type: File? + secondaryFiles: + - .bai + doc: Normal bam + + bedfile: + type: File? + inputBinding: + position: 1 + + c: + type: string? + doc: The column for chromosome + inputBinding: + position: 0 + prefix: -c + + d: + type: string? + doc: The delimiter for split region_info, default to tab "\t" + inputBinding: + position: 0 + prefix: -d + + e: + type: string? + doc: The column for segment ends in the region, e.g. exon ends + inputBinding: + position: 0 + prefix: -e + + f: + type: string? + doc: The threshold for allele frequency, default - 0.01 or 1%% + inputBinding: + position: 0 + prefix: -f + + g: + type: string? + doc: The column for gene name, or segment annotation + inputBinding: + position: 0 + prefix: -g + + h: + type: boolean? 
+ doc: Print a header row describing columns + inputBinding: + position: 0 + prefix: -h + + i: + type: boolean? + doc: Output splicing read counts + inputBinding: + position: 0 + prefix: -i + + j: + type: boolean? + doc: CRISPR_filtering_bp In CRISPR mode, the minimum amount in bp that a read needs to overlap with cutting site. If a read does not meet the criteria, it will not be used for variant calling, since it is likely just a partially amplified PCR. Default not set, or no filtering + inputBinding: + position: 0 + prefix: -j + + + k: + type: string? + doc: Indicate whether to perform local realignment. Default - 1. Set to 0 to disable it. For Ion or PacBio, 0 is recommended. + inputBinding: + position: 0 + prefix: -k + + m: + type: string? + doc: If set, reads with mismatches more than INT will be filtered and ignored. Gaps are not counted as mismatches. Valid only for bowtie2/TopHat or BWA aln followed by sampe. BWA mem is calculated as NM - Indels. Default - 8, or reads with more than 8 mismatches will not be used. + inputBinding: + position: 0 + prefix: -m + + n: + type: string? + doc: The regular expression to extract sample name from bam filenames. Default to - /([^\/\._]+?)_[^\/]*.bam/ + inputBinding: + position: 0 + prefix: -n + + o: + type: string? + doc: The Qratio of (good_quality_reads)/(bad_quality_reads+0.5). The quality is defined by -q option. Default - 1.5 + inputBinding: + position: 0 + prefix: -o + + p: + type: boolean? + doc: Do pileup regardless of the frequency + inputBinding: + position: 0 + prefix: -p + + q: + type: string? + doc: The phred score for a base to be considered a good call. Default - 22.5 (for Illumina) For PGM, set it to ~15, as PGM tends to under estimate base quality. + inputBinding: + position: 0 + prefix: -q + + r: + type: string? + doc: 'The minimum # of variance reads, default: 2' + inputBinding: + position: 0 + prefix: -r + + s: + type: string? + doc: The column for segment starts in the region, e.g.
exon starts + inputBinding: + position: 0 + prefix: -s + + t: + type: boolean? + doc: Indicate to remove duplicated reads. Only one pair with same start positions will be kept + inputBinding: + position: 0 + prefix: -t + + th: + type: string? + doc: Threads count. + inputBinding: + position: 0 + prefix: -th + + three: + type: boolean? + doc: Indicate to move indels to 3-prime if alternative alignment can be achieved. + inputBinding: + position: 0 + prefix: '-3' + + u: + type: boolean? + doc: Indicate unique mode, which when mate pairs overlap, the overlapping part will be counted only once using forward read only. Default unique mode disabled, all reads are counted. + inputBinding: + position: 0 + prefix: -u + + w: + type: string? + doc: The insert size. Used for SV calling. Default 300 + inputBinding: + position: 0 + prefix: -w + + x: + type: string? + doc: The number of nucleotide to extend for each segment, default 0 + inputBinding: + position: 0 + prefix: -x + + y: + type: string? + doc: Verbose + inputBinding: + position: 0 + prefix: -y + + z: + type: string? + doc: Indicate whether the BED file contains zero-based coordinates, the same way as the Genome browser IGV does. -z 1 indicates that coordinates in a BED file start from 0. -z 0 indicates that the coordinates start from 1. Default 1 for a BED file or amplicon BED file (0-based). Use 0 to turn it off.
When using -R option, it is set to 0 + inputBinding: + position: 0 + prefix: -z + +outputs: + output: + type: File + outputBinding: + glob: vardict_app_output.vcf + + +stdout: vardict_app_output.vcf diff --git a/vardictjava_1.8.2/vardictjava_1.8.2.cwl b/vardictjava_1.8.2/vardictjava_1.8.2.cwl new file mode 100644 index 00000000..e24cb107 --- /dev/null +++ b/vardictjava_1.8.2/vardictjava_1.8.2.cwl @@ -0,0 +1,246 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: abra2_2_22 +baseCommand: + - java +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: input_bam + type: + - File + - type: array + items: File + inputBinding: + position: 0 + prefix: '--in' + doc: Required list of input sam or bam file (s) separated by comma + secondaryFiles: + - ^.bai + - id: working_directory + type: string? + doc: Set the temp directory (overrides java.io.tmpdir) + - id: reference_fasta + type: File + inputBinding: + position: 0 + prefix: '--ref' + doc: Genome reference location + secondaryFiles: + - .fai + - id: targets + type: File + inputBinding: + position: 0 + prefix: '--targets' + - id: kmer_size + type: string? + inputBinding: + position: 0 + prefix: '--kmer' + doc: >- + Optional assembly kmer size(delimit with commas if multiple sizes + specified) + - id: maximum_average_depth + type: int? + inputBinding: + position: 0 + prefix: '--mad' + doc: >- + Regions with average depth exceeding this value will be downsampled + (default: 1000) + - id: soft_clip_contig + type: string? 
+ inputBinding: + position: 0 + prefix: '--sc' + doc: >- + Soft clip contig args + [max_contigs,min_base_qual,frac_high_qual_bases,min_soft_clip_len] + (default:16,13,80,15) + - id: maximum_mixmatch_rate + type: float? + inputBinding: + position: 0 + prefix: '--mmr' + doc: >- + Max allowed mismatch rate when mapping reads back to contigs (default: + 0.05) + - id: scoring_gap_alignments + type: string? + inputBinding: + position: 0 + prefix: '--sga' + doc: >- + Scoring used for contig alignments(match, + mismatch_penalty,gap_open_penalty,gap_extend_penalty) (default:8,32,48,1) + - id: contig_anchor + type: string? + inputBinding: + position: 0 + prefix: '--ca' + doc: >- + Contig anchor [M_bases_at_contig_edge,max_mismatches_near_edge] + (default:10,2) + - id: window_size + type: string? + inputBinding: + position: 0 + prefix: '--ws' + doc: |- + Processing window size and overlap + (size,overlap) (default: 400,200) + - id: consensus_sequence + type: boolean? + inputBinding: + position: 0 + prefix: '--cons' + doc: Use positional consensus sequence when aligning high quality soft clipping + - id: output_bams + type: + - string + - type: array + items: string + inputBinding: + position: 0 + prefix: '--out' + doc: Required list of output sam or bam file (s) separated by comma + - id: ignore_bad_assembly + type: boolean? + inputBinding: + position: 0 + prefix: '--ignore-bad-assembly' + doc: Use this option to avoid parsing errors for corrupted assemblies + - id: bam_index + type: boolean? + inputBinding: + position: 0 + prefix: '--index' + doc: >- + Enable BAM index generation when outputting sorted alignments (may require + additonal memory) + - id: input_vcf + type: File? + inputBinding: + position: 0 + prefix: '--in-vcf' + doc: >- + VCF containing known (or suspected) variant sites. Very large files + should be avoided. + - id: no_edge_complex_indel + type: boolean? 
+ inputBinding: + position: 0 + prefix: '--no-edge-ci' + doc: Prevent output of complex indels at read start or read end + - id: no_sort + type: boolean? + inputBinding: + position: 0 + prefix: '--nosort' + doc: Do not attempt to sort final output +outputs: + - id: abra_realigned_bam + type: + - 'null' + - File + - type: array + items: File + outputBinding: + glob: |- + ${ + return inputs.output_bams + } + secondaryFiles: + - ^.bai +label: abra2_2.22 +arguments: + - position: 0 + valueFrom: |- + ${ + if (inputs.memory_per_job && inputs.memory_overhead) { + + if (inputs.memory_per_job % 1000 == 0) { + + return "-Xmx" + (inputs.memory_per_job / 1000).toString() + "G" + } + else { + + return "-Xmx" + Math.floor((inputs.memory_per_job / 1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead) { + + if (inputs.memory_per_job % 1000 == 0) { + + return "-Xmx" + (inputs.memory_per_job / 1000).toString() + "G" + } + else { + + return "-Xmx" + Math.floor((inputs.memory_per_job / 1000)).toString() + "G" + } + } + else if (!inputs.memory_per_job && inputs.memory_overhead) { + + return "-Xmx20G" + } + else { + + return "-Xmx20G" + } + } + - position: 0 + prefix: '-jar' + valueFrom: /usr/local/bin/abra2.jar + - position: 0 + prefix: '--threads' + valueFrom: |- + ${ + if(inputs.number_of_threads) + return inputs.number_of_threads + return runtime.cores + } + - position: 0 + prefix: '--tmpdir' + valueFrom: |- + ${ + if(inputs.working_directory) + return inputs.working_directory; + return runtime.tmpdir + } +requirements: + - class: ResourceRequirement + ramMin: 60000 + coresMin: 16 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/abra2:2.22' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:kumarn1@mskcc.org' + 'foaf:name': Nikhil Kumar + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 
'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': abra2 + 'doap:revision': 2.22 From 17e22f50728d1a4787ccfd327b7787c16fd48137 Mon Sep 17 00:00:00 2001 From: svural Date: Fri, 3 Dec 2021 12:08:58 -0500 Subject: [PATCH 427/476] updated example_input.yaml --- vardictjava_1.8.2/example_inputs.yaml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/vardictjava_1.8.2/example_inputs.yaml b/vardictjava_1.8.2/example_inputs.yaml index e69de29b..3ba11c9e 100644 --- a/vardictjava_1.8.2/example_inputs.yaml +++ b/vardictjava_1.8.2/example_inputs.yaml @@ -0,0 +1,18 @@ +G: + class: File + path: "b37.fasta" +b: + class: File + path: "/path/to/tumor/bam" +b2: + class: File + path: "/path/to/normal/bam" +bedfile: + class: File + path: "/path/to/bed/file" +c: "1" +S: "2" +E: "3" +g: "4" +th: "4" +vcf: "output.vcf" From 5868586f8941ffd66f2cd55a2f3b01977789d6fc Mon Sep 17 00:00:00 2001 From: svural Date: Tue, 7 Dec 2021 16:24:50 -0500 Subject: [PATCH 428/476] added container dir and labeled vardict docker line --- vardictjava_1.8.2/container/Dockerfile | 30 ++ vardictjava_1.8.2/container/run_test.sh | 20 + vardictjava_1.8.2/container/testsomatic.R | 53 +++ vardictjava_1.8.2/container/var2vcf_paired.pl | 380 ++++++++++++++++++ vardictjava_1.8.2/vardict_app.cwl | 2 +- vardictjava_1.8.2/vardictjava_1.8.2.cwl | 246 ------------ 6 files changed, 484 insertions(+), 247 deletions(-) create mode 100644 vardictjava_1.8.2/container/Dockerfile create mode 100755 vardictjava_1.8.2/container/run_test.sh create mode 100755 vardictjava_1.8.2/container/testsomatic.R create mode 100755 vardictjava_1.8.2/container/var2vcf_paired.pl delete mode 100644 vardictjava_1.8.2/vardictjava_1.8.2.cwl diff --git a/vardictjava_1.8.2/container/Dockerfile b/vardictjava_1.8.2/container/Dockerfile new file mode
100644 index 00000000..ddaf5599 --- /dev/null +++ b/vardictjava_1.8.2/container/Dockerfile @@ -0,0 +1,30 @@ +FROM alpine:3.8 + +LABEL maintainer="Nikhil Kumar (kumarn1@mskcc.org)" \ + version.image="1.0.0" \ + version.vardict="1.8.2" \ + version.r="3.5.1" \ + version.perl="5.26.2-r1" \ + version.alpine="3.8" \ + source.vardict="https://github.com/AstraZeneca-NGS/VarDictJava/releases/tag/v1.8.2" \ + source.r="https://pkgs.alpinelinux.org/package/edge/community/x86/R" \ + source.perl="https://pkgs.alpinelinux.org/package/edge/main/aarch64/perl" + +ENV VARDICT_VERSION 1.8.2 + +COPY runscript.sh /usr/bin/runscript.sh +COPY run_test.sh /run_test.sh + +RUN apk add --update \ + && apk add ca-certificates openssl bash perl \ + && apk add openjdk8-jre-base \ + && apk add R R-dev \ + && cd /tmp && wget https://github.com/AstraZeneca-NGS/VarDictJava/releases/download/v${VARDICT_VERSION}/VarDict-${VARDICT_VERSION}.zip \ + && unzip VarDict-${VARDICT_VERSION}.zip \ + && mv /tmp/VarDict-${VARDICT_VERSION} /usr/bin/vardict \ + && rm -rf /var/cache/apk/* /tmp/* \ + && chmod +x /usr/bin/runscript.sh \ + && exec /run_test.sh + +COPY testsomatic.R /usr/bin/vardict/ +COPY var2vcf_paired.pl /usr/bin/vardict/ diff --git a/vardictjava_1.8.2/container/run_test.sh b/vardictjava_1.8.2/container/run_test.sh new file mode 100755 index 00000000..efed55b4 --- /dev/null +++ b/vardictjava_1.8.2/container/run_test.sh @@ -0,0 +1,20 @@ +# get actual output of the tool +actual=$(exec /usr/bin/runscript.sh vardict | head -1) + +# expected output +expected=$(cat << EOM +usage: vardict [-n name_reg] [-b bam] [-c chr] [-S start] [-E end] [-s seg_starts] [-e seg_ends] [-x #_nu] [-g gene] [-f freq] [-r #_reads] +EOM +) + +expected_no_space=$(echo $expected | tr -d "[:space:]") +actual_no_space=$(echo $actual | tr -d "[:space:]") +# diff +if [ "$actual_no_space" != "$expected_no_space" ] +then + echo "-----expected-----" + echo $expected + echo "-----actual-----" + echo $actual + exit 1 +fi \ No newline at 
end of file diff --git a/vardictjava_1.8.2/container/testsomatic.R b/vardictjava_1.8.2/container/testsomatic.R new file mode 100755 index 00000000..cc6dc092 --- /dev/null +++ b/vardictjava_1.8.2/container/testsomatic.R @@ -0,0 +1,53 @@ +#!/usr/bin/env Rscript + +#args <- commandArgs(trailingOnly = TRUE) + +myfile = file("stdin") +open(myfile, blocking=TRUE) +myinput = readLines(myfile) # read from stdin +if (length(myinput) > 0 ){ + mynumcols = sapply(gregexpr("\\t", myinput[1]), length) + 1 # count num of tabs + 1 +}else{ + mynumcols = 0 + d = matrix(0,0,0) +} + +if (mynumcols >= 48) { + d <- read.table( textConnection(myinput), sep = "\t", header = F, colClasses=c("character", NA, NA, NA, NA, "character", "character", NA, NA, NA, NA, NA, NA, "character", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "character", NA, "character", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "character", "character", "character", "character")) +} else if (mynumcols > 0){ + stop("Incorrect input detected in testsomatic.R") +} + +if (nrow(d) > 0){ + pvalues1 <- vector(mode="double", length=dim(d)[1]) + oddratio1 <- vector(mode="double", length=dim(d)[1]) + pvalues2 <- vector(mode="double", length=dim(d)[1]) + oddratio2 <- vector(mode="double", length=dim(d)[1]) + pvalues <- vector(mode="double", length=dim(d)[1]) + oddratio <- vector(mode="double", length=dim(d)[1]) + + for( i in 1:dim(d)[1] ) { + h <- fisher.test(matrix(c(d[i,10], d[i,11], d[i,12], d[i,13]), nrow=2)) + pvalues1[i] <- round(h$p.value, 5) + oddratio1[i] <- round(h$estimate, 5) + h <- fisher.test(matrix(c(d[i,28], d[i,29], d[i,30], d[i,31]), nrow=2)) + pvalues2[i] <- round(h$p.value, 5) + oddratio2[i] <- round(h$estimate, 5) + tref <- if ( d[i,8] - d[i,9] < 0 ) 0 else d[i,8] - d[i,9] + rref <- if ( d[i,26] - d[i,27] < 0 ) 0 else d[i,26] - d[i,27] + h <- fisher.test(matrix(c(d[i,9], tref, d[i,27], rref), nrow=2), alternative="greater") + pv <- h$p.value + od <- h$estimate + h <- 
fisher.test(matrix(c(d[i,9], tref, d[i,27], rref), nrow=2), alternative="less") + if ( h$p.value < pv ) { + pv <- h$p.value + od <- h$estimate + } + pvalues[i] <- round(pv, 5) + oddratio[i] <- round(od, 5) + } + curscipen <- getOption("scipen") + options(scipen=999) + write.table(data.frame(d[,1:25], pvalues1, oddratio1, d[,26:43], pvalues2, oddratio2, d[, 44:dim(d)[2]], pvalues, oddratio), file = "", quote = F, sep = "\t", eol = "\n", row.names=F, col.names=F) + options(scipen=curscipen) +} diff --git a/vardictjava_1.8.2/container/var2vcf_paired.pl b/vardictjava_1.8.2/container/var2vcf_paired.pl new file mode 100755 index 00000000..98799cf3 --- /dev/null +++ b/vardictjava_1.8.2/container/var2vcf_paired.pl @@ -0,0 +1,380 @@ +#!/usr/bin/env perl + +use warnings; +use Getopt::Std; +use strict; + +our ($opt_d, $opt_v, $opt_f, $opt_h, $opt_H, + $opt_p, $opt_q, $opt_F, $opt_S, $opt_Q, + $opt_o, $opt_C, $opt_M, $opt_P, $opt_N, + $opt_I, $opt_m, $opt_c, $opt_D, $opt_t, + $opt_r, $opt_O, $opt_X, $opt_k, $opt_V, + $opt_x, $opt_A, $opt_b, $opt_G); + +our $VERSION = "1.8.2"; + +getopts('htHSCMAd:v:f:p:q:F:Q:o:P:N:m:c:I:D:r:O:X:k:V:x:b:G:') || Usage(); +($opt_h || $opt_H) && Usage(); + +my $MinDepth = $opt_d ? $opt_d : 5; +my $VarDepth = $opt_v ? $opt_v : 3; +my $FREQ = defined($opt_f) ? $opt_f : 0.02; +my $PMEAN = defined($opt_p) ? $opt_p : 8; +my $QMEAN = defined($opt_q) ? $opt_q : 22.5; # base quality +my $MQMEAN = defined($opt_Q) ? $opt_Q : 0; # mapping quality +my $GTFREQ = defined($opt_F) ? $opt_F : 0.2; # Genotype frequency +my $SN = defined($opt_o) ? $opt_o : 1.5; # Signal to Noise +my $PVAL = defined($opt_P) ? $opt_P : 0.05; # the p-value from fisher test +my $DIFF = defined($opt_D) ? $opt_D : 0.2; +$opt_I = $opt_I ? $opt_I : 12; +$opt_m = $opt_m ? $opt_m : 5.25; +$opt_c = $opt_c ? 
$opt_c : 0; + +my %hash; +my $sample="tumor"; +while(<>) { + chomp; + next if (/R_HOME/); + my @a = split(/\t/); + $sample = $a[0]; + my $chr = $a[2]; + push( @{ $hash{ $chr }->{ $a[3] } }, \@a ); +} +my $samplem = "${sample}-match"; + +if ( $opt_N ) { + ($sample, $samplem) = split(/\|/, $opt_N); + $samplem = "${sample}-match" unless( $samplem ); +} +(my $sample_nowhitespace = $sample) =~ s/\s/_/g; + +print < +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= 1 indicates MSI"> +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= 0.01/5**vd2 (means the evidence in tumor sample might be weak, e.g. small diff in AF)"> +##INFO= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= 0.05, thus not somatic"> +##FILTER= +##FILTER= +##FILTER= +##FILTER= $PVAL"> +##FILTER= +##FILTER= +##FILTER== $opt_m, thus likely false positive"> +##FILTER= +##FILTER= +##FILTER= +##FILTER==14)"> +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +VCFHEADER + +print join("\t", "#CHROM", qw(POS ID REF ALT QUAL FILTER INFO FORMAT), $sample, $samplem), "\n"; + +# Exit if we don't have any variants to write +exit(0) unless( %hash ); + +my @chrs = reorder(keys %hash); +foreach my $chr (@chrs) { + my @pos = sort { $a <=> $b } (keys %{ $hash{ $chr } }); + my ($pds, $pde) = (0, 0); # previous Deletion variant's start and end + my ($pis, $pie) = (0, 0); # previous Insertion variant's start and end + my ($pvs, $pve) = (0, 0); # previous SNV variant's start and end + my ($pinfo1, $pfilter, $pinfo2) = ("", "", ""); + foreach my $p (@pos) { + my @tmp = sort { $b->[14] <=> $a->[14] } @{ $hash{ $chr }->{ $p } }; + my $ALL = $opt_A ? 
@tmp + 0 : 1; + my %seen = (); + for(my $i = 0; $i < $ALL; $i++) { + my $d = $tmp[$i]; # Only the highest AF get represented + my ($sample, $gene, $chrt, $start, $end, $ref, $alt, $dp1, $vd1, $rfwd1, $rrev1, $vfwd1, $vrev1, $gt1, $af1, $bias1, $pmean1, $pstd1, $qual1, $qstd1, $mapq1, $sn1, $hiaf1, $adjaf1, $nm1, $sbf1, $oddratio1, $dp2, $vd2, $rfwd2, $rrev2, $vfwd2, $vrev2, $gt2, $af2, $bias2, $pmean2, $pstd2, $qual2, $qstd2, $mapq2, $sn2, $hiaf2, $adjaf2, $nm2, $sbf2, $oddratio2, $shift3, $msi, $msilen, $lseq, $rseq, $seg, $status, $type, $sv1, $duprate1, $sv2, $duprate2, $pvalue, $oddratio) = @$d; + my $rd1 = $rfwd1 + $rrev1; + my $rd2 = $rfwd2 + $rrev2; + next unless ( $ref ); + next if ( $seen{ "$chrt-$start-$end-$ref-$alt" } ); + $seen{ "$chrt-$start-$end-$ref-$alt" } = 1; + unless ($type) { $type = "REF"; } + #$pvalue *= sqrt(60/($mapq1+length($ref)+length($alt)-1))*$af1; + my @filters = (); + my @filters2 = (); + if ( $oddratio eq "Inf" ) { + $oddratio = 0; + } + if ( $oddratio1 eq "Inf" ) { + $oddratio1 = 0; + } elsif ( $oddratio1 < 1 && $oddratio1 > 0 ) { + $oddratio1 = sprintf("%.2f", 1/$oddratio1); + } + if ( $oddratio2 eq "Inf" ) { + $oddratio2 = 0; + } elsif ( $oddratio2 < 1 && $oddratio2 > 0 ) { + $oddratio2 = sprintf("%.2f", 1/$oddratio2); + } + if ($dp1 < $MinDepth) { + push( @filters, "d$MinDepth") unless ( $status eq "StrongSomatic" && $pvalue < 0.15 && $af1*$vd1 >= 0.5); + } + if ($vd1 < $VarDepth) { + push( @filters, "v$VarDepth") unless ( $status eq "StrongSomatic" && $pvalue < 0.15 && $af1*$vd1 >= 0.5); + } + push(@filters2, "d$MinDepth") if ( $dp2 < $MinDepth ); + push(@filters2, "v$VarDepth") if ( $vd2 < $VarDepth ); + #if ( $status =~ /Somatic/ || $status =~ /SampleSpecific/ ) { + push( @filters, "f$FREQ") if ($af1 < $FREQ); + #push( @filters, "MAF0.05") if ($qual2 >= $QMEAN && $pmean2 >= $PMEAN && $mapq2 >= $MQMEAN && $sn2 >= $SN && $nm2 < $opt_m && $af2 > 0.05); + push( @filters, "p$PMEAN") if ($pmean1 < $PMEAN); + push( @filters, "pSTD") 
if ($pstd1 == 0 && $vd1 < $MinDepth); + push( @filters, "q$QMEAN") if ($qual1 < $QMEAN); + push( @filters, "Q$MQMEAN") if ($mapq1 < $MQMEAN); + push( @filters, "Q$MQMEAN") if ($mapq1 < 10 && $type eq "SNV"); # consider SNV somatic in low mapping quality region false positves + push( @filters, "SN$SN") if ($sn1 < $SN); + push( @filters, "NM$opt_m") if ($nm1 >= $opt_m); + #push( @filters, "Bias") if (($bias1 eq "2;1" || $bias1 eq "2;0") && $sbf1 < 0.01 && ($oddratio1 > 5 || $oddratio1 == 0)); + push( @filters, "Bias") if ($bias1 eq "2;1" && $sbf1 < 0.01 && ($oddratio1 > 5 || $oddratio1 == 0) && $end - $start < 100); + #} elsif ( $status =~ /LOH/ || $status =~ /Deletion/ ) { + push( @filters2, "f$FREQ") if ($af2 < $FREQ); + push( @filters2, "p$PMEAN") if ($pmean2 < $PMEAN); + push( @filters2, "pSTD") if ($pstd2 == 0 && $vd2 < $MinDepth); + push( @filters2, "q$QMEAN") if ($qual2 < $QMEAN); + push( @filters2, "Q$MQMEAN") if ($mapq2 < $MQMEAN); + push( @filters2, "SN$SN") if ($sn2 < $SN); + push( @filters2, "NM$opt_m") if ($nm2 >= $opt_m); + #push( @filters2, "Bias") if (($bias2 eq "2;1" || $bias2 eq "2;0") && $sbf2 < 0.01 && ($oddratio2 > 5 || $oddratio2 == 0)); + my %bias_filters = map { $_, 1 } @filters; + push( @filters, "Bias") if (!$bias_filters{ "Bias" } && $bias2 eq "2;1" && $sbf2 < 0.01 && ($oddratio2 > 5 || $oddratio2 == 0) && $end - $start < 100); + #} + # Require stringent statistics in regions with MSI + if ( ($msi > $opt_I && $msilen > 1) || ($msi > 12 && $msilen == 1)) { + push( @filters, "MSI$opt_I") unless( $status eq "StrongSomatic" && $pvalue < 0.0005 ); + } + if ( abs(length($ref)-length($alt)) == $msilen && !grep(/^MSI$opt_I/,@filters)) { + push( @filters, "MSI$opt_I") if ( ($msi > $opt_I && $msilen > 1 && $af1 < 0.35 && $af2 < 0.35) || ($msi > 12 && $msilen == 1 && $af1 < 0.35 && $af2 < 0.35) ); + } + my $p_likely = 0; + my $indel_likely = 0; + #push( @filters, "Bias") if (($a[15] eq "2;1" && $a[24] < 0.01) || ($a[15] eq "2;0" && $a[24] < 0.01) ); 
#|| ($a[9]+$a[10] > 0 && abs($a[9]/($a[9]+$a[10])-$a[11]/($a[11]+$a[12])) > 0.5)); + if ( $opt_M ) { + if ( $pvalue > $PVAL ) { + push(@filters, "P$PVAL") unless ($status eq "StrongSomatic" && (($pvalue < 0.25 && $af1 > 0.1 ) || ($pvalue < 0.5 && $af1 > 0.20) || ($pvalue < 0.15 && $af1 > 0.05))); + } elsif ( $status =~ /LikelySomatic/ && $pvalue > 0.05/5**$vd2 ) { # Increase the stringency for LikelySomatic + $p_likely = 1; + } elsif ( $status =~ /Likely/ && $type ne "SNV" ) { + $indel_likely = 1 unless(length($ref) <= 2 && length($alt) <= 2); + } + } + #if ( @filters == 0 && abs(length($ref)-length($alt)) == $msilen ) { + #push( @filters, "InGap" ) if ( $pds && $type eq "SNV" && $start <= $pde && $end >= $pds && $status =~ /Somatic/ ); + #push( @filters, "InIns" ) if ( $pis && $type eq "SNV" && $start <= $pie && $end >= $pis && $status =~ /Somatic/ ); + #push( @filters, "LongAT") if (isLongAT($lseq) || isLongAT($rseq)); + #} + #my $filter = @filters > 1 ? join(";", @filters) : (((@filters == 1 && ($filters[0] eq "P$PVAL" || $filters[0] eq "P0.01Likely" || $filters[0] eq "InDelLikely" || "DIFF$DIFF")) ? "PASS" : $filters[0]) :"PASS"); + my $filter = @filters > 0 ? join(";", @filters) : "PASS"; + + # Unless somatic only option (-M) is specified, any good variants in germline should be + # reported as well, regardless of the tumor sample + unless($opt_M) { + $filter = "PASS" if ( $filter ne "PASS" && @filters2 == 0 ); + } + my $gt = (1-$af1 < $GTFREQ) ? "1/1" : ($af1 >= 0.5 ? "1/0" : ($af1 >= $FREQ ? "0/1" : "0/0")); + my $gtm = (1-$af2 < $GTFREQ) ? "1/1" : ($af2 >= 0.5 ? "1/0" : ($af2 >= $FREQ ? "0/1" : "0/0")); + $bias1 =~ s/;/,/; + $bias2 =~ s/;/,/; + $bias1 = "0,0" if ($bias1 eq '0'); + $bias2 = "0,0" if ($bias2 eq '0'); + $mapq1 = sprintf '%.0f', $mapq1; + $mapq2 = sprintf '%.0f', $mapq2; + my $qual = $vd1 > $vd2 ? 
int(log($vd1)/log(2) * $qual1) : int(log($vd2)/log(2) * $qual2); + if ( $pfilter eq "PASS" && $pinfo2 =~ /Somatic/ && $pinfo2 =~ /TYPE=SNV/ && $filter eq "PASS" && $status =~ /Somatic/ && $type eq "SNV" && $start - $pvs < $opt_c ) { + $pfilter = "Cluster${opt_c}bp"; + $filter = "Cluster${opt_c}bp"; + } + if ( $pinfo1 ) { + #print "$pinfo1\t$pfilter\t$pinfo2\n" unless ( ($opt_M && $pinfo2 !~ /Somatic/) || $opt_S && $pfilter ne "PASS" ); + print "$pinfo1\t$pfilter\t$pinfo2\n" unless ( $opt_S && $pfilter ne "PASS" ); + } + ($pinfo1, $pfilter, $pinfo2) = (join("\t", $chr, $start, ".", $ref, $alt, $qual), $filter, + join("\t", join("","STATUS=$status;SAMPLE=$sample_nowhitespace;TYPE=$type;DP=$dp1;VD=$vd1;AF=$af1;SHIFT3=$shift3;MSI=$msi;MSILEN=$msilen;SSF=$pvalue;SOR=$oddratio;LSEQ=$lseq;RSEQ=$rseq", + $p_likely ? ";P0.01Likely" : "", $indel_likely ? ";InDelLikely" : ""), + "GT:DP:VD:ALD:RD:AD:AF:BIAS:PMEAN:PSTD:QUAL:QSTD:SBF:ODDRATIO:MQ:SN:HIAF:ADJAF:NM", + "$gt:$dp1:$vd1:$vfwd1,$vrev1:$rfwd1,$rrev1:$rd1,$vd1:$af1:$bias1:$pmean1:$pstd1:$qual1:$qstd1:$sbf1:$oddratio1:$mapq1:$sn1:$hiaf1:$adjaf1:$nm1", + "$gtm:$dp2:$vd2:$vfwd2,$vrev2:$rfwd2,$rrev2:$rd2,$vd2:$af2:$bias2:$pmean2:$pstd2:$qual2:$qstd2:$sbf2:$oddratio2:$mapq2:$sn2:$hiaf2:$adjaf2:$nm2")); + ($pds, $pde) = ($start+1, $end) if ($type eq "Deletion"); + ($pis, $pie) = ($start-1, $end+1) if ($type eq "Insertion"); + ($pvs, $pve) = ($start, $end) if ( $type eq "SNV" && $filter eq "PASS"); + } + } + if ( $pinfo1 ) { + print "$pinfo1\t$pfilter\t$pinfo2\n" unless ( $opt_S && $pfilter ne "PASS" ); + } +} + +sub isLongAT { + my $seq = shift; + return 1 if ( $seq =~ /T{14,}/ ); + return 1 if ( $seq =~ /A{14,}/ ); + return 0; +} + +sub reorder { + my @chr = @_; + my @chrn = (); # numeric chromosomes + my @nonchrn = (); # non-numeric chrosomes + foreach my $c (@chr) { + if ( $c =~ /\d/ && $c !~ /_/) { + my $t = $c; + $t =~ s/\D//g; + push(@chrn, [$t, $c]); + } else { + next if ( $c eq "X" || $c eq "chrX" || $c eq "Y" || $c eq 
"chrY" ); + next if ( $c eq "MT" || $c eq "chrM" ); + push(@nonchrn, $c); + } + } + @chrn = sort { $a->[0] <=> $b->[0]; } @chrn; + @chr = map { $_->[1]; } @chrn; + if ( $hash{ X } ) { + push(@chr, 'X' ); + } elsif ( $hash{ chrX } ) { + push(@chr, 'chrX' ); + } + if ( $hash{ Y } ) { + push(@chr, 'Y' ); + } elsif ( $hash{ chrY } ) { + push(@chr, 'chrY' ); + } + if ( $hash{ MT } ) { + push(@chr, 'MT' ); + } elsif ( $hash{ chrM } ) { + push(@chr, 'chrM' ); + } + push ( @chr, @nonchrn ); + return (@chr); +} + +sub print_contigs +{ + my ($path) = @_; + if (not defined($path)) {return;} + + open(my $bed_file, "<", $path) + or return; + + while (my $line = <$bed_file>) + { + chomp $line; + my ($name, $start, $end) = split(/\t/, $line); + print "##contig=\n"; + } +} + +sub print_reference { + my $path = shift; + return unless defined($path); + print "##reference=$path\n"; +} + +sub Usage { +print < 12 non-monomer MSI will be considered false positive. For monomers, that number is 10. + -m int + The maximum mean mismatches allowed. Default: 5.25, or if a variant is supported by reads with more than 5.25 mismatches, it'll be considered + false positive. Mismatches don't includes indels in the alignment. + -N Name(s) + The sample name(s). If only one name is given, the matched will be simply names as "name-match". Two names + are given separated by "|", such as "tumor|blood". + -P float + The maximum p-value. Default to 0.05. + -p float + The minimum mean position of variants in the read. Default: 5. + -q float + The minimum mean base quality. Default to 22.5 for Illumina sequencing + -Q float + The minimum mapping quality. Default to 0 for Illumina sequencing + -d integer + The minimum total depth. Default to 5 + -v integer + The minimum variant depth. Default to 3 + -f float + The minimum allele frequency. Default to 0.02 + -o signal/noise + The minimum signal to noise, or the ratio of hi/(lo+0.5). Default to 1.5. Set it higher for deep sequencing. 
+ -F float + The minimum allele frequency to consider to be homozygous. Default to 0.2. Thus frequency > 0.8 (1-0.2) will + be considered homozygous "1/1", between 0.5 - (1-0.2) will be "1/0", between (-f) - 0.5 will be "0/1", + below (-f) will be "0/0". + -b Path to the *.bed file which is used to generate contigs in the header + -G Path to the *.fasta (*.fa) file which is used to generate reference tag in the header + +AUTHOR + Written by Zhongwu Lai, AstraZeneca, Boston, USA + +REPORTING BUGS + Report bugs to zhongwu\@yahoo.com + +COPYRIGHT + This is free software: you are free to change and redistribute it. There is NO WARRANTY, to the extent permitted by law. + +USAGE +exit(0); +} diff --git a/vardictjava_1.8.2/vardict_app.cwl b/vardictjava_1.8.2/vardict_app.cwl index db06bb00..a3f40fbf 100644 --- a/vardictjava_1.8.2/vardict_app.cwl +++ b/vardictjava_1.8.2/vardict_app.cwl @@ -24,7 +24,7 @@ requirements: coresMin: 4 ramMin: 64000 DockerRequirement: - dockerPull: ghcr.io/msk-access/vardictjava + dockerPull: ghcr.io/msk-access/vardictjava:1.8.2 inputs: diff --git a/vardictjava_1.8.2/vardictjava_1.8.2.cwl b/vardictjava_1.8.2/vardictjava_1.8.2.cwl deleted file mode 100644 index e24cb107..00000000 --- a/vardictjava_1.8.2/vardictjava_1.8.2.cwl +++ /dev/null @@ -1,246 +0,0 @@ -class: CommandLineTool -cwlVersion: v1.0 -$namespaces: - dct: 'http://purl.org/dc/terms/' - doap: 'http://usefulinc.com/ns/doap#' - foaf: 'http://xmlns.com/foaf/0.1/' - sbg: 'https://www.sevenbridges.com/' -id: abra2_2_22 -baseCommand: - - java -inputs: - - id: memory_per_job - type: int? - doc: Memory per job in megabytes - - id: memory_overhead - type: int? - doc: Memory overhead per job in megabytes - - id: number_of_threads - type: int? - - id: input_bam - type: - - File - - type: array - items: File - inputBinding: - position: 0 - prefix: '--in' - doc: Required list of input sam or bam file (s) separated by comma - secondaryFiles: - - ^.bai - - id: working_directory - type: string? 
- doc: Set the temp directory (overrides java.io.tmpdir) - - id: reference_fasta - type: File - inputBinding: - position: 0 - prefix: '--ref' - doc: Genome reference location - secondaryFiles: - - .fai - - id: targets - type: File - inputBinding: - position: 0 - prefix: '--targets' - - id: kmer_size - type: string? - inputBinding: - position: 0 - prefix: '--kmer' - doc: >- - Optional assembly kmer size(delimit with commas if multiple sizes - specified) - - id: maximum_average_depth - type: int? - inputBinding: - position: 0 - prefix: '--mad' - doc: >- - Regions with average depth exceeding this value will be downsampled - (default: 1000) - - id: soft_clip_contig - type: string? - inputBinding: - position: 0 - prefix: '--sc' - doc: >- - Soft clip contig args - [max_contigs,min_base_qual,frac_high_qual_bases,min_soft_clip_len] - (default:16,13,80,15) - - id: maximum_mixmatch_rate - type: float? - inputBinding: - position: 0 - prefix: '--mmr' - doc: >- - Max allowed mismatch rate when mapping reads back to contigs (default: - 0.05) - - id: scoring_gap_alignments - type: string? - inputBinding: - position: 0 - prefix: '--sga' - doc: >- - Scoring used for contig alignments(match, - mismatch_penalty,gap_open_penalty,gap_extend_penalty) (default:8,32,48,1) - - id: contig_anchor - type: string? - inputBinding: - position: 0 - prefix: '--ca' - doc: >- - Contig anchor [M_bases_at_contig_edge,max_mismatches_near_edge] - (default:10,2) - - id: window_size - type: string? - inputBinding: - position: 0 - prefix: '--ws' - doc: |- - Processing window size and overlap - (size,overlap) (default: 400,200) - - id: consensus_sequence - type: boolean? 
- inputBinding: - position: 0 - prefix: '--cons' - doc: Use positional consensus sequence when aligning high quality soft clipping - - id: output_bams - type: - - string - - type: array - items: string - inputBinding: - position: 0 - prefix: '--out' - doc: Required list of output sam or bam file (s) separated by comma - - id: ignore_bad_assembly - type: boolean? - inputBinding: - position: 0 - prefix: '--ignore-bad-assembly' - doc: Use this option to avoid parsing errors for corrupted assemblies - - id: bam_index - type: boolean? - inputBinding: - position: 0 - prefix: '--index' - doc: >- - Enable BAM index generation when outputting sorted alignments (may require - additonal memory) - - id: input_vcf - type: File? - inputBinding: - position: 0 - prefix: '--in-vcf' - doc: >- - VCF containing known (or suspected) variant sites. Very large files - should be avoided. - - id: no_edge_complex_indel - type: boolean? - inputBinding: - position: 0 - prefix: '--no-edge-ci' - doc: Prevent output of complex indels at read start or read end - - id: no_sort - type: boolean? 
- inputBinding: - position: 0 - prefix: '--nosort' - doc: Do not attempt to sort final output -outputs: - - id: abra_realigned_bam - type: - - 'null' - - File - - type: array - items: File - outputBinding: - glob: |- - ${ - return inputs.output_bams - } - secondaryFiles: - - ^.bai -label: abra2_2.22 -arguments: - - position: 0 - valueFrom: |- - ${ - if (inputs.memory_per_job && inputs.memory_overhead) { - - if (inputs.memory_per_job % 1000 == 0) { - - return "-Xmx" + (inputs.memory_per_job / 1000).toString() + "G" - } - else { - - return "-Xmx" + Math.floor((inputs.memory_per_job / 1000)).toString() + "G" - } - } - else if (inputs.memory_per_job && !inputs.memory_overhead) { - - if (inputs.memory_per_job % 1000 == 0) { - - return "-Xmx" + (inputs.memory_per_job / 1000).toString() + "G" - } - else { - - return "-Xmx" + Math.floor((inputs.memory_per_job / 1000)).toString() + "G" - } - } - else if (!inputs.memory_per_job && inputs.memory_overhead) { - - return "-Xmx20G" - } - else { - - return "-Xmx20G" - } - } - - position: 0 - prefix: '-jar' - valueFrom: /usr/local/bin/abra2.jar - - position: 0 - prefix: '--threads' - valueFrom: |- - ${ - if(inputs.number_of_threads) - return inputs.number_of_threads - return runtime.cores - } - - position: 0 - prefix: '--tmpdir' - valueFrom: |- - ${ - if(inputs.working_directory) - return inputs.working_directory; - return runtime.tmpdir - } -requirements: - - class: ResourceRequirement - ramMin: 60000 - coresMin: 16 - - class: DockerRequirement - dockerPull: 'ghcr.io/msk-access/abra2:2.22' - - class: InlineJavascriptRequirement -'dct:contributor': - - class: 'foaf:Organization' - 'foaf:member': - - class: 'foaf:Person' - 'foaf:mbox': 'mailto:kumarn1@mskcc.org' - 'foaf:name': Nikhil Kumar - 'foaf:name': Memorial Sloan Kettering Cancer Center -'dct:creator': - - class: 'foaf:Organization' - 'foaf:member': - - class: 'foaf:Person' - 'foaf:mbox': 'mailto:shahr2@mskcc.org' - 'foaf:name': Ronak Shah - 'foaf:name': Memorial Sloan 
Kettering Cancer Center -'doap:release': - - class: 'doap:Version' - 'doap:name': abra2 - 'doap:revision': 2.22 From 15cff4e5b07cc9f258cf3a9bf37cbd858cd1fc65 Mon Sep 17 00:00:00 2001 From: svural Date: Mon, 24 Jan 2022 15:25:00 -0500 Subject: [PATCH 429/476] corrected a typo and tested --- vardictjava_1.8.2/example_inputs.yaml | 50 ++- .../{testsomatic.cwl => teststrandbias.cwl} | 10 +- vardictjava_1.8.2/var_to_vcf.cwl | 113 +----- vardictjava_1.8.2/vardict_app.cwl | 362 +----------------- ...cwl => vardict_workflow_single_sample.cwl} | 145 +------ 5 files changed, 82 insertions(+), 598 deletions(-) rename vardictjava_1.8.2/{testsomatic.cwl => teststrandbias.cwl} (65%) rename vardictjava_1.8.2/{vardict.cwl => vardict_workflow_single_sample.cwl} (52%) diff --git a/vardictjava_1.8.2/example_inputs.yaml b/vardictjava_1.8.2/example_inputs.yaml index 3ba11c9e..c2af136f 100644 --- a/vardictjava_1.8.2/example_inputs.yaml +++ b/vardictjava_1.8.2/example_inputs.yaml @@ -1,18 +1,52 @@ G: class: File - path: "b37.fasta" + path: "/work/access/production/resources/reference/current/Homo_sapiens_assembly19.fasta" b: class: File - path: "/path/to/normal/bam" -b2: - class: File - path: "/path/to/tumor/bam" + path: "/work/ci/vurals/vardict_resources/Myeloid200-1-05500HJ_cl_aln_srt_MD_IR_FX_BR__aln_srt_IR_FX-duplex.bam" bedfile: class: File - path: "/path/to/bed/file" + path: "/work/ci/vurals/vardict_resources/picard_baits.bed" c: "1" +f: "0" S: "2" E: "3" -g: "4" +g: "5" th: "4" -vcf: "output.vcf" +vcf: "test.vcf" +N: "testMyeloidtest" +f_1: "0" +# N: "tumor_sample_name" +# N2: "normal_sample_name" + +# +# bam_index: true +# no_edge_complex_indel: true +# consensus_sequence: +# contig_anchor: +# ignore_bad_assembly: +# input_bam: +# class: File +# path: "path/to/alignment.bam" +# input_vcf: +# kmer_size: +# maximum_average_depth: +# maximum_mixmatch_rate: +# memory_overhead: +# memory_per_job: +# no_sort: +# number_of_threads: +# output_bam: name_of_realigned_abra.bam +# 
path_to_abra: +# reference_fasta: +# class: File +# path: "/path/to/reference.fasta" +# scoring_gap_alignments: +# soft_clip_contig: +# targets: +# class: File +# metadata: {} +# path: "/path/to/target.bed" +# secondaryFiles: [] +# window_size: +# working_directory: diff --git a/vardictjava_1.8.2/testsomatic.cwl b/vardictjava_1.8.2/teststrandbias.cwl similarity index 65% rename from vardictjava_1.8.2/testsomatic.cwl rename to vardictjava_1.8.2/teststrandbias.cwl index 02b00c0f..22e602ba 100644 --- a/vardictjava_1.8.2/testsomatic.cwl +++ b/vardictjava_1.8.2/teststrandbias.cwl @@ -3,8 +3,8 @@ cwlVersion: v1.0 class: CommandLineTool baseCommand: - Rscript -- /usr/bin/vardict/testsomatic.R -id: testsomatic +- /usr/bin/vardict/bin/teststrandbias.R +id: teststrandbias requirements: InlineJavascriptRequirement: {} @@ -12,7 +12,7 @@ requirements: coresMin: 2 ramMin: 12000 DockerRequirement: - dockerPull: mskcc/roslin-variant-vardict:1.5.1 + dockerPull: ghcr.io/msk-access/vardictjava:1.8.2 inputs: @@ -22,7 +22,7 @@ outputs: output_var: type: File? outputBinding: - glob: output_testsomatic.var + glob: output_teststrandbias.var stdin: $(inputs.input_vardict.path) -stdout: output_testsomatic.var +stdout: output_teststrandbias.var diff --git a/vardictjava_1.8.2/var_to_vcf.cwl b/vardictjava_1.8.2/var_to_vcf.cwl index 670fcfca..eacfc439 100644 --- a/vardictjava_1.8.2/var_to_vcf.cwl +++ b/vardictjava_1.8.2/var_to_vcf.cwl @@ -3,7 +3,7 @@ cwlVersion: v1.0 class: CommandLineTool baseCommand: - perl -- /usr/bin/vardict/var2vcf_paired.pl +- /usr/bin/vardict/bin/var2vcf_valid.pl id: vardict_var2vcf arguments: @@ -17,72 +17,21 @@ requirements: coresMin: 4 ramMin: 32000 DockerRequirement: - dockerPull: mskcc/roslin-variant-vardict:1.5.1 + dockerPull: ghcr.io/msk-access/vardictjava:1.8.2 inputs: - C: - type: boolean? - doc: Indicate the chromosome names are just numbers, such as 1, 2, not chr1, chr2 - inputBinding: - position: 0 - prefix: -C - - D: - type: float? - doc: Debug mode. 
Will print some error messages and append full genotype at the end. - inputBinding: - position: 0 - prefix: -D - F: - type: float? - doc: The hexical to filter reads using samtools. Default - 0x500 (filter 2nd alignments and duplicates). Use -F 0 to turn it off. - inputBinding: - position: 0 - prefix: -F - - I: - type: int? - doc: The indel size. Default - 120bp - inputBinding: - position: 0 - prefix: -I - - M: + S: type: boolean? - doc: The minimum matches for a read to be considered. If, after soft-clipping, the matched bp is less than INT, then the read is discarded. It's meant for PCR based targeted sequencing where there's no insert and the matching is only the primers. Default - 0, or no filtering + doc: If set variants that didnt pass filters will not be present in VCF file. inputBinding: position: 0 - prefix: -M + prefix: -S N: type: string? doc: Tumor Sample Name - N2: - type: string? - doc: Normal Sample Name - - P: - type: float? - doc: The read position filter. If the mean variants position is less that specified, it's considered false positive. Default - 5 - inputBinding: - position: 0 - prefix: -P - - Q: - type: string? - doc: If set, reads with mapping quality less than INT will be filtered and ignored - inputBinding: - position: 0 - prefix: -Q - - S: - type: boolean? - doc: The column for region start, e.g. gene start - inputBinding: - position: 0 - prefix: -S f: type: string? doc: The threshold for allele frequency, default - 0.05 or 5%% @@ -90,60 +39,10 @@ inputs: position: 0 prefix: -f - m: - type: int? - doc: If set, reads with mismatches more than INT will be filtered and ignored. Gaps are not counted as mismatches. Valid only for bowtie2/TopHat or BWA aln followed by sampe. BWA mem is calculated as NM - Indels. Default - 8, or reads with more than 8 mismatches will not be used. - inputBinding: - position: 0 - prefix: -m - o: - type: float? - doc: The Qratio of (good_quality_reads)/(bad_quality_reads+0.5). The quality is defined by -q option. 
Default - 1.5 - inputBinding: - position: 0 - prefix: -o - - p: - doc: Do pileup regarless the frequency - type: float? - inputBinding: - position: 0 - prefix: -p - vcf: type: string doc: output vcf file - A: - type: boolean? - inputBinding: - position: 0 - prefix: -A - - c: - type: int? - inputBinding: - position: 0 - prefix: -c - - q: - type: float? - inputBinding: - position: 0 - prefix: -q - - d: - type: int? - inputBinding: - position: 0 - prefix: -d - - v: - type: int? - inputBinding: - position: 0 - prefix: -v - input_vcf: type: File? @@ -154,4 +53,4 @@ outputs: glob: ${ return inputs.vcf; } stdin: $(inputs.input_vcf.path) -stdout: ${ return inputs.vcf; } \ No newline at end of file +stdout: ${ return inputs.vcf; } diff --git a/vardictjava_1.8.2/vardict_app.cwl b/vardictjava_1.8.2/vardict_app.cwl index a3f40fbf..d40ff33f 100644 --- a/vardictjava_1.8.2/vardict_app.cwl +++ b/vardictjava_1.8.2/vardict_app.cwl @@ -8,11 +8,11 @@ id: vardict arguments: - position: 1 prefix: -b - valueFrom: "${\n return inputs.b.path + \"|\" + inputs.b2.path;\n}" -- position: 0 - prefix: -N - valueFrom: "${\n if (inputs.N2)\n return [inputs.N, inputs.N2];\n else\n\ - \ return inputs.N;\n}" + valueFrom: "${\n return inputs.b.path;\n}" +# - position: 0 +# prefix: -N +# valueFrom: "${\n if (inputs.N2)\n return [inputs.N, inputs.N2];\n else\n\ +# \ return inputs.N;\n}" requirements: @@ -27,42 +27,6 @@ requirements: dockerPull: ghcr.io/msk-access/vardictjava:1.8.2 inputs: - - B: - type: int? - doc: The minimum # of reads to determine strand bias, default: 2 - inputBinding: - position: 0 - prefix: -B - - C: - type: boolean? - doc: Indicate the chromosome names are just numbers, such as 1, 2, not chr1, chr2 (deprecated!!!) - inputBinding: - position: 0 - prefix: -C - - D: - type: boolean? - doc: Debug mode. Will print some error messages and append full genotype at the end. - inputBinding: - position: 0 - prefix: -D - - E: - type: string? - doc: The column for region end, e.g. 
gene end - inputBinding: - position: 0 - prefix: -E - - F: - type: boolean? - doc: The hexical to filter reads using samtools. Default - 0x500 (filter 2nd alignments and duplicates). Use -F 0 to turn it off. - inputBinding: - position: 0 - prefix: -F - G: type: File doc: The reference fasta. Should be indexed (.fai) @@ -72,178 +36,23 @@ inputs: position: 0 prefix: -G - H: - type: boolean? - doc: Print this help page - inputBinding: - position: 0 - prefix: -H - - I: - type: string? - doc: #The indel size. Default: 50bp. Be cautious with -I option, especially in the amplicon mode, as amplicon sequencing is not a way to find large indels. Increasing the search size might be slow and false positives may appear in low complexity regions. Increasing it to 200-300 bp is only recommend for hybrid capture sequencing. - inputBinding: - position: 0 - prefix: -I - - J: - type: boolean? - doc: The genomic position that CRISPR/Cas9 suppose to cut, typically 3bp from the PAM NGG site and within the guide. For CRISPR mode only. It will adjust the variants (mostly In-Del) start and end sites to as close to this location as possible, if there are alternatives. The option should only be used for CRISPR mode. - inputBinding: - position: 0 - prefix: -J - - K: - type: boolean? - doc: Include Ns in the total depth calculation - inputBinding: - position: 0 - prefix: -K - - L: - type: boolean? - doc: The minimum structural variant length to be presented using , etc. Default 1000. Any indel, complex variants less than this will be spelled out with exact nucleotides - inputBinding: - position: 0 - prefix: -L - - M: + f: type: string? - doc: The minimum matches for a read to be considered. If, after soft-clipping, the matched bp is less than INT, then the read is discarded. It's meant for PCR based targeted sequencing where there's no insert and the matching is only the primers. 
Default - 0, or no filtering + doc: The threshold for allele frequency, default - 0.01 or 1%% inputBinding: position: 0 - prefix: -M + prefix: -f N: type: string? doc: Tumor Sample Name - N2: - type: string? - doc: Normal Sample Name - - O: - type: string? - doc: The reads should have at least mean MapQ to be considered a valid variant. Default - no filtering - inputBinding: - position: 0 - prefix: -O - - P: - type: string? - doc: The read position filter. If the mean variants position is less that specified, its considered false positive. Default 5 - inputBinding: - position: 0 - prefix: -P - - Q: - type: string? - doc: If set, reads with mapping quality less than INT will be filtered and ignored - inputBinding: - position: 0 - prefix: -Q - - R: - type: string? - doc: The region of interest. In the format of chr -start-end. If end is omitted, then a single position. No BED is needed. - inputBinding: - position: 0 - prefix: -R - - S: - type: string? - doc: The column for region start, e.g. gene start - inputBinding: - position: 0 - prefix: -S - - T: - type: string? - doc: Trim bases after [INT] bases in the reads - inputBinding: - position: 0 - prefix: -T - - U: - type: string? - doc: Turn off structural variant calling - inputBinding: - position: 0 - prefix: -U - - UN: - type: string? - doc: Indicate unique mode, which when mate pairs overlap, the overlapping part will be counted only once using first read only. Default unique mode disabled, all reads are counted. - inputBinding: - position: 0 - prefix: -UN - - V: - type: string? - doc: The lowest frequency in normal sample allowed for a putative somatic mutations. Default to 0.05 - inputBinding: - position: 0 - prefix: -V - - VS: - type: string? - doc: How strict to be when reading a SAM or BAM. STRICT - throw an exception if something looks wrong. LENIENT - Emit warnings but keep going if possible. SILENT - Like LENIENT, only dont emit warning messages. 
Default - LENIENT - inputBinding: - position: 0 - prefix: -VS - - W: - type: string? - doc: The insert size STD. Used for SV calling. Default 100 - inputBinding: - position: 0 - prefix: -W - - X: - type: string? - doc: Extension of bp to look for mismatches after insersion or deletion. Default to 2 bp, or only calls when they are within 2 bp. - inputBinding: - position: 0 - prefix: -X - - Y: - type: string? - doc: Extension of bp of reference to build lookup table. Default to 1200 bp. Increasing the number will slow down the program. The main purpose is to call large indels with 1000 bp that can be missed by discordant mate pairs. - inputBinding: - position: 0 - prefix: -Y - - Z: - type: string? - doc: For downsampling fraction. e.g. 0.7 means roughly 70%% downsampling. Default - No downsampling. Use with caution. The downsampling will be random and non-reproducible. - inputBinding: - position: 0 - prefix: -Z - - a: - type: string? - doc: Indicate its amplicon based calling. Reads dont map to the amplicon will be skipped. A read pair is considered belonging the amplicon if the edges are less than int bp to the amplicon, and overlap fraction is at least float. Default 10 0.95 - inputBinding: - position: 0 - prefix: -a - b: type: File? secondaryFiles: - .bai doc: Tumor bam - b2: - type: File? - secondaryFiles: - - .bai - doc: Normal bam - - bedfile: - type: File? - inputBinding: - position: 1 - c: type: string? doc: The column for chromosome @@ -251,26 +60,19 @@ inputs: position: 0 prefix: -c - d: - type: string? - doc: The delimiter for split region_info, default to tab "\t" - inputBinding: - position: 0 - prefix: -d - - e: + S: type: string? - doc: The column for segment ends in the region, e.g. exon ends + doc: The column for region start, e.g. gene start inputBinding: position: 0 - prefix: -e + prefix: -S - f: + E: type: string? - doc: The threshold for allele frequency, default - 0.01 or 1%% + doc: The column for region end, e.g. 
gene end inputBinding: position: 0 - prefix: -f + prefix: -E g: type: string? @@ -279,139 +81,11 @@ inputs: position: 0 prefix: -g - h: - type: boolean? - doc: Print a header row decribing columns - inputBinding: - position: 0 - prefix: -h - - i: - type: boolean? - doc: Output splicing read counts - inputBinding: - position: 0 - prefix: -i - - j: - type: boolean? - doc: CRISPR_filtering_bp In CRISPR mode, the minimum amount in bp that a read needs to overlap with cutting site. If a read does not meet the criteria, it will not be used for variant calling, since it is likely just a partially amplified PCR. Default not set, or no filtering - inputBinding: - position: 0 - prefix: -j - - - k: - type: string? - doc: Indicate whether to perform local realignment. Default - 1. Set to 0 to disable it. For Ion or PacBio, 0 is recommended. - inputBinding: - position: 0 - prefix: -k - - m: - type: string? - doc: If set, reads with mismatches more than INT will be filtered and ignored. Gaps are not counted as mismatches. Valid only for bowtie2/TopHat or BWA aln followed by sampe. BWA mem is calculated as NM - Indels. Default - 8, or reads with more than 8 mismatches will not be used. - inputBinding: - position: 0 - prefix: -m - - n: - type: string? - doc: The regular expression to extract sample name from bam filenames. Default to - /([^\/\._]+?)_[^\/]*.bam/ - inputBinding: - position: 0 - prefix: -n - - o: - type: string? - doc: The Qratio of (good_quality_reads)/(bad_quality_reads+0.5). The quality is defined by -q option. Default - 1.5 - inputBinding: - position: 0 - prefix: -o - - p: - type: boolean? - doc: Do pileup regarless the frequency - inputBinding: - position: 0 - prefix: -p - - q: - type: string? - doc: The phred score for a base to be considered a good call. Default - 22.5 (for Illumina) For PGM, set it to ~15, as PGM tends to under estimate base quality. - inputBinding: - position: 0 - prefix: -q - - r: - type: string? 
- doc: The minimum # of variance reads, default: 2 - inputBinding: - position: 0 - prefix: -r - - s: - type: string? - doc: The column for segment starts in the region, e.g. exon starts - inputBinding: - position: 0 - prefix: -s - - t: - type: boolean? - doc: Indicate to remove duplicated reads. Only one pair with same start positions will be kept - inputBinding: - position: 0 - prefix: -t - - th: - type: string? - doc: Threads count. - inputBinding: - position: 0 - prefix: -th - - three: - type: boolean? - doc: Indicate to move indels to 3-prime if alternative alignment can be achieved. - inputBinding: - position: 0 - prefix: '-3' - - u: - type: boolean? - doc: Indicate unique mode, which when mate pairs overlap, the overlapping part will be counted only once using forward read only. Default unique mode disabled, all reads are counted. - inputBinding: - position: 0 - prefix: -u - - w: - type: string? - doc: The insert size. Used for SV calling. Default 300 - inputBinding: - position: 0 - prefix: -w - - x: - type: string? - doc: The number of nucleotide to extend for each segment, default 0 - inputBinding: - position: 0 - prefix: -x - - y: - type: string? - doc: Verbose - inputBinding: - position: 0 - prefix: -y - - z: - type: string? - doc: #Indicate whether the BED file contains zero-based coordinates, the same way as the Genome browser IGV does. -z 1 indicates that coordinates in a BED file start from 0. -z 0 indicates that the coordinates start from 1. Default 1 for a BED file or amplicon BED file (0-based). Use 0 to turn it off. When using -R option, it is set to 0 + bedfile: + type: File? 
inputBinding: - position: 0 - prefix: -z + position: 1 +#####~~~~~~~~~~#####~~~~~~~~~~~~#####~~~~~~~~~~######~~~~~~~~###### outputs: output: diff --git a/vardictjava_1.8.2/vardict.cwl b/vardictjava_1.8.2/vardict_workflow_single_sample.cwl similarity index 52% rename from vardictjava_1.8.2/vardict.cwl rename to vardictjava_1.8.2/vardict_workflow_single_sample.cwl index a7b58f32..685aadc8 100644 --- a/vardictjava_1.8.2/vardict.cwl +++ b/vardictjava_1.8.2/vardict_workflow_single_sample.cwl @@ -17,116 +17,30 @@ requirements: inputs: bedfile: type: File? - B: - type: int? - C: - type: boolean? - D: - type: boolean? - E: - type: string? - F: - type: boolean? + G: type: File secondaryFiles: ['.fai'] - H: - type: boolean? - I: - type: string? - J: - type: boolean? - K: - type: boolean? - L: - type: string? - M: + f: type: string? N: type: string? - N2: - type: string? - O: - type: string? - P: - type: string? - Q: - type: string? - R: - type: string? - S: - type: string? - T: - type: string? - U: - type: string? - UN: - type: string? - V: - type: string? - VS: - type: string? - W: - type: string? - X: - type: string? - Y: - type: string? - Z: - type: string? - - a: - type: string? - b2: - type: File? - secondaryFiles: ['.bai'] b: type: File? secondaryFiles: ['.bai'] c: type: string? - d: - type: string? - e: + S: type: string? - f: + E: type: string? g: type: string? - hh: - type: boolean? - i: - type: boolean? - j: - type: string? - k: - type: string? - m: - type: string? - n: - type: string? - o: - type: string? - p: - type: boolean? - q: - type: string? - r: - type: string? - s: - type: string? - t: - type: boolean? - th: - type: string? - x: - type: string? - z: - type: string? vcf: type: string? f_1: type: string? 
+ outputs: output: type: File @@ -135,54 +49,18 @@ steps: vardict: run: ./vardict_app.cwl in: - B: B - C: C - D: D - E: E - F: F G: G - H: H - I: I - M: M + f: f N: N - O: O - P: P - Q: Q - R: R S: S - T: T - V: V - VS: VS - X: X - Z: Z - a: a + c: c b: b - b2: b2 bedfile: bedfile - c: c - d: d - e: e - f: f + E: E g: g - h: hh - i: i - k: k - m: m - n: n - o: o - p: p - q: q - r: r - t: t - th: th - v: - valueFrom: ${ return inputs.vcf.replace(".vcf", "_tmp.vcf") } - vcf: vcf - x: x - z: z out: [output] - testsomatic: - run: ./testsomatic.cwl + teststrandbias: + run: ./teststrandbias.cwl in: input_vardict: vardict/output out: [output_var] @@ -190,10 +68,9 @@ steps: run: ./var_to_vcf.cwl in: N: N - N2: N2 f: f_1 vcf: vcf - input_vcf: testsomatic/output_var + input_vcf: teststrandbias/output_var out: [output] 'dct:contributor': From a1c341d7ecf0ffd17ba5e1ef6b3bf31dd0b7ce90 Mon Sep 17 00:00:00 2001 From: svural Date: Mon, 24 Jan 2022 15:51:08 -0500 Subject: [PATCH 430/476] made yaml file generic --- vardictjava_1.8.2/example_inputs.yaml | 44 +++------------------------ vardictjava_1.8.2/run_command_cwl.sh | 8 +++++ 2 files changed, 13 insertions(+), 39 deletions(-) create mode 100755 vardictjava_1.8.2/run_command_cwl.sh diff --git a/vardictjava_1.8.2/example_inputs.yaml b/vardictjava_1.8.2/example_inputs.yaml index c2af136f..81f4d3b0 100644 --- a/vardictjava_1.8.2/example_inputs.yaml +++ b/vardictjava_1.8.2/example_inputs.yaml @@ -1,52 +1,18 @@ G: class: File - path: "/work/access/production/resources/reference/current/Homo_sapiens_assembly19.fasta" + path: "/path/to/ref/file" b: class: File - path: "/work/ci/vurals/vardict_resources/Myeloid200-1-05500HJ_cl_aln_srt_MD_IR_FX_BR__aln_srt_IR_FX-duplex.bam" + path: "/path/to/bam/file" bedfile: class: File - path: "/work/ci/vurals/vardict_resources/picard_baits.bed" + path: "/path/to/bed/file" c: "1" f: "0" S: "2" E: "3" g: "5" th: "4" -vcf: "test.vcf" -N: "testMyeloidtest" +vcf: "output.vcf" +N: "name" f_1: "0" -# N: 
"tumor_sample_name" -# N2: "normal_sample_name" - -# -# bam_index: true -# no_edge_complex_indel: true -# consensus_sequence: -# contig_anchor: -# ignore_bad_assembly: -# input_bam: -# class: File -# path: "path/to/alignment.bam" -# input_vcf: -# kmer_size: -# maximum_average_depth: -# maximum_mixmatch_rate: -# memory_overhead: -# memory_per_job: -# no_sort: -# number_of_threads: -# output_bam: name_of_realigned_abra.bam -# path_to_abra: -# reference_fasta: -# class: File -# path: "/path/to/reference.fasta" -# scoring_gap_alignments: -# soft_clip_contig: -# targets: -# class: File -# metadata: {} -# path: "/path/to/target.bed" -# secondaryFiles: [] -# window_size: -# working_directory: diff --git a/vardictjava_1.8.2/run_command_cwl.sh b/vardictjava_1.8.2/run_command_cwl.sh new file mode 100755 index 00000000..0a244c2c --- /dev/null +++ b/vardictjava_1.8.2/run_command_cwl.sh @@ -0,0 +1,8 @@ +module load singularity/3.7.1 +module load cwl/cwltool +cwltool \ + --cachedir /work/ci/vurals/access_related/vardictjava_1.8.2-2/tmp/cache \ + --outdir /work/ci/vurals/access_related/vardictjava_1.8.2-2/myoutput \ + --tmpdir-prefix /work/ci/vurals/access_related/vardictjava_1.8.2-2/tmp/tmp \ + --singularity \ + --non-strict vardict_workflow_single_sample.cwl example_inputs.yaml From 4d9dc84f2d97981f7273192abb94a17a6a9f2a6d Mon Sep 17 00:00:00 2001 From: svural Date: Mon, 24 Jan 2022 17:06:11 -0500 Subject: [PATCH 431/476] deleted extra stuff and testing lines Docker file --- vardictjava_1.8.2/container/Dockerfile | 10 +- vardictjava_1.8.2/container/run_test.sh | 20 - vardictjava_1.8.2/container/testsomatic.R | 53 --- vardictjava_1.8.2/container/var2vcf_paired.pl | 380 ------------------ vardictjava_1.8.2/run_command_cwl.sh | 8 - 5 files changed, 1 insertion(+), 470 deletions(-) delete mode 100755 vardictjava_1.8.2/container/run_test.sh delete mode 100755 vardictjava_1.8.2/container/testsomatic.R delete mode 100755 vardictjava_1.8.2/container/var2vcf_paired.pl delete mode 100755 
vardictjava_1.8.2/run_command_cwl.sh diff --git a/vardictjava_1.8.2/container/Dockerfile b/vardictjava_1.8.2/container/Dockerfile index ddaf5599..cd6c9371 100644 --- a/vardictjava_1.8.2/container/Dockerfile +++ b/vardictjava_1.8.2/container/Dockerfile @@ -12,9 +12,6 @@ LABEL maintainer="Nikhil Kumar (kumarn1@mskcc.org)" \ ENV VARDICT_VERSION 1.8.2 -COPY runscript.sh /usr/bin/runscript.sh -COPY run_test.sh /run_test.sh - RUN apk add --update \ && apk add ca-certificates openssl bash perl \ && apk add openjdk8-jre-base \ @@ -22,9 +19,4 @@ RUN apk add --update \ && cd /tmp && wget https://github.com/AstraZeneca-NGS/VarDictJava/releases/download/v${VARDICT_VERSION}/VarDict-${VARDICT_VERSION}.zip \ && unzip VarDict-${VARDICT_VERSION}.zip \ && mv /tmp/VarDict-${VARDICT_VERSION} /usr/bin/vardict \ - && rm -rf /var/cache/apk/* /tmp/* \ - && chmod +x /usr/bin/runscript.sh \ - && exec /run_test.sh - -COPY testsomatic.R /usr/bin/vardict/ -COPY var2vcf_paired.pl /usr/bin/vardict/ + && rm -rf /var/cache/apk/* /tmp/* diff --git a/vardictjava_1.8.2/container/run_test.sh b/vardictjava_1.8.2/container/run_test.sh deleted file mode 100755 index efed55b4..00000000 --- a/vardictjava_1.8.2/container/run_test.sh +++ /dev/null @@ -1,20 +0,0 @@ -# get actual output of the tool -actual=$(exec /usr/bin/runscript.sh vardict | head -1) - -# expected output -expected=$(cat << EOM -usage: vardict [-n name_reg] [-b bam] [-c chr] [-S start] [-E end] [-s seg_starts] [-e seg_ends] [-x #_nu] [-g gene] [-f freq] [-r #_reads] -EOM -) - -expected_no_space=$(echo $expected | tr -d "[:space:]") -actual_no_space=$(echo $actual | tr -d "[:space:]") -# diff -if [ "$actual_no_space" != "$expected_no_space" ] -then - echo "-----expected-----" - echo $expected - echo "-----actual-----" - echo $actual - exit 1 -fi \ No newline at end of file diff --git a/vardictjava_1.8.2/container/testsomatic.R b/vardictjava_1.8.2/container/testsomatic.R deleted file mode 100755 index cc6dc092..00000000 --- 
a/vardictjava_1.8.2/container/testsomatic.R +++ /dev/null @@ -1,53 +0,0 @@ -#!/usr/bin/env Rscript - -#args <- commandArgs(trailingOnly = TRUE) - -myfile = file("stdin") -open(myfile, blocking=TRUE) -myinput = readLines(myfile) # read from stdin -if (length(myinput) > 0 ){ - mynumcols = sapply(gregexpr("\\t", myinput[1]), length) + 1 # count num of tabs + 1 -}else{ - mynumcols = 0 - d = matrix(0,0,0) -} - -if (mynumcols >= 48) { - d <- read.table( textConnection(myinput), sep = "\t", header = F, colClasses=c("character", NA, NA, NA, NA, "character", "character", NA, NA, NA, NA, NA, NA, "character", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "character", NA, "character", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "character", "character", "character", "character")) -} else if (mynumcols > 0){ - stop("Incorrect input detected in testsomatic.R") -} - -if (nrow(d) > 0){ - pvalues1 <- vector(mode="double", length=dim(d)[1]) - oddratio1 <- vector(mode="double", length=dim(d)[1]) - pvalues2 <- vector(mode="double", length=dim(d)[1]) - oddratio2 <- vector(mode="double", length=dim(d)[1]) - pvalues <- vector(mode="double", length=dim(d)[1]) - oddratio <- vector(mode="double", length=dim(d)[1]) - - for( i in 1:dim(d)[1] ) { - h <- fisher.test(matrix(c(d[i,10], d[i,11], d[i,12], d[i,13]), nrow=2)) - pvalues1[i] <- round(h$p.value, 5) - oddratio1[i] <- round(h$estimate, 5) - h <- fisher.test(matrix(c(d[i,28], d[i,29], d[i,30], d[i,31]), nrow=2)) - pvalues2[i] <- round(h$p.value, 5) - oddratio2[i] <- round(h$estimate, 5) - tref <- if ( d[i,8] - d[i,9] < 0 ) 0 else d[i,8] - d[i,9] - rref <- if ( d[i,26] - d[i,27] < 0 ) 0 else d[i,26] - d[i,27] - h <- fisher.test(matrix(c(d[i,9], tref, d[i,27], rref), nrow=2), alternative="greater") - pv <- h$p.value - od <- h$estimate - h <- fisher.test(matrix(c(d[i,9], tref, d[i,27], rref), nrow=2), alternative="less") - if ( h$p.value < pv ) { - pv <- h$p.value - od <- h$estimate - } - pvalues[i] <- round(pv, 5) - 
oddratio[i] <- round(od, 5) - } - curscipen <- getOption("scipen") - options(scipen=999) - write.table(data.frame(d[,1:25], pvalues1, oddratio1, d[,26:43], pvalues2, oddratio2, d[, 44:dim(d)[2]], pvalues, oddratio), file = "", quote = F, sep = "\t", eol = "\n", row.names=F, col.names=F) - options(scipen=curscipen) -} diff --git a/vardictjava_1.8.2/container/var2vcf_paired.pl b/vardictjava_1.8.2/container/var2vcf_paired.pl deleted file mode 100755 index 98799cf3..00000000 --- a/vardictjava_1.8.2/container/var2vcf_paired.pl +++ /dev/null @@ -1,380 +0,0 @@ -#!/usr/bin/env perl - -use warnings; -use Getopt::Std; -use strict; - -our ($opt_d, $opt_v, $opt_f, $opt_h, $opt_H, - $opt_p, $opt_q, $opt_F, $opt_S, $opt_Q, - $opt_o, $opt_C, $opt_M, $opt_P, $opt_N, - $opt_I, $opt_m, $opt_c, $opt_D, $opt_t, - $opt_r, $opt_O, $opt_X, $opt_k, $opt_V, - $opt_x, $opt_A, $opt_b, $opt_G); - -our $VERSION = "1.8.2"; - -getopts('htHSCMAd:v:f:p:q:F:Q:o:P:N:m:c:I:D:r:O:X:k:V:x:b:G:') || Usage(); -($opt_h || $opt_H) && Usage(); - -my $MinDepth = $opt_d ? $opt_d : 5; -my $VarDepth = $opt_v ? $opt_v : 3; -my $FREQ = defined($opt_f) ? $opt_f : 0.02; -my $PMEAN = defined($opt_p) ? $opt_p : 8; -my $QMEAN = defined($opt_q) ? $opt_q : 22.5; # base quality -my $MQMEAN = defined($opt_Q) ? $opt_Q : 0; # mapping quality -my $GTFREQ = defined($opt_F) ? $opt_F : 0.2; # Genotype frequency -my $SN = defined($opt_o) ? $opt_o : 1.5; # Signal to Noise -my $PVAL = defined($opt_P) ? $opt_P : 0.05; # the p-value from fisher test -my $DIFF = defined($opt_D) ? $opt_D : 0.2; -$opt_I = $opt_I ? $opt_I : 12; -$opt_m = $opt_m ? $opt_m : 5.25; -$opt_c = $opt_c ? 
$opt_c : 0; - -my %hash; -my $sample="tumor"; -while(<>) { - chomp; - next if (/R_HOME/); - my @a = split(/\t/); - $sample = $a[0]; - my $chr = $a[2]; - push( @{ $hash{ $chr }->{ $a[3] } }, \@a ); -} -my $samplem = "${sample}-match"; - -if ( $opt_N ) { - ($sample, $samplem) = split(/\|/, $opt_N); - $samplem = "${sample}-match" unless( $samplem ); -} -(my $sample_nowhitespace = $sample) =~ s/\s/_/g; - -print < -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= 1 indicates MSI"> -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= 0.01/5**vd2 (means the evidence in tumor sample might be weak, e.g. small diff in AF)"> -##INFO= -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##FILTER= 0.05, thus not somatic"> -##FILTER= -##FILTER= -##FILTER= -##FILTER= $PVAL"> -##FILTER= -##FILTER= -##FILTER== $opt_m, thus likely false positive"> -##FILTER= -##FILTER= -##FILTER= -##FILTER==14)"> -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -VCFHEADER - -print join("\t", "#CHROM", qw(POS ID REF ALT QUAL FILTER INFO FORMAT), $sample, $samplem), "\n"; - -# Exit if we don't have any variants to write -exit(0) unless( %hash ); - -my @chrs = reorder(keys %hash); -foreach my $chr (@chrs) { - my @pos = sort { $a <=> $b } (keys %{ $hash{ $chr } }); - my ($pds, $pde) = (0, 0); # previous Deletion variant's start and end - my ($pis, $pie) = (0, 0); # previous Insertion variant's start and end - my ($pvs, $pve) = (0, 0); # previous SNV variant's start and end - my ($pinfo1, $pfilter, $pinfo2) = ("", "", ""); - foreach my $p (@pos) { - my @tmp = sort { $b->[14] <=> $a->[14] } @{ $hash{ $chr }->{ $p } }; - my $ALL = $opt_A ? 
@tmp + 0 : 1; - my %seen = (); - for(my $i = 0; $i < $ALL; $i++) { - my $d = $tmp[$i]; # Only the highest AF get represented - my ($sample, $gene, $chrt, $start, $end, $ref, $alt, $dp1, $vd1, $rfwd1, $rrev1, $vfwd1, $vrev1, $gt1, $af1, $bias1, $pmean1, $pstd1, $qual1, $qstd1, $mapq1, $sn1, $hiaf1, $adjaf1, $nm1, $sbf1, $oddratio1, $dp2, $vd2, $rfwd2, $rrev2, $vfwd2, $vrev2, $gt2, $af2, $bias2, $pmean2, $pstd2, $qual2, $qstd2, $mapq2, $sn2, $hiaf2, $adjaf2, $nm2, $sbf2, $oddratio2, $shift3, $msi, $msilen, $lseq, $rseq, $seg, $status, $type, $sv1, $duprate1, $sv2, $duprate2, $pvalue, $oddratio) = @$d; - my $rd1 = $rfwd1 + $rrev1; - my $rd2 = $rfwd2 + $rrev2; - next unless ( $ref ); - next if ( $seen{ "$chrt-$start-$end-$ref-$alt" } ); - $seen{ "$chrt-$start-$end-$ref-$alt" } = 1; - unless ($type) { $type = "REF"; } - #$pvalue *= sqrt(60/($mapq1+length($ref)+length($alt)-1))*$af1; - my @filters = (); - my @filters2 = (); - if ( $oddratio eq "Inf" ) { - $oddratio = 0; - } - if ( $oddratio1 eq "Inf" ) { - $oddratio1 = 0; - } elsif ( $oddratio1 < 1 && $oddratio1 > 0 ) { - $oddratio1 = sprintf("%.2f", 1/$oddratio1); - } - if ( $oddratio2 eq "Inf" ) { - $oddratio2 = 0; - } elsif ( $oddratio2 < 1 && $oddratio2 > 0 ) { - $oddratio2 = sprintf("%.2f", 1/$oddratio2); - } - if ($dp1 < $MinDepth) { - push( @filters, "d$MinDepth") unless ( $status eq "StrongSomatic" && $pvalue < 0.15 && $af1*$vd1 >= 0.5); - } - if ($vd1 < $VarDepth) { - push( @filters, "v$VarDepth") unless ( $status eq "StrongSomatic" && $pvalue < 0.15 && $af1*$vd1 >= 0.5); - } - push(@filters2, "d$MinDepth") if ( $dp2 < $MinDepth ); - push(@filters2, "v$VarDepth") if ( $vd2 < $VarDepth ); - #if ( $status =~ /Somatic/ || $status =~ /SampleSpecific/ ) { - push( @filters, "f$FREQ") if ($af1 < $FREQ); - #push( @filters, "MAF0.05") if ($qual2 >= $QMEAN && $pmean2 >= $PMEAN && $mapq2 >= $MQMEAN && $sn2 >= $SN && $nm2 < $opt_m && $af2 > 0.05); - push( @filters, "p$PMEAN") if ($pmean1 < $PMEAN); - push( @filters, "pSTD") 
if ($pstd1 == 0 && $vd1 < $MinDepth); - push( @filters, "q$QMEAN") if ($qual1 < $QMEAN); - push( @filters, "Q$MQMEAN") if ($mapq1 < $MQMEAN); - push( @filters, "Q$MQMEAN") if ($mapq1 < 10 && $type eq "SNV"); # consider SNV somatic in low mapping quality region false positves - push( @filters, "SN$SN") if ($sn1 < $SN); - push( @filters, "NM$opt_m") if ($nm1 >= $opt_m); - #push( @filters, "Bias") if (($bias1 eq "2;1" || $bias1 eq "2;0") && $sbf1 < 0.01 && ($oddratio1 > 5 || $oddratio1 == 0)); - push( @filters, "Bias") if ($bias1 eq "2;1" && $sbf1 < 0.01 && ($oddratio1 > 5 || $oddratio1 == 0) && $end - $start < 100); - #} elsif ( $status =~ /LOH/ || $status =~ /Deletion/ ) { - push( @filters2, "f$FREQ") if ($af2 < $FREQ); - push( @filters2, "p$PMEAN") if ($pmean2 < $PMEAN); - push( @filters2, "pSTD") if ($pstd2 == 0 && $vd2 < $MinDepth); - push( @filters2, "q$QMEAN") if ($qual2 < $QMEAN); - push( @filters2, "Q$MQMEAN") if ($mapq2 < $MQMEAN); - push( @filters2, "SN$SN") if ($sn2 < $SN); - push( @filters2, "NM$opt_m") if ($nm2 >= $opt_m); - #push( @filters2, "Bias") if (($bias2 eq "2;1" || $bias2 eq "2;0") && $sbf2 < 0.01 && ($oddratio2 > 5 || $oddratio2 == 0)); - my %bias_filters = map { $_, 1 } @filters; - push( @filters, "Bias") if (!$bias_filters{ "Bias" } && $bias2 eq "2;1" && $sbf2 < 0.01 && ($oddratio2 > 5 || $oddratio2 == 0) && $end - $start < 100); - #} - # Require stringent statistics in regions with MSI - if ( ($msi > $opt_I && $msilen > 1) || ($msi > 12 && $msilen == 1)) { - push( @filters, "MSI$opt_I") unless( $status eq "StrongSomatic" && $pvalue < 0.0005 ); - } - if ( abs(length($ref)-length($alt)) == $msilen && !grep(/^MSI$opt_I/,@filters)) { - push( @filters, "MSI$opt_I") if ( ($msi > $opt_I && $msilen > 1 && $af1 < 0.35 && $af2 < 0.35) || ($msi > 12 && $msilen == 1 && $af1 < 0.35 && $af2 < 0.35) ); - } - my $p_likely = 0; - my $indel_likely = 0; - #push( @filters, "Bias") if (($a[15] eq "2;1" && $a[24] < 0.01) || ($a[15] eq "2;0" && $a[24] < 0.01) ); 
#|| ($a[9]+$a[10] > 0 && abs($a[9]/($a[9]+$a[10])-$a[11]/($a[11]+$a[12])) > 0.5)); - if ( $opt_M ) { - if ( $pvalue > $PVAL ) { - push(@filters, "P$PVAL") unless ($status eq "StrongSomatic" && (($pvalue < 0.25 && $af1 > 0.1 ) || ($pvalue < 0.5 && $af1 > 0.20) || ($pvalue < 0.15 && $af1 > 0.05))); - } elsif ( $status =~ /LikelySomatic/ && $pvalue > 0.05/5**$vd2 ) { # Increase the stringency for LikelySomatic - $p_likely = 1; - } elsif ( $status =~ /Likely/ && $type ne "SNV" ) { - $indel_likely = 1 unless(length($ref) <= 2 && length($alt) <= 2); - } - } - #if ( @filters == 0 && abs(length($ref)-length($alt)) == $msilen ) { - #push( @filters, "InGap" ) if ( $pds && $type eq "SNV" && $start <= $pde && $end >= $pds && $status =~ /Somatic/ ); - #push( @filters, "InIns" ) if ( $pis && $type eq "SNV" && $start <= $pie && $end >= $pis && $status =~ /Somatic/ ); - #push( @filters, "LongAT") if (isLongAT($lseq) || isLongAT($rseq)); - #} - #my $filter = @filters > 1 ? join(";", @filters) : (((@filters == 1 && ($filters[0] eq "P$PVAL" || $filters[0] eq "P0.01Likely" || $filters[0] eq "InDelLikely" || "DIFF$DIFF")) ? "PASS" : $filters[0]) :"PASS"); - my $filter = @filters > 0 ? join(";", @filters) : "PASS"; - - # Unless somatic only option (-M) is specified, any good variants in germline should be - # reported as well, regardless of the tumor sample - unless($opt_M) { - $filter = "PASS" if ( $filter ne "PASS" && @filters2 == 0 ); - } - my $gt = (1-$af1 < $GTFREQ) ? "1/1" : ($af1 >= 0.5 ? "1/0" : ($af1 >= $FREQ ? "0/1" : "0/0")); - my $gtm = (1-$af2 < $GTFREQ) ? "1/1" : ($af2 >= 0.5 ? "1/0" : ($af2 >= $FREQ ? "0/1" : "0/0")); - $bias1 =~ s/;/,/; - $bias2 =~ s/;/,/; - $bias1 = "0,0" if ($bias1 eq '0'); - $bias2 = "0,0" if ($bias2 eq '0'); - $mapq1 = sprintf '%.0f', $mapq1; - $mapq2 = sprintf '%.0f', $mapq2; - my $qual = $vd1 > $vd2 ? 
int(log($vd1)/log(2) * $qual1) : int(log($vd2)/log(2) * $qual2); - if ( $pfilter eq "PASS" && $pinfo2 =~ /Somatic/ && $pinfo2 =~ /TYPE=SNV/ && $filter eq "PASS" && $status =~ /Somatic/ && $type eq "SNV" && $start - $pvs < $opt_c ) { - $pfilter = "Cluster${opt_c}bp"; - $filter = "Cluster${opt_c}bp"; - } - if ( $pinfo1 ) { - #print "$pinfo1\t$pfilter\t$pinfo2\n" unless ( ($opt_M && $pinfo2 !~ /Somatic/) || $opt_S && $pfilter ne "PASS" ); - print "$pinfo1\t$pfilter\t$pinfo2\n" unless ( $opt_S && $pfilter ne "PASS" ); - } - ($pinfo1, $pfilter, $pinfo2) = (join("\t", $chr, $start, ".", $ref, $alt, $qual), $filter, - join("\t", join("","STATUS=$status;SAMPLE=$sample_nowhitespace;TYPE=$type;DP=$dp1;VD=$vd1;AF=$af1;SHIFT3=$shift3;MSI=$msi;MSILEN=$msilen;SSF=$pvalue;SOR=$oddratio;LSEQ=$lseq;RSEQ=$rseq", - $p_likely ? ";P0.01Likely" : "", $indel_likely ? ";InDelLikely" : ""), - "GT:DP:VD:ALD:RD:AD:AF:BIAS:PMEAN:PSTD:QUAL:QSTD:SBF:ODDRATIO:MQ:SN:HIAF:ADJAF:NM", - "$gt:$dp1:$vd1:$vfwd1,$vrev1:$rfwd1,$rrev1:$rd1,$vd1:$af1:$bias1:$pmean1:$pstd1:$qual1:$qstd1:$sbf1:$oddratio1:$mapq1:$sn1:$hiaf1:$adjaf1:$nm1", - "$gtm:$dp2:$vd2:$vfwd2,$vrev2:$rfwd2,$rrev2:$rd2,$vd2:$af2:$bias2:$pmean2:$pstd2:$qual2:$qstd2:$sbf2:$oddratio2:$mapq2:$sn2:$hiaf2:$adjaf2:$nm2")); - ($pds, $pde) = ($start+1, $end) if ($type eq "Deletion"); - ($pis, $pie) = ($start-1, $end+1) if ($type eq "Insertion"); - ($pvs, $pve) = ($start, $end) if ( $type eq "SNV" && $filter eq "PASS"); - } - } - if ( $pinfo1 ) { - print "$pinfo1\t$pfilter\t$pinfo2\n" unless ( $opt_S && $pfilter ne "PASS" ); - } -} - -sub isLongAT { - my $seq = shift; - return 1 if ( $seq =~ /T{14,}/ ); - return 1 if ( $seq =~ /A{14,}/ ); - return 0; -} - -sub reorder { - my @chr = @_; - my @chrn = (); # numeric chromosomes - my @nonchrn = (); # non-numeric chrosomes - foreach my $c (@chr) { - if ( $c =~ /\d/ && $c !~ /_/) { - my $t = $c; - $t =~ s/\D//g; - push(@chrn, [$t, $c]); - } else { - next if ( $c eq "X" || $c eq "chrX" || $c eq "Y" || $c eq 
"chrY" ); - next if ( $c eq "MT" || $c eq "chrM" ); - push(@nonchrn, $c); - } - } - @chrn = sort { $a->[0] <=> $b->[0]; } @chrn; - @chr = map { $_->[1]; } @chrn; - if ( $hash{ X } ) { - push(@chr, 'X' ); - } elsif ( $hash{ chrX } ) { - push(@chr, 'chrX' ); - } - if ( $hash{ Y } ) { - push(@chr, 'Y' ); - } elsif ( $hash{ chrY } ) { - push(@chr, 'chrY' ); - } - if ( $hash{ MT } ) { - push(@chr, 'MT' ); - } elsif ( $hash{ chrM } ) { - push(@chr, 'chrM' ); - } - push ( @chr, @nonchrn ); - return (@chr); -} - -sub print_contigs -{ - my ($path) = @_; - if (not defined($path)) {return;} - - open(my $bed_file, "<", $path) - or return; - - while (my $line = <$bed_file>) - { - chomp $line; - my ($name, $start, $end) = split(/\t/, $line); - print "##contig=\n"; - } -} - -sub print_reference { - my $path = shift; - return unless defined($path); - print "##reference=$path\n"; -} - -sub Usage { -print < 12 non-monomer MSI will be considered false positive. For monomers, that number is 10. - -m int - The maximum mean mismatches allowed. Default: 5.25, or if a variant is supported by reads with more than 5.25 mismatches, it'll be considered - false positive. Mismatches don't includes indels in the alignment. - -N Name(s) - The sample name(s). If only one name is given, the matched will be simply names as "name-match". Two names - are given separated by "|", such as "tumor|blood". - -P float - The maximum p-value. Default to 0.05. - -p float - The minimum mean position of variants in the read. Default: 5. - -q float - The minimum mean base quality. Default to 22.5 for Illumina sequencing - -Q float - The minimum mapping quality. Default to 0 for Illumina sequencing - -d integer - The minimum total depth. Default to 5 - -v integer - The minimum variant depth. Default to 3 - -f float - The minimum allele frequency. Default to 0.02 - -o signal/noise - The minimum signal to noise, or the ratio of hi/(lo+0.5). Default to 1.5. Set it higher for deep sequencing. 
- -F float - The minimum allele frequency to consider to be homozygous. Default to 0.2. Thus frequency > 0.8 (1-0.2) will - be considered homozygous "1/1", between 0.5 - (1-0.2) will be "1/0", between (-f) - 0.5 will be "0/1", - below (-f) will be "0/0". - -b Path to the *.bed file which is used to generate contigs in the header - -G Path to the *.fasta (*.fa) file which is used to generate reference tag in the header - -AUTHOR - Written by Zhongwu Lai, AstraZeneca, Boston, USA - -REPORTING BUGS - Report bugs to zhongwu\@yahoo.com - -COPYRIGHT - This is free software: you are free to change and redistribute it. There is NO WARRANTY, to the extent permitted by law. - -USAGE -exit(0); -} diff --git a/vardictjava_1.8.2/run_command_cwl.sh b/vardictjava_1.8.2/run_command_cwl.sh deleted file mode 100755 index 0a244c2c..00000000 --- a/vardictjava_1.8.2/run_command_cwl.sh +++ /dev/null @@ -1,8 +0,0 @@ -module load singularity/3.7.1 -module load cwl/cwltool -cwltool \ - --cachedir /work/ci/vurals/access_related/vardictjava_1.8.2-2/tmp/cache \ - --outdir /work/ci/vurals/access_related/vardictjava_1.8.2-2/myoutput \ - --tmpdir-prefix /work/ci/vurals/access_related/vardictjava_1.8.2-2/tmp/tmp \ - --singularity \ - --non-strict vardict_workflow_single_sample.cwl example_inputs.yaml From 901379ad8974c00863a45823a75955146e81487b Mon Sep 17 00:00:00 2001 From: svural Date: Mon, 31 Jan 2022 15:07:03 -0500 Subject: [PATCH 432/476] initial commit --- docs/vardict/README.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 docs/vardict/README.md diff --git a/docs/vardict/README.md b/docs/vardict/README.md new file mode 100644 index 00000000..b6a3476b --- /dev/null +++ b/docs/vardict/README.md @@ -0,0 +1 @@ +Vardict documentations here From f58bab76a1bb179621e632946461c34632a32b06 Mon Sep 17 00:00:00 2001 From: svural Date: Mon, 31 Jan 2022 16:57:02 -0500 Subject: [PATCH 433/476] initial commit --- docs/vardict/README.md | 4 +- docs/vardict/vardict_1.8.3.md | 88 
+++++++++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+), 1 deletion(-) create mode 100644 docs/vardict/vardict_1.8.3.md diff --git a/docs/vardict/README.md b/docs/vardict/README.md index b6a3476b..76b832e7 100644 --- a/docs/vardict/README.md +++ b/docs/vardict/README.md @@ -1 +1,3 @@ -Vardict documentations here +#Vardict + + diff --git a/docs/vardict/vardict_1.8.3.md b/docs/vardict/vardict_1.8.3.md new file mode 100644 index 00000000..34e174dd --- /dev/null +++ b/docs/vardict/vardict_1.8.3.md @@ -0,0 +1,88 @@ +# Vardict v1.8.3 + +## Version of tools in docker image \(/container/Dockerfile\) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| ubuntu base image (alpine) | 3.8 | - | +| vardict | 1.8.3 | [https://github.com/AstraZeneca-NGS/VarDictJava/releases/tag/v1.8.2](https://github.com/AstraZeneca-NGS/VarDictJava/releases/tag/v1.8.2) | +| + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner vardict_workflow_single_sample.cwl example_inputs.yaml +``` + +**If at MSK, using the JUNO cluster you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/manta_1.5.1/vardict_workflow_single_sample.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> mkdir toil_log +> toil-cwl-runner --singularity --logFile /path/to/toil_log/cwltoil.log --jobStore /path/to/jobStore --batchSystem lsf --workDir /path/to =toil_log --outdir . 
--writeLogs /path/to/toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/manta_1.5.1/vardict_workflow_single_sample.cwl /path/to/inputs.yaml > toil.stdout 2> toil.stderr & +``` + +### Usage + +```bash +> toil-cwl-runner vardict_workflow_single_sample.cwl --help +usage: manta_1.5.1.cwl [-h] + +positional arguments: + job_order Job input json file + +optional arguments: + +-h, --help show this help message and exit +-E E +-G G +-N N +-S S +-b B +--bedfile BEDFILE +-c C +-f F +--f_1 F_1 +-g G +--vcf VCF + + + + + + + + + + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --call_regions CALL_REGIONS + bgzip-compressed, tabix-indexed BED file specifiying + regions to which variant analysis will be restricted + --non_wgs toggles on settings for WES + --normal_bam NORMAL_BAM + Normal sample BAM or CRAM file. May be specified more + than once, multiple inputs will be treated as each BAM + file representing a different sample. [optional] (no + default) + --output_contigs if true, outputs assembled contig sequences in final + VCF files, in the INFO field CONTIG + --reference_fasta REFERENCE_FASTA + samtools-indexed reference fasta file [required] + --tumor_bam TUMOR_BAM + Tumor sample BAM or CRAM file. Only up to one tumor + bam file accepted. 
+ --generateEvidenceBam + Generate a bam of supporting reads for all SVs +``` From 77813ef3ce7782d1ffdbfb6aa70e63c6e707f6bb Mon Sep 17 00:00:00 2001 From: svural Date: Mon, 31 Jan 2022 17:13:09 -0500 Subject: [PATCH 434/476] adjustments in md file --- docs/vardict/vardict_1.8.3.md | 50 +++++++++-------------------------- 1 file changed, 13 insertions(+), 37 deletions(-) diff --git a/docs/vardict/vardict_1.8.3.md b/docs/vardict/vardict_1.8.3.md index 34e174dd..df607cd8 100644 --- a/docs/vardict/vardict_1.8.3.md +++ b/docs/vardict/vardict_1.8.3.md @@ -1,4 +1,4 @@ -# Vardict v1.8.3 +# Vardict v1.8.3 - Single sample mode ## Version of tools in docker image \(/container/Dockerfile\) @@ -6,7 +6,7 @@ | :--- | :--- | :--- | | ubuntu base image (alpine) | 3.8 | - | | vardict | 1.8.3 | [https://github.com/AstraZeneca-NGS/VarDictJava/releases/tag/v1.8.2](https://github.com/AstraZeneca-NGS/VarDictJava/releases/tag/v1.8.2) | -| + ## CWL @@ -22,18 +22,18 @@ ```bash #Using CWLTOOL -> cwltool --singularity --non-strict /path/to/manta_1.5.1/vardict_workflow_single_sample.cwl /path/to/inputs.yaml +> cwltool --singularity --non-strict /path/to/vardict_1.8.3/vardict_workflow_single_sample.cwl /path/to/inputs.yaml #Using toil-cwl-runner > mkdir toil_log -> toil-cwl-runner --singularity --logFile /path/to/toil_log/cwltoil.log --jobStore /path/to/jobStore --batchSystem lsf --workDir /path/to =toil_log --outdir . --writeLogs /path/to/toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/manta_1.5.1/vardict_workflow_single_sample.cwl /path/to/inputs.yaml > toil.stdout 2> toil.stderr & +> toil-cwl-runner --singularity --logFile /path/to/toil_log/cwltoil.log --jobStore /path/to/jobStore --batchSystem lsf --workDir /path/to =toil_log --outdir . 
--writeLogs /path/to/toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/vardict_1.8.3/vardict_workflow_single_sample.cwl /path/to/inputs.yaml > toil.stdout 2> toil.stderr & ``` ### Usage ```bash > toil-cwl-runner vardict_workflow_single_sample.cwl --help -usage: manta_1.5.1.cwl [-h] +usage: vardict_workflow_single_sample.cwl [-h] positional arguments: job_order Job input json file @@ -42,47 +42,23 @@ optional arguments: -h, --help show this help message and exit -E E + The column for the region end, e.g. gene end -G G + The reference fasta. Should be indexed (.fai). -N N + The sample name to be used directly. -S S + The column for the region start, e.g. gene start -b B + The indexed BAM file. --bedfile BEDFILE -c C + The column for chromosome -f F + The threshold for allele frequency, default: 0.01 or 1% --f_1 F_1 -g G + The column for a gene name, or segment annotation --vcf VCF - - - - - - - - - -h, --help show this help message and exit - --memory_per_job MEMORY_PER_JOB - Memory per job in megabytes - --memory_overhead MEMORY_OVERHEAD - Memory overhead per job in megabytes - --number_of_threads NUMBER_OF_THREADS - --call_regions CALL_REGIONS - bgzip-compressed, tabix-indexed BED file specifiying - regions to which variant analysis will be restricted - --non_wgs toggles on settings for WES - --normal_bam NORMAL_BAM - Normal sample BAM or CRAM file. May be specified more - than once, multiple inputs will be treated as each BAM - file representing a different sample. [optional] (no - default) - --output_contigs if true, outputs assembled contig sequences in final - VCF files, in the INFO field CONTIG - --reference_fasta REFERENCE_FASTA - samtools-indexed reference fasta file [required] - --tumor_bam TUMOR_BAM - Tumor sample BAM or CRAM file. Only up to one tumor - bam file accepted. 
- --generateEvidenceBam - Generate a bam of supporting reads for all SVs ``` From 741aca7be5712d046b58abe7f34fbcadc69ae3af Mon Sep 17 00:00:00 2001 From: svural Date: Thu, 3 Feb 2022 11:58:36 -0500 Subject: [PATCH 435/476] populated the readme file --- docs/vardict/vardict_1.8.3.md | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/docs/vardict/vardict_1.8.3.md b/docs/vardict/vardict_1.8.3.md index df607cd8..0c957d88 100644 --- a/docs/vardict/vardict_1.8.3.md +++ b/docs/vardict/vardict_1.8.3.md @@ -1,4 +1,8 @@ # Vardict v1.8.3 - Single sample mode +To run VarDistJava in single sample mode vardict_workflow_single_sample.cwl should be run. vardict_workflow_single_sample.cwl will run 3 workflows to implement the example command in the original documentations as explained here +https://github.com/AstraZeneca-NGS/VarDictJava#single-sample-mode + + ## Version of tools in docker image \(/container/Dockerfile\) @@ -6,7 +10,8 @@ | :--- | :--- | :--- | | ubuntu base image (alpine) | 3.8 | - | | vardict | 1.8.3 | [https://github.com/AstraZeneca-NGS/VarDictJava/releases/tag/v1.8.2](https://github.com/AstraZeneca-NGS/VarDictJava/releases/tag/v1.8.2) | - +| perl | 5.26.2-r1 | [https://pkgs.alpinelinux.org/package/edge/main/aarch64/perl](https://pkgs.alpinelinux.org/package/edge/main/aarch64/perl) | +| r | 3.5.1 | [https://pkgs.alpinelinux.org/package/edge/community/x86/R](https://pkgs.alpinelinux.org/package/edge/community/x86/R) | ## CWL @@ -51,14 +56,15 @@ optional arguments: The column for the region start, e.g. gene start -b B The indexed BAM file. ---bedfile BEDFILE -c C The column for chromosome -f F The threshold for allele frequency, default: 0.01 or 1% --f_1 F_1 + The minimum allele frequency. 
Used for var2vcf_valid.pl -g G The column for a gene name, or segment annotation --vcf VCF - + vcf file name given to var2vcf_valid.pl +--bedfile BEDFILE ``` From 0b39ebe8795441e44079d26b9370c6672dc2b781 Mon Sep 17 00:00:00 2001 From: svural Date: Fri, 4 Feb 2022 11:07:43 -0500 Subject: [PATCH 436/476] updated docs --- docs/vardict/vardict_1.8.3.md | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/docs/vardict/vardict_1.8.3.md b/docs/vardict/vardict_1.8.3.md index 0c957d88..702dc81b 100644 --- a/docs/vardict/vardict_1.8.3.md +++ b/docs/vardict/vardict_1.8.3.md @@ -1,5 +1,5 @@ # Vardict v1.8.3 - Single sample mode -To run VarDistJava in single sample mode vardict_workflow_single_sample.cwl should be run. vardict_workflow_single_sample.cwl will run 3 workflows to implement the example command in the original documentations as explained here +To run VarDictJava in single sample mode vardict_workflow_single_sample.cwl should be run. vardict_workflow_single_sample.cwl will run 3 workflows to implement the example command in the original documentation as explained here: https://github.com/AstraZeneca-NGS/VarDictJava#single-sample-mode @@ -29,9 +29,6 @@ https://github.com/AstraZeneca-NGS/VarDictJava#single-sample-mode #Using CWLTOOL > cwltool --singularity --non-strict /path/to/vardict_1.8.3/vardict_workflow_single_sample.cwl /path/to/inputs.yaml -#Using toil-cwl-runner -> mkdir toil_log -> toil-cwl-runner --singularity --logFile /path/to/toil_log/cwltoil.log --jobStore /path/to/jobStore --batchSystem lsf --workDir /path/to =toil_log --outdir .
--writeLogs /path/to/toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/vardict_1.8.3/vardict_workflow_single_sample.cwl /path/to/inputs.yaml > toil.stdout 2> toil.stderr & ``` ### Usage From db02adf0e3472a0638b6a3206d7996bf0866e1ed Mon Sep 17 00:00:00 2001 From: svural Date: Tue, 8 Feb 2022 11:32:16 -0500 Subject: [PATCH 437/476] add toil-cwl-runner to documentation --- docs/vardict/vardict_1.8.3.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/vardict/vardict_1.8.3.md b/docs/vardict/vardict_1.8.3.md index 702dc81b..65698dc4 100644 --- a/docs/vardict/vardict_1.8.3.md +++ b/docs/vardict/vardict_1.8.3.md @@ -29,6 +29,8 @@ https://github.com/AstraZeneca-NGS/VarDictJava#single-sample-mode #Using CWLTOOL > cwltool --singularity --non-strict /path/to/vardict_1.8.3/vardict_workflow_single_sample.cwl /path/to/inputs.yaml +#Using Toil-cwl-runner +toil-cwl-runner --singularity vardict_workflow_single_sample.cwl example_inputs.yaml ``` ### Usage From 1395e87044681cc16d5c49f50acb88f2d32d5683 Mon Sep 17 00:00:00 2001 From: svural Date: Thu, 10 Feb 2022 10:50:51 -0500 Subject: [PATCH 438/476] cleaned up and added contributor sections to the cwls --- vardictjava_1.8.2/teststrandbias.cwl | 25 +++++++++++++++++ vardictjava_1.8.2/var_to_vcf.cwl | 25 +++++++++++++++++ vardictjava_1.8.2/vardict_app.cwl | 27 ++++++++++++++++++- .../vardict_workflow_single_sample.cwl | 10 +++++-- 4 files changed, 84 insertions(+), 3 deletions(-) diff --git a/vardictjava_1.8.2/teststrandbias.cwl b/vardictjava_1.8.2/teststrandbias.cwl index 22e602ba..4e49fbd7 100644 --- a/vardictjava_1.8.2/teststrandbias.cwl +++ b/vardictjava_1.8.2/teststrandbias.cwl @@ -26,3 +26,28 @@ outputs: stdin: $(inputs.input_vardict.path) stdout: output_teststrandbias.var + +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:vurals@mskcc.org' + 'foaf:name': Suleyman Vural + 'foaf:name': Memorial Sloan 
Kettering Cancer Center + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:kumarn1@mskcc.org' + 'foaf:name': Nikhil Kumar + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:kumarn1@mskcc.org' + 'foaf:name': Nikhil Kumar + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': Vardictjava + 'doap:revision': 1.8.2 diff --git a/vardictjava_1.8.2/var_to_vcf.cwl b/vardictjava_1.8.2/var_to_vcf.cwl index eacfc439..7f16c6bc 100644 --- a/vardictjava_1.8.2/var_to_vcf.cwl +++ b/vardictjava_1.8.2/var_to_vcf.cwl @@ -54,3 +54,28 @@ outputs: stdin: $(inputs.input_vcf.path) stdout: ${ return inputs.vcf; } + +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:vurals@mskcc.org' + 'foaf:name': Suleyman Vural + 'foaf:name': Memorial Sloan Kettering Cancer Center + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:kumarn1@mskcc.org' + 'foaf:name': Nikhil Kumar + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:kumarn1@mskcc.org' + 'foaf:name': Nikhil Kumar + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': Vardictjava + 'doap:revision': 1.8.2 diff --git a/vardictjava_1.8.2/vardict_app.cwl b/vardictjava_1.8.2/vardict_app.cwl index d40ff33f..e2a9d1e3 100644 --- a/vardictjava_1.8.2/vardict_app.cwl +++ b/vardictjava_1.8.2/vardict_app.cwl @@ -85,7 +85,6 @@ inputs: type: File? 
inputBinding: position: 1 -#####~~~~~~~~~~#####~~~~~~~~~~~~#####~~~~~~~~~~######~~~~~~~~###### outputs: output: @@ -95,3 +94,29 @@ outputs: stdout: vardict_app_output.vcf + + +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:vurals@mskcc.org' + 'foaf:name': Suleyman Vural + 'foaf:name': Memorial Sloan Kettering Cancer Center + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:kumarn1@mskcc.org' + 'foaf:name': Nikhil Kumar + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:kumarn1@mskcc.org' + 'foaf:name': Nikhil Kumar + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': Vardictjava + 'doap:revision': 1.8.2 diff --git a/vardictjava_1.8.2/vardict_workflow_single_sample.cwl b/vardictjava_1.8.2/vardict_workflow_single_sample.cwl index 685aadc8..17054b33 100644 --- a/vardictjava_1.8.2/vardict_workflow_single_sample.cwl +++ b/vardictjava_1.8.2/vardict_workflow_single_sample.cwl @@ -44,7 +44,7 @@ inputs: outputs: output: type: File - outputSource: vardict_1/output + outputSource: var_to_vcf/output steps: vardict: run: ./vardict_app.cwl @@ -64,7 +64,7 @@ steps: in: input_vardict: vardict/output out: [output_var] - vardict_1: + var_to_vcf: run: ./var_to_vcf.cwl in: N: N @@ -80,6 +80,12 @@ steps: 'foaf:mbox': 'mailto:vurals@mskcc.org' 'foaf:name': Suleyman Vural 'foaf:name': Memorial Sloan Kettering Cancer Center + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:kumarn1@mskcc.org' + 'foaf:name': Nikhil Kumar + 'foaf:name': Memorial Sloan Kettering Cancer Center 'dct:creator': - class: 'foaf:Organization' 'foaf:member': From 917da21850a735ae403cc2424f954d4fc62d126c Mon Sep 17 00:00:00 2001 From: svural Date: Tue, 22 Feb 2022 13:36:40 -0500 Subject: 
[PATCH 439/476] inital commit --- vcf2maf_1.6.21/README.md | 90 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 vcf2maf_1.6.21/README.md diff --git a/vcf2maf_1.6.21/README.md b/vcf2maf_1.6.21/README.md new file mode 100644 index 00000000..491ef1c3 --- /dev/null +++ b/vcf2maf_1.6.21/README.md @@ -0,0 +1,90 @@ +# CWL and Dockerfile for running vcf2maf v1.6.17 + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +|--- |--- |--- | +| alpine:3.8 base image | 3.8 | - | +| vcf2maf | 1.6.17 | https://github.com/mskcc/vcf2maf/archive/v1.6.17.zip | + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner vcf2maf_1.6.17.cwl example_inputs.yaml +``` + +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/vcf2maf_1.6.17.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> mkdir vcf2maf_toil_log +> toil-cwl-runner --singularity --logFile /path/to/vcf2maf_toil_log/cwltoil.log --jobStore /path/to/vcf2maf_jobStore --batchSystem lsf --workDir /path/to/vcf2maf_toil_log --outdir . 
--writeLogs /path/to/vcf2maf_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/vcf2maf_1.6.17.cwl /path/to/inputs.yaml > vcf2maf_toil.stdout 2> vcf2maf_toil.stderr & +``` + +### Usage + +``` +usage: toil-cwl-runner vcf2maf_1.6.17.cwl [-h] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --any_allele ANY_ALLELE + When reporting co-located variants, allow mismatched + variant alleles too + --buffer_size BUFFER_SIZE + Number of variants VEP loads at a time; Reduce this + for low memory systems + --cache_version CACHE_VERSION + Version of VEP and its cache to use + --custom_enst CUSTOM_ENST + List of custom ENST IDs that override canonical + selection + --maf_center MAF_CENTER + Variant calling center to report in MAF + --max_filter_ac MAX_FILTER_AC + Use tag common_variant if the filter-vcf reports a + subpopulation AC higher than this + --min_hom_vaf MIN_HOM_VAF + If GT undefined in VCF, minimum allele fraction to + call a variant homozygous + --ncbi_build NCBI_BUILD + Genome build of variants in input + --normal_id NORMAL_ID + Matched_Norm_Sample_Barcode to report in the MAF + --output_maf OUTPUT_MAF + Path to output MAF file + --ref_fasta REF_FASTA + Reference FASTA file + --remap_chain REMAP_CHAIN + Chain file to remap variants to a different assembly + before running VEP + --retain_fmt RETAIN_FMT + Comma-delimited names of FORMAT fields to retain as + extra columns in MAF [] + --retain_info RETAIN_INFO + Comma-delimited names of INFO fields to retain as + extra columns in MAF + --species SPECIES Species of variants in input + --tumor_id TUMOR_ID Tumor_Sample_Barcode to report in the MAF + --vcf_normal_id VCF_NORMAL_ID + Matched normal ID used in VCFs genotype columns + --vcf_tumor_id VCF_TUMOR_ID + Tumor sample ID used in VCFs genotype columns + --vep_data VEP_DATA VEPs base cache/plugin directory + --vep_forks VEP_FORKS + Number of 
forked processes to use when running VEP + --vep_path VEP_PATH Folder containing variant_effect_predictor.pl or vep + binary + + +``` From 6443d09245ef9e8803366fa5a8c567659d65a460 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 4 Mar 2022 15:35:26 -0500 Subject: [PATCH 440/476] Adding Mosdepth Adding the CWL & Readme for Mosdepth --- docs/mosdepth/mosdepth_0.3.3.md | 0 mosdepth_0.3.3/mosdepth_0.3.3.cwl | 60 +++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) create mode 100644 docs/mosdepth/mosdepth_0.3.3.md create mode 100644 mosdepth_0.3.3/mosdepth_0.3.3.cwl diff --git a/docs/mosdepth/mosdepth_0.3.3.md b/docs/mosdepth/mosdepth_0.3.3.md new file mode 100644 index 00000000..e69de29b diff --git a/mosdepth_0.3.3/mosdepth_0.3.3.cwl b/mosdepth_0.3.3/mosdepth_0.3.3.cwl new file mode 100644 index 00000000..2d6afcf7 --- /dev/null +++ b/mosdepth_0.3.3/mosdepth_0.3.3.cwl @@ -0,0 +1,60 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + sbg: 'https://www.sevenbridges.com/' +id: mosdepth_0_3_3 +baseCommand: + - mosdepth +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: bed + type: File? + inputBinding: + position: 0 + prefix: '-b' + shellQuote: false + doc: optional BED file or (integer) window-sizes. + - id: chrom + type: File? + inputBinding: + position: 0 + prefix: '-c' + shellQuote: false + doc: chromosome to restrict depth calculation. + - id: input_bam + type: File + secondaryFiles: + - ^.bai + - id: prefix + type: File + doc: Prefix for the output files +outputs: + - id: mosdepth_output + type: File? + outputBinding: + glob: $(inputs.prefix).* +doc: 'fast BAM/CRAM depth calculation for WGS, exome, or targeted sequencing.' 
+label: mosdepth_0.3.3 +arguments: + - position: 0 + prefix: '-t' + valueFrom: $(runtime.cores) + - position: 99 + valueFrom: $(inputs.prefix) + - position: 100 + valueFrom: $(inputs.input_bam) +requirements: + - class: ShellCommandRequirement + - class: ResourceRequirement + ramMin: 17000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/mosdepth:0.3.3' + - class: InlineJavascriptRequirement From 6b04b014affb53fa5b951514b856ebcd5acd0eae Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 4 Mar 2022 15:37:55 -0500 Subject: [PATCH 441/476] Update mosdepth_0.3.3.cwl --- mosdepth_0.3.3/mosdepth_0.3.3.cwl | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/mosdepth_0.3.3/mosdepth_0.3.3.cwl b/mosdepth_0.3.3/mosdepth_0.3.3.cwl index 2d6afcf7..df1bc7d1 100644 --- a/mosdepth_0.3.3/mosdepth_0.3.3.cwl +++ b/mosdepth_0.3.3/mosdepth_0.3.3.cwl @@ -1,6 +1,9 @@ class: CommandLineTool cwlVersion: v1.0 $namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' sbg: 'https://www.sevenbridges.com/' id: mosdepth_0_3_3 baseCommand: @@ -58,3 +61,22 @@ requirements: - class: DockerRequirement dockerPull: 'ghcr.io/msk-access/mosdepth:0.3.3' - class: InlineJavascriptRequirement + +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Carmelina + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:johnsoni@mskcc.org' + 'foaf:name': Carmelina + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': mosdepth + 'doap:revision': 0.3.3 From 5c288c9aeed5e5f9496e5565ef53c9b158845147 Mon Sep 17 00:00:00 2001 From: ksivaprakasam Date: Mon, 14 Mar 2022 10:32:53 -0500 Subject: [PATCH 442/476] cwl_v1 --- 
octopus/0.7.4/octopus_0-7-4.cwl | 99 +++++++++++++++++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 octopus/0.7.4/octopus_0-7-4.cwl diff --git a/octopus/0.7.4/octopus_0-7-4.cwl b/octopus/0.7.4/octopus_0-7-4.cwl new file mode 100644 index 00000000..f9a41d29 --- /dev/null +++ b/octopus/0.7.4/octopus_0-7-4.cwl @@ -0,0 +1,99 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + sbg: 'https://www.sevenbridges.com/' +id: octopus_0_7_4 +baseCommand: + - octopus +inputs: + - id: Reference + type: File + inputBinding: + position: 0 + prefix: '-R' + secondaryFiles: + - .fai + - id: input + type: + - File + - type: array + items: File + inputBinding: + position: 0 + prefix: '-I' + doc: Tumor and normal bam files with .bai + secondaryFiles: + - ^.bai + - id: normalId + type: string? + inputBinding: + position: 0 + prefix: '-N' + doc: add the name of the normal sample + - id: tumorOnlySample + type: boolean? + inputBinding: + position: 0 + prefix: '-C' + doc: mention this parameter if it is tumor only sample. + - id: somaticOnlyCalls + type: boolean? + inputBinding: + position: 0 + prefix: '--somatics-only' + doc: if somatics only call is required. Use this with -f ON parameter + - id: targettedCalling.singleEntry + type: string? + inputBinding: + position: 0 + prefix: '-T' + doc: >- + list of regions to call variants from. + + eg 1. chr1: all of chr1. + + 2. chr2:10,000,000: the single position 10000000 in chr2. + + chr3:5,000,000-: everything from 3. chr3:5,000,000 onwards. + + 4. chr4:100,000,000-200,000,000: everything between chr4:100,000,000 and + chr4:200,000,000. The interval is half open so position chr4:200,000,000 + is not included. + - id: skipRegions.singleEntry + type: string? + inputBinding: + position: 0 + prefix: '-K' + doc: to skip a set of regions + - id: targettedCalling.file + type: File? + inputBinding: + position: 0 + prefix: '-t' + doc: regions in a text or bed file + - id: skipRegions.file + type: File? 
+ inputBinding: + position: 0 + prefix: '-k' + doc: regions in text or bed file format + - id: error.models + type: string? + inputBinding: + position: 0 + prefix: '--sequence-error-model' + doc: >- + error model will be in the format - [library preparation]<.sequencer> + eg: PCR.NOVASEQ +outputs: + - id: outputVCF + type: File? + outputBinding: + glob: '*.vcf' +label: octopus +requirements: + - class: ResourceRequirement + ramMin: 4000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/octopus:0.7.4' From 2b97be6a7bc3af56081650f91e1e4845355f86ee Mon Sep 17 00:00:00 2001 From: svural Date: Wed, 16 Mar 2022 11:40:32 -0400 Subject: [PATCH 443/476] initial commit --- vcf2maf_1.6.21/container/Dockerfile | 79 +++++++++++ vcf2maf_1.6.21/example_inputs.yaml | 14 ++ vcf2maf_1.6.21/vcf2maf_1.6.21.cwl | 195 ++++++++++++++++++++++++++++ 3 files changed, 288 insertions(+) create mode 100644 vcf2maf_1.6.21/container/Dockerfile create mode 100644 vcf2maf_1.6.21/example_inputs.yaml create mode 100644 vcf2maf_1.6.21/vcf2maf_1.6.21.cwl diff --git a/vcf2maf_1.6.21/container/Dockerfile b/vcf2maf_1.6.21/container/Dockerfile new file mode 100644 index 00000000..919941e3 --- /dev/null +++ b/vcf2maf_1.6.21/container/Dockerfile @@ -0,0 +1,79 @@ +FROM clearlinux:latest AS builder + +ARG VCF2MAF_VERSION=1.6.21 +ARG HTSLIB_VERSION=1.10.2 +ARG SAMTOOLS_VERSION=1.10 +ARG BCFTOOLS_VERSION=1.10.2 +ARG VEP_VERSION=105.0 +# Install a minimal versioned OS into /install_root, and bundled tools if any +ENV CLEAR_VERSION=33980 +RUN swupd os-install --no-progress --no-boot-update --no-scripts \ + --version ${CLEAR_VERSION} \ + --path /install_root \ + --statedir /swupd-state \ + --bundles os-core-update,which + +# Download and install conda into /usr/bin +ENV MINICONDA_VERSION=py37_4.9.2 +RUN swupd bundle-add --no-progress curl git wget sysadmin-basic diffutils less c-basic && \ + curl -sL https://repo.anaconda.com/miniconda/Miniconda3-${MINICONDA_VERSION}-Linux-x86_64.sh -o 
/tmp/miniconda.sh && \ + sh /tmp/miniconda.sh -bfp /usr + +#Download and install vcf2maf +RUN wget https://github.com/mskcc/vcf2maf/archive/refs/tags/v${VCF2MAF_VERSION}.zip \ + && unzip v${VCF2MAF_VERSION}.zip \ + && rm v${VCF2MAF_VERSION}.zip + +# Use conda to install remaining tools/dependencies into /usr/local +ENV VEP_VERSION=${VEP_VERSION} \ + HTSLIB_VERSION=${HTSLIB_VERSION} \ + BCFTOOLS_VERSION=${BCFTOOLS_VERSION} \ + SAMTOOLS_VERSION=${SAMTOOLS_VERSION} \ + LIFTOVER_VERSION=377 +RUN conda create -qy -p /usr/local \ + -c conda-forge \ + -c bioconda \ + -c defaults \ + ensembl-vep==${VEP_VERSION} \ + htslib==${HTSLIB_VERSION} \ + bcftools==${BCFTOOLS_VERSION} \ + samtools==${SAMTOOLS_VERSION} \ + ucsc-liftover==${LIFTOVER_VERSION} + +#Copy offline cache +COPY homo_sapiens_vep_105_GRCh37.tar.gz /var/cache +COPY Homo_sapiens.GRCh37.dna.toplevel.fa.gz /var/cache +RUN mkdir -p /.vep/homo_sapiens/105_GRCh37/ \ + ## && rsync -avr --progress rsync://ftp.ensembl.org/ensembl/pub/release-105/variation/indexed_vep_cache/homo_sapiens_vep_105_GRCh37.tar.gz $HOME/.vep/ \ + && mv /var/cache/homo_sapiens_vep_105_GRCh37.tar.gz /.vep/ \ + && tar -zxf /.vep/homo_sapiens_vep_105_GRCh37.tar.gz -C /.vep/ \ + && rm /.vep/homo_sapiens_vep_105_GRCh37.tar.gz \ + ## && rsync -avr --progress rsync://ftp.ensembl.org/ensembl/pub/grch37/release-105/fasta/homo_sapiens/dna/Homo_sapiens.GRCh37.dna.toplevel.fa.gz $HOME/.vep/homo_sapiens/105_GRCh37/ \ + && mv /var/cache/Homo_sapiens.GRCh37.dna.toplevel.fa.gz /.vep/homo_sapiens/105_GRCh37/ \ + && gzip -d /.vep/homo_sapiens/105_GRCh37/Homo_sapiens.GRCh37.dna.toplevel.fa.gz \ + && bgzip -i /.vep/homo_sapiens/105_GRCh37/Homo_sapiens.GRCh37.dna.toplevel.fa \ + && samtools faidx /.vep/homo_sapiens/105_GRCh37/Homo_sapiens.GRCh37.dna.toplevel.fa.gz + +RUN vep_convert_cache --species homo_sapiens --version all --dir /.vep + +# Deploy the minimal OS and tools into a clean target layer +FROM scratch +ARG VCF2MAF_VERSION=1.6.21 +ENV 
VEP_VERSION=${VEP_VERSION} + +LABEL org.opencontainers.image.vendor="MSKCC" +LABEL maintainer="Suleyman Vural " +LABEL maintainer="Cyriac Kandoth " +LABEL org.opencontainers.image.version.vcf2maf=${VCF2MAF_VERSION} +LABEL org.opencontainers.image.version.vep="105" + +COPY --from=builder vcf2maf-${VCF2MAF_VERSION} /opt/vcf2maf-${VCF2MAF_VERSION} +COPY --from=builder vcf2maf-${VCF2MAF_VERSION}/*.pl /usr/local/bin/ +COPY --from=builder vcf2maf-${VCF2MAF_VERSION}/data /opt/data +COPY --from=builder /.vep /.vep/ +COPY --from=builder /install_root / +COPY --from=builder /usr/local /usr/local + +RUN chmod 777 /usr/local/bin/vcf2*.pl && chmod 777 /usr/local/bin/maf2*.pl + +WORKDIR /opt diff --git a/vcf2maf_1.6.21/example_inputs.yaml b/vcf2maf_1.6.21/example_inputs.yaml new file mode 100644 index 00000000..1de8d51e --- /dev/null +++ b/vcf2maf_1.6.21/example_inputs.yaml @@ -0,0 +1,14 @@ +input_vcf: + class: File + path: /path/to/input.vcf +tumor_id: tumor_sample_name +vcf_tumor_id: tumor_sample_name +normal_id: normal_sample_name +vcf_normal_id: normal_sample_name +ncbi_build: "GRCh37" +vep_data: /.vep/ # location in the container +ref_fasta: "/.vep/homo_sapiens/105_GRCh37/Homo_sapiens.GRCh37.dna.toplevel.fa.gz" # location in the container +vep_path: /usr/local/bin/ +retain_info: retain_info_str +retain_fmt: retain_fmt_str +output_maf: "output.maf" diff --git a/vcf2maf_1.6.21/vcf2maf_1.6.21.cwl b/vcf2maf_1.6.21/vcf2maf_1.6.21.cwl new file mode 100644 index 00000000..2416a380 --- /dev/null +++ b/vcf2maf_1.6.21/vcf2maf_1.6.21.cwl @@ -0,0 +1,195 @@ + +class: CommandLineTool +cwlVersion: v1.0 + +$namespaces: + dct: http://purl.org/dc/terms/ + doap: http://usefulinc.com/ns/doap# + foaf: http://xmlns.com/foaf/0.1/ + sbg: https://www.sevenbridges.com/ + +id: vcf2maf_v1.6.21 + +baseCommand: + - perl + - /opt/vcf2maf-1.6.21/vcf2maf.pl + +inputs: + memory_per_job: + type: ["null",int] + doc: Memory per job in megabytes + memory_overhead: + type: ["null",int] + doc: Memory overhead per 
job in megabytes + cache_version: + type: + - 'null' + - string + default: '105' + doc: Version of VEP and its cache to use + inputBinding: + prefix: --cache-version + species: + type: + - 'null' + - string + default: homo_sapiens + doc: Species of variants in input + inputBinding: + prefix: --species + ncbi_build: + type: + - 'null' + - string + default: GRCh37 + doc: Genome build of variants in input + inputBinding: + prefix: --ncbi-build + ref_fasta: + type: ['null', string] + doc: Reference FASTA file + inputBinding: + prefix: --ref-fasta + maf_center: + type: ['null', string] + default: mskcc.org + doc: Variant calling center to report in MAF + inputBinding: + prefix: --maf-center + output_maf: + type: ['null', string] + doc: Path to output MAF file + inputBinding: + prefix: --output-maf + min_hom_vaf: + type: + - 'null' + - float + default: 0.7 + doc: If GT undefined in VCF, minimum allele fraction to call a variant homozygous + inputBinding: + prefix: --min-hom-vaf + remap_chain: + type: ['null', string] + doc: Chain file to remap variants to a different assembly before running VEP + inputBinding: + prefix: --remap-chain + normal_id: + type: ['null', string] + default: NORMAL + doc: Matched_Norm_Sample_Barcode to report in the MAF + inputBinding: + prefix: --normal-id + buffer_size: + type: + - 'null' + - int + default: 5000 + doc: Number of variants VEP loads at a time; Reduce this for low memory systems + inputBinding: + prefix: --buffer-size + custom_enst: + type: ['null', string] + doc: List of custom ENST IDs that override canonical selection + inputBinding: + prefix: --custom-enst + vcf_normal_id: + type: ['null', string] + default: NORMAL + doc: Matched normal ID used in VCFs genotype columns + inputBinding: + prefix: --vcf-normal-id + vep_path: + type: ['null', string] + doc: Folder containing variant_effect_predictor.pl or vep binary + inputBinding: + prefix: --vep-path + vep_data: + type: ['null', string] + doc: VEPs base cache/plugin directory + 
inputBinding: + prefix: --vep-data + any_allele: + type: ['null', string] + doc: When reporting co-located variants, allow mismatched variant alleles too + inputBinding: + prefix: --any-allele + input_vcf: + type: + - string + - File + doc: Path to input file in VCF format + inputBinding: + prefix: --input-vcf + vep_forks: + type: + - 'null' + - int + default: 4 + doc: Number of forked processes to use when running VEP + inputBinding: + prefix: --vep-forks + vcf_tumor_id: + type: ['null', string] + default: TUMOR + doc: Tumor sample ID used in VCFs genotype columns + inputBinding: + prefix: --vcf-tumor-id + tumor_id: + type: ['null', string] + default: TUMOR + doc: Tumor_Sample_Barcode to report in the MAF + inputBinding: + prefix: --tumor-id + retain_info: + type: ['null', string] + doc: Comma-delimited names of INFO fields to retain as extra columns in MAF + inputBinding: + prefix: --retain-info + retain_fmt: + type: ['null', string] + doc: Comma-delimited names of FORMAT fields to retain as extra columns in MAF [] + inputBinding: + prefix: --retain-fmt +outputs: + vcf2maf_maf: + type: File + outputBinding: + glob: | + ${ + if (inputs.output_maf) + return inputs.output_maf; + return null; + } + +arguments: +- valueFrom: "$(runtime.tmpdir)" + prefix: '--tmp-dir' + shellQuote: false + +requirements: + InlineJavascriptRequirement: {} + ResourceRequirement: + ramMin: 8000 + coresMin: 2 + DockerRequirement: + dockerPull: ghcr.io/msk-access/vcf2maf:1.6.21 + +dct:contributor: + - class: foaf:Organization + foaf:member: + - class: foaf:Person + foaf:mbox: mailto:vurals@mskcc.org + foaf:name: Suleyman Vural + foaf:name: Memorial Sloan Kettering Cancer Center +dct:creator: + - class: foaf:Organization + foaf:member: + - class: foaf:Person + foaf:mbox: mailto:kumarn1@mskcc.org + foaf:name: Nikhil Kumar + foaf:name: Memorial Sloan Kettering Cancer Center +doap:release: + - class: doap:Version + doap:name: vcf2maf + doap:revision: 1.6.21 From 
3e07e8452d126e28c3e396a13731859e089cd0a9 Mon Sep 17 00:00:00 2001 From: Carmelina Date: Thu, 17 Mar 2022 11:21:48 -0400 Subject: [PATCH 444/476] Update mosdepth cwl --- mosdepth_0.3.3/mosdepth_0.3.3.cwl | 71 +++++++++++++++++++++++++++---- 1 file changed, 63 insertions(+), 8 deletions(-) diff --git a/mosdepth_0.3.3/mosdepth_0.3.3.cwl b/mosdepth_0.3.3/mosdepth_0.3.3.cwl index df1bc7d1..848e3793 100644 --- a/mosdepth_0.3.3/mosdepth_0.3.3.cwl +++ b/mosdepth_0.3.3/mosdepth_0.3.3.cwl @@ -32,17 +32,73 @@ inputs: shellQuote: false doc: chromosome to restrict depth calculation. - id: input_bam - type: File + type: + - File + - type: array + items: File + doc: Required list of input bam file (s) separated by comma secondaryFiles: - ^.bai - id: prefix - type: File + type: string? doc: Prefix for the output files + - id: flag + type: int? + inputBinding: + position: 0 + prefix: '-F' + doc: exclude reads with any of the bits in FLAG set + - id: mapq + type: int? + inputBinding: + position: 0 + prefix: '-Q' + doc: mapping quality threshold. reads with a mapping quality less than this are ignored outputs: - - id: mosdepth_output + - id: per_base_bed + type: File + outputBinding: + glob: |- + ${ + if (inputs.prefix) { + return inputs.prefix + '.per-base.bed.gz' + } else { + return 'per-base.bed.gz' + } + } + - id: per_region_bed + type: File? + outputBinding: + glob: |- + ${ + if (inputs.prefix) { + return inputs.prefix + '.regions.bed.gz' + } else { + return 'regions.bed.gz' + } + } + - id: global_distribution + type: File? + outputBinding: + glob: |- + ${ + if (inputs.prefix) { + return inputs.prefix + '.mosdepth.global.dist.txt' + } else { + return 'mosdepth.global.dist.txt' + } + } + - id: region_distribution type: File? 
outputBinding: - glob: $(inputs.prefix).* + glob: |- + ${ + if (inputs.prefix) { + return inputs.prefix + '.mosdepth.region.dist.txt' + } else { + return 'mosdepth.region.dist.txt' + } + } doc: 'fast BAM/CRAM depth calculation for WGS, exome, or targeted sequencing.' label: mosdepth_0.3.3 arguments: @@ -61,20 +117,19 @@ requirements: - class: DockerRequirement dockerPull: 'ghcr.io/msk-access/mosdepth:0.3.3' - class: InlineJavascriptRequirement - 'dct:contributor': - class: 'foaf:Organization' 'foaf:member': - class: 'foaf:Person' 'foaf:mbox': 'mailto:shahr2@mskcc.org' - 'foaf:name': Carmelina + 'foaf:name': Carmelina 'foaf:name': Memorial Sloan Kettering Cancer Center 'dct:creator': - class: 'foaf:Organization' 'foaf:member': - class: 'foaf:Person' - 'foaf:mbox': 'mailto:johnsoni@mskcc.org' - 'foaf:name': Carmelina + 'foaf:mbox': 'mailto:charalk@mskcc.org' + 'foaf:name': Carmelina 'foaf:name': Memorial Sloan Kettering Cancer Center 'doap:release': - class: 'doap:Version' From 2c3341bafdc7cd8738923e34354bf4f1deff5aa1 Mon Sep 17 00:00:00 2001 From: svural Date: Thu, 17 Mar 2022 11:36:05 -0400 Subject: [PATCH 445/476] initial commit --- vcf2maf_1.6.21/README.md | 106 +++++++++++++++++---------------------- 1 file changed, 46 insertions(+), 60 deletions(-) diff --git a/vcf2maf_1.6.21/README.md b/vcf2maf_1.6.21/README.md index 491ef1c3..6d8cd35d 100644 --- a/vcf2maf_1.6.21/README.md +++ b/vcf2maf_1.6.21/README.md @@ -1,11 +1,17 @@ -# CWL and Dockerfile for running vcf2maf v1.6.17 +# CWL and Dockerfile for running vcf2maf v1.6.21 ## Version of tools in docker image (/container/Dockerfile) | Tool | Version | Location | |--- |--- |--- | -| alpine:3.8 base image | 3.8 | - | -| vcf2maf | 1.6.17 | https://github.com/mskcc/vcf2maf/archive/v1.6.17.zip | +| clearlinux (base image) | - | - | +| vcf2maf | 1.6.21 | https://github.com/mskcc/vcf2maf/archive/v1.6.21.zip | +| VEP | 105 | - | - | +|MINICONDA_VERSION | py37_4.9.2 | 
https://repo.anaconda.com/miniconda/Miniconda3-py37_4.9.2-Linux-x86_64.sh +|BCFTOOLS_VERSION | 1.10.2 | - | - | +|SAMTOOLS_VERSION | 1.10 | - | - | +|VCF2MAF_VERSION | 1.6.21 | - | - | +|HTSLIB_VERSION | 1.10.2 | - | - | ## CWL @@ -14,14 +20,14 @@ - Example Command using [toil](https://toil.readthedocs.io): ```bash - > toil-cwl-runner vcf2maf_1.6.17.cwl example_inputs.yaml + > toil-cwl-runner vcf2maf_1.6.21.cwl example_inputs.yaml ``` **If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** ```bash #Using CWLTOOL -> cwltool --singularity --non-strict /path/to/vcf2maf_1.6.17.cwl /path/to/inputs.yaml +> cwltool --singularity --non-strict /path/to/vcf2maf_1.6.21.cwl /path/to/inputs.yaml #Using toil-cwl-runner > mkdir vcf2maf_toil_log @@ -31,60 +37,40 @@ ### Usage ``` -usage: toil-cwl-runner vcf2maf_1.6.17.cwl [-h] - -positional arguments: - job_order Job input json file - -optional arguments: - -h, --help show this help message and exit - --any_allele ANY_ALLELE - When reporting co-located variants, allow mismatched - variant alleles too - --buffer_size BUFFER_SIZE - Number of variants VEP loads at a time; Reduce this - for low memory systems - --cache_version CACHE_VERSION - Version of VEP and its cache to use - --custom_enst CUSTOM_ENST - List of custom ENST IDs that override canonical - selection - --maf_center MAF_CENTER - Variant calling center to report in MAF - --max_filter_ac MAX_FILTER_AC - Use tag common_variant if the filter-vcf reports a - subpopulation AC higher than this - --min_hom_vaf MIN_HOM_VAF - If GT undefined in VCF, minimum allele fraction to - call a variant homozygous - --ncbi_build NCBI_BUILD - Genome build of variants in input - --normal_id NORMAL_ID - Matched_Norm_Sample_Barcode to report in the MAF - --output_maf OUTPUT_MAF - Path to 
output MAF file - --ref_fasta REF_FASTA - Reference FASTA file - --remap_chain REMAP_CHAIN - Chain file to remap variants to a different assembly - before running VEP - --retain_fmt RETAIN_FMT - Comma-delimited names of FORMAT fields to retain as - extra columns in MAF [] - --retain_info RETAIN_INFO - Comma-delimited names of INFO fields to retain as - extra columns in MAF - --species SPECIES Species of variants in input - --tumor_id TUMOR_ID Tumor_Sample_Barcode to report in the MAF - --vcf_normal_id VCF_NORMAL_ID - Matched normal ID used in VCFs genotype columns - --vcf_tumor_id VCF_TUMOR_ID - Tumor sample ID used in VCFs genotype columns - --vep_data VEP_DATA VEPs base cache/plugin directory - --vep_forks VEP_FORKS - Number of forked processes to use when running VEP - --vep_path VEP_PATH Folder containing variant_effect_predictor.pl or vep - binary - +Usage: + perl vcf2maf.pl --help + perl vcf2maf.pl --input-vcf input.vcf --output-maf output.maf --tumor-id TUMOR_ID --normal-id NORMAL_ID +--input-vcf Path to input file in VCF format +--output-maf Path to output MAF file +--tmp-dir Folder to retain intermediate VCFs after runtime [Default: Folder containing input VCF] +--tumor-id Tumor_Sample_Barcode to report in the MAF [TUMOR] +--normal-id Matched_Norm_Sample_Barcode to report in the MAF [NORMAL] +--vcf-tumor-id Tumor sample ID used in VCF's genotype columns [--tumor-id] +--vcf-normal-id Matched normal ID used in VCF's genotype columns [--normal-id] +--custom-enst List of custom ENST IDs that override canonical selection +--vep-path Folder containing the vep script [~/miniconda3/bin] +--vep-data VEP's base cache/plugin directory [~/.vep] +--vep-forks Number of forked processes to use when running VEP [4] +--vep-custom String to pass into VEP's --custom option [] +--vep-config Config file to pass into VEP's --config option [] +--vep-overwrite Allow VEP to overwrite output VCF if it exists +--buffer-size Number of variants VEP loads at a time; Reduce this for low 
memory systems [5000] +--any-allele When reporting co-located variants, allow mismatched variant alleles too +--inhibit-vep Skip running VEP, but extract VEP annotation in VCF if found +--online Use useastdb.ensembl.org instead of local cache (supports only GRCh38 VCFs listing <100 events) +--ref-fasta Reference FASTA file [~/.vep/homo_sapiens/102_GRCh37/Homo_sapiens.GRCh37.dna.toplevel.fa.gz] +--max-subpop-af Add FILTER tag common_variant if gnomAD reports any subpopulation AFs greater than this [0.0004] +--species Ensembl-friendly name of species (e.g. mus_musculus for mouse) [homo_sapiens] +--ncbi-build NCBI reference assembly of variants MAF (e.g. GRCm38 for mouse) [GRCh37] +--cache-version Version of offline cache to use with VEP (e.g. 75, 91, 102) [Default: Installed version] +--maf-center Variant calling center to report in MAF [.] +--retain-info Comma-delimited names of INFO fields to retain as extra columns in MAF [] +--retain-fmt Comma-delimited names of FORMAT fields to retain as extra columns in MAF [] +--retain-ann Comma-delimited names of annotations (within the VEP CSQ/ANN) to retain as extra columns in MAF [] +--min-hom-vaf If GT undefined in VCF, minimum allele fraction to call a variant homozygous [0.7] +--remap-chain Chain file to remap variants to a different assembly before running VEP +--verbose Print more things to log progress +--help Print a brief help message and quit +--man Print the detailed manual ``` From e6c40c71db476ff6ce01183fa828a8031b161a62 Mon Sep 17 00:00:00 2001 From: svural Date: Thu, 17 Mar 2022 13:46:31 -0400 Subject: [PATCH 446/476] inital commit --- vcf2maf_1.6.21/example_inputs.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vcf2maf_1.6.21/example_inputs.yaml b/vcf2maf_1.6.21/example_inputs.yaml index 1de8d51e..906ab455 100644 --- a/vcf2maf_1.6.21/example_inputs.yaml +++ b/vcf2maf_1.6.21/example_inputs.yaml @@ -8,7 +8,7 @@ vcf_normal_id: normal_sample_name ncbi_build: "GRCh37" vep_data: /.vep/ # 
location in the container ref_fasta: "/.vep/homo_sapiens/105_GRCh37/Homo_sapiens.GRCh37.dna.toplevel.fa.gz" # location in the container -vep_path: /usr/local/bin/ +vep_path: /usr/local/bin/ # location in the container retain_info: retain_info_str retain_fmt: retain_fmt_str output_maf: "output.maf" From 91de67f37152dffbac579f59c4bff5ccc53c6a8c Mon Sep 17 00:00:00 2001 From: svural Date: Thu, 17 Mar 2022 13:48:06 -0400 Subject: [PATCH 447/476] initial commit --- docs/vcf2maf/README.md | 76 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) create mode 100644 docs/vcf2maf/README.md diff --git a/docs/vcf2maf/README.md b/docs/vcf2maf/README.md new file mode 100644 index 00000000..6d8cd35d --- /dev/null +++ b/docs/vcf2maf/README.md @@ -0,0 +1,76 @@ +# CWL and Dockerfile for running vcf2maf v1.6.21 + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +|--- |--- |--- | +| clearlinux (base image) | - | - | +| vcf2maf | 1.6.21 | https://github.com/mskcc/vcf2maf/archive/v1.6.21.zip | +| VEP | 105 | - | +|MINICONDA_VERSION | py37_4.9.2 | https://repo.anaconda.com/miniconda/Miniconda3-py37_4.9.2-Linux-x86_64.sh | +|BCFTOOLS_VERSION | 1.10.2 | - | +|SAMTOOLS_VERSION | 1.10 | - | +|VCF2MAF_VERSION | 1.6.21 | - | +|HTSLIB_VERSION | 1.10.2 | - | + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner vcf2maf_1.6.21.cwl example_inputs.yaml +``` + +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/vcf2maf_1.6.21.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> mkdir vcf2maf_toil_log +> 
toil-cwl-runner --singularity --logFile /path/to/vcf2maf_toil_log/cwltoil.log --jobStore /path/to/vcf2maf_jobStore --batchSystem lsf --workDir /path/to/vcf2maf_toil_log --outdir . --writeLogs /path/to/vcf2maf_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/vcf2maf_1.6.21.cwl /path/to/inputs.yaml > vcf2maf_toil.stdout 2> vcf2maf_toil.stderr & +``` + +### Usage + +``` +Usage: + perl vcf2maf.pl --help + perl vcf2maf.pl --input-vcf input.vcf --output-maf output.maf --tumor-id TUMOR_ID --normal-id NORMAL_ID + +--input-vcf Path to input file in VCF format +--output-maf Path to output MAF file +--tmp-dir Folder to retain intermediate VCFs after runtime [Default: Folder containing input VCF] +--tumor-id Tumor_Sample_Barcode to report in the MAF [TUMOR] +--normal-id Matched_Norm_Sample_Barcode to report in the MAF [NORMAL] +--vcf-tumor-id Tumor sample ID used in VCF's genotype columns [--tumor-id] +--vcf-normal-id Matched normal ID used in VCF's genotype columns [--normal-id] +--custom-enst List of custom ENST IDs that override canonical selection +--vep-path Folder containing the vep script [~/miniconda3/bin] +--vep-data VEP's base cache/plugin directory [~/.vep] +--vep-forks Number of forked processes to use when running VEP [4] +--vep-custom String to pass into VEP's --custom option [] +--vep-config Config file to pass into VEP's --config option [] +--vep-overwrite Allow VEP to overwrite output VCF if it exists +--buffer-size Number of variants VEP loads at a time; Reduce this for low memory systems [5000] +--any-allele When reporting co-located variants, allow mismatched variant alleles too +--inhibit-vep Skip running VEP, but extract VEP annotation in VCF if found +--online Use useastdb.ensembl.org instead of local cache (supports only GRCh38 VCFs listing <100 events) +--ref-fasta Reference FASTA file [~/.vep/homo_sapiens/102_GRCh37/Homo_sapiens.GRCh37.dna.toplevel.fa.gz] +--max-subpop-af Add FILTER tag 
common_variant if gnomAD reports any subpopulation AFs greater than this [0.0004] +--species Ensembl-friendly name of species (e.g. mus_musculus for mouse) [homo_sapiens] +--ncbi-build NCBI reference assembly of variants MAF (e.g. GRCm38 for mouse) [GRCh37] +--cache-version Version of offline cache to use with VEP (e.g. 75, 91, 102) [Default: Installed version] +--maf-center Variant calling center to report in MAF [.] +--retain-info Comma-delimited names of INFO fields to retain as extra columns in MAF [] +--retain-fmt Comma-delimited names of FORMAT fields to retain as extra columns in MAF [] +--retain-ann Comma-delimited names of annotations (within the VEP CSQ/ANN) to retain as extra columns in MAF [] +--min-hom-vaf If GT undefined in VCF, minimum allele fraction to call a variant homozygous [0.7] +--remap-chain Chain file to remap variants to a different assembly before running VEP +--verbose Print more things to log progress +--help Print a brief help message and quit +--man Print the detailed manual +``` From a2a43e37ec25f4f158c4a8c40e5bed233da4a7df Mon Sep 17 00:00:00 2001 From: carmelinacharalambous Date: Thu, 17 Mar 2022 15:31:29 -0400 Subject: [PATCH 448/476] Adding README.md and example inputs files --- mosdepth_0.3.3/README.md | 66 +++++++++++++++++++++++++++++++ mosdepth_0.3.3/example_inputs.yml | 10 +++++ 2 files changed, 76 insertions(+) create mode 100644 mosdepth_0.3.3/README.md create mode 100644 mosdepth_0.3.3/example_inputs.yml diff --git a/mosdepth_0.3.3/README.md b/mosdepth_0.3.3/README.md new file mode 100644 index 00000000..56256587 --- /dev/null +++ b/mosdepth_0.3.3/README.md @@ -0,0 +1,66 @@ +Mosdepth: fast BAM/CRAM depth calculation for **WGS**, **exome**, or **targeted sequencing**. + +`mosdepth` can output: ++ per-base depth about 2x as fast `samtools depth`--about 25 minutes of CPU time for a 30X genome. ++ mean per-window depth given a window size--as would be used for CNV calling. ++ the mean per-region given a BED file of regions. 
+* the mean or median per-region cumulative coverage histogram given a window size ++ a distribution of proportion of bases covered at or above a given threshold for each chromosome and genome-wide. ++ quantized output that merges adjacent bases as long as they fall in the same coverage bins e.g. (10-20) ++ threshold output to indicate how many bases in each region are covered at the given thresholds. ++ A summary of mean depths per chromosome and within specified regions per chromosome. + +# CWL for running Mosdepth - Coverage tool +## Version of tools in docker image + +| Tool | Version | Location | +|--- |--- |--- | +| mosdepth | 0.3.3 | brentp/mosdepth:v0.3.3 | + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner mosdepth_0.3.3.cwl example_inputs.yaml +``` + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/mosdepth_0.3.3.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> nohup toil-cwl-runner --singularity --outdir /path/to/output/folder /path/to/mosdepth_0.3.3.cwl /path/to/inputs.yaml & +``` + +### Usage + +```bash +usage: mosdepth_0.3.3.cwl [-h] [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] [--bed BED] + [--chrom CHROM] [--prefix PREFIX] [--flag FLAG] + [--mapq MAPQ] + [job_order] + +fast BAM/CRAM depth calculation for WGS, exome, or targeted sequencing. + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --bed BED optional BED file or (integer) window-sizes. + --chrom CHROM chromosome to restrict depth calculation. 
+ --prefix PREFIX Prefix for the output files + --flag FLAG exclude reads with any of the bits in FLAG set + --mapq MAPQ mapping quality threshold. reads with a mapping + quality less than this are ignored +``` diff --git a/mosdepth_0.3.3/example_inputs.yml b/mosdepth_0.3.3/example_inputs.yml new file mode 100644 index 00000000..540ecf3c --- /dev/null +++ b/mosdepth_0.3.3/example_inputs.yml @@ -0,0 +1,10 @@ +bed: + class: File + path: "/path/to/bed" +input_bam: +- class: File + path: "/path/to/bam" + secondaryFiles: + - class: File + path: "/path/to/bam/index" +prefix: "sample_prefix" \ No newline at end of file From 7fda3007860332dfc1811a25e95f14f24dfb6409 Mon Sep 17 00:00:00 2001 From: carmelinacharalambous Date: Thu, 17 Mar 2022 16:48:36 -0400 Subject: [PATCH 449/476] Updated mosdepth_0.3.3.md --- docs/SUMMARY.md | 3 ++ docs/mosdepth/README.md | 2 + docs/mosdepth/mosdepth_0.3.3.md | 68 +++++++++++++++++++++++++++++++++ mosdepth_0.3.3/README.md | 66 -------------------------------- 4 files changed, 73 insertions(+), 66 deletions(-) create mode 100644 docs/mosdepth/README.md delete mode 100644 mosdepth_0.3.3/README.md diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 4cef6d4a..4ad36661 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -71,3 +71,6 @@ * [Waltz](waltz/README.md) * [CountReads v3.1.1](waltz/waltz_count_reads_3.1.1.md) * [PileupMetrics v3.1.1](waltz/waltz_pileupmatrices_3.1.1.md) +* [Mosdepth](mosdepth/README.md) + * [0.3.3](mosdepth/mosdepth_0.3.3.md) + \ No newline at end of file diff --git a/docs/mosdepth/README.md b/docs/mosdepth/README.md new file mode 100644 index 00000000..d576ad29 --- /dev/null +++ b/docs/mosdepth/README.md @@ -0,0 +1,2 @@ +# Mosdepth + diff --git a/docs/mosdepth/mosdepth_0.3.3.md b/docs/mosdepth/mosdepth_0.3.3.md index e69de29b..44080bde 100644 --- a/docs/mosdepth/mosdepth_0.3.3.md +++ b/docs/mosdepth/mosdepth_0.3.3.md @@ -0,0 +1,68 @@ +Mosdepth: fast BAM/CRAM depth calculation for **WGS**, **exome**, or **targeted 
sequencing**. + +`mosdepth` can output: ++ per-base depth about 2x as fast `samtools depth`--about 25 minutes of CPU time for a 30X genome. ++ mean per-window depth given a window size--as would be used for CNV calling. ++ the mean per-region given a BED file of regions. +* the mean or median per-region cumulative coverage histogram given a window size ++ a distribution of proportion of bases covered at or above a given threshold for each chromosome and genome-wide. ++ quantized output that merges adjacent bases as long as they fall in the same coverage bins e.g. (10-20) ++ threshold output to indicate how many bases in each region are covered at the given thresholds. ++ A summary of mean depths per chromosome and within specified regions per chromosome. + +# CWL for running Mosdepth - Coverage tool +## Version of tools in docker image + +| Tool | Version | Location | +|--- |--- |--- | +| mosdepth | 0.3.3 | [https://hub.docker.com/r/brentp/mosdepth/tags](https://hub.docker.com/r/brentp/mosdepth/tags) [https://github.com/brentp/mosdepth/releases/tag/v0.3.3](https://github.com/brentp/mosdepth/releases/tag/v0.3.3) | + +[![](https://img.shields.io/badge/version-0.3.3-blue)](https://github.com/brentp/mosdepth/releases/tag/v0.3.3)| + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner mosdepth_0.3.3.cwl example_inputs.yaml +``` + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/mosdepth_0.3.3.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> nohup toil-cwl-runner --singularity --outdir /path/to/output/folder /path/to/mosdepth_0.3.3.cwl /path/to/inputs.yaml & +``` + +### Usage + +```bash +usage: mosdepth_0.3.3.cwl [-h] [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] [--bed BED] + [--chrom CHROM] [--prefix PREFIX] [--flag FLAG] + [--mapq MAPQ] + 
[job_order] + +fast BAM/CRAM depth calculation for WGS, exome, or targeted sequencing. + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --bed BED optional BED file or (integer) window-sizes. + --chrom CHROM chromosome to restrict depth calculation. + --prefix PREFIX Prefix for the output files + --flag FLAG exclude reads with any of the bits in FLAG set + --mapq MAPQ mapping quality threshold. reads with a mapping + quality less than this are ignored +``` diff --git a/mosdepth_0.3.3/README.md b/mosdepth_0.3.3/README.md deleted file mode 100644 index 56256587..00000000 --- a/mosdepth_0.3.3/README.md +++ /dev/null @@ -1,66 +0,0 @@ -Mosdepth: fast BAM/CRAM depth calculation for **WGS**, **exome**, or **targeted sequencing**. - -`mosdepth` can output: -+ per-base depth about 2x as fast `samtools depth`--about 25 minutes of CPU time for a 30X genome. -+ mean per-window depth given a window size--as would be used for CNV calling. -+ the mean per-region given a BED file of regions. -* the mean or median per-region cumulative coverage histogram given a window size -+ a distribution of proportion of bases covered at or above a given threshold for each chromosome and genome-wide. -+ quantized output that merges adjacent bases as long as they fall in the same coverage bins e.g. (10-20) -+ threshold output to indicate how many bases in each region are covered at the given thresholds. -+ A summary of mean depths per chromosome and within specified regions per chromosome. 
- -# CWL for running Mosdepth - Coverage tool -## Version of tools in docker image - -| Tool | Version | Location | -|--- |--- |--- | -| mosdepth | 0.3.3 | brentp/mosdepth:v0.3.3 | - -## CWL - -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner mosdepth_0.3.3.cwl example_inputs.yaml -``` - -```bash -#Using CWLTOOL -> cwltool --singularity --non-strict /path/to/mosdepth_0.3.3.cwl /path/to/inputs.yaml - -#Using toil-cwl-runner -> nohup toil-cwl-runner --singularity --outdir /path/to/output/folder /path/to/mosdepth_0.3.3.cwl /path/to/inputs.yaml & -``` - -### Usage - -```bash -usage: mosdepth_0.3.3.cwl [-h] [--memory_per_job MEMORY_PER_JOB] - [--memory_overhead MEMORY_OVERHEAD] - [--number_of_threads NUMBER_OF_THREADS] [--bed BED] - [--chrom CHROM] [--prefix PREFIX] [--flag FLAG] - [--mapq MAPQ] - [job_order] - -fast BAM/CRAM depth calculation for WGS, exome, or targeted sequencing. - -positional arguments: - job_order Job input json file - -optional arguments: - -h, --help show this help message and exit - --memory_per_job MEMORY_PER_JOB - Memory per job in megabytes - --memory_overhead MEMORY_OVERHEAD - Memory overhead per job in megabytes - --number_of_threads NUMBER_OF_THREADS - --bed BED optional BED file or (integer) window-sizes. - --chrom CHROM chromosome to restrict depth calculation. - --prefix PREFIX Prefix for the output files - --flag FLAG exclude reads with any of the bits in FLAG set - --mapq MAPQ mapping quality threshold. 
reads with a mapping - quality less than this are ignored -``` From b04acc73d6b4eeb12f2df244b45a1513b48ca9bf Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Thu, 17 Mar 2022 16:52:47 -0400 Subject: [PATCH 450/476] Update mosdepth_0.3.3.cwl --- mosdepth_0.3.3/mosdepth_0.3.3.cwl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mosdepth_0.3.3/mosdepth_0.3.3.cwl b/mosdepth_0.3.3/mosdepth_0.3.3.cwl index 848e3793..7d46c539 100644 --- a/mosdepth_0.3.3/mosdepth_0.3.3.cwl +++ b/mosdepth_0.3.3/mosdepth_0.3.3.cwl @@ -122,7 +122,7 @@ requirements: 'foaf:member': - class: 'foaf:Person' 'foaf:mbox': 'mailto:shahr2@mskcc.org' - 'foaf:name': Carmelina + 'foaf:name': Ronak Shah 'foaf:name': Memorial Sloan Kettering Cancer Center 'dct:creator': - class: 'foaf:Organization' From 8bf8ecce4af72e8dbb6675db9b5e5c52a38f55f1 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 18 Mar 2022 16:33:26 -0400 Subject: [PATCH 451/476] Update SUMMARY.md --- docs/SUMMARY.md | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 4ad36661..2dfb85a5 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -47,6 +47,8 @@ * [MuTect 1.1.5](mutect/mutect_1.1.5.md) * [Merge Fastq](merge-fastq/README.md) * [v0.1.7](merge-fastq/merge_fastq_0.1.7.md) +* [Mosdepth](mosdepth/README.md) + * [0.3.3](mosdepth/mosdepth_0.3.3.md) * [Picard Tools](picard-tools/README.md) * [AddOrReplaceReadGroups v1.96](picard-tools/picard_add_or_replace_read_groups_1.96.md) * [AddOrReplaceReadGroups v2.21.2](picard-tools/picard_add_or_replace_read_groups_2.21.2.md) @@ -68,9 +70,10 @@ * [v0.6.2](trim-galore/trim_galore_0.6.2.md) * [Ubuntu utilites](ubuntu-utilites/README.md) * [v18.04](ubuntu-utilites/utilities_ubuntu_18.04.md) +* [VarDictJava](vardict/README.md) + * [v18.04](vardict/vardict_1.8.3.md) * [Waltz](waltz/README.md) * [CountReads v3.1.1](waltz/waltz_count_reads_3.1.1.md) * [PileupMetrics v3.1.1](waltz/waltz_pileupmatrices_3.1.1.md) -* 
[Mosdepth](mosdepth/README.md) - * [0.3.3](mosdepth/mosdepth_0.3.3.md) - \ No newline at end of file + + From a931c0779be8398ef0c6e21d312652863bb2778d Mon Sep 17 00:00:00 2001 From: Karthigayini Sivaprakasam Date: Mon, 21 Mar 2022 12:13:12 -0500 Subject: [PATCH 452/476] working cwl commit --- octopus/0.7.4/example_input.yaml | 21 ++++++++++++++++ octopus/0.7.4/octopus_0-7-4.cwl | 42 +++++++++++++++++--------------- 2 files changed, 43 insertions(+), 20 deletions(-) create mode 100644 octopus/0.7.4/example_input.yaml diff --git a/octopus/0.7.4/example_input.yaml b/octopus/0.7.4/example_input.yaml new file mode 100644 index 00000000..70b44e10 --- /dev/null +++ b/octopus/0.7.4/example_input.yaml @@ -0,0 +1,21 @@ +error_models: null +input: + - class: File + path: >- + /juno/work/access/production/data/bams/C-0A8NCE/C-0A8NCE-L001-d/current/C-0A8NCE-L001-d_cl_aln_srt_MD_IR_FX_BR__aln_srt_IR_FX-duplex.bam + - class: File + path: >- + /juno/work/access/production/data/bams/C-0A8NCE/C-0A8NCE-N001-d/current/C-0A8NCE-N001-d_cl_aln_srt_MD_IR_FX_BR__aln_srt_IR_FX-duplex.bam +normalId: C-0A8NCE-N001-d +output_file_name: oct.vcf +reference: + class: File + metadata: {} + path: >- + /juno/work/access/production/resources/reference/current/Homo_sapiens_assembly19.fasta +skipRegions_file: null +skipRegions_singleEntry: null +somaticOnlyCalls: null +targettedCalling_file: null +targettedCalling_singleEntry: null +tumorOnlySample: null \ No newline at end of file diff --git a/octopus/0.7.4/octopus_0-7-4.cwl b/octopus/0.7.4/octopus_0-7-4.cwl index f9a41d29..314825ca 100644 --- a/octopus/0.7.4/octopus_0-7-4.cwl +++ b/octopus/0.7.4/octopus_0-7-4.cwl @@ -6,24 +6,13 @@ id: octopus_0_7_4 baseCommand: - octopus inputs: - - id: Reference - type: File - inputBinding: - position: 0 - prefix: '-R' - secondaryFiles: - - .fai - id: input - type: - - File - - type: array - items: File + type: 'File[]' inputBinding: position: 0 prefix: '-I' + secondaryFiles: ^.bai doc: Tumor and normal bam files 
with .bai - secondaryFiles: - - ^.bai - id: normalId type: string? inputBinding: @@ -42,7 +31,7 @@ inputs: position: 0 prefix: '--somatics-only' doc: if somatics only call is required. Use this with -f ON parameter - - id: targettedCalling.singleEntry + - id: targettedCalling_singleEntry type: string? inputBinding: position: 0 @@ -59,25 +48,25 @@ inputs: 4. chr4:100,000,000-200,000,000: everything between chr4:100,000,000 and chr4:200,000,000. The interval is half open so position chr4:200,000,000 is not included. - - id: skipRegions.singleEntry + - id: skipRegions_singleEntry type: string? inputBinding: position: 0 prefix: '-K' doc: to skip a set of regions - - id: targettedCalling.file + - id: targettedCalling_file type: File? inputBinding: position: 0 prefix: '-t' doc: regions in a text or bed file - - id: skipRegions.file + - id: skipRegions_file type: File? inputBinding: position: 0 prefix: '-k' doc: regions in text or bed file format - - id: error.models + - id: error_models type: string? inputBinding: position: 0 @@ -85,11 +74,23 @@ inputs: doc: >- error model will be in the format - [library preparation]<.sequencer> eg: PCR.NOVASEQ + - id: reference + type: File + inputBinding: + position: 0 + prefix: '-R' + secondaryFiles: + - .fai + - id: output_file_name + type: string + inputBinding: + position: 0 + prefix: '-o' outputs: - id: outputVCF - type: File? 
+ type: File outputBinding: - glob: '*.vcf' + glob: '${ if (inputs.output) return inputs.output; return null; }' label: octopus requirements: - class: ResourceRequirement @@ -97,3 +98,4 @@ requirements: coresMin: 2 - class: DockerRequirement dockerPull: 'ghcr.io/msk-access/octopus:0.7.4' + - class: InlineJavascriptRequirement From 8a19a6250b46deff342bb2de69b9c1ed9539bfd4 Mon Sep 17 00:00:00 2001 From: Karthigayini Sivaprakasam Date: Mon, 21 Mar 2022 15:26:53 -0500 Subject: [PATCH 453/476] Update octopus_0-7-4.cwl updated namespace and descrip --- octopus/0.7.4/octopus_0-7-4.cwl | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/octopus/0.7.4/octopus_0-7-4.cwl b/octopus/0.7.4/octopus_0-7-4.cwl index 314825ca..54b743b5 100644 --- a/octopus/0.7.4/octopus_0-7-4.cwl +++ b/octopus/0.7.4/octopus_0-7-4.cwl @@ -2,6 +2,9 @@ class: CommandLineTool cwlVersion: v1.0 $namespaces: sbg: 'https://www.sevenbridges.com/' + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' id: octopus_0_7_4 baseCommand: - octopus @@ -99,3 +102,22 @@ requirements: - class: DockerRequirement dockerPull: 'ghcr.io/msk-access/octopus:0.7.4' - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:kumarn1@mskcc.org' + 'foaf:name': Nikhil Kumar + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': abra2 + 'doap:revision': 2.22 + From a9df476c8fe2ed969732e1f895ec8d17fc2e1421 Mon Sep 17 00:00:00 2001 From: Karthigayini Sivaprakasam Date: Mon, 21 Mar 2022 15:56:38 -0500 Subject: [PATCH 454/476] updated summary and docs --- docs/octopus/README.md | 2 ++ 
docs/octopus/octopus_0.7.4.md | 42 +++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+) create mode 100644 docs/octopus/README.md create mode 100644 docs/octopus/octopus_0.7.4.md diff --git a/docs/octopus/README.md b/docs/octopus/README.md new file mode 100644 index 00000000..c19f208c --- /dev/null +++ b/docs/octopus/README.md @@ -0,0 +1,2 @@ +# Octopus + diff --git a/docs/octopus/octopus_0.7.4.md b/docs/octopus/octopus_0.7.4.md new file mode 100644 index 00000000..85750b91 --- /dev/null +++ b/docs/octopus/octopus_0.7.4.md @@ -0,0 +1,42 @@ +## CWL and Docker for Running Octopus + +## Version of tools in [docker image](https://hub.docker.com/r/biocontainers/bedtools) + +| Tool | Version | Location | +| ------- | ------- | -------------------------------- | +| Octopus | v0.7.4 | ghcr.io/msk-access/octopus:0.7.4 | + +### CWL + +CWL specification 1.0 +Use example_input.yaml to see the inputs to the cwl +Example Command using [toil](https://toil.readthedocs.io/): +`toil-cwl-runner octopus_0-7-4.cwl example_input.yaml` + +If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing type==X86_64 && you can use the following command + +### Using CWLTOOL + +``` +cwltool --singularity --non-strict /path/to/mutect_1.1.5.cwl /path/to/inputs.yaml +``` + +### Using toil-cwl-runner + +```shell +mkdir octopus_toil_log +toil-cwl-runner --singularity --logFile /path/to/octopus_toil_log/cwltoil.log --jobStore /path/to/octopus_jobStore --batchSystem lsf --workDir /path/to/octopus_toil_log --outdir . 
--writeLogs /path/to/octopus_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/octopus_0-7-4.cwl /path/to/inputs.yaml > octopus_toil.stdout 2> octopus_toil.stderr & +``` + +### Usage + +```shell +usage: /work/bergerm1/bergerlab/sivaprk/tool_testing/octopus_full.cwl + [-h] --input INPUT [--normalId NORMALID] [--tumorOnlySample] + [--somaticOnlyCalls] + [--targettedCalling_singleEntry TARGETTEDCALLING_SINGLEENTRY] + [--skipRegions_singleEntry SKIPREGIONS_SINGLEENTRY] + [--targettedCalling_file TARGETTEDCALLING_FILE] + [--skipRegions_file SKIPREGIONS_FILE] [--error_models ERROR_MODELS] + --reference REFERENCE --output_file_name OUTPUT_FILE_NAME +``` \ No newline at end of file From ad73f7cfad98d894f5333509ba74a634dbf98bef Mon Sep 17 00:00:00 2001 From: Karthigayini Sivaprakasam Date: Tue, 22 Mar 2022 09:43:40 -0500 Subject: [PATCH 455/476] Update SUMMARY.md Added octopus --- docs/SUMMARY.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 4cef6d4a..304264ad 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -47,6 +47,8 @@ * [MuTect 1.1.5](mutect/mutect_1.1.5.md) * [Merge Fastq](merge-fastq/README.md) * [v0.1.7](merge-fastq/merge_fastq_0.1.7.md) +* [Octopus](octopus/README.md) + * [v0.7.4](octopus/octopus_0.7.4.md) * [Picard Tools](picard-tools/README.md) * [AddOrReplaceReadGroups v1.96](picard-tools/picard_add_or_replace_read_groups_1.96.md) * [AddOrReplaceReadGroups v2.21.2](picard-tools/picard_add_or_replace_read_groups_2.21.2.md) From 4c6c34a077e56ab32338a0242cec03fbdc0cecb6 Mon Sep 17 00:00:00 2001 From: Karthigayini Sivaprakasam Date: Tue, 22 Mar 2022 10:47:46 -0500 Subject: [PATCH 456/476] Update octopus_0-7-4.cwl updated cwl --- octopus/0.7.4/octopus_0-7-4.cwl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/octopus/0.7.4/octopus_0-7-4.cwl b/octopus/0.7.4/octopus_0-7-4.cwl index 54b743b5..0c7d74c1 100644 --- 
a/octopus/0.7.4/octopus_0-7-4.cwl +++ b/octopus/0.7.4/octopus_0-7-4.cwl @@ -106,18 +106,18 @@ requirements: - class: 'foaf:Organization' 'foaf:member': - class: 'foaf:Person' - 'foaf:mbox': 'mailto:kumarn1@mskcc.org' - 'foaf:name': Nikhil Kumar + 'foaf:mbox': 'mailto:sivaprk@mskcc.org' + 'foaf:name': Karthigayini Sivaprakasam 'foaf:name': Memorial Sloan Kettering Cancer Center 'dct:creator': - class: 'foaf:Organization' 'foaf:member': - class: 'foaf:Person' - 'foaf:mbox': 'mailto:shahr2@mskcc.org' - 'foaf:name': Ronak Shah + 'foaf:mbox': 'mailto:sivaprk@mskcc.org' + 'foaf:name': Karthigayini Sivaprakasam 'foaf:name': Memorial Sloan Kettering Cancer Center 'doap:release': - class: 'doap:Version' - 'doap:name': abra2 - 'doap:revision': 2.22 + 'doap:name': octopus + 'doap:revision': 0.7.4 From b2825fcb33ac074b8a5bcd46a30e4707182130d4 Mon Sep 17 00:00:00 2001 From: Karthigayini Sivaprakasam Date: Tue, 22 Mar 2022 10:59:17 -0500 Subject: [PATCH 457/476] Update octopus_0.7.4.md updated github url --- docs/octopus/octopus_0.7.4.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/octopus/octopus_0.7.4.md b/docs/octopus/octopus_0.7.4.md index 85750b91..aa1f612b 100644 --- a/docs/octopus/octopus_0.7.4.md +++ b/docs/octopus/octopus_0.7.4.md @@ -2,9 +2,9 @@ ## Version of tools in [docker image](https://hub.docker.com/r/biocontainers/bedtools) -| Tool | Version | Location | -| ------- | ------- | -------------------------------- | -| Octopus | v0.7.4 | ghcr.io/msk-access/octopus:0.7.4 | +| Tool | Version | Location | +| ------- | ------- | ---------------------------------------------------------- | +| Octopus | v0.7.4 | https://github.com/luntergroup/octopus/releases/tag/v0.7.4 | ### CWL From a28bdca1a55c00564ce300939c50ed6e7c65a66e Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Tue, 22 Mar 2022 12:22:09 -0400 Subject: [PATCH 458/476] Update octopus_0.7.4.md --- docs/octopus/octopus_0.7.4.md | 54 ++++++++++++++++++++++++++++------- 1 file changed, 
43 insertions(+), 11 deletions(-) diff --git a/docs/octopus/octopus_0.7.4.md b/docs/octopus/octopus_0.7.4.md index aa1f612b..2967f783 100644 --- a/docs/octopus/octopus_0.7.4.md +++ b/docs/octopus/octopus_0.7.4.md @@ -1,6 +1,6 @@ ## CWL and Docker for Running Octopus -## Version of tools in [docker image](https://hub.docker.com/r/biocontainers/bedtools) +## Version of tools in [docker image](https://hub.docker.com/r/dancooke/octopus/tags) | Tool | Version | Location | | ------- | ------- | ---------------------------------------------------------- | @@ -18,7 +18,7 @@ If at MSK, using the JUNO cluster having installed toil version 3.19 and manuall ### Using CWLTOOL ``` -cwltool --singularity --non-strict /path/to/mutect_1.1.5.cwl /path/to/inputs.yaml +cwltool --singularity --non-strict /path/to/octopus_0-7-4.cwl /path/to/inputs.yaml ``` ### Using toil-cwl-runner @@ -31,12 +31,44 @@ toil-cwl-runner --singularity --logFile /path/to/octopus_toil_log/cwltoil.log - ### Usage ```shell -usage: /work/bergerm1/bergerlab/sivaprk/tool_testing/octopus_full.cwl - [-h] --input INPUT [--normalId NORMALID] [--tumorOnlySample] - [--somaticOnlyCalls] - [--targettedCalling_singleEntry TARGETTEDCALLING_SINGLEENTRY] - [--skipRegions_singleEntry SKIPREGIONS_SINGLEENTRY] - [--targettedCalling_file TARGETTEDCALLING_FILE] - [--skipRegions_file SKIPREGIONS_FILE] [--error_models ERROR_MODELS] - --reference REFERENCE --output_file_name OUTPUT_FILE_NAME -``` \ No newline at end of file +usage: octopus_0-7-4.cwl [-h] --input INPUT [--normalId NORMALID] + [--tumorOnlySample] [--somaticOnlyCalls] + [--targettedCalling_singleEntry TARGETTEDCALLING_SINGLEENTRY] + [--skipRegions_singleEntry SKIPREGIONS_SINGLEENTRY] + [--targettedCalling_file TARGETTEDCALLING_FILE] + [--skipRegions_file SKIPREGIONS_FILE] + [--error_models ERROR_MODELS] --reference REFERENCE + --output_file_name OUTPUT_FILE_NAME + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help 
show this help message and exit + --input INPUT Tumor and normal bam files with .bai + --normalId NORMALID add the name of the normal sample + --tumorOnlySample mention this parameter if it is tumor only sample. + --somaticOnlyCalls if somatics only call is required. Use this with -f ON + parameter + --targettedCalling_singleEntry TARGETTEDCALLING_SINGLEENTRY + list of regions to call variants from. eg 1. chr1: all + of chr1. 2. chr2:10,000,000: the single position + 10000000 in chr2. chr3:5,000,000-: everything from 3. + chr3:5,000,000 onwards. 4. + chr4:100,000,000-200,000,000: everything between + chr4:100,000,000 and chr4:200,000,000. The interval is + half open so position chr4:200,000,000 is not + included. + --skipRegions_singleEntry SKIPREGIONS_SINGLEENTRY + to skip a set of regions + --targettedCalling_file TARGETTEDCALLING_FILE + regions in a text or bed file + --skipRegions_file SKIPREGIONS_FILE + regions in text or bed file format + --error_models ERROR_MODELS + error model will be in the format - [library + preparation]<.sequencer> eg: PCR.NOVASEQ + --reference REFERENCE + --output_file_name OUTPUT_FILE_NAME +``` From eabd77b4ab13cb88a8d1a51fb1ff30666fc9e6df Mon Sep 17 00:00:00 2001 From: buehlere Date: Thu, 14 Apr 2022 13:41:12 -0400 Subject: [PATCH 459/476] first delly commit making a practice cwl script for running delly tools. 
--- delly_tools_1.0.0/container/Dockerfile | 53 ++++++++++++++++++++++++++ delly_tools_1.0.0/delly_1.0.cwl | 26 +++++++++++++ delly_tools_1.0.0/example_input.yml | 4 ++ 3 files changed, 83 insertions(+) create mode 100644 delly_tools_1.0.0/container/Dockerfile create mode 100644 delly_tools_1.0.0/delly_1.0.cwl create mode 100644 delly_tools_1.0.0/example_input.yml diff --git a/delly_tools_1.0.0/container/Dockerfile b/delly_tools_1.0.0/container/Dockerfile new file mode 100644 index 00000000..4acc8f4d --- /dev/null +++ b/delly_tools_1.0.0/container/Dockerfile @@ -0,0 +1,53 @@ +# taken from: https://github.com/dellytools/delly/blob/main/Dockerfile +# modify for additional functionality +# use the ubuntu base image +FROM ubuntu:18.04 + +# install required packages +RUN apt-get update && apt-get install -y \ + autoconf \ + build-essential \ + cmake \ + g++ \ + gfortran \ + git \ + libcurl4-gnutls-dev \ + hdf5-tools \ + libboost-date-time-dev \ + libboost-program-options-dev \ + libboost-system-dev \ + libboost-filesystem-dev \ + libboost-iostreams-dev \ + libbz2-dev \ + libhdf5-dev \ + libncurses-dev \ + liblzma-dev \ + zlib1g-dev \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +# set environment +ENV BOOST_ROOT /usr + +# install delly +RUN cd /opt \ + && git clone --recursive https://github.com/dellytools/delly.git \ + && cd /opt/delly/ \ + && make STATIC=1 all \ + && make install + + +# Multi-stage build +FROM alpine:latest +RUN mkdir -p /opt/delly/bin +WORKDIR /opt/delly/bin +COPY --from=0 /opt/delly/bin/delly . 
+ +# Workdir +WORKDIR /root/ + +# Add Delly to PATH +ENV PATH="/opt/delly/bin:${PATH}" + +# by default /bin/sh +CMD ["/bin/sh"] \ No newline at end of file diff --git a/delly_tools_1.0.0/delly_1.0.cwl b/delly_tools_1.0.0/delly_1.0.cwl new file mode 100644 index 00000000..cc62c388 --- /dev/null +++ b/delly_tools_1.0.0/delly_1.0.cwl @@ -0,0 +1,26 @@ +#!/usr/bin/env cwl-runner +cwlVersion: v1.0 +class: CommandLineTool +hints: + DockerRequirement: + dockerPull: 'ghcr.io/msk-access/delly_1.0:latest' +baseCommand: + - delly +arguments: ["call"] +inputs: + - id: out_file + type: string? + doc: name of output file + inputBinding: + position: 0 + prefix: '-o' + - id: input_bam + type: string[] + inputBinding: + prefix: -g + position: 1 +outputs: + bcf_out: + type: File + outputBinding: + glob: $(inputs.out_file) \ No newline at end of file diff --git a/delly_tools_1.0.0/example_input.yml b/delly_tools_1.0.0/example_input.yml new file mode 100644 index 00000000..2ad3eb59 --- /dev/null +++ b/delly_tools_1.0.0/example_input.yml @@ -0,0 +1,4 @@ +out_file: "name_of_output.bcf" +input_bam: ["/path/to/reference.fasta", "path/to/input.bam", "path/to/control.bam"] + + From b19a5314d603f34ec12b48e5df5e437d2b3f8b05 Mon Sep 17 00:00:00 2001 From: buehlere Date: Thu, 14 Apr 2022 14:03:40 -0400 Subject: [PATCH 460/476] adding additional documentation --- delly_tools_1.0.0/delly_1.0.cwl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/delly_tools_1.0.0/delly_1.0.cwl b/delly_tools_1.0.0/delly_1.0.cwl index cc62c388..c3a65a65 100644 --- a/delly_tools_1.0.0/delly_1.0.cwl +++ b/delly_tools_1.0.0/delly_1.0.cwl @@ -14,11 +14,13 @@ inputs: inputBinding: position: 0 prefix: '-o' + doc: The name to be used for the output bcf file - id: input_bam type: string[] inputBinding: prefix: -g position: 1 + doc: a list of strings naming directories for the reference genome fasta file, an indexed bam tumor file, an indexed bam control file outputs: bcf_out: type: File From 
16d39a904bd16eccb1ae10315e307f76e9c6b534 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Fri, 15 Apr 2022 11:14:22 -0400 Subject: [PATCH 461/476] Updating Delly2 :heavy_check_mark: Dockerfile :heavy_check_mark: CWL file :heavy_check_mark: README Co-Authored-By: Eric Buehler <31450790+buehlere@users.noreply.github.com> --- delly_0.9.1/README.md | 58 ++++++++++ .../container/Dockerfile | 24 ++++- delly_0.9.1/delly_0.9.1.cwl | 100 ++++++++++++++++++ .../example_input.yml | 0 delly_tools_1.0.0/delly_1.0.cwl | 28 ----- 5 files changed, 181 insertions(+), 29 deletions(-) create mode 100644 delly_0.9.1/README.md rename {delly_tools_1.0.0 => delly_0.9.1}/container/Dockerfile (55%) create mode 100644 delly_0.9.1/delly_0.9.1.cwl rename {delly_tools_1.0.0 => delly_0.9.1}/example_input.yml (100%) delete mode 100644 delly_tools_1.0.0/delly_1.0.cwl diff --git a/delly_0.9.1/README.md b/delly_0.9.1/README.md new file mode 100644 index 00000000..d3e95864 --- /dev/null +++ b/delly_0.9.1/README.md @@ -0,0 +1,58 @@ +# CWL and Dockerfile for running ABRA2 + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +|--- |--- |--- | +| ubuntu | 18.04 | - | +| DELLY | 0.9.1 | https://github.com/dellytools/delly | + + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner delly_0.9.1.cwl example_inputs.yaml +``` +**If at MSK, using the JUNO cluster having installed toil-msk version 3.21.1 you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/bwa_mem_0.7.17.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> toil-cwl-runner --singularity --logFile /path/to/bwa_mem_toil.log --jobStore /path/to/bwa_mem_jobStore --batchSystem lsf --workDir /path/to/bwa_mem_toil_log --outdir . 
--writeLogs /path/to/bwa_mem_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --disableChaining --maxLogFileSize 20000000000 /path/to/bwa_mem_0.7.17.cwl /path/to/inputs.yaml > bwa_mem_toil.stdout 2> bwa_mem_toil.stderr & +``` + +### Usage + +``` +usage: delly_0.9.1.cwl [-h] [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] + [--out_file OUT_FILE] --reference_genome + REFERENCE_GENOME [--exclude_regions EXCLUDE_REGIONS] + [--vcffile VCFFILE] [--svtype SVTYPE] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --out_file OUT_FILE The name to be used for the output bcf file + --reference_genome REFERENCE_GENOME + reference genome fasta file + --exclude_regions EXCLUDE_REGIONS + file with regions to exclude + --vcffile VCFFILE input VCF/BCF file for genotyping + --svtype SVTYPE SV type to compute [DEL, INS, DUP, INV, BND, ALL] +``` diff --git a/delly_tools_1.0.0/container/Dockerfile b/delly_0.9.1/container/Dockerfile similarity index 55% rename from delly_tools_1.0.0/container/Dockerfile rename to delly_0.9.1/container/Dockerfile index 4acc8f4d..ecb79747 100644 --- a/delly_tools_1.0.0/container/Dockerfile +++ b/delly_0.9.1/container/Dockerfile @@ -2,6 +2,27 @@ # modify for additional functionality # use the ubuntu base image FROM ubuntu:18.04 +################## ARGUMENTS/Environments ########## + +ARG BUILD_DATE +ARG BUILD_VERSION +ARG LICENSE="Apache-2.0" +ARG DELLY_VERSION +ARG VCS_REF +################## METADATA ######################## +LABEL org.opencontainers.image.vendor="MSKCC" +LABEL org.opencontainers.image.authors="Eric Buehlere (buehlere@mskcc.org)" + +LABEL org.opencontainers.image.created=${BUILD_DATE} \ + 
org.opencontainers.image.version=${BUILD_VERSION} \ + org.opencontainers.image.licenses=${LICENSE} \ + org.opencontainers.image.version.delly=${DELLY_VERSION} \ + org.opencontainers.image.vcs-url="https://github.com/dellytools/delly.git" \ + org.opencontainers.image.vcs-ref=${VCS_REF} + +LABEL org.opencontainers.image.description="This container uses ubuntu:18.04 as the base image to build \ + DELLY version ${DELLY_VERSION}" + # install required packages RUN apt-get update && apt-get install -y \ @@ -33,6 +54,7 @@ ENV BOOST_ROOT /usr RUN cd /opt \ && git clone --recursive https://github.com/dellytools/delly.git \ && cd /opt/delly/ \ + && git checkout ${DELLY_VERSION} \ && make STATIC=1 all \ && make install @@ -50,4 +72,4 @@ WORKDIR /root/ ENV PATH="/opt/delly/bin:${PATH}" # by default /bin/sh -CMD ["/bin/sh"] \ No newline at end of file +CMD ["/bin/sh"] diff --git a/delly_0.9.1/delly_0.9.1.cwl b/delly_0.9.1/delly_0.9.1.cwl new file mode 100644 index 00000000..d3ff0dbb --- /dev/null +++ b/delly_0.9.1/delly_0.9.1.cwl @@ -0,0 +1,100 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +baseCommand: + - delly +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: out_file + type: string? 
+ inputBinding: + position: 0 + prefix: '-o' + shellQuote: false + doc: The name to be used for the output bcf file + - id: reference_genome + type: File + inputBinding: + position: 0 + prefix: '-g' + shellQuote: false + doc: reference genome fasta file + - id: input_bams + type: + - File + - type: array + items: File + inputBinding: + position: 99 + shellQuote: false + doc: >- + an indexed bam tumor file, an indexed bam control file or it can be an + array of indexed bam files + secondaryFiles: + - ^.bai + - id: exclude_regions + type: File? + inputBinding: + position: 0 + prefix: '-x' + shellQuote: false + doc: file with regions to exclude + - id: vcffile + type: File? + inputBinding: + position: 0 + prefix: '-v' + shellQuote: false + doc: input VCF/BCF file for genotyping + - id: svtype + type: string? + inputBinding: + position: 0 + prefix: '-t' + shellQuote: false + doc: 'SV type to compute [DEL, INS, DUP, INV, BND, ALL]' +outputs: + - id: bcf_out + type: File + outputBinding: + glob: $(inputs.out_file) +arguments: + - call +requirements: + - class: ShellCommandRequirement + - class: ResourceRequirement + ramMin: 60000 + coresMin: 16 + - class: InlineJavascriptRequirement +hints: + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/delly:0.9.1' +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:kumarn1@mskcc.org' + 'foaf:name': Eric Buehlere + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': delly + 'doap:revision': 0.9.1 diff --git a/delly_tools_1.0.0/example_input.yml b/delly_0.9.1/example_input.yml similarity index 100% rename from delly_tools_1.0.0/example_input.yml rename to delly_0.9.1/example_input.yml diff --git 
a/delly_tools_1.0.0/delly_1.0.cwl b/delly_tools_1.0.0/delly_1.0.cwl deleted file mode 100644 index c3a65a65..00000000 --- a/delly_tools_1.0.0/delly_1.0.cwl +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env cwl-runner -cwlVersion: v1.0 -class: CommandLineTool -hints: - DockerRequirement: - dockerPull: 'ghcr.io/msk-access/delly_1.0:latest' -baseCommand: - - delly -arguments: ["call"] -inputs: - - id: out_file - type: string? - doc: name of output file - inputBinding: - position: 0 - prefix: '-o' - doc: The name to be used for the output bcf file - - id: input_bam - type: string[] - inputBinding: - prefix: -g - position: 1 - doc: a list of strings naming directories for the reference genome fasta file, an indexed bam tumor file, an indexed bam control file -outputs: - bcf_out: - type: File - outputBinding: - glob: $(inputs.out_file) \ No newline at end of file From f21ac0af966ee4c08402826504059757f3510bf3 Mon Sep 17 00:00:00 2001 From: buehlere Date: Fri, 15 Apr 2022 14:53:14 -0400 Subject: [PATCH 462/476] Code Review Updates Delly :heavy_check_mark: update README :heavy_check_mark: update example_input.yml :heavy_check_mark: update delly_0.9.1.cwl with additional options :heavy_check_mark: update Dockerfile documentation --- delly_0.9.1/README.md | 25 ++++++++++++++++++++++- delly_0.9.1/container/Dockerfile | 4 ++-- delly_0.9.1/delly_0.9.1.cwl | 35 +++++++++++++++++++++++++++++--- delly_0.9.1/example_input.yml | 7 ++++--- 4 files changed, 62 insertions(+), 9 deletions(-) diff --git a/delly_0.9.1/README.md b/delly_0.9.1/README.md index d3e95864..08dcf390 100644 --- a/delly_0.9.1/README.md +++ b/delly_0.9.1/README.md @@ -1,4 +1,4 @@ -# CWL and Dockerfile for running ABRA2 +# CWL and Dockerfile for running Delly ## Version of tools in docker image (/container/Dockerfile) @@ -56,3 +56,26 @@ optional arguments: --vcffile VCFFILE input VCF/BCF file for genotyping --svtype SVTYPE SV type to compute [DEL, INS, DUP, INV, BND, ALL] ``` + +## Disclaimer +Parts of this code were 
borrowed from the delly repository, https://github.com/dellytools/delly, which uses the following redistribution license: + +Copyright (c) 2012- European Molecular Biology Laboratory (EMBL) +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/delly_0.9.1/container/Dockerfile b/delly_0.9.1/container/Dockerfile index ecb79747..6b66905c 100644 --- a/delly_0.9.1/container/Dockerfile +++ b/delly_0.9.1/container/Dockerfile @@ -1,6 +1,6 @@ # taken from: https://github.com/dellytools/delly/blob/main/Dockerfile # modify for additional functionality -# use the ubuntu base image +################## Base Image ########## FROM ubuntu:18.04 ################## ARGUMENTS/Environments ########## @@ -24,7 +24,7 @@ LABEL org.opencontainers.image.description="This container uses ubuntu:18.04 as DELLY version ${DELLY_VERSION}" -# install required packages +################## INSTALL ########################## RUN apt-get update && apt-get install -y \ autoconf \ build-essential \ diff --git a/delly_0.9.1/delly_0.9.1.cwl b/delly_0.9.1/delly_0.9.1.cwl index d3ff0dbb..9c6f82dc 100644 --- a/delly_0.9.1/delly_0.9.1.cwl +++ b/delly_0.9.1/delly_0.9.1.cwl @@ -64,6 +64,36 @@ inputs: prefix: '-t' shellQuote: false doc: 'SV type to compute [DEL, INS, DUP, INV, BND, ALL]' + - id: geno_qual + type: int? + inputBinding: + position: 71 + prefix: '-u' + doc: min. mapping quality for genotyping + - id: dump + type: File? + inputBinding: + position: 0 + prefix: '-d' + doc: gzipped output file for SV-reads (optional) + - id: map_qual + type: int? + inputBinding: + position: 0 + prefix: '-q' + doc: min. paired-end (PE) mapping quality + - id: qual_tra + type: int? + inputBinding: + position: 0 + prefix: '-r' + doc: min. PE quality for translocation + - id: mad_cutoff + type: int? 
+ inputBinding: + position: 0 + prefix: '-s' + doc: 'insert size cutoff, median+s*MAD (deletions only)' outputs: - id: bcf_out type: File @@ -76,7 +106,6 @@ requirements: - class: ResourceRequirement ramMin: 60000 coresMin: 16 - - class: InlineJavascriptRequirement hints: - class: DockerRequirement dockerPull: 'ghcr.io/msk-access/delly:0.9.1' @@ -84,8 +113,8 @@ hints: - class: 'foaf:Organization' 'foaf:member': - class: 'foaf:Person' - 'foaf:mbox': 'mailto:kumarn1@mskcc.org' - 'foaf:name': Eric Buehlere + 'foaf:mbox': 'mailto:buehlere@mskcc.org' + 'foaf:name': Eric Buehler 'foaf:name': Memorial Sloan Kettering Cancer Center 'dct:creator': - class: 'foaf:Organization' diff --git a/delly_0.9.1/example_input.yml b/delly_0.9.1/example_input.yml index 2ad3eb59..d84fb73c 100644 --- a/delly_0.9.1/example_input.yml +++ b/delly_0.9.1/example_input.yml @@ -1,4 +1,5 @@ out_file: "name_of_output.bcf" -input_bam: ["/path/to/reference.fasta", "path/to/input.bam", "path/to/control.bam"] - - +reference_genome: {class: File, path: path_to_file.fasta} +input_bams: + - {class: File, path: /path/to/file.bam} + - {class: File, path: /path/to/file.bam} From c62f7ab4c7e0503107f468a1157e9776b9f71487 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Mon, 18 Apr 2022 16:42:49 -0400 Subject: [PATCH 463/476] Revert "Feature/vcf2maf update" --- docs/vcf2maf/README.md | 76 ----------- vcf2maf_1.6.21/README.md | 76 ----------- vcf2maf_1.6.21/container/Dockerfile | 79 ----------- vcf2maf_1.6.21/example_inputs.yaml | 14 -- vcf2maf_1.6.21/vcf2maf_1.6.21.cwl | 195 ---------------------------- 5 files changed, 440 deletions(-) delete mode 100644 docs/vcf2maf/README.md delete mode 100644 vcf2maf_1.6.21/README.md delete mode 100644 vcf2maf_1.6.21/container/Dockerfile delete mode 100644 vcf2maf_1.6.21/example_inputs.yaml delete mode 100644 vcf2maf_1.6.21/vcf2maf_1.6.21.cwl diff --git a/docs/vcf2maf/README.md b/docs/vcf2maf/README.md deleted file mode 100644 index 6d8cd35d..00000000 --- 
a/docs/vcf2maf/README.md +++ /dev/null @@ -1,76 +0,0 @@ -# CWL and Dockerfile for running vcf2maf v1.6.21 - -## Version of tools in docker image (/container/Dockerfile) - -| Tool | Version | Location | -|--- |--- |--- | -| clearlinux (base image) | - | - | -| vcf2maf | 1.6.21 | https://github.com/mskcc/vcf2maf/archive/v1.6.21.zip | -| VEP | 105 | - | - | -|MINICONDA_VERSION | py37_4.9.2 | https://repo.anaconda.com/miniconda/Miniconda3-py37_4.9.2-Linux-x86_64.sh -|BCFTOOLS_VERSION | 1.10.2 | - | - | -|SAMTOOLS_VERSION | 1.10 | - | - | -|VCF2MAF_VERSION | 1.6.21 | - | - | -|HTSLIB_VERSION | 1.10.2 | - | - | - -## CWL - -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner vcf2maf_1.6.21.cwl example_inputs.yaml -``` - -**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** - -```bash -#Using CWLTOOL -> cwltool --singularity --non-strict /path/to/vcf2maf_1.6.21.cwl /path/to/inputs.yaml - -#Using toil-cwl-runner -> mkdir vcf2maf_toil_log -> toil-cwl-runner --singularity --logFile /path/to/vcf2maf_toil_log/cwltoil.log --jobStore /path/to/vcf2maf_jobStore --batchSystem lsf --workDir /path/to/vcf2maf_toil_log --outdir . 
--writeLogs /path/to/vcf2maf_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/vcf2maf_1.6.17.cwl /path/to/inputs.yaml > vcf2maf_toil.stdout 2> vcf2maf_toil.stderr & -``` - -### Usage - -``` -Usage: - perl vcf2maf.pl --help - perl vcf2maf.pl --input-vcf input.vcf --output-maf output.maf --tumor-id TUMOR_ID --normal-id NORMAL_ID - ---input-vcf Path to input file in VCF format ---output-maf Path to output MAF file ---tmp-dir Folder to retain intermediate VCFs after runtime [Default: Folder containing input VCF] ---tumor-id Tumor_Sample_Barcode to report in the MAF [TUMOR] ---normal-id Matched_Norm_Sample_Barcode to report in the MAF [NORMAL] ---vcf-tumor-id Tumor sample ID used in VCF's genotype columns [--tumor-id] ---vcf-normal-id Matched normal ID used in VCF's genotype columns [--normal-id] ---custom-enst List of custom ENST IDs that override canonical selection ---vep-path Folder containing the vep script [~/miniconda3/bin] ---vep-data VEP's base cache/plugin directory [~/.vep] ---vep-forks Number of forked processes to use when running VEP [4] ---vep-custom String to pass into VEP's --custom option [] ---vep-config Config file to pass into VEP's --config option [] ---vep-overwrite Allow VEP to overwrite output VCF if it exists ---buffer-size Number of variants VEP loads at a time; Reduce this for low memory systems [5000] ---any-allele When reporting co-located variants, allow mismatched variant alleles too ---inhibit-vep Skip running VEP, but extract VEP annotation in VCF if found ---online Use useastdb.ensembl.org instead of local cache (supports only GRCh38 VCFs listing <100 events) ---ref-fasta Reference FASTA file [~/.vep/homo_sapiens/102_GRCh37/Homo_sapiens.GRCh37.dna.toplevel.fa.gz] ---max-subpop-af Add FILTER tag common_variant if gnomAD reports any subpopulation AFs greater than this [0.0004] ---species Ensembl-friendly name of species (e.g. 
mus_musculus for mouse) [homo_sapiens] ---ncbi-build NCBI reference assembly of variants MAF (e.g. GRCm38 for mouse) [GRCh37] ---cache-version Version of offline cache to use with VEP (e.g. 75, 91, 102) [Default: Installed version] ---maf-center Variant calling center to report in MAF [.] ---retain-info Comma-delimited names of INFO fields to retain as extra columns in MAF [] ---retain-fmt Comma-delimited names of FORMAT fields to retain as extra columns in MAF [] ---retain-ann Comma-delimited names of annotations (within the VEP CSQ/ANN) to retain as extra columns in MAF [] ---min-hom-vaf If GT undefined in VCF, minimum allele fraction to call a variant homozygous [0.7] ---remap-chain Chain file to remap variants to a different assembly before running VEP ---verbose Print more things to log progress ---help Print a brief help message and quit ---man Print the detailed manual -``` diff --git a/vcf2maf_1.6.21/README.md b/vcf2maf_1.6.21/README.md deleted file mode 100644 index 6d8cd35d..00000000 --- a/vcf2maf_1.6.21/README.md +++ /dev/null @@ -1,76 +0,0 @@ -# CWL and Dockerfile for running vcf2maf v1.6.21 - -## Version of tools in docker image (/container/Dockerfile) - -| Tool | Version | Location | -|--- |--- |--- | -| clearlinux (base image) | - | - | -| vcf2maf | 1.6.21 | https://github.com/mskcc/vcf2maf/archive/v1.6.21.zip | -| VEP | 105 | - | - | -|MINICONDA_VERSION | py37_4.9.2 | https://repo.anaconda.com/miniconda/Miniconda3-py37_4.9.2-Linux-x86_64.sh -|BCFTOOLS_VERSION | 1.10.2 | - | - | -|SAMTOOLS_VERSION | 1.10 | - | - | -|VCF2MAF_VERSION | 1.6.21 | - | - | -|HTSLIB_VERSION | 1.10.2 | - | - | - -## CWL - -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner vcf2maf_1.6.21.cwl example_inputs.yaml -``` - -**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying 
[lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** - -```bash -#Using CWLTOOL -> cwltool --singularity --non-strict /path/to/vcf2maf_1.6.21.cwl /path/to/inputs.yaml - -#Using toil-cwl-runner -> mkdir vcf2maf_toil_log -> toil-cwl-runner --singularity --logFile /path/to/vcf2maf_toil_log/cwltoil.log --jobStore /path/to/vcf2maf_jobStore --batchSystem lsf --workDir /path/to/vcf2maf_toil_log --outdir . --writeLogs /path/to/vcf2maf_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/vcf2maf_1.6.17.cwl /path/to/inputs.yaml > vcf2maf_toil.stdout 2> vcf2maf_toil.stderr & -``` - -### Usage - -``` -Usage: - perl vcf2maf.pl --help - perl vcf2maf.pl --input-vcf input.vcf --output-maf output.maf --tumor-id TUMOR_ID --normal-id NORMAL_ID - ---input-vcf Path to input file in VCF format ---output-maf Path to output MAF file ---tmp-dir Folder to retain intermediate VCFs after runtime [Default: Folder containing input VCF] ---tumor-id Tumor_Sample_Barcode to report in the MAF [TUMOR] ---normal-id Matched_Norm_Sample_Barcode to report in the MAF [NORMAL] ---vcf-tumor-id Tumor sample ID used in VCF's genotype columns [--tumor-id] ---vcf-normal-id Matched normal ID used in VCF's genotype columns [--normal-id] ---custom-enst List of custom ENST IDs that override canonical selection ---vep-path Folder containing the vep script [~/miniconda3/bin] ---vep-data VEP's base cache/plugin directory [~/.vep] ---vep-forks Number of forked processes to use when running VEP [4] ---vep-custom String to pass into VEP's --custom option [] ---vep-config Config file to pass into VEP's --config option [] ---vep-overwrite Allow VEP to overwrite output VCF if it exists ---buffer-size Number of variants VEP loads at a time; Reduce this for low memory systems [5000] ---any-allele When reporting co-located variants, allow mismatched variant 
alleles too ---inhibit-vep Skip running VEP, but extract VEP annotation in VCF if found ---online Use useastdb.ensembl.org instead of local cache (supports only GRCh38 VCFs listing <100 events) ---ref-fasta Reference FASTA file [~/.vep/homo_sapiens/102_GRCh37/Homo_sapiens.GRCh37.dna.toplevel.fa.gz] ---max-subpop-af Add FILTER tag common_variant if gnomAD reports any subpopulation AFs greater than this [0.0004] ---species Ensembl-friendly name of species (e.g. mus_musculus for mouse) [homo_sapiens] ---ncbi-build NCBI reference assembly of variants MAF (e.g. GRCm38 for mouse) [GRCh37] ---cache-version Version of offline cache to use with VEP (e.g. 75, 91, 102) [Default: Installed version] ---maf-center Variant calling center to report in MAF [.] ---retain-info Comma-delimited names of INFO fields to retain as extra columns in MAF [] ---retain-fmt Comma-delimited names of FORMAT fields to retain as extra columns in MAF [] ---retain-ann Comma-delimited names of annotations (within the VEP CSQ/ANN) to retain as extra columns in MAF [] ---min-hom-vaf If GT undefined in VCF, minimum allele fraction to call a variant homozygous [0.7] ---remap-chain Chain file to remap variants to a different assembly before running VEP ---verbose Print more things to log progress ---help Print a brief help message and quit ---man Print the detailed manual -``` diff --git a/vcf2maf_1.6.21/container/Dockerfile b/vcf2maf_1.6.21/container/Dockerfile deleted file mode 100644 index 919941e3..00000000 --- a/vcf2maf_1.6.21/container/Dockerfile +++ /dev/null @@ -1,79 +0,0 @@ -FROM clearlinux:latest AS builder - -ARG VCF2MAF_VERSION=1.6.21 -ARG HTSLIB_VERSION=1.10.2 -ARG SAMTOOLS_VERSION=1.10 -ARG BCFTOOLS_VERSION=1.10.2 -ARG VEP_VERSION=105.0 -# Install a minimal versioned OS into /install_root, and bundled tools if any -ENV CLEAR_VERSION=33980 -RUN swupd os-install --no-progress --no-boot-update --no-scripts \ - --version ${CLEAR_VERSION} \ - --path /install_root \ - --statedir /swupd-state \ - 
--bundles os-core-update,which - -# Download and install conda into /usr/bin -ENV MINICONDA_VERSION=py37_4.9.2 -RUN swupd bundle-add --no-progress curl git wget sysadmin-basic diffutils less c-basic && \ - curl -sL https://repo.anaconda.com/miniconda/Miniconda3-${MINICONDA_VERSION}-Linux-x86_64.sh -o /tmp/miniconda.sh && \ - sh /tmp/miniconda.sh -bfp /usr - -#Download and install vcf2maf -RUN wget https://github.com/mskcc/vcf2maf/archive/refs/tags/v${VCF2MAF_VERSION}.zip \ - && unzip v${VCF2MAF_VERSION}.zip \ - && rm v${VCF2MAF_VERSION}.zip - -# Use conda to install remaining tools/dependencies into /usr/local -ENV VEP_VERSION=${VEP_VERSION} \ - HTSLIB_VERSION=${HTSLIB_VERSION} \ - BCFTOOLS_VERSION=${BCFTOOLS_VERSION} \ - SAMTOOLS_VERSION=${SAMTOOLS_VERSION} \ - LIFTOVER_VERSION=377 -RUN conda create -qy -p /usr/local \ - -c conda-forge \ - -c bioconda \ - -c defaults \ - ensembl-vep==${VEP_VERSION} \ - htslib==${HTSLIB_VERSION} \ - bcftools==${BCFTOOLS_VERSION} \ - samtools==${SAMTOOLS_VERSION} \ - ucsc-liftover==${LIFTOVER_VERSION} - -#Copy offline cache -COPY homo_sapiens_vep_105_GRCh37.tar.gz /var/cache -COPY Homo_sapiens.GRCh37.dna.toplevel.fa.gz /var/cache -RUN mkdir -p /.vep/homo_sapiens/105_GRCh37/ \ - ## && rsync -avr --progress rsync://ftp.ensembl.org/ensembl/pub/release-105/variation/indexed_vep_cache/homo_sapiens_vep_105_GRCh37.tar.gz $HOME/.vep/ \ - && mv /var/cache/homo_sapiens_vep_105_GRCh37.tar.gz /.vep/ \ - && tar -zxf /.vep/homo_sapiens_vep_105_GRCh37.tar.gz -C /.vep/ \ - && rm /.vep/homo_sapiens_vep_105_GRCh37.tar.gz \ - ## && rsync -avr --progress rsync://ftp.ensembl.org/ensembl/pub/grch37/release-105/fasta/homo_sapiens/dna/Homo_sapiens.GRCh37.dna.toplevel.fa.gz $HOME/.vep/homo_sapiens/105_GRCh37/ \ - && mv /var/cache/Homo_sapiens.GRCh37.dna.toplevel.fa.gz /.vep/homo_sapiens/105_GRCh37/ \ - && gzip -d /.vep/homo_sapiens/105_GRCh37/Homo_sapiens.GRCh37.dna.toplevel.fa.gz \ - && bgzip -i 
/.vep/homo_sapiens/105_GRCh37/Homo_sapiens.GRCh37.dna.toplevel.fa \ - && samtools faidx /.vep/homo_sapiens/105_GRCh37/Homo_sapiens.GRCh37.dna.toplevel.fa.gz - -RUN vep_convert_cache --species homo_sapiens --version all --dir /.vep - -# Deploy the minimal OS and tools into a clean target layer -FROM scratch -ARG VCF2MAF_VERSION=1.6.21 -ENV VEP_VERSION=${VEP_VERSION} - -LABEL org.opencontainers.image.vendor="MSKCC" -LABEL maintainer="Suleyman Vural " -LABEL maintainer="Cyriac Kandoth " -LABEL org.opencontainers.image.version.vcf2maf=${VCF2MAF_VERSION} -LABEL org.opencontainers.image.version.vep="105" - -COPY --from=builder vcf2maf-${VCF2MAF_VERSION} /opt/vcf2maf-${VCF2MAF_VERSION} -COPY --from=builder vcf2maf-${VCF2MAF_VERSION}/*.pl /usr/local/bin/ -COPY --from=builder vcf2maf-${VCF2MAF_VERSION}/data /opt/data -COPY --from=builder /.vep /.vep/ -COPY --from=builder /install_root / -COPY --from=builder /usr/local /usr/local - -RUN chmod 777 /usr/local/bin/vcf2*.pl && chmod 777 /usr/local/bin/maf2*.pl - -WORKDIR /opt diff --git a/vcf2maf_1.6.21/example_inputs.yaml b/vcf2maf_1.6.21/example_inputs.yaml deleted file mode 100644 index 906ab455..00000000 --- a/vcf2maf_1.6.21/example_inputs.yaml +++ /dev/null @@ -1,14 +0,0 @@ -input_vcf: - class: File - path: /path/to/input.vcf -tumor_id: tumor_sample_name -vcf_tumor_id: tumor_sample_name -normal_id: normal_sample_name -vcf_normal_id: normal_sample_name -ncbi_build: "GRCh37" -vep_data: /.vep/ # location in the container -ref_fasta: "/.vep/homo_sapiens/105_GRCh37/Homo_sapiens.GRCh37.dna.toplevel.fa.gz" # location in the container -vep_path: /usr/local/bin/ # location in the container -retain_info: retain_info_str -retain_fmt: retain_fmt_str -output_maf: "output.maf" diff --git a/vcf2maf_1.6.21/vcf2maf_1.6.21.cwl b/vcf2maf_1.6.21/vcf2maf_1.6.21.cwl deleted file mode 100644 index 2416a380..00000000 --- a/vcf2maf_1.6.21/vcf2maf_1.6.21.cwl +++ /dev/null @@ -1,195 +0,0 @@ - -class: CommandLineTool -cwlVersion: v1.0 - -$namespaces: 
- dct: http://purl.org/dc/terms/ - doap: http://usefulinc.com/ns/doap# - foaf: http://xmlns.com/foaf/0.1/ - sbg: https://www.sevenbridges.com/ - -id: vcf2maf_v1.6.21 - -baseCommand: - - perl - - /opt/vcf2maf-1.6.21/vcf2maf.pl - -inputs: - memory_per_job: - type: ["null",int] - doc: Memory per job in megabytes - memory_overhead: - type: ["null",int] - doc: Memory overhead per job in megabytes - cache_version: - type: - - 'null' - - string - default: '105' - doc: Version of VEP and its cache to use - inputBinding: - prefix: --cache-version - species: - type: - - 'null' - - string - default: homo_sapiens - doc: Species of variants in input - inputBinding: - prefix: --species - ncbi_build: - type: - - 'null' - - string - default: GRCh37 - doc: Genome build of variants in input - inputBinding: - prefix: --ncbi-build - ref_fasta: - type: ['null', string] - doc: Reference FASTA file - inputBinding: - prefix: --ref-fasta - maf_center: - type: ['null', string] - default: mskcc.org - doc: Variant calling center to report in MAF - inputBinding: - prefix: --maf-center - output_maf: - type: ['null', string] - doc: Path to output MAF file - inputBinding: - prefix: --output-maf - min_hom_vaf: - type: - - 'null' - - float - default: 0.7 - doc: If GT undefined in VCF, minimum allele fraction to call a variant homozygous - inputBinding: - prefix: --min-hom-vaf - remap_chain: - type: ['null', string] - doc: Chain file to remap variants to a different assembly before running VEP - inputBinding: - prefix: --remap-chain - normal_id: - type: ['null', string] - default: NORMAL - doc: Matched_Norm_Sample_Barcode to report in the MAF - inputBinding: - prefix: --normal-id - buffer_size: - type: - - 'null' - - int - default: 5000 - doc: Number of variants VEP loads at a time; Reduce this for low memory systems - inputBinding: - prefix: --buffer-size - custom_enst: - type: ['null', string] - doc: List of custom ENST IDs that override canonical selection - inputBinding: - prefix: --custom-enst 
- vcf_normal_id: - type: ['null', string] - default: NORMAL - doc: Matched normal ID used in VCFs genotype columns - inputBinding: - prefix: --vcf-normal-id - vep_path: - type: ['null', string] - doc: Folder containing variant_effect_predictor.pl or vep binary - inputBinding: - prefix: --vep-path - vep_data: - type: ['null', string] - doc: VEPs base cache/plugin directory - inputBinding: - prefix: --vep-data - any_allele: - type: ['null', string] - doc: When reporting co-located variants, allow mismatched variant alleles too - inputBinding: - prefix: --any-allele - input_vcf: - type: - - string - - File - doc: Path to input file in VCF format - inputBinding: - prefix: --input-vcf - vep_forks: - type: - - 'null' - - int - default: 4 - doc: Number of forked processes to use when running VEP - inputBinding: - prefix: --vep-forks - vcf_tumor_id: - type: ['null', string] - default: TUMOR - doc: Tumor sample ID used in VCFs genotype columns - inputBinding: - prefix: --vcf-tumor-id - tumor_id: - type: ['null', string] - default: TUMOR - doc: Tumor_Sample_Barcode to report in the MAF - inputBinding: - prefix: --tumor-id - retain_info: - type: ['null', string] - doc: Comma-delimited names of INFO fields to retain as extra columns in MAF - inputBinding: - prefix: --retain-info - retain_fmt: - type: ['null', string] - doc: Comma-delimited names of FORMAT fields to retain as extra columns in MAF [] - inputBinding: - prefix: --retain-fmt -outputs: - vcf2maf_maf: - type: File - outputBinding: - glob: | - ${ - if (inputs.output_maf) - return inputs.output_maf; - return null; - } - -arguments: -- valueFrom: "$(runtime.tmpdir)" - prefix: '--tmp-dir' - shellQuote: false - -requirements: - InlineJavascriptRequirement: {} - ResourceRequirement: - ramMin: 8000 - coresMin: 2 - DockerRequirement: - dockerPull: ghcr.io/msk-access/vcf2maf:1.6.21 - -dct:contributor: - - class: foaf:Organization - foaf:member: - - class: foaf:Person - foaf:mbox: mailto:vurals@mskcc.org - foaf:name: Suleyman 
Vural - foaf:name: Memorial Sloan Kettering Cancer Center -dct:creator: - - class: foaf:Organization - foaf:member: - - class: foaf:Person - foaf:mbox: mailto:kumarn1@mskcc.org - foaf:name: Nikhil Kumar - foaf:name: Memorial Sloan Kettering Cancer Center -doap:release: - - class: doap:Version - doap:name: vcf2maf - doap:revision: 1.6.21 From 44f6b73f14aa1c5beca2b0b27d693ec6896ba51e Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Mon, 18 Apr 2022 17:01:42 -0400 Subject: [PATCH 464/476] Fixing Structure and docs --- docs/delly/README.md | 3 + docs/delly/delly_0.9.1.md | 81 +++++++++++++++++++ .../{vardict_1.8.3.md => vardict_1.8.2.md} | 2 +- .../container/Dockerfile | 0 .../v1.8.2}/example_inputs.yaml | 0 .../v1.8.2}/teststrandbias.cwl | 0 .../v1.8.2}/var_to_vcf.cwl | 0 .../v1.8.2}/vardict_app.cwl | 0 .../vardict_workflow_single_sample.cwl | 0 9 files changed, 85 insertions(+), 1 deletion(-) create mode 100644 docs/delly/README.md create mode 100644 docs/delly/delly_0.9.1.md rename docs/vardict/{vardict_1.8.3.md => vardict_1.8.2.md} (97%) rename {vardictjava_1.8.2 => vardictjava}/container/Dockerfile (100%) rename {vardictjava_1.8.2 => vardictjava/v1.8.2}/example_inputs.yaml (100%) rename {vardictjava_1.8.2 => vardictjava/v1.8.2}/teststrandbias.cwl (100%) rename {vardictjava_1.8.2 => vardictjava/v1.8.2}/var_to_vcf.cwl (100%) rename {vardictjava_1.8.2 => vardictjava/v1.8.2}/vardict_app.cwl (100%) rename {vardictjava_1.8.2 => vardictjava/v1.8.2}/vardict_workflow_single_sample.cwl (100%) diff --git a/docs/delly/README.md b/docs/delly/README.md new file mode 100644 index 00000000..93564821 --- /dev/null +++ b/docs/delly/README.md @@ -0,0 +1,3 @@ +#Delly + + diff --git a/docs/delly/delly_0.9.1.md b/docs/delly/delly_0.9.1.md new file mode 100644 index 00000000..08dcf390 --- /dev/null +++ b/docs/delly/delly_0.9.1.md @@ -0,0 +1,81 @@ +# CWL and Dockerfile for running Delly + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +|--- 
|--- |--- | +| ubuntu | 18.04 | - | +| DELLY | 0.9.1 | https://github.com/dellytools/delly | + + +## CWL + +- CWL specification 1.0 +- Use example_input.yml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner delly_0.9.1.cwl example_input.yml +``` +**If at MSK, using the JUNO cluster having installed toil-msk version 3.21.1 you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/delly_0.9.1.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> toil-cwl-runner --singularity --logFile /path/to/delly_toil.log --jobStore /path/to/delly_jobStore --batchSystem lsf --workDir /path/to/delly_toil_log --outdir . --writeLogs /path/to/delly_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --disableChaining --maxLogFileSize 20000000000 /path/to/delly_0.9.1.cwl /path/to/inputs.yaml > delly_toil.stdout 2> delly_toil.stderr & +``` + +### Usage + +``` +usage: delly_0.9.1.cwl [-h] [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] + [--out_file OUT_FILE] --reference_genome + REFERENCE_GENOME [--exclude_regions EXCLUDE_REGIONS] + [--vcffile VCFFILE] [--svtype SVTYPE] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --out_file OUT_FILE The name to be used for the output bcf file + --reference_genome REFERENCE_GENOME + reference genome fasta file + --exclude_regions EXCLUDE_REGIONS + file with regions to exclude + --vcffile VCFFILE input VCF/BCF file for genotyping + --svtype SVTYPE SV type to compute [DEL, INS, DUP, INV, BND, ALL] +``` + +## Disclaimer +Parts of this code were borrowed from the
delly repository, https://github.com/dellytools/delly, which uses the following redistribution license: + +Copyright (c) 2012- European Molecular Biology Laboratory (EMBL) +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/docs/vardict/vardict_1.8.3.md b/docs/vardict/vardict_1.8.2.md similarity index 97% rename from docs/vardict/vardict_1.8.3.md rename to docs/vardict/vardict_1.8.2.md index 65698dc4..3d9ae093 100644 --- a/docs/vardict/vardict_1.8.3.md +++ b/docs/vardict/vardict_1.8.2.md @@ -9,7 +9,7 @@ https://github.com/AstraZeneca-NGS/VarDictJava#single-sample-mode | Tool | Version | Location | | :--- | :--- | :--- | | ubuntu base image (alpine) | 3.8 | - | -| vardict | 1.8.3 | [https://github.com/AstraZeneca-NGS/VarDictJava/releases/tag/v1.8.2](https://github.com/AstraZeneca-NGS/VarDictJava/releases/tag/v1.8.2) | +| vardict | 1.8.2 | [https://github.com/AstraZeneca-NGS/VarDictJava/releases/tag/v1.8.2](https://github.com/AstraZeneca-NGS/VarDictJava/releases/tag/v1.8.2) | | perl | 5.26.2-r1 | [https://pkgs.alpinelinux.org/package/edge/main/aarch64/perl](https://pkgs.alpinelinux.org/package/edge/main/aarch64/perl) | | r | 3.5.1 | [https://pkgs.alpinelinux.org/package/edge/community/x86/R](https://pkgs.alpinelinux.org/package/edge/community/x86/R) | diff --git a/vardictjava_1.8.2/container/Dockerfile b/vardictjava/container/Dockerfile similarity index 100% rename from vardictjava_1.8.2/container/Dockerfile rename to vardictjava/container/Dockerfile diff --git a/vardictjava_1.8.2/example_inputs.yaml b/vardictjava/v1.8.2/example_inputs.yaml similarity index 100% rename from vardictjava_1.8.2/example_inputs.yaml rename to vardictjava/v1.8.2/example_inputs.yaml diff --git a/vardictjava_1.8.2/teststrandbias.cwl b/vardictjava/v1.8.2/teststrandbias.cwl similarity index 100% rename from vardictjava_1.8.2/teststrandbias.cwl rename to vardictjava/v1.8.2/teststrandbias.cwl diff --git a/vardictjava_1.8.2/var_to_vcf.cwl b/vardictjava/v1.8.2/var_to_vcf.cwl similarity index 100% rename from vardictjava_1.8.2/var_to_vcf.cwl rename to vardictjava/v1.8.2/var_to_vcf.cwl diff --git a/vardictjava_1.8.2/vardict_app.cwl b/vardictjava/v1.8.2/vardict_app.cwl similarity index 100% rename from 
vardictjava_1.8.2/vardict_app.cwl rename to vardictjava/v1.8.2/vardict_app.cwl diff --git a/vardictjava_1.8.2/vardict_workflow_single_sample.cwl b/vardictjava/v1.8.2/vardict_workflow_single_sample.cwl similarity index 100% rename from vardictjava_1.8.2/vardict_workflow_single_sample.cwl rename to vardictjava/v1.8.2/vardict_workflow_single_sample.cwl From f2beec884236937f62a41d15df5de5f9ffbe3043 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Mon, 18 Apr 2022 17:05:16 -0400 Subject: [PATCH 465/476] Revert "Feature/vcf2maf update" --- docs/vcf2maf/README.md | 76 ----------- vcf2maf_1.6.21/README.md | 76 ----------- vcf2maf_1.6.21/container/Dockerfile | 79 ----------- vcf2maf_1.6.21/example_inputs.yaml | 14 -- vcf2maf_1.6.21/vcf2maf_1.6.21.cwl | 195 ---------------------------- 5 files changed, 440 deletions(-) delete mode 100644 docs/vcf2maf/README.md delete mode 100644 vcf2maf_1.6.21/README.md delete mode 100644 vcf2maf_1.6.21/container/Dockerfile delete mode 100644 vcf2maf_1.6.21/example_inputs.yaml delete mode 100644 vcf2maf_1.6.21/vcf2maf_1.6.21.cwl diff --git a/docs/vcf2maf/README.md b/docs/vcf2maf/README.md deleted file mode 100644 index 6d8cd35d..00000000 --- a/docs/vcf2maf/README.md +++ /dev/null @@ -1,76 +0,0 @@ -# CWL and Dockerfile for running vcf2maf v1.6.21 - -## Version of tools in docker image (/container/Dockerfile) - -| Tool | Version | Location | -|--- |--- |--- | -| clearlinux (base image) | - | - | -| vcf2maf | 1.6.21 | https://github.com/mskcc/vcf2maf/archive/v1.6.21.zip | -| VEP | 105 | - | - | -|MINICONDA_VERSION | py37_4.9.2 | https://repo.anaconda.com/miniconda/Miniconda3-py37_4.9.2-Linux-x86_64.sh -|BCFTOOLS_VERSION | 1.10.2 | - | - | -|SAMTOOLS_VERSION | 1.10 | - | - | -|VCF2MAF_VERSION | 1.6.21 | - | - | -|HTSLIB_VERSION | 1.10.2 | - | - | - -## CWL - -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner 
vcf2maf_1.6.21.cwl example_inputs.yaml -``` - -**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** - -```bash -#Using CWLTOOL -> cwltool --singularity --non-strict /path/to/vcf2maf_1.6.21.cwl /path/to/inputs.yaml - -#Using toil-cwl-runner -> mkdir vcf2maf_toil_log -> toil-cwl-runner --singularity --logFile /path/to/vcf2maf_toil_log/cwltoil.log --jobStore /path/to/vcf2maf_jobStore --batchSystem lsf --workDir /path/to/vcf2maf_toil_log --outdir . --writeLogs /path/to/vcf2maf_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/vcf2maf_1.6.17.cwl /path/to/inputs.yaml > vcf2maf_toil.stdout 2> vcf2maf_toil.stderr & -``` - -### Usage - -``` -Usage: - perl vcf2maf.pl --help - perl vcf2maf.pl --input-vcf input.vcf --output-maf output.maf --tumor-id TUMOR_ID --normal-id NORMAL_ID - ---input-vcf Path to input file in VCF format ---output-maf Path to output MAF file ---tmp-dir Folder to retain intermediate VCFs after runtime [Default: Folder containing input VCF] ---tumor-id Tumor_Sample_Barcode to report in the MAF [TUMOR] ---normal-id Matched_Norm_Sample_Barcode to report in the MAF [NORMAL] ---vcf-tumor-id Tumor sample ID used in VCF's genotype columns [--tumor-id] ---vcf-normal-id Matched normal ID used in VCF's genotype columns [--normal-id] ---custom-enst List of custom ENST IDs that override canonical selection ---vep-path Folder containing the vep script [~/miniconda3/bin] ---vep-data VEP's base cache/plugin directory [~/.vep] ---vep-forks Number of forked processes to use when running VEP [4] ---vep-custom String to pass into VEP's --custom option [] ---vep-config Config file to pass into VEP's --config option [] ---vep-overwrite Allow VEP to overwrite output VCF if it exists ---buffer-size Number of variants 
VEP loads at a time; Reduce this for low memory systems [5000] ---any-allele When reporting co-located variants, allow mismatched variant alleles too ---inhibit-vep Skip running VEP, but extract VEP annotation in VCF if found ---online Use useastdb.ensembl.org instead of local cache (supports only GRCh38 VCFs listing <100 events) ---ref-fasta Reference FASTA file [~/.vep/homo_sapiens/102_GRCh37/Homo_sapiens.GRCh37.dna.toplevel.fa.gz] ---max-subpop-af Add FILTER tag common_variant if gnomAD reports any subpopulation AFs greater than this [0.0004] ---species Ensembl-friendly name of species (e.g. mus_musculus for mouse) [homo_sapiens] ---ncbi-build NCBI reference assembly of variants MAF (e.g. GRCm38 for mouse) [GRCh37] ---cache-version Version of offline cache to use with VEP (e.g. 75, 91, 102) [Default: Installed version] ---maf-center Variant calling center to report in MAF [.] ---retain-info Comma-delimited names of INFO fields to retain as extra columns in MAF [] ---retain-fmt Comma-delimited names of FORMAT fields to retain as extra columns in MAF [] ---retain-ann Comma-delimited names of annotations (within the VEP CSQ/ANN) to retain as extra columns in MAF [] ---min-hom-vaf If GT undefined in VCF, minimum allele fraction to call a variant homozygous [0.7] ---remap-chain Chain file to remap variants to a different assembly before running VEP ---verbose Print more things to log progress ---help Print a brief help message and quit ---man Print the detailed manual -``` diff --git a/vcf2maf_1.6.21/README.md b/vcf2maf_1.6.21/README.md deleted file mode 100644 index 6d8cd35d..00000000 --- a/vcf2maf_1.6.21/README.md +++ /dev/null @@ -1,76 +0,0 @@ -# CWL and Dockerfile for running vcf2maf v1.6.21 - -## Version of tools in docker image (/container/Dockerfile) - -| Tool | Version | Location | -|--- |--- |--- | -| clearlinux (base image) | - | - | -| vcf2maf | 1.6.21 | https://github.com/mskcc/vcf2maf/archive/v1.6.21.zip | -| VEP | 105 | - | - | -|MINICONDA_VERSION | 
py37_4.9.2 | https://repo.anaconda.com/miniconda/Miniconda3-py37_4.9.2-Linux-x86_64.sh -|BCFTOOLS_VERSION | 1.10.2 | - | - | -|SAMTOOLS_VERSION | 1.10 | - | - | -|VCF2MAF_VERSION | 1.6.21 | - | - | -|HTSLIB_VERSION | 1.10.2 | - | - | - -## CWL - -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner vcf2maf_1.6.21.cwl example_inputs.yaml -``` - -**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** - -```bash -#Using CWLTOOL -> cwltool --singularity --non-strict /path/to/vcf2maf_1.6.21.cwl /path/to/inputs.yaml - -#Using toil-cwl-runner -> mkdir vcf2maf_toil_log -> toil-cwl-runner --singularity --logFile /path/to/vcf2maf_toil_log/cwltoil.log --jobStore /path/to/vcf2maf_jobStore --batchSystem lsf --workDir /path/to/vcf2maf_toil_log --outdir . 
--writeLogs /path/to/vcf2maf_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/vcf2maf_1.6.17.cwl /path/to/inputs.yaml > vcf2maf_toil.stdout 2> vcf2maf_toil.stderr & -``` - -### Usage - -``` -Usage: - perl vcf2maf.pl --help - perl vcf2maf.pl --input-vcf input.vcf --output-maf output.maf --tumor-id TUMOR_ID --normal-id NORMAL_ID - ---input-vcf Path to input file in VCF format ---output-maf Path to output MAF file ---tmp-dir Folder to retain intermediate VCFs after runtime [Default: Folder containing input VCF] ---tumor-id Tumor_Sample_Barcode to report in the MAF [TUMOR] ---normal-id Matched_Norm_Sample_Barcode to report in the MAF [NORMAL] ---vcf-tumor-id Tumor sample ID used in VCF's genotype columns [--tumor-id] ---vcf-normal-id Matched normal ID used in VCF's genotype columns [--normal-id] ---custom-enst List of custom ENST IDs that override canonical selection ---vep-path Folder containing the vep script [~/miniconda3/bin] ---vep-data VEP's base cache/plugin directory [~/.vep] ---vep-forks Number of forked processes to use when running VEP [4] ---vep-custom String to pass into VEP's --custom option [] ---vep-config Config file to pass into VEP's --config option [] ---vep-overwrite Allow VEP to overwrite output VCF if it exists ---buffer-size Number of variants VEP loads at a time; Reduce this for low memory systems [5000] ---any-allele When reporting co-located variants, allow mismatched variant alleles too ---inhibit-vep Skip running VEP, but extract VEP annotation in VCF if found ---online Use useastdb.ensembl.org instead of local cache (supports only GRCh38 VCFs listing <100 events) ---ref-fasta Reference FASTA file [~/.vep/homo_sapiens/102_GRCh37/Homo_sapiens.GRCh37.dna.toplevel.fa.gz] ---max-subpop-af Add FILTER tag common_variant if gnomAD reports any subpopulation AFs greater than this [0.0004] ---species Ensembl-friendly name of species (e.g. 
mus_musculus for mouse) [homo_sapiens] ---ncbi-build NCBI reference assembly of variants MAF (e.g. GRCm38 for mouse) [GRCh37] ---cache-version Version of offline cache to use with VEP (e.g. 75, 91, 102) [Default: Installed version] ---maf-center Variant calling center to report in MAF [.] ---retain-info Comma-delimited names of INFO fields to retain as extra columns in MAF [] ---retain-fmt Comma-delimited names of FORMAT fields to retain as extra columns in MAF [] ---retain-ann Comma-delimited names of annotations (within the VEP CSQ/ANN) to retain as extra columns in MAF [] ---min-hom-vaf If GT undefined in VCF, minimum allele fraction to call a variant homozygous [0.7] ---remap-chain Chain file to remap variants to a different assembly before running VEP ---verbose Print more things to log progress ---help Print a brief help message and quit ---man Print the detailed manual -``` diff --git a/vcf2maf_1.6.21/container/Dockerfile b/vcf2maf_1.6.21/container/Dockerfile deleted file mode 100644 index 919941e3..00000000 --- a/vcf2maf_1.6.21/container/Dockerfile +++ /dev/null @@ -1,79 +0,0 @@ -FROM clearlinux:latest AS builder - -ARG VCF2MAF_VERSION=1.6.21 -ARG HTSLIB_VERSION=1.10.2 -ARG SAMTOOLS_VERSION=1.10 -ARG BCFTOOLS_VERSION=1.10.2 -ARG VEP_VERSION=105.0 -# Install a minimal versioned OS into /install_root, and bundled tools if any -ENV CLEAR_VERSION=33980 -RUN swupd os-install --no-progress --no-boot-update --no-scripts \ - --version ${CLEAR_VERSION} \ - --path /install_root \ - --statedir /swupd-state \ - --bundles os-core-update,which - -# Download and install conda into /usr/bin -ENV MINICONDA_VERSION=py37_4.9.2 -RUN swupd bundle-add --no-progress curl git wget sysadmin-basic diffutils less c-basic && \ - curl -sL https://repo.anaconda.com/miniconda/Miniconda3-${MINICONDA_VERSION}-Linux-x86_64.sh -o /tmp/miniconda.sh && \ - sh /tmp/miniconda.sh -bfp /usr - -#Download and install vcf2maf -RUN wget 
https://github.com/mskcc/vcf2maf/archive/refs/tags/v${VCF2MAF_VERSION}.zip \ - && unzip v${VCF2MAF_VERSION}.zip \ - && rm v${VCF2MAF_VERSION}.zip - -# Use conda to install remaining tools/dependencies into /usr/local -ENV VEP_VERSION=${VEP_VERSION} \ - HTSLIB_VERSION=${HTSLIB_VERSION} \ - BCFTOOLS_VERSION=${BCFTOOLS_VERSION} \ - SAMTOOLS_VERSION=${SAMTOOLS_VERSION} \ - LIFTOVER_VERSION=377 -RUN conda create -qy -p /usr/local \ - -c conda-forge \ - -c bioconda \ - -c defaults \ - ensembl-vep==${VEP_VERSION} \ - htslib==${HTSLIB_VERSION} \ - bcftools==${BCFTOOLS_VERSION} \ - samtools==${SAMTOOLS_VERSION} \ - ucsc-liftover==${LIFTOVER_VERSION} - -#Copy offline cache -COPY homo_sapiens_vep_105_GRCh37.tar.gz /var/cache -COPY Homo_sapiens.GRCh37.dna.toplevel.fa.gz /var/cache -RUN mkdir -p /.vep/homo_sapiens/105_GRCh37/ \ - ## && rsync -avr --progress rsync://ftp.ensembl.org/ensembl/pub/release-105/variation/indexed_vep_cache/homo_sapiens_vep_105_GRCh37.tar.gz $HOME/.vep/ \ - && mv /var/cache/homo_sapiens_vep_105_GRCh37.tar.gz /.vep/ \ - && tar -zxf /.vep/homo_sapiens_vep_105_GRCh37.tar.gz -C /.vep/ \ - && rm /.vep/homo_sapiens_vep_105_GRCh37.tar.gz \ - ## && rsync -avr --progress rsync://ftp.ensembl.org/ensembl/pub/grch37/release-105/fasta/homo_sapiens/dna/Homo_sapiens.GRCh37.dna.toplevel.fa.gz $HOME/.vep/homo_sapiens/105_GRCh37/ \ - && mv /var/cache/Homo_sapiens.GRCh37.dna.toplevel.fa.gz /.vep/homo_sapiens/105_GRCh37/ \ - && gzip -d /.vep/homo_sapiens/105_GRCh37/Homo_sapiens.GRCh37.dna.toplevel.fa.gz \ - && bgzip -i /.vep/homo_sapiens/105_GRCh37/Homo_sapiens.GRCh37.dna.toplevel.fa \ - && samtools faidx /.vep/homo_sapiens/105_GRCh37/Homo_sapiens.GRCh37.dna.toplevel.fa.gz - -RUN vep_convert_cache --species homo_sapiens --version all --dir /.vep - -# Deploy the minimal OS and tools into a clean target layer -FROM scratch -ARG VCF2MAF_VERSION=1.6.21 -ENV VEP_VERSION=${VEP_VERSION} - -LABEL org.opencontainers.image.vendor="MSKCC" -LABEL maintainer="Suleyman Vural " -LABEL 
maintainer="Cyriac Kandoth " -LABEL org.opencontainers.image.version.vcf2maf=${VCF2MAF_VERSION} -LABEL org.opencontainers.image.version.vep="105" - -COPY --from=builder vcf2maf-${VCF2MAF_VERSION} /opt/vcf2maf-${VCF2MAF_VERSION} -COPY --from=builder vcf2maf-${VCF2MAF_VERSION}/*.pl /usr/local/bin/ -COPY --from=builder vcf2maf-${VCF2MAF_VERSION}/data /opt/data -COPY --from=builder /.vep /.vep/ -COPY --from=builder /install_root / -COPY --from=builder /usr/local /usr/local - -RUN chmod 777 /usr/local/bin/vcf2*.pl && chmod 777 /usr/local/bin/maf2*.pl - -WORKDIR /opt diff --git a/vcf2maf_1.6.21/example_inputs.yaml b/vcf2maf_1.6.21/example_inputs.yaml deleted file mode 100644 index 906ab455..00000000 --- a/vcf2maf_1.6.21/example_inputs.yaml +++ /dev/null @@ -1,14 +0,0 @@ -input_vcf: - class: File - path: /path/to/input.vcf -tumor_id: tumor_sample_name -vcf_tumor_id: tumor_sample_name -normal_id: normal_sample_name -vcf_normal_id: normal_sample_name -ncbi_build: "GRCh37" -vep_data: /.vep/ # location in the container -ref_fasta: "/.vep/homo_sapiens/105_GRCh37/Homo_sapiens.GRCh37.dna.toplevel.fa.gz" # location in the container -vep_path: /usr/local/bin/ # location in the container -retain_info: retain_info_str -retain_fmt: retain_fmt_str -output_maf: "output.maf" diff --git a/vcf2maf_1.6.21/vcf2maf_1.6.21.cwl b/vcf2maf_1.6.21/vcf2maf_1.6.21.cwl deleted file mode 100644 index 2416a380..00000000 --- a/vcf2maf_1.6.21/vcf2maf_1.6.21.cwl +++ /dev/null @@ -1,195 +0,0 @@ - -class: CommandLineTool -cwlVersion: v1.0 - -$namespaces: - dct: http://purl.org/dc/terms/ - doap: http://usefulinc.com/ns/doap# - foaf: http://xmlns.com/foaf/0.1/ - sbg: https://www.sevenbridges.com/ - -id: vcf2maf_v1.6.21 - -baseCommand: - - perl - - /opt/vcf2maf-1.6.21/vcf2maf.pl - -inputs: - memory_per_job: - type: ["null",int] - doc: Memory per job in megabytes - memory_overhead: - type: ["null",int] - doc: Memory overhead per job in megabytes - cache_version: - type: - - 'null' - - string - default: '105' - 
doc: Version of VEP and its cache to use - inputBinding: - prefix: --cache-version - species: - type: - - 'null' - - string - default: homo_sapiens - doc: Species of variants in input - inputBinding: - prefix: --species - ncbi_build: - type: - - 'null' - - string - default: GRCh37 - doc: Genome build of variants in input - inputBinding: - prefix: --ncbi-build - ref_fasta: - type: ['null', string] - doc: Reference FASTA file - inputBinding: - prefix: --ref-fasta - maf_center: - type: ['null', string] - default: mskcc.org - doc: Variant calling center to report in MAF - inputBinding: - prefix: --maf-center - output_maf: - type: ['null', string] - doc: Path to output MAF file - inputBinding: - prefix: --output-maf - min_hom_vaf: - type: - - 'null' - - float - default: 0.7 - doc: If GT undefined in VCF, minimum allele fraction to call a variant homozygous - inputBinding: - prefix: --min-hom-vaf - remap_chain: - type: ['null', string] - doc: Chain file to remap variants to a different assembly before running VEP - inputBinding: - prefix: --remap-chain - normal_id: - type: ['null', string] - default: NORMAL - doc: Matched_Norm_Sample_Barcode to report in the MAF - inputBinding: - prefix: --normal-id - buffer_size: - type: - - 'null' - - int - default: 5000 - doc: Number of variants VEP loads at a time; Reduce this for low memory systems - inputBinding: - prefix: --buffer-size - custom_enst: - type: ['null', string] - doc: List of custom ENST IDs that override canonical selection - inputBinding: - prefix: --custom-enst - vcf_normal_id: - type: ['null', string] - default: NORMAL - doc: Matched normal ID used in VCFs genotype columns - inputBinding: - prefix: --vcf-normal-id - vep_path: - type: ['null', string] - doc: Folder containing variant_effect_predictor.pl or vep binary - inputBinding: - prefix: --vep-path - vep_data: - type: ['null', string] - doc: VEPs base cache/plugin directory - inputBinding: - prefix: --vep-data - any_allele: - type: ['null', string] - doc: 
When reporting co-located variants, allow mismatched variant alleles too - inputBinding: - prefix: --any-allele - input_vcf: - type: - - string - - File - doc: Path to input file in VCF format - inputBinding: - prefix: --input-vcf - vep_forks: - type: - - 'null' - - int - default: 4 - doc: Number of forked processes to use when running VEP - inputBinding: - prefix: --vep-forks - vcf_tumor_id: - type: ['null', string] - default: TUMOR - doc: Tumor sample ID used in VCFs genotype columns - inputBinding: - prefix: --vcf-tumor-id - tumor_id: - type: ['null', string] - default: TUMOR - doc: Tumor_Sample_Barcode to report in the MAF - inputBinding: - prefix: --tumor-id - retain_info: - type: ['null', string] - doc: Comma-delimited names of INFO fields to retain as extra columns in MAF - inputBinding: - prefix: --retain-info - retain_fmt: - type: ['null', string] - doc: Comma-delimited names of FORMAT fields to retain as extra columns in MAF [] - inputBinding: - prefix: --retain-fmt -outputs: - vcf2maf_maf: - type: File - outputBinding: - glob: | - ${ - if (inputs.output_maf) - return inputs.output_maf; - return null; - } - -arguments: -- valueFrom: "$(runtime.tmpdir)" - prefix: '--tmp-dir' - shellQuote: false - -requirements: - InlineJavascriptRequirement: {} - ResourceRequirement: - ramMin: 8000 - coresMin: 2 - DockerRequirement: - dockerPull: ghcr.io/msk-access/vcf2maf:1.6.21 - -dct:contributor: - - class: foaf:Organization - foaf:member: - - class: foaf:Person - foaf:mbox: mailto:vurals@mskcc.org - foaf:name: Suleyman Vural - foaf:name: Memorial Sloan Kettering Cancer Center -dct:creator: - - class: foaf:Organization - foaf:member: - - class: foaf:Person - foaf:mbox: mailto:kumarn1@mskcc.org - foaf:name: Nikhil Kumar - foaf:name: Memorial Sloan Kettering Cancer Center -doap:release: - - class: doap:Version - doap:name: vcf2maf - doap:revision: 1.6.21 From cdd1ea52a52b7930e4cefd5d1b0f3aaba0ecd9ad Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Mon, 18 Apr 2022 17:19:05 
-0400 Subject: [PATCH 466/476] Revert "Revert 135 feature/vcf2maf update" From 4a6f1a202ab1a4b51d0034c340602df868927bed Mon Sep 17 00:00:00 2001 From: svural Date: Tue, 19 Apr 2022 15:38:14 -0400 Subject: [PATCH 467/476] initial commit --- docs/vcf2maf/README.md | 76 +++++++++++ vcf2maf_1.6.21/README.md | 76 +++++++++++ vcf2maf_1.6.21/container/Dockerfile | 79 +++++++++++ vcf2maf_1.6.21/example_inputs.yaml | 14 ++ vcf2maf_1.6.21/vcf2maf_1.6.21.cwl | 195 ++++++++++++++++++++++++++++ 5 files changed, 440 insertions(+) create mode 100644 docs/vcf2maf/README.md create mode 100644 vcf2maf_1.6.21/README.md create mode 100644 vcf2maf_1.6.21/container/Dockerfile create mode 100644 vcf2maf_1.6.21/example_inputs.yaml create mode 100644 vcf2maf_1.6.21/vcf2maf_1.6.21.cwl diff --git a/docs/vcf2maf/README.md b/docs/vcf2maf/README.md new file mode 100644 index 00000000..6d8cd35d --- /dev/null +++ b/docs/vcf2maf/README.md @@ -0,0 +1,76 @@ +# CWL and Dockerfile for running vcf2maf v1.6.21 + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +|--- |--- |--- | +| clearlinux (base image) | - | - | +| vcf2maf | 1.6.21 | https://github.com/mskcc/vcf2maf/archive/v1.6.21.zip | +| VEP | 105 | - | - | +|MINICONDA_VERSION | py37_4.9.2 | https://repo.anaconda.com/miniconda/Miniconda3-py37_4.9.2-Linux-x86_64.sh +|BCFTOOLS_VERSION | 1.10.2 | - | - | +|SAMTOOLS_VERSION | 1.10 | - | - | +|VCF2MAF_VERSION | 1.6.21 | - | - | +|HTSLIB_VERSION | 1.10.2 | - | - | + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner vcf2maf_1.6.21.cwl example_inputs.yaml +``` + +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** + 
+```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/vcf2maf_1.6.21.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> mkdir vcf2maf_toil_log +> toil-cwl-runner --singularity --logFile /path/to/vcf2maf_toil_log/cwltoil.log --jobStore /path/to/vcf2maf_jobStore --batchSystem lsf --workDir /path/to/vcf2maf_toil_log --outdir . --writeLogs /path/to/vcf2maf_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/vcf2maf_1.6.21.cwl /path/to/inputs.yaml > vcf2maf_toil.stdout 2> vcf2maf_toil.stderr & +``` + +### Usage + +``` +Usage: + perl vcf2maf.pl --help + perl vcf2maf.pl --input-vcf input.vcf --output-maf output.maf --tumor-id TUMOR_ID --normal-id NORMAL_ID + +--input-vcf Path to input file in VCF format +--output-maf Path to output MAF file +--tmp-dir Folder to retain intermediate VCFs after runtime [Default: Folder containing input VCF] +--tumor-id Tumor_Sample_Barcode to report in the MAF [TUMOR] +--normal-id Matched_Norm_Sample_Barcode to report in the MAF [NORMAL] +--vcf-tumor-id Tumor sample ID used in VCF's genotype columns [--tumor-id] +--vcf-normal-id Matched normal ID used in VCF's genotype columns [--normal-id] +--custom-enst List of custom ENST IDs that override canonical selection +--vep-path Folder containing the vep script [~/miniconda3/bin] +--vep-data VEP's base cache/plugin directory [~/.vep] +--vep-forks Number of forked processes to use when running VEP [4] +--vep-custom String to pass into VEP's --custom option [] +--vep-config Config file to pass into VEP's --config option [] +--vep-overwrite Allow VEP to overwrite output VCF if it exists +--buffer-size Number of variants VEP loads at a time; Reduce this for low memory systems [5000] +--any-allele When reporting co-located variants, allow mismatched variant alleles too +--inhibit-vep Skip running VEP, but extract VEP annotation in VCF if found +--online Use useastdb.ensembl.org instead of local cache (supports only GRCh38 VCFs
listing <100 events) +--ref-fasta Reference FASTA file [~/.vep/homo_sapiens/102_GRCh37/Homo_sapiens.GRCh37.dna.toplevel.fa.gz] +--max-subpop-af Add FILTER tag common_variant if gnomAD reports any subpopulation AFs greater than this [0.0004] +--species Ensembl-friendly name of species (e.g. mus_musculus for mouse) [homo_sapiens] +--ncbi-build NCBI reference assembly of variants MAF (e.g. GRCm38 for mouse) [GRCh37] +--cache-version Version of offline cache to use with VEP (e.g. 75, 91, 102) [Default: Installed version] +--maf-center Variant calling center to report in MAF [.] +--retain-info Comma-delimited names of INFO fields to retain as extra columns in MAF [] +--retain-fmt Comma-delimited names of FORMAT fields to retain as extra columns in MAF [] +--retain-ann Comma-delimited names of annotations (within the VEP CSQ/ANN) to retain as extra columns in MAF [] +--min-hom-vaf If GT undefined in VCF, minimum allele fraction to call a variant homozygous [0.7] +--remap-chain Chain file to remap variants to a different assembly before running VEP +--verbose Print more things to log progress +--help Print a brief help message and quit +--man Print the detailed manual +``` diff --git a/vcf2maf_1.6.21/README.md b/vcf2maf_1.6.21/README.md new file mode 100644 index 00000000..6d8cd35d --- /dev/null +++ b/vcf2maf_1.6.21/README.md @@ -0,0 +1,76 @@ +# CWL and Dockerfile for running vcf2maf v1.6.21 + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +|--- |--- |--- | +| clearlinux (base image) | - | - | +| vcf2maf | 1.6.21 | https://github.com/mskcc/vcf2maf/archive/v1.6.21.zip | +| VEP | 105 | - | - | +|MINICONDA_VERSION | py37_4.9.2 | https://repo.anaconda.com/miniconda/Miniconda3-py37_4.9.2-Linux-x86_64.sh +|BCFTOOLS_VERSION | 1.10.2 | - | - | +|SAMTOOLS_VERSION | 1.10 | - | - | +|VCF2MAF_VERSION | 1.6.21 | - | - | +|HTSLIB_VERSION | 1.10.2 | - | - | + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to 
the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner vcf2maf_1.6.21.cwl example_inputs.yaml +``` + +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/vcf2maf_1.6.21.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> mkdir vcf2maf_toil_log +> toil-cwl-runner --singularity --logFile /path/to/vcf2maf_toil_log/cwltoil.log --jobStore /path/to/vcf2maf_jobStore --batchSystem lsf --workDir /path/to/vcf2maf_toil_log --outdir . --writeLogs /path/to/vcf2maf_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/vcf2maf_1.6.21.cwl /path/to/inputs.yaml > vcf2maf_toil.stdout 2> vcf2maf_toil.stderr & +``` + +### Usage + +``` +Usage: + perl vcf2maf.pl --help + perl vcf2maf.pl --input-vcf input.vcf --output-maf output.maf --tumor-id TUMOR_ID --normal-id NORMAL_ID + +--input-vcf Path to input file in VCF format +--output-maf Path to output MAF file +--tmp-dir Folder to retain intermediate VCFs after runtime [Default: Folder containing input VCF] +--tumor-id Tumor_Sample_Barcode to report in the MAF [TUMOR] +--normal-id Matched_Norm_Sample_Barcode to report in the MAF [NORMAL] +--vcf-tumor-id Tumor sample ID used in VCF's genotype columns [--tumor-id] +--vcf-normal-id Matched normal ID used in VCF's genotype columns [--normal-id] +--custom-enst List of custom ENST IDs that override canonical selection +--vep-path Folder containing the vep script [~/miniconda3/bin] +--vep-data VEP's base cache/plugin directory [~/.vep] +--vep-forks Number of forked processes to use when running VEP [4] +--vep-custom String to pass into VEP's --custom option [] +--vep-config Config file to pass into VEP's --config option
[] +--vep-overwrite Allow VEP to overwrite output VCF if it exists +--buffer-size Number of variants VEP loads at a time; Reduce this for low memory systems [5000] +--any-allele When reporting co-located variants, allow mismatched variant alleles too +--inhibit-vep Skip running VEP, but extract VEP annotation in VCF if found +--online Use useastdb.ensembl.org instead of local cache (supports only GRCh38 VCFs listing <100 events) +--ref-fasta Reference FASTA file [~/.vep/homo_sapiens/102_GRCh37/Homo_sapiens.GRCh37.dna.toplevel.fa.gz] +--max-subpop-af Add FILTER tag common_variant if gnomAD reports any subpopulation AFs greater than this [0.0004] +--species Ensembl-friendly name of species (e.g. mus_musculus for mouse) [homo_sapiens] +--ncbi-build NCBI reference assembly of variants MAF (e.g. GRCm38 for mouse) [GRCh37] +--cache-version Version of offline cache to use with VEP (e.g. 75, 91, 102) [Default: Installed version] +--maf-center Variant calling center to report in MAF [.] +--retain-info Comma-delimited names of INFO fields to retain as extra columns in MAF [] +--retain-fmt Comma-delimited names of FORMAT fields to retain as extra columns in MAF [] +--retain-ann Comma-delimited names of annotations (within the VEP CSQ/ANN) to retain as extra columns in MAF [] +--min-hom-vaf If GT undefined in VCF, minimum allele fraction to call a variant homozygous [0.7] +--remap-chain Chain file to remap variants to a different assembly before running VEP +--verbose Print more things to log progress +--help Print a brief help message and quit +--man Print the detailed manual +``` diff --git a/vcf2maf_1.6.21/container/Dockerfile b/vcf2maf_1.6.21/container/Dockerfile new file mode 100644 index 00000000..919941e3 --- /dev/null +++ b/vcf2maf_1.6.21/container/Dockerfile @@ -0,0 +1,79 @@ +FROM clearlinux:latest AS builder + +ARG VCF2MAF_VERSION=1.6.21 +ARG HTSLIB_VERSION=1.10.2 +ARG SAMTOOLS_VERSION=1.10 +ARG BCFTOOLS_VERSION=1.10.2 +ARG VEP_VERSION=105.0 +# Install a minimal 
versioned OS into /install_root, and bundled tools if any +ENV CLEAR_VERSION=33980 +RUN swupd os-install --no-progress --no-boot-update --no-scripts \ + --version ${CLEAR_VERSION} \ + --path /install_root \ + --statedir /swupd-state \ + --bundles os-core-update,which + +# Download and install conda into /usr/bin +ENV MINICONDA_VERSION=py37_4.9.2 +RUN swupd bundle-add --no-progress curl git wget sysadmin-basic diffutils less c-basic && \ + curl -sL https://repo.anaconda.com/miniconda/Miniconda3-${MINICONDA_VERSION}-Linux-x86_64.sh -o /tmp/miniconda.sh && \ + sh /tmp/miniconda.sh -bfp /usr + +#Download and install vcf2maf +RUN wget https://github.com/mskcc/vcf2maf/archive/refs/tags/v${VCF2MAF_VERSION}.zip \ + && unzip v${VCF2MAF_VERSION}.zip \ + && rm v${VCF2MAF_VERSION}.zip + +# Use conda to install remaining tools/dependencies into /usr/local +ENV VEP_VERSION=${VEP_VERSION} \ + HTSLIB_VERSION=${HTSLIB_VERSION} \ + BCFTOOLS_VERSION=${BCFTOOLS_VERSION} \ + SAMTOOLS_VERSION=${SAMTOOLS_VERSION} \ + LIFTOVER_VERSION=377 +RUN conda create -qy -p /usr/local \ + -c conda-forge \ + -c bioconda \ + -c defaults \ + ensembl-vep==${VEP_VERSION} \ + htslib==${HTSLIB_VERSION} \ + bcftools==${BCFTOOLS_VERSION} \ + samtools==${SAMTOOLS_VERSION} \ + ucsc-liftover==${LIFTOVER_VERSION} + +#Copy offline cache +COPY homo_sapiens_vep_105_GRCh37.tar.gz /var/cache +COPY Homo_sapiens.GRCh37.dna.toplevel.fa.gz /var/cache +RUN mkdir -p /.vep/homo_sapiens/105_GRCh37/ \ + ## && rsync -avr --progress rsync://ftp.ensembl.org/ensembl/pub/release-105/variation/indexed_vep_cache/homo_sapiens_vep_105_GRCh37.tar.gz $HOME/.vep/ \ + && mv /var/cache/homo_sapiens_vep_105_GRCh37.tar.gz /.vep/ \ + && tar -zxf /.vep/homo_sapiens_vep_105_GRCh37.tar.gz -C /.vep/ \ + && rm /.vep/homo_sapiens_vep_105_GRCh37.tar.gz \ + ## && rsync -avr --progress rsync://ftp.ensembl.org/ensembl/pub/grch37/release-105/fasta/homo_sapiens/dna/Homo_sapiens.GRCh37.dna.toplevel.fa.gz $HOME/.vep/homo_sapiens/105_GRCh37/ \ + && mv 
/var/cache/Homo_sapiens.GRCh37.dna.toplevel.fa.gz /.vep/homo_sapiens/105_GRCh37/ \ + && gzip -d /.vep/homo_sapiens/105_GRCh37/Homo_sapiens.GRCh37.dna.toplevel.fa.gz \ + && bgzip -i /.vep/homo_sapiens/105_GRCh37/Homo_sapiens.GRCh37.dna.toplevel.fa \ + && samtools faidx /.vep/homo_sapiens/105_GRCh37/Homo_sapiens.GRCh37.dna.toplevel.fa.gz + +RUN vep_convert_cache --species homo_sapiens --version all --dir /.vep + +# Deploy the minimal OS and tools into a clean target layer +FROM scratch +ARG VCF2MAF_VERSION=1.6.21 +ENV VEP_VERSION=${VEP_VERSION} + +LABEL org.opencontainers.image.vendor="MSKCC" +LABEL maintainer="Suleyman Vural " +LABEL maintainer="Cyriac Kandoth " +LABEL org.opencontainers.image.version.vcf2maf=${VCF2MAF_VERSION} +LABEL org.opencontainers.image.version.vep="105" + +COPY --from=builder vcf2maf-${VCF2MAF_VERSION} /opt/vcf2maf-${VCF2MAF_VERSION} +COPY --from=builder vcf2maf-${VCF2MAF_VERSION}/*.pl /usr/local/bin/ +COPY --from=builder vcf2maf-${VCF2MAF_VERSION}/data /opt/data +COPY --from=builder /.vep /.vep/ +COPY --from=builder /install_root / +COPY --from=builder /usr/local /usr/local + +RUN chmod 777 /usr/local/bin/vcf2*.pl && chmod 777 /usr/local/bin/maf2*.pl + +WORKDIR /opt diff --git a/vcf2maf_1.6.21/example_inputs.yaml b/vcf2maf_1.6.21/example_inputs.yaml new file mode 100644 index 00000000..906ab455 --- /dev/null +++ b/vcf2maf_1.6.21/example_inputs.yaml @@ -0,0 +1,14 @@ +input_vcf: + class: File + path: /path/to/input.vcf +tumor_id: tumor_sample_name +vcf_tumor_id: tumor_sample_name +normal_id: normal_sample_name +vcf_normal_id: normal_sample_name +ncbi_build: "GRCh37" +vep_data: /.vep/ # location in the container +ref_fasta: "/.vep/homo_sapiens/105_GRCh37/Homo_sapiens.GRCh37.dna.toplevel.fa.gz" # location in the container +vep_path: /usr/local/bin/ # location in the container +retain_info: retain_info_str +retain_fmt: retain_fmt_str +output_maf: "output.maf" diff --git a/vcf2maf_1.6.21/vcf2maf_1.6.21.cwl b/vcf2maf_1.6.21/vcf2maf_1.6.21.cwl new 
file mode 100644 index 00000000..2416a380 --- /dev/null +++ b/vcf2maf_1.6.21/vcf2maf_1.6.21.cwl @@ -0,0 +1,195 @@ + +class: CommandLineTool +cwlVersion: v1.0 + +$namespaces: + dct: http://purl.org/dc/terms/ + doap: http://usefulinc.com/ns/doap# + foaf: http://xmlns.com/foaf/0.1/ + sbg: https://www.sevenbridges.com/ + +id: vcf2maf_v1.6.21 + +baseCommand: + - perl + - /opt/vcf2maf-1.6.21/vcf2maf.pl + +inputs: + memory_per_job: + type: ["null",int] + doc: Memory per job in megabytes + memory_overhead: + type: ["null",int] + doc: Memory overhead per job in megabytes + cache_version: + type: + - 'null' + - string + default: '105' + doc: Version of VEP and its cache to use + inputBinding: + prefix: --cache-version + species: + type: + - 'null' + - string + default: homo_sapiens + doc: Species of variants in input + inputBinding: + prefix: --species + ncbi_build: + type: + - 'null' + - string + default: GRCh37 + doc: Genome build of variants in input + inputBinding: + prefix: --ncbi-build + ref_fasta: + type: ['null', string] + doc: Reference FASTA file + inputBinding: + prefix: --ref-fasta + maf_center: + type: ['null', string] + default: mskcc.org + doc: Variant calling center to report in MAF + inputBinding: + prefix: --maf-center + output_maf: + type: ['null', string] + doc: Path to output MAF file + inputBinding: + prefix: --output-maf + min_hom_vaf: + type: + - 'null' + - float + default: 0.7 + doc: If GT undefined in VCF, minimum allele fraction to call a variant homozygous + inputBinding: + prefix: --min-hom-vaf + remap_chain: + type: ['null', string] + doc: Chain file to remap variants to a different assembly before running VEP + inputBinding: + prefix: --remap-chain + normal_id: + type: ['null', string] + default: NORMAL + doc: Matched_Norm_Sample_Barcode to report in the MAF + inputBinding: + prefix: --normal-id + buffer_size: + type: + - 'null' + - int + default: 5000 + doc: Number of variants VEP loads at a time; Reduce this for low memory systems + 
inputBinding: + prefix: --buffer-size + custom_enst: + type: ['null', string] + doc: List of custom ENST IDs that override canonical selection + inputBinding: + prefix: --custom-enst + vcf_normal_id: + type: ['null', string] + default: NORMAL + doc: Matched normal ID used in VCFs genotype columns + inputBinding: + prefix: --vcf-normal-id + vep_path: + type: ['null', string] + doc: Folder containing variant_effect_predictor.pl or vep binary + inputBinding: + prefix: --vep-path + vep_data: + type: ['null', string] + doc: VEPs base cache/plugin directory + inputBinding: + prefix: --vep-data + any_allele: + type: ['null', string] + doc: When reporting co-located variants, allow mismatched variant alleles too + inputBinding: + prefix: --any-allele + input_vcf: + type: + - string + - File + doc: Path to input file in VCF format + inputBinding: + prefix: --input-vcf + vep_forks: + type: + - 'null' + - int + default: 4 + doc: Number of forked processes to use when running VEP + inputBinding: + prefix: --vep-forks + vcf_tumor_id: + type: ['null', string] + default: TUMOR + doc: Tumor sample ID used in VCFs genotype columns + inputBinding: + prefix: --vcf-tumor-id + tumor_id: + type: ['null', string] + default: TUMOR + doc: Tumor_Sample_Barcode to report in the MAF + inputBinding: + prefix: --tumor-id + retain_info: + type: ['null', string] + doc: Comma-delimited names of INFO fields to retain as extra columns in MAF + inputBinding: + prefix: --retain-info + retain_fmt: + type: ['null', string] + doc: Comma-delimited names of FORMAT fields to retain as extra columns in MAF [] + inputBinding: + prefix: --retain-fmt +outputs: + vcf2maf_maf: + type: File + outputBinding: + glob: | + ${ + if (inputs.output_maf) + return inputs.output_maf; + return null; + } + +arguments: +- valueFrom: "$(runtime.tmpdir)" + prefix: '--tmp-dir' + shellQuote: false + +requirements: + InlineJavascriptRequirement: {} + ResourceRequirement: + ramMin: 8000 + coresMin: 2 + DockerRequirement: + 
dockerPull: ghcr.io/msk-access/vcf2maf:1.6.21 + +dct:contributor: + - class: foaf:Organization + foaf:member: + - class: foaf:Person + foaf:mbox: mailto:vurals@mskcc.org + foaf:name: Suleyman Vural + foaf:name: Memorial Sloan Kettering Cancer Center +dct:creator: + - class: foaf:Organization + foaf:member: + - class: foaf:Person + foaf:mbox: mailto:kumarn1@mskcc.org + foaf:name: Nikhil Kumar + foaf:name: Memorial Sloan Kettering Cancer Center +doap:release: + - class: doap:Version + doap:name: vcf2maf + doap:revision: 1.6.21 From f5f01805dcb8bcf78c599f189ea9767084e287a5 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Mon, 25 Apr 2022 12:30:03 -0400 Subject: [PATCH 468/476] Adding VCF2MAF 1.6.21 Co-Authored-By: Suleyman Vural --- docs/vcf2maf/README.md | 3 + docs/vcf2maf/vcf2maf_1.6.21.md | 76 +++++++++++ vcf2maf_1.6.21/README.md | 76 +++++++++++ vcf2maf_1.6.21/container/Dockerfile | 79 +++++++++++ vcf2maf_1.6.21/example_inputs.yaml | 14 ++ vcf2maf_1.6.21/vcf2maf_1.6.21.cwl | 195 ++++++++++++++++++++++++++++ 6 files changed, 443 insertions(+) create mode 100644 docs/vcf2maf/README.md create mode 100644 docs/vcf2maf/vcf2maf_1.6.21.md create mode 100644 vcf2maf_1.6.21/README.md create mode 100644 vcf2maf_1.6.21/container/Dockerfile create mode 100644 vcf2maf_1.6.21/example_inputs.yaml create mode 100644 vcf2maf_1.6.21/vcf2maf_1.6.21.cwl diff --git a/docs/vcf2maf/README.md b/docs/vcf2maf/README.md new file mode 100644 index 00000000..908acffd --- /dev/null +++ b/docs/vcf2maf/README.md @@ -0,0 +1,3 @@ +#VCF2MAF + + diff --git a/docs/vcf2maf/vcf2maf_1.6.21.md b/docs/vcf2maf/vcf2maf_1.6.21.md new file mode 100644 index 00000000..6d8cd35d --- /dev/null +++ b/docs/vcf2maf/vcf2maf_1.6.21.md @@ -0,0 +1,76 @@ +# CWL and Dockerfile for running vcf2maf v1.6.21 + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +|--- |--- |--- | +| clearlinux (base image) | - | - | +| vcf2maf | 1.6.21 | 
https://github.com/mskcc/vcf2maf/archive/v1.6.21.zip | +| VEP | 105 | - | +|MINICONDA_VERSION | py37_4.9.2 | https://repo.anaconda.com/miniconda/Miniconda3-py37_4.9.2-Linux-x86_64.sh | +|BCFTOOLS_VERSION | 1.10.2 | - | +|SAMTOOLS_VERSION | 1.10 | - | +|VCF2MAF_VERSION | 1.6.21 | - | +|HTSLIB_VERSION | 1.10.2 | - | + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner vcf2maf_1.6.21.cwl example_inputs.yaml +``` + +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/vcf2maf_1.6.21.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> mkdir vcf2maf_toil_log +> toil-cwl-runner --singularity --logFile /path/to/vcf2maf_toil_log/cwltoil.log --jobStore /path/to/vcf2maf_jobStore --batchSystem lsf --workDir /path/to/vcf2maf_toil_log --outdir .
--writeLogs /path/to/vcf2maf_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/vcf2maf_1.6.21.cwl /path/to/inputs.yaml > vcf2maf_toil.stdout 2> vcf2maf_toil.stderr & +``` + +### Usage + +``` +Usage: + perl vcf2maf.pl --help + perl vcf2maf.pl --input-vcf input.vcf --output-maf output.maf --tumor-id TUMOR_ID --normal-id NORMAL_ID + +--input-vcf Path to input file in VCF format +--output-maf Path to output MAF file +--tmp-dir Folder to retain intermediate VCFs after runtime [Default: Folder containing input VCF] +--tumor-id Tumor_Sample_Barcode to report in the MAF [TUMOR] +--normal-id Matched_Norm_Sample_Barcode to report in the MAF [NORMAL] +--vcf-tumor-id Tumor sample ID used in VCF's genotype columns [--tumor-id] +--vcf-normal-id Matched normal ID used in VCF's genotype columns [--normal-id] +--custom-enst List of custom ENST IDs that override canonical selection +--vep-path Folder containing the vep script [~/miniconda3/bin] +--vep-data VEP's base cache/plugin directory [~/.vep] +--vep-forks Number of forked processes to use when running VEP [4] +--vep-custom String to pass into VEP's --custom option [] +--vep-config Config file to pass into VEP's --config option [] +--vep-overwrite Allow VEP to overwrite output VCF if it exists +--buffer-size Number of variants VEP loads at a time; Reduce this for low memory systems [5000] +--any-allele When reporting co-located variants, allow mismatched variant alleles too +--inhibit-vep Skip running VEP, but extract VEP annotation in VCF if found +--online Use useastdb.ensembl.org instead of local cache (supports only GRCh38 VCFs listing <100 events) +--ref-fasta Reference FASTA file [~/.vep/homo_sapiens/102_GRCh37/Homo_sapiens.GRCh37.dna.toplevel.fa.gz] +--max-subpop-af Add FILTER tag common_variant if gnomAD reports any subpopulation AFs greater than this [0.0004] +--species Ensembl-friendly name of species (e.g.
mus_musculus for mouse) [homo_sapiens] +--ncbi-build NCBI reference assembly of variants MAF (e.g. GRCm38 for mouse) [GRCh37] +--cache-version Version of offline cache to use with VEP (e.g. 75, 91, 102) [Default: Installed version] +--maf-center Variant calling center to report in MAF [.] +--retain-info Comma-delimited names of INFO fields to retain as extra columns in MAF [] +--retain-fmt Comma-delimited names of FORMAT fields to retain as extra columns in MAF [] +--retain-ann Comma-delimited names of annotations (within the VEP CSQ/ANN) to retain as extra columns in MAF [] +--min-hom-vaf If GT undefined in VCF, minimum allele fraction to call a variant homozygous [0.7] +--remap-chain Chain file to remap variants to a different assembly before running VEP +--verbose Print more things to log progress +--help Print a brief help message and quit +--man Print the detailed manual +``` diff --git a/vcf2maf_1.6.21/README.md b/vcf2maf_1.6.21/README.md new file mode 100644 index 00000000..6d8cd35d --- /dev/null +++ b/vcf2maf_1.6.21/README.md @@ -0,0 +1,76 @@ +# CWL and Dockerfile for running vcf2maf v1.6.21 + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +|--- |--- |--- | +| clearlinux (base image) | - | - | +| vcf2maf | 1.6.21 | https://github.com/mskcc/vcf2maf/archive/v1.6.21.zip | +| VEP | 105 | - | +|MINICONDA_VERSION | py37_4.9.2 | https://repo.anaconda.com/miniconda/Miniconda3-py37_4.9.2-Linux-x86_64.sh | +|BCFTOOLS_VERSION | 1.10.2 | - | +|SAMTOOLS_VERSION | 1.10 | - | +|VCF2MAF_VERSION | 1.6.21 | - | +|HTSLIB_VERSION | 1.10.2 | - | + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner vcf2maf_1.6.21.cwl example_inputs.yaml +``` + +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying
[lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/vcf2maf_1.6.21.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> mkdir vcf2maf_toil_log +> toil-cwl-runner --singularity --logFile /path/to/vcf2maf_toil_log/cwltoil.log --jobStore /path/to/vcf2maf_jobStore --batchSystem lsf --workDir /path/to/vcf2maf_toil_log --outdir . --writeLogs /path/to/vcf2maf_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/vcf2maf_1.6.21.cwl /path/to/inputs.yaml > vcf2maf_toil.stdout 2> vcf2maf_toil.stderr & +``` + +### Usage + +``` +Usage: + perl vcf2maf.pl --help + perl vcf2maf.pl --input-vcf input.vcf --output-maf output.maf --tumor-id TUMOR_ID --normal-id NORMAL_ID + +--input-vcf Path to input file in VCF format +--output-maf Path to output MAF file +--tmp-dir Folder to retain intermediate VCFs after runtime [Default: Folder containing input VCF] +--tumor-id Tumor_Sample_Barcode to report in the MAF [TUMOR] +--normal-id Matched_Norm_Sample_Barcode to report in the MAF [NORMAL] +--vcf-tumor-id Tumor sample ID used in VCF's genotype columns [--tumor-id] +--vcf-normal-id Matched normal ID used in VCF's genotype columns [--normal-id] +--custom-enst List of custom ENST IDs that override canonical selection +--vep-path Folder containing the vep script [~/miniconda3/bin] +--vep-data VEP's base cache/plugin directory [~/.vep] +--vep-forks Number of forked processes to use when running VEP [4] +--vep-custom String to pass into VEP's --custom option [] +--vep-config Config file to pass into VEP's --config option [] +--vep-overwrite Allow VEP to overwrite output VCF if it exists +--buffer-size Number of variants VEP loads at a time; Reduce this for low memory systems [5000] +--any-allele When reporting co-located variants, allow mismatched variant
alleles too +--inhibit-vep Skip running VEP, but extract VEP annotation in VCF if found +--online Use useastdb.ensembl.org instead of local cache (supports only GRCh38 VCFs listing <100 events) +--ref-fasta Reference FASTA file [~/.vep/homo_sapiens/102_GRCh37/Homo_sapiens.GRCh37.dna.toplevel.fa.gz] +--max-subpop-af Add FILTER tag common_variant if gnomAD reports any subpopulation AFs greater than this [0.0004] +--species Ensembl-friendly name of species (e.g. mus_musculus for mouse) [homo_sapiens] +--ncbi-build NCBI reference assembly of variants MAF (e.g. GRCm38 for mouse) [GRCh37] +--cache-version Version of offline cache to use with VEP (e.g. 75, 91, 102) [Default: Installed version] +--maf-center Variant calling center to report in MAF [.] +--retain-info Comma-delimited names of INFO fields to retain as extra columns in MAF [] +--retain-fmt Comma-delimited names of FORMAT fields to retain as extra columns in MAF [] +--retain-ann Comma-delimited names of annotations (within the VEP CSQ/ANN) to retain as extra columns in MAF [] +--min-hom-vaf If GT undefined in VCF, minimum allele fraction to call a variant homozygous [0.7] +--remap-chain Chain file to remap variants to a different assembly before running VEP +--verbose Print more things to log progress +--help Print a brief help message and quit +--man Print the detailed manual +``` diff --git a/vcf2maf_1.6.21/container/Dockerfile b/vcf2maf_1.6.21/container/Dockerfile new file mode 100644 index 00000000..919941e3 --- /dev/null +++ b/vcf2maf_1.6.21/container/Dockerfile @@ -0,0 +1,79 @@ +FROM clearlinux:latest AS builder + +ARG VCF2MAF_VERSION=1.6.21 +ARG HTSLIB_VERSION=1.10.2 +ARG SAMTOOLS_VERSION=1.10 +ARG BCFTOOLS_VERSION=1.10.2 +ARG VEP_VERSION=105.0 +# Install a minimal versioned OS into /install_root, and bundled tools if any +ENV CLEAR_VERSION=33980 +RUN swupd os-install --no-progress --no-boot-update --no-scripts \ + --version ${CLEAR_VERSION} \ + --path /install_root \ + --statedir /swupd-state \ + 
--bundles os-core-update,which + +# Download and install conda into /usr/bin +ENV MINICONDA_VERSION=py37_4.9.2 +RUN swupd bundle-add --no-progress curl git wget sysadmin-basic diffutils less c-basic && \ + curl -sL https://repo.anaconda.com/miniconda/Miniconda3-${MINICONDA_VERSION}-Linux-x86_64.sh -o /tmp/miniconda.sh && \ + sh /tmp/miniconda.sh -bfp /usr + +#Download and install vcf2maf +RUN wget https://github.com/mskcc/vcf2maf/archive/refs/tags/v${VCF2MAF_VERSION}.zip \ + && unzip v${VCF2MAF_VERSION}.zip \ + && rm v${VCF2MAF_VERSION}.zip + +# Use conda to install remaining tools/dependencies into /usr/local +ENV VEP_VERSION=${VEP_VERSION} \ + HTSLIB_VERSION=${HTSLIB_VERSION} \ + BCFTOOLS_VERSION=${BCFTOOLS_VERSION} \ + SAMTOOLS_VERSION=${SAMTOOLS_VERSION} \ + LIFTOVER_VERSION=377 +RUN conda create -qy -p /usr/local \ + -c conda-forge \ + -c bioconda \ + -c defaults \ + ensembl-vep==${VEP_VERSION} \ + htslib==${HTSLIB_VERSION} \ + bcftools==${BCFTOOLS_VERSION} \ + samtools==${SAMTOOLS_VERSION} \ + ucsc-liftover==${LIFTOVER_VERSION} + +#Copy offline cache +COPY homo_sapiens_vep_105_GRCh37.tar.gz /var/cache +COPY Homo_sapiens.GRCh37.dna.toplevel.fa.gz /var/cache +RUN mkdir -p /.vep/homo_sapiens/105_GRCh37/ \ + ## && rsync -avr --progress rsync://ftp.ensembl.org/ensembl/pub/release-105/variation/indexed_vep_cache/homo_sapiens_vep_105_GRCh37.tar.gz $HOME/.vep/ \ + && mv /var/cache/homo_sapiens_vep_105_GRCh37.tar.gz /.vep/ \ + && tar -zxf /.vep/homo_sapiens_vep_105_GRCh37.tar.gz -C /.vep/ \ + && rm /.vep/homo_sapiens_vep_105_GRCh37.tar.gz \ + ## && rsync -avr --progress rsync://ftp.ensembl.org/ensembl/pub/grch37/release-105/fasta/homo_sapiens/dna/Homo_sapiens.GRCh37.dna.toplevel.fa.gz $HOME/.vep/homo_sapiens/105_GRCh37/ \ + && mv /var/cache/Homo_sapiens.GRCh37.dna.toplevel.fa.gz /.vep/homo_sapiens/105_GRCh37/ \ + && gzip -d /.vep/homo_sapiens/105_GRCh37/Homo_sapiens.GRCh37.dna.toplevel.fa.gz \ + && bgzip -i 
/.vep/homo_sapiens/105_GRCh37/Homo_sapiens.GRCh37.dna.toplevel.fa \ + && samtools faidx /.vep/homo_sapiens/105_GRCh37/Homo_sapiens.GRCh37.dna.toplevel.fa.gz + +RUN vep_convert_cache --species homo_sapiens --version all --dir /.vep + +# Deploy the minimal OS and tools into a clean target layer +FROM scratch +ARG VCF2MAF_VERSION=1.6.21 +ENV VEP_VERSION=${VEP_VERSION} + +LABEL org.opencontainers.image.vendor="MSKCC" +LABEL maintainer="Suleyman Vural " +LABEL maintainer="Cyriac Kandoth " +LABEL org.opencontainers.image.version.vcf2maf=${VCF2MAF_VERSION} +LABEL org.opencontainers.image.version.vep="105" + +COPY --from=builder vcf2maf-${VCF2MAF_VERSION} /opt/vcf2maf-${VCF2MAF_VERSION} +COPY --from=builder vcf2maf-${VCF2MAF_VERSION}/*.pl /usr/local/bin/ +COPY --from=builder vcf2maf-${VCF2MAF_VERSION}/data /opt/data +COPY --from=builder /.vep /.vep/ +COPY --from=builder /install_root / +COPY --from=builder /usr/local /usr/local + +RUN chmod 777 /usr/local/bin/vcf2*.pl && chmod 777 /usr/local/bin/maf2*.pl + +WORKDIR /opt diff --git a/vcf2maf_1.6.21/example_inputs.yaml b/vcf2maf_1.6.21/example_inputs.yaml new file mode 100644 index 00000000..906ab455 --- /dev/null +++ b/vcf2maf_1.6.21/example_inputs.yaml @@ -0,0 +1,14 @@ +input_vcf: + class: File + path: /path/to/input.vcf +tumor_id: tumor_sample_name +vcf_tumor_id: tumor_sample_name +normal_id: normal_sample_name +vcf_normal_id: normal_sample_name +ncbi_build: "GRCh37" +vep_data: /.vep/ # location in the container +ref_fasta: "/.vep/homo_sapiens/105_GRCh37/Homo_sapiens.GRCh37.dna.toplevel.fa.gz" # location in the container +vep_path: /usr/local/bin/ # location in the container +retain_info: retain_info_str +retain_fmt: retain_fmt_str +output_maf: "output.maf" diff --git a/vcf2maf_1.6.21/vcf2maf_1.6.21.cwl b/vcf2maf_1.6.21/vcf2maf_1.6.21.cwl new file mode 100644 index 00000000..2416a380 --- /dev/null +++ b/vcf2maf_1.6.21/vcf2maf_1.6.21.cwl @@ -0,0 +1,195 @@ + +class: CommandLineTool +cwlVersion: v1.0 + +$namespaces: + dct: 
http://purl.org/dc/terms/ + doap: http://usefulinc.com/ns/doap# + foaf: http://xmlns.com/foaf/0.1/ + sbg: https://www.sevenbridges.com/ + +id: vcf2maf_v1.6.21 + +baseCommand: + - perl + - /opt/vcf2maf-1.6.21/vcf2maf.pl + +inputs: + memory_per_job: + type: ["null",int] + doc: Memory per job in megabytes + memory_overhead: + type: ["null",int] + doc: Memory overhead per job in megabytes + cache_version: + type: + - 'null' + - string + default: '105' + doc: Version of VEP and its cache to use + inputBinding: + prefix: --cache-version + species: + type: + - 'null' + - string + default: homo_sapiens + doc: Species of variants in input + inputBinding: + prefix: --species + ncbi_build: + type: + - 'null' + - string + default: GRCh37 + doc: Genome build of variants in input + inputBinding: + prefix: --ncbi-build + ref_fasta: + type: ['null', string] + doc: Reference FASTA file + inputBinding: + prefix: --ref-fasta + maf_center: + type: ['null', string] + default: mskcc.org + doc: Variant calling center to report in MAF + inputBinding: + prefix: --maf-center + output_maf: + type: ['null', string] + doc: Path to output MAF file + inputBinding: + prefix: --output-maf + min_hom_vaf: + type: + - 'null' + - float + default: 0.7 + doc: If GT undefined in VCF, minimum allele fraction to call a variant homozygous + inputBinding: + prefix: --min-hom-vaf + remap_chain: + type: ['null', string] + doc: Chain file to remap variants to a different assembly before running VEP + inputBinding: + prefix: --remap-chain + normal_id: + type: ['null', string] + default: NORMAL + doc: Matched_Norm_Sample_Barcode to report in the MAF + inputBinding: + prefix: --normal-id + buffer_size: + type: + - 'null' + - int + default: 5000 + doc: Number of variants VEP loads at a time; Reduce this for low memory systems + inputBinding: + prefix: --buffer-size + custom_enst: + type: ['null', string] + doc: List of custom ENST IDs that override canonical selection + inputBinding: + prefix: --custom-enst + 
vcf_normal_id: + type: ['null', string] + default: NORMAL + doc: Matched normal ID used in VCFs genotype columns + inputBinding: + prefix: --vcf-normal-id + vep_path: + type: ['null', string] + doc: Folder containing variant_effect_predictor.pl or vep binary + inputBinding: + prefix: --vep-path + vep_data: + type: ['null', string] + doc: VEPs base cache/plugin directory + inputBinding: + prefix: --vep-data + any_allele: + type: ['null', string] + doc: When reporting co-located variants, allow mismatched variant alleles too + inputBinding: + prefix: --any-allele + input_vcf: + type: + - string + - File + doc: Path to input file in VCF format + inputBinding: + prefix: --input-vcf + vep_forks: + type: + - 'null' + - int + default: 4 + doc: Number of forked processes to use when running VEP + inputBinding: + prefix: --vep-forks + vcf_tumor_id: + type: ['null', string] + default: TUMOR + doc: Tumor sample ID used in VCFs genotype columns + inputBinding: + prefix: --vcf-tumor-id + tumor_id: + type: ['null', string] + default: TUMOR + doc: Tumor_Sample_Barcode to report in the MAF + inputBinding: + prefix: --tumor-id + retain_info: + type: ['null', string] + doc: Comma-delimited names of INFO fields to retain as extra columns in MAF + inputBinding: + prefix: --retain-info + retain_fmt: + type: ['null', string] + doc: Comma-delimited names of FORMAT fields to retain as extra columns in MAF [] + inputBinding: + prefix: --retain-fmt +outputs: + vcf2maf_maf: + type: File + outputBinding: + glob: | + ${ + if (inputs.output_maf) + return inputs.output_maf; + return null; + } + +arguments: +- valueFrom: "$(runtime.tmpdir)" + prefix: '--tmp-dir' + shellQuote: false + +requirements: + InlineJavascriptRequirement: {} + ResourceRequirement: + ramMin: 8000 + coresMin: 2 + DockerRequirement: + dockerPull: ghcr.io/msk-access/vcf2maf:1.6.21 + +dct:contributor: + - class: foaf:Organization + foaf:member: + - class: foaf:Person + foaf:mbox: mailto:vurals@mskcc.org + foaf:name: Suleyman 
Vural + foaf:name: Memorial Sloan Kettering Cancer Center +dct:creator: + - class: foaf:Organization + foaf:member: + - class: foaf:Person + foaf:mbox: mailto:kumarn1@mskcc.org + foaf:name: Nikhil Kumar + foaf:name: Memorial Sloan Kettering Cancer Center +doap:release: + - class: doap:Version + doap:name: vcf2maf + doap:revision: 1.6.21 From 39bf207b65b45f307db6ef80f774e5f6019cc8d0 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Mon, 25 Apr 2022 12:36:26 -0400 Subject: [PATCH 469/476] Fix Docs --- docs/SUMMARY.md | 6 +++++- docs/delly/{delly_0.9.1.md => delly_call_0.9.1.md} | 0 2 files changed, 5 insertions(+), 1 deletion(-) rename docs/delly/{delly_0.9.1.md => delly_call_0.9.1.md} (100%) diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index d4677298..eb5ea23e 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -15,6 +15,8 @@ * [major](../biometrics_major/README.md) * [genotype](../biometrics_genotype/README.md) * [sexmismatch](../biometrics_sexmismatch/README.md) +* [Delly](delly/README.md) + * [delly call 0.9.1](delly/delly_call_0.9.1.md) * [Disambiguate](disambiguate/README.md) * [v1.0.0](disambiguate/disambiguate_1.0.0.md) * [Fgbio](fgbio/README.md) @@ -73,7 +75,9 @@ * [Ubuntu utilites](ubuntu-utilites/README.md) * [v18.04](ubuntu-utilites/utilities_ubuntu_18.04.md) * [VarDictJava](vardict/README.md) - * [v18.04](vardict/vardict_1.8.3.md) + * [v1.8.2](vardict/vardict_1.8.2.md) +* [VCF2MAF](vcf2maf/README.md) + * [1.6.21](vcf2maf/vcf2maf_1.6.21.md) * [Waltz](waltz/README.md) * [CountReads v3.1.1](waltz/waltz_count_reads_3.1.1.md) * [PileupMetrics v3.1.1](waltz/waltz_pileupmatrices_3.1.1.md) diff --git a/docs/delly/delly_0.9.1.md b/docs/delly/delly_call_0.9.1.md similarity index 100% rename from docs/delly/delly_0.9.1.md rename to docs/delly/delly_call_0.9.1.md From 264331ba736521b1ec9a40ab92ab52cc5d7d8a0b Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Mon, 25 Apr 2022 12:39:52 -0400 Subject: [PATCH 470/476] Fix Docs --- docs/delly/README.md | 3 +-- 
docs/vardict/README.md | 3 --- docs/vardictjava/README.md | 3 +++ docs/{vardict => vardictjava}/vardict_1.8.2.md | 0 docs/vcf2maf/README.md | 2 +- 5 files changed, 5 insertions(+), 6 deletions(-) delete mode 100644 docs/vardict/README.md create mode 100644 docs/vardictjava/README.md rename docs/{vardict => vardictjava}/vardict_1.8.2.md (100%) diff --git a/docs/delly/README.md b/docs/delly/README.md index 93564821..3cf4e541 100644 --- a/docs/delly/README.md +++ b/docs/delly/README.md @@ -1,3 +1,2 @@ -#Delly - +# Delly diff --git a/docs/vardict/README.md b/docs/vardict/README.md deleted file mode 100644 index 76b832e7..00000000 --- a/docs/vardict/README.md +++ /dev/null @@ -1,3 +0,0 @@ -#Vardict - - diff --git a/docs/vardictjava/README.md b/docs/vardictjava/README.md new file mode 100644 index 00000000..5cf8e57f --- /dev/null +++ b/docs/vardictjava/README.md @@ -0,0 +1,3 @@ +# VardictJava + + diff --git a/docs/vardict/vardict_1.8.2.md b/docs/vardictjava/vardict_1.8.2.md similarity index 100% rename from docs/vardict/vardict_1.8.2.md rename to docs/vardictjava/vardict_1.8.2.md diff --git a/docs/vcf2maf/README.md b/docs/vcf2maf/README.md index 908acffd..e349a6cd 100644 --- a/docs/vcf2maf/README.md +++ b/docs/vcf2maf/README.md @@ -1,3 +1,3 @@ -#VCF2MAF +# VCF2MAF From 284124471386fada0b3d41ec8323cb2165e3ac6c Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Mon, 25 Apr 2022 12:41:28 -0400 Subject: [PATCH 471/476] fix docs --- docs/SUMMARY.md | 4 ++-- docs/vardictjava/{vardict_1.8.2.md => vardictjava_1.8.2.md} | 0 2 files changed, 2 insertions(+), 2 deletions(-) rename docs/vardictjava/{vardict_1.8.2.md => vardictjava_1.8.2.md} (100%) diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index eb5ea23e..d53026f6 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -74,8 +74,8 @@ * [v0.6.2](trim-galore/trim_galore_0.6.2.md) * [Ubuntu utilites](ubuntu-utilites/README.md) * [v18.04](ubuntu-utilites/utilities_ubuntu_18.04.md) -* [VarDictJava](vardict/README.md) - * 
[v1.8.2](vardict/vardict_1.8.2.md) +* [VarDictJava](vardictjava/README.md) + * [v1.8.2](vardictjava/vardictjava_1.8.2.md) * [VCF2MAF](vcf2maf/README.md) * [1.6.21](vcf2maf/vcf2maf_1.6.21.md) * [Waltz](waltz/README.md) diff --git a/docs/vardictjava/vardict_1.8.2.md b/docs/vardictjava/vardictjava_1.8.2.md similarity index 100% rename from docs/vardictjava/vardict_1.8.2.md rename to docs/vardictjava/vardictjava_1.8.2.md From c5a3815090f43d8f6b9d59636a9965ad051f0250 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Mon, 25 Apr 2022 17:24:35 -0400 Subject: [PATCH 472/476] Adding BCFtools to docs and updating the docker image location --- bcftools_1.6/bcftools_concat_1.6.cwl | 2 +- docs/SUMMARY.md | 2 + docs/bcftools/README.md | 1 + docs/bcftools/bcftools_concat_1.6.md | 69 ++++++++++++++++++++++++++++ 4 files changed, 73 insertions(+), 1 deletion(-) create mode 100644 docs/bcftools/README.md create mode 100644 docs/bcftools/bcftools_concat_1.6.md diff --git a/bcftools_1.6/bcftools_concat_1.6.cwl b/bcftools_1.6/bcftools_concat_1.6.cwl index c6d665f3..98f4122c 100644 --- a/bcftools_1.6/bcftools_concat_1.6.cwl +++ b/bcftools_1.6/bcftools_concat_1.6.cwl @@ -157,7 +157,7 @@ requirements: ramMin: 8000 coresMin: 1 DockerRequirement: - dockerPull: mskaccess/bcftools:1.6 + dockerPull: ghcr.io/msk-access/bcftools:1.6 dct:contributor: diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index d53026f6..8b0e73f4 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -9,6 +9,8 @@ * [Bedtools](bedtools/README.md) * [genomecov v2.28.0\_cv2](bedtools/bedtools_genomecov_v2.28.0_cv2.md) * [merge v2.28.0\_cv2](bedtools/bedtools_merge_v2.28.0_cv2.md) +* [BCFTOOLS](bcftools/README.md) + * [bcftools concat v1.6](bcftools/bcftools_concat_1.6.md) * Biometrics * [extract](../biometrics_extract/README.md) * [minor](../biometrics_minor/README.md) diff --git a/docs/bcftools/README.md b/docs/bcftools/README.md new file mode 100644 index 00000000..a08fa464 --- /dev/null +++ b/docs/bcftools/README.md @@ -0,0 
+1 @@ +# BCFTOOLS diff --git a/docs/bcftools/bcftools_concat_1.6.md b/docs/bcftools/bcftools_concat_1.6.md new file mode 100644 index 00000000..ecf5b687 --- /dev/null +++ b/docs/bcftools/bcftools_concat_1.6.md @@ -0,0 +1,69 @@ +# CWL and Dockerfile for running bcftools v1.6 + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +|--- |--- |--- | +| alpine:3.8 base image | 3.8 | - | +| bcftools | 1.6 | https://github.com/samtools/bcftools/archive/1.6.zip | + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner bcftools_concat_1.6.cwl example_inputs.yaml +``` + +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/bcftools_concat_1.6.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> mkdir bcftools_toil_log +> toil-cwl-runner --singularity --logFile /path/to/bcftools_toil_log/cwltoil.log --jobStore /path/to/bcftools_jobStore --batchSystem lsf --workDir /path/to/bcftools_toil_log --outdir . --writeLogs /path/to/bcftools_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/bcftools_concat_1.6.cwl /path/to/inputs.yaml > bcftools_toil.stdout 2> bcftools_toil.stderr & +``` + +### Usage + +``` +usage: toil-cwl-runner bcftools_concat_1.6.cwl [-h] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --allow_overlaps First coordinate of the next file can precede last + record of the current file. + --compact_PS Do not output PS tag at each site, only at the start + of a new phase set block. 
+ --ligate Ligate phased VCFs by matching phase at overlapping + haplotypes + --list LIST Read the list of files from a file. + --min_PQ MIN_PQ Break phase set if phasing quality is lower than + [30] + --naive Concatenate BCF files without recompression + (dangerous, use with caution) + --no_version do not append version and command line to the header + --output OUTPUT Write output to a file [standard output] + --output_type OUTPUT_TYPE + b - compressed BCF, u - uncompressed BCF, z + - compressed VCF, v - uncompressed VCF [v] + --regions REGIONS Restrict to comma-separated list of regions + --regions_file REGIONS_FILE + Restrict to regions listed in a file + --remove_duplicates Alias for -d none + --rm_dups RM_DUPS Output duplicate records present in multiple + files only once - + --threads THREADS Number of extra output compression threads [0] + --vcf_files_csi VCF_FILES_CSI + Array of vcf files to be concatenated into one vcf + --vcf_files_tbi VCF_FILES_TBI + Array of vcf files to be concatenated into one vcf + +``` From b7ae002bc549a4555c79ac91453220e91c6ea5d6 Mon Sep 17 00:00:00 2001 From: Ronak Shah Date: Mon, 25 Apr 2022 17:53:24 -0400 Subject: [PATCH 473/476] Updating bcftools --- bcftools_1.6/README.md | 2 +- bcftools_1.6/container/Dockerfile | 51 ---------------------------- docs/bcftools/bcftools_concat_1.6.md | 2 +- 3 files changed, 2 insertions(+), 53 deletions(-) delete mode 100644 bcftools_1.6/container/Dockerfile diff --git a/bcftools_1.6/README.md b/bcftools_1.6/README.md index ecf5b687..770e8751 100644 --- a/bcftools_1.6/README.md +++ b/bcftools_1.6/README.md @@ -5,7 +5,7 @@ | Tool | Version | Location | |--- |--- |--- | | alpine:3.8 base image | 3.8 | - | -| bcftools | 1.6 | https://github.com/samtools/bcftools/archive/1.6.zip | +| bcftools | 1.6 | quay.io/biocontainers/bcftools:1.6--0 | ## CWL diff --git a/bcftools_1.6/container/Dockerfile b/bcftools_1.6/container/Dockerfile deleted file mode 100644 index 42b00041..00000000 --- 
a/bcftools_1.6/container/Dockerfile +++ /dev/null @@ -1,51 +0,0 @@ -################## BASE IMAGE ###################### -FROM alpine:3.8 - -################## ARGUMENTS/Environments ########## - -ARG BUILD_DATE -ARG BUILD_VERSION=1.0.0 -ARG HTSLIB_VERSION=1.6 -ARG BCFTOOLS_VERSION=1.6 - -################## METADATA ######################## -LABEL org.opencontainers.image.vendor="MSKCC" -LABEL org.opencontainers.image.authors="Nikhil Kumar (kumarn1@mskcc.org)" -LABEL org.opencontainers.image.created=${BUILD_DATE} -LABEL org.opencontainers.image.version=${BUILD_VERSION} -LABEL org.opencontainers.image.version.htslib=${HTSLIB_VERSION} -LABEL org.opencontainers.image.version.bcftools=${BCFTOOLS_VERSION} -LABEL org.opencontainers.image.version.perl="5.26.2-r1" -LABEL org.opencontainers.image.version.alpine="3.8" -LABEL org.opencontainers.image.source.htslib="https://github.com/samtools/htslib/releases/tag/${HTSLIB_VERSION}" -LABEL org.opencontainers.image.source.bcftools="https://github.com/samtools/bcftools/releases/tag/${BCFTOOLS_VERSION}" - - -LABEL org.opencontainers.image.description="This container uses alpine3.8 as the base image to build bcftools version ${BCFTOOLS_VERSION}" - -################## INSTALL ########################## - -ENV HTSLIB_VERSION=${HTSLIB_VERSION} -ENV BCFTOOLS_VERSION=${BCFTOOLS_VERSION} - - -RUN apk add --update \ - # install all the build-related tools - && apk add ca-certificates gcc g++ make git curl curl-dev wget gzip perl perl-dev musl-dev libgcrypt-dev zlib-dev bzip2-dev xz-dev ncurses-dev rsync \ - # install system packages and Perl modules - && apk add expat-dev libressl-dev perl-net-ssleay mariadb-dev libxml2-dev perl-dbd-mysql perl-module-metadata perl-gd perl-db_file perl-archive-zip perl-cgi perl-dbi perl-encode perl-time-hires perl-file-copy-recursive perl-json \ - # install htslib (for vep) - && cd /tmp && wget https://github.com/samtools/htslib/releases/download/${HTSLIB_VERSION}/htslib-${HTSLIB_VERSION}.tar.bz2 \ - && 
tar xvjf htslib-${HTSLIB_VERSION}.tar.bz2 \ - && cd /tmp/htslib-${HTSLIB_VERSION} \ - && ./configure \ - && make && make install \ - # install bcftools - && cd /tmp && wget https://github.com/samtools/bcftools/releases/download/${BCFTOOLS_VERSION}/bcftools-${BCFTOOLS_VERSION}.tar.bz2 \ - && tar xvjf bcftools-${BCFTOOLS_VERSION}.tar.bz2 \ - && cd /tmp/bcftools-${BCFTOOLS_VERSION} \ - && make HTSDIR=/tmp/htslib-${HTSLIB_VERSION} && make install \ - # clean up - && rm -rf /var/cache/apk/* /tmp/* \ - && chmod +x /usr/bin/runscript.sh \ - && exec /run_test.sh diff --git a/docs/bcftools/bcftools_concat_1.6.md b/docs/bcftools/bcftools_concat_1.6.md index ecf5b687..eabc4ff7 100644 --- a/docs/bcftools/bcftools_concat_1.6.md +++ b/docs/bcftools/bcftools_concat_1.6.md @@ -4,7 +4,7 @@ | Tool | Version | Location | |--- |--- |--- | -| alpine:3.8 base image | 3.8 | - | +| bcftools | 1.6 | quay.io/biocontainers/bcftools:1.6--0 | | bcftools | 1.6 | https://github.com/samtools/bcftools/archive/1.6.zip | ## CWL From a55b050dcafa50b8902df2ef0d876a817bbf2a44 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 26 Apr 2022 03:01:36 +0000 Subject: [PATCH 474/476] Bump pip from 18.1 to 21.1 Bumps [pip](https://github.com/pypa/pip) from 18.1 to 21.1. - [Release notes](https://github.com/pypa/pip/releases) - [Changelog](https://github.com/pypa/pip/blob/main/NEWS.rst) - [Commits](https://github.com/pypa/pip/compare/18.1...21.1) --- updated-dependencies: - dependency-name: pip dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 151708e9..946e86c6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,7 +6,7 @@ typing==3.7.4 ruamel.yaml==0.15.77 # From requirements_dev -pip==18.1 +pip==21.1 bumpversion==0.5.3 wheel==0.32.1 watchdog==0.9.0 From 45531ce9c741e247c6e8e40d3cf1dbd177ee8ffc Mon Sep 17 00:00:00 2001 From: Karthigayini Sivaprakasam Date: Thu, 28 Apr 2022 13:37:32 -0500 Subject: [PATCH 475/476] bgzip, tabix, sortVcf --- bcftools_1.15.1/bcftools_bgzip_1.15.1.cwl | 57 +++++++++++++++++++ bcftools_1.15.1/bcftools_tabix_1.15.1.cwl | 50 ++++++++++++++++ bcftools_1.15.1/example_input_bgzip.yaml | 3 + bcftools_1.15.1/example_input_tabix.yaml | 6 ++ .../bedtools_sortbed_vcf.cwl | 54 ++++++++++++++++++ bedtools_sortvcf_2.28.0/example_input.yaml | 1 + docs/SUMMARY.md | 7 +++ docs/bcftools/README.md | 2 + docs/bcftools/bcftools_bgzip_v1.15.1.md | 43 ++++++++++++++ docs/bcftools/bcftools_tabix_v1.15.1.md | 43 ++++++++++++++ docs/bedtools/bedtools_sortvcf_v2.28.0_cv2.md | 43 ++++++++++++++ 11 files changed, 309 insertions(+) create mode 100644 bcftools_1.15.1/bcftools_bgzip_1.15.1.cwl create mode 100644 bcftools_1.15.1/bcftools_tabix_1.15.1.cwl create mode 100644 bcftools_1.15.1/example_input_bgzip.yaml create mode 100644 bcftools_1.15.1/example_input_tabix.yaml create mode 100644 bedtools_sortvcf_2.28.0/bedtools_sortbed_vcf.cwl create mode 100644 bedtools_sortvcf_2.28.0/example_input.yaml create mode 100644 docs/bcftools/README.md create mode 100644 docs/bcftools/bcftools_bgzip_v1.15.1.md create mode 100644 docs/bcftools/bcftools_tabix_v1.15.1.md create mode 100644 docs/bedtools/bedtools_sortvcf_v2.28.0_cv2.md diff --git a/bcftools_1.15.1/bcftools_bgzip_1.15.1.cwl b/bcftools_1.15.1/bcftools_bgzip_1.15.1.cwl new file mode 100644 index 00000000..fe2317e9 --- /dev/null +++ b/bcftools_1.15.1/bcftools_bgzip_1.15.1.cwl @@ -0,0 +1,57 @@ 
+class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: tabix +baseCommand: + - bgzip +inputs: + - id: input + type: File + inputBinding: + position: 0 + doc: input VCF file +outputs: + - id: zippedVcf + type: File? + outputBinding: + glob: >- + ${ if(inputs.output_file_name){ return + inputs.output_file_name } else { return + inputs.input.basename.replace(/.vcf/,'.vcf.gz') } } +label: tabix +arguments: + - position: 0 + prefix: '' + valueFrom: '-c' +requirements: + - class: ResourceRequirement + ramMin: 8000 + coresMin: 1 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/bcftools:1.15.1' + - class: InlineJavascriptRequirement +stdout: >- + ${ if (inputs.output_file_name) return inputs.output_file_name; + return inputs.input.basename.replace('.vcf','.vcf.gz'); } +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:sivaprk@mskcc.org' + 'foaf:name': Karthigayini Sivaprakasam + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:sivaprk@mskcc.org' + 'foaf:name': Karthigayini Sivaprakasam + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': bcftools + 'doap:revision': 1.15.1 diff --git a/bcftools_1.15.1/bcftools_tabix_1.15.1.cwl b/bcftools_1.15.1/bcftools_tabix_1.15.1.cwl new file mode 100644 index 00000000..039078be --- /dev/null +++ b/bcftools_1.15.1/bcftools_tabix_1.15.1.cwl @@ -0,0 +1,50 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: tabix_1_3_1 +baseCommand: + - tabix +inputs: + - 'sbg:toolDefaultValue': 
vcf + id: preset + type: string? + inputBinding: + position: 0 + prefix: '-p' + doc: 'gff, bed, sam, vcf' + - id: file + type: File + inputBinding: + position: 1 + doc: Input bgziped vcf file + 'sbg:fileTypes': .gz +outputs: [] +label: tabix_1.3.1 +requirements: + - class: ResourceRequirement + ramMin: 4000 + coresMin: 1 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/tabix:1.3.1' +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:sivaprk@mskcc.org' + 'foaf:name': Karthigayini Sivaprakasam + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:sivaprk@mskcc.org' + 'foaf:name': Karthigayini Sivaprakasam + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': tabix + 'doap:revision': 1.3.1 diff --git a/bcftools_1.15.1/example_input_bgzip.yaml b/bcftools_1.15.1/example_input_bgzip.yaml new file mode 100644 index 00000000..05320852 --- /dev/null +++ b/bcftools_1.15.1/example_input_bgzip.yaml @@ -0,0 +1,3 @@ +input: + class: File + path: /Users/ksivaprakasam/Documents/vep.vcf diff --git a/bcftools_1.15.1/example_input_tabix.yaml b/bcftools_1.15.1/example_input_tabix.yaml new file mode 100644 index 00000000..12f3bdad --- /dev/null +++ b/bcftools_1.15.1/example_input_tabix.yaml @@ -0,0 +1,6 @@ +file: + class: File + metadata: {} + path: ../../../vep.vcf.gz + secondaryFiles: [] +preset: vcf diff --git a/bedtools_sortvcf_2.28.0/bedtools_sortbed_vcf.cwl b/bedtools_sortvcf_2.28.0/bedtools_sortbed_vcf.cwl new file mode 100644 index 00000000..eefa400e --- /dev/null +++ b/bedtools_sortvcf_2.28.0/bedtools_sortbed_vcf.cwl @@ -0,0 +1,54 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 
'https://www.sevenbridges.com/' +id: sortvcf +baseCommand: + - sortBed +inputs: + - id: input + type: File + inputBinding: + position: 0 + prefix: '-i' + doc: input VCF file +outputs: + - id: sorted.vcf + type: File? + outputBinding: + glob: >- + ${ if(inputs.output_file_name){ return + inputs.output_file_name } else { return + inputs.input.basename.replace(/.vcf/,'.sorted.vcf') } } +label: sortvcf +requirements: + - class: ResourceRequirement + ramMin: 2000 + coresMin: 1 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/bedtools:v2.28.0_cv2' + - class: InlineJavascriptRequirement +stdout: >- + ${ if (inputs.output_file_name) return inputs.output_file_name; + return inputs.input.basename.replace('.vcf','.sorted.vcf'); } +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:sivaprk@mskcc.org' + 'foaf:name': Karthigayini Sivaprakasam + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:sivaprk@mskcc.org' + 'foaf:name': Karthigayini Sivaprakasam + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': sortbed + 'doap:revision': 2.28.0 diff --git a/bedtools_sortvcf_2.28.0/example_input.yaml b/bedtools_sortvcf_2.28.0/example_input.yaml new file mode 100644 index 00000000..1b65b0c9 --- /dev/null +++ b/bedtools_sortvcf_2.28.0/example_input.yaml @@ -0,0 +1 @@ +input: null diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index d4677298..6d8189aa 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -6,9 +6,16 @@ * [v2.22](abra2/abra2_2.22.md) * access_utils * [0.1.1](../access_utils/0.1.1/README.md) +* [bcftools](bcftools/README.md) + + * [bcftools bgzip v1.15.1](bcftools/bcftools_bgzip_v1.15.1.md) + * [bcftools taxi v1.15.1](bcftools/bcftools_tabix_v1.15.1.md) + * [Bedtools](bedtools/README.md) + * [genomecov 
v2.28.0\_cv2](bedtools/bedtools_genomecov_v2.28.0_cv2.md) * [merge v2.28.0\_cv2](bedtools/bedtools_merge_v2.28.0_cv2.md) + * [sortVcf v2.28.0\_cv2](bedtools/bedtools_sortvcf_v2.28.0_cv2.md) * Biometrics * [extract](../biometrics_extract/README.md) * [minor](../biometrics_minor/README.md) diff --git a/docs/bcftools/README.md b/docs/bcftools/README.md new file mode 100644 index 00000000..a757ebd0 --- /dev/null +++ b/docs/bcftools/README.md @@ -0,0 +1,2 @@ +# BCFTOOLS v1.15.1 + diff --git a/docs/bcftools/bcftools_bgzip_v1.15.1.md b/docs/bcftools/bcftools_bgzip_v1.15.1.md new file mode 100644 index 00000000..5ff81c8f --- /dev/null +++ b/docs/bcftools/bcftools_bgzip_v1.15.1.md @@ -0,0 +1,43 @@ +## CWL and Docker for Running bgzip using bcftools v1.15.1 + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +| --------------------- | ------- | ------------------------------------- | +| alpine:3.8 base image | 3.8 | - | +| bcftools | 1.15.1 | ghcr.io/msk-access/bcftools:1.15.1 | + +## CWL + +- CWL specification 1.0 +- Use example_input_bgzip.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io/): + +``` +toil-cwl-runner bcftools_bgzip_1.15.1.cwl example_input_bgzip.yaml +``` + +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** + +```shell +#Using CWLTOOL +cwltool --singularity --non-strict /path/to/bcftools_bgzip_1.15.1.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +mkdir bcftools_toil_log +toil-cwl-runner --singularity --logFile /path/to/bcftools_toil_log/cwltoil.log --jobStore /path/to/bcftools_jobStore --batchSystem lsf --workDir /path/to/bcftools_toil_log --outdir .
--writeLogs /path/to/bcftools_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/bcftools_bgzip_1.15.1.cwl /path/to/inputs.yaml > bcftools_toil.stdout 2> bcftools_toil.stderr & +``` + +### Usage + +```shell +usage: bcftools_bgzip_1.15.1.cwl [-h] --input INPUT [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --input INPUT input VCF file +``` + diff --git a/docs/bcftools/bcftools_tabix_v1.15.1.md b/docs/bcftools/bcftools_tabix_v1.15.1.md new file mode 100644 index 00000000..e2adfbc6 --- /dev/null +++ b/docs/bcftools/bcftools_tabix_v1.15.1.md @@ -0,0 +1,43 @@ +## CWL and Docker for Running tabix using bcftools v1.15.1 + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +| --------------------- | ------- | ------------------------------------- | +| alpine:3.8 base image | 3.8 | - | +| tabix | 1.3.1 | ghcr.io/msk-access/tabix:1.3.1 | + +## CWL + +- CWL specification 1.0 +- Use example_input_tabix.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io/): + +``` +toil-cwl-runner bcftools_tabix_1.15.1.cwl example_input_tabix.yaml +``` + +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** + +```shell +#Using CWLTOOL +cwltool --singularity --non-strict /path/to/bcftools_tabix_1.15.1.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +mkdir bcftools_toil_log +toil-cwl-runner --singularity --logFile /path/to/bcftools_toil_log/cwltoil.log --jobStore /path/to/bcftools_jobStore --batchSystem lsf --workDir /path/to/bcftools_toil_log --outdir .
--writeLogs /path/to/bcftools_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/bcftools_tabix_1.15.1.cwl /path/to/inputs.yaml > bcftools_toil.stdout 2> bcftools_toil.stderr & +``` + +### Usage + +```shell +usage: bcftools_tabix_1.15.1.cwl [-h] [--preset PRESET] --file FILE [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --file FILE Input bgziped vcf file +``` + diff --git a/docs/bedtools/bedtools_sortvcf_v2.28.0_cv2.md b/docs/bedtools/bedtools_sortvcf_v2.28.0_cv2.md new file mode 100644 index 00000000..7b76d73b --- /dev/null +++ b/docs/bedtools/bedtools_sortvcf_v2.28.0_cv2.md @@ -0,0 +1,43 @@ +# SortVCF v2.28.0\_cv2 + +## Version of tools in [docker image](https://hub.docker.com/r/biocontainers/bedtools) + +| Tool | Version | Location | +| :------- | :----------- | :----------------------------------------------------------- | +| Bedtools | v2.28.0\_cv2 | [https://github.com/arq5x/bedtools2/releases/tag/v2.28.0](https://github.com/arq5x/bedtools2/releases/tag/v2.28.0) | + +[![](https://img.shields.io/badge/version-2.28.0_cv2-blue)](https://github.com/arq5x/bedtools2/releases/tag/v2.28.0) + +## CWL + +* CWL specification 1.0 +* Use example\_input.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash +toil-cwl-runner bedtools_sortbed_vcf.cwl example_input.yaml +``` + +**If at MSK, using the JUNO cluster you can use the following command** + +```bash +#Using CWLTOOL +cwltool --singularity --non-strict bedtools_sortbed_vcf.cwl inputs.yaml + +#Using toil-cwl-runner +mkdir run_directory +toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000
bedtools_sortbed_vcf.cwl inputs.yaml > file.stdout 2> file.stderr & +``` + +## Usage + +```shell +Usage: bedtools_sortbed_vcf.cwl [-h] --input INPUT [job_order] +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --input INPUT input VCF file +``` + From dbee02c9ac3f0dfd0b8a90c7cc3c36768e489235 Mon Sep 17 00:00:00 2001 From: Karthigayini Sivaprakasam Date: Mon, 2 May 2022 12:25:40 -0500 Subject: [PATCH 476/476] resolved_conflict1 --- docs/SUMMARY.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 6d8189aa..919f6fdc 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -9,7 +9,7 @@ * [bcftools](bcftools/README.md) * [bcftools bgzip v1.15.1](bcftools/bcftools_bgzip_v1.15.1.md) - * [bcftools taxi v1.15.1](bcftools/bcftools_tabix_v1.15.1.md) + * [bcftools tabix v1.15.1](bcftools/bcftools_tabix_v1.15.1.md) * [Bedtools](bedtools/README.md)