diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 00000000..d4a2c440 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,21 @@ +# http://editorconfig.org + +root = true + +[*] +indent_style = space +indent_size = 4 +trim_trailing_whitespace = true +insert_final_newline = true +charset = utf-8 +end_of_line = lf + +[*.bat] +indent_style = tab +end_of_line = crlf + +[LICENSE] +insert_final_newline = false + +[Makefile] +indent_style = tab diff --git a/.gitbook.yaml b/.gitbook.yaml new file mode 100644 index 00000000..afdeba9f --- /dev/null +++ b/.gitbook.yaml @@ -0,0 +1 @@ +root: ./docs diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md new file mode 100644 index 00000000..14740fbe --- /dev/null +++ b/.github/ISSUE_TEMPLATE.md @@ -0,0 +1,15 @@ +* cwl-commandlinetools version: +* Python version: +* Operating System: + +### Description + +Describe what you were trying to get done. +Tell us what happened, what went wrong, and what you expected to happen. + +### What I Did + +``` +Paste the command(s) you ran and the output. +If there was a crash, please include the traceback here. +``` diff --git a/.gitignore b/.gitignore index 76d33366..de23585e 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,7 @@ __pycache__/ # Distribution / packaging .Python +env/ build/ develop-eggs/ dist/ @@ -81,8 +82,10 @@ celerybeat-schedule # SageMath parsed files *.sage.py -# Environments +# dotenv .env + +# virtualenv .venv env/ venv/ @@ -108,7 +111,6 @@ venv.bak/ # vscode .vscode/* -!.vscode/settings.json -!.vscode/tasks.json -!.vscode/launch.json -!.vscode/extensions.json \ No newline at end of file + +# pycharm +.idea diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 00000000..c0504869 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,31 @@ +# Config file for automatic testing at travis-ci.org + +language: python +python: + - 3.7 + - 2.7 + +# Command to install dependencies, e.g. 
pip install -r requirements.txt --use-mirrors +install: + - pip install -U tox-travis + - pip install -r requirements.txt + +# Command to run tests, e.g. python setup.py test +script: + - find . -name '*.cwl' | xargs -n 1 cwltool --validate + +# Assuming you have installed the travis-ci CLI tool, after you +# create the Github repo and add it to Travis, run the +# following command to finish PyPI deployment setup: +# $ travis encrypt --add deploy.password +deploy: + provider: pypi + distributions: sdist bdist_wheel + user: __token__ + password: + secure: g9Ja5QDLc1WGu50xpmXl6wcP7qRNzfYZk7i3PEJtQNO6JLPtxEmBgDAb4+RedRxLo9MRmws/n/bFkTOSP837d+tJ91cYN6TFbVu2teWiR6hblDX/Twhbceq/MjdYJyAVsH+KpuORjuJGqzk2I4QLzI+B/0mXuWcE4EPaCZ5mpm0aYYOTLW1Ukxl1j/PoV8wWC2glItLQ02zIvLyr276+en+RAdWYwqW8sY7rn4hI6VaM78OMsc2/cvG27X82SX4rBxJ3/VveslAc3O7Kck02ltOPyOLI3w++HEVvhHAaCK3kDxNEYQCMly1lDYWTfAGm2F5TZ5mgt2adb08AN//0GnWQOfciHh3JUrIt7po7B5Zs8kmZNGGTJFog8o+btU4pAeCDIt61lFyMo7VVpvPzR4ToiGP3zBvGEgnZd7WpTI0H0E4oc821vl9SAN+3aWQhDxDHl+z3VDwpZTA18mgQikFNc7asKDSXCAGoStI/YFWjw3X+tvFGMXR+R6dpmeSplFFSOx9L3TbrtymWProH8MOyxSVNDdQG6Vz41bN9IS47GRI+/1A9jXxwGurKY1ZL7HZDApDx42Fn2RdOFQNyLCeCneI+RUXtkHY56tH3GpBmnnJX6bKPrn4+VIbgd1VCahPrG8INqxx+SE4JojdIQHcxDy924PCL8mS4hakE4Z0= + on: + tags: true + repo: msk-access/cwl-commandlinetools + python: 2.7 + diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..e994ec71 --- /dev/null +++ b/LICENSE @@ -0,0 +1,16 @@ +Apache Software License 2.0 + +Copyright (c) 2019, msk-access + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. + diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 00000000..405d5d7e --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,9 @@ +include LICENSE +include README.md + +recursive-include tests * +recursive-include *.cwl +recursive-exclude * __pycache__ +recursive-exclude * *.py[co] + +recursive-include docs *.jpg *.png *.gif diff --git a/Makefile b/Makefile new file mode 100644 index 00000000..4dba77a2 --- /dev/null +++ b/Makefile @@ -0,0 +1,77 @@ +.PHONY: clean clean-test clean-pyc clean-build docs help +.DEFAULT_GOAL := help + +define BROWSER_PYSCRIPT +import os, webbrowser, sys + +try: + from urllib import pathname2url +except: + from urllib.request import pathname2url + +webbrowser.open("file://" + pathname2url(os.path.abspath(sys.argv[1]))) +endef +export BROWSER_PYSCRIPT + +define PRINT_HELP_PYSCRIPT +import re, sys + +for line in sys.stdin: + match = re.match(r'^([a-zA-Z_-]+):.*?## (.*)$$', line) + if match: + target, help = match.groups() + print("%-20s %s" % (target, help)) +endef +export PRINT_HELP_PYSCRIPT + +BROWSER := python -c "$$BROWSER_PYSCRIPT" + +help: + @python -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST) + +clean: clean-build clean-pyc clean-test ## remove all build, test, coverage and Python artifacts + +clean-build: ## remove build artifacts + rm -fr build/ + rm -fr dist/ + rm -fr .eggs/ + find . -name '*.egg-info' -exec rm -fr {} + + find . -name '*.egg' -exec rm -f {} + + +clean-pyc: ## remove Python file artifacts + find . -name '*.pyc' -exec rm -f {} + + find . -name '*.pyo' -exec rm -f {} + + find . -name '*~' -exec rm -f {} + + find . 
-name '__pycache__' -exec rm -fr {} + + +clean-test: ## remove test and coverage artifacts + rm -fr .tox/ + rm -f .coverage + rm -fr htmlcov/ + rm -fr .pytest_cache + +lint: ## check style with flake8 + flake8 cwl_commandlinetools tests + +test: ## run tests quickly with the default Python + pytest + +test-all: ## run tests on every Python version with tox + tox + +coverage: ## check code coverage quickly with the default Python + coverage run --source cwl_commandlinetools -m pytest + coverage report -m + coverage html + $(BROWSER) htmlcov/index.html + +release: dist ## package and upload a release + twine upload dist/* + +dist: clean ## builds source and wheel package + python setup.py sdist + python setup.py bdist_wheel + ls -l dist + +install: clean ## install the package to the active Python's site-packages + python setup.py install diff --git a/README.md b/README.md new file mode 100644 index 00000000..650f0f8e --- /dev/null +++ b/README.md @@ -0,0 +1,34 @@ +--- +description: Central location for storing common workflow language based command line tools for building msk-access workflows +--- + +# MSK-ACCESS command-line tools + +[![PyPI version](https://badge.fury.io/py/cwl-commandlinetools.svg)](https://badge.fury.io/py/cwl-commandlinetools) +[![Build Status](https://travis-ci.com/msk-access/cwl-commandlinetools.svg?branch=master)](https://travis-ci.com/msk-access/cwl-commandlinetools) + +- Free software: Apache Software License 2.0 +* Documentation: [https://msk-access.gitbook.io/command-line-tools-cwl/](https://msk-access.gitbook.io/command-line-tools-cwl/) + +## Features + +Create command line tools in common workflow language to generate msk-access workflows. 
+ +## Installation + +Clone the repository: + +``` +git clone --depth 50 https://github.com/msk-access/cwl-commandlinetools.git +``` + +**Follow the README in respective tool folder for execution of the tool.** + + +## Credits + +- CMO ACCESS Informatics Team +- This package was created with Cookiecutter_ and the `audreyr/cookiecutter-pypackage`_ project template. + - Cookiecutter: https://github.com/audreyr/cookiecutter + - `audreyr/cookiecutter-pypackage`: https://github.com/audreyr/cookiecutter-pypackage + diff --git a/Workflows/bwa_sort_merge.cwl b/Workflows/bwa_sort_merge.cwl deleted file mode 100644 index 99b25be9..00000000 --- a/Workflows/bwa_sort_merge.cwl +++ /dev/null @@ -1,86 +0,0 @@ -class: Workflow -cwlVersion: v1.0 -id: bwa_sort_merge -label: bwa_sort_merge - -inputs: - - id: reference_sequence - type: File - secondaryFiles: - - .amb - - .ann - - .bwt - - .pac - - .sa - - .fai - - id: read_pair - type: - type: array - items: - items: File - type: array - - id: sample_id - type: string - - id: lane_id - type: 'string[]' - -outputs: - - id: sample_id_output - outputSource: - - bwa_sort/sample_id_output - type: - - string - - type: array - items: string - - id: output_md_metrics - outputSource: - - gatk_markduplicatesgatk/output_md_metrics - type: File - - id: output_md_bam - outputSource: - - gatk_markduplicatesgatk/output_md_bam - type: File - -steps: - - id: samtools_merge - in: - - id: input_bams - source: - - bwa_sort/output_file - out: - - id: output_file - run: ../CommandLineTools/samtools-merge_1.9/samtools-merge_1.9.cwl - - id: bwa_sort - in: - - id: reference_sequence - source: reference_sequence - - id: read_pair - source: - - read_pair - - id: sample_id - source: sample_id - - id: lane_id - source: lane_id - out: - - id: output_file - - id: sample_id_output - - id: lane_id_output - run: ./bwa_sort.cwl - label: bwa_sort - scatter: - - read_pair - - lane_id - scatterMethod: dotproduct - - id: gatk_markduplicatesgatk - in: - - id: input_bam - 
source: samtools_merge/output_file - out: - - id: output_md_bam - - id: output_md_metrics - run: ../CommandLineTools/mark-duplicates_4.1.0.0/mark-duplicates_4.1.0.0.cwl - label: GATK MarkDuplicates - -requirements: - - class: SubworkflowFeatureRequirement - - class: ScatterFeatureRequirement diff --git a/Workflows/make_bam.cwl b/Workflows/make_bam.cwl deleted file mode 100644 index 5d1dfdef..00000000 --- a/Workflows/make_bam.cwl +++ /dev/null @@ -1,107 +0,0 @@ -class: Workflow -cwlVersion: v1.0 -id: make_bam -label: make_bam - -inputs: - - id: read_pairs_normal - type: - type: array - items: - items: File - type: array - - id: lane_ids_normal - type: 'string[]' - - id: reference_sequence - type: File - secondaryFiles: - - .amb - - .ann - - .bwt - - .pac - - .sa - - .fai - - id: sample_id_normal - type: string - - id: sample_id_tumor - type: string - - id: read_pairs_tumor - type: - type: array - items: - items: File - type: array - - id: lane_ids_tumor - type: 'string[]' - -outputs: - - id: sample_id_output_normal - outputSource: - - make_bam_Normal/sample_id_output - type: - - string - - type: array - items: string - - id: normal_bam - outputSource: - - make_bam_Normal/output_md_bam - type: File - - id: sample_id_output_tumor - outputSource: - - make_bam_Tumor/sample_id_output - type: - - string - - type: array - items: string - - id: tumor_bam - outputSource: - - make_bam_Tumor/output_md_bam - type: File - - id: tumor_metrics - outputSource: - - make_bam_Tumor/output_md_metrics - type: File - - id: normal_metrics - outputSource: - - make_bam_Normal/output_md_metrics - type: File - -steps: - - id: make_bam_Normal - in: - - id: reference_sequence - source: reference_sequence - - id: read_pair - source: - - read_pairs_normal - - id: sample_id - source: sample_id_normal - - id: lane_id - source: - - lane_ids_normal - out: - - id: sample_id_output - - id: output_md_metrics - - id: output_md_bam - run: ./bwa_sort_merge.cwl - label: make_bam_Normal - - id: 
make_bam_Tumor - in: - - id: reference_sequence - source: reference_sequence - - id: read_pair - source: - - read_pairs_tumor - - id: sample_id - source: sample_id_tumor - - id: lane_id - source: - - lane_ids_tumor - out: - - id: sample_id_output - - id: output_md_metrics - - id: output_md_bam - run: ./bwa_sort_merge.cwl - label: make_bam_Tumor -requirements: - - class: SubworkflowFeatureRequirement diff --git a/Workflows/msisensor-run-both.cwl b/Workflows/msisensor-run-both.cwl deleted file mode 100644 index a9d2f373..00000000 --- a/Workflows/msisensor-run-both.cwl +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env cwl-runner - -$namespaces: - dct: http://purl.org/dc/terms/ - foaf: http://xmlns.com/foaf/0.1/ - doap: http://usefulinc.com/ns/doap# - -cwlVersion: v1.0 - -class: Workflow -id: msisensor-run-both -requirements: - StepInputExpressionRequirement: {} - MultipleInputFeatureRequirement: {} - ScatterFeatureRequirement: {} - SubworkflowFeatureRequirement: {} - InlineJavascriptRequirement: {} - -inputs: - normal_bam: - type: File - secondaryFiles: [ ".bai" ] - tumor_bam: - type: File - secondaryFiles: [ ".bai" ] - output_prefix: string - msi_file: File - -outputs: - msisensor_0.2_output: - type: File - outputSource: msisensor_0.2/output - - msisensor_0.6_output: - type: File - outputSource: msisensor_0.6/output - -steps: - msisensor_0.2: - run: ../CommandLineTools/msisensor_0.2/msisensor-0.2.cwl - in: - output_prefix: output_prefix - d: msi_file - n: normal_bam - t: tumor_bam - o: - valueFrom: ${ return inputs.output_prefix + "_0.2.txt"; } - out: [ output ] - - msisensor_0.6: - run: ../CommandLineTools/msisensor_0.6/msisensor-0.6.cwl - in: - output_prefix: output_prefix - d: msi_file - n: normal_bam - t: tumor_bam - o: - valueFrom: ${ return inputs.output_prefix + "_0.6.txt"; } - out: [ output ] diff --git a/Workflows/mutect_wf.cwl b/Workflows/mutect_wf.cwl deleted file mode 100644 index c046e18a..00000000 --- a/Workflows/mutect_wf.cwl +++ /dev/null @@ -1,65 +0,0 
@@ -class: Workflow -cwlVersion: v1.0 -id: mutect_wf -label: mutect_wf -inputs: - - id: scatter-count - type: int? - - id: output - type: string - - id: intervals - type: File? - - id: reference - type: File? - - id: tumor_sample - type: string? - - id: normal_sample - type: string? - - id: input_normal - type: File? - - id: input_tumor - type: File? - -outputs: - - id: output_1 - outputSource: - - mutect2/output - type: 'File[]?' - -steps: - - id: scatterintervals - in: - - id: reference - source: reference - - id: intervals - source: intervals - - id: scatter-count - source: scatter-count - - id: output - source: output - out: - - id: interval_files - run: ../CommandLineTools/scatterintervals_4.1.0.0/scatterintervals_4.1.0.0.cwl - label: ScatterIntervals - - id: mutect2 - in: - - id: reference - source: reference - - id: intervals - source: scatterintervals/interval_files - - id: input - source: input_tumor - - id: tumor_sample - source: tumor_sample - - id: input_normal - source: input_normal - - id: normal_sample - source: normal_sample - out: - - id: output - run: ../CommandLineTools/mutect2_4.1.0.0/mutect2_4.1.0.0.cwl - label: Mutect2 - scatter: - - intervals -requirements: - - class: ScatterFeatureRequirement diff --git a/Workflows/snp-pileup-to-facets.cwl b/Workflows/snp-pileup-to-facets.cwl deleted file mode 100644 index 619a8753..00000000 --- a/Workflows/snp-pileup-to-facets.cwl +++ /dev/null @@ -1,80 +0,0 @@ -#!/usr/bin/env cwl-runner - -class: Workflow -cwlVersion: v1.0 - -requirements: - InlineJavascriptRequirement: {} - StepInputExpressionRequirement: {} - SubworkflowFeatureRequirement: {} - MultipleInputFeatureRequirement: {} - ScatterFeatureRequirement: {} - -inputs: - facets_vcf: - type: File - secondaryFiles: - - .gz - - bam_normal: - type: File - - bam_tumor: - type: File - - tumor_sample_name: - type: string - -outputs: - - snp_pileup_out: - type: File - outputSource: do_snp_pileup/output_file - - facets_png: - type: File[]? 
- outputSource: do_facets/png_files - - facets_txt_purity: - type: File? - outputSource: do_facets/txt_files_purity - - facets_txt_hisens: - type: File? - outputSource: do_facets/txt_files_hisens - - facets_out_files: - type: File[]? - outputSource: do_facets/out_files - - facets_rdata: - type: File[]? - outputSource: do_facets/rdata_files - - facets_seg: - type: File[]? - outputSource: do_facets/seg_files - -steps: - do_snp_pileup: - run: ../CommandLineTools/snp-pileup_0.1.1/htstools-0.1.1.cwl - in: - vcf_file: facets_vcf - bam_normal: bam_normal - bam_tumor: bam_tumor - output_file: - valueFrom: ${ return inputs.bam_normal.basename.replace(".bam", "") + "_" + inputs.bam_tumor.basename.replace(".bam", "") + ".dat.gz"; } - out: [ output_file ] - - do_facets: - run: ../CommandLineTools/facets_1.5.6/facets.doFacets-1.5.6.cwl - in: - genome: - valueFrom: ${ return "hg19"; } - counts_file: do_snp_pileup/output_file - TAG: - valueFrom: ${ return inputs.counts_file.basename.replace(".dat.gz", ""); } - tumor_id: tumor_sample_name - directory: - valueFrom: ${ return "."; } - out: [ png_files, txt_files_purity, txt_files_hisens, out_files, rdata_files, seg_files ] diff --git a/__init__.py b/__init__.py new file mode 100644 index 00000000..d62ed77c --- /dev/null +++ b/__init__.py @@ -0,0 +1,7 @@ +# -*- coding: utf-8 -*- + +"""Top-level package for cwl-commandlinetools.""" + +__author__ = """msk-access""" +__email__ = 'msk.access@gmail.com' +__version__ = '1.1.1' diff --git a/abra2_2.17/abra2_2.17.cwl b/abra2_2.17/abra2_2.17.cwl index e9ac545e..0f31e4d4 100644 --- a/abra2_2.17/abra2_2.17.cwl +++ b/abra2_2.17/abra2_2.17.cwl @@ -184,7 +184,7 @@ requirements: ramMin: 60000 coresMin: 16 - class: DockerRequirement - dockerPull: 'mskcc/abra2:0.1.0' + dockerPull: 'ghcr.io/msk-access/abra2:0.6.3' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/abra2_2.17/container/Dockerfile b/abra2_2.17/container/Dockerfile index 9f1bc82f..e7c64df9 
100644 --- a/abra2_2.17/container/Dockerfile +++ b/abra2_2.17/container/Dockerfile @@ -20,7 +20,7 @@ LABEL org.opencontainers.image.created=${BUILD_DATE} \ org.opencontainers.image.licenses=${LICENSE} \ org.opencontainers.image.version.java=${JAVA_VERSION} \ org.opencontainers.image.version.abra2=${ABRA2_VERSION} \ - org.opencontainers.image.source.abra2="https://github.com/mozack/abra2/releases/" + org.opencontainers.image.source="https://github.com/mozack/abra2/releases/" LABEL org.opencontainers.image.description="This container uses OPENJDK ${JAVA_VERSION} as the base image to build abra2 version ${ABRA2_VERSION}" @@ -37,4 +37,4 @@ RUN apt-get update && \ RUN wget "https://github.com/mozack/abra2/releases/download/v${ABRA2_VERSION}/abra2-${ABRA2_VERSION}.jar" && \ chmod 755 /usr/src/abra2-${ABRA2_VERSION}.jar && \ - cp -s /usr/src/abra2-${ABRA2_VERSION}.jar /usr/local/bin/abra2.jar \ No newline at end of file + cp -s /usr/src/abra2-${ABRA2_VERSION}.jar /usr/local/bin/abra2.jar diff --git a/abra2_2.19/abra2_2.19.cwl b/abra2_2.19/abra2_2.19.cwl index 8c9be47a..1af3c194 100644 --- a/abra2_2.19/abra2_2.19.cwl +++ b/abra2_2.19/abra2_2.19.cwl @@ -4,7 +4,8 @@ $namespaces: dct: 'http://purl.org/dc/terms/' doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' -id: abra2_2.19 + sbg: 'https://www.sevenbridges.com/' +id: abra2_2_19 baseCommand: - java inputs: @@ -173,7 +174,7 @@ requirements: ramMin: "${\r if(inputs.memory_per_job && inputs.memory_overhead) {\r \r return inputs.memory_per_job + inputs.memory_overhead\r }\r else if (inputs.memory_per_job && !inputs.memory_overhead){\r \r \treturn inputs.memory_per_job + 2000\r }\r else if(!inputs.memory_per_job && inputs.memory_overhead){\r \r return 15000 + inputs.memory_overhead\r }\r else {\r \r \treturn 17000 \r }\r}" coresMin: "${\r if (inputs.number_of_threads) {\r \r \treturn inputs.number_of_threads \r }\r else {\r \r return 4\r }\r}" - class: DockerRequirement - dockerPull: 'mskcc/abra2:0.2.0' + 
dockerPull: 'aphoid/abra2:2.19' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' @@ -193,6 +194,3 @@ requirements: - class: 'doap:Version' 'doap:name': abra2 'doap:revision': 2.19 - - class: 'doap:Version' - 'doap:name': cwl-wrapper - 'doap:revision': 1.0.0 diff --git a/abra2_2.22/abra2_2.22.cwl b/abra2_2.22/abra2_2.22.cwl new file mode 100644 index 00000000..e24cb107 --- /dev/null +++ b/abra2_2.22/abra2_2.22.cwl @@ -0,0 +1,246 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: abra2_2_22 +baseCommand: + - java +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: input_bam + type: + - File + - type: array + items: File + inputBinding: + position: 0 + prefix: '--in' + doc: Required list of input sam or bam file (s) separated by comma + secondaryFiles: + - ^.bai + - id: working_directory + type: string? + doc: Set the temp directory (overrides java.io.tmpdir) + - id: reference_fasta + type: File + inputBinding: + position: 0 + prefix: '--ref' + doc: Genome reference location + secondaryFiles: + - .fai + - id: targets + type: File + inputBinding: + position: 0 + prefix: '--targets' + - id: kmer_size + type: string? + inputBinding: + position: 0 + prefix: '--kmer' + doc: >- + Optional assembly kmer size(delimit with commas if multiple sizes + specified) + - id: maximum_average_depth + type: int? + inputBinding: + position: 0 + prefix: '--mad' + doc: >- + Regions with average depth exceeding this value will be downsampled + (default: 1000) + - id: soft_clip_contig + type: string? 
+ inputBinding: + position: 0 + prefix: '--sc' + doc: >- + Soft clip contig args + [max_contigs,min_base_qual,frac_high_qual_bases,min_soft_clip_len] + (default:16,13,80,15) + - id: maximum_mixmatch_rate + type: float? + inputBinding: + position: 0 + prefix: '--mmr' + doc: >- + Max allowed mismatch rate when mapping reads back to contigs (default: + 0.05) + - id: scoring_gap_alignments + type: string? + inputBinding: + position: 0 + prefix: '--sga' + doc: >- + Scoring used for contig alignments(match, + mismatch_penalty,gap_open_penalty,gap_extend_penalty) (default:8,32,48,1) + - id: contig_anchor + type: string? + inputBinding: + position: 0 + prefix: '--ca' + doc: >- + Contig anchor [M_bases_at_contig_edge,max_mismatches_near_edge] + (default:10,2) + - id: window_size + type: string? + inputBinding: + position: 0 + prefix: '--ws' + doc: |- + Processing window size and overlap + (size,overlap) (default: 400,200) + - id: consensus_sequence + type: boolean? + inputBinding: + position: 0 + prefix: '--cons' + doc: Use positional consensus sequence when aligning high quality soft clipping + - id: output_bams + type: + - string + - type: array + items: string + inputBinding: + position: 0 + prefix: '--out' + doc: Required list of output sam or bam file (s) separated by comma + - id: ignore_bad_assembly + type: boolean? + inputBinding: + position: 0 + prefix: '--ignore-bad-assembly' + doc: Use this option to avoid parsing errors for corrupted assemblies + - id: bam_index + type: boolean? + inputBinding: + position: 0 + prefix: '--index' + doc: >- + Enable BAM index generation when outputting sorted alignments (may require + additional memory) + - id: input_vcf + type: File? + inputBinding: + position: 0 + prefix: '--in-vcf' + doc: >- + VCF containing known (or suspected) variant sites. Very large files + should be avoided. + - id: no_edge_complex_indel + type: boolean? 
+ inputBinding: + position: 0 + prefix: '--no-edge-ci' + doc: Prevent output of complex indels at read start or read end + - id: no_sort + type: boolean? + inputBinding: + position: 0 + prefix: '--nosort' + doc: Do not attempt to sort final output +outputs: + - id: abra_realigned_bam + type: + - 'null' + - File + - type: array + items: File + outputBinding: + glob: |- + ${ + return inputs.output_bams + } + secondaryFiles: + - ^.bai +label: abra2_2.22 +arguments: + - position: 0 + valueFrom: |- + ${ + if (inputs.memory_per_job && inputs.memory_overhead) { + + if (inputs.memory_per_job % 1000 == 0) { + + return "-Xmx" + (inputs.memory_per_job / 1000).toString() + "G" + } + else { + + return "-Xmx" + Math.floor((inputs.memory_per_job / 1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead) { + + if (inputs.memory_per_job % 1000 == 0) { + + return "-Xmx" + (inputs.memory_per_job / 1000).toString() + "G" + } + else { + + return "-Xmx" + Math.floor((inputs.memory_per_job / 1000)).toString() + "G" + } + } + else if (!inputs.memory_per_job && inputs.memory_overhead) { + + return "-Xmx20G" + } + else { + + return "-Xmx20G" + } + } + - position: 0 + prefix: '-jar' + valueFrom: /usr/local/bin/abra2.jar + - position: 0 + prefix: '--threads' + valueFrom: |- + ${ + if(inputs.number_of_threads) + return inputs.number_of_threads + return runtime.cores + } + - position: 0 + prefix: '--tmpdir' + valueFrom: |- + ${ + if(inputs.working_directory) + return inputs.working_directory; + return runtime.tmpdir + } +requirements: + - class: ResourceRequirement + ramMin: 60000 + coresMin: 16 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/abra2:2.22' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:kumarn1@mskcc.org' + 'foaf:name': Nikhil Kumar + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 
'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': abra2 + 'doap:revision': 2.22 diff --git a/abra2_2.22/container/Dockerfile b/abra2_2.22/container/Dockerfile new file mode 100644 index 00000000..0ec95e41 --- /dev/null +++ b/abra2_2.22/container/Dockerfile @@ -0,0 +1,40 @@ +################## BASE IMAGE ###################### + +FROM openjdk:8 + +################## ARGUMENTS######################## + +ARG BUILD_DATE +ARG BUILD_VERSION +ARG LICENSE="Apache-2.0" +ARG JAVA_VERSION=8 +ARG ABRA2_VERSION=2.22 + +################## METADATA ######################## + +LABEL org.opencontainers.image.vendor="MSKCC" +LABEL org.opencontainers.image.authors="Nikhil Kumar (kumarn1@mskcc.org)" + +LABEL org.opencontainers.image.created=${BUILD_DATE} \ + org.opencontainers.image.version=${BUILD_VERSION} \ + org.opencontainers.image.licenses=${LICENSE} \ + org.opencontainers.image.version.java=${JAVA_VERSION} \ + org.opencontainers.image.version.abra2=${ABRA2_VERSION} \ + org.opencontainers.image.source="https://github.com/mozack/abra2/releases/" + +LABEL org.opencontainers.image.description="This container uses OPENJDK ${JAVA_VERSION} as the base image to build abra2 version ${ABRA2_VERSION}" + +################## INSTALL ########################## + +WORKDIR /usr/src + +RUN apt-get update && \ + apt-get --no-install-recommends install -y \ + wget && \ + apt-get clean autoclean && \ + apt-get autoremove -y && \ + rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* + +RUN wget "https://github.com/mozack/abra2/releases/download/v${ABRA2_VERSION}/abra2-${ABRA2_VERSION}.jar" && \ + chmod 755 /usr/src/abra2-${ABRA2_VERSION}.jar && \ + cp -s /usr/src/abra2-${ABRA2_VERSION}.jar /usr/local/bin/abra2.jar diff --git a/abra2_2.22/example_inputs.yaml b/abra2_2.22/example_inputs.yaml new file mode 100644 index 
00000000..928aafd8 --- /dev/null +++ b/abra2_2.22/example_inputs.yaml @@ -0,0 +1,30 @@ +bam_index: true +no_edge_complex_indel: true +consensus_sequence: +contig_anchor: +ignore_bad_assembly: +input_bam: + class: File + path: "path/to/alignment.bam" +input_vcf: +kmer_size: +maximum_average_depth: +maximum_mixmatch_rate: +memory_overhead: +memory_per_job: +no_sort: +number_of_threads: +output_bam: name_of_realigned_abra.bam +path_to_abra: +reference_fasta: + class: File + path: "/path/to/reference.fasta" +scoring_gap_alignments: +soft_clip_contig: +targets: + class: File + metadata: {} + path: "/path/to/target.bed" + secondaryFiles: [] +window_size: +working_directory: diff --git a/access_utils/0.1.1/README.md b/access_utils/0.1.1/README.md new file mode 100644 index 00000000..45bd9f8b --- /dev/null +++ b/access_utils/0.1.1/README.md @@ -0,0 +1,32 @@ +# CWL and Dockerfile for running general_stats_parse + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +|--- |--- |--- | +| access_utils | 0.1.1 | | + +## CWL + +- CWL specification 1.0 +- Use example_inputs.json to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner general_stats_parse.cwl example_inputs.json +``` + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/general_stats_parse.cwl /path/to/example_inputs.json + +#Using toil-cwl-runner +> mkdir tool_toil_log +> toil-cwl-runner --singularity --logFile /path/to/tool_toil_log/cwltoil.log --jobStore /path/to/tool_jobStore --batchSystem lsf --workDir /path/to/tool_toil_log --outdir . 
--writeLogs /path/to/tool_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/multiqc_1.10.1/multiqc_1.10.1.cwl /path/to/example_inputs.json > tool_toil.stdout 2> tool_toil.stderr & +``` + +### Usage + +```bash +> toil-cwl-runner general_stats_parse.cwl -h +``` diff --git a/access_utils/0.1.1/example_inputs.yaml b/access_utils/0.1.1/example_inputs.yaml new file mode 100644 index 00000000..27fccad3 --- /dev/null +++ b/access_utils/0.1.1/example_inputs.yaml @@ -0,0 +1,2 @@ +dir: /path/to/sample_info_directory +samples-json: /path/to/sample_meta.json diff --git a/access_utils/0.1.1/general_stats_parse.cwl b/access_utils/0.1.1/general_stats_parse.cwl new file mode 100644 index 00000000..4c39e5db --- /dev/null +++ b/access_utils/0.1.1/general_stats_parse.cwl @@ -0,0 +1,61 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: general_stats_parse +baseCommand: + - general_stats_parse.py +arguments: + - --dir + - . +inputs: + - id: directory + type: Directory + doc: Directory containing results. + - id: samples-json + type: File + inputBinding: + prefix: '--samples-json' + doc: Sample JSON file. + - id: config + type: File? + inputBinding: + prefix: '--config' + doc: MultiQC config file. +outputs: + - id: aggregate_parsed_stats + label: aggregate_parsed_stats + type: Directory + outputBinding: + glob: . 
+ outputEval: |- + ${ + self[0].basename = "aggregate_qc_stats"; + return self[0] + } +label: general_stats_parse +requirements: + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/access_utils:0.1.1' + - class: InitialWorkDirRequirement + listing: + - entry: $(inputs.directory) + writable: true + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:johnsoni@mskcc.org' + 'foaf:name': Ian Johnson + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:johnsoni@mskcc.org' + 'foaf:name': Ian Johnson + 'foaf:name': Memorial Sloan Kettering Cancer Center diff --git a/bcftools_1.15.1/bcftools_bgzip_1.15.1.cwl b/bcftools_1.15.1/bcftools_bgzip_1.15.1.cwl new file mode 100644 index 00000000..fe2317e9 --- /dev/null +++ b/bcftools_1.15.1/bcftools_bgzip_1.15.1.cwl @@ -0,0 +1,57 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: tabix +baseCommand: + - bgzip +inputs: + - id: input + type: File + inputBinding: + position: 0 + doc: input VCF file +outputs: + - id: zippedVcf + type: File? 
+ outputBinding: + glob: >- + ${ if(inputs.output_file_name){ return + inputs.output_file_name } else { return + inputs.input.basename.replace(/.vcf/,'.vcf.gz') } } +label: tabix +arguments: + - position: 0 + prefix: '' + valueFrom: '-c' +requirements: + - class: ResourceRequirement + ramMin: 8000 + coresMin: 1 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/bcftools:1.15.1' + - class: InlineJavascriptRequirement +stdout: >- + ${ if (inputs.output_file_name) return inputs.output_file_name; + return inputs.input.basename.replace('.vcf','.vcf.gz'); } +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:sivaprk@mskcc.org' + 'foaf:name': Karthigayini Sivaprakasam + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:sivaprk@mskcc.org' + 'foaf:name': Karthigayini Sivaprakasam + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': bcftools + 'doap:revision': 1.15.1 diff --git a/bcftools_1.15.1/bcftools_tabix_1.15.1.cwl b/bcftools_1.15.1/bcftools_tabix_1.15.1.cwl new file mode 100644 index 00000000..039078be --- /dev/null +++ b/bcftools_1.15.1/bcftools_tabix_1.15.1.cwl @@ -0,0 +1,50 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: tabix_1_3_1 +baseCommand: + - tabix +inputs: + - 'sbg:toolDefaultValue': vcf + id: preset + type: string? 
+ inputBinding: + position: 0 + prefix: '-p' + doc: 'gff, bed, sam, vcf' + - id: file + type: File + inputBinding: + position: 1 + doc: Input bgzipped VCF file + 'sbg:fileTypes': .gz +outputs: [] +label: tabix_1.3.1 +requirements: + - class: ResourceRequirement + ramMin: 4000 + coresMin: 1 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/tabix:1.3.1' +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:sivaprk@mskcc.org' + 'foaf:name': Karthigayini Sivaprakasam + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:sivaprk@mskcc.org' + 'foaf:name': Karthigayini Sivaprakasam + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': tabix + 'doap:revision': 1.3.1 diff --git a/bcftools_1.15.1/example_input_bgzip.yaml b/bcftools_1.15.1/example_input_bgzip.yaml new file mode 100644 index 00000000..05320852 --- /dev/null +++ b/bcftools_1.15.1/example_input_bgzip.yaml @@ -0,0 +1,3 @@ +input: + class: File + path: /Users/ksivaprakasam/Documents/vep.vcf diff --git a/bcftools_1.15.1/example_input_tabix.yaml b/bcftools_1.15.1/example_input_tabix.yaml new file mode 100644 index 00000000..12f3bdad --- /dev/null +++ b/bcftools_1.15.1/example_input_tabix.yaml @@ -0,0 +1,6 @@ +file: + class: File + metadata: {} + path: ../../../vep.vcf.gz + secondaryFiles: [] +preset: vcf diff --git a/bcftools_1.6/README.md b/bcftools_1.6/README.md new file mode 100644 index 00000000..770e8751 --- /dev/null +++ b/bcftools_1.6/README.md @@ -0,0 +1,69 @@ +# CWL and Dockerfile for running bcftools v1.6 + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +|--- |--- |--- | +| alpine:3.8 base image | 3.8 | - | +| bcftools | 1.6 | quay.io/biocontainers/bcftools:1.6--0 | + +## CWL + +- CWL specification 1.0 +- Use 
example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner bcftools_concat_1.6.cwl example_inputs.yaml +``` + +**If you are at MSK and using the JUNO cluster, with toil version 3.19 installed and [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) manually modified to remove `type==X86_64 &&`, you can use the following commands:** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/bcftools_concat_1.6.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> mkdir bcftools_toil_log +> toil-cwl-runner --singularity --logFile /path/to/bcftools_toil_log/cwltoil.log --jobStore /path/to/bcftools_jobStore --batchSystem lsf --workDir /path/to/bcftools_toil_log --outdir . --writeLogs /path/to/bcftools_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/bcftools_concat_1.6.cwl /path/to/inputs.yaml > bcftools_toil.stdout 2> bcftools_toil.stderr & +``` + +### Usage + +``` +usage: toil-cwl-runner bcftools_concat_1.6.cwl [-h] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --allow_overlaps First coordinate of the next file can precede last + record of the current file. + --compact_PS Do not output PS tag at each site, only at the start + of a new phase set block. + --ligate Ligate phased VCFs by matching phase at overlapping + haplotypes + --list LIST Read the list of files from a file. 
+ --min_PQ MIN_PQ Break phase set if phasing quality is lower than + [30] + --naive Concatenate BCF files without recompression + (dangerous, use with caution) + --no_version do not append version and command line to the header + --output OUTPUT Write output to a file [standard output] + --output_type OUTPUT_TYPE + b - compressed BCF, u - uncompressed BCF, z + - compressed VCF, v - uncompressed VCF [v] + --regions REGIONS Restrict to comma-separated list of regions + --regions_file REGIONS_FILE + Restrict to regions listed in a file + --remove_duplicates Alias for -d none + --rm_dups RM_DUPS Output duplicate records present in multiple + files only once - + --threads THREADS Number of extra output compression threads [0] + --vcf_files_csi VCF_FILES_CSI + Array of vcf files to be concatenated into one vcf + --vcf_files_tbi VCF_FILES_TBI + Array of vcf files to be concatenated into one vcf + +``` diff --git a/bcftools_1.6/bcftools_concat_1.6.cwl b/bcftools_1.6/bcftools_concat_1.6.cwl new file mode 100644 index 00000000..98f4122c --- /dev/null +++ b/bcftools_1.6/bcftools_concat_1.6.cwl @@ -0,0 +1,180 @@ + +class: CommandLineTool +cwlVersion: v1.0 + +$namespaces: + dct: http://purl.org/dc/terms/ + doap: http://usefulinc.com/ns/doap# + foaf: http://xmlns.com/foaf/0.1/ + sbg: https://www.sevenbridges.com/ + +id: bcftools_concat_v1_6 + +baseCommand: + - /usr/bin/bcftools + - concat + +doc: | + concatenate VCF/BCF files from the same set of samples + +inputs: + + memory_per_job: + type: ["null",int] + doc: Memory per job in megabytes + + memory_overhead: + type: ["null",int] + doc: Memory overhead per job in megabytes + + threads: + type: ["null", string] + doc: Number of extra output compression threads [0] + inputBinding: + prefix: --threads + + compact_PS: + type: ["null", boolean] + default: false + doc: Do not output PS tag at each site, only at the start of a new phase set block. 
+ inputBinding: + prefix: --compact-PS + + remove_duplicates: + type: ["null", boolean] + default: false + doc: Alias for -d none + inputBinding: + prefix: --remove-duplicates + + ligate: + type: ["null", boolean] + default: false + doc: Ligate phased VCFs by matching phase at overlapping haplotypes + inputBinding: + prefix: --ligate + + output_type: + type: ["null", string] + doc: b - compressed BCF, u - uncompressed BCF, z - compressed VCF, v - uncompressed VCF [v] + inputBinding: + prefix: --output-type + + no_version: + type: ["null", boolean] + default: false + doc: do not append version and command line to the header + inputBinding: + prefix: --no-version + + naive: + type: ["null", boolean] + default: false + doc: Concatenate BCF files without recompression (dangerous, use with caution) + inputBinding: + prefix: --naive + + allow_overlaps: + type: ["null", boolean] + default: false + doc: First coordinate of the next file can precede last record of the current file. + inputBinding: + prefix: --allow-overlaps + + min_PQ: + type: ["null", string] + doc: Break phase set if phasing quality is lower than [30] + inputBinding: + prefix: --min-PQ + + regions_file: + type: ["null", string] + doc: Restrict to regions listed in a file + inputBinding: + prefix: --regions-file + + regions: + type: ["null", string] + doc: Restrict to comma-separated list of regions + inputBinding: + prefix: --regions + + rm_dups: + type: ["null", string] + doc: Output duplicate records present in multiple files only once - + inputBinding: + prefix: --rm-dups + + output: + type: string + doc: Write output to a file [standard output] + default: "bcftools_concat.vcf" + inputBinding: + prefix: --output + + list: + type: ['null', string] + doc: Read the list of files from a file. 
+ inputBinding: + prefix: --file-list + + vcf_files_tbi: + type: + - 'null' + - type: array + items: File + secondaryFiles: + - .tbi + doc: Array of vcf files to be concatenated into one vcf + inputBinding: + position: 1 + + vcf_files_csi: + type: + - 'null' + - type: array + items: File + secondaryFiles: + - ^.bcf.csi + doc: Array of vcf files to be concatenated into one vcf + inputBinding: + position: 1 + +outputs: + bcftools_concat_vcf_output_file: + type: File + outputBinding: + glob: |- + ${ + if (inputs.output) + return inputs.output; + return null; + } + +requirements: + InlineJavascriptRequirement: {} + ResourceRequirement: + ramMin: 8000 + coresMin: 1 + DockerRequirement: + dockerPull: ghcr.io/msk-access/bcftools:1.6 + + +dct:contributor: + - class: foaf:Organization + foaf:member: + - class: foaf:Person + foaf:mbox: mailto:kumarn1@mskcc.org + foaf:name: Nikhil Kumar + foaf:name: Memorial Sloan Kettering Cancer Center +dct:creator: + - class: foaf:Organization + foaf:member: + - class: foaf:Person + foaf:mbox: mailto:kumarn1@mskcc.org + foaf:name: Nikhil Kumar + foaf:name: Memorial Sloan Kettering Cancer Center +doap:release: + - class: doap:Version + doap:name: bcftools + doap:revision: 1.6 diff --git a/bcftools_1.6/example_inputs.yaml b/bcftools_1.6/example_inputs.yaml new file mode 100644 index 00000000..0512753d --- /dev/null +++ b/bcftools_1.6/example_inputs.yaml @@ -0,0 +1,9 @@ +vcf_files_tbi: + class: File + path: /path/to/vcf/and/tbi/files +tumor_sample_name: tumor_sample_name +normal_sample_name: normal_sample_name +allow_overlaps: allow_overlaps_boolean +rm_dups: rm_dups_str +output_type: output_type_str +output: output_file_name diff --git a/bedtools_genomecov_v2.28.0_cv2/README.md b/bedtools_genomecov_v2.28.0_cv2/README.md deleted file mode 100644 index 75de8a57..00000000 --- a/bedtools_genomecov_v2.28.0_cv2/README.md +++ /dev/null @@ -1,56 +0,0 @@ -# CWL and Dockerfile for running Bedtools GenomeCov - -## Version of tools in [docker image 
](https://hub.docker.com/r/biocontainers/bedtools) - -| Tool | Version | Location | -|--- |--- |--- | -| Bedtools | v2.28.0_cv2 | https://github.com/arq5x/bedtools2/releases/tag/v2.28.0 | - -[![](https://img.shields.io/badge/version-2.28.0_cv2-blue)](https://github.com/arq5x/bedtools2/releases/tag/v2.28.0) -## CWL - -- CWL specification 1.0 -- Use example_inputs.yml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner bedtools_genomecov_v2.28.0_cv2.cwl example_inputs.yml -``` - -**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** - -```bash -#Using CWLTOOL -> cwltool --singularity --non-strict bedtools_genomecov_v2.28.0_cv2.cwl inputs.yaml - -#Using toil-cwl-runner -> mkdir run_directory -> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 bedtools_genomecov_v2.28.0_cv2.cwl inputs.yaml > file.stdout 2> file.stderr & -``` - -## Usage - -```bash -> toil-cwl-runner bedtools_genomecov_v2.28.0_cv2.cwl --help - -usage: bedtools_genomecov_v2.28.0_cv2.cwl [-h] --input INPUT - --output_file_name OUTPUT_FILE_NAME - [--memory_overhead MEMORY_OVERHEAD] - [--memory_per_job MEMORY_PER_JOB] - [--number_of_threads NUMBER_OF_THREADS] - [--option_bedgraph] - [job_order] - -positional arguments: - job_order Job input json file - -optional arguments: - -h, --help show this help message and exit - --input INPUT The input file can be in BAM format (Note: BAM must be - sorted by position) - --output_file_name OUTPUT_FILE_NAME - --memory_overhead MEMORY_OVERHEAD 
- --memory_per_job MEMORY_PER_JOB - --number_of_threads NUMBER_OF_THREADS - --option_bedgraph option flag parameter to choose output file format. - -bg refers to bedgraph format \ No newline at end of file diff --git a/bedtools_genomecov_v2.28.0_cv2/bedtools_genomecov_v2.28.0_cv2.cwl b/bedtools_genomecov_v2.28.0_cv2/bedtools_genomecov_v2.28.0_cv2.cwl index 43f392dd..396931b9 100644 --- a/bedtools_genomecov_v2.28.0_cv2/bedtools_genomecov_v2.28.0_cv2.cwl +++ b/bedtools_genomecov_v2.28.0_cv2/bedtools_genomecov_v2.28.0_cv2.cwl @@ -39,7 +39,7 @@ inputs: option flag parameter to choose output file format. -bg refers to bedgraph format outputs: - - id: output_file + - id: bedtools_genomecove_bedgraph type: File outputBinding: glob: |- @@ -55,7 +55,7 @@ requirements: ramMin: 20000 coresMin: 1 - class: DockerRequirement - dockerPull: 'biocontainers/bedtools:v2.28.0_cv2' + dockerPull: 'ghcr.io/msk-access/bedtools:v2.28.0_cv2' - class: InlineJavascriptRequirement stdout: |- ${ diff --git a/bedtools_merge_v2.28.0_cv2/README.md b/bedtools_merge_v2.28.0_cv2/README.md deleted file mode 100644 index 960664db..00000000 --- a/bedtools_merge_v2.28.0_cv2/README.md +++ /dev/null @@ -1,57 +0,0 @@ -# CWL and Dockerfile for running Bedtools Merge - -## Version of tools in [docker image ](https://hub.docker.com/r/biocontainers/bedtools) - -| Tool | Version | Location | -|--- |--- |--- | -| Bedtools | v2.28.0_cv2 | https://github.com/arq5x/bedtools2/releases/tag/v2.28.0 | - -[![](https://img.shields.io/badge/version-2.28.0_cv2-blue)](https://github.com/arq5x/bedtools2/releases/tag/v2.28.0) -## CWL - -- CWL specification 1.0 -- Use example_inputs.yml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner bedtools_merge_v2.28.0_cv2.cwl example_inputs.yml -``` - -**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying 
[lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** - -```bash -#Using CWLTOOL -> cwltool --singularity --non-strict bedtools_merge_v2.28.0_cv2.cwl inputs.yaml - -#Using toil-cwl-runner -> mkdir run_directory -> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 bedtools_merge_v2.28.0_cv2.cwl inputs.yaml > file.stdout 2> file.stderr & -``` - -## Usage - -```bash -> toil-cwl-runner bedtools_merge_v2.28.0_cv2.cwl --help - -usage: bedtools_merge_v2.28.0_cv2.cwl [-h] --input INPUT --output_file_name - OUTPUT_FILE_NAME - [--memory_overhead MEMORY_OVERHEAD] - [--memory_per_job MEMORY_PER_JOB] - [--number_of_threads NUMBER_OF_THREADS] - [--distance_between_features DISTANCE_BETWEEN_FEATURES] - [job_order] - -positional arguments: - job_order Job input json file - -optional arguments: - -h, --help show this help message and exit - --input INPUT BEDgraph format file generated from Bedtools Genomecov - module - --output_file_name OUTPUT_FILE_NAME - --memory_overhead MEMORY_OVERHEAD - --memory_per_job MEMORY_PER_JOB - --number_of_threads NUMBER_OF_THREADS - --distance_between_features DISTANCE_BETWEEN_FEATURES - Maximum distance between features allowed for features - to be merged. \ No newline at end of file diff --git a/bedtools_merge_v2.28.0_cv2/bedtools_merge_v2.28.0_cv2.cwl b/bedtools_merge_v2.28.0_cv2/bedtools_merge_v2.28.0_cv2.cwl index 21e7cc7b..e6259ff3 100644 --- a/bedtools_merge_v2.28.0_cv2/bedtools_merge_v2.28.0_cv2.cwl +++ b/bedtools_merge_v2.28.0_cv2/bedtools_merge_v2.28.0_cv2.cwl @@ -33,8 +33,8 @@ inputs: shellQuote: false doc: Maximum distance between features allowed for features to be merged. 
outputs: - - id: output_file - type: File? + - id: bedtools_merge_bed + type: File outputBinding: glob: |- ${ @@ -49,7 +49,7 @@ requirements: ramMin: 20000 coresMin: 1 - class: DockerRequirement - dockerPull: 'biocontainers/bedtools:v2.28.0_cv2' + dockerPull: 'ghcr.io/msk-access/bedtools:v2.28.0_cv2' - class: InlineJavascriptRequirement stdout: |- ${ diff --git a/bedtools_sortvcf_2.28.0/bedtools_sortbed_vcf.cwl b/bedtools_sortvcf_2.28.0/bedtools_sortbed_vcf.cwl new file mode 100644 index 00000000..eefa400e --- /dev/null +++ b/bedtools_sortvcf_2.28.0/bedtools_sortbed_vcf.cwl @@ -0,0 +1,54 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: sortvcf +baseCommand: + - sortBed +inputs: + - id: input + type: File + inputBinding: + position: 0 + prefix: '-i' + doc: input VCF file +outputs: + - id: sorted.vcf + type: File? + outputBinding: + glob: >- + ${ if(inputs.output_file_name){ return + inputs.output_file_name } else { return + inputs.input.basename.replace(/.vcf/,'.sorted.vcf') } } +label: sortvcf +requirements: + - class: ResourceRequirement + ramMin: 2000 + coresMin: 1 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/bedtools:v2.28.0_cv2' + - class: InlineJavascriptRequirement +stdout: >- + ${ if (inputs.output_file_name) return inputs.output_file_name; + return inputs.input.basename.replace('.vcf','.sorted.vcf'); } +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:sivaprk@mskcc.org' + 'foaf:name': Karthigayini Sivaprakasam + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:sivaprk@mskcc.org' + 'foaf:name': Karthigayini Sivaprakasam + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 
'doap:Version' + 'doap:name': sortbed + 'doap:revision': 2.28.0 diff --git a/bedtools_sortvcf_2.28.0/example_input.yaml b/bedtools_sortvcf_2.28.0/example_input.yaml new file mode 100644 index 00000000..1b65b0c9 --- /dev/null +++ b/bedtools_sortvcf_2.28.0/example_input.yaml @@ -0,0 +1 @@ +input: null diff --git a/biometrics_extract/0.2.13/biometrics_extract.cwl b/biometrics_extract/0.2.13/biometrics_extract.cwl new file mode 100644 index 00000000..19ee1121 --- /dev/null +++ b/biometrics_extract/0.2.13/biometrics_extract.cwl @@ -0,0 +1,139 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: biometrics_extract_0_2_13 +baseCommand: + - biometrics + - extract +inputs: + - id: sample_bam + type: File + inputBinding: + position: 0 + prefix: '--sample-bam' + doc: BAM file. + secondaryFiles: + - ^.bai + - id: sample_sex + type: string? + inputBinding: + position: 0 + prefix: '--sample-sex' + doc: Expected sample sex (i.e. M or F). + - id: sample_group + type: string? + inputBinding: + position: 0 + prefix: '--sample-group' + doc: The sample group (e.g. the sample patient ID). + - id: sample_name + type: string + inputBinding: + position: 0 + prefix: '--sample-name' + doc: >- + Sample name. If not specified, sample name is automatically figured out + from the BAM file. + - id: fafile + type: File + inputBinding: + position: 0 + prefix: '--fafile' + doc: Path to reference fasta. + secondaryFiles: + - ^.fasta.fai + - id: vcf_file + type: File + inputBinding: + position: 0 + prefix: '--vcf' + doc: VCF file containing the SNPs to be queried. + - id: bed_file + type: File? + inputBinding: + position: 0 + prefix: '--bed' + doc: BED file containing the intervals to be queried. + - id: database + type: string? 
+ inputBinding: + position: 0 + prefix: '--database' + doc: >- + Directory to store the intermediate files after running the extraction + step. + - default: 1 + id: min_mapping_quality + type: int? + inputBinding: + position: 0 + prefix: '--min-mapping-quality' + doc: Minimum mapping quality of reads to be used for pileup. + - default: 1 + id: min_base_quality + type: int? + inputBinding: + position: 0 + prefix: '--min-base-quality' + doc: Minimum base quality of reads to be used for pileup. + - default: 10 + id: min_coverage + type: int? + inputBinding: + position: 0 + prefix: '--min-coverage' + doc: Minimum coverage to count a site. + - default: 0.1 + id: min_homozygous_thresh + type: float? + inputBinding: + position: 0 + prefix: '--min-homozygous-thresh' + doc: Minimum threshold to define homozygous. + - id: default_genotype + type: string? + inputBinding: + position: 0 + prefix: '--default-genotype' + doc: Default genotype if coverage is too low (options are Het or Hom). +outputs: + - id: biometrics_extract_pickle + type: File + outputBinding: + glob: |- + ${ + if (inputs.database) { + return inputs.database + '/' + inputs.sample_name + '.pickle'; + } else { + return inputs.sample_name + '.pickle'; + } + } +requirements: + - class: ResourceRequirement + ramMin: 16000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/biometrics:0.2.13' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': biometrics + 'doap:revision': 0.2.13 diff --git 
a/biometrics_extract/0.2.13/example_inputs.yaml b/biometrics_extract/0.2.13/example_inputs.yaml new file mode 100644 index 00000000..566b496a --- /dev/null +++ b/biometrics_extract/0.2.13/example_inputs.yaml @@ -0,0 +1,24 @@ +sample_type: + - "Normal" +sample_sex: + - "M" +sample_name: + - "test" +sample_group: + - "test" +fafile: + class: File + path: /path/to/fasta +sample_bam: + - class: File + path: /path/to/bam +bed_file: null +vcf_file: + class: File + path: /path/to/vcf +database: null +min_mapping_quality: null +min_base_quality: null +min_coverage: null +min_homozygous_thresh: null +default_genotype: null diff --git a/biometrics_extract/0.2.5/biometrics_extract.cwl b/biometrics_extract/0.2.5/biometrics_extract.cwl new file mode 100644 index 00000000..0abf2aec --- /dev/null +++ b/biometrics_extract/0.2.5/biometrics_extract.cwl @@ -0,0 +1,172 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: biometrics_extract +baseCommand: + - biometrics + - extract +inputs: + - id: sample_bam + type: + - type: array + items: File + inputBinding: + position: 0 + prefix: --sample-bam + secondaryFiles: + - ^.bai + doc: >- + BAM file. + - id: sample_type + type: + - "null" + - type: array + items: string + inputBinding: + position: 0 + prefix: --sample-type + doc: >- + Sample types: Normal or Tumor. + - id: sample_sex + type: + - "null" + - type: array + items: string + inputBinding: + position: 0 + prefix: --sample-sex + doc: >- + Expected sample sex (i.e. M or F). + - id: sample_group + type: + - "null" + - type: array + items: string + inputBinding: + position: 0 + prefix: --sample-group + doc: >- + The sample group (e.g. the sample patient ID). + - id: sample_name + type: + - type: array + items: string + inputBinding: + position: 0 + prefix: --sample-name + doc: >- + Sample name. 
If not specified, sample name is automatically figured out from the BAM file. + - id: fafile + type: File + inputBinding: + position: 0 + prefix: --fafile + secondaryFiles: + - ^.fasta.fai + doc: >- + Path to reference fasta. + - id: vcf_file + type: File + inputBinding: + position: 0 + prefix: --vcf + doc: >- + VCF file containing the SNPs to be queried. + - id: bed_file + type: File? + inputBinding: + position: 0 + prefix: --bed + doc: >- + BED file containing the intervals to be queried. + - id: database + type: string? + inputBinding: + position: 0 + prefix: --database + doc: >- + Directory to store the intermediate files after running the extraction step. + - id: min_mapping_quality + type: int? + default: 1 + inputBinding: + position: 0 + prefix: --min-mapping-quality + doc: >- + Minimum mapping quality of reads to be used for pileup. + - id: min_base_quality + type: int? + default: 1 + inputBinding: + position: 0 + prefix: --min-base-quality + doc: >- + Minimum base quality of reads to be used for pileup. + - id: min_coverage + type: int? + default: 10 + inputBinding: + position: 0 + prefix: --min-coverage + doc: >- + Minimum coverage to count a site. + - id: min_homozygous_thresh + type: float? + default: 0.1 + inputBinding: + position: 0 + prefix: --min-homozygous-thresh + doc: >- + Minimum threshold to define homozygous. + - id: default_genotype + type: string? + inputBinding: + position: 0 + prefix: --default-genotype + doc: >- + Default genotype if coverage is too low (options are Het or Hom). 
+outputs: + - id: biometrics_extract_pickle + type: + type: array + items: File + outputBinding: + glob: |- + ${ + return inputs.sample_name.map(val => { + if (inputs.database) { + return inputs.database + '/' + val + '.pk'; + } else { + return val + '.pk'; + } + }); + } +requirements: + - class: ResourceRequirement + ramMin: 16000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/biometrics:0.2.5' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': biometrics + 'doap:revision': 0.2.5 diff --git a/biometrics_extract/0.2.5/example_inputs.yaml b/biometrics_extract/0.2.5/example_inputs.yaml new file mode 100644 index 00000000..566b496a --- /dev/null +++ b/biometrics_extract/0.2.5/example_inputs.yaml @@ -0,0 +1,24 @@ +sample_type: + - "Normal" +sample_sex: + - "M" +sample_name: + - "test" +sample_group: + - "test" +fafile: + class: File + path: /path/to/fasta +sample_bam: + - class: File + path: /path/to/bam +bed_file: null +vcf_file: + class: File + path: /path/to/vcf +database: null +min_mapping_quality: null +min_base_quality: null +min_coverage: null +min_homozygous_thresh: null +default_genotype: null diff --git a/biometrics_extract/README.md b/biometrics_extract/README.md new file mode 100644 index 00000000..5687fef3 --- /dev/null +++ b/biometrics_extract/README.md @@ -0,0 +1,69 @@ +# CWL for running biometrics extract tool. 
+ +| Tool | Latest version | Location | +|--- |--- |--- | +| biometrics | 0.2.13 | | + +The Python package source code and Dockerfile are located on GitHub. + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner biometrics_extract.cwl example_inputs.yaml +``` + +### Usage + +```bash +usage: biometrics_extract.cwl [-h] [--sample_bam SAMPLE_BAM] + [--sample_type SAMPLE_TYPE] + [--sample_sex SAMPLE_SEX] + [--sample_group SAMPLE_GROUP] + [--sample_name SAMPLE_NAME] --fafile + FAFILE --vcf_file VCF_FILE --bed_file + BED_FILE --database DATABASE + [--min_mapping_quality MIN_MAPPING_QUALITY] + [--min_base_quality MIN_BASE_QUALITY] + [--min_coverage MIN_COVERAGE] + [--min_homozygous_thresh MIN_HOMOZYGOUS_THRESH] + [--default_genotype DEFAULT_GENOTYPE] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --sample_bam SAMPLE_BAM + BAM file. + --sample_type SAMPLE_TYPE + Sample types: Normal or Tumor. + --sample_sex SAMPLE_SEX + Expected sample sex (i.e. M or F). + --sample_group SAMPLE_GROUP + The sample group (e.g. the sample patient ID). + --sample_name SAMPLE_NAME + Sample name. If not specified, sample name is + automatically figured out from the BAM file. + --fafile FAFILE Path to reference fasta. + --vcf_file VCF_FILE VCF file containing the SNPs to be queried. + --bed_file BED_FILE BED file containing the intervals to be queried. + --database DATABASE Directory to store the intermediate files after + running the extraction step. + --min_mapping_quality MIN_MAPPING_QUALITY + Minimum mapping quality of reads to be used for + pileup. + --min_base_quality MIN_BASE_QUALITY + Minimum base quality of reads to be used for pileup. + --min_coverage MIN_COVERAGE + Minimum coverage to count a site. 
+ --min_homozygous_thresh MIN_HOMOZYGOUS_THRESH + Minimum threshold to define homozygous. + --default_genotype DEFAULT_GENOTYPE + Default genotype if coverage is too low (options are + Het or Hom). +``` diff --git a/biometrics_genotype/0.2.13/biometrics_genotype.cwl b/biometrics_genotype/0.2.13/biometrics_genotype.cwl new file mode 100644 index 00000000..d4d1449a --- /dev/null +++ b/biometrics_genotype/0.2.13/biometrics_genotype.cwl @@ -0,0 +1,149 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: biometrics_genotype_0_2_13 +baseCommand: + - biometrics + - genotype +inputs: + - id: input + type: + type: array + items: File + inputBinding: + position: 0 + prefix: '--input' + doc: >- + Can be one of three types: (1) path to a CSV file containing sample + information (one per line). For example: + sample_name,sample_bam,sample_type,sample_sex,sample_group. (2) Path to a + '*.pk' file that was produced by the 'extract' tool. (3) Name of the + sample to analyze; this assumes there is a file named '{sample_name}.pk' + in your database directory. Can be specified more than once. + - id: database + type: string? + inputBinding: + position: 0 + prefix: '--database' + doc: >- + Directory to store the intermediate files after running the extraction + step. + - default: 0.05 + id: discordance_threshold + type: float? + inputBinding: + position: 0 + prefix: '--discordance-threshold' + doc: >- + Discordance values less than this are regarded as matching samples. + (default: 0.05) + - id: prefix + type: string? + inputBinding: + position: 0 + prefix: '--prefix' + doc: Output file prefix. + - id: plot + type: boolean? + inputBinding: + position: 0 + prefix: '--plot' + doc: Also output plots of the data. + - id: json + type: boolean? + inputBinding: + position: 0 + prefix: '--json' + doc: Also output data in JSON format. 
+ - id: no_db_comparison + type: boolean? + inputBinding: + position: 0 + prefix: '--no-db-compare' + doc: >- + Do not compare the sample(s) you provided to all samples in the database, + only compare them with each other. + - default: 2 + id: threads + type: int? + inputBinding: + position: 0 + prefix: '--threads' + doc: Number of threads to use. +outputs: + - id: biometrics_genotype_comparisons + type: File + outputBinding: + glob: |- + ${ + if (inputs.prefix) { + return inputs.prefix + '_genotype_comparison.csv' + } else { + return 'genotype_comparison.csv' + } + } + - id: biometrics_genotype_cluster_input + type: File + outputBinding: + glob: |- + ${ + if (inputs.prefix) { + return inputs.prefix + '_genotype_clusters_input.csv' + } else { + return 'genotype_clusters_input.csv' + } + } + - id: biometrics_genotype_cluster_input_database + type: File? + outputBinding: + glob: |- + ${ + if (inputs.prefix) { + return inputs.prefix + '_genotype_clusters_database.csv' + } else { + return 'genotype_clusters_database.csv' + } + } + - id: biometrics_genotype_plot_input + type: File? + outputBinding: + glob: |- + ${ + return 'genotype_comparison_input.html' + } + - id: biometrics_genotype_plot_input_database + type: File? 
+ outputBinding: + glob: |- + ${ + return 'genotype_comparison_database.html' + } +requirements: + - class: ResourceRequirement + ramMin: 16000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/biometrics:0.2.13' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': biometrics + 'doap:revision': 0.2.13 diff --git a/biometrics_genotype/0.2.13/example_inputs.yaml b/biometrics_genotype/0.2.13/example_inputs.yaml new file mode 100644 index 00000000..0bc68d94 --- /dev/null +++ b/biometrics_genotype/0.2.13/example_inputs.yaml @@ -0,0 +1,12 @@ +input: + - class: File + path: "../biometrics_extract_0.2.5/test.pk" + - class: File + path: "../biometrics_extract_0.2.5/test2.pk" +database: null +prefix: 'test' +outdir: null +plot: true +no_db_comparison: false +threads: null +discordance_threshold: null diff --git a/biometrics_genotype/0.2.5/biometrics_genotype.cwl b/biometrics_genotype/0.2.5/biometrics_genotype.cwl new file mode 100644 index 00000000..51285328 --- /dev/null +++ b/biometrics_genotype/0.2.5/biometrics_genotype.cwl @@ -0,0 +1,145 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: biometrics_genotype +baseCommand: + - biometrics + - genotype +inputs: + - id: input + type: + - type: array + items: File + inputBinding: + position: 0 + prefix: --input + doc: >- + Can be one of three types: (1) path to a CSV 
file containing sample information (one per line). For example: sample_name,sample_bam,sample_type,sample_sex,sample_group. (2) Path to a '*.pk' file that was produced by the 'extract' tool. (3) Name of the sample to analyze; this assumes there is a file named '{sample_name}.pk' in your database directory. Can be specified more than once. + - id: database + type: string? + inputBinding: + position: 0 + prefix: --database + doc: >- + Directory to store the intermediate files after running the extraction step. + - id: discordance_threshold + type: float? + default: 0.05 + inputBinding: + position: 0 + prefix: --discordance-threshold + doc: >- + Discordance values less than this are regarded as matching samples. (default: 0.05) + - id: prefix + type: string? + inputBinding: + position: 0 + prefix: --prefix + doc: >- + Output file prefix. + - id: plot + type: boolean? + inputBinding: + position: 0 + prefix: --plot + doc: >- + Also output plots of the data. + - id: json + type: boolean? + inputBinding: + position: 0 + prefix: --json + doc: >- + Also output data in JSON format. + - id: no_db_comparison + type: boolean? + inputBinding: + position: 0 + prefix: --no-db-compare + doc: >- + Do not compare the sample(s) you provided to all samples in the database, only compare them with each other. + - id: threads + type: int? + default: 2 + inputBinding: + position: 0 + prefix: --threads + doc: >- + Number of threads to use. +outputs: + - id: biometrics_genotype_comparisons + type: File + outputBinding: + glob: |- + ${ + if (inputs.prefix) { + return inputs.prefix + '_genotype_comparison.csv' + } else { + return 'genotype_comparison.csv' + } + } + - id: biometrics_genotype_cluster_input + type: File + outputBinding: + glob: |- + ${ + if (inputs.prefix) { + return inputs.prefix + '_genotype_clusters_input.csv' + } else { + return 'genotype_clusters_input.csv' + } + } + - id: biometrics_genotype_cluster_input_database + type: File? 
+ outputBinding: + glob: |- + ${ + if (inputs.prefix) { + return inputs.prefix + '_genotype_clusters_database.csv' + } else { + return 'genotype_clusters_database.csv' + } + } + - id: biometrics_genotype_plot_input + type: File? + outputBinding: + glob: |- + ${ + return 'genotype_comparison_input.html' + } + - id: biometrics_genotype_plot_input_database + type: File? + outputBinding: + glob: |- + ${ + return 'genotype_comparison_database.html' + } +requirements: + - class: ResourceRequirement + ramMin: 16000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/biometrics:0.2.5' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': biometrics + 'doap:revision': 0.2.5 diff --git a/biometrics_genotype/0.2.5/example_inputs.yaml b/biometrics_genotype/0.2.5/example_inputs.yaml new file mode 100644 index 00000000..0bc68d94 --- /dev/null +++ b/biometrics_genotype/0.2.5/example_inputs.yaml @@ -0,0 +1,12 @@ +input: + - class: File + path: "../biometrics_extract_0.2.5/test.pk" + - class: File + path: "../biometrics_extract_0.2.5/test2.pk" +database: null +prefix: 'test' +outdir: null +plot: true +no_db_comparison: false +threads: null +discordance_threshold: null diff --git a/biometrics_genotype/README.md b/biometrics_genotype/README.md new file mode 100644 index 00000000..fd398b99 --- /dev/null +++ b/biometrics_genotype/README.md @@ -0,0 +1,53 @@ +# CWL for running biometrics genotype tool. 
+ +| Tool | Latest version | Location | +|--- |--- |--- | +| biometrics | 0.2.13 | | + +The Python package source code and Dockerfile are located on GitHub. + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner biometrics_genotype.cwl example_inputs.yaml +``` + +### Usage + +```bash +usage: biometrics_genotype.cwl [-h] --input INPUT [--database DATABASE] + [--discordance_threshold DISCORDANCE_THRESHOLD] + [--prefix PREFIX] [--plot] [--json] + [--no_db_comparison] [--threads THREADS] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --input INPUT Can be one of three types: (1) path to a CSV file + containing sample information (one per line). For + example: sample_name,sample_bam,sample_type,sample_sex + ,sample_group. (2) Path to a '*.pk' file that was + produced by the 'extract' tool. (3) Name of the sample + to analyze; this assumes there is a file named + '{sample_name}.pk' in your database directory. Can be + specified more than once. + --database DATABASE Directory to store the intermediate files after + running the extraction step. + --discordance_threshold DISCORDANCE_THRESHOLD + Discordance values less than this are regarded as + matching samples. (default: 0.05) + --prefix PREFIX Output file prefix. + --plot Also output plots of the data. + --json Also output data in JSON format. + --no_db_comparison Do not compare the sample(s) you provided to all + samples in the database, only compare them with each + other. + --threads THREADS Number of threads to use.
+``` diff --git a/biometrics_major/0.2.13/biometrics_major.cwl b/biometrics_major/0.2.13/biometrics_major.cwl new file mode 100644 index 00000000..b5a42a9e --- /dev/null +++ b/biometrics_major/0.2.13/biometrics_major.cwl @@ -0,0 +1,123 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: biometrics_major_0_2_13 +baseCommand: + - biometrics + - major +inputs: + - id: input + type: + type: array + items: File + inputBinding: + prefix: '--input' + inputBinding: + position: 0 + doc: >- + Can be one of three types: (1) path to a CSV file containing sample + information (one per line). For example: + sample_name,sample_bam,sample_type,sample_sex,sample_group. (2) Path to a + '*.pk' file that was produced by the 'extract' tool. (3) Name of the + sample to analyze; this assumes there is a file named '{sample_name}.pk' + in your database directory. Can be specified more than once. + - id: database + type: string? + inputBinding: + position: 0 + prefix: '--database' + doc: >- + Directory to store the intermediate files after running the extraction + step. + - default: 0.6 + id: major_threshold + type: float? + inputBinding: + position: 0 + prefix: '--major-threshold' + doc: Major contamination threshold for bad sample. + - id: prefix + type: string? + inputBinding: + position: 0 + prefix: '--prefix' + doc: Output file prefix. + - id: plot + type: boolean? + inputBinding: + position: 0 + prefix: '--plot' + doc: Also output plots of the data. + - id: json + type: boolean? + inputBinding: + position: 0 + prefix: '--json' + doc: Also output data in JSON format. + - id: no_db_comparison + type: boolean? + inputBinding: + position: 0 + prefix: '--no-db-compare' + doc: >- + Do not compare the sample(s) you provided to all samples in the database, + only compare them with each other. 
+outputs: + - id: biometrics_major_csv + type: File + outputBinding: + glob: |- + ${ + if (inputs.prefix) { + return inputs.prefix + '_major_contamination.csv' + } else { + return 'major_contamination.csv' + } + } + - id: biometrics_major_json + type: File? + outputBinding: + glob: |- + ${ + if (inputs.prefix) { + return inputs.prefix + '_major_contamination.json' + } else { + return 'major_contamination.json' + } + } + - id: biometrics_major_plot + type: File? + outputBinding: + glob: |- + ${ + return 'major_contamination.html' + } +requirements: + - class: ResourceRequirement + ramMin: 16000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/biometrics:0.2.13' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': biometrics + 'doap:revision': 0.2.13 diff --git a/biometrics_major/0.2.13/example_inputs.yaml b/biometrics_major/0.2.13/example_inputs.yaml new file mode 100644 index 00000000..da03de55 --- /dev/null +++ b/biometrics_major/0.2.13/example_inputs.yaml @@ -0,0 +1,11 @@ +input: + - class: File + path: "../biometrics_extract_0.2.5/test.pk" + - class: File + path: "../biometrics_extract_0.2.5/test2.pk" +database: null +major_threshold: null +prefix: null +plot: true +json: true +no_db_comparison: null diff --git a/biometrics_major/0.2.5/biometrics_major.cwl b/biometrics_major/0.2.5/biometrics_major.cwl new file mode 100644 index 00000000..217c9d96 --- /dev/null +++ b/biometrics_major/0.2.5/biometrics_major.cwl @@ -0,0 +1,120 @@ +class: CommandLineTool +cwlVersion:
v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: biometrics_major +baseCommand: + - biometrics + - major +inputs: + - id: input + type: + type: array + items: File + inputBinding: + prefix: --input + inputBinding: + position: 0 + doc: >- + Can be one of three types: (1) path to a CSV file containing sample information (one per line). For example: sample_name,sample_bam,sample_type,sample_sex,sample_group. (2) Path to a '*.pk' file that was produced by the 'extract' tool. (3) Name of the sample to analyze; this assumes there is a file named '{sample_name}.pk' in your database directory. Can be specified more than once. + - id: database + type: string? + inputBinding: + position: 0 + prefix: --database + doc: >- + Directory to store the intermediate files after running the extraction step. + - id: major_threshold + type: float? + default: 0.6 + inputBinding: + position: 0 + prefix: --major-threshold + doc: >- + Major contamination threshold for bad sample. + - id: prefix + type: string? + inputBinding: + position: 0 + prefix: --prefix + doc: >- + Output file prefix. + - id: plot + type: boolean? + inputBinding: + position: 0 + prefix: --plot + doc: >- + Also output plots of the data. + - id: json + type: boolean? + inputBinding: + position: 0 + prefix: --json + doc: >- + Also output data in JSON format. + - id: no_db_comparison + type: boolean? + inputBinding: + position: 0 + prefix: --no-db-compare + doc: >- + Do not compare the sample(s) you provided to all samples in the database, only compare them with each other. +outputs: + - id: biometrics_major_csv + type: File + outputBinding: + glob: |- + ${ + if (inputs.prefix) { + return inputs.prefix + '_major_contamination.csv' + } else { + return 'major_contamination.csv' + } + } + - id: biometrics_major_json + type: File? 
+ outputBinding: + glob: |- + ${ + if (inputs.prefix) { + return inputs.prefix + '_major_contamination.json' + } else { + return 'major_contamination.json' + } + } + - id: biometrics_major_plot + type: File? + outputBinding: + glob: |- + ${ + return 'major_contamination.html' + } +requirements: + - class: ResourceRequirement + ramMin: 16000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/biometrics:0.2.5' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': biometrics + 'doap:revision': 0.2.5 diff --git a/biometrics_major/0.2.5/example_inputs.yaml b/biometrics_major/0.2.5/example_inputs.yaml new file mode 100644 index 00000000..da03de55 --- /dev/null +++ b/biometrics_major/0.2.5/example_inputs.yaml @@ -0,0 +1,11 @@ +input: + - class: File + path: "../biometrics_extract_0.2.5/test.pk" + - class: File + path: "../biometrics_extract_0.2.5/test2.pk" +database: null +major_threshold: null +prefix: null +plot: true +json: true +no_db_comparison: null diff --git a/biometrics_major/README.md b/biometrics_major/README.md new file mode 100644 index 00000000..10fa476e --- /dev/null +++ b/biometrics_major/README.md @@ -0,0 +1,51 @@ +# CWL for running biometrics major tool. + +| Tool | Latest version | Location | +|--- |--- |--- | +| biometrics | 0.2.13 | | + +The Python package source code and Dockerfile are located on GitHub.
+ +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner biometrics_major.cwl example_inputs.yaml +``` + +### Usage + +```bash +usage: biometrics_major.cwl [-h] --input INPUT [--database DATABASE] + [--major_threshold MAJOR_THRESHOLD] + [--prefix PREFIX] [--plot] [--json] + [--no_db_comparison] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --input INPUT Can be one of three types: (1) path to a CSV file + containing sample information (one per line). For + example: sample_name,sample_bam,sample_type,sample_sex + ,sample_group. (2) Path to a '*.pk' file that was + produced by the 'extract' tool. (3) Name of the sample + to analyze; this assumes there is a file named + '{sample_name}.pk' in your database directory. Can be + specified more than once. + --database DATABASE Directory to store the intermediate files after + running the extraction step. + --major_threshold MAJOR_THRESHOLD + Major contamination threshold for bad sample. + --prefix PREFIX Output file prefix. + --plot Also output plots of the data. + --json Also output data in JSON format. + --no_db_comparison Do not compare the sample(s) you provided to all + samples in the database, only compare them with each + other. 
+``` diff --git a/biometrics_minor/0.2.13/biometrics_minor.cwl b/biometrics_minor/0.2.13/biometrics_minor.cwl new file mode 100644 index 00000000..2535eb6b --- /dev/null +++ b/biometrics_minor/0.2.13/biometrics_minor.cwl @@ -0,0 +1,130 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: biometrics_minor_0_2_13 +baseCommand: + - biometrics + - minor +inputs: + - id: input + type: + type: array + items: File + inputBinding: + prefix: '--input' + inputBinding: + position: 0 + doc: >- + Can be one of three types: (1) path to a CSV file containing sample + information (one per line). For example: + sample_name,sample_bam,sample_type,sample_sex,sample_group. (2) Path to a + '*.pk' file that was produced by the 'extract' tool. (3) Name of the + sample to analyze; this assumes there is a file named '{sample_name}.pk' + in your database directory. Can be specified more than once. + - id: database + type: string? + inputBinding: + position: 0 + prefix: '--database' + doc: >- + Directory to store the intermediate files after running the extraction + step. + - default: 0.002 + id: minor_threshold + type: float? + inputBinding: + position: 0 + prefix: '--minor-threshold' + doc: Minor contamination threshold for bad sample. + - id: prefix + type: string? + inputBinding: + position: 0 + prefix: '--prefix' + doc: Output file prefix. + - id: plot + type: boolean? + inputBinding: + position: 0 + prefix: '--plot' + doc: Also output plots of the data. + - id: json + type: boolean? + inputBinding: + position: 0 + prefix: '--json' + doc: Also output data in JSON format. + - id: no_db_comparison + type: boolean? + inputBinding: + position: 0 + prefix: '--no-db-compare' + doc: >- + Do not compare the sample(s) you provided to all samples in the database, + only compare them with each other. 
+outputs: + - id: biometrics_minor_csv + type: File + outputBinding: + glob: |- + ${ + if (inputs.prefix) { + return inputs.prefix + '_minor_contamination.csv' + } else { + return 'minor_contamination.csv' + } + } + - id: biometrics_minor_json + type: File? + outputBinding: + glob: |- + ${ + if (inputs.prefix) { + return inputs.prefix + '_minor_contamination.json' + } else { + return 'minor_contamination.json' + } + } + - id: biometrics_minor_plot + type: File? + outputBinding: + glob: |- + ${ + return 'minor_contamination.html' + } + - id: biometrics_minor_sites_plot + type: File? + outputBinding: + glob: |- + ${ + return 'minor_contamination_sites.html' + } +requirements: + - class: ResourceRequirement + ramMin: 16000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/biometrics:0.2.13' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': biometrics + 'doap:revision': 0.2.13 diff --git a/biometrics_minor/0.2.13/example_inputs.yaml b/biometrics_minor/0.2.13/example_inputs.yaml new file mode 100644 index 00000000..bddb4c72 --- /dev/null +++ b/biometrics_minor/0.2.13/example_inputs.yaml @@ -0,0 +1,11 @@ +input: + - class: File + path: "../biometrics_extract_0.2.5/test.pk" + - class: File + path: "../biometrics_extract_0.2.5/test2.pk" +database: null +minor_threshold: null +prefix: null +plot: true +json: true +no_db_comparison: null diff --git a/biometrics_minor/0.2.5/biometrics_minor.cwl b/biometrics_minor/0.2.5/biometrics_minor.cwl new file mode 100644 index
00000000..dc0410cb --- /dev/null +++ b/biometrics_minor/0.2.5/biometrics_minor.cwl @@ -0,0 +1,127 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: biometrics_minor +baseCommand: + - biometrics + - minor +inputs: + - id: input + type: + type: array + items: File + inputBinding: + prefix: --input + inputBinding: + position: 0 + doc: >- + Can be one of three types: (1) path to a CSV file containing sample information (one per line). For example: sample_name,sample_bam,sample_type,sample_sex,sample_group. (2) Path to a '*.pk' file that was produced by the 'extract' tool. (3) Name of the sample to analyze; this assumes there is a file named '{sample_name}.pk' in your database directory. Can be specified more than once. + - id: database + type: string? + inputBinding: + position: 0 + prefix: --database + doc: >- + Directory to store the intermediate files after running the extraction step. + - id: minor_threshold + type: float? + default: 0.002 + inputBinding: + position: 0 + prefix: --minor-threshold + doc: >- + Minor contamination threshold for bad sample. + - id: prefix + type: string? + inputBinding: + position: 0 + prefix: --prefix + doc: >- + Output file prefix. + - id: plot + type: boolean? + inputBinding: + position: 0 + prefix: --plot + doc: >- + Also output plots of the data. + - id: json + type: boolean? + inputBinding: + position: 0 + prefix: --json + doc: >- + Also output data in JSON format. + - id: no_db_comparison + type: boolean? + inputBinding: + position: 0 + prefix: --no-db-compare + doc: >- + Do not compare the sample(s) you provided to all samples in the database, only compare them with each other. 
+outputs: + - id: biometrics_minor_csv + type: File + outputBinding: + glob: |- + ${ + if (inputs.prefix) { + return inputs.prefix + '_minor_contamination.csv' + } else { + return 'minor_contamination.csv' + } + } + - id: biometrics_minor_json + type: File? + outputBinding: + glob: |- + ${ + if (inputs.prefix) { + return inputs.prefix + '_minor_contamination.json' + } else { + return 'minor_contamination.json' + } + } + - id: biometrics_minor_plot + type: File? + outputBinding: + glob: |- + ${ + return 'minor_contamination.html' + } + - id: biometrics_minor_sites_plot + type: File? + outputBinding: + glob: |- + ${ + return 'minor_contamination_sites.html' + } +requirements: + - class: ResourceRequirement + ramMin: 16000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/biometrics:0.2.5' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': biometrics + 'doap:revision': 0.2.5 diff --git a/biometrics_minor/0.2.5/example_inputs.yaml b/biometrics_minor/0.2.5/example_inputs.yaml new file mode 100644 index 00000000..bddb4c72 --- /dev/null +++ b/biometrics_minor/0.2.5/example_inputs.yaml @@ -0,0 +1,11 @@ +input: + - class: File + path: "../biometrics_extract_0.2.5/test.pk" + - class: File + path: "../biometrics_extract_0.2.5/test2.pk" +database: null +minor_threshold: null +prefix: null +plot: true +json: true +no_db_comparison: null diff --git a/biometrics_minor/README.md b/biometrics_minor/README.md new file mode 100644 index 00000000..af94ea40 --- /dev/null +++
b/biometrics_minor/README.md @@ -0,0 +1,51 @@ +# CWL for running biometrics minor tool. + +| Tool | Latest version | Location | +|--- |--- |--- | +| biometrics | 0.2.13 | | + +The Python package source code and Dockerfile are located on GitHub. + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner biometrics_minor.cwl example_inputs.yaml +``` + +### Usage + +```bash +usage: biometrics_minor.cwl [-h] --input INPUT [--database DATABASE] + [--minor_threshold MINOR_THRESHOLD] + [--prefix PREFIX] [--plot] [--json] + [--no_db_comparison] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --input INPUT Can be one of three types: (1) path to a CSV file + containing sample information (one per line). For + example: sample_name,sample_bam,sample_type,sample_sex + ,sample_group. (2) Path to a '*.pk' file that was + produced by the 'extract' tool. (3) Name of the sample + to analyze; this assumes there is a file named + '{sample_name}.pk' in your database directory. Can be + specified more than once. + --database DATABASE Directory to store the intermediate files after + running the extraction step. + --minor_threshold MINOR_THRESHOLD + Minor contamination threshold for bad sample. + --prefix PREFIX Output file prefix. + --plot Also output plots of the data. + --json Also output data in JSON format. + --no_db_comparison Do not compare the sample(s) you provided to all + samples in the database, only compare them with each + other.
+``` diff --git a/biometrics_sexmismatch/0.2.13/biometrics_sexmismatch.cwl b/biometrics_sexmismatch/0.2.13/biometrics_sexmismatch.cwl new file mode 100644 index 00000000..e8755e55 --- /dev/null +++ b/biometrics_sexmismatch/0.2.13/biometrics_sexmismatch.cwl @@ -0,0 +1,110 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: biometrics_sexmismatch_0_2_13 +baseCommand: + - biometrics + - sexmismatch +inputs: + - id: input + type: + type: array + items: File + inputBinding: + prefix: '--input' + inputBinding: + position: 0 + doc: >- + Can be one of three types: (1) path to a CSV file containing sample + information (one per line). For example: + sample_name,sample_bam,sample_type,sample_sex,sample_group. (2) Path to a + '*.pk' file that was produced by the 'extract' tool. (3) Name of the + sample to analyze; this assumes there is a file named '{sample_name}.pk' + in your database directory. Can be specified more than once. + - id: database + type: string? + inputBinding: + position: 0 + prefix: '--database' + doc: >- + Directory to store the intermediate files after running the extraction + step. + - default: 50 + id: coverage_threshold + type: int? + inputBinding: + position: 0 + prefix: '--coverage-threshold' + doc: Samples with Y chromosome above this value will be considered male. + - id: prefix + type: string? + inputBinding: + position: 0 + prefix: '--prefix' + doc: Output file prefix. + - id: json + type: boolean? + inputBinding: + position: 0 + prefix: '--json' + doc: Also output data in JSON format. + - id: no_db_comparison + type: boolean? + inputBinding: + position: 0 + prefix: '--no-db-compare' + doc: >- + Do not compare the sample(s) you provided to all samples in the database, + only compare them with each other. 
+outputs: + - id: biometrics_sexmismatch_csv + type: File + outputBinding: + glob: |- + ${ + if (inputs.prefix) { + return inputs.prefix + '_sex_mismatch.csv' + } else { + return 'sex_mismatch.csv' + } + } + - id: biometrics_sexmismatch_json + type: File? + outputBinding: + glob: |- + ${ + if (inputs.prefix) { + return inputs.prefix + '_sex_mismatch.json' + } else { + return 'sex_mismatch.json' + } + } +requirements: + - class: ResourceRequirement + ramMin: 16000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/biometrics:0.2.13' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': biometrics + 'doap:revision': 0.2.13 diff --git a/biometrics_sexmismatch/0.2.13/example_inputs.yaml b/biometrics_sexmismatch/0.2.13/example_inputs.yaml new file mode 100644 index 00000000..60832e43 --- /dev/null +++ b/biometrics_sexmismatch/0.2.13/example_inputs.yaml @@ -0,0 +1,10 @@ +input: + - class: File + path: "../biometrics_extract_0.2.5/test.pk" + - class: File + path: "../biometrics_extract_0.2.5/test2.pk" +database: null +coverage_threshold: null +prefix: null +json: true +no_db_comparison: null diff --git a/biometrics_sexmismatch/0.2.5/biometrics_sexmismatch.cwl b/biometrics_sexmismatch/0.2.5/biometrics_sexmismatch.cwl new file mode 100644 index 00000000..bae28a19 --- /dev/null +++ b/biometrics_sexmismatch/0.2.5/biometrics_sexmismatch.cwl @@ -0,0 +1,106 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 
'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: biometrics_sexmismatch +baseCommand: + - biometrics + - sexmismatch +inputs: + - id: input + type: + type: array + items: File + inputBinding: + prefix: --input + inputBinding: + position: 0 + doc: >- + Can be one of three types: (1) path to a CSV file containing sample information (one per line). For example: sample_name,sample_bam,sample_type,sample_sex,sample_group. (2) Path to a '*.pk' file that was produced by the 'extract' tool. (3) Name of the sample to analyze; this assumes there is a file named '{sample_name}.pk' in your database directory. Can be specified more than once. + - id: database + type: string? + inputBinding: + position: 0 + prefix: --database + doc: >- + Directory to store the intermediate files after running the extraction step. + - id: coverage_threshold + type: int? + default: 50 + inputBinding: + position: 0 + prefix: --coverage-threshold + doc: >- + Samples with Y chromosome above this value will be considered male. + - id: prefix + type: string? + inputBinding: + position: 0 + prefix: --prefix + doc: >- + Output file prefix. + - id: json + type: boolean? + inputBinding: + position: 0 + prefix: --json + doc: >- + Also output data in JSON format. + - id: no_db_comparison + type: boolean? + inputBinding: + position: 0 + prefix: --no-db-compare + doc: >- + Do not compare the sample(s) you provided to all samples in the database, only compare them with each other. +outputs: + - id: biometrics_sexmismatch_csv + type: File + outputBinding: + glob: |- + ${ + if (inputs.prefix) { + return inputs.prefix + '_sex_mismatch.csv' + } else { + return 'sex_mismatch.csv' + } + } + - id: biometrics_sexmismatch_json + type: File? 
+ outputBinding: + glob: |- + ${ + if (inputs.prefix) { + return inputs.prefix + '_sex_mismatch.json' + } else { + return 'sex_mismatch.json' + } + } +requirements: + - class: ResourceRequirement + ramMin: 16000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/biometrics:0.2.5' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': biometrics + 'doap:revision': 0.2.5 diff --git a/biometrics_sexmismatch/0.2.5/example_inputs.yaml b/biometrics_sexmismatch/0.2.5/example_inputs.yaml new file mode 100644 index 00000000..60832e43 --- /dev/null +++ b/biometrics_sexmismatch/0.2.5/example_inputs.yaml @@ -0,0 +1,10 @@ +input: + - class: File + path: "../biometrics_extract_0.2.5/test.pk" + - class: File + path: "../biometrics_extract_0.2.5/test2.pk" +database: null +coverage_threshold: null +prefix: null +json: true +no_db_comparison: null diff --git a/biometrics_sexmismatch/README.md b/biometrics_sexmismatch/README.md new file mode 100644 index 00000000..81b02145 --- /dev/null +++ b/biometrics_sexmismatch/README.md @@ -0,0 +1,52 @@ +# CWL for running biometrics sexmismatch tool. + +| Tool | Latest version | Location | +|--- |--- |--- | +| biometrics | 0.2.13 | | + +The Python package source code and Dockerfile are located on GitHub.
+ +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner biometrics_sexmismatch.cwl example_inputs.yaml +``` + +### Usage + +```bash +usage: biometrics_sexmismatch.cwl [-h] --input INPUT + [--database DATABASE] + [--coverage_threshold COVERAGE_THRESHOLD] + [--prefix PREFIX] [--json] + [--no_db_comparison] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --input INPUT Can be one of three types: (1) path to a CSV file + containing sample information (one per line). For + example: sample_name,sample_bam,sample_type,sample_sex + ,sample_group. (2) Path to a '*.pk' file that was + produced by the 'extract' tool. (3) Name of the sample + to analyze; this assumes there is a file named + '{sample_name}.pk' in your database directory. Can be + specified more than once. + --database DATABASE Directory to store the intermediate files after + running the extraction step. + --coverage_threshold COVERAGE_THRESHOLD + Samples with Y chromosome above this value will be + considered male. + --prefix PREFIX Output file prefix. + --json Also output data in JSON format. + --no_db_comparison Do not compare the sample(s) you provided to all + samples in the database, only compare them with each + other. 
+``` diff --git a/bwa_mem_0.7.17/README.md b/bwa_mem_0.7.17/README.md new file mode 100644 index 00000000..fd6785d6 --- /dev/null +++ b/bwa_mem_0.7.17/README.md @@ -0,0 +1,136 @@ +# CWL and Dockerfile for running BWA MEM + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +| ------ | ------- | ----------------------------------------------- | +| ubuntu | 16.04 | - | +| BWA | 0.7.17 | https://github.com/lh3/bwa/releases/tag/v0.7.17 | + +[![](https://images.microbadger.com/badges/version/mskaccess/bwa_mem_0.7.17.svg)](https://microbadger.com/images/mskaccess/bwa_mem_0.7.17 "Get your own version badge on microbadger.com") [![](https://images.microbadger.com/badges/image/mskaccess/bwa_mem_0.7.17.svg)](https://microbadger.com/images/mskaccess/bwa_mem_0.7.17 "Get your own image badge on microbadger.com") + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner bwa_mem_0.7.17.cwl example_inputs.yaml +``` + +**If at MSK, using the JUNO cluster having installed toil-msk version 3.21.1 you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/bwa_mem_0.7.17.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> toil-cwl-runner --singularity --logFile /path/to/bwa_mem_toil.log --jobStore /path/to/bwa_mem_jobStore --batchSystem lsf --workDir /path/to/bwa_mem_toil_log --outdir . 
--writeLogs /path/to/bwa_mem_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --disableChaining --maxLogFileSize 20000000000 /path/to/bwa_mem_0.7.17.cwl /path/to/inputs.yaml > bwa_mem_toil.stdout 2> bwa_mem_toil.stderr & +``` + +### Usage + +``` +usage: bwa_mem_0.7.17.cwl [-h] [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] --reads + READS --reference REFERENCE [-A A] [-B B] [-C] + [-E E] [-L L] [-M] [-O O] [-P] [-S] [-T T] [-U U] + [-a] [-c C] [-d D] [-k K] [-K K] [--output OUTPUT] + [-p] [-r R] [-v V] [-w W] [-y Y] [-D D] [-W W] + [-m M] [-e] [-x X] [-j J] [--he HE] [-V] [-Y] [-I I] + [-R R] [--sample_id SAMPLE_ID] [--lane_id LANE_ID] + [--platform PLATFORM] + [--platform_unit PLATFORM_UNIT] + [--center_name CENTER_NAME] + [--library_id LIBRARY_ID] + [job_order] + +bwa mem [-aCHMpP] [-t nThreads] [-k minSeedLen] [-w bandWidth] [-d zDropoff] +[-r seedSplitRatio] [-c maxOcc] [-A matchScore] [-B mmPenalty] [-O gapOpenPen] +[-E gapExtPen] [-L clipPen] [-U unpairPen] [-R RGline] [-v verboseLevel] +db.prefix reads.fq [mates.fq] Align 70bp-1Mbp query sequences with the BWA-MEM +algorithm. Briefly, the algorithm works by seeding alignments with maximal +exact matches (MEMs) and then extending seeds with the affine-gap Smith- +Waterman algorithm (SW). If mates.fq file is absent and option -p is not set, +this command regards input reads are single-end. If mates.fq is present, this +command assumes the i-th read in reads.fq and the i-th read in mates.fq +constitute a read pair. If -p is used, the command assumes the 2i-th and the +(2i+1)-th read in reads.fq constitute a read pair (such input file is said to +be interleaved). In this case, mates.fq is ignored. In the paired-end mode, +the mem command will infer the read orientation and the insert size +distribution from a batch of reads. The BWA-MEM algorithm performs local +alignment. 
It may produce multiple primary alignments for different part of a +query sequence. This is a crucial feature for long sequences. However, some +tools such as Picard’s markDuplicates does not work with split alignments. One +may consider to use option -M to flag shorter split hits as secondary. + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --reads READS + --reference REFERENCE + -A A score for a sequence match, which scales options + -TdBOELU unless overridden [1] + -B B penalty for a mismatch [4] + -C append FASTA/FASTQ comment to SAM output + -E E gap extension penalty; a gap of size k cost '{-O} + + {-E}*k' [1,1] + -L L penalty for 5'- and 3'-end clipping [5,5] + -M + -O O gap open penalties for deletions and insertions [6,6] + -P skip pairing; mate rescue performed unless -S also in + use + -S skip mate rescue + -T T minimum score to output [30] + -U U penalty for an unpaired read pair [17] + -a output all alignments for SE or unpaired PE + -c C skip seeds with more than INT occurrences [500] + -d D off-diagonal X-dropoff [100] + -k K minimum seed length [19] + -K K process INT input bases in each batch regardless of + nThreads (for reproducibility) [] + --output OUTPUT + -p smart pairing (ignoring in2.fq) + -r R look for internal seeds inside a seed longer than {-k} + * FLOAT [1.5] + -v V verbosity level: 1=error, 2=warning, 3=message, + 4+=debugging [3] + -w W band width for banded alignment [100] + -y Y seed occurrence for the 3rd round seeding [20] + -D D drop chains shorter than FLOAT fraction of the longest + overlapping chain [0.50] + -W W discard a chain if seeded bases shorter than INT [0] + -m M perform at most INT rounds of mate rescues for each + read [50] + -e + -x X read type. 
Setting -x changes multiple parameters + unless overridden [null] pacbio: -k17 -W40 -r10 -A1 + -B1 -O1 -E1 -L0 (PacBio reads to ref) ont2d: -k14 -W20 + -r10 -A1 -B1 -O1 -E1 -L0 (Oxford Nanopore 2D-reads to + ref) intractg: -B9 -O16 -L5 (intra-species contigs to + ref) + -j J treat ALT contigs as part of the primary assembly + (i.e. ignore .alt file) + --he HE if there are 80% of the max + score, output all in XA [5,200] + -V output the reference FASTA header in the XR tag + -Y use soft clipping for supplementary alignments + -I I + -R R STR read group header line such as '@RG\tID -foo\tSM + -bar' [null] + --sample_id SAMPLE_ID + --lane_id LANE_ID + --platform PLATFORM + --platform_unit PLATFORM_UNIT + --center_name CENTER_NAME + --library_id LIBRARY_ID +``` diff --git a/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl b/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl new file mode 100644 index 00000000..5e7e55dd --- /dev/null +++ b/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl @@ -0,0 +1,351 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +baseCommand: + - bwa + - mem +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: reads + type: 'File[]' + inputBinding: + position: 3 + - id: reference + type: File + inputBinding: + position: 2 + secondaryFiles: + - .amb + - .ann + - .bwt + - .pac + - .sa + - .fai + - id: A + type: int? + inputBinding: + position: 0 + prefix: '-A' + doc: >- + score for a sequence match, which scales options -TdBOELU unless + overridden [1] + - id: B + type: int? + inputBinding: + position: 0 + prefix: '-B' + doc: 'penalty for a mismatch [4]' + - id: C + type: boolean? 
+ inputBinding: + position: 0 + prefix: '-C' + doc: append FASTA/FASTQ comment to SAM output + - id: E + type: 'int[]?' + inputBinding: + position: 0 + prefix: '-E' + itemSeparator: ',' + doc: 'gap extension penalty; a gap of size k cost ''{-O} + {-E}*k'' [1,1]' + - id: L + type: 'int[]?' + inputBinding: + position: 0 + prefix: '-L' + itemSeparator: ',' + doc: 'penalty for 5''- and 3''-end clipping [5,5]' + - id: M + type: boolean? + inputBinding: + position: 0 + prefix: '-M' + - id: O + type: 'int[]?' + inputBinding: + position: 0 + prefix: '-O' + itemSeparator: ',' + doc: 'gap open penalties for deletions and insertions [6,6]' + - id: P + type: boolean? + inputBinding: + position: 0 + prefix: '-P' + doc: skip pairing; mate rescue performed unless -S also in use + - id: S + type: boolean? + inputBinding: + position: 0 + prefix: '-S' + doc: skip mate rescue + - id: T + type: int? + inputBinding: + position: 0 + prefix: '-T' + doc: 'minimum score to output [30]' + - id: U + type: int? + inputBinding: + position: 0 + prefix: '-U' + doc: 'penalty for an unpaired read pair [17]' + - id: a + type: boolean? + inputBinding: + position: 0 + prefix: '-a' + doc: output all alignments for SE or unpaired PE + - id: c + type: int? + inputBinding: + position: 0 + prefix: '-c' + doc: 'skip seeds with more than INT occurrences [500]' + - id: d + type: int? + inputBinding: + position: 0 + prefix: '-d' + doc: 'off-diagonal X-dropoff [100]' + - id: k + type: int? + inputBinding: + position: 0 + prefix: '-k' + doc: 'minimum seed length [19]' + - id: K + type: int? + inputBinding: + position: 0 + prefix: '-K' + doc: >- + process INT input bases in each batch regardless of nThreads (for + reproducibility) [] + - id: output + type: string? + - id: p + type: boolean? + inputBinding: + position: 0 + prefix: '-p' + doc: smart pairing (ignoring in2.fq) + - id: r + type: float? 
+ inputBinding: + position: 0 + prefix: '-r' + doc: 'look for internal seeds inside a seed longer than {-k} * FLOAT [1.5]' + - id: v + type: int? + inputBinding: + position: 0 + prefix: '-v' + doc: 'verbosity level: 1=error, 2=warning, 3=message, 4+=debugging [3]' + - id: w + type: int? + inputBinding: + position: 0 + prefix: '-w' + doc: 'band width for banded alignment [100]' + - id: 'y' + type: int? + inputBinding: + position: 0 + prefix: '-y' + doc: 'seed occurrence for the 3rd round seeding [20]' + - id: D + type: float? + inputBinding: + position: 0 + prefix: '-D' + doc: >- + drop chains shorter than FLOAT fraction of the longest overlapping chain + [0.50] + - id: W + type: int? + inputBinding: + position: 0 + prefix: '-W' + doc: 'discard a chain if seeded bases shorter than INT [0]' + - id: m + type: int? + inputBinding: + position: 0 + prefix: '-m' + doc: 'perform at most INT rounds of mate rescues for each read [50]' + - id: e + type: boolean? + inputBinding: + position: 0 + prefix: '-e' + - id: x + type: string? + inputBinding: + position: 0 + prefix: '-x' + doc: >- + read type. Setting -x changes multiple parameters unless overridden [null] + pacbio: -k17 -W40 -r10 -A1 -B1 -O1 -E1 -L0 (PacBio reads to ref) ont2d: + -k14 -W20 -r10 -A1 -B1 -O1 -E1 -L0 (Oxford Nanopore 2D-reads to ref) + intractg: -B9 -O16 -L5 (intra-species contigs to ref) + - id: H + type: boolean? + inputBinding: + position: 0 + prefix: '-H' + doc: >- + Use hard clipping ’H’ in the SAM output. This option may dramatically + reduce the redundancy of output when mapping long contig or BAC sequences + - id: j + type: File? + inputBinding: + position: 0 + prefix: '-j' + doc: >- + treat ALT contigs as part of the primary assembly (i.e. ignore + .alt file) + - id: he + type: 'int[]?' + inputBinding: + position: 0 + prefix: '-h' + itemSeparator: ',' + doc: >- + if there are 80% of the max score, output all in XA + [5,200] + - id: V + type: boolean? 
+ inputBinding: + position: 0 + prefix: '-V' + doc: output the reference FASTA header in the XR tag + - id: 'Y' + type: boolean? + inputBinding: + position: 0 + prefix: '-Y' + doc: use soft clipping for supplementary alignments + - id: I + type: string? + inputBinding: + position: 0 + prefix: '-I' + - id: R + type: string? + doc: 'STR read group header line such as ''@RG\tID -foo\tSM -bar'' [null]' + - id: sample_id + type: string? + - id: lane_id + type: string? + - id: platform + type: string? + - id: platform_unit + type: string? + - id: center_name + type: string? + - id: library_id + type: string? +outputs: + - id: bwa_mem_output_sam + type: File + outputBinding: + glob: |- + ${ + if (inputs.output) + return inputs.output; + return inputs.reads[0].basename.replace(/(fastq.gz)|(fq.gz)/, 'sam'); + } +doc: >- + bwa mem [-aCHMpP] [-t nThreads] [-k minSeedLen] [-w bandWidth] [-d zDropoff] + [-r seedSplitRatio] [-c maxOcc] [-A matchScore] [-B mmPenalty] [-O gapOpenPen] + [-E gapExtPen] [-L clipPen] [-U unpairPen] [-R RGline] [-v verboseLevel] + db.prefix reads.fq [mates.fq] + + Align 70bp-1Mbp query sequences with the BWA-MEM algorithm. Briefly, the + algorithm works by seeding alignments with maximal exact matches (MEMs) and + then extending seeds with the affine-gap Smith-Waterman algorithm (SW). + + + If mates.fq file is absent and option -p is not set, this command regards + input reads are single-end. If mates.fq is present, this command assumes the + i-th read in reads.fq and the i-th read in mates.fq constitute a read pair. If + -p is used, the command assumes the 2i-th and the (2i+1)-th read in reads.fq + constitute a read pair (such input file is said to be interleaved). In this + case, mates.fq is ignored. In the paired-end mode, the mem command will infer + the read orientation and the insert size distribution from a batch of reads. + + + The BWA-MEM algorithm performs local alignment. 
It may produce multiple + primary alignments for different part of a query sequence. This is a crucial + feature for long sequences. However, some tools such as Picard’s + markDuplicates does not work with split alignments. One may consider to use + option -M to flag shorter split hits as secondary. +label: bwa_mem_0.7.17 +arguments: + - position: 0 + prefix: '-t' + valueFrom: $(runtime.cores) + - position: 0 + prefix: '-R' + valueFrom: |- + ${ + if (inputs.sample_id) { + var rg_id = "@RG\\tID:" + inputs.sample_id + "\\tSM:" + inputs.sample_id; + if (inputs.library_id) { + rg_id += "\\tLB:" + inputs.library_id; + } if (inputs.platform) { + rg_id += "\\tPL:" + inputs.platform; + } if (inputs.platform_unit) { + rg_id += "\\tPU:" + inputs.platform_unit; + } if (inputs.center_name) { + rg_id += "\\tCN:" + inputs.center_name; + } + return rg_id + } else { + return inputs.R + } + } +requirements: + - class: ResourceRequirement + ramMin: 34000 + coresMin: 16 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/bwa:0.7.17' + - class: InlineJavascriptRequirement +stdout: |- + ${ + if (inputs.output) + return inputs.output; + return inputs.reads[0].basename.replace(/(fastq.gz)|(fq.gz)/, 'sam'); + } +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:johnsoni@mskcc.org' + 'foaf:name': Ian Johnson + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': bwa + 'doap:revision': 0.7.17 diff --git a/bwa_mem_0.7.17/container/Dockerfile b/bwa_mem_0.7.17/container/Dockerfile new file mode 100644 index 00000000..49cc6831 --- /dev/null +++ b/bwa_mem_0.7.17/container/Dockerfile @@ -0,0 +1,23 @@ +FROM ubuntu:16.04 + +LABEL maintainer="Ian Johnson 
(johnsoni@mskcc.org)" \ + version.image="0.1.0" \ + version.bwa="0.7.17" \ + version.ubuntu="16.04" \ + source.bwa="https://github.com/lh3/bwa/releases/tag/v0.7.17" + +ENV BWA_VERSION 0.7.17 + +RUN apt-get -y update \ + # install build tools and dependencies + && apt-get -y install build-essential zlib1g-dev wget unzip \ + # download and unzip bwa + && cd /tmp && wget "https://github.com/lh3/bwa/archive/v${BWA_VERSION}.zip" \ + && unzip "v${BWA_VERSION}.zip" \ + # build + && cd "/tmp/bwa-${BWA_VERSION}" \ + && make \ + # move binaries to /usr/bin + && mv "/tmp/bwa-${BWA_VERSION}/bwa" /usr/bin \ + # clean up + && rm -rf /tmp/* diff --git a/bwa_mem_0.7.17/example_inputs.yaml b/bwa_mem_0.7.17/example_inputs.yaml new file mode 100644 index 00000000..74683384 --- /dev/null +++ b/bwa_mem_0.7.17/example_inputs.yaml @@ -0,0 +1,9 @@ +reads: +- class: File + path: "path/to/fastq_R1.fastq" +- class: File + path: "path/to/fastq_R2.fastq" +reference: + class: File + path: "/path/to/reference.fasta" +sample_id: test_sample_id diff --git a/bwa_mem_0.7.5a/bwa_mem_0.7.5a.cwl b/bwa_mem_0.7.5a/bwa_mem_0.7.5a.cwl index 8741d4fd..41701c6c 100644 --- a/bwa_mem_0.7.5a/bwa_mem_0.7.5a.cwl +++ b/bwa_mem_0.7.5a/bwa_mem_0.7.5a.cwl @@ -4,7 +4,6 @@ $namespaces: dct: 'http://purl.org/dc/terms/' doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' - sbg: 'https://www.sevenbridges.com/' baseCommand: - bwa - mem diff --git a/cwl_commandlinetools/__init__.py b/cwl_commandlinetools/__init__.py new file mode 100644 index 00000000..30f6dd27 --- /dev/null +++ b/cwl_commandlinetools/__init__.py @@ -0,0 +1,7 @@ +# -*- coding: utf-8 -*- + +"""Top-level package for cwl-commandlinetools.""" + +__author__ = """msk-access""" +__email__ = 'msk.access@gmail.com' +__version__ = '1.2.0' diff --git a/cwl_commandlinetools/cwl_commandlinetools.py b/cwl_commandlinetools/cwl_commandlinetools.py new file mode 100644 index 00000000..7fbbae4f --- /dev/null +++ 
b/cwl_commandlinetools/cwl_commandlinetools.py @@ -0,0 +1,3 @@ +# -*- coding: utf-8 -*- + +"""Main module.""" diff --git a/delly_0.9.1/README.md b/delly_0.9.1/README.md new file mode 100644 index 00000000..08dcf390 --- /dev/null +++ b/delly_0.9.1/README.md @@ -0,0 +1,81 @@ +# CWL and Dockerfile for running Delly + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +|--- |--- |--- | +| ubuntu | 18.04 | - | +| DELLY | 0.9.1 | https://github.com/dellytools/delly | + + +## CWL + +- CWL specification 1.0 +- Use example_input.yml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner delly_0.9.1.cwl example_input.yml +``` +**If at MSK, using the JUNO cluster having installed toil-msk version 3.21.1 you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/delly_0.9.1.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> toil-cwl-runner --singularity --logFile /path/to/delly_toil.log --jobStore /path/to/delly_jobStore --batchSystem lsf --workDir /path/to/delly_toil_log --outdir . 
--writeLogs /path/to/delly_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --disableChaining --maxLogFileSize 20000000000 /path/to/delly_0.9.1.cwl /path/to/inputs.yaml > delly_toil.stdout 2> delly_toil.stderr & +``` + +### Usage + +``` +usage: delly_0.9.1.cwl [-h] [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] + [--out_file OUT_FILE] --reference_genome + REFERENCE_GENOME [--exclude_regions EXCLUDE_REGIONS] + [--vcffile VCFFILE] [--svtype SVTYPE] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --out_file OUT_FILE The name to be used for the output bcf file + --reference_genome REFERENCE_GENOME + reference genome fasta file + --exclude_regions EXCLUDE_REGIONS + file with regions to exclude + --vcffile VCFFILE input VCF/BCF file for genotyping + --svtype SVTYPE SV type to compute [DEL, INS, DUP, INV, BND, ALL] +``` + +## Disclaimer +Parts of this code were borrowed from the delly repository, https://github.com/dellytools/delly, which uses the following redistribution license: + +Copyright (c) 2012- European Molecular Biology Laboratory (EMBL) +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. 
Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/delly_0.9.1/container/Dockerfile b/delly_0.9.1/container/Dockerfile new file mode 100644 index 00000000..6b66905c --- /dev/null +++ b/delly_0.9.1/container/Dockerfile @@ -0,0 +1,75 @@ +# taken from: https://github.com/dellytools/delly/blob/main/Dockerfile +# modify for additional functionality +################## Base Image ########## +FROM ubuntu:18.04 +################## ARGUMENTS/Environments ########## + +ARG BUILD_DATE +ARG BUILD_VERSION +ARG LICENSE="Apache-2.0" +ARG DELLY_VERSION +ARG VCS_REF +################## METADATA ######################## +LABEL org.opencontainers.image.vendor="MSKCC" +LABEL org.opencontainers.image.authors="Eric Buehlere (buehlere@mskcc.org)" + +LABEL org.opencontainers.image.created=${BUILD_DATE} \ + org.opencontainers.image.version=${BUILD_VERSION} \ + org.opencontainers.image.licenses=${LICENSE} \ + org.opencontainers.image.version.delly=${DELLY_VERSION} \ + org.opencontainers.image.vcs-url="https://github.com/dellytools/delly.git" \ + org.opencontainers.image.vcs-ref=${VCS_REF} + 
+LABEL org.opencontainers.image.description="This container uses ubuntu:18.04 as the base image to build \ + DELLY version ${DELLY_VERSION}" + + +################## INSTALL ########################## +RUN apt-get update && apt-get install -y \ + autoconf \ + build-essential \ + cmake \ + g++ \ + gfortran \ + git \ + libcurl4-gnutls-dev \ + hdf5-tools \ + libboost-date-time-dev \ + libboost-program-options-dev \ + libboost-system-dev \ + libboost-filesystem-dev \ + libboost-iostreams-dev \ + libbz2-dev \ + libhdf5-dev \ + libncurses-dev \ + liblzma-dev \ + zlib1g-dev \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +# set environment +ENV BOOST_ROOT /usr + +# install delly +RUN cd /opt \ + && git clone --recursive https://github.com/dellytools/delly.git \ + && cd /opt/delly/ \ + && git checkout ${DELLY_VERSION} \ + && make STATIC=1 all \ + && make install + + +# Multi-stage build +FROM alpine:latest +RUN mkdir -p /opt/delly/bin +WORKDIR /opt/delly/bin +COPY --from=0 /opt/delly/bin/delly . + +# Workdir +WORKDIR /root/ + +# Add Delly to PATH +ENV PATH="/opt/delly/bin:${PATH}" + +# by default /bin/sh +CMD ["/bin/sh"] diff --git a/delly_0.9.1/delly_0.9.1.cwl b/delly_0.9.1/delly_0.9.1.cwl new file mode 100644 index 00000000..9c6f82dc --- /dev/null +++ b/delly_0.9.1/delly_0.9.1.cwl @@ -0,0 +1,129 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +baseCommand: + - delly +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: out_file + type: string? 
+ inputBinding: + position: 0 + prefix: '-o' + shellQuote: false + doc: The name to be used for the output bcf file + - id: reference_genome + type: File + inputBinding: + position: 0 + prefix: '-g' + shellQuote: false + doc: reference genome fasta file + - id: input_bams + type: + - File + - type: array + items: File + inputBinding: + position: 99 + shellQuote: false + doc: >- + an indexed bam tumor file, an indexed bam control file or it can be an + array of indexed bam files + secondaryFiles: + - ^.bai + - id: exclude_regions + type: File? + inputBinding: + position: 0 + prefix: '-x' + shellQuote: false + doc: file with regions to exclude + - id: vcffile + type: File? + inputBinding: + position: 0 + prefix: '-v' + shellQuote: false + doc: input VCF/BCF file for genotyping + - id: svtype + type: string? + inputBinding: + position: 0 + prefix: '-t' + shellQuote: false + doc: 'SV type to compute [DEL, INS, DUP, INV, BND, ALL]' + - id: geno_qual + type: int? + inputBinding: + position: 71 + prefix: '-u' + doc: min. mapping quality for genotyping + - id: dump + type: File? + inputBinding: + position: 0 + prefix: '-d' + doc: gzipped output file for SV-reads (optional) + - id: map_qual + type: int? + inputBinding: + position: 0 + prefix: '-q' + doc: min. paired-end (PE) mapping quality + - id: qual_tra + type: int? + inputBinding: + position: 0 + prefix: '-r' + doc: min. PE quality for translocation + - id: mad_cutoff + type: int? 
+ inputBinding: + position: 0 + prefix: '-s' + doc: 'insert size cutoff, median+s*MAD (deletions only)' +outputs: + - id: bcf_out + type: File + outputBinding: + glob: $(inputs.out_file) +arguments: + - call +requirements: + - class: ShellCommandRequirement + - class: ResourceRequirement + ramMin: 60000 + coresMin: 16 +hints: + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/delly:0.9.1' +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:buehlere@mskcc.org' + 'foaf:name': Eric Buehler + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': delly + 'doap:revision': 0.9.1 diff --git a/delly_0.9.1/example_input.yml b/delly_0.9.1/example_input.yml new file mode 100644 index 00000000..d84fb73c --- /dev/null +++ b/delly_0.9.1/example_input.yml @@ -0,0 +1,5 @@ +out_file: "name_of_output.bcf" +reference_genome: {class: File, path: path_to_file.fasta} +input_bams: + - {class: File, path: /path/to/file.bam} + - {class: File, path: /path/to/file.bam} diff --git a/disambiguate_1.0.0/README.md b/disambiguate_1.0.0/README.md deleted file mode 100644 index b1cd50f0..00000000 --- a/disambiguate_1.0.0/README.md +++ /dev/null @@ -1,52 +0,0 @@ - # CWL and Dockerfile for running Disambiguate - -## Version of tools in docker image (/container/Dockerfile) - -Dockerfile uses `biocontainers/biocontainers:latest` as a base image and installs tools from `bioconda`. 
- -| Tool | Version | Location | Notes | -|--- |--- |--- | - | -| biocontainers | latest | https://hub.docker.com/r/biocontainers/biocontainers/ | base image; "latest" not actually latest version, just tag name on docker hub| -| bamtools | 2.4.0 | https://bioconda.github.io/recipes/bamtools/README.html | - | -| ngs-disambiguate | 2016.11.10 | https://bioconda.github.io/recipes/ngs-disambiguate/README.html | - | - -[![](https://images.microbadger.com/badges/version/mskcc/disambiguate:1.0.0.svg)](https://microbadger.com/images/mskcc/disambiguate:1.0.0 "Get your own version badge on microbadger.com") [![](https://images.microbadger.com/badges/image/mskcc/disambiguate:1.0.0.svg)](https://microbadger.com/images/mskcc/disambiguate:1.0.0 "Get your own image badge on microbadger.com") - - -## CWL - -- CWL specification 1.0 -- Use `example_inputs.yaml` to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner disambiguate_1.0.0.cwl example_inputs.yaml -``` - -## Command -``` -USAGE: - - cwltool disambiguate_1.0.0.cwl \ - --prefix \ - --output_dir \ - [--aligner ] \ - - -Where: - - --prefix - (required) Sample ID or name used as prefix. 
Do not include .bam - - --output_dir - (required) Output directory - - --aligner - Aligner option {bwa(default),tophat,hisat2,star} - - - (required) Species A BAM file - - - (required) Species B BAM file -``` diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 00000000..324e5df3 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,34 @@ +--- +description: >- + Central location for storing common workflow language based command line tools + for building msk-access workflows +--- + +# MSK-ACCESS command-line tools + +[![PyPI version](https://badge.fury.io/py/cwl-commandlinetools.svg)](https://badge.fury.io/py/cwl-commandlinetools) [![Build Status](https://travis-ci.com/msk-access/cwl-commandlinetools.svg?branch=master)](https://travis-ci.com/msk-access/cwl-commandlinetools) + +* Free software: Apache Software License 2.0 +* Documentation: [https://msk-access.gitbook.io/command-line-tools-cwl/](https://msk-access.gitbook.io/command-line-tools-cwl/) + +## Features + +Create command line tools in common workflow language to generate msk-access workflows. + +## Installation + +Clone the repository: + +```text +git clone --depth 50 https://github.com/msk-access/cwl-commandlinetools.git +``` + +**Follow the README in respective tool folder for execution of the tool.** + +## Credits + +This package was created with Cookiecutter _and the `audreyr/cookiecutter-pypackage`_ project template. 
+ +* Cookiecutter: [https://github.com/audreyr/cookiecutter](https://github.com/audreyr/cookiecutter) +* `audreyr/cookiecutter-pypackage`: [https://github.com/audreyr/cookiecutter-pypackage](https://github.com/audreyr/cookiecutter-pypackage) + diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md new file mode 100644 index 00000000..66fd9a0f --- /dev/null +++ b/docs/SUMMARY.md @@ -0,0 +1,90 @@ +# Table of contents + +* [MSK-ACCESS command-line tools](README.md) +* [ABRA2](abra2/README.md) + * [v2.17](abra2/abra2_2.17.md) + * [v2.22](abra2/abra2_2.22.md) +* access_utils + * [0.1.1](../access_utils/0.1.1/README.md) +* [bcftools](bcftools/README.md) + * [bcftools concat v1.6](bcftools/bcftools_concat_1.6.md) + * [bcftools bgzip v1.15.1](bcftools/bcftools_bgzip_v1.15.1.md) + * [bcftools tabix v1.15.1](bcftools/bcftools_tabix_v1.15.1.md) +* [Bedtools](bedtools/README.md) + * [genomecov v2.28.0\_cv2](bedtools/bedtools_genomecov_v2.28.0_cv2.md) + * [merge v2.28.0\_cv2](bedtools/bedtools_merge_v2.28.0_cv2.md) + * [sortVcf v2.28.0\_cv2](bedtools/bedtools_sortvcf_v2.28.0_cv2.md) +* Biometrics + * [extract](../biometrics_extract/README.md) + * [minor](../biometrics_minor/README.md) + * [major](../biometrics_major/README.md) + * [genotype](../biometrics_genotype/README.md) + * [sexmismatch](../biometrics_sexmismatch/README.md) +* [Delly](delly/README.md) + * [delly call 0.9.1](delly/delly_call_0.9.1.md) +* [Disambiguate](disambiguate/README.md) + * [v1.0.0](disambiguate/disambiguate_1.0.0.md) +* [Fgbio](fgbio/README.md) + * [CallDuplexConsensusReads v1.2.0](fgbio/fgbio_call_duplex_consensus_reads_1.2.0.md) + * [CollectDuplexSeqMetrics v1.2.0](fgbio/fgbio_collect_duplex_seq_metrics_1.2.0.md) + * [GroupReadsByUmi v1.2.0](fgbio/fgbio_group_reads_by_umi_1.2.0.md) + * [FastqToBam v1.2.0](fgbio/fgbio_fastq_to_bam_1.2.0.md) + * [FilterConsensusReads v1.2.0](fgbio/fgbio_filter_consensus_reads_1.2.0.md) + * [simplex\_filter v0.1.8](fgbio/fgbio_postprocessing_simplex_filter_0.1.8.md) +* 
[GATK](gatk/README.md) + * [ApplyBQSR v4.1.2.0](gatk/gatk_applybqsr_4.1.2.0.md) + * [ApplyBQSR v4.1.8.1](gatk/gatk_apply_bqsr_4.1.8.1.md) + * [BaseRecalibrator v4.1.2.0](gatk/gatk_baserecalibrator_4.1.2.0.md) + * [BaseRecalibrator v4.1.8.1](gatk/gatk_base_recalibrator_4.1.8.1.md) + * [MergeBamAlignment v4.1.8.0](gatk/gatk_merge_bam_alignment_4.1.8.0.md) + * [MergeSamFiles v4.1.8.0](gatk/gatk_merge_sam_files_4.1.8.0.md) + * [SamToFastq v4.1.8.0](gatk/samtofastq-v4.1.8.0.md) +* GetBaseCountsMultiSample + * [1.2.5](../getbasecountsmultisample/1.2.5/README.md) +* [Manta](manta/README.md) + * [Manta v1.5.1](manta/manta_1.5.1.md) +* [Marianas](marianas/README.md) + * [Collapsing First Pass v1.8.1](marianas/marianas_collapsing_first_pass_1.8.1.md) + * [Collapsing Second Pass v1.8.1](marianas/marianas_collapsing_second_pass_1.8.1.md) + * [Process Loop UMI v1.8.1](marianas/marianas_process_loop_umi_1.8.1.md) + * [Separate BAMs v1.8.1](marianas/marianas_separate_bams_1.8.1.md) +* MultiQC + * [MultiQC v1.10.1.7](../multiqc/1.10.1.7/README.md) +* [MuTect](mutect/README.md) + * [MuTect 1.1.5](mutect/mutect_1.1.5.md) +* [Merge Fastq](merge-fastq/README.md) + * [v0.1.7](merge-fastq/merge_fastq_0.1.7.md) +* [Mosdepth](mosdepth/README.md) + * [0.3.3](mosdepth/mosdepth_0.3.3.md) +* [Octopus](octopus/README.md) + * [v0.7.4](octopus/octopus_0.7.4.md) +* [Picard Tools](picard-tools/README.md) + * [AddOrReplaceReadGroups v1.96](picard-tools/picard_add_or_replace_read_groups_1.96.md) + * [AddOrReplaceReadGroups v2.21.2](picard-tools/picard_add_or_replace_read_groups_2.21.2.md) + * [AddOrReplaceReadGroups v4.1.8.1](picard-tools/picard_add_or_replace_read_groups_4.1.8.1.md) + * [CollectAlignmentSummaryMetrics v2.8.1](picard-tools/picard_collect_alignment_summary_metrics_2.8.1.md) + * [CollectAlignmentSummaryMetrics v2.21.2](picard-tools/picard_collect_alignment_summary_metrics_2.21.2.md) + * [CollectMultipleMetrics v2.8.1](picard-tools/picard_collectmultiplemetric_2.8.1.md) + * 
[CollectMultipleMetrics v2.21.2](picard-tools/picard_collectmultiplemetric_2.21.2.md) + * [FixMateInformation v1.96](picard-tools/picard_fix_mate_information_1.96.md) + * [FixMateInformation v2.21.2](picard-tools/picard_fix_mate_information_2.21.2.md) + * [FixMateInformation v4.1.8.1](picard-tools/picard_fix_mate_information_4.1.8.1.md) + * [HSmetrics v2.8.1](picard-tools/picard_hsmetrics_2.8.1.md) + * [HSmetrics v2.21.2](picard-tools/picard_hsmetrics_2.21.2.md) + * [MarkDuplicates v1.96](picard-tools/picard_mark_duplicates_1.96.md) + * [MarkDuplicates v2.8.1](picard-tools/picard_mark_duplicates_2.8.1.md) + * [MarkDuplicates v2.21.2](picard-tools/picard_mark_duplicates_2.21.2.md) + * [MarkDuplicates v4.1.8.1](picard-tools/picard_mark_duplicates_4.1.8.1.md) +* [Trim Galore](trim-galore/README.md) + * [v0.6.2](trim-galore/trim_galore_0.6.2.md) +* [Ubuntu utilities](ubuntu-utilites/README.md) + * [v18.04](ubuntu-utilites/utilities_ubuntu_18.04.md) +* [VarDictJava](vardictjava/README.md) + * [v1.8.2](vardictjava/vardictjava_1.8.2.md) +* [VCF2MAF](vcf2maf/README.md) + * [1.6.21](vcf2maf/vcf2maf_1.6.21.md) +* [Waltz](waltz/README.md) + * [CountReads v3.1.1](waltz/waltz_count_reads_3.1.1.md) + * [PileupMetrics v3.1.1](waltz/waltz_pileupmatrices_3.1.1.md) + + diff --git a/docs/abra2/README.md b/docs/abra2/README.md new file mode 100644 index 00000000..5cd595c7 --- /dev/null +++ b/docs/abra2/README.md @@ -0,0 +1,2 @@ +# ABRA2 + diff --git a/abra2_2.17/README.md b/docs/abra2/abra2_2.17.md similarity index 73% rename from abra2_2.17/README.md rename to docs/abra2/abra2_2.17.md index 89818e32..e2f9457b 100644 --- a/abra2_2.17/README.md +++ b/docs/abra2/abra2_2.17.md @@ -1,25 +1,25 @@ -# CWL and Dockerfile for running ABRA2 +# v2.17 -## Version of tools in docker image (/container/Dockerfile) +## Version of tools in docker image \(/container/Dockerfile\) -| Tool | Version | Location | -|--- |--- |--- | -| openjdk | 8 | - | -| ABRA2 | 2.17 | 
https://github.com/mozack/abra2/releases/download/v2.19/abra2-2.17.jar | +| Tool | Version | Location | +| :--- | :--- | :--- | +| openjdk | 8 | - | +| ABRA2 | 2.17 | [https://github.com/mozack/abra2/releases/download/v2.19/abra2-2.17.jar](https://github.com/mozack/abra2/releases/download/v2.19/abra2-2.17.jar) | -[![](https://images.microbadger.com/badges/version/mskcc/abra2:0.1.0.svg)](https://microbadger.com/images/mskcc/abra2:0.1.0 "Get your own version badge on microbadger.com") [![](https://images.microbadger.com/badges/image/mskcc/abra2:0.1.0.svg)](https://microbadger.com/images/mskcc/abra2:0.1.0 "Get your own image badge on microbadger.com") [![](https://images.microbadger.com/badges/license/mskcc/abra2:0.1.0.svg)](https://microbadger.com/images/mskcc/abra2:0.1.0 "Get your own license badge on microbadger.com") +[![](https://images.microbadger.com/badges/version/mskcc/abra2:0.1.0.svg)](https://microbadger.com/images/mskcc/abra2:0.1.0) [![](https://images.microbadger.com/badges/image/mskcc/abra2:0.1.0.svg)](https://microbadger.com/images/mskcc/abra2:0.1.0) [![](https://images.microbadger.com/badges/license/mskcc/abra2:0.1.0.svg)](https://microbadger.com/images/mskcc/abra2:0.1.0) ## CWL -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): ```bash > toil-cwl-runner abra2_2.17.cwl example_inputs.yaml ``` -**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** +**If at MSK, using the JUNO cluster you can use the following command:** ```bash #Using CWLTOOL @@ -30,9 +30,9 @@ > toil-cwl-runner --singularity --logFile 
/path/to/abra2_toil_log/cwltoil.log --jobStore /path/to/abra2_jobStore --batchSystem lsf --workDir /path/to/abra2_toil_log --outdir . --writeLogs /path/to/abra2_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/abra2_2.17.cwl /path/to/inputs.yaml > abra2_toil.stdout 2> abra2_toil.stderr & ``` -### Usage +### Usage -``` +```text usage: abra2_2.17.cwl [-h] positional arguments: @@ -85,4 +85,5 @@ optional arguments: VCF containing known (or suspected) variant sites. Very large files should be avoided. --no_sort Do not attempt to sort final output - ``` \ No newline at end of file +``` + diff --git a/docs/abra2/abra2_2.22.md b/docs/abra2/abra2_2.22.md new file mode 100644 index 00000000..c23aaada --- /dev/null +++ b/docs/abra2/abra2_2.22.md @@ -0,0 +1,21 @@ +# v2.22 + +## Version of tools in docker image \(/container/Dockerfile\) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| openjdk | 8 | - | +| ABRA2 | 2.22 | [https://github.com/mozack/abra2/releases/download/v2.22/abra2-2.22.jar](https://github.com/mozack/abra2/releases/download/v2.22/abra2-2.22.jar) | + +[![](https://images.microbadger.com/badges/version/mskcc/abra2:0.2.0.svg)](https://microbadger.com/images/mskcc/abra2:0.2.0) [![](https://images.microbadger.com/badges/image/mskcc/abra2:0.2.0.svg)](https://microbadger.com/images/mskcc/abra2:0.2.0) [![](https://images.microbadger.com/badges/license/mskcc/abra2:0.2.0.svg)](https://microbadger.com/images/mskcc/abra2:0.2.0) + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner abra2_2.22.cwl example_inputs.yaml +``` + diff --git a/docs/bcftools/README.md b/docs/bcftools/README.md new file mode 100644 index 00000000..a08fa464 --- /dev/null +++ b/docs/bcftools/README.md @@ -0,0 +1 @@ +# BCFTOOLS diff --git a/docs/bcftools/bcftools_bgzip_v1.15.1.md 
b/docs/bcftools/bcftools_bgzip_v1.15.1.md new file mode 100644 index 00000000..5ff81c8f --- /dev/null +++ b/docs/bcftools/bcftools_bgzip_v1.15.1.md @@ -0,0 +1,43 @@ +## CWL and Docker for Running bgzip using bcftools v1.15.1 + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +| --------------------- | ------- | ------------------------------------- | +| alpine:3.8 base image | 3.8 | - | +| bcftools | 1.6 | quay.io/biocontainers/bcftools:1.6--0 | + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io/): + +``` +toil-cwl-runner bcftools_bgzip_1.15.1.cwl example_input_bgzip.yaml +``` + +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** + +```shell +#Using CWLTOOL +cwltool --singularity --non-strict /path/to/bcftools_bgzip_1.15.1.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +mkdir bcftools_toil_log +toil-cwl-runner --singularity --logFile /path/to/bcftools_toil_log/cwltoil.log --jobStore /path/to/bcftools_jobStore --batchSystem lsf --workDir /path/to/bcftools_toil_log --outdir . 
--writeLogs /path/to/bcftools_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/bcftools_bgzip_1.15.1.cwl /path/to/inputs.yaml > bcftools_toil.stdout 2> bcftools_toil.stderr & +``` + +### Usage + +```shell +usage: bcftools_bgzip_1.15.1.cwl [-h] --input INPUT + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --input INPUT input VCF file +``` + diff --git a/docs/bcftools/bcftools_concat_1.6.md b/docs/bcftools/bcftools_concat_1.6.md new file mode 100644 index 00000000..eabc4ff7 --- /dev/null +++ b/docs/bcftools/bcftools_concat_1.6.md @@ -0,0 +1,69 @@ +# CWL and Dockerfile for running bcftools v1.6 + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +|--- |--- |--- | +| bcftools | 1.6 | quay.io/biocontainers/bcftools:1.6--0 | +| bcftools | 1.6 | https://github.com/samtools/bcftools/archive/1.6.zip | + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner bcftools_concat_1.6.cwl example_inputs.yaml +``` + +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/bcftools_concat_1.6.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> mkdir bcftools_toil_log +> toil-cwl-runner --singularity --logFile /path/to/bcftools_toil_log/cwltoil.log --jobStore /path/to/bcftools_jobStore --batchSystem lsf --workDir /path/to/bcftools_toil_log --outdir . 
--writeLogs /path/to/bcftools_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/bcftools_concat_1.6.cwl /path/to/inputs.yaml > bcftools_toil.stdout 2> bcftools_toil.stderr & +``` + +### Usage + +``` +usage: toil-cwl-runner bcftools_concat_1.6.cwl [-h] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --allow_overlaps First coordinate of the next file can precede last + record of the current file. + --compact_PS Do not output PS tag at each site, only at the start + of a new phase set block. + --ligate Ligate phased VCFs by matching phase at overlapping + haplotypes + --list LIST Read the list of files from a file. + --min_PQ MIN_PQ Break phase set if phasing quality is lower than + [30] + --naive Concatenate BCF files without recompression + (dangerous, use with caution) + --no_version do not append version and command line to the header + --output OUTPUT Write output to a file [standard output] + --output_type OUTPUT_TYPE + b - compressed BCF, u - uncompressed BCF, z + - compressed VCF, v - uncompressed VCF [v] + --regions REGIONS Restrict to comma-separated list of regions + --regions_file REGIONS_FILE + Restrict to regions listed in a file + --remove_duplicates Alias for -d none + --rm_dups RM_DUPS Output duplicate records present in multiple + files only once - + --threads THREADS Number of extra output compression threads [0] + --vcf_files_csi VCF_FILES_CSI + Array of vcf files to be concatenated into one vcf + --vcf_files_tbi VCF_FILES_TBI + Array of vcf files to be concatenated into one vcf + +``` diff --git a/docs/bcftools/bcftools_tabix_v1.15.1.md b/docs/bcftools/bcftools_tabix_v1.15.1.md new file mode 100644 index 00000000..e2adfbc6 --- /dev/null +++ b/docs/bcftools/bcftools_tabix_v1.15.1.md @@ -0,0 +1,43 @@ +## CWL and Docker for Running tabix using bcftools v1.15.1 + +## Version of tools in docker image 
(/container/Dockerfile) + +| Tool | Version | Location | +| --------------------- | ------- | ------------------------------------- | +| alpine:3.8 base image | 3.8 | - | +| bcftools | 1.6 | quay.io/biocontainers/bcftools:1.6--0 | + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io/): + +``` +toil-cwl-runner bcftools_tabix_1.15.1.cwl example_input_tabix.yaml +``` + +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** + +```shell +#Using CWLTOOL +cwltool --singularity --non-strict /path/to/bcftools_tabix_1.15.1.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +mkdir bcftools_toil_log +toil-cwl-runner --singularity --logFile /path/to/bcftools_toil_log/cwltoil.log --jobStore /path/to/bcftools_jobStore --batchSystem lsf --workDir /path/to/bcftools_toil_log --outdir . 
--writeLogs /path/to/bcftools_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/bcftools_tabix_1.15.1.cwl /path/to/inputs.yaml > bcftools_toil.stdout 2> bcftools_toil.stderr & +``` + +### Usage + +```shell +usage: bcftools_tabix_1.15.1.cwl [-h] --input INPUT [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --input INPUT input VCF file +``` + diff --git a/docs/bedtools/README.md b/docs/bedtools/README.md new file mode 100644 index 00000000..13a0e51c --- /dev/null +++ b/docs/bedtools/README.md @@ -0,0 +1,2 @@ +# Bedtools + diff --git a/docs/bedtools/bedtools_genomecov_v2.28.0_cv2.md b/docs/bedtools/bedtools_genomecov_v2.28.0_cv2.md new file mode 100644 index 00000000..da41c5b2 --- /dev/null +++ b/docs/bedtools/bedtools_genomecov_v2.28.0_cv2.md @@ -0,0 +1,43 @@ +# genomecov v2.28.0\_cv2 + +## Version of tools in [docker image ](https://hub.docker.com/r/biocontainers/bedtools) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| Bedtools | v2.28.0\_cv2 | [https://github.com/arq5x/bedtools2/releases/tag/v2.28.0](https://github.com/arq5x/bedtools2/releases/tag/v2.28.0) | + +[![](https://img.shields.io/badge/version-2.28.0_cv2-blue)](https://github.com/arq5x/bedtools2/releases/tag/v2.28.0) + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner bedtools_genomecov_v2.28.0_cv2.cwl example_inputs.yml +``` + +**If at MSK, using the JUNO cluster you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict bedtools_genomecov_v2.28.0_cv2.cwl inputs.yaml + +#Using toil-cwl-runner +> mkdir run_directory +> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir 
/path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 bedtools_genomecov_v2.28.0_cv2.cwl inputs.yaml > file.stdout 2> file.stderr & +``` + +## Usage + +\`\`\`bash + +> toil-cwl-runner bedtools\_genomecov\_v2.28.0\_cv2.cwl --help + +usage: bedtools\_genomecov\_v2.28.0\_cv2.cwl \[-h\] --input INPUT --output\_file\_name OUTPUT\_FILE\_NAME \[--memory\_overhead MEMORY\_OVERHEAD\] \[--memory\_per\_job MEMORY\_PER\_JOB\] \[--number\_of\_threads NUMBER\_OF\_THREADS\] \[--option\_bedgraph\] \[job\_order\] + +positional arguments: job\_order Job input json file + +optional arguments: -h, --help show this help message and exit --input INPUT The input file can be in BAM format \(Note: BAM must be sorted by position\) --output\_file\_name OUTPUT\_FILE\_NAME --memory\_overhead MEMORY\_OVERHEAD --memory\_per\_job MEMORY\_PER\_JOB --number\_of\_threads NUMBER\_OF\_THREADS --option\_bedgraph option flag parameter to choose output file format. 
-bg refers to bedgraph format + diff --git a/docs/bedtools/bedtools_merge_v2.28.0_cv2.md b/docs/bedtools/bedtools_merge_v2.28.0_cv2.md new file mode 100644 index 00000000..507a8994 --- /dev/null +++ b/docs/bedtools/bedtools_merge_v2.28.0_cv2.md @@ -0,0 +1,43 @@ +# merge v2.28.0\_cv2 + +## Version of tools in [docker image ](https://hub.docker.com/r/biocontainers/bedtools) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| Bedtools | v2.28.0\_cv2 | [https://github.com/arq5x/bedtools2/releases/tag/v2.28.0](https://github.com/arq5x/bedtools2/releases/tag/v2.28.0) | + +[![](https://img.shields.io/badge/version-2.28.0_cv2-blue)](https://github.com/arq5x/bedtools2/releases/tag/v2.28.0) + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner bedtools_merge_v2.28.0_cv2.cwl example_inputs.yml +``` + +**If at MSK, using the JUNO cluster you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict bedtools_merge_v2.28.0_cv2.cwl inputs.yaml + +#Using toil-cwl-runner +> mkdir run_directory +> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 bedtools_merge_v2.28.0_cv2.cwl inputs.yaml > file.stdout 2> file.stderr & +``` + +## Usage + +\`\`\`bash + +> toil-cwl-runner bedtools\_merge\_v2.28.0\_cv2.cwl --help + +usage: bedtools\_merge\_v2.28.0\_cv2.cwl \[-h\] --input INPUT --output\_file\_name OUTPUT\_FILE\_NAME \[--memory\_overhead MEMORY\_OVERHEAD\] \[--memory\_per\_job MEMORY\_PER\_JOB\] \[--number\_of\_threads NUMBER\_OF\_THREADS\] \[--distance\_between\_features DISTANCE\_BETWEEN\_FEATURES\] \[job\_order\] + +positional arguments: job\_order Job 
input json file + +optional arguments: -h, --help show this help message and exit --input INPUT BEDgraph format file generated from Bedtools Genomecov module --output\_file\_name OUTPUT\_FILE\_NAME --memory\_overhead MEMORY\_OVERHEAD --memory\_per\_job MEMORY\_PER\_JOB --number\_of\_threads NUMBER\_OF\_THREADS --distance\_between\_features DISTANCE\_BETWEEN\_FEATURES Maximum distance between features allowed for features to be merged. + diff --git a/docs/bedtools/bedtools_sortvcf_v2.28.0_cv2.md b/docs/bedtools/bedtools_sortvcf_v2.28.0_cv2.md new file mode 100644 index 00000000..7b76d73b --- /dev/null +++ b/docs/bedtools/bedtools_sortvcf_v2.28.0_cv2.md @@ -0,0 +1,43 @@ +# SortVCF v2.28.0\_cv2 + +## Version of tools in [docker image ](https://hub.docker.com/r/biocontainers/bedtools) + +| Tool | Version | Location | +| :------- | :----------- | :----------------------------------------------------------- | +| Bedtools | v2.28.0\_cv2 | [https://github.com/arq5x/bedtools2/releases/tag/v2.28.0](https://github.com/arq5x/bedtools2/releases/tag/v2.28.0) | + +[![](https://img.shields.io/badge/version-2.28.0_cv2-blue)](https://github.com/arq5x/bedtools2/releases/tag/v2.28.0) + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash +toil-cwl-runner bedtools_sortbed_vcf.cwl example_input.yml +``` + +**If at MSK, using the JUNO cluster you can use the following command** + +```bash +#Using CWLTOOL +cwltool --singularity --non-strict bedtools_sortbed_vcf.cwl inputs.yaml + +#Using toil-cwl-runner +mkdir run_directory +toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 bedtools_sortbed_vcf.cwl inputs.yaml > file.stdout 2> 
file.stderr & +``` + +## Usage + +```shell +Usage: bedtools_sortbed_vcf.cwl [-h] --input INPUT [job_order] +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --input INPUT input VCF file +``` + diff --git a/docs/delly/README.md b/docs/delly/README.md new file mode 100644 index 00000000..3cf4e541 --- /dev/null +++ b/docs/delly/README.md @@ -0,0 +1,2 @@ +# Delly + diff --git a/docs/delly/delly_call_0.9.1.md b/docs/delly/delly_call_0.9.1.md new file mode 100644 index 00000000..08dcf390 --- /dev/null +++ b/docs/delly/delly_call_0.9.1.md @@ -0,0 +1,81 @@ +# CWL and Dockerfile for running Delly + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +|--- |--- |--- | +| ubuntu | 18.04 | - | +| DELLY | 0.9.1 | https://github.com/dellytools/delly | + + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner delly_0.9.1.cwl example_inputs.yaml +``` +**If at MSK, using the JUNO cluster having installed toil-msk version 3.21.1 you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/delly_0.9.1.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> toil-cwl-runner --singularity --logFile /path/to/delly_toil.log --jobStore /path/to/delly_jobStore --batchSystem lsf --workDir /path/to/delly_toil_log --outdir . 
--writeLogs /path/to/delly_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --disableChaining --maxLogFileSize 20000000000 /path/to/delly_0.9.1.cwl /path/to/inputs.yaml > delly_toil.stdout 2> delly_toil.stderr & +``` + +### Usage + +``` +usage: delly_0.9.1.cwl [-h] [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] + [--out_file OUT_FILE] --reference_genome + REFERENCE_GENOME [--exclude_regions EXCLUDE_REGIONS] + [--vcffile VCFFILE] [--svtype SVTYPE] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --out_file OUT_FILE The name to be used for the output bcf file + --reference_genome REFERENCE_GENOME + reference genome fasta file + --exclude_regions EXCLUDE_REGIONS + file with regions to exclude + --vcffile VCFFILE input VCF/BCF file for genotyping + --svtype SVTYPE SV type to compute [DEL, INS, DUP, INV, BND, ALL] +``` + +## Disclaimer +Parts of this code were borrowed from the delly repository, https://github.com/dellytools/delly, which uses the following redistribution license: + +Copyright (c) 2012- European Molecular Biology Laboratory (EMBL) +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. 
Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/docs/disambiguate/README.md b/docs/disambiguate/README.md new file mode 100644 index 00000000..4850c2fd --- /dev/null +++ b/docs/disambiguate/README.md @@ -0,0 +1,2 @@ +# Disambiguate + diff --git a/docs/disambiguate/disambiguate_1.0.0.md b/docs/disambiguate/disambiguate_1.0.0.md new file mode 100644 index 00000000..8e065892 --- /dev/null +++ b/docs/disambiguate/disambiguate_1.0.0.md @@ -0,0 +1,53 @@ +# v1.0.0 + +## Version of tools in docker image \(/container/Dockerfile\) + +Dockerfile uses `biocontainers/biocontainers:latest` as a base image and installs tools from `bioconda`. 
+ +| Tool | Version | Location | Notes | +| :--- | :--- | :--- | :--- | +| biocontainers | latest | [https://hub.docker.com/r/biocontainers/biocontainers/](https://hub.docker.com/r/biocontainers/biocontainers/) | base image; "latest" not actually latest version, just tag name on docker hub | +| bamtools | 2.4.0 | [https://bioconda.github.io/recipes/bamtools/README.html](https://bioconda.github.io/recipes/bamtools/README.html) | - | +| ngs-disambiguate | 2016.11.10 | [https://bioconda.github.io/recipes/ngs-disambiguate/README.html](https://bioconda.github.io/recipes/ngs-disambiguate/README.html) | - | + +[![](https://images.microbadger.com/badges/version/mskcc/disambiguate:1.0.0.svg)](https://microbadger.com/images/mskcc/disambiguate:1.0.0) [![](https://images.microbadger.com/badges/image/mskcc/disambiguate:1.0.0.svg)](https://microbadger.com/images/mskcc/disambiguate:1.0.0) + +## CWL + +* CWL specification 1.0 +* Use `example_inputs.yaml` to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner disambiguate_1.0.0.cwl example_inputs.yaml +``` + +## Command + +```text +USAGE: + + cwltool disambiguate_1.0.0.cwl \ + --prefix \ + --output_dir \ + [--aligner ] \ + + +Where: + + --prefix + (required) Sample ID or name used as prefix. 
Do not include .bam + + --output_dir + (required) Output directory + + --aligner + Aligner option {bwa(default),tophat,hisat2,star} + + + (required) Species A BAM file + + + (required) Species B BAM file +``` + diff --git a/docs/fgbio/README.md b/docs/fgbio/README.md new file mode 100644 index 00000000..f1d1ca8d --- /dev/null +++ b/docs/fgbio/README.md @@ -0,0 +1,2 @@ +# Fgbio + diff --git a/docs/fgbio/fgbio_call_duplex_consensus_reads_1.2.0.md b/docs/fgbio/fgbio_call_duplex_consensus_reads_1.2.0.md new file mode 100644 index 00000000..c8d3d73e --- /dev/null +++ b/docs/fgbio/fgbio_call_duplex_consensus_reads_1.2.0.md @@ -0,0 +1,79 @@ +# CallDuplexConsensusReads v1.2.0 + +## Version of tools in docker image + +| Tool | Version | Location | +| :--- | :--- | :--- | +| fgbio | 1.2.0 | quay.io/biocontainers/fgbio:1.2.0--0 | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner fgbio_call_duplex_consensus_reads_1.2.0.cwl example_inputs.yaml +``` + +## Usage + +```bash +usage: fgbio_call_duplex_consensus_reads_1.2.0.cwl [-h] + [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] + --input INPUT + [--output_file_name OUTPUT_FILE_NAME] + [--read_name_prefix READ_NAME_PREFIX] + [--read_group_id READ_GROUP_ID] + [--error_rate_pre_umi ERROR_RATE_PRE_UMI] + [--error_rate_post_umi ERROR_RATE_POST_UMI] + [--min_input_base_quality MIN_INPUT_BASE_QUALITY] + [--trim] + [--sort_order SORT_ORDER] + [--min_reads MIN_READS] + [--max_reads_per_strand MAX_READS_PER_STRAND] + [--threads THREADS] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads 
NUMBER_OF_THREADS + --input INPUT The input SAM or BAM file. + --output_file_name OUTPUT_FILE_NAME + Output SAM or BAM file to write consensus reads. + --read_name_prefix READ_NAME_PREFIX + The prefix all consensus read names + --read_group_id READ_GROUP_ID + The new read group ID for all the consensus reads. + --error_rate_pre_umi ERROR_RATE_PRE_UMI + The Phred-scaled error rate for an error prior to the + UMIs being integrated. + --error_rate_post_umi ERROR_RATE_POST_UMI + The Phred-scaled error rate for an error post the UMIs + have been integrated. + --min_input_base_quality MIN_INPUT_BASE_QUALITY + Ignore bases in raw reads that have Q below this + value. + --trim If true, quality trim input reads in addition to + masking low Q bases + --sort_order SORT_ORDER + The sort order of the output, if :none: then the same + as the input. + --min_reads MIN_READS + The minimum number of input reads to a consensus read. + --max_reads_per_strand MAX_READS_PER_STRAND + The maximum number of reads to use when building a + single-strand consensus. If more than this many reads + are present in a tag family, the family is randomly + downsampled to exactly max-reads reads. 
+``` + diff --git a/docs/fgbio/fgbio_collect_duplex_seq_metrics_1.2.0.md b/docs/fgbio/fgbio_collect_duplex_seq_metrics_1.2.0.md new file mode 100644 index 00000000..78812a55 --- /dev/null +++ b/docs/fgbio/fgbio_collect_duplex_seq_metrics_1.2.0.md @@ -0,0 +1,62 @@ +# CollectDuplexSeqMetrics v1.2.0 + +## Version of tools in docker image + +| Tool | Version | Location | +| :--- | :--- | :--- | +| fgbio | 1.2.0 | quay.io/biocontainers/fgbio:1.2.0--0 | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner fgbio_collect_duplex_seq_metrics_1.2.0.cwl example_inputs.yaml +``` + +## Usage + +```bash +usage: fgbio_collect_duplex_seq_metrics_1.2.0.cwl + [-h] [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] --input INPUT --output_prefix + OUTPUT_PREFIX [--intervals INTERVALS] [--description DESCRIPTION] + [--duplex_umi_counts DUPLEX_UMI_COUNTS] [--min_ab_reads MIN_AB_READS] + [--min_ba_reads MIN_BA_READS] [--umi_tag UMI_TAG] [--mi_tag MI_TAG] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --input INPUT Input BAM file generated by GroupReadByUmi. + --output_prefix OUTPUT_PREFIX + Prefix of output files to write. + --intervals INTERVALS + Optional set of intervals over which to restrict + analysis. [Optional]. + --description DESCRIPTION + Description of data set used to label plots. Defaults + to sample/library. [Optional]. + --duplex_umi_counts DUPLEX_UMI_COUNTS + If true, produce the .duplex_umi_counts.txt file with + counts of duplex UMI observations. [Optional]. 
+ --min_ab_reads MIN_AB_READS + Minimum AB reads to call a tag family a 'duplex'. + [Optional]. + --min_ba_reads MIN_BA_READS + Minimum BA reads to call a tag family a 'duplex'. + [Optional]. + --umi_tag UMI_TAG The tag containing the raw UMI. [Optional]. + --mi_tag MI_TAG The output tag for UMI grouping. [Optional]. +``` + diff --git a/docs/fgbio/fgbio_fastq_to_bam_1.2.0.md b/docs/fgbio/fgbio_fastq_to_bam_1.2.0.md new file mode 100644 index 00000000..3d4ede7f --- /dev/null +++ b/docs/fgbio/fgbio_fastq_to_bam_1.2.0.md @@ -0,0 +1,82 @@ +# FastqToBam v1.2.0 + +## Version of tools in docker image + +| Tool | Version | Location | +| :--- | :--- | :--- | +| fgbio | 1.2.0 | quay.io/biocontainers/fgbio:1.2.0--0 | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner fgbio_fastq_to_bam_1.2.0.cwl example_inputs.yaml +``` + +## Usage + +```bash +usage: fgbio_fastq_to_bam_1.2.0.cwl [-h] [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] + --input INPUT + [--output_file_name OUTPUT_FILE_NAME] + [--read-structures READ_STRUCTURES] + [--sort] [--umi-tag UMI_TAG] + [--read-group-id READ_GROUP_ID] + [--sample SAMPLE] [--library LIBRARY] + [--platform PLATFORM] + [--platform-unit PLATFORM_UNIT] + [--platform-model PLATFORM_MODEL] + [--sequencing-center SEQUENCING_CENTER] + [--predicted-insert-size PREDICTED_INSERT_SIZE] + [--description DESCRIPTION] + [--comment COMMENT] [--run-date RUN_DATE] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --input INPUT Fastq files corresponding to each sequencing read + (e.g. 
R1, I1, etc.). + --output_file_name OUTPUT_FILE_NAME + The output SAM or BAM file to be written. + --read-structures READ_STRUCTURES + Read structures, one for each of the FASTQs. + https://github.com/fulcrumgenomics/fgbio/wiki/Read- + Structures + --sort If true, queryname sort the BAM file, otherwise + preserve input order. + --umi-tag UMI_TAG Tag in which to store molecular barcodes/UMIs + --read-group-id READ_GROUP_ID + Read group ID to use in the file header. + --sample SAMPLE The name of the sequenced sample. + --library LIBRARY The name/ID of the sequenced library. + --platform PLATFORM Sequencing Platform + --platform-unit PLATFORM_UNIT + Platform unit (e.g. '<flowcell-barcode>.<lane>.<sample-barcode>') + --platform-model PLATFORM_MODEL + Platform model to insert into the group header (ex. + miseq, hiseq2500, hiseqX) + --sequencing-center SEQUENCING_CENTER + The sequencing center from which the data originated + --predicted-insert-size PREDICTED_INSERT_SIZE + Predicted median insert size, to insert into the read + group header + --description DESCRIPTION + Description of the read group. 
+ --comment COMMENT Comment(s) to include in the output file’s header + --run-date RUN_DATE Date the run was produced, to insert into the read + group header +``` + diff --git a/docs/fgbio/fgbio_filter_consensus_reads_1.2.0.md b/docs/fgbio/fgbio_filter_consensus_reads_1.2.0.md new file mode 100644 index 00000000..470166a1 --- /dev/null +++ b/docs/fgbio/fgbio_filter_consensus_reads_1.2.0.md @@ -0,0 +1,80 @@ +# FilterConsensusReads v1.2.0 + +## Version of tools in docker image + +| Tool | Version | Location | +| :--- | :--- | :--- | +| fgbio | 1.2.0 | quay.io/biocontainers/fgbio:1.2.0--0 | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner fgbio_filter_consensus_reads_1.2.0.cwl example_inputs.yaml +``` + +## Usage + +```bash +usage: fgbio_filter_consensus_reads_1.2.0.cwl [-h] + [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] + --input INPUT + [--output_file_name OUTPUT_FILE_NAME] + --reference_fasta + REFERENCE_FASTA + [--reverse_per_base_tags] + [--min_reads MIN_READS] + [--max_read_error_rate MAX_READ_ERROR_RATE] + [--max_base_error_rate MAX_BASE_ERROR_RATE] + [--min_base_quality MIN_BASE_QUALITY] + [--max_no_call_fraction MAX_NO_CALL_FRACTION] + [--min_mean_base_quality MIN_MEAN_BASE_QUALITY] + [--require_single_strand_agreement] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --input INPUT The input SAM or BAM file. + --output_file_name OUTPUT_FILE_NAME + Output SAM or BAM file to write consensus reads. + --reference_fasta REFERENCE_FASTA + Reference fasta file. 
+ --reverse_per_base_tags + Reverse [complement] per base tags on reverse strand + reads. + --min_reads MIN_READS + The minimum number of reads supporting a consensus + base/read. (Max 3 values) + --max_read_error_rate MAX_READ_ERROR_RATE + The maximum raw-read error rate across the entire + consensus read. (Max 3 values) + --max_base_error_rate MAX_BASE_ERROR_RATE + The maximum error rate for a single consensus base. + (Max 3 values) + --min_base_quality MIN_BASE_QUALITY + Mask (make N) consensus bases with quality less than + this threshold. + --max_no_call_fraction MAX_NO_CALL_FRACTION + Maximum fraction of no-calls in the read after + filtering + --min_mean_base_quality MIN_MEAN_BASE_QUALITY + The minimum mean base quality across the consensus + read + --require_single_strand_agreement + Mask (make N) consensus bases where the AB and BA + consensus reads disagree (for duplex-sequencing only). +``` + diff --git a/docs/fgbio/fgbio_group_reads_by_umi_1.2.0.md b/docs/fgbio/fgbio_group_reads_by_umi_1.2.0.md new file mode 100644 index 00000000..87ad2aaa --- /dev/null +++ b/docs/fgbio/fgbio_group_reads_by_umi_1.2.0.md @@ -0,0 +1,68 @@ +# GroupReadsByUmi v1.2.0 + +## Version of tools in docker image + +| Tool | Version | Location | +| :--- | :--- | :--- | +| fgbio | 1.2.0 | quay.io/biocontainers/fgbio:1.2.0--0 | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner fgbio_group_reads_by_umi_1.2.0.cwl example_inputs.yaml +``` + +## Usage + +```bash +usage: fgbio_group_reads_by_umi_1.2.0.cwl [-h] + [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] + --input INPUT + [--output_file_name OUTPUT_FILE_NAME] + [--family_size_histogram FAMILY_SIZE_HISTOGRAM] + [--raw_tag RAW_TAG] + [--assign_tag ASSIGN_TAG] + [--min_map_q MIN_MAP_Q] + [--include_non_pf_reads] + --strategy STRATEGY + 
[--edits EDITS] + [--min_umi_length MIN_UMI_LENGTH] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --input INPUT The input BAM file. + --output_file_name OUTPUT_FILE_NAME + The output SAM or BAM file to be written. + --family_size_histogram FAMILY_SIZE_HISTOGRAM + Optional output of tag family size counts. + --raw_tag RAW_TAG The tag containing the raw UMI. + --assign_tag ASSIGN_TAG + The output tag for UMI grouping. + --min_map_q MIN_MAP_Q + Minimum mapping quality. + --include_non_pf_reads + --strategy STRATEGY The UMI assignment strategy. + (identity,edit,adjacency,paired) + --edits EDITS The allowable number of edits between UMIs. + --min_umi_length MIN_UMI_LENGTH + The minimum UMI length. If not specified then all UMIs + must have the same length, otherwise discard reads + with UMIs shorter than this length and allow for + differing UMI lengths. 
+``` + diff --git a/docs/fgbio/fgbio_postprocessing_simplex_filter_0.1.8.md b/docs/fgbio/fgbio_postprocessing_simplex_filter_0.1.8.md new file mode 100644 index 00000000..b12b00a3 --- /dev/null +++ b/docs/fgbio/fgbio_postprocessing_simplex_filter_0.1.8.md @@ -0,0 +1,44 @@ +# simplex\_filter v0.1.8 + +## Version of tools in docker image + +| Tool | Version | Location | +| :--- | :--- | :--- | +| fgbio\_postprocessing | 0.1.8 | [https://github.com/msk-access/fgbio\_postprocessing](https://github.com/msk-access/fgbio_postprocessing) | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner fgbio_postprocessing_simplex_filter_0.1.8.cwl example_inputs.yaml +``` + +## Usage + +```bash +usage: fgbio_postprocessing_simplex_filter_0.1.8/fgbio_postprocessing_simplex_filter_0.1.8.cwl + [-h] [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] --input_bam INPUT_BAM + [--output_file_name OUTPUT_FILE_NAME] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --input_bam INPUT_BAM + Input file (bam or sam). Required. + --output_file_name OUTPUT_FILE_NAME + Output file (bam or sam). 
+``` + diff --git a/docs/gatk/README.md b/docs/gatk/README.md new file mode 100644 index 00000000..ebad94ae --- /dev/null +++ b/docs/gatk/README.md @@ -0,0 +1,2 @@ +# GATK + diff --git a/docs/gatk/gatk_apply_bqsr_4.1.8.1.md b/docs/gatk/gatk_apply_bqsr_4.1.8.1.md new file mode 100644 index 00000000..c919c1f4 --- /dev/null +++ b/docs/gatk/gatk_apply_bqsr_4.1.8.1.md @@ -0,0 +1,43 @@ +# ApplyBQSR v4.1.8.1 + +## Version of tools in [docker image ](https://hub.docker.com/r/broadinstitute/gatk) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| GATK | 4.1.8.1 | [https://github.com/broadinstitute/gatk/releases/tag/4.1.8.1](https://github.com/broadinstitute/gatk/releases/tag/4.1.8.1) | + +[![](https://img.shields.io/badge/version-4.1.8.1-blue)](https://github.com/broadinstitute/gatk/releases/tag/4.1.8.1) + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner gatk_apply_bqsr_4.1.8.1.cwl example_inputs.yml +``` + +**If at MSK, using the JUNO cluster you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict gatk_apply_bqsr_4.1.8.1.cwl inputs.yaml + +#Using toil-cwl-runner +> mkdir run_directory +> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 gatk_apply_bqsr_4.1.8.1.cwl inputs.yaml > file.stdout 2> file.stderr & +``` + +## Usage + +\`\`\`bash + +> toil-cwl-runner gatk\_apply\_bqsr\_4.1.8.1.cwl --help + +usage: gatk\_apply\_bqsr\_4.1.8.1.cwl \[-h\] --reference REFERENCE \[--create\_output\_bam\_index\] --bqsr\_recal\_file BQSR\_RECAL\_FILE --input INPUT \[--output\_file\_name OUTPUT\_FILE\_NAME\] \[--add\_output\_sam\_program\_record\] 
\[--add\_output\_vcf\_command\_line\] \[--arguments\_file ARGUMENTS\_FILE\] \[--cloud\_index\_prefetch\_buffer CLOUD\_INDEX\_PREFETCH\_BUFFER\] \[--cloud\_prefetch\_buffer CLOUD\_PREFETCH\_BUFFER\] \[--create\_output\_bam\_md5\] \[--create\_output\_variant\_index\] \[--create\_output\_variant\_md5\] \[--disable\_bam\_index\_caching\] \[--disable\_read\_filter DISABLE\_READ\_FILTER\] \[--disable\_sequence\_dictionary\_validation\] \[--emit\_original\_quals\] \[--exclude\_intervals EXCLUDE\_INTERVALS\] \[--gatk\_config\_file GATK\_CONFIG\_FILE\] \[--gcs\_max\_retries GCS\_MAX\_RETRIES\] \[--gcs\_project\_for\_requester\_pays GCS\_PROJECT\_FOR\_REQUESTER\_PAYS\] \[--global\_qscore\_prior GLOBAL\_QSCORE\_PRIOR\] \[--interval\_exclusion\_padding INTERVAL\_EXCLUSION\_PADDING\] \[--interval\_merging\_rule INTERVAL\_MERGING\_RULE\] \[--interval\_padding INTERVAL\_PADDING\] \[--interval\_set\_rule INTERVAL\_SET\_RULE\] \[--intervals INTERVALS\] \[--lenient\] \[--preserve\_qscores\_less\_than PRESERVE\_QSCORES\_LESS\_THAN\] \[--quantize\_quals QUANTIZE\_QUALS\] \[--quiet\] \[--read\_filter READ\_FILTER\] \[--read\_index READ\_INDEX\] \[--read\_validation\_stringency READ\_VALIDATION\_STRINGENCY\] \[--seconds\_between\_progress\_updates SECONDS\_BETWEEN\_PROGRESS\_UPDATES\] \[--sequence\_dictionary SEQUENCE\_DICTIONARY\] \[--sites\_only\_vcf\_output\] \[--use\_jdk\_deflater\] \[--use\_jdk\_inflater\] \[--use\_original\_qualities\] \[--memory\_overhead MEMORY\_OVERHEAD\] \[--memory\_per\_job MEMORY\_PER\_JOB\] \[--number\_of\_threads NUMBER\_OF\_THREADS\] \[job\_order\] + +positional arguments: job\_order Job input json file + +optional arguments: -h, --help show this help message and exit --reference REFERENCE Reference sequence --create\_output\_bam\_index --bqsr\_recal\_file BQSR\_RECAL\_FILE Input recalibration table for BQSR. 
Only run ApplyBQSR with the covariates table created from the input BAM --input INPUT A BAM file containing input read data --output\_file\_name OUTPUT\_FILE\_NAME Output file name. Not Required --add\_output\_sam\_program\_record --add\_output\_vcf\_command\_line --arguments\_file ARGUMENTS\_FILE --cloud\_index\_prefetch\_buffer CLOUD\_INDEX\_PREFETCH\_BUFFER --cloud\_prefetch\_buffer CLOUD\_PREFETCH\_BUFFER --create\_output\_bam\_md5 --create\_output\_variant\_index --create\_output\_variant\_md5 --disable\_bam\_index\_caching --disable\_read\_filter DISABLE\_READ\_FILTER --disable\_sequence\_dictionary\_validation --emit\_original\_quals --exclude\_intervals EXCLUDE\_INTERVALS --gatk\_config\_file GATK\_CONFIG\_FILE --gcs\_max\_retries GCS\_MAX\_RETRIES --gcs\_project\_for\_requester\_pays GCS\_PROJECT\_FOR\_REQUESTER\_PAYS --global\_qscore\_prior GLOBAL\_QSCORE\_PRIOR --interval\_exclusion\_padding INTERVAL\_EXCLUSION\_PADDING --interval\_merging\_rule INTERVAL\_MERGING\_RULE --interval\_padding INTERVAL\_PADDING --interval\_set\_rule INTERVAL\_SET\_RULE --intervals INTERVALS --lenient --preserve\_qscores\_less\_than PRESERVE\_QSCORES\_LESS\_THAN --quantize\_quals QUANTIZE\_QUALS --quiet --read\_filter READ\_FILTER --read\_index READ\_INDEX --read\_validation\_stringency READ\_VALIDATION\_STRINGENCY --seconds\_between\_progress\_updates SECONDS\_BETWEEN\_PROGRESS\_UPDATES --sequence\_dictionary SEQUENCE\_DICTIONARY --sites\_only\_vcf\_output --use\_jdk\_deflater --use\_jdk\_inflater --use\_original\_qualities --memory\_overhead MEMORY\_OVERHEAD --memory\_per\_job MEMORY\_PER\_JOB --number\_of\_threads NUMBER\_OF\_THREADS + diff --git a/docs/gatk/gatk_applybqsr_4.1.2.0.md b/docs/gatk/gatk_applybqsr_4.1.2.0.md new file mode 100644 index 00000000..709855a1 --- /dev/null +++ b/docs/gatk/gatk_applybqsr_4.1.2.0.md @@ -0,0 +1,43 @@ +# ApplyBQSR v4.1.2.0 + +## Version of tools in [docker image ](https://hub.docker.com/r/broadinstitute/gatk) + +| Tool | Version | 
Location | +| :--- | :--- | :--- | +| GATK | 4.1.2.0 | [https://github.com/broadinstitute/gatk/releases/tag/4.1.2.0](https://github.com/broadinstitute/gatk/releases/tag/4.1.2.0) | + +[![](https://img.shields.io/badge/version-4.1.2.0-blue)](https://github.com/broadinstitute/gatk/releases/tag/4.1.2.0) + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner gatk_ApplyBQSR_4.1.2.0.cwl example_inputs.yml +``` + +**If at MSK, using the JUNO cluster you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict gatk_ApplyBQSR_4.1.2.0.cwl inputs.yaml + +#Using toil-cwl-runner +> mkdir run_directory +> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 gatk_ApplyBQSR_4.1.2.0.cwl inputs.yaml > file.stdout 2> file.stderr & +``` + +## Usage + +\`\`\`bash + +> toil-cwl-runner gatk\_ApplyBQSR\_4.1.2.0.cwl --help + +usage: gatk\_ApplyBQSR\_4.1.2.0.cwl \[-h\] --reference REFERENCE \[--create\_output\_bam\_index\] --bqsr\_recal\_file BQSR\_RECAL\_FILE --input INPUT \[--output\_file\_name OUTPUT\_FILE\_NAME\] \[--add\_output\_sam\_program\_record\] \[--add\_output\_vcf\_command\_line\] \[--arguments\_file ARGUMENTS\_FILE\] \[--cloud\_index\_prefetch\_buffer CLOUD\_INDEX\_PREFETCH\_BUFFER\] \[--cloud\_prefetch\_buffer CLOUD\_PREFETCH\_BUFFER\] \[--create\_output\_bam\_md5\] \[--create\_output\_variant\_index\] \[--create\_output\_variant\_md5\] \[--disable\_bam\_index\_caching\] \[--disable\_read\_filter DISABLE\_READ\_FILTER\] \[--disable\_sequence\_dictionary\_validation\] \[--emit\_original\_quals\] \[--exclude\_intervals EXCLUDE\_INTERVALS\] \[--gatk\_config\_file 
GATK\_CONFIG\_FILE\] \[--gcs\_max\_retries GCS\_MAX\_RETRIES\] \[--gcs\_project\_for\_requester\_pays GCS\_PROJECT\_FOR\_REQUESTER\_PAYS\] \[--global\_qscore\_prior GLOBAL\_QSCORE\_PRIOR\] \[--interval\_exclusion\_padding INTERVAL\_EXCLUSION\_PADDING\] \[--interval\_merging\_rule INTERVAL\_MERGING\_RULE\] \[--interval\_padding INTERVAL\_PADDING\] \[--interval\_set\_rule INTERVAL\_SET\_RULE\] \[--intervals INTERVALS\] \[--lenient\] \[--preserve\_qscores\_less\_than PRESERVE\_QSCORES\_LESS\_THAN\] \[--quantize\_quals QUANTIZE\_QUALS\] \[--quiet\] \[--read\_filter READ\_FILTER\] \[--read\_index READ\_INDEX\] \[--read\_validation\_stringency READ\_VALIDATION\_STRINGENCY\] \[--seconds\_between\_progress\_updates SECONDS\_BETWEEN\_PROGRESS\_UPDATES\] \[--sequence\_dictionary SEQUENCE\_DICTIONARY\] \[--sites\_only\_vcf\_output\] \[--use\_jdk\_deflater\] \[--use\_jdk\_inflater\] \[--use\_original\_qualities\] \[--memory\_overhead MEMORY\_OVERHEAD\] \[--memory\_per\_job MEMORY\_PER\_JOB\] \[--number\_of\_threads NUMBER\_OF\_THREADS\] \[job\_order\] + +positional arguments: job\_order Job input json file + +optional arguments: -h, --help show this help message and exit --reference REFERENCE Reference sequence --create\_output\_bam\_index --bqsr\_recal\_file BQSR\_RECAL\_FILE Input recalibration table for BQSR. Only run ApplyBQSR with the covariates table created from the input BAM --input INPUT A BAM file containing input read data --output\_file\_name OUTPUT\_FILE\_NAME Output file name. 
Not Required --add\_output\_sam\_program\_record --add\_output\_vcf\_command\_line --arguments\_file ARGUMENTS\_FILE --cloud\_index\_prefetch\_buffer CLOUD\_INDEX\_PREFETCH\_BUFFER --cloud\_prefetch\_buffer CLOUD\_PREFETCH\_BUFFER --create\_output\_bam\_md5 --create\_output\_variant\_index --create\_output\_variant\_md5 --disable\_bam\_index\_caching --disable\_read\_filter DISABLE\_READ\_FILTER --disable\_sequence\_dictionary\_validation --emit\_original\_quals --exclude\_intervals EXCLUDE\_INTERVALS --gatk\_config\_file GATK\_CONFIG\_FILE --gcs\_max\_retries GCS\_MAX\_RETRIES --gcs\_project\_for\_requester\_pays GCS\_PROJECT\_FOR\_REQUESTER\_PAYS --global\_qscore\_prior GLOBAL\_QSCORE\_PRIOR --interval\_exclusion\_padding INTERVAL\_EXCLUSION\_PADDING --interval\_merging\_rule INTERVAL\_MERGING\_RULE --interval\_padding INTERVAL\_PADDING --interval\_set\_rule INTERVAL\_SET\_RULE --intervals INTERVALS --lenient --preserve\_qscores\_less\_than PRESERVE\_QSCORES\_LESS\_THAN --quantize\_quals QUANTIZE\_QUALS --quiet --read\_filter READ\_FILTER --read\_index READ\_INDEX --read\_validation\_stringency READ\_VALIDATION\_STRINGENCY --seconds\_between\_progress\_updates SECONDS\_BETWEEN\_PROGRESS\_UPDATES --sequence\_dictionary SEQUENCE\_DICTIONARY --sites\_only\_vcf\_output --use\_jdk\_deflater --use\_jdk\_inflater --use\_original\_qualities --memory\_overhead MEMORY\_OVERHEAD --memory\_per\_job MEMORY\_PER\_JOB --number\_of\_threads NUMBER\_OF\_THREADS + diff --git a/docs/gatk/gatk_base_recalibrator_4.1.8.1.md b/docs/gatk/gatk_base_recalibrator_4.1.8.1.md new file mode 100644 index 00000000..9b90a39f --- /dev/null +++ b/docs/gatk/gatk_base_recalibrator_4.1.8.1.md @@ -0,0 +1,43 @@ +# BaseRecalibrator v4.1.8.1 + +## Version of tools in [docker image ](https://hub.docker.com/r/broadinstitute/gatk) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| GATK | 4.1.8.1 | 
[https://github.com/broadinstitute/gatk/releases/tag/4.1.8.1](https://github.com/broadinstitute/gatk/releases/tag/4.1.8.1) | + +[![](https://img.shields.io/badge/version-4.1.8.1-blue)](https://github.com/broadinstitute/gatk/releases/tag/4.1.8.1) + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner gatk_base_recalibrator_4.1.8.1.cwl example_inputs.yml +``` + +**If at MSK, using the JUNO cluster you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict gatk_base_recalibrator_4.1.8.1.cwl inputs.yaml + +#Using toil-cwl-runner +> mkdir run_directory +> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 gatk_base_recalibrator_4.1.8.1.cwl inputs.yaml > file.stdout 2> file.stderr & +``` + +## Usage + +\`\`\`bash + +> toil-cwl-runner gatk\_base\_recalibrator\_4.1.8.1.cwl --help + +usage: gatk\_base\_recalibrator\_4.1.8.1.cwl \[-h\] --input INPUT --known\_sites\_1 KNOWN\_SITES\_1 --reference REFERENCE \[--output\_file\_name OUTPUT\_FILE\_NAME\] \[--add\_output\_sam\_program\_record\] \[--add\_output\_vcf\_command\_line\] \[--arguments\_file ARGUMENTS\_FILE\] \[--binary\_tag\_name BINARY\_TAG\_NAME\] \[--bqsr\_baq\_gap\_open\_penalty BQSR\_BAQ\_GAP\_OPEN\_PENALTY\] \[--cloud-index-prefetch-buffer CLOUD\_INDEX\_PREFETCH\_BUFFER\] \[--cloud\_prefetch\_buffer CLOUD\_PREFETCH\_BUFFER\] \[--create\_output\_bam\_index\] \[--create\_output\_bam\_md5\] \[--create\_output\_variant\_index\] \[--create\_output\_variant\_md5\] \[--default\_base\_qualities DEFAULT\_BASE\_QUALITIES\] \[--deletions\_default\_quality DELETIONS\_DEFAULT\_QUALITY\] 
\[--disable\_bam\_index\_caching\] \[--disable\_read\_filter DISABLE\_READ\_FILTER\] \[--disable\_sequence\_dictionary\_validation\] \[--exclude\_intervals EXCLUDE\_INTERVALS\] \[--gatk\_config\_file GATK\_CONFIG\_FILE\] \[--gcs\_max\_retries GCS\_MAX\_RETRIES\] \[--gcs\_project\_for\_requester\_pays GCS\_PROJECT\_FOR\_REQUESTER\_PAYS\] \[--indels\_context\_size INDELS\_CONTEXT\_SIZE\] \[--insertions\_default\_quality INSERTIONS\_DEFAULT\_QUALITY\] \[--interval\_exclusion\_padding INTERVAL\_EXCLUSION\_PADDING\] \[--interval\_merging\_rule INTERVAL\_MERGING\_RULE\] \[--interval\_padding INTERVAL\_PADDING\] \[--interval\_set\_rule INTERVAL\_SET\_RULE\] \[--intervals INTERVALS\] \[--lenient\] \[--low\_quality\_tail LOW\_QUALITY\_TAIL\] \[--maximum\_cycle\_value MAXIMUM\_CYCLE\_VALUE\] \[--mismatches\_context\_size MISMATCHES\_CONTEXT\_SIZE\] \[--mismatches\_default\_quality MISMATCHES\_DEFAULT\_QUALITY\] \[--preserve\_qscores\_less\_than PRESERVE\_QSCORES\_LESS\_THAN\] \[--quantizing\_levels QUANTIZING\_LEVELS\] \[--QUIET\] \[--read\_filter READ\_FILTER\] \[--read\_index READ\_INDEX\] \[--seconds\_between\_progress\_updates SECONDS\_BETWEEN\_PROGRESS\_UPDATES\] \[--sequence\_dictionary SEQUENCE\_DICTIONARY\] \[--sites\_only\_vcf\_output\] \[--use\_original\_qualities\] \[--number\_of\_threads NUMBER\_OF\_THREADS\] \[--memory\_per\_job MEMORY\_PER\_JOB\] \[--memory\_overhead MEMORY\_OVERHEAD\] \[--known\_sites\_2 KNOWN\_SITES\_2\] \[job\_order\] + +positional arguments: job\_order Job input json file + +optional arguments: -h, --help show this help message and exit --input INPUT BAM/SAM file containing reads --known\_sites\_1 KNOWN\_SITES\_1 One or more databases of known polymorphic sites used to exclude regions around known polymorphisms from analysis --reference REFERENCE Reference sequence file --output\_file\_name OUTPUT\_FILE\_NAME Output file name. 
Not Required --add\_output\_sam\_program\_record --add\_output\_vcf\_command\_line --arguments\_file ARGUMENTS\_FILE --binary\_tag\_name BINARY\_TAG\_NAME --bqsr\_baq\_gap\_open\_penalty BQSR\_BAQ\_GAP\_OPEN\_PENALTY --cloud-index-prefetch-buffer CLOUD\_INDEX\_PREFETCH\_BUFFER --cloud\_prefetch\_buffer CLOUD\_PREFETCH\_BUFFER --create\_output\_bam\_index --create\_output\_bam\_md5 --create\_output\_variant\_index --create\_output\_variant\_md5 --default\_base\_qualities DEFAULT\_BASE\_QUALITIES --deletions\_default\_quality DELETIONS\_DEFAULT\_QUALITY --disable\_bam\_index\_caching --disable\_read\_filter DISABLE\_READ\_FILTER --disable\_sequence\_dictionary\_validation --exclude\_intervals EXCLUDE\_INTERVALS --gatk\_config\_file GATK\_CONFIG\_FILE --gcs\_max\_retries GCS\_MAX\_RETRIES --gcs\_project\_for\_requester\_pays GCS\_PROJECT\_FOR\_REQUESTER\_PAYS --indels\_context\_size INDELS\_CONTEXT\_SIZE --insertions\_default\_quality INSERTIONS\_DEFAULT\_QUALITY --interval\_exclusion\_padding INTERVAL\_EXCLUSION\_PADDING --interval\_merging\_rule INTERVAL\_MERGING\_RULE --interval\_padding INTERVAL\_PADDING --interval\_set\_rule INTERVAL\_SET\_RULE --intervals INTERVALS --lenient --low\_quality\_tail LOW\_QUALITY\_TAIL --maximum\_cycle\_value MAXIMUM\_CYCLE\_VALUE --mismatches\_context\_size MISMATCHES\_CONTEXT\_SIZE --mismatches\_default\_quality MISMATCHES\_DEFAULT\_QUALITY --preserve\_qscores\_less\_than PRESERVE\_QSCORES\_LESS\_THAN --quantizing\_levels QUANTIZING\_LEVELS --QUIET --read\_filter READ\_FILTER --read\_index READ\_INDEX --seconds\_between\_progress\_updates SECONDS\_BETWEEN\_PROGRESS\_UPDATES --sequence\_dictionary SEQUENCE\_DICTIONARY --sites\_only\_vcf\_output --use\_original\_qualities --number\_of\_threads NUMBER\_OF\_THREADS --memory\_per\_job MEMORY\_PER\_JOB --memory\_overhead MEMORY\_OVERHEAD --known\_sites\_2 KNOWN\_SITES\_2 + diff --git a/docs/gatk/gatk_baserecalibrator_4.1.2.0.md b/docs/gatk/gatk_baserecalibrator_4.1.2.0.md new file mode 
100644 index 00000000..41f341b4 --- /dev/null +++ b/docs/gatk/gatk_baserecalibrator_4.1.2.0.md @@ -0,0 +1,43 @@ +# BaseRecalibrator v4.1.2.0 + +## Version of tools in [docker image ](https://hub.docker.com/r/broadinstitute/gatk) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| GATK | 4.1.2.0 | [https://github.com/broadinstitute/gatk/releases/tag/4.1.2.0](https://github.com/broadinstitute/gatk/releases/tag/4.1.2.0) | + +[![](https://img.shields.io/badge/version-4.1.2.0-blue)](https://github.com/broadinstitute/gatk/releases/tag/4.1.2.0) + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner gatk_baserecalibrator_4.1.2.0.cwl example_inputs.yml +``` + +**If at MSK, using the JUNO cluster you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict gatk_baserecalibrator_4.1.2.0.cwl inputs.yaml + +#Using toil-cwl-runner +> mkdir run_directory +> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 gatk_baserecalibrator_4.1.2.0.cwl inputs.yaml > file.stdout 2> file.stderr & +``` + +## Usage + +\`\`\`bash + +> toil-cwl-runner gatk\_baserecalibrator\_4.1.2.0.cwl --help + +usage: gatk\_baserecalibrator\_4.1.2.0.cwl \[-h\] --input INPUT --known\_sites\_1 KNOWN\_SITES\_1 --reference REFERENCE \[--output\_file\_name OUTPUT\_FILE\_NAME\] \[--add\_output\_sam\_program\_record\] \[--add\_output\_vcf\_command\_line\] \[--arguments\_file ARGUMENTS\_FILE\] \[--binary\_tag\_name BINARY\_TAG\_NAME\] \[--bqsr\_baq\_gap\_open\_penalty BQSR\_BAQ\_GAP\_OPEN\_PENALTY\] \[--cloud-index-prefetch-buffer CLOUD\_INDEX\_PREFETCH\_BUFFER\] \[--cloud\_prefetch\_buffer 
CLOUD\_PREFETCH\_BUFFER\] \[--create\_output\_bam\_index\] \[--create\_output\_bam\_md5\] \[--create\_output\_variant\_index\] \[--create\_output\_variant\_md5\] \[--default\_base\_qualities DEFAULT\_BASE\_QUALITIES\] \[--deletions\_default\_quality DELETIONS\_DEFAULT\_QUALITY\] \[--disable\_bam\_index\_caching\] \[--disable\_read\_filter DISABLE\_READ\_FILTER\] \[--disable\_sequence\_dictionary\_validation\] \[--exclude\_intervals EXCLUDE\_INTERVALS\] \[--gatk\_config\_file GATK\_CONFIG\_FILE\] \[--gcs\_max\_retries GCS\_MAX\_RETRIES\] \[--gcs\_project\_for\_requester\_pays GCS\_PROJECT\_FOR\_REQUESTER\_PAYS\] \[--indels\_context\_size INDELS\_CONTEXT\_SIZE\] \[--insertions\_default\_quality INSERTIONS\_DEFAULT\_QUALITY\] \[--interval\_exclusion\_padding INTERVAL\_EXCLUSION\_PADDING\] \[--interval\_merging\_rule INTERVAL\_MERGING\_RULE\] \[--interval\_padding INTERVAL\_PADDING\] \[--interval\_set\_rule INTERVAL\_SET\_RULE\] \[--intervals INTERVALS\] \[--lenient\] \[--low\_quality\_tail LOW\_QUALITY\_TAIL\] \[--maximum\_cycle\_value MAXIMUM\_CYCLE\_VALUE\] \[--mismatches\_context\_size MISMATCHES\_CONTEXT\_SIZE\] \[--mismatches\_default\_quality MISMATCHES\_DEFAULT\_QUALITY\] \[--preserve\_qscores\_less\_than PRESERVE\_QSCORES\_LESS\_THAN\] \[--quantizing\_levels QUANTIZING\_LEVELS\] \[--QUIET\] \[--read\_filter READ\_FILTER\] \[--read\_index READ\_INDEX\] \[--seconds\_between\_progress\_updates SECONDS\_BETWEEN\_PROGRESS\_UPDATES\] \[--sequence\_dictionary SEQUENCE\_DICTIONARY\] \[--sites\_only\_vcf\_output\] \[--use\_original\_qualities\] \[--number\_of\_threads NUMBER\_OF\_THREADS\] \[--memory\_per\_job MEMORY\_PER\_JOB\] \[--memory\_overhead MEMORY\_OVERHEAD\] \[--known\_sites\_2 KNOWN\_SITES\_2\] \[job\_order\] + +positional arguments: job\_order Job input json file + +optional arguments: -h, --help show this help message and exit --input INPUT BAM/SAM file containing reads --known\_sites\_1 KNOWN\_SITES\_1 One or more databases of known polymorphic sites used 
to exclude regions around known polymorphisms from analysis --reference REFERENCE Reference sequence file --output\_file\_name OUTPUT\_FILE\_NAME Output file name. Not Required --add\_output\_sam\_program\_record --add\_output\_vcf\_command\_line --arguments\_file ARGUMENTS\_FILE --binary\_tag\_name BINARY\_TAG\_NAME --bqsr\_baq\_gap\_open\_penalty BQSR\_BAQ\_GAP\_OPEN\_PENALTY --cloud-index-prefetch-buffer CLOUD\_INDEX\_PREFETCH\_BUFFER --cloud\_prefetch\_buffer CLOUD\_PREFETCH\_BUFFER --create\_output\_bam\_index --create\_output\_bam\_md5 --create\_output\_variant\_index --create\_output\_variant\_md5 --default\_base\_qualities DEFAULT\_BASE\_QUALITIES --deletions\_default\_quality DELETIONS\_DEFAULT\_QUALITY --disable\_bam\_index\_caching --disable\_read\_filter DISABLE\_READ\_FILTER --disable\_sequence\_dictionary\_validation --exclude\_intervals EXCLUDE\_INTERVALS --gatk\_config\_file GATK\_CONFIG\_FILE --gcs\_max\_retries GCS\_MAX\_RETRIES --gcs\_project\_for\_requester\_pays GCS\_PROJECT\_FOR\_REQUESTER\_PAYS --indels\_context\_size INDELS\_CONTEXT\_SIZE --insertions\_default\_quality INSERTIONS\_DEFAULT\_QUALITY --interval\_exclusion\_padding INTERVAL\_EXCLUSION\_PADDING --interval\_merging\_rule INTERVAL\_MERGING\_RULE --interval\_padding INTERVAL\_PADDING --interval\_set\_rule INTERVAL\_SET\_RULE --intervals INTERVALS --lenient --low\_quality\_tail LOW\_QUALITY\_TAIL --maximum\_cycle\_value MAXIMUM\_CYCLE\_VALUE --mismatches\_context\_size MISMATCHES\_CONTEXT\_SIZE --mismatches\_default\_quality MISMATCHES\_DEFAULT\_QUALITY --preserve\_qscores\_less\_than PRESERVE\_QSCORES\_LESS\_THAN --quantizing\_levels QUANTIZING\_LEVELS --QUIET --read\_filter READ\_FILTER --read\_index READ\_INDEX --seconds\_between\_progress\_updates SECONDS\_BETWEEN\_PROGRESS\_UPDATES --sequence\_dictionary SEQUENCE\_DICTIONARY --sites\_only\_vcf\_output --use\_original\_qualities --number\_of\_threads NUMBER\_OF\_THREADS --memory\_per\_job MEMORY\_PER\_JOB --memory\_overhead 
MEMORY\_OVERHEAD --known\_sites\_2 KNOWN\_SITES\_2 + diff --git a/docs/gatk/gatk_downsamplesam_4.1.8.1.md b/docs/gatk/gatk_downsamplesam_4.1.8.1.md new file mode 100644 index 00000000..b0f093d9 --- /dev/null +++ b/docs/gatk/gatk_downsamplesam_4.1.8.1.md @@ -0,0 +1,113 @@ +# DownsampleSam v4.1.8.1 + +## Version of tools in [docker image ](https://hub.docker.com/r/broadinstitute/gatk) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| GATK | 4.1.8.1 | [https://github.com/broadinstitute/gatk/releases/tag/4.1.8.1](https://github.com/broadinstitute/gatk/releases/tag/4.1.8.1) | + +[![](https://img.shields.io/badge/version-4.1.8.1-blue)](https://github.com/broadinstitute/gatk/releases/tag/4.1.8.1) + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner gatk_downsamplesam_4.1.8.1.cwl example_inputs.yml +``` + +**If at MSK, using the JUNO cluster you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict gatk_downsamplesam_4.1.8.1.cwl inputs.yaml + +#Using toil-cwl-runner +> mkdir run_directory +> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 gatk_downsamplesam_4.1.8.1.cwl inputs.yaml > file.stdout 2> file.stderr & +``` + +## Usage + +\`\`\`bash + +> toil-cwl-runner gatk_downsamplesam_4.1.8.1.cwl --help + +usage: gatk_downsamplesam_4.1.8.1.cwl [-h] --input INPUT --reference REFERENCE + [--output_file_name OUTPUT_FILE_NAME] + [--output_file_name_metrics OUTPUT_FILE_NAME_METRICS] + [--probability PROBABILITY] + [--random_seed RANDOM_SEED] + [--strategy STRATEGY] + [--arguments_file ARGUMENTS_FILE] + [--cloud-index-prefetch-buffer 
CLOUD_INDEX_PREFETCH_BUFFER] + [--cloud_prefetch_buffer CLOUD_PREFETCH_BUFFER] + [--create_output_bam_index] + [--create_output_bam_md5] + [--disable_bam_index_caching] + [--disable_read_filter DISABLE_READ_FILTER] + [--disable_sequence_dictionary_validation] + [--exclude_intervals EXCLUDE_INTERVALS] + [--gatk_config_file GATK_CONFIG_FILE] + [--gcs_max_retries GCS_MAX_RETRIES] + [--gcs_project_for_requester_pays GCS_PROJECT_FOR_REQUESTER_PAYS] + [--QUIET] [--read_filter READ_FILTER] + [--read_index READ_INDEX] + [--seconds_between_progress_updates SECONDS_BETWEEN_PROGRESS_UPDATES] + [--lenient] + [--number_of_threads NUMBER_OF_THREADS] + [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--temporary_directory TEMPORARY_DIRECTORY] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --input INPUT BAM/SAM file containing reads + --reference REFERENCE + Reference sequence file + --output_file_name OUTPUT_FILE_NAME + Output file name. Not Required + --output_file_name_metrics OUTPUT_FILE_NAME_METRICS + Output file name for metrics file. Not Required + --probability PROBABILITY + The probability of keeping any individual read, + between 0 and 1. + --random_seed RANDOM_SEED + Random seed used for deterministic results. Setting to + null will cause multiple invocations to produce + different results. 
+ --strategy STRATEGY The --STRATEGY argument is an enumerated type + (Strategy), which can have one of the following + values: HighAccuracy ConstantMemory Chained default + Strategy ConstantMemory + --arguments_file ARGUMENTS_FILE + --cloud-index-prefetch-buffer CLOUD_INDEX_PREFETCH_BUFFER + --cloud_prefetch_buffer CLOUD_PREFETCH_BUFFER + --create_output_bam_index + --create_output_bam_md5 + --disable_bam_index_caching + --disable_read_filter DISABLE_READ_FILTER + Read filters to be disabled before analysis + --disable_sequence_dictionary_validation + --exclude_intervals EXCLUDE_INTERVALS + --gatk_config_file GATK_CONFIG_FILE + --gcs_max_retries GCS_MAX_RETRIES + --gcs_project_for_requester_pays GCS_PROJECT_FOR_REQUESTER_PAYS + --QUIET + --read_filter READ_FILTER + --read_index READ_INDEX + --seconds_between_progress_updates SECONDS_BETWEEN_PROGRESS_UPDATES + --lenient + --number_of_threads NUMBER_OF_THREADS + --memory_per_job MEMORY_PER_JOB + --memory_overhead MEMORY_OVERHEAD + --temporary_directory TEMPORARY_DIRECTORY + Default value: null. 
+ + diff --git a/docs/gatk/gatk_merge_bam_alignment_4.1.8.0.md b/docs/gatk/gatk_merge_bam_alignment_4.1.8.0.md new file mode 100644 index 00000000..8a593757 --- /dev/null +++ b/docs/gatk/gatk_merge_bam_alignment_4.1.8.0.md @@ -0,0 +1,245 @@ +# MergeBamAlignment v4.1.8.0 + +## Version of tools in docker image + +| Tool | Version | Location | +| :--- | :--- | :--- | +| gatk | 4.1.8.0 | broadinstitute/gatk:4.1.8.0 | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner gatk_merge_bam_alignment_4.1.8.0.cwl example_inputs.yaml +``` + +### Usage + +```bash +usage: gatk_merge_bam_alignment_4.1.8.0.cwl [-h] + [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] + --unmapped_bam UNMAPPED_BAM + --reference REFERENCE + [--output_file_name OUTPUT_FILE_NAME] + [--add_mate_cigar] + [--add_pg_tag_to_reads] + [--aligned_bam ALIGNED_BAM] + [--aligned_reads_only] + [--aligner_proper_pair_flags] + [--attributes_to_remove ATTRIBUTES_TO_REMOVE] + [--attributes_to_retain ATTRIBUTES_TO_RETAIN] + [--attributes_to_reverse ATTRIBUTES_TO_REVERSE] + [--attributes_to_reverse_complement ATTRIBUTES_TO_REVERSE_COMPLEMENT] + [--clip_adapters] + [--clip_overlapping_reads] + [--expected_orientations EXPECTED_ORIENTATIONS] + [--hard_clip_overlapping_reads] + [--include_secondary_alignments] + [--is_bisulfite_sequence] + [--jump_size JUMP_SIZE] + [--matching_dictionary_tags MATCHING_DICTIONARY_TAGS] + [--max_insertions_or_deletions MAX_INSERTIONS_OR_DELETIONS] + [--min_unclipped_bases MIN_UNCLIPPED_BASES] + [--paired_run] + [--primary_alignment_strategy PRIMARY_ALIGNMENT_STRATEGY] + [--read1_aligned_bam READ1_ALIGNED_BAM] + [--read1_trim READ1_TRIM] + [--read2_aligned_bam READ2_ALIGNED_BAM] + [--read2_trim READ2_TRIM] + [--sort_order SORT_ORDER] + [--unmap_contaminant_reads] + [--unmapped_read_strategy 
UNMAPPED_READ_STRATEGY] + [--validation_stringency VALIDATION_STRINGENCY] + [--create_index] + [--create_md5_file] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --unmapped_bam UNMAPPED_BAM + Original SAM or BAM file of unmapped reads, which must + be in queryname order. Reads MUST be unmapped. + Required. + --reference REFERENCE + Reference sequence file. Required. + --output_file_name OUTPUT_FILE_NAME + Merged SAM or BAM file to write to. Required. + --add_mate_cigar Adds the mate CIGAR tag (MC) if true, does not if + false. Default value: true. Possible values: {true, + false} + --add_pg_tag_to_reads + Add PG tag to each read in a SAM or BAM Default value: + true. Possible values: {true, false} + --aligned_bam ALIGNED_BAM + SAM or BAM file(s) with alignment data. This argument + may be specified 0 or more times. Default value: null. + Cannot be used in conjunction with argument(s) + READ1_ALIGNED_BAM (R1_ALIGNED) READ2_ALIGNED_BAM + (R2_ALIGNED) + --aligned_reads_only Whether to output only aligned reads. Default value: + false. Possible values: {true, false} + --aligner_proper_pair_flags + Use the aligners idea of what a proper pair is rather + than computing in this program. Default value: false. + Possible values: {true, false} + --attributes_to_remove ATTRIBUTES_TO_REMOVE + Attributes from the alignment record that should be + removed when merging. This overrides + ATTRIBUTES_TO_RETAIN if they share common tags. This + argument may be specified 0 or more times. Default + value: null. + --attributes_to_retain ATTRIBUTES_TO_RETAIN + Reserved alignment attributes (tags starting with X, + Y, or Z) that should be brought over from the + alignment data when merging. 
This argument may be + specified 0 or more times. Default value: null. + --attributes_to_reverse ATTRIBUTES_TO_REVERSE + Attributes on negative strand reads that need to be + reversed. This argument may be specified 0 or more + times. Default value: [OQ, U2]. + --attributes_to_reverse_complement ATTRIBUTES_TO_REVERSE_COMPLEMENT + Attributes on negative strand reads that need to be + reverse complemented. This argument may be specified 0 + or more times. Default value: [E2, SQ]. + --clip_adapters Whether to clip adapters where identified. Default + value: true. Possible values: {true, false} + --clip_overlapping_reads + For paired reads, clip the 3' end of each read if + necessary so that it does not extend past the 5' end + of its mate. Clipping will be either soft or hard + clipping, depending on CLIP_OVERLAPPING_READS_OPERATOR + setting. Hard clipped bases and their qualities will + be stored in the XB and XQ tags respectively. Default + value: true. Possible values: {true, false} + --expected_orientations EXPECTED_ORIENTATIONS + The expected orientation of proper read pairs. + Replaces JUMP_SIZE This argument may be specified 0 or + more times. Default value: null. Possible values: {FR, + RF, TANDEM} Cannot be used in conjunction with + argument(s) JUMP_SIZE (JUMP) + --hard_clip_overlapping_reads + If true, hard clipping will be applied to overlapping + reads. By default, soft clipping is used. Default + value: false. Possible values: {true, false} + --include_secondary_alignments + If false, do not write secondary alignments to output. + Default value: true. Possible values: {true, false} + --is_bisulfite_sequence + Whether the lane is bisulfite sequence (used when + calculating the NM tag). Default value: false. + Possible values: {true, false} + --jump_size JUMP_SIZE + The expected jump size (required if this is a jumping + library). Deprecated. Use EXPECTED_ORIENTATIONS + instead Default value: null. 
Cannot be used in + conjunction with argument(s) EXPECTED_ORIENTATIONS + (ORIENTATIONS) + --matching_dictionary_tags MATCHING_DICTIONARY_TAGS + List of Sequence Records tags that must be equal (if + present) in the reference dictionary and in the + aligned file. Mismatching tags will cause an error if + in this list, and a warning otherwise. This argument + may be specified 0 or more times. Default value: [M5, + LN]. + --max_insertions_or_deletions MAX_INSERTIONS_OR_DELETIONS + The maximum number of insertions or deletions + permitted for an alignment to be included. Alignments + with more than this many insertions or deletions will + be ignored. Set to -1 to allow any number of + insertions or deletions. Default value: 1. + --min_unclipped_bases MIN_UNCLIPPED_BASES + If UNMAP_CONTAMINANT_READS is set, require this many + unclipped bases or else the read will be marked as + contaminant. Default value: 32. + --paired_run DEPRECATED. This argument is ignored and will be + removed. Default value: true. Possible values: {true, + false} + --primary_alignment_strategy PRIMARY_ALIGNMENT_STRATEGY + Strategy for selecting primary alignment when the + aligner has provided more than one alignment for a + pair or fragment, and none are marked as primary, more + than one is marked as primary, or the primary + alignment is filtered out for some reason. For all + strategies, ties are resolved arbitrarily. Default + value: BestMapq. BestMapq (Expects that multiple + alignments will be correlated with HI tag, and prefers + the pair of alignments with the largest MAPQ, in the + absence of a primary selected by the aligner.) + EarliestFragment (Prefers the alignment which maps the + earliest base in the read. Note that EarliestFragment + may not be used for paired reads.) BestEndMapq + (Appropriate for cases in which the aligner is not + pair-aware, and does not output the HI tag. 
It simply + picks the alignment for each end with the highest + MAPQ, and makes those alignments primary, regardless + of whether the two alignments make sense together.) + MostDistant (Appropriate for a non-pair-aware aligner. + Picks the alignment pair with the largest insert size. + If all alignments would be chimeric, it picks the + alignments for each end with the best MAPQ.) + --read1_aligned_bam READ1_ALIGNED_BAM + SAM or BAM file(s) with alignment data from the first + read of a pair. This argument may be specified 0 or + more times. Default value: null. Cannot be used in + conjunction with argument(s) ALIGNED_BAM (ALIGNED) + --read1_trim READ1_TRIM + The number of bases trimmed from the beginning of read + 1 prior to alignment Default value: 0. + --read2_aligned_bam READ2_ALIGNED_BAM + SAM or BAM file(s) with alignment data from the second + read of a pair. This argument may be specified 0 or + more times. Default value: null. Cannot be used in + conjunction with argument(s) ALIGNED_BAM (ALIGNED) + --read2_trim READ2_TRIM + The number of bases trimmed from the beginning of read + 2 prior to alignment Default value: 0. + --sort_order SORT_ORDER + The order in which the merged reads should be output. + Default value: coordinate. Possible values: {unsorted, + queryname, coordinate, duplicate, unknown} + --unmap_contaminant_reads + Detect reads originating from foreign organisms (e.g. + bacterial DNA in a non-bacterial sample),and unmap + + label those reads accordingly. Default value: false. + Possible values: {true, false} + --unmapped_read_strategy UNMAPPED_READ_STRATEGY + How to deal with alignment information in reads that + are being unmapped (e.g. due to cross-species + contamination.) Currently ignored unless + UNMAP_CONTAMINANT_READS = true. Note that the + DO_NOT_CHANGE strategy will actually reset the cigar + and set the mapping quality on unmapped reads since + otherwisethe result will be an invalid record. 
To + force no change use the DO_NOT_CHANGE_INVALID + strategy. Default value: DO_NOT_CHANGE. Possible + values: {COPY_TO_TAG, DO_NOT_CHANGE, + DO_NOT_CHANGE_INVALID, MOVE_TO_TAG} + --validation_stringency VALIDATION_STRINGENCY + Validation stringency for all SAM files read by this + program. Setting stringency to SILENT can improve + performance when processing a BAM file in which + variable-length data (read, qualities, tags) do not + otherwise need to be decoded. Default value: STRICT. + This option can be set to 'null' to clear the default + value. Possible values: {STRICT,LENIENT, SILENT} + --create_index Whether to create a BAM index when writing a + coordinate-sorted BAM file. Default value: false. + Possible values: {true, false} + --create_md5_file Whether to create an MD5 digest for any BAM or FASTQ + files created. Default value: false. Possible values: + {true, false} +``` + diff --git a/docs/gatk/gatk_merge_sam_files_4.1.8.0.md b/docs/gatk/gatk_merge_sam_files_4.1.8.0.md new file mode 100644 index 00000000..0602f9be --- /dev/null +++ b/docs/gatk/gatk_merge_sam_files_4.1.8.0.md @@ -0,0 +1,98 @@ +# MergeSamFiles v4.1.8.0 + +## Version of tools in docker image + +| Tool | Version | Location | +| :--- | :--- | :--- | +| gatk | 4.1.8.0 | broadinstitute/gatk:4.1.8.0 | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner gatk_merge_sam_files_4.1.8.0.cwl example_inputs.yaml +``` + +### Usage + +```bash +usage: gatk_merge_sam_files_4.1.8.0.cwl [-h] [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] + --input INPUT + [--output_file_name OUTPUT_FILE_NAME] + [--assume_sorted] [--comment COMMENT] + [--create_index] [--create_md5_file] + [--intervals INTERVALS] + [--merge_sequence_dictionaries] + [--reference_sequence REFERENCE_SEQUENCE] + [--sort_order SORT_ORDER] + 
[--use_threading] + [--validation_stringency VALIDATION_STRINGENCY] + [--verbosity VERBOSITY] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --input INPUT SAM or BAM input file This argument must be specified + at least once. Required. + --output_file_name OUTPUT_FILE_NAME + SAM or BAM file to write merged result to Required. + --assume_sorted If true, assume that the input files are in the same + sort order as the requested output sort order, even if + their headers say otherwise. Default value: false. + Possible values: {true, false} + --comment COMMENT Comment(s) to include in the merged output files + header. This argument may be specified 0 or more + times. Default value: null. + --create_index Whether to create a BAM index when writing a + coordinate-sorted BAM file. Default value: false. + Possible values: {true, false} + --create_md5_file Whether to create an MD5 digest for any BAM or FASTQ + files created. Default value: false. Possible values: + {true, false} + --intervals INTERVALS + An interval list file that contains the locations of + the positions to merge. Assume bam are sorted and + indexed. The resulting file will contain alignments + that may overlap with genomic regions outside the + requested region. Unmapped reads are discarded. + Default value: null. + --merge_sequence_dictionaries + Merge the sequence dictionaries Default value: false. + Possible values: {true, false} + --reference_sequence REFERENCE_SEQUENCE + Reference sequence file. Default value: null. + --sort_order SORT_ORDER + Sort order of output file Default value: coordinate. 
+ Possible values: {unsorted, queryname, coordinate, + duplicate, unknown} + --use_threading Option to create a background thread to encode, + compress and write to disk the output file. The + threaded version uses about 20% more CPU and decreases + runtime by ~20% when writing out a compressed BAM + file. Default value: false. Possible values: {true, + false} + --validation_stringency VALIDATION_STRINGENCY + Validation stringency for all SAM files read by this + program. Setting stringency to SILENT can improve + performance when processing a BAM file in which + variable-length data (read, qualities, tags) do not + otherwise need to be decoded. Default value: STRICT. + Possible values: {STRICT, LENIENT, SILENT} + --verbosity VERBOSITY + Control verbosity of logging. Default value: INFO. + Possible values: {ERROR, WARNING, INFO, DEBUG} +``` + diff --git a/docs/gatk/samtofastq-v4.1.8.0.md b/docs/gatk/samtofastq-v4.1.8.0.md new file mode 100644 index 00000000..b2b44b12 --- /dev/null +++ b/docs/gatk/samtofastq-v4.1.8.0.md @@ -0,0 +1,2 @@ +# SamToFastq v4.1.8.0 + diff --git a/docs/manta/README.md b/docs/manta/README.md new file mode 100644 index 00000000..1849796d --- /dev/null +++ b/docs/manta/README.md @@ -0,0 +1,2 @@ +# Manta + diff --git a/docs/manta/manta_1.5.1.md b/docs/manta/manta_1.5.1.md new file mode 100644 index 00000000..9b285335 --- /dev/null +++ b/docs/manta/manta_1.5.1.md @@ -0,0 +1,70 @@ +# Manta v1.5.1 + +## Version of tools in docker image \(/container/Dockerfile\) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| ubuntu base image | 16.04 | - | +| manta | 1.5.1 | [https://github.com/Illumina/manta/releases/download/](https://github.com/Illumina/manta/releases/download/) | +| samtools | 1.9 | [https://github.com/samtools/samtools/releases/download/](https://github.com/samtools/samtools/releases/download/) | +| htslib | 1.9 | [https://github.com/samtools/htslib/releases/download/](https://github.com/samtools/htslib/releases/download/) | +
+[![](https://images.microbadger.com/badges/image/mskaccess/manta:0.0.2.svg)](https://microbadger.com/images/mskaccess/manta:0.0.2)[![](https://images.microbadger.com/badges/version/mskaccess/manta:0.0.2.svg)](https://microbadger.com/images/mskaccess/manta:0.0.2) [![](https://images.microbadger.com/badges/commit/mskaccess/manta:0.0.2.svg)](https://microbadger.com/images/mskaccess/manta:0.0.2) [![](https://images.microbadger.com/badges/license/mskaccess/manta:0.0.2.svg)](https://microbadger.com/images/mskaccess/manta:0.0.2) + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner manta_1.5.1.cwl example_inputs.yaml +``` + +**If at MSK, using the JUNO cluster you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/manta_1.5.1/manta_1.5.1.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> mkdir toil_log +> toil-cwl-runner --singularity --logFile /path/to/toil_log/cwltoil.log --jobStore /path/to/jobStore --batchSystem lsf --workDir /path/to/toil_log --outdir .
--writeLogs /path/to/toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/manta_1.5.1/manta_1.5.1.cwl /path/to/inputs.yaml > toil.stdout 2> toil.stderr & +``` + +### Usage + +```bash +> toil-cwl-runner manta_1.5.1.cwl --help +usage: manta_1.5.1.cwl [-h] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --call_regions CALL_REGIONS + bgzip-compressed, tabix-indexed BED file specifying + regions to which variant analysis will be restricted + --non_wgs toggles on settings for WES + --normal_bam NORMAL_BAM + Normal sample BAM or CRAM file. May be specified more + than once, multiple inputs will be treated as each BAM + file representing a different sample. [optional] (no + default) + --output_contigs if true, outputs assembled contig sequences in final + VCF files, in the INFO field CONTIG + --reference_fasta REFERENCE_FASTA + samtools-indexed reference fasta file [required] + --tumor_bam TUMOR_BAM + Tumor sample BAM or CRAM file. Only up to one tumor + bam file accepted.
+ --generateEvidenceBam + Generate a bam of supporting reads for all SVs +``` + diff --git a/docs/marianas/README.md b/docs/marianas/README.md new file mode 100644 index 00000000..67939d60 --- /dev/null +++ b/docs/marianas/README.md @@ -0,0 +1,2 @@ +# Marianas + diff --git a/docs/marianas/marianas_collapsing_first_pass_1.8.1.md b/docs/marianas/marianas_collapsing_first_pass_1.8.1.md new file mode 100644 index 00000000..f0b24afe --- /dev/null +++ b/docs/marianas/marianas_collapsing_first_pass_1.8.1.md @@ -0,0 +1,19 @@ +# Collapsing First Pass v1.8.1 + +## Version of tools in docker image \(/container/Dockerfile\) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| java base image | 8 | - | +| Marianas | 1.8.1 | [https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar](https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar) | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner marianas_first_pass.cwl example_inputs.yaml +``` + diff --git a/docs/marianas/marianas_collapsing_second_pass_1.8.1.md b/docs/marianas/marianas_collapsing_second_pass_1.8.1.md new file mode 100644 index 00000000..7117bec5 --- /dev/null +++ b/docs/marianas/marianas_collapsing_second_pass_1.8.1.md @@ -0,0 +1,19 @@ +# Collapsing Second Pass v1.8.1 + +## Version of tools in docker image \(/container/Dockerfile\) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| java base image | 8 | - | +| Marianas | 1.8.1 | [https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar](https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar) | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner marianas_first_pass.cwl 
test_inputs_second_pass.yaml +``` + diff --git a/docs/marianas/marianas_process_loop_umi_1.8.1.md b/docs/marianas/marianas_process_loop_umi_1.8.1.md new file mode 100644 index 00000000..7c1efc78 --- /dev/null +++ b/docs/marianas/marianas_process_loop_umi_1.8.1.md @@ -0,0 +1,19 @@ +# Process Loop UMI v1.8.1 + +## Version of tools in docker image \(/container/Dockerfile\) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| java base image | 8 | - | +| Marianas | 1.8.1 | [https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar](https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar) | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner marianas_first_pass.cwl example_inputs.yaml +``` + diff --git a/docs/marianas/marianas_separate_bams_1.8.1.md b/docs/marianas/marianas_separate_bams_1.8.1.md new file mode 100644 index 00000000..232c89da --- /dev/null +++ b/docs/marianas/marianas_separate_bams_1.8.1.md @@ -0,0 +1,33 @@ +# Separate BAMs v1.8.1 + +## Version of tools in docker image \(../marianas\_process\_loop\_umi\_1.8.1/container/Dockerfile\) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| java base image | 8 | - | +| Marianas | 1.8.1 | [https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar](https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar) | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner marianas_separate_bams_1.8.1.cwl example_inputs.yaml +``` + +### Usage + +```bash +usage: marianas_separate_bams_1.8.1/marianas_separate_bams_1.8.1.cwl + [-h] --input_bam INPUT_BAM [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h,
--help show this help message and exit + --input_bam INPUT_BAM +``` + diff --git a/docs/merge-fastq/README.md b/docs/merge-fastq/README.md new file mode 100644 index 00000000..19544a3d --- /dev/null +++ b/docs/merge-fastq/README.md @@ -0,0 +1,2 @@ +# Merge Fastq + diff --git a/docs/merge-fastq/merge_fastq_0.1.7.md b/docs/merge-fastq/merge_fastq_0.1.7.md new file mode 100644 index 00000000..ec1c9213 --- /dev/null +++ b/docs/merge-fastq/merge_fastq_0.1.7.md @@ -0,0 +1,67 @@ +# v0.1.7 + +## Version of tools in docker image \(/container/Dockerfile\) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| miniconda | 3 | [https://hub.docker.com/r/continuumio/miniconda3](https://hub.docker.com/r/continuumio/miniconda3) | +| merge\_fastq | 0.1.7 | [https://pypi.org/project/merge-fastq/](https://pypi.org/project/merge-fastq/) | + +[![](https://images.microbadger.com/badges/version/mskaccess/merge_fastq:0.6.1.svg)](https://microbadger.com/images/mskaccess/merge_fastq:0.6.1) [![](https://images.microbadger.com/badges/image/mskaccess/merge_fastq:0.6.1.svg)](https://microbadger.com/images/mskaccess/merge_fastq:0.6.1) [![](https://images.microbadger.com/badges/commit/mskaccess/merge_fastq:0.6.1.svg)](https://microbadger.com/images/mskaccess/merge_fastq:0.6.1) [![](https://images.microbadger.com/badges/license/mskaccess/merge_fastq:0.6.1.svg)](https://microbadger.com/images/mskaccess/merge_fastq:0.6.1) + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner merge_fastq_0.1.7.cwl example_inputs.yaml +``` + +**If at MSK, using the JUNO cluster you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/merge_fastq_0.1.7/merge_fastq_0.1.7.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> mkdir tool_toil_log +> toil-cwl-runner --singularity --logFile /path/to/tool_toil_log/cwltoil.log --jobStore 
/path/to/tool_jobStore --batchSystem lsf --workDir /path/to/tool_toil_log --outdir . --writeLogs /path/to/tool_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/merge_fastq_0.1.7/merge_fastq_0.1.7.cwl /path/to/inputs.yaml > tool_toil.stdout 2> tool_toil.stderr & +``` + +### Usage + +```bash +> toil-cwl-runner merge_fastq_0.1.7.cwl --help +usage: merge_fastq_0.1.7.cwl [-h] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --fastq1 FASTQ1 Full path to gzipped READ1 fastq files, can be + specified multiple times for example: --fastq1 + test_part1_R1.fastq.gz --fastq1 test_part2_R1.fastq.gz + [required] + --fastq2 FASTQ2 Full path to gzipped READ2 fastq files, can be + specified multiple times for example: --fastq2 + test_part1_R2.fastq.gz --fastq2 test_part2_R2.fastq.gz + [required] + --output_path OUTPUT_PATH + Full path to write the output files (default: Current + working directory) + --out_fastq1_name OUT_FASTQ1_NAME + Name of the merged output READ1 fastq file (default: + merged_fastq_R1.fastq.gz) + --out_fastq2_name OUT_FASTQ2_NAME + Name of the merged output READ2 fastq file (default: + merged_fastq_R2.fastq.gz) +``` + diff --git a/docs/mosdepth/README.md b/docs/mosdepth/README.md new file mode 100644 index 00000000..d576ad29 --- /dev/null +++ b/docs/mosdepth/README.md @@ -0,0 +1,2 @@ +# Mosdepth + diff --git a/docs/mosdepth/mosdepth_0.3.3.md b/docs/mosdepth/mosdepth_0.3.3.md new file mode 100644 index 00000000..44080bde --- /dev/null +++ b/docs/mosdepth/mosdepth_0.3.3.md @@ -0,0 +1,68 @@ +Mosdepth: fast BAM/CRAM depth calculation for **WGS**, **exome**, or **targeted sequencing**.
+ +`mosdepth` can output: ++ per-base depth about 2x as fast as `samtools depth`--about 25 minutes of CPU time for a 30X genome. ++ mean per-window depth given a window size--as would be used for CNV calling. ++ the mean per-region given a BED file of regions. ++ the mean or median per-region cumulative coverage histogram given a window size ++ a distribution of proportion of bases covered at or above a given threshold for each chromosome and genome-wide. ++ quantized output that merges adjacent bases as long as they fall in the same coverage bins e.g. (10-20) ++ threshold output to indicate how many bases in each region are covered at the given thresholds. ++ A summary of mean depths per chromosome and within specified regions per chromosome. + +# CWL for running Mosdepth - Coverage tool +## Version of tools in docker image + +| Tool | Version | Location | +|--- |--- |--- | +| mosdepth | 0.3.3 | [https://hub.docker.com/r/brentp/mosdepth/tags](https://hub.docker.com/r/brentp/mosdepth/tags) [https://github.com/brentp/mosdepth/releases/tag/v0.3.3](https://github.com/brentp/mosdepth/releases/tag/v0.3.3) | + +[![](https://img.shields.io/badge/version-0.3.3-blue)](https://github.com/brentp/mosdepth/releases/tag/v0.3.3) + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner mosdepth_0.3.3.cwl example_inputs.yaml +``` + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/mosdepth_0.3.3.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> nohup toil-cwl-runner --singularity --outdir /path/to/output/folder /path/to/mosdepth_0.3.3.cwl /path/to/inputs.yaml & +``` + +### Usage + +```bash +usage: mosdepth_0.3.3.cwl [-h] [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] [--bed BED] + [--chrom CHROM] [--prefix PREFIX] [--flag FLAG] + [--mapq MAPQ] + [job_order] + +fast
BAM/CRAM depth calculation for WGS, exome, or targeted sequencing. + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --bed BED optional BED file or (integer) window-sizes. + --chrom CHROM chromosome to restrict depth calculation. + --prefix PREFIX Prefix for the output files + --flag FLAG exclude reads with any of the bits in FLAG set + --mapq MAPQ mapping quality threshold. reads with a mapping + quality less than this are ignored +``` diff --git a/docs/mutect/README.md b/docs/mutect/README.md new file mode 100644 index 00000000..639d595b --- /dev/null +++ b/docs/mutect/README.md @@ -0,0 +1,2 @@ +# MuTect + diff --git a/docs/mutect/mutect_1.1.5.md b/docs/mutect/mutect_1.1.5.md new file mode 100644 index 00000000..f5a70ddb --- /dev/null +++ b/docs/mutect/mutect_1.1.5.md @@ -0,0 +1,273 @@ +# MuTect 1.1.5 + +## Version of tools in docker image \(/container/Dockerfile\) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| openjdk:7 base image | 7 | - | +| muTect | 1.1.5 | [https://github.com/broadinstitute/mutect/releases/download/1.1.5/muTect-1.1.5-bin.zip](https://github.com/broadinstitute/mutect/releases/download/1.1.5/muTect-1.1.5-bin.zip) | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner mutect_1.1.5.cwl example_inputs.yaml +``` + +**If at MSK, using the JUNO cluster you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/mutect_1.1.5.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> mkdir mutect_toil_log +> toil-cwl-runner --singularity --logFile /path/to/mutect_toil_log/cwltoil.log --jobStore 
/path/to/mutect_jobStore --batchSystem lsf --workDir /path/to/mutect_toil_log --outdir . --writeLogs /path/to/mutect_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/mutect_1.1.5.cwl /path/to/inputs.yaml > mutect_toil.stdout 2> mutect_toil.stderr & +``` + +### Usage + +```text +usage: toil-cwl-runner mutect_1.1.5.cwl [-h] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --BQSR BQSR The input covariates table file which enables on-the- + fly base quality score recalibration + --absolute_copy_number_data ABSOLUTE_COPY_NUMBER_DATA + Absolute Copy Number Data, as defined by Absolute, to + use in power calculations + --arg_file ARG_FILE Reads arguments from the specified file + --bam_tumor_sample_name BAM_TUMOR_SAMPLE_NAME + if the tumor bam contains multiple samples, only use + read groups with SM equal to this value + --baq BAQ Type of BAQ calculation to apply in the engine + (OFF|CALCULATE_AS_NECESSARY| RECALCULATE) + --baqGapOpenPenalty BAQGAPOPENPENALTY + BAQ gap open penalty (Phred Scaled). Default value is + 40. 
30 is perhaps better for whole genome call sets + --clipping_bias_pvalue_threshold CLIPPING_BIAS_PVALUE_THRESHOLD + pvalue threshold for fishers exact test of clipping + bias in mutant reads vs ref reads + --cosmic COSMIC VCF file of COSMIC sites + --coverage_20_q20_file COVERAGE_20_Q20_FILE + write out 20x of Q20 coverage in WIGGLE format to this + file + --coverage_file COVERAGE_FILE + write out coverage in WIGGLE format to this file + --dbsnp DBSNP VCF file of DBSNP information + --dbsnp_normal_lod DBSNP_NORMAL_LOD + LOD threshold for calling normal non-variant at dbsnp + sites + --defaultBaseQualities DEFAULTBASEQUALITIES + If reads are missing some or all base quality scores, + this value will be used for all base quality scores + --disableRandomization + Completely eliminates randomization from + nondeterministic methods. To be used mostly in the + testing framework where dynamic parallelism can result + in differing numbers of calls to the generator. + --disable_indel_quals + If true, disables printing of base insertion and base + deletion tags (with -BQSR) + --downsample_to_coverage DOWNSAMPLE_TO_COVERAGE + Target coverage threshold for downsampling to coverage + --downsampling_type DOWNSAMPLING_TYPE + Type of reads downsampling to employ at a given locus. + Reads will be selected randomly to be removed from the + pile based on the method described here + (NONE|ALL_READS| BY_SAMPLE) given locus; note that + downsampled reads are randomly selected from all + possible reads at a locus + --emit_original_quals + If true, enables printing of the OQ tag with the + original base qualities (with -BQSR) + --enable_extended_output + --excludeIntervals EXCLUDEINTERVALS + One or more genomic intervals to exclude from + processing. Can be explicitly specified on the command + line or in a file (including a rod file) + --filter_mismatching_base_and_quals + if a read has mismatching number of bases and base + qualities, filter out the read instead of blowing up. 
+ --force_alleles force output for all alleles at each site + --force_output force output for each site + --fraction_contamination FRACTION_CONTAMINATION + estimate of fraction (0-1) of physical contamination + with other unrelated samples + --fraction_mapq0_threshold FRACTION_MAPQ0_THRESHOLD + threshold for determining if there is relatedness + between the alt and ref allele read piles + --gap_events_threshold GAP_EVENTS_THRESHOLD + how many gapped events (ins/del) are allowed in + proximity to this candidate + --gatk_key GATK_KEY GATK Key file. Required if running with -et NO_ET. + Please see -phone-home-and-how-does-it-affect- + me#latest for details. + --heavily_clipped_read_fraction HEAVILY_CLIPPED_READ_FRACTION + if this fraction or more of the bases in a read are + soft/hard clipped, do not use this read for mutation + calling + --initial_tumor_lod INITIAL_TUMOR_LOD + Initial LOD threshold for calling tumor variant + --input_file_normal INPUT_FILE_NORMAL + SAM or BAM file(s) + --input_file_tumor INPUT_FILE_TUMOR + SAM or BAM file(s) + --interval_merging INTERVAL_MERGING + Indicates the interval merging rule we should use for + abutting intervals (ALL| OVERLAPPING_ONLY) + --interval_padding INTERVAL_PADDING + Indicates how many basepairs of padding to include + around each of the intervals specified with the -L/ + --interval_set_rule INTERVAL_SET_RULE + Indicates the set merging approach the interval parser + should use to combine the various -L or -XL inputs + (UNION| INTERSECTION) + --java_7 JAVA_7 + --keep_program_records + Should we override the Walkers default and keep + program records from the SAM header + --log_to_file LOG_TO_FILE + Set the logging location + --logging_level LOGGING_LEVEL + Set the minimum level of logging, i.e. setting INFO + gets you INFO up to FATAL, setting ERROR gets you + ERROR and FATAL level logging. 
+ --maxRuntime MAXRUNTIME + If provided, that GATK will stop execution cleanly as + soon after maxRuntime has been exceeded, truncating + the run but not exiting with a failure. By default the + value is interpreted in minutes, but this can be + changed by maxRuntimeUnits + --maxRuntimeUnits MAXRUNTIMEUNITS + The TimeUnit for maxRuntime (NANOSECONDS| + MICROSECONDS|MILLISECONDS|SECONDS|MINUTES| HOURS|DAYS) + --max_alt_allele_in_normal_fraction MAX_ALT_ALLELE_IN_NORMAL_FRACTION + threshold for maximum alternate allele fraction in + normal + --max_alt_alleles_in_normal_count MAX_ALT_ALLELES_IN_NORMAL_COUNT + threshold for maximum alternate allele counts in + normal + --max_alt_alleles_in_normal_qscore_sum MAX_ALT_ALLELES_IN_NORMAL_QSCORE_SUM + threshold for maximum alternate allele quality score + sum in normal + --min_qscore MIN_QSCORE + threshold for minimum base quality score + --minimum_mutation_cell_fraction MINIMUM_MUTATION_CELL_FRACTION + minimum fraction of cells which are presumed to have a + mutation, used to handle non-clonality and + contamination + --minimum_normal_allele_fraction MINIMUM_NORMAL_ALLELE_FRACTION + minimum allele fraction to be considered in normal, + useful for normal sample contaminated with tumor + --monitorThreadEfficiency + Enable GATK threading efficiency monitoring + --mutect MUTECT + --nonDeterministicRandomSeed + Makes the GATK behave non deterministically, that is, + the random numbers generated will be different in + every run + --noop used for debugging, basically exit as soon as we get + the reads + --normal_depth_file NORMAL_DEPTH_FILE + write out normal read depth in WIGGLE format to this + file + --normal_lod NORMAL_LOD + LOD threshold for calling normal non-germline + --normal_sample_name NORMAL_SAMPLE_NAME + name to use for normal in output files + --num_bam_file_handles NUM_BAM_FILE_HANDLES + The total number of BAM file handles to keep open + simultaneously + --num_cpu_threads_per_data_thread 
NUM_CPU_THREADS_PER_DATA_THREAD + How many CPU threads should be allocated per data + thread to running this analysis? + --num_threads NUM_THREADS + How many data threads should be allocated to running + this analysis. + --only_passing_calls only emit passing calls + --pedigree PEDIGREE Pedigree files for samples + --pedigreeString PEDIGREESTRING + Pedigree string for samples + --pedigreeValidationType PEDIGREEVALIDATIONTYPE + How strict should we be in validating the pedigree + information? (STRICT|SILENT) + --performanceLog PERFORMANCELOG + If provided, a GATK runtime performance log will be + written to this file + --phone_home PHONE_HOME + What kind of GATK run report should we generate? + STANDARD is the default, can be NO_ET so nothing is + posted to the run repository. Please see -phone-home- + and-how-does-it-affect-me#latest for details. + (NO_ET|STANDARD|STDOUT) + --pir_mad_threshold PIR_MAD_THRESHOLD + threshold for clustered read position artifact MAD + --pir_median_threshold PIR_MEDIAN_THRESHOLD + threshold for clustered read position artifact median + --power_constant_af POWER_CONSTANT_AF + Allelic fraction constant to use in power calculations + --power_constant_qscore POWER_CONSTANT_QSCORE + Phred scale quality score constant to use in power + calculations + --power_file POWER_FILE + write out power in WIGGLE format to this file + --preserve_qscores_less_than PRESERVE_QSCORES_LESS_THAN + Bases with quality scores less than this threshold + wont be recalibrated (with -BQSR) + --read_buffer_size READ_BUFFER_SIZE + Number of reads per SAM file to buffer in memory + --read_filter READ_FILTER + Specify filtration criteria to apply to each read + individually + --read_group_black_list READ_GROUP_BLACK_LIST + Filters out read groups matching - or a + .txt file containing the filter strings one per line. 
+ --reference_sequence REFERENCE_SEQUENCE + --remove_program_records + Should we override the Walkers default and remove + program records from the SAM header + --required_maximum_alt_allele_mapping_quality_score + required minimum value for + + tumor alt allele maximum mapping quality score + --somatic_classification_normal_power_threshold + Power threshold for normal to + + determine germline vs variant + --tag TAG Arbitrary tag string to identify this GATK run as part + of a group of runs, for later analysis + --tumor_depth_file TUMOR_DEPTH_FILE + write out tumor read depth in WIGGLE format to this + file + --tumor_f_pretest TUMOR_F_PRETEST + for computational efficiency, reject sites with + allelic fraction below this threshold + --tumor_lod TUMOR_LOD + LOD threshold for calling tumor variant + --tumor_sample_name TUMOR_SAMPLE_NAME + name to use for tumor in output files + --unsafe UNSAFE If set, enables unsafe operations - nothing will be + checked at runtime. For expert users only who know + what they are doing. We do not support usage of this + argument. 
(ALLOW_UNINDEXED_BAM| + ALLOW_UNSET_BAM_SORT_ORDER| + NO_READ_ORDER_VERIFICATION| + ALLOW_SEQ_DICT_INCOMPATIBILITY| + LENIENT_VCF_PROCESSING|ALL) + --useOriginalQualities + If set, use the original base quality scores from the + OQ tag when present instead of the standard scores + --validation_strictness VALIDATION_STRICTNESS + How strict should we be with validation + (STRICT|LENIENT|SILENT) + --vcf VCF VCF output of mutation candidates +``` + diff --git a/docs/octopus/README.md b/docs/octopus/README.md new file mode 100644 index 00000000..c19f208c --- /dev/null +++ b/docs/octopus/README.md @@ -0,0 +1,2 @@ +# Octopus + diff --git a/docs/octopus/octopus_0.7.4.md b/docs/octopus/octopus_0.7.4.md new file mode 100644 index 00000000..2967f783 --- /dev/null +++ b/docs/octopus/octopus_0.7.4.md @@ -0,0 +1,74 @@ +## CWL and Docker for Running Octopus + +## Version of tools in [docker image](https://hub.docker.com/r/dancooke/octopus/tags) + +| Tool | Version | Location | +| ------- | ------- | ---------------------------------------------------------- | +| Octopus | v0.7.4 | https://github.com/luntergroup/octopus/releases/tag/v0.7.4 | + +### CWL + +CWL specification 1.0 +Use example_input.yaml to see the inputs to the cwl +Example Command using [toil](https://toil.readthedocs.io/): +`toil-cwl-runner octopus_0-7-4.cwl example_input.yaml` + +If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing type==X86_64 && you can use the following command + +### Using CWLTOOL + +``` +cwltool --singularity --non-strict /path/to/octopus_0-7-4.cwl /path/to/inputs.yaml +``` + +### Using toil-cwl-runner + +```shell +mkdir octopus_toil_log +toil-cwl-runner --singularity --logFile /path/to/octopus_toil_log/cwltoil.log --jobStore /path/to/octopus_jobStore --batchSystem lsf --workDir /path/to/octopus_toil_log --outdir . 
--writeLogs /path/to/octopus_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/octopus_0-7-4.cwl /path/to/inputs.yaml > octopus_toil.stdout 2> octopus_toil.stderr & +``` + +### Usage + +```shell +usage: octopus_0-7-4.cwl [-h] --input INPUT [--normalId NORMALID] + [--tumorOnlySample] [--somaticOnlyCalls] + [--targettedCalling_singleEntry TARGETTEDCALLING_SINGLEENTRY] + [--skipRegions_singleEntry SKIPREGIONS_SINGLEENTRY] + [--targettedCalling_file TARGETTEDCALLING_FILE] + [--skipRegions_file SKIPREGIONS_FILE] + [--error_models ERROR_MODELS] --reference REFERENCE + --output_file_name OUTPUT_FILE_NAME + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --input INPUT Tumor and normal bam files with .bai + --normalId NORMALID add the name of the normal sample + --tumorOnlySample mention this parameter if it is tumor only sample. + --somaticOnlyCalls if only somatic calls are required. Use this with -f ON + parameter + --targettedCalling_singleEntry TARGETTEDCALLING_SINGLEENTRY + list of regions to call variants from. eg 1. chr1: all + of chr1. 2. chr2:10,000,000: the single position + 10000000 in chr2. 3. chr3:5,000,000-: everything from + chr3:5,000,000 onwards. 4. + chr4:100,000,000-200,000,000: everything between + chr4:100,000,000 and chr4:200,000,000. The interval is + half open so position chr4:200,000,000 is not + included.
+ --skipRegions_singleEntry SKIPREGIONS_SINGLEENTRY + to skip a set of regions + --targettedCalling_file TARGETTEDCALLING_FILE + regions in a text or bed file + --skipRegions_file SKIPREGIONS_FILE + regions in text or bed file format + --error_models ERROR_MODELS + error model will be in the format - [library + preparation]<.sequencer> eg: PCR.NOVASEQ + --reference REFERENCE + --output_file_name OUTPUT_FILE_NAME +``` diff --git a/docs/picard-tools/README.md b/docs/picard-tools/README.md new file mode 100644 index 00000000..094001ac --- /dev/null +++ b/docs/picard-tools/README.md @@ -0,0 +1,2 @@ +# Picard Tools + diff --git a/picard_add_or_replace_read_groups_1.96/README.md b/docs/picard-tools/picard_add_or_replace_read_groups_1.96.md similarity index 78% rename from picard_add_or_replace_read_groups_1.96/README.md rename to docs/picard-tools/picard_add_or_replace_read_groups_1.96.md index b07355a4..e5a70249 100644 --- a/picard_add_or_replace_read_groups_1.96/README.md +++ b/docs/picard-tools/picard_add_or_replace_read_groups_1.96.md @@ -1,26 +1,26 @@ -# CWL and Dockerfile for running Picard - AddOrReplaceReadGroups +# AddOrReplaceReadGroups v1.96 -## Version of tools in docker image (/container/Dockerfile) +## Version of tools in docker image \(/container/Dockerfile\) -| Tool | Version | Location | -|--- |--- |--- | -| java base image | 8 | - | -| picard | 1.96 | https://sourceforge.net/projects/picard/files/picard-tools/1.96/picard-tools-1.96.zip | -| R | 3.3.3 | r-base for opnejdk:8 | +| Tool | Version | Location | +| :--- | :--- | :--- | +| java base image | 8 | - | +| picard | 1.96 | [https://sourceforge.net/projects/picard/files/picard-tools/1.96/picard-tools-1.96.zip](https://sourceforge.net/projects/picard/files/picard-tools/1.96/picard-tools-1.96.zip) | +| R | 3.3.3 | r-base for opnejdk:8 | -[![](https://images.microbadger.com/badges/image/mskcc/picard_1.96:0.1.0.svg)](https://microbadger.com/images/mskcc/picard_1.96:0.1.0 "Get your own image badge on 
microbadger.com") [![](https://images.microbadger.com/badges/version/mskcc/picard_1.96:0.1.0.svg)](https://microbadger.com/images/mskcc/picard_1.96:0.1.0 "Get your own version badge on microbadger.com") [![](https://images.microbadger.com/badges/license/mskcc/picard_1.96:0.1.0.svg)](https://microbadger.com/images/mskcc/picard_1.96:0.1.0 "Get your own license badge on microbadger.com") +[![](https://images.microbadger.com/badges/image/mskcc/picard_1.96:0.1.0.svg)](https://microbadger.com/images/mskcc/picard_1.96:0.1.0) [![](https://images.microbadger.com/badges/version/mskcc/picard_1.96:0.1.0.svg)](https://microbadger.com/images/mskcc/picard_1.96:0.1.0) [![](https://images.microbadger.com/badges/license/mskcc/picard_1.96:0.1.0.svg)](https://microbadger.com/images/mskcc/picard_1.96:0.1.0) ## CWL -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): ```bash > toil-cwl-runner picard_add_or_replace_read_groups_1.96.cwl example_inputs.yaml ``` -**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** +**If at MSK, using the JUNO cluster you can use the following command** ```bash #Using CWLTOOL diff --git a/docs/picard-tools/picard_add_or_replace_read_groups_2.21.2.md b/docs/picard-tools/picard_add_or_replace_read_groups_2.21.2.md new file mode 100644 index 00000000..70eb1246 --- /dev/null +++ b/docs/picard-tools/picard_add_or_replace_read_groups_2.21.2.md @@ -0,0 +1,90 @@ +# AddOrReplaceReadGroups v2.21.2 + +## Version of tools in docker image + +| Tool | Version | Location | +| :--- | :--- | :--- | +| picard | 2.21.2 | 
[https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar](https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar) | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner picard_add_or_replace_read_groups_2.21.2.cwl example_inputs.yaml +``` + +**If at MSK, using the JUNO cluster you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/picard_add_or_replace_read_groups_2.21.2/picard_add_or_replace_read_groups_2.21.2.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> mkdir picardAddOrReplaceReadGroup_toil_log +> toil-cwl-runner --singularity --logFile /path/to/picardAddOrReplaceReadGroup_toil_log/cwltoil.log --jobStore /path/to/picardAddOrReplaceReadGroup_jobStore --batchSystem lsf --workDir /path/to/picardAddOrReplaceReadGroup_toil_log --outdir . --writeLogs /path/to/picardAddOrReplaceReadGroup_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/picard_add_or_replace_read_groups_2.21.2/picard_add_or_replace_read_groups_2.21.2.cwl /path/to/inputs.yaml > picardAddOrReplaceReadGroup_toil.stdout 2> picardAddOrReplaceReadGroup_toil.stderr & +``` + +### Usage + +```bash +> toil-cwl-runner picard_add_or_replace_read_groups_2.21.2.cwl --help +usage: picard_add_or_replace_read_groups_2.21.2.cwl [-h] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --input INPUT Input file (bam or sam). Required. + --output_file_name OUTPUT_FILE_NAME + Output file name (bam or sam). Not Required + --sort_order SORT_ORDER + Optional sort order to output in.
If not supplied + OUTPUT is in the same order as INPUT.Default value: + null. Possible values: {unsorted, queryname, + coordinate} + --read_group_identifier READ_GROUP_IDENTIFIER + Read Group ID Default value: 1. This option can be set + to 'null' to clear the default value Required + --read_group_sequnecing_center READ_GROUP_SEQUNECING_CENTER + Read Group sequencing center name Default value: null. + Required + --read_group_library READ_GROUP_LIBRARY + Read Group Library. Required + --read_group_platform_unit READ_GROUP_PLATFORM_UNIT + Read Group platform unit (eg. run barcode) Required. + --read_group_sample_name READ_GROUP_SAMPLE_NAME + Read Group sample name. Required + --read_group_sequencing_platform READ_GROUP_SEQUENCING_PLATFORM + Read Group platform (e.g. illumina, solid) Required. + --read_group_description READ_GROUP_DESCRIPTION + Read Group description Default value: null. + --read_group_run_date READ_GROUP_RUN_DATE + Read Group run date Default value: null. + --tmp_dir TMP_DIR This option may be specified 0 or more times + --validation_stringency VALIDATION_STRINGENCY + Validation stringency for all SAM files read by this + program. Setting stringency to SILENT can improve + performance when processing a BAM file in which + variable-length data (read, qualities, tags) do not + otherwise need to be decoded. Default value: STRICT. + This option can be set to 'null' to clear the default + value. Possible values: {STRICT,LENIENT, SILENT} + --bam_compression_level BAM_COMPRESSION_LEVEL + Compression level for all compressed files created + (e.g. BAM and GELI). Default value:5. This option can + be set to 'null' to clear the default value. + --create_bam_index Whether to create a BAM index when writing a + coordinate-sorted BAM file. Default value:false. This + option can be set to 'null' to clear the default + value. 
Possible values:{true, false} +``` + diff --git a/docs/picard-tools/picard_add_or_replace_read_groups_4.1.8.1.md b/docs/picard-tools/picard_add_or_replace_read_groups_4.1.8.1.md new file mode 100644 index 00000000..707df7f2 --- /dev/null +++ b/docs/picard-tools/picard_add_or_replace_read_groups_4.1.8.1.md @@ -0,0 +1,95 @@ +# AddOrReplaceReadGroups v4.1.8.1 + +## Version of tools in docker image + +| Tool | Version | Location | +| :--- | :--- | :--- | +| gatk | 4.1.8.1 | broadinstitute/gatk:4.1.8.1 | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner picard_add_or_replace_read_groups_4.1.8.1.cwl example_inputs.yaml +``` + +**If at MSK, using the JUNO cluster you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> mkdir picardAddOrReplaceReadGroup_toil_log +> toil-cwl-runner --singularity --logFile /path/to/picardAddOrReplaceReadGroup_toil_log/cwltoil.log --jobStore /path/to/picardAddOrReplaceReadGroup_jobStore --batchSystem lsf --workDir /path/to/picardAddOrReplaceReadGroup_toil_log --outdir .
--writeLogs /path/to/picardAddOrReplaceReadGroup_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl /path/to/inputs.yaml > picardAddOrReplaceReadGroup_toil.stdout 2> picardAddOrReplaceReadGroup_toil.stderr & +``` + +### Usage + +```bash +> toil-cwl-runner picard_add_or_replace_read_groups_4.1.8.1.cwl --help +usage: picard_add_or_replace_read_groups_4.1.8.1.cwl + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --input INPUT Input file ( sam). Required. + --output_file_name OUTPUT_FILE_NAME + Output file name (bam or sam). Not Required + --sort_order SORT_ORDER + Optional sort order to output in. If not supplied + OUTPUT is in the same order as INPUT.Default value: + null. Possible values: {unsorted, queryname, + coordinate} + --read_group_identifier READ_GROUP_IDENTIFIER + Read Group ID Default value: 1. This option can be set + to 'null' to clear the default value Required + --read_group_sequencing_center READ_GROUP_SEQUENCING_CENTER + Read Group sequencing center name Default value: null. + Required + --read_group_library READ_GROUP_LIBRARY + Read Group Library. Required + --read_group_platform_unit READ_GROUP_PLATFORM_UNIT + Read Group platform unit (eg. run barcode) Required. + --read_group_sample_name READ_GROUP_SAMPLE_NAME + Read Group sample name. Required + --read_group_sequencing_platform READ_GROUP_SEQUENCING_PLATFORM + Read Group platform (e.g. illumina, solid) Required. + --read_group_description READ_GROUP_DESCRIPTION + Read Group description Default value: null. + --read_group_run_date READ_GROUP_RUN_DATE + Read Group run date Default value: null. 
+ --validation_stringency VALIDATION_STRINGENCY + Validation stringency for all SAM files read by this + program. Setting stringency to SILENT can improve + performance when processing a BAM file in which + variable-length data (read, qualities, tags) do not + otherwise need to be decoded. Default value: STRICT. + This option can be set to 'null' to clear the default + value. Possible values: {STRICT,LENIENT, SILENT} + --bam_compression_level BAM_COMPRESSION_LEVEL + Compression level for all compressed files created + (e.g. BAM and GELI). Default value:5. This option can + be set to 'null' to clear the default value. + --use_jdk_deflater Use the JDK Deflater instead of the Intel Deflater for + writing compressed output + --use_jdk_inflater Use the JDK Inflater instead of the Intel Inflater for + reading compressed input. Default value:false. This + option can be set to 'null' to clear the default + value. Possible values:{true, false} + --create_bam_index Whether to create a BAM index when writing a + coordinate-sorted BAM file. Default value:false. This + option can be set to 'null' to clear the default + value.
Possible values:{true, false} +``` + diff --git a/docs/picard-tools/picard_collect_alignment_summary_metrics_2.21.2.md b/docs/picard-tools/picard_collect_alignment_summary_metrics_2.21.2.md new file mode 100644 index 00000000..b88b626f --- /dev/null +++ b/docs/picard-tools/picard_collect_alignment_summary_metrics_2.21.2.md @@ -0,0 +1,78 @@ +# CollectAlignmentSummaryMetrics v2.21.2 + +## Version of tools in docker image + +| Tool | Version | Location | +| :--- | :--- | :--- | +| picard | 2.21.2 | [https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar](https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar) | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner picard_collect_alignment_summary_metrics_2.21.2.cwl example_inputs.yaml +``` + +### Usage + +```bash +> usage: picard_collect_alignment_summary_metrics_2.21.2.cwl [-h] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --input INPUT Input file (bam or sam). Required. + --output_file_name OUTPUT_FILE_NAME + Output file (bam or sam). + --metrics_acciumulation_level METRICS_ACCIUMULATION_LEVEL + The level(s) at which to accumulate metrics. Default + value: [ALL_READS]. This option can be set to 'null' + to clear the default value. Possible values: + {ALL_READS, SAMPLE, LIBRARY, READ_GROUP} This option + may be specified 0 or more times. This option can be + set to 'null' to clear the default list. + --max_insert_size MAX_INSERT_SIZE + Paired-end reads above this insert size will be + considered chimeric along with inter-chromosomal + pairs. Default value: 100000. 
This option can be set + to 'null' to clear the default value. + --tmp_dir TMP_DIR This option may be specified 0 or more times + --validation_stringency VALIDATION_STRINGENCY + Validation stringency for all SAM files read by this + program. Setting stringency to SILENT can improve + performance when processing a BAM file in which + variable-length data (read, qualities, tags) do not + otherwise need to be decoded. Default value: STRICT. + This option can be set to 'null' to clear the default + value. Possible values: {STRICT,LENIENT, SILENT} + --bam_compression_level BAM_COMPRESSION_LEVEL + Compression level for all compressed files created + (e.g. BAM and GELI). Default value:5. This option can + be set to 'null' to clear the default value. + --create_bam_index Whether to create a BAM index when writing a + coordinate-sorted BAM file. Default value:false. This + option can be set to 'null' to clear the default + value. Possible values:{true, false} + --assume_sorted + --reference_sequence REFERENCE_SEQUENCE + Reference sequence file. Note that while this argument + isn't required, without it only a small subset of the + metrics will be calculated. Note also that if a + reference sequence is provided, it must be accompanied + by a sequence dictionary. Default value: null. + --stop_after STOP_AFTER + Stop after processing N reads, mainly for debugging. + Default value: 0. This option can be set to 'null' to + clear the default value. 
+``` + diff --git a/picard_collect_alignment_summary_metrics_2.8.1/README.md b/docs/picard-tools/picard_collect_alignment_summary_metrics_2.8.1.md similarity index 85% rename from picard_collect_alignment_summary_metrics_2.8.1/README.md rename to docs/picard-tools/picard_collect_alignment_summary_metrics_2.8.1.md index 16d0b11b..1a318d60 100644 --- a/picard_collect_alignment_summary_metrics_2.8.1/README.md +++ b/docs/picard-tools/picard_collect_alignment_summary_metrics_2.8.1.md @@ -1,19 +1,18 @@ -# CWL and Dockerfile for running Picard - CollectAlignmentSummaryMetrics +# CollectAlignmentSummaryMetrics v2.8.1 -## Version of tools in docker image (../picard_mark_duplicates_2.8.1/container/Dockerfile) - -| Tool | Version | Location | -|--- |--- |--- | -| java base image | 8 | - | -| picard | 2.8.1 | https://github.com/broadinstitute/picard/releases/download/2.8.1/picard.jar | -| R | 3.3.3 | r-base for opnejdk:8 | +## Version of tools in docker image \(../picard\_mark\_duplicates\_2.8.1/container/Dockerfile\) +| Tool | Version | Location | +| :--- | :--- | :--- | +| java base image | 8 | - | +| picard | 2.8.1 | [https://github.com/broadinstitute/picard/releases/download/2.8.1/picard.jar](https://github.com/broadinstitute/picard/releases/download/2.8.1/picard.jar) | +| R | 3.3.3 | r-base for opnejdk:8 | ## CWL -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): ```bash > toil-cwl-runner picard_collect_alignment_summary_metrics_2.8.1.cwl example_inputs.yaml @@ -78,3 +77,4 @@ optional arguments: Default value: 0. This option can be set to 'null' to clear the default value. 
``` + diff --git a/docs/picard-tools/picard_collectmultiplemetric_2.21.2.md b/docs/picard-tools/picard_collectmultiplemetric_2.21.2.md new file mode 100644 index 00000000..14b0eaee --- /dev/null +++ b/docs/picard-tools/picard_collectmultiplemetric_2.21.2.md @@ -0,0 +1,78 @@ +# CollectMultipleMetrics v2.21.2 + +## Version of tools in docker image + +| Tool | Version | Location | +| :--- | :--- | :--- | +| picard | 2.21.2 | [https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar](https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar) | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner picard_collectmultiplemetrics_2.21.2.cwl example_inputs.yaml +``` + +### Usage + +```bash +> usage: picard_collectmultiplemetrics_2.21.2.cwl [-h] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --input INPUT Input file (bam or sam). Required. + --output_file_name OUTPUT_FILE_NAME + Output file (bam or sam). + --metrics_acciumulation_level METRICS_ACCIUMULATION_LEVEL + The level(s) at which to accumulate metrics. Default + value: [ALL_READS]. This option can be set to 'null' + to clear the default value. Possible values: + {ALL_READS, SAMPLE, LIBRARY, READ_GROUP} This option + may be specified 0 or more times. This option can be + set to 'null' to clear the default list. + --max_insert_size MAX_INSERT_SIZE + Paired-end reads above this insert size will be + considered chimeric along with inter-chromosomal + pairs. Default value: 100000. This option can be set + to 'null' to clear the default value. 
+ --tmp_dir TMP_DIR This option may be specified 0 or more times + --validation_stringency VALIDATION_STRINGENCY + Validation stringency for all SAM files read by this + program. Setting stringency to SILENT can improve + performance when processing a BAM file in which + variable-length data (read, qualities, tags) do not + otherwise need to be decoded. Default value: STRICT. + This option can be set to 'null' to clear the default + value. Possible values: {STRICT,LENIENT, SILENT} + --bam_compression_level BAM_COMPRESSION_LEVEL + Compression level for all compressed files created + (e.g. BAM and GELI). Default value:5. This option can + be set to 'null' to clear the default value. + --create_bam_index Whether to create a BAM index when writing a + coordinate-sorted BAM file. Default value:false. This + option can be set to 'null' to clear the default + value. Possible values:{true, false} + --assume_sorted + --reference_sequence REFERENCE_SEQUENCE + Reference sequence file. Note that while this argument + isn't required, without it only a small subset of the + metrics will be calculated. Note also that if a + reference sequence is provided, it must be accompanied + by a sequence dictionary. Default value: null. + --stop_after STOP_AFTER + Stop after processing N reads, mainly for debugging. + Default value: 0. This option can be set to 'null' to + clear the default value. 
+``` + diff --git a/docs/picard-tools/picard_collectmultiplemetric_2.8.1.md b/docs/picard-tools/picard_collectmultiplemetric_2.8.1.md new file mode 100644 index 00000000..259b84d5 --- /dev/null +++ b/docs/picard-tools/picard_collectmultiplemetric_2.8.1.md @@ -0,0 +1,80 @@ +# CollectMultipleMetrics v2.8.1 + +## Version of tools in docker image \(../picard\_mark\_duplicates\_2.8.1/container/Dockerfile\) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| java base image | 8 | - | +| picard | 2.8.1 | [https://github.com/broadinstitute/picard/releases/download/2.8.1/picard.jar](https://github.com/broadinstitute/picard/releases/download/2.8.1/picard.jar) | +| R | 3.3.3 | r-base for opnejdk:8 | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner picard_collectmultiplemetrics_2-8-1.cwl example_inputs.yaml +``` + +### Usage + +```bash +> usage: picard_collectmultiplemetrics_2-8-1.cwl [-h] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --input INPUT Input file (bam or sam). Required. + --output_file_name OUTPUT_FILE_NAME + Output file (bam or sam). + --metrics_acciumulation_level METRICS_ACCIUMULATION_LEVEL + The level(s) at which to accumulate metrics. Default + value: [ALL_READS]. This option can be set to 'null' + to clear the default value. Possible values: + {ALL_READS, SAMPLE, LIBRARY, READ_GROUP} This option + may be specified 0 or more times. This option can be + set to 'null' to clear the default list. + --max_insert_size MAX_INSERT_SIZE + Paired-end reads above this insert size will be + considered chimeric along with inter-chromosomal + pairs. Default value: 100000. 
This option can be set + to 'null' to clear the default value. + --tmp_dir TMP_DIR This option may be specified 0 or more times + --validation_stringency VALIDATION_STRINGENCY + Validation stringency for all SAM files read by this + program. Setting stringency to SILENT can improve + performance when processing a BAM file in which + variable-length data (read, qualities, tags) do not + otherwise need to be decoded. Default value: STRICT. + This option can be set to 'null' to clear the default + value. Possible values: {STRICT,LENIENT, SILENT} + --bam_compression_level BAM_COMPRESSION_LEVEL + Compression level for all compressed files created + (e.g. BAM and GELI). Default value:5. This option can + be set to 'null' to clear the default value. + --create_bam_index Whether to create a BAM index when writing a + coordinate-sorted BAM file. Default value:false. This + option can be set to 'null' to clear the default + value. Possible values:{true, false} + --assume_sorted + --reference_sequence REFERENCE_SEQUENCE + Reference sequence file. Note that while this argument + isn't required, without it only a small subset of the + metrics will be calculated. Note also that if a + reference sequence is provided, it must be accompanied + by a sequence dictionary. Default value: null. + --stop_after STOP_AFTER + Stop after processing N reads, mainly for debugging. + Default value: 0. This option can be set to 'null' to + clear the default value. 
+``` + diff --git a/picard_fix_mate_information_1.96/README.md b/docs/picard-tools/picard_fix_mate_information_1.96.md similarity index 71% rename from picard_fix_mate_information_1.96/README.md rename to docs/picard-tools/picard_fix_mate_information_1.96.md index 567a78e3..22a9cd50 100644 --- a/picard_fix_mate_information_1.96/README.md +++ b/docs/picard-tools/picard_fix_mate_information_1.96.md @@ -1,26 +1,26 @@ -# CWL and Dockerfile for running Picard - FixMateInformation +# FixMateInformation v1.96 -## Version of tools in docker image (../picard_add_or_replace_read_groups_1.96/container/Dockerfile) +## Version of tools in docker image \(../picard\_add\_or\_replace\_read\_groups\_1.96/container/Dockerfile\) -| Tool | Version | Location | -|--- |--- |--- | -| java base image | 8 | - | -| picard | 1.96 | https://sourceforge.net/projects/picard/files/picard-tools/1.96/picard-tools-1.96.zip | -| R | 3.3.3 | r-base for opnejdk:8 | +| Tool | Version | Location | +| :--- | :--- | :--- | +| java base image | 8 | - | +| picard | 1.96 | [https://sourceforge.net/projects/picard/files/picard-tools/1.96/picard-tools-1.96.zip](https://sourceforge.net/projects/picard/files/picard-tools/1.96/picard-tools-1.96.zip) | +| R | 3.3.3 | r-base for opnejdk:8 | -[![](https://images.microbadger.com/badges/image/mskcc/picard_1.96:0.1.0.svg)](https://microbadger.com/images/mskcc/picard_1.96:0.1.0 "Get your own image badge on microbadger.com") [![](https://images.microbadger.com/badges/version/mskcc/picard_1.96:0.1.0.svg)](https://microbadger.com/images/mskcc/picard_1.96:0.1.0 "Get your own version badge on microbadger.com") [![](https://images.microbadger.com/badges/license/mskcc/picard_1.96:0.1.0.svg)](https://microbadger.com/images/mskcc/picard_1.96:0.1.0 "Get your own license badge on microbadger.com") +[![](https://images.microbadger.com/badges/image/mskcc/picard_1.96:0.1.0.svg)](https://microbadger.com/images/mskcc/picard_1.96:0.1.0) 
[![](https://images.microbadger.com/badges/version/mskcc/picard_1.96:0.1.0.svg)](https://microbadger.com/images/mskcc/picard_1.96:0.1.0) [![](https://images.microbadger.com/badges/license/mskcc/picard_1.96:0.1.0.svg)](https://microbadger.com/images/mskcc/picard_1.96:0.1.0) ## CWL -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): ```bash > toil-cwl-runner picard_fix_mate_information_1.96.cwl example_inputs.yaml ``` -**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** +**If at MSK, using the JUNO cluster you can use the following command** ```bash #Using CWLTOOL @@ -33,7 +33,7 @@ ### Usage -``` +```text usage: picard_fix_mate_information_1.96.cwl [-h] positional arguments: @@ -72,4 +72,5 @@ optional arguments: coordinate-sorted BAM file. Default value:false. This option can be set to 'null' to clear the default value. 
Possible values:{true, false} -``` \ No newline at end of file +``` + diff --git a/docs/picard-tools/picard_fix_mate_information_2.21.2.md b/docs/picard-tools/picard_fix_mate_information_2.21.2.md new file mode 100644 index 00000000..659bf4b1 --- /dev/null +++ b/docs/picard-tools/picard_fix_mate_information_2.21.2.md @@ -0,0 +1,72 @@ +# FixMateInformation v2.21.2 + +## Version of tools in docker image + +| Tool | Version | Location | +| :--- | :--- | :--- | +| picard | 2.21.2 | [https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar](https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar) | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner picard_fix_mate_information_2.21.2.cwl example_inputs.yaml +``` + +**If at MSK, using the JUNO cluster you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/picard_fix_mate_information_2.21.2/picard_fix_mate_information_2.21.2.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> mkdir picardFixMate_toil_log +> toil-cwl-runner --singularity --logFile /path/to/picardFixMate_toil_log/cwltoil.log --jobStore /path/to/picardFixMate_jobStore --batchSystem lsf --workDir /path/to/picardFixMate_toil_log --outdir . 
--writeLogs /path/to/picardFixMate_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/picard_fix_mate_information_2.21.2/picard_fix_mate_information_2.21.2.cwl /path/to/inputs.yaml > picardFixMate_toil.stdout 2> picardFixMate_toil.stderr & +``` + +### Usage + +```text +usage: picard_fix_mate_information_2.21.2.cwl [-h] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --input INPUT The input file to fix. This option may be specified 0 + or more times + --output_file_name OUTPUT_FILE_NAME + Output file name (bam or sam). Not Required + --sort_order SORT_ORDER + Optional sort order to output in. If not supplied + OUTPUT is in the same order as INPUT.Default value: + null. Possible values: {unsorted, queryname, + coordinate} + --tmp_dir TMP_DIR This option may be specified 0 or more times + --validation_stringency VALIDATION_STRINGENCY + Validation stringency for all SAM files read by this + program. Setting stringency to SILENT can improve + performance when processing a BAM file in which + variable-length data (read, qualities, tags) do not + otherwise need to be decoded. Default value: STRICT. + This option can be set to 'null' to clear the default + value. Possible values: {STRICT,LENIENT, SILENT} + --bam_compression_level BAM_COMPRESSION_LEVEL + Compression level for all compressed files created + (e.g. BAM and GELI). Default value:5. This option can + be set to 'null' to clear the default value. + --create_bam_index Whether to create a BAM index when writing a + coordinate-sorted BAM file. Default value:false. This + option can be set to 'null' to clear the default + value. 
Possible values:{true, false} +``` + diff --git a/docs/picard-tools/picard_fix_mate_information_4.1.8.1.md b/docs/picard-tools/picard_fix_mate_information_4.1.8.1.md new file mode 100644 index 00000000..7039d4d4 --- /dev/null +++ b/docs/picard-tools/picard_fix_mate_information_4.1.8.1.md @@ -0,0 +1,77 @@ +# FixMateInformation v4.1.8.1 + +## Version of tools in docker image + +| Tool | Version | Location | +| :--- | :--- | :--- | +| gatk | 4.1.8.1 | broadinstitute/gatk:4.1.8.1 | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner picard_fix_mate_information_4.1.8.1.cwl example_inputs.yaml +``` + +**If at MSK, using the JUNO cluster you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/picard_fix_mate_information_4.1.8.1/picard_fix_mate_information_4.1.8.1.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> mkdir picardFixMate_toil_log +> toil-cwl-runner --singularity --logFile /path/to/picardFixMate_toil_log/cwltoil.log --jobStore /path/to/picardFixMate_jobStore --batchSystem lsf --workDir /path/to picardFixMate_toil_log --outdir . --writeLogs /path/to/picardFixMate_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/picard_fix_mate_information_4.1.8.1/picard_fix_mate_information_4.1.8.1.cwl /path/to/inputs.yaml > picardFixMate_toil.stdout 2> picardFixMate_toil.stderr & +``` + +### Usage + +```text +usage: picard_fix_mate_information_4.1.8.1.cwl + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --input INPUT The input file to fix. 
This option may be specified 0 + or more times + --output_file_name OUTPUT_FILE_NAME + Output file name (bam or sam). Not Required + --sort_order SORT_ORDER + Optional sort order to output in. If not supplied + OUTPUT is in the same order as INPUT.Default value: + null. Possible values: {unsorted, queryname, + coordinate} + --validation_stringency VALIDATION_STRINGENCY + Validation stringency for all SAM files read by this + program. Setting stringency to SILENT can improve + performance when processing a BAM file in which + variable-length data (read, qualities, tags) do not + otherwise need to be decoded. Default value: STRICT. + This option can be set to 'null' to clear the default + value. Possible values: {STRICT,LENIENT, SILENT} + --bam_compression_level BAM_COMPRESSION_LEVEL + Compression level for all compressed files created + (e.g. BAM and GELI). Default value:5. This option can + be set to 'null' to clear the default value. + --use_jdk_deflater Use the JDK Deflater instead of the Intel Deflater for + writing compressed output + --use_jdk_inflater Use the JDK Inflater instead of the Intel Inflater for + reading compressed input. Default value:false. This + option can be set to 'null' to clear the default + value. Possible values:{true, false} + --create_bam_index Whether to create a BAM index when writing a + coordinate-sorted BAM file. Default value:false. This + option can be set to 'null' to clear the default + value. 
Possible values:{true, false} +``` + diff --git a/docs/picard-tools/picard_hsmetrics_2.21.2.md b/docs/picard-tools/picard_hsmetrics_2.21.2.md new file mode 100644 index 00000000..8c43151b --- /dev/null +++ b/docs/picard-tools/picard_hsmetrics_2.21.2.md @@ -0,0 +1,86 @@ +# HSmetrics v2.21.2 + +## Version of tools in docker image + +| Tool | Version | Location | +| :--- | :--- | :--- | +| picard | 2.21.2 | [https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar](https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar) | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash +> toil-cwl-runner picard_hsmetrics_2.21.2.cwl example_inputs.yaml +``` + +### Usage + +```bash +> usage: picard_hsmetrics_2.21.2.cwl [-h] + +optional arguments: + -h, --help show this help message and exit + --bait_intervals BAIT_INTERVALS + An interval list file that contains the locations of + the baits used. Default value: null. This option must + be specified at least 1 times. + --bait_set_name BAIT_SET_NAME + Bait set name. If not provided it is inferred from the + filename of the bait intervals. Default value: null + --minimum_mapping_quality MINIMUM_MAPPING_QUALITY + Minimum mapping quality for a read to contribute + coverage. Default value: 20. This option can be set to + 'null' to clear the default value. + --minimum_base_quality MINIMUM_BASE_QUALITY + Minimum base quality for a base to contribute + coverage. Default value: 20. This option can be set to + 'null' to clear the default value. + --clip_overlapping_reads + True if we are to clip overlapping reads, false + otherwise. Default value: true. This option can be set + to 'null' to clear the default value. Possible values: + {true, false} + --target_intervals TARGET_INTERVALS + An interval list file that contains the locations of + the targets. Default value: null. 
This option must be + specified at least 1 times. + --input INPUT An aligned SAM or BAM file. Required. + --output_file_name OUTPUT_FILE_NAME + The output file to write the metrics to. Required. + --metric_accumulation_level METRIC_ACCUMULATION_LEVEL + The level(s) at which to accumulate metrics. Default + value: [ALL_READS]. This option can be set to 'null' + to clear the default value. Possible values: + {ALL_READS, SAMPLE, LIBRARY, READ_GROUP} This option + may be specified 0 or more times. This option can be + set to 'null' to clear the default list. + --per_target_coverage PER_TARGET_COVERAGE + An optional file to output per target coverage + information to. Default value: null. + --per_base_coverage PER_BASE_COVERAGE + An optional file to output per base coverage + information to. The per-base file contains one line + per target base and can grow very large. It is not + recommended for use with large target sets. Default + value: null. + --near_distance NEAR_DISTANCE + The maximum distance between a read and the nearest + probe/bait/amplicon for the read to be considered + 'near probe' and included in percent selected. Default + value: 250. This option can be set to 'null' to clear + the default value. + --coverage_cap COVERAGE_CAP + Parameter to set a max coverage limit for Theoretical + Sensitivity calculations. Default is 200. Default + value: 200. This option can be set to 'null' to clear + the default value. + --sample_size SAMPLE_SIZE + Sample Size used for Theoretical Het Sensitivity + sampling. Default is 10000. Default value: 10000. This + option can be set to 'null' to clear the default + value. 
+``` + diff --git a/docs/picard-tools/picard_hsmetrics_2.8.1.md b/docs/picard-tools/picard_hsmetrics_2.8.1.md new file mode 100644 index 00000000..627d3286 --- /dev/null +++ b/docs/picard-tools/picard_hsmetrics_2.8.1.md @@ -0,0 +1,26 @@ +# HSmetrics v2.8.1 + +## Version of tools in docker image \(../picard\_mark\_duplicates\_2.8.1/container/Dockerfile\) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| java base image | 8 | - | +| picard | 2.8.1 | [https://github.com/broadinstitute/picard/releases/download/2.8.1/picard.jar](https://github.com/broadinstitute/picard/releases/download/2.8.1/picard.jar) | +| R | 3.3.3 | r-base for opnejdk:8 | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash +> toil-cwl-runner picard_hsmetrics_2.8.1.cwl example_inputs.yaml +``` + +### Usage + +```bash +> usage: picard_hsmetrics_2.8.1.cwl [-h] +``` + diff --git a/docs/picard-tools/picard_mark_duplicates_1.96.md b/docs/picard-tools/picard_mark_duplicates_1.96.md new file mode 100644 index 00000000..dd13d5ed --- /dev/null +++ b/docs/picard-tools/picard_mark_duplicates_1.96.md @@ -0,0 +1,20 @@ +# MarkDuplicates v1.96 + +## Version of tools in docker image \(/container/Dockerfile\) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| java base image | 8 | - | +| picard | 1.96 | [https://sourceforge.net/projects/picard/files/picard-tools/1.96/picard-tools-1.96.zip](https://sourceforge.net/projects/picard/files/picard-tools/1.96/picard-tools-1.96.zip) | +| R | 3.3.3 | r-base for opnejdk:8 | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner picard_mark_duplicates_1.96.cwl example_inputs.yaml +``` + diff --git a/docs/picard-tools/picard_mark_duplicates_2.21.2.md b/docs/picard-tools/picard_mark_duplicates_2.21.2.md new file mode 
100644 index 00000000..f978a4ba --- /dev/null +++ b/docs/picard-tools/picard_mark_duplicates_2.21.2.md @@ -0,0 +1,77 @@ +# MarkDuplicates v2.21.2 + +## Version of tools in docker image + +| Tool | Version | Location | +| :--- | :--- | :--- | +| picard | 2.21.2 | [https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar](https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar) | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner picard_mark_duplicates_2.21.2.cwl example_inputs.yaml +``` + +### Usage + +```bash +usage: picard_mark_duplicates_2.21.2.cwl [-h] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --input INPUT Input file (bam or sam). Required. + --output_file_name OUTPUT_FILE_NAME + Output file (bam or sam). + --duplication_metrics DUPLICATION_METRICS + File to write duplication metrics to Required. + --assume_sort_order ASSUME_SORT_ORDER + Optional sort order to output in. If not supplied + OUTPUT is in the same order as INPUT.Default value: + null. Possible values: {unsorted, queryname, + coordinate} + --tmp_dir TMP_DIR This option may be specified 0 or more times + --validation_stringency VALIDATION_STRINGENCY + Validation stringency for all SAM files read by this + program. Setting stringency to SILENT can improve + performance when processing a BAM file in which + variable-length data (read, qualities, tags) do not + otherwise need to be decoded. Default value: STRICT. + This option can be set to 'null' to clear the default + value. 
Possible values: {STRICT,LENIENT, SILENT} + --bam_compression_level BAM_COMPRESSION_LEVEL + Compression level for all compressed files created + (e.g. BAM and GELI). Default value:5. This option can + be set to 'null' to clear the default value. + --create_bam_index Whether to create a BAM index when writing a + coordinate-sorted BAM file. Default value:false. This + option can be set to 'null' to clear the default + value. Possible values:{true, false} + --duplicate_scoring_strategy DUPLICATE_SCORING_STRATEGY + The scoring strategy for choosing the non-duplicate + among candidates. Default value:SUM_OF_BASE_QUALITIES. + This option can be set to 'null' to clear the default + value. Possible values: {SUM_OF_BASE_QUALITIES, + TOTAL_MAPPED_REFERENCE_LENGTH, RANDOM} + --optical_duplicate_pixel_distance OPTICAL_DUPLICATE_PIXEL_DISTANCE + The maximum offset between two duplicate clusters in + order to consider them optical duplicates. The default + is appropriate for unpatterned versions of the + Illumina platform. For the patterned flowcell models, + 2500 is more appropriate. For other platforms and + models, users should experiment to find what works + best. Default value: 100. This option can be set to + 'null' to clear the default value. 
+``` + diff --git a/docs/picard-tools/picard_mark_duplicates_2.8.1.md b/docs/picard-tools/picard_mark_duplicates_2.8.1.md new file mode 100644 index 00000000..cfb0fc92 --- /dev/null +++ b/docs/picard-tools/picard_mark_duplicates_2.8.1.md @@ -0,0 +1,20 @@ +# MarkDuplicates v2.8.1 + +## Version of tools in docker image \(/container/Dockerfile\) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| java base image | 8 | - | +| picard | 2.8.1 | [https://github.com/broadinstitute/picard/releases/download/2.8.1/picard.jar](https://github.com/broadinstitute/picard/releases/download/2.8.1/picard.jar) | +| R | 3.3.3 | r-base for opnejdk:8 | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner picard_mark_duplicates_2.8.1.cwl example_inputs.yaml +``` + diff --git a/docs/picard-tools/picard_mark_duplicates_4.1.8.1.md b/docs/picard-tools/picard_mark_duplicates_4.1.8.1.md new file mode 100644 index 00000000..fe6c11f1 --- /dev/null +++ b/docs/picard-tools/picard_mark_duplicates_4.1.8.1.md @@ -0,0 +1,115 @@ +# MarkDuplicates v4.1.8.1 + +## Version of tools in docker image + +| Tool | Version | Location | +| :--- | :--- | :--- | +| gatk | 4.1.8.1 | broadinstitute/gatk:4.1.8.1 | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner picard_mark_duplicates_4.1.8.1.cwl example_inputs.yaml +``` + +### Usage + +```bash +usage: picard_mark_duplicates_4.1.8.1.cwl [-h] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --input INPUT Input file (bam or sam). Required. 
+ --output_file_name OUTPUT_FILE_NAME + Output file (bam or sam). + --duplication_metrics DUPLICATION_METRICS + File to write duplication metrics to Required. + --assume_sort_order ASSUME_SORT_ORDER + Optional sort order to output in. If not supplied + OUTPUT is in the same order as INPUT.Default value: + null. Possible values: {unsorted, queryname, + coordinate} + --tmp_dir TMP_DIR This option may be specified 0 or more times + --validation_stringency VALIDATION_STRINGENCY + Validation stringency for all SAM files read by this + program. Setting stringency to SILENT can improve + performance when processing a BAM file in which + variable-length data (read, qualities, tags) do not + otherwise need to be decoded. Default value: STRICT. + This option can be set to 'null' to clear the default + value. Possible values: {STRICT,LENIENT, SILENT} + --bam_compression_level BAM_COMPRESSION_LEVEL + Compression level for all compressed files created + (e.g. BAM and GELI). Default value:5. This option can + be set to 'null' to clear the default value. + --create_bam_index Whether to create a BAM index when writing a + coordinate-sorted BAM file. Default value:false. This + option can be set to 'null' to clear the default + value. Possible values:{true, false} + --read_name_regex READ_NAME_REGEX + MarkDuplicates can use the tile and cluster positions + to estimate the rate of optical duplication in + addition to the dominant source of duplication, PCR, + to provide a more accurate estimation of library size. + By default (with no READ_NAME_REGEX specified), + MarkDuplicates will attempt to extract coordinates + using a split on ':' (see Note below). Set + READ_NAME_REGEX to 'null' to disable optical duplicate + detection. Note that without optical duplicate counts, + library size estimation will be less accurate. 
If the + read name does not follow a standard Illumina colon- + separation convention, but does contain tile and x,y + coordinates, a regular expression can be specified to + extract three variables: tile/region, x coordinate and + y coordinate from a read name. The regular expression + must contain three capture groups for the three + variables, in order. It must match the entire read + name. e.g. if field names were separated by semi-colon + (';') this example regex could be specified + (?:.*;)?([0-9]+)[^;]*;([0-9]+)[^;]*;([0-9]+)[^;]*$ + Note that if no READ_NAME_REGEX is specified, the read + name is split on ':'. For 5 element names, the 3rd, + 4th and 5th elements are assumed to be tile, x and y + values. For 7 element names (CASAVA 1.8), the 5th, + 6th, and 7th elements are assumed to be tile, x and y + values. + --sorting_collection_size_ratio SORTING_COLLECTION_SIZE_RATIO + This number, plus the maximum RAM available to the + JVM, determine the memory footprint used by some of + the sorting collections. If you are running out of + memory, try reducing this number. + --use_jdk_deflater Use the JDK Deflater instead of the Intel Deflater for + writing compressed output + --use_jdk_inflater Use the JDK Inflater instead of the Intel Inflater for + reading compressed input. Default value:false. This + option can be set to 'null' to clear the default + value. Possible values:{true, false} + --duplicate_scoring_strategy DUPLICATE_SCORING_STRATEGY + The scoring strategy for choosing the non-duplicate + among candidates. Default value:SUM_OF_BASE_QUALITIES. + This option can be set to 'null' to clear the default + value.Possible values: {SUM_OF_BASE_QUALITIES, + TOTAL_MAPPED_REFERENCE_LENGTH, RANDOM} + --optical_duplicate_pixel_distance OPTICAL_DUPLICATE_PIXEL_DISTANCE + The maximum offset between two duplicate clusters in + order to consider them optical duplicates. The default + is appropriate for unpatterned versions of the + Illumina platform. 
For the patterned flowcell models, + 2500 is moreappropriate. For other platforms and + models, users should experiment to find what works + best. Default value: 100. This option can be set to + 'null' to clear the default value. +``` + diff --git a/docs/trim-galore/README.md b/docs/trim-galore/README.md new file mode 100644 index 00000000..2f5f0270 --- /dev/null +++ b/docs/trim-galore/README.md @@ -0,0 +1,2 @@ +# Trim Galore + diff --git a/trim_galore_0.6.2/README.md b/docs/trim-galore/trim_galore_0.6.2.md similarity index 83% rename from trim_galore_0.6.2/README.md rename to docs/trim-galore/trim_galore_0.6.2.md index 3727924e..4ef02a4d 100644 --- a/trim_galore_0.6.2/README.md +++ b/docs/trim-galore/trim_galore_0.6.2.md @@ -1,25 +1,25 @@ -# CWL and Dockerfile for running Trim Galore +# v0.6.2 -## Version of tools in docker image (/container/Dockerfile) +## Version of tools in docker image \(/container/Dockerfile\) -| Tool | Version | Location | -|--- |--- |--- | -| Ubuntu base image | 18.04 | - | -| cutadapt | 2.3 | https://pypi.org/project/cutadapt/ | -| FASTQC | 0.11.8 | https://www.bioinformatics.babraham.ac.uk/projects/fastqc/fastqc_v0.11.8.zip | -| Trim Galore | 0.6.2 | https://github.com/FelixKrueger/TrimGalore/archive/0.6.2.tar.gz | +| Tool | Version | Location | +| :--- | :--- | :--- | +| Ubuntu base image | 18.04 | - | +| cutadapt | 2.3 | [https://pypi.org/project/cutadapt/](https://pypi.org/project/cutadapt/) | +| FASTQC | 0.11.8 | [https://www.bioinformatics.babraham.ac.uk/projects/fastqc/fastqc\_v0.11.8.zip](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/fastqc_v0.11.8.zip) | +| Trim Galore | 0.6.2 | [https://github.com/FelixKrueger/TrimGalore/archive/0.6.2.tar.gz](https://github.com/FelixKrueger/TrimGalore/archive/0.6.2.tar.gz) | ## CWL -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): +* CWL specification 1.0 +* Use example\_inputs.yaml to see the 
inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): ```bash > toil-cwl-runner trim_galore_0.6.2.cwl example_inputs.yaml ``` -**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** +**If at MSK, using the JUNO cluster you can use the following command** ```bash #Using CWLTOOL @@ -32,7 +32,7 @@ ### Usage -``` +```text usage: trim_galore_0.6.2.cwl [-h] positional arguments: @@ -92,4 +92,5 @@ optional arguments: --error_rate ERROR_RATE Maximum allowed error rate (no. of errors divided by the length of the matching region) (default: 0.1) -``` \ No newline at end of file +``` + diff --git a/docs/ubuntu-utilites/README.md b/docs/ubuntu-utilites/README.md new file mode 100644 index 00000000..5d69693e --- /dev/null +++ b/docs/ubuntu-utilites/README.md @@ -0,0 +1,2 @@ +# Ubuntu utilities + diff --git a/docs/ubuntu-utilites/utilities_ubuntu_18.04.md b/docs/ubuntu-utilites/utilities_ubuntu_18.04.md new file mode 100644 index 00000000..ba695d67 --- /dev/null +++ b/docs/ubuntu-utilites/utilities_ubuntu_18.04.md @@ -0,0 +1,26 @@ +# v18.04 + +## Version of tools in docker image \(/container/Dockerfile\) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| Ubuntu base image | 18.04 | - | + +## Available tools + +| Tool | Description | +| :--- | :--- | +| sort.cwl | sort lines of text files | +| gzip.cwl | compress or expand files | +| mv.cwl | move \(rename\) files | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs\_toolname.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner gzip.cwl example_inputs_gzip.yaml +``` + diff --git a/docs/vardictjava/README.md b/docs/vardictjava/README.md new file mode 100644 index 00000000..5cf8e57f --- /dev/null +++ 
b/docs/vardictjava/README.md @@ -0,0 +1,3 @@ +# VardictJava + + diff --git a/docs/vardictjava/vardictjava_1.8.2.md b/docs/vardictjava/vardictjava_1.8.2.md new file mode 100644 index 00000000..3d9ae093 --- /dev/null +++ b/docs/vardictjava/vardictjava_1.8.2.md @@ -0,0 +1,69 @@ +# Vardict v1.8.2 - Single sample mode +To run VarDictJava in single sample mode vardict_workflow_single_sample.cwl should be run. vardict_workflow_single_sample.cwl will run 3 workflows to implement the example command in the original documentation, as explained here: +https://github.com/AstraZeneca-NGS/VarDictJava#single-sample-mode + + + +## Version of tools in docker image \(/container/Dockerfile\) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| ubuntu base image (alpine) | 3.8 | - | +| vardict | 1.8.2 | [https://github.com/AstraZeneca-NGS/VarDictJava/releases/tag/v1.8.2](https://github.com/AstraZeneca-NGS/VarDictJava/releases/tag/v1.8.2) | +| perl | 5.26.2-r1 | [https://pkgs.alpinelinux.org/package/edge/main/aarch64/perl](https://pkgs.alpinelinux.org/package/edge/main/aarch64/perl) | +| r | 3.5.1 | [https://pkgs.alpinelinux.org/package/edge/community/x86/R](https://pkgs.alpinelinux.org/package/edge/community/x86/R) | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner vardict_workflow_single_sample.cwl example_inputs.yaml +``` + +**If at MSK, using the JUNO cluster you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/vardict_1.8.2/vardict_workflow_single_sample.cwl /path/to/inputs.yaml + +#Using Toil-cwl-runner +toil-cwl-runner --singularity vardict_workflow_single_sample.cwl example_inputs.yaml +``` + +### Usage + +```bash +> toil-cwl-runner vardict_workflow_single_sample.cwl --help +usage: vardict_workflow_single_sample.cwl [-h] + +positional arguments: + job_order Job input json file + 
+optional arguments: + +-h, --help show this help message and exit +-E E + The column for the region end, e.g. gene end +-G G + The reference fasta. Should be indexed (.fai). +-N N + The sample name to be used directly. +-S S + The column for the region start, e.g. gene start +-b B + The indexed BAM file. +-c C + The column for chromosome +-f F + The threshold for allele frequency, default: 0.01 or 1% +--f_1 F_1 + The minimum allele frequency. Used for var2vcf_valid.pl +-g G + The column for a gene name, or segment annotation +--vcf VCF + vcf file name given to var2vcf_valid.pl +--bedfile BEDFILE +``` diff --git a/docs/vcf2maf/README.md b/docs/vcf2maf/README.md new file mode 100644 index 00000000..e349a6cd --- /dev/null +++ b/docs/vcf2maf/README.md @@ -0,0 +1,3 @@ +# VCF2MAF + + diff --git a/docs/vcf2maf/vcf2maf_1.6.21.md b/docs/vcf2maf/vcf2maf_1.6.21.md new file mode 100644 index 00000000..6d8cd35d --- /dev/null +++ b/docs/vcf2maf/vcf2maf_1.6.21.md @@ -0,0 +1,76 @@ +# CWL and Dockerfile for running vcf2maf v1.6.21 + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +|--- |--- |--- | +| clearlinux (base image) | - | - | +| vcf2maf | 1.6.21 | https://github.com/mskcc/vcf2maf/archive/v1.6.21.zip | +| VEP | 105 | - | +| MINICONDA_VERSION | py37_4.9.2 | https://repo.anaconda.com/miniconda/Miniconda3-py37_4.9.2-Linux-x86_64.sh | +| BCFTOOLS_VERSION | 1.10.2 | - | +| SAMTOOLS_VERSION | 1.10 | - | +| VCF2MAF_VERSION | 1.6.21 | - | +| HTSLIB_VERSION | 1.10.2 | - | + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner vcf2maf_1.6.21.cwl example_inputs.yaml +``` + +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` 
you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/vcf2maf_1.6.21.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> mkdir vcf2maf_toil_log +> toil-cwl-runner --singularity --logFile /path/to/vcf2maf_toil_log/cwltoil.log --jobStore /path/to/vcf2maf_jobStore --batchSystem lsf --workDir /path/to/vcf2maf_toil_log --outdir . --writeLogs /path/to/vcf2maf_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/vcf2maf_1.6.21.cwl /path/to/inputs.yaml > vcf2maf_toil.stdout 2> vcf2maf_toil.stderr & +``` + +### Usage + +``` +Usage: + perl vcf2maf.pl --help + perl vcf2maf.pl --input-vcf input.vcf --output-maf output.maf --tumor-id TUMOR_ID --normal-id NORMAL_ID + +--input-vcf Path to input file in VCF format +--output-maf Path to output MAF file +--tmp-dir Folder to retain intermediate VCFs after runtime [Default: Folder containing input VCF] +--tumor-id Tumor_Sample_Barcode to report in the MAF [TUMOR] +--normal-id Matched_Norm_Sample_Barcode to report in the MAF [NORMAL] +--vcf-tumor-id Tumor sample ID used in VCF's genotype columns [--tumor-id] +--vcf-normal-id Matched normal ID used in VCF's genotype columns [--normal-id] +--custom-enst List of custom ENST IDs that override canonical selection +--vep-path Folder containing the vep script [~/miniconda3/bin] +--vep-data VEP's base cache/plugin directory [~/.vep] +--vep-forks Number of forked processes to use when running VEP [4] +--vep-custom String to pass into VEP's --custom option [] +--vep-config Config file to pass into VEP's --config option [] +--vep-overwrite Allow VEP to overwrite output VCF if it exists +--buffer-size Number of variants VEP loads at a time; Reduce this for low memory systems [5000] +--any-allele When reporting co-located variants, allow mismatched variant alleles too +--inhibit-vep Skip running VEP, but extract VEP annotation in VCF if found +--online Use useastdb.ensembl.org instead of 
local cache (supports only GRCh38 VCFs listing <100 events) +--ref-fasta Reference FASTA file [~/.vep/homo_sapiens/102_GRCh37/Homo_sapiens.GRCh37.dna.toplevel.fa.gz] +--max-subpop-af Add FILTER tag common_variant if gnomAD reports any subpopulation AFs greater than this [0.0004] +--species Ensembl-friendly name of species (e.g. mus_musculus for mouse) [homo_sapiens] +--ncbi-build NCBI reference assembly of variants MAF (e.g. GRCm38 for mouse) [GRCh37] +--cache-version Version of offline cache to use with VEP (e.g. 75, 91, 102) [Default: Installed version] +--maf-center Variant calling center to report in MAF [.] +--retain-info Comma-delimited names of INFO fields to retain as extra columns in MAF [] +--retain-fmt Comma-delimited names of FORMAT fields to retain as extra columns in MAF [] +--retain-ann Comma-delimited names of annotations (within the VEP CSQ/ANN) to retain as extra columns in MAF [] +--min-hom-vaf If GT undefined in VCF, minimum allele fraction to call a variant homozygous [0.7] +--remap-chain Chain file to remap variants to a different assembly before running VEP +--verbose Print more things to log progress +--help Print a brief help message and quit +--man Print the detailed manual +``` diff --git a/docs/waltz/README.md b/docs/waltz/README.md new file mode 100644 index 00000000..0d43eaff --- /dev/null +++ b/docs/waltz/README.md @@ -0,0 +1,2 @@ +# Waltz + diff --git a/waltz_count_reads_3.1.1/README.md b/docs/waltz/waltz_count_reads_3.1.1.md similarity index 70% rename from waltz_count_reads_3.1.1/README.md rename to docs/waltz/waltz_count_reads_3.1.1.md index 8ad83443..e3131e9f 100644 --- a/waltz_count_reads_3.1.1/README.md +++ b/docs/waltz/waltz_count_reads_3.1.1.md @@ -1,24 +1,25 @@ -# CWL and Dockerfile for running Waltz - Count Reads +# CountReads v3.1.1 -## Version of tools in docker image (/container/Dockerfile) +## Version of tools in docker image \(/container/Dockerfile\) -| Tool | Version | Location | -|--- |--- |--- | -| java base image | 
8 | - | -| waltz | 3.1.1 | https://github.com/juberpatel/Waltz/releases/download/v3.1.1/Waltz-3.1.1.jar | +| Tool | Version | Location | +| :--- | :--- | :--- | +| java base image | 8 | - | +| waltz | 3.1.1 | [https://github.com/juberpatel/Waltz/releases/download/v3.1.1/Waltz-3.1.1.jar](https://github.com/juberpatel/Waltz/releases/download/v3.1.1/Waltz-3.1.1.jar) | [![](https://img.shields.io/badge/version-3.1.1-blue)](https://github.com/juberpatel/Waltz/releases/tag/v3.1.1) + ## CWL -- CWL specification 1.0 -- Use example_inputs.yml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): +* CWL specification 1.0 +* Use example\_inputs.yml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): ```bash > toil-cwl-runner waltz_count_reads_3.1.1.cwl example_inputs.yml ``` -**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** +**If at MSK, using the JUNO cluster you can use the following command** ```bash #Using CWLTOOL @@ -55,3 +56,4 @@ optional arguments: --number_of_threads NUMBER_OF_THREADS --bed_file BED_FILE ``` + diff --git a/waltz_pileupmatrices_3.1.1/README.md b/docs/waltz/waltz_pileupmatrices_3.1.1.md similarity index 70% rename from waltz_pileupmatrices_3.1.1/README.md rename to docs/waltz/waltz_pileupmatrices_3.1.1.md index b5aed666..cc432392 100644 --- a/waltz_pileupmatrices_3.1.1/README.md +++ b/docs/waltz/waltz_pileupmatrices_3.1.1.md @@ -1,24 +1,25 @@ -# CWL and Dockerfile for running Waltz - PileupMetrics +# PileupMetrics v3.1.1 -## Version of tools in docker image (../waltz_count_reads_3.1.1/container/Dockerfile) +## Version of tools in docker image \(../waltz\_count\_reads\_3.1.1/container/Dockerfile\) -| Tool | Version | Location | -|--- |--- |--- | -| java base image | 8 | 
- | -| waltz | 3.1.1 | https://github.com/juberpatel/Waltz/releases/download/v3.1.1/Waltz-3.1.1.jar | +| Tool | Version | Location | +| :--- | :--- | :--- | +| java base image | 8 | - | +| waltz | 3.1.1 | [https://github.com/juberpatel/Waltz/releases/download/v3.1.1/Waltz-3.1.1.jar](https://github.com/juberpatel/Waltz/releases/download/v3.1.1/Waltz-3.1.1.jar) | [![](https://img.shields.io/badge/version-3.1.1-blue)](https://github.com/juberpatel/Waltz/releases/tag/v3.1.1) + ## CWL -- CWL specification 1.0 -- Use example_inputs.yml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): +* CWL specification 1.0 +* Use example\_inputs.yml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): ```bash > toil-cwl-runner waltz_pileupmatrices_3.1.1.cwl example_inputs.yml ``` -**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** +**If at MSK, using the JUNO cluster you can use the following command** ```bash #Using CWLTOOL @@ -56,3 +57,4 @@ optional arguments: --number_of_threads NUMBER_OF_THREADS --bed_file BED_FILE ``` + diff --git a/expression_tools/README.md b/expression_tools/README.md new file mode 100644 index 00000000..f0c79bf1 --- /dev/null +++ b/expression_tools/README.md @@ -0,0 +1,17 @@ +# CWL Expression tools + +## Available tools + +| Tool | Description | +| -------- | ------------------------ | +| put_in_dir.cwl | put the list of files into the same directory | + +## CWL + +- CWL specification 1.0 +- Use example_inputs_toolname.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner put_in_dir.cwl example_inputs_gzip.yaml +``` diff --git a/expression_tools/put_in_dir.cwl b/expression_tools/put_in_dir.cwl new file 
mode 100644 index 00000000..382b64e5 --- /dev/null +++ b/expression_tools/put_in_dir.cwl @@ -0,0 +1,103 @@ +#!/usr/bin/env cwl-runner +# originally from https://github.com/mskcc/pluto-cwl + +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +class: ExpressionTool +id: put-in-dir + +inputs: + output_directory_name: + type: string + doc: >- + Put all `files` in a directory called `output_directory_name`. + output_subdirectory_name: + type: string? + doc: >- + If specified, nest all `files` within a directory called `output_subdirectory_name`, which itself is within `output_directory_name`. + files: + type: + type: array + items: + - File + - type: array + items: + - File + - Directory + - 'null' + +outputs: + directory: + type: Directory + +# This tool returns a Directory object, +# which holds all output files from the list +# of supplied input files +expression: | + ${ + var output_files = []; + var input_files = inputs.files.filter(function(single_file) { + return single_file !== null && single_file !== undefined; + }); + + for (var i = 0; i < input_files.length; i++) { + // Handle list of list of files + if (input_files[i] && input_files[i].length) { + for (var ii = 0; ii < input_files[i].length; ii++) { + output_files.push(input_files[i][ii]); + } + // Handle list of files + } else if (input_files[i]) { + output_files.push(input_files[i]); + } + } + + if (inputs.output_subdirectory_name) { + return { + 'directory': { + 'class': 'Directory', + 'basename': inputs.output_directory_name, + 'listing': [ + { + 'class': 'Directory', + 'basename': inputs.output_subdirectory_name, + 'listing': output_files + } + ] + } + }; + } else { + return { + 'directory': { + 'class': 'Directory', + 'basename': inputs.output_directory_name, + 'listing': output_files + } + }; + } + + } + +requirements: + - class: ResourceRequirement + ramMin: 2000 + coresMin: 1 
+ - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center diff --git a/fastp_0.20.1/README.md b/fastp_0.20.1/README.md new file mode 100644 index 00000000..10e9866a --- /dev/null +++ b/fastp_0.20.1/README.md @@ -0,0 +1,84 @@ +# CWL and Dockerfile for running Fastp + +## Version of tools in docker image + +| Tool | Version | Location | +|--- |--- |--- | +| fastp | 0.20.1 | quay.io/biocontainers/fastp:0.20.1--h8b12597_0 | + + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner ./fastp_0.20.1.cwl example_inputs.yaml +``` + +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** + +```bash +#Using CWLTOOL +> cwltool ./fastp_0.20.1.cwl example_inputs.yaml + +#Using toil-cwl-runner +> mkdir toil_log +> toil-cwl-runner --singularity --logFile /path/to/toil_log/cwltoil.log --jobStore /path/to/jobStore --batchSystem lsf --workDir /path/to/toil_log --outdir . 
--writeLogs /path/to/toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/fastp-0_20_1/fastp-0_20_1.cwl /path/to/inputs.yaml > toil.stdout 2> toil.stderr & +``` + +### Usage +``` +usage: fastp_0.20.1.cwl [-h] --read1_input READ1_INPUT --read1_output_path + READ1_OUTPUT_PATH [--read2_input READ2_INPUT] + [--read2_output_path READ2_OUTPUT_PATH] + [--unpaired1_path UNPAIRED1_PATH] + [--unpaired2_path UNPAIRED2_PATH] + [--failed_reads_path FAILED_READS_PATH] + [--read1_adapter_sequence READ1_ADAPTER_SEQUENCE] + [--read2_adapter_sequence READ2_ADAPTER_SEQUENCE] + [--minimum_read_length MINIMUM_READ_LENGTH] + --json_output_path JSON_OUTPUT_PATH --html_output_path + HTML_OUTPUT_PATH + [job_order] + +Setup and execute Fastp + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --read1_input READ1_INPUT + read1 input file name + --read1_output_path READ1_OUTPUT_PATH + read1 output file name + --read2_input READ2_INPUT + read2 input file name, for PE data + --read2_output_path READ2_OUTPUT_PATH + read2 output file name + --unpaired1_path UNPAIRED1_PATH + for PE input, if read1 passed QC but read2 not, it + will be written to unpaired1. + --unpaired2_path UNPAIRED2_PATH + for PE input, if read2 passed QC but read1 not, it + will be written to unpaired2. + --failed_reads_path FAILED_READS_PATH + specify the file to store reads that cannot pass the + filters. + --read1_adapter_sequence READ1_ADAPTER_SEQUENCE + the adapter for read1. For SE data, if not specified, + the adapter will be auto-detected. For PE data, this + is used if R1/R2 are found not overlapped. + --read2_adapter_sequence READ2_ADAPTER_SEQUENCE + the adapter for read2. For PE data, this is used if + R1/R2 are found not overlapped. + --minimum_read_length MINIMUM_READ_LENGTH + reads shorter than length_required will be discarded, + default is 15. 
+ --json_output_path JSON_OUTPUT_PATH + the json format report file name + --html_output_path HTML_OUTPUT_PATH + the html format report file name +``` diff --git a/fastp_0.20.1/example_inputs.yaml b/fastp_0.20.1/example_inputs.yaml new file mode 100644 index 00000000..5d3af3bf --- /dev/null +++ b/fastp_0.20.1/example_inputs.yaml @@ -0,0 +1,13 @@ +read1_input: + class: File + path: "./test_data/R1.fq" +read2_input: + class: File + path: "./test_data/R2.fq" +read1_output_path: "./R1.output" +read2_output_path: "./R2.output" +read1_adapter_sequence: "GATCGGAAGAGC" +read2_adapter_sequence: "AGATCGGAAGAGC" +minimum_read_length: 25 +json_output_path: "sample_name.json" +html_output_path: "sample_name.html" diff --git a/fastp_0.20.1/fastp_0.20.1.cwl b/fastp_0.20.1/fastp_0.20.1.cwl new file mode 100644 index 00000000..f6356a6a --- /dev/null +++ b/fastp_0.20.1/fastp_0.20.1.cwl @@ -0,0 +1,225 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: fastp_0_20_1 +baseCommand: + - fastp +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + doc: 'worker thread number, default is 2 (int [=2])' + - id: read1_input + type: File + inputBinding: + position: 0 + prefix: '--in1' + doc: | + read1 input file name + - id: read1_output_path + type: string + inputBinding: + position: 0 + prefix: '--out1' + doc: | + read1 output file name + - id: read2_input + type: File? + inputBinding: + position: 0 + prefix: '--in2' + doc: | + read2 input file name, for PE data + - id: read2_output_path + type: string? + inputBinding: + position: 0 + prefix: '--out2' + doc: | + read2 output file name + - id: unpaired1_path + type: string? 
+ inputBinding: + position: 0 + prefix: '--unpaired1' + doc: > + for PE input, if read1 passed QC but read2 not, it will be written to + unpaired1. + - id: unpaired2_path + type: string? + inputBinding: + position: 0 + prefix: '--unpaired2' + doc: > + for PE input, if read2 passed QC but read1 not, it will be written to + unpaired2. + - id: failed_reads_path + type: string? + inputBinding: + position: 0 + prefix: '--failed_out' + doc: | + specify the file to store reads that cannot pass the filters. + - id: read1_adapter_sequence + type: string? + inputBinding: + position: 0 + prefix: '--adapter_sequence' + doc: > + the adapter for read1. For SE data, if not specified, the adapter will be + auto-detected. For PE data, this is used if R1/R2 are found not + overlapped. + - id: read2_adapter_sequence + type: string? + inputBinding: + position: 0 + prefix: '--adapter_sequence_r2' + doc: > + the adapter for read2. For PE data, this is used if R1/R2 are found not + overlapped. + - id: minimum_read_length + type: int? + inputBinding: + position: 0 + prefix: '--length_required' + doc: | + reads shorter than length_required will be discarded, default is 15. + - id: maximum_read_length + type: int? + inputBinding: + position: 0 + prefix: '--length_limit' + doc: > + reads longer than length_limit will be discarded, default 0 means no + limitation. + - id: max_len_read1 + type: int? + inputBinding: + position: 0 + prefix: '--max_len1' + doc: >- + if read1 is longer than max_len1, then trim read1 at its tail to make it + as long as max_len1. Default 0 means no limitation + - id: max_len_read2 + type: int? + inputBinding: + position: 0 + prefix: '--max_len2' + doc: >- + if read2 is longer than max_len2, then trim read2 at its tail to make it + as long as max_len2. Default 0 means no limitation. 
If it's not specified, + it will follow read1's settings + - default: fastp.json + id: json_output_path + type: string + inputBinding: + position: 0 + prefix: '--json' + doc: | + the json format report file name + - default: fastp.html + id: html_output_path + type: string + inputBinding: + position: 0 + prefix: '--html' + doc: | + the html format report file name + - id: disable_quality_filtering + type: boolean? + inputBinding: + position: 0 + prefix: '--disable_quality_filtering' + doc: >- + quality filtering is enabled by default. If this option is specified, + quality filtering is disabled + - id: disable_trim_poly_g + type: boolean? + inputBinding: + position: 0 + prefix: '--disable_trim_poly_g' + doc: >- + disable polyG tail trimming, by default trimming is automatically enabled + for Illumina NextSeq/NovaSeq data + - id: verbose + type: boolean? + inputBinding: + position: 0 + prefix: '--verbose' + doc: output verbose log information (i.e. when every 1M reads are processed) +outputs: + - id: fastp_json_output + type: File + outputBinding: + glob: $(inputs.json_output_path) + - id: fastp_html_output + type: File + outputBinding: + glob: $(inputs.html_output_path) + - id: fastp_read1_output + type: File + outputBinding: + glob: $(inputs.read1_output_path) + - id: fastp_read2_output + type: File? + outputBinding: + glob: $(inputs.read2_output_path) + - id: fastp_unpaired1_output + type: File? + outputBinding: + glob: $(inputs.unpaired1_path) + - id: fastp_unpaired2_output + type: File? 
+ outputBinding: + glob: $(inputs.unpaired2_path) +doc: Setup and execute Fastp +label: fastp_0.20.1 +arguments: + - position: 0 + prefix: '--thread' + valueFrom: |- + ${ + if(inputs.number_of_threads) + return inputs.number_of_threads + return runtime.cores + } +requirements: + - class: ResourceRequirement + ramMin: 17000 + coresMin: 4 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/fastp:0.20.1--h8b12597_0' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:fraihaa@mskcc.org' + 'foaf:name': Adrian Fraiha + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': fastp + 'doap:revision': 0.20.1 diff --git a/fgbio_call_duplex_consensus_reads_1.2.0/example_inputs.yaml b/fgbio_call_duplex_consensus_reads_1.2.0/example_inputs.yaml new file mode 100644 index 00000000..59eaa165 --- /dev/null +++ b/fgbio_call_duplex_consensus_reads_1.2.0/example_inputs.yaml @@ -0,0 +1,17 @@ +error_rate_post_umi: null +error_rate_pre_umi: null +input: /path/to/bam_file +max_reads_per_strand: null +memory_overhead: null +memory_per_job: null +min_input_base_quality: null +min_reads: + - 1 + - 1 + - 0 +number_of_threads: null +output_file_name: null +read_group_id: null +read_name_prefix: null +sort_order: null +trim: null diff --git a/fgbio_call_duplex_consensus_reads_1.2.0/fgbio_call_duplex_consensus_reads_1.2.0.cwl b/fgbio_call_duplex_consensus_reads_1.2.0/fgbio_call_duplex_consensus_reads_1.2.0.cwl new file mode 100644 index 
00000000..96211f57 --- /dev/null +++ b/fgbio_call_duplex_consensus_reads_1.2.0/fgbio_call_duplex_consensus_reads_1.2.0.cwl @@ -0,0 +1,226 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: fgbio_call_duplex_consensus_reads_1_2_0 +baseCommand: + - fgbio +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: input + type: File + inputBinding: + position: 2 + prefix: '--input' + shellQuote: false + doc: The input SAM or BAM file. + - id: output_file_name + type: string? + doc: Output SAM or BAM file to write consensus reads. + - id: read_name_prefix + type: string? + inputBinding: + position: 2 + prefix: '--read-name-prefix' + doc: The prefix all consensus read names + - id: read_group_id + type: string? + inputBinding: + position: 2 + prefix: '--read-group-id' + doc: The new read group ID for all the consensus reads. + - id: error_rate_pre_umi + type: int? + inputBinding: + position: 2 + prefix: '--error-rate-pre-umi' + doc: >- + The Phred-scaled error rate for an error prior to the UMIs being + integrated. + - id: error_rate_post_umi + type: int? + inputBinding: + position: 2 + prefix: '--error-rate-post-umi' + doc: >- + The Phred-scaled error rate for an error post the UMIs have been + integrated. + - id: min_input_base_quality + type: int? + inputBinding: + position: 2 + prefix: '--min-input-base-quality' + doc: Ignore bases in raw reads that have Q below this value. + - id: trim + type: boolean? + inputBinding: + position: 2 + prefix: '--trim' + doc: 'If true, quality trim input reads in addition to masking low Q bases' + - id: sort_order + type: string? 
+ inputBinding: + position: 2 + prefix: '--sort-order' + doc: 'The sort order of the output, if :none: then the same as the input.' + - id: min_reads + type: 'int[]' + inputBinding: + position: 2 + prefix: '--min-reads' + itemSeparator: ' ' + shellQuote: false + doc: The minimum number of input reads to a consensus read. + - id: max_reads_per_strand + type: int? + inputBinding: + position: 2 + prefix: '--max-reads-per-strand' + doc: >- + The maximum number of reads to use when building a single-strand + consensus. If more than this many reads are present in a tag family, the + family is randomly downsampled to exactly max-reads reads. + - id: temporary_directory + type: string? + doc: 'Default value: null.' + - id: async_io + type: string? + inputBinding: + position: 0 + separate: false + prefix: '--async-io=' + doc: >- + 'Use asynchronous I/O where possible, e.g. for SAM and BAM files [=true|false].' +outputs: + - id: fgbio_call_duplex_consensus_reads_bam + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_file_name) + return inputs.output_file_name; + return inputs.input.basename.replace(/\.bam$/,'_cons.bam'); + } +doc: >- + Calls duplex consensus sequences from reads generated from the same + double-stranded source molecule. Prior to running this tool, read must have + been grouped with GroupReadsByUmi using the paired strategy. Doing so will + apply (by default) MI tags to all reads of the form */A and */B where the /A + and /B suffixes with the same identifier denote reads that are derived from + opposite strands of the same source duplex molecule. + + + Reads from the same unique molecule are first partitioned by source strand and + assembled into single strand consensus molecules as described by + CallMolecularConsensusReads. Subsequently, for molecules that have at least + one observation of each strand, duplex consensus reads are assembled by + combining the evidence from the two single strand consensus reads. 
+ + + Because of the nature of duplex sequencing, this tool does not support + fragment reads - if found in the input they are ignored. Similarly, read pairs + for which consensus reads cannot be generated for one or other read (R1 or R2) + are omitted from the output. + + + Consensus reads have a number of additional optional tags set in the resulting + BAM file. The tag names follow a pattern where the first letter (a, b or c) + denotes that the tag applies to the first single strand consensus (a), second + single-strand consensus (b) or the final duplex consensus (c). The second + letter is intended to capture the meaning of the tag (e.g. d=depth, m=min + depth, e=errors/error-rate) and is upper case for values that are one per read + and lower case for values that are one per base. +label: fgbio_call_duplex_consensus_reads_1.2.0 +arguments: + - position: 0 + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx10G" + } + else { + return "-Xmx10G" + } + } + - position: 0 + valueFrom: '-XX:-UseGCOverheadLimit' + - position: 1 + valueFrom: CallDuplexConsensusReads + - position: 0 + prefix: '--tmp-dir=' + separate: false + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } + - position: 2 + prefix: '--output' + shellQuote: false + valueFrom: |- + ${ + if(inputs.output_file_name) + return inputs.output_file_name; + return 
inputs.input.basename.replace(/\.bam$/,'_cons.bam'); + } + - position: 2 + prefix: '--threads' + valueFrom: |- + ${ + if(inputs.number_of_threads) + return inputs.number_of_threads + return runtime.cores + } +requirements: + - class: ShellCommandRequirement + - class: ResourceRequirement + ramMin: 20000 + coresMin: 16 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/fgbio:1.2.0' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': fgbio CallDuplexConsensusReads + 'doap:revision': 1.2.0 diff --git a/fgbio_collect_duplex_seq_metrics_1.2.0/example_inputs.yaml b/fgbio_collect_duplex_seq_metrics_1.2.0/example_inputs.yaml new file mode 100644 index 00000000..38dd911b --- /dev/null +++ b/fgbio_collect_duplex_seq_metrics_1.2.0/example_inputs.yaml @@ -0,0 +1,16 @@ +input: + class: File + metadata: {} + path: /path/to/bam +output_prefix: prefix +intervals: + class: File 
+ metadata: {} + path: /path/to/intervals +description: null +duplex_umi_counts: null +min_ab_reads: null +min_ba_reads: null +number_of_threads: null +umi_tag: null +mi_tag: null diff --git a/fgbio_collect_duplex_seq_metrics_1.2.0/fgbio_collect_duplex_seq_metrics_1.2.0.cwl b/fgbio_collect_duplex_seq_metrics_1.2.0/fgbio_collect_duplex_seq_metrics_1.2.0.cwl new file mode 100644 index 00000000..28697020 --- /dev/null +++ b/fgbio_collect_duplex_seq_metrics_1.2.0/fgbio_collect_duplex_seq_metrics_1.2.0.cwl @@ -0,0 +1,294 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: fgbio_collect_duplex_seq_metrics_1_2_0 +baseCommand: + - fgbio +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: input + type: File + inputBinding: + position: 2 + prefix: '--input' + doc: Input BAM file generated by GroupReadByUmi. + - id: output_prefix + type: string? + doc: Prefix of output files to write. + - id: intervals + type: File? + inputBinding: + position: 2 + prefix: '--intervals' + doc: 'Optional set of intervals over which to restrict analysis. [Optional].' + - id: description + type: string? + inputBinding: + position: 2 + prefix: '--description' + doc: >- + Description of data set used to label plots. Defaults to sample/library. + [Optional]. + - id: duplex_umi_counts + type: boolean? + inputBinding: + position: 2 + prefix: '--duplex-umi-counts' + doc: >- + If true, produce the .duplex_umi_counts.txt file with counts of duplex UMI + observations. [Optional]. + - id: min_ab_reads + type: int? + inputBinding: + position: 2 + prefix: '--min-ab-reads' + doc: 'Minimum AB reads to call a tag family a ''duplex''. [Optional].' + - id: min_ba_reads + type: int? 
+ inputBinding: + position: 2 + prefix: '--min-ba-reads' + doc: 'Minimum BA reads to call a tag family a ''duplex''. [Optional].' + - id: umi_tag + type: string? + inputBinding: + position: 2 + prefix: '--umi-tag' + doc: 'The tag containing the raw UMI. [Optional].' + - id: mi_tag + type: string? + inputBinding: + position: 2 + prefix: '--mi-tag' + doc: 'The output tag for UMI grouping. [Optional].' + - id: temporary_directory + type: string? + doc: 'Default value: null.' + - id: async_io + type: string? + inputBinding: + position: 0 + separate: false + prefix: '--async-io=' + doc: >- + 'Use asynchronous I/O where possible, e.g. for SAM and BAM files [=true|false].' +outputs: + - id: fgbio_collect_duplex_seq_metrics_family_size + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_prefix){ + return inputs.output_prefix + '.family_sizes.txt' + } + else{ + return inputs.input.basename.replace('.bam','.family_sizes.txt') + } + } + - id: fgbio_collect_duplex_seq_metrics_duplex_family_size + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_prefix){ + return inputs.output_prefix + '.duplex_family_sizes.txt' + } + else{ + return inputs.input.basename.replace('.bam','.duplex_family_sizes.txt') + } + } + - id: fgbio_collect_duplex_seq_metrics_duplex_yield_metrics + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_prefix){ + return inputs.output_prefix + '.duplex_yield_metrics.txt' + } + else{ + return inputs.input.basename.replace('.bam','.duplex_yield_metrics.txt') + } + } + - id: fgbio_collect_duplex_seq_metrics_umi_counts + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_prefix){ + return inputs.output_prefix + '.umi_counts.txt' + } + else{ + return inputs.input.basename.replace('.bam','.umi_counts.txt') + } + } + - id: fgbio_collect_duplex_seq_metrics_duplex_qc + type: File? 
+ outputBinding: + glob: |- + ${ + if(inputs.output_prefix){ + return inputs.output_prefix + '.duplex_qc.pdf' + } + else{ + return inputs.input.basename.replace('.bam','.duplex_qc.pdf') + } + } + - id: fgbio_collect_duplex_seq_metrics_duplex_umi_counts + type: File? + outputBinding: + glob: |- + ${ + if (inputs.output_prefix) { + return inputs.output_prefix + '.duplex_umi_counts.txt' + } else { + return inputs.input.basename.replace('.bam','.duplex_umi_counts.txt') + } + } +doc: >- + Collects a suite of metrics to QC duplex sequencing data. + + Inputs ------ + + The input to this tool must be a BAM file that is either: + + 1. The exact BAM output by the 'GroupReadsByUmi' tool (in the sort-order it + was produced in) 2. A BAM file that has MI tags present on all reads (usually + set by 'GroupReadsByUmi' and has been sorted with + 'SortBam' into 'TemplateCoordinate' order. + + Calculation of metrics may be restricted to a set of regions using the + '--intervals' parameter. This can significantly affect results as off-target + reads in duplex sequencing experiments often have very different properties + than on-target reads due to the lack of enrichment. + + Several metrics are calculated related to the fraction of tag families that + have duplex coverage. The definition of "duplex" is controlled by the + '--min-ab-reads' and '--min-ba-reads' parameters. The default is to treat any + tag family with at least one observation of each strand as a duplex, but this + could be made more stringent, e.g. by setting '--min-ab-reads=3 + --min-ba-reads=3'. If different thresholds are used then '--min-ab-reads' must + be the higher value. + + Outputs ------- + + The following output files are produced: + + 1. .family_sizes.txt: metrics on the frequency of different types of + families of different sizes 2. .duplex_family_sizes.txt: metrics on + the frequency of duplex tag families by the number of observations + from each strand + 3. 
+ .duplex_yield_metrics.txt: summary QC metrics produced using 5%, + 10%, 15%...100% of the data 4. .umi_counts.txt: metrics on the + frequency of observations of UMIs within reads and tag families 5. + .duplex_qc.pdf: a series of plots generated from the preceding metrics + files for visualization 6. .duplex_umi_counts.txt: (optional) metrics + on the frequency of observations of duplex UMIs within reads + and tag families. This file is only produced if the '--duplex-umi-counts' option is used as it requires significantly + more memory to track all pairs of UMIs seen when a large number of UMI sequences are present. + + Within the metrics files the prefixes 'CS', 'SS' and 'DS' are used to mean: + + * CS: tag families where membership is defined solely on matching genome + coordinates and strand * SS: single-stranded tag families where membership is + defined by genome coordinates, strand and UMI; i.e. 50/A and + 50/B are considered different tag families. + * DS: double-stranded tag families where membership is collapsed across + single-stranded tag families from the same + double-stranded source molecule; i.e. 50/A and 50/B become one family + + Requirements ------------ + + For plots to be generated R must be installed and the ggplot2 package + installed with suggested dependencies. 
Successfully executing the following in + R will ensure a working installation: + + install.packages("ggplot2", repos="http://cran.us.r-project.org", + dependencies=TRUE) +label: fgbio_collect_duplex_seq_metrics_1.2.0 +arguments: + - position: 0 + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx12G" + } + else { + return "-Xmx12G" + } + } + - position: 0 + valueFrom: '-XX:-UseGCOverheadLimit' + - position: 1 + valueFrom: CollectDuplexSeqMetrics + - position: 0 + prefix: '--tmp-dir=' + separate: false + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } + - position: 2 + prefix: '--output' + valueFrom: |- + ${ + if(inputs.output_prefix){ + return inputs.output_prefix + } + else{ + return inputs.input.basename.replace(/.bam/,'') + } + } +requirements: + - class: ResourceRequirement + ramMin: 16000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/fgbio:1.2.0' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charles Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charles Murphy + 'foaf:name': Memorial Sloan Kettering 
Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': fgbio CollectDuplexSeqMetrics + 'doap:revision': 1.2.0 diff --git a/fgbio_fastq_to_bam_1.2.0/example_inputs.yaml b/fgbio_fastq_to_bam_1.2.0/example_inputs.yaml new file mode 100644 index 00000000..f30727a2 --- /dev/null +++ b/fgbio_fastq_to_bam_1.2.0/example_inputs.yaml @@ -0,0 +1,27 @@ +comment: null +description: null +input: + - class: File + path: >- + /Users/shahr2/Documents/test_reference/test_fastq_to_bam/fastq/test_R1_001.fastq.gz + - class: File + path: >- + /Users/shahr2/Documents/test_reference/test_fastq_to_bam/fastq/test_R2_001.fastq.gz +library: test +memory_overhead: null +memory_per_job: null +number_of_threads: null +output_file_name: null +platform: Illumina +platform-model: novaseq +platform-unit: . +predicted-insert-size: null +read-group-id: test +read-structures: + - 3M2S+T + - 3M2S+T +run-date: null +sample: test +sequencing-center: mskcc +sort: null +umi-tag: null diff --git a/fgbio_fastq_to_bam_1.2.0/fgbio_fastq_to_bam_1.2.0.cwl b/fgbio_fastq_to_bam_1.2.0/fgbio_fastq_to_bam_1.2.0.cwl new file mode 100644 index 00000000..638e8449 --- /dev/null +++ b/fgbio_fastq_to_bam_1.2.0/fgbio_fastq_to_bam_1.2.0.cwl @@ -0,0 +1,255 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: fgbio_fastq_to_bam_1_2_0 +baseCommand: + - fgbio +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: input + type: 'File[]' + inputBinding: + position: 2 + prefix: '--input' + itemSeparator: ' ' + shellQuote: false + label: PathToFastq + doc: 'Fastq files corresponding to each sequencing read (e.g. R1, I1, etc.).' + - id: output_file_name + type: string? 
+ doc: The output SAM or BAM file to be written. + - id: read-structures + type: 'string[]?' + inputBinding: + position: 2 + prefix: '--read-structures' + itemSeparator: ' ' + shellQuote: false + doc: >- + Read structures, one for each of the FASTQs. + https://github.com/fulcrumgenomics/fgbio/wiki/Read-Structures + - id: sort + type: boolean? + inputBinding: + position: 2 + prefix: '--sort' + shellQuote: false + doc: 'If true, queryname sort the BAM file, otherwise preserve input order.' + - id: umi-tag + type: string? + inputBinding: + position: 2 + prefix: '--umi-tag' + shellQuote: false + doc: Tag in which to store molecular barcodes/UMIs + - id: read-group-id + type: string? + inputBinding: + position: 2 + prefix: '--read-group-id' + shellQuote: false + doc: Read group ID to use in the file header. + - id: sample + type: string? + inputBinding: + position: 2 + prefix: '--sample' + shellQuote: false + doc: The name of the sequenced sample. + - id: library + type: string? + inputBinding: + position: 2 + prefix: '--library' + shellQuote: false + doc: The name/ID of the sequenced library. + - id: platform + type: string? + inputBinding: + position: 2 + prefix: '--platform' + shellQuote: false + doc: Sequencing Platform + - id: platform-unit + type: string? + inputBinding: + position: 2 + prefix: '--platform-unit' + shellQuote: false + doc: Platform unit (e.g. ‘..') + - id: platform-model + type: string? + inputBinding: + position: 2 + prefix: '--platform-model' + shellQuote: false + doc: >- + Platform model to insert into the group header (ex. miseq, hiseq2500, + hiseqX) + - id: sequencing-center + type: string? + inputBinding: + position: 2 + prefix: '--sequencing-center' + shellQuote: false + doc: The sequencing center from which the data originated + - id: predicted-insert-size + type: int? 
+ inputBinding: + position: 2 + prefix: '--predicted-insert-size' + shellQuote: false + doc: 'Predicted median insert size, to insert into the read group header' + - id: description + type: string? + inputBinding: + position: 2 + prefix: '--description' + doc: Description of the read group. + - id: comment + type: string? + inputBinding: + position: 2 + prefix: '--comment' + doc: Comment(s) to include in the output file’s header + - id: run-date + type: string? + inputBinding: + position: 2 + prefix: '--run-date' + shellQuote: false + doc: 'Date the run was produced, to insert into the read group header' + - id: temporary_directory + type: string? + doc: 'Default value: null.' + - id: async_io + type: string? + inputBinding: + position: 0 + separate: false + prefix: '--async-io=' + doc: >- + 'Use asynchronous I/O where possible, e.g. for SAM and BAM files [=true|false].' +outputs: + - id: fgbio_fastq_to_bam_ubam + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_file_name) + return inputs.output_file_name; + return inputs.input[0].basename.replace(/.fastq.gz/,'_ubam.bam'); + } +doc: >- + Generates an unmapped BAM (or SAM or CRAM) file from fastq files. Takes in one + or more fastq files (optionally gzipped), each representing a different + sequencing read (e.g. R1, R2, I1 or I2) and can use a set of read structures + to allocate bases in those reads to template reads, sample indices, unique + molecular indices, or to designate bases to be skipped over. + + + Read structures are made up of pairs much like the CIGAR + string in BAM files. Four kinds of operators are recognized: + + + 1. T identifies a template read + + 2. B identifies a sample barcode read + + 3. M identifies a unique molecular index read + + 4. S identifies a set of bases that should be skipped or ignored + + + The last pair may be specified using a + sign instead of + number to denote “all remaining bases”. 
This is useful if, e.g., fastqs have + been trimmed and contain reads of varying length. For example to convert a + paired-end run with an index read and where the first 5 bases of R1 are a UMI + and the second five bases are monotemplate you might specify: +label: fgbio_fastq_to_bam_1.2.0 +arguments: + - position: 0 + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx12G" + } + else { + return "-Xmx12G" + } + } + - position: 0 + valueFrom: '-XX:-UseGCOverheadLimit' + - position: 1 + valueFrom: FastqToBam + - position: 0 + prefix: '--tmp-dir=' + separate: false + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } + - position: 2 + prefix: '--output' + shellQuote: false + valueFrom: |- + ${ + if(inputs.output_file_name) + return inputs.output_file_name; + return inputs.input[0].basename.replace(/.fastq.gz/,'_ubam.bam'); + } +requirements: + - class: ShellCommandRequirement + - class: ResourceRequirement + ramMin: 16000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/fgbio:1.2.0' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 
'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': fgbio FastqToBam + 'doap:revision': 1.2.0 diff --git a/fgbio_filter_consensus_reads_1.2.0/example_inputs.yaml b/fgbio_filter_consensus_reads_1.2.0/example_inputs.yaml new file mode 100644 index 00000000..afe8426d --- /dev/null +++ b/fgbio_filter_consensus_reads_1.2.0/example_inputs.yaml @@ -0,0 +1,17 @@ +input: /path/to/bam_file +max_base_error_rate: null +max_no_call_fraction: null +max_read_error_rate: null +memory_overhead: null +memory_per_job: null +min_base_quality: null +min_mean_base_quality: null +min_reads: + - 2 + - 2 + - 1 +number_of_threads: null +output_file_name: test.bam +reference_fasta: /path/to/reference_fasta +require_single_strand_agreement: true +reverse_per_base_tags: null diff --git a/fgbio_filter_consensus_reads_1.2.0/fgbio_filter_consensus_reads_1.2.0.cwl b/fgbio_filter_consensus_reads_1.2.0/fgbio_filter_consensus_reads_1.2.0.cwl new file mode 100644 index 00000000..91687479 --- /dev/null +++ b/fgbio_filter_consensus_reads_1.2.0/fgbio_filter_consensus_reads_1.2.0.cwl @@ -0,0 +1,238 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: fgbio_filter_consensus_reads_1_2_0 +baseCommand: + - fgbio +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: input + type: File + inputBinding: + position: 2 + prefix: '--input' + shellQuote: false + doc: The input SAM or BAM file. + - id: output_file_name + type: string? + doc: Output SAM or BAM file to write consensus reads. 
+ - id: reference_fasta + type: File + inputBinding: + position: 2 + prefix: '--ref' + doc: Reference fasta file. + secondaryFiles: + - .fai + - ^.dict + - id: reverse_per_base_tags + type: boolean? + inputBinding: + position: 2 + prefix: '--reverse-per-base-tags' + doc: 'Reverse [complement] per base tags on reverse strand reads.' + - id: min_reads + type: 'int[]?' + inputBinding: + position: 2 + prefix: '--min-reads' + itemSeparator: ' ' + shellQuote: false + doc: >- + The minimum number of reads supporting a consensus base/read. (Max 3 + values) + - id: max_read_error_rate + type: 'float[]?' + inputBinding: + position: 2 + prefix: '--max-read-error-rate' + itemSeparator: ' ' + doc: >- + The maximum raw-read error rate across the entire consensus read. (Max 3 + values) + - id: max_base_error_rate + type: 'float[]?' + inputBinding: + position: 2 + prefix: '--max-base-error-rate' + itemSeparator: ' ' + doc: The maximum error rate for a single consensus base. (Max 3 values) + - id: min_base_quality + type: int + inputBinding: + position: 2 + prefix: '--min-base-quality' + doc: Mask (make N) consensus bases with quality less than this threshold. + - id: max_no_call_fraction + type: float? + inputBinding: + position: 2 + prefix: '--max-no-call-fraction' + doc: Maximum fraction of no-calls in the read after filtering + - id: min_mean_base_quality + type: int? + inputBinding: + position: 2 + prefix: '--min-mean-base-quality' + doc: The minimum mean base quality across the consensus read + - id: require_single_strand_agreement + type: boolean? + inputBinding: + position: 2 + prefix: '--require-single-strand-agreement' + doc: >- + Mask (make N) consensus bases where the AB and BA consensus reads disagree + (for duplex-sequencing only). + - id: temporary_directory + type: string? + doc: 'Default value: null.' + - id: async_io + type: string? + inputBinding: + position: 0 + separate: false + prefix: '--async-io=' + doc: >- + 'Use asynchronous I/O where possible, e.g. 
for SAM and BAM files [=true|false].' +outputs: + - id: fgbio_filter_consensus_reads_bam + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_file_name) + return inputs.output_file_name; + return inputs.input.basename.replace(/.bam/,'_filtered.bam'); + } + secondaryFiles: + - ^.bai +doc: >- + Filters consensus reads generated by CallMolecularConsensusReads or + CallDuplexConsensusReads. Two kinds of filtering are performed: + + + 1. Masking/filtering of individual bases in reads + + 2. Filtering out of reads (i.e. not writing them to the output file) + + + Base-level filtering/masking is only applied if per-base tags are present (see + CallDuplexConsensusReads and CallMolecularConsensusReads for descriptions of + these tags). Read-level filtering is always applied. When filtering reads, + secondary alignments and supplementary records may be removed independently if + they fail one or more filters; if either R1 or R2 primary alignments fail a + filter then all records for the template will be filtered out. + + + The filters applied are as follows: + + + 1. Reads with fewer than min-reads contributing reads are filtered out + + 2. Reads with an average consensus error rate higher than max-read-error-rate + are filtered out + + 3. Reads with mean base quality of the consensus read, prior to any masking, + less than min-mean-base-quality are filtered out (if specified) + + 4. Bases with quality scores below min-base-quality are masked to Ns + + 5. Bases with fewer than min-reads contributing raw reads are masked to Ns + + 6. Bases with a consensus error rate (defined as the fraction of contributing + reads that voted for a different base than the consensus call) higher than + max-base-error-rate are masked to Ns + + 7. For duplex reads, if require-single-strand-agreement is provided, masks to + Ns any bases where the base was observed in both single-strand consensus reads + and the two reads did not agree + + 8. 
Reads with a proportion of Ns higher than max-no-call-fraction after + per-base filtering are filtered out +label: fgbio_filter_consensus_reads_1.2.0 +arguments: + - position: 0 + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx12G" + } + else { + return "-Xmx12G" + } + } + - position: 0 + valueFrom: '-XX:-UseGCOverheadLimit' + - position: 1 + valueFrom: FilterConsensusReads + - position: 0 + prefix: '--tmp-dir=' + separate: false + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } + - position: 2 + prefix: '--output' + shellQuote: false + valueFrom: |- + ${ + if(inputs.output_file_name) + return inputs.output_file_name; + return inputs.input.basename.replace(/.bam/,'_filtered.bam'); + } +requirements: + - class: ShellCommandRequirement + - class: ResourceRequirement + ramMin: 16000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/fgbio:1.2.0' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center 
+'doap:release': + - class: 'doap:Version' + 'doap:name': fgbio FilterConsensusReads + 'doap:revision': 1.2.0 diff --git a/fgbio_group_reads_by_umi_1.2.0/example_inputs.yaml b/fgbio_group_reads_by_umi_1.2.0/example_inputs.yaml new file mode 100644 index 00000000..9fe66421 --- /dev/null +++ b/fgbio_group_reads_by_umi_1.2.0/example_inputs.yaml @@ -0,0 +1,13 @@ +assign_tag: null +edits: null +family_size_histogram: null +include_non_pf_reads: null +input: /path/to/bam_file +memory_overhead: null +memory_per_job: null +min_map_q: null +min_umi_length: null +number_of_threads: null +output_file_name: null +raw_tag: null +strategy: paired diff --git a/fgbio_group_reads_by_umi_1.2.0/fgbio_group_reads_by_umi_1.2.0.cwl b/fgbio_group_reads_by_umi_1.2.0/fgbio_group_reads_by_umi_1.2.0.cwl new file mode 100644 index 00000000..b30a4078 --- /dev/null +++ b/fgbio_group_reads_by_umi_1.2.0/fgbio_group_reads_by_umi_1.2.0.cwl @@ -0,0 +1,257 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: fgbio_group_reads_by_umi_1_2_0 +baseCommand: + - fgbio +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: input + type: File + inputBinding: + position: 2 + prefix: '--input' + shellQuote: false + doc: The input BAM file. + - id: output_file_name + type: string? + doc: The output SAM or BAM file to be written. + - id: family_size_histogram + type: string? + inputBinding: + position: 2 + prefix: '--family-size-histogram' + doc: Optional output of tag family size counts. + - id: raw_tag + type: string? + inputBinding: + position: 2 + prefix: '--raw-tag' + doc: The tag containing the raw UMI. + - id: assign_tag + type: string? 
+ inputBinding: + position: 2 + prefix: '--assign-tag' + doc: The output tag for UMI grouping. + - id: min_map_q + type: int? + inputBinding: + position: 2 + prefix: '--min-map-q' + doc: Minimum mapping quality. + - id: include_non_pf_reads + type: boolean? + inputBinding: + position: 2 + prefix: '--include-non-pf-reads' + - id: strategy + type: string + inputBinding: + position: 2 + prefix: '--strategy' + doc: 'The UMI assignment strategy. (identity,edit,adjacency,paired)' + - id: edits + type: int? + inputBinding: + position: 2 + prefix: '--edits' + doc: The allowable number of edits between UMIs. + - id: min_umi_length + type: int? + inputBinding: + position: 2 + prefix: '--min-umi-length' + doc: >- + The minimum UMI length. If not specified then all UMIs must have the same + length, otherwise discard reads with UMIs shorter than this length and + allow for differing UMI lengths. + - id: temporary_directory + type: string? + doc: 'Default value: null.' + - id: async_io + type: string? + inputBinding: + position: 0 + separate: false + prefix: '--async-io=' + doc: >- + 'Use asynchronous I/O where possible, e.g. for SAM and BAM files [=true|false].' +outputs: + - id: fgbio_group_reads_by_umi_bam + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_file_name) + return inputs.output_file_name; + return inputs.input.basename.replace(/.bam/,'_group.bam'); + } + - id: fgbio_group_reads_by_umi_histogram + type: File? + outputBinding: + glob: |- + ${ + if(inputs.family_size_histogram) + return inputs.family_size_histogram + } +doc: >- + Groups reads together that appear to have come from the same original + molecule. Reads are grouped by template, and then templates are sorted by the + 5’ mapping positions of the reads from the template, used from earliest + mapping position to latest. Reads that have the same end positions are then + sub-grouped by UMI sequence. 
+ + + Accepts reads in any order (including unsorted) and outputs reads sorted by: + + + The lower genome coordinate of the two outer ends of the templates + + The sequencing library + + The assigned UMI tag + + Read Name + + Reads are aggressively filtered out so that only high quality reads/mappings + are taken forward. Single-end reads must have mapping quality >= min-map-q. + Paired-end reads must have both reads mapped to the same chromosome with both + reads having mapping quality >= min-map-q. (Note: the MQ tag is required on + reads with mapped mates). + + + This is done with the expectation that the next step is building consensus + reads, where it is undesirable to either: + + + Assign reads together that are really from different source molecules + + Build two groups from reads that are really from the same molecule + + Errors in mapping reads could lead to both and therefore are minimized. + + + Grouping of UMIs is performed by one of four strategies: + + + 1. identity: only reads with identical UMI sequences are grouped together. + This strategy may be useful for evaluating data, but should generally be + avoided as it will generate multiple UMI groups per original molecule in the + presence of errors. + + 2. edit: reads are clustered into groups such that each read within a group + has at least one other read in the group with <= edits differences and there + are no inter-group pairings with <= edits differences. Effective when there are + small numbers of reads per UMI, but breaks down at very high coverage of UMIs. + + 3. adjacency: a version of the directed adjacency method described in + umi_tools that allows for errors between UMIs but only when there is a count + gradient. + + 4. paired: similar to adjacency but for methods that produce templates with a + pair of UMIs such that a read with A-B is related to but not identical to a + read with B-A. Expects the pair of UMIs to be stored in a single tag, + separated by a hyphen (e.g. ACGT-CCGG). 
The molecular IDs produced have more + structure than for single UMI strategies, and are of the form {base}/{AB|BA}. + E.g. two UMI pairs would be mapped as follows AAAA-GGGG -> 1/AB, GGGG-AAAA -> + 1/BA. + + edit, adjacency and paired make use of the --edits parameter to control the + matching of non-identical UMIs. + + + By default, all UMIs must be the same length. If --min-umi-length=len is + specified then reads that have a UMI shorter than len will be discarded, and + when comparing UMIs of different lengths, the first len bases will be + compared, where len is the length of the shortest UMI. The UMI length is the + number of [ACGT] bases in the UMI (i.e. does not count dashes and other + non-ACGT characters). This option is not implemented for reads with UMI pairs + (i.e. using the paired assigner). +label: fgbio_group_reads_by_umi_1.2.0 +arguments: + - position: 0 + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx12G" + } + else { + return "-Xmx12G" + } + } + - position: 0 + valueFrom: '-XX:-UseGCOverheadLimit' + - position: 1 + valueFrom: GroupReadsByUmi + - position: 0 + prefix: '--tmp-dir=' + separate: false + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } + - position: 2 + prefix: '--output' + shellQuote: false + valueFrom: |- + ${ + if(inputs.output_file_name) + return inputs.output_file_name; + return 
inputs.input.basename.replace(/.bam/,'_group.bam'); + } +requirements: + - class: ShellCommandRequirement + - class: ResourceRequirement + ramMin: 16000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/fgbio:1.2.0' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': fgbio GroupReadsByUmi + 'doap:revision': 1.2.0 diff --git a/fgbio_postprocessing_simplex_filter_0.1.8/example_inputs.yaml b/fgbio_postprocessing_simplex_filter_0.1.8/example_inputs.yaml new file mode 100644 index 00000000..96445c8c --- /dev/null +++ b/fgbio_postprocessing_simplex_filter_0.1.8/example_inputs.yaml @@ -0,0 +1,2 @@ +input_bam: {class: File, path: /path/to/simplex_duplex_fgbio.bam} +output_file_name: simplex_filtered.bam \ No newline at end of file diff --git a/fgbio_postprocessing_simplex_filter_0.1.8/fgbio_postprocessing_simplex_filter_0.1.8.cwl b/fgbio_postprocessing_simplex_filter_0.1.8/fgbio_postprocessing_simplex_filter_0.1.8.cwl new file mode 100644 index 00000000..2599e4af --- /dev/null +++ b/fgbio_postprocessing_simplex_filter_0.1.8/fgbio_postprocessing_simplex_filter_0.1.8.cwl @@ -0,0 +1,71 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: fgbio_postprocessing_simplex_filter_0.1.8 +baseCommand: + - simplex_filter +inputs: + - id: input_bam + type: File + inputBinding: + position: 0 + prefix: '--input_bam' + doc: Input file (bam or sam). Required. 
+ secondaryFiles: + - ^.bai + - id: output_file_name + type: string? + inputBinding: + position: 0 + prefix: '--output_filename' + doc: Output file (bam or sam). + - id: min_simplex_reads + type: int? + inputBinding: + position: 0 + prefix: '--min_simplex_reads' + doc: Minimum number of simplex reads to pass filter for consensus reads +outputs: + - id: fgbio_postprocessing_simplex_bam + type: File + outputBinding: + glob: |- + ${ + if (inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input_bam.basename.replace(/.bam$/,'_simplex.bam') + } + } + secondaryFiles: + - ^.bai +label: fgbio_postprocessing_simplex_filter_0.1.8 +requirements: + - class: ResourceRequirement + ramMin: 16000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/fgbio_postprocessing:0.2.1' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:johnsoni@mskcc.org' + 'foaf:name': Ian Johnson + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:johnsoni@mskcc.org' + 'foaf:name': Ian Johnson + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': fgbio_postprocessing + 'doap:revision': 0.1.8 diff --git a/gatk_ApplyBQSR_4.1.2.0/README.md b/gatk_ApplyBQSR_4.1.2.0/README.md deleted file mode 100644 index 6dd376f7..00000000 --- a/gatk_ApplyBQSR_4.1.2.0/README.md +++ /dev/null @@ -1,128 +0,0 @@ -# CWL and Dockerfile for running GATK4 - Apply BQSR - -## Version of tools in [docker image ](https://hub.docker.com/r/broadinstitute/gatk) - -| Tool | Version | Location | -|--- |--- |--- | -| GATK | 4.1.2.0 | https://github.com/broadinstitute/gatk/releases/tag/4.1.2.0 | - -[![](https://img.shields.io/badge/version-4.1.2.0-blue)](https://github.com/broadinstitute/gatk/releases/tag/4.1.2.0) 
-## CWL - -- CWL specification 1.0 -- Use example_inputs.yml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner gatk_ApplyBQSR_4.1.2.0.cwl example_inputs.yml -``` - -**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** - -```bash -#Using CWLTOOL -> cwltool --singularity --non-strict gatk_ApplyBQSR_4.1.2.0.cwl inputs.yaml - -#Using toil-cwl-runner -> mkdir run_directory -> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 gatk_ApplyBQSR_4.1.2.0.cwl inputs.yaml > file.stdout 2> file.stderr & -``` - -## Usage - -```bash -> toil-cwl-runner gatk_ApplyBQSR_4.1.2.0.cwl --help - -usage: gatk_ApplyBQSR_4.1.2.0.cwl [-h] --reference REFERENCE - [--create_output_bam_index] - --bqsr_recal_file BQSR_RECAL_FILE --input - INPUT [--output_file_name OUTPUT_FILE_NAME] - [--add_output_sam_program_record] - [--add_output_vcf_command_line] - [--arguments_file ARGUMENTS_FILE] - [--cloud_index_prefetch_buffer CLOUD_INDEX_PREFETCH_BUFFER] - [--cloud_prefetch_buffer CLOUD_PREFETCH_BUFFER] - [--create_output_bam_md5] - [--create_output_variant_index] - [--create_output_variant_md5] - [--disable_bam_index_caching] - [--disable_read_filter DISABLE_READ_FILTER] - [--disable_sequence_dictionary_validation] - [--emit_original_quals] - [--exclude_intervals EXCLUDE_INTERVALS] - [--gatk_config_file GATK_CONFIG_FILE] - [--gcs_max_retries GCS_MAX_RETRIES] - [--gcs_project_for_requester_pays GCS_PROJECT_FOR_REQUESTER_PAYS] - [--global_qscore_prior GLOBAL_QSCORE_PRIOR] - 
[--interval_exclusion_padding INTERVAL_EXCLUSION_PADDING] - [--interval_merging_rule INTERVAL_MERGING_RULE] - [--interval_padding INTERVAL_PADDING] - [--interval_set_rule INTERVAL_SET_RULE] - [--intervals INTERVALS] [--lenient] - [--preserve_qscores_less_than PRESERVE_QSCORES_LESS_THAN] - [--quantize_quals QUANTIZE_QUALS] [--quiet] - [--read_filter READ_FILTER] - [--read_index READ_INDEX] - [--read_validation_stringency READ_VALIDATION_STRINGENCY] - [--seconds_between_progress_updates SECONDS_BETWEEN_PROGRESS_UPDATES] - [--sequence_dictionary SEQUENCE_DICTIONARY] - [--sites_only_vcf_output] - [--use_jdk_deflater] [--use_jdk_inflater] - [--use_original_qualities] - [--memory_overhead MEMORY_OVERHEAD] - [--memory_per_job MEMORY_PER_JOB] - [--number_of_threads NUMBER_OF_THREADS] - [job_order] - -positional arguments: - job_order Job input json file - -optional arguments: - -h, --help show this help message and exit - --reference REFERENCE - Reference sequence - --create_output_bam_index - --bqsr_recal_file BQSR_RECAL_FILE - Input recalibration table for BQSR. Only run ApplyBQSR - with the covariates table created from the input BAM - --input INPUT A BAM file containing input read data - --output_file_name OUTPUT_FILE_NAME - Output file name. 
Not Required - --add_output_sam_program_record - --add_output_vcf_command_line - --arguments_file ARGUMENTS_FILE - --cloud_index_prefetch_buffer CLOUD_INDEX_PREFETCH_BUFFER - --cloud_prefetch_buffer CLOUD_PREFETCH_BUFFER - --create_output_bam_md5 - --create_output_variant_index - --create_output_variant_md5 - --disable_bam_index_caching - --disable_read_filter DISABLE_READ_FILTER - --disable_sequence_dictionary_validation - --emit_original_quals - --exclude_intervals EXCLUDE_INTERVALS - --gatk_config_file GATK_CONFIG_FILE - --gcs_max_retries GCS_MAX_RETRIES - --gcs_project_for_requester_pays GCS_PROJECT_FOR_REQUESTER_PAYS - --global_qscore_prior GLOBAL_QSCORE_PRIOR - --interval_exclusion_padding INTERVAL_EXCLUSION_PADDING - --interval_merging_rule INTERVAL_MERGING_RULE - --interval_padding INTERVAL_PADDING - --interval_set_rule INTERVAL_SET_RULE - --intervals INTERVALS - --lenient - --preserve_qscores_less_than PRESERVE_QSCORES_LESS_THAN - --quantize_quals QUANTIZE_QUALS - --quiet - --read_filter READ_FILTER - --read_index READ_INDEX - --read_validation_stringency READ_VALIDATION_STRINGENCY - --seconds_between_progress_updates SECONDS_BETWEEN_PROGRESS_UPDATES - --sequence_dictionary SEQUENCE_DICTIONARY - --sites_only_vcf_output - --use_jdk_deflater - --use_jdk_inflater - --use_original_qualities - --memory_overhead MEMORY_OVERHEAD - --memory_per_job MEMORY_PER_JOB - --number_of_threads NUMBER_OF_THREADS \ No newline at end of file diff --git a/gatk_ApplyBQSR_4.1.2.0/gatk_ApplyBQSR_4.1.2.0.cwl b/gatk_ApplyBQSR_4.1.2.0/gatk_ApplyBQSR_4.1.2.0.cwl index 5fda71e8..14f9ac57 100644 --- a/gatk_ApplyBQSR_4.1.2.0/gatk_ApplyBQSR_4.1.2.0.cwl +++ b/gatk_ApplyBQSR_4.1.2.0/gatk_ApplyBQSR_4.1.2.0.cwl @@ -3,8 +3,9 @@ cwlVersion: v1.0 $namespaces: dct: 'http://purl.org/dc/terms/' doap: 'http://usefulinc.com/ns/doap#' + edam: 'http://edamontology.org/' foaf: 'http://xmlns.com/foaf/0.1/' - edam: http://edamontology.org/ + sbg: 'https://www.sevenbridges.com/' id: 
gatk_apply_bqsr_4_1_2_0 baseCommand: - gatk @@ -174,10 +175,14 @@ inputs: position: 6 prefix: '--QUIET' - id: read_filter - type: string? + type: + - 'null' + - type: array + items: string + inputBinding: + prefix: '--read-filter' inputBinding: position: 6 - prefix: '--read-filter' - id: read_index type: string? inputBinding: @@ -225,7 +230,7 @@ inputs: - id: number_of_threads type: int? outputs: - - id: output + - id: gatk_apply_bqsr_bam type: File? outputBinding: glob: |- @@ -264,8 +269,6 @@ requirements: - class: ResourceRequirement ramMin: 10000 coresMin: 8 -# ramMin: "${\r if(inputs.memory_per_job && inputs.memory_overhead) {\r \r return inputs.memory_per_job + inputs.memory_overhead\r }\r else if (inputs.memory_per_job && !inputs.memory_overhead){\r \r \treturn inputs.memory_per_job + 2000\r }\r else if(!inputs.memory_per_job && inputs.memory_overhead){\r \r return 8000 + inputs.memory_overhead\r }\r else {\r \r \treturn 8000 \r }\r}" -# coresMin: "${\r if (inputs.number_of_threads) {\r \r \treturn inputs.number_of_threads \r }\r else {\r \r return 1\r }\r}" - class: DockerRequirement dockerPull: 'broadinstitute/gatk:4.1.2.0' - class: InlineJavascriptRequirement @@ -286,4 +289,4 @@ requirements: 'doap:release': - class: 'doap:Version' 'doap:name': gatk4 - 'doap:revision': 4.1.2.0 \ No newline at end of file + 'doap:revision': 4.1.2.0 diff --git a/gatk_BaseRecalibrator_4.1.2.0/README.md b/gatk_BaseRecalibrator_4.1.2.0/README.md deleted file mode 100644 index 005acb15..00000000 --- a/gatk_BaseRecalibrator_4.1.2.0/README.md +++ /dev/null @@ -1,142 +0,0 @@ -# CWL and Dockerfile for running GATK4 - Base Recalibrator - -## Version of tools in [docker image ](https://hub.docker.com/r/broadinstitute/gatk) - -| Tool | Version | Location | -|--- |--- |--- | -| GATK | 4.1.2.0 | https://github.com/broadinstitute/gatk/releases/tag/4.1.2.0 | - -[![](https://img.shields.io/badge/version-4.1.2.0-blue)](https://github.com/broadinstitute/gatk/releases/tag/4.1.2.0) -## CWL - -- 
CWL specification 1.0 -- Use example_inputs.yml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner gatk_baserecalibrator_4.1.2.0.cwl example_inputs.yml -``` - -**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** - -```bash -#Using CWLTOOL -> cwltool --singularity --non-strict gatk_baserecalibrator_4.1.2.0.cwl inputs.yaml - -#Using toil-cwl-runner -> mkdir run_directory -> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 gatk_baserecalibrator_4.1.2.0.cwl inputs.yaml > file.stdout 2> file.stderr & -``` - -## Usage - -```bash -> toil-cwl-runner gatk_baserecalibrator_4.1.2.0.cwl --help - -usage: gatk_baserecalibrator_4.1.2.0.cwl [-h] --input INPUT --known_sites_1 - KNOWN_SITES_1 --reference REFERENCE - [--output_file_name OUTPUT_FILE_NAME] - [--add_output_sam_program_record] - [--add_output_vcf_command_line] - [--arguments_file ARGUMENTS_FILE] - [--binary_tag_name BINARY_TAG_NAME] - [--bqsr_baq_gap_open_penalty BQSR_BAQ_GAP_OPEN_PENALTY] - [--cloud-index-prefetch-buffer CLOUD_INDEX_PREFETCH_BUFFER] - [--cloud_prefetch_buffer CLOUD_PREFETCH_BUFFER] - [--create_output_bam_index] - [--create_output_bam_md5] - [--create_output_variant_index] - [--create_output_variant_md5] - [--default_base_qualities DEFAULT_BASE_QUALITIES] - [--deletions_default_quality DELETIONS_DEFAULT_QUALITY] - [--disable_bam_index_caching] - [--disable_read_filter DISABLE_READ_FILTER] - [--disable_sequence_dictionary_validation] - [--exclude_intervals EXCLUDE_INTERVALS] - 
[--gatk_config_file GATK_CONFIG_FILE] - [--gcs_max_retries GCS_MAX_RETRIES] - [--gcs_project_for_requester_pays GCS_PROJECT_FOR_REQUESTER_PAYS] - [--indels_context_size INDELS_CONTEXT_SIZE] - [--insertions_default_quality INSERTIONS_DEFAULT_QUALITY] - [--interval_exclusion_padding INTERVAL_EXCLUSION_PADDING] - [--interval_merging_rule INTERVAL_MERGING_RULE] - [--interval_padding INTERVAL_PADDING] - [--interval_set_rule INTERVAL_SET_RULE] - [--intervals INTERVALS] [--lenient] - [--low_quality_tail LOW_QUALITY_TAIL] - [--maximum_cycle_value MAXIMUM_CYCLE_VALUE] - [--mismatches_context_size MISMATCHES_CONTEXT_SIZE] - [--mismatches_default_quality MISMATCHES_DEFAULT_QUALITY] - [--preserve_qscores_less_than PRESERVE_QSCORES_LESS_THAN] - [--quantizing_levels QUANTIZING_LEVELS] - [--QUIET] [--read_filter READ_FILTER] - [--read_index READ_INDEX] - [--seconds_between_progress_updates SECONDS_BETWEEN_PROGRESS_UPDATES] - [--sequence_dictionary SEQUENCE_DICTIONARY] - [--sites_only_vcf_output] - [--use_original_qualities] - [--number_of_threads NUMBER_OF_THREADS] - [--memory_per_job MEMORY_PER_JOB] - [--memory_overhead MEMORY_OVERHEAD] - [--known_sites_2 KNOWN_SITES_2] - [job_order] - -positional arguments: - job_order Job input json file - -optional arguments: - -h, --help show this help message and exit - --input INPUT BAM/SAM file containing reads - --known_sites_1 KNOWN_SITES_1 - One or more databases of known polymorphic sites used - to exclude regions around known polymorphisms from - analysis - --reference REFERENCE - Reference sequence file - --output_file_name OUTPUT_FILE_NAME - Output file name. 
Not Required - --add_output_sam_program_record - --add_output_vcf_command_line - --arguments_file ARGUMENTS_FILE - --binary_tag_name BINARY_TAG_NAME - --bqsr_baq_gap_open_penalty BQSR_BAQ_GAP_OPEN_PENALTY - --cloud-index-prefetch-buffer CLOUD_INDEX_PREFETCH_BUFFER - --cloud_prefetch_buffer CLOUD_PREFETCH_BUFFER - --create_output_bam_index - --create_output_bam_md5 - --create_output_variant_index - --create_output_variant_md5 - --default_base_qualities DEFAULT_BASE_QUALITIES - --deletions_default_quality DELETIONS_DEFAULT_QUALITY - --disable_bam_index_caching - --disable_read_filter DISABLE_READ_FILTER - --disable_sequence_dictionary_validation - --exclude_intervals EXCLUDE_INTERVALS - --gatk_config_file GATK_CONFIG_FILE - --gcs_max_retries GCS_MAX_RETRIES - --gcs_project_for_requester_pays GCS_PROJECT_FOR_REQUESTER_PAYS - --indels_context_size INDELS_CONTEXT_SIZE - --insertions_default_quality INSERTIONS_DEFAULT_QUALITY - --interval_exclusion_padding INTERVAL_EXCLUSION_PADDING - --interval_merging_rule INTERVAL_MERGING_RULE - --interval_padding INTERVAL_PADDING - --interval_set_rule INTERVAL_SET_RULE - --intervals INTERVALS - --lenient - --low_quality_tail LOW_QUALITY_TAIL - --maximum_cycle_value MAXIMUM_CYCLE_VALUE - --mismatches_context_size MISMATCHES_CONTEXT_SIZE - --mismatches_default_quality MISMATCHES_DEFAULT_QUALITY - --preserve_qscores_less_than PRESERVE_QSCORES_LESS_THAN - --quantizing_levels QUANTIZING_LEVELS - --QUIET - --read_filter READ_FILTER - --read_index READ_INDEX - --seconds_between_progress_updates SECONDS_BETWEEN_PROGRESS_UPDATES - --sequence_dictionary SEQUENCE_DICTIONARY - --sites_only_vcf_output - --use_original_qualities - --number_of_threads NUMBER_OF_THREADS - --memory_per_job MEMORY_PER_JOB - --memory_overhead MEMORY_OVERHEAD - --known_sites_2 KNOWN_SITES_2 \ No newline at end of file diff --git a/gatk_BaseRecalibrator_4.1.2.0/gatk_baserecalibrator_4.1.2.0.cwl b/gatk_BaseRecalibrator_4.1.2.0/gatk_baserecalibrator_4.1.2.0.cwl index 
cf12e785..ba59e2d0 100644 --- a/gatk_BaseRecalibrator_4.1.2.0/gatk_baserecalibrator_4.1.2.0.cwl +++ b/gatk_BaseRecalibrator_4.1.2.0/gatk_baserecalibrator_4.1.2.0.cwl @@ -266,7 +266,7 @@ inputs: secondaryFiles: - .idx outputs: - - id: output + - id: gatk_base_recalibrator_output type: File outputBinding: glob: |- diff --git a/gatk_apply_bqsr_4.1.0.0/gatk_apply_bqsr_4.1.0.0.cwl b/gatk_apply_bqsr_4.1.0.0/gatk_apply_bqsr_4.1.0.0.cwl index 1753956b..f288b782 100644 --- a/gatk_apply_bqsr_4.1.0.0/gatk_apply_bqsr_4.1.0.0.cwl +++ b/gatk_apply_bqsr_4.1.0.0/gatk_apply_bqsr_4.1.0.0.cwl @@ -204,7 +204,7 @@ inputs: position: 0 prefix: '--use-original-qualities' outputs: - - id: output + - id: gatk_apply_bqsr_bam type: File? outputBinding: glob: '$(inputs.input.basename.replace(''.bam'', '''')).recal.bam' diff --git a/gatk_apply_bqsr_4.1.8.1/example_inputs.yml b/gatk_apply_bqsr_4.1.8.1/example_inputs.yml new file mode 100644 index 00000000..094a1bc1 --- /dev/null +++ b/gatk_apply_bqsr_4.1.8.1/example_inputs.yml @@ -0,0 +1,48 @@ +reference: + class: File + path: chr14_chr16.fasta +input: + class: File + path: SeraCare_0-5_14.bam +bqsr_recal_file: + class: File + path: SeraCare_0-5_14.recal.table +add_output_sam_program_record: +add_output_vcf_command_line: +arguments_file: +cloud_index_prefetch_buffer: +cloud_prefetch_buffer: +create_output_bam_index: +create_output_bam_md5: +create_output_variant_index: +create_output_variant_md5: +disable_bam_index_caching: +disable_read_filter: +disable_sequence_dictionary_validation: +emit_original_quals: +exclude_intervals: +gatk_config_file: +gcs_max_retries: +gcs_project_for_requester_pays: +global_qscore_prior: +interval_exclusion_padding: +interval_merging_rule: +interval_padding: +interval_set_rule: +intervals: +lenient: +memory_overhead: +memory_per_job: +number_of_threads: +preserve_qscores_less_than: +quantize_quals: +quiet: +read_filter: +read_index: +read_validation_stringency: +seconds_between_progress_updates: 
+sequence_dictionary: +sites_only_vcf_output: +use_jdk_deflater: +use_jdk_inflater: +use_original_qualities: diff --git a/gatk_apply_bqsr_4.1.8.1/gatk_apply_bqsr_4.1.8.1.cwl b/gatk_apply_bqsr_4.1.8.1/gatk_apply_bqsr_4.1.8.1.cwl new file mode 100644 index 00000000..efe37787 --- /dev/null +++ b/gatk_apply_bqsr_4.1.8.1/gatk_apply_bqsr_4.1.8.1.cwl @@ -0,0 +1,326 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: gatk_apply_bqsr_4_1_8_1 +baseCommand: + - gatk + - ApplyBQSR +inputs: + - id: reference + type: File + inputBinding: + position: 4 + prefix: '--reference' + doc: Reference sequence + secondaryFiles: + - .fai + - ^.dict + - id: create_output_bam_index + type: boolean? + inputBinding: + position: 6 + prefix: '--create-output-bam-index' + - id: bqsr_recal_file + type: File + inputBinding: + position: 4 + prefix: '--bqsr-recal-file' + doc: >- + Input recalibration table for BQSR. Only run ApplyBQSR with the covariates + table created from the input BAM + - id: input + type: File + inputBinding: + position: 4 + prefix: '--input' + doc: A BAM file containing input read data + secondaryFiles: + - ^.bai + - id: output_file_name + type: string? + doc: Output file name. Not Required + - id: add_output_sam_program_record + type: boolean? + inputBinding: + position: 6 + prefix: '--add-output-sam-program-record' + - id: add_output_vcf_command_line + type: boolean? + inputBinding: + position: 6 + prefix: '--add-output-vcf-command-line' + - id: arguments_file + type: File? + inputBinding: + position: 6 + prefix: '--arguments_file' + - id: cloud_index_prefetch_buffer + type: int? + inputBinding: + position: 6 + prefix: '--cloud-index-prefetch-buffer' + - id: cloud_prefetch_buffer + type: int? + inputBinding: + position: 6 + prefix: '--cloud-prefetch-buffer' + - id: create_output_bam_md5 + type: boolean? 
+ inputBinding: + position: 6 + prefix: '--create-output-bam-md5' + - id: create_output_variant_index + type: boolean? + inputBinding: + position: 6 + prefix: '--create-output-variant-index' + - id: create_output_variant_md5 + type: boolean? + inputBinding: + position: 6 + prefix: '--create-output-variant-md5' + - id: disable_bam_index_caching + type: boolean? + inputBinding: + position: 6 + prefix: '--disable-bam-index-caching' + - id: disable_read_filter + type: + - 'null' + - type: array + items: string + inputBinding: + prefix: '--disable-read-filter' + inputBinding: + position: 6 + doc: Read filters to be disabled before analysis + - id: disable_sequence_dictionary_validation + type: boolean? + inputBinding: + position: 6 + prefix: '--disable-sequence-dictionary-validation' + - default: true + id: emit_original_quals + type: boolean? + inputBinding: + position: 6 + prefix: '--emit-original-quals' + - id: exclude_intervals + type: string? + inputBinding: + position: 6 + prefix: '--exclude-intervals' + - id: gatk_config_file + type: File? + inputBinding: + position: 6 + prefix: '--gatk-config-file' + - id: gcs_max_retries + type: int? + inputBinding: + position: 6 + prefix: '--gcs-max-retries' + - id: gcs_project_for_requester_pays + type: string? + inputBinding: + position: 6 + prefix: '--gcs-project-for-requester-pays' + - id: global_qscore_prior + type: float? + inputBinding: + position: 6 + prefix: '--global-qscore-prior' + - id: interval_exclusion_padding + type: int? + inputBinding: + position: 6 + prefix: '--interval-exclusion-padding' + - id: interval_merging_rule + type: string? + inputBinding: + position: 6 + prefix: '--interval-merging-rule' + - id: interval_padding + type: int? + inputBinding: + position: 6 + prefix: '--interval-padding' + - id: interval_set_rule + type: string? + inputBinding: + position: 6 + prefix: '--interval-set-rule' + - id: intervals + type: string? 
+ inputBinding: + position: 6 + prefix: '--intervals' + - id: lenient + type: boolean? + inputBinding: + position: 6 + prefix: '--lenient' + - id: preserve_qscores_less_than + type: int? + inputBinding: + position: 6 + prefix: '--preserve-qscores-less-than' + - id: quantize_quals + type: int? + inputBinding: + position: 6 + prefix: '--quantize-quals' + - id: quiet + type: boolean? + inputBinding: + position: 6 + prefix: '--QUIET' + - id: read_filter + type: + - 'null' + - type: array + items: string + inputBinding: + prefix: '--read-filter' + inputBinding: + position: 6 + - id: read_index + type: string? + inputBinding: + position: 6 + prefix: '--read-index' + - id: read_validation_stringency + type: string? + inputBinding: + position: 6 + prefix: '--read-validation-stringency' + - id: seconds_between_progress_updates + type: float? + inputBinding: + position: 6 + prefix: '--seconds-between-progress-updates' + - id: sequence_dictionary + type: File? + inputBinding: + position: 6 + prefix: '--sequence-dictionary' + - id: sites_only_vcf_output + type: boolean? + inputBinding: + position: 6 + prefix: '--sites-only-vcf-output' + - id: use_jdk_deflater + type: boolean? + inputBinding: + position: 6 + prefix: '--use-jdk-deflater' + - id: use_jdk_inflater + type: boolean? + inputBinding: + position: 6 + prefix: '--use-jdk-inflater' + - id: use_original_qualities + type: boolean? + inputBinding: + position: 6 + prefix: '--use-original-qualities' + - id: memory_overhead + type: int? + - id: memory_per_job + type: int? + - id: number_of_threads + type: int? + - id: temporary_directory + type: string? + doc: 'Default value: null. This option may be specified 0 or more times.' 
+outputs: + - id: gatk_apply_bqsr_bam + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/\.bam$/, '_bqsr.bam') + } + } + secondaryFiles: + - ^.bai +label: gatk_apply_bqsr_4.1.8.1 +arguments: + - position: 0 + prefix: '--java-options' + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0){ + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx12G" + } else { + return "-Xmx12G" + } + } + - position: 2 + prefix: '--tmp-dir' + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } + - position: 2 + prefix: '--output' + valueFrom: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/\.bam$/, '_bqsr.bam') + } + } +requirements: + - class: ResourceRequirement + ramMin: 16000 + coresMin: 4 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/gatk:4.1.8.1' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:sumans@mskcc.org' + 'foaf:name': Shalabh Suman + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 
'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': gatk4 + 'doap:revision': 4.1.8.1 diff --git a/gatk_base_recalibrator_4.1.0.0/gatk_base_recalibrator_4.1.0.0.cwl b/gatk_base_recalibrator_4.1.0.0/gatk_base_recalibrator_4.1.0.0.cwl index d923ebe4..fac760ef 100644 --- a/gatk_base_recalibrator_4.1.0.0/gatk_base_recalibrator_4.1.0.0.cwl +++ b/gatk_base_recalibrator_4.1.0.0/gatk_base_recalibrator_4.1.0.0.cwl @@ -234,7 +234,7 @@ inputs: position: 0 prefix: '--use-original-qualities' outputs: - - id: output + - id: gatk_base_recalibrator_output type: File? outputBinding: glob: '$(inputs.input.basename.replace(''.bam'', '''')).recal.table' diff --git a/gatk_base_recalibrator_4.1.8.1/example_inputs.yml b/gatk_base_recalibrator_4.1.8.1/example_inputs.yml new file mode 100644 index 00000000..01b02cbb --- /dev/null +++ b/gatk_base_recalibrator_4.1.8.1/example_inputs.yml @@ -0,0 +1,57 @@ +input: + class: File + metadata: {} + path: input.bam +reference: + class: File + metadata: {} + path: ref.fasta +known_sites: + - class: File + path: dbsnp_137_14_16.b37.vcf + - class: File + path: Mills_and_1000G_gold_standard-14_16.indels.b37.vcf +QUIET: +add_output_sam_program_record: +add_output_vcf_command_line: +arguments_file: +binary_tag_name: +bqsr_baq_gap_open_penalty: +cloud-index-prefetch-buffer: +cloud_prefetch_buffer: +create_output_bam_index: +create_output_bam_md5: +create_output_variant_index: +create_output_variant_md5: +default_base_qualities: +deletions_default_quality: +disable_bam_index_caching: +disable_read_filter: +disable_sequence_dictionary_validation: +exclude_intervals: +gatk_config_file: +gcs_max_retries: +gcs_project_for_requester_pays: +indels_context_size: +insertions_default_quality: +interval_exclusion_padding: +interval_merging_rule: +interval_padding: 
+interval_set_rule: +intervals: +lenient: +low_quality_tail: +maximum_cycle_value: +memory_overhead: +memory_per_job: +mismatches_context_size: +mismatches_default_quality: +number_of_threads: +preserve_qscores_less_than: +quantizing_levels: +read_filter: +read_index: +seconds_between_progress_updates: +sequence_dictionary: +sites_only_vcf_output: +use_original_qualities: diff --git a/gatk_base_recalibrator_4.1.8.1/gatk_base_recalibrator_4.1.8.1.cwl b/gatk_base_recalibrator_4.1.8.1/gatk_base_recalibrator_4.1.8.1.cwl new file mode 100644 index 00000000..a38cddb6 --- /dev/null +++ b/gatk_base_recalibrator_4.1.8.1/gatk_base_recalibrator_4.1.8.1.cwl @@ -0,0 +1,352 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: gatk_base_recalibrator_4_1_8_1 +baseCommand: + - gatk + - BaseRecalibrator +inputs: + - id: input + type: File + inputBinding: + position: 3 + prefix: '--input' + doc: BAM/SAM file containing reads + secondaryFiles: + - ^.bai + - id: known_sites + type: + type: array + items: File + inputBinding: + prefix: '--known-sites' + inputBinding: + position: 3 + doc: >- + One or more databases of known polymorphic sites used to exclude regions + around known polymorphisms from analysis + secondaryFiles: + - .idx + - id: reference + type: File + inputBinding: + position: 3 + prefix: '--reference' + doc: Reference sequence file + secondaryFiles: + - .fai + - ^.dict + - id: output_file_name + type: string? + doc: Output file name. Not Required + - id: add_output_sam_program_record + type: boolean? + inputBinding: + position: 10 + prefix: '--add-output-sam-program-record' + - id: add_output_vcf_command_line + type: boolean? 
+ inputBinding: + position: 10 + prefix: '--add-output-vcf-command-line' + - id: arguments_file + type: + - 'null' + - type: array + items: File + inputBinding: + position: 0 + prefix: '--arguments_file' + - id: binary_tag_name + type: string? + inputBinding: + position: 10 + prefix: '--binary-tag-name' + - id: bqsr_baq_gap_open_penalty + type: float? + inputBinding: + position: 10 + prefix: '--bqsr-baq-gap-open-penalty' + - id: cloud-index-prefetch-buffer + type: int? + inputBinding: + position: 10 + prefix: '--cloud-index-prefetch-buffer' + - id: cloud_prefetch_buffer + type: int? + inputBinding: + position: 10 + prefix: '--cloud-prefetch-buffer' + - id: create_output_bam_index + type: boolean? + inputBinding: + position: 10 + prefix: '--create-output-bam-index' + - id: create_output_bam_md5 + type: boolean? + inputBinding: + position: 10 + prefix: '--create-output-bam-md5' + - id: create_output_variant_index + type: boolean? + inputBinding: + position: 10 + prefix: '--create-output-variant-index' + - id: create_output_variant_md5 + type: boolean? + inputBinding: + position: 10 + prefix: '--create-output-variant-md5' + - id: default_base_qualities + type: int? + inputBinding: + position: 10 + prefix: '--default-base-qualities' + - id: deletions_default_quality + type: int? + inputBinding: + position: 10 + prefix: '--deletions-default-quality' + - id: disable_bam_index_caching + type: boolean? + inputBinding: + position: 10 + prefix: '--disable-bam-index-caching' + - id: disable_read_filter + type: + - 'null' + - type: array + items: string + inputBinding: + prefix: '--disable-read-filter' + inputBinding: + position: 10 + doc: Read filters to be disabled before analysis + - id: disable_sequence_dictionary_validation + type: boolean? + inputBinding: + position: 10 + prefix: '--disable-sequence-dictionary-validation' + - id: exclude_intervals + type: string? + inputBinding: + position: 10 + prefix: '--exclude-intervals' + - id: gatk_config_file + type: File? 
+ inputBinding: + position: 10 + prefix: '--gatk-config-file' + - id: gcs_max_retries + type: int? + inputBinding: + position: 10 + prefix: '--gcs-max-retries' + - id: gcs_project_for_requester_pays + type: string? + inputBinding: + position: 10 + prefix: '--gcs-project-for-requester-pays' + - id: indels_context_size + type: int? + inputBinding: + position: 10 + prefix: '--indels-context-size' + - id: insertions_default_quality + type: int? + inputBinding: + position: 10 + prefix: '--insertions-default-quality' + - id: interval_exclusion_padding + type: int? + inputBinding: + position: 10 + prefix: '--interval-exclusion-padding' + - id: interval_merging_rule + type: string? + inputBinding: + position: 10 + prefix: '--interval-merging-rule' + - id: interval_padding + type: int? + inputBinding: + position: 10 + prefix: '--interval-padding' + - id: interval_set_rule + type: string? + inputBinding: + position: 10 + prefix: '--interval-set-rule' + - id: intervals + type: string? + inputBinding: + position: 10 + prefix: '--intervals' + - id: lenient + type: boolean? + inputBinding: + position: 10 + prefix: '--lenient' + - id: low_quality_tail + type: int? + inputBinding: + position: 10 + prefix: '--low-quality-tail' + - id: maximum_cycle_value + type: int? + inputBinding: + position: 10 + prefix: '--maximum-cycle-value' + - id: mismatches_context_size + type: int? + inputBinding: + position: 10 + prefix: '--mismatches-context-size' + - id: mismatches_default_quality + type: int? + inputBinding: + position: 10 + prefix: '--mismatches-default-quality' + - id: preserve_qscores_less_than + type: int? + inputBinding: + position: 10 + prefix: '--preserve-qscores-less-than' + - id: quantizing_levels + type: int? + inputBinding: + position: 10 + prefix: '--quantizing-levels' + - id: QUIET + type: boolean? 
+ inputBinding: + position: 10 + prefix: '--QUIET' + - id: read_filter + type: + - 'null' + - type: array + items: string + inputBinding: + prefix: '--read-filter' + inputBinding: + position: 10 + - id: read_index + type: string? + inputBinding: + position: 10 + prefix: '--read-index' + - id: seconds_between_progress_updates + type: float? + inputBinding: + position: 10 + prefix: '--seconds-between-progress-updates' + - id: sequence_dictionary + type: File? + inputBinding: + position: 10 + prefix: '--sequence-dictionary' + - id: sites_only_vcf_output + type: boolean? + inputBinding: + position: 10 + prefix: '--sites-only-vcf-output' + - id: use_original_qualities + type: boolean? + inputBinding: + position: 10 + prefix: '--use-original-qualities' + - id: number_of_threads + type: int? + - id: memory_per_job + type: int? + - id: memory_overhead + type: int? + - id: temporary_directory + type: string? + doc: 'Default value: null. This option may be specified 0 or more times.' +outputs: + - id: gatk_base_recalibrator_output + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/, '_bqsr.table') + } + } +label: gatk_base_recalibrator_4.1.8.1 +arguments: + - position: 0 + prefix: '--java-options' + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0){ + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx12G" + } else { + return "-Xmx12G" + } + } + - position: 2 + 
prefix: '--tmp-dir' + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } + - position: 2 + prefix: '--output' + valueFrom: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/, '_bqsr.table') + } + } +requirements: + - class: ResourceRequirement + ramMin: 32000 + coresMin: 8 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/gatk:4.1.8.1' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:sumans@mskcc.org' + 'foaf:name': Shalabh Suman + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center diff --git a/gatk_collect_alignment_summary_metrics_4.1.8.0/README.md b/gatk_collect_alignment_summary_metrics_4.1.8.0/README.md new file mode 100644 index 00000000..d1c1901d --- /dev/null +++ b/gatk_collect_alignment_summary_metrics_4.1.8.0/README.md @@ -0,0 +1,99 @@ +# CWL for running GATK - CollectAlignmentSummaryMetrics + +## Version of tools in docker image + +| Tool | Version | Location | +|--- |--- |--- | +| gatk | 4.1.8.0 | broadinstitute/gatk:4.1.8.0 | + + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner gatk_collect_alignment_summary_metrics_4.1.8.0.cwl example_inputs.yaml +``` + +### Usage + +```bash +usage: 
./GitHub/cwl-commandlinetools/gatk_collect_alignment_summary_metrics_4.1.8.0/gatk_collect_alignment_summary_metrics_4.1.8.0.cwl + [-h] [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] --input INPUT + --output_file_name OUTPUT_FILE_NAME [--reference REFERENCE] + [--adaptor_sequence ADAPTOR_SEQUENCE] + [--metrics_acciumulation_level METRICS_ACCIUMULATION_LEVEL] + [--expected_pair_orientations EXPECTED_PAIR_ORIENTATIONS] + [--is_bisulfite_sequenced] [--max_insert_size MAX_INSERT_SIZE] + [--validation_stringency VALIDATION_STRINGENCY] [--assume_sorted] + [--stop_after STOP_AFTER] [--create_index] [--create_md5_file] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --input INPUT Input file (bam or sam). Required. + --output_file_name OUTPUT_FILE_NAME + File to write the output to. Required. + --reference REFERENCE + Reference sequence file. Note that while this argument + is not required, without it only a small subset of the + metrics will be calculated. Note also that if a + reference sequence is provided, it must be accompanied + by a sequence dictionary. Default value: null. + --adaptor_sequence ADAPTOR_SEQUENCE + List of adapter sequences to use when processing the + alignment metrics. This argument may be specified 0 or + more times. Default value: [AATGATACGGCGACCACCGAGATCTA + CACTCTTTCCCTACACGACGCTCTTCCGATCT, + to clear the default value. Possible values: + {ALL_READS, SAMPLE, LIBRARY, READ_GROUP} This option + may be specified 0 or more times. This option can be + set to 'null' to clear the default list. 
+ --expected_pair_orientations EXPECTED_PAIR_ORIENTATIONS + Paired-end reads that do not have this expected + orientation will be considered chimeric. This argument + may be specified 0 or more times. Default value: [FR]. + Possible values: {FR, RF, TANDEM} + --is_bisulfite_sequenced + Whether the SAM or BAM file consists of bisulfite + sequenced reads. Default value: false. Possible + values: {true, false} + --max_insert_size MAX_INSERT_SIZE + Paired-end reads above this insert size will be + considered chimeric along with inter-chromosomal + pairs. Default value: 100000. + --validation_stringency VALIDATION_STRINGENCY + Validation stringency for all SAM files read by this + program. Setting stringency to SILENT can improve + performance when processing a BAM file in which + variable-length data (read, qualities, tags) do not + otherwise need to be decoded. Default value: STRICT. + This option can be set to 'null' to clear the default + value. Possible values: {STRICT,LENIENT, SILENT} + --assume_sorted If true (default), then the sort order in the header + file will be ignored. Default value: true. This option + can be set to 'null' to clear the default value. + Possible values: {true, false} + --stop_after STOP_AFTER + Stop after processing N reads, mainly for debugging. + Default value: 0. This option can be set to 'null' to + clear the default value. + --create_index Whether to create a BAM index when writing a + coordinate-sorted BAM file. Default value: false. + Possible values: {true, false} + --create_md5_file Whether to create an MD5 digest for any BAM or FASTQ + files created. Default value: false. 
Possible values: + {true, false} +``` diff --git a/gatk_collect_alignment_summary_metrics_4.1.8.0/example_inputs.yaml b/gatk_collect_alignment_summary_metrics_4.1.8.0/example_inputs.yaml new file mode 100644 index 00000000..665cef4b --- /dev/null +++ b/gatk_collect_alignment_summary_metrics_4.1.8.0/example_inputs.yaml @@ -0,0 +1,28 @@ +input: + class: File + metadata: {} + path: "/path/to/bam" + secondaryFiles: + - class: File + path: "/path/to/bam.bai" +reference: + class: File + metadata: {} + path: "/path/to/fasta" + secondaryFiles: + - class: File + path: "/path/to/reference.dict" +output_file_name: "alignment_summary_metrics.txt" +adaptor_sequence: null +metrics_acciumulation_level: null +expected_pair_orientations: null +is_bisulfite_sequenced: false +max_insert_size: null +validation_stringency: null +assume_sorted: null +stop_after: null +create_index: null +create_md5_file: null +memory_overhead: null +memory_per_job: null +number_of_threads: null diff --git a/gatk_collect_alignment_summary_metrics_4.1.8.0/gatk_collect_alignment_summary_metrics_4.1.8.0.cwl b/gatk_collect_alignment_summary_metrics_4.1.8.0/gatk_collect_alignment_summary_metrics_4.1.8.0.cwl new file mode 100644 index 00000000..13a024dd --- /dev/null +++ b/gatk_collect_alignment_summary_metrics_4.1.8.0/gatk_collect_alignment_summary_metrics_4.1.8.0.cwl @@ -0,0 +1,240 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: gatk_collect_alignment_summary_metrics_4.1.8.0 +baseCommand: + - gatk + - CollectAlignmentSummaryMetrics +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: input + type: File + inputBinding: + position: 0 + prefix: '-I' + doc: Input file (bam or sam). Required. 
+ - id: output_file_name + type: string? + doc: File to write the output to. Optional; when omitted, the name is derived from the input BAM basename. + - id: reference + type: File? + inputBinding: + position: 0 + prefix: '-R' + doc: >- + Reference sequence file. Note that while this argument is not required, + without it only a small subset of the metrics will be calculated. Note + also that if a reference sequence is provided, it must be accompanied by a + sequence dictionary. Default value: null. + secondaryFiles: + - ^.fasta.fai + - ^.dict + - id: adaptor_sequence + type: string? + inputBinding: + position: 0 + prefix: '--ADAPTER_SEQUENCE' + doc: >- + List of adapter sequences to use when processing the alignment metrics. + This argument may be specified 0 or more times. Default value: + [AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT, + AGATCGGAAGAGCTCGTATGCCGTCTTCTGCTTG, + AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT, + AGATCGGAAGAGCGGTTCAGCAGGAATGCCGAGACCGATCTCGTATGCCGTCTTCTGCTTG, + AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT, + AGATCGGAAGAGCACACGTCTGAACTCCAGTCACNNNNNNNNATCTCGTATGCCGTCTTCTGCTTG]. + - id: metrics_acciumulation_level + type: string? + inputBinding: + position: 0 + prefix: '--METRIC_ACCUMULATION_LEVEL' + doc: >- + The level(s) at which to accumulate metrics. Default value: [ALL_READS]. + This option can be set to 'null' to clear the default value. Possible + values: {ALL_READS, SAMPLE, LIBRARY, READ_GROUP} This option may be + specified 0 or more times. This option can be set to 'null' to clear the + default list. + - id: expected_pair_orientations + type: string? + inputBinding: + position: 0 + prefix: '--EXPECTED_PAIR_ORIENTATIONS' + doc: >- + Paired-end reads that do not have this expected orientation will be + considered chimeric. This argument may be specified 0 or more times. + Default value: [FR]. Possible values: {FR, RF, TANDEM} + - id: is_bisulfite_sequenced + type: boolean? 
+ inputBinding: + position: 0 + prefix: '--IS_BISULFITE_SEQUENCED' + doc: >- + Whether the SAM or BAM file consists of bisulfite sequenced reads. + Default value: false. Possible values: {true, false} + - id: max_insert_size + type: int? + inputBinding: + position: 0 + prefix: '--MAX_INSERT_SIZE' + doc: >- + Paired-end reads above this insert size will be considered chimeric along + with inter-chromosomal pairs. Default value: 100000. + - id: validation_stringency + type: string? + inputBinding: + position: 0 + prefix: '--VALIDATION_STRINGENCY' + doc: >- + Validation stringency for all SAM files read by this program. Setting + stringency to SILENT can improve performance when processing a BAM file in + which variable-length data (read, qualities, tags) do not otherwise need + to be decoded. Default value: STRICT. This option can be set to 'null' to + clear the default value. Possible values: {STRICT,LENIENT, SILENT} + - default: true + id: assume_sorted + type: boolean? + inputBinding: + position: 0 + prefix: '--ASSUME_SORTED' + doc: >- + If true (default), then the sort order in the header file will be + ignored. Default value: true. This option can be set to 'null' to clear + the default value. Possible values: {true, false} + - id: stop_after + type: int? + inputBinding: + position: 0 + prefix: '--STOP_AFTER' + doc: >- + Stop after processing N reads, mainly for debugging. Default value: 0. + This option can be set to 'null' to clear the default value. + - id: create_index + type: boolean? + inputBinding: + position: 0 + prefix: '--CREATE_INDEX' + doc: >- + Whether to create a BAM index when writing a coordinate-sorted BAM file. + Default value: false. Possible values: {true, false} + - id: create_md5_file + type: boolean? + inputBinding: + position: 0 + prefix: '--CREATE_MD5_FILE' + doc: >- + Whether to create an MD5 digest for any BAM or FASTQ files created. + Default value: false. Possible values: {true, false} + - id: use_jdk_deflater + type: boolean? 
+ inputBinding: + position: 0 + prefix: '--USE_JDK_DEFLATER' + doc: >- + Use the JDK Deflater instead of the Intel Deflater for writing compressed + output + - id: use_jdk_inflater + type: boolean? + inputBinding: + position: 0 + prefix: '--USE_JDK_INFLATER' + doc: >- + Use the JDK Inflater instead of the Intel Inflater for reading compressed + input + - id: temporary_directory + type: string? + doc: 'Default value: null. This option may be specified 0 or more times.' +outputs: + - id: gatk_collect_alignment_summary_metrics_txt + type: File + outputBinding: + glob: |- + ${ + if (inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/, '_alignment_summary_metrics.txt') + } + } +label: GATK-CollectAlignmentSummaryMetrics +arguments: + - position: 0 + prefix: '--java-options' + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx15G" + } + else { + return "-Xmx15G" + } + } + - position: 0 + prefix: '--TMP_DIR' + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } + - position: 0 + prefix: '-O' + valueFrom: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/, '_alignment_summary_metrics.txt') + } + } +requirements: + - class: ResourceRequirement + ramMin: 32000 + coresMin: 1 + - class: DockerRequirement + 
dockerPull: 'ghcr.io/msk-access/gatk:4.1.8.0' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charles Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charles Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': gatk4 + 'doap:revision': 4.1.8.0 diff --git a/gatk_collect_hs_metrics_4.1.8.0/README.md b/gatk_collect_hs_metrics_4.1.8.0/README.md new file mode 100644 index 00000000..2f0ab1b8 --- /dev/null +++ b/gatk_collect_hs_metrics_4.1.8.0/README.md @@ -0,0 +1,136 @@ +# CWL for running GATK - CollectHsMetrics + +## Version of tools in docker image + +| Tool | Version | Location | +|--- |--- |--- | +| gatk | 4.1.8.0 | broadinstitute/gatk:4.1.8.0 | + + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner gatk_collect_hs_metrics_4.1.8.0.cwl example_inputs.yaml +``` + +### Usage + +```bash +usage: ./GitHub/cwl-commandlinetools/gatk_collect_hs_metrics_4.1.8.0/gatk_collect_hs_metrics_4.1.8.0.cwl + [-h] --input INPUT --bait_intervals BAIT_INTERVALS --target_intervals + TARGET_INTERVALS --output_file_name OUTPUT_FILE_NAME + [--per_base_coverage PER_BASE_COVERAGE] + [--per_target_coverage PER_TARGET_COVERAGE] + [--theoretical_sensitivity_output THEORETICAL_SENSITIVITY_OUTPUT] + [--allele_fraction ALLELE_FRACTION] [--bait_set_name BAIT_SET_NAME] + [--clip_overlapping_reads] [--coverage_cap COVERAGE_CAP] + [--include_indels] [--minimum_base_quality MINIMUM_BASE_QUALITY] + [--minimum_mapping_quality MINIMUM_MAPPING_QUALITY] + [--near_distance NEAR_DISTANCE] [--sample_size SAMPLE_SIZE] + 
[--reference REFERENCE] + [--metrics_acciumulation_level METRICS_ACCIUMULATION_LEVEL] + [--validation_stringency VALIDATION_STRINGENCY] [--create_index] + [--create_md5_file] [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --input INPUT An aligned SAM or BAM file. Required. + --bait_intervals BAIT_INTERVALS + An interval list file that contains the locations of + the baits used. This argument must be specified at + least once. Required. + --target_intervals TARGET_INTERVALS + An interval list file that contains the locations of + the targets. This argument must be specified at least + once. Required. + --output_file_name OUTPUT_FILE_NAME + The output file to write the metrics to. Required. + --per_base_coverage PER_BASE_COVERAGE + An optional file to output per base coverage + information to. The per-base file contains one line + per target base and can grow very large. It is not + recommended for use with large target sets. Default + value: null. + --per_target_coverage PER_TARGET_COVERAGE + An optional file to output per target coverage + information to. Default value: null. + --theoretical_sensitivity_output THEORETICAL_SENSITIVITY_OUTPUT + Output for Theoretical Sensitivity metrics where the + allele fractions are provided by the ALLELE_FRACTION + argument. Default value: null. + --allele_fraction ALLELE_FRACTION + Allele fraction for which to calculate theoretical + sensitivity. This argument may be specified 0 or more + times. Default value: [0.001, 0.005, 0.01, 0.02, 0.05, + 0.1, 0.2, 0.3, 0.5]. + --bait_set_name BAIT_SET_NAME + Bait set name. If not provided it is inferred from the + filename of the bait intervals. Default value: null. + --clip_overlapping_reads + True if we are to clip overlapping reads, false + otherwise. Default value: true. 
Possible values: + {true, false} + --coverage_cap COVERAGE_CAP + Parameter to set a max coverage limit for Theoretical + Sensitivity calculations. Default is 200. Default + value: 200. + --include_indels If true count inserted bases as on target and deleted + bases as covered by a read. Default value: false. + Possible values: {true, false} + --minimum_base_quality MINIMUM_BASE_QUALITY + Minimum base quality for a base to contribute + coverage. Default value: 20. + --minimum_mapping_quality MINIMUM_MAPPING_QUALITY + Minimum mapping quality for a read to contribute + coverage. Default value: 20. + --near_distance NEAR_DISTANCE + The maximum distance between a read and the nearest + probe/bait/amplicon for the read to be considered + 'near probe' and included in percent selected. Default + value: 250. + --sample_size SAMPLE_SIZE + Sample Size used for Theoretical Het Sensitivity + sampling. Default is 10000. Default value: 10000. + --reference REFERENCE + Reference sequence file. Note that while this argument + is not required, without it only a small subset of the + metrics will be calculated. Note also that if a + reference sequence is provided, it must be accompanied + by a sequence dictionary. Default value: null. + --metrics_acciumulation_level METRICS_ACCIUMULATION_LEVEL + The level(s) at which to accumulate metrics. Default + value: [ALL_READS]. This option can be set to 'null' + to clear the default value. Possible values: + {ALL_READS, SAMPLE, LIBRARY, READ_GROUP} This option + may be specified 0 or more times. This option can be + set to 'null' to clear the default list. + --validation_stringency VALIDATION_STRINGENCY + Validation stringency for all SAM files read by this + program. Setting stringency to SILENT can improve + performance when processing a BAM file in which + variable-length data (read, qualities, tags) do not + otherwise need to be decoded. Default value: STRICT. + This option can be set to 'null' to clear the default + value. 
Possible values: {STRICT,LENIENT, SILENT} + --create_index Whether to create a BAM index when writing a + coordinate-sorted BAM file. Default value: false. + Possible values: {true, false} + --create_md5_file Whether to create an MD5 digest for any BAM or FASTQ + files created. Default value: false. Possible values: + {true, false} + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS +``` diff --git a/gatk_collect_hs_metrics_4.1.8.0/gatk_collect_hs_metrics_4.1.8.0.cwl b/gatk_collect_hs_metrics_4.1.8.0/gatk_collect_hs_metrics_4.1.8.0.cwl new file mode 100644 index 00000000..44a42fc1 --- /dev/null +++ b/gatk_collect_hs_metrics_4.1.8.0/gatk_collect_hs_metrics_4.1.8.0.cwl @@ -0,0 +1,318 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: gatk_collect_hs_metrics_4_1_8_0 +baseCommand: + - gatk + - CollectHsMetrics +inputs: + - id: input + type: File + inputBinding: + position: 0 + prefix: '-I' + doc: An aligned SAM or BAM file. Required. + - id: bait_intervals + type: File + inputBinding: + position: 0 + prefix: '--BAIT_INTERVALS' + doc: >- + An interval list file that contains the locations of the baits used. This + argument must be specified at least once. Required. + - id: target_intervals + type: File + inputBinding: + position: 0 + prefix: '--TARGET_INTERVALS' + doc: >- + An interval list file that contains the locations of the targets. This + argument must be specified at least once. Required. + - id: output_file_name + type: string? + doc: The output file to write the metrics to. Optional; when omitted, the name is derived from the input BAM basename. + - id: per_base_coverage + type: string? + doc: >- + An optional file to output per base coverage information to. The per-base + file contains one line per target base and can grow very large. 
It is not + recommended for use with large target sets. Default value: null. + - id: per_target_coverage + type: string? + doc: >- + An optional file to output per target coverage information to. Default + value: null. + - id: theoretical_sensitivity_output + type: string? + inputBinding: + position: 0 + prefix: '--THEORETICAL_SENSITIVITY_OUTPUT' + doc: >- + Output for Theoretical Sensitivity metrics where the allele fractions are + provided by the ALLELE_FRACTION argument. Default value: null. + - id: allele_fraction + type: float? + inputBinding: + position: 0 + prefix: '--ALLELE_FRACTION' + doc: >- + Allele fraction for which to calculate theoretical sensitivity. This + argument may be specified 0 or more times. Default value: [0.001, 0.005, + 0.01, 0.02, 0.05, 0.1, 0.2, 0.3, 0.5]. + - id: bait_set_name + type: string? + inputBinding: + position: 0 + prefix: '--BAIT_SET_NAME' + doc: >- + Bait set name. If not provided it is inferred from the filename of the + bait intervals. Default value: null. + - id: clip_overlapping_reads + type: boolean? + inputBinding: + position: 0 + prefix: '--CLIP_OVERLAPPING_READS' + doc: >- + True if we are to clip overlapping reads, false otherwise. Default value: + true. Possible values: {true, false} + - id: coverage_cap + type: int? + inputBinding: + position: 0 + prefix: '--COVERAGE_CAP' + doc: >- + Parameter to set a max coverage limit for Theoretical Sensitivity + calculations. Default is 200. Default value: 200. + - id: include_indels + type: boolean? + inputBinding: + position: 0 + prefix: '--INCLUDE_INDELS' + doc: >- + If true count inserted bases as on target and deleted bases as covered by + a read. Default value: false. Possible values: {true, false} + - id: minimum_base_quality + type: int? + inputBinding: + position: 0 + prefix: '--MINIMUM_BASE_QUALITY' + doc: >- + Minimum base quality for a base to contribute coverage. Default value: + 20. + - id: minimum_mapping_quality + type: int? 
+ inputBinding: + position: 0 + prefix: '--MINIMUM_MAPPING_QUALITY' + doc: >- + Minimum mapping quality for a read to contribute coverage. Default value: + 20. + - id: near_distance + type: int? + inputBinding: + position: 0 + prefix: '--NEAR_DISTANCE' + doc: >- + The maximum distance between a read and the nearest probe/bait/amplicon + for the read to be considered 'near probe' and included in percent + selected. Default value: 250. + - id: sample_size + type: int? + inputBinding: + position: 0 + prefix: '--SAMPLE_SIZE' + doc: >- + Sample Size used for Theoretical Het Sensitivity sampling. Default is + 10000. Default value: 10000. + - id: reference + type: File? + inputBinding: + position: 0 + prefix: '-R' + doc: >- + Reference sequence file. Note that while this argument is not required, + without it only a small subset of the metrics will be calculated. Note + also that if a reference sequence is provided, it must be accompanied by a + sequence dictionary. Default value: null. + secondaryFiles: + - ^.fasta.fai + - ^.dict + - id: metrics_acciumulation_level + type: string? + inputBinding: + position: 0 + prefix: '--METRIC_ACCUMULATION_LEVEL' + doc: >- + The level(s) at which to accumulate metrics. Default value: [ALL_READS]. + This option can be set to 'null' to clear the default value. Possible + values: {ALL_READS, SAMPLE, LIBRARY, READ_GROUP} This option may be + specified 0 or more times. This option can be set to 'null' to clear the + default list. + - id: validation_stringency + type: string? + inputBinding: + position: 0 + prefix: '--VALIDATION_STRINGENCY' + doc: >- + Validation stringency for all SAM files read by this program. Setting + stringency to SILENT can improve performance when processing a BAM file in + which variable-length data (read, qualities, tags) do not otherwise need + to be decoded. Default value: STRICT. This option can be set to 'null' to + clear the default value. 
Possible values: {STRICT,LENIENT, SILENT} + - id: create_index + type: boolean? + inputBinding: + position: 0 + prefix: '--CREATE_INDEX' + doc: >- + Whether to create a BAM index when writing a coordinate-sorted BAM file. + Default value: false. Possible values: {true, false} + - id: create_md5_file + type: boolean? + inputBinding: + position: 0 + prefix: '--CREATE_MD5_FILE' + doc: >- + Whether to create an MD5 digest for any BAM or FASTQ files created. + Default value: false. Possible values: {true, false} + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: temporary_directory + type: string? + doc: 'Default value: null. This option may be specified 0 or more times.' +outputs: + - id: gatk_collect_hs_metrics_txt + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/, '_hs_metrics.txt') + } + } + - id: gatk_collect_hs_metrics_per_base_coverage_txt + type: File + outputBinding: + glob: |- + ${ + if(inputs.per_base_coverage){ + return inputs.per_base_coverage + } else { + return inputs.input.basename.replace(/.bam/, '_per_base_coverage.txt') + } + } + - id: gatk_collect_hs_metrics_per_target_coverage_txt + type: File + outputBinding: + glob: |- + ${ + if(inputs.per_target_coverage){ + return inputs.per_target_coverage + } else { + return inputs.input.basename.replace(/.bam/, '_per_target_coverage.txt') + } + } +label: GATK-CollectHsMetrics +arguments: + - position: 0 + prefix: '--java-options' + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && 
!inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx15G" + } + else { + return "-Xmx15G" + } + } + - position: 0 + prefix: '--TMP_DIR' + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } + - position: 0 + prefix: '-O' + valueFrom: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/, '_hs_metrics.txt') + } + } + - position: 0 + prefix: '--PER_TARGET_COVERAGE' + valueFrom: |- + ${ + if(inputs.per_target_coverage){ + return inputs.per_target_coverage + } else { + return inputs.input.basename.replace(/.bam/, '_per_target_coverage.txt') + } + } + - position: 0 + prefix: '--PER_BASE_COVERAGE' + valueFrom: |- + ${ + if(inputs.per_base_coverage){ + return inputs.per_base_coverage + } else { + return inputs.input.basename.replace(/.bam/, '_per_base_coverage.txt') + } + } +requirements: + - class: ResourceRequirement + ramMin: 32000 + coresMin: 1 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/gatk:4.1.8.0' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charles Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charles Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': gatk4 + 'doap:revision': 4.1.8.0 diff --git a/gatk_collect_insert_size_metrics_4.1.8.0/README.md 
b/gatk_collect_insert_size_metrics_4.1.8.0/README.md new file mode 100644 index 00000000..b0ea21bf --- /dev/null +++ b/gatk_collect_insert_size_metrics_4.1.8.0/README.md @@ -0,0 +1,105 @@ +# CWL for running GATK - CollectInsertSizeMetrics + +## Version of tools in docker image + +| Tool | Version | Location | +|--- |--- |--- | +| gatk | 4.1.8.0 | broadinstitute/gatk:4.1.8.0 | + + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner gatk_collect_insert_size_metrics_4.1.8.0.cwl example_inputs.yaml +``` + +### Usage + +```bash +usage: ./GitHub/cwl-commandlinetools/gatk_collect_insert_size_metrics_4.1.8.0/gatk_collect_insert_size_metrics_4.1.8.0.cwl + [-h] [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] --input INPUT + --output_file_name OUTPUT_FILE_NAME --histogram_file HISTOGRAM_FILE + [--deviations DEVIATIONS] [--histogram_width HISTOGRAM_WIDTH] + [--minimum_pct MINIMUM_PCT] + [--metrics_acciumulation_level METRICS_ACCIUMULATION_LEVEL] + [--include_duplicates] [--validation_stringency VALIDATION_STRINGENCY] + [--assume_sorted] [--stop_after STOP_AFTER] [--create_index] + [--create_md5_file] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --input INPUT Input file (bam or sam). Required. + --output_file_name OUTPUT_FILE_NAME + File to write the output to. Required. + --histogram_file HISTOGRAM_FILE + File to write insert size Histogram chart to. + Required. + --deviations DEVIATIONS + Generate mean, sd and plots by trimming the data down + to MEDIAN + DEVIATIONS*MEDIAN_ABSOLUTE_DEVIATION. 
This + is done because insert size data typically includes + enough anomalous values from chimeras and other + artifacts to make the mean and sd grossly misleading + regarding the real distribution. Default value: 10.0. + This option can be set to 'null' to clear the default + value. + --histogram_width HISTOGRAM_WIDTH + Explicitly sets the Histogram width, overriding + automatic truncation of Histogram tail. Also, when + calculating mean and standard deviation, only bins <= + Histogram_WIDTH will be included. Default value: null. + --minimum_pct MINIMUM_PCT + When generating the Histogram, discard any data + categories (out of FR, TANDEM, RF) that have fewer + than this percentage of overall reads. (Range: 0 to + 1). Default value: 0.05. This option can be set to + 'null' to clear the default value. + --metrics_acciumulation_level METRICS_ACCIUMULATION_LEVEL + The level(s) at which to accumulate metrics. Default + value: [ALL_READS]. This option can be set to 'null' + to clear the default value. Possible values: + {ALL_READS, SAMPLE, LIBRARY, READ_GROUP} This option + may be specified 0 or more times. This option can be + set to 'null' to clear the default list. + --include_duplicates If true, also include reads marked as duplicates in + the insert size histogram. Default value: false. This + option can be set to 'null' to clear the default + value. Possible values: {true, false} + --validation_stringency VALIDATION_STRINGENCY + Validation stringency for all SAM files read by this + program. Setting stringency to SILENT can improve + performance when processing a BAM file in which + variable-length data (read, qualities, tags) do not + otherwise need to be decoded. Default value: STRICT. + This option can be set to 'null' to clear the default + value. Possible values: {STRICT,LENIENT, SILENT} + --assume_sorted If true (default), then the sort order in the header + file will be ignored. Default value: true. 
This option + can be set to 'null' to clear the default value. + Possible values: {true, false} + --stop_after STOP_AFTER + Stop after processing N reads, mainly for debugging. + Default value: 0. This option can be set to 'null' to + clear the default value. + --create_index Whether to create a BAM index when writing a + coordinate-sorted BAM file. Default value: false. + Possible values: {true, false} + --create_md5_file Whether to create an MD5 digest for any BAM or FASTQ + files created. Default value: false. Possible values: + {true, false} +``` diff --git a/gatk_collect_insert_size_metrics_4.1.8.0/example_inputs.yaml b/gatk_collect_insert_size_metrics_4.1.8.0/example_inputs.yaml new file mode 100644 index 00000000..59a104cd --- /dev/null +++ b/gatk_collect_insert_size_metrics_4.1.8.0/example_inputs.yaml @@ -0,0 +1,22 @@ +input: + class: File + metadata: {} + path: "/path/to/bam" + secondaryFiles: + - class: File + path: "/path/to/bam.bai" +output_file_name: "insert_size_metrics.txt" +histogram_file: "histogram.pdf" +deviations: 10.0 +histogram_width: 800 +minimum_pct: 0 +metrics_acciumulation_level: null +include_duplicates: null +validation_stringency: null +assume_sorted: null +stop_after: null +create_index: null +create_md5_file: null +memory_overhead: null +memory_per_job: null +number_of_threads: null diff --git a/gatk_collect_insert_size_metrics_4.1.8.0/gatk_collect_insert_size_metrics_4.1.8.0.cwl b/gatk_collect_insert_size_metrics_4.1.8.0/gatk_collect_insert_size_metrics_4.1.8.0.cwl new file mode 100644 index 00000000..bc83b149 --- /dev/null +++ b/gatk_collect_insert_size_metrics_4.1.8.0/gatk_collect_insert_size_metrics_4.1.8.0.cwl @@ -0,0 +1,252 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: gatk_collect_insert_size_metrics_4_1_8_0 +baseCommand: + - gatk + - CollectInsertSizeMetrics 
+inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: input + type: File + inputBinding: + position: 0 + prefix: '-I' + doc: Input file (bam or sam). Required. + - id: output_file_name + type: string? + doc: File to write the output to. Optional. Defaults to the input basename with .bam replaced by _insert_size_metrics.txt. + - id: histogram_file + type: string? + doc: File to write insert size Histogram chart to. Optional. Defaults to the input basename with .bam replaced by _histogram.pdf. + - id: deviations + type: float? + inputBinding: + position: 0 + prefix: '--DEVIATIONS' + doc: >- + Generate mean, sd and plots by trimming the data down to MEDIAN + + DEVIATIONS*MEDIAN_ABSOLUTE_DEVIATION. This is done because insert size + data typically includes enough anomalous values from chimeras and other + artifacts to make the mean and sd grossly misleading regarding the real + distribution. Default value: 10.0. This option can be set to 'null' to + clear the default value. + - id: histogram_width + type: int? + inputBinding: + position: 0 + prefix: '--HISTOGRAM_WIDTH' + doc: >- + Explicitly sets the Histogram width, overriding automatic truncation of + Histogram tail. Also, when calculating mean and standard deviation, only + bins <= Histogram_WIDTH will be included. Default value: null. + - id: minimum_pct + type: float? + inputBinding: + position: 0 + prefix: '--MINIMUM_PCT' + doc: >- + When generating the Histogram, discard any data categories (out of FR, + TANDEM, RF) that have fewer than this percentage of overall reads. (Range: + 0 to 1). Default value: 0.05. This option can be set to 'null' to clear + the default value. + - id: metrics_acciumulation_level + type: string? + inputBinding: + position: 0 + prefix: '--METRIC_ACCUMULATION_LEVEL' + doc: >- + The level(s) at which to accumulate metrics. Default value: [ALL_READS]. + This option can be set to 'null' to clear the default value. 
Possible + values: {ALL_READS, SAMPLE, LIBRARY, READ_GROUP} This option may be + specified 0 or more times. This option can be set to 'null' to clear the + default list. + - id: include_duplicates + type: boolean? + inputBinding: + position: 0 + prefix: '--INCLUDE_DUPLICATES' + doc: >- + If true, also include reads marked as duplicates in the insert size + histogram. Default value: false. This option can be set to 'null' to + clear the default value. Possible values: {true, false} + - id: validation_stringency + type: string? + inputBinding: + position: 0 + prefix: '--VALIDATION_STRINGENCY' + doc: >- + Validation stringency for all SAM files read by this program. Setting + stringency to SILENT can improve performance when processing a BAM file in + which variable-length data (read, qualities, tags) do not otherwise need + to be decoded. Default value: STRICT. This option can be set to 'null' to + clear the default value. Possible values: {STRICT,LENIENT, SILENT} + - default: true + id: assume_sorted + type: boolean? + inputBinding: + position: 0 + prefix: '--ASSUME_SORTED' + doc: >- + If true (default), then the sort order in the header file will be + ignored. Default value: true. This option can be set to 'null' to clear + the default value. Possible values: {true, false} + - id: stop_after + type: int? + inputBinding: + position: 0 + prefix: '--STOP_AFTER' + doc: >- + Stop after processing N reads, mainly for debugging. Default value: 0. + This option can be set to 'null' to clear the default value. + - id: create_index + type: boolean? + inputBinding: + position: 0 + prefix: '--CREATE_INDEX' + doc: >- + Whether to create a BAM index when writing a coordinate-sorted BAM file. + Default value: false. Possible values: {true, false} + - id: create_md5_file + type: boolean? + inputBinding: + position: 0 + prefix: '--CREATE_MD5_FILE' + doc: >- + Whether to create an MD5 digest for any BAM or FASTQ files created. + Default value: false. 
Possible values: {true, false} + - id: use_jdk_deflater + type: boolean? + inputBinding: + position: 0 + prefix: '--USE_JDK_DEFLATER' + doc: >- + Use the JDK Deflater instead of the Intel Deflater for writing compressed + output + - id: use_jdk_inflater + type: boolean? + inputBinding: + position: 0 + prefix: '--USE_JDK_INFLATER' + doc: >- + Use the JDK Inflater instead of the Intel Inflater for reading compressed + input + - id: temporary_directory + type: string? + doc: 'Default value: null. This option may be specified 0 or more times.' +outputs: + - id: gatk_collect_insert_size_metrics_txt + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/\.bam$/, '_insert_size_metrics.txt') /* anchored so only the trailing .bam extension is swapped; must match the -O argument */ + } + } + - id: gatk_collect_insert_size_metrics_histogram_pdf + type: File + outputBinding: + glob: |- + ${ + if(inputs.histogram_file){ + return inputs.histogram_file + } else { + return inputs.input.basename.replace(/\.bam$/, '_histogram.pdf') + } + } +label: GATK-CollectInsertSizeMetrics +arguments: + - position: 0 + prefix: '--java-options' + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx15G" + } + else { + return "-Xmx15G" + } + } + - position: 0 + prefix: '--TMP_DIR' + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } + - position: 2 + prefix: '-O' + 
valueFrom: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/\.bam$/, '_insert_size_metrics.txt') + } + } + - position: 2 + prefix: '-H' + valueFrom: |- + ${ + if(inputs.histogram_file){ + return inputs.histogram_file + } else { + return inputs.input.basename.replace(/\.bam$/, '_histogram.pdf') + } + } +requirements: + - class: ResourceRequirement + ramMin: 32000 + coresMin: 1 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/gatk:4.1.8.0' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charles Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charles Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': gatk4 + 'doap:revision': 4.1.8.0 diff --git a/gatk_downsamplesam_4.1.8.1/example_inputs.yml b/gatk_downsamplesam_4.1.8.1/example_inputs.yml new file mode 100644 index 00000000..8ea893b7 --- /dev/null +++ b/gatk_downsamplesam_4.1.8.1/example_inputs.yml @@ -0,0 +1,21 @@ +QUIET: null +arguments_file: null +create_output_bam_index: null +create_output_bam_md5: null +input: + class: File + path: input.bam +lenient: null +memory_overhead: null +memory_per_job: null +number_of_threads: null +output_file_name: null +output_metrics_file_name: null +probability: null +random_seed: null +reference: + class: File + path: reference.fasta +strategy: null +temporary_directory: null + diff --git a/gatk_downsamplesam_4.1.8.1/gatk_downsamplesam_4.1.8.1.cwl b/gatk_downsamplesam_4.1.8.1/gatk_downsamplesam_4.1.8.1.cwl new file mode 100644 index 00000000..b4078135 --- /dev/null +++ b/gatk_downsamplesam_4.1.8.1/gatk_downsamplesam_4.1.8.1.cwl @@ -0,0 
+1,201 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: gatk_downsamplesam_4_1_8_1 +baseCommand: + - gatk + - DownsampleSam +inputs: + - id: input + type: File + inputBinding: + position: 3 + prefix: '--INPUT' + doc: BAM/SAM file containing reads + secondaryFiles: + - ^.bai + - id: reference + type: File + inputBinding: + position: 3 + prefix: '--REFERENCE_SEQUENCE' + doc: Reference sequence file + secondaryFiles: + - .fai + - ^.dict + - id: output_file_name + type: string? + doc: Output file name. Not Required + - id: output_metrics_file_name + type: string? + doc: Output file name for metrics file. Not Required + - id: probability + type: float? + inputBinding: + position: 4 + prefix: '--PROBABILITY' + doc: 'The probability of keeping any individual read, between 0 and 1.' + - id: random_seed + type: int? + inputBinding: + position: 4 + prefix: '--RANDOM_SEED' + doc: >- + Random seed used for deterministic results. Setting to null will cause + multiple invocations to produce different results. + - id: strategy + type: string? + inputBinding: + position: 4 + prefix: '--STRATEGY' + doc: >- + The --STRATEGY argument is an enumerated type (Strategy), which can have + one of the following values: + + + HighAccuracy + + ConstantMemory + + Chained + + default Strategy ConstantMemory + - id: arguments_file + type: + - 'null' + - type: array + items: File + inputBinding: + position: 0 + prefix: '--arguments_file' + - id: create_output_bam_index + type: boolean? + inputBinding: + position: 10 + prefix: '--CREATE_INDEX' + - id: create_output_bam_md5 + type: boolean? + inputBinding: + position: 10 + prefix: '--CREATE_MD5_FILE' + - id: QUIET + type: boolean? + inputBinding: + position: 10 + prefix: '--QUIET' + - id: "validation_stringency" + type: string? 
+ inputBinding: + position: 6 + prefix: '--VALIDATION_STRINGENCY' + - id: number_of_threads + type: int? + - id: memory_per_job + type: int? + - id: memory_overhead + type: int? + - id: temporary_directory + type: string? + doc: 'Default value: null.' +outputs: + - id: gatk_downsamplesam_output_bam + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/\.bam$/, '_downsamplesam.bam') /* anchored so only the trailing .bam extension is swapped; must match the --OUTPUT argument */ + } + } + secondaryFiles: + - ^.bai + - id: gatk_downsamplesam_output_metrics + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_metrics_file_name){ + return inputs.output_metrics_file_name + } else { + return inputs.input.basename.replace(/\.bam$/, '_downsamplesam.metrics') + } + } +label: gatk_downsample_sam_4.1.8.1 +arguments: + - position: 0 + prefix: '--java-options' + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0){ + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx12G" + } else { + return "-Xmx12G" + } + } + - position: 2 + prefix: '--TMP_DIR' + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } + - position: 2 + prefix: '--OUTPUT' + valueFrom: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/\.bam$/, '_downsamplesam.bam') + } + } + - position: 2 + prefix: '--METRICS_FILE' + valueFrom: |- + ${ + if(inputs.output_metrics_file_name){ 
+ return inputs.output_metrics_file_name + } else { + return inputs.input.basename.replace(/\.bam$/, '_downsamplesam.metrics') + } + } +requirements: + - class: ResourceRequirement + ramMin: 20000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/gatk:4.1.8.1' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center diff --git a/gatk_mark_duplicates_4.1.0.0/gatk_mark_duplicates_4.1.0.0.cwl b/gatk_mark_duplicates_4.1.0.0/gatk_mark_duplicates_4.1.0.0.cwl index 636dc04a..f474b88d 100644 --- a/gatk_mark_duplicates_4.1.0.0/gatk_mark_duplicates_4.1.0.0.cwl +++ b/gatk_mark_duplicates_4.1.0.0/gatk_mark_duplicates_4.1.0.0.cwl @@ -137,14 +137,14 @@ inputs: position: 0 prefix: '--TAGGING_POLICY' outputs: - - id: output_md_bam + - id: gatk_mark_duplicates_bam doc: Output marked duplicate bam type: File outputBinding: glob: '$(inputs.input.basename.replace(''md.bam'', ''bam''))' secondaryFiles: - ^.bai - - id: output_md_metrics + - id: gatk_mark_duplicates_metrics doc: Output marked duplicate metrics type: File outputBinding: diff --git a/gatk_mean_quality_by_cycle/4.1.8.0/example_inputs.yaml b/gatk_mean_quality_by_cycle/4.1.8.0/example_inputs.yaml new file mode 100644 index 00000000..11eb7e0d --- /dev/null +++ b/gatk_mean_quality_by_cycle/4.1.8.0/example_inputs.yaml @@ -0,0 +1,18 @@ +input: + class: File + metadata: {} + path: "/path/to/bam" +output_file_name: null +chart_output: null +validation_stringency: null +assume_sorted: null +pf_reads_only: null +reference: + class: File + metadata: {} + path: ref.fasta +create_index: null +create_md5_file: null 
+memory_overhead: null +memory_per_job: null +number_of_threads: null diff --git a/gatk_mean_quality_by_cycle/4.1.8.0/gatk_mean_quality_by_cycle_4.1.8.0.cwl b/gatk_mean_quality_by_cycle/4.1.8.0/gatk_mean_quality_by_cycle_4.1.8.0.cwl new file mode 100644 index 00000000..14044a27 --- /dev/null +++ b/gatk_mean_quality_by_cycle/4.1.8.0/gatk_mean_quality_by_cycle_4.1.8.0.cwl @@ -0,0 +1,197 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: gatk_mean_quality_by_cycle_4_1_8_0 +baseCommand: + - gatk + - MeanQualityByCycle +inputs: + - id: input + type: File + inputBinding: + position: 0 + prefix: '-I' + doc: An aligned SAM or BAM file. Required. + - id: output_file_name + type: string? + doc: The output file to write the metrics to. + - id: chart_output + type: string? + doc: A file (with .pdf extension) to write the chart to. + - id: assume_sorted + type: boolean? + inputBinding: + position: 1 + prefix: '--ASSUME_SORTED' + doc: | + If true (default), then the sort order in the header file will be ignored. + - id: pf_reads_only + type: boolean? + inputBinding: + position: 1 + prefix: '--PF_READS_ONLY' + doc: | + If set to true calculate mean quality over PF reads only. Default value: false. Possible values: {true, false} + - id: reference + type: File? + inputBinding: + position: 0 + prefix: '-R' + doc: >- + Reference sequence file. Note that while this argument is not required, + without it only a small subset of the metrics will be calculated. Note + also that if a reference sequence is provided, it must be accompanied by a + sequence dictionary. Default value: null. + secondaryFiles: + - .fai + - ^.dict + - id: validation_stringency + type: string? + inputBinding: + position: 0 + prefix: '--VALIDATION_STRINGENCY' + doc: >- + Validation stringency for all SAM files read by this program. 
Setting + stringency to SILENT can improve performance when processing a BAM file in + which variable-length data (read, qualities, tags) do not otherwise need + to be decoded. Default value: STRICT. This option can be set to 'null' to + clear the default value. Possible values: {STRICT,LENIENT, SILENT} + - id: create_index + type: boolean? + inputBinding: + position: 0 + prefix: '--CREATE_INDEX' + doc: >- + Whether to create a BAM index when writing a coordinate-sorted BAM file. + Default value: false. Possible values: {true, false} + - id: create_md5_file + type: boolean? + inputBinding: + position: 0 + prefix: '--CREATE_MD5_FILE' + doc: >- + Whether to create an MD5 digest for any BAM or FASTQ files created. + Default value: false. Possible values: {true, false} + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: temporary_directory + type: string? + doc: >- + Directory with space available to be used by this program for temporary storage of working files. 
+outputs: + - id: gatk_mean_quality_by_cycle_output + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/\.bam$/, '_mean_quality_by_cycle.txt') /* anchored so only the trailing .bam extension is swapped; must match the -O argument */ + } + } + - id: gatk_mean_quality_by_cycle_chart_output + type: File + outputBinding: + glob: |- + ${ + if(inputs.chart_output){ + return inputs.chart_output + } else { + return inputs.input.basename.replace(/\.bam$/, '_mean_quality_by_cycle.pdf') + } + } +label: GATK-MeanQualityByCycle +arguments: + - position: 0 + prefix: '--java-options' + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx14G" + } + else { + return "-Xmx14G" + } + } + - position: 0 + prefix: '--TMP_DIR' + valueFrom: |- + ${ + if(inputs.temporary_directory) { + return inputs.temporary_directory; + } + return runtime.tmpdir; + } + - position: 0 + prefix: '-O' + valueFrom: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/\.bam$/, '_mean_quality_by_cycle.txt') + } + } + - position: 0 + prefix: '--CHART_OUTPUT' + valueFrom: |- + ${ + if(inputs.chart_output){ + return inputs.chart_output + } else { + return inputs.input.basename.replace(/\.bam$/, '_mean_quality_by_cycle.pdf') + } + } + +requirements: + - class: ResourceRequirement + ramMin: 16000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 
'ghcr.io/msk-access/gatk:4.1.8.0' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charles Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charles Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': gatk4 + 'doap:revision': 4.1.8.0 diff --git a/gatk_mean_quality_by_cycle/README.md b/gatk_mean_quality_by_cycle/README.md new file mode 100644 index 00000000..53b29701 --- /dev/null +++ b/gatk_mean_quality_by_cycle/README.md @@ -0,0 +1,76 @@ +# CWL for running GATK - MeanQualityByCycle + +## Version of tools in docker image + +| Tool | Version | Location | +|--- |--- |--- | +| gatk | 4.1.8.0 | broadinstitute/gatk:4.1.8.0 | + + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner gatk_mean_quality_by_cycle_4.1.8.0.cwl example_inputs.yaml +``` + +### Usage + +```bash +usage: gatk_mean_quality_by_cycle_4.1.8.0.cwl + [-h] --input INPUT [--output_file_name OUTPUT_FILE_NAME] + [--chart_output CHART_OUTPUT] [--assume_sorted] [--pf_reads_only] + [--reference REFERENCE] [--validation_stringency VALIDATION_STRINGENCY] + [--create_index] [--create_md5_file] [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] + [--temporary_directory TEMPORARY_DIRECTORY] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --input INPUT An aligned SAM or BAM file. Required. + --output_file_name OUTPUT_FILE_NAME + The output file to write the metrics to. 
+ --chart_output CHART_OUTPUT + A file (with .pdf extension) to write the chart to. + --assume_sorted If true (default), then the sort order in the header + file will be ignored. + --pf_reads_only If set to true calculate mean quality over PF reads + only. Default value: false. Possible values: {true, + false} + --reference REFERENCE + Reference sequence file. Note that while this argument + is not required, without it only a small subset of the + metrics will be calculated. Note also that if a + reference sequence is provided, it must be accompanied + by a sequence dictionary. Default value: null. + --validation_stringency VALIDATION_STRINGENCY + Validation stringency for all SAM files read by this + program. Setting stringency to SILENT can improve + performance when processing a BAM file in which + variable-length data (read, qualities, tags) do not + otherwise need to be decoded. Default value: STRICT. + This option can be set to 'null' to clear the default + value. Possible values: {STRICT,LENIENT, SILENT} + --create_index Whether to create a BAM index when writing a + coordinate-sorted BAM file. Default value: false. + Possible values: {true, false} + --create_md5_file Whether to create an MD5 digest for any BAM or FASTQ + files created. Default value: false. Possible values: + {true, false} + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --temporary_directory TEMPORARY_DIRECTORY + Default value: null. This option may be specified 0 or + more times. 
+``` diff --git a/gatk_merge_bam_alignment_4.1.8.0/example_inputs.yaml b/gatk_merge_bam_alignment_4.1.8.0/example_inputs.yaml new file mode 100644 index 00000000..004d24ec --- /dev/null +++ b/gatk_merge_bam_alignment_4.1.8.0/example_inputs.yaml @@ -0,0 +1,44 @@ +unmapped_bam: + class: File + path: "/path/to/bam" +reference: + class: File + path: "/path/to/reference.fasta" +aligned_bam: + - class: File + path: "/path/to/bam" +output_file_name: null +add_mate_cigar: null +add_pg_tag_to_reads: null +aligned_reads_only: null +include_duplicates: null +aligner_proper_pair_flags: null +attributes_to_remove: null +attributes_to_retain: null +attributes_to_reverse: null +attributes_to_reverse_complement: null +clip_adapters: null +clip_overlapping_reads: null +expected_orientations: null +hard_clip_overlapping_reads: null +include_secondary_alignments: null +is_bisulfite_sequence: null +jump_size: null +matching_dictionary_tags: null +max_insertions_or_deletions: null +min_unclipped_bases: null +paired_run: null +primary_alignment_strategy: null +read1_aligned_bam: null +read1_trim: null +read2_aligned_bam: null +read2_trim: null +sort_order: null +unmap_contaminant_reads: null +unmapped_read_strategy: null +validation_stringency: null +create_index: null +create_md5_file: null +memory_overhead: null +memory_per_job: null +number_of_threads: null diff --git a/gatk_merge_bam_alignment_4.1.8.0/gatk_merge_bam_alignment_4.1.8.0.cwl b/gatk_merge_bam_alignment_4.1.8.0/gatk_merge_bam_alignment_4.1.8.0.cwl new file mode 100644 index 00000000..1dd7658b --- /dev/null +++ b/gatk_merge_bam_alignment_4.1.8.0/gatk_merge_bam_alignment_4.1.8.0.cwl @@ -0,0 +1,557 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: gatk_merge_bam_alignment_4_1_8_0 +baseCommand: + - gatk + - MergeBamAlignment +inputs: + - id: memory_per_job + type: 
int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: unmapped_bam + type: File + inputBinding: + position: 0 + prefix: '--UNMAPPED_BAM' + doc: > + Original SAM or BAM file of unmapped reads, which must be in queryname + order. Reads MUST + + be unmapped. Required. + - id: reference + type: File + inputBinding: + position: 0 + prefix: '--REFERENCE_SEQUENCE' + doc: | + Reference sequence file. Required. + secondaryFiles: + - ^.dict + - id: output_file_name + type: string? + doc: | + Merged SAM or BAM file to write to. Required. + - id: add_mate_cigar + type: boolean? + inputBinding: + position: 0 + prefix: '--ADD_MATE_CIGAR' + doc: > + Adds the mate CIGAR tag (MC) if true, does not if false. Default value: + true. Possible + + values: {true, false} + - id: add_pg_tag_to_reads + type: boolean? + inputBinding: + position: 0 + prefix: '--ADD_PG_TAG_TO_READS' + doc: > + Add PG tag to each read in a SAM or BAM Default value: true. Possible + values: {true, + + false} + - id: aligned_bam + type: + - 'null' + - type: array + items: File + inputBinding: + prefix: '--ALIGNED_BAM' + inputBinding: + position: 1 + doc: > + SAM or BAM file(s) with alignment data. This argument may be specified 0 + or more times. + + Default value: null. Cannot be used in conjunction with argument(s) + READ1_ALIGNED_BAM + + (R1_ALIGNED) READ2_ALIGNED_BAM (R2_ALIGNED) + - id: aligned_reads_only + type: boolean? + inputBinding: + position: 0 + prefix: '--ALIGNED_READS_ONLY' + doc: > + Whether to output only aligned reads. Default value: false. Possible + values: {true, + + false} + - id: aligner_proper_pair_flags + type: boolean? + inputBinding: + position: 0 + prefix: '--ALIGNER_PROPER_PAIR_FLAGS' + doc: > + Use the aligners idea of what a proper pair is rather than computing in + this program. + + Default value: false. 
Possible values: {true, false} + - id: attributes_to_remove + type: string? + inputBinding: + position: 0 + prefix: '--ATTRIBUTES_TO_REMOVE' + doc: > + Attributes from the alignment record that should be removed when merging. + This overrides + + ATTRIBUTES_TO_RETAIN if they share common tags. This argument may be + specified 0 or more + + times. Default value: null. + - id: attributes_to_retain + type: string? + inputBinding: + position: 0 + prefix: '--ATTRIBUTES_TO_RETAIN' + doc: > + Reserved alignment attributes (tags starting with X, Y, or Z) that should + be brought over + + from the alignment data when merging. This argument may be specified 0 or + more times. + + Default value: null. + - id: attributes_to_reverse + type: string? + inputBinding: + position: 0 + prefix: '--ATTRIBUTES_TO_REVERSE' + doc: > + Attributes on negative strand reads that need to be reversed. This + argument may be + + specified 0 or more times. Default value: [OQ, U2]. + - id: attributes_to_reverse_complement + type: string? + inputBinding: + position: 0 + prefix: '--ATTRIBUTES_TO_REVERSE_COMPLEMENT' + doc: > + Attributes on negative strand reads that need to be reverse complemented. + This argument + + may be specified 0 or more times. Default value: [E2, SQ]. + - id: clip_adapters + type: boolean? + inputBinding: + position: 0 + prefix: '--CLIP_ADAPTERS' + doc: > + Whether to clip adapters where identified. Default value: true. Possible + values: {true, + + false} + - id: clip_overlapping_reads + type: boolean? + inputBinding: + position: 0 + prefix: '--CLIP_OVERLAPPING_READS' + doc: > + For paired reads, clip the 3' end of each read if necessary so that it + does not extend + + past the 5' end of its mate. Clipping will be either soft or hard + clipping, depending on + + CLIP_OVERLAPPING_READS_OPERATOR setting. Hard clipped bases and their + qualities will be + + stored in the XB and XQ tags respectively. Default value: true. 
Possible + values: {true, + + false} + - id: expected_orientations + type: string? + inputBinding: + position: 0 + prefix: '--EXPECTED_ORIENTATIONS' + doc: > + The expected orientation of proper read pairs. Replaces JUMP_SIZE This + argument may be + + specified 0 or more times. Default value: null. Possible values: {FR, RF, + TANDEM} Cannot + + be used in conjunction with argument(s) JUMP_SIZE (JUMP) + - id: hard_clip_overlapping_reads + type: boolean? + inputBinding: + position: 0 + prefix: '--HARD_CLIP_OVERLAPPING_READS' + doc: > + If true, hard clipping will be applied to overlapping reads. By default, + soft clipping is + + used. Default value: false. Possible values: {true, false} + - id: include_secondary_alignments + type: boolean? + inputBinding: + position: 0 + prefix: '--INCLUDE_SECONDARY_ALIGNMENTS' + doc: > + If false, do not write secondary alignments to output. Default value: + true. Possible + + values: {true, false} + - id: is_bisulfite_sequence + type: boolean? + inputBinding: + position: 0 + prefix: '--IS_BISULFITE_SEQUENCE' + doc: > + Whether the lane is bisulfite sequence (used when calculating the NM + tag). Default value: + + false. Possible values: {true, false} + - id: jump_size + type: int? + inputBinding: + position: 0 + prefix: '--JUMP_SIZE' + doc: > + The expected jump size (required if this is a jumping library). + Deprecated. Use + + EXPECTED_ORIENTATIONS instead Default value: null. Cannot be used in + conjunction with + + argument(s) EXPECTED_ORIENTATIONS (ORIENTATIONS) + - id: matching_dictionary_tags + type: string? + inputBinding: + position: 0 + prefix: '--MATCHING_DICTIONARY_TAGS' + doc: > + List of Sequence Records tags that must be equal (if present) in the + reference dictionary + + and in the aligned file. Mismatching tags will cause an error if in this + list, and a + + warning otherwise. This argument may be specified 0 or more times. + Default value: [M5, + + LN]. + - id: max_insertions_or_deletions + type: int? 
+ inputBinding: + position: 0 + prefix: '--MAX_INSERTIONS_OR_DELETIONS' + doc: > + The maximum number of insertions or deletions permitted for an alignment + to be included. + + Alignments with more than this many insertions or deletions will be + ignored. Set to -1 to + + allow any number of insertions or deletions. Default value: 1. + - id: min_unclipped_bases + type: int? + inputBinding: + position: 0 + prefix: '--MIN_UNCLIPPED_BASES' + doc: > + If UNMAP_CONTAMINANT_READS is set, require this many unclipped bases or + else the read will + + be marked as contaminant. Default value: 32. + - id: paired_run + type: boolean? + inputBinding: + position: 0 + prefix: '--PAIRED_RUN' + doc: > + DEPRECATED. This argument is ignored and will be removed. Default value: + true. Possible + + values: {true, false} + - id: primary_alignment_strategy + type: string? + inputBinding: + position: 0 + prefix: '--PRIMARY_ALIGNMENT_STRATEGY' + doc: > + Strategy for selecting primary alignment when the aligner has provided + more than one + + alignment for a pair or fragment, and none are marked as primary, more + than one is marked + + as primary, or the primary alignment is filtered out for some reason. For + all strategies, + + ties are resolved arbitrarily. Default value: BestMapq. BestMapq (Expects + that multiple + + alignments will be correlated with HI tag, and prefers the pair of + alignments with the + + largest MAPQ, in the absence of a primary selected by the aligner.) + + EarliestFragment (Prefers the alignment which maps the earliest base in + the read. Note + + that EarliestFragment may not be used for paired reads.) + + BestEndMapq (Appropriate for cases in which the aligner is not pair-aware, + and does not + + output the HI tag. It simply picks the alignment for each end with the + highest MAPQ, and + + makes those alignments primary, regardless of whether the two alignments + make sense + + together.) + + MostDistant (Appropriate for a non-pair-aware aligner. 
Picks the alignment + pair with the + + largest insert size. If all alignments would be chimeric, it picks the + alignments for each + + end with the best MAPQ.) + - id: read1_aligned_bam + type: + - 'null' + - type: array + items: File + inputBinding: + prefix: '--READ1_ALIGNED_BAM' + inputBinding: + position: 1 + doc: > + SAM or BAM file(s) with alignment data from the first read of a pair. + This argument may + + be specified 0 or more times. Default value: null. Cannot be used in + conjunction with + + argument(s) ALIGNED_BAM (ALIGNED) + - id: read1_trim + type: int? + inputBinding: + position: 0 + prefix: '--READ1_TRIM' + doc: > + The number of bases trimmed from the beginning of read 1 prior to + alignment Default + + value: 0. + - id: read2_aligned_bam + type: + - 'null' + - type: array + items: File + inputBinding: + prefix: '--READ2_ALIGNED_BAM' + inputBinding: + position: 1 + doc: > + SAM or BAM file(s) with alignment data from the second read of a pair. + This argument may + + be specified 0 or more times. Default value: null. Cannot be used in + conjunction with + + argument(s) ALIGNED_BAM (ALIGNED) + - id: read2_trim + type: int? + inputBinding: + position: 0 + prefix: '--READ2_TRIM' + doc: > + The number of bases trimmed from the beginning of read 2 prior to + alignment Default + + value: 0. + - id: sort_order + type: string? + inputBinding: + position: 1 + prefix: '--SORT_ORDER' + doc: > + The order in which the merged reads should be output. Default value: + coordinate. Possible + + values: {unsorted, queryname, coordinate, duplicate, unknown} + - id: unmap_contaminant_reads + type: boolean? + inputBinding: + position: 1 + prefix: '--UNMAP_CONTAMINANT_READS' + doc: > + Detect reads originating from foreign organisms (e.g. bacterial DNA in a + non-bacterial + + sample),and unmap + label those reads accordingly. Default value: false. + Possible values: + + {true, false} + - id: unmapped_read_strategy + type: string? 
+ inputBinding: + position: 1 + prefix: '--UNMAPPED_READ_STRATEGY' + doc: > + How to deal with alignment information in reads that are being unmapped + (e.g. due to + + cross-species contamination.) Currently ignored unless + UNMAP_CONTAMINANT_READS = true. + + Note that the DO_NOT_CHANGE strategy will actually reset the cigar and set + the mapping + + quality on unmapped reads since otherwise the result will be an invalid + record. To force no + + change use the DO_NOT_CHANGE_INVALID strategy. Default value: + DO_NOT_CHANGE. Possible + + values: {COPY_TO_TAG, DO_NOT_CHANGE, DO_NOT_CHANGE_INVALID, MOVE_TO_TAG} + - id: validation_stringency + type: string? + inputBinding: + position: 0 + prefix: '--VALIDATION_STRINGENCY' + doc: >- + Validation stringency for all SAM files read by this program. Setting + stringency to SILENT can improve performance when processing a BAM file in + which variable-length data (read, qualities, tags) do not otherwise need + to be decoded. Default value: STRICT. This option can be set to 'null' to + clear the default value. Possible values: {STRICT,LENIENT, SILENT} + - id: create_index + type: boolean? + inputBinding: + position: 0 + prefix: '--CREATE_INDEX' + doc: >- + Whether to create a BAM index when writing a coordinate-sorted BAM file. + Default value: false. Possible values: {true, false} + - id: create_md5_file + type: boolean? + inputBinding: + position: 0 + prefix: '--CREATE_MD5_FILE' + doc: >- + Whether to create an MD5 digest for any BAM or FASTQ files created. + Default value: false. Possible values: {true, false} + - id: use_jdk_deflater + type: boolean? + inputBinding: + position: 0 + prefix: '--USE_JDK_DEFLATER' + doc: >- + Use the JDK Deflater instead of the Intel Deflater for writing compressed + output + - id: use_jdk_inflater + type: boolean?
+ inputBinding: + position: 0 + prefix: '--USE_JDK_INFLATER' + doc: >- + Use the JDK Inflater instead of the Intel Inflater for reading compressed + input + - id: temporary_directory + type: string? + doc: 'Default value: null. This option may be specified 0 or more times.' +outputs: + - id: gatk_merge_bam_alignment_bam + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.unmapped_bam.basename.replace(/\.(bam|sam)$/, '_merged.bam') // swap only a trailing .bam or .sam extension + } + } + secondaryFiles: + - ^.bai +label: GATK-MergeBamAlignment +arguments: + - position: 0 + prefix: '--java-options' + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx15G" + } + else { + return "-Xmx15G" + } + } + - position: 1 + prefix: '-O' + valueFrom: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.unmapped_bam.basename.replace(/\.(bam|sam)$/, '_merged.bam') // must stay identical to the output glob above + } + } + - position: 0 + prefix: '--TMP_DIR' + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } +requirements: + - class: ResourceRequirement + ramMin: 17000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/gatk:4.1.8.0' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' +
'foaf:name': Charles Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charles Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': gatk4 + 'doap:revision': 4.1.8.0 diff --git a/gatk_merge_sam_files_4.1.8.0/example_inputs.yaml b/gatk_merge_sam_files_4.1.8.0/example_inputs.yaml new file mode 100644 index 00000000..7fcf6550 --- /dev/null +++ b/gatk_merge_sam_files_4.1.8.0/example_inputs.yaml @@ -0,0 +1,21 @@ +input: + - class: File + path: "/path/to/bam" + - class: File + path: "/path/to/bam" +output_file_name: null +assume_sorted: null +comment: null +create_index: null +create_md5_file: null +intervals: null +merge_sequence_dictionaries: null +reference_sequence: + class: File + path: "/path/to/reference.fasta" +sort_order: null +validation_stringency: null +verbosity: null +memory_overhead: null +memory_per_job: null +number_of_threads: null diff --git a/gatk_merge_sam_files_4.1.8.0/gatk_merge_sam_files_4.1.8.0.cwl b/gatk_merge_sam_files_4.1.8.0/gatk_merge_sam_files_4.1.8.0.cwl new file mode 100644 index 00000000..c38ae9d9 --- /dev/null +++ b/gatk_merge_sam_files_4.1.8.0/gatk_merge_sam_files_4.1.8.0.cwl @@ -0,0 +1,261 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: gatk_merge_sam_files_4_1_8_0 +baseCommand: + - gatk + - MergeSamFiles +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? 
+ - id: input + type: + type: array + items: File + inputBinding: + prefix: '-I' + inputBinding: + position: 1 + doc: > + SAM or BAM input file This argument must be specified at least once. + Required. + - id: output_file_name + type: string? + doc: SAM or BAM file to write merged result to Required. + - id: assume_sorted + type: boolean? + inputBinding: + position: 1 + prefix: '--ASSUME_SORTED' + doc: > + If true, assume that the input files are in the same sort order as the + requested output + + sort order, even if their headers say otherwise. Default value: false. + Possible values: + + {true, false} + - id: comment + type: string? + inputBinding: + position: 1 + prefix: '--COMMENT' + doc: > + Comment(s) to include in the merged output files header. This argument + may be specified + + 0 or more times. Default value: null. + - id: create_index + type: boolean? + inputBinding: + position: 1 + prefix: '--CREATE_INDEX' + doc: > + Whether to create a BAM index when writing a coordinate-sorted BAM file. + Default value: + + false. Possible values: {true, false} + - id: create_md5_file + type: boolean? + inputBinding: + position: 1 + prefix: '--CREATE_MD5_FILE' + doc: > + Whether to create an MD5 digest for any BAM or FASTQ files created. + Default value: + + false. Possible values: {true, false} + - id: intervals + type: File? + inputBinding: + position: 1 + prefix: '--INTERVALS' + doc: > + An interval list file that contains the locations of the positions to + merge. Assume bam + + are sorted and indexed. The resulting file will contain alignments that + may overlap with + + genomic regions outside the requested region. Unmapped reads are + discarded. Default + + value: null. + - id: merge_sequence_dictionaries + type: boolean? + inputBinding: + position: 1 + prefix: '--MERGE_SEQUENCE_DICTIONARIES' + doc: > + Merge the sequence dictionaries Default value: false. Possible values: + {true, false} + - id: reference_sequence + type: File? 
+ inputBinding: + position: 1 + prefix: '--REFERENCE_SEQUENCE' + doc: | + Reference sequence file. Default value: null. + - id: sort_order + type: string? + inputBinding: + position: 1 + prefix: '--SORT_ORDER' + doc: > + Sort order of output file Default value: coordinate. Possible values: + {unsorted, + + queryname, coordinate, duplicate, unknown} + - id: use_threading + type: boolean? + inputBinding: + position: 1 + prefix: '--USE_THREADING' + doc: > + Option to create a background thread to encode, compress and write to disk + the output + + file. The threaded version uses about 20% more CPU and decreases runtime + by ~20% when + + writing out a compressed BAM file. Default value: false. Possible values: + {true, false} + - id: validation_stringency + type: string? + inputBinding: + position: 1 + prefix: '--VALIDATION_STRINGENCY' + doc: > + Validation stringency for all SAM files read by this program. Setting + stringency to + + SILENT can improve performance when processing a BAM file in which + variable-length data + + (read, qualities, tags) do not otherwise need to be decoded. Default + value: STRICT. + + Possible values: {STRICT, LENIENT, SILENT} + - id: verbosity + type: string? + inputBinding: + position: 1 + prefix: '--VERBOSITY' + doc: > + Control verbosity of logging. Default value: INFO. Possible values: + {ERROR, WARNING, + + INFO, DEBUG} + - id: use_jdk_deflater + type: boolean? + inputBinding: + position: 0 + prefix: '--USE_JDK_DEFLATER' + doc: >- + Use the JDK Deflater instead of the Intel Deflater for writing compressed + output + - id: use_jdk_inflater + type: boolean? + inputBinding: + position: 0 + prefix: '--USE_JDK_INFLATER' + doc: >- + Use the JDK Inflater instead of the Intel Inflater for reading compressed + input + - id: temporary_directory + type: string? + doc: 'Default value: null. This option may be specified 0 or more times.' 
+outputs: + - id: gatk_merge_sam_files_bam + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return 'merged.bam' + } + } +label: GATK-MergeSamFiles +arguments: + - position: 0 + prefix: '--java-options' + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx15G" + } + else { + return "-Xmx15G" + } + } + - position: 0 + prefix: '--TMP_DIR' + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } + - position: 2 + prefix: '-O' + valueFrom: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return 'merged.bam' + } + } +requirements: + - class: ResourceRequirement + ramMin: 17000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/gatk:4.1.8.0' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charles Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charles Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': gatk4 + 'doap:revision': 4.1.8.0 diff --git 
a/gatk_revert_sam/4.1.8.0/example_inputs.yaml b/gatk_revert_sam/4.1.8.0/example_inputs.yaml new file mode 100644 index 00000000..b195dfa8 --- /dev/null +++ b/gatk_revert_sam/4.1.8.0/example_inputs.yaml @@ -0,0 +1,26 @@ +input: + class: File + path: "/path" +output: null +output_map: null +attribute_to_clear: null +max_discard_fraction: null +library_name: null +max_records_in_ram: null +output_by_readgroup: null +output_by_readgroup_file_format: null +remove_alignment_information: 'false' +remove_duplicate_information: 'true' +restore_hardclips: 'false' +restore_original_qualities: 'false' +sample_alias: null +sanitize: null +sort_order: 'unsorted' +reference: null # key must equal the tool input id (reference, passed as -R) +validation_stringency: 'SILENT' +compression_level: null +create_index: null +create_md5_file: null +memory_overhead: null +memory_per_job: null +number_of_threads: null diff --git a/gatk_revert_sam/4.1.8.0/gatk_revert_sam_4.1.8.0.cwl b/gatk_revert_sam/4.1.8.0/gatk_revert_sam_4.1.8.0.cwl new file mode 100644 index 00000000..bde0f2de --- /dev/null +++ b/gatk_revert_sam/4.1.8.0/gatk_revert_sam_4.1.8.0.cwl @@ -0,0 +1,310 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: gatk_revert_sam_4_1_8_0 +baseCommand: + - gatk + - RevertSam +inputs: + - id: input + type: File + inputBinding: + position: 0 + prefix: '-I' + doc: An aligned SAM or BAM file. Required. + - id: output + type: string? + doc: >- + The output SAM/BAM file to create, or an output directory if OUTPUT_BY_READGROUP is true. Required. Cannot be used in conjunction with argument(s) OUTPUT_MAP (OM) + - id: output_map + type: string? + doc: >- + Tab separated file with two columns, READ_GROUP_ID and OUTPUT, providing file mapping only used if OUTPUT_BY_READGROUP is true. Required.
Cannot be used in conjunction with argument(s) OUTPUT (O) + - id: attribute_to_clear + type: + - "null" + - type: array + items: string + inputBinding: + position: 0 + prefix: '--ATTRIBUTE_TO_CLEAR' + doc: >- + When removing alignment information, the set of optional tags to remove. This may be specified 0 or more times. Default value: [NM, UQ, PG, MD, MQ, SA, MC, AS]. + - id: max_discard_fraction + type: float? + inputBinding: + position: 0 + prefix: '--MAX_DISCARD_FRACTION' + doc: >- + If SANITIZE=true and higher than MAX_DISCARD_FRACTION reads are discarded due to + sanitization then the program will exit with an Exception instead of exiting cleanly. + Output BAM will still be valid. Default value: 0.01. + - id: library_name + type: string? + inputBinding: + position: 0 + prefix: '--LIBRARY_NAME' + doc: >- + The library name to use in the reverted output file. This will override the existing + sample alias in the file and is used only if all the read groups in the input file have + the same library name. Default value: null. + - id: max_records_in_ram + type: int? + inputBinding: + position: 0 + prefix: '--MAX_RECORDS_IN_RAM' + doc: >- + When writing files that need to be sorted, this will specify the number of records stored + in RAM before spilling to disk. Increasing this number reduces the number of file handles + needed to sort the file, and increases the amount of RAM needed. Default value: 500000. + - id: output_by_readgroup + type: string? + default: 'false' + inputBinding: + position: 0 + prefix: '--OUTPUT_BY_READGROUP' + doc: >- + When true, outputs each read group in a separate file. Default value: false. Possible values: {true, false} + - id: output_by_readgroup_file_format + type: string? + inputBinding: + position: 0 + prefix: '--OUTPUT_BY_READGROUP_FILE_FORMAT' + doc: >- + When using OUTPUT_BY_READGROUP, the output file format can be set to a certain format. + Default value: dynamic. sam (Generate SAM files.) + bam (Generate BAM files.)
+ cram (Generate CRAM files.) + dynamic (Generate files based on the extension of INPUT.) + - id: remove_alignment_information + type: string? + default: 'true' + inputBinding: + position: 0 + prefix: '--REMOVE_ALIGNMENT_INFORMATION' + doc: >- + Remove all alignment information from the file. Default value: true. Possible values: {true, false} + - id: remove_duplicate_information + type: string? + default: 'true' + inputBinding: + position: 1 + prefix: '--REMOVE_DUPLICATE_INFORMATION' + doc: | + Remove duplicate read flags from all reads. Note that if this is false and + REMOVE_ALIGNMENT_INFORMATION==true, the output may have the unusual but sometimes + desirable trait of having unmapped reads that are marked as duplicates. Default value: + true. Possible values: {true, false} + - id: restore_hardclips + type: string? + default: 'true' + inputBinding: + position: 0 + prefix: '--RESTORE_HARDCLIPS' + doc: >- + When true, restores reads and qualities of records with hard-clips containing XB and XQ tags. Default value: true. Possible values: {true, false} + - id: restore_original_qualities + type: string? + default: 'true' + inputBinding: + position: 1 + prefix: '--RESTORE_ORIGINAL_QUALITIES' + doc: | + True to restore original qualities from the OQ field to the QUAL field if available. Default value: true. Possible values: {true, false} + - id: sample_alias + type: string? + inputBinding: + position: 1 + prefix: '--SAMPLE_ALIAS' + doc: | + The sample alias to use in the reverted output file. This will override the existing + sample alias in the file and is used only if all the read groups in the input file have + the same sample alias. Default value: null. + - id: sanitize + type: string? + default: 'false' + inputBinding: + position: 1 + prefix: '--SANITIZE' + doc: | + WARNING: This option is potentially destructive. If enabled will discard reads in order to + produce a consistent output BAM.
Reads discarded include (but are not limited to) paired + reads with missing mates, duplicated records, records with mismatches in length of bases + and qualities. This option can only be enabled if the output sort order is queryname and + will always cause sorting to occur. Default value: false. Possible values: {true, false} + - id: sort_order + type: string? + inputBinding: + position: 1 + prefix: '--SORT_ORDER' + doc: | + The sort order to create the reverted output file with. Default value: queryname. Possible values: {unsorted, queryname, coordinate, duplicate, unknown} + - id: reference + type: File? + inputBinding: + position: 0 + prefix: '-R' + doc: >- + Reference sequence file. Note that while this argument is not required, + without it only a small subset of the metrics will be calculated. Note + also that if a reference sequence is provided, it must be accompanied by a + sequence dictionary. Default value: null. + secondaryFiles: + - ^.fasta.fai + - ^.dict + - id: validation_stringency + type: string? + inputBinding: + position: 0 + prefix: '--VALIDATION_STRINGENCY' + doc: >- + Validation stringency for all SAM files read by this program. Setting + stringency to SILENT can improve performance when processing a BAM file in + which variable-length data (read, qualities, tags) do not otherwise need + to be decoded. Default value: STRICT. This option can be set to 'null' to + clear the default value. Possible values: {STRICT,LENIENT, SILENT} + - id: compression_level + type: int? + inputBinding: + position: 0 + prefix: '--COMPRESSION_LEVEL' + doc: >- + Compression level for all compressed files created (e.g. BAM and VCF). Default value: 2. + - id: create_index + type: boolean? + inputBinding: + position: 0 + prefix: '--CREATE_INDEX' + doc: >- + Whether to create a BAM index when writing a coordinate-sorted BAM file. + Default value: false. Possible values: {true, false} + - id: create_md5_file + type: boolean? 
+ inputBinding: + position: 0 + prefix: '--CREATE_MD5_FILE' + doc: >- + Whether to create an MD5 digest for any BAM or FASTQ files created. + Default value: false. Possible values: {true, false} + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: temporary_directory + type: string? + doc: 'Default value: null. This option may be specified 0 or more times.' +outputs: + - id: gatk_revert_sam_output + type: File + outputBinding: + glob: |- + ${ + if(inputs.output){ + return inputs.output + } else { + return inputs.input.basename.replace(/\.(bam|sam)$/, '_revertsam.bam') // swap only a trailing .bam or .sam extension + } + } + - id: gatk_revert_sam_output_map + type: File? + outputBinding: + glob: |- + ${ + if(inputs.output_map){ + return inputs.output_map + } else { + return inputs.input.basename.replace(/\.(bam|sam)$/, '_revertsam.tsv') // swap only a trailing .bam or .sam extension + } + } +label: GATK-RevertSam +arguments: + - position: 0 + prefix: '--java-options' + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx15G" + } + else { + return "-Xmx15G" + } + } + - position: 0 + prefix: '--TMP_DIR' + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } + - position: 0 + prefix: '-O' + valueFrom: |- + ${ + if(inputs.output){ + return inputs.output; + } else if (inputs.output_map) { + return null; + }
else { + return inputs.input.basename.replace(/\.(bam|sam)$/, '_revertsam.bam'); // must stay identical to the gatk_revert_sam_output glob + } + } + - position: 0 + prefix: '-OM' + valueFrom: |- + ${ + if(inputs.output_map){ + return inputs.output_map; + } else { + return null; + } + } +requirements: + - class: ResourceRequirement + ramMin: 17000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/gatk:4.1.8.0' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charles Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charles Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': gatk4 + 'doap:revision': 4.1.8.0 diff --git a/gatk_revert_sam/README.md b/gatk_revert_sam/README.md new file mode 100644 index 00000000..90ea2bfa --- /dev/null +++ b/gatk_revert_sam/README.md @@ -0,0 +1,164 @@ +# CWL for running GATK - RevertSam + +## Version of tools in docker image + +| Tool | Version | Location | +|--- |--- |--- | +| gatk | 4.1.8.0 | broadinstitute/gatk:4.1.8.0 | + + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner gatk_revert_sam_4.1.8.0.cwl example_inputs.yaml +``` + +### Usage + +```bash +usage: gatk_revert_sam_4.1.8.0.cwl [-h] --input INPUT [--output OUTPUT] + [--output_map OUTPUT_MAP] + [--attribute_to_clear ATTRIBUTE_TO_CLEAR] + [--max_discard_fraction MAX_DISCARD_FRACTION] + [--library_name LIBRARY_NAME] + [--max_records_in_ram MAX_RECORDS_IN_RAM] + [--output_by_readgroup OUTPUT_BY_READGROUP] + [--output_by_readgroup_file_format OUTPUT_BY_READGROUP_FILE_FORMAT] + [--remove_alignment_information
REMOVE_ALIGNMENT_INFORMATION] + [--remove_duplicate_information REMOVE_DUPLICATE_INFORMATION] + [--restore_hardclips RESTORE_HARDCLIPS] + [--restore_original_qualities RESTORE_ORIGINAL_QUALITIES] + [--sample_alias SAMPLE_ALIAS] + [--sanitize SANITIZE] + [--sort_order SORT_ORDER] + [--reference REFERENCE] + [--validation_stringency VALIDATION_STRINGENCY] + [--compression_level COMPRESSION_LEVEL] + [--create_index] [--create_md5_file] + [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] + [--temporary_directory TEMPORARY_DIRECTORY] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --input INPUT An aligned SAM or BAM file. Required. + --output OUTPUT The output SAM/BAM file to create, or an output + directory if OUTPUT_BY_READGROUP is true. Required. + Cannot be used in conjunction with argument(s) + OUTPUT_MAP (OM) + --output_map OUTPUT_MAP + Tab separated file with two columns, READ_GROUP_ID and + OUTPUT, providing file mapping only used if + OUTPUT_BY_READGROUP is true. Required. Cannot be used + in conjunction with argument(s) OUTPUT (O) + --attribute_to_clear ATTRIBUTE_TO_CLEAR + When removing alignment information, the set of + optional tags to remove. This may be specified 0 or + more times. Default value: [NM, UQ, PG, MD, MQ, SA, + MC, AS]. + --max_discard_fraction MAX_DISCARD_FRACTION + If SANITIZE=true and higher than MAX_DISCARD_FRACTION + reads are discarded due to sanitization thenthe + program will exit with an Exception instead of exiting + cleanly. Output BAM will still be valid. Default + value: 0.01. + --library_name LIBRARY_NAME + The library name to use in the reverted output file. + This will override the existing sample alias in the + file and is used only if all the read groups in the + input file have the same library name. Default value: + null. 
+ --max_records_in_ram MAX_RECORDS_IN_RAM + When writing files that need to be sorted, this will + specify the number of records stored in RAM before + spilling to disk. Increasing this number reduces the + number of file handles needed to sort the file, and + increases the amount of RAM needed. Default value: + 500000. + --output_by_readgroup OUTPUT_BY_READGROUP + When true, outputs each read group in a separate file. + Default value: false. Possible values: {true, false} + --output_by_readgroup_file_format OUTPUT_BY_READGROUP_FILE_FORMAT + When using OUTPUT_BY_READGROUP, the output file format + can be set to a certain format. Default value: + dynamic. sam (Generate SAM files.) bam (Generate BAM + files.) cram (Generate CRAM files.) dynamic (Generate + files based on the extension of INPUT.) + --remove_alignment_information REMOVE_ALIGNMENT_INFORMATION + Remove all alignment information from the file. + Default value: true. Possible values: {true, false} + --remove_duplicate_information REMOVE_DUPLICATE_INFORMATION + Remove duplicate read flags from all reads. Note that + if this is false and + REMOVE_ALIGNMENT_INFORMATION==true, the output may + have the unusual but sometimes desirable trait of + having unmapped reads that are marked as duplicates. + Default value: true. Possible values: {true, false} + --restore_hardclips RESTORE_HARDCLIPS + When true, restores reads and qualities of records + with hard-clips containing XB and XQ tags. Default + value: true. Possible values: {true, false} + --restore_original_qualities RESTORE_ORIGINAL_QUALITIES + True to restore original qualities from the OQ field + to the QUAL field if available. Default value: true. + Possible values: {true, false} + --sample_alias SAMPLE_ALIAS + The sample alias to use in the reverted output file. + This will override the existing sample alias in the + file and is used only if all the read groups in the + input file have the same sample alias. Default value: + null.
+ --sanitize SANITIZE WARNING: This option is potentially destructive. If + enabled will discard reads in order to produce a + consistent output BAM. Reads discarded include (but + are not limited to) paired reads with missing mates, + duplicated records, records with mismatches in length + of bases and qualities. This option can only be + enabled if the output sort order is queryname and will + always cause sorting to occur. Default value: false. + Possible values: {true, false} + --sort_order SORT_ORDER + The sort order to create the reverted output file + with. Default value: queryname. Possible values: + {unsorted, queryname, coordinate, duplicate, unknown} + --reference REFERENCE + Reference sequence file. Note that while this argument + is not required, without it only a small subset of the + metrics will be calculated. Note also that if a + reference sequence is provided, it must be accompanied + by a sequence dictionary. Default value: null. + --validation_stringency VALIDATION_STRINGENCY + Validation stringency for all SAM files read by this + program. Setting stringency to SILENT can improve + performance when processing a BAM file in which + variable-length data (read, qualities, tags) do not + otherwise need to be decoded. Default value: STRICT. + This option can be set to 'null' to clear the default + value. Possible values: {STRICT,LENIENT, SILENT} + --compression_level COMPRESSION_LEVEL + Compression level for all compressed files created + (e.g. BAM and VCF). Default value: 2. + --create_index Whether to create a BAM index when writing a + coordinate-sorted BAM file. Default value: false. + Possible values: {true, false} + --create_md5_file Whether to create an MD5 digest for any BAM or FASTQ + files created. Default value: false. 
Possible values: + {true, false} + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --temporary_directory TEMPORARY_DIRECTORY + Default value: null. This option may be specified 0 or + more times. +``` diff --git a/gatk_sam_to_fastq_4.1.8.0/README.md b/gatk_sam_to_fastq_4.1.8.0/README.md new file mode 100644 index 00000000..491eac17 --- /dev/null +++ b/gatk_sam_to_fastq_4.1.8.0/README.md @@ -0,0 +1,175 @@ +# CWL for running GATK - SamToFastq + +## Version of tools in docker image + +| Tool | Version | Location | +|--- |--- |--- | +| gatk | 4.1.8.0 | broadinstitute/gatk:4.1.8.0 | + + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner gatk_sam_to_fastq_4.1.8.0.cwl example_inputs.yaml +``` + +### Usage + +```bash +usage: gatk_sam_to_fastq_4.1.8.0.cwl [-h] [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] + --input INPUT [--fastq FASTQ] + [--clipping_action CLIPPING_ACTION] + [--clipping_attribute CLIPPING_ATTRIBUTE] + [--clipping_min_length CLIPPING_MIN_LENGTH] + [--compress_outputs_per_rg] + [--compression_level COMPRESSION_LEVEL] + [--create_index] [--include_non_pf_reads] + [--include_non_primary_alignments] + [--interleave] + [--max_records_in_ram MAX_RECORDS_IN_RAM] + [--output_dir OUTPUT_DIR] + [--create_md5_file] [--output_per_rg] + [--quality QUALITY] [--re_reverse] + [--read1_max_bases_to_write READ1_MAX_BASES_TO_WRITE] + [--read1_trim READ1_TRIM] + [--read2_max_bases_to_write READ2_MAX_BASES_TO_WRITE] + [--read2_trim READ2_TRIM] + [--reference_sequence REFERENCE_SEQUENCE] + [--rg_tag RG_TAG] + [--second_end_fastq SECOND_END_FASTQ] + [--unpaired_fastq UNPAIRED_FASTQ] + [--validation_stringency VALIDATION_STRINGENCY] + [job_order] + 
+positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --input INPUT Input SAM/BAM file to extract reads from Required. + --fastq FASTQ Output FASTQ file (single-end fastq or, if paired, + first end of the pair FASTQ). Required. Cannot be used + in conjunction with argument(s) OUTPUT_PER_RG (OPRG) + COMPRESS_OUTPUTS_PER_RG (GZOPRG) OUTPUT_DIR (ODIR) + --clipping_action CLIPPING_ACTION + The action that should be taken with clipped reads: + 'X' means the reads and qualities should be trimmed at + the clipped position; 'N' means the bases should be + changed to Ns in the clipped region; and any integer + means that the base qualities should be set to that + value in the clipped region. Default value: null. + --clipping_attribute CLIPPING_ATTRIBUTE + The attribute that stores the position at which the + SAM record should be clipped Default value: null. + --clipping_min_length CLIPPING_MIN_LENGTH + When performing clipping with the CLIPPING_ATTRIBUTE + and CLIPPING_ACTION parameters, ensure that the + resulting reads after clipping are at least + CLIPPING_MIN_LENGTH bases long. If the original read + is shorter than CLIPPING_MIN_LENGTH then the original + read length will be maintained. Default value: 0. + --compress_outputs_per_rg + Compress output FASTQ files per read group using gzip + and append a .gz extension to the file names. Default + value: false. Possible values: {true, false} Cannot be + used in conjunction with argument(s) FASTQ (F) + SECOND_END_FASTQ (F2) UNPAIRED_FASTQ (FU) + --compression_level COMPRESSION_LEVEL + Compression level for all compressed files created + (e.g. BAM and VCF). Default value: 2. + --create_index Whether to create a BAM index when writing a + coordinate-sorted BAM file. 
Default value: false. + Possible values: {true, false} + --include_non_pf_reads + Include non-PF reads from the SAM file into the output + FASTQ files. PF means 'passes filtering'. Reads whose + 'not passing quality controls' flag is set are non-PF + reads. See GATK Dictionary for more info. Default + value: false. Possible values: {true, false} + --include_non_primary_alignments + If true, include non-primary alignments in the output. + Support of non-primary alignments in SamToFastq is not + comprehensive, so there may be exceptions if this is + set to true and there are paired reads with non- + primary alignments. Default value: false. Possible + values: {true, false} + --interleave Will generate an interleaved fastq if paired, each + line will have /1 or /2 to describe which end it came + from Default value: false. Possible values: {true, + false} + --max_records_in_ram MAX_RECORDS_IN_RAM + When writing files that need to be sorted, this will + specify the number of records stored in RAM before + spilling to disk. Increasing this number reduces the + number of file handles needed to sort the file, and + increases the amount of RAM needed. Default value: + 500000. + --output_dir OUTPUT_DIR + Directory in which to output the FASTQ file(s). Used + only when OUTPUT_PER_RG is true. Default value: null. + Cannot be used in conjunction with argument(s) FASTQ + (F). + --create_md5_file Whether to create an MD5 digest for any BAM or FASTQ + files created. Default value: false. Possible values: + {true, false}. + --output_per_rg Output a FASTQ file per read group (two FASTQ files + per read group if the group is paired). Default value: + false. Possible values: {true, false} Cannot be used + in conjunction with argument(s) FASTQ (F) + SECOND_END_FASTQ (F2) UNPAIRED_FASTQ (FU) + --quality QUALITY End-trim reads using the phred/bwa quality trimming + algorithm and this quality. Default value: null. 
+ --re_reverse Re-reverse bases and qualities of reads with negative + strand flag set before writing them to FASTQ Default + value: true. Possible values: {true, false} + --read1_max_bases_to_write READ1_MAX_BASES_TO_WRITE + The maximum number of bases to write from read 1 after + trimming. If there are fewer than this many bases left + after trimming, all will be written. If this value is + null then all bases left after trimming will be + written. Default value: null. + --read1_trim READ1_TRIM + The number of bases to trim from the beginning of read + 1. Default value: 0. + --read2_max_bases_to_write READ2_MAX_BASES_TO_WRITE + The maximum number of bases to write from read 2 after + trimming. If there are fewer than this many bases left + after trimming, all will be written. If this value is + null then all bases left after trimming will be + written. Default value: null. + --read2_trim READ2_TRIM + The number of bases to trim from the beginning of read + 2. Default value: 0. + --reference_sequence REFERENCE_SEQUENCE + Reference sequence file. Default value: null. + --rg_tag RG_TAG The read group tag (PU or ID) to be used to output a + FASTQ file per read group. Default value: PU. + --second_end_fastq SECOND_END_FASTQ + Output FASTQ file (if paired, second end of the pair + FASTQ). Default value: null. Cannot be used in + conjunction with argument(s) OUTPUT_PER_RG (OPRG) + COMPRESS_OUTPUTS_PER_RG (GZOPRG) + --unpaired_fastq UNPAIRED_FASTQ + Output FASTQ file for unpaired reads; may only be + provided in paired-FASTQ mode Default value: null. + Cannot be used in conjunction with argument(s) + OUTPUT_PER_RG (OPRG) COMPRESS_OUTPUTS_PER_RG (GZOPRG) + --validation_stringency VALIDATION_STRINGENCY + Validation stringency for all SAM files read by this + program. Setting stringency to SILENT can improve + performance when processing a BAM file in which + variable-length data (read, qualities, tags) do not + otherwise need to be decoded. Default value: STRICT. 
+ Possible values: {STRICT, LENIENT, SILENT} +``` diff --git a/gatk_sam_to_fastq_4.1.8.0/example_inputs.yaml b/gatk_sam_to_fastq_4.1.8.0/example_inputs.yaml new file mode 100644 index 00000000..aa1c7e46 --- /dev/null +++ b/gatk_sam_to_fastq_4.1.8.0/example_inputs.yaml @@ -0,0 +1,32 @@ +input: + class: File + metadata: {} + path: "/path/to/bam" +fastq: null +clipping_action: null +clipping_attribute: null +clipping_min_length: null +compress_outputs_per_rg: null +compression_level: null +create_index: null +include_non_pf_reads: null +include_non_primary_alignments: null +interleave: null +max_records_in_ram: null +output_dir: null +create_md5_file: null +output_per_rg: null +quality: null +re_reverse: null +read1_max_bases_to_write: null +read1_trim: null +read2_max_bases_to_write: null +read2_trim: null +reference_sequence: null +rg_tag: null +second_end_fastq: null +unpaired_fastq: null +validation_stringency: null +memory_overhead: null +memory_per_job: null +number_of_threads: null diff --git a/gatk_sam_to_fastq_4.1.8.0/gatk_sam_to_fastq_4.1.8.0.cwl b/gatk_sam_to_fastq_4.1.8.0/gatk_sam_to_fastq_4.1.8.0.cwl new file mode 100644 index 00000000..6516f711 --- /dev/null +++ b/gatk_sam_to_fastq_4.1.8.0/gatk_sam_to_fastq_4.1.8.0.cwl @@ -0,0 +1,360 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: gatk_sam_to_fastq_4_1_8_0 +baseCommand: + - gatk + - SamToFastq +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: fastq + type: string? 
+ doc: >- + Output FASTQ file (single-end fastq or, if paired, first end of the pair + FASTQ) + - id: input + type: File + inputBinding: + position: 0 + prefix: '--INPUT' + doc: Input SAM/BAM file to extract reads from Required. + - id: clipping_action + type: string? + inputBinding: + position: 0 + prefix: '--CLIPPING_ACTION' + doc: >- + The action that should be taken with clipped reads: 'X' means the reads + and qualities should be trimmed at the clipped position; 'N' means the + bases should be changed to Ns in the clipped region; and any integer means + that the base qualities should be set to that value in the clipped + region. Default value: null. + - id: clipping_attribute + type: string? + inputBinding: + position: 0 + prefix: '--CLIPPING_ATTRIBUTE' + doc: >- + The attribute that stores the position at which the SAM record should be + clipped Default value: null. + - id: clipping_min_length + type: int? + inputBinding: + position: 0 + prefix: '--CLIPPING_MIN_LENGTH' + doc: >- + When performing clipping with the CLIPPING_ATTRIBUTE and CLIPPING_ACTION + parameters, ensure that the resulting reads after clipping are at least + CLIPPING_MIN_LENGTH bases long. If the original read is shorter than + CLIPPING_MIN_LENGTH then the original read length will be maintained. + Default value: 0. + - id: compress_outputs_per_rg + type: boolean? + inputBinding: + position: 0 + prefix: '--COMPRESS_OUTPUTS_PER_RG' + doc: >- + Compress output FASTQ files per read group using gzip and append a .gz + extension to the file names. Default value: false. Possible values: + {true, false} Cannot be used in conjunction with argument(s) FASTQ (F) + SECOND_END_FASTQ (F2) UNPAIRED_FASTQ (FU) + - id: compression_level + type: int? + inputBinding: + position: 0 + prefix: '--COMPRESSION_LEVEL' + doc: >- + Compression level for all compressed files created (e.g. BAM and VCF). + Default value: 2. + - id: create_index + type: boolean? 
+ inputBinding: + position: 0 + prefix: '--CREATE_INDEX' + doc: >- + Whether to create a BAM index when writing a coordinate-sorted BAM file. + Default value: false. Possible values: {true, false} + - id: include_non_pf_reads + type: boolean? + inputBinding: + position: 0 + prefix: '--INCLUDE_NON_PF_READS' + doc: >- + Include non-PF reads from the SAM file into the output FASTQ files. PF + means 'passes filtering'. Reads whose 'not passing quality controls' flag + is set are non-PF reads. See GATK Dictionary for more info. Default + value: false. Possible values: {true, false} + - id: include_non_primary_alignments + type: boolean? + inputBinding: + position: 0 + prefix: '--INCLUDE_NON_PRIMARY_ALIGNMENTS' + doc: >- + If true, include non-primary alignments in the output. Support of + non-primary alignments in SamToFastq is not comprehensive, so there may be + exceptions if this is set to true and there are paired reads with + non-primary alignments. Default value: false. Possible values: {true, + false} + - id: interleave + type: boolean? + inputBinding: + position: 0 + prefix: '--INTERLEAVE' + doc: >- + Will generate an interleaved fastq if paired, each line will have /1 or /2 + to describe which end it came from Default value: false. Possible values: + {true, false} + - default: 50000 + id: max_records_in_ram + type: int? + inputBinding: + position: 0 + prefix: '--MAX_RECORDS_IN_RAM' + doc: >- + When writing files that need to be sorted, this will specify the number of + records stored in RAM before spilling to disk. Increasing this number + reduces the number of file handles needed to sort the file, and increases + the amount of RAM needed. Default value: 500000. + - id: output_dir + type: string? + inputBinding: + position: 0 + prefix: '--OUTPUT_DIR' + doc: >- + Directory in which to output the FASTQ file(s). Used only when + OUTPUT_PER_RG is true. Default value: null. Cannot be used in conjunction + with argument(s) FASTQ (F). 
+ - id: create_md5_file + type: boolean? + inputBinding: + position: 0 + prefix: '--CREATE_MD5_FILE' + doc: >- + Whether to create an MD5 digest for any BAM or FASTQ files created. + Default value: false. Possible values: {true, false}. + - id: output_per_rg + type: boolean? + inputBinding: + position: 0 + prefix: '--OUTPUT_PER_RG' + doc: >- + Output a FASTQ file per read group (two FASTQ files per read group if the + group is paired). Default value: false. Possible values: {true, false} + Cannot be used in conjunction with argument(s) FASTQ (F) SECOND_END_FASTQ + (F2) UNPAIRED_FASTQ (FU) + - id: quality + type: int? + inputBinding: + position: 0 + prefix: '--QUALITY' + doc: >- + End-trim reads using the phred/bwa quality trimming algorithm and this + quality. Default value: null. + - id: re_reverse + type: boolean? + inputBinding: + position: 0 + prefix: '--RE_REVERSE' + doc: >- + Re-reverse bases and qualities of reads with negative strand flag set + before writing them to FASTQ Default value: true. Possible values: {true, + false} + - id: read1_max_bases_to_write + type: int? + inputBinding: + position: 0 + prefix: '--READ1_MAX_BASES_TO_WRITE' + doc: >- + The maximum number of bases to write from read 1 after trimming. If there + are fewer than this many bases left after trimming, all will be written. + If this value is null then all bases left after trimming will be written. + Default value: null. + - id: read1_trim + type: int? + inputBinding: + position: 0 + prefix: '--READ1_TRIM' + doc: >- + The number of bases to trim from the beginning of read 1. Default value: + 0. + - id: read2_max_bases_to_write + type: int? + inputBinding: + position: 0 + prefix: '--READ2_MAX_BASES_TO_WRITE' + doc: >- + The maximum number of bases to write from read 2 after trimming. If there + are fewer than this many bases left after trimming, all will be written. + If this value is null then all bases left after trimming will be written. + Default value: null. 
+ - id: read2_trim + type: int? + inputBinding: + position: 0 + prefix: '--READ2_TRIM' + doc: >- + The number of bases to trim from the beginning of read 2. Default value: + 0. + - id: reference_sequence + type: File? + inputBinding: + position: 0 + prefix: '--REFERENCE_SEQUENCE' + doc: 'Reference sequence file. Default value: null.' + - id: rg_tag + type: string? + inputBinding: + position: 0 + prefix: '--RG_TAG' + doc: >- + The read group tag (PU or ID) to be used to output a FASTQ file per read + group. Default value: PU. + - id: second_end_fastq + type: string? + inputBinding: + position: 0 + prefix: '--SECOND_END_FASTQ' + doc: >- + Output FASTQ file (if paired, second end of the pair FASTQ). Default + value: null. Cannot be used in conjunction with argument(s) OUTPUT_PER_RG + (OPRG) COMPRESS_OUTPUTS_PER_RG (GZOPRG) + - id: unpaired_fastq + type: string? + inputBinding: + position: 0 + prefix: '--UNPAIRED_FASTQ' + doc: >- + Output FASTQ file for unpaired reads; may only be provided in paired-FASTQ + mode Default value: null. Cannot be used in conjunction with argument(s) + OUTPUT_PER_RG (OPRG) COMPRESS_OUTPUTS_PER_RG (GZOPRG) + - id: validation_stringency + type: string? + inputBinding: + position: 0 + prefix: '--VALIDATION_STRINGENCY' + doc: >- + Validation stringency for all SAM files read by this program. Setting + stringency to SILENT can improve performance when processing a BAM file in + which variable-length data (read, qualities, tags) do not otherwise need + to be decoded. Default value: STRICT. Possible values: {STRICT, LENIENT, + SILENT} + - id: temporary_directory + type: string? + doc: 'Default value: null. This option may be specified 0 or more times.' +outputs: + - id: gatk_sam_to_fastq_fastq + type: File + outputBinding: + glob: |- + ${ + if(inputs.fastq){ + return inputs.fastq + } else { + return inputs.input.basename.replace(/\.(bam|sam)$/, '_R1.fastq') + } + } + - id: gatk_sam_to_fastq_unpaired_fastq + type: File?
+ outputBinding: + glob: |- + ${ + if(inputs.unpaired_fastq){ + return inputs.unpaired_fastq + } else { + return inputs.input.basename.replace(/\.(bam|sam)$/, '_unpaired.fastq') + } + } + - id: gatk_sam_to_fastq_second_end_fastq + type: File? + outputBinding: + glob: |- + ${ + if(inputs.second_end_fastq){ + return inputs.second_end_fastq + } else { + return inputs.input.basename.replace(/\.(bam|sam)$/, '_R2.fastq') + } + } +label: GATK-SamToFastq +arguments: + - position: 0 + prefix: '--java-options' + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx15G" + } + else { + return "-Xmx15G" + } + } + - position: 0 + prefix: '--TMP_DIR' + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } + - position: 0 + prefix: '--FASTQ' + valueFrom: |- + ${ + if(inputs.fastq){ + return inputs.fastq + } else { + return inputs.input.basename.replace(/\.(bam|sam)$/, '_R1.fastq') + } + } +requirements: + - class: ResourceRequirement + ramMin: 8000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/gatk:4.1.8.0' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charles Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class:
'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charles Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': gatk4 + 'doap:revision': 4.1.8.0 diff --git a/getbasecountsmultisample/1.2.5/README.md b/getbasecountsmultisample/1.2.5/README.md new file mode 100644 index 00000000..e44235f8 --- /dev/null +++ b/getbasecountsmultisample/1.2.5/README.md @@ -0,0 +1,89 @@ +# CWL and Dockerfile for running GetBaseCountsMultiSample + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +| ------ | ------- | ----------------------------------------------- | +| ubuntu | 16.04 | - | +| GetBaseCountsMultiSample | 1.2.5 | https://github.com/msk-access/GetBaseCountsMultiSample/releases/tag/1.2.5 | + + +## CWL + +- CWL specification 1.0 +- Use example_input.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner getbasecountsmultisample_1.2.5.cwl example_input.yaml +``` + +**If at MSK, using the JUNO cluster having installed toil-msk version 3.21.1 you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/getbasecountsmultisample_1.2.5.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> toil-cwl-runner --singularity --logFile /path/to/gbcms_toil.log --jobStore /path/to/gbcms_jobStore --batchSystem lsf --workDir /path/to/gbcms_toil_log --outdir .
--writeLogs /path/to/gbcms_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --disableChaining --maxLogFileSize 20000000000 /path/to/getbasecountsmultisample_1.2.5.cwl /path/to/inputs.yaml > gbcms_toil.stdout 2> gbcms_toil.stderr & +``` + +### Usage + +``` +usage: getbasecountsmultisample_1.2.5.cwl [-h] + [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] + --genotyping_bams GENOTYPING_BAMS + --genotyping_bams_ids + GENOTYPING_BAMS_IDS + --filter_duplicate FILTER_DUPLICATE + --fragment_count FRAGMENT_COUNT + --maf MAF [--maq MAQ] [--omaf] + --output OUTPUT --ref_fasta + REF_FASTA [--vcf VCF] + [--generic_counting GENERIC_COUNTING] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --genotyping_bams GENOTYPING_BAMS + Input bam file + --genotyping_bams_ids GENOTYPING_BAMS_IDS + Input bam, sample identifier to be used for "Tumor + Sample Barcode" for maf or Sample name in the header + for vcf + --filter_duplicate FILTER_DUPLICATE + Whether to filter reads that are marked as duplicate. + 0=off, 1=on. Default 1 + --fragment_count FRAGMENT_COUNT + Whether to output fragment read counts DPF/RDF/ADF. + 0=off, 1=on. Default 0 + --maf MAF Input variant file in TCGA maf format. --maf or --vcf + need to be specified at least once. But --maf and + --vcf are mutually exclusive + --maq MAQ Mapping quality threshold. Default 20 + --omaf Output the result in maf format + --output OUTPUT Filename for output of raw fillout data in MAF/VCF + format + --ref_fasta REF_FASTA + Input reference sequence file + --vcf VCF Input variant file in vcf-like format(the first 5 + columns are used). --maf or --vcf need to be specified + at least once.
But --maf and --vcf are mutually + exclusive + --generic_counting GENERIC_COUNTING + Use the newly implemented generic counting algorithm. + Works better for complex variants. You may get + different allele count result from the default + counting algorithm +``` diff --git a/getbasecountsmultisample/1.2.5/container/Dockerfile b/getbasecountsmultisample/1.2.5/container/Dockerfile new file mode 100644 index 00000000..d8c10a6a --- /dev/null +++ b/getbasecountsmultisample/1.2.5/container/Dockerfile @@ -0,0 +1,50 @@ +################## BASE IMAGE ###################### + +FROM ubuntu:16.04 + +################## ARGUMENTS######################## + +ARG BUILD_DATE +ARG BUILD_VERSION +ARG LICENSE="Apache-2.0" +ARG GBCMS_VERSION=1.2.5 +ARG VCS_REF +################## METADATA ######################## + +LABEL org.opencontainers.image.vendor="MSKCC" +LABEL org.opencontainers.image.authors="Ronak Shah (shahr2@mskcc.org)" + +LABEL org.opencontainers.image.created=${BUILD_DATE} \ + org.opencontainers.image.version=${BUILD_VERSION} \ + org.opencontainers.image.revision=${VCS_REF} \ + org.opencontainers.image.licenses=${LICENSE} \ + org.opencontainers.image.version.gbcms=${GBCMS_VERSION} \ + org.opencontainers.image.source="https://github.com/msk-access/GetBaseCountsMultiSample/releases/" + +LABEL org.opencontainers.image.description="This container uses Ubuntu 16.04 as the base image to build GetBaseCountsMultiSample version ${GBCMS_VERSION}" + +################## INSTALL ########################## + +WORKDIR /usr/src + +RUN apt-get update && \ + apt-get --no-install-recommends install -y \ + wget ca-certificates openssl gcc g++ make zlib1g-dev cmake libjsoncpp-dev && \ + apt-get clean autoclean && \ + apt-get autoremove -y && \ + rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* + +RUN wget --no-check-certificate "https://github.com/msk-access/GetBaseCountsMultiSample/archive/refs/tags/${GBCMS_VERSION}.tar.gz" && \ + tar xzvf /usr/src/${GBCMS_VERSION}.tar.gz && \ + cd
/usr/src/GetBaseCountsMultiSample-${GBCMS_VERSION}/bamtools-master && \ + rm -r build/ && \ + mkdir build && \ + cd build/ && \ + cmake -DCMAKE_CXX_FLAGS=-std=c++03 .. && \ + make && \ + make install && \ + cp ../lib/libbamtools.so.2.3.0 /usr/lib/ && \ + cd /usr/src/GetBaseCountsMultiSample-${GBCMS_VERSION}/ && \ + make && \ + cp GetBaseCountsMultiSample /usr/local/bin/ + diff --git a/getbasecountsmultisample/1.2.5/example_input.yaml b/getbasecountsmultisample/1.2.5/example_input.yaml new file mode 100644 index 00000000..5aa7ca49 --- /dev/null +++ b/getbasecountsmultisample/1.2.5/example_input.yaml @@ -0,0 +1,27 @@ +filter_duplicate: 0 +fragment_count: 1 +generic_counting: true +genotyping_bams: + - class: File + path: /Users/shahr2/Documents/test_reference/bam/duplex/SeraCare_0-5.bam + - class: File + path: /Users/shahr2/Documents/test_reference/bam/SeraCare_0-5.bam +genotyping_bams_ids: + - test1 + - test2 +maf: + class: File + path: >- + /Users/shahr2/Downloads/SeraCare_0-5.F22.combined-variants.vep_keptrmv_taggedHotspots.maf +maq: null +memory_overhead: null +memory_per_job: null +number_of_threads: null +omaf: true +output: test.maf +ref_fasta: + class: File + path: >- + /Users/shahr2/Documents/test_reference/reference/versions/hg19/Homo_sapiens_assembly19.fasta +vcf: null + diff --git a/getbasecountsmultisample/1.2.5/getbasecountsmultisample_1.2.5.cwl b/getbasecountsmultisample/1.2.5/getbasecountsmultisample_1.2.5.cwl new file mode 100644 index 00000000..1a073fac --- /dev/null +++ b/getbasecountsmultisample/1.2.5/getbasecountsmultisample_1.2.5.cwl @@ -0,0 +1,173 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: getbasecountsmultisample_1_2_5 +baseCommand: + - GetBaseCountsMultiSample +inputs: + - id: memory_per_job + type: int? 
+ doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: genotyping_bams + type: + - File + - type: array + items: File + doc: Input bam file + - id: genotyping_bams_ids + type: + - string + - type: array + items: string + doc: >- + Input bam, sample identifier to be used for "Tumor Sample Barcode" for maf + or Sample name in the header for vcf + - id: filter_duplicate + type: int + inputBinding: + position: 0 + prefix: '--filter_duplicate' + doc: >- + Whether to filter reads that are marked as duplicate. 0=off, 1=on. Default + 1 + - id: fragment_count + type: int + inputBinding: + position: 0 + prefix: '--fragment_count' + doc: 'Whether to output fragment read counts DPF/RDF/ADF. 0=off, 1=on. Default 0' + - id: maf + type: File + inputBinding: + position: 0 + prefix: '--maf' + doc: >- + Input variant file in TCGA maf format. --maf or --vcf need to be specified + at least once. But --maf and --vcf are mutually exclusive + - id: maq + type: int? + inputBinding: + position: 0 + prefix: '--maq' + doc: Mapping quality threshold. Default 20 + - id: omaf + type: boolean? + inputBinding: + position: 0 + prefix: '--omaf' + doc: Output the result in maf format + - id: output + type: string? + inputBinding: + position: 0 + prefix: '--output' + valueFrom: |- + ${ + if (inputs.output) { + return inputs.output + } else if (inputs.genotyping_bams.length) { + return inputs.maf.basename.replace('.maf', '_fillout.maf') + } else { + return inputs.genotyping_bams.basename.replace('.bam', '_fillout.maf') + } + } + doc: Filename for output of raw fillout data in MAF/VCF format + - id: ref_fasta + type: File + inputBinding: + position: 0 + prefix: '--fasta' + doc: Input reference sequence file + - id: vcf + type: File? + inputBinding: + position: 0 + prefix: '--vcf' + doc: >- + Input variant file in vcf-like format(the first 5 columns are used). 
--maf + or --vcf need to be specified at least once. But --maf and --vcf are + mutually exclusive + - id: generic_counting + type: boolean? + inputBinding: + position: 0 + prefix: '--generic_counting' + doc: >- + Use the newly implemented generic counting algorithm. Works better for + complex variants. You may get different allele count result from the + default counting algorithm +outputs: + - id: fillout + type: File + outputBinding: + glob: |- + ${ + if (inputs.output) { + return inputs.output + } else if (inputs.genotyping_bams.length) { + return inputs.maf.basename.replace('.maf', '_fillout.maf') + } else { + return inputs.genotyping_bams.basename.replace('.bam', '_fillout.maf') + } + } +label: getbasecountsmultisample_1.2.5 +arguments: + - position: 0 + prefix: '' + shellQuote: false + valueFrom: | + $('--bam_fof bam_fof.tsv') + - position: 0 + prefix: '--thread' + valueFrom: $(runtime.cores) +requirements: + - class: ShellCommandRequirement + - class: ResourceRequirement + ramMin: 16000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/gbcms:1.2.5' + - class: InitialWorkDirRequirement + listing: + - entryname: bam_fof.tsv + entry: |- + ${ + if (typeof(inputs.genotyping_bams_ids) == 'object') { + return inputs.genotyping_bams_ids.map(function(sid, i) { + return sid + "\t" + + inputs.genotyping_bams[i].path + }).join("\n") + } else { + return inputs.genotyping_bams_ids + "\t" + inputs.genotyping_bams.path + "\n" + } + } + writable: false + - class: InlineJavascriptRequirement + - class: StepInputExpressionRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:johnsoni@mskcc.org' + 'foaf:name': Ian Johnson + 'foaf:name': Memorial Sloan Kettering Cancer 
Center +'doap:release': + - class: 'doap:Version' + 'doap:name': GetBaseCountsMultiSample + 'doap:revision': 1.2.5 diff --git a/manta_1.5.1/container/Dockerfile b/manta_1.5.1/container/Dockerfile new file mode 100644 index 00000000..f1171f34 --- /dev/null +++ b/manta_1.5.1/container/Dockerfile @@ -0,0 +1,83 @@ +################## BASE IMAGE ###################### + +FROM ubuntu:16.04 + +################## ARGUMENTS/Environments ########## + +ARG BUILD_DATE +ARG BUILD_VERSION +ARG VCS_REF +ARG UBUNTU_VERSION=16.04 +ARG LICENSE="Apache-2.0" +ARG MANTA_VERSION=1.5.1 +ARG SAMTOOLS_VERSION=1.9 +ARG HTSLIB_VERSION=1.9 + +################## METADATA ######################## +LABEL org.opencontainers.image.vendor="MSKCC" +LABEL org.opencontainers.image.authors="Ronak H Shah (shahr2@mskcc.org)" + +LABEL org.opencontainers.image.created=${BUILD_DATE} \ + org.opencontainers.image.version=${BUILD_VERSION} \ + org.opencontainers.image.licenses=${LICENSE} \ + org.label-schema.vcs-ref=${VCS_REF} \ + org.label-schema.vcs-url="https://github.com/msk-access/cwl-commandlinetools" \ + org.opencontainers.image.version.manta=${MANTA_VERSION} \ + org.opencontainers.image.version.samtools=${SAMTOOLS_VERSION} \ + org.opencontainers.image.version.htslib=${HTSLIB_VERSION} \ + org.opencontainers.image.version.ubuntu=${UBUNTU_VERSION} \ + org.opencontainers.image.source.manta="https://github.com/Illumina/manta/releases/download/v${MANTA_VERSION}/manta-${MANTA_VERSION}.centos6_x86_64.tar.bz2" \ + org.opencontainers.image.source.samtools="https://github.com/samtools/samtools/releases/download/${SAMTOOLS_VERSION}/samtools-${SAMTOOLS_VERSION}.tar.bz2" \ + org.opencontainers.image.source.htslib="https://github.com/samtools/htslib/releases/download/${HTSLIB_VERSION}/htslib-${HTSLIB_VERSION}.tar.bz2" + +LABEL org.opencontainers.image.description="This container uses ubuntu ${UBUNTU_VERSION} as the base image to build \ + manta version ${MANTA_VERSION}, \ + samtools version ${SAMTOOLS_VERSION} and
\ + htslib version ${HTSLIB_VERSION}" + +################## INSTALL ########################## + + +#UBUNTU +RUN apt-get update \ + && apt install -y g++ libbz2-dev liblzma-dev make ncurses-dev wget zlib1g-dev libcurl4-openssl-dev wget python bzip2 \ + && apt-get clean \ + && apt-get purge \ + && apt-get remove --yes --purge build-essential \ + && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* + +#MANTA +RUN apt-get update \ + && wget https://github.com/Illumina/manta/releases/download/v${MANTA_VERSION}/manta-${MANTA_VERSION}.centos6_x86_64.tar.bz2 \ + && tar -jxvf manta-${MANTA_VERSION}.centos6_x86_64.tar.bz2 \ + && rm -rf /manta-${MANTA_VERSION}.centos6_x86_64.tar.bz2 \ + && rm -rf /manta-${MANTA_VERSION}.centos6_x86_64/share/demo \ + && ln -s $PWD/manta-${MANTA_VERSION}.centos6_x86_64 /usr/local/bin/manta \ + && cp $PWD/manta-${MANTA_VERSION}.centos6_x86_64/libexec/convertInversion.py /usr/local/bin \ + && chmod +x /usr/local/bin/* \ + && apt-get clean \ + && apt-get purge \ + && apt-get remove --yes --purge build-essential \ + && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* + +#SAMTOOLS +RUN apt-get update && apt-get install -y \ + && wget https://github.com/samtools/htslib/releases/download/${HTSLIB_VERSION}/htslib-${HTSLIB_VERSION}.tar.bz2 \ + && wget https://github.com/samtools/samtools/releases/download/${SAMTOOLS_VERSION}/samtools-${SAMTOOLS_VERSION}.tar.bz2 \ + && tar xvjf htslib-${HTSLIB_VERSION}.tar.bz2 \ + && cd htslib-${HTSLIB_VERSION} \ + && ./configure --enable-libcurl \ + && make \ + && cd ../..
\ + && tar xvjf samtools-${SAMTOOLS_VERSION}.tar.bz2 \ + && cd samtools-${SAMTOOLS_VERSION} \ + && ./configure --enable-libcurl \ + && make \ + && rm -r /htslib-${HTSLIB_VERSION}/test \ + && cp -r /htslib-${HTSLIB_VERSION}/* /usr/local/bin \ + && rm -r /samtools-${SAMTOOLS_VERSION}/test \ + && cp -r /samtools-${SAMTOOLS_VERSION}/* /usr/local/bin \ + && apt-get clean \ + && apt-get purge \ + && apt-get remove --yes --purge build-essential \ + && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* diff --git a/manta_1.5.1/example_inputs.yaml b/manta_1.5.1/example_inputs.yaml new file mode 100644 index 00000000..bd82b9b3 --- /dev/null +++ b/manta_1.5.1/example_inputs.yaml @@ -0,0 +1,10 @@ +call_regions: null +generateEvidenceBam: true +memory_overhead: null +memory_per_job: null +non_wgs: true +normal_bam: /path/to/normal_bam +number_of_threads: null +output_contigs: true +reference_fasta: /path/to/reference_fasta +tumor_bam: /path/to/tumor_bam diff --git a/manta_1.5.1/manta_1.5.1.cwl b/manta_1.5.1/manta_1.5.1.cwl new file mode 100644 index 00000000..9c07140f --- /dev/null +++ b/manta_1.5.1/manta_1.5.1.cwl @@ -0,0 +1,177 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: manta_1_5_1 +baseCommand: + - /usr/bin/python + - /usr/local/bin/manta/bin/configManta.py +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: call_regions + type: File? + inputBinding: + position: -5 + prefix: '--callRegions' + doc: >- + bgzip-compressed, tabix-indexed BED file specifying regions to which + variant analysis will be restricted + secondaryFiles: + - .tbi + - id: non_wgs + type: boolean?
+ inputBinding: + position: -6 + prefix: '--exome' + doc: toggles on settings for WES + - id: normal_bam + type: File? + inputBinding: + position: -2 + prefix: '--normalBam' + doc: >- + Normal sample BAM or CRAM file. May be specified more than once, multiple + inputs will be treated as each BAM file representing a different sample. + [optional] (no default) + secondaryFiles: + - |- + ${ + if (self.nameext === ".bam") { + return self.basename + ".bai" + } + else { + return self.basename + ".crai" + } + } + - id: output_contigs + type: boolean? + inputBinding: + position: -7 + prefix: '--outputContig' + doc: >- + if true, outputs assembled contig sequences in final VCF files, in the + INFO field CONTIG + - id: reference_fasta + type: File + inputBinding: + position: -4 + prefix: '--referenceFasta' + doc: 'samtools-indexed reference fasta file [required]' + secondaryFiles: + - .fai + - id: tumor_bam + type: File + inputBinding: + position: -3 + prefix: '--tumorBam' + doc: Tumor sample BAM or CRAM file. Only up to one tumor bam file accepted. + secondaryFiles: + - |- + ${ + if (self.nameext === ".bam") { + return self.basename + ".bai" + } + else { + return self.basename + ".crai" + } + } + - id: generateEvidenceBam + type: boolean? + inputBinding: + position: -8 + prefix: '--generateEvidenceBam' + separate: false + doc: Generate a bam of supporting reads for all SVs +outputs: + - id: manta_all_candidates_vcf_gz + type: File + outputBinding: + glob: results/variants/candidateSV.vcf.gz + secondaryFiles: + - .tbi + - id: manta_diploid_variants_vcf_gz + type: File? + outputBinding: + glob: results/variants/diploidSV.vcf.gz + secondaryFiles: + - .tbi + - id: manta_small_candidates_vcf_gz + type: File + outputBinding: + glob: results/variants/candidateSmallIndels.vcf.gz + secondaryFiles: + - .tbi + - id: manta_somatic_variants_vcf_gz + type: File? 
+ outputBinding: + glob: results/variants/somaticSV.vcf.gz + secondaryFiles: + - .tbi + - id: manta_tumor_only_variants_vcf_gz + type: File? + outputBinding: + glob: results/variants/tumorSV.vcf.gz + secondaryFiles: + - .tbi + - id: manta_evidence_bams + type: 'File[]?' + outputBinding: + glob: |- + ${ if(inputs.generateEvidenceBam){ + return 'results/evidence/*.bam' + } + } + secondaryFiles: + - .bai +doc: Setup and execute Manta 1.5.1 +label: manta_1.5.1 +arguments: + - position: -1 + prefix: '--runDir' + valueFrom: $(runtime.outdir) + - position: 0 + shellQuote: false + valueFrom: '&&' + - /usr/bin/python + - runWorkflow.py + - '-m' + - local + - position: 1 + prefix: '-j' + valueFrom: $(runtime.cores) +requirements: + - class: ShellCommandRequirement + - class: ResourceRequirement + ramMin: 24000 + coresMin: 12 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/manta:1.5.1' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': manta + 'doap:revision': 1.5.1 diff --git a/marianas_collapsing_first_pass_1.8.1/README.md b/marianas_collapsing_first_pass_1.8.1/README.md deleted file mode 100644 index eba9e12c..00000000 --- a/marianas_collapsing_first_pass_1.8.1/README.md +++ /dev/null @@ -1,19 +0,0 @@ -# CWL and Dockerfile for running Marianas - DuplexUMIBamToCollapsedFastqFirstPass - -## Version of tools in docker image (/container/Dockerfile) - -| Tool | Version | Location | -|--- |--- |--- | -| java base image | 8 | - | -| Marianas | 1.8.1 |
https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar | - - -## CWL - -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner marianas_first_pass.cwl example_inputs.yaml -``` \ No newline at end of file diff --git a/marianas_collapsing_first_pass_1.8.1/marianas_first_pass.cwl b/marianas_collapsing_first_pass_1.8.1/marianas_first_pass.cwl index df5a19f0..024f9b53 100644 --- a/marianas_collapsing_first_pass_1.8.1/marianas_first_pass.cwl +++ b/marianas_collapsing_first_pass_1.8.1/marianas_first_pass.cwl @@ -4,6 +4,7 @@ $namespaces: dct: 'http://purl.org/dc/terms/' doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' id: marianas_collapsing_first_pass_cwl baseCommand: - java @@ -44,19 +45,19 @@ inputs: - .fai - ^.dict outputs: - - id: first_pass_output_file + - id: marianas_first_pass_output_file type: File outputBinding: glob: first-pass.txt - - id: alt_allele_file + - id: marianas_first_pass_alt_allele_file type: File outputBinding: glob: first-pass-alt-alleles.txt - - id: first_pass_insertions + - id: marianas_first_pass_insertions type: File outputBinding: glob: first-pass-insertions.txt - - id: first_pass_output_dir + - id: marianas_first_pass_output_dir type: Directory outputBinding: glob: . 
@@ -123,21 +124,9 @@ arguments: requirements: - class: ResourceRequirement ramMin: 20000 -# ramMin: |- -# ${ -# if (inputs.memory_per_job && inputs.memory_overhead) { -# return inputs.memory_per_job + inputs.memory_overhead -# } else if (inputs.memory_per_job && !inputs.memory_overhead) { -# return inputs.memory_per_job + 2000 -# } else if (!inputs.memory_per_job && inputs.memory_overhead) { -# return 20000 + inputs.memory_overhead -# } else { -# return 20000 -# } -# } coresMin: 1 - class: DockerRequirement - dockerPull: 'mskcc/marianas:0.1.0' + dockerPull: 'ghcr.io/msk-access/marianas:1.8.1' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/marianas_collapsing_second_pass_1.8.1/README.md b/marianas_collapsing_second_pass_1.8.1/README.md deleted file mode 100644 index b3cdf8d7..00000000 --- a/marianas_collapsing_second_pass_1.8.1/README.md +++ /dev/null @@ -1,19 +0,0 @@ -# CWL and Dockerfile for running Marianas - DuplexUMIToCollapsedFastqSecondPass - -## Version of tools in docker image (/container/Dockerfile) - -| Tool | Version | Location | -|--- |--- |--- | -| java base image | 8 | - | -| Marianas | 1.8.1 | https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar | - - -## CWL - -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner marianas_first_pass.cwl test_inputs_second_pass.yaml -``` \ No newline at end of file diff --git a/marianas_collapsing_second_pass_1.8.1/marianas_second_pass.cwl b/marianas_collapsing_second_pass_1.8.1/marianas_second_pass.cwl index 7975c3c2..4ec6b3ac 100644 --- a/marianas_collapsing_second_pass_1.8.1/marianas_second_pass.cwl +++ b/marianas_collapsing_second_pass_1.8.1/marianas_second_pass.cwl @@ -4,6 +4,7 @@ $namespaces: dct: 'http://purl.org/dc/terms/' doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 
'https://www.sevenbridges.com/' id: marianas_collapsing_second_pass_cwl baseCommand: - java @@ -48,19 +49,19 @@ inputs: inputBinding: position: 9 outputs: - - id: collapsed_fastq_1 + - id: marianas_second_pass_collapsed_fastq_1 type: File outputBinding: glob: collapsed_R1_.fastq - - id: collapsed_fastq_2 + - id: marianas_second_pass_collapsed_fastq_2 type: File outputBinding: glob: collapsed_R2_.fastq - - id: second_pass_alt_alleles + - id: marianas_second_pass_alt_alleles type: File outputBinding: glob: second-pass-alt-alleles.txt - - id: second_pass_insertions + - id: marianas_second_pass_insertions type: File outputBinding: glob: second-pass-insertions.txt @@ -127,21 +128,9 @@ arguments: requirements: - class: ResourceRequirement ramMin: 20000 -# ramMin: |- -# ${ -# if (inputs.memory_per_job && inputs.memory_overhead) { -# return inputs.memory_per_job + inputs.memory_overhead -# } else if (inputs.memory_per_job && !inputs.memory_overhead) { -# return inputs.memory_per_job + 2000 -# } else if (!inputs.memory_per_job && inputs.memory_overhead) { -# return 20000 + inputs.memory_overhead -# } else { -# return 20000 -# } -# } coresMin: 1 - class: DockerRequirement - dockerPull: 'mskcc/marianas:0.1.0' + dockerPull: 'ghcr.io/msk-access/marianas:1.8.1' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/marianas_process_loop_umi_1.8.1/README.md b/marianas_process_loop_umi_1.8.1/README.md deleted file mode 100644 index a5b4e900..00000000 --- a/marianas_process_loop_umi_1.8.1/README.md +++ /dev/null @@ -1,19 +0,0 @@ -# CWL and Dockerfile for running Marianas - ProcessLoopUMIFastq - -## Version of tools in docker image (/container/Dockerfile) - -| Tool | Version | Location | -|--- |--- |--- | -| java base image | 8 | - | -| Marianas | 1.8.1 | https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar | - - -## CWL - -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- 
Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner marianas_first_pass.cwl example_inputs.yaml -``` \ No newline at end of file diff --git a/marianas_process_loop_umi_1.8.1/container/Dockerfile b/marianas_process_loop_umi_1.8.1/container/Dockerfile index 343ec6a6..ef4d2eff 100644 --- a/marianas_process_loop_umi_1.8.1/container/Dockerfile +++ b/marianas_process_loop_umi_1.8.1/container/Dockerfile @@ -20,7 +20,7 @@ LABEL org.opencontainers.image.created=${BUILD_DATE} \ org.opencontainers.image.licenses=${LICENSE} \ org.opencontainers.image.version.java=${JAVA_VERSION} \ org.opencontainers.image.version.marianas=${MARIANAS_VERSION} \ - org.opencontainers.image.source.marianas="https://github.com/mskcc/Marianas/releases/" + org.opencontainers.image.source="https://github.com/msk-access/cwl_commandlinetools" LABEL org.opencontainers.image.description="This container uses OPENJDK ${JAVA_VERSION} as the base image to use Marianas version ${MARIANAS_VERSION}" @@ -30,5 +30,5 @@ ENV MARIANAS_VERSION 1.8.1 WORKDIR /usr/src/ -RUN wget https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar -RUN cp -s /usr/src/Marianas-1.8.1.jar /usr/local/bin/Marianas-1.8.1.jar +RUN wget https://github.com/mskcc/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar +RUN cp -s /usr/src/Marianas-1.8.1.jar /usr/local/bin/Marianas-1.8.1.jar \ No newline at end of file diff --git a/marianas_process_loop_umi_1.8.1/marianas_process_loop_umi.cwl b/marianas_process_loop_umi_1.8.1/marianas_process_loop_umi.cwl index 616ddb01..018aff9a 100644 --- a/marianas_process_loop_umi_1.8.1/marianas_process_loop_umi.cwl +++ b/marianas_process_loop_umi_1.8.1/marianas_process_loop_umi.cwl @@ -4,6 +4,7 @@ $namespaces: dct: 'http://purl.org/dc/terms/' doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' id: marianas_process_loop_umi_cwl baseCommand: - java @@ -30,19 +31,19 @@ inputs: inputBinding: 
position: 0 outputs: - - id: processed_fastq_1 + - id: marianas_process_loop_umi_processed_fastq_1 type: File outputBinding: glob: '$(inputs.fastq1.basename.replace(''.fastq.gz'', ''_umi-clipped.fastq.gz''))' - - id: processed_fastq_2 + - id: marianas_process_loop_umi_processed_fastq_2 type: File outputBinding: glob: '$(inputs.fastq2.basename.replace(''.fastq.gz'', ''_umi-clipped.fastq.gz''))' - - id: clipping_info + - id: marianas_process_loop_umi_clipping_info type: File outputBinding: glob: info.txt - - id: composite_umi_frequencies + - id: marianas_process_loop_umi_composite_umi_frequencies type: File outputBinding: glob: composite-umi-frequencies.txt @@ -110,21 +111,9 @@ arguments: requirements: - class: ResourceRequirement ramMin: 20000 -# ramMin: |- -# ${ -# if (inputs.memory_per_job && inputs.memory_overhead) { -# return inputs.memory_per_job + inputs.memory_overhead -# } else if (inputs.memory_per_job && !inputs.memory_overhead) { -# return inputs.memory_per_job + 2000 -# } else if (!inputs.memory_per_job && inputs.memory_overhead) { -# return 8000 + inputs.memory_overhead -# } else { -# return 10000 -# } -# } coresMin: 1 - class: DockerRequirement - dockerPull: 'mskcc/marianas:0.1.0' + dockerPull: 'ghcr.io/msk-access/marianas:1.8.1' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/marianas_separate_bams_1.8.1/README.md b/marianas_separate_bams_1.8.1/README.md deleted file mode 100644 index 998ce5c6..00000000 --- a/marianas_separate_bams_1.8.1/README.md +++ /dev/null @@ -1,33 +0,0 @@ -# CWL and Dockerfile for running Marianas - SeparateBams - -## Version of tools in docker image (../marianas_process_loop_umi_1.8.1/container/Dockerfile) - -| Tool | Version | Location | -|--- |--- |--- | -| java base image | 8 | - | -| Marianas | 1.8.1 | https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar | - - -## CWL - -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the 
cwl -- Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner marianas_seprate_bams_1.8.1.cwl example_inputs.yaml -``` - -### Usage - -```bash -usage: marianas_separate_bams_1.8.1/marianas_separate_bams_1.8.1.cwl - [-h] --input_bam INPUT_BAM [job_order] - -positional arguments: - job_order Job input json file - -optional arguments: - -h, --help show this help message and exit - --input_bam INPUT_BAM -``` diff --git a/marianas_separate_bams_1.8.1/marianas_separate_bams_1.8.1.cwl b/marianas_separate_bams_1.8.1/marianas_separate_bams_1.8.1.cwl index 64dd863f..4488cf91 100644 --- a/marianas_separate_bams_1.8.1/marianas_separate_bams_1.8.1.cwl +++ b/marianas_separate_bams_1.8.1/marianas_separate_bams_1.8.1.cwl @@ -4,7 +4,8 @@ $namespaces: dct: 'http://purl.org/dc/terms/' doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' -id: marianas_separate_bams_1.8.1 + sbg: 'https://www.sevenbridges.com/' +id: marianas_separate_bams_1_8_1 baseCommand: - java inputs: @@ -13,13 +14,13 @@ inputs: inputBinding: position: 1 outputs: - - id: duplex-bam + - id: marianas_separate_bams_duplex_bam type: File outputBinding: glob: '*duplex.bam' secondaryFiles: - ^.bai - - id: simplex-bam + - id: marianas_separate_bams_simplex_bam type: File outputBinding: glob: '*simplex.bam' @@ -91,7 +92,7 @@ requirements: ramMin: 30000 coresMin: 1 - class: DockerRequirement - dockerPull: 'mskcc/marianas:0.1.0' + dockerPull: 'ghcr.io/msk-access/marianas:1.8.1' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/merge_fastq_0.1.7/container/Dockerfile b/merge_fastq_0.1.7/container/Dockerfile new file mode 100644 index 00000000..86cb1d51 --- /dev/null +++ b/merge_fastq_0.1.7/container/Dockerfile @@ -0,0 +1,39 @@ +################## BASE IMAGE ###################### + +FROM continuumio/miniconda3:4.7.12 + +################## ARGUMENTS/Environments ########## + +ARG BUILD_DATE +ARG BUILD_VERSION +ARG 
MINICONDA3_VERSION=4.7.12 +ARG LICENSE="Apache-2.0" +ARG MERGE_FASTQ_VERSION=0.1.7 +ARG VCS_REF +################## METADATA ######################## +LABEL org.opencontainers.image.vendor="MSKCC" +LABEL org.opencontainers.image.authors="Ronak H Shah (shahr2@mskcc.org)" + +LABEL org.opencontainers.image.created=${BUILD_DATE} \ + org.opencontainers.image.version=${BUILD_VERSION} \ + org.opencontainers.image.licenses=${LICENSE} \ + org.opencontainers.image.version.miniconda3=${MINICONDA3_VERSION} \ + org.opencontainers.image.version.merge_fastq=${MERGE_FASTQ_VERSION} \ + org.opencontainers.image.source.merge_fastq="https://pypi.org/project/merge-fastq/" \ + org.opencontainers.image.vcs-url="https://github.com/msk-access/cwl-commandlinetools.git" \ + org.opencontainers.image.vcs-ref=${VCS_REF} + +LABEL org.opencontainers.image.description="This container uses miniconda ${MINICONDA3_VERSION} as the base image to build \ + merge fastq version ${MERGE_FASTQ_VERSION}" + +################## INSTALL ########################## + +#Ubuntu +RUN conda create --name merge_fastq python=3.7 && \ + echo "source activate merge_fastq" > ~/.bashrc + +#Set the path of environment as default +ENV PATH /opt/conda/envs/merge_fastq/bin:$PATH + +#MERGE_FASTQ +RUN pip install --upgrade merge-fastq==${MERGE_FASTQ_VERSION} diff --git a/merge_fastq_0.1.7/example_inputs.yaml b/merge_fastq_0.1.7/example_inputs.yaml new file mode 100644 index 00000000..31e86cce --- /dev/null +++ b/merge_fastq_0.1.7/example_inputs.yaml @@ -0,0 +1,20 @@ +fastq1: + - class: File + path: >- + /path/to/R1_001.fastq.gz + - class: File + path: >- + /path/to/R1_002.fastq.gz +fastq2: + - class: File + path: >- + /path/to/R2_001.fastq.gz + - class: File + path: >- + /path/to/R2_002.fastq.gz +memory_overhead: null +memory_per_job: null +number_of_threads: null +out_fastq1_name: null +out_fastq2_name: null +output_path: null diff --git a/merge_fastq_0.1.7/merge_fastq_0.1.7.cwl b/merge_fastq_0.1.7/merge_fastq_0.1.7.cwl new 
file mode 100644 index 00000000..cad51a26 --- /dev/null +++ b/merge_fastq_0.1.7/merge_fastq_0.1.7.cwl @@ -0,0 +1,113 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: merge_fastq_0_1_7 +baseCommand: + - merge_fastq +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: fastq1 + type: + type: array + items: File + inputBinding: + prefix: '--fastq1' + inputBinding: + position: 0 + doc: >- + Full path to gzipped READ1 fastq files, can be specified multiple times for + example: --fastq1 test_part1_R1.fastq.gz --fastq1 test_part2_R1.fastq.gz + [required] + - id: fastq2 + type: + type: array + items: File + inputBinding: + prefix: '--fastq2' + inputBinding: + position: 1 + doc: >- + Full path to gzipped READ2 fastq files, can be specified multiple times for + example: --fastq2 test_part1_R2.fastq.gz --fastq2 test_part2_R2.fastq.gz + [required] + - id: output_path + type: string? + inputBinding: + position: 2 + prefix: '--output-path' + doc: 'Full path to write the output files (default: Current working directory)' + - id: out_fastq1_name + type: string? + inputBinding: + position: 2 + prefix: '--out-fastq1' + doc: >- + Name of the merged output READ1 fastq file(default: + merged_fastq_R1.fastq.gz) + - id: out_fastq2_name + type: string?
+ inputBinding: + position: 2 + prefix: '--out-fastq2' + doc: >- + Name of the merged output READ2 fastq file(default: + merged_fastq_R2.fastq.gz) +outputs: + - id: merge_fastq_1 + type: File + outputBinding: + glob: |- + ${ + if(inputs.out_fastq1_name){ + return inputs.out_fastq1_name + } else { + return 'merged_fastq_R1.fastq.gz' + } + } + - id: merge_fastq_2 + type: File + outputBinding: + glob: |- + ${ + if(inputs.out_fastq2_name){ + return inputs.out_fastq2_name + } else { + return 'merged_fastq_R2.fastq.gz' + } + } +requirements: + - class: ResourceRequirement + ramMin: 8000 + coresMin: 1 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/merge_fastq:0.1.7' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': merge_fastq + 'doap:revision': 0.1.7 diff --git a/mosdepth_0.3.3/example_inputs.yml b/mosdepth_0.3.3/example_inputs.yml new file mode 100644 index 00000000..540ecf3c --- /dev/null +++ b/mosdepth_0.3.3/example_inputs.yml @@ -0,0 +1,10 @@ +bed: + class: File + path: "/path/to/bed" +input_bam: +- class: File + path: "/path/to/bam" + secondaryFiles: + - class: File + path: "/path/to/bam/index" +prefix: "sample_prefix" \ No newline at end of file diff --git a/mosdepth_0.3.3/mosdepth_0.3.3.cwl b/mosdepth_0.3.3/mosdepth_0.3.3.cwl new file mode 100644 index 00000000..7d46c539 --- /dev/null +++ b/mosdepth_0.3.3/mosdepth_0.3.3.cwl @@ -0,0 +1,137 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 
'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: mosdepth_0_3_3 +baseCommand: + - mosdepth +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: bed + type: File? + inputBinding: + position: 0 + prefix: '-b' + shellQuote: false + doc: optional BED file or (integer) window-sizes. + - id: chrom + type: File? + inputBinding: + position: 0 + prefix: '-c' + shellQuote: false + doc: chromosome to restrict depth calculation. + - id: input_bam + type: + - File + - type: array + items: File + doc: Required list of input bam file (s) separated by comma + secondaryFiles: + - ^.bai + - id: prefix + type: string? + doc: Prefix for the output files + - id: flag + type: int? + inputBinding: + position: 0 + prefix: '-F' + doc: exclude reads with any of the bits in FLAG set + - id: mapq + type: int? + inputBinding: + position: 0 + prefix: '-Q' + doc: mapping quality threshold. reads with a mapping quality less than this are ignored +outputs: + - id: per_base_bed + type: File + outputBinding: + glob: |- + ${ + if (inputs.prefix) { + return inputs.prefix + '.per-base.bed.gz' + } else { + return 'per-base.bed.gz' + } + } + - id: per_region_bed + type: File? + outputBinding: + glob: |- + ${ + if (inputs.prefix) { + return inputs.prefix + '.regions.bed.gz' + } else { + return 'regions.bed.gz' + } + } + - id: global_distribution + type: File? + outputBinding: + glob: |- + ${ + if (inputs.prefix) { + return inputs.prefix + '.mosdepth.global.dist.txt' + } else { + return 'mosdepth.global.dist.txt' + } + } + - id: region_distribution + type: File? + outputBinding: + glob: |- + ${ + if (inputs.prefix) { + return inputs.prefix + '.mosdepth.region.dist.txt' + } else { + return 'mosdepth.region.dist.txt' + } + } +doc: 'fast BAM/CRAM depth calculation for WGS, exome, or targeted sequencing.' 
+label: mosdepth_0.3.3 +arguments: + - position: 0 + prefix: '-t' + valueFrom: $(runtime.cores) + - position: 99 + valueFrom: $(inputs.prefix) + - position: 100 + valueFrom: $(inputs.input_bam) +requirements: + - class: ShellCommandRequirement + - class: ResourceRequirement + ramMin: 17000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/mosdepth:0.3.3' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:charalk@mskcc.org' + 'foaf:name': Carmelina + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': mosdepth + 'doap:revision': 0.3.3 diff --git a/msisensor_0.2/msisensor-0.2.cwl b/msisensor_0.2/msisensor-0.2.cwl index 14757a55..d7edc89e 100644 --- a/msisensor_0.2/msisensor-0.2.cwl +++ b/msisensor_0.2/msisensor-0.2.cwl @@ -164,7 +164,7 @@ inputs: prefix: -y outputs: - output: + msisensor_output: type: File outputBinding: glob: |- diff --git a/msisensor_0.6/msisensor-0.6.cwl b/msisensor_0.6/msisensor-0.6.cwl index d0436482..e2f26ac6 100644 --- a/msisensor_0.6/msisensor-0.6.cwl +++ b/msisensor_0.6/msisensor-0.6.cwl @@ -164,7 +164,7 @@ inputs: prefix: -y outputs: - output: + msisensor_output: type: File outputBinding: glob: |- diff --git a/multiqc/1.10.1.7/README.md b/multiqc/1.10.1.7/README.md new file mode 100644 index 00000000..02fffc9c --- /dev/null +++ b/multiqc/1.10.1.7/README.md @@ -0,0 +1,56 @@ +# CWL and Dockerfile for running multiqc + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +|--- |--- |--- | +| multiqc | 1.10.1.7 | | + +## CWL + +- CWL specification 1.0 +- Use example_inputs.json to see the inputs to the cwl +-
Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner multiqc.cwl example_inputs.json +``` + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/multiqc_1.10.1.7/multiqc.cwl /path/to/example_inputs.json + +#Using toil-cwl-runner +> mkdir tool_toil_log +> toil-cwl-runner --singularity --logFile /path/to/tool_toil_log/cwltoil.log --jobStore /path/to/tool_jobStore --batchSystem lsf --workDir /path/to/tool_toil_log --outdir . --writeLogs /path/to/tool_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/multiqc_1.10.1.7/multiqc.cwl /path/to/example_inputs.json > tool_toil.stdout 2> tool_toil.stderr & +``` + +### Usage + +```bash +> toil-cwl-runner multiqc.cwl --helpusage: multiqc_1.10.1.cwl [-h] + [--qc_files_array_of_array QC_FILES_ARRAY_OF_ARRAY] + [--qc_files_dir QC_FILES_DIR] + [--qc_list_of_dirs QC_LIST_OF_DIRS] + [--report_name REPORT_NAME] [--config CONFIG] + [job_order] + +Run multiqc on log files from supported bioinformatic tools.
+ +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --qc_files_array_of_array QC_FILES_ARRAY_OF_ARRAY + qc files which shall be part of the multiqc summary; + optional, only one of qc_files_array or + qc_files_array_of_array must be provided + --qc_files_dir QC_FILES_DIR + qc files in a Directory + --qc_list_of_dirs QC_LIST_OF_DIRS + qc files in multiple directories + --report_name REPORT_NAME + name used for the html report + --config CONFIG +``` diff --git a/multiqc/1.10.1.7/example_inputs.json b/multiqc/1.10.1.7/example_inputs.json new file mode 100644 index 00000000..87e4b59e --- /dev/null +++ b/multiqc/1.10.1.7/example_inputs.json @@ -0,0 +1,20 @@ +{ + "config": { + "class": "File", + "path": "/path_to/config.yaml" + }, + "qc_files_array": null, + "qc_files_array_of_array": [], + "qc_files_dir": null, + "qc_list_of_dirs": [ + { + "class": "Directory", + "path": "/test/picard/collapsed_bam_duplex_metrics_pool_a" + }, + { + "class": "Directory", + "path": "/test/picard/collapsed_bam_duplex_metrics_pool_b" + } + ], + "report_name": null +} diff --git a/multiqc/1.10.1.7/multiqc.cwl b/multiqc/1.10.1.7/multiqc.cwl new file mode 100644 index 00000000..1f308ad2 --- /dev/null +++ b/multiqc/1.10.1.7/multiqc.cwl @@ -0,0 +1,70 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + sbg: 'https://www.sevenbridges.com/' +baseCommand: + - multiqc +arguments: + - valueFrom: . + position: 999 +inputs: + - id: qc_files_dir + type: Directory? + doc: | + qc files in a Directory + - default: multiqc_1.10.1.7 + id: report_name + type: string + inputBinding: + position: 0 + prefix: '--filename' + doc: name used for the html report and the corresponding zip file + - id: config + type: File? + inputBinding: + position: 0 + prefix: '--config' +outputs: + - id: multiqc_output_dir + type: Directory + outputBinding: + glob: . 
+ outputEval: |- + ${ + self[0].basename = inputs.report_name.replace('.html', ''); + return self[0] + } + - id: multiqc_html + type: File + outputBinding: + glob: |- + ${ + if (inputs.report_name) { + return inputs.report_name + ".html" + } else { + return "multiqc_1.10.1.7.html" + } + } + - id: multiqc_zip + type: File? + outputBinding: + glob: $(inputs.report_name)_data.zip +doc: | + Run multiqc on log files from supported bioinformatic tools. +hints: + - class: ResourceRequirement + ramMin: 10000 + coresMin: 1 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/multiqc:v1.10.1.7' +requirements: + - class: InlineJavascriptRequirement + - class: EnvVarRequirement + envDef: + LC_ALL: en_US.utf-8 + LANG: en_US.utf-8 + - class: InitialWorkDirRequirement + listing: + - entry: $(inputs.qc_files_dir) + writable: true + - class: InlineJavascriptRequirement diff --git a/mutect2_4.1.0.0/mutect2_4.1.0.0.cwl b/mutect2_4.1.0.0/mutect2_4.1.0.0.cwl index a1838139..abcd85ac 100644 --- a/mutect2_4.1.0.0/mutect2_4.1.0.0.cwl +++ b/mutect2_4.1.0.0/mutect2_4.1.0.0.cwl @@ -42,7 +42,7 @@ inputs: position: 4 prefix: '--normal-sample' outputs: - - id: output + - id: mutect2_vcf_gz type: File? 
outputBinding: glob: >- diff --git a/mutect_1.1.5/container/Dockerfile b/mutect_1.1.5/container/Dockerfile new file mode 100644 index 00000000..6a87b817 --- /dev/null +++ b/mutect_1.1.5/container/Dockerfile @@ -0,0 +1,31 @@ +################## BASE IMAGE ###################### +FROM openjdk:7 + +################## ARGUMENTS/Environments ########## + +ARG BUILD_DATE +ARG BUILD_VERSION=0.1.0 +ARG MUTECT_VERSION=1.1.5 + +################## METADATA ######################## +LABEL org.opencontainers.image.vendor="MSKCC" +LABEL org.opencontainers.image.authors="Yu Hu" + +LABEL org.opencontainers.image.created=${BUILD_DATE} +LABEL org.opencontainers.image.version=${BUILD_VERSION} +LABEL org.opencontainers.image.version.mutect=${MUTECT_VERSION} +LABEL org.opencontainers.image.source="https://github.com/broadinstitute/mutect/releases/download/${MUTECT_VERSION}/muTect-${MUTECT_VERSION}-bin.zip" + +LABEL org.opencontainers.image.description="This container uses java7 as the base image to build mutect version ${MUTECT_VERSION}" + +################## INSTALL ########################## + +ENV MUTECT_VERSION=${MUTECT_VERSION} + +#Get Mutect +RUN wget https://github.com/broadinstitute/mutect/releases/download/${MUTECT_VERSION}/muTect-${MUTECT_VERSION}-bin.zip +RUN unzip muTect-${MUTECT_VERSION}-bin.zip +RUN rm muTect-${MUTECT_VERSION}-bin.zip +RUN chmod a+x muTect-${MUTECT_VERSION}.jar +RUN mv muTect-${MUTECT_VERSION}.jar /usr/local/bin/muTect.jar + diff --git a/mutect_1.1.5/example_inputs.yaml b/mutect_1.1.5/example_inputs.yaml new file mode 100644 index 00000000..cd5ea18b --- /dev/null +++ b/mutect_1.1.5/example_inputs.yaml @@ -0,0 +1,32 @@ +input_file_normal: + class: File + path: /path/to/normal/bam/file +input_file_tumor: + class: File + path: /path/to/tumor/bam/file +normal_sample_name: normalsamplename +tumor_sample_name: tumorsamplename + +read_filter: BadCigar +downsample_to_coverage: 50000 +fraction_contamination: 0.0005 +minimum_mutation_cell_fraction: 0.0005 + 
+cosmic: + class: File + path: /path/to/cosmic/coding/mutations/file +dbsnp: + class: File + path: /path/to/dbsnp/vcf/file +intervals: + class: File + path: /path/to/canonicaltargets/bed/file +reference_sequence: + class: File + path: /path/to/genomic/reference/file + +out: output.mutect.txt +vcf: output.mutect.vcf + +tmp_dir: /path/to/tmp/dir + diff --git a/mutect_1.1.5/mutect_1.1.5.cwl b/mutect_1.1.5/mutect_1.1.5.cwl new file mode 100644 index 00000000..456c328a --- /dev/null +++ b/mutect_1.1.5/mutect_1.1.5.cwl @@ -0,0 +1,739 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: mutect_v1_1_5 +baseCommand: + - java +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: BQSR + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--BQSR' + doc: >- + The input covariates table file which enables on-the-fly base quality + score recalibration + - id: absolute_copy_number_data + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--absolute_copy_number_data' + doc: >- + Absolute Copy Number Data, as defined by Absolute, to use in power + calculations + - id: arg_file + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--arg_file' + doc: Reads arguments from the specified file + - id: bam_tumor_sample_name + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--bam_tumor_sample_name' + doc: >- + if the tumor bam contains multiple samples, only use read groups with SM + equal to this value + - id: baq + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--baq' + doc: >- + Type of BAQ calculation to apply in the engine + (OFF|CALCULATE_AS_NECESSARY| RECALCULATE) + - id: baqGapOpenPenalty + type: 'string[]?' 
+ inputBinding: + position: 0 + prefix: '--baqGapOpenPenalty' + doc: >- + BAQ gap open penalty (Phred Scaled). Default value is 40. 30 is perhaps + better for whole genome call sets + - id: clipping_bias_pvalue_threshold + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--clipping_bias_pvalue_threshold' + doc: >- + pvalue threshold for fishers exact test of clipping bias in mutant reads + vs ref reads + - id: cosmic + type: File? + inputBinding: + position: 0 + prefix: '--cosmic' + doc: VCF file of COSMIC sites + secondaryFiles: + - ^.vcf.idx + - id: coverage_20_q20_file + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--coverage_20_q20_file' + doc: write out 20x of Q20 coverage in WIGGLE format to this file + - id: coverage_file + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--coverage_file' + doc: write out coverage in WIGGLE format to this file + - id: dbsnp + type: File? + inputBinding: + position: 0 + prefix: '--dbsnp' + doc: VCF file of DBSNP information + secondaryFiles: + - ^.vcf.idx + - id: dbsnp_normal_lod + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--dbsnp_normal_lod' + doc: LOD threshold for calling normal non-variant at dbsnp sites + - id: defaultBaseQualities + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--defaultBaseQualities' + doc: >- + If reads are missing some or all base quality scores, this value will be + used for all base quality scores + - default: false + id: disableRandomization + type: boolean? + inputBinding: + position: 0 + prefix: '--disableRandomization' + doc: >- + Completely eliminates randomization from nondeterministic methods. To be + used mostly in the testing framework where dynamic parallelism can result + in differing numbers of calls to the generator. + - default: false + id: disable_indel_quals + type: boolean? 
+ inputBinding: + position: 0 + prefix: '--disable_indel_quals' + doc: >- + If true, disables printing of base insertion and base deletion tags (with + -BQSR) + - id: downsample_to_coverage + type: int? + inputBinding: + position: 0 + prefix: '--downsample_to_coverage' + doc: Target coverage threshold for downsampling to coverage + - default: NONE + id: downsampling_type + type: string? + inputBinding: + position: 0 + prefix: '--downsampling_type' + doc: >- + Type of reads downsampling to employ at a given locus. Reads will be + selected randomly to be removed from the pile based on the method + described here (NONE|ALL_READS| BY_SAMPLE) given locus; note that + downsampled reads are randomly selected from all possible reads at a locus + - default: false + id: emit_original_quals + type: boolean? + inputBinding: + position: 0 + prefix: '--emit_original_quals' + doc: >- + If true, enables printing of the OQ tag with the original base qualities + (with -BQSR) + - default: true + id: enable_extended_output + type: boolean + inputBinding: + position: 0 + prefix: '--enable_extended_output' + - id: excludeIntervals + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--excludeIntervals' + doc: >- + One or more genomic intervals to exclude from processing. Can be + explicitly specified on the command line or in a file (including a rod + file) + - default: false + id: filter_mismatching_base_and_quals + type: boolean? + inputBinding: + position: 0 + prefix: '--filter_mismatching_base_and_quals' + doc: >- + if a read has mismatching number of bases and base qualities, filter out + the read instead of blowing up. + - default: false + id: force_alleles + type: boolean? + inputBinding: + position: 0 + prefix: '--force_alleles' + doc: force output for all alleles at each site + - default: false + id: force_output + type: boolean? + inputBinding: + position: 0 + prefix: '--force_output' + doc: force output for each site + - id: fraction_contamination + type: float? 
+ inputBinding: + position: 0 + prefix: '--fraction_contamination' + doc: >- + estimate of fraction (0-1) of physical contamination with other unrelated + samples + - id: fraction_mapq0_threshold + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--fraction_mapq0_threshold' + doc: >- + threshold for determining if there is relatedness between the alt and ref + allele read piles + - id: gap_events_threshold + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--gap_events_threshold' + doc: >- + how many gapped events (ins/del) are allowed in proximity to this + candidate + - id: gatk_key + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--gatk_key' + doc: >- + GATK Key file. Required if running with -et NO_ET. Please see + -phone-home-and-how-does-it-affect-me#latest for details. + - id: heavily_clipped_read_fraction + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--heavily_clipped_read_fraction' + doc: >- + if this fraction or more of the bases in a read are soft/hard clipped, do + not use this read for mutation calling + - id: initial_tumor_lod + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--initial_tumor_lod' + doc: Initial LOD threshold for calling tumor variant + - id: input_file_normal + type: File? + inputBinding: + position: 0 + prefix: '--input_file:normal' + doc: SAM or BAM file(s) + secondaryFiles: + - ^.bai + - id: input_file_tumor + type: File? + inputBinding: + position: 0 + prefix: '--input_file:tumor' + doc: SAM or BAM file(s) + secondaryFiles: + - ^.bai + - id: interval_merging + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--interval_merging' + doc: >- + Indicates the interval merging rule we should use for abutting intervals + (ALL| OVERLAPPING_ONLY) + - id: interval_padding + type: 'string[]?' 
+ inputBinding: + position: 0 + prefix: '--interval_padding' + doc: >- + Indicates how many basepairs of padding to include around each of the + intervals specified with the -L/ + - id: interval_set_rule + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--interval_set_rule' + doc: >- + Indicates the set merging approach the interval parser should use to + combine the various -L or -XL inputs (UNION| INTERSECTION) + - id: intervals + type: + - string + - File + - 'null' + inputBinding: + position: 0 + prefix: '--intervals' + doc: >- + One or more genomic intervals over which to operate. Can be explicitly + specified on the command line or in a file (including a rod file) + - default: false + id: keep_program_records + type: boolean? + inputBinding: + position: 0 + prefix: '--keep_program_records' + doc: >- + Should we override the Walkers default and keep program records from the + SAM header + - id: log_to_file + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--log_to_file' + doc: Set the logging location + - id: logging_level + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--logging_level' + doc: >- + Set the minimum level of logging, i.e. setting INFO gets you INFO up to + FATAL, setting ERROR gets you ERROR and FATAL level logging. + - id: maxRuntime + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--maxRuntime' + doc: >- + If provided, that GATK will stop execution cleanly as soon after + maxRuntime has been exceeded, truncating the run but not exiting with a + failure. By default the value is interpreted in minutes, but this can be + changed by maxRuntimeUnits + - id: maxRuntimeUnits + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--maxRuntimeUnits' + doc: >- + The TimeUnit for maxRuntime (NANOSECONDS| + MICROSECONDS|MILLISECONDS|SECONDS|MINUTES| HOURS|DAYS) + - id: max_alt_allele_in_normal_fraction + type: 'string[]?' 
+ inputBinding: + position: 0 + prefix: '--max_alt_allele_in_normal_fraction' + doc: threshold for maximum alternate allele fraction in normal + - id: max_alt_alleles_in_normal_count + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--max_alt_alleles_in_normal_count' + doc: threshold for maximum alternate allele counts in normal + - id: max_alt_alleles_in_normal_qscore_sum + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--max_alt_alleles_in_normal_qscore_sum' + doc: threshold for maximum alternate allele quality score sum in normal + - id: min_qscore + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--min_qscore' + doc: threshold for minimum base quality score + - id: minimum_mutation_cell_fraction + type: float? + inputBinding: + position: 0 + prefix: '--minimum_mutation_cell_fraction' + doc: >- + minimum fraction of cells which are presumed to have a mutation, used to + handle non-clonality and contamination + - id: minimum_normal_allele_fraction + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--minimum_normal_allele_fraction' + doc: >- + minimum allele fraction to be considered in normal, useful for normal + sample contaminated with tumor + - default: false + id: monitorThreadEfficiency + type: boolean? + inputBinding: + position: 0 + prefix: '--monitorThreadEfficiency' + doc: Enable GATK threading efficiency monitoring + - default: false + id: nonDeterministicRandomSeed + type: boolean? + inputBinding: + position: 0 + prefix: '--nonDeterministicRandomSeed' + doc: >- + Makes the GATK behave non deterministically, that is, the random numbers + generated will be different in every run + - default: false + id: noop + type: boolean? + inputBinding: + position: 0 + prefix: '--noop' + doc: 'used for debugging, basically exit as soon as we get the reads' + - id: normal_depth_file + type: 'string[]?' 
+ inputBinding: + position: 0 + prefix: '--normal_depth_file' + doc: write out normal read depth in WIGGLE format to this file + - id: normal_lod + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--normal_lod' + doc: LOD threshold for calling normal non-germline + - id: normal_sample_name + type: string + inputBinding: + position: 0 + prefix: '--normal_sample_name' + doc: name to use for normal in output files + - id: num_bam_file_handles + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--num_bam_file_handles' + doc: The total number of BAM file handles to keep open simultaneously + - id: num_cpu_threads_per_data_thread + type: string? + inputBinding: + position: 0 + prefix: '--num_cpu_threads_per_data_thread' + doc: >- + How many CPU threads should be allocated per data thread to running this + analysis? + - id: num_threads + type: string? + inputBinding: + position: 0 + prefix: '--num_threads' + doc: How many data threads should be allocated to running this analysis. + - default: false + id: only_passing_calls + type: boolean? + inputBinding: + position: 0 + prefix: '--only_passing_calls' + doc: only emit passing calls + - default: mutect_variants.txt + id: out + type: string? + inputBinding: + position: 0 + prefix: '--out' + doc: Call-stats output + - id: pedigree + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--pedigree' + doc: Pedigree files for samples + - id: pedigreeString + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--pedigreeString' + doc: Pedigree string for samples + - id: pedigreeValidationType + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--pedigreeValidationType' + doc: >- + How strict should we be in validating the pedigree information? + (STRICT|SILENT) + - id: performanceLog + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--performanceLog' + doc: 'If provided, a GATK runtime performance log will be written to this file' + - id: phone_home + type: 'string[]?' 
+ inputBinding: + position: 0 + prefix: '--phone_home' + doc: >- + What kind of GATK run report should we generate? STANDARD is the default, + can be NO_ET so nothing is posted to the run repository. Please see + -phone-home-and-how-does-it-affect-me#latest for details. + (NO_ET|STANDARD|STDOUT) + - id: pir_mad_threshold + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--pir_mad_threshold' + doc: threshold for clustered read position artifact MAD + - id: pir_median_threshold + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--pir_median_threshold' + doc: threshold for clustered read position artifact median + - id: power_constant_af + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--power_constant_af' + doc: Allelic fraction constant to use in power calculations + - id: power_constant_qscore + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--power_constant_qscore' + doc: Phred scale quality score constant to use in power calculations + - id: power_file + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--power_file' + doc: write out power in WIGGLE format to this file + - id: preserve_qscores_less_than + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--preserve_qscores_less_than' + doc: >- + Bases with quality scores less than this threshold wont be recalibrated + (with -BQSR) + - id: read_buffer_size + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--read_buffer_size' + doc: Number of reads per SAM file to buffer in memory + - id: read_filter + type: string? + inputBinding: + position: 0 + prefix: '--read_filter' + doc: Specify filtration criteria to apply to each read individually + - id: read_group_black_list + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--read_group_black_list' + doc: >- + Filters out read groups matching - or a .txt file containing + the filter strings one per line. 
+ - id: reference_sequence + type: File + inputBinding: + position: 0 + prefix: '--reference_sequence' + secondaryFiles: + - .fai + - ^.dict + - default: false + id: remove_program_records + type: boolean? + inputBinding: + position: 0 + prefix: '--remove_program_records' + doc: >- + Should we override the Walkers default and remove program records from the + SAM header + - default: false + id: required_maximum_alt_allele_mapping_quality_score + type: boolean? + inputBinding: + position: 0 + prefix: '--required_maximum_alt_allele_mapping_quality_score' + doc: >- + required minimum value for + tumor alt allele + maximum mapping quality score + - default: false + id: somatic_classification_normal_power_threshold + type: boolean? + inputBinding: + position: 0 + prefix: '--somatic_classification_normal_power_threshold' + doc: >- + Power threshold for normal to + determine germline vs + variant + - id: tag + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--tag' + doc: >- + Arbitrary tag string to identify this GATK run as part of a group of runs, + for later analysis + - id: tumor_depth_file + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--tumor_depth_file' + doc: write out tumor read depth in WIGGLE format to this file + - id: tumor_f_pretest + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--tumor_f_pretest' + doc: >- + for computational efficiency, reject sites with allelic fraction below + this threshold + - id: tumor_lod + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--tumor_lod' + doc: LOD threshold for calling tumor variant + - id: tumor_sample_name + type: string + inputBinding: + position: 0 + prefix: '--tumor_sample_name' + doc: name to use for tumor in output files + - id: unsafe + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--unsafe' + doc: >- + If set, enables unsafe operations - nothing will be checked at runtime. + For expert users only who know what they are doing. 
We do not support + usage of this argument. (ALLOW_UNINDEXED_BAM| ALLOW_UNSET_BAM_SORT_ORDER| + NO_READ_ORDER_VERIFICATION| ALLOW_SEQ_DICT_INCOMPATIBILITY| + LENIENT_VCF_PROCESSING|ALL) + - default: false + id: useOriginalQualities + type: boolean? + inputBinding: + position: 0 + prefix: '--useOriginalQualities' + doc: >- + If set, use the original base quality scores from the OQ tag when present + instead of the standard scores + - id: validation_strictness + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--validation_strictness' + doc: How strict should we be with validation (STRICT|LENIENT|SILENT) + - default: mutect_variants.vcf + id: vcf + type: string? + inputBinding: + position: 0 + prefix: '--vcf' + doc: VCF output of mutation candidates +outputs: + - id: mutect_output_txt + type: File? + outputBinding: + glob: $(inputs.out) + - id: mutext_output_vcf + type: File? + outputBinding: + glob: $(inputs.vcf) +arguments: + - |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx15G" + } + else { + return "-Xmx15G" + } + } + - |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xms" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xms" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xms" + 
(inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xms" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xms1G" + } + else { + return "-Xms1G" + } + } + - '-XX:-UseGCOverheadLimit' + - '-jar' + - /usr/local/bin/muTect.jar + - '--analysis_type' + - MuTect +requirements: + - class: ResourceRequirement + ramMin: 34000 + coresMin: 1 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/mutect:1.1.5' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:huy1@mskcc.org' + 'foaf:name': Yu Hu + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': MuTect + 'doap:revision': 1.1.5 diff --git a/octopus/0.7.4/example_input.yaml b/octopus/0.7.4/example_input.yaml new file mode 100644 index 00000000..70b44e10 --- /dev/null +++ b/octopus/0.7.4/example_input.yaml @@ -0,0 +1,21 @@ +error_models: null +input: + - class: File + path: >- + /juno/work/access/production/data/bams/C-0A8NCE/C-0A8NCE-L001-d/current/C-0A8NCE-L001-d_cl_aln_srt_MD_IR_FX_BR__aln_srt_IR_FX-duplex.bam + - class: File + path: >- + /juno/work/access/production/data/bams/C-0A8NCE/C-0A8NCE-N001-d/current/C-0A8NCE-N001-d_cl_aln_srt_MD_IR_FX_BR__aln_srt_IR_FX-duplex.bam +normalId: C-0A8NCE-N001-d +output_file_name: oct.vcf +reference: + class: File + metadata: {} + path: >- + /juno/work/access/production/resources/reference/current/Homo_sapiens_assembly19.fasta +skipRegions_file: null +skipRegions_singleEntry: null +somaticOnlyCalls: null +targettedCalling_file: null +targettedCalling_singleEntry: null +tumorOnlySample: null \ No newline at
end of file diff --git a/octopus/0.7.4/octopus_0-7-4.cwl b/octopus/0.7.4/octopus_0-7-4.cwl new file mode 100644 index 00000000..0c7d74c1 --- /dev/null +++ b/octopus/0.7.4/octopus_0-7-4.cwl @@ -0,0 +1,123 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + sbg: 'https://www.sevenbridges.com/' + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' +id: octopus_0_7_4 +baseCommand: + - octopus +inputs: + - id: input + type: 'File[]' + inputBinding: + position: 0 + prefix: '-I' + secondaryFiles: ^.bai + doc: Tumor and normal bam files with .bai + - id: normalId + type: string? + inputBinding: + position: 0 + prefix: '-N' + doc: add the name of the normal sample + - id: tumorOnlySample + type: boolean? + inputBinding: + position: 0 + prefix: '-C' + doc: mention this parameter if it is tumor only sample. + - id: somaticOnlyCalls + type: boolean? + inputBinding: + position: 0 + prefix: '--somatics-only' + doc: if somatics only call is required. Use this with -f ON parameter + - id: targettedCalling_singleEntry + type: string? + inputBinding: + position: 0 + prefix: '-T' + doc: >- + list of regions to call variants from. + + eg 1. chr1: all of chr1. + + 2. chr2:10,000,000: the single position 10000000 in chr2. + + chr3:5,000,000-: everything from 3. chr3:5,000,000 onwards. + + 4. chr4:100,000,000-200,000,000: everything between chr4:100,000,000 and + chr4:200,000,000. The interval is half open so position chr4:200,000,000 + is not included. + - id: skipRegions_singleEntry + type: string? + inputBinding: + position: 0 + prefix: '-K' + doc: to skip a set of regions + - id: targettedCalling_file + type: File? + inputBinding: + position: 0 + prefix: '-t' + doc: regions in a text or bed file + - id: skipRegions_file + type: File? + inputBinding: + position: 0 + prefix: '-k' + doc: regions in text or bed file format + - id: error_models + type: string? 
+ inputBinding: + position: 0 + prefix: '--sequence-error-model' + doc: >- + error model will be in the format - [library preparation]<.sequencer> + eg: PCR.NOVASEQ + - id: reference + type: File + inputBinding: + position: 0 + prefix: '-R' + secondaryFiles: + - .fai + - id: output_file_name + type: string + inputBinding: + position: 0 + prefix: '-o' +outputs: + - id: outputVCF + type: File + outputBinding: + glob: '${ if (inputs.output_file_name) return inputs.output_file_name; return null; }' +label: octopus +requirements: + - class: ResourceRequirement + ramMin: 4000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/octopus:0.7.4' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:sivaprk@mskcc.org' + 'foaf:name': Karthigayini Sivaprakasam + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:sivaprk@mskcc.org' + 'foaf:name': Karthigayini Sivaprakasam + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': octopus + 'doap:revision': 0.7.4 + diff --git a/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl b/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl index 091ebc34..576e07ae 100644 --- a/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl +++ b/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl @@ -4,6 +4,7 @@ $namespaces: dct: 'http://purl.org/dc/terms/' doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' id: picard_add_or_replace_read_groups_1_96 baseCommand: - java @@ -45,7 +46,7 @@ inputs: doc: >- Read Group ID Default value: 1.
This option can be set to 'null' to clear the default value Required - - id: read_group_sequnecing_center + - id: read_group_sequencing_center type: string inputBinding: position: 0 @@ -53,7 +54,7 @@ inputs: separate: false doc: 'Read Group sequencing center name Default value: null. Required' - id: read_group_library - type: int + type: string inputBinding: position: 0 prefix: RGLB= @@ -94,13 +95,6 @@ inputs: prefix: RGDT= separate: false doc: 'Read Group run date Default value: null.' - - id: tmp_dir - type: string? - inputBinding: - position: 0 - prefix: TMP_DIR= - separate: false - doc: This option may be specified 0 or more times - id: validation_stringency type: string? inputBinding: @@ -133,8 +127,11 @@ inputs: Whether to create a BAM index when writing a coordinate-sorted BAM file. Default value:false. This option can be set to 'null' to clear the default value. Possible values:{true, false} + - id: temporary_directory + type: string? + doc: 'Default value: null. This option may be specified 0 or more times.' 
outputs: - - id: bam + - id: picard_add_or_replace_read_groups_bam type: File outputBinding: glob: |- @@ -173,9 +170,30 @@ arguments: return "-Xmx15G" } } + - position: 0 + shellQuote: false + valueFrom: '-XX:-UseGCOverheadLimit' + - position: 0 + prefix: '-Djava.io.tmpdir=' + separate: false + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } - position: 0 prefix: '-jar' valueFrom: /usr/local/bin/AddOrReplaceReadGroups.jar + - position: 0 + prefix: TMP_DIR= + separate: false + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } - position: 0 prefix: O= separate: false @@ -186,11 +204,12 @@ arguments: return inputs.input.basename.replace(/.sam$/, '_srt.bam'); } requirements: + - class: ShellCommandRequirement - class: ResourceRequirement - ramMin: 16000 + ramMin: 25000 coresMin: 2 - class: DockerRequirement - dockerPull: 'mskcc/picard_1.96:0.1.0' + dockerPull: 'mskaccess/picard_1.96:0.6.3' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/picard_add_or_replace_read_groups_2.21.2/example_inputs.yaml b/picard_add_or_replace_read_groups_2.21.2/example_inputs.yaml new file mode 100644 index 00000000..9c25bd7d --- /dev/null +++ b/picard_add_or_replace_read_groups_2.21.2/example_inputs.yaml @@ -0,0 +1,20 @@ +bam_compression_level: +create_bam_index: true +input: + class: File + path: "/path/to/bam" +memory_overhead: +memory_per_job: +number_of_threads: +output_file_name: somename_srt.bam +read_group_description: +read_group_identifier: test +read_group_library: 1 +read_group_platform_unit: bc01 +read_group_run_date: +read_group_sample_name: seracare +read_group_sequencing_platform: Illumina +read_group_sequencing_center: msk +sort_order: +tmp_dir: +validation_stringency: diff --git a/picard_add_or_replace_read_groups_2.21.2/picard_add_or_replace_read_groups_2.21.2.cwl
b/picard_add_or_replace_read_groups_2.21.2/picard_add_or_replace_read_groups_2.21.2.cwl new file mode 100644 index 00000000..715b53bc --- /dev/null +++ b/picard_add_or_replace_read_groups_2.21.2/picard_add_or_replace_read_groups_2.21.2.cwl @@ -0,0 +1,218 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: picard_add_or_replace_read_groups_2_21_2 +baseCommand: + - java +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: input + type: File + inputBinding: + position: 0 + prefix: I= + separate: false + doc: Input file ( sam). Required. + - id: output_file_name + type: string? + doc: Output file name (bam or sam). Not Required + - id: sort_order + type: string? + inputBinding: + position: 0 + prefix: SO= + separate: false + doc: >- + Optional sort order to output in. If not supplied OUTPUT is in the same + order as INPUT.Default value: null. Possible values: {unsorted, queryname, + coordinate} + - id: read_group_identifier + type: string + inputBinding: + position: 0 + prefix: RGID= + separate: false + doc: >- + Read Group ID Default value: 1. This option can be set to 'null' to clear + the default value Required + - id: read_group_sequencing_center + type: string + inputBinding: + position: 0 + prefix: RGCN= + separate: false + doc: 'Read Group sequencing center name Default value: null. Required' + - id: read_group_library + type: string + inputBinding: + position: 0 + prefix: RGLB= + separate: false + doc: Read Group Library. Required + - id: read_group_platform_unit + type: string + inputBinding: + position: 0 + prefix: RGPU= + separate: false + doc: Read Group platform unit (eg. run barcode) Required. 
+ - id: read_group_sample_name + type: string + inputBinding: + position: 0 + prefix: RGSM= + separate: false + doc: Read Group sample name. Required + - id: read_group_sequencing_platform + type: string + inputBinding: + position: 0 + prefix: RGPL= + separate: false + doc: 'Read Group platform (e.g. illumina, solid) Required.' + - id: read_group_description + type: string? + inputBinding: + position: 0 + prefix: RGDS= + separate: false + doc: 'Read Group description Default value: null.' + - id: read_group_run_date + type: string? + inputBinding: + position: 0 + prefix: RGDT= + separate: false + doc: 'Read Group run date Default value: null.' + - id: validation_stringency + type: string? + inputBinding: + position: 0 + prefix: VALIDATION_STRINGENCY= + separate: false + doc: >- + Validation stringency for all SAM files read by this program. Setting + stringency to SILENT can improve performance when processing a BAM file in + which variable-length data (read, qualities, tags) do not otherwise need + to be decoded. Default value: STRICT. This option can be set to 'null' to + clear the default value. Possible values: {STRICT,LENIENT, SILENT} + - id: bam_compression_level + type: int? + inputBinding: + position: 0 + prefix: COMPRESSION_LEVEL= + separate: false + doc: >- + Compression level for all compressed files created (e.g. BAM and GELI). + Default value:5. This option can be set to 'null' to clear the default + value. + - default: true + id: create_bam_index + type: boolean? + inputBinding: + position: 0 + prefix: CREATE_INDEX=true + doc: >- + Whether to create a BAM index when writing a coordinate-sorted BAM file. + Default value:false. This option can be set to 'null' to clear the default + value. 
Possible values:{true, false} +outputs: + - id: picard_add_or_replace_read_groups_bam + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_file_name) + return inputs.output_file_name; + return inputs.input.basename.replace(/.sam$/, '_srt.bam'); + } + secondaryFiles: + - ^.bai +label: picard_add_or_replace_read_groups_2.21.2 +arguments: + - position: 0 + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx15G" + } + else { + return "-Xmx15G" + } + } + - position: 0 + valueFrom: "-XX:-UseGCOverheadLimit" + shellQuote: false + - position: 0 + valueFrom: "-Djava.io.tmpdir=$(runtime.tmpdir)" + shellQuote: false + - position: 0 + prefix: '-jar' + valueFrom: /usr/picard/picard.jar + - position: 0 + valueFrom: AddOrReplaceReadGroups + - position: 0 + prefix: TMP_DIR= + separate: false + valueFrom: "$(runtime.tmpdir)" + - position: 0 + prefix: O= + separate: false + valueFrom: |- + ${ + if(inputs.output_file_name) + return inputs.output_file_name; + return inputs.input.basename.replace(/.sam$/, '_srt.bam'); + } +requirements: + - class: ResourceRequirement + ramMin: 25000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'broadinstitute/picard:2.21.2' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center 
+'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': picard + 'doap:revision': 2.21.2 diff --git a/picard_add_or_replace_read_groups_4.1.8.1/example_inputs.yaml b/picard_add_or_replace_read_groups_4.1.8.1/example_inputs.yaml new file mode 100644 index 00000000..aa9cd8b0 --- /dev/null +++ b/picard_add_or_replace_read_groups_4.1.8.1/example_inputs.yaml @@ -0,0 +1,22 @@ +bam_compression_level: +create_bam_index: true +use_jdk_deflater: true +use_jdk_inflater: true +input: + class: File + path: "/path/to/bam" +memory_overhead: +memory_per_job: +number_of_threads: +output_file_name: somename_srt.bam +read_group_description: +read_group_identifier: test +read_group_library: 1 +read_group_platform_unit: bc01 +read_group_run_date: +read_group_sample_name: seracare +read_group_sequencing_platform: Illumina +read_group_sequencing_center: msk +sort_order: +temporary_directory: +validation_stringency: diff --git a/picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl b/picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl new file mode 100644 index 00000000..07d930e5 --- /dev/null +++ b/picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl @@ -0,0 +1,235 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: picard_add_or_replace_read_groups_4.1.8.1 +baseCommand: + - java +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int?
+ - id: input + type: File + inputBinding: + position: 0 + prefix: '-I' + doc: Input file ( sam). Required. + - id: output_file_name + type: string? + doc: Output file name (bam or sam). Not Required + - id: sort_order + type: string? + inputBinding: + position: 0 + prefix: '-SO' + doc: >- + Optional sort order to output in. If not supplied OUTPUT is in the same + order as INPUT.Default value: null. Possible values: {unsorted, queryname, + coordinate} + - id: read_group_identifier + type: string + inputBinding: + position: 0 + prefix: '--RGID' + doc: >- + Read Group ID Default value: 1. This option can be set to 'null' to clear + the default value Required + - id: read_group_sequencing_center + type: string + inputBinding: + position: 0 + prefix: '--RGCN' + doc: 'Read Group sequencing center name Default value: null. Required' + - id: read_group_library + type: string + inputBinding: + position: 0 + prefix: '--RGLB' + doc: Read Group Library. Required + - id: read_group_platform_unit + type: string + inputBinding: + position: 0 + prefix: '--RGPU' + doc: Read Group platform unit (eg. run barcode) Required. + - id: read_group_sample_name + type: string + inputBinding: + position: 0 + prefix: '--RGSM' + doc: Read Group sample name. Required + - id: read_group_sequencing_platform + type: string + inputBinding: + position: 0 + prefix: '--RGPL' + doc: 'Read Group platform (e.g. illumina, solid) Required.' + - id: read_group_description + type: string? + inputBinding: + position: 0 + prefix: '--RGDS' + doc: 'Read Group description Default value: null.' + - id: read_group_run_date + type: string? + inputBinding: + position: 0 + prefix: '--RGDT' + doc: 'Read Group run date Default value: null.' + - id: validation_stringency + type: string? + inputBinding: + position: 0 + prefix: '--VALIDATION_STRINGENCY' + doc: >- + Validation stringency for all SAM files read by this program. 
Setting + stringency to SILENT can improve performance when processing a BAM file in + which variable-length data (read, qualities, tags) do not otherwise need + to be decoded. Default value: STRICT. This option can be set to 'null' to + clear the default value. Possible values: {STRICT,LENIENT, SILENT} + - id: bam_compression_level + type: int? + inputBinding: + position: 0 + prefix: '--COMPRESSION_LEVEL' + doc: >- + Compression level for all compressed files created (e.g. BAM and GELI). + Default value:5. This option can be set to 'null' to clear the default + value. + - id: use_jdk_deflater + type: boolean? + inputBinding: + position: 0 + prefix: '--USE_JDK_DEFLATER' + doc: >- + Use the JDK Deflater instead of the Intel Deflater for writing compressed + output + - id: use_jdk_inflater + type: boolean? + inputBinding: + position: 0 + prefix: '--USE_JDK_INFLATER' + doc: >- + Use the JDK Inflater instead of the Intel Inflater for reading compressed + input + - default: true + id: create_bam_index + type: boolean? + inputBinding: + position: 0 + prefix: '--CREATE_INDEX' + doc: >- + Whether to create a BAM index when writing a coordinate-sorted BAM file. + Default value:false. This option can be set to 'null' to clear the default + value. Possible values:{true, false} + - id: temporary_directory + type: string? + doc: 'Default value: null. This option may be specified 0 or more times.' 
+outputs: + - id: picard_add_or_replace_read_groups_bam + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_file_name) + return inputs.output_file_name; + return inputs.input.basename.replace(/.sam$/, '_srt.bam'); + } + secondaryFiles: + - ^.bai +label: picard_add_or_replace_read_groups_4.1.8.1 +arguments: + - position: 0 + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx15G" + } + else { + return "-Xmx15G" + } + } + - position: 0 + prefix: '-Djava.io.tmpdir=' + separate: false + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } + - position: 0 + shellQuote: false + valueFrom: '-XX:-UseGCOverheadLimit' + - position: 0 + prefix: '-jar' + valueFrom: /gatk/gatk-package-4.1.8.1-local.jar + - position: 0 + valueFrom: AddOrReplaceReadGroups + - position: 0 + prefix: '--TMP_DIR' + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } + - position: 0 + prefix: '-O' + valueFrom: |- + ${ + if(inputs.output_file_name) + return inputs.output_file_name; + return inputs.input.basename.replace(/.sam$/, '_srt.bam'); + } +requirements: + - class: ShellCommandRequirement + - class: ResourceRequirement + ramMin: 17000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/gatk:4.1.8.1' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 
'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:kumarn1@mskcc.org' + 'foaf:name': Nikhil Kumar + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': picard + 'doap:revision': 4.1.8.1 diff --git a/picard_collect_alignment_summary_metrics_2.21.2/example_inputs.yaml b/picard_collect_alignment_summary_metrics_2.21.2/example_inputs.yaml new file mode 100644 index 00000000..f8a51739 --- /dev/null +++ b/picard_collect_alignment_summary_metrics_2.21.2/example_inputs.yaml @@ -0,0 +1,26 @@ +assume_sorted: true +bam_compression_level: null +create_bam_index: null +input: + class: File + metadata: {} + path: "/path/to/bam" + secondaryFiles: + - class: File + path: "/path/to/bam.bai" +max_insert_size: null +memory_overhead: null +memory_per_job: null +metrics_acciumulation_level: null +number_of_threads: null +output_file_name: null +reference_sequence: + class: File + metadata: {} + path: "/path/to/reference.fasta" + secondaryFiles: + - class: File + path: "/path/to/reference.dict" +stop_after: null +tmp_dir: null +validation_stringency: null diff --git a/picard_collect_alignment_summary_metrics_2.21.2/picard_collect_alignment_summary_metrics_2.21.2.cwl b/picard_collect_alignment_summary_metrics_2.21.2/picard_collect_alignment_summary_metrics_2.21.2.cwl new file mode 100644 index 00000000..24a6302c --- /dev/null +++ b/picard_collect_alignment_summary_metrics_2.21.2/picard_collect_alignment_summary_metrics_2.21.2.cwl @@ -0,0 +1,179 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: 
picard_collect_alignment_summary_metrics_2_8_1 +baseCommand: + - java +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: input + type: File + inputBinding: + position: 0 + prefix: I= + separate: false + doc: Input file (bam or sam). Required. + - id: output_file_name + type: string? + doc: Output file (bam or sam). + - id: metrics_acciumulation_level + type: string? + inputBinding: + position: 0 + prefix: LEVEL= + separate: false + doc: >- + The level(s) at which to accumulate metrics. Default value: [ALL_READS]. + This option can be set to 'null' to clear the default value. Possible + values: {ALL_READS, SAMPLE, LIBRARY, READ_GROUP} This option may be + specified 0 or more times. This option can be set to 'null' to clear the + default list. + - id: max_insert_size + type: int? + inputBinding: + position: 0 + prefix: MAX_INSERT_SIZE= + separate: false + doc: >- + Paired-end reads above this insert size will be considered chimeric along + with inter-chromosomal pairs. Default value: 100000. This option can be + set to 'null' to clear the default value. + - id: tmp_dir + type: string? + inputBinding: + position: 0 + prefix: TMP_DIR= + separate: false + doc: This option may be specified 0 or more times + - id: validation_stringency + type: string? + inputBinding: + position: 0 + prefix: VALIDATION_STRINGENCY= + separate: false + doc: >- + Validation stringency for all SAM files read by this program. Setting + stringency to SILENT can improve performance when processing a BAM file in + which variable-length data (read, qualities, tags) do not otherwise need + to be decoded. Default value: STRICT. This option can be set to 'null' to + clear the default value. Possible values: {STRICT,LENIENT, SILENT} + - default: true + id: assume_sorted + type: boolean? 
+ inputBinding: + position: 0 + prefix: AS=true + - id: reference_sequence + type: File + inputBinding: + position: 0 + prefix: R= + separate: false + doc: >- + Reference sequence file. Note that while this argument isn't required, + without it only a small subset of the metrics will be calculated. Note + also that if a reference sequence is provided, it must be accompanied by a + sequence dictionary. Default value: null. + secondaryFiles: + - ^.dict + - id: stop_after + type: int? + inputBinding: + position: 0 + prefix: STOP_AFTER= + doc: >- + Stop after processing N reads, mainly for debugging. Default value: 0. + This option can be set to 'null' to clear the default value. +outputs: + - id: picard_collect_alignment_summary_metrics_txt + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/,'_alignment_metrics.txt') + } + } +label: picard_collect_alignment_summary_metrics_2.8.1 +arguments: + - position: 0 + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx8G" + } + else { + return "-Xmx8G" + } + + } + - position: 0 + prefix: '-jar' + valueFrom: /usr/local/bin/picard.jar + - position: 0 + valueFrom: CollectAlignmentSummaryMetrics + - position: 0 + prefix: O= + separate: false + valueFrom: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return 
inputs.input.basename.replace(/.bam/,'_alignment_metrics.txt') + } + } +requirements: + - class: ResourceRequirement + ramMin: 12000 + coresMin: 1 + - class: DockerRequirement + dockerPull: 'mskaccess/picard:0.6.3' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': picard + 'doap:revision': 2.8.1 diff --git a/picard_collect_alignment_summary_metrics_2.8.1/picard_collect_alignment_summary_metrics_2.8.1.cwl b/picard_collect_alignment_summary_metrics_2.8.1/picard_collect_alignment_summary_metrics_2.8.1.cwl index 23ceec08..0c98a858 100644 --- a/picard_collect_alignment_summary_metrics_2.8.1/picard_collect_alignment_summary_metrics_2.8.1.cwl +++ b/picard_collect_alignment_summary_metrics_2.8.1/picard_collect_alignment_summary_metrics_2.8.1.cwl @@ -4,7 +4,8 @@ $namespaces: dct: 'http://purl.org/dc/terms/' doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' -id: picard_collect_alignment_summary_metrics_2.8.1 + sbg: 'https://www.sevenbridges.com/' +id: picard_collect_alignment_summary_metrics_2.21.2 baseCommand: - java inputs: @@ -95,7 +96,7 @@ inputs: Stop after processing N reads, mainly for debugging. Default value: 0. This option can be set to 'null' to clear the default value. 
outputs: - - id: alignment_metrics + - id: picard_collect_alignment_summary_metrics_txt type: File outputBinding: glob: |- @@ -106,7 +107,7 @@ outputs: return inputs.input.basename.replace(/.bam/,'_alignment_metrics.txt') } } -label: picard_collect_alignment_summary_metrics_2.8.1 +label: picard_collect_alignment_summary_metrics_2.21.2 arguments: - position: 0 valueFrom: |- @@ -137,7 +138,7 @@ arguments: } - position: 0 prefix: '-jar' - valueFrom: /usr/local/bin/picard.jar + valueFrom: /usr/picard/picard.jar - position: 0 valueFrom: CollectAlignmentSummaryMetrics - position: 0 @@ -156,7 +157,7 @@ requirements: ramMin: 12000 coresMin: 1 - class: DockerRequirement - dockerPull: 'mskcc/picard:2.8.1' + dockerPull: 'broadinstitute/picard:2.21.2' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' @@ -175,4 +176,4 @@ requirements: 'doap:release': - class: 'doap:Version' 'doap:name': picard - 'doap:revision': 2.8.1 + 'doap:revision': 2.21.2 diff --git a/picard_collectmultiplemetric_2.21.2/example_inputs.yml b/picard_collectmultiplemetric_2.21.2/example_inputs.yml new file mode 100644 index 00000000..72207111 --- /dev/null +++ b/picard_collectmultiplemetric_2.21.2/example_inputs.yml @@ -0,0 +1,18 @@ +input: + class: File + path: "Sample.bam" +assume_sorted: +bam_compression_level: +create_bam_index: +dbsnp_file: +file_extension: +include_unpaired: +intervals_file: +memory_overhead: +memory_per_job: +metric_accumulation_level: +number_of_threads: +output_file_name: +program_list: +stop_after: +validation_stringency: diff --git a/picard_collectmultiplemetric_2.21.2/picard_collectmultiplemetrics_2.21.2.cwl b/picard_collectmultiplemetric_2.21.2/picard_collectmultiplemetrics_2.21.2.cwl new file mode 100644 index 00000000..0d841d74 --- /dev/null +++ b/picard_collectmultiplemetric_2.21.2/picard_collectmultiplemetrics_2.21.2.cwl @@ -0,0 +1,268 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 
'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: picard_collectmultiplemetrics_2.21.2 +baseCommand: + - java +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: input + type: File + inputBinding: + position: 0 + prefix: I= + separate: false + doc: Input file (bam or sam). Required. + - id: output_file_name + type: string? + doc: Output file (bam or sam). + - id: validation_stringency + type: string? + inputBinding: + position: 0 + prefix: VALIDATION_STRINGENCY= + separate: false + doc: >- + Validation stringency for all SAM files read by this program. Setting + stringency to SILENT can improve performance when processing a BAM file in + which variable-length data (read, qualities, tags) do not otherwise need + to be decoded. Default value: STRICT. This option can be set to 'null' to + clear the default value. Possible values: {STRICT,LENIENT, SILENT} + - default: true + id: assume_sorted + type: boolean? + inputBinding: + position: 0 + prefix: AS=true + separate: false + - id: bam_compression_level + type: int? + inputBinding: + position: 0 + prefix: COMPRESSION_LEVEL= + separate: false + doc: >- + Compression level for all compressed files created (e.g. BAM and GELI). + Default value:5. This option can be set to 'null' to clear the default + value. + - default: true + id: create_bam_index + type: boolean? + inputBinding: + position: 0 + prefix: CREATE_INDEX=true + separate: false + doc: >- + Whether to create a BAM index when writing a coordinate-sorted BAM file. + Default value:false. This option can be set to 'null' to clear the default + value. Possible values:{true, false} + - id: stop_after + type: int? + inputBinding: + position: 0 + prefix: STOP_AFTER= + separate: false + doc: >- + Stop after processing N reads, mainly for debugging. 
Default value: 0. + This option can be set to 'null' to clear the default value. + - id: metric_accumulation_level + type: string? + inputBinding: + position: 0 + prefix: METRIC_ACCUMULATION_LEVEL= + separate: false + doc: >- + The level(s) at which to accumulate metrics. Default value: [ALL_READS]. + This option can be set to 'null' to clear the default value. Possible + values: {ALL_READS, SAMPLE, LIBRARY, READ_GROUP} This option may be + specified 0 or more times. This option can be set to 'null' to clear the + default list. + - id: file_extension + type: string? + inputBinding: + position: 0 + prefix: FILE_EXTENSION= + separate: false + doc: >- + Append the given file extension to all metric file names (ex. + OUTPUT.insert_size_metrics.EXT). None if null Default value: null. + - id: program_list + type: 'string[]?' + inputBinding: + position: 0 + prefix: PROGRAM= + separate: false + doc: >- + Set of metrics programs to apply during the pass through the SAM file. + Default value: [CollectAlignmentSummaryMetrics, + CollectBaseDistributionByCycle, CollectInsertSizeMetrics, + MeanQualityByCycle, QualityScoreDistribution]. This option can be set to + 'null' to clear the default value. Possible values: + {CollectAlignmentSummaryMetrics, CollectInsertSizeMetrics, + QualityScoreDistribution, MeanQualityByCycle, + CollectBaseDistributionByCycle, CollectGcBiasMetrics, RnaSeqMetrics, + CollectSequencingArtifactMetrics, CollectQualityYieldMetrics} This option + may be specified 0 or more times. This option can be set to 'null' to + clear the default list. + - id: intervals_file + type: File? + inputBinding: + position: 0 + prefix: INTERVALS= + separate: false + doc: >- + An optional list of intervals to restrict analysis to. Only pertains to + some of the PROGRAMs. Programs whose stand-alone CLP does not have an + INTERVALS argument will silently ignore this argument. Default value: + null. + - id: dbsnp_file + type: File? 
+ inputBinding: + position: 0 + prefix: DB_SNP= + separate: false + doc: >- + VCF format dbSNP file, used to exclude regions around known polymorphisms + from analysis by some PROGRAMs; PROGRAMs whose CLP doesn't allow for this + argument will quietly ignore it. Default value: null. + - id: include_unpaired + type: boolean? + inputBinding: + position: 0 + prefix: INCLUDE_UNPAIRED=true + separate: false + doc: >- + Include unpaired reads in CollectSequencingArtifactMetrics. If set to true + then all paired reads will be included as well - MINIMUM_INSERT_SIZE and + MAXIMUM_INSERT_SIZE will be ignored in CollectSequencingArtifactMetrics. + Default value: false. This option can be set to 'null' to clear the + default value. Possible values: {true, false} +outputs: + - id: picard_collectmultiplemetrics_alignment_summary_metrics + type: File? + outputBinding: + glob: '*alignment_summary_metrics' + - id: picard_collectmultiplemetrics_bait_bias_detail_metrics + type: File? + outputBinding: + glob: '*bait_bias_detail_metrics' + - id: picard_collectmultiplemetrics_bait_bias_summary_metrics + type: File? + outputBinding: + glob: '*bait_bias_summary_metrics' + - id: picard_collectmultiplemetrics_base_distribution_by_cycle_metrics + type: File? + outputBinding: + glob: '*base_distribution_by_cycle_metrics' + - id: picard_collectmultiplemetrics_base_distribution_by_cycle_pdf + type: File? + outputBinding: + glob: '*base_distribution_by_cycle.pdf' + - id: picard_collectmultiplemetrics_error_summary_metrics + type: File? + outputBinding: + glob: '*error_summary_metrics' + - id: picard_collectmultiplemetrics_gc_bias_detail_metrics + type: File? + outputBinding: + glob: '*gc_bias.detail_metrics' + - id: picard_collectmultiplemetrics_gc_bias_pdf + type: File? + outputBinding: + glob: '*gc_bias.pdf' + - id: picard_collectmultiplemetrics_gc_bias_summary_metrics + type: File? 
+ outputBinding: + glob: '*gc_bias.summary_metrics' + - id: picard_collectmultiplemetrics_insert_size_histogram_pdf + type: File? + outputBinding: + glob: '*insert_size_histogram.pdf' + - id: picard_collectmultiplemetrics_insert_size_metrics + type: File? + outputBinding: + glob: '*insert_size_metrics' + - id: picard_collectmultiplemetrics_pre_adapter_detail_metrics + type: File? + outputBinding: + glob: '*pre_adapter_detail_metrics' + - id: picard_collectmultiplemetrics_pre_adapter_summary_metrics + type: File? + outputBinding: + glob: '*pre_adapter_summary_metrics' + - id: picard_collectmultiplemetrics_quality_by_cycle_metrics + type: File? + outputBinding: + glob: '*quality_by_cycle_metrics' + - id: picard_collectmultiplemetrics_quality_by_cycle_pdf + type: File? + outputBinding: + glob: '*quality_by_cycle.pdf' + - id: picard_collectmultiplemetrics_quality_distribution_metrics + type: File? + outputBinding: + glob: '*quality_distribution_metrics' + - id: picard_collectmultiplemetrics_quality_distribution_pdf + type: File? 
+ outputBinding: + glob: '*quality_distribution.pdf' +label: picard_collectmultiplemetrics_2.21.2 +arguments: + - position: 0 + prefix: '' + separate: false + valueFrom: "${\n if(inputs.memory_per_job && inputs.memory_overhead) {\n \n if(inputs.memory_per_job % 1000 == 0) {\n \t\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n \n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\" \n }\n }\n else if (inputs.memory_per_job && !inputs.memory_overhead){\n \n if(inputs.memory_per_job % 1000 == 0) {\n \t\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n \n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\" \n }\n }\n else if(!inputs.memory_per_job && inputs.memory_overhead){\n \n return \"-Xmx15G\"\n }\n else {\n \n \treturn \"-Xmx15G\"\n }\n}" + - position: 0 + prefix: '-jar' + valueFrom: /usr/picard/picard.jar + - position: 0 + prefix: '' + separate: false + valueFrom: CollectMultipleMetrics + - position: 0 + prefix: O= + separate: false + valueFrom: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/,'_multiple_metrics') + } + } +requirements: + - class: ResourceRequirement + ramMin: 10000 + coresMin: 8 + - class: DockerRequirement + dockerPull: 'broadinstitute/picard:2.21.2' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:sumans@mskcc.org' + 'foaf:name': Shalabh Suman + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': picard + 'doap:revision': 2.21.2 diff --git
a/picard_collectmultiplemetric_2.8.1/example_inputs.yml b/picard_collectmultiplemetric_2.8.1/example_inputs.yml new file mode 100644 index 00000000..72207111 --- /dev/null +++ b/picard_collectmultiplemetric_2.8.1/example_inputs.yml @@ -0,0 +1,18 @@ +input: + class: File + path: "Sample.bam" +assume_sorted: +bam_compression_level: +create_bam_index: +dbsnp_file: +file_extension: +include_unpaired: +intervals_file: +memory_overhead: +memory_per_job: +metric_accumulation_level: +number_of_threads: +output_file_name: +program_list: +stop_after: +validation_stringency: diff --git a/picard_collectmultiplemetric_2.8.1/picard_collectmultiplemetrics_2-8-1.cwl b/picard_collectmultiplemetric_2.8.1/picard_collectmultiplemetrics_2-8-1.cwl new file mode 100644 index 00000000..fa277b31 --- /dev/null +++ b/picard_collectmultiplemetric_2.8.1/picard_collectmultiplemetrics_2-8-1.cwl @@ -0,0 +1,268 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: picard_collectmultiplemetrics_2_8_1 +baseCommand: + - java +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: input + type: File + inputBinding: + position: 0 + prefix: I= + separate: false + doc: Input file (bam or sam). Required. + - id: output_file_name + type: string? + doc: Output file (bam or sam). + - id: validation_stringency + type: string? + inputBinding: + position: 0 + prefix: VALIDATION_STRINGENCY= + separate: false + doc: >- + Validation stringency for all SAM files read by this program. Setting + stringency to SILENT can improve performance when processing a BAM file in + which variable-length data (read, qualities, tags) do not otherwise need + to be decoded. Default value: STRICT. 
This option can be set to 'null' to + clear the default value. Possible values: {STRICT,LENIENT, SILENT} + - default: true + id: assume_sorted + type: boolean? + inputBinding: + position: 0 + prefix: AS=true + separate: false + - id: bam_compression_level + type: int? + inputBinding: + position: 0 + prefix: COMPRESSION_LEVEL= + separate: false + doc: >- + Compression level for all compressed files created (e.g. BAM and GELI). + Default value:5. This option can be set to 'null' to clear the default + value. + - default: true + id: create_bam_index + type: boolean? + inputBinding: + position: 0 + prefix: CREATE_INDEX=true + separate: false + doc: >- + Whether to create a BAM index when writing a coordinate-sorted BAM file. + Default value:false. This option can be set to 'null' to clear the default + value. Possible values:{true, false} + - id: stop_after + type: int? + inputBinding: + position: 0 + prefix: STOP_AFTER= + separate: false + doc: >- + Stop after processing N reads, mainly for debugging. Default value: 0. + This option can be set to 'null' to clear the default value. + - id: metric_accumulation_level + type: string? + inputBinding: + position: 0 + prefix: METRIC_ACCUMULATION_LEVEL= + separate: false + doc: >- + The level(s) at which to accumulate metrics. Default value: [ALL_READS]. + This option can be set to 'null' to clear the default value. Possible + values: {ALL_READS, SAMPLE, LIBRARY, READ_GROUP} This option may be + specified 0 or more times. This option can be set to 'null' to clear the + default list. + - id: file_extension + type: string? + inputBinding: + position: 0 + prefix: FILE_EXTENSION= + separate: false + doc: >- + Append the given file extension to all metric file names (ex. + OUTPUT.insert_size_metrics.EXT). None if null Default value: null. + - id: program_list + type: 'string[]?' + inputBinding: + position: 0 + prefix: PROGRAM= + separate: false + doc: >- + Set of metrics programs to apply during the pass through the SAM file. 
+ Default value: [CollectAlignmentSummaryMetrics, + CollectBaseDistributionByCycle, CollectInsertSizeMetrics, + MeanQualityByCycle, QualityScoreDistribution]. This option can be set to + 'null' to clear the default value. Possible values: + {CollectAlignmentSummaryMetrics, CollectInsertSizeMetrics, + QualityScoreDistribution, MeanQualityByCycle, + CollectBaseDistributionByCycle, CollectGcBiasMetrics, RnaSeqMetrics, + CollectSequencingArtifactMetrics, CollectQualityYieldMetrics} This option + may be specified 0 or more times. This option can be set to 'null' to + clear the default list. + - id: intervals_file + type: File? + inputBinding: + position: 0 + prefix: INTERVALS= + separate: false + doc: >- + An optional list of intervals to restrict analysis to. Only pertains to + some of the PROGRAMs. Programs whose stand-alone CLP does not have an + INTERVALS argument will silently ignore this argument. Default value: + null. + - id: dbsnp_file + type: File? + inputBinding: + position: 0 + prefix: DB_SNP= + separate: false + doc: >- + VCF format dbSNP file, used to exclude regions around known polymorphisms + from analysis by some PROGRAMs; PROGRAMs whose CLP doesn't allow for this + argument will quietly ignore it. Default value: null. + - id: include_unpaired + type: boolean? + inputBinding: + position: 0 + prefix: INCLUDE_UNPAIRED=true + separate: false + doc: >- + Include unpaired reads in CollectSequencingArtifactMetrics. If set to true + then all paired reads will be included as well - MINIMUM_INSERT_SIZE and + MAXIMUM_INSERT_SIZE will be ignored in CollectSequencingArtifactMetrics. + Default value: false. This option can be set to 'null' to clear the + default value. Possible values: {true, false} +outputs: + - id: picard_collectmultiplemetrics_alignment_summary_metrics + type: File? + outputBinding: + glob: '*alignment_summary_metrics' + - id: picard_collectmultiplemetrics_bait_bias_detail_metrics + type: File? 
+ outputBinding: + glob: '*bait_bias_detail_metrics' + - id: picard_collectmultiplemetrics_bait_bias_summary_metrics + type: File? + outputBinding: + glob: '*bait_bias_summary_metrics' + - id: picard_collectmultiplemetrics_base_distribution_by_cycle_metrics + type: File? + outputBinding: + glob: '*base_distribution_by_cycle_metrics' + - id: picard_collectmultiplemetrics_base_distribution_by_cycle_pdf + type: File? + outputBinding: + glob: '*base_distribution_by_cycle.pdf' + - id: picard_collectmultiplemetrics_error_summary_metrics + type: File? + outputBinding: + glob: '*error_summary_metrics' + - id: picard_collectmultiplemetrics_gc_bias_detail_metrics + type: File? + outputBinding: + glob: '*gc_bias.detail_metrics' + - id: picard_collectmultiplemetrics_gc_bias_pdf + type: File? + outputBinding: + glob: '*gc_bias.pdf' + - id: picard_collectmultiplemetrics_gc_bias_summary_metrics + type: File? + outputBinding: + glob: '*gc_bias.summary_metrics' + - id: picard_collectmultiplemetrics_insert_size_histogram_pdf + type: File? + outputBinding: + glob: '*insert_size_histogram.pdf' + - id: picard_collectmultiplemetrics_insert_size_metrics + type: File? + outputBinding: + glob: '*insert_size_metrics' + - id: picard_collectmultiplemetrics_pre_adapter_detail_metrics + type: File? + outputBinding: + glob: '*pre_adapter_detail_metrics' + - id: picard_collectmultiplemetrics_pre_adapter_summary_metrics + type: File? + outputBinding: + glob: '*pre_adapter_summary_metrics' + - id: picard_collectmultiplemetrics_quality_by_cycle_metrics + type: File? + outputBinding: + glob: '*quality_by_cycle_metrics' + - id: picard_collectmultiplemetrics_quality_by_cycle_pdf + type: File? + outputBinding: + glob: '*quality_by_cycle.pdf' + - id: picard_collectmultiplemetrics_quality_distribution_metrics + type: File? + outputBinding: + glob: '*quality_distribution_metrics' + - id: picard_collectmultiplemetrics_quality_distribution_pdf + type: File? 
+ outputBinding: + glob: '*quality_distribution.pdf' +label: picard_collectmultiplemetrices_2.8.1 +arguments: + - position: 0 + prefix: '' + separate: false + valueFrom: "${\n if(inputs.memory_per_job && inputs.memory_overhead) {\n \n if(inputs.memory_per_job % 1000 == 0) {\n \t\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n \n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\" \n }\n }\n else if (inputs.memory_per_job && !inputs.memory_overhead){\n \n if(inputs.memory_per_job % 1000 == 0) {\n \t\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n \n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\" \n }\n }\n else if(!inputs.memory_per_job && inputs.memory_overhead){\n \n return \"-Xmx15G\"\n }\n else {\n \n \treturn \"-Xmx15G\"\n }\n}" + - position: 0 + prefix: '-jar' + valueFrom: /usr/local/bin/picard.jar + - position: 0 + prefix: '' + separate: false + valueFrom: CollectMultipleMetrics + - position: 0 + prefix: O= + separate: false + valueFrom: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/,'_multiple_metrics') + } + } +requirements: + - class: ResourceRequirement + ramMin: 10000 + coresMin: 8 + - class: DockerRequirement + dockerPull: 'mskaccess/picard:0.6.3' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:sumans@mskcc.org' + 'foaf:name': Shalabh Suman + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:sumans@mskcc.org' + 'foaf:name': Shalabh Suman + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': picard + 'doap:revision': 2.8.1 diff --git 
a/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl b/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl index cfc74168..d1b9c299 100644 --- a/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl +++ b/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl @@ -1,10 +1,12 @@ class: CommandLineTool cwlVersion: v1.0 $namespaces: + cwltool: 'http://commonwl.org/cwltool#' dct: 'http://purl.org/dc/terms/' doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' -id: picard_fix_mate_information_1.96 + sbg: 'https://www.sevenbridges.com/' +id: picard_fix_mate_information_1_96 baseCommand: - java inputs: @@ -27,8 +29,7 @@ inputs: - ^.bai - id: output_file_name type: string? - doc: >- - Output file name (bam or sam). Not Required + doc: Output file name (bam or sam). Not Required - id: sort_order type: string? inputBinding: @@ -39,13 +40,6 @@ inputs: Optional sort order to output in. If not supplied OUTPUT is in the same order as INPUT.Default value: null. Possible values: {unsorted, queryname, coordinate} - - id: tmp_dir - type: string? - inputBinding: - position: 0 - prefix: TMP_DIR= - separate: false - doc: This option may be specified 0 or more times - id: validation_stringency type: string? inputBinding: @@ -78,8 +72,11 @@ inputs: Whether to create a BAM index when writing a coordinate-sorted BAM file. Default value:false. This option can be set to 'null' to clear the default value. Possible values:{true, false} + - id: temporary_directory + type: string? + doc: 'Default value: null. This option may be specified 0 or more times.' 
outputs: - - id: bam + - id: picard_fix_mate_information_bam type: File outputBinding: glob: |- @@ -120,9 +117,30 @@ arguments: return "-Xmx15G" } } + - position: 0 + shellQuote: false + valueFrom: '-XX:-UseGCOverheadLimit' + - position: 0 + prefix: '-Djava.io.tmpdir=' + separate: false + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } - position: 0 prefix: '-jar' valueFrom: /usr/local/bin/FixMateInformation.jar + - position: 0 + prefix: TMP_DIR= + separate: false + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } - position: 0 prefix: O= separate: false @@ -135,11 +153,12 @@ arguments: } } requirements: + - class: ShellCommandRequirement - class: ResourceRequirement - ramMin: 16000 + ramMin: 25000 coresMin: 2 - class: DockerRequirement - dockerPull: 'mskcc/picard_1.96:0.1.0' + dockerPull: 'mskaccess/picard_1.96:0.6.3' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/picard_fix_mate_information_2.21.2/example_inputs.yaml b/picard_fix_mate_information_2.21.2/example_inputs.yaml new file mode 100644 index 00000000..1d9e4ee2 --- /dev/null +++ b/picard_fix_mate_information_2.21.2/example_inputs.yaml @@ -0,0 +1,12 @@ +bam_compression_level: +create_bam_index: true +input: + class: File + path: "/path/to/bam" +memory_overhead: +memory_per_job: +number_of_threads: +output_file_name: somename_fm.bam +sort_order: +tmp_dir: +validation_stringency: diff --git a/picard_fix_mate_information_2.21.2/picard_fix_mate_information_2.21.2.cwl b/picard_fix_mate_information_2.21.2/picard_fix_mate_information_2.21.2.cwl new file mode 100644 index 00000000..731652ae --- /dev/null +++ b/picard_fix_mate_information_2.21.2/picard_fix_mate_information_2.21.2.cwl @@ -0,0 +1,166 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + 
foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: picard_fix_mate_information_2_21_2 +baseCommand: + - java +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: input + type: File + inputBinding: + position: 0 + prefix: I= + separate: false + doc: The input file to fix. This option may be specified 0 or more times + secondaryFiles: + - ^.bai + - id: output_file_name + type: string? + doc: Output file name (bam or sam). Not Required + - id: sort_order + type: string? + inputBinding: + position: 0 + prefix: SO= + separate: false + doc: >- + Optional sort order to output in. If not supplied OUTPUT is in the same + order as INPUT.Default value: null. Possible values: {unsorted, queryname, + coordinate} + - id: validation_stringency + type: string? + inputBinding: + position: 0 + prefix: VALIDATION_STRINGENCY= + separate: false + doc: >- + Validation stringency for all SAM files read by this program. Setting + stringency to SILENT can improve performance when processing a BAM file in + which variable-length data (read, qualities, tags) do not otherwise need + to be decoded. Default value: STRICT. This option can be set to 'null' to + clear the default value. Possible values: {STRICT,LENIENT, SILENT} + - id: bam_compression_level + type: int? + inputBinding: + position: 0 + prefix: COMPRESSION_LEVEL= + separate: false + doc: >- + Compression level for all compressed files created (e.g. BAM and GELI). + Default value:5. This option can be set to 'null' to clear the default + value. + - default: true + id: create_bam_index + type: boolean? + inputBinding: + position: 0 + prefix: CREATE_INDEX=true + doc: >- + Whether to create a BAM index when writing a coordinate-sorted BAM file. + Default value:false. This option can be set to 'null' to clear the default + value. 
Possible values:{true, false} +outputs: + - id: picard_fix_mate_information_bam + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/,'_fm.bam') + } + } + secondaryFiles: + - ^.bai +label: picard_fix_mate_information_2.21.2 +arguments: + - position: 0 + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx15G" + } + else { + return "-Xmx15G" + } + } + - position: 0 + valueFrom: "-XX:-UseGCOverheadLimit" + shellQuote: false + - position: 0 + valueFrom: "-Djava.io.tmpdir=$(runtime.tmpdir)" + shellQuote: false + - position: 0 + prefix: '-jar' + valueFrom: /usr/picard/picard.jar + - position: 0 + valueFrom: FixMateInformation + - position: 0 + prefix: TMP_DIR= + separate: false + valueFrom: "$(runtime.tmpdir)" + - position: 0 + prefix: O= + separate: false + valueFrom: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/,'_fm.bam') + } + } +requirements: + - class: ResourceRequirement + ramMin: 25000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'broadinstitute/picard:2.21.2' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer 
Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': picard + 'doap:revision': 2.21.2 diff --git a/picard_fix_mate_information_2.9.0/README.md b/picard_fix_mate_information_2.9.0/README.md new file mode 100644 index 00000000..3485d823 --- /dev/null +++ b/picard_fix_mate_information_2.9.0/README.md @@ -0,0 +1,84 @@ +# CWL and Dockerfile for running Picard - FixMateInformation + +## Version of tools in docker image (./container/Dockerfile) + +| Tool | Version | Location | +|--- |--- |--- | +| java base image | 8 | - | +| picard | 2.9.0 | https://github.com/broadinstitute/picard/releases/download/2.9.0/picard.jar | +| R | 3.3.3 | r-base for openjdk:8 | + +[![](https://images.microbadger.com/badges/image/mskaccess/picard_2.9.0.svg)](https://microbadger.com/images/mskaccess/picard_2.9.0 "Get your own image badge on microbadger.com") [![](https://images.microbadger.com/badges/version/mskaccess/picard_2.9.0.svg)](https://microbadger.com/images/mskaccess/picard_2.9.0 "Get your own version badge on microbadger.com") + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner picard_fix_mate_information_2.9.0.cwl example_inputs.yaml +``` + +**If at MSK, using the JUNO cluster having installed toil-msk version 3.21.1 you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/picard_fix_mate_information_2.9.0.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> toil-cwl-runner --singularity --logFile /path/to/picardFixMate_jobStore.log --jobStore /path/to/picardFixMate_jobStore --batchSystem lsf --workDir /path/to/picardFixMate_toil_log --outdir . 
--writeLogs /path/to/picardFixMate_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --disableChaining --maxLogFileSize 20000000000 /path/to/picard_fix_mate_information_2.9.0/picard_fix_mate_information_2.9.0.cwl /path/to/inputs.yaml > picardFixMate_toil.stdout 2> picardFixMate_toil.stderr & +``` + +### Usage + +``` +usage: picard_fix_mate_information_2.9.0/picard_fix_mate_information_2.9.0.cwl + [-h] [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] --input INPUT + [--output_file_name OUTPUT_FILE_NAME] [--sort_order SORT_ORDER] + [--validation_stringency VALIDATION_STRINGENCY] + [--bam_compression_level BAM_COMPRESSION_LEVEL] [--create_bam_index] + [--temporary_directory TEMPORARY_DIRECTORY] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --input INPUT The input file to fix. This option may be specified 0 + or more times + --output_file_name OUTPUT_FILE_NAME + Output file name (bam or sam). Not Required + --sort_order SORT_ORDER + Optional sort order to output in. If not supplied + OUTPUT is in the same order as INPUT.Default value: + null. Possible values: {unsorted, queryname, + coordinate} + --validation_stringency VALIDATION_STRINGENCY + Validation stringency for all SAM files read by this + program. Setting stringency to SILENT can improve + performance when processing a BAM file in which + variable-length data (read, qualities, tags) do not + otherwise need to be decoded. Default value: STRICT. + This option can be set to 'null' to clear the default + value. Possible values: {STRICT,LENIENT, SILENT} + --bam_compression_level BAM_COMPRESSION_LEVEL + Compression level for all compressed files created + (e.g. 
BAM and GELI). Default value:5. This option can + be set to 'null' to clear the default value. + --create_bam_index Whether to create a BAM index when writing a + coordinate-sorted BAM file. Default value:false. This + option can be set to 'null' to clear the default + value. Possible values:{true, false} + --temporary_directory TEMPORARY_DIRECTORY + Default value: null. This option may be specified 0 or + more times. +``` diff --git a/picard_fix_mate_information_2.9.0/container/Dockerfile b/picard_fix_mate_information_2.9.0/container/Dockerfile new file mode 100644 index 00000000..643fa70d --- /dev/null +++ b/picard_fix_mate_information_2.9.0/container/Dockerfile @@ -0,0 +1,44 @@ +################## BASE IMAGE ###################### + +FROM openjdk:8 + +################## ARGUMENTS/Environments ########## + +ARG BUILD_DATE +ARG BUILD_VERSION +ARG JAVA_VERSION=8 +ARG LICENSE="Apache-2.0" +ARG PICARD_VERSION=2.9.0 +ARG R_VERSION="3.3.3" + +################## METADATA ######################## +LABEL org.opencontainers.image.vendor="MSKCC" +LABEL org.opencontainers.image.authors="Ronak H Shah (shahr2@mskcc.org)" + +LABEL org.opencontainers.image.created=${BUILD_DATE} \ + org.opencontainers.image.version=${BUILD_VERSION} \ + org.opencontainers.image.licenses=${LICENSE} \ + org.opencontainers.image.version.picard=${PICARD_VERSION} \ + org.opencontainers.image.version.R=${R_VERSION} \ + org.opencontainers.image.version.java=${JAVA_VERSION} \ + org.opencontainers.image.source.picard="https://github.com/broadinstitute/picard/releases/download/${PICARD_VERSION}/picard.jar" \ + org.opencontainers.image.source.R="r-base" + +LABEL org.opencontainers.image.description="This container uses openjdk ${JAVA_VERSION} as the base image to build \ + picard version ${PICARD_VERSION}, \ + R version ${R_VERSION}" + +# Install git, unzip, wget and r-base +RUN apt-get update && \ + apt-get --no-install-recommends install -y \ + git \ + unzip \ + wget \ + r-base && \ + apt-get clean autoclean && 
\ + apt-get autoremove -y + +WORKDIR /usr/src + +RUN wget "https://github.com/broadinstitute/picard/releases/download/${PICARD_VERSION}/picard.jar" && \ + cp -s /usr/src/picard.jar /usr/local/bin/ diff --git a/picard_fix_mate_information_2.9.0/example_inputs.yaml b/picard_fix_mate_information_2.9.0/example_inputs.yaml new file mode 100644 index 00000000..91d8e497 --- /dev/null +++ b/picard_fix_mate_information_2.9.0/example_inputs.yaml @@ -0,0 +1,5 @@ +create_bam_index: true +input: + class: File + path: "/path/to/sample_id.bam" +output_file_name: sample_id_fm.bam diff --git a/picard_fix_mate_information_2.9.0/picard_fix_mate_information_2.9.0.cwl b/picard_fix_mate_information_2.9.0/picard_fix_mate_information_2.9.0.cwl new file mode 100644 index 00000000..f0f5eb7a --- /dev/null +++ b/picard_fix_mate_information_2.9.0/picard_fix_mate_information_2.9.0.cwl @@ -0,0 +1,182 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + cwltool: 'http://commonwl.org/cwltool#' + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: picard_fix_mate_information_2_9_0 +baseCommand: + - java +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: input + type: File + inputBinding: + position: 0 + prefix: I= + separate: false + doc: The input file to fix. This option may be specified 0 or more times + secondaryFiles: + - ^.bai + - id: output_file_name + type: string? + doc: Output file name (bam or sam). Not Required + - id: sort_order + type: string? + inputBinding: + position: 0 + prefix: SO= + separate: false + doc: >- + Optional sort order to output in. If not supplied OUTPUT is in the same + order as INPUT.Default value: null. Possible values: {unsorted, queryname, + coordinate} + - id: validation_stringency + type: string? 
+ inputBinding: + position: 0 + prefix: VALIDATION_STRINGENCY= + separate: false + doc: >- + Validation stringency for all SAM files read by this program. Setting + stringency to SILENT can improve performance when processing a BAM file in + which variable-length data (read, qualities, tags) do not otherwise need + to be decoded. Default value: STRICT. This option can be set to 'null' to + clear the default value. Possible values: {STRICT,LENIENT, SILENT} + - id: bam_compression_level + type: int? + inputBinding: + position: 0 + prefix: COMPRESSION_LEVEL= + separate: false + doc: >- + Compression level for all compressed files created (e.g. BAM and GELI). + Default value:5. This option can be set to 'null' to clear the default + value. + - default: true + id: create_bam_index + type: boolean? + inputBinding: + position: 0 + prefix: CREATE_INDEX=true + doc: >- + Whether to create a BAM index when writing a coordinate-sorted BAM file. + Default value:false. This option can be set to 'null' to clear the default + value. Possible values:{true, false} + - id: temporary_directory + type: string? + doc: 'Default value: null. This option may be specified 0 or more times.' 
+outputs: + - id: picard_fix_mate_information_bam + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/,'_fm.bam') + } + } + secondaryFiles: + - ^.bai +label: picard_fix_mate_information_2.9.0 +arguments: + - position: 0 + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx15G" + } + else { + return "-Xmx15G" + } + } + - position: 0 + shellQuote: false + valueFrom: '-XX:-UseGCOverheadLimit' + - position: 0 + prefix: '-Djava.io.tmpdir=' + separate: false + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } + - position: 0 + prefix: '-jar' + valueFrom: /usr/local/bin/picard.jar + - position: 0 + valueFrom: FixMateInformation + - position: 0 + prefix: TMP_DIR= + separate: false + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } + - position: 0 + prefix: O= + separate: false + valueFrom: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/,'_fm.bam') + } + } +requirements: + - class: ShellCommandRequirement + - class: ResourceRequirement + ramMin: 25000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'mskaccess/picard_2.9.0:0.1.0' + - class: InlineJavascriptRequirement 
+'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': picard + 'doap:revision': 2.9.0 diff --git a/picard_fix_mate_information_4.1.8.1/example_inputs.yaml b/picard_fix_mate_information_4.1.8.1/example_inputs.yaml new file mode 100644 index 00000000..a6581208 --- /dev/null +++ b/picard_fix_mate_information_4.1.8.1/example_inputs.yaml @@ -0,0 +1,14 @@ +bam_compression_level: +create_bam_index: true +use_jdk_deflater: true +use_jdk_inflater: true +input: + class: File + path: "/path/to/bam" +memory_overhead: +memory_per_job: +number_of_threads: +output_file_name: somename_fm.bam +sort_order: +tmp_dir: +validation_stringency: diff --git a/picard_fix_mate_information_4.1.8.1/picard_fix_mate_information_4.1.8.1.cwl b/picard_fix_mate_information_4.1.8.1/picard_fix_mate_information_4.1.8.1.cwl new file mode 100644 index 00000000..5140be34 --- /dev/null +++ b/picard_fix_mate_information_4.1.8.1/picard_fix_mate_information_4.1.8.1.cwl @@ -0,0 +1,180 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: picard_fix_mate_information_4_1_8_1 +baseCommand: + - java +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: input + type: File + inputBinding: + position: 0 + prefix: '-I' + doc: The input file to fix. 
This option may be specified 0 or more times + - id: output_file_name + type: string? + doc: Output file name (bam or sam). Not Required + - id: sort_order + type: string? + inputBinding: + position: 0 + prefix: '-SO' + doc: >- + Optional sort order to output in. If not supplied OUTPUT is in the same + order as INPUT.Default value: null. Possible values: {unsorted, queryname, + coordinate} + - id: validation_stringency + type: string? + inputBinding: + position: 0 + prefix: '--VALIDATION_STRINGENCY' + doc: >- + Validation stringency for all SAM files read by this program. Setting + stringency to SILENT can improve performance when processing a BAM file in + which variable-length data (read, qualities, tags) do not otherwise need + to be decoded. Default value: STRICT. This option can be set to 'null' to + clear the default value. Possible values: {STRICT,LENIENT, SILENT} + - id: bam_compression_level + type: int? + inputBinding: + position: 0 + prefix: '--COMPRESSION_LEVEL' + doc: >- + Compression level for all compressed files created (e.g. BAM and GELI). + Default value:5. This option can be set to 'null' to clear the default + value. + - id: use_jdk_deflater + type: boolean? + inputBinding: + position: 0 + prefix: '--USE_JDK_DEFLATER' + doc: >- + Use the JDK Deflater instead of the Intel Deflater for writing compressed + output + - id: use_jdk_inflater + type: boolean? + inputBinding: + position: 0 + prefix: '--USE_JDK_INFLATER' + doc: >- + Use the JDK Inflater instead of the Intel Inflater for reading compressed + input + - default: true + id: create_bam_index + type: boolean? + inputBinding: + position: 0 + prefix: '--CREATE_INDEX' + doc: >- + Whether to create a BAM index when writing a coordinate-sorted BAM file. + Default value:false. This option can be set to 'null' to clear the default + value. Possible values:{true, false} + - id: temporary_directory + type: string? + doc: 'Default value: null. This option may be specified 0 or more times.' 
+outputs: + - id: picard_fix_mate_information_bam + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/,'_fm.bam') + } + } + secondaryFiles: + - ^.bai +label: picard_fix_mate_information_4.1.8.1 +arguments: + - position: 0 + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx20G" + } + else { + return "-Xmx20G" + } + } + - position: 0 + shellQuote: false + valueFrom: '-XX:-UseGCOverheadLimit' + - position: 0 + prefix: '-jar' + valueFrom: /gatk/gatk-package-4.1.8.1-local.jar + - position: 0 + valueFrom: FixMateInformation + - position: 0 + prefix: '--TMP_DIR' + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } + - position: 0 + prefix: '-O' + valueFrom: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/,'_fm.bam') + } + } +requirements: + - class: ShellCommandRequirement + - class: ResourceRequirement + ramMin: 30000 + coresMin: 12 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/gatk:4.1.8.1' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:kumarn1@mskcc.org' + 'foaf:name': Nikhil Kumar + 'foaf:name': Memorial Sloan Kettering Cancer 
Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': picard + 'doap:revision': 4.1.8.1 diff --git a/picard_hsmetrics_2.21.2/example_inputs.yaml b/picard_hsmetrics_2.21.2/example_inputs.yaml new file mode 100644 index 00000000..0ab1e497 --- /dev/null +++ b/picard_hsmetrics_2.21.2/example_inputs.yaml @@ -0,0 +1,24 @@ +bait_intervals: + class: File + metadata: {} + path: 'picard_baits.interval_list' + secondaryFiles: [] +bait_set_name: null +clip_overlapping_reads: null +coverage_cap: null +input: + class: File + path: 'test_bam.bam' +metric_accumulation_level: null +minimum_base_quality: null +minimum_mapping_quality: null +near_distance: null +output_file_name: null +per_base_coverage: null +per_target_coverage: null +sample_size: null +target_intervals: + class: File + metadata: {} + path: 'picard_targets.interval_list' + secondaryFiles: [] diff --git a/picard_hsmetrics_2.21.2/picard_hsmetrics_2.21.2.cwl b/picard_hsmetrics_2.21.2/picard_hsmetrics_2.21.2.cwl new file mode 100644 index 00000000..10503349 --- /dev/null +++ b/picard_hsmetrics_2.21.2/picard_hsmetrics_2.21.2.cwl @@ -0,0 +1,206 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: picard_hsmetrics_2_21_2 +baseCommand: + - java +inputs: + - id: bait_intervals + type: File + inputBinding: + position: 0 + prefix: BAIT_INTERVALS= + separate: false + doc: >- + An interval list file that contains the locations of the baits used. + Default value: null. This option must be specified at least 1 times. + - id: bait_set_name + type: string? + inputBinding: + position: 0 + prefix: BAIT_SET_NAME= + separate: false + doc: >- + Bait set name. 
If not provided it is inferred from the filename of the + bait intervals. Default value: null + - id: minimum_mapping_quality + type: int? + inputBinding: + position: 0 + prefix: MINIMUM_MAPPING_QUALITY= + separate: false + doc: >- + Minimum mapping quality for a read to contribute coverage. Default value: + 20. This option can be set to 'null' to clear the default value. + - id: minimum_base_quality + type: int? + inputBinding: + position: 0 + prefix: MINIMUM_BASE_QUALITY= + separate: false + doc: >- + Minimum base quality for a base to contribute coverage. Default value: 20. + This option can be set to 'null' to clear the default value. + - id: clip_overlapping_reads + type: boolean? + inputBinding: + position: 0 + prefix: CLIP_OVERLAPPING_READS=true + separate: false + doc: >- + True if we are to clip overlapping reads, false otherwise. Default value: + true. This option can be set to 'null' to clear the default value. + Possible values: {true, false} + - id: target_intervals + type: File? + inputBinding: + position: 0 + prefix: TARGET_INTERVALS= + separate: false + doc: >- + An interval list file that contains the locations of the targets. Default + value: null. This option must be specified at least 1 times. + - id: input + type: File + inputBinding: + position: 0 + prefix: INPUT= + separate: false + doc: An aligned SAM or BAM file. Required. + secondaryFiles: + - ^.bai + - id: output_file_name + type: string? + inputBinding: + position: 0 + prefix: OUTPUT= + separate: false + doc: The output file to write the metrics to. Required. + - id: metric_accumulation_level + type: + - 'null' + - type: enum + symbols: + - ALL_READS + - SAMPLE + - LIBRARY + - READ_GROUP + name: metric_accumulation_level + inputBinding: + position: 0 + prefix: METRIC_ACCUMULATION_LEVEL= + separate: false + doc: >- + The level(s) at which to accumulate metrics. Default value: [ALL_READS]. + This option can be set to 'null' to clear the default value. 
Possible + values: {ALL_READS, SAMPLE, LIBRARY, READ_GROUP} This option may be + specified 0 or more times. This option can be set to 'null' to clear the + default list. + - id: per_target_coverage + type: File? + inputBinding: + position: 0 + prefix: PER_TARGET_COVERAGE= + separate: false + doc: >- + An optional file to output per target coverage information to. Default + value: null. + - id: per_base_coverage + type: File? + inputBinding: + position: 0 + prefix: PER_BASE_COVERAGE= + separate: false + doc: >- + An optional file to output per base coverage information to. The per-base + file contains one line per target base and can grow very large. It is not + recommended for use with large target sets. Default value: null. + - id: near_distance + type: int? + inputBinding: + position: 0 + prefix: NEAR_DISTANCE= + separate: false + doc: >- + The maximum distance between a read and the nearest probe/bait/amplicon + for the read to be considered 'near probe' and included in percent + selected. Default value: 250. This option can be set to 'null' to clear + the default value. + - id: coverage_cap + type: int? + inputBinding: + position: 0 + prefix: COVERAGE_CAP= + separate: false + doc: >- + Parameter to set a max coverage limit for Theoretical Sensitivity + calculations. Default is 200. Default value: 200. This option can be set + to 'null' to clear the default value. + - id: sample_size + type: int? + inputBinding: + position: 0 + prefix: SAMPLE_SIZE= + separate: false + doc: >- + Sample Size used for Theoretical Het Sensitivity sampling. Default is + 10000. Default value: 10000. This option can be set to 'null' to clear the + default value. +outputs: + - id: picard_hsmetrics_txt + type: File? 
+ outputBinding: + glob: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/,'.hsmetrics') + } + } +label: picard_hsmetrics_2.21.2 +arguments: + - position: 0 + prefix: '-jar' + valueFrom: /usr/picard/picard.jar + - position: 0 + valueFrom: CollectHsMetrics + - position: 0 + prefix: OUTPUT= + valueFrom: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/,'.hsmetrics') + } + } +requirements: + - class: ResourceRequirement + ramMin: 4000 + coresMin: 1 + - class: DockerRequirement + dockerPull: 'broadinstitute/picard:2.21.2' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:johnsoni@mskcc.org' + 'foaf:name': Ian Johnson + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': hsmetrics + 'doap:revision': 2.21.2 diff --git a/picard_hsmetrics_2.8.1/example_inputs.yaml b/picard_hsmetrics_2.8.1/example_inputs.yaml new file mode 100644 index 00000000..0ab1e497 --- /dev/null +++ b/picard_hsmetrics_2.8.1/example_inputs.yaml @@ -0,0 +1,24 @@ +bait_intervals: + class: File + metadata: {} + path: 'picard_baits.interval_list' + secondaryFiles: [] +bait_set_name: null +clip_overlapping_reads: null +coverage_cap: null +input: + class: File + path: 'test_bam.bam' +metric_accumulation_level: null +minimum_base_quality: null +minimum_mapping_quality: null +near_distance: null +output_file_name: null +per_base_coverage: null +per_target_coverage: null +sample_size: null +target_intervals: + class: File + metadata: {} + path: 'picard_targets.interval_list' + 
secondaryFiles: [] diff --git a/picard_hsmetrics_2.8.1/picard_hsmetrics_2.8.1.cwl b/picard_hsmetrics_2.8.1/picard_hsmetrics_2.8.1.cwl new file mode 100644 index 00000000..e24d9fbf --- /dev/null +++ b/picard_hsmetrics_2.8.1/picard_hsmetrics_2.8.1.cwl @@ -0,0 +1,158 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: picard_hsmetrics_2_8_1 +baseCommand: + - java +inputs: + - id: bait_intervals + type: File + inputBinding: + position: 0 + prefix: BAIT_INTERVALS= + separate: false + - id: bait_set_name + type: string? + inputBinding: + position: 0 + prefix: BAIT_SET_NAME= + separate: false + - id: minimum_mapping_quality + type: int? + inputBinding: + position: 0 + prefix: MINIMUM_MAPPING_QUALITY= + separate: false + - id: minimum_base_quality + type: int? + inputBinding: + position: 0 + prefix: MINIMUM_BASE_QUALITY= + separate: false + - id: clip_overlapping_reads + type: boolean? + inputBinding: + position: 0 + prefix: CLIP_OVERLAPPING_READS=true + separate: false + - id: target_intervals + type: File? + inputBinding: + position: 0 + prefix: TARGET_INTERVALS= + separate: false + - id: input + type: File + inputBinding: + position: 0 + prefix: INPUT= + separate: false + - id: output_file_name + type: string? + inputBinding: + position: 0 + prefix: OUTPUT= + separate: false + - id: metric_accumulation_level + type: + - 'null' + - type: enum + symbols: + - ALL_READS + - SAMPLE + - LIBRARY + - READ_GROUP + name: metric_accumulation_level + inputBinding: + position: 0 + prefix: METRIC_ACCUMULATION_LEVEL= + separate: false + - id: per_target_coverage + type: File? + inputBinding: + position: 0 + prefix: PER_TARGET_COVERAGE= + separate: false + - id: per_base_coverage + type: File? + inputBinding: + position: 0 + prefix: PER_BASE_COVERAGE= + separate: false + - id: near_distance + type: int? 
+ inputBinding: + position: 0 + prefix: NEAR_DISTANCE= + separate: false + - id: coverage_cap + type: int? + inputBinding: + position: 0 + prefix: COVERAGE_CAP= + separate: false + - id: sample_size + type: int? + inputBinding: + position: 0 + prefix: SAMPLE_SIZE= + separate: false +outputs: + - id: picard_hsmetrics_txt + type: File? + outputBinding: + glob: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/,'.hsmetrics') + } + } +label: picard_hsmetrics_2.8.1 +arguments: + - position: 0 + prefix: '-jar' + valueFrom: /usr/local/bin/picard.jar + - position: 0 + valueFrom: CollectHsMetrics + - position: 0 + prefix: OUTPUT= + valueFrom: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/,'.hsmetrics') + } + } +requirements: + - class: ResourceRequirement + ramMin: 4000 + coresMin: 1 + - class: DockerRequirement + dockerPull: 'mskaccess/picard:0.6.2' + - class: InlineJavascriptRequirement + +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:johnsoni@mskcc.org' + 'foaf:name': Ian Johnson + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:johnsoni@mskcc.org' + 'foaf:name': Ian Johnson + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': hsmetrics + 'doap:revision': 2.8.1 diff --git a/picard_mark_duplicates_1.96/README.md b/picard_mark_duplicates_1.96/README.md deleted file mode 100644 index bb651139..00000000 --- a/picard_mark_duplicates_1.96/README.md +++ /dev/null @@ -1,20 +0,0 @@ -# CWL and Dockerfile for running Picard - MarkDuplicates - -## Version of tools in docker image (/container/Dockerfile) - -| Tool | Version | Location | -|--- |--- |--- | -| java base image | 8 | - | -| 
picard | 1.96 | https://sourceforge.net/projects/picard/files/picard-tools/1.96/picard-tools-1.96.zip | -| R | 3.3.3 | r-base for opnejdk:8 | - - -## CWL - -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner picard_mark_duplicates_1.96.cwl example_inputs.yaml -``` diff --git a/picard_mark_duplicates_1.96/picard_mark_duplicates_1.96.cwl b/picard_mark_duplicates_1.96/picard_mark_duplicates_1.96.cwl index f6b19306..186f7a0b 100644 --- a/picard_mark_duplicates_1.96/picard_mark_duplicates_1.96.cwl +++ b/picard_mark_duplicates_1.96/picard_mark_duplicates_1.96.cwl @@ -4,7 +4,8 @@ $namespaces: dct: 'http://purl.org/dc/terms/' doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' -id: picard_mark_duplicates_1.96 + sbg: 'https://www.sevenbridges.com/' +id: picard_mark_duplicates_1_96 baseCommand: - java inputs: @@ -96,7 +97,7 @@ inputs: position: 0 prefix: AS=true outputs: - - id: bam + - id: picard_mark_duplicates_bam type: File outputBinding: glob: '$(inputs.input.basename.replace(/.bam/, ''_md.bam''))' @@ -115,7 +116,7 @@ requirements: ramMin: "${\r if(inputs.memory_per_job && inputs.memory_overhead) {\r \r return inputs.memory_per_job + inputs.memory_overhead\r }\r else if (inputs.memory_per_job && !inputs.memory_overhead){\r \r \treturn inputs.memory_per_job + 2000\r }\r else if(!inputs.memory_per_job && inputs.memory_overhead){\r \r return 15000 + inputs.memory_overhead\r }\r else {\r \r \treturn 17000 \r }\r}" coresMin: "${\r if (inputs.number_of_threads) {\r \r \treturn inputs.number_of_threads \r }\r else {\r \r return 2\r }\r}" - class: DockerRequirement - dockerPull: 'mskcc/picard_1.96:0.1.0' + dockerPull: 'mskaccess/picard_1.96:0.6.3' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/picard_mark_duplicates_2.21.2/example_inputs.yaml 
b/picard_mark_duplicates_2.21.2/example_inputs.yaml new file mode 100644 index 00000000..4cb5941e --- /dev/null +++ b/picard_mark_duplicates_2.21.2/example_inputs.yaml @@ -0,0 +1,15 @@ +assume_sort_order: coordinate +bam_compression_level: null +create_bam_index: true +duplicate_scoring_strategy: null +duplication_metrics: test_metrics.txt +input: + class: File + path: /path/to/file.bam +memory_overhead: null +memory_per_job: null +number_of_threads: null +optical_duplicate_pixel_distance: null +output_file_name: null +tmp_dir: null +validation_stringency: null diff --git a/picard_mark_duplicates_2.21.2/picard_mark_duplicates_2.21.2.cwl b/picard_mark_duplicates_2.21.2/picard_mark_duplicates_2.21.2.cwl new file mode 100644 index 00000000..e5869717 --- /dev/null +++ b/picard_mark_duplicates_2.21.2/picard_mark_duplicates_2.21.2.cwl @@ -0,0 +1,181 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: picard_mark_duplicates_2_21_2 +baseCommand: + - java +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: input + type: File + inputBinding: + position: 0 + prefix: I= + separate: false + doc: Input file (bam or sam). Required. + - id: output_file_name + type: string? + doc: Output file (bam or sam). + - default: '$( inputs.input.basename.replace(/.bam/, ''_md.metrics'') )' + id: duplication_metrics + type: string + inputBinding: + position: 0 + prefix: M= + separate: false + doc: File to write duplication metrics to. Required. + - id: assume_sort_order + type: string? + inputBinding: + position: 0 + prefix: ASO= + separate: false + doc: >- + If not null, assume that the input file has this order even if the header + says otherwise. Default value: null.
Possible values: {unsorted, queryname, + coordinate} + - id: tmp_dir + type: string? + inputBinding: + position: 0 + prefix: TMP_DIR= + separate: false + doc: This option may be specified 0 or more times + - id: validation_stringency + type: string? + inputBinding: + position: 0 + prefix: VALIDATION_STRINGENCY= + separate: false + doc: >- + Validation stringency for all SAM files read by this program. Setting + stringency to SILENT can improve performance when processing a BAM file in + which variable-length data (read, qualities, tags) do not otherwise need + to be decoded. Default value: STRICT. This option can be set to 'null' to + clear the default value. Possible values: {STRICT,LENIENT, SILENT} + - id: bam_compression_level + type: int? + inputBinding: + position: 0 + prefix: COMPRESSION_LEVEL= + separate: false + doc: >- + Compression level for all compressed files created (e.g. BAM and GELI). + Default value:5. This option can be set to 'null' to clear the default + value. + - default: true + id: create_bam_index + type: boolean? + inputBinding: + position: 0 + prefix: CREATE_INDEX=true + separate: false + doc: >- + Whether to create a BAM index when writing a coordinate-sorted BAM file. + Default value:false. This option can be set to 'null' to clear the default + value. Possible values:{true, false} + - id: duplicate_scoring_strategy + type: string? + inputBinding: + position: 0 + prefix: DUPLICATE_SCORING_STRATEGY= + separate: false + doc: >- + The scoring strategy for choosing the non-duplicate among candidates. + Default value:SUM_OF_BASE_QUALITIES. This option can be set to 'null' to + clear the default value.Possible values: {SUM_OF_BASE_QUALITIES, + TOTAL_MAPPED_REFERENCE_LENGTH, RANDOM} + - id: optical_duplicate_pixel_distance + type: int? + inputBinding: + position: 0 + prefix: OPTICAL_DUPLICATE_PIXEL_DISTANCE= + separate: false + doc: >- + The maximum offset between two duplicate clusters in order to consider + them optical duplicates. 
The default is appropriate for unpatterned + versions of the Illumina platform. For the patterned flowcell models, 2500 + is more appropriate. For other platforms and models, users should + experiment to find what works best. Default value: 100. This option can + be set to 'null' to clear the default value. +outputs: + - id: picard_mark_duplicates_bam + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/,'_md.bam') + } + } + secondaryFiles: + - ^.bai + - id: picard_mark_duplicates_metrics + type: File + outputBinding: + glob: |- + ${ + if(inputs.duplication_metrics){ + return inputs.duplication_metrics + } else { + return inputs.input.basename.replace(/.bam/,'_md.metrics') + } + } +label: picard_mark_duplicates_2.21.2 +arguments: + - position: 0 + valueFrom: "${\n if(inputs.memory_per_job && inputs.memory_overhead) {\n \n if(inputs.memory_per_job % 1000 == 0) {\n \t\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n \n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\" \n }\n }\n else if (inputs.memory_per_job && !inputs.memory_overhead){\n \n if(inputs.memory_per_job % 1000 == 0) {\n \t\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n \n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\" \n }\n }\n else if(!inputs.memory_per_job && inputs.memory_overhead){\n \n return \"-Xmx15G\"\n }\n else {\n \n \treturn \"-Xmx15G\"\n }\n}" + - position: 0 + prefix: '-jar' + valueFrom: /usr/picard/picard.jar + - position: 0 + valueFrom: MarkDuplicates + - position: 0 + prefix: O= + separate: false + valueFrom: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/,'_md.bam') + } + } +requirements: + - class: ResourceRequirement + ramMin: 17000 + coresMin: 2 + - class:
DockerRequirement + dockerPull: 'broadinstitute/picard:2.21.2' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': picard + 'doap:revision': 2.21.2 diff --git a/picard_mark_duplicates_2.8.1/README.md b/picard_mark_duplicates_2.8.1/README.md deleted file mode 100644 index 173d0b7e..00000000 --- a/picard_mark_duplicates_2.8.1/README.md +++ /dev/null @@ -1,20 +0,0 @@ -# CWL and Dockerfile for running Picard - MarkDuplicates - -## Version of tools in docker image (/container/Dockerfile) - -| Tool | Version | Location | -|--- |--- |--- | -| java base image | 8 | - | -| picard | 2.8.1 | https://github.com/broadinstitute/picard/releases/download/2.8.1/picard.jar | -| R | 3.3.3 | r-base for opnejdk:8 | - - -## CWL - -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner picard_mark_duplicates_2.8.1.cwl example_inputs.yaml -``` \ No newline at end of file diff --git a/picard_mark_duplicates_2.8.1/picard_mark_duplicates_2.8.1.cwl b/picard_mark_duplicates_2.8.1/picard_mark_duplicates_2.8.1.cwl index ccc6165a..402a37fa 100644 --- a/picard_mark_duplicates_2.8.1/picard_mark_duplicates_2.8.1.cwl +++ b/picard_mark_duplicates_2.8.1/picard_mark_duplicates_2.8.1.cwl @@ -4,6 +4,7 @@ $namespaces: dct: 'http://purl.org/dc/terms/' doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' id: picard_mark_duplicates_2_8_1 baseCommand: - java @@ -114,7 +115,7 
@@ inputs: experiment to find what works best. Default value: 100. This option can be set to 'null' to clear the default value. outputs: - - id: bam + - id: picard_mark_duplicates_bam type: File outputBinding: glob: |- @@ -150,10 +151,10 @@ arguments: } requirements: - class: ResourceRequirement - ramMin: "${\r if(inputs.memory_per_job && inputs.memory_overhead) {\r \r return inputs.memory_per_job + inputs.memory_overhead\r }\r else if (inputs.memory_per_job && !inputs.memory_overhead){\r \r \treturn inputs.memory_per_job + 2000\r }\r else if(!inputs.memory_per_job && inputs.memory_overhead){\r \r return 15000 + inputs.memory_overhead\r }\r else {\r \r \treturn 17000 \r }\r}" - coresMin: "${\r if (inputs.number_of_threads) {\r \r \treturn inputs.number_of_threads \r }\r else {\r \r return 2\r }\r}" + ramMin: 17000 + coresMin: 2 - class: DockerRequirement - dockerPull: 'mskcc/picard:2.8.1' + dockerPull: 'mskaccess/picard:0.6.3' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' @@ -173,6 +174,3 @@ requirements: - class: 'doap:Version' 'doap:name': picard 'doap:revision': 2.8.1 - - class: 'doap:Version' - 'doap:name': cwl-wrapper - 'doap:revision': 1.0.0 diff --git a/picard_mark_duplicates_2.9.0/README.md b/picard_mark_duplicates_2.9.0/README.md new file mode 100644 index 00000000..ece95d84 --- /dev/null +++ b/picard_mark_duplicates_2.9.0/README.md @@ -0,0 +1,91 @@ +# CWL and Dockerfile for running Picard - MarkDuplicates + +## Version of tools in docker image (./container/Dockerfile) + +| Tool | Version | Location | +|--- |--- |--- | +| java base image | 8 | - | +| picard | 2.9.0 | https://github.com/broadinstitute/picard/releases/download/2.9.0/picard.jar | +| R | 3.3.3 | r-base for openjdk:8 | + +[![](https://images.microbadger.com/badges/image/mskaccess/picard_2.9.0.svg)](https://microbadger.com/images/mskaccess/picard_2.9.0 "Get your own image badge on microbadger.com") 
[![](https://images.microbadger.com/badges/version/mskaccess/picard_2.9.0.svg)](https://microbadger.com/images/mskaccess/picard_2.9.0 "Get your own version badge on microbadger.com") + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner picard_mark_duplicates_2.9.0.cwl example_inputs.yaml +``` + +## Usage +``` +usage: picard_mark_duplicates_2.9.0/picard_mark_duplicates_2.9.0.cwl + [-h] [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] --input INPUT + [--output_file_name OUTPUT_FILE_NAME] + [--duplication_metrics DUPLICATION_METRICS] [--sort_order SORT_ORDER] + [--tmp_dir TMP_DIR] [--validation_stringency VALIDATION_STRINGENCY] + [--bam_compression_level BAM_COMPRESSION_LEVEL] [--create_bam_index] + [--assume_sorted] + [--duplicate_scoring_strategy DUPLICATE_SCORING_STRATEGY] + [--optical_duplicate_pixel_distance OPTICAL_DUPLICATE_PIXEL_DISTANCE] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --input INPUT Input file (bam or sam). Required. + --output_file_name OUTPUT_FILE_NAME + Output file (bam or sam). + --duplication_metrics DUPLICATION_METRICS + File to write duplication metrics to Required. + --sort_order SORT_ORDER + Optional sort order to output in. If not supplied + OUTPUT is in the same order as INPUT.Default value: + null. Possible values: {unsorted, queryname, + coordinate} + --tmp_dir TMP_DIR This option may be specified 0 or more times + --validation_stringency VALIDATION_STRINGENCY + Validation stringency for all SAM files read by this + program. 
Setting stringency to SILENT can improve + performance when processing a BAM file in which + variable-length data (read, qualities, tags) do not + otherwise need to be decoded. Default value: STRICT. + This option can be set to 'null' to clear the default + value. Possible values: {STRICT, LENIENT, SILENT} + --bam_compression_level BAM_COMPRESSION_LEVEL + Compression level for all compressed files created + (e.g. BAM and GELI). Default value: 5. This option can + be set to 'null' to clear the default value. + --create_bam_index Whether to create a BAM index when writing a + coordinate-sorted BAM file. Default value: false. This + option can be set to 'null' to clear the default + value. Possible values: {true, false} + --assume_sorted + --duplicate_scoring_strategy DUPLICATE_SCORING_STRATEGY + The scoring strategy for choosing the non-duplicate + among candidates. Default value: SUM_OF_BASE_QUALITIES. + This option can be set to 'null' to clear the default + value. Possible values: {SUM_OF_BASE_QUALITIES, + TOTAL_MAPPED_REFERENCE_LENGTH, RANDOM} + --optical_duplicate_pixel_distance OPTICAL_DUPLICATE_PIXEL_DISTANCE + The maximum offset between two duplicate clusters in + order to consider them optical duplicates. The default + is appropriate for unpatterned versions of the + Illumina platform. For the patterned flowcell models, + 2500 is more appropriate. For other platforms and + models, users should experiment to find what works + best. Default value: 100. This option can be set to + 'null' to clear the default value.
+``` diff --git a/picard_mark_duplicates_2.9.0/example_inputs.yaml b/picard_mark_duplicates_2.9.0/example_inputs.yaml new file mode 100644 index 00000000..234d0b62 --- /dev/null +++ b/picard_mark_duplicates_2.9.0/example_inputs.yaml @@ -0,0 +1,7 @@ +create_bam_index: true +duplication_metrics: mark_duplicates_md.metrics +input: + class: File + path: "path/to/sample_id.bam" +optical_duplicate_pixel_distance: 2500 +output: mark_duplicates_md.bam diff --git a/picard_mark_duplicates_2.9.0/picard_mark_duplicates_2.9.0.cwl b/picard_mark_duplicates_2.9.0/picard_mark_duplicates_2.9.0.cwl new file mode 100644 index 00000000..7c032bce --- /dev/null +++ b/picard_mark_duplicates_2.9.0/picard_mark_duplicates_2.9.0.cwl @@ -0,0 +1,206 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: picard_mark_duplicates_2_9_0 +baseCommand: + - java +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: input + type: File + inputBinding: + position: 0 + prefix: I= + separate: false + doc: Input file (bam or sam). Required. + - id: output_file_name + type: string? + doc: Output file (bam or sam). + - default: '$( inputs.input.basename.replace(/.bam/, ''_md.metrics'') )' + id: duplication_metrics + type: string + inputBinding: + position: 0 + prefix: M= + separate: false + valueFrom: '$( inputs.input.basename.replace(/.bam/, ''_md.metrics'') )' + doc: File to write duplication metrics to Required. + - id: sort_order + type: string? + inputBinding: + position: 0 + prefix: SO= + separate: false + doc: >- + Optional sort order to output in. If not supplied OUTPUT is in the same + order as INPUT.Default value: null. 
Possible values: {unsorted, queryname, + coordinate} + - id: tmp_dir + type: string? + inputBinding: + position: 0 + prefix: TMP_DIR= + separate: false + doc: This option may be specified 0 or more times + - id: validation_stringency + type: string? + inputBinding: + position: 0 + prefix: VALIDATION_STRINGENCY= + separate: false + doc: >- + Validation stringency for all SAM files read by this program. Setting + stringency to SILENT can improve performance when processing a BAM file in + which variable-length data (read, qualities, tags) do not otherwise need + to be decoded. Default value: STRICT. This option can be set to 'null' to + clear the default value. Possible values: {STRICT,LENIENT, SILENT} + - id: bam_compression_level + type: int? + inputBinding: + position: 0 + prefix: COMPRESSION_LEVEL= + separate: false + doc: >- + Compression level for all compressed files created (e.g. BAM and GELI). + Default value:5. This option can be set to 'null' to clear the default + value. + - default: true + id: create_bam_index + type: boolean? + inputBinding: + position: 0 + prefix: CREATE_INDEX=true + doc: >- + Whether to create a BAM index when writing a coordinate-sorted BAM file. + Default value:false. This option can be set to 'null' to clear the default + value. Possible values:{true, false} + - default: true + id: assume_sorted + type: boolean? + inputBinding: + position: 0 + prefix: AS=true + - id: duplicate_scoring_strategy + type: string? + inputBinding: + position: 0 + prefix: DUPLICATE_SCORING_STRATEGY= + separate: false + doc: >- + The scoring strategy for choosing the non-duplicate among candidates. + Default value:SUM_OF_BASE_QUALITIES. This option can be set to 'null' to + clear the default value.Possible values: {SUM_OF_BASE_QUALITIES, + TOTAL_MAPPED_REFERENCE_LENGTH, RANDOM} + - id: optical_duplicate_pixel_distance + type: int? 
+ inputBinding: + position: 0 + prefix: OPTICAL_DUPLICATE_PIXEL_DISTANCE= + doc: >- + The maximum offset between two duplicate clusters in order to consider + them optical duplicates. The default is appropriate for unpatterned + versions of the Illumina platform. For the patterned flowcell models, 2500 + is more appropriate. For other platforms and models, users should + experiment to find what works best. Default value: 100. This option can + be set to 'null' to clear the default value. + - id: read_name_regex + type: string? + inputBinding: + position: 0 + prefix: READ_NAME_REGEX= + separate: false +outputs: + - id: picard_mark_duplicates_bam + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/,'_md.bam') + } + } + secondaryFiles: + - ^.bai + - ^.metrics +label: picard_mark_duplicates_2.9.0 +arguments: + - position: 0 + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx8G" + } + else { + return "-Xmx8G" + } + } + - position: 0 + prefix: '-jar' + valueFrom: /usr/local/bin/picard.jar + - position: 0 + valueFrom: MarkDuplicates + - position: 0 + prefix: O= + separate: false + valueFrom: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/,'_md.bam') + } + } +requirements: + - class: ResourceRequirement + ramMin: 17000 +
coresMin: 2 + - class: DockerRequirement + dockerPull: 'mskaccess/picard_2.9.0:0.1.0' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': picard + 'doap:revision': 2.9.0 diff --git a/picard_mark_duplicates_4.1.8.1/example_inputs.yaml b/picard_mark_duplicates_4.1.8.1/example_inputs.yaml new file mode 100644 index 00000000..08a4e56d --- /dev/null +++ b/picard_mark_duplicates_4.1.8.1/example_inputs.yaml @@ -0,0 +1,19 @@ +assume_sort_order: coordinate +bam_compression_level: null +create_bam_index: true +use_jdk_deflater: true +use_jdk_inflater: true +sorting_collection_size_ratio: 0.25 +read_name_regex: 'null' +duplicate_scoring_strategy: null +duplication_metrics: test_metrics.txt +input: + class: File + path: /path/to/file.bam +memory_overhead: null +memory_per_job: null +number_of_threads: null +optical_duplicate_pixel_distance: null +output_file_name: null +validation_stringency: null + diff --git a/picard_mark_duplicates_4.1.8.1/picard_mark_duplicates_4.1.8.1.cwl b/picard_mark_duplicates_4.1.8.1/picard_mark_duplicates_4.1.8.1.cwl new file mode 100644 index 00000000..c6418246 --- /dev/null +++ b/picard_mark_duplicates_4.1.8.1/picard_mark_duplicates_4.1.8.1.cwl @@ -0,0 +1,251 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: picard_mark_duplicates_4.1.8.1 +baseCommand: + - java +inputs: + - id: memory_per_job + type: int? 
+ doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: input + type: File + inputBinding: + position: 0 + prefix: -I + doc: Input file (bam or sam). Required. + - id: output_file_name + type: string? + doc: Output file (bam or sam). + - default: '$( inputs.input.basename.replace(/.bam/, ''_md.metrics'') )' + id: duplication_metrics + type: string + inputBinding: + position: 0 + prefix: -M + doc: File to write duplication metrics to Required. + - id: assume_sort_order + type: string? + inputBinding: + position: 0 + prefix: -ASO + doc: >- + Optional sort order to output in. If not supplied OUTPUT is in the same + order as INPUT.Default value: null. Possible values: {unsorted, queryname, + coordinate} + - id: tmp_dir + type: string? + inputBinding: + position: 0 + prefix: --TMP_DIR + doc: This option may be specified 0 or more times + - id: validation_stringency + type: string? + inputBinding: + position: 0 + prefix: --VALIDATION_STRINGENCY + doc: >- + Validation stringency for all SAM files read by this program. Setting + stringency to SILENT can improve performance when processing a BAM file in + which variable-length data (read, qualities, tags) do not otherwise need + to be decoded. Default value: STRICT. This option can be set to 'null' to + clear the default value. Possible values: {STRICT,LENIENT, SILENT} + - id: bam_compression_level + type: int? + inputBinding: + position: 0 + prefix: --COMPRESSION_LEVEL + doc: >- + Compression level for all compressed files created (e.g. BAM and GELI). + Default value:5. This option can be set to 'null' to clear the default + value. + - default: true + id: create_bam_index + type: boolean? + inputBinding: + position: 0 + prefix: --CREATE_INDEX + doc: >- + Whether to create a BAM index when writing a coordinate-sorted BAM file. + Default value:false. This option can be set to 'null' to clear the default + value. 
Possible values:{true, false} + - id: read_name_regex + type: string? + inputBinding: + position: 0 + prefix: --READ_NAME_REGEX + doc: >- + MarkDuplicates can use the tile and cluster positions to estimate the rate of + optical duplication in addition to the dominant source of duplication, PCR, + to provide a more accurate estimation of library size. By default (with no + READ_NAME_REGEX specified), MarkDuplicates will attempt to extract coordinates + using a split on ':' (see Note below). Set READ_NAME_REGEX to 'null' to + disable optical duplicate detection. Note that without optical duplicate + counts, library size estimation will be less accurate. If the read name does + not follow a standard Illumina colon-separation convention, but does contain + tile and x,y coordinates, a regular expression can be specified to extract + three variables: tile/region, x coordinate and y coordinate from a read name. + The regular expression must contain three capture groups for the three variables, + in order. It must match the entire read name. e.g. if field names were separated + by semi-colon (';') this example regex could be specified + (?:.*;)?([0-9]+)[^;]*;([0-9]+)[^;]*;([0-9]+)[^;]*$ Note that if no + READ_NAME_REGEX is specified, the read name is split on ':'. For 5 element names, + the 3rd, 4th and 5th elements are assumed to be tile, x and y values. For 7 + element names (CASAVA 1.8), the 5th, 6th, and 7th elements are assumed to be + tile, x and y values. + - id: sorting_collection_size_ratio + type: int? + inputBinding: + position: 0 + prefix: --SORTING_COLLECTION_SIZE_RATIO + doc: >- + This number, plus the maximum RAM available to the JVM, determine the memory + footprint used by some of the sorting collections. If you are running out of memory, try reducing this number. + - id: use_jdk_deflater + type: boolean? 
+ inputBinding: + position: 0 + prefix: --USE_JDK_DEFLATER + doc: >- + Use the JDK Deflater instead of the Intel Deflater for writing compressed output + - id: use_jdk_inflater + type: boolean? + inputBinding: + position: 0 + prefix: --USE_JDK_INFLATER + doc: >- + Use the JDK Inflater instead of the Intel Inflater for reading compressed input + - id: duplicate_scoring_strategy + type: string? + inputBinding: + position: 0 + prefix: --DUPLICATE_SCORING_STRATEGY + doc: >- + The scoring strategy for choosing the non-duplicate among candidates. + Default value: SUM_OF_BASE_QUALITIES. This option can be set to 'null' to + clear the default value. Possible values: {SUM_OF_BASE_QUALITIES, + TOTAL_MAPPED_REFERENCE_LENGTH, RANDOM} + - id: optical_duplicate_pixel_distance + type: int? + inputBinding: + position: 0 + prefix: --OPTICAL_DUPLICATE_PIXEL_DISTANCE + doc: >- + The maximum offset between two duplicate clusters in order to consider + them optical duplicates. The default is appropriate for unpatterned + versions of the Illumina platform. For the patterned flowcell models, 2500 + is more appropriate. For other platforms and models, users should + experiment to find what works best. Default value: 100. This option can + be set to 'null' to clear the default value. + - id: temporary_directory + type: string? + doc: 'Default value: null. This option may be specified 0 or more times.'
+outputs: + - id: picard_mark_duplicates_bam + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/,'_md.bam') + } + } + secondaryFiles: + - ^.bai + - id: picard_mark_duplicates_metrics + type: File + outputBinding: + glob: |- + ${ + if(inputs.duplication_metrics){ + return inputs.duplication_metrics + } else { + return inputs.input.basename.replace(/.bam/,'_md.metrics') + } + } +label: picard_mark_duplicates_4.1.8.1 +arguments: + - position: 0 + valueFrom: "${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\" + } + else { + return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\" + } + else { + return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return \"-Xmx15G\" + } + else { + return \"-Xmx15G\" + } + }" + - position: 0 + prefix: '-jar' + valueFrom: /gatk/gatk-package-4.1.8.1-local.jar + - position: 0 + valueFrom: MarkDuplicates + - position: 0 + prefix: -O + valueFrom: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/,'_md.bam') + } + } + - position: 0 + prefix: '--TMP_DIR' + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } +requirements: + - class: ResourceRequirement + ramMin: 17000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/gatk:4.1.8.1' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 
'foaf:Person' + 'foaf:mbox': 'mailto:kumarn1@mskcc.org' + 'foaf:name': Nikhil Kumar + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': picard + 'doap:revision': 4.1.8.1 diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..946e86c6 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,20 @@ +toil-ionox0[cwl]==0.0.7 +pytz +typing==3.7.4 + +# From fixing pkg_resources.ContextualVersionConflict: +ruamel.yaml==0.15.77 + +# From requirements_dev +pip==21.1 +bumpversion==0.5.3 +wheel==0.32.1 +watchdog==0.9.0 +flake8==3.5.0 +tox==3.5.2 +coverage==4.5.1 +Sphinx==1.8.1 +twine==1.12.1 +pytest==3.8.2 +pytest-runner==4.2 +coloredlogs==10.0.0 diff --git a/requirements_p2.7.txt b/requirements_p2.7.txt new file mode 100644 index 00000000..24958578 --- /dev/null +++ b/requirements_p2.7.txt @@ -0,0 +1,14 @@ +toil-ionox0[cwl]==0.0.7 +pytz==2019.2 +typing==3.7.4.1 +ruamel.yaml==0.15.77 +bumpversion==0.5.3 +watchdog==0.9.0 +flake8==3.7.8 +tox==3.14.0 +coverage==4.5.4 +twine==1.15.0 +pytest==4.6.5 +pytest-runner==5.1 +coloredlogs==10.0 +pathlib2==2.3.2 diff --git a/samtools-merge_1.9/samtools-merge_1.9.cwl b/samtools-merge_1.9/samtools-merge_1.9.cwl index 5746b362..b7b5fbbc 100644 --- a/samtools-merge_1.9/samtools-merge_1.9.cwl +++ b/samtools-merge_1.9/samtools-merge_1.9.cwl @@ -11,7 +11,7 @@ inputs: position: 2 doc: Input array containing files to be merged outputs: - - id: output_file + - id: samtools_merge_bam type: File outputBinding: glob: '*merged.bam' diff --git a/samtools_sort_1.3.1/samtools_sort_1.3.1.cwl b/samtools_sort_1.3.1/samtools_sort_1.3.1.cwl index eb0eaeb9..d3ce72f1 100644 --- a/samtools_sort_1.3.1/samtools_sort_1.3.1.cwl +++ b/samtools_sort_1.3.1/samtools_sort_1.3.1.cwl @@ 
-1,6 +1,5 @@ class: CommandLineTool cwlVersion: v1.0 -$namespaces: baseCommand: - samtools - sort @@ -43,7 +42,7 @@ inputs: position: 0 prefix: '-O' outputs: - - id: output_file + - id: samtools_sort_bam type: File outputBinding: glob: '$(inputs.input.basename.replace(''bam'', ''sorted.bam''))' diff --git a/samtools_view_1.3.1/samtools_view_1.3.1.cwl b/samtools_view_1.3.1/samtools_view_1.3.1.cwl index 87616cff..6c738f64 100644 --- a/samtools_view_1.3.1/samtools_view_1.3.1.cwl +++ b/samtools_view_1.3.1/samtools_view_1.3.1.cwl @@ -197,7 +197,7 @@ inputs: position: 0 prefix: '-O' outputs: - - id: output_bam + - id: samtools_view_bam type: File outputBinding: glob: '$(inputs.input.basename.replace(''sam'', ''bam''))' diff --git a/scatterintervals_4.1.0.0/scatterintervals_4.1.0.0.cwl b/scatterintervals_4.1.0.0/scatterintervals_4.1.0.0.cwl index e334ca81..9d75bfe9 100644 --- a/scatterintervals_4.1.0.0/scatterintervals_4.1.0.0.cwl +++ b/scatterintervals_4.1.0.0/scatterintervals_4.1.0.0.cwl @@ -26,7 +26,7 @@ inputs: position: 0 prefix: '--output' outputs: - - id: interval_files + - id: gatk_scatter_intervals_interval_files type: 'File[]' outputBinding: glob: $(inputs.output)/*.interval_list diff --git a/sequence_qc/0.2.2/sequence_qc_0.2.2.cwl b/sequence_qc/0.2.2/sequence_qc_0.2.2.cwl new file mode 100644 index 00000000..07d00699 --- /dev/null +++ b/sequence_qc/0.2.2/sequence_qc_0.2.2.cwl @@ -0,0 +1,150 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: calculate_noise_0_2_2 +baseCommand: + - calculate_noise +inputs: + - id: reference + type: File + inputBinding: + position: 0 + prefix: --ref_fasta + secondaryFiles: + - ^.fasta.fai + doc: >- + Path to reference fasta, containing all regions in bed_file + - id: bam_file + type: File + inputBinding: + position: 0 + prefix: --bam_file + secondaryFiles: + - ^.bai + 
doc: >- + Path to BAM file for calculating noise [required] + - id: bed_file + type: File + inputBinding: + position: 0 + prefix: --bed_file + doc: >- + Path to BED file containing regions over which to calculate noise [required] + - id: sample_id + type: string + inputBinding: + position: 0 + prefix: --sample_id + doc: >- + Prefix to include in all output file names + - id: threshold + type: float? + inputBinding: + position: 0 + prefix: --threshold + doc: >- + Alt allele frequency past which to ignore positions from the calculation. + - id: truncate + type: int? + inputBinding: + position: 0 + prefix: --truncate + doc: >- + Whether to exclude trailing bases from reads that only partially overlap the bed file (0 or 1) + - id: min_mapq + type: int? + inputBinding: + position: 0 + prefix: --min_mapq + doc: >- + Exclude reads with a lower mapping quality + - id: min_basq + type: int? + inputBinding: + position: 0 + prefix: --min_basq + doc: >- + Exclude bases with a lower base quality + - id: max_depth + type: int? 
+ inputBinding: + position: 0 + prefix: --max_depth + doc: >- + Maximum read depth for calculation +outputs: + - id: sequence_qc_pileup + type: File + outputBinding: + glob: |- + ${ + return inputs.sample_id + 'pileup.tsv' + } + - id: sequence_qc_noise_positions + type: File + outputBinding: + glob: |- + ${ + return inputs.sample_id + 'noise_positions.tsv' + } + - id: sequence_qc_noise_acgt + type: File + outputBinding: + glob: |- + ${ + return inputs.sample_id + 'noise_acgt.tsv' + } + - id: sequence_qc_noise_n + type: File + outputBinding: + glob: |- + ${ + return inputs.sample_id + 'noise_n.tsv' + } + - id: sequence_qc_noise_del + type: File + outputBinding: + glob: |- + ${ + return inputs.sample_id + 'noise_del.tsv' + } + - id: sequence_qc_figures + type: File + outputBinding: + glob: |- + ${ + return inputs.sample_id + '_noise.html' + } +requirements: + - class: ResourceRequirement + ramMin: 8000 + coresMin: 1 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/sequence_qc:0.2.2' + - class: InlineJavascriptRequirement + - class: EnvVarRequirement + envDef: + LC_ALL: en_US.utf-8 + LANG: en_US.utf-8 +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': sesquence_qc + 'doap:revision': 0.2.2 diff --git a/sequence_qc/0.2.3/sequence_qc_0.2.3.cwl b/sequence_qc/0.2.3/sequence_qc_0.2.3.cwl new file mode 100644 index 00000000..16405ef6 --- /dev/null +++ b/sequence_qc/0.2.3/sequence_qc_0.2.3.cwl @@ -0,0 +1,150 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 
'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: calculate_noise_0_2_3 +baseCommand: + - calculate_noise +inputs: + - id: reference + type: File + inputBinding: + position: 0 + prefix: --ref_fasta + secondaryFiles: + - ^.fasta.fai + doc: >- + Path to reference fasta, containing all regions in bed_file + - id: bam_file + type: File + inputBinding: + position: 0 + prefix: --bam_file + secondaryFiles: + - ^.bai + doc: >- + Path to BAM file for calculating noise [required] + - id: bed_file + type: File + inputBinding: + position: 0 + prefix: --bed_file + doc: >- + Path to BED file containing regions over which to calculate noise [required] + - id: sample_id + type: string + inputBinding: + position: 0 + prefix: --sample_id + doc: >- + Prefix to include in all output file names + - id: threshold + type: float? + inputBinding: + position: 0 + prefix: --threshold + doc: >- + Alt allele frequency past which to ignore positions from the calculation. + - id: truncate + type: int? + inputBinding: + position: 0 + prefix: --truncate + doc: >- + Whether to exclude trailing bases from reads that only partially overlap the bed file (0 or 1) + - id: min_mapq + type: int? + inputBinding: + position: 0 + prefix: --min_mapq + doc: >- + Exclude reads with a lower mapping quality + - id: min_basq + type: int? 
+ inputBinding: + position: 0 + prefix: --min_basq + doc: >- + Exclude bases with a lower base quality +outputs: + - id: sequence_qc_pileup + type: File + outputBinding: + glob: |- + ${ + return inputs.sample_id + '_pileup.tsv' + } + - id: sequence_qc_noise_positions + type: File + outputBinding: + glob: |- + ${ + return inputs.sample_id + '_noise_positions.tsv' + } + - id: sequence_qc_noise_by_substitution + type: File + outputBinding: + glob: |- + ${ + return inputs.sample_id + '_noise_by_substitution.tsv' + } + - id: sequence_qc_noise_acgt + type: File + outputBinding: + glob: |- + ${ + return inputs.sample_id + '_noise_acgt.tsv' + } + - id: sequence_qc_noise_n + type: File + outputBinding: + glob: |- + ${ + return inputs.sample_id + '_noise_n.tsv' + } + - id: sequence_qc_noise_del + type: File + outputBinding: + glob: |- + ${ + return inputs.sample_id + '_noise_del.tsv' + } + - id: sequence_qc_figures + type: File + outputBinding: + glob: |- + ${ + return inputs.sample_id + '_noise.html' + } +requirements: + - class: ResourceRequirement + ramMin: 8000 + coresMin: 1 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/sequence_qc:0.2.3' + - class: InlineJavascriptRequirement + - class: EnvVarRequirement + envDef: + LC_ALL: en_US.utf-8 + LANG: en_US.utf-8 +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': sesquence_qc + 'doap:revision': 0.2.3 diff --git a/sequence_qc/README.md b/sequence_qc/README.md new file mode 100644 index 00000000..c7b71103 --- /dev/null +++ b/sequence_qc/README.md @@ -0,0 +1,61 @@ +# CWL and 
Dockerfile for running sequence_qc + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +|--- |--- |--- | +| sequence_qc | 0.1.19 | | + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner sequence_qc_0.1.19.cwl example_inputs.yaml +``` + +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/sequence_qc_0.1.19/sequence_qc_0.1.19.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> mkdir tool_toil_log +> toil-cwl-runner --singularity --logFile /path/to/tool_toil_log/cwltoil.log --jobStore /path/to/tool_jobStore --batchSystem lsf --workDir /path/to/tool_toil_log --outdir . 
--writeLogs /path/to/tool_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/sequence_qc_0.1.19/sequence_qc_0.1.19.cwl /path/to/inputs.yaml > tool_toil.stdout 2> tool_toil.stderr & +``` + +### Usage + +```bash +toil-cwl-runner sequence_qc_0.1.19.cwl -h + +usage: sequence_qc_0.1.19.cwl [-h] --reference REFERENCE --bam_file BAM_FILE + --bed_file BED_FILE --sample_id SAMPLE_ID + [--threshold THRESHOLD] [--truncate TRUNCATE] + [--min_mapq MIN_MAPQ] [--min_basq MIN_BASQ] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --reference REFERENCE + Path to reference fasta, containing all regions in + bed_file + --bam_file BAM_FILE Path to BAM file for calculating noise [required] + --bed_file BED_FILE Path to BED file containing regions over which to + calculate noise [required] + --sample_id SAMPLE_ID + Prefix to include in all output file names + --threshold THRESHOLD + Alt allele frequency past which to ignore positions + from the calculation. 
+ --truncate TRUNCATE Whether to exclude trailing bases from reads that only + partially overlap the bed file (0 or 1) + --min_mapq MIN_MAPQ Exclude reads with a lower mapping quality + --min_basq MIN_BASQ Exclude bases with a lower base quality +``` diff --git a/sequence_qc/example_inputs.yaml b/sequence_qc/example_inputs.yaml new file mode 100644 index 00000000..04cf48b4 --- /dev/null +++ b/sequence_qc/example_inputs.yaml @@ -0,0 +1,17 @@ +reference: + class: File + metadata: {} + path: /path/to/fasta +bam_file: + class: File + metadata: {} + path: /path/to/bam +bed_file: + class: File + metadata: {} + path: /path/to/bed +sample_id: test_sample_ +threshold: 0.01 +truncate: 1 +min_mapq: 10 +min_basq: 10 diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 00000000..1c8fad57 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,24 @@ +[bumpversion] +current_version = 1.2.0 +commit = True +tag = True + +[bumpversion:file:setup.py] +search = version='{current_version}' +replace = version='{new_version}' + +[bumpversion:file:cwl_commandlinetools/__init__.py] +search = __version__ = '{current_version}' +replace = __version__ = '{new_version}' + +[bdist_wheel] +universal = 1 + +[flake8] +exclude = docs + +[aliases] +test = pytest + +[tool:pytest] +collect_ignore = ['setup.py'] diff --git a/setup.py b/setup.py new file mode 100644 index 00000000..53b77ff1 --- /dev/null +++ b/setup.py @@ -0,0 +1,43 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +"""The setup script.""" + +from setuptools import setup, find_packages + +with open('README.md') as readme_file: + readme = readme_file.read() + +requirements = [ ] + +setup_requirements = ['pytest-runner', ] + +test_requirements = ['pytest>=3', ] + +setup( + author="msk-access", + author_email='msk.access@gmail.com', + python_requires='>=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*', + classifiers=[ + 'Development Status :: 2 - Pre-Alpha', + 'Intended Audience :: Developers', + 'License :: OSI Approved :: Apache Software 
License', + 'Natural Language :: English', + 'Programming Language :: Python :: 2.7', + 'Programming Language :: Python :: 3.7', + ], + description="Central location for CWL CommandLineTools", + install_requires=requirements, + license="Apache Software License 2.0", + long_description=readme + '\n\n', + include_package_data=True, + keywords='cwl-commandlinetools', + name='cwl-commandlinetools', + packages=find_packages(include=['cwl_commandlinetools', 'cwl_commandlinetools.*']), + setup_requires=setup_requirements, + test_suite='tests', + tests_require=test_requirements, + url='https://github.com/msk-access/cwl-commandlinetools', + version='1.2.0', + zip_safe=False, +) diff --git a/test-yamls/msi-test-one.yaml b/test-yamls/msi-test-one.yaml deleted file mode 100644 index 4115b769..00000000 --- a/test-yamls/msi-test-one.yaml +++ /dev/null @@ -1,4 +0,0 @@ -n: {class: File, path: /juno/work/pi/cmopipeline/data/uncategorized/re-run/normal_sample.sorted.md.bqsr.bam} -t: {class: File, path: /juno/work/pi/cmopipeline/data/uncategorized/re-run/tumor_sample.sorted.md.bqsr.bam} -d: {class: File, path: /juno/work/taylorlab/cmopipeline/mskcc-igenomes/igenomes/Homo_sapiens/GATK/GRCh37/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.fasta.microsatellites.list} -o: "my_output_prefix" diff --git a/test-yamls/msi-test.yaml b/test-yamls/msi-test.yaml deleted file mode 100644 index 5af76e7b..00000000 --- a/test-yamls/msi-test.yaml +++ /dev/null @@ -1,4 +0,0 @@ -normal_bam: {class: File, path: /juno/work/pi/cmopipeline/data/uncategorized/re-run/normal_sample.sorted.md.bqsr.bam} -tumor_bam: {class: File, path: /juno/work/pi/cmopipeline/data/uncategorized/re-run/tumor_sample.sorted.md.bqsr.bam} -msi_file: {class: File, path: /juno/work/taylorlab/cmopipeline/mskcc-igenomes/igenomes/Homo_sapiens/GATK/GRCh37/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.fasta.microsatellites.list} -output_prefix: "msi_run_prefix" diff --git a/test-yamls/snp-pileup-to-facets.yaml 
b/test-yamls/snp-pileup-to-facets.yaml deleted file mode 100644 index 6bdb254a..00000000 --- a/test-yamls/snp-pileup-to-facets.yaml +++ /dev/null @@ -1,4 +0,0 @@ -bam_normal: {class: File, path: /juno/work/pi/cmopipeline/data/uncategorized/re-run/normal_sample.sorted.md.bqsr.bam} -bam_tumor: {class: File, path: /juno/work/pi/cmopipeline/data/uncategorized/re-run/tumor_sample.sorted.md.bqsr.bam} -tumor_sample_name: tumor_sample -facets_vcf: {class: File, path: /juno/work/taylorlab/cmopipeline/mskcc-igenomes/igenomes/Homo_sapiens/GATK/b37/dbsnp_137.b37__RmDupsClean__plusPseudo50__DROP_SORT.vcf } diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 00000000..688b77f7 --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1,3 @@ +# -*- coding: utf-8 -*- + +"""Unit test package for cwl_commandlinetools.""" diff --git a/tests/test_cwl_commandlinetools.py b/tests/test_cwl_commandlinetools.py new file mode 100644 index 00000000..9ce7b51e --- /dev/null +++ b/tests/test_cwl_commandlinetools.py @@ -0,0 +1,11 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +"""Tests for `cwl_commandlinetools` package.""" + +import pytest + + +def test_content(): + """Sample pytest test function with the pytest fixture as an argument.""" + print("All good") diff --git a/tox.ini b/tox.ini new file mode 100644 index 00000000..29dc8a5e --- /dev/null +++ b/tox.ini @@ -0,0 +1,30 @@ +[tox] +envlist = py27, py37 flake8 + +[travis] +python = + 3.7: py37 + 2.7: py27 + +[testenv:flake8] +basepython = python +deps = flake8 +commands = flake8 cwl_commandlinetools + +[testenv:py37] +setenv = + PYTHONPATH = {toxinidir} +deps = + -r{toxinidir}/requirements.txt + +commands = + py.test --capture=sys --basetemp={envtmpdir} tests + +[testenv:py27] +setenv = + PYTHONPATH = {toxinidir} +deps = + -r{toxinidir}/requirements_p2.7.txt + +commands = + py.test --capture=sys --basetemp={envtmpdir} tests \ No newline at end of file diff --git a/trim_galore_0.6.2/trim_galore_0.6.2.cwl 
b/trim_galore_0.6.2/trim_galore_0.6.2.cwl index 5b91cbbc..95d53149 100644 --- a/trim_galore_0.6.2/trim_galore_0.6.2.cwl +++ b/trim_galore_0.6.2/trim_galore_0.6.2.cwl @@ -24,9 +24,6 @@ inputs: inputBinding: position: 0 prefix: '--cores' - - id: path_to_trim_galore - type: File? - doc: Path to trim_galore executable file - id: adapter type: string? inputBinding: @@ -163,7 +160,7 @@ requirements: ramMin: 8000 coresMin: 4 - class: DockerRequirement - dockerPull: 'mskcc/trim_galore:0.1.0' + dockerPull: 'ghcr.io/msk-access/trim_galore:0.6.2' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/utilities_ubuntu_18.04/README.md b/utilities_ubuntu_18.04/README.md deleted file mode 100644 index a13033b0..00000000 --- a/utilities_ubuntu_18.04/README.md +++ /dev/null @@ -1,17 +0,0 @@ -# CWL and Dockerfile for running utilites from Ubuntu 18.04 - -## Version of tools in docker image (/container/Dockerfile) - -| Tool | Version | Location | -|--- |--- |--- | -| Ubuntu base image | 18.04 | - | - -## CWL - -- CWL specification 1.0 -- Use example_inputs_toolname.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner gzip.cwl example_inputs_gzip.yaml -``` diff --git a/utilities_ubuntu_18.04/example_inputs_mv.yaml b/utilities_ubuntu_18.04/example_inputs_mv.yaml new file mode 100644 index 00000000..ae4246e9 --- /dev/null +++ b/utilities_ubuntu_18.04/example_inputs_mv.yaml @@ -0,0 +1,6 @@ +force: null +infile: /path/to/source/file +memory_overhead: null +memory_per_job: null +outfile: /path/to/destination/file +verbose: null diff --git a/utilities_ubuntu_18.04/mv.cwl b/utilities_ubuntu_18.04/mv.cwl new file mode 100644 index 00000000..401b5b34 --- /dev/null +++ b/utilities_ubuntu_18.04/mv.cwl @@ -0,0 +1,70 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 
'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: mv +baseCommand: + - mv +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: infile + type: File + inputBinding: + position: 1 + doc: 'Source with path that needs to be moved' + - id: outfile + type: string + inputBinding: + position: 2 + doc: 'Target destination for the source' + - id: force + type: boolean? + inputBinding: + position: 0 + prefix: '-f' + doc: 'Do not prompt for confirmation before overwriting the destination path.' + - id: verbose + type: boolean? + inputBinding: + position: 0 + prefix: '-v' + doc: 'Cause mv to be verbose, showing files after they are moved.' +outputs: + - id: out + type: File + outputBinding: + glob: $(inputs.outfile) +label: mv +requirements: + - class: ResourceRequirement + ramMin: 2000 + coresMin: 1 + - class: DockerRequirement + dockerPull: 'ubuntu:18.04' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': mv + 'doap:revision': 18.04 \ No newline at end of file diff --git a/vardictjava/container/Dockerfile b/vardictjava/container/Dockerfile new file mode 100644 index 00000000..cd6c9371 --- /dev/null +++ b/vardictjava/container/Dockerfile @@ -0,0 +1,22 @@ +FROM alpine:3.8 + +LABEL maintainer="Nikhil Kumar (kumarn1@mskcc.org)" \ + version.image="1.0.0" \ + version.vardict="1.8.2" \ + version.r="3.5.1" \ + version.perl="5.26.2-r1" \ + version.alpine="3.8" \ + 
source.vardict="https://github.com/AstraZeneca-NGS/VarDictJava/releases/tag/v1.8.2" \ + source.r="https://pkgs.alpinelinux.org/package/edge/community/x86/R" \ + source.perl="https://pkgs.alpinelinux.org/package/edge/main/aarch64/perl" + +ENV VARDICT_VERSION 1.8.2 + +RUN apk add --update \ + && apk add ca-certificates openssl bash perl \ + && apk add openjdk8-jre-base \ + && apk add R R-dev \ + && cd /tmp && wget https://github.com/AstraZeneca-NGS/VarDictJava/releases/download/v${VARDICT_VERSION}/VarDict-${VARDICT_VERSION}.zip \ + && unzip VarDict-${VARDICT_VERSION}.zip \ + && mv /tmp/VarDict-${VARDICT_VERSION} /usr/bin/vardict \ + && rm -rf /var/cache/apk/* /tmp/* diff --git a/vardictjava/v1.8.2/example_inputs.yaml b/vardictjava/v1.8.2/example_inputs.yaml new file mode 100644 index 00000000..81f4d3b0 --- /dev/null +++ b/vardictjava/v1.8.2/example_inputs.yaml @@ -0,0 +1,18 @@ +G: + class: File + path: "/path/to/ref/file" +b: + class: File + path: "/path/to/bam/file" +bedfile: + class: File + path: "/path/to/bed/file" +c: "1" +f: "0" +S: "2" +E: "3" +g: "5" +th: "4" +vcf: "output.vcf" +N: "name" +f_1: "0" diff --git a/vardictjava/v1.8.2/teststrandbias.cwl b/vardictjava/v1.8.2/teststrandbias.cwl new file mode 100644 index 00000000..4e49fbd7 --- /dev/null +++ b/vardictjava/v1.8.2/teststrandbias.cwl @@ -0,0 +1,53 @@ +#!/usr/bin/env cwl-runner +cwlVersion: v1.0 +class: CommandLineTool +baseCommand: +- Rscript +- /usr/bin/vardict/bin/teststrandbias.R +id: teststrandbias + +requirements: + InlineJavascriptRequirement: {} + ResourceRequirement: + coresMin: 2 + ramMin: 12000 + DockerRequirement: + dockerPull: ghcr.io/msk-access/vardictjava:1.8.2 + + +inputs: + input_vardict: + type: File +outputs: + output_var: + type: File? 
+ outputBinding: + glob: output_teststrandbias.var + +stdin: $(inputs.input_vardict.path) +stdout: output_teststrandbias.var + +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:vurals@mskcc.org' + 'foaf:name': Suleyman Vural + 'foaf:name': Memorial Sloan Kettering Cancer Center + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:kumarn1@mskcc.org' + 'foaf:name': Nikhil Kumar + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:kumarn1@mskcc.org' + 'foaf:name': Nikhil Kumar + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': Vardictjava + 'doap:revision': 1.8.2 diff --git a/vardictjava/v1.8.2/var_to_vcf.cwl b/vardictjava/v1.8.2/var_to_vcf.cwl new file mode 100644 index 00000000..7f16c6bc --- /dev/null +++ b/vardictjava/v1.8.2/var_to_vcf.cwl @@ -0,0 +1,81 @@ +#!/usr/bin/env cwl-runner +cwlVersion: v1.0 +class: CommandLineTool +baseCommand: +- perl +- /usr/bin/vardict/bin/var2vcf_valid.pl +id: vardict_var2vcf + +arguments: +- position: 0 + prefix: -N + valueFrom: "${\n return inputs.N + \"|\" + inputs.N2;\n}" + +requirements: + InlineJavascriptRequirement: {} + ResourceRequirement: + coresMin: 4 + ramMin: 32000 + DockerRequirement: + dockerPull: ghcr.io/msk-access/vardictjava:1.8.2 + +inputs: + + S: + type: boolean? + doc: If set variants that didnt pass filters will not be present in VCF file. + inputBinding: + position: 0 + prefix: -S + + N: + type: string? + doc: Tumor Sample Name + + f: + type: string? + doc: The threshold for allele frequency, default - 0.05 or 5%% + inputBinding: + position: 0 + prefix: -f + + vcf: + type: string + doc: output vcf file + + input_vcf: + type: File? 
+ +outputs: + output: + type: File + outputBinding: + glob: ${ return inputs.vcf; } + +stdin: $(inputs.input_vcf.path) +stdout: ${ return inputs.vcf; } + +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:vurals@mskcc.org' + 'foaf:name': Suleyman Vural + 'foaf:name': Memorial Sloan Kettering Cancer Center + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:kumarn1@mskcc.org' + 'foaf:name': Nikhil Kumar + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:kumarn1@mskcc.org' + 'foaf:name': Nikhil Kumar + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': Vardictjava + 'doap:revision': 1.8.2 diff --git a/vardictjava/v1.8.2/vardict_app.cwl b/vardictjava/v1.8.2/vardict_app.cwl new file mode 100644 index 00000000..e2a9d1e3 --- /dev/null +++ b/vardictjava/v1.8.2/vardict_app.cwl @@ -0,0 +1,122 @@ +#!/usr/bin/env cwl-runner +cwlVersion: v1.0 +class: CommandLineTool +baseCommand: +- /usr/bin/vardict/bin/VarDict +id: vardict + +arguments: +- position: 1 + prefix: -b + valueFrom: "${\n return inputs.b.path;\n}" +# - position: 0 +# prefix: -N +# valueFrom: "${\n if (inputs.N2)\n return [inputs.N, inputs.N2];\n else\n\ +# \ return inputs.N;\n}" + + +requirements: + EnvVarRequirement: + envDef: + JAVA_OPTS: '"-Xms8g" "-Xmx95g"' + InlineJavascriptRequirement: {} + ResourceRequirement: + coresMin: 4 + ramMin: 64000 + DockerRequirement: + dockerPull: ghcr.io/msk-access/vardictjava:1.8.2 + +inputs: + G: + type: File + doc: The reference fasta. Should be indexed (.fai) + secondaryFiles: + - .fai + inputBinding: + position: 0 + prefix: -G + + f: + type: string? + doc: The threshold for allele frequency, default - 0.01 or 1%% + inputBinding: + position: 0 + prefix: -f + + N: + type: string? 
+ doc: Tumor Sample Name + + b: + type: File? + secondaryFiles: + - .bai + doc: Tumor bam + + c: + type: string? + doc: The column for chromosome + inputBinding: + position: 0 + prefix: -c + + S: + type: string? + doc: The column for region start, e.g. gene start + inputBinding: + position: 0 + prefix: -S + + E: + type: string? + doc: The column for region end, e.g. gene end + inputBinding: + position: 0 + prefix: -E + + g: + type: string? + doc: The column for gene name, or segment annotation + inputBinding: + position: 0 + prefix: -g + + bedfile: + type: File? + inputBinding: + position: 1 + +outputs: + output: + type: File + outputBinding: + glob: vardict_app_output.vcf + + +stdout: vardict_app_output.vcf + + +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:vurals@mskcc.org' + 'foaf:name': Suleyman Vural + 'foaf:name': Memorial Sloan Kettering Cancer Center + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:kumarn1@mskcc.org' + 'foaf:name': Nikhil Kumar + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:kumarn1@mskcc.org' + 'foaf:name': Nikhil Kumar + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': Vardictjava + 'doap:revision': 1.8.2 diff --git a/vardictjava/v1.8.2/vardict_workflow_single_sample.cwl b/vardictjava/v1.8.2/vardict_workflow_single_sample.cwl new file mode 100644 index 00000000..17054b33 --- /dev/null +++ b/vardictjava/v1.8.2/vardict_workflow_single_sample.cwl @@ -0,0 +1,99 @@ +#!/usr/bin/env cwl-runner +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +class: Workflow +id: vardict +label: vardict +requirements: + 
MultipleInputFeatureRequirement: {} + ScatterFeatureRequirement: {} + SubworkflowFeatureRequirement: {} + InlineJavascriptRequirement: {} + StepInputExpressionRequirement: {} +inputs: + bedfile: + type: File? + + G: + type: File + secondaryFiles: ['.fai'] + f: + type: string? + N: + type: string? + b: + type: File? + secondaryFiles: ['.bai'] + c: + type: string? + S: + type: string? + E: + type: string? + g: + type: string? + vcf: + type: string? + f_1: + type: string? + +outputs: + output: + type: File + outputSource: var_to_vcf/output +steps: + vardict: + run: ./vardict_app.cwl + in: + G: G + f: f + N: N + S: S + c: c + b: b + bedfile: bedfile + E: E + g: g + out: [output] + teststrandbias: + run: ./teststrandbias.cwl + in: + input_vardict: vardict/output + out: [output_var] + var_to_vcf: + run: ./var_to_vcf.cwl + in: + N: N + f: f_1 + vcf: vcf + input_vcf: teststrandbias/output_var + out: [output] + +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:vurals@mskcc.org' + 'foaf:name': Suleyman Vural + 'foaf:name': Memorial Sloan Kettering Cancer Center + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:kumarn1@mskcc.org' + 'foaf:name': Nikhil Kumar + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:kumarn1@mskcc.org' + 'foaf:name': Nikhil Kumar + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': Vardictjava + 'doap:revision': 1.8.2 diff --git a/vcf2maf_1.6.17/README.md b/vcf2maf_1.6.17/README.md new file mode 100644 index 00000000..491ef1c3 --- /dev/null +++ b/vcf2maf_1.6.17/README.md @@ -0,0 +1,90 @@ +# CWL and Dockerfile for running vcf2maf v1.6.17 + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +|--- |--- |--- | +| alpine:3.8 base 
image | 3.8 | - | +| vcf2maf | 1.6.17 | https://github.com/mskcc/vcf2maf/archive/v1.6.17.zip | + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner vcf2maf_1.6.17.cwl example_inputs.yaml +``` + +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/vcf2maf_1.6.17.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> mkdir vcf2maf_toil_log +> toil-cwl-runner --singularity --logFile /path/to/vcf2maf_toil_log/cwltoil.log --jobStore /path/to/vcf2maf_jobStore --batchSystem lsf --workDir /path/to/vcf2maf_toil_log --outdir . --writeLogs /path/to/vcf2maf_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/vcf2maf_1.6.17.cwl /path/to/inputs.yaml > vcf2maf_toil.stdout 2> vcf2maf_toil.stderr & +``` + +### Usage + +``` +usage: toil-cwl-runner vcf2maf_1.6.17.cwl [-h] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --any_allele ANY_ALLELE + When reporting co-located variants, allow mismatched + variant alleles too + --buffer_size BUFFER_SIZE + Number of variants VEP loads at a time; Reduce this + for low memory systems + --cache_version CACHE_VERSION + Version of VEP and its cache to use + --custom_enst CUSTOM_ENST + List of custom ENST IDs that override canonical + selection + --maf_center MAF_CENTER + Variant calling center to report in MAF + --max_filter_ac MAX_FILTER_AC + Use tag common_variant if the filter-vcf reports a + subpopulation AC higher than this + --min_hom_vaf MIN_HOM_VAF + If GT undefined in VCF, minimum allele fraction to + 
call a variant homozygous + --ncbi_build NCBI_BUILD + Genome build of variants in input + --normal_id NORMAL_ID + Matched_Norm_Sample_Barcode to report in the MAF + --output_maf OUTPUT_MAF + Path to output MAF file + --ref_fasta REF_FASTA + Reference FASTA file + --remap_chain REMAP_CHAIN + Chain file to remap variants to a different assembly + before running VEP + --retain_fmt RETAIN_FMT + Comma-delimited names of FORMAT fields to retain as + extra columns in MAF [] + --retain_info RETAIN_INFO + Comma-delimited names of INFO fields to retain as + extra columns in MAF + --species SPECIES Species of variants in input + --tumor_id TUMOR_ID Tumor_Sample_Barcode to report in the MAF + --vcf_normal_id VCF_NORMAL_ID + Matched normal ID used in VCFs genotype columns + --vcf_tumor_id VCF_TUMOR_ID + Tumor sample ID used in VCFs genotype columns + --vep_data VEP_DATA VEPs base cache/plugin directory + --vep_forks VEP_FORKS + Number of forked processes to use when running VEP + --vep_path VEP_PATH Folder containing variant_effect_predictor.pl or vep + binary + + +``` diff --git a/vcf2maf_1.6.17/container/Dockerfile b/vcf2maf_1.6.17/container/Dockerfile new file mode 100644 index 00000000..61af253c --- /dev/null +++ b/vcf2maf_1.6.17/container/Dockerfile @@ -0,0 +1,94 @@ +################## BASE IMAGE ###################### +FROM alpine:3.8 + +################## ARGUMENTS/Environments ########## + +ARG BUILD_DATE +ARG BUILD_VERSION=1.0.0 +ARG VCF2MAF_VERSION=1.6.17 +ARG HTSLIB_VERSION=1.9 +ARG SAMTOOLS_VERSION=1.9 +ARG BCFTOOLS_VERSION=1.9 + +################## METADATA ######################## +LABEL org.opencontainers.image.vendor="MSKCC" +LABEL org.opencontainers.image.authors="Nikhil Kumar (kumarn1@mskcc.org)" +LABEL org.opencontainers.image.created=${BUILD_DATE} +LABEL org.opencontainers.image.version=${BUILD_VERSION} +LABEL org.opencontainers.image.version.vcf2maf=${VCF2MAF_VERSION} +LABEL org.opencontainers.image.version.vep="86" +LABEL 
org.opencontainers.image.version.htslib=${HTSLIB_VERSION} +LABEL org.opencontainers.image.version.bcftools=${BCFTOOLS_VERSION} +LABEL org.opencontainers.image.version.samtools=${SAMTOOLS_VERSION} +LABEL org.opencontainers.image.version.perl="5.26.2-r1" +LABEL org.opencontainers.image.version.alpine="3.8" +LABEL org.opencontainers.image.source.vcf2maf="https://github.com/mskcc/vcf2maf/releases/tag/v${VCF2MAF_VERSION}" +LABEL org.opencontainers.image.source.htslib="https://github.com/samtools/htslib/releases/tag/${HTSLIB_VERSION}" +LABEL org.opencontainers.image.source.bcftools="https://github.com/samtools/bcftools/releases/tag/${BCFTOOLS_VERSION}" +LABEL org.opencontainers.image.source.samtools="https://github.com/samtools/samtools/releases/tag/${SAMTOOLS_VERSION}" + + +LABEL org.opencontainers.image.description="This container uses alpine3.8 as the base image to build vcf2maf version ${VCF2MAF_VERSION}" + +################## INSTALL ########################## + +ENV VCF2MAF_VERSION=${VCF2MAF_VERSION} +ENV VEP_VERSION=86 +ENV VEP_DATA=/var/cache +ENV VEP_PATH=/usr/bin/vep +ENV HTSLIB_VERSION=${HTSLIB_VERSION} +ENV SAMTOOLS_VERSION=${SAMTOOLS_VERSION} +ENV BCFTOOLS_VERSION=${BCFTOOLS_VERSION} + + +RUN apk add --update \ + # install all the build-related tools + && apk add ca-certificates gcc g++ make git curl curl-dev wget gzip perl perl-dev musl-dev libgcrypt-dev zlib-dev bzip2-dev xz-dev ncurses-dev rsync \ + # install system packages and Perl modules + && apk add expat-dev libressl-dev perl-net-ssleay mariadb-dev libxml2-dev perl-dbd-mysql perl-module-metadata perl-gd perl-db_file perl-archive-zip perl-cgi perl-dbi perl-encode perl-time-hires perl-file-copy-recursive perl-json \ + # install cpanminus + && curl -L https://cpanmin.us | perl - App::cpanminus \ + # install perl libraries that VEP will need + && cpanm --notest LWP LWP::Simple LWP::Protocol::https Archive::Extract Archive::Tar Archive::Zip \ + CGI DBI Encode version Time::HiRes File::Copy::Recursive 
Perl::OSType Module::Metadata \ + Sereal JSON Bio::Root::Version Set::IntervalTree PerlIO::gzip \ + # install htslib (for vep) + && cd /tmp && wget https://github.com/samtools/htslib/releases/download/${HTSLIB_VERSION}/htslib-${HTSLIB_VERSION}.tar.bz2 \ + && tar xvjf htslib-${HTSLIB_VERSION}.tar.bz2 \ + && cd /tmp/htslib-${HTSLIB_VERSION} \ + && ./configure \ + && make && make install \ + # download/unzip vep + && cd /tmp && wget https://github.com/Ensembl/ensembl-tools/archive/release/${VEP_VERSION}.zip \ + && unzip ${VEP_VERSION} \ + # install vep + && cd /tmp/ensembl-tools-release-${VEP_VERSION}/scripts/variant_effect_predictor \ + && perl INSTALL.pl --AUTO a 2>&1 | tee install.log \ + && cd /tmp && mv /tmp/ensembl-tools-release-${VEP_VERSION}/scripts/variant_effect_predictor /usr/bin/vep \ + # download and unpack VEP's offline cache + && mkdir -p ${VEP_DATA} \ + && rsync -zvh rsync://ftp.ensembl.org/ensembl/pub/release-86/variation/VEP/homo_sapiens_vep_86_GRCh37.tar.gz ${VEP_DATA} \ + && tar -zxf ${VEP_DATA}/homo_sapiens_vep_86_GRCh37.tar.gz -C ${VEP_DATA} \ + && cd /usr/bin/vep \ + && perl convert_cache.pl --species homo_sapiens --version 86_GRCh37 --dir ${VEP_DATA} \ + && rm ${VEP_DATA}/homo_sapiens_vep_86_GRCh37.tar.gz \ + # install bcftools + && cd /tmp && wget https://github.com/samtools/bcftools/releases/download/${BCFTOOLS_VERSION}/bcftools-${BCFTOOLS_VERSION}.tar.bz2 \ + && tar xvjf bcftools-${BCFTOOLS_VERSION}.tar.bz2 \ + && cd /tmp/bcftools-${BCFTOOLS_VERSION} \ + && make HTSDIR=/tmp/htslib-${HTSLIB_VERSION} && make install \ + # install samtools + && cd /tmp && wget https://github.com/samtools/samtools/releases/download/${SAMTOOLS_VERSION}/samtools-${SAMTOOLS_VERSION}.tar.bz2 \ + && tar xvjf samtools-${SAMTOOLS_VERSION}.tar.bz2 \ + && cd /tmp/samtools-${SAMTOOLS_VERSION} \ + && ./configure --with-htslib=/tmp/htslib-${HTSLIB_VERSION} \ + && make && make install \ + # install vcf2maf + && cd /tmp && wget -O vcf2maf-v${VCF2MAF_VERSION} 
https://github.com/mskcc/vcf2maf/archive/v${VCF2MAF_VERSION}.zip \ + && unzip vcf2maf-v${VCF2MAF_VERSION} \ + && mkdir -p /usr/bin/vcf2maf/ \ + && cp -r vcf2maf-${VCF2MAF_VERSION}/* /usr/bin/vcf2maf/ \ + # clean up + && rm -rf /var/cache/apk/* /tmp/* \ + && chmod +x /usr/bin/runscript.sh \ + && exec /run_test.sh diff --git a/vcf2maf_1.6.17/example_inputs.yaml b/vcf2maf_1.6.17/example_inputs.yaml new file mode 100644 index 00000000..3c32cc16 --- /dev/null +++ b/vcf2maf_1.6.17/example_inputs.yaml @@ -0,0 +1,20 @@ +input_vcf: + class: File + path: /path/to/vcf_file +tumor_id: tumor_sample_name +vcf_tumor_id: tumor_sample_name +normal_id: normal_sample_name +vcf_normal_id: normal_sample_name +ncbi_build: genome_string +filter_vcf: + class: File + path: /path/to/filter/vcf +vep_data: vep_cache_path_str (/var/cache in container) +ref_fasta: + class: File + path: /path/to/ref/fasta +vep_path: vep_path (/usr/bin/vep in container) +custom_enst: custom_enst_str +retain_info: retain_info_str +retain_fmt: retain_fmt_str +output_maf: output_maf_str \ No newline at end of file diff --git a/vcf2maf_1.6.17/vcf2maf_1.6.17.cwl b/vcf2maf_1.6.17/vcf2maf_1.6.17.cwl new file mode 100644 index 00000000..eca0fb95 --- /dev/null +++ b/vcf2maf_1.6.17/vcf2maf_1.6.17.cwl @@ -0,0 +1,217 @@ + +class: CommandLineTool +cwlVersion: v1.0 + +$namespaces: + dct: http://purl.org/dc/terms/ + doap: http://usefulinc.com/ns/doap# + foaf: http://xmlns.com/foaf/0.1/ + sbg: https://www.sevenbridges.com/ + +id: vcf2maf_v1.6.17 + +baseCommand: + - perl + - /usr/bin/vcf2maf/vcf2maf.pl + +inputs: + + memory_per_job: + type: ["null",int] + doc: Memory per job in megabytes + + memory_overhead: + type: ["null",int] + doc: Memory overhead per job in megabytes + + cache_version: + type: + - 'null' + - string + default: '86' + doc: Version of VEP and its cache to use + inputBinding: + prefix: --cache-version + species: + type: + - 'null' + - string + default: homo_sapiens + doc: Species of variants in input + 
inputBinding: + prefix: --species + ncbi_build: + type: + - 'null' + - string + default: GRCh37 + doc: Genome build of variants in input + inputBinding: + prefix: --ncbi-build + ref_fasta: + type: ['null', File] + doc: Reference FASTA file + inputBinding: + prefix: --ref-fasta + maf_center: + type: ['null', string] + default: mskcc.org + doc: Variant calling center to report in MAF + inputBinding: + prefix: --maf-center + output_maf: + type: ['null', string] + doc: Path to output MAF file + inputBinding: + prefix: --output-maf + max_filter_ac: + type: + - 'null' + - int + default: 10 + doc: Use tag common_variant if the filter-vcf reports a subpopulation AC higher + than this + inputBinding: + prefix: --max-filter-ac + min_hom_vaf: + type: + - 'null' + - float + default: 0.7 + doc: If GT undefined in VCF, minimum allele fraction to call a variant homozygous + inputBinding: + prefix: --min-hom-vaf + remap_chain: + type: ['null', string] + doc: Chain file to remap variants to a different assembly before running VEP + inputBinding: + prefix: --remap-chain + normal_id: + type: ['null', string] + default: NORMAL + doc: Matched_Norm_Sample_Barcode to report in the MAF + inputBinding: + prefix: --normal-id + buffer_size: + type: + - 'null' + - int + default: 5000 + doc: Number of variants VEP loads at a time; Reduce this for low memory systems + inputBinding: + prefix: --buffer-size + custom_enst: + type: ['null', string] + doc: List of custom ENST IDs that override canonical selection + inputBinding: + prefix: --custom-enst + vcf_normal_id: + type: ['null', string] + default: NORMAL + doc: Matched normal ID used in VCFs genotype columns + inputBinding: + prefix: --vcf-normal-id + vep_path: + type: ['null', string] + doc: Folder containing variant_effect_predictor.pl or vep binary + inputBinding: + prefix: --vep-path + vep_data: + type: ['null', string] + doc: VEPs base cache/plugin directory + inputBinding: + prefix: --vep-data + any_allele: + type: ['null', string] + 
doc: When reporting co-located variants, allow mismatched variant alleles too + inputBinding: + prefix: --any-allele + input_vcf: + type: + - string + - File + doc: Path to input file in VCF format + inputBinding: + prefix: --input-vcf + vep_forks: + type: + - 'null' + - int + default: 4 + doc: Number of forked processes to use when running VEP + inputBinding: + prefix: --vep-forks + vcf_tumor_id: + type: ['null', string] + default: TUMOR + doc: Tumor sample ID used in VCFs genotype columns + inputBinding: + prefix: --vcf-tumor-id + tumor_id: + type: ['null', string] + default: TUMOR + doc: Tumor_Sample_Barcode to report in the MAF + inputBinding: + prefix: --tumor-id + filter_vcf: + type: + - 'null' + - string + - File + doc: The non-TCGA VCF from exac.broadinstitute.org + inputBinding: + prefix: --filter-vcf + secondaryFiles: + - .tbi + retain_info: + type: ['null', string] + doc: Comma-delimited names of INFO fields to retain as extra columns in MAF + inputBinding: + prefix: --retain-info + retain_fmt: + type: ['null', string] + doc: Comma-delimited names of FORMAT fields to retain as extra columns in MAF [] + inputBinding: + prefix: --retain-fmt +outputs: + vcf2maf_maf: + type: File + outputBinding: + glob: | + ${ + if (inputs.output_maf) + return inputs.output_maf; + return null; + } + +arguments: +- valueFrom: "$(runtime.tmpdir)" + prefix: '--tmp-dir' + shellQuote: false + +requirements: + InlineJavascriptRequirement: {} + ResourceRequirement: + ramMin: 8000 + coresMin: 2 + DockerRequirement: + dockerPull: mskaccess/vcf2maf:1.6.17 + +dct:contributor: + - class: foaf:Organization + foaf:member: + - class: foaf:Person + foaf:mbox: mailto:kumarn1@mskcc.org + foaf:name: Nikhil Kumar + foaf:name: Memorial Sloan Kettering Cancer Center +dct:creator: + - class: foaf:Organization + foaf:member: + - class: foaf:Person + foaf:mbox: mailto:kumarn1@mskcc.org + foaf:name: Nikhil Kumar + foaf:name: Memorial Sloan Kettering Cancer Center +doap:release: + - class: 
doap:Version + doap:name: vcf2maf + doap:revision: 1.6.17 diff --git a/vcf2maf_1.6.21/README.md b/vcf2maf_1.6.21/README.md new file mode 100644 index 00000000..6d8cd35d --- /dev/null +++ b/vcf2maf_1.6.21/README.md @@ -0,0 +1,76 @@ +# CWL and Dockerfile for running vcf2maf v1.6.21 + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +|--- |--- |--- | +| clearlinux (base image) | - | - | +| vcf2maf | 1.6.21 | https://github.com/mskcc/vcf2maf/archive/v1.6.21.zip | +| VEP | 105 | - | +|MINICONDA_VERSION | py37_4.9.2 | https://repo.anaconda.com/miniconda/Miniconda3-py37_4.9.2-Linux-x86_64.sh | +|BCFTOOLS_VERSION | 1.10.2 | - | +|SAMTOOLS_VERSION | 1.10 | - | +|VCF2MAF_VERSION | 1.6.21 | - | +|HTSLIB_VERSION | 1.10.2 | - | + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner vcf2maf_1.6.21.cwl example_inputs.yaml +``` + +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/vcf2maf_1.6.21.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> mkdir vcf2maf_toil_log +> toil-cwl-runner --singularity --logFile /path/to/vcf2maf_toil_log/cwltoil.log --jobStore /path/to/vcf2maf_jobStore --batchSystem lsf --workDir /path/to/vcf2maf_toil_log --outdir . 
--writeLogs /path/to/vcf2maf_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/vcf2maf_1.6.21.cwl /path/to/inputs.yaml > vcf2maf_toil.stdout 2> vcf2maf_toil.stderr & +``` + +### Usage + +``` +Usage: + perl vcf2maf.pl --help + perl vcf2maf.pl --input-vcf input.vcf --output-maf output.maf --tumor-id TUMOR_ID --normal-id NORMAL_ID + +--input-vcf Path to input file in VCF format +--output-maf Path to output MAF file +--tmp-dir Folder to retain intermediate VCFs after runtime [Default: Folder containing input VCF] +--tumor-id Tumor_Sample_Barcode to report in the MAF [TUMOR] +--normal-id Matched_Norm_Sample_Barcode to report in the MAF [NORMAL] +--vcf-tumor-id Tumor sample ID used in VCF's genotype columns [--tumor-id] +--vcf-normal-id Matched normal ID used in VCF's genotype columns [--normal-id] +--custom-enst List of custom ENST IDs that override canonical selection +--vep-path Folder containing the vep script [~/miniconda3/bin] +--vep-data VEP's base cache/plugin directory [~/.vep] +--vep-forks Number of forked processes to use when running VEP [4] +--vep-custom String to pass into VEP's --custom option [] +--vep-config Config file to pass into VEP's --config option [] +--vep-overwrite Allow VEP to overwrite output VCF if it exists +--buffer-size Number of variants VEP loads at a time; Reduce this for low memory systems [5000] +--any-allele When reporting co-located variants, allow mismatched variant alleles too +--inhibit-vep Skip running VEP, but extract VEP annotation in VCF if found +--online Use useastdb.ensembl.org instead of local cache (supports only GRCh38 VCFs listing <100 events) +--ref-fasta Reference FASTA file [~/.vep/homo_sapiens/102_GRCh37/Homo_sapiens.GRCh37.dna.toplevel.fa.gz] +--max-subpop-af Add FILTER tag common_variant if gnomAD reports any subpopulation AFs greater than this [0.0004] +--species Ensembl-friendly name of species (e.g. 
mus_musculus for mouse) [homo_sapiens] +--ncbi-build NCBI reference assembly of variants MAF (e.g. GRCm38 for mouse) [GRCh37] +--cache-version Version of offline cache to use with VEP (e.g. 75, 91, 102) [Default: Installed version] +--maf-center Variant calling center to report in MAF [.] +--retain-info Comma-delimited names of INFO fields to retain as extra columns in MAF [] +--retain-fmt Comma-delimited names of FORMAT fields to retain as extra columns in MAF [] +--retain-ann Comma-delimited names of annotations (within the VEP CSQ/ANN) to retain as extra columns in MAF [] +--min-hom-vaf If GT undefined in VCF, minimum allele fraction to call a variant homozygous [0.7] +--remap-chain Chain file to remap variants to a different assembly before running VEP +--verbose Print more things to log progress +--help Print a brief help message and quit +--man Print the detailed manual +``` diff --git a/vcf2maf_1.6.21/container/Dockerfile b/vcf2maf_1.6.21/container/Dockerfile new file mode 100644 index 00000000..919941e3 --- /dev/null +++ b/vcf2maf_1.6.21/container/Dockerfile @@ -0,0 +1,79 @@ +FROM clearlinux:latest AS builder + +ARG VCF2MAF_VERSION=1.6.21 +ARG HTSLIB_VERSION=1.10.2 +ARG SAMTOOLS_VERSION=1.10 +ARG BCFTOOLS_VERSION=1.10.2 +ARG VEP_VERSION=105.0 +# Install a minimal versioned OS into /install_root, and bundled tools if any +ENV CLEAR_VERSION=33980 +RUN swupd os-install --no-progress --no-boot-update --no-scripts \ + --version ${CLEAR_VERSION} \ + --path /install_root \ + --statedir /swupd-state \ + --bundles os-core-update,which + +# Download and install conda into /usr/bin +ENV MINICONDA_VERSION=py37_4.9.2 +RUN swupd bundle-add --no-progress curl git wget sysadmin-basic diffutils less c-basic && \ + curl -sL https://repo.anaconda.com/miniconda/Miniconda3-${MINICONDA_VERSION}-Linux-x86_64.sh -o /tmp/miniconda.sh && \ + sh /tmp/miniconda.sh -bfp /usr + +#Download and install vcf2maf +RUN wget 
https://github.com/mskcc/vcf2maf/archive/refs/tags/v${VCF2MAF_VERSION}.zip \ + && unzip v${VCF2MAF_VERSION}.zip \ + && rm v${VCF2MAF_VERSION}.zip + +# Use conda to install remaining tools/dependencies into /usr/local +ENV VEP_VERSION=${VEP_VERSION} \ + HTSLIB_VERSION=${HTSLIB_VERSION} \ + BCFTOOLS_VERSION=${BCFTOOLS_VERSION} \ + SAMTOOLS_VERSION=${SAMTOOLS_VERSION} \ + LIFTOVER_VERSION=377 +RUN conda create -qy -p /usr/local \ + -c conda-forge \ + -c bioconda \ + -c defaults \ + ensembl-vep==${VEP_VERSION} \ + htslib==${HTSLIB_VERSION} \ + bcftools==${BCFTOOLS_VERSION} \ + samtools==${SAMTOOLS_VERSION} \ + ucsc-liftover==${LIFTOVER_VERSION} + +#Copy offline cache +COPY homo_sapiens_vep_105_GRCh37.tar.gz /var/cache +COPY Homo_sapiens.GRCh37.dna.toplevel.fa.gz /var/cache +RUN mkdir -p /.vep/homo_sapiens/105_GRCh37/ \ + ## && rsync -avr --progress rsync://ftp.ensembl.org/ensembl/pub/release-105/variation/indexed_vep_cache/homo_sapiens_vep_105_GRCh37.tar.gz $HOME/.vep/ \ + && mv /var/cache/homo_sapiens_vep_105_GRCh37.tar.gz /.vep/ \ + && tar -zxf /.vep/homo_sapiens_vep_105_GRCh37.tar.gz -C /.vep/ \ + && rm /.vep/homo_sapiens_vep_105_GRCh37.tar.gz \ + ## && rsync -avr --progress rsync://ftp.ensembl.org/ensembl/pub/grch37/release-105/fasta/homo_sapiens/dna/Homo_sapiens.GRCh37.dna.toplevel.fa.gz $HOME/.vep/homo_sapiens/105_GRCh37/ \ + && mv /var/cache/Homo_sapiens.GRCh37.dna.toplevel.fa.gz /.vep/homo_sapiens/105_GRCh37/ \ + && gzip -d /.vep/homo_sapiens/105_GRCh37/Homo_sapiens.GRCh37.dna.toplevel.fa.gz \ + && bgzip -i /.vep/homo_sapiens/105_GRCh37/Homo_sapiens.GRCh37.dna.toplevel.fa \ + && samtools faidx /.vep/homo_sapiens/105_GRCh37/Homo_sapiens.GRCh37.dna.toplevel.fa.gz + +RUN vep_convert_cache --species homo_sapiens --version all --dir /.vep + +# Deploy the minimal OS and tools into a clean target layer +FROM scratch +ARG VCF2MAF_VERSION=1.6.21 +ENV VEP_VERSION=${VEP_VERSION} + +LABEL org.opencontainers.image.vendor="MSKCC" +LABEL maintainer="Suleyman Vural " +LABEL 
maintainer="Cyriac Kandoth " +LABEL org.opencontainers.image.version.vcf2maf=${VCF2MAF_VERSION} +LABEL org.opencontainers.image.version.vep="105" + +COPY --from=builder vcf2maf-${VCF2MAF_VERSION} /opt/vcf2maf-${VCF2MAF_VERSION} +COPY --from=builder vcf2maf-${VCF2MAF_VERSION}/*.pl /usr/local/bin/ +COPY --from=builder vcf2maf-${VCF2MAF_VERSION}/data /opt/data +COPY --from=builder /.vep /.vep/ +COPY --from=builder /install_root / +COPY --from=builder /usr/local /usr/local + +RUN chmod 777 /usr/local/bin/vcf2*.pl && chmod 777 /usr/local/bin/maf2*.pl + +WORKDIR /opt diff --git a/vcf2maf_1.6.21/example_inputs.yaml b/vcf2maf_1.6.21/example_inputs.yaml new file mode 100644 index 00000000..906ab455 --- /dev/null +++ b/vcf2maf_1.6.21/example_inputs.yaml @@ -0,0 +1,14 @@ +input_vcf: + class: File + path: /path/to/input.vcf +tumor_id: tumor_sample_name +vcf_tumor_id: tumor_sample_name +normal_id: normal_sample_name +vcf_normal_id: normal_sample_name +ncbi_build: "GRCh37" +vep_data: /.vep/ # location in the container +ref_fasta: "/.vep/homo_sapiens/105_GRCh37/Homo_sapiens.GRCh37.dna.toplevel.fa.gz" # location in the container +vep_path: /usr/local/bin/ # location in the container +retain_info: retain_info_str +retain_fmt: retain_fmt_str +output_maf: "output.maf" diff --git a/vcf2maf_1.6.21/vcf2maf_1.6.21.cwl b/vcf2maf_1.6.21/vcf2maf_1.6.21.cwl new file mode 100644 index 00000000..2416a380 --- /dev/null +++ b/vcf2maf_1.6.21/vcf2maf_1.6.21.cwl @@ -0,0 +1,195 @@ + +class: CommandLineTool +cwlVersion: v1.0 + +$namespaces: + dct: http://purl.org/dc/terms/ + doap: http://usefulinc.com/ns/doap# + foaf: http://xmlns.com/foaf/0.1/ + sbg: https://www.sevenbridges.com/ + +id: vcf2maf_v1.6.21 + +baseCommand: + - perl + - /opt/vcf2maf-1.6.21/vcf2maf.pl + +inputs: + memory_per_job: + type: ["null",int] + doc: Memory per job in megabytes + memory_overhead: + type: ["null",int] + doc: Memory overhead per job in megabytes + cache_version: + type: + - 'null' + - string + default: '105' + doc: 
Version of VEP and its cache to use + inputBinding: + prefix: --cache-version + species: + type: + - 'null' + - string + default: homo_sapiens + doc: Species of variants in input + inputBinding: + prefix: --species + ncbi_build: + type: + - 'null' + - string + default: GRCh37 + doc: Genome build of variants in input + inputBinding: + prefix: --ncbi-build + ref_fasta: + type: ['null', string] + doc: Reference FASTA file + inputBinding: + prefix: --ref-fasta + maf_center: + type: ['null', string] + default: mskcc.org + doc: Variant calling center to report in MAF + inputBinding: + prefix: --maf-center + output_maf: + type: ['null', string] + doc: Path to output MAF file + inputBinding: + prefix: --output-maf + min_hom_vaf: + type: + - 'null' + - float + default: 0.7 + doc: If GT undefined in VCF, minimum allele fraction to call a variant homozygous + inputBinding: + prefix: --min-hom-vaf + remap_chain: + type: ['null', string] + doc: Chain file to remap variants to a different assembly before running VEP + inputBinding: + prefix: --remap-chain + normal_id: + type: ['null', string] + default: NORMAL + doc: Matched_Norm_Sample_Barcode to report in the MAF + inputBinding: + prefix: --normal-id + buffer_size: + type: + - 'null' + - int + default: 5000 + doc: Number of variants VEP loads at a time; Reduce this for low memory systems + inputBinding: + prefix: --buffer-size + custom_enst: + type: ['null', string] + doc: List of custom ENST IDs that override canonical selection + inputBinding: + prefix: --custom-enst + vcf_normal_id: + type: ['null', string] + default: NORMAL + doc: Matched normal ID used in VCFs genotype columns + inputBinding: + prefix: --vcf-normal-id + vep_path: + type: ['null', string] + doc: Folder containing variant_effect_predictor.pl or vep binary + inputBinding: + prefix: --vep-path + vep_data: + type: ['null', string] + doc: VEPs base cache/plugin directory + inputBinding: + prefix: --vep-data + any_allele: + type: ['null', string] + doc: When 
reporting co-located variants, allow mismatched variant alleles too + inputBinding: + prefix: --any-allele + input_vcf: + type: + - string + - File + doc: Path to input file in VCF format + inputBinding: + prefix: --input-vcf + vep_forks: + type: + - 'null' + - int + default: 4 + doc: Number of forked processes to use when running VEP + inputBinding: + prefix: --vep-forks + vcf_tumor_id: + type: ['null', string] + default: TUMOR + doc: Tumor sample ID used in VCFs genotype columns + inputBinding: + prefix: --vcf-tumor-id + tumor_id: + type: ['null', string] + default: TUMOR + doc: Tumor_Sample_Barcode to report in the MAF + inputBinding: + prefix: --tumor-id + retain_info: + type: ['null', string] + doc: Comma-delimited names of INFO fields to retain as extra columns in MAF + inputBinding: + prefix: --retain-info + retain_fmt: + type: ['null', string] + doc: Comma-delimited names of FORMAT fields to retain as extra columns in MAF [] + inputBinding: + prefix: --retain-fmt +outputs: + vcf2maf_maf: + type: File + outputBinding: + glob: | + ${ + if (inputs.output_maf) + return inputs.output_maf; + return null; + } + +arguments: +- valueFrom: "$(runtime.tmpdir)" + prefix: '--tmp-dir' + shellQuote: false + +requirements: + InlineJavascriptRequirement: {} + ResourceRequirement: + ramMin: 8000 + coresMin: 2 + DockerRequirement: + dockerPull: ghcr.io/msk-access/vcf2maf:1.6.21 + +dct:contributor: + - class: foaf:Organization + foaf:member: + - class: foaf:Person + foaf:mbox: mailto:vurals@mskcc.org + foaf:name: Suleyman Vural + foaf:name: Memorial Sloan Kettering Cancer Center +dct:creator: + - class: foaf:Organization + foaf:member: + - class: foaf:Person + foaf:mbox: mailto:kumarn1@mskcc.org + foaf:name: Nikhil Kumar + foaf:name: Memorial Sloan Kettering Cancer Center +doap:release: + - class: doap:Version + doap:name: vcf2maf + doap:revision: 1.6.21 diff --git a/waltz_count_reads_3.1.1/container/Dockerfile b/waltz_count_reads_3.1.1/container/Dockerfile index 
70d13a31..cfad2f10 100644 --- a/waltz_count_reads_3.1.1/container/Dockerfile +++ b/waltz_count_reads_3.1.1/container/Dockerfile @@ -5,14 +5,14 @@ FROM java:8 ################## ARGUMENTS ####################### ARG BUILD_DATE ARG BUILD_VERSION=1.0.0 -ARG WALTZ_VERSION_cmd=3.1.1 +ARG WALTZ_VERSION_cmd=3.2.0 ################## LABELS ####################### LABEL org.opencontainers.image.authors='Shalabh Suman (sumans@mskcc.org)' LABEL org.opencontainers.image.created=$BUILD_DATE #LABEL org.opencontainers.image.source = "https://github.com/juberpatel/Waltz/blob/master/Waltz-2.0.jar" -LABEL org.opencontainers.image.source = "https://github.com/juberpatel/Waltz/releases/download/v3.1.1/Waltz-3.1.1.jar" +LABEL org.opencontainers.image.source = "https://github.com/msk-access/cwl_commandlinetools" LABEL org.opencontainers.image.revision = "10037a1" LABEL org.opencontainers.image.title = "waltz" LABEL org.opencontainers.image.description = "Syntax to build image: docker build --build-arg BUILD_DATE=$(date -u +'%Y-%m-%dT%H:%M:%SZ') -t mskcc/waltz:1.0.0 ." 
@@ -24,7 +24,7 @@ ENV WALTZ_VERSION=${WALTZ_VERSION_cmd} # Install Waltz #RUN wget https://github.com/juberpatel/Waltz/blob/master/Waltz-${WALTZ_VERSION}.jar -RUN wget https://github.com/juberpatel/Waltz/releases/download/v3.1.1/Waltz-${WALTZ_VERSION}.jar +RUN wget https://github.com/mskcc/Waltz/releases/download/v${WALTZ_VERSION}/Waltz-${WALTZ_VERSION}.jar RUN mv Waltz-${WALTZ_VERSION}.jar /usr/local/bin/Waltz.jar #ENV PATH=$PATH:/usr/local/bin/Waltz.jar \ No newline at end of file diff --git a/waltz_count_reads_3.1.1/waltz_count_reads_3.1.1.cwl b/waltz_count_reads_3.1.1/waltz_count_reads_3.1.1.cwl index b06321c7..f177b5d5 100644 --- a/waltz_count_reads_3.1.1/waltz_count_reads_3.1.1.cwl +++ b/waltz_count_reads_3.1.1/waltz_count_reads_3.1.1.cwl @@ -3,8 +3,9 @@ cwlVersion: v1.0 $namespaces: dct: 'http://purl.org/dc/terms/' doap: 'http://usefulinc.com/ns/doap#' + edam: 'http://edamontology.org/' foaf: 'http://xmlns.com/foaf/0.1/' - edam: http://edamontology.org/ + sbg: 'https://www.sevenbridges.com/' id: waltz_count_reads baseCommand: - java @@ -33,15 +34,15 @@ inputs: inputBinding: position: 4 outputs: - - id: covered_regions + - id: waltz_count_reads_covered_regions type: File outputBinding: glob: '*.covered-regions' - - id: fragment_sizes + - id: waltz_count_reads_fragment_sizes type: File outputBinding: glob: '*.fragment-sizes' - - id: read_counts + - id: waltz_count_reads_read_counts type: File outputBinding: glob: '*.read-counts' @@ -75,10 +76,8 @@ requirements: - class: ResourceRequirement ramMin: 8000 coresMin: 1 -# ramMin: "${\r if(inputs.memory_per_job && inputs.memory_overhead) {\r \r return inputs.memory_per_job + inputs.memory_overhead\r }\r else if (inputs.memory_per_job && !inputs.memory_overhead){\r \r \treturn inputs.memory_per_job + 2000\r }\r else if(!inputs.memory_per_job && inputs.memory_overhead){\r \r return 8000 + inputs.memory_overhead\r }\r else {\r \r \treturn 8000 \r }\r}" -# coresMin: "${\r if (inputs.number_of_threads) {\r \r \treturn 
inputs.number_of_threads \r }\r else {\r \r return 1\r }\r}" - class: DockerRequirement - dockerPull: 'mskcc/waltz:1.0.0' + dockerPull: 'ghcr.io/msk-access/waltz:3.1.1' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/waltz_pileupmatrices_3.1.1/waltz_pileupmatrices_3.1.1.cwl b/waltz_pileupmatrices_3.1.1/waltz_pileupmatrices_3.1.1.cwl index 65a73de9..1fdcdb1d 100644 --- a/waltz_pileupmatrices_3.1.1/waltz_pileupmatrices_3.1.1.cwl +++ b/waltz_pileupmatrices_3.1.1/waltz_pileupmatrices_3.1.1.cwl @@ -3,8 +3,9 @@ cwlVersion: v1.0 $namespaces: dct: 'http://purl.org/dc/terms/' doap: 'http://usefulinc.com/ns/doap#' + edam: 'http://edamontology.org/' foaf: 'http://xmlns.com/foaf/0.1/' - edam: http://edamontology.org/ + sbg: 'https://www.sevenbridges.com/' id: waltz_pileupmetrics baseCommand: - java @@ -37,19 +38,19 @@ inputs: inputBinding: position: 13 outputs: - - id: pileup + - id: waltz_pileupmetrics_pileup type: File outputBinding: glob: '*-pileup.txt' - - id: pileup_without_duplicates + - id: waltz_pileupmetrics_pileup_without_duplicates type: File outputBinding: glob: '*-pileup-without-duplicates.txt' - - id: intervals + - id: waltz_pileupmetrics_intervals type: File outputBinding: glob: '*-intervals.txt' - - id: intervals_without_duplicates + - id: waltz_pileupmetrics_intervals_without_duplicates type: File outputBinding: glob: '*-intervals-without-duplicates.txt' @@ -87,10 +88,8 @@ requirements: - class: ResourceRequirement ramMin: 8000 coresMin: 1 -# ramMin: "${\r if(inputs.memory_per_job && inputs.memory_overhead) {\r \r return inputs.memory_per_job + inputs.memory_overhead\r }\r else if (inputs.memory_per_job && !inputs.memory_overhead){\r \r \treturn inputs.memory_per_job + 2000\r }\r else if(!inputs.memory_per_job && inputs.memory_overhead){\r \r return 8000 + inputs.memory_overhead\r }\r else {\r \r \treturn 8000 \r }\r}" -# coresMin: "${\r if (inputs.number_of_threads) {\r \r \treturn inputs.number_of_threads \r 
}\r else {\r \r return 1\r }\r}" - class: DockerRequirement - dockerPull: 'mskcc/waltz:1.0.0' + dockerPull: 'ghcr.io/msk-access/waltz:3.1.1' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' @@ -110,4 +109,3 @@ requirements: - class: 'doap:Version' 'doap:name': waltz 'doap:revision': 3.1.1 -