diff --git a/.editorconfig b/.editorconfig
new file mode 100644
index 00000000..d4a2c440
--- /dev/null
+++ b/.editorconfig
@@ -0,0 +1,21 @@
+# http://editorconfig.org
+
+root = true
+
+[*]
+indent_style = space
+indent_size = 4
+trim_trailing_whitespace = true
+insert_final_newline = true
+charset = utf-8
+end_of_line = lf
+
+[*.bat]
+indent_style = tab
+end_of_line = crlf
+
+[LICENSE]
+insert_final_newline = false
+
+[Makefile]
+indent_style = tab
diff --git a/.gitbook.yaml b/.gitbook.yaml
new file mode 100644
index 00000000..afdeba9f
--- /dev/null
+++ b/.gitbook.yaml
@@ -0,0 +1 @@
+root: ./docs
diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md
new file mode 100644
index 00000000..14740fbe
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE.md
@@ -0,0 +1,15 @@
+* cwl-commandlinetools version:
+* Python version:
+* Operating System:
+
+### Description
+
+Describe what you were trying to get done.
+Tell us what happened, what went wrong, and what you expected to happen.
+
+### What I Did
+
+```
+Paste the command(s) you ran and the output.
+If there was a crash, please include the traceback here.
+```
diff --git a/.gitignore b/.gitignore
index 76d33366..de23585e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,6 +8,7 @@ __pycache__/
# Distribution / packaging
.Python
+env/
build/
develop-eggs/
dist/
@@ -81,8 +82,10 @@ celerybeat-schedule
# SageMath parsed files
*.sage.py
-# Environments
+# dotenv
.env
+
+# virtualenv
.venv
env/
venv/
@@ -108,7 +111,6 @@ venv.bak/
# vscode
.vscode/*
-!.vscode/settings.json
-!.vscode/tasks.json
-!.vscode/launch.json
-!.vscode/extensions.json
\ No newline at end of file
+
+# pycharm
+.idea
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 00000000..c0504869
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,31 @@
+# Config file for automatic testing at travis-ci.org
+
+language: python
+python:
+ - 3.7
+ - 2.7
+
+# Command to install dependencies, e.g. pip install -r requirements.txt --use-mirrors
+install:
+ - pip install -U tox-travis
+ - pip install -r requirements.txt
+
+# Command to run tests, e.g. python setup.py test
+script:
+ - find . -name '*.cwl' | xargs -n 1 cwltool --validate
+
+# Assuming you have installed the travis-ci CLI tool, after you
+# create the Github repo and add it to Travis, run the
+# following command to finish PyPI deployment setup:
+# $ travis encrypt --add deploy.password
+deploy:
+ provider: pypi
+ distributions: sdist bdist_wheel
+ user: __token__
+ password:
+ secure: g9Ja5QDLc1WGu50xpmXl6wcP7qRNzfYZk7i3PEJtQNO6JLPtxEmBgDAb4+RedRxLo9MRmws/n/bFkTOSP837d+tJ91cYN6TFbVu2teWiR6hblDX/Twhbceq/MjdYJyAVsH+KpuORjuJGqzk2I4QLzI+B/0mXuWcE4EPaCZ5mpm0aYYOTLW1Ukxl1j/PoV8wWC2glItLQ02zIvLyr276+en+RAdWYwqW8sY7rn4hI6VaM78OMsc2/cvG27X82SX4rBxJ3/VveslAc3O7Kck02ltOPyOLI3w++HEVvhHAaCK3kDxNEYQCMly1lDYWTfAGm2F5TZ5mgt2adb08AN//0GnWQOfciHh3JUrIt7po7B5Zs8kmZNGGTJFog8o+btU4pAeCDIt61lFyMo7VVpvPzR4ToiGP3zBvGEgnZd7WpTI0H0E4oc821vl9SAN+3aWQhDxDHl+z3VDwpZTA18mgQikFNc7asKDSXCAGoStI/YFWjw3X+tvFGMXR+R6dpmeSplFFSOx9L3TbrtymWProH8MOyxSVNDdQG6Vz41bN9IS47GRI+/1A9jXxwGurKY1ZL7HZDApDx42Fn2RdOFQNyLCeCneI+RUXtkHY56tH3GpBmnnJX6bKPrn4+VIbgd1VCahPrG8INqxx+SE4JojdIQHcxDy924PCL8mS4hakE4Z0=
+ on:
+ tags: true
+ repo: msk-access/cwl-commandlinetools
+ python: 2.7
+
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 00000000..e994ec71
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,16 @@
+Apache Software License 2.0
+
+Copyright (c) 2019, msk-access
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 00000000..405d5d7e
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,9 @@
+include LICENSE
+include README.md
+
+recursive-include tests *
+global-include *.cwl
+recursive-exclude * __pycache__
+recursive-exclude * *.py[co]
+
+recursive-include docs *.jpg *.png *.gif
diff --git a/Makefile b/Makefile
new file mode 100644
index 00000000..4dba77a2
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,77 @@
+.PHONY: clean clean-test clean-pyc clean-build docs help
+.DEFAULT_GOAL := help
+
+define BROWSER_PYSCRIPT
+import os, webbrowser, sys
+
+try:
+ from urllib import pathname2url
+except:
+ from urllib.request import pathname2url
+
+webbrowser.open("file://" + pathname2url(os.path.abspath(sys.argv[1])))
+endef
+export BROWSER_PYSCRIPT
+
+define PRINT_HELP_PYSCRIPT
+import re, sys
+
+for line in sys.stdin:
+ match = re.match(r'^([a-zA-Z_-]+):.*?## (.*)$$', line)
+ if match:
+ target, help = match.groups()
+ print("%-20s %s" % (target, help))
+endef
+export PRINT_HELP_PYSCRIPT
+
+BROWSER := python -c "$$BROWSER_PYSCRIPT"
+
+help:
+ @python -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST)
+
+clean: clean-build clean-pyc clean-test ## remove all build, test, coverage and Python artifacts
+
+clean-build: ## remove build artifacts
+ rm -fr build/
+ rm -fr dist/
+ rm -fr .eggs/
+ find . -name '*.egg-info' -exec rm -fr {} +
+ find . -name '*.egg' -exec rm -f {} +
+
+clean-pyc: ## remove Python file artifacts
+ find . -name '*.pyc' -exec rm -f {} +
+ find . -name '*.pyo' -exec rm -f {} +
+ find . -name '*~' -exec rm -f {} +
+ find . -name '__pycache__' -exec rm -fr {} +
+
+clean-test: ## remove test and coverage artifacts
+ rm -fr .tox/
+ rm -f .coverage
+ rm -fr htmlcov/
+ rm -fr .pytest_cache
+
+lint: ## check style with flake8
+ flake8 cwl_commandlinetools tests
+
+test: ## run tests quickly with the default Python
+ pytest
+
+test-all: ## run tests on every Python version with tox
+ tox
+
+coverage: ## check code coverage quickly with the default Python
+ coverage run --source cwl_commandlinetools -m pytest
+ coverage report -m
+ coverage html
+ $(BROWSER) htmlcov/index.html
+
+release: dist ## package and upload a release
+ twine upload dist/*
+
+dist: clean ## builds source and wheel package
+ python setup.py sdist
+ python setup.py bdist_wheel
+ ls -l dist
+
+install: clean ## install the package to the active Python's site-packages
+ python setup.py install
diff --git a/README.md b/README.md
new file mode 100644
index 00000000..650f0f8e
--- /dev/null
+++ b/README.md
@@ -0,0 +1,34 @@
+---
+description: Central location for storing common workflow language based command line tools for building msk-access workflows
+---
+
+# MSK-ACCESS command-line tools
+
+[](https://badge.fury.io/py/cwl-commandlinetools)
+[](https://travis-ci.com/msk-access/cwl-commandlinetools)
+
+- Free software: Apache Software License 2.0
+- Documentation: [https://msk-access.gitbook.io/command-line-tools-cwl/](https://msk-access.gitbook.io/command-line-tools-cwl/)
+
+## Features
+
+Create command line tools in common workflow language to generate msk-access workflows.
+
+## Installation
+
+Clone the repository:
+
+```
+git clone --depth 50 https://github.com/msk-access/cwl-commandlinetools.git
+```
+
+**Follow the README in respective tool folder for execution of the tool.**
+
+
+## Credits
+
+- CMO ACCESS Informatics Team
+- This package was created with Cookiecutter and the `audreyr/cookiecutter-pypackage` project template.
+ - Cookiecutter: https://github.com/audreyr/cookiecutter
+ - `audreyr/cookiecutter-pypackage`: https://github.com/audreyr/cookiecutter-pypackage
+
diff --git a/Workflows/bwa_sort_merge.cwl b/Workflows/bwa_sort_merge.cwl
deleted file mode 100644
index 99b25be9..00000000
--- a/Workflows/bwa_sort_merge.cwl
+++ /dev/null
@@ -1,86 +0,0 @@
-class: Workflow
-cwlVersion: v1.0
-id: bwa_sort_merge
-label: bwa_sort_merge
-
-inputs:
- - id: reference_sequence
- type: File
- secondaryFiles:
- - .amb
- - .ann
- - .bwt
- - .pac
- - .sa
- - .fai
- - id: read_pair
- type:
- type: array
- items:
- items: File
- type: array
- - id: sample_id
- type: string
- - id: lane_id
- type: 'string[]'
-
-outputs:
- - id: sample_id_output
- outputSource:
- - bwa_sort/sample_id_output
- type:
- - string
- - type: array
- items: string
- - id: output_md_metrics
- outputSource:
- - gatk_markduplicatesgatk/output_md_metrics
- type: File
- - id: output_md_bam
- outputSource:
- - gatk_markduplicatesgatk/output_md_bam
- type: File
-
-steps:
- - id: samtools_merge
- in:
- - id: input_bams
- source:
- - bwa_sort/output_file
- out:
- - id: output_file
- run: ../CommandLineTools/samtools-merge_1.9/samtools-merge_1.9.cwl
- - id: bwa_sort
- in:
- - id: reference_sequence
- source: reference_sequence
- - id: read_pair
- source:
- - read_pair
- - id: sample_id
- source: sample_id
- - id: lane_id
- source: lane_id
- out:
- - id: output_file
- - id: sample_id_output
- - id: lane_id_output
- run: ./bwa_sort.cwl
- label: bwa_sort
- scatter:
- - read_pair
- - lane_id
- scatterMethod: dotproduct
- - id: gatk_markduplicatesgatk
- in:
- - id: input_bam
- source: samtools_merge/output_file
- out:
- - id: output_md_bam
- - id: output_md_metrics
- run: ../CommandLineTools/mark-duplicates_4.1.0.0/mark-duplicates_4.1.0.0.cwl
- label: GATK MarkDuplicates
-
-requirements:
- - class: SubworkflowFeatureRequirement
- - class: ScatterFeatureRequirement
diff --git a/Workflows/make_bam.cwl b/Workflows/make_bam.cwl
deleted file mode 100644
index 5d1dfdef..00000000
--- a/Workflows/make_bam.cwl
+++ /dev/null
@@ -1,107 +0,0 @@
-class: Workflow
-cwlVersion: v1.0
-id: make_bam
-label: make_bam
-
-inputs:
- - id: read_pairs_normal
- type:
- type: array
- items:
- items: File
- type: array
- - id: lane_ids_normal
- type: 'string[]'
- - id: reference_sequence
- type: File
- secondaryFiles:
- - .amb
- - .ann
- - .bwt
- - .pac
- - .sa
- - .fai
- - id: sample_id_normal
- type: string
- - id: sample_id_tumor
- type: string
- - id: read_pairs_tumor
- type:
- type: array
- items:
- items: File
- type: array
- - id: lane_ids_tumor
- type: 'string[]'
-
-outputs:
- - id: sample_id_output_normal
- outputSource:
- - make_bam_Normal/sample_id_output
- type:
- - string
- - type: array
- items: string
- - id: normal_bam
- outputSource:
- - make_bam_Normal/output_md_bam
- type: File
- - id: sample_id_output_tumor
- outputSource:
- - make_bam_Tumor/sample_id_output
- type:
- - string
- - type: array
- items: string
- - id: tumor_bam
- outputSource:
- - make_bam_Tumor/output_md_bam
- type: File
- - id: tumor_metrics
- outputSource:
- - make_bam_Tumor/output_md_metrics
- type: File
- - id: normal_metrics
- outputSource:
- - make_bam_Normal/output_md_metrics
- type: File
-
-steps:
- - id: make_bam_Normal
- in:
- - id: reference_sequence
- source: reference_sequence
- - id: read_pair
- source:
- - read_pairs_normal
- - id: sample_id
- source: sample_id_normal
- - id: lane_id
- source:
- - lane_ids_normal
- out:
- - id: sample_id_output
- - id: output_md_metrics
- - id: output_md_bam
- run: ./bwa_sort_merge.cwl
- label: make_bam_Normal
- - id: make_bam_Tumor
- in:
- - id: reference_sequence
- source: reference_sequence
- - id: read_pair
- source:
- - read_pairs_tumor
- - id: sample_id
- source: sample_id_tumor
- - id: lane_id
- source:
- - lane_ids_tumor
- out:
- - id: sample_id_output
- - id: output_md_metrics
- - id: output_md_bam
- run: ./bwa_sort_merge.cwl
- label: make_bam_Tumor
-requirements:
- - class: SubworkflowFeatureRequirement
diff --git a/Workflows/msisensor-run-both.cwl b/Workflows/msisensor-run-both.cwl
deleted file mode 100644
index a9d2f373..00000000
--- a/Workflows/msisensor-run-both.cwl
+++ /dev/null
@@ -1,59 +0,0 @@
-#!/usr/bin/env cwl-runner
-
-$namespaces:
- dct: http://purl.org/dc/terms/
- foaf: http://xmlns.com/foaf/0.1/
- doap: http://usefulinc.com/ns/doap#
-
-cwlVersion: v1.0
-
-class: Workflow
-id: msisensor-run-both
-requirements:
- StepInputExpressionRequirement: {}
- MultipleInputFeatureRequirement: {}
- ScatterFeatureRequirement: {}
- SubworkflowFeatureRequirement: {}
- InlineJavascriptRequirement: {}
-
-inputs:
- normal_bam:
- type: File
- secondaryFiles: [ ".bai" ]
- tumor_bam:
- type: File
- secondaryFiles: [ ".bai" ]
- output_prefix: string
- msi_file: File
-
-outputs:
- msisensor_0.2_output:
- type: File
- outputSource: msisensor_0.2/output
-
- msisensor_0.6_output:
- type: File
- outputSource: msisensor_0.6/output
-
-steps:
- msisensor_0.2:
- run: ../CommandLineTools/msisensor_0.2/msisensor-0.2.cwl
- in:
- output_prefix: output_prefix
- d: msi_file
- n: normal_bam
- t: tumor_bam
- o:
- valueFrom: ${ return inputs.output_prefix + "_0.2.txt"; }
- out: [ output ]
-
- msisensor_0.6:
- run: ../CommandLineTools/msisensor_0.6/msisensor-0.6.cwl
- in:
- output_prefix: output_prefix
- d: msi_file
- n: normal_bam
- t: tumor_bam
- o:
- valueFrom: ${ return inputs.output_prefix + "_0.6.txt"; }
- out: [ output ]
diff --git a/Workflows/mutect_wf.cwl b/Workflows/mutect_wf.cwl
deleted file mode 100644
index c046e18a..00000000
--- a/Workflows/mutect_wf.cwl
+++ /dev/null
@@ -1,65 +0,0 @@
-class: Workflow
-cwlVersion: v1.0
-id: mutect_wf
-label: mutect_wf
-inputs:
- - id: scatter-count
- type: int?
- - id: output
- type: string
- - id: intervals
- type: File?
- - id: reference
- type: File?
- - id: tumor_sample
- type: string?
- - id: normal_sample
- type: string?
- - id: input_normal
- type: File?
- - id: input_tumor
- type: File?
-
-outputs:
- - id: output_1
- outputSource:
- - mutect2/output
- type: 'File[]?'
-
-steps:
- - id: scatterintervals
- in:
- - id: reference
- source: reference
- - id: intervals
- source: intervals
- - id: scatter-count
- source: scatter-count
- - id: output
- source: output
- out:
- - id: interval_files
- run: ../CommandLineTools/scatterintervals_4.1.0.0/scatterintervals_4.1.0.0.cwl
- label: ScatterIntervals
- - id: mutect2
- in:
- - id: reference
- source: reference
- - id: intervals
- source: scatterintervals/interval_files
- - id: input
- source: input_tumor
- - id: tumor_sample
- source: tumor_sample
- - id: input_normal
- source: input_normal
- - id: normal_sample
- source: normal_sample
- out:
- - id: output
- run: ../CommandLineTools/mutect2_4.1.0.0/mutect2_4.1.0.0.cwl
- label: Mutect2
- scatter:
- - intervals
-requirements:
- - class: ScatterFeatureRequirement
diff --git a/Workflows/snp-pileup-to-facets.cwl b/Workflows/snp-pileup-to-facets.cwl
deleted file mode 100644
index 619a8753..00000000
--- a/Workflows/snp-pileup-to-facets.cwl
+++ /dev/null
@@ -1,80 +0,0 @@
-#!/usr/bin/env cwl-runner
-
-class: Workflow
-cwlVersion: v1.0
-
-requirements:
- InlineJavascriptRequirement: {}
- StepInputExpressionRequirement: {}
- SubworkflowFeatureRequirement: {}
- MultipleInputFeatureRequirement: {}
- ScatterFeatureRequirement: {}
-
-inputs:
- facets_vcf:
- type: File
- secondaryFiles:
- - .gz
-
- bam_normal:
- type: File
-
- bam_tumor:
- type: File
-
- tumor_sample_name:
- type: string
-
-outputs:
-
- snp_pileup_out:
- type: File
- outputSource: do_snp_pileup/output_file
-
- facets_png:
- type: File[]?
- outputSource: do_facets/png_files
-
- facets_txt_purity:
- type: File?
- outputSource: do_facets/txt_files_purity
-
- facets_txt_hisens:
- type: File?
- outputSource: do_facets/txt_files_hisens
-
- facets_out_files:
- type: File[]?
- outputSource: do_facets/out_files
-
- facets_rdata:
- type: File[]?
- outputSource: do_facets/rdata_files
-
- facets_seg:
- type: File[]?
- outputSource: do_facets/seg_files
-
-steps:
- do_snp_pileup:
- run: ../CommandLineTools/snp-pileup_0.1.1/htstools-0.1.1.cwl
- in:
- vcf_file: facets_vcf
- bam_normal: bam_normal
- bam_tumor: bam_tumor
- output_file:
- valueFrom: ${ return inputs.bam_normal.basename.replace(".bam", "") + "_" + inputs.bam_tumor.basename.replace(".bam", "") + ".dat.gz"; }
- out: [ output_file ]
-
- do_facets:
- run: ../CommandLineTools/facets_1.5.6/facets.doFacets-1.5.6.cwl
- in:
- genome:
- valueFrom: ${ return "hg19"; }
- counts_file: do_snp_pileup/output_file
- TAG:
- valueFrom: ${ return inputs.counts_file.basename.replace(".dat.gz", ""); }
- tumor_id: tumor_sample_name
- directory:
- valueFrom: ${ return "."; }
- out: [ png_files, txt_files_purity, txt_files_hisens, out_files, rdata_files, seg_files ]
diff --git a/__init__.py b/__init__.py
new file mode 100644
index 00000000..d62ed77c
--- /dev/null
+++ b/__init__.py
@@ -0,0 +1,7 @@
+# -*- coding: utf-8 -*-
+
+"""Top-level package for cwl-commandlinetools."""
+
+__author__ = """msk-access"""
+__email__ = 'msk.access@gmail.com'
+__version__ = '1.1.1'
diff --git a/abra2_2.17/abra2_2.17.cwl b/abra2_2.17/abra2_2.17.cwl
index e9ac545e..0f31e4d4 100644
--- a/abra2_2.17/abra2_2.17.cwl
+++ b/abra2_2.17/abra2_2.17.cwl
@@ -184,7 +184,7 @@ requirements:
ramMin: 60000
coresMin: 16
- class: DockerRequirement
- dockerPull: 'mskcc/abra2:0.1.0'
+ dockerPull: 'ghcr.io/msk-access/abra2:0.6.3'
- class: InlineJavascriptRequirement
'dct:contributor':
- class: 'foaf:Organization'
diff --git a/abra2_2.17/container/Dockerfile b/abra2_2.17/container/Dockerfile
index 9f1bc82f..e7c64df9 100644
--- a/abra2_2.17/container/Dockerfile
+++ b/abra2_2.17/container/Dockerfile
@@ -20,7 +20,7 @@ LABEL org.opencontainers.image.created=${BUILD_DATE} \
org.opencontainers.image.licenses=${LICENSE} \
org.opencontainers.image.version.java=${JAVA_VERSION} \
org.opencontainers.image.version.abra2=${ABRA2_VERSION} \
- org.opencontainers.image.source.abra2="https://github.com/mozack/abra2/releases/"
+ org.opencontainers.image.source="https://github.com/mozack/abra2/releases/"
LABEL org.opencontainers.image.description="This container uses OPENJDK ${JAVA_VERSION} as the base image to build abra2 version ${ABRA2_VERSION}"
@@ -37,4 +37,4 @@ RUN apt-get update && \
RUN wget "https://github.com/mozack/abra2/releases/download/v${ABRA2_VERSION}/abra2-${ABRA2_VERSION}.jar" && \
chmod 755 /usr/src/abra2-${ABRA2_VERSION}.jar && \
- cp -s /usr/src/abra2-${ABRA2_VERSION}.jar /usr/local/bin/abra2.jar
\ No newline at end of file
+ cp -s /usr/src/abra2-${ABRA2_VERSION}.jar /usr/local/bin/abra2.jar
diff --git a/abra2_2.19/abra2_2.19.cwl b/abra2_2.19/abra2_2.19.cwl
index 8c9be47a..1af3c194 100644
--- a/abra2_2.19/abra2_2.19.cwl
+++ b/abra2_2.19/abra2_2.19.cwl
@@ -4,7 +4,8 @@ $namespaces:
dct: 'http://purl.org/dc/terms/'
doap: 'http://usefulinc.com/ns/doap#'
foaf: 'http://xmlns.com/foaf/0.1/'
-id: abra2_2.19
+ sbg: 'https://www.sevenbridges.com/'
+id: abra2_2_19
baseCommand:
- java
inputs:
@@ -173,7 +174,7 @@ requirements:
ramMin: "${\r if(inputs.memory_per_job && inputs.memory_overhead) {\r \r return inputs.memory_per_job + inputs.memory_overhead\r }\r else if (inputs.memory_per_job && !inputs.memory_overhead){\r \r \treturn inputs.memory_per_job + 2000\r }\r else if(!inputs.memory_per_job && inputs.memory_overhead){\r \r return 15000 + inputs.memory_overhead\r }\r else {\r \r \treturn 17000 \r }\r}"
coresMin: "${\r if (inputs.number_of_threads) {\r \r \treturn inputs.number_of_threads \r }\r else {\r \r return 4\r }\r}"
- class: DockerRequirement
- dockerPull: 'mskcc/abra2:0.2.0'
+ dockerPull: 'aphoid/abra2:2.19'
- class: InlineJavascriptRequirement
'dct:contributor':
- class: 'foaf:Organization'
@@ -193,6 +194,3 @@ requirements:
- class: 'doap:Version'
'doap:name': abra2
'doap:revision': 2.19
- - class: 'doap:Version'
- 'doap:name': cwl-wrapper
- 'doap:revision': 1.0.0
diff --git a/abra2_2.22/abra2_2.22.cwl b/abra2_2.22/abra2_2.22.cwl
new file mode 100644
index 00000000..e24cb107
--- /dev/null
+++ b/abra2_2.22/abra2_2.22.cwl
@@ -0,0 +1,246 @@
+class: CommandLineTool
+cwlVersion: v1.0
+$namespaces:
+ dct: 'http://purl.org/dc/terms/'
+ doap: 'http://usefulinc.com/ns/doap#'
+ foaf: 'http://xmlns.com/foaf/0.1/'
+ sbg: 'https://www.sevenbridges.com/'
+id: abra2_2_22
+baseCommand:
+ - java
+inputs:
+ - id: memory_per_job
+ type: int?
+ doc: Memory per job in megabytes
+ - id: memory_overhead
+ type: int?
+ doc: Memory overhead per job in megabytes
+ - id: number_of_threads
+ type: int?
+ - id: input_bam
+ type:
+ - File
+ - type: array
+ items: File
+ inputBinding:
+ position: 0
+ prefix: '--in'
+ doc: Required list of input sam or bam file (s) separated by comma
+ secondaryFiles:
+ - ^.bai
+ - id: working_directory
+ type: string?
+ doc: Set the temp directory (overrides java.io.tmpdir)
+ - id: reference_fasta
+ type: File
+ inputBinding:
+ position: 0
+ prefix: '--ref'
+ doc: Genome reference location
+ secondaryFiles:
+ - .fai
+ - id: targets
+ type: File
+ inputBinding:
+ position: 0
+ prefix: '--targets'
+ - id: kmer_size
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: '--kmer'
+ doc: >-
+ Optional assembly kmer size(delimit with commas if multiple sizes
+ specified)
+ - id: maximum_average_depth
+ type: int?
+ inputBinding:
+ position: 0
+ prefix: '--mad'
+ doc: >-
+ Regions with average depth exceeding this value will be downsampled
+ (default: 1000)
+ - id: soft_clip_contig
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: '--sc'
+ doc: >-
+ Soft clip contig args
+ [max_contigs,min_base_qual,frac_high_qual_bases,min_soft_clip_len]
+ (default:16,13,80,15)
+ - id: maximum_mixmatch_rate
+ type: float?
+ inputBinding:
+ position: 0
+ prefix: '--mmr'
+ doc: >-
+ Max allowed mismatch rate when mapping reads back to contigs (default:
+ 0.05)
+ - id: scoring_gap_alignments
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: '--sga'
+ doc: >-
+ Scoring used for contig alignments(match,
+ mismatch_penalty,gap_open_penalty,gap_extend_penalty) (default:8,32,48,1)
+ - id: contig_anchor
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: '--ca'
+ doc: >-
+ Contig anchor [M_bases_at_contig_edge,max_mismatches_near_edge]
+ (default:10,2)
+ - id: window_size
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: '--ws'
+ doc: |-
+ Processing window size and overlap
+ (size,overlap) (default: 400,200)
+ - id: consensus_sequence
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: '--cons'
+ doc: Use positional consensus sequence when aligning high quality soft clipping
+ - id: output_bams
+ type:
+ - string
+ - type: array
+ items: string
+ inputBinding:
+ position: 0
+ prefix: '--out'
+ doc: Required list of output sam or bam file (s) separated by comma
+ - id: ignore_bad_assembly
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: '--ignore-bad-assembly'
+ doc: Use this option to avoid parsing errors for corrupted assemblies
+ - id: bam_index
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: '--index'
+ doc: >-
+ Enable BAM index generation when outputting sorted alignments (may require
+      additional memory)
+ - id: input_vcf
+ type: File?
+ inputBinding:
+ position: 0
+ prefix: '--in-vcf'
+ doc: >-
+ VCF containing known (or suspected) variant sites. Very large files
+ should be avoided.
+ - id: no_edge_complex_indel
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: '--no-edge-ci'
+ doc: Prevent output of complex indels at read start or read end
+ - id: no_sort
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: '--nosort'
+ doc: Do not attempt to sort final output
+outputs:
+ - id: abra_realigned_bam
+ type:
+ - 'null'
+ - File
+ - type: array
+ items: File
+ outputBinding:
+ glob: |-
+ ${
+ return inputs.output_bams
+ }
+ secondaryFiles:
+ - ^.bai
+label: abra2_2.22
+arguments:
+ - position: 0
+ valueFrom: |-
+ ${
+ if (inputs.memory_per_job && inputs.memory_overhead) {
+
+ if (inputs.memory_per_job % 1000 == 0) {
+
+ return "-Xmx" + (inputs.memory_per_job / 1000).toString() + "G"
+ }
+ else {
+
+ return "-Xmx" + Math.floor((inputs.memory_per_job / 1000)).toString() + "G"
+ }
+ }
+ else if (inputs.memory_per_job && !inputs.memory_overhead) {
+
+ if (inputs.memory_per_job % 1000 == 0) {
+
+ return "-Xmx" + (inputs.memory_per_job / 1000).toString() + "G"
+ }
+ else {
+
+ return "-Xmx" + Math.floor((inputs.memory_per_job / 1000)).toString() + "G"
+ }
+ }
+ else if (!inputs.memory_per_job && inputs.memory_overhead) {
+
+ return "-Xmx20G"
+ }
+ else {
+
+ return "-Xmx20G"
+ }
+ }
+ - position: 0
+ prefix: '-jar'
+ valueFrom: /usr/local/bin/abra2.jar
+ - position: 0
+ prefix: '--threads'
+ valueFrom: |-
+ ${
+ if(inputs.number_of_threads)
+ return inputs.number_of_threads
+ return runtime.cores
+ }
+ - position: 0
+ prefix: '--tmpdir'
+ valueFrom: |-
+ ${
+ if(inputs.working_directory)
+ return inputs.working_directory;
+ return runtime.tmpdir
+ }
+requirements:
+ - class: ResourceRequirement
+ ramMin: 60000
+ coresMin: 16
+ - class: DockerRequirement
+ dockerPull: 'ghcr.io/msk-access/abra2:2.22'
+ - class: InlineJavascriptRequirement
+'dct:contributor':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:kumarn1@mskcc.org'
+ 'foaf:name': Nikhil Kumar
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'dct:creator':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:shahr2@mskcc.org'
+ 'foaf:name': Ronak Shah
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'doap:release':
+ - class: 'doap:Version'
+ 'doap:name': abra2
+ 'doap:revision': 2.22
diff --git a/abra2_2.22/container/Dockerfile b/abra2_2.22/container/Dockerfile
new file mode 100644
index 00000000..0ec95e41
--- /dev/null
+++ b/abra2_2.22/container/Dockerfile
@@ -0,0 +1,40 @@
+################## BASE IMAGE ######################
+
+FROM openjdk:8
+
+################## ARGUMENTS########################
+
+ARG BUILD_DATE
+ARG BUILD_VERSION
+ARG LICENSE="Apache-2.0"
+ARG JAVA_VERSION=8
+ARG ABRA2_VERSION=2.22
+
+################## METADATA ########################
+
+LABEL org.opencontainers.image.vendor="MSKCC"
+LABEL org.opencontainers.image.authors="Nikhil Kumar (kumarn1@mskcc.org)"
+
+LABEL org.opencontainers.image.created=${BUILD_DATE} \
+ org.opencontainers.image.version=${BUILD_VERSION} \
+ org.opencontainers.image.licenses=${LICENSE} \
+ org.opencontainers.image.version.java=${JAVA_VERSION} \
+ org.opencontainers.image.version.abra2=${ABRA2_VERSION} \
+ org.opencontainers.image.source="https://github.com/mozack/abra2/releases/"
+
+LABEL org.opencontainers.image.description="This container uses OPENJDK ${JAVA_VERSION} as the base image to build abra2 version ${ABRA2_VERSION}"
+
+################## INSTALL ##########################
+
+WORKDIR /usr/src
+
+RUN apt-get update && \
+ apt-get --no-install-recommends install -y \
+ wget && \
+ apt-get clean autoclean && \
+ apt-get autoremove -y && \
+ rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
+
+RUN wget "https://github.com/mozack/abra2/releases/download/v${ABRA2_VERSION}/abra2-${ABRA2_VERSION}.jar" && \
+ chmod 755 /usr/src/abra2-${ABRA2_VERSION}.jar && \
+ cp -s /usr/src/abra2-${ABRA2_VERSION}.jar /usr/local/bin/abra2.jar
diff --git a/abra2_2.22/example_inputs.yaml b/abra2_2.22/example_inputs.yaml
new file mode 100644
index 00000000..928aafd8
--- /dev/null
+++ b/abra2_2.22/example_inputs.yaml
@@ -0,0 +1,30 @@
+bam_index: true
+no_edge_complex_indel: true
+consensus_sequence:
+contig_anchor:
+ignore_bad_assembly:
+input_bam:
+ class: File
+ path: "path/to/alignment.bam"
+input_vcf:
+kmer_size:
+maximum_average_depth:
+maximum_mixmatch_rate:
+memory_overhead:
+memory_per_job:
+no_sort:
+number_of_threads:
+output_bams: name_of_realigned_abra.bam
+path_to_abra:
+reference_fasta:
+ class: File
+ path: "/path/to/reference.fasta"
+scoring_gap_alignments:
+soft_clip_contig:
+targets:
+ class: File
+ metadata: {}
+ path: "/path/to/target.bed"
+ secondaryFiles: []
+window_size:
+working_directory:
diff --git a/access_utils/0.1.1/README.md b/access_utils/0.1.1/README.md
new file mode 100644
index 00000000..45bd9f8b
--- /dev/null
+++ b/access_utils/0.1.1/README.md
@@ -0,0 +1,32 @@
+# CWL and Dockerfile for running general_stats_parse
+
+## Version of tools in docker image (/container/Dockerfile)
+
+| Tool | Version | Location |
+|--- |--- |--- |
+| access_utils | 0.1.1 | |
+
+## CWL
+
+- CWL specification 1.0
+- Use example_inputs.yaml to see the inputs to the cwl
+- Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+  > toil-cwl-runner general_stats_parse.cwl example_inputs.yaml
+```
+
+```bash
+#Using CWLTOOL
+> cwltool --singularity --non-strict /path/to/general_stats_parse.cwl /path/to/example_inputs.yaml
+
+#Using toil-cwl-runner
+> mkdir tool_toil_log
+> toil-cwl-runner --singularity --logFile /path/to/tool_toil_log/cwltoil.log --jobStore /path/to/tool_jobStore --batchSystem lsf --workDir /path/to/tool_toil_log --outdir . --writeLogs /path/to/tool_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/general_stats_parse.cwl /path/to/example_inputs.yaml > tool_toil.stdout 2> tool_toil.stderr &
+```
+
+### Usage
+
+```bash
+> toil-cwl-runner general_stats_parse.cwl -h
+```
diff --git a/access_utils/0.1.1/example_inputs.yaml b/access_utils/0.1.1/example_inputs.yaml
new file mode 100644
index 00000000..27fccad3
--- /dev/null
+++ b/access_utils/0.1.1/example_inputs.yaml
@@ -0,0 +1,2 @@
+dir: /path/to/sample_info_directory
+samples-json: /path/to/sample_meta.json
diff --git a/access_utils/0.1.1/general_stats_parse.cwl b/access_utils/0.1.1/general_stats_parse.cwl
new file mode 100644
index 00000000..4c39e5db
--- /dev/null
+++ b/access_utils/0.1.1/general_stats_parse.cwl
@@ -0,0 +1,61 @@
+class: CommandLineTool
+cwlVersion: v1.0
+$namespaces:
+ dct: 'http://purl.org/dc/terms/'
+ doap: 'http://usefulinc.com/ns/doap#'
+ foaf: 'http://xmlns.com/foaf/0.1/'
+ sbg: 'https://www.sevenbridges.com/'
+id: general_stats_parse
+baseCommand:
+ - general_stats_parse.py
+arguments:
+ - --dir
+ - .
+inputs:
+ - id: directory
+ type: Directory
+ doc: Directory containing results.
+ - id: samples-json
+ type: File
+ inputBinding:
+ prefix: '--samples-json'
+ doc: Sample JSON file.
+ - id: config
+ type: File?
+ inputBinding:
+ prefix: '--config'
+    doc: MultiQC config file.
+outputs:
+ - id: aggregate_parsed_stats
+ label: aggregate_parsed_stats
+ type: Directory
+ outputBinding:
+ glob: .
+ outputEval: |-
+ ${
+ self[0].basename = "aggregate_qc_stats";
+ return self[0]
+ }
+label: general_stats_parse
+requirements:
+ - class: DockerRequirement
+ dockerPull: 'ghcr.io/msk-access/access_utils:0.1.1'
+ - class: InitialWorkDirRequirement
+ listing:
+ - entry: $(inputs.directory)
+ writable: true
+ - class: InlineJavascriptRequirement
+'dct:contributor':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:johnsoni@mskcc.org'
+ 'foaf:name': Ian Johnson
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'dct:creator':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:johnsoni@mskcc.org'
+ 'foaf:name': Ian Johnson
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
diff --git a/bcftools_1.15.1/bcftools_bgzip_1.15.1.cwl b/bcftools_1.15.1/bcftools_bgzip_1.15.1.cwl
new file mode 100644
index 00000000..fe2317e9
--- /dev/null
+++ b/bcftools_1.15.1/bcftools_bgzip_1.15.1.cwl
@@ -0,0 +1,57 @@
+class: CommandLineTool
+cwlVersion: v1.0
+$namespaces:
+ dct: 'http://purl.org/dc/terms/'
+ doap: 'http://usefulinc.com/ns/doap#'
+ foaf: 'http://xmlns.com/foaf/0.1/'
+ sbg: 'https://www.sevenbridges.com/'
+id: tabix
+baseCommand:
+ - bgzip
+inputs:
+ - id: input
+ type: File
+ inputBinding:
+ position: 0
+ doc: input VCF file
+outputs:
+ - id: zippedVcf
+ type: File?
+ outputBinding:
+ glob: >-
+ ${ if(inputs.output_file_name){ return
+ inputs.output_file_name } else { return
+ inputs.input.basename.replace(/.vcf/,'.vcf.gz') } }
+label: tabix
+arguments:
+ - position: 0
+ prefix: ''
+ valueFrom: '-c'
+requirements:
+ - class: ResourceRequirement
+ ramMin: 8000
+ coresMin: 1
+ - class: DockerRequirement
+ dockerPull: 'ghcr.io/msk-access/bcftools:1.15.1'
+ - class: InlineJavascriptRequirement
+stdout: >-
+ ${ if (inputs.output_file_name) return inputs.output_file_name;
+ return inputs.input.basename.replace('.vcf','.vcf.gz'); }
+'dct:contributor':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:sivaprk@mskcc.org'
+ 'foaf:name': Karthigayini Sivaprakasam
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'dct:creator':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:sivaprk@mskcc.org'
+ 'foaf:name': Karthigayini Sivaprakasam
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'doap:release':
+ - class: 'doap:Version'
+ 'doap:name': bcftools
+ 'doap:revision': 1.15.1
diff --git a/bcftools_1.15.1/bcftools_tabix_1.15.1.cwl b/bcftools_1.15.1/bcftools_tabix_1.15.1.cwl
new file mode 100644
index 00000000..039078be
--- /dev/null
+++ b/bcftools_1.15.1/bcftools_tabix_1.15.1.cwl
@@ -0,0 +1,50 @@
+class: CommandLineTool
+cwlVersion: v1.0
+$namespaces:
+ dct: 'http://purl.org/dc/terms/'
+ doap: 'http://usefulinc.com/ns/doap#'
+ foaf: 'http://xmlns.com/foaf/0.1/'
+ sbg: 'https://www.sevenbridges.com/'
+id: tabix_1_3_1
+baseCommand:
+ - tabix
+inputs:
+ - 'sbg:toolDefaultValue': vcf
+ id: preset
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: '-p'
+ doc: 'gff, bed, sam, vcf'
+ - id: file
+ type: File
+ inputBinding:
+ position: 1
+ doc: Input bgziped vcf file
+ 'sbg:fileTypes': .gz
+outputs: []
+label: tabix_1.3.1
+requirements:
+ - class: ResourceRequirement
+ ramMin: 4000
+ coresMin: 1
+ - class: DockerRequirement
+ dockerPull: 'ghcr.io/msk-access/tabix:1.3.1'
+'dct:contributor':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:sivaprk@mskcc.org'
+ 'foaf:name': Karthigayini Sivaprakasam
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'dct:creator':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:sivaprk@mskcc.org'
+ 'foaf:name': Karthigayini Sivaprakasam
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'doap:release':
+ - class: 'doap:Version'
+ 'doap:name': tabix
+ 'doap:revision': 1.3.1
diff --git a/bcftools_1.15.1/example_input_bgzip.yaml b/bcftools_1.15.1/example_input_bgzip.yaml
new file mode 100644
index 00000000..05320852
--- /dev/null
+++ b/bcftools_1.15.1/example_input_bgzip.yaml
@@ -0,0 +1,3 @@
+input:
+ class: File
+ path: /Users/ksivaprakasam/Documents/vep.vcf
diff --git a/bcftools_1.15.1/example_input_tabix.yaml b/bcftools_1.15.1/example_input_tabix.yaml
new file mode 100644
index 00000000..12f3bdad
--- /dev/null
+++ b/bcftools_1.15.1/example_input_tabix.yaml
@@ -0,0 +1,6 @@
+file:
+ class: File
+ metadata: {}
+ path: ../../../vep.vcf.gz
+ secondaryFiles: []
+preset: vcf
diff --git a/bcftools_1.6/README.md b/bcftools_1.6/README.md
new file mode 100644
index 00000000..770e8751
--- /dev/null
+++ b/bcftools_1.6/README.md
@@ -0,0 +1,69 @@
+# CWL and Dockerfile for running bcftools v1.6
+
+## Version of tools in docker image (/container/Dockerfile)
+
+| Tool | Version | Location |
+|--- |--- |--- |
+| alpine:3.8 base image | 3.8 | - |
+| bcftools | 1.6 | quay.io/biocontainers/bcftools:1.6--0 |
+
+## CWL
+
+- CWL specification 1.0
+- Use example_inputs.yaml to see the inputs to the cwl
+- Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner bcftools_concat_1.6.cwl example_inputs.yaml
+```
+
+**If at MSK, using the JUNO cluster with toil version 3.19 installed and [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) manually modified by removing `type==X86_64 &&`, you can use the following commands**
+
+```bash
+#Using CWLTOOL
+> cwltool --singularity --non-strict /path/to/bcftools_concat_1.6.cwl /path/to/inputs.yaml
+
+#Using toil-cwl-runner
+> mkdir bcftools_toil_log
+> toil-cwl-runner --singularity --logFile /path/to/bcftools_toil_log/cwltoil.log --jobStore /path/to/bcftools_jobStore --batchSystem lsf --workDir /path/to/bcftools_toil_log --outdir . --writeLogs /path/to/bcftools_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/bcftools_concat_1.6.cwl /path/to/inputs.yaml > bcftools_toil.stdout 2> bcftools_toil.stderr &
+```
+
+### Usage
+
+```
+usage: toil-cwl-runner bcftools_concat_1.6.cwl [-h]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --allow_overlaps First coordinate of the next file can precede last
+ record of the current file.
+ --compact_PS Do not output PS tag at each site, only at the start
+ of a new phase set block.
+ --ligate Ligate phased VCFs by matching phase at overlapping
+ haplotypes
+ --list LIST Read the list of files from a file.
+ --min_PQ MIN_PQ Break phase set if phasing quality is lower than
+ [30]
+ --naive Concatenate BCF files without recompression
+ (dangerous, use with caution)
+ --no_version do not append version and command line to the header
+ --output OUTPUT Write output to a file [standard output]
+ --output_type OUTPUT_TYPE
+ b - compressed BCF, u - uncompressed BCF, z
+ - compressed VCF, v - uncompressed VCF [v]
+ --regions REGIONS Restrict to comma-separated list of regions
+ --regions_file REGIONS_FILE
+ Restrict to regions listed in a file
+ --remove_duplicates Alias for -d none
+ --rm_dups RM_DUPS Output duplicate records present in multiple
+                        files only once.
+ --threads THREADS Number of extra output compression threads [0]
+ --vcf_files_csi VCF_FILES_CSI
+ Array of vcf files to be concatenated into one vcf
+ --vcf_files_tbi VCF_FILES_TBI
+ Array of vcf files to be concatenated into one vcf
+
+```
diff --git a/bcftools_1.6/bcftools_concat_1.6.cwl b/bcftools_1.6/bcftools_concat_1.6.cwl
new file mode 100644
index 00000000..98f4122c
--- /dev/null
+++ b/bcftools_1.6/bcftools_concat_1.6.cwl
@@ -0,0 +1,180 @@
+
+class: CommandLineTool
+cwlVersion: v1.0
+
+$namespaces:
+ dct: http://purl.org/dc/terms/
+ doap: http://usefulinc.com/ns/doap#
+ foaf: http://xmlns.com/foaf/0.1/
+ sbg: https://www.sevenbridges.com/
+
+id: bcftools_concat_v1_6
+
+baseCommand:
+ - /usr/bin/bcftools
+ - concat
+
+doc: |
+ concatenate VCF/BCF files from the same set of samples
+
+inputs:
+
+ memory_per_job:
+ type: ["null",int]
+ doc: Memory per job in megabytes
+
+ memory_overhead:
+ type: ["null",int]
+ doc: Memory overhead per job in megabytes
+
+ threads:
+ type: ["null", string]
+ doc: Number of extra output compression threads [0]
+ inputBinding:
+ prefix: --threads
+
+ compact_PS:
+ type: ["null", boolean]
+ default: false
+ doc: Do not output PS tag at each site, only at the start of a new phase set block.
+ inputBinding:
+ prefix: --compact-PS
+
+ remove_duplicates:
+ type: ["null", boolean]
+ default: false
+ doc: Alias for -d none
+ inputBinding:
+ prefix: --remove-duplicates
+
+ ligate:
+ type: ["null", boolean]
+ default: false
+ doc: Ligate phased VCFs by matching phase at overlapping haplotypes
+ inputBinding:
+ prefix: --ligate
+
+ output_type:
+ type: ["null", string]
+ doc: b - compressed BCF, u - uncompressed BCF, z - compressed VCF, v - uncompressed VCF [v]
+ inputBinding:
+ prefix: --output-type
+
+ no_version:
+ type: ["null", boolean]
+ default: false
+ doc: do not append version and command line to the header
+ inputBinding:
+ prefix: --no-version
+
+ naive:
+ type: ["null", boolean]
+ default: false
+ doc: Concatenate BCF files without recompression (dangerous, use with caution)
+ inputBinding:
+ prefix: --naive
+
+ allow_overlaps:
+ type: ["null", boolean]
+ default: false
+ doc: First coordinate of the next file can precede last record of the current file.
+ inputBinding:
+ prefix: --allow-overlaps
+
+ min_PQ:
+ type: ["null", string]
+ doc: Break phase set if phasing quality is lower than [30]
+ inputBinding:
+ prefix: --min-PQ
+
+ regions_file:
+ type: ["null", string]
+ doc: Restrict to regions listed in a file
+ inputBinding:
+ prefix: --regions-file
+
+ regions:
+ type: ["null", string]
+ doc: Restrict to comma-separated list of regions
+ inputBinding:
+ prefix: --regions
+
+ rm_dups:
+ type: ["null", string]
+    doc: Output duplicate records present in multiple files only once.
+ inputBinding:
+ prefix: --rm-dups
+
+ output:
+ type: string
+ doc: Write output to a file [standard output]
+ default: "bcftools_concat.vcf"
+ inputBinding:
+ prefix: --output
+
+ list:
+ type: ['null', string]
+ doc: Read the list of files from a file.
+ inputBinding:
+ prefix: --file-list
+
+ vcf_files_tbi:
+ type:
+ - 'null'
+ - type: array
+ items: File
+ secondaryFiles:
+ - .tbi
+ doc: Array of vcf files to be concatenated into one vcf
+ inputBinding:
+ position: 1
+
+ vcf_files_csi:
+ type:
+ - 'null'
+ - type: array
+ items: File
+ secondaryFiles:
+ - ^.bcf.csi
+ doc: Array of vcf files to be concatenated into one vcf
+ inputBinding:
+ position: 1
+
+outputs:
+ bcftools_concat_vcf_output_file:
+ type: File
+ outputBinding:
+ glob: |-
+ ${
+ if (inputs.output)
+ return inputs.output;
+ return null;
+ }
+
+requirements:
+ InlineJavascriptRequirement: {}
+ ResourceRequirement:
+ ramMin: 8000
+ coresMin: 1
+ DockerRequirement:
+ dockerPull: ghcr.io/msk-access/bcftools:1.6
+
+
+dct:contributor:
+ - class: foaf:Organization
+ foaf:member:
+ - class: foaf:Person
+ foaf:mbox: mailto:kumarn1@mskcc.org
+ foaf:name: Nikhil Kumar
+ foaf:name: Memorial Sloan Kettering Cancer Center
+dct:creator:
+ - class: foaf:Organization
+ foaf:member:
+ - class: foaf:Person
+ foaf:mbox: mailto:kumarn1@mskcc.org
+ foaf:name: Nikhil Kumar
+ foaf:name: Memorial Sloan Kettering Cancer Center
+doap:release:
+ - class: doap:Version
+ doap:name: bcftools
+ doap:revision: 1.6
diff --git a/bcftools_1.6/example_inputs.yaml b/bcftools_1.6/example_inputs.yaml
new file mode 100644
index 00000000..0512753d
--- /dev/null
+++ b/bcftools_1.6/example_inputs.yaml
@@ -0,0 +1,9 @@
+vcf_files_tbi:
+  - class: File
+    path: /path/to/vcf/and/tbi/files
+tumor_sample_name: tumor_sample_name
+normal_sample_name: normal_sample_name
+allow_overlaps: allow_overlaps_boolean
+rm_dups: rm_dups_str
+output_type: output_type_str
+output: output_file_name
diff --git a/bedtools_genomecov_v2.28.0_cv2/README.md b/bedtools_genomecov_v2.28.0_cv2/README.md
deleted file mode 100644
index 75de8a57..00000000
--- a/bedtools_genomecov_v2.28.0_cv2/README.md
+++ /dev/null
@@ -1,56 +0,0 @@
-# CWL and Dockerfile for running Bedtools GenomeCov
-
-## Version of tools in [docker image ](https://hub.docker.com/r/biocontainers/bedtools)
-
-| Tool | Version | Location |
-|--- |--- |--- |
-| Bedtools | v2.28.0_cv2 | https://github.com/arq5x/bedtools2/releases/tag/v2.28.0 |
-
-[](https://github.com/arq5x/bedtools2/releases/tag/v2.28.0)
-## CWL
-
-- CWL specification 1.0
-- Use example_inputs.yml to see the inputs to the cwl
-- Example Command using [toil](https://toil.readthedocs.io):
-
-```bash
- > toil-cwl-runner bedtools_genomecov_v2.28.0_cv2.cwl example_inputs.yml
-```
-
-**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command**
-
-```bash
-#Using CWLTOOL
-> cwltool --singularity --non-strict bedtools_genomecov_v2.28.0_cv2.cwl inputs.yaml
-
-#Using toil-cwl-runner
-> mkdir run_directory
-> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 bedtools_genomecov_v2.28.0_cv2.cwl inputs.yaml > file.stdout 2> file.stderr &
-```
-
-## Usage
-
-```bash
-> toil-cwl-runner bedtools_genomecov_v2.28.0_cv2.cwl --help
-
-usage: bedtools_genomecov_v2.28.0_cv2.cwl [-h] --input INPUT
- --output_file_name OUTPUT_FILE_NAME
- [--memory_overhead MEMORY_OVERHEAD]
- [--memory_per_job MEMORY_PER_JOB]
- [--number_of_threads NUMBER_OF_THREADS]
- [--option_bedgraph]
- [job_order]
-
-positional arguments:
- job_order Job input json file
-
-optional arguments:
- -h, --help show this help message and exit
- --input INPUT The input file can be in BAM format (Note: BAM must be
- sorted by position)
- --output_file_name OUTPUT_FILE_NAME
- --memory_overhead MEMORY_OVERHEAD
- --memory_per_job MEMORY_PER_JOB
- --number_of_threads NUMBER_OF_THREADS
- --option_bedgraph option flag parameter to choose output file format.
- -bg refers to bedgraph format
\ No newline at end of file
diff --git a/bedtools_genomecov_v2.28.0_cv2/bedtools_genomecov_v2.28.0_cv2.cwl b/bedtools_genomecov_v2.28.0_cv2/bedtools_genomecov_v2.28.0_cv2.cwl
index 43f392dd..396931b9 100644
--- a/bedtools_genomecov_v2.28.0_cv2/bedtools_genomecov_v2.28.0_cv2.cwl
+++ b/bedtools_genomecov_v2.28.0_cv2/bedtools_genomecov_v2.28.0_cv2.cwl
@@ -39,7 +39,7 @@ inputs:
option flag parameter to choose output file format. -bg refers to bedgraph
format
outputs:
- - id: output_file
+ - id: bedtools_genomecove_bedgraph
type: File
outputBinding:
glob: |-
@@ -55,7 +55,7 @@ requirements:
ramMin: 20000
coresMin: 1
- class: DockerRequirement
- dockerPull: 'biocontainers/bedtools:v2.28.0_cv2'
+ dockerPull: 'ghcr.io/msk-access/bedtools:v2.28.0_cv2'
- class: InlineJavascriptRequirement
stdout: |-
${
diff --git a/bedtools_merge_v2.28.0_cv2/README.md b/bedtools_merge_v2.28.0_cv2/README.md
deleted file mode 100644
index 960664db..00000000
--- a/bedtools_merge_v2.28.0_cv2/README.md
+++ /dev/null
@@ -1,57 +0,0 @@
-# CWL and Dockerfile for running Bedtools Merge
-
-## Version of tools in [docker image ](https://hub.docker.com/r/biocontainers/bedtools)
-
-| Tool | Version | Location |
-|--- |--- |--- |
-| Bedtools | v2.28.0_cv2 | https://github.com/arq5x/bedtools2/releases/tag/v2.28.0 |
-
-[](https://github.com/arq5x/bedtools2/releases/tag/v2.28.0)
-## CWL
-
-- CWL specification 1.0
-- Use example_inputs.yml to see the inputs to the cwl
-- Example Command using [toil](https://toil.readthedocs.io):
-
-```bash
- > toil-cwl-runner bedtools_merge_v2.28.0_cv2.cwl example_inputs.yml
-```
-
-**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command**
-
-```bash
-#Using CWLTOOL
-> cwltool --singularity --non-strict bedtools_merge_v2.28.0_cv2.cwl inputs.yaml
-
-#Using toil-cwl-runner
-> mkdir run_directory
-> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 bedtools_merge_v2.28.0_cv2.cwl inputs.yaml > file.stdout 2> file.stderr &
-```
-
-## Usage
-
-```bash
-> toil-cwl-runner bedtools_merge_v2.28.0_cv2.cwl --help
-
-usage: bedtools_merge_v2.28.0_cv2.cwl [-h] --input INPUT --output_file_name
- OUTPUT_FILE_NAME
- [--memory_overhead MEMORY_OVERHEAD]
- [--memory_per_job MEMORY_PER_JOB]
- [--number_of_threads NUMBER_OF_THREADS]
- [--distance_between_features DISTANCE_BETWEEN_FEATURES]
- [job_order]
-
-positional arguments:
- job_order Job input json file
-
-optional arguments:
- -h, --help show this help message and exit
- --input INPUT BEDgraph format file generated from Bedtools Genomecov
- module
- --output_file_name OUTPUT_FILE_NAME
- --memory_overhead MEMORY_OVERHEAD
- --memory_per_job MEMORY_PER_JOB
- --number_of_threads NUMBER_OF_THREADS
- --distance_between_features DISTANCE_BETWEEN_FEATURES
- Maximum distance between features allowed for features
- to be merged.
\ No newline at end of file
diff --git a/bedtools_merge_v2.28.0_cv2/bedtools_merge_v2.28.0_cv2.cwl b/bedtools_merge_v2.28.0_cv2/bedtools_merge_v2.28.0_cv2.cwl
index 21e7cc7b..e6259ff3 100644
--- a/bedtools_merge_v2.28.0_cv2/bedtools_merge_v2.28.0_cv2.cwl
+++ b/bedtools_merge_v2.28.0_cv2/bedtools_merge_v2.28.0_cv2.cwl
@@ -33,8 +33,8 @@ inputs:
shellQuote: false
doc: Maximum distance between features allowed for features to be merged.
outputs:
- - id: output_file
- type: File?
+ - id: bedtools_merge_bed
+ type: File
outputBinding:
glob: |-
${
@@ -49,7 +49,7 @@ requirements:
ramMin: 20000
coresMin: 1
- class: DockerRequirement
- dockerPull: 'biocontainers/bedtools:v2.28.0_cv2'
+ dockerPull: 'ghcr.io/msk-access/bedtools:v2.28.0_cv2'
- class: InlineJavascriptRequirement
stdout: |-
${
diff --git a/bedtools_sortvcf_2.28.0/bedtools_sortbed_vcf.cwl b/bedtools_sortvcf_2.28.0/bedtools_sortbed_vcf.cwl
new file mode 100644
index 00000000..eefa400e
--- /dev/null
+++ b/bedtools_sortvcf_2.28.0/bedtools_sortbed_vcf.cwl
@@ -0,0 +1,54 @@
+class: CommandLineTool
+cwlVersion: v1.0
+$namespaces:
+ dct: 'http://purl.org/dc/terms/'
+ doap: 'http://usefulinc.com/ns/doap#'
+ foaf: 'http://xmlns.com/foaf/0.1/'
+ sbg: 'https://www.sevenbridges.com/'
+id: sortvcf
+baseCommand:
+ - sortBed
+inputs:
+ - id: input
+ type: File
+ inputBinding:
+ position: 0
+ prefix: '-i'
+ doc: input VCF file
+outputs:
+ - id: sorted.vcf
+ type: File?
+ outputBinding:
+ glob: >-
+ ${ if(inputs.output_file_name){ return
+ inputs.output_file_name } else { return
+ inputs.input.basename.replace(/.vcf/,'.sorted.vcf') } }
+label: sortvcf
+requirements:
+ - class: ResourceRequirement
+ ramMin: 2000
+ coresMin: 1
+ - class: DockerRequirement
+ dockerPull: 'ghcr.io/msk-access/bedtools:v2.28.0_cv2'
+ - class: InlineJavascriptRequirement
+stdout: >-
+ ${ if (inputs.output_file_name) return inputs.output_file_name;
+ return inputs.input.basename.replace('.vcf','.sorted.vcf'); }
+'dct:contributor':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:sivaprk@mskcc.org'
+ 'foaf:name': Karthigayini Sivaprakasam
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'dct:creator':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:sivaprk@mskcc.org'
+ 'foaf:name': Karthigayini Sivaprakasam
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'doap:release':
+ - class: 'doap:Version'
+ 'doap:name': sortbed
+ 'doap:revision': 2.28.0
diff --git a/bedtools_sortvcf_2.28.0/example_input.yaml b/bedtools_sortvcf_2.28.0/example_input.yaml
new file mode 100644
index 00000000..1b65b0c9
--- /dev/null
+++ b/bedtools_sortvcf_2.28.0/example_input.yaml
@@ -0,0 +1 @@
+input: null
diff --git a/biometrics_extract/0.2.13/biometrics_extract.cwl b/biometrics_extract/0.2.13/biometrics_extract.cwl
new file mode 100644
index 00000000..19ee1121
--- /dev/null
+++ b/biometrics_extract/0.2.13/biometrics_extract.cwl
@@ -0,0 +1,139 @@
+class: CommandLineTool
+cwlVersion: v1.0
+$namespaces:
+ dct: 'http://purl.org/dc/terms/'
+ doap: 'http://usefulinc.com/ns/doap#'
+ foaf: 'http://xmlns.com/foaf/0.1/'
+ sbg: 'https://www.sevenbridges.com/'
+id: biometrics_extract_0_2_13
+baseCommand:
+ - biometrics
+ - extract
+inputs:
+ - id: sample_bam
+ type: File
+ inputBinding:
+ position: 0
+ prefix: '--sample-bam'
+ doc: BAM file.
+ secondaryFiles:
+ - ^.bai
+ - id: sample_sex
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: '--sample-sex'
+ doc: Expected sample sex (i.e. M or F).
+ - id: sample_group
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: '--sample-group'
+ doc: The sample group (e.g. the sample patient ID).
+ - id: sample_name
+ type: string
+ inputBinding:
+ position: 0
+ prefix: '--sample-name'
+ doc: >-
+ Sample name. If not specified, sample name is automatically figured out
+ from the BAM file.
+ - id: fafile
+ type: File
+ inputBinding:
+ position: 0
+ prefix: '--fafile'
+ doc: Path to reference fasta.
+ secondaryFiles:
+ - ^.fasta.fai
+ - id: vcf_file
+ type: File
+ inputBinding:
+ position: 0
+ prefix: '--vcf'
+ doc: VCF file containing the SNPs to be queried.
+ - id: bed_file
+ type: File?
+ inputBinding:
+ position: 0
+ prefix: '--bed'
+ doc: BED file containing the intervals to be queried.
+ - id: database
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: '--database'
+ doc: >-
+ Directory to store the intermediate files after running the extraction
+ step.
+ - default: 1
+ id: min_mapping_quality
+ type: int?
+ inputBinding:
+ position: 0
+ prefix: '--min-mapping-quality'
+ doc: Minimum mapping quality of reads to be used for pileup.
+ - default: 1
+ id: min_base_quality
+ type: int?
+ inputBinding:
+ position: 0
+ prefix: '--min-base-quality'
+ doc: Minimum base quality of reads to be used for pileup.
+ - default: 10
+ id: min_coverage
+ type: int?
+ inputBinding:
+ position: 0
+ prefix: '--min-coverage'
+ doc: Minimum coverage to count a site.
+ - default: 0.1
+ id: min_homozygous_thresh
+ type: float?
+ inputBinding:
+ position: 0
+ prefix: '--min-homozygous-thresh'
+ doc: Minimum threshold to define homozygous.
+ - id: default_genotype
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: '--default-genotype'
+ doc: Default genotype if coverage is too low (options are Het or Hom).
+outputs:
+ - id: biometrics_extract_pickle
+ type: File
+ outputBinding:
+ glob: |-
+ ${
+ if (inputs.database) {
+ return inputs.database + '/' + inputs.sample_name + '.pickle';
+ } else {
+ return inputs.sample_name + '.pickle';
+ }
+ }
+requirements:
+ - class: ResourceRequirement
+ ramMin: 16000
+ coresMin: 2
+ - class: DockerRequirement
+ dockerPull: 'ghcr.io/msk-access/biometrics:0.2.13'
+ - class: InlineJavascriptRequirement
+'dct:contributor':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:murphyc4@mskcc.org'
+ 'foaf:name': Charlie Murphy
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'dct:creator':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:murphyc4@mskcc.org'
+ 'foaf:name': Charlie Murphy
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'doap:release':
+ - class: 'doap:Version'
+ 'doap:name': biometrics
+ 'doap:revision': 0.2.13
diff --git a/biometrics_extract/0.2.13/example_inputs.yaml b/biometrics_extract/0.2.13/example_inputs.yaml
new file mode 100644
index 00000000..566b496a
--- /dev/null
+++ b/biometrics_extract/0.2.13/example_inputs.yaml
@@ -0,0 +1,24 @@
+sample_type:
+ - "Normal"
+sample_sex:
+ - "M"
+sample_name:
+ - "test"
+sample_group:
+ - "test"
+fafile:
+ class: File
+ path: /path/to/fasta
+sample_bam:
+ - class: File
+ path: /path/to/bam
+bed_file: null
+vcf_file:
+ class: File
+ path: /path/to/vcf
+database: null
+min_mapping_quality: null
+min_base_quality: null
+min_coverage: null
+min_homozygous_thresh: null
+default_genotype: null
diff --git a/biometrics_extract/0.2.5/biometrics_extract.cwl b/biometrics_extract/0.2.5/biometrics_extract.cwl
new file mode 100644
index 00000000..0abf2aec
--- /dev/null
+++ b/biometrics_extract/0.2.5/biometrics_extract.cwl
@@ -0,0 +1,172 @@
+class: CommandLineTool
+cwlVersion: v1.0
+$namespaces:
+ dct: 'http://purl.org/dc/terms/'
+ doap: 'http://usefulinc.com/ns/doap#'
+ foaf: 'http://xmlns.com/foaf/0.1/'
+ sbg: 'https://www.sevenbridges.com/'
+id: biometrics_extract
+baseCommand:
+ - biometrics
+ - extract
+inputs:
+ - id: sample_bam
+ type:
+ - type: array
+ items: File
+ inputBinding:
+ position: 0
+ prefix: --sample-bam
+ secondaryFiles:
+ - ^.bai
+ doc: >-
+ BAM file.
+ - id: sample_type
+ type:
+ - "null"
+ - type: array
+ items: string
+ inputBinding:
+ position: 0
+ prefix: --sample-type
+ doc: >-
+ Sample types: Normal or Tumor.
+ - id: sample_sex
+ type:
+ - "null"
+ - type: array
+ items: string
+ inputBinding:
+ position: 0
+ prefix: --sample-sex
+ doc: >-
+ Expected sample sex (i.e. M or F).
+ - id: sample_group
+ type:
+ - "null"
+ - type: array
+ items: string
+ inputBinding:
+ position: 0
+ prefix: --sample-group
+ doc: >-
+ The sample group (e.g. the sample patient ID).
+ - id: sample_name
+ type:
+ - type: array
+ items: string
+ inputBinding:
+ position: 0
+ prefix: --sample-name
+ doc: >-
+ Sample name. If not specified, sample name is automatically figured out from the BAM file.
+ - id: fafile
+ type: File
+ inputBinding:
+ position: 0
+ prefix: --fafile
+ secondaryFiles:
+ - ^.fasta.fai
+ doc: >-
+ Path to reference fasta.
+ - id: vcf_file
+ type: File
+ inputBinding:
+ position: 0
+ prefix: --vcf
+ doc: >-
+ VCF file containing the SNPs to be queried.
+ - id: bed_file
+ type: File?
+ inputBinding:
+ position: 0
+ prefix: --bed
+ doc: >-
+ BED file containing the intervals to be queried.
+ - id: database
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: --database
+ doc: >-
+ Directory to store the intermediate files after running the extraction step.
+ - id: min_mapping_quality
+ type: int?
+ default: 1
+ inputBinding:
+ position: 0
+ prefix: --min-mapping-quality
+ doc: >-
+ Minimum mapping quality of reads to be used for pileup.
+ - id: min_base_quality
+ type: int?
+ default: 1
+ inputBinding:
+ position: 0
+ prefix: --min-base-quality
+ doc: >-
+ Minimum base quality of reads to be used for pileup.
+ - id: min_coverage
+ type: int?
+ default: 10
+ inputBinding:
+ position: 0
+ prefix: --min-coverage
+ doc: >-
+ Minimum coverage to count a site.
+ - id: min_homozygous_thresh
+ type: float?
+ default: 0.1
+ inputBinding:
+ position: 0
+ prefix: --min-homozygous-thresh
+ doc: >-
+ Minimum threshold to define homozygous.
+ - id: default_genotype
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: --default-genotype
+ doc: >-
+ Default genotype if coverage is too low (options are Het or Hom).
+outputs:
+ - id: biometrics_extract_pickle
+ type:
+ type: array
+ items: File
+ outputBinding:
+ glob: |-
+ ${
+ return inputs.sample_name.map(val => {
+ if (inputs.database) {
+ return inputs.database + '/' + val + '.pk';
+ } else {
+ return val + '.pk';
+ }
+ });
+ }
+requirements:
+ - class: ResourceRequirement
+ ramMin: 16000
+ coresMin: 2
+ - class: DockerRequirement
+ dockerPull: 'ghcr.io/msk-access/biometrics:0.2.5'
+ - class: InlineJavascriptRequirement
+'dct:contributor':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:murphyc4@mskcc.org'
+ 'foaf:name': Charlie Murphy
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'dct:creator':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:murphyc4@mskcc.org'
+ 'foaf:name': Charlie Murphy
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'doap:release':
+ - class: 'doap:Version'
+ 'doap:name': biometrics
+ 'doap:revision': 0.2.5
diff --git a/biometrics_extract/0.2.5/example_inputs.yaml b/biometrics_extract/0.2.5/example_inputs.yaml
new file mode 100644
index 00000000..566b496a
--- /dev/null
+++ b/biometrics_extract/0.2.5/example_inputs.yaml
@@ -0,0 +1,24 @@
+sample_type:
+ - "Normal"
+sample_sex:
+ - "M"
+sample_name:
+ - "test"
+sample_group:
+ - "test"
+fafile:
+ class: File
+ path: /path/to/fasta
+sample_bam:
+ - class: File
+ path: /path/to/bam
+bed_file: null
+vcf_file:
+ class: File
+ path: /path/to/vcf
+database: null
+min_mapping_quality: null
+min_base_quality: null
+min_coverage: null
+min_homozygous_thresh: null
+default_genotype: null
diff --git a/biometrics_extract/README.md b/biometrics_extract/README.md
new file mode 100644
index 00000000..5687fef3
--- /dev/null
+++ b/biometrics_extract/README.md
@@ -0,0 +1,69 @@
+# CWL for running biometrics extract tool.
+
+| Tool | Latest version | Location |
+|--- |--- |--- |
+| biometrics | 0.2.13 | |
+
+The Python package source code and Dockerfile are located on GitHub.
+
+## CWL
+
+- CWL specification 1.0
+- Use example_inputs.yaml to see the inputs to the cwl
+- Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner biometrics_extract.cwl example_inputs.yaml
+```
+
+### Usage
+
+```bash
+usage: biometrics_extract.cwl [-h] [--sample_bam SAMPLE_BAM]
+ [--sample_type SAMPLE_TYPE]
+ [--sample_sex SAMPLE_SEX]
+ [--sample_group SAMPLE_GROUP]
+ [--sample_name SAMPLE_NAME] --fafile
+ FAFILE --vcf_file VCF_FILE --bed_file
+ BED_FILE --database DATABASE
+ [--min_mapping_quality MIN_MAPPING_QUALITY]
+ [--min_base_quality MIN_BASE_QUALITY]
+ [--min_coverage MIN_COVERAGE]
+ [--min_homozygous_thresh MIN_HOMOZYGOUS_THRESH]
+ [--default_genotype DEFAULT_GENOTYPE]
+ [job_order]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --sample_bam SAMPLE_BAM
+ BAM file.
+ --sample_type SAMPLE_TYPE
+ Sample types: Normal or Tumor.
+ --sample_sex SAMPLE_SEX
+ Expected sample sex (i.e. M or F).
+ --sample_group SAMPLE_GROUP
+ The sample group (e.g. the sample patient ID).
+ --sample_name SAMPLE_NAME
+ Sample name. If not specified, sample name is
+ automatically figured out from the BAM file.
+ --fafile FAFILE Path to reference fasta.
+ --vcf_file VCF_FILE VCF file containing the SNPs to be queried.
+ --bed_file BED_FILE BED file containing the intervals to be queried.
+ --database DATABASE Directory to store the intermediate files after
+ running the extraction step.
+ --min_mapping_quality MIN_MAPPING_QUALITY
+ Minimum mapping quality of reads to be used for
+ pileup.
+ --min_base_quality MIN_BASE_QUALITY
+ Minimum base quality of reads to be used for pileup.
+ --min_coverage MIN_COVERAGE
+ Minimum coverage to count a site.
+ --min_homozygous_thresh MIN_HOMOZYGOUS_THRESH
+ Minimum threshold to define homozygous.
+ --default_genotype DEFAULT_GENOTYPE
+ Default genotype if coverage is too low (options are
+ Het or Hom).
+```
diff --git a/biometrics_genotype/0.2.13/biometrics_genotype.cwl b/biometrics_genotype/0.2.13/biometrics_genotype.cwl
new file mode 100644
index 00000000..d4d1449a
--- /dev/null
+++ b/biometrics_genotype/0.2.13/biometrics_genotype.cwl
@@ -0,0 +1,149 @@
+class: CommandLineTool
+cwlVersion: v1.0
+$namespaces:
+ dct: 'http://purl.org/dc/terms/'
+ doap: 'http://usefulinc.com/ns/doap#'
+ foaf: 'http://xmlns.com/foaf/0.1/'
+ sbg: 'https://www.sevenbridges.com/'
+id: biometrics_genotype_0_2_13
+baseCommand:
+ - biometrics
+ - genotype
+inputs:
+ - id: input
+ type:
+ type: array
+ items: File
+ inputBinding:
+ position: 0
+ prefix: '--input'
+ doc: >-
+ Can be one of three types: (1) path to a CSV file containing sample
+ information (one per line). For example:
+ sample_name,sample_bam,sample_type,sample_sex,sample_group. (2) Path to a
+ '*.pk' file that was produced by the 'extract' tool. (3) Name of the
+ sample to analyze; this assumes there is a file named '{sample_name}.pk'
+ in your database directory. Can be specified more than once.
+ - id: database
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: '--database'
+ doc: >-
+ Directory to store the intermediate files after running the extraction
+ step.
+ - default: 0.05
+ id: discordance_threshold
+ type: float?
+ inputBinding:
+ position: 0
+ prefix: '--discordance-threshold'
+ doc: >-
+ Discordance values less than this are regarded as matching samples.
+ (default: 0.05)
+ - id: prefix
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: '--prefix'
+ doc: Output file prefix.
+ - id: plot
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: '--plot'
+ doc: Also output plots of the data.
+ - id: json
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: '--json'
+ doc: Also output data in JSON format.
+ - id: no_db_comparison
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: '--no-db-compare'
+ doc: >-
+ Do not compare the sample(s) you provided to all samples in the database,
+ only compare them with each other.
+ - default: 2
+ id: threads
+ type: int?
+ inputBinding:
+ position: 0
+ prefix: '--threads'
+ doc: Number of threads to use.
+outputs:
+ - id: biometrics_genotype_comparisons
+ type: File
+ outputBinding:
+ glob: |-
+ ${
+ if (inputs.prefix) {
+ return inputs.prefix + '_genotype_comparison.csv'
+ } else {
+ return 'genotype_comparison.csv'
+ }
+ }
+ - id: biometrics_genotype_cluster_input
+ type: File
+ outputBinding:
+ glob: |-
+ ${
+ if (inputs.prefix) {
+ return inputs.prefix + '_genotype_clusters_input.csv'
+ } else {
+ return 'genotype_clusters_input.csv'
+ }
+ }
+ - id: biometrics_genotype_cluster_input_database
+ type: File?
+ outputBinding:
+ glob: |-
+ ${
+ if (inputs.prefix) {
+ return inputs.prefix + '_genotype_clusters_database.csv'
+ } else {
+ return 'genotype_clusters_database.csv'
+ }
+ }
+ - id: biometrics_genotype_plot_input
+ type: File?
+ outputBinding:
+ glob: |-
+ ${
+ return 'genotype_comparison_input.html'
+ }
+ - id: biometrics_genotype_plot_input_database
+ type: File?
+ outputBinding:
+ glob: |-
+ ${
+ return 'genotype_comparison_database.html'
+ }
+requirements:
+ - class: ResourceRequirement
+ ramMin: 16000
+ coresMin: 2
+ - class: DockerRequirement
+ dockerPull: 'ghcr.io/msk-access/biometrics:0.2.13'
+ - class: InlineJavascriptRequirement
+'dct:contributor':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:murphyc4@mskcc.org'
+ 'foaf:name': Charlie Murphy
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'dct:creator':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:murphyc4@mskcc.org'
+ 'foaf:name': Charlie Murphy
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'doap:release':
+ - class: 'doap:Version'
+ 'doap:name': biometrics
+ 'doap:revision': 0.2.13
diff --git a/biometrics_genotype/0.2.13/example_inputs.yaml b/biometrics_genotype/0.2.13/example_inputs.yaml
new file mode 100644
index 00000000..0bc68d94
--- /dev/null
+++ b/biometrics_genotype/0.2.13/example_inputs.yaml
@@ -0,0 +1,12 @@
+input:
+ - class: File
+ path: "../biometrics_extract_0.2.5/test.pk"
+ - class: File
+ path: "../biometrics_extract_0.2.5/test2.pk"
+database: null
+prefix: 'test'
+json: null
+plot: true
+no_db_comparison: false
+threads: null
+discordance_threshold: null
diff --git a/biometrics_genotype/0.2.5/biometrics_genotype.cwl b/biometrics_genotype/0.2.5/biometrics_genotype.cwl
new file mode 100644
index 00000000..51285328
--- /dev/null
+++ b/biometrics_genotype/0.2.5/biometrics_genotype.cwl
@@ -0,0 +1,145 @@
+class: CommandLineTool
+cwlVersion: v1.0
+$namespaces:
+ dct: 'http://purl.org/dc/terms/'
+ doap: 'http://usefulinc.com/ns/doap#'
+ foaf: 'http://xmlns.com/foaf/0.1/'
+ sbg: 'https://www.sevenbridges.com/'
+id: biometrics_genotype
+baseCommand:
+ - biometrics
+ - genotype
+inputs:
+ - id: input
+ type:
+ - type: array
+ items: File
+ inputBinding:
+ position: 0
+ prefix: --input
+ doc: >-
+ Can be one of three types: (1) path to a CSV file containing sample information (one per line). For example: sample_name,sample_bam,sample_type,sample_sex,sample_group. (2) Path to a '*.pk' file that was produced by the 'extract' tool. (3) Name of the sample to analyze; this assumes there is a file named '{sample_name}.pk' in your database directory. Can be specified more than once.
+ - id: database
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: --database
+ doc: >-
+ Directory to store the intermediate files after running the extraction step.
+ - id: discordance_threshold
+ type: float?
+ default: 0.05
+ inputBinding:
+ position: 0
+ prefix: --discordance-threshold
+ doc: >-
+ Discordance values less than this are regarded as matching samples. (default: 0.05)
+ - id: prefix
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: --prefix
+ doc: >-
+ Output file prefix.
+ - id: plot
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: --plot
+ doc: >-
+ Also output plots of the data.
+ - id: json
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: --json
+ doc: >-
+ Also output data in JSON format.
+ - id: no_db_comparison
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: --no-db-compare
+ doc: >-
+ Do not compare the sample(s) you provided to all samples in the database, only compare them with each other.
+ - id: threads
+ type: int?
+ default: 2
+ inputBinding:
+ position: 0
+ prefix: --threads
+ doc: >-
+ Number of threads to use.
+outputs:
+ - id: biometrics_genotype_comparisons
+ type: File
+ outputBinding:
+ glob: |-
+ ${
+ if (inputs.prefix) {
+ return inputs.prefix + '_genotype_comparison.csv'
+ } else {
+ return 'genotype_comparison.csv'
+ }
+ }
+ - id: biometrics_genotype_cluster_input
+ type: File
+ outputBinding:
+ glob: |-
+ ${
+ if (inputs.prefix) {
+ return inputs.prefix + '_genotype_clusters_input.csv'
+ } else {
+ return 'genotype_clusters_input.csv'
+ }
+ }
+ - id: biometrics_genotype_cluster_input_database
+ type: File?
+ outputBinding:
+ glob: |-
+ ${
+ if (inputs.prefix) {
+ return inputs.prefix + '_genotype_clusters_database.csv'
+ } else {
+ return 'genotype_clusters_database.csv'
+ }
+ }
+ - id: biometrics_genotype_plot_input
+ type: File?
+ outputBinding:
+ glob: |-
+ ${
+ return 'genotype_comparison_input.html'
+ }
+ - id: biometrics_genotype_plot_input_database
+ type: File?
+ outputBinding:
+ glob: |-
+ ${
+ return 'genotype_comparison_database.html'
+ }
+requirements:
+ - class: ResourceRequirement
+ ramMin: 16000
+ coresMin: 2
+ - class: DockerRequirement
+ dockerPull: 'ghcr.io/msk-access/biometrics:0.2.5'
+ - class: InlineJavascriptRequirement
+'dct:contributor':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:murphyc4@mskcc.org'
+ 'foaf:name': Charlie Murphy
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'dct:creator':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:murphyc4@mskcc.org'
+ 'foaf:name': Charlie Murphy
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'doap:release':
+ - class: 'doap:Version'
+ 'doap:name': biometrics
+ 'doap:revision': 0.2.5
diff --git a/biometrics_genotype/0.2.5/example_inputs.yaml b/biometrics_genotype/0.2.5/example_inputs.yaml
new file mode 100644
index 00000000..0bc68d94
--- /dev/null
+++ b/biometrics_genotype/0.2.5/example_inputs.yaml
@@ -0,0 +1,12 @@
+input:
+ - class: File
+ path: "../biometrics_extract_0.2.5/test.pk"
+ - class: File
+ path: "../biometrics_extract_0.2.5/test2.pk"
+database: null
+prefix: 'test'
+json: null
+plot: true
+no_db_comparison: false
+threads: null
+discordance_threshold: null
diff --git a/biometrics_genotype/README.md b/biometrics_genotype/README.md
new file mode 100644
index 00000000..fd398b99
--- /dev/null
+++ b/biometrics_genotype/README.md
@@ -0,0 +1,53 @@
+# CWL for running biometrics genotype tool.
+
+| Tool | Latest version | Location |
+|--- |--- |--- |
+| biometrics | 0.2.13 | |
+
+The python package source code and Docker file are located on GitHub.
+
+## CWL
+
+- CWL specification 1.0
+- Use example_inputs.yaml to see the inputs to the cwl
+- Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner biometrics_genotype.cwl example_inputs.yaml
+```
+
+### Usage
+
+```bash
+usage: biometrics_genotype.cwl [-h] --input INPUT [--database DATABASE]
+ [--discordance_threshold DISCORDANCE_THRESHOLD]
+ [--prefix PREFIX] [--plot] [--json]
+ [--no_db_comparison] [--threads THREADS]
+ [job_order]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --input INPUT Can be one of three types: (1) path to a CSV file
+ containing sample information (one per line). For
+ example: sample_name,sample_bam,sample_type,sample_sex
+ ,sample_group. (2) Path to a '*.pk' file that was
+ produced by the 'extract' tool. (3) Name of the sample
+ to analyze; this assumes there is a file named
+ '{sample_name}.pk' in your database directory. Can be
+ specified more than once.
+ --database DATABASE Directory to store the intermediate files after
+ running the extraction step.
+ --discordance_threshold DISCORDANCE_THRESHOLD
+ Discordance values less than this are regarded as
+ matching samples. (default: 0.05)
+ --prefix PREFIX Output file prefix.
+ --plot Also output plots of the data.
+ --json Also output data in JSON format.
+ --no_db_comparison Do not compare the sample(s) you provided to all
+ samples in the database, only compare them with each
+ other.
+ --threads THREADS Number of threads to use.
+```
diff --git a/biometrics_major/0.2.13/biometrics_major.cwl b/biometrics_major/0.2.13/biometrics_major.cwl
new file mode 100644
index 00000000..b5a42a9e
--- /dev/null
+++ b/biometrics_major/0.2.13/biometrics_major.cwl
@@ -0,0 +1,123 @@
+class: CommandLineTool
+cwlVersion: v1.0
+$namespaces:
+ dct: 'http://purl.org/dc/terms/'
+ doap: 'http://usefulinc.com/ns/doap#'
+ foaf: 'http://xmlns.com/foaf/0.1/'
+ sbg: 'https://www.sevenbridges.com/'
+id: biometrics_major_0_2_13
+baseCommand:
+ - biometrics
+ - major
+inputs:
+ - id: input
+ type:
+ type: array
+ items: File
+ inputBinding:
+ prefix: '--input'
+ inputBinding:
+ position: 0
+ doc: >-
+ Can be one of three types: (1) path to a CSV file containing sample
+ information (one per line). For example:
+ sample_name,sample_bam,sample_type,sample_sex,sample_group. (2) Path to a
+ '*.pk' file that was produced by the 'extract' tool. (3) Name of the
+ sample to analyze; this assumes there is a file named '{sample_name}.pk'
+ in your database directory. Can be specified more than once.
+ - id: database
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: '--database'
+ doc: >-
+ Directory to store the intermediate files after running the extraction
+ step.
+ - default: 0.6
+ id: major_threshold
+ type: float?
+ inputBinding:
+ position: 0
+ prefix: '--major-threshold'
+ doc: Major contamination threshold for bad sample.
+ - id: prefix
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: '--prefix'
+ doc: Output file prefix.
+ - id: plot
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: '--plot'
+ doc: Also output plots of the data.
+ - id: json
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: '--json'
+ doc: Also output data in JSON format.
+ - id: no_db_comparison
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: '--no-db-compare'
+ doc: >-
+ Do not compare the sample(s) you provided to all samples in the database,
+ only compare them with each other.
+outputs:
+ - id: biometrics_major_csv
+ type: File
+ outputBinding:
+ glob: |-
+ ${
+ if (inputs.prefix) {
+ return inputs.prefix + '_major_contamination.csv'
+ } else {
+ return 'major_contamination.csv'
+ }
+ }
+ - id: biometrics_major_json
+ type: File?
+ outputBinding:
+ glob: |-
+ ${
+ if (inputs.prefix) {
+ return inputs.prefix + '_major_contamination.json'
+ } else {
+ return 'major_contamination.json'
+ }
+ }
+ - id: biometrics_major_plot
+ type: File?
+ outputBinding:
+ glob: |-
+ ${
+ return 'major_contamination.html'
+ }
+requirements:
+ - class: ResourceRequirement
+ ramMin: 16000
+ coresMin: 2
+ - class: DockerRequirement
+ dockerPull: 'ghcr.io/msk-access/biometrics:0.2.13'
+ - class: InlineJavascriptRequirement
+'dct:contributor':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:murphyc4@mskcc.org'
+ 'foaf:name': Charlie Murphy
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'dct:creator':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:murphyc4@mskcc.org'
+ 'foaf:name': Charlie Murphy
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'doap:release':
+ - class: 'doap:Version'
+ 'doap:name': biometrics
+ 'doap:revision': 0.2.13
diff --git a/biometrics_major/0.2.13/example_inputs.yaml b/biometrics_major/0.2.13/example_inputs.yaml
new file mode 100644
index 00000000..da03de55
--- /dev/null
+++ b/biometrics_major/0.2.13/example_inputs.yaml
@@ -0,0 +1,11 @@
+input:
+ - class: File
+ path: "../biometrics_extract_0.2.5/test.pk"
+ - class: File
+ path: "../biometrics_extract_0.2.5/test2.pk"
+database: null
+major_threshold: null
+prefix: null
+plot: true
+json: true
+no_db_comparison: null
diff --git a/biometrics_major/0.2.5/biometrics_major.cwl b/biometrics_major/0.2.5/biometrics_major.cwl
new file mode 100644
index 00000000..217c9d96
--- /dev/null
+++ b/biometrics_major/0.2.5/biometrics_major.cwl
@@ -0,0 +1,120 @@
+class: CommandLineTool
+cwlVersion: v1.0
+$namespaces:
+ dct: 'http://purl.org/dc/terms/'
+ doap: 'http://usefulinc.com/ns/doap#'
+ foaf: 'http://xmlns.com/foaf/0.1/'
+ sbg: 'https://www.sevenbridges.com/'
+id: biometrics_major
+baseCommand:
+ - biometrics
+ - major
+inputs:
+ - id: input
+ type:
+ type: array
+ items: File
+ inputBinding:
+ prefix: --input
+ inputBinding:
+ position: 0
+ doc: >-
+ Can be one of three types: (1) path to a CSV file containing sample information (one per line). For example: sample_name,sample_bam,sample_type,sample_sex,sample_group. (2) Path to a '*.pk' file that was produced by the 'extract' tool. (3) Name of the sample to analyze; this assumes there is a file named '{sample_name}.pk' in your database directory. Can be specified more than once.
+ - id: database
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: --database
+ doc: >-
+ Directory to store the intermediate files after running the extraction step.
+ - id: major_threshold
+ type: float?
+ default: 0.6
+ inputBinding:
+ position: 0
+ prefix: --major-threshold
+ doc: >-
+ Major contamination threshold for bad sample.
+ - id: prefix
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: --prefix
+ doc: >-
+ Output file prefix.
+ - id: plot
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: --plot
+ doc: >-
+ Also output plots of the data.
+ - id: json
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: --json
+ doc: >-
+ Also output data in JSON format.
+ - id: no_db_comparison
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: --no-db-compare
+ doc: >-
+ Do not compare the sample(s) you provided to all samples in the database, only compare them with each other.
+outputs:
+ - id: biometrics_major_csv
+ type: File
+ outputBinding:
+ glob: |-
+ ${
+ if (inputs.prefix) {
+ return inputs.prefix + '_major_contamination.csv'
+ } else {
+ return 'major_contamination.csv'
+ }
+ }
+ - id: biometrics_major_json
+ type: File?
+ outputBinding:
+ glob: |-
+ ${
+ if (inputs.prefix) {
+ return inputs.prefix + '_major_contamination.json'
+ } else {
+ return 'major_contamination.json'
+ }
+ }
+ - id: biometrics_major_plot
+ type: File?
+ outputBinding:
+ glob: |-
+ ${
+ return 'major_contamination.html'
+ }
+requirements:
+ - class: ResourceRequirement
+ ramMin: 16000
+ coresMin: 2
+ - class: DockerRequirement
+ dockerPull: 'ghcr.io/msk-access/biometrics:0.2.5'
+ - class: InlineJavascriptRequirement
+'dct:contributor':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:murphyc4@mskcc.org'
+ 'foaf:name': Charlie Murphy
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'dct:creator':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:murphyc4@mskcc.org'
+ 'foaf:name': Charlie Murphy
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'doap:release':
+ - class: 'doap:Version'
+ 'doap:name': biometrics
+ 'doap:revision': 0.2.5
diff --git a/biometrics_major/0.2.5/example_inputs.yaml b/biometrics_major/0.2.5/example_inputs.yaml
new file mode 100644
index 00000000..da03de55
--- /dev/null
+++ b/biometrics_major/0.2.5/example_inputs.yaml
@@ -0,0 +1,11 @@
+input:
+ - class: File
+ path: "../biometrics_extract_0.2.5/test.pk"
+ - class: File
+ path: "../biometrics_extract_0.2.5/test2.pk"
+database: null
+major_threshold: null
+prefix: null
+plot: true
+json: true
+no_db_comparison: null
diff --git a/biometrics_major/README.md b/biometrics_major/README.md
new file mode 100644
index 00000000..10fa476e
--- /dev/null
+++ b/biometrics_major/README.md
@@ -0,0 +1,51 @@
+# CWL for running biometrics major tool.
+
+| Tool | Latest version | Location |
+|--- |--- |--- |
+| biometrics | 0.2.13 | |
+
+The python package source code and Docker file are located on GitHub.
+
+## CWL
+
+- CWL specification 1.0
+- Use example_inputs.yaml to see the inputs to the cwl
+- Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner biometrics_major.cwl example_inputs.yaml
+```
+
+### Usage
+
+```bash
+usage: biometrics_major.cwl [-h] --input INPUT [--database DATABASE]
+ [--major_threshold MAJOR_THRESHOLD]
+ [--prefix PREFIX] [--plot] [--json]
+ [--no_db_comparison]
+ [job_order]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --input INPUT Can be one of three types: (1) path to a CSV file
+ containing sample information (one per line). For
+ example: sample_name,sample_bam,sample_type,sample_sex
+ ,sample_group. (2) Path to a '*.pk' file that was
+ produced by the 'extract' tool. (3) Name of the sample
+ to analyze; this assumes there is a file named
+ '{sample_name}.pk' in your database directory. Can be
+ specified more than once.
+ --database DATABASE Directory to store the intermediate files after
+ running the extraction step.
+ --major_threshold MAJOR_THRESHOLD
+ Major contamination threshold for bad sample.
+ --prefix PREFIX Output file prefix.
+ --plot Also output plots of the data.
+ --json Also output data in JSON format.
+ --no_db_comparison Do not compare the sample(s) you provided to all
+ samples in the database, only compare them with each
+ other.
+```
diff --git a/biometrics_minor/0.2.13/biometrics_minor.cwl b/biometrics_minor/0.2.13/biometrics_minor.cwl
new file mode 100644
index 00000000..2535eb6b
--- /dev/null
+++ b/biometrics_minor/0.2.13/biometrics_minor.cwl
@@ -0,0 +1,130 @@
+class: CommandLineTool
+cwlVersion: v1.0
+$namespaces:
+ dct: 'http://purl.org/dc/terms/'
+ doap: 'http://usefulinc.com/ns/doap#'
+ foaf: 'http://xmlns.com/foaf/0.1/'
+ sbg: 'https://www.sevenbridges.com/'
+id: biometrics_minor_0_2_13
+baseCommand:
+ - biometrics
+ - minor
+inputs:
+ - id: input
+ type:
+ type: array
+ items: File
+ inputBinding:
+ prefix: '--input'
+ inputBinding:
+ position: 0
+ doc: >-
+ Can be one of three types: (1) path to a CSV file containing sample
+ information (one per line). For example:
+ sample_name,sample_bam,sample_type,sample_sex,sample_group. (2) Path to a
+ '*.pk' file that was produced by the 'extract' tool. (3) Name of the
+ sample to analyze; this assumes there is a file named '{sample_name}.pk'
+ in your database directory. Can be specified more than once.
+ - id: database
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: '--database'
+ doc: >-
+ Directory to store the intermediate files after running the extraction
+ step.
+ - default: 0.002
+ id: minor_threshold
+ type: float?
+ inputBinding:
+ position: 0
+ prefix: '--minor-threshold'
+ doc: Minor contamination threshold for bad sample.
+ - id: prefix
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: '--prefix'
+ doc: Output file prefix.
+ - id: plot
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: '--plot'
+ doc: Also output plots of the data.
+ - id: json
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: '--json'
+ doc: Also output data in JSON format.
+ - id: no_db_comparison
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: '--no-db-compare'
+ doc: >-
+ Do not compare the sample(s) you provided to all samples in the database,
+ only compare them with each other.
+outputs:
+ - id: biometrics_minor_csv
+ type: File
+ outputBinding:
+ glob: |-
+ ${
+ if (inputs.prefix) {
+ return inputs.prefix + '_minor_contamination.csv'
+ } else {
+ return 'minor_contamination.csv'
+ }
+ }
+ - id: biometrics_minor_json
+ type: File?
+ outputBinding:
+ glob: |-
+ ${
+ if (inputs.prefix) {
+ return inputs.prefix + '_minor_contamination.json'
+ } else {
+ return 'minor_contamination.json'
+ }
+ }
+ - id: biometrics_minor_plot
+ type: File?
+ outputBinding:
+ glob: |-
+ ${
+ return 'minor_contamination.html'
+ }
+ - id: biometrics_minor_sites_plot
+ type: File?
+ outputBinding:
+ glob: |-
+ ${
+ return 'minor_contamination_sites.html'
+ }
+requirements:
+ - class: ResourceRequirement
+ ramMin: 16000
+ coresMin: 2
+ - class: DockerRequirement
+ dockerPull: 'ghcr.io/msk-access/biometrics:0.2.13'
+ - class: InlineJavascriptRequirement
+'dct:contributor':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:murphyc4@mskcc.org'
+ 'foaf:name': Charlie Murphy
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'dct:creator':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:murphyc4@mskcc.org'
+ 'foaf:name': Charlie Murphy
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'doap:release':
+ - class: 'doap:Version'
+ 'doap:name': biometrics
+ 'doap:revision': 0.2.13
diff --git a/biometrics_minor/0.2.13/example_inputs.yaml b/biometrics_minor/0.2.13/example_inputs.yaml
new file mode 100644
index 00000000..bddb4c72
--- /dev/null
+++ b/biometrics_minor/0.2.13/example_inputs.yaml
@@ -0,0 +1,11 @@
+input:
+ - class: File
+ path: "../biometrics_extract_0.2.5/test.pk"
+ - class: File
+ path: "../biometrics_extract_0.2.5/test2.pk"
+database: null
+minor_threshold: null
+prefix: null
+plot: true
+json: true
+no_db_comparison: null
diff --git a/biometrics_minor/0.2.5/biometrics_minor.cwl b/biometrics_minor/0.2.5/biometrics_minor.cwl
new file mode 100644
index 00000000..dc0410cb
--- /dev/null
+++ b/biometrics_minor/0.2.5/biometrics_minor.cwl
@@ -0,0 +1,127 @@
+class: CommandLineTool
+cwlVersion: v1.0
+$namespaces:
+ dct: 'http://purl.org/dc/terms/'
+ doap: 'http://usefulinc.com/ns/doap#'
+ foaf: 'http://xmlns.com/foaf/0.1/'
+ sbg: 'https://www.sevenbridges.com/'
+id: biometrics_minor
+baseCommand:
+ - biometrics
+ - minor
+inputs:
+ - id: input
+ type:
+ type: array
+ items: File
+ inputBinding:
+ prefix: --input
+ inputBinding:
+ position: 0
+ doc: >-
+ Can be one of three types: (1) path to a CSV file containing sample information (one per line). For example: sample_name,sample_bam,sample_type,sample_sex,sample_group. (2) Path to a '*.pk' file that was produced by the 'extract' tool. (3) Name of the sample to analyze; this assumes there is a file named '{sample_name}.pk' in your database directory. Can be specified more than once.
+ - id: database
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: --database
+ doc: >-
+ Directory to store the intermediate files after running the extraction step.
+ - id: minor_threshold
+ type: float?
+ default: 0.002
+ inputBinding:
+ position: 0
+ prefix: --minor-threshold
+ doc: >-
+ Minor contamination threshold for bad sample.
+ - id: prefix
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: --prefix
+ doc: >-
+ Output file prefix.
+ - id: plot
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: --plot
+ doc: >-
+ Also output plots of the data.
+ - id: json
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: --json
+ doc: >-
+ Also output data in JSON format.
+ - id: no_db_comparison
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: --no-db-compare
+ doc: >-
+ Do not compare the sample(s) you provided to all samples in the database, only compare them with each other.
+outputs:
+ - id: biometrics_minor_csv
+ type: File
+ outputBinding:
+ glob: |-
+ ${
+ if (inputs.prefix) {
+ return inputs.prefix + '_minor_contamination.csv'
+ } else {
+ return 'minor_contamination.csv'
+ }
+ }
+ - id: biometrics_minor_json
+ type: File?
+ outputBinding:
+ glob: |-
+ ${
+ if (inputs.prefix) {
+ return inputs.prefix + '_minor_contamination.json'
+ } else {
+ return 'minor_contamination.json'
+ }
+ }
+ - id: biometrics_minor_plot
+ type: File?
+ outputBinding:
+ glob: |-
+ ${
+ return 'minor_contamination.html'
+ }
+ - id: biometrics_minor_sites_plot
+ type: File?
+ outputBinding:
+ glob: |-
+ ${
+ return 'minor_contamination_sites.html'
+ }
+requirements:
+ - class: ResourceRequirement
+ ramMin: 16000
+ coresMin: 2
+ - class: DockerRequirement
+ dockerPull: 'ghcr.io/msk-access/biometrics:0.2.5'
+ - class: InlineJavascriptRequirement
+'dct:contributor':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:murphyc4@mskcc.org'
+ 'foaf:name': Charlie Murphy
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'dct:creator':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:murphyc4@mskcc.org'
+ 'foaf:name': Charlie Murphy
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'doap:release':
+ - class: 'doap:Version'
+ 'doap:name': biometrics
+ 'doap:revision': 0.2.5
diff --git a/biometrics_minor/0.2.5/example_inputs.yaml b/biometrics_minor/0.2.5/example_inputs.yaml
new file mode 100644
index 00000000..bddb4c72
--- /dev/null
+++ b/biometrics_minor/0.2.5/example_inputs.yaml
@@ -0,0 +1,11 @@
+input:
+ - class: File
+ path: "../biometrics_extract_0.2.5/test.pk"
+ - class: File
+ path: "../biometrics_extract_0.2.5/test2.pk"
+database: null
+minor_threshold: null
+prefix: null
+plot: true
+json: true
+no_db_comparison: null
diff --git a/biometrics_minor/README.md b/biometrics_minor/README.md
new file mode 100644
index 00000000..af94ea40
--- /dev/null
+++ b/biometrics_minor/README.md
@@ -0,0 +1,51 @@
+# CWL for running biometrics minor tool.
+
+| Tool | Latest version | Location |
+|--- |--- |--- |
+| biometrics | 0.2.13 | |
+
+The python package source code and Docker file are located on GitHub.
+
+## CWL
+
+- CWL specification 1.0
+- Use example_inputs.yaml to see the inputs to the cwl
+- Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner biometrics_minor.cwl example_inputs.yaml
+```
+
+### Usage
+
+```bash
+usage: biometrics_minor.cwl [-h] --input INPUT [--database DATABASE]
+ [--minor_threshold MINOR_THRESHOLD]
+ [--prefix PREFIX] [--plot] [--json]
+ [--no_db_comparison]
+ [job_order]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --input INPUT Can be one of three types: (1) path to a CSV file
+ containing sample information (one per line). For
+ example: sample_name,sample_bam,sample_type,sample_sex
+ ,sample_group. (2) Path to a '*.pk' file that was
+ produced by the 'extract' tool. (3) Name of the sample
+ to analyze; this assumes there is a file named
+ '{sample_name}.pk' in your database directory. Can be
+ specified more than once.
+ --database DATABASE Directory to store the intermediate files after
+ running the extraction step.
+ --minor_threshold MINOR_THRESHOLD
+ Minor contamination threshold for bad sample.
+ --prefix PREFIX Output file prefix.
+ --plot Also output plots of the data.
+ --json Also output data in JSON format.
+ --no_db_comparison Do not compare the sample(s) you provided to all
+ samples in the database, only compare them with each
+ other.
+```
diff --git a/biometrics_sexmismatch/0.2.13/biometrics_sexmismatch.cwl b/biometrics_sexmismatch/0.2.13/biometrics_sexmismatch.cwl
new file mode 100644
index 00000000..e8755e55
--- /dev/null
+++ b/biometrics_sexmismatch/0.2.13/biometrics_sexmismatch.cwl
@@ -0,0 +1,110 @@
+class: CommandLineTool
+cwlVersion: v1.0
+$namespaces:
+ dct: 'http://purl.org/dc/terms/'
+ doap: 'http://usefulinc.com/ns/doap#'
+ foaf: 'http://xmlns.com/foaf/0.1/'
+ sbg: 'https://www.sevenbridges.com/'
+id: biometrics_sexmismatch_0_2_13
+baseCommand:
+ - biometrics
+ - sexmismatch
+inputs:
+ - id: input
+ type:
+ type: array
+ items: File
+ inputBinding:
+ prefix: '--input'
+ inputBinding:
+ position: 0
+ doc: >-
+ Can be one of three types: (1) path to a CSV file containing sample
+ information (one per line). For example:
+ sample_name,sample_bam,sample_type,sample_sex,sample_group. (2) Path to a
+ '*.pk' file that was produced by the 'extract' tool. (3) Name of the
+ sample to analyze; this assumes there is a file named '{sample_name}.pk'
+ in your database directory. Can be specified more than once.
+ - id: database
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: '--database'
+ doc: >-
+ Directory to store the intermediate files after running the extraction
+ step.
+ - default: 50
+ id: coverage_threshold
+ type: int?
+ inputBinding:
+ position: 0
+ prefix: '--coverage-threshold'
+ doc: Samples with Y chromosome above this value will be considered male.
+ - id: prefix
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: '--prefix'
+ doc: Output file prefix.
+ - id: json
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: '--json'
+ doc: Also output data in JSON format.
+ - id: no_db_comparison
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: '--no-db-compare'
+ doc: >-
+ Do not compare the sample(s) you provided to all samples in the database,
+ only compare them with each other.
+outputs:
+ - id: biometrics_sexmismatch_csv
+ type: File
+ outputBinding:
+ glob: |-
+ ${
+ if (inputs.prefix) {
+ return inputs.prefix + '_sex_mismatch.csv'
+ } else {
+ return 'sex_mismatch.csv'
+ }
+ }
+ - id: biometrics_sexmismatch_json
+ type: File?
+ outputBinding:
+ glob: |-
+ ${
+ if (inputs.prefix) {
+ return inputs.prefix + '_sex_mismatch.json'
+ } else {
+ return 'sex_mismatch.json'
+ }
+ }
+requirements:
+ - class: ResourceRequirement
+ ramMin: 16000
+ coresMin: 2
+ - class: DockerRequirement
+ dockerPull: 'ghcr.io/msk-access/biometrics:0.2.13'
+ - class: InlineJavascriptRequirement
+'dct:contributor':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:murphyc4@mskcc.org'
+ 'foaf:name': Charlie Murphy
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'dct:creator':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:murphyc4@mskcc.org'
+ 'foaf:name': Charlie Murphy
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'doap:release':
+ - class: 'doap:Version'
+ 'doap:name': biometrics
+ 'doap:revision': 0.2.13
diff --git a/biometrics_sexmismatch/0.2.13/example_inputs.yaml b/biometrics_sexmismatch/0.2.13/example_inputs.yaml
new file mode 100644
index 00000000..60832e43
--- /dev/null
+++ b/biometrics_sexmismatch/0.2.13/example_inputs.yaml
@@ -0,0 +1,10 @@
+input:
+ - class: File
+ path: "../biometrics_extract_0.2.5/test.pk"
+ - class: File
+ path: "../biometrics_extract_0.2.5/test2.pk"
+database: null
+coverage_threshold: null
+prefix: null
+json: true
+no_db_comparison: null
diff --git a/biometrics_sexmismatch/0.2.5/biometrics_sexmismatch.cwl b/biometrics_sexmismatch/0.2.5/biometrics_sexmismatch.cwl
new file mode 100644
index 00000000..bae28a19
--- /dev/null
+++ b/biometrics_sexmismatch/0.2.5/biometrics_sexmismatch.cwl
@@ -0,0 +1,106 @@
+class: CommandLineTool
+cwlVersion: v1.0
+$namespaces:
+ dct: 'http://purl.org/dc/terms/'
+ doap: 'http://usefulinc.com/ns/doap#'
+ foaf: 'http://xmlns.com/foaf/0.1/'
+ sbg: 'https://www.sevenbridges.com/'
+id: biometrics_sexmismatch
+baseCommand:
+ - biometrics
+ - sexmismatch
+inputs:
+ - id: input
+ type:
+ type: array
+ items: File
+ inputBinding:
+ prefix: --input
+ inputBinding:
+ position: 0
+ doc: >-
+ Can be one of three types: (1) path to a CSV file containing sample information (one per line). For example: sample_name,sample_bam,sample_type,sample_sex,sample_group. (2) Path to a '*.pk' file that was produced by the 'extract' tool. (3) Name of the sample to analyze; this assumes there is a file named '{sample_name}.pk' in your database directory. Can be specified more than once.
+ - id: database
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: --database
+ doc: >-
+ Directory to store the intermediate files after running the extraction step.
+ - id: coverage_threshold
+ type: int?
+ default: 50
+ inputBinding:
+ position: 0
+ prefix: --coverage-threshold
+ doc: >-
+ Samples with Y chromosome above this value will be considered male.
+ - id: prefix
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: --prefix
+ doc: >-
+ Output file prefix.
+ - id: json
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: --json
+ doc: >-
+ Also output data in JSON format.
+ - id: no_db_comparison
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: --no-db-compare
+ doc: >-
+ Do not compare the sample(s) you provided to all samples in the database, only compare them with each other.
+outputs:
+ - id: biometrics_sexmismatch_csv
+ type: File
+ outputBinding:
+ glob: |-
+ ${
+ if (inputs.prefix) {
+ return inputs.prefix + '_sex_mismatch.csv'
+ } else {
+ return 'sex_mismatch.csv'
+ }
+ }
+ - id: biometrics_sexmismatch_json
+ type: File?
+ outputBinding:
+ glob: |-
+ ${
+ if (inputs.prefix) {
+ return inputs.prefix + '_sex_mismatch.json'
+ } else {
+ return 'sex_mismatch.json'
+ }
+ }
+requirements:
+ - class: ResourceRequirement
+ ramMin: 16000
+ coresMin: 2
+ - class: DockerRequirement
+ dockerPull: 'ghcr.io/msk-access/biometrics:0.2.5'
+ - class: InlineJavascriptRequirement
+'dct:contributor':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:murphyc4@mskcc.org'
+ 'foaf:name': Charlie Murphy
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'dct:creator':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:murphyc4@mskcc.org'
+ 'foaf:name': Charlie Murphy
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'doap:release':
+ - class: 'doap:Version'
+ 'doap:name': biometrics
+ 'doap:revision': 0.2.5
diff --git a/biometrics_sexmismatch/0.2.5/example_inputs.yaml b/biometrics_sexmismatch/0.2.5/example_inputs.yaml
new file mode 100644
index 00000000..60832e43
--- /dev/null
+++ b/biometrics_sexmismatch/0.2.5/example_inputs.yaml
@@ -0,0 +1,10 @@
+input:
+ - class: File
+ path: "../biometrics_extract_0.2.5/test.pk"
+ - class: File
+ path: "../biometrics_extract_0.2.5/test2.pk"
+database: null
+coverage_threshold: null
+prefix: null
+json: true
+no_db_comparison: null
diff --git a/biometrics_sexmismatch/README.md b/biometrics_sexmismatch/README.md
new file mode 100644
index 00000000..81b02145
--- /dev/null
+++ b/biometrics_sexmismatch/README.md
@@ -0,0 +1,52 @@
+# CWL for running biometrics sexmismatch tool.
+
+| Tool | Latest version | Location |
+|--- |--- |--- |
+| biometrics | 0.2.12 | |
+
+The python package source code and Docker file are located on GitHub.
+
+## CWL
+
+- CWL specification 1.0
+- Use example_inputs.yaml to see the inputs to the cwl
+- Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner biometrics_sexmismatch.cwl example_inputs.yaml
+```
+
+### Usage
+
+```bash
+usage: biometrics_sexmismatch.cwl [-h] --input INPUT
+ [--database DATABASE]
+ [--coverage_threshold COVERAGE_THRESHOLD]
+ [--prefix PREFIX] [--json]
+ [--no_db_comparison]
+ [job_order]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --input INPUT Can be one of three types: (1) path to a CSV file
+ containing sample information (one per line). For
+ example: sample_name,sample_bam,sample_type,sample_sex
+ ,sample_group. (2) Path to a '*.pk' file that was
+ produced by the 'extract' tool. (3) Name of the sample
+ to analyze; this assumes there is a file named
+ '{sample_name}.pk' in your database directory. Can be
+ specified more than once.
+ --database DATABASE Directory to store the intermediate files after
+ running the extraction step.
+ --coverage_threshold COVERAGE_THRESHOLD
+ Samples with Y chromosome above this value will be
+ considered male.
+ --prefix PREFIX Output file prefix.
+ --json Also output data in JSON format.
+ --no_db_comparison Do not compare the sample(s) you provided to all
+ samples in the database, only compare them with each
+ other.
+```
diff --git a/bwa_mem_0.7.17/README.md b/bwa_mem_0.7.17/README.md
new file mode 100644
index 00000000..fd6785d6
--- /dev/null
+++ b/bwa_mem_0.7.17/README.md
@@ -0,0 +1,136 @@
+# CWL and Dockerfile for running BWA MEM
+
+## Version of tools in docker image (/container/Dockerfile)
+
+| Tool | Version | Location |
+| ------ | ------- | ----------------------------------------------- |
+| ubuntu | 16.04 | - |
+| BWA | 0.7.17 | https://github.com/lh3/bwa/releases/tag/v0.7.17 |
+
+[](https://microbadger.com/images/mskaccess/bwa_mem_0.7.17 "Get your own version badge on microbadger.com") [](https://microbadger.com/images/mskaccess/bwa_mem_0.7.17 "Get your own image badge on microbadger.com")
+
+## CWL
+
+- CWL specification 1.0
+- Use example_inputs.yaml to see the inputs to the cwl
+- Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner bwa_mem_0.7.17.cwl example_inputs.yaml
+```
+
+**If at MSK, using the JUNO cluster having installed toil-msk version 3.21.1 you can use the following command**
+
+```bash
+#Using CWLTOOL
+> cwltool --singularity --non-strict /path/to/bwa_mem_0.7.17.cwl /path/to/inputs.yaml
+
+#Using toil-cwl-runner
+> toil-cwl-runner --singularity --logFile /path/to/bwa_mem_toil.log --jobStore /path/to/bwa_mem_jobStore --batchSystem lsf --workDir /path/to/bwa_mem_toil_log --outdir . --writeLogs /path/to/bwa_mem_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --disableChaining --maxLogFileSize 20000000000 /path/to/bwa_mem_0.7.17.cwl /path/to/inputs.yaml > bwa_mem_toil.stdout 2> bwa_mem_toil.stderr &
+```
+
+### Usage
+
+```
+usage: bwa_mem_0.7.17.cwl [-h] [--memory_per_job MEMORY_PER_JOB]
+ [--memory_overhead MEMORY_OVERHEAD]
+ [--number_of_threads NUMBER_OF_THREADS] --reads
+ READS --reference REFERENCE [-A A] [-B B] [-C]
+ [-E E] [-L L] [-M] [-O O] [-P] [-S] [-T T] [-U U]
+ [-a] [-c C] [-d D] [-k K] [-K K] [--output OUTPUT]
+ [-p] [-r R] [-v V] [-w W] [-y Y] [-D D] [-W W]
+ [-m M] [-e] [-x X] [-j J] [--he HE] [-V] [-Y] [-I I]
+ [-R R] [--sample_id SAMPLE_ID] [--lane_id LANE_ID]
+ [--platform PLATFORM]
+ [--platform_unit PLATFORM_UNIT]
+ [--center_name CENTER_NAME]
+ [--library_id LIBRARY_ID]
+ [job_order]
+
+bwa mem [-aCHMpP] [-t nThreads] [-k minSeedLen] [-w bandWidth] [-d zDropoff]
+[-r seedSplitRatio] [-c maxOcc] [-A matchScore] [-B mmPenalty] [-O gapOpenPen]
+[-E gapExtPen] [-L clipPen] [-U unpairPen] [-R RGline] [-v verboseLevel]
+db.prefix reads.fq [mates.fq] Align 70bp-1Mbp query sequences with the BWA-MEM
+algorithm. Briefly, the algorithm works by seeding alignments with maximal
+exact matches (MEMs) and then extending seeds with the affine-gap Smith-
+Waterman algorithm (SW). If mates.fq file is absent and option -p is not set,
+this command regards input reads are single-end. If mates.fq is present, this
+command assumes the i-th read in reads.fq and the i-th read in mates.fq
+constitute a read pair. If -p is used, the command assumes the 2i-th and the
+(2i+1)-th read in reads.fq constitute a read pair (such input file is said to
+be interleaved). In this case, mates.fq is ignored. In the paired-end mode,
+the mem command will infer the read orientation and the insert size
+distribution from a batch of reads. The BWA-MEM algorithm performs local
+alignment. It may produce multiple primary alignments for different part of a
+query sequence. This is a crucial feature for long sequences. However, some
+tools such as Picard’s markDuplicates does not work with split alignments. One
+may consider to use option -M to flag shorter split hits as secondary.
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --memory_per_job MEMORY_PER_JOB
+ Memory per job in megabytes
+ --memory_overhead MEMORY_OVERHEAD
+ Memory overhead per job in megabytes
+ --number_of_threads NUMBER_OF_THREADS
+ --reads READS
+ --reference REFERENCE
+ -A A score for a sequence match, which scales options
+ -TdBOELU unless overridden [1]
+ -B B penalty for a mismatch [4]
+ -C append FASTA/FASTQ comment to SAM output
+ -E E gap extension penalty; a gap of size k cost '{-O} +
+ {-E}*k' [1,1]
+ -L L penalty for 5'- and 3'-end clipping [5,5]
+ -M
+ -O O gap open penalties for deletions and insertions [6,6]
+ -P skip pairing; mate rescue performed unless -S also in
+ use
+ -S skip mate rescue
+ -T T minimum score to output [30]
+ -U U penalty for an unpaired read pair [17]
+ -a output all alignments for SE or unpaired PE
+ -c C skip seeds with more than INT occurrences [500]
+ -d D off-diagonal X-dropoff [100]
+ -k K minimum seed length [19]
+ -K K process INT input bases in each batch regardless of
+ nThreads (for reproducibility) []
+ --output OUTPUT
+ -p smart pairing (ignoring in2.fq)
+ -r R look for internal seeds inside a seed longer than {-k}
+ * FLOAT [1.5]
+ -v V verbosity level: 1=error, 2=warning, 3=message,
+ 4+=debugging [3]
+ -w W band width for banded alignment [100]
+ -y Y seed occurrence for the 3rd round seeding [20]
+ -D D drop chains shorter than FLOAT fraction of the longest
+ overlapping chain [0.50]
+ -W W discard a chain if seeded bases shorter than INT [0]
+ -m M perform at most INT rounds of mate rescues for each
+ read [50]
+ -e
+ -x X read type. Setting -x changes multiple parameters
+ unless overridden [null] pacbio: -k17 -W40 -r10 -A1
+ -B1 -O1 -E1 -L0 (PacBio reads to ref) ont2d: -k14 -W20
+ -r10 -A1 -B1 -O1 -E1 -L0 (Oxford Nanopore 2D-reads to
+ ref) intractg: -B9 -O16 -L5 (intra-species contigs to
+ ref)
+ -j J treat ALT contigs as part of the primary assembly
+ (i.e. ignore .alt file)
+  --he HE               if there are <INT hits with score >80% of the max
+                        score, output all in XA [5,200]
+ -V output the reference FASTA header in the XR tag
+ -Y use soft clipping for supplementary alignments
+ -I I
+  -R R                  STR read group header line such as '@RG\tID:foo\tSM
+                        :bar' [null]
+ --sample_id SAMPLE_ID
+ --lane_id LANE_ID
+ --platform PLATFORM
+ --platform_unit PLATFORM_UNIT
+ --center_name CENTER_NAME
+ --library_id LIBRARY_ID
+```
diff --git a/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl b/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl
new file mode 100644
index 00000000..5e7e55dd
--- /dev/null
+++ b/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl
@@ -0,0 +1,351 @@
+class: CommandLineTool
+cwlVersion: v1.0
+$namespaces:
+ dct: 'http://purl.org/dc/terms/'
+ doap: 'http://usefulinc.com/ns/doap#'
+ foaf: 'http://xmlns.com/foaf/0.1/'
+ sbg: 'https://www.sevenbridges.com/'
+baseCommand:
+ - bwa
+ - mem
+inputs:
+ - id: memory_per_job
+ type: int?
+ doc: Memory per job in megabytes
+ - id: memory_overhead
+ type: int?
+ doc: Memory overhead per job in megabytes
+ - id: number_of_threads
+ type: int?
+ - id: reads
+ type: 'File[]'
+ inputBinding:
+ position: 3
+ - id: reference
+ type: File
+ inputBinding:
+ position: 2
+ secondaryFiles:
+ - .amb
+ - .ann
+ - .bwt
+ - .pac
+ - .sa
+ - .fai
+ - id: A
+ type: int?
+ inputBinding:
+ position: 0
+ prefix: '-A'
+ doc: >-
+ score for a sequence match, which scales options -TdBOELU unless
+ overridden [1]
+ - id: B
+ type: int?
+ inputBinding:
+ position: 0
+ prefix: '-B'
+ doc: 'penalty for a mismatch [4]'
+ - id: C
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: '-C'
+ doc: append FASTA/FASTQ comment to SAM output
+ - id: E
+ type: 'int[]?'
+ inputBinding:
+ position: 0
+ prefix: '-E'
+ itemSeparator: ','
+ doc: 'gap extension penalty; a gap of size k cost ''{-O} + {-E}*k'' [1,1]'
+ - id: L
+ type: 'int[]?'
+ inputBinding:
+ position: 0
+ prefix: '-L'
+ itemSeparator: ','
+ doc: 'penalty for 5''- and 3''-end clipping [5,5]'
+ - id: M
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: '-M'
+ - id: O
+ type: 'int[]?'
+ inputBinding:
+ position: 0
+ prefix: '-O'
+ itemSeparator: ','
+ doc: 'gap open penalties for deletions and insertions [6,6]'
+ - id: P
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: '-P'
+ doc: skip pairing; mate rescue performed unless -S also in use
+ - id: S
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: '-S'
+ doc: skip mate rescue
+ - id: T
+ type: int?
+ inputBinding:
+ position: 0
+ prefix: '-T'
+ doc: 'minimum score to output [30]'
+ - id: U
+ type: int?
+ inputBinding:
+ position: 0
+ prefix: '-U'
+ doc: 'penalty for an unpaired read pair [17]'
+ - id: a
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: '-a'
+ doc: output all alignments for SE or unpaired PE
+ - id: c
+ type: int?
+ inputBinding:
+ position: 0
+ prefix: '-c'
+ doc: 'skip seeds with more than INT occurrences [500]'
+ - id: d
+ type: int?
+ inputBinding:
+ position: 0
+ prefix: '-d'
+ doc: 'off-diagonal X-dropoff [100]'
+ - id: k
+ type: int?
+ inputBinding:
+ position: 0
+ prefix: '-k'
+ doc: 'minimum seed length [19]'
+ - id: K
+ type: int?
+ inputBinding:
+ position: 0
+ prefix: '-K'
+ doc: >-
+ process INT input bases in each batch regardless of nThreads (for
+ reproducibility) []
+ - id: output
+ type: string?
+ - id: p
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: '-p'
+ doc: smart pairing (ignoring in2.fq)
+ - id: r
+ type: float?
+ inputBinding:
+ position: 0
+ prefix: '-r'
+ doc: 'look for internal seeds inside a seed longer than {-k} * FLOAT [1.5]'
+ - id: v
+ type: int?
+ inputBinding:
+ position: 0
+ prefix: '-v'
+ doc: 'verbosity level: 1=error, 2=warning, 3=message, 4+=debugging [3]'
+ - id: w
+ type: int?
+ inputBinding:
+ position: 0
+ prefix: '-w'
+ doc: 'band width for banded alignment [100]'
+ - id: 'y'
+ type: int?
+ inputBinding:
+ position: 0
+ prefix: '-y'
+ doc: 'seed occurrence for the 3rd round seeding [20]'
+ - id: D
+ type: float?
+ inputBinding:
+ position: 0
+ prefix: '-D'
+ doc: >-
+ drop chains shorter than FLOAT fraction of the longest overlapping chain
+ [0.50]
+ - id: W
+ type: int?
+ inputBinding:
+ position: 0
+ prefix: '-W'
+ doc: 'discard a chain if seeded bases shorter than INT [0]'
+ - id: m
+ type: int?
+ inputBinding:
+ position: 0
+ prefix: '-m'
+ doc: 'perform at most INT rounds of mate rescues for each read [50]'
+ - id: e
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: '-e'
+ - id: x
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: '-x'
+ doc: >-
+ read type. Setting -x changes multiple parameters unless overridden [null]
+ pacbio: -k17 -W40 -r10 -A1 -B1 -O1 -E1 -L0 (PacBio reads to ref) ont2d:
+ -k14 -W20 -r10 -A1 -B1 -O1 -E1 -L0 (Oxford Nanopore 2D-reads to ref)
+ intractg: -B9 -O16 -L5 (intra-species contigs to ref)
+ - id: H
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: '-H'
+ doc: >-
+      Use hard clipping 'H' in the SAM output. This option may dramatically
+ reduce the redundancy of output when mapping long contig or BAC sequences
+ - id: j
+ type: File?
+ inputBinding:
+ position: 0
+ prefix: '-j'
+ doc: >-
+ treat ALT contigs as part of the primary assembly (i.e. ignore
+ .alt file)
+ - id: he
+ type: 'int[]?'
+ inputBinding:
+ position: 0
+ prefix: '-h'
+ itemSeparator: ','
+    doc: >-
+      if there are <INT hits with score >80% of the max score, output
+      all in XA [5,200]
+ - id: V
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: '-V'
+ doc: output the reference FASTA header in the XR tag
+ - id: 'Y'
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: '-Y'
+ doc: use soft clipping for supplementary alignments
+ - id: I
+ type: string?
+ inputBinding:
+ position: 0
+      prefix: '-I'
+ - id: R
+ type: string?
+    doc: 'STR read group header line such as ''@RG\tID:foo\tSM:bar'' [null]'
+ - id: sample_id
+ type: string?
+ - id: lane_id
+ type: string?
+ - id: platform
+ type: string?
+ - id: platform_unit
+ type: string?
+ - id: center_name
+ type: string?
+ - id: library_id
+ type: string?
+outputs:
+ - id: bwa_mem_output_sam
+ type: File
+ outputBinding:
+ glob: |-
+ ${
+ if (inputs.output)
+ return inputs.output;
+ return inputs.reads[0].basename.replace(/(fastq.gz)|(fq.gz)/, 'sam');
+ }
+doc: >-
+ bwa mem [-aCHMpP] [-t nThreads] [-k minSeedLen] [-w bandWidth] [-d zDropoff]
+ [-r seedSplitRatio] [-c maxOcc] [-A matchScore] [-B mmPenalty] [-O gapOpenPen]
+ [-E gapExtPen] [-L clipPen] [-U unpairPen] [-R RGline] [-v verboseLevel]
+ db.prefix reads.fq [mates.fq]
+
+ Align 70bp-1Mbp query sequences with the BWA-MEM algorithm. Briefly, the
+ algorithm works by seeding alignments with maximal exact matches (MEMs) and
+ then extending seeds with the affine-gap Smith-Waterman algorithm (SW).
+
+
+ If mates.fq file is absent and option -p is not set, this command regards
+ input reads are single-end. If mates.fq is present, this command assumes the
+ i-th read in reads.fq and the i-th read in mates.fq constitute a read pair. If
+ -p is used, the command assumes the 2i-th and the (2i+1)-th read in reads.fq
+ constitute a read pair (such input file is said to be interleaved). In this
+ case, mates.fq is ignored. In the paired-end mode, the mem command will infer
+ the read orientation and the insert size distribution from a batch of reads.
+
+
+ The BWA-MEM algorithm performs local alignment. It may produce multiple
+ primary alignments for different part of a query sequence. This is a crucial
+ feature for long sequences. However, some tools such as Picard’s
+ markDuplicates does not work with split alignments. One may consider to use
+ option -M to flag shorter split hits as secondary.
+label: bwa_mem_0.7.17
+arguments:
+ - position: 0
+ prefix: '-t'
+ valueFrom: $(runtime.cores)
+ - position: 0
+ prefix: '-R'
+ valueFrom: |-
+ ${
+ if (inputs.sample_id) {
+ var rg_id = "@RG\\tID:" + inputs.sample_id + "\\tSM:" + inputs.sample_id;
+ if (inputs.library_id) {
+ rg_id += "\\tLB:" + inputs.library_id;
+ } if (inputs.platform) {
+ rg_id += "\\tPL:" + inputs.platform;
+ } if (inputs.platform_unit) {
+ rg_id += "\\tPU:" + inputs.platform_unit;
+ } if (inputs.center_name) {
+ rg_id += "\\tCN:" + inputs.center_name;
+ }
+ return rg_id
+ } else {
+ return inputs.R
+ }
+ }
+requirements:
+ - class: ResourceRequirement
+ ramMin: 34000
+ coresMin: 16
+ - class: DockerRequirement
+ dockerPull: 'ghcr.io/msk-access/bwa:0.7.17'
+ - class: InlineJavascriptRequirement
+stdout: |-
+ ${
+ if (inputs.output)
+ return inputs.output;
+ return inputs.reads[0].basename.replace(/(fastq.gz)|(fq.gz)/, 'sam');
+ }
+'dct:contributor':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:shahr2@mskcc.org'
+ 'foaf:name': Ronak Shah
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'dct:creator':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:johnsoni@mskcc.org'
+ 'foaf:name': Ian Johnson
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'doap:release':
+ - class: 'doap:Version'
+ 'doap:name': bwa
+ 'doap:revision': 0.7.17
diff --git a/bwa_mem_0.7.17/container/Dockerfile b/bwa_mem_0.7.17/container/Dockerfile
new file mode 100644
index 00000000..49cc6831
--- /dev/null
+++ b/bwa_mem_0.7.17/container/Dockerfile
@@ -0,0 +1,23 @@
+FROM ubuntu:16.04
+
+LABEL maintainer="Ian Johnson (johnsoni@mskcc.org)" \
+ version.image="0.1.0" \
+ version.bwa="0.7.17" \
+ version.ubuntu="16.04" \
+ source.bwa="https://github.com/lh3/bwa/releases/tag/v0.7.17"
+
+ENV BWA_VERSION 0.7.17
+
+RUN apt-get -y update \
+ # install build tools and dependencies
+ && apt-get -y install build-essential zlib1g-dev wget unzip \
+ # download and unzip bwa
+ && cd /tmp && wget "https://github.com/lh3/bwa/archive/v${BWA_VERSION}.zip" \
+ && unzip "v${BWA_VERSION}.zip" \
+ # build
+ && cd "/tmp/bwa-${BWA_VERSION}" \
+ && make \
+ # move binaries to /usr/bin
+ && mv "/tmp/bwa-${BWA_VERSION}/bwa" /usr/bin \
+ # clean up
+ && rm -rf /tmp/*
diff --git a/bwa_mem_0.7.17/example_inputs.yaml b/bwa_mem_0.7.17/example_inputs.yaml
new file mode 100644
index 00000000..74683384
--- /dev/null
+++ b/bwa_mem_0.7.17/example_inputs.yaml
@@ -0,0 +1,9 @@
+reads:
+- class: File
+ path: "path/to/fastq_R1.fastq"
+- class: File
+ path: "path/to/fastq_R2.fastq"
+reference:
+ class: File
+ path: "/path/to/reference.fasta"
+sample_id: test_sample_id
diff --git a/bwa_mem_0.7.5a/bwa_mem_0.7.5a.cwl b/bwa_mem_0.7.5a/bwa_mem_0.7.5a.cwl
index 8741d4fd..41701c6c 100644
--- a/bwa_mem_0.7.5a/bwa_mem_0.7.5a.cwl
+++ b/bwa_mem_0.7.5a/bwa_mem_0.7.5a.cwl
@@ -4,7 +4,6 @@ $namespaces:
dct: 'http://purl.org/dc/terms/'
doap: 'http://usefulinc.com/ns/doap#'
foaf: 'http://xmlns.com/foaf/0.1/'
- sbg: 'https://www.sevenbridges.com/'
baseCommand:
- bwa
- mem
diff --git a/cwl_commandlinetools/__init__.py b/cwl_commandlinetools/__init__.py
new file mode 100644
index 00000000..30f6dd27
--- /dev/null
+++ b/cwl_commandlinetools/__init__.py
@@ -0,0 +1,7 @@
+# -*- coding: utf-8 -*-
+
+"""Top-level package for cwl-commandlinetools."""
+
+__author__ = """msk-access"""
+__email__ = 'msk.access@gmail.com'
+__version__ = '1.2.0'
diff --git a/cwl_commandlinetools/cwl_commandlinetools.py b/cwl_commandlinetools/cwl_commandlinetools.py
new file mode 100644
index 00000000..7fbbae4f
--- /dev/null
+++ b/cwl_commandlinetools/cwl_commandlinetools.py
@@ -0,0 +1,3 @@
+# -*- coding: utf-8 -*-
+
+"""Main module."""
diff --git a/delly_0.9.1/README.md b/delly_0.9.1/README.md
new file mode 100644
index 00000000..08dcf390
--- /dev/null
+++ b/delly_0.9.1/README.md
@@ -0,0 +1,81 @@
+# CWL and Dockerfile for running Delly
+
+## Version of tools in docker image (/container/Dockerfile)
+
+| Tool | Version | Location |
+|--- |--- |--- |
+| ubuntu | 18.04 | - |
+| DELLY | 0.9.1 | https://github.com/dellytools/delly |
+
+
+## CWL
+
+- CWL specification 1.0
+- Use example_inputs.yaml to see the inputs to the cwl
+- Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner delly_0.9.1.cwl example_inputs.yaml
+```
+**If at MSK, using the JUNO cluster having installed toil-msk version 3.21.1 you can use the following command**
+
+```bash
+#Using CWLTOOL
+> cwltool --singularity --non-strict /path/to/delly_0.9.1.cwl /path/to/inputs.yaml
+
+#Using toil-cwl-runner
+> toil-cwl-runner --singularity --logFile /path/to/delly_toil.log --jobStore /path/to/delly_jobStore --batchSystem lsf --workDir /path/to/delly_toil_log --outdir . --writeLogs /path/to/delly_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --disableChaining --maxLogFileSize 20000000000 /path/to/delly_0.9.1.cwl /path/to/inputs.yaml > delly_toil.stdout 2> delly_toil.stderr &
+```
+
+### Usage
+
+```
+usage: delly_0.9.1.cwl [-h] [--memory_per_job MEMORY_PER_JOB]
+ [--memory_overhead MEMORY_OVERHEAD]
+ [--number_of_threads NUMBER_OF_THREADS]
+ [--out_file OUT_FILE] --reference_genome
+ REFERENCE_GENOME [--exclude_regions EXCLUDE_REGIONS]
+ [--vcffile VCFFILE] [--svtype SVTYPE]
+ [job_order]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --memory_per_job MEMORY_PER_JOB
+ Memory per job in megabytes
+ --memory_overhead MEMORY_OVERHEAD
+ Memory overhead per job in megabytes
+ --number_of_threads NUMBER_OF_THREADS
+ --out_file OUT_FILE The name to be used for the output bcf file
+ --reference_genome REFERENCE_GENOME
+ reference genome fasta file
+ --exclude_regions EXCLUDE_REGIONS
+ file with regions to exclude
+ --vcffile VCFFILE input VCF/BCF file for genotyping
+ --svtype SVTYPE SV type to compute [DEL, INS, DUP, INV, BND, ALL]
+```
+
+## Disclaimer
+Parts of this code were borrowed from the delly repository, https://github.com/dellytools/delly, which uses the following redistribution license:
+
+Copyright (c) 2012- European Molecular Biology Laboratory (EMBL)
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software
+ without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/delly_0.9.1/container/Dockerfile b/delly_0.9.1/container/Dockerfile
new file mode 100644
index 00000000..6b66905c
--- /dev/null
+++ b/delly_0.9.1/container/Dockerfile
@@ -0,0 +1,75 @@
+# taken from: https://github.com/dellytools/delly/blob/main/Dockerfile
+# modify for additional functionality
+################## Base Image ##########
+FROM ubuntu:18.04
+################## ARGUMENTS/Environments ##########
+
+ARG BUILD_DATE
+ARG BUILD_VERSION
+ARG LICENSE="Apache-2.0"
+ARG DELLY_VERSION
+ARG VCS_REF
+################## METADATA ########################
+LABEL org.opencontainers.image.vendor="MSKCC"
+LABEL org.opencontainers.image.authors="Eric Buehler (buehlere@mskcc.org)"
+
+LABEL org.opencontainers.image.created=${BUILD_DATE} \
+ org.opencontainers.image.version=${BUILD_VERSION} \
+ org.opencontainers.image.licenses=${LICENSE} \
+ org.opencontainers.image.version.delly=${DELLY_VERSION} \
+ org.opencontainers.image.vcs-url="https://github.com/dellytools/delly.git" \
+ org.opencontainers.image.vcs-ref=${VCS_REF}
+
+LABEL org.opencontainers.image.description="This container uses ubuntu:18.04 as the base image to build \
+ DELLY version ${DELLY_VERSION}"
+
+
+################## INSTALL ##########################
+RUN apt-get update && apt-get install -y \
+ autoconf \
+ build-essential \
+ cmake \
+ g++ \
+ gfortran \
+ git \
+ libcurl4-gnutls-dev \
+ hdf5-tools \
+ libboost-date-time-dev \
+ libboost-program-options-dev \
+ libboost-system-dev \
+ libboost-filesystem-dev \
+ libboost-iostreams-dev \
+ libbz2-dev \
+ libhdf5-dev \
+ libncurses-dev \
+ liblzma-dev \
+ zlib1g-dev \
+ && apt-get clean \
+ && rm -rf /var/lib/apt/lists/*
+
+# set environment
+ENV BOOST_ROOT /usr
+
+# install delly
+RUN cd /opt \
+ && git clone --recursive https://github.com/dellytools/delly.git \
+ && cd /opt/delly/ \
+ && git checkout ${DELLY_VERSION} \
+ && make STATIC=1 all \
+ && make install
+
+
+# Multi-stage build
+FROM alpine:latest
+RUN mkdir -p /opt/delly/bin
+WORKDIR /opt/delly/bin
+COPY --from=0 /opt/delly/bin/delly .
+
+# Workdir
+WORKDIR /root/
+
+# Add Delly to PATH
+ENV PATH="/opt/delly/bin:${PATH}"
+
+# by default /bin/sh
+CMD ["/bin/sh"]
diff --git a/delly_0.9.1/delly_0.9.1.cwl b/delly_0.9.1/delly_0.9.1.cwl
new file mode 100644
index 00000000..9c6f82dc
--- /dev/null
+++ b/delly_0.9.1/delly_0.9.1.cwl
@@ -0,0 +1,129 @@
+class: CommandLineTool
+cwlVersion: v1.0
+$namespaces:
+ dct: 'http://purl.org/dc/terms/'
+ doap: 'http://usefulinc.com/ns/doap#'
+ foaf: 'http://xmlns.com/foaf/0.1/'
+ sbg: 'https://www.sevenbridges.com/'
+baseCommand:
+ - delly
+inputs:
+ - id: memory_per_job
+ type: int?
+ doc: Memory per job in megabytes
+ - id: memory_overhead
+ type: int?
+ doc: Memory overhead per job in megabytes
+ - id: number_of_threads
+ type: int?
+ - id: out_file
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: '-o'
+ shellQuote: false
+ doc: The name to be used for the output bcf file
+ - id: reference_genome
+ type: File
+ inputBinding:
+ position: 0
+ prefix: '-g'
+ shellQuote: false
+ doc: reference genome fasta file
+ - id: input_bams
+ type:
+ - File
+ - type: array
+ items: File
+ inputBinding:
+ position: 99
+ shellQuote: false
+ doc: >-
+ an indexed bam tumor file, an indexed bam control file or it can be an
+ array of indexed bam files
+ secondaryFiles:
+ - ^.bai
+ - id: exclude_regions
+ type: File?
+ inputBinding:
+ position: 0
+ prefix: '-x'
+ shellQuote: false
+ doc: file with regions to exclude
+ - id: vcffile
+ type: File?
+ inputBinding:
+ position: 0
+ prefix: '-v'
+ shellQuote: false
+ doc: input VCF/BCF file for genotyping
+ - id: svtype
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: '-t'
+ shellQuote: false
+ doc: 'SV type to compute [DEL, INS, DUP, INV, BND, ALL]'
+ - id: geno_qual
+ type: int?
+ inputBinding:
+ position: 71
+ prefix: '-u'
+ doc: min. mapping quality for genotyping
+ - id: dump
+ type: File?
+ inputBinding:
+ position: 0
+ prefix: '-d'
+ doc: gzipped output file for SV-reads (optional)
+ - id: map_qual
+ type: int?
+ inputBinding:
+ position: 0
+ prefix: '-q'
+ doc: min. paired-end (PE) mapping quality
+ - id: qual_tra
+ type: int?
+ inputBinding:
+ position: 0
+ prefix: '-r'
+ doc: min. PE quality for translocation
+ - id: mad_cutoff
+ type: int?
+ inputBinding:
+ position: 0
+ prefix: '-s'
+ doc: 'insert size cutoff, median+s*MAD (deletions only)'
+outputs:
+ - id: bcf_out
+ type: File
+ outputBinding:
+ glob: $(inputs.out_file)
+arguments:
+ - call
+requirements:
+ - class: ShellCommandRequirement
+ - class: ResourceRequirement
+ ramMin: 60000
+ coresMin: 16
+hints:
+ - class: DockerRequirement
+ dockerPull: 'ghcr.io/msk-access/delly:0.9.1'
+'dct:contributor':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:buehlere@mskcc.org'
+ 'foaf:name': Eric Buehler
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'dct:creator':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:shahr2@mskcc.org'
+ 'foaf:name': Ronak Shah
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'doap:release':
+ - class: 'doap:Version'
+ 'doap:name': delly
+ 'doap:revision': 0.9.1
diff --git a/delly_0.9.1/example_input.yml b/delly_0.9.1/example_input.yml
new file mode 100644
index 00000000..d84fb73c
--- /dev/null
+++ b/delly_0.9.1/example_input.yml
@@ -0,0 +1,5 @@
+out_file: "name_of_output.bcf"
+reference_genome: {class: File, path: path_to_file.fasta}
+input_bams:
+ - {class: File, path: /path/to/file.bam}
+ - {class: File, path: /path/to/file.bam}
diff --git a/disambiguate_1.0.0/README.md b/disambiguate_1.0.0/README.md
deleted file mode 100644
index b1cd50f0..00000000
--- a/disambiguate_1.0.0/README.md
+++ /dev/null
@@ -1,52 +0,0 @@
- # CWL and Dockerfile for running Disambiguate
-
-## Version of tools in docker image (/container/Dockerfile)
-
-Dockerfile uses `biocontainers/biocontainers:latest` as a base image and installs tools from `bioconda`.
-
-| Tool | Version | Location | Notes |
-|--- |--- |--- | - |
-| biocontainers | latest | https://hub.docker.com/r/biocontainers/biocontainers/ | base image; "latest" not actually latest version, just tag name on docker hub|
-| bamtools | 2.4.0 | https://bioconda.github.io/recipes/bamtools/README.html | - |
-| ngs-disambiguate | 2016.11.10 | https://bioconda.github.io/recipes/ngs-disambiguate/README.html | - |
-
-[](https://microbadger.com/images/mskcc/disambiguate:1.0.0 "Get your own version badge on microbadger.com") [](https://microbadger.com/images/mskcc/disambiguate:1.0.0 "Get your own image badge on microbadger.com")
-
-
-## CWL
-
-- CWL specification 1.0
-- Use `example_inputs.yaml` to see the inputs to the cwl
-- Example Command using [toil](https://toil.readthedocs.io):
-
-```bash
- > toil-cwl-runner disambiguate_1.0.0.cwl example_inputs.yaml
-```
-
-## Command
-```
-USAGE:
-
- cwltool disambiguate_1.0.0.cwl \
- --prefix \
- --output_dir \
- [--aligner ] \
-
-
-Where:
-
- --prefix
- (required) Sample ID or name used as prefix. Do not include .bam
-
- --output_dir
- (required) Output directory
-
- --aligner
- Aligner option {bwa(default),tophat,hisat2,star}
-
-
- (required) Species A BAM file
-
-
- (required) Species B BAM file
-```
diff --git a/docs/README.md b/docs/README.md
new file mode 100644
index 00000000..324e5df3
--- /dev/null
+++ b/docs/README.md
@@ -0,0 +1,34 @@
+---
+description: >-
+ Central location for storing common workflow language based command line tools
+ for building msk-access workflows
+---
+
+# MSK-ACCESS command-line tools
+
+[](https://badge.fury.io/py/cwl-commandlinetools) [](https://travis-ci.com/msk-access/cwl-commandlinetools)
+
+* Free software: Apache Software License 2.0
+* Documentation: [https://msk-access.gitbook.io/command-line-tools-cwl/](https://msk-access.gitbook.io/command-line-tools-cwl/)
+
+## Features
+
+Create command line tools in common workflow language to generate msk-access workflows.
+
+## Installation
+
+Clone the repository:
+
+```text
+git clone --depth 50 https://github.com/msk-access/cwl-commandlinetools.git
+```
+
+**Follow the README in respective tool folder for execution of the tool.**
+
+## Credits
+
+This package was created with Cookiecutter _and the `audreyr/cookiecutter-pypackage`_ project template.
+
+* Cookiecutter: [https://github.com/audreyr/cookiecutter](https://github.com/audreyr/cookiecutter)
+* `audreyr/cookiecutter-pypackage`: [https://github.com/audreyr/cookiecutter-pypackage](https://github.com/audreyr/cookiecutter-pypackage)
+
diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md
new file mode 100644
index 00000000..66fd9a0f
--- /dev/null
+++ b/docs/SUMMARY.md
@@ -0,0 +1,90 @@
+# Table of contents
+
+* [MSK-ACCESS command-line tools](README.md)
+* [ABRA2](abra2/README.md)
+ * [v2.17](abra2/abra2_2.17.md)
+ * [v2.22](abra2/abra2_2.22.md)
+* access_utils
+ * [0.1.1](../access_utils/0.1.1/README.md)
+* [bcftools](bcftools/README.md)
+ * [bcftools concat v1.6](bcftools/bcftools_concat_1.6.md)
+ * [bcftools bgzip v1.15.1](bcftools/bcftools_bgzip_v1.15.1.md)
+ * [bcftools tabix v1.15.1](bcftools/bcftools_tabix_v1.15.1.md)
+* [Bedtools](bedtools/README.md)
+ * [genomecov v2.28.0\_cv2](bedtools/bedtools_genomecov_v2.28.0_cv2.md)
+ * [merge v2.28.0\_cv2](bedtools/bedtools_merge_v2.28.0_cv2.md)
+ * [sortVcf v2.28.0\_cv2](bedtools/bedtools_sortvcf_v2.28.0_cv2.md)
+* Biometrics
+ * [extract](../biometrics_extract/README.md)
+ * [minor](../biometrics_minor/README.md)
+ * [major](../biometrics_major/README.md)
+ * [genotype](../biometrics_genotype/README.md)
+ * [sexmismatch](../biometrics_sexmismatch/README.md)
+* [Delly](delly/README.md)
+ * [delly call 0.9.1](delly/delly_call_0.9.1.md)
+* [Disambiguate](disambiguate/README.md)
+ * [v1.0.0](disambiguate/disambiguate_1.0.0.md)
+* [Fgbio](fgbio/README.md)
+ * [CallDuplexConsensusReads v1.2.0](fgbio/fgbio_call_duplex_consensus_reads_1.2.0.md)
+ * [CollectDuplexSeqMetrics v1.2.0](fgbio/fgbio_collect_duplex_seq_metrics_1.2.0.md)
+ * [GroupReadsByUmi v1.2.0](fgbio/fgbio_group_reads_by_umi_1.2.0.md)
+ * [FastqToBam v1.2.0](fgbio/fgbio_fastq_to_bam_1.2.0.md)
+ * [FilterConsensusReads v1.2.0](fgbio/fgbio_filter_consensus_reads_1.2.0.md)
+ * [simplex\_filter v0.1.8](fgbio/fgbio_postprocessing_simplex_filter_0.1.8.md)
+* [GATK](gatk/README.md)
+ * [ApplyBQSR v4.1.2.0](gatk/gatk_applybqsr_4.1.2.0.md)
+ * [ApplyBQSR v4.1.8.1](gatk/gatk_apply_bqsr_4.1.8.1.md)
+ * [BaseRecalibrator v4.1.2.0](gatk/gatk_baserecalibrator_4.1.2.0.md)
+ * [BaseRecalibrator v4.1.8.1](gatk/gatk_base_recalibrator_4.1.8.1.md)
+ * [MergeBamAlignment v4.1.8.0](gatk/gatk_merge_bam_alignment_4.1.8.0.md)
+ * [MergeSamFiles v4.1.8.0](gatk/gatk_merge_sam_files_4.1.8.0.md)
+ * [SamToFastq v4.1.8.0](gatk/samtofastq-v4.1.8.0.md)
+* GetBaseCountsMultiSample
+ * [1.2.5](../getbasecountsmultisample/1.2.5/README.md)
+* [Manta](manta/README.md)
+ * [Manta v1.5.1](manta/manta_1.5.1.md)
+* [Marianas](marianas/README.md)
+ * [Collapsing First Pass v1.8.1](marianas/marianas_collapsing_first_pass_1.8.1.md)
+ * [Collapsing Second Pass v1.8.1](marianas/marianas_collapsing_second_pass_1.8.1.md)
+ * [Process Loop UMI v1.8.1](marianas/marianas_process_loop_umi_1.8.1.md)
  * [Separate BAMs v1.8.1](marianas/marianas_separate_bams_1.8.1.md)
+* MultiQC
+ * [MultiQC v1.10.1.7](../multiqc/1.10.1.7/README.md)
+* [MuTect](mutect/README.md)
+ * [MuTect 1.1.5](mutect/mutect_1.1.5.md)
+* [Merge Fastq](merge-fastq/README.md)
+ * [v0.1.7](merge-fastq/merge_fastq_0.1.7.md)
+* [Mosdepth](mosdepth/README.md)
+ * [0.3.3](mosdepth/mosdepth_0.3.3.md)
+* [Octopus](octopus/README.md)
+ * [v0.7.4](octopus/octopus_0.7.4.md)
+* [Picard Tools](picard-tools/README.md)
+ * [AddOrReplaceReadGroups v1.96](picard-tools/picard_add_or_replace_read_groups_1.96.md)
+ * [AddOrReplaceReadGroups v2.21.2](picard-tools/picard_add_or_replace_read_groups_2.21.2.md)
+ * [AddOrReplaceReadGroups v4.1.8.1](picard-tools/picard_add_or_replace_read_groups_4.1.8.1.md)
+ * [CollectAlignmentSummaryMetrics v2.8.1](picard-tools/picard_collect_alignment_summary_metrics_2.8.1.md)
+ * [CollectAlignmentSummaryMetrics v2.21.2](picard-tools/picard_collect_alignment_summary_metrics_2.21.2.md)
+ * [CollectMultipleMetrics v2.8.1](picard-tools/picard_collectmultiplemetric_2.8.1.md)
+ * [CollectMultipleMetrics v2.21.2](picard-tools/picard_collectmultiplemetric_2.21.2.md)
+ * [FixMateInformation v1.96](picard-tools/picard_fix_mate_information_1.96.md)
+ * [FixMateInformation v2.21.2](picard-tools/picard_fix_mate_information_2.21.2.md)
+ * [FixMateInformation v4.1.8.1](picard-tools/picard_fix_mate_information_4.1.8.1.md)
+ * [HSmetrics v2.8.1](picard-tools/picard_hsmetrics_2.8.1.md)
+ * [HSmetrics v2.21.2](picard-tools/picard_hsmetrics_2.21.2.md)
+ * [MarkDuplicates v1.96](picard-tools/picard_mark_duplicates_1.96.md)
+ * [MarkDuplicates v2.8.1](picard-tools/picard_mark_duplicates_2.8.1.md)
+ * [MarkDuplicates v2.21.2](picard-tools/picard_mark_duplicates_2.21.2.md)
+ * [MarkDuplicates v4.1.8.1](picard-tools/picard_mark_duplicates_4.1.8.1.md)
+* [Trim Galore](trim-galore/README.md)
+ * [v0.6.2](trim-galore/trim_galore_0.6.2.md)
* [Ubuntu utilities](ubuntu-utilites/README.md)
+ * [v18.04](ubuntu-utilites/utilities_ubuntu_18.04.md)
+* [VarDictJava](vardictjava/README.md)
+ * [v1.8.2](vardictjava/vardictjava_1.8.2.md)
+* [VCF2MAF](vcf2maf/README.md)
+ * [1.6.21](vcf2maf/vcf2maf_1.6.21.md)
+* [Waltz](waltz/README.md)
+ * [CountReads v3.1.1](waltz/waltz_count_reads_3.1.1.md)
+ * [PileupMetrics v3.1.1](waltz/waltz_pileupmatrices_3.1.1.md)
+
+
diff --git a/docs/abra2/README.md b/docs/abra2/README.md
new file mode 100644
index 00000000..5cd595c7
--- /dev/null
+++ b/docs/abra2/README.md
@@ -0,0 +1,2 @@
+# ABRA2
+
diff --git a/abra2_2.17/README.md b/docs/abra2/abra2_2.17.md
similarity index 73%
rename from abra2_2.17/README.md
rename to docs/abra2/abra2_2.17.md
index 89818e32..e2f9457b 100644
--- a/abra2_2.17/README.md
+++ b/docs/abra2/abra2_2.17.md
@@ -1,25 +1,25 @@
-# CWL and Dockerfile for running ABRA2
+# v2.17
-## Version of tools in docker image (/container/Dockerfile)
+## Version of tools in docker image \(/container/Dockerfile\)
-| Tool | Version | Location |
-|--- |--- |--- |
-| openjdk | 8 | - |
-| ABRA2 | 2.17 | https://github.com/mozack/abra2/releases/download/v2.19/abra2-2.17.jar |
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| openjdk | 8 | - |
+| ABRA2 | 2.17 | [https://github.com/mozack/abra2/releases/download/v2.19/abra2-2.17.jar](https://github.com/mozack/abra2/releases/download/v2.19/abra2-2.17.jar) |
-[](https://microbadger.com/images/mskcc/abra2:0.1.0 "Get your own version badge on microbadger.com") [](https://microbadger.com/images/mskcc/abra2:0.1.0 "Get your own image badge on microbadger.com") [](https://microbadger.com/images/mskcc/abra2:0.1.0 "Get your own license badge on microbadger.com")
+[](https://microbadger.com/images/mskcc/abra2:0.1.0) [](https://microbadger.com/images/mskcc/abra2:0.1.0) [](https://microbadger.com/images/mskcc/abra2:0.1.0)
## CWL
-- CWL specification 1.0
-- Use example_inputs.yaml to see the inputs to the cwl
-- Example Command using [toil](https://toil.readthedocs.io):
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
```bash
> toil-cwl-runner abra2_2.17.cwl example_inputs.yaml
```
-**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command**
+**If at MSK, using the JUNO cluster you can use the following command:**
```bash
#Using CWLTOOL
@@ -30,9 +30,9 @@
> toil-cwl-runner --singularity --logFile /path/to/abra2_toil_log/cwltoil.log --jobStore /path/to/abra2_jobStore --batchSystem lsf --workDir /path/to/abra2_toil_log --outdir . --writeLogs /path/to/abra2_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/abra2_2.17.cwl /path/to/inputs.yaml > abra2_toil.stdout 2> abra2_toil.stderr &
```
-### Usage
+### Usage
-```
+```text
usage: abra2_2.17.cwl [-h]
positional arguments:
@@ -85,4 +85,5 @@ optional arguments:
VCF containing known (or suspected) variant sites.
Very large files should be avoided.
--no_sort Do not attempt to sort final output
- ```
\ No newline at end of file
+```
+
diff --git a/docs/abra2/abra2_2.22.md b/docs/abra2/abra2_2.22.md
new file mode 100644
index 00000000..c23aaada
--- /dev/null
+++ b/docs/abra2/abra2_2.22.md
@@ -0,0 +1,21 @@
+# v2.22
+
+## Version of tools in docker image \(/container/Dockerfile\)
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| openjdk | 8 | - |
+| ABRA2 | 2.22 | [https://github.com/mozack/abra2/releases/download/v2.22/abra2-2.22.jar](https://github.com/mozack/abra2/releases/download/v2.22/abra2-2.22.jar) |
+
+[](https://microbadger.com/images/mskcc/abra2:0.2.0) [](https://microbadger.com/images/mskcc/abra2:0.2.0) [](https://microbadger.com/images/mskcc/abra2:0.2.0)
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner abra2_2.22.cwl example_inputs.yaml
+```
+
diff --git a/docs/bcftools/README.md b/docs/bcftools/README.md
new file mode 100644
index 00000000..a08fa464
--- /dev/null
+++ b/docs/bcftools/README.md
@@ -0,0 +1 @@
+# BCFTOOLS
diff --git a/docs/bcftools/bcftools_bgzip_v1.15.1.md b/docs/bcftools/bcftools_bgzip_v1.15.1.md
new file mode 100644
index 00000000..5ff81c8f
--- /dev/null
+++ b/docs/bcftools/bcftools_bgzip_v1.15.1.md
@@ -0,0 +1,43 @@
+# CWL and Docker for Running bgzip using bcftools v1.15.1
+
+## Version of tools in docker image (/container/Dockerfile)
+
+| Tool | Version | Location |
+| --------------------- | ------- | ------------------------------------- |
+| alpine:3.8 base image | 3.8 | - |
+| bcftools | 1.6 | quay.io/biocontainers/bcftools:1.6--0 |
+
+## CWL
+
+- CWL specification 1.0
+- Use example_inputs.yaml to see the inputs to the cwl
+- Example Command using [toil](https://toil.readthedocs.io/):
+
+```
+toil-cwl-runner bcftools_bgzip_1.15.1.cwl example_input_bgzip.yaml
+```
+
+**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command**
+
+```shell
+#Using CWLTOOL
+cwltool --singularity --non-strict /path/to/bcftools_bgzip_1.15.1.cwl /path/to/inputs.yaml
+
+#Using toil-cwl-runner
+mkdir bcftools_toil_log
+toil-cwl-runner --singularity --logFile /path/to/bcftools_toil_log/cwltoil.log --jobStore /path/to/bcftools_jobStore --batchSystem lsf --workDir /path/to/bcftools_toil_log --outdir . --writeLogs /path/to/bcftools_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/bcftools_bgzip_1.15.1.cwl /path/to/inputs.yaml > bcftools_toil.stdout 2> bcftools_toil.stderr &
+```
+
+### Usage
+
+```shell
+usage: bcftools_bgzip_1.15.1.cwl [-h] --input INPUT
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --input INPUT input VCF file
+```
+
diff --git a/docs/bcftools/bcftools_concat_1.6.md b/docs/bcftools/bcftools_concat_1.6.md
new file mode 100644
index 00000000..eabc4ff7
--- /dev/null
+++ b/docs/bcftools/bcftools_concat_1.6.md
@@ -0,0 +1,69 @@
+# CWL and Dockerfile for running bcftools v1.6
+
+## Version of tools in docker image (/container/Dockerfile)
+
+| Tool | Version | Location |
+|--- |--- |--- |
+| bcftools | 1.6 | quay.io/biocontainers/bcftools:1.6--0 |
+| bcftools | 1.6 | https://github.com/samtools/bcftools/archive/1.6.zip |
+
+## CWL
+
+- CWL specification 1.0
+- Use example_inputs.yaml to see the inputs to the cwl
+- Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner bcftools_concat_1.6.cwl example_inputs.yaml
+```
+
+**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command**
+
+```bash
+#Using CWLTOOL
+> cwltool --singularity --non-strict /path/to/bcftools_concat_1.6.cwl /path/to/inputs.yaml
+
+#Using toil-cwl-runner
+> mkdir bcftools_toil_log
+> toil-cwl-runner --singularity --logFile /path/to/bcftools_toil_log/cwltoil.log --jobStore /path/to/bcftools_jobStore --batchSystem lsf --workDir /path/to/bcftools_toil_log --outdir . --writeLogs /path/to/bcftools_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/bcftools_concat_1.6.cwl /path/to/inputs.yaml > bcftools_toil.stdout 2> bcftools_toil.stderr &
+```
+
+### Usage
+
+```
+usage: toil-cwl-runner bcftools_concat_1.6.cwl [-h]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --allow_overlaps First coordinate of the next file can precede last
+ record of the current file.
+ --compact_PS Do not output PS tag at each site, only at the start
+ of a new phase set block.
+ --ligate Ligate phased VCFs by matching phase at overlapping
+ haplotypes
+ --list LIST Read the list of files from a file.
+ --min_PQ MIN_PQ Break phase set if phasing quality is lower than
+ [30]
+ --naive Concatenate BCF files without recompression
+ (dangerous, use with caution)
+ --no_version do not append version and command line to the header
+ --output OUTPUT Write output to a file [standard output]
+ --output_type OUTPUT_TYPE
+ b - compressed BCF, u - uncompressed BCF, z
+ - compressed VCF, v - uncompressed VCF [v]
+ --regions REGIONS Restrict to comma-separated list of regions
+ --regions_file REGIONS_FILE
+ Restrict to regions listed in a file
+ --remove_duplicates Alias for -d none
+ --rm_dups RM_DUPS Output duplicate records present in multiple
+ files only once -
+ --threads THREADS Number of extra output compression threads [0]
+ --vcf_files_csi VCF_FILES_CSI
+ Array of vcf files to be concatenated into one vcf
+ --vcf_files_tbi VCF_FILES_TBI
+ Array of vcf files to be concatenated into one vcf
+
+```
diff --git a/docs/bcftools/bcftools_tabix_v1.15.1.md b/docs/bcftools/bcftools_tabix_v1.15.1.md
new file mode 100644
index 00000000..e2adfbc6
--- /dev/null
+++ b/docs/bcftools/bcftools_tabix_v1.15.1.md
@@ -0,0 +1,43 @@
+# CWL and Docker for Running tabix using bcftools v1.15.1
+
+## Version of tools in docker image (/container/Dockerfile)
+
+| Tool | Version | Location |
+| --------------------- | ------- | ------------------------------------- |
+| alpine:3.8 base image | 3.8 | - |
+| bcftools | 1.6 | quay.io/biocontainers/bcftools:1.6--0 |
+
+## CWL
+
+- CWL specification 1.0
+- Use example_inputs.yaml to see the inputs to the cwl
+- Example Command using [toil](https://toil.readthedocs.io/):
+
+```
+toil-cwl-runner bcftools_tabix_1.15.1.cwl example_input_tabix.yaml
+```
+
+**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command**
+
+```shell
+#Using CWLTOOL
+cwltool --singularity --non-strict /path/to/bcftools_tabix_1.15.1.cwl /path/to/inputs.yaml
+
+#Using toil-cwl-runner
+mkdir bcftools_toil_log
+toil-cwl-runner --singularity --logFile /path/to/bcftools_toil_log/cwltoil.log --jobStore /path/to/bcftools_jobStore --batchSystem lsf --workDir /path/to/bcftools_toil_log --outdir . --writeLogs /path/to/bcftools_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/bcftools_tabix_1.15.1.cwl /path/to/inputs.yaml > bcftools_toil.stdout 2> bcftools_toil.stderr &
+```
+
+### Usage
+
+```shell
+usage: bcftools_tabix_1.15.1.cwl [-h] --input INPUT [job_order]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --input INPUT input VCF file
+```
+
diff --git a/docs/bedtools/README.md b/docs/bedtools/README.md
new file mode 100644
index 00000000..13a0e51c
--- /dev/null
+++ b/docs/bedtools/README.md
@@ -0,0 +1,2 @@
+# Bedtools
+
diff --git a/docs/bedtools/bedtools_genomecov_v2.28.0_cv2.md b/docs/bedtools/bedtools_genomecov_v2.28.0_cv2.md
new file mode 100644
index 00000000..da41c5b2
--- /dev/null
+++ b/docs/bedtools/bedtools_genomecov_v2.28.0_cv2.md
@@ -0,0 +1,43 @@
+# genomecov v2.28.0\_cv2
+
+## Version of tools in [docker image ](https://hub.docker.com/r/biocontainers/bedtools)
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| Bedtools | v2.28.0\_cv2 | [https://github.com/arq5x/bedtools2/releases/tag/v2.28.0](https://github.com/arq5x/bedtools2/releases/tag/v2.28.0) |
+
+[](https://github.com/arq5x/bedtools2/releases/tag/v2.28.0)
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner bedtools_genomecov_v2.28.0_cv2.cwl example_inputs.yml
+```
+
+**If at MSK, using the JUNO cluster you can use the following command**
+
+```bash
+#Using CWLTOOL
+> cwltool --singularity --non-strict bedtools_genomecov_v2.28.0_cv2.cwl inputs.yaml
+
+#Using toil-cwl-runner
+> mkdir run_directory
+> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 bedtools_genomecov_v2.28.0_cv2.cwl inputs.yaml > file.stdout 2> file.stderr &
+```
+
+## Usage
+
+\`\`\`bash
+
+> toil-cwl-runner bedtools\_genomecov\_v2.28.0\_cv2.cwl --help
+
+usage: bedtools\_genomecov\_v2.28.0\_cv2.cwl \[-h\] --input INPUT --output\_file\_name OUTPUT\_FILE\_NAME \[--memory\_overhead MEMORY\_OVERHEAD\] \[--memory\_per\_job MEMORY\_PER\_JOB\] \[--number\_of\_threads NUMBER\_OF\_THREADS\] \[--option\_bedgraph\] \[job\_order\]
+
+positional arguments: job\_order Job input json file
+
+optional arguments: -h, --help show this help message and exit --input INPUT The input file can be in BAM format \(Note: BAM must be sorted by position\) --output\_file\_name OUTPUT\_FILE\_NAME --memory\_overhead MEMORY\_OVERHEAD --memory\_per\_job MEMORY\_PER\_JOB --number\_of\_threads NUMBER\_OF\_THREADS --option\_bedgraph option flag parameter to choose output file format. -bg refers to bedgraph format
+
diff --git a/docs/bedtools/bedtools_merge_v2.28.0_cv2.md b/docs/bedtools/bedtools_merge_v2.28.0_cv2.md
new file mode 100644
index 00000000..507a8994
--- /dev/null
+++ b/docs/bedtools/bedtools_merge_v2.28.0_cv2.md
@@ -0,0 +1,43 @@
+# merge v2.28.0\_cv2
+
+## Version of tools in [docker image ](https://hub.docker.com/r/biocontainers/bedtools)
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| Bedtools | v2.28.0\_cv2 | [https://github.com/arq5x/bedtools2/releases/tag/v2.28.0](https://github.com/arq5x/bedtools2/releases/tag/v2.28.0) |
+
+[](https://github.com/arq5x/bedtools2/releases/tag/v2.28.0)
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner bedtools_merge_v2.28.0_cv2.cwl example_inputs.yml
+```
+
+**If at MSK, using the JUNO cluster you can use the following command**
+
+```bash
+#Using CWLTOOL
+> cwltool --singularity --non-strict bedtools_merge_v2.28.0_cv2.cwl inputs.yaml
+
+#Using toil-cwl-runner
+> mkdir run_directory
+> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 bedtools_merge_v2.28.0_cv2.cwl inputs.yaml > file.stdout 2> file.stderr &
+```
+
+## Usage
+
+\`\`\`bash
+
+> toil-cwl-runner bedtools\_merge\_v2.28.0\_cv2.cwl --help
+
+usage: bedtools\_merge\_v2.28.0\_cv2.cwl \[-h\] --input INPUT --output\_file\_name OUTPUT\_FILE\_NAME \[--memory\_overhead MEMORY\_OVERHEAD\] \[--memory\_per\_job MEMORY\_PER\_JOB\] \[--number\_of\_threads NUMBER\_OF\_THREADS\] \[--distance\_between\_features DISTANCE\_BETWEEN\_FEATURES\] \[job\_order\]
+
+positional arguments: job\_order Job input json file
+
+optional arguments: -h, --help show this help message and exit --input INPUT BEDgraph format file generated from Bedtools Genomecov module --output\_file\_name OUTPUT\_FILE\_NAME --memory\_overhead MEMORY\_OVERHEAD --memory\_per\_job MEMORY\_PER\_JOB --number\_of\_threads NUMBER\_OF\_THREADS --distance\_between\_features DISTANCE\_BETWEEN\_FEATURES Maximum distance between features allowed for features to be merged.
+
diff --git a/docs/bedtools/bedtools_sortvcf_v2.28.0_cv2.md b/docs/bedtools/bedtools_sortvcf_v2.28.0_cv2.md
new file mode 100644
index 00000000..7b76d73b
--- /dev/null
+++ b/docs/bedtools/bedtools_sortvcf_v2.28.0_cv2.md
@@ -0,0 +1,43 @@
+# SortVCF v2.28.0\_cv2
+
+## Version of tools in [docker image ](https://hub.docker.com/r/biocontainers/bedtools)
+
+| Tool | Version | Location |
+| :------- | :----------- | :----------------------------------------------------------- |
+| Bedtools | v2.28.0\_cv2 | [https://github.com/arq5x/bedtools2/releases/tag/v2.28.0](https://github.com/arq5x/bedtools2/releases/tag/v2.28.0) |
+
+[](https://github.com/arq5x/bedtools2/releases/tag/v2.28.0)
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+toil-cwl-runner bedtools_sortbed_vcf.cwl example_input.yml
+```
+
+**If at MSK, using the JUNO cluster you can use the following command**
+
+```bash
+#Using CWLTOOL
+cwltool --singularity --non-strict bedtools_sortbed_vcf.cwl inputs.yaml
+
+#Using toil-cwl-runner
+mkdir run_directory
+toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 bedtools_sortbed_vcf.cwl inputs.yaml > file.stdout 2> file.stderr &
+```
+
+## Usage
+
+```shell
+Usage: bedtools_sortbed_vcf.cwl [-h] --input INPUT [job_order]
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --input INPUT input VCF file
+```
+
diff --git a/docs/delly/README.md b/docs/delly/README.md
new file mode 100644
index 00000000..3cf4e541
--- /dev/null
+++ b/docs/delly/README.md
@@ -0,0 +1,2 @@
+# Delly
+
diff --git a/docs/delly/delly_call_0.9.1.md b/docs/delly/delly_call_0.9.1.md
new file mode 100644
index 00000000..08dcf390
--- /dev/null
+++ b/docs/delly/delly_call_0.9.1.md
@@ -0,0 +1,81 @@
+# CWL and Dockerfile for running Delly
+
+## Version of tools in docker image (/container/Dockerfile)
+
+| Tool | Version | Location |
+|--- |--- |--- |
+| ubuntu | 18.04 | - |
+| DELLY | 0.9.1 | https://github.com/dellytools/delly |
+
+
+## CWL
+
+- CWL specification 1.0
+- Use example_inputs.yaml to see the inputs to the cwl
+- Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner delly_0.9.1.cwl example_inputs.yaml
+```
+**If at MSK, using the JUNO cluster having installed toil-msk version 3.21.1 you can use the following command**
+
+```bash
+#Using CWLTOOL
+> cwltool --singularity --non-strict /path/to/delly_0.9.1.cwl /path/to/inputs.yaml
+
+#Using toil-cwl-runner
+> toil-cwl-runner --singularity --logFile /path/to/delly_toil.log --jobStore /path/to/delly_jobStore --batchSystem lsf --workDir /path/to/delly_toil_log --outdir . --writeLogs /path/to/delly_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --disableChaining --maxLogFileSize 20000000000 /path/to/delly_0.9.1.cwl /path/to/inputs.yaml > delly_toil.stdout 2> delly_toil.stderr &
+```
+
+### Usage
+
+```
+usage: delly_0.9.1.cwl [-h] [--memory_per_job MEMORY_PER_JOB]
+ [--memory_overhead MEMORY_OVERHEAD]
+ [--number_of_threads NUMBER_OF_THREADS]
+ [--out_file OUT_FILE] --reference_genome
+ REFERENCE_GENOME [--exclude_regions EXCLUDE_REGIONS]
+ [--vcffile VCFFILE] [--svtype SVTYPE]
+ [job_order]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --memory_per_job MEMORY_PER_JOB
+ Memory per job in megabytes
+ --memory_overhead MEMORY_OVERHEAD
+ Memory overhead per job in megabytes
+ --number_of_threads NUMBER_OF_THREADS
+ --out_file OUT_FILE The name to be used for the output bcf file
+ --reference_genome REFERENCE_GENOME
+ reference genome fasta file
+ --exclude_regions EXCLUDE_REGIONS
+ file with regions to exclude
+ --vcffile VCFFILE input VCF/BCF file for genotyping
+ --svtype SVTYPE SV type to compute [DEL, INS, DUP, INV, BND, ALL]
+```
+
+## Disclaimer
+Parts of this code were borrowed from the delly repository, https://github.com/dellytools/delly, which uses the following redistribution license:
+
+Copyright (c) 2012- European Molecular Biology Laboratory (EMBL)
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software
+ without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/docs/disambiguate/README.md b/docs/disambiguate/README.md
new file mode 100644
index 00000000..4850c2fd
--- /dev/null
+++ b/docs/disambiguate/README.md
@@ -0,0 +1,2 @@
+# Disambiguate
+
diff --git a/docs/disambiguate/disambiguate_1.0.0.md b/docs/disambiguate/disambiguate_1.0.0.md
new file mode 100644
index 00000000..8e065892
--- /dev/null
+++ b/docs/disambiguate/disambiguate_1.0.0.md
@@ -0,0 +1,53 @@
+# v1.0.0
+
+## Version of tools in docker image \(/container/Dockerfile\)
+
+Dockerfile uses `biocontainers/biocontainers:latest` as a base image and installs tools from `bioconda`.
+
+| Tool | Version | Location | Notes |
+| :--- | :--- | :--- | :--- |
+| biocontainers | latest | [https://hub.docker.com/r/biocontainers/biocontainers/](https://hub.docker.com/r/biocontainers/biocontainers/) | base image; "latest" not actually latest version, just tag name on docker hub |
+| bamtools | 2.4.0 | [https://bioconda.github.io/recipes/bamtools/README.html](https://bioconda.github.io/recipes/bamtools/README.html) | - |
+| ngs-disambiguate | 2016.11.10 | [https://bioconda.github.io/recipes/ngs-disambiguate/README.html](https://bioconda.github.io/recipes/ngs-disambiguate/README.html) | - |
+
+[](https://microbadger.com/images/mskcc/disambiguate:1.0.0) [](https://microbadger.com/images/mskcc/disambiguate:1.0.0)
+
+## CWL
+
+* CWL specification 1.0
+* Use `example_inputs.yaml` to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner disambiguate_1.0.0.cwl example_inputs.yaml
+```
+
+## Command
+
+```text
+USAGE:
+
+ cwltool disambiguate_1.0.0.cwl \
+ --prefix \
+ --output_dir \
+ [--aligner ] \
+
+
+Where:
+
+ --prefix
+ (required) Sample ID or name used as prefix. Do not include .bam
+
+ --output_dir
+ (required) Output directory
+
+ --aligner
+ Aligner option {bwa(default),tophat,hisat2,star}
+
+
+ (required) Species A BAM file
+
+
+ (required) Species B BAM file
+```
+
diff --git a/docs/fgbio/README.md b/docs/fgbio/README.md
new file mode 100644
index 00000000..f1d1ca8d
--- /dev/null
+++ b/docs/fgbio/README.md
@@ -0,0 +1,2 @@
+# Fgbio
+
diff --git a/docs/fgbio/fgbio_call_duplex_consensus_reads_1.2.0.md b/docs/fgbio/fgbio_call_duplex_consensus_reads_1.2.0.md
new file mode 100644
index 00000000..c8d3d73e
--- /dev/null
+++ b/docs/fgbio/fgbio_call_duplex_consensus_reads_1.2.0.md
@@ -0,0 +1,79 @@
+# CallDuplexConsensusReads v1.2.0
+
+## Version of tools in docker image
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| fgbio | 1.2.0 | quay.io/biocontainers/fgbio:1.2.0--0 |
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner fgbio_call_duplex_consensus_reads_1.2.0.cwl example_inputs.yaml
+```
+
+## Usage
+
+```bash
+usage: fgbio_call_duplex_consensus_reads_1.2.0.cwl [-h]
+ [--memory_per_job MEMORY_PER_JOB]
+ [--memory_overhead MEMORY_OVERHEAD]
+ [--number_of_threads NUMBER_OF_THREADS]
+ --input INPUT
+ [--output_file_name OUTPUT_FILE_NAME]
+ [--read_name_prefix READ_NAME_PREFIX]
+ [--read_group_id READ_GROUP_ID]
+ [--error_rate_pre_umi ERROR_RATE_PRE_UMI]
+ [--error_rate_post_umi ERROR_RATE_POST_UMI]
+ [--min_input_base_quality MIN_INPUT_BASE_QUALITY]
+ [--trim]
+ [--sort_order SORT_ORDER]
+ [--min_reads MIN_READS]
+ [--max_reads_per_strand MAX_READS_PER_STRAND]
+ [--threads THREADS]
+ [job_order]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --memory_per_job MEMORY_PER_JOB
+ Memory per job in megabytes
+ --memory_overhead MEMORY_OVERHEAD
+ Memory overhead per job in megabytes
+ --number_of_threads NUMBER_OF_THREADS
+ --input INPUT The input SAM or BAM file.
+ --output_file_name OUTPUT_FILE_NAME
+ Output SAM or BAM file to write consensus reads.
+ --read_name_prefix READ_NAME_PREFIX
+ The prefix all consensus read names
+ --read_group_id READ_GROUP_ID
+ The new read group ID for all the consensus reads.
+ --error_rate_pre_umi ERROR_RATE_PRE_UMI
+ The Phred-scaled error rate for an error prior to the
+ UMIs being integrated.
+ --error_rate_post_umi ERROR_RATE_POST_UMI
+ The Phred-scaled error rate for an error post the UMIs
+ have been integrated.
+ --min_input_base_quality MIN_INPUT_BASE_QUALITY
+ Ignore bases in raw reads that have Q below this
+ value.
+ --trim If true, quality trim input reads in addition to
+ masking low Q bases
+ --sort_order SORT_ORDER
+ The sort order of the output, if :none: then the same
+ as the input.
+ --min_reads MIN_READS
+ The minimum number of input reads to a consensus read.
+ --max_reads_per_strand MAX_READS_PER_STRAND
+ The maximum number of reads to use when building a
+ single-strand consensus. If more than this many reads
+ are present in a tag family, the family is randomly
+ downsampled to exactly max-reads reads.
+```
+
diff --git a/docs/fgbio/fgbio_collect_duplex_seq_metrics_1.2.0.md b/docs/fgbio/fgbio_collect_duplex_seq_metrics_1.2.0.md
new file mode 100644
index 00000000..78812a55
--- /dev/null
+++ b/docs/fgbio/fgbio_collect_duplex_seq_metrics_1.2.0.md
@@ -0,0 +1,62 @@
+# CollectDuplexSeqMetrics v1.2.0
+
+## Version of tools in docker image
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| fgbio | 1.2.0 | quay.io/biocontainers/fgbio:1.2.0--0 |
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner fgbio_collect_duplex_seq_metrics_1.2.0.cwl example_inputs.yaml
+```
+
+## Usage
+
+```bash
+usage: fgbio_collect_duplex_seq_metrics_1.2.0.cwl
+ [-h] [--memory_per_job MEMORY_PER_JOB]
+ [--memory_overhead MEMORY_OVERHEAD]
+ [--number_of_threads NUMBER_OF_THREADS] --input INPUT --output_prefix
+ OUTPUT_PREFIX [--intervals INTERVALS] [--description DESCRIPTION]
+ [--duplex_umi_counts DUPLEX_UMI_COUNTS] [--min_ab_reads MIN_AB_READS]
+ [--min_ba_reads MIN_BA_READS] [--umi_tag UMI_TAG] [--mi_tag MI_TAG]
+ [job_order]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --memory_per_job MEMORY_PER_JOB
+ Memory per job in megabytes
+ --memory_overhead MEMORY_OVERHEAD
+ Memory overhead per job in megabytes
+ --number_of_threads NUMBER_OF_THREADS
+ --input INPUT Input BAM file generated by GroupReadByUmi.
+ --output_prefix OUTPUT_PREFIX
+ Prefix of output files to write.
+ --intervals INTERVALS
+ Optional set of intervals over which to restrict
+ analysis. [Optional].
+ --description DESCRIPTION
+ Description of data set used to label plots. Defaults
+ to sample/library. [Optional].
+ --duplex_umi_counts DUPLEX_UMI_COUNTS
+ If true, produce the .duplex_umi_counts.txt file with
+ counts of duplex UMI observations. [Optional].
+ --min_ab_reads MIN_AB_READS
+ Minimum AB reads to call a tag family a 'duplex'.
+ [Optional].
+ --min_ba_reads MIN_BA_READS
+ Minimum BA reads to call a tag family a 'duplex'.
+ [Optional].
+ --umi_tag UMI_TAG The tag containing the raw UMI. [Optional].
+ --mi_tag MI_TAG The output tag for UMI grouping. [Optional].
+```
+
diff --git a/docs/fgbio/fgbio_fastq_to_bam_1.2.0.md b/docs/fgbio/fgbio_fastq_to_bam_1.2.0.md
new file mode 100644
index 00000000..3d4ede7f
--- /dev/null
+++ b/docs/fgbio/fgbio_fastq_to_bam_1.2.0.md
@@ -0,0 +1,82 @@
+# FastqToBam v1.2.0
+
+## Version of tools in docker image
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| fgbio | 1.2.0 | quay.io/biocontainers/fgbio:1.2.0--0 |
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner fgbio_fastq_to_bam_1.2.0.cwl example_inputs.yaml
+```
+
+## Usage
+
+```bash
+usage: fgbio_fastq_to_bam_1.2.0.cwl [-h] [--memory_per_job MEMORY_PER_JOB]
+ [--memory_overhead MEMORY_OVERHEAD]
+ [--number_of_threads NUMBER_OF_THREADS]
+ --input INPUT
+ [--output_file_name OUTPUT_FILE_NAME]
+ [--read-structures READ_STRUCTURES]
+ [--sort] [--umi-tag UMI_TAG]
+ [--read-group-id READ_GROUP_ID]
+ [--sample SAMPLE] [--library LIBRARY]
+ [--platform PLATFORM]
+ [--platform-unit PLATFORM_UNIT]
+ [--platform-model PLATFORM_MODEL]
+ [--sequencing-center SEQUENCING_CENTER]
+ [--predicted-insert-size PREDICTED_INSERT_SIZE]
+ [--description DESCRIPTION]
+ [--comment COMMENT] [--run-date RUN_DATE]
+ [job_order]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --memory_per_job MEMORY_PER_JOB
+ Memory per job in megabytes
+ --memory_overhead MEMORY_OVERHEAD
+ Memory overhead per job in megabytes
+ --number_of_threads NUMBER_OF_THREADS
+ --input INPUT Fastq files corresponding to each sequencing read
+ (e.g. R1, I1, etc.).
+ --output_file_name OUTPUT_FILE_NAME
+ The output SAM or BAM file to be written.
+ --read-structures READ_STRUCTURES
+ Read structures, one for each of the FASTQs.
+ https://github.com/fulcrumgenomics/fgbio/wiki/Read-
+ Structures
+ --sort If true, queryname sort the BAM file, otherwise
+ preserve input order.
+ --umi-tag UMI_TAG Tag in which to store molecular barcodes/UMIs
+ --read-group-id READ_GROUP_ID
+ Read group ID to use in the file header.
+ --sample SAMPLE The name of the sequenced sample.
+ --library LIBRARY The name/ID of the sequenced library.
+ --platform PLATFORM Sequencing Platform
+ --platform-unit PLATFORM_UNIT
+                        Platform unit (e.g. '..')
+ --platform-model PLATFORM_MODEL
+ Platform model to insert into the group header (ex.
+ miseq, hiseq2500, hiseqX)
+ --sequencing-center SEQUENCING_CENTER
+ The sequencing center from which the data originated
+ --predicted-insert-size PREDICTED_INSERT_SIZE
+ Predicted median insert size, to insert into the read
+ group header
+ --description DESCRIPTION
+ Description of the read group.
+ --comment COMMENT Comment(s) to include in the output file’s header
+ --run-date RUN_DATE Date the run was produced, to insert into the read
+ group header
+```
+
diff --git a/docs/fgbio/fgbio_filter_consensus_reads_1.2.0.md b/docs/fgbio/fgbio_filter_consensus_reads_1.2.0.md
new file mode 100644
index 00000000..470166a1
--- /dev/null
+++ b/docs/fgbio/fgbio_filter_consensus_reads_1.2.0.md
@@ -0,0 +1,80 @@
+# FilterConsensusReads v1.2.0
+
+## Version of tools in docker image
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| fgbio | 1.2.0 | quay.io/biocontainers/fgbio:1.2.0--0 |
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner fgbio_filter_consensus_reads_1.2.0.cwl example_inputs.yaml
+```
+
+## Usage
+
+```bash
+usage: fgbio_filter_consensus_reads_1.2.0.cwl [-h]
+ [--memory_per_job MEMORY_PER_JOB]
+ [--memory_overhead MEMORY_OVERHEAD]
+ [--number_of_threads NUMBER_OF_THREADS]
+ --input INPUT
+ [--output_file_name OUTPUT_FILE_NAME]
+ --reference_fasta
+ REFERENCE_FASTA
+ [--reverse_per_base_tags]
+ [--min_reads MIN_READS]
+ [--max_read_error_rate MAX_READ_ERROR_RATE]
+ [--max_base_error_rate MAX_BASE_ERROR_RATE]
+ [--min_base_quality MIN_BASE_QUALITY]
+ [--max_no_call_fraction MAX_NO_CALL_FRACTION]
+ [--min_mean_base_quality MIN_MEAN_BASE_QUALITY]
+ [--require_single_strand_agreement]
+ [job_order]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --memory_per_job MEMORY_PER_JOB
+ Memory per job in megabytes
+ --memory_overhead MEMORY_OVERHEAD
+ Memory overhead per job in megabytes
+ --number_of_threads NUMBER_OF_THREADS
+ --input INPUT The input SAM or BAM file.
+ --output_file_name OUTPUT_FILE_NAME
+ Output SAM or BAM file to write consensus reads.
+ --reference_fasta REFERENCE_FASTA
+ Reference fasta file.
+ --reverse_per_base_tags
+ Reverse [complement] per base tags on reverse strand
+ reads.
+ --min_reads MIN_READS
+ The minimum number of reads supporting a consensus
+ base/read. (Max 3 values)
+ --max_read_error_rate MAX_READ_ERROR_RATE
+ The maximum raw-read error rate across the entire
+ consensus read. (Max 3 values)
+ --max_base_error_rate MAX_BASE_ERROR_RATE
+ The maximum error rate for a single consensus base.
+ (Max 3 values)
+ --min_base_quality MIN_BASE_QUALITY
+ Mask (make N) consensus bases with quality less than
+ this threshold.
+ --max_no_call_fraction MAX_NO_CALL_FRACTION
+ Maximum fraction of no-calls in the read after
+ filtering
+ --min_mean_base_quality MIN_MEAN_BASE_QUALITY
+ The minimum mean base quality across the consensus
+ read
+ --require_single_strand_agreement
+ Mask (make N) consensus bases where the AB and BA
+ consensus reads disagree (for duplex-sequencing only).
+```
+
diff --git a/docs/fgbio/fgbio_group_reads_by_umi_1.2.0.md b/docs/fgbio/fgbio_group_reads_by_umi_1.2.0.md
new file mode 100644
index 00000000..87ad2aaa
--- /dev/null
+++ b/docs/fgbio/fgbio_group_reads_by_umi_1.2.0.md
@@ -0,0 +1,68 @@
+# GroupReadsByUmi v1.2.0
+
+## Version of tools in docker image
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| fgbio | 1.2.0 | quay.io/biocontainers/fgbio:1.2.0--0 |
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner fgbio_group_reads_by_umi_1.2.0.cwl example_inputs.yaml
+```
+
+## Usage
+
+```bash
+usage: fgbio_group_reads_by_umi_1.2.0.cwl [-h]
+ [--memory_per_job MEMORY_PER_JOB]
+ [--memory_overhead MEMORY_OVERHEAD]
+ [--number_of_threads NUMBER_OF_THREADS]
+ --input INPUT
+ [--output_file_name OUTPUT_FILE_NAME]
+ [--family_size_histogram FAMILY_SIZE_HISTOGRAM]
+ [--raw_tag RAW_TAG]
+ [--assign_tag ASSIGN_TAG]
+ [--min_map_q MIN_MAP_Q]
+ [--include_non_pf_reads]
+ --strategy STRATEGY
+ [--edits EDITS]
+ [--min_umi_length MIN_UMI_LENGTH]
+ [job_order]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --memory_per_job MEMORY_PER_JOB
+ Memory per job in megabytes
+ --memory_overhead MEMORY_OVERHEAD
+ Memory overhead per job in megabytes
+ --number_of_threads NUMBER_OF_THREADS
+ --input INPUT The input BAM file.
+ --output_file_name OUTPUT_FILE_NAME
+ The output SAM or BAM file to be written.
+ --family_size_histogram FAMILY_SIZE_HISTOGRAM
+ Optional output of tag family size counts.
+ --raw_tag RAW_TAG The tag containing the raw UMI.
+ --assign_tag ASSIGN_TAG
+ The output tag for UMI grouping.
+ --min_map_q MIN_MAP_Q
+ Minimum mapping quality.
+ --include_non_pf_reads
+ --strategy STRATEGY The UMI assignment strategy.
+ (identity,edit,adjacency,paired)
+ --edits EDITS The allowable number of edits between UMIs.
+ --min_umi_length MIN_UMI_LENGTH
+ The minimum UMI length. If not specified then all UMIs
+ must have the same length, otherwise discard reads
+ with UMIs shorter than this length and allow for
+ differing UMI lengths.
+```
+
diff --git a/docs/fgbio/fgbio_postprocessing_simplex_filter_0.1.8.md b/docs/fgbio/fgbio_postprocessing_simplex_filter_0.1.8.md
new file mode 100644
index 00000000..b12b00a3
--- /dev/null
+++ b/docs/fgbio/fgbio_postprocessing_simplex_filter_0.1.8.md
@@ -0,0 +1,44 @@
+# simplex\_filter v0.1.8
+
+## Version of tools in docker image
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| fgbio\_postprocessing | 0.1.8 | [https://github.com/msk-access/fgbio\_postprocessing](https://github.com/msk-access/fgbio_postprocessing) |
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner fgbio_postprocessing_simplex_filter_0.1.8.cwl example_inputs.yaml
+```
+
+## Usage
+
+```bash
+usage: fgbio_postprocessing_simplex_filter_0.1.8/fgbio_postprocessing_simplex_filter_0.1.8.cwl
+ [-h] [--memory_per_job MEMORY_PER_JOB]
+ [--memory_overhead MEMORY_OVERHEAD]
+ [--number_of_threads NUMBER_OF_THREADS] --input_bam INPUT_BAM
+ [--output_file_name OUTPUT_FILE_NAME]
+ [job_order]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --memory_per_job MEMORY_PER_JOB
+ Memory per job in megabytes
+ --memory_overhead MEMORY_OVERHEAD
+ Memory overhead per job in megabytes
+ --number_of_threads NUMBER_OF_THREADS
+ --input_bam INPUT_BAM
+ Input file (bam or sam). Required.
+ --output_file_name OUTPUT_FILE_NAME
+ Output file (bam or sam).
+```
+
diff --git a/docs/gatk/README.md b/docs/gatk/README.md
new file mode 100644
index 00000000..ebad94ae
--- /dev/null
+++ b/docs/gatk/README.md
@@ -0,0 +1,2 @@
+# GATK
+
diff --git a/docs/gatk/gatk_apply_bqsr_4.1.8.1.md b/docs/gatk/gatk_apply_bqsr_4.1.8.1.md
new file mode 100644
index 00000000..c919c1f4
--- /dev/null
+++ b/docs/gatk/gatk_apply_bqsr_4.1.8.1.md
@@ -0,0 +1,43 @@
+# ApplyBQSR v4.1.8.1
+
+## Version of tools in [docker image ](https://hub.docker.com/r/broadinstitute/gatk)
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| GATK | 4.1.8.1 | [https://github.com/broadinstitute/gatk/releases/tag/4.1.8.1](https://github.com/broadinstitute/gatk/releases/tag/4.1.8.1) |
+
+[](https://github.com/broadinstitute/gatk/releases/tag/4.1.8.1)
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner gatk_apply_bqsr_4.1.8.1.cwl example_inputs.yml
+```
+
+**If at MSK, using the JUNO cluster you can use the following command**
+
+```bash
+#Using CWLTOOL
+> cwltool --singularity --non-strict gatk_apply_bqsr_4.1.8.1.cwl inputs.yaml
+
+#Using toil-cwl-runner
+> mkdir run_directory
+> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 gatk_apply_bqsr_4.1.8.1.cwl inputs.yaml > file.stdout 2> file.stderr &
+```
+
+## Usage
+
+\`\`\`bash
+
+> toil-cwl-runner gatk\_apply\_bqsr\_4.1.8.1.cwl --help
+
+usage: gatk\_apply\_bqsr\_4.1.8.1.cwl \[-h\] --reference REFERENCE \[--create\_output\_bam\_index\] --bqsr\_recal\_file BQSR\_RECAL\_FILE --input INPUT \[--output\_file\_name OUTPUT\_FILE\_NAME\] \[--add\_output\_sam\_program\_record\] \[--add\_output\_vcf\_command\_line\] \[--arguments\_file ARGUMENTS\_FILE\] \[--cloud\_index\_prefetch\_buffer CLOUD\_INDEX\_PREFETCH\_BUFFER\] \[--cloud\_prefetch\_buffer CLOUD\_PREFETCH\_BUFFER\] \[--create\_output\_bam\_md5\] \[--create\_output\_variant\_index\] \[--create\_output\_variant\_md5\] \[--disable\_bam\_index\_caching\] \[--disable\_read\_filter DISABLE\_READ\_FILTER\] \[--disable\_sequence\_dictionary\_validation\] \[--emit\_original\_quals\] \[--exclude\_intervals EXCLUDE\_INTERVALS\] \[--gatk\_config\_file GATK\_CONFIG\_FILE\] \[--gcs\_max\_retries GCS\_MAX\_RETRIES\] \[--gcs\_project\_for\_requester\_pays GCS\_PROJECT\_FOR\_REQUESTER\_PAYS\] \[--global\_qscore\_prior GLOBAL\_QSCORE\_PRIOR\] \[--interval\_exclusion\_padding INTERVAL\_EXCLUSION\_PADDING\] \[--interval\_merging\_rule INTERVAL\_MERGING\_RULE\] \[--interval\_padding INTERVAL\_PADDING\] \[--interval\_set\_rule INTERVAL\_SET\_RULE\] \[--intervals INTERVALS\] \[--lenient\] \[--preserve\_qscores\_less\_than PRESERVE\_QSCORES\_LESS\_THAN\] \[--quantize\_quals QUANTIZE\_QUALS\] \[--quiet\] \[--read\_filter READ\_FILTER\] \[--read\_index READ\_INDEX\] \[--read\_validation\_stringency READ\_VALIDATION\_STRINGENCY\] \[--seconds\_between\_progress\_updates SECONDS\_BETWEEN\_PROGRESS\_UPDATES\] \[--sequence\_dictionary SEQUENCE\_DICTIONARY\] \[--sites\_only\_vcf\_output\] \[--use\_jdk\_deflater\] \[--use\_jdk\_inflater\] \[--use\_original\_qualities\] \[--memory\_overhead MEMORY\_OVERHEAD\] \[--memory\_per\_job MEMORY\_PER\_JOB\] \[--number\_of\_threads NUMBER\_OF\_THREADS\] \[job\_order\]
+
+positional arguments: job\_order Job input json file
+
+optional arguments: -h, --help show this help message and exit --reference REFERENCE Reference sequence --create\_output\_bam\_index --bqsr\_recal\_file BQSR\_RECAL\_FILE Input recalibration table for BQSR. Only run ApplyBQSR with the covariates table created from the input BAM --input INPUT A BAM file containing input read data --output\_file\_name OUTPUT\_FILE\_NAME Output file name. Not Required --add\_output\_sam\_program\_record --add\_output\_vcf\_command\_line --arguments\_file ARGUMENTS\_FILE --cloud\_index\_prefetch\_buffer CLOUD\_INDEX\_PREFETCH\_BUFFER --cloud\_prefetch\_buffer CLOUD\_PREFETCH\_BUFFER --create\_output\_bam\_md5 --create\_output\_variant\_index --create\_output\_variant\_md5 --disable\_bam\_index\_caching --disable\_read\_filter DISABLE\_READ\_FILTER --disable\_sequence\_dictionary\_validation --emit\_original\_quals --exclude\_intervals EXCLUDE\_INTERVALS --gatk\_config\_file GATK\_CONFIG\_FILE --gcs\_max\_retries GCS\_MAX\_RETRIES --gcs\_project\_for\_requester\_pays GCS\_PROJECT\_FOR\_REQUESTER\_PAYS --global\_qscore\_prior GLOBAL\_QSCORE\_PRIOR --interval\_exclusion\_padding INTERVAL\_EXCLUSION\_PADDING --interval\_merging\_rule INTERVAL\_MERGING\_RULE --interval\_padding INTERVAL\_PADDING --interval\_set\_rule INTERVAL\_SET\_RULE --intervals INTERVALS --lenient --preserve\_qscores\_less\_than PRESERVE\_QSCORES\_LESS\_THAN --quantize\_quals QUANTIZE\_QUALS --quiet --read\_filter READ\_FILTER --read\_index READ\_INDEX --read\_validation\_stringency READ\_VALIDATION\_STRINGENCY --seconds\_between\_progress\_updates SECONDS\_BETWEEN\_PROGRESS\_UPDATES --sequence\_dictionary SEQUENCE\_DICTIONARY --sites\_only\_vcf\_output --use\_jdk\_deflater --use\_jdk\_inflater --use\_original\_qualities --memory\_overhead MEMORY\_OVERHEAD --memory\_per\_job MEMORY\_PER\_JOB --number\_of\_threads NUMBER\_OF\_THREADS
+
diff --git a/docs/gatk/gatk_applybqsr_4.1.2.0.md b/docs/gatk/gatk_applybqsr_4.1.2.0.md
new file mode 100644
index 00000000..709855a1
--- /dev/null
+++ b/docs/gatk/gatk_applybqsr_4.1.2.0.md
@@ -0,0 +1,43 @@
+# ApplyBQSR v4.1.2.0
+
+## Version of tools in [docker image ](https://hub.docker.com/r/broadinstitute/gatk)
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| GATK | 4.1.2.0 | [https://github.com/broadinstitute/gatk/releases/tag/4.1.2.0](https://github.com/broadinstitute/gatk/releases/tag/4.1.2.0) |
+
+[](https://github.com/broadinstitute/gatk/releases/tag/4.1.2.0)
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner gatk_ApplyBQSR_4.1.2.0.cwl example_inputs.yml
+```
+
+**If at MSK, using the JUNO cluster you can use the following command**
+
+```bash
+#Using CWLTOOL
+> cwltool --singularity --non-strict gatk_ApplyBQSR_4.1.2.0.cwl inputs.yaml
+
+#Using toil-cwl-runner
+> mkdir run_directory
+> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 gatk_ApplyBQSR_4.1.2.0.cwl inputs.yaml > file.stdout 2> file.stderr &
+```
+
+## Usage
+
+\`\`\`bash
+
+> toil-cwl-runner gatk\_ApplyBQSR\_4.1.2.0.cwl --help
+
+usage: gatk\_ApplyBQSR\_4.1.2.0.cwl \[-h\] --reference REFERENCE \[--create\_output\_bam\_index\] --bqsr\_recal\_file BQSR\_RECAL\_FILE --input INPUT \[--output\_file\_name OUTPUT\_FILE\_NAME\] \[--add\_output\_sam\_program\_record\] \[--add\_output\_vcf\_command\_line\] \[--arguments\_file ARGUMENTS\_FILE\] \[--cloud\_index\_prefetch\_buffer CLOUD\_INDEX\_PREFETCH\_BUFFER\] \[--cloud\_prefetch\_buffer CLOUD\_PREFETCH\_BUFFER\] \[--create\_output\_bam\_md5\] \[--create\_output\_variant\_index\] \[--create\_output\_variant\_md5\] \[--disable\_bam\_index\_caching\] \[--disable\_read\_filter DISABLE\_READ\_FILTER\] \[--disable\_sequence\_dictionary\_validation\] \[--emit\_original\_quals\] \[--exclude\_intervals EXCLUDE\_INTERVALS\] \[--gatk\_config\_file GATK\_CONFIG\_FILE\] \[--gcs\_max\_retries GCS\_MAX\_RETRIES\] \[--gcs\_project\_for\_requester\_pays GCS\_PROJECT\_FOR\_REQUESTER\_PAYS\] \[--global\_qscore\_prior GLOBAL\_QSCORE\_PRIOR\] \[--interval\_exclusion\_padding INTERVAL\_EXCLUSION\_PADDING\] \[--interval\_merging\_rule INTERVAL\_MERGING\_RULE\] \[--interval\_padding INTERVAL\_PADDING\] \[--interval\_set\_rule INTERVAL\_SET\_RULE\] \[--intervals INTERVALS\] \[--lenient\] \[--preserve\_qscores\_less\_than PRESERVE\_QSCORES\_LESS\_THAN\] \[--quantize\_quals QUANTIZE\_QUALS\] \[--quiet\] \[--read\_filter READ\_FILTER\] \[--read\_index READ\_INDEX\] \[--read\_validation\_stringency READ\_VALIDATION\_STRINGENCY\] \[--seconds\_between\_progress\_updates SECONDS\_BETWEEN\_PROGRESS\_UPDATES\] \[--sequence\_dictionary SEQUENCE\_DICTIONARY\] \[--sites\_only\_vcf\_output\] \[--use\_jdk\_deflater\] \[--use\_jdk\_inflater\] \[--use\_original\_qualities\] \[--memory\_overhead MEMORY\_OVERHEAD\] \[--memory\_per\_job MEMORY\_PER\_JOB\] \[--number\_of\_threads NUMBER\_OF\_THREADS\] \[job\_order\]
+
+positional arguments: job\_order Job input json file
+
+optional arguments: -h, --help show this help message and exit --reference REFERENCE Reference sequence --create\_output\_bam\_index --bqsr\_recal\_file BQSR\_RECAL\_FILE Input recalibration table for BQSR. Only run ApplyBQSR with the covariates table created from the input BAM --input INPUT A BAM file containing input read data --output\_file\_name OUTPUT\_FILE\_NAME Output file name. Not Required --add\_output\_sam\_program\_record --add\_output\_vcf\_command\_line --arguments\_file ARGUMENTS\_FILE --cloud\_index\_prefetch\_buffer CLOUD\_INDEX\_PREFETCH\_BUFFER --cloud\_prefetch\_buffer CLOUD\_PREFETCH\_BUFFER --create\_output\_bam\_md5 --create\_output\_variant\_index --create\_output\_variant\_md5 --disable\_bam\_index\_caching --disable\_read\_filter DISABLE\_READ\_FILTER --disable\_sequence\_dictionary\_validation --emit\_original\_quals --exclude\_intervals EXCLUDE\_INTERVALS --gatk\_config\_file GATK\_CONFIG\_FILE --gcs\_max\_retries GCS\_MAX\_RETRIES --gcs\_project\_for\_requester\_pays GCS\_PROJECT\_FOR\_REQUESTER\_PAYS --global\_qscore\_prior GLOBAL\_QSCORE\_PRIOR --interval\_exclusion\_padding INTERVAL\_EXCLUSION\_PADDING --interval\_merging\_rule INTERVAL\_MERGING\_RULE --interval\_padding INTERVAL\_PADDING --interval\_set\_rule INTERVAL\_SET\_RULE --intervals INTERVALS --lenient --preserve\_qscores\_less\_than PRESERVE\_QSCORES\_LESS\_THAN --quantize\_quals QUANTIZE\_QUALS --quiet --read\_filter READ\_FILTER --read\_index READ\_INDEX --read\_validation\_stringency READ\_VALIDATION\_STRINGENCY --seconds\_between\_progress\_updates SECONDS\_BETWEEN\_PROGRESS\_UPDATES --sequence\_dictionary SEQUENCE\_DICTIONARY --sites\_only\_vcf\_output --use\_jdk\_deflater --use\_jdk\_inflater --use\_original\_qualities --memory\_overhead MEMORY\_OVERHEAD --memory\_per\_job MEMORY\_PER\_JOB --number\_of\_threads NUMBER\_OF\_THREADS
+
diff --git a/docs/gatk/gatk_base_recalibrator_4.1.8.1.md b/docs/gatk/gatk_base_recalibrator_4.1.8.1.md
new file mode 100644
index 00000000..9b90a39f
--- /dev/null
+++ b/docs/gatk/gatk_base_recalibrator_4.1.8.1.md
@@ -0,0 +1,43 @@
+# BaseRecalibrator v4.1.8.1
+
+## Version of tools in [docker image ](https://hub.docker.com/r/broadinstitute/gatk)
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| GATK | 4.1.8.1 | [https://github.com/broadinstitute/gatk/releases/tag/4.1.8.1](https://github.com/broadinstitute/gatk/releases/tag/4.1.8.1) |
+
+[](https://github.com/broadinstitute/gatk/releases/tag/4.1.8.1)
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner gatk_base_recalibrator_4.1.8.1.cwl example_inputs.yml
+```
+
+**If at MSK, using the JUNO cluster you can use the following command**
+
+```bash
+#Using CWLTOOL
+> cwltool --singularity --non-strict gatk_base_recalibrator_4.1.8.1.cwl inputs.yaml
+
+#Using toil-cwl-runner
+> mkdir run_directory
+> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 gatk_base_recalibrator_4.1.8.1.cwl inputs.yaml > file.stdout 2> file.stderr &
+```
+
+## Usage
+
+\`\`\`bash
+
+> toil-cwl-runner gatk\_base\_recalibrator\_4.1.8.1.cwl --help
+
+usage: gatk\_base\_recalibrator\_4.1.8.1.cwl \[-h\] --input INPUT --known\_sites\_1 KNOWN\_SITES\_1 --reference REFERENCE \[--output\_file\_name OUTPUT\_FILE\_NAME\] \[--add\_output\_sam\_program\_record\] \[--add\_output\_vcf\_command\_line\] \[--arguments\_file ARGUMENTS\_FILE\] \[--binary\_tag\_name BINARY\_TAG\_NAME\] \[--bqsr\_baq\_gap\_open\_penalty BQSR\_BAQ\_GAP\_OPEN\_PENALTY\] \[--cloud-index-prefetch-buffer CLOUD\_INDEX\_PREFETCH\_BUFFER\] \[--cloud\_prefetch\_buffer CLOUD\_PREFETCH\_BUFFER\] \[--create\_output\_bam\_index\] \[--create\_output\_bam\_md5\] \[--create\_output\_variant\_index\] \[--create\_output\_variant\_md5\] \[--default\_base\_qualities DEFAULT\_BASE\_QUALITIES\] \[--deletions\_default\_quality DELETIONS\_DEFAULT\_QUALITY\] \[--disable\_bam\_index\_caching\] \[--disable\_read\_filter DISABLE\_READ\_FILTER\] \[--disable\_sequence\_dictionary\_validation\] \[--exclude\_intervals EXCLUDE\_INTERVALS\] \[--gatk\_config\_file GATK\_CONFIG\_FILE\] \[--gcs\_max\_retries GCS\_MAX\_RETRIES\] \[--gcs\_project\_for\_requester\_pays GCS\_PROJECT\_FOR\_REQUESTER\_PAYS\] \[--indels\_context\_size INDELS\_CONTEXT\_SIZE\] \[--insertions\_default\_quality INSERTIONS\_DEFAULT\_QUALITY\] \[--interval\_exclusion\_padding INTERVAL\_EXCLUSION\_PADDING\] \[--interval\_merging\_rule INTERVAL\_MERGING\_RULE\] \[--interval\_padding INTERVAL\_PADDING\] \[--interval\_set\_rule INTERVAL\_SET\_RULE\] \[--intervals INTERVALS\] \[--lenient\] \[--low\_quality\_tail LOW\_QUALITY\_TAIL\] \[--maximum\_cycle\_value MAXIMUM\_CYCLE\_VALUE\] \[--mismatches\_context\_size MISMATCHES\_CONTEXT\_SIZE\] \[--mismatches\_default\_quality MISMATCHES\_DEFAULT\_QUALITY\] \[--preserve\_qscores\_less\_than PRESERVE\_QSCORES\_LESS\_THAN\] \[--quantizing\_levels QUANTIZING\_LEVELS\] \[--QUIET\] \[--read\_filter READ\_FILTER\] \[--read\_index READ\_INDEX\] \[--seconds\_between\_progress\_updates SECONDS\_BETWEEN\_PROGRESS\_UPDATES\] \[--sequence\_dictionary SEQUENCE\_DICTIONARY\] 
\[--sites\_only\_vcf\_output\] \[--use\_original\_qualities\] \[--number\_of\_threads NUMBER\_OF\_THREADS\] \[--memory\_per\_job MEMORY\_PER\_JOB\] \[--memory\_overhead MEMORY\_OVERHEAD\] \[--known\_sites\_2 KNOWN\_SITES\_2\] \[job\_order\]
+
+positional arguments: job\_order Job input json file
+
+optional arguments: -h, --help show this help message and exit --input INPUT BAM/SAM file containing reads --known\_sites\_1 KNOWN\_SITES\_1 One or more databases of known polymorphic sites used to exclude regions around known polymorphisms from analysis --reference REFERENCE Reference sequence file --output\_file\_name OUTPUT\_FILE\_NAME Output file name. Not Required --add\_output\_sam\_program\_record --add\_output\_vcf\_command\_line --arguments\_file ARGUMENTS\_FILE --binary\_tag\_name BINARY\_TAG\_NAME --bqsr\_baq\_gap\_open\_penalty BQSR\_BAQ\_GAP\_OPEN\_PENALTY --cloud-index-prefetch-buffer CLOUD\_INDEX\_PREFETCH\_BUFFER --cloud\_prefetch\_buffer CLOUD\_PREFETCH\_BUFFER --create\_output\_bam\_index --create\_output\_bam\_md5 --create\_output\_variant\_index --create\_output\_variant\_md5 --default\_base\_qualities DEFAULT\_BASE\_QUALITIES --deletions\_default\_quality DELETIONS\_DEFAULT\_QUALITY --disable\_bam\_index\_caching --disable\_read\_filter DISABLE\_READ\_FILTER --disable\_sequence\_dictionary\_validation --exclude\_intervals EXCLUDE\_INTERVALS --gatk\_config\_file GATK\_CONFIG\_FILE --gcs\_max\_retries GCS\_MAX\_RETRIES --gcs\_project\_for\_requester\_pays GCS\_PROJECT\_FOR\_REQUESTER\_PAYS --indels\_context\_size INDELS\_CONTEXT\_SIZE --insertions\_default\_quality INSERTIONS\_DEFAULT\_QUALITY --interval\_exclusion\_padding INTERVAL\_EXCLUSION\_PADDING --interval\_merging\_rule INTERVAL\_MERGING\_RULE --interval\_padding INTERVAL\_PADDING --interval\_set\_rule INTERVAL\_SET\_RULE --intervals INTERVALS --lenient --low\_quality\_tail LOW\_QUALITY\_TAIL --maximum\_cycle\_value MAXIMUM\_CYCLE\_VALUE --mismatches\_context\_size MISMATCHES\_CONTEXT\_SIZE --mismatches\_default\_quality MISMATCHES\_DEFAULT\_QUALITY --preserve\_qscores\_less\_than PRESERVE\_QSCORES\_LESS\_THAN --quantizing\_levels QUANTIZING\_LEVELS --QUIET --read\_filter READ\_FILTER --read\_index READ\_INDEX --seconds\_between\_progress\_updates SECONDS\_BETWEEN\_PROGRESS\_UPDATES 
--sequence\_dictionary SEQUENCE\_DICTIONARY --sites\_only\_vcf\_output --use\_original\_qualities --number\_of\_threads NUMBER\_OF\_THREADS --memory\_per\_job MEMORY\_PER\_JOB --memory\_overhead MEMORY\_OVERHEAD --known\_sites\_2 KNOWN\_SITES\_2
+
diff --git a/docs/gatk/gatk_baserecalibrator_4.1.2.0.md b/docs/gatk/gatk_baserecalibrator_4.1.2.0.md
new file mode 100644
index 00000000..41f341b4
--- /dev/null
+++ b/docs/gatk/gatk_baserecalibrator_4.1.2.0.md
@@ -0,0 +1,43 @@
+# BaseRecalibrator v4.1.2.0
+
+## Version of tools in [docker image ](https://hub.docker.com/r/broadinstitute/gatk)
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| GATK | 4.1.2.0 | [https://github.com/broadinstitute/gatk/releases/tag/4.1.2.0](https://github.com/broadinstitute/gatk/releases/tag/4.1.2.0) |
+
+[](https://github.com/broadinstitute/gatk/releases/tag/4.1.2.0)
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner gatk_baserecalibrator_4.1.2.0.cwl example_inputs.yml
+```
+
+**If at MSK, using the JUNO cluster you can use the following command**
+
+```bash
+#Using CWLTOOL
+> cwltool --singularity --non-strict gatk_baserecalibrator_4.1.2.0.cwl inputs.yaml
+
+#Using toil-cwl-runner
+> mkdir run_directory
+> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 gatk_baserecalibrator_4.1.2.0.cwl inputs.yaml > file.stdout 2> file.stderr &
+```
+
+## Usage
+
+\`\`\`bash
+
+> toil-cwl-runner gatk\_baserecalibrator\_4.1.2.0.cwl --help
+
+usage: gatk\_baserecalibrator\_4.1.2.0.cwl \[-h\] --input INPUT --known\_sites\_1 KNOWN\_SITES\_1 --reference REFERENCE \[--output\_file\_name OUTPUT\_FILE\_NAME\] \[--add\_output\_sam\_program\_record\] \[--add\_output\_vcf\_command\_line\] \[--arguments\_file ARGUMENTS\_FILE\] \[--binary\_tag\_name BINARY\_TAG\_NAME\] \[--bqsr\_baq\_gap\_open\_penalty BQSR\_BAQ\_GAP\_OPEN\_PENALTY\] \[--cloud-index-prefetch-buffer CLOUD\_INDEX\_PREFETCH\_BUFFER\] \[--cloud\_prefetch\_buffer CLOUD\_PREFETCH\_BUFFER\] \[--create\_output\_bam\_index\] \[--create\_output\_bam\_md5\] \[--create\_output\_variant\_index\] \[--create\_output\_variant\_md5\] \[--default\_base\_qualities DEFAULT\_BASE\_QUALITIES\] \[--deletions\_default\_quality DELETIONS\_DEFAULT\_QUALITY\] \[--disable\_bam\_index\_caching\] \[--disable\_read\_filter DISABLE\_READ\_FILTER\] \[--disable\_sequence\_dictionary\_validation\] \[--exclude\_intervals EXCLUDE\_INTERVALS\] \[--gatk\_config\_file GATK\_CONFIG\_FILE\] \[--gcs\_max\_retries GCS\_MAX\_RETRIES\] \[--gcs\_project\_for\_requester\_pays GCS\_PROJECT\_FOR\_REQUESTER\_PAYS\] \[--indels\_context\_size INDELS\_CONTEXT\_SIZE\] \[--insertions\_default\_quality INSERTIONS\_DEFAULT\_QUALITY\] \[--interval\_exclusion\_padding INTERVAL\_EXCLUSION\_PADDING\] \[--interval\_merging\_rule INTERVAL\_MERGING\_RULE\] \[--interval\_padding INTERVAL\_PADDING\] \[--interval\_set\_rule INTERVAL\_SET\_RULE\] \[--intervals INTERVALS\] \[--lenient\] \[--low\_quality\_tail LOW\_QUALITY\_TAIL\] \[--maximum\_cycle\_value MAXIMUM\_CYCLE\_VALUE\] \[--mismatches\_context\_size MISMATCHES\_CONTEXT\_SIZE\] \[--mismatches\_default\_quality MISMATCHES\_DEFAULT\_QUALITY\] \[--preserve\_qscores\_less\_than PRESERVE\_QSCORES\_LESS\_THAN\] \[--quantizing\_levels QUANTIZING\_LEVELS\] \[--QUIET\] \[--read\_filter READ\_FILTER\] \[--read\_index READ\_INDEX\] \[--seconds\_between\_progress\_updates SECONDS\_BETWEEN\_PROGRESS\_UPDATES\] \[--sequence\_dictionary SEQUENCE\_DICTIONARY\] 
\[--sites\_only\_vcf\_output\] \[--use\_original\_qualities\] \[--number\_of\_threads NUMBER\_OF\_THREADS\] \[--memory\_per\_job MEMORY\_PER\_JOB\] \[--memory\_overhead MEMORY\_OVERHEAD\] \[--known\_sites\_2 KNOWN\_SITES\_2\] \[job\_order\]
+
+positional arguments: job\_order Job input json file
+
+optional arguments: -h, --help show this help message and exit --input INPUT BAM/SAM file containing reads --known\_sites\_1 KNOWN\_SITES\_1 One or more databases of known polymorphic sites used to exclude regions around known polymorphisms from analysis --reference REFERENCE Reference sequence file --output\_file\_name OUTPUT\_FILE\_NAME Output file name. Not Required --add\_output\_sam\_program\_record --add\_output\_vcf\_command\_line --arguments\_file ARGUMENTS\_FILE --binary\_tag\_name BINARY\_TAG\_NAME --bqsr\_baq\_gap\_open\_penalty BQSR\_BAQ\_GAP\_OPEN\_PENALTY --cloud-index-prefetch-buffer CLOUD\_INDEX\_PREFETCH\_BUFFER --cloud\_prefetch\_buffer CLOUD\_PREFETCH\_BUFFER --create\_output\_bam\_index --create\_output\_bam\_md5 --create\_output\_variant\_index --create\_output\_variant\_md5 --default\_base\_qualities DEFAULT\_BASE\_QUALITIES --deletions\_default\_quality DELETIONS\_DEFAULT\_QUALITY --disable\_bam\_index\_caching --disable\_read\_filter DISABLE\_READ\_FILTER --disable\_sequence\_dictionary\_validation --exclude\_intervals EXCLUDE\_INTERVALS --gatk\_config\_file GATK\_CONFIG\_FILE --gcs\_max\_retries GCS\_MAX\_RETRIES --gcs\_project\_for\_requester\_pays GCS\_PROJECT\_FOR\_REQUESTER\_PAYS --indels\_context\_size INDELS\_CONTEXT\_SIZE --insertions\_default\_quality INSERTIONS\_DEFAULT\_QUALITY --interval\_exclusion\_padding INTERVAL\_EXCLUSION\_PADDING --interval\_merging\_rule INTERVAL\_MERGING\_RULE --interval\_padding INTERVAL\_PADDING --interval\_set\_rule INTERVAL\_SET\_RULE --intervals INTERVALS --lenient --low\_quality\_tail LOW\_QUALITY\_TAIL --maximum\_cycle\_value MAXIMUM\_CYCLE\_VALUE --mismatches\_context\_size MISMATCHES\_CONTEXT\_SIZE --mismatches\_default\_quality MISMATCHES\_DEFAULT\_QUALITY --preserve\_qscores\_less\_than PRESERVE\_QSCORES\_LESS\_THAN --quantizing\_levels QUANTIZING\_LEVELS --QUIET --read\_filter READ\_FILTER --read\_index READ\_INDEX --seconds\_between\_progress\_updates SECONDS\_BETWEEN\_PROGRESS\_UPDATES 
--sequence\_dictionary SEQUENCE\_DICTIONARY --sites\_only\_vcf\_output --use\_original\_qualities --number\_of\_threads NUMBER\_OF\_THREADS --memory\_per\_job MEMORY\_PER\_JOB --memory\_overhead MEMORY\_OVERHEAD --known\_sites\_2 KNOWN\_SITES\_2
+
diff --git a/docs/gatk/gatk_downsamplesam_4.1.8.1.md b/docs/gatk/gatk_downsamplesam_4.1.8.1.md
new file mode 100644
index 00000000..b0f093d9
--- /dev/null
+++ b/docs/gatk/gatk_downsamplesam_4.1.8.1.md
@@ -0,0 +1,113 @@
+# DownsampleSam v4.1.8.1
+
+## Version of tools in [docker image](https://hub.docker.com/r/broadinstitute/gatk)
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| GATK | 4.1.8.1 | [https://github.com/broadinstitute/gatk/releases/tag/4.1.8.1](https://github.com/broadinstitute/gatk/releases/tag/4.1.8.1) |
+
+[](https://github.com/broadinstitute/gatk/releases/tag/4.1.8.1)
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner gatk_downsamplesam_4.1.8.1.cwl example_inputs.yml
+```
+
+**If at MSK, using the JUNO cluster you can use the following command**
+
+```bash
+#Using CWLTOOL
+> cwltool --singularity --non-strict gatk_downsamplesam_4.1.8.1.cwl inputs.yaml
+
+#Using toil-cwl-runner
+> mkdir run_directory
+> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 gatk_downsamplesam_4.1.8.1.cwl inputs.yaml > file.stdout 2> file.stderr &
+```
+
+## Usage
+
+```bash
+
+> toil-cwl-runner gatk_downsamplesam_4.1.8.1.cwl --help
+
+usage: gatk_downsamplesam_4.1.8.1.cwl [-h] --input INPUT --reference REFERENCE
+ [--output_file_name OUTPUT_FILE_NAME]
+ [--output_file_name_metrics OUTPUT_FILE_NAME_METRICS]
+ [--probability PROBABILITY]
+ [--random_seed RANDOM_SEED]
+ [--strategy STRATEGY]
+ [--arguments_file ARGUMENTS_FILE]
+ [--cloud-index-prefetch-buffer CLOUD_INDEX_PREFETCH_BUFFER]
+ [--cloud_prefetch_buffer CLOUD_PREFETCH_BUFFER]
+ [--create_output_bam_index]
+ [--create_output_bam_md5]
+ [--disable_bam_index_caching]
+ [--disable_read_filter DISABLE_READ_FILTER]
+ [--disable_sequence_dictionary_validation]
+ [--exclude_intervals EXCLUDE_INTERVALS]
+ [--gatk_config_file GATK_CONFIG_FILE]
+ [--gcs_max_retries GCS_MAX_RETRIES]
+ [--gcs_project_for_requester_pays GCS_PROJECT_FOR_REQUESTER_PAYS]
+ [--QUIET] [--read_filter READ_FILTER]
+ [--read_index READ_INDEX]
+ [--seconds_between_progress_updates SECONDS_BETWEEN_PROGRESS_UPDATES]
+ [--lenient]
+ [--number_of_threads NUMBER_OF_THREADS]
+ [--memory_per_job MEMORY_PER_JOB]
+ [--memory_overhead MEMORY_OVERHEAD]
+ [--temporary_directory TEMPORARY_DIRECTORY]
+ [job_order]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --input INPUT BAM/SAM file containing reads
+ --reference REFERENCE
+ Reference sequence file
+ --output_file_name OUTPUT_FILE_NAME
+ Output file name. Not Required
+ --output_file_name_metrics OUTPUT_FILE_NAME_METRICS
+ Output file name for metrics file. Not Required
+ --probability PROBABILITY
+ The probability of keeping any individual read,
+ between 0 and 1.
+ --random_seed RANDOM_SEED
+ Random seed used for deterministic results. Setting to
+ null will cause multiple invocations to produce
+ different results.
+ --strategy STRATEGY The --STRATEGY argument is an enumerated type
+ (Strategy), which can have one of the following
+ values: HighAccuracy ConstantMemory Chained default
+ Strategy ConstantMemory
+ --arguments_file ARGUMENTS_FILE
+ --cloud-index-prefetch-buffer CLOUD_INDEX_PREFETCH_BUFFER
+ --cloud_prefetch_buffer CLOUD_PREFETCH_BUFFER
+ --create_output_bam_index
+ --create_output_bam_md5
+ --disable_bam_index_caching
+ --disable_read_filter DISABLE_READ_FILTER
+ Read filters to be disabled before analysis
+ --disable_sequence_dictionary_validation
+ --exclude_intervals EXCLUDE_INTERVALS
+ --gatk_config_file GATK_CONFIG_FILE
+ --gcs_max_retries GCS_MAX_RETRIES
+ --gcs_project_for_requester_pays GCS_PROJECT_FOR_REQUESTER_PAYS
+ --QUIET
+ --read_filter READ_FILTER
+ --read_index READ_INDEX
+ --seconds_between_progress_updates SECONDS_BETWEEN_PROGRESS_UPDATES
+ --lenient
+ --number_of_threads NUMBER_OF_THREADS
+ --memory_per_job MEMORY_PER_JOB
+ --memory_overhead MEMORY_OVERHEAD
+ --temporary_directory TEMPORARY_DIRECTORY
+ Default value: null.
+```
+
diff --git a/docs/gatk/gatk_merge_bam_alignment_4.1.8.0.md b/docs/gatk/gatk_merge_bam_alignment_4.1.8.0.md
new file mode 100644
index 00000000..8a593757
--- /dev/null
+++ b/docs/gatk/gatk_merge_bam_alignment_4.1.8.0.md
@@ -0,0 +1,245 @@
+# MergeBamAlignment v4.1.8.0
+
+## Version of tools in docker image
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| gatk | 4.1.8.0 | broadinstitute/gatk:4.1.8.0 |
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner gatk_merge_bam_alignment_4.1.8.0.cwl example_inputs.yaml
+```
+
+### Usage
+
+```bash
+usage: gatk_merge_bam_alignment_4.1.8.0.cwl [-h]
+ [--memory_per_job MEMORY_PER_JOB]
+ [--memory_overhead MEMORY_OVERHEAD]
+ [--number_of_threads NUMBER_OF_THREADS]
+ --unmapped_bam UNMAPPED_BAM
+ --reference REFERENCE
+ [--output_file_name OUTPUT_FILE_NAME]
+ [--add_mate_cigar]
+ [--add_pg_tag_to_reads]
+ [--aligned_bam ALIGNED_BAM]
+ [--aligned_reads_only]
+ [--aligner_proper_pair_flags]
+ [--attributes_to_remove ATTRIBUTES_TO_REMOVE]
+ [--attributes_to_retain ATTRIBUTES_TO_RETAIN]
+ [--attributes_to_reverse ATTRIBUTES_TO_REVERSE]
+ [--attributes_to_reverse_complement ATTRIBUTES_TO_REVERSE_COMPLEMENT]
+ [--clip_adapters]
+ [--clip_overlapping_reads]
+ [--expected_orientations EXPECTED_ORIENTATIONS]
+ [--hard_clip_overlapping_reads]
+ [--include_secondary_alignments]
+ [--is_bisulfite_sequence]
+ [--jump_size JUMP_SIZE]
+ [--matching_dictionary_tags MATCHING_DICTIONARY_TAGS]
+ [--max_insertions_or_deletions MAX_INSERTIONS_OR_DELETIONS]
+ [--min_unclipped_bases MIN_UNCLIPPED_BASES]
+ [--paired_run]
+ [--primary_alignment_strategy PRIMARY_ALIGNMENT_STRATEGY]
+ [--read1_aligned_bam READ1_ALIGNED_BAM]
+ [--read1_trim READ1_TRIM]
+ [--read2_aligned_bam READ2_ALIGNED_BAM]
+ [--read2_trim READ2_TRIM]
+ [--sort_order SORT_ORDER]
+ [--unmap_contaminant_reads]
+ [--unmapped_read_strategy UNMAPPED_READ_STRATEGY]
+ [--validation_stringency VALIDATION_STRINGENCY]
+ [--create_index]
+ [--create_md5_file]
+ [job_order]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --memory_per_job MEMORY_PER_JOB
+ Memory per job in megabytes
+ --memory_overhead MEMORY_OVERHEAD
+ Memory overhead per job in megabytes
+ --number_of_threads NUMBER_OF_THREADS
+ --unmapped_bam UNMAPPED_BAM
+ Original SAM or BAM file of unmapped reads, which must
+ be in queryname order. Reads MUST be unmapped.
+ Required.
+ --reference REFERENCE
+ Reference sequence file. Required.
+ --output_file_name OUTPUT_FILE_NAME
+ Merged SAM or BAM file to write to. Required.
+ --add_mate_cigar Adds the mate CIGAR tag (MC) if true, does not if
+ false. Default value: true. Possible values: {true,
+ false}
+ --add_pg_tag_to_reads
+ Add PG tag to each read in a SAM or BAM Default value:
+ true. Possible values: {true, false}
+ --aligned_bam ALIGNED_BAM
+ SAM or BAM file(s) with alignment data. This argument
+ may be specified 0 or more times. Default value: null.
+ Cannot be used in conjunction with argument(s)
+ READ1_ALIGNED_BAM (R1_ALIGNED) READ2_ALIGNED_BAM
+ (R2_ALIGNED)
+ --aligned_reads_only Whether to output only aligned reads. Default value:
+ false. Possible values: {true, false}
+ --aligner_proper_pair_flags
+ Use the aligners idea of what a proper pair is rather
+ than computing in this program. Default value: false.
+ Possible values: {true, false}
+ --attributes_to_remove ATTRIBUTES_TO_REMOVE
+ Attributes from the alignment record that should be
+ removed when merging. This overrides
+ ATTRIBUTES_TO_RETAIN if they share common tags. This
+ argument may be specified 0 or more times. Default
+ value: null.
+ --attributes_to_retain ATTRIBUTES_TO_RETAIN
+ Reserved alignment attributes (tags starting with X,
+ Y, or Z) that should be brought over from the
+ alignment data when merging. This argument may be
+ specified 0 or more times. Default value: null.
+ --attributes_to_reverse ATTRIBUTES_TO_REVERSE
+ Attributes on negative strand reads that need to be
+ reversed. This argument may be specified 0 or more
+ times. Default value: [OQ, U2].
+ --attributes_to_reverse_complement ATTRIBUTES_TO_REVERSE_COMPLEMENT
+ Attributes on negative strand reads that need to be
+ reverse complemented. This argument may be specified 0
+ or more times. Default value: [E2, SQ].
+ --clip_adapters Whether to clip adapters where identified. Default
+ value: true. Possible values: {true, false}
+ --clip_overlapping_reads
+ For paired reads, clip the 3' end of each read if
+ necessary so that it does not extend past the 5' end
+ of its mate. Clipping will be either soft or hard
+ clipping, depending on CLIP_OVERLAPPING_READS_OPERATOR
+ setting. Hard clipped bases and their qualities will
+ be stored in the XB and XQ tags respectively. Default
+ value: true. Possible values: {true, false}
+ --expected_orientations EXPECTED_ORIENTATIONS
+ The expected orientation of proper read pairs.
+ Replaces JUMP_SIZE This argument may be specified 0 or
+ more times. Default value: null. Possible values: {FR,
+ RF, TANDEM} Cannot be used in conjunction with
+ argument(s) JUMP_SIZE (JUMP)
+ --hard_clip_overlapping_reads
+ If true, hard clipping will be applied to overlapping
+ reads. By default, soft clipping is used. Default
+ value: false. Possible values: {true, false}
+ --include_secondary_alignments
+ If false, do not write secondary alignments to output.
+ Default value: true. Possible values: {true, false}
+ --is_bisulfite_sequence
+ Whether the lane is bisulfite sequence (used when
+ calculating the NM tag). Default value: false.
+ Possible values: {true, false}
+ --jump_size JUMP_SIZE
+ The expected jump size (required if this is a jumping
+ library). Deprecated. Use EXPECTED_ORIENTATIONS
+ instead Default value: null. Cannot be used in
+ conjunction with argument(s) EXPECTED_ORIENTATIONS
+ (ORIENTATIONS)
+ --matching_dictionary_tags MATCHING_DICTIONARY_TAGS
+ List of Sequence Records tags that must be equal (if
+ present) in the reference dictionary and in the
+ aligned file. Mismatching tags will cause an error if
+ in this list, and a warning otherwise. This argument
+ may be specified 0 or more times. Default value: [M5,
+ LN].
+ --max_insertions_or_deletions MAX_INSERTIONS_OR_DELETIONS
+ The maximum number of insertions or deletions
+ permitted for an alignment to be included. Alignments
+ with more than this many insertions or deletions will
+ be ignored. Set to -1 to allow any number of
+ insertions or deletions. Default value: 1.
+ --min_unclipped_bases MIN_UNCLIPPED_BASES
+ If UNMAP_CONTAMINANT_READS is set, require this many
+ unclipped bases or else the read will be marked as
+ contaminant. Default value: 32.
+ --paired_run DEPRECATED. This argument is ignored and will be
+ removed. Default value: true. Possible values: {true,
+ false}
+ --primary_alignment_strategy PRIMARY_ALIGNMENT_STRATEGY
+ Strategy for selecting primary alignment when the
+ aligner has provided more than one alignment for a
+ pair or fragment, and none are marked as primary, more
+ than one is marked as primary, or the primary
+ alignment is filtered out for some reason. For all
+ strategies, ties are resolved arbitrarily. Default
+ value: BestMapq. BestMapq (Expects that multiple
+ alignments will be correlated with HI tag, and prefers
+ the pair of alignments with the largest MAPQ, in the
+ absence of a primary selected by the aligner.)
+ EarliestFragment (Prefers the alignment which maps the
+ earliest base in the read. Note that EarliestFragment
+ may not be used for paired reads.) BestEndMapq
+ (Appropriate for cases in which the aligner is not
+ pair-aware, and does not output the HI tag. It simply
+ picks the alignment for each end with the highest
+ MAPQ, and makes those alignments primary, regardless
+ of whether the two alignments make sense together.)
+ MostDistant (Appropriate for a non-pair-aware aligner.
+ Picks the alignment pair with the largest insert size.
+ If all alignments would be chimeric, it picks the
+ alignments for each end with the best MAPQ.)
+ --read1_aligned_bam READ1_ALIGNED_BAM
+ SAM or BAM file(s) with alignment data from the first
+ read of a pair. This argument may be specified 0 or
+ more times. Default value: null. Cannot be used in
+ conjunction with argument(s) ALIGNED_BAM (ALIGNED)
+ --read1_trim READ1_TRIM
+ The number of bases trimmed from the beginning of read
+ 1 prior to alignment Default value: 0.
+ --read2_aligned_bam READ2_ALIGNED_BAM
+ SAM or BAM file(s) with alignment data from the second
+ read of a pair. This argument may be specified 0 or
+ more times. Default value: null. Cannot be used in
+ conjunction with argument(s) ALIGNED_BAM (ALIGNED)
+ --read2_trim READ2_TRIM
+ The number of bases trimmed from the beginning of read
+ 2 prior to alignment Default value: 0.
+ --sort_order SORT_ORDER
+ The order in which the merged reads should be output.
+ Default value: coordinate. Possible values: {unsorted,
+ queryname, coordinate, duplicate, unknown}
+ --unmap_contaminant_reads
+ Detect reads originating from foreign organisms (e.g.
+ bacterial DNA in a non-bacterial sample),and unmap +
+ label those reads accordingly. Default value: false.
+ Possible values: {true, false}
+ --unmapped_read_strategy UNMAPPED_READ_STRATEGY
+ How to deal with alignment information in reads that
+ are being unmapped (e.g. due to cross-species
+ contamination.) Currently ignored unless
+ UNMAP_CONTAMINANT_READS = true. Note that the
+ DO_NOT_CHANGE strategy will actually reset the cigar
+ and set the mapping quality on unmapped reads since
+ otherwisethe result will be an invalid record. To
+ force no change use the DO_NOT_CHANGE_INVALID
+ strategy. Default value: DO_NOT_CHANGE. Possible
+ values: {COPY_TO_TAG, DO_NOT_CHANGE,
+ DO_NOT_CHANGE_INVALID, MOVE_TO_TAG}
+ --validation_stringency VALIDATION_STRINGENCY
+ Validation stringency for all SAM files read by this
+ program. Setting stringency to SILENT can improve
+ performance when processing a BAM file in which
+ variable-length data (read, qualities, tags) do not
+ otherwise need to be decoded. Default value: STRICT.
+ This option can be set to 'null' to clear the default
+ value. Possible values: {STRICT,LENIENT, SILENT}
+ --create_index Whether to create a BAM index when writing a
+ coordinate-sorted BAM file. Default value: false.
+ Possible values: {true, false}
+ --create_md5_file Whether to create an MD5 digest for any BAM or FASTQ
+ files created. Default value: false. Possible values:
+ {true, false}
+```
+
diff --git a/docs/gatk/gatk_merge_sam_files_4.1.8.0.md b/docs/gatk/gatk_merge_sam_files_4.1.8.0.md
new file mode 100644
index 00000000..0602f9be
--- /dev/null
+++ b/docs/gatk/gatk_merge_sam_files_4.1.8.0.md
@@ -0,0 +1,98 @@
+# MergeSamFiles v4.1.8.0
+
+## Version of tools in docker image
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| gatk | 4.1.8.0 | broadinstitute/gatk:4.1.8.0 |
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner gatk_merge_sam_files_4.1.8.0.cwl example_inputs.yaml
+```
+
+### Usage
+
+```bash
+usage: gatk_merge_sam_files_4.1.8.0.cwl [-h] [--memory_per_job MEMORY_PER_JOB]
+ [--memory_overhead MEMORY_OVERHEAD]
+ [--number_of_threads NUMBER_OF_THREADS]
+ --input INPUT
+ [--output_file_name OUTPUT_FILE_NAME]
+ [--assume_sorted] [--comment COMMENT]
+ [--create_index] [--create_md5_file]
+ [--intervals INTERVALS]
+ [--merge_sequence_dictionaries]
+ [--reference_sequence REFERENCE_SEQUENCE]
+ [--sort_order SORT_ORDER]
+ [--use_threading]
+ [--validation_stringency VALIDATION_STRINGENCY]
+ [--verbosity VERBOSITY]
+ [job_order]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --memory_per_job MEMORY_PER_JOB
+ Memory per job in megabytes
+ --memory_overhead MEMORY_OVERHEAD
+ Memory overhead per job in megabytes
+ --number_of_threads NUMBER_OF_THREADS
+ --input INPUT SAM or BAM input file This argument must be specified
+ at least once. Required.
+ --output_file_name OUTPUT_FILE_NAME
+ SAM or BAM file to write merged result to Required.
+ --assume_sorted If true, assume that the input files are in the same
+ sort order as the requested output sort order, even if
+ their headers say otherwise. Default value: false.
+ Possible values: {true, false}
+ --comment COMMENT Comment(s) to include in the merged output files
+ header. This argument may be specified 0 or more
+ times. Default value: null.
+ --create_index Whether to create a BAM index when writing a
+ coordinate-sorted BAM file. Default value: false.
+ Possible values: {true, false}
+ --create_md5_file Whether to create an MD5 digest for any BAM or FASTQ
+ files created. Default value: false. Possible values:
+ {true, false}
+ --intervals INTERVALS
+ An interval list file that contains the locations of
+ the positions to merge. Assume bam are sorted and
+ indexed. The resulting file will contain alignments
+ that may overlap with genomic regions outside the
+ requested region. Unmapped reads are discarded.
+ Default value: null.
+ --merge_sequence_dictionaries
+ Merge the sequence dictionaries Default value: false.
+ Possible values: {true, false}
+ --reference_sequence REFERENCE_SEQUENCE
+ Reference sequence file. Default value: null.
+ --sort_order SORT_ORDER
+ Sort order of output file Default value: coordinate.
+ Possible values: {unsorted, queryname, coordinate,
+ duplicate, unknown}
+ --use_threading Option to create a background thread to encode,
+ compress and write to disk the output file. The
+ threaded version uses about 20% more CPU and decreases
+ runtime by ~20% when writing out a compressed BAM
+ file. Default value: false. Possible values: {true,
+ false}
+ --validation_stringency VALIDATION_STRINGENCY
+ Validation stringency for all SAM files read by this
+ program. Setting stringency to SILENT can improve
+ performance when processing a BAM file in which
+ variable-length data (read, qualities, tags) do not
+ otherwise need to be decoded. Default value: STRICT.
+ Possible values: {STRICT, LENIENT, SILENT}
+ --verbosity VERBOSITY
+ Control verbosity of logging. Default value: INFO.
+ Possible values: {ERROR, WARNING, INFO, DEBUG}
+```
+
diff --git a/docs/gatk/samtofastq-v4.1.8.0.md b/docs/gatk/samtofastq-v4.1.8.0.md
new file mode 100644
index 00000000..b2b44b12
--- /dev/null
+++ b/docs/gatk/samtofastq-v4.1.8.0.md
@@ -0,0 +1,2 @@
+# SamToFastq v4.1.8.0
+
diff --git a/docs/manta/README.md b/docs/manta/README.md
new file mode 100644
index 00000000..1849796d
--- /dev/null
+++ b/docs/manta/README.md
@@ -0,0 +1,2 @@
+# Manta
+
diff --git a/docs/manta/manta_1.5.1.md b/docs/manta/manta_1.5.1.md
new file mode 100644
index 00000000..9b285335
--- /dev/null
+++ b/docs/manta/manta_1.5.1.md
@@ -0,0 +1,70 @@
+# Manta v1.5.1
+
+## Version of tools in docker image \(/container/Dockerfile\)
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| ubuntu base image | 16.04 | - |
+| manta | 1.5.1 | [https://github.com/Illumina/manta/releases/download/](https://github.com/Illumina/manta/releases/download/) |
+| samtools | 1.9 | [https://github.com/samtools/samtools/releases/download/](https://github.com/samtools/samtools/releases/download/) |
+| htslib | 1.9 | [https://github.com/samtools/htslib/releases/download/](https://github.com/samtools/htslib/releases/download/) |
+
+[](https://microbadger.com/images/mskaccess/manta:0.0.2)[](https://microbadger.com/images/mskaccess/manta:0.0.2) [](https://microbadger.com/images/mskaccess/manta:0.0.2) [](https://microbadger.com/images/mskaccess/manta:0.0.2)
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner manta_1.5.1.cwl example_inputs.yaml
+```
+
+**If at MSK, using the JUNO cluster you can use the following command**
+
+```bash
+#Using CWLTOOL
+> cwltool --singularity --non-strict /path/to/manta_1.5.1/manta_1.5.1.cwl /path/to/inputs.yaml
+
+#Using toil-cwl-runner
+> mkdir toil_log
+> toil-cwl-runner --singularity --logFile /path/to/toil_log/cwltoil.log --jobStore /path/to/jobStore --batchSystem lsf --workDir /path/to/toil_log --outdir . --writeLogs /path/to/toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/manta_1.5.1/manta_1.5.1.cwl /path/to/inputs.yaml > toil.stdout 2> toil.stderr &
+```
+
+### Usage
+
+```bash
+> toil-cwl-runner manta_1.5.1.cwl --help
+usage: manta_1.5.1.cwl [-h]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --memory_per_job MEMORY_PER_JOB
+ Memory per job in megabytes
+ --memory_overhead MEMORY_OVERHEAD
+ Memory overhead per job in megabytes
+ --number_of_threads NUMBER_OF_THREADS
+ --call_regions CALL_REGIONS
+ bgzip-compressed, tabix-indexed BED file specifiying
+ regions to which variant analysis will be restricted
+ --non_wgs toggles on settings for WES
+ --normal_bam NORMAL_BAM
+ Normal sample BAM or CRAM file. May be specified more
+ than once, multiple inputs will be treated as each BAM
+ file representing a different sample. [optional] (no
+ default)
+ --output_contigs if true, outputs assembled contig sequences in final
+ VCF files, in the INFO field CONTIG
+ --reference_fasta REFERENCE_FASTA
+ samtools-indexed reference fasta file [required]
+ --tumor_bam TUMOR_BAM
+ Tumor sample BAM or CRAM file. Only up to one tumor
+ bam file accepted.
+ --generateEvidenceBam
+ Generate a bam of supporting reads for all SVs
+```
+
diff --git a/docs/marianas/README.md b/docs/marianas/README.md
new file mode 100644
index 00000000..67939d60
--- /dev/null
+++ b/docs/marianas/README.md
@@ -0,0 +1,2 @@
+# Marianas
+
diff --git a/docs/marianas/marianas_collapsing_first_pass_1.8.1.md b/docs/marianas/marianas_collapsing_first_pass_1.8.1.md
new file mode 100644
index 00000000..f0b24afe
--- /dev/null
+++ b/docs/marianas/marianas_collapsing_first_pass_1.8.1.md
@@ -0,0 +1,19 @@
+# Collapsing First Pass v1.8.1
+
+## Version of tools in docker image \(/container/Dockerfile\)
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| java base image | 8 | - |
+| Marianas | 1.8.1 | [https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar](https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar) |
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner marianas_first_pass.cwl example_inputs.yaml
+```
+
diff --git a/docs/marianas/marianas_collapsing_second_pass_1.8.1.md b/docs/marianas/marianas_collapsing_second_pass_1.8.1.md
new file mode 100644
index 00000000..7117bec5
--- /dev/null
+++ b/docs/marianas/marianas_collapsing_second_pass_1.8.1.md
@@ -0,0 +1,19 @@
+# Collapsing Second Pass v1.8.1
+
+## Version of tools in docker image \(/container/Dockerfile\)
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| java base image | 8 | - |
+| Marianas | 1.8.1 | [https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar](https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar) |
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner marianas_first_pass.cwl test_inputs_second_pass.yaml
+```
+
diff --git a/docs/marianas/marianas_process_loop_umi_1.8.1.md b/docs/marianas/marianas_process_loop_umi_1.8.1.md
new file mode 100644
index 00000000..7c1efc78
--- /dev/null
+++ b/docs/marianas/marianas_process_loop_umi_1.8.1.md
@@ -0,0 +1,19 @@
+# Process Loop UMI v1.8.1
+
+## Version of tools in docker image \(/container/Dockerfile\)
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| java base image | 8 | - |
+| Marianas | 1.8.1 | [https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar](https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar) |
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner marianas_first_pass.cwl example_inputs.yaml
+```
+
diff --git a/docs/marianas/marianas_separate_bams_1.8.1.md b/docs/marianas/marianas_separate_bams_1.8.1.md
new file mode 100644
index 00000000..232c89da
--- /dev/null
+++ b/docs/marianas/marianas_separate_bams_1.8.1.md
@@ -0,0 +1,33 @@
+# Separate BAMs v1.8.1
+
+## Version of tools in docker image \(../marianas\_process\_loop\_umi\_1.8.1/container/Dockerfile\)
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| java base image | 8 | - |
+| Marianas | 1.8.1 | [https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar](https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar) |
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner marianas_separate_bams_1.8.1.cwl example_inputs.yaml
+```
+
+### Usage
+
+```bash
+usage: marianas_separate_bams_1.8.1/marianas_separate_bams_1.8.1.cwl
+ [-h] --input_bam INPUT_BAM [job_order]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --input_bam INPUT_BAM
+```
+
diff --git a/docs/merge-fastq/README.md b/docs/merge-fastq/README.md
new file mode 100644
index 00000000..19544a3d
--- /dev/null
+++ b/docs/merge-fastq/README.md
@@ -0,0 +1,2 @@
+# Merge Fastq
+
diff --git a/docs/merge-fastq/merge_fastq_0.1.7.md b/docs/merge-fastq/merge_fastq_0.1.7.md
new file mode 100644
index 00000000..ec1c9213
--- /dev/null
+++ b/docs/merge-fastq/merge_fastq_0.1.7.md
@@ -0,0 +1,67 @@
+# v0.1.7
+
+## Version of tools in docker image \(/container/Dockerfile\)
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| miniconda | 3 | [https://hub.docker.com/r/continuumio/miniconda3](https://hub.docker.com/r/continuumio/miniconda3) |
+| merge\_fastq | 0.1.7 | [https://pypi.org/project/merge-fastq/](https://pypi.org/project/merge-fastq/) |
+
+[](https://microbadger.com/images/mskaccess/merge_fastq:0.6.1) [](https://microbadger.com/images/mskaccess/merge_fastq:0.6.1) [](https://microbadger.com/images/mskaccess/merge_fastq:0.6.1) [](https://microbadger.com/images/mskaccess/merge_fastq:0.6.1)
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner merge_fastq_0.1.7.cwl example_inputs.yaml
+```
+
+**If at MSK, using the JUNO cluster you can use the following command**
+
+```bash
+#Using CWLTOOL
+> cwltool --singularity --non-strict /path/to/merge_fastq_0.1.7/merge_fastq_0.1.7.cwl /path/to/inputs.yaml
+
+#Using toil-cwl-runner
+> mkdir tool_toil_log
+> toil-cwl-runner --singularity --logFile /path/to/tool_toil_log/cwltoil.log --jobStore /path/to/tool_jobStore --batchSystem lsf --workDir /path/to/tool_toil_log --outdir . --writeLogs /path/to/tool_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/merge_fastq_0.1.7/merge_fastq_0.1.7.cwl /path/to/inputs.yaml > tool_toil.stdout 2> tool_toil.stderr &
+```
+
+### Usage
+
+```bash
+> toil-cwl-runner merge_fastq_0.1.7.cwl --help
+usage: merge_fastq_0.1.7.cwl [-h]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --memory_per_job MEMORY_PER_JOB
+ Memory per job in megabytes
+ --memory_overhead MEMORY_OVERHEAD
+ Memory overhead per job in megabytes
+ --number_of_threads NUMBER_OF_THREADS
+ --fastq1 FASTQ1 Full path to gziped READ1 fastq files, can be
+ specified multiple times for example: --fastq1
+ test_part1_R1.fastq.gz --fastq1 test_part2_R1.fastq.gz
+ [required]
+ --fastq2 FASTQ2 Full path to gziped READ2 fastq files, can be
+ specified multiple times for example: --fastq1
+ test_part1_R2.fastq.gz --fastq1 test_part2_R2.fastq.gz
+ [required]
+ --output_path OUTPUT_PATH
+ Full path to write the output files (default: Current
+ working directory)
+ --out_fastq1_name OUT_FASTQ1_NAME
+ Name of the merged output READ1 fastq file(default:
+ merged_fastq_R1.fastq.gz)
+ --out_fastq2_name OUT_FASTQ2_NAME
+ Name of the merged output READ2 fastq file(default:
+ merged_fastq_R2.fastq.gz)
+```
+
diff --git a/docs/mosdepth/README.md b/docs/mosdepth/README.md
new file mode 100644
index 00000000..d576ad29
--- /dev/null
+++ b/docs/mosdepth/README.md
@@ -0,0 +1,2 @@
+# Mosdepth
+
diff --git a/docs/mosdepth/mosdepth_0.3.3.md b/docs/mosdepth/mosdepth_0.3.3.md
new file mode 100644
index 00000000..44080bde
--- /dev/null
+++ b/docs/mosdepth/mosdepth_0.3.3.md
@@ -0,0 +1,68 @@
+Mosdepth: fast BAM/CRAM depth calculation for **WGS**, **exome**, or **targeted sequencing**.
+
+`mosdepth` can output:
++ per-base depth about 2x as fast as `samtools depth`--about 25 minutes of CPU time for a 30X genome.
++ mean per-window depth given a window size--as would be used for CNV calling.
++ the mean per-region given a BED file of regions.
++ the mean or median per-region cumulative coverage histogram given a window size
++ a distribution of proportion of bases covered at or above a given threshold for each chromosome and genome-wide.
++ quantized output that merges adjacent bases as long as they fall in the same coverage bins e.g. (10-20)
++ threshold output to indicate how many bases in each region are covered at the given thresholds.
++ A summary of mean depths per chromosome and within specified regions per chromosome.
+
+# CWL for running Mosdepth - Coverage tool
+## Version of tools in docker image
+
+| Tool | Version | Location |
+|--- |--- |--- |
+| mosdepth | 0.3.3 | [https://hub.docker.com/r/brentp/mosdepth/tags](https://hub.docker.com/r/brentp/mosdepth/tags) [https://github.com/brentp/mosdepth/releases/tag/v0.3.3](https://github.com/brentp/mosdepth/releases/tag/v0.3.3) |
+
+[](https://github.com/brentp/mosdepth/releases/tag/v0.3.3)
+
+## CWL
+
+- CWL specification 1.0
+- Use example_inputs.yaml to see the inputs to the cwl
+- Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner mosdepth_0.3.3.cwl example_inputs.yaml
+```
+
+```bash
+#Using CWLTOOL
+> cwltool --singularity --non-strict /path/to/mosdepth_0.3.3.cwl /path/to/inputs.yaml
+
+#Using toil-cwl-runner
+> nohup toil-cwl-runner --singularity --outdir /path/to/output/folder /path/to/mosdepth_0.3.3.cwl /path/to/inputs.yaml &
+```
+
+### Usage
+
+```bash
+usage: mosdepth_0.3.3.cwl [-h] [--memory_per_job MEMORY_PER_JOB]
+ [--memory_overhead MEMORY_OVERHEAD]
+ [--number_of_threads NUMBER_OF_THREADS] [--bed BED]
+ [--chrom CHROM] [--prefix PREFIX] [--flag FLAG]
+ [--mapq MAPQ]
+ [job_order]
+
+fast BAM/CRAM depth calculation for WGS, exome, or targeted sequencing.
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --memory_per_job MEMORY_PER_JOB
+ Memory per job in megabytes
+ --memory_overhead MEMORY_OVERHEAD
+ Memory overhead per job in megabytes
+ --number_of_threads NUMBER_OF_THREADS
+ --bed BED optional BED file or (integer) window-sizes.
+ --chrom CHROM chromosome to restrict depth calculation.
+ --prefix PREFIX Prefix for the output files
+ --flag FLAG exclude reads with any of the bits in FLAG set
+ --mapq MAPQ mapping quality threshold. reads with a mapping
+ quality less than this are ignored
+```
diff --git a/docs/mutect/README.md b/docs/mutect/README.md
new file mode 100644
index 00000000..639d595b
--- /dev/null
+++ b/docs/mutect/README.md
@@ -0,0 +1,2 @@
+# MuTect
+
diff --git a/docs/mutect/mutect_1.1.5.md b/docs/mutect/mutect_1.1.5.md
new file mode 100644
index 00000000..f5a70ddb
--- /dev/null
+++ b/docs/mutect/mutect_1.1.5.md
@@ -0,0 +1,273 @@
+# MuTect 1.1.5
+
+## Version of tools in docker image \(/container/Dockerfile\)
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| openjdk:7 base image | 7 | - |
+| muTect | 1.1.5 | [https://github.com/broadinstitute/mutect/releases/download/1.1.5/muTect-1.1.5-bin.zip](https://github.com/broadinstitute/mutect/releases/download/1.1.5/muTect-1.1.5-bin.zip) |
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner mutect_1.1.5.cwl example_inputs.yaml
+```
+
+**If at MSK, using the JUNO cluster you can use the following command**
+
+```bash
+#Using CWLTOOL
+> cwltool --singularity --non-strict /path/to/mutect_1.1.5.cwl /path/to/inputs.yaml
+
+#Using toil-cwl-runner
+> mkdir mutect_toil_log
+> toil-cwl-runner --singularity --logFile /path/to/mutect_toil_log/cwltoil.log --jobStore /path/to/mutect_jobStore --batchSystem lsf --workDir /path/to/mutect_toil_log --outdir . --writeLogs /path/to/mutect_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/mutect_1.1.5.cwl /path/to/inputs.yaml > mutect_toil.stdout 2> mutect_toil.stderr &
+```
+
+### Usage
+
+```text
+usage: toil-cwl-runner mutect_1.1.5.cwl [-h]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --BQSR BQSR The input covariates table file which enables on-the-
+ fly base quality score recalibration
+ --absolute_copy_number_data ABSOLUTE_COPY_NUMBER_DATA
+ Absolute Copy Number Data, as defined by Absolute, to
+ use in power calculations
+ --arg_file ARG_FILE Reads arguments from the specified file
+ --bam_tumor_sample_name BAM_TUMOR_SAMPLE_NAME
+ if the tumor bam contains multiple samples, only use
+ read groups with SM equal to this value
+ --baq BAQ Type of BAQ calculation to apply in the engine
+ (OFF|CALCULATE_AS_NECESSARY| RECALCULATE)
+ --baqGapOpenPenalty BAQGAPOPENPENALTY
+ BAQ gap open penalty (Phred Scaled). Default value is
+ 40. 30 is perhaps better for whole genome call sets
+ --clipping_bias_pvalue_threshold CLIPPING_BIAS_PVALUE_THRESHOLD
+ pvalue threshold for fishers exact test of clipping
+ bias in mutant reads vs ref reads
+ --cosmic COSMIC VCF file of COSMIC sites
+ --coverage_20_q20_file COVERAGE_20_Q20_FILE
+ write out 20x of Q20 coverage in WIGGLE format to this
+ file
+ --coverage_file COVERAGE_FILE
+ write out coverage in WIGGLE format to this file
+ --dbsnp DBSNP VCF file of DBSNP information
+ --dbsnp_normal_lod DBSNP_NORMAL_LOD
+ LOD threshold for calling normal non-variant at dbsnp
+ sites
+ --defaultBaseQualities DEFAULTBASEQUALITIES
+ If reads are missing some or all base quality scores,
+ this value will be used for all base quality scores
+ --disableRandomization
+ Completely eliminates randomization from
+ nondeterministic methods. To be used mostly in the
+ testing framework where dynamic parallelism can result
+ in differing numbers of calls to the generator.
+ --disable_indel_quals
+ If true, disables printing of base insertion and base
+ deletion tags (with -BQSR)
+ --downsample_to_coverage DOWNSAMPLE_TO_COVERAGE
+ Target coverage threshold for downsampling to coverage
+ --downsampling_type DOWNSAMPLING_TYPE
+ Type of reads downsampling to employ at a given locus.
+ Reads will be selected randomly to be removed from the
+ pile based on the method described here
+ (NONE|ALL_READS| BY_SAMPLE) given locus; note that
+ downsampled reads are randomly selected from all
+ possible reads at a locus
+ --emit_original_quals
+ If true, enables printing of the OQ tag with the
+ original base qualities (with -BQSR)
+ --enable_extended_output
+ --excludeIntervals EXCLUDEINTERVALS
+ One or more genomic intervals to exclude from
+ processing. Can be explicitly specified on the command
+ line or in a file (including a rod file)
+ --filter_mismatching_base_and_quals
+ if a read has mismatching number of bases and base
+ qualities, filter out the read instead of blowing up.
+ --force_alleles force output for all alleles at each site
+ --force_output force output for each site
+ --fraction_contamination FRACTION_CONTAMINATION
+ estimate of fraction (0-1) of physical contamination
+ with other unrelated samples
+ --fraction_mapq0_threshold FRACTION_MAPQ0_THRESHOLD
+ threshold for determining if there is relatedness
+ between the alt and ref allele read piles
+ --gap_events_threshold GAP_EVENTS_THRESHOLD
+ how many gapped events (ins/del) are allowed in
+ proximity to this candidate
+ --gatk_key GATK_KEY GATK Key file. Required if running with -et NO_ET.
+ Please see -phone-home-and-how-does-it-affect-
+ me#latest for details.
+ --heavily_clipped_read_fraction HEAVILY_CLIPPED_READ_FRACTION
+ if this fraction or more of the bases in a read are
+ soft/hard clipped, do not use this read for mutation
+ calling
+ --initial_tumor_lod INITIAL_TUMOR_LOD
+ Initial LOD threshold for calling tumor variant
+ --input_file_normal INPUT_FILE_NORMAL
+ SAM or BAM file(s)
+ --input_file_tumor INPUT_FILE_TUMOR
+ SAM or BAM file(s)
+ --interval_merging INTERVAL_MERGING
+ Indicates the interval merging rule we should use for
+ abutting intervals (ALL| OVERLAPPING_ONLY)
+ --interval_padding INTERVAL_PADDING
+ Indicates how many basepairs of padding to include
+ around each of the intervals specified with the -L/
+ --interval_set_rule INTERVAL_SET_RULE
+ Indicates the set merging approach the interval parser
+ should use to combine the various -L or -XL inputs
+ (UNION| INTERSECTION)
+ --java_7 JAVA_7
+ --keep_program_records
+ Should we override the Walkers default and keep
+ program records from the SAM header
+ --log_to_file LOG_TO_FILE
+ Set the logging location
+ --logging_level LOGGING_LEVEL
+ Set the minimum level of logging, i.e. setting INFO
+ gets you INFO up to FATAL, setting ERROR gets you
+ ERROR and FATAL level logging.
+ --maxRuntime MAXRUNTIME
+ If provided, that GATK will stop execution cleanly as
+ soon after maxRuntime has been exceeded, truncating
+ the run but not exiting with a failure. By default the
+ value is interpreted in minutes, but this can be
+ changed by maxRuntimeUnits
+ --maxRuntimeUnits MAXRUNTIMEUNITS
+ The TimeUnit for maxRuntime (NANOSECONDS|
+ MICROSECONDS|MILLISECONDS|SECONDS|MINUTES| HOURS|DAYS)
+ --max_alt_allele_in_normal_fraction MAX_ALT_ALLELE_IN_NORMAL_FRACTION
+ threshold for maximum alternate allele fraction in
+ normal
+ --max_alt_alleles_in_normal_count MAX_ALT_ALLELES_IN_NORMAL_COUNT
+ threshold for maximum alternate allele counts in
+ normal
+ --max_alt_alleles_in_normal_qscore_sum MAX_ALT_ALLELES_IN_NORMAL_QSCORE_SUM
+ threshold for maximum alternate allele quality score
+ sum in normal
+ --min_qscore MIN_QSCORE
+ threshold for minimum base quality score
+ --minimum_mutation_cell_fraction MINIMUM_MUTATION_CELL_FRACTION
+ minimum fraction of cells which are presumed to have a
+ mutation, used to handle non-clonality and
+ contamination
+ --minimum_normal_allele_fraction MINIMUM_NORMAL_ALLELE_FRACTION
+ minimum allele fraction to be considered in normal,
+ useful for normal sample contaminated with tumor
+ --monitorThreadEfficiency
+ Enable GATK threading efficiency monitoring
+ --mutect MUTECT
+ --nonDeterministicRandomSeed
+ Makes the GATK behave non deterministically, that is,
+ the random numbers generated will be different in
+ every run
+ --noop used for debugging, basically exit as soon as we get
+ the reads
+ --normal_depth_file NORMAL_DEPTH_FILE
+ write out normal read depth in WIGGLE format to this
+ file
+ --normal_lod NORMAL_LOD
+ LOD threshold for calling normal non-germline
+ --normal_sample_name NORMAL_SAMPLE_NAME
+ name to use for normal in output files
+ --num_bam_file_handles NUM_BAM_FILE_HANDLES
+ The total number of BAM file handles to keep open
+ simultaneously
+ --num_cpu_threads_per_data_thread NUM_CPU_THREADS_PER_DATA_THREAD
+ How many CPU threads should be allocated per data
+ thread to running this analysis?
+ --num_threads NUM_THREADS
+ How many data threads should be allocated to running
+ this analysis.
+ --only_passing_calls only emit passing calls
+ --pedigree PEDIGREE Pedigree files for samples
+ --pedigreeString PEDIGREESTRING
+ Pedigree string for samples
+ --pedigreeValidationType PEDIGREEVALIDATIONTYPE
+ How strict should we be in validating the pedigree
+ information? (STRICT|SILENT)
+ --performanceLog PERFORMANCELOG
+ If provided, a GATK runtime performance log will be
+ written to this file
+ --phone_home PHONE_HOME
+ What kind of GATK run report should we generate?
+ STANDARD is the default, can be NO_ET so nothing is
+ posted to the run repository. Please see -phone-home-
+ and-how-does-it-affect-me#latest for details.
+ (NO_ET|STANDARD|STDOUT)
+ --pir_mad_threshold PIR_MAD_THRESHOLD
+ threshold for clustered read position artifact MAD
+ --pir_median_threshold PIR_MEDIAN_THRESHOLD
+ threshold for clustered read position artifact median
+ --power_constant_af POWER_CONSTANT_AF
+ Allelic fraction constant to use in power calculations
+ --power_constant_qscore POWER_CONSTANT_QSCORE
+ Phred scale quality score constant to use in power
+ calculations
+ --power_file POWER_FILE
+ write out power in WIGGLE format to this file
+ --preserve_qscores_less_than PRESERVE_QSCORES_LESS_THAN
+ Bases with quality scores less than this threshold
+ wont be recalibrated (with -BQSR)
+ --read_buffer_size READ_BUFFER_SIZE
+ Number of reads per SAM file to buffer in memory
+ --read_filter READ_FILTER
+ Specify filtration criteria to apply to each read
+ individually
+ --read_group_black_list READ_GROUP_BLACK_LIST
+ Filters out read groups matching - or a
+ .txt file containing the filter strings one per line.
+ --reference_sequence REFERENCE_SEQUENCE
+ --remove_program_records
+ Should we override the Walkers default and remove
+ program records from the SAM header
+ --required_maximum_alt_allele_mapping_quality_score
+ required minimum value for
+
+ tumor alt allele maximum mapping quality score
+ --somatic_classification_normal_power_threshold
+ Power threshold for normal to
+
+ determine germline vs variant
+ --tag TAG Arbitrary tag string to identify this GATK run as part
+ of a group of runs, for later analysis
+ --tumor_depth_file TUMOR_DEPTH_FILE
+ write out tumor read depth in WIGGLE format to this
+ file
+ --tumor_f_pretest TUMOR_F_PRETEST
+ for computational efficiency, reject sites with
+ allelic fraction below this threshold
+ --tumor_lod TUMOR_LOD
+ LOD threshold for calling tumor variant
+ --tumor_sample_name TUMOR_SAMPLE_NAME
+ name to use for tumor in output files
+ --unsafe UNSAFE If set, enables unsafe operations - nothing will be
+ checked at runtime. For expert users only who know
+ what they are doing. We do not support usage of this
+ argument. (ALLOW_UNINDEXED_BAM|
+ ALLOW_UNSET_BAM_SORT_ORDER|
+ NO_READ_ORDER_VERIFICATION|
+ ALLOW_SEQ_DICT_INCOMPATIBILITY|
+ LENIENT_VCF_PROCESSING|ALL)
+ --useOriginalQualities
+ If set, use the original base quality scores from the
+ OQ tag when present instead of the standard scores
+ --validation_strictness VALIDATION_STRICTNESS
+ How strict should we be with validation
+ (STRICT|LENIENT|SILENT)
+ --vcf VCF VCF output of mutation candidates
+```
+
diff --git a/docs/octopus/README.md b/docs/octopus/README.md
new file mode 100644
index 00000000..c19f208c
--- /dev/null
+++ b/docs/octopus/README.md
@@ -0,0 +1,2 @@
+# Octopus
+
diff --git a/docs/octopus/octopus_0.7.4.md b/docs/octopus/octopus_0.7.4.md
new file mode 100644
index 00000000..2967f783
--- /dev/null
+++ b/docs/octopus/octopus_0.7.4.md
@@ -0,0 +1,74 @@
+## CWL and Docker for Running Octopus
+
+## Version of tools in [docker image](https://hub.docker.com/r/dancooke/octopus/tags)
+
+| Tool | Version | Location |
+| ------- | ------- | ---------------------------------------------------------- |
+| Octopus | v0.7.4 | https://github.com/luntergroup/octopus/releases/tag/v0.7.4 |
+
+### CWL
+
+CWL specification 1.0
+Use example_input.yaml to see the inputs to the cwl
+Example Command using [toil](https://toil.readthedocs.io/):
+`toil-cwl-runner octopus_0-7-4.cwl example_input.yaml`
+
+If at MSK, using the JUNO cluster, having installed toil version 3.19 and manually modified [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&`, you can use the following command
+
+### Using CWLTOOL
+
+```
+cwltool --singularity --non-strict /path/to/octopus_0-7-4.cwl /path/to/inputs.yaml
+```
+
+### Using toil-cwl-runner
+
+```shell
+mkdir octopus_toil_log
+toil-cwl-runner --singularity --logFile /path/to/octopus_toil_log/cwltoil.log --jobStore /path/to/octopus_jobStore --batchSystem lsf --workDir /path/to/octopus_toil_log --outdir . --writeLogs /path/to/octopus_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/octopus_0-7-4.cwl /path/to/inputs.yaml > octopus_toil.stdout 2> octopus_toil.stderr &
+```
+
+### Usage
+
+```shell
+usage: octopus_0-7-4.cwl [-h] --input INPUT [--normalId NORMALID]
+ [--tumorOnlySample] [--somaticOnlyCalls]
+ [--targettedCalling_singleEntry TARGETTEDCALLING_SINGLEENTRY]
+ [--skipRegions_singleEntry SKIPREGIONS_SINGLEENTRY]
+ [--targettedCalling_file TARGETTEDCALLING_FILE]
+ [--skipRegions_file SKIPREGIONS_FILE]
+ [--error_models ERROR_MODELS] --reference REFERENCE
+ --output_file_name OUTPUT_FILE_NAME
+ [job_order]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --input INPUT Tumor and normal bam files with .bai
+ --normalId NORMALID add the name of the normal sample
+ --tumorOnlySample mention this parameter if it is tumor only sample.
+ --somaticOnlyCalls if somatics only call is required. Use this with -f ON
+ parameter
+ --targettedCalling_singleEntry TARGETTEDCALLING_SINGLEENTRY
+ list of regions to call variants from. eg 1. chr1: all
+ of chr1. 2. chr2:10,000,000: the single position
+ 10000000 in chr2. chr3:5,000,000-: everything from 3.
+ chr3:5,000,000 onwards. 4.
+ chr4:100,000,000-200,000,000: everything between
+ chr4:100,000,000 and chr4:200,000,000. The interval is
+ half open so position chr4:200,000,000 is not
+ included.
+ --skipRegions_singleEntry SKIPREGIONS_SINGLEENTRY
+ to skip a set of regions
+ --targettedCalling_file TARGETTEDCALLING_FILE
+ regions in a text or bed file
+ --skipRegions_file SKIPREGIONS_FILE
+ regions in text or bed file format
+ --error_models ERROR_MODELS
+ error model will be in the format - [library
+ preparation]<.sequencer> eg: PCR.NOVASEQ
+ --reference REFERENCE
+ --output_file_name OUTPUT_FILE_NAME
+```
diff --git a/docs/picard-tools/README.md b/docs/picard-tools/README.md
new file mode 100644
index 00000000..094001ac
--- /dev/null
+++ b/docs/picard-tools/README.md
@@ -0,0 +1,2 @@
+# Picard Tools
+
diff --git a/picard_add_or_replace_read_groups_1.96/README.md b/docs/picard-tools/picard_add_or_replace_read_groups_1.96.md
similarity index 78%
rename from picard_add_or_replace_read_groups_1.96/README.md
rename to docs/picard-tools/picard_add_or_replace_read_groups_1.96.md
index b07355a4..e5a70249 100644
--- a/picard_add_or_replace_read_groups_1.96/README.md
+++ b/docs/picard-tools/picard_add_or_replace_read_groups_1.96.md
@@ -1,26 +1,26 @@
-# CWL and Dockerfile for running Picard - AddOrReplaceReadGroups
+# AddOrReplaceReadGroups v1.96
-## Version of tools in docker image (/container/Dockerfile)
+## Version of tools in docker image \(/container/Dockerfile\)
-| Tool | Version | Location |
-|--- |--- |--- |
-| java base image | 8 | - |
-| picard | 1.96 | https://sourceforge.net/projects/picard/files/picard-tools/1.96/picard-tools-1.96.zip |
-| R | 3.3.3 | r-base for opnejdk:8 |
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| java base image | 8 | - |
+| picard | 1.96 | [https://sourceforge.net/projects/picard/files/picard-tools/1.96/picard-tools-1.96.zip](https://sourceforge.net/projects/picard/files/picard-tools/1.96/picard-tools-1.96.zip) |
+| R | 3.3.3 | r-base for openjdk:8 |
-[](https://microbadger.com/images/mskcc/picard_1.96:0.1.0 "Get your own image badge on microbadger.com") [](https://microbadger.com/images/mskcc/picard_1.96:0.1.0 "Get your own version badge on microbadger.com") [](https://microbadger.com/images/mskcc/picard_1.96:0.1.0 "Get your own license badge on microbadger.com")
+[](https://microbadger.com/images/mskcc/picard_1.96:0.1.0) [](https://microbadger.com/images/mskcc/picard_1.96:0.1.0) [](https://microbadger.com/images/mskcc/picard_1.96:0.1.0)
## CWL
-- CWL specification 1.0
-- Use example_inputs.yaml to see the inputs to the cwl
-- Example Command using [toil](https://toil.readthedocs.io):
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
```bash
> toil-cwl-runner picard_add_or_replace_read_groups_1.96.cwl example_inputs.yaml
```
-**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command**
+**If at MSK, using the JUNO cluster you can use the following command**
```bash
#Using CWLTOOL
diff --git a/docs/picard-tools/picard_add_or_replace_read_groups_2.21.2.md b/docs/picard-tools/picard_add_or_replace_read_groups_2.21.2.md
new file mode 100644
index 00000000..70eb1246
--- /dev/null
+++ b/docs/picard-tools/picard_add_or_replace_read_groups_2.21.2.md
@@ -0,0 +1,90 @@
+# AddOrReplaceReadGroups v2.21.2
+
+## Version of tools in docker image
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| picard | 2.21.2 | [https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar](https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar) |
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner picard_add_or_replace_read_groups_2.21.2.cwl example_inputs.yaml
+```
+
+**If at MSK, using the JUNO cluster you can use the following command**
+
+```bash
+#Using CWLTOOL
+> cwltool --singularity --non-strict /path/to/picard_add_or_replace_read_groups_2.21.2/picard_add_or_replace_read_groups_2.21.2.cwl /path/to/inputs.yaml
+
+#Using toil-cwl-runner
+> mkdir picardAddOrReplaceReadGroup_toil_log
+> toil-cwl-runner --singularity --logFile /path/to/picardAddOrReplaceReadGroup_toil_log/cwltoil.log --jobStore /path/to/picardAddOrReplaceReadGroup_jobStore --batchSystem lsf --workDir /path/to/picardAddOrReplaceReadGroup_toil_log --outdir . --writeLogs /path/to/picardAddOrReplaceReadGroup_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/picard_add_or_replace_read_groups_2.21.2/picard_add_or_replace_read_groups_2.21.2.cwl /path/to/inputs.yaml > picardAddOrReplaceReadGroup_toil.stdout 2> picardAddOrReplaceReadGroup_toil.stderr &
+```
+
+### Usage
+
+```bash
+> toil-cwl-runner picard_add_or_replace_read_groups_2.21.2.cwl --help
+usage: picard_add_or_replace_read_groups_2.21.2.cwl [-h]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --memory_per_job MEMORY_PER_JOB
+ Memory per job in megabytes
+ --memory_overhead MEMORY_OVERHEAD
+ Memory overhead per job in megabytes
+ --number_of_threads NUMBER_OF_THREADS
+ --input INPUT Input file (bam or sam). Required.
+ --output_file_name OUTPUT_FILE_NAME
+ Output file name (bam or sam). Not Required
+ --sort_order SORT_ORDER
+ Optional sort order to output in. If not supplied
+ OUTPUT is in the same order as INPUT.Default value:
+ null. Possible values: {unsorted, queryname,
+ coordinate}
+ --read_group_identifier READ_GROUP_IDENTIFIER
+ Read Group ID Default value: 1. This option can be set
+ to 'null' to clear the default value Required
+ --read_group_sequnecing_center READ_GROUP_SEQUNECING_CENTER
+ Read Group sequencing center name Default value: null.
+ Required
+ --read_group_library READ_GROUP_LIBRARY
+ Read Group Library. Required
+ --read_group_platform_unit READ_GROUP_PLATFORM_UNIT
+ Read Group platform unit (eg. run barcode) Required.
+ --read_group_sample_name READ_GROUP_SAMPLE_NAME
+ Read Group sample name. Required
+ --read_group_sequencing_platform READ_GROUP_SEQUENCING_PLATFORM
+ Read Group platform (e.g. illumina, solid) Required.
+ --read_group_description READ_GROUP_DESCRIPTION
+ Read Group description Default value: null.
+ --read_group_run_date READ_GROUP_RUN_DATE
+ Read Group run date Default value: null.
+ --tmp_dir TMP_DIR This option may be specified 0 or more times
+ --validation_stringency VALIDATION_STRINGENCY
+ Validation stringency for all SAM files read by this
+ program. Setting stringency to SILENT can improve
+ performance when processing a BAM file in which
+ variable-length data (read, qualities, tags) do not
+ otherwise need to be decoded. Default value: STRICT.
+ This option can be set to 'null' to clear the default
+ value. Possible values: {STRICT,LENIENT, SILENT}
+ --bam_compression_level BAM_COMPRESSION_LEVEL
+ Compression level for all compressed files created
+ (e.g. BAM and GELI). Default value:5. This option can
+ be set to 'null' to clear the default value.
+ --create_bam_index Whether to create a BAM index when writing a
+ coordinate-sorted BAM file. Default value:false. This
+ option can be set to 'null' to clear the default
+ value. Possible values:{true, false}
+```
+
diff --git a/docs/picard-tools/picard_add_or_replace_read_groups_4.1.8.1.md b/docs/picard-tools/picard_add_or_replace_read_groups_4.1.8.1.md
new file mode 100644
index 00000000..707df7f2
--- /dev/null
+++ b/docs/picard-tools/picard_add_or_replace_read_groups_4.1.8.1.md
@@ -0,0 +1,95 @@
+# AddOrReplaceReadGroups v4.1.8.1
+
+## Version of tools in docker image
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| gatk | 4.1.8.1 | broadinstitute/gatk:4.1.8.1 |
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner picard_add_or_replace_read_groups_4.1.8.1.cwl example_inputs.yaml
+```
+
+**If at MSK, using the JUNO cluster you can use the following command**
+
+```bash
+#Using CWLTOOL
+> cwltool --singularity --non-strict /path/to/picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl /path/to/inputs.yaml
+
+#Using toil-cwl-runner
+> mkdir picardAddOrReplaceReadGroup_toil_log
+> toil-cwl-runner --singularity --logFile /path/to/picardAddOrReplaceReadGroup_toil_log/cwltoil.log --jobStore /path/to/picardAddOrReplaceReadGroup_jobStore --batchSystem lsf --workDir /path/to/picardAddOrReplaceReadGroup_toil_log --outdir . --writeLogs /path/to/picardAddOrReplaceReadGroup_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl /path/to/inputs.yaml > picardAddOrReplaceReadGroup_toil.stdout 2> picardAddOrReplaceReadGroup_toil.stderr &
+```
+
+### Usage
+
+```bash
+> toil-cwl-runner picard_add_or_replace_read_groups_4.1.8.1.cwl --help
+usage: picard_add_or_replace_read_groups_4.1.8.1.cwl
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --memory_per_job MEMORY_PER_JOB
+ Memory per job in megabytes
+ --memory_overhead MEMORY_OVERHEAD
+ Memory overhead per job in megabytes
+ --number_of_threads NUMBER_OF_THREADS
+ --input INPUT Input file ( sam). Required.
+ --output_file_name OUTPUT_FILE_NAME
+ Output file name (bam or sam). Not Required
+ --sort_order SORT_ORDER
+ Optional sort order to output in. If not supplied
+ OUTPUT is in the same order as INPUT.Default value:
+ null. Possible values: {unsorted, queryname,
+ coordinate}
+ --read_group_identifier READ_GROUP_IDENTIFIER
+ Read Group ID Default value: 1. This option can be set
+ to 'null' to clear the default value Required
+ --read_group_sequencing_center READ_GROUP_SEQUENCING_CENTER
+ Read Group sequencing center name Default value: null.
+ Required
+ --read_group_library READ_GROUP_LIBRARY
+ Read Group Library. Required
+ --read_group_platform_unit READ_GROUP_PLATFORM_UNIT
+ Read Group platform unit (eg. run barcode) Required.
+ --read_group_sample_name READ_GROUP_SAMPLE_NAME
+ Read Group sample name. Required
+ --read_group_sequencing_platform READ_GROUP_SEQUENCING_PLATFORM
+ Read Group platform (e.g. illumina, solid) Required.
+ --read_group_description READ_GROUP_DESCRIPTION
+ Read Group description Default value: null.
+ --read_group_run_date READ_GROUP_RUN_DATE
+ Read Group run date Default value: null.
+ --validation_stringency VALIDATION_STRINGENCY
+ Validation stringency for all SAM files read by this
+ program. Setting stringency to SILENT can improve
+ performance when processing a BAM file in which
+ variable-length data (read, qualities, tags) do not
+ otherwise need to be decoded. Default value: STRICT.
+ This option can be set to 'null' to clear the default
+ value. Possible values: {STRICT,LENIENT, SILENT}
+ --bam_compression_level BAM_COMPRESSION_LEVEL
+ Compression level for all compressed files created
+ (e.g. BAM and GELI). Default value:5. This option can
+ be set to 'null' to clear the default value.
+ --use_jdk_deflater Use the JDK Deflater instead of the Intel Deflater for
+ writing compressed output
+ --use_jdk_inflater Whether to create a BAM index when writing a
+ coordinate-sorted BAM file. Default value:false. This
+ option can be set to 'null' to clear the default
+ value. Possible values:{true, false}
+ --create_bam_index Whether to create a BAM index when writing a
+ coordinate-sorted BAM file. Default value:false. This
+ option can be set to 'null' to clear the default
+ value. Possible values:{true, false}
+```
+
diff --git a/docs/picard-tools/picard_collect_alignment_summary_metrics_2.21.2.md b/docs/picard-tools/picard_collect_alignment_summary_metrics_2.21.2.md
new file mode 100644
index 00000000..b88b626f
--- /dev/null
+++ b/docs/picard-tools/picard_collect_alignment_summary_metrics_2.21.2.md
@@ -0,0 +1,78 @@
+# CollectAlignmentSummaryMetrics v2.21.2
+
+## Version of tools in docker image
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| picard | 2.21.2 | [https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar](https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar) |
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner picard_collect_alignment_summary_metrics_2.21.2.cwl example_inputs.yaml
+```
+
+### Usage
+
+```bash
+> usage: picard_collect_alignment_summary_metrics_2.21.2.cwl [-h]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --memory_per_job MEMORY_PER_JOB
+ Memory per job in megabytes
+ --memory_overhead MEMORY_OVERHEAD
+ Memory overhead per job in megabytes
+ --number_of_threads NUMBER_OF_THREADS
+ --input INPUT Input file (bam or sam). Required.
+ --output_file_name OUTPUT_FILE_NAME
+ Output file (bam or sam).
+ --metrics_acciumulation_level METRICS_ACCIUMULATION_LEVEL
+ The level(s) at which to accumulate metrics. Default
+ value: [ALL_READS]. This option can be set to 'null'
+ to clear the default value. Possible values:
+ {ALL_READS, SAMPLE, LIBRARY, READ_GROUP} This option
+ may be specified 0 or more times. This option can be
+ set to 'null' to clear the default list.
+ --max_insert_size MAX_INSERT_SIZE
+ Paired-end reads above this insert size will be
+ considered chimeric along with inter-chromosomal
+ pairs. Default value: 100000. This option can be set
+ to 'null' to clear the default value.
+ --tmp_dir TMP_DIR This option may be specified 0 or more times
+ --validation_stringency VALIDATION_STRINGENCY
+ Validation stringency for all SAM files read by this
+ program. Setting stringency to SILENT can improve
+ performance when processing a BAM file in which
+ variable-length data (read, qualities, tags) do not
+ otherwise need to be decoded. Default value: STRICT.
+ This option can be set to 'null' to clear the default
+ value. Possible values: {STRICT,LENIENT, SILENT}
+ --bam_compression_level BAM_COMPRESSION_LEVEL
+ Compression level for all compressed files created
+ (e.g. BAM and GELI). Default value:5. This option can
+ be set to 'null' to clear the default value.
+ --create_bam_index Whether to create a BAM index when writing a
+ coordinate-sorted BAM file. Default value:false. This
+ option can be set to 'null' to clear the default
+ value. Possible values:{true, false}
+ --assume_sorted
+ --reference_sequence REFERENCE_SEQUENCE
+ Reference sequence file. Note that while this argument
+ isn't required, without it only a small subset of the
+ metrics will be calculated. Note also that if a
+ reference sequence is provided, it must be accompanied
+ by a sequence dictionary. Default value: null.
+ --stop_after STOP_AFTER
+ Stop after processing N reads, mainly for debugging.
+ Default value: 0. This option can be set to 'null' to
+ clear the default value.
+```
+
diff --git a/picard_collect_alignment_summary_metrics_2.8.1/README.md b/docs/picard-tools/picard_collect_alignment_summary_metrics_2.8.1.md
similarity index 85%
rename from picard_collect_alignment_summary_metrics_2.8.1/README.md
rename to docs/picard-tools/picard_collect_alignment_summary_metrics_2.8.1.md
index 16d0b11b..1a318d60 100644
--- a/picard_collect_alignment_summary_metrics_2.8.1/README.md
+++ b/docs/picard-tools/picard_collect_alignment_summary_metrics_2.8.1.md
@@ -1,19 +1,18 @@
-# CWL and Dockerfile for running Picard - CollectAlignmentSummaryMetrics
+# CollectAlignmentSummaryMetrics v2.8.1
-## Version of tools in docker image (../picard_mark_duplicates_2.8.1/container/Dockerfile)
-
-| Tool | Version | Location |
-|--- |--- |--- |
-| java base image | 8 | - |
-| picard | 2.8.1 | https://github.com/broadinstitute/picard/releases/download/2.8.1/picard.jar |
-| R | 3.3.3 | r-base for opnejdk:8 |
+## Version of tools in docker image \(../picard\_mark\_duplicates\_2.8.1/container/Dockerfile\)
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| java base image | 8 | - |
+| picard | 2.8.1 | [https://github.com/broadinstitute/picard/releases/download/2.8.1/picard.jar](https://github.com/broadinstitute/picard/releases/download/2.8.1/picard.jar) |
+| R | 3.3.3 | r-base for openjdk:8 |
## CWL
-- CWL specification 1.0
-- Use example_inputs.yaml to see the inputs to the cwl
-- Example Command using [toil](https://toil.readthedocs.io):
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
```bash
> toil-cwl-runner picard_collect_alignment_summary_metrics_2.8.1.cwl example_inputs.yaml
@@ -78,3 +77,4 @@ optional arguments:
Default value: 0. This option can be set to 'null' to
clear the default value.
```
+
diff --git a/docs/picard-tools/picard_collectmultiplemetric_2.21.2.md b/docs/picard-tools/picard_collectmultiplemetric_2.21.2.md
new file mode 100644
index 00000000..14b0eaee
--- /dev/null
+++ b/docs/picard-tools/picard_collectmultiplemetric_2.21.2.md
@@ -0,0 +1,78 @@
+# CollectMultipleMetrics v2.21.2
+
+## Version of tools in docker image
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| picard | 2.21.2 | [https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar](https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar) |
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner picard_collectmultiplemetrics_2.21.2.cwl example_inputs.yaml
+```
+
+### Usage
+
+```bash
+> usage: picard_collectmultiplemetrics_2.21.2.cwl [-h]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --memory_per_job MEMORY_PER_JOB
+ Memory per job in megabytes
+ --memory_overhead MEMORY_OVERHEAD
+ Memory overhead per job in megabytes
+ --number_of_threads NUMBER_OF_THREADS
+ --input INPUT Input file (bam or sam). Required.
+ --output_file_name OUTPUT_FILE_NAME
+ Output file (bam or sam).
+ --metrics_acciumulation_level METRICS_ACCIUMULATION_LEVEL
+ The level(s) at which to accumulate metrics. Default
+ value: [ALL_READS]. This option can be set to 'null'
+ to clear the default value. Possible values:
+ {ALL_READS, SAMPLE, LIBRARY, READ_GROUP} This option
+ may be specified 0 or more times. This option can be
+ set to 'null' to clear the default list.
+ --max_insert_size MAX_INSERT_SIZE
+ Paired-end reads above this insert size will be
+ considered chimeric along with inter-chromosomal
+ pairs. Default value: 100000. This option can be set
+ to 'null' to clear the default value.
+ --tmp_dir TMP_DIR This option may be specified 0 or more times
+ --validation_stringency VALIDATION_STRINGENCY
+ Validation stringency for all SAM files read by this
+ program. Setting stringency to SILENT can improve
+ performance when processing a BAM file in which
+ variable-length data (read, qualities, tags) do not
+ otherwise need to be decoded. Default value: STRICT.
+ This option can be set to 'null' to clear the default
+ value. Possible values: {STRICT,LENIENT, SILENT}
+ --bam_compression_level BAM_COMPRESSION_LEVEL
+ Compression level for all compressed files created
+ (e.g. BAM and GELI). Default value:5. This option can
+ be set to 'null' to clear the default value.
+ --create_bam_index Whether to create a BAM index when writing a
+ coordinate-sorted BAM file. Default value:false. This
+ option can be set to 'null' to clear the default
+ value. Possible values:{true, false}
+ --assume_sorted
+ --reference_sequence REFERENCE_SEQUENCE
+ Reference sequence file. Note that while this argument
+ isn't required, without it only a small subset of the
+ metrics will be calculated. Note also that if a
+ reference sequence is provided, it must be accompanied
+ by a sequence dictionary. Default value: null.
+ --stop_after STOP_AFTER
+ Stop after processing N reads, mainly for debugging.
+ Default value: 0. This option can be set to 'null' to
+ clear the default value.
+```
+
diff --git a/docs/picard-tools/picard_collectmultiplemetric_2.8.1.md b/docs/picard-tools/picard_collectmultiplemetric_2.8.1.md
new file mode 100644
index 00000000..259b84d5
--- /dev/null
+++ b/docs/picard-tools/picard_collectmultiplemetric_2.8.1.md
@@ -0,0 +1,80 @@
+# CollectMultipleMetrics v2.8.1
+
+## Version of tools in docker image \(../picard\_mark\_duplicates\_2.8.1/container/Dockerfile\)
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| java base image | 8 | - |
+| picard | 2.8.1 | [https://github.com/broadinstitute/picard/releases/download/2.8.1/picard.jar](https://github.com/broadinstitute/picard/releases/download/2.8.1/picard.jar) |
+| R | 3.3.3 | r-base for openjdk:8 |
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner picard_collectmultiplemetrics_2-8-1.cwl example_inputs.yaml
+```
+
+### Usage
+
+```bash
+> usage: picard_collectmultiplemetrics_2-8-1.cwl [-h]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --memory_per_job MEMORY_PER_JOB
+ Memory per job in megabytes
+ --memory_overhead MEMORY_OVERHEAD
+ Memory overhead per job in megabytes
+ --number_of_threads NUMBER_OF_THREADS
+ --input INPUT Input file (bam or sam). Required.
+ --output_file_name OUTPUT_FILE_NAME
+ Output file (bam or sam).
+ --metrics_acciumulation_level METRICS_ACCIUMULATION_LEVEL
+ The level(s) at which to accumulate metrics. Default
+ value: [ALL_READS]. This option can be set to 'null'
+ to clear the default value. Possible values:
+ {ALL_READS, SAMPLE, LIBRARY, READ_GROUP} This option
+ may be specified 0 or more times. This option can be
+ set to 'null' to clear the default list.
+ --max_insert_size MAX_INSERT_SIZE
+ Paired-end reads above this insert size will be
+ considered chimeric along with inter-chromosomal
+ pairs. Default value: 100000. This option can be set
+ to 'null' to clear the default value.
+ --tmp_dir TMP_DIR This option may be specified 0 or more times
+ --validation_stringency VALIDATION_STRINGENCY
+ Validation stringency for all SAM files read by this
+ program. Setting stringency to SILENT can improve
+ performance when processing a BAM file in which
+ variable-length data (read, qualities, tags) do not
+ otherwise need to be decoded. Default value: STRICT.
+ This option can be set to 'null' to clear the default
+ value. Possible values: {STRICT,LENIENT, SILENT}
+ --bam_compression_level BAM_COMPRESSION_LEVEL
+ Compression level for all compressed files created
+ (e.g. BAM and GELI). Default value:5. This option can
+ be set to 'null' to clear the default value.
+ --create_bam_index Whether to create a BAM index when writing a
+ coordinate-sorted BAM file. Default value:false. This
+ option can be set to 'null' to clear the default
+ value. Possible values:{true, false}
+ --assume_sorted
+ --reference_sequence REFERENCE_SEQUENCE
+ Reference sequence file. Note that while this argument
+ isn't required, without it only a small subset of the
+ metrics will be calculated. Note also that if a
+ reference sequence is provided, it must be accompanied
+ by a sequence dictionary. Default value: null.
+ --stop_after STOP_AFTER
+ Stop after processing N reads, mainly for debugging.
+ Default value: 0. This option can be set to 'null' to
+ clear the default value.
+```
+
diff --git a/picard_fix_mate_information_1.96/README.md b/docs/picard-tools/picard_fix_mate_information_1.96.md
similarity index 71%
rename from picard_fix_mate_information_1.96/README.md
rename to docs/picard-tools/picard_fix_mate_information_1.96.md
index 567a78e3..22a9cd50 100644
--- a/picard_fix_mate_information_1.96/README.md
+++ b/docs/picard-tools/picard_fix_mate_information_1.96.md
@@ -1,26 +1,26 @@
-# CWL and Dockerfile for running Picard - FixMateInformation
+# FixMateInformation v1.96
-## Version of tools in docker image (../picard_add_or_replace_read_groups_1.96/container/Dockerfile)
+## Version of tools in docker image \(../picard\_add\_or\_replace\_read\_groups\_1.96/container/Dockerfile\)
-| Tool | Version | Location |
-|--- |--- |--- |
-| java base image | 8 | - |
-| picard | 1.96 | https://sourceforge.net/projects/picard/files/picard-tools/1.96/picard-tools-1.96.zip |
-| R | 3.3.3 | r-base for opnejdk:8 |
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| java base image | 8 | - |
+| picard | 1.96 | [https://sourceforge.net/projects/picard/files/picard-tools/1.96/picard-tools-1.96.zip](https://sourceforge.net/projects/picard/files/picard-tools/1.96/picard-tools-1.96.zip) |
+| R | 3.3.3 | r-base for openjdk:8 |
-[](https://microbadger.com/images/mskcc/picard_1.96:0.1.0 "Get your own image badge on microbadger.com") [](https://microbadger.com/images/mskcc/picard_1.96:0.1.0 "Get your own version badge on microbadger.com") [](https://microbadger.com/images/mskcc/picard_1.96:0.1.0 "Get your own license badge on microbadger.com")
+[](https://microbadger.com/images/mskcc/picard_1.96:0.1.0) [](https://microbadger.com/images/mskcc/picard_1.96:0.1.0) [](https://microbadger.com/images/mskcc/picard_1.96:0.1.0)
## CWL
-- CWL specification 1.0
-- Use example_inputs.yaml to see the inputs to the cwl
-- Example Command using [toil](https://toil.readthedocs.io):
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
```bash
> toil-cwl-runner picard_fix_mate_information_1.96.cwl example_inputs.yaml
```
-**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command**
+**If at MSK, using the JUNO cluster you can use the following command**
```bash
#Using CWLTOOL
@@ -33,7 +33,7 @@
### Usage
-```
+```text
usage: picard_fix_mate_information_1.96.cwl [-h]
positional arguments:
@@ -72,4 +72,5 @@ optional arguments:
coordinate-sorted BAM file. Default value:false. This
option can be set to 'null' to clear the default
value. Possible values:{true, false}
-```
\ No newline at end of file
+```
+
diff --git a/docs/picard-tools/picard_fix_mate_information_2.21.2.md b/docs/picard-tools/picard_fix_mate_information_2.21.2.md
new file mode 100644
index 00000000..659bf4b1
--- /dev/null
+++ b/docs/picard-tools/picard_fix_mate_information_2.21.2.md
@@ -0,0 +1,72 @@
+# FixMateInformation v2.21.2
+
+## Version of tools in docker image
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| picard | 2.21.2 | [https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar](https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar) |
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner picard_fix_mate_information_2.21.2.cwl example_inputs.yaml
+```
+
+**If at MSK, using the JUNO cluster you can use the following command**
+
+```bash
+#Using CWLTOOL
+> cwltool --singularity --non-strict /path/to/picard_fix_mate_information_2.21.2/picard_fix_mate_information_2.21.2.cwl /path/to/inputs.yaml
+
+#Using toil-cwl-runner
+> mkdir picardFixMate_toil_log
+> toil-cwl-runner --singularity --logFile /path/to/picardFixMate_toil_log/cwltoil.log --jobStore /path/to/picardFixMate_jobStore --batchSystem lsf --workDir /path/to/picardFixMate_toil_log --outdir . --writeLogs /path/to/picardFixMate_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/picard_fix_mate_information_2.21.2/picard_fix_mate_information_2.21.2.cwl /path/to/inputs.yaml > picardFixMate_toil.stdout 2> picardFixMate_toil.stderr &
+```
+
+### Usage
+
+```text
+usage: picard_fix_mate_information_2.21.2.cwl [-h]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --memory_per_job MEMORY_PER_JOB
+ Memory per job in megabytes
+ --memory_overhead MEMORY_OVERHEAD
+ Memory overhead per job in megabytes
+ --number_of_threads NUMBER_OF_THREADS
+ --input INPUT The input file to fix. This option may be specified 0
+ or more times
+ --output_file_name OUTPUT_FILE_NAME
+ Output file name (bam or sam). Not Required
+ --sort_order SORT_ORDER
+ Optional sort order to output in. If not supplied
+ OUTPUT is in the same order as INPUT.Default value:
+ null. Possible values: {unsorted, queryname,
+ coordinate}
+ --tmp_dir TMP_DIR This option may be specified 0 or more times
+ --validation_stringency VALIDATION_STRINGENCY
+ Validation stringency for all SAM files read by this
+ program. Setting stringency to SILENT can improve
+ performance when processing a BAM file in which
+ variable-length data (read, qualities, tags) do not
+ otherwise need to be decoded. Default value: STRICT.
+ This option can be set to 'null' to clear the default
+ value. Possible values: {STRICT,LENIENT, SILENT}
+ --bam_compression_level BAM_COMPRESSION_LEVEL
+ Compression level for all compressed files created
+ (e.g. BAM and GELI). Default value:5. This option can
+ be set to 'null' to clear the default value.
+ --create_bam_index Whether to create a BAM index when writing a
+ coordinate-sorted BAM file. Default value:false. This
+ option can be set to 'null' to clear the default
+ value. Possible values:{true, false}
+```
+
diff --git a/docs/picard-tools/picard_fix_mate_information_4.1.8.1.md b/docs/picard-tools/picard_fix_mate_information_4.1.8.1.md
new file mode 100644
index 00000000..7039d4d4
--- /dev/null
+++ b/docs/picard-tools/picard_fix_mate_information_4.1.8.1.md
@@ -0,0 +1,77 @@
+# FixMateInformation v4.1.8.1
+
+## Version of tools in docker image
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| gatk | 4.1.8.1 | broadinstitute/gatk:4.1.8.1 |
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner picard_fix_mate_information_4.1.8.1.cwl example_inputs.yaml
+```
+
+**If at MSK, using the JUNO cluster you can use the following command**
+
+```bash
+#Using CWLTOOL
+> cwltool --singularity --non-strict /path/to/picard_fix_mate_information_4.1.8.1/picard_fix_mate_information_4.1.8.1.cwl /path/to/inputs.yaml
+
+#Using toil-cwl-runner
+> mkdir picardFixMate_toil_log
+> toil-cwl-runner --singularity --logFile /path/to/picardFixMate_toil_log/cwltoil.log --jobStore /path/to/picardFixMate_jobStore --batchSystem lsf --workDir /path/to/picardFixMate_toil_log --outdir . --writeLogs /path/to/picardFixMate_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/picard_fix_mate_information_4.1.8.1/picard_fix_mate_information_4.1.8.1.cwl /path/to/inputs.yaml > picardFixMate_toil.stdout 2> picardFixMate_toil.stderr &
+```
+
+### Usage
+
+```text
+usage: picard_fix_mate_information_4.1.8.1.cwl
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --memory_per_job MEMORY_PER_JOB
+ Memory per job in megabytes
+ --memory_overhead MEMORY_OVERHEAD
+ Memory overhead per job in megabytes
+ --number_of_threads NUMBER_OF_THREADS
+ --input INPUT The input file to fix. This option may be specified 0
+ or more times
+ --output_file_name OUTPUT_FILE_NAME
+ Output file name (bam or sam). Not Required
+ --sort_order SORT_ORDER
+ Optional sort order to output in. If not supplied
+ OUTPUT is in the same order as INPUT.Default value:
+ null. Possible values: {unsorted, queryname,
+ coordinate}
+ --validation_stringency VALIDATION_STRINGENCY
+ Validation stringency for all SAM files read by this
+ program. Setting stringency to SILENT can improve
+ performance when processing a BAM file in which
+ variable-length data (read, qualities, tags) do not
+ otherwise need to be decoded. Default value: STRICT.
+ This option can be set to 'null' to clear the default
+ value. Possible values: {STRICT,LENIENT, SILENT}
+ --bam_compression_level BAM_COMPRESSION_LEVEL
+ Compression level for all compressed files created
+ (e.g. BAM and GELI). Default value:5. This option can
+ be set to 'null' to clear the default value.
+ --use_jdk_deflater Use the JDK Deflater instead of the Intel Deflater for
+ writing compressed output
+ --use_jdk_inflater Use the JDK Inflater instead of the Intel Inflater
+ for reading compressed input. Default value:false.
+ This option can be set to 'null' to clear the default
+ value. Possible values:{true, false}
+ --create_bam_index Whether to create a BAM index when writing a
+ coordinate-sorted BAM file. Default value:false. This
+ option can be set to 'null' to clear the default
+ value. Possible values:{true, false}
+```
+
diff --git a/docs/picard-tools/picard_hsmetrics_2.21.2.md b/docs/picard-tools/picard_hsmetrics_2.21.2.md
new file mode 100644
index 00000000..8c43151b
--- /dev/null
+++ b/docs/picard-tools/picard_hsmetrics_2.21.2.md
@@ -0,0 +1,86 @@
+# HSmetrics v2.21.2
+
+## Version of tools in docker image
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| picard | 2.21.2 | [https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar](https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar) |
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+> toil-cwl-runner picard_hsmetrics_2.21.2.cwl example_inputs.yaml
+```
+
+### Usage
+
+```bash
+> usage: picard_hsmetrics_2.21.2.cwl [-h]
+
+optional arguments:
+ -h, --help show this help message and exit
+ --bait_intervals BAIT_INTERVALS
+ An interval list file that contains the locations of
+ the baits used. Default value: null. This option must
+ be specified at least 1 times.
+ --bait_set_name BAIT_SET_NAME
+ Bait set name. If not provided it is inferred from the
+ filename of the bait intervals. Default value: null
+ --minimum_mapping_quality MINIMUM_MAPPING_QUALITY
+ Minimum mapping quality for a read to contribute
+ coverage. Default value: 20. This option can be set to
+ 'null' to clear the default value.
+ --minimum_base_quality MINIMUM_BASE_QUALITY
+ Minimum base quality for a base to contribute
+ coverage. Default value: 20. This option can be set to
+ 'null' to clear the default value.
+ --clip_overlapping_reads
+ True if we are to clip overlapping reads, false
+ otherwise. Default value: true. This option can be set
+ to 'null' to clear the default value. Possible values:
+ {true, false}
+ --target_intervals TARGET_INTERVALS
+ An interval list file that contains the locations of
+ the targets. Default value: null. This option must be
+ specified at least 1 times.
+ --input INPUT An aligned SAM or BAM file. Required.
+ --output_file_name OUTPUT_FILE_NAME
+ The output file to write the metrics to. Required.
+ --metric_accumulation_level METRIC_ACCUMULATION_LEVEL
+ The level(s) at which to accumulate metrics. Default
+ value: [ALL_READS]. This option can be set to 'null'
+ to clear the default value. Possible values:
+ {ALL_READS, SAMPLE, LIBRARY, READ_GROUP} This option
+ may be specified 0 or more times. This option can be
+ set to 'null' to clear the default list.
+ --per_target_coverage PER_TARGET_COVERAGE
+ An optional file to output per target coverage
+ information to. Default value: null.
+ --per_base_coverage PER_BASE_COVERAGE
+ An optional file to output per base coverage
+ information to. The per-base file contains one line
+ per target base and can grow very large. It is not
+ recommended for use with large target sets. Default
+ value: null.
+ --near_distance NEAR_DISTANCE
+ The maximum distance between a read and the nearest
+ probe/bait/amplicon for the read to be considered
+ 'near probe' and included in percent selected. Default
+ value: 250. This option can be set to 'null' to clear
+ the default value.
+ --coverage_cap COVERAGE_CAP
+ Parameter to set a max coverage limit for Theoretical
+ Sensitivity calculations. Default is 200. Default
+ value: 200. This option can be set to 'null' to clear
+ the default value.
+ --sample_size SAMPLE_SIZE
+ Sample Size used for Theoretical Het Sensitivity
+ sampling. Default is 10000. Default value: 10000. This
+ option can be set to 'null' to clear the default
+ value.
+```
+
diff --git a/docs/picard-tools/picard_hsmetrics_2.8.1.md b/docs/picard-tools/picard_hsmetrics_2.8.1.md
new file mode 100644
index 00000000..627d3286
--- /dev/null
+++ b/docs/picard-tools/picard_hsmetrics_2.8.1.md
@@ -0,0 +1,26 @@
+# HSmetrics v2.8.1
+
+## Version of tools in docker image \(../picard\_mark\_duplicates\_2.8.1/container/Dockerfile\)
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| java base image | 8 | - |
+| picard | 2.8.1 | [https://github.com/broadinstitute/picard/releases/download/2.8.1/picard.jar](https://github.com/broadinstitute/picard/releases/download/2.8.1/picard.jar) |
+| R | 3.3.3 | r-base for openjdk:8 |
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+> toil-cwl-runner picard_hsmetrics_2.8.1.cwl example_inputs.yaml
+```
+
+### Usage
+
+```bash
+> usage: picard_hsmetrics_2.8.1.cwl [-h]
+```
+
diff --git a/docs/picard-tools/picard_mark_duplicates_1.96.md b/docs/picard-tools/picard_mark_duplicates_1.96.md
new file mode 100644
index 00000000..dd13d5ed
--- /dev/null
+++ b/docs/picard-tools/picard_mark_duplicates_1.96.md
@@ -0,0 +1,20 @@
+# MarkDuplicates v1.96
+
+## Version of tools in docker image \(/container/Dockerfile\)
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| java base image | 8 | - |
+| picard | 1.96 | [https://sourceforge.net/projects/picard/files/picard-tools/1.96/picard-tools-1.96.zip](https://sourceforge.net/projects/picard/files/picard-tools/1.96/picard-tools-1.96.zip) |
+| R | 3.3.3 | r-base for openjdk:8 |
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner picard_mark_duplicates_1.96.cwl example_inputs.yaml
+```
+
diff --git a/docs/picard-tools/picard_mark_duplicates_2.21.2.md b/docs/picard-tools/picard_mark_duplicates_2.21.2.md
new file mode 100644
index 00000000..f978a4ba
--- /dev/null
+++ b/docs/picard-tools/picard_mark_duplicates_2.21.2.md
@@ -0,0 +1,77 @@
+# MarkDuplicates v2.21.2
+
+## Version of tools in docker image
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| picard | 2.21.2 | [https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar](https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar) |
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner picard_mark_duplicates_2.21.2.cwl example_inputs.yaml
+```
+
+### Usage
+
+```bash
+usage: picard_mark_duplicates_2.21.2.cwl [-h]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --memory_per_job MEMORY_PER_JOB
+ Memory per job in megabytes
+ --memory_overhead MEMORY_OVERHEAD
+ Memory overhead per job in megabytes
+ --number_of_threads NUMBER_OF_THREADS
+ --input INPUT Input file (bam or sam). Required.
+ --output_file_name OUTPUT_FILE_NAME
+ Output file (bam or sam).
+ --duplication_metrics DUPLICATION_METRICS
+ File to write duplication metrics to Required.
+ --assume_sort_order ASSUME_SORT_ORDER
+ Optional sort order to output in. If not supplied
+ OUTPUT is in the same order as INPUT.Default value:
+ null. Possible values: {unsorted, queryname,
+ coordinate}
+ --tmp_dir TMP_DIR This option may be specified 0 or more times
+ --validation_stringency VALIDATION_STRINGENCY
+ Validation stringency for all SAM files read by this
+ program. Setting stringency to SILENT can improve
+ performance when processing a BAM file in which
+ variable-length data (read, qualities, tags) do not
+ otherwise need to be decoded. Default value: STRICT.
+ This option can be set to 'null' to clear the default
+ value. Possible values: {STRICT,LENIENT, SILENT}
+ --bam_compression_level BAM_COMPRESSION_LEVEL
+ Compression level for all compressed files created
+ (e.g. BAM and GELI). Default value:5. This option can
+ be set to 'null' to clear the default value.
+ --create_bam_index Whether to create a BAM index when writing a
+ coordinate-sorted BAM file. Default value:false. This
+ option can be set to 'null' to clear the default
+ value. Possible values:{true, false}
+ --duplicate_scoring_strategy DUPLICATE_SCORING_STRATEGY
+ The scoring strategy for choosing the non-duplicate
+ among candidates. Default value:SUM_OF_BASE_QUALITIES.
+ This option can be set to 'null' to clear the default
+ value.Possible values: {SUM_OF_BASE_QUALITIES,
+ TOTAL_MAPPED_REFERENCE_LENGTH, RANDOM}
+ --optical_duplicate_pixel_distance OPTICAL_DUPLICATE_PIXEL_DISTANCE
+ The maximum offset between two duplicate clusters in
+ order to consider them optical duplicates. The default
+ is appropriate for unpatterned versions of the
+ Illumina platform. For the patterned flowcell models,
+ 2500 is moreappropriate. For other platforms and
+ models, users should experiment to find what works
+ best. Default value: 100. This option can be set to
+ 'null' to clear the default value.
+```
+
diff --git a/docs/picard-tools/picard_mark_duplicates_2.8.1.md b/docs/picard-tools/picard_mark_duplicates_2.8.1.md
new file mode 100644
index 00000000..cfb0fc92
--- /dev/null
+++ b/docs/picard-tools/picard_mark_duplicates_2.8.1.md
@@ -0,0 +1,20 @@
+# MarkDuplicates v2.8.1
+
+## Version of tools in docker image \(/container/Dockerfile\)
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| java base image | 8 | - |
+| picard | 2.8.1 | [https://github.com/broadinstitute/picard/releases/download/2.8.1/picard.jar](https://github.com/broadinstitute/picard/releases/download/2.8.1/picard.jar) |
+| R | 3.3.3 | r-base for openjdk:8 |
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner picard_mark_duplicates_2.8.1.cwl example_inputs.yaml
+```
+
diff --git a/docs/picard-tools/picard_mark_duplicates_4.1.8.1.md b/docs/picard-tools/picard_mark_duplicates_4.1.8.1.md
new file mode 100644
index 00000000..fe6c11f1
--- /dev/null
+++ b/docs/picard-tools/picard_mark_duplicates_4.1.8.1.md
@@ -0,0 +1,115 @@
+# MarkDuplicates v4.1.8.1
+
+## Version of tools in docker image
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| gatk | 4.1.8.1 | broadinstitute/gatk:4.1.8.1 |
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner picard_mark_duplicates_4.1.8.1.cwl example_inputs.yaml
+```
+
+### Usage
+
+```bash
+usage: picard_mark_duplicates_4.1.8.1.cwl [-h]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --memory_per_job MEMORY_PER_JOB
+ Memory per job in megabytes
+ --memory_overhead MEMORY_OVERHEAD
+ Memory overhead per job in megabytes
+ --number_of_threads NUMBER_OF_THREADS
+ --input INPUT Input file (bam or sam). Required.
+ --output_file_name OUTPUT_FILE_NAME
+ Output file (bam or sam).
+ --duplication_metrics DUPLICATION_METRICS
+ File to write duplication metrics to Required.
+ --assume_sort_order ASSUME_SORT_ORDER
+ Optional sort order to output in. If not supplied
+ OUTPUT is in the same order as INPUT.Default value:
+ null. Possible values: {unsorted, queryname,
+ coordinate}
+ --tmp_dir TMP_DIR This option may be specified 0 or more times
+ --validation_stringency VALIDATION_STRINGENCY
+ Validation stringency for all SAM files read by this
+ program. Setting stringency to SILENT can improve
+ performance when processing a BAM file in which
+ variable-length data (read, qualities, tags) do not
+ otherwise need to be decoded. Default value: STRICT.
+ This option can be set to 'null' to clear the default
+ value. Possible values: {STRICT,LENIENT, SILENT}
+ --bam_compression_level BAM_COMPRESSION_LEVEL
+ Compression level for all compressed files created
+ (e.g. BAM and GELI). Default value:5. This option can
+ be set to 'null' to clear the default value.
+ --create_bam_index Whether to create a BAM index when writing a
+ coordinate-sorted BAM file. Default value:false. This
+ option can be set to 'null' to clear the default
+ value. Possible values:{true, false}
+ --read_name_regex READ_NAME_REGEX
+ MarkDuplicates can use the tile and cluster positions
+ to estimate the rate of optical duplication in
+ addition to the dominant source of duplication, PCR,
+ to provide a more accurate estimation of library size.
+ By default (with no READ_NAME_REGEX specified),
+ MarkDuplicates will attempt to extract coordinates
+ using a split on ':' (see Note below). Set
+ READ_NAME_REGEX to 'null' to disable optical duplicate
+ detection. Note that without optical duplicate counts,
+ library size estimation will be less accurate. If the
+ read name does not follow a standard Illumina colon-
+ separation convention, but does contain tile and x,y
+ coordinates, a regular expression can be specified to
+ extract three variables: tile/region, x coordinate and
+ y coordinate from a read name. The regular expression
+ must contain three capture groups for the three
+ variables, in order. It must match the entire read
+ name. e.g. if field names were separated by semi-colon
+ (';') this example regex could be specified
+ (?:.*;)?([0-9]+)[^;]*;([0-9]+)[^;]*;([0-9]+)[^;]*$
+ Note that if no READ_NAME_REGEX is specified, the read
+ name is split on ':'. For 5 element names, the 3rd,
+ 4th and 5th elements are assumed to be tile, x and y
+ values. For 7 element names (CASAVA 1.8), the 5th,
+ 6th, and 7th elements are assumed to be tile, x and y
+ values.
+ --sorting_collection_size_ratio SORTING_COLLECTION_SIZE_RATIO
+ This number, plus the maximum RAM available to the
+ JVM, determine the memory footprint used by some of
+ the sorting collections. If you are running out of
+ memory, try reducing this number.
+ --use_jdk_deflater Use the JDK Deflater instead of the Intel Deflater for
+ writing compressed output
+ --use_jdk_inflater Use the JDK Inflater instead of the Intel Inflater
+ for reading compressed input. Default value:false.
+ This option can be set to 'null' to clear the default
+ value. Possible values:{true, false}
+ --duplicate_scoring_strategy DUPLICATE_SCORING_STRATEGY
+ The scoring strategy for choosing the non-duplicate
+ among candidates. Default value:SUM_OF_BASE_QUALITIES.
+ This option can be set to 'null' to clear the default
+ value.Possible values: {SUM_OF_BASE_QUALITIES,
+ TOTAL_MAPPED_REFERENCE_LENGTH, RANDOM}
+ --optical_duplicate_pixel_distance OPTICAL_DUPLICATE_PIXEL_DISTANCE
+ The maximum offset between two duplicate clusters in
+ order to consider them optical duplicates. The default
+ is appropriate for unpatterned versions of the
+ Illumina platform. For the patterned flowcell models,
+ 2500 is moreappropriate. For other platforms and
+ models, users should experiment to find what works
+ best. Default value: 100. This option can be set to
+ 'null' to clear the default value.
+```
+
diff --git a/docs/trim-galore/README.md b/docs/trim-galore/README.md
new file mode 100644
index 00000000..2f5f0270
--- /dev/null
+++ b/docs/trim-galore/README.md
@@ -0,0 +1,2 @@
+# Trim Galore
+
diff --git a/trim_galore_0.6.2/README.md b/docs/trim-galore/trim_galore_0.6.2.md
similarity index 83%
rename from trim_galore_0.6.2/README.md
rename to docs/trim-galore/trim_galore_0.6.2.md
index 3727924e..4ef02a4d 100644
--- a/trim_galore_0.6.2/README.md
+++ b/docs/trim-galore/trim_galore_0.6.2.md
@@ -1,25 +1,25 @@
-# CWL and Dockerfile for running Trim Galore
+# v0.6.2
-## Version of tools in docker image (/container/Dockerfile)
+## Version of tools in docker image \(/container/Dockerfile\)
-| Tool | Version | Location |
-|--- |--- |--- |
-| Ubuntu base image | 18.04 | - |
-| cutadapt | 2.3 | https://pypi.org/project/cutadapt/ |
-| FASTQC | 0.11.8 | https://www.bioinformatics.babraham.ac.uk/projects/fastqc/fastqc_v0.11.8.zip |
-| Trim Galore | 0.6.2 | https://github.com/FelixKrueger/TrimGalore/archive/0.6.2.tar.gz |
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| Ubuntu base image | 18.04 | - |
+| cutadapt | 2.3 | [https://pypi.org/project/cutadapt/](https://pypi.org/project/cutadapt/) |
+| FASTQC | 0.11.8 | [https://www.bioinformatics.babraham.ac.uk/projects/fastqc/fastqc\_v0.11.8.zip](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/fastqc_v0.11.8.zip) |
+| Trim Galore | 0.6.2 | [https://github.com/FelixKrueger/TrimGalore/archive/0.6.2.tar.gz](https://github.com/FelixKrueger/TrimGalore/archive/0.6.2.tar.gz) |
## CWL
-- CWL specification 1.0
-- Use example_inputs.yaml to see the inputs to the cwl
-- Example Command using [toil](https://toil.readthedocs.io):
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
```bash
> toil-cwl-runner trim_galore_0.6.2.cwl example_inputs.yaml
```
-**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command**
+**If at MSK, using the JUNO cluster you can use the following command**
```bash
#Using CWLTOOL
@@ -32,7 +32,7 @@
### Usage
-```
+```text
usage: trim_galore_0.6.2.cwl [-h]
positional arguments:
@@ -92,4 +92,5 @@ optional arguments:
--error_rate ERROR_RATE
Maximum allowed error rate (no. of errors divided by
the length of the matching region) (default: 0.1)
-```
\ No newline at end of file
+```
+
diff --git a/docs/ubuntu-utilites/README.md b/docs/ubuntu-utilites/README.md
new file mode 100644
index 00000000..5d69693e
--- /dev/null
+++ b/docs/ubuntu-utilites/README.md
@@ -0,0 +1,2 @@
+# Ubuntu utilities
+
diff --git a/docs/ubuntu-utilites/utilities_ubuntu_18.04.md b/docs/ubuntu-utilites/utilities_ubuntu_18.04.md
new file mode 100644
index 00000000..ba695d67
--- /dev/null
+++ b/docs/ubuntu-utilites/utilities_ubuntu_18.04.md
@@ -0,0 +1,26 @@
+# v18.04
+
+## Version of tools in docker image \(/container/Dockerfile\)
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| Ubuntu base image | 18.04 | - |
+
+## Available tools
+
+| Tool | Description |
+| :--- | :--- |
+| sort.cwl | sort lines of text files |
+| gzip.cwl | compress or expand files |
+| mv.cwl | move \(rename\) files |
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs\_toolname.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner gzip.cwl example_inputs_gzip.yaml
+```
+
diff --git a/docs/vardictjava/README.md b/docs/vardictjava/README.md
new file mode 100644
index 00000000..5cf8e57f
--- /dev/null
+++ b/docs/vardictjava/README.md
@@ -0,0 +1,3 @@
+# VardictJava
+
+
diff --git a/docs/vardictjava/vardictjava_1.8.2.md b/docs/vardictjava/vardictjava_1.8.2.md
new file mode 100644
index 00000000..3d9ae093
--- /dev/null
+++ b/docs/vardictjava/vardictjava_1.8.2.md
@@ -0,0 +1,69 @@
+# Vardict v1.8.2 - Single sample mode
+To run VarDictJava in single sample mode, vardict_workflow_single_sample.cwl should be run. vardict_workflow_single_sample.cwl will run 3 workflows to implement the example command in the original documentation as explained here:
+https://github.com/AstraZeneca-NGS/VarDictJava#single-sample-mode
+
+
+
+## Version of tools in docker image \(/container/Dockerfile\)
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| alpine base image | 3.8 | - |
+| vardict | 1.8.2 | [https://github.com/AstraZeneca-NGS/VarDictJava/releases/tag/v1.8.2](https://github.com/AstraZeneca-NGS/VarDictJava/releases/tag/v1.8.2) |
+| perl | 5.26.2-r1 | [https://pkgs.alpinelinux.org/package/edge/main/aarch64/perl](https://pkgs.alpinelinux.org/package/edge/main/aarch64/perl) |
+| r | 3.5.1 | [https://pkgs.alpinelinux.org/package/edge/community/x86/R](https://pkgs.alpinelinux.org/package/edge/community/x86/R) |
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner vardict_workflow_single_sample.cwl example_inputs.yaml
+```
+
+**If at MSK, using the JUNO cluster you can use the following command**
+
+```bash
+#Using CWLTOOL
+> cwltool --singularity --non-strict /path/to/vardict_1.8.2/vardict_workflow_single_sample.cwl /path/to/inputs.yaml
+
+#Using Toil-cwl-runner
+toil-cwl-runner --singularity vardict_workflow_single_sample.cwl example_inputs.yaml
+```
+
+### Usage
+
+```bash
+> toil-cwl-runner vardict_workflow_single_sample.cwl --help
+usage: vardict_workflow_single_sample.cwl [-h]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+
+-h, --help show this help message and exit
+-E E
+ The column for the region end, e.g. gene end
+-G G
+ The reference fasta. Should be indexed (.fai).
+-N N
+ The sample name to be used directly.
+-S S
+ The column for the region start, e.g. gene start
+-b B
+ The indexed BAM file.
+-c C
+ The column for chromosome
+-f F
+ The threshold for allele frequency, default: 0.01 or 1%
+--f_1 F_1
+ The minimum allele frequency. Used for var2vcf_valid.pl
+-g G
+ The column for a gene name, or segment annotation
+--vcf VCF
+ vcf file name given to var2vcf_valid.pl
+--bedfile BEDFILE
+```
diff --git a/docs/vcf2maf/README.md b/docs/vcf2maf/README.md
new file mode 100644
index 00000000..e349a6cd
--- /dev/null
+++ b/docs/vcf2maf/README.md
@@ -0,0 +1,3 @@
+# VCF2MAF
+
+
diff --git a/docs/vcf2maf/vcf2maf_1.6.21.md b/docs/vcf2maf/vcf2maf_1.6.21.md
new file mode 100644
index 00000000..6d8cd35d
--- /dev/null
+++ b/docs/vcf2maf/vcf2maf_1.6.21.md
@@ -0,0 +1,76 @@
+# CWL and Dockerfile for running vcf2maf v1.6.21
+
+## Version of tools in docker image (/container/Dockerfile)
+
+| Tool | Version | Location |
+|--- |--- |--- |
+| clearlinux (base image) | - | - |
+| vcf2maf | 1.6.21 | https://github.com/mskcc/vcf2maf/archive/v1.6.21.zip |
+| VEP | 105 | - |
+| MINICONDA_VERSION | py37_4.9.2 | https://repo.anaconda.com/miniconda/Miniconda3-py37_4.9.2-Linux-x86_64.sh |
+| BCFTOOLS_VERSION | 1.10.2 | - |
+| SAMTOOLS_VERSION | 1.10 | - |
+| VCF2MAF_VERSION | 1.6.21 | - |
+| HTSLIB_VERSION | 1.10.2 | - |
+
+## CWL
+
+- CWL specification 1.0
+- Use example_inputs.yaml to see the inputs to the cwl
+- Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner vcf2maf_1.6.21.cwl example_inputs.yaml
+```
+
+**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command**
+
+```bash
+#Using CWLTOOL
+> cwltool --singularity --non-strict /path/to/vcf2maf_1.6.21.cwl /path/to/inputs.yaml
+
+#Using toil-cwl-runner
+> mkdir vcf2maf_toil_log
+> toil-cwl-runner --singularity --logFile /path/to/vcf2maf_toil_log/cwltoil.log --jobStore /path/to/vcf2maf_jobStore --batchSystem lsf --workDir /path/to/vcf2maf_toil_log --outdir . --writeLogs /path/to/vcf2maf_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/vcf2maf_1.6.21.cwl /path/to/inputs.yaml > vcf2maf_toil.stdout 2> vcf2maf_toil.stderr &
+```
+
+### Usage
+
+```
+Usage:
+ perl vcf2maf.pl --help
+ perl vcf2maf.pl --input-vcf input.vcf --output-maf output.maf --tumor-id TUMOR_ID --normal-id NORMAL_ID
+
+--input-vcf Path to input file in VCF format
+--output-maf Path to output MAF file
+--tmp-dir Folder to retain intermediate VCFs after runtime [Default: Folder containing input VCF]
+--tumor-id Tumor_Sample_Barcode to report in the MAF [TUMOR]
+--normal-id Matched_Norm_Sample_Barcode to report in the MAF [NORMAL]
+--vcf-tumor-id Tumor sample ID used in VCF's genotype columns [--tumor-id]
+--vcf-normal-id Matched normal ID used in VCF's genotype columns [--normal-id]
+--custom-enst List of custom ENST IDs that override canonical selection
+--vep-path Folder containing the vep script [~/miniconda3/bin]
+--vep-data VEP's base cache/plugin directory [~/.vep]
+--vep-forks Number of forked processes to use when running VEP [4]
+--vep-custom String to pass into VEP's --custom option []
+--vep-config Config file to pass into VEP's --config option []
+--vep-overwrite Allow VEP to overwrite output VCF if it exists
+--buffer-size Number of variants VEP loads at a time; Reduce this for low memory systems [5000]
+--any-allele When reporting co-located variants, allow mismatched variant alleles too
+--inhibit-vep Skip running VEP, but extract VEP annotation in VCF if found
+--online Use useastdb.ensembl.org instead of local cache (supports only GRCh38 VCFs listing <100 events)
+--ref-fasta Reference FASTA file [~/.vep/homo_sapiens/102_GRCh37/Homo_sapiens.GRCh37.dna.toplevel.fa.gz]
+--max-subpop-af Add FILTER tag common_variant if gnomAD reports any subpopulation AFs greater than this [0.0004]
+--species Ensembl-friendly name of species (e.g. mus_musculus for mouse) [homo_sapiens]
+--ncbi-build NCBI reference assembly of variants MAF (e.g. GRCm38 for mouse) [GRCh37]
+--cache-version Version of offline cache to use with VEP (e.g. 75, 91, 102) [Default: Installed version]
+--maf-center Variant calling center to report in MAF [.]
+--retain-info Comma-delimited names of INFO fields to retain as extra columns in MAF []
+--retain-fmt Comma-delimited names of FORMAT fields to retain as extra columns in MAF []
+--retain-ann Comma-delimited names of annotations (within the VEP CSQ/ANN) to retain as extra columns in MAF []
+--min-hom-vaf If GT undefined in VCF, minimum allele fraction to call a variant homozygous [0.7]
+--remap-chain Chain file to remap variants to a different assembly before running VEP
+--verbose Print more things to log progress
+--help Print a brief help message and quit
+--man Print the detailed manual
+```
diff --git a/docs/waltz/README.md b/docs/waltz/README.md
new file mode 100644
index 00000000..0d43eaff
--- /dev/null
+++ b/docs/waltz/README.md
@@ -0,0 +1,2 @@
+# Waltz
+
diff --git a/waltz_count_reads_3.1.1/README.md b/docs/waltz/waltz_count_reads_3.1.1.md
similarity index 70%
rename from waltz_count_reads_3.1.1/README.md
rename to docs/waltz/waltz_count_reads_3.1.1.md
index 8ad83443..e3131e9f 100644
--- a/waltz_count_reads_3.1.1/README.md
+++ b/docs/waltz/waltz_count_reads_3.1.1.md
@@ -1,24 +1,25 @@
-# CWL and Dockerfile for running Waltz - Count Reads
+# CountReads v3.1.1
-## Version of tools in docker image (/container/Dockerfile)
+## Version of tools in docker image \(/container/Dockerfile\)
-| Tool | Version | Location |
-|--- |--- |--- |
-| java base image | 8 | - |
-| waltz | 3.1.1 | https://github.com/juberpatel/Waltz/releases/download/v3.1.1/Waltz-3.1.1.jar |
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| java base image | 8 | - |
+| waltz | 3.1.1 | [https://github.com/juberpatel/Waltz/releases/download/v3.1.1/Waltz-3.1.1.jar](https://github.com/juberpatel/Waltz/releases/download/v3.1.1/Waltz-3.1.1.jar) |
[](https://github.com/juberpatel/Waltz/releases/tag/v3.1.1)
+
## CWL
-- CWL specification 1.0
-- Use example_inputs.yml to see the inputs to the cwl
-- Example Command using [toil](https://toil.readthedocs.io):
+* CWL specification 1.0
+* Use example\_inputs.yml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
```bash
> toil-cwl-runner waltz_count_reads_3.1.1.cwl example_inputs.yml
```
-**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command**
+**If at MSK, using the JUNO cluster you can use the following command**
```bash
#Using CWLTOOL
@@ -55,3 +56,4 @@ optional arguments:
--number_of_threads NUMBER_OF_THREADS
--bed_file BED_FILE
```
+
diff --git a/waltz_pileupmatrices_3.1.1/README.md b/docs/waltz/waltz_pileupmatrices_3.1.1.md
similarity index 70%
rename from waltz_pileupmatrices_3.1.1/README.md
rename to docs/waltz/waltz_pileupmatrices_3.1.1.md
index b5aed666..cc432392 100644
--- a/waltz_pileupmatrices_3.1.1/README.md
+++ b/docs/waltz/waltz_pileupmatrices_3.1.1.md
@@ -1,24 +1,25 @@
-# CWL and Dockerfile for running Waltz - PileupMetrics
+# PileupMetrics v3.1.1
-## Version of tools in docker image (../waltz_count_reads_3.1.1/container/Dockerfile)
+## Version of tools in docker image \(../waltz\_count\_reads\_3.1.1/container/Dockerfile\)
-| Tool | Version | Location |
-|--- |--- |--- |
-| java base image | 8 | - |
-| waltz | 3.1.1 | https://github.com/juberpatel/Waltz/releases/download/v3.1.1/Waltz-3.1.1.jar |
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| java base image | 8 | - |
+| waltz | 3.1.1 | [https://github.com/juberpatel/Waltz/releases/download/v3.1.1/Waltz-3.1.1.jar](https://github.com/juberpatel/Waltz/releases/download/v3.1.1/Waltz-3.1.1.jar) |
[](https://github.com/juberpatel/Waltz/releases/tag/v3.1.1)
+
## CWL
-- CWL specification 1.0
-- Use example_inputs.yml to see the inputs to the cwl
-- Example Command using [toil](https://toil.readthedocs.io):
+* CWL specification 1.0
+* Use example\_inputs.yml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
```bash
> toil-cwl-runner waltz_pileupmatrices_3.1.1.cwl example_inputs.yml
```
-**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command**
+**If at MSK, using the JUNO cluster you can use the following command**
```bash
#Using CWLTOOL
@@ -56,3 +57,4 @@ optional arguments:
--number_of_threads NUMBER_OF_THREADS
--bed_file BED_FILE
```
+
diff --git a/expression_tools/README.md b/expression_tools/README.md
new file mode 100644
index 00000000..f0c79bf1
--- /dev/null
+++ b/expression_tools/README.md
@@ -0,0 +1,17 @@
+# CWL Expression tools
+
+## Available tools
+
+| Tool | Description |
+| -------- | ------------------------ |
+| put_in_dir.cwl | put the list of files into the same directory |
+
+## CWL
+
+- CWL specification 1.0
+- Use example_inputs_toolname.yaml to see the inputs to the cwl
+- Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner put_in_dir.cwl example_inputs_gzip.yaml
+```
diff --git a/expression_tools/put_in_dir.cwl b/expression_tools/put_in_dir.cwl
new file mode 100644
index 00000000..382b64e5
--- /dev/null
+++ b/expression_tools/put_in_dir.cwl
@@ -0,0 +1,103 @@
+#!/usr/bin/env cwl-runner
+# originally from https://github.com/mskcc/pluto-cwl
+
+cwlVersion: v1.0
+$namespaces:
+ dct: 'http://purl.org/dc/terms/'
+ doap: 'http://usefulinc.com/ns/doap#'
+ foaf: 'http://xmlns.com/foaf/0.1/'
+ sbg: 'https://www.sevenbridges.com/'
+class: ExpressionTool
+id: put-in-dir
+
+inputs:
+ output_directory_name:
+ type: string
+ doc: >-
+ Put all `files` in a directory called `output_directory_name`.
+ output_subdirectory_name:
+ type: string?
+ doc: >-
+ If specified, nest all `files` within a directory called `output_subdirectory_name`, which itself is within `output_directory_name`.
+ files:
+ type:
+ type: array
+ items:
+ - File
+ - type: array
+ items:
+ - File
+ - Directory
+ - 'null'
+
+outputs:
+ directory:
+ type: Directory
+
+# This tool returns a Directory object,
+# which holds all output files from the list
+# of supplied input files
+expression: |
+ ${
+ var output_files = [];
+ var input_files = inputs.files.filter(function(single_file) {
+ return String(single_file).toUpperCase() != 'NONE';
+ });
+
+ for (var i = 0; i < input_files.length; i++) {
+ // Handle list of list of files
+ if (input_files[i] && input_files[i].length) {
+ for (var ii = 0; ii < input_files[i].length; ii++) {
+ output_files.push(input_files[i][ii]);
+ }
+ // Handle list of files
+ } else if (input_files[i]) {
+ output_files.push(input_files[i]);
+ }
+ }
+
+ if (inputs.output_subdirectory_name) {
+ return {
+ 'directory': {
+ 'class': 'Directory',
+ 'basename': inputs.output_directory_name,
+ 'listing': [
+ {
+ 'class': 'Directory',
+ 'basename': inputs.output_subdirectory_name,
+ 'listing': output_files
+ }
+ ]
+ }
+ };
+ } else {
+ return {
+ 'directory': {
+ 'class': 'Directory',
+ 'basename': inputs.output_directory_name,
+ 'listing': output_files
+ }
+ };
+ }
+
+ }
+
+requirements:
+ - class: ResourceRequirement
+ ramMin: 2000
+ coresMin: 1
+ - class: InlineJavascriptRequirement
+'dct:contributor':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:murphyc4@mskcc.org'
+ 'foaf:name': Charlie Murphy
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'dct:creator':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:murphyc4@mskcc.org'
+ 'foaf:name': Charlie Murphy
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
diff --git a/fastp_0.20.1/README.md b/fastp_0.20.1/README.md
new file mode 100644
index 00000000..10e9866a
--- /dev/null
+++ b/fastp_0.20.1/README.md
@@ -0,0 +1,84 @@
+# CWL and Dockerfile for running Fastp
+
+## Version of tools in docker image
+
+| Tool | Version | Location |
+|--- |--- |--- |
+| fastp | 0.20.1 | quay.io/biocontainers/fastp:0.20.1--h8b12597_0 |
+
+
+## CWL
+
+- CWL specification 1.0
+- Use example_inputs.yaml to see the inputs to the cwl
+- Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner ./fastp_0.20.1.cwl example_inputs.yaml
+```
+
+**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command**
+
+```bash
+#Using CWLTOOL
+> cwltool ./fastp_0.20.1.cwl example_inputs.yaml
+
+#Using toil-cwl-runner
+> mkdir toil_log
+> toil-cwl-runner --singularity --logFile /path/to/toil_log/cwltoil.log --jobStore /path/to/jobStore --batchSystem lsf --workDir /path/to/toil_log --outdir . --writeLogs /path/to/toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/fastp_0.20.1/fastp_0.20.1.cwl /path/to/inputs.yaml > toil.stdout 2> toil.stderr &
+```
+
+### Usage
+```
+usage: fastp_0.20.1.cwl [-h] --read1_input READ1_INPUT --read1_output_path
+ READ1_OUTPUT_PATH [--read2_input READ2_INPUT]
+ [--read2_output_path READ2_OUTPUT_PATH]
+ [--unpaired1_path UNPAIRED1_PATH]
+ [--unpaired2_path UNPAIRED2_PATH]
+ [--failed_reads_path FAILED_READS_PATH]
+ [--read1_adapter_sequence READ1_ADAPTER_SEQUENCE]
+ [--read2_adapter_sequence READ2_ADAPTER_SEQUENCE]
+ [--minimum_read_length MINIMUM_READ_LENGTH]
+ --json_output_path JSON_OUTPUT_PATH --html_output_path
+ HTML_OUTPUT_PATH
+ [job_order]
+
+Setup and execute Fastp
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --read1_input READ1_INPUT
+ read1 input file name
+ --read1_output_path READ1_OUTPUT_PATH
+ read1 output file name
+ --read2_input READ2_INPUT
+ read2 input file name, for PE data
+ --read2_output_path READ2_OUTPUT_PATH
+ read2 output file name
+ --unpaired1_path UNPAIRED1_PATH
+ for PE input, if read1 passed QC but read2 not, it
+ will be written to unpaired1.
+ --unpaired2_path UNPAIRED2_PATH
+ for PE input, if read2 passed QC but read1 not, it
+ will be written to unpaired2.
+ --failed_reads_path FAILED_READS_PATH
+ specify the file to store reads that cannot pass the
+ filters.
+ --read1_adapter_sequence READ1_ADAPTER_SEQUENCE
+ the adapter for read1. For SE data, if not specified,
+ the adapter will be auto-detected. For PE data, this
+ is used if R1/R2 are found not overlapped.
+ --read2_adapter_sequence READ2_ADAPTER_SEQUENCE
+ the adapter for read2. For PE data, this is used if
+ R1/R2 are found not overlapped.
+ --minimum_read_length MINIMUM_READ_LENGTH
+ reads shorter than length_required will be discarded,
+ default is 15.
+ --json_output_path JSON_OUTPUT_PATH
+ the json format report file name
+ --html_output_path HTML_OUTPUT_PATH
+ the html format report file name
+```
diff --git a/fastp_0.20.1/example_inputs.yaml b/fastp_0.20.1/example_inputs.yaml
new file mode 100644
index 00000000..5d3af3bf
--- /dev/null
+++ b/fastp_0.20.1/example_inputs.yaml
@@ -0,0 +1,13 @@
+read1_input:
+ class: File
+ path: "./test_data/R1.fq"
+read2_input:
+ class: File
+ path: "./test_data/R2.fq"
+read1_output_path: "./R1.output"
+read2_output_path: "./R2.output"
+read1_adapter_sequence: "GATCGGAAGAGC"
+read2_adapter_sequence: "AGATCGGAAGAGC"
+minimum_read_length: 25
+json_output_path: "sample_name.json"
+html_output_path: "sample_name.html"
diff --git a/fastp_0.20.1/fastp_0.20.1.cwl b/fastp_0.20.1/fastp_0.20.1.cwl
new file mode 100644
index 00000000..f6356a6a
--- /dev/null
+++ b/fastp_0.20.1/fastp_0.20.1.cwl
@@ -0,0 +1,225 @@
+class: CommandLineTool
+cwlVersion: v1.0
+$namespaces:
+ dct: 'http://purl.org/dc/terms/'
+ doap: 'http://usefulinc.com/ns/doap#'
+ foaf: 'http://xmlns.com/foaf/0.1/'
+ sbg: 'https://www.sevenbridges.com/'
+id: fastp_0_20_1
+baseCommand:
+ - fastp
+inputs:
+ - id: memory_per_job
+ type: int?
+ doc: Memory per job in megabytes
+ - id: memory_overhead
+ type: int?
+ doc: Memory overhead per job in megabytes
+ - id: number_of_threads
+ type: int?
+ doc: 'worker thread number, default is 2 (int [=2])'
+ - id: read1_input
+ type: File
+ inputBinding:
+ position: 0
+ prefix: '--in1'
+ doc: |
+ read1 input file name
+ - id: read1_output_path
+ type: string
+ inputBinding:
+ position: 0
+ prefix: '--out1'
+ doc: |
+ read1 output file name
+ - id: read2_input
+ type: File?
+ inputBinding:
+ position: 0
+ prefix: '--in2'
+ doc: |
+ read2 input file name, for PE data
+ - id: read2_output_path
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: '--out2'
+ doc: |
+ read2 output file name
+ - id: unpaired1_path
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: '--unpaired1'
+ doc: >
+ for PE input, if read1 passed QC but read2 not, it will be written to
+ unpaired1.
+ - id: unpaired2_path
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: '--unpaired2'
+ doc: >
+ for PE input, if read2 passed QC but read1 not, it will be written to
+ unpaired2.
+ - id: failed_reads_path
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: '--failed_out'
+ doc: |
+ specify the file to store reads that cannot pass the filters.
+ - id: read1_adapter_sequence
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: '--adapter_sequence'
+ doc: >
+ the adapter for read1. For SE data, if not specified, the adapter will be
+ auto-detected. For PE data, this is used if R1/R2 are found not
+ overlapped.
+ - id: read2_adapter_sequence
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: '--adapter_sequence_r2'
+ doc: >
+ the adapter for read2. For PE data, this is used if R1/R2 are found not
+ overlapped.
+ - id: minimum_read_length
+ type: int?
+ inputBinding:
+ position: 0
+ prefix: '--length_required'
+ doc: |
+ reads shorter than length_required will be discarded, default is 15.
+ - id: maximum_read_length
+ type: int?
+ inputBinding:
+ position: 0
+ prefix: '--length_limit'
+ doc: >
+ reads longer than length_limit will be discarded, default 0 means no
+ limitation.
+ - id: max_len_read1
+ type: int?
+ inputBinding:
+ position: 0
+ prefix: '--max_len1'
+ doc: >-
+ if read1 is longer than max_len1, then trim read1 at its tail to make it
+ as long as max_len1. Default 0 means no limitation
+ - id: max_len_read2
+ type: int?
+ inputBinding:
+ position: 0
+ prefix: '--max_len2'
+ doc: >-
+ if read2 is longer than max_len2, then trim read2 at its tail to make it
+ as long as max_len2. Default 0 means no limitation. If it's not specified,
+ it will follow read1's settings
+ - default: fastp.json
+ id: json_output_path
+ type: string
+ inputBinding:
+ position: 0
+ prefix: '--json'
+ doc: |
+ the json format report file name
+ - default: fastp.html
+ id: html_output_path
+ type: string
+ inputBinding:
+ position: 0
+ prefix: '--html'
+ doc: |
+ the html format report file name
+ - id: disable_quality_filtering
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: '--disable_quality_filtering'
+ doc: >-
+ quality filtering is enabled by default. If this option is specified,
+ quality filtering is disabled
+ - id: disable_trim_poly_g
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: '--disable_trim_poly_g'
+ doc: >-
+ disable polyG tail trimming, by default trimming is automatically enabled
+ for Illumina NextSeq/NovaSeq data
+ - id: verbose
+ type: File?
+ inputBinding:
+ position: 0
+ prefix: '--verbose'
+ doc: output verbose log information (i.e. when every 1M reads are processed)
+outputs:
+ - id: fastp_json_output
+ type: File
+ outputBinding:
+ glob: $(inputs.json_output_path)
+ - id: fastp_html_output
+ type: File
+ outputBinding:
+ glob: $(inputs.html_output_path)
+ - id: fastp_read1_output
+ type: File
+ outputBinding:
+ glob: $(inputs.read1_output_path)
+ - id: fastp_read2_output
+ type: File?
+ outputBinding:
+ glob: $(inputs.read2_output_path)
+ - id: fastp_unpaired1_output
+ type: File?
+ outputBinding:
+ glob: $(inputs.unpaired1_path)
+ - id: fastp_unpaired2_output
+ type: File?
+ outputBinding:
+ glob: $(inputs.unpaired2_path)
+doc: Setup and execute Fastp
+label: fastp_0.20.1
+arguments:
+ - position: 0
+ prefix: '--thread'
+ valueFrom: |-
+ ${
+ if(inputs.number_of_threads)
+ return inputs.number_of_threads
+ return runtime.cores
+ }
+requirements:
+ - class: ResourceRequirement
+ ramMin: 17000
+ coresMin: 4
+ - class: DockerRequirement
+ dockerPull: 'ghcr.io/msk-access/fastp:0.20.1--h8b12597_0'
+ - class: InlineJavascriptRequirement
+'dct:contributor':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:murphyc4@mskcc.org'
+ 'foaf:name': Charlie Murphy
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:fraihaa@mskcc.org'
+ 'foaf:name': Adrian Fraiha
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:shahr2@mskcc.org'
+ 'foaf:name': Ronak Shah
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'dct:creator':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:shahr2@mskcc.org'
+ 'foaf:name': Ronak Shah
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'doap:release':
+ - class: 'doap:Version'
+ 'doap:name': fastp
+ 'doap:revision': 0.20.1
diff --git a/fgbio_call_duplex_consensus_reads_1.2.0/example_inputs.yaml b/fgbio_call_duplex_consensus_reads_1.2.0/example_inputs.yaml
new file mode 100644
index 00000000..59eaa165
--- /dev/null
+++ b/fgbio_call_duplex_consensus_reads_1.2.0/example_inputs.yaml
@@ -0,0 +1,17 @@
+error_rate_post_umi: null
+error_rate_pre_umi: null
+input: /path/to/bam_file
+max_reads_per_strand: null
+memory_overhead: null
+memory_per_job: null
+min_input_base_quality: null
+min_reads:
+ - 1
+ - 1
+ - 0
+number_of_threads: null
+output_file_name: null
+read_group_id: null
+read_name_prefix: null
+sort_order: null
+trim: null
diff --git a/fgbio_call_duplex_consensus_reads_1.2.0/fgbio_call_duplex_consensus_reads_1.2.0.cwl b/fgbio_call_duplex_consensus_reads_1.2.0/fgbio_call_duplex_consensus_reads_1.2.0.cwl
new file mode 100644
index 00000000..96211f57
--- /dev/null
+++ b/fgbio_call_duplex_consensus_reads_1.2.0/fgbio_call_duplex_consensus_reads_1.2.0.cwl
@@ -0,0 +1,226 @@
+class: CommandLineTool
+cwlVersion: v1.0
+$namespaces:
+ dct: 'http://purl.org/dc/terms/'
+ doap: 'http://usefulinc.com/ns/doap#'
+ foaf: 'http://xmlns.com/foaf/0.1/'
+ sbg: 'https://www.sevenbridges.com/'
+id: fgbio_call_duplex_consensus_reads_1_2_0
+baseCommand:
+ - fgbio
+inputs:
+ - id: memory_per_job
+ type: int?
+ doc: Memory per job in megabytes
+ - id: memory_overhead
+ type: int?
+ doc: Memory overhead per job in megabytes
+ - id: number_of_threads
+ type: int?
+ - id: input
+ type: File
+ inputBinding:
+ position: 2
+ prefix: '--input'
+ shellQuote: false
+ doc: The input SAM or BAM file.
+ - id: output_file_name
+ type: string?
+ doc: Output SAM or BAM file to write consensus reads.
+ - id: read_name_prefix
+ type: string?
+ inputBinding:
+ position: 2
+ prefix: '--read-name-prefix'
+ doc: The prefix all consensus read names
+ - id: read_group_id
+ type: string?
+ inputBinding:
+ position: 2
+ prefix: '--read-group-id'
+ doc: The new read group ID for all the consensus reads.
+ - id: error_rate_pre_umi
+ type: int?
+ inputBinding:
+ position: 2
+ prefix: '--error-rate-pre-umi'
+ doc: >-
+ The Phred-scaled error rate for an error prior to the UMIs being
+ integrated.
+ - id: error_rate_post_umi
+ type: int?
+ inputBinding:
+ position: 2
+ prefix: '--error-rate-post-umi'
+ doc: >-
+ The Phred-scaled error rate for an error post the UMIs have been
+ integrated.
+ - id: min_input_base_quality
+ type: int?
+ inputBinding:
+ position: 2
+ prefix: '--min-input-base-quality'
+ doc: Ignore bases in raw reads that have Q below this value.
+ - id: trim
+ type: boolean?
+ inputBinding:
+ position: 2
+ prefix: '--trim'
+ doc: 'If true, quality trim input reads in addition to masking low Q bases'
+ - id: sort_order
+ type: string?
+ inputBinding:
+ position: 2
+ prefix: '--sort-order'
+ doc: 'The sort order of the output, if :none: then the same as the input.'
+ - id: min_reads
+ type: 'int[]'
+ inputBinding:
+ position: 2
+ prefix: '--min-reads'
+ itemSeparator: ' '
+ shellQuote: false
+ doc: The minimum number of input reads to a consensus read.
+ - id: max_reads_per_strand
+ type: int?
+ inputBinding:
+ position: 2
+ prefix: '--max-reads-per-strand'
+ doc: >-
+ The maximum number of reads to use when building a single-strand
+ consensus. If more than this many reads are present in a tag family, the
+ family is randomly downsampled to exactly max-reads reads.
+ - id: temporary_directory
+ type: string?
+ doc: 'Default value: null.'
+ - id: async_io
+ type: string?
+ inputBinding:
+ position: 0
+ separate: false
+ prefix: '--async-io='
+ doc: >-
+ 'Use asynchronous I/O where possible, e.g. for SAM and BAM files [=true|false].'
+outputs:
+ - id: fgbio_call_duplex_consensus_reads_bam
+ type: File
+ outputBinding:
+ glob: |-
+ ${
+ if(inputs.output_file_name)
+ return inputs.output_file_name;
+ return inputs.input.basename.replace(/.bam/,'_cons.bam');
+ }
+doc: >-
+ Calls duplex consensus sequences from reads generated from the same
+ double-stranded source molecule. Prior to running this tool, read must have
+ been grouped with GroupReadsByUmi using the paired strategy. Doing so will
+ apply (by default) MI tags to all reads of the form */A and */B where the /A
+ and /B suffixes with the same identifier denote reads that are derived from
+ opposite strands of the same source duplex molecule.
+
+
+ Reads from the same unique molecule are first partitioned by source strand and
+ assembled into single strand consensus molecules as described by
+ CallMolecularConsensusReads. Subsequently, for molecules that have at least
+ one observation of each strand, duplex consensus reads are assembled by
+ combining the evidence from the two single strand consensus reads.
+
+
+ Because of the nature of duplex sequencing, this tool does not support
+ fragment reads - if found in the input they are ignored. Similarly, read pairs
+ for which consensus reads cannot be generated for one or other read (R1 or R2)
+ are omitted from the output.
+
+
+ Consensus reads have a number of additional optional tags set in the resulting
+ BAM file. The tag names follow a pattern where the first letter (a, b or c)
+ denotes that the tag applies to the first single strand consensus (a), second
+ single-strand consensus (b) or the final duplex consensus (c). The second
+ letter is intended to capture the meaning of the tag (e.g. d=depth, m=min
+ depth, e=errors/error-rate) and is upper case for values that are one per read
+ and lower case for values that are one per base.
+label: fgbio_call_duplex_consensus_reads_1.2.0
+arguments:
+ - position: 0
+ valueFrom: |-
+ ${
+ if(inputs.memory_per_job && inputs.memory_overhead) {
+ if(inputs.memory_per_job % 1000 == 0) {
+ return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G"
+ }
+ else {
+ return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G"
+ }
+ }
+ else if (inputs.memory_per_job && !inputs.memory_overhead){
+ if(inputs.memory_per_job % 1000 == 0) {
+ return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G"
+ }
+ else {
+ return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G"
+ }
+ }
+ else if(!inputs.memory_per_job && inputs.memory_overhead){
+ return "-Xmx10G"
+ }
+ else {
+ return "-Xmx10G"
+ }
+ }
+ - position: 0
+ valueFrom: '-XX:-UseGCOverheadLimit'
+ - position: 1
+ valueFrom: CallDuplexConsensusReads
+ - position: 0
+ prefix: '--tmp-dir='
+ separate: false
+ valueFrom: |-
+ ${
+ if(inputs.temporary_directory)
+ return inputs.temporary_directory;
+ return runtime.tmpdir
+ }
+ - position: 2
+ prefix: '--output'
+ shellQuote: false
+ valueFrom: |-
+ ${
+ if(inputs.output_file_name)
+ return inputs.output_file_name;
+ return inputs.input.basename.replace(/.bam/,'_cons.bam');
+ }
+ - position: 2
+ prefix: '--threads'
+ valueFrom: |-
+ ${
+ if(inputs.number_of_threads)
+ return inputs.number_of_threads
+ return runtime.cores
+ }
+requirements:
+ - class: ShellCommandRequirement
+ - class: ResourceRequirement
+ ramMin: 20000
+ coresMin: 16
+ - class: DockerRequirement
+ dockerPull: 'ghcr.io/msk-access/fgbio:1.2.0'
+ - class: InlineJavascriptRequirement
+'dct:contributor':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:shahr2@mskcc.org'
+ 'foaf:name': Ronak Shah
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'dct:creator':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:shahr2@mskcc.org'
+ 'foaf:name': Ronak Shah
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'doap:release':
+ - class: 'doap:Version'
+ 'doap:name': fgbio CallDuplexConsensusReads
+ 'doap:revision': 1.2.0
diff --git a/fgbio_collect_duplex_seq_metrics_1.2.0/example_inputs.yaml b/fgbio_collect_duplex_seq_metrics_1.2.0/example_inputs.yaml
new file mode 100644
index 00000000..38dd911b
--- /dev/null
+++ b/fgbio_collect_duplex_seq_metrics_1.2.0/example_inputs.yaml
@@ -0,0 +1,16 @@
+input:
+ class: File
+ metadata: {}
+ path: /path/to/bam
+output_prefix: prefix
+intervals:
+ class: File
+ metadata: {}
+ path: /path/to/intervals
+description: null
+duplex_umi_counts: null
+min_ab_reads: null
+min_ba_reads: null
+number_of_threads: null
+umi_tag: null
+mi_tag: null
diff --git a/fgbio_collect_duplex_seq_metrics_1.2.0/fgbio_collect_duplex_seq_metrics_1.2.0.cwl b/fgbio_collect_duplex_seq_metrics_1.2.0/fgbio_collect_duplex_seq_metrics_1.2.0.cwl
new file mode 100644
index 00000000..28697020
--- /dev/null
+++ b/fgbio_collect_duplex_seq_metrics_1.2.0/fgbio_collect_duplex_seq_metrics_1.2.0.cwl
@@ -0,0 +1,294 @@
+class: CommandLineTool
+cwlVersion: v1.0
+$namespaces:
+ dct: 'http://purl.org/dc/terms/'
+ doap: 'http://usefulinc.com/ns/doap#'
+ foaf: 'http://xmlns.com/foaf/0.1/'
+ sbg: 'https://www.sevenbridges.com/'
+id: fgbio_collect_duplex_seq_metrics_1_2_0
+baseCommand:
+ - fgbio
+inputs:
+ - id: memory_per_job
+ type: int?
+ doc: Memory per job in megabytes
+ - id: memory_overhead
+ type: int?
+ doc: Memory overhead per job in megabytes
+ - id: number_of_threads
+ type: int?
+ - id: input
+ type: File
+ inputBinding:
+ position: 2
+ prefix: '--input'
+ doc: Input BAM file generated by GroupReadByUmi.
+ - id: output_prefix
+ type: string?
+ doc: Prefix of output files to write.
+ - id: intervals
+ type: File?
+ inputBinding:
+ position: 2
+ prefix: '--intervals'
+ doc: 'Optional set of intervals over which to restrict analysis. [Optional].'
+ - id: description
+ type: string?
+ inputBinding:
+ position: 2
+ prefix: '--description'
+ doc: >-
+ Description of data set used to label plots. Defaults to sample/library.
+ [Optional].
+ - id: duplex_umi_counts
+ type: boolean?
+ inputBinding:
+ position: 2
+ prefix: '--duplex-umi-counts'
+ doc: >-
+ If true, produce the .duplex_umi_counts.txt file with counts of duplex UMI
+ observations. [Optional].
+ - id: min_ab_reads
+ type: int?
+ inputBinding:
+ position: 2
+ prefix: '--min-ab-reads'
+ doc: 'Minimum AB reads to call a tag family a ''duplex''. [Optional].'
+ - id: min_ba_reads
+ type: int?
+ inputBinding:
+ position: 2
+ prefix: '--min-ba-reads'
+ doc: 'Minimum BA reads to call a tag family a ''duplex''. [Optional].'
+ - id: umi_tag
+ type: string?
+ inputBinding:
+ position: 2
+ prefix: '--umi-tag'
+ doc: 'The tag containing the raw UMI. [Optional].'
+ - id: mi_tag
+ type: string?
+ inputBinding:
+ position: 2
+ prefix: '--mi-tag'
+ doc: 'The output tag for UMI grouping. [Optional].'
+ - id: temporary_directory
+ type: string?
+ doc: 'Default value: null.'
+ - id: async_io
+ type: string?
+ inputBinding:
+ position: 0
+ separate: false
+ prefix: '--async-io='
+ doc: >-
+ Use asynchronous I/O where possible, e.g. for SAM and BAM files [=true|false].
+outputs:
+ - id: fgbio_collect_duplex_seq_metrics_family_size
+ type: File
+ outputBinding:
+ glob: |-
+ ${
+ if(inputs.output_prefix){
+ return inputs.output_prefix + '.family_sizes.txt'
+ }
+ else{
+ return inputs.input.basename.replace('.bam','.family_sizes.txt')
+ }
+ }
+ - id: fgbio_collect_duplex_seq_metrics_duplex_family_size
+ type: File
+ outputBinding:
+ glob: |-
+ ${
+ if(inputs.output_prefix){
+ return inputs.output_prefix + '.duplex_family_sizes.txt'
+ }
+ else{
+ return inputs.input.basename.replace('.bam','.duplex_family_sizes.txt')
+ }
+ }
+ - id: fgbio_collect_duplex_seq_metrics_duplex_yield_metrics
+ type: File
+ outputBinding:
+ glob: |-
+ ${
+ if(inputs.output_prefix){
+ return inputs.output_prefix + '.duplex_yield_metrics.txt'
+ }
+ else{
+ return inputs.input.basename.replace('.bam','.duplex_yield_metrics.txt')
+ }
+ }
+ - id: fgbio_collect_duplex_seq_metrics_umi_counts
+ type: File
+ outputBinding:
+ glob: |-
+ ${
+ if(inputs.output_prefix){
+ return inputs.output_prefix + '.umi_counts.txt'
+ }
+ else{
+ return inputs.input.basename.replace('.bam','.umi_counts.txt')
+ }
+ }
+ - id: fgbio_collect_duplex_seq_metrics_duplex_qc
+ type: File?
+ outputBinding:
+ glob: |-
+ ${
+ if(inputs.output_prefix){
+ return inputs.output_prefix + '.duplex_qc.pdf'
+ }
+ else{
+ return inputs.input.basename.replace('.bam','.duplex_qc.pdf')
+ }
+ }
+ - id: fgbio_collect_duplex_seq_metrics_duplex_umi_counts
+ type: File?
+ outputBinding:
+ glob: |-
+ ${
+ if (inputs.output_prefix) {
+ return inputs.output_prefix + '.duplex_umi_counts.txt'
+ } else {
+ return inputs.input.basename.replace('.bam','.duplex_umi_counts.txt')
+ }
+ }
+doc: >-
+ Collects a suite of metrics to QC duplex sequencing data.
+
+ Inputs ------
+
+ The input to this tool must be a BAM file that is either:
+
+ 1. The exact BAM output by the 'GroupReadsByUmi' tool (in the sort-order it
+ was produced in) 2. A BAM file that has MI tags present on all reads (usually
+ set by 'GroupReadsByUmi') and has been sorted with
+ 'SortBam' into 'TemplateCoordinate' order.
+
+ Calculation of metrics may be restricted to a set of regions using the
+ '--intervals' parameter. This can significantly affect results as off-target
+ reads in duplex sequencing experiments often have very different properties
+ than on-target reads due to the lack of enrichment.
+
+ Several metrics are calculated related to the fraction of tag families that
+ have duplex coverage. The definition of "duplex" is controlled by the
+ '--min-ab-reads' and '--min-ba-reads' parameters. The default is to treat any
+ tag family with at least one observation of each strand as a duplex, but this
+ could be made more stringent, e.g. by setting '--min-ab-reads=3
+ --min-ba-reads=3'. If different thresholds are used then '--min-ab-reads' must
+ be the higher value.
+
+ Outputs -------
+
+ The following output files are produced:
+
+ 1.