Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
165 changes: 165 additions & 0 deletions biometrics_extract/0.2.16/biometrics_extract.cwl
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
# CWL wrapper for `biometrics extract` (biometrics 0.2.16).
#
# Builds the command line `biometrics extract ...` from the inputs below and
# collects the per-sample pickle file produced by the extraction step.
class: CommandLineTool
cwlVersion: v1.0
$namespaces:
  dct: 'http://purl.org/dc/terms/'
  doap: 'http://usefulinc.com/ns/doap#'
  foaf: 'http://xmlns.com/foaf/0.1/'
  sbg: 'https://www.sevenbridges.com/'
# The version suffix must stay in sync with the directory name, the Docker tag
# under `requirements` and `doap:revision` at the bottom of this file.
id: biometrics_extract_0_2_16
baseCommand:
  - biometrics
  - extract
inputs:
  - id: sample_bam
    type: File
    inputBinding:
      position: 0
      prefix: '--sample-bam'
    doc: BAM file.
    # `^` strips the last extension before appending, so the index is expected
    # as sample.bam -> sample.bai.
    secondaryFiles:
      - ^.bai
  - id: sample_sex
    type: string?
    inputBinding:
      position: 0
      prefix: '--sample-sex'
    doc: Expected sample sex (i.e. M or F).
  - id: sample_group
    type: string?
    inputBinding:
      position: 0
      prefix: '--sample-group'
    doc: The sample group (e.g. the sample patient ID).
  # Required here (even though the doc text describes a fallback) because the
  # output glob below builds the pickle file name from it.
  - id: sample_name
    type: string
    inputBinding:
      position: 0
      prefix: '--sample-name'
    doc: >-
      Sample name. If not specified, sample name is automatically figured out
      from the BAM file.
  - id: fafile
    type: File
    inputBinding:
      position: 0
      prefix: '--fafile'
    doc: Path to reference fasta.
    # NOTE(review): this pattern resolves ref.fasta -> ref.fasta.fai; a
    # reference with another extension (e.g. ref.fa) would be looked up as
    # ref.fasta.fai as well. Confirm that is intended.
    secondaryFiles:
      - ^.fasta.fai
  - id: vcf_file
    type: File
    inputBinding:
      position: 0
      prefix: '--vcf'
    doc: VCF file containing the SNPs to be queried.
  - id: bed_file
    type: File?
    inputBinding:
      position: 0
      prefix: '--bed'
    doc: BED file containing the intervals to be queried.
  - id: database
    type: string?
    inputBinding:
      position: 0
      prefix: '--database'
    doc: >-
      Directory to store the intermediate files after running the extraction
      step.
  - default: 1
    id: min_mapping_quality
    type: int?
    inputBinding:
      position: 0
      prefix: '--min-mapping-quality'
    doc: Minimum mapping quality of reads to be used for pileup.
  - default: 1
    id: min_base_quality
    type: int?
    inputBinding:
      position: 0
      prefix: '--min-base-quality'
    doc: Minimum base quality of reads to be used for pileup.
  - default: 10
    id: min_coverage
    type: int?
    inputBinding:
      position: 0
      prefix: '--min-coverage'
    doc: Minimum coverage to count a site.
  - default: 0.1
    id: min_homozygous_thresh
    type: float?
    inputBinding:
      position: 0
      prefix: '--min-homozygous-thresh'
    doc: Minimum threshold to define homozygous.
  - id: default_genotype
    type: string?
    inputBinding:
      position: 0
      prefix: '--default-genotype'
    doc: Default genotype if coverage is too low (options are Het or Hom).
  # No inputBinding: this value is never passed to `biometrics`; it is only
  # read by the outputEval expression of `biometrics_extract_pickle`.
  - id: file_type
    type: string?
    doc: >-
      Specify the type of bam file you are generating the pickle for to be
      incorporated in pickle file name (Myeloid_1_L001_duplex.pickle)
outputs:
  # NOTE(review): this output is required (type File). Confirm that
  # `biometrics extract` really writes ALL_FPsummary.txt into the working
  # directory; if it does not, output collection will fail.
  - id: ALL_FPsummary.txt
    type: File
    outputBinding:
      glob: ALL_FPsummary.txt
  - id: biometrics_extract_pickle
    type: File
    outputBinding:
      # The pickle is looked up as <sample_name>.pickle, inside the `database`
      # directory when one was given, otherwise in the working directory.
      glob: |-
        ${
          if (inputs.database) {
            return inputs.database + '/' + inputs.sample_name + '.pickle';
          }
          else {
            return inputs.sample_name + '.pickle';
          }
        }
      # When `file_type` is set, the basename of the collected File object is
      # overridden to <sample_name>_<file_type>.pickle.
      outputEval: |-
        ${
          if (inputs.file_type) {
            self[0].basename = inputs.sample_name + '_' + inputs.file_type + ".pickle";
            return self;
          }
          else {
            return self;
          }
        }
requirements:
  - class: ResourceRequirement
    ramMin: 24000
    coresMin: 4
  - class: DockerRequirement
    dockerPull: 'ghcr.io/msk-access/biometrics:0.2.16'
  - class: InlineJavascriptRequirement
'dct:contributor':
  - class: 'foaf:Organization'
    'foaf:member':
      - class: 'foaf:Person'
        'foaf:mbox': 'mailto:murphyc4@mskcc.org'
        'foaf:name': Charlie Murphy
      - class: 'foaf:Person'
        'foaf:mbox': 'mailto:shahr2@mskcc.org'
        'foaf:name': Ronak Shah
      - class: 'foaf:Person'
        'foaf:mbox': 'mailto:charlk@mskcc.org'
        'foaf:name': Carmelina Charlambous
    'foaf:name': Memorial Sloan Kettering Cancer Center
'dct:creator':
  - class: 'foaf:Organization'
    'foaf:member':
      - class: 'foaf:Person'
        'foaf:mbox': 'mailto:shahr2@mskcc.org'
        'foaf:name': Ronak Shah
    'foaf:name': Memorial Sloan Kettering Cancer Center
'doap:release':
  - class: 'doap:Version'
    'doap:name': biometrics
    # Quoted so the version is always read as a string.
    'doap:revision': '0.2.16'
24 changes: 24 additions & 0 deletions biometrics_extract/0.2.16/example_inputs.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Example job file for biometrics_extract.cwl (0.2.16).
# Each key is an input id declared by the tool. The tool declares scalar
# `string` / `File` types, so values are given as scalars rather than lists.
# `null` leaves an optional input unset so the tool's own default applies.
sample_sex: "M"
sample_name: "test"
sample_group: "test"
fafile:
  class: File
  path: /path/to/fasta
sample_bam:
  class: File
  path: /path/to/bam
bed_file: null
vcf_file:
  class: File
  path: /path/to/vcf
database: null
min_mapping_quality: null
min_base_quality: null
min_coverage: null
min_homozygous_thresh: null
default_genotype: null
# Optional suffix added to the reported pickle file name
# (<sample_name>_<file_type>.pickle) when set.
file_type: null
76 changes: 76 additions & 0 deletions docs/biometrics/biometrics_extract_0.2.16.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# CWL and Dockerfile for running biometrics

## Version of tools in docker image (/container/Dockerfile)

| Tool | Version | Location |
|--- |--- |--- |
| biometrics_extract | 0.2.16 | <https://github.com/msk-access/biometrics> |

## CWL

- CWL specification 1.0
- Use example_inputs.yaml to see the inputs to the CWL
- Example Command using [toil](https://toil.readthedocs.io):

```bash
> toil-cwl-runner biometrics_extract.cwl example_inputs.yaml
```

```bash
#Using CWLTOOL
> cwltool --singularity --non-strict /path/to/biometrics_extract.cwl /path/to/example_inputs.yaml

#Using toil-cwl-runner
> mkdir tool_toil_log
> toil-cwl-runner --singularity --logFile /path/to/tool_toil_log/cwltoil.log --jobStore /path/to/tool_jobStore --batchSystem lsf --workDir /path/to/tool_toil_log --outdir . --writeLogs /path/to/tool_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/biometrics_extract.cwl /path/to/example_inputs.yaml > tool_toil.stdout 2> tool_toil.stderr &
```

### Usage

```bash
> toil-cwl-runner biometrics_extract.cwl -h
usage: biometrics_extract/0.2.16/biometrics_extract.cwl [-h] --sample_bam SAMPLE_BAM
[--sample_sex SAMPLE_SEX]
[--sample_group SAMPLE_GROUP] --sample_name
SAMPLE_NAME --fafile FAFILE --vcf_file VCF_FILE
[--bed_file BED_FILE] [--database DATABASE]
[--min_mapping_quality MIN_MAPPING_QUALITY]
[--min_base_quality MIN_BASE_QUALITY]
[--min_coverage MIN_COVERAGE]
[--min_homozygous_thresh MIN_HOMOZYGOUS_THRESH]
[--default_genotype DEFAULT_GENOTYPE]
[--file_type FILE_TYPE]
[job_order]

positional arguments:
job_order Job input json file

optional arguments:
-h, --help show this help message and exit
--sample_bam SAMPLE_BAM
BAM file.
--sample_sex SAMPLE_SEX
Expected sample sex (i.e. M or F).
--sample_group SAMPLE_GROUP
The sample group (e.g. the sample patient ID).
--sample_name SAMPLE_NAME
Sample name. If not specified, sample name is automatically figured out from the
BAM file.
--fafile FAFILE Path to reference fasta.
--vcf_file VCF_FILE VCF file containing the SNPs to be queried.
--bed_file BED_FILE BED file containing the intervals to be queried.
--database DATABASE Directory to store the intermediate files after running the extraction step.
--min_mapping_quality MIN_MAPPING_QUALITY
Minimum mapping quality of reads to be used for pileup.
--min_base_quality MIN_BASE_QUALITY
Minimum base quality of reads to be used for pileup.
--min_coverage MIN_COVERAGE
Minimum coverage to count a site.
--min_homozygous_thresh MIN_HOMOZYGOUS_THRESH
Minimum threshold to define homozygous.
--default_genotype DEFAULT_GENOTYPE
Default genotype if coverage is too low (options are Het or Hom).
--file_type FILE_TYPE
Specify the type of bam file you are generating the pickle for to be incorporated
in pickle file name (Myeloid_1_L001_duplex.pickle)
```