diff --git a/CHANGELOG.md b/CHANGELOG.md index 60ab3dd5..5e31c9da 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,7 @@ # JaneliaSciComp/easifish: Changelog +* Added spots count and spots intensities + * Added spots warping using Bigstream * Added RS-FISH module for spot extraction diff --git a/conf/modules.config b/conf/modules.config index 069cc844..bf261c63 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -16,3 +16,5 @@ includeConfig './multiscale-modules.config' includeConfig './segmentation-modules.config' includeConfig './spot-extraction-modules.config' includeConfig './warp-spots-modules.config' +includeConfig './spots-features-modules.config' + diff --git a/conf/spots-features-modules.config b/conf/spots-features-modules.config new file mode 100644 index 00000000..2c13bf6f --- /dev/null +++ b/conf/spots-features-modules.config @@ -0,0 +1,10 @@ +params { + spots_counts_cores = 1 + spots_counts_mem_gb = 2 + spots_props_cores = 1 + spots_props_mem_gb = 2 + spots_counts_subdir = 'spots-counts' + spots_props_subdir = 'spots-props' + dapi_channel = '' + bleeding_channel = '' +} diff --git a/examples/tiny_sample_params.json b/examples/tiny_sample_params.json index 29f01953..a2463f52 100644 --- a/examples/tiny_sample_params.json +++ b/examples/tiny_sample_params.json @@ -33,7 +33,8 @@ "cellpose_test": false, "cellpose_verbose": true, - "cellpose_dask_workers": 3, + "cellpose_dask_workers": 1, + "cellpose_dask_worker_cpus": 4, "cellpose_merge_with_iou_only": false, "cellpose_worker_runtime_opts": "--nv", @@ -50,6 +51,9 @@ "multiscale_spark_worker_cores": 20, "multiscale_spark_gb_per_core": 5, + "dapi_channel": "c1", + "bleeding_channel": "c0", + "lsf_opts": "-P scicompsoft", "cellpose_worker_lsf_opts": "-q gpu_l4 -gpu 'num=1'" } diff --git a/modules/local/post_rs_fish/main.nf b/modules/local/post_rs_fish/main.nf index 1ae36269..9a38ed60 100644 --- a/modules/local/post_rs_fish/main.nf +++ b/modules/local/post_rs_fish/main.nf @@ -49,5 +49,4 @@ process POST_RS_FISH { post-rs-fish: v1 END_VERSIONS """ - } diff --git a/modules/local/spots/counts/main.nf b/modules/local/spots/counts/main.nf new file mode 100644 index 00000000..a3473cb7 --- /dev/null +++ b/modules/local/spots/counts/main.nf @@ -0,0 +1,60 @@ +process SPOTS_COUNTS { + tag { meta.id } + container { task.ext.container ?: 'ghcr.io/janeliascicomp/easifish-spots-utils:v1' } + cpus { ncpus } + memory { "${mem_in_gb}GB" } + + input: + tuple val(meta), + path(input_image_path, stageAs: 'image/*'), + val(input_dataset), + path(labels_path, stageAs: 'labels/*'), + val(labels_dataset), + path(spots_input_dir, stageAs: 'spots/*'), + val(input_pattern), + path(output_dir, stageAs: 'output/*') + val(ncpus) + val(mem_in_gb) + + output: + tuple val(meta), + env(full_input_image_path), + val(input_dataset), + env(full_spots_input_dir), + env(output_csv_file), emit: results + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + """ + full_input_image_path=\$(readlink -e ${input_image_path}) + full_labels_path=\$(readlink -e ${labels_path}) + full_spots_input_dir=\$(readlink -e ${spots_input_dir}) + full_output_dir=\$(readlink ${output_dir}) + + mkdir -p \${full_output_dir} + + output_csv_file="\${full_output_dir}/count.csv" + + CMD=( + python + /opt/scripts/spots-utils/labeled-spots-counts.py + --image-container \${full_input_image_path} + --image-subpath ${input_dataset} + --labels-container \${full_labels_path} + --labels-subpath ${labels_dataset} + --spots-pattern \"\${full_spots_input_dir}/${input_pattern}\" + --output \${output_csv_file} + ) + echo "CMD: \${CMD[@]}" + (exec "\${CMD[@]}") + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + assign-spots: v1 + END_VERSIONS + """ + +} diff --git a/modules/local/spots/props/main.nf b/modules/local/spots/props/main.nf new file mode 100644 index 00000000..2779d342 --- /dev/null +++ b/modules/local/spots/props/main.nf @@ -0,0 +1,62 @@ +process SPOTS_PROPS { + tag { meta.id } + container { task.ext.container ?: 'ghcr.io/janeliascicomp/easifish-spots-utils:v1' } + cpus { ncpus } + memory { "${mem_in_gb}GB" } + + input: + tuple val(meta), + path(input_image_path, stageAs: 'image/*'), + val(input_dataset), + path(labels_path, stageAs: 'labels/*'), + val(labels_dataset), + val(dapi_dataset), + val(bleed_dataset), + path(output_dir, stageAs: 'output/*'), + val(output_name) + val(ncpus) + val(mem_in_gb) + + output: + tuple val(meta), + env(full_input_image_path), + val(input_dataset), + env(output_csv_file) , emit: results + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + dapi_dataset_arg = dapi_dataset ? "--dapi-subpath ${dapi_dataset}" : "" + bleed_dataset_arg = bleed_dataset ? "--bleed-subpath ${bleed_dataset}" : "" + """ + full_input_image_path=\$(readlink -e ${input_image_path}) + full_labels_path=\$(readlink -e ${labels_path}) + full_output_dir=\$(readlink ${output_dir}) + + mkdir -p \${full_output_dir} + + output_csv_file="\${full_output_dir}/${output_name}" + + CMD=( + python + /opt/scripts/spots-utils/labeled-spots-props.py + --image-container \${full_input_image_path} + --image-subpath ${input_dataset} + --labels-container \${full_labels_path} + --labels-subpath ${labels_dataset} + ${dapi_dataset_arg} + ${bleed_dataset_arg} + --output \${output_csv_file} + ) + echo "CMD: \${CMD[@]}" + (exec "\${CMD[@]}") + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + measure-spots: v1 + END_VERSIONS + """ + +} diff --git a/nextflow_schema.json b/nextflow_schema.json index 7658f2a0..ec57a315 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -514,6 +514,166 @@ }, "help_text": "Registration parameters" }, + "spot_extraction": { + "title": "Spot extraction", + "type": "object", + "description": "Spot extraction options", + "help_text": "Spot extraction arguments", + "fa_icon": "far fa-map", + "properties": { + "skip_spot_extraction": { + "type": "boolean", + "description": "If set skip spot extraction" + }, + "spot_extraction_subdir": { + "type": "string", + "fa_icon": "fas fa-folder-open", + "description": "Spot extraction results sub-directory" + }, + "spot_extraction_ids": { + "type": "string", + "description": "rounds used for extracting spots" + }, + "spot_channels": { + "type": "string", + "description": "Comma delimited list of spot channels. This together with spot_scales is used for determining the datasets used for spot extraction." + }, + "spot_scales": { + "type": "string", + "description": "image scale used for spot extraction. This together with spot_channels is used for determining the datasets used for spot extraction." + }, + "spot_subpaths": { + "type": "string", + "description": "alternative for defining the datasets used for spot extraction. If this is defined it takes priority over spot_channels and spot_scales." + }, + "rsfish_min_intensity": { + "type": "integer", + "description": "Minimal intensity of the image. Default: 0", + "fa_icon": "fas fa-angle-double-down" + }, + "rsfish_max_intensity": { + "type": "integer", + "description": "Maximal intensity of the image. Default: 4096", + "fa_icon": "fas fa-angle-double-up" + }, + "rsfish_anisotropy": { + "type": "number", + "description": "The anisotropy factor. Default: 0.7", + "help_text": "Scaling of z relative to xy. Can be determined using the RS-FISH anisotropy plugin in Fiji.", + "fa_icon": "fas fa-arrows-alt" + }, + "rsfish_sigma": { + "type": "number", + "description": "Sigma value for Difference-of-Gaussian (DoG) calculation. Default 1.5", + "fa_icon": "fab fa-etsy" + }, + "rsfish_threshold": { + "type": "number", + "description": "Threshold value for Difference-of-Gaussian (DoG) calculation. Default: 0.007", + "fa_icon": "fas fa-level-down-alt" + }, + "rsfish_background": { + "type": "integer", + "description": "Background subtraction method, 0 == None, 1 == Mean, 2==Median, 3==RANSAC on Mean, 4==RANSAC on Median. Default: 0 (None)", + "fa_icon": "fas fa-level-down-alt" + }, + "rsfish_intensity": { + "type": "integer", + "description": "Intensity calculation method, 0 == Linear Interpolation, 1 == Gaussian fit (on inlier pixels), 2 == Integrate spot intensities (on candidate pixels). Default: 0 (Linear Interpolation)", + "fa_icon": "fas fa-level-down-alt" + }, + "rsfish_spark_workers": { + "type": "integer", + "fa_icon": "fas fa-cogs", + "description": "Number of Spark workers to use for spot extraction.", + "default": 2 + }, + "rsfish_min_spark_workers": { + "type": "integer", + "fa_icon": "fas fa-cogs", + "description": "Minimum number of Spark workers needed to start spot extraction pipeline.", + "default": 1 + }, + "rsfish_spark_worker_cores": { + "type": "integer", + "fa_icon": "fas fa-microchip", + "description": "Number of cores allocated to each Spark worker.", + "default": 5 + }, + "rsfish_spark_gb_per_core": { + "type": "integer", + "fa_icon": "fas fa-cog", + "description": "Size of memory (in GB) that is allocated for each core of a Spark worker.", + "help_text": "The total memory usage for extracting spots from one acquisition will be workers * worker_cores * gb_per_core.", + "default": 5 + }, + "rsfish_spark_driver_cores": { + "type": "integer", + "fa_icon": "fas fa-microchip", + "description": "Number of cores allocated for the Spark driver. Default: 1", + "default": 1 + }, + "rsfish_spark_driver_mem_gb": { + "type": "integer", + "fa_icon": "fas fa-memory", + "description": "Amount of memory to allocate for the Spark driver. Default: 12g", + "default": 4 + } + } + }, + "spots_features": { + "title": "Spots stats and characteristics", + "type": "object", + "description": "Various spots characteristcs, such as counts, area, intensity", + "fa_icon": "far fa-map", + "properties": { + "spots_counts_subdir": { + "type": "string", + "fa_icon": "fas fa-folder-open", + "description": "data sub-directory used for spots counts" + }, + "spots_props_subdir": { + "type": "string", + "fa_icon": "fas fa-folder-open", + "description": "data sub-directory used for spots properties" + }, + "dapi_channel": { + "type": "string", + "fa_icon": "fas fa-asterisk", + "description": "Name of the DAPI channel.", + "help_text": "The DAPI channel is used as a reference channel for registration, segmentation, and spot extraction." + }, + "bleed_channel": { + "type": "string", + "fa_icon": "fas fa-asterisk", + "description": "Channel (other than DAPI) that needs bleedthrough correction." + }, + "spots_counts_cores": { + "type": "integer", + "fa_icon": "fas fa-microchip", + "description": "Number of cores needed for getting spots counts.", + "default": 1 + }, + "spots_counts_mem_gb": { + "type": "integer", + "fa_icon": "fas fa-memory", + "description": "Amount of memory needed for getting spots counts", + "default": 2 + }, + "spots_props_cores": { + "type": "integer", + "fa_icon": "fas fa-microchip", + "description": "Number of cores needed for getting spots properties (intensity).", + "default": 1 + }, + "spots_props_mem_gb": { + "type": "integer", + "fa_icon": "fas fa-memory", + "description": "Amount of memory needed for getting spots properties (intensity)", + "default": 2 + } + } + }, "institutional_config_options": { "title": "Institutional config options", "type": "object", @@ -1030,6 +1190,12 @@ { "$ref": "#/definitions/segmentation" }, + { + "$ref": "#/definitions/spot_extraction" + }, + { + "$ref": "#/definitions/spots_features" + }, { "$ref": "#/definitions/generic_options" } diff --git a/workflows/easifish.nf b/workflows/easifish.nf index c83d798e..f3dd02c0 100644 --- a/workflows/easifish.nf +++ b/workflows/easifish.nf @@ -19,12 +19,14 @@ include { */ -include { INPUT_CHECK } from '../subworkflows/local/input_check' -include { STITCHING } from '../subworkflows/local/stitching' -include { REGISTRATION } from './registration' -include { SEGMENTATION } from './segmentation' -include { SPOT_EXTRACTION } from './spot_extraction' -include { WARP_SPOTS } from './warp_spots' +include { INPUT_CHECK } from '../subworkflows/local/input_check' +include { STITCHING } from '../subworkflows/local/stitching' +include { REGISTRATION } from './registration' +include { SEGMENTATION } from './segmentation' +include { SPOT_EXTRACTION } from './spot_extraction' +include { WARP_SPOTS } from './warp_spots' +include { SPOTS_STATS } from './spots_features' +include { EXTRACT_SPOTS_PROPS } from './spots_features' def validate_params() { @@ -160,14 +162,32 @@ workflow EASIFISH { spot_extraction_results.subscribe { log.debug "Spot extraction result: $it " } - WARP_SPOTS( + def warped_spots_results = WARP_SPOTS( registration_results, spot_extraction_results, outdir, + ) // final_spot_results includes spots for fixed and warped spots from the moving rounds + + warped_spots_results.subscribe { log.debug "Warped spots results: $it " } + + def spots_stats_results = SPOTS_STATS( + warped_spots_results, + segmentation_results, + outdir, ) + spots_stats_results.subscribe { log.debug "Spots stats: $it " } + + def spots_props = EXTRACT_SPOTS_PROPS( + registration_results, + segmentation_results, + outdir, + ) + + spots_props.subscribe { log.debug "Spots props: $it " } } + workflow.onComplete { if (params.email || params.email_on_fail) { NfcoreTemplate.email(workflow, params, summary_params, projectDir, log) diff --git a/workflows/spots_features.nf b/workflows/spots_features.nf new file mode 100644 index 00000000..1ab6ab5a --- /dev/null +++ b/workflows/spots_features.nf @@ -0,0 +1,258 @@ +include { SPOTS_PROPS } from '../modules/local/spots/props/main' +include { SPOTS_COUNTS } from '../modules/local/spots/counts/main' +include { as_list } from './util_functions' + +workflow SPOTS_STATS { + take: + ch_spots // channel: [ meta_spots, meta_reg, spots_input_image, spots_input_dataset, spots, warped_spots ] + ch_segmentation // channel: [ meta, seg_input_image, seg_input_dataset, seg_labels ] + outputdir + + main: + def ch_segmentation_with_id = ch_segmentation + | map { + def (meta, seg_input_image, seg_input_dataset, seg_labels) = it + def id = meta.id + def r = [ id, meta, seg_input_image, seg_input_dataset, seg_labels ] + log.debug "Segmentation data for spots sizes: $r" + r + } + + def spots_counts_input = ch_spots + | filter { + def (meta_spots, meta_reg, spots_input_image, spots_input_dataset, source_spots, final_spots) = it + if (!final_spots) { + log.debug "Filter out spots input for: $it" + return false + } + return true + } + | map { + def (meta_spots, meta_reg, spots_input_image, spots_input_dataset, source_spots, final_spots) = it + def id = meta_reg.fix_id + def r = [ id, meta_spots, spots_input_image, spots_input_dataset, source_spots, final_spots ] + log.debug "Spots data for spots sizes: $r" + r + } + | groupTuple(by: 0) + | join(ch_segmentation_with_id, by: 0) + | flatMap { + def (id, + all_meta_spots, + all_spots_image_containers, all_spots_datasets, + all_source_spots, all_final_spots, + meta_seg, + seg_input_image, + seg_input_dataset, + seg_labels) = it + + [all_meta_spots, + all_spots_image_containers, + all_spots_datasets, + all_source_spots, + all_final_spots].transpose() + .collect { + def (meta_spots, spots_image_container, spots_dataset, source_spots, final_spots) = it + def spots_dir = file(final_spots).parent + def spots_counts_output_dir = file("${outputdir}/${params.spots_counts_subdir}/${meta_spots.id}") + def adjusted_spots_dataset = sync_image_scale_with_labels_scale(spots_dataset, seg_input_dataset) + + def r = [ + meta_spots, + spots_image_container, adjusted_spots_dataset, + seg_labels, seg_input_dataset, + spots_dir, + '*coord.csv', + spots_counts_output_dir, + ] + log.debug "Spots sizes input: $r" + r + } + } + + def spots_counts_outputs = SPOTS_COUNTS( + spots_counts_input, + params.spots_counts_cores, + params.spots_counts_mem_gb, + ) + + emit: + done = spots_counts_outputs.results +} + +workflow EXTRACT_SPOTS_PROPS { + take: + ch_registration + ch_segmentation + outdir + + main: + def registered_images = ch_registration + | map { + def (reg_meta, + fix, fix_subpath, + mov, mov_subpath, + warped, warped_subpath, + transform_output, + transform_name, transform_subpath, + inv_transform_output, + inv_transform_name, inv_transform_subpath) = it + // include the registered round id + // so that it can be used for properly creating the dataset + [ reg_meta.fix_id, reg_meta.mov_id, warped, warped_subpath ] + } + + def fixed_images = ch_registration + | map { + def (reg_meta, + fix, fix_subpath, + mov, mov_subpath, + warped, warped_subpath, + transform_output, + transform_name, transform_subpath, + inv_transform_output, + inv_transform_name, inv_transform_subpath) = it + // the fixed round id is included + // to make the result compatible with the one for moving rounds + [ reg_meta.fix_id, reg_meta.fix_id, fix, fix_subpath ] + } + | unique { it[0] } // unique by id + + def ch_segmentation_with_id = ch_segmentation + | map { + def (meta, seg_input_image, seg_input_dataset, seg_labels) = it + def id = meta.id + def r = [ id, meta, seg_input_image, seg_input_dataset, seg_labels ] + log.debug "Segmentation data for regionprops: $r" + r + } + + def regionprops_inputs = fixed_images + | concat(registered_images) + | flatMap { + def (join_id, image_id, image_container, image_dataset) = it + log.debug "Images for regionprops: $it" + get_spot_subpaths(image_id).collect { subpath, result_name -> + def r = [ + join_id, + image_id, + image_container, + subpath, + result_name, + ] + log.debug "Image for regionprops: $r" + r + } + } + | combine(ch_segmentation_with_id, by: 0) + | map { + def (join_id, + image_id, image_container, image_dataset, result_name, + meta, + seg_input_image, seg_input_dataset, seg_labels) = it + log.debug "Combined cell images with segmentation: $it" + + def regionprops_output_dir = file("${outdir}/${params.spots_props_subdir}/${image_id}") + def adjusted_image_dataset = sync_image_scale_with_labels_scale(image_dataset, seg_input_dataset) + + def dapi_dataset = params.dapi_channel + ? change_dataset_channel(adjusted_image_dataset, params.dapi_channel) + : '' + def bleeding_dataset = params.bleeding_channel + ? change_dataset_channel(adjusted_image_dataset, params.bleeding_channel) + : '' + + def r = [ + meta, + image_container, adjusted_image_dataset, + seg_labels, seg_input_dataset, + dapi_dataset, + bleeding_dataset, + regionprops_output_dir, + result_name, + ] + log.debug "Cell regionprops input: $r" + r + } + + def spots_props_results = SPOTS_PROPS( + regionprops_inputs, + params.spots_props_cores, + params.spots_props_mem_gb, + ) + + emit: + done = spots_props_results.results +} + + +def sync_image_scale_with_labels_scale(image_dataset, labels_dataset) { + def image_dataset_comps = image_dataset.split('/') + def labels_dataset_comps = labels_dataset.split('/') + if (labels_dataset_comps && labels_dataset_comps[-1] != image_dataset[-1]) { + log.debug "Use labels scale for input image: ${labels_dataset_comps[-1]}" + image_dataset_comps[-1] = labels_dataset_comps[-1] + } + return image_dataset_comps.join('/') +} + +def get_spot_subpaths(id) { + if (!params.spot_subpaths && !params.spot_channels && !params.spot_scales) { + return [ + ['', ''], // empty subpath, empty resultnane - the input image container contains the array dataset + ] + } else if (params.spot_subpaths) { + // in this case the subpaths parameters must match exactly the container datasets + return as_list(params.spot_subpaths) + .collect { subpath -> + def spot_ch = get_dataset_channel(subpath) + [ + "${id}/${subpath}", + "${id}-${spot_ch}-props.csv", + ] + } + } else { + def spot_channels; + if (params.spot_channels) { + spot_channels = as_list(params.spot_channels) + log.debug "Use specified spot channels: $spot_channels" + } else { + // all but the last channel which typically is DAPI + def all_channels = as_list(params.channels) + if (params.dapi_channel) { + spot_channels = all_channels.findAll { it != params.dapi_channel } + } else { + // automatically consider DAPI the last channel + // this may throw an exception if the channel list is empty or a singleton + spot_channels = all_channels[0..-2] // all but the last channel + } + log.debug "Spot channels: $spot_channels (all from ${params.channels} except the last one)" + } + def spot_scales = as_list(params.spot_scales) + + return [spot_channels, spot_scales].combinations() + .collect { ch, scale -> + // when channel and scale is used we also prepend the stitched dataset + def dataset = "${ch}/${scale}" + def r = [ + "${id}/${dataset}", + "${id}-${ch}-props.csv", + ] + log.debug "Spot dataset: $r" + r + } + } +} + +def change_dataset_channel(image_dataset, channel) { + def image_dataset_comps = image_dataset.split('/') + if (image_dataset_comps) { + image_dataset_comps[-2] = channel + } + return image_dataset_comps.join('/') +} + +def get_dataset_channel(image_dataset) { + def image_dataset_comps = image_dataset.split('/') + return image_dataset_comps ? image_dataset_comps[-2] : '' +}