From ce31ec79d0b1402cce7683c14cf57d07f41b01e4 Mon Sep 17 00:00:00 2001 From: Ekaterina Sakharova Date: Thu, 12 Mar 2026 13:32:29 +0000 Subject: [PATCH 01/46] replace upload with ena-webin-handler --- conf/modules.config | 9 +++ modules/local/ena_webin_cli_download/main.nf | 23 +++++++ modules/local/ena_webin_cli_wrapper/main.nf | 43 +++++++++++++ modules/local/genome_upload/main.nf | 23 ++++--- nextflow.config | 5 ++ nextflow_schema.json | 6 ++ workflows/genomesubmit.nf | 68 +++++++++++++------- 7 files changed, 143 insertions(+), 34 deletions(-) create mode 100644 modules/local/ena_webin_cli_download/main.nf create mode 100644 modules/local/ena_webin_cli_wrapper/main.nf diff --git a/conf/modules.config b/conf/modules.config index 2dcf97c..7ab6666 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -43,6 +43,15 @@ process { ] } + withName: 'ENA_WEBIN_CLI_WRAPPER' { + ext.args = { params.mode == 'mags' || params.mode == 'bins' || params.mode == 'metagenomic_assemblies' ? "--context genome": "--context reads"} + publishDir = [ + path: { "${params.outdir}/${params.mode}/upload/webin_cli" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: 'MULTIQC' { ext.args = { params.multiqc_title ? 
"--title \"$params.multiqc_title\"" : '' } publishDir = [ diff --git a/modules/local/ena_webin_cli_download/main.nf b/modules/local/ena_webin_cli_download/main.nf new file mode 100644 index 0000000..67fb118 --- /dev/null +++ b/modules/local/ena_webin_cli_download/main.nf @@ -0,0 +1,23 @@ +process ENA_WEBIN_CLI_DOWNLOAD { + label 'process_single' + + input: + tuple val(version) + + output: + tuple path("webin-cli-*.jar"), emit: webin_cli_jar + + when: + task.ext.when == null || task.ext.when + + script: + + """ + wget https://github.com/enasequence/webin-cli/releases/download/${version}/webin-cli-${version}.jar + """ + + stub: + """ + touch webin-cli-stub.jar + """ +} diff --git a/modules/local/ena_webin_cli_wrapper/main.nf b/modules/local/ena_webin_cli_wrapper/main.nf new file mode 100644 index 0000000..a3de9fa --- /dev/null +++ b/modules/local/ena_webin_cli_wrapper/main.nf @@ -0,0 +1,43 @@ +/* + * ena-webin-cli wrapper script that runs ena-webin-cli and handles errors +*/ +process ENA_WEBIN_CLI_WRAPPER { + + label 'process_low' + tag "${meta.id}" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mgnify-pipelines-toolkit:1.4.18--pyhdfd78af_0': + 'quay.io/biocontainers/mgnify-pipelines-toolkit:1.4.18--pyhdfd78af_0' }" + + input: + tuple val(meta), path(submission_item), path(manifest) + path(webin_cli_jar) + + output: + path "versions.yml", emit: versions + + script: + def args = task.ext.args ?: "" + def prefix = task.ext.prefix ?: "${meta.id}" + def mode = params.test_upload ? "--test" : "" + def submit_or_validate = params.webincli_submit ? 
"--mode submit": "--mode validate" + + """ + # change FASTA path in manifest to current workdir + export ITEM_FULL_PATH=\$(readlink -f ${submission_item}) + sed 's|^FASTA\t.*|FASTA\t'"\${ITEM_FULL_PATH}"'|g' ${manifest} > ${prefix}_updated_manifest.manifest + + webin_cli_handler \\ + -m ${prefix}_updated_manifest.manifest \\ + --webin-cli-jar ${webin_cli_jar} \\ + ${submit_or_validate} \\ + ${mode} \\ + ${args} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version 2>&1 | sed 's/Python //g') + biopython: \$(python -c "import pkg_resources; print(pkg_resources.get_distribution('biopython').version)") + END_VERSIONS + """ +} diff --git a/modules/local/genome_upload/main.nf b/modules/local/genome_upload/main.nf index f8bf1a5..f91d369 100644 --- a/modules/local/genome_upload/main.nf +++ b/modules/local/genome_upload/main.nf @@ -4,9 +4,6 @@ process GENOME_UPLOAD { container "quay.io/biocontainers/genome-uploader:2.5.1--pyhdfd78af_1" - secret 'WEBIN_ACCOUNT' - secret 'WEBIN_PASSWORD' - input: path(mags) path(table_for_upload) @@ -18,7 +15,7 @@ process GENOME_UPLOAD { path "results/{MAG,bin}_upload/genome_samples.xml" , emit: upload_genome_samples path "results/{MAG,bin}_upload/registered_{MAGs,bins}*.tsv", emit: upload_registered_mags path "results/{MAG,bin}_upload/submission.xml" , emit: upload_submission_xml - path "versions.yml" , emit: versions + tuple val("${task.process}"), val('genome_uploader'), eval("genome_upload --version 2>&1 | sed 's/genome_uploader //g'"), topic: versions, emit: versions_genome_uploader when: task.ext.when == null || task.ext.when @@ -30,8 +27,8 @@ process GENOME_UPLOAD { def mode = (!params.test_upload) ? 
"--live" : "" """ - export ENA_WEBIN=\$WEBIN_ACCOUNT - export ENA_WEBIN_PASSWORD=\$WEBIN_PASSWORD + echo ${ENA_WEBIN} + echo ${ENA_WEBIN_PASSWORD} genome_upload \\ -u $params.submission_study \\ @@ -43,10 +40,16 @@ process GENOME_UPLOAD { ${mode} \\ --out results \\ ${args} + """ - cat <<-END_VERSIONS > versions.yml - "${task.process}": - genome_uploader: \$(genome_upload --version 2>&1 | sed 's/genome_uploader //g') - END_VERSIONS + stub: + """ + mkdir results/MAG_upload + touch results/MAG_upload/ENA_backup.json + touch results/MAG_upload/genome_samples.xml + touch results/MAG_upload/submission.xml + touch results/MAG_upload/registered_MAGs_test.tsv + mkdir results/MAG_upload/manifests_test + touch results/MAG_upload/manifests_test/test_1.manifest """ } diff --git a/nextflow.config b/nextflow.config index 0464317..73c9299 100644 --- a/nextflow.config +++ b/nextflow.config @@ -5,6 +5,10 @@ Default config options for all compute environments ---------------------------------------------------------------------------------------- */ +env { + ENA_WEBIN = env('ENA_WEBIN') ?: secrets.ENA_WEBIN + ENA_WEBIN_PASSWORD = env('ENA_WEBIN_PASSWORD') ?: secrets.ENA_WEBIN_PASSWORD +} // Global default params, used in configs params { @@ -24,6 +28,7 @@ params { upload_force = true test_upload = true webincli_submit = true + webin_cli_version = "9.0.3" // rna detection rrna_limit = 80 diff --git a/nextflow_schema.json b/nextflow_schema.json index c8eac29..0dcf33c 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -297,6 +297,12 @@ "description": "Submit or validate", "default": true, "help": "Flag to run submission or validation. Submission (true) will run upload of data with ena-webin-cli. Validation (false) validates correctness of input files, it does not do submission. 
Default: true (submit)" + }, + "webin_cli_version": { + "type": "string", + "description": "Version of webon-cli.jar to use for submission", + "default": "9.0.3", + "help": "Check version https://github.com/enasequence/webin-cli" } } } diff --git a/workflows/genomesubmit.nf b/workflows/genomesubmit.nf index 2e7ae0a..a1cdbfa 100644 --- a/workflows/genomesubmit.nf +++ b/workflows/genomesubmit.nf @@ -3,19 +3,21 @@ IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { GENOME_UPLOAD } from '../modules/local/genome_upload' -include { ENA_WEBIN_CLI } from '../modules/local/ena_webin_cli' +// TODO rename when we will have register_study module separately +include { GENOME_UPLOAD as REGISTER_STUDY_AND_CREATE_MANIFESTS } from '../modules/local/genome_upload' +include { ENA_WEBIN_CLI_WRAPPER as SUBMIT } from '../modules/local/ena_webin_cli_wrapper' +include { ENA_WEBIN_CLI_DOWNLOAD } from '../modules/local/ena_webin_cli_download' -include { COVERM_GENOME } from '../modules/nf-core/coverm/genome' -include { MULTIQC } from '../modules/nf-core/multiqc/main' -include { paramsSummaryMap } from 'plugin/nf-schema' +include { COVERM_GENOME } from '../modules/nf-core/coverm/genome' +include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { paramsSummaryMap } from 'plugin/nf-schema' -include { GENOME_EVALUATION } from '../subworkflows/local/genome_evaluation' -include { RNA_DETECTION } from '../subworkflows/local/rna_detection' +include { GENOME_EVALUATION } from '../subworkflows/local/genome_evaluation' +include { RNA_DETECTION } from '../subworkflows/local/rna_detection' -include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_seqsubmit_pipeline' +include { paramsSummaryMultiqc } from 
'../subworkflows/nf-core/utils_nfcore_pipeline' +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_seqsubmit_pipeline' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -186,22 +188,40 @@ workflow GENOMESUBMIT { newLine: true ) - //GENOME_UPLOAD( - // genome_fasta.map{meta, fasta -> fasta}.collect(), - // genome_metadata_csv, - // params.mode - //) - //ch_versions = ch_versions.mix( GENOME_UPLOAD.out.versions ) + // --------- Generate manifests + REGISTER_STUDY_AND_CREATE_MANIFESTS( + fasta_updated_with_stats.map{meta, fasta -> fasta}.collect(), + genome_metadata_csv, + params.mode // mags or bins + ) + + // All manifests were generated in one run + // Manifests should be saparated into differen channels using prefix as id + manifests_ch = REGISTER_STUDY_AND_CREATE_MANIFESTS.out.manifests.flatten() + .map { manifest -> + def prefix = manifest.name.replaceAll(/_\d+\.manifest$/, '') + def meta = [id: prefix] + [ meta, manifest ] + } + // Combine fasta and manifests + ch_combined = fasta_updated_with_stats + .map { meta, fasta -> [meta.id, meta, fasta] } + .join( + manifests_ch.map { meta, manifest -> [meta.id, manifest] } // Has only [id: prefix] + ) + .map { id, full_meta, fasta, manifest -> + [full_meta, fasta, manifest] + } - //manifests_ch = GENOME_UPLOAD.out.manifests.flatten() - // .map { manifest -> - // def prefix = manifest.name.replaceAll(/_\d+\.manifest$/, '') - // def meta = [id: prefix] - // [ meta, manifest ] - //} - //combined_ch = ch_mags.join(manifests_ch) + // --------- Upload data to ENA + ENA_WEBIN_CLI_DOWNLOAD ( + params.webin_cli_version + ) - //ENA_WEBIN_CLI( combined_ch ) + SUBMIT ( + ch_combined, + ENA_WEBIN_CLI_DOWNLOAD.out.webin_cli_jar + ) //ch_versions = ch_versions.mix( ENA_WEBIN_CLI.out.versions.first() ) // From 952ab91883e8845beb74805d234ae768e19fc4d3 Mon Sep 17 00:00:00 2001 
From: Ekaterina Sakharova Date: Thu, 12 Mar 2026 14:19:36 +0000 Subject: [PATCH 02/46] additions --- README.md | 4 +- .../ena_webin_cli_download/environment.yml | 6 +++ modules/local/ena_webin_cli_download/main.nf | 4 +- modules/local/ena_webin_cli_download/meta.yml | 35 +++++++++++++ .../ena_webin_cli_download/tests/main.nf.test | 51 +++++++++++++++++++ .../ena_webin_cli_wrapper/environment.yml | 7 +++ modules/local/ena_webin_cli_wrapper/main.nf | 8 +-- modules/local/ena_webin_cli_wrapper/meta.yml | 48 +++++++++++++++++ .../ena_webin_cli_wrapper/nextflow.config | 16 ++++++ .../nextflow.config | 4 +- modules/local/genome_upload/nextflow.config | 4 ++ nextflow.config | 5 -- 12 files changed, 178 insertions(+), 14 deletions(-) create mode 100644 modules/local/ena_webin_cli_download/environment.yml create mode 100644 modules/local/ena_webin_cli_download/meta.yml create mode 100644 modules/local/ena_webin_cli_download/tests/main.nf.test create mode 100644 modules/local/ena_webin_cli_wrapper/environment.yml create mode 100644 modules/local/ena_webin_cli_wrapper/meta.yml create mode 100644 modules/local/ena_webin_cli_wrapper/nextflow.config create mode 100644 modules/local/genome_upload/nextflow.config diff --git a/README.md b/README.md index 22f9661..5a8ab9e 100644 --- a/README.md +++ b/README.md @@ -38,9 +38,9 @@ Currently, the pipeline supports three submission modes, each routed to a dedica Setup your environment secrets before running the pipeline: -`nextflow secrets set WEBIN_ACCOUNT "Webin-XXX"` +`nextflow secrets set ENA_WEBIN "Webin-XXX"` -`nextflow secrets set WEBIN_PASSWORD "XXX"` +`nextflow secrets set ENA_WEBIN_PASSWORD "XXX"` Make sure you update commands above with your authorised credentials. 
diff --git a/modules/local/ena_webin_cli_download/environment.yml b/modules/local/ena_webin_cli_download/environment.yml new file mode 100644 index 0000000..61dc7ba --- /dev/null +++ b/modules/local/ena_webin_cli_download/environment.yml @@ -0,0 +1,6 @@ +--- +channels: + - conda-forge + - bioconda +dependencies: + - "conda-forge::wget" diff --git a/modules/local/ena_webin_cli_download/main.nf b/modules/local/ena_webin_cli_download/main.nf index 67fb118..b156b58 100644 --- a/modules/local/ena_webin_cli_download/main.nf +++ b/modules/local/ena_webin_cli_download/main.nf @@ -2,10 +2,10 @@ process ENA_WEBIN_CLI_DOWNLOAD { label 'process_single' input: - tuple val(version) + val(version) output: - tuple path("webin-cli-*.jar"), emit: webin_cli_jar + path("webin-cli-*.jar"), emit: webin_cli_jar when: task.ext.when == null || task.ext.when diff --git a/modules/local/ena_webin_cli_download/meta.yml b/modules/local/ena_webin_cli_download/meta.yml new file mode 100644 index 0000000..99f949b --- /dev/null +++ b/modules/local/ena_webin_cli_download/meta.yml @@ -0,0 +1,35 @@ +name: "ena_webin_cli_download" +description: Downloads the ENA Webin-CLI JAR file from GitHub releases. +keywords: + - ena + - webin + - submission + - download +tools: + - "wget": + description: "A free utility for non-interactive download of files from the web." + homepage: "https://www.gnu.org/software/wget/" + documentation: "https://www.gnu.org/software/wget/manual/" + licence: ["GPL-3.0-or-later"] + identifier: null + +input: + - - version: + type: value + description: | + Version of the Webin-CLI JAR to download. + Example: "7.4.1" + +output: + webin_cli_jar: + - - "webin-cli-*.jar": + type: file + description: The downloaded Webin-CLI JAR file. 
+ pattern: "webin-cli-*.jar" + +authors: + - "@KateSakharova" + - "@ochkalova" +maintainers: + - "@KateSakharova" + - "@ochkalova" diff --git a/modules/local/ena_webin_cli_download/tests/main.nf.test b/modules/local/ena_webin_cli_download/tests/main.nf.test new file mode 100644 index 0000000..888d9c6 --- /dev/null +++ b/modules/local/ena_webin_cli_download/tests/main.nf.test @@ -0,0 +1,51 @@ +nextflow_process { + + name "Test Process ENA_WEBIN_CLI_DOWNLOAD" + script "../main.nf" + process "ENA_WEBIN_CLI_DOWNLOAD" + + tag "modules" + tag "ena_webin_cli_download" + + test("ENA_WEBIN_CLI_DOWNLOAD - downloads webin-cli jar") { + + when { + process { + """ + input[0] = "9.0.3" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.webin_cli_jar.size() == 1 }, + { assert process.out.webin_cli_jar[0].toString().endsWith(".jar") } + ) + } + + } + + test("ENA_WEBIN_CLI_DOWNLOAD - stub") { + + options "-stub" + + when { + process { + """ + input[0] = "9.0.3" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/local/ena_webin_cli_wrapper/environment.yml b/modules/local/ena_webin_cli_wrapper/environment.yml new file mode 100644 index 0000000..05f2127 --- /dev/null +++ b/modules/local/ena_webin_cli_wrapper/environment.yml @@ -0,0 +1,7 @@ +--- +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::mgnify-pipelines-toolkit" + - "conda-forge::openjdk" diff --git a/modules/local/ena_webin_cli_wrapper/main.nf b/modules/local/ena_webin_cli_wrapper/main.nf index a3de9fa..8ba38f6 100644 --- a/modules/local/ena_webin_cli_wrapper/main.nf +++ b/modules/local/ena_webin_cli_wrapper/main.nf @@ -5,9 +5,7 @@ process ENA_WEBIN_CLI_WRAPPER { label 'process_low' tag "${meta.id}" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/mgnify-pipelines-toolkit:1.4.18--pyhdfd78af_0': - 'quay.io/biocontainers/mgnify-pipelines-toolkit:1.4.18--pyhdfd78af_0' }" + container "community.wave.seqera.io/library/ena-webin-cli_mgnify-pipelines-toolkit:49478611e9515066" input: tuple val(meta), path(submission_item), path(manifest) @@ -27,6 +25,10 @@ process ENA_WEBIN_CLI_WRAPPER { export ITEM_FULL_PATH=\$(readlink -f ${submission_item}) sed 's|^FASTA\t.*|FASTA\t'"\${ITEM_FULL_PATH}"'|g' ${manifest} > ${prefix}_updated_manifest.manifest + + echo ${ENA_WEBIN} + echo ${ENA_WEBIN_PASSWORD} + webin_cli_handler \\ -m ${prefix}_updated_manifest.manifest \\ --webin-cli-jar ${webin_cli_jar} \\ diff --git a/modules/local/ena_webin_cli_wrapper/meta.yml b/modules/local/ena_webin_cli_wrapper/meta.yml new file mode 100644 index 0000000..0469709 --- /dev/null +++ b/modules/local/ena_webin_cli_wrapper/meta.yml @@ -0,0 +1,48 @@ +name: "ena_webin_cli_wrapper" +description: Runs ENA Webin-CLI to validate or submit assemblies, using the webin_cli_handler script from mgnify-pipelines-toolkit. +keywords: + - ena + - webin + - submission + - assembly +tools: + - "mgnify-pipelines-toolkit": + description: "A toolkit of utilities for MGnify pipelines, including webin_cli_handler for robust Webin-CLI submission." + homepage: "https://github.com/EBI-Metagenomics/mgnify-pipelines-toolkit" + documentation: "https://github.com/EBI-Metagenomics/mgnify-pipelines-toolkit" + licence: ["Apache-2.0"] + identifier: null + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information. + e.g. `[ id:'sample1' ]` + - submission_item: + type: file + description: | + FASTA file for submission (assembly, MAG, or bin). + - manifest: + type: file + description: | + Webin-CLI submission manifest file. + - - webin_cli_jar: + type: file + description: | + The Webin-CLI JAR file downloaded by ena_webin_cli_download. 
+ pattern: "webin-cli-*.jar" + +output: + versions: + - "versions.yml": + type: file + description: File containing software versions. + pattern: "versions.yml" + +authors: + - "@KateSakharova" + - "@ochkalova" +maintainers: + - "@KateSakharova" + - "@ochkalova" diff --git a/modules/local/ena_webin_cli_wrapper/nextflow.config b/modules/local/ena_webin_cli_wrapper/nextflow.config new file mode 100644 index 0000000..b2fb248 --- /dev/null +++ b/modules/local/ena_webin_cli_wrapper/nextflow.config @@ -0,0 +1,16 @@ +params { + // Use ENA test server and validate only (no actual submission) + test_upload = true + webincli_submit = false +} + +process { + withName: ENA_WEBIN_CLI_WRAPPER { + ext.args = "" + } +} + +env { + ENA_WEBIN = secrets.ENA_WEBIN + ENA_WEBIN_PASSWORD = secrets.ENA_WEBIN_PASSWORD +} diff --git a/modules/local/generate_assembly_manifest/nextflow.config b/modules/local/generate_assembly_manifest/nextflow.config index 9a4037e..495e622 100644 --- a/modules/local/generate_assembly_manifest/nextflow.config +++ b/modules/local/generate_assembly_manifest/nextflow.config @@ -4,6 +4,6 @@ process { } } env { - ENA_WEBIN = secrets.WEBIN_ACCOUNT - ENA_WEBIN_PASSWORD = secrets.WEBIN_PASSWORD + ENA_WEBIN = secrets.ENA_WEBIN + ENA_WEBIN_PASSWORD = secrets.ENA_WEBIN_PASSWORD } diff --git a/modules/local/genome_upload/nextflow.config b/modules/local/genome_upload/nextflow.config new file mode 100644 index 0000000..9110b4b --- /dev/null +++ b/modules/local/genome_upload/nextflow.config @@ -0,0 +1,4 @@ +env { + ENA_WEBIN = secrets.ENA_WEBIN + ENA_WEBIN_PASSWORD = secrets.ENA_WEBIN_PASSWORD +} diff --git a/nextflow.config b/nextflow.config index 73c9299..b1e083f 100644 --- a/nextflow.config +++ b/nextflow.config @@ -5,11 +5,6 @@ Default config options for all compute environments ---------------------------------------------------------------------------------------- */ -env { - ENA_WEBIN = env('ENA_WEBIN') ?: secrets.ENA_WEBIN - ENA_WEBIN_PASSWORD = 
env('ENA_WEBIN_PASSWORD') ?: secrets.ENA_WEBIN_PASSWORD -} - // Global default params, used in configs params { From e2709e1ed39f2f718edbb86357f54ec9edac09ed Mon Sep 17 00:00:00 2001 From: Ekaterina Sakharova Date: Thu, 12 Mar 2026 16:56:31 +0000 Subject: [PATCH 03/46] add condition for manifest parsing --- modules/local/ena_webin_cli_wrapper/main.nf | 4 --- nextflow.config | 4 +-- workflows/genomesubmit.nf | 32 +++++++++++---------- 3 files changed, 19 insertions(+), 21 deletions(-) diff --git a/modules/local/ena_webin_cli_wrapper/main.nf b/modules/local/ena_webin_cli_wrapper/main.nf index 8ba38f6..e675cc8 100644 --- a/modules/local/ena_webin_cli_wrapper/main.nf +++ b/modules/local/ena_webin_cli_wrapper/main.nf @@ -25,10 +25,6 @@ process ENA_WEBIN_CLI_WRAPPER { export ITEM_FULL_PATH=\$(readlink -f ${submission_item}) sed 's|^FASTA\t.*|FASTA\t'"\${ITEM_FULL_PATH}"'|g' ${manifest} > ${prefix}_updated_manifest.manifest - - echo ${ENA_WEBIN} - echo ${ENA_WEBIN_PASSWORD} - webin_cli_handler \\ -m ${prefix}_updated_manifest.manifest \\ --webin-cli-jar ${webin_cli_jar} \\ diff --git a/nextflow.config b/nextflow.config index b1e083f..56c5e39 100644 --- a/nextflow.config +++ b/nextflow.config @@ -219,8 +219,8 @@ env { R_PROFILE_USER = "/.Rprofile" R_ENVIRON_USER = "/.Renviron" JULIA_DEPOT_PATH = "/usr/local/share/julia" - ENA_WEBIN = secrets.WEBIN_ACCOUNT - ENA_WEBIN_PASSWORD = secrets.WEBIN_PASSWORD + ENA_WEBIN = secrets.ENA_WEBIN + ENA_WEBIN_PASSWORD = secrets.ENA_WEBIN_PASSWORD } // Set bash options diff --git a/workflows/genomesubmit.nf b/workflows/genomesubmit.nf index a1cdbfa..8776afb 100644 --- a/workflows/genomesubmit.nf +++ b/workflows/genomesubmit.nf @@ -4,20 +4,20 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ // TODO rename when we will have register_study module separately -include { GENOME_UPLOAD as REGISTER_STUDY_AND_CREATE_MANIFESTS } from '../modules/local/genome_upload' -include { ENA_WEBIN_CLI_WRAPPER as 
SUBMIT } from '../modules/local/ena_webin_cli_wrapper' -include { ENA_WEBIN_CLI_DOWNLOAD } from '../modules/local/ena_webin_cli_download' +include { GENOME_UPLOAD as CREATE_MANIFESTS } from '../modules/local/genome_upload' +include { ENA_WEBIN_CLI_WRAPPER as SUBMIT } from '../modules/local/ena_webin_cli_wrapper' +include { ENA_WEBIN_CLI_DOWNLOAD } from '../modules/local/ena_webin_cli_download' -include { COVERM_GENOME } from '../modules/nf-core/coverm/genome' -include { MULTIQC } from '../modules/nf-core/multiqc/main' -include { paramsSummaryMap } from 'plugin/nf-schema' +include { COVERM_GENOME } from '../modules/nf-core/coverm/genome' +include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { paramsSummaryMap } from 'plugin/nf-schema' -include { GENOME_EVALUATION } from '../subworkflows/local/genome_evaluation' -include { RNA_DETECTION } from '../subworkflows/local/rna_detection' +include { GENOME_EVALUATION } from '../subworkflows/local/genome_evaluation' +include { RNA_DETECTION } from '../subworkflows/local/rna_detection' -include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_seqsubmit_pipeline' +include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_seqsubmit_pipeline' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -189,17 +189,19 @@ workflow GENOMESUBMIT { ) // --------- Generate manifests - REGISTER_STUDY_AND_CREATE_MANIFESTS( + CREATE_MANIFESTS( fasta_updated_with_stats.map{meta, fasta -> fasta}.collect(), genome_metadata_csv, params.mode // mags or bins ) // All manifests were generated in one run - // 
Manifests should be saparated into differen channels using prefix as id - manifests_ch = REGISTER_STUDY_AND_CREATE_MANIFESTS.out.manifests.flatten() + // Manifests should be separated into different channels using prefix as id + manifests_ch = CREATE_MANIFESTS.out.manifests.flatten() .map { manifest -> - def prefix = manifest.name.replaceAll(/_\d+\.manifest$/, '') + def prefix = params.test_upload ? + manifest.name.replaceAll(/_\d+\.manifest$/, '') : + manifest.name.replaceAll(/\.manifest$/, '') def meta = [id: prefix] [ meta, manifest ] } From 0453f2e8d02ad12fc4af092aea65baeccabf7481 Mon Sep 17 00:00:00 2001 From: Ekaterina Sakharova Date: Thu, 12 Mar 2026 16:59:07 +0000 Subject: [PATCH 04/46] rename --- modules/local/registerstudy/nextflow.config | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/local/registerstudy/nextflow.config b/modules/local/registerstudy/nextflow.config index 3f71a8e..b58908d 100644 --- a/modules/local/registerstudy/nextflow.config +++ b/modules/local/registerstudy/nextflow.config @@ -4,6 +4,6 @@ process { } } env { - ENA_WEBIN = secrets.WEBIN_ACCOUNT - ENA_WEBIN_PASSWORD = secrets.WEBIN_PASSWORD + ENA_WEBIN = secrets.ENA_WEBIN + ENA_WEBIN_PASSWORD = secrets.ENA_WEBIN_PASSWORD } From 931fc86e1d14276521efc31052b38d6a5447fd82 Mon Sep 17 00:00:00 2001 From: Sofia Ochkalova Date: Tue, 17 Mar 2026 11:10:08 +0000 Subject: [PATCH 05/46] refactor and add check to only download CAT_db if local db is not provided --- workflows/genomesubmit.nf | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/workflows/genomesubmit.nf b/workflows/genomesubmit.nf index a532559..53a3a43 100644 --- a/workflows/genomesubmit.nf +++ b/workflows/genomesubmit.nf @@ -174,21 +174,21 @@ workflow GENOMESUBMIT { ) // build input structures for CAT_DB depending on what provided as input - def cat_db_input = (params.cat_db != null && params.cat_db != '') + def cat_db_input = params.cat_db ? 
channel.of( [['id': 'CAT_DB'], file(params.cat_db)] ) : channel.empty() - def cat_db_id_input = (params.cat_db_download_id != null && params.cat_db_download_id != '') + def cat_db_id_input = (!params.cat_db && params.cat_db_download_id) ? channel.of( [['id': 'CAT_DB_id'], params.cat_db_download_id] ) : channel.empty() FASTA_CLASSIFY_CATPACK ( - RENAME_FASTA_FOR_CATPACK.out.renamed_fasta, - channel.empty(), + RENAME_FASTA_FOR_CATPACK.out.renamed_fasta, // ch_bins + channel.empty(), // ch_contigs - empty because we classify bins, not contigs cat_db_input, cat_db_id_input, - false, // generate summaries - '.fasta' + false, // disable summary generation + '.fasta' // bin_suffix - the suffix of the renamed fasta files ) fasta_updated_with_taxonomy = FASTA_CLASSIFY_CATPACK.out.bat_classification From f72c32ea51f6ca254101b0240fe5c78e157c98c4 Mon Sep 17 00:00:00 2001 From: Sofia Ochkalova Date: Tue, 17 Mar 2026 11:11:31 +0000 Subject: [PATCH 06/46] add possible fna extension to schemas because we can handle it --- assets/schema_input_assembly.json | 4 ++-- assets/schema_input_genome.json | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/assets/schema_input_assembly.json b/assets/schema_input_assembly.json index 3b55e28..d342b31 100644 --- a/assets/schema_input_assembly.json +++ b/assets/schema_input_assembly.json @@ -17,8 +17,8 @@ "type": "string", "format": "file-path", "exists": true, - "pattern": "^([\\S\\s]*\\/)?[^\\s\\/]+\\.f(ast)?a\\.gz$", - "errorMessage": "FASTA file must be provided and have extension '.fa', '.fasta', '.fas', '.fna' (optionally gzipped)", + "pattern": "^([\\S\\s]*\\/)?[^\\s\\/]+\\.(fa|fasta|fna)\\.gz$", + "errorMessage": "FASTA file must be provided and have extension '.fa', '.fasta', '.fna' (optionally gzipped)", "description": "Metagenomic assembly FASTA file" }, "fastq_1": { diff --git a/assets/schema_input_genome.json b/assets/schema_input_genome.json index dfd01c3..622b9ab 100644 --- a/assets/schema_input_genome.json +++ 
b/assets/schema_input_genome.json @@ -17,8 +17,8 @@ "type": "string", "format": "file-path", "exists": true, - "pattern": "^([\\S\\s]*\\/)?[^\\s\\/]+\\.f(ast)?a\\.gz$", - "errorMessage": "FASTA file for sequences 1 must be provided, cannot contain spaces and must have extension '.fa.gz' or '.fasta.gz'", + "pattern": "^([\\S\\s]*\\/)?[^\\s\\/]+\\.(fa|fasta|fna)\\.gz$", + "errorMessage": "FASTA file for sequences 1 must be provided, cannot contain spaces and must have extension '.fa.gz', '.fasta.gz', or '.fna.gz'", "description": "MAG/bin sequence file" }, "accession": { From c599e6573112e337ef63e6f99df102f2a582b363 Mon Sep 17 00:00:00 2001 From: Sofia Ochkalova Date: Tue, 17 Mar 2026 11:58:43 +0000 Subject: [PATCH 07/46] patch genome_evaluation to only download db if there are genomes to analyse --- subworkflows/local/genome_evaluation.nf | 29 ++++++++++++++++++------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/subworkflows/local/genome_evaluation.nf b/subworkflows/local/genome_evaluation.nf index b80b854..6eb4fd9 100644 --- a/subworkflows/local/genome_evaluation.nf +++ b/subworkflows/local/genome_evaluation.nf @@ -20,18 +20,27 @@ include { CHECKM2_PREDICT } from '../../modules/nf-core/checkm2/predict workflow GENOME_EVALUATION { take: - ch_fasta // [meta, fasta_file] + ch_fasta // channel: [ val(meta), path(fasta) ] main: ch_versions = channel.empty() - // Run checkM2 database download if there is no db path provided + // + // Database preparation + // + if (!params.checkm2_db || !file(params.checkm2_db).exists()) { - CHECKM2_DATABASEDOWNLOAD(params.checkm2_db_zenodo_id) - ch_check2_db = CHECKM2_DATABASEDOWNLOAD.out.database + // Conditional download: only trigger if ch_fasta has items + ch_download_trigger = ch_fasta + .map { _meta, _fasta -> params.checkm2_db_zenodo_id } + .first() // Only need one trigger regardless of how many fasta files + + CHECKM2_DATABASEDOWNLOAD(ch_download_trigger) + ch_checkm2_db = CHECKM2_DATABASEDOWNLOAD.out.database 
} else { - ch_check2_db = channel.of( + // Use existing database + ch_checkm2_db = channel.of( [ [id: "checkm2_db"], file(params.checkm2_db), @@ -39,13 +48,17 @@ workflow GENOME_EVALUATION { ) } + // + // Genome evaluation + // + CHECKM2_PREDICT( ch_fasta, - ch_check2_db.first(), + ch_checkm2_db, ) emit: - genome_evaluation = CHECKM2_PREDICT.out.checkm2_tsv // [meta, stats.tsv] - stats_versions = CHECKM2_PREDICT.out.versions_checkm2_predict + genome_evaluation = CHECKM2_PREDICT.out.checkm2_tsv // channel: [ val(meta), path(tsv) ] + stats_versions = CHECKM2_PREDICT.out.versions_checkm2_predict } From 8f52a6c6df57c984c0c87c384d5ea81c78ea1aec Mon Sep 17 00:00:00 2001 From: Sofia Ochkalova Date: Tue, 17 Mar 2026 12:20:40 +0000 Subject: [PATCH 08/46] patch fasta_classify_catpack to only download db if there are genomes to analyse --- .../fasta_classify_catpack.diff | 18 ++++++++++++++++++ .../nf-core/fasta_classify_catpack/main.nf | 11 +++++++++-- 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/subworkflows/nf-core/fasta_classify_catpack/fasta_classify_catpack.diff b/subworkflows/nf-core/fasta_classify_catpack/fasta_classify_catpack.diff index 6a541ba..e704c20 100644 --- a/subworkflows/nf-core/fasta_classify_catpack/fasta_classify_catpack.diff +++ b/subworkflows/nf-core/fasta_classify_catpack/fasta_classify_catpack.diff @@ -21,6 +21,24 @@ Changes in 'fasta_classify_catpack/main.nf': ch_cat_db_input.other.subscribe { _meta, _db -> error("Error: A DB was provided to FASTA_CLASSIFY_CATPACK that is not a `.tar.gz` or a directory.") +@@ -51,8 +55,15 @@ + taxonomy: [meta, dir / 'tax'] + } + +- // Download and prepare db from scratch if no pre-built db provided +- CATPACK_DOWNLOAD(ch_cat_db_download_id) ++ // Download and prepare db from scratch if no pre-built db provided - only trigger if ch_bins OR ch_contigs has items ++ // Mix both channels and use first item to trigger download once ++ ch_download_trigger = ch_bins ++ .mix(ch_contigs) ++ .first() ++ 
.combine(ch_cat_db_download_id) ++ .map { _meta, _fasta, db_meta, db_id -> [db_meta, db_id] } ++ ++ CATPACK_DOWNLOAD(ch_download_trigger) + + CATPACK_PREPARE( + CATPACK_DOWNLOAD.out.fasta, 'subworkflows/nf-core/fasta_classify_catpack/tests/main.nf.test.snap' is unchanged 'subworkflows/nf-core/fasta_classify_catpack/tests/nextflow.config' is unchanged diff --git a/subworkflows/nf-core/fasta_classify_catpack/main.nf b/subworkflows/nf-core/fasta_classify_catpack/main.nf index e362a5c..1d024ea 100644 --- a/subworkflows/nf-core/fasta_classify_catpack/main.nf +++ b/subworkflows/nf-core/fasta_classify_catpack/main.nf @@ -55,8 +55,15 @@ workflow FASTA_CLASSIFY_CATPACK { taxonomy: [meta, dir / 'tax'] } - // Download and prepare db from scratch if no pre-built db provided - CATPACK_DOWNLOAD(ch_cat_db_download_id) + // Download and prepare db from scratch if no pre-built db provided - only trigger if ch_bins OR ch_contigs has items + // Mix both channels and use first item to trigger download once + ch_download_trigger = ch_bins + .mix(ch_contigs) + .first() + .combine(ch_cat_db_download_id) + .map { _meta, _fasta, db_meta, db_id -> [db_meta, db_id] } + + CATPACK_DOWNLOAD(ch_download_trigger) CATPACK_PREPARE( CATPACK_DOWNLOAD.out.fasta, From 81dbeb2a580189c65385ba5ed64206a5ce033a2e Mon Sep 17 00:00:00 2001 From: Sofia Ochkalova Date: Tue, 17 Mar 2026 17:17:15 +0000 Subject: [PATCH 09/46] add FASTAVALIDATOR to check fasta file formatting in GENOMESUBMIT workflow --- workflows/genomesubmit.nf | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/workflows/genomesubmit.nf b/workflows/genomesubmit.nf index 53a3a43..6a5484b 100644 --- a/workflows/genomesubmit.nf +++ b/workflows/genomesubmit.nf @@ -9,6 +9,7 @@ include { ENA_WEBIN_CLI_WRAPPER as SUBMIT } from '../modules/local/ena_webin_c include { ENA_WEBIN_CLI_DOWNLOAD } from '../modules/local/ena_webin_cli_download' include { RENAME_FASTA_FOR_CATPACK } from '../modules/local/rename_fasta_for_catpack' 
+include { FASTAVALIDATOR } from '../modules/nf-core/fastavalidator/main' include { COVERM_GENOME } from '../modules/nf-core/coverm/genome' include { MULTIQC } from '../modules/nf-core/multiqc/main' include { paramsSummaryMap } from 'plugin/nf-schema' @@ -75,8 +76,19 @@ workflow GENOMESUBMIT { genome_fasta = genome_fasta_and_reads.map{meta, fasta, _fq1 -> [meta, fasta]} genome_reads = genome_fasta_and_reads.map{meta, _fasta, reads -> [meta, reads]} + // --------- Check fasta files are properly formatted + FASTAVALIDATOR ( + genome_fasta, + "true" // is_metagenome flag + ) + // TODO add some logging here to track discarded assemblies + validated_fastas = genome_fasta.join(FASTAVALIDATOR.out.success_log) + .map { meta, fasta, _log -> + [meta, fasta] + } + // --------- Genome coverage calculation - genome_fasta + validated_fastas .branch { meta, fasta -> genome_coverage_ref_input: meta.genome_coverage == null genome_coverage_present: true // Everything else goes here From 76498c281f85c0e4422438fe12ac1dfed28ca0f5 Mon Sep 17 00:00:00 2001 From: Sofia Ochkalova Date: Wed, 18 Mar 2026 10:51:46 +0000 Subject: [PATCH 10/46] include FASTAVALIDATOR step to genomesubmit --- .../nf-core/fastavalidator/fastavalidator.diff | 8 ++++---- modules/nf-core/fastavalidator/main.nf | 4 ++-- modules/nf-core/fastavalidator/meta.yml | 4 ++-- workflows/assemblysubmit.nf | 2 +- workflows/genomesubmit.nf | 17 ++++++++++------- 5 files changed, 19 insertions(+), 16 deletions(-) diff --git a/modules/nf-core/fastavalidator/fastavalidator.diff b/modules/nf-core/fastavalidator/fastavalidator.diff index 4937db2..8575327 100644 --- a/modules/nf-core/fastavalidator/fastavalidator.diff +++ b/modules/nf-core/fastavalidator/fastavalidator.diff @@ -7,9 +7,9 @@ Changes in 'fastavalidator/meta.yml': description: Input fasta file pattern: "*.fasta" ontologies: [] -+ - - is_metagenome: ++ - - count_contigs: + type: boolean -+ description: True if the fasta file is from a metagenome. 
Enables number of contigs check. ++ description: Enables number of contigs check (ENA requires more than 1 contig for a contig-level assembly submission) output: success_log: - - meta: @@ -21,7 +21,7 @@ Changes in 'fastavalidator/main.nf': input: tuple val(meta), path(fasta) -+ val(is_metagenome) ++ val(count_contigs) output: - tuple val(meta), path('*.success.log') , emit: success_log , optional: true @@ -49,7 +49,7 @@ Changes in 'fastavalidator/main.nf': + # One more check: count contigs. More than 1 contig required. + echo "[INFO] Checking contig count..." + -+ if [ "${is_metagenome}" = true ]; then ++ if [ "${count_contigs}" = true ]; then + if [[ "${fasta}" == *.gz ]]; then + CONTIGS=\$(zcat "${fasta}" | grep -c '^>') + else diff --git a/modules/nf-core/fastavalidator/main.nf b/modules/nf-core/fastavalidator/main.nf index 87db6ca..c662fc9 100644 --- a/modules/nf-core/fastavalidator/main.nf +++ b/modules/nf-core/fastavalidator/main.nf @@ -9,7 +9,7 @@ process FASTAVALIDATOR { input: tuple val(meta), path(fasta) - val(is_metagenome) + val(count_contigs) output: tuple val(meta), path('*.success.log'), emit: success_log , optional: true @@ -33,7 +33,7 @@ process FASTAVALIDATOR { # One more check: count contigs. More than 1 contig required. echo "[INFO] Checking contig count..." - if [ "${is_metagenome}" = true ]; then + if [ "${count_contigs}" = true ]; then if [[ "${fasta}" == *.gz ]]; then CONTIGS=\$(zcat "${fasta}" | grep -c '^>') else diff --git a/modules/nf-core/fastavalidator/meta.yml b/modules/nf-core/fastavalidator/meta.yml index 35083d2..6d38dde 100644 --- a/modules/nf-core/fastavalidator/meta.yml +++ b/modules/nf-core/fastavalidator/meta.yml @@ -30,9 +30,9 @@ input: description: Input fasta file pattern: "*.fasta" ontologies: [] - - - is_metagenome: + - - count_contigs: type: boolean - description: True if the fasta file is from a metagenome. Enables number of contigs check. 
+ description: Enables number of contigs check (ENA requires more than 1 contig for a contig-level assembly submission) output: success_log: - - meta: diff --git a/workflows/assemblysubmit.nf b/workflows/assemblysubmit.nf index 918e1d7..3e0c351 100644 --- a/workflows/assemblysubmit.nf +++ b/workflows/assemblysubmit.nf @@ -69,7 +69,7 @@ workflow ASSEMBLYSUBMIT { // Check fasta files are properly formatted FASTAVALIDATOR ( assembly_fasta, - "true" // is_metagenome flag + "true" // enables number of contigs check - ENA requires more than 1 contig for an assembly submission ) // TODO add some logging here to track discarded assemblies validated_fastas = assembly_fasta.join(FASTAVALIDATOR.out.success_log) diff --git a/workflows/genomesubmit.nf b/workflows/genomesubmit.nf index 6a5484b..d84468a 100644 --- a/workflows/genomesubmit.nf +++ b/workflows/genomesubmit.nf @@ -79,7 +79,7 @@ workflow GENOMESUBMIT { // --------- Check fasta files are properly formatted FASTAVALIDATOR ( genome_fasta, - "true" // is_metagenome flag + "true" // enables number of contigs check - ENA requires more than 1 contig for a bin/MAG submission ) // TODO add some logging here to track discarded assemblies validated_fastas = genome_fasta.join(FASTAVALIDATOR.out.success_log) @@ -89,19 +89,22 @@ workflow GENOMESUBMIT { // --------- Genome coverage calculation validated_fastas - .branch { meta, fasta -> + .branch { meta, _fasta -> genome_coverage_ref_input: meta.genome_coverage == null genome_coverage_present: true // Everything else goes here } .set { branched_coverage_results } - genome_reads.filter { meta, reads -> meta.genome_coverage == null } - .map { meta, reads -> [meta, reads] } - .set { genome_coverage_fq_input } + branched_coverage_results.genome_coverage_ref_input.join(genome_reads) + .multiMap { meta, fasta, fastq -> + genome: [ meta, fasta ] + raw_reads: [ meta, fastq ] + } + .set { coverm_input } COVERM_GENOME ( - genome_coverage_fq_input, - 
branched_coverage_results.genome_coverage_ref_input, + coverm_input.raw_reads, + coverm_input.genome, false, false, 'file' From 6264ee56403ddee95c567ce37e83a053641475b1 Mon Sep 17 00:00:00 2001 From: Sofia Ochkalova Date: Wed, 18 Mar 2026 16:17:51 +0000 Subject: [PATCH 11/46] add more published results to modules.conf --- conf/modules.config | 77 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 63 insertions(+), 14 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 998ee51..f4b0324 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -12,40 +12,84 @@ process { - publishDir = [ - path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] + // + // TAXONOMIC CLASSIFICATION SUBWORKFLOW + // withName: 'CATPACK_ADDNAMES_BINS' { ext.args = '--only_official' publishDir = [ - path: { "${params.outdir}/${params.mode}/taxonomy" }, + path: { "${params.outdir}/${params.mode}/${meta.id}/taxonomy" }, mode: params.publish_dir_mode, pattern: "*.txt", + ] + } + + withName: 'CATPACK_BINS' { + publishDir = [ + path: { "${params.outdir}/${params.mode}/${meta.id}/taxonomy" }, + mode: params.publish_dir_mode, + pattern: "*.bin2classification.txt" + ] + } + + // + // RNA DETECTION SUBWORKFLOW + // + + withName: 'BARRNAP' { + publishDir = [ + path: { "${params.outdir}/${params.mode}/${meta.id}/rna/barrnap" }, + mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } + withName: 'TRNASCANSE' { + publishDir = [ + path: { "${params.outdir}/${params.mode}/${meta.id}/rna/trnascanse" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + + // + // GENOME QUALITY ASSESSMENT SUBWORKFLOW + // + withName: 'CHECKM2_PREDICT' { publishDir = [ - path: { "${params.outdir}/${params.mode}/checkm2" }, + path: { "${params.outdir}/${params.mode}/${meta.id}/checkm2" }, mode: params.publish_dir_mode, pattern: "*_checkm2_report.tsv", - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } + // + // COVERAGE CALCULATION + // + withName: 'COVERM_GENOME' { ext.args = '--min-covered-fraction 0 --methods mean' publishDir = [ - path: { "${params.outdir}/${params.mode}/coverage" }, + path: { "${params.outdir}/${params.mode}/${meta.id}/coverage" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } + withName: 'COVERM_CONTIG' { + publishDir = [ + path: { "${params.outdir}/${params.mode}/${meta.id}/coverage" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + // + // SUBMISSION AND MANIFEST GENERATION + // + withName: 'GENOME_UPLOAD' { publishDir = [ path: { "${params.outdir}/${params.mode}/upload/manifests" }, @@ -56,7 +100,7 @@ process { withName: 'ENA_WEBIN_CLI' { publishDir = [ - path: { "${params.outdir}/${params.mode}/upload/webin_cli" }, + path: { "${params.outdir}/${params.mode}/${meta.id}/upload/webin_cli" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -65,12 +109,20 @@ process { withName: 'ENA_WEBIN_CLI_WRAPPER' { ext.args = { params.mode == 'mags' || params.mode == 'bins' || params.mode == 'metagenomic_assemblies' ? "--context genome": "--context reads"} publishDir = [ - path: { "${params.outdir}/${params.mode}/upload/webin_cli" }, + path: { "${params.outdir}/${params.mode}/${meta.id}/upload/webin_cli" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] } + withName: 'GENERATE_ASSEMBLY_MANIFEST|ENA_WEBIN_CLI|REGISTERSTUDY' { + ext.args = { params.test_upload ? "--test" : "" } + } + + // + // MULTIQC REPORT + // + withName: 'MULTIQC' { ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } publishDir = [ @@ -80,7 +132,4 @@ process { ] } - withName: 'GENERATE_ASSEMBLY_MANIFEST|ENA_WEBIN_CLI|REGISTERSTUDY' { - ext.args = { params.test_upload ? "--test" : "" } - } } From f6eb67777cd01d4c77602598545c57678532ad40 Mon Sep 17 00:00:00 2001 From: Sofia Ochkalova Date: Wed, 18 Mar 2026 18:10:08 +0000 Subject: [PATCH 12/46] add tests for genomesubmit in --mode mag --- .../test_samplesheets/complete_metadata.csv | 2 + .../no_coverage_paired_reads.csv | 2 + .../no_coverage_single_reads.csv | 2 + assets/test_samplesheets/no_quality.csv | 2 + assets/test_samplesheets/no_rna_presence.csv | 2 + assets/test_samplesheets/no_taxonomy.csv | 2 + conf/test_mag_complete_metadata.conf | 39 ++++++++++++++++++ conf/test_mag_no_coverage_paired_reads.conf | 39 ++++++++++++++++++ conf/test_mag_no_coverage_single_reads.conf | 39 ++++++++++++++++++ conf/test_mag_no_quality.conf | 41 +++++++++++++++++++ conf/test_mag_no_rna_presence.conf | 39 ++++++++++++++++++ conf/test_mag_no_taxonomy.conf | 39 ++++++++++++++++++ nextflow.config | 14 +++++-- tests/mag_complete_metadata.nf.test | 38 +++++++++++++++++ tests/mag_no_coverage_paired_reads.nf.test | 38 +++++++++++++++++ tests/mag_no_coverage_single_reads.nf.test | 38 +++++++++++++++++ tests/mag_no_quality.nf.test | 38 +++++++++++++++++ tests/mag_no_rna_presence.nf.test | 38 +++++++++++++++++ tests/mag_no_taxonomy.nf.test | 38 +++++++++++++++++ 19 files changed, 486 insertions(+), 4 deletions(-) create mode 100644 assets/test_samplesheets/complete_metadata.csv create mode 100644 assets/test_samplesheets/no_coverage_paired_reads.csv create mode 100644 assets/test_samplesheets/no_coverage_single_reads.csv create mode 100644 
assets/test_samplesheets/no_quality.csv create mode 100644 assets/test_samplesheets/no_rna_presence.csv create mode 100644 assets/test_samplesheets/no_taxonomy.csv create mode 100644 conf/test_mag_complete_metadata.conf create mode 100644 conf/test_mag_no_coverage_paired_reads.conf create mode 100644 conf/test_mag_no_coverage_single_reads.conf create mode 100644 conf/test_mag_no_quality.conf create mode 100644 conf/test_mag_no_rna_presence.conf create mode 100644 conf/test_mag_no_taxonomy.conf create mode 100644 tests/mag_complete_metadata.nf.test create mode 100644 tests/mag_no_coverage_paired_reads.nf.test create mode 100644 tests/mag_no_coverage_single_reads.nf.test create mode 100644 tests/mag_no_quality.nf.test create mode 100644 tests/mag_no_rna_presence.nf.test create mode 100644 tests/mag_no_taxonomy.nf.test diff --git a/assets/test_samplesheets/complete_metadata.csv b/assets/test_samplesheets/complete_metadata.csv new file mode 100644 index 0000000..62a1e40 --- /dev/null +++ b/assets/test_samplesheets/complete_metadata.csv @@ -0,0 +1,2 @@ +sample,fasta,accession,fastq_1,fastq_2,assembly_software,binning_software,binning_parameters,stats_generation_software,completeness,contamination,genome_coverage,metagenome,co-assembly,broad_environment,local_environment,environmental_medium,RNA_presence,NCBI_lineage +complete_metadata,/Users/sofia/mgnify/seqsubmit/tests/data/nf-datasets/bin_lachnospira_eligens.fa.gz,SRR14332510,,,spades_v3.15.5,mags_v1,default,CheckM2_v1.0.1,61.0,0.21,32.07,sediment metagenome,No,marine,cable bacteria,marine sediment,No,d__Bacteria;p__Proteobacteria;c__Deltaproteobacteria;o__Desulfobacterales;f__Desulfobulbaceae;g__Candidatus Electrothrix;s__ diff --git a/assets/test_samplesheets/no_coverage_paired_reads.csv b/assets/test_samplesheets/no_coverage_paired_reads.csv new file mode 100644 index 0000000..77eeb09 --- /dev/null +++ b/assets/test_samplesheets/no_coverage_paired_reads.csv @@ -0,0 +1,2 @@ 
+sample,fasta,accession,fastq_1,fastq_2,assembly_software,binning_software,binning_parameters,stats_generation_software,completeness,contamination,genome_coverage,metagenome,co-assembly,broad_environment,local_environment,environmental_medium,RNA_presence,NCBI_lineage +no_coverage_paired_reads,/Users/sofia/mgnify/seqsubmit/tests/data/nf-datasets/bin_lachnospira_eligens.fa.gz,SRR14332510,/Users/sofia/mgnify/seqsubmit/tests/data/fastq_1.fastq,/Users/sofia/mgnify/seqsubmit/tests/data/fastq_2.fastq,spades_v3.15.5,mags_v1,default,CheckM2_v1.0.1,61.0,0.21,,sediment metagenome,No,marine,cable bacteria,marine sediment,No,d__Bacteria;p__Proteobacteria;c__Deltaproteobacteria;o__Desulfobacterales;f__Desulfobulbaceae;g__Candidatus Electrothrix;s__ diff --git a/assets/test_samplesheets/no_coverage_single_reads.csv b/assets/test_samplesheets/no_coverage_single_reads.csv new file mode 100644 index 0000000..26ac547 --- /dev/null +++ b/assets/test_samplesheets/no_coverage_single_reads.csv @@ -0,0 +1,2 @@ +sample,fasta,accession,fastq_1,fastq_2,assembly_software,binning_software,binning_parameters,stats_generation_software,completeness,contamination,genome_coverage,metagenome,co-assembly,broad_environment,local_environment,environmental_medium,RNA_presence,NCBI_lineage +no_coverage_single_reads,/Users/sofia/mgnify/seqsubmit/tests/data/nf-datasets/bin_lachnospira_eligens.fa.gz,SRR14332510,/Users/sofia/mgnify/seqsubmit/tests/data/fastq_1.fastq,,spades_v3.15.5,mags_v1,default,CheckM2_v1.0.1,61.0,0.21,,sediment metagenome,No,marine,cable bacteria,marine sediment,No,d__Bacteria;p__Proteobacteria;c__Deltaproteobacteria;o__Desulfobacterales;f__Desulfobulbaceae;g__Candidatus Electrothrix;s__ diff --git a/assets/test_samplesheets/no_quality.csv b/assets/test_samplesheets/no_quality.csv new file mode 100644 index 0000000..7da85be --- /dev/null +++ b/assets/test_samplesheets/no_quality.csv @@ -0,0 +1,2 @@ 
+sample,fasta,accession,fastq_1,fastq_2,assembly_software,binning_software,binning_parameters,stats_generation_software,completeness,contamination,genome_coverage,metagenome,co-assembly,broad_environment,local_environment,environmental_medium,RNA_presence,NCBI_lineage +no_quality,/Users/sofia/mgnify/seqsubmit/tests/data/nf-datasets/bin_lachnospira_eligens.fa.gz,SRR14332510,,,spades_v3.15.5,mags_v1,default,CheckM2_v1.0.1,,,32.07,sediment metagenome,No,marine,cable bacteria,marine sediment,No,d__Bacteria;p__Proteobacteria;c__Deltaproteobacteria;o__Desulfobacterales;f__Desulfobulbaceae;g__Candidatus Electrothrix;s__ diff --git a/assets/test_samplesheets/no_rna_presence.csv b/assets/test_samplesheets/no_rna_presence.csv new file mode 100644 index 0000000..ca3695b --- /dev/null +++ b/assets/test_samplesheets/no_rna_presence.csv @@ -0,0 +1,2 @@ +sample,fasta,accession,fastq_1,fastq_2,assembly_software,binning_software,binning_parameters,stats_generation_software,completeness,contamination,genome_coverage,metagenome,co-assembly,broad_environment,local_environment,environmental_medium,RNA_presence,NCBI_lineage +no_rna_presence,/Users/sofia/mgnify/seqsubmit/tests/data/nf-datasets/bin_lachnospiraceae.fa.gz,SRR14332510,,,spades_v3.15.5,mags_v1,default,CheckM2_v1.0.1,92.81,1.09,66.04,sediment metagenome,No,marine,cable bacteria,marine sediment,,d__Bacteria;p__Proteobacteria;c__Deltaproteobacteria;o__Desulfobacterales;f__Desulfobulbaceae;g__Candidatus Electrothrix;s__Candidatus Electrothrix marina diff --git a/assets/test_samplesheets/no_taxonomy.csv b/assets/test_samplesheets/no_taxonomy.csv new file mode 100644 index 0000000..bdb54ff --- /dev/null +++ b/assets/test_samplesheets/no_taxonomy.csv @@ -0,0 +1,2 @@ +sample,fasta,accession,fastq_1,fastq_2,assembly_software,binning_software,binning_parameters,stats_generation_software,completeness,contamination,genome_coverage,metagenome,co-assembly,broad_environment,local_environment,environmental_medium,RNA_presence,NCBI_lineage 
+no_taxonomy,/Users/sofia/mgnify/seqsubmit/tests/data/GCF_000005845.2_ASM584v2_genomic.fa.gz,SRR14332510,,,spades_v3.15.5,mags_v1,default,CheckM2_v1.0.1,88.71,5.04,10.04,sediment metagenome,No,marine,cable bacteria,marine sediment,No, diff --git a/conf/test_mag_complete_metadata.conf b/conf/test_mag_complete_metadata.conf new file mode 100644 index 0000000..0a61d3f --- /dev/null +++ b/conf/test_mag_complete_metadata.conf @@ -0,0 +1,39 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. + + Use as follows: + nextflow run nf-core/seqsubmit -profile test, --outdir + +---------------------------------------------------------------------------------------- +*/ + +process { + resourceLimits = [ + cpus: 2, + memory: '16.GB', + time: '1.h' + ] +} + +params { + config_profile_name = 'Test --mode mags complete_metadata profile' + config_profile_description = 'Single-case MAG test with complete metadata values provided' + + // Input data + // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets + // TODO nf-core: Give any required params for the test so that command line flags are not needed + input = "${projectDir}/assets/test_samplesheets/complete_metadata.csv" + + mode = "mags" + submission_study = "PRJEB98843" + centre_name = "TEST_CENTER" + + test_upload = true + + cat_db = null + checkm2_db = null + +} diff --git a/conf/test_mag_no_coverage_paired_reads.conf b/conf/test_mag_no_coverage_paired_reads.conf new file mode 100644 index 0000000..3531dd6 --- /dev/null +++ b/conf/test_mag_no_coverage_paired_reads.conf @@ -0,0 +1,39 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests 
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. + + Use as follows: + nextflow run nf-core/seqsubmit -profile test, --outdir + +---------------------------------------------------------------------------------------- +*/ + +process { + resourceLimits = [ + cpus: 2, + memory: '16.GB', + time: '1.h' + ] +} + +params { + config_profile_name = 'Test --mode mags no_coverage_paired_reads profile' + config_profile_description = 'Single-case MAG test with missing genome_coverage and paired-end reads' + + // Input data + // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets + // TODO nf-core: Give any required params for the test so that command line flags are not needed + input = "${projectDir}/assets/test_samplesheets/no_coverage_paired_reads.csv" + + mode = "mags" + submission_study = "PRJEB98843" + centre_name = "TEST_CENTER" + + test_upload = true + + cat_db = null + checkm2_db = null + +} diff --git a/conf/test_mag_no_coverage_single_reads.conf b/conf/test_mag_no_coverage_single_reads.conf new file mode 100644 index 0000000..fb86a95 --- /dev/null +++ b/conf/test_mag_no_coverage_single_reads.conf @@ -0,0 +1,39 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
+ + Use as follows: + nextflow run nf-core/seqsubmit -profile test, --outdir + +---------------------------------------------------------------------------------------- +*/ + +process { + resourceLimits = [ + cpus: 2, + memory: '16.GB', + time: '1.h' + ] +} + +params { + config_profile_name = 'Test --mode mags no_coverage_single_reads profile' + config_profile_description = 'Single-case MAG test with missing genome_coverage and single-end reads' + + // Input data + // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets + // TODO nf-core: Give any required params for the test so that command line flags are not needed + input = "${projectDir}/assets/test_samplesheets/no_coverage_single_reads.csv" + + mode = "mags" + submission_study = "PRJEB98843" + centre_name = "TEST_CENTER" + + test_upload = true + + cat_db = null + checkm2_db = null + +} diff --git a/conf/test_mag_no_quality.conf b/conf/test_mag_no_quality.conf new file mode 100644 index 0000000..874f2f9 --- /dev/null +++ b/conf/test_mag_no_quality.conf @@ -0,0 +1,41 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
+ + Use as follows: + nextflow run nf-core/seqsubmit -profile test, --outdir + +---------------------------------------------------------------------------------------- +*/ + +process { + resourceLimits = [ + cpus: 2, + memory: '16.GB', + time: '1.h' + ] +} + +params { + config_profile_name = 'Test --mode mags no_quality profile' + config_profile_description = 'Single-case MAG test with missing completeness and contamination values' + + // Input data + // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets + // TODO nf-core: Give any required params for the test so that command line flags are not needed + input = "${projectDir}/assets/test_samplesheets/no_quality.csv" + + mode = "mags" + submission_study = "PRJEB98843" + centre_name = "TEST_CENTER" + + test_upload = true + + cat_db = null + // CheckM2 doesn't allow usage of small test database, so real database will be downloaded during the test run, + // which is not ideal but necessary to test the pipeline's behaviour with missing quality values + checkm2_db = null + +} diff --git a/conf/test_mag_no_rna_presence.conf b/conf/test_mag_no_rna_presence.conf new file mode 100644 index 0000000..c31650d --- /dev/null +++ b/conf/test_mag_no_rna_presence.conf @@ -0,0 +1,39 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
+ + Use as follows: + nextflow run nf-core/seqsubmit -profile test, --outdir + +---------------------------------------------------------------------------------------- +*/ + +process { + resourceLimits = [ + cpus: 2, + memory: '16.GB', + time: '1.h' + ] +} + +params { + config_profile_name = 'Test --mode mags no_rna_presence profile' + config_profile_description = 'Single-case MAG test with missing RNA_presence value' + + // Input data + // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets + // TODO nf-core: Give any required params for the test so that command line flags are not needed + input = "${projectDir}/assets/test_samplesheets/no_rna_presence.csv" + + mode = "mags" + submission_study = "PRJEB98843" + centre_name = "TEST_CENTER" + + test_upload = true + + cat_db = null + checkm2_db = null + +} diff --git a/conf/test_mag_no_taxonomy.conf b/conf/test_mag_no_taxonomy.conf new file mode 100644 index 0000000..d65964f --- /dev/null +++ b/conf/test_mag_no_taxonomy.conf @@ -0,0 +1,39 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
+ + Use as follows: + nextflow run nf-core/seqsubmit -profile test, --outdir + +---------------------------------------------------------------------------------------- +*/ + +process { + resourceLimits = [ + cpus: 2, + memory: '16.GB', + time: '1.h' + ] +} + +params { + config_profile_name = 'Test --mode mags no_taxonomy profile' + config_profile_description = 'Single-case MAG test with missing NCBI_lineage value' + + // Input data + // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets + // TODO nf-core: Give any required params for the test so that command line flags are not needed + input = "${projectDir}/assets/test_samplesheets/no_taxonomy.csv" + + mode = "mags" + submission_study = "PRJEB98843" + centre_name = "TEST_CENTER" + + test_upload = true + + cat_db = "${projectDir}/tests/data/cat_db/" + checkm2_db = null + +} diff --git a/nextflow.config b/nextflow.config index a4199de..d484a1b 100644 --- a/nextflow.config +++ b/nextflow.config @@ -186,10 +186,16 @@ profiles { singularity.runOptions = '--nv' } // TODO: figure out how to better orginise tests for different workflow types (bins, mags, metagenomic_assemblies) - test { includeConfig 'conf/test.config' } - test_genome { includeConfig 'conf/test_genome.config' } - test_assembly { includeConfig 'conf/test_assembly.config' } - test_full { includeConfig 'conf/test_full.config' } + test { includeConfig 'conf/test.config' } + test_genome { includeConfig 'conf/test_genome.config' } + test_mag_complete_metadata { includeConfig 'conf/test_mag_complete_metadata.conf' } + test_mag_no_coverage_single_reads { includeConfig 'conf/test_mag_no_coverage_single_reads.conf' } + test_mag_no_coverage_paired_reads { includeConfig 'conf/test_mag_no_coverage_paired_reads.conf' } + test_mag_no_quality { includeConfig 'conf/test_mag_no_quality.conf' } + test_mag_no_rna_presence { includeConfig 'conf/test_mag_no_rna_presence.conf' } + test_mag_no_taxonomy { includeConfig 'conf/test_mag_no_taxonomy.conf' } + 
test_assembly { includeConfig 'conf/test_assembly.config' } + test_full { includeConfig 'conf/test_full.config' } } // Load nf-core custom profiles from different institutions diff --git a/tests/mag_complete_metadata.nf.test b/tests/mag_complete_metadata.nf.test new file mode 100644 index 0000000..e3c3091 --- /dev/null +++ b/tests/mag_complete_metadata.nf.test @@ -0,0 +1,38 @@ +nextflow_pipeline { + + name "Test genome submission workflow - complete_metadata" + script "../main.nf" + tag "pipeline" + tag "test_mag_complete_metadata" + profile "test_mag_complete_metadata" + + test("-profile test_mag_complete_metadata") { + + when { + params { + outdir = "$outputDir" + } + } + + then { + // stable_name: All files + folders in ${params.outdir}/ with a stable name + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) + // stable_path: All files in ${params.outdir}/ with stable content + def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + // Early failure no need to test the rest of snapshots + assert workflow.success + assertAll( + { assert snapshot( + // Number of successful tasks + workflow.trace.succeeded().size(), + // pipeline versions.yml file for multiqc from which Nextflow version is removed because we test pipelines on multiple Nextflow versions + removeNextflowVersion("$outputDir/pipeline_info/nf_core_seqsubmit_software_mqc_versions.yml"), + // All stable path name, with a relative path + stable_name, + // All files with stable contents + stable_path + ).match() } + ) + } + } +} diff --git a/tests/mag_no_coverage_paired_reads.nf.test b/tests/mag_no_coverage_paired_reads.nf.test new file mode 100644 index 0000000..3b71f06 --- /dev/null +++ b/tests/mag_no_coverage_paired_reads.nf.test @@ -0,0 +1,38 @@ +nextflow_pipeline { + + name "Test genome submission workflow stub - no_coverage_paired_reads" + script "../main.nf" + tag "pipeline" + tag 
"test_mag_no_coverage_paired_reads" + profile "test_mag_no_coverage_paired_reads" + + test("-profile test_mag_no_coverage_paired_reads") { + + when { + params { + outdir = "$outputDir" + } + } + + then { + // stable_name: All files + folders in ${params.outdir}/ with a stable name + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) + // stable_path: All files in ${params.outdir}/ with stable content + def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + // Early failure no need to test the rest of snapshots + assert workflow.success + assertAll( + { assert snapshot( + // Number of successful tasks + workflow.trace.succeeded().size(), + // pipeline versions.yml file for multiqc from which Nextflow version is removed because we test pipelines on multiple Nextflow versions + removeNextflowVersion("$outputDir/pipeline_info/nf_core_seqsubmit_software_mqc_versions.yml"), + // All stable path name, with a relative path + stable_name, + // All files with stable contents + stable_path + ).match() } + ) + } + } +} diff --git a/tests/mag_no_coverage_single_reads.nf.test b/tests/mag_no_coverage_single_reads.nf.test new file mode 100644 index 0000000..0f5b3e8 --- /dev/null +++ b/tests/mag_no_coverage_single_reads.nf.test @@ -0,0 +1,38 @@ +nextflow_pipeline { + + name "Test genome submission workflow stub - no_coverage_single_reads" + script "../main.nf" + tag "pipeline" + tag "test_mag_no_coverage_single_reads" + profile "test_mag_no_coverage_single_reads" + + test("-profile test_mag_no_coverage_single_reads") { + + when { + params { + outdir = "$outputDir" + } + } + + then { + // stable_name: All files + folders in ${params.outdir}/ with a stable name + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) + // stable_path: All files in ${params.outdir}/ with stable content + def 
stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + // Early failure no need to test the rest of snapshots + assert workflow.success + assertAll( + { assert snapshot( + // Number of successful tasks + workflow.trace.succeeded().size(), + // pipeline versions.yml file for multiqc from which Nextflow version is removed because we test pipelines on multiple Nextflow versions + removeNextflowVersion("$outputDir/pipeline_info/nf_core_seqsubmit_software_mqc_versions.yml"), + // All stable path name, with a relative path + stable_name, + // All files with stable contents + stable_path + ).match() } + ) + } + } +} diff --git a/tests/mag_no_quality.nf.test b/tests/mag_no_quality.nf.test new file mode 100644 index 0000000..42ad265 --- /dev/null +++ b/tests/mag_no_quality.nf.test @@ -0,0 +1,38 @@ +nextflow_pipeline { + + name "Test genome submission workflow stub - no_quality" + script "../main.nf" + tag "pipeline" + tag "test_mag_no_quality" + profile "test_mag_no_quality" + + test("-profile test_mag_no_quality") { + + when { + params { + outdir = "$outputDir" + } + } + + then { + // stable_name: All files + folders in ${params.outdir}/ with a stable name + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) + // stable_path: All files in ${params.outdir}/ with stable content + def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + // Early failure no need to test the rest of snapshots + assert workflow.success + assertAll( + { assert snapshot( + // Number of successful tasks + workflow.trace.succeeded().size(), + // pipeline versions.yml file for multiqc from which Nextflow version is removed because we test pipelines on multiple Nextflow versions + removeNextflowVersion("$outputDir/pipeline_info/nf_core_seqsubmit_software_mqc_versions.yml"), + // All stable path name, with a relative path + stable_name, + // All files with stable 
contents + stable_path + ).match() } + ) + } + } +} diff --git a/tests/mag_no_rna_presence.nf.test b/tests/mag_no_rna_presence.nf.test new file mode 100644 index 0000000..68b597d --- /dev/null +++ b/tests/mag_no_rna_presence.nf.test @@ -0,0 +1,38 @@ +nextflow_pipeline { + + name "Test genome submission workflow stub - no_rna_presence" + script "../main.nf" + tag "pipeline" + tag "test_mag_no_rna_presence" + profile "test_mag_no_rna_presence" + + test("-profile test_mag_no_rna_presence") { + + when { + params { + outdir = "$outputDir" + } + } + + then { + // stable_name: All files + folders in ${params.outdir}/ with a stable name + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) + // stable_path: All files in ${params.outdir}/ with stable content + def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + // Early failure no need to test the rest of snapshots + assert workflow.success + assertAll( + { assert snapshot( + // Number of successful tasks + workflow.trace.succeeded().size(), + // pipeline versions.yml file for multiqc from which Nextflow version is removed because we test pipelines on multiple Nextflow versions + removeNextflowVersion("$outputDir/pipeline_info/nf_core_seqsubmit_software_mqc_versions.yml"), + // All stable path name, with a relative path + stable_name, + // All files with stable contents + stable_path + ).match() } + ) + } + } +} diff --git a/tests/mag_no_taxonomy.nf.test b/tests/mag_no_taxonomy.nf.test new file mode 100644 index 0000000..be79c96 --- /dev/null +++ b/tests/mag_no_taxonomy.nf.test @@ -0,0 +1,38 @@ +nextflow_pipeline { + + name "Test genome submission workflow stub - no_taxonomy" + script "../main.nf" + tag "pipeline" + tag "test_mag_no_taxonomy" + profile "test_mag_no_taxonomy" + + test("-profile test_mag_no_taxonomy") { + + when { + params { + outdir = "$outputDir" + } + } + + then { + // stable_name: All files + 
folders in ${params.outdir}/ with a stable name + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) + // stable_path: All files in ${params.outdir}/ with stable content + def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + // Early failure no need to test the rest of snapshots + assert workflow.success + assertAll( + { assert snapshot( + // Number of successful tasks + workflow.trace.succeeded().size(), + // pipeline versions.yml file for multiqc from which Nextflow version is removed because we test pipelines on multiple Nextflow versions + removeNextflowVersion("$outputDir/pipeline_info/nf_core_seqsubmit_software_mqc_versions.yml"), + // All stable path name, with a relative path + stable_name, + // All files with stable contents + stable_path + ).match() } + ) + } + } +} From 2ae8f952d3a8c699413cf193e3576314722f0639 Mon Sep 17 00:00:00 2001 From: Sofia Ochkalova Date: Wed, 18 Mar 2026 18:10:25 +0000 Subject: [PATCH 13/46] update methods.md --- docs/methods.md | 66 +++++++++++++++++++++---------------------------- 1 file changed, 28 insertions(+), 38 deletions(-) diff --git a/docs/methods.md b/docs/methods.md index 522ad49..0f15a33 100644 --- a/docs/methods.md +++ b/docs/methods.md @@ -7,7 +7,7 @@ - `GENOMESUBMIT` for `--mode mags` and `--mode bins` - `ASSEMBLYSUBMIT` for `--mode metagenomic_assemblies` -This page documents the methods that are currently implemented in the pipeline and includes placeholders for methods that will be documented once they are implemented. +This page documents the methods that are currently implemented in the pipeline. ## `GENOMESUBMIT` methods @@ -16,19 +16,21 @@ This page documents the methods that are currently implemented in the pipeline a The `GENOMESUBMIT` workflow: 1. Reads the samplesheet and associated genome FASTA files. -2. Reuses `RNA_presence` values supplied in the samplesheet when they are already present. 
-3. Calculates `RNA_presence` internally for entries where this field is missing. -4. Collects genome metadata into the tabular format required by `genome_upload`. +2. Validates genome FASTA files. +3. Reuses provided values, or calculates missing ones, for RNA gene presence, coverage, taxonomy, and genome quality metrics. +4. Collects genome metadata into the tabular format required by `genome_uploader`. 5. Generates submission manifests for ENA. 6. Performs submission to ENA. -### RNA presence detection +### Genome FASTA validation -#### When RNA detection runs +Genome FASTA files are validated with the `FASTAVALIDATOR` module before downstream processing. Each file is checked for FASTA format validity and contig count. A genome must contain at least two contigs to pass validation, which is an ENA requirement for contig-level submissions. -The workflow only runs internal RNA detection for entries where the `RNA_presence` column is empty. If a value is already supplied in the samplesheet, that value is passed through unchanged. +Only FASTA files that pass validation are retained for downstream processing and submission. -#### Tools used +### RNA presence detection + +The workflow only runs internal RNA detection for entries where the `RNA_presence` column is empty. If a value is already supplied in the samplesheet, that value is passed through unchanged. RNA detection is implemented through the `RNA_DETECTION` subworkflow and combines: @@ -64,7 +66,7 @@ $$ If multiple hits are found for the same subunit, the workflow keeps the best recovered percentage for that subunit. -A subunit is considered present when its best recovered percentage is greater than or equal to `params.rrna_limit`. The current default is `80`. +A subunit is considered present when its best recovered percentage is greater than or equal to `--rrna_limit`. The current default is `80`.
#### tRNA detection @@ -90,39 +92,25 @@ The result is written as a two-column TSV file containing the genome identifier ### Genome coverage calculation -> [!NOTE] -> Placeholder section. -> -> This section will describe the internal genome coverage calculation once it is implemented in the workflow. -> -> For now, `genome_coverage` is treated as submission metadata. +Entries that already contain `genome_coverage` are passed through unchanged. For entries where coverage is missing, the workflow joins validated FASTA files with their associated read files and runs `coverm genome` through the `COVERM_GENOME` module (single-end or paired-end mode is selected from sample metadata). + +Genome coverage values from `coverm genome` output TSV are parsed and merged into submission metadata. ### Taxonomy assignment -> [!NOTE] -> Placeholder section. -> -> This section will describe the taxonomy assignment method once it is implemented in the workflow. -> -> For now, taxonomy is expected to be provided by the user in the `NCBI_lineage` column. +If `NCBI_lineage` is already present in the input samplesheet, the value is retained. If it is missing, the workflow runs taxonomy classification using the `CAT_pack` tool. + +Before classification, input FASTA files are normalized to a `.fasta` suffix by `RENAME_FASTA_FOR_CATPACK`. Classification is then run in bin mode (`CAT_pack bins`, followed by `CAT_pack add_names`). -### Completeness assessment +Database input is taken from `--cat_db` when provided; otherwise the workflow uses `--cat_db_download_id` to download and prepare a CATPACK database. The resulting classification table is parsed, and the lineage field is written to `NCBI_lineage`. -> [!NOTE] -> Placeholder section. -> -> This section will describe the completeness estimation method once it is implemented in the workflow. -> -> For now, completeness is expected to be provided by the user in the `completeness` column.
+### Completeness and contamination assessment -### Contamination assessment +Completeness and contamination are evaluated together in a shared genome quality step. -> [!NOTE] -> Placeholder section. -> -> This section will describe the contamination estimation method once it is implemented in the workflow. -> -> For now, contamination is expected to be provided by the user in the `contamination` column. +The workflow checks three samplesheet fields: `completeness`, `contamination`, and `stats_generation_software`. If all three are already present, those values are reused. If any of them is missing, the genome is analysed with `CheckM2 predict`. If `--checkm2_db` is supplied and exists, that database is used directly. Otherwise, the workflow downloads a CheckM2 database from Zenodo (using the configured database ID) and then runs prediction. + +For records that run `CheckM2`, completeness and contamination are extracted from the generated quality report (`quality_report.tsv`) and the `CheckM2` version used is recorded as `stats_generation_software`. ## `ASSEMBLYSUBMIT` methods @@ -139,17 +127,19 @@ The `ASSEMBLYSUBMIT` workflow: ### Assembly FASTA validation -Assembly FASTA files are validated with `FASTAVALIDATOR` before downstream processing. Only assemblies that pass validation continue to the coverage and submission steps. +Assembly FASTA files are validated with `FASTAVALIDATOR` before downstream processing. Each file is checked for FASTA format validity and contig count. An assembly must contain at least two contigs to pass validation, which is an ENA requirement for contig-level submissions. + +Only assemblies with successful validation are forwarded to coverage estimation, metadata/manifest generation and submission. ### Coverage calculation If the `coverage` column is already populated in the samplesheet, that value is used directly.
-If `coverage` is missing, the workflow joins each validated assembly with its associated read files and calculates coverage with `coverm contig`. +If `coverage` is missing, the workflow calculates coverage with `coverm contig`. `coverm contig` outputs per-contig depth. The workflow then reads this file and calculates the arithmetic mean across all contigs. -If the per-contig coverage values are $c_1, c_2, \ldots, c_n$, the workflow currently computes assembly coverage as nweighted mean across contigs: +If the per-contig coverage values are $c_1, c_2, \ldots, c_n$, the workflow currently computes assembly coverage as an unweighted mean across contigs: $$ \bar{c} = \frac{1}{n} \sum_{i=1}^{n} c_i From d422d96db40bed1aaa48d520ad06e230978ee0f1 Mon Sep 17 00:00:00 2001 From: Sofia Ochkalova Date: Wed, 18 Mar 2026 18:19:21 +0000 Subject: [PATCH 14/46] update container for webin-cli-wrapper --- modules/local/ena_webin_cli_wrapper/main.nf | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/modules/local/ena_webin_cli_wrapper/main.nf b/modules/local/ena_webin_cli_wrapper/main.nf index e675cc8..c610fe3 100644 --- a/modules/local/ena_webin_cli_wrapper/main.nf +++ b/modules/local/ena_webin_cli_wrapper/main.nf @@ -5,7 +5,7 @@ process ENA_WEBIN_CLI_WRAPPER { label 'process_low' tag "${meta.id}" - container "community.wave.seqera.io/library/ena-webin-cli_mgnify-pipelines-toolkit:49478611e9515066" + container "quay.io/microbiome-informatics/java_mgnify-pipelines-toolkit:1.4.20" input: tuple val(meta), path(submission_item), path(manifest) @@ -35,7 +35,6 @@ process ENA_WEBIN_CLI_WRAPPER { cat <<-END_VERSIONS > versions.yml "${task.process}": python: \$(python --version 2>&1 | sed 's/Python //g') - biopython: \$(python -c "import pkg_resources; print(pkg_resources.get_distribution('biopython').version)") END_VERSIONS """ } From 201d0ffaa20ea7240f6053695ef00d1347ade7f9 Mon Sep 17 00:00:00 2001 From: Sofia Ochkalova Date: Wed, 18 Mar 2026 18:20:37 +0000 Subject: [PATCH 
15/46] fix typo --- conf/test_mag_no_quality.conf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conf/test_mag_no_quality.conf b/conf/test_mag_no_quality.conf index 874f2f9..90d682d 100644 --- a/conf/test_mag_no_quality.conf +++ b/conf/test_mag_no_quality.conf @@ -34,8 +34,8 @@ params { test_upload = true cat_db = null - # CheckM2 doesn't allow usage of small test database, so real database will be downloaded during the test run, - # which is not ideal but necessary to test the pipeline's behaviour with missing quality values + // CheckM2 doesn't allow usage of small test database, so real database will be downloaded during the test run, + // which is not ideal but necessary to test the pipeline's behaviour with missing quality values checkm2_db = null } From 1ca99b33d152ebd875f9b7844440fb1b303c660a Mon Sep 17 00:00:00 2001 From: Sofia Ochkalova Date: Wed, 18 Mar 2026 18:26:30 +0000 Subject: [PATCH 16/46] add output for accession TSV and update meta description to webin-cli-wrapper --- modules/local/ena_webin_cli_wrapper/main.nf | 2 ++ modules/local/ena_webin_cli_wrapper/meta.yml | 7 ++++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/modules/local/ena_webin_cli_wrapper/main.nf b/modules/local/ena_webin_cli_wrapper/main.nf index c610fe3..3964efc 100644 --- a/modules/local/ena_webin_cli_wrapper/main.nf +++ b/modules/local/ena_webin_cli_wrapper/main.nf @@ -12,6 +12,7 @@ process ENA_WEBIN_CLI_WRAPPER { path(webin_cli_jar) output: + tuple val(meta), path("*_accessions.tsv"), emit: accessions path "versions.yml", emit: versions script: @@ -27,6 +28,7 @@ process ENA_WEBIN_CLI_WRAPPER { webin_cli_handler \\ -m ${prefix}_updated_manifest.manifest \\ + -o ${prefix}_accessions.tsv \\ --webin-cli-jar ${webin_cli_jar} \\ ${submit_or_validate} \\ ${mode} \\ diff --git a/modules/local/ena_webin_cli_wrapper/meta.yml b/modules/local/ena_webin_cli_wrapper/meta.yml index 0469709..a0abd93 100644 --- 
a/modules/local/ena_webin_cli_wrapper/meta.yml +++ b/modules/local/ena_webin_cli_wrapper/meta.yml @@ -34,7 +34,12 @@ input: pattern: "webin-cli-*.jar" output: - versions: + - - accessions: + type: file + description: | + TSV file containing the accession assigned by ENA for the submitted item. + File has two columns: "alias" and "accession". + - - versions: - "versions.yml": type: file description: File containing software versions. From b6121484e873bb1d3ebf9b1d6356a85b52102bcb Mon Sep 17 00:00:00 2001 From: Sofia Ochkalova Date: Wed, 18 Mar 2026 18:33:11 +0000 Subject: [PATCH 17/46] add test with invalid assembly --- assets/test_samplesheets/one_contig.csv | 2 ++ conf/test_mag_one_contig.conf | 39 +++++++++++++++++++++++++ nextflow.config | 1 + tests/mag_one_contig.nf.test | 38 ++++++++++++++++++++++++ 4 files changed, 80 insertions(+) create mode 100644 assets/test_samplesheets/one_contig.csv create mode 100644 conf/test_mag_one_contig.conf create mode 100644 tests/mag_one_contig.nf.test diff --git a/assets/test_samplesheets/one_contig.csv b/assets/test_samplesheets/one_contig.csv new file mode 100644 index 0000000..32d50b8 --- /dev/null +++ b/assets/test_samplesheets/one_contig.csv @@ -0,0 +1,2 @@ +sample,fasta,accession,fastq_1,fastq_2,assembly_software,binning_software,binning_parameters,stats_generation_software,completeness,contamination,genome_coverage,metagenome,co-assembly,broad_environment,local_environment,environmental_medium,RNA_presence,NCBI_lineage +one_contig,/Users/sofia/mgnify/seqsubmit/tests/data/one_contig_assembly.fasta.gz,SRR14332510,,,spades_v3.15.5,mags_v1,default,CheckM2_v1.0.1,61.0,0.21,32.07,sediment metagenome,No,marine,cable bacteria,marine sediment,No,d__Bacteria;p__Proteobacteria;c__Deltaproteobacteria;o__Desulfobacterales;f__Desulfobulbaceae;g__Candidatus Electrothrix;s__ diff --git a/conf/test_mag_one_contig.conf b/conf/test_mag_one_contig.conf new file mode 100644 index 0000000..0ea695f --- /dev/null +++ 
b/conf/test_mag_one_contig.conf @@ -0,0 +1,39 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. + + Use as follows: + nextflow run nf-core/seqsubmit -profile test, --outdir + +---------------------------------------------------------------------------------------- +*/ + +process { + resourceLimits = [ + cpus: 2, + memory: '16.GB', + time: '1.h' + ] +} + +params { + config_profile_name = 'Test --mode mags one_contig profile' + config_profile_description = 'Single-case MAG test with one contig assembly' + + // Input data + // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets + // TODO nf-core: Give any required params for the test so that command line flags are not needed + input = "${projectDir}/assets/test_samplesheets/one_contig.csv" + + mode = "mags" + submission_study = "PRJEB98843" + centre_name = "TEST_CENTER" + + test_upload = true + + cat_db = null + checkm2_db = null + +} diff --git a/nextflow.config b/nextflow.config index d484a1b..787f6e1 100644 --- a/nextflow.config +++ b/nextflow.config @@ -192,6 +192,7 @@ profiles { test_mag_no_coverage_single_reads { includeConfig 'conf/test_mag_no_coverage_single_reads.conf' } test_mag_no_coverage_paired_reads { includeConfig 'conf/test_mag_no_coverage_paired_reads.conf' } test_mag_no_quality { includeConfig 'conf/test_mag_no_quality.conf' } + test_mag_one_contig { includeConfig 'conf/test_mag_one_contig.conf' } test_mag_no_rna_presence { includeConfig 'conf/test_mag_no_rna_presence.conf' } test_mag_no_taxonomy { includeConfig 'conf/test_mag_no_taxonomy.conf' } test_assembly { includeConfig 'conf/test_assembly.config' } diff --git a/tests/mag_one_contig.nf.test b/tests/mag_one_contig.nf.test new file mode 100644 index 
0000000..6bdfb03 --- /dev/null +++ b/tests/mag_one_contig.nf.test @@ -0,0 +1,38 @@ +nextflow_pipeline { + + name "Test genome submission workflow stub - one_contig" + script "../main.nf" + tag "pipeline" + tag "test_mag_one_contig" + profile "test_mag_one_contig" + + test("-profile test_mag_one_contig") { + + when { + params { + outdir = "$outputDir" + } + } + + then { + // stable_name: All files + folders in ${params.outdir}/ with a stable name + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) + // stable_path: All files in ${params.outdir}/ with stable content + def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + // Early failure no need to test the rest of snapshots + assert workflow.success + assertAll( + { assert snapshot( + // Number of successful tasks + workflow.trace.succeeded().size(), + // pipeline versions.yml file for multiqc from which Nextflow version is removed because we test pipelines on multiple Nextflow versions + removeNextflowVersion("$outputDir/pipeline_info/nf_core_seqsubmit_software_mqc_versions.yml"), + // All stable path name, with a relative path + stable_name, + // All files with stable contents + stable_path + ).match() } + ) + } + } +} From 6a38731bf616bdb6bb0ccde71b0eed8ebac6b696 Mon Sep 17 00:00:00 2001 From: Sofia Ochkalova Date: Wed, 18 Mar 2026 18:36:47 +0000 Subject: [PATCH 18/46] append mag_ to all samplesheets for genomesubmit testing for convenience --- .../{complete_metadata.csv => mag_complete_metadata.csv} | 0 ...verage_paired_reads.csv => mag_no_coverage_paired_reads.csv} | 0 ...verage_single_reads.csv => mag_no_coverage_single_reads.csv} | 0 assets/test_samplesheets/{no_quality.csv => mag_no_quality.csv} | 0 .../{no_rna_presence.csv => mag_no_rna_presence.csv} | 0 .../test_samplesheets/{no_taxonomy.csv => mag_no_taxonomy.csv} | 0 assets/test_samplesheets/{one_contig.csv => mag_one_contig.csv} | 0 
conf/test_mag_complete_metadata.conf | 2 +- conf/test_mag_no_coverage_paired_reads.conf | 2 +- conf/test_mag_no_coverage_single_reads.conf | 2 +- conf/test_mag_no_quality.conf | 2 +- conf/test_mag_no_rna_presence.conf | 2 +- conf/test_mag_no_taxonomy.conf | 2 +- conf/test_mag_one_contig.conf | 2 +- 14 files changed, 7 insertions(+), 7 deletions(-) rename assets/test_samplesheets/{complete_metadata.csv => mag_complete_metadata.csv} (100%) rename assets/test_samplesheets/{no_coverage_paired_reads.csv => mag_no_coverage_paired_reads.csv} (100%) rename assets/test_samplesheets/{no_coverage_single_reads.csv => mag_no_coverage_single_reads.csv} (100%) rename assets/test_samplesheets/{no_quality.csv => mag_no_quality.csv} (100%) rename assets/test_samplesheets/{no_rna_presence.csv => mag_no_rna_presence.csv} (100%) rename assets/test_samplesheets/{no_taxonomy.csv => mag_no_taxonomy.csv} (100%) rename assets/test_samplesheets/{one_contig.csv => mag_one_contig.csv} (100%) diff --git a/assets/test_samplesheets/complete_metadata.csv b/assets/test_samplesheets/mag_complete_metadata.csv similarity index 100% rename from assets/test_samplesheets/complete_metadata.csv rename to assets/test_samplesheets/mag_complete_metadata.csv diff --git a/assets/test_samplesheets/no_coverage_paired_reads.csv b/assets/test_samplesheets/mag_no_coverage_paired_reads.csv similarity index 100% rename from assets/test_samplesheets/no_coverage_paired_reads.csv rename to assets/test_samplesheets/mag_no_coverage_paired_reads.csv diff --git a/assets/test_samplesheets/no_coverage_single_reads.csv b/assets/test_samplesheets/mag_no_coverage_single_reads.csv similarity index 100% rename from assets/test_samplesheets/no_coverage_single_reads.csv rename to assets/test_samplesheets/mag_no_coverage_single_reads.csv diff --git a/assets/test_samplesheets/no_quality.csv b/assets/test_samplesheets/mag_no_quality.csv similarity index 100% rename from assets/test_samplesheets/no_quality.csv rename to 
assets/test_samplesheets/mag_no_quality.csv diff --git a/assets/test_samplesheets/no_rna_presence.csv b/assets/test_samplesheets/mag_no_rna_presence.csv similarity index 100% rename from assets/test_samplesheets/no_rna_presence.csv rename to assets/test_samplesheets/mag_no_rna_presence.csv diff --git a/assets/test_samplesheets/no_taxonomy.csv b/assets/test_samplesheets/mag_no_taxonomy.csv similarity index 100% rename from assets/test_samplesheets/no_taxonomy.csv rename to assets/test_samplesheets/mag_no_taxonomy.csv diff --git a/assets/test_samplesheets/one_contig.csv b/assets/test_samplesheets/mag_one_contig.csv similarity index 100% rename from assets/test_samplesheets/one_contig.csv rename to assets/test_samplesheets/mag_one_contig.csv diff --git a/conf/test_mag_complete_metadata.conf b/conf/test_mag_complete_metadata.conf index 0a61d3f..b6d3ec3 100644 --- a/conf/test_mag_complete_metadata.conf +++ b/conf/test_mag_complete_metadata.conf @@ -25,7 +25,7 @@ params { // Input data // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = "${projectDir}/assets/test_samplesheets/complete_metadata.csv" + input = "${projectDir}/assets/test_samplesheets/mag_complete_metadata.csv" mode = "mags" submission_study = "PRJEB98843" diff --git a/conf/test_mag_no_coverage_paired_reads.conf b/conf/test_mag_no_coverage_paired_reads.conf index 3531dd6..c4eec06 100644 --- a/conf/test_mag_no_coverage_paired_reads.conf +++ b/conf/test_mag_no_coverage_paired_reads.conf @@ -25,7 +25,7 @@ params { // Input data // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = "${projectDir}/assets/test_samplesheets/no_coverage_paired_reads.csv" + input = "${projectDir}/assets/test_samplesheets/mag_no_coverage_paired_reads.csv" mode = "mags" 
submission_study = "PRJEB98843" diff --git a/conf/test_mag_no_coverage_single_reads.conf b/conf/test_mag_no_coverage_single_reads.conf index fb86a95..794798d 100644 --- a/conf/test_mag_no_coverage_single_reads.conf +++ b/conf/test_mag_no_coverage_single_reads.conf @@ -25,7 +25,7 @@ params { // Input data // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = "${projectDir}/assets/test_samplesheets/no_coverage_single_reads.csv" + input = "${projectDir}/assets/test_samplesheets/mag_no_coverage_single_reads.csv" mode = "mags" submission_study = "PRJEB98843" diff --git a/conf/test_mag_no_quality.conf b/conf/test_mag_no_quality.conf index 90d682d..f067a8b 100644 --- a/conf/test_mag_no_quality.conf +++ b/conf/test_mag_no_quality.conf @@ -25,7 +25,7 @@ params { // Input data // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = "${projectDir}/assets/test_samplesheets/no_quality.csv" + input = "${projectDir}/assets/test_samplesheets/mag_no_quality.csv" mode = "mags" submission_study = "PRJEB98843" diff --git a/conf/test_mag_no_rna_presence.conf b/conf/test_mag_no_rna_presence.conf index c31650d..df4796d 100644 --- a/conf/test_mag_no_rna_presence.conf +++ b/conf/test_mag_no_rna_presence.conf @@ -25,7 +25,7 @@ params { // Input data // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = "${projectDir}/assets/test_samplesheets/no_rna_presence.csv" + input = "${projectDir}/assets/test_samplesheets/mag_no_rna_presence.csv" mode = "mags" submission_study = "PRJEB98843" diff --git a/conf/test_mag_no_taxonomy.conf b/conf/test_mag_no_taxonomy.conf index d65964f..256b26d 100644 --- 
a/conf/test_mag_no_taxonomy.conf +++ b/conf/test_mag_no_taxonomy.conf @@ -25,7 +25,7 @@ params { // Input data // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = "${projectDir}/assets/test_samplesheets/no_taxonomy.csv" + input = "${projectDir}/assets/test_samplesheets/mag_no_taxonomy.csv" mode = "mags" submission_study = "PRJEB98843" diff --git a/conf/test_mag_one_contig.conf b/conf/test_mag_one_contig.conf index 0ea695f..a792048 100644 --- a/conf/test_mag_one_contig.conf +++ b/conf/test_mag_one_contig.conf @@ -25,7 +25,7 @@ params { // Input data // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = "${projectDir}/assets/test_samplesheets/one_contig.csv" + input = "${projectDir}/assets/test_samplesheets/mag_one_contig.csv" mode = "mags" submission_study = "PRJEB98843" From 9e4f9801985a5d9eab15cc761a83ee14f38eb53e Mon Sep 17 00:00:00 2001 From: Sofia Ochkalova Date: Wed, 18 Mar 2026 18:45:08 +0000 Subject: [PATCH 19/46] add tests for assemblysubmit workflow --- .../assembly_complete_metadata.csv | 2 + .../assembly_no_coverage_paired_reads.csv | 2 + .../test_samplesheets/assembly_one_contig.csv | 2 + conf/test_assembly_complete_metadata.config | 37 ++++++++++++++++++ ...t_assembly_no_coverage_paired_reads.config | 37 ++++++++++++++++++ conf/test_assembly_one_contig.config | 37 ++++++++++++++++++ nextflow.config | 3 ++ tests/assembly_complete_metadata.nf.test | 38 +++++++++++++++++++ .../assembly_no_coverage_paired_reads.nf.test | 38 +++++++++++++++++++ tests/assembly_one_contig.nf.test | 38 +++++++++++++++++++ 10 files changed, 234 insertions(+) create mode 100644 assets/test_samplesheets/assembly_complete_metadata.csv create mode 100644 assets/test_samplesheets/assembly_no_coverage_paired_reads.csv create 
mode 100644 assets/test_samplesheets/assembly_one_contig.csv create mode 100644 conf/test_assembly_complete_metadata.config create mode 100644 conf/test_assembly_no_coverage_paired_reads.config create mode 100644 conf/test_assembly_one_contig.config create mode 100644 tests/assembly_complete_metadata.nf.test create mode 100644 tests/assembly_no_coverage_paired_reads.nf.test create mode 100644 tests/assembly_one_contig.nf.test diff --git a/assets/test_samplesheets/assembly_complete_metadata.csv b/assets/test_samplesheets/assembly_complete_metadata.csv new file mode 100644 index 0000000..b0d2424 --- /dev/null +++ b/assets/test_samplesheets/assembly_complete_metadata.csv @@ -0,0 +1,2 @@ +sample,fasta,fastq_1,fastq_2,coverage,run_accession,assembler,assembler_version +complete_metadata,/Users/sofia/mgnify/seqsubmit/tests/data/contigs.fasta.gz,,,30,ERR000003,MEGAHIT,1.2.9 diff --git a/assets/test_samplesheets/assembly_no_coverage_paired_reads.csv b/assets/test_samplesheets/assembly_no_coverage_paired_reads.csv new file mode 100644 index 0000000..60f8463 --- /dev/null +++ b/assets/test_samplesheets/assembly_no_coverage_paired_reads.csv @@ -0,0 +1,2 @@ +sample,fasta,fastq_1,fastq_2,coverage,run_accession,assembler,assembler_version +no_coverage_paired_reads,/Users/sofia/mgnify/seqsubmit/tests/data/contigs.fasta.gz,/Users/sofia/mgnify/seqsubmit/tests/data/fastq_1.fastq,/Users/sofia/mgnify/seqsubmit/tests/data/fastq_2.fastq,,ERR000001,SPAdes,3.15 diff --git a/assets/test_samplesheets/assembly_one_contig.csv b/assets/test_samplesheets/assembly_one_contig.csv new file mode 100644 index 0000000..16734ba --- /dev/null +++ b/assets/test_samplesheets/assembly_one_contig.csv @@ -0,0 +1,2 @@ +sample,fasta,fastq_1,fastq_2,coverage,run_accession,assembler,assembler_version +one_contig,/Users/sofia/mgnify/seqsubmit/tests/data/one_contig_assembly.fasta.gz,,,45,ERR000002,Velvet,1.2.10 diff --git a/conf/test_assembly_complete_metadata.config b/conf/test_assembly_complete_metadata.config 
new file mode 100644 index 0000000..da91978 --- /dev/null +++ b/conf/test_assembly_complete_metadata.config @@ -0,0 +1,37 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. + + Use as follows: + nextflow run nf-core/seqsubmit -profile test, --outdir + +---------------------------------------------------------------------------------------- +*/ + +process { + resourceLimits = [ + cpus: 2, + memory: '8.GB', + time: '1.h' + ] +} + +params { + config_profile_name = 'Test --mode metagenomic_assemblies complete_metadata profile' + config_profile_description = 'Single-case assembly test with complete metadata values provided' + + // Input data + // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets + // TODO nf-core: Give any required params for the test so that command line flags are not needed + input = "${projectDir}/assets/test_samplesheets/assembly_complete_metadata.csv" + outdir = 'test_output' + + mode = "metagenomic_assemblies" + submission_study = "PRJEB98843" + ena_raw_reads_study_accession = "PRJEB65995" + library = "metagenome" + centre_name = "TEST_CENTER" + +} diff --git a/conf/test_assembly_no_coverage_paired_reads.config b/conf/test_assembly_no_coverage_paired_reads.config new file mode 100644 index 0000000..a606eca --- /dev/null +++ b/conf/test_assembly_no_coverage_paired_reads.config @@ -0,0 +1,37 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
+ + Use as follows: + nextflow run nf-core/seqsubmit -profile test, --outdir + +---------------------------------------------------------------------------------------- +*/ + +process { + resourceLimits = [ + cpus: 2, + memory: '8.GB', + time: '1.h' + ] +} + +params { + config_profile_name = 'Test --mode metagenomic_assemblies no_coverage_paired_reads profile' + config_profile_description = 'Single-case assembly test with missing coverage and paired-end reads' + + // Input data + // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets + // TODO nf-core: Give any required params for the test so that command line flags are not needed + input = "${projectDir}/assets/test_samplesheets/assembly_no_coverage_paired_reads.csv" + outdir = 'test_output' + + mode = "metagenomic_assemblies" + submission_study = "PRJEB98843" + ena_raw_reads_study_accession = "PRJEB65995" + library = "metagenome" + centre_name = "TEST_CENTER" + +} diff --git a/conf/test_assembly_one_contig.config b/conf/test_assembly_one_contig.config new file mode 100644 index 0000000..683570a --- /dev/null +++ b/conf/test_assembly_one_contig.config @@ -0,0 +1,37 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
+ + Use as follows: + nextflow run nf-core/seqsubmit -profile test, --outdir + +---------------------------------------------------------------------------------------- +*/ + +process { + resourceLimits = [ + cpus: 2, + memory: '8.GB', + time: '1.h' + ] +} + +params { + config_profile_name = 'Test --mode metagenomic_assemblies one_contig profile' + config_profile_description = 'Single-case assembly test with one contig assembly' + + // Input data + // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets + // TODO nf-core: Give any required params for the test so that command line flags are not needed + input = "${projectDir}/assets/test_samplesheets/assembly_one_contig.csv" + outdir = 'test_output' + + mode = "metagenomic_assemblies" + submission_study = "PRJEB98843" + ena_raw_reads_study_accession = "PRJEB65995" + library = "metagenome" + centre_name = "TEST_CENTER" + +} diff --git a/nextflow.config b/nextflow.config index 787f6e1..a2cfce3 100644 --- a/nextflow.config +++ b/nextflow.config @@ -196,6 +196,9 @@ profiles { test_mag_no_rna_presence { includeConfig 'conf/test_mag_no_rna_presence.conf' } test_mag_no_taxonomy { includeConfig 'conf/test_mag_no_taxonomy.conf' } test_assembly { includeConfig 'conf/test_assembly.config' } + test_assembly_complete_metadata { includeConfig 'conf/test_assembly_complete_metadata.config' } + test_assembly_no_coverage_paired_reads { includeConfig 'conf/test_assembly_no_coverage_paired_reads.config' } + test_assembly_one_contig { includeConfig 'conf/test_assembly_one_contig.config' } test_full { includeConfig 'conf/test_full.config' } } diff --git a/tests/assembly_complete_metadata.nf.test b/tests/assembly_complete_metadata.nf.test new file mode 100644 index 0000000..4bd673d --- /dev/null +++ b/tests/assembly_complete_metadata.nf.test @@ -0,0 +1,38 @@ +nextflow_pipeline { + + name "Test assembly submission workflow stub - complete_metadata" + script "../main.nf" + tag "pipeline" + tag 
"test_assembly_complete_metadata" + profile "test_assembly_complete_metadata" + + test("-profile test_assembly_complete_metadata") { + + when { + params { + outdir = "$outputDir" + } + } + + then { + // stable_name: All files + folders in ${params.outdir}/ with a stable name + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) + // stable_path: All files in ${params.outdir}/ with stable content + def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + // Early failure no need to test the rest of snapshots + assert workflow.success + assertAll( + { assert snapshot( + // Number of successful tasks + workflow.trace.succeeded().size(), + // pipeline versions.yml file for multiqc from which Nextflow version is removed because we test pipelines on multiple Nextflow versions + removeNextflowVersion("$outputDir/pipeline_info/nf_core_seqsubmit_software_mqc_versions.yml"), + // All stable path name, with a relative path + stable_name, + // All files with stable contents + stable_path + ).match() } + ) + } + } +} diff --git a/tests/assembly_no_coverage_paired_reads.nf.test b/tests/assembly_no_coverage_paired_reads.nf.test new file mode 100644 index 0000000..45e7113 --- /dev/null +++ b/tests/assembly_no_coverage_paired_reads.nf.test @@ -0,0 +1,38 @@ +nextflow_pipeline { + + name "Test assembly submission workflow stub - no_coverage_paired_reads" + script "../main.nf" + tag "pipeline" + tag "test_assembly_no_coverage_paired_reads" + profile "test_assembly_no_coverage_paired_reads" + + test("-profile test_assembly_no_coverage_paired_reads") { + + when { + params { + outdir = "$outputDir" + } + } + + then { + // stable_name: All files + folders in ${params.outdir}/ with a stable name + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) + // stable_path: All files in ${params.outdir}/ with 
stable content + def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + // Early failure no need to test the rest of snapshots + assert workflow.success + assertAll( + { assert snapshot( + // Number of successful tasks + workflow.trace.succeeded().size(), + // pipeline versions.yml file for multiqc from which Nextflow version is removed because we test pipelines on multiple Nextflow versions + removeNextflowVersion("$outputDir/pipeline_info/nf_core_seqsubmit_software_mqc_versions.yml"), + // All stable path name, with a relative path + stable_name, + // All files with stable contents + stable_path + ).match() } + ) + } + } +} diff --git a/tests/assembly_one_contig.nf.test b/tests/assembly_one_contig.nf.test new file mode 100644 index 0000000..2e46730 --- /dev/null +++ b/tests/assembly_one_contig.nf.test @@ -0,0 +1,38 @@ +nextflow_pipeline { + + name "Test assembly submission workflow stub - one_contig" + script "../main.nf" + tag "pipeline" + tag "test_assembly_one_contig" + profile "test_assembly_one_contig" + + test("-profile test_assembly_one_contig") { + + when { + params { + outdir = "$outputDir" + } + } + + then { + // stable_name: All files + folders in ${params.outdir}/ with a stable name + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) + // stable_path: All files in ${params.outdir}/ with stable content + def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + // Early failure no need to test the rest of snapshots + assert workflow.success + assertAll( + { assert snapshot( + // Number of successful tasks + workflow.trace.succeeded().size(), + // pipeline versions.yml file for multiqc from which Nextflow version is removed because we test pipelines on multiple Nextflow versions + removeNextflowVersion("$outputDir/pipeline_info/nf_core_seqsubmit_software_mqc_versions.yml"), + // All stable path name, with a relative 
path + stable_name, + // All files with stable contents + stable_path + ).match() } + ) + } + } +} From 75de363bd63cbffa764d9b285802f69bb4f60f25 Mon Sep 17 00:00:00 2001 From: Sofia Ochkalova Date: Thu, 19 Mar 2026 14:45:10 +0000 Subject: [PATCH 20/46] create output dir for metadata file if it doesn't exist --- workflows/assemblysubmit.nf | 26 ++++++++++++++++---------- workflows/genomesubmit.nf | 5 +++-- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/workflows/assemblysubmit.nf b/workflows/assemblysubmit.nf index 3e0c351..57db15e 100644 --- a/workflows/assemblysubmit.nf +++ b/workflows/assemblysubmit.nf @@ -138,7 +138,12 @@ workflow ASSEMBLYSUBMIT { ].join(',') def content = "${header}\n${row}" - def csv_file = file("${params.outdir}/${params.mode}/${meta.id}_assembly_metadata.csv") + + // Create output directory if it doesn't exist + def outDir = file("${params.outdir}/${params.mode}") + outDir.mkdirs() + + def csv_file = file("${outDir}/${meta.id}_assembly_metadata.csv") csv_file.text = content [meta, csv_file] @@ -209,17 +214,18 @@ workflow ASSEMBLYSUBMIT { ) ) - MULTIQC ( - ch_multiqc_files.collect(), - ch_multiqc_config.toList(), - ch_multiqc_custom_config.toList(), - ch_multiqc_logo.toList(), - [], - [] - ) + // MULTIQC ( + // ch_multiqc_files.collect(), + // ch_multiqc_config.toList(), + // ch_multiqc_custom_config.toList(), + // ch_multiqc_logo.toList(), + // [], + // [] + // ) emit: - multiqc_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html + // multiqc_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html + multiqc_report = channel.empty() // TODO re-enable when multiqc is added back in versions = ch_versions // channel: [ path(versions.yml) ] } diff --git a/workflows/genomesubmit.nf b/workflows/genomesubmit.nf index d84468a..8dc2b9b 100644 --- a/workflows/genomesubmit.nf +++ b/workflows/genomesubmit.nf @@ -221,7 +221,7 @@ workflow GENOMESUBMIT { .map { meta, fasta -> [ meta.id, - 
fasta, + fasta.getName(), meta.accession, meta.assembly_software, meta.binning_software, @@ -240,7 +240,8 @@ workflow GENOMESUBMIT { ].join('\t') } .collectFile( - name: "${params.outdir}/${params.mode}/genomes_metadata.csv", + name: 'genomes_metadata.csv', + storeDir: "${params.outdir}/${params.mode}", seed: [ 'genome_name', 'genome_path', From f9bd3d437d6acf4dc6fa23138f7e78d07ee6cfa9 Mon Sep 17 00:00:00 2001 From: Sofia Ochkalova Date: Thu, 19 Mar 2026 15:29:56 +0000 Subject: [PATCH 21/46] push test data and samplesheets to nf-core/test-datasets --- assets/test_samplesheets/assembly_complete_metadata.csv | 2 -- .../test_samplesheets/assembly_no_coverage_paired_reads.csv | 2 -- assets/test_samplesheets/assembly_one_contig.csv | 2 -- assets/test_samplesheets/mag_complete_metadata.csv | 2 -- assets/test_samplesheets/mag_no_coverage_paired_reads.csv | 2 -- assets/test_samplesheets/mag_no_coverage_single_reads.csv | 2 -- assets/test_samplesheets/mag_no_quality.csv | 2 -- assets/test_samplesheets/mag_no_rna_presence.csv | 2 -- assets/test_samplesheets/mag_no_taxonomy.csv | 2 -- assets/test_samplesheets/mag_one_contig.csv | 2 -- conf/test_assembly_complete_metadata.config | 4 +++- conf/test_assembly_no_coverage_paired_reads.config | 4 +++- conf/test_assembly_one_contig.config | 4 +++- conf/test_mag_complete_metadata.conf | 4 +++- conf/test_mag_no_coverage_paired_reads.conf | 4 +++- conf/test_mag_no_coverage_single_reads.conf | 4 +++- conf/test_mag_no_quality.conf | 4 +++- conf/test_mag_no_rna_presence.conf | 4 +++- conf/test_mag_no_taxonomy.conf | 6 ++++-- conf/test_mag_one_contig.conf | 4 +++- 20 files changed, 31 insertions(+), 31 deletions(-) delete mode 100644 assets/test_samplesheets/assembly_complete_metadata.csv delete mode 100644 assets/test_samplesheets/assembly_no_coverage_paired_reads.csv delete mode 100644 assets/test_samplesheets/assembly_one_contig.csv delete mode 100644 assets/test_samplesheets/mag_complete_metadata.csv delete mode 100644 
assets/test_samplesheets/mag_no_coverage_paired_reads.csv delete mode 100644 assets/test_samplesheets/mag_no_coverage_single_reads.csv delete mode 100644 assets/test_samplesheets/mag_no_quality.csv delete mode 100644 assets/test_samplesheets/mag_no_rna_presence.csv delete mode 100644 assets/test_samplesheets/mag_no_taxonomy.csv delete mode 100644 assets/test_samplesheets/mag_one_contig.csv diff --git a/assets/test_samplesheets/assembly_complete_metadata.csv b/assets/test_samplesheets/assembly_complete_metadata.csv deleted file mode 100644 index b0d2424..0000000 --- a/assets/test_samplesheets/assembly_complete_metadata.csv +++ /dev/null @@ -1,2 +0,0 @@ -sample,fasta,fastq_1,fastq_2,coverage,run_accession,assembler,assembler_version -complete_metadata,/Users/sofia/mgnify/seqsubmit/tests/data/contigs.fasta.gz,,,30,ERR000003,MEGAHIT,1.2.9 diff --git a/assets/test_samplesheets/assembly_no_coverage_paired_reads.csv b/assets/test_samplesheets/assembly_no_coverage_paired_reads.csv deleted file mode 100644 index 60f8463..0000000 --- a/assets/test_samplesheets/assembly_no_coverage_paired_reads.csv +++ /dev/null @@ -1,2 +0,0 @@ -sample,fasta,fastq_1,fastq_2,coverage,run_accession,assembler,assembler_version -no_coverage_paired_reads,/Users/sofia/mgnify/seqsubmit/tests/data/contigs.fasta.gz,/Users/sofia/mgnify/seqsubmit/tests/data/fastq_1.fastq,/Users/sofia/mgnify/seqsubmit/tests/data/fastq_2.fastq,,ERR000001,SPAdes,3.15 diff --git a/assets/test_samplesheets/assembly_one_contig.csv b/assets/test_samplesheets/assembly_one_contig.csv deleted file mode 100644 index 16734ba..0000000 --- a/assets/test_samplesheets/assembly_one_contig.csv +++ /dev/null @@ -1,2 +0,0 @@ -sample,fasta,fastq_1,fastq_2,coverage,run_accession,assembler,assembler_version -one_contig,/Users/sofia/mgnify/seqsubmit/tests/data/one_contig_assembly.fasta.gz,,,45,ERR000002,Velvet,1.2.10 diff --git a/assets/test_samplesheets/mag_complete_metadata.csv b/assets/test_samplesheets/mag_complete_metadata.csv deleted 
file mode 100644 index 62a1e40..0000000 --- a/assets/test_samplesheets/mag_complete_metadata.csv +++ /dev/null @@ -1,2 +0,0 @@ -sample,fasta,accession,fastq_1,fastq_2,assembly_software,binning_software,binning_parameters,stats_generation_software,completeness,contamination,genome_coverage,metagenome,co-assembly,broad_environment,local_environment,environmental_medium,RNA_presence,NCBI_lineage -complete_metadata,/Users/sofia/mgnify/seqsubmit/tests/data/nf-datasets/bin_lachnospira_eligens.fa.gz,SRR14332510,,,spades_v3.15.5,mags_v1,default,CheckM2_v1.0.1,61.0,0.21,32.07,sediment metagenome,No,marine,cable bacteria,marine sediment,No,d__Bacteria;p__Proteobacteria;c__Deltaproteobacteria;o__Desulfobacterales;f__Desulfobulbaceae;g__Candidatus Electrothrix;s__ diff --git a/assets/test_samplesheets/mag_no_coverage_paired_reads.csv b/assets/test_samplesheets/mag_no_coverage_paired_reads.csv deleted file mode 100644 index 77eeb09..0000000 --- a/assets/test_samplesheets/mag_no_coverage_paired_reads.csv +++ /dev/null @@ -1,2 +0,0 @@ -sample,fasta,accession,fastq_1,fastq_2,assembly_software,binning_software,binning_parameters,stats_generation_software,completeness,contamination,genome_coverage,metagenome,co-assembly,broad_environment,local_environment,environmental_medium,RNA_presence,NCBI_lineage -no_coverage_paired_reads,/Users/sofia/mgnify/seqsubmit/tests/data/nf-datasets/bin_lachnospira_eligens.fa.gz,SRR14332510,/Users/sofia/mgnify/seqsubmit/tests/data/fastq_1.fastq,/Users/sofia/mgnify/seqsubmit/tests/data/fastq_2.fastq,spades_v3.15.5,mags_v1,default,CheckM2_v1.0.1,61.0,0.21,,sediment metagenome,No,marine,cable bacteria,marine sediment,No,d__Bacteria;p__Proteobacteria;c__Deltaproteobacteria;o__Desulfobacterales;f__Desulfobulbaceae;g__Candidatus Electrothrix;s__ diff --git a/assets/test_samplesheets/mag_no_coverage_single_reads.csv b/assets/test_samplesheets/mag_no_coverage_single_reads.csv deleted file mode 100644 index 26ac547..0000000 --- 
a/assets/test_samplesheets/mag_no_coverage_single_reads.csv +++ /dev/null @@ -1,2 +0,0 @@ -sample,fasta,accession,fastq_1,fastq_2,assembly_software,binning_software,binning_parameters,stats_generation_software,completeness,contamination,genome_coverage,metagenome,co-assembly,broad_environment,local_environment,environmental_medium,RNA_presence,NCBI_lineage -no_coverage_single_reads,/Users/sofia/mgnify/seqsubmit/tests/data/nf-datasets/bin_lachnospira_eligens.fa.gz,SRR14332510,/Users/sofia/mgnify/seqsubmit/tests/data/fastq_1.fastq,,spades_v3.15.5,mags_v1,default,CheckM2_v1.0.1,61.0,0.21,,sediment metagenome,No,marine,cable bacteria,marine sediment,No,d__Bacteria;p__Proteobacteria;c__Deltaproteobacteria;o__Desulfobacterales;f__Desulfobulbaceae;g__Candidatus Electrothrix;s__ diff --git a/assets/test_samplesheets/mag_no_quality.csv b/assets/test_samplesheets/mag_no_quality.csv deleted file mode 100644 index 7da85be..0000000 --- a/assets/test_samplesheets/mag_no_quality.csv +++ /dev/null @@ -1,2 +0,0 @@ -sample,fasta,accession,fastq_1,fastq_2,assembly_software,binning_software,binning_parameters,stats_generation_software,completeness,contamination,genome_coverage,metagenome,co-assembly,broad_environment,local_environment,environmental_medium,RNA_presence,NCBI_lineage -no_quality,/Users/sofia/mgnify/seqsubmit/tests/data/nf-datasets/bin_lachnospira_eligens.fa.gz,SRR14332510,,,spades_v3.15.5,mags_v1,default,CheckM2_v1.0.1,,,32.07,sediment metagenome,No,marine,cable bacteria,marine sediment,No,d__Bacteria;p__Proteobacteria;c__Deltaproteobacteria;o__Desulfobacterales;f__Desulfobulbaceae;g__Candidatus Electrothrix;s__ diff --git a/assets/test_samplesheets/mag_no_rna_presence.csv b/assets/test_samplesheets/mag_no_rna_presence.csv deleted file mode 100644 index ca3695b..0000000 --- a/assets/test_samplesheets/mag_no_rna_presence.csv +++ /dev/null @@ -1,2 +0,0 @@ 
-sample,fasta,accession,fastq_1,fastq_2,assembly_software,binning_software,binning_parameters,stats_generation_software,completeness,contamination,genome_coverage,metagenome,co-assembly,broad_environment,local_environment,environmental_medium,RNA_presence,NCBI_lineage -no_rna_presence,/Users/sofia/mgnify/seqsubmit/tests/data/nf-datasets/bin_lachnospiraceae.fa.gz,SRR14332510,,,spades_v3.15.5,mags_v1,default,CheckM2_v1.0.1,92.81,1.09,66.04,sediment metagenome,No,marine,cable bacteria,marine sediment,,d__Bacteria;p__Proteobacteria;c__Deltaproteobacteria;o__Desulfobacterales;f__Desulfobulbaceae;g__Candidatus Electrothrix;s__Candidatus Electrothrix marina diff --git a/assets/test_samplesheets/mag_no_taxonomy.csv b/assets/test_samplesheets/mag_no_taxonomy.csv deleted file mode 100644 index bdb54ff..0000000 --- a/assets/test_samplesheets/mag_no_taxonomy.csv +++ /dev/null @@ -1,2 +0,0 @@ -sample,fasta,accession,fastq_1,fastq_2,assembly_software,binning_software,binning_parameters,stats_generation_software,completeness,contamination,genome_coverage,metagenome,co-assembly,broad_environment,local_environment,environmental_medium,RNA_presence,NCBI_lineage -no_taxonomy,/Users/sofia/mgnify/seqsubmit/tests/data/GCF_000005845.2_ASM584v2_genomic.fa.gz,SRR14332510,,,spades_v3.15.5,mags_v1,default,CheckM2_v1.0.1,88.71,5.04,10.04,sediment metagenome,No,marine,cable bacteria,marine sediment,No, diff --git a/assets/test_samplesheets/mag_one_contig.csv b/assets/test_samplesheets/mag_one_contig.csv deleted file mode 100644 index 32d50b8..0000000 --- a/assets/test_samplesheets/mag_one_contig.csv +++ /dev/null @@ -1,2 +0,0 @@ -sample,fasta,accession,fastq_1,fastq_2,assembly_software,binning_software,binning_parameters,stats_generation_software,completeness,contamination,genome_coverage,metagenome,co-assembly,broad_environment,local_environment,environmental_medium,RNA_presence,NCBI_lineage 
-one_contig,/Users/sofia/mgnify/seqsubmit/tests/data/one_contig_assembly.fasta.gz,SRR14332510,,,spades_v3.15.5,mags_v1,default,CheckM2_v1.0.1,61.0,0.21,32.07,sediment metagenome,No,marine,cable bacteria,marine sediment,No,d__Bacteria;p__Proteobacteria;c__Deltaproteobacteria;o__Desulfobacterales;f__Desulfobulbaceae;g__Candidatus Electrothrix;s__ diff --git a/conf/test_assembly_complete_metadata.config b/conf/test_assembly_complete_metadata.config index da91978..032ec5f 100644 --- a/conf/test_assembly_complete_metadata.config +++ b/conf/test_assembly_complete_metadata.config @@ -25,7 +25,7 @@ params { // Input data // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = "${projectDir}/assets/test_samplesheets/assembly_complete_metadata.csv" + input = params.pipelines_testdata_base_path + 'seqsubmit/samplesheets/assembly_complete_metadata.csv' outdir = 'test_output' mode = "metagenomic_assemblies" @@ -35,3 +35,5 @@ params { centre_name = "TEST_CENTER" } + +docker.enabled = true diff --git a/conf/test_assembly_no_coverage_paired_reads.config b/conf/test_assembly_no_coverage_paired_reads.config index a606eca..b01bb59 100644 --- a/conf/test_assembly_no_coverage_paired_reads.config +++ b/conf/test_assembly_no_coverage_paired_reads.config @@ -25,7 +25,7 @@ params { // Input data // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = "${projectDir}/assets/test_samplesheets/assembly_no_coverage_paired_reads.csv" + input = params.pipelines_testdata_base_path + 'seqsubmit/samplesheets/assembly_no_coverage_paired_reads.csv' outdir = 'test_output' mode = "metagenomic_assemblies" @@ -35,3 +35,5 @@ params { centre_name = "TEST_CENTER" } + +docker.enabled = true diff --git a/conf/test_assembly_one_contig.config 
b/conf/test_assembly_one_contig.config index 683570a..cc6029f 100644 --- a/conf/test_assembly_one_contig.config +++ b/conf/test_assembly_one_contig.config @@ -25,7 +25,7 @@ params { // Input data // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = "${projectDir}/assets/test_samplesheets/assembly_one_contig.csv" + input = params.pipelines_testdata_base_path + 'seqsubmit/samplesheets/assembly_one_contig.csv' outdir = 'test_output' mode = "metagenomic_assemblies" @@ -35,3 +35,5 @@ params { centre_name = "TEST_CENTER" } + +docker.enabled = true diff --git a/conf/test_mag_complete_metadata.conf b/conf/test_mag_complete_metadata.conf index b6d3ec3..50a11e6 100644 --- a/conf/test_mag_complete_metadata.conf +++ b/conf/test_mag_complete_metadata.conf @@ -25,7 +25,7 @@ params { // Input data // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = "${projectDir}/assets/test_samplesheets/mag_complete_metadata.csv" + input = params.pipelines_testdata_base_path + 'seqsubmit/samplesheets/mag_complete_metadata.csv' mode = "mags" submission_study = "PRJEB98843" @@ -37,3 +37,5 @@ params { checkm2_db = null } + +docker.enabled = true diff --git a/conf/test_mag_no_coverage_paired_reads.conf b/conf/test_mag_no_coverage_paired_reads.conf index c4eec06..b1c24d6 100644 --- a/conf/test_mag_no_coverage_paired_reads.conf +++ b/conf/test_mag_no_coverage_paired_reads.conf @@ -25,7 +25,7 @@ params { // Input data // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = "${projectDir}/assets/test_samplesheets/mag_no_coverage_paired_reads.csv" + input = params.pipelines_testdata_base_path + 
'seqsubmit/samplesheets/mag_no_coverage_paired_reads.csv' mode = "mags" submission_study = "PRJEB98843" @@ -37,3 +37,5 @@ params { checkm2_db = null } + +docker.enabled = true diff --git a/conf/test_mag_no_coverage_single_reads.conf b/conf/test_mag_no_coverage_single_reads.conf index 794798d..21dc09e 100644 --- a/conf/test_mag_no_coverage_single_reads.conf +++ b/conf/test_mag_no_coverage_single_reads.conf @@ -25,7 +25,7 @@ params { // Input data // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = "${projectDir}/assets/test_samplesheets/mag_no_coverage_single_reads.csv" + input = params.pipelines_testdata_base_path + 'seqsubmit/samplesheets/mag_no_coverage_single_reads.csv' mode = "mags" submission_study = "PRJEB98843" @@ -37,3 +37,5 @@ params { checkm2_db = null } + +docker.enabled = true diff --git a/conf/test_mag_no_quality.conf b/conf/test_mag_no_quality.conf index f067a8b..e1b8bef 100644 --- a/conf/test_mag_no_quality.conf +++ b/conf/test_mag_no_quality.conf @@ -25,7 +25,7 @@ params { // Input data // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = "${projectDir}/assets/test_samplesheets/mag_no_quality.csv" + input = params.pipelines_testdata_base_path + 'seqsubmit/samplesheets/mag_no_quality.csv' mode = "mags" submission_study = "PRJEB98843" @@ -39,3 +39,5 @@ params { checkm2_db = null } + +docker.enabled = true diff --git a/conf/test_mag_no_rna_presence.conf b/conf/test_mag_no_rna_presence.conf index df4796d..2cd4413 100644 --- a/conf/test_mag_no_rna_presence.conf +++ b/conf/test_mag_no_rna_presence.conf @@ -25,7 +25,7 @@ params { // Input data // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets // TODO nf-core: Give any required params for the test so that command line 
flags are not needed - input = "${projectDir}/assets/test_samplesheets/mag_no_rna_presence.csv" + input = params.pipelines_testdata_base_path + 'seqsubmit/samplesheets/mag_no_rna_presence.csv' mode = "mags" submission_study = "PRJEB98843" @@ -37,3 +37,5 @@ params { checkm2_db = null } + +docker.enabled = true diff --git a/conf/test_mag_no_taxonomy.conf b/conf/test_mag_no_taxonomy.conf index 256b26d..321ec8b 100644 --- a/conf/test_mag_no_taxonomy.conf +++ b/conf/test_mag_no_taxonomy.conf @@ -25,7 +25,7 @@ params { // Input data // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = "${projectDir}/assets/test_samplesheets/mag_no_taxonomy.csv" + input = params.pipelines_testdata_base_path + 'seqsubmit/samplesheets/mag_no_taxonomy.csv' mode = "mags" submission_study = "PRJEB98843" @@ -33,7 +33,9 @@ params { test_upload = true - cat_db = "${projectDir}/tests/data/cat_db/" + cat_db = params.pipelines_testdata_base_path + 'seqsubmit/test_data/small_cat_db/tax-db.tar.gz' checkm2_db = null } + +docker.enabled = true diff --git a/conf/test_mag_one_contig.conf b/conf/test_mag_one_contig.conf index a792048..a4cb817 100644 --- a/conf/test_mag_one_contig.conf +++ b/conf/test_mag_one_contig.conf @@ -25,7 +25,7 @@ params { // Input data // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = "${projectDir}/assets/test_samplesheets/mag_one_contig.csv" + input = params.pipelines_testdata_base_path + 'seqsubmit/samplesheets/mag_one_contig.csv' mode = "mags" submission_study = "PRJEB98843" @@ -37,3 +37,5 @@ params { checkm2_db = null } + +docker.enabled = true From 57f22b89d5f5fa317dc124cb562f61df14c6ed23 Mon Sep 17 00:00:00 2001 From: Sofia Ochkalova Date: Thu, 19 Mar 2026 15:40:05 +0000 Subject: [PATCH 22/46] add mode-specific 
test tags, update .nftignore to exclude results files with unstable content --- tests/.nftignore | 14 ++++++++++++++ tests/assembly_complete_metadata.nf.test | 1 + tests/assembly_no_coverage_paired_reads.nf.test | 1 + tests/assembly_one_contig.nf.test | 1 + tests/mag_complete_metadata.nf.test | 3 ++- tests/mag_no_coverage_paired_reads.nf.test | 3 ++- tests/mag_no_coverage_single_reads.nf.test | 3 ++- tests/mag_no_quality.nf.test | 11 +++++++++-- tests/mag_no_rna_presence.nf.test | 3 ++- tests/mag_no_taxonomy.nf.test | 3 ++- tests/mag_one_contig.nf.test | 3 ++- 11 files changed, 38 insertions(+), 8 deletions(-) diff --git a/tests/.nftignore b/tests/.nftignore index 83f7a0a..b99c781 100644 --- a/tests/.nftignore +++ b/tests/.nftignore @@ -8,3 +8,17 @@ multiqc/multiqc_data/llms-full.txt multiqc/multiqc_plots/{svg,pdf,png}/*.{svg,pdf,png} multiqc/multiqc_report.html pipeline_info/*.{html,json,txt,yml} +**_webin-cli.report +**/webin_cli/*accessions.tsv +**/MAG_upload/registered_MAGs.tsv +**/MAG_upload/registered_MAGs_test.tsv +**/bin_upload/registered_bins.tsv +**/bin_upload/registered_bins_test.tsv +**/MAG_upload/manifests/ +**/bin_upload/manifests/ +**/MAG_upload/manifests_test/* +**/bin_upload/manifests_test/* +**/MAG_upload/genome_samples.xml +**/bin_upload/genome_samples.xml +**/rna/trnascanse/*.stats +**/rna/trnascanse/*.log diff --git a/tests/assembly_complete_metadata.nf.test b/tests/assembly_complete_metadata.nf.test index 4bd673d..bad0eea 100644 --- a/tests/assembly_complete_metadata.nf.test +++ b/tests/assembly_complete_metadata.nf.test @@ -3,6 +3,7 @@ nextflow_pipeline { name "Test assembly submission workflow stub - complete_metadata" script "../main.nf" tag "pipeline" + tag "mode_assembly" tag "test_assembly_complete_metadata" profile "test_assembly_complete_metadata" diff --git a/tests/assembly_no_coverage_paired_reads.nf.test b/tests/assembly_no_coverage_paired_reads.nf.test index 45e7113..41f2b64 100644 --- 
a/tests/assembly_no_coverage_paired_reads.nf.test +++ b/tests/assembly_no_coverage_paired_reads.nf.test @@ -3,6 +3,7 @@ nextflow_pipeline { name "Test assembly submission workflow stub - no_coverage_paired_reads" script "../main.nf" tag "pipeline" + tag "mode_assembly" tag "test_assembly_no_coverage_paired_reads" profile "test_assembly_no_coverage_paired_reads" diff --git a/tests/assembly_one_contig.nf.test b/tests/assembly_one_contig.nf.test index 2e46730..2ce8f4e 100644 --- a/tests/assembly_one_contig.nf.test +++ b/tests/assembly_one_contig.nf.test @@ -3,6 +3,7 @@ nextflow_pipeline { name "Test assembly submission workflow stub - one_contig" script "../main.nf" tag "pipeline" + tag "mode_assembly" tag "test_assembly_one_contig" profile "test_assembly_one_contig" diff --git a/tests/mag_complete_metadata.nf.test b/tests/mag_complete_metadata.nf.test index e3c3091..9b42848 100644 --- a/tests/mag_complete_metadata.nf.test +++ b/tests/mag_complete_metadata.nf.test @@ -3,6 +3,7 @@ nextflow_pipeline { name "Test genome submission workflow - complete_metadata" script "../main.nf" tag "pipeline" + tag "mode_mag" tag "test_mag_complete_metadata" profile "test_mag_complete_metadata" @@ -16,7 +17,7 @@ nextflow_pipeline { then { // stable_name: All files + folders in ${params.outdir}/ with a stable name - def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}', '**/manifests_test/*']) // stable_path: All files in ${params.outdir}/ with stable content def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') // Early failure no need to test the rest of snapshots diff --git a/tests/mag_no_coverage_paired_reads.nf.test b/tests/mag_no_coverage_paired_reads.nf.test index 3b71f06..791428a 100644 --- a/tests/mag_no_coverage_paired_reads.nf.test +++ 
b/tests/mag_no_coverage_paired_reads.nf.test @@ -3,6 +3,7 @@ nextflow_pipeline { name "Test genome submission workflow stub - no_coverage_paired_reads" script "../main.nf" tag "pipeline" + tag "mode_mag" tag "test_mag_no_coverage_paired_reads" profile "test_mag_no_coverage_paired_reads" @@ -16,7 +17,7 @@ nextflow_pipeline { then { // stable_name: All files + folders in ${params.outdir}/ with a stable name - def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}', '**/manifests_test/*']) // stable_path: All files in ${params.outdir}/ with stable content def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') // Early failure no need to test the rest of snapshots diff --git a/tests/mag_no_coverage_single_reads.nf.test b/tests/mag_no_coverage_single_reads.nf.test index 0f5b3e8..4f7d22f 100644 --- a/tests/mag_no_coverage_single_reads.nf.test +++ b/tests/mag_no_coverage_single_reads.nf.test @@ -3,6 +3,7 @@ nextflow_pipeline { name "Test genome submission workflow stub - no_coverage_single_reads" script "../main.nf" tag "pipeline" + tag "mode_mag" tag "test_mag_no_coverage_single_reads" profile "test_mag_no_coverage_single_reads" @@ -16,7 +17,7 @@ nextflow_pipeline { then { // stable_name: All files + folders in ${params.outdir}/ with a stable name - def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}', '**/manifests_test/*']) // stable_path: All files in ${params.outdir}/ with stable content def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') // Early failure no need to test the rest of snapshots diff 
--git a/tests/mag_no_quality.nf.test b/tests/mag_no_quality.nf.test index 42ad265..62f7723 100644 --- a/tests/mag_no_quality.nf.test +++ b/tests/mag_no_quality.nf.test @@ -1,8 +1,15 @@ +// ------------------------------------------------------------------ +// This test will download real CheckM2 DB and run Diamond search, +// which is slow and requires 20+Gb of memory +// We only run this test on specific occasions and it is not included +// in the main test suite +// ------------------------------------------------------------------ nextflow_pipeline { name "Test genome submission workflow stub - no_quality" script "../main.nf" - tag "pipeline" + // tag "pipeline" + // tag "mode_mag" tag "test_mag_no_quality" profile "test_mag_no_quality" @@ -16,7 +23,7 @@ nextflow_pipeline { then { // stable_name: All files + folders in ${params.outdir}/ with a stable name - def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}', '**/manifests_test/*']) // stable_path: All files in ${params.outdir}/ with stable content def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') // Early failure no need to test the rest of snapshots diff --git a/tests/mag_no_rna_presence.nf.test b/tests/mag_no_rna_presence.nf.test index 68b597d..07ed498 100644 --- a/tests/mag_no_rna_presence.nf.test +++ b/tests/mag_no_rna_presence.nf.test @@ -3,6 +3,7 @@ nextflow_pipeline { name "Test genome submission workflow stub - no_rna_presence" script "../main.nf" tag "pipeline" + tag "mode_mag" tag "test_mag_no_rna_presence" profile "test_mag_no_rna_presence" @@ -16,7 +17,7 @@ nextflow_pipeline { then { // stable_name: All files + folders in ${params.outdir}/ with a stable name - def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: 
['pipeline_info/*.{html,json,txt}']) + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}', '**/manifests_test/*']) // stable_path: All files in ${params.outdir}/ with stable content def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') // Early failure no need to test the rest of snapshots diff --git a/tests/mag_no_taxonomy.nf.test b/tests/mag_no_taxonomy.nf.test index be79c96..e820278 100644 --- a/tests/mag_no_taxonomy.nf.test +++ b/tests/mag_no_taxonomy.nf.test @@ -3,6 +3,7 @@ nextflow_pipeline { name "Test genome submission workflow stub - no_taxonomy" script "../main.nf" tag "pipeline" + tag "mode_mag" tag "test_mag_no_taxonomy" profile "test_mag_no_taxonomy" @@ -16,7 +17,7 @@ nextflow_pipeline { then { // stable_name: All files + folders in ${params.outdir}/ with a stable name - def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}', '**/manifests_test/*']) // stable_path: All files in ${params.outdir}/ with stable content def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') // Early failure no need to test the rest of snapshots diff --git a/tests/mag_one_contig.nf.test b/tests/mag_one_contig.nf.test index 6bdfb03..6ce83bb 100644 --- a/tests/mag_one_contig.nf.test +++ b/tests/mag_one_contig.nf.test @@ -3,6 +3,7 @@ nextflow_pipeline { name "Test genome submission workflow stub - one_contig" script "../main.nf" tag "pipeline" + tag "mode_mag" tag "test_mag_one_contig" profile "test_mag_one_contig" @@ -16,7 +17,7 @@ nextflow_pipeline { then { // stable_name: All files + folders in ${params.outdir}/ with a stable name - def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: 
['pipeline_info/*.{html,json,txt}']) + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}', '**/manifests_test/*']) // stable_path: All files in ${params.outdir}/ with stable content def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') // Early failure no need to test the rest of snapshots From d0b47ea78d7e6c1d9703f8081c83a6a62701924e Mon Sep 17 00:00:00 2001 From: Sofia Ochkalova Date: Thu, 19 Mar 2026 15:50:57 +0000 Subject: [PATCH 23/46] update usage doc --- docs/usage.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/usage.md b/docs/usage.md index 0833bb6..db6da67 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -50,7 +50,7 @@ mag_001,data/mag_001.fasta.gz,SRR24458089,,,SPAdes 3.15.5,MetaBAT2 2.15,default, | Column | Description | | --------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | `sample` | Unique identifier of this particular data entry. It is used as the alias when submitting to ENA, so it must be unique within one Webin account. | -| `fasta` | Path to MAG/bin contigs in FASTA format compressed with `gzip`. | +| `fasta` | Path to MAG/bin contigs in FASTA format compressed with `gzip`. All names of the FASTA files must be unique to prevent pipeline errors. | | `accession` | ENA accession of the run or metagenomic assembly used to generate the MAG/bin. | | `fastq_1` | Path to the read file in FASTQ format used to generate the source metagenomic assembly. Required if `genome_coverage` is not provided. | | `fastq_2` | Path to the second read file in FASTQ format for paired-end data used to generate the source metagenomic assembly. Leave empty for single-end reads. 
| From c3757d888672284073cf779ec9f27d878f14e4f6 Mon Sep 17 00:00:00 2001 From: Sofia Ochkalova Date: Thu, 19 Mar 2026 16:25:32 +0000 Subject: [PATCH 24/46] add more tests --- ...t_assembly_no_coverage_single_reads.config | 39 ++++++++++++++++++ ...st_mag_multiple_bins_missing_metadata.conf | 41 +++++++++++++++++++ .../assembly_no_coverage_single_reads.nf.test | 39 ++++++++++++++++++ ...mag_multiple_bins_missing_metadata.nf.test | 39 ++++++++++++++++++ 4 files changed, 158 insertions(+) create mode 100644 conf/test_assembly_no_coverage_single_reads.config create mode 100644 conf/test_mag_multiple_bins_missing_metadata.conf create mode 100644 tests/assembly_no_coverage_single_reads.nf.test create mode 100644 tests/mag_multiple_bins_missing_metadata.nf.test diff --git a/conf/test_assembly_no_coverage_single_reads.config b/conf/test_assembly_no_coverage_single_reads.config new file mode 100644 index 0000000..baca43e --- /dev/null +++ b/conf/test_assembly_no_coverage_single_reads.config @@ -0,0 +1,39 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
+ + Use as follows: + nextflow run nf-core/seqsubmit -profile test, --outdir + +---------------------------------------------------------------------------------------- +*/ + +process { + resourceLimits = [ + cpus: 2, + memory: '8.GB', + time: '1.h' + ] +} + +params { + config_profile_name = 'Test --mode metagenomic_assemblies no_coverage_single_reads profile' + config_profile_description = 'Single-case assembly test with missing coverage and single-end reads' + + // Input data + // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets + // TODO nf-core: Give any required params for the test so that command line flags are not needed + input = params.pipelines_testdata_base_path + 'seqsubmit/samplesheets/assembly_no_coverage_single_reads.csv' + outdir = 'test_output' + + mode = "metagenomic_assemblies" + submission_study = "PRJEB98843" + ena_raw_reads_study_accession = "PRJEB65995" + library = "metagenome" + centre_name = "TEST_CENTER" + +} + +docker.enabled = true diff --git a/conf/test_mag_multiple_bins_missing_metadata.conf b/conf/test_mag_multiple_bins_missing_metadata.conf new file mode 100644 index 0000000..4d1c693 --- /dev/null +++ b/conf/test_mag_multiple_bins_missing_metadata.conf @@ -0,0 +1,41 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
+ + Use as follows: + nextflow run nf-core/seqsubmit -profile test, --outdir + +---------------------------------------------------------------------------------------- +*/ + +process { + resourceLimits = [ + cpus: 2, + memory: '16.GB', + time: '1.h' + ] +} + +params { + config_profile_name = 'Test --mode mags multiple_bins_missing_metadata profile' + config_profile_description = 'Multi-bin MAG test with mixed missing metadata fields' + + // Input data + // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets + // TODO nf-core: Give any required params for the test so that command line flags are not needed + input = params.pipelines_testdata_base_path + 'seqsubmit/samplesheets/mag_multiple_bins_missing_metadata.csv' + + mode = "mags" + submission_study = "PRJEB98843" + centre_name = "TEST_CENTER" + + test_upload = true + + cat_db = params.pipelines_testdata_base_path + 'seqsubmit/test_data/small_cat_db/tax-db.tar.gz' + checkm2_db = null + +} + +docker.enabled = true diff --git a/tests/assembly_no_coverage_single_reads.nf.test b/tests/assembly_no_coverage_single_reads.nf.test new file mode 100644 index 0000000..f990601 --- /dev/null +++ b/tests/assembly_no_coverage_single_reads.nf.test @@ -0,0 +1,39 @@ +nextflow_pipeline { + + name "Test assembly submission workflow stub - no_coverage_single_reads" + script "../main.nf" + tag "pipeline" + tag "mode_assembly" + tag "test_assembly_no_coverage_single_reads" + profile "test_assembly_no_coverage_single_reads" + + test("-profile test_assembly_no_coverage_single_reads") { + + when { + params { + outdir = "$outputDir" + } + } + + then { + // stable_name: All files + folders in ${params.outdir}/ with a stable name + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) + // stable_path: All files in ${params.outdir}/ with stable content + def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + // 
Early failure no need to test the rest of snapshots + assert workflow.success + assertAll( + { assert snapshot( + // Number of successful tasks + workflow.trace.succeeded().size(), + // pipeline versions.yml file for multiqc from which Nextflow version is removed because we test pipelines on multiple Nextflow versions + removeNextflowVersion("$outputDir/pipeline_info/nf_core_seqsubmit_software_mqc_versions.yml"), + // All stable path name, with a relative path + stable_name, + // All files with stable contents + stable_path + ).match() } + ) + } + } +} diff --git a/tests/mag_multiple_bins_missing_metadata.nf.test b/tests/mag_multiple_bins_missing_metadata.nf.test new file mode 100644 index 0000000..a5189b5 --- /dev/null +++ b/tests/mag_multiple_bins_missing_metadata.nf.test @@ -0,0 +1,39 @@ +nextflow_pipeline { + + name "Test genome submission workflow - multiple_bins_missing_metadata" + script "../main.nf" + tag "pipeline" + tag "mode_mag" + tag "test_mag_multiple_bins_missing_metadata" + profile "test_mag_multiple_bins_missing_metadata" + + test("-profile test_mag_multiple_bins_missing_metadata") { + + when { + params { + outdir = "$outputDir" + } + } + + then { + // stable_name: All files + folders in ${params.outdir}/ with a stable name + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}', '**/manifests_test/*']) + // stable_path: All files in ${params.outdir}/ with stable content + def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + // Early failure no need to test the rest of snapshots + assert workflow.success + assertAll( + { assert snapshot( + // Number of successful tasks + workflow.trace.succeeded().size(), + // pipeline versions.yml file for multiqc from which Nextflow version is removed because we test pipelines on multiple Nextflow versions + removeNextflowVersion("$outputDir/pipeline_info/nf_core_seqsubmit_software_mqc_versions.yml"), + // 
All stable path name, with a relative path + stable_name, + // All files with stable contents + stable_path + ).match() } + ) + } + } +} From ef79beb907ec9fa8f993a6d3d17b9399fc3578fd Mon Sep 17 00:00:00 2001 From: Sofia Ochkalova Date: Fri, 20 Mar 2026 13:35:07 +0000 Subject: [PATCH 25/46] add snapshots for tests --- tests/assembly_complete_metadata.nf.test.snap | 30 +++++++ ...mbly_no_coverage_paired_reads.nf.test.snap | 33 ++++++++ ...mbly_no_coverage_single_reads.nf.test.snap | 33 ++++++++ tests/assembly_one_contig.nf.test.snap | 24 ++++++ tests/mag_complete_metadata.nf.test.snap | 41 ++++++++++ ...ultiple_bins_missing_metadata.nf.test.snap | 78 +++++++++++++++++++ .../mag_no_coverage_paired_reads.nf.test.snap | 47 +++++++++++ .../mag_no_coverage_single_reads.nf.test.snap | 47 +++++++++++ tests/mag_no_rna_presence.nf.test.snap | 59 ++++++++++++++ tests/mag_no_taxonomy.nf.test.snap | 46 +++++++++++ tests/mag_one_contig.nf.test.snap | 25 ++++++ 11 files changed, 463 insertions(+) create mode 100644 tests/assembly_complete_metadata.nf.test.snap create mode 100644 tests/assembly_no_coverage_paired_reads.nf.test.snap create mode 100644 tests/assembly_no_coverage_single_reads.nf.test.snap create mode 100644 tests/assembly_one_contig.nf.test.snap create mode 100644 tests/mag_complete_metadata.nf.test.snap create mode 100644 tests/mag_multiple_bins_missing_metadata.nf.test.snap create mode 100644 tests/mag_no_coverage_paired_reads.nf.test.snap create mode 100644 tests/mag_no_coverage_single_reads.nf.test.snap create mode 100644 tests/mag_no_rna_presence.nf.test.snap create mode 100644 tests/mag_no_taxonomy.nf.test.snap create mode 100644 tests/mag_one_contig.nf.test.snap diff --git a/tests/assembly_complete_metadata.nf.test.snap b/tests/assembly_complete_metadata.nf.test.snap new file mode 100644 index 0000000..616dc54 --- /dev/null +++ b/tests/assembly_complete_metadata.nf.test.snap @@ -0,0 +1,30 @@ +{ + "-profile test_assembly_complete_metadata": { + "content": [ + 3, 
+ { + "Workflow": { + "nf-core/seqsubmit": "v1.0.0dev" + } + }, + [ + "metagenomic_assemblies", + "metagenomic_assemblies/complete_metadata", + "metagenomic_assemblies/complete_metadata/upload", + "metagenomic_assemblies/complete_metadata/upload/webin_cli", + "metagenomic_assemblies/complete_metadata/upload/webin_cli/complete_metadata_webin-cli.report", + "metagenomic_assemblies/complete_metadata_assembly_metadata.csv", + "pipeline_info", + "pipeline_info/nf_core_seqsubmit_software_mqc_versions.yml" + ], + [ + "complete_metadata_assembly_metadata.csv:md5,d5b1575095ece78d988395b874440bef" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "25.04.1" + }, + "timestamp": "2026-03-19T14:50:08.737906" + } +} \ No newline at end of file diff --git a/tests/assembly_no_coverage_paired_reads.nf.test.snap b/tests/assembly_no_coverage_paired_reads.nf.test.snap new file mode 100644 index 0000000..6e04685 --- /dev/null +++ b/tests/assembly_no_coverage_paired_reads.nf.test.snap @@ -0,0 +1,33 @@ +{ + "-profile test_assembly_no_coverage_paired_reads": { + "content": [ + 4, + { + "Workflow": { + "nf-core/seqsubmit": "v1.0.0dev" + } + }, + [ + "metagenomic_assemblies", + "metagenomic_assemblies/no_coverage_paired_reads", + "metagenomic_assemblies/no_coverage_paired_reads/coverage", + "metagenomic_assemblies/no_coverage_paired_reads/coverage/no_coverage_paired_reads.depth.txt", + "metagenomic_assemblies/no_coverage_paired_reads/upload", + "metagenomic_assemblies/no_coverage_paired_reads/upload/webin_cli", + "metagenomic_assemblies/no_coverage_paired_reads/upload/webin_cli/no_coverage_paired_reads_webin-cli.report", + "metagenomic_assemblies/no_coverage_paired_reads_assembly_metadata.csv", + "pipeline_info", + "pipeline_info/nf_core_seqsubmit_software_mqc_versions.yml" + ], + [ + "no_coverage_paired_reads.depth.txt:md5,bb5f99e74d21df3c73e0ae0f388bcbcb", + "no_coverage_paired_reads_assembly_metadata.csv:md5,91a2616ccedc6bb93c2209153bec50f0" + ] + ], + "meta": { + "nf-test": 
"0.9.0", + "nextflow": "25.04.1" + }, + "timestamp": "2026-03-19T17:02:24.792759" + } +} \ No newline at end of file diff --git a/tests/assembly_no_coverage_single_reads.nf.test.snap b/tests/assembly_no_coverage_single_reads.nf.test.snap new file mode 100644 index 0000000..bcb6ed3 --- /dev/null +++ b/tests/assembly_no_coverage_single_reads.nf.test.snap @@ -0,0 +1,33 @@ +{ + "-profile test_assembly_no_coverage_single_reads": { + "content": [ + 4, + { + "Workflow": { + "nf-core/seqsubmit": "v1.0.0dev" + } + }, + [ + "metagenomic_assemblies", + "metagenomic_assemblies/no_coverage_single_reads", + "metagenomic_assemblies/no_coverage_single_reads/coverage", + "metagenomic_assemblies/no_coverage_single_reads/coverage/no_coverage_single_reads.depth.txt", + "metagenomic_assemblies/no_coverage_single_reads/upload", + "metagenomic_assemblies/no_coverage_single_reads/upload/webin_cli", + "metagenomic_assemblies/no_coverage_single_reads/upload/webin_cli/no_coverage_single_reads_webin-cli.report", + "metagenomic_assemblies/no_coverage_single_reads_assembly_metadata.csv", + "pipeline_info", + "pipeline_info/nf_core_seqsubmit_software_mqc_versions.yml" + ], + [ + "no_coverage_single_reads.depth.txt:md5,bd88282b17e851377b1dd223839be150", + "no_coverage_single_reads_assembly_metadata.csv:md5,fc1beef7e8439eaf5329e02587460009" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "25.04.1" + }, + "timestamp": "2026-03-19T17:03:02.602442" + } +} \ No newline at end of file diff --git a/tests/assembly_one_contig.nf.test.snap b/tests/assembly_one_contig.nf.test.snap new file mode 100644 index 0000000..6d11b7c --- /dev/null +++ b/tests/assembly_one_contig.nf.test.snap @@ -0,0 +1,24 @@ +{ + "-profile test_assembly_one_contig": { + "content": [ + 1, + { + "Workflow": { + "nf-core/seqsubmit": "v1.0.0dev" + } + }, + [ + "pipeline_info", + "pipeline_info/nf_core_seqsubmit_software_mqc_versions.yml" + ], + [ + + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "25.04.1" + }, + 
"timestamp": "2026-03-19T14:55:59.974186" + } +} \ No newline at end of file diff --git a/tests/mag_complete_metadata.nf.test.snap b/tests/mag_complete_metadata.nf.test.snap new file mode 100644 index 0000000..b6497c6 --- /dev/null +++ b/tests/mag_complete_metadata.nf.test.snap @@ -0,0 +1,41 @@ +{ + "-profile test_mag_complete_metadata": { + "content": [ + 4, + { + "Workflow": { + "nf-core/seqsubmit": "v1.0.0dev" + } + }, + [ + "mags", + "mags/complete_metadata", + "mags/complete_metadata/upload", + "mags/complete_metadata/upload/webin_cli", + "mags/complete_metadata/upload/webin_cli/complete_metadata_accessions.tsv", + "mags/genomes_metadata.csv", + "mags/upload", + "mags/upload/manifests", + "mags/upload/manifests/results", + "mags/upload/manifests/results/MAG_upload", + "mags/upload/manifests/results/MAG_upload/ENA_backup.json", + "mags/upload/manifests/results/MAG_upload/genome_samples.xml", + "mags/upload/manifests/results/MAG_upload/manifests_test", + "mags/upload/manifests/results/MAG_upload/registered_MAGs_test.tsv", + "mags/upload/manifests/results/MAG_upload/submission.xml", + "pipeline_info", + "pipeline_info/nf_core_seqsubmit_software_mqc_versions.yml" + ], + [ + "genomes_metadata.csv:md5,ae2b884015d1850fa63365e8a64d9fc8", + "ENA_backup.json:md5,e339bef4b1e1e11c37ef72f3d9ef2c93", + "submission.xml:md5,4b7d1d7e1b883a3eac57e1267943a8d6" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "25.04.1" + }, + "timestamp": "2026-03-19T15:47:41.671411" + } +} \ No newline at end of file diff --git a/tests/mag_multiple_bins_missing_metadata.nf.test.snap b/tests/mag_multiple_bins_missing_metadata.nf.test.snap new file mode 100644 index 0000000..7cce2f8 --- /dev/null +++ b/tests/mag_multiple_bins_missing_metadata.nf.test.snap @@ -0,0 +1,78 @@ +{ + "-profile test_mag_multiple_bins_missing_metadata": { + "content": [ + 16, + { + "BARRNAP": { + "barrnap": 0.9 + }, + "COUNT_RNA": { + "python": "3.8.13" + }, + "COVERM_GENOME": { + "coverm": "0.7.0" + }, + 
"TRNASCANSE": { + "tRNAscan-SE": "2.0.12" + }, + "Workflow": { + "nf-core/seqsubmit": "v1.0.0dev" + } + }, + [ + "mags", + "mags/genomes_metadata.csv", + "mags/no_coverage_paired_reads", + "mags/no_coverage_paired_reads/coverage", + "mags/no_coverage_paired_reads/coverage/no_coverage_paired_reads.tsv", + "mags/no_coverage_paired_reads/upload", + "mags/no_coverage_paired_reads/upload/webin_cli", + "mags/no_coverage_paired_reads/upload/webin_cli/no_coverage_paired_reads_accessions.tsv", + "mags/no_rna_presence", + "mags/no_rna_presence/rna", + "mags/no_rna_presence/rna/barrnap", + "mags/no_rna_presence/rna/barrnap/no_rna_presence_bac.gff", + "mags/no_rna_presence/rna/trnascanse", + "mags/no_rna_presence/rna/trnascanse/no_rna_presence.log", + "mags/no_rna_presence/rna/trnascanse/no_rna_presence.stats", + "mags/no_rna_presence/rna/trnascanse/no_rna_presence.tsv", + "mags/no_rna_presence/upload", + "mags/no_rna_presence/upload/webin_cli", + "mags/no_rna_presence/upload/webin_cli/no_rna_presence_accessions.tsv", + "mags/no_taxonomy", + "mags/no_taxonomy/taxonomy", + "mags/no_taxonomy/taxonomy/no_taxonomy.bin2classification.txt", + "mags/no_taxonomy/taxonomy/no_taxonomy.txt", + "mags/no_taxonomy/upload", + "mags/no_taxonomy/upload/webin_cli", + "mags/no_taxonomy/upload/webin_cli/no_taxonomy_accessions.tsv", + "mags/upload", + "mags/upload/manifests", + "mags/upload/manifests/results", + "mags/upload/manifests/results/MAG_upload", + "mags/upload/manifests/results/MAG_upload/ENA_backup.json", + "mags/upload/manifests/results/MAG_upload/genome_samples.xml", + "mags/upload/manifests/results/MAG_upload/manifests_test", + "mags/upload/manifests/results/MAG_upload/registered_MAGs_test.tsv", + "mags/upload/manifests/results/MAG_upload/submission.xml", + "pipeline_info", + "pipeline_info/nf_core_seqsubmit_software_mqc_versions.yml" + ], + [ + "genomes_metadata.csv:md5,a75d1d35c762863c487f010f6a000910", + "no_coverage_paired_reads.tsv:md5,fb8374996c3bad9ddd296684d8c28628", + 
"no_rna_presence_bac.gff:md5,df19e1b84ba6f691d20c72b397c88abf", + "no_rna_presence.tsv:md5,abd958e8addba39c9e4d7f6b97a1a7c6", + "no_taxonomy.bin2classification.txt:md5,e24c109efe807c7044a866999fd736f1", + "no_taxonomy.txt:md5,0f2082d3e397fd630d605dd60993b131", + "ENA_backup.json:md5,e339bef4b1e1e11c37ef72f3d9ef2c93", + "submission.xml:md5,4b7d1d7e1b883a3eac57e1267943a8d6" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "25.04.1" + }, + "timestamp": "2026-03-19T17:12:22.739259" + } +} \ No newline at end of file diff --git a/tests/mag_no_coverage_paired_reads.nf.test.snap b/tests/mag_no_coverage_paired_reads.nf.test.snap new file mode 100644 index 0000000..29c10f5 --- /dev/null +++ b/tests/mag_no_coverage_paired_reads.nf.test.snap @@ -0,0 +1,47 @@ +{ + "-profile test_mag_no_coverage_paired_reads": { + "content": [ + 5, + { + "COVERM_GENOME": { + "coverm": "0.7.0" + }, + "Workflow": { + "nf-core/seqsubmit": "v1.0.0dev" + } + }, + [ + "mags", + "mags/genomes_metadata.csv", + "mags/no_coverage_paired_reads", + "mags/no_coverage_paired_reads/coverage", + "mags/no_coverage_paired_reads/coverage/no_coverage_paired_reads.tsv", + "mags/no_coverage_paired_reads/upload", + "mags/no_coverage_paired_reads/upload/webin_cli", + "mags/no_coverage_paired_reads/upload/webin_cli/no_coverage_paired_reads_accessions.tsv", + "mags/upload", + "mags/upload/manifests", + "mags/upload/manifests/results", + "mags/upload/manifests/results/MAG_upload", + "mags/upload/manifests/results/MAG_upload/ENA_backup.json", + "mags/upload/manifests/results/MAG_upload/genome_samples.xml", + "mags/upload/manifests/results/MAG_upload/manifests_test", + "mags/upload/manifests/results/MAG_upload/registered_MAGs_test.tsv", + "mags/upload/manifests/results/MAG_upload/submission.xml", + "pipeline_info", + "pipeline_info/nf_core_seqsubmit_software_mqc_versions.yml" + ], + [ + "genomes_metadata.csv:md5,0538b1aec26168b35a416bd995b0a4a9", + 
"no_coverage_paired_reads.tsv:md5,fb8374996c3bad9ddd296684d8c28628", + "ENA_backup.json:md5,e339bef4b1e1e11c37ef72f3d9ef2c93", + "submission.xml:md5,4b7d1d7e1b883a3eac57e1267943a8d6" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "25.04.1" + }, + "timestamp": "2026-03-19T17:13:06.044172" + } +} \ No newline at end of file diff --git a/tests/mag_no_coverage_single_reads.nf.test.snap b/tests/mag_no_coverage_single_reads.nf.test.snap new file mode 100644 index 0000000..cd10aa2 --- /dev/null +++ b/tests/mag_no_coverage_single_reads.nf.test.snap @@ -0,0 +1,47 @@ +{ + "-profile test_mag_no_coverage_single_reads": { + "content": [ + 5, + { + "COVERM_GENOME": { + "coverm": "0.7.0" + }, + "Workflow": { + "nf-core/seqsubmit": "v1.0.0dev" + } + }, + [ + "mags", + "mags/genomes_metadata.csv", + "mags/no_coverage_single_reads", + "mags/no_coverage_single_reads/coverage", + "mags/no_coverage_single_reads/coverage/no_coverage_single_reads.tsv", + "mags/no_coverage_single_reads/upload", + "mags/no_coverage_single_reads/upload/webin_cli", + "mags/no_coverage_single_reads/upload/webin_cli/no_coverage_single_reads_accessions.tsv", + "mags/upload", + "mags/upload/manifests", + "mags/upload/manifests/results", + "mags/upload/manifests/results/MAG_upload", + "mags/upload/manifests/results/MAG_upload/ENA_backup.json", + "mags/upload/manifests/results/MAG_upload/genome_samples.xml", + "mags/upload/manifests/results/MAG_upload/manifests_test", + "mags/upload/manifests/results/MAG_upload/registered_MAGs_test.tsv", + "mags/upload/manifests/results/MAG_upload/submission.xml", + "pipeline_info", + "pipeline_info/nf_core_seqsubmit_software_mqc_versions.yml" + ], + [ + "genomes_metadata.csv:md5,1f56050125362470f351ed99065af980", + "no_coverage_single_reads.tsv:md5,3791d9be880cbfacbc527e47623e3c9a", + "ENA_backup.json:md5,e339bef4b1e1e11c37ef72f3d9ef2c93", + "submission.xml:md5,4b7d1d7e1b883a3eac57e1267943a8d6" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "25.04.1" + }, + 
"timestamp": "2026-03-19T17:13:44.562959" + } +} \ No newline at end of file diff --git a/tests/mag_no_rna_presence.nf.test.snap b/tests/mag_no_rna_presence.nf.test.snap new file mode 100644 index 0000000..dd7e479 --- /dev/null +++ b/tests/mag_no_rna_presence.nf.test.snap @@ -0,0 +1,59 @@ +{ + "-profile test_mag_no_rna_presence": { + "content": [ + 7, + { + "BARRNAP": { + "barrnap": 0.9 + }, + "COUNT_RNA": { + "python": "3.8.13" + }, + "TRNASCANSE": { + "tRNAscan-SE": "2.0.12" + }, + "Workflow": { + "nf-core/seqsubmit": "v1.0.0dev" + } + }, + [ + "mags", + "mags/genomes_metadata.csv", + "mags/no_rna_presence", + "mags/no_rna_presence/rna", + "mags/no_rna_presence/rna/barrnap", + "mags/no_rna_presence/rna/barrnap/no_rna_presence_bac.gff", + "mags/no_rna_presence/rna/trnascanse", + "mags/no_rna_presence/rna/trnascanse/no_rna_presence.log", + "mags/no_rna_presence/rna/trnascanse/no_rna_presence.stats", + "mags/no_rna_presence/rna/trnascanse/no_rna_presence.tsv", + "mags/no_rna_presence/upload", + "mags/no_rna_presence/upload/webin_cli", + "mags/no_rna_presence/upload/webin_cli/no_rna_presence_accessions.tsv", + "mags/upload", + "mags/upload/manifests", + "mags/upload/manifests/results", + "mags/upload/manifests/results/MAG_upload", + "mags/upload/manifests/results/MAG_upload/ENA_backup.json", + "mags/upload/manifests/results/MAG_upload/genome_samples.xml", + "mags/upload/manifests/results/MAG_upload/manifests_test", + "mags/upload/manifests/results/MAG_upload/registered_MAGs_test.tsv", + "mags/upload/manifests/results/MAG_upload/submission.xml", + "pipeline_info", + "pipeline_info/nf_core_seqsubmit_software_mqc_versions.yml" + ], + [ + "genomes_metadata.csv:md5,0f567491f038d4a0c1dbf4c05cdc26c0", + "no_rna_presence_bac.gff:md5,df19e1b84ba6f691d20c72b397c88abf", + "no_rna_presence.tsv:md5,abd958e8addba39c9e4d7f6b97a1a7c6", + "ENA_backup.json:md5,e339bef4b1e1e11c37ef72f3d9ef2c93", + "submission.xml:md5,4b7d1d7e1b883a3eac57e1267943a8d6" + ] + ], + "meta": { + "nf-test": 
"0.9.0", + "nextflow": "25.04.1" + }, + "timestamp": "2026-03-19T16:30:50.102158" + } +} \ No newline at end of file diff --git a/tests/mag_no_taxonomy.nf.test.snap b/tests/mag_no_taxonomy.nf.test.snap new file mode 100644 index 0000000..49244fa --- /dev/null +++ b/tests/mag_no_taxonomy.nf.test.snap @@ -0,0 +1,46 @@ +{ + "-profile test_mag_no_taxonomy": { + "content": [ + 8, + { + "Workflow": { + "nf-core/seqsubmit": "v1.0.0dev" + } + }, + [ + "mags", + "mags/genomes_metadata.csv", + "mags/no_taxonomy", + "mags/no_taxonomy/taxonomy", + "mags/no_taxonomy/taxonomy/no_taxonomy.bin2classification.txt", + "mags/no_taxonomy/taxonomy/no_taxonomy.txt", + "mags/no_taxonomy/upload", + "mags/no_taxonomy/upload/webin_cli", + "mags/no_taxonomy/upload/webin_cli/no_taxonomy_accessions.tsv", + "mags/upload", + "mags/upload/manifests", + "mags/upload/manifests/results", + "mags/upload/manifests/results/MAG_upload", + "mags/upload/manifests/results/MAG_upload/ENA_backup.json", + "mags/upload/manifests/results/MAG_upload/genome_samples.xml", + "mags/upload/manifests/results/MAG_upload/manifests_test", + "mags/upload/manifests/results/MAG_upload/registered_MAGs_test.tsv", + "mags/upload/manifests/results/MAG_upload/submission.xml", + "pipeline_info", + "pipeline_info/nf_core_seqsubmit_software_mqc_versions.yml" + ], + [ + "genomes_metadata.csv:md5,b1d01539d787b77e30b9bacd5b23d51f", + "no_taxonomy.bin2classification.txt:md5,e24c109efe807c7044a866999fd736f1", + "no_taxonomy.txt:md5,0f2082d3e397fd630d605dd60993b131", + "ENA_backup.json:md5,e339bef4b1e1e11c37ef72f3d9ef2c93", + "submission.xml:md5,4b7d1d7e1b883a3eac57e1267943a8d6" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "25.04.1" + }, + "timestamp": "2026-03-19T15:57:14.366463" + } +} \ No newline at end of file diff --git a/tests/mag_one_contig.nf.test.snap b/tests/mag_one_contig.nf.test.snap new file mode 100644 index 0000000..f2f3324 --- /dev/null +++ b/tests/mag_one_contig.nf.test.snap @@ -0,0 +1,25 @@ +{ + "-profile 
test_mag_one_contig": { + "content": [ + 2, + { + "Workflow": { + "nf-core/seqsubmit": "v1.0.0dev" + } + }, + [ + "mags", + "pipeline_info", + "pipeline_info/nf_core_seqsubmit_software_mqc_versions.yml" + ], + [ + + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "25.04.1" + }, + "timestamp": "2026-03-19T12:28:15.857999" + } +} \ No newline at end of file From 4fc0b10391f75db7d6b3a20666a97c613173d55f Mon Sep 17 00:00:00 2001 From: Sofia Ochkalova Date: Fri, 20 Mar 2026 13:35:43 +0000 Subject: [PATCH 26/46] update container for webin-cli-wrapper --- modules/local/ena_webin_cli_wrapper/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/ena_webin_cli_wrapper/main.nf b/modules/local/ena_webin_cli_wrapper/main.nf index 3964efc..df9f97a 100644 --- a/modules/local/ena_webin_cli_wrapper/main.nf +++ b/modules/local/ena_webin_cli_wrapper/main.nf @@ -5,7 +5,7 @@ process ENA_WEBIN_CLI_WRAPPER { label 'process_low' tag "${meta.id}" - container "quay.io/microbiome-informatics/java_mgnify-pipelines-toolkit:1.4.20" + container "quay.io/microbiome-informatics/java_mgnify-pipelines-toolkit:1.4.21" input: tuple val(meta), path(submission_item), path(manifest) From 59f14ab38b316d168c66aef1498eb571a0cb0845 Mon Sep 17 00:00:00 2001 From: Sofia Ochkalova Date: Fri, 20 Mar 2026 13:36:31 +0000 Subject: [PATCH 27/46] add test profiles import to nextflow.config --- nextflow.config | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/nextflow.config b/nextflow.config index a2cfce3..f7cc8c9 100644 --- a/nextflow.config +++ b/nextflow.config @@ -186,20 +186,22 @@ profiles { singularity.runOptions = '--nv' } // TODO: figure out how to better orginise tests for different workflow types (bins, mags, metagenomic_assemblies) - test { includeConfig 'conf/test.config' } - test_genome { includeConfig 'conf/test_genome.config' } - test_mag_complete_metadata { includeConfig 'conf/test_mag_complete_metadata.conf' } -
test_mag_no_coverage_single_reads { includeConfig 'conf/test_mag_no_coverage_single_reads.conf' } - test_mag_no_coverage_paired_reads { includeConfig 'conf/test_mag_no_coverage_paired_reads.conf' } - test_mag_no_quality { includeConfig 'conf/test_mag_no_quality.conf' } - test_mag_one_contig { includeConfig 'conf/test_mag_one_contig.conf' } - test_mag_no_rna_presence { includeConfig 'conf/test_mag_no_rna_presence.conf' } - test_mag_no_taxonomy { includeConfig 'conf/test_mag_no_taxonomy.conf' } - test_assembly { includeConfig 'conf/test_assembly.config' } - test_assembly_complete_metadata { includeConfig 'conf/test_assembly_complete_metadata.config' } - test_assembly_no_coverage_paired_reads { includeConfig 'conf/test_assembly_no_coverage_paired_reads.config' } - test_assembly_one_contig { includeConfig 'conf/test_assembly_one_contig.config' } - test_full { includeConfig 'conf/test_full.config' } + test { includeConfig 'conf/test.config' } + test_full { includeConfig 'conf/test_full.config' } + test_genome { includeConfig 'conf/test_genome.config' } + test_assembly { includeConfig 'conf/test_assembly.config' } + test_mag_complete_metadata { includeConfig 'conf/test_mag_complete_metadata.conf' } + test_mag_no_coverage_single_reads { includeConfig 'conf/test_mag_no_coverage_single_reads.conf' } + test_mag_no_coverage_paired_reads { includeConfig 'conf/test_mag_no_coverage_paired_reads.conf' } + test_mag_no_quality { includeConfig 'conf/test_mag_no_quality.conf' } + test_mag_one_contig { includeConfig 'conf/test_mag_one_contig.conf' } + test_mag_multiple_bins_missing_metadata { includeConfig 'conf/test_mag_multiple_bins_missing_metadata.conf' } + test_mag_no_rna_presence { includeConfig 'conf/test_mag_no_rna_presence.conf' } + test_mag_no_taxonomy { includeConfig 'conf/test_mag_no_taxonomy.conf' } + test_assembly_complete_metadata { includeConfig 'conf/test_assembly_complete_metadata.config' } + test_assembly_no_coverage_single_reads { includeConfig 
'conf/test_assembly_no_coverage_single_reads.config' } + test_assembly_no_coverage_paired_reads { includeConfig 'conf/test_assembly_no_coverage_paired_reads.config' } + test_assembly_one_contig { includeConfig 'conf/test_assembly_one_contig.config' } } // Load nf-core custom profiles from different institutions From ea385d7b25304c4c276d47a4086a2396cd1bfe92 Mon Sep 17 00:00:00 2001 From: Sofia Ochkalova Date: Fri, 20 Mar 2026 15:19:17 +0000 Subject: [PATCH 28/46] replace webin-cli with webin_cli_wrapper in assemblysubmit workflow --- modules/local/ena_webin_cli/main.nf | 61 ------------------- modules/local/ena_webin_cli/meta.yml | 48 --------------- modules/local/ena_webin_cli_wrapper/main.nf | 5 +- tests/assembly_complete_metadata.nf.test.snap | 6 +- ...mbly_no_coverage_paired_reads.nf.test.snap | 6 +- ...mbly_no_coverage_single_reads.nf.test.snap | 6 +- tests/assembly_one_contig.nf.test.snap | 4 +- workflows/assemblysubmit.nf | 32 ++++++---- 8 files changed, 33 insertions(+), 135 deletions(-) delete mode 100644 modules/local/ena_webin_cli/main.nf delete mode 100644 modules/local/ena_webin_cli/meta.yml diff --git a/modules/local/ena_webin_cli/main.nf b/modules/local/ena_webin_cli/main.nf deleted file mode 100644 index 25b12f4..0000000 --- a/modules/local/ena_webin_cli/main.nf +++ /dev/null @@ -1,61 +0,0 @@ -process ENA_WEBIN_CLI { - tag "$meta.id" - label 'process_low' - - container "quay.io/biocontainers/ena-webin-cli:9.0.1--hdfd78af_1" - - stageInMode 'copy' - secret 'WEBIN_ACCOUNT' - secret 'WEBIN_PASSWORD' - - input: - tuple val(meta), path(submission_item), path(manifest) - - output: - tuple val(meta), path("*webin-cli.report"), emit: webin_report - tuple val(meta), env('STATUS') , emit: upload_status - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def prefix = task.ext.prefix ?: "${meta.id}" - def mode = params.test_upload ? "-test" : "" - def submit_or_validate = params.webincli_submit ? 
"-submit": "-validate" - - """ - # change FASTA path in manifest to current workdir - export ITEM_FULL_PATH=\$(readlink -f ${submission_item}) - sed 's|^FASTA\t.*|FASTA\t'"\${ITEM_FULL_PATH}"'|g' ${manifest} > ${prefix}_updated_manifest.manifest - - ena-webin-cli \\ - -context=genome \\ - -manifest=${prefix}_updated_manifest.manifest \\ - -userName="\${WEBIN_ACCOUNT}" \\ - -password="\${WEBIN_PASSWORD}" \\ - ${submit_or_validate} \\ - ${mode} - - mv webin-cli.report "${prefix}_webin-cli.report" - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - ena-webin-cli: \$(ena-webin-cli -version 2>&1 ) - END_VERSIONS - - # status check - if grep -q "submission has been completed successfully" "${prefix}_webin-cli.report"; then - # first time submission completed successfully - export STATUS="success" - true - elif grep -q "object being added already exists in the submission account with accession" "${prefix}_webin-cli.report"; then - # there was attempt to re-submit already submitted genome - export STATUS="success" - true - else - export STATUS="failed" - false - fi - """ -} diff --git a/modules/local/ena_webin_cli/meta.yml b/modules/local/ena_webin_cli/meta.yml deleted file mode 100644 index 53a914f..0000000 --- a/modules/local/ena_webin_cli/meta.yml +++ /dev/null @@ -1,48 +0,0 @@ -name: ena_webin_cli -description: ENA data submission tool using Webin account details -keywords: - - ena - - submission - - upload - - webin -tools: - - ena_webin_cli: - description: | - Data submissions to ENA can be made using the Webin command line submission interface (Webin-CLI). Webin submission account credentials are required to use the program. - documentation: https://github.com/enasequence/webin-cli - licence: ["Apache License"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information. 
- - submission_item: - type: file - description: | - Target FASTA file for submission (mag/bin/assembly) - - manifest: - type: file - description: | - Submission manifest - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - webin_report: - type: file - description: Submission report - pattern: "*webin-cli.report" - - STATUS: - type: bool - description: Submission status success(true) or failed(false - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - -authors: - - "@KateSakharova" - - "@ochkalova" diff --git a/modules/local/ena_webin_cli_wrapper/main.nf b/modules/local/ena_webin_cli_wrapper/main.nf index df9f97a..7918ce1 100644 --- a/modules/local/ena_webin_cli_wrapper/main.nf +++ b/modules/local/ena_webin_cli_wrapper/main.nf @@ -6,6 +6,7 @@ process ENA_WEBIN_CLI_WRAPPER { label 'process_low' tag "${meta.id}" container "quay.io/microbiome-informatics/java_mgnify-pipelines-toolkit:1.4.21" + stageInMode 'copy' input: tuple val(meta), path(submission_item), path(manifest) @@ -18,7 +19,7 @@ process ENA_WEBIN_CLI_WRAPPER { script: def args = task.ext.args ?: "" def prefix = task.ext.prefix ?: "${meta.id}" - def mode = params.test_upload ? "--test" : "" + def test_flag = params.test_upload ? "--test" : "" def submit_or_validate = params.webincli_submit ? 
"--mode submit": "--mode validate" """ @@ -31,7 +32,7 @@ process ENA_WEBIN_CLI_WRAPPER { -o ${prefix}_accessions.tsv \\ --webin-cli-jar ${webin_cli_jar} \\ ${submit_or_validate} \\ - ${mode} \\ + ${test_flag} \\ ${args} cat <<-END_VERSIONS > versions.yml diff --git a/tests/assembly_complete_metadata.nf.test.snap b/tests/assembly_complete_metadata.nf.test.snap index 616dc54..a209618 100644 --- a/tests/assembly_complete_metadata.nf.test.snap +++ b/tests/assembly_complete_metadata.nf.test.snap @@ -1,7 +1,7 @@ { "-profile test_assembly_complete_metadata": { "content": [ - 3, + 4, { "Workflow": { "nf-core/seqsubmit": "v1.0.0dev" @@ -12,7 +12,7 @@ "metagenomic_assemblies/complete_metadata", "metagenomic_assemblies/complete_metadata/upload", "metagenomic_assemblies/complete_metadata/upload/webin_cli", - "metagenomic_assemblies/complete_metadata/upload/webin_cli/complete_metadata_webin-cli.report", + "metagenomic_assemblies/complete_metadata/upload/webin_cli/complete_metadata_accessions.tsv", "metagenomic_assemblies/complete_metadata_assembly_metadata.csv", "pipeline_info", "pipeline_info/nf_core_seqsubmit_software_mqc_versions.yml" @@ -25,6 +25,6 @@ "nf-test": "0.9.0", "nextflow": "25.04.1" }, - "timestamp": "2026-03-19T14:50:08.737906" + "timestamp": "2026-03-20T15:12:31.987836" } } \ No newline at end of file diff --git a/tests/assembly_no_coverage_paired_reads.nf.test.snap b/tests/assembly_no_coverage_paired_reads.nf.test.snap index 6e04685..ec2957e 100644 --- a/tests/assembly_no_coverage_paired_reads.nf.test.snap +++ b/tests/assembly_no_coverage_paired_reads.nf.test.snap @@ -1,7 +1,7 @@ { "-profile test_assembly_no_coverage_paired_reads": { "content": [ - 4, + 5, { "Workflow": { "nf-core/seqsubmit": "v1.0.0dev" @@ -14,7 +14,7 @@ "metagenomic_assemblies/no_coverage_paired_reads/coverage/no_coverage_paired_reads.depth.txt", "metagenomic_assemblies/no_coverage_paired_reads/upload", "metagenomic_assemblies/no_coverage_paired_reads/upload/webin_cli", - 
"metagenomic_assemblies/no_coverage_paired_reads/upload/webin_cli/no_coverage_paired_reads_webin-cli.report", + "metagenomic_assemblies/no_coverage_paired_reads/upload/webin_cli/no_coverage_paired_reads_accessions.tsv", "metagenomic_assemblies/no_coverage_paired_reads_assembly_metadata.csv", "pipeline_info", "pipeline_info/nf_core_seqsubmit_software_mqc_versions.yml" @@ -28,6 +28,6 @@ "nf-test": "0.9.0", "nextflow": "25.04.1" }, - "timestamp": "2026-03-19T17:02:24.792759" + "timestamp": "2026-03-20T15:13:14.194671" } } \ No newline at end of file diff --git a/tests/assembly_no_coverage_single_reads.nf.test.snap b/tests/assembly_no_coverage_single_reads.nf.test.snap index bcb6ed3..dff4302 100644 --- a/tests/assembly_no_coverage_single_reads.nf.test.snap +++ b/tests/assembly_no_coverage_single_reads.nf.test.snap @@ -1,7 +1,7 @@ { "-profile test_assembly_no_coverage_single_reads": { "content": [ - 4, + 5, { "Workflow": { "nf-core/seqsubmit": "v1.0.0dev" @@ -14,7 +14,7 @@ "metagenomic_assemblies/no_coverage_single_reads/coverage/no_coverage_single_reads.depth.txt", "metagenomic_assemblies/no_coverage_single_reads/upload", "metagenomic_assemblies/no_coverage_single_reads/upload/webin_cli", - "metagenomic_assemblies/no_coverage_single_reads/upload/webin_cli/no_coverage_single_reads_webin-cli.report", + "metagenomic_assemblies/no_coverage_single_reads/upload/webin_cli/no_coverage_single_reads_accessions.tsv", "metagenomic_assemblies/no_coverage_single_reads_assembly_metadata.csv", "pipeline_info", "pipeline_info/nf_core_seqsubmit_software_mqc_versions.yml" @@ -28,6 +28,6 @@ "nf-test": "0.9.0", "nextflow": "25.04.1" }, - "timestamp": "2026-03-19T17:03:02.602442" + "timestamp": "2026-03-20T15:13:50.079728" } } \ No newline at end of file diff --git a/tests/assembly_one_contig.nf.test.snap b/tests/assembly_one_contig.nf.test.snap index 6d11b7c..1f21b2b 100644 --- a/tests/assembly_one_contig.nf.test.snap +++ b/tests/assembly_one_contig.nf.test.snap @@ -1,7 +1,7 @@ { "-profile 
test_assembly_one_contig": { "content": [ - 1, + 2, { "Workflow": { "nf-core/seqsubmit": "v1.0.0dev" @@ -19,6 +19,6 @@ "nf-test": "0.9.0", "nextflow": "25.04.1" }, - "timestamp": "2026-03-19T14:55:59.974186" + "timestamp": "2026-03-20T15:14:00.09396" } } \ No newline at end of file diff --git a/workflows/assemblysubmit.nf b/workflows/assemblysubmit.nf index 57db15e..9130073 100644 --- a/workflows/assemblysubmit.nf +++ b/workflows/assemblysubmit.nf @@ -4,17 +4,18 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { COVERM_CONTIG } from '../modules/nf-core/coverm/contig/main' -include { FASTAVALIDATOR } from '../modules/nf-core/fastavalidator/main' -include { GENERATE_ASSEMBLY_MANIFEST } from '../modules/local/generate_assembly_manifest/main' -include { REGISTERSTUDY } from '../modules/local/registerstudy/main' -include { ENA_WEBIN_CLI } from '../modules/local/ena_webin_cli' - -include { MULTIQC } from '../modules/nf-core/multiqc/main' -include { paramsSummaryMap } from 'plugin/nf-schema' -include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_seqsubmit_pipeline' +include { COVERM_CONTIG } from '../modules/nf-core/coverm/contig/main' +include { FASTAVALIDATOR } from '../modules/nf-core/fastavalidator/main' +include { GENERATE_ASSEMBLY_MANIFEST } from '../modules/local/generate_assembly_manifest/main' +include { REGISTERSTUDY } from '../modules/local/registerstudy/main' +include { ENA_WEBIN_CLI_WRAPPER as SUBMIT } from '../modules/local/ena_webin_cli_wrapper' +include { ENA_WEBIN_CLI_DOWNLOAD } from '../modules/local/ena_webin_cli_download' + +include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { paramsSummaryMap } from 'plugin/nf-schema' +include { paramsSummaryMultiqc } from 
'../subworkflows/nf-core/utils_nfcore_pipeline' +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_seqsubmit_pipeline' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -167,8 +168,13 @@ workflow ASSEMBLYSUBMIT { study_accession_ch.first() ) - ENA_WEBIN_CLI( - assemblies_with_coverage.join(GENERATE_ASSEMBLY_MANIFEST.out.manifest) + ENA_WEBIN_CLI_DOWNLOAD ( + params.webin_cli_version + ) + + SUBMIT ( + assemblies_with_coverage.join(GENERATE_ASSEMBLY_MANIFEST.out.manifest), + ENA_WEBIN_CLI_DOWNLOAD.out.webin_cli_jar ) // From b6dfc78d46dcb16c65d2988cfdccc6a32b9b51c6 Mon Sep 17 00:00:00 2001 From: Sofia Ochkalova Date: Fri, 20 Mar 2026 15:25:52 +0000 Subject: [PATCH 29/46] remove echoed credentials --- modules/local/genome_upload/main.nf | 3 --- 1 file changed, 3 deletions(-) diff --git a/modules/local/genome_upload/main.nf b/modules/local/genome_upload/main.nf index f91d369..6d88fd3 100644 --- a/modules/local/genome_upload/main.nf +++ b/modules/local/genome_upload/main.nf @@ -27,9 +27,6 @@ process GENOME_UPLOAD { def mode = (!params.test_upload) ? 
"--live" : "" """ - echo ${ENA_WEBIN} - echo ${ENA_WEBIN_PASSWORD} - genome_upload \\ -u $params.submission_study \\ --genome_info ${table_for_upload} \\ From a47409ab9ce66386d95acb92c01d1e4fca163bf6 Mon Sep 17 00:00:00 2001 From: Sofia Ochkalova Date: Fri, 20 Mar 2026 16:35:07 +0000 Subject: [PATCH 30/46] update docs --- CITATIONS.md | 34 ++++++++++++++++++ README.md | 34 +++++++++++------- assets/samplesheet_genomes.csv | 6 ++-- assets/schema_input_genome.json | 1 + docs/output.md | 63 ++++++++++++++++++++++----------- docs/usage.md | 26 ++++++++++++-- nextflow_schema.json | 8 ++--- workflows/genomesubmit.nf | 4 +-- 8 files changed, 132 insertions(+), 44 deletions(-) diff --git a/CITATIONS.md b/CITATIONS.md index 8934342..030338b 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -14,6 +14,40 @@ > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. +- [CoverM](https://github.com/wwood/CoverM) + +> Aroney ST, Newell RJ, Nissen JN, Camargo AP, Tyson GW, Woodcroft BJ. CoverM: Read alignment statistics for metagenomics. Bioinformatics. 2025;41(4):btaf147. doi: 10.1093/bioinformatics/btaf147. + +- [CheckM2](https://github.com/chklovski/CheckM2) + +> Chklovski A, Parks DH, Woodcroft BJ, Tyson GW. CheckM2: a rapid, scalable and accurate tool for assessing microbial genome quality using machine learning. Nat Methods. 2023;20(8):1203-1212. doi: 10.1038/s41592-023-01940-w. + +- [CAT and BAT](https://doi.org/10.1186/s13059-019-1817-x) + +> von Meijenfeldt FAB, Arkhipova K, Cambuy DD, Coutinho FH, Dutilh BE. Robust taxonomic classification of uncharted microbial sequences and bins with CAT and BAT. Genome Biol. 2019;20(1):217. doi: 10.1186/s13059-019-1817-x. 
+ +- [tRNAscan-SE 2.0](https://doi.org/10.1093/nar/gkab688) + +> Chan PP, Lin BY, Mak AJ, Lowe TM. tRNAscan-SE 2.0: Improved detection and functional classification of transfer RNA genes. Nucleic Acids Res. 2021;49(16):9077-9096. doi: 10.1093/nar/gkab688. + +- [barrnap](https://github.com/tseemann/barrnap) + + > Seemann T. Barrnap: rapid ribosomal RNA prediction. GitHub repository. https://github.com/tseemann/barrnap + +## Submission and helper tools + +- [ENA Webin-CLI](https://github.com/enasequence/webin-cli) + + > European Nucleotide Archive. Webin command line submission interface (Webin-CLI). GitHub repository. https://github.com/enasequence/webin-cli + +- [assembly_uploader](https://github.com/EBI-Metagenomics/assembly_uploader) + + > EBI Metagenomics. ENA Metagenome Assembly uploader. GitHub repository. https://github.com/EBI-Metagenomics/assembly_uploader + +- [genome_uploader](https://github.com/EBI-Metagenomics/genome_uploader) + + > EBI Metagenomics. ENA public Bins and MAGs uploader. GitHub repository. https://github.com/EBI-Metagenomics/genome_uploader + ## Software packaging/containerisation tools - [Anaconda](https://anaconda.com) diff --git a/README.md b/README.md index 4efe08a..c102e5a 100644 --- a/README.md +++ b/README.md @@ -55,33 +55,39 @@ The input must follow `assets/schema_input_genome.json`. 
Required columns: - `sample` -- `fasta` (must end with `.fa.gz` or `.fasta.gz`) +- `fasta` (must end with `.fa.gz`, `.fasta.gz`, or `.fna.gz`) - `accession` - `assembly_software` - `binning_software` - `binning_parameters` -- `stats_generation_software` - `metagenome` - `environmental_medium` - `broad_environment` - `local_environment` - `co-assembly` -Columns that required for now, but will be optional in the nearest future: +At least one of the following must be provided per row: + +- reads (`fastq_1`, optional `fastq_2` for paired-end) +- `genome_coverage` + +Additional supported columns: +- `stats_generation_software` - `completeness` - `contamination` -- `genome_coverage` - `RNA_presence` - `NCBI_lineage` -Those fields are metadata required for [genome_uploader](https://github.com/EBI-Metagenomics/genome_uploader) package. +If `genome_coverage`, `stats_generation_software`, `completeness`, `contamination`, `RNA_presence`, or `NCBI_lineage` are missing, the workflow can calculate or infer them when the required inputs are available. + +Those fields are metadata required for the [genome_uploader](https://github.com/EBI-Metagenomics/genome_uploader) package. 
-Example `samplesheet_genome.csv`: +Example `samplesheet_genomes.csv`: ```csv -sample,fasta,accession,assembly_software,binning_software,binning_parameters,stats_generation_software,completeness,contamination,genome_coverage,metagenome,co-assembly,broad_environment,local_environment,environmental_medium,RNA_presence,NCBI_lineage -lachnospira_eligens,data/bin_lachnospira_eligens.fa.gz,SRR24458089,spades_v3.15.5,metabat2_v2.6,default,CheckM2_v1.0.1,61.0,0.21,32.07,sediment metagenome,No,marine,cable_bacteria,marine_sediment,No,d__Bacteria;p__Proteobacteria;s_unclassified_Proteobacteria +sample,fasta,accession,fastq_1,fastq_2,assembly_software,binning_software,binning_parameters,stats_generation_software,completeness,contamination,genome_coverage,metagenome,co-assembly,broad_environment,local_environment,environmental_medium,RNA_presence,NCBI_lineage +lachnospira_eligens,data/bin_lachnospira_eligens.fa.gz,SRR24458089,,,spades_v3.15.5,metabat2_v2.6,default,CheckM2_v1.0.1,61.0,0.21,32.07,sediment metagenome,No,marine,cable_bacteria,marine_sediment,No,d__Bacteria;p__Proteobacteria;s__unclassified_Proteobacteria ``` ### `metagenomic_assemblies` mode (`ASSEMBLYSUBMIT`) @@ -91,7 +97,7 @@ The input must follow `assets/schema_input_assembly.json`. Required columns: - `sample` -- `fasta` (must end with `.fa.gz` or `.fasta.gz`) +- `fasta` (must end with `.fa.gz`, `.fasta.gz`, or `.fna.gz`) - `run_accession` - `assembler` - `assembler_version` @@ -116,6 +122,10 @@ assembly_2,data/contigs_2.fasta.gz,,,42.7,ERR011323,MEGAHIT,1.2.9 > [!NOTE] > If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data. 
+### Database setup (`CheckM2` and `CAT_pack`) + +The `mags`/`bins` workflow requires databases for completeness/contamination estimation and taxonomy assignment. See [Usage documentation](docs/usage.md) for details. + ### Required parameters: | Parameter | Description | @@ -131,7 +141,7 @@ assembly_2,data/contigs_2.fasta.gz,,,42.7,ERR011323,MEGAHIT,1.2.9 | Parameter | Description | | ------------------- | ---------------------------------------------------------------------------------------- | | `--upload_tpa` | Flag to control the type of assembly study (third party assembly or not). Default: false | -| `--test_upload` | Upload to TEST ENA server instead of LIVE. Default: false | +| `--test_upload` | Upload to TEST ENA server instead of LIVE. Default: true | | `--webincli_submit` | If set to false, submissions will be validated, but not submitted. Default: true | General command template: @@ -196,8 +206,8 @@ For more details and further functionality, please refer to the [usage documenta Key output locations in `--outdir`: -- `upload/manifests/`: generated manifest files for submission -- `upload/webin_cli/`: ENA Webin CLI reports +- `mags/` or `bins/`: genome metadata, manifests, and per-sample submission support files +- `metagenomic_assemblies/`: assembly metadata CSVs and per-sample coverage files - `multiqc/`: MultiQC summary report - `pipeline_info/`: execution reports, trace, DAG, and software versions diff --git a/assets/samplesheet_genomes.csv b/assets/samplesheet_genomes.csv index 44a8138..4db4f00 100644 --- a/assets/samplesheet_genomes.csv +++ b/assets/samplesheet_genomes.csv @@ -1,3 +1,3 @@ -sample,fasta,accession,fastq_1,fastq_2,assembly_software,binning_software,binning_parameters,stats_generation_software,completeness,contamination,genome_coverage,metagenome,co-assembly,broad_environment,local_environment,environmental_medium,rRNA_presence,NCBI_lineage
-lachnospira_eligens,https://github.com/nf-core/test-datasets/raw/seqsubmit/test_data/bins/bin_lachnospira_eligens.fa.gz,SRR24458089,spades_v3.15.5,mags_v1,default,CheckM2_v1.0.1,61.0,0.21,32.07,sediment metagenome,False,marine,cable bacteria,marine sediment,False,d__Bacteria;p__Proteobacteria;c__Deltaproteobacteria;o__Desulfobacterales;f__Desulfobulbaceae;g__Candidatus Electrothrix;s__ -lachnospiraceae,https://github.com/nf-core/test-datasets/raw/seqsubmit/test_data/bins/bin_lachnospiraceae.fa.gz,SRR24458087,spades_v3.15.5,mags_v1,default,CheckM2_v1.0.1,92.81,1.09,66.04,sediment metagenome,False,marine,cable bacteria,marine sediment,False,d__Bacteria;p__Proteobacteria;c__Deltaproteobacteria;o__Desulfobacterales;f__Desulfobulbaceae;g__Candidatus Electrothrix;s__Candidatus Electrothrix marina +sample,fasta,accession,fastq_1,fastq_2,assembly_software,binning_software,binning_parameters,stats_generation_software,completeness,contamination,genome_coverage,metagenome,co-assembly,broad_environment,local_environment,environmental_medium,RNA_presence,NCBI_lineage +lachnospira_eligens,https://github.com/nf-core/test-datasets/raw/seqsubmit/test_data/bins/bin_lachnospira_eligens.fa.gz,SRR24458089,,,spades_v3.15.5,mags_v1,default,CheckM2_v1.0.1,61.0,0.21,32.07,sediment metagenome,No,marine,cable bacteria,marine sediment,No,d__Bacteria;p__Proteobacteria;c__Deltaproteobacteria;o__Desulfobacterales;f__Desulfobulbaceae;g__Candidatus Electrothrix;s__ +lachnospiraceae,https://github.com/nf-core/test-datasets/raw/seqsubmit/test_data/bins/bin_lachnospiraceae.fa.gz,SRR24458087,,,spades_v3.15.5,mags_v1,default,CheckM2_v1.0.1,92.81,1.09,66.04,sediment metagenome,No,marine,cable bacteria,marine sediment,No,d__Bacteria;p__Proteobacteria;c__Deltaproteobacteria;o__Desulfobacterales;f__Desulfobulbaceae;g__Candidatus Electrothrix;s__Candidatus Electrothrix marina diff --git a/assets/schema_input_genome.json b/assets/schema_input_genome.json index 622b9ab..f851f85 100644 --- 
a/assets/schema_input_genome.json +++ b/assets/schema_input_genome.json @@ -117,6 +117,7 @@ "required": [ "sample", "fasta", + "accession", "assembly_software", "co-assembly", "binning_software", diff --git a/docs/output.md b/docs/output.md index ae97140..8cdc8c5 100644 --- a/docs/output.md +++ b/docs/output.md @@ -2,46 +2,67 @@ ## Introduction -This document describes the output produced by the pipeline. Most of the plots are taken from the MultiQC report, which summarises results at the end of the pipeline. +This document describes the files currently published into `--outdir` by `nf-core/seqsubmit`. -The directories listed below will be created in the results directory after the pipeline has finished. All paths are relative to the top-level results directory. +The exact directory structure depends on the selected `--mode`. - +## Common outputs -## Pipeline overview +### `pipeline_info/` -The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps: +The pipeline writes general run metadata to `pipeline_info/` for all modes. -- [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline -- [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution +
+Output files -### MultiQC +- `pipeline_info/` + - `execution_report_.html`: Nextflow execution report. + - `execution_timeline_.html`: Nextflow execution timeline. + - `execution_trace_.txt`: task-level trace table. + - `pipeline_dag_.html`: rendered workflow DAG. + - `params_.json`: parameter snapshot written at pipeline start. + - `nf_core_seqsubmit_software_mqc_versions.yml`: collated software versions gathered during the run. + - `pipeline_report.html`: optional summary report generated when `--email` or `--email_on_fail` is used. + - `pipeline_report.txt`: text version of the optional summary report. + +
+ +[Nextflow](https://www.nextflow.io/docs/latest/tracing.html) provides the execution report, timeline, trace, and DAG outputs. These files are the main resources for troubleshooting failed runs and reviewing resource usage. + +## `mags` and `bins` outputs + +When `--mode mags` or `--mode bins` is used, results are written under `mags/` or `bins/`.
Output files -- `multiqc/` - - `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. - - `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. - - `multiqc_plots/`: directory containing static images from the report in various formats. +- `/` + - `genomes_metadata.csv`: tabular metadata assembled for submission. + - `upload/manifests/`: manifests generated by `genome_uploader`. + - `databases/checkm2/`: downloaded CheckM2 database file (when downloaded during the run). + - `databases/cat_pack/`: prepared CAT_pack database directories (when generated during the run). + - `/coverage/`: `coverm genome` output for samples where `genome_coverage` had to be computed. + - `/rna/barrnap/`: `barrnap` output GFF for samples where `RNA_presence` had to be inferred. + - `/rna/trnascanse/`: `tRNAscan-SE` outputs for the same RNA inference step. + - `/taxonomy/`: CAT/BAT taxonomic classification for samples where `NCBI_lineage` was missing. + - `/checkm2/`: CheckM2 reports for samples where completeness/contamination metadata had to be computed. + - `/upload/webin_cli/`: Webin-CLI wrapper output TSV with accessions for submitted genomes.
-[MultiQC](http://multiqc.info) is a visualization tool that generates a single HTML report summarising all samples in your project. Most of the pipeline QC results are visualised in the report and further statistics are available in the report data directory. +Many of these per-sample directories are conditional. For example, if `genome_coverage` is already provided in the samplesheet, the corresponding `coverage/` directory will not be created for that sample. -Results generated by MultiQC collate pipeline QC from supported tools e.g. FastQC. The pipeline has special steps which also allow the software versions to be reported in the MultiQC output for future traceability. For more information about how to use MultiQC reports, see . +## `metagenomic_assemblies` outputs -### Pipeline information +When `--mode metagenomic_assemblies` is used, results are written under `metagenomic_assemblies/`.
Output files -- `pipeline_info/` - - Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.dot`/`pipeline_dag.svg`. - - Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.yml`. The `pipeline_report*` files will only be present if the `--email` / `--email_on_fail` parameter's are used when running the pipeline. - - Reformatted samplesheet files used as input to the pipeline: `samplesheet.valid.csv`. - - Parameters used by the pipeline run: `params.json`. +- `metagenomic_assemblies/` + - `_assembly_metadata.csv`: per-assembly metadata CSV generated for manifest creation. + - `/coverage/`: `coverm contig` output for samples where `coverage` had to be calculated from reads.
-[Nextflow](https://www.nextflow.io/docs/latest/tracing.html) provides excellent functionality for generating various reports relevant to the running and execution of the pipeline. This will allow you to troubleshoot errors with the running of the pipeline, and also provide you with other information such as launch commands, run times and resource usage. +Assembly study registration and manifest generation are executed by the workflow, but their intermediate outputs are not currently published into `--outdir`; the Webin-CLI wrapper accessions TSV is published to each sample's `upload/webin_cli/` directory. diff --git a/docs/usage.md b/docs/usage.md index db6da67..1ac66b4 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -24,8 +24,8 @@ Before running the pipeline, make sure that: Set your Webin credentials as Nextflow secrets: ```bash -nextflow secrets set WEBIN_ACCOUNT "Webin-XXX" -nextflow secrets set WEBIN_PASSWORD "XXX" +nextflow secrets set ENA_WEBIN "Webin-XXX" +nextflow secrets set ENA_WEBIN_PASSWORD "XXX" ``` ## Samplesheet input @@ -99,6 +99,28 @@ assembly_002,data/assembly_002.fasta.gz,,,42.7,ERR011323,MEGAHIT,1.2.9 An example file is available at [assets/samplesheet_assembly.csv](../assets/samplesheet_assembly.csv). +## Database preparation (`mags` / `bins`) + +The `GENOMESUBMIT` workflow uses `CheckM2` and `CAT_pack`, which require specialized databases for completeness/contamination assessment and taxonomy assignment. + +You can either provide pre-existing databases or let the pipeline prepare them during execution. + +- `CheckM2`: + - provide the path to a local database with `--checkm2_db`, otherwise the pipeline downloads the version specified with `--checkm2_db_zenodo_id` (by default `14897628`). + +- `CAT_pack`: + - provide the path to a local database (containing `tax/` and `db/` folders or a `tar.gz` archive) with `--cat_db`, otherwise the pipeline constructs the version specified with `--cat_db_download_id` (by default `nr`).
+ +See [CAT_pack documentation](https://github.com/MGXlab/CAT_pack) and [CheckM2 documentation](https://github.com/chklovski/CheckM2) for more details on usage and creation of databases. + +> [!IMPORTANT] +> `CAT_pack` database creation can take significant time. +> +> Reusing an existing database is strongly recommended for repeated runs. +> +> Databases created/downloaded by the pipeline are published under: +> `${params.outdir}/databases/` + ## Running the pipeline General command template: diff --git a/nextflow_schema.json b/nextflow_schema.json index 3d4182a..ac65902 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -51,7 +51,7 @@ "properties": { "rrna_limit": { "type": "number", - "description": "Minimum number of rRNA.", + "description": "Minimum percentage of 16S, 23S, and 5S rRNA gene length recovered to count the gene as present.", "default": 80, "fa_icon": "fas fa-users-cog" }, @@ -72,7 +72,7 @@ "properties": { "cat_db": { "type": "string", - "description": "Path to CAT_pack DB", + "description": "Path to local CAT_pack DB", "help": "Database should be pre-downloaded using CATpack commands. The folder with database should contain sub-folders 'db' with files .dmnd, .fastaid2LCAtaxid, .taxids_with_multiple_offspring, etc. And subfolder 'tax' that should contain taxonomy files like .dmp, .taxdump.tar.gz, .prot.accession2taxid.FULL.gz, etc", "fa_icon": "fas fa-users-cog" }, @@ -294,9 +294,9 @@ }, "upload_force": { "type": "boolean", - "description": "Flag to control upload retries", + "description": "Enables force mode for genome_uploader (used for MAGs/BINs submission)", "default": true, - "help": "Forces reset of sample xmls generation. This is useful if you changed something in your tsv table, or if ENA metadata haven't been downloaded correctly (you can check this in ENA_backup.json). Default: true" + "help": "Forces reset of bin/MAG sample xmls generation. 
This is useful if you changed something in your tsv table, or if ENA metadata haven't been downloaded correctly (you can check this in ENA_backup.json). Default: true" }, "submission_study": { "type": "string", diff --git a/workflows/genomesubmit.nf b/workflows/genomesubmit.nf index 8dc2b9b..db08a2e 100644 --- a/workflows/genomesubmit.nf +++ b/workflows/genomesubmit.nf @@ -276,8 +276,8 @@ workflow GENOMESUBMIT { manifests_ch = CREATE_MANIFESTS.out.manifests.flatten() .map { manifest -> def prefix = params.test_upload ? - manifest.name.replaceAll(/_\d+\.manifest$/, '') : - manifest.name.replaceAll(/\.manifest$/, '') + manifest.name.replaceAll(/_\d+\.manifest$/, '') : // Remove extension and hash suffix appended in test mode + manifest.name.replaceAll(/\.manifest$/, '') // Remove only extension in live mode def meta = [id: prefix] [ meta, manifest ] } From 707becf1bad22a2bbf067f5980caef55cb315cab Mon Sep 17 00:00:00 2001 From: Sofia Ochkalova Date: Fri, 20 Mar 2026 16:35:44 +0000 Subject: [PATCH 31/46] remove webin-cli from modules.config, add publishDirs for databases --- conf/modules.config | 35 ++++++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index f4b0324..5d3204b 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -33,6 +33,23 @@ process { ] } + withName: 'CATPACK_PREPARE' { + publishDir = [ + [ + path: { "${params.outdir}/databases/cat_pack/" }, + mode: params.publish_dir_mode, + pattern: '*/db/', + saveAs: { filename -> "db" } + ], + [ + path: { "${params.outdir}/databases/cat_pack/" }, + mode: params.publish_dir_mode, + pattern: '*/tax/', + saveAs: { filename -> "tax" } + ] + ] +} + // // RNA DETECTION SUBWORKFLOW // @@ -65,6 +82,14 @@ process { ] } + withName: 'CHECKM2_DATABASEDOWNLOAD' { + publishDir = [ + path: { "${params.outdir}/databases/checkm2/" }, + mode: params.publish_dir_mode, + pattern: '*.dmnd' + ] + } + // // COVERAGE CALCULATION // @@ -98,14 
+123,6 @@ process { ] } - withName: 'ENA_WEBIN_CLI' { - publishDir = [ - path: { "${params.outdir}/${params.mode}/${meta.id}/upload/webin_cli" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: 'ENA_WEBIN_CLI_WRAPPER' { ext.args = { params.mode == 'mags' || params.mode == 'bins' || params.mode == 'metagenomic_assemblies' ? "--context genome": "--context reads"} publishDir = [ @@ -115,7 +132,7 @@ process { ] } - withName: 'GENERATE_ASSEMBLY_MANIFEST|ENA_WEBIN_CLI|REGISTERSTUDY' { + withName: 'GENERATE_ASSEMBLY_MANIFEST|REGISTERSTUDY' { ext.args = { params.test_upload ? "--test" : "" } } From 30bdba54df85feb3572be5eb2691ede44ca5fdb9 Mon Sep 17 00:00:00 2001 From: Sofia Ochkalova Date: Fri, 20 Mar 2026 16:48:08 +0000 Subject: [PATCH 32/46] enable MULTIQC in assemblysubmit and genomesubmit workflows --- workflows/assemblysubmit.nf | 19 +++++++++---------- workflows/genomesubmit.nf | 18 +++++++++--------- 2 files changed, 18 insertions(+), 19 deletions(-) diff --git a/workflows/assemblysubmit.nf b/workflows/assemblysubmit.nf index 9130073..5634bbe 100644 --- a/workflows/assemblysubmit.nf +++ b/workflows/assemblysubmit.nf @@ -220,18 +220,17 @@ workflow ASSEMBLYSUBMIT { ) ) - // MULTIQC ( - // ch_multiqc_files.collect(), - // ch_multiqc_config.toList(), - // ch_multiqc_custom_config.toList(), - // ch_multiqc_logo.toList(), - // [], - // [] - // ) + MULTIQC ( + ch_multiqc_files.collect(), + ch_multiqc_config.toList(), + ch_multiqc_custom_config.toList(), + ch_multiqc_logo.toList(), + [], + [] + ) emit: - // multiqc_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html - multiqc_report = channel.empty() // TODO re-enable when multiqc is added back in + multiqc_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html versions = ch_versions // channel: [ path(versions.yml) ] } diff --git a/workflows/genomesubmit.nf b/workflows/genomesubmit.nf index 
db08a2e..dc3f60c 100644 --- a/workflows/genomesubmit.nf +++ b/workflows/genomesubmit.nf @@ -345,17 +345,17 @@ workflow GENOMESUBMIT { ) ) - //MULTIQC ( - // ch_multiqc_files.collect(), - // ch_multiqc_config.toList(), - // ch_multiqc_custom_config.toList(), - // ch_multiqc_logo.toList(), - // [], - // [] - //) + MULTIQC ( + ch_multiqc_files.collect(), + ch_multiqc_config.toList(), + ch_multiqc_custom_config.toList(), + ch_multiqc_logo.toList(), + [], + [] + ) emit: - multiqc_report = channel.empty() // MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html + multiqc_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html versions = ch_versions // channel: [ path(versions.yml) ] } From f6ab40d2543c950bea1b5ab6a243964d52977488 Mon Sep 17 00:00:00 2001 From: Sofia Ochkalova Date: Fri, 20 Mar 2026 16:48:18 +0000 Subject: [PATCH 33/46] update schema --- assets/schema_input_assembly.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/schema_input_assembly.json b/assets/schema_input_assembly.json index d342b31..2b5fcea 100644 --- a/assets/schema_input_assembly.json +++ b/assets/schema_input_assembly.json @@ -18,7 +18,7 @@ "format": "file-path", "exists": true, "pattern": "^([\\S\\s]*\\/)?[^\\s\\/]+\\.(fa|fasta|fna)\\.gz$", - "errorMessage": "FASTA file must be provided and have extension '.fa', '.fasta', '.fna' (optionally gzipped)", + "errorMessage": "FASTA file must be provided and have extension '.fa.gz', '.fasta.gz', '.fna.gz'", "description": "Metagenomic assembly FASTA file" }, "fastq_1": { From fe7a8cce26bc2771e6d6f5fa6deabb7b161f1bd5 Mon Sep 17 00:00:00 2001 From: Sofia Ochkalova Date: Fri, 20 Mar 2026 17:01:35 +0000 Subject: [PATCH 34/46] remove commented code --- workflows/genomesubmit.nf | 1 - 1 file changed, 1 deletion(-) diff --git a/workflows/genomesubmit.nf b/workflows/genomesubmit.nf index dc3f60c..4d4950a 100644 --- a/workflows/genomesubmit.nf +++ b/workflows/genomesubmit.nf @@ -300,7 +300,6 @@ 
workflow GENOMESUBMIT { ch_combined, ENA_WEBIN_CLI_DOWNLOAD.out.webin_cli_jar ) - //ch_versions = ch_versions.mix( ENA_WEBIN_CLI.out.versions.first() ) // // Collate and save software versions From f9171655c1bee6848e10656a6475f5252015f779 Mon Sep 17 00:00:00 2001 From: Sofia Ochkalova Date: Wed, 25 Mar 2026 14:22:41 +0000 Subject: [PATCH 35/46] remove docker.enabled = true from test profiles --- conf/test_assembly_complete_metadata.config | 2 -- conf/test_assembly_no_coverage_paired_reads.config | 2 -- conf/test_assembly_no_coverage_single_reads.config | 2 -- conf/test_assembly_one_contig.config | 2 -- conf/test_mag_complete_metadata.conf | 2 -- conf/test_mag_multiple_bins_missing_metadata.conf | 2 -- conf/test_mag_no_coverage_paired_reads.conf | 2 -- conf/test_mag_no_coverage_single_reads.conf | 2 -- conf/test_mag_no_quality.conf | 2 -- conf/test_mag_no_rna_presence.conf | 2 -- conf/test_mag_no_taxonomy.conf | 2 -- conf/test_mag_one_contig.conf | 2 -- 12 files changed, 24 deletions(-) diff --git a/conf/test_assembly_complete_metadata.config b/conf/test_assembly_complete_metadata.config index 032ec5f..603e764 100644 --- a/conf/test_assembly_complete_metadata.config +++ b/conf/test_assembly_complete_metadata.config @@ -35,5 +35,3 @@ params { centre_name = "TEST_CENTER" } - -docker.enabled = true diff --git a/conf/test_assembly_no_coverage_paired_reads.config b/conf/test_assembly_no_coverage_paired_reads.config index b01bb59..6d89e50 100644 --- a/conf/test_assembly_no_coverage_paired_reads.config +++ b/conf/test_assembly_no_coverage_paired_reads.config @@ -35,5 +35,3 @@ params { centre_name = "TEST_CENTER" } - -docker.enabled = true diff --git a/conf/test_assembly_no_coverage_single_reads.config b/conf/test_assembly_no_coverage_single_reads.config index baca43e..81ed513 100644 --- a/conf/test_assembly_no_coverage_single_reads.config +++ b/conf/test_assembly_no_coverage_single_reads.config @@ -35,5 +35,3 @@ params { centre_name = "TEST_CENTER" } - -docker.enabled = 
true diff --git a/conf/test_assembly_one_contig.config b/conf/test_assembly_one_contig.config index cc6029f..460e8ee 100644 --- a/conf/test_assembly_one_contig.config +++ b/conf/test_assembly_one_contig.config @@ -35,5 +35,3 @@ params { centre_name = "TEST_CENTER" } - -docker.enabled = true diff --git a/conf/test_mag_complete_metadata.conf b/conf/test_mag_complete_metadata.conf index 50a11e6..05c4f61 100644 --- a/conf/test_mag_complete_metadata.conf +++ b/conf/test_mag_complete_metadata.conf @@ -37,5 +37,3 @@ params { checkm2_db = null } - -docker.enabled = true diff --git a/conf/test_mag_multiple_bins_missing_metadata.conf b/conf/test_mag_multiple_bins_missing_metadata.conf index 4d1c693..e5e6e7e 100644 --- a/conf/test_mag_multiple_bins_missing_metadata.conf +++ b/conf/test_mag_multiple_bins_missing_metadata.conf @@ -37,5 +37,3 @@ params { checkm2_db = null } - -docker.enabled = true diff --git a/conf/test_mag_no_coverage_paired_reads.conf b/conf/test_mag_no_coverage_paired_reads.conf index b1c24d6..103ddc7 100644 --- a/conf/test_mag_no_coverage_paired_reads.conf +++ b/conf/test_mag_no_coverage_paired_reads.conf @@ -37,5 +37,3 @@ params { checkm2_db = null } - -docker.enabled = true diff --git a/conf/test_mag_no_coverage_single_reads.conf b/conf/test_mag_no_coverage_single_reads.conf index 21dc09e..1c40a6b 100644 --- a/conf/test_mag_no_coverage_single_reads.conf +++ b/conf/test_mag_no_coverage_single_reads.conf @@ -37,5 +37,3 @@ params { checkm2_db = null } - -docker.enabled = true diff --git a/conf/test_mag_no_quality.conf b/conf/test_mag_no_quality.conf index e1b8bef..1ed5ce3 100644 --- a/conf/test_mag_no_quality.conf +++ b/conf/test_mag_no_quality.conf @@ -39,5 +39,3 @@ params { checkm2_db = null } - -docker.enabled = true diff --git a/conf/test_mag_no_rna_presence.conf b/conf/test_mag_no_rna_presence.conf index 2cd4413..f6052df 100644 --- a/conf/test_mag_no_rna_presence.conf +++ b/conf/test_mag_no_rna_presence.conf @@ -37,5 +37,3 @@ params { checkm2_db = null 
} - -docker.enabled = true diff --git a/conf/test_mag_no_taxonomy.conf b/conf/test_mag_no_taxonomy.conf index 321ec8b..0d384bf 100644 --- a/conf/test_mag_no_taxonomy.conf +++ b/conf/test_mag_no_taxonomy.conf @@ -37,5 +37,3 @@ params { checkm2_db = null } - -docker.enabled = true diff --git a/conf/test_mag_one_contig.conf b/conf/test_mag_one_contig.conf index a4cb817..298d4b6 100644 --- a/conf/test_mag_one_contig.conf +++ b/conf/test_mag_one_contig.conf @@ -37,5 +37,3 @@ params { checkm2_db = null } - -docker.enabled = true From 2df682c1ce8c1d4e1efd6c0c2f05f7efd3a6b254 Mon Sep 17 00:00:00 2001 From: Sofia Ochkalova Date: Wed, 25 Mar 2026 14:58:04 +0000 Subject: [PATCH 36/46] downgrade MULTIQC container version to 1.25.1 to prevent segfaults in 1.32 --- modules/nf-core/multiqc/main.nf | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index c1158fb..005e36a 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -2,9 +2,10 @@ process MULTIQC { label 'process_single' conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/8c/8c6c120d559d7ee04c7442b61ad7cf5a9e8970be5feefb37d68eeaa60c1034eb/data' : - 'community.wave.seqera.io/library/multiqc:1.32--d58f60e4deb769bf' }" + // TODO: version is temporarily set to 1.25.1 to avoid segfault happening in 1.32 + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'oras://community.wave.seqera.io/library/multiqc:1.25.1--6d0dfb7dbe16fbf9' + : 'community.wave.seqera.io/library/multiqc:1.25.1--214d24b7702218de'}" input: path multiqc_files, stageAs: "?/*" From 797e019a0b07c63a49753383edc7b5137fa28762 Mon Sep 17 00:00:00 2001 From: Sofia Ochkalova Date: Wed, 25 Mar 2026 17:34:53 +0000 Subject: [PATCH 37/46] fixes in tests, snapshot updates --- conf/test_assembly.config | 8 +- conf/test_assembly_complete_metadata.config | 10 +-- ...t_assembly_no_coverage_paired_reads.config | 10 +-- ...t_assembly_no_coverage_single_reads.config | 10 +-- conf/test_assembly_one_contig.config | 10 +-- conf/test_mag_complete_metadata.conf | 2 - ...st_mag_multiple_bins_missing_metadata.conf | 2 - conf/test_mag_no_coverage_paired_reads.conf | 2 - conf/test_mag_no_coverage_single_reads.conf | 2 - conf/test_mag_no_quality.conf | 2 - conf/test_mag_no_rna_presence.conf | 2 - conf/test_mag_no_taxonomy.conf | 2 - conf/test_mag_one_contig.conf | 2 - .../local/count_rna/tests/data/ecoli.stats | 84 +++++++++++++++++++ .../local/count_rna/tests/data/ecoli_bac.gff | 23 +++++ .../tests/main.nf.test.snap | 19 +++++ .../tests/main.nf.test | 36 ++++---- .../tests/main.nf.test.snap | 35 +++----- .../tests/main.nf.test.snap | 56 +++++++++++++ tests/assembly_complete_metadata.nf.test.snap | 20 ++++- ...mbly_no_coverage_paired_reads.nf.test.snap | 18 +++- ...mbly_no_coverage_single_reads.nf.test.snap | 18 +++- ...ly_no_study_complete_metadata.nf.test.snap | 47 +++++++++++ tests/assembly_one_contig.nf.test.snap | 20 ++++- tests/mag_complete_metadata.nf.test.snap | 18 +++- ...ultiple_bins_missing_metadata.nf.test.snap | 18 +++- .../mag_no_coverage_paired_reads.nf.test.snap | 18 +++- .../mag_no_coverage_single_reads.nf.test.snap | 18 +++- tests/mag_no_rna_presence.nf.test.snap | 18 +++- ...ag_no_study_complete_metadata.nf.test.snap | 58 +++++++++++++ tests/mag_no_taxonomy.nf.test.snap | 18 +++- tests/mag_one_contig.nf.test.snap | 19 ++++- 32 files changed, 513 insertions(+), 
112 deletions(-) create mode 100644 modules/local/count_rna/tests/data/ecoli.stats create mode 100644 modules/local/count_rna/tests/data/ecoli_bac.gff create mode 100644 modules/local/ena_webin_cli_download/tests/main.nf.test.snap create mode 100644 modules/local/rename_fasta_for_catpack/tests/main.nf.test.snap create mode 100644 tests/assembly_no_study_complete_metadata.nf.test.snap create mode 100644 tests/mag_no_study_complete_metadata.nf.test.snap diff --git a/conf/test_assembly.config b/conf/test_assembly.config index 389e102..cc7430a 100644 --- a/conf/test_assembly.config +++ b/conf/test_assembly.config @@ -23,13 +23,11 @@ params { config_profile_description = 'Minimal test dataset to check pipeline function' // Input data - // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets - // TODO nf-core: Give any required params for the test so that command line flags are not needed input = 'assets/samplesheet_assembly.csv' outdir = 'test_output' - mode = "metagenomic_assemblies" - submission_study = "PRJEB98843" - centre_name = "TEST_CENTER" + mode = "metagenomic_assemblies" + submission_study = "PRJEB98843" + centre_name = "TEST_CENTER" } diff --git a/conf/test_assembly_complete_metadata.config b/conf/test_assembly_complete_metadata.config index 603e764..16e4ccf 100644 --- a/conf/test_assembly_complete_metadata.config +++ b/conf/test_assembly_complete_metadata.config @@ -23,15 +23,11 @@ params { config_profile_description = 'Single-case assembly test with complete metadata values provided' // Input data - // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets - // TODO nf-core: Give any required params for the test so that command line flags are not needed input = params.pipelines_testdata_base_path + 'seqsubmit/samplesheets/assembly_complete_metadata.csv' outdir = 'test_output' - mode = "metagenomic_assemblies" - submission_study = "PRJEB98843" - ena_raw_reads_study_accession = "PRJEB65995" - library = "metagenome" - 
centre_name = "TEST_CENTER" + mode = "metagenomic_assemblies" + submission_study = "PRJEB98843" + centre_name = "TEST_CENTER" } diff --git a/conf/test_assembly_no_coverage_paired_reads.config b/conf/test_assembly_no_coverage_paired_reads.config index 6d89e50..65c73e4 100644 --- a/conf/test_assembly_no_coverage_paired_reads.config +++ b/conf/test_assembly_no_coverage_paired_reads.config @@ -23,15 +23,11 @@ params { config_profile_description = 'Single-case assembly test with missing coverage and paired-end reads' // Input data - // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets - // TODO nf-core: Give any required params for the test so that command line flags are not needed input = params.pipelines_testdata_base_path + 'seqsubmit/samplesheets/assembly_no_coverage_paired_reads.csv' outdir = 'test_output' - mode = "metagenomic_assemblies" - submission_study = "PRJEB98843" - ena_raw_reads_study_accession = "PRJEB65995" - library = "metagenome" - centre_name = "TEST_CENTER" + mode = "metagenomic_assemblies" + submission_study = "PRJEB98843" + centre_name = "TEST_CENTER" } diff --git a/conf/test_assembly_no_coverage_single_reads.config b/conf/test_assembly_no_coverage_single_reads.config index 81ed513..814de31 100644 --- a/conf/test_assembly_no_coverage_single_reads.config +++ b/conf/test_assembly_no_coverage_single_reads.config @@ -23,15 +23,11 @@ params { config_profile_description = 'Single-case assembly test with missing coverage and single-end reads' // Input data - // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets - // TODO nf-core: Give any required params for the test so that command line flags are not needed input = params.pipelines_testdata_base_path + 'seqsubmit/samplesheets/assembly_no_coverage_single_reads.csv' outdir = 'test_output' - mode = "metagenomic_assemblies" - submission_study = "PRJEB98843" - ena_raw_reads_study_accession = "PRJEB65995" - library = "metagenome" - centre_name = "TEST_CENTER" + 
mode = "metagenomic_assemblies" + submission_study = "PRJEB98843" + centre_name = "TEST_CENTER" } diff --git a/conf/test_assembly_one_contig.config b/conf/test_assembly_one_contig.config index 460e8ee..b27784e 100644 --- a/conf/test_assembly_one_contig.config +++ b/conf/test_assembly_one_contig.config @@ -23,15 +23,11 @@ params { config_profile_description = 'Single-case assembly test with one contig assembly' // Input data - // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets - // TODO nf-core: Give any required params for the test so that command line flags are not needed input = params.pipelines_testdata_base_path + 'seqsubmit/samplesheets/assembly_one_contig.csv' outdir = 'test_output' - mode = "metagenomic_assemblies" - submission_study = "PRJEB98843" - ena_raw_reads_study_accession = "PRJEB65995" - library = "metagenome" - centre_name = "TEST_CENTER" + mode = "metagenomic_assemblies" + submission_study = "PRJEB98843" + centre_name = "TEST_CENTER" } diff --git a/conf/test_mag_complete_metadata.conf b/conf/test_mag_complete_metadata.conf index 05c4f61..eabc483 100644 --- a/conf/test_mag_complete_metadata.conf +++ b/conf/test_mag_complete_metadata.conf @@ -23,8 +23,6 @@ params { config_profile_description = 'Single-case MAG test with complete metadata values provided' // Input data - // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets - // TODO nf-core: Give any required params for the test so that command line flags are not needed input = params.pipelines_testdata_base_path + 'seqsubmit/samplesheets/mag_complete_metadata.csv' mode = "mags" diff --git a/conf/test_mag_multiple_bins_missing_metadata.conf b/conf/test_mag_multiple_bins_missing_metadata.conf index e5e6e7e..cd5ddd2 100644 --- a/conf/test_mag_multiple_bins_missing_metadata.conf +++ b/conf/test_mag_multiple_bins_missing_metadata.conf @@ -23,8 +23,6 @@ params { config_profile_description = 'Multi-bin MAG test with mixed missing metadata fields' // Input 
data - // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets - // TODO nf-core: Give any required params for the test so that command line flags are not needed input = params.pipelines_testdata_base_path + 'seqsubmit/samplesheets/mag_multiple_bins_missing_metadata.csv' mode = "mags" diff --git a/conf/test_mag_no_coverage_paired_reads.conf b/conf/test_mag_no_coverage_paired_reads.conf index 103ddc7..f3b6ba3 100644 --- a/conf/test_mag_no_coverage_paired_reads.conf +++ b/conf/test_mag_no_coverage_paired_reads.conf @@ -23,8 +23,6 @@ params { config_profile_description = 'Single-case MAG test with missing genome_coverage and paired-end reads' // Input data - // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets - // TODO nf-core: Give any required params for the test so that command line flags are not needed input = params.pipelines_testdata_base_path + 'seqsubmit/samplesheets/mag_no_coverage_paired_reads.csv' mode = "mags" diff --git a/conf/test_mag_no_coverage_single_reads.conf b/conf/test_mag_no_coverage_single_reads.conf index 1c40a6b..cd4cc20 100644 --- a/conf/test_mag_no_coverage_single_reads.conf +++ b/conf/test_mag_no_coverage_single_reads.conf @@ -23,8 +23,6 @@ params { config_profile_description = 'Single-case MAG test with missing genome_coverage and single-end reads' // Input data - // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets - // TODO nf-core: Give any required params for the test so that command line flags are not needed input = params.pipelines_testdata_base_path + 'seqsubmit/samplesheets/mag_no_coverage_single_reads.csv' mode = "mags" diff --git a/conf/test_mag_no_quality.conf b/conf/test_mag_no_quality.conf index 1ed5ce3..75aea62 100644 --- a/conf/test_mag_no_quality.conf +++ b/conf/test_mag_no_quality.conf @@ -23,8 +23,6 @@ params { config_profile_description = 'Single-case MAG test with missing completeness and contamination values' // Input data - // TODO nf-core: 
Specify the paths to your test data on nf-core/test-datasets - // TODO nf-core: Give any required params for the test so that command line flags are not needed input = params.pipelines_testdata_base_path + 'seqsubmit/samplesheets/mag_no_quality.csv' mode = "mags" diff --git a/conf/test_mag_no_rna_presence.conf b/conf/test_mag_no_rna_presence.conf index f6052df..d559b09 100644 --- a/conf/test_mag_no_rna_presence.conf +++ b/conf/test_mag_no_rna_presence.conf @@ -23,8 +23,6 @@ params { config_profile_description = 'Single-case MAG test with missing RNA_presence value' // Input data - // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets - // TODO nf-core: Give any required params for the test so that command line flags are not needed input = params.pipelines_testdata_base_path + 'seqsubmit/samplesheets/mag_no_rna_presence.csv' mode = "mags" diff --git a/conf/test_mag_no_taxonomy.conf b/conf/test_mag_no_taxonomy.conf index 0d384bf..28a2ca8 100644 --- a/conf/test_mag_no_taxonomy.conf +++ b/conf/test_mag_no_taxonomy.conf @@ -23,8 +23,6 @@ params { config_profile_description = 'Single-case MAG test with missing NCBI_lineage value' // Input data - // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets - // TODO nf-core: Give any required params for the test so that command line flags are not needed input = params.pipelines_testdata_base_path + 'seqsubmit/samplesheets/mag_no_taxonomy.csv' mode = "mags" diff --git a/conf/test_mag_one_contig.conf b/conf/test_mag_one_contig.conf index 298d4b6..c63811a 100644 --- a/conf/test_mag_one_contig.conf +++ b/conf/test_mag_one_contig.conf @@ -23,8 +23,6 @@ params { config_profile_description = 'Single-case MAG test with one contig assembly' // Input data - // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets - // TODO nf-core: Give any required params for the test so that command line flags are not needed input = params.pipelines_testdata_base_path + 
'seqsubmit/samplesheets/mag_one_contig.csv' mode = "mags" diff --git a/modules/local/count_rna/tests/data/ecoli.stats b/modules/local/count_rna/tests/data/ecoli.stats new file mode 100644 index 0000000..4f7cfe3 --- /dev/null +++ b/modules/local/count_rna/tests/data/ecoli.stats @@ -0,0 +1,84 @@ + +tRNAscan-SE v.2.0.12 (Nov 2022) scan results (on host 311e9175be92) +Started: Wed Mar 25 17:02:42 UTC 2026 + +------------------------------------------------------------ +Sequence file(s) to search: ecoli.fna +Search Mode: Eukaryotic +Results written to: lachnospiraceae.tsv +Output format: Tabular +Searching with: Infernal First Pass->Infernal +Isotype-specific model scan: Yes +Covariance model: /usr/local/lib/tRNAscan-SE/models/TRNAinf-euk.cm + /usr/local/lib/tRNAscan-SE/models/TRNAinf-euk-SeC.cm +Infernal first pass cutoff score: 10 + +Temporary directory: . +Search log saved in: lachnospiraceae.log +Search statistics saved in: lachnospiraceae.stats +------------------------------------------------------------ + +First-pass Stats: +--------------- +Sequences read: 2 +Seqs w/at least 1 hit: 1 +Bases read: 4642052 (x2 for both strands) +Bases in tRNAs: 6562 +tRNAs predicted: 87 +Av. 
tRNA length: 75 +Script CPU time: 0.81 s +Scan CPU time: 237.75 s +Scan speed: 39.0 Kbp/sec + +First pass search(es) ended: Wed Mar 25 17:04:43 UTC 2026 + +Infernal Stats: +----------- +Candidate tRNAs read: 87 +Infernal-confirmed tRNAs: 87 +Bases scanned by Infernal: 8302 +% seq scanned by Infernal: 0.1 % +Script CPU time: 0.68 s +Infernal CPU time: 90.17 s +Scan speed: 92.1 bp/sec + +Infernal analysis of tRNAs ended: Wed Mar 25 17:05:53 UTC 2026 + +Overall scan speed: 28184.0 bp/sec + +tRNAs decoding Standard 20 AA: 84 +Selenocysteine tRNAs (TCA): 1 +Possible suppressor tRNAs (CTA,TTA,TCA): 0 +tRNAs with undetermined/unknown isotypes: 0 +Predicted pseudogenes: 2 + ------- +Total tRNAs: 87 + +tRNAs with introns: 0 + +| + +Isotype / Anticodon Counts: + +Ala : 5 AGC: GGC: 2 CGC: TGC: 3 +Gly : 6 ACC: GCC: 4 CCC: 1 TCC: 1 +Pro : 3 AGG: GGG: 1 CGG: 1 TGG: 1 +Thr : 5 AGT: GGT: 2 CGT: 2 TGT: 1 +Val : 7 AAC: GAC: 2 CAC: TAC: 5 +Ser : 5 AGA: GGA: 2 CGA: 1 TGA: 1 ACT: GCT: 1 +Arg : 7 ACG: 4 GCG: CCG: 1 TCG: CCT: 1 TCT: 1 +Leu : 8 AAG: GAG: 1 CAG: 4 TAG: 1 CAA: 1 TAA: 1 +Phe : 2 AAA: GAA: 2 +Asn : 4 ATT: GTT: 4 +Lys : 6 CTT: TTT: 6 +Asp : 3 ATC: GTC: 3 +Glu : 4 CTC: TTC: 4 +His : 1 ATG: GTG: 1 +Gln : 4 CTG: 2 TTG: 2 +Ile : 3 AAT: GAT: 3 CAT: TAT: +Met : 8 CAT: 8 +Tyr : 3 ATA: GTA: 3 +Supres : 0 CTA: TTA: TCA: +Cys : 1 ACA: GCA: 1 +Trp : 1 CCA: 1 +SelCys : 1 TCA: 1 diff --git a/modules/local/count_rna/tests/data/ecoli_bac.gff b/modules/local/count_rna/tests/data/ecoli_bac.gff new file mode 100644 index 0000000..aab13bb --- /dev/null +++ b/modules/local/count_rna/tests/data/ecoli_bac.gff @@ -0,0 +1,23 @@ +##gff-version 3 +NC_000913.3 barrnap:0.9 rRNA 223774 225311 0 + . Name=16S_rRNA;product=16S ribosomal RNA +NC_000913.3 barrnap:0.9 rRNA 225761 228661 0 + . Name=23S_rRNA;product=23S ribosomal RNA +NC_000913.3 barrnap:0.9 rRNA 228760 228870 1.9e-11 + . Name=5S_rRNA;product=5S ribosomal RNA +NC_000913.3 barrnap:0.9 rRNA 2726074 2726184 1.9e-11 - . 
Name=5S_rRNA;product=5S ribosomal RNA +NC_000913.3 barrnap:0.9 rRNA 2726282 2729182 0 - . Name=23S_rRNA;product=23S ribosomal RNA +NC_000913.3 barrnap:0.9 rRNA 2729617 2731154 0 - . Name=16S_rRNA;product=16S ribosomal RNA +NC_000913.3 barrnap:0.9 rRNA 3423428 3423538 4.4e-11 - . Name=5S_rRNA;product=5S ribosomal RNA +NC_000913.3 barrnap:0.9 rRNA 3423673 3423783 1.9e-11 - . Name=5S_rRNA;product=5S ribosomal RNA +NC_000913.3 barrnap:0.9 rRNA 3423881 3426781 0 - . Name=23S_rRNA;product=23S ribosomal RNA +NC_000913.3 barrnap:0.9 rRNA 3427222 3428759 0 - . Name=16S_rRNA;product=16S ribosomal RNA +NC_000913.3 barrnap:0.9 rRNA 3941811 3943348 0 + . Name=16S_rRNA;product=16S ribosomal RNA +NC_000913.3 barrnap:0.9 rRNA 3943706 3946606 0 + . Name=23S_rRNA;product=23S ribosomal RNA +NC_000913.3 barrnap:0.9 rRNA 3946704 3946814 1.9e-11 + . Name=5S_rRNA;product=5S ribosomal RNA +NC_000913.3 barrnap:0.9 rRNA 4035534 4037071 0 + . Name=16S_rRNA;product=16S ribosomal RNA +NC_000913.3 barrnap:0.9 rRNA 4037521 4040422 0 + . Name=23S_rRNA;product=23S ribosomal RNA +NC_000913.3 barrnap:0.9 rRNA 4040521 4040631 2.5e-11 + . Name=5S_rRNA;product=5S ribosomal RNA +NC_000913.3 barrnap:0.9 rRNA 4166662 4168199 0 + . Name=16S_rRNA;product=16S ribosomal RNA +NC_000913.3 barrnap:0.9 rRNA 4168643 4171543 0 + . Name=23S_rRNA;product=23S ribosomal RNA +NC_000913.3 barrnap:0.9 rRNA 4171641 4171751 6.5e-11 + . Name=5S_rRNA;product=5S ribosomal RNA +NC_000913.3 barrnap:0.9 rRNA 4208150 4209687 0 + . Name=16S_rRNA;product=16S ribosomal RNA +NC_000913.3 barrnap:0.9 rRNA 4210045 4212945 0 + . Name=23S_rRNA;product=23S ribosomal RNA +NC_000913.3 barrnap:0.9 rRNA 4213044 4213154 6.5e-11 + . 
Name=5S_rRNA;product=5S ribosomal RNA diff --git a/modules/local/ena_webin_cli_download/tests/main.nf.test.snap b/modules/local/ena_webin_cli_download/tests/main.nf.test.snap new file mode 100644 index 0000000..6f3bc74 --- /dev/null +++ b/modules/local/ena_webin_cli_download/tests/main.nf.test.snap @@ -0,0 +1,19 @@ +{ + "ENA_WEBIN_CLI_DOWNLOAD - stub": { + "content": [ + { + "0": [ + "webin-cli-stub.jar:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "webin_cli_jar": [ + "webin-cli-stub.jar:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "25.04.1" + }, + "timestamp": "2026-03-25T16:02:51.004952" + } +} \ No newline at end of file diff --git a/modules/local/generate_assembly_manifest/tests/main.nf.test b/modules/local/generate_assembly_manifest/tests/main.nf.test index d04d7a2..2b96eae 100644 --- a/modules/local/generate_assembly_manifest/tests/main.nf.test +++ b/modules/local/generate_assembly_manifest/tests/main.nf.test @@ -24,22 +24,30 @@ nextflow_process { } then { - assert process.success - assertAll( - { assert snapshot( - sanitizeOutput(process.out), - path(process.out.versions[0]).yaml - ).match() }, - { assert process.out.manifest.size() == 1 }, - { assert process.out.manifest[0][1].toString().endsWith(".manifest") }, - { - def manifestContent = path(process.out.manifest[0][1]).text - assert manifestContent.contains("PRJ12345") : "PRJ12345 not found in manifest file" - } - ) - } + assert process.success + assertAll( + // Only snapshot versions, not the manifest because it contains a timestamp + { assert snapshot( + path(process.out.versions[0]).yaml + ).match() }, + { assert process.out.manifest.size() == 1 }, + { assert process.out.manifest[0][1].toString().endsWith(".manifest") }, + { + // Validate manifest structure + def manifestContent = path(process.out.manifest[0][1]).text + def lines = manifestContent.readLines() + + // Check length of manifest + assert lines.size() == 10 + // Check required fields are 
present + assert manifestContent.contains("SAMPLE SAMEA7687881") + assert manifestContent.contains("STUDY PRJ12345") + assert manifestContent.contains("RUN_REF ERR4918394") + } + ) } +} test("GENERATE_ASSEMBLY_MANIFEST completes with expected outputs - stub") { diff --git a/modules/local/generate_assembly_manifest/tests/main.nf.test.snap b/modules/local/generate_assembly_manifest/tests/main.nf.test.snap index cf8a9e1..054687f 100644 --- a/modules/local/generate_assembly_manifest/tests/main.nf.test.snap +++ b/modules/local/generate_assembly_manifest/tests/main.nf.test.snap @@ -11,7 +11,7 @@ ] ], "1": [ - "versions.yml:md5,4711ed8f2fd35e895aefafebd29f0333" + "versions.yml:md5,0664035de44b4d88c1a70a357c1a24f2" ], "manifest": [ [ @@ -22,46 +22,33 @@ ] ], "versions": [ - "versions.yml:md5,4711ed8f2fd35e895aefafebd29f0333" + "versions.yml:md5,0664035de44b4d88c1a70a357c1a24f2" ] }, { "GENERATE_ASSEMBLY_MANIFEST": { - "assembly_uploader": null + "assembly_uploader": "assembly_uploader 1.3.4" } } ], - "timestamp": "2026-03-13T14:02:14.937082", "meta": { - "nf-test": "0.9.4", - "nextflow": "25.10.4" - } + "nf-test": "0.9.0", + "nextflow": "25.04.1" + }, + "timestamp": "2026-03-25T16:53:50.281438" }, "GENERATE_ASSEMBLY_MANIFEST completes with expected outputs": { "content": [ - { - "manifest": [ - [ - { - "id": "test" - }, - "233126d4c4d0.manifest:md5,cacedcfcce220081e7aa2f98c2f4ffd6" - ] - ], - "versions": [ - "versions.yml:md5,0664035de44b4d88c1a70a357c1a24f2" - ] - }, { "GENERATE_ASSEMBLY_MANIFEST": { "assembly_uploader": "assembly_uploader 1.3.4" } } ], - "timestamp": "2026-03-13T12:32:23.722449", "meta": { - "nf-test": "0.9.4", - "nextflow": "25.10.4" - } + "nf-test": "0.9.0", + "nextflow": "25.04.1" + }, + "timestamp": "2026-03-25T17:34:01.598736" } } \ No newline at end of file diff --git a/modules/local/rename_fasta_for_catpack/tests/main.nf.test.snap b/modules/local/rename_fasta_for_catpack/tests/main.nf.test.snap new file mode 100644 index 0000000..112fec2 --- 
/dev/null +++ b/modules/local/rename_fasta_for_catpack/tests/main.nf.test.snap @@ -0,0 +1,56 @@ +{ + "RENAME_FASTA_FOR_CATPACK - uncompressed fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.fasta:md5,3dc842bfe6f1059ccb006fc454437ebf" + ] + ], + "renamed_fasta": [ + [ + { + "id": "test" + }, + "test.fasta:md5,3dc842bfe6f1059ccb006fc454437ebf" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "25.04.1" + }, + "timestamp": "2026-03-25T16:51:01.874131" + }, + "RENAME_FASTA_FOR_CATPACK - compressed fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test_compressed" + }, + "test_compressed.fasta:md5,b349eb81b2fc8db6a6cee870c8d4b054" + ] + ], + "renamed_fasta": [ + [ + { + "id": "test_compressed" + }, + "test_compressed.fasta:md5,b349eb81b2fc8db6a6cee870c8d4b054" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "25.04.1" + }, + "timestamp": "2026-03-25T16:51:04.916201" + } +} \ No newline at end of file diff --git a/tests/assembly_complete_metadata.nf.test.snap b/tests/assembly_complete_metadata.nf.test.snap index a209618..3db4357 100644 --- a/tests/assembly_complete_metadata.nf.test.snap +++ b/tests/assembly_complete_metadata.nf.test.snap @@ -1,7 +1,7 @@ { "-profile test_assembly_complete_metadata": { "content": [ - 4, + 5, { "Workflow": { "nf-core/seqsubmit": "v1.0.0dev" @@ -14,17 +14,31 @@ "metagenomic_assemblies/complete_metadata/upload/webin_cli", "metagenomic_assemblies/complete_metadata/upload/webin_cli/complete_metadata_accessions.tsv", "metagenomic_assemblies/complete_metadata_assembly_metadata.csv", + "metagenomic_assemblies/multiqc", + "metagenomic_assemblies/multiqc/multiqc_data", + "metagenomic_assemblies/multiqc/multiqc_data/multiqc.log", + "metagenomic_assemblies/multiqc/multiqc_data/multiqc_citations.txt", + "metagenomic_assemblies/multiqc/multiqc_data/multiqc_data.json", + "metagenomic_assemblies/multiqc/multiqc_data/multiqc_software_versions.txt", + 
"metagenomic_assemblies/multiqc/multiqc_data/multiqc_sources.txt", + "metagenomic_assemblies/multiqc/multiqc_report.html", "pipeline_info", "pipeline_info/nf_core_seqsubmit_software_mqc_versions.yml" ], [ - "complete_metadata_assembly_metadata.csv:md5,d5b1575095ece78d988395b874440bef" + "complete_metadata_assembly_metadata.csv:md5,d5b1575095ece78d988395b874440bef", + "multiqc.log:md5,116b00d7c9902649a7600d4d3bf6d978", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_data.json:md5,cc2c7f72ffa5687ac3462f39cf527a1e", + "multiqc_software_versions.txt:md5,57fdf3fb8ab5ca39ae384d3cf1ae9e58", + "multiqc_sources.txt:md5,d2a044df39ce3c6abe5cdc2d67473490", + "multiqc_report.html:md5,d9ee69a0934f1cfdec20d02c7c57a5b5" ] ], "meta": { "nf-test": "0.9.0", "nextflow": "25.04.1" }, - "timestamp": "2026-03-20T15:12:31.987836" + "timestamp": "2026-03-25T16:05:23.60975" } } \ No newline at end of file diff --git a/tests/assembly_no_coverage_paired_reads.nf.test.snap b/tests/assembly_no_coverage_paired_reads.nf.test.snap index ec2957e..23823f5 100644 --- a/tests/assembly_no_coverage_paired_reads.nf.test.snap +++ b/tests/assembly_no_coverage_paired_reads.nf.test.snap @@ -1,7 +1,7 @@ { "-profile test_assembly_no_coverage_paired_reads": { "content": [ - 5, + 6, { "Workflow": { "nf-core/seqsubmit": "v1.0.0dev" @@ -9,6 +9,14 @@ }, [ "metagenomic_assemblies", + "metagenomic_assemblies/multiqc", + "metagenomic_assemblies/multiqc/multiqc_data", + "metagenomic_assemblies/multiqc/multiqc_data/multiqc.log", + "metagenomic_assemblies/multiqc/multiqc_data/multiqc_citations.txt", + "metagenomic_assemblies/multiqc/multiqc_data/multiqc_data.json", + "metagenomic_assemblies/multiqc/multiqc_data/multiqc_software_versions.txt", + "metagenomic_assemblies/multiqc/multiqc_data/multiqc_sources.txt", + "metagenomic_assemblies/multiqc/multiqc_report.html", "metagenomic_assemblies/no_coverage_paired_reads", "metagenomic_assemblies/no_coverage_paired_reads/coverage", 
"metagenomic_assemblies/no_coverage_paired_reads/coverage/no_coverage_paired_reads.depth.txt", @@ -20,6 +28,12 @@ "pipeline_info/nf_core_seqsubmit_software_mqc_versions.yml" ], [ + "multiqc.log:md5,f0581c03f3c6b93dbd9d7041bf413023", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_data.json:md5,52d4ff25685c9220279f3273484eda88", + "multiqc_software_versions.txt:md5,57fdf3fb8ab5ca39ae384d3cf1ae9e58", + "multiqc_sources.txt:md5,d2a044df39ce3c6abe5cdc2d67473490", + "multiqc_report.html:md5,23ccd334e5826645d5db1fbbeff1df85", "no_coverage_paired_reads.depth.txt:md5,bb5f99e74d21df3c73e0ae0f388bcbcb", "no_coverage_paired_reads_assembly_metadata.csv:md5,91a2616ccedc6bb93c2209153bec50f0" ] @@ -28,6 +42,6 @@ "nf-test": "0.9.0", "nextflow": "25.04.1" }, - "timestamp": "2026-03-20T15:13:14.194671" + "timestamp": "2026-03-25T16:06:20.33222" } } \ No newline at end of file diff --git a/tests/assembly_no_coverage_single_reads.nf.test.snap b/tests/assembly_no_coverage_single_reads.nf.test.snap index dff4302..f2cb148 100644 --- a/tests/assembly_no_coverage_single_reads.nf.test.snap +++ b/tests/assembly_no_coverage_single_reads.nf.test.snap @@ -1,7 +1,7 @@ { "-profile test_assembly_no_coverage_single_reads": { "content": [ - 5, + 6, { "Workflow": { "nf-core/seqsubmit": "v1.0.0dev" @@ -9,6 +9,14 @@ }, [ "metagenomic_assemblies", + "metagenomic_assemblies/multiqc", + "metagenomic_assemblies/multiqc/multiqc_data", + "metagenomic_assemblies/multiqc/multiqc_data/multiqc.log", + "metagenomic_assemblies/multiqc/multiqc_data/multiqc_citations.txt", + "metagenomic_assemblies/multiqc/multiqc_data/multiqc_data.json", + "metagenomic_assemblies/multiqc/multiqc_data/multiqc_software_versions.txt", + "metagenomic_assemblies/multiqc/multiqc_data/multiqc_sources.txt", + "metagenomic_assemblies/multiqc/multiqc_report.html", "metagenomic_assemblies/no_coverage_single_reads", "metagenomic_assemblies/no_coverage_single_reads/coverage", 
"metagenomic_assemblies/no_coverage_single_reads/coverage/no_coverage_single_reads.depth.txt", @@ -20,6 +28,12 @@ "pipeline_info/nf_core_seqsubmit_software_mqc_versions.yml" ], [ + "multiqc.log:md5,29712ce710262129eabfcfb6ffa641d0", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_data.json:md5,d3d129462271bcec26d19ca670c2158f", + "multiqc_software_versions.txt:md5,57fdf3fb8ab5ca39ae384d3cf1ae9e58", + "multiqc_sources.txt:md5,d2a044df39ce3c6abe5cdc2d67473490", + "multiqc_report.html:md5,809384ac9d6289c97e7a47365afd2734", "no_coverage_single_reads.depth.txt:md5,bd88282b17e851377b1dd223839be150", "no_coverage_single_reads_assembly_metadata.csv:md5,fc1beef7e8439eaf5329e02587460009" ] @@ -28,6 +42,6 @@ "nf-test": "0.9.0", "nextflow": "25.04.1" }, - "timestamp": "2026-03-20T15:13:50.079728" + "timestamp": "2026-03-25T16:06:57.003548" } } \ No newline at end of file diff --git a/tests/assembly_no_study_complete_metadata.nf.test.snap b/tests/assembly_no_study_complete_metadata.nf.test.snap new file mode 100644 index 0000000..9041e71 --- /dev/null +++ b/tests/assembly_no_study_complete_metadata.nf.test.snap @@ -0,0 +1,47 @@ +{ + "-profile test_assembly_no_study_complete_metadata": { + "content": [ + 6, + { + "REGISTERSTUDY": { + "mgnify-pipelines-toolkit": "1.4.21" + }, + "Workflow": { + "nf-core/seqsubmit": "v1.0.0dev" + } + }, + [ + "metagenomic_assemblies", + "metagenomic_assemblies/complete_metadata", + "metagenomic_assemblies/complete_metadata/upload", + "metagenomic_assemblies/complete_metadata/upload/webin_cli", + "metagenomic_assemblies/complete_metadata/upload/webin_cli/complete_metadata_accessions.tsv", + "metagenomic_assemblies/complete_metadata_assembly_metadata.csv", + "metagenomic_assemblies/multiqc", + "metagenomic_assemblies/multiqc/multiqc_data", + "metagenomic_assemblies/multiqc/multiqc_data/multiqc.log", + "metagenomic_assemblies/multiqc/multiqc_data/multiqc_citations.txt", + 
"metagenomic_assemblies/multiqc/multiqc_data/multiqc_data.json", + "metagenomic_assemblies/multiqc/multiqc_data/multiqc_software_versions.txt", + "metagenomic_assemblies/multiqc/multiqc_data/multiqc_sources.txt", + "metagenomic_assemblies/multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_seqsubmit_software_mqc_versions.yml" + ], + [ + "complete_metadata_assembly_metadata.csv:md5,d5b1575095ece78d988395b874440bef", + "multiqc.log:md5,0ffd75914a9aed929a0709ae6ed074cc", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_data.json:md5,14518f81b55710a5b7e07ff86e77f635", + "multiqc_software_versions.txt:md5,e640e15de64dd5b779b09e71f35dbd21", + "multiqc_sources.txt:md5,d2a044df39ce3c6abe5cdc2d67473490", + "multiqc_report.html:md5,c2d27f7d2dc117dd4791d2970dbcb8e0" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "25.04.1" + }, + "timestamp": "2026-03-25T16:09:09.948413" + } +} \ No newline at end of file diff --git a/tests/assembly_one_contig.nf.test.snap b/tests/assembly_one_contig.nf.test.snap index 1f21b2b..4a39753 100644 --- a/tests/assembly_one_contig.nf.test.snap +++ b/tests/assembly_one_contig.nf.test.snap @@ -1,24 +1,38 @@ { "-profile test_assembly_one_contig": { "content": [ - 2, + 3, { "Workflow": { "nf-core/seqsubmit": "v1.0.0dev" } }, [ + "metagenomic_assemblies", + "metagenomic_assemblies/multiqc", + "metagenomic_assemblies/multiqc/multiqc_data", + "metagenomic_assemblies/multiqc/multiqc_data/multiqc.log", + "metagenomic_assemblies/multiqc/multiqc_data/multiqc_citations.txt", + "metagenomic_assemblies/multiqc/multiqc_data/multiqc_data.json", + "metagenomic_assemblies/multiqc/multiqc_data/multiqc_software_versions.txt", + "metagenomic_assemblies/multiqc/multiqc_data/multiqc_sources.txt", + "metagenomic_assemblies/multiqc/multiqc_report.html", "pipeline_info", "pipeline_info/nf_core_seqsubmit_software_mqc_versions.yml" ], [ - + "multiqc.log:md5,de4283936c9042d00e5d725265508488", + 
"multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_data.json:md5,c355c6eca7cd2259c9f1691275da2520", + "multiqc_software_versions.txt:md5,57fdf3fb8ab5ca39ae384d3cf1ae9e58", + "multiqc_sources.txt:md5,d2a044df39ce3c6abe5cdc2d67473490", + "multiqc_report.html:md5,5fb9871586cecc4be089a248dc65cd42" ] ], "meta": { "nf-test": "0.9.0", "nextflow": "25.04.1" }, - "timestamp": "2026-03-20T15:14:00.09396" + "timestamp": "2026-03-25T16:09:27.221529" } } \ No newline at end of file diff --git a/tests/mag_complete_metadata.nf.test.snap b/tests/mag_complete_metadata.nf.test.snap index b6497c6..5d00c40 100644 --- a/tests/mag_complete_metadata.nf.test.snap +++ b/tests/mag_complete_metadata.nf.test.snap @@ -1,7 +1,7 @@ { "-profile test_mag_complete_metadata": { "content": [ - 4, + 5, { "Workflow": { "nf-core/seqsubmit": "v1.0.0dev" @@ -14,6 +14,14 @@ "mags/complete_metadata/upload/webin_cli", "mags/complete_metadata/upload/webin_cli/complete_metadata_accessions.tsv", "mags/genomes_metadata.csv", + "mags/multiqc", + "mags/multiqc/multiqc_data", + "mags/multiqc/multiqc_data/multiqc.log", + "mags/multiqc/multiqc_data/multiqc_citations.txt", + "mags/multiqc/multiqc_data/multiqc_data.json", + "mags/multiqc/multiqc_data/multiqc_software_versions.txt", + "mags/multiqc/multiqc_data/multiqc_sources.txt", + "mags/multiqc/multiqc_report.html", "mags/upload", "mags/upload/manifests", "mags/upload/manifests/results", @@ -28,6 +36,12 @@ ], [ "genomes_metadata.csv:md5,ae2b884015d1850fa63365e8a64d9fc8", + "multiqc.log:md5,3d7cbf4832b5d456a9bc16b53b3ff94b", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_data.json:md5,44d2f20f0db17bd3cc1d0257ab07bbe5", + "multiqc_software_versions.txt:md5,57fdf3fb8ab5ca39ae384d3cf1ae9e58", + "multiqc_sources.txt:md5,d2a044df39ce3c6abe5cdc2d67473490", + "multiqc_report.html:md5,fa67f512e57e908e19c840713ad0d1a4", "ENA_backup.json:md5,e339bef4b1e1e11c37ef72f3d9ef2c93", 
"submission.xml:md5,4b7d1d7e1b883a3eac57e1267943a8d6" ] @@ -36,6 +50,6 @@ "nf-test": "0.9.0", "nextflow": "25.04.1" }, - "timestamp": "2026-03-19T15:47:41.671411" + "timestamp": "2026-03-25T16:10:57.783771" } } \ No newline at end of file diff --git a/tests/mag_multiple_bins_missing_metadata.nf.test.snap b/tests/mag_multiple_bins_missing_metadata.nf.test.snap index 7cce2f8..2c14895 100644 --- a/tests/mag_multiple_bins_missing_metadata.nf.test.snap +++ b/tests/mag_multiple_bins_missing_metadata.nf.test.snap @@ -1,7 +1,7 @@ { "-profile test_mag_multiple_bins_missing_metadata": { "content": [ - 16, + 17, { "BARRNAP": { "barrnap": 0.9 @@ -22,6 +22,14 @@ [ "mags", "mags/genomes_metadata.csv", + "mags/multiqc", + "mags/multiqc/multiqc_data", + "mags/multiqc/multiqc_data/multiqc.log", + "mags/multiqc/multiqc_data/multiqc_citations.txt", + "mags/multiqc/multiqc_data/multiqc_data.json", + "mags/multiqc/multiqc_data/multiqc_software_versions.txt", + "mags/multiqc/multiqc_data/multiqc_sources.txt", + "mags/multiqc/multiqc_report.html", "mags/no_coverage_paired_reads", "mags/no_coverage_paired_reads/coverage", "mags/no_coverage_paired_reads/coverage/no_coverage_paired_reads.tsv", @@ -60,6 +68,12 @@ ], [ "genomes_metadata.csv:md5,a75d1d35c762863c487f010f6a000910", + "multiqc.log:md5,66e1af1df4fd84eb6ac1aa57ecb5d6b3", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_data.json:md5,8123a0125e961755dd4447b63520b4ce", + "multiqc_software_versions.txt:md5,1dea254d76424e9a0e5ad498e05d45ed", + "multiqc_sources.txt:md5,d2a044df39ce3c6abe5cdc2d67473490", + "multiqc_report.html:md5,80665f9539038d7fab651749febec217", "no_coverage_paired_reads.tsv:md5,fb8374996c3bad9ddd296684d8c28628", "no_rna_presence_bac.gff:md5,df19e1b84ba6f691d20c72b397c88abf", "no_rna_presence.tsv:md5,abd958e8addba39c9e4d7f6b97a1a7c6", @@ -73,6 +87,6 @@ "nf-test": "0.9.0", "nextflow": "25.04.1" }, - "timestamp": "2026-03-19T17:12:22.739259" + "timestamp": "2026-03-25T16:37:24.426014" } } \ No 
newline at end of file diff --git a/tests/mag_no_coverage_paired_reads.nf.test.snap b/tests/mag_no_coverage_paired_reads.nf.test.snap index 29c10f5..a56bd92 100644 --- a/tests/mag_no_coverage_paired_reads.nf.test.snap +++ b/tests/mag_no_coverage_paired_reads.nf.test.snap @@ -1,7 +1,7 @@ { "-profile test_mag_no_coverage_paired_reads": { "content": [ - 5, + 6, { "COVERM_GENOME": { "coverm": "0.7.0" @@ -13,6 +13,14 @@ [ "mags", "mags/genomes_metadata.csv", + "mags/multiqc", + "mags/multiqc/multiqc_data", + "mags/multiqc/multiqc_data/multiqc.log", + "mags/multiqc/multiqc_data/multiqc_citations.txt", + "mags/multiqc/multiqc_data/multiqc_data.json", + "mags/multiqc/multiqc_data/multiqc_software_versions.txt", + "mags/multiqc/multiqc_data/multiqc_sources.txt", + "mags/multiqc/multiqc_report.html", "mags/no_coverage_paired_reads", "mags/no_coverage_paired_reads/coverage", "mags/no_coverage_paired_reads/coverage/no_coverage_paired_reads.tsv", @@ -33,6 +41,12 @@ ], [ "genomes_metadata.csv:md5,0538b1aec26168b35a416bd995b0a4a9", + "multiqc.log:md5,0d23f6c098aafb7527aa84bff4f017bc", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_data.json:md5,16ed22f43c6846b619cde79610357816", + "multiqc_software_versions.txt:md5,db58c67ef62a45494220d8d9c0963d74", + "multiqc_sources.txt:md5,d2a044df39ce3c6abe5cdc2d67473490", + "multiqc_report.html:md5,f3ec6f48c2a841f9ac25475ce3eb83d7", "no_coverage_paired_reads.tsv:md5,fb8374996c3bad9ddd296684d8c28628", "ENA_backup.json:md5,e339bef4b1e1e11c37ef72f3d9ef2c93", "submission.xml:md5,4b7d1d7e1b883a3eac57e1267943a8d6" @@ -42,6 +56,6 @@ "nf-test": "0.9.0", "nextflow": "25.04.1" }, - "timestamp": "2026-03-19T17:13:06.044172" + "timestamp": "2026-03-25T16:38:18.072528" } } \ No newline at end of file diff --git a/tests/mag_no_coverage_single_reads.nf.test.snap b/tests/mag_no_coverage_single_reads.nf.test.snap index cd10aa2..4638334 100644 --- a/tests/mag_no_coverage_single_reads.nf.test.snap +++ 
b/tests/mag_no_coverage_single_reads.nf.test.snap @@ -1,7 +1,7 @@ { "-profile test_mag_no_coverage_single_reads": { "content": [ - 5, + 6, { "COVERM_GENOME": { "coverm": "0.7.0" @@ -13,6 +13,14 @@ [ "mags", "mags/genomes_metadata.csv", + "mags/multiqc", + "mags/multiqc/multiqc_data", + "mags/multiqc/multiqc_data/multiqc.log", + "mags/multiqc/multiqc_data/multiqc_citations.txt", + "mags/multiqc/multiqc_data/multiqc_data.json", + "mags/multiqc/multiqc_data/multiqc_software_versions.txt", + "mags/multiqc/multiqc_data/multiqc_sources.txt", + "mags/multiqc/multiqc_report.html", "mags/no_coverage_single_reads", "mags/no_coverage_single_reads/coverage", "mags/no_coverage_single_reads/coverage/no_coverage_single_reads.tsv", @@ -33,6 +41,12 @@ ], [ "genomes_metadata.csv:md5,1f56050125362470f351ed99065af980", + "multiqc.log:md5,55c57a082a2b02f585e3b4d95e2e1e0e", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_data.json:md5,64125ac19d849254a6f9d9199a722d32", + "multiqc_software_versions.txt:md5,db58c67ef62a45494220d8d9c0963d74", + "multiqc_sources.txt:md5,d2a044df39ce3c6abe5cdc2d67473490", + "multiqc_report.html:md5,ab21405b339e9c57d8df14fe935573af", "no_coverage_single_reads.tsv:md5,3791d9be880cbfacbc527e47623e3c9a", "ENA_backup.json:md5,e339bef4b1e1e11c37ef72f3d9ef2c93", "submission.xml:md5,4b7d1d7e1b883a3eac57e1267943a8d6" @@ -42,6 +56,6 @@ "nf-test": "0.9.0", "nextflow": "25.04.1" }, - "timestamp": "2026-03-19T17:13:44.562959" + "timestamp": "2026-03-25T16:39:04.761638" } } \ No newline at end of file diff --git a/tests/mag_no_rna_presence.nf.test.snap b/tests/mag_no_rna_presence.nf.test.snap index dd7e479..bca8bdb 100644 --- a/tests/mag_no_rna_presence.nf.test.snap +++ b/tests/mag_no_rna_presence.nf.test.snap @@ -1,7 +1,7 @@ { "-profile test_mag_no_rna_presence": { "content": [ - 7, + 8, { "BARRNAP": { "barrnap": 0.9 @@ -19,6 +19,14 @@ [ "mags", "mags/genomes_metadata.csv", + "mags/multiqc", + "mags/multiqc/multiqc_data", + 
"mags/multiqc/multiqc_data/multiqc.log", + "mags/multiqc/multiqc_data/multiqc_citations.txt", + "mags/multiqc/multiqc_data/multiqc_data.json", + "mags/multiqc/multiqc_data/multiqc_software_versions.txt", + "mags/multiqc/multiqc_data/multiqc_sources.txt", + "mags/multiqc/multiqc_report.html", "mags/no_rna_presence", "mags/no_rna_presence/rna", "mags/no_rna_presence/rna/barrnap", @@ -44,6 +52,12 @@ ], [ "genomes_metadata.csv:md5,0f567491f038d4a0c1dbf4c05cdc26c0", + "multiqc.log:md5,3c9fde84606f1a1483713c1562446677", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_data.json:md5,e96bef4fe2a8a61b6904748c0becaeee", + "multiqc_software_versions.txt:md5,6476cd933d258f4900a523bfdf417d9f", + "multiqc_sources.txt:md5,d2a044df39ce3c6abe5cdc2d67473490", + "multiqc_report.html:md5,d69b3c359ce4eb5dfc15b85f5f08e7c1", "no_rna_presence_bac.gff:md5,df19e1b84ba6f691d20c72b397c88abf", "no_rna_presence.tsv:md5,abd958e8addba39c9e4d7f6b97a1a7c6", "ENA_backup.json:md5,e339bef4b1e1e11c37ef72f3d9ef2c93", @@ -54,6 +68,6 @@ "nf-test": "0.9.0", "nextflow": "25.04.1" }, - "timestamp": "2026-03-19T16:30:50.102158" + "timestamp": "2026-03-25T16:43:47.785065" } } \ No newline at end of file diff --git a/tests/mag_no_study_complete_metadata.nf.test.snap b/tests/mag_no_study_complete_metadata.nf.test.snap new file mode 100644 index 0000000..905741e --- /dev/null +++ b/tests/mag_no_study_complete_metadata.nf.test.snap @@ -0,0 +1,58 @@ +{ + "-profile test_mag_no_study_complete_metadata": { + "content": [ + 6, + { + "REGISTERSTUDY": { + "mgnify-pipelines-toolkit": "1.4.21" + }, + "Workflow": { + "nf-core/seqsubmit": "v1.0.0dev" + } + }, + [ + "mags", + "mags/complete_metadata", + "mags/complete_metadata/upload", + "mags/complete_metadata/upload/webin_cli", + "mags/complete_metadata/upload/webin_cli/complete_metadata_accessions.tsv", + "mags/genomes_metadata.csv", + "mags/multiqc", + "mags/multiqc/multiqc_data", + "mags/multiqc/multiqc_data/multiqc.log", + 
"mags/multiqc/multiqc_data/multiqc_citations.txt", + "mags/multiqc/multiqc_data/multiqc_data.json", + "mags/multiqc/multiqc_data/multiqc_software_versions.txt", + "mags/multiqc/multiqc_data/multiqc_sources.txt", + "mags/multiqc/multiqc_report.html", + "mags/upload", + "mags/upload/manifests", + "mags/upload/manifests/results", + "mags/upload/manifests/results/MAG_upload", + "mags/upload/manifests/results/MAG_upload/ENA_backup.json", + "mags/upload/manifests/results/MAG_upload/genome_samples.xml", + "mags/upload/manifests/results/MAG_upload/manifests_test", + "mags/upload/manifests/results/MAG_upload/registered_MAGs_test.tsv", + "mags/upload/manifests/results/MAG_upload/submission.xml", + "pipeline_info", + "pipeline_info/nf_core_seqsubmit_software_mqc_versions.yml" + ], + [ + "genomes_metadata.csv:md5,ae2b884015d1850fa63365e8a64d9fc8", + "multiqc.log:md5,eff5627704089c7e458bd72016d7b7da", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_data.json:md5,992efdc0fa158002b32319ac9fad692e", + "multiqc_software_versions.txt:md5,e640e15de64dd5b779b09e71f35dbd21", + "multiqc_sources.txt:md5,d2a044df39ce3c6abe5cdc2d67473490", + "multiqc_report.html:md5,decd9656cca793864b9f5d8214b04544", + "ENA_backup.json:md5,e339bef4b1e1e11c37ef72f3d9ef2c93", + "submission.xml:md5,4b7d1d7e1b883a3eac57e1267943a8d6" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "25.04.1" + }, + "timestamp": "2026-03-25T16:44:33.682989" + } +} \ No newline at end of file diff --git a/tests/mag_no_taxonomy.nf.test.snap b/tests/mag_no_taxonomy.nf.test.snap index 49244fa..8ca37da 100644 --- a/tests/mag_no_taxonomy.nf.test.snap +++ b/tests/mag_no_taxonomy.nf.test.snap @@ -1,7 +1,7 @@ { "-profile test_mag_no_taxonomy": { "content": [ - 8, + 9, { "Workflow": { "nf-core/seqsubmit": "v1.0.0dev" @@ -10,6 +10,14 @@ [ "mags", "mags/genomes_metadata.csv", + "mags/multiqc", + "mags/multiqc/multiqc_data", + "mags/multiqc/multiqc_data/multiqc.log", + 
"mags/multiqc/multiqc_data/multiqc_citations.txt", + "mags/multiqc/multiqc_data/multiqc_data.json", + "mags/multiqc/multiqc_data/multiqc_software_versions.txt", + "mags/multiqc/multiqc_data/multiqc_sources.txt", + "mags/multiqc/multiqc_report.html", "mags/no_taxonomy", "mags/no_taxonomy/taxonomy", "mags/no_taxonomy/taxonomy/no_taxonomy.bin2classification.txt", @@ -31,6 +39,12 @@ ], [ "genomes_metadata.csv:md5,b1d01539d787b77e30b9bacd5b23d51f", + "multiqc.log:md5,fe17075d5d9582044aaff80aad847961", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_data.json:md5,37284229add460f4535b3c27c946df30", + "multiqc_software_versions.txt:md5,57fdf3fb8ab5ca39ae384d3cf1ae9e58", + "multiqc_sources.txt:md5,d2a044df39ce3c6abe5cdc2d67473490", + "multiqc_report.html:md5,57bcf167566caa403280a0038f7d79cd", "no_taxonomy.bin2classification.txt:md5,e24c109efe807c7044a866999fd736f1", "no_taxonomy.txt:md5,0f2082d3e397fd630d605dd60993b131", "ENA_backup.json:md5,e339bef4b1e1e11c37ef72f3d9ef2c93", @@ -41,6 +55,6 @@ "nf-test": "0.9.0", "nextflow": "25.04.1" }, - "timestamp": "2026-03-19T15:57:14.366463" + "timestamp": "2026-03-25T16:48:43.584726" } } \ No newline at end of file diff --git a/tests/mag_one_contig.nf.test.snap b/tests/mag_one_contig.nf.test.snap index f2f3324..34d5862 100644 --- a/tests/mag_one_contig.nf.test.snap +++ b/tests/mag_one_contig.nf.test.snap @@ -1,7 +1,7 @@ { "-profile test_mag_one_contig": { "content": [ - 2, + 3, { "Workflow": { "nf-core/seqsubmit": "v1.0.0dev" @@ -9,17 +9,30 @@ }, [ "mags", + "mags/multiqc", + "mags/multiqc/multiqc_data", + "mags/multiqc/multiqc_data/multiqc.log", + "mags/multiqc/multiqc_data/multiqc_citations.txt", + "mags/multiqc/multiqc_data/multiqc_data.json", + "mags/multiqc/multiqc_data/multiqc_software_versions.txt", + "mags/multiqc/multiqc_data/multiqc_sources.txt", + "mags/multiqc/multiqc_report.html", "pipeline_info", "pipeline_info/nf_core_seqsubmit_software_mqc_versions.yml" ], [ - + 
"multiqc.log:md5,e3309f477c6395ea61d4923ed792aebc", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_data.json:md5,7c8d1fd2dccde4249299d3f9617d2aa2", + "multiqc_software_versions.txt:md5,57fdf3fb8ab5ca39ae384d3cf1ae9e58", + "multiqc_sources.txt:md5,d2a044df39ce3c6abe5cdc2d67473490", + "multiqc_report.html:md5,a6ed0c8727067113dbefe2c1264c73cd" ] ], "meta": { "nf-test": "0.9.0", "nextflow": "25.04.1" }, - "timestamp": "2026-03-19T12:28:15.857999" + "timestamp": "2026-03-25T16:49:04.514285" } } \ No newline at end of file From 87b473a987011fc4840e4d0bb6fc47951900da46 Mon Sep 17 00:00:00 2001 From: Sofia Ochkalova Date: Thu, 26 Mar 2026 10:43:10 +0000 Subject: [PATCH 38/46] add notes to readme to ensure expected order of columns in the samplesheet --- README.md | 6 ++++++ docs/usage.md | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/README.md b/README.md index d1fef8c..0a65eb9 100644 --- a/README.md +++ b/README.md @@ -90,6 +90,9 @@ sample,fasta,accession,fastq_1,fastq_2,assembly_software,binning_software,binnin lachnospira_eligens,data/bin_lachnospira_eligens.fa.gz,SRR24458089,,,spades_v3.15.5,metabat2_v2.6,default,CheckM2_v1.0.1,61.0,0.21,32.07,sediment metagenome,No,marine,cable_bacteria,marine_sediment,No,d__Bacteria;p__Proteobacteria;s__unclassified_Proteobacteria ``` +> [!IMPORTANT] +> **Samplesheet column requirements**: All columns shown in the example above must be present in your samplesheet, even if some values are empty. Columns must be in exactly the same order as shown. + ### `metagenomic_assemblies` mode (`ASSEMBLYSUBMIT`) The input must follow `assets/schema_input_assembly.json`. @@ -117,6 +120,9 @@ assembly_1,data/contigs_1.fasta.gz,data/reads_1.fastq.gz,data/reads_2.fastq.gz,, assembly_2,data/contigs_2.fasta.gz,,,42.7,ERR011323,MEGAHIT,1.2.9 ``` +> [!IMPORTANT] +> **Samplesheet column requirements**: All columns shown in the example above must be present in your samplesheet, even if some values are empty. 
Columns must be in exactly the same order as shown. + ## Usage > [!NOTE] diff --git a/docs/usage.md b/docs/usage.md index 2e63367..f6358c0 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -47,6 +47,9 @@ sample,fasta,accession,fastq_1,fastq_2,assembly_software,binning_software,binnin mag_001,data/mag_001.fasta.gz,SRR24458089,,,SPAdes 3.15.5,MetaBAT2 2.15,default,CheckM2 1.0.1,92.81,1.09,66.04,sediment metagenome,No,marine,cable bacteria,marine sediment,No,d__Bacteria;p__Proteobacteria;s__ ``` +> [!IMPORTANT] +> **Samplesheet column requirements**: All columns shown in the example above must be present in your samplesheet, even if some values are empty. Columns must be in exactly the same order as shown. + | Column | Description | | --------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | `sample` | Unique identifier of this particular data entry. It is used as the alias when submitting to ENA, so it must be unique within one Webin account. | @@ -86,6 +89,9 @@ assembly_001,data/assembly_001.fasta.gz,data/assembly_001_R1.fastq.gz,data/assem assembly_002,data/assembly_002.fasta.gz,,,42.7,ERR011323,MEGAHIT,1.2.9 ``` +> [!IMPORTANT] +> **Samplesheet column requirements**: All columns shown in the example above must be present in your samplesheet, even if some values are empty. Columns must be in exactly the same order as shown. + | Column | Description | | ------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------- | | `sample` | Unique identifier of this particular data entry. It is used as the alias when submitting to ENA, so it must be unique within one Webin account. 
| From 05919f22ab3fd9315dc9e92e27c5285ee900ebf2 Mon Sep 17 00:00:00 2001 From: Sofia Ochkalova Date: Thu, 26 Mar 2026 15:43:29 +0000 Subject: [PATCH 39/46] use splitCsv for coverage file parsing --- workflows/assemblysubmit.nf | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/workflows/assemblysubmit.nf b/workflows/assemblysubmit.nf index 323b658..4b0a4a7 100644 --- a/workflows/assemblysubmit.nf +++ b/workflows/assemblysubmit.nf @@ -95,19 +95,16 @@ workflow ASSEMBLYSUBMIT { false // interleaved ) - // Calculate average coverage using map operator + // Calculate average coverage using splitCsv operator average_coverage_ch = COVERM_CONTIG.out.coverage - .map { meta, coverage_file -> - // Read the file and calculate average - def lines = coverage_file.readLines() - if (lines.size() < 2) { - return [meta, 0.0] - } - def coverages = lines[1..-1].collect { line -> - line.split('\t')[1] as Double - } + .splitCsv(sep: '\t', skip: 1) + .map { meta, row -> + [meta, row[1] as Double] + } + .groupTuple() + .map { meta, coverages -> def average = coverages.sum() / coverages.size() - return [meta, average] + [meta, average] } // Update metadata with calculated coverage From 25b3d8d5e64c1e3f4eae55eff0ada1eb9dbed3a3 Mon Sep 17 00:00:00 2001 From: Sofia Ochkalova Date: Thu, 26 Mar 2026 16:23:49 +0000 Subject: [PATCH 40/46] apply vangelis linter --- CITATIONS.md | 8 ++-- conf/base.config | 3 -- conf/modules.config | 10 ++--- conf/test_genome.config | 2 - docs/output.md | 60 +++++++++++++++---------- modules.json | 3 +- modules/nf-core/multiqc/multiqc.diff | 65 ++++++++++++++++++++++++++++ ro-crate-metadata.json | 2 +- workflows/assemblysubmit.nf | 5 --- workflows/genomesubmit.nf | 1 - 10 files changed, 114 insertions(+), 45 deletions(-) create mode 100644 modules/nf-core/multiqc/multiqc.diff diff --git a/CITATIONS.md b/CITATIONS.md index 030338b..495f422 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -16,19 +16,19 @@ - 
[CoverM](https://github.com/wwood/CoverM) -> Aroney ST, Newell RJ, Nissen JN, Camargo AP, Tyson GW, Woodcroft BJ. CoverM: Read alignment statistics for metagenomics. Bioinformatics. 2025;41(4):btaf147. doi: 10.1093/bioinformatics/btaf147. +> Aroney ST, Newell RJ, Nissen JN, Camargo AP, Tyson GW, Woodcroft BJ. CoverM: Read alignment statistics for metagenomics. Bioinformatics. 2025;41(4):btaf147. doi: 10.1093/bioinformatics/btaf147. PubMed PMID: 40193404; PubMed Central PMCID: PMC11993303. - [CheckM2](https://github.com/chklovski/CheckM2) -> Chklovski A, Parks DH, Woodcroft BJ, Tyson GW. CheckM2: a rapid, scalable and accurate tool for assessing microbial genome quality using machine learning. Nat Methods. 2023;20(8):1203-1212. doi: 10.1038/s41592-023-01940-w. +> Chklovski A, Parks DH, Woodcroft BJ, Tyson GW. CheckM2: a rapid, scalable and accurate tool for assessing microbial genome quality using machine learning. Nat Methods. 2023;20(8):1203-1212. doi: 10.1038/s41592-023-01940-w. PubMed PMID: 37500759; PubMed Central PMCID: not available. - [CAT and BAT](https://doi.org/10.1186/s13059-019-1817-x) -> von Meijenfeldt FAB, Arkhipova K, Cambuy DD, Coutinho FH, Dutilh BE. Robust taxonomic classification of uncharted microbial sequences and bins with CAT and BAT. Genome Biol. 2019;20(1):217. doi: 10.1186/s13059-019-1817-x. +> von Meijenfeldt FAB, Arkhipova K, Cambuy DD, Coutinho FH, Dutilh BE. Robust taxonomic classification of uncharted microbial sequences and bins with CAT and BAT. Genome Biol. 2019;20(1):217. doi: 10.1186/s13059-019-1817-x. PubMed PMID: 31640809; PubMed Central PMCID: PMC6805573. - [tRNAscan-SE 2.0](https://doi.org/10.1093/nar/gkab688) -> Chan PP, Lin BY, Mak AJ, Lowe TM. tRNAscan-SE 2.0: Improved detection and functional classification of transfer RNA genes. Nucleic Acids Res. 2021;49(16):9077-9096. doi: 10.1093/nar/gkab688. +> Chan PP, Lin BY, Mak AJ, Lowe TM. tRNAscan-SE 2.0: Improved detection and functional classification of transfer RNA genes. 
Nucleic Acids Res. 2021;49(16):9077-9096. doi: 10.1093/nar/gkab688. PubMed PMID: 34417604; PubMed Central PMCID: PMC8450103. - [barrnap](https://github.com/tseemann/barrnap) diff --git a/conf/base.config b/conf/base.config index 42eb0c6..6a56c96 100644 --- a/conf/base.config +++ b/conf/base.config @@ -10,7 +10,6 @@ process { - // TODO nf-core: Check the defaults for all processes cpus = { 1 * task.attempt } memory = { 6.GB * task.attempt } time = { 4.h * task.attempt } @@ -24,8 +23,6 @@ process { // These labels are used and recognised by default in DSL2 files hosted on nf-core/modules. // If possible, it would be nice to keep the same label naming convention when // adding in your local modules too. - // TODO nf-core: Customise requirements for specific processes. - // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors withLabel:process_single { cpus = { 1 } memory = { 6.GB * task.attempt } diff --git a/conf/modules.config b/conf/modules.config index 5d3204b..f61f7ed 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -83,11 +83,11 @@ process { } withName: 'CHECKM2_DATABASEDOWNLOAD' { - publishDir = [ - path: { "${params.outdir}/databases/checkm2/" }, - mode: params.publish_dir_mode, - pattern: '*.dmnd' - ] + publishDir = [ + path: { "${params.outdir}/databases/checkm2/" }, + mode: params.publish_dir_mode, + pattern: '*.dmnd' + ] } // diff --git a/conf/test_genome.config b/conf/test_genome.config index bab14f9..c26a027 100644 --- a/conf/test_genome.config +++ b/conf/test_genome.config @@ -23,8 +23,6 @@ params { config_profile_description = 'Minimal test dataset to check pipeline function' // Input data - // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets - // TODO nf-core: Give any required params for the test so that command line flags are not needed input = params.pipelines_testdata_base_path + 'seqsubmit/samplesheets/samplesheet_genomesubmit.csv' outdir = 'test_output' diff --git a/docs/output.md 
b/docs/output.md index 8cdc8c5..ff2ac63 100644 --- a/docs/output.md +++ b/docs/output.md @@ -2,32 +2,13 @@ ## Introduction -This document describes the files currently published into `--outdir` by `nf-core/seqsubmit`. +This document describes the output produced by the `nf-core/seqsubmit` pipeline. -The exact directory structure depends on the selected `--mode`. +The directories listed below will be created in the results directory (set with `--outdir`) after the pipeline has finished. All paths are relative to the top-level results directory. -## Common outputs - -### `pipeline_info/` - -The pipeline writes general run metadata to `pipeline_info/` for all modes. - -
-Output files +## Pipeline overview -- `pipeline_info/` - - `execution_report_.html`: Nextflow execution report. - - `execution_timeline_.html`: Nextflow execution timeline. - - `execution_trace_.txt`: task-level trace table. - - `pipeline_dag_.html`: rendered workflow DAG. - - `params_.json`: parameter snapshot written at pipeline start. - - `nf_core_seqsubmit_software_mqc_versions.yml`: collated software versions gathered during the run. - - `pipeline_report.html`: optional summary report generated when `--email` or `--email_on_fail` is used. - - `pipeline_report.txt`: text version of the optional summary report. - -
- -[Nextflow](https://www.nextflow.io/docs/latest/tracing.html) provides the execution report, timeline, trace, and DAG outputs. These files are the main resources for troubleshooting failed runs and reviewing resource usage. +The pipeline is built using [Nextflow](https://www.nextflow.io/) and performs automated submission of sequence data to ENA. Exact steps and generated outputs depend on the data type and `--mode` executed (`mags`, `bins` or `metagenomic_assemblies`). ## `mags` and `bins` outputs @@ -66,3 +47,36 @@ When `--mode metagenomic_assemblies` is used, results are written under `metagen Assembly study registration, manifest generation, and Webin-CLI submission are executed by the workflow, but their intermediate outputs are not currently published into `--outdir` by the pipeline. + +## Common outputs + +### MultiQC + +
+Output files + +- `multiqc/` + - `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. + - `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. + - `multiqc_plots/`: directory containing static images from the report in various formats. + +
+ +[MultiQC](http://multiqc.info) is a visualization tool that generates a single HTML report summarising all samples in your project. Most of the pipeline QC results are visualised in the report and further statistics are available in the report data directory. + +Results generated by MultiQC collate pipeline QC from supported tools e.g. FastQC. The pipeline has special steps which also allow the software versions to be reported in the MultiQC output for future traceability. For more information about how to use MultiQC reports, see <http://multiqc.info>. + +### Pipeline information + +
+Output files + +- `pipeline_info/` + - Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.dot`/`pipeline_dag.svg`. + - Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.yml`. The `pipeline_report*` files will only be present if the `--email` / `--email_on_fail` parameters are used when running the pipeline. + - Reformatted samplesheet files used as input to the pipeline: `samplesheet.valid.csv`. + - Parameters used by the pipeline run: `params.json`. + +
+ +[Nextflow](https://www.nextflow.io/docs/latest/tracing.html) provides excellent functionality for generating various reports relevant to the running and execution of the pipeline. This will allow you to troubleshoot errors with the running of the pipeline, and also provide you with other information such as launch commands, run times and resource usage. diff --git a/modules.json b/modules.json index 158c21f..3e38af7 100644 --- a/modules.json +++ b/modules.json @@ -69,7 +69,8 @@ "multiqc": { "branch": "master", "git_sha": "af27af1be706e6a2bb8fe454175b0cdf77f47b49", - "installed_by": ["modules"] + "installed_by": ["modules"], + "patch": "modules/nf-core/multiqc/multiqc.diff" }, "trnascanse": { "branch": "master", diff --git a/modules/nf-core/multiqc/multiqc.diff b/modules/nf-core/multiqc/multiqc.diff new file mode 100644 index 0000000..e0c8c7b --- /dev/null +++ b/modules/nf-core/multiqc/multiqc.diff @@ -0,0 +1,65 @@ +Changes in component 'nf-core/multiqc' +'modules/nf-core/multiqc/environment.yml' is unchanged +'modules/nf-core/multiqc/meta.yml' is unchanged +Changes in 'multiqc/main.nf': +--- modules/nf-core/multiqc/main.nf ++++ modules/nf-core/multiqc/main.nf +@@ -2,9 +2,10 @@ + label 'process_single' + + conda "${moduleDir}/environment.yml" +- container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? +- 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/8c/8c6c120d559d7ee04c7442b61ad7cf5a9e8970be5feefb37d68eeaa60c1034eb/data' : +- 'community.wave.seqera.io/library/multiqc:1.32--d58f60e4deb769bf' }" ++ // TODO: version is temporarily set to 1.25.1 to avoid segfault happening in 1.32 ++ container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ++ ? 
'oras://community.wave.seqera.io/library/multiqc:1.25.1--6d0dfb7dbe16fbf9' ++ : 'community.wave.seqera.io/library/multiqc:1.25.1--214d24b7702218de'}" + + input: + path multiqc_files, stageAs: "?/*" + +'modules/nf-core/multiqc/tests/main.nf.test.snap' is unchanged +Changes in 'multiqc/tests/nextflow.config': +--- modules/nf-core/multiqc/tests/nextflow.config ++++ modules/nf-core/multiqc/tests/nextflow.config +@@ -2,4 +2,4 @@ + withName: 'MULTIQC' { + ext.prefix = null + } +-} ++} +Changes in 'multiqc/tests/main.nf.test': +--- modules/nf-core/multiqc/tests/main.nf.test ++++ modules/nf-core/multiqc/tests/main.nf.test +@@ -15,7 +15,7 @@ + when { + process { + """ +- input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) ++ input[0] = channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) + input[1] = [] + input[2] = [] + input[3] = [] +@@ -41,8 +41,8 @@ + when { + process { + """ +- input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) +- input[1] = Channel.of(file("https://github.com/nf-core/tools/raw/dev/nf_core/pipeline-template/assets/multiqc_config.yml", checkIfExists: true)) ++ input[0] = channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) ++ input[1] = channel.of(file("https://github.com/nf-core/tools/raw/dev/nf_core/pipeline-template/assets/multiqc_config.yml", checkIfExists: true)) + input[2] = [] + input[3] = [] + input[4] = [] +@@ -68,7 +68,7 @@ + when { + process { + """ +- input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) ++ input[0] = channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) + 
input[1] = [] + input[2] = [] + input[3] = [] + +************************************************************ diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json index 3ff34eb..7e0e70b 100644 --- a/ro-crate-metadata.json +++ b/ro-crate-metadata.json @@ -23,7 +23,7 @@ "@type": "Dataset", "creativeWorkStatus": "InProgress", "datePublished": "2025-11-20T09:32:34+00:00", - "description": "

\n \n \n \"nf-core/seqsubmit\"\n \n

\n\n[![Open in GitHub Codespaces](https://img.shields.io/badge/Open_In_GitHub_Codespaces-black?labelColor=grey&logo=github)](https://github.com/codespaces/new/nf-core/seqsubmit)\n[![GitHub Actions CI Status](https://github.com/nf-core/seqsubmit/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/seqsubmit/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/seqsubmit/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/seqsubmit/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/seqsubmit/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.04.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.5.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.5.1)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/seqsubmit)\n\n[![Get help on 
Slack](http://img.shields.io/badge/slack-nf--core%20%23seqsubmit-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/seqsubmit)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/seqsubmit** is a Nextflow pipeline for submitting sequence data to [ENA](https://www.ebi.ac.uk/ena/browser/home).\nCurrently, the pipeline supports three submission modes, each routed to a dedicated workflow and requiring its own input samplesheet structure:\n\n- `mags` for Metagenome Assembled Genomes (MAGs) submission with `GENOMESUBMIT` workflow\n- `bins` for bins submission with `GENOMESUBMIT` workflow\n- `metagenomic_assemblies` for assembly submission with `ASSEMBLYSUBMIT` workflow\n\n![seqsubmit workflow diagram](assets/seqsubmit_schema.png)\n\n## Requirements\n\n- [Nextflow](https://www.nextflow.io/) `>=25.04.0`\n- Webin account registered at https://www.ebi.ac.uk/ena/submit/webin/login\n- Raw reads used to assemble contigs submitted to [INSDC](https://www.insdc.org/) and associated accessions available\n\nSetup your environment secrets before running the pipeline:\n\n`nextflow secrets set WEBIN_ACCOUNT \"Webin-XXX\"`\n\n`nextflow secrets set WEBIN_PASSWORD \"XXX\"`\n\nMake sure you update commands above with your authorised credentials.\n\n## Input samplesheets\n\nFor detailed descriptions of all samplesheet columns, see the [usage documentation](docs/usage.md#samplesheet-input).\n\n### `mags` and `bins` modes (`GENOMESUBMIT`)\n\nThe input must follow `assets/schema_input_genome.json`.\n\nRequired columns:\n\n- `sample`\n- `fasta` (must end with `.fa.gz` or 
`.fasta.gz`)\n- `accession`\n- `assembly_software`\n- `binning_software`\n- `binning_parameters`\n- `stats_generation_software`\n- `metagenome`\n- `environmental_medium`\n- `broad_environment`\n- `local_environment`\n- `co-assembly`\n\nColumns that required for now, but will be optional in the nearest future:\n\n- `completeness`\n- `contamination`\n- `genome_coverage`\n- `RNA_presence`\n- `NCBI_lineage`\n\nThose fields are metadata required for [genome_uploader](https://github.com/EBI-Metagenomics/genome_uploader) package.\n\nExample `samplesheet_genome.csv`:\n\n```csv\nsample,fasta,accession,assembly_software,binning_software,binning_parameters,stats_generation_software,completeness,contamination,genome_coverage,metagenome,co-assembly,broad_environment,local_environment,environmental_medium,RNA_presence,NCBI_lineage\nlachnospira_eligens,data/bin_lachnospira_eligens.fa.gz,SRR24458089,spades_v3.15.5,metabat2_v2.6,default,CheckM2_v1.0.1,61.0,0.21,32.07,sediment metagenome,No,marine,cable_bacteria,marine_sediment,No,d__Bacteria;p__Proteobacteria;s_unclassified_Proteobacteria\n```\n\n### `metagenomic_assemblies` mode (`ASSEMBLYSUBMIT`)\n\nThe input must follow `assets/schema_input_assembly.json`.\n\nRequired columns:\n\n- `sample`\n- `fasta` (must end with `.fa.gz` or `.fasta.gz`)\n- `run_accession`\n- `assembler`\n- `assembler_version`\n\nAt least one of the following must be provided per row:\n\n- reads (`fastq_1`, optional `fastq_2` for paired-end)\n- `coverage`\n\nIf `coverage` is missing and reads are provided, the workflow calculates average coverage with `coverm`.\n\nExample `samplesheet_assembly.csv`:\n\n```csv\nsample,fasta,fastq_1,fastq_2,coverage,run_accession,assembler,assembler_version\nassembly_1,data/contigs_1.fasta.gz,data/reads_1.fastq.gz,data/reads_2.fastq.gz,,ERR011322,SPAdes,3.15.5\nassembly_2,data/contigs_2.fasta.gz,,,42.7,ERR011323,MEGAHIT,1.2.9\n```\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this 
page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\n### Required parameters:\n\n| Parameter | Description |\n| -------------------- | --------------------------------------------------------------------------------- |\n| `--mode` | Type of the data to be submitted. Options: `[mags, bins, metagenomic_assemblies]` |\n| `--input` | Path to the samplesheet describing the data to be submitted |\n| `--outdir` | Path to the output directory for pipeline results |\n| `--submission_study` | ENA study accession (PRJ/ERP) to submit the data to |\n| `--centre_name` | Name of the submitter's organisation |\n\n### Optional parameters:\n\n| Parameter | Description |\n| ------------------- | ---------------------------------------------------------------------------------------- |\n| `--upload_tpa` | Flag to control the type of assembly study (third party assembly or not). Default: false |\n| `--test_upload` | Upload to TEST ENA server instead of LIVE. Default: false |\n| `--webincli_submit` | If set to false, submissions will be validated, but not submitted. 
Default: true |\n\nGeneral command template:\n\n```bash\nnextflow run nf-core/seqsubmit \\\n -profile \\\n --mode \\\n --input \\\n --centre_name \\\n --submission_study \\\n --outdir \n```\n\nValidation run (submission to the ENA TEST server) in `mags` mode:\n\n```bash\nnextflow run nf-core/seqsubmit \\\n -profile docker \\\n --mode mags \\\n --input assets/samplesheet_genomes.csv \\\n --submission_study \\\n --centre_name TEST_CENTER \\\n --webincli_submit true \\\n --test_upload true \\\n --outdir results/validate_mags\n```\n\nValidation run (submission to the ENA TEST server) in `metagenomic_assemblies` mode:\n\n```bash\nnextflow run nf-core/seqsubmit \\\n -profile docker \\\n --mode metagenomic_assemblies \\\n --input assets/samplesheet_assembly.csv \\\n --submission_study \\\n --centre_name TEST_CENTER \\\n --webincli_submit true \\\n --test_upload true \\\n --outdir results/validate_assemblies\n```\n\nLive submission example:\n\n```bash\nnextflow run nf-core/seqsubmit \\\n -profile docker \\\n --mode metagenomic_assemblies \\\n --input assets/samplesheet_assembly.csv \\\n --submission_study PRJEB98843 \\\n --test_upload false \\\n --webincli_submit true \\\n --outdir results/live_assembly\n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/seqsubmit/usage) and the [parameter documentation](https://nf-co.re/seqsubmit/parameters).\n\n## Pipeline output\n\nKey output locations in `--outdir`:\n\n- `upload/manifests/`: generated manifest files for submission\n- `upload/webin_cli/`: ENA Webin CLI reports\n- `multiqc/`: MultiQC summary report\n- `pipeline_info/`: execution reports, trace, DAG, and software versions\n\nFor full details, see the [output documentation](https://nf-co.re/seqsubmit/output).\n\n## Credits\n\nnf-core/seqsubmit was originally written by [Martin Beracochea](https://github.com/mberacochea), [Ekaterina Sakharova](https://github.com/KateSakharova), [Sofiia Ochkalova](https://github.com/ochkalova), [Evangelos Karatzas](https://github.com/vagkaratzas).\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#seqsubmit` channel](https://nfcore.slack.com/channels/seqsubmit) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\n\n\n\n\n\n\nIf you use this pipeline please make sure to cite all used software.\nThis pipeline uses code and infrastructure developed and maintained by the [nf-core](https://nf-co.re) community, reused here under the [MIT license](https://github.com/nf-core/tools/blob/main/LICENSE).\n\n> **MGnify: the microbiome sequence data analysis resource in 2023**\n>\n> Richardson L, Allen B, Baldi G, Beracochea M, Bileschi ML, Burdett T, et al.\n>\n> Vol. 51, Nucleic Acids Research. Oxford University Press (OUP); 2022. p. 
D753\u20139. Available from: http://dx.doi.org/10.1093/nar/gkac1080\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", + "description": "

\n \n \n \"nf-core/seqsubmit\"\n \n

\n\n[![Open in GitHub Codespaces](https://img.shields.io/badge/Open_In_GitHub_Codespaces-black?labelColor=grey&logo=github)](https://github.com/codespaces/new/nf-core/seqsubmit)\n[![GitHub Actions CI Status](https://github.com/nf-core/seqsubmit/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/seqsubmit/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/seqsubmit/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/seqsubmit/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/seqsubmit/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.04.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.5.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.5.1)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/seqsubmit)\n\n[![Get help on 
Slack](http://img.shields.io/badge/slack-nf--core%20%23seqsubmit-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/seqsubmit)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/seqsubmit** is a Nextflow pipeline for submitting sequence data to [ENA](https://www.ebi.ac.uk/ena/browser/home).\nCurrently, the pipeline supports three submission modes, each routed to a dedicated workflow and requiring its own input samplesheet structure:\n\n- `mags` for Metagenome Assembled Genomes (MAGs) submission with `GENOMESUBMIT` workflow\n- `bins` for bins submission with `GENOMESUBMIT` workflow\n- `metagenomic_assemblies` for assembly submission with `ASSEMBLYSUBMIT` workflow\n\n![seqsubmit workflow diagram](assets/seqsubmit_schema.png)\n\n## Requirements\n\n- [Nextflow](https://www.nextflow.io/) `>=25.04.0`\n- Webin account registered at https://www.ebi.ac.uk/ena/submit/webin/login\n- Raw reads used to assemble contigs submitted to [INSDC](https://www.insdc.org/) and associated accessions available\n\nSetup your environment secrets before running the pipeline:\n\n`nextflow secrets set ENA_WEBIN \"Webin-XXX\"`\n\n`nextflow secrets set ENA_WEBIN_PASSWORD \"XXX\"`\n\nMake sure you update commands above with your authorised credentials.\n\n## Input samplesheets\n\nFor detailed descriptions of all samplesheet columns, see the [usage documentation](docs/usage.md#samplesheet-input).\n\n### `mags` and `bins` modes (`GENOMESUBMIT`)\n\nThe input must follow `assets/schema_input_genome.json`.\n\nRequired columns:\n\n- `sample`\n- `fasta` (must end with `.fa.gz`, 
`.fasta.gz`, or `.fna.gz`)\n- `accession`\n- `assembly_software`\n- `binning_software`\n- `binning_parameters`\n- `metagenome`\n- `environmental_medium`\n- `broad_environment`\n- `local_environment`\n- `co-assembly`\n\nAt least one of the following must be provided per row:\n\n- reads (`fastq_1`, optional `fastq_2` for paired-end)\n- `genome_coverage`\n\nAdditional supported columns:\n\n- `stats_generation_software`\n- `completeness`\n- `contamination`\n- `RNA_presence`\n- `NCBI_lineage`\n\nIf `genome_coverage`, `stats_generation_software`, `completeness`, `contamination`, `RNA_presence`, or `NCBI_lineage` are missing, the workflow can calculate or infer them when the required inputs are available.\n\nThose fields are metadata required for the [genome_uploader](https://github.com/EBI-Metagenomics/genome_uploader) package.\n\nExample `samplesheet_genomes.csv`:\n\n```csv\nsample,fasta,accession,fastq_1,fastq_2,assembly_software,binning_software,binning_parameters,stats_generation_software,completeness,contamination,genome_coverage,metagenome,co-assembly,broad_environment,local_environment,environmental_medium,RNA_presence,NCBI_lineage\nlachnospira_eligens,data/bin_lachnospira_eligens.fa.gz,SRR24458089,,,spades_v3.15.5,metabat2_v2.6,default,CheckM2_v1.0.1,61.0,0.21,32.07,sediment metagenome,No,marine,cable_bacteria,marine_sediment,No,d__Bacteria;p__Proteobacteria;s__unclassified_Proteobacteria\n```\n\n> [!IMPORTANT]\n> **Samplesheet column requirements**: All columns shown in the example above must be present in your samplesheet, even if some values are empty. 
Columns must be in exactly the same order as shown.\n\n### `metagenomic_assemblies` mode (`ASSEMBLYSUBMIT`)\n\nThe input must follow `assets/schema_input_assembly.json`.\n\nRequired columns:\n\n- `sample`\n- `fasta` (must end with `.fa.gz`, `.fasta.gz`, or `.fna.gz`)\n- `run_accession`\n- `assembler`\n- `assembler_version`\n\nAt least one of the following must be provided per row:\n\n- reads (`fastq_1`, optional `fastq_2` for paired-end)\n- `coverage`\n\nIf `coverage` is missing and reads are provided, the workflow calculates average coverage with `coverm`.\n\nExample `samplesheet_assembly.csv`:\n\n```csv\nsample,fasta,fastq_1,fastq_2,coverage,run_accession,assembler,assembler_version\nassembly_1,data/contigs_1.fasta.gz,data/reads_1.fastq.gz,data/reads_2.fastq.gz,,ERR011322,SPAdes,3.15.5\nassembly_2,data/contigs_2.fasta.gz,,,42.7,ERR011323,MEGAHIT,1.2.9\n```\n\n> [!IMPORTANT]\n> **Samplesheet column requirements**: All columns shown in the example above must be present in your samplesheet, even if some values are empty. Columns must be in exactly the same order as shown.\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\n### Submission study\n\nAll data submitted through this pipeline must be associated with an ENA study (project). You can either pass an accession of your existing study via `--submission_study`or provide a metadata file via `--study_metadata` and the pipeline will register the study with ENA before submitting your data.\n\nSee the [usage documentation](docs/usage.md#submission-study) for more details.\n\n### Database setup (`CheckM2` and `CAT_pack`)\n\nThe `mags`/`bins` workflow requires databases for completeness/contamination estimation and taxonomy assignment. 
See [Usage documentation](usage.md) for details.\n\n### Required parameters:\n\n| Parameter | Description |\n| ------------------------------------------ | ----------------------------------------------------------------------------------------------------------------- |\n| `--mode` | Type of the data to be submitted. Options: `[mags, bins, metagenomic_assemblies]` |\n| `--input` | Path to the samplesheet describing the data to be submitted |\n| `--outdir` | Path to the output directory for pipeline results |\n| `--submission_study` OR `--study_metadata` | ENA study accession (PRJ/ERP) to submit the data to OR metadata file in JSON/TSV/CSV format to register new study |\n| `--centre_name` | Name of the submitter's organisation |\n\n### Optional parameters:\n\n| Parameter | Description |\n| ------------------- | ---------------------------------------------------------------------------------------- |\n| `--upload_tpa` | Flag to control the type of assembly study (third party assembly or not). Default: false |\n| `--test_upload` | Upload to TEST ENA server instead of LIVE. Default: true |\n| `--webincli_submit` | If set to false, submissions will be validated, but not submitted. 
Default: true |\n\nGeneral command template:\n\n```bash\nnextflow run nf-core/seqsubmit \\\n -profile \\\n --mode \\\n --input \\\n --centre_name \\\n --submission_study \\\n --outdir \n```\n\nValidation run (submission to the ENA TEST server) in `mags` mode:\n\n```bash\nnextflow run nf-core/seqsubmit \\\n -profile docker \\\n --mode mags \\\n --input assets/samplesheet_genomes.csv \\\n --submission_study \\\n --centre_name TEST_CENTER \\\n --webincli_submit true \\\n --test_upload true \\\n --outdir results/validate_mags\n```\n\nValidation run (submission to the ENA TEST server) in `metagenomic_assemblies` mode:\n\n```bash\nnextflow run nf-core/seqsubmit \\\n -profile docker \\\n --mode metagenomic_assemblies \\\n --input assets/samplesheet_assembly.csv \\\n --submission_study \\\n --centre_name TEST_CENTER \\\n --webincli_submit true \\\n --test_upload true \\\n --outdir results/validate_assemblies\n```\n\nLive submission example:\n\n```bash\nnextflow run nf-core/seqsubmit \\\n -profile docker \\\n --mode metagenomic_assemblies \\\n --input assets/samplesheet_assembly.csv \\\n --submission_study PRJEB98843 \\\n --test_upload false \\\n --webincli_submit true \\\n --outdir results/live_assembly\n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/seqsubmit/usage) and the [parameter documentation](https://nf-co.re/seqsubmit/parameters).\n\n## Pipeline output\n\nKey output locations in `--outdir`:\n\n- `mags/` or `bins/`: genome metadata, manifests, and per-sample submission support files\n- `metagenomic_assemblies/`: assembly metadata CSVs and per-sample coverage files\n- `multiqc/`: MultiQC summary report\n- `pipeline_info/`: execution reports, trace, DAG, and software versions\n\nFor full details, see the [output documentation](https://nf-co.re/seqsubmit/output).\n\n## Credits\n\nnf-core/seqsubmit was originally written by [Martin Beracochea](https://github.com/mberacochea), [Ekaterina Sakharova](https://github.com/KateSakharova), [Sofiia Ochkalova](https://github.com/ochkalova), [Evangelos Karatzas](https://github.com/vagkaratzas).\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#seqsubmit` channel](https://nfcore.slack.com/channels/seqsubmit) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\n\n\n\n\n\n\nIf you use this pipeline please make sure to cite all used software.\nThis pipeline uses code and infrastructure developed and maintained by the [nf-core](https://nf-co.re) community, reused here under the [MIT license](https://github.com/nf-core/tools/blob/main/LICENSE).\n\n> **MGnify: the microbiome sequence data analysis resource in 2023**\n>\n> Richardson L, Allen B, Baldi G, Beracochea M, Bileschi ML, Burdett T, et al.\n>\n> Vol. 
51, Nucleic Acids Research. Oxford University Press (OUP); 2022. p. D753\u20139. Available from: http://dx.doi.org/10.1093/nar/gkac1080\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", "hasPart": [ { "@id": "main.nf" diff --git a/workflows/assemblysubmit.nf b/workflows/assemblysubmit.nf index 4b0a4a7..5743f7d 100644 --- a/workflows/assemblysubmit.nf +++ b/workflows/assemblysubmit.nf @@ -72,14 +72,11 @@ workflow ASSEMBLYSUBMIT { assembly_fasta, "true" // enables number of contigs check - ENA requires more than 1 contig for an assembly submission ) - // TODO add some logging here to track discarded assemblies validated_fastas = assembly_fasta.join(FASTAVALIDATOR.out.success_log) .map { meta, fasta, _log -> [meta, fasta] } - // TODO add human decontamination step - // For assemblies without coverage, calculate coverage with CoverM validated_fastas.filter { meta, _fasta -> meta.coverage == null } .join(reads_fastq) @@ -124,8 +121,6 @@ workflow ASSEMBLYSUBMIT { .filter { meta, _fasta -> meta.coverage != null } .mix( assemblies_with_added_cov_ch ) - // TODO add validation step to check number of lines in CSV matches number of assemblies - assembly_metadata_csv = assemblies_with_coverage .map { meta, fasta -> def header = 'Runs,Coverage,Assembler,Version,Filepath,Sample' diff --git a/workflows/genomesubmit.nf b/workflows/genomesubmit.nf index 2d2fb4a..a17299c 100644 --- a/workflows/genomesubmit.nf +++ b/workflows/genomesubmit.nf @@ -81,7 +81,6 @@ workflow GENOMESUBMIT { genome_fasta, "true" 
// enables number of contigs check - ENA requires more than 1 contig for a bin/MAG submission ) - // TODO add some logging here to track discarded assemblies validated_fastas = genome_fasta.join(FASTAVALIDATOR.out.success_log) .map { meta, fasta, _log -> [meta, fasta] From 962acb03aedcb9d793d9364127e2d2981d2001f5 Mon Sep 17 00:00:00 2001 From: Sofia Ochkalova Date: Fri, 27 Mar 2026 12:11:05 +0000 Subject: [PATCH 41/46] add fallback publishDir and explicitly disable publishing where needed --- conf/modules.config | 65 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 50 insertions(+), 15 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index f61f7ed..7f87441 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -12,6 +12,23 @@ process { + // Default publishDir for all processes, can be overridden by individual process definitions below + publishDir = [ + path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + + // + // VALIDATION OF INPUT FASTA FILES + // + + withName: 'FASTAVALIDATOR' { + publishDir = [ + enabled: false + ] + } + // // TAXOMIC CLASSIFICATION SUBWORKFLOW // @@ -34,21 +51,27 @@ process { } withName: 'CATPACK_PREPARE' { - publishDir = [ - [ - path: { "${params.outdir}/databases/cat_pack/" }, - mode: params.publish_dir_mode, - pattern: '*/db/', - saveAs: { filename -> "db" } - ], - [ - path: { "${params.outdir}/databases/cat_pack/" }, - mode: params.publish_dir_mode, - pattern: '*/tax/', - saveAs: { filename -> "tax" } + publishDir = [ + [ + path: { "${params.outdir}/databases/cat_pack/" }, + mode: params.publish_dir_mode, + pattern: '*/db/', + saveAs: { filename -> "db" } + ], + [ + path: { "${params.outdir}/databases/cat_pack/" }, + mode: params.publish_dir_mode, + pattern: '*/tax/', + saveAs: { filename -> "tax" } + ] ] - ] -} + } + + withName: 'RENAME_FASTA_FOR_CATPACK|CAT_DB_UNTAR' { + publishDir = [ + enabled: false + ] + } // // RNA DETECTION SUBWORKFLOW @@ -70,6 +93,12 @@ process { ] } + withName: 'COUNT_RNA' { + publishDir = [ + enabled: false + ] + } + // // GENOME QUALITY ASSESSMENT SUBWORKFLOW // @@ -136,6 +165,12 @@ process { ext.args = { params.test_upload ? "--test" : "" } } + withName: 'ENA_WEBIN_CLI_DOWNLOAD|REGISTERSTUDY|GENERATE_ASSEMBLY_MANIFEST' { + publishDir = [ + enabled: false + ] + } + // // MULTIQC REPORT // @@ -143,7 +178,7 @@ process { withName: 'MULTIQC' { ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } publishDir = [ - path: { "${params.outdir}/${params.mode}/multiqc" }, + path: { "${params.outdir}/multiqc" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] From e881c0afc072ca4220ca1f81472ced1b27f47abf Mon Sep 17 00:00:00 2001 From: Sofia Ochkalova Date: Fri, 27 Mar 2026 12:46:21 +0000 Subject: [PATCH 42/46] update snapshots --- tests/assembly_complete_metadata.nf.test.snap | 25 +++++++---------- ...mbly_no_coverage_paired_reads.nf.test.snap | 27 ++++++++----------- ...mbly_no_coverage_single_reads.nf.test.snap | 27 ++++++++----------- ...ly_no_study_complete_metadata.nf.test.snap | 25 +++++++---------- tests/assembly_one_contig.nf.test.snap | 26 +++++++----------- tests/mag_complete_metadata.nf.test.snap | 27 ++++++++----------- ...ultiple_bins_missing_metadata.nf.test.snap | 27 ++++++++----------- .../mag_no_coverage_paired_reads.nf.test.snap | 27 ++++++++----------- .../mag_no_coverage_single_reads.nf.test.snap | 27 ++++++++----------- tests/mag_no_rna_presence.nf.test.snap | 27 ++++++++----------- ...ag_no_study_complete_metadata.nf.test.snap | 27 ++++++++----------- tests/mag_no_taxonomy.nf.test.snap | 27 ++++++++----------- tests/mag_one_contig.nf.test.snap | 25 +++++++---------- 13 files changed, 139 insertions(+), 205 deletions(-) diff --git a/tests/assembly_complete_metadata.nf.test.snap b/tests/assembly_complete_metadata.nf.test.snap index 3db4357..12945aa 100644 --- a/tests/assembly_complete_metadata.nf.test.snap +++ b/tests/assembly_complete_metadata.nf.test.snap @@ -14,31 +14,26 @@ "metagenomic_assemblies/complete_metadata/upload/webin_cli", "metagenomic_assemblies/complete_metadata/upload/webin_cli/complete_metadata_accessions.tsv", "metagenomic_assemblies/complete_metadata_assembly_metadata.csv", - "metagenomic_assemblies/multiqc", - "metagenomic_assemblies/multiqc/multiqc_data", - "metagenomic_assemblies/multiqc/multiqc_data/multiqc.log", - "metagenomic_assemblies/multiqc/multiqc_data/multiqc_citations.txt", - "metagenomic_assemblies/multiqc/multiqc_data/multiqc_data.json", - "metagenomic_assemblies/multiqc/multiqc_data/multiqc_software_versions.txt", - 
"metagenomic_assemblies/multiqc/multiqc_data/multiqc_sources.txt", - "metagenomic_assemblies/multiqc/multiqc_report.html", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_report.html", "pipeline_info", "pipeline_info/nf_core_seqsubmit_software_mqc_versions.yml" ], [ "complete_metadata_assembly_metadata.csv:md5,d5b1575095ece78d988395b874440bef", - "multiqc.log:md5,116b00d7c9902649a7600d4d3bf6d978", - "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", - "multiqc_data.json:md5,cc2c7f72ffa5687ac3462f39cf527a1e", - "multiqc_software_versions.txt:md5,57fdf3fb8ab5ca39ae384d3cf1ae9e58", - "multiqc_sources.txt:md5,d2a044df39ce3c6abe5cdc2d67473490", - "multiqc_report.html:md5,d9ee69a0934f1cfdec20d02c7c57a5b5" + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f" ] ], "meta": { "nf-test": "0.9.0", "nextflow": "25.04.1" }, - "timestamp": "2026-03-25T16:05:23.60975" + "timestamp": "2026-03-27T12:17:10.294831" } } \ No newline at end of file diff --git a/tests/assembly_no_coverage_paired_reads.nf.test.snap b/tests/assembly_no_coverage_paired_reads.nf.test.snap index 23823f5..3f62ac9 100644 --- a/tests/assembly_no_coverage_paired_reads.nf.test.snap +++ b/tests/assembly_no_coverage_paired_reads.nf.test.snap @@ -9,14 +9,6 @@ }, [ "metagenomic_assemblies", - "metagenomic_assemblies/multiqc", - "metagenomic_assemblies/multiqc/multiqc_data", - "metagenomic_assemblies/multiqc/multiqc_data/multiqc.log", - "metagenomic_assemblies/multiqc/multiqc_data/multiqc_citations.txt", - "metagenomic_assemblies/multiqc/multiqc_data/multiqc_data.json", - "metagenomic_assemblies/multiqc/multiqc_data/multiqc_software_versions.txt", - "metagenomic_assemblies/multiqc/multiqc_data/multiqc_sources.txt", - 
"metagenomic_assemblies/multiqc/multiqc_report.html", "metagenomic_assemblies/no_coverage_paired_reads", "metagenomic_assemblies/no_coverage_paired_reads/coverage", "metagenomic_assemblies/no_coverage_paired_reads/coverage/no_coverage_paired_reads.depth.txt", @@ -24,24 +16,27 @@ "metagenomic_assemblies/no_coverage_paired_reads/upload/webin_cli", "metagenomic_assemblies/no_coverage_paired_reads/upload/webin_cli/no_coverage_paired_reads_accessions.tsv", "metagenomic_assemblies/no_coverage_paired_reads_assembly_metadata.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_report.html", "pipeline_info", "pipeline_info/nf_core_seqsubmit_software_mqc_versions.yml" ], [ - "multiqc.log:md5,f0581c03f3c6b93dbd9d7041bf413023", - "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", - "multiqc_data.json:md5,52d4ff25685c9220279f3273484eda88", - "multiqc_software_versions.txt:md5,57fdf3fb8ab5ca39ae384d3cf1ae9e58", - "multiqc_sources.txt:md5,d2a044df39ce3c6abe5cdc2d67473490", - "multiqc_report.html:md5,23ccd334e5826645d5db1fbbeff1df85", "no_coverage_paired_reads.depth.txt:md5,bb5f99e74d21df3c73e0ae0f388bcbcb", - "no_coverage_paired_reads_assembly_metadata.csv:md5,91a2616ccedc6bb93c2209153bec50f0" + "no_coverage_paired_reads_assembly_metadata.csv:md5,91a2616ccedc6bb93c2209153bec50f0", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f" ] ], "meta": { "nf-test": "0.9.0", "nextflow": "25.04.1" }, - "timestamp": "2026-03-25T16:06:20.33222" + "timestamp": "2026-03-27T12:17:51.720414" } } \ No newline at end of file diff --git a/tests/assembly_no_coverage_single_reads.nf.test.snap b/tests/assembly_no_coverage_single_reads.nf.test.snap index f2cb148..dea32b5 100644 --- a/tests/assembly_no_coverage_single_reads.nf.test.snap +++ 
b/tests/assembly_no_coverage_single_reads.nf.test.snap @@ -9,14 +9,6 @@ }, [ "metagenomic_assemblies", - "metagenomic_assemblies/multiqc", - "metagenomic_assemblies/multiqc/multiqc_data", - "metagenomic_assemblies/multiqc/multiqc_data/multiqc.log", - "metagenomic_assemblies/multiqc/multiqc_data/multiqc_citations.txt", - "metagenomic_assemblies/multiqc/multiqc_data/multiqc_data.json", - "metagenomic_assemblies/multiqc/multiqc_data/multiqc_software_versions.txt", - "metagenomic_assemblies/multiqc/multiqc_data/multiqc_sources.txt", - "metagenomic_assemblies/multiqc/multiqc_report.html", "metagenomic_assemblies/no_coverage_single_reads", "metagenomic_assemblies/no_coverage_single_reads/coverage", "metagenomic_assemblies/no_coverage_single_reads/coverage/no_coverage_single_reads.depth.txt", @@ -24,24 +16,27 @@ "metagenomic_assemblies/no_coverage_single_reads/upload/webin_cli", "metagenomic_assemblies/no_coverage_single_reads/upload/webin_cli/no_coverage_single_reads_accessions.tsv", "metagenomic_assemblies/no_coverage_single_reads_assembly_metadata.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_report.html", "pipeline_info", "pipeline_info/nf_core_seqsubmit_software_mqc_versions.yml" ], [ - "multiqc.log:md5,29712ce710262129eabfcfb6ffa641d0", - "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", - "multiqc_data.json:md5,d3d129462271bcec26d19ca670c2158f", - "multiqc_software_versions.txt:md5,57fdf3fb8ab5ca39ae384d3cf1ae9e58", - "multiqc_sources.txt:md5,d2a044df39ce3c6abe5cdc2d67473490", - "multiqc_report.html:md5,809384ac9d6289c97e7a47365afd2734", "no_coverage_single_reads.depth.txt:md5,bd88282b17e851377b1dd223839be150", - "no_coverage_single_reads_assembly_metadata.csv:md5,fc1beef7e8439eaf5329e02587460009" + 
"no_coverage_single_reads_assembly_metadata.csv:md5,fc1beef7e8439eaf5329e02587460009", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f" ] ], "meta": { "nf-test": "0.9.0", "nextflow": "25.04.1" }, - "timestamp": "2026-03-25T16:06:57.003548" + "timestamp": "2026-03-27T12:18:28.483809" } } \ No newline at end of file diff --git a/tests/assembly_no_study_complete_metadata.nf.test.snap b/tests/assembly_no_study_complete_metadata.nf.test.snap index 9041e71..f2c272d 100644 --- a/tests/assembly_no_study_complete_metadata.nf.test.snap +++ b/tests/assembly_no_study_complete_metadata.nf.test.snap @@ -17,31 +17,26 @@ "metagenomic_assemblies/complete_metadata/upload/webin_cli", "metagenomic_assemblies/complete_metadata/upload/webin_cli/complete_metadata_accessions.tsv", "metagenomic_assemblies/complete_metadata_assembly_metadata.csv", - "metagenomic_assemblies/multiqc", - "metagenomic_assemblies/multiqc/multiqc_data", - "metagenomic_assemblies/multiqc/multiqc_data/multiqc.log", - "metagenomic_assemblies/multiqc/multiqc_data/multiqc_citations.txt", - "metagenomic_assemblies/multiqc/multiqc_data/multiqc_data.json", - "metagenomic_assemblies/multiqc/multiqc_data/multiqc_software_versions.txt", - "metagenomic_assemblies/multiqc/multiqc_data/multiqc_sources.txt", - "metagenomic_assemblies/multiqc/multiqc_report.html", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_report.html", "pipeline_info", "pipeline_info/nf_core_seqsubmit_software_mqc_versions.yml" ], [ "complete_metadata_assembly_metadata.csv:md5,d5b1575095ece78d988395b874440bef", - "multiqc.log:md5,0ffd75914a9aed929a0709ae6ed074cc", - "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", - "multiqc_data.json:md5,14518f81b55710a5b7e07ff86e77f635", - 
"multiqc_software_versions.txt:md5,e640e15de64dd5b779b09e71f35dbd21", - "multiqc_sources.txt:md5,d2a044df39ce3c6abe5cdc2d67473490", - "multiqc_report.html:md5,c2d27f7d2dc117dd4791d2970dbcb8e0" + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f" ] ], "meta": { "nf-test": "0.9.0", "nextflow": "25.04.1" }, - "timestamp": "2026-03-25T16:09:09.948413" + "timestamp": "2026-03-27T12:20:59.186788" } } \ No newline at end of file diff --git a/tests/assembly_one_contig.nf.test.snap b/tests/assembly_one_contig.nf.test.snap index 4a39753..2ebe8b9 100644 --- a/tests/assembly_one_contig.nf.test.snap +++ b/tests/assembly_one_contig.nf.test.snap @@ -8,31 +8,25 @@ } }, [ - "metagenomic_assemblies", - "metagenomic_assemblies/multiqc", - "metagenomic_assemblies/multiqc/multiqc_data", - "metagenomic_assemblies/multiqc/multiqc_data/multiqc.log", - "metagenomic_assemblies/multiqc/multiqc_data/multiqc_citations.txt", - "metagenomic_assemblies/multiqc/multiqc_data/multiqc_data.json", - "metagenomic_assemblies/multiqc/multiqc_data/multiqc_software_versions.txt", - "metagenomic_assemblies/multiqc/multiqc_data/multiqc_sources.txt", - "metagenomic_assemblies/multiqc/multiqc_report.html", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_report.html", "pipeline_info", "pipeline_info/nf_core_seqsubmit_software_mqc_versions.yml" ], [ - "multiqc.log:md5,de4283936c9042d00e5d725265508488", - "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", - "multiqc_data.json:md5,c355c6eca7cd2259c9f1691275da2520", - "multiqc_software_versions.txt:md5,57fdf3fb8ab5ca39ae384d3cf1ae9e58", - "multiqc_sources.txt:md5,d2a044df39ce3c6abe5cdc2d67473490", - "multiqc_report.html:md5,5fb9871586cecc4be089a248dc65cd42" + 
"multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f" ] ], "meta": { "nf-test": "0.9.0", "nextflow": "25.04.1" }, - "timestamp": "2026-03-25T16:09:27.221529" + "timestamp": "2026-03-27T12:21:16.358644" } } \ No newline at end of file diff --git a/tests/mag_complete_metadata.nf.test.snap b/tests/mag_complete_metadata.nf.test.snap index 5d00c40..fff69a6 100644 --- a/tests/mag_complete_metadata.nf.test.snap +++ b/tests/mag_complete_metadata.nf.test.snap @@ -14,14 +14,6 @@ "mags/complete_metadata/upload/webin_cli", "mags/complete_metadata/upload/webin_cli/complete_metadata_accessions.tsv", "mags/genomes_metadata.csv", - "mags/multiqc", - "mags/multiqc/multiqc_data", - "mags/multiqc/multiqc_data/multiqc.log", - "mags/multiqc/multiqc_data/multiqc_citations.txt", - "mags/multiqc/multiqc_data/multiqc_data.json", - "mags/multiqc/multiqc_data/multiqc_software_versions.txt", - "mags/multiqc/multiqc_data/multiqc_sources.txt", - "mags/multiqc/multiqc_report.html", "mags/upload", "mags/upload/manifests", "mags/upload/manifests/results", @@ -31,25 +23,28 @@ "mags/upload/manifests/results/MAG_upload/manifests_test", "mags/upload/manifests/results/MAG_upload/registered_MAGs_test.tsv", "mags/upload/manifests/results/MAG_upload/submission.xml", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_report.html", "pipeline_info", "pipeline_info/nf_core_seqsubmit_software_mqc_versions.yml" ], [ "genomes_metadata.csv:md5,ae2b884015d1850fa63365e8a64d9fc8", - "multiqc.log:md5,3d7cbf4832b5d456a9bc16b53b3ff94b", - "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", - "multiqc_data.json:md5,44d2f20f0db17bd3cc1d0257ab07bbe5", - "multiqc_software_versions.txt:md5,57fdf3fb8ab5ca39ae384d3cf1ae9e58", - 
"multiqc_sources.txt:md5,d2a044df39ce3c6abe5cdc2d67473490", - "multiqc_report.html:md5,fa67f512e57e908e19c840713ad0d1a4", "ENA_backup.json:md5,e339bef4b1e1e11c37ef72f3d9ef2c93", - "submission.xml:md5,4b7d1d7e1b883a3eac57e1267943a8d6" + "submission.xml:md5,4b7d1d7e1b883a3eac57e1267943a8d6", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f" ] ], "meta": { "nf-test": "0.9.0", "nextflow": "25.04.1" }, - "timestamp": "2026-03-25T16:10:57.783771" + "timestamp": "2026-03-27T12:22:02.015517" } } \ No newline at end of file diff --git a/tests/mag_multiple_bins_missing_metadata.nf.test.snap b/tests/mag_multiple_bins_missing_metadata.nf.test.snap index 2c14895..a1c91f8 100644 --- a/tests/mag_multiple_bins_missing_metadata.nf.test.snap +++ b/tests/mag_multiple_bins_missing_metadata.nf.test.snap @@ -22,14 +22,6 @@ [ "mags", "mags/genomes_metadata.csv", - "mags/multiqc", - "mags/multiqc/multiqc_data", - "mags/multiqc/multiqc_data/multiqc.log", - "mags/multiqc/multiqc_data/multiqc_citations.txt", - "mags/multiqc/multiqc_data/multiqc_data.json", - "mags/multiqc/multiqc_data/multiqc_software_versions.txt", - "mags/multiqc/multiqc_data/multiqc_sources.txt", - "mags/multiqc/multiqc_report.html", "mags/no_coverage_paired_reads", "mags/no_coverage_paired_reads/coverage", "mags/no_coverage_paired_reads/coverage/no_coverage_paired_reads.tsv", @@ -63,30 +55,33 @@ "mags/upload/manifests/results/MAG_upload/manifests_test", "mags/upload/manifests/results/MAG_upload/registered_MAGs_test.tsv", "mags/upload/manifests/results/MAG_upload/submission.xml", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_report.html", "pipeline_info", "pipeline_info/nf_core_seqsubmit_software_mqc_versions.yml" ], [ 
"genomes_metadata.csv:md5,a75d1d35c762863c487f010f6a000910", - "multiqc.log:md5,66e1af1df4fd84eb6ac1aa57ecb5d6b3", - "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", - "multiqc_data.json:md5,8123a0125e961755dd4447b63520b4ce", - "multiqc_software_versions.txt:md5,1dea254d76424e9a0e5ad498e05d45ed", - "multiqc_sources.txt:md5,d2a044df39ce3c6abe5cdc2d67473490", - "multiqc_report.html:md5,80665f9539038d7fab651749febec217", "no_coverage_paired_reads.tsv:md5,fb8374996c3bad9ddd296684d8c28628", "no_rna_presence_bac.gff:md5,df19e1b84ba6f691d20c72b397c88abf", "no_rna_presence.tsv:md5,abd958e8addba39c9e4d7f6b97a1a7c6", "no_taxonomy.bin2classification.txt:md5,e24c109efe807c7044a866999fd736f1", "no_taxonomy.txt:md5,0f2082d3e397fd630d605dd60993b131", "ENA_backup.json:md5,e339bef4b1e1e11c37ef72f3d9ef2c93", - "submission.xml:md5,4b7d1d7e1b883a3eac57e1267943a8d6" + "submission.xml:md5,4b7d1d7e1b883a3eac57e1267943a8d6", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f" ] ], "meta": { "nf-test": "0.9.0", "nextflow": "25.04.1" }, - "timestamp": "2026-03-25T16:37:24.426014" + "timestamp": "2026-03-27T12:32:04.486728" } } \ No newline at end of file diff --git a/tests/mag_no_coverage_paired_reads.nf.test.snap b/tests/mag_no_coverage_paired_reads.nf.test.snap index a56bd92..b0ccfa1 100644 --- a/tests/mag_no_coverage_paired_reads.nf.test.snap +++ b/tests/mag_no_coverage_paired_reads.nf.test.snap @@ -13,14 +13,6 @@ [ "mags", "mags/genomes_metadata.csv", - "mags/multiqc", - "mags/multiqc/multiqc_data", - "mags/multiqc/multiqc_data/multiqc.log", - "mags/multiqc/multiqc_data/multiqc_citations.txt", - "mags/multiqc/multiqc_data/multiqc_data.json", - "mags/multiqc/multiqc_data/multiqc_software_versions.txt", - "mags/multiqc/multiqc_data/multiqc_sources.txt", - "mags/multiqc/multiqc_report.html", "mags/no_coverage_paired_reads", "mags/no_coverage_paired_reads/coverage", "mags/no_coverage_paired_reads/coverage/no_coverage_paired_reads.tsv", @@ -36,26 +28,29 @@ 
"mags/upload/manifests/results/MAG_upload/manifests_test", "mags/upload/manifests/results/MAG_upload/registered_MAGs_test.tsv", "mags/upload/manifests/results/MAG_upload/submission.xml", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_report.html", "pipeline_info", "pipeline_info/nf_core_seqsubmit_software_mqc_versions.yml" ], [ "genomes_metadata.csv:md5,0538b1aec26168b35a416bd995b0a4a9", - "multiqc.log:md5,0d23f6c098aafb7527aa84bff4f017bc", - "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", - "multiqc_data.json:md5,16ed22f43c6846b619cde79610357816", - "multiqc_software_versions.txt:md5,db58c67ef62a45494220d8d9c0963d74", - "multiqc_sources.txt:md5,d2a044df39ce3c6abe5cdc2d67473490", - "multiqc_report.html:md5,f3ec6f48c2a841f9ac25475ce3eb83d7", "no_coverage_paired_reads.tsv:md5,fb8374996c3bad9ddd296684d8c28628", "ENA_backup.json:md5,e339bef4b1e1e11c37ef72f3d9ef2c93", - "submission.xml:md5,4b7d1d7e1b883a3eac57e1267943a8d6" + "submission.xml:md5,4b7d1d7e1b883a3eac57e1267943a8d6", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f" ] ], "meta": { "nf-test": "0.9.0", "nextflow": "25.04.1" }, - "timestamp": "2026-03-25T16:38:18.072528" + "timestamp": "2026-03-27T12:32:54.180853" } } \ No newline at end of file diff --git a/tests/mag_no_coverage_single_reads.nf.test.snap b/tests/mag_no_coverage_single_reads.nf.test.snap index 4638334..a95ad24 100644 --- a/tests/mag_no_coverage_single_reads.nf.test.snap +++ b/tests/mag_no_coverage_single_reads.nf.test.snap @@ -13,14 +13,6 @@ [ "mags", "mags/genomes_metadata.csv", - "mags/multiqc", - "mags/multiqc/multiqc_data", - "mags/multiqc/multiqc_data/multiqc.log", - "mags/multiqc/multiqc_data/multiqc_citations.txt", - "mags/multiqc/multiqc_data/multiqc_data.json", - 
"mags/multiqc/multiqc_data/multiqc_software_versions.txt", - "mags/multiqc/multiqc_data/multiqc_sources.txt", - "mags/multiqc/multiqc_report.html", "mags/no_coverage_single_reads", "mags/no_coverage_single_reads/coverage", "mags/no_coverage_single_reads/coverage/no_coverage_single_reads.tsv", @@ -36,26 +28,29 @@ "mags/upload/manifests/results/MAG_upload/manifests_test", "mags/upload/manifests/results/MAG_upload/registered_MAGs_test.tsv", "mags/upload/manifests/results/MAG_upload/submission.xml", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_report.html", "pipeline_info", "pipeline_info/nf_core_seqsubmit_software_mqc_versions.yml" ], [ "genomes_metadata.csv:md5,1f56050125362470f351ed99065af980", - "multiqc.log:md5,55c57a082a2b02f585e3b4d95e2e1e0e", - "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", - "multiqc_data.json:md5,64125ac19d849254a6f9d9199a722d32", - "multiqc_software_versions.txt:md5,db58c67ef62a45494220d8d9c0963d74", - "multiqc_sources.txt:md5,d2a044df39ce3c6abe5cdc2d67473490", - "multiqc_report.html:md5,ab21405b339e9c57d8df14fe935573af", "no_coverage_single_reads.tsv:md5,3791d9be880cbfacbc527e47623e3c9a", "ENA_backup.json:md5,e339bef4b1e1e11c37ef72f3d9ef2c93", - "submission.xml:md5,4b7d1d7e1b883a3eac57e1267943a8d6" + "submission.xml:md5,4b7d1d7e1b883a3eac57e1267943a8d6", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f" ] ], "meta": { "nf-test": "0.9.0", "nextflow": "25.04.1" }, - "timestamp": "2026-03-25T16:39:04.761638" + "timestamp": "2026-03-27T12:33:41.342215" } } \ No newline at end of file diff --git a/tests/mag_no_rna_presence.nf.test.snap b/tests/mag_no_rna_presence.nf.test.snap index bca8bdb..de398e7 100644 --- a/tests/mag_no_rna_presence.nf.test.snap +++ 
b/tests/mag_no_rna_presence.nf.test.snap @@ -19,14 +19,6 @@ [ "mags", "mags/genomes_metadata.csv", - "mags/multiqc", - "mags/multiqc/multiqc_data", - "mags/multiqc/multiqc_data/multiqc.log", - "mags/multiqc/multiqc_data/multiqc_citations.txt", - "mags/multiqc/multiqc_data/multiqc_data.json", - "mags/multiqc/multiqc_data/multiqc_software_versions.txt", - "mags/multiqc/multiqc_data/multiqc_sources.txt", - "mags/multiqc/multiqc_report.html", "mags/no_rna_presence", "mags/no_rna_presence/rna", "mags/no_rna_presence/rna/barrnap", @@ -47,27 +39,30 @@ "mags/upload/manifests/results/MAG_upload/manifests_test", "mags/upload/manifests/results/MAG_upload/registered_MAGs_test.tsv", "mags/upload/manifests/results/MAG_upload/submission.xml", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_report.html", "pipeline_info", "pipeline_info/nf_core_seqsubmit_software_mqc_versions.yml" ], [ "genomes_metadata.csv:md5,0f567491f038d4a0c1dbf4c05cdc26c0", - "multiqc.log:md5,3c9fde84606f1a1483713c1562446677", - "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", - "multiqc_data.json:md5,e96bef4fe2a8a61b6904748c0becaeee", - "multiqc_software_versions.txt:md5,6476cd933d258f4900a523bfdf417d9f", - "multiqc_sources.txt:md5,d2a044df39ce3c6abe5cdc2d67473490", - "multiqc_report.html:md5,d69b3c359ce4eb5dfc15b85f5f08e7c1", "no_rna_presence_bac.gff:md5,df19e1b84ba6f691d20c72b397c88abf", "no_rna_presence.tsv:md5,abd958e8addba39c9e4d7f6b97a1a7c6", "ENA_backup.json:md5,e339bef4b1e1e11c37ef72f3d9ef2c93", - "submission.xml:md5,4b7d1d7e1b883a3eac57e1267943a8d6" + "submission.xml:md5,4b7d1d7e1b883a3eac57e1267943a8d6", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f" ] ], "meta": { "nf-test": "0.9.0", "nextflow": "25.04.1" }, - "timestamp": 
"2026-03-25T16:43:47.785065" + "timestamp": "2026-03-27T12:38:24.654035" } } \ No newline at end of file diff --git a/tests/mag_no_study_complete_metadata.nf.test.snap b/tests/mag_no_study_complete_metadata.nf.test.snap index 905741e..11d461d 100644 --- a/tests/mag_no_study_complete_metadata.nf.test.snap +++ b/tests/mag_no_study_complete_metadata.nf.test.snap @@ -17,14 +17,6 @@ "mags/complete_metadata/upload/webin_cli", "mags/complete_metadata/upload/webin_cli/complete_metadata_accessions.tsv", "mags/genomes_metadata.csv", - "mags/multiqc", - "mags/multiqc/multiqc_data", - "mags/multiqc/multiqc_data/multiqc.log", - "mags/multiqc/multiqc_data/multiqc_citations.txt", - "mags/multiqc/multiqc_data/multiqc_data.json", - "mags/multiqc/multiqc_data/multiqc_software_versions.txt", - "mags/multiqc/multiqc_data/multiqc_sources.txt", - "mags/multiqc/multiqc_report.html", "mags/upload", "mags/upload/manifests", "mags/upload/manifests/results", @@ -34,25 +26,28 @@ "mags/upload/manifests/results/MAG_upload/manifests_test", "mags/upload/manifests/results/MAG_upload/registered_MAGs_test.tsv", "mags/upload/manifests/results/MAG_upload/submission.xml", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_report.html", "pipeline_info", "pipeline_info/nf_core_seqsubmit_software_mqc_versions.yml" ], [ "genomes_metadata.csv:md5,ae2b884015d1850fa63365e8a64d9fc8", - "multiqc.log:md5,eff5627704089c7e458bd72016d7b7da", - "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", - "multiqc_data.json:md5,992efdc0fa158002b32319ac9fad692e", - "multiqc_software_versions.txt:md5,e640e15de64dd5b779b09e71f35dbd21", - "multiqc_sources.txt:md5,d2a044df39ce3c6abe5cdc2d67473490", - "multiqc_report.html:md5,decd9656cca793864b9f5d8214b04544", 
"ENA_backup.json:md5,e339bef4b1e1e11c37ef72f3d9ef2c93", - "submission.xml:md5,4b7d1d7e1b883a3eac57e1267943a8d6" + "submission.xml:md5,4b7d1d7e1b883a3eac57e1267943a8d6", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f" ] ], "meta": { "nf-test": "0.9.0", "nextflow": "25.04.1" }, - "timestamp": "2026-03-25T16:44:33.682989" + "timestamp": "2026-03-27T12:39:07.952593" } } \ No newline at end of file diff --git a/tests/mag_no_taxonomy.nf.test.snap b/tests/mag_no_taxonomy.nf.test.snap index 8ca37da..b7ad2b2 100644 --- a/tests/mag_no_taxonomy.nf.test.snap +++ b/tests/mag_no_taxonomy.nf.test.snap @@ -10,14 +10,6 @@ [ "mags", "mags/genomes_metadata.csv", - "mags/multiqc", - "mags/multiqc/multiqc_data", - "mags/multiqc/multiqc_data/multiqc.log", - "mags/multiqc/multiqc_data/multiqc_citations.txt", - "mags/multiqc/multiqc_data/multiqc_data.json", - "mags/multiqc/multiqc_data/multiqc_software_versions.txt", - "mags/multiqc/multiqc_data/multiqc_sources.txt", - "mags/multiqc/multiqc_report.html", "mags/no_taxonomy", "mags/no_taxonomy/taxonomy", "mags/no_taxonomy/taxonomy/no_taxonomy.bin2classification.txt", @@ -34,27 +26,30 @@ "mags/upload/manifests/results/MAG_upload/manifests_test", "mags/upload/manifests/results/MAG_upload/registered_MAGs_test.tsv", "mags/upload/manifests/results/MAG_upload/submission.xml", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_report.html", "pipeline_info", "pipeline_info/nf_core_seqsubmit_software_mqc_versions.yml" ], [ "genomes_metadata.csv:md5,b1d01539d787b77e30b9bacd5b23d51f", - "multiqc.log:md5,fe17075d5d9582044aaff80aad847961", - "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", - "multiqc_data.json:md5,37284229add460f4535b3c27c946df30", - 
"multiqc_software_versions.txt:md5,57fdf3fb8ab5ca39ae384d3cf1ae9e58", - "multiqc_sources.txt:md5,d2a044df39ce3c6abe5cdc2d67473490", - "multiqc_report.html:md5,57bcf167566caa403280a0038f7d79cd", "no_taxonomy.bin2classification.txt:md5,e24c109efe807c7044a866999fd736f1", "no_taxonomy.txt:md5,0f2082d3e397fd630d605dd60993b131", "ENA_backup.json:md5,e339bef4b1e1e11c37ef72f3d9ef2c93", - "submission.xml:md5,4b7d1d7e1b883a3eac57e1267943a8d6" + "submission.xml:md5,4b7d1d7e1b883a3eac57e1267943a8d6", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f" ] ], "meta": { "nf-test": "0.9.0", "nextflow": "25.04.1" }, - "timestamp": "2026-03-25T16:48:43.584726" + "timestamp": "2026-03-27T12:43:17.099315" } } \ No newline at end of file diff --git a/tests/mag_one_contig.nf.test.snap b/tests/mag_one_contig.nf.test.snap index 34d5862..f11ba98 100644 --- a/tests/mag_one_contig.nf.test.snap +++ b/tests/mag_one_contig.nf.test.snap @@ -9,30 +9,25 @@ }, [ "mags", - "mags/multiqc", - "mags/multiqc/multiqc_data", - "mags/multiqc/multiqc_data/multiqc.log", - "mags/multiqc/multiqc_data/multiqc_citations.txt", - "mags/multiqc/multiqc_data/multiqc_data.json", - "mags/multiqc/multiqc_data/multiqc_software_versions.txt", - "mags/multiqc/multiqc_data/multiqc_sources.txt", - "mags/multiqc/multiqc_report.html", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_report.html", "pipeline_info", "pipeline_info/nf_core_seqsubmit_software_mqc_versions.yml" ], [ - "multiqc.log:md5,e3309f477c6395ea61d4923ed792aebc", - "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", - "multiqc_data.json:md5,7c8d1fd2dccde4249299d3f9617d2aa2", - "multiqc_software_versions.txt:md5,57fdf3fb8ab5ca39ae384d3cf1ae9e58", - 
"multiqc_sources.txt:md5,d2a044df39ce3c6abe5cdc2d67473490", - "multiqc_report.html:md5,a6ed0c8727067113dbefe2c1264c73cd" + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f" ] ], "meta": { "nf-test": "0.9.0", "nextflow": "25.04.1" }, - "timestamp": "2026-03-25T16:49:04.514285" + "timestamp": "2026-03-27T12:43:36.609186" } } \ No newline at end of file From 9a17ecaf9aed50d912341d78768991ecfbe9c86b Mon Sep 17 00:00:00 2001 From: Sofia Ochkalova Date: Fri, 27 Mar 2026 12:46:40 +0000 Subject: [PATCH 43/46] add secrets export in github actions --- .github/actions/nf-test/action.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/actions/nf-test/action.yml b/.github/actions/nf-test/action.yml index 3b9724c..77778a0 100644 --- a/.github/actions/nf-test/action.yml +++ b/.github/actions/nf-test/action.yml @@ -56,6 +56,12 @@ runs: channel-priority: strict conda-remove-defaults: true + - name: Configure Nextflow secrets + shell: bash + run: | + nextflow secrets set ENA_WEBIN "$WEBIN_ACCOUNT" + nextflow secrets set ENA_WEBIN_PASSWORD "$WEBIN_PASSWORD" + - name: Run nf-test shell: bash env: From 56fd07f8e0f17f9da01271050869f7f2d6683a12 Mon Sep 17 00:00:00 2001 From: Sofia Ochkalova Date: Fri, 27 Mar 2026 13:04:24 +0000 Subject: [PATCH 44/46] reduce memory allocation in conf files to 12GB for github actions --- conf/test.config | 2 +- conf/test_genome.config | 2 +- conf/test_mag_complete_metadata.conf | 2 +- conf/test_mag_multiple_bins_missing_metadata.conf | 2 +- conf/test_mag_no_coverage_paired_reads.conf | 2 +- conf/test_mag_no_coverage_single_reads.conf | 2 +- conf/test_mag_no_quality.conf | 2 +- conf/test_mag_no_rna_presence.conf | 2 +- conf/test_mag_no_study_complete_metadata.config | 2 +- conf/test_mag_no_taxonomy.conf | 2 +- conf/test_mag_one_contig.conf | 2 +- 11 files changed, 11 insertions(+), 11 deletions(-) diff --git a/conf/test.config b/conf/test.config index bb8ae69..f1d0141 100644 --- a/conf/test.config +++ b/conf/test.config @@ -1,7 
+1,7 @@ process { resourceLimits = [ cpus: 2, - memory: '15.GB', + memory: '12.GB', time: '1.h' ] } diff --git a/conf/test_genome.config b/conf/test_genome.config index c26a027..86f7c97 100644 --- a/conf/test_genome.config +++ b/conf/test_genome.config @@ -13,7 +13,7 @@ process { resourceLimits = [ cpus: 2, - memory: '20.GB', + memory: '12.GB', time: '1.h' ] } diff --git a/conf/test_mag_complete_metadata.conf b/conf/test_mag_complete_metadata.conf index eabc483..a25e31b 100644 --- a/conf/test_mag_complete_metadata.conf +++ b/conf/test_mag_complete_metadata.conf @@ -13,7 +13,7 @@ process { resourceLimits = [ cpus: 2, - memory: '16.GB', + memory: '12.GB', time: '1.h' ] } diff --git a/conf/test_mag_multiple_bins_missing_metadata.conf b/conf/test_mag_multiple_bins_missing_metadata.conf index cd5ddd2..fac4044 100644 --- a/conf/test_mag_multiple_bins_missing_metadata.conf +++ b/conf/test_mag_multiple_bins_missing_metadata.conf @@ -13,7 +13,7 @@ process { resourceLimits = [ cpus: 2, - memory: '16.GB', + memory: '12.GB', time: '1.h' ] } diff --git a/conf/test_mag_no_coverage_paired_reads.conf b/conf/test_mag_no_coverage_paired_reads.conf index f3b6ba3..954a9a2 100644 --- a/conf/test_mag_no_coverage_paired_reads.conf +++ b/conf/test_mag_no_coverage_paired_reads.conf @@ -13,7 +13,7 @@ process { resourceLimits = [ cpus: 2, - memory: '16.GB', + memory: '12.GB', time: '1.h' ] } diff --git a/conf/test_mag_no_coverage_single_reads.conf b/conf/test_mag_no_coverage_single_reads.conf index cd4cc20..4892424 100644 --- a/conf/test_mag_no_coverage_single_reads.conf +++ b/conf/test_mag_no_coverage_single_reads.conf @@ -13,7 +13,7 @@ process { resourceLimits = [ cpus: 2, - memory: '16.GB', + memory: '12.GB', time: '1.h' ] } diff --git a/conf/test_mag_no_quality.conf b/conf/test_mag_no_quality.conf index 75aea62..9d3724a 100644 --- a/conf/test_mag_no_quality.conf +++ b/conf/test_mag_no_quality.conf @@ -13,7 +13,7 @@ process { resourceLimits = [ cpus: 2, - memory: '16.GB', + memory: 
'12.GB', time: '1.h' ] } diff --git a/conf/test_mag_no_rna_presence.conf b/conf/test_mag_no_rna_presence.conf index d559b09..1577455 100644 --- a/conf/test_mag_no_rna_presence.conf +++ b/conf/test_mag_no_rna_presence.conf @@ -13,7 +13,7 @@ process { resourceLimits = [ cpus: 2, - memory: '16.GB', + memory: '12.GB', time: '1.h' ] } diff --git a/conf/test_mag_no_study_complete_metadata.config b/conf/test_mag_no_study_complete_metadata.config index aea18b1..18ccfad 100644 --- a/conf/test_mag_no_study_complete_metadata.config +++ b/conf/test_mag_no_study_complete_metadata.config @@ -13,7 +13,7 @@ process { resourceLimits = [ cpus: 2, - memory: '16.GB', + memory: '12.GB', time: '1.h' ] } diff --git a/conf/test_mag_no_taxonomy.conf b/conf/test_mag_no_taxonomy.conf index 28a2ca8..bb9f687 100644 --- a/conf/test_mag_no_taxonomy.conf +++ b/conf/test_mag_no_taxonomy.conf @@ -13,7 +13,7 @@ process { resourceLimits = [ cpus: 2, - memory: '16.GB', + memory: '12.GB', time: '1.h' ] } diff --git a/conf/test_mag_one_contig.conf b/conf/test_mag_one_contig.conf index c63811a..d9f5702 100644 --- a/conf/test_mag_one_contig.conf +++ b/conf/test_mag_one_contig.conf @@ -13,7 +13,7 @@ process { resourceLimits = [ cpus: 2, - memory: '16.GB', + memory: '12.GB', time: '1.h' ] } From 861b92adec343bea7a4e958252804ca1d13d0fc5 Mon Sep 17 00:00:00 2001 From: Sofia Ochkalova Date: Fri, 27 Mar 2026 13:49:04 +0000 Subject: [PATCH 45/46] clean up tests, make test_mag_multiple_bins_missing_metadata.nf default test.config --- conf/test.config | 30 ++++++++++++++ conf/test_assembly.config | 33 ---------------- conf/test_genome.config | 33 ---------------- ...st_mag_multiple_bins_missing_metadata.conf | 37 ------------------ nextflow.config | 3 -- nf-test.config | 2 +- tests/default.nf.test | 9 ++++- ...data.nf.test.snap => default.nf.test.snap} | 4 +- ...mag_multiple_bins_missing_metadata.nf.test | 39 ------------------- 9 files changed, 40 insertions(+), 150 deletions(-) delete mode 100644 
conf/test_assembly.config delete mode 100644 conf/test_genome.config delete mode 100644 conf/test_mag_multiple_bins_missing_metadata.conf rename tests/{mag_multiple_bins_missing_metadata.nf.test.snap => default.nf.test.snap} (97%) delete mode 100644 tests/mag_multiple_bins_missing_metadata.nf.test diff --git a/conf/test.config b/conf/test.config index f1d0141..8f56178 100644 --- a/conf/test.config +++ b/conf/test.config @@ -1,3 +1,15 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. + + Use as follows: + nextflow run nf-core/seqsubmit -profile test, --outdir + +---------------------------------------------------------------------------------------- +*/ + process { resourceLimits = [ cpus: 2, @@ -5,3 +17,21 @@ process { time: '1.h' ] } + +params { + config_profile_name = 'Test profile for minimal pipeline test' + config_profile_description = 'Runs --mode mags with multiple mags and missing metadata' + + // Input data + input = params.pipelines_testdata_base_path + 'seqsubmit/samplesheets/mag_multiple_bins_missing_metadata.csv' + + mode = "mags" + submission_study = "PRJEB98843" + centre_name = "TEST_CENTER" + + test_upload = true + + cat_db = params.pipelines_testdata_base_path + 'seqsubmit/test_data/small_cat_db/tax-db.tar.gz' + checkm2_db = null + +} diff --git a/conf/test_assembly.config b/conf/test_assembly.config deleted file mode 100644 index cc7430a..0000000 --- a/conf/test_assembly.config +++ /dev/null @@ -1,33 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Nextflow config file for running minimal tests -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Defines input files and everything 
required to run a fast and simple pipeline test. - - Use as follows: - nextflow run nf-core/seqsubmit -profile test, --outdir - ----------------------------------------------------------------------------------------- -*/ - -process { - resourceLimits = [ - cpus: 2, - memory: '8.GB', - time: '1.h' - ] -} - -params { - config_profile_name = 'Test profile' - config_profile_description = 'Minimal test dataset to check pipeline function' - - // Input data - input = 'assets/samplesheet_assembly.csv' - outdir = 'test_output' - - mode = "metagenomic_assemblies" - submission_study = "PRJEB98843" - centre_name = "TEST_CENTER" - -} diff --git a/conf/test_genome.config b/conf/test_genome.config deleted file mode 100644 index 86f7c97..0000000 --- a/conf/test_genome.config +++ /dev/null @@ -1,33 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Nextflow config file for running minimal tests -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Defines input files and everything required to run a fast and simple pipeline test. 
- - Use as follows: - nextflow run nf-core/seqsubmit -profile test, --outdir - ----------------------------------------------------------------------------------------- -*/ - -process { - resourceLimits = [ - cpus: 2, - memory: '12.GB', - time: '1.h' - ] -} - -params { - config_profile_name = 'Test profile' - config_profile_description = 'Minimal test dataset to check pipeline function' - - // Input data - input = params.pipelines_testdata_base_path + 'seqsubmit/samplesheets/samplesheet_genomesubmit.csv' - outdir = 'test_output' - - mode = "mags" - submission_study = "PRJEB98843" - centre_name = "TEST_CENTER" - -} diff --git a/conf/test_mag_multiple_bins_missing_metadata.conf b/conf/test_mag_multiple_bins_missing_metadata.conf deleted file mode 100644 index fac4044..0000000 --- a/conf/test_mag_multiple_bins_missing_metadata.conf +++ /dev/null @@ -1,37 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Nextflow config file for running minimal tests -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Defines input files and everything required to run a fast and simple pipeline test. 
- - Use as follows: - nextflow run nf-core/seqsubmit -profile test, --outdir - ----------------------------------------------------------------------------------------- -*/ - -process { - resourceLimits = [ - cpus: 2, - memory: '12.GB', - time: '1.h' - ] -} - -params { - config_profile_name = 'Test --mode mags multiple_bins_missing_metadata profile' - config_profile_description = 'Multi-bin MAG test with mixed missing metadata fields' - - // Input data - input = params.pipelines_testdata_base_path + 'seqsubmit/samplesheets/mag_multiple_bins_missing_metadata.csv' - - mode = "mags" - submission_study = "PRJEB98843" - centre_name = "TEST_CENTER" - - test_upload = true - - cat_db = params.pipelines_testdata_base_path + 'seqsubmit/test_data/small_cat_db/tax-db.tar.gz' - checkm2_db = null - -} diff --git a/nextflow.config b/nextflow.config index 1c4d9fe..21871a0 100644 --- a/nextflow.config +++ b/nextflow.config @@ -185,15 +185,12 @@ profiles { // TODO: figure out how to better orginise tests for different workflow types (bins, mags, metagenomic_assemblies) test { includeConfig 'conf/test.config' } test_full { includeConfig 'conf/test_full.config' } - test_genome { includeConfig 'conf/test_genome.config' } - test_assembly { includeConfig 'conf/test_assembly.config' } test_mag_complete_metadata { includeConfig 'conf/test_mag_complete_metadata.conf' } test_mag_no_study_complete_metadata { includeConfig 'conf/test_mag_no_study_complete_metadata.config' } test_mag_no_coverage_single_reads { includeConfig 'conf/test_mag_no_coverage_single_reads.conf' } test_mag_no_coverage_paired_reads { includeConfig 'conf/test_mag_no_coverage_paired_reads.conf' } test_mag_no_quality { includeConfig 'conf/test_mag_no_quality.conf' } test_mag_one_contig { includeConfig 'conf/test_mag_one_contig.conf' } - test_mag_multiple_bins_missing_metadata { includeConfig 'conf/test_mag_multiple_bins_missing_metadata.conf' } test_mag_no_rna_presence { includeConfig 'conf/test_mag_no_rna_presence.conf' } 
test_mag_no_taxonomy { includeConfig 'conf/test_mag_no_taxonomy.conf' } test_assembly_complete_metadata { includeConfig 'conf/test_assembly_complete_metadata.config' } diff --git a/nf-test.config b/nf-test.config index 613fc05..65efb9f 100644 --- a/nf-test.config +++ b/nf-test.config @@ -15,7 +15,7 @@ config { profile "test" // list of filenames or patterns that should be trigger a full test run - triggers 'nextflow.config', 'nf-test.config', 'conf/test_genome.config', 'conf/test_assembly.config', 'tests/nextflow.config', 'tests/.nftignore' + triggers 'nextflow.config', 'nf-test.config', 'tests/nextflow.config', 'tests/.nftignore' // load the necessary plugins plugins { diff --git a/tests/default.nf.test b/tests/default.nf.test index 44f2465..4b446ab 100644 --- a/tests/default.nf.test +++ b/tests/default.nf.test @@ -3,6 +3,8 @@ nextflow_pipeline { name "Test pipeline" script "../main.nf" tag "pipeline" + tag "mode_mag" + tag "test_mag_multiple_bins_missing_metadata" test("-profile test") { @@ -14,12 +16,15 @@ nextflow_pipeline { then { // stable_name: All files + folders in ${params.outdir}/ with a stable name - def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}', '**/manifests_test/*']) // stable_path: All files in ${params.outdir}/ with stable content def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + // Early failure no need to test the rest of snapshots + assert workflow.success assertAll( - { assert workflow.success}, { assert snapshot( + // Number of successful tasks + workflow.trace.succeeded().size(), // pipeline versions.yml file for multiqc from which Nextflow version is removed because we test pipelines on multiple Nextflow versions 
removeNextflowVersion("$outputDir/pipeline_info/nf_core_seqsubmit_software_mqc_versions.yml"), // All stable path name, with a relative path diff --git a/tests/mag_multiple_bins_missing_metadata.nf.test.snap b/tests/default.nf.test.snap similarity index 97% rename from tests/mag_multiple_bins_missing_metadata.nf.test.snap rename to tests/default.nf.test.snap index a1c91f8..e480827 100644 --- a/tests/mag_multiple_bins_missing_metadata.nf.test.snap +++ b/tests/default.nf.test.snap @@ -1,5 +1,5 @@ { - "-profile test_mag_multiple_bins_missing_metadata": { + "-profile test": { "content": [ 17, { @@ -82,6 +82,6 @@ "nf-test": "0.9.0", "nextflow": "25.04.1" }, - "timestamp": "2026-03-27T12:32:04.486728" + "timestamp": "2026-03-27T13:44:33.345037" } } \ No newline at end of file diff --git a/tests/mag_multiple_bins_missing_metadata.nf.test b/tests/mag_multiple_bins_missing_metadata.nf.test deleted file mode 100644 index a5189b5..0000000 --- a/tests/mag_multiple_bins_missing_metadata.nf.test +++ /dev/null @@ -1,39 +0,0 @@ -nextflow_pipeline { - - name "Test genome submission workflow - multiple_bins_missing_metadata" - script "../main.nf" - tag "pipeline" - tag "mode_mag" - tag "test_mag_multiple_bins_missing_metadata" - profile "test_mag_multiple_bins_missing_metadata" - - test("-profile test_mag_multiple_bins_missing_metadata") { - - when { - params { - outdir = "$outputDir" - } - } - - then { - // stable_name: All files + folders in ${params.outdir}/ with a stable name - def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}', '**/manifests_test/*']) - // stable_path: All files in ${params.outdir}/ with stable content - def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') - // Early failure no need to test the rest of snapshots - assert workflow.success - assertAll( - { assert snapshot( - // Number of successful tasks - workflow.trace.succeeded().size(), - // pipeline 
versions.yml file for multiqc from which Nextflow version is removed because we test pipelines on multiple Nextflow versions - removeNextflowVersion("$outputDir/pipeline_info/nf_core_seqsubmit_software_mqc_versions.yml"), - // All stable path name, with a relative path - stable_name, - // All files with stable contents - stable_path - ).match() } - ) - } - } -} From b100a7dc753048acdaf6823b80649b025ea64eaf Mon Sep 17 00:00:00 2001 From: Sofia Ochkalova Date: Mon, 30 Mar 2026 10:14:49 +0100 Subject: [PATCH 46/46] delete slow and greedy nf-test that runs CheckM2 --- tests/mag_no_quality.nf.test | 45 ------------------------------------ 1 file changed, 45 deletions(-) delete mode 100644 tests/mag_no_quality.nf.test diff --git a/tests/mag_no_quality.nf.test b/tests/mag_no_quality.nf.test deleted file mode 100644 index 62f7723..0000000 --- a/tests/mag_no_quality.nf.test +++ /dev/null @@ -1,45 +0,0 @@ -// ------------------------------------------------------------------ -// This test will download real CheckM2 DB and run Diamond search, -// which is slow and requires 20+Gb of memory -// We only run this test on specific occasions and it is not included -// in the main test suite -// ------------------------------------------------------------------ -nextflow_pipeline { - - name "Test genome submission workflow stub - no_quality" - script "../main.nf" - // tag "pipeline" - // tag "mode_mag" - tag "test_mag_no_quality" - profile "test_mag_no_quality" - - test("-profile test_mag_no_quality") { - - when { - params { - outdir = "$outputDir" - } - } - - then { - // stable_name: All files + folders in ${params.outdir}/ with a stable name - def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}', '**/manifests_test/*']) - // stable_path: All files in ${params.outdir}/ with stable content - def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') - // Early failure no need to test 
the rest of snapshots - assert workflow.success - assertAll( - { assert snapshot( - // Number of successful tasks - workflow.trace.succeeded().size(), - // pipeline versions.yml file for multiqc from which Nextflow version is removed because we test pipelines on multiple Nextflow versions - removeNextflowVersion("$outputDir/pipeline_info/nf_core_seqsubmit_software_mqc_versions.yml"), - // All stable path name, with a relative path - stable_name, - // All files with stable contents - stable_path - ).match() } - ) - } - } -}