diff --git a/api/app/alphafold/k8s_job.py b/api/app/alphafold/k8s_job.py index fc363b1..dae8991 100644 --- a/api/app/alphafold/k8s_job.py +++ b/api/app/alphafold/k8s_job.py @@ -1,4 +1,5 @@ from kubernetes import client +import shlex from app.shared.job_submitting import generate_salt from config import Config @@ -16,7 +17,9 @@ def set_db_paths(modelPreset, jobConfig): return db_paths_cmd def construct_command(jobConfig, user): - output_dir = f'/mnt/output/{user}/{jobConfig["simplename"]}' + simplename_quoted = shlex.quote(jobConfig["simplename"]) + user_quoted = shlex.quote(user) + output_dir = f'/mnt/output/{user_quoted}/{simplename_quoted}' db_paths_cmd = set_db_paths(jobConfig["modelPreset"], jobConfig) salt = generate_salt() @@ -24,54 +27,55 @@ def construct_command(jobConfig, user): mkdir_cmd = f'mkdir -p {output_dir}' alphafold_cmd = ( f'python /app/alphafold/run_alphafold.py ' - f'--fasta_paths={jobConfig["input"]} ' - f'--uniref90_database_path={jobConfig["uniref90"]} ' - f'--mgnify_database_path={jobConfig["mgnify"]} ' - f'--data_dir={jobConfig["data"]} ' - f'--template_mmcif_dir={jobConfig["mmcif"]} ' - f'--obsolete_pdbs_path={jobConfig["obsolete"]} ' + f'--fasta_paths={shlex.quote(jobConfig["input"])} ' + f'--uniref90_database_path={shlex.quote(jobConfig["uniref90"])} ' + f'--mgnify_database_path={shlex.quote(jobConfig["mgnify"])} ' + f'--data_dir={shlex.quote(jobConfig["data"])} ' + f'--template_mmcif_dir={shlex.quote(jobConfig["mmcif"])} ' + f'--obsolete_pdbs_path={shlex.quote(jobConfig["obsolete"])} ' f'{db_paths_cmd}' f'{jobConfig["uniclust"]} {jobConfig["full"]} ' - f'--output_dir=/mnt/output/{user} ' - f'--max_template_date={jobConfig["maxTemplateDate"]} ' - f'--db_preset={jobConfig["dbPreset"]} ' + f'--output_dir=/mnt/output/{user_quoted} ' + f'--max_template_date={shlex.quote(jobConfig["maxTemplateDate"])} ' + f'--db_preset={shlex.quote(jobConfig["dbPreset"])} ' f'{jobConfig["reduced"]} ' - f'--model_preset={jobConfig["modelPreset"]} ' + f'--model_preset={shlex.quote(jobConfig["modelPreset"])} ' f'--benchmark=False ' - f'--use_precomputed_msas={jobConfig["reuseMSAs"]} ' - f'--num_multimer_predictions_per_model={jobConfig["predictionsPerModel"]} ' + f'--use_precomputed_msas={shlex.quote(jobConfig["reuseMSAs"])} ' + f'--num_multimer_predictions_per_model={shlex.quote(jobConfig["predictionsPerModel"])} ' f'--models_to_relax={"all" if jobConfig["runRelax"] else "none"} ' f'--use_gpu_relax=True ' f'--logtostderr 2>&1 | tee {output_dir}/stdout' ) public_symlink_cmd = ( f'if [ "{jobConfig["makeResultsPublic"]}" == "true" ] ; ' - f'then ln -sfr {output_dir} /mnt/output/public/{jobConfig["simplename"]} ; fi' + f'then ln -sfr {output_dir} /mnt/output/public/{simplename_quoted} ; fi' ) readme_cmd = ( f'if [ -f "{Config.README_ALPHAFOLD2}" ]; then ' f'cp "{Config.README_ALPHAFOLD2}" {output_dir}/README.md; fi' ) compression_cmd = ( - f'cd /mnt/output/{user}; ' - f'cp -r {jobConfig["simplename"]} /storage; ' - f'zip -0 -r {jobConfig["simplename"]}.zip {jobConfig["simplename"]}; ' - f'mv {jobConfig["simplename"]}.zip {jobConfig["simplename"]}/download-{salt}.zip' + f'cd /mnt/output/{user_quoted}; ' + f'cp -r {simplename_quoted} /storage; ' + f'zip -0 -r {simplename_quoted}.zip {simplename_quoted}; ' + f'mv {simplename_quoted}.zip {simplename_quoted}/download-{salt}.zip' ) create_done_file_cmd = ( f'if [ -s "{output_dir}/ranking_debug.json" ] ; ' f'then touch "{output_dir}/alphafold.done"; fi' ) + email_quoted = shlex.quote(jobConfig.get("email", "")) email_notification_cmd = ( - f'if [ ! -z "{jobConfig["email"]}" ]; ' + f'if [ ! -z {email_quoted} ]; ' f'then if [ -s "{output_dir}/ranking_debug.json" ] ; ' - f'then echo -e "To:{jobConfig["email"]}\nFrom:{Config.EMAIL_FROM}\n' - f'Subject:Alphafold computation has finished\n\n' - f'Your AlphaFold computation \"{jobConfig["simplename"]}\" has finished, please visit {Config.BASE_URL}/result/{jobConfig["simplename"]} to view the result of your computation\n" | ssmtp -t; ' + f'then echo -e "To:{email_quoted}\\nFrom:{shlex.quote(Config.EMAIL_FROM)}\\n' + f'Subject:Alphafold computation has finished\\n\\n' + f'Your AlphaFold computation {simplename_quoted} has finished, please visit {shlex.quote(Config.BASE_URL)}/result/{simplename_quoted} to view the result of your computation\\n" | ssmtp -t; ' f'else echo -e ' - f'"To:{jobConfig["email"]}\nFrom:{Config.EMAIL_FROM}\n' - f'Subject:Alphafold computation has failed\n\n' - f'Your alphafold computation \"{jobConfig["simplename"]}\" has failed.\n" ' + f'"To:{email_quoted}\\nFrom:{shlex.quote(Config.EMAIL_FROM)}\\n' + f'Subject:Alphafold computation has failed\\n\\n' + f'Your alphafold computation {simplename_quoted} has failed.\\n" ' f'| cat - {output_dir}/stdout | ssmtp -t; exit 1; ' f' fi; fi' ) diff --git a/api/app/alphafold3/routes.py b/api/app/alphafold3/routes.py index 5be1d7f..a5adcc3 100644 --- a/api/app/alphafold3/routes.py +++ b/api/app/alphafold3/routes.py @@ -7,6 +7,7 @@ from app.alphafold3.validation import Job from app.alphafold3.v1_submission import save_input_config, run_alphafold3_prediction, save_json_input, save_ccd_file from app.shared.job_submitting import check_running_jobs_limit +from app.shared.input_validation import validate_email import logging @@ -71,6 +72,10 @@ def submit_af3_job_json(current_user): computation_config = json.loads(request.form["data"]) json_file = request.files.get("jsonFile") + email_err = validate_email(computation_config.get("email", "")) + if email_err: + return email_err + save_json = save_json_input(json_file, computation_config, current_user) if save_json: return save_json diff --git a/api/app/alphafold3/v1_submission.py b/api/app/alphafold3/v1_submission.py index d084912..d074e9c 100644 --- a/api/app/alphafold3/v1_submission.py +++ b/api/app/alphafold3/v1_submission.py @@ -3,6 +3,7 @@ import json import os import random +import shlex import string from app.shared.common import get_input_path, get_working_directory, get_output_path from app.shared.common import NAMESPACE @@ -105,11 +106,13 @@ def create_job_object(data, user): """Create a Kubernetes job object from the input data.""" salt=''.join(random.choice(string.ascii_letters + string.digits) for i in range(64)) - output_dir = f"/mnt/output/{user}/{data['name']}" - input_json = f"/mnt/input/{user}/{data['name']}.json" + name_quoted = shlex.quote(data['name']) + user_quoted = shlex.quote(user) + output_dir = f"/mnt/output/{user_quoted}/{name_quoted}" + input_json = f"/mnt/input/{user_quoted}/{name_quoted}.json" stdout_log = f"{output_dir}/stdout" use_precomputed = data.get("precomputedMSA") or "precomputedTemplates" in data - sanitised_name = data["name"].lower() + sanitised_name = shlex.quote(data["name"].lower()) mkdir_cmd = f"mkdir -p {output_dir}" if use_precomputed: @@ -128,34 +131,35 @@ def create_job_object(data, user): ) public_symlink_cmd = ( f'if [ "{data["public"]}" == "True" ] ; ' - f'then ln -sfr {output_dir} /mnt/output/public/{data["name"]} ; fi' + f'then ln -sfr {output_dir} /mnt/output/public/{name_quoted} ; fi' ) readme_cmd = ( f'if [ -f "{Config.README_ALPHAFOLD3}" ]; then ' f'cp "{Config.README_ALPHAFOLD3}" {output_dir}/README.md; fi' ) compression_cmd = ( - f'cd /mnt/output/{user} ; ' - f'cp -r {data["name"]} /storage; ' - f'zip -0 -r {data["name"]}.zip {data["name"]}; ' - f'mv {data["name"]}.zip {data["name"]}/download-{salt}.zip' + f'cd /mnt/output/{user_quoted} ; ' + f'cp -r {name_quoted} /storage; ' + f'zip -0 -r {name_quoted}.zip {name_quoted}; ' + f'mv {name_quoted}.zip {name_quoted}/download-{salt}.zip' ) create_done_file_cmd = ( f'if [ -s "{output_dir}/{sanitised_name}/{sanitised_name}_ranking_scores.csv" ] ; ' f'then touch "{output_dir}/alphafold3.done"; fi' ) + email_quoted = shlex.quote(data.get("email", "")) email_notification_cmd = ( - f'if [ ! -z "{data["email"]}" ]; ' - f'echo "Sending email notification to {data["email"]}"; ' + f'if [ ! -z {email_quoted} ]; ' + f'echo "Sending email notification to {email_quoted}"; ' f'then if [ -s "{output_dir}/{sanitised_name}/{sanitised_name}_ranking_scores.csv" ] ; ' - f'then echo -e "To:{data["email"]}\nFrom:{Config.EMAIL_FROM}\n' - f'Subject:AlphaFold 3 computation has finished successfully\n\n' - f'Your AlphaFold 3 computation \"{data["name"]}\" has finished, please visit {Config.BASE_URL}/result/{data["name"]} to view or download the result of your computation.\n" | ssmtp -t; ' + f'then echo -e "To:{email_quoted}\\nFrom:{shlex.quote(Config.EMAIL_FROM)}\\n' + f'Subject:AlphaFold 3 computation has finished successfully\\n\\n' + f'Your AlphaFold 3 computation {name_quoted} has finished, please visit {shlex.quote(Config.BASE_URL)}/result/{name_quoted} to view or download the result of your computation.\\n" | ssmtp -t; ' f'else echo -e ' - f'"To:{data["email"]}\nFrom:{Config.EMAIL_FROM}\n' - f'Subject:AlphaFold 3 computation has failed\n\n' - f'Your AlphaFold 3 computation \"{data["name"]}\" has failed.\n" ' - f'| cat - /mnt/output/{user}/{data["name"]}/stdout | ssmtp -t; exit 1; ' + f'"To:{email_quoted}\\nFrom:{shlex.quote(Config.EMAIL_FROM)}\\n' + f'Subject:AlphaFold 3 computation has failed\\n\\n' + f'Your AlphaFold 3 computation {name_quoted} has failed.\\n" ' + f'| cat - /mnt/output/{user_quoted}/{name_quoted}/stdout | ssmtp -t; exit 1; ' f' fi; fi' ) @@ -343,6 +347,10 @@ def save_json_input(json_file, computation_config, user): def save_ccd_file(file, job_name, user): """Save the CCD file to the server.""" + from app.shared.input_validation import validate_job_name + validation_error = validate_job_name(job_name) + if validation_error: + return validation_error if file: ccd_path = get_input_path(job_name+"-ccd", "cif", user) logging.info(f"Saving CCD file to {ccd_path}") diff --git a/api/app/alphafold3/validation.py b/api/app/alphafold3/validation.py index 50e848e..4e74389 100644 --- a/api/app/alphafold3/validation.py +++ b/api/app/alphafold3/validation.py @@ -5,6 +5,9 @@ import json import logging +# strict email regex +_EMAIL_RE = re.compile(r"^[a-zA-Z0-9._-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$") + # Define TypedDict for each sequence variant class ProteinDict(TypedDict): protein: "Protein" @@ -72,9 +75,9 @@ class Job(BaseModel): @validator("email") def validate_email_af3(cls, v): - if "@" not in v: + if not isinstance(v, str) or len(v) > 254: raise ValueError("Invalid email format") - if not re.match(r"[^@]+@[^@]+\.[^@]+", v): + if not _EMAIL_RE.match(v): raise ValueError("Invalid email format") return v diff --git a/api/app/colabfold/utilities.py b/api/app/colabfold/utilities.py index 119d2dc..7661381 100644 --- a/api/app/colabfold/utilities.py +++ b/api/app/colabfold/utilities.py @@ -1,4 +1,5 @@ import random +import shlex import string from flask import jsonify from kubernetes import client @@ -206,7 +207,10 @@ def create_file_config(jobConfig): def create_job_object(jobConfig, user): """Create a Kubernetes Job object.""" salt=''.join(random.choice(string.ascii_letters + string.digits) for i in range(64)) - cfArgs = f'mkdir -p /mnt/output/{user}/{jobConfig["simplename"]} && /opt/conda/bin/colabfold_batch {jobConfig["input"]} /mnt/output/{user}/{jobConfig["simplename"]} --model-type {jobConfig["modelPreset"]} --use-gpu-relax --num-relax {jobConfig["numRelax"]} {jobConfig["templateMode"]} --msa-mode {jobConfig["msaMode"]} {jobConfig["maxMSA"]} --pair-mode {jobConfig["pairMode"]} {jobConfig["useDropout"]} --recycle-early-stop-tolerance {jobConfig["recycleTolerance"]} --num-recycle {jobConfig["numRecycles"]} --num-models {jobConfig["numModels"]} --num-seeds {jobConfig["numSeeds"]} --host-url http://colabsearch.colabsearch-ns.svc.cluster.local 2>&1 | tee /mnt/output/{user}/{jobConfig["simplename"]}/stdout && if [ "{jobConfig["makeResultsPublic"]}" == "true" ] ; then ln -sfr /mnt/output/{user}/{jobConfig["simplename"]} /mnt/output/public/{jobConfig["simplename"]} ; fi ; if [ -f "{Config.README_COLABFOLD}" ]; then cp "{Config.README_COLABFOLD}" /mnt/output/{user}/{jobConfig["simplename"]}/README.md; fi ; cd /mnt/output/{user} ; cp -r {jobConfig["simplename"]} /storage ; zip -0 -r {jobConfig["simplename"]}.zip {jobConfig["simplename"]}; mv {jobConfig["simplename"]}.zip {jobConfig["simplename"]}/download-{salt}.zip ; cd "/mnt/output/{user}/{jobConfig["simplename"]}"; if ls *.done.txt ; then touch "/mnt/output/{user}/{jobConfig["simplename"]}/colabfold.done"; fi; if [ ! -z "{jobConfig["email"]}" ]; then cd "/mnt/output/{user}/{jobConfig["simplename"]}"; if ls *.done.txt ; then echo -e "To:{jobConfig["email"]}\nFrom:{Config.EMAIL_FROM}\nSubject:ColabFold computation has finished\n\nYour ColabFold computation \"{jobConfig["simplename"]}\" has finished, please visit {Config.BASE_URL}/result/{jobConfig["simplename"]} to view the result of your computation\n" | ssmtp -t; else echo -e "To:{jobConfig["email"]}\nFrom:{Config.EMAIL_FROM}\nSubject:Colabfold computation has failed\n\nYour ColabFold computation \"{jobConfig["simplename"]}\" has failed.\n" | cat - /mnt/output/{user}/{jobConfig["simplename"]}/stdout | ssmtp -t; fi; fi' + email_quoted = shlex.quote(jobConfig.get("email", "")) + simplename_quoted = shlex.quote(jobConfig["simplename"]) + user_quoted = shlex.quote(user) + cfArgs = f'mkdir -p /mnt/output/{user_quoted}/{simplename_quoted} && /opt/conda/bin/colabfold_batch {shlex.quote(jobConfig["input"])} /mnt/output/{user_quoted}/{simplename_quoted} --model-type {shlex.quote(jobConfig["modelPreset"])} --use-gpu-relax --num-relax {shlex.quote(str(jobConfig["numRelax"]))} {jobConfig["templateMode"]} --msa-mode {shlex.quote(jobConfig["msaMode"])} {jobConfig["maxMSA"]} --pair-mode {shlex.quote(jobConfig["pairMode"])} {jobConfig["useDropout"]} --recycle-early-stop-tolerance {shlex.quote(jobConfig["recycleTolerance"])} --num-recycle {shlex.quote(jobConfig["numRecycles"])} --num-models {shlex.quote(jobConfig["numModels"])} --num-seeds {shlex.quote(jobConfig["numSeeds"])} --host-url http://colabsearch.colabsearch-ns.svc.cluster.local 2>&1 | tee /mnt/output/{user_quoted}/{simplename_quoted}/stdout && if [ "{jobConfig["makeResultsPublic"]}" == "true" ] ; then ln -sfr /mnt/output/{user_quoted}/{simplename_quoted} /mnt/output/public/{simplename_quoted} ; fi ; if [ -f "{Config.README_COLABFOLD}" ]; then cp "{Config.README_COLABFOLD}" /mnt/output/{user_quoted}/{simplename_quoted}/README.md; fi ; cd /mnt/output/{user_quoted} ; cp -r {simplename_quoted} /storage ; zip -0 -r {simplename_quoted}.zip {simplename_quoted}; mv {simplename_quoted}.zip {simplename_quoted}/download-{salt}.zip ; cd "/mnt/output/{user_quoted}/{simplename_quoted}"; if ls *.done.txt ; then touch "/mnt/output/{user_quoted}/{simplename_quoted}/colabfold.done"; fi; if [ ! -z {email_quoted} ]; then cd "/mnt/output/{user_quoted}/{simplename_quoted}"; if ls *.done.txt ; then echo -e "To:{email_quoted}\nFrom:{shlex.quote(Config.EMAIL_FROM)}\nSubject:ColabFold computation has finished\n\nYour ColabFold computation {simplename_quoted} has finished, please visit {shlex.quote(Config.BASE_URL)}/result/{simplename_quoted} to view the result of your computation\n" | ssmtp -t; else echo -e "To:{email_quoted}\nFrom:{shlex.quote(Config.EMAIL_FROM)}\nSubject:Colabfold computation has failed\n\nYour ColabFold computation {simplename_quoted} has failed.\n" | cat - /mnt/output/{user_quoted}/{simplename_quoted}/stdout | ssmtp -t; fi; fi' if len(jobConfig['proteinSequence']) > 5000: logging.info(f"Large sequence detected ({len(jobConfig['proteinSequence'])} residues), allocating more resources.") diff --git a/api/app/esmfold/utilities.py b/api/app/esmfold/utilities.py index a79485c..48e9293 100644 --- a/api/app/esmfold/utilities.py +++ b/api/app/esmfold/utilities.py @@ -1,4 +1,5 @@ import random +import shlex import string from flask import jsonify from kubernetes import client @@ -104,7 +105,10 @@ def create_file_config(jobConfig): def create_job_object(jobConfig, user): """Create Kubernetes Job Object.""" salt = ''.join(random.choice(string.ascii_letters + string.digits) for i in range(64)) - esmfArgs = f'mkdir -p /mnt/output/{user}/{jobConfig["outputDir"]} && /usr/bin/esm-fold -i {jobConfig["input"]} -o /mnt/output/{user}/{jobConfig["outputDir"]} --num-recycles {jobConfig["numRecycles"]} -m /data/esmfold 2>&1 | tee /mnt/output/{user}/{jobConfig["outputDir"]}/stdout && if [ "{jobConfig["makeResultsPublic"]}" == "true" ] ; then ln -sfr /mnt/output/{user}/{jobConfig["outputDir"]} /mnt/output/public/{jobConfig["outputDir"]} ; fi ; if [ -f "{Config.README_ESMFOLD}" ]; then cp "{Config.README_ESMFOLD}" /mnt/output/{user}/{jobConfig["outputDir"]}/README.md; fi ; cd /mnt/output/{user} ; cp -r {jobConfig["outputDir"]} /storage ; zip -0 -r {jobConfig["outputDir"]}.zip {jobConfig["outputDir"]}; mv {jobConfig["outputDir"]}.zip {jobConfig["outputDir"]}/download-{salt}.zip ; if [ -s "/mnt/output/{user}/{jobConfig["outputDir"]}/"*.pdb ] ; then touch "/mnt/output/{user}/{jobConfig["outputDir"]}/esmfold.done"; fi; if [ ! -z "{jobConfig["email"]}" ]; then if [ -s "/mnt/output/{user}/{jobConfig["outputDir"]}/"*.pdb ] ; then echo -e "To:{jobConfig["email"]}\nFrom:{Config.EMAIL_FROM}\nSubject:ESMFold computation has finished\n\nYour ESMFold computation \"{jobConfig["simplename"]}\" has finished, please visit {Config.BASE_URL}/result/{jobConfig["simplename"]} to view the result of your computation\n" | ssmtp -t; else echo -e "To:{jobConfig["email"]}\nFrom:{Config.EMAIL_FROM}\nSubject:ESMFold computation has failed\n\nYour ESMFold computation \"{jobConfig["simplename"]}\" has failed.\n" | cat - /mnt/output/{user}/{jobConfig["outputDir"]}/stdout | ssmtp -t; fi; fi' + email_quoted = shlex.quote(jobConfig.get("email", "")) + output_dir_quoted = shlex.quote(jobConfig["outputDir"]) + user_quoted = shlex.quote(user) + esmfArgs = f'mkdir -p /mnt/output/{user_quoted}/{output_dir_quoted} && /usr/bin/esm-fold -i {shlex.quote(jobConfig["input"])} -o /mnt/output/{user_quoted}/{output_dir_quoted} --num-recycles {shlex.quote(jobConfig["numRecycles"])} -m /data/esmfold 2>&1 | tee /mnt/output/{user_quoted}/{output_dir_quoted}/stdout && if [ "{jobConfig["makeResultsPublic"]}" == "true" ] ; then ln -sfr /mnt/output/{user_quoted}/{output_dir_quoted} /mnt/output/public/{output_dir_quoted} ; fi ; if [ -f "{Config.README_ESMFOLD}" ]; then cp "{Config.README_ESMFOLD}" /mnt/output/{user_quoted}/{output_dir_quoted}/README.md; fi ; cd /mnt/output/{user_quoted} ; cp -r {output_dir_quoted} /storage ; zip -0 -r {output_dir_quoted}.zip {output_dir_quoted}; mv {output_dir_quoted}.zip {output_dir_quoted}/download-{salt}.zip ; if [ -s "/mnt/output/{user_quoted}/{output_dir_quoted}/"*.pdb ] ; then touch "/mnt/output/{user_quoted}/{output_dir_quoted}/esmfold.done"; fi; if [ ! -z {email_quoted} ]; then if [ -s "/mnt/output/{user_quoted}/{output_dir_quoted}/"*.pdb ] ; then echo -e "To:{email_quoted}\\nFrom:{shlex.quote(Config.EMAIL_FROM)}\\nSubject:ESMFold computation has finished\\n\\nYour ESMFold computation {shlex.quote(jobConfig["simplename"])} has finished, please visit {shlex.quote(Config.BASE_URL)}/result/{shlex.quote(jobConfig["simplename"])} to view the result of your computation\\n" | ssmtp -t; else echo -e "To:{email_quoted}\\nFrom:{shlex.quote(Config.EMAIL_FROM)}\\nSubject:ESMFold computation has failed\\n\\nYour ESMFold computation {shlex.quote(jobConfig["simplename"])} has failed.\\n" | cat - /mnt/output/{user_quoted}/{output_dir_quoted}/stdout | ssmtp -t; fi; fi' job = client.V1Job( api_version="batch/v1", diff --git a/api/app/omegafold/utilities.py b/api/app/omegafold/utilities.py index cf2a94f..fec2379 100644 --- a/api/app/omegafold/utilities.py +++ b/api/app/omegafold/utilities.py @@ -1,4 +1,5 @@ import random +import shlex import string from flask import jsonify from kubernetes import client @@ -116,7 +117,10 @@ def create_file_config(jobConfig): def create_job_object(jobConfig, user): """Create Kubernetes Job Object.""" salt = ''.join(random.choice(string.ascii_letters + string.digits) for i in range(64)) - ofArgs = f'mkdir -p /mnt/output/{user}/{jobConfig["outputDir"]} && /usr/local/bin/omegafold {jobConfig["input"]} /mnt/output/{user}/{jobConfig["outputDir"]} --num_cycle {jobConfig["numCycle"]} --subbatch_size {jobConfig["subbatchSize"]} --weights_file {jobConfig["weights_file"]} --pseudo_msa_mask_rate {jobConfig["pseudoMsaMask"]} --num_pseudo_msa {jobConfig["numPseudoMSAs"]} 2>&1 | tee /mnt/output/{user}/{jobConfig["outputDir"]}/stdout && if [ "{jobConfig["makeResultsPublic"]}" == "true" ] ; then ln -sfr /mnt/output/{user}/{jobConfig["outputDir"]} /mnt/output/public/{jobConfig["outputDir"]} ; fi ; if [ -f "{Config.README_OMEGAFOLD}" ]; then cp "{Config.README_OMEGAFOLD}" /mnt/output/{user}/{jobConfig["outputDir"]}/README.md; fi ; cd /mnt/output/{user} ; cp -r {jobConfig["outputDir"]} /storage ; zip -0 -r {jobConfig["outputDir"]}.zip {jobConfig["outputDir"]}; mv {jobConfig["outputDir"]}.zip {jobConfig["outputDir"]}/download-{salt}.zip ; if [ -s "/mnt/output/{user}/{jobConfig["outputDir"]}/"*.pdb ] ; then touch "/mnt/output/{user}/{jobConfig["outputDir"]}/omegafold.done"; fi; if [ ! -z "{jobConfig["email"]}" ]; then if [ -s "/mnt/output/{user}/{jobConfig["outputDir"]}/"*.pdb ] ; then echo -e "To:{jobConfig["email"]}\nFrom:{Config.EMAIL_FROM}\nSubject:OmegaFold computation has finished\n\nYour OmegaFold computation "\"{jobConfig["simplename"]}\"" has finished, please visit {Config.BASE_URL}/result/{jobConfig["simplename"]} to view the result of your computation\n" | ssmtp -t; else echo -e "To:{jobConfig["email"]}\nFrom:{Config.EMAIL_FROM}\nSubject:Omegafold computation has failed\n\nYour omegafold computation "\"{jobConfig["simplename"]}\"" has failed.\n" | cat - /mnt/output/{user}/{jobConfig["outputDir"]}/stdout | ssmtp -t; fi; fi' + email_quoted = shlex.quote(jobConfig.get("email", "")) + output_dir_quoted = shlex.quote(jobConfig["outputDir"]) + user_quoted = shlex.quote(user) + ofArgs = f'mkdir -p /mnt/output/{user_quoted}/{output_dir_quoted} && /usr/local/bin/omegafold {shlex.quote(jobConfig["input"])} /mnt/output/{user_quoted}/{output_dir_quoted} --num_cycle {shlex.quote(jobConfig["numCycle"])} --subbatch_size {shlex.quote(jobConfig["subbatchSize"])} --weights_file {shlex.quote(jobConfig["weights_file"])} --pseudo_msa_mask_rate {shlex.quote(jobConfig["pseudoMsaMask"])} --num_pseudo_msa {shlex.quote(jobConfig["numPseudoMSAs"])} 2>&1 | tee /mnt/output/{user_quoted}/{output_dir_quoted}/stdout && if [ "{jobConfig["makeResultsPublic"]}" == "true" ] ; then ln -sfr /mnt/output/{user_quoted}/{output_dir_quoted} /mnt/output/public/{output_dir_quoted} ; fi ; if [ -f "{Config.README_OMEGAFOLD}" ]; then cp "{Config.README_OMEGAFOLD}" /mnt/output/{user_quoted}/{output_dir_quoted}/README.md; fi ; cd /mnt/output/{user_quoted} ; cp -r {output_dir_quoted} /storage ; zip -0 -r {output_dir_quoted}.zip {output_dir_quoted}; mv {output_dir_quoted}.zip {output_dir_quoted}/download-{salt}.zip ; if [ -s "/mnt/output/{user_quoted}/{output_dir_quoted}/"*.pdb ] ; then touch "/mnt/output/{user_quoted}/{output_dir_quoted}/omegafold.done"; fi; if [ ! -z {email_quoted} ]; then if [ -s "/mnt/output/{user_quoted}/{output_dir_quoted}/"*.pdb ] ; then echo -e "To:{email_quoted}\\nFrom:{shlex.quote(Config.EMAIL_FROM)}\\nSubject:OmegaFold computation has finished\\n\\nYour OmegaFold computation {shlex.quote(jobConfig["simplename"])} has finished, please visit {shlex.quote(Config.BASE_URL)}/result/{shlex.quote(jobConfig["simplename"])} to view the result of your computation\\n" | ssmtp -t; else echo -e "To:{email_quoted}\\nFrom:{shlex.quote(Config.EMAIL_FROM)}\\nSubject:Omegafold computation has failed\\n\\nYour omegafold computation {shlex.quote(jobConfig["simplename"])} has failed.\\n" | cat - /mnt/output/{user_quoted}/{output_dir_quoted}/stdout | ssmtp -t; fi; fi' job = client.V1Job( api_version="batch/v1", diff --git a/api/app/shared/common.py b/api/app/shared/common.py index b2dec8c..bc79e20 100644 --- a/api/app/shared/common.py +++ b/api/app/shared/common.py @@ -28,7 +28,7 @@ def get_input_dir(user="public"): def get_input_path(job, file_type, user="public"): """ - Return the path to the input directory. Based on the given user parameter, + Return the path to the input directory. Based on the given user parameter, it returns the path to the public or user's directory and the appropriate .fasta or .json file. Parameters: @@ -47,6 +47,10 @@ def get_input_path(job, file_type, user="public"): # Construct the full path file_path = os.path.join(base_dir, "input", user, f"{job}.{file_type}") + file_path = os.path.abspath(file_path) + expected_prefix = os.path.abspath(os.path.join(base_dir, "input", user)) + os.sep + if not file_path.startswith(expected_prefix): + raise ValueError("Invalid job name: path traversal detected.") return file_path @@ -56,7 +60,7 @@ def get_output_path(job_name, user="public"): base_dir = get_working_directory() """ - Return the path to the output directory. Based on the given user parameter, + Return the path to the output directory. Based on the given user parameter, it returns the path to the public or user's directory, optionally including the job name. Parameters: @@ -69,7 +73,12 @@ def get_output_path(job_name, user="public"): base_dir = get_working_directory() # Return the path to the specific job's output directory - return os.path.join(base_dir, "output", user, job_name) + output_path = os.path.join(base_dir, "output", user, job_name) + output_path = os.path.abspath(output_path) + expected_prefix = os.path.abspath(os.path.join(base_dir, "output", user)) + os.sep + if not output_path.startswith(expected_prefix): + raise ValueError("Invalid job name: path traversal detected.") + return output_path def get_user_jobs(user): diff --git a/api/app/shared/input_validation.py b/api/app/shared/input_validation.py index ee59e68..bd31a86 100644 --- a/api/app/shared/input_validation.py +++ b/api/app/shared/input_validation.py @@ -3,12 +3,17 @@ from datetime import datetime import logging +# strict email regex +_EMAIL_RE = re.compile(r"^[a-zA-Z0-9._-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$") + def validate_job_name(job_name): """Validate the job name.""" if not isinstance(job_name, str): return jsonify({"error": "Job name must be a string."}), 400 if len(job_name) > 36: return jsonify({"error": "Job name must be less than 36 characters."}), 400 + if ".." in job_name or "/" in job_name or "\\" in job_name: + return jsonify({"error": "Job name must consist of alphanumeric characters or '-'."}), 400 if not re.match(r"^[a-zA-Z0-9]([-a-zA-Z0-9]*[a-zA-Z0-9])?$", job_name): return jsonify({"error": "Job name must consist of alphanumeric characters or '-'."}), 400 return None @@ -56,6 +61,10 @@ def validate_numeric_input(value): def validate_email(email): """Validate the email address.""" - if not re.match(r"[^@]+@[^@]+\.[^@]+", email): + if not isinstance(email, str): + return jsonify({"error": "Invalid email address."}), 400 + if len(email) > 254: + return jsonify({"error": "Invalid email address."}), 400 + if not _EMAIL_RE.match(email): return jsonify({"error": "Invalid email address."}), 400 return None \ No newline at end of file diff --git a/api/tests/unit/test_input_validation.py b/api/tests/unit/test_input_validation.py index 45fbcb3..09b3d98 100644 --- a/api/tests/unit/test_input_validation.py +++ b/api/tests/unit/test_input_validation.py @@ -81,7 +81,9 @@ def test_validate_email(app): # Valid cases assert validate_email("example@e-infra.com") is None assert validate_email("user2654@mail.muni.cz") is None - + assert validate_email("first.last@sub.domain.org") is None + assert validate_email("user_name@domain.co.uk") is None + # Invalid cases response, status_code = validate_email("example.com") # Missing '@' assert status_code == 400 @@ -94,4 +96,78 @@ def test_validate_email(app): response, status_code = validate_email("example@muni") # Missing domain assert status_code == 400 assert "Invalid email address" in response.json["error"] - + + # Command injection payloads must be rejected + response, status_code = validate_email('email@example.com$(echo ahoj > /storage/hehe.txt)') + assert status_code == 400 + assert "Invalid email address" in response.json["error"] + + response, status_code = validate_email('email@example.com`whoami`') + assert status_code == 400 + assert "Invalid email address" in response.json["error"] + + response, status_code = validate_email('email@example.com; rm -rf /') + assert status_code == 400 + assert "Invalid email address" in response.json["error"] + + response, status_code = validate_email('email@example.com|cat /etc/passwd') + assert status_code == 400 + assert "Invalid email address" in response.json["error"] + + response, status_code = validate_email('email@example.com && ls') + assert status_code == 400 + assert "Invalid email address" in response.json["error"] + + response, status_code = validate_email('email@example.com < /etc/passwd') + assert status_code == 400 + assert "Invalid email address" in response.json["error"] + + response, status_code = validate_email('email@example.com > /tmp/out') + assert status_code == 400 + assert "Invalid email address" in response.json["error"] + + response, status_code = validate_email(123) # Non-string + assert status_code == 400 + assert "Invalid email address" in response.json["error"] + + response, status_code = validate_email("a" * 250 + "@test.com") # Too long (>254) + assert status_code == 400 + assert "Invalid email address" in response.json["error"] + + +def test_validate_job_name_path_traversal(app): + """Test that path traversal payloads are rejected by validate_job_name.""" + traversal_names = [ + "../etc/passwd", + "..", + "foo/../bar", + "foo/bar", + "foo\\bar", + "foo; rm -rf /", + "foo|cat /etc/passwd", + "foo$(whoami)", + "foo`whoami`", + ] + for name in traversal_names: + result = validate_job_name(name) + assert result is not None, f"Expected rejection for job name: {name!r}" + response, status_code = result + assert status_code == 400 + assert "Job name must consist" in response.json["error"] + + +def test_get_input_path_rejects_traversal(app, tmp_path): + """Test that get_input_path raises ValueError when path traversal is attempted.""" + from app.shared.common import get_input_path + with patch("app.shared.common.get_working_directory", return_value=str(tmp_path)): + with pytest.raises(ValueError, match="path traversal detected"): + get_input_path("../../../etc/passwd", "json", "user1") + + +def test_get_output_path_rejects_traversal(app, tmp_path): + """Test that get_output_path raises ValueError when path traversal is attempted.""" + from app.shared.common import get_output_path + with patch("app.shared.common.get_working_directory", return_value=str(tmp_path)): + with pytest.raises(ValueError, match="path traversal detected"): + get_output_path("../../../etc/passwd", "user1") + diff --git a/web/app/alphafold/page.tsx b/web/app/alphafold/page.tsx index 1640c09..c47b5e8 100644 --- a/web/app/alphafold/page.tsx +++ b/web/app/alphafold/page.tsx @@ -93,7 +93,7 @@ EPRHAQLKNLLFFMLKSSSDRVIPQFETTYTELFQGLETELAKNGKAKFNDVGEQAAFRFLGRAYFNSNPEETKLGTSAP if (!email) { errors.email = "Email is required."; - } else if (!/\b[a-zA-Z0-9]+@[a-zA-Z0-9]+\.[a-zA-Z0-9]+\b/.test(email)) { + } else if (!/^[a-zA-Z0-9._-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$/.test(email)) { errors.email = "Email is invalid."; } diff --git a/web/app/colabfold/page.tsx b/web/app/colabfold/page.tsx index 5b3f90e..ebf36e1 100644 --- a/web/app/colabfold/page.tsx +++ b/web/app/colabfold/page.tsx @@ -144,7 +144,7 @@ export default function Colabfold() { if (!email) { errors.email = "Email is required."; - } else if (!/\b[a-zA-Z0-9]+@[a-zA-Z0-9]+\.[a-zA-Z0-9]+\b/.test(email)) { + } else if (!/^[a-zA-Z0-9._-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$/.test(email)) { errors.email = "Email is invalid."; } diff --git a/web/app/esmfold/page.tsx b/web/app/esmfold/page.tsx index f051438..81c66a8 100644 --- a/web/app/esmfold/page.tsx +++ b/web/app/esmfold/page.tsx @@ -123,7 +123,7 @@ export default function ESMfold() { if (!email) { errors.email = "Email is required."; - } else if (!/\b[a-zA-Z0-9]+@[a-zA-Z0-9]+\.[a-zA-Z0-9]+\b/.test(email)) { + } else if (!/^[a-zA-Z0-9._-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$/.test(email)) { errors.email = "Email is invalid."; } diff --git a/web/app/hooks/alphafold3/useJsonValidation.ts b/web/app/hooks/alphafold3/useJsonValidation.ts index 416149a..3836b04 100644 --- a/web/app/hooks/alphafold3/useJsonValidation.ts +++ b/web/app/hooks/alphafold3/useJsonValidation.ts @@ -29,7 +29,7 @@ export const useJsonValidation = (jsonFile: File | null, jobName: string, email: if (!email || email.trim() === "") { errorsJson.email = "Email is required."; - } else if (!/\b[a-zA-Z0-9]+@[a-zA-Z0-9]+\.[a-zA-Z0-9]+\b/.test(email)) { + } else if (!/^[a-zA-Z0-9._-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$/.test(email)) { errorsJson.email = "Invalid email format."; } diff --git a/web/app/hooks/useFormValidation.ts b/web/app/hooks/useFormValidation.ts index 3d9b454..9877609 100644 --- a/web/app/hooks/useFormValidation.ts +++ b/web/app/hooks/useFormValidation.ts @@ -26,7 +26,7 @@ export const useFormValidation = (jobName: string, modelSeeds: string, sequences if (!email) { errors.email = "Email is required"; - } else if (!/\b[a-zA-Z0-9]+@[a-zA-Z0-9]+\.[a-zA-Z0-9]+\b/.test(email)) { + } else if (!/^[a-zA-Z0-9._-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$/.test(email)) { errors.email = "Invalid email format"; } diff --git a/web/app/multifold/page.tsx b/web/app/multifold/page.tsx index 8269363..5d01966 100644 --- a/web/app/multifold/page.tsx +++ b/web/app/multifold/page.tsx @@ -114,7 +114,7 @@ export default function MultiFold() { if (!email || email.trim() === "") { errors.email = "Email is required."; - } else if (!/\b[a-zA-Z0-9]+@[a-zA-Z0-9]+\.[a-zA-Z0-9]+\b/.test(email)) { + } else if (!/^[a-zA-Z0-9._-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$/.test(email)) { errors.email = "Invalid email format."; } diff --git a/web/app/omegafold/page.tsx b/web/app/omegafold/page.tsx index 8148a51..d0aae44 100644 --- a/web/app/omegafold/page.tsx +++ b/web/app/omegafold/page.tsx @@ -126,7 +126,7 @@ export default function Omegafold() { if (!email) { errors.email = "Email is required."; - } else if (!/\b[a-zA-Z0-9]+@[a-zA-Z0-9]+\.[a-zA-Z0-9]+\b/.test(email)) { + } else if (!/^[a-zA-Z0-9._-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$/.test(email)) { errors.email = "Email is invalid."; }