Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 29 additions & 25 deletions api/app/alphafold/k8s_job.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from kubernetes import client
import shlex

from app.shared.job_submitting import generate_salt
from config import Config
Expand All @@ -16,62 +17,65 @@ def set_db_paths(modelPreset, jobConfig):
return db_paths_cmd

def construct_command(jobConfig, user):
output_dir = f'/mnt/output/{user}/{jobConfig["simplename"]}'
simplename_quoted = shlex.quote(jobConfig["simplename"])
user_quoted = shlex.quote(user)
output_dir = f'/mnt/output/{user_quoted}/{simplename_quoted}'
db_paths_cmd = set_db_paths(jobConfig["modelPreset"], jobConfig)
salt = generate_salt()

# Construct the command for running Alphafold and handling the output
mkdir_cmd = f'mkdir -p {output_dir}'
alphafold_cmd = (
f'python /app/alphafold/run_alphafold.py '
f'--fasta_paths={jobConfig["input"]} '
f'--uniref90_database_path={jobConfig["uniref90"]} '
f'--mgnify_database_path={jobConfig["mgnify"]} '
f'--data_dir={jobConfig["data"]} '
f'--template_mmcif_dir={jobConfig["mmcif"]} '
f'--obsolete_pdbs_path={jobConfig["obsolete"]} '
f'--fasta_paths={shlex.quote(jobConfig["input"])} '
f'--uniref90_database_path={shlex.quote(jobConfig["uniref90"])} '
f'--mgnify_database_path={shlex.quote(jobConfig["mgnify"])} '
f'--data_dir={shlex.quote(jobConfig["data"])} '
f'--template_mmcif_dir={shlex.quote(jobConfig["mmcif"])} '
f'--obsolete_pdbs_path={shlex.quote(jobConfig["obsolete"])} '
f'{db_paths_cmd}'
f'{jobConfig["uniclust"]} {jobConfig["full"]} '
f'--output_dir=/mnt/output/{user} '
f'--max_template_date={jobConfig["maxTemplateDate"]} '
f'--db_preset={jobConfig["dbPreset"]} '
f'--output_dir=/mnt/output/{user_quoted} '
f'--max_template_date={shlex.quote(jobConfig["maxTemplateDate"])} '
f'--db_preset={shlex.quote(jobConfig["dbPreset"])} '
f'{jobConfig["reduced"]} '
f'--model_preset={jobConfig["modelPreset"]} '
f'--model_preset={shlex.quote(jobConfig["modelPreset"])} '
f'--benchmark=False '
f'--use_precomputed_msas={jobConfig["reuseMSAs"]} '
f'--num_multimer_predictions_per_model={jobConfig["predictionsPerModel"]} '
f'--use_precomputed_msas={shlex.quote(jobConfig["reuseMSAs"])} '
f'--num_multimer_predictions_per_model={shlex.quote(jobConfig["predictionsPerModel"])} '
f'--models_to_relax={"all" if jobConfig["runRelax"] else "none"} '
f'--use_gpu_relax=True '
f'--logtostderr 2>&1 | tee {output_dir}/stdout'
)
public_symlink_cmd = (
f'if [ "{jobConfig["makeResultsPublic"]}" == "true" ] ; '
f'then ln -sfr {output_dir} /mnt/output/public/{jobConfig["simplename"]} ; fi'
f'then ln -sfr {output_dir} /mnt/output/public/{simplename_quoted} ; fi'
)
readme_cmd = (
f'if [ -f "{Config.README_ALPHAFOLD2}" ]; then '
f'cp "{Config.README_ALPHAFOLD2}" {output_dir}/README.md; fi'
)
compression_cmd = (
f'cd /mnt/output/{user}; '
f'cp -r {jobConfig["simplename"]} /storage; '
f'zip -0 -r {jobConfig["simplename"]}.zip {jobConfig["simplename"]}; '
f'mv {jobConfig["simplename"]}.zip {jobConfig["simplename"]}/download-{salt}.zip'
f'cd /mnt/output/{user_quoted}; '
f'cp -r {simplename_quoted} /storage; '
f'zip -0 -r {simplename_quoted}.zip {simplename_quoted}; '
f'mv {simplename_quoted}.zip {simplename_quoted}/download-{salt}.zip'
)
create_done_file_cmd = (
f'if [ -s "{output_dir}/ranking_debug.json" ] ; '
f'then touch "{output_dir}/alphafold.done"; fi'
)
email_quoted = shlex.quote(jobConfig.get("email", ""))
email_notification_cmd = (
f'if [ ! -z "{jobConfig["email"]}" ]; '
f'if [ ! -z {email_quoted} ]; '
f'then if [ -s "{output_dir}/ranking_debug.json" ] ; '
f'then echo -e "To:{jobConfig["email"]}\nFrom:{Config.EMAIL_FROM}\n'
f'Subject:Alphafold computation has finished\n\n'
f'Your AlphaFold computation \"{jobConfig["simplename"]}\" has finished, please visit {Config.BASE_URL}/result/{jobConfig["simplename"]} to view the result of your computation\n" | ssmtp -t; '
f'then echo -e "To:{email_quoted}\\nFrom:{shlex.quote(Config.EMAIL_FROM)}\\n'
f'Subject:Alphafold computation has finished\\n\\n'
f'Your AlphaFold computation {simplename_quoted} has finished, please visit {shlex.quote(Config.BASE_URL)}/result/{simplename_quoted} to view the result of your computation\\n" | ssmtp -t; '
f'else echo -e '
f'"To:{jobConfig["email"]}\nFrom:{Config.EMAIL_FROM}\n'
f'Subject:Alphafold computation has failed\n\n'
f'Your alphafold computation \"{jobConfig["simplename"]}\" has failed.\n" '
f'"To:{email_quoted}\\nFrom:{shlex.quote(Config.EMAIL_FROM)}\\n'
f'Subject:Alphafold computation has failed\\n\\n'
f'Your alphafold computation {simplename_quoted} has failed.\\n" '
f'| cat - {output_dir}/stdout | ssmtp -t; exit 1; '
f' fi; fi'
)
Expand Down
5 changes: 5 additions & 0 deletions api/app/alphafold3/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from app.alphafold3.validation import Job
from app.alphafold3.v1_submission import save_input_config, run_alphafold3_prediction, save_json_input, save_ccd_file
from app.shared.job_submitting import check_running_jobs_limit
from app.shared.input_validation import validate_email
import logging


Expand Down Expand Up @@ -71,6 +72,10 @@ def submit_af3_job_json(current_user):
computation_config = json.loads(request.form["data"])
json_file = request.files.get("jsonFile")

email_err = validate_email(computation_config.get("email", ""))
if email_err:
return email_err

save_json = save_json_input(json_file, computation_config, current_user)
if save_json:
return save_json
Expand Down
42 changes: 25 additions & 17 deletions api/app/alphafold3/v1_submission.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import json
import os
import random
import shlex
import string
from app.shared.common import get_input_path, get_working_directory, get_output_path
from app.shared.common import NAMESPACE
Expand Down Expand Up @@ -105,11 +106,13 @@ def create_job_object(data, user):
"""Create a Kubernetes job object from the input data."""

salt=''.join(random.choice(string.ascii_letters + string.digits) for i in range(64))
output_dir = f"/mnt/output/{user}/{data['name']}"
input_json = f"/mnt/input/{user}/{data['name']}.json"
name_quoted = shlex.quote(data['name'])
user_quoted = shlex.quote(user)
output_dir = f"/mnt/output/{user_quoted}/{name_quoted}"
input_json = f"/mnt/input/{user_quoted}/{name_quoted}.json"
stdout_log = f"{output_dir}/stdout"
use_precomputed = data.get("precomputedMSA") or "precomputedTemplates" in data
sanitised_name = data["name"].lower()
sanitised_name = shlex.quote(data["name"].lower())

mkdir_cmd = f"mkdir -p {output_dir}"
if use_precomputed:
Expand All @@ -128,34 +131,35 @@ def create_job_object(data, user):
)
public_symlink_cmd = (
f'if [ "{data["public"]}" == "True" ] ; '
f'then ln -sfr {output_dir} /mnt/output/public/{data["name"]} ; fi'
f'then ln -sfr {output_dir} /mnt/output/public/{name_quoted} ; fi'
)
readme_cmd = (
f'if [ -f "{Config.README_ALPHAFOLD3}" ]; then '
f'cp "{Config.README_ALPHAFOLD3}" {output_dir}/README.md; fi'
)
compression_cmd = (
f'cd /mnt/output/{user} ; '
f'cp -r {data["name"]} /storage; '
f'zip -0 -r {data["name"]}.zip {data["name"]}; '
f'mv {data["name"]}.zip {data["name"]}/download-{salt}.zip'
f'cd /mnt/output/{user_quoted} ; '
f'cp -r {name_quoted} /storage; '
f'zip -0 -r {name_quoted}.zip {name_quoted}; '
f'mv {name_quoted}.zip {name_quoted}/download-{salt}.zip'
)
create_done_file_cmd = (
f'if [ -s "{output_dir}/{sanitised_name}/{sanitised_name}_ranking_scores.csv" ] ; '
f'then touch "{output_dir}/alphafold3.done"; fi'
)
email_quoted = shlex.quote(data.get("email", ""))
email_notification_cmd = (
f'if [ ! -z "{data["email"]}" ]; '
f'echo "Sending email notification to {data["email"]}"; '
f'if [ ! -z {email_quoted} ]; '
f'echo "Sending email notification to {email_quoted}"; '
f'then if [ -s "{output_dir}/{sanitised_name}/{sanitised_name}_ranking_scores.csv" ] ; '
f'then echo -e "To:{data["email"]}\nFrom:{Config.EMAIL_FROM}\n'
f'Subject:AlphaFold 3 computation has finished successfully\n\n'
f'Your AlphaFold 3 computation \"{data["name"]}\" has finished, please visit {Config.BASE_URL}/result/{data["name"]} to view or download the result of your computation.\n" | ssmtp -t; '
f'then echo -e "To:{email_quoted}\\nFrom:{shlex.quote(Config.EMAIL_FROM)}\\n'
f'Subject:AlphaFold 3 computation has finished successfully\\n\\n'
f'Your AlphaFold 3 computation {name_quoted} has finished, please visit {shlex.quote(Config.BASE_URL)}/result/{name_quoted} to view or download the result of your computation.\\n" | ssmtp -t; '
f'else echo -e '
f'"To:{data["email"]}\nFrom:{Config.EMAIL_FROM}\n'
f'Subject:AlphaFold 3 computation has failed\n\n'
f'Your AlphaFold 3 computation \"{data["name"]}\" has failed.\n" '
f'| cat - /mnt/output/{user}/{data["name"]}/stdout | ssmtp -t; exit 1; '
f'"To:{email_quoted}\\nFrom:{shlex.quote(Config.EMAIL_FROM)}\\n'
f'Subject:AlphaFold 3 computation has failed\\n\\n'
f'Your AlphaFold 3 computation {name_quoted} has failed.\\n" '
f'| cat - /mnt/output/{user_quoted}/{name_quoted}/stdout | ssmtp -t; exit 1; '
f' fi; fi'
)

Expand Down Expand Up @@ -343,6 +347,10 @@ def save_json_input(json_file, computation_config, user):

def save_ccd_file(file, job_name, user):
"""Save the CCD file to the server."""
from app.shared.input_validation import validate_job_name
validation_error = validate_job_name(job_name)
if validation_error:
return validation_error
if file:
ccd_path = get_input_path(job_name+"-ccd", "cif", user)
logging.info(f"Saving CCD file to {ccd_path}")
Expand Down
7 changes: 5 additions & 2 deletions api/app/alphafold3/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
import json
import logging

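# Strict email pattern: the local part may contain only ASCII letters, digits,
# '.', '_' and '-' (so plus-addressing such as user+tag@example.org is rejected);
# the domain must end in a dot followed by at least two ASCII letters.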
_EMAIL_RE = re.compile(r"^[a-zA-Z0-9._-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$")

# Define TypedDict for each sequence variant
class ProteinDict(TypedDict):
protein: "Protein"
Expand Down Expand Up @@ -72,9 +75,9 @@ class Job(BaseModel):

@validator("email")
def validate_email_af3(cls, v):
if "@" not in v:
if not isinstance(v, str) or len(v) > 254:
raise ValueError("Invalid email format")
if not re.match(r"[^@]+@[^@]+\.[^@]+", v):
if not _EMAIL_RE.match(v):
raise ValueError("Invalid email format")
return v

Expand Down
6 changes: 5 additions & 1 deletion api/app/colabfold/utilities.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import random
import shlex
import string
from flask import jsonify
from kubernetes import client
Expand Down Expand Up @@ -206,7 +207,10 @@ def create_file_config(jobConfig):
def create_job_object(jobConfig, user):
"""Create a Kubernetes Job object."""
salt=''.join(random.choice(string.ascii_letters + string.digits) for i in range(64))
cfArgs = f'mkdir -p /mnt/output/{user}/{jobConfig["simplename"]} && /opt/conda/bin/colabfold_batch {jobConfig["input"]} /mnt/output/{user}/{jobConfig["simplename"]} --model-type {jobConfig["modelPreset"]} --use-gpu-relax --num-relax {jobConfig["numRelax"]} {jobConfig["templateMode"]} --msa-mode {jobConfig["msaMode"]} {jobConfig["maxMSA"]} --pair-mode {jobConfig["pairMode"]} {jobConfig["useDropout"]} --recycle-early-stop-tolerance {jobConfig["recycleTolerance"]} --num-recycle {jobConfig["numRecycles"]} --num-models {jobConfig["numModels"]} --num-seeds {jobConfig["numSeeds"]} --host-url http://colabsearch.colabsearch-ns.svc.cluster.local 2>&1 | tee /mnt/output/{user}/{jobConfig["simplename"]}/stdout && if [ "{jobConfig["makeResultsPublic"]}" == "true" ] ; then ln -sfr /mnt/output/{user}/{jobConfig["simplename"]} /mnt/output/public/{jobConfig["simplename"]} ; fi ; if [ -f "{Config.README_COLABFOLD}" ]; then cp "{Config.README_COLABFOLD}" /mnt/output/{user}/{jobConfig["simplename"]}/README.md; fi ; cd /mnt/output/{user} ; cp -r {jobConfig["simplename"]} /storage ; zip -0 -r {jobConfig["simplename"]}.zip {jobConfig["simplename"]}; mv {jobConfig["simplename"]}.zip {jobConfig["simplename"]}/download-{salt}.zip ; cd "/mnt/output/{user}/{jobConfig["simplename"]}"; if ls *.done.txt ; then touch "/mnt/output/{user}/{jobConfig["simplename"]}/colabfold.done"; fi; if [ ! 
-z "{jobConfig["email"]}" ]; then cd "/mnt/output/{user}/{jobConfig["simplename"]}"; if ls *.done.txt ; then echo -e "To:{jobConfig["email"]}\nFrom:{Config.EMAIL_FROM}\nSubject:ColabFold computation has finished\n\nYour ColabFold computation \"{jobConfig["simplename"]}\" has finished, please visit {Config.BASE_URL}/result/{jobConfig["simplename"]} to view the result of your computation\n" | ssmtp -t; else echo -e "To:{jobConfig["email"]}\nFrom:{Config.EMAIL_FROM}\nSubject:Colabfold computation has failed\n\nYour ColabFold computation \"{jobConfig["simplename"]}\" has failed.\n" | cat - /mnt/output/{user}/{jobConfig["simplename"]}/stdout | ssmtp -t; fi; fi'
email_quoted = shlex.quote(jobConfig.get("email", ""))
simplename_quoted = shlex.quote(jobConfig["simplename"])
user_quoted = shlex.quote(user)
cfArgs = f'mkdir -p /mnt/output/{user_quoted}/{simplename_quoted} && /opt/conda/bin/colabfold_batch {shlex.quote(jobConfig["input"])} /mnt/output/{user_quoted}/{simplename_quoted} --model-type {shlex.quote(jobConfig["modelPreset"])} --use-gpu-relax --num-relax {shlex.quote(str(jobConfig["numRelax"]))} {jobConfig["templateMode"]} --msa-mode {shlex.quote(jobConfig["msaMode"])} {jobConfig["maxMSA"]} --pair-mode {shlex.quote(jobConfig["pairMode"])} {jobConfig["useDropout"]} --recycle-early-stop-tolerance {shlex.quote(jobConfig["recycleTolerance"])} --num-recycle {shlex.quote(jobConfig["numRecycles"])} --num-models {shlex.quote(jobConfig["numModels"])} --num-seeds {shlex.quote(jobConfig["numSeeds"])} --host-url http://colabsearch.colabsearch-ns.svc.cluster.local 2>&1 | tee /mnt/output/{user_quoted}/{simplename_quoted}/stdout && if [ "{jobConfig["makeResultsPublic"]}" == "true" ] ; then ln -sfr /mnt/output/{user_quoted}/{simplename_quoted} /mnt/output/public/{simplename_quoted} ; fi ; if [ -f "{Config.README_COLABFOLD}" ]; then cp "{Config.README_COLABFOLD}" /mnt/output/{user_quoted}/{simplename_quoted}/README.md; fi ; cd /mnt/output/{user_quoted} ; cp -r {simplename_quoted} /storage ; zip -0 -r {simplename_quoted}.zip {simplename_quoted}; mv {simplename_quoted}.zip {simplename_quoted}/download-{salt}.zip ; cd "/mnt/output/{user_quoted}/{simplename_quoted}"; if ls *.done.txt ; then touch "/mnt/output/{user_quoted}/{simplename_quoted}/colabfold.done"; fi; if [ ! 
-z {email_quoted} ]; then cd "/mnt/output/{user_quoted}/{simplename_quoted}"; if ls *.done.txt ; then echo -e "To:{email_quoted}\nFrom:{shlex.quote(Config.EMAIL_FROM)}\nSubject:ColabFold computation has finished\n\nYour ColabFold computation {simplename_quoted} has finished, please visit {shlex.quote(Config.BASE_URL)}/result/{simplename_quoted} to view the result of your computation\n" | ssmtp -t; else echo -e "To:{email_quoted}\nFrom:{shlex.quote(Config.EMAIL_FROM)}\nSubject:Colabfold computation has failed\n\nYour ColabFold computation {simplename_quoted} has failed.\n" | cat - /mnt/output/{user_quoted}/{simplename_quoted}/stdout | ssmtp -t; fi; fi'

if len(jobConfig['proteinSequence']) > 5000:
logging.info(f"Large sequence detected ({len(jobConfig['proteinSequence'])} residues), allocating more resources.")
Expand Down
6 changes: 5 additions & 1 deletion api/app/esmfold/utilities.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import random
import shlex
import string
from flask import jsonify
from kubernetes import client
Expand Down Expand Up @@ -104,7 +105,10 @@ def create_file_config(jobConfig):
def create_job_object(jobConfig, user):
"""Create Kubernetes Job Object."""
salt = ''.join(random.choice(string.ascii_letters + string.digits) for i in range(64))
esmfArgs = f'mkdir -p /mnt/output/{user}/{jobConfig["outputDir"]} && /usr/bin/esm-fold -i {jobConfig["input"]} -o /mnt/output/{user}/{jobConfig["outputDir"]} --num-recycles {jobConfig["numRecycles"]} -m /data/esmfold 2>&1 | tee /mnt/output/{user}/{jobConfig["outputDir"]}/stdout && if [ "{jobConfig["makeResultsPublic"]}" == "true" ] ; then ln -sfr /mnt/output/{user}/{jobConfig["outputDir"]} /mnt/output/public/{jobConfig["outputDir"]} ; fi ; if [ -f "{Config.README_ESMFOLD}" ]; then cp "{Config.README_ESMFOLD}" /mnt/output/{user}/{jobConfig["outputDir"]}/README.md; fi ; cd /mnt/output/{user} ; cp -r {jobConfig["outputDir"]} /storage ; zip -0 -r {jobConfig["outputDir"]}.zip {jobConfig["outputDir"]}; mv {jobConfig["outputDir"]}.zip {jobConfig["outputDir"]}/download-{salt}.zip ; if [ -s "/mnt/output/{user}/{jobConfig["outputDir"]}/"*.pdb ] ; then touch "/mnt/output/{user}/{jobConfig["outputDir"]}/esmfold.done"; fi; if [ ! -z "{jobConfig["email"]}" ]; then if [ -s "/mnt/output/{user}/{jobConfig["outputDir"]}/"*.pdb ] ; then echo -e "To:{jobConfig["email"]}\nFrom:{Config.EMAIL_FROM}\nSubject:ESMFold computation has finished\n\nYour ESMFold computation \"{jobConfig["simplename"]}\" has finished, please visit {Config.BASE_URL}/result/{jobConfig["simplename"]} to view the result of your computation\n" | ssmtp -t; else echo -e "To:{jobConfig["email"]}\nFrom:{Config.EMAIL_FROM}\nSubject:ESMFold computation has failed\n\nYour ESMFold computation \"{jobConfig["simplename"]}\" has failed.\n" | cat - /mnt/output/{user}/{jobConfig["outputDir"]}/stdout | ssmtp -t; fi; fi'
email_quoted = shlex.quote(jobConfig.get("email", ""))
output_dir_quoted = shlex.quote(jobConfig["outputDir"])
user_quoted = shlex.quote(user)
esmfArgs = f'mkdir -p /mnt/output/{user_quoted}/{output_dir_quoted} && /usr/bin/esm-fold -i {shlex.quote(jobConfig["input"])} -o /mnt/output/{user_quoted}/{output_dir_quoted} --num-recycles {shlex.quote(jobConfig["numRecycles"])} -m /data/esmfold 2>&1 | tee /mnt/output/{user_quoted}/{output_dir_quoted}/stdout && if [ "{jobConfig["makeResultsPublic"]}" == "true" ] ; then ln -sfr /mnt/output/{user_quoted}/{output_dir_quoted} /mnt/output/public/{output_dir_quoted} ; fi ; if [ -f "{Config.README_ESMFOLD}" ]; then cp "{Config.README_ESMFOLD}" /mnt/output/{user_quoted}/{output_dir_quoted}/README.md; fi ; cd /mnt/output/{user_quoted} ; cp -r {output_dir_quoted} /storage ; zip -0 -r {output_dir_quoted}.zip {output_dir_quoted}; mv {output_dir_quoted}.zip {output_dir_quoted}/download-{salt}.zip ; if [ -s "/mnt/output/{user_quoted}/{output_dir_quoted}/"*.pdb ] ; then touch "/mnt/output/{user_quoted}/{output_dir_quoted}/esmfold.done"; fi; if [ ! -z {email_quoted} ]; then if [ -s "/mnt/output/{user_quoted}/{output_dir_quoted}/"*.pdb ] ; then echo -e "To:{email_quoted}\\nFrom:{shlex.quote(Config.EMAIL_FROM)}\\nSubject:ESMFold computation has finished\\n\\nYour ESMFold computation {shlex.quote(jobConfig["simplename"])} has finished, please visit {shlex.quote(Config.BASE_URL)}/result/{shlex.quote(jobConfig["simplename"])} to view the result of your computation\\n" | ssmtp -t; else echo -e "To:{email_quoted}\\nFrom:{shlex.quote(Config.EMAIL_FROM)}\\nSubject:ESMFold computation has failed\\n\\nYour ESMFold computation {shlex.quote(jobConfig["simplename"])} has failed.\\n" | cat - /mnt/output/{user_quoted}/{output_dir_quoted}/stdout | ssmtp -t; fi; fi'

job = client.V1Job(
api_version="batch/v1",
Expand Down
Loading