From f8fa662dffda33fb6f33116bfec3febaf94b95db Mon Sep 17 00:00:00 2001 From: rblancomi Date: Mon, 2 Mar 2026 05:02:12 +0100 Subject: [PATCH 01/10] fix: elements var in omega reader referred before assignment --- src/create_input/readers/clonalstructure.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/create_input/readers/clonalstructure.py b/src/create_input/readers/clonalstructure.py index e53dfcf..4378366 100644 --- a/src/create_input/readers/clonalstructure.py +++ b/src/create_input/readers/clonalstructure.py @@ -80,7 +80,7 @@ def omega(config: dict, output_dir: str) -> pd.DataFrame: impacts = OMEGA_IMPACTS if not config["impact"] else config["impact"] if not config["elements"]: # elements = [elem for elem in data["element"].unique() if "--" not in elem] # removes sub-genic regions - elements = [f"{elem}_{impact}" for impact in impacts for elem in elements] + elements = [f"{elem}_{impact}" for impact in impacts for elem in data["element"].unique()] else: elements = config["elements"] samples = data["sample"].unique() if not config["samples"] else config["samples"] From dbbd0f917623e679f088e902315d852665c0a945 Mon Sep 17 00:00:00 2001 From: rblancomi Date: Mon, 2 Mar 2026 05:12:48 +0100 Subject: [PATCH 02/10] fix: elements var in omega reader referred before assignment --- src/create_input/readers/clonalstructure.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/create_input/readers/clonalstructure.py b/src/create_input/readers/clonalstructure.py index 4378366..e53dfcf 100644 --- a/src/create_input/readers/clonalstructure.py +++ b/src/create_input/readers/clonalstructure.py @@ -80,7 +80,7 @@ def omega(config: dict, output_dir: str) -> pd.DataFrame: impacts = OMEGA_IMPACTS if not config["impact"] else config["impact"] if not config["elements"]: # elements = [elem for elem in data["element"].unique() if "--" not in elem] # removes sub-genic regions - elements = [f"{elem}_{impact}" for impact in impacts for elem in 
data["element"].unique()] + elements = [f"{elem}_{impact}" for impact in impacts for elem in elements] else: elements = config["elements"] samples = data["sample"].unique() if not config["samples"] else config["samples"] From 3ae1c22e90ecb9e1e94f553ca2e6459aaa755f39 Mon Sep 17 00:00:00 2001 From: rblancomi Date: Mon, 2 Mar 2026 05:49:57 +0100 Subject: [PATCH 03/10] real fix: elements var in omega reader referred before assignment --- src/create_input/readers/clonalstructure.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/create_input/readers/clonalstructure.py b/src/create_input/readers/clonalstructure.py index e53dfcf..4378366 100644 --- a/src/create_input/readers/clonalstructure.py +++ b/src/create_input/readers/clonalstructure.py @@ -80,7 +80,7 @@ def omega(config: dict, output_dir: str) -> pd.DataFrame: impacts = OMEGA_IMPACTS if not config["impact"] else config["impact"] if not config["elements"]: # elements = [elem for elem in data["element"].unique() if "--" not in elem] # removes sub-genic regions - elements = [f"{elem}_{impact}" for impact in impacts for elem in elements] + elements = [f"{elem}_{impact}" for impact in impacts for elem in data["element"].unique()] else: elements = config["elements"] samples = data["sample"].unique() if not config["samples"] else config["samples"] From b3ef37fae9419e47df1f52394013e6505db11d3b Mon Sep 17 00:00:00 2001 From: rblancomi Date: Mon, 2 Mar 2026 23:00:20 +0100 Subject: [PATCH 04/10] fix: avoid NoneType error when predictors_intercept_0 = null --- src/regressions/utils.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/regressions/utils.py b/src/regressions/utils.py index 482d0e6..fbaa9ce 100644 --- a/src/regressions/utils.py +++ b/src/regressions/utils.py @@ -64,14 +64,22 @@ def add_intercept(predictor_term: str, """ """ + # no predictors to force a 0 intercept in config + if config["predictors_intercept_0"] is None: + intercept = " + 1" + return intercept 
+ + # if predictors contain at least one that requires zero intercept forcing, force it predictors_intercept_0 = config["predictors_intercept_0"] if not isinstance(predictors_intercept_0, list): predictors_intercept_0 = list(predictors_intercept_0) for pred_int_0 in predictors_intercept_0: if pred_int_0 in predictor_term: intercept = " - 1" - else: - intercept = " + 1" + return intercept + + # otherwise, intercept can be calculated + intercept = " + 1" return intercept From e6d6898db24e3673bea009deb6ba4360bfeb29a3 Mon Sep 17 00:00:00 2001 From: rblancomi Date: Mon, 2 Mar 2026 23:53:10 +0100 Subject: [PATCH 05/10] chore: improve plotting log --- src/plot/coefplot/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plot/coefplot/main.py b/src/plot/coefplot/main.py index 45dc599..987f4e9 100644 --- a/src/plot/coefplot/main.py +++ b/src/plot/coefplot/main.py @@ -41,7 +41,7 @@ def main(config_file: str) -> None: # make plots per model, per metric, per mode models = os.listdir(regres_dir) for model in models: - logger.info(f"Plots for {model}") + logger.info(f"Plots for {model} models") model_dir = os.path.join(regres_dir, model) metrics = os.listdir(model_dir) for metric in metrics: From 64c0a9abdff0509dcb25ff8ecec6f153bf342885 Mon Sep 17 00:00:00 2001 From: rblancomi Date: Tue, 3 Mar 2026 00:14:28 +0100 Subject: [PATCH 06/10] testing if multi is done with empty variables --- src/regressions/main.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/regressions/main.py b/src/regressions/main.py index 27f89e9..59b1bc2 100644 --- a/src/regressions/main.py +++ b/src/regressions/main.py @@ -84,6 +84,10 @@ def main(config_file: str) -> None: elements, predictors, forced_predictors = multi_rules(output_dir_uni, config) + print(elements) + print(predictors) + print(forced_predictors) + results = run_model(data, results, elements, predictors, config, mode = "multi") if forced_predictors: From fd26e8c920267ab5d59f888b4828f469d18e6efe Mon Sep 17 
00:00:00 2001 From: rblancomi Date: Tue, 3 Mar 2026 00:43:40 +0100 Subject: [PATCH 07/10] fix: multi regressions run when no elements/predictors remaining --- src/regressions/main.py | 33 +++++++++++++++++---------------- uv.lock | 10 ---------- 2 files changed, 17 insertions(+), 26 deletions(-) diff --git a/src/regressions/main.py b/src/regressions/main.py index 59b1bc2..90b2c5b 100644 --- a/src/regressions/main.py +++ b/src/regressions/main.py @@ -76,24 +76,25 @@ def main(config_file: str) -> None: # run multivariate model (if applicable) if config["multi"]: logger.info("Multivariate analysis selected. Continue.") - output_dir_multi = os.path.join(output_dir, metric, "multivariate") - os.makedirs(output_dir_multi, exist_ok = True) - - # restart storage - results = init_storage(elements, predictors) - + elements, predictors, forced_predictors = multi_rules(output_dir_uni, config) - print(elements) - print(predictors) - print(forced_predictors) - results = run_model(data, results, elements, predictors, config, - mode = "multi") - if forced_predictors: - results = clean_multi(results, forced_predictors) - for res_elem in results: - file = os.path.join(output_dir_multi, f"{res_elem}.tsv") - results[res_elem].dropna(axis = 0, how = "all").to_csv(file, sep = "\t") + if not elements or predictors: + logger.info("Multivariate analysis not possible: no remaining variables after applying rules") + + else: + output_dir_multi = os.path.join(output_dir, metric, "multivariate") + os.makedirs(output_dir_multi, exist_ok = True) + # restart storage + results = init_storage(elements, predictors) + + results = run_model(data, results, elements, predictors, config, + mode = "multi") + if forced_predictors: + results = clean_multi(results, forced_predictors) + for res_elem in results: + file = os.path.join(output_dir_multi, f"{res_elem}.tsv") + results[res_elem].dropna(axis = 0, how = "all").to_csv(file, sep = "\t") return None \ No newline at end of file diff --git a/uv.lock 
b/uv.lock index 5b765ea..de2c69f 100644 --- a/uv.lock +++ b/uv.lock @@ -19,7 +19,6 @@ requires-dist = [ { name = "click", specifier = ">=8.3.1" }, { name = "daiquiri", specifier = ">=3.4.0" }, { name = "pandas", specifier = ">=2.3.3" }, - { name = "ps", specifier = ">=0.1.5" }, { name = "pyyaml", specifier = ">=6.0.3" }, { name = "seaborn", specifier = ">=0.13.2" }, { name = "statsmodels", specifier = ">=0.14.5" }, @@ -432,15 +431,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c1/70/6b41bdcddf541b437bbb9f47f94d2db5d9ddef6c37ccab8c9107743748a4/pillow-12.0.0-cp314-cp314t-win_arm64.whl", hash = "sha256:99353a06902c2e43b43e8ff74ee65a7d90307d82370604746738a1e0661ccca7", size = 2525630 }, ] -[[package]] -name = "ps" -version = "0.1.5" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e9/92/1be013a288a347fdbd00bd600fea3965140c1c5b4d66b0555e5a8f253e2b/ps-0.1.5.tar.gz", hash = "sha256:d429d005132d754972a69d0a2200c9911c092bb4d80d673e8c72f68b1691d384", size = 18273 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/91/0e/6bc22d0c3a8644f18b985d8421361101786dc43db0b053b3287d88a89c56/ps-0.1.5-py3-none-any.whl", hash = "sha256:179fcff367855750012c8d5ab5d550a34cb9e78e389f1fd568cce24c2df9dd04", size = 18308 }, -] - [[package]] name = "pyparsing" version = "3.2.5" From 1d408f8d9a1ef37d6c55e2230c5f084be6caf308 Mon Sep 17 00:00:00 2001 From: rblancomi Date: Wed, 4 Mar 2026 05:17:46 +0100 Subject: [PATCH 08/10] add: bypass Singular Matrix error in models --- src/regressions/models.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/src/regressions/models.py b/src/regressions/models.py index 792af6d..a514f4a 100644 --- a/src/regressions/models.py +++ b/src/regressions/models.py @@ -3,6 +3,7 @@ import daiquiri import pandas as pd import statsmodels.formula.api as smf +import numpy as np from src import __logger_name__ from src.regressions.utils import 
add_intercept, correct_pvals, fill_storage @@ -31,7 +32,8 @@ def linear_me(data: pd.DataFrame, formula: str, config: dict): def main(data: pd.DataFrame, results: dict, elements: list, predictors: list, config: dict, mode=str) -> dict: - """ """ + """ + """ if mode == "uni": terms = product(elements, predictors) @@ -43,9 +45,18 @@ def main(data: pd.DataFrame, results: dict, elements: list, predictors: list, co formula = f"{element} ~ {predictors}{intercept}" logger.debug(f"Running: {formula}") - model = MODELS[config["model"]] - model_res = model(data, formula, config) - results = fill_storage(results, model_res, element, predictors, intercept) + + try: + model = MODELS[config["model"]] + model_res = model(data, formula, config) + results = fill_storage(results, model_res, element, predictors, intercept) + except np.linalg.LinAlgError as e: + if "Singular matrix" in str(e): + logger.warning(f"Singular matrix encountered for formula: {formula}. Skipping this model.") + else: + raise + except Exception as e: + logger.error(f"Unexpected error for formula {formula}: {str(e)}") if config["correct_pvals"]: results = correct_pvals(results) From b3053042f83e7dd8cd2213acbc484d68fe04bdfe Mon Sep 17 00:00:00 2001 From: rblancomi Date: Tue, 17 Mar 2026 20:07:03 +0100 Subject: [PATCH 09/10] fix: avoid init_storage for multi regressions with formulas as predictors --- example.yml | 2 +- src/regressions/main.py | 8 +++----- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/example.yml b/example.yml index d0f49c9..89d79a9 100644 --- a/example.yml +++ b/example.yml @@ -60,7 +60,7 @@ metrics: elements_total_by: included samples_total_by: included general: - output_dir: ./test/ + output_dir: ./test/deepcsa_implementation/testing-dissapearing-dirs handle_na: ignore elements: samples: '^P19_|^ALL_GENES$|^all_samples$' diff --git a/src/regressions/main.py b/src/regressions/main.py index 90b2c5b..d373c10 100644 --- a/src/regressions/main.py +++ b/src/regressions/main.py @@ 
-76,18 +76,16 @@ def main(config_file: str) -> None: # run multivariate model (if applicable) if config["multi"]: logger.info("Multivariate analysis selected. Continue.") - + # restart storage + results = init_storage(elements, predictors) elements, predictors, forced_predictors = multi_rules(output_dir_uni, config) - - if not elements or predictors: + if not elements or not predictors: logger.info("Multivariate analysis not possible: no remaining variables after applying rules") else: output_dir_multi = os.path.join(output_dir, metric, "multivariate") os.makedirs(output_dir_multi, exist_ok = True) - # restart storage - results = init_storage(elements, predictors) results = run_model(data, results, elements, predictors, config, mode = "multi") From a5ae5b02e5eff48e67f3c9a67ecc064c96c36ca5 Mon Sep 17 00:00:00 2001 From: rblancomi Date: Tue, 17 Mar 2026 22:25:12 +0100 Subject: [PATCH 10/10] chore: ./test/ as default test folder in example.yml --- example.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/example.yml b/example.yml index 89d79a9..d0f49c9 100644 --- a/example.yml +++ b/example.yml @@ -60,7 +60,7 @@ metrics: elements_total_by: included samples_total_by: included general: - output_dir: ./test/deepcsa_implementation/testing-dissapearing-dirs + output_dir: ./test/ handle_na: ignore elements: samples: '^P19_|^ALL_GENES$|^all_samples$'