diff --git a/src/create_input/readers/clonalstructure.py b/src/create_input/readers/clonalstructure.py index e53dfcf..4378366 100644 --- a/src/create_input/readers/clonalstructure.py +++ b/src/create_input/readers/clonalstructure.py @@ -80,7 +80,7 @@ def omega(config: dict, output_dir: str) -> pd.DataFrame: impacts = OMEGA_IMPACTS if not config["impact"] else config["impact"] if not config["elements"]: # elements = [elem for elem in data["element"].unique() if "--" not in elem] # removes sub-genic regions - elements = [f"{elem}_{impact}" for impact in impacts for elem in elements] + elements = [f"{elem}_{impact}" for impact in impacts for elem in data["element"].unique()] else: elements = config["elements"] samples = data["sample"].unique() if not config["samples"] else config["samples"] diff --git a/src/plot/coefplot/main.py b/src/plot/coefplot/main.py index 45dc599..987f4e9 100644 --- a/src/plot/coefplot/main.py +++ b/src/plot/coefplot/main.py @@ -41,7 +41,7 @@ def main(config_file: str) -> None: # make plots per model, per metric, per mode models = os.listdir(regres_dir) for model in models: - logger.info(f"Plots for {model}") + logger.info(f"Plots for {model} models") model_dir = os.path.join(regres_dir, model) metrics = os.listdir(model_dir) for metric in metrics: diff --git a/src/regressions/main.py b/src/regressions/main.py index 27f89e9..d373c10 100644 --- a/src/regressions/main.py +++ b/src/regressions/main.py @@ -76,20 +76,23 @@ def main(config_file: str) -> None: # run multivariate model (if applicable) if config["multi"]: logger.info("Multivariate analysis selected. Continue.") - output_dir_multi = os.path.join(output_dir, metric, "multivariate") - os.makedirs(output_dir_multi, exist_ok = True) - # restart storage results = init_storage(elements, predictors) - elements, predictors, forced_predictors = multi_rules(output_dir_uni, config) - results = run_model(data, results, elements, predictors, config, - mode = "multi") - if forced_predictors: - results = clean_multi(results, forced_predictors) - for res_elem in results: - file = os.path.join(output_dir_multi, f"{res_elem}.tsv") - results[res_elem].dropna(axis = 0, how = "all").to_csv(file, sep = "\t") + if not elements or not predictors: + logger.info("Multivariate analysis not possible: no remaining variables after applying rules") + + else: + output_dir_multi = os.path.join(output_dir, metric, "multivariate") + os.makedirs(output_dir_multi, exist_ok = True) + + results = run_model(data, results, elements, predictors, config, + mode = "multi") + if forced_predictors: + results = clean_multi(results, forced_predictors) + for res_elem in results: + file = os.path.join(output_dir_multi, f"{res_elem}.tsv") + results[res_elem].dropna(axis = 0, how = "all").to_csv(file, sep = "\t") return None \ No newline at end of file diff --git a/src/regressions/models.py b/src/regressions/models.py index 792af6d..a514f4a 100644 --- a/src/regressions/models.py +++ b/src/regressions/models.py @@ -3,6 +3,7 @@ import daiquiri import pandas as pd import statsmodels.formula.api as smf +import numpy as np from src import __logger_name__ from src.regressions.utils import add_intercept, correct_pvals, fill_storage @@ -31,7 +32,8 @@ def linear_me(data: pd.DataFrame, formula: str, config: dict): def main(data: pd.DataFrame, results: dict, elements: list, predictors: list, config: dict, mode=str) -> dict: - """ """ + """ + """ if mode == "uni": terms = product(elements, predictors) @@ -43,9 +45,18 @@ def main(data: pd.DataFrame, results: dict, elements: list, predictors: list, co formula = f"{element} ~ {predictors}{intercept}" logger.debug(f"Running: {formula}") - model = MODELS[config["model"]] - model_res = model(data, formula, config) - results = fill_storage(results, model_res, element, predictors, intercept) + + try: + model = MODELS[config["model"]] + model_res = model(data, formula, config) + results = fill_storage(results, model_res, element, predictors, intercept) + except np.linalg.LinAlgError as e: + if "Singular matrix" in str(e): + logger.warning(f"Singular matrix encountered for formula: {formula}. Skipping this model.") + else: + raise + except Exception as e: + logger.error(f"Unexpected error for formula {formula}: {str(e)}") if config["correct_pvals"]: results = correct_pvals(results) diff --git a/src/regressions/utils.py b/src/regressions/utils.py index 482d0e6..fbaa9ce 100644 --- a/src/regressions/utils.py +++ b/src/regressions/utils.py @@ -64,14 +64,22 @@ def add_intercept(predictor_term: str, """ """ + # no predictors to force a 0 intercept in config + if config["predictors_intercept_0"] is None: + intercept = " + 1" + return intercept + + # if predictors contain at least one that requires zero intercept forcing, force it predictors_intercept_0 = config["predictors_intercept_0"] if not isinstance(predictors_intercept_0, list): predictors_intercept_0 = list(predictors_intercept_0) for pred_int_0 in predictors_intercept_0: if pred_int_0 in predictor_term: intercept = " - 1" - else: - intercept = " + 1" + return intercept + + # otherwise, intercept can be calculated + intercept = " + 1" return intercept diff --git a/uv.lock b/uv.lock index 5b765ea..de2c69f 100644 --- a/uv.lock +++ b/uv.lock @@ -19,7 +19,6 @@ requires-dist = [ { name = "click", specifier = ">=8.3.1" }, { name = "daiquiri", specifier = ">=3.4.0" }, { name = "pandas", specifier = ">=2.3.3" }, - { name = "ps", specifier = ">=0.1.5" }, { name = "pyyaml", specifier = ">=6.0.3" }, { name = "seaborn", specifier = ">=0.13.2" }, { name = "statsmodels", specifier = ">=0.14.5" }, @@ -432,15 +431,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c1/70/6b41bdcddf541b437bbb9f47f94d2db5d9ddef6c37ccab8c9107743748a4/pillow-12.0.0-cp314-cp314t-win_arm64.whl", hash = "sha256:99353a06902c2e43b43e8ff74ee65a7d90307d82370604746738a1e0661ccca7", size = 2525630 }, ] -[[package]] -name = "ps" -version = "0.1.5" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e9/92/1be013a288a347fdbd00bd600fea3965140c1c5b4d66b0555e5a8f253e2b/ps-0.1.5.tar.gz", hash = "sha256:d429d005132d754972a69d0a2200c9911c092bb4d80d673e8c72f68b1691d384", size = 18273 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/91/0e/6bc22d0c3a8644f18b985d8421361101786dc43db0b053b3287d88a89c56/ps-0.1.5-py3-none-any.whl", hash = "sha256:179fcff367855750012c8d5ab5d550a34cb9e78e389f1fd568cce24c2df9dd04", size = 18308 }, -] - [[package]] name = "pyparsing" version = "3.2.5"