Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/create_input/readers/clonalstructure.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def omega(config: dict, output_dir: str) -> pd.DataFrame:
impacts = OMEGA_IMPACTS if not config["impact"] else config["impact"]
if not config["elements"]:
# elements = [elem for elem in data["element"].unique() if "--" not in elem] # removes sub-genic regions
elements = [f"{elem}_{impact}" for impact in impacts for elem in elements]
elements = [f"{elem}_{impact}" for impact in impacts for elem in data["element"].unique()]
else:
elements = config["elements"]
samples = data["sample"].unique() if not config["samples"] else config["samples"]
Expand Down
2 changes: 1 addition & 1 deletion src/plot/coefplot/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def main(config_file: str) -> None:
# make plots per model, per metric, per mode
models = os.listdir(regres_dir)
for model in models:
logger.info(f"Plots for {model}")
logger.info(f"Plots for {model} models")
model_dir = os.path.join(regres_dir, model)
metrics = os.listdir(model_dir)
for metric in metrics:
Expand Down
25 changes: 14 additions & 11 deletions src/regressions/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,20 +76,23 @@ def main(config_file: str) -> None:
# run multivariate model (if applicable)
if config["multi"]:
logger.info("Multivariate analysis selected. Continue.")
output_dir_multi = os.path.join(output_dir, metric, "multivariate")
os.makedirs(output_dir_multi, exist_ok = True)

# restart storage
results = init_storage(elements, predictors)

elements, predictors, forced_predictors = multi_rules(output_dir_uni,
config)
results = run_model(data, results, elements, predictors, config,
mode = "multi")
if forced_predictors:
results = clean_multi(results, forced_predictors)
for res_elem in results:
file = os.path.join(output_dir_multi, f"{res_elem}.tsv")
results[res_elem].dropna(axis = 0, how = "all").to_csv(file, sep = "\t")
if not elements or not predictors:
logger.info("Multivariate analysis not possible: no remaining variables after applying rules")

else:
output_dir_multi = os.path.join(output_dir, metric, "multivariate")
os.makedirs(output_dir_multi, exist_ok = True)

Comment on lines +83 to +89
results = run_model(data, results, elements, predictors, config,
mode = "multi")
if forced_predictors:
results = clean_multi(results, forced_predictors)
for res_elem in results:
file = os.path.join(output_dir_multi, f"{res_elem}.tsv")
results[res_elem].dropna(axis = 0, how = "all").to_csv(file, sep = "\t")

return None
19 changes: 15 additions & 4 deletions src/regressions/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import daiquiri
import pandas as pd
import statsmodels.formula.api as smf
import numpy as np

from src import __logger_name__
from src.regressions.utils import add_intercept, correct_pvals, fill_storage
Expand Down Expand Up @@ -31,7 +32,8 @@ def linear_me(data: pd.DataFrame, formula: str, config: dict):


def main(data: pd.DataFrame, results: dict, elements: list, predictors: list, config: dict, mode=str) -> dict:
""" """
"""
"""

if mode == "uni":
terms = product(elements, predictors)
Expand All @@ -43,9 +45,18 @@ def main(data: pd.DataFrame, results: dict, elements: list, predictors: list, co

formula = f"{element} ~ {predictors}{intercept}"
logger.debug(f"Running: {formula}")
model = MODELS[config["model"]]
model_res = model(data, formula, config)
results = fill_storage(results, model_res, element, predictors, intercept)

try:
model = MODELS[config["model"]]
model_res = model(data, formula, config)
results = fill_storage(results, model_res, element, predictors, intercept)
except np.linalg.LinAlgError as e:
if "Singular matrix" in str(e):
logger.warning(f"Singular matrix encountered for formula: {formula}. Skipping this model.")
else:
raise
except Exception as e:
logger.error(f"Unexpected error for formula {formula}: {str(e)}")

if config["correct_pvals"]:
results = correct_pvals(results)
Expand Down
12 changes: 10 additions & 2 deletions src/regressions/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,14 +64,22 @@ def add_intercept(predictor_term: str,
"""
"""

# no predictors to force a 0 intercept in config
if config["predictors_intercept_0"] is None:
intercept = " + 1"
return intercept

# if predictors contain at least one that requires zero intercept forcing, force it
predictors_intercept_0 = config["predictors_intercept_0"]
if not isinstance(predictors_intercept_0, list):
predictors_intercept_0 = list(predictors_intercept_0)
Comment on lines +67 to 75
for pred_int_0 in predictors_intercept_0:
if pred_int_0 in predictor_term:
intercept = " - 1"
else:
intercept = " + 1"
return intercept

# otherwise, intercept can be calculated
intercept = " + 1"

return intercept

Expand Down
10 changes: 0 additions & 10 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading