diff --git a/massql/msql_engine_filters.py b/massql/msql_engine_filters.py index f2236c7..127034c 100644 --- a/massql/msql_engine_filters.py +++ b/massql/msql_engine_filters.py @@ -248,11 +248,20 @@ def ms2prod_condition(condition, ms1_df, ms2_df, reference_conditions_register): mz_min = mz - mz_tol mz_max = mz + mz_tol + massdefect_min, massdefect_max = _get_massdefect_min(condition.get("qualifiers", None)) intensity_mask = _get_intensity_mask(ms2_df, condition.get("qualifiers", None)) ms2_filtered_df = ms2_df[(ms2_df["mz"] > mz_min) & (ms2_df["mz"] < mz_max) & intensity_mask] + + if massdefect_min > 0 or massdefect_max < 1: + ms2_filtered_df["mz_defect"] = ms2_filtered_df["mz"] - ms2_filtered_df["mz"].astype(int) + + ms2_filtered_df = ms2_filtered_df[ + (ms2_filtered_df["mz_defect"] > massdefect_min) & + (ms2_filtered_df["mz_defect"] < massdefect_max) + ] # Setting the intensity match register _set_intensity_register(ms2_filtered_df, reference_conditions_register, condition) @@ -328,6 +337,7 @@ def ms2nl_condition(condition, ms1_df, ms2_df, reference_conditions_register): nl_min = mz - mz_tol nl_max = mz + mz_tol + massdefect_min, massdefect_max = _get_massdefect_min(condition.get("qualifiers", None)) intensity_mask = _get_intensity_mask(ms2_df, condition.get("qualifiers", None)) ms2_filtered_df = ms2_df[ @@ -336,6 +346,14 @@ def ms2nl_condition(condition, ms1_df, ms2_df, reference_conditions_register): intensity_mask ] + if massdefect_min > 0 or massdefect_max < 1: + ms2_filtered_df["mz_defect"] = (ms2_filtered_df["precmz"] - ms2_filtered_df["mz"]) - (ms2_filtered_df["precmz"] - ms2_filtered_df["mz"]).astype(int) + + ms2_filtered_df = ms2_filtered_df[ + (ms2_filtered_df["mz_defect"] > massdefect_min) & + (ms2_filtered_df["mz_defect"] < massdefect_max) + ] + # Setting the intensity match register _set_intensity_register(ms2_filtered_df, reference_conditions_register, condition) @@ -402,6 +420,7 @@ def ms2prec_condition(condition, ms1_df, ms2_df, reference_conditions_register): (ms2_filtered_df["precmz_defect"] < massdefect_max) ] else: + massdefect_min, massdefect_max = _get_massdefect_min(condition.get("qualifiers", None)) mz_tol = _get_mz_tolerance(condition.get("qualifiers", None), mz) mz_min = mz - mz_tol mz_max = mz + mz_tol @@ -411,6 +430,14 @@ def ms2prec_condition(condition, ms1_df, ms2_df, reference_conditions_register): (ms2_df["precmz"] < mz_max) ] + if massdefect_min > 0 or massdefect_max < 1: + ms2_filtered_df["precmz_defect"] = ms2_filtered_df["precmz"] - ms2_filtered_df["precmz"].astype(int) + + ms2_filtered_df = ms2_filtered_df[ + (ms2_filtered_df["precmz_defect"] > massdefect_min) & + (ms2_filtered_df["precmz_defect"] < massdefect_max) + ] + ms2_list.append(ms2_filtered_df) if len(ms2_list) == 1: @@ -494,7 +521,7 @@ def ms1_condition(condition, ms1_df, ms2_df, reference_conditions_register, ms1_ (ms1_df["mz"] < mz_max) & intensity_mask] - if massdefect_min > 0 or massdefect_max < 1: + if massdefect_min != 0 or massdefect_max != 1: ms1_filtered_df["mz_defect"] = ms1_filtered_df["mz"] - ms1_filtered_df["mz"].astype(int) ms1_filtered_df = ms1_filtered_df[ @@ -608,7 +635,7 @@ def ms1_filter(condition, ms1_df): (ms1_df["mz"] < mz_max) & intensity_mask] - if massdefect_min > 0 or massdefect_max < 1: + if massdefect_min != 0 or massdefect_max != 1: ms1_filtered_df["mz_defect"] = ms1_filtered_df["mz"] - ms1_filtered_df["mz"].astype(int) ms1_filtered_df = ms1_filtered_df[ diff --git a/tests/test_query.py b/tests/test_query.py index 13666cf..8745ef3 100644 --- a/tests/test_query.py +++ b/tests/test_query.py @@ -615,6 +615,26 @@ def test_massdefect_ANY_query(): results_df = msql_engine.process_query(query, "tests/data/GNPS00002_A3_p.mzML") assert(len(results_df) == 77) +def test_massdefect_bug_reproduction(): + f = "tests/data/GNPS00002_A3_p.mzML" + + # MS2PROD MASSDEFECT + exclude_query = "QUERY MS2DATA WHERE MS2PROD=226.18:MASSDEFECT=massdefect(min=0.8,max=0.9)" + df_exclude = msql_engine.process_query(exclude_query, f) + assert len(df_exclude) == 0 + + # MS1MZ MASSDEFECT Full Range + full_range_query = "QUERY MS1DATA WHERE MS1MZ=226.18:MASSDEFECT=massdefect(min=0.0,max=1.0)" + df_full = msql_engine.process_query(full_range_query, f) + # Get baseline + df_all = msql_engine.process_query("QUERY MS1DATA WHERE MS1MZ=226.18", f) + assert len(df_full) == len(df_all) + + # MS1MZ MASSDEFECT Exclusion + exclude_ms1_query = "QUERY MS1DATA WHERE MS1MZ=226.18:MASSDEFECT=massdefect(min=0.8,max=0.9)" + df_ms1_exclude = msql_engine.process_query(exclude_ms1_query, f) + assert len(df_ms1_exclude) == 0 + def test_advanced_filters(): query = """ QUERY scansum(MS1DATA) FILTER MS1MZ=ANY:TOLERANCEMZ=35:MASSDEFECT=massdefect(min=0.1332, max=0.2112)