Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion massql/msql_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,8 @@ def _evalute_variable_query(parsed_dict, input_filename,

presearch_parse["conditions"] = non_variable_conditions

ms1_df, ms2_df = _executeconditions_query(presearch_parse, input_filename,
ms1_df, ms2_df = _executeconditions_query(presearch_parse, input_filename,
ms1_input_df=ms1_df, ms2_input_df=ms2_df,
cache=cache, cache_dir=cache_dir, cache_file=cache_file)
variable_x_ms1_df = ms1_df

Expand Down
2 changes: 1 addition & 1 deletion tests/get_data.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ wget --no-verbose --output-document=NS_1x_test.mzML "https://massiveproxy.gnps2.
wget --no-verbose --output-document=JB_182_2_fe.mzML "https://massiveproxy.gnps2.org/massiveproxy/MSV000084289/ccms_peak/JB_182_2_fe.mzML"
wget --no-verbose --output-document=S_N2_neutral_Zn.mzML "https://massiveproxy.gnps2.org/massiveproxy/MSV000083387/updates/2019-11-12_allegraaron_e893cb7e/peak/S_N2_neutral_Zn.mzML"
wget --no-verbose --output-document=gnps-library.json "https://external.gnps2.org/gnpslibrary/GNPS-LIBRARY.json"
wget --no-verbose --output-document=specs_ms.mgf "http://massive.ucsd.edu/ProteoSAFe/DownloadResultFile?task=5ecfcf81cb3c471698995b194d8246a0&block=main&file=spectra/specs_ms.mgf"
wget --no-verbose --tries=3 --waitretry=5 --output-document=specs_ms.mgf "https://massive.ucsd.edu/ProteoSAFe/DownloadResultFile?task=5ecfcf81cb3c471698995b194d8246a0&block=main&file=spectra/specs_ms.mgf"
wget --no-verbose --output-document=1810E-II.mzML "https://massiveproxy.gnps2.org/massiveproxy/MSV000084691/ccms_peak/1810E-II.mzML"
wget --no-verbose --output-document=T04251505.mzXML "https://massiveproxy.gnps2.org/massiveproxy/MSV000082797/ccms_peak/raw/MTBLS368/T04251505.mzXML"
wget --no-verbose --output-document=isa_9_fe.mzML "https://massiveproxy.gnps2.org/massiveproxy/MSV000084030/ccms_peak/isa_9_fe.mzML"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
{
"conditions": [
{
"conditiontype": "where",
"qualifiers": {
"qualifiermztolerance": {
"comparator": "equal",
"name": "qualifiermztolerance",
"unit": "mz",
"value": 0.05
},
"type": "qualifier"
},
"type": "ms2productcondition",
"value": [
337.25
]
},
{
"conditiontype": "where",
"qualifiers": {
"qualifiermztolerance": {
"comparator": "equal",
"name": "qualifiermztolerance",
"unit": "mz",
"value": 0.05
},
"type": "qualifier"
},
"type": "ms2productcondition",
"value": [
319.24
]
},
{
"conditiontype": "where",
"type": "ms2precursorcondition",
"value": [
"X"
]
},
{
"conditiontype": "where",
"qualifiers": {
"qualifiermztolerance": {
"comparator": "equal",
"name": "qualifiermztolerance",
"unit": "mz",
"value": 0.05
},
"type": "qualifier"
},
"type": "ms2productcondition",
"value": [
"X-390.277"
]
},
{
"conditiontype": "where",
"qualifiers": {
"qualifierintensitymatch": {
"comparator": "equal",
"name": "qualifierintensitymatch",
"value": "Y"
},
"qualifierintensityreference": {
"name": "qualifierintensityreference"
},
"qualifierppmtolerance": {
"comparator": "equal",
"name": "qualifierppmtolerance",
"unit": "ppm",
"value": 40.0
},
"type": "qualifier"
},
"type": "ms2productcondition",
"value": [
319.24
]
},
{
"conditiontype": "where",
"qualifiers": {
"qualifierintensitymatch": {
"comparator": "equal",
"name": "qualifierintensitymatch",
"value": "Y*300.0"
},
"qualifierintensitytolpercent": {
"comparator": "equal",
"name": "qualifierintensitytolpercent",
"value": 99.0
},
"qualifierppmtolerance": {
"comparator": "equal",
"name": "qualifierppmtolerance",
"unit": "ppm",
"value": 40.0
},
"type": "qualifier"
},
"type": "ms2productcondition",
"value": [
201.16
]
}
],
"query": "QUERY scaninfo(MS2DATA) WHERE MS2PROD=337.25:TOLERANCEMZ=0.05 AND MS2PROD=319.24:TOLERANCEMZ=0.05 AND MS2PREC=X AND MS2PROD=X-390.277:TOLERANCEMZ=0.05 AND MS2PROD=319.24:TOLERANCEPPM=40:INTENSITYMATCH=Y:INTENSITYMATCHREFERENCE AND MS2PROD=201.16:TOLERANCEPPM=40:INTENSITYMATCH=Y*300:INTENSITYMATCHPERCENT=99",
"querytype": {
"datatype": "datams2data",
"function": "functionscaninfo"
}
}
6 changes: 5 additions & 1 deletion tests/test_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,12 @@ def test_extract_mzXML():
assert(len(merged_summary_df) == 5)

def test_extract_MGF():
    """Run a bare MS2 scaninfo query against the downloaded MGF test file.

    The MGF file is fetched by the test-data download script, so the test
    first checks that the file exists and is not trivially small, giving a
    clear failure message instead of an obscure parsing error.
    """
    mgf_path = "tests/data/specs_ms.mgf"
    assert os.path.exists(mgf_path), f"Test data file {mgf_path} not found - download may have failed"
    assert os.path.getsize(mgf_path) > 1000, f"Test data file {mgf_path} appears corrupt (too small) - download may have failed"

    query = "QUERY scaninfo(MS2DATA)"
    # Query the file once through mgf_path; a second call with the same
    # hard-coded path would only repeat the work and be overwritten.
    results_df = msql_engine.process_query(query, mgf_path)
    print(results_df)

    assert len(results_df) > 1
Expand Down
3 changes: 2 additions & 1 deletion tests/test_queries.txt
Original file line number Diff line number Diff line change
Expand Up @@ -45,4 +45,5 @@ QUERY scaninfo(MS2DATA) WHERE MS2PREC=X AND MOBILITY=range(min=X*0.0006775+0.405
QUERY scaninfo(MS2DATA) WHERE MS2PROD=(58.06513 OR 60.04439 OR 70.06513 OR 72.08078 OR 74.06004 OR 84.04439 OR 84.08078 OR 86.09643 OR 87.05529 OR 88.0393 OR 88.07569 OR 100.11208 OR 101.07094 OR 101.10732 OR 102.05495 OR 102.09134 OR 104.05285 OR 110.07127 OR 114.12773 OR 115.08659 OR 115.12297 OR 116.0706 OR 118.0685 OR 120.08078 OR 124.08692 OR 129.10224 OR 129.11347 OR 129.13862 OR 130.08625 OR 132.08415 OR 134.09643 OR 136.07569 OR 138.10257 OR 143.12912 OR 148.11208 OR 150.09134 OR 157.14477 OR 159.09167 OR 164.10699 OR 173.10732 OR 187.12297):CARDINALITY=range(min=2,max=5):TOLERANCEPPM=10:INTENSITYPERCENT=5
QUERY scaninfo(MS2DATA) WHERE MS2PROD=226.18:TOLERANCEPPM=5:EXCLUDED
QUERY scaninfo(MS2DATA) WHERE MS2PROD=formula(C10)
QUERY scaninfo(MS2DATA) WHERE MS2PROD=341.28:TOLERANCEMZ=0.01:INTENSITYPERCENT=2 AND MS2PROD=323.27:TOLERANCEMZ=0.01:INTENSITYPERCENT=2 AND MS2PREC=X AND MS2PROD=X-358.2871:TOLERANCEMZ=0.01:INTENSITYPERCENT=2
QUERY scaninfo(MS2DATA) WHERE MS2PROD=341.28:TOLERANCEMZ=0.01:INTENSITYPERCENT=2 AND MS2PROD=323.27:TOLERANCEMZ=0.01:INTENSITYPERCENT=2 AND MS2PREC=X AND MS2PROD=X-358.2871:TOLERANCEMZ=0.01:INTENSITYPERCENT=2
QUERY scaninfo(MS2DATA) WHERE MS2PROD=337.25:TOLERANCEMZ=0.05 AND MS2PROD=319.24:TOLERANCEMZ=0.05 AND MS2PREC=X AND MS2PROD=X-390.277:TOLERANCEMZ=0.05 AND MS2PROD=319.24:TOLERANCEPPM=40:INTENSITYMATCH=Y:INTENSITYMATCHREFERENCE AND MS2PROD=201.16:TOLERANCEPPM=40:INTENSITYMATCH=Y*300:INTENSITYMATCHPERCENT=99
55 changes: 55 additions & 0 deletions tests/test_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -917,6 +917,61 @@ def test_ms2_intensitypercent_gt_lt_eq_tripartite():
assert scans_gt.issubset(scans_eq), "INTENSITYPERCENT= (>=) must include all scans matched by INTENSITYPERCENT>"


def test_ms2_variable_with_intensitymatch():
    """Test a complex query combining MS2PREC=X variable, MS2PROD with INTENSITYMATCH and INTENSITYMATCHREFERENCE.

    The test has two stages:

    1. Parse the query and check the shape of the parse result: five
       ``ms2productcondition`` entries, one ``ms2precursorcondition``, one
       condition carrying ``qualifierintensityreference`` and one carrying
       ``qualifierintensitytolpercent`` whose intensity match value is
       ``"Y*300.0"``.
    2. Execute the query against a single synthetic MS2 scan whose peaks are
       chosen to satisfy every condition, and check that scan 1 is returned.
    """
    import pandas as pd

    query = "QUERY scaninfo(MS2DATA) WHERE MS2PROD=337.25:TOLERANCEMZ=0.05 AND MS2PROD=319.24:TOLERANCEMZ=0.05 AND MS2PREC=X AND MS2PROD=X-390.277:TOLERANCEMZ=0.05 AND MS2PROD=319.24:TOLERANCEPPM=40:INTENSITYMATCH=Y:INTENSITYMATCHREFERENCE AND MS2PROD=201.16:TOLERANCEPPM=40:INTENSITYMATCH=Y*300:INTENSITYMATCHPERCENT=99"

    # Verify it parses correctly
    parse_obj = msql_parser.parse_msql(query)
    print(json.dumps(parse_obj, indent=4))

    conditions = parse_obj["conditions"]
    condition_types = [c["type"] for c in conditions]
    assert condition_types.count("ms2productcondition") == 5
    assert condition_types.count("ms2precursorcondition") == 1

    # Check INTENSITYMATCH qualifiers are present
    ref_conditions = [
        c for c in conditions
        if "qualifierintensityreference" in c.get("qualifiers", {})
    ]
    assert len(ref_conditions) == 1

    match_conditions = [
        c for c in conditions
        if "qualifierintensitytolpercent" in c.get("qualifiers", {})
    ]
    assert len(match_conditions) == 1
    assert match_conditions[0]["qualifiers"]["qualifierintensitymatch"]["value"] == "Y*300.0"

    # Test execution with synthetic data that has matching peaks
    precmz = 710.0
    peaks = [
        (337.25, 1000.0),
        (319.24, 500.0),     # reference Y
        (201.16, 150000.0),  # Y*300 = 500*300 = 150000
        (319.723, 800.0),    # X-390.277 = 710-390.277 = 319.723
    ]

    # Normalisation denominators are the same for every peak, so compute
    # them once instead of once per row.
    base_peak_intensity = max(intensity for _, intensity in peaks)
    total_intensity = sum(intensity for _, intensity in peaks)

    ms2_df = pd.DataFrame([
        {
            'scan': 1, 'ms1scan': 0, 'rt': 1.0,
            'mz': mz, 'i': intensity,
            'precmz': precmz, 'charge': 1, 'polarity': 1,
            'i_norm': intensity / base_peak_intensity,
            'i_tic_norm': intensity / total_intensity,
        }
        for mz, intensity in peaks
    ])
    ms1_df = pd.DataFrame({
        'scan': [0], 'rt': [1.0], 'mz': [precmz], 'i': [10000.0],
        'i_norm': [1.0], 'i_tic_norm': [1.0], 'polarity': [1]
    })

    # NOTE(review): presumably the filename is not read when ms1_df/ms2_df
    # are supplied, so the synthetic frames are what gets queried - confirm.
    results_df = msql_engine.process_query(query, "tests/data/GNPS00002_A3_p.mzML",
                                           ms1_df=ms1_df, ms2_df=ms2_df)
    print(results_df)
    assert len(results_df) > 0, "Query should find the matching scan in synthetic data"
    assert 1 in results_df["scan"].values


def debug_query():
query = "QUERY scaninfo(MS2DATA) WHERE MS2PROD=341.28:TOLERANCEMZ=0.01:INTENSITYPERCENT=2 AND MS2PROD=323.27:TOLERANCEMZ=0.01:INTENSITYPERCENT=2 AND MS2PREC=X AND MS2PROD=X-358.2871:TOLERANCEMZ=0.01:INTENSITYPERCENT=2"

Expand Down
Loading