diff --git a/.cursorrrules b/.cursorrrules index 81cafc3c..dc990877 100644 --- a/.cursorrrules +++ b/.cursorrrules @@ -1,3 +1,3 @@ -Launch all commands in the conda environement "bgbench" +Launch all commands in the conda environement "ogbench" It should be done via the conda shell activate. diff --git a/README.md b/README.md index 97777e3b..d4a1aa1e 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ -[![Code Quality](https://github.com/geometric-intelligence/bgbench/actions/workflows/code-quality-main.yaml/badge.svg)](https://github.com/geometric-intelligence/bgbench/actions/workflows/code-quality-main.yaml) -[![Dependencies](https://github.com/geometric-intelligence/bgbench/actions/workflows/dependabot/dependabot-updates/badge.svg)](https://github.com/geometric-intelligence/bgbench/actions/workflows/dependabot/dependabot-updates) -[![Tests](https://github.com/geometric-intelligence/bgbench/actions/workflows/test.yml/badge.svg)](https://github.com/geometric-intelligence/bgbench/actions/workflows/test.yml) +[![Code Quality](https://github.com/geometric-intelligence/ogbench/actions/workflows/code-quality-main.yaml/badge.svg)](https://github.com/geometric-intelligence/ogbench/actions/workflows/code-quality-main.yaml) +[![Dependencies](https://github.com/geometric-intelligence/ogbench/actions/workflows/dependabot/dependabot-updates/badge.svg)](https://github.com/geometric-intelligence/ogbench/actions/workflows/dependabot/dependabot-updates) +[![Tests](https://github.com/geometric-intelligence/ogbench/actions/workflows/test.yml/badge.svg)](https://github.com/geometric-intelligence/ogbench/actions/workflows/test.yml) # OGBench — Omics Graph Benchmarking @@ -19,12 +19,12 @@ A benchmarking framework for Graph Neural Networks on omics datasets. OGBench pr ## Installation ```bash -git clone git@github.com:geometric-intelligence/bgbench.git -cd bgbench +git clone git@github.com:geometric-intelligence/ogbench.git +cd ogbench -conda create -n bgbench python=3.12 +conda create -n ogbench python=3.12 curl -LsSf https://astral.sh/uv/install.sh | sh -conda activate bgbench +conda activate ogbench uv venv uv pip install -e '.[all]' @@ -175,7 +175,7 @@ pytest tests/nn/ -v # specific module ### Project Structure ``` -bgbench/ +ogbench/ ├── ogbench/ # Main Python package │ ├── run.py # Training entry point │ ├── baseline.py # Sklearn baseline experiments diff --git a/configs/hparams_search/multi_dataset_grid_search.yaml b/configs/hparams_search/multi_dataset_grid_search.yaml index 3a40ae17..6ee3cf66 100644 --- a/configs/hparams_search/multi_dataset_grid_search.yaml +++ b/configs/hparams_search/multi_dataset_grid_search.yaml @@ -22,9 +22,9 @@ seeds: [42, 123, 456] # Fixed parameters (not searched) fixed: - logger.wandb.project: bgbench_dataset_grid_search_final + logger.wandb.project: ogbench_dataset_grid_search_final logger.wandb.entity: bioshape-lab - paths.root_dir: /scratch/lcornelis/bgbench + paths.root_dir: /scratch/lcornelis/ogbench # Shared hyperparameter grid (applied to all models) shared_grid: diff --git a/configs/hparams_search/smoke_test.yaml b/configs/hparams_search/smoke_test.yaml index f213f57d..a7e2766f 100644 --- a/configs/hparams_search/smoke_test.yaml +++ b/configs/hparams_search/smoke_test.yaml @@ -10,7 +10,7 @@ models: seeds: [42] fixed: - logger.wandb.project: bgbench_smoke_test + logger.wandb.project: ogbench_smoke_test shared_grid: dataset.loader.parameters.node_sample_ratio: diff --git a/croissant_bgbench.json b/croissant_ogbench.json similarity index 100% rename from croissant_bgbench.json rename to croissant_ogbench.json diff --git a/env_setup.sh b/env_setup.sh index 8a2d0b35..e570fa2c 100755 --- a/env_setup.sh +++ b/env_setup.sh @@ -26,11 +26,11 @@ if ! command -v conda &> /dev/null; then echo "Conda installation complete" fi -if ! conda env list | grep -q "bgbench"; then - conda create -n bgbench python=3.12 -y +if ! conda env list | grep -q "ogbench"; then + conda create -n ogbench python=3.12 -y fi -conda activate bgbench +conda activate ogbench pip install --upgrade pip pip install -e '.[all]' diff --git a/notebooks/datasets.ipynb b/notebooks/datasets.ipynb index a40af1c5..a870116f 100644 --- a/notebooks/datasets.ipynb +++ b/notebooks/datasets.ipynb @@ -17,7 +17,7 @@ "import pandas as pd\n", "import seaborn as sns\n", "\n", - "sys.path.append(\"/home/johmathe/bgbench\")\n", + "sys.path.append(\"/home/johmathe/ogbench\")\n", "os.environ[\"PYTHONPATH\"] = os.pathsep.join(sys.path)\n", "from src.data import hf_datamodule\n" ] @@ -115,7 +115,7 @@ ], "metadata": { "kernelspec": { - "display_name": "bgbench", + "display_name": "ogbench", "language": "python", "name": "python3" }, diff --git a/ogbench/transforms/liftings/graph2hypergraph/knn_lifting.py b/ogbench/transforms/liftings/graph2hypergraph/knn_lifting.py index 56922af8..6d02343d 100644 --- a/ogbench/transforms/liftings/graph2hypergraph/knn_lifting.py +++ b/ogbench/transforms/liftings/graph2hypergraph/knn_lifting.py @@ -57,6 +57,14 @@ def lift_topology(self, data: torch_geometric.data.Data) -> dict: The lifted topology. """ num_nodes = data.x.shape[0] + # pyg-lib's knn_graph segfaults on empty inputs, so short-circuit. + if num_nodes == 0: + empty_incidence = torch.zeros(0, 0).to_sparse_coo() + return { + 'incidence_hyperedges': empty_incidence, + 'num_hyperedges': 0, + 'x_0': data.x, + } data.pos = data.x num_hyperedges = num_nodes incidence_1 = torch.zeros(num_nodes, num_nodes) diff --git a/pyproject.toml b/pyproject.toml index 72b9b693..4fb99c72 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -58,6 +58,7 @@ dependencies=[ "torch-scatter", "torch==2.8.0", "torch-cluster", + "pyg-lib>=0.6.0", "combat>=0.3.3", "topomodelx @ git+https://github.com/pyt-team/TopoModelX.git", "toponetx @ git+https://github.com/pyt-team/TopoNetX.git", @@ -96,8 +97,8 @@ all = ["ogbench[dev, doc]"] ogbench-train = "ogbench.run:main" [project.urls] -homepage="https://geometric-intelligence.github.io/bgbench/index.html" -repository="https://github.com/geometric-intelligence/bgbench" +homepage="https://geometric-intelligence.github.io/ogbench/index.html" +repository="https://github.com/geometric-intelligence/ogbench" [tool.ruff] line-length = 99 diff --git a/tutorials/benchmarking_stats.ipynb b/tutorials/benchmarking_stats.ipynb index 3a3d0b56..af7e64da 100644 --- a/tutorials/benchmarking_stats.ipynb +++ b/tutorials/benchmarking_stats.ipynb @@ -281,7 +281,7 @@ ], "metadata": { "kernelspec": { - "display_name": "bgbench", + "display_name": "ogbench", "language": "python", "name": "python3" }, diff --git a/tutorials/dataset_stats.ipynb b/tutorials/dataset_stats.ipynb index 8224f7c0..99f097e3 100644 --- a/tutorials/dataset_stats.ipynb +++ b/tutorials/dataset_stats.ipynb @@ -83,7 +83,7 @@ "metadata": {}, "outputs": [], "source": [ - "root = \"/home/lcornelis/code/bgbench/run_data/omics/\"\n", + "root = \"/home/lcornelis/code/ogbench/run_data/omics/\"\n", "name = osp.join(\n", " root,\n", " f\"{dataset.data_name}\",\n", @@ -119,7 +119,7 @@ " Get statistics of the graph.\n", " \"\"\"\n", " # Load the adjacency matrix\n", - " root = \"/home/lcornelis/code/bgbench/run_data/omics/\"\n", + " root = \"/home/lcornelis/code/ogbench/run_data/omics/\"\n", " name = osp.join(\n", " root,\n", " f\"{dataset.data_name}\",\n", diff --git a/tutorials/dataset_stats_analysis.py b/tutorials/dataset_stats_analysis.py index 1eac5077..8026d426 100644 --- a/tutorials/dataset_stats_analysis.py +++ b/tutorials/dataset_stats_analysis.py @@ -43,7 +43,7 @@ def load_dataset( # Pass 'full' as string, not None, because HFOmicsDataset checks for 'full' string ratio_value = 'full' if node_sample_ratio == 'full' else float(node_sample_ratio) dataset = HFOmicsDataset( - root='/scratch/lcornelis/bgbench-1/run_data/omics', + root='/scratch/lcornelis/ogbench-1/run_data/omics', data_name=dataset_name, method=method, adjacency_threshold=adj_thresh, @@ -79,7 +79,7 @@ def get_graph_stats(dataset: Any) -> dict[str, float]: graph.add_nodes_from(range(num_nodes)) graph.add_edges_from(edge_list) else: - root = '/home/lcornelis/code/bgbench-1/run_data/omics/' + root = '/home/lcornelis/code/ogbench-1/run_data/omics/' name = osp.join( root, f'{dataset.data_name}', @@ -460,7 +460,7 @@ def main(): if __name__ == '__main__': try: print('Testing dataset loading...') - dataset = load_dataset('addneuromed', 0.5, '0.3', 'variance', adjacency_method='string', string_data_dir='/home/johmathe/bgbench/data') + dataset = load_dataset('addneuromed', 0.5, '0.3', 'variance', adjacency_method='string', string_data_dir='/home/johmathe/ogbench/data') print(f'Dataset loaded successfully: {dataset}') print(f'Dataset length: {len(dataset)}') if len(dataset) > 0: diff --git a/webapp/public/data/stats.json b/webapp/public/data/stats.json index cf6ba66c..2943b932 100644 --- a/webapp/public/data/stats.json +++ b/webapp/public/data/stats.json @@ -12479,4 +12479,4 @@ "dataset": "brca", "adjacency_method": "string" } -} \ No newline at end of file +}