From 9ebc25bc082868424515d5562031b5c579b72907 Mon Sep 17 00:00:00 2001 From: Kyle Montemayor Date: Mon, 18 May 2026 23:10:24 +0000 Subject: [PATCH 1/3] Fix latent bugs surfaced by ty migration Resolves 4 `# ty: ignore` comments from PR #585 by fixing the underlying logic (not suppressing). Includes None-division risk in loss.py and masked None flow in optimizer code paths. See: docs/plans/20260518-ty-ignore-resolution-meta.md --- .../common/torchrec/utils.py | 22 +++-- .../node_classification_modeling_task_spec.py | 5 +- .../utils/profiler_wrapper.py | 2 +- gigl/src/common/models/layers/loss.py | 22 +++-- .../unit/experimental/test_torchrec_utils.py | 83 +++++++++++++++++++ .../common/models/layers/legacy_loss_test.py | 78 +++++++++++++++++ 6 files changed, 195 insertions(+), 17 deletions(-) create mode 100644 tests/unit/experimental/test_torchrec_utils.py create mode 100644 tests/unit/src/common/models/layers/legacy_loss_test.py diff --git a/gigl/experimental/knowledge_graph_embedding/common/torchrec/utils.py b/gigl/experimental/knowledge_graph_embedding/common/torchrec/utils.py index f8726dcb1..9d3b54ffd 100644 --- a/gigl/experimental/knowledge_graph_embedding/common/torchrec/utils.py +++ b/gigl/experimental/knowledge_graph_embedding/common/torchrec/utils.py @@ -120,24 +120,30 @@ def get_sharding_plan( def apply_sparse_optimizer( parameters: Iterable[nn.Parameter], optimizer_cls: Optional[Type[Optimizer]] = None, - optimizer_kwargs: Dict[str, Any] = dict(), + optimizer_kwargs: Optional[Dict[str, Any]] = None, ) -> None: - """ - Apply a sparse optimizer to the sparse/EBC parts of a model. + """Apply a sparse optimizer to the sparse/EBC parts of a model. + This optimizer is fused, so it will be applied directly in the backward pass. This should only be used for sparse parameters. Args: parameters (Iterable[nn.Parameter]): The sparse parameters to apply the optimizer to. - optimizer_cls (Type[Optimizer], optional): The optimizer class to use. Defaults to RowWiseAdagrad. - optimizer_kwargs (Dict[str, Any], optional): Additional keyword arguments for the optimizer. + optimizer_cls (Type[Optimizer], optional): The optimizer class to use. + Defaults to ``RowWiseAdagrad`` when ``None`` is passed. + optimizer_kwargs (Dict[str, Any], optional): Additional keyword arguments + for the optimizer. Defaults to ``{"lr": 0.01}`` when both + ``optimizer_cls`` and ``optimizer_kwargs`` are unset. """ - if not optimizer_cls and optimizer_kwargs: + if optimizer_cls is None: optimizer_cls = RowWiseAdagrad - optimizer_kwargs = {"lr": 0.01} - apply_optimizer_in_backward(optimizer_cls, parameters, optimizer_kwargs) # ty: ignore[invalid-argument-type] TODO(ty-torch-api-surface): fix ty false positives around the torch API surface. + if not optimizer_kwargs: + optimizer_kwargs = {"lr": 0.01} + if optimizer_kwargs is None: + optimizer_kwargs = {} + apply_optimizer_in_backward(optimizer_cls, parameters, optimizer_kwargs) def apply_dense_optimizer( diff --git a/gigl/src/common/modeling_task_specs/node_classification_modeling_task_spec.py b/gigl/src/common/modeling_task_specs/node_classification_modeling_task_spec.py index bfaba1fb0..feed188b4 100644 --- a/gigl/src/common/modeling_task_specs/node_classification_modeling_task_spec.py +++ b/gigl/src/common/modeling_task_specs/node_classification_modeling_task_spec.py @@ -248,11 +248,14 @@ def train( graph_backend=self._graph_backend, device=device, ) + assert data_loaders.train_main is not None, ( + "train_main dataloader required for training" + ) best_val_acc = 0.0 for epoch in range(self.__num_epochs): logger.info(f"Batch training... for epoch {epoch}/{self.__num_epochs}") train_loss = self._train( - data_loader=data_loaders.train_main, # type: ignore[arg-type] # ty: ignore[invalid-argument-type] TODO(ty-torch-api-surface): fix ty false positives around the torch API surface. + data_loader=data_loaders.train_main, device=device, ) train_loss_str = ( diff --git a/gigl/src/common/modeling_task_specs/utils/profiler_wrapper.py b/gigl/src/common/modeling_task_specs/utils/profiler_wrapper.py index 9e634fce4..2a7c423a5 100644 --- a/gigl/src/common/modeling_task_specs/utils/profiler_wrapper.py +++ b/gigl/src/common/modeling_task_specs/utils/profiler_wrapper.py @@ -20,7 +20,7 @@ class TorchProfiler: def __init__(self, **kwargs) -> None: self.trace_handler = tensorboard_trace_handler( - dir_name=TMP_PROFILER_LOG_DIR_NAME, # type: ignore[arg-type] # ty: ignore[invalid-argument-type] TODO(ty-torch-api-surface): fix ty false positives around the torch API surface. + dir_name=TMP_PROFILER_LOG_DIR_NAME.uri, use_gzip=True, ) self.wait = int(kwargs.get("wait", 5)) diff --git a/gigl/src/common/models/layers/loss.py b/gigl/src/common/models/layers/loss.py index 08ef93c7a..9ca934126 100644 --- a/gigl/src/common/models/layers/loss.py +++ b/gigl/src/common/models/layers/loss.py @@ -100,18 +100,27 @@ def forward( class SoftmaxLoss(nn.Module): - """ - A loss layer built on top of the PyTorch implementation of the softmax cross entropy loss. + """A loss layer built on top of the PyTorch implementation of the softmax cross entropy loss. + + The loss function by default calculates the loss by + cross_entropy(all_scores / softmax_temperature, ys, reduction='sum'). - The loss function by default calculate the loss by - cross_entropy(all_scores, ys, reduction='sum') + Dividing the scores by ``softmax_temperature`` controls the sharpness of the + softmax distribution. A temperature of ``1.0`` is a no-op and corresponds to + plain cross-entropy. See: https://pytorch.org/docs/stable/generated/torch.nn.CrossEntropyLoss.html for more information. + + Args: + softmax_temperature (float): Scaling factor applied via ``scores / + softmax_temperature`` before computing cross-entropy. Defaults to + ``1.0`` (no scaling). Must be non-zero; the caller is responsible for + supplying a finite, non-zero value. """ def __init__( self, - softmax_temperature: Optional[float] = None, + softmax_temperature: float = 1.0, ): super(SoftmaxLoss, self).__init__() self.softmax_temperature = softmax_temperature @@ -142,8 +151,7 @@ def _calculate_softmax_loss( ) # shape=[num_pos_nodes] loss = F.cross_entropy( - input=all_scores - / self.softmax_temperature, # https://github.com/Snapchat/GiGL/issues/408 # ty: ignore[unsupported-operator] TODO(ty-torch-union-inference): fix ty Tensor/Module union inference regressions. + input=all_scores / self.softmax_temperature, target=ys, reduction="sum", ) diff --git a/tests/unit/experimental/test_torchrec_utils.py b/tests/unit/experimental/test_torchrec_utils.py new file mode 100644 index 000000000..6307a7be1 --- /dev/null +++ b/tests/unit/experimental/test_torchrec_utils.py @@ -0,0 +1,83 @@ +"""Regression tests for ``apply_sparse_optimizer`` in ``gigl.experimental.knowledge_graph_embedding.common.torchrec.utils``. + +These tests guard the optimizer-flow bug surfaced by the ty migration: the +previous guard ``if not optimizer_cls and optimizer_kwargs`` only fired when +both arguments were truthy, allowing ``None`` to leak through into +``apply_optimizer_in_backward`` whenever ``optimizer_cls`` was unset but +``optimizer_kwargs`` was the default empty dict. +""" + +from typing import Any +from unittest.mock import patch + +import torch +import torch.nn as nn +from torch.optim import SGD + +from gigl.experimental.knowledge_graph_embedding.common.torchrec import utils +from tests.test_assets.test_case import TestCase + + +def _make_dummy_parameters() -> list[nn.Parameter]: + """Build a one-parameter list so the optimizer call has something to bind to.""" + return [nn.Parameter(torch.zeros(1))] + + +class ApplySparseOptimizerTest(TestCase): + def test_none_optimizer_cls_defaults_to_rowwise_adagrad(self) -> None: + """Calling without ``optimizer_cls`` must default to RowWiseAdagrad. + + Regression: the old guard ``if not optimizer_cls and optimizer_kwargs`` + skipped the assignment when ``optimizer_kwargs`` defaulted to ``{}``, + leaving ``optimizer_cls=None`` to crash ``apply_optimizer_in_backward``. + """ + parameters = _make_dummy_parameters() + with patch.object(utils, "apply_optimizer_in_backward") as mock_apply: + utils.apply_sparse_optimizer(parameters=parameters) + mock_apply.assert_called_once() + forwarded_cls, forwarded_params, forwarded_kwargs = mock_apply.call_args[0] + self.assertIs(forwarded_cls, utils.RowWiseAdagrad) + self.assertEqual(list(forwarded_params), parameters) + self.assertEqual(forwarded_kwargs, {"lr": 0.01}) + + def test_none_optimizer_cls_with_kwargs_still_defaults_to_rowwise_adagrad( + self, + ) -> None: + """``optimizer_cls=None`` with user kwargs uses RowWiseAdagrad + user kwargs. + + The user-supplied kwargs (when non-empty) are preserved verbatim; only + the missing class is filled in. + """ + parameters = _make_dummy_parameters() + user_kwargs: dict[str, Any] = {"lr": 0.5} + with patch.object(utils, "apply_optimizer_in_backward") as mock_apply: + utils.apply_sparse_optimizer( + parameters=parameters, optimizer_kwargs=user_kwargs + ) + forwarded_cls, _, forwarded_kwargs = mock_apply.call_args[0] + self.assertIs(forwarded_cls, utils.RowWiseAdagrad) + self.assertEqual(forwarded_kwargs, {"lr": 0.5}) + + def test_explicit_optimizer_cls_is_forwarded(self) -> None: + """An explicit ``optimizer_cls`` must be forwarded unchanged.""" + parameters = _make_dummy_parameters() + with patch.object(utils, "apply_optimizer_in_backward") as mock_apply: + utils.apply_sparse_optimizer(parameters=parameters, optimizer_cls=SGD) + forwarded_cls, _, forwarded_kwargs = mock_apply.call_args[0] + self.assertIs(forwarded_cls, SGD) + # No kwargs were supplied; the call site sees an empty dict, not None. + self.assertEqual(forwarded_kwargs, {}) + + def test_explicit_optimizer_cls_and_kwargs_forwarded(self) -> None: + """Explicit ``optimizer_cls`` and kwargs must flow through verbatim.""" + parameters = _make_dummy_parameters() + user_kwargs: dict[str, Any] = {"lr": 0.1, "momentum": 0.9} + with patch.object(utils, "apply_optimizer_in_backward") as mock_apply: + utils.apply_sparse_optimizer( + parameters=parameters, + optimizer_cls=SGD, + optimizer_kwargs=user_kwargs, + ) + forwarded_cls, _, forwarded_kwargs = mock_apply.call_args[0] + self.assertIs(forwarded_cls, SGD) + self.assertEqual(forwarded_kwargs, {"lr": 0.1, "momentum": 0.9}) diff --git a/tests/unit/src/common/models/layers/legacy_loss_test.py b/tests/unit/src/common/models/layers/legacy_loss_test.py new file mode 100644 index 000000000..8ec3b7c61 --- /dev/null +++ b/tests/unit/src/common/models/layers/legacy_loss_test.py @@ -0,0 +1,78 @@ +"""Regression tests for the legacy loss layers under ``gigl.src.common.models.layers.loss``. + +These tests guard against latent ``None`` propagation bugs that the ty migration +surfaced. In particular, ``SoftmaxLoss.softmax_temperature`` used to be typed as +``Optional[float]`` with a ``None`` default, which made the in-loop expression +``all_scores / self.softmax_temperature`` raise ``TypeError`` at runtime whenever +the constructor was called without an explicit temperature. +""" + +import torch + +from gigl.src.common.models.layers.loss import SoftmaxLoss +from gigl.src.common.types.graph_data import CondensedEdgeType +from gigl.src.common.types.task_inputs import BatchScores +from tests.test_assets.test_case import TestCase + + +def _make_batch_scores() -> dict[CondensedEdgeType, BatchScores]: + """Build a minimal non-empty BatchScores dict suitable for SoftmaxLoss.forward. + + ``BatchScores`` is annotated as ``FloatTensor`` but PyTorch factories return + plain ``Tensor`` — PR 3 of the ty-cleanup plan will relax these annotations + to ``Tensor``. Until then, ty rejects the call, so we ignore the + specialization mismatch at the construction site. + """ + pos_scores = torch.tensor([[0.8, 0.6]]) + hard_neg_scores = torch.tensor([[0.1, 0.2]]) + random_neg_scores = torch.tensor([[0.05, 0.15]]) + return { + CondensedEdgeType(0): BatchScores( + pos_scores=pos_scores, # ty: ignore[invalid-argument-type] TODO(ty-torch-tensor-specialization): cleared by PR 3 relaxing BatchScores to Tensor. + hard_neg_scores=hard_neg_scores, # ty: ignore[invalid-argument-type] TODO(ty-torch-tensor-specialization): cleared by PR 3 relaxing BatchScores to Tensor. + random_neg_scores=random_neg_scores, # ty: ignore[invalid-argument-type] TODO(ty-torch-tensor-specialization): cleared by PR 3 relaxing BatchScores to Tensor. + ) + } + + +class SoftmaxLossTest(TestCase): + def test_default_temperature_does_not_raise(self) -> None: + """SoftmaxLoss must be constructible without explicit temperature. + + Regression: before the ty-driven fix, ``softmax_temperature`` defaulted to + ``None``, so ``all_scores / self.softmax_temperature`` raised + ``TypeError: unsupported operand type(s) for /: 'Tensor' and 'NoneType'``. + """ + loss_fn = SoftmaxLoss() + # The default is 1.0 (no scaling), and dividing by 1.0 must not raise. + self.assertEqual(loss_fn.softmax_temperature, 1.0) + loss, sample_size = loss_fn(loss_input=[_make_batch_scores()]) + self.assertTrue(torch.isfinite(loss).item()) + self.assertGreater(sample_size, 0) + + def test_default_temperature_matches_unit_scale(self) -> None: + """Default temperature ``1.0`` produces the same loss as explicit ``1.0``.""" + default_loss, _ = SoftmaxLoss()(loss_input=[_make_batch_scores()]) + explicit_loss, _ = SoftmaxLoss(softmax_temperature=1.0)( + loss_input=[_make_batch_scores()] + ) + self.assert_tensor_equality(default_loss, explicit_loss) + + def test_custom_temperature_scales_loss(self) -> None: + """Smaller temperatures sharpen the softmax, yielding a different loss.""" + unit_loss, _ = SoftmaxLoss(softmax_temperature=1.0)( + loss_input=[_make_batch_scores()] + ) + sharper_loss, _ = SoftmaxLoss(softmax_temperature=0.5)( + loss_input=[_make_batch_scores()] + ) + # The two scalar losses must differ; both must be finite. + self.assertTrue(torch.isfinite(unit_loss).item()) + self.assertTrue(torch.isfinite(sharper_loss).item()) + self.assertNotAlmostEqual(unit_loss.item(), sharper_loss.item()) + + def test_empty_input_returns_zero_loss(self) -> None: + """An empty ``loss_input`` list yields a zero loss with sample_size 1.""" + loss, sample_size = SoftmaxLoss()(loss_input=[]) + self.assert_tensor_equality(loss, torch.tensor(0.0)) + self.assertEqual(sample_size, 1) From efbbd1810c8501cb66e7689c34af1bcc5cf3d5a9 Mon Sep 17 00:00:00 2001 From: Kyle Montemayor Date: Tue, 19 May 2026 17:46:02 +0000 Subject: [PATCH 2/3] Address codex review: rename regression test to match discovery pattern The repo's default unit-test pattern is *_test.py (Makefile:29, tests/unit/main.py:8); the original name test_torchrec_utils.py uses the prefix form, so the new regression coverage for apply_sparse_optimizer would be silently skipped by make unit_test_py / CI. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../{test_torchrec_utils.py => torchrec_utils_test.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/unit/experimental/{test_torchrec_utils.py => torchrec_utils_test.py} (100%) diff --git a/tests/unit/experimental/test_torchrec_utils.py b/tests/unit/experimental/torchrec_utils_test.py similarity index 100% rename from tests/unit/experimental/test_torchrec_utils.py rename to tests/unit/experimental/torchrec_utils_test.py From 93fb43e46a407ab1860f37b1140f8220e40447c8 Mon Sep 17 00:00:00 2001 From: Kyle Montemayor Date: Tue, 19 May 2026 20:55:06 +0000 Subject: [PATCH 3/3] Address PR feedback: simplify regression tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Consolidate SoftmaxLoss regression coverage into the existing loss_test.py (one assertion on the default temperature). - Delete legacy_loss_test.py (was a misnamed parallel file created to avoid clobbering the existing loss_test.py). - Trim torchrec_utils_test.py from 4 tests to 1 — the actual regression path (defaults flowing through to RowWiseAdagrad). - Drop test-side narrative docstrings that re-described what the fix did rather than the contract under test. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../unit/experimental/torchrec_utils_test.py | 78 +++---------------- .../common/models/layers/legacy_loss_test.py | 78 ------------------- .../src/common/models/layers/loss_test.py | 9 +++ 3 files changed, 18 insertions(+), 147 deletions(-) delete mode 100644 tests/unit/src/common/models/layers/legacy_loss_test.py diff --git a/tests/unit/experimental/torchrec_utils_test.py b/tests/unit/experimental/torchrec_utils_test.py index 6307a7be1..3a8f07061 100644 --- a/tests/unit/experimental/torchrec_utils_test.py +++ b/tests/unit/experimental/torchrec_utils_test.py @@ -1,83 +1,23 @@ -"""Regression tests for ``apply_sparse_optimizer`` in ``gigl.experimental.knowledge_graph_embedding.common.torchrec.utils``. +"""Regression tests for ``apply_sparse_optimizer``.""" -These tests guard the optimizer-flow bug surfaced by the ty migration: the -previous guard ``if not optimizer_cls and optimizer_kwargs`` only fired when -both arguments were truthy, allowing ``None`` to leak through into -``apply_optimizer_in_backward`` whenever ``optimizer_cls`` was unset but -``optimizer_kwargs`` was the default empty dict. -""" - -from typing import Any from unittest.mock import patch import torch import torch.nn as nn -from torch.optim import SGD from gigl.experimental.knowledge_graph_embedding.common.torchrec import utils from tests.test_assets.test_case import TestCase -def _make_dummy_parameters() -> list[nn.Parameter]: - """Build a one-parameter list so the optimizer call has something to bind to.""" - return [nn.Parameter(torch.zeros(1))] - - class ApplySparseOptimizerTest(TestCase): - def test_none_optimizer_cls_defaults_to_rowwise_adagrad(self) -> None: - """Calling without ``optimizer_cls`` must default to RowWiseAdagrad. - - Regression: the old guard ``if not optimizer_cls and optimizer_kwargs`` - skipped the assignment when ``optimizer_kwargs`` defaulted to ``{}``, - leaving ``optimizer_cls=None`` to crash ``apply_optimizer_in_backward``. + def test_default_optimizer_uses_rowwise_adagrad(self) -> None: + """When called with neither ``optimizer_cls`` nor ``optimizer_kwargs``, + ``apply_sparse_optimizer`` falls back to ``RowWiseAdagrad`` with + ``lr=0.01`` and forwards them to ``apply_optimizer_in_backward``. """ - parameters = _make_dummy_parameters() + parameters = [nn.Parameter(torch.zeros(1))] with patch.object(utils, "apply_optimizer_in_backward") as mock_apply: utils.apply_sparse_optimizer(parameters=parameters) - mock_apply.assert_called_once() - forwarded_cls, forwarded_params, forwarded_kwargs = mock_apply.call_args[0] - self.assertIs(forwarded_cls, utils.RowWiseAdagrad) - self.assertEqual(list(forwarded_params), parameters) - self.assertEqual(forwarded_kwargs, {"lr": 0.01}) - - def test_none_optimizer_cls_with_kwargs_still_defaults_to_rowwise_adagrad( - self, - ) -> None: - """``optimizer_cls=None`` with user kwargs uses RowWiseAdagrad + user kwargs. - - The user-supplied kwargs (when non-empty) are preserved verbatim; only - the missing class is filled in. - """ - parameters = _make_dummy_parameters() - user_kwargs: dict[str, Any] = {"lr": 0.5} - with patch.object(utils, "apply_optimizer_in_backward") as mock_apply: - utils.apply_sparse_optimizer( - parameters=parameters, optimizer_kwargs=user_kwargs - ) - forwarded_cls, _, forwarded_kwargs = mock_apply.call_args[0] - self.assertIs(forwarded_cls, utils.RowWiseAdagrad) - self.assertEqual(forwarded_kwargs, {"lr": 0.5}) - - def test_explicit_optimizer_cls_is_forwarded(self) -> None: - """An explicit ``optimizer_cls`` must be forwarded unchanged.""" - parameters = _make_dummy_parameters() - with patch.object(utils, "apply_optimizer_in_backward") as mock_apply: - utils.apply_sparse_optimizer(parameters=parameters, optimizer_cls=SGD) - forwarded_cls, _, forwarded_kwargs = mock_apply.call_args[0] - self.assertIs(forwarded_cls, SGD) - # No kwargs were supplied; the call site sees an empty dict, not None. - self.assertEqual(forwarded_kwargs, {}) - - def test_explicit_optimizer_cls_and_kwargs_forwarded(self) -> None: - """Explicit ``optimizer_cls`` and kwargs must flow through verbatim.""" - parameters = _make_dummy_parameters() - user_kwargs: dict[str, Any] = {"lr": 0.1, "momentum": 0.9} - with patch.object(utils, "apply_optimizer_in_backward") as mock_apply: - utils.apply_sparse_optimizer( - parameters=parameters, - optimizer_cls=SGD, - optimizer_kwargs=user_kwargs, - ) - forwarded_cls, _, forwarded_kwargs = mock_apply.call_args[0] - self.assertIs(forwarded_cls, SGD) - self.assertEqual(forwarded_kwargs, {"lr": 0.1, "momentum": 0.9}) + forwarded_cls, _, forwarded_kwargs = mock_apply.call_args[0] + self.assertIs(forwarded_cls, utils.RowWiseAdagrad) + self.assertEqual(forwarded_kwargs, {"lr": 0.01}) diff --git a/tests/unit/src/common/models/layers/legacy_loss_test.py b/tests/unit/src/common/models/layers/legacy_loss_test.py deleted file mode 100644 index 8ec3b7c61..000000000 --- a/tests/unit/src/common/models/layers/legacy_loss_test.py +++ /dev/null @@ -1,78 +0,0 @@ -"""Regression tests for the legacy loss layers under ``gigl.src.common.models.layers.loss``. - -These tests guard against latent ``None`` propagation bugs that the ty migration -surfaced. In particular, ``SoftmaxLoss.softmax_temperature`` used to be typed as -``Optional[float]`` with a ``None`` default, which made the in-loop expression -``all_scores / self.softmax_temperature`` raise ``TypeError`` at runtime whenever -the constructor was called without an explicit temperature. -""" - -import torch - -from gigl.src.common.models.layers.loss import SoftmaxLoss -from gigl.src.common.types.graph_data import CondensedEdgeType -from gigl.src.common.types.task_inputs import BatchScores -from tests.test_assets.test_case import TestCase - - -def _make_batch_scores() -> dict[CondensedEdgeType, BatchScores]: - """Build a minimal non-empty BatchScores dict suitable for SoftmaxLoss.forward. - - ``BatchScores`` is annotated as ``FloatTensor`` but PyTorch factories return - plain ``Tensor`` — PR 3 of the ty-cleanup plan will relax these annotations - to ``Tensor``. Until then, ty rejects the call, so we ignore the - specialization mismatch at the construction site. - """ - pos_scores = torch.tensor([[0.8, 0.6]]) - hard_neg_scores = torch.tensor([[0.1, 0.2]]) - random_neg_scores = torch.tensor([[0.05, 0.15]]) - return { - CondensedEdgeType(0): BatchScores( - pos_scores=pos_scores, # ty: ignore[invalid-argument-type] TODO(ty-torch-tensor-specialization): cleared by PR 3 relaxing BatchScores to Tensor. - hard_neg_scores=hard_neg_scores, # ty: ignore[invalid-argument-type] TODO(ty-torch-tensor-specialization): cleared by PR 3 relaxing BatchScores to Tensor. - random_neg_scores=random_neg_scores, # ty: ignore[invalid-argument-type] TODO(ty-torch-tensor-specialization): cleared by PR 3 relaxing BatchScores to Tensor. - ) - } - - -class SoftmaxLossTest(TestCase): - def test_default_temperature_does_not_raise(self) -> None: - """SoftmaxLoss must be constructible without explicit temperature. - - Regression: before the ty-driven fix, ``softmax_temperature`` defaulted to - ``None``, so ``all_scores / self.softmax_temperature`` raised - ``TypeError: unsupported operand type(s) for /: 'Tensor' and 'NoneType'``. - """ - loss_fn = SoftmaxLoss() - # The default is 1.0 (no scaling), and dividing by 1.0 must not raise. - self.assertEqual(loss_fn.softmax_temperature, 1.0) - loss, sample_size = loss_fn(loss_input=[_make_batch_scores()]) - self.assertTrue(torch.isfinite(loss).item()) - self.assertGreater(sample_size, 0) - - def test_default_temperature_matches_unit_scale(self) -> None: - """Default temperature ``1.0`` produces the same loss as explicit ``1.0``.""" - default_loss, _ = SoftmaxLoss()(loss_input=[_make_batch_scores()]) - explicit_loss, _ = SoftmaxLoss(softmax_temperature=1.0)( - loss_input=[_make_batch_scores()] - ) - self.assert_tensor_equality(default_loss, explicit_loss) - - def test_custom_temperature_scales_loss(self) -> None: - """Smaller temperatures sharpen the softmax, yielding a different loss.""" - unit_loss, _ = SoftmaxLoss(softmax_temperature=1.0)( - loss_input=[_make_batch_scores()] - ) - sharper_loss, _ = SoftmaxLoss(softmax_temperature=0.5)( - loss_input=[_make_batch_scores()] - ) - # The two scalar losses must differ; both must be finite. - self.assertTrue(torch.isfinite(unit_loss).item()) - self.assertTrue(torch.isfinite(sharper_loss).item()) - self.assertNotAlmostEqual(unit_loss.item(), sharper_loss.item()) - - def test_empty_input_returns_zero_loss(self) -> None: - """An empty ``loss_input`` list yields a zero loss with sample_size 1.""" - loss, sample_size = SoftmaxLoss()(loss_input=[]) - self.assert_tensor_equality(loss, torch.tensor(0.0)) - self.assertEqual(sample_size, 1) diff --git a/tests/unit/src/common/models/layers/loss_test.py b/tests/unit/src/common/models/layers/loss_test.py index 5c0f3f073..4be356ca3 100644 --- a/tests/unit/src/common/models/layers/loss_test.py +++ b/tests/unit/src/common/models/layers/loss_test.py @@ -2,6 +2,7 @@ import torch.nn.functional as F from gigl.nn.loss import RetrievalLoss +from gigl.src.common.models.layers.loss import SoftmaxLoss from tests.test_assets.test_case import TestCase @@ -176,3 +177,11 @@ def test_empty_loss(self): scores=empty_scores, query_ids=query_ids, candidate_ids=candidate_ids ) self.assert_tensor_equality(loss, expected_loss) + + +class SoftmaxLossDefaultTemperatureTest(TestCase): + def test_default_temperature_is_one(self) -> None: + """SoftmaxLoss defaults softmax_temperature to 1.0 so the forward-pass + division scores / softmax_temperature is always well-defined. + """ + self.assertEqual(SoftmaxLoss().softmax_temperature, 1.0)